Index: sys/sys/callout.h =================================================================== RCS file: /usr/users/he/nbcvs/netbsd/src/sys/sys/callout.h,v retrieving revision 1.17 diff -u -u -2 -0 -r1.17 callout.h --- sys/sys/callout.h 4 Feb 2003 01:21:06 -0000 1.17 +++ sys/sys/callout.h 13 Mar 2003 21:05:56 -0000 @@ -66,40 +66,43 @@ */ #ifndef _SYS_CALLOUT_H_ #define _SYS_CALLOUT_H_ struct callout_circq { struct callout_circq *cq_next; /* next element */ struct callout_circq *cq_prev; /* previous element */ }; struct callout { struct callout_circq c_list; /* linkage on queue */ void (*c_func)(void *); /* function to call */ void *c_arg; /* function argument */ int c_time; /* when callout fires */ int c_flags; /* state of this entry */ }; #define CALLOUT_PENDING 0x0002 /* callout is on the queue */ #define CALLOUT_FIRED 0x0004 /* callout has fired */ +#define CALLOUT_RUNNING 0x0008 /* callout function is being invoked */ #define CALLOUT_INITIALIZER_SETFUNC(func, arg) \ { { NULL, NULL }, func, arg, 0, 0 } #define CALLOUT_INITIALIZER CALLOUT_INITIALIZER_SETFUNC(NULL, NULL) #ifdef _KERNEL void callout_startup(void); void callout_init(struct callout *); void callout_setfunc(struct callout *, void (*)(void *), void *); void callout_reset(struct callout *, int, void (*)(void *), void *); void callout_schedule(struct callout *, int); void callout_stop(struct callout *); int callout_hardclock(void); #define callout_pending(c) ((c)->c_flags & CALLOUT_PENDING) #define callout_expired(c) ((c)->c_flags & CALLOUT_FIRED) +#define callout_is_running(c) ((c)->c_flags & CALLOUT_RUNNING) +#define callout_ack(c) ((c)->c_flags &= ~CALLOUT_RUNNING) #endif /* _KERNEL */ #endif /* !_SYS_CALLOUT_H_ */ Index: sys/kern/kern_timeout.c =================================================================== RCS file: /usr/users/he/nbcvs/netbsd/src/sys/kern/kern_timeout.c,v retrieving revision 1.5 diff -u -u -2 -0 -r1.5 kern_timeout.c --- sys/kern/kern_timeout.c 26 Feb 2003 23:13:19 -0000 1.5 +++ 
sys/kern/kern_timeout.c 13 Mar 2003 20:57:45 -0000 @@ -238,117 +238,117 @@ } /* * callout_reset: * * Reset a callout structure with a new function and argument, and * schedule it to run. */ void callout_reset(struct callout *c, int to_ticks, void (*func)(void *), void *arg) { int s, old_time; KASSERT(to_ticks >= 0); CALLOUT_LOCK(s); /* Initialize the time here, it won't change. */ old_time = c->c_time; c->c_time = to_ticks + hardclock_ticks; - c->c_flags &= ~CALLOUT_FIRED; + c->c_flags &= ~(CALLOUT_FIRED|CALLOUT_RUNNING); c->c_func = func; c->c_arg = arg; /* * If this timeout is already scheduled and now is moved * earlier, reschedule it now. Otherwise leave it in place * and let it be rescheduled later. */ if (callout_pending(c)) { if (c->c_time - old_time < 0) { CIRCQ_REMOVE(&c->c_list); CIRCQ_INSERT(&c->c_list, &timeout_todo); } } else { c->c_flags |= CALLOUT_PENDING; CIRCQ_INSERT(&c->c_list, &timeout_todo); } CALLOUT_UNLOCK(s); } /* * callout_schedule: * * Schedule a callout to run. The function and argument must * already be set in the callout structure. */ void callout_schedule(struct callout *c, int to_ticks) { int s, old_time; KASSERT(to_ticks >= 0); CALLOUT_LOCK(s); /* Initialize the time here, it won't change. */ old_time = c->c_time; c->c_time = to_ticks + hardclock_ticks; - c->c_flags &= ~CALLOUT_FIRED; + c->c_flags &= ~(CALLOUT_FIRED|CALLOUT_RUNNING); /* * If this timeout is already scheduled and now is moved * earlier, reschedule it now. Otherwise leave it in place * and let it be rescheduled later. */ if (callout_pending(c)) { if (c->c_time - old_time < 0) { CIRCQ_REMOVE(&c->c_list); CIRCQ_INSERT(&c->c_list, &timeout_todo); } } else { c->c_flags |= CALLOUT_PENDING; CIRCQ_INSERT(&c->c_list, &timeout_todo); } CALLOUT_UNLOCK(s); } /* * callout_stop: * * Cancel a pending callout. 
*/ void callout_stop(struct callout *c) { int s; CALLOUT_LOCK(s); if (callout_pending(c)) CIRCQ_REMOVE(&c->c_list); - c->c_flags &= ~(CALLOUT_PENDING|CALLOUT_FIRED); + c->c_flags &= ~(CALLOUT_PENDING|CALLOUT_FIRED|CALLOUT_RUNNING); CALLOUT_UNLOCK(s); } /* * This is called from hardclock() once every tick. * We return !0 if we need to schedule a softclock. */ int callout_hardclock(void) { int s; int needsoftclock; CALLOUT_LOCK(s); MOVEBUCKET(0, hardclock_ticks); if (MASKWHEEL(0, hardclock_ticks) == 0) { MOVEBUCKET(1, hardclock_ticks); if (MASKWHEEL(1, hardclock_ticks) == 0) { @@ -373,41 +373,41 @@ void *arg; int s; CALLOUT_LOCK(s); while (!CIRCQ_EMPTY(&timeout_todo)) { c = (struct callout *)CIRCQ_FIRST(&timeout_todo); /* XXX */ CIRCQ_REMOVE(&c->c_list); /* If due run it, otherwise insert it into the right bucket. */ if (c->c_time - hardclock_ticks > 0) { CIRCQ_INSERT(&c->c_list, BUCKET((c->c_time - hardclock_ticks), c->c_time)); } else { #ifdef CALLOUT_EVENT_COUNTERS if (c->c_time - hardclock_ticks < 0) callout_ev_late.ev_count++; #endif c->c_flags = (c->c_flags & ~CALLOUT_PENDING) | - CALLOUT_FIRED; + (CALLOUT_FIRED|CALLOUT_RUNNING); func = c->c_func; arg = c->c_arg; CALLOUT_UNLOCK(s); (*func)(arg); CALLOUT_LOCK(s); } } CALLOUT_UNLOCK(s); } #ifdef DDB static void db_show_callout_bucket(struct callout_circq *bucket) { struct callout *c; struct callout_circq *p; db_expr_t offset; Index: sys/netinet/tcp_input.c =================================================================== RCS file: /usr/users/he/nbcvs/netbsd/src/sys/netinet/tcp_input.c,v retrieving revision 1.163 diff -u -u -2 -0 -r1.163 tcp_input.c --- sys/netinet/tcp_input.c 1 Mar 2003 04:40:27 -0000 1.163 +++ sys/netinet/tcp_input.c 13 Mar 2003 20:59:15 -0000 @@ -2865,41 +2865,44 @@ } while (/*CONSTCOND*/0) #endif /* INET6 */ #define SYN_CACHE_RM(sc) \ do { \ TAILQ_REMOVE(&tcp_syn_cache[(sc)->sc_bucketidx].sch_bucket, \ (sc), sc_bucketq); \ (sc)->sc_tp = NULL; \ LIST_REMOVE((sc), sc_tpq); \ 
tcp_syn_cache[(sc)->sc_bucketidx].sch_length--; \ callout_stop(&(sc)->sc_timer); \ syn_cache_count--; \ } while (/*CONSTCOND*/0) #define SYN_CACHE_PUT(sc) \ do { \ if ((sc)->sc_ipopts) \ (void) m_free((sc)->sc_ipopts); \ if ((sc)->sc_route4.ro_rt != NULL) \ RTFREE((sc)->sc_route4.ro_rt); \ - pool_put(&syn_cache_pool, (sc)); \ + if (callout_is_running(&(sc)->sc_timer)) \ + (sc)->sc_flags |= SCF_DEAD; \ + else \ + pool_put(&syn_cache_pool, (sc)); \ } while (/*CONSTCOND*/0) struct pool syn_cache_pool; /* * We don't estimate RTT with SYNs, so each packet starts with the default * RTT and each timer step has a fixed timeout value. */ #define SYN_CACHE_TIMER_ARM(sc) \ do { \ TCPT_RANGESET((sc)->sc_rxtcur, \ TCPTV_SRTTDFLT * tcp_backoff[(sc)->sc_rxtshift], TCPTV_MIN, \ TCPTV_REXMTMAX); \ callout_reset(&(sc)->sc_timer, \ (sc)->sc_rxtcur * (hz / PR_SLOWHZ), syn_cache_timer, (sc)); \ } while (/*CONSTCOND*/0) #define SYN_CACHE_TIMESTAMP(sc) (tcp_now - (sc)->sc_timebase) void @@ -3012,40 +3015,48 @@ TAILQ_INSERT_TAIL(&scp->sch_bucket, sc, sc_bucketq); scp->sch_length++; syn_cache_count++; tcpstat.tcps_sc_added++; splx(s); } /* * Walk the timer queues, looking for SYN,ACKs that need to be retransmitted. * If we have retransmitted an entry the maximum number of times, expire * that entry. */ void syn_cache_timer(void *arg) { struct syn_cache *sc = arg; int s; s = splsoftnet(); + + callout_ack(&sc->sc_timer); + if (__predict_false(sc->sc_flags & SCF_DEAD)) { + tcpstat.tcps_sc_delayed_free++; + pool_put(&syn_cache_pool, sc); + splx(s); + return; + } if (__predict_false(sc->sc_rxtshift == TCP_MAXRXTSHIFT)) { /* Drop it -- too many retransmissions. */ goto dropit; } /* * Compute the total amount of time this entry has * been on a queue. If this entry has been on longer * than the keep alive timer would allow, expire it. 
*/ sc->sc_rxttot += sc->sc_rxtcur; if (sc->sc_rxttot >= TCPTV_KEEP_INIT) goto dropit; tcpstat.tcps_sc_retransmitted++; (void) syn_cache_respond(sc, NULL); /* Advance the timer back-off. */ sc->sc_rxtshift++; Index: sys/netinet/tcp_subr.c =================================================================== RCS file: /usr/users/he/nbcvs/netbsd/src/sys/netinet/tcp_subr.c,v retrieving revision 1.139 diff -u -u -2 -0 -r1.139 tcp_subr.c --- sys/netinet/tcp_subr.c 1 Mar 2003 04:40:28 -0000 1.139 +++ sys/netinet/tcp_subr.c 17 Mar 2003 20:48:25 -0000 @@ -1004,40 +1004,62 @@ #ifdef INET6 if (tp->t_in6pcb) so = tp->t_in6pcb->in6p_socket; #endif if (!so) return NULL; if (TCPS_HAVERCVDSYN(tp->t_state)) { tp->t_state = TCPS_CLOSED; (void) tcp_output(tp); tcpstat.tcps_drops++; } else tcpstat.tcps_conndrops++; if (errno == ETIMEDOUT && tp->t_softerror) errno = tp->t_softerror; so->so_error = errno; return (tcp_close(tp)); } /* + * Return whether this tcpcb is marked as dead, indicating + * to the calling timer function that no further action should + * be taken, as we are about to release this tcpcb. The release + * of the storage will be done here if no other timer functions + * are about to be invoked. + */ +int +tcp_isdead(tp) + struct tcpcb *tp; +{ + int dead = (tp->t_flags & TF_DEAD); + + if (dead) { + if (tcp_timers_running(tp)) + return dead; /* not quite there yet -- should count? 
*/ + tcpstat.tcps_delayed_free++; + pool_put(&tcpcb_pool, tp); + } + return dead; +} + +/* * Close a TCP control block: * discard all space held by the tcp * discard internet protocol block * wake up any sleepers */ struct tcpcb * tcp_close(tp) struct tcpcb *tp; { struct inpcb *inp; #ifdef INET6 struct in6pcb *in6p; #endif struct socket *so; #ifdef RTV_RTT struct rtentry *rt; #endif struct route *ro; inp = tp->t_inpcb; @@ -1121,41 +1143,45 @@ rt->rt_rmx.rmx_ssthresh = (rt->rt_rmx.rmx_ssthresh + i) / 2; else rt->rt_rmx.rmx_ssthresh = i; } } #endif /* RTV_RTT */ /* free the reassembly queue, if any */ TCP_REASS_LOCK(tp); (void) tcp_freeq(tp); TCP_REASS_UNLOCK(tp); tcp_canceltimers(tp); TCP_CLEAR_DELACK(tp); syn_cache_cleanup(tp); if (tp->t_template) { m_free(tp->t_template); tp->t_template = NULL; } - pool_put(&tcpcb_pool, tp); + if (tcp_timers_running(tp)) + tp->t_flags |= TF_DEAD; + else + pool_put(&tcpcb_pool, tp); + if (inp) { inp->inp_ppcb = 0; soisdisconnected(so); in_pcbdetach(inp); } #ifdef INET6 else if (in6p) { in6p->in6p_ppcb = 0; soisdisconnected(so); in6_pcbdetach(in6p); } #endif tcpstat.tcps_closed++; return ((struct tcpcb *)0); } int tcp_freeq(tp) struct tcpcb *tp; { Index: sys/netinet/tcp_timer.c =================================================================== RCS file: /usr/users/he/nbcvs/netbsd/src/sys/netinet/tcp_timer.c,v retrieving revision 1.62 diff -u -u -2 -0 -r1.62 tcp_timer.c --- sys/netinet/tcp_timer.c 3 Feb 2003 23:51:04 -0000 1.62 +++ sys/netinet/tcp_timer.c 13 Mar 2003 21:03:36 -0000 @@ -179,55 +179,80 @@ tcp_timer_init(void) { if (tcp_keepidle == 0) tcp_keepidle = TCPTV_KEEP_IDLE; if (tcp_keepintvl == 0) tcp_keepintvl = TCPTV_KEEPINTVL; if (tcp_keepcnt == 0) tcp_keepcnt = TCPTV_KEEPCNT; if (tcp_maxpersistidle == 0) tcp_maxpersistidle = TCPTV_KEEP_IDLE; if (tcp_delack_ticks == 0) tcp_delack_ticks = TCP_DELACK_TICKS; } /* + * Return how many timers are currently (about to be) running, + * i.e. how many callouts are about to be started. 
+ */ +int +tcp_timers_running(struct tcpcb *tp) +{ + int i; + int count = 0; + + for (i = 0; i < TCPT_NTIMERS; i++) + if (callout_is_running(&tp->t_timer[i])) + count++; + if (callout_is_running(&tp->t_delack_ch)) + count++; + + return count; +} + +/* * Callout to process delayed ACKs for a TCPCB. */ void tcp_delack(void *arg) { struct tcpcb *tp = arg; int s; /* * If tcp_output() wasn't able to transmit the ACK * for whatever reason, it will restart the delayed * ACK callout. */ s = splsoftnet(); + callout_ack(&tp->t_delack_ch); + if (tcp_isdead(tp)) { + splx(s); + return; + } + tp->t_flags |= TF_ACKNOW; (void) tcp_output(tp); splx(s); } /* * Tcp protocol timeout routine called every 500 ms. * Updates the timers in all active tcb's and * causes finite state machine actions if timers expire. */ void tcp_slowtimo() { int s; s = splsoftnet(); tcp_maxidle = tcp_keepcnt * tcp_keepintvl; tcp_iss_seq += TCP_ISSINCR; /* increment iss */ tcp_now++; /* for timestamps */ splx(s); @@ -251,40 +276,46 @@ const int tcp_totbackoff = 511; /* sum of tcp_backoff[] */ /* * TCP timer processing. */ void tcp_timer_rexmt(void *arg) { struct tcpcb *tp = arg; uint32_t rto; int s; #ifdef TCP_DEBUG struct socket *so; short ostate; #endif s = splsoftnet(); + callout_ack(&tp->t_timer[TCPT_REXMT]); + if (tcp_isdead(tp)) { + splx(s); + return; + } + #ifdef TCP_DEBUG #ifdef INET if (tp->t_inpcb) so = tp->t_inpcb->inp_socket; #endif #ifdef INET6 if (tp->t_in6pcb) so = tp->t_in6pcb->in6p_socket; #endif ostate = tp->t_state; #endif /* TCP_DEBUG */ /* * Retransmission timer went off. Message has not * been acked within retransmit interval. Back off * to a longer retransmit interval and retransmit one segment. 
*/ if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) { tp->t_rxtshift = TCP_MAXRXTSHIFT; @@ -398,40 +429,46 @@ if (tp && so->so_options & SO_DEBUG) tcp_trace(TA_USER, ostate, tp, NULL, PRU_SLOWTIMO | (TCPT_REXMT << 8)); #endif splx(s); } void tcp_timer_persist(void *arg) { struct tcpcb *tp = arg; uint32_t rto; int s; #ifdef TCP_DEBUG struct socket *so; short ostate; #endif s = splsoftnet(); + callout_ack(&tp->t_timer[TCPT_PERSIST]); + if (tcp_isdead(tp)) { + splx(s); + return; + } + #ifdef TCP_DEBUG #ifdef INET if (tp->t_inpcb) so = tp->t_inpcb->inp_socket; #endif #ifdef INET6 if (tp->t_in6pcb) so = tp->t_in6pcb->in6p_socket; #endif ostate = tp->t_state; #endif /* TCP_DEBUG */ /* * Persistance timer into zero window. * Force a byte to be output, if possible. */ /* * Hack: if the peer is dead/unreachable, we do not @@ -460,40 +497,46 @@ #ifdef TCP_DEBUG if (tp && so->so_options & SO_DEBUG) tcp_trace(TA_USER, ostate, tp, NULL, PRU_SLOWTIMO | (TCPT_PERSIST << 8)); #endif splx(s); } void tcp_timer_keep(void *arg) { struct tcpcb *tp = arg; struct socket *so = NULL; /* Quell compiler warning */ int s; #ifdef TCP_DEBUG short ostate; #endif s = splsoftnet(); + callout_ack(&tp->t_timer[TCPT_KEEP]); + if (tcp_isdead(tp)) { + splx(s); + return; + } + #ifdef TCP_DEBUG ostate = tp->t_state; #endif /* TCP_DEBUG */ /* * Keep-alive timer went off; send something * or drop connection if idle for too long. 
*/ tcpstat.tcps_keeptimeo++; if (TCPS_HAVEESTABLISHED(tp->t_state) == 0) goto dropit; #ifdef INET if (tp->t_inpcb) so = tp->t_inpcb->inp_socket; #endif #ifdef INET6 if (tp->t_in6pcb) so = tp->t_in6pcb->in6p_socket; #endif @@ -541,40 +584,46 @@ splx(s); return; dropit: tcpstat.tcps_keepdrops++; (void) tcp_drop(tp, ETIMEDOUT); splx(s); } void tcp_timer_2msl(void *arg) { struct tcpcb *tp = arg; int s; #ifdef TCP_DEBUG struct socket *so; short ostate; #endif s = splsoftnet(); + + callout_ack(&tp->t_timer[TCPT_2MSL]); + if (tcp_isdead(tp)) { + splx(s); + return; + } #ifdef TCP_DEBUG #ifdef INET if (tp->t_inpcb) so = tp->t_inpcb->inp_socket; #endif #ifdef INET6 if (tp->t_in6pcb) so = tp->t_in6pcb->in6p_socket; #endif ostate = tp->t_state; #endif /* TCP_DEBUG */ /* * 2 MSL timeout in shutdown went off. If we're closed but * still waiting for peer to close and connection has been idle * too long, or if 2MSL time is up from TIME_WAIT, delete connection * control block. Otherwise, check again in a bit. 
*/ Index: sys/netinet/tcp_var.h =================================================================== RCS file: /usr/users/he/nbcvs/netbsd/src/sys/netinet/tcp_var.h,v retrieving revision 1.96 diff -u -u -2 -0 -r1.96 tcp_var.h --- sys/netinet/tcp_var.h 1 Mar 2003 04:40:28 -0000 1.96 +++ sys/netinet/tcp_var.h 7 Mar 2003 18:05:11 -0000 @@ -167,40 +167,41 @@ short t_dupacks; /* consecutive dup acks recd */ u_short t_peermss; /* peer's maximum segment size */ u_short t_ourmss; /* our's maximum segment size */ u_short t_segsz; /* current segment size in use */ char t_force; /* 1 if forcing out a byte */ u_int t_flags; #define TF_ACKNOW 0x0001 /* ack peer immediately */ #define TF_DELACK 0x0002 /* ack, but try to delay it */ #define TF_NODELAY 0x0004 /* don't delay packets to coalesce */ #define TF_NOOPT 0x0008 /* don't use tcp options */ #define TF_REQ_SCALE 0x0020 /* have/will request window scaling */ #define TF_RCVD_SCALE 0x0040 /* other side has requested scaling */ #define TF_REQ_TSTMP 0x0080 /* have/will request timestamps */ #define TF_RCVD_TSTMP 0x0100 /* a timestamp was received in SYN */ #define TF_SACK_PERMIT 0x0200 /* other side said I could SACK */ #define TF_SYN_REXMT 0x0400 /* rexmit timer fired on SYN */ #define TF_WILL_SACK 0x0800 /* try to use SACK */ #define TF_CANT_TXSACK 0x1000 /* other side said I could not SACK */ #define TF_IGNR_RXSACK 0x2000 /* ignore received SACK blocks */ #define TF_REASSEMBLING 0x4000 /* we're busy reassembling */ +#define TF_DEAD 0x8000 /* dead and to-be-released */ struct mbuf *t_template; /* skeletal packet for transmit */ struct inpcb *t_inpcb; /* back pointer to internet pcb */ struct in6pcb *t_in6pcb; /* back pointer to internet pcb */ struct callout t_delack_ch; /* delayed ACK callout */ /* * The following fields are used as in the protocol specification. * See RFC783, Dec. 1981, page 21. 
*/ /* send sequence variables */ tcp_seq snd_una; /* send unacknowledged */ tcp_seq snd_nxt; /* send next */ tcp_seq snd_up; /* send urgent pointer */ tcp_seq snd_wl1; /* window update seg seq number */ tcp_seq snd_wl2; /* window update seg ack number */ tcp_seq iss; /* initial send sequence number */ u_long snd_wnd; /* send window */ tcp_seq snd_recover; /* for use in fast recovery */ /* receive sequence variables */ @@ -393,40 +394,41 @@ #define sc_route4 sc_route_u.route4 #ifdef INET6 #define sc_route6 sc_route_u.route6 #endif long sc_win; /* advertised window */ int sc_bucketidx; /* our bucket index */ u_int32_t sc_hash; u_int32_t sc_timestamp; /* timestamp from SYN */ u_int32_t sc_timebase; /* our local timebase */ union syn_cache_sa sc_src; union syn_cache_sa sc_dst; tcp_seq sc_irs; tcp_seq sc_iss; u_int sc_rxtcur; /* current rxt timeout */ u_int sc_rxttot; /* total time spend on queues */ u_short sc_rxtshift; /* for computing backoff */ u_short sc_flags; #define SCF_UNREACH 0x0001 /* we've had an unreach error */ #define SCF_TIMESTAMP 0x0002 /* peer will do timestamps */ +#define SCF_DEAD 0x0004 /* this entry to be released */ struct mbuf *sc_ipopts; /* IP options */ u_int16_t sc_peermaxseg; u_int16_t sc_ourmaxseg; u_int8_t sc_request_r_scale : 4, sc_requested_s_scale : 4; struct tcpcb *sc_tp; /* tcb for listening socket */ LIST_ENTRY(syn_cache) sc_tpq; /* list of entries by same tp */ }; struct syn_cache_head { TAILQ_HEAD(, syn_cache) sch_bucket; /* bucket entries */ u_short sch_length; /* # entries in bucket */ }; #define intotcpcb(ip) ((struct tcpcb *)(ip)->inp_ppcb) #ifdef INET6 #define in6totcpcb(ip) ((struct tcpcb *)(ip)->in6p_ppcb) #endif @@ -523,54 +525,56 @@ u_quad_t tcps_rcvpartdupbyte; /* dup. bytes in part-dup. 
packets */ u_quad_t tcps_rcvoopack; /* out-of-order packets received */ u_quad_t tcps_rcvoobyte; /* out-of-order bytes received */ u_quad_t tcps_rcvpackafterwin; /* packets with data after window */ u_quad_t tcps_rcvbyteafterwin; /* bytes rcvd after window */ u_quad_t tcps_rcvafterclose; /* packets rcvd after "close" */ u_quad_t tcps_rcvwinprobe; /* rcvd window probe packets */ u_quad_t tcps_rcvdupack; /* rcvd duplicate acks */ u_quad_t tcps_rcvacktoomuch; /* rcvd acks for unsent data */ u_quad_t tcps_rcvackpack; /* rcvd ack packets */ u_quad_t tcps_rcvackbyte; /* bytes acked by rcvd acks */ u_quad_t tcps_rcvwinupd; /* rcvd window update packets */ u_quad_t tcps_pawsdrop; /* segments dropped due to PAWS */ u_quad_t tcps_predack; /* times hdr predict ok for acks */ u_quad_t tcps_preddat; /* times hdr predict ok for data pkts */ u_quad_t tcps_pcbhashmiss; /* input packets missing pcb hash */ u_quad_t tcps_noport; /* no socket on port */ u_quad_t tcps_badsyn; /* received ack for which we have no SYN in compressed state */ + u_quad_t tcps_delayed_free; /* delayed pool_put() of tcpcb */ /* These statistics deal with the SYN cache. 
*/ u_quad_t tcps_sc_added; /* # of entries added */ u_quad_t tcps_sc_completed; /* # of connections completed */ u_quad_t tcps_sc_timed_out; /* # of entries timed out */ u_quad_t tcps_sc_overflowed; /* # dropped due to overflow */ u_quad_t tcps_sc_reset; /* # dropped due to RST */ u_quad_t tcps_sc_unreach; /* # dropped due to ICMP unreach */ u_quad_t tcps_sc_bucketoverflow;/* # dropped due to bucket overflow */ u_quad_t tcps_sc_aborted; /* # of entries aborted (no mem) */ u_quad_t tcps_sc_dupesyn; /* # of duplicate SYNs received */ u_quad_t tcps_sc_dropped; /* # of SYNs dropped (no route/mem) */ u_quad_t tcps_sc_collisions; /* # of hash collisions */ u_quad_t tcps_sc_retransmitted; /* # of retransmissions */ + u_quad_t tcps_sc_delayed_free; /* # of delayed pool_put()s */ u_quad_t tcps_selfquench; /* # of ENOBUFS we get on output */ }; /* * Names for TCP sysctl objects. */ #define TCPCTL_RFC1323 1 /* RFC1323 timestamps/scaling */ #define TCPCTL_SENDSPACE 2 /* default send buffer */ #define TCPCTL_RECVSPACE 3 /* default recv buffer */ #define TCPCTL_MSSDFLT 4 /* default seg size */ #define TCPCTL_SYN_CACHE_LIMIT 5 /* max size of comp. state engine */ #define TCPCTL_SYN_BUCKET_LIMIT 6 /* max size of hash bucket */ #if 0 /*obsoleted*/ #define TCPCTL_SYN_CACHE_INTER 7 /* interval of comp. 
state timer */ #endif #define TCPCTL_INIT_WIN 8 /* initial window */ #define TCPCTL_MSS_IFMTU 9 /* mss from interface, not in_maxmtu */ #define TCPCTL_SACK 10 /* RFC2018 selective acknowledgement */ #define TCPCTL_WSCALE 11 /* RFC1323 window scaling */ @@ -680,42 +684,44 @@ { 1, 0, &tcp_keepidle }, \ { 1, 0, &tcp_keepintvl }, \ { 1, 0, &tcp_keepcnt }, \ { 1, 1, 0, PR_SLOWHZ }, \ { 1, 0, &tcp_do_newreno }, \ { 1, 0, &tcp_log_refused }, \ { 0 }, \ { 1, 0, &tcp_rst_ppslim }, \ { 1, 0, &tcp_delack_ticks }, \ { 1, 0, &tcp_init_win_local }, \ } #ifdef __NO_STRICT_ALIGNMENT #define TCP_HDR_ALIGNED_P(th) 1 #else #define TCP_HDR_ALIGNED_P(th) ((((vaddr_t) (th)) & 3) == 0) #endif int tcp_attach __P((struct socket *)); void tcp_canceltimers __P((struct tcpcb *)); +int tcp_timers_running __P((struct tcpcb*)); struct tcpcb * tcp_close __P((struct tcpcb *)); +int tcp_isdead __P((struct tcpcb *)); #ifdef INET6 void tcp6_ctlinput __P((int, struct sockaddr *, void *)); #endif void *tcp_ctlinput __P((int, struct sockaddr *, void *)); int tcp_ctloutput __P((int, struct socket *, int, int, struct mbuf **)); struct tcpcb * tcp_disconnect __P((struct tcpcb *)); struct tcpcb * tcp_drop __P((struct tcpcb *, int)); void tcp_dooptions __P((struct tcpcb *, u_char *, int, struct tcphdr *, struct tcp_opt_info *)); void tcp_drain __P((void)); #ifdef INET6 void tcp6_drain __P((void)); #endif void tcp_established __P((struct tcpcb *)); void tcp_init __P((void)); #ifdef INET6 int tcp6_input __P((struct mbuf **, int *, int)); #endif Index: usr.bin/netstat/inet.c =================================================================== RCS file: /usr/users/he/nbcvs/netbsd/src/usr.bin/netstat/inet.c,v retrieving revision 1.54 diff -u -u -2 -0 -r1.54 inet.c --- usr.bin/netstat/inet.c 4 Feb 2003 01:22:08 -0000 1.54 +++ usr.bin/netstat/inet.c 13 Mar 2003 21:05:08 -0000 @@ -246,69 +246,72 @@ p(tcps_pawsdrop, "\t\t%llu old duplicate packet%s\n"); p2(tcps_rcvpartduppack, tcps_rcvpartdupbyte, "\t\t%llu packet%s 
with some dup. data (%llu byte%s duped)\n"); p2(tcps_rcvoopack, tcps_rcvoobyte, "\t\t%llu out-of-order packet%s (%llu byte%s)\n"); p2(tcps_rcvpackafterwin, tcps_rcvbyteafterwin, "\t\t%llu packet%s (%llu byte%s) of data after window\n"); p(tcps_rcvwinprobe, "\t\t%llu window probe%s\n"); p(tcps_rcvwinupd, "\t\t%llu window update packet%s\n"); p(tcps_rcvafterclose, "\t\t%llu packet%s received after close\n"); p(tcps_rcvbadsum, "\t\t%llu discarded for bad checksum%s\n"); p(tcps_rcvbadoff, "\t\t%llu discarded for bad header offset field%s\n"); ps(tcps_rcvshort, "\t\t%llu discarded because packet too short\n"); p(tcps_connattempt, "\t%llu connection request%s\n"); p(tcps_accepts, "\t%llu connection accept%s\n"); p(tcps_connects, "\t%llu connection%s established (including accepts)\n"); p2(tcps_closed, tcps_drops, "\t%llu connection%s closed (including %llu drop%s)\n"); p(tcps_conndrops, "\t%llu embryonic connection%s dropped\n"); + p(tcps_delayed_free, "\t%llu delayed free%s of tcpcb\n"); p2(tcps_rttupdated, tcps_segstimed, "\t%llu segment%s updated rtt (of %llu attempt%s)\n"); p(tcps_rexmttimeo, "\t%llu retransmit timeout%s\n"); p(tcps_timeoutdrop, "\t\t%llu connection%s dropped by rexmit timeout\n"); p2(tcps_persisttimeo, tcps_persistdrops, "\t%llu persist timeout%s (resulting in %llu dropped " "connection%s)\n"); p(tcps_keeptimeo, "\t%llu keepalive timeout%s\n"); p(tcps_keepprobe, "\t\t%llu keepalive probe%s sent\n"); p(tcps_keepdrops, "\t\t%llu connection%s dropped by keepalive\n"); p(tcps_predack, "\t%llu correct ACK header prediction%s\n"); p(tcps_preddat, "\t%llu correct data packet header prediction%s\n"); p3(tcps_pcbhashmiss, "\t%llu PCB hash miss%s\n"); ps(tcps_noport, "\t%llu dropped due to no socket\n"); p(tcps_connsdrained, "\t%llu connection%s drained due to memory " "shortage\n"); p(tcps_pmtublackhole, "\t%llu PMTUD blackhole%s detected\n"); p(tcps_badsyn, "\t%llu bad connection attempt%s\n"); ps(tcps_sc_added, "\t%llu SYN cache entries added\n"); 
p(tcps_sc_collisions, "\t\t%llu hash collision%s\n"); ps(tcps_sc_completed, "\t\t%llu completed\n"); ps(tcps_sc_aborted, "\t\t%llu aborted (no space to build PCB)\n"); ps(tcps_sc_timed_out, "\t\t%llu timed out\n"); ps(tcps_sc_overflowed, "\t\t%llu dropped due to overflow\n"); ps(tcps_sc_bucketoverflow, "\t\t%llu dropped due to bucket overflow\n"); ps(tcps_sc_reset, "\t\t%llu dropped due to RST\n"); ps(tcps_sc_unreach, "\t\t%llu dropped due to ICMP unreachable\n"); + ps(tcps_sc_delayed_free, "\t\t%llu delayed free of SYN cache " + "entries\n"); p(tcps_sc_retransmitted, "\t%llu SYN,ACK%s retransmitted\n"); p(tcps_sc_dupesyn, "\t%llu duplicate SYN%s received for entries " "already in the cache\n"); p(tcps_sc_dropped, "\t%llu SYN%s dropped (no route or no space)\n"); #undef p #undef ps #undef p2 #undef p2s #undef p3 } /* * Dump UDP statistics structure. */ void udp_stats(off, name) u_long off; char *name; { Index: share/man/man9/callout.9 =================================================================== RCS file: /usr/users/he/nbcvs/netbsd/src/share/man/man9/callout.9,v retrieving revision 1.8 diff -u -u -2 -0 -r1.8 callout.9 --- share/man/man9/callout.9 4 Feb 2003 01:22:36 -0000 1.8 +++ share/man/man9/callout.9 17 Mar 2003 20:54:23 -0000 @@ -26,60 +26,67 @@ .\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED .\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR .\" PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS .\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR .\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF .\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS .\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN .\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE .\" POSSIBILITY OF SUCH DAMAGE. .\" .Dd February 3, 2003 .Dt CALLOUT 9 .Os .Sh NAME .Nm callout_init , .Nm callout_reset , .Nm callout_schedule , .Nm callout_setfunc , .Nm callout_stop , +.Nm callout_expired , +.Nm callout_is_running , +.Nm callout_ack , .Nm CALLOUT_INITIALIZER , .Nm CALLOUT_INITIALIZER_SETFUNC .Nd execute a function after a specified length of time .Sh SYNOPSIS .Fd #include \*[Lt]sys/callout.h\*[Gt] .Ft void .Fn "callout_init" "struct callout *c" .Ft void .Fn "callout_reset" "struct callout *c" "int ticks" \ "void (*func)(void *)" "void *arg" .Ft void .Fn "callout_schedule" "struct callout *c" "int ticks" .Ft void .Fn "callout_setfunc" "struct callout *c" "void (*func)(void *)" "void *arg" .Ft void .Fn "callout_stop" "struct callout *c" .Ft int .Fn "callout_pending" "struct callout *c" .Ft int .Fn "callout_expired" "struct callout *c" +.Ft int +.Fn "callout_is_running" "struct callout *c" +.Ft void +.Fn "callout_ack" "struct callout *c" .Fd CALLOUT_INITIALIZER .Pp .Fd CALLOUT_INITIALIZER_SETFUNC(func, arg) .Sh DESCRIPTION The .Nm callout facility provides a mechanism to execute a function at a given time. The timer is based on the hardclock timer which ticks .Dv hz times per second. The function is called at softclock interrupt level. .Pp Clients of the .Nm callout facility are responsible for providing pre-allocated callout structures, or .Dq handles . 
The .Nm callout facility replaces the historic @@ -100,100 +107,133 @@ the value .Dv CALLOUT_INITIALIZER to them. .Pp The .Fn callout_reset function resets and starts the timer associated with the callout handle .Fa c . When the timer expires after .Fa ticks Ns No /hz seconds, the function specified by .Fa func will be called with the argument .Fa arg . If the timer associated with the callout handle is already running, the callout will simply be rescheduled to execute at the newly specified time. Once the timer is started, the callout handle is marked as .Em PENDING . Once the timer expires, -the handle is marked at +the handle is marked as .Em EXPIRED +and +.Em RUNNING and the .Em PENDING status is cleared. .Pp The .Fn callout_setfunc function initializes the callout handle .Fa c for use and sets the function and argument to .Fa func and .Fa arg respectively. If a callout will always be used with the same function and argument, then .Fn callout_setfunc used in conjunction with .Fn callout_schedule is slightly more efficient than using .Fn callout_init and .Fn callout_reset . If it is inconvenient to call .Fn callout_setfunc , statically-allocated callout handles may be initialized by assigning the value .Dv CALLOUT_INITIALIZER_SETFUNC to them, passing the function and argument to the initializer. .Pp The .Fn callout_stop function stops the timer associated the callout handle .Fa c . The -.Em PENDING +.Em PENDING , +.Em EXPIRED , +and +.Em RUNNING status for the callout handle is cleared. -The -.Em EXPIRED -status is not affected. It is safe to call .Fn callout_stop on a callout handle that is not pending, so long as it is initialized. .Pp The .Fn callout_pending function tests the .Em PENDING status of the callout handle .Fa c . A .Em PENDING callout is one that has been started and whose function has not yet been called. 
Note that it is possible for a callout's timer to have expired without its function being called if interrupt level has not dropped low enough to let softclock interrupts through. Note that it is only safe to test .Em PENDING status when at softclock interrupt level or higher. .Pp The .Fn callout_expired function tests to see if the callout's timer has expired and its function called. +.Pp +The +.Fn callout_is_running +function tests to see if the callout's function is being called. +For this to work, the callout function will have to use the +.Fn callout_ack +function to clear this flag after raising the priority level as +appropriate. +Since the priority is lowered prior to invocation of the callout +function, other pending higher-priority code may run before the +callout function is actually invoked. +This may create a race condition if this higher-priority code +deallocates storage containing one or more callout structures whose +callout functions are about to be invoked. +In such cases one technique to prevent references to deallocated +storage would be to mark the data structure and defer deallocation +until the callout function runs. +.Pp +The +.Fn callout_ack +function clears the +.Em RUNNING +state in the callout handle +.Fa c . +This is used in situations where it is necessary to protect against +the race condition described under +.Fn callout_is_running . +The +.Fn callout_ack +function would typically be called in the callout function after +raising the priority level as appropriate. .Sh SEE ALSO .Xr hz 9 .Sh HISTORY The .Nm callout facility was implemented by Artur Grabowski and Thomas Nordin, based on the work of G. Varghese and A. Lauck, described in the paper Hashed and Hierarchical Timing Wheels: Data Structures for the Efficient Implementation of a Timer Facility in the Proceedings of the 11th ACM Annual Symposium on Operating System Principles, Austin, Texas, November 1987. It was adapted to the .Nx kernel by Jason R. Thorpe.