diff options
author | John Baldwin <jhb@FreeBSD.org> | 2011-04-29 15:40:12 +0000 |
---|---|---|
committer | John Baldwin <jhb@FreeBSD.org> | 2011-04-29 15:40:12 +0000 |
commit | 672dc4aea2e764d32069892353809c67b402effc (patch) | |
tree | 0fae3721504af39587a4b5740b6628dc072aab74 /sys | |
parent | 98be0dfebdc49a0ff7f4eceff40785c421720dde (diff) | |
download | src-672dc4aea2e764d32069892353809c67b402effc.tar.gz src-672dc4aea2e764d32069892353809c67b402effc.zip |
TCP reuses t_rxtshift to determine the backoff timer used for both the
persist state and the retransmit timer. However, the code that implements
"bad retransmit recovery" only checks t_rxtshift to see if an ACK has been
received in during the first retransmit timeout window. As a result, if
ticks has wrapped over to a negative value and a socket is in the persist
state, it can incorrectly treat an ACK from the remote peer as a
"bad retransmit recovery" and restore saved values such as snd_ssthresh and
snd_cwnd. However, if the socket has never had a retransmit timeout, then
these saved values will be zero, so snd_ssthresh and snd_cwnd will be set
to 0.
If the socket is in fast recovery (this can be caused by excessive
duplicate ACKs such as those fixed by 220794), then each ACK that arrives
triggers either NewReno or SACK partial ACK handling which clamps snd_cwnd
to be no larger than snd_ssthresh. In effect, the socket's send window
is permamently stuck at 0 even though the remote peer is advertising a
much larger window and pending data is only sent via TCP window probes
(so one byte every few seconds).
Fix this by adding a new TCP pcb flag (TF_PREVVALID) that indicates that
the various snd_*_prev fields in the pcb are valid and only perform
"bad retransmit recovery" if this flag is set in the pcb. The flag is set
on the first retransmit timeout that occurs and is cleared on subsequent
retransmit timeouts or when entering the persist state.
Reviewed by: bz
MFC after: 2 weeks
Notes
Notes:
svn path=/head/; revision=221209
Diffstat (limited to 'sys')
-rw-r--r-- | sys/netinet/tcp_input.c | 5 | ||||
-rw-r--r-- | sys/netinet/tcp_output.c | 1 | ||||
-rw-r--r-- | sys/netinet/tcp_timer.c | 4 | ||||
-rw-r--r-- | sys/netinet/tcp_var.h | 1 |
4 files changed, 9 insertions, 2 deletions
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index cf1daa8c73c6..30e84296afec 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -420,6 +420,7 @@ cc_cong_signal(struct tcpcb *tp, struct tcphdr *th, uint32_t type) if (tp->t_flags & TF_WASCRECOVERY) ENTER_CONGRECOVERY(tp->t_flags); tp->snd_nxt = tp->snd_max; + tp->t_flags &= ~TF_PREVVALID; tp->t_badrxtwin = 0; break; } @@ -1575,6 +1576,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, * "bad retransmit" recovery. */ if (tp->t_rxtshift == 1 && + tp->t_flags & TF_PREVVALID && (int)(ticks - tp->t_badrxtwin) < 0) { cc_cong_signal(tp, th, CC_RTO_ERR); } @@ -2521,7 +2523,8 @@ process_ACK: * original cwnd and ssthresh, and proceed to transmit where * we left off. */ - if (tp->t_rxtshift == 1 && (int)(ticks - tp->t_badrxtwin) < 0) + if (tp->t_rxtshift == 1 && tp->t_flags & TF_PREVVALID && + (int)(ticks - tp->t_badrxtwin) < 0) cc_cong_signal(tp, th, CC_RTO_ERR); /* diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c index dc2e5ca23f07..f6488a36090f 100644 --- a/sys/netinet/tcp_output.c +++ b/sys/netinet/tcp_output.c @@ -1346,6 +1346,7 @@ tcp_setpersist(struct tcpcb *tp) int t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1; int tt; + tp->t_flags &= ~TF_PREVVALID; if (tcp_timer_active(tp, TT_REXMT)) panic("tcp_setpersist: retransmit pending"); /* diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c index 6d0fa1c559d0..5c2c5c2862c8 100644 --- a/sys/netinet/tcp_timer.c +++ b/sys/netinet/tcp_timer.c @@ -524,7 +524,9 @@ tcp_timer_rexmt(void * xtp) else tp->t_flags &= ~TF_WASCRECOVERY; tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1)); - } + tp->t_flags |= TF_PREVVALID; + } else + tp->t_flags &= ~TF_PREVVALID; TCPSTAT_INC(tcps_rexmttimeo); if (tp->t_state == TCPS_SYN_SENT) rexmt = TCP_REXMTVAL(tp) * tcp_syn_backoff[tp->t_rxtshift]; diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h index 5d92d870da5d..e8189b020357 100644 --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -224,6 +224,7 @@ struct tcpcb { #define TF_NEEDSYN 0x000400 /* send SYN (implicit state) */ #define TF_NEEDFIN 0x000800 /* send FIN (implicit state) */ #define TF_NOPUSH 0x001000 /* don't push */ +#define TF_PREVVALID 0x002000 /* saved values for bad rxmit valid */ #define TF_MORETOCOME 0x010000 /* More data to be appended to sock */ #define TF_LQ_OVERFLOW 0x020000 /* listen queue overflow */ #define TF_LASTIDLE 0x040000 /* connection was previously idle */ |