From a2b33c9a7a952ff35d5061ae1dc76980d40320d3 Mon Sep 17 00:00:00 2001 From: Randall Stewart Date: Mon, 10 Apr 2023 16:33:56 -0400 Subject: tcp: Rack - in the absence of LRO fixed rate pacing (loopback or interfaces with no LRO) does not work correctly. Rack is capable of fixed rate or dynamic rate pacing. Both of these can get mixed up when LRO is not available. This is because LRO will hold off waking up the tcp connection to processing the inbound packets until the pacing timer is up. Without LRO the pacing only sort-of works. Sometimes we pace correctly, other times not so much. This set of changes will make it so pacing works properly in the absence of LRO. Reviewed by: tuexen Sponsored by: Netflix Inc Differential Revision:https://reviews.freebsd.org/D39494 --- sys/netinet/tcp_stacks/rack.c | 54 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c index a1dda9889549..1bab253818b5 100644 --- a/sys/netinet/tcp_stacks/rack.c +++ b/sys/netinet/tcp_stacks/rack.c @@ -16433,6 +16433,9 @@ rack_do_compressed_ack_processing(struct tcpcb *tp, struct socket *so, struct mb return (0); } +#define TCP_LRO_TS_OPTION \ + ntohl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | \ + (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP) static int rack_do_segment_nounlock(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, @@ -16458,6 +16461,8 @@ rack_do_segment_nounlock(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, struct tcp_rack *rack; struct rack_sendmap *rsm; int32_t prev_state = 0; + int no_output = 0; + int slot_remaining = 0; #ifdef TCP_ACCOUNTING int ack_val_set = 0xf; #endif @@ -16480,6 +16485,43 @@ rack_do_segment_nounlock(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, */ rack_deferred_init(tp, rack); } + /* + * Check to see if we need to skip any output plans. This + * can happen in the non-LRO path where we are pacing and + * must process the ack coming in but need to defer sending + * anything becase a pacing timer is running. + */ + us_cts = tcp_tv_to_usectick(tv); + if ((rack->rc_always_pace == 1) && + (rack->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT) && + (TSTMP_LT(us_cts, rack->r_ctl.rc_last_output_to))) { + /* + * Ok conditions are right for queuing the packets + * but we do have to check the flags in the inp, it + * could be, if a sack is present, we want to be awoken and + * so should process the packets. + */ + slot_remaining = rack->r_ctl.rc_last_output_to - us_cts; + if (rack->rc_inp->inp_flags2 & INP_DONT_SACK_QUEUE) { + no_output = 1; + } else { + /* + * If there is no options, or just a + * timestamp option, we will want to queue + * the packets. This is the same that LRO does + * and will need to change with accurate ECN. + */ + uint32_t *ts_ptr; + int optlen; + + optlen = (th->th_off << 2) - sizeof(struct tcphdr); + ts_ptr = (uint32_t *)(th + 1); + if ((optlen == 0) || + ((optlen == TCPOLEN_TSTAMP_APPA) && + (*ts_ptr == TCP_LRO_TS_OPTION))) + no_output = 1; + } + } if (m->m_flags & M_ACKCMP) { /* * All compressed ack's are ack's by definition so @@ -16914,7 +16956,7 @@ rack_do_segment_nounlock(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, } } #endif - if (nxt_pkt == 0) { + if ((nxt_pkt == 0) && (no_output == 0)) { if ((rack->r_wanted_output != 0) || (rack->r_fast_output != 0)) { do_output_now: if (tcp_output(tp) < 0) { @@ -16927,6 +16969,16 @@ do_output_now: } rack_start_hpts_timer(rack, tp, cts, 0, 0, 0); rack_free_trim(rack); + } else if ((no_output == 1) && + (nxt_pkt == 0) && + (tcp_in_hpts(rack->rc_inp) == 0)) { + /* + * We are not in hpts and we had a pacing timer up. Use + * the remaining time (slot_remaining) to restart the timer. + */ + KASSERT ((slot_remaining != 0), ("slot remaining is zero for rack:%p tp:%p", rack, tp)); + rack_start_hpts_timer(rack, tp, cts, slot_remaining, 0, 0); + rack_free_trim(rack); } /* Update any rounds needed */ if (rack_verbose_logging && tcp_bblogging_on(rack->rc_tp)) -- cgit v1.2.3