diff options
Diffstat (limited to 'sys')
-rw-r--r-- | sys/conf/files | 1 | ||||
-rw-r--r-- | sys/conf/options | 1 | ||||
-rw-r--r-- | sys/netinet/sctp.h | 12 | ||||
-rw-r--r-- | sys/netinet/sctp_asconf.c | 2 | ||||
-rw-r--r-- | sys/netinet/sctp_cc_functions.c | 1631 | ||||
-rw-r--r-- | sys/netinet/sctp_cc_functions.h | 107 | ||||
-rw-r--r-- | sys/netinet/sctp_constants.h | 7 | ||||
-rw-r--r-- | sys/netinet/sctp_indata.c | 490 | ||||
-rw-r--r-- | sys/netinet/sctp_input.c | 236 | ||||
-rw-r--r-- | sys/netinet/sctp_os.h | 1 | ||||
-rw-r--r-- | sys/netinet/sctp_output.c | 201 | ||||
-rw-r--r-- | sys/netinet/sctp_pcb.c | 52 | ||||
-rw-r--r-- | sys/netinet/sctp_pcb.h | 4 | ||||
-rw-r--r-- | sys/netinet/sctp_structs.h | 85 | ||||
-rw-r--r-- | sys/netinet/sctp_sysctl.c | 11 | ||||
-rw-r--r-- | sys/netinet/sctp_sysctl.h | 30 | ||||
-rw-r--r-- | sys/netinet/sctp_timer.c | 271 | ||||
-rw-r--r-- | sys/netinet/sctp_timer.h | 2 | ||||
-rw-r--r-- | sys/netinet/sctp_usrreq.c | 131 | ||||
-rw-r--r-- | sys/netinet/sctp_var.h | 47 | ||||
-rw-r--r-- | sys/netinet/sctputil.c | 85 | ||||
-rw-r--r-- | sys/netinet6/sctp6_usrreq.c | 6 |
22 files changed, 2620 insertions, 793 deletions
diff --git a/sys/conf/files b/sys/conf/files index 399b339a3c63..8d81bae11172 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -1851,6 +1851,7 @@ netinet/raw_ip.c optional inet netinet/sctp_asconf.c optional inet inet6 sctp netinet/sctp_auth.c optional inet inet6 sctp netinet/sctp_bsd_addr.c optional inet inet6 sctp +netinet/sctp_cc_functions.c optional inet inet6 sctp netinet/sctp_crc32.c optional inet inet6 sctp netinet/sctp_indata.c optional inet inet6 sctp netinet/sctp_input.c optional inet inet6 sctp diff --git a/sys/conf/options b/sys/conf/options index 65dadaa94cf3..281c40d2bf27 100644 --- a/sys/conf/options +++ b/sys/conf/options @@ -396,7 +396,6 @@ XBONEHACK # SCTP opt_sctp.h SCTP_DEBUG opt_sctp.h # Enable debug printfs -SCTP_HIGH_SPEED opt_sctp.h # Enable Sally Floyds HS TCP CC SCTP_WITH_NO_CSUM opt_sctp.h # Use this at your peril SCTP_LOCK_LOGGING opt_sctp.h # Log to KTR lock activity SCTP_MBUF_LOGGING opt_sctp.h # Log to KTR general mbuf aloc/free diff --git a/sys/netinet/sctp.h b/sys/netinet/sctp.h index b367f199b9e0..642e22759d33 100644 --- a/sys/netinet/sctp.h +++ b/sys/netinet/sctp.h @@ -153,6 +153,8 @@ __attribute__((packed)); /* CMT ON/OFF socket option */ #define SCTP_CMT_ON_OFF 0x00001200 #define SCTP_CMT_USE_DAC 0x00001201 +/* JRS - Pluggable Congestion Control Socket option */ +#define SCTP_PLUGGABLE_CC 0x00001202 /* read only */ #define SCTP_GET_SNDBUF_USE 0x00001101 @@ -238,6 +240,16 @@ __attribute__((packed)); /* Debug things that need to be purged */ #define SCTP_SET_INITIAL_DBG_SEQ 0x00009f00 +/* JRS - Supported congestion control modules for pluggable + * congestion control + */ +/* Standard TCP Congestion Control */ +#define SCTP_CC_RFC2581 0x00000000 +/* High Speed TCP Congestion Control (Floyd) */ +#define SCTP_CC_HSTCP 0x00000001 +/* HTCP Congestion Control */ +#define SCTP_CC_HTCP 0x00000002 + /* fragment interleave constants * setting must be one of these or diff --git a/sys/netinet/sctp_asconf.c b/sys/netinet/sctp_asconf.c 
index b559b19a369f..6235cfd57b86 100644 --- a/sys/netinet/sctp_asconf.c +++ b/sys/netinet/sctp_asconf.c @@ -1798,7 +1798,7 @@ sctp_iterator_stcb(struct sctp_inpcb *inp, struct sctp_tcb *stcb, void *ptr, * cwnd/rto to start as if its a new * address? */ - sctp_set_initial_cc_param(stcb, net); + stcb->asoc.cc_functions.sctp_set_initial_cc_param(stcb, net); net->RTO = 0; } diff --git a/sys/netinet/sctp_cc_functions.c b/sys/netinet/sctp_cc_functions.c new file mode 100644 index 000000000000..3bc3b104afc9 --- /dev/null +++ b/sys/netinet/sctp_cc_functions.c @@ -0,0 +1,1631 @@ +/*- + * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * a) Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * b) Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * + * c) Neither the name of Cisco Systems, Inc. nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <netinet/sctp_os.h> +#include <netinet/sctp_var.h> +#include <netinet/sctp_sysctl.h> +#include <netinet/sctp_pcb.h> +#include <netinet/sctp_header.h> +#include <netinet/sctputil.h> +#include <netinet/sctp_output.h> +#include <netinet/sctp_input.h> +#include <netinet/sctp_indata.h> +#include <netinet/sctp_uio.h> +#include <netinet/sctp_timer.h> +#include <netinet/sctp_auth.h> +#include <netinet/sctp_asconf.h> +#include <netinet/sctp_cc_functions.h> +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); +void +sctp_set_initial_cc_param(struct sctp_tcb *stcb, struct sctp_nets *net) +{ + /* + * We take the max of the burst limit times a MTU or the + * INITIAL_CWND. We then limit this to 4 MTU's of sending. + */ + net->cwnd = min((net->mtu * 4), max((2 * net->mtu), SCTP_INITIAL_CWND)); + /* we always get at LEAST 2 MTU's */ + if (net->cwnd < (2 * net->mtu)) { + net->cwnd = 2 * net->mtu; + } + net->ssthresh = stcb->asoc.peers_rwnd; + + if (sctp_logging_level & (SCTP_CWND_MONITOR_ENABLE | SCTP_CWND_LOGGING_ENABLE)) { + sctp_log_cwnd(stcb, net, 0, SCTP_CWND_INITIALIZATION); + } +} + +void +sctp_cwnd_update_after_fr(struct sctp_tcb *stcb, + struct sctp_association *asoc) +{ + struct sctp_nets *net; + + /*- + * CMT fast recovery code. Need to debug. 
((sctp_cmt_on_off == 1) && + * (net->fast_retran_loss_recovery == 0))) + */ + TAILQ_FOREACH(net, &asoc->nets, sctp_next) { + if ((asoc->fast_retran_loss_recovery == 0) || (sctp_cmt_on_off == 1)) { + /* out of a RFC2582 Fast recovery window? */ + if (net->net_ack > 0) { + /* + * per section 7.2.3, are there any + * destinations that had a fast retransmit + * to them. If so what we need to do is + * adjust ssthresh and cwnd. + */ + struct sctp_tmit_chunk *lchk; + int old_cwnd = net->cwnd; + + net->ssthresh = net->cwnd / 2; + if (net->ssthresh < (net->mtu * 2)) { + net->ssthresh = 2 * net->mtu; + } + net->cwnd = net->ssthresh; + if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) { + sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), + SCTP_CWND_LOG_FROM_FR); + } + lchk = TAILQ_FIRST(&asoc->send_queue); + + net->partial_bytes_acked = 0; + /* Turn on fast recovery window */ + asoc->fast_retran_loss_recovery = 1; + if (lchk == NULL) { + /* Mark end of the window */ + asoc->fast_recovery_tsn = asoc->sending_seq - 1; + } else { + asoc->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1; + } + + /* + * CMT fast recovery -- per destination + * recovery variable. + */ + net->fast_retran_loss_recovery = 1; + + if (lchk == NULL) { + /* Mark end of the window */ + net->fast_recovery_tsn = asoc->sending_seq - 1; + } else { + net->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1; + } + + /* + * Disable Nonce Sum Checking and store the + * resync tsn + */ + asoc->nonce_sum_check = 0; + asoc->nonce_resync_tsn = asoc->fast_recovery_tsn + 1; + + sctp_timer_stop(SCTP_TIMER_TYPE_SEND, + stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_32); + sctp_timer_start(SCTP_TIMER_TYPE_SEND, + stcb->sctp_ep, stcb, net); + } + } else if (net->net_ack > 0) { + /* + * Mark a peg that we WOULD have done a cwnd + * reduction but RFC2582 prevented this action. 
+ */ + SCTP_STAT_INCR(sctps_fastretransinrtt); + } + } +} + +void +sctp_cwnd_update_after_sack(struct sctp_tcb *stcb, + struct sctp_association *asoc, + int accum_moved, int reneged_all, int will_exit) +{ + struct sctp_nets *net; + + /******************************/ + /* update cwnd and Early FR */ + /******************************/ + TAILQ_FOREACH(net, &asoc->nets, sctp_next) { + +#ifdef JANA_CMT_FAST_RECOVERY + /* + * CMT fast recovery code. Need to debug. + */ + if (net->fast_retran_loss_recovery && net->new_pseudo_cumack) { + if (compare_with_wrap(asoc->last_acked_seq, + net->fast_recovery_tsn, MAX_TSN) || + (asoc->last_acked_seq == net->fast_recovery_tsn) || + compare_with_wrap(net->pseudo_cumack, net->fast_recovery_tsn, MAX_TSN) || + (net->pseudo_cumack == net->fast_recovery_tsn)) { + net->will_exit_fast_recovery = 1; + } + } +#endif + if (sctp_early_fr) { + /* + * So, first of all do we need to have a Early FR + * timer running? + */ + if (((TAILQ_FIRST(&asoc->sent_queue)) && + (net->ref_count > 1) && + (net->flight_size < net->cwnd)) || + (reneged_all)) { + /* + * yes, so in this case stop it if its + * running, and then restart it. Reneging + * all is a special case where we want to + * run the Early FR timer and then force the + * last few unacked to be sent, causing us + * to illicit a sack with gaps to force out + * the others. 
+ */ + if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) { + SCTP_STAT_INCR(sctps_earlyfrstpidsck2); + sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net, + SCTP_FROM_SCTP_INDATA + SCTP_LOC_20); + } + SCTP_STAT_INCR(sctps_earlyfrstrid); + sctp_timer_start(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net); + } else { + /* No, stop it if its running */ + if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) { + SCTP_STAT_INCR(sctps_earlyfrstpidsck3); + sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net, + SCTP_FROM_SCTP_INDATA + SCTP_LOC_21); + } + } + } + /* if nothing was acked on this destination skip it */ + if (net->net_ack == 0) { + if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) { + sctp_log_cwnd(stcb, net, 0, SCTP_CWND_LOG_FROM_SACK); + } + continue; + } + if (net->net_ack2 > 0) { + /* + * Karn's rule applies to clearing error count, this + * is optional. + */ + net->error_count = 0; + if ((net->dest_state & SCTP_ADDR_NOT_REACHABLE) == + SCTP_ADDR_NOT_REACHABLE) { + /* addr came good */ + net->dest_state &= ~SCTP_ADDR_NOT_REACHABLE; + net->dest_state |= SCTP_ADDR_REACHABLE; + sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb, + SCTP_RECEIVED_SACK, (void *)net); + /* now was it the primary? if so restore */ + if (net->dest_state & SCTP_ADDR_WAS_PRIMARY) { + (void)sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, net); + } + } + /* + * JRS 5/14/07 - If CMT PF is on and the destination + * is in PF state, set the destination to active + * state and set the cwnd to one or two MTU's based + * on whether PF1 or PF2 is being used. + * + * Should we stop any running T3 timer here? + */ + if (sctp_cmt_pf && ((net->dest_state & SCTP_ADDR_PF) == + SCTP_ADDR_PF)) { + net->dest_state &= ~SCTP_ADDR_PF; + net->cwnd = net->mtu * sctp_cmt_pf; + SCTPDBG(SCTP_DEBUG_INDATA1, "Destination %p moved from PF to reachable with cwnd %d.\n", + net, net->cwnd); + /* + * Since the cwnd value is explicitly set, + * skip the code that updates the cwnd + * value. 
+ */ + goto skip_cwnd_update; + } + } +#ifdef JANA_CMT_FAST_RECOVERY + /* + * CMT fast recovery code + */ + /* + * if (sctp_cmt_on_off == 1 && + * net->fast_retran_loss_recovery && + * net->will_exit_fast_recovery == 0) { @@@ Do something } + * else if (sctp_cmt_on_off == 0 && + * asoc->fast_retran_loss_recovery && will_exit == 0) { + */ +#endif + + if (asoc->fast_retran_loss_recovery && will_exit == 0 && sctp_cmt_on_off == 0) { + /* + * If we are in loss recovery we skip any cwnd + * update + */ + goto skip_cwnd_update; + } + /* + * CMT: CUC algorithm. Update cwnd if pseudo-cumack has + * moved. + */ + if (accum_moved || (sctp_cmt_on_off && net->new_pseudo_cumack)) { + /* If the cumulative ack moved we can proceed */ + if (net->cwnd <= net->ssthresh) { + /* We are in slow start */ + if (net->flight_size + net->net_ack >= + net->cwnd) { + if (net->net_ack > (net->mtu * sctp_L2_abc_variable)) { + net->cwnd += (net->mtu * sctp_L2_abc_variable); + if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) { + sctp_log_cwnd(stcb, net, net->mtu, + SCTP_CWND_LOG_FROM_SS); + } + } else { + net->cwnd += net->net_ack; + if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) { + sctp_log_cwnd(stcb, net, net->net_ack, + SCTP_CWND_LOG_FROM_SS); + } + } + } else { + unsigned int dif; + + dif = net->cwnd - (net->flight_size + + net->net_ack); + if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) { + sctp_log_cwnd(stcb, net, net->net_ack, + SCTP_CWND_LOG_NOADV_SS); + } + } + } else { + /* We are in congestion avoidance */ + if (net->flight_size + net->net_ack >= + net->cwnd) { + /* + * add to pba only if we had a + * cwnd's worth (or so) in flight OR + * the burst limit was applied. + */ + net->partial_bytes_acked += + net->net_ack; + + /* + * Do we need to increase (if pba is + * > cwnd)? 
+ */ + if (net->partial_bytes_acked >= + net->cwnd) { + if (net->cwnd < + net->partial_bytes_acked) { + net->partial_bytes_acked -= + net->cwnd; + } else { + net->partial_bytes_acked = + 0; + } + net->cwnd += net->mtu; + if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) { + sctp_log_cwnd(stcb, net, net->mtu, + SCTP_CWND_LOG_FROM_CA); + } + } else { + if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) { + sctp_log_cwnd(stcb, net, net->net_ack, + SCTP_CWND_LOG_NOADV_CA); + } + } + } else { + unsigned int dif; + + if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) { + sctp_log_cwnd(stcb, net, net->net_ack, + SCTP_CWND_LOG_NOADV_CA); + } + dif = net->cwnd - (net->flight_size + + net->net_ack); + } + } + } else { + if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) { + sctp_log_cwnd(stcb, net, net->mtu, + SCTP_CWND_LOG_NO_CUMACK); + } + } +skip_cwnd_update: + /* + * NOW, according to Karn's rule do we need to restore the + * RTO timer back? Check our net_ack2. If not set then we + * have a ambiguity.. i.e. all data ack'd was sent to more + * than one place. 
+ */ + if (net->net_ack2) { + /* restore any doubled timers */ + net->RTO = ((net->lastsa >> 2) + net->lastsv) >> 1; + if (net->RTO < stcb->asoc.minrto) { + net->RTO = stcb->asoc.minrto; + } + if (net->RTO > stcb->asoc.maxrto) { + net->RTO = stcb->asoc.maxrto; + } + } + } +} + +void +sctp_cwnd_update_after_timeout(struct sctp_tcb *stcb, + struct sctp_nets *net) +{ + int old_cwnd = net->cwnd; + + net->ssthresh = net->cwnd >> 1; + if (net->ssthresh < (net->mtu << 1)) { + net->ssthresh = (net->mtu << 1); + } + net->cwnd = net->mtu; + /* floor of 1 mtu */ + if (net->cwnd < net->mtu) + net->cwnd = net->mtu; + if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) { + sctp_log_cwnd(stcb, net, net->cwnd - old_cwnd, SCTP_CWND_LOG_FROM_RTX); + } + net->partial_bytes_acked = 0; +} + +struct sctp_hs_raise_drop { + int32_t cwnd; + int32_t increase; + int32_t drop_percent; +}; + +#define SCTP_HS_TABLE_SIZE 73 + +struct sctp_hs_raise_drop sctp_cwnd_adjust[SCTP_HS_TABLE_SIZE] = { + {38, 1, 50}, /* 0 */ + {118, 2, 44}, /* 1 */ + {221, 3, 41}, /* 2 */ + {347, 4, 38}, /* 3 */ + {495, 5, 37}, /* 4 */ + {663, 6, 35}, /* 5 */ + {851, 7, 34}, /* 6 */ + {1058, 8, 33}, /* 7 */ + {1284, 9, 32}, /* 8 */ + {1529, 10, 31}, /* 9 */ + {1793, 11, 30}, /* 10 */ + {2076, 12, 29}, /* 11 */ + {2378, 13, 28}, /* 12 */ + {2699, 14, 28}, /* 13 */ + {3039, 15, 27}, /* 14 */ + {3399, 16, 27}, /* 15 */ + {3778, 17, 26}, /* 16 */ + {4177, 18, 26}, /* 17 */ + {4596, 19, 25}, /* 18 */ + {5036, 20, 25}, /* 19 */ + {5497, 21, 24}, /* 20 */ + {5979, 22, 24}, /* 21 */ + {6483, 23, 23}, /* 22 */ + {7009, 24, 23}, /* 23 */ + {7558, 25, 22}, /* 24 */ + {8130, 26, 22}, /* 25 */ + {8726, 27, 22}, /* 26 */ + {9346, 28, 21}, /* 27 */ + {9991, 29, 21}, /* 28 */ + {10661, 30, 21}, /* 29 */ + {11358, 31, 20}, /* 30 */ + {12082, 32, 20}, /* 31 */ + {12834, 33, 20}, /* 32 */ + {13614, 34, 19}, /* 33 */ + {14424, 35, 19}, /* 34 */ + {15265, 36, 19}, /* 35 */ + {16137, 37, 19}, /* 36 */ + {17042, 38, 18}, /* 37 */ + {17981, 39, 
18}, /* 38 */ + {18955, 40, 18}, /* 39 */ + {19965, 41, 17}, /* 40 */ + {21013, 42, 17}, /* 41 */ + {22101, 43, 17}, /* 42 */ + {23230, 44, 17}, /* 43 */ + {24402, 45, 16}, /* 44 */ + {25618, 46, 16}, /* 45 */ + {26881, 47, 16}, /* 46 */ + {28193, 48, 16}, /* 47 */ + {29557, 49, 15}, /* 48 */ + {30975, 50, 15}, /* 49 */ + {32450, 51, 15}, /* 50 */ + {33986, 52, 15}, /* 51 */ + {35586, 53, 14}, /* 52 */ + {37253, 54, 14}, /* 53 */ + {38992, 55, 14}, /* 54 */ + {40808, 56, 14}, /* 55 */ + {42707, 57, 13}, /* 56 */ + {44694, 58, 13}, /* 57 */ + {46776, 59, 13}, /* 58 */ + {48961, 60, 13}, /* 59 */ + {51258, 61, 13}, /* 60 */ + {53677, 62, 12}, /* 61 */ + {56230, 63, 12}, /* 62 */ + {58932, 64, 12}, /* 63 */ + {61799, 65, 12}, /* 64 */ + {64851, 66, 11}, /* 65 */ + {68113, 67, 11}, /* 66 */ + {71617, 68, 11}, /* 67 */ + {75401, 69, 10}, /* 68 */ + {79517, 70, 10}, /* 69 */ + {84035, 71, 10}, /* 70 */ + {89053, 72, 10}, /* 71 */ + {94717, 73, 9} /* 72 */ +}; + +static void +sctp_hs_cwnd_increase(struct sctp_tcb *stcb, struct sctp_nets *net) +{ + int cur_val, i, indx, incr; + + cur_val = net->cwnd >> 10; + indx = SCTP_HS_TABLE_SIZE - 1; +#ifdef SCTP_DEBUG + printf("HS CC CAlled.\n"); +#endif + if (cur_val < sctp_cwnd_adjust[0].cwnd) { + /* normal mode */ + if (net->net_ack > net->mtu) { + net->cwnd += net->mtu; + if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) { + sctp_log_cwnd(stcb, net, net->mtu, SCTP_CWND_LOG_FROM_SS); + } + } else { + net->cwnd += net->net_ack; + if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) { + sctp_log_cwnd(stcb, net, net->net_ack, SCTP_CWND_LOG_FROM_SS); + } + } + } else { + for (i = net->last_hs_used; i < SCTP_HS_TABLE_SIZE; i++) { + if (cur_val < sctp_cwnd_adjust[i].cwnd) { + indx = i; + break; + } + } + net->last_hs_used = indx; + incr = ((sctp_cwnd_adjust[indx].increase) << 10); + net->cwnd += incr; + if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) { + sctp_log_cwnd(stcb, net, incr, SCTP_CWND_LOG_FROM_SS); + } + } +} + +static void 
+sctp_hs_cwnd_decrease(struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+	/*
+	 * HighSpeed cwnd reduction for one destination.
+	 *
+	 * cwnd is scaled down by 1024 (>> 10) and compared against the cwnd
+	 * column of sctp_cwnd_adjust[].  Below the first table row the
+	 * window is halved (with a floor of 2 MTU); otherwise it is reduced
+	 * by the drop_percent of the row recorded in net->last_hs_used and
+	 * the recorded row is moved down to match the new, smaller cwnd.
+	 * The change is logged when SCTP_CWND_MONITOR_ENABLE is set.
+	 */
+	int cur_val, i, indx;
+	int old_cwnd = net->cwnd;
+
+	cur_val = net->cwnd >> 10;
+	if (cur_val < sctp_cwnd_adjust[0].cwnd) {
+		/* normal mode */
+		net->ssthresh = net->cwnd / 2;
+		if (net->ssthresh < (net->mtu * 2)) {
+			net->ssthresh = 2 * net->mtu;
+		}
+		net->cwnd = net->ssthresh;
+	} else {
+		/* drop by the proper amount */
+		net->ssthresh = net->cwnd - (int)((net->cwnd / 100) *
+		    sctp_cwnd_adjust[net->last_hs_used].drop_percent);
+		net->cwnd = net->ssthresh;
+		/* now where are we */
+		indx = net->last_hs_used;
+		cur_val = net->cwnd >> 10;
+		/* reset where we are in the table */
+		if (cur_val < sctp_cwnd_adjust[0].cwnd) {
+			/* fell out of hs */
+			net->last_hs_used = 0;
+		} else {
+			/*
+			 * Walk down from the old row until the row below
+			 * no longer covers the reduced cwnd; i ends at 0
+			 * when no lower row qualifies.
+			 */
+			for (i = indx; i >= 1; i--) {
+				if (cur_val > sctp_cwnd_adjust[i - 1].cwnd) {
+					break;
+				}
+			}
+			/*
+			 * Record the row the search found.  Storing indx
+			 * here would discard the search and leave the
+			 * table position stuck at its pre-drop value.
+			 */
+			net->last_hs_used = i;
+		}
+	}
+	if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) {
+		sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_FR);
+	}
+}
+
+void
+sctp_hs_cwnd_update_after_fr(struct sctp_tcb *stcb,
+    struct sctp_association *asoc)
+{
+	struct sctp_nets *net;
+
+	/*
+	 * CMT fast recovery code. Need to debug. ((sctp_cmt_on_off == 1) &&
+	 * (net->fast_retran_loss_recovery == 0)))
+	 */
+	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+		if ((asoc->fast_retran_loss_recovery == 0) || (sctp_cmt_on_off == 1)) {
+			/* out of a RFC2582 Fast recovery window? */
+			if (net->net_ack > 0) {
+				/*
+				 * per section 7.2.3, are there any
+				 * destinations that had a fast retransmit
+				 * to them. If so what we need to do is
+				 * adjust ssthresh and cwnd.
+ */ + struct sctp_tmit_chunk *lchk; + + sctp_hs_cwnd_decrease(stcb, net); + + lchk = TAILQ_FIRST(&asoc->send_queue); + + net->partial_bytes_acked = 0; + /* Turn on fast recovery window */ + asoc->fast_retran_loss_recovery = 1; + if (lchk == NULL) { + /* Mark end of the window */ + asoc->fast_recovery_tsn = asoc->sending_seq - 1; + } else { + asoc->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1; + } + + /* + * CMT fast recovery -- per destination + * recovery variable. + */ + net->fast_retran_loss_recovery = 1; + + if (lchk == NULL) { + /* Mark end of the window */ + net->fast_recovery_tsn = asoc->sending_seq - 1; + } else { + net->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1; + } + + /* + * Disable Nonce Sum Checking and store the + * resync tsn + */ + asoc->nonce_sum_check = 0; + asoc->nonce_resync_tsn = asoc->fast_recovery_tsn + 1; + + sctp_timer_stop(SCTP_TIMER_TYPE_SEND, + stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_32); + sctp_timer_start(SCTP_TIMER_TYPE_SEND, + stcb->sctp_ep, stcb, net); + } + } else if (net->net_ack > 0) { + /* + * Mark a peg that we WOULD have done a cwnd + * reduction but RFC2582 prevented this action. + */ + SCTP_STAT_INCR(sctps_fastretransinrtt); + } + } +} + +void +sctp_hs_cwnd_update_after_sack(struct sctp_tcb *stcb, + struct sctp_association *asoc, + int accum_moved, int reneged_all, int will_exit) +{ + struct sctp_nets *net; + + /******************************/ + /* update cwnd and Early FR */ + /******************************/ + TAILQ_FOREACH(net, &asoc->nets, sctp_next) { + +#ifdef JANA_CMT_FAST_RECOVERY + /* + * CMT fast recovery code. Need to debug. 
+ */ + if (net->fast_retran_loss_recovery && net->new_pseudo_cumack) { + if (compare_with_wrap(asoc->last_acked_seq, + net->fast_recovery_tsn, MAX_TSN) || + (asoc->last_acked_seq == net->fast_recovery_tsn) || + compare_with_wrap(net->pseudo_cumack, net->fast_recovery_tsn, MAX_TSN) || + (net->pseudo_cumack == net->fast_recovery_tsn)) { + net->will_exit_fast_recovery = 1; + } + } +#endif + if (sctp_early_fr) { + /* + * So, first of all do we need to have a Early FR + * timer running? + */ + if (((TAILQ_FIRST(&asoc->sent_queue)) && + (net->ref_count > 1) && + (net->flight_size < net->cwnd)) || + (reneged_all)) { + /* + * yes, so in this case stop it if its + * running, and then restart it. Reneging + * all is a special case where we want to + * run the Early FR timer and then force the + * last few unacked to be sent, causing us + * to illicit a sack with gaps to force out + * the others. + */ + if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) { + SCTP_STAT_INCR(sctps_earlyfrstpidsck2); + sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net, + SCTP_FROM_SCTP_INDATA + SCTP_LOC_20); + } + SCTP_STAT_INCR(sctps_earlyfrstrid); + sctp_timer_start(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net); + } else { + /* No, stop it if its running */ + if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) { + SCTP_STAT_INCR(sctps_earlyfrstpidsck3); + sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net, + SCTP_FROM_SCTP_INDATA + SCTP_LOC_21); + } + } + } + /* if nothing was acked on this destination skip it */ + if (net->net_ack == 0) { + if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) { + sctp_log_cwnd(stcb, net, 0, SCTP_CWND_LOG_FROM_SACK); + } + continue; + } + if (net->net_ack2 > 0) { + /* + * Karn's rule applies to clearing error count, this + * is optional. 
+ */ + net->error_count = 0; + if ((net->dest_state & SCTP_ADDR_NOT_REACHABLE) == + SCTP_ADDR_NOT_REACHABLE) { + /* addr came good */ + net->dest_state &= ~SCTP_ADDR_NOT_REACHABLE; + net->dest_state |= SCTP_ADDR_REACHABLE; + sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb, + SCTP_RECEIVED_SACK, (void *)net); + /* now was it the primary? if so restore */ + if (net->dest_state & SCTP_ADDR_WAS_PRIMARY) { + (void)sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, net); + } + } + /* + * JRS 5/14/07 - If CMT PF is on and the destination + * is in PF state, set the destination to active + * state and set the cwnd to one or two MTU's based + * on whether PF1 or PF2 is being used. + * + * Should we stop any running T3 timer here? + */ + if (sctp_cmt_pf && ((net->dest_state & SCTP_ADDR_PF) == + SCTP_ADDR_PF)) { + net->dest_state &= ~SCTP_ADDR_PF; + net->cwnd = net->mtu * sctp_cmt_pf; + SCTPDBG(SCTP_DEBUG_INDATA1, "Destination %p moved from PF to reachable with cwnd %d.\n", + net, net->cwnd); + /* + * Since the cwnd value is explicitly set, + * skip the code that updates the cwnd + * value. + */ + goto skip_cwnd_update; + } + } +#ifdef JANA_CMT_FAST_RECOVERY + /* + * CMT fast recovery code + */ + /* + * if (sctp_cmt_on_off == 1 && + * net->fast_retran_loss_recovery && + * net->will_exit_fast_recovery == 0) { @@@ Do something } + * else if (sctp_cmt_on_off == 0 && + * asoc->fast_retran_loss_recovery && will_exit == 0) { + */ +#endif + + if (asoc->fast_retran_loss_recovery && will_exit == 0 && sctp_cmt_on_off == 0) { + /* + * If we are in loss recovery we skip any cwnd + * update + */ + goto skip_cwnd_update; + } + /* + * CMT: CUC algorithm. Update cwnd if pseudo-cumack has + * moved. 
+ */ + if (accum_moved || (sctp_cmt_on_off && net->new_pseudo_cumack)) { + /* If the cumulative ack moved we can proceed */ + if (net->cwnd <= net->ssthresh) { + /* We are in slow start */ + if (net->flight_size + net->net_ack >= + net->cwnd) { + + sctp_hs_cwnd_increase(stcb, net); + + } else { + unsigned int dif; + + dif = net->cwnd - (net->flight_size + + net->net_ack); + if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) { + sctp_log_cwnd(stcb, net, net->net_ack, + SCTP_CWND_LOG_NOADV_SS); + } + } + } else { + /* We are in congestion avoidance */ + if (net->flight_size + net->net_ack >= + net->cwnd) { + /* + * add to pba only if we had a + * cwnd's worth (or so) in flight OR + * the burst limit was applied. + */ + net->partial_bytes_acked += + net->net_ack; + + /* + * Do we need to increase (if pba is + * > cwnd)? + */ + if (net->partial_bytes_acked >= + net->cwnd) { + if (net->cwnd < + net->partial_bytes_acked) { + net->partial_bytes_acked -= + net->cwnd; + } else { + net->partial_bytes_acked = + 0; + } + net->cwnd += net->mtu; + if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) { + sctp_log_cwnd(stcb, net, net->mtu, + SCTP_CWND_LOG_FROM_CA); + } + } else { + if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) { + sctp_log_cwnd(stcb, net, net->net_ack, + SCTP_CWND_LOG_NOADV_CA); + } + } + } else { + unsigned int dif; + + if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) { + sctp_log_cwnd(stcb, net, net->net_ack, + SCTP_CWND_LOG_NOADV_CA); + } + dif = net->cwnd - (net->flight_size + + net->net_ack); + } + } + } else { + if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) { + sctp_log_cwnd(stcb, net, net->mtu, + SCTP_CWND_LOG_NO_CUMACK); + } + } +skip_cwnd_update: + /* + * NOW, according to Karn's rule do we need to restore the + * RTO timer back? Check our net_ack2. If not set then we + * have a ambiguity.. i.e. all data ack'd was sent to more + * than one place. 
+ */ + if (net->net_ack2) { + /* restore any doubled timers */ + net->RTO = ((net->lastsa >> 2) + net->lastsv) >> 1; + if (net->RTO < stcb->asoc.minrto) { + net->RTO = stcb->asoc.minrto; + } + if (net->RTO > stcb->asoc.maxrto) { + net->RTO = stcb->asoc.maxrto; + } + } + } +} + +void +sctp_cwnd_update_after_ecn_echo(struct sctp_tcb *stcb, + struct sctp_nets *net) +{ + int old_cwnd; + + old_cwnd = net->cwnd; + + SCTP_STAT_INCR(sctps_ecnereducedcwnd); + net->ssthresh = net->cwnd / 2; + if (net->ssthresh < net->mtu) { + net->ssthresh = net->mtu; + /* here back off the timer as well, to slow us down */ + net->RTO <<= 1; + } + net->cwnd = net->ssthresh; + if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) { + sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT); + } +} + +void +sctp_cwnd_update_after_packet_dropped(struct sctp_tcb *stcb, + struct sctp_nets *net, struct sctp_pktdrop_chunk *cp, + uint32_t * bottle_bw, uint32_t * on_queue) +{ + uint32_t bw_avail; + int rtt, incr; + int old_cwnd = net->cwnd; + + /* need real RTT for this calc */ + rtt = ((net->lastsa >> 2) + net->lastsv) >> 1; + /* get bottle neck bw */ + *bottle_bw = ntohl(cp->bottle_bw); + /* and whats on queue */ + *on_queue = ntohl(cp->current_onq); + /* + * adjust the on-queue if our flight is more it could be that the + * router has not yet gotten data "in-flight" to it + */ + if (*on_queue < net->flight_size) + *on_queue = net->flight_size; + /* calculate the available space */ + bw_avail = (*bottle_bw * rtt) / 1000; + if (bw_avail > *bottle_bw) { + /* + * Cap the growth to no more than the bottle neck. This can + * happen as RTT slides up due to queues. It also means if + * you have more than a 1 second RTT with a empty queue you + * will be limited to the bottle_bw per second no matter if + * other points have 1/2 the RTT and you could get more + * out... 
+		 */
+		bw_avail = *bottle_bw;
+	}
+	if (*on_queue > bw_avail) {
+		/*
+		 * No room for anything else don't allow anything else to be
+		 * "added to the fire".
+		 */
+		int seg_inflight, seg_onqueue, my_portion;
+
+		net->partial_bytes_acked = 0;
+
+		/* how much are we over queue size? */
+		incr = *on_queue - bw_avail;
+		if (stcb->asoc.seen_a_sack_this_pkt) {
+			/*
+			 * undo any cwnd adjustment that the sack might have
+			 * made
+			 */
+			net->cwnd = net->prev_cwnd;
+		}
+		/* Now how much of that is mine? */
+		seg_inflight = net->flight_size / net->mtu;
+		seg_onqueue = *on_queue / net->mtu;
+		/*
+		 * *on_queue comes from the peer's packet-drop report and may
+		 * be smaller than one MTU, which makes seg_onqueue zero.
+		 * Dividing by it would trap.  Because *on_queue was raised
+		 * to at least flight_size earlier in this function,
+		 * seg_inflight is zero as well in that case, so none of the
+		 * overage is attributed to this destination.
+		 */
+		if (seg_onqueue > 0)
+			my_portion = (incr * seg_inflight) / seg_onqueue;
+		else
+			my_portion = 0;
+
+		/* Have I made an adjustment already */
+		if (net->cwnd > net->flight_size) {
+			/*
+			 * for this flight I made an adjustment we need to
+			 * decrease the portion by a share our previous
+			 * adjustment.
+			 */
+			int diff_adj;
+
+			diff_adj = net->cwnd - net->flight_size;
+			if (diff_adj > my_portion)
+				my_portion = 0;
+			else
+				my_portion -= diff_adj;
+		}
+		/*
+		 * back down to the previous cwnd (assume we have had a sack
+		 * before this packet). minus what ever portion of the
+		 * overage is my fault.
+		 */
+		net->cwnd -= my_portion;
+
+		/* we will NOT back down more than 1 MTU */
+		if (net->cwnd <= net->mtu) {
+			net->cwnd = net->mtu;
+		}
+		/* force into CA */
+		net->ssthresh = net->cwnd - 1;
+	} else {
+		/*
+		 * Take 1/4 of the space left or max burst up .. whichever
+		 * is less.
+ */ + incr = min((bw_avail - *on_queue) >> 2, + stcb->asoc.max_burst * net->mtu); + net->cwnd += incr; + } + if (net->cwnd > bw_avail) { + /* We can't exceed the pipe size */ + net->cwnd = bw_avail; + } + if (net->cwnd < net->mtu) { + /* We always have 1 MTU */ + net->cwnd = net->mtu; + } + if (net->cwnd - old_cwnd != 0) { + /* log only changes */ + if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) { + sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), + SCTP_CWND_LOG_FROM_SAT); + } + } +} + +void +sctp_cwnd_update_after_output(struct sctp_tcb *stcb, + struct sctp_nets *net, int burst_limit) +{ + int old_cwnd; + + if (net->ssthresh < net->cwnd) + net->ssthresh = net->cwnd; + old_cwnd = net->cwnd; + net->cwnd = (net->flight_size + (burst_limit * net->mtu)); + + if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) { + sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_BRST); + } +} + +void +sctp_cwnd_update_after_fr_timer(struct sctp_inpcb *inp, + struct sctp_tcb *stcb, struct sctp_nets *net) +{ + int old_cwnd; + + old_cwnd = net->cwnd; + + sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_EARLY_FR_TMR); + /* + * make a small adjustment to cwnd and force to CA. + */ + if (net->cwnd > net->mtu) + /* drop down one MTU after sending */ + net->cwnd -= net->mtu; + if (net->cwnd < net->ssthresh) + /* still in SS move to CA */ + net->ssthresh = net->cwnd - 1; + if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) { + sctp_log_cwnd(stcb, net, (old_cwnd - net->cwnd), SCTP_CWND_LOG_FROM_FR); + } +} + +/* + * H-TCP congestion control. The algorithm is detailed in: + * R.N.Shorten, D.J.Leith: + * "H-TCP: TCP for high-speed and long-distance networks" + * Proc. PFLDnet, Argonne, 2004. 
+ * http://www.hamilton.ie/net/htcp3.pdf
+ */
+
+
+static int use_rtt_scaling = 1;
+static int use_bandwidth_switch = 1;
+
+/*
+ * Non-zero when seq1 lies within [seq2, seq3], evaluated with wrapping
+ * unsigned 32-bit arithmetic.
+ */
+static inline int
+between(uint32_t seq1, uint32_t seq2, uint32_t seq3)
+{
+	return seq3 - seq2 >= seq1 - seq2;
+}
+
+/* Ticks elapsed since ca->last_cong was last stamped. */
+static inline uint32_t
+htcp_cong_time(struct htcp *ca)
+{
+	return ticks - ca->last_cong;
+}
+
+/*
+ * Time since the last congestion stamp, expressed in units of minRTT.
+ *
+ * minRTT is zero until measure_rtt() has stored a non-zero sample, and
+ * measure_rtt() calls this function right after storing a sample that may
+ * itself be zero (net->lastsa >> 3).  Dividing by it would be a
+ * divide-by-zero, so in that case count in plain ticks instead, i.e. treat
+ * minRTT as one tick.
+ */
+static inline uint32_t
+htcp_ccount(struct htcp *ca)
+{
+	if (ca->minRTT == 0)
+		return htcp_cong_time(ca);
+	return htcp_cong_time(ca) / ca->minRTT;
+}
+
+/*
+ * Save last_cong, maxRTT and old_maxB into their undo_* shadows and stamp
+ * last_cong with the current tick count.
+ */
+static inline void
+htcp_reset(struct htcp *ca)
+{
+	ca->undo_last_cong = ca->last_cong;
+	ca->undo_maxRTT = ca->maxRTT;
+	ca->undo_old_maxB = ca->old_maxB;
+	ca->last_cong = ticks;
+}
+
+#ifdef SCTP_NOT_USED
+
+/*
+ * Restore the values saved by htcp_reset() and return the larger of the
+ * current cwnd and ssthresh scaled back up by 1/beta (beta is <<7 fixed
+ * point), in whole MTUs.
+ */
+static uint32_t
+htcp_cwnd_undo(struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+	net->htcp_ca.last_cong = net->htcp_ca.undo_last_cong;
+	net->htcp_ca.maxRTT = net->htcp_ca.undo_maxRTT;
+	net->htcp_ca.old_maxB = net->htcp_ca.undo_old_maxB;
+	return max(net->cwnd, ((net->ssthresh / net->mtu << 7) / net->htcp_ca.beta) * net->mtu);
+}
+
+#endif
+
+/*
+ * Update the per-destination minRTT/maxRTT bookkeeping from the current
+ * srtt value (net->lastsa >> 3).
+ */
+static inline void
+measure_rtt(struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+	uint32_t srtt = net->lastsa >> 3;
+
+	/* keep track of minimum RTT seen so far, minRTT is zero at first */
+	if (net->htcp_ca.minRTT > srtt || !net->htcp_ca.minRTT)
+		net->htcp_ca.minRTT = srtt;
+
+	/*
+	 * max RTT: only raised outside fast retransmit, once more than 3
+	 * ccount units have passed, and by at most MSEC_TO_TICKS(20) per
+	 * sample.
+	 */
+	if (net->fast_retran_ip == 0 && net->ssthresh < 0xFFFF && htcp_ccount(&net->htcp_ca) > 3) {
+		if (net->htcp_ca.maxRTT < net->htcp_ca.minRTT)
+			net->htcp_ca.maxRTT = net->htcp_ca.minRTT;
+		if (net->htcp_ca.maxRTT < srtt && srtt <= net->htcp_ca.maxRTT + MSEC_TO_TICKS(20))
+			net->htcp_ca.maxRTT = srtt;
+	}
+}
+
+static void
+measure_achieved_throughput(struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+	uint32_t now = ticks;
+
+	if (net->fast_retran_ip == 0)
+		net->htcp_ca.bytes_acked = net->net_ack;
+
+	if (!use_bandwidth_switch)
+		return;
+
+	/* achieved throughput calculations */
+	/* JRS - not 100% sure of this statement */
+	if (net->fast_retran_ip == 1) {
+		net->htcp_ca.bytecount = 0;
+		
net->htcp_ca.lasttime = now; + return; + } + net->htcp_ca.bytecount += net->net_ack; + + if (net->htcp_ca.bytecount >= net->cwnd - ((net->htcp_ca.alpha >> 7 ? : 1) * net->mtu) + && now - net->htcp_ca.lasttime >= net->htcp_ca.minRTT + && net->htcp_ca.minRTT > 0) { + uint32_t cur_Bi = net->htcp_ca.bytecount / net->mtu * hz / (now - net->htcp_ca.lasttime); + + if (htcp_ccount(&net->htcp_ca) <= 3) { + /* just after backoff */ + net->htcp_ca.minB = net->htcp_ca.maxB = net->htcp_ca.Bi = cur_Bi; + } else { + net->htcp_ca.Bi = (3 * net->htcp_ca.Bi + cur_Bi) / 4; + if (net->htcp_ca.Bi > net->htcp_ca.maxB) + net->htcp_ca.maxB = net->htcp_ca.Bi; + if (net->htcp_ca.minB > net->htcp_ca.maxB) + net->htcp_ca.minB = net->htcp_ca.maxB; + } + net->htcp_ca.bytecount = 0; + net->htcp_ca.lasttime = now; + } +} + +static inline void +htcp_beta_update(struct htcp *ca, uint32_t minRTT, uint32_t maxRTT) +{ + if (use_bandwidth_switch) { + uint32_t maxB = ca->maxB; + uint32_t old_maxB = ca->old_maxB; + + ca->old_maxB = ca->maxB; + + if (!between(5 * maxB, 4 * old_maxB, 6 * old_maxB)) { + ca->beta = BETA_MIN; + ca->modeswitch = 0; + return; + } + } + if (ca->modeswitch && minRTT > (uint32_t) MSEC_TO_TICKS(10) && maxRTT) { + ca->beta = (minRTT << 7) / maxRTT; + if (ca->beta < BETA_MIN) + ca->beta = BETA_MIN; + else if (ca->beta > BETA_MAX) + ca->beta = BETA_MAX; + } else { + ca->beta = BETA_MIN; + ca->modeswitch = 1; + } +} + +static inline void +htcp_alpha_update(struct htcp *ca) +{ + uint32_t minRTT = ca->minRTT; + uint32_t factor = 1; + uint32_t diff = htcp_cong_time(ca); + + if (diff > (uint32_t) hz) { + diff -= hz; + factor = 1 + (10 * diff + ((diff / 2) * (diff / 2) / hz)) / hz; + } + if (use_rtt_scaling && minRTT) { + uint32_t scale = (hz << 3) / (10 * minRTT); + + scale = min(max(scale, 1U << 2), 10U << 3); /* clamping ratio to + * interval [0.5,10]<<3 */ + factor = (factor << 3) / scale; + if (!factor) + factor = 1; + } + ca->alpha = 2 * factor * ((1 << 7) - ca->beta); + if 
(!ca->alpha) + ca->alpha = ALPHA_BASE; +} + +/* After we have the rtt data to calculate beta, we'd still prefer to wait one + * rtt before we adjust our beta to ensure we are working from a consistent + * data. + * + * This function should be called when we hit a congestion event since only at + * that point do we really have a real sense of maxRTT (the queues en route + * were getting just too full now). + */ +static void +htcp_param_update(struct sctp_tcb *stcb, struct sctp_nets *net) +{ + uint32_t minRTT = net->htcp_ca.minRTT; + uint32_t maxRTT = net->htcp_ca.maxRTT; + + htcp_beta_update(&net->htcp_ca, minRTT, maxRTT); + htcp_alpha_update(&net->htcp_ca); + + /* + * add slowly fading memory for maxRTT to accommodate routing + * changes etc + */ + if (minRTT > 0 && maxRTT > minRTT) + net->htcp_ca.maxRTT = minRTT + ((maxRTT - minRTT) * 95) / 100; +} + +static uint32_t +htcp_recalc_ssthresh(struct sctp_tcb *stcb, struct sctp_nets *net) +{ + htcp_param_update(stcb, net); + return max(((net->cwnd / net->mtu * net->htcp_ca.beta) >> 7) * net->mtu, 2U * net->mtu); +} + +static void +htcp_cong_avoid(struct sctp_tcb *stcb, struct sctp_nets *net) +{ + /*- + * How to handle these functions? + * if (!tcp_is_cwnd_limited(sk, in_flight)) RRS - good question. 
+	 * return;
+	 */
+	if (net->cwnd <= net->ssthresh) {
+		/* We are in slow start */
+		if (net->flight_size + net->net_ack >= net->cwnd) {
+			if (net->net_ack > (net->mtu * sctp_L2_abc_variable)) {
+				/* cap the growth at sctp_L2_abc_variable MTUs per SACK */
+				net->cwnd += (net->mtu * sctp_L2_abc_variable);
+				if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) {
+					/*
+					 * Log the increment that was actually
+					 * applied, as every other call site
+					 * does, not a bare MTU.
+					 */
+					sctp_log_cwnd(stcb, net, net->mtu * sctp_L2_abc_variable,
+					    SCTP_CWND_LOG_FROM_SS);
+				}
+			} else {
+				net->cwnd += net->net_ack;
+				if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) {
+					sctp_log_cwnd(stcb, net, net->net_ack,
+					    SCTP_CWND_LOG_FROM_SS);
+				}
+			}
+		} else {
+			/* cwnd was not fully used: no advance, only log it */
+			if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) {
+				sctp_log_cwnd(stcb, net, net->net_ack,
+				    SCTP_CWND_LOG_NOADV_SS);
+			}
+		}
+	} else {
+		measure_rtt(stcb, net);
+
+		/*
+		 * In dangerous area, increase slowly. In theory this is
+		 * net->cwnd += alpha / net->cwnd
+		 */
+		/* What is snd_cwnd_cnt?? */
+		if (((net->partial_bytes_acked / net->mtu * net->htcp_ca.alpha) >> 7) * net->mtu >= net->cwnd) {
+			/*-
+			 * Does SCTP have a cwnd clamp?
+			 * if (net->snd_cwnd < net->snd_cwnd_clamp) - Nope (RRS).
+			 */
+			net->cwnd += net->mtu;
+			net->partial_bytes_acked = 0;
+			htcp_alpha_update(&net->htcp_ca);
+			if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) {
+				sctp_log_cwnd(stcb, net, net->mtu,
+				    SCTP_CWND_LOG_FROM_CA);
+			}
+		} else {
+			net->partial_bytes_acked += net->net_ack;
+			if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) {
+				sctp_log_cwnd(stcb, net, net->net_ack,
+				    SCTP_CWND_LOG_NOADV_CA);
+			}
+		}
+
+		net->htcp_ca.bytes_acked = net->mtu;
+	}
+}
+
+#ifdef SCTP_NOT_USED
+/* Lower bound on congestion window.
*/ +static uint32_t +htcp_min_cwnd(struct sctp_tcb *stcb, struct sctp_nets *net) +{ + return net->ssthresh; +} + +#endif + +static void +htcp_init(struct sctp_tcb *stcb, struct sctp_nets *net) +{ + memset(&net->htcp_ca, 0, sizeof(struct htcp)); + net->htcp_ca.alpha = ALPHA_BASE; + net->htcp_ca.beta = BETA_MIN; + net->htcp_ca.bytes_acked = net->mtu; + net->htcp_ca.last_cong = ticks; +} + +void +sctp_htcp_set_initial_cc_param(struct sctp_tcb *stcb, struct sctp_nets *net) +{ + /* + * We take the max of the burst limit times a MTU or the + * INITIAL_CWND. We then limit this to 4 MTU's of sending. + */ + net->cwnd = min((net->mtu * 4), max((2 * net->mtu), SCTP_INITIAL_CWND)); + /* we always get at LEAST 2 MTU's */ + if (net->cwnd < (2 * net->mtu)) { + net->cwnd = 2 * net->mtu; + } + net->ssthresh = stcb->asoc.peers_rwnd; + htcp_init(stcb, net); + + if (sctp_logging_level & (SCTP_CWND_MONITOR_ENABLE | SCTP_CWND_LOGGING_ENABLE)) { + sctp_log_cwnd(stcb, net, 0, SCTP_CWND_INITIALIZATION); + } +} + +void +sctp_htcp_cwnd_update_after_sack(struct sctp_tcb *stcb, + struct sctp_association *asoc, + int accum_moved, int reneged_all, int will_exit) +{ + struct sctp_nets *net; + + /******************************/ + /* update cwnd and Early FR */ + /******************************/ + TAILQ_FOREACH(net, &asoc->nets, sctp_next) { + +#ifdef JANA_CMT_FAST_RECOVERY + /* + * CMT fast recovery code. Need to debug. + */ + if (net->fast_retran_loss_recovery && net->new_pseudo_cumack) { + if (compare_with_wrap(asoc->last_acked_seq, + net->fast_recovery_tsn, MAX_TSN) || + (asoc->last_acked_seq == net->fast_recovery_tsn) || + compare_with_wrap(net->pseudo_cumack, net->fast_recovery_tsn, MAX_TSN) || + (net->pseudo_cumack == net->fast_recovery_tsn)) { + net->will_exit_fast_recovery = 1; + } + } +#endif + if (sctp_early_fr) { + /* + * So, first of all do we need to have a Early FR + * timer running? 
+ */ + if (((TAILQ_FIRST(&asoc->sent_queue)) && + (net->ref_count > 1) && + (net->flight_size < net->cwnd)) || + (reneged_all)) { + /* + * yes, so in this case stop it if its + * running, and then restart it. Reneging + * all is a special case where we want to + * run the Early FR timer and then force the + * last few unacked to be sent, causing us + * to illicit a sack with gaps to force out + * the others. + */ + if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) { + SCTP_STAT_INCR(sctps_earlyfrstpidsck2); + sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net, + SCTP_FROM_SCTP_INDATA + SCTP_LOC_20); + } + SCTP_STAT_INCR(sctps_earlyfrstrid); + sctp_timer_start(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net); + } else { + /* No, stop it if its running */ + if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) { + SCTP_STAT_INCR(sctps_earlyfrstpidsck3); + sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net, + SCTP_FROM_SCTP_INDATA + SCTP_LOC_21); + } + } + } + /* if nothing was acked on this destination skip it */ + if (net->net_ack == 0) { + if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) { + sctp_log_cwnd(stcb, net, 0, SCTP_CWND_LOG_FROM_SACK); + } + continue; + } + if (net->net_ack2 > 0) { + /* + * Karn's rule applies to clearing error count, this + * is optional. + */ + net->error_count = 0; + if ((net->dest_state & SCTP_ADDR_NOT_REACHABLE) == + SCTP_ADDR_NOT_REACHABLE) { + /* addr came good */ + net->dest_state &= ~SCTP_ADDR_NOT_REACHABLE; + net->dest_state |= SCTP_ADDR_REACHABLE; + sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb, + SCTP_RECEIVED_SACK, (void *)net); + /* now was it the primary? if so restore */ + if (net->dest_state & SCTP_ADDR_WAS_PRIMARY) { + (void)sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, net); + } + } + /* + * JRS 5/14/07 - If CMT PF is on and the destination + * is in PF state, set the destination to active + * state and set the cwnd to one or two MTU's based + * on whether PF1 or PF2 is being used. 
+ * + * Should we stop any running T3 timer here? + */ + if (sctp_cmt_pf && ((net->dest_state & SCTP_ADDR_PF) == + SCTP_ADDR_PF)) { + net->dest_state &= ~SCTP_ADDR_PF; + net->cwnd = net->mtu * sctp_cmt_pf; + SCTPDBG(SCTP_DEBUG_INDATA1, "Destination %p moved from PF to reachable with cwnd %d.\n", + net, net->cwnd); + /* + * Since the cwnd value is explicitly set, + * skip the code that updates the cwnd + * value. + */ + goto skip_cwnd_update; + } + } +#ifdef JANA_CMT_FAST_RECOVERY + /* + * CMT fast recovery code + */ + /* + * if (sctp_cmt_on_off == 1 && + * net->fast_retran_loss_recovery && + * net->will_exit_fast_recovery == 0) { @@@ Do something } + * else if (sctp_cmt_on_off == 0 && + * asoc->fast_retran_loss_recovery && will_exit == 0) { + */ +#endif + + if (asoc->fast_retran_loss_recovery && will_exit == 0 && sctp_cmt_on_off == 0) { + /* + * If we are in loss recovery we skip any cwnd + * update + */ + goto skip_cwnd_update; + } + /* + * CMT: CUC algorithm. Update cwnd if pseudo-cumack has + * moved. + */ + if (accum_moved || (sctp_cmt_on_off && net->new_pseudo_cumack)) { + htcp_cong_avoid(stcb, net); + measure_achieved_throughput(stcb, net); + } else { + if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) { + sctp_log_cwnd(stcb, net, net->mtu, + SCTP_CWND_LOG_NO_CUMACK); + } + } +skip_cwnd_update: + /* + * NOW, according to Karn's rule do we need to restore the + * RTO timer back? Check our net_ack2. If not set then we + * have a ambiguity.. i.e. all data ack'd was sent to more + * than one place. + */ + if (net->net_ack2) { + /* restore any doubled timers */ + net->RTO = ((net->lastsa >> 2) + net->lastsv) >> 1; + if (net->RTO < stcb->asoc.minrto) { + net->RTO = stcb->asoc.minrto; + } + if (net->RTO > stcb->asoc.maxrto) { + net->RTO = stcb->asoc.maxrto; + } + } + } +} + +void +sctp_htcp_cwnd_update_after_fr(struct sctp_tcb *stcb, + struct sctp_association *asoc) +{ + struct sctp_nets *net; + + /* + * CMT fast recovery code. Need to debug. 
((sctp_cmt_on_off == 1) && + * (net->fast_retran_loss_recovery == 0))) + */ + TAILQ_FOREACH(net, &asoc->nets, sctp_next) { + if ((asoc->fast_retran_loss_recovery == 0) || (sctp_cmt_on_off == 1)) { + /* out of a RFC2582 Fast recovery window? */ + if (net->net_ack > 0) { + /* + * per section 7.2.3, are there any + * destinations that had a fast retransmit + * to them. If so what we need to do is + * adjust ssthresh and cwnd. + */ + struct sctp_tmit_chunk *lchk; + int old_cwnd = net->cwnd; + + /* JRS - reset as if state were changed */ + htcp_reset(&net->htcp_ca); + net->ssthresh = htcp_recalc_ssthresh(stcb, net); + net->cwnd = net->ssthresh; + if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) { + sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), + SCTP_CWND_LOG_FROM_FR); + } + lchk = TAILQ_FIRST(&asoc->send_queue); + + net->partial_bytes_acked = 0; + /* Turn on fast recovery window */ + asoc->fast_retran_loss_recovery = 1; + if (lchk == NULL) { + /* Mark end of the window */ + asoc->fast_recovery_tsn = asoc->sending_seq - 1; + } else { + asoc->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1; + } + + /* + * CMT fast recovery -- per destination + * recovery variable. + */ + net->fast_retran_loss_recovery = 1; + + if (lchk == NULL) { + /* Mark end of the window */ + net->fast_recovery_tsn = asoc->sending_seq - 1; + } else { + net->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1; + } + + /* + * Disable Nonce Sum Checking and store the + * resync tsn + */ + asoc->nonce_sum_check = 0; + asoc->nonce_resync_tsn = asoc->fast_recovery_tsn + 1; + + sctp_timer_stop(SCTP_TIMER_TYPE_SEND, + stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_32); + sctp_timer_start(SCTP_TIMER_TYPE_SEND, + stcb->sctp_ep, stcb, net); + } + } else if (net->net_ack > 0) { + /* + * Mark a peg that we WOULD have done a cwnd + * reduction but RFC2582 prevented this action. 
+ */ + SCTP_STAT_INCR(sctps_fastretransinrtt); + } + } +} + +void +sctp_htcp_cwnd_update_after_timeout(struct sctp_tcb *stcb, + struct sctp_nets *net) +{ + int old_cwnd = net->cwnd; + + /* JRS - reset as if the state were being changed to timeout */ + htcp_reset(&net->htcp_ca); + net->ssthresh = htcp_recalc_ssthresh(stcb, net); + net->cwnd = net->mtu; + /* floor of 1 mtu */ + if (net->cwnd < net->mtu) + net->cwnd = net->mtu; + if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) { + sctp_log_cwnd(stcb, net, net->cwnd - old_cwnd, SCTP_CWND_LOG_FROM_RTX); + } + net->partial_bytes_acked = 0; +} + +void +sctp_htcp_cwnd_update_after_fr_timer(struct sctp_inpcb *inp, + struct sctp_tcb *stcb, struct sctp_nets *net) +{ + int old_cwnd; + + old_cwnd = net->cwnd; + + sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_EARLY_FR_TMR); + net->htcp_ca.last_cong = ticks; + /* + * make a small adjustment to cwnd and force to CA. + */ + if (net->cwnd > net->mtu) + /* drop down one MTU after sending */ + net->cwnd -= net->mtu; + if (net->cwnd < net->ssthresh) + /* still in SS move to CA */ + net->ssthresh = net->cwnd - 1; + if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) { + sctp_log_cwnd(stcb, net, (old_cwnd - net->cwnd), SCTP_CWND_LOG_FROM_FR); + } +} + +void +sctp_htcp_cwnd_update_after_ecn_echo(struct sctp_tcb *stcb, + struct sctp_nets *net) +{ + int old_cwnd; + + old_cwnd = net->cwnd; + + /* JRS - reset hctp as if state changed */ + htcp_reset(&net->htcp_ca); + SCTP_STAT_INCR(sctps_ecnereducedcwnd); + net->ssthresh = htcp_recalc_ssthresh(stcb, net); + if (net->ssthresh < net->mtu) { + net->ssthresh = net->mtu; + /* here back off the timer as well, to slow us down */ + net->RTO <<= 1; + } + net->cwnd = net->ssthresh; + if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) { + sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT); + } +} diff --git a/sys/netinet/sctp_cc_functions.h b/sys/netinet/sctp_cc_functions.h new file mode 100644 index 000000000000..fa1e2fc2a3fc 
--- /dev/null +++ b/sys/netinet/sctp_cc_functions.h @@ -0,0 +1,107 @@ +/*- + * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * a) Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * b) Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * + * c) Neither the name of Cisco Systems, Inc. nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#ifndef __sctp_cc_functions_h__ +#define __sctp_cc_functions_h__ + +#if defined(_KERNEL) + +void +sctp_set_initial_cc_param(struct sctp_tcb *stcb, + struct sctp_nets *net); + +void +sctp_cwnd_update_after_fr(struct sctp_tcb *stcb, + struct sctp_association *asoc); + +void +sctp_cwnd_update_after_sack(struct sctp_tcb *stcb, + struct sctp_association *asoc, + int accum_moved, int reneged_all, int will_exit); + +void +sctp_cwnd_update_after_timeout(struct sctp_tcb *stcb, + struct sctp_nets *net); + +void +sctp_hs_cwnd_update_after_fr(struct sctp_tcb *stcb, + struct sctp_association *asoc); + +void +sctp_hs_cwnd_update_after_sack(struct sctp_tcb *stcb, + struct sctp_association *asoc, + int accum_moved, int reneged_all, int will_exit); + +void +sctp_cwnd_update_after_ecn_echo(struct sctp_tcb *stcb, + struct sctp_nets *net); + +void +sctp_cwnd_update_after_packet_dropped(struct sctp_tcb *stcb, + struct sctp_nets *net, struct sctp_pktdrop_chunk *cp, + uint32_t * bottle_bw, uint32_t * on_queue); + +void +sctp_cwnd_update_after_output(struct sctp_tcb *stcb, + struct sctp_nets *net, int burst_limit); + +void +sctp_cwnd_update_after_fr_timer(struct sctp_inpcb *inp, + struct sctp_tcb *stcb, struct sctp_nets *net); + +void +sctp_htcp_set_initial_cc_param(struct sctp_tcb *stcb, + struct sctp_nets *net); + +void +sctp_htcp_cwnd_update_after_fr(struct sctp_tcb *stcb, + struct sctp_association *asoc); + +void +sctp_htcp_cwnd_update_after_sack(struct sctp_tcb *stcb, + struct sctp_association *asoc, + int accum_moved, int reneged_all, int will_exit); + +void +sctp_htcp_cwnd_update_after_timeout(struct sctp_tcb *stcb, + struct sctp_nets *net); + +void +sctp_htcp_cwnd_update_after_ecn_echo(struct sctp_tcb *stcb, + struct sctp_nets *net); + +void +sctp_htcp_cwnd_update_after_fr_timer(struct sctp_inpcb *inp, + struct sctp_tcb *stcb, struct sctp_nets *net); + +#endif +#endif diff --git a/sys/netinet/sctp_constants.h 
b/sys/netinet/sctp_constants.h index bcf1aae90add..477244bfda07 100644 --- a/sys/netinet/sctp_constants.h +++ b/sys/netinet/sctp_constants.h @@ -83,6 +83,11 @@ __FBSDID("$FreeBSD$"); */ #define SCTP_DEFAULT_VRF_SIZE 4 +/* JRS - Values defined for the HTCP algorithm */ +#define ALPHA_BASE (1<<7) /* 1.0 with shift << 7 */ +#define BETA_MIN (1<<6) /* 0.5 with shift << 7 */ +#define BETA_MAX 102 /* 0.8 with shift << 7 */ + /* Places that CWND log can happen from */ #define SCTP_CWND_LOG_FROM_FR 1 #define SCTP_CWND_LOG_FROM_RTX 2 @@ -483,6 +488,8 @@ __FBSDID("$FreeBSD$"); #define SCTP_ADDR_DOUBLE_SWITCH 0x100 #define SCTP_ADDR_UNCONFIRMED 0x200 #define SCTP_ADDR_REQ_PRIMARY 0x400 +/* JRS 5/13/07 - Added potentially failed state for CMT PF */ +#define SCTP_ADDR_PF 0x800 #define SCTP_REACHABLE_MASK 0x203 /* bound address types (e.g. valid address types to allow) */ diff --git a/sys/netinet/sctp_indata.c b/sys/netinet/sctp_indata.c index 06fcddceb04f..0d8bbac00e31 100644 --- a/sys/netinet/sctp_indata.c +++ b/sys/netinet/sctp_indata.c @@ -3456,7 +3456,21 @@ sctp_strike_gap_ack_chunks(struct sctp_tcb *stcb, struct sctp_association *asoc, tp1->no_fr_allowed = 1; alt = tp1->whoTo; /* sa_ignore NO_NULL_CHK */ - alt = sctp_find_alternate_net(stcb, alt, 1); + if (sctp_cmt_pf) { + /* + * JRS 5/18/07 - If CMT PF is on, + * use the PF version of + * find_alt_net() + */ + alt = sctp_find_alternate_net(stcb, alt, 2); + } else { + /* + * JRS 5/18/07 - If only CMT is on, + * use the CMT version of + * find_alt_net() + */ + alt = sctp_find_alternate_net(stcb, alt, 1); + } if (alt == NULL) { alt = tp1->whoTo; } @@ -3675,387 +3689,6 @@ sctp_try_advance_peer_ack_point(struct sctp_tcb *stcb, return (a_adv); } -#ifdef SCTP_HIGH_SPEED -struct sctp_hs_raise_drop { - int32_t cwnd; - int32_t increase; - int32_t drop_percent; -}; - -#define SCTP_HS_TABLE_SIZE 73 - -struct sctp_hs_raise_drop sctp_cwnd_adjust[SCTP_HS_TABLE_SIZE] = { - {38, 1, 50}, /* 0 */ - {118, 2, 44}, /* 1 */ - {221, 3, 41}, /* 2 
*/ - {347, 4, 38}, /* 3 */ - {495, 5, 37}, /* 4 */ - {663, 6, 35}, /* 5 */ - {851, 7, 34}, /* 6 */ - {1058, 8, 33}, /* 7 */ - {1284, 9, 32}, /* 8 */ - {1529, 10, 31}, /* 9 */ - {1793, 11, 30}, /* 10 */ - {2076, 12, 29}, /* 11 */ - {2378, 13, 28}, /* 12 */ - {2699, 14, 28}, /* 13 */ - {3039, 15, 27}, /* 14 */ - {3399, 16, 27}, /* 15 */ - {3778, 17, 26}, /* 16 */ - {4177, 18, 26}, /* 17 */ - {4596, 19, 25}, /* 18 */ - {5036, 20, 25}, /* 19 */ - {5497, 21, 24}, /* 20 */ - {5979, 22, 24}, /* 21 */ - {6483, 23, 23}, /* 22 */ - {7009, 24, 23}, /* 23 */ - {7558, 25, 22}, /* 24 */ - {8130, 26, 22}, /* 25 */ - {8726, 27, 22}, /* 26 */ - {9346, 28, 21}, /* 27 */ - {9991, 29, 21}, /* 28 */ - {10661, 30, 21}, /* 29 */ - {11358, 31, 20}, /* 30 */ - {12082, 32, 20}, /* 31 */ - {12834, 33, 20}, /* 32 */ - {13614, 34, 19}, /* 33 */ - {14424, 35, 19}, /* 34 */ - {15265, 36, 19}, /* 35 */ - {16137, 37, 19}, /* 36 */ - {17042, 38, 18}, /* 37 */ - {17981, 39, 18}, /* 38 */ - {18955, 40, 18}, /* 39 */ - {19965, 41, 17}, /* 40 */ - {21013, 42, 17}, /* 41 */ - {22101, 43, 17}, /* 42 */ - {23230, 44, 17}, /* 43 */ - {24402, 45, 16}, /* 44 */ - {25618, 46, 16}, /* 45 */ - {26881, 47, 16}, /* 46 */ - {28193, 48, 16}, /* 47 */ - {29557, 49, 15}, /* 48 */ - {30975, 50, 15}, /* 49 */ - {32450, 51, 15}, /* 50 */ - {33986, 52, 15}, /* 51 */ - {35586, 53, 14}, /* 52 */ - {37253, 54, 14}, /* 53 */ - {38992, 55, 14}, /* 54 */ - {40808, 56, 14}, /* 55 */ - {42707, 57, 13}, /* 56 */ - {44694, 58, 13}, /* 57 */ - {46776, 59, 13}, /* 58 */ - {48961, 60, 13}, /* 59 */ - {51258, 61, 13}, /* 60 */ - {53677, 62, 12}, /* 61 */ - {56230, 63, 12}, /* 62 */ - {58932, 64, 12}, /* 63 */ - {61799, 65, 12}, /* 64 */ - {64851, 66, 11}, /* 65 */ - {68113, 67, 11}, /* 66 */ - {71617, 68, 11}, /* 67 */ - {75401, 69, 10}, /* 68 */ - {79517, 70, 10}, /* 69 */ - {84035, 71, 10}, /* 70 */ - {89053, 72, 10}, /* 71 */ - {94717, 73, 9} /* 72 */ -}; - -static void -sctp_hs_cwnd_increase(struct sctp_tcb *stcb, struct sctp_nets 
*net) -{ - int cur_val, i, indx, incr; - - cur_val = net->cwnd >> 10; - indx = SCTP_HS_TABLE_SIZE - 1; - - if (cur_val < sctp_cwnd_adjust[0].cwnd) { - /* normal mode */ - if (net->net_ack > net->mtu) { - net->cwnd += net->mtu; - if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) { - sctp_log_cwnd(stcb, net, net->mtu, SCTP_CWND_LOG_FROM_SS); - } - } else { - net->cwnd += net->net_ack; - if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) { - sctp_log_cwnd(stcb, net, net->net_ack, SCTP_CWND_LOG_FROM_SS); - } - } - } else { - for (i = net->last_hs_used; i < SCTP_HS_TABLE_SIZE; i++) { - if (cur_val < sctp_cwnd_adjust[i].cwnd) { - indx = i; - break; - } - } - net->last_hs_used = indx; - incr = ((sctp_cwnd_adjust[indx].increase) << 10); - net->cwnd += incr; - if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) { - sctp_log_cwnd(stcb, net, incr, SCTP_CWND_LOG_FROM_SS); - } - } -} - -static void -sctp_hs_cwnd_decrease(struct sctp_tcb *stcb, struct sctp_nets *net) -{ - int cur_val, i, indx; - int old_cwnd = net->cwnd; - - cur_val = net->cwnd >> 10; - indx = net->last_hs_used; - if (cur_val < sctp_cwnd_adjust[0].cwnd) { - /* normal mode */ - net->ssthresh = net->cwnd / 2; - if (net->ssthresh < (net->mtu * 2)) { - net->ssthresh = 2 * net->mtu; - } - net->cwnd = net->ssthresh; - } else { - /* drop by the proper amount */ - net->ssthresh = net->cwnd - (int)((net->cwnd / 100) * - sctp_cwnd_adjust[net->last_hs_used].drop_percent); - net->cwnd = net->ssthresh; - /* now where are we */ - indx = net->last_hs_used; - cur_val = net->cwnd >> 10; - /* reset where we are in the table */ - if (cur_val < sctp_cwnd_adjust[0].cwnd) { - /* feel out of hs */ - net->last_hs_used = 0; - } else { - for (i = indx; i >= 1; i--) { - if (cur_val > sctp_cwnd_adjust[i - 1].cwnd) { - break; - } - } - net->last_hs_used = indx; - } - } - if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) { - sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_FR); - } -} - -#endif - - -static void 
-sctp_cwnd_update(struct sctp_tcb *stcb, - struct sctp_association *asoc, - int accum_moved, int reneged_all, int will_exit) -{ - struct sctp_nets *net; - - /******************************/ - /* update cwnd and Early FR */ - /******************************/ - TAILQ_FOREACH(net, &asoc->nets, sctp_next) { - -#ifdef JANA_CMT_FAST_RECOVERY - /* - * CMT fast recovery code. Need to debug. - */ - if (net->fast_retran_loss_recovery && net->new_pseudo_cumack) { - if (compare_with_wrap(asoc->last_acked_seq, - net->fast_recovery_tsn, MAX_TSN) || - (asoc->last_acked_seq == net->fast_recovery_tsn) || - compare_with_wrap(net->pseudo_cumack, net->fast_recovery_tsn, MAX_TSN) || - (net->pseudo_cumack == net->fast_recovery_tsn)) { - net->will_exit_fast_recovery = 1; - } - } -#endif - if (sctp_early_fr) { - /* - * So, first of all do we need to have a Early FR - * timer running? - */ - if (((TAILQ_FIRST(&asoc->sent_queue)) && - (net->ref_count > 1) && - (net->flight_size < net->cwnd)) || - (reneged_all)) { - /* - * yes, so in this case stop it if its - * running, and then restart it. Reneging - * all is a special case where we want to - * run the Early FR timer and then force the - * last few unacked to be sent, causing us - * to illicit a sack with gaps to force out - * the others. 
- */ - if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) { - SCTP_STAT_INCR(sctps_earlyfrstpidsck2); - sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net, - SCTP_FROM_SCTP_INDATA + SCTP_LOC_20); - } - SCTP_STAT_INCR(sctps_earlyfrstrid); - sctp_timer_start(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net); - } else { - /* No, stop it if its running */ - if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) { - SCTP_STAT_INCR(sctps_earlyfrstpidsck3); - sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net, - SCTP_FROM_SCTP_INDATA + SCTP_LOC_21); - } - } - } - /* if nothing was acked on this destination skip it */ - if (net->net_ack == 0) { - if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) { - sctp_log_cwnd(stcb, net, 0, SCTP_CWND_LOG_FROM_SACK); - } - continue; - } - if (net->net_ack2 > 0) { - /* - * Karn's rule applies to clearing error count, this - * is optional. - */ - net->error_count = 0; - if ((net->dest_state & SCTP_ADDR_NOT_REACHABLE) == - SCTP_ADDR_NOT_REACHABLE) { - /* addr came good */ - net->dest_state &= ~SCTP_ADDR_NOT_REACHABLE; - net->dest_state |= SCTP_ADDR_REACHABLE; - sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb, - SCTP_RECEIVED_SACK, (void *)net); - /* now was it the primary? if so restore */ - if (net->dest_state & SCTP_ADDR_WAS_PRIMARY) { - (void)sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, net); - } - } - } -#ifdef JANA_CMT_FAST_RECOVERY - /* - * CMT fast recovery code - */ - /* - * if (sctp_cmt_on_off == 1 && - * net->fast_retran_loss_recovery && - * net->will_exit_fast_recovery == 0) { // @@@ Do something - * } else if (sctp_cmt_on_off == 0 && - * asoc->fast_retran_loss_recovery && will_exit == 0) { - */ -#endif - - if (asoc->fast_retran_loss_recovery && will_exit == 0 && sctp_cmt_on_off == 0) { - /* - * If we are in loss recovery we skip any cwnd - * update - */ - goto skip_cwnd_update; - } - /* - * CMT: CUC algorithm. Update cwnd if pseudo-cumack has - * moved. 
- */ - if (accum_moved || (sctp_cmt_on_off && net->new_pseudo_cumack)) { - /* If the cumulative ack moved we can proceed */ - if (net->cwnd <= net->ssthresh) { - /* We are in slow start */ - if (net->flight_size + net->net_ack >= - net->cwnd) { -#ifdef SCTP_HIGH_SPEED - sctp_hs_cwnd_increase(stcb, net); -#else - if (net->net_ack > (net->mtu * sctp_L2_abc_variable)) { - net->cwnd += (net->mtu * sctp_L2_abc_variable); - if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) { - sctp_log_cwnd(stcb, net, net->mtu, - SCTP_CWND_LOG_FROM_SS); - } - } else { - net->cwnd += net->net_ack; - sctp_log_cwnd(stcb, net, net->net_ack, - SCTP_CWND_LOG_FROM_SS); - } -#endif - } else { - unsigned int dif; - - dif = net->cwnd - (net->flight_size + - net->net_ack); - if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) { - sctp_log_cwnd(stcb, net, net->net_ack, - SCTP_CWND_LOG_NOADV_SS); - } - } - } else { - /* We are in congestion avoidance */ - if (net->flight_size + net->net_ack >= - net->cwnd) { - /* - * add to pba only if we had a - * cwnd's worth (or so) in flight OR - * the burst limit was applied. - */ - net->partial_bytes_acked += - net->net_ack; - - /* - * Do we need to increase (if pba is - * > cwnd)? 
- */ - if (net->partial_bytes_acked >= - net->cwnd) { - if (net->cwnd < - net->partial_bytes_acked) { - net->partial_bytes_acked -= - net->cwnd; - } else { - net->partial_bytes_acked = - 0; - } - net->cwnd += net->mtu; - if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) { - sctp_log_cwnd(stcb, net, net->mtu, - SCTP_CWND_LOG_FROM_CA); - } - } else { - if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) { - sctp_log_cwnd(stcb, net, net->net_ack, - SCTP_CWND_LOG_NOADV_CA); - } - } - } else { - unsigned int dif; - - if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) { - sctp_log_cwnd(stcb, net, net->net_ack, - SCTP_CWND_LOG_NOADV_CA); - } - dif = net->cwnd - (net->flight_size + - net->net_ack); - } - } - } else { - if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) { - sctp_log_cwnd(stcb, net, net->mtu, - SCTP_CWND_LOG_NO_CUMACK); - } - } -skip_cwnd_update: - /* - * NOW, according to Karn's rule do we need to restore the - * RTO timer back? Check our net_ack2. If not set then we - * have a ambiguity.. i.e. all data ack'd was sent to more - * than one place. 
- */ - if (net->net_ack2) { - /* restore any doubled timers */ - net->RTO = ((net->lastsa >> 2) + net->lastsv) >> 1; - if (net->RTO < stcb->asoc.minrto) { - net->RTO = stcb->asoc.minrto; - } - if (net->RTO > stcb->asoc.maxrto) { - net->RTO = stcb->asoc.maxrto; - } - } - } -} - static void sctp_fs_audit(struct sctp_association *asoc) { @@ -4130,7 +3763,6 @@ sctp_window_probe_recovery(struct sctp_tcb *stcb, } } - void sctp_express_handle_sack(struct sctp_tcb *stcb, uint32_t cumack, uint32_t rwnd, int nonce_sum_flag, int *abort_now) @@ -4342,9 +3974,9 @@ sctp_express_handle_sack(struct sctp_tcb *stcb, uint32_t cumack, } } - + /* JRS - Use the congestion control given in the CC module */ if (asoc->last_acked_seq != cumack) - sctp_cwnd_update(stcb, asoc, 1, 0, 0); + asoc->cc_functions.sctp_cwnd_update_after_sack(stcb, asoc, 1, 0, 0); asoc->last_acked_seq = cumack; @@ -4578,8 +4210,6 @@ again: } } - - void sctp_handle_sack(struct mbuf *m, int offset, struct sctp_sack_chunk *ch, struct sctp_tcb *stcb, @@ -5116,8 +4746,8 @@ done_with_it: else asoc->saw_sack_with_frags = 0; - - sctp_cwnd_update(stcb, asoc, accum_moved, reneged_all, will_exit_fast_recovery); + /* JRS - Use the congestion control given in the CC module */ + asoc->cc_functions.sctp_cwnd_update_after_sack(stcb, asoc, accum_moved, reneged_all, will_exit_fast_recovery); if (TAILQ_EMPTY(&asoc->sent_queue)) { /* nothing left in-flight */ @@ -5286,86 +4916,8 @@ done_with_it: } } } - /* - * CMT fast recovery code. Need to debug. ((sctp_cmt_on_off == 1) && - * (net->fast_retran_loss_recovery == 0))) - */ - TAILQ_FOREACH(net, &asoc->nets, sctp_next) { - if ((asoc->fast_retran_loss_recovery == 0) || (sctp_cmt_on_off == 1)) { - /* out of a RFC2582 Fast recovery window? */ - if (net->net_ack > 0) { - /* - * per section 7.2.3, are there any - * destinations that had a fast retransmit - * to them. If so what we need to do is - * adjust ssthresh and cwnd. 
- */ - struct sctp_tmit_chunk *lchk; - -#ifdef SCTP_HIGH_SPEED - sctp_hs_cwnd_decrease(stcb, net); -#else - int old_cwnd = net->cwnd; - - net->ssthresh = net->cwnd / 2; - if (net->ssthresh < (net->mtu * 2)) { - net->ssthresh = 2 * net->mtu; - } - net->cwnd = net->ssthresh; - if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) { - sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), - SCTP_CWND_LOG_FROM_FR); - } -#endif - - lchk = TAILQ_FIRST(&asoc->send_queue); - - net->partial_bytes_acked = 0; - /* Turn on fast recovery window */ - asoc->fast_retran_loss_recovery = 1; - if (lchk == NULL) { - /* Mark end of the window */ - asoc->fast_recovery_tsn = asoc->sending_seq - 1; - } else { - asoc->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1; - } - - /* - * CMT fast recovery -- per destination - * recovery variable. - */ - net->fast_retran_loss_recovery = 1; - - if (lchk == NULL) { - /* Mark end of the window */ - net->fast_recovery_tsn = asoc->sending_seq - 1; - } else { - net->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1; - } - - - - /* - * Disable Nonce Sum Checking and store the - * resync tsn - */ - asoc->nonce_sum_check = 0; - asoc->nonce_resync_tsn = asoc->fast_recovery_tsn + 1; - - sctp_timer_stop(SCTP_TIMER_TYPE_SEND, - stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_32); - sctp_timer_start(SCTP_TIMER_TYPE_SEND, - stcb->sctp_ep, stcb, net); - } - } else if (net->net_ack > 0) { - /* - * Mark a peg that we WOULD have done a cwnd - * reduction but RFC2582 prevented this action. - */ - SCTP_STAT_INCR(sctps_fastretransinrtt); - } - } - + /* JRS - Use the congestion control given in the CC module */ + asoc->cc_functions.sctp_cwnd_update_after_fr(stcb, asoc); /****************************************************************** * Here we do the stuff with ECN Nonce checking. 
diff --git a/sys/netinet/sctp_input.c b/sys/netinet/sctp_input.c index 5f292142e01b..eaedc02f47d5 100644 --- a/sys/netinet/sctp_input.c +++ b/sys/netinet/sctp_input.c @@ -127,6 +127,8 @@ sctp_handle_init(struct mbuf *m, int iphlen, int offset, struct sctphdr *sh, op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM); sctp_abort_association(inp, stcb, m, iphlen, sh, op_err, vrf_id); + if (stcb) + *abort_no_unlock = 1; return; } if (init->num_inbound_streams == 0) { @@ -546,6 +548,25 @@ sctp_handle_heartbeat_ack(struct sctp_heartbeat_chunk *cp, (void)sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, r_net); } } + /* + * JRS 5/14/07 - If CMT PF is on and the destination is in PF state, + * set the destination to active state and set the cwnd to one or + * two MTU's based on whether PF1 or PF2 is being used. If a T3 + * timer is running, for the destination, stop the timer because a + * PF-heartbeat was received. + */ + if (sctp_cmt_pf && (net->dest_state & SCTP_ADDR_PF) == + SCTP_ADDR_PF) { + if (SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer)) { + sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep, + stcb, net, + SCTP_FROM_SCTP_INPUT + SCTP_LOC_5); + } + net->dest_state &= ~SCTP_ADDR_PF; + net->cwnd = net->mtu * sctp_cmt_pf; + SCTPDBG(SCTP_DEBUG_INPUT1, "Destination %p moved from PF to reachable with cwnd %d.\n", + net, net->cwnd); + } /* Now lets do a RTO with this */ r_net->RTO = sctp_calculate_rto(stcb, &stcb->asoc, r_net, &tv); } @@ -559,7 +580,7 @@ sctp_handle_abort(struct sctp_abort_chunk *cp, return; /* stop any receive timers */ - sctp_timer_stop(SCTP_TIMER_TYPE_RECV, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_5); + sctp_timer_stop(SCTP_TIMER_TYPE_RECV, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_6); /* notify user of the abort and clean up... 
*/ sctp_abort_notification(stcb, 0); /* free the tcb */ @@ -629,7 +650,7 @@ sctp_handle_shutdown(struct sctp_shutdown_chunk *cp, * stop the shutdown timer, since we WILL move to * SHUTDOWN-ACK-SENT. */ - sctp_timer_stop(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_7); + sctp_timer_stop(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_8); } /* Now is there unsent data on a stream somewhere? */ some_on_streamwheel = sctp_is_there_unsent_data(stcb); @@ -693,7 +714,7 @@ sctp_handle_shutdown_ack(struct sctp_shutdown_ack_chunk *cp, sctp_report_all_outbound(stcb, 0); } /* stop the timer */ - sctp_timer_stop(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_8); + sctp_timer_stop(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_9); /* send SHUTDOWN-COMPLETE */ sctp_send_shutdown_complete(stcb, net); /* notify upper layer protocol */ @@ -708,7 +729,7 @@ sctp_handle_shutdown_ack(struct sctp_shutdown_ack_chunk *cp, SCTP_STAT_INCR_COUNTER32(sctps_shutdown); /* free the TCB but first save off the ep */ sctp_free_assoc(stcb->sctp_ep, stcb, SCTP_NORMAL_PROC, - SCTP_FROM_SCTP_INPUT + SCTP_LOC_9); + SCTP_FROM_SCTP_INPUT + SCTP_LOC_10); } /* @@ -834,7 +855,7 @@ sctp_handle_error(struct sctp_chunkhdr *ch, asoc->max_init_times) { sctp_abort_notification(stcb, 0); /* now free the asoc */ - sctp_free_assoc(stcb->sctp_ep, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_10); + sctp_free_assoc(stcb->sctp_ep, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_11); return (-1); } /* blast back to INIT state */ @@ -1156,8 +1177,8 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset, } /* we have already processed the INIT so no problem */ sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, - net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_11); - sctp_timer_stop(SCTP_TIMER_TYPE_INIT, inp, stcb, net, SCTP_FROM_SCTP_INPUT + 
SCTP_LOC_12); + net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_12); + sctp_timer_stop(SCTP_TIMER_TYPE_INIT, inp, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_13); /* update current state */ if (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED) SCTP_STAT_INCR_COUNTER32(sctps_activeestab); @@ -1274,7 +1295,7 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset, } if (how_indx < sizeof(asoc->cookie_how)) asoc->cookie_how[how_indx] = 8; - sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_13); + sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_14); sctp_stop_all_cookie_timers(stcb); /* * since we did not send a HB make sure we don't double @@ -1305,6 +1326,8 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset, TAILQ_FOREACH(chk, &stcb->asoc.sent_queue, sctp_next) { if (chk->sent < SCTP_DATAGRAM_RESEND) { chk->sent = SCTP_DATAGRAM_RESEND; + sctp_flight_size_decrease(chk); + sctp_total_flight_decrease(stcb, chk); sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt); spec_flag++; } @@ -1386,8 +1409,8 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset, /* temp code */ if (how_indx < sizeof(asoc->cookie_how)) asoc->cookie_how[how_indx] = 12; - sctp_timer_stop(SCTP_TIMER_TYPE_INIT, inp, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_14); - sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_15); + sctp_timer_stop(SCTP_TIMER_TYPE_INIT, inp, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_15); + sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_16); *sac_assoc_id = sctp_get_associd(stcb); /* notify upper layer */ @@ -1624,6 +1647,8 @@ sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset, op_err = sctp_generate_invmanparam(SCTP_CAUSE_OUT_OF_RESC); sctp_abort_association(inp, (struct sctp_tcb *)NULL, m, iphlen, sh, op_err, vrf_id); + sctp_free_assoc(inp, stcb, 
SCTP_NORMAL_PROC, + SCTP_FROM_SCTP_INPUT + SCTP_LOC_16); atomic_add_int(&stcb->asoc.refcnt, -1); return (NULL); } @@ -1676,7 +1701,9 @@ sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset, /* auth HMAC failed, dump the assoc and packet */ SCTPDBG(SCTP_DEBUG_AUTH1, "COOKIE-ECHO: AUTH failed\n"); + atomic_add_int(&stcb->asoc.refcnt, 1); sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_18); + atomic_add_int(&stcb->asoc.refcnt, -1); return (NULL); } else { /* remaining chunks checked... good to go */ @@ -1771,6 +1798,7 @@ sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset, } /* respond with a COOKIE-ACK */ /* calculate the RTT */ + (void)SCTP_GETTIME_TIMEVAL(&stcb->asoc.time_entered); if ((netp) && (*netp)) { (*netp)->RTO = sctp_calculate_rto(stcb, asoc, *netp, &cookie->time_entered); @@ -2363,20 +2391,11 @@ sctp_handle_ecn_echo(struct sctp_ecne_chunk *cp, net = stcb->asoc.primary_destination; if (compare_with_wrap(tsn, stcb->asoc.last_cwr_tsn, MAX_TSN)) { - int old_cwnd; - - old_cwnd = net->cwnd; - SCTP_STAT_INCR(sctps_ecnereducedcwnd); - net->ssthresh = net->cwnd / 2; - if (net->ssthresh < net->mtu) { - net->ssthresh = net->mtu; - /* here back off the timer as well, to slow us down */ - net->RTO <<= 1; - } - net->cwnd = net->ssthresh; - if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) { - sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT); - } + /* + * JRS - Use the congestion control given in the pluggable + * CC module + */ + stcb->asoc.cc_functions.sctp_cwnd_update_after_ecn_echo(stcb, net); /* * we reduce once every RTT. So we will only lower cwnd at * the next sending seq i.e. the resync_tsn. 
@@ -2458,10 +2477,10 @@ sctp_handle_shutdown_complete(struct sctp_shutdown_complete_chunk *cp, } } /* stop the timer */ - sctp_timer_stop(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_21); + sctp_timer_stop(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_22); SCTP_STAT_INCR_COUNTER32(sctps_shutdown); /* free the TCB */ - sctp_free_assoc(stcb->sctp_ep, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_22); + sctp_free_assoc(stcb->sctp_ep, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_23); return; } @@ -2570,7 +2589,7 @@ process_chunk_drop(struct sctp_tcb *stcb, struct sctp_chunk_desc *desc, /* restart the timer */ sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep, - stcb, tp1->whoTo, SCTP_FROM_SCTP_INPUT + SCTP_LOC_23); + stcb, tp1->whoTo, SCTP_FROM_SCTP_INPUT + SCTP_LOC_24); sctp_timer_start(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep, stcb, tp1->whoTo); @@ -2635,7 +2654,7 @@ process_chunk_drop(struct sctp_tcb *stcb, struct sctp_chunk_desc *desc, * this, otherwise we let the timer fire. 
*/ sctp_timer_stop(SCTP_TIMER_TYPE_INIT, stcb->sctp_ep, - stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_24); + stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_25); sctp_send_initiate(stcb->sctp_ep, stcb); } break; @@ -2645,7 +2664,13 @@ process_chunk_drop(struct sctp_tcb *stcb, struct sctp_chunk_desc *desc, break; case SCTP_HEARTBEAT_REQUEST: /* resend a demand HB */ - (void)sctp_send_hb(stcb, 1, net); + if ((stcb->asoc.overall_error_count + 3) < stcb->asoc.max_send_times) { + /* + * Only retransmit if we KNOW we wont destroy the + * tcb + */ + (void)sctp_send_hb(stcb, 1, net); + } break; case SCTP_SHUTDOWN: sctp_send_shutdown(stcb, net); @@ -2806,7 +2831,7 @@ sctp_clean_up_stream_reset(struct sctp_tcb *stcb) } asoc = &stcb->asoc; - sctp_timer_stop(SCTP_TIMER_TYPE_STRRESET, stcb->sctp_ep, stcb, chk->whoTo, SCTP_FROM_SCTP_INPUT + SCTP_LOC_25); + sctp_timer_stop(SCTP_TIMER_TYPE_STRRESET, stcb->sctp_ep, stcb, chk->whoTo, SCTP_FROM_SCTP_INPUT + SCTP_LOC_26); TAILQ_REMOVE(&asoc->control_send_queue, chk, sctp_next); @@ -3203,7 +3228,6 @@ strres_nochunk: num_req++; - req_in = (struct sctp_stream_reset_in_request *)ph; sctp_handle_str_reset_request_in(stcb, chk, req_in, trunc); @@ -3398,112 +3422,8 @@ sctp_handle_packet_dropped(struct sctp_pktdrop_chunk *cp, * Note if a T3 timer has went off, we will prohibit any * changes to cwnd until we exit the t3 loss recovery. 
*/ - uint32_t bw_avail; - int rtt, incr; - - int old_cwnd = net->cwnd; - - /* need real RTT for this calc */ - rtt = ((net->lastsa >> 2) + net->lastsv) >> 1; - /* get bottle neck bw */ - bottle_bw = ntohl(cp->bottle_bw); - /* and whats on queue */ - on_queue = ntohl(cp->current_onq); - /* - * adjust the on-queue if our flight is more it could be - * that the router has not yet gotten data "in-flight" to it - */ - if (on_queue < net->flight_size) - on_queue = net->flight_size; - - /* calculate the available space */ - bw_avail = (bottle_bw * rtt) / 1000; - if (bw_avail > bottle_bw) { - /* - * Cap the growth to no more than the bottle neck. - * This can happen as RTT slides up due to queues. - * It also means if you have more than a 1 second - * RTT with a empty queue you will be limited to the - * bottle_bw per second no matter if other points - * have 1/2 the RTT and you could get more out... - */ - bw_avail = bottle_bw; - } - if (on_queue > bw_avail) { - /* - * No room for anything else don't allow anything - * else to be "added to the fire". - */ - int seg_inflight, seg_onqueue, my_portion; - - net->partial_bytes_acked = 0; - - /* how much are we over queue size? */ - incr = on_queue - bw_avail; - if (stcb->asoc.seen_a_sack_this_pkt) { - /* - * undo any cwnd adjustment that the sack - * might have made - */ - net->cwnd = net->prev_cwnd; - } - /* Now how much of that is mine? */ - seg_inflight = net->flight_size / net->mtu; - seg_onqueue = on_queue / net->mtu; - my_portion = (incr * seg_inflight) / seg_onqueue; - - /* Have I made an adjustment already */ - if (net->cwnd > net->flight_size) { - /* - * for this flight I made an adjustment we - * need to decrease the portion by a share - * our previous adjustment. - */ - int diff_adj; - - diff_adj = net->cwnd - net->flight_size; - if (diff_adj > my_portion) - my_portion = 0; - else - my_portion -= diff_adj; - } - /* - * back down to the previous cwnd (assume we have - * had a sack before this packet). 
minus what ever - * portion of the overage is my fault. - */ - net->cwnd -= my_portion; - - /* we will NOT back down more than 1 MTU */ - if (net->cwnd <= net->mtu) { - net->cwnd = net->mtu; - } - /* force into CA */ - net->ssthresh = net->cwnd - 1; - } else { - /* - * Take 1/4 of the space left or max burst up .. - * whichever is less. - */ - incr = min((bw_avail - on_queue) >> 2, - stcb->asoc.max_burst * net->mtu); - net->cwnd += incr; - } - if (net->cwnd > bw_avail) { - /* We can't exceed the pipe size */ - net->cwnd = bw_avail; - } - if (net->cwnd < net->mtu) { - /* We always have 1 MTU */ - net->cwnd = net->mtu; - } - if (net->cwnd - old_cwnd != 0) { - /* log only changes */ - if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) { - sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), - SCTP_CWND_LOG_FROM_SAT); - } - } + stcb->asoc.cc_functions.sctp_cwnd_update_after_packet_dropped(stcb, + net, cp, &bottle_bw, &on_queue); } } @@ -3886,7 +3806,7 @@ process_control_chunks: } *offset = length; if (stcb) { - sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_26); + sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_27); } return (NULL); } @@ -4297,7 +4217,7 @@ process_control_chunks: *fwd_tsn_seen = 1; if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) { /* We are not interested anymore */ - sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_28); + sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_29); *offset = length; return (NULL); } @@ -4324,7 +4244,7 @@ process_control_chunks: } if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) { /* We are not interested anymore */ - sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_29); + sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_30); *offset = length; return (NULL); } @@ -4525,6 +4445,21 @@ sctp_process_ecn_marked_b(struct sctp_tcb *stcb, struct sctp_nets *net, } } 
+#ifdef INVARIANTS +static void +sctp_validate_no_locks(struct sctp_inpcb *inp) +{ + struct sctp_tcb *stcb; + + LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { + if (mtx_owned(&stcb->tcb_mtx)) { + panic("Own lock on stcb at return from input"); + } + } +} + +#endif + /* * common input chunk processing (v4 and v6) */ @@ -4586,19 +4521,19 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, /* "silently" ignore */ SCTP_STAT_INCR(sctps_recvauthmissing); SCTP_TCB_UNLOCK(stcb); - return; + goto out_now; } if (stcb == NULL) { /* out of the blue DATA chunk */ sctp_handle_ootb(m, iphlen, offset, sh, inp, NULL, vrf_id); - return; + goto out_now; } if (stcb->asoc.my_vtag != ntohl(sh->v_tag)) { /* v_tag mismatch! */ SCTP_STAT_INCR(sctps_badvtag); SCTP_TCB_UNLOCK(stcb); - return; + goto out_now; } } @@ -4608,7 +4543,7 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, * packet while processing control, or we're done with this * packet (done or skip rest of data), so we drop it... */ - return; + goto out_now; } /* * DATA chunk processing @@ -4654,7 +4589,7 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, sctp_handle_ootb(m, iphlen, offset, sh, inp, NULL, vrf_id); SCTP_TCB_UNLOCK(stcb); - return; + goto out_now; break; case SCTP_STATE_EMPTY: /* should not happen */ case SCTP_STATE_INUSE: /* should not happen */ @@ -4662,7 +4597,7 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, case SCTP_STATE_SHUTDOWN_ACK_SENT: default: SCTP_TCB_UNLOCK(stcb); - return; + goto out_now; break; case SCTP_STATE_OPEN: case SCTP_STATE_SHUTDOWN_SENT: @@ -4681,7 +4616,7 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, * The association aborted, NO UNLOCK needed since * the association is destroyed. 
*/ - return; + goto out_now; } data_processed = 1; if (retval == 0) { @@ -4708,7 +4643,7 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, sctp_sack_check(stcb, 1, was_a_gap, &abort_flag); if (abort_flag) { /* Again, we aborted so NO UNLOCK needed */ - return; + goto out_now; } } /* trigger send of any chunks in queue... */ @@ -4737,6 +4672,10 @@ trigger_send: sctp_auditing(2, inp, stcb, net); #endif SCTP_TCB_UNLOCK(stcb); +out_now: +#ifdef INVARIANTS + sctp_validate_no_locks(inp); +#endif return; } @@ -4932,7 +4871,6 @@ sctp_skip_csum_4: * I very much doubt any of the IPSEC stuff will work but I have no * idea, so I will leave it in place. */ - if (inp && ipsec4_in_reject(m, &inp->ip_inp.inp)) { ipsec4stat.in_polvio++; SCTP_STAT_INCR(sctps_hdrops); diff --git a/sys/netinet/sctp_os.h b/sys/netinet/sctp_os.h index fb7c53606ffe..1e38cdff9993 100644 --- a/sys/netinet/sctp_os.h +++ b/sys/netinet/sctp_os.h @@ -61,7 +61,6 @@ __FBSDID("$FreeBSD$"); - /* All os's must implement this address gatherer. If * no VRF's exist, then vrf 0 is the only one and all * addresses and ifn's live here. diff --git a/sys/netinet/sctp_output.c b/sys/netinet/sctp_output.c index 7234ce7454d0..0488763e9af3 100644 --- a/sys/netinet/sctp_output.c +++ b/sys/netinet/sctp_output.c @@ -3413,6 +3413,35 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp, (void *)net); net->dest_state &= ~SCTP_ADDR_REACHABLE; net->dest_state |= SCTP_ADDR_NOT_REACHABLE; + /* + * JRS 5/14/07 - If a + * destination is + * unreachable, the PF bit + * is turned off. This + * allows an unambiguous use + * of the PF bit for + * destinations that are + * reachable but potentially + * failed. If the + * destination is set to the + * unreachable state, also + * set the destination to + * the PF state. + */ + /* + * Add debug message here if + * destination is not in PF + * state. + */ + /* + * Stop any running T3 + * timers here? 
+ */ + if (sctp_cmt_pf) { + net->dest_state &= ~SCTP_ADDR_PF; + SCTPDBG(SCTP_DEBUG_OUTPUT1, "Destination %p moved from PF to unreachable.\n", + net); + } } } if (stcb) { @@ -6109,7 +6138,8 @@ sctp_move_to_outqueue(struct sctp_tcb *stcb, struct sctp_nets *net, uint32_t frag_point, int *locked, int *giveup, - int eeor_mode) + int eeor_mode, + int *bail) { /* Move from the stream to the send_queue keeping track of the total */ struct sctp_association *asoc; @@ -6309,6 +6339,7 @@ out_gu: if (chk->data == NULL) { sp->some_taken = some_taken; sctp_free_a_chunk(stcb, chk); + *bail = 1; goto out_gu; } /* Pull off the data */ @@ -6381,6 +6412,7 @@ out_gu: sp->some_taken = some_taken; atomic_add_int(&sp->length, to_move); chk->data = NULL; + *bail = 1; sctp_free_a_chunk(stcb, chk); goto out_gu; } else { @@ -6399,6 +6431,7 @@ out_gu: SCTP_PRINTF("prepend fails HELP?\n"); sctp_free_a_chunk(stcb, chk); #endif + *bail = 1; goto out_gu; } sctp_snd_sb_alloc(stcb, sizeof(struct sctp_data_chunk)); @@ -6554,17 +6587,19 @@ done_it: if (strq == NULL) { strq = asoc->last_out_stream = TAILQ_FIRST(&asoc->out_wheel); } + /* Save off the last stream */ + asoc->last_out_stream = strq; return (strq); } static void sctp_fill_outqueue(struct sctp_tcb *stcb, - struct sctp_nets *net, int frag_point, int eeor_mode) + struct sctp_nets *net, int frag_point, int eeor_mode, int *quit_now) { struct sctp_association *asoc; struct sctp_stream_out *strq, *strqn, *strqt; - int goal_mtu, moved_how_much, total_moved = 0; + int goal_mtu, moved_how_much, total_moved = 0, bail = 0; int locked, giveup; struct sctp_stream_queue_pending *sp; @@ -6627,12 +6662,13 @@ sctp_fill_outqueue(struct sctp_tcb *stcb, } } giveup = 0; + bail = 0; moved_how_much = sctp_move_to_outqueue(stcb, net, strq, goal_mtu, frag_point, &locked, - &giveup, eeor_mode); + &giveup, eeor_mode, &bail); asoc->last_out_stream = strq; if (locked) { asoc->locked_on_sending = strq; - if ((moved_how_much == 0) || (giveup)) + if ((moved_how_much == 0) 
|| (giveup) || bail) /* no more to move for now */ break; } else { @@ -6651,7 +6687,7 @@ sctp_fill_outqueue(struct sctp_tcb *stcb, } sctp_remove_from_wheel(stcb, asoc, strq); } - if (giveup) { + if ((giveup) || bail) { break; } strq = strqt; @@ -6663,6 +6699,9 @@ sctp_fill_outqueue(struct sctp_tcb *stcb, goal_mtu -= (moved_how_much + sizeof(struct sctp_data_chunk)); goal_mtu &= 0xfffffffc; } + if (bail) + *quit_now = 1; + if (total_moved == 0) { if ((sctp_cmt_on_off == 0) && (net == stcb->asoc.primary_destination)) { @@ -6696,7 +6735,16 @@ sctp_move_to_an_alt(struct sctp_tcb *stcb, struct sctp_nets *a_net; SCTP_TCB_LOCK_ASSERT(stcb); - a_net = sctp_find_alternate_net(stcb, net, 0); + /* + * JRS 5/14/07 - If CMT PF is turned on, find an alternate + * destination using the PF algorithm for finding alternate + * destinations. + */ + if (sctp_cmt_pf) { + a_net = sctp_find_alternate_net(stcb, net, 2); + } else { + a_net = sctp_find_alternate_net(stcb, net, 0); + } if ((a_net != net) && ((a_net->dest_state & SCTP_ADDR_REACHABLE) == SCTP_ADDR_REACHABLE)) { /* @@ -6751,6 +6799,13 @@ sctp_med_chunk_output(struct sctp_inpcb *inp, uint32_t auth_offset = 0; struct sctp_auth_chunk *auth = NULL; + /* + * JRS 5/14/07 - Add flag for whether a heartbeat is sent to the + * destination. + */ + int pf_hbflag = 0; + int quit_now = 0; + *num_out = 0; cwnd_full_ind = 0; @@ -6849,13 +6904,19 @@ one_more_time: if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) { sctp_log_cwnd(stcb, net, 0, SCTP_CWND_LOG_FILL_OUTQ_CALLED); } - sctp_fill_outqueue(stcb, net, frag_point, eeor_mode); + sctp_fill_outqueue(stcb, net, frag_point, eeor_mode, &quit_now); + if (quit_now) { + /* memory alloc failure */ + no_data_chunks = 1; + goto skip_the_fill_from_streams; + } } if (start_at != TAILQ_FIRST(&asoc->nets)) { /* got to pick up the beginning stuff. 
*/ old_startat = start_at; start_at = net = TAILQ_FIRST(&asoc->nets); - goto one_more_time; + if (old_startat) + goto one_more_time; } } skip_the_fill_from_streams: @@ -6867,7 +6928,11 @@ skip_the_fill_from_streams: *reason_code = 8; return (0); } - chk = TAILQ_FIRST(&asoc->send_queue); + if (no_data_chunks) { + chk = TAILQ_FIRST(&asoc->control_send_queue); + } else { + chk = TAILQ_FIRST(&asoc->send_queue); + } if (chk) { send_start_at = chk->whoTo; } else { @@ -7034,8 +7099,15 @@ again_one_more_time: (chk->rec.chunk_id.id == SCTP_PACKET_DROPPED) || (chk->rec.chunk_id.id == SCTP_ASCONF_ACK)) { - if (chk->rec.chunk_id.id == SCTP_HEARTBEAT_REQUEST) + if (chk->rec.chunk_id.id == SCTP_HEARTBEAT_REQUEST) { hbflag = 1; + /* + * JRS 5/14/07 - Set the + * flag to say a heartbeat + * is being sent. + */ + pf_hbflag = 1; + } /* remove these chunks at the end */ if (chk->rec.chunk_id.id == SCTP_SELECTIVE_ACK) { /* turn off the timer */ @@ -7338,6 +7410,15 @@ again_one_more_time: * restart it. */ sctp_timer_start(SCTP_TIMER_TYPE_SEND, inp, stcb, net); + } else if (sctp_cmt_pf && pf_hbflag && ((net->dest_state & SCTP_ADDR_PF) == SCTP_ADDR_PF) + && (!SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer))) { + /* + * JRS 5/14/07 - If a HB has been sent to a + * PF destination and no T3 timer is + * currently running, start the T3 timer to + * track the HBs that were sent. 
+ */ + sctp_timer_start(SCTP_TIMER_TYPE_SEND, inp, stcb, net); } /* Now send it, if there is anything to send :> */ SCTP_BUF_PREPEND(outchain, sizeof(struct sctphdr), M_DONTWAIT); @@ -7422,7 +7503,14 @@ again_one_more_time: if (bundle_at) { /* setup for a RTO measurement */ tsns_sent = data_list[0]->rec.data.TSN_seq; - + /* fill time if not already filled */ + if (*now_filled == 0) { + (void)SCTP_GETTIME_TIMEVAL(&asoc->time_last_sent); + *now_filled = 1; + *now = asoc->time_last_sent; + } else { + asoc->time_last_sent = *now; + } data_list[0]->do_rtt = 1; SCTP_STAT_INCR_BY(sctps_senddata, bundle_at); sctp_clean_up_datalist(stcb, asoc, data_list, bundle_at, net); @@ -7456,7 +7544,8 @@ again_one_more_time: if (old_startat == NULL) { old_startat = send_start_at; send_start_at = TAILQ_FIRST(&asoc->nets); - goto again_one_more_time; + if (old_startat) + goto again_one_more_time; } /* * At the end there should be no NON timed chunks hanging on this @@ -8570,6 +8659,15 @@ sctp_chunk_output(struct sctp_inpcb *inp, */ if (net->ref_count > 1) sctp_move_to_an_alt(stcb, asoc, net); + } else if (sctp_cmt_pf && ((net->dest_state & SCTP_ADDR_PF) == + SCTP_ADDR_PF)) { + /* + * JRS 5/14/07 - If CMT PF is on and the current + * destination is in PF state, move all queued data + * to an alternate desination. 
+ */ + if (net->ref_count > 1) + sctp_move_to_an_alt(stcb, asoc, net); } else { /*- * if ((asoc->sat_network) || (net->addr_is_local)) @@ -8578,16 +8676,12 @@ sctp_chunk_output(struct sctp_inpcb *inp, */ if (sctp_use_cwnd_based_maxburst) { if ((net->flight_size + (burst_limit * net->mtu)) < net->cwnd) { - int old_cwnd; - - if (net->ssthresh < net->cwnd) - net->ssthresh = net->cwnd; - old_cwnd = net->cwnd; - net->cwnd = (net->flight_size + (burst_limit * net->mtu)); - - if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) { - sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_BRST); - } + /* + * JRS - Use the congestion control + * given in the congestion control + * module + */ + asoc->cc_functions.sctp_cwnd_update_after_output(stcb, net, burst_limit); if (sctp_logging_level & SCTP_LOG_MAXBURST_ENABLE) { sctp_log_maxburst(stcb, net, 0, burst_limit, SCTP_MAX_BURST_APPLIED); } @@ -9562,24 +9656,43 @@ sctp_send_hb(struct sctp_tcb *stcb, int user_req, struct sctp_nets *u_net) /* huh compiler bug */ return (0); } - /* ok we have a destination that needs a beat */ - /* lets do the theshold management Qiaobing style */ - if (sctp_threshold_management(stcb->sctp_ep, stcb, net, - stcb->asoc.max_send_times)) { - /*- - * we have lost the association, in a way this is - * quite bad since we really are one less time since - * we really did not send yet. This is the down side - * to the Q's style as defined in the RFC and not my - * alternate style defined in the RFC. - */ - if (chk->data != NULL) { - sctp_m_freem(chk->data); - chk->data = NULL; + /* + * JRS 5/14/07 - In CMT PF, the T3 timer is used to track + * PF-heartbeats. Because of this, threshold management is done by + * the t3 timer handler, and does not need to be done upon the send + * of a PF-heartbeat. If CMT PF is on and the destination to which a + * heartbeat is being sent is in PF state, do NOT do threshold + * management. 
+ */ + if ((sctp_cmt_pf == 0) || ((net->dest_state & SCTP_ADDR_PF) != SCTP_ADDR_PF)) { + /* ok we have a destination that needs a beat */ + /* lets do the theshold management Qiaobing style */ + if (sctp_threshold_management(stcb->sctp_ep, stcb, net, + stcb->asoc.max_send_times)) { + /*- + * we have lost the association, in a way this is + * quite bad since we really are one less time since + * we really did not send yet. This is the down side + * to the Q's style as defined in the RFC and not my + * alternate style defined in the RFC. + */ + if (chk->data != NULL) { + sctp_m_freem(chk->data); + chk->data = NULL; + } + /* + * Here we do NOT use the macro since the + * association is now gone. + */ + if (chk->whoTo) { + sctp_free_remote_addr(chk->whoTo); + chk->whoTo = NULL; + } + SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_chunk, chk); + SCTP_DECR_CHK_COUNT(); + return (-1); } - sctp_free_a_chunk(stcb, chk); - return (-1); } net->hb_responded = 0; TAILQ_INSERT_TAIL(&stcb->asoc.control_send_queue, chk, sctp_next); @@ -10679,7 +10792,8 @@ sctp_lower_sosend(struct socket *so, } } /* now we must find the assoc */ - if (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) { + if ((inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) || + (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) { SCTP_INP_RLOCK(inp); stcb = LIST_FIRST(&inp->sctp_asoc_list); if (stcb == NULL) { @@ -10807,9 +10921,14 @@ sctp_lower_sosend(struct socket *so, } else { hold_tcblock = 1; } + if (t_inp != inp) { + error = ENOTCONN; + goto out_unlocked; + } } if (stcb == NULL) { - if (inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) { + if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || + (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) { error = ENOTCONN; goto out_unlocked; } else if (addr == NULL) { diff --git a/sys/netinet/sctp_pcb.c b/sys/netinet/sctp_pcb.c index 260c6287e1f2..fc847f63ad47 100644 --- a/sys/netinet/sctp_pcb.c +++ b/sys/netinet/sctp_pcb.c @@ -1911,6 +1911,13 @@ sctp_inpcb_alloc(struct socket *so, uint32_t vrf_id) 
m->sctp_sws_sender = SCTP_SWS_SENDER_DEF; m->sctp_sws_receiver = SCTP_SWS_RECEIVER_DEF; m->max_burst = sctp_max_burst_default; + if ((sctp_default_cc_module >= SCTP_CC_RFC2581) && + (sctp_default_cc_module <= SCTP_CC_HTCP)) { + m->sctp_default_cc_module = sctp_default_cc_module; + } else { + /* sysctl done with invalid value, set to 2581 */ + m->sctp_default_cc_module = SCTP_CC_RFC2581; + } /* number of streams to pre-open on a association */ m->pre_open_stream_count = sctp_nr_outgoing_streams_default; @@ -2944,19 +2951,6 @@ sctp_is_address_on_local_host(struct sockaddr *addr, uint32_t vrf_id) } } -void -sctp_set_initial_cc_param(struct sctp_tcb *stcb, struct sctp_nets *net) -{ - net->cwnd = min((net->mtu * 4), max((2 * net->mtu), SCTP_INITIAL_CWND)); - /* we always get at LEAST 2 MTU's */ - if (net->cwnd < (2 * net->mtu)) { - net->cwnd = 2 * net->mtu; - } - net->ssthresh = stcb->asoc.peers_rwnd; -} - - - /* * add's a remote endpoint address, done with the INIT/INIT-ACK as well as * when a ASCONF arrives that adds it. It will also initialize all the cwnd @@ -3210,16 +3204,9 @@ sctp_add_remote_addr(struct sctp_tcb *stcb, struct sockaddr *newaddr, #endif stcb->asoc.smallest_mtu = net->mtu; } - /* - * We take the max of the burst limit times a MTU or the - * INITIAL_CWND. We then limit this to 4 MTU's of sending. 
- */ - sctp_set_initial_cc_param(stcb, net); - + /* JRS - Use the congestion control given in the CC module */ + stcb->asoc.cc_functions.sctp_set_initial_cc_param(stcb, net); - if (sctp_logging_level & (SCTP_CWND_MONITOR_ENABLE | SCTP_CWND_LOGGING_ENABLE)) { - sctp_log_cwnd(stcb, net, 0, SCTP_CWND_INITIALIZATION); - } /* * CMT: CUC algo - set find_pseudo_cumack to TRUE (1) at beginning * of assoc (2005/06/27, iyengar@cis.udel.edu) @@ -3802,19 +3789,14 @@ sctp_free_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int from_inpcbfre */ sctp_timer_start(SCTP_TIMER_TYPE_ASOCKILL, inp, stcb, NULL); SCTP_TCB_UNLOCK(stcb); + if ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) || + (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE)) + /* nothing around */ + so = NULL; if (so) { - SCTP_INP_RLOCK(inp); - if ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) || - (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE)) - /* nothing around */ - so = NULL; - if (so) { - /* Wake any reader/writers */ - sctp_sorwakeup(inp, so); - sctp_sowwakeup(inp, so); - } - SCTP_INP_RUNLOCK(inp); - + /* Wake any reader/writers */ + sctp_sorwakeup(inp, so); + sctp_sowwakeup(inp, so); } #ifdef SCTP_LOG_CLOSING sctp_log_closing(inp, stcb, 9); @@ -4918,6 +4900,7 @@ sctp_load_addresses_from_init(struct sctp_tcb *stcb, struct mbuf *m, * strange, address is in another * assoc? straighten out locks. */ + SCTP_TCB_UNLOCK(stcb_tmp); if (stcb->asoc.state == 0) { /* the assoc was freed? */ return (-12); @@ -4992,6 +4975,7 @@ sctp_load_addresses_from_init(struct sctp_tcb *stcb, struct mbuf *m, * strange, address is in another * assoc? straighten out locks. */ + SCTP_TCB_UNLOCK(stcb_tmp); if (stcb->asoc.state == 0) { /* the assoc was freed? 
*/ return (-21); diff --git a/sys/netinet/sctp_pcb.h b/sys/netinet/sctp_pcb.h index 2921459b7134..31660c687dee 100644 --- a/sys/netinet/sctp_pcb.h +++ b/sys/netinet/sctp_pcb.h @@ -256,6 +256,7 @@ struct sctp_pcb { uint32_t sctp_sws_sender; uint32_t sctp_sws_receiver; + uint32_t sctp_default_cc_module; /* authentication related fields */ struct sctp_keyhead shared_keys; sctp_auth_chklist_t *local_auth_chunks; @@ -509,9 +510,6 @@ void sctp_remove_laddr(struct sctp_laddr *); void sctp_del_local_addr_ep(struct sctp_inpcb *, struct sctp_ifa *); -void sctp_set_initial_cc_param(struct sctp_tcb *, struct sctp_nets *net); - - int sctp_add_remote_addr(struct sctp_tcb *, struct sockaddr *, int, int); void sctp_remove_net(struct sctp_tcb *, struct sctp_nets *); diff --git a/sys/netinet/sctp_structs.h b/sys/netinet/sctp_structs.h index 327486096eba..f4d7835e85f4 100644 --- a/sys/netinet/sctp_structs.h +++ b/sys/netinet/sctp_structs.h @@ -144,13 +144,36 @@ struct sctp_asconf_iterator { int cnt; }; - struct sctp_net_route { sctp_rtentry_t *ro_rt; union sctp_sockstore _l_addr; /* remote peer addr */ struct sctp_ifa *_s_addr; /* our selected src addr */ }; +struct htcp { + uint16_t alpha; /* Fixed point arith, << 7 */ + uint8_t beta; /* Fixed point arith, << 7 */ + uint8_t modeswitch; /* Delay modeswitch until we had at least one + * congestion event */ + uint32_t last_cong; /* Time since last congestion event end */ + uint32_t undo_last_cong; + uint16_t bytes_acked; + uint32_t bytecount; + uint32_t minRTT; + uint32_t maxRTT; + + uint32_t undo_maxRTT; + uint32_t undo_old_maxB; + + /* Bandwidth estimation */ + uint32_t minB; + uint32_t maxB; + uint32_t old_maxB; + uint32_t Bi; + uint32_t lasttime; +}; + + struct sctp_nets { TAILQ_ENTRY(sctp_nets) sctp_next; /* next link */ @@ -198,6 +221,12 @@ struct sctp_nets { unsigned int net_ack2; /* + * JRS - 5/8/07 - Variable to track last time a destination was + * active for CMT PF + */ + uint32_t last_active; + + /* * CMT variables 
(iyengar@cis.udel.edu) */ uint32_t this_sack_highest_newack; /* tracks highest TSN newly @@ -263,9 +292,9 @@ struct sctp_nets { * rtx-pseudo-cumack has been received */ uint8_t window_probe; /* Doing a window probe? */ uint8_t RTO_measured; /* Have we done the first measure */ -#ifdef SCTP_HIGH_SPEED uint8_t last_hs_used; /* index into the last HS table entry we used */ -#endif + /* JRS - struct used in HTCP algorithm */ + struct htcp htcp_ca; }; @@ -462,7 +491,15 @@ struct sctp_tsn_log { uint16_t flgs; }; - +#define SCTP_FS_SPEC_LOG_SIZE 200 +struct sctp_fs_spec_log { + uint32_t sent; + uint32_t total_flight; + uint32_t tsn; + uint16_t book; + uint8_t incr; + uint8_t decr; +}; /* This struct is here to cut out the compatiabilty * pad that bulks up both the inp and stcb. The non @@ -483,6 +520,31 @@ struct sctp_nonpad_sndrcvinfo { }; /* + * JRS - Structure to hold function pointers to the functions responsible + * for congestion control. + */ + +struct sctp_cc_functions { + void (*sctp_set_initial_cc_param) (struct sctp_tcb *stcb, struct sctp_nets *net); + void (*sctp_cwnd_update_after_sack) (struct sctp_tcb *stcb, + struct sctp_association *asoc, + int accum_moved, int reneged_all, int will_exit); + void (*sctp_cwnd_update_after_fr) (struct sctp_tcb *stcb, + struct sctp_association *asoc); + void (*sctp_cwnd_update_after_timeout) (struct sctp_tcb *stcb, + struct sctp_nets *net); + void (*sctp_cwnd_update_after_ecn_echo) (struct sctp_tcb *stcb, + struct sctp_nets *net); + void (*sctp_cwnd_update_after_packet_dropped) (struct sctp_tcb *stcb, + struct sctp_nets *net, struct sctp_pktdrop_chunk *cp, + uint32_t * bottle_bw, uint32_t * on_queue); + void (*sctp_cwnd_update_after_output) (struct sctp_tcb *stcb, + struct sctp_nets *net, int burst_limit); + void (*sctp_cwnd_update_after_fr_timer) (struct sctp_inpcb *inp, + struct sctp_tcb *stcb, struct sctp_nets *net); +}; + +/* * Here we have information about each individual association that we track. 
* We probably in production would be more dynamic. But for ease of * implementation we will have a fixed array that we hunt for in a linear @@ -598,6 +660,14 @@ struct sctp_association { /* queue of chunks waiting to be sent into the local stack */ struct sctp_readhead pending_reply_queue; + /* JRS - the congestion control functions are in this struct */ + struct sctp_cc_functions cc_functions; + /* + * JRS - value to store the currently loaded congestion control + * module + */ + uint32_t congestion_control_module; + uint32_t vrf_id; uint32_t cookie_preserve_req; @@ -696,6 +766,11 @@ struct sctp_association { uint16_t tsn_in_wrapped; uint16_t tsn_out_wrapped; #endif /* SCTP_ASOCLOG_OF_TSNS */ +#ifdef SCTP_FS_SPEC_LOG + struct sctp_fs_spec_log fslog[SCTP_FS_SPEC_LOG_SIZE]; + uint16_t fs_index; +#endif + /* * window state information and smallest MTU that I use to bound * segmentation @@ -926,6 +1001,8 @@ struct sctp_association { uint8_t sctp_cmt_on_off; uint8_t iam_blocking; uint8_t cookie_how[8]; + /* JRS 5/21/07 - CMT PF variable */ + uint8_t sctp_cmt_pf; /* * The mapping array is used to track out of order sequences above * last_acked_seq. 
0 indicates packet missing 1 indicates packet diff --git a/sys/netinet/sctp_sysctl.c b/sys/netinet/sctp_sysctl.c index 321a0bb06170..b3a6783995b4 100644 --- a/sys/netinet/sctp_sysctl.c +++ b/sys/netinet/sctp_sysctl.c @@ -93,8 +93,11 @@ uint32_t sctp_chunkscale = SCTP_CHUNKQUEUE_SCALE; uint32_t sctp_cmt_on_off = 0; uint32_t sctp_cmt_use_dac = 0; +uint32_t sctp_cmt_pf = 0; uint32_t sctp_max_retran_chunk = SCTPCTL_MAX_RETRAN_CHUNK_DEFAULT; +/* JRS - Variable for default congestion control module */ +uint32_t sctp_default_cc_module = SCTPCTL_DEFAULT_CC_MODULE_DEFAULT; uint32_t sctp_L2_abc_variable = 1; uint32_t sctp_early_fr = 0; @@ -613,6 +616,14 @@ SYSCTL_UINT(_net_inet_sctp, OID_AUTO, cmt_on_off, CTLFLAG_RW, &sctp_cmt_on_off, 0, "CMT ON/OFF flag"); +SYSCTL_UINT(_net_inet_sctp, OID_AUTO, cmt_pf, CTLFLAG_RW, + &sctp_cmt_pf, 0, + "CMT PF type flag"); + +SYSCTL_UINT(_net_inet_sctp, OID_AUTO, default_cc_module, CTLFLAG_RW, + &sctp_default_cc_module, 0, + "Default congestion control module"); + SYSCTL_UINT(_net_inet_sctp, OID_AUTO, cwnd_maxburst, CTLFLAG_RW, &sctp_use_cwnd_based_maxburst, 0, "Use a CWND adjusting maxburst"); diff --git a/sys/netinet/sctp_sysctl.h b/sys/netinet/sctp_sysctl.h index 83da0a992ed5..59e092267bc2 100644 --- a/sys/netinet/sctp_sysctl.h +++ b/sys/netinet/sctp_sysctl.h @@ -399,18 +399,32 @@ __FBSDID("$FreeBSD$"); #define SCTPCTL_LOGGING_LEVEL_MAX 0xffffffff #define SCTPCTL_LOGGING_LEVEL_DEFAULT 0 +/* JRS 5/2107 - CMT PF type flag */ +#define SCTPCTL_CMT_PF 52 +#define SCTPCTL_CMT_PF_DESC "CMT PF type flag" +#define SCTPCTL_CMT_PF_MIN 0 +#define SCTPCTL_CMT_PF_MAX 2 +#define SCTPCTL_CMT_PF_DEFAULT 0 + +/* JRS - default congestion control module sysctl */ +#define SCTPCTL_DEFAULT_CC_MODULE 53 +#define SCTPCTL_DEFAULT_CC_MODULE_DESC "Default congestion control module" +#define SCTPCTL_DEFAULT_CC_MODULE_MIN 0 +#define SCTPCTL_DEFAULT_CC_MODULE_MAX 2 +#define SCTPCTL_DEFAULT_CC_MODULE_DEFAULT 0 + #ifdef SCTP_DEBUG /* debug: Configure debug output */ 
-#define SCTPCTL_DEBUG 52 +#define SCTPCTL_DEBUG 54 #define SCTPCTL_DEBUG_DESC "Configure debug output" #define SCTPCTL_DEBUG_MIN 0 #define SCTPCTL_DEBUG_MAX 0xFFFFFFFF #define SCTPCTL_DEBUG_DEFAULT 0 -#define SCTPCTL_MAXID 52 +#define SCTPCTL_MAXID 54 #else -#define SCTPCTL_MAXID 51 +#define SCTPCTL_MAXID 53 /* highest id without SCTP_DEBUG is SCTPCTL_DEFAULT_CC_MODULE */ #endif /* @@ -447,6 +461,8 @@ __FBSDID("$FreeBSD$"); { "path_rtx_max", CTLTYPE_INT }, \ { "outgoing_streams", CTLTYPE_INT }, \ { "cmt_on_off", CTLTYPE_INT }, \ + { "cmt_on_pf", CTLTYPE_INT }, \ + { "default_cc_module", CTLTYPE_INT }, \ { "cwnd_maxburst", CTLTYPE_INT }, \ { "early_fast_retran", CTLTYPE_INT }, \ { "deadlock_detect", CTLTYPE_INT }, \ @@ -503,6 +519,8 @@ __FBSDID("$FreeBSD$"); { "path_rtx_max", CTLTYPE_INT }, \ { "outgoing_streams", CTLTYPE_INT }, \ { "cmt_on_off", CTLTYPE_INT }, \ + { "cmt_on_pf", CTLTYPE_INT }, \ + { "default_cc_module", CTLTYPE_INT }, \ { "cwnd_maxburst", CTLTYPE_INT }, \ { "early_fast_retran", CTLTYPE_INT }, \ { "deadlock_detect", CTLTYPE_INT }, \ @@ -570,6 +588,12 @@ extern uint32_t sctp_path_rtx_max_default; extern uint32_t sctp_add_more_threshold; extern uint32_t sctp_nr_outgoing_streams_default; extern uint32_t sctp_cmt_on_off; + +/* JRS 5/21/07 - CMT PF type flag variables */ +extern uint32_t sctp_cmt_pf; + +/* JRS - Variable for the default congestion control module */ +extern uint32_t sctp_default_cc_module; extern uint32_t sctp_use_cwnd_based_maxburst; extern uint32_t sctp_early_fr; extern uint32_t sctp_use_rttvar_cc; diff --git a/sys/netinet/sctp_timer.c b/sys/netinet/sctp_timer.c index 380c357bad77..f25e10204898 100644 --- a/sys/netinet/sctp_timer.c +++ b/sys/netinet/sctp_timer.c @@ -137,23 +137,11 @@ sctp_early_fr_timer(struct sctp_inpcb *inp, } } if (cnt) { - int old_cwnd; - - old_cwnd = net->cwnd; - sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_EARLY_FR_TMR); /* - * make a small adjustment to cwnd and force to CA. 
+ * JRS - Use the congestion control given in the congestion + * control module */ - - if (net->cwnd > net->mtu) - /* drop down one MTU after sending */ - net->cwnd -= net->mtu; - if (net->cwnd < net->ssthresh) - /* still in SS move to CA */ - net->ssthresh = net->cwnd - 1; - if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) { - sctp_log_cwnd(stcb, net, (old_cwnd - net->cwnd), SCTP_CWND_LOG_FROM_FR); - } + stcb->asoc.cc_functions.sctp_cwnd_update_after_fr_timer(inp, stcb, net); } else if (cnt_resend) { sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_EARLY_FR_TMR); } @@ -208,6 +196,26 @@ sctp_threshold_management(struct sctp_inpcb *inp, struct sctp_tcb *stcb, if (net == stcb->asoc.primary_destination) { net->dest_state |= SCTP_ADDR_WAS_PRIMARY; } + /* + * JRS 5/14/07 - If a destination is + * unreachable, the PF bit is turned off. + * This allows an unambiguous use of the PF + * bit for destinations that are reachable + * but potentially failed. If the + * destination is set to the unreachable + * state, also set the destination to the PF + * state. + */ + /* + * Add debug message here if destination is + * not in PF state. + */ + /* Stop any running T3 timers here? */ + if (sctp_cmt_pf) { + net->dest_state &= ~SCTP_ADDR_PF; + SCTPDBG(SCTP_DEBUG_TIMER4, "Destination %p moved from PF to unreachable.\n", + net); + } sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN, stcb, SCTP_FAILED_THRESHOLD, @@ -267,18 +275,151 @@ sctp_threshold_management(struct sctp_inpcb *inp, struct sctp_tcb *stcb, struct sctp_nets * sctp_find_alternate_net(struct sctp_tcb *stcb, struct sctp_nets *net, - int highest_ssthresh) + int mode) { /* Find and return an alternate network if possible */ - struct sctp_nets *alt, *mnet, *hthresh = NULL; + struct sctp_nets *alt, *mnet, *min_errors_net = NULL, *max_cwnd_net = NULL; int once; - uint32_t val = 0; + + /* JRS 5/14/07 - Initialize min_errors to an impossible value. 
*/ + int min_errors = -1; + uint32_t max_cwnd = 0; if (stcb->asoc.numnets == 1) { /* No others but net */ return (TAILQ_FIRST(&stcb->asoc.nets)); } - if (highest_ssthresh) { + /* + * JRS 5/14/07 - If mode is set to 2, use the CMT PF find alternate + * net algorithm. This algorithm chooses the active destination (not + * in PF state) with the largest cwnd value. If all destinations are + * in PF state, unreachable, or unconfirmed, choose the desination + * that is in PF state with the lowest error count. In case of a + * tie, choose the destination that was most recently active. + */ + if (mode == 2) { + TAILQ_FOREACH(mnet, &stcb->asoc.nets, sctp_next) { + /* + * JRS 5/14/07 - If the destination is unreachable + * or unconfirmed, skip it. + */ + if (((mnet->dest_state & SCTP_ADDR_REACHABLE) != SCTP_ADDR_REACHABLE) || + (mnet->dest_state & SCTP_ADDR_UNCONFIRMED)) { + continue; + } + /* + * JRS 5/14/07 - If the destination is reachable + * but in PF state, compare the error count of the + * destination to the minimum error count seen thus + * far. Store the destination with the lower error + * count. If the error counts are equal, store the + * destination that was most recently active. + */ + if (mnet->dest_state & SCTP_ADDR_PF) { + /* + * JRS 5/14/07 - If the destination under + * consideration is the current destination, + * work as if the error count is one higher. + * The actual error count will not be + * incremented until later in the t3 + * handler. 
+ */ + if (mnet == net) { + if (min_errors == -1) { + min_errors = mnet->error_count + 1; + min_errors_net = mnet; + } else if (mnet->error_count + 1 < min_errors) { + min_errors = mnet->error_count + 1; + min_errors_net = mnet; + } else if (mnet->error_count + 1 == min_errors + && mnet->last_active > min_errors_net->last_active) { + min_errors_net = mnet; + min_errors = mnet->error_count + 1; + } + continue; + } else { + if (min_errors == -1) { + min_errors = mnet->error_count; + min_errors_net = mnet; + } else if (mnet->error_count < min_errors) { + min_errors = mnet->error_count; + min_errors_net = mnet; + } else if (mnet->error_count == min_errors + && mnet->last_active > min_errors_net->last_active) { + min_errors_net = mnet; + min_errors = mnet->error_count; + } + continue; + } + } + /* + * JRS 5/14/07 - If the destination is reachable and + * not in PF state, compare the cwnd of the + * destination to the highest cwnd seen thus far. + * Store the destination with the higher cwnd value. + * If the cwnd values are equal, randomly choose one + * of the two destinations. + */ + if (max_cwnd < mnet->cwnd) { + max_cwnd_net = mnet; + max_cwnd = mnet->cwnd; + } else if (max_cwnd == mnet->cwnd) { + uint32_t rndval; + uint8_t this_random; + + if (stcb->asoc.hb_random_idx > 3) { + rndval = sctp_select_initial_TSN(&stcb->sctp_ep->sctp_ep); + memcpy(stcb->asoc.hb_random_values, &rndval, sizeof(stcb->asoc.hb_random_values)); + this_random = stcb->asoc.hb_random_values[0]; + stcb->asoc.hb_random_idx++; + stcb->asoc.hb_ect_randombit = 0; + } else { + this_random = stcb->asoc.hb_random_values[stcb->asoc.hb_random_idx]; + stcb->asoc.hb_random_idx++; + stcb->asoc.hb_ect_randombit = 0; + } + if (this_random % 2 == 1) { + max_cwnd_net = mnet; + max_cwnd = mnet->cwnd; + //Useless ? + } + } + } + /* + * JRS 5/14/07 - After all destination have been considered + * as alternates, check to see if there was some active + * destination (not in PF state). 
If not, check to see if + * there was some PF destination with the minimum number of + * errors. If not, return the original destination. If + * there is a min_errors_net, remove the PF flag from that + * destination, set the cwnd to one or two MTUs, and return + * the destination as an alt. If there was some active + * destination with a highest cwnd, return the destination + * as an alt. + */ + if (max_cwnd_net == NULL) { + if (min_errors_net == NULL) { + return (net); + } + min_errors_net->dest_state &= ~SCTP_ADDR_PF; + min_errors_net->cwnd = min_errors_net->mtu * sctp_cmt_pf; + if (SCTP_OS_TIMER_PENDING(&min_errors_net->rxt_timer.timer)) { + sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep, + stcb, min_errors_net, + SCTP_FROM_SCTP_TIMER + SCTP_LOC_2); + } + SCTPDBG(SCTP_DEBUG_TIMER4, "Destination %p moved from PF to active with %d errors.\n", + min_errors_net, min_errors_net->error_count); + return (min_errors_net); + } else { + return (max_cwnd_net); + } + } + /* + * JRS 5/14/07 - If mode is set to 1, use the CMT policy for + * choosing an alternate net. 
+ */ + else if (mode == 1) { TAILQ_FOREACH(mnet, &stcb->asoc.nets, sctp_next) { if (((mnet->dest_state & SCTP_ADDR_REACHABLE) != SCTP_ADDR_REACHABLE) || (mnet->dest_state & SCTP_ADDR_UNCONFIRMED) @@ -289,10 +430,10 @@ sctp_find_alternate_net(struct sctp_tcb *stcb, */ continue; } - if (val < mnet->ssthresh) { - hthresh = mnet; - val = mnet->ssthresh; - } else if (val == mnet->ssthresh) { + if (max_cwnd < mnet->cwnd) { + max_cwnd_net = mnet; + max_cwnd = mnet->cwnd; + } else if (max_cwnd == mnet->cwnd) { uint32_t rndval; uint8_t this_random; @@ -309,13 +450,13 @@ sctp_find_alternate_net(struct sctp_tcb *stcb, stcb->asoc.hb_ect_randombit = 0; } if (this_random % 2) { - hthresh = mnet; - val = mnet->ssthresh; + max_cwnd_net = mnet; + max_cwnd = mnet->cwnd; } } } - if (hthresh) { - return (hthresh); + if (max_cwnd_net) { + return (max_cwnd_net); } } mnet = net; @@ -382,6 +523,8 @@ sctp_find_alternate_net(struct sctp_tcb *stcb, return (alt); } + + static void sctp_backoff_on_timeout(struct sctp_tcb *stcb, struct sctp_nets *net, @@ -397,20 +540,8 @@ sctp_backoff_on_timeout(struct sctp_tcb *stcb, } if ((win_probe == 0) && num_marked) { /* We don't apply penalty to window probe scenarios */ - int old_cwnd = net->cwnd; - - net->ssthresh = net->cwnd >> 1; - if (net->ssthresh < (net->mtu << 1)) { - net->ssthresh = (net->mtu << 1); - } - net->cwnd = net->mtu; - /* floor of 1 mtu */ - if (net->cwnd < net->mtu) - net->cwnd = net->mtu; - if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) { - sctp_log_cwnd(stcb, net, net->cwnd - old_cwnd, SCTP_CWND_LOG_FROM_RTX); - } - net->partial_bytes_acked = 0; + /* JRS - Use the congestion control given in the CC module */ + stcb->asoc.cc_functions.sctp_cwnd_update_after_timeout(stcb, net); } } @@ -437,22 +568,7 @@ sctp_mark_all_for_resend(struct sctp_tcb *stcb, uint32_t orig_flight, orig_tf; uint32_t tsnlast, tsnfirst; - /* - * CMT: Using RTX_SSTHRESH policy for CMT. 
If CMT is being used, - * then pick dest with largest ssthresh for any retransmission. - * (iyengar@cis.udel.edu, 2005/08/12) - */ - if (sctp_cmt_on_off) { - alt = sctp_find_alternate_net(stcb, net, 1); - /* - * CUCv2: If a different dest is picked for the - * retransmission, then new (rtx-)pseudo_cumack needs to be - * tracked for orig dest. Let CUCv2 track new (rtx-) - * pseudo-cumack always. - */ - net->find_pseudo_cumack = 1; - net->find_rtx_pseudo_cumack = 1; - } + /* none in flight now */ audit_tf = 0; fir = 0; @@ -625,6 +741,7 @@ sctp_mark_all_for_resend(struct sctp_tcb *stcb, SCTP_STAT_INCR(sctps_markedretrans); /* reset the TSN for striking and other FR stuff */ + chk->window_probe = 0; chk->rec.data.doing_fast_retransmit = 0; /* Clear any time so NO RTT is being done */ chk->do_rtt = 0; @@ -820,7 +937,22 @@ sctp_t3rxt_timer(struct sctp_inpcb *inp, win_probe = 0; } - if (sctp_cmt_on_off) { + /* + * JRS 5/14/07 - If CMT PF is on and the destination if not already + * in PF state, set the destination to PF state and store the + * current time as the time that the destination was last active. In + * addition, find an alternate destination with PF-based + * find_alt_net(). + */ + if (sctp_cmt_pf) { + if ((net->dest_state & SCTP_ADDR_PF) != SCTP_ADDR_PF) { + net->dest_state |= SCTP_ADDR_PF; + net->last_active = ticks; + SCTPDBG(SCTP_DEBUG_TIMER4, "Destination %p moved from active to PF.\n", + net); + } + alt = sctp_find_alternate_net(stcb, net, 2); + } else if (sctp_cmt_on_off) { /* * CMT: Using RTX_SSTHRESH policy for CMT. 
If CMT is being * used, then pick dest with largest ssthresh for any @@ -838,7 +970,6 @@ sctp_t3rxt_timer(struct sctp_inpcb *inp, net->find_rtx_pseudo_cumack = 1; } else { /* CMT is OFF */ - alt = sctp_find_alternate_net(stcb, net, 0); } @@ -880,7 +1011,8 @@ sctp_t3rxt_timer(struct sctp_inpcb *inp, * no recent feed back in an RTO or * more, request a RTT update */ - (void)sctp_send_hb(stcb, 1, net); + if (sctp_send_hb(stcb, 1, net) < 0) + return 1; } } } @@ -931,6 +1063,14 @@ sctp_t3rxt_timer(struct sctp_inpcb *inp, net->dest_state |= SCTP_ADDR_WAS_PRIMARY; } } + } else if (sctp_cmt_pf && (net->dest_state & SCTP_ADDR_PF) == SCTP_ADDR_PF) { + /* + * JRS 5/14/07 - If the destination hasn't failed completely + * but is in PF state, a PF-heartbeat needs to be sent + * manually. + */ + if (sctp_send_hb(stcb, 1, net) < 0) + return 1; } /* * Special case for cookie-echo'ed case, we don't do output but must @@ -1048,9 +1188,9 @@ sctp_cookie_timer(struct sctp_inpcb *inp, ph->param_type = htons(SCTP_CAUSE_PROTOCOL_VIOLATION); ph->param_length = htons(SCTP_BUF_LEN(oper)); ippp = (uint32_t *) (ph + 1); - *ippp = htonl(SCTP_FROM_SCTP_TIMER + SCTP_LOC_2); + *ippp = htonl(SCTP_FROM_SCTP_TIMER + SCTP_LOC_3); } - inp->last_abort_code = SCTP_FROM_SCTP_TIMER + SCTP_LOC_3; + inp->last_abort_code = SCTP_FROM_SCTP_TIMER + SCTP_LOC_4; sctp_abort_an_association(inp, stcb, SCTP_INTERNAL_ERROR, oper); } else { @@ -1375,6 +1515,8 @@ int sctp_heartbeat_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb, struct sctp_nets *net, int cnt_of_unconf) { + int ret; + if (net) { if (net->hb_responded == 0) { if (net->ro._s_addr) { @@ -1422,7 +1564,10 @@ sctp_heartbeat_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb, net->src_addr_selected = 0; } } - if (sctp_send_hb(stcb, 1, net) == 0) { + ret = sctp_send_hb(stcb, 1, net); + if (ret < 0) + return 1; + else if (ret == 0) { break; } if (cnt_sent >= sctp_hb_maxburst) diff --git a/sys/netinet/sctp_timer.h b/sys/netinet/sctp_timer.h index 
e28408453bc8..425d748376b6 100644 --- a/sys/netinet/sctp_timer.h +++ b/sys/netinet/sctp_timer.h @@ -46,7 +46,7 @@ sctp_early_fr_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb, struct sctp_nets * sctp_find_alternate_net(struct sctp_tcb *, - struct sctp_nets *, int high_ssthresh); + struct sctp_nets *, int mode); int sctp_threshold_management(struct sctp_inpcb *, struct sctp_tcb *, diff --git a/sys/netinet/sctp_usrreq.c b/sys/netinet/sctp_usrreq.c index a62497ca547d..6461c9150d0d 100644 --- a/sys/netinet/sctp_usrreq.c +++ b/sys/netinet/sctp_usrreq.c @@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$"); #include <netinet/sctp_timer.h> #include <netinet/sctp_auth.h> #include <netinet/sctp_bsd_addr.h> +#include <netinet/sctp_cc_functions.h> @@ -61,6 +62,7 @@ sctp_init(void) sctp_pcb_init(); + if ((nmbclusters / 8) > SCTP_ASOC_MAX_CHUNKS_ON_QUEUE) sctp_max_chunks_on_queue = (nmbclusters / 8); /* @@ -77,7 +79,6 @@ sctp_init(void) */ sctp_recvspace = sctp_sendspace; - } @@ -261,6 +262,26 @@ sctp_notify(struct sctp_inpcb *inp, net->dest_state &= ~SCTP_ADDR_REACHABLE; net->dest_state |= SCTP_ADDR_NOT_REACHABLE; + /* + * JRS 5/14/07 - If a destination is + * unreachable, the PF bit is turned off. + * This allows an unambiguous use of the PF + * bit for destinations that are reachable + * but potentially failed. If the + * destination is set to the unreachable + * state, also set the destination to the PF + * state. + */ + /* + * Add debug message here if destination is + * not in PF state. + */ + /* Stop any running T3 timers here? 
*/ + if (sctp_cmt_pf) { + net->dest_state &= ~SCTP_ADDR_PF; + SCTPDBG(SCTP_DEBUG_TIMER4, "Destination %p moved from PF to unreachable.\n", + net); + } net->error_count = net->failure_threshold + 1; sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN, stcb, SCTP_FAILED_THRESHOLD, @@ -487,6 +508,7 @@ sctp_attach(struct socket *so, int proto, struct thread *p) #ifdef IPSEC uint32_t flags; + #endif inp = (struct sctp_inpcb *)so->so_pcb; if (inp != 0) { @@ -1537,6 +1559,22 @@ sctp_getopt(struct socket *so, int optname, void *optval, size_t *optsize, *optsize = sizeof(*av); } break; + /* JRS - Get socket option for pluggable congestion control */ + case SCTP_PLUGGABLE_CC: + { + struct sctp_assoc_value *av; + + SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize); + SCTP_FIND_STCB(inp, stcb, av->assoc_id); + if (stcb) { + av->assoc_value = stcb->asoc.congestion_control_module; + SCTP_TCB_UNLOCK(stcb); + } else { + av->assoc_value = inp->sctp_ep.sctp_default_cc_module; + } + *optsize = sizeof(*av); + } + break; case SCTP_GET_ADDR_LEN: { struct sctp_assoc_value *av; @@ -2512,6 +2550,91 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, } } break; + /* JRS - Set socket option for pluggable congestion control */ + case SCTP_PLUGGABLE_CC: + { + struct sctp_assoc_value *av; + + SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize); + SCTP_FIND_STCB(inp, stcb, av->assoc_id); + if (stcb) { + switch (av->assoc_value) { + /* + * JRS - Standard TCP congestion + * control + */ + case SCTP_CC_RFC2581: + { + stcb->asoc.congestion_control_module = SCTP_CC_RFC2581; + stcb->asoc.cc_functions.sctp_set_initial_cc_param = &sctp_set_initial_cc_param; + stcb->asoc.cc_functions.sctp_cwnd_update_after_sack = &sctp_cwnd_update_after_sack; + stcb->asoc.cc_functions.sctp_cwnd_update_after_fr = &sctp_cwnd_update_after_fr; + stcb->asoc.cc_functions.sctp_cwnd_update_after_timeout = &sctp_cwnd_update_after_timeout; + 
stcb->asoc.cc_functions.sctp_cwnd_update_after_ecn_echo = &sctp_cwnd_update_after_ecn_echo; + stcb->asoc.cc_functions.sctp_cwnd_update_after_packet_dropped = &sctp_cwnd_update_after_packet_dropped; + stcb->asoc.cc_functions.sctp_cwnd_update_after_output = &sctp_cwnd_update_after_output; + stcb->asoc.cc_functions.sctp_cwnd_update_after_fr_timer = &sctp_cwnd_update_after_fr_timer; + SCTP_TCB_UNLOCK(stcb); + break; + } + /* + * JRS - High Speed TCP congestion + * control (Floyd) + */ + case SCTP_CC_HSTCP: + { + stcb->asoc.congestion_control_module = SCTP_CC_HSTCP; + stcb->asoc.cc_functions.sctp_set_initial_cc_param = &sctp_set_initial_cc_param; + stcb->asoc.cc_functions.sctp_cwnd_update_after_sack = &sctp_hs_cwnd_update_after_sack; + stcb->asoc.cc_functions.sctp_cwnd_update_after_fr = &sctp_hs_cwnd_update_after_fr; + stcb->asoc.cc_functions.sctp_cwnd_update_after_timeout = &sctp_cwnd_update_after_timeout; + stcb->asoc.cc_functions.sctp_cwnd_update_after_ecn_echo = &sctp_cwnd_update_after_ecn_echo; + stcb->asoc.cc_functions.sctp_cwnd_update_after_packet_dropped = &sctp_cwnd_update_after_packet_dropped; + stcb->asoc.cc_functions.sctp_cwnd_update_after_output = &sctp_cwnd_update_after_output; + stcb->asoc.cc_functions.sctp_cwnd_update_after_fr_timer = &sctp_cwnd_update_after_fr_timer; + SCTP_TCB_UNLOCK(stcb); + break; + } + /* JRS - HTCP congestion control */ + case SCTP_CC_HTCP: + { + stcb->asoc.congestion_control_module = SCTP_CC_HTCP; + stcb->asoc.cc_functions.sctp_set_initial_cc_param = &sctp_htcp_set_initial_cc_param; + stcb->asoc.cc_functions.sctp_cwnd_update_after_sack = &sctp_htcp_cwnd_update_after_sack; + stcb->asoc.cc_functions.sctp_cwnd_update_after_fr = &sctp_htcp_cwnd_update_after_fr; + stcb->asoc.cc_functions.sctp_cwnd_update_after_timeout = &sctp_htcp_cwnd_update_after_timeout; + stcb->asoc.cc_functions.sctp_cwnd_update_after_ecn_echo = &sctp_htcp_cwnd_update_after_ecn_echo; + stcb->asoc.cc_functions.sctp_cwnd_update_after_packet_dropped = 
&sctp_cwnd_update_after_packet_dropped; + stcb->asoc.cc_functions.sctp_cwnd_update_after_output = &sctp_cwnd_update_after_output; + stcb->asoc.cc_functions.sctp_cwnd_update_after_fr_timer = &sctp_htcp_cwnd_update_after_fr_timer; + SCTP_TCB_UNLOCK(stcb); + break; + } + /* + * JRS - All other values are + * invalid + */ + default: + { + error = EINVAL; + SCTP_TCB_UNLOCK(stcb); + break; + } + } + } else { + switch (av->assoc_value) { + case SCTP_CC_RFC2581: + case SCTP_CC_HSTCP: + case SCTP_CC_HTCP: + inp->sctp_ep.sctp_default_cc_module = av->assoc_value; + break; + default: + error = EINVAL; + break; + }; + } + } + break; case SCTP_CLR_STAT_LOG: error = EOPNOTSUPP; break; @@ -3123,7 +3246,11 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, /************************NET SPECIFIC SET ******************/ if (paddrp->spp_flags & SPP_HB_DEMAND) { /* on demand HB */ - (void)sctp_send_hb(stcb, 1, net); + if (sctp_send_hb(stcb, 1, net) < 0) { + /* asoc destroyed */ + error = EINVAL; + break; + } } if (paddrp->spp_flags & SPP_HB_DISABLE) { net->dest_state |= SCTP_ADDR_NOHB; diff --git a/sys/netinet/sctp_var.h b/sys/netinet/sctp_var.h index f05a36803f29..b6e1d0656669 100644 --- a/sys/netinet/sctp_var.h +++ b/sys/netinet/sctp_var.h @@ -90,6 +90,7 @@ extern struct pr_usrreqs sctp_usrreqs; #define sctp_free_a_chunk(_stcb, _chk) { \ + SCTP_TCB_LOCK_ASSERT((_stcb)); \ if ((_chk)->whoTo) { \ sctp_free_remote_addr((_chk)->whoTo); \ (_chk)->whoTo = NULL; \ @@ -223,7 +224,6 @@ extern struct pr_usrreqs sctp_usrreqs; } \ } while (0) -#ifdef RANDY_WILL_USE_LATER /* this will be the non-invarant version */ #define sctp_flight_size_decrease(tp1) do { \ if (tp1->whoTo->flight_size >= tp1->book_size) \ tp1->whoTo->flight_size -= tp1->book_size; \ @@ -231,8 +231,21 @@ extern struct pr_usrreqs sctp_usrreqs; tp1->whoTo->flight_size = 0; \ } while (0) +#define sctp_flight_size_increase(tp1) do { \ + (tp1)->whoTo->flight_size += (tp1)->book_size; \ +} while (0) +#ifdef 
SCTP_FS_SPEC_LOG #define sctp_total_flight_decrease(stcb, tp1) do { \ + if(stcb->asoc.fs_index >= SCTP_FS_SPEC_LOG_SIZE) /* wrap before indexing: fslog[] has SCTP_FS_SPEC_LOG_SIZE slots */ \ + stcb->asoc.fs_index = 0;\ + stcb->asoc.fslog[stcb->asoc.fs_index].total_flight = stcb->asoc.total_flight; \ + stcb->asoc.fslog[stcb->asoc.fs_index].tsn = tp1->rec.data.TSN_seq; \ + stcb->asoc.fslog[stcb->asoc.fs_index].book = tp1->book_size; \ + stcb->asoc.fslog[stcb->asoc.fs_index].sent = tp1->sent; \ + stcb->asoc.fslog[stcb->asoc.fs_index].incr = 0; \ + stcb->asoc.fslog[stcb->asoc.fs_index].decr = 1; \ + stcb->asoc.fs_index++; \ if (stcb->asoc.total_flight >= tp1->book_size) { \ stcb->asoc.total_flight -= tp1->book_size; \ if (stcb->asoc.total_flight_count > 0) \ @@ -243,15 +256,21 @@ extern struct pr_usrreqs sctp_usrreqs; } \ } while (0) -#else - -#define sctp_flight_size_decrease(tp1) do { \ - if (tp1->whoTo->flight_size >= tp1->book_size) \ - tp1->whoTo->flight_size -= tp1->book_size; \ - else \ - panic("flight size corruption"); \ +#define sctp_total_flight_increase(stcb, tp1) do { \ + if(stcb->asoc.fs_index >= SCTP_FS_SPEC_LOG_SIZE) /* wrap before indexing: fslog[] has SCTP_FS_SPEC_LOG_SIZE slots */ \ + stcb->asoc.fs_index = 0;\ + stcb->asoc.fslog[stcb->asoc.fs_index].total_flight = stcb->asoc.total_flight; \ + stcb->asoc.fslog[stcb->asoc.fs_index].tsn = tp1->rec.data.TSN_seq; \ + stcb->asoc.fslog[stcb->asoc.fs_index].book = tp1->book_size; \ + stcb->asoc.fslog[stcb->asoc.fs_index].sent = tp1->sent; \ + stcb->asoc.fslog[stcb->asoc.fs_index].incr = 1; \ + stcb->asoc.fslog[stcb->asoc.fs_index].decr = 0; \ + stcb->asoc.fs_index++; \ + (stcb)->asoc.total_flight_count++; \ + (stcb)->asoc.total_flight += (tp1)->book_size; \ } while (0) +#else #define sctp_total_flight_decrease(stcb, tp1) do { \ if (stcb->asoc.total_flight >= tp1->book_size) { \ @@ -259,22 +278,18 @@ extern struct pr_usrreqs sctp_usrreqs; if (stcb->asoc.total_flight_count > 0) \ stcb->asoc.total_flight_count--; \ } else { \ - panic("total flight size corruption"); \ + stcb->asoc.total_flight = 0; \ + stcb->asoc.total_flight_count = 0; \ } \ } 
while (0) -#endif - -#define sctp_flight_size_increase(tp1) do { \ - (tp1)->whoTo->flight_size += (tp1)->book_size; \ -} while (0) - - #define sctp_total_flight_increase(stcb, tp1) do { \ (stcb)->asoc.total_flight_count++; \ (stcb)->asoc.total_flight += (tp1)->book_size; \ } while (0) +#endif + struct sctp_nets; struct sctp_inpcb; struct sctp_tcb; diff --git a/sys/netinet/sctputil.c b/sys/netinet/sctputil.c index 319bbcc32f8f..4fa0d4591e7d 100644 --- a/sys/netinet/sctputil.c +++ b/sys/netinet/sctputil.c @@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$"); #include <netinet/sctp_indata.h>/* for sctp_deliver_data() */ #include <netinet/sctp_auth.h> #include <netinet/sctp_asconf.h> +#include <netinet/sctp_cc_functions.h> #define NUMBER_OF_MTU_SIZES 18 @@ -854,7 +855,6 @@ sctp_select_a_tag(struct sctp_inpcb *m) return (x); } - int sctp_init_asoc(struct sctp_inpcb *m, struct sctp_tcb *stcb, int for_a_init, uint32_t override_tag, uint32_t vrf_id) @@ -881,6 +881,8 @@ sctp_init_asoc(struct sctp_inpcb *m, struct sctp_tcb *stcb, asoc->heart_beat_delay = TICKS_TO_MSEC(m->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT]); asoc->cookie_life = m->sctp_ep.def_cookie_life; asoc->sctp_cmt_on_off = (uint8_t) sctp_cmt_on_off; + /* JRS 5/21/07 - Init CMT PF variables */ + asoc->sctp_cmt_pf = (uint8_t) sctp_cmt_pf; asoc->sctp_frag_point = m->sctp_frag_point; #ifdef INET asoc->default_tos = m->ip_inp.inp.inp_ip_tos; @@ -1010,6 +1012,69 @@ sctp_init_asoc(struct sctp_inpcb *m, struct sctp_tcb *stcb, asoc->sctp_autoclose_ticks = m->sctp_ep.auto_close_time; /* + * JRS - Pick the default congestion control module based on the + * sysctl. 
+ */ + switch (m->sctp_ep.sctp_default_cc_module) { + /* JRS - Standard TCP congestion control */ + case SCTP_CC_RFC2581: + { + stcb->asoc.congestion_control_module = SCTP_CC_RFC2581; + stcb->asoc.cc_functions.sctp_set_initial_cc_param = &sctp_set_initial_cc_param; + stcb->asoc.cc_functions.sctp_cwnd_update_after_sack = &sctp_cwnd_update_after_sack; + stcb->asoc.cc_functions.sctp_cwnd_update_after_fr = &sctp_cwnd_update_after_fr; + stcb->asoc.cc_functions.sctp_cwnd_update_after_timeout = &sctp_cwnd_update_after_timeout; + stcb->asoc.cc_functions.sctp_cwnd_update_after_ecn_echo = &sctp_cwnd_update_after_ecn_echo; + stcb->asoc.cc_functions.sctp_cwnd_update_after_packet_dropped = &sctp_cwnd_update_after_packet_dropped; + stcb->asoc.cc_functions.sctp_cwnd_update_after_output = &sctp_cwnd_update_after_output; + stcb->asoc.cc_functions.sctp_cwnd_update_after_fr_timer = &sctp_cwnd_update_after_fr_timer; + break; + } + /* JRS - High Speed TCP congestion control (Floyd) */ + case SCTP_CC_HSTCP: + { + stcb->asoc.congestion_control_module = SCTP_CC_HSTCP; + stcb->asoc.cc_functions.sctp_set_initial_cc_param = &sctp_set_initial_cc_param; + stcb->asoc.cc_functions.sctp_cwnd_update_after_sack = &sctp_hs_cwnd_update_after_sack; + stcb->asoc.cc_functions.sctp_cwnd_update_after_fr = &sctp_hs_cwnd_update_after_fr; + stcb->asoc.cc_functions.sctp_cwnd_update_after_timeout = &sctp_cwnd_update_after_timeout; + stcb->asoc.cc_functions.sctp_cwnd_update_after_ecn_echo = &sctp_cwnd_update_after_ecn_echo; + stcb->asoc.cc_functions.sctp_cwnd_update_after_packet_dropped = &sctp_cwnd_update_after_packet_dropped; + stcb->asoc.cc_functions.sctp_cwnd_update_after_output = &sctp_cwnd_update_after_output; + stcb->asoc.cc_functions.sctp_cwnd_update_after_fr_timer = &sctp_cwnd_update_after_fr_timer; + break; + } + /* JRS - HTCP congestion control */ + case SCTP_CC_HTCP: + { + stcb->asoc.congestion_control_module = SCTP_CC_HTCP; + stcb->asoc.cc_functions.sctp_set_initial_cc_param = 
&sctp_htcp_set_initial_cc_param; + stcb->asoc.cc_functions.sctp_cwnd_update_after_sack = &sctp_htcp_cwnd_update_after_sack; + stcb->asoc.cc_functions.sctp_cwnd_update_after_fr = &sctp_htcp_cwnd_update_after_fr; + stcb->asoc.cc_functions.sctp_cwnd_update_after_timeout = &sctp_htcp_cwnd_update_after_timeout; + stcb->asoc.cc_functions.sctp_cwnd_update_after_ecn_echo = &sctp_htcp_cwnd_update_after_ecn_echo; + stcb->asoc.cc_functions.sctp_cwnd_update_after_packet_dropped = &sctp_cwnd_update_after_packet_dropped; + stcb->asoc.cc_functions.sctp_cwnd_update_after_output = &sctp_cwnd_update_after_output; + stcb->asoc.cc_functions.sctp_cwnd_update_after_fr_timer = &sctp_htcp_cwnd_update_after_fr_timer; + break; + } + /* JRS - By default, use RFC2581 */ + default: + { + stcb->asoc.congestion_control_module = SCTP_CC_RFC2581; + stcb->asoc.cc_functions.sctp_set_initial_cc_param = &sctp_set_initial_cc_param; + stcb->asoc.cc_functions.sctp_cwnd_update_after_sack = &sctp_cwnd_update_after_sack; + stcb->asoc.cc_functions.sctp_cwnd_update_after_fr = &sctp_cwnd_update_after_fr; + stcb->asoc.cc_functions.sctp_cwnd_update_after_timeout = &sctp_cwnd_update_after_timeout; + stcb->asoc.cc_functions.sctp_cwnd_update_after_ecn_echo = &sctp_cwnd_update_after_ecn_echo; + stcb->asoc.cc_functions.sctp_cwnd_update_after_packet_dropped = &sctp_cwnd_update_after_packet_dropped; + stcb->asoc.cc_functions.sctp_cwnd_update_after_output = &sctp_cwnd_update_after_output; + stcb->asoc.cc_functions.sctp_cwnd_update_after_fr_timer = &sctp_cwnd_update_after_fr_timer; + break; + } + } + + /* * Now the stream parameters, here we allocate space for all streams * that we request by default. 
*/ @@ -1309,6 +1374,9 @@ sctp_handle_addr_wq(void) } +int retcode = 0; +int cur_oerr = 0; + void sctp_timeout_handler(void *t) { @@ -1397,6 +1465,15 @@ sctp_timeout_handler(void *t) if (stcb) { SCTP_TCB_LOCK(stcb); atomic_add_int(&stcb->asoc.refcnt, -1); + if ((tmr->type != SCTP_TIMER_TYPE_ASOCKILL) && + ((stcb->asoc.state == 0) || + (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED))) { + SCTP_TCB_UNLOCK(stcb); + if (inp) { + SCTP_INP_DECR_REF(inp); + } + return; + } } /* record in stopped what t-o occured */ tmr->stopped_from = tmr->type; @@ -1451,11 +1528,15 @@ sctp_timeout_handler(void *t) if (stcb->asoc.num_send_timers_up < 0) { stcb->asoc.num_send_timers_up = 0; } - if (sctp_t3rxt_timer(inp, stcb, net)) { + SCTP_TCB_LOCK_ASSERT(stcb); + cur_oerr = stcb->asoc.overall_error_count; + retcode = sctp_t3rxt_timer(inp, stcb, net); + if (retcode) { /* no need to unlock on tcb its gone */ goto out_decr; } + SCTP_TCB_LOCK_ASSERT(stcb); #ifdef SCTP_AUDITING_ENABLED sctp_auditing(4, inp, stcb, net); #endif diff --git a/sys/netinet6/sctp6_usrreq.c b/sys/netinet6/sctp6_usrreq.c index 0065244de1e0..9e93755a65f4 100644 --- a/sys/netinet6/sctp6_usrreq.c +++ b/sys/netinet6/sctp6_usrreq.c @@ -56,8 +56,8 @@ __FBSDID("$FreeBSD$"); #include <netipsec/ipsec.h> #if defined(INET6) #include <netipsec/ipsec6.h> -#endif /* INET6 */ -#endif /* IPSEC */ +#endif /* INET6 */ +#endif /* IPSEC */ extern struct protosw inetsw[]; @@ -210,7 +210,7 @@ sctp_skip_csum: ipsec6stat.in_polvio++; goto bad; } -#endif /* IPSEC */ +#endif /* IPSEC */ /* * CONTROL chunk processing |