aboutsummaryrefslogtreecommitdiff
path: root/sys/netinet
diff options
context:
space:
mode:
author Alexander V. Chernikov <melifaro@FreeBSD.org> 2020-10-18 17:15:47 +0000
committer Alexander V. Chernikov <melifaro@FreeBSD.org> 2020-10-18 17:15:47 +0000
commit 0c325f53f16731f608919a4489f96fbbe28d2344 (patch)
tree 47edfa6af78113b91e7dad7f002817c6e110b01f /sys/netinet
parent 186bcdaac7c70e59eb04412ad402a6fb57b97d11 (diff)
download src-0c325f53f16731f608919a4489f96fbbe28d2344.tar.gz
src-0c325f53f16731f608919a4489f96fbbe28d2344.zip
Implement flowid calculation for outbound connections to balance
connections over multiple paths.

Multipath routing relies on mbuf flowid data for both transit and outbound traffic. The current code fills the mbuf flowid from inp_flowid for connection-oriented sockets. However, inp_flowid is currently not calculated for outbound connections.

This change creates simple hashing functions and starts calculating hashes for TCP, UDP/UDP-Lite and raw IP if multipath routes are present in the system.

Reviewed by: glebius (previous version), ae
Differential Revision: https://reviews.freebsd.org/D26523
Notes
Notes: svn path=/head/; revision=366813
Diffstat (limited to 'sys/netinet')
-rw-r--r-- sys/netinet/in_fib.c 35
-rw-r--r-- sys/netinet/in_fib.h 3
-rw-r--r-- sys/netinet/in_pcb.c 11
-rw-r--r-- sys/netinet/in_rss.c 42
-rw-r--r-- sys/netinet/in_rss.h 2
-rw-r--r-- sys/netinet/raw_ip.c 25
-rw-r--r-- sys/netinet/udp_usrreq.c 32
7 files changed, 126 insertions, 24 deletions
diff --git a/sys/netinet/in_fib.c b/sys/netinet/in_fib.c
index 4c84de2c7281..031277add777 100644
--- a/sys/netinet/in_fib.c
+++ b/sys/netinet/in_fib.c
@@ -50,6 +50,7 @@ __FBSDID("$FreeBSD$");
#include <net/route/route_ctl.h>
#include <net/route/route_var.h>
#include <net/route/nhop.h>
+#include <net/toeplitz.h>
#include <net/vnet.h>
#include <netinet/in.h>
@@ -62,6 +63,40 @@ __FBSDID("$FreeBSD$");
/* Assert 'struct route_in' is compatible with 'struct route' */
CHK_STRUCT_ROUTE_COMPAT(struct route_in, ro_dst4);
+#ifdef ROUTE_MPATH
+struct _hash_5tuple_ipv4 {
+ struct in_addr src;
+ struct in_addr dst;
+ unsigned short src_port;
+ unsigned short dst_port;
+ char proto;
+ char spare[3];
+};
+_Static_assert(sizeof(struct _hash_5tuple_ipv4) == 16,
+ "_hash_5tuple_ipv4 size is wrong");
+
+
+uint32_t
+fib4_calc_software_hash(struct in_addr src, struct in_addr dst,
+ unsigned short src_port, unsigned short dst_port, char proto,
+ uint32_t *phashtype)
+{
+ struct _hash_5tuple_ipv4 data;
+
+ data.src = src;
+ data.dst = dst;
+ data.src_port = src_port;
+ data.dst_port = dst_port;
+ data.proto = proto;
+ data.spare[0] = data.spare[1] = data.spare[2] = 0;
+
+ *phashtype = M_HASHTYPE_OPAQUE;
+
+ return (toeplitz_hash(MPATH_ENTROPY_KEY_LEN, mpath_entropy_key,
+ sizeof(data), (uint8_t *)&data));
+}
+#endif
+
/*
* Looks up path in fib @fibnum specified by @dst.
* Returns path nexthop on success. Nexthop is safe to use
diff --git a/sys/netinet/in_fib.h b/sys/netinet/in_fib.h
index bc2a2ad6a30e..0d93086bc116 100644
--- a/sys/netinet/in_fib.h
+++ b/sys/netinet/in_fib.h
@@ -51,4 +51,7 @@ int fib4_check_urpf(uint32_t fibnum, struct in_addr dst, uint32_t scopeid,
uint32_t flags, const struct ifnet *src_if);
struct nhop_object *fib4_lookup_debugnet(uint32_t fibnum, struct in_addr dst,
uint32_t scopeid, uint32_t flags);
+uint32_t fib4_calc_software_hash(struct in_addr src, struct in_addr dst,
+ unsigned short src_port, unsigned short dst_port, char proto,
+ uint32_t *phashtype);
#endif
diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c
index 30bf72f83e33..37438dd68c71 100644
--- a/sys/netinet/in_pcb.c
+++ b/sys/netinet/in_pcb.c
@@ -46,6 +46,7 @@ __FBSDID("$FreeBSD$");
#include "opt_inet6.h"
#include "opt_ratelimit.h"
#include "opt_pcbgroup.h"
+#include "opt_route.h"
#include "opt_rss.h"
#include <sys/param.h>
@@ -1327,7 +1328,17 @@ in_pcbconnect_setup(struct inpcb *inp, struct sockaddr *nam,
lport = *lportp;
faddr = sin->sin_addr;
fport = sin->sin_port;
+#ifdef ROUTE_MPATH
+ if (CALC_FLOWID_OUTBOUND) {
+ uint32_t hash_val, hash_type;
+ hash_val = fib4_calc_software_hash(laddr, faddr, 0, fport,
+ inp->inp_socket->so_proto->pr_protocol, &hash_type);
+
+ inp->inp_flowid = hash_val;
+ inp->inp_flowtype = hash_type;
+ }
+#endif
if (!CK_STAILQ_EMPTY(&V_in_ifaddrhead)) {
/*
* If the destination address is INADDR_ANY,
diff --git a/sys/netinet/in_rss.c b/sys/netinet/in_rss.c
index f3184175a7cf..05659b97fe7c 100644
--- a/sys/netinet/in_rss.c
+++ b/sys/netinet/in_rss.c
@@ -152,6 +152,48 @@ rss_proto_software_hash_v4(struct in_addr s, struct in_addr d,
}
/*
+ * Calculate an appropriate ipv4 2-tuple or 4-tuple given the given
+ * IPv4 source/destination address, UDP or TCP source/destination ports
+ * and the protocol type.
+ *
+ * The protocol code may wish to do a software hash of the given
+ * tuple. This depends upon the currently configured RSS hash types.
+ *
+ * It assumes the packet source/destination address
+ * are in "outgoing" packet order (ie, destination is "far" address.)
+ */
+uint32_t
+xps_proto_software_hash_v4(struct in_addr s, struct in_addr d,
+ u_short sp, u_short dp, int proto, uint32_t *hashtype)
+{
+ uint32_t hash;
+
+ /*
+ * Next, choose the hash type depending upon the protocol
+ * identifier.
+ */
+ if ((proto == IPPROTO_TCP) &&
+ (rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV4)) {
+ hash = rss_hash_ip4_4tuple(d, dp, s, sp);
+ *hashtype = M_HASHTYPE_RSS_TCP_IPV4;
+ return (hash);
+ } else if ((proto == IPPROTO_UDP) &&
+ (rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV4)) {
+ hash = rss_hash_ip4_4tuple(d, dp, s, sp);
+ *hashtype = M_HASHTYPE_RSS_UDP_IPV4;
+ return (hash);
+ } else if (rss_gethashconfig() & RSS_HASHTYPE_RSS_IPV4) {
+ /* RSS doesn't hash on other protocols like SCTP; so 2-tuple */
+ hash = rss_hash_ip4_2tuple(d, s);
+ *hashtype = M_HASHTYPE_RSS_IPV4;
+ return (hash);
+ }
+
+ *hashtype = M_HASHTYPE_NONE;
+ return (0);
+}
+
+/*
* Do a software calculation of the RSS for the given mbuf.
*
* This is typically used by the input path to recalculate the RSS after
diff --git a/sys/netinet/in_rss.h b/sys/netinet/in_rss.h
index fd300ac5fdff..cdc69bc64709 100644
--- a/sys/netinet/in_rss.h
+++ b/sys/netinet/in_rss.h
@@ -53,5 +53,7 @@ int rss_proto_software_hash_v4(struct in_addr src,
uint32_t *hashtype);
struct mbuf * rss_soft_m2cpuid_v4(struct mbuf *m, uintptr_t source,
u_int *cpuid);
+uint32_t xps_proto_software_hash_v4(struct in_addr s, struct in_addr d,
+ u_short sp, u_short dp, int proto, uint32_t *hashtype);
#endif /* !_NETINET_IN_RSS_H_ */
diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c
index 6240a77bdb39..a63fc19587f9 100644
--- a/sys/netinet/raw_ip.c
+++ b/sys/netinet/raw_ip.c
@@ -38,6 +38,7 @@ __FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ipsec.h"
+#include "opt_route.h"
#include <sys/param.h>
#include <sys/jail.h>
@@ -67,6 +68,7 @@ __FBSDID("$FreeBSD$");
#include <netinet/in.h>
#include <netinet/in_systm.h>
+#include <netinet/in_fib.h>
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/if_ether.h>
@@ -484,6 +486,17 @@ rip_output(struct mbuf *m, struct socket *so, ...)
ip->ip_len = htons(m->m_pkthdr.len);
ip->ip_src = inp->inp_laddr;
ip->ip_dst.s_addr = dst;
+#ifdef ROUTE_MPATH
+ if (CALC_FLOWID_OUTBOUND) {
+ uint32_t hash_type, hash_val;
+
+ hash_val = fib4_calc_software_hash(ip->ip_src,
+ ip->ip_dst, 0, 0, ip->ip_p, &hash_type);
+ m->m_pkthdr.flowid = hash_val;
+ M_HASHTYPE_SET(m, hash_type);
+ flags |= IP_NODEFAULTFLOWID;
+ }
+#endif
if (jailed(inp->inp_cred)) {
/*
* prison_local_ip4() would be good enough but would
@@ -519,7 +532,17 @@ rip_output(struct mbuf *m, struct socket *so, ...)
return (EINVAL);
ip = mtod(m, struct ip *);
}
-
+#ifdef ROUTE_MPATH
+ if (CALC_FLOWID_OUTBOUND) {
+ uint32_t hash_type, hash_val;
+
+ hash_val = fib4_calc_software_hash(ip->ip_dst,
+ ip->ip_src, 0, 0, ip->ip_p, &hash_type);
+ m->m_pkthdr.flowid = hash_val;
+ M_HASHTYPE_SET(m, hash_type);
+ flags |= IP_NODEFAULTFLOWID;
+ }
+#endif
INP_RLOCK(inp);
/*
* Don't allow both user specified and setsockopt options,
diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c
index e1bb5f07c0d0..52304ddd6584 100644
--- a/sys/netinet/udp_usrreq.c
+++ b/sys/netinet/udp_usrreq.c
@@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ipsec.h"
+#include "opt_route.h"
#include "opt_rss.h"
#include <sys/param.h>
@@ -76,6 +77,7 @@ __FBSDID("$FreeBSD$");
#include <netinet/in.h>
#include <netinet/in_kdtrace.h>
+#include <netinet/in_fib.h>
#include <netinet/in_pcb.h>
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
@@ -1483,30 +1485,14 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
m->m_pkthdr.flowid = flowid;
M_HASHTYPE_SET(m, flowtype);
}
-#ifdef RSS
- else {
+#if defined(ROUTE_MPATH) || defined(RSS)
+ else if (CALC_FLOWID_OUTBOUND_SENDTO) {
uint32_t hash_val, hash_type;
- /*
- * Calculate an appropriate RSS hash for UDP and
- * UDP Lite.
- *
- * The called function will take care of figuring out
- * whether a 2-tuple or 4-tuple hash is required based
- * on the currently configured scheme.
- *
- * Later later on connected socket values should be
- * cached in the inpcb and reused, rather than constantly
- * re-calculating it.
- *
- * UDP Lite is a different protocol number and will
- * likely end up being hashed as a 2-tuple until
- * RSS / NICs grow UDP Lite protocol awareness.
- */
- if (rss_proto_software_hash_v4(faddr, laddr, fport, lport,
- pr, &hash_val, &hash_type) == 0) {
- m->m_pkthdr.flowid = hash_val;
- M_HASHTYPE_SET(m, hash_type);
- }
+
+ hash_val = fib4_calc_packet_hash(laddr, faddr,
+ lport, fport, pr, &hash_type);
+ m->m_pkthdr.flowid = hash_val;
+ M_HASHTYPE_SET(m, hash_type);
}
/*