diff options
author | Andrey V. Elsukov <ae@FreeBSD.org> | 2018-06-05 20:51:01 +0000 |
---|---|---|
committer | Andrey V. Elsukov <ae@FreeBSD.org> | 2018-06-05 20:51:01 +0000 |
commit | 6d8fdfa9d5e7d4871c5039b0131829f9cbefeee9 (patch) | |
tree | 0ebd65e97a5973725170f97863d5c1133dc381ce /sys/net | |
parent | 56009ba0ed678cffa7bece86518aef47767b791b (diff) | |
download | src-6d8fdfa9d5e7d4871c5039b0131829f9cbefeee9.tar.gz src-6d8fdfa9d5e7d4871c5039b0131829f9cbefeee9.zip |
Rework IP encapsulation handling code.
Currently it has several disadvantages:
- it uses a single mutex to protect internal structures. The mutex is used by
both the data path and the control path, thus there is no parallelism at all.
- it uses a single list to keep encap handlers for both INET and INET6
families.
- struct encaptab keeps unneeded information (src, dst, masks, protosw),
that isn't used by code in the source tree.
- matches are prioritized and when many tunneling interfaces are
registered, encapcheck handler of each interface is invoked for each
packet. The search takes O(n) for n interfaces. All this work is done
with exclusive lock held.
What this patch includes:
- the datapath is converted to be lockless using epoch(9) KPI.
- struct encaptab now linked using CK_LIST.
- all unused fields removed from struct encaptab. Several new fields
added: min_length is the minimum packet length that an encapsulation
handler expects to see; exact_match is the maximum number of bits that
an encapsulation handler can return when it wants to consume a packet.
- IPv6 and IPv4 handlers are stored in separate lists;
- added new "encap_lookup_t" method, that will be used later. It is
intended to speed up the lookup of the needed interface when gif(4)/gre(4)
have many interfaces.
- the need to use protosw structure is eliminated. Only the pr_input
method of this structure was used, so I don't see the need to keep
using it.
- encap_input_t method changed to avoid using mbuf tags to store softc
pointer. Now it is passed directly through encap_input_t method.
encap_getarg() function is removed.
- all sockaddr structures and code that uses them removed. We don't have
any code in the tree that uses them. All consumers use encap_attach_func()
method, that relies on invoking of encapcheck() to determine the needed
handler.
- introduced struct encap_config, it contains parameters of encap handler
that is going to be registered by encap_attach() function.
- encap handlers are stored in lists ordered by exact_match value, thus
handlers that need more bits to match will be checked first, and if
encapcheck method returns exact_match value, the search will be stopped.
- all current consumers changed to use new KPI.
Reviewed by: mmacy
Sponsored by: Yandex LLC
Differential Revision: https://reviews.freebsd.org/D15617
Notes
Notes:
svn path=/head/; revision=334671
Diffstat (limited to 'sys/net')
-rw-r--r-- | sys/net/if_gif.c | 22 | ||||
-rw-r--r-- | sys/net/if_gre.c | 39 | ||||
-rw-r--r-- | sys/net/if_gre.h | 2 | ||||
-rw-r--r-- | sys/net/if_me.c | 50 | ||||
-rw-r--r-- | sys/net/if_stf.c | 42 |
5 files changed, 74 insertions, 81 deletions
diff --git a/sys/net/if_gif.c b/sys/net/if_gif.c index 011ad7aedeaf..a8c6fb5c9c7d 100644 --- a/sys/net/if_gif.c +++ b/sys/net/if_gif.c @@ -923,12 +923,24 @@ bad: } static void -gif_detach(struct gif_softc *sc) +gif_detach(struct gif_softc *sc, int family) { sx_assert(&gif_ioctl_sx, SA_XLOCKED); - if (sc->gif_ecookie != NULL) - encap_detach(sc->gif_ecookie); + if (sc->gif_ecookie != NULL) { + switch (family) { +#ifdef INET + case AF_INET: + ip_encap_detach(sc->gif_ecookie); + break; +#endif +#ifdef INET6 + case AF_INET6: + ip6_encap_detach(sc->gif_ecookie); + break; +#endif + } + } sc->gif_ecookie = NULL; } @@ -1020,7 +1032,7 @@ gif_set_tunnel(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst) } if (sc->gif_family != src->sa_family) - gif_detach(sc); + gif_detach(sc, sc->gif_family); if (sc->gif_family == 0 || sc->gif_family != src->sa_family) error = gif_attach(sc, src->sa_family); @@ -1058,7 +1070,7 @@ gif_delete_tunnel(struct ifnet *ifp) sc->gif_family = 0; GIF_WUNLOCK(sc); if (family != 0) { - gif_detach(sc); + gif_detach(sc, family); free(sc->gif_hdr, M_GIF); } ifp->if_drv_flags &= ~IFF_DRV_RUNNING; diff --git a/sys/net/if_gre.c b/sys/net/if_gre.c index a3e578e4cad3..2160f350bf9b 100644 --- a/sys/net/if_gre.c +++ b/sys/net/if_gre.c @@ -551,12 +551,24 @@ gre_updatehdr(struct gre_softc *sc) } static void -gre_detach(struct gre_softc *sc) +gre_detach(struct gre_softc *sc, int family) { sx_assert(&gre_ioctl_sx, SA_XLOCKED); - if (sc->gre_ecookie != NULL) - encap_detach(sc->gre_ecookie); + if (sc->gre_ecookie != NULL) { + switch (family) { +#ifdef INET + case AF_INET: + ip_encap_detach(sc->gre_ecookie); + break; +#endif +#ifdef INET6 + case AF_INET6: + ip6_encap_detach(sc->gre_ecookie); + break; +#endif + } + } sc->gre_ecookie = NULL; } @@ -624,7 +636,7 @@ gre_set_tunnel(struct ifnet *ifp, struct sockaddr *src, return (EAFNOSUPPORT); } if (sc->gre_family != 0) - gre_detach(sc); + gre_detach(sc, sc->gre_family); GRE_WLOCK(sc); if (sc->gre_family != 0) 
free(sc->gre_hdr, M_GRE); @@ -666,7 +678,7 @@ gre_delete_tunnel(struct ifnet *ifp) sc->gre_family = 0; GRE_WUNLOCK(sc); if (family != 0) { - gre_detach(sc); + gre_detach(sc, family); free(sc->gre_hdr, M_GRE); } ifp->if_drv_flags &= ~IFF_DRV_RUNNING; @@ -674,12 +686,11 @@ gre_delete_tunnel(struct ifnet *ifp) } int -gre_input(struct mbuf **mp, int *offp, int proto) +gre_input(struct mbuf *m, int off, int proto, void *arg) { - struct gre_softc *sc; + struct gre_softc *sc = arg; struct grehdr *gh; struct ifnet *ifp; - struct mbuf *m; uint32_t *opts; #ifdef notyet uint32_t key; @@ -687,12 +698,8 @@ gre_input(struct mbuf **mp, int *offp, int proto) uint16_t flags; int hlen, isr, af; - m = *mp; - sc = encap_getarg(m); - KASSERT(sc != NULL, ("encap_getarg returned NULL")); - ifp = GRE2IFP(sc); - hlen = *offp + sizeof(struct grehdr) + 4 * sizeof(uint32_t); + hlen = off + sizeof(struct grehdr) + 4 * sizeof(uint32_t); if (m->m_pkthdr.len < hlen) goto drop; if (m->m_len < hlen) { @@ -700,7 +707,7 @@ gre_input(struct mbuf **mp, int *offp, int proto) if (m == NULL) goto drop; } - gh = (struct grehdr *)mtodo(m, *offp); + gh = (struct grehdr *)mtodo(m, off); flags = ntohs(gh->gre_flags); if (flags & ~GRE_FLAGS_MASK) goto drop; @@ -710,7 +717,7 @@ gre_input(struct mbuf **mp, int *offp, int proto) /* reserved1 field must be zero */ if (((uint16_t *)opts)[1] != 0) goto drop; - if (in_cksum_skip(m, m->m_pkthdr.len, *offp) != 0) + if (in_cksum_skip(m, m->m_pkthdr.len, off) != 0) goto drop; hlen += 2 * sizeof(uint16_t); opts++; @@ -760,7 +767,7 @@ gre_input(struct mbuf **mp, int *offp, int proto) default: goto drop; } - m_adj(m, *offp + hlen); + m_adj(m, off + hlen); m_clrprotoflags(m); m->m_pkthdr.rcvif = ifp; M_SETFIB(m, ifp->if_fib); diff --git a/sys/net/if_gre.h b/sys/net/if_gre.h index 0eac9e9f33b0..1a068d4b1118 100644 --- a/sys/net/if_gre.h +++ b/sys/net/if_gre.h @@ -101,7 +101,7 @@ struct gre_softc { #define gre_oip gre_gihdr->gi_ip #define gre_oip6 gre_gi6hdr->gi6_ip6 -int 
gre_input(struct mbuf **, int *, int); +int gre_input(struct mbuf *, int, int, void *); #ifdef INET int in_gre_attach(struct gre_softc *); int in_gre_output(struct mbuf *, int, int); diff --git a/sys/net/if_me.c b/sys/net/if_me.c index 4ab013bdcce2..806c57eabb4f 100644 --- a/sys/net/if_me.c +++ b/sys/net/if_me.c @@ -37,7 +37,6 @@ __FBSDID("$FreeBSD$"); #include <sys/mbuf.h> #include <sys/priv.h> #include <sys/proc.h> -#include <sys/protosw.h> #include <sys/rmlock.h> #include <sys/socket.h> #include <sys/sockio.h> @@ -122,11 +121,22 @@ static int me_transmit(struct ifnet *, struct mbuf *); static int me_ioctl(struct ifnet *, u_long, caddr_t); static int me_output(struct ifnet *, struct mbuf *, const struct sockaddr *, struct route *); -static int me_input(struct mbuf **, int *, int); +static int me_input(struct mbuf *, int, int, void *); static int me_set_tunnel(struct ifnet *, struct sockaddr_in *, struct sockaddr_in *); static void me_delete_tunnel(struct ifnet *); +static int me_encapcheck(const struct mbuf *, int, int, void *); + +#define ME_MINLEN (sizeof(struct ip) + sizeof(struct mobhdr) -\ + sizeof(in_addr_t)) +static const struct encap_config ipv4_encap_cfg = { + .proto = IPPROTO_MOBILE, + .min_length = ME_MINLEN, + .exact_match = (sizeof(in_addr_t) << 4) + 8, + .check = me_encapcheck, + .input = me_input +}; SYSCTL_DECL(_net_link); static SYSCTL_NODE(_net_link, IFT_TUNNEL, me, CTLFLAG_RW, 0, @@ -140,19 +150,6 @@ static VNET_DEFINE(int, max_me_nesting) = MAX_ME_NEST; SYSCTL_INT(_net_link_me, OID_AUTO, max_nesting, CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(max_me_nesting), 0, "Max nested tunnels"); -extern struct domain inetdomain; -static const struct protosw in_mobile_protosw = { - .pr_type = SOCK_RAW, - .pr_domain = &inetdomain, - .pr_protocol = IPPROTO_MOBILE, - .pr_flags = PR_ATOMIC|PR_ADDR, - .pr_input = me_input, - .pr_output = rip_output, - .pr_ctlinput = rip_ctlinput, - .pr_ctloutput = rip_ctloutput, - .pr_usrreqs = &rip_usrreqs -}; - static void 
vnet_me_init(const void *unused __unused) { @@ -334,17 +331,13 @@ me_encapcheck(const struct mbuf *m, int off, int proto, void *arg) M_ASSERTPKTHDR(m); - if (m->m_pkthdr.len < sizeof(struct ip) + sizeof(struct mobhdr) - - sizeof(struct in_addr)) - return (0); - ret = 0; ME_RLOCK(sc); if (ME_READY(sc)) { ip = mtod(m, struct ip *); if (sc->me_src.s_addr == ip->ip_dst.s_addr && sc->me_dst.s_addr == ip->ip_src.s_addr) - ret = 32 * 2; + ret = 32 * 2 + 8; } ME_RUNLOCK(sc); return (ret); @@ -376,8 +369,8 @@ me_set_tunnel(struct ifnet *ifp, struct sockaddr_in *src, ME_WUNLOCK(sc); if (sc->me_ecookie == NULL) - sc->me_ecookie = encap_attach_func(AF_INET, IPPROTO_MOBILE, - me_encapcheck, &in_mobile_protosw, sc); + sc->me_ecookie = ip_encap_attach(&ipv4_encap_cfg, + sc, M_WAITOK); if (sc->me_ecookie != NULL) { ifp->if_drv_flags |= IFF_DRV_RUNNING; if_link_state_change(ifp, LINK_STATE_UP); @@ -392,7 +385,7 @@ me_delete_tunnel(struct ifnet *ifp) sx_assert(&me_ioctl_sx, SA_XLOCKED); if (sc->me_ecookie != NULL) - encap_detach(sc->me_ecookie); + ip_encap_detach(sc->me_ecookie); sc->me_ecookie = NULL; ME_WLOCK(sc); sc->me_src.s_addr = 0; @@ -414,20 +407,15 @@ me_in_cksum(uint16_t *p, int nwords) return (~sum); } -int -me_input(struct mbuf **mp, int *offp, int proto) +static int +me_input(struct mbuf *m, int off, int proto, void *arg) { - struct me_softc *sc; + struct me_softc *sc = arg; struct mobhdr *mh; struct ifnet *ifp; - struct mbuf *m; struct ip *ip; int hlen; - m = *mp; - sc = encap_getarg(m); - KASSERT(sc != NULL, ("encap_getarg returned NULL")); - ifp = ME2IFP(sc); /* checks for short packets */ hlen = sizeof(struct mobhdr); diff --git a/sys/net/if_stf.c b/sys/net/if_stf.c index b10202076d02..f073e20858cd 100644 --- a/sys/net/if_stf.c +++ b/sys/net/if_stf.c @@ -85,7 +85,6 @@ #include <sys/kernel.h> #include <sys/lock.h> #include <sys/module.h> -#include <sys/protosw.h> #include <sys/proc.h> #include <sys/queue.h> #include <sys/rmlock.h> @@ -151,19 +150,7 @@ static const 
char stfname[] = "stf"; static MALLOC_DEFINE(M_STF, stfname, "6to4 Tunnel Interface"); static const int ip_stf_ttl = 40; -extern struct domain inetdomain; -static int in_stf_input(struct mbuf **, int *, int); -static struct protosw in_stf_protosw = { - .pr_type = SOCK_RAW, - .pr_domain = &inetdomain, - .pr_protocol = IPPROTO_IPV6, - .pr_flags = PR_ATOMIC|PR_ADDR, - .pr_input = in_stf_input, - .pr_output = rip_output, - .pr_ctloutput = rip_ctloutput, - .pr_usrreqs = &rip_usrreqs -}; - +static int in_stf_input(struct mbuf *, int, int, void *); static char *stfnames[] = {"stf0", "stf", "6to4", NULL}; static int stfmodevent(module_t, int, void *); @@ -183,6 +170,14 @@ static int stf_clone_create(struct if_clone *, char *, size_t, caddr_t); static int stf_clone_destroy(struct if_clone *, struct ifnet *); static struct if_clone *stf_cloner; +static const struct encap_config ipv4_encap_cfg = { + .proto = IPPROTO_IPV6, + .min_length = sizeof(struct ip), + .exact_match = (sizeof(in_addr_t) << 3) + 8, + .check = stf_encapcheck, + .input = in_stf_input +}; + static int stf_clone_match(struct if_clone *ifc, const char *name) { @@ -250,8 +245,7 @@ stf_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) ifp->if_dname = stfname; ifp->if_dunit = IF_DUNIT_NONE; - sc->encap_cookie = encap_attach_func(AF_INET, IPPROTO_IPV6, - stf_encapcheck, &in_stf_protosw, sc); + sc->encap_cookie = ip_encap_attach(&ipv4_encap_cfg, sc, M_WAITOK); if (sc->encap_cookie == NULL) { if_printf(ifp, "attach failed\n"); free(sc, M_STF); @@ -274,7 +268,7 @@ stf_clone_destroy(struct if_clone *ifc, struct ifnet *ifp) struct stf_softc *sc = ifp->if_softc; int err __unused; - err = encap_detach(sc->encap_cookie); + err = ip_encap_detach(sc->encap_cookie); KASSERT(err == 0, ("Unexpected error detaching encap_cookie")); bpfdetach(ifp); if_detach(ifp); @@ -608,18 +602,13 @@ stf_checkaddr6(struct stf_softc *sc, struct in6_addr *in6, struct ifnet *inifp) } static int -in_stf_input(struct mbuf 
**mp, int *offp, int proto) +in_stf_input(struct mbuf *m, int off, int proto, void *arg) { - struct stf_softc *sc; + struct stf_softc *sc = arg; struct ip *ip; struct ip6_hdr *ip6; - struct mbuf *m; u_int8_t otos, itos; struct ifnet *ifp; - int off; - - m = *mp; - off = *offp; if (proto != IPPROTO_IPV6) { m_freem(m); @@ -627,9 +616,6 @@ in_stf_input(struct mbuf **mp, int *offp, int proto) } ip = mtod(m, struct ip *); - - sc = (struct stf_softc *)encap_getarg(m); - if (sc == NULL || (STF2IFP(sc)->if_flags & IFF_UP) == 0) { m_freem(m); return (IPPROTO_DONE); @@ -680,7 +666,7 @@ in_stf_input(struct mbuf **mp, int *offp, int proto) ip6->ip6_flow |= htonl((u_int32_t)itos << 20); m->m_pkthdr.rcvif = ifp; - + if (bpf_peers_present(ifp->if_bpf)) { /* * We need to prepend the address family as |