aboutsummaryrefslogtreecommitdiff
path: root/sys/net
diff options
context:
space:
mode:
author: Andrey V. Elsukov <ae@FreeBSD.org> 2018-06-05 20:51:01 +0000
committer: Andrey V. Elsukov <ae@FreeBSD.org> 2018-06-05 20:51:01 +0000
commit: 6d8fdfa9d5e7d4871c5039b0131829f9cbefeee9 (patch)
tree: 0ebd65e97a5973725170f97863d5c1133dc381ce /sys/net
parent: 56009ba0ed678cffa7bece86518aef47767b791b (diff)
download: src-6d8fdfa9d5e7d4871c5039b0131829f9cbefeee9.tar.gz
src-6d8fdfa9d5e7d4871c5039b0131829f9cbefeee9.zip
Rework IP encapsulation handling code.
Currently it has several disadvantages: - it uses a single mutex to protect internal structures. The mutex is used by both the data path and the control path, thus there is no parallelism at all. - it uses a single list to keep encap handlers for both INET and INET6 families. - struct encaptab keeps unneeded information (src, dst, masks, protosw) that isn't used by code in the source tree. - matches are prioritized, and when many tunneling interfaces are registered, the encapcheck handler of each interface is invoked for each packet. The search takes O(n) for n interfaces. All this work is done with an exclusive lock held. What this patch includes: - the datapath is converted to be lockless using the epoch(9) KPI. - struct encaptab is now linked using CK_LIST. - all unused fields are removed from struct encaptab. Several new fields are added: min_length is the minimum packet length that an encapsulation handler expects to see; exact_match is the maximum number of bits that an encapsulation handler can return when it wants to consume a packet. - IPv6 and IPv4 handlers are stored in separate lists; - added a new "encap_lookup_t" method that will be used later. It is intended to speed up the lookup of the needed interface when gif(4)/gre(4) have many interfaces. - the need to use the protosw structure is eliminated. Only the pr_input method was used from this structure, so I don't see the need to keep using it. - the encap_input_t method is changed to avoid using mbuf tags to store the softc pointer. Now it is passed directly through the encap_input_t method. The encap_getarg() function is removed. - all sockaddr structures and the code that uses them are removed. We don't have any code in the tree that uses them. All consumers use the encap_attach_func() method, which relies on invoking encapcheck() to determine the needed handler. - introduced struct encap_config; it contains the parameters of the encap handler that is going to be registered by the encap_attach() function. 
- encap handlers are stored in lists ordered by exact_match value, thus handlers that need more bits to match will be checked first, and if the encapcheck method returns the exact_match value, the search will be stopped. - all current consumers are changed to use the new KPI. Reviewed by: mmacy Sponsored by: Yandex LLC Differential Revision: https://reviews.freebsd.org/D15617
Notes
Notes: svn path=/head/; revision=334671
Diffstat (limited to 'sys/net')
-rw-r--r-- sys/net/if_gif.c 22
-rw-r--r-- sys/net/if_gre.c 39
-rw-r--r-- sys/net/if_gre.h 2
-rw-r--r-- sys/net/if_me.c 50
-rw-r--r-- sys/net/if_stf.c 42
5 files changed, 74 insertions, 81 deletions
diff --git a/sys/net/if_gif.c b/sys/net/if_gif.c
index 011ad7aedeaf..a8c6fb5c9c7d 100644
--- a/sys/net/if_gif.c
+++ b/sys/net/if_gif.c
@@ -923,12 +923,24 @@ bad:
}
static void
-gif_detach(struct gif_softc *sc)
+gif_detach(struct gif_softc *sc, int family)
{
sx_assert(&gif_ioctl_sx, SA_XLOCKED);
- if (sc->gif_ecookie != NULL)
- encap_detach(sc->gif_ecookie);
+ if (sc->gif_ecookie != NULL) {
+ switch (family) {
+#ifdef INET
+ case AF_INET:
+ ip_encap_detach(sc->gif_ecookie);
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ ip6_encap_detach(sc->gif_ecookie);
+ break;
+#endif
+ }
+ }
sc->gif_ecookie = NULL;
}
@@ -1020,7 +1032,7 @@ gif_set_tunnel(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst)
}
if (sc->gif_family != src->sa_family)
- gif_detach(sc);
+ gif_detach(sc, sc->gif_family);
if (sc->gif_family == 0 ||
sc->gif_family != src->sa_family)
error = gif_attach(sc, src->sa_family);
@@ -1058,7 +1070,7 @@ gif_delete_tunnel(struct ifnet *ifp)
sc->gif_family = 0;
GIF_WUNLOCK(sc);
if (family != 0) {
- gif_detach(sc);
+ gif_detach(sc, family);
free(sc->gif_hdr, M_GIF);
}
ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
diff --git a/sys/net/if_gre.c b/sys/net/if_gre.c
index a3e578e4cad3..2160f350bf9b 100644
--- a/sys/net/if_gre.c
+++ b/sys/net/if_gre.c
@@ -551,12 +551,24 @@ gre_updatehdr(struct gre_softc *sc)
}
static void
-gre_detach(struct gre_softc *sc)
+gre_detach(struct gre_softc *sc, int family)
{
sx_assert(&gre_ioctl_sx, SA_XLOCKED);
- if (sc->gre_ecookie != NULL)
- encap_detach(sc->gre_ecookie);
+ if (sc->gre_ecookie != NULL) {
+ switch (family) {
+#ifdef INET
+ case AF_INET:
+ ip_encap_detach(sc->gre_ecookie);
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ ip6_encap_detach(sc->gre_ecookie);
+ break;
+#endif
+ }
+ }
sc->gre_ecookie = NULL;
}
@@ -624,7 +636,7 @@ gre_set_tunnel(struct ifnet *ifp, struct sockaddr *src,
return (EAFNOSUPPORT);
}
if (sc->gre_family != 0)
- gre_detach(sc);
+ gre_detach(sc, sc->gre_family);
GRE_WLOCK(sc);
if (sc->gre_family != 0)
free(sc->gre_hdr, M_GRE);
@@ -666,7 +678,7 @@ gre_delete_tunnel(struct ifnet *ifp)
sc->gre_family = 0;
GRE_WUNLOCK(sc);
if (family != 0) {
- gre_detach(sc);
+ gre_detach(sc, family);
free(sc->gre_hdr, M_GRE);
}
ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
@@ -674,12 +686,11 @@ gre_delete_tunnel(struct ifnet *ifp)
}
int
-gre_input(struct mbuf **mp, int *offp, int proto)
+gre_input(struct mbuf *m, int off, int proto, void *arg)
{
- struct gre_softc *sc;
+ struct gre_softc *sc = arg;
struct grehdr *gh;
struct ifnet *ifp;
- struct mbuf *m;
uint32_t *opts;
#ifdef notyet
uint32_t key;
@@ -687,12 +698,8 @@ gre_input(struct mbuf **mp, int *offp, int proto)
uint16_t flags;
int hlen, isr, af;
- m = *mp;
- sc = encap_getarg(m);
- KASSERT(sc != NULL, ("encap_getarg returned NULL"));
-
ifp = GRE2IFP(sc);
- hlen = *offp + sizeof(struct grehdr) + 4 * sizeof(uint32_t);
+ hlen = off + sizeof(struct grehdr) + 4 * sizeof(uint32_t);
if (m->m_pkthdr.len < hlen)
goto drop;
if (m->m_len < hlen) {
@@ -700,7 +707,7 @@ gre_input(struct mbuf **mp, int *offp, int proto)
if (m == NULL)
goto drop;
}
- gh = (struct grehdr *)mtodo(m, *offp);
+ gh = (struct grehdr *)mtodo(m, off);
flags = ntohs(gh->gre_flags);
if (flags & ~GRE_FLAGS_MASK)
goto drop;
@@ -710,7 +717,7 @@ gre_input(struct mbuf **mp, int *offp, int proto)
/* reserved1 field must be zero */
if (((uint16_t *)opts)[1] != 0)
goto drop;
- if (in_cksum_skip(m, m->m_pkthdr.len, *offp) != 0)
+ if (in_cksum_skip(m, m->m_pkthdr.len, off) != 0)
goto drop;
hlen += 2 * sizeof(uint16_t);
opts++;
@@ -760,7 +767,7 @@ gre_input(struct mbuf **mp, int *offp, int proto)
default:
goto drop;
}
- m_adj(m, *offp + hlen);
+ m_adj(m, off + hlen);
m_clrprotoflags(m);
m->m_pkthdr.rcvif = ifp;
M_SETFIB(m, ifp->if_fib);
diff --git a/sys/net/if_gre.h b/sys/net/if_gre.h
index 0eac9e9f33b0..1a068d4b1118 100644
--- a/sys/net/if_gre.h
+++ b/sys/net/if_gre.h
@@ -101,7 +101,7 @@ struct gre_softc {
#define gre_oip gre_gihdr->gi_ip
#define gre_oip6 gre_gi6hdr->gi6_ip6
-int gre_input(struct mbuf **, int *, int);
+int gre_input(struct mbuf *, int, int, void *);
#ifdef INET
int in_gre_attach(struct gre_softc *);
int in_gre_output(struct mbuf *, int, int);
diff --git a/sys/net/if_me.c b/sys/net/if_me.c
index 4ab013bdcce2..806c57eabb4f 100644
--- a/sys/net/if_me.c
+++ b/sys/net/if_me.c
@@ -37,7 +37,6 @@ __FBSDID("$FreeBSD$");
#include <sys/mbuf.h>
#include <sys/priv.h>
#include <sys/proc.h>
-#include <sys/protosw.h>
#include <sys/rmlock.h>
#include <sys/socket.h>
#include <sys/sockio.h>
@@ -122,11 +121,22 @@ static int me_transmit(struct ifnet *, struct mbuf *);
static int me_ioctl(struct ifnet *, u_long, caddr_t);
static int me_output(struct ifnet *, struct mbuf *,
const struct sockaddr *, struct route *);
-static int me_input(struct mbuf **, int *, int);
+static int me_input(struct mbuf *, int, int, void *);
static int me_set_tunnel(struct ifnet *, struct sockaddr_in *,
struct sockaddr_in *);
static void me_delete_tunnel(struct ifnet *);
+static int me_encapcheck(const struct mbuf *, int, int, void *);
+
+#define ME_MINLEN (sizeof(struct ip) + sizeof(struct mobhdr) -\
+ sizeof(in_addr_t))
+static const struct encap_config ipv4_encap_cfg = {
+ .proto = IPPROTO_MOBILE,
+ .min_length = ME_MINLEN,
+ .exact_match = (sizeof(in_addr_t) << 4) + 8,
+ .check = me_encapcheck,
+ .input = me_input
+};
SYSCTL_DECL(_net_link);
static SYSCTL_NODE(_net_link, IFT_TUNNEL, me, CTLFLAG_RW, 0,
@@ -140,19 +150,6 @@ static VNET_DEFINE(int, max_me_nesting) = MAX_ME_NEST;
SYSCTL_INT(_net_link_me, OID_AUTO, max_nesting, CTLFLAG_RW | CTLFLAG_VNET,
&VNET_NAME(max_me_nesting), 0, "Max nested tunnels");
-extern struct domain inetdomain;
-static const struct protosw in_mobile_protosw = {
- .pr_type = SOCK_RAW,
- .pr_domain = &inetdomain,
- .pr_protocol = IPPROTO_MOBILE,
- .pr_flags = PR_ATOMIC|PR_ADDR,
- .pr_input = me_input,
- .pr_output = rip_output,
- .pr_ctlinput = rip_ctlinput,
- .pr_ctloutput = rip_ctloutput,
- .pr_usrreqs = &rip_usrreqs
-};
-
static void
vnet_me_init(const void *unused __unused)
{
@@ -334,17 +331,13 @@ me_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
M_ASSERTPKTHDR(m);
- if (m->m_pkthdr.len < sizeof(struct ip) + sizeof(struct mobhdr) -
- sizeof(struct in_addr))
- return (0);
-
ret = 0;
ME_RLOCK(sc);
if (ME_READY(sc)) {
ip = mtod(m, struct ip *);
if (sc->me_src.s_addr == ip->ip_dst.s_addr &&
sc->me_dst.s_addr == ip->ip_src.s_addr)
- ret = 32 * 2;
+ ret = 32 * 2 + 8;
}
ME_RUNLOCK(sc);
return (ret);
@@ -376,8 +369,8 @@ me_set_tunnel(struct ifnet *ifp, struct sockaddr_in *src,
ME_WUNLOCK(sc);
if (sc->me_ecookie == NULL)
- sc->me_ecookie = encap_attach_func(AF_INET, IPPROTO_MOBILE,
- me_encapcheck, &in_mobile_protosw, sc);
+ sc->me_ecookie = ip_encap_attach(&ipv4_encap_cfg,
+ sc, M_WAITOK);
if (sc->me_ecookie != NULL) {
ifp->if_drv_flags |= IFF_DRV_RUNNING;
if_link_state_change(ifp, LINK_STATE_UP);
@@ -392,7 +385,7 @@ me_delete_tunnel(struct ifnet *ifp)
sx_assert(&me_ioctl_sx, SA_XLOCKED);
if (sc->me_ecookie != NULL)
- encap_detach(sc->me_ecookie);
+ ip_encap_detach(sc->me_ecookie);
sc->me_ecookie = NULL;
ME_WLOCK(sc);
sc->me_src.s_addr = 0;
@@ -414,20 +407,15 @@ me_in_cksum(uint16_t *p, int nwords)
return (~sum);
}
-int
-me_input(struct mbuf **mp, int *offp, int proto)
+static int
+me_input(struct mbuf *m, int off, int proto, void *arg)
{
- struct me_softc *sc;
+ struct me_softc *sc = arg;
struct mobhdr *mh;
struct ifnet *ifp;
- struct mbuf *m;
struct ip *ip;
int hlen;
- m = *mp;
- sc = encap_getarg(m);
- KASSERT(sc != NULL, ("encap_getarg returned NULL"));
-
ifp = ME2IFP(sc);
/* checks for short packets */
hlen = sizeof(struct mobhdr);
diff --git a/sys/net/if_stf.c b/sys/net/if_stf.c
index b10202076d02..f073e20858cd 100644
--- a/sys/net/if_stf.c
+++ b/sys/net/if_stf.c
@@ -85,7 +85,6 @@
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/module.h>
-#include <sys/protosw.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/rmlock.h>
@@ -151,19 +150,7 @@ static const char stfname[] = "stf";
static MALLOC_DEFINE(M_STF, stfname, "6to4 Tunnel Interface");
static const int ip_stf_ttl = 40;
-extern struct domain inetdomain;
-static int in_stf_input(struct mbuf **, int *, int);
-static struct protosw in_stf_protosw = {
- .pr_type = SOCK_RAW,
- .pr_domain = &inetdomain,
- .pr_protocol = IPPROTO_IPV6,
- .pr_flags = PR_ATOMIC|PR_ADDR,
- .pr_input = in_stf_input,
- .pr_output = rip_output,
- .pr_ctloutput = rip_ctloutput,
- .pr_usrreqs = &rip_usrreqs
-};
-
+static int in_stf_input(struct mbuf *, int, int, void *);
static char *stfnames[] = {"stf0", "stf", "6to4", NULL};
static int stfmodevent(module_t, int, void *);
@@ -183,6 +170,14 @@ static int stf_clone_create(struct if_clone *, char *, size_t, caddr_t);
static int stf_clone_destroy(struct if_clone *, struct ifnet *);
static struct if_clone *stf_cloner;
+static const struct encap_config ipv4_encap_cfg = {
+ .proto = IPPROTO_IPV6,
+ .min_length = sizeof(struct ip),
+ .exact_match = (sizeof(in_addr_t) << 3) + 8,
+ .check = stf_encapcheck,
+ .input = in_stf_input
+};
+
static int
stf_clone_match(struct if_clone *ifc, const char *name)
{
@@ -250,8 +245,7 @@ stf_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
ifp->if_dname = stfname;
ifp->if_dunit = IF_DUNIT_NONE;
- sc->encap_cookie = encap_attach_func(AF_INET, IPPROTO_IPV6,
- stf_encapcheck, &in_stf_protosw, sc);
+ sc->encap_cookie = ip_encap_attach(&ipv4_encap_cfg, sc, M_WAITOK);
if (sc->encap_cookie == NULL) {
if_printf(ifp, "attach failed\n");
free(sc, M_STF);
@@ -274,7 +268,7 @@ stf_clone_destroy(struct if_clone *ifc, struct ifnet *ifp)
struct stf_softc *sc = ifp->if_softc;
int err __unused;
- err = encap_detach(sc->encap_cookie);
+ err = ip_encap_detach(sc->encap_cookie);
KASSERT(err == 0, ("Unexpected error detaching encap_cookie"));
bpfdetach(ifp);
if_detach(ifp);
@@ -608,18 +602,13 @@ stf_checkaddr6(struct stf_softc *sc, struct in6_addr *in6, struct ifnet *inifp)
}
static int
-in_stf_input(struct mbuf **mp, int *offp, int proto)
+in_stf_input(struct mbuf *m, int off, int proto, void *arg)
{
- struct stf_softc *sc;
+ struct stf_softc *sc = arg;
struct ip *ip;
struct ip6_hdr *ip6;
- struct mbuf *m;
u_int8_t otos, itos;
struct ifnet *ifp;
- int off;
-
- m = *mp;
- off = *offp;
if (proto != IPPROTO_IPV6) {
m_freem(m);
@@ -627,9 +616,6 @@ in_stf_input(struct mbuf **mp, int *offp, int proto)
}
ip = mtod(m, struct ip *);
-
- sc = (struct stf_softc *)encap_getarg(m);
-
if (sc == NULL || (STF2IFP(sc)->if_flags & IFF_UP) == 0) {
m_freem(m);
return (IPPROTO_DONE);
@@ -680,7 +666,7 @@ in_stf_input(struct mbuf **mp, int *offp, int proto)
ip6->ip6_flow |= htonl((u_int32_t)itos << 20);
m->m_pkthdr.rcvif = ifp;
-
+
if (bpf_peers_present(ifp->if_bpf)) {
/*
* We need to prepend the address family as