aboutsummaryrefslogtreecommitdiff
path: root/sys/net
diff options
context:
space:
mode:
authorRobert Watson <rwatson@FreeBSD.org>2009-08-23 20:40:19 +0000
committerRobert Watson <rwatson@FreeBSD.org>2009-08-23 20:40:19 +0000
commit77dfcdc44559d4bca03d156d87c2e214c6172c04 (patch)
tree83b839696bd918acacee8b38993abd3c6857a5fb /sys/net
parentbfdaa523828e5a7e690ec5d7e9b5353bef992d2f (diff)
downloadsrc-77dfcdc44559d4bca03d156d87c2e214c6172c04.tar.gz
src-77dfcdc44559d4bca03d156d87c2e214c6172c04.zip
Rework global locks for interface list and index management, correcting
several critical bugs, including race conditions and lock order issues: Replace the single rwlock, ifnet_lock, with two locks, an rwlock and an sxlock. Either can be held to stablize the lists and indexes, but both are required to write. This allows the list to be held stable in both network interrupt contexts and sleepable user threads across sleeping memory allocations or device driver interactions. As before, writes to the interface list must occur from sleepable contexts. Reviewed by: bz, julian MFC after: 3 days
Notes
Notes: svn path=/head/; revision=196481
Diffstat (limited to 'sys/net')
-rw-r--r--sys/net/bridgestp.c4
-rw-r--r--sys/net/if.c94
-rw-r--r--sys/net/if_llatbl.c8
-rw-r--r--sys/net/if_var.h42
-rw-r--r--sys/net/if_vlan.c4
5 files changed, 100 insertions, 52 deletions
diff --git a/sys/net/bridgestp.c b/sys/net/bridgestp.c
index 3e65113c2d56..2993838ac683 100644
--- a/sys/net/bridgestp.c
+++ b/sys/net/bridgestp.c
@@ -2019,7 +2019,7 @@ bstp_reinit(struct bstp_state *bs)
* not need to be part of the bridge, it just needs to be a unique
* value.
*/
- IFNET_RLOCK();
+ IFNET_RLOCK_NOSLEEP();
TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
if (ifp->if_type != IFT_ETHER)
continue;
@@ -2036,7 +2036,7 @@ bstp_reinit(struct bstp_state *bs)
continue;
}
}
- IFNET_RUNLOCK();
+ IFNET_RUNLOCK_NOSLEEP();
if (LIST_EMPTY(&bs->bs_bplist) || mif == NULL) {
/* Set the bridge and root id (lower bits) to zero */
diff --git a/sys/net/if.c b/sys/net/if.c
index 5d108981cdde..fe497d69de2d 100644
--- a/sys/net/if.c
+++ b/sys/net/if.c
@@ -154,14 +154,26 @@ VNET_DEFINE(struct ifgrouphead, ifg_head);
VNET_DEFINE(int, if_index);
static VNET_DEFINE(int, if_indexlim) = 8;
-/* Table of ifnet by index. Locked with ifnet_lock. */
+/* Table of ifnet by index. */
static VNET_DEFINE(struct ifindex_entry *, ifindex_table);
#define V_if_indexlim VNET(if_indexlim)
#define V_ifindex_table VNET(ifindex_table)
int ifqmaxlen = IFQ_MAXLEN;
-struct rwlock ifnet_lock;
+
+/*
+ * The global network interface list (V_ifnet) and related state (such as
+ * if_index, if_indexlim, and ifindex_table) are protected by an sxlock and
+ * an rwlock. Either may be acquired shared to stablize the list, but both
+ * must be acquired writable to modify the list. This model allows us to
+ * both stablize the interface list during interrupt thread processing, but
+ * also to stablize it over long-running ioctls, without introducing priority
+ * inversions and deadlocks.
+ */
+struct rwlock ifnet_rwlock;
+struct sx ifnet_sxlock;
+
static if_com_alloc_t *if_com_alloc[256];
static if_com_free_t *if_com_free[256];
@@ -188,9 +200,9 @@ ifnet_byindex(u_short idx)
{
struct ifnet *ifp;
- IFNET_RLOCK();
+ IFNET_RLOCK_NOSLEEP();
ifp = ifnet_byindex_locked(idx);
- IFNET_RUNLOCK();
+ IFNET_RUNLOCK_NOSLEEP();
return (ifp);
}
@@ -199,19 +211,19 @@ ifnet_byindex_ref(u_short idx)
{
struct ifnet *ifp;
- IFNET_RLOCK();
+ IFNET_RLOCK_NOSLEEP();
ifp = ifnet_byindex_locked(idx);
if (ifp == NULL || (ifp->if_flags & IFF_DYING)) {
- IFNET_RUNLOCK();
+ IFNET_RUNLOCK_NOSLEEP();
return (NULL);
}
if_ref(ifp);
- IFNET_RUNLOCK();
+ IFNET_RUNLOCK_NOSLEEP();
return (ifp);
}
static void
-ifnet_setbyindex(u_short idx, struct ifnet *ifp)
+ifnet_setbyindex_locked(u_short idx, struct ifnet *ifp)
{
IFNET_WLOCK_ASSERT();
@@ -219,16 +231,25 @@ ifnet_setbyindex(u_short idx, struct ifnet *ifp)
V_ifindex_table[idx].ife_ifnet = ifp;
}
+static void
+ifnet_setbyindex(u_short idx, struct ifnet *ifp)
+{
+
+ IFNET_WLOCK();
+ ifnet_setbyindex_locked(idx, ifp);
+ IFNET_WUNLOCK();
+}
+
struct ifaddr *
ifaddr_byindex(u_short idx)
{
struct ifaddr *ifa;
- IFNET_RLOCK();
+ IFNET_RLOCK_NOSLEEP();
ifa = ifnet_byindex_locked(idx)->if_addr;
if (ifa != NULL)
ifa_ref(ifa);
- IFNET_RUNLOCK();
+ IFNET_RUNLOCK_NOSLEEP();
return (ifa);
}
@@ -361,9 +382,7 @@ if_alloc(u_char type)
ifq_init(&ifp->if_snd, ifp);
refcount_init(&ifp->if_refcount, 1); /* Index reference. */
- IFNET_WLOCK();
ifnet_setbyindex(ifp->if_index, ifp);
- IFNET_WUNLOCK();
return (ifp);
}
@@ -383,7 +402,7 @@ if_free_internal(struct ifnet *ifp)
KASSERT(ifp == ifnet_byindex_locked(ifp->if_index),
("%s: freeing unallocated ifnet", ifp->if_xname));
- ifnet_setbyindex(ifp->if_index, NULL);
+ ifnet_setbyindex_locked(ifp->if_index, NULL);
while (V_if_index > 0 && ifnet_byindex_locked(V_if_index) == NULL)
V_if_index--;
IFNET_WUNLOCK();
@@ -855,11 +874,14 @@ if_vmove(struct ifnet *ifp, struct vnet *new_vnet)
if_detach_internal(ifp, 1);
/*
- * Unlink the ifnet from ifindex_table[] in current vnet,
- * and shrink the if_index for that vnet if possible.
+ * Unlink the ifnet from ifindex_table[] in current vnet, and shrink
+ * the if_index for that vnet if possible.
+ *
+ * NOTE: IFNET_WLOCK/IFNET_WUNLOCK() are assumed to be unvirtualized,
+ * or we'd lock on one vnet and unlock on another.
*/
IFNET_WLOCK();
- ifnet_setbyindex(ifp->if_index, NULL);
+ ifnet_setbyindex_locked(ifp->if_index, NULL);
while (V_if_index > 0 && ifnet_byindex_locked(V_if_index) == NULL)
V_if_index--;
IFNET_WUNLOCK();
@@ -886,7 +908,7 @@ if_vmove(struct ifnet *ifp, struct vnet *new_vnet)
V_if_index = ifp->if_index;
if (V_if_index >= V_if_indexlim)
if_grow();
- ifnet_setbyindex(ifp->if_index, ifp);
+ ifnet_setbyindex_locked(ifp->if_index, ifp);
IFNET_WUNLOCK();
if_attach_internal(ifp, 1);
@@ -1368,7 +1390,7 @@ ifa_ifwithaddr_internal(struct sockaddr *addr, int getref)
struct ifnet *ifp;
struct ifaddr *ifa;
- IFNET_RLOCK();
+ IFNET_RLOCK_NOSLEEP();
TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
IF_ADDR_LOCK(ifp);
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
@@ -1395,7 +1417,7 @@ ifa_ifwithaddr_internal(struct sockaddr *addr, int getref)
}
ifa = NULL;
done:
- IFNET_RUNLOCK();
+ IFNET_RUNLOCK_NOSLEEP();
return (ifa);
}
@@ -1423,7 +1445,7 @@ ifa_ifwithbroadaddr(struct sockaddr *addr)
struct ifnet *ifp;
struct ifaddr *ifa;
- IFNET_RLOCK();
+ IFNET_RLOCK_NOSLEEP();
TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
IF_ADDR_LOCK(ifp);
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
@@ -1442,7 +1464,7 @@ ifa_ifwithbroadaddr(struct sockaddr *addr)
}
ifa = NULL;
done:
- IFNET_RUNLOCK();
+ IFNET_RUNLOCK_NOSLEEP();
return (ifa);
}
@@ -1456,7 +1478,7 @@ ifa_ifwithdstaddr(struct sockaddr *addr)
struct ifnet *ifp;
struct ifaddr *ifa;
- IFNET_RLOCK();
+ IFNET_RLOCK_NOSLEEP();
TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
continue;
@@ -1475,7 +1497,7 @@ ifa_ifwithdstaddr(struct sockaddr *addr)
}
ifa = NULL;
done:
- IFNET_RUNLOCK();
+ IFNET_RUNLOCK_NOSLEEP();
return (ifa);
}
@@ -1508,7 +1530,7 @@ ifa_ifwithnet(struct sockaddr *addr)
* we find one, as we release the IF_ADDR_LOCK() that kept it stable
* when we move onto the next interface.
*/
- IFNET_RLOCK();
+ IFNET_RLOCK_NOSLEEP();
TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
IF_ADDR_LOCK(ifp);
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
@@ -1584,7 +1606,7 @@ next: continue;
ifa = ifa_maybe;
ifa_maybe = NULL;
done:
- IFNET_RUNLOCK();
+ IFNET_RUNLOCK_NOSLEEP();
if (ifa_maybe != NULL)
ifa_free(ifa_maybe);
return (ifa);
@@ -1863,7 +1885,7 @@ if_slowtimo(void *arg)
int s = splimp();
VNET_LIST_RLOCK_NOSLEEP();
- IFNET_RLOCK();
+ IFNET_RLOCK_NOSLEEP();
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
@@ -1874,7 +1896,7 @@ if_slowtimo(void *arg)
}
CURVNET_RESTORE();
}
- IFNET_RUNLOCK();
+ IFNET_RUNLOCK_NOSLEEP();
VNET_LIST_RUNLOCK_NOSLEEP();
splx(s);
timeout(if_slowtimo, (void *)0, hz / IFNET_SLOWHZ);
@@ -1889,7 +1911,7 @@ ifunit_ref(const char *name)
{
struct ifnet *ifp;
- IFNET_RLOCK();
+ IFNET_RLOCK_NOSLEEP();
TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0 &&
!(ifp->if_flags & IFF_DYING))
@@ -1897,7 +1919,7 @@ ifunit_ref(const char *name)
}
if (ifp != NULL)
if_ref(ifp);
- IFNET_RUNLOCK();
+ IFNET_RUNLOCK_NOSLEEP();
return (ifp);
}
@@ -1906,12 +1928,12 @@ ifunit(const char *name)
{
struct ifnet *ifp;
- IFNET_RLOCK();
+ IFNET_RLOCK_NOSLEEP();
TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0)
break;
}
- IFNET_RUNLOCK();
+ IFNET_RUNLOCK_NOSLEEP();
return (ifp);
}
@@ -2521,7 +2543,7 @@ again:
max_len = 0;
valid_len = 0;
- IFNET_RLOCK(); /* could sleep XXX */
+ IFNET_RLOCK();
TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
int addrs;
@@ -2846,13 +2868,13 @@ if_delmulti(struct ifnet *ifp, struct sockaddr *sa)
#ifdef INVARIANTS
struct ifnet *oifp;
- IFNET_RLOCK();
+ IFNET_RLOCK_NOSLEEP();
TAILQ_FOREACH(oifp, &V_ifnet, if_link)
if (ifp == oifp)
break;
if (ifp != oifp)
ifp = NULL;
- IFNET_RUNLOCK();
+ IFNET_RUNLOCK_NOSLEEP();
KASSERT(ifp != NULL, ("%s: ifnet went away", __func__));
#endif
@@ -2895,7 +2917,7 @@ if_delmulti_ifma(struct ifmultiaddr *ifma)
} else {
struct ifnet *oifp;
- IFNET_RLOCK();
+ IFNET_RLOCK_NOSLEEP();
TAILQ_FOREACH(oifp, &V_ifnet, if_link)
if (ifp == oifp)
break;
@@ -2903,7 +2925,7 @@ if_delmulti_ifma(struct ifmultiaddr *ifma)
printf("%s: ifnet %p disappeared\n", __func__, ifp);
ifp = NULL;
}
- IFNET_RUNLOCK();
+ IFNET_RUNLOCK_NOSLEEP();
}
#endif
/*
diff --git a/sys/net/if_llatbl.c b/sys/net/if_llatbl.c
index a1b574f9f371..a3617f50d652 100644
--- a/sys/net/if_llatbl.c
+++ b/sys/net/if_llatbl.c
@@ -200,14 +200,14 @@ lltable_prefix_free(int af, struct sockaddr *prefix, struct sockaddr *mask)
{
struct lltable *llt;
- IFNET_RLOCK();
+ IFNET_RLOCK_NOSLEEP();
SLIST_FOREACH(llt, &lltables, llt_link) {
if (llt->llt_af != af)
continue;
llt->llt_prefix_free(llt, prefix, mask);
}
- IFNET_RUNLOCK();
+ IFNET_RUNLOCK_NOSLEEP();
}
@@ -300,13 +300,13 @@ lla_rt_output(struct rt_msghdr *rtm, struct rt_addrinfo *info)
}
/* XXX linked list may be too expensive */
- IFNET_RLOCK();
+ IFNET_RLOCK_NOSLEEP();
SLIST_FOREACH(llt, &lltables, llt_link) {
if (llt->llt_af == dst->sa_family &&
llt->llt_ifp == ifp)
break;
}
- IFNET_RUNLOCK();
+ IFNET_RUNLOCK_NOSLEEP();
KASSERT(llt != NULL, ("Yep, ugly hacks are bad\n"));
if (flags && LLE_CREATE)
diff --git a/sys/net/if_var.h b/sys/net/if_var.h
index aa83161cce3c..0bfd9f19a8a4 100644
--- a/sys/net/if_var.h
+++ b/sys/net/if_var.h
@@ -85,6 +85,7 @@ struct vnet;
#include <sys/lock.h> /* XXX */
#include <sys/mutex.h> /* XXX */
#include <sys/rwlock.h> /* XXX */
+#include <sys/sx.h> /* XXX */
#include <sys/event.h> /* XXX */
#include <sys/_task.h>
@@ -754,14 +755,39 @@ struct ifmultiaddr {
#ifdef _KERNEL
-extern struct rwlock ifnet_lock;
-#define IFNET_LOCK_INIT() \
- rw_init_flags(&ifnet_lock, "ifnet", RW_RECURSE)
-#define IFNET_WLOCK() rw_wlock(&ifnet_lock)
-#define IFNET_WUNLOCK() rw_wunlock(&ifnet_lock)
-#define IFNET_WLOCK_ASSERT() rw_assert(&ifnet_lock, RA_LOCKED)
-#define IFNET_RLOCK() rw_rlock(&ifnet_lock)
-#define IFNET_RUNLOCK() rw_runlock(&ifnet_lock)
+extern struct rwlock ifnet_rwlock;
+extern struct sx ifnet_sxlock;
+
+#define IFNET_LOCK_INIT() do { \
+ rw_init_flags(&ifnet_rwlock, "ifnet_rw", RW_RECURSE); \
+ sx_init_flags(&ifnet_sxlock, "ifnet_sx", SX_RECURSE); \
+} while(0)
+
+#define IFNET_WLOCK() do { \
+ sx_xlock(&ifnet_sxlock); \
+ rw_wlock(&ifnet_rwlock); \
+} while (0)
+
+#define IFNET_WUNLOCK() do { \
+ rw_wunlock(&ifnet_rwlock); \
+ sx_xunlock(&ifnet_sxlock); \
+} while (0)
+
+/*
+ * To assert the ifnet lock, you must know not only whether it's for read or
+ * write, but also whether it was acquired with sleep support or not.
+ */
+#define IFNET_RLOCK_ASSERT() sx_assert(&ifnet_sxlock, SA_SLOCKED)
+#define IFNET_RLOCK_NOSLEEP_ASSERT() rw_assert(&ifnet_rwlock, RA_RLOCKED)
+#define IFNET_WLOCK_ASSERT() do { \
+ sx_assert(&ifnet_sxlock, SA_XLOCKED); \
+ rw_assert(&ifnet_rwlock, RA_WLOCKED); \
+} while (0)
+
+#define IFNET_RLOCK() sx_slock(&ifnet_sxlock)
+#define IFNET_RLOCK_NOSLEEP() rw_rlock(&ifnet_rwlock)
+#define IFNET_RUNLOCK() sx_sunlock(&ifnet_sxlock)
+#define IFNET_RUNLOCK_NOSLEEP() rw_runlock(&ifnet_rwlock)
/*
* Look up an ifnet given its index; the _ref variant also acquires a
diff --git a/sys/net/if_vlan.c b/sys/net/if_vlan.c
index 04c2c0dc6448..126d83ffb35d 100644
--- a/sys/net/if_vlan.c
+++ b/sys/net/if_vlan.c
@@ -580,7 +580,7 @@ vlan_clone_match_ethertag(struct if_clone *ifc, const char *name, int *tag)
int t = 0;
/* Check for <etherif>.<vlan> style interface names. */
- IFNET_RLOCK();
+ IFNET_RLOCK_NOSLEEP();
TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
if (ifp->if_type != IFT_ETHER)
continue;
@@ -598,7 +598,7 @@ vlan_clone_match_ethertag(struct if_clone *ifc, const char *name, int *tag)
*tag = t;
break;
}
- IFNET_RUNLOCK();
+ IFNET_RUNLOCK_NOSLEEP();
return (ifp);
}