aboutsummaryrefslogtreecommitdiff
path: root/sys/ofed
diff options
context:
space:
mode:
authorGleb Smirnoff <glebius@FreeBSD.org>2017-06-08 21:30:34 +0000
committerGleb Smirnoff <glebius@FreeBSD.org>2017-06-08 21:30:34 +0000
commit779f106aa169256b7010a1d8f963ff656b881e92 (patch)
tree01f96ac5827e2a95a385258086155b37b80ec6b9 /sys/ofed
parent4623e047a7c87148916e11e6c7b80409f6e45519 (diff)
downloadsrc-779f106aa169256b7010a1d8f963ff656b881e92.tar.gz
src-779f106aa169256b7010a1d8f963ff656b881e92.zip
Listening sockets improvements.
o Separate fields of struct socket that belong to listening from fields that belong to normal dataflow, and unionize them. This shrinks the structure a bit. - Take out selinfo's from the socket buffers into the socket. The first reason is to support the braindamaged scenario when a socket is added to kevent(2) and then listen(2) is cast on it. The second reason is that there is a future plan to make socket buffers pluggable, so that for a dataflow socket a socket buffer can be changed, and in this case we also want to keep the same selinfos through the lifetime of a socket. - Remove struct so_accf. Since the listening stuff no longer affects struct socket size, just move its fields into the listening part of the union. - Provide sol_upcall field and enforce that soupcall_set() may be called only on a dataflow socket, which has buffers, and for listening sockets provide solisten_upcall_set(). o Remove ACCEPT_LOCK() global. - Add a mutex to socket, to be used instead of the socket buffer lock to lock fields of struct socket that don't belong to a socket buffer. - Allow acquiring two socket locks, but the first one must belong to a listening socket. - Make soref()/sorele() use atomic(9). This allows in some situations to do soref() without owning the socket lock. There is room for improvement here: it is possible to make sorele() also lock optionally. - Most protocols aren't touched by this change, except UNIX local sockets. See below for more information. o Reduce copy-and-paste in kernel modules that accept connections from listening sockets: provide function solisten_dequeue(), and use it in the following modules: ctl(4), iscsi(4), ng_btsocket(4), ng_ksocket(4), infiniband, rpc. o UNIX local sockets. - Removal of ACCEPT_LOCK() global uncovered several races in the UNIX local sockets. Most races exist around spawning a new socket, when we are connecting to a local listening socket. To cover them, we need to hold locks on both PCBs when spawning a third one. 
This means holding them across sonewconn(). This creates a LOR between pcb locks and unp_list_lock. - To fix the new LOR, abandon the global unp_list_lock in favor of global unp_link_lock. Indeed, separating these two locks didn't provide us any extra parallelism in the UNIX sockets. - Now a call into uipc_attach() may happen with unp_link_lock held if we are accepting, or without unp_link_lock if we are just creating a socket. - Another problem in UNIX sockets is that uipc_close() basically did nothing for a listening socket. The vnode remained open for connections. This is fixed by removing the vnode in uipc_close(). Maybe the right way would be to do it for all sockets (not only listening), simply move the vnode teardown from uipc_detach() to uipc_close()? Sponsored by: Netflix Differential Revision: https://reviews.freebsd.org/D9770
Notes
Notes: svn path=/head/; revision=319722
Diffstat (limited to 'sys/ofed')
-rw-r--r--sys/ofed/drivers/infiniband/core/iwcm.c77
-rw-r--r--sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c1
2 files changed, 23 insertions, 55 deletions
diff --git a/sys/ofed/drivers/infiniband/core/iwcm.c b/sys/ofed/drivers/infiniband/core/iwcm.c
index fa6b674bb0db..a40d9dc8bbe9 100644
--- a/sys/ofed/drivers/infiniband/core/iwcm.c
+++ b/sys/ofed/drivers/infiniband/core/iwcm.c
@@ -416,34 +416,19 @@ dequeue_socket(struct socket *head)
{
struct socket *so;
struct sockaddr_in *remote;
+ int error;
- ACCEPT_LOCK();
- so = TAILQ_FIRST(&head->so_comp);
- if (!so) {
- ACCEPT_UNLOCK();
- return NULL;
- }
-
- SOCK_LOCK(so);
- /*
- * Before changing the flags on the socket, we have to bump the
- * reference count. Otherwise, if the protocol calls sofree(),
- * the socket will be released due to a zero refcount.
- */
- soref(so);
- TAILQ_REMOVE(&head->so_comp, so, so_list);
- head->so_qlen--;
- so->so_qstate &= ~SQ_COMP;
- so->so_head = NULL;
- so->so_state |= SS_NBIO;
- SOCK_UNLOCK(so);
- ACCEPT_UNLOCK();
+ SOLISTEN_LOCK(head);
+ error = solisten_dequeue(head, &so, SOCK_NONBLOCK);
+ if (error == EWOULDBLOCK)
+ return (NULL);
remote = NULL;
soaccept(so, (struct sockaddr **)&remote);
free(remote, M_SONAME);
return so;
}
+
static void
iw_so_event_handler(struct work_struct *_work)
{
@@ -485,18 +470,17 @@ err:
#endif
return;
}
+
static int
iw_so_upcall(struct socket *parent_so, void *arg, int waitflag)
{
struct iwcm_listen_work *work;
- struct socket *so;
struct iw_cm_id *cm_id = arg;
/* check whether iw_so_event_handler() already dequeued this 'so' */
- so = TAILQ_FIRST(&parent_so->so_comp);
- if (!so)
+ if (TAILQ_EMPTY(&parent_so->sol_comp))
return SU_OK;
- work = kzalloc(sizeof(*work), M_NOWAIT);
+ work = kzalloc(sizeof(*work), waitflag);
if (!work)
return -ENOMEM;
work->cm_id = cm_id;
@@ -507,17 +491,21 @@ iw_so_upcall(struct socket *parent_so, void *arg, int waitflag)
return SU_OK;
}
-static void
-iw_init_sock(struct iw_cm_id *cm_id)
+static int
+iw_create_listen(struct iw_cm_id *cm_id, int backlog)
{
struct sockopt sopt;
struct socket *so = cm_id->so;
int on = 1;
+ int rc;
- SOCK_LOCK(so);
- soupcall_set(so, SO_RCV, iw_so_upcall, cm_id);
+ rc = -solisten(cm_id->so, backlog, curthread);
+ if (rc != 0)
+ return (rc);
+ SOLISTEN_LOCK(so);
+ solisten_upcall_set(so, iw_so_upcall, cm_id);
so->so_state |= SS_NBIO;
- SOCK_UNLOCK(so);
+ SOLISTEN_UNLOCK(so);
sopt.sopt_dir = SOPT_SET;
sopt.sopt_level = IPPROTO_TCP;
sopt.sopt_name = TCP_NODELAY;
@@ -525,37 +513,18 @@ iw_init_sock(struct iw_cm_id *cm_id)
sopt.sopt_valsize = sizeof(on);
sopt.sopt_td = NULL;
sosetopt(so, &sopt);
-}
-
-static int
-iw_uninit_socket(struct iw_cm_id *cm_id)
-{
- struct socket *so = cm_id->so;
-
- SOCK_LOCK(so);
- soupcall_clear(so, SO_RCV);
- SOCK_UNLOCK(so);
-
return (0);
}
static int
-iw_create_listen(struct iw_cm_id *cm_id, int backlog)
-{
- int rc;
-
- iw_init_sock(cm_id);
- rc = -solisten(cm_id->so, backlog, curthread);
- if (rc != 0)
- iw_uninit_socket(cm_id);
- return (rc);
-}
-
-static int
iw_destroy_listen(struct iw_cm_id *cm_id)
{
+ struct socket *so = cm_id->so;
- return (iw_uninit_socket(cm_id));
+ SOLISTEN_LOCK(so);
+ solisten_upcall_set(so, NULL, NULL);
+ SOLISTEN_UNLOCK(so);
+ return (0);
}
diff --git a/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c b/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c
index f822466e4abc..a1ae56e52832 100644
--- a/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c
+++ b/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c
@@ -310,7 +310,6 @@ sdp_closed(struct sdp_sock *ssk)
("sdp_closed: !SS_PROTOREF"));
ssk->flags &= ~SDP_SOCKREF;
SDP_WUNLOCK(ssk);
- ACCEPT_LOCK();
SOCK_LOCK(so);
so->so_state &= ~SS_PROTOREF;
sofree(so);