diff options
author | Gleb Smirnoff <glebius@FreeBSD.org> | 2017-06-08 21:30:34 +0000 |
---|---|---|
committer | Gleb Smirnoff <glebius@FreeBSD.org> | 2017-06-08 21:30:34 +0000 |
commit | 779f106aa169256b7010a1d8f963ff656b881e92 (patch) | |
tree | 01f96ac5827e2a95a385258086155b37b80ec6b9 /sys/ofed | |
parent | 4623e047a7c87148916e11e6c7b80409f6e45519 (diff) | |
download | src-779f106aa169256b7010a1d8f963ff656b881e92.tar.gz src-779f106aa169256b7010a1d8f963ff656b881e92.zip |
Listening sockets improvements.
o Separate fields of struct socket that belong to listening from
fields that belong to normal dataflow, and unionize them. This
shrinks the structure a bit.
- Take out selinfo's from the socket buffers into the socket. The
first reason is to support braindamaged scenario when a socket is
added to kevent(2) and then listen(2) is cast on it. The second
reason is that there is future plan to make socket buffers pluggable,
so that for a dataflow socket a socket buffer can be changed, and
in this case we also want to keep same selinfos through the lifetime
of a socket.
- Remove struct so_accf. Since now listening stuff no longer
affects struct socket size, just move its fields into listening part
of the union.
- Provide sol_upcall field and enforce that so_upcall_set() may be called
only on a dataflow socket, which has buffers, and for listening sockets
provide solisten_upcall_set().
o Remove ACCEPT_LOCK() global.
- Add a mutex to socket, to be used instead of socket buffer lock to lock
fields of struct socket that don't belong to a socket buffer.
- Allow to acquire two socket locks, but the first one must belong to a
listening socket.
- Make soref()/sorele() to use atomic(9). This allows in some situations
to do soref() without owning socket lock. There is place for improvement
here, it is possible to make sorele() also to lock optionally.
- Most protocols aren't touched by this change, except UNIX local sockets.
See below for more information.
o Reduce copy-and-paste in kernel modules that accept connections from
listening sockets: provide function solisten_dequeue(), and use it in
the following modules: ctl(4), iscsi(4), ng_btsocket(4), ng_ksocket(4),
infiniband, rpc.
o UNIX local sockets.
- Removal of ACCEPT_LOCK() global uncovered several races in the UNIX
local sockets. Most races exist around spawning a new socket, when we
are connecting to a local listening socket. To cover them, we need to
hold locks on both PCBs when spawning a third one. This means holding
them across sonewconn(). This creates a LOR between pcb locks and
unp_list_lock.
- To fix the new LOR, abandon the global unp_list_lock in favor of global
unp_link_lock. Indeed, separating these two locks didn't provide us any
extra parallelism in the UNIX sockets.
- Now a call into uipc_attach() may happen with unp_link_lock held, if we
are accepting, or without unp_link_lock, if we are just creating
a socket.
- Another problem in UNIX sockets is that uipc_close() basically did nothing
for a listening socket. The vnode remained opened for connections. This
is fixed by removing vnode in uipc_close(). Maybe the right way would be
to do it for all sockets (not only listening), simply move the vnode
teardown from uipc_detach() to uipc_close()?
Sponsored by: Netflix
Differential Revision: https://reviews.freebsd.org/D9770
Notes
Notes:
svn path=/head/; revision=319722
Diffstat (limited to 'sys/ofed')
-rw-r--r-- | sys/ofed/drivers/infiniband/core/iwcm.c | 77 | ||||
-rw-r--r-- | sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c | 1 |
2 files changed, 23 insertions, 55 deletions
diff --git a/sys/ofed/drivers/infiniband/core/iwcm.c b/sys/ofed/drivers/infiniband/core/iwcm.c index fa6b674bb0db..a40d9dc8bbe9 100644 --- a/sys/ofed/drivers/infiniband/core/iwcm.c +++ b/sys/ofed/drivers/infiniband/core/iwcm.c @@ -416,34 +416,19 @@ dequeue_socket(struct socket *head) { struct socket *so; struct sockaddr_in *remote; + int error; - ACCEPT_LOCK(); - so = TAILQ_FIRST(&head->so_comp); - if (!so) { - ACCEPT_UNLOCK(); - return NULL; - } - - SOCK_LOCK(so); - /* - * Before changing the flags on the socket, we have to bump the - * reference count. Otherwise, if the protocol calls sofree(), - * the socket will be released due to a zero refcount. - */ - soref(so); - TAILQ_REMOVE(&head->so_comp, so, so_list); - head->so_qlen--; - so->so_qstate &= ~SQ_COMP; - so->so_head = NULL; - so->so_state |= SS_NBIO; - SOCK_UNLOCK(so); - ACCEPT_UNLOCK(); + SOLISTEN_LOCK(head); + error = solisten_dequeue(head, &so, SOCK_NONBLOCK); + if (error == EWOULDBLOCK) + return (NULL); remote = NULL; soaccept(so, (struct sockaddr **)&remote); free(remote, M_SONAME); return so; } + static void iw_so_event_handler(struct work_struct *_work) { @@ -485,18 +470,17 @@ err: #endif return; } + static int iw_so_upcall(struct socket *parent_so, void *arg, int waitflag) { struct iwcm_listen_work *work; - struct socket *so; struct iw_cm_id *cm_id = arg; /* check whether iw_so_event_handler() already dequeued this 'so' */ - so = TAILQ_FIRST(&parent_so->so_comp); - if (!so) + if (TAILQ_EMPTY(&parent_so->sol_comp)) return SU_OK; - work = kzalloc(sizeof(*work), M_NOWAIT); + work = kzalloc(sizeof(*work), waitflag); if (!work) return -ENOMEM; work->cm_id = cm_id; @@ -507,17 +491,21 @@ iw_so_upcall(struct socket *parent_so, void *arg, int waitflag) return SU_OK; } -static void -iw_init_sock(struct iw_cm_id *cm_id) +static int +iw_create_listen(struct iw_cm_id *cm_id, int backlog) { struct sockopt sopt; struct socket *so = cm_id->so; int on = 1; + int rc; - SOCK_LOCK(so); - soupcall_set(so, SO_RCV, 
iw_so_upcall, cm_id); + rc = -solisten(cm_id->so, backlog, curthread); + if (rc != 0) + return (rc); + SOLISTEN_LOCK(so); + solisten_upcall_set(so, iw_so_upcall, cm_id); so->so_state |= SS_NBIO; - SOCK_UNLOCK(so); + SOLISTEN_UNLOCK(so); sopt.sopt_dir = SOPT_SET; sopt.sopt_level = IPPROTO_TCP; sopt.sopt_name = TCP_NODELAY; @@ -525,37 +513,18 @@ iw_init_sock(struct iw_cm_id *cm_id) sopt.sopt_valsize = sizeof(on); sopt.sopt_td = NULL; sosetopt(so, &sopt); -} - -static int -iw_uninit_socket(struct iw_cm_id *cm_id) -{ - struct socket *so = cm_id->so; - - SOCK_LOCK(so); - soupcall_clear(so, SO_RCV); - SOCK_UNLOCK(so); - return (0); } static int -iw_create_listen(struct iw_cm_id *cm_id, int backlog) -{ - int rc; - - iw_init_sock(cm_id); - rc = -solisten(cm_id->so, backlog, curthread); - if (rc != 0) - iw_uninit_socket(cm_id); - return (rc); -} - -static int iw_destroy_listen(struct iw_cm_id *cm_id) { + struct socket *so = cm_id->so; - return (iw_uninit_socket(cm_id)); + SOLISTEN_LOCK(so); + solisten_upcall_set(so, NULL, NULL); + SOLISTEN_UNLOCK(so); + return (0); } diff --git a/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c b/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c index f822466e4abc..a1ae56e52832 100644 --- a/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c +++ b/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c @@ -310,7 +310,6 @@ sdp_closed(struct sdp_sock *ssk) ("sdp_closed: !SS_PROTOREF")); ssk->flags &= ~SDP_SOCKREF; SDP_WUNLOCK(ssk); - ACCEPT_LOCK(); SOCK_LOCK(so); so->so_state &= ~SS_PROTOREF; sofree(so); |