aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Mateusz Guzik <mjg@FreeBSD.org> 2020-08-04 23:07:42 +0000
committer Mateusz Guzik <mjg@FreeBSD.org> 2020-08-04 23:07:42 +0000
commit db99ec5656363acc0bd3df133f57639fb839bf5f (patch)
tree 5d6e9990e61199b004e8737fa3bbce94a9a28a12
parent b403aa126e47c8702138ab66c1a8918cbc7a6c4b (diff)
download src-db99ec5656363acc0bd3df133f57639fb839bf5f.tar.gz
src-db99ec5656363acc0bd3df133f57639fb839bf5f.zip
vfs: support lockless dotdot lookup
Tested by: pho
Notes
Notes: svn path=/head/; revision=363874
-rw-r--r-- sys/kern/vfs_cache.c 124
-rw-r--r-- sys/kern/vfs_subr.c 25
-rw-r--r-- sys/sys/vnode.h 2
3 files changed, 132 insertions, 19 deletions
diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c
index 721779df7aef..a27f475f6615 100644
--- a/sys/kern/vfs_cache.c
+++ b/sys/kern/vfs_cache.c
@@ -51,6 +51,7 @@ __FBSDID("$FreeBSD$");
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/fcntl.h>
+#include <sys/jail.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/proc.h>
@@ -957,16 +958,22 @@ cache_zap_locked(struct namecache *ncp)
SDT_PROBE3(vfs, namecache, zap, done, ncp->nc_dvp,
ncp->nc_name, ncp->nc_vp);
TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_dst);
- if (ncp == ncp->nc_vp->v_cache_dd)
+ if (ncp == ncp->nc_vp->v_cache_dd) {
+ vn_seqc_write_begin_unheld(ncp->nc_vp);
ncp->nc_vp->v_cache_dd = NULL;
+ vn_seqc_write_end(ncp->nc_vp);
+ }
} else {
SDT_PROBE2(vfs, namecache, zap_negative, done, ncp->nc_dvp,
ncp->nc_name);
cache_negative_remove(ncp);
}
if (ncp->nc_flag & NCF_ISDOTDOT) {
- if (ncp == ncp->nc_dvp->v_cache_dd)
+ if (ncp == ncp->nc_dvp->v_cache_dd) {
+ vn_seqc_write_begin_unheld(ncp->nc_dvp);
ncp->nc_dvp->v_cache_dd = NULL;
+ vn_seqc_write_end(ncp->nc_dvp);
+ }
} else {
LIST_REMOVE(ncp, nc_src);
if (LIST_EMPTY(&ncp->nc_dvp->v_cache_src)) {
@@ -1306,7 +1313,9 @@ retry_dotdot:
mtx_unlock(dvlp2);
cache_free(ncp);
} else {
+ vn_seqc_write_begin(dvp);
dvp->v_cache_dd = NULL;
+ vn_seqc_write_end(dvp);
mtx_unlock(dvlp);
if (dvlp2 != NULL)
mtx_unlock(dvlp2);
@@ -1817,6 +1826,7 @@ cache_enter_dotdot_prep(struct vnode *dvp, struct vnode *vp,
cache_celockstate_init(&cel);
hash = cache_get_hash(cnp->cn_nameptr, len, dvp);
cache_enter_lock_dd(&cel, dvp, vp, hash);
+ vn_seqc_write_begin(dvp);
ncp = dvp->v_cache_dd;
if (ncp != NULL && (ncp->nc_flag & NCF_ISDOTDOT)) {
KASSERT(ncp->nc_dvp == dvp, ("wrong isdotdot parent"));
@@ -1825,6 +1835,7 @@ cache_enter_dotdot_prep(struct vnode *dvp, struct vnode *vp,
ncp = NULL;
}
dvp->v_cache_dd = NULL;
+ vn_seqc_write_end(dvp);
cache_enter_unlock(&cel);
cache_free(ncp);
}
@@ -1939,7 +1950,9 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
goto out_unlock_free;
KASSERT(vp == NULL || vp->v_type == VDIR,
("wrong vnode type %p", vp));
+ vn_seqc_write_begin(dvp);
dvp->v_cache_dd = ncp;
+ vn_seqc_write_end(dvp);
}
if (vp != NULL) {
@@ -1950,6 +1963,7 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
* directory name in it and the name ".." for the
* directory's parent.
*/
+ vn_seqc_write_begin(vp);
if ((ndd = vp->v_cache_dd) != NULL) {
if ((ndd->nc_flag & NCF_ISDOTDOT) != 0)
cache_zap_locked(ndd);
@@ -1957,9 +1971,14 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
ndd = NULL;
}
vp->v_cache_dd = ncp;
+ vn_seqc_write_end(vp);
}
} else {
- vp->v_cache_dd = NULL;
+ if (vp->v_cache_dd != NULL) {
+ vn_seqc_write_begin(vp);
+ vp->v_cache_dd = NULL;
+ vn_seqc_write_end(vp);
+ }
}
}
@@ -3425,6 +3444,75 @@ cache_fplookup_dot(struct cache_fpl *fpl)
return (0);
}
+static int __noinline
+cache_fplookup_dotdot(struct cache_fpl *fpl)
+{
+ struct nameidata *ndp;
+ struct componentname *cnp;
+ struct namecache *ncp;
+ struct vnode *dvp;
+ struct prison *pr;
+ u_char nc_flag;
+
+ ndp = fpl->ndp;
+ cnp = fpl->cnp;
+ dvp = fpl->dvp;
+
+ /*
+ * Resolve the ".." component relative to fpl->dvp. On success
+ * (return value 0) the result is left in fpl->tvp and its sequence
+ * counter snapshot in fpl->tvp_seqc. Every other return value is
+ * whatever cache_fpl_aborted() or cache_fpl_partial() hand back.
+ *
+ * The loop below walks the credential's prison chain and stops at
+ * the first prison whose pr_root is dvp; pr is NULL afterwards when
+ * no prison root matched.
+ *
+ * XXX this is racy the same way regular lookup is
+ */
+ for (pr = cnp->cn_cred->cr_prison; pr != NULL;
+ pr = pr->pr_parent)
+ if (dvp == pr->pr_root)
+ break;
+
+ if (dvp == ndp->ni_rootdir ||
+ dvp == ndp->ni_topdir ||
+ dvp == rootvnode ||
+ pr != NULL) {
+ fpl->tvp = dvp; /* dvp is one of the roots tested above: ".." yields dvp itself */
+ fpl->tvp_seqc = vn_seqc_read_any(dvp);
+ if (seqc_in_modify(fpl->tvp_seqc)) {
+ return (cache_fpl_aborted(fpl));
+ }
+ return (0);
+ }
+
+ if ((dvp->v_vflag & VV_ROOT) != 0) {
+ /*
+ * TODO
+ * The opposite of climb mount is needed here.
+ *
+ * Until that exists, a dvp flagged VV_ROOT that was not
+ * caught by the root checks above is not handled here and
+ * the lookup is aborted.
+ */
+ return (cache_fpl_aborted(fpl));
+ }
+
+ ncp = atomic_load_ptr(&dvp->v_cache_dd); /* v_cache_dd is read with an atomic load; no lock is taken in this function */
+ if (ncp == NULL) {
+ return (cache_fpl_aborted(fpl));
+ }
+
+ nc_flag = atomic_load_char(&ncp->nc_flag);
+ if ((nc_flag & NCF_ISDOTDOT) != 0) {
+ if ((nc_flag & NCF_NEGATIVE) != 0)
+ return (cache_fpl_aborted(fpl)); /* negative ".." entries are not handled here */
+ fpl->tvp = ncp->nc_vp; /* NCF_ISDOTDOT entry: the target is taken from nc_vp */
+ } else {
+ fpl->tvp = ncp->nc_dvp; /* entry without NCF_ISDOTDOT: the target is taken from nc_dvp */
+ }
+
+ if (__predict_false(!cache_ncp_canuse(ncp))) { /* checked only after the fields above were read from ncp */
+ return (cache_fpl_aborted(fpl));
+ }
+
+ fpl->tvp_seqc = vn_seqc_read_any(fpl->tvp);
+ if (seqc_in_modify(fpl->tvp_seqc)) {
+ return (cache_fpl_partial(fpl)); /* NOTE(review): the root case above uses cache_fpl_aborted() for the same condition -- confirm the difference is intended */
+ }
+
+ counter_u64_add(dotdothits, 1); /* counted only when the lookup succeeded through the v_cache_dd entry */
+ return (0);
+}
+
static int
cache_fplookup_next(struct cache_fpl *fpl)
{
@@ -3782,11 +3870,6 @@ cache_fplookup_impl(struct vnode *dvp, struct cache_fpl *fpl)
break;
}
- if (cnp->cn_flags & ISDOTDOT) {
- error = cache_fpl_partial(fpl);
- break;
- }
-
VNPASS(cache_fplookup_vnode_supported(fpl->dvp), fpl->dvp);
error = VOP_FPLOOKUP_VEXEC(fpl->dvp, cnp->cn_cred, cnp->cn_thread);
@@ -3795,18 +3878,25 @@ cache_fplookup_impl(struct vnode *dvp, struct cache_fpl *fpl)
break;
}
- error = cache_fplookup_next(fpl);
- if (__predict_false(error != 0)) {
- break;
- }
-
- VNPASS(!seqc_in_modify(fpl->tvp_seqc), fpl->tvp);
-
- if (cache_fplookup_need_climb_mount(fpl)) {
- error = cache_fplookup_climb_mount(fpl);
+ if (__predict_false(cnp->cn_flags & ISDOTDOT)) {
+ error = cache_fplookup_dotdot(fpl);
if (__predict_false(error != 0)) {
break;
}
+ } else {
+ error = cache_fplookup_next(fpl);
+ if (__predict_false(error != 0)) {
+ break;
+ }
+
+ VNPASS(!seqc_in_modify(fpl->tvp_seqc), fpl->tvp);
+
+ if (cache_fplookup_need_climb_mount(fpl)) {
+ error = cache_fplookup_climb_mount(fpl);
+ if (__predict_false(error != 0)) {
+ break;
+ }
+ }
}
VNPASS(!seqc_in_modify(fpl->tvp_seqc), fpl->tvp);
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 0ce9dd785213..d13150c2c7cf 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -6881,12 +6881,15 @@ vn_dir_check_exec(struct vnode *vp, struct componentname *cnp)
return (VOP_ACCESS(vp, VEXEC, cnp->cn_cred, cnp->cn_thread));
}
+/*
+ * Do not use this variant unless you have means other than the hold count
+ * to prevent the vnode from getting freed.
+ */
void
-vn_seqc_write_begin_locked(struct vnode *vp)
+vn_seqc_write_begin_unheld_locked(struct vnode *vp)
{
ASSERT_VI_LOCKED(vp, __func__);
- VNPASS(vp->v_holdcnt > 0, vp);
VNPASS(vp->v_seqc_users >= 0, vp);
vp->v_seqc_users++;
if (vp->v_seqc_users == 1)
@@ -6894,6 +6897,15 @@ vn_seqc_write_begin_locked(struct vnode *vp)
}
void
+vn_seqc_write_begin_locked(struct vnode *vp) /* variant that asserts the vnode is held; the vnode interlock must already be owned */
+{
+
+ ASSERT_VI_LOCKED(vp, __func__);
+ VNPASS(vp->v_holdcnt > 0, vp); /* the only difference from the _unheld_locked variant: insist on a non-zero hold count */
+ vn_seqc_write_begin_unheld_locked(vp); /* the shared implementation does the actual work */
+}
+
+void
vn_seqc_write_begin(struct vnode *vp)
{
@@ -6903,6 +6915,15 @@ vn_seqc_write_begin(struct vnode *vp)
}
void
+vn_seqc_write_begin_unheld(struct vnode *vp) /* takes the vnode interlock itself and performs no hold count check */
+{
+
+ VI_LOCK(vp); /* the _locked helper asserts the interlock is owned */
+ vn_seqc_write_begin_unheld_locked(vp);
+ VI_UNLOCK(vp);
+}
+
+void
vn_seqc_write_end_locked(struct vnode *vp)
{
diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h
index f754b2b52724..398fb088d7f1 100644
--- a/sys/sys/vnode.h
+++ b/sys/sys/vnode.h
@@ -764,6 +764,8 @@ int vn_io_fault_uiomove(char *data, int xfersize, struct uio *uio);
int vn_io_fault_pgmove(vm_page_t ma[], vm_offset_t offset, int xfersize,
struct uio *uio);
+void vn_seqc_write_begin_unheld_locked(struct vnode *vp);
+void vn_seqc_write_begin_unheld(struct vnode *vp);
void vn_seqc_write_begin_locked(struct vnode *vp);
void vn_seqc_write_begin(struct vnode *vp);
void vn_seqc_write_end_locked(struct vnode *vp);