aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJeff Roberson <jeff@FreeBSD.org>2019-10-15 03:35:11 +0000
committerJeff Roberson <jeff@FreeBSD.org>2019-10-15 03:35:11 +0000
commit63e9755548e4feebf798686ab8bce0cdaaaf7b46 (patch)
tree73004f9ecd43d157304327e6d0feb4ddf93012af
parentf44e7436797617b6c6a42a280befb312f1ebf50f (diff)
downloadsrc-63e9755548e4feebf798686ab8bce0cdaaaf7b46.tar.gz
src-63e9755548e4feebf798686ab8bce0cdaaaf7b46.zip
(1/6) Replace busy checks with acquires where it is trivial to do so.
This is the first in a series of patches that promotes the page busy field to a first-class lock that no longer requires the object lock for consistency.

Reviewed by:	kib, markj
Tested by:	pho
Sponsored by:	Netflix, Intel
Differential Revision:	https://reviews.freebsd.org/D21548
Notes
Notes: svn path=/head/; revision=353535
-rw-r--r--sys/dev/xen/gntdev/gntdev.c2
-rw-r--r--sys/dev/xen/privcmd/privcmd.c2
-rw-r--r--sys/fs/tmpfs/tmpfs_subr.c6
-rw-r--r--sys/kern/kern_exec.c6
-rw-r--r--sys/kern/uipc_shm.c7
-rw-r--r--sys/kern/vfs_bio.c48
-rw-r--r--sys/kern/vfs_cluster.c18
-rw-r--r--sys/sys/buf.h3
-rw-r--r--sys/vm/phys_pager.c26
-rw-r--r--sys/vm/vm_fault.c12
-rw-r--r--sys/vm/vm_object.c49
-rw-r--r--sys/vm/vm_page.c90
-rw-r--r--sys/vm/vm_page.h1
-rw-r--r--sys/vm/vm_pageout.c57
-rw-r--r--sys/vm/vm_swapout.c11
15 files changed, 211 insertions, 127 deletions
diff --git a/sys/dev/xen/gntdev/gntdev.c b/sys/dev/xen/gntdev/gntdev.c
index 3ad4072e3ea2..c9e42c1fd02b 100644
--- a/sys/dev/xen/gntdev/gntdev.c
+++ b/sys/dev/xen/gntdev/gntdev.c
@@ -835,9 +835,9 @@ gntdev_gmap_pg_fault(vm_object_t object, vm_ooffset_t offset, int prot,
*mres = NULL;
}
+ vm_page_busy_acquire(page, 0);
vm_page_insert(page, object, pidx);
page->valid = VM_PAGE_BITS_ALL;
- vm_page_xbusy(page);
*mres = page;
return (VM_PAGER_OK);
}
diff --git a/sys/dev/xen/privcmd/privcmd.c b/sys/dev/xen/privcmd/privcmd.c
index 691229018f2e..e424dc20d905 100644
--- a/sys/dev/xen/privcmd/privcmd.c
+++ b/sys/dev/xen/privcmd/privcmd.c
@@ -178,9 +178,9 @@ privcmd_pg_fault(vm_object_t object, vm_ooffset_t offset,
*mres = NULL;
}
+ vm_page_busy_acquire(page, 0);
vm_page_insert(page, object, pidx);
page->valid = VM_PAGE_BITS_ALL;
- vm_page_xbusy(page);
*mres = page;
return (VM_PAGER_OK);
}
diff --git a/sys/fs/tmpfs/tmpfs_subr.c b/sys/fs/tmpfs/tmpfs_subr.c
index 14169de46f1a..c5b2286b1ece 100644
--- a/sys/fs/tmpfs/tmpfs_subr.c
+++ b/sys/fs/tmpfs/tmpfs_subr.c
@@ -1406,10 +1406,8 @@ tmpfs_reg_resize(struct vnode *vp, off_t newsize, boolean_t ignerr)
if (base != 0) {
idx = OFF_TO_IDX(newsize);
retry:
- m = vm_page_lookup(uobj, idx);
+ m = vm_page_grab(uobj, idx, VM_ALLOC_NOCREAT);
if (m != NULL) {
- if (vm_page_sleep_if_busy(m, "tmfssz"))
- goto retry;
MPASS(m->valid == VM_PAGE_BITS_ALL);
} else if (vm_pager_has_page(uobj, idx, NULL, NULL)) {
m = vm_page_alloc(uobj, idx, VM_ALLOC_NORMAL |
@@ -1430,7 +1428,6 @@ retry:
vm_page_lock(m);
vm_page_launder(m);
vm_page_unlock(m);
- vm_page_xunbusy(m);
} else {
vm_page_free(m);
if (ignerr)
@@ -1444,6 +1441,7 @@ retry:
if (m != NULL) {
pmap_zero_page_area(m, base, PAGE_SIZE - base);
vm_page_dirty(m);
+ vm_page_xunbusy(m);
vm_pager_page_unswapped(m);
}
}
diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
index 5abced8fbbaa..e1c647ca4f00 100644
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -976,10 +976,14 @@ exec_map_first_page(struct image_params *imgp)
#if VM_NRESERVLEVEL > 0
vm_object_color(object, 0);
#endif
+retry:
ma[0] = vm_page_grab(object, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY |
VM_ALLOC_WIRED);
if (ma[0]->valid != VM_PAGE_BITS_ALL) {
- vm_page_xbusy(ma[0]);
+ if (vm_page_busy_acquire(ma[0], VM_ALLOC_WAITFAIL) == 0) {
+ vm_page_unwire_noq(ma[0]);
+ goto retry;
+ }
if (!vm_pager_has_page(object, 0, NULL, &after)) {
if (vm_page_unwire_noq(ma[0]))
vm_page_free(ma[0]);
diff --git a/sys/kern/uipc_shm.c b/sys/kern/uipc_shm.c
index e0c6be1f0fab..b0aaac0659a5 100644
--- a/sys/kern/uipc_shm.c
+++ b/sys/kern/uipc_shm.c
@@ -457,10 +457,9 @@ shm_dotruncate_locked(struct shmfd *shmfd, off_t length, void *rl_cookie)
if (base != 0) {
idx = OFF_TO_IDX(length);
retry:
- m = vm_page_lookup(object, idx);
+ m = vm_page_grab(object, idx, VM_ALLOC_NOCREAT);
if (m != NULL) {
- if (vm_page_sleep_if_busy(m, "shmtrc"))
- goto retry;
+ MPASS(m->valid == VM_PAGE_BITS_ALL);
} else if (vm_pager_has_page(object, idx, NULL, NULL)) {
m = vm_page_alloc(object, idx,
VM_ALLOC_NORMAL | VM_ALLOC_WAITFAIL);
@@ -478,7 +477,6 @@ retry:
* as an access.
*/
vm_page_launder(m);
- vm_page_xunbusy(m);
} else {
vm_page_free(m);
VM_OBJECT_WUNLOCK(object);
@@ -490,6 +488,7 @@ retry:
KASSERT(m->valid == VM_PAGE_BITS_ALL,
("shm_dotruncate: page %p is invalid", m));
vm_page_dirty(m);
+ vm_page_xunbusy(m);
vm_pager_page_unswapped(m);
}
}
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index 8e14592b402d..baeaf2e32dc0 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -2945,10 +2945,10 @@ vfs_vmio_invalidate(struct buf *bp)
presid = resid > (PAGE_SIZE - poffset) ?
(PAGE_SIZE - poffset) : resid;
KASSERT(presid >= 0, ("brelse: extra page"));
- while (vm_page_xbusied(m))
- vm_page_sleep_if_xbusy(m, "mbncsh");
+ vm_page_busy_acquire(m, VM_ALLOC_SBUSY);
if (pmap_page_wired_mappings(m) == 0)
vm_page_set_invalid(m, poffset, presid);
+ vm_page_sunbusy(m);
vm_page_release_locked(m, flags);
resid -= presid;
poffset = 0;
@@ -3651,7 +3651,7 @@ vfs_clean_pages_dirty_buf(struct buf *bp)
("vfs_clean_pages_dirty_buf: no buffer offset"));
VM_OBJECT_WLOCK(bp->b_bufobj->bo_object);
- vfs_drain_busy_pages(bp);
+ vfs_busy_pages_acquire(bp);
vfs_setdirty_locked_object(bp);
for (i = 0; i < bp->b_npages; i++) {
noff = (foff + PAGE_SIZE) & ~(off_t)PAGE_MASK;
@@ -3663,6 +3663,7 @@ vfs_clean_pages_dirty_buf(struct buf *bp)
/* vm_page_clear_dirty(m, foff & PAGE_MASK, eoff - foff); */
foff = noff;
}
+ vfs_busy_pages_release(bp);
VM_OBJECT_WUNLOCK(bp->b_bufobj->bo_object);
}
@@ -4559,28 +4560,25 @@ vfs_page_set_validclean(struct buf *bp, vm_ooffset_t off, vm_page_t m)
}
/*
- * Ensure that all buffer pages are not exclusive busied. If any page is
- * exclusive busy, drain it.
+ * Acquire a shared busy on all pages in the buf.
*/
void
-vfs_drain_busy_pages(struct buf *bp)
+vfs_busy_pages_acquire(struct buf *bp)
{
- vm_page_t m;
- int i, last_busied;
+ int i;
VM_OBJECT_ASSERT_WLOCKED(bp->b_bufobj->bo_object);
- last_busied = 0;
- for (i = 0; i < bp->b_npages; i++) {
- m = bp->b_pages[i];
- if (vm_page_xbusied(m)) {
- for (; last_busied < i; last_busied++)
- vm_page_sbusy(bp->b_pages[last_busied]);
- while (vm_page_xbusied(m)) {
- vm_page_sleep_if_xbusy(m, "vbpage");
- }
- }
- }
- for (i = 0; i < last_busied; i++)
+ for (i = 0; i < bp->b_npages; i++)
+ vm_page_busy_acquire(bp->b_pages[i], VM_ALLOC_SBUSY);
+}
+
+void
+vfs_busy_pages_release(struct buf *bp)
+{
+ int i;
+
+ VM_OBJECT_ASSERT_WLOCKED(bp->b_bufobj->bo_object);
+ for (i = 0; i < bp->b_npages; i++)
vm_page_sunbusy(bp->b_pages[i]);
}
@@ -4613,17 +4611,17 @@ vfs_busy_pages(struct buf *bp, int clear_modify)
KASSERT(bp->b_offset != NOOFFSET,
("vfs_busy_pages: no buffer offset"));
VM_OBJECT_WLOCK(obj);
- vfs_drain_busy_pages(bp);
+ if ((bp->b_flags & B_CLUSTER) == 0) {
+ vm_object_pip_add(obj, bp->b_npages);
+ vfs_busy_pages_acquire(bp);
+ }
if (bp->b_bufsize != 0)
vfs_setdirty_locked_object(bp);
bogus = false;
for (i = 0; i < bp->b_npages; i++) {
m = bp->b_pages[i];
+ vm_page_assert_sbusied(m);
- if ((bp->b_flags & B_CLUSTER) == 0) {
- vm_object_pip_add(obj, 1);
- vm_page_sbusy(m);
- }
/*
* When readying a buffer for a read ( i.e
* clear_modify == 0 ), it is important to do
diff --git a/sys/kern/vfs_cluster.c b/sys/kern/vfs_cluster.c
index 21efe900eea0..6a87dd28d57b 100644
--- a/sys/kern/vfs_cluster.c
+++ b/sys/kern/vfs_cluster.c
@@ -418,11 +418,9 @@ cluster_rbuild(struct vnode *vp, u_quad_t filesize, daddr_t lbn,
for (bn = blkno, i = 0; i < run; ++i, bn += inc) {
if (i == 0) {
VM_OBJECT_WLOCK(tbp->b_bufobj->bo_object);
- vfs_drain_busy_pages(tbp);
vm_object_pip_add(tbp->b_bufobj->bo_object,
tbp->b_npages);
- for (k = 0; k < tbp->b_npages; k++)
- vm_page_sbusy(tbp->b_pages[k]);
+ vfs_busy_pages_acquire(tbp);
VM_OBJECT_WUNLOCK(tbp->b_bufobj->bo_object);
} else {
if ((bp->b_npages * PAGE_SIZE) +
@@ -470,10 +468,9 @@ cluster_rbuild(struct vnode *vp, u_quad_t filesize, daddr_t lbn,
if ((tbp->b_pages[j]->valid &
vm_page_bits(toff, tinc)) != 0)
break;
- if (vm_page_xbusied(tbp->b_pages[j]))
+ if (vm_page_trysbusy(tbp->b_pages[j]) == 0)
break;
vm_object_pip_add(tbp->b_bufobj->bo_object, 1);
- vm_page_sbusy(tbp->b_pages[j]);
off += tinc;
tsize -= tinc;
}
@@ -991,11 +988,14 @@ cluster_wbuild(struct vnode *vp, long size, daddr_t start_lbn, int len,
VM_OBJECT_WLOCK(tbp->b_bufobj->bo_object);
if (i == 0) {
- vfs_drain_busy_pages(tbp);
+ vfs_busy_pages_acquire(tbp);
} else { /* if not first buffer */
for (j = 0; j < tbp->b_npages; j += 1) {
m = tbp->b_pages[j];
- if (vm_page_xbusied(m)) {
+ if (vm_page_trysbusy(m) == 0) {
+ for (j--; j >= 0; j--)
+ vm_page_sunbusy(
+ tbp->b_pages[j]);
VM_OBJECT_WUNLOCK(
tbp->b_object);
bqrelse(tbp);
@@ -1003,10 +1003,10 @@ cluster_wbuild(struct vnode *vp, long size, daddr_t start_lbn, int len,
}
}
}
+ vm_object_pip_add(tbp->b_bufobj->bo_object,
+ tbp->b_npages);
for (j = 0; j < tbp->b_npages; j += 1) {
m = tbp->b_pages[j];
- vm_page_sbusy(m);
- vm_object_pip_add(m->object, 1);
if ((bp->b_npages == 0) ||
(bp->b_pages[bp->b_npages - 1] != m)) {
bp->b_pages[bp->b_npages] = m;
diff --git a/sys/sys/buf.h b/sys/sys/buf.h
index f419617abfab..f71c88c82d49 100644
--- a/sys/sys/buf.h
+++ b/sys/sys/buf.h
@@ -539,7 +539,8 @@ void bufstrategy(struct bufobj *, struct buf *);
void brelse(struct buf *);
void bqrelse(struct buf *);
int vfs_bio_awrite(struct buf *);
-void vfs_drain_busy_pages(struct buf *bp);
+void vfs_busy_pages_acquire(struct buf *bp);
+void vfs_busy_pages_release(struct buf *bp);
struct buf *incore(struct bufobj *, daddr_t);
struct buf *gbincore(struct bufobj *, daddr_t);
struct buf *getblk(struct vnode *, daddr_t, int, int, int, int);
diff --git a/sys/vm/phys_pager.c b/sys/vm/phys_pager.c
index 4b076f43a896..43e63ec1d5dd 100644
--- a/sys/vm/phys_pager.c
+++ b/sys/vm/phys_pager.c
@@ -206,29 +206,13 @@ phys_pager_populate(vm_object_t object, vm_pindex_t pidx,
*last = end;
for (i = base; i <= end; i++) {
-retry:
- m = vm_page_lookup(object, i);
- if (m == NULL) {
- ahead = MIN(end - i, PHYSALLOC);
- m = vm_page_alloc(object, i, VM_ALLOC_NORMAL |
- VM_ALLOC_ZERO | VM_ALLOC_WAITFAIL |
- VM_ALLOC_COUNT(ahead));
- if (m == NULL)
- goto retry;
- if ((m->flags & PG_ZERO) == 0)
- pmap_zero_page(m);
+ ahead = MIN(end - i, PHYSALLOC);
+ m = vm_page_grab(object, i,
+ VM_ALLOC_NORMAL | VM_ALLOC_COUNT(ahead));
+ if (m->valid != VM_PAGE_BITS_ALL) {
+ vm_page_zero_invalid(m, TRUE);
m->valid = VM_PAGE_BITS_ALL;
- } else if (vm_page_xbusied(m)) {
- vm_page_sleep_if_xbusy(m, "physb");
- goto retry;
- } else {
- vm_page_xbusy(m);
- if (m->valid != VM_PAGE_BITS_ALL)
- vm_page_zero_invalid(m, TRUE);
}
-
- KASSERT(m->valid == VM_PAGE_BITS_ALL,
- ("phys_pager_populate: partially valid page %p", m));
KASSERT(m->dirty == 0,
("phys_pager_populate: dirty page %p", m));
}
diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
index e0e758acc92f..688c59fe9b88 100644
--- a/sys/vm/vm_fault.c
+++ b/sys/vm/vm_fault.c
@@ -787,7 +787,7 @@ RetryFault_oom:
* around with a shared busied page except, perhaps,
* to pmap it.
*/
- if (vm_page_busied(fs.m)) {
+ if (vm_page_tryxbusy(fs.m) == 0) {
/*
* Reference the page before unlocking and
* sleeping so that the page daemon is less
@@ -819,12 +819,11 @@ RetryFault_oom:
}
/*
- * Mark page busy for other processes, and the
+ * The page is marked busy for other processes and the
* pagedaemon. If it still isn't completely valid
* (readable), jump to readrest, else break-out ( we
* found the page ).
*/
- vm_page_xbusy(fs.m);
if (fs.m->valid != VM_PAGE_BITS_ALL)
goto readrest;
break; /* break to PAGE HAS BEEN FOUND */
@@ -1826,16 +1825,17 @@ again:
dst_m->dirty = dst_m->valid = src_m->valid;
} else {
dst_m = src_m;
- if (vm_page_sleep_if_busy(dst_m, "fltupg"))
+ if (vm_page_busy_acquire(dst_m, VM_ALLOC_WAITFAIL) == 0)
goto again;
- if (dst_m->pindex >= dst_object->size)
+ if (dst_m->pindex >= dst_object->size) {
/*
* We are upgrading. Index can occur
* out of bounds if the object type is
* vnode and the file was truncated.
*/
+ vm_page_xunbusy(dst_m);
break;
- vm_page_xbusy(dst_m);
+ }
}
VM_OBJECT_WUNLOCK(dst_object);
diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c
index 0fb873ba32b8..5665330119fd 100644
--- a/sys/vm/vm_object.c
+++ b/sys/vm/vm_object.c
@@ -838,7 +838,7 @@ rescan:
np = TAILQ_NEXT(p, listq);
if (p->valid == 0)
continue;
- if (vm_page_sleep_if_busy(p, "vpcwai")) {
+ if (vm_page_busy_acquire(p, VM_ALLOC_WAITFAIL) == 0) {
if (object->generation != curgeneration) {
if ((flags & OBJPC_SYNC) != 0)
goto rescan;
@@ -848,8 +848,10 @@ rescan:
np = vm_page_find_least(object, pi);
continue;
}
- if (!vm_object_page_remove_write(p, flags, &clearobjflags))
+ if (!vm_object_page_remove_write(p, flags, &clearobjflags)) {
+ vm_page_xunbusy(p);
continue;
+ }
n = vm_object_page_collect_flush(object, p, pagerflags,
flags, &clearobjflags, &eio);
@@ -899,6 +901,7 @@ vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int pagerflags,
int count, i, mreq, runlen;
vm_page_lock_assert(p, MA_NOTOWNED);
+ vm_page_assert_xbusied(p);
VM_OBJECT_ASSERT_WLOCKED(object);
count = 1;
@@ -906,18 +909,22 @@ vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int pagerflags,
for (tp = p; count < vm_pageout_page_count; count++) {
tp = vm_page_next(tp);
- if (tp == NULL || vm_page_busied(tp))
+ if (tp == NULL || vm_page_tryxbusy(tp) == 0)
break;
- if (!vm_object_page_remove_write(tp, flags, clearobjflags))
+ if (!vm_object_page_remove_write(tp, flags, clearobjflags)) {
+ vm_page_xunbusy(tp);
break;
+ }
}
for (p_first = p; count < vm_pageout_page_count; count++) {
tp = vm_page_prev(p_first);
- if (tp == NULL || vm_page_busied(tp))
+ if (tp == NULL || vm_page_tryxbusy(tp) == 0)
break;
- if (!vm_object_page_remove_write(tp, flags, clearobjflags))
+ if (!vm_object_page_remove_write(tp, flags, clearobjflags)) {
+ vm_page_xunbusy(tp);
break;
+ }
p_first = tp;
mreq++;
}
@@ -1158,7 +1165,7 @@ next_page:
("vm_object_madvise: page %p is fictitious", tm));
KASSERT((tm->oflags & VPO_UNMANAGED) == 0,
("vm_object_madvise: page %p is not managed", tm));
- if (vm_page_busied(tm)) {
+ if (vm_page_tryxbusy(tm) == 0) {
if (object != tobject)
VM_OBJECT_WUNLOCK(object);
if (advice == MADV_WILLNEED) {
@@ -1175,6 +1182,7 @@ next_page:
vm_page_lock(tm);
vm_page_advise(tm, advice);
vm_page_unlock(tm);
+ vm_page_xunbusy(tm);
vm_object_madvise_freespace(tobject, advice, tm->pindex, 1);
next_pindex:
if (tobject != object)
@@ -1341,7 +1349,7 @@ retry:
* We do not have to VM_PROT_NONE the page as mappings should
* not be changed by this operation.
*/
- if (vm_page_busied(m)) {
+ if (vm_page_tryxbusy(m) == 0) {
VM_OBJECT_WUNLOCK(new_object);
vm_page_sleep_if_busy(m, "spltwt");
VM_OBJECT_WLOCK(new_object);
@@ -1350,6 +1358,7 @@ retry:
/* vm_page_rename() will dirty the page. */
if (vm_page_rename(m, new_object, idx)) {
+ vm_page_xunbusy(m);
VM_OBJECT_WUNLOCK(new_object);
VM_OBJECT_WUNLOCK(orig_object);
vm_radix_wait();
@@ -1357,6 +1366,8 @@ retry:
VM_OBJECT_WLOCK(new_object);
goto retry;
}
+ /* Rename released the xbusy lock. */
+
#if VM_NRESERVLEVEL > 0
/*
* If some of the reservation's allocated pages remain with
@@ -1405,7 +1416,6 @@ vm_object_collapse_scan_wait(vm_object_t object, vm_page_t p, vm_page_t next,
backing_object = object->backing_object;
VM_OBJECT_ASSERT_WLOCKED(backing_object);
- KASSERT(p == NULL || vm_page_busied(p), ("unbusy page %p", p));
KASSERT(p == NULL || p->object == object || p->object == backing_object,
("invalid ownership %p %p %p", p, object, backing_object));
if ((op & OBSC_COLLAPSE_NOWAIT) != 0)
@@ -1510,7 +1520,7 @@ vm_object_collapse_scan(vm_object_t object, int op)
/*
* Check for busy page
*/
- if (vm_page_busied(p)) {
+ if (vm_page_tryxbusy(p) == 0) {
next = vm_object_collapse_scan_wait(object, p, next, op);
continue;
}
@@ -1532,7 +1542,8 @@ vm_object_collapse_scan(vm_object_t object, int op)
}
pp = vm_page_lookup(object, new_pindex);
- if (pp != NULL && vm_page_busied(pp)) {
+ if (pp != NULL && vm_page_tryxbusy(pp) == 0) {
+ vm_page_xunbusy(p);
/*
* The page in the parent is busy and possibly not
* (yet) valid. Until its state is finalized by the
@@ -1568,6 +1579,8 @@ vm_object_collapse_scan(vm_object_t object, int op)
("freeing mapped page %p", p));
if (vm_page_remove(p))
vm_page_free(p);
+ if (pp != NULL)
+ vm_page_xunbusy(pp);
continue;
}
@@ -1579,10 +1592,14 @@ vm_object_collapse_scan(vm_object_t object, int op)
* through the rename. vm_page_rename() will dirty the page.
*/
if (vm_page_rename(p, object, new_pindex)) {
+ vm_page_xunbusy(p);
+ if (pp != NULL)
+ vm_page_xunbusy(pp);
next = vm_object_collapse_scan_wait(object, NULL, next,
op);
continue;
}
+ /* Rename released the xbusy lock. */
/* Use the old pindex to free the right page. */
if (backing_object->type == OBJT_SWAP)
@@ -1859,7 +1876,7 @@ again:
* however, be invalidated if the option OBJPR_CLEANONLY is
* not specified.
*/
- if (vm_page_busied(p)) {
+ if (vm_page_tryxbusy(p) == 0) {
vm_page_sleep_if_busy(p, "vmopar");
goto again;
}
@@ -1872,6 +1889,7 @@ wired:
p->valid = 0;
vm_page_undirty(p);
}
+ vm_page_xunbusy(p);
continue;
}
KASSERT((p->flags & PG_FICTITIOUS) == 0,
@@ -1881,8 +1899,10 @@ wired:
object->ref_count != 0 &&
!vm_page_try_remove_write(p))
goto wired;
- if (p->dirty != 0)
+ if (p->dirty != 0) {
+ vm_page_xunbusy(p);
continue;
+ }
}
if ((options & OBJPR_NOTMAPPED) == 0 &&
object->ref_count != 0 && !vm_page_try_remove_all(p))
@@ -2168,7 +2188,7 @@ again:
tm = m;
m = TAILQ_NEXT(m, listq);
}
- if (vm_page_xbusied(tm)) {
+ if (vm_page_trysbusy(tm) == 0) {
for (tobject = object; locked_depth >= 1;
locked_depth--) {
t1object = tobject->backing_object;
@@ -2180,6 +2200,7 @@ again:
goto again;
}
vm_page_unwire(tm, queue);
+ vm_page_sunbusy(tm);
next_page:
pindex++;
}
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 1d133cf8597b..f436d254e2c8 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -972,6 +972,32 @@ vm_page_busy_downgrade(vm_page_t m)
}
/*
+ *
+ * vm_page_busy_tryupgrade:
+ *
+ * Attempt to upgrade a single shared busy into an exclusive busy.
+ */
+int
+vm_page_busy_tryupgrade(vm_page_t m)
+{
+ u_int x;
+
+ vm_page_assert_sbusied(m);
+
+ x = m->busy_lock;
+ for (;;) {
+ if (VPB_SHARERS(x) > 1)
+ return (0);
+ KASSERT((x & ~VPB_BIT_WAITERS) == VPB_SHARERS_WORD(1),
+ ("vm_page_busy_tryupgrade: invalid lock state"));
+ if (!atomic_fcmpset_acq_int(&m->busy_lock, &x,
+ VPB_SINGLE_EXCLUSIVER | (x & VPB_BIT_WAITERS)))
+ continue;
+ return (1);
+ }
+}
+
+/*
* vm_page_sbusied:
*
* Return a positive value if the page is shared busied, 0 otherwise.
@@ -2570,7 +2596,12 @@ retry:
else if (object->memattr != VM_MEMATTR_DEFAULT)
error = EINVAL;
else if (vm_page_queue(m) != PQ_NONE &&
- !vm_page_busied(m) && !vm_page_wired(m)) {
+ vm_page_tryxbusy(m) != 0) {
+ if (vm_page_wired(m)) {
+ vm_page_xunbusy(m);
+ error = EBUSY;
+ goto unlock;
+ }
KASSERT(pmap_page_get_memattr(m) ==
VM_MEMATTR_DEFAULT,
("page %p has an unexpected memattr", m));
@@ -2616,6 +2647,7 @@ retry:
VM_MEMATTR_DEFAULT);
}
if (m_new == NULL) {
+ vm_page_xunbusy(m);
error = ENOMEM;
goto unlock;
}
@@ -2647,7 +2679,6 @@ retry:
m_new->valid = m->valid;
m_new->dirty = m->dirty;
m->flags &= ~PG_ZERO;
- vm_page_xbusy(m);
vm_page_dequeue(m);
vm_page_replace_checked(m_new, object,
m->pindex, m);
@@ -4046,8 +4077,8 @@ vm_page_try_blocked_op(vm_page_t m, void (*op)(vm_page_t))
KASSERT(m->object != NULL && (m->oflags & VPO_UNMANAGED) == 0,
("vm_page_try_blocked_op: page %p has no object", m));
- KASSERT(!vm_page_busied(m),
- ("vm_page_try_blocked_op: page %p is busy", m));
+ KASSERT(vm_page_busied(m),
+ ("vm_page_try_blocked_op: page %p is not busy", m));
VM_OBJECT_ASSERT_LOCKED(m->object);
old = m->ref_count;
@@ -4163,13 +4194,18 @@ vm_page_grab(vm_object_t object, vm_pindex_t pindex, int allocflags)
(allocflags & VM_ALLOC_IGN_SBUSY) != 0,
("vm_page_grab: VM_ALLOC_SBUSY/VM_ALLOC_IGN_SBUSY mismatch"));
pflags = allocflags &
- ~(VM_ALLOC_NOWAIT | VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL);
+ ~(VM_ALLOC_NOWAIT | VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL |
+ VM_ALLOC_NOBUSY);
if ((allocflags & VM_ALLOC_NOWAIT) == 0)
pflags |= VM_ALLOC_WAITFAIL;
+ if ((allocflags & VM_ALLOC_IGN_SBUSY) != 0)
+ pflags |= VM_ALLOC_SBUSY;
retrylookup:
if ((m = vm_page_lookup(object, pindex)) != NULL) {
- sleep = (allocflags & VM_ALLOC_IGN_SBUSY) != 0 ?
- vm_page_xbusied(m) : vm_page_busied(m);
+ if ((allocflags & (VM_ALLOC_IGN_SBUSY | VM_ALLOC_SBUSY)) != 0)
+ sleep = !vm_page_trysbusy(m);
+ else
+ sleep = !vm_page_tryxbusy(m);
if (sleep) {
if ((allocflags & VM_ALLOC_NOWAIT) != 0)
return (NULL);
@@ -4189,12 +4225,7 @@ retrylookup:
} else {
if ((allocflags & VM_ALLOC_WIRED) != 0)
vm_page_wire(m);
- if ((allocflags &
- (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) == 0)
- vm_page_xbusy(m);
- else if ((allocflags & VM_ALLOC_SBUSY) != 0)
- vm_page_sbusy(m);
- return (m);
+ goto out;
}
}
if ((allocflags & VM_ALLOC_NOCREAT) != 0)
@@ -4207,6 +4238,14 @@ retrylookup:
}
if (allocflags & VM_ALLOC_ZERO && (m->flags & PG_ZERO) == 0)
pmap_zero_page(m);
+
+out:
+ if ((allocflags & VM_ALLOC_NOBUSY) != 0) {
+ if ((allocflags & VM_ALLOC_IGN_SBUSY) != 0)
+ vm_page_sunbusy(m);
+ else
+ vm_page_xunbusy(m);
+ }
return (m);
}
@@ -4359,10 +4398,13 @@ vm_page_grab_pages(vm_object_t object, vm_pindex_t pindex, int allocflags,
("vm_page_grab_pages: VM_ALLOC_SBUSY/IGN_SBUSY mismatch"));
if (count == 0)
return (0);
- pflags = allocflags & ~(VM_ALLOC_NOWAIT | VM_ALLOC_WAITOK |
- VM_ALLOC_WAITFAIL | VM_ALLOC_IGN_SBUSY);
+ pflags = allocflags &
+ ~(VM_ALLOC_NOWAIT | VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL |
+ VM_ALLOC_NOBUSY);
if ((allocflags & VM_ALLOC_NOWAIT) == 0)
pflags |= VM_ALLOC_WAITFAIL;
+ if ((allocflags & VM_ALLOC_IGN_SBUSY) != 0)
+ pflags |= VM_ALLOC_SBUSY;
i = 0;
retrylookup:
m = vm_radix_lookup_le(&object->rtree, pindex + i);
@@ -4373,8 +4415,11 @@ retrylookup:
mpred = TAILQ_PREV(m, pglist, listq);
for (; i < count; i++) {
if (m != NULL) {
- sleep = (allocflags & VM_ALLOC_IGN_SBUSY) != 0 ?
- vm_page_xbusied(m) : vm_page_busied(m);
+ if ((allocflags &
+ (VM_ALLOC_SBUSY | VM_ALLOC_IGN_SBUSY)) != 0)
+ sleep = !vm_page_trysbusy(m);
+ else
+ sleep = !vm_page_tryxbusy(m);
if (sleep) {
if ((allocflags & VM_ALLOC_NOWAIT) != 0)
break;
@@ -4392,11 +4437,6 @@ retrylookup:
}
if ((allocflags & VM_ALLOC_WIRED) != 0)
vm_page_wire(m);
- if ((allocflags & (VM_ALLOC_NOBUSY |
- VM_ALLOC_SBUSY)) == 0)
- vm_page_xbusy(m);
- if ((allocflags & VM_ALLOC_SBUSY) != 0)
- vm_page_sbusy(m);
} else {
if ((allocflags & VM_ALLOC_NOCREAT) != 0)
break;
@@ -4413,6 +4453,12 @@ retrylookup:
pmap_zero_page(m);
m->valid = VM_PAGE_BITS_ALL;
}
+ if ((allocflags & VM_ALLOC_NOBUSY) != 0) {
+ if ((allocflags & VM_ALLOC_IGN_SBUSY) != 0)
+ vm_page_sunbusy(m);
+ else
+ vm_page_xunbusy(m);
+ }
ma[i] = mpred = m;
m = vm_page_next(m);
}
diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
index c210367dc30a..d3e7230e87ca 100644
--- a/sys/vm/vm_page.h
+++ b/sys/vm/vm_page.h
@@ -542,6 +542,7 @@ malloc2vm_flags(int malloc_flags)
int vm_page_busy_acquire(vm_page_t m, int allocflags);
void vm_page_busy_downgrade(vm_page_t m);
+int vm_page_busy_tryupgrade(vm_page_t m);
void vm_page_busy_sleep(vm_page_t m, const char *msg, bool nonshared);
void vm_page_free(vm_page_t m);
void vm_page_free_zero(vm_page_t m);
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index f3c40cc41dc3..2ad647c96562 100644
--- a/sys/vm/vm_pageout.c
+++ b/sys/vm/vm_pageout.c
@@ -334,7 +334,7 @@ vm_pageout_cluster(vm_page_t m)
VM_OBJECT_ASSERT_WLOCKED(object);
pindex = m->pindex;
- vm_page_assert_unbusied(m);
+ vm_page_assert_xbusied(m);
mc[vm_pageout_page_count] = pb = ps = m;
pageout_count = 1;
@@ -360,19 +360,26 @@ more:
ib = 0;
break;
}
- if ((p = vm_page_prev(pb)) == NULL || vm_page_busied(p) ||
- vm_page_wired(p)) {
+ if ((p = vm_page_prev(pb)) == NULL ||
+ vm_page_tryxbusy(p) == 0) {
ib = 0;
break;
}
+ if (vm_page_wired(p)) {
+ ib = 0;
+ vm_page_xunbusy(p);
+ break;
+ }
vm_page_test_dirty(p);
if (p->dirty == 0) {
ib = 0;
+ vm_page_xunbusy(p);
break;
}
vm_page_lock(p);
if (!vm_page_in_laundry(p) || !vm_page_try_remove_write(p)) {
vm_page_unlock(p);
+ vm_page_xunbusy(p);
ib = 0;
break;
}
@@ -390,15 +397,22 @@ more:
}
while (pageout_count < vm_pageout_page_count &&
pindex + is < object->size) {
- if ((p = vm_page_next(ps)) == NULL || vm_page_busied(p) ||
- vm_page_wired(p))
+ if ((p = vm_page_next(ps)) == NULL ||
+ vm_page_tryxbusy(p) == 0)
break;
+ if (vm_page_wired(p)) {
+ vm_page_xunbusy(p);
+ break;
+ }
vm_page_test_dirty(p);
- if (p->dirty == 0)
+ if (p->dirty == 0) {
+ vm_page_xunbusy(p);
break;
+ }
vm_page_lock(p);
if (!vm_page_in_laundry(p) || !vm_page_try_remove_write(p)) {
vm_page_unlock(p);
+ vm_page_xunbusy(p);
break;
}
vm_page_unlock(p);
@@ -445,8 +459,8 @@ vm_pageout_flush(vm_page_t *mc, int count, int flags, int mreq, int *prunlen,
VM_OBJECT_ASSERT_WLOCKED(object);
/*
- * Initiate I/O. Mark the pages busy and verify that they're valid
- * and read-only.
+ * Initiate I/O. Mark the pages shared busy and verify that they're
+ * valid and read-only.
*
* We do not have to fixup the clean/dirty bits here... we can
* allow the pager to do it after the I/O completes.
@@ -460,7 +474,7 @@ vm_pageout_flush(vm_page_t *mc, int count, int flags, int mreq, int *prunlen,
mc[i], i, count));
KASSERT((mc[i]->aflags & PGA_WRITEABLE) == 0,
("vm_pageout_flush: writeable page %p", mc[i]));
- vm_page_sbusy(mc[i]);
+ vm_page_busy_downgrade(mc[i]);
}
vm_object_pip_add(object, count);
@@ -598,6 +612,7 @@ vm_pageout_clean(vm_page_t m, int *numpagedout)
*/
if (object->type == OBJT_VNODE) {
vm_page_unlock(m);
+ vm_page_xunbusy(m);
vp = object->handle;
if (vp->v_type == VREG &&
vn_start_write(vp, &mp, V_NOWAIT) != 0) {
@@ -648,7 +663,7 @@ vm_pageout_clean(vm_page_t m, int *numpagedout)
* The page may have been busied while the object and page
* locks were released.
*/
- if (vm_page_busied(m)) {
+ if (vm_page_tryxbusy(m) == 0) {
vm_page_unlock(m);
error = EBUSY;
goto unlock_all;
@@ -659,6 +674,7 @@ vm_pageout_clean(vm_page_t m, int *numpagedout)
* Remove all writeable mappings, failing if the page is wired.
*/
if (!vm_page_try_remove_write(m)) {
+ vm_page_xunbusy(m);
vm_page_unlock(m);
error = EBUSY;
goto unlock_all;
@@ -792,7 +808,7 @@ recheck:
KASSERT(m->object == object, ("page %p does not belong to %p",
m, object));
- if (vm_page_busied(m))
+ if (vm_page_tryxbusy(m) == 0)
continue;
/*
@@ -804,6 +820,7 @@ recheck:
* wire count is guaranteed not to increase.
*/
if (__predict_false(vm_page_wired(m))) {
+ vm_page_xunbusy(m);
vm_page_dequeue_deferred(m);
continue;
}
@@ -837,6 +854,7 @@ recheck:
}
if (act_delta != 0) {
if (object->ref_count != 0) {
+ vm_page_xunbusy(m);
VM_CNT_INC(v_reactivated);
vm_page_activate(m);
@@ -861,6 +879,7 @@ recheck:
launder--;
continue;
} else if ((object->flags & OBJ_DEAD) == 0) {
+ vm_page_xunbusy(m);
vm_page_requeue(m);
continue;
}
@@ -876,6 +895,7 @@ recheck:
if (object->ref_count != 0) {
vm_page_test_dirty(m);
if (m->dirty == 0 && !vm_page_try_remove_all(m)) {
+ vm_page_xunbusy(m);
vm_page_dequeue_deferred(m);
continue;
}
@@ -900,6 +920,7 @@ free_page:
else
pageout_ok = true;
if (!pageout_ok) {
+ vm_page_xunbusy(m);
vm_page_requeue(m);
continue;
}
@@ -927,7 +948,8 @@ free_page:
}
mtx = NULL;
object = NULL;
- }
+ } else
+ vm_page_xunbusy(m);
}
if (mtx != NULL) {
mtx_unlock(mtx);
@@ -1507,7 +1529,7 @@ recheck:
KASSERT(m->object == object, ("page %p does not belong to %p",
m, object));
- if (vm_page_busied(m)) {
+ if (vm_page_tryxbusy(m) == 0) {
/*
* Don't mess with busy pages. Leave them at
* the front of the queue. Most likely, they
@@ -1529,6 +1551,7 @@ recheck:
* wire count is guaranteed not to increase.
*/
if (__predict_false(vm_page_wired(m))) {
+ vm_page_xunbusy(m);
vm_page_dequeue_deferred(m);
continue;
}
@@ -1562,6 +1585,7 @@ recheck:
}
if (act_delta != 0) {
if (object->ref_count != 0) {
+ vm_page_xunbusy(m);
VM_CNT_INC(v_reactivated);
vm_page_activate(m);
@@ -1575,6 +1599,7 @@ recheck:
m->act_count += act_delta + ACT_ADVANCE;
continue;
} else if ((object->flags & OBJ_DEAD) == 0) {
+ vm_page_xunbusy(m);
vm_page_aflag_set(m, PGA_REQUEUE);
goto reinsert;
}
@@ -1590,6 +1615,7 @@ recheck:
if (object->ref_count != 0) {
vm_page_test_dirty(m);
if (m->dirty == 0 && !vm_page_try_remove_all(m)) {
+ vm_page_xunbusy(m);
vm_page_dequeue_deferred(m);
continue;
}
@@ -1615,7 +1641,10 @@ free_page:
m->queue = PQ_NONE;
vm_page_free(m);
page_shortage--;
- } else if ((object->flags & OBJ_DEAD) == 0)
+ continue;
+ }
+ vm_page_xunbusy(m);
+ if ((object->flags & OBJ_DEAD) == 0)
vm_page_launder(m);
continue;
reinsert:
diff --git a/sys/vm/vm_swapout.c b/sys/vm/vm_swapout.c
index 0f6f5cc1efb6..ada881018f32 100644
--- a/sys/vm/vm_swapout.c
+++ b/sys/vm/vm_swapout.c
@@ -208,6 +208,9 @@ vm_swapout_object_deactivate_pages(pmap_t pmap, vm_object_t first_object,
goto unlock_return;
if (should_yield())
goto unlock_return;
+ if (vm_page_tryxbusy(p) == 0)
+ continue;
+ VM_CNT_INC(v_pdpages);
/*
* The page may acquire a wiring after this check.
@@ -215,11 +218,10 @@ vm_swapout_object_deactivate_pages(pmap_t pmap, vm_object_t first_object,
* no harm done if a wiring appears while we are
* attempting to deactivate the page.
*/
- if (vm_page_busied(p) || vm_page_wired(p))
- continue;
- VM_CNT_INC(v_pdpages);
- if (!pmap_page_exists_quick(pmap, p))
+ if (vm_page_wired(p) || !pmap_page_exists_quick(pmap, p)) {
+ vm_page_xunbusy(p);
continue;
+ }
act_delta = pmap_ts_referenced(p);
vm_page_lock(p);
if ((p->aflags & PGA_REFERENCED) != 0) {
@@ -251,6 +253,7 @@ vm_swapout_object_deactivate_pages(pmap_t pmap, vm_object_t first_object,
} else if (vm_page_inactive(p))
(void)vm_page_try_remove_all(p);
vm_page_unlock(p);
+ vm_page_xunbusy(p);
}
if ((backing_object = object->backing_object) == NULL)
goto unlock_return;