aboutsummaryrefslogtreecommitdiff
path: root/sys/vm/vm_page.c
diff options
context:
space:
mode:
authorJeff Roberson <jeff@FreeBSD.org>2020-02-27 02:37:27 +0000
committerJeff Roberson <jeff@FreeBSD.org>2020-02-27 02:37:27 +0000
commitc49be4f1c6845b24981037794c561cf319d6040a (patch)
tree3c65a8cf858bdceb0ec1a55e25e33a955685feef /sys/vm/vm_page.c
parent600ee699ed2805894f5972c6ac2c3d17dca7f6ce (diff)
downloadsrc-c49be4f1c6845b24981037794c561cf319d6040a.tar.gz
src-c49be4f1c6845b24981037794c561cf319d6040a.zip
Add unlocked grab* function variants that use lockless radix code to
lookup pages. These variants will fall back to their locked counterparts if the page is not present. Discussed with: kib, markj Differential Revision: https://reviews.freebsd.org/D23449
Notes
Notes: svn path=/head/; revision=358363
Diffstat (limited to 'sys/vm/vm_page.c')
-rw-r--r--sys/vm/vm_page.c425
1 files changed, 333 insertions, 92 deletions
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 885a5bea8e69..d3ad661c16e9 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -830,47 +830,39 @@ vm_page_reference(vm_page_t m)
vm_page_aflag_set(m, PGA_REFERENCED);
}
+/*
+ * vm_page_trybusy
+ *
+ * Helper routine for grab functions to trylock busy.
+ *
+ * Returns true on success and false on failure.
+ */
static bool
-vm_page_acquire_flags(vm_page_t m, int allocflags)
+vm_page_trybusy(vm_page_t m, int allocflags)
{
- bool locked;
if ((allocflags & (VM_ALLOC_SBUSY | VM_ALLOC_IGN_SBUSY)) != 0)
- locked = vm_page_trysbusy(m);
+ return (vm_page_trysbusy(m));
else
- locked = vm_page_tryxbusy(m);
- if (locked && (allocflags & VM_ALLOC_WIRED) != 0)
- vm_page_wire(m);
- return (locked);
+ return (vm_page_tryxbusy(m));
}
/*
- * vm_page_busy_sleep_flags
+ * vm_page_tryacquire
*
- * Sleep for busy according to VM_ALLOC_ parameters. Returns true
- * if the caller should retry and false otherwise.
+ * Helper routine for grab functions to trylock busy and wire.
+ *
+ * Returns true on success and false on failure.
*/
-static bool
-vm_page_busy_sleep_flags(vm_object_t object, vm_page_t m, const char *wmesg,
- int allocflags)
+static inline bool
+vm_page_tryacquire(vm_page_t m, int allocflags)
{
+ bool locked;
- if ((allocflags & VM_ALLOC_NOWAIT) != 0)
- return (false);
-
- /*
- * Reference the page before unlocking and sleeping so that
- * the page daemon is less likely to reclaim it.
- */
- if ((allocflags & VM_ALLOC_NOCREAT) == 0)
- vm_page_reference(m);
-
- if (_vm_page_busy_sleep(object, m, m->pindex, wmesg, allocflags, true))
- VM_OBJECT_WLOCK(object);
- if ((allocflags & VM_ALLOC_WAITFAIL) != 0)
- return (false);
-
- return (true);
+ locked = vm_page_trybusy(m, allocflags);
+ if (locked && (allocflags & VM_ALLOC_WIRED) != 0)
+ vm_page_wire(m);
+ return (locked);
}
/*
@@ -894,7 +886,7 @@ vm_page_busy_acquire(vm_page_t m, int allocflags)
*/
obj = m->object;
for (;;) {
- if (vm_page_acquire_flags(m, allocflags))
+ if (vm_page_tryacquire(m, allocflags))
return (true);
if ((allocflags & VM_ALLOC_NOWAIT) != 0)
return (false);
@@ -1604,6 +1596,7 @@ vm_page_object_remove(vm_page_t m)
if ((m->a.flags & PGA_SWAP_FREE) != 0)
vm_pager_page_unswapped(m);
+ m->object = NULL;
mrem = vm_radix_remove(&object->rtree, m->pindex);
KASSERT(mrem == m, ("removed page %p, expected page %p", mrem, m));
@@ -1658,7 +1651,6 @@ vm_page_remove_xbusy(vm_page_t m)
{
vm_page_object_remove(m);
- m->object = NULL;
return (vm_page_drop(m, VPRC_OBJREF) == VPRC_OBJREF);
}
@@ -1679,6 +1671,38 @@ vm_page_lookup(vm_object_t object, vm_pindex_t pindex)
}
/*
+ * This should only be used by lockless functions for releasing transient
+ * incorrect acquires. The page may have been freed after we acquired a
+ * busy lock. In this case busy_lock == VPB_FREED and we have nothing
+ * further to do.
+ */
+static void
+vm_page_busy_release(vm_page_t m)
+{
+ u_int x;
+
+ x = atomic_load_int(&m->busy_lock);
+ for (;;) {
+ if (x == VPB_FREED)
+ break;
+ if ((x & VPB_BIT_SHARED) != 0 && VPB_SHARERS(x) > 1) {
+ if (atomic_fcmpset_int(&m->busy_lock, &x,
+ x - VPB_ONE_SHARER))
+ break;
+ continue;
+ }
+ KASSERT((x & VPB_BIT_SHARED) != 0 ||
+ (x & ~VPB_BIT_WAITERS) == VPB_CURTHREAD_EXCLUSIVE,
+ ("vm_page_busy_release: %p xbusy not owned.", m));
+ if (!atomic_fcmpset_rel_int(&m->busy_lock, &x, VPB_UNBUSIED))
+ continue;
+ if ((x & VPB_BIT_WAITERS) != 0)
+ wakeup(m);
+ break;
+ }
+}
+
+/*
* vm_page_find_least:
*
* Returns the page associated with the object with least pindex
@@ -3688,7 +3712,6 @@ vm_page_free_prep(vm_page_t m)
("vm_page_free_prep: page %p has unexpected ref_count %u",
m, m->ref_count));
vm_page_object_remove(m);
- m->object = NULL;
m->ref_count -= VPRC_OBJREF;
} else
vm_page_assert_unbusied(m);
@@ -4245,18 +4268,81 @@ vm_page_advise(vm_page_t m, int advice)
vm_page_launder(m);
}
-static inline int
-vm_page_grab_pflags(int allocflags)
+/*
+ * vm_page_grab_release
+ *
+ * Helper routine for grab functions to release busy on return.
+ */
+static inline void
+vm_page_grab_release(vm_page_t m, int allocflags)
+{
+
+ if ((allocflags & VM_ALLOC_NOBUSY) != 0) {
+ if ((allocflags & VM_ALLOC_IGN_SBUSY) != 0)
+ vm_page_sunbusy(m);
+ else
+ vm_page_xunbusy(m);
+ }
+}
+
+/*
+ * vm_page_grab_sleep
+ *
+ * Sleep for busy according to VM_ALLOC_ parameters. Returns true
+ * if the caller should retry and false otherwise.
+ *
+ * If the object is locked on entry the object will be unlocked with
+ * false returns and still locked but possibly having been dropped
+ * with true returns.
+ */
+static bool
+vm_page_grab_sleep(vm_object_t object, vm_page_t m, vm_pindex_t pindex,
+ const char *wmesg, int allocflags, bool locked)
+{
+
+ if ((allocflags & VM_ALLOC_NOWAIT) != 0)
+ return (false);
+
+ /*
+ * Reference the page before unlocking and sleeping so that
+ * the page daemon is less likely to reclaim it.
+ */
+ if (locked && (allocflags & VM_ALLOC_NOCREAT) == 0)
+ vm_page_reference(m);
+
+ if (_vm_page_busy_sleep(object, m, m->pindex, wmesg, allocflags,
+ locked) && locked)
+ VM_OBJECT_WLOCK(object);
+ if ((allocflags & VM_ALLOC_WAITFAIL) != 0)
+ return (false);
+
+ return (true);
+}
+
+/*
+ * Assert that the grab flags are valid.
+ */
+static inline void
+vm_page_grab_check(int allocflags)
{
- int pflags;
KASSERT((allocflags & VM_ALLOC_NOBUSY) == 0 ||
(allocflags & VM_ALLOC_WIRED) != 0,
- ("vm_page_grab_pflags: the pages must be busied or wired"));
+ ("vm_page_grab*: the pages must be busied or wired"));
+
KASSERT((allocflags & VM_ALLOC_SBUSY) == 0 ||
(allocflags & VM_ALLOC_IGN_SBUSY) != 0,
- ("vm_page_grab_pflags: VM_ALLOC_SBUSY/VM_ALLOC_IGN_SBUSY "
- "mismatch"));
+ ("vm_page_grab*: VM_ALLOC_SBUSY/VM_ALLOC_IGN_SBUSY mismatch"));
+}
+
+/*
+ * Calculate the page allocation flags for grab.
+ */
+static inline int
+vm_page_grab_pflags(int allocflags)
+{
+ int pflags;
+
pflags = allocflags &
~(VM_ALLOC_NOWAIT | VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL |
VM_ALLOC_NOBUSY);
@@ -4283,15 +4369,15 @@ vm_page_t
vm_page_grab(vm_object_t object, vm_pindex_t pindex, int allocflags)
{
vm_page_t m;
- int pflags;
VM_OBJECT_ASSERT_WLOCKED(object);
- pflags = vm_page_grab_pflags(allocflags);
+ vm_page_grab_check(allocflags);
+
retrylookup:
if ((m = vm_page_lookup(object, pindex)) != NULL) {
- if (!vm_page_acquire_flags(m, allocflags)) {
- if (vm_page_busy_sleep_flags(object, m, "pgrbwt",
- allocflags))
+ if (!vm_page_tryacquire(m, allocflags)) {
+ if (vm_page_grab_sleep(object, m, pindex, "pgrbwt",
+ allocflags, true))
goto retrylookup;
return (NULL);
}
@@ -4299,7 +4385,7 @@ retrylookup:
}
if ((allocflags & VM_ALLOC_NOCREAT) != 0)
return (NULL);
- m = vm_page_alloc(object, pindex, pflags);
+ m = vm_page_alloc(object, pindex, vm_page_grab_pflags(allocflags));
if (m == NULL) {
if ((allocflags & (VM_ALLOC_NOWAIT | VM_ALLOC_WAITFAIL)) != 0)
return (NULL);
@@ -4309,12 +4395,104 @@ retrylookup:
pmap_zero_page(m);
out:
- if ((allocflags & VM_ALLOC_NOBUSY) != 0) {
- if ((allocflags & VM_ALLOC_IGN_SBUSY) != 0)
- vm_page_sunbusy(m);
- else
- vm_page_xunbusy(m);
+ vm_page_grab_release(m, allocflags);
+
+ return (m);
+}
+
+/*
+ * Locklessly attempt to acquire a page given a (object, pindex) tuple
+ * and an optional previous page to avoid the radix lookup. The resulting
+ * page will be validated against the identity tuple and busied or wired
+ * as requested. A NULL *mp return guarantees that the page was not in
+ * radix at the time of the call but callers must perform higher level
+ * synchronization or retry the operation under a lock if they require
+ * an atomic answer. This is the only lock free validation routine,
+ * other routines can depend on the resulting page state.
+ *
+ * The return value indicates whether the operation failed due to caller
+ * flags. The return is tri-state with mp:
+ *
+ * (true, *mp != NULL) - The operation was successful.
+ * (true, *mp == NULL) - The page was not found in tree.
+ * (false, *mp == NULL) - WAITFAIL or NOWAIT prevented acquisition.
+ */
+static bool
+vm_page_acquire_unlocked(vm_object_t object, vm_pindex_t pindex,
+ vm_page_t prev, vm_page_t *mp, int allocflags)
+{
+ vm_page_t m;
+
+ vm_page_grab_check(allocflags);
+ MPASS(prev == NULL || vm_page_busied(prev) || vm_page_wired(prev));
+
+ *mp = NULL;
+ for (;;) {
+ /*
+ * We may see a false NULL here because the previous page
+ * has been removed or just inserted and the list is loaded
+ * without barriers. Switch to radix to verify.
+ */
+ if (prev == NULL || (m = TAILQ_NEXT(prev, listq)) == NULL ||
+ m->pindex != pindex ||
+ atomic_load_ptr(&m->object) != object) {
+ prev = NULL;
+ /*
+ * This guarantees the result is instantaneously
+ * correct.
+ */
+ m = vm_radix_lookup_unlocked(&object->rtree, pindex);
+ }
+ if (m == NULL)
+ return (true);
+ if (vm_page_trybusy(m, allocflags)) {
+ if (m->object == object && m->pindex == pindex)
+ break;
+ /* relookup. */
+ vm_page_busy_release(m);
+ cpu_spinwait();
+ continue;
+ }
+ if (!vm_page_grab_sleep(object, m, pindex, "pgnslp",
+ allocflags, false))
+ return (false);
}
+ if ((allocflags & VM_ALLOC_WIRED) != 0)
+ vm_page_wire(m);
+ vm_page_grab_release(m, allocflags);
+ *mp = m;
+ return (true);
+}
+
+/*
+ * Try to locklessly grab a page and fall back to the object lock if NOCREAT
+ * is not set.
+ */
+vm_page_t
+vm_page_grab_unlocked(vm_object_t object, vm_pindex_t pindex, int allocflags)
+{
+ vm_page_t m;
+
+ vm_page_grab_check(allocflags);
+
+ if (!vm_page_acquire_unlocked(object, pindex, NULL, &m, allocflags))
+ return (NULL);
+ if (m != NULL)
+ return (m);
+
+ /*
+ * The radix lockless lookup should never return a false negative
+ * errors. If the user specifies NOCREAT they are guaranteed there
+ * was no page present at the instant of the call. A NOCREAT caller
+ * must handle create races gracefully.
+ */
+ if ((allocflags & VM_ALLOC_NOCREAT) != 0)
+ return (NULL);
+
+ VM_OBJECT_WLOCK(object);
+ m = vm_page_grab(object, pindex, allocflags);
+ VM_OBJECT_WUNLOCK(object);
+
return (m);
}
@@ -4330,7 +4508,6 @@ vm_page_grab_valid(vm_page_t *mp, vm_object_t object, vm_pindex_t pindex, int al
{
vm_page_t m;
vm_page_t ma[VM_INITIAL_PAGEIN];
- bool sleep, xbusy;
int after, i, pflags, rv;
KASSERT((allocflags & VM_ALLOC_SBUSY) == 0 ||
@@ -4344,7 +4521,6 @@ vm_page_grab_valid(vm_page_t *mp, vm_object_t object, vm_pindex_t pindex, int al
pflags |= VM_ALLOC_WAITFAIL;
retrylookup:
- xbusy = false;
if ((m = vm_page_lookup(object, pindex)) != NULL) {
/*
* If the page is fully valid it can only become invalid
@@ -4355,41 +4531,27 @@ retrylookup:
* However, we will not end up with an invalid page and a
* shared lock.
*/
- if (!vm_page_all_valid(m) ||
- (allocflags & (VM_ALLOC_IGN_SBUSY | VM_ALLOC_SBUSY)) == 0) {
- sleep = !vm_page_tryxbusy(m);
- xbusy = true;
- } else
- sleep = !vm_page_trysbusy(m);
- if (sleep) {
- (void)vm_page_busy_sleep_flags(object, m, "pgrbwt",
- allocflags);
+ if (!vm_page_trybusy(m,
+ vm_page_all_valid(m) ? allocflags : 0)) {
+ (void)vm_page_grab_sleep(object, m, pindex, "pgrbwt",
+ allocflags, true);
goto retrylookup;
}
- if ((allocflags & VM_ALLOC_NOCREAT) != 0 &&
- !vm_page_all_valid(m)) {
- if (xbusy)
- vm_page_xunbusy(m);
- else
- vm_page_sunbusy(m);
+ if (vm_page_all_valid(m))
+ goto out;
+ if ((allocflags & VM_ALLOC_NOCREAT) != 0) {
+ vm_page_busy_release(m);
*mp = NULL;
return (VM_PAGER_FAIL);
}
- if ((allocflags & VM_ALLOC_WIRED) != 0)
- vm_page_wire(m);
- if (vm_page_all_valid(m))
- goto out;
} else if ((allocflags & VM_ALLOC_NOCREAT) != 0) {
*mp = NULL;
return (VM_PAGER_FAIL);
- } else if ((m = vm_page_alloc(object, pindex, pflags)) != NULL) {
- xbusy = true;
- } else {
+ } else if ((m = vm_page_alloc(object, pindex, pflags)) == NULL) {
goto retrylookup;
}
vm_page_assert_xbusied(m);
- MPASS(xbusy);
if (vm_pager_has_page(object, pindex, NULL, &after)) {
after = MIN(after, VM_INITIAL_PAGEIN);
after = MIN(after, allocflags >> VM_ALLOC_COUNT_SHIFT);
@@ -4415,8 +4577,6 @@ retrylookup:
/* Pager may have replaced a page. */
m = ma[0];
if (rv != VM_PAGER_OK) {
- if ((allocflags & VM_ALLOC_WIRED) != 0)
- vm_page_unwire_noq(m);
for (i = 0; i < after; i++) {
if (!vm_page_wired(ma[i]))
vm_page_free(ma[i]);
@@ -4433,19 +4593,60 @@ retrylookup:
vm_page_zero_invalid(m, TRUE);
}
out:
- if ((allocflags & VM_ALLOC_NOBUSY) != 0) {
- if (xbusy)
- vm_page_xunbusy(m);
- else
- vm_page_sunbusy(m);
- }
- if ((allocflags & VM_ALLOC_SBUSY) != 0 && xbusy)
+ if ((allocflags & VM_ALLOC_WIRED) != 0)
+ vm_page_wire(m);
+ if ((allocflags & VM_ALLOC_SBUSY) != 0 && vm_page_xbusied(m))
vm_page_busy_downgrade(m);
+ else if ((allocflags & VM_ALLOC_NOBUSY) != 0)
+ vm_page_busy_release(m);
*mp = m;
return (VM_PAGER_OK);
}
/*
+ * Locklessly grab a valid page. If the page is not valid or not yet
+ * allocated this will fall back to the object lock method.
+ */
+int
+vm_page_grab_valid_unlocked(vm_page_t *mp, vm_object_t object,
+ vm_pindex_t pindex, int allocflags)
+{
+ vm_page_t m;
+ int flags;
+ int error;
+
+ KASSERT((allocflags & VM_ALLOC_SBUSY) == 0 ||
+ (allocflags & VM_ALLOC_IGN_SBUSY) != 0,
+ ("vm_page_grab_valid_unlocked: VM_ALLOC_SBUSY/VM_ALLOC_IGN_SBUSY "
+ "mismatch"));
+ KASSERT((allocflags &
+ (VM_ALLOC_NOWAIT | VM_ALLOC_WAITFAIL | VM_ALLOC_ZERO)) == 0,
+ ("vm_page_grab_valid_unlocked: Invalid flags 0x%X", allocflags));
+
+ /*
+ * Attempt a lockless lookup and busy. We need at least an sbusy
+ * before we can inspect the valid field and return a wired page.
+ */
+ flags = allocflags & ~(VM_ALLOC_NOBUSY | VM_ALLOC_WIRED);
+ if (!vm_page_acquire_unlocked(object, pindex, NULL, mp, flags))
+ return (VM_PAGER_FAIL);
+ if ((m = *mp) != NULL) {
+ if (vm_page_all_valid(m)) {
+ if ((allocflags & VM_ALLOC_WIRED) != 0)
+ vm_page_wire(m);
+ vm_page_grab_release(m, allocflags);
+ return (VM_PAGER_OK);
+ }
+ vm_page_busy_release(m);
+ }
+ VM_OBJECT_WLOCK(object);
+ error = vm_page_grab_valid(mp, object, pindex, allocflags);
+ VM_OBJECT_WUNLOCK(object);
+
+ return (error);
+}
+
+/*
* Return the specified range of pages from the given object. For each
* page offset within the range, if a page already exists within the object
* at that offset and it is busy, then wait for it to change state. If,
@@ -4482,6 +4683,7 @@ vm_page_grab_pages(vm_object_t object, vm_pindex_t pindex, int allocflags,
VM_OBJECT_ASSERT_WLOCKED(object);
KASSERT(((u_int)allocflags >> VM_ALLOC_COUNT_SHIFT) == 0,
("vm_page_grap_pages: VM_ALLOC_COUNT() is not allowed"));
+ vm_page_grab_check(allocflags);
pflags = vm_page_grab_pflags(allocflags);
if (count == 0)
@@ -4497,9 +4699,9 @@ retrylookup:
mpred = TAILQ_PREV(m, pglist, listq);
for (; i < count; i++) {
if (m != NULL) {
- if (!vm_page_acquire_flags(m, allocflags)) {
- if (vm_page_busy_sleep_flags(object, m,
- "grbmaw", allocflags))
+ if (!vm_page_tryacquire(m, allocflags)) {
+ if (vm_page_grab_sleep(object, m, pindex,
+ "grbmaw", allocflags, true))
goto retrylookup;
break;
}
@@ -4521,12 +4723,7 @@ retrylookup:
pmap_zero_page(m);
vm_page_valid(m);
}
- if ((allocflags & VM_ALLOC_NOBUSY) != 0) {
- if ((allocflags & VM_ALLOC_IGN_SBUSY) != 0)
- vm_page_sunbusy(m);
- else
- vm_page_xunbusy(m);
- }
+ vm_page_grab_release(m, allocflags);
ma[i] = mpred = m;
m = vm_page_next(m);
}
@@ -4534,6 +4731,50 @@ retrylookup:
}
/*
+ * Unlocked variant of vm_page_grab_pages(). This accepts the same flags
+ * and will fall back to the locked variant to handle allocation.
+ */
+int
+vm_page_grab_pages_unlocked(vm_object_t object, vm_pindex_t pindex,
+ int allocflags, vm_page_t *ma, int count)
+{
+ vm_page_t m, pred;
+ int flags;
+ int i;
+
+ vm_page_grab_check(allocflags);
+
+ /*
+ * Modify flags for lockless acquire to hold the page until we
+ * set it valid if necessary.
+ */
+ flags = allocflags & ~VM_ALLOC_NOBUSY;
+ pred = NULL;
+ for (i = 0; i < count; i++, pindex++) {
+ if (!vm_page_acquire_unlocked(object, pindex, pred, &m, flags))
+ return (i);
+ if (m == NULL)
+ break;
+ if ((flags & VM_ALLOC_ZERO) != 0 && vm_page_none_valid(m)) {
+ if ((m->flags & PG_ZERO) == 0)
+ pmap_zero_page(m);
+ vm_page_valid(m);
+ }
+ /* m will still be wired or busy according to flags. */
+ vm_page_grab_release(m, allocflags);
+ pred = ma[i] = m;
+ }
+ if ((allocflags & VM_ALLOC_NOCREAT) != 0)
+ return (i);
+ count -= i;
+ VM_OBJECT_WLOCK(object);
+ i += vm_page_grab_pages(object, pindex, allocflags, &ma[i], count);
+ VM_OBJECT_WUNLOCK(object);
+
+ return (i);
+}
+
+/*
* Mapping function for valid or dirty bits in a page.
*
* Inputs are required to range within a page.