author     Mark Johnston <markj@FreeBSD.org>  2019-07-29 22:01:28 +0000
committer  Mark Johnston <markj@FreeBSD.org>  2019-07-29 22:01:28 +0000
commit     98549e2dc6fb0c38fef2a5357b10c4eb99674d9d (patch)
tree       936bb59b20e13c4792fdc1b407ad6aee087773c8
parent     724450761630cc0b3d8991ec2de00a8ceb507384 (diff)
Centralize the logic in vfs_vmio_unwire() and sendfile_free_page().
Both of these functions atomically unwire a page, optionally attempt to
free the page, and enqueue or requeue the page.  Add functions
vm_page_release() and vm_page_release_locked() to perform the same task.
The latter must be called with the page's object lock held.

As a side effect of this refactoring, the buffer cache will no longer
attempt to free mapped pages when completing direct I/O.  This is
consistent with the handling of pages by sendfile(SF_NOCACHE).

Reviewed by:	alc, kib
MFC after:	2 weeks
Sponsored by:	Netflix
Differential Revision:	https://reviews.freebsd.org/D20986
Notes:
    svn path=/head/; revision=350431
-rw-r--r--  sys/kern/kern_sendfile.c   70
-rw-r--r--  sys/kern/vfs_bio.c         62
-rw-r--r--  sys/vm/vm_page.c           97
-rw-r--r--  sys/vm/vm_page.h            7
4 files changed, 108 insertions, 128 deletions
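For orientation, a minimal caller-side sketch of the new interface follows
(illustrative only, not part of this commit; the function name
example_release_page is hypothetical).  It mirrors what sendfile_free_mext()
does after this change: a caller that wired a page hands it back with
vm_page_release(), passing VPR_TRYFREE when it does not want the page cached;
if the page cannot be freed cheaply, either flag causes it to be placed near
the head of the inactive queue so that it is reclaimed sooner.

#include <sys/param.h>
#include <sys/systm.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>

/* Hypothetical example, not taken from the diff below. */
static void
example_release_page(vm_page_t m, bool nocache)
{
	int flags;

	/*
	 * VPR_TRYFREE: free the page outright if that is cheap (unmapped,
	 * clean, unbusied); otherwise requeue it for early reclamation.
	 * VPR_NOREUSE: when the page is not freed, deactivate it without
	 * maintaining LRU.
	 */
	flags = nocache ? VPR_TRYFREE : 0;

	/* The page's object lock must not be held across this call. */
	vm_page_release(m, flags);
}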
diff --git a/sys/kern/kern_sendfile.c b/sys/kern/kern_sendfile.c
index 169c5d604787..543c9f209266 100644
--- a/sys/kern/kern_sendfile.c
+++ b/sys/kern/kern_sendfile.c
@@ -121,76 +121,22 @@ sfstat_sysctl(SYSCTL_HANDLER_ARGS)
SYSCTL_PROC(_kern_ipc, OID_AUTO, sfstat, CTLTYPE_OPAQUE | CTLFLAG_RW,
NULL, 0, sfstat_sysctl, "I", "sendfile statistics");
-/*
- * Detach mapped page and release resources back to the system. Called
- * by mbuf(9) code when last reference to a page is freed.
- */
-static void
-sendfile_free_page(vm_page_t pg, bool nocache)
-{
- bool freed;
-
- vm_page_lock(pg);
- /*
- * In either case check for the object going away on us. This can
- * happen since we don't hold a reference to it. If so, we're
- * responsible for freeing the page. In 'noncache' case try to free
- * the page, but only if it is cheap to.
- */
- if (vm_page_unwire_noq(pg)) {
- vm_object_t obj;
-
- if ((obj = pg->object) == NULL)
- vm_page_free(pg);
- else {
- freed = false;
- if (nocache && !vm_page_xbusied(pg) &&
- VM_OBJECT_TRYWLOCK(obj)) {
- /* Only free unmapped pages. */
- if (obj->ref_count == 0 ||
- !pmap_page_is_mapped(pg))
- /*
- * The busy test before the object is
- * locked cannot be relied upon.
- */
- freed = vm_page_try_to_free(pg);
- VM_OBJECT_WUNLOCK(obj);
- }
- if (!freed) {
- /*
- * If we were asked to not cache the page, place
- * it near the head of the inactive queue so
- * that it is reclaimed sooner. Otherwise,
- * maintain LRU.
- */
- if (nocache)
- vm_page_deactivate_noreuse(pg);
- else if (vm_page_active(pg))
- vm_page_reference(pg);
- else
- vm_page_deactivate(pg);
- }
- }
- }
- vm_page_unlock(pg);
-}
-
static void
sendfile_free_mext(struct mbuf *m)
{
struct sf_buf *sf;
vm_page_t pg;
- bool nocache;
+ int flags;
KASSERT(m->m_flags & M_EXT && m->m_ext.ext_type == EXT_SFBUF,
("%s: m %p !M_EXT or !EXT_SFBUF", __func__, m));
sf = m->m_ext.ext_arg1;
pg = sf_buf_page(sf);
- nocache = m->m_ext.ext_flags & EXT_FLAG_NOCACHE;
+ flags = (m->m_ext.ext_flags & EXT_FLAG_NOCACHE) != 0 ? VPR_TRYFREE : 0;
sf_buf_free(sf);
- sendfile_free_page(pg, nocache);
+ vm_page_release(pg, flags);
if (m->m_ext.ext_flags & EXT_FLAG_SYNC) {
struct sendfile_sync *sfs = m->m_ext.ext_arg2;
@@ -208,21 +154,21 @@ sendfile_free_mext_pg(struct mbuf *m)
{
struct mbuf_ext_pgs *ext_pgs;
vm_page_t pg;
- int i;
- bool nocache, cache_last;
+ int flags, i;
+ bool cache_last;
KASSERT(m->m_flags & M_EXT && m->m_ext.ext_type == EXT_PGS,
("%s: m %p !M_EXT or !EXT_PGS", __func__, m));
- nocache = m->m_ext.ext_flags & EXT_FLAG_NOCACHE;
cache_last = m->m_ext.ext_flags & EXT_FLAG_CACHE_LAST;
ext_pgs = m->m_ext.ext_pgs;
+ flags = (m->m_ext.ext_flags & EXT_FLAG_NOCACHE) != 0 ? VPR_TRYFREE : 0;
for (i = 0; i < ext_pgs->npgs; i++) {
if (cache_last && i == ext_pgs->npgs - 1)
- nocache = false;
+ flags = 0;
pg = PHYS_TO_VM_PAGE(ext_pgs->pa[i]);
- sendfile_free_page(pg, nocache);
+ vm_page_release(pg, flags);
}
if (m->m_ext.ext_flags & EXT_FLAG_SYNC) {
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index e6cb88447061..9dda13677a30 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -2895,47 +2895,6 @@ vfs_vmio_iodone(struct buf *bp)
}
/*
- * Unwire a page held by a buf and either free it or update the page queues to
- * reflect its recent use.
- */
-static void
-vfs_vmio_unwire(struct buf *bp, vm_page_t m)
-{
- bool freed;
-
- vm_page_lock(m);
- if (vm_page_unwire_noq(m)) {
- if ((bp->b_flags & B_DIRECT) != 0)
- freed = vm_page_try_to_free(m);
- else
- freed = false;
- if (!freed) {
- /*
- * Use a racy check of the valid bits to determine
- * whether we can accelerate reclamation of the page.
- * The valid bits will be stable unless the page is
- * being mapped or is referenced by multiple buffers,
- * and in those cases we expect races to be rare. At
- * worst we will either accelerate reclamation of a
- * valid page and violate LRU, or unnecessarily defer
- * reclamation of an invalid page.
- *
- * The B_NOREUSE flag marks data that is not expected to
- * be reused, so accelerate reclamation in that case
- * too. Otherwise, maintain LRU.
- */
- if (m->valid == 0 || (bp->b_flags & B_NOREUSE) != 0)
- vm_page_deactivate_noreuse(m);
- else if (vm_page_active(m))
- vm_page_reference(m);
- else
- vm_page_deactivate(m);
- }
- }
- vm_page_unlock(m);
-}
-
-/*
* Perform page invalidation when a buffer is released. The fully invalid
* pages will be reclaimed later in vfs_vmio_truncate().
*/
@@ -2944,7 +2903,7 @@ vfs_vmio_invalidate(struct buf *bp)
{
vm_object_t obj;
vm_page_t m;
- int i, resid, poffset, presid;
+ int flags, i, resid, poffset, presid;
if (buf_mapped(bp)) {
BUF_CHECK_MAPPED(bp);
@@ -2963,6 +2922,7 @@ vfs_vmio_invalidate(struct buf *bp)
*
* See man buf(9) for more information
*/
+ flags = (bp->b_flags & B_NOREUSE) != 0 ? VPR_NOREUSE : 0;
obj = bp->b_bufobj->bo_object;
resid = bp->b_bufsize;
poffset = bp->b_offset & PAGE_MASK;
@@ -2984,7 +2944,7 @@ vfs_vmio_invalidate(struct buf *bp)
}
if (pmap_page_wired_mappings(m) == 0)
vm_page_set_invalid(m, poffset, presid);
- vfs_vmio_unwire(bp, m);
+ vm_page_release_locked(m, flags);
resid -= presid;
poffset = 0;
}
@@ -3000,7 +2960,7 @@ vfs_vmio_truncate(struct buf *bp, int desiredpages)
{
vm_object_t obj;
vm_page_t m;
- int i;
+ int flags, i;
if (bp->b_npages == desiredpages)
return;
@@ -3015,14 +2975,22 @@ vfs_vmio_truncate(struct buf *bp, int desiredpages)
/*
* The object lock is needed only if we will attempt to free pages.
*/
- obj = (bp->b_flags & B_DIRECT) != 0 ? bp->b_bufobj->bo_object : NULL;
- if (obj != NULL)
+ flags = (bp->b_flags & B_NOREUSE) != 0 ? VPR_NOREUSE : 0;
+ if ((bp->b_flags & B_DIRECT) != 0) {
+ flags |= VPR_TRYFREE;
+ obj = bp->b_bufobj->bo_object;
VM_OBJECT_WLOCK(obj);
+ } else {
+ obj = NULL;
+ }
for (i = desiredpages; i < bp->b_npages; i++) {
m = bp->b_pages[i];
KASSERT(m != bogus_page, ("allocbuf: bogus page found"));
bp->b_pages[i] = NULL;
- vfs_vmio_unwire(bp, m);
+ if (obj != NULL)
+ vm_page_release_locked(m, flags);
+ else
+ vm_page_release(m, flags);
}
if (obj != NULL)
VM_OBJECT_WUNLOCK(obj);
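A hedged aside on the locked variant (the helper below is illustrative, not
part of the commit, and assumes the same headers as the earlier sketch): when
a caller already holds, or can take once, the object's write lock,
vm_page_release_locked() honours VPR_TRYFREE without the per-page
VM_OBJECT_TRYWLOCK attempt that vm_page_release() must make.  This is the
pattern the vfs_vmio_truncate() hunk above adopts for B_DIRECT buffers.

/* Hypothetical example; every page in ma[] must belong to obj. */
static void
example_release_run(vm_object_t obj, vm_page_t *ma, int npages, int flags)
{
	int i;

	/*
	 * Take the object lock once so that VPR_TRYFREE can be honoured
	 * for each page without a per-page try-lock.
	 */
	VM_OBJECT_WLOCK(obj);
	for (i = 0; i < npages; i++)
		vm_page_release_locked(ma[i], flags);
	VM_OBJECT_WUNLOCK(obj);
}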
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 3d80dcda16d0..26398a7a7408 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -3747,29 +3747,92 @@ vm_page_unswappable(vm_page_t m)
vm_page_enqueue(m, PQ_UNSWAPPABLE);
}
+static void
+vm_page_release_toq(vm_page_t m, int flags)
+{
+
+ /*
+ * Use a check of the valid bits to determine whether we should
+ * accelerate reclamation of the page. The object lock might not be
+ * held here, in which case the check is racy. At worst we will either
+ * accelerate reclamation of a valid page and violate LRU, or
+ * unnecessarily defer reclamation of an invalid page.
+ *
+ * If we were asked to not cache the page, place it near the head of the
+ * inactive queue so that it is reclaimed sooner.
+ */
+ if ((flags & (VPR_TRYFREE | VPR_NOREUSE)) != 0 || m->valid == 0)
+ vm_page_deactivate_noreuse(m);
+ else if (vm_page_active(m))
+ vm_page_reference(m);
+ else
+ vm_page_deactivate(m);
+}
+
/*
- * Attempt to free the page. If it cannot be freed, do nothing. Returns true
- * if the page is freed and false otherwise.
- *
- * The page must be managed. The page and its containing object must be
- * locked.
+ * Unwire a page and either attempt to free it or re-add it to the page queues.
*/
-bool
-vm_page_try_to_free(vm_page_t m)
+void
+vm_page_release(vm_page_t m, int flags)
+{
+ vm_object_t object;
+ bool freed;
+
+ KASSERT((m->oflags & VPO_UNMANAGED) == 0,
+ ("vm_page_release: page %p is unmanaged", m));
+
+ vm_page_lock(m);
+ if (m->object != NULL)
+ VM_OBJECT_ASSERT_UNLOCKED(m->object);
+ if (vm_page_unwire_noq(m)) {
+ if ((object = m->object) == NULL) {
+ vm_page_free(m);
+ } else {
+ freed = false;
+ if ((flags & VPR_TRYFREE) != 0 && !vm_page_busied(m) &&
+ /* Depends on type stability. */
+ VM_OBJECT_TRYWLOCK(object)) {
+ /*
+ * Only free unmapped pages. The busy test from
+ * before the object was locked cannot be relied
+ * upon.
+ */
+ if ((object->ref_count == 0 ||
+ !pmap_page_is_mapped(m)) && m->dirty == 0 &&
+ !vm_page_busied(m)) {
+ vm_page_free(m);
+ freed = true;
+ }
+ VM_OBJECT_WUNLOCK(object);
+ }
+
+ if (!freed)
+ vm_page_release_toq(m, flags);
+ }
+ }
+ vm_page_unlock(m);
+}
+
+/* See vm_page_release(). */
+void
+vm_page_release_locked(vm_page_t m, int flags)
{
- vm_page_assert_locked(m);
VM_OBJECT_ASSERT_WLOCKED(m->object);
- KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("page %p is unmanaged", m));
- if (m->dirty != 0 || vm_page_wired(m) || vm_page_busied(m))
- return (false);
- if (m->object->ref_count != 0) {
- pmap_remove_all(m);
- if (m->dirty != 0)
- return (false);
+ KASSERT((m->oflags & VPO_UNMANAGED) == 0,
+ ("vm_page_release_locked: page %p is unmanaged", m));
+
+ vm_page_lock(m);
+ if (vm_page_unwire_noq(m)) {
+ if ((flags & VPR_TRYFREE) != 0 &&
+ (m->object->ref_count == 0 || !pmap_page_is_mapped(m)) &&
+ m->dirty == 0 && !vm_page_busied(m)) {
+ vm_page_free(m);
+ } else {
+ vm_page_release_toq(m, flags);
+ }
}
- vm_page_free(m);
- return (true);
+ vm_page_unlock(m);
}
/*
diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
index 3edde63abb9b..58f181d599c5 100644
--- a/sys/vm/vm_page.h
+++ b/sys/vm/vm_page.h
@@ -562,8 +562,12 @@ bool vm_page_reclaim_contig(int req, u_long npages, vm_paddr_t low,
bool vm_page_reclaim_contig_domain(int domain, int req, u_long npages,
vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary);
void vm_page_reference(vm_page_t m);
+#define VPR_TRYFREE 0x01
+#define VPR_NOREUSE 0x02
+void vm_page_release(vm_page_t m, int flags);
+void vm_page_release_locked(vm_page_t m, int flags);
bool vm_page_remove(vm_page_t);
-int vm_page_rename (vm_page_t, vm_object_t, vm_pindex_t);
+int vm_page_rename(vm_page_t, vm_object_t, vm_pindex_t);
vm_page_t vm_page_replace(vm_page_t mnew, vm_object_t object,
vm_pindex_t pindex);
void vm_page_requeue(vm_page_t m);
@@ -574,7 +578,6 @@ void vm_page_set_valid_range(vm_page_t m, int base, int size);
int vm_page_sleep_if_busy(vm_page_t m, const char *msg);
vm_offset_t vm_page_startup(vm_offset_t vaddr);
void vm_page_sunbusy(vm_page_t m);
-bool vm_page_try_to_free(vm_page_t m);
int vm_page_trysbusy(vm_page_t m);
void vm_page_unhold_pages(vm_page_t *ma, int count);
void vm_page_unswappable(vm_page_t m);
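For comparison, a sketch of the idiom that callers previously open-coded
around the now-removed vm_page_try_to_free(), next to its replacement
(illustrative only; as the commit message notes, the new path no longer frees
mapped pages, so the two are not exactly equivalent).  This assumes the same
headers as the first sketch and a page m that is wired and belongs to an
object whose write lock the caller holds.

/* Before this commit: unwire, try to free, otherwise requeue. */
static void
example_old_release(vm_page_t m)
{

	vm_page_lock(m);
	if (vm_page_unwire_noq(m) && !vm_page_try_to_free(m))
		vm_page_deactivate_noreuse(m);
	vm_page_unlock(m);
}

/* After this commit: the same steps folded into one call. */
static void
example_new_release(vm_page_t m)
{

	vm_page_release_locked(m, VPR_TRYFREE);
}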