From d6e13f3b4d5743177ac22f3642b07ba652c1f1a8 Mon Sep 17 00:00:00 2001
From: Jeff Roberson <jeff@FreeBSD.org>
Date: Sun, 19 Jan 2020 23:47:32 +0000
Subject: Don't hold the object lock while calling getpages.

The vnode pager does not want the object lock held.  Moving this out
allows further object lock scope reduction in callers.  While here add
some missing paging in progress calls and an assert.  The object handle
is now protected explicitly with pip.

Reviewed by:	kib, markj
Differential Revision:	https://reviews.freebsd.org/D23033
---
 sys/dev/md/md.c           | 11 +++++++++--
 sys/fs/tmpfs/tmpfs_subr.c |  4 ++++
 sys/kern/kern_sendfile.c  |  8 ++++++++
 sys/kern/uipc_shm.c       |  4 ++++
 sys/vm/device_pager.c     |  3 ++-
 sys/vm/phys_pager.c       |  1 -
 sys/vm/sg_pager.c         |  4 ++--
 sys/vm/swap_pager.c       | 11 ++++++++---
 sys/vm/vm_fault.c         |  2 ++
 sys/vm/vm_object.h        |  7 +++++++
 sys/vm/vm_page.c          |  4 ++++
 sys/vm/vm_pager.c         |  7 ++++++-
 sys/vm/vm_swapout.c       |  4 +++-
 sys/vm/vnode_pager.c      |  5 +----
 14 files changed, 60 insertions(+), 15 deletions(-)

(limited to 'sys')

diff --git a/sys/dev/md/md.c b/sys/dev/md/md.c
index ee7fe8f89a5d..4a07591ed51d 100644
--- a/sys/dev/md/md.c
+++ b/sys/dev/md/md.c
@@ -1057,11 +1057,12 @@ mdstart_swap(struct md_s *sc, struct bio *bp)
 	lastend = (bp->bio_offset + bp->bio_length - 1) % PAGE_SIZE + 1;
 
 	rv = VM_PAGER_OK;
-	VM_OBJECT_WLOCK(sc->object);
 	vm_object_pip_add(sc->object, 1);
 	for (i = bp->bio_offset / PAGE_SIZE; i <= lastp; i++) {
 		len = ((i == lastp) ? lastend : PAGE_SIZE) - offs;
+		VM_OBJECT_WLOCK(sc->object);
 		m = vm_page_grab(sc->object, i, VM_ALLOC_SYSTEM);
+		VM_OBJECT_WUNLOCK(sc->object);
 		if (bp->bio_cmd == BIO_READ) {
 			if (vm_page_all_valid(m))
 				rv = VM_PAGER_OK;
@@ -1069,7 +1070,9 @@ mdstart_swap(struct md_s *sc, struct bio *bp)
 				rv = vm_pager_get_pages(sc->object, &m, 1,
 				    NULL, NULL);
 			if (rv == VM_PAGER_ERROR) {
+				VM_OBJECT_WLOCK(sc->object);
 				vm_page_free(m);
+				VM_OBJECT_WUNLOCK(sc->object);
 				break;
 			} else if (rv == VM_PAGER_FAIL) {
 				/*
@@ -1099,7 +1102,9 @@ mdstart_swap(struct md_s *sc, struct bio *bp)
 				rv = vm_pager_get_pages(sc->object, &m, 1,
 				    NULL, NULL);
 			if (rv == VM_PAGER_ERROR) {
+				VM_OBJECT_WLOCK(sc->object);
 				vm_page_free(m);
+				VM_OBJECT_WUNLOCK(sc->object);
 				break;
 			} else if (rv == VM_PAGER_FAIL)
 				pmap_zero_page(m);
@@ -1122,8 +1127,10 @@ mdstart_swap(struct md_s *sc, struct bio *bp)
 			else
 				rv = vm_pager_get_pages(sc->object, &m, 1,
 				    NULL, NULL);
+			VM_OBJECT_WLOCK(sc->object);
 			if (rv == VM_PAGER_ERROR) {
 				vm_page_free(m);
+				VM_OBJECT_WUNLOCK(sc->object);
 				break;
 			} else if (rv == VM_PAGER_FAIL) {
 				vm_page_free(m);
@@ -1139,6 +1146,7 @@ mdstart_swap(struct md_s *sc, struct bio *bp)
 					m = NULL;
 				}
 			}
+			VM_OBJECT_WUNLOCK(sc->object);
 		}
 		if (m != NULL) {
 			vm_page_xunbusy(m);
@@ -1160,7 +1168,6 @@ mdstart_swap(struct md_s *sc, struct bio *bp)
 		ma_offs += len;
 	}
 	vm_object_pip_wakeup(sc->object);
-	VM_OBJECT_WUNLOCK(sc->object);
 
 	return (rv != VM_PAGER_ERROR ? 0 : ENOSPC);
 }
diff --git a/sys/fs/tmpfs/tmpfs_subr.c b/sys/fs/tmpfs/tmpfs_subr.c
index 1bf1062c0270..e93c8f2192ed 100644
--- a/sys/fs/tmpfs/tmpfs_subr.c
+++ b/sys/fs/tmpfs/tmpfs_subr.c
@@ -1480,8 +1480,12 @@ retry:
 				    VM_ALLOC_WAITFAIL);
 				if (m == NULL)
 					goto retry;
+				vm_object_pip_add(uobj, 1);
+				VM_OBJECT_WUNLOCK(uobj);
 				rv = vm_pager_get_pages(uobj, &m, 1, NULL,
 				    NULL);
+				VM_OBJECT_WLOCK(uobj);
+				vm_object_pip_wakeup(uobj);
 				if (rv == VM_PAGER_OK) {
 					/*
 					 * Since the page was not resident,
diff --git a/sys/kern/kern_sendfile.c b/sys/kern/kern_sendfile.c
index ba08cab4f5ce..106019328b7e 100644
--- a/sys/kern/kern_sendfile.c
+++ b/sys/kern/kern_sendfile.c
@@ -89,6 +89,7 @@ struct sf_io {
 	int		npages;
 	struct socket	*so;
 	struct mbuf	*m;
+	vm_object_t	obj;
 #ifdef KERN_TLS
 	struct ktls_session *tls;
 #endif
@@ -269,6 +270,8 @@ sendfile_iodone(void *arg, vm_page_t *pg, int count, int error)
 	if (!refcount_release(&sfio->nios))
 		return;
 
+	vm_object_pip_wakeup(sfio->obj);
+
 	if (__predict_false(sfio->error && sfio->m == NULL)) {
 		/*
 		 * I/O operation failed, but pru_send hadn't been executed -
@@ -421,9 +424,11 @@ sendfile_swapin(vm_object_t obj, struct sf_io *sfio, int *nios, off_t off,
 		}
 
 		refcount_acquire(&sfio->nios);
+		VM_OBJECT_WUNLOCK(obj);
 		rv = vm_pager_get_pages_async(obj, pa + i, count, NULL,
 		    i + count == npages ? &rhpages : NULL,
 		    &sendfile_iodone, sfio);
+		VM_OBJECT_WLOCK(obj);
 		if (__predict_false(rv != VM_PAGER_OK)) {
 			/*
 			 * Perform full pages recovery before returning EIO.
@@ -815,7 +820,9 @@ retry_space:
 			    npages * sizeof(vm_page_t), M_TEMP, M_WAITOK);
 			refcount_init(&sfio->nios, 1);
 			sfio->so = so;
+			sfio->obj = obj;
 			sfio->error = 0;
+			vm_object_pip_add(obj, 1);
 
 #ifdef KERN_TLS
 			/*
@@ -1053,6 +1060,7 @@ prepend_header:
 			 * we can send data right now without the
 			 * PRUS_NOTREADY flag.
 			 */
+			vm_object_pip_wakeup(sfio->obj);
 			free(sfio, M_TEMP);
 #ifdef KERN_TLS
 			if (tls != NULL && tls->mode == TCP_TLS_MODE_SW) {
diff --git a/sys/kern/uipc_shm.c b/sys/kern/uipc_shm.c
index f6ae4fb58bb6..755c5f56744c 100644
--- a/sys/kern/uipc_shm.c
+++ b/sys/kern/uipc_shm.c
@@ -504,8 +504,12 @@ retry:
 			    VM_ALLOC_NORMAL | VM_ALLOC_WAITFAIL);
 			if (m == NULL)
 				goto retry;
+			vm_object_pip_add(object, 1);
+			VM_OBJECT_WUNLOCK(object);
 			rv = vm_pager_get_pages(object, &m, 1, NULL,
 			    NULL);
+			VM_OBJECT_WLOCK(object);
+			vm_object_pip_wakeup(object);
 			if (rv == VM_PAGER_OK) {
 				/*
 				 * Since the page was not resident,
diff --git a/sys/vm/device_pager.c b/sys/vm/device_pager.c
index a38f11277c36..40b32e9bad74 100644
--- a/sys/vm/device_pager.c
+++ b/sys/vm/device_pager.c
@@ -289,9 +289,9 @@ dev_pager_getpages(vm_object_t object, vm_page_t *ma, int count, int *rbehind,
 	/* Since our haspage reports zero after/before, the count is 1. */
 	KASSERT(count == 1, ("%s: count %d", __func__, count));
 
-	VM_OBJECT_ASSERT_WLOCKED(object);
 	if (object->un_pager.devp.ops->cdev_pg_fault == NULL)
 		return (VM_PAGER_FAIL);
+	VM_OBJECT_WLOCK(object);
 	error = object->un_pager.devp.ops->cdev_pg_fault(object,
 	    IDX_TO_OFF(ma[0]->pindex), PROT_READ, &ma[0]);
 
@@ -312,6 +312,7 @@ dev_pager_getpages(vm_object_t object, vm_page_t *ma, int count, int *rbehind,
 		if (rahead)
 			*rahead = 0;
 	}
+	VM_OBJECT_WUNLOCK(object);
 
 	return (error);
 }
diff --git a/sys/vm/phys_pager.c b/sys/vm/phys_pager.c
index 7da2adf823d1..1fbc87bb95df 100644
--- a/sys/vm/phys_pager.c
+++ b/sys/vm/phys_pager.c
@@ -143,7 +143,6 @@ phys_pager_getpages(vm_object_t object, vm_page_t *m, int count, int *rbehind,
 {
 	int i;
 
-	VM_OBJECT_ASSERT_WLOCKED(object);
 	for (i = 0; i < count; i++) {
 		if (vm_page_none_valid(m[i])) {
 			if ((m[i]->flags & PG_ZERO) == 0)
diff --git a/sys/vm/sg_pager.c b/sys/vm/sg_pager.c
index f5e211ab9bc6..8faaad724f01 100644
--- a/sys/vm/sg_pager.c
+++ b/sys/vm/sg_pager.c
@@ -155,10 +155,9 @@ sg_pager_getpages(vm_object_t object, vm_page_t *m, int count, int *rbehind,
 	/* Since our haspage reports zero after/before, the count is 1. */
 	KASSERT(count == 1, ("%s: count %d", __func__, count));
 
-	VM_OBJECT_ASSERT_WLOCKED(object);
+	/* Handle is stable while paging is in progress. */
 	sg = object->handle;
 	memattr = object->memattr;
-	VM_OBJECT_WUNLOCK(object);
 	offset = m[0]->pindex;
 
 	/*
@@ -196,6 +195,7 @@ sg_pager_getpages(vm_object_t object, vm_page_t *m, int count, int *rbehind,
 	VM_OBJECT_WLOCK(object);
 	TAILQ_INSERT_TAIL(&object->un_pager.sgp.sgp_pglist, page, plinks.q);
 	vm_page_replace(page, object, offset, m[0]);
+	VM_OBJECT_WUNLOCK(object);
 	m[0] = page;
 	vm_page_valid(page);
 
diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c
index 3347536e54a6..98bc52693fef 100644
--- a/sys/vm/swap_pager.c
+++ b/sys/vm/swap_pager.c
@@ -1197,12 +1197,15 @@ swap_pager_getpages(vm_object_t object, vm_page_t *ma, int count, int *rbehind,
 	daddr_t blk;
 	int i, maxahead, maxbehind, reqcount;
 
+	VM_OBJECT_WLOCK(object);
 	reqcount = count;
 
 	KASSERT(object->type == OBJT_SWAP,
 	    ("%s: object not swappable", __func__));
-	if (!swap_pager_haspage(object, ma[0]->pindex, &maxbehind, &maxahead))
+	if (!swap_pager_haspage(object, ma[0]->pindex, &maxbehind, &maxahead)) {
+		VM_OBJECT_WUNLOCK(object);
 		return (VM_PAGER_FAIL);
+	}
 
 	KASSERT(reqcount - 1 <= maxahead,
 	    ("page count %d extends beyond swap block", reqcount));
@@ -1319,6 +1322,7 @@ swap_pager_getpages(vm_object_t object, vm_page_t *ma, int count, int *rbehind,
 	 * is set in the metadata for each page in the request.
 	 */
 	VM_OBJECT_WLOCK(object);
+	/* This could be implemented more efficiently with aflags */
 	while ((ma[0]->oflags & VPO_SWAPINPROG) != 0) {
 		ma[0]->oflags |= VPO_SWAPSLEEP;
 		VM_CNT_INC(v_intrans);
@@ -1329,6 +1333,7 @@ swap_pager_getpages(vm_object_t object, vm_page_t *ma, int count, int *rbehind,
 			    bp->b_bufobj, (intmax_t)bp->b_blkno, bp->b_bcount);
 		}
 	}
+	VM_OBJECT_WUNLOCK(object);
 
 	/*
 	 * If we had an unrecoverable read error pages will not be valid.
@@ -1360,7 +1365,6 @@ swap_pager_getpages_async(vm_object_t object, vm_page_t *ma, int count,
 	int r, error;
 
 	r = swap_pager_getpages(object, ma, count, rbehind, rahead);
-	VM_OBJECT_WUNLOCK(object);
 	switch (r) {
 	case VM_PAGER_OK:
 		error = 0;
@@ -1375,7 +1379,6 @@ swap_pager_getpages_async(vm_object_t object, vm_page_t *ma, int count,
 		panic("unhandled swap_pager_getpages() error %d", r);
 	}
 	(iodone)(arg, ma, count, error);
-	VM_OBJECT_WLOCK(object);
 
 	return (r);
 }
@@ -1756,10 +1759,12 @@ swp_pager_force_pagein(vm_object_t object, vm_pindex_t pindex, int npages)
 		if (i < npages && ma[i]->valid != VM_PAGE_BITS_ALL)
 			continue;
 		if (j < i) {
+			VM_OBJECT_WUNLOCK(object);
 			/* Page-in nonresident pages. Mark for laundering. */
 			if (swap_pager_getpages(object, &ma[j], i - j, NULL,
 			    NULL) != VM_PAGER_OK)
 				panic("%s: read from swap failed", __func__);
+			VM_OBJECT_WLOCK(object);
 			do {
 				swp_pager_force_launder(ma[j]);
 			} while (++j < i);
diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
index da962038b06e..1bb636d9ee9c 100644
--- a/sys/vm/vm_fault.c
+++ b/sys/vm/vm_fault.c
@@ -1080,8 +1080,10 @@ readrest:
 				}
 				ahead = ulmin(ahead, atop(e_end - vaddr) - 1);
 			}
+			VM_OBJECT_WUNLOCK(fs.object);
 			rv = vm_pager_get_pages(fs.object, &fs.m, 1,
 			    &behind, &ahead);
+			VM_OBJECT_WLOCK(fs.object);
 			if (rv == VM_PAGER_OK) {
 				faultcount = behind + 1 + ahead;
 				hardfault = true;
diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h
index 780f7c7e83b7..dd0314bdb05e 100644
--- a/sys/vm/vm_object.h
+++ b/sys/vm/vm_object.h
@@ -264,6 +264,13 @@ extern struct vm_object kernel_object_store;
 #define	VM_OBJECT_PICKUP(object, state)					\
 	lock_class_rw.lc_lock(&(object)->lock.lock_object, (state))
 
+#define	VM_OBJECT_ASSERT_PAGING(object)					\
+	KASSERT((object)->paging_in_progress != 0,			\
+	    ("vm_object %p is not paging", object))
+#define	VM_OBJECT_ASSERT_REFERENCE(object)				\
+	KASSERT((object)->reference_count != 0,				\
+	    ("vm_object %p is not referenced", object))
+
 struct vnode;
 
 /*
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 51d1f0059a0d..dc137e82aaf2 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -4398,7 +4398,11 @@ retrylookup:
 		}
 	}
 	after = i;
+	vm_object_pip_add(object, after);
+	VM_OBJECT_WUNLOCK(object);
 	rv = vm_pager_get_pages(object, ma, after, NULL, NULL);
+	VM_OBJECT_WLOCK(object);
+	vm_object_pip_wakeupn(object, after);
 	/* Pager may have replaced a page. */
 	m = ma[0];
 	if (rv != VM_PAGER_OK) {
diff --git a/sys/vm/vm_pager.c b/sys/vm/vm_pager.c
index f9b1e252d6fd..31890cd8d40c 100644
--- a/sys/vm/vm_pager.c
+++ b/sys/vm/vm_pager.c
@@ -263,7 +263,8 @@ vm_pager_assert_in(vm_object_t object, vm_page_t *m, int count)
 	 * bogus page, but the first and last pages must be a real ones.
 	 */
 
-	VM_OBJECT_ASSERT_WLOCKED(object);
+	VM_OBJECT_ASSERT_UNLOCKED(object);
+	VM_OBJECT_ASSERT_PAGING(object);
 	KASSERT(count > 0, ("%s: 0 count", __func__));
 	for (int i = 0 ; i < count; i++) {
 		if (m[i] == bogus_page) {
@@ -311,9 +312,13 @@ vm_pager_get_pages(vm_object_t object, vm_page_t *m, int count, int *rbehind,
 		 * If pager has replaced a page, assert that it had
 		 * updated the array.
 		 */
+#ifdef INVARIANTS
+		VM_OBJECT_RLOCK(object);
 		KASSERT(m[i] == vm_page_lookup(object, pindex++),
 		    ("%s: mismatch page %p pindex %ju", __func__,
 		    m[i], (uintmax_t )pindex - 1));
+		VM_OBJECT_RUNLOCK(object);
+#endif
 		/*
 		 * Zero out partially filled data.
 		 */
 	}
diff --git a/sys/vm/vm_swapout.c b/sys/vm/vm_swapout.c
index f52a9cc26b47..8a7c6f57588b 100644
--- a/sys/vm/vm_swapout.c
+++ b/sys/vm/vm_swapout.c
@@ -560,6 +560,7 @@ vm_thread_swapin(struct thread *td, int oom_alloc)
 	VM_OBJECT_WLOCK(ksobj);
 	(void)vm_page_grab_pages(ksobj, 0, oom_alloc | VM_ALLOC_WIRED, ma,
 	    pages);
+	VM_OBJECT_WUNLOCK(ksobj);
 	for (i = 0; i < pages;) {
 		vm_page_assert_xbusied(ma[i]);
 		if (vm_page_all_valid(ma[i])) {
@@ -571,7 +572,9 @@ vm_thread_swapin(struct thread *td, int oom_alloc)
 		for (j = i + 1; j < pages; j++)
 			if (vm_page_all_valid(ma[j]))
 				break;
+		VM_OBJECT_WLOCK(ksobj);
 		rv = vm_pager_has_page(ksobj, ma[i]->pindex, NULL, &a);
+		VM_OBJECT_WUNLOCK(ksobj);
 		KASSERT(rv == 1, ("%s: missing page %p", __func__, ma[i]));
 		count = min(a + 1, j - i);
 		rv = vm_pager_get_pages(ksobj, ma + i, count, NULL, NULL);
@@ -582,7 +585,6 @@ vm_thread_swapin(struct thread *td, int oom_alloc)
 			vm_page_xunbusy(ma[j]);
 		i += count;
 	}
-	VM_OBJECT_WUNLOCK(ksobj);
 	pmap_qenter(td->td_kstack, ma, pages);
 	cpu_thread_swapin(td);
 }
diff --git a/sys/vm/vnode_pager.c b/sys/vm/vnode_pager.c
index ad7677ab2678..82baefe612c5 100644
--- a/sys/vm/vnode_pager.c
+++ b/sys/vm/vnode_pager.c
@@ -735,12 +735,11 @@ vnode_pager_getpages(vm_object_t object, vm_page_t *m, int count, int *rbehind,
 	struct vnode *vp;
 	int rtval;
 
+	/* Handle is stable with paging in progress. */
 	vp = object->handle;
-	VM_OBJECT_WUNLOCK(object);
 	rtval = VOP_GETPAGES(vp, m, count, rbehind, rahead);
 	KASSERT(rtval != EOPNOTSUPP,
 	    ("vnode_pager: FS getpages not implemented\n"));
-	VM_OBJECT_WLOCK(object);
 	return rtval;
 }
 
@@ -752,11 +751,9 @@ vnode_pager_getpages_async(vm_object_t object, vm_page_t *m, int count,
 	int rtval;
 
 	vp = object->handle;
-	VM_OBJECT_WUNLOCK(object);
 	rtval = VOP_GETPAGES_ASYNC(vp, m, count, rbehind, rahead, iodone, arg);
 	KASSERT(rtval != EOPNOTSUPP,
 	    ("vnode_pager: FS getpages_async not implemented\n"));
-	VM_OBJECT_WLOCK(object);
 	return (rtval);
 }
 
-- 
cgit v1.2.3
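
A note on the pattern, appended for readers of the patch (this text and the
sketch below are not part of the commit): every converted caller follows the
same sequence, so it is worth seeing once in isolation. The function
example_page_in() and its parameters are hypothetical, not code added by this
change, but the pip/lock choreography is the one the diff installs in
tmpfs_subr.c, uipc_shm.c, and vm_page.c: take a paging-in-progress reference
while the object lock is held, drop the lock across the pager call, then
relock and release the reference. The pip reference is what keeps
object->type and object->handle stable for the pager (see the new comments in
vnode_pager.c and sg_pager.c), and it is what the new
VM_OBJECT_ASSERT_PAGING() check in vm_pager_assert_in() enforces.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/rwlock.h>
#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>

/*
 * Hypothetical example, not from the patch: page in a single page
 * without holding the object lock across the pager call.  Error
 * handling (freeing the page on VM_PAGER_ERROR, as the converted
 * callers do) is elided for brevity.
 */
static int
example_page_in(vm_object_t object, vm_pindex_t idx, vm_page_t *mp)
{
	vm_page_t m;
	int rv;

	VM_OBJECT_WLOCK(object);
	/* vm_page_grab() returns the page exclusive-busied. */
	m = vm_page_grab(object, idx, VM_ALLOC_NORMAL);
	if (vm_page_all_valid(m)) {
		VM_OBJECT_WUNLOCK(object);
		*mp = m;
		return (VM_PAGER_OK);
	}

	/*
	 * Pin paging-in-progress so the object's type and handle stay
	 * stable, then drop the lock: pagers are now entered unlocked.
	 */
	vm_object_pip_add(object, 1);
	VM_OBJECT_WUNLOCK(object);

	rv = vm_pager_get_pages(object, &m, 1, NULL, NULL);

	VM_OBJECT_WLOCK(object);
	vm_object_pip_wakeup(object);
	VM_OBJECT_WUNLOCK(object);

	/*
	 * The pager may have replaced the page; m was updated in place.
	 * The page is returned still exclusive-busied; the caller must
	 * vm_page_xunbusy() it when done.
	 */
	*mp = m;
	return (rv);
}

The asymmetric conversions in the diff fall out of the same rule: vm_fault.c
and md.c only need the lock dropped around the call, kern_sendfile.c carries
the pip reference across the asynchronous I/O and releases it in
sendfile_iodone(), and the device, sg, and swap pagers reacquire the lock
internally for the spans that still need it.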