author	Mark Johnston <markj@FreeBSD.org>	2019-09-16 15:03:12 +0000
committer	Mark Johnston <markj@FreeBSD.org>	2019-09-16 15:03:12 +0000
commit	41fd4b9422e37d764a0a6fe2f7f2cda3a523d822 (patch)
tree	a0d6cb9b95485038424eb7e4b81d94481f4c5cf9 /sys
parent	42767f76af274d75d648b25913c67fca5e786aca (diff)
Fix a couple of nits in r352110.
- Remove a dead variable from the amd64 pmap_extract_and_hold().
- Fix grammar in the vm_page_wire man page.

Reported by:	alc
Reviewed by:	alc, kib
Sponsored by:	Netflix
Differential Revision:	https://reviews.freebsd.org/D21639
Notes:
    svn path=/head/; revision=352406
Diffstat (limited to 'sys')
-rw-r--r--  sys/amd64/amd64/pmap.c | 14
-rw-r--r--  sys/amd64/include/pmap.h | 3
-rw-r--r--  sys/arm/arm/pmap-v4.c | 4
-rw-r--r--  sys/arm/arm/pmap-v6.c | 4
-rw-r--r--  sys/arm64/arm64/pmap.c | 12
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c | 4
-rw-r--r--  sys/dev/virtio/balloon/virtio_balloon.c | 2
-rw-r--r--  sys/i386/i386/pmap.c | 8
-rw-r--r--  sys/mips/mips/pmap.c | 10
-rw-r--r--  sys/powerpc/aim/mmu_oea.c | 9
-rw-r--r--  sys/powerpc/aim/mmu_oea64.c | 14
-rw-r--r--  sys/powerpc/booke/pmap.c | 6
-rw-r--r--  sys/riscv/riscv/pmap.c | 12
-rw-r--r--  sys/sparc64/sparc64/pmap.c | 6
-rw-r--r--  sys/vm/swap_pager.c | 6
-rw-r--r--  sys/vm/vm_fault.c | 6
-rw-r--r--  sys/vm/vm_mmap.c | 4
-rw-r--r--  sys/vm/vm_object.c | 4
-rw-r--r--  sys/vm/vm_page.c | 720
-rw-r--r--  sys/vm/vm_page.h | 157
-rw-r--r--  sys/vm/vm_pageout.c | 554
-rw-r--r--  sys/vm/vm_pagequeue.h | 18
-rw-r--r--  sys/vm/vm_swapout.c | 88
23 files changed, 806 insertions(+), 859 deletions(-)
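
Most of the mechanical churn below replaces direct reads of m->aflags with
the vm_page_aflags() accessor. The accessor itself is not defined in any of
the quoted hunks; judging from the pmap.h and vm_page.h hunks further down,
which fold the flags into the packed astate word, it presumably amounts to
the following sketch (an assumption for illustration, not the committed
definition):

/*
 * Hypothetical sketch of the accessor the callers are converted to use.
 * The old aflags byte is gone; the flags now live in the atomically
 * updated astate word, so readers fetch them from m->astate.flags.
 */
static inline uint16_t
vm_page_aflags(vm_page_t m)
{

	return (m->astate.flags);
}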
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index f2ebee2ca550..c4e4762fe751 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -3064,10 +3064,8 @@ pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
{
pd_entry_t pde, *pdep;
pt_entry_t pte, PG_RW, PG_V;
- vm_paddr_t pa;
vm_page_t m;
- pa = 0;
m = NULL;
PG_RW = pmap_rw_bit(pmap);
PG_V = pmap_valid_bit(pmap);
@@ -5806,7 +5804,7 @@ retry:
("pmap_enter: no PV entry for %#lx", va));
if ((newpte & PG_MANAGED) == 0)
free_pv_entry(pmap, pv);
- if ((om->aflags & PGA_WRITEABLE) != 0 &&
+ if ((vm_page_aflags(om) & PGA_WRITEABLE) != 0 &&
TAILQ_EMPTY(&om->md.pv_list) &&
((om->flags & PG_FICTITIOUS) != 0 ||
TAILQ_EMPTY(&pa_to_pvh(opa)->pv_list)))
@@ -6989,7 +6987,7 @@ pmap_remove_pages(pmap_t pmap)
pvh->pv_gen++;
if (TAILQ_EMPTY(&pvh->pv_list)) {
for (mt = m; mt < &m[NBPDR / PAGE_SIZE]; mt++)
- if ((mt->aflags & PGA_WRITEABLE) != 0 &&
+ if ((vm_page_aflags(mt) & PGA_WRITEABLE) != 0 &&
TAILQ_EMPTY(&mt->md.pv_list))
vm_page_aflag_clear(mt, PGA_WRITEABLE);
}
@@ -7007,7 +7005,7 @@ pmap_remove_pages(pmap_t pmap)
pmap_resident_count_dec(pmap, 1);
TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
m->md.pv_gen++;
- if ((m->aflags & PGA_WRITEABLE) != 0 &&
+ if ((vm_page_aflags(m) & PGA_WRITEABLE) != 0 &&
TAILQ_EMPTY(&m->md.pv_list) &&
(m->flags & PG_FICTITIOUS) == 0) {
pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
@@ -7138,7 +7136,7 @@ pmap_is_modified(vm_page_t m)
* is clear, no PTEs can have PG_M set.
*/
VM_OBJECT_ASSERT_WLOCKED(m->object);
- if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
+ if (!vm_page_xbusied(m) && (vm_page_aflags(m) & PGA_WRITEABLE) == 0)
return (FALSE);
return (pmap_page_test_mappings(m, FALSE, TRUE));
}
@@ -7207,7 +7205,7 @@ pmap_remove_write(vm_page_t m)
* if PGA_WRITEABLE is clear, no page table entries need updating.
*/
VM_OBJECT_ASSERT_WLOCKED(m->object);
- if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
+ if (!vm_page_xbusied(m) && (vm_page_aflags(m) & PGA_WRITEABLE) == 0)
return;
lock = VM_PAGE_TO_PV_LIST_LOCK(m);
pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy :
@@ -7690,7 +7688,7 @@ pmap_clear_modify(vm_page_t m)
* If the object containing the page is locked and the page is not
* exclusive busied, then PGA_WRITEABLE cannot be concurrently set.
*/
- if ((m->aflags & PGA_WRITEABLE) == 0)
+ if ((vm_page_aflags(m) & PGA_WRITEABLE) == 0)
return;
pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy :
pa_to_pvh(VM_PAGE_TO_PHYS(m));
diff --git a/sys/amd64/include/pmap.h b/sys/amd64/include/pmap.h
index b0a15a1bab86..ac8ed1c88063 100644
--- a/sys/amd64/include/pmap.h
+++ b/sys/amd64/include/pmap.h
@@ -423,7 +423,8 @@ extern int pmap_pcid_enabled;
extern int invpcid_works;
#define pmap_page_get_memattr(m) ((vm_memattr_t)(m)->md.pat_mode)
-#define pmap_page_is_write_mapped(m) (((m)->aflags & PGA_WRITEABLE) != 0)
+#define pmap_page_is_write_mapped(m) \
+ (((m)->astate.flags & PGA_WRITEABLE) != 0)
#define pmap_unmapbios(va, sz) pmap_unmapdev((va), (sz))
struct thread;
diff --git a/sys/arm/arm/pmap-v4.c b/sys/arm/arm/pmap-v4.c
index e1f411ccc832..e746d66f9bf2 100644
--- a/sys/arm/arm/pmap-v4.c
+++ b/sys/arm/arm/pmap-v4.c
@@ -4104,7 +4104,7 @@ pmap_clear_modify(vm_page_t m)
* If the object containing the page is locked and the page is not
* exclusive busied, then PGA_WRITEABLE cannot be concurrently set.
*/
- if ((m->aflags & PGA_WRITEABLE) == 0)
+ if ((vm_page_aflags(m) & PGA_WRITEABLE) == 0)
return;
if (m->md.pvh_attrs & PVF_MOD)
pmap_clearbit(m, PVF_MOD);
@@ -4143,7 +4143,7 @@ pmap_remove_write(vm_page_t m)
* if PGA_WRITEABLE is clear, no page table entries need updating.
*/
VM_OBJECT_ASSERT_WLOCKED(m->object);
- if (vm_page_xbusied(m) || (m->aflags & PGA_WRITEABLE) != 0)
+ if (vm_page_xbusied(m) || (vm_page_aflags(m) & PGA_WRITEABLE) != 0)
pmap_clearbit(m, PVF_WRITE);
}
diff --git a/sys/arm/arm/pmap-v6.c b/sys/arm/arm/pmap-v6.c
index 1d82ebf48cb2..2ad04723a7c7 100644
--- a/sys/arm/arm/pmap-v6.c
+++ b/sys/arm/arm/pmap-v6.c
@@ -5197,7 +5197,7 @@ pmap_is_modified(vm_page_t m)
* is clear, no PTE2s can have PG_M set.
*/
VM_OBJECT_ASSERT_WLOCKED(m->object);
- if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
+ if (!vm_page_xbusied(m) && (vm_page_aflags(m) & PGA_WRITEABLE) == 0)
return (FALSE);
rw_wlock(&pvh_global_lock);
rv = pmap_is_modified_pvh(&m->md) ||
@@ -5540,7 +5540,7 @@ pmap_remove_write(vm_page_t m)
* if PGA_WRITEABLE is clear, no page table entries need updating.
*/
VM_OBJECT_ASSERT_WLOCKED(m->object);
- if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
+ if (!vm_page_xbusied(m) && !pmap_page_is_write_mapped(m))
return;
rw_wlock(&pvh_global_lock);
sched_pin();
diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index 919537e86b84..c5063828d6a1 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -3333,7 +3333,7 @@ havel3:
pv = pmap_pvh_remove(&om->md, pmap, va);
if ((m->oflags & VPO_UNMANAGED) != 0)
free_pv_entry(pmap, pv);
- if ((om->aflags & PGA_WRITEABLE) != 0 &&
+ if ((vm_page_aflags(om) & PGA_WRITEABLE) != 0 &&
TAILQ_EMPTY(&om->md.pv_list) &&
((om->flags & PG_FICTITIOUS) != 0 ||
TAILQ_EMPTY(&pa_to_pvh(opa)->pv_list)))
@@ -4372,7 +4372,7 @@ pmap_remove_pages(pmap_t pmap)
pvh->pv_gen++;
if (TAILQ_EMPTY(&pvh->pv_list)) {
for (mt = m; mt < &m[L2_SIZE / PAGE_SIZE]; mt++)
- if ((mt->aflags & PGA_WRITEABLE) != 0 &&
+ if ((vm_page_aflags(mt) & PGA_WRITEABLE) != 0 &&
TAILQ_EMPTY(&mt->md.pv_list))
vm_page_aflag_clear(mt, PGA_WRITEABLE);
}
@@ -4394,7 +4394,7 @@ pmap_remove_pages(pmap_t pmap)
TAILQ_REMOVE(&m->md.pv_list, pv,
pv_next);
m->md.pv_gen++;
- if ((m->aflags & PGA_WRITEABLE) != 0 &&
+ if ((vm_page_aflags(m) & PGA_WRITEABLE) != 0 &&
TAILQ_EMPTY(&m->md.pv_list) &&
(m->flags & PG_FICTITIOUS) == 0) {
pvh = pa_to_pvh(
@@ -4534,7 +4534,7 @@ pmap_is_modified(vm_page_t m)
* is clear, no PTEs can have PG_M set.
*/
VM_OBJECT_ASSERT_WLOCKED(m->object);
- if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
+ if (!vm_page_xbusied(m) && (vm_page_aflags(m) & PGA_WRITEABLE) == 0)
return (FALSE);
return (pmap_page_test_mappings(m, FALSE, TRUE));
}
@@ -4600,7 +4600,7 @@ pmap_remove_write(vm_page_t m)
* if PGA_WRITEABLE is clear, no page table entries need updating.
*/
VM_OBJECT_ASSERT_WLOCKED(m->object);
- if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
+ if (!vm_page_xbusied(m) && (vm_page_aflags(m) & PGA_WRITEABLE) == 0)
return;
lock = VM_PAGE_TO_PV_LIST_LOCK(m);
pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy :
@@ -4977,7 +4977,7 @@ pmap_clear_modify(vm_page_t m)
* set. If the object containing the page is locked and the page is not
* exclusive busied, then PGA_WRITEABLE cannot be concurrently set.
*/
- if ((m->aflags & PGA_WRITEABLE) == 0)
+ if ((vm_page_aflags(m) & PGA_WRITEABLE) == 0)
return;
pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy :
pa_to_pvh(VM_PAGE_TO_PHYS(m));
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
index 59147515097f..9afc7db022b2 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
@@ -1718,12 +1718,10 @@ dmu_read_pages(objset_t *os, uint64_t object, vm_page_t *ma, int count,
bcopy((char *)db->db_data + bufoff, va, PAGESIZE);
zfs_unmap_page(sf);
m->valid = VM_PAGE_BITS_ALL;
- vm_page_lock(m);
if ((m->busy_lock & VPB_BIT_WAITERS) != 0)
vm_page_activate(m);
else
vm_page_deactivate(m);
- vm_page_unlock(m);
}
*rbehind = i;
@@ -1838,12 +1836,10 @@ dmu_read_pages(objset_t *os, uint64_t object, vm_page_t *ma, int count,
}
zfs_unmap_page(sf);
m->valid = VM_PAGE_BITS_ALL;
- vm_page_lock(m);
if ((m->busy_lock & VPB_BIT_WAITERS) != 0)
vm_page_activate(m);
else
vm_page_deactivate(m);
- vm_page_unlock(m);
}
*rahead = i;
zfs_vmobject_wunlock(vmobj);
diff --git a/sys/dev/virtio/balloon/virtio_balloon.c b/sys/dev/virtio/balloon/virtio_balloon.c
index 060d6d68afc7..32b9b41b8d94 100644
--- a/sys/dev/virtio/balloon/virtio_balloon.c
+++ b/sys/dev/virtio/balloon/virtio_balloon.c
@@ -332,8 +332,6 @@ vtballoon_inflate(struct vtballoon_softc *sc, int npages)
sc->vtballoon_page_frames[i] =
VM_PAGE_TO_PHYS(m) >> VIRTIO_BALLOON_PFN_SHIFT;
- KASSERT(m->queue == PQ_NONE,
- ("%s: allocated page %p on queue", __func__, m));
TAILQ_INSERT_TAIL(&sc->vtballoon_pages, m, plinks.q);
}
diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c
index f07f500e8977..3e2748ad1c88 100644
--- a/sys/i386/i386/pmap.c
+++ b/sys/i386/i386/pmap.c
@@ -3752,7 +3752,7 @@ __CONCAT(PMTYPE, enter)(pmap_t pmap, vm_offset_t va, vm_page_t m,
("pmap_enter: no PV entry for %#x", va));
if ((newpte & PG_MANAGED) == 0)
free_pv_entry(pmap, pv);
- if ((om->aflags & PGA_WRITEABLE) != 0 &&
+ if ((vm_page_aflags(om) & PGA_WRITEABLE) != 0 &&
TAILQ_EMPTY(&om->md.pv_list) &&
((om->flags & PG_FICTITIOUS) != 0 ||
TAILQ_EMPTY(&pa_to_pvh(opa)->pv_list)))
@@ -4848,7 +4848,7 @@ __CONCAT(PMTYPE, is_modified)(vm_page_t m)
* is clear, no PTEs can have PG_M set.
*/
VM_OBJECT_ASSERT_WLOCKED(m->object);
- if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
+ if (!vm_page_xbusied(m) && (vm_page_aflags(m) & PGA_WRITEABLE) == 0)
return (FALSE);
rw_wlock(&pvh_global_lock);
rv = pmap_is_modified_pvh(&m->md) ||
@@ -4979,7 +4979,7 @@ __CONCAT(PMTYPE, remove_write)(vm_page_t m)
* if PGA_WRITEABLE is clear, no page table entries need updating.
*/
VM_OBJECT_ASSERT_WLOCKED(m->object);
- if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
+ if (!vm_page_xbusied(m) && (vm_page_aflags(m) & PGA_WRITEABLE) == 0)
return;
rw_wlock(&pvh_global_lock);
sched_pin();
@@ -5291,7 +5291,7 @@ __CONCAT(PMTYPE, clear_modify)(vm_page_t m)
* If the object containing the page is locked and the page is not
* exclusive busied, then PGA_WRITEABLE cannot be concurrently set.
*/
- if ((m->aflags & PGA_WRITEABLE) == 0)
+ if ((vm_page_aflags(m) & PGA_WRITEABLE) == 0)
return;
rw_wlock(&pvh_global_lock);
sched_pin();
diff --git a/sys/mips/mips/pmap.c b/sys/mips/mips/pmap.c
index 072618f793a0..571fe83397bd 100644
--- a/sys/mips/mips/pmap.c
+++ b/sys/mips/mips/pmap.c
@@ -2164,7 +2164,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
pv = pmap_pvh_remove(&om->md, pmap, va);
if (!pte_test(&newpte, PTE_MANAGED))
free_pv_entry(pmap, pv);
- if ((om->aflags & PGA_WRITEABLE) != 0 &&
+ if ((vm_page_aflags(om) & PGA_WRITEABLE) != 0 &&
TAILQ_EMPTY(&om->md.pv_list))
vm_page_aflag_clear(om, PGA_WRITEABLE);
}
@@ -2934,7 +2934,7 @@ pmap_remove_write(vm_page_t m)
* if PGA_WRITEABLE is clear, no page table entries need updating.
*/
VM_OBJECT_ASSERT_WLOCKED(m->object);
- if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
+ if (!vm_page_xbusied(m) && (vm_page_aflags(m) & PGA_WRITEABLE) == 0)
return;
rw_wlock(&pvh_global_lock);
TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
@@ -2999,7 +2999,7 @@ pmap_is_modified(vm_page_t m)
* is clear, no PTEs can have PTE_D set.
*/
VM_OBJECT_ASSERT_WLOCKED(m->object);
- if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
+ if (!vm_page_xbusied(m) && (vm_page_aflags(m) & PGA_WRITEABLE) == 0)
return (FALSE);
rw_wlock(&pvh_global_lock);
rv = pmap_testbit(m, PTE_D);
@@ -3143,7 +3143,7 @@ pmap_clear_modify(vm_page_t m)
* If the object containing the page is locked and the page is not
* write busied, then PGA_WRITEABLE cannot be concurrently set.
*/
- if ((m->aflags & PGA_WRITEABLE) == 0)
+ if ((vm_page_aflags(m) & PGA_WRITEABLE) == 0)
return;
rw_wlock(&pvh_global_lock);
TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
@@ -3270,7 +3270,7 @@ retry:
* determine if the address is MINCORE_REFERENCED.
*/
m = PHYS_TO_VM_PAGE(pa);
- if ((m->aflags & PGA_REFERENCED) != 0)
+ if ((vm_page_aflags(m) & PGA_REFERENCED) != 0)
val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
}
if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
diff --git a/sys/powerpc/aim/mmu_oea.c b/sys/powerpc/aim/mmu_oea.c
index 9eacac27707b..5ef269db98cc 100644
--- a/sys/powerpc/aim/mmu_oea.c
+++ b/sys/powerpc/aim/mmu_oea.c
@@ -1319,7 +1319,7 @@ moea_is_modified(mmu_t mmu, vm_page_t m)
* is clear, no PTEs can have PTE_CHG set.
*/
VM_OBJECT_ASSERT_WLOCKED(m->object);
- if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
+ if (!vm_page_xbusied(m) && (vm_page_aflags(m) & PGA_WRITEABLE) == 0)
return (FALSE);
rw_wlock(&pvh_global_lock);
rv = moea_query_bit(m, PTE_CHG);
@@ -1355,7 +1355,7 @@ moea_clear_modify(mmu_t mmu, vm_page_t m)
* set. If the object containing the page is locked and the page is
* not exclusive busied, then PGA_WRITEABLE cannot be concurrently set.
*/
- if ((m->aflags & PGA_WRITEABLE) == 0)
+ if ((vm_page_aflags(m) & PGA_WRITEABLE) == 0)
return;
rw_wlock(&pvh_global_lock);
moea_clear_bit(m, PTE_CHG);
@@ -1382,7 +1382,7 @@ moea_remove_write(mmu_t mmu, vm_page_t m)
* if PGA_WRITEABLE is clear, no page table entries need updating.
*/
VM_OBJECT_ASSERT_WLOCKED(m->object);
- if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
+ if (!vm_page_xbusied(m) && (vm_page_aflags(m) & PGA_WRITEABLE) == 0)
return;
rw_wlock(&pvh_global_lock);
lo = moea_attr_fetch(m);
@@ -1915,7 +1915,8 @@ moea_remove_all(mmu_t mmu, vm_page_t m)
moea_pvo_remove(pvo, -1);
PMAP_UNLOCK(pmap);
}
- if ((m->aflags & PGA_WRITEABLE) && moea_query_bit(m, PTE_CHG)) {
+ if ((vm_page_aflags(m) & PGA_WRITEABLE) != 0 &&
+ moea_query_bit(m, PTE_CHG)) {
moea_attr_clear(m, PTE_CHG);
vm_page_dirty(m);
}
diff --git a/sys/powerpc/aim/mmu_oea64.c b/sys/powerpc/aim/mmu_oea64.c
index 7ad86d5f1896..6361938e0dc9 100644
--- a/sys/powerpc/aim/mmu_oea64.c
+++ b/sys/powerpc/aim/mmu_oea64.c
@@ -1467,7 +1467,7 @@ out:
* Flush the page from the instruction cache if this page is
* mapped executable and cacheable.
*/
- if (pmap != kernel_pmap && !(m->aflags & PGA_EXECUTABLE) &&
+ if (pmap != kernel_pmap && (vm_page_aflags(m) & PGA_EXECUTABLE) == 0 &&
(pte_lo & (LPTE_I | LPTE_G | LPTE_NOEXEC)) == 0) {
vm_page_aflag_set(m, PGA_EXECUTABLE);
moea64_syncicache(mmu, pmap, va, VM_PAGE_TO_PHYS(m), PAGE_SIZE);
@@ -1688,7 +1688,7 @@ moea64_is_modified(mmu_t mmu, vm_page_t m)
* is clear, no PTEs can have LPTE_CHG set.
*/
VM_OBJECT_ASSERT_LOCKED(m->object);
- if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
+ if (!vm_page_xbusied(m) && (vm_page_aflags(m) & PGA_WRITEABLE) == 0)
return (FALSE);
return (moea64_query_bit(mmu, m, LPTE_CHG));
}
@@ -1722,7 +1722,7 @@ moea64_clear_modify(mmu_t mmu, vm_page_t m)
* set. If the object containing the page is locked and the page is
* not exclusive busied, then PGA_WRITEABLE cannot be concurrently set.
*/
- if ((m->aflags & PGA_WRITEABLE) == 0)
+ if ((vm_page_aflags(m) & PGA_WRITEABLE) == 0)
return;
moea64_clear_bit(mmu, m, LPTE_CHG);
}
@@ -1746,7 +1746,7 @@ moea64_remove_write(mmu_t mmu, vm_page_t m)
* if PGA_WRITEABLE is clear, no page table entries need updating.
*/
VM_OBJECT_ASSERT_WLOCKED(m->object);
- if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
+ if (!vm_page_xbusied(m) && (vm_page_aflags(m) & PGA_WRITEABLE) == 0)
return;
powerpc_sync();
PV_PAGE_LOCK(m);
@@ -2240,7 +2240,8 @@ moea64_pvo_protect(mmu_t mmu, pmap_t pm, struct pvo_entry *pvo, vm_prot_t prot)
if (refchg < 0)
refchg = (oldprot & VM_PROT_WRITE) ? LPTE_CHG : 0;
- if (pm != kernel_pmap && pg != NULL && !(pg->aflags & PGA_EXECUTABLE) &&
+ if (pm != kernel_pmap && pg != NULL &&
+ (vm_page_aflags(pg) & PGA_EXECUTABLE) == 0 &&
(pvo->pvo_pte.pa & (LPTE_I | LPTE_G | LPTE_NOEXEC)) == 0) {
if ((pg->oflags & VPO_UNMANAGED) == 0)
vm_page_aflag_set(pg, PGA_EXECUTABLE);
@@ -2454,7 +2455,8 @@ moea64_remove_all(mmu_t mmu, vm_page_t m)
}
KASSERT(!pmap_page_is_mapped(m), ("Page still has mappings"));
- KASSERT(!(m->aflags & PGA_WRITEABLE), ("Page still writable"));
+ KASSERT((vm_page_aflags(m) & PGA_WRITEABLE) == 0,
+ ("Page still writable"));
PV_PAGE_UNLOCK(m);
/* Clean up UMA allocations */
diff --git a/sys/powerpc/booke/pmap.c b/sys/powerpc/booke/pmap.c
index 140b1367325f..2374d1a9ad91 100644
--- a/sys/powerpc/booke/pmap.c
+++ b/sys/powerpc/booke/pmap.c
@@ -2694,7 +2694,7 @@ mmu_booke_remove_write(mmu_t mmu, vm_page_t m)
* if PGA_WRITEABLE is clear, no page table entries need updating.
*/
VM_OBJECT_ASSERT_WLOCKED(m->object);
- if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
+ if (!vm_page_xbusied(m) && (vm_page_aflags(m) & PGA_WRITEABLE) == 0)
return;
rw_wlock(&pvh_global_lock);
TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) {
@@ -3040,7 +3040,7 @@ mmu_booke_is_modified(mmu_t mmu, vm_page_t m)
* is clear, no PTEs can be modified.
*/
VM_OBJECT_ASSERT_WLOCKED(m->object);
- if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
+ if (!vm_page_xbusied(m) && (vm_page_aflags(m) & PGA_WRITEABLE) == 0)
return (rv);
rw_wlock(&pvh_global_lock);
TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) {
@@ -3119,7 +3119,7 @@ mmu_booke_clear_modify(mmu_t mmu, vm_page_t m)
* If the object containing the page is locked and the page is not
* exclusive busied, then PG_AWRITEABLE cannot be concurrently set.
*/
- if ((m->aflags & PGA_WRITEABLE) == 0)
+ if ((vm_page_aflags(m) & PGA_WRITEABLE) == 0)
return;
rw_wlock(&pvh_global_lock);
TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) {
diff --git a/sys/riscv/riscv/pmap.c b/sys/riscv/riscv/pmap.c
index bbda832ff885..2196a6c153af 100644
--- a/sys/riscv/riscv/pmap.c
+++ b/sys/riscv/riscv/pmap.c
@@ -2825,7 +2825,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
("pmap_enter: no PV entry for %#lx", va));
if ((new_l3 & PTE_SW_MANAGED) == 0)
free_pv_entry(pmap, pv);
- if ((om->aflags & PGA_WRITEABLE) != 0 &&
+ if ((vm_page_aflags(om) & PGA_WRITEABLE) != 0 &&
TAILQ_EMPTY(&om->md.pv_list))
vm_page_aflag_clear(om, PGA_WRITEABLE);
}
@@ -3556,7 +3556,7 @@ pmap_remove_pages_pv(pmap_t pmap, vm_page_t m, pv_entry_t pv,
if (TAILQ_EMPTY(&pvh->pv_list)) {
for (mt = m; mt < &m[Ln_ENTRIES]; mt++)
if (TAILQ_EMPTY(&mt->md.pv_list) &&
- (mt->aflags & PGA_WRITEABLE) != 0)
+ (vm_page_aflags(mt) & PGA_WRITEABLE) != 0)
vm_page_aflag_clear(mt, PGA_WRITEABLE);
}
mpte = pmap_remove_pt_page(pmap, pv->pv_va);
@@ -3574,7 +3574,7 @@ pmap_remove_pages_pv(pmap_t pmap, vm_page_t m, pv_entry_t pv,
TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
m->md.pv_gen++;
if (TAILQ_EMPTY(&m->md.pv_list) &&
- (m->aflags & PGA_WRITEABLE) != 0) {
+ (vm_page_aflags(m) & PGA_WRITEABLE) != 0) {
pvh = pa_to_pvh(m->phys_addr);
if (TAILQ_EMPTY(&pvh->pv_list))
vm_page_aflag_clear(m, PGA_WRITEABLE);
@@ -3789,7 +3789,7 @@ pmap_is_modified(vm_page_t m)
* is clear, no PTEs can have PG_M set.
*/
VM_OBJECT_ASSERT_WLOCKED(m->object);
- if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
+ if (!vm_page_xbusied(m) && (vm_page_aflags(m) & PGA_WRITEABLE) == 0)
return (FALSE);
return (pmap_page_test_mappings(m, FALSE, TRUE));
}
@@ -3855,7 +3855,7 @@ pmap_remove_write(vm_page_t m)
* if PGA_WRITEABLE is clear, no page table entries need updating.
*/
VM_OBJECT_ASSERT_WLOCKED(m->object);
- if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
+ if (!vm_page_xbusied(m) && (vm_page_aflags(m) & PGA_WRITEABLE) == 0)
return;
lock = VM_PAGE_TO_PV_LIST_LOCK(m);
pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy :
@@ -4115,7 +4115,7 @@ pmap_clear_modify(vm_page_t m)
* If the object containing the page is locked and the page is not
* exclusive busied, then PGA_WRITEABLE cannot be concurrently set.
*/
- if ((m->aflags & PGA_WRITEABLE) == 0)
+ if ((vm_page_aflags(m) & PGA_WRITEABLE) == 0)
return;
pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy :
pa_to_pvh(VM_PAGE_TO_PHYS(m));
diff --git a/sys/sparc64/sparc64/pmap.c b/sys/sparc64/sparc64/pmap.c
index 436c15623a6e..a038845e359f 100644
--- a/sys/sparc64/sparc64/pmap.c
+++ b/sys/sparc64/sparc64/pmap.c
@@ -2121,7 +2121,7 @@ pmap_is_modified(vm_page_t m)
* is clear, no TTEs can have TD_W set.
*/
VM_OBJECT_ASSERT_WLOCKED(m->object);
- if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
+ if (!vm_page_xbusied(m) && (vm_page_aflags(m) & PGA_WRITEABLE) == 0)
return (rv);
rw_wlock(&tte_list_global_lock);
TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
@@ -2204,7 +2204,7 @@ pmap_clear_modify(vm_page_t m)
* If the object containing the page is locked and the page is not
* exclusive busied, then PGA_WRITEABLE cannot be concurrently set.
*/
- if ((m->aflags & PGA_WRITEABLE) == 0)
+ if ((vm_page_aflags(m) & PGA_WRITEABLE) == 0)
return;
rw_wlock(&tte_list_global_lock);
TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
@@ -2232,7 +2232,7 @@ pmap_remove_write(vm_page_t m)
* if PGA_WRITEABLE is clear, no page table entries need updating.
*/
VM_OBJECT_ASSERT_WLOCKED(m->object);
- if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
+ if (!vm_page_xbusied(m) && (vm_page_aflags(m) & PGA_WRITEABLE) == 0)
return;
rw_wlock(&tte_list_global_lock);
TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c
index 4ea49c7aa4a2..249d158ca6d3 100644
--- a/sys/vm/swap_pager.c
+++ b/sys/vm/swap_pager.c
@@ -1648,12 +1648,6 @@ swp_pager_force_dirty(vm_page_t m)
{
vm_page_dirty(m);
-#ifdef INVARIANTS
- vm_page_lock(m);
- if (!vm_page_wired(m) && m->queue == PQ_NONE)
- panic("page %p is neither wired nor queued", m);
- vm_page_unlock(m);
-#endif
vm_page_xunbusy(m);
swap_pager_unswapped(m);
}
diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
index f3557bbde5ac..ea783fbf53b8 100644
--- a/sys/vm/vm_fault.c
+++ b/sys/vm/vm_fault.c
@@ -153,9 +153,7 @@ release_page(struct faultstate *fs)
{
vm_page_xunbusy(fs->m);
- vm_page_lock(fs->m);
vm_page_deactivate(fs->m);
- vm_page_unlock(fs->m);
fs->m = NULL;
}
@@ -376,9 +374,7 @@ vm_fault_populate_cleanup(vm_object_t object, vm_pindex_t first,
for (pidx = first, m = vm_page_lookup(object, pidx);
pidx <= last; pidx++, m = vm_page_next(m)) {
vm_fault_populate_check_page(m);
- vm_page_lock(m);
vm_page_deactivate(m);
- vm_page_unlock(m);
vm_page_xunbusy(m);
}
}
@@ -1325,9 +1321,7 @@ readrest:
if ((fault_flags & VM_FAULT_WIRE) != 0) {
vm_page_wire(fs.m);
} else {
- vm_page_lock(fs.m);
vm_page_activate(fs.m);
- vm_page_unlock(fs.m);
}
if (m_hold != NULL) {
*m_hold = fs.m;
diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c
index 23bdde163cec..882a77e0de30 100644
--- a/sys/vm/vm_mmap.c
+++ b/sys/vm/vm_mmap.c
@@ -935,9 +935,9 @@ RestartScan:
* and set PGA_REFERENCED before the call to
* pmap_is_referenced().
*/
- if ((m->aflags & PGA_REFERENCED) != 0 ||
+ if ((vm_page_aflags(m) & PGA_REFERENCED) != 0 ||
pmap_is_referenced(m) ||
- (m->aflags & PGA_REFERENCED) != 0)
+ (vm_page_aflags(m) & PGA_REFERENCED) != 0)
mincoreinfo |= MINCORE_REFERENCED_OTHER;
}
if (object != NULL)
diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c
index a2f6cb7c1f22..8a6ace0dfe27 100644
--- a/sys/vm/vm_object.c
+++ b/sys/vm/vm_object.c
@@ -2312,9 +2312,9 @@ sysctl_vm_object_list(SYSCTL_HANDLER_ARGS)
* sysctl is only meant to give an
* approximation of the system anyway.
*/
- if (m->queue == PQ_ACTIVE)
+ if (m->astate.queue == PQ_ACTIVE)
kvo->kvo_active++;
- else if (m->queue == PQ_INACTIVE)
+ else if (m->astate.queue == PQ_INACTIVE)
kvo->kvo_inactive++;
}
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 99c3abe1f9e7..7343210f6e41 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -73,11 +73,12 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
-#include <sys/lock.h>
+#include <sys/counter.h>
#include <sys/domainset.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/linker.h>
+#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/msgbuf.h>
@@ -130,6 +131,34 @@ static int vm_min_waiters;
static int vm_severe_waiters;
static int vm_pageproc_waiters;
+static SYSCTL_NODE(_vm_stats, OID_AUTO, page, CTLFLAG_RD, 0,
+ "VM page stats");
+
+static counter_u64_t pqstate_commit_aborts = EARLY_COUNTER;
+SYSCTL_COUNTER_U64(_vm_stats_page, OID_AUTO, commit_aborts, CTLFLAG_RD,
+ &pqstate_commit_aborts,
+ "Failed page queue state updates");
+
+static counter_u64_t queue_ops = EARLY_COUNTER;
+SYSCTL_COUNTER_U64(_vm_stats_page, OID_AUTO, queue_ops, CTLFLAG_RD,
+ &queue_ops,
+ "Batched queue operations");
+
+static counter_u64_t null_queue_ops = EARLY_COUNTER;
+SYSCTL_COUNTER_U64(_vm_stats_page, OID_AUTO, null_queue_ops, CTLFLAG_RD,
+ &null_queue_ops,
+ "Batched queue operations with no effect");
+
+static void
+counter_startup(void)
+{
+
+ pqstate_commit_aborts = counter_u64_alloc(M_WAITOK);
+ queue_ops = counter_u64_alloc(M_WAITOK);
+ null_queue_ops = counter_u64_alloc(M_WAITOK);
+}
+SYSINIT(page_counters, SI_SUB_CPU, SI_ORDER_ANY, counter_startup, NULL);
+
/*
* bogus page -- for I/O to/from partially complete buffers,
* or for paging into sparsely invalid regions.
@@ -158,16 +187,17 @@ static uma_zone_t fakepg_zone;
static void vm_page_alloc_check(vm_page_t m);
static void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits);
-static void vm_page_dequeue_complete(vm_page_t m);
static void vm_page_enqueue(vm_page_t m, uint8_t queue);
static void vm_page_init(void *dummy);
static int vm_page_insert_after(vm_page_t m, vm_object_t object,
vm_pindex_t pindex, vm_page_t mpred);
static void vm_page_insert_radixdone(vm_page_t m, vm_object_t object,
vm_page_t mpred);
-static void vm_page_mvqueue(vm_page_t m, uint8_t queue);
+static void vm_page_mvqueue(vm_page_t m, const uint8_t queue,
+ const uint16_t nflag);
static int vm_page_reclaim_run(int req_class, int domain, u_long npages,
vm_page_t m_run, vm_paddr_t high);
+static bool vm_page_release_toq(vm_page_t m, uint8_t queue, bool noreuse);
static int vm_domain_alloc_fail(struct vm_domain *vmd, vm_object_t object,
int req);
static int vm_page_zone_import(void *arg, void **store, int cnt, int domain,
@@ -440,10 +470,10 @@ vm_page_init_marker(vm_page_t marker, int queue, uint8_t aflags)
{
bzero(marker, sizeof(*marker));
- marker->flags = PG_MARKER;
- marker->aflags = aflags;
marker->busy_lock = VPB_SINGLE_EXCLUSIVER;
- marker->queue = queue;
+ marker->astate.flags = aflags;
+ marker->astate.queue = queue;
+ marker->flags = PG_MARKER;
}
static void
@@ -513,9 +543,10 @@ vm_page_init_page(vm_page_t m, vm_paddr_t pa, int segind)
m->object = NULL;
m->ref_count = 0;
m->busy_lock = VPB_UNBUSIED;
- m->flags = m->aflags = 0;
+ m->flags = 0;
m->phys_addr = pa;
- m->queue = PQ_NONE;
+ m->astate.flags = 0;
+ m->astate.queue = PQ_NONE;
m->psind = 0;
m->segind = segind;
m->order = VM_NFREEORDER;
@@ -1152,7 +1183,7 @@ vm_page_initfake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr)
goto memattr;
}
m->phys_addr = paddr;
- m->queue = PQ_NONE;
+ m->astate.queue = PQ_NONE;
/* Fictitious pages don't use "segind". */
m->flags = PG_FICTITIOUS;
/* Fictitious pages don't use "order" or "pool". */
@@ -1239,12 +1270,10 @@ vm_page_readahead_finish(vm_page_t m)
* have shown that deactivating the page is usually the best choice,
* unless the page is wanted by another thread.
*/
- vm_page_lock(m);
if ((m->busy_lock & VPB_BIT_WAITERS) != 0)
vm_page_activate(m);
else
vm_page_deactivate(m);
- vm_page_unlock(m);
vm_page_xunbusy(m);
}
@@ -1607,7 +1636,7 @@ vm_page_replace(vm_page_t mnew, vm_object_t object, vm_pindex_t pindex)
mnew->pindex = pindex;
atomic_set_int(&mnew->ref_count, VPRC_OBJREF);
mold = vm_radix_replace(&object->rtree, mnew);
- KASSERT(mold->queue == PQ_NONE,
+ KASSERT(mold->astate.queue == PQ_NONE,
("vm_page_replace: old page %p is on a paging queue", mold));
/* Keep the resident page list in sorted order. */
@@ -1883,7 +1912,7 @@ found:
if ((req & VM_ALLOC_NODUMP) != 0)
flags |= PG_NODUMP;
m->flags = flags;
- m->aflags = 0;
+ m->astate.flags = 0;
m->oflags = object == NULL || (object->flags & OBJ_UNMANAGED) != 0 ?
VPO_UNMANAGED : 0;
m->busy_lock = VPB_UNBUSIED;
@@ -1899,7 +1928,7 @@ found:
vm_wire_add(1);
m->ref_count = 1;
}
- m->act_count = 0;
+ m->astate.act_count = 0;
if (object != NULL) {
if (vm_page_insert_after(m, object, pindex, mpred)) {
@@ -2093,12 +2122,12 @@ found:
memattr = object->memattr;
}
for (m = m_ret; m < &m_ret[npages]; m++) {
- m->aflags = 0;
+ m->astate.flags = 0;
m->flags = (m->flags | PG_NODUMP) & flags;
m->busy_lock = busy_lock;
if ((req & VM_ALLOC_WIRED) != 0)
m->ref_count = 1;
- m->act_count = 0;
+ m->astate.act_count = 0;
m->oflags = oflags;
if (object != NULL) {
if (vm_page_insert_after(m, object, pindex, mpred)) {
@@ -2141,9 +2170,10 @@ vm_page_alloc_check(vm_page_t m)
{
KASSERT(m->object == NULL, ("page %p has object", m));
- KASSERT(m->queue == PQ_NONE && (m->aflags & PGA_QUEUE_STATE_MASK) == 0,
+ KASSERT(m->astate.queue == PQ_NONE &&
+ (m->astate.flags & PGA_QUEUE_STATE_MASK) == 0,
("page %p has unexpected queue %d, flags %#x",
- m, m->queue, (m->aflags & PGA_QUEUE_STATE_MASK)));
+ m, m->astate.queue, (m->astate.flags & PGA_QUEUE_STATE_MASK)));
KASSERT(m->ref_count == 0, ("page %p has references", m));
KASSERT(!vm_page_busied(m), ("page %p is busy", m));
KASSERT(m->dirty == 0, ("page %p is dirty", m));
@@ -2217,7 +2247,7 @@ again:
/*
* Initialize the page. Only the PG_ZERO flag is inherited.
*/
- m->aflags = 0;
+ m->astate.flags = 0;
flags = 0;
if ((req & VM_ALLOC_ZERO) != 0)
flags = PG_ZERO;
@@ -2396,8 +2426,7 @@ retry:
vm_reserv_size(level)) - pa);
#endif
} else if (object->memattr == VM_MEMATTR_DEFAULT &&
- vm_page_queue(m) != PQ_NONE && !vm_page_busied(m) &&
- !vm_page_wired(m)) {
+ !vm_page_busied(m) && !vm_page_wired(m)) {
/*
* The page is allocated but eligible for
* relocation. Extend the current run by one
@@ -2545,8 +2574,7 @@ retry:
error = EINVAL;
else if (object->memattr != VM_MEMATTR_DEFAULT)
error = EINVAL;
- else if (vm_page_queue(m) != PQ_NONE &&
- !vm_page_busied(m) && !vm_page_wired(m)) {
+ else if (!vm_page_busied(m) && !vm_page_wired(m)) {
KASSERT(pmap_page_get_memattr(m) ==
VM_MEMATTR_DEFAULT,
("page %p has an unexpected memattr", m));
@@ -2607,7 +2635,7 @@ retry:
error = EBUSY;
goto unlock;
}
- m_new->aflags = m->aflags &
+ m_new->astate.flags = m->astate.flags &
~PGA_QUEUE_STATE_MASK;
KASSERT(m_new->oflags == VPO_UNMANAGED,
("page %p is managed", m_new));
@@ -3075,65 +3103,141 @@ vm_waitpfault(struct domainset *dset, int timo)
mtx_unlock(&vm_domainset_lock);
}
-static struct vm_pagequeue *
-vm_page_pagequeue(vm_page_t m)
+bool
+vm_page_pqstate_commit(vm_page_t m, vm_page_astate_t *old, vm_page_astate_t new)
{
+ vm_page_t next;
+ struct vm_pagequeue *pq;
+ int mask;
- uint8_t queue;
+ if (old->queue != PQ_NONE && old->queue != new.queue) {
+ new.flags &= ~PGA_ENQUEUED;
- if ((queue = atomic_load_8(&m->queue)) == PQ_NONE)
- return (NULL);
- return (&vm_pagequeue_domain(m)->vmd_pagequeues[queue]);
+ pq = _vm_page_pagequeue(m, old->queue);
+
+ /*
+ * The physical queue state might change at any point before the
+ * page queue lock is acquired, so we must verify that the lock
+ * is correct before proceeding. Once the page's queue index is
+ * changed, the page queue lock we hold will no longer
+ * synchronize the physical queue state of the page, so we must
+ * awkwardly remove the page from the queue and put it back if
+ * the commit fails.
+ */
+ vm_pagequeue_lock(pq);
+ if (__predict_false(m->astate.queue != old->queue)) {
+ vm_pagequeue_unlock(pq);
+ *old = vm_page_astate_load(m);
+ return (false);
+ }
+ if (__predict_true((m->astate.flags & PGA_ENQUEUED) != 0)) {
+ next = TAILQ_NEXT(m, plinks.q);
+ TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
+ }
+ if (__predict_false(!vm_page_astate_fcmpset(m, old, new))) {
+ if ((old->flags & PGA_ENQUEUED) != 0) {
+ if (next == NULL)
+ TAILQ_INSERT_TAIL(&pq->pq_pl, m,
+ plinks.q);
+ else
+ TAILQ_INSERT_BEFORE(next, m, plinks.q);
+ }
+ vm_pagequeue_unlock(pq);
+ counter_u64_add(pqstate_commit_aborts, 1);
+ return (false);
+ }
+ if ((old->flags & PGA_ENQUEUED) != 0)
+ vm_pagequeue_cnt_dec(pq);
+ vm_pagequeue_unlock(pq);
+ } else if (__predict_false(!vm_page_astate_fcmpset(m, old, new))) {
+ counter_u64_add(pqstate_commit_aborts, 1);
+ return (false);
+ }
+
+ if (new.queue != PQ_NONE) {
+ mask = new.flags & PGA_QUEUE_OP_MASK;
+ if (mask != 0 && (old->flags & mask) != mask)
+ vm_page_pqbatch_submit(m, new.queue);
+ }
+
+ return (true);
}
static inline void
-vm_pqbatch_process_page(struct vm_pagequeue *pq, vm_page_t m)
+vm_pqbatch_process_page(struct vm_pagequeue *pq, vm_page_t m, uint8_t queue)
{
+ vm_page_t next;
struct vm_domain *vmd;
- uint8_t qflags;
+ vm_page_astate_t old, new;
CRITICAL_ASSERT(curthread);
vm_pagequeue_assert_locked(pq);
+ old = vm_page_astate_load(m);
+retry:
+ if (__predict_false(old.queue != queue))
+ return;
+ KASSERT(pq == _vm_page_pagequeue(m, queue),
+ ("page %p does not belong to queue %p", m, pq));
+ KASSERT(old.queue != PQ_NONE || (old.flags & PGA_QUEUE_STATE_MASK) == 0,
+ ("page %p has unexpected queue state", m));
+
/*
- * The page daemon is allowed to set m->queue = PQ_NONE without
- * the page queue lock held. In this case it is about to free the page,
- * which must not have any queue state.
+ * Update the page's queue state before modifying the page queues
+ * themselves, to avoid having to roll back updates when a queue state
+ * update fails and requires a retry.
*/
- qflags = atomic_load_8(&m->aflags);
- KASSERT(pq == vm_page_pagequeue(m) ||
- (qflags & PGA_QUEUE_STATE_MASK) == 0,
- ("page %p doesn't belong to queue %p but has aflags %#x",
- m, pq, qflags));
-
- if ((qflags & PGA_DEQUEUE) != 0) {
- if (__predict_true((qflags & PGA_ENQUEUED) != 0))
- vm_pagequeue_remove(pq, m);
- vm_page_dequeue_complete(m);
- } else if ((qflags & (PGA_REQUEUE | PGA_REQUEUE_HEAD)) != 0) {
- if ((qflags & PGA_ENQUEUED) != 0)
+ new = old;
+ if ((old.flags & PGA_DEQUEUE) != 0) {
+ new.queue = PQ_NONE;
+ new.flags &= ~PGA_QUEUE_STATE_MASK;
+ if (__predict_true((old.flags & PGA_ENQUEUED) != 0)) {
+ next = TAILQ_NEXT(m, plinks.q);
TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
- else {
- vm_pagequeue_cnt_inc(pq);
- vm_page_aflag_set(m, PGA_ENQUEUED);
}
+ if (__predict_false(!vm_page_astate_fcmpset(m, &old, new))) {
+ if ((old.flags & PGA_ENQUEUED) != 0) {
+ if (next == NULL)
+ TAILQ_INSERT_TAIL(&pq->pq_pl, m,
+ plinks.q);
+ else
+ TAILQ_INSERT_BEFORE(next, m, plinks.q);
+ }
+ counter_u64_add(pqstate_commit_aborts, 1);
+ goto retry;
+ }
+ if ((old.flags & PGA_ENQUEUED) != 0)
+ vm_pagequeue_cnt_dec(pq);
+ counter_u64_add(queue_ops, 1);
+ } else if ((old.flags & (PGA_REQUEUE | PGA_REQUEUE_HEAD)) != 0) {
+ new.flags |= PGA_ENQUEUED;
+ new.flags &= ~(PGA_REQUEUE | PGA_REQUEUE_HEAD);
+ if (__predict_false(!vm_page_astate_fcmpset(m, &old, new))) {
+ counter_u64_add(pqstate_commit_aborts, 1);
+ goto retry;
+ }
+
+ if ((old.flags & PGA_ENQUEUED) != 0)
+ TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
+ else
+ vm_pagequeue_cnt_inc(pq);
/*
- * Give PGA_REQUEUE_HEAD precedence over PGA_REQUEUE.
- * In particular, if both flags are set in close succession,
- * only PGA_REQUEUE_HEAD will be applied, even if it was set
- * first.
+ * Give PGA_REQUEUE_HEAD precedence over PGA_REQUEUE. In
+ * particular, if both flags are set in close succession, only
+ * PGA_REQUEUE_HEAD will be applied, even if it was set first.
*/
- if ((qflags & PGA_REQUEUE_HEAD) != 0) {
- KASSERT(m->queue == PQ_INACTIVE,
+ if ((old.flags & PGA_REQUEUE_HEAD) != 0) {
+ KASSERT(old.queue == PQ_INACTIVE,
("head enqueue not supported for page %p", m));
vmd = vm_pagequeue_domain(m);
TAILQ_INSERT_BEFORE(&vmd->vmd_inacthead, m, plinks.q);
- } else
+ } else {
TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
-
- vm_page_aflag_clear(m, qflags & (PGA_REQUEUE |
- PGA_REQUEUE_HEAD));
+ }
+ counter_u64_add(queue_ops, 1);
+ } else {
+ counter_u64_add(null_queue_ops, 1);
}
}
@@ -3141,15 +3245,10 @@ static void
vm_pqbatch_process(struct vm_pagequeue *pq, struct vm_batchqueue *bq,
uint8_t queue)
{
- vm_page_t m;
int i;
- for (i = 0; i < bq->bq_cnt; i++) {
- m = bq->bq_pa[i];
- if (__predict_false(m->queue != queue))
- continue;
- vm_pqbatch_process_page(pq, m);
- }
+ for (i = 0; i < bq->bq_cnt; i++)
+ vm_pqbatch_process_page(pq, bq->bq_pa[i], queue);
vm_batchqueue_init(bq);
}
@@ -3157,8 +3256,6 @@ vm_pqbatch_process(struct vm_pagequeue *pq, struct vm_batchqueue *bq,
* vm_page_pqbatch_submit: [ internal use only ]
*
* Enqueue a page in the specified page queue's batched work queue.
- * The caller must have encoded the requested operation in the page
- * structure's aflags field.
*/
void
vm_page_pqbatch_submit(vm_page_t m, uint8_t queue)
@@ -3169,8 +3266,6 @@ vm_page_pqbatch_submit(vm_page_t m, uint8_t queue)
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("page %p is unmanaged", m));
- KASSERT(mtx_owned(vm_page_lockptr(m)) || m->object == NULL,
- ("missing synchronization for page %p", m));
KASSERT(queue < PQ_COUNT, ("invalid queue %d", queue));
domain = vm_phys_domain(m);
@@ -3189,21 +3284,7 @@ vm_page_pqbatch_submit(vm_page_t m, uint8_t queue)
bq = DPCPU_PTR(pqbatch[domain][queue]);
}
vm_pqbatch_process(pq, bq, queue);
-
- /*
- * The page may have been logically dequeued before we acquired the
- * page queue lock. In this case, since we either hold the page lock
- * or the page is being freed, a different thread cannot be concurrently
- * enqueuing the page.
- */
- if (__predict_true(m->queue == queue))
- vm_pqbatch_process_page(pq, m);
- else {
- KASSERT(m->queue == PQ_NONE,
- ("invalid queue transition for page %p", m));
- KASSERT((m->aflags & PGA_ENQUEUED) == 0,
- ("page %p is enqueued with invalid queue index", m));
- }
+ vm_pqbatch_process_page(pq, m, queue);
vm_pagequeue_unlock(pq);
critical_exit();
}
@@ -3247,131 +3328,54 @@ vm_page_pqbatch_drain(void)
thread_unlock(td);
}
-/*
- * Complete the logical removal of a page from a page queue. We must be
- * careful to synchronize with the page daemon, which may be concurrently
- * examining the page with only the page lock held. The page must not be
- * in a state where it appears to be logically enqueued.
- */
-static void
-vm_page_dequeue_complete(vm_page_t m)
-{
-
- m->queue = PQ_NONE;
- atomic_thread_fence_rel();
- vm_page_aflag_clear(m, PGA_QUEUE_STATE_MASK);
-}
-
-/*
- * vm_page_dequeue_deferred: [ internal use only ]
- *
- * Request removal of the given page from its current page
- * queue. Physical removal from the queue may be deferred
- * indefinitely.
- *
- * The page must be locked.
- */
-void
-vm_page_dequeue_deferred(vm_page_t m)
-{
- uint8_t queue;
-
- vm_page_assert_locked(m);
-
- if ((queue = vm_page_queue(m)) == PQ_NONE)
- return;
-
- /*
- * Set PGA_DEQUEUE if it is not already set to handle a concurrent call
- * to vm_page_dequeue_deferred_free(). In particular, avoid modifying
- * the page's queue state once vm_page_dequeue_deferred_free() has been
- * called. In the event of a race, two batch queue entries for the page
- * will be created, but the second will have no effect.
- */
- if (vm_page_pqstate_cmpset(m, queue, queue, PGA_DEQUEUE, PGA_DEQUEUE))
- vm_page_pqbatch_submit(m, queue);
-}
-
-/*
- * A variant of vm_page_dequeue_deferred() that does not assert the page
- * lock and is only to be called from vm_page_free_prep(). Because the
- * page is being freed, we can assume that nothing other than the page
- * daemon is scheduling queue operations on this page, so we get for
- * free the mutual exclusion that is otherwise provided by the page lock.
- * To handle races, the page daemon must take care to atomically check
- * for PGA_DEQUEUE when updating queue state.
- */
+/* XXX comment */
static void
-vm_page_dequeue_deferred_free(vm_page_t m)
+vm_page_dequeue_free(vm_page_t m)
{
- uint8_t queue;
+ vm_page_astate_t old, new;
- KASSERT(m->ref_count == 0, ("page %p has references", m));
-
- if ((m->aflags & PGA_DEQUEUE) != 0)
- return;
- atomic_thread_fence_acq();
- if ((queue = m->queue) == PQ_NONE)
- return;
- vm_page_aflag_set(m, PGA_DEQUEUE);
- vm_page_pqbatch_submit(m, queue);
+ for (old = vm_page_astate_load(m);;) {
+ if (old.queue == PQ_NONE) {
+ KASSERT((old.flags & PGA_QUEUE_STATE_MASK) == 0,
+ ("page %p has unexpected queue state flags %#x",
+ m, old.flags));
+ break;
+ }
+ if ((old.flags & PGA_DEQUEUE) != 0) {
+ vm_page_pqbatch_submit(m, old.queue);
+ break;
+ }
+ new = old;
+ new.flags |= PGA_DEQUEUE;
+ if (vm_page_pqstate_commit(m, &old, new))
+ break;
+ }
}
/*
* vm_page_dequeue:
*
* Remove the page from whichever page queue it's in, if any.
- * The page must either be locked or unallocated. This constraint
- * ensures that the queue state of the page will remain consistent
- * after this function returns.
+ * XXX
*/
void
vm_page_dequeue(vm_page_t m)
{
- struct vm_pagequeue *pq, *pq1;
- uint8_t aflags;
+ vm_page_astate_t old, new;
- KASSERT(mtx_owned(vm_page_lockptr(m)) || m->object == NULL,
- ("page %p is allocated and unlocked", m));
-
- for (pq = vm_page_pagequeue(m);; pq = pq1) {
- if (pq == NULL) {
- /*
- * A thread may be concurrently executing
- * vm_page_dequeue_complete(). Ensure that all queue
- * state is cleared before we return.
- */
- aflags = atomic_load_8(&m->aflags);
- if ((aflags & PGA_QUEUE_STATE_MASK) == 0)
- return;
- KASSERT((aflags & PGA_DEQUEUE) != 0,
+ for (old = vm_page_astate_load(m);;) {
+ if (old.queue == PQ_NONE) {
+ KASSERT((old.flags & PGA_QUEUE_STATE_MASK) == 0,
("page %p has unexpected queue state flags %#x",
- m, aflags));
-
- /*
- * Busy wait until the thread updating queue state is
- * finished. Such a thread must be executing in a
- * critical section.
- */
- cpu_spinwait();
- pq1 = vm_page_pagequeue(m);
- continue;
+ m, old.flags));
+ break;
}
- vm_pagequeue_lock(pq);
- if ((pq1 = vm_page_pagequeue(m)) == pq)
+ new = old;
+ new.queue = PQ_NONE;
+ new.flags &= ~PGA_QUEUE_STATE_MASK;
+ if (vm_page_pqstate_commit(m, &old, new))
break;
- vm_pagequeue_unlock(pq);
}
- KASSERT(pq == vm_page_pagequeue(m),
- ("%s: page %p migrated directly between queues", __func__, m));
- KASSERT((m->aflags & PGA_DEQUEUE) != 0 ||
- mtx_owned(vm_page_lockptr(m)),
- ("%s: queued unlocked page %p", __func__, m));
-
- if ((m->aflags & PGA_ENQUEUED) != 0)
- vm_pagequeue_remove(pq, m);
- vm_page_dequeue_complete(m);
- vm_pagequeue_unlock(pq);
}
/*
@@ -3383,72 +3387,17 @@ vm_page_enqueue(vm_page_t m, uint8_t queue)
{
vm_page_assert_locked(m);
- KASSERT(m->queue == PQ_NONE && (m->aflags & PGA_QUEUE_STATE_MASK) == 0,
+ KASSERT(m->astate.queue == PQ_NONE &&
+ (m->astate.flags & PGA_QUEUE_STATE_MASK) == 0,
("%s: page %p is already enqueued", __func__, m));
- m->queue = queue;
- if ((m->aflags & PGA_REQUEUE) == 0)
+ m->astate.queue = queue;
+ if ((m->astate.flags & PGA_REQUEUE) == 0)
vm_page_aflag_set(m, PGA_REQUEUE);
vm_page_pqbatch_submit(m, queue);
}
/*
- * vm_page_requeue: [ internal use only ]
- *
- * Schedule a requeue of the given page.
- *
- * The page must be locked.
- */
-void
-vm_page_requeue(vm_page_t m)
-{
-
- vm_page_assert_locked(m);
- KASSERT(vm_page_queue(m) != PQ_NONE,
- ("%s: page %p is not logically enqueued", __func__, m));
-
- if ((m->aflags & PGA_REQUEUE) == 0)
- vm_page_aflag_set(m, PGA_REQUEUE);
- vm_page_pqbatch_submit(m, atomic_load_8(&m->queue));
-}
-
-/*
- * vm_page_swapqueue: [ internal use only ]
- *
- * Move the page from one queue to another, or to the tail of its
- * current queue, in the face of a possible concurrent call to
- * vm_page_dequeue_deferred_free().
- */
-void
-vm_page_swapqueue(vm_page_t m, uint8_t oldq, uint8_t newq)
-{
- struct vm_pagequeue *pq;
-
- KASSERT(oldq < PQ_COUNT && newq < PQ_COUNT && oldq != newq,
- ("vm_page_swapqueue: invalid queues (%d, %d)", oldq, newq));
- KASSERT((m->oflags & VPO_UNMANAGED) == 0,
- ("vm_page_swapqueue: page %p is unmanaged", m));
- vm_page_assert_locked(m);
-
- /*
- * Atomically update the queue field and set PGA_REQUEUE while
- * ensuring that PGA_DEQUEUE has not been set.
- */
- pq = &vm_pagequeue_domain(m)->vmd_pagequeues[oldq];
- vm_pagequeue_lock(pq);
- if (!vm_page_pqstate_cmpset(m, oldq, newq, PGA_DEQUEUE, PGA_REQUEUE)) {
- vm_pagequeue_unlock(pq);
- return;
- }
- if ((m->aflags & PGA_ENQUEUED) != 0) {
- vm_pagequeue_remove(pq, m);
- vm_page_aflag_clear(m, PGA_ENQUEUED);
- }
- vm_pagequeue_unlock(pq);
- vm_page_pqbatch_submit(m, newq);
-}
-
-/*
* vm_page_free_prep:
*
* Prepares the given page to be put on the free list,
@@ -3479,10 +3428,11 @@ vm_page_free_prep(vm_page_t m)
}
#endif
if ((m->oflags & VPO_UNMANAGED) == 0)
- KASSERT(!pmap_page_is_mapped(m),
+ KASSERT(!pmap_page_is_mapped(m) && (vm_page_aflags(m) &
+ (PGA_EXECUTABLE | PGA_WRITEABLE)) == 0,
("vm_page_free_prep: freeing mapped page %p", m));
else
- KASSERT(m->queue == PQ_NONE,
+ KASSERT(m->astate.queue == PQ_NONE,
("vm_page_free_prep: unmanaged page %p is queued", m));
VM_CNT_INC(v_tfree);
@@ -3511,7 +3461,7 @@ vm_page_free_prep(vm_page_t m)
if ((m->flags & PG_FICTITIOUS) != 0) {
KASSERT(m->ref_count == 1,
("fictitious page %p is referenced", m));
- KASSERT(m->queue == PQ_NONE,
+ KASSERT(m->astate.queue == PQ_NONE,
("fictitious page %p is queued", m));
return (false);
}
@@ -3522,7 +3472,7 @@ vm_page_free_prep(vm_page_t m)
* dequeue.
*/
if ((m->oflags & VPO_UNMANAGED) == 0)
- vm_page_dequeue_deferred_free(m);
+ vm_page_dequeue_free(m);
m->valid = 0;
vm_page_undirty(m);
@@ -3629,6 +3579,8 @@ vm_page_wire(vm_page_t m)
old = atomic_fetchadd_int(&m->ref_count, 1);
KASSERT(VPRC_WIRE_COUNT(old) != VPRC_WIRE_COUNT_MAX,
("vm_page_wire: counter overflow for page %p", m));
+ if ((m->oflags & VPO_UNMANAGED) == 0)
+ vm_page_aflag_set(m, PGA_DEQUEUE);
if (VPRC_WIRE_COUNT(old) == 0)
vm_wire_add(1);
}
@@ -3650,11 +3602,45 @@ vm_page_wire_mapped(vm_page_t m)
return (false);
} while (!atomic_fcmpset_int(&m->ref_count, &old, old + 1));
+ if ((m->oflags & VPO_UNMANAGED) == 0)
+ vm_page_aflag_set(m, PGA_DEQUEUE);
if (VPRC_WIRE_COUNT(old) == 0)
vm_wire_add(1);
return (true);
}
+/* XXX comment */
+static void
+vm_page_unwire_managed(vm_page_t m, uint8_t queue, bool noreuse)
+{
+ u_int old;
+
+ KASSERT((m->oflags & VPO_UNMANAGED) == 0,
+ ("vm_page_unwire_managed: page %p is unmanaged", m));
+
+ /*
+ * Update LRU state before releasing the wiring reference.
+ * Use a release store when updating the reference count to
+ * synchronize with vm_page_free_prep().
+ */
+ old = m->ref_count;
+ do {
+ KASSERT(VPRC_WIRE_COUNT(old) > 0,
+ ("vm_page_unwire: wire count underflow for page %p", m));
+ if (VPRC_WIRE_COUNT(old) == 1 &&
+ !vm_page_release_toq(m, queue, noreuse)) {
+ old = atomic_load_int(&m->ref_count);
+ continue;
+ }
+ } while (!atomic_fcmpset_rel_int(&m->ref_count, &old, old - 1));
+
+ if (VPRC_WIRE_COUNT(old) == 1) {
+ vm_wire_sub(1);
+ if (old == 1)
+ vm_page_free(m);
+ }
+}
+
/*
* Release one wiring of the specified page, potentially allowing it to be
* paged out.
@@ -3669,8 +3655,6 @@ vm_page_wire_mapped(vm_page_t m)
void
vm_page_unwire(vm_page_t m, uint8_t queue)
{
- u_int old;
- bool locked;
KASSERT(queue < PQ_COUNT,
("vm_page_unwire: invalid queue %u request for page %p", queue, m));
@@ -3678,42 +3662,8 @@ vm_page_unwire(vm_page_t m, uint8_t queue)
if ((m->oflags & VPO_UNMANAGED) != 0) {
if (vm_page_unwire_noq(m) && m->ref_count == 0)
vm_page_free(m);
- return;
- }
-
- /*
- * Update LRU state before releasing the wiring reference.
- * We only need to do this once since we hold the page lock.
- * Use a release store when updating the reference count to
- * synchronize with vm_page_free_prep().
- */
- old = m->ref_count;
- locked = false;
- do {
- KASSERT(VPRC_WIRE_COUNT(old) > 0,
- ("vm_page_unwire: wire count underflow for page %p", m));
- if (!locked && VPRC_WIRE_COUNT(old) == 1) {
- vm_page_lock(m);
- locked = true;
- if (queue == PQ_ACTIVE && vm_page_queue(m) == PQ_ACTIVE)
- vm_page_reference(m);
- else
- vm_page_mvqueue(m, queue);
- }
- } while (!atomic_fcmpset_rel_int(&m->ref_count, &old, old - 1));
-
- /*
- * Release the lock only after the wiring is released, to ensure that
- * the page daemon does not encounter and dequeue the page while it is
- * still wired.
- */
- if (locked)
- vm_page_unlock(m);
-
- if (VPRC_WIRE_COUNT(old) == 1) {
- vm_wire_sub(1);
- if (old == 1)
- vm_page_free(m);
+ } else {
+ vm_page_unwire_managed(m, queue, false);
}
}
@@ -3750,25 +3700,45 @@ vm_page_unwire_noq(vm_page_t m)
* before releasing the page lock, otherwise the page daemon may immediately
* dequeue the page.
*
+ * In many cases this function's parameters are known at compile-time, so
+ * it is inlined into its callers so as to allow constant folding to remove
+ * branches.
+ *
* A managed page must be locked.
*/
static __always_inline void
-vm_page_mvqueue(vm_page_t m, const uint8_t nqueue)
+vm_page_mvqueue(vm_page_t m, const uint8_t nqueue, const uint16_t nflag)
{
+ vm_page_astate_t old, new;
- vm_page_assert_locked(m);
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("vm_page_mvqueue: page %p is unmanaged", m));
-
- if (vm_page_queue(m) != nqueue) {
- vm_page_dequeue(m);
- vm_page_enqueue(m, nqueue);
- } else if (nqueue != PQ_ACTIVE) {
- vm_page_requeue(m);
+ KASSERT(m->ref_count > 0,
+ ("vm_page_mvqueue: page %p is missing refs", m));
+ KASSERT(nflag == PGA_REQUEUE || nflag == PGA_REQUEUE_HEAD,
+ ("vm_page_mvqueue: unexpected queue state flag"));
+ KASSERT(nflag != PGA_REQUEUE_HEAD || nqueue == PQ_INACTIVE,
+ ("vm_page_mvqueue: wrong queue %d for PGA_REQUEUE_HEAD", nqueue));
+
+ for (old = vm_page_astate_load(m);;) {
+ if ((old.flags & PGA_DEQUEUE) != 0)
+ break;
+ new = old;
+ if (nqueue == PQ_ACTIVE)
+ new.act_count = max(old.act_count, ACT_INIT);
+
+ if (old.queue == nqueue) {
+ if (nqueue != PQ_ACTIVE)
+ new.flags |= nflag;
+ if (new._bits == old._bits)
+ break;
+ } else {
+ new.flags |= nflag;
+ new.queue = nqueue;
+ }
+ if (vm_page_pqstate_commit(m, &old, new))
+ break;
}
-
- if (nqueue == PQ_ACTIVE && m->act_count < ACT_INIT)
- m->act_count = ACT_INIT;
}
/*
@@ -3778,9 +3748,9 @@ void
vm_page_activate(vm_page_t m)
{
- if ((m->oflags & VPO_UNMANAGED) != 0 || vm_page_wired(m))
+ if ((m->oflags & VPO_UNMANAGED) != 0)
return;
- vm_page_mvqueue(m, PQ_ACTIVE);
+ vm_page_mvqueue(m, PQ_ACTIVE, PGA_REQUEUE);
}
/*
@@ -3791,30 +3761,9 @@ void
vm_page_deactivate(vm_page_t m)
{
- if ((m->oflags & VPO_UNMANAGED) != 0 || vm_page_wired(m))
+ if ((m->oflags & VPO_UNMANAGED) != 0)
return;
- vm_page_mvqueue(m, PQ_INACTIVE);
-}
-
-/*
- * Move the specified page close to the head of the inactive queue,
- * bypassing LRU. A marker page is used to maintain FIFO ordering.
- * As with regular enqueues, we use a per-CPU batch queue to reduce
- * contention on the page queue lock.
- */
-static void
-_vm_page_deactivate_noreuse(vm_page_t m)
-{
-
- vm_page_assert_locked(m);
-
- if (!vm_page_inactive(m)) {
- vm_page_dequeue(m);
- m->queue = PQ_INACTIVE;
- }
- if ((m->aflags & PGA_REQUEUE_HEAD) == 0)
- vm_page_aflag_set(m, PGA_REQUEUE_HEAD);
- vm_page_pqbatch_submit(m, PQ_INACTIVE);
+ vm_page_mvqueue(m, PQ_INACTIVE, PGA_REQUEUE);
}
void
@@ -3824,8 +3773,9 @@ vm_page_deactivate_noreuse(vm_page_t m)
KASSERT(m->object != NULL,
("vm_page_deactivate_noreuse: page %p has no object", m));
- if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_wired(m))
- _vm_page_deactivate_noreuse(m);
+ if ((m->oflags & VPO_UNMANAGED) != 0)
+ return;
+ vm_page_mvqueue(m, PQ_INACTIVE, PGA_REQUEUE_HEAD);
}
/*
@@ -3837,7 +3787,7 @@ vm_page_launder(vm_page_t m)
if ((m->oflags & VPO_UNMANAGED) != 0 || vm_page_wired(m))
return;
- vm_page_mvqueue(m, PQ_LAUNDRY);
+ vm_page_mvqueue(m, PQ_LAUNDRY, PGA_REQUEUE);
}
/*
@@ -3855,11 +3805,17 @@ vm_page_unswappable(vm_page_t m)
vm_page_enqueue(m, PQ_UNSWAPPABLE);
}
-static void
-vm_page_release_toq(vm_page_t m, int flags)
+/* XXX comment */
+static bool
+vm_page_release_toq(vm_page_t m, uint8_t nqueue, bool noreuse)
{
+ vm_page_astate_t old, new;
+ uint16_t nflag;
- vm_page_assert_locked(m);
+ KASSERT((m->oflags & VPO_UNMANAGED) == 0,
+ ("vm_page_release_toq: page %p is unmanaged", m));
+ KASSERT(m->ref_count > 0,
+ ("vm_page_release_toq: page %p is missing refs", m));
/*
* Use a check of the valid bits to determine whether we should
@@ -3871,12 +3827,35 @@ vm_page_release_toq(vm_page_t m, int flags)
* If we were asked to not cache the page, place it near the head of the
* inactive queue so that is reclaimed sooner.
*/
- if ((flags & (VPR_TRYFREE | VPR_NOREUSE)) != 0 || m->valid == 0)
- _vm_page_deactivate_noreuse(m);
- else if (vm_page_active(m))
- vm_page_reference(m);
- else
- vm_page_mvqueue(m, PQ_INACTIVE);
+ nflag = (noreuse || m->valid == 0) ? PGA_REQUEUE_HEAD : PGA_REQUEUE;
+
+ /* XXX explain */
+ vm_page_aflag_clear(m, PGA_DEQUEUE);
+
+ for (old = vm_page_astate_load(m);;) {
+ new = old;
+ if ((new.flags & PGA_DEQUEUE) != 0)
+ return (false);
+ if (nflag != PGA_REQUEUE_HEAD && old.queue == PQ_ACTIVE) {
+ new.flags |= PGA_REFERENCED;
+ } else {
+ if (nqueue == PQ_ACTIVE)
+ new.act_count = max(old.act_count, ACT_INIT);
+ else
+ new.flags |= nflag;
+ new.queue = nqueue;
+ }
+
+ /*
+ * If the page queue state is not changing, we have nothing
+ * to do.
+ */
+ if (new._bits == old._bits)
+ break;
+ if (vm_page_pqstate_commit(m, &old, new))
+ break;
+ }
+ return (true);
}
/*
@@ -3886,8 +3865,6 @@ void
vm_page_release(vm_page_t m, int flags)
{
vm_object_t object;
- u_int old;
- bool locked;
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("vm_page_release: page %p is unmanaged", m));
@@ -3913,36 +3890,7 @@ vm_page_release(vm_page_t m, int flags)
}
}
- /*
- * Update LRU state before releasing the wiring reference.
- * Use a release store when updating the reference count to
- * synchronize with vm_page_free_prep().
- */
- old = m->ref_count;
- locked = false;
- do {
- KASSERT(VPRC_WIRE_COUNT(old) > 0,
- ("vm_page_unwire: wire count underflow for page %p", m));
- if (!locked && VPRC_WIRE_COUNT(old) == 1) {
- vm_page_lock(m);
- locked = true;
- vm_page_release_toq(m, flags);
- }
- } while (!atomic_fcmpset_rel_int(&m->ref_count, &old, old - 1));
-
- /*
- * Release the lock only after the wiring is released, to ensure that
- * the page daemon does not encounter and dequeue the page while it is
- * still wired.
- */
- if (locked)
- vm_page_unlock(m);
-
- if (VPRC_WIRE_COUNT(old) == 1) {
- vm_wire_sub(1);
- if (old == 1)
- vm_page_free(m);
- }
+ vm_page_unwire_managed(m, PQ_INACTIVE, flags != 0);
}
/* See vm_page_release(). */
@@ -3960,9 +3908,7 @@ vm_page_release_locked(vm_page_t m, int flags)
m->dirty == 0 && !vm_page_busied(m)) {
vm_page_free(m);
} else {
- vm_page_lock(m);
- vm_page_release_toq(m, flags);
- vm_page_unlock(m);
+ (void)vm_page_release_toq(m, PQ_INACTIVE, flags != 0);
}
}
}
@@ -4775,6 +4721,22 @@ vm_page_object_lock_assert(vm_page_t m)
}
void
+vm_page_pagequeue_lock_assert(vm_page_t m, uint8_t queue)
+{
+
+ if ((m->flags & PG_MARKER) != 0)
+ return;
+
+ /*
+ * The page's page queue index may only change while the
+ * current queue's lock is held.
+ */
+ KASSERT(queue != PQ_NONE,
+ ("page %p does not belong to a queue", m));
+ vm_pagequeue_assert_locked(_vm_page_pagequeue(m, queue));
+}
+
+void
vm_page_assert_pga_writeable(vm_page_t m, uint8_t bits)
{
@@ -4853,7 +4815,7 @@ DB_SHOW_COMMAND(pginfo, vm_page_print_pginfo)
"page %p obj %p pidx 0x%jx phys 0x%jx q %d ref %u\n"
" af 0x%x of 0x%x f 0x%x act %d busy %x valid 0x%x dirty 0x%x\n",
m, m->object, (uintmax_t)m->pindex, (uintmax_t)m->phys_addr,
- m->queue, m->ref_count, m->aflags, m->oflags,
- m->flags, m->act_count, m->busy_lock, m->valid, m->dirty);
+ m->astate.queue, m->ref_count, m->astate.flags, m->oflags,
+ m->flags, m->astate.act_count, m->busy_lock, m->valid, m->dirty);
}
#endif /* DDB */
diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
index 0c3f3a9bade2..4d5726c0e39a 100644
--- a/sys/vm/vm_page.h
+++ b/sys/vm/vm_page.h
@@ -190,6 +190,15 @@ typedef uint32_t vm_page_bits_t;
typedef uint64_t vm_page_bits_t;
#endif
+typedef union {
+ struct {
+ uint16_t flags;
+ uint8_t queue;
+ uint8_t act_count;
+ };
+ uint32_t _bits;
+} vm_page_astate_t;
+
struct vm_page {
union {
TAILQ_ENTRY(vm_page) q; /* page queue or free list (Q) */
@@ -212,15 +221,13 @@ struct vm_page {
u_int ref_count; /* page references */
};
volatile u_int busy_lock; /* busy owners lock */
- uint16_t flags; /* page PG_* flags (P) */
+ vm_page_astate_t astate; /* atomically updated state */
+ uint8_t flags; /* page PG_* flags (P) */
uint8_t order; /* index of the buddy queue (F) */
uint8_t pool; /* vm_phys freepool index (F) */
- uint8_t aflags; /* access is atomic */
- uint8_t oflags; /* page VPO_* flags (O) */
- uint8_t queue; /* page queue index (Q) */
int8_t psind; /* pagesizes[] index (O) */
int8_t segind; /* vm_phys segment index (C) */
- u_char act_count; /* page usage count (P) */
+ uint8_t oflags; /* page VPO_* flags (O) */
/* NOTE that these must support one bit per DEV_BSIZE in a page */
/* so, on normal X86 kernels, they must be at least 8 bits wide */
vm_page_bits_t valid; /* map of valid DEV_BSIZE chunks (O) */
@@ -399,8 +406,8 @@ extern struct mtx_padalign pa_lock[];
#define PGA_REQUEUE 0x20 /* page is due to be requeued */
#define PGA_REQUEUE_HEAD 0x40 /* page requeue should bypass LRU */
-#define PGA_QUEUE_STATE_MASK (PGA_ENQUEUED | PGA_DEQUEUE | PGA_REQUEUE | \
- PGA_REQUEUE_HEAD)
+#define PGA_QUEUE_OP_MASK (PGA_DEQUEUE | PGA_REQUEUE | PGA_REQUEUE_HEAD)
+#define PGA_QUEUE_STATE_MASK (PGA_ENQUEUED | PGA_QUEUE_OP_MASK)
/*
* Page flags. If changed at any other time than page allocation or
@@ -410,11 +417,11 @@ extern struct mtx_padalign pa_lock[];
* allocated from a per-CPU cache. It is cleared the next time that the
* page is allocated from the physical memory allocator.
*/
-#define PG_PCPU_CACHE 0x0001 /* was allocated from per-CPU caches */
-#define PG_FICTITIOUS 0x0004 /* physical page doesn't exist */
-#define PG_ZERO 0x0008 /* page is zeroed */
-#define PG_MARKER 0x0010 /* special queue marker page */
-#define PG_NODUMP 0x0080 /* don't include this page in a dump */
+#define PG_PCPU_CACHE 0x01 /* was allocated from per-CPU caches */
+#define PG_FICTITIOUS 0x04 /* physical page doesn't exist */
+#define PG_ZERO 0x08 /* page is zeroed */
+#define PG_MARKER 0x10 /* special queue marker page */
+#define PG_NODUMP 0x80 /* don't include this page in a dump */
/*
* Misc constants.
@@ -572,7 +579,6 @@ int vm_page_grab_valid(vm_page_t *mp, vm_object_t object, vm_pindex_t pindex,
void vm_page_deactivate(vm_page_t);
void vm_page_deactivate_noreuse(vm_page_t);
void vm_page_dequeue(vm_page_t m);
-void vm_page_dequeue_deferred(vm_page_t m);
vm_page_t vm_page_find_least(vm_object_t, vm_pindex_t);
bool vm_page_free_prep(vm_page_t m);
vm_page_t vm_page_getfake(vm_paddr_t paddr, vm_memattr_t memattr);
@@ -584,6 +590,8 @@ vm_page_t vm_page_next(vm_page_t m);
int vm_page_pa_tryrelock(pmap_t, vm_paddr_t, vm_paddr_t *);
void vm_page_pqbatch_drain(void);
void vm_page_pqbatch_submit(vm_page_t m, uint8_t queue);
+bool vm_page_pqstate_commit(vm_page_t m, vm_page_astate_t *old,
+ vm_page_astate_t new);
vm_page_t vm_page_prev(vm_page_t m);
bool vm_page_ps_test(vm_page_t m, int flags, vm_page_t skip_m);
void vm_page_putfake(vm_page_t m);
@@ -688,64 +696,52 @@ void vm_page_lock_assert_KBI(vm_page_t m, int a, const char *file, int line);
#ifdef INVARIANTS
void vm_page_object_lock_assert(vm_page_t m);
#define VM_PAGE_OBJECT_LOCK_ASSERT(m) vm_page_object_lock_assert(m)
+void vm_page_pagequeue_lock_assert(vm_page_t m, uint8_t queue);
+#define VM_PAGE_PAGEQUEUE_LOCK_ASSERT(m, q) vm_page_pagequeue_lock_assert(m, q)
void vm_page_assert_pga_writeable(vm_page_t m, uint8_t bits);
#define VM_PAGE_ASSERT_PGA_WRITEABLE(m, bits) \
vm_page_assert_pga_writeable(m, bits)
#else
#define VM_PAGE_OBJECT_LOCK_ASSERT(m) (void)0
+#define VM_PAGE_PAGEQUEUE_LOCK_ASSERT(m, q) (void)0
#define VM_PAGE_ASSERT_PGA_WRITEABLE(m, bits) (void)0
#endif
/*
- * We want to use atomic updates for the aflags field, which is 8 bits wide.
- * However, not all architectures support atomic operations on 8-bit
+ * We want to use atomic updates for the aflags field, which is 16 bits wide.
+ * However, not all architectures support atomic operations on 16-bit
* destinations. In order that we can easily use a 32-bit operation, we
* require that the aflags field be 32-bit aligned.
*/
-_Static_assert(offsetof(struct vm_page, aflags) % sizeof(uint32_t) == 0,
+_Static_assert(offsetof(struct vm_page, astate.flags) % sizeof(uint32_t) == 0,
"aflags field is not 32-bit aligned");
+#if BYTE_ORDER == LITTLE_ENDIAN
+#define	VM_PAGE_AFLAG_SHIFT	0
+#else
+#define	VM_PAGE_AFLAG_SHIFT	16
+#endif
+
/*
- * We want to be able to update the aflags and queue fields atomically in
- * the same operation.
+ * Return the page's atomic (PGA_*) flags.
*/
-_Static_assert(offsetof(struct vm_page, aflags) / sizeof(uint32_t) ==
- offsetof(struct vm_page, queue) / sizeof(uint32_t),
- "aflags and queue fields do not belong to the same 32-bit word");
-_Static_assert(offsetof(struct vm_page, queue) % sizeof(uint32_t) == 2,
- "queue field is at an unexpected offset");
-_Static_assert(sizeof(((struct vm_page *)NULL)->queue) == 1,
- "queue field has an unexpected size");
-
-#if BYTE_ORDER == LITTLE_ENDIAN
-#define VM_PAGE_AFLAG_SHIFT 0
-#define VM_PAGE_QUEUE_SHIFT 16
-#else
-#define VM_PAGE_AFLAG_SHIFT 24
-#define VM_PAGE_QUEUE_SHIFT 8
-#endif
-#define VM_PAGE_QUEUE_MASK (0xff << VM_PAGE_QUEUE_SHIFT)
+static inline int
+vm_page_aflags(vm_page_t m)
+{
+
+ return (m->astate.flags);
+}
/*
* Clear the given bits in the specified page.
*/
static inline void
-vm_page_aflag_clear(vm_page_t m, uint8_t bits)
+vm_page_aflag_clear(vm_page_t m, uint16_t bits)
{
uint32_t *addr, val;
/*
- * The PGA_REFERENCED flag can only be cleared if the page is locked.
- */
- if ((bits & PGA_REFERENCED) != 0)
- vm_page_assert_locked(m);
-
- /*
* Access the whole 32-bit word containing the aflags field with an
* atomic update. Parallel non-atomic updates to the other fields
* within this word are handled properly by the atomic update.
*/
- addr = (void *)&m->aflags;
+ addr = (void *)&m->astate;
val = bits << VM_PAGE_AFLAG_SHIFT;
atomic_clear_32(addr, val);
}
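
For orientation, the field packing that VM_PAGE_AFLAG_SHIFT depends on can be checked with a small userland sketch (illustrative only, not part of the patch; the union is copied from the vm_page.h hunk above, and the flag and queue values are arbitrary placeholders rather than real PGA_*/PQ_* constants):

#include <stdint.h>
#include <stdio.h>

/* Copy of the vm_page_astate_t layout introduced by this patch. */
typedef union {
	struct {
		uint16_t flags;
		uint8_t queue;
		uint8_t act_count;
	};
	uint32_t _bits;
} astate_t;

int
main(void)
{
	astate_t a = { ._bits = 0 };

	a.flags = 0x0020;	/* placeholder flag bit */
	a.queue = 2;		/* placeholder queue index */
	a.act_count = 5;

	/*
	 * On a little-endian host the flags field occupies bits 0..15 of
	 * _bits, so a shift of 0 addresses it; on a big-endian host it
	 * occupies bits 16..31, hence a shift of 16.
	 */
	printf("_bits = %#010x\n", a._bits);
	return (0);
}
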
@@ -754,7 +750,7 @@ vm_page_aflag_clear(vm_page_t m, uint8_t bits)
* Set the given bits in the specified page.
*/
static inline void
-vm_page_aflag_set(vm_page_t m, uint8_t bits)
+vm_page_aflag_set(vm_page_t m, uint16_t bits)
{
uint32_t *addr, val;
@@ -765,44 +761,43 @@ vm_page_aflag_set(vm_page_t m, uint8_t bits)
* atomic update. Parallel non-atomic updates to the other fields
* within this word are handled properly by the atomic update.
*/
- addr = (void *)&m->aflags;
+ addr = (void *)&m->astate;
val = bits << VM_PAGE_AFLAG_SHIFT;
atomic_set_32(addr, val);
}
-/*
- * Atomically update the queue state of the page. The operation fails if
- * any of the queue flags in "fflags" are set or if the "queue" field of
- * the page does not match the expected value; if the operation is
- * successful, the flags in "nflags" are set and all other queue state
- * flags are cleared.
- */
+static inline vm_page_astate_t
+vm_page_astate_load(vm_page_t m)
+{
+ vm_page_astate_t astate;
+
+	astate._bits = atomic_load_32(&m->astate._bits);
+ return (astate);
+}
+
static inline bool
-vm_page_pqstate_cmpset(vm_page_t m, uint32_t oldq, uint32_t newq,
- uint32_t fflags, uint32_t nflags)
+vm_page_astate_fcmpset(vm_page_t m, vm_page_astate_t *old,
+ vm_page_astate_t new)
{
- uint32_t *addr, nval, oval, qsmask;
-
- vm_page_assert_locked(m);
-
- fflags <<= VM_PAGE_AFLAG_SHIFT;
- nflags <<= VM_PAGE_AFLAG_SHIFT;
- newq <<= VM_PAGE_QUEUE_SHIFT;
- oldq <<= VM_PAGE_QUEUE_SHIFT;
- qsmask = ((PGA_DEQUEUE | PGA_REQUEUE | PGA_REQUEUE_HEAD) <<
- VM_PAGE_AFLAG_SHIFT) | VM_PAGE_QUEUE_MASK;
-
- addr = (void *)&m->aflags;
- oval = atomic_load_32(addr);
- do {
- if ((oval & fflags) != 0)
- return (false);
- if ((oval & VM_PAGE_QUEUE_MASK) != oldq)
- return (false);
- nval = (oval & ~qsmask) | nflags | newq;
- } while (!atomic_fcmpset_32(addr, &oval, nval));
-
- return (true);
+ int ret;
+
+ KASSERT(new.queue == PQ_INACTIVE || (new.flags & PGA_REQUEUE_HEAD) == 0,
+	    ("vm_page_astate_fcmpset: unexpected head requeue for page %p",
+ m));
+ KASSERT((new.flags & PGA_ENQUEUED) == 0 || new.queue != PQ_NONE,
+ ("vm_page_astate_fcmpset: setting PGA_ENQUEUED without a queue"));
+ KASSERT(new._bits != old->_bits,
+ ("vm_page_astate_fcmpset: bits are not changing"));
+
+ ret = atomic_fcmpset_32(&m->astate._bits, &old->_bits, new._bits);
+ if (ret != 0) {
+ if (old->queue != PQ_NONE && old->queue != new.queue)
+ VM_PAGE_PAGEQUEUE_LOCK_ASSERT(m, old->queue);
+		KASSERT((new.flags & PGA_ENQUEUED) == 0 ||
+		    old->queue == new.queue,
+		    ("vm_page_astate_fcmpset: PGA_ENQUEUED set after queue "
+		    "change for page %p", m));
+ }
+
+ return (ret != 0);
}
/*
@@ -858,19 +853,17 @@ vm_page_replace_checked(vm_page_t mnew, vm_object_t object, vm_pindex_t pindex,
/*
* vm_page_queue:
*
- * Return the index of the queue containing m. This index is guaranteed
- * not to change while the page lock is held.
+ * Return the index of the queue containing m.
*/
static inline uint8_t
vm_page_queue(vm_page_t m)
{
+ vm_page_astate_t as;
- vm_page_assert_locked(m);
-
- if ((m->aflags & PGA_DEQUEUE) != 0)
+ as = vm_page_astate_load(m);
+ if ((as.flags & PGA_DEQUEUE) != 0)
return (PQ_NONE);
- atomic_thread_fence_acq();
- return (m->queue);
+ return (as.queue);
}
static inline bool
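
As a usage sketch of the new accessors, a lock-free, saturating bump of a page's activation count follows the same load/compare-and-set pattern used throughout this patch. The helper name below is hypothetical and the snippet assumes this header (and its ACT_MAX definition) is in scope; it is not code from the change:

/*
 * Hypothetical example: saturating increment of a page's activation count
 * using only vm_page_astate_load() and vm_page_astate_fcmpset().
 */
static void
example_bump_act_count(vm_page_t m)
{
	vm_page_astate_t old, new;

	for (old = vm_page_astate_load(m);;) {
		new = old;
		if (new.act_count >= ACT_MAX)
			break;		/* Already saturated; nothing to do. */
		new.act_count++;
		/*
		 * On failure the fcmpset refreshes "old" with the current
		 * state, so the loop simply retries.
		 */
		if (vm_page_astate_fcmpset(m, &old, new))
			break;
	}
}
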
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index c7f03129d070..848239eea411 100644
--- a/sys/vm/vm_pageout.c
+++ b/sys/vm/vm_pageout.c
@@ -218,7 +218,7 @@ vm_pageout_init_scan(struct scan_state *ss, struct vm_pagequeue *pq,
{
vm_pagequeue_assert_locked(pq);
- KASSERT((marker->aflags & PGA_ENQUEUED) == 0,
+ KASSERT((vm_page_aflags(marker) & PGA_ENQUEUED) == 0,
("marker %p already enqueued", marker));
if (after == NULL)
@@ -242,7 +242,7 @@ vm_pageout_end_scan(struct scan_state *ss)
pq = ss->pq;
vm_pagequeue_assert_locked(pq);
- KASSERT((ss->marker->aflags & PGA_ENQUEUED) != 0,
+ KASSERT((vm_page_aflags(ss->marker) & PGA_ENQUEUED) != 0,
("marker %p not enqueued", ss->marker));
TAILQ_REMOVE(&pq->pq_pl, ss->marker, plinks.q);
@@ -271,7 +271,7 @@ vm_pageout_collect_batch(struct scan_state *ss, const bool dequeue)
marker = ss->marker;
pq = ss->pq;
- KASSERT((marker->aflags & PGA_ENQUEUED) != 0,
+ KASSERT((marker->astate.flags & PGA_ENQUEUED) != 0,
("marker %p not enqueued", ss->marker));
vm_pagequeue_lock(pq);
@@ -280,7 +280,7 @@ vm_pageout_collect_batch(struct scan_state *ss, const bool dequeue)
m = n, ss->scanned++) {
n = TAILQ_NEXT(m, plinks.q);
if ((m->flags & PG_MARKER) == 0) {
- KASSERT((m->aflags & PGA_ENQUEUED) != 0,
+ KASSERT((m->astate.flags & PGA_ENQUEUED) != 0,
("page %p not enqueued", m));
KASSERT((m->flags & PG_FICTITIOUS) == 0,
("Fictitious page %p cannot be in page queue", m));
@@ -370,13 +370,10 @@ more:
ib = 0;
break;
}
- vm_page_lock(p);
if (!vm_page_in_laundry(p) || !vm_page_try_remove_write(p)) {
- vm_page_unlock(p);
ib = 0;
break;
}
- vm_page_unlock(p);
mc[--page_base] = pb = p;
++pageout_count;
++ib;
@@ -396,12 +393,8 @@ more:
vm_page_test_dirty(p);
if (p->dirty == 0)
break;
- vm_page_lock(p);
- if (!vm_page_in_laundry(p) || !vm_page_try_remove_write(p)) {
- vm_page_unlock(p);
+ if (!vm_page_in_laundry(p) || !vm_page_try_remove_write(p))
break;
- }
- vm_page_unlock(p);
mc[page_base + pageout_count] = ps = p;
++pageout_count;
++is;
@@ -458,7 +451,7 @@ vm_pageout_flush(vm_page_t *mc, int count, int flags, int mreq, int *prunlen,
KASSERT(mc[i]->valid == VM_PAGE_BITS_ALL,
("vm_pageout_flush: partially invalid page %p index %d/%d",
mc[i], i, count));
- KASSERT((mc[i]->aflags & PGA_WRITEABLE) == 0,
+ KASSERT((vm_page_aflags(mc[i]) & PGA_WRITEABLE) == 0,
("vm_pageout_flush: writeable page %p", mc[i]));
vm_page_sbusy(mc[i]);
}
@@ -577,7 +570,6 @@ vm_pageout_clean(vm_page_t m, int *numpagedout)
vm_pindex_t pindex;
int error, lockmode;
- vm_page_assert_locked(m);
object = m->object;
VM_OBJECT_ASSERT_WLOCKED(object);
error = 0;
@@ -597,7 +589,6 @@ vm_pageout_clean(vm_page_t m, int *numpagedout)
* of time.
*/
if (object->type == OBJT_VNODE) {
- vm_page_unlock(m);
vp = object->handle;
if (vp->v_type == VREG &&
vn_start_write(vp, &mp, V_NOWAIT) != 0) {
@@ -627,7 +618,6 @@ vm_pageout_clean(vm_page_t m, int *numpagedout)
error = ENOENT;
goto unlock_all;
}
- vm_page_lock(m);
/*
* While the object and page were unlocked, the page
@@ -663,7 +653,6 @@ vm_pageout_clean(vm_page_t m, int *numpagedout)
error = EBUSY;
goto unlock_all;
}
- vm_page_unlock(m);
/*
* If a page is dirty, then it is either being washed
@@ -699,14 +688,13 @@ vm_pageout_launder(struct vm_domain *vmd, int launder, bool in_shortfall)
{
struct scan_state ss;
struct vm_pagequeue *pq;
- struct mtx *mtx;
vm_object_t object;
vm_page_t m, marker;
- int act_delta, error, numpagedout, queue, starting_target;
+ vm_page_astate_t old, new;
+ int act_delta, error, numpagedout, queue, refs, starting_target;
int vnodes_skipped;
bool pageout_ok;
- mtx = NULL;
object = NULL;
starting_target = launder;
vnodes_skipped = 0;
@@ -734,77 +722,45 @@ scan:
if (__predict_false((m->flags & PG_MARKER) != 0))
continue;
- vm_page_change_lock(m, &mtx);
-
-recheck:
/*
- * The page may have been disassociated from the queue
- * or even freed while locks were dropped. We thus must be
- * careful whenever modifying page state. Once the object lock
- * has been acquired, we have a stable reference to the page.
+ * Perform some quick and racy checks of the page's queue state.
+ * Bail if things are not as we expect.
*/
- if (vm_page_queue(m) != queue)
+ old = vm_page_astate_load(m);
+		if (old.queue != queue || (old.flags & PGA_ENQUEUED) == 0)
continue;
-
- /*
- * A requeue was requested, so this page gets a second
- * chance.
- */
- if ((m->aflags & PGA_REQUEUE) != 0) {
+ if ((old.flags & PGA_QUEUE_OP_MASK) != 0) {
vm_page_pqbatch_submit(m, queue);
continue;
}
- /*
- * Wired pages may not be freed. Complete their removal
- * from the queue now to avoid needless revisits during
- * future scans. This check is racy and must be reverified once
- * we hold the object lock and have verified that the page
- * is not busy.
- */
- if (vm_page_wired(m)) {
- vm_page_dequeue_deferred(m);
- continue;
- }
-
if (object != m->object) {
if (object != NULL)
VM_OBJECT_WUNLOCK(object);
-
- /*
- * A page's object pointer may be set to NULL before
- * the object lock is acquired.
- */
object = (vm_object_t)atomic_load_ptr(&m->object);
- if (object != NULL && !VM_OBJECT_TRYWLOCK(object)) {
- mtx_unlock(mtx);
- /* Depends on type-stability. */
- VM_OBJECT_WLOCK(object);
- mtx_lock(mtx);
- goto recheck;
+ if (object == NULL)
+ continue;
+ VM_OBJECT_WLOCK(object);
+ if (m->object != object) {
+ VM_OBJECT_WUNLOCK(object);
+ object = NULL;
+ continue;
}
}
- if (__predict_false(m->object == NULL))
- /*
- * The page has been removed from its object.
- */
- continue;
- KASSERT(m->object == object, ("page %p does not belong to %p",
- m, object));
if (vm_page_busied(m))
continue;
/*
- * Re-check for wirings now that we hold the object lock and
- * have verified that the page is unbusied. If the page is
- * mapped, it may still be wired by pmap lookups. The call to
+ * Check for wirings now that we hold the object lock and have
+ * verified that the page is unbusied. If the page is mapped,
+ * it may still be wired by pmap lookups. The call to
* vm_page_try_remove_all() below atomically checks for such
* wirings and removes mappings. If the page is unmapped, the
* wire count is guaranteed not to increase.
*/
if (__predict_false(vm_page_wired(m))) {
- vm_page_dequeue_deferred(m);
+ vm_page_pqbatch_submit(m, queue);
continue;
}
@@ -824,46 +780,64 @@ recheck:
* that a reference from a concurrently destroyed mapping is
* observed here and now.
*/
- if (object->ref_count != 0)
- act_delta = pmap_ts_referenced(m);
- else {
- KASSERT(!pmap_page_is_mapped(m),
- ("page %p is mapped", m));
- act_delta = 0;
- }
- if ((m->aflags & PGA_REFERENCED) != 0) {
- vm_page_aflag_clear(m, PGA_REFERENCED);
- act_delta++;
- }
- if (act_delta != 0) {
- if (object->ref_count != 0) {
- VM_CNT_INC(v_reactivated);
- vm_page_activate(m);
+ refs = object->ref_count != 0 ? pmap_ts_referenced(m) : 0;
- /*
- * Increase the activation count if the page
- * was referenced while in the laundry queue.
- * This makes it less likely that the page will
- * be returned prematurely to the inactive
- * queue.
- */
- m->act_count += act_delta + ACT_ADVANCE;
+ for (old = vm_page_astate_load(m);;) {
+ if (old.queue != queue ||
+ (old.flags & PGA_ENQUEUED) == 0)
+ goto next_page;
- /*
- * If this was a background laundering, count
- * activated pages towards our target. The
- * purpose of background laundering is to ensure
- * that pages are eventually cycled through the
- * laundry queue, and an activation is a valid
- * way out.
- */
- if (!in_shortfall)
- launder--;
- continue;
- } else if ((object->flags & OBJ_DEAD) == 0) {
- vm_page_requeue(m);
- continue;
+ if ((old.flags & PGA_QUEUE_OP_MASK) != 0) {
+ vm_page_pqbatch_submit(m, queue);
+ goto next_page;
+ }
+
+ new = old;
+ act_delta = refs;
+ if ((old.flags & PGA_REFERENCED) != 0) {
+ new.flags &= ~PGA_REFERENCED;
+ act_delta++;
+ }
+ if (act_delta != 0) {
+ if (object->ref_count != 0) {
+ /*
+ * Increase the activation count if the
+ * page was referenced while in the
+ * laundry queue. This makes it less
+ * likely that the page will be returned
+ * prematurely to the inactive queue.
+ */
+ new.act_count += ACT_ADVANCE +
+ act_delta;
+ if (new.act_count > ACT_MAX)
+ new.act_count = ACT_MAX;
+
+ new.flags |= PGA_REQUEUE;
+ new.queue = PQ_ACTIVE;
+ if (!vm_page_pqstate_commit(m, &old,
+ new))
+ continue;
+
+ VM_CNT_INC(v_reactivated);
+
+ /*
+ * If this was a background laundering,
+ * count activated pages towards our
+ * target. The purpose of background
+ * laundering is to ensure that pages
+ * are eventually cycled through the
+ * laundry queue, and an activation is a
+ * valid way out.
+ */
+ if (!in_shortfall)
+ launder--;
+ goto next_page;
+ } else if ((object->flags & OBJ_DEAD) == 0) {
+ vm_page_launder(m);
+ goto next_page;
+ }
}
+ break;
}
/*
@@ -876,7 +850,7 @@ recheck:
if (object->ref_count != 0) {
vm_page_test_dirty(m);
if (m->dirty == 0 && !vm_page_try_remove_all(m)) {
- vm_page_dequeue_deferred(m);
+ vm_page_pqbatch_submit(m, queue);
continue;
}
}
@@ -900,7 +874,7 @@ free_page:
else
pageout_ok = true;
if (!pageout_ok) {
- vm_page_requeue(m);
+ vm_page_launder(m);
continue;
}
@@ -925,13 +899,9 @@ free_page:
pageout_lock_miss++;
vnodes_skipped++;
}
- mtx = NULL;
object = NULL;
}
- }
- if (mtx != NULL) {
- mtx_unlock(mtx);
- mtx = NULL;
+next_page:;
}
if (object != NULL) {
VM_OBJECT_WUNLOCK(object);
@@ -1169,12 +1139,13 @@ static void
vm_pageout_scan_active(struct vm_domain *vmd, int page_shortage)
{
struct scan_state ss;
- struct mtx *mtx;
vm_object_t object;
vm_page_t m, marker;
+ vm_page_astate_t old, new;
struct vm_pagequeue *pq;
long min_scan;
- int act_delta, max_scan, scan_tick;
+ int act_delta, max_scan, ps_delta, refs, scan_tick;
+ uint8_t nqueue;
marker = &vmd->vmd_markers[PQ_ACTIVE];
pq = &vmd->vmd_pagequeues[PQ_ACTIVE];
@@ -1208,7 +1179,6 @@ vm_pageout_scan_active(struct vm_domain *vmd, int page_shortage)
* and scanning resumes.
*/
max_scan = page_shortage > 0 ? pq->pq_cnt : min_scan;
- mtx = NULL;
act_scan:
vm_pageout_init_scan(&ss, pq, marker, &vmd->vmd_clock[0], max_scan);
while ((m = vm_pageout_next(&ss, false)) != NULL) {
@@ -1227,29 +1197,6 @@ act_scan:
if (__predict_false((m->flags & PG_MARKER) != 0))
continue;
- vm_page_change_lock(m, &mtx);
-
- /*
- * The page may have been disassociated from the queue
- * or even freed while locks were dropped. We thus must be
- * careful whenever modifying page state. Once the object lock
- * has been acquired, we have a stable reference to the page.
- */
- if (vm_page_queue(m) != PQ_ACTIVE)
- continue;
-
- /*
- * Wired pages are dequeued lazily.
- */
- if (vm_page_wired(m)) {
- vm_page_dequeue_deferred(m);
- continue;
- }
-
- /*
- * A page's object pointer may be set to NULL before
- * the object lock is acquired.
- */
object = (vm_object_t)atomic_load_ptr(&m->object);
if (__predict_false(object == NULL))
/*
@@ -1264,80 +1211,104 @@ act_scan:
* that a reference from a concurrently destroyed mapping is
* observed here and now.
*
- * Perform an unsynchronized object ref count check. While
- * the page lock ensures that the page is not reallocated to
- * another object, in particular, one with unmanaged mappings
- * that cannot support pmap_ts_referenced(), two races are,
+ * Perform an unsynchronized object ref count check. While the
+ * page lock ensures that the page is not reallocated to another
+ * object, in particular, one with unmanaged mappings that
+ * cannot support pmap_ts_referenced(), two races are,
* nonetheless, possible:
+ *
* 1) The count was transitioning to zero, but we saw a non-
- * zero value. pmap_ts_referenced() will return zero
- * because the page is not mapped.
- * 2) The count was transitioning to one, but we saw zero.
- * This race delays the detection of a new reference. At
- * worst, we will deactivate and reactivate the page.
+ * zero value. pmap_ts_referenced() will return zero because
+ * the page is not mapped.
+ * 2) The count was transitioning to one, but we saw zero. This
+ * race delays the detection of a new reference. At worst,
+ * we will deactivate and reactivate the page.
*/
- if (object->ref_count != 0)
- act_delta = pmap_ts_referenced(m);
- else
- act_delta = 0;
- if ((m->aflags & PGA_REFERENCED) != 0) {
- vm_page_aflag_clear(m, PGA_REFERENCED);
- act_delta++;
- }
+ refs = object->ref_count != 0 ? pmap_ts_referenced(m) : 0;
- /*
- * Advance or decay the act_count based on recent usage.
- */
- if (act_delta != 0) {
- m->act_count += ACT_ADVANCE + act_delta;
- if (m->act_count > ACT_MAX)
- m->act_count = ACT_MAX;
- } else
- m->act_count -= min(m->act_count, ACT_DECLINE);
+ for (old = vm_page_astate_load(m);;) {
+ if (old.queue != PQ_ACTIVE ||
+ (old.flags & PGA_ENQUEUED) == 0)
+ /*
+ * Something has moved the page out of the
+ * active queue. Don't touch it.
+ */
+ break;
+ if ((old.flags & PGA_DEQUEUE) != 0) {
+ vm_page_pqbatch_submit(m, PQ_ACTIVE);
+ break;
+ }
+
+ new = old;
+ act_delta = refs;
+ if ((old.flags & PGA_REFERENCED) != 0) {
+ new.flags &= ~PGA_REFERENCED;
+ act_delta++;
+ }
- if (m->act_count == 0) {
/*
- * When not short for inactive pages, let dirty pages go
- * through the inactive queue before moving to the
- * laundry queues. This gives them some extra time to
- * be reactivated, potentially avoiding an expensive
- * pageout. However, during a page shortage, the
- * inactive queue is necessarily small, and so dirty
- * pages would only spend a trivial amount of time in
- * the inactive queue. Therefore, we might as well
- * place them directly in the laundry queue to reduce
- * queuing overhead.
+ * Advance or decay the act_count based on recent usage.
*/
- if (page_shortage <= 0) {
- vm_page_swapqueue(m, PQ_ACTIVE, PQ_INACTIVE);
+ if (act_delta != 0) {
+ new.act_count += ACT_ADVANCE + act_delta;
+ if (new.act_count > ACT_MAX)
+ new.act_count = ACT_MAX;
+ } else {
+ new.act_count -= min(new.act_count, ACT_DECLINE);
+ }
+
+ if (new.act_count > 0) {
+ /*
+ * Adjust the activation count and keep the page
+ * in the active queue. The count might be left
+ * unchanged if it is saturated.
+ */
+ if (new.act_count == old.act_count ||
+ vm_page_astate_fcmpset(m, &old, new))
+ break;
} else {
/*
+ * When not short for inactive pages, let dirty
+ * pages go through the inactive queue before
+ * moving to the laundry queues. This gives
+ * them some extra time to be reactivated,
+ * potentially avoiding an expensive pageout.
+ * However, during a page shortage, the inactive
+ * queue is necessarily small, and so dirty
+ * pages would only spend a trivial amount of
+ * time in the inactive queue. Therefore, we
+ * might as well place them directly in the
+ * laundry queue to reduce queuing overhead.
+ *
* Calling vm_page_test_dirty() here would
* require acquisition of the object's write
* lock. However, during a page shortage,
- * directing dirty pages into the laundry
- * queue is only an optimization and not a
+ * directing dirty pages into the laundry queue
+ * is only an optimization and not a
* requirement. Therefore, we simply rely on
- * the opportunistic updates to the page's
- * dirty field by the pmap.
+ * the opportunistic updates to the page's dirty
+ * field by the pmap.
*/
- if (m->dirty == 0) {
- vm_page_swapqueue(m, PQ_ACTIVE,
- PQ_INACTIVE);
- page_shortage -=
- act_scan_laundry_weight;
+ if (page_shortage <= 0) {
+ nqueue = PQ_INACTIVE;
+ ps_delta = 0;
+ } else if (m->dirty == 0) {
+ nqueue = PQ_INACTIVE;
+ ps_delta = act_scan_laundry_weight;
} else {
- vm_page_swapqueue(m, PQ_ACTIVE,
- PQ_LAUNDRY);
- page_shortage--;
+ nqueue = PQ_LAUNDRY;
+ ps_delta = 1;
+ }
+
+ new.flags |= PGA_REQUEUE;
+ new.queue = nqueue;
+ if (vm_page_pqstate_commit(m, &old, new)) {
+ page_shortage -= ps_delta;
+ break;
}
}
}
}
- if (mtx != NULL) {
- mtx_unlock(mtx);
- mtx = NULL;
- }
vm_pagequeue_lock(pq);
TAILQ_REMOVE(&pq->pq_pl, &vmd->vmd_clock[0], plinks.q);
TAILQ_INSERT_AFTER(&pq->pq_pl, marker, &vmd->vmd_clock[0], plinks.q);
@@ -1349,20 +1320,30 @@ static int
vm_pageout_reinsert_inactive_page(struct scan_state *ss, vm_page_t m)
{
struct vm_domain *vmd;
+ vm_page_astate_t old, new;
- if (m->queue != PQ_INACTIVE || (m->aflags & PGA_ENQUEUED) != 0)
- return (0);
- vm_page_aflag_set(m, PGA_ENQUEUED);
- if ((m->aflags & PGA_REQUEUE_HEAD) != 0) {
- vmd = vm_pagequeue_domain(m);
- TAILQ_INSERT_BEFORE(&vmd->vmd_inacthead, m, plinks.q);
- vm_page_aflag_clear(m, PGA_REQUEUE | PGA_REQUEUE_HEAD);
- } else if ((m->aflags & PGA_REQUEUE) != 0) {
- TAILQ_INSERT_TAIL(&ss->pq->pq_pl, m, plinks.q);
- vm_page_aflag_clear(m, PGA_REQUEUE | PGA_REQUEUE_HEAD);
- } else
- TAILQ_INSERT_BEFORE(ss->marker, m, plinks.q);
- return (1);
+ for (old = vm_page_astate_load(m);;) {
+ if (old.queue != PQ_INACTIVE ||
+ (old.flags & (PGA_DEQUEUE | PGA_ENQUEUED)) != 0)
+ break;
+
+ new = old;
+ new.flags |= PGA_ENQUEUED;
+ new.flags &= ~(PGA_REQUEUE | PGA_REQUEUE_HEAD);
+ if (!vm_page_astate_fcmpset(m, &old, new))
+ continue;
+
+ if ((old.flags & PGA_REQUEUE_HEAD) != 0) {
+ vmd = vm_pagequeue_domain(m);
+ TAILQ_INSERT_BEFORE(&vmd->vmd_inacthead, m, plinks.q);
+ } else if ((old.flags & PGA_REQUEUE) != 0) {
+ TAILQ_INSERT_TAIL(&ss->pq->pq_pl, m, plinks.q);
+ } else {
+ TAILQ_INSERT_BEFORE(ss->marker, m, plinks.q);
+ }
+ return (1);
+ }
+ return (0);
}
/*
@@ -1405,11 +1386,11 @@ vm_pageout_scan_inactive(struct vm_domain *vmd, int shortage,
{
struct scan_state ss;
struct vm_batchqueue rq;
- struct mtx *mtx;
vm_page_t m, marker;
+ vm_page_astate_t old, new;
struct vm_pagequeue *pq;
vm_object_t object;
- int act_delta, addl_page_shortage, deficit, page_shortage;
+ int act_delta, addl_page_shortage, deficit, page_shortage, refs;
int starting_page_shortage;
/*
@@ -1429,7 +1410,6 @@ vm_pageout_scan_inactive(struct vm_domain *vmd, int shortage,
deficit = atomic_readandclear_int(&vmd->vmd_pageout_deficit);
starting_page_shortage = page_shortage = shortage + deficit;
- mtx = NULL;
object = NULL;
vm_batchqueue_init(&rq);
@@ -1447,65 +1427,31 @@ vm_pageout_scan_inactive(struct vm_domain *vmd, int shortage,
KASSERT((m->flags & PG_MARKER) == 0,
("marker page %p was dequeued", m));
- vm_page_change_lock(m, &mtx);
-
-recheck:
/*
- * The page may have been disassociated from the queue
- * or even freed while locks were dropped. We thus must be
- * careful whenever modifying page state. Once the object lock
- * has been acquired, we have a stable reference to the page.
+ * Perform some quick and racy checks of the page's queue state.
+ * Bail if things are not as we expect.
*/
- if (vm_page_queue(m) != PQ_INACTIVE) {
- addl_page_shortage++;
+ old = vm_page_astate_load(m);
+ if (old.queue != PQ_INACTIVE || (old.flags & PGA_ENQUEUED) != 0)
continue;
- }
-
- /*
- * The page was re-enqueued after the page queue lock was
- * dropped, or a requeue was requested. This page gets a second
- * chance.
- */
- if ((m->aflags & (PGA_ENQUEUED | PGA_REQUEUE |
- PGA_REQUEUE_HEAD)) != 0)
- goto reinsert;
-
- /*
- * Wired pages may not be freed. Complete their removal
- * from the queue now to avoid needless revisits during
- * future scans. This check is racy and must be reverified once
- * we hold the object lock and have verified that the page
- * is not busy.
- */
- if (vm_page_wired(m)) {
- vm_page_dequeue_deferred(m);
+ if ((old.flags & PGA_QUEUE_OP_MASK) != 0) {
+ vm_page_pqbatch_submit(m, PQ_INACTIVE);
continue;
}
if (object != m->object) {
if (object != NULL)
VM_OBJECT_WUNLOCK(object);
-
- /*
- * A page's object pointer may be set to NULL before
- * the object lock is acquired.
- */
object = (vm_object_t)atomic_load_ptr(&m->object);
- if (object != NULL && !VM_OBJECT_TRYWLOCK(object)) {
- mtx_unlock(mtx);
- /* Depends on type-stability. */
- VM_OBJECT_WLOCK(object);
- mtx_lock(mtx);
- goto recheck;
+ if (object == NULL)
+ continue;
+ VM_OBJECT_WLOCK(object);
+ if (m->object != object) {
+ VM_OBJECT_WUNLOCK(object);
+ object = NULL;
+ goto reinsert;
}
}
- if (__predict_false(m->object == NULL))
- /*
- * The page has been removed from its object.
- */
- continue;
- KASSERT(m->object == object, ("page %p does not belong to %p",
- m, object));
if (vm_page_busied(m)) {
/*
@@ -1521,15 +1467,15 @@ recheck:
}
/*
- * Re-check for wirings now that we hold the object lock and
- * have verified that the page is unbusied. If the page is
- * mapped, it may still be wired by pmap lookups. The call to
+ * Check for wirings now that we hold the object lock and have
+ * verified that the page is unbusied. If the page is mapped,
+ * it may still be wired by pmap lookups. The call to
* vm_page_try_remove_all() below atomically checks for such
* wirings and removes mappings. If the page is unmapped, the
* wire count is guaranteed not to increase.
*/
if (__predict_false(vm_page_wired(m))) {
- vm_page_dequeue_deferred(m);
+ vm_page_pqbatch_submit(m, PQ_INACTIVE);
continue;
}
@@ -1549,35 +1495,52 @@ recheck:
* that a reference from a concurrently destroyed mapping is
* observed here and now.
*/
- if (object->ref_count != 0)
- act_delta = pmap_ts_referenced(m);
- else {
- KASSERT(!pmap_page_is_mapped(m),
- ("page %p is mapped", m));
- act_delta = 0;
- }
- if ((m->aflags & PGA_REFERENCED) != 0) {
- vm_page_aflag_clear(m, PGA_REFERENCED);
- act_delta++;
- }
- if (act_delta != 0) {
- if (object->ref_count != 0) {
- VM_CNT_INC(v_reactivated);
- vm_page_activate(m);
+ refs = object->ref_count != 0 ? pmap_ts_referenced(m) : 0;
- /*
- * Increase the activation count if the page
- * was referenced while in the inactive queue.
- * This makes it less likely that the page will
- * be returned prematurely to the inactive
- * queue.
- */
- m->act_count += act_delta + ACT_ADVANCE;
- continue;
- } else if ((object->flags & OBJ_DEAD) == 0) {
- vm_page_aflag_set(m, PGA_REQUEUE);
- goto reinsert;
+ for (old = vm_page_astate_load(m);;) {
+ if (old.queue != PQ_INACTIVE ||
+ (old.flags & PGA_ENQUEUED) != 0)
+ goto next_page;
+
+ if ((old.flags & PGA_QUEUE_OP_MASK) != 0) {
+ vm_page_pqbatch_submit(m, PQ_INACTIVE);
+ goto next_page;
}
+
+ new = old;
+ act_delta = refs;
+ if ((old.flags & PGA_REFERENCED) != 0) {
+ new.flags &= ~PGA_REFERENCED;
+ act_delta++;
+ }
+ if (act_delta != 0) {
+ if (object->ref_count != 0) {
+ /*
+ * Increase the activation count if the
+ * page was referenced while in the
+ * inactive queue. This makes it less
+ * likely that the page will be returned
+ * prematurely to the inactive queue.
+ */
+ new.act_count += ACT_ADVANCE +
+ act_delta;
+ if (new.act_count > ACT_MAX)
+ new.act_count = ACT_MAX;
+
+ new.flags |= PGA_REQUEUE;
+ new.queue = PQ_ACTIVE;
+ if (!vm_page_pqstate_commit(m, &old,
+ new))
+ continue;
+
+ VM_CNT_INC(v_reactivated);
+ goto next_page;
+ } else if ((object->flags & OBJ_DEAD) == 0) {
+ vm_page_aflag_set(m, PGA_REQUEUE);
+ goto reinsert;
+ }
+ }
+ break;
}
/*
@@ -1590,7 +1553,7 @@ recheck:
if (object->ref_count != 0) {
vm_page_test_dirty(m);
if (m->dirty == 0 && !vm_page_try_remove_all(m)) {
- vm_page_dequeue_deferred(m);
+ vm_page_pqbatch_submit(m, PQ_INACTIVE);
continue;
}
}
@@ -1604,25 +1567,30 @@ recheck:
*/
if (m->dirty == 0) {
free_page:
+ /* XXX comment */
+ old = vm_page_astate_load(m);
+ if (old.queue != PQ_INACTIVE ||
+ (old.flags & PGA_QUEUE_STATE_MASK) != 0) {
+ vm_page_pqbatch_submit(m, PQ_INACTIVE);
+ goto next_page;
+ }
+
/*
* Because we dequeued the page and have already
* checked for concurrent dequeue and enqueue
* requests, we can safely disassociate the page
* from the inactive queue.
*/
- KASSERT((m->aflags & PGA_QUEUE_STATE_MASK) == 0,
- ("page %p has queue state", m));
- m->queue = PQ_NONE;
+ m->astate.queue = PQ_NONE;
vm_page_free(m);
page_shortage--;
} else if ((object->flags & OBJ_DEAD) == 0)
vm_page_launder(m);
+next_page:
continue;
reinsert:
vm_pageout_reinsert_inactive(&ss, &rq, m);
}
- if (mtx != NULL)
- mtx_unlock(mtx);
if (object != NULL)
VM_OBJECT_WUNLOCK(object);
vm_pageout_reinsert_inactive(&ss, &rq, NULL);
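
Distilled, the per-page reclassification in the scans above follows a common shape; the sketch below is a simplified, hypothetical rendering (it omits the object-lock handling and the PGA_ENQUEUED checks, which differ between the laundry and inactive scans, and the helper name is not from the patch):

/*
 * Sketch of the lockless scan pattern: snapshot the atomic state, defer to
 * the batch queues if a queue operation is already pending, and otherwise
 * try to commit a move to the new queue.
 */
static bool
example_try_requeue(vm_page_t m, uint8_t queue, uint8_t nqueue)
{
	vm_page_astate_t old, new;

	for (old = vm_page_astate_load(m);;) {
		if (old.queue != queue)
			return (false);		/* The page already moved. */
		if ((old.flags & PGA_QUEUE_OP_MASK) != 0) {
			vm_page_pqbatch_submit(m, queue);
			return (false);
		}
		new = old;
		new.flags |= PGA_REQUEUE;
		new.queue = nqueue;
		if (vm_page_pqstate_commit(m, &old, new))
			return (true);
		/* "old" was refreshed by the failed commit; retry. */
	}
}
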
diff --git a/sys/vm/vm_pagequeue.h b/sys/vm/vm_pagequeue.h
index ba5e77ce6c8d..b3e244755a05 100644
--- a/sys/vm/vm_pagequeue.h
+++ b/sys/vm/vm_pagequeue.h
@@ -202,6 +202,8 @@ static inline void
vm_pagequeue_remove(struct vm_pagequeue *pq, vm_page_t m)
{
+ vm_pagequeue_assert_locked(pq);
+
TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
vm_pagequeue_cnt_dec(pq);
}
@@ -249,6 +251,22 @@ vm_pagequeue_domain(vm_page_t m)
return (VM_DOMAIN(vm_phys_domain(m)));
}
+static inline struct vm_pagequeue *
+_vm_page_pagequeue(vm_page_t m, uint8_t queue)
+{
+
+ if (queue == PQ_NONE)
+ return (NULL);
+ return (&vm_pagequeue_domain(m)->vmd_pagequeues[queue]);
+}
+
+static inline struct vm_pagequeue *
+vm_page_pagequeue(vm_page_t m)
+{
+
+ return (_vm_page_pagequeue(m, atomic_load_8(&m->astate.queue)));
+}
+
/*
* Return the number of pages we need to free-up or cache
* A positive number indicates that we do not have enough free pages.
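
A typical consumer of the new helpers locks the queue and then revalidates the racily loaded index, relying on the rule (asserted in vm_page.c above) that the index can only change while that queue's lock is held. A minimal sketch, with a hypothetical helper name:

/*
 * Sketch: lock the page queue that currently contains "m", revalidating
 * the queue index once the lock is held.
 */
static struct vm_pagequeue *
example_lock_pagequeue(vm_page_t m)
{
	struct vm_pagequeue *pq;

	for (;;) {
		pq = vm_page_pagequeue(m);
		if (pq == NULL)			/* PQ_NONE: not enqueued. */
			return (NULL);
		vm_pagequeue_lock(pq);
		if (pq == vm_page_pagequeue(m))
			return (pq);		/* The index is now stable. */
		vm_pagequeue_unlock(pq);
	}
}
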
diff --git a/sys/vm/vm_swapout.c b/sys/vm/vm_swapout.c
index d71c3d07a0d9..e45034348dec 100644
--- a/sys/vm/vm_swapout.c
+++ b/sys/vm/vm_swapout.c
@@ -108,8 +108,9 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_pageout.h>
-#include <vm/vm_pager.h>
#include <vm/vm_phys.h>
+#include <vm/vm_pagequeue.h>
+#include <vm/vm_pager.h>
#include <vm/swap_pager.h>
#include <vm/vm_extern.h>
#include <vm/uma.h>
@@ -170,6 +171,56 @@ static void swapout_procs(int action);
static void vm_req_vmdaemon(int req);
static void vm_thread_swapout(struct thread *td);
+static void
+vm_swapout_object_deactivate_page(vm_page_t m, int remove_mode)
+{
+ vm_page_astate_t old, new;
+ int act_delta, refs;
+
+ refs = pmap_ts_referenced(m);
+
+ for (old = vm_page_astate_load(m);;) {
+ if ((old.flags & PGA_DEQUEUE) != 0)
+ break;
+
+		new = old;
+		act_delta = refs;
+ if ((old.flags & PGA_REFERENCED) != 0) {
+ new.flags &= ~PGA_REFERENCED;
+ act_delta++;
+ }
+
+ if (old.queue != PQ_ACTIVE && act_delta != 0) {
+ if (new.act_count == ACT_MAX)
+ break;
+ new.act_count += act_delta;
+ new.flags |= PGA_REQUEUE;
+ new.queue = PQ_ACTIVE;
+ if (vm_page_pqstate_commit(m, &old, new))
+ break;
+ } else if (old.queue == PQ_ACTIVE) {
+ if (act_delta == 0) {
+ new.act_count -= min(new.act_count,
+ ACT_DECLINE);
+ if (!remove_mode && new.act_count == 0) {
+ (void)vm_page_try_remove_all(m);
+
+ new.flags |= PGA_REQUEUE;
+ new.queue = PQ_INACTIVE;
+ }
+				if (new._bits == old._bits ||
+				    vm_page_pqstate_commit(m, &old, new))
+					break;
+ } else {
+ if (new.act_count < ACT_MAX - ACT_ADVANCE)
+ new.act_count += ACT_ADVANCE;
+				if (new._bits == old._bits ||
+				    vm_page_astate_fcmpset(m, &old, new))
+					break;
+ }
+ } else {
+			(void)vm_page_try_remove_all(m);
+			break;
+ }
+ }
+}
+
/*
* vm_swapout_object_deactivate_pages
*
@@ -184,7 +235,7 @@ vm_swapout_object_deactivate_pages(pmap_t pmap, vm_object_t first_object,
{
vm_object_t backing_object, object;
vm_page_t p;
- int act_delta, remove_mode;
+ int remove_mode;
VM_OBJECT_ASSERT_LOCKED(first_object);
if ((first_object->flags & OBJ_FICTITIOUS) != 0)
@@ -220,37 +271,8 @@ vm_swapout_object_deactivate_pages(pmap_t pmap, vm_object_t first_object,
VM_CNT_INC(v_pdpages);
if (!pmap_page_exists_quick(pmap, p))
continue;
- act_delta = pmap_ts_referenced(p);
- vm_page_lock(p);
- if ((p->aflags & PGA_REFERENCED) != 0) {
- if (act_delta == 0)
- act_delta = 1;
- vm_page_aflag_clear(p, PGA_REFERENCED);
- }
- if (!vm_page_active(p) && act_delta != 0) {
- vm_page_activate(p);
- p->act_count += act_delta;
- } else if (vm_page_active(p)) {
- /*
- * The page daemon does not requeue pages
- * after modifying their activation count.
- */
- if (act_delta == 0) {
- p->act_count -= min(p->act_count,
- ACT_DECLINE);
- if (!remove_mode && p->act_count == 0) {
- (void)vm_page_try_remove_all(p);
- vm_page_deactivate(p);
- }
- } else {
- vm_page_activate(p);
- if (p->act_count < ACT_MAX -
- ACT_ADVANCE)
- p->act_count += ACT_ADVANCE;
- }
- } else if (vm_page_inactive(p))
- (void)vm_page_try_remove_all(p);
- vm_page_unlock(p);
+
+ vm_swapout_object_deactivate_page(p, remove_mode);
}
if ((backing_object = object->backing_object) == NULL)
goto unlock_return;