author		Jeff Roberson <jeff@FreeBSD.org>	2018-02-06 22:10:07 +0000
committer	Jeff Roberson <jeff@FreeBSD.org>	2018-02-06 22:10:07 +0000
commit		e2068d0bcd95865d142f0657ed834146ddad9754 (patch)
tree		c718f937c03962e717d7260e38f7e5fba9c76936 /sys
parent		1616767dfc210635ce16ea6e3659e3569be57515 (diff)
Use per-domain locks for vm page queue free. Move paging control from
global to per-domain state. Protect reservations with the free lock
from the domain that they belong to. Refactor to make vm domains more
of a first class object.

Reviewed by:	markj, kib, gallatin
Tested by:	pho
Sponsored by:	Netflix, Dell/EMC Isilon
Differential Revision:	https://reviews.freebsd.org/D14000
Notes:
	svn path=/head/; revision=328954
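
The heart of the change is that the single global free-page lock (vm_page_queue_free_mtx) and the global vm_cnt.v_free_count are replaced by a per-domain free lock and free count, with global figures such as vm_free_count() computed by summing over the domains (see the sys/vm/vm_meter.c hunks below). The standalone C program below is only an illustrative model of that pattern, assuming simplified names and userland pthread locking in place of the kernel's vm_domain structures and mutexes; it is not code from this commit.

	/*
	 * Illustrative userland model of the per-domain free-page accounting in
	 * this commit: each domain keeps its own counter under its own lock, and
	 * the global view is obtained by summing the domains, as vm_free_count()
	 * does in sys/vm/vm_meter.c.  All names here are simplified stand-ins.
	 */
	#include <pthread.h>
	#include <stdio.h>

	#define MAXMEMDOM	8

	struct vm_domain_model {
		pthread_mutex_t	free_lock;	/* models vmd_free_mtx */
		unsigned int	free_count;	/* models vmd_free_count */
	};

	static struct vm_domain_model dom[MAXMEMDOM];
	static int ndomains = 2;

	/* Return npages to one domain; only that domain's lock is taken. */
	static void
	domain_free_pages(int d, unsigned int npages)
	{

		pthread_mutex_lock(&dom[d].free_lock);
		dom[d].free_count += npages;
		pthread_mutex_unlock(&dom[d].free_lock);
	}

	/* The global free count is the sum over domains, like vm_free_count(). */
	static unsigned int
	model_free_count(void)
	{
		unsigned int v;
		int i;

		v = 0;
		for (i = 0; i < ndomains; i++)
			v += dom[i].free_count;	/* unlocked snapshot, as in the kernel */
		return (v);
	}

	int
	main(void)
	{
		int i;

		for (i = 0; i < ndomains; i++)
			pthread_mutex_init(&dom[i].free_lock, NULL);
		domain_free_pages(0, 100);
		domain_free_pages(1, 250);
		printf("free pages: %u\n", model_free_count());
		return (0);
	}

Because callers that free pages only take their own domain's lock, contention on the former global free queue mutex is confined to a single domain, which is the motivation stated in the commit message.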
Diffstat (limited to 'sys')
-rw-r--r--  sys/amd64/amd64/machdep.c | 6
-rw-r--r--  sys/arm/arm/machdep.c | 4
-rw-r--r--  sys/arm/arm/pmap-v4.c | 2
-rw-r--r--  sys/cddl/compat/opensolaris/sys/kmem.h | 2
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c | 2
-rw-r--r--  sys/compat/linprocfs/linprocfs.c | 4
-rw-r--r--  sys/fs/tmpfs/tmpfs_subr.c | 2
-rw-r--r--  sys/i386/i386/machdep.c | 6
-rw-r--r--  sys/kern/init_main.c | 3
-rw-r--r--  sys/kern/subr_vmem.c | 5
-rw-r--r--  sys/kern/subr_witness.c | 2
-rw-r--r--  sys/mips/mips/machdep.c | 4
-rw-r--r--  sys/powerpc/booke/pmap.c | 2
-rw-r--r--  sys/powerpc/powerpc/machdep.c | 4
-rw-r--r--  sys/sparc64/sparc64/machdep.c | 4
-rw-r--r--  sys/sys/vmmeter.h | 58
-rw-r--r--  sys/vm/swap_pager.c | 2
-rw-r--r--  sys/vm/uma_core.c | 2
-rw-r--r--  sys/vm/vm_extern.h | 4
-rw-r--r--  sys/vm/vm_glue.c | 2
-rw-r--r--  sys/vm/vm_init.c | 1
-rw-r--r--  sys/vm/vm_kern.c | 19
-rw-r--r--  sys/vm/vm_map.c | 2
-rw-r--r--  sys/vm/vm_meter.c | 84
-rw-r--r--  sys/vm/vm_object.c | 2
-rw-r--r--  sys/vm/vm_object.h | 11
-rw-r--r--  sys/vm/vm_page.c | 528
-rw-r--r--  sys/vm/vm_page.h | 44
-rw-r--r--  sys/vm/vm_pageout.c | 281
-rw-r--r--  sys/vm/vm_pageout.h | 9
-rw-r--r--  sys/vm/vm_pagequeue.h | 235
-rw-r--r--  sys/vm/vm_phys.c | 14
-rw-r--r--  sys/vm/vm_phys.h | 25
-rw-r--r--  sys/vm/vm_reserv.c | 498
-rw-r--r--  sys/vm/vm_reserv.h | 6
-rw-r--r--  sys/vm/vm_swapout.c | 2
-rw-r--r--  sys/vm/vnode_pager.c | 2
37 files changed, 1260 insertions, 623 deletions
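
Much of the mechanical churn in the per-file diffs that follow comes from two substitutions: global counter reads such as vm_cnt.v_free_count become calls like vm_free_count(), and the global VM_WAIT / pagedaemon wakeup path becomes domain-aware (vm_wait_domain(), vm_domain_free_wakeup(), pagedaemon_wakeup(domain)). The sketch below models only the second idea with userland pthreads, under the assumption that hypothetical names such as model_wait_domain() stand in for the kernel primitives; it is not the kernel implementation.

	/*
	 * Minimal pthread model of the per-domain wait/wakeup split: an
	 * allocator that fails in domain d sleeps on that domain's condition
	 * variable and is woken only when pages return to the same domain,
	 * analogous to vm_wait_domain() and vm_domain_free_wakeup() below.
	 * All names are hypothetical; this is not the kernel implementation.
	 */
	#include <pthread.h>

	#define NDOM	2

	struct dom_model {
		pthread_mutex_t	lock;		/* models vmd_free_mtx */
		pthread_cond_t	freed;		/* models the per-domain sleep channel */
		unsigned int	free_count;
	};

	static struct dom_model doms[NDOM] = {
		{ PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0 },
		{ PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0 },
	};

	/* Sleep until the given domain has free pages (cf. vm_wait_domain()). */
	static void
	model_wait_domain(int d)
	{

		pthread_mutex_lock(&doms[d].lock);
		while (doms[d].free_count == 0)
			pthread_cond_wait(&doms[d].freed, &doms[d].lock);
		pthread_mutex_unlock(&doms[d].lock);
	}

	/* Return pages to a domain and wake only that domain's waiters. */
	static void
	model_free_to_domain(int d, unsigned int npages)
	{

		pthread_mutex_lock(&doms[d].lock);
		doms[d].free_count += npages;
		pthread_cond_broadcast(&doms[d].freed);
		pthread_mutex_unlock(&doms[d].lock);
	}

	static void *
	waiter(void *arg)
	{

		(void)arg;
		model_wait_domain(1);
		return (NULL);
	}

	int
	main(void)
	{
		pthread_t t;

		pthread_create(&t, NULL, waiter, NULL);
		model_free_to_domain(1, 64);	/* wakes only domain 1's waiters */
		pthread_join(t, NULL);
		return (0);
	}

In the patch itself the global vm_wait() still exists, but it sleeps on domainset-wide state (vm_min_domains) rather than a single free count, as the vm_page.c hunks show.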
diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
index 6f5b0a6bddbe..e340c6cd14dc 100644
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -282,7 +282,7 @@ cpu_startup(dummy)
memsize = (uintmax_t)strtoul(sysenv, (char **)NULL, 10) << 10;
freeenv(sysenv);
}
- if (memsize < ptoa((uintmax_t)vm_cnt.v_free_count))
+ if (memsize < ptoa((uintmax_t)vm_free_count()))
memsize = ptoa((uintmax_t)Maxmem);
printf("real memory = %ju (%ju MB)\n", memsize, memsize >> 20);
realmem = atop(memsize);
@@ -309,8 +309,8 @@ cpu_startup(dummy)
vm_ksubmap_init(&kmi);
printf("avail memory = %ju (%ju MB)\n",
- ptoa((uintmax_t)vm_cnt.v_free_count),
- ptoa((uintmax_t)vm_cnt.v_free_count) / 1048576);
+ ptoa((uintmax_t)vm_free_count()),
+ ptoa((uintmax_t)vm_free_count()) / 1048576);
/*
* Set up buffers, so they can be used to read disk labels.
diff --git a/sys/arm/arm/machdep.c b/sys/arm/arm/machdep.c
index 4fdb37ef518b..491850130ba7 100644
--- a/sys/arm/arm/machdep.c
+++ b/sys/arm/arm/machdep.c
@@ -228,8 +228,8 @@ cpu_startup(void *dummy)
(uintmax_t)arm32_ptob(realmem),
(uintmax_t)arm32_ptob(realmem) / mbyte);
printf("avail memory = %ju (%ju MB)\n",
- (uintmax_t)arm32_ptob(vm_cnt.v_free_count),
- (uintmax_t)arm32_ptob(vm_cnt.v_free_count) / mbyte);
+ (uintmax_t)arm32_ptob(vm_free_count()),
+ (uintmax_t)arm32_ptob(vm_free_count()) / mbyte);
if (bootverbose) {
arm_physmem_print_tables();
devmap_print_table();
diff --git a/sys/arm/arm/pmap-v4.c b/sys/arm/arm/pmap-v4.c
index 748d9d232ac8..27c235cb403b 100644
--- a/sys/arm/arm/pmap-v4.c
+++ b/sys/arm/arm/pmap-v4.c
@@ -3817,7 +3817,7 @@ pmap_get_pv_entry(void)
pv_entry_count++;
if (pv_entry_count > pv_entry_high_water)
- pagedaemon_wakeup();
+ pagedaemon_wakeup(0); /* XXX ARM NUMA */
ret_value = uma_zalloc(pvzone, M_NOWAIT);
return ret_value;
}
diff --git a/sys/cddl/compat/opensolaris/sys/kmem.h b/sys/cddl/compat/opensolaris/sys/kmem.h
index 7b5c4f9956b3..dab0dc1dc9b3 100644
--- a/sys/cddl/compat/opensolaris/sys/kmem.h
+++ b/sys/cddl/compat/opensolaris/sys/kmem.h
@@ -78,7 +78,7 @@ void kmem_reap(void);
int kmem_debugging(void);
void *calloc(size_t n, size_t s);
-#define freemem vm_cnt.v_free_count
+#define freemem vm_free_count()
#define minfree vm_cnt.v_free_min
#define heap_arena kernel_arena
#define zio_arena NULL
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
index e6a87ff0f342..096fcff214e4 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
@@ -379,7 +379,7 @@ static void
arc_free_target_init(void *unused __unused)
{
- zfs_arc_free_target = vm_pageout_wakeup_thresh;
+ zfs_arc_free_target = (vm_cnt.v_free_min / 10) * 11;
}
SYSINIT(arc_free_target_init, SI_SUB_KTHREAD_PAGE, SI_ORDER_ANY,
arc_free_target_init, NULL);
diff --git a/sys/compat/linprocfs/linprocfs.c b/sys/compat/linprocfs/linprocfs.c
index 686d417d00fc..c277d7d30baf 100644
--- a/sys/compat/linprocfs/linprocfs.c
+++ b/sys/compat/linprocfs/linprocfs.c
@@ -156,7 +156,7 @@ linprocfs_domeminfo(PFS_FILL_ARGS)
/*
* The correct thing here would be:
*
- memfree = vm_cnt.v_free_count * PAGE_SIZE;
+ memfree = vm_free_count() * PAGE_SIZE;
memused = memtotal - memfree;
*
* but it might mislead linux binaries into thinking there
@@ -178,7 +178,7 @@ linprocfs_domeminfo(PFS_FILL_ARGS)
* like unstaticizing it just for linprocfs's sake.
*/
buffers = 0;
- cached = vm_cnt.v_inactive_count * PAGE_SIZE;
+ cached = vm_inactive_count() * PAGE_SIZE;
sbuf_printf(sb,
"MemTotal: %9lu kB\n"
diff --git a/sys/fs/tmpfs/tmpfs_subr.c b/sys/fs/tmpfs/tmpfs_subr.c
index 9ebcb7ae84a3..6b7e4351d8d5 100644
--- a/sys/fs/tmpfs/tmpfs_subr.c
+++ b/sys/fs/tmpfs/tmpfs_subr.c
@@ -106,7 +106,7 @@ tmpfs_mem_avail(void)
{
vm_ooffset_t avail;
- avail = swap_pager_avail + vm_cnt.v_free_count - tmpfs_pages_reserved;
+ avail = swap_pager_avail + vm_free_count() - tmpfs_pages_reserved;
if (__predict_false(avail < 0))
avail = 0;
return (avail);
diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c
index 162d9d145dcf..b70f9f799d73 100644
--- a/sys/i386/i386/machdep.c
+++ b/sys/i386/i386/machdep.c
@@ -271,7 +271,7 @@ cpu_startup(dummy)
memsize = (uintmax_t)strtoul(sysenv, (char **)NULL, 10) << 10;
freeenv(sysenv);
}
- if (memsize < ptoa((uintmax_t)vm_cnt.v_free_count))
+ if (memsize < ptoa((uintmax_t)vm_free_count()))
memsize = ptoa((uintmax_t)Maxmem);
printf("real memory = %ju (%ju MB)\n", memsize, memsize >> 20);
realmem = atop(memsize);
@@ -298,8 +298,8 @@ cpu_startup(dummy)
vm_ksubmap_init(&kmi);
printf("avail memory = %ju (%ju MB)\n",
- ptoa((uintmax_t)vm_cnt.v_free_count),
- ptoa((uintmax_t)vm_cnt.v_free_count) / 1048576);
+ ptoa((uintmax_t)vm_free_count()),
+ ptoa((uintmax_t)vm_free_count()) / 1048576);
/*
* Set up buffers, so they can be used to read disk labels.
diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c
index 397bc99452eb..ac856d10fab9 100644
--- a/sys/kern/init_main.c
+++ b/sys/kern/init_main.c
@@ -87,6 +87,7 @@ __FBSDID("$FreeBSD$");
#include <vm/vm.h>
#include <vm/vm_param.h>
+#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <sys/copyright.h>
@@ -555,7 +556,7 @@ proc0_init(void *dummy __unused)
p->p_limit->pl_rlimit[RLIMIT_STACK].rlim_cur = dflssiz;
p->p_limit->pl_rlimit[RLIMIT_STACK].rlim_max = maxssiz;
/* Cast to avoid overflow on i386/PAE. */
- pageablemem = ptoa((vm_paddr_t)vm_cnt.v_free_count);
+ pageablemem = ptoa((vm_paddr_t)vm_free_count());
p->p_limit->pl_rlimit[RLIMIT_RSS].rlim_cur =
p->p_limit->pl_rlimit[RLIMIT_RSS].rlim_max = pageablemem;
p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = pageablemem / 3;
diff --git a/sys/kern/subr_vmem.c b/sys/kern/subr_vmem.c
index 1078d041393b..f109d1124d10 100644
--- a/sys/kern/subr_vmem.c
+++ b/sys/kern/subr_vmem.c
@@ -59,6 +59,7 @@ __FBSDID("$FreeBSD$");
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/vmem.h>
+#include <sys/vmmeter.h>
#include "opt_vm.h"
@@ -72,6 +73,8 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_param.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
+#include <vm/vm_phys.h>
+#include <vm/vm_pagequeue.h>
#include <vm/uma_int.h>
int vmem_startup_count(void);
@@ -644,7 +647,7 @@ vmem_bt_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag,
* possible due to M_USE_RESERVE page allocation.
*/
if (wait & M_WAITOK)
- VM_WAIT;
+ vm_wait_domain(domain);
return (NULL);
}
mtx_unlock(&vmem_bt_lock);
diff --git a/sys/kern/subr_witness.c b/sys/kern/subr_witness.c
index 4f4716e58faf..a0037f81dcdb 100644
--- a/sys/kern/subr_witness.c
+++ b/sys/kern/subr_witness.c
@@ -139,7 +139,7 @@ __FBSDID("$FreeBSD$");
#define WITNESS_COUNT 1536
#endif
#define WITNESS_HASH_SIZE 251 /* Prime, gives load factor < 2 */
-#define WITNESS_PENDLIST (2048 + MAXCPU)
+#define WITNESS_PENDLIST (2048 + (MAXCPU * 4))
/* Allocate 256 KB of stack data space */
#define WITNESS_LO_DATA_COUNT 2048
diff --git a/sys/mips/mips/machdep.c b/sys/mips/mips/machdep.c
index 0dcba4ab3355..29c0bbb662f1 100644
--- a/sys/mips/mips/machdep.c
+++ b/sys/mips/mips/machdep.c
@@ -210,8 +210,8 @@ cpu_startup(void *dummy)
vm_ksubmap_init(&kmi);
printf("avail memory = %ju (%juMB)\n",
- ptoa((uintmax_t)vm_cnt.v_free_count),
- ptoa((uintmax_t)vm_cnt.v_free_count) / 1048576);
+ ptoa((uintmax_t)vm_free_count()),
+ ptoa((uintmax_t)vm_free_count()) / 1048576);
cpu_init_interrupts();
/*
diff --git a/sys/powerpc/booke/pmap.c b/sys/powerpc/booke/pmap.c
index e6c207ef4552..ff9be3ef3cb0 100644
--- a/sys/powerpc/booke/pmap.c
+++ b/sys/powerpc/booke/pmap.c
@@ -1190,7 +1190,7 @@ pv_alloc(void)
pv_entry_count++;
if (pv_entry_count > pv_entry_high_water)
- pagedaemon_wakeup();
+ pagedaemon_wakeup(0); /* XXX powerpc NUMA */
pv = uma_zalloc(pvzone, M_NOWAIT);
return (pv);
diff --git a/sys/powerpc/powerpc/machdep.c b/sys/powerpc/powerpc/machdep.c
index c9e172cf5b18..c45e5e748281 100644
--- a/sys/powerpc/powerpc/machdep.c
+++ b/sys/powerpc/powerpc/machdep.c
@@ -221,8 +221,8 @@ cpu_startup(void *dummy)
vm_ksubmap_init(&kmi);
printf("avail memory = %ju (%ju MB)\n",
- ptoa((uintmax_t)vm_cnt.v_free_count),
- ptoa((uintmax_t)vm_cnt.v_free_count) / 1048576);
+ ptoa((uintmax_t)vm_free_count()),
+ ptoa((uintmax_t)vm_free_count()) / 1048576);
/*
* Set up buffers, so they can be used to read disk labels.
diff --git a/sys/sparc64/sparc64/machdep.c b/sys/sparc64/sparc64/machdep.c
index d450d436379e..0bb16c77dfb4 100644
--- a/sys/sparc64/sparc64/machdep.c
+++ b/sys/sparc64/sparc64/machdep.c
@@ -190,8 +190,8 @@ cpu_startup(void *arg)
EVENTHANDLER_REGISTER(shutdown_final, sparc64_shutdown_final, NULL,
SHUTDOWN_PRI_LAST);
- printf("avail memory = %lu (%lu MB)\n", vm_cnt.v_free_count * PAGE_SIZE,
- vm_cnt.v_free_count / ((1024 * 1024) / PAGE_SIZE));
+ printf("avail memory = %lu (%lu MB)\n", vm_free_count() * PAGE_SIZE,
+ vm_free_count() / ((1024 * 1024) / PAGE_SIZE));
if (bootverbose)
printf("machine: %s\n", sparc64_model);
diff --git a/sys/sys/vmmeter.h b/sys/sys/vmmeter.h
index 901604aed808..de18d7702400 100644
--- a/sys/sys/vmmeter.h
+++ b/sys/sys/vmmeter.h
@@ -140,23 +140,23 @@ struct vmmeter {
u_int v_interrupt_free_min; /* (c) reserved pages for int code */
u_int v_free_severe; /* (c) severe page depletion point */
u_int v_wire_count VMMETER_ALIGNED; /* (a) pages wired down */
- u_int v_active_count VMMETER_ALIGNED; /* (a) pages active */
- u_int v_inactive_count VMMETER_ALIGNED; /* (a) pages inactive */
- u_int v_laundry_count VMMETER_ALIGNED; /* (a) pages eligible for
- laundering */
- u_int v_free_count VMMETER_ALIGNED; /* (f) pages free */
};
#endif /* _KERNEL || _WANT_VMMETER */
#ifdef _KERNEL
+#include <sys/domainset.h>
+
extern struct vmmeter vm_cnt;
-extern u_int vm_pageout_wakeup_thresh;
+extern domainset_t vm_min_domains;
+extern domainset_t vm_severe_domains;
#define VM_CNT_ADD(var, x) counter_u64_add(vm_cnt.var, x)
#define VM_CNT_INC(var) VM_CNT_ADD(var, 1)
#define VM_CNT_FETCH(var) counter_u64_fetch(vm_cnt.var)
+u_int vm_free_count(void);
+
/*
* Return TRUE if we are under our severe low-free-pages threshold
*
@@ -167,7 +167,7 @@ static inline int
vm_page_count_severe(void)
{
- return (vm_cnt.v_free_severe > vm_cnt.v_free_count);
+ return (!DOMAINSET_EMPTY(&vm_severe_domains));
}
/*
@@ -183,50 +183,8 @@ static inline int
vm_page_count_min(void)
{
- return (vm_cnt.v_free_min > vm_cnt.v_free_count);
+ return (!DOMAINSET_EMPTY(&vm_min_domains));
}
-/*
- * Return TRUE if we have not reached our free page target during
- * free page recovery operations.
- */
-static inline int
-vm_page_count_target(void)
-{
-
- return (vm_cnt.v_free_target > vm_cnt.v_free_count);
-}
-
-/*
- * Return the number of pages we need to free-up or cache
- * A positive number indicates that we do not have enough free pages.
- */
-static inline int
-vm_paging_target(void)
-{
-
- return (vm_cnt.v_free_target - vm_cnt.v_free_count);
-}
-
-/*
- * Returns TRUE if the pagedaemon needs to be woken up.
- */
-static inline int
-vm_paging_needed(u_int free_count)
-{
-
- return (free_count < vm_pageout_wakeup_thresh);
-}
-
-/*
- * Return the number of pages we need to launder.
- * A positive number indicates that we have a shortfall of clean pages.
- */
-static inline int
-vm_laundry_target(void)
-{
-
- return (vm_paging_target());
-}
#endif /* _KERNEL */
#endif /* _SYS_VMMETER_H_ */
diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c
index 753aeded64e9..a774b363590d 100644
--- a/sys/vm/swap_pager.c
+++ b/sys/vm/swap_pager.c
@@ -2327,7 +2327,7 @@ swapoff_one(struct swdevt *sp, struct ucred *cred)
* of data we will have to page back in, plus an epsilon so
* the system doesn't become critically low on swap space.
*/
- if (vm_cnt.v_free_count + swap_pager_avail < nblks + nswap_lowat)
+ if (vm_free_count() + swap_pager_avail < nblks + nswap_lowat)
return (ENOMEM);
/*
diff --git a/sys/vm/uma_core.c b/sys/vm/uma_core.c
index c4aa574685d6..3f45e4da6567 100644
--- a/sys/vm/uma_core.c
+++ b/sys/vm/uma_core.c
@@ -3464,7 +3464,7 @@ uma_large_malloc_domain(vm_size_t size, int domain, int wait)
slab->us_data = (void *)addr;
slab->us_flags = UMA_SLAB_KERNEL | UMA_SLAB_MALLOC;
slab->us_size = size;
- slab->us_domain = vm_phys_domidx(PHYS_TO_VM_PAGE(
+ slab->us_domain = vm_phys_domain(PHYS_TO_VM_PAGE(
pmap_kextract(addr)));
uma_total_inc(size);
} else {
diff --git a/sys/vm/vm_extern.h b/sys/vm/vm_extern.h
index ab1c8daaeb8a..fde50f4a26ef 100644
--- a/sys/vm/vm_extern.h
+++ b/sys/vm/vm_extern.h
@@ -122,5 +122,9 @@ struct sf_buf *vm_imgact_map_page(vm_object_t object, vm_ooffset_t offset);
void vm_imgact_unmap_page(struct sf_buf *sf);
void vm_thread_dispose(struct thread *td);
int vm_thread_new(struct thread *td, int pages);
+u_int vm_active_count(void);
+u_int vm_inactive_count(void);
+u_int vm_laundry_count(void);
+u_int vm_wait_count(void);
#endif /* _KERNEL */
#endif /* !_VM_EXTERN_H_ */
diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c
index 6a70121c2cf7..6bb5ee804569 100644
--- a/sys/vm/vm_glue.c
+++ b/sys/vm/vm_glue.c
@@ -552,7 +552,7 @@ vm_forkproc(struct thread *td, struct proc *p2, struct thread *td2,
}
while (vm_page_count_severe()) {
- VM_WAIT;
+ vm_wait_severe();
}
if ((flags & RFMEM) == 0) {
diff --git a/sys/vm/vm_init.c b/sys/vm/vm_init.c
index b246699894a3..8e6b437cbd63 100644
--- a/sys/vm/vm_init.c
+++ b/sys/vm/vm_init.c
@@ -89,6 +89,7 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>
+#include <vm/vm_pagequeue.h>
#include <vm/vm_map.h>
#include <vm/vm_pager.h>
#include <vm/vm_extern.h>
diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c
index 7e56ead146c7..0ff1b155c069 100644
--- a/sys/vm/vm_kern.c
+++ b/sys/vm/vm_kern.c
@@ -92,6 +92,7 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_phys.h>
+#include <vm/vm_pagequeue.h>
#include <vm/vm_radix.h>
#include <vm/vm_extern.h>
#include <vm/uma.h>
@@ -196,7 +197,7 @@ retry:
if (!vm_page_reclaim_contig_domain(domain,
pflags, 1, low, high, PAGE_SIZE, 0) &&
(flags & M_WAITOK) != 0)
- VM_WAIT;
+ vm_wait_domain(domain);
VM_OBJECT_WLOCK(object);
tries++;
goto retry;
@@ -205,9 +206,9 @@ retry:
vmem_free(vmem, addr, size);
return (0);
}
- KASSERT(vm_phys_domidx(m) == domain,
+ KASSERT(vm_phys_domain(m) == domain,
("kmem_alloc_attr_domain: Domain mismatch %d != %d",
- vm_phys_domidx(m), domain));
+ vm_phys_domain(m), domain));
if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0)
pmap_zero_page(m);
m->valid = VM_PAGE_BITS_ALL;
@@ -280,7 +281,7 @@ retry:
if (!vm_page_reclaim_contig_domain(domain, pflags,
npages, low, high, alignment, boundary) &&
(flags & M_WAITOK) != 0)
- VM_WAIT;
+ vm_wait_domain(domain);
VM_OBJECT_WLOCK(object);
tries++;
goto retry;
@@ -288,9 +289,9 @@ retry:
vmem_free(vmem, addr, size);
return (0);
}
- KASSERT(vm_phys_domidx(m) == domain,
+ KASSERT(vm_phys_domain(m) == domain,
("kmem_alloc_contig_domain: Domain mismatch %d != %d",
- vm_phys_domidx(m), domain));
+ vm_phys_domain(m), domain));
end_m = m + npages;
tmp = addr;
for (; m < end_m; m++) {
@@ -452,9 +453,9 @@ retry:
kmem_unback(object, addr, i);
return (KERN_NO_SPACE);
}
- KASSERT(vm_phys_domidx(m) == domain,
+ KASSERT(vm_phys_domain(m) == domain,
("kmem_back_domain: Domain mismatch %d != %d",
- vm_phys_domidx(m), domain));
+ vm_phys_domain(m), domain));
if (flags & M_ZERO && (m->flags & PG_ZERO) == 0)
pmap_zero_page(m);
KASSERT((m->oflags & VPO_UNMANAGED) != 0,
@@ -514,7 +515,7 @@ _kmem_unback(vm_object_t object, vm_offset_t addr, vm_size_t size)
end = offset + size;
VM_OBJECT_WLOCK(object);
m = vm_page_lookup(object, atop(offset));
- domain = vm_phys_domidx(m);
+ domain = vm_phys_domain(m);
for (; offset < end; offset += PAGE_SIZE, m = next) {
next = vm_page_next(m);
vm_page_unwire(m, PQ_NONE);
diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
index 5c6f6e7b3cf2..65c009003d86 100644
--- a/sys/vm/vm_map.c
+++ b/sys/vm/vm_map.c
@@ -2011,7 +2011,7 @@ vm_map_pmap_enter(vm_map_t map, vm_offset_t addr, vm_prot_t prot,
* free pages allocating pv entries.
*/
if (((flags & MAP_PREFAULT_MADVISE) != 0 &&
- vm_cnt.v_free_count < vm_cnt.v_free_reserved) ||
+ vm_page_count_severe()) ||
((flags & MAP_PREFAULT_PARTIAL) != 0 &&
tmpidx >= threshold)) {
psize = tmpidx;
diff --git a/sys/vm/vm_meter.c b/sys/vm/vm_meter.c
index 09a7a82addeb..a3c1036a8159 100644
--- a/sys/vm/vm_meter.c
+++ b/sys/vm/vm_meter.c
@@ -53,6 +53,8 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_page.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>
+#include <vm/vm_phys.h>
+#include <vm/vm_pagequeue.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
@@ -213,9 +215,6 @@ vmtotal(SYSCTL_HANDLER_ARGS)
total.t_dw++;
else
total.t_sl++;
- if (td->td_wchan ==
- &vm_cnt.v_free_count)
- total.t_pw++;
}
break;
case TDS_CAN_RUN:
@@ -283,7 +282,8 @@ vmtotal(SYSCTL_HANDLER_ARGS)
}
}
mtx_unlock(&vm_object_list_mtx);
- total.t_free = vm_cnt.v_free_count;
+ total.t_pw = vm_wait_count();
+ total.t_free = vm_free_count();
#if defined(COMPAT_FREEBSD11)
/* sysctl(8) allocates twice as much memory as reported by sysctl(3) */
if (curproc->p_osrel < P_OSREL_VMTOTAL64 && (req->oldlen ==
@@ -339,7 +339,7 @@ sysctl_handle_vmstat(SYSCTL_HANDLER_ARGS)
#define VM_STATS(parent, var, descr) \
SYSCTL_OID(parent, OID_AUTO, var, CTLTYPE_U64 | CTLFLAG_MPSAFE | \
- CTLFLAG_RD, &vm_cnt.var, 0, sysctl_handle_vmstat, "QU", descr);
+ CTLFLAG_RD, &vm_cnt.var, 0, sysctl_handle_vmstat, "QU", descr)
#define VM_STATS_VM(var, descr) VM_STATS(_vm_stats_vm, var, descr)
#define VM_STATS_SYS(var, descr) VM_STATS(_vm_stats_sys, var, descr)
@@ -379,19 +379,36 @@ VM_STATS_VM(v_vforkpages, "VM pages affected by vfork()");
VM_STATS_VM(v_rforkpages, "VM pages affected by rfork()");
VM_STATS_VM(v_kthreadpages, "VM pages affected by fork() by kernel");
+static int
+sysctl_handle_vmstat_proc(SYSCTL_HANDLER_ARGS)
+{
+ u_int (*fn)(void);
+ uint32_t val;
+
+ fn = arg1;
+ val = fn();
+ return (SYSCTL_OUT(req, &val, sizeof(val)));
+}
+
+#define VM_STATS_PROC(var, descr, fn) \
+ SYSCTL_OID(_vm_stats_vm, OID_AUTO, var, CTLTYPE_U32 | CTLFLAG_MPSAFE | \
+ CTLFLAG_RD, fn, 0, sysctl_handle_vmstat_proc, "IU", descr)
+
#define VM_STATS_UINT(var, descr) \
SYSCTL_UINT(_vm_stats_vm, OID_AUTO, var, CTLFLAG_RD, &vm_cnt.var, 0, descr)
+
VM_STATS_UINT(v_page_size, "Page size in bytes");
VM_STATS_UINT(v_page_count, "Total number of pages in system");
VM_STATS_UINT(v_free_reserved, "Pages reserved for deadlock");
VM_STATS_UINT(v_free_target, "Pages desired free");
VM_STATS_UINT(v_free_min, "Minimum low-free-pages threshold");
-VM_STATS_UINT(v_free_count, "Free pages");
+VM_STATS_PROC(v_free_count, "Free pages", vm_free_count);
VM_STATS_UINT(v_wire_count, "Wired pages");
-VM_STATS_UINT(v_active_count, "Active pages");
+VM_STATS_PROC(v_active_count, "Active pages", vm_active_count);
VM_STATS_UINT(v_inactive_target, "Desired inactive pages");
-VM_STATS_UINT(v_inactive_count, "Inactive pages");
-VM_STATS_UINT(v_laundry_count, "Pages eligible for laundering");
+VM_STATS_PROC(v_inactive_count, "Inactive pages", vm_inactive_count);
+VM_STATS_PROC(v_laundry_count, "Pages eligible for laundering",
+ vm_laundry_count);
VM_STATS_UINT(v_pageout_free_min, "Min pages reserved for kernel");
VM_STATS_UINT(v_interrupt_free_min, "Reserved pages for interrupt code");
VM_STATS_UINT(v_free_severe, "Severe page depletion point");
@@ -406,3 +423,52 @@ SYSCTL_UINT(_vm_stats_vm, OID_AUTO, v_cache_count, CTLFLAG_RD,
SYSCTL_UINT(_vm_stats_vm, OID_AUTO, v_tcached, CTLFLAG_RD,
SYSCTL_NULL_UINT_PTR, 0, "Dummy for compatibility");
#endif
+
+u_int
+vm_free_count(void)
+{
+ u_int v;
+ int i;
+
+ v = 0;
+ for (i = 0; i < vm_ndomains; i++)
+ v += vm_dom[i].vmd_free_count;
+
+ return (v);
+}
+
+static
+u_int
+vm_pagequeue_count(int pq)
+{
+ u_int v;
+ int i;
+
+ v = 0;
+ for (i = 0; i < vm_ndomains; i++)
+ v += vm_dom[i].vmd_pagequeues[pq].pq_cnt;
+
+ return (v);
+}
+
+u_int
+vm_active_count(void)
+{
+
+ return vm_pagequeue_count(PQ_ACTIVE);
+}
+
+u_int
+vm_inactive_count(void)
+{
+
+ return vm_pagequeue_count(PQ_INACTIVE);
+}
+
+u_int
+vm_laundry_count(void)
+{
+
+ return vm_pagequeue_count(PQ_LAUNDRY);
+}
+
diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c
index 0714c063f6e8..758d9f31eda4 100644
--- a/sys/vm/vm_object.c
+++ b/sys/vm/vm_object.c
@@ -96,6 +96,8 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
+#include <vm/vm_phys.h>
+#include <vm/vm_pagequeue.h>
#include <vm/swap_pager.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h
index 1e3744ffe24f..6585a6245638 100644
--- a/sys/vm/vm_object.h
+++ b/sys/vm/vm_object.h
@@ -297,6 +297,17 @@ vm_object_color(vm_object_t object, u_short color)
}
}
+static __inline bool
+vm_object_reserv(vm_object_t object)
+{
+
+ if (object != NULL &&
+ (object->flags & (OBJ_COLORED | OBJ_FICTITIOUS)) == OBJ_COLORED) {
+ return (true);
+ }
+ return (false);
+}
+
void vm_object_clear_flag(vm_object_t object, u_short bits);
void vm_object_pip_add(vm_object_t object, short i);
void vm_object_pip_subtract(vm_object_t object, short i);
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index d2333d69c81e..b66b0f0c45e0 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -116,8 +116,9 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
-#include <vm/vm_pager.h>
#include <vm/vm_phys.h>
+#include <vm/vm_pagequeue.h>
+#include <vm/vm_pager.h>
#include <vm/vm_radix.h>
#include <vm/vm_reserv.h>
#include <vm/vm_extern.h>
@@ -136,9 +137,15 @@ extern int vmem_startup_count(void);
*/
struct vm_domain vm_dom[MAXMEMDOM];
-struct mtx_padalign __exclusive_cache_line vm_page_queue_free_mtx;
struct mtx_padalign __exclusive_cache_line pa_lock[PA_LOCK_COUNT];
+struct mtx_padalign __exclusive_cache_line vm_domainset_lock;
+domainset_t __exclusive_cache_line vm_min_domains;
+domainset_t __exclusive_cache_line vm_severe_domains;
+static int vm_min_waiters;
+static int vm_severe_waiters;
+static int vm_pageproc_waiters;
+
/*
* bogus page -- for I/O to/from partially complete buffers,
@@ -164,24 +171,22 @@ static int sysctl_vm_page_blacklist(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_vm, OID_AUTO, page_blacklist, CTLTYPE_STRING | CTLFLAG_RD |
CTLFLAG_MPSAFE, NULL, 0, sysctl_vm_page_blacklist, "A", "Blacklist pages");
-/* Is the page daemon waiting for free pages? */
-static int vm_pageout_pages_needed;
-
static uma_zone_t fakepg_zone;
static void vm_page_alloc_check(vm_page_t m);
static void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits);
static void vm_page_enqueue(uint8_t queue, vm_page_t m);
-static void vm_page_free_phys(vm_page_t m);
-static void vm_page_free_wakeup(void);
+static void vm_page_free_phys(struct vm_domain *vmd, vm_page_t m);
static void vm_page_init(void *dummy);
static int vm_page_insert_after(vm_page_t m, vm_object_t object,
vm_pindex_t pindex, vm_page_t mpred);
static void vm_page_insert_radixdone(vm_page_t m, vm_object_t object,
vm_page_t mpred);
-static int vm_page_reclaim_run(int req_class, u_long npages, vm_page_t m_run,
- vm_paddr_t high);
-static int vm_page_alloc_fail(vm_object_t object, int req);
+static int vm_page_reclaim_run(int req_class, int domain, u_long npages,
+ vm_page_t m_run, vm_paddr_t high);
+static void vm_domain_free_wakeup(struct vm_domain *);
+static int vm_domain_alloc_fail(struct vm_domain *vmd, vm_object_t object,
+ int req);
SYSINIT(vm_page, SI_SUB_VM, SI_ORDER_SECOND, vm_page_init, NULL);
@@ -318,6 +323,7 @@ vm_page_blacklist_next(char **list, char *end)
static void
vm_page_blacklist_check(char *list, char *end)
{
+ struct vm_domain *vmd;
vm_paddr_t pa;
vm_page_t m;
char *next;
@@ -330,9 +336,10 @@ vm_page_blacklist_check(char *list, char *end)
m = vm_phys_paddr_to_vm_page(pa);
if (m == NULL)
continue;
- mtx_lock(&vm_page_queue_free_mtx);
+ vmd = vm_pagequeue_domain(m);
+ vm_domain_free_lock(vmd);
ret = vm_phys_unfree_page(m);
- mtx_unlock(&vm_page_queue_free_mtx);
+ vm_domain_free_unlock(vmd);
if (ret == TRUE) {
TAILQ_INSERT_TAIL(&blacklist_head, m, listq);
if (bootverbose)
@@ -395,28 +402,23 @@ sysctl_vm_page_blacklist(SYSCTL_HANDLER_ARGS)
}
static void
-vm_page_domain_init(struct vm_domain *vmd)
+vm_page_domain_init(int domain)
{
+ struct vm_domain *vmd;
struct vm_pagequeue *pq;
int i;
+ vmd = VM_DOMAIN(domain);
+ bzero(vmd, sizeof(*vmd));
*__DECONST(char **, &vmd->vmd_pagequeues[PQ_INACTIVE].pq_name) =
"vm inactive pagequeue";
- *__DECONST(u_int **, &vmd->vmd_pagequeues[PQ_INACTIVE].pq_vcnt) =
- &vm_cnt.v_inactive_count;
*__DECONST(char **, &vmd->vmd_pagequeues[PQ_ACTIVE].pq_name) =
"vm active pagequeue";
- *__DECONST(u_int **, &vmd->vmd_pagequeues[PQ_ACTIVE].pq_vcnt) =
- &vm_cnt.v_active_count;
*__DECONST(char **, &vmd->vmd_pagequeues[PQ_LAUNDRY].pq_name) =
"vm laundry pagequeue";
- *__DECONST(int **, &vmd->vmd_pagequeues[PQ_LAUNDRY].pq_vcnt) =
- &vm_cnt.v_laundry_count;
*__DECONST(char **, &vmd->vmd_pagequeues[PQ_UNSWAPPABLE].pq_name) =
"vm unswappable pagequeue";
- /* Unswappable dirty pages are counted as being in the laundry. */
- *__DECONST(int **, &vmd->vmd_pagequeues[PQ_UNSWAPPABLE].pq_vcnt) =
- &vm_cnt.v_laundry_count;
+ vmd->vmd_domain = domain;
vmd->vmd_page_count = 0;
vmd->vmd_free_count = 0;
vmd->vmd_segs = 0;
@@ -427,6 +429,7 @@ vm_page_domain_init(struct vm_domain *vmd)
mtx_init(&pq->pq_mutex, pq->pq_name, "vm pagequeue",
MTX_DEF | MTX_DUPOK);
}
+ mtx_init(&vmd->vmd_free_mtx, "vm page free queue", NULL, MTX_DEF);
}
/*
@@ -463,7 +466,6 @@ vm_page_init_page(vm_page_t m, vm_paddr_t pa, int segind)
vm_offset_t
vm_page_startup(vm_offset_t vaddr)
{
- struct vm_domain *vmd;
struct vm_phys_seg *seg;
vm_page_t m;
char *list, *listend;
@@ -494,11 +496,11 @@ vm_page_startup(vm_offset_t vaddr)
/*
* Initialize the page and queue locks.
*/
- mtx_init(&vm_page_queue_free_mtx, "vm page free queue", NULL, MTX_DEF);
+ mtx_init(&vm_domainset_lock, "vm domainset lock", NULL, MTX_DEF);
for (i = 0; i < PA_LOCK_COUNT; i++)
mtx_init(&pa_lock[i], "vm page", NULL, MTX_DEF);
for (i = 0; i < vm_ndomains; i++)
- vm_page_domain_init(&vm_dom[i]);
+ vm_page_domain_init(i);
/*
* Allocate memory for use when boot strapping the kernel memory
@@ -704,7 +706,6 @@ vm_page_startup(vm_offset_t vaddr)
* physical memory allocator's free lists.
*/
vm_cnt.v_page_count = 0;
- vm_cnt.v_free_count = 0;
for (segind = 0; segind < vm_phys_nsegs; segind++) {
seg = &vm_phys_segs[segind];
for (m = seg->first_page, pa = seg->start; pa < seg->end;
@@ -719,6 +720,8 @@ vm_page_startup(vm_offset_t vaddr)
* or doesn't overlap any of them.
*/
for (i = 0; phys_avail[i + 1] != 0; i += 2) {
+ struct vm_domain *vmd;
+
if (seg->start < phys_avail[i] ||
seg->end > phys_avail[i + 1])
continue;
@@ -726,13 +729,14 @@ vm_page_startup(vm_offset_t vaddr)
m = seg->first_page;
pagecount = (u_long)atop(seg->end - seg->start);
- mtx_lock(&vm_page_queue_free_mtx);
+ vmd = VM_DOMAIN(seg->domain);
+ vm_domain_free_lock(vmd);
vm_phys_free_contig(m, pagecount);
- vm_phys_freecnt_adj(m, (int)pagecount);
- mtx_unlock(&vm_page_queue_free_mtx);
+ vm_domain_freecnt_adj(vmd, (int)pagecount);
+ vm_domain_free_unlock(vmd);
vm_cnt.v_page_count += (u_int)pagecount;
- vmd = &vm_dom[seg->domain];
+ vmd = VM_DOMAIN(seg->domain);
vmd->vmd_page_count += (u_int)pagecount;
vmd->vmd_segs |= 1UL << m->segind;
break;
@@ -1657,12 +1661,40 @@ vm_page_alloc_after(vm_object_t object, vm_pindex_t pindex,
return (m);
}
+/*
+ * Returns true if the number of free pages exceeds the minimum
+ * for the request class and false otherwise.
+ */
+int
+vm_domain_available(struct vm_domain *vmd, int req, int npages)
+{
+
+ vm_domain_free_assert_locked(vmd);
+ req = req & VM_ALLOC_CLASS_MASK;
+
+ /*
+ * The page daemon is allowed to dig deeper into the free page list.
+ */
+ if (curproc == pageproc && req != VM_ALLOC_INTERRUPT)
+ req = VM_ALLOC_SYSTEM;
+
+ if (vmd->vmd_free_count >= npages + vmd->vmd_free_reserved ||
+ (req == VM_ALLOC_SYSTEM &&
+ vmd->vmd_free_count >= npages + vmd->vmd_interrupt_free_min) ||
+ (req == VM_ALLOC_INTERRUPT &&
+ vmd->vmd_free_count >= npages))
+ return (1);
+
+ return (0);
+}
+
vm_page_t
vm_page_alloc_domain_after(vm_object_t object, vm_pindex_t pindex, int domain,
int req, vm_page_t mpred)
{
+ struct vm_domain *vmd;
vm_page_t m;
- int flags, req_class;
+ int flags;
u_int free_count;
KASSERT((object != NULL) == ((req & VM_ALLOC_NOOBJ) == 0) &&
@@ -1678,34 +1710,27 @@ vm_page_alloc_domain_after(vm_object_t object, vm_pindex_t pindex, int domain,
if (object != NULL)
VM_OBJECT_ASSERT_WLOCKED(object);
- req_class = req & VM_ALLOC_CLASS_MASK;
-
- /*
- * The page daemon is allowed to dig deeper into the free page list.
- */
- if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT)
- req_class = VM_ALLOC_SYSTEM;
-
- /*
- * Allocate a page if the number of free pages exceeds the minimum
- * for the request class.
- */
again:
m = NULL;
- mtx_lock(&vm_page_queue_free_mtx);
- if (vm_cnt.v_free_count > vm_cnt.v_free_reserved ||
- (req_class == VM_ALLOC_SYSTEM &&
- vm_cnt.v_free_count > vm_cnt.v_interrupt_free_min) ||
- (req_class == VM_ALLOC_INTERRUPT &&
- vm_cnt.v_free_count > 0)) {
+#if VM_NRESERVLEVEL > 0
+ if (vm_object_reserv(object) &&
+ (m = vm_reserv_extend(req, object, pindex, domain, mpred))
+ != NULL) {
+ domain = vm_phys_domain(m);
+ vmd = VM_DOMAIN(domain);
+ goto found;
+ }
+#endif
+ vmd = VM_DOMAIN(domain);
+ vm_domain_free_lock(vmd);
+ if (vm_domain_available(vmd, req, 1)) {
/*
* Can we allocate the page from a reservation?
*/
#if VM_NRESERVLEVEL > 0
- if (object == NULL || (object->flags & (OBJ_COLORED |
- OBJ_FICTITIOUS)) != OBJ_COLORED || (m =
- vm_reserv_alloc_page(object, pindex, domain,
- mpred)) == NULL)
+ if (!vm_object_reserv(object) ||
+ (m = vm_reserv_alloc_page(object, pindex,
+ domain, mpred)) == NULL)
#endif
{
/*
@@ -1727,7 +1752,7 @@ again:
/*
* Not allocatable, give up.
*/
- if (vm_page_alloc_fail(object, req))
+ if (vm_domain_alloc_fail(vmd, object, req))
goto again;
return (NULL);
}
@@ -1736,8 +1761,18 @@ again:
* At this point we had better have found a good page.
*/
KASSERT(m != NULL, ("missing page"));
- free_count = vm_phys_freecnt_adj(m, -1);
- mtx_unlock(&vm_page_queue_free_mtx);
+ free_count = vm_domain_freecnt_adj(vmd, -1);
+ vm_domain_free_unlock(vmd);
+
+ /*
+ * Don't wakeup too often - wakeup the pageout daemon when
+ * we would be nearly out of memory.
+ */
+ if (vm_paging_needed(vmd, free_count))
+ pagedaemon_wakeup(vmd->vmd_domain);
+#if VM_NRESERVLEVEL > 0
+found:
+#endif
vm_page_alloc_check(m);
/*
@@ -1770,7 +1805,7 @@ again:
if (object != NULL) {
if (vm_page_insert_after(m, object, pindex, mpred)) {
- pagedaemon_wakeup();
+ pagedaemon_wakeup(domain);
if (req & VM_ALLOC_WIRED) {
atomic_subtract_int(&vm_cnt.v_wire_count, 1);
m->wire_count = 0;
@@ -1795,13 +1830,6 @@ again:
} else
m->pindex = pindex;
- /*
- * Don't wakeup too often - wakeup the pageout daemon when
- * we would be nearly out of memory.
- */
- if (vm_paging_needed(free_count))
- pagedaemon_wakeup();
-
return (m);
}
@@ -1869,9 +1897,9 @@ vm_page_alloc_contig_domain(vm_object_t object, vm_pindex_t pindex, int domain,
int req, u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
vm_paddr_t boundary, vm_memattr_t memattr)
{
+ struct vm_domain *vmd;
vm_page_t m, m_ret, mpred;
u_int busy_lock, flags, oflags;
- int req_class;
mpred = NULL; /* XXX: pacify gcc */
KASSERT((object != NULL) == ((req & VM_ALLOC_NOOBJ) == 0) &&
@@ -1889,13 +1917,6 @@ vm_page_alloc_contig_domain(vm_object_t object, vm_pindex_t pindex, int domain,
object));
}
KASSERT(npages > 0, ("vm_page_alloc_contig: npages is zero"));
- req_class = req & VM_ALLOC_CLASS_MASK;
-
- /*
- * The page daemon is allowed to dig deeper into the free page list.
- */
- if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT)
- req_class = VM_ALLOC_SYSTEM;
if (object != NULL) {
mpred = vm_radix_lookup_le(&object->rtree, pindex);
@@ -1908,19 +1929,25 @@ vm_page_alloc_contig_domain(vm_object_t object, vm_pindex_t pindex, int domain,
* below the lower bound for the allocation class?
*/
again:
+#if VM_NRESERVLEVEL > 0
+ if (vm_object_reserv(object) &&
+ (m_ret = vm_reserv_extend_contig(req, object, pindex, domain,
+ npages, low, high, alignment, boundary, mpred)) != NULL) {
+ domain = vm_phys_domain(m_ret);
+ vmd = VM_DOMAIN(domain);
+ goto found;
+ }
+#endif
m_ret = NULL;
- mtx_lock(&vm_page_queue_free_mtx);
- if (vm_cnt.v_free_count >= npages + vm_cnt.v_free_reserved ||
- (req_class == VM_ALLOC_SYSTEM &&
- vm_cnt.v_free_count >= npages + vm_cnt.v_interrupt_free_min) ||
- (req_class == VM_ALLOC_INTERRUPT &&
- vm_cnt.v_free_count >= npages)) {
+ vmd = VM_DOMAIN(domain);
+ vm_domain_free_lock(vmd);
+ if (vm_domain_available(vmd, req, npages)) {
/*
* Can we allocate the pages from a reservation?
*/
#if VM_NRESERVLEVEL > 0
retry:
- if (object == NULL || (object->flags & OBJ_COLORED) == 0 ||
+ if (!vm_object_reserv(object) ||
(m_ret = vm_reserv_alloc_contig(object, pindex, domain,
npages, low, high, alignment, boundary, mpred)) == NULL)
#endif
@@ -1936,12 +1963,15 @@ retry:
#endif
}
if (m_ret == NULL) {
- if (vm_page_alloc_fail(object, req))
+ if (vm_domain_alloc_fail(vmd, object, req))
goto again;
return (NULL);
}
- vm_phys_freecnt_adj(m_ret, -npages);
- mtx_unlock(&vm_page_queue_free_mtx);
+ vm_domain_freecnt_adj(vmd, -npages);
+ vm_domain_free_unlock(vmd);
+#if VM_NRESERVLEVEL > 0
+found:
+#endif
for (m = m_ret; m < &m_ret[npages]; m++)
vm_page_alloc_check(m);
@@ -1977,7 +2007,7 @@ retry:
m->oflags = oflags;
if (object != NULL) {
if (vm_page_insert_after(m, object, pindex, mpred)) {
- pagedaemon_wakeup();
+ pagedaemon_wakeup(domain);
if ((req & VM_ALLOC_WIRED) != 0)
atomic_subtract_int(
&vm_cnt.v_wire_count, npages);
@@ -2007,8 +2037,9 @@ retry:
pmap_page_set_memattr(m, memattr);
pindex++;
}
- if (vm_paging_needed(vm_cnt.v_free_count))
- pagedaemon_wakeup();
+ vmd = VM_DOMAIN(domain);
+ if (vm_paging_needed(vmd, vmd->vmd_free_count))
+ pagedaemon_wakeup(domain);
return (m_ret);
}
@@ -2070,37 +2101,26 @@ vm_page_alloc_freelist(int freelist, int req)
vm_page_t
vm_page_alloc_freelist_domain(int domain, int freelist, int req)
{
+ struct vm_domain *vmd;
vm_page_t m;
u_int flags, free_count;
- int req_class;
-
- req_class = req & VM_ALLOC_CLASS_MASK;
-
- /*
- * The page daemon is allowed to dig deeper into the free page list.
- */
- if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT)
- req_class = VM_ALLOC_SYSTEM;
/*
* Do not allocate reserved pages unless the req has asked for it.
*/
+ vmd = VM_DOMAIN(domain);
again:
- mtx_lock(&vm_page_queue_free_mtx);
- if (vm_cnt.v_free_count > vm_cnt.v_free_reserved ||
- (req_class == VM_ALLOC_SYSTEM &&
- vm_cnt.v_free_count > vm_cnt.v_interrupt_free_min) ||
- (req_class == VM_ALLOC_INTERRUPT &&
- vm_cnt.v_free_count > 0))
+ vm_domain_free_lock(vmd);
+ if (vm_domain_available(vmd, req, 1))
m = vm_phys_alloc_freelist_pages(domain, freelist,
VM_FREEPOOL_DIRECT, 0);
if (m == NULL) {
- if (vm_page_alloc_fail(NULL, req))
+ if (vm_domain_alloc_fail(vmd, NULL, req))
goto again;
return (NULL);
}
- free_count = vm_phys_freecnt_adj(m, -1);
- mtx_unlock(&vm_page_queue_free_mtx);
+ free_count = vm_domain_freecnt_adj(vmd, -1);
+ vm_domain_free_unlock(vmd);
vm_page_alloc_check(m);
/*
@@ -2121,8 +2141,8 @@ again:
}
/* Unmanaged pages don't use "act_count". */
m->oflags = VPO_UNMANAGED;
- if (vm_paging_needed(free_count))
- pagedaemon_wakeup();
+ if (vm_paging_needed(vmd, free_count))
+ pagedaemon_wakeup(domain);
return (m);
}
@@ -2344,9 +2364,10 @@ unlock:
* "req_class" must be an allocation class.
*/
static int
-vm_page_reclaim_run(int req_class, u_long npages, vm_page_t m_run,
+vm_page_reclaim_run(int req_class, int domain, u_long npages, vm_page_t m_run,
vm_paddr_t high)
{
+ struct vm_domain *vmd;
struct mtx *m_mtx;
struct spglist free;
vm_object_t object;
@@ -2496,7 +2517,9 @@ retry:
unlock:
VM_OBJECT_WUNLOCK(object);
} else {
- mtx_lock(&vm_page_queue_free_mtx);
+ MPASS(vm_phys_domain(m) == domain);
+ vmd = VM_DOMAIN(domain);
+ vm_domain_free_lock(vmd);
order = m->order;
if (order < VM_NFREEORDER) {
/*
@@ -2513,7 +2536,7 @@ unlock:
else if (vm_reserv_is_page_free(m))
order = 0;
#endif
- mtx_unlock(&vm_page_queue_free_mtx);
+ vm_domain_free_unlock(vmd);
if (order == VM_NFREEORDER)
error = EINVAL;
}
@@ -2521,13 +2544,15 @@ unlock:
if (m_mtx != NULL)
mtx_unlock(m_mtx);
if ((m = SLIST_FIRST(&free)) != NULL) {
- mtx_lock(&vm_page_queue_free_mtx);
+ vmd = VM_DOMAIN(domain);
+ vm_domain_free_lock(vmd);
do {
+ MPASS(vm_phys_domain(m) == domain);
SLIST_REMOVE_HEAD(&free, plinks.s.ss);
- vm_page_free_phys(m);
+ vm_page_free_phys(vmd, m);
} while ((m = SLIST_FIRST(&free)) != NULL);
- vm_page_free_wakeup();
- mtx_unlock(&vm_page_queue_free_mtx);
+ vm_domain_free_wakeup(vmd);
+ vm_domain_free_unlock(vmd);
}
return (error);
}
@@ -2567,6 +2592,7 @@ bool
vm_page_reclaim_contig_domain(int domain, int req, u_long npages,
vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary)
{
+ struct vm_domain *vmd;
vm_paddr_t curr_low;
vm_page_t m_run, m_runs[NRUNS];
u_long count, reclaimed;
@@ -2587,9 +2613,10 @@ vm_page_reclaim_contig_domain(int domain, int req, u_long npages,
* Return if the number of free pages cannot satisfy the requested
* allocation.
*/
- count = vm_cnt.v_free_count;
- if (count < npages + vm_cnt.v_free_reserved || (count < npages +
- vm_cnt.v_interrupt_free_min && req_class == VM_ALLOC_SYSTEM) ||
+ vmd = VM_DOMAIN(domain);
+ count = vmd->vmd_free_count;
+ if (count < npages + vmd->vmd_free_reserved || (count < npages +
+ vmd->vmd_interrupt_free_min && req_class == VM_ALLOC_SYSTEM) ||
(count < npages && req_class == VM_ALLOC_INTERRUPT))
return (false);
@@ -2625,8 +2652,8 @@ vm_page_reclaim_contig_domain(int domain, int req, u_long npages,
for (i = 0; count > 0 && i < NRUNS; i++) {
count--;
m_run = m_runs[RUN_INDEX(count)];
- error = vm_page_reclaim_run(req_class, npages, m_run,
- high);
+ error = vm_page_reclaim_run(req_class, domain, npages,
+ m_run, high);
if (error == 0) {
reclaimed += npages;
if (reclaimed >= MIN_RECLAIM)
@@ -2666,66 +2693,191 @@ vm_page_reclaim_contig(int req, u_long npages, vm_paddr_t low, vm_paddr_t high,
return (ret);
}
+/*
+ * Set the domain in the appropriate page level domainset.
+ */
+void
+vm_domain_set(struct vm_domain *vmd)
+{
+
+ mtx_lock(&vm_domainset_lock);
+ if (!vmd->vmd_minset && vm_paging_min(vmd)) {
+ vmd->vmd_minset = 1;
+ DOMAINSET_SET(vmd->vmd_domain, &vm_min_domains);
+ }
+ if (!vmd->vmd_severeset && vm_paging_severe(vmd)) {
+ vmd->vmd_severeset = 1;
+ DOMAINSET_CLR(vmd->vmd_domain, &vm_severe_domains);
+ }
+ mtx_unlock(&vm_domainset_lock);
+}
/*
- * vm_wait: (also see VM_WAIT macro)
+ * Clear the domain from the appropriate page level domainset.
+ */
+static void
+vm_domain_clear(struct vm_domain *vmd)
+{
+
+ mtx_lock(&vm_domainset_lock);
+ if (vmd->vmd_minset && !vm_paging_min(vmd)) {
+ vmd->vmd_minset = 0;
+ DOMAINSET_CLR(vmd->vmd_domain, &vm_min_domains);
+ if (vm_min_waiters != 0) {
+ vm_min_waiters = 0;
+ wakeup(&vm_min_domains);
+ }
+ }
+ if (vmd->vmd_severeset && !vm_paging_severe(vmd)) {
+ vmd->vmd_severeset = 0;
+ DOMAINSET_CLR(vmd->vmd_domain, &vm_severe_domains);
+ if (vm_severe_waiters != 0) {
+ vm_severe_waiters = 0;
+ wakeup(&vm_severe_domains);
+ }
+ }
+ mtx_unlock(&vm_domainset_lock);
+}
+
+/*
+ * Wait for free pages to exceed the min threshold globally.
+ */
+void
+vm_wait_min(void)
+{
+
+ mtx_lock(&vm_domainset_lock);
+ while (vm_page_count_min()) {
+ vm_min_waiters++;
+ msleep(&vm_min_domains, &vm_domainset_lock, PVM, "vmwait", 0);
+ }
+ mtx_unlock(&vm_domainset_lock);
+}
+
+/*
+ * Wait for free pages to exceed the severe threshold globally.
+ */
+void
+vm_wait_severe(void)
+{
+
+ mtx_lock(&vm_domainset_lock);
+ while (vm_page_count_severe()) {
+ vm_severe_waiters++;
+ msleep(&vm_severe_domains, &vm_domainset_lock, PVM,
+ "vmwait", 0);
+ }
+ mtx_unlock(&vm_domainset_lock);
+}
+
+u_int
+vm_wait_count(void)
+{
+ u_int cnt;
+ int i;
+
+ cnt = 0;
+ for (i = 0; i < vm_ndomains; i++)
+ cnt += VM_DOMAIN(i)->vmd_waiters;
+ cnt += vm_severe_waiters + vm_min_waiters;
+
+ return (cnt);
+}
+
+/*
+ * vm_wait_domain:
*
* Sleep until free pages are available for allocation.
- * - Called in various places before memory allocations.
+ * - Called in various places after failed memory allocations.
*/
-static void
-_vm_wait(void)
+void
+vm_wait_domain(int domain)
{
+ struct vm_domain *vmd;
+
+ vmd = VM_DOMAIN(domain);
+ vm_domain_free_assert_locked(vmd);
- mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
if (curproc == pageproc) {
- vm_pageout_pages_needed = 1;
- msleep(&vm_pageout_pages_needed, &vm_page_queue_free_mtx,
- PDROP | PSWP, "VMWait", 0);
+ vmd->vmd_pageout_pages_needed = 1;
+ msleep(&vmd->vmd_pageout_pages_needed,
+ vm_domain_free_lockptr(vmd), PDROP | PSWP, "VMWait", 0);
} else {
if (pageproc == NULL)
panic("vm_wait in early boot");
- pagedaemon_wait(PVM, "vmwait");
+ pagedaemon_wait(domain, PVM, "vmwait");
}
}
+/*
+ * vm_wait: (also see VM_WAIT macro)
+ *
+ * Sleep until free pages are available for allocation.
+ * - Called in various places after failed memory allocations.
+ */
void
vm_wait(void)
{
- mtx_lock(&vm_page_queue_free_mtx);
- _vm_wait();
+ /*
+ * We use racey wakeup synchronization to avoid expensive global
+ * locking for the pageproc when sleeping with a non-specific vm_wait.
+ * To handle this, we only sleep for one tick in this instance. It
+ * is expected that most allocations for the pageproc will come from
+ * kmem or vm_page_grab* which will use the more specific and
+ * race-free vm_wait_domain().
+ */
+ if (curproc == pageproc) {
+ mtx_lock(&vm_domainset_lock);
+ vm_pageproc_waiters++;
+ msleep(&vm_pageproc_waiters, &vm_domainset_lock, PVM,
+ "pageprocwait", 1);
+ mtx_unlock(&vm_domainset_lock);
+ } else {
+ /*
+ * XXX Ideally we would wait only until the allocation could
+ * be satisfied. This condition can cause new allocators to
+ * consume all freed pages while old allocators wait.
+ */
+ mtx_lock(&vm_domainset_lock);
+ if (vm_page_count_min()) {
+ vm_min_waiters++;
+ msleep(&vm_min_domains, &vm_domainset_lock, PVM,
+ "vmwait", 0);
+ }
+ mtx_unlock(&vm_domainset_lock);
+ }
}
/*
- * vm_page_alloc_fail:
+ * vm_domain_alloc_fail:
*
* Called when a page allocation function fails. Informs the
* pagedaemon and performs the requested wait. Requires the
- * page_queue_free and object lock on entry. Returns with the
+ * domain_free and object lock on entry. Returns with the
* object lock held and free lock released. Returns an error when
* retry is necessary.
*
*/
static int
-vm_page_alloc_fail(vm_object_t object, int req)
+vm_domain_alloc_fail(struct vm_domain *vmd, vm_object_t object, int req)
{
- mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
+ vm_domain_free_assert_locked(vmd);
- atomic_add_int(&vm_pageout_deficit,
+ atomic_add_int(&vmd->vmd_pageout_deficit,
max((u_int)req >> VM_ALLOC_COUNT_SHIFT, 1));
if (req & (VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) {
if (object != NULL)
VM_OBJECT_WUNLOCK(object);
- _vm_wait();
+ vm_wait_domain(vmd->vmd_domain);
if (object != NULL)
VM_OBJECT_WLOCK(object);
if (req & VM_ALLOC_WAITOK)
return (EAGAIN);
} else {
- mtx_unlock(&vm_page_queue_free_mtx);
- pagedaemon_wakeup();
+ vm_domain_free_unlock(vmd);
+ pagedaemon_wakeup(vmd->vmd_domain);
}
return (0);
}
@@ -2744,18 +2896,19 @@ void
vm_waitpfault(void)
{
- mtx_lock(&vm_page_queue_free_mtx);
- pagedaemon_wait(PUSER, "pfault");
+ mtx_lock(&vm_domainset_lock);
+ if (vm_page_count_min()) {
+ vm_min_waiters++;
+ msleep(&vm_min_domains, &vm_domainset_lock, PUSER, "pfault", 0);
+ }
+ mtx_unlock(&vm_domainset_lock);
}
struct vm_pagequeue *
vm_page_pagequeue(vm_page_t m)
{
- if (vm_page_in_laundry(m))
- return (&vm_dom[0].vmd_pagequeues[m->queue]);
- else
- return (&vm_phys_domain(m)->vmd_pagequeues[m->queue]);
+ return (&vm_pagequeue_domain(m)->vmd_pagequeues[m->queue]);
}
/*
@@ -2817,10 +2970,7 @@ vm_page_enqueue(uint8_t queue, vm_page_t m)
KASSERT(queue < PQ_COUNT,
("vm_page_enqueue: invalid queue %u request for page %p",
queue, m));
- if (queue == PQ_LAUNDRY || queue == PQ_UNSWAPPABLE)
- pq = &vm_dom[0].vmd_pagequeues[queue];
- else
- pq = &vm_phys_domain(m)->vmd_pagequeues[queue];
+ pq = &vm_pagequeue_domain(m)->vmd_pagequeues[queue];
vm_pagequeue_lock(pq);
m->queue = queue;
TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
@@ -2902,7 +3052,7 @@ vm_page_activate(vm_page_t m)
}
/*
- * vm_page_free_wakeup:
+ * vm_domain_free_wakeup:
*
* Helper routine for vm_page_free_toq(). This routine is called
* when a page is added to the free queues.
@@ -2910,28 +3060,39 @@ vm_page_activate(vm_page_t m)
* The page queues must be locked.
*/
static void
-vm_page_free_wakeup(void)
+vm_domain_free_wakeup(struct vm_domain *vmd)
{
- mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
+ vm_domain_free_assert_locked(vmd);
+
/*
* if pageout daemon needs pages, then tell it that there are
* some free.
*/
- if (vm_pageout_pages_needed &&
- vm_cnt.v_free_count >= vm_cnt.v_pageout_free_min) {
- wakeup(&vm_pageout_pages_needed);
- vm_pageout_pages_needed = 0;
+ if (vmd->vmd_pageout_pages_needed &&
+ vmd->vmd_free_count >= vmd->vmd_pageout_free_min) {
+ wakeup(&vmd->vmd_pageout_pages_needed);
+ vmd->vmd_pageout_pages_needed = 0;
}
/*
* wakeup processes that are waiting on memory if we hit a
* high water mark. And wakeup scheduler process if we have
* lots of memory. this process will swapin processes.
*/
- if (vm_pages_needed && !vm_page_count_min()) {
- vm_pages_needed = false;
- wakeup(&vm_cnt.v_free_count);
+ if (vmd->vmd_pages_needed && !vm_paging_min(vmd)) {
+ vmd->vmd_pages_needed = false;
+ wakeup(&vmd->vmd_free_count);
+ }
+ if ((vmd->vmd_minset && !vm_paging_min(vmd)) ||
+ (vmd->vmd_severeset && !vm_paging_severe(vmd)))
+ vm_domain_clear(vmd);
+
+ /* See comments in vm_wait(); */
+ if (vm_pageproc_waiters) {
+ vm_pageproc_waiters = 0;
+ wakeup(&vm_pageproc_waiters);
}
+
}
/*
@@ -3018,12 +3179,12 @@ vm_page_free_prep(vm_page_t m, bool pagequeue_locked)
* queues. This is the last step to free a page.
*/
static void
-vm_page_free_phys(vm_page_t m)
+vm_page_free_phys(struct vm_domain *vmd, vm_page_t m)
{
- mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
+ vm_domain_free_assert_locked(vmd);
- vm_phys_freecnt_adj(m, 1);
+ vm_domain_freecnt_adj(vmd, 1);
#if VM_NRESERVLEVEL > 0
if (!vm_reserv_free_page(m))
#endif
@@ -3033,15 +3194,27 @@ vm_page_free_phys(vm_page_t m)
void
vm_page_free_phys_pglist(struct pglist *tq)
{
+ struct vm_domain *vmd;
vm_page_t m;
if (TAILQ_EMPTY(tq))
return;
- mtx_lock(&vm_page_queue_free_mtx);
- TAILQ_FOREACH(m, tq, listq)
- vm_page_free_phys(m);
- vm_page_free_wakeup();
- mtx_unlock(&vm_page_queue_free_mtx);
+ vmd = NULL;
+ TAILQ_FOREACH(m, tq, listq) {
+ if (vmd != vm_pagequeue_domain(m)) {
+ if (vmd != NULL) {
+ vm_domain_free_wakeup(vmd);
+ vm_domain_free_unlock(vmd);
+ }
+ vmd = vm_pagequeue_domain(m);
+ vm_domain_free_lock(vmd);
+ }
+ vm_page_free_phys(vmd, m);
+ }
+ if (vmd != NULL) {
+ vm_domain_free_wakeup(vmd);
+ vm_domain_free_unlock(vmd);
+ }
}
/*
@@ -3056,13 +3229,15 @@ vm_page_free_phys_pglist(struct pglist *tq)
void
vm_page_free_toq(vm_page_t m)
{
+ struct vm_domain *vmd;
if (!vm_page_free_prep(m, false))
return;
- mtx_lock(&vm_page_queue_free_mtx);
- vm_page_free_phys(m);
- vm_page_free_wakeup();
- mtx_unlock(&vm_page_queue_free_mtx);
+ vmd = vm_pagequeue_domain(m);
+ vm_domain_free_lock(vmd);
+ vm_page_free_phys(vmd, m);
+ vm_domain_free_wakeup(vmd);
+ vm_domain_free_unlock(vmd);
}
/*
@@ -3173,7 +3348,7 @@ _vm_page_deactivate(vm_page_t m, boolean_t noreuse)
if ((queue = m->queue) == PQ_INACTIVE && !noreuse)
return;
if (m->wire_count == 0 && (m->oflags & VPO_UNMANAGED) == 0) {
- pq = &vm_phys_domain(m)->vmd_pagequeues[PQ_INACTIVE];
+ pq = &vm_pagequeue_domain(m)->vmd_pagequeues[PQ_INACTIVE];
/* Avoid multiple acquisitions of the inactive queue lock. */
if (queue == PQ_INACTIVE) {
vm_pagequeue_lock(pq);
@@ -3185,8 +3360,9 @@ _vm_page_deactivate(vm_page_t m, boolean_t noreuse)
}
m->queue = PQ_INACTIVE;
if (noreuse)
- TAILQ_INSERT_BEFORE(&vm_phys_domain(m)->vmd_inacthead,
- m, plinks.q);
+ TAILQ_INSERT_BEFORE(
+ &vm_pagequeue_domain(m)->vmd_inacthead, m,
+ plinks.q);
else
TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
vm_pagequeue_cnt_inc(pq);
@@ -3963,10 +4139,10 @@ vm_page_assert_pga_writeable(vm_page_t m, uint8_t bits)
DB_SHOW_COMMAND(page, vm_page_print_page_info)
{
- db_printf("vm_cnt.v_free_count: %d\n", vm_cnt.v_free_count);
- db_printf("vm_cnt.v_inactive_count: %d\n", vm_cnt.v_inactive_count);
- db_printf("vm_cnt.v_active_count: %d\n", vm_cnt.v_active_count);
- db_printf("vm_cnt.v_laundry_count: %d\n", vm_cnt.v_laundry_count);
+ db_printf("vm_cnt.v_free_count: %d\n", vm_free_count());
+ db_printf("vm_cnt.v_inactive_count: %d\n", vm_inactive_count());
+ db_printf("vm_cnt.v_active_count: %d\n", vm_active_count());
+ db_printf("vm_cnt.v_laundry_count: %d\n", vm_laundry_count());
db_printf("vm_cnt.v_wire_count: %d\n", vm_cnt.v_wire_count);
db_printf("vm_cnt.v_free_reserved: %d\n", vm_cnt.v_free_reserved);
db_printf("vm_cnt.v_free_min: %d\n", vm_cnt.v_free_min);
@@ -3978,7 +4154,7 @@ DB_SHOW_COMMAND(pageq, vm_page_print_pageq_info)
{
int dom;
- db_printf("pq_free %d\n", vm_cnt.v_free_count);
+ db_printf("pq_free %d\n", vm_free_count());
for (dom = 0; dom < vm_ndomains; dom++) {
db_printf(
"dom %d page_cnt %d free %d pq_act %d pq_inact %d pq_laund %d pq_unsw %d\n",
diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
index 3b7871a800a2..831a7c1838fb 100644
--- a/sys/vm/vm_page.h
+++ b/sys/vm/vm_page.h
@@ -218,54 +218,10 @@ TAILQ_HEAD(pglist, vm_page);
#endif
SLIST_HEAD(spglist, vm_page);
-struct vm_pagequeue {
- struct mtx pq_mutex;
- struct pglist pq_pl;
- int pq_cnt;
- u_int * const pq_vcnt;
- const char * const pq_name;
-} __aligned(CACHE_LINE_SIZE);
-
-
-struct vm_domain {
- struct vm_pagequeue vmd_pagequeues[PQ_COUNT];
- struct vmem *vmd_kernel_arena;
- u_int vmd_page_count;
- u_int vmd_free_count;
- long vmd_segs; /* bitmask of the segments */
- boolean_t vmd_oom;
- int vmd_oom_seq;
- int vmd_last_active_scan;
- struct vm_page vmd_laundry_marker;
- struct vm_page vmd_marker; /* marker for pagedaemon private use */
- struct vm_page vmd_inacthead; /* marker for LRU-defeating insertions */
-};
-
-extern struct vm_domain vm_dom[MAXMEMDOM];
-
-#define vm_pagequeue_assert_locked(pq) mtx_assert(&(pq)->pq_mutex, MA_OWNED)
-#define vm_pagequeue_lock(pq) mtx_lock(&(pq)->pq_mutex)
-#define vm_pagequeue_lockptr(pq) (&(pq)->pq_mutex)
-#define vm_pagequeue_unlock(pq) mtx_unlock(&(pq)->pq_mutex)
-
#ifdef _KERNEL
extern vm_page_t bogus_page;
-
-static __inline void
-vm_pagequeue_cnt_add(struct vm_pagequeue *pq, int addend)
-{
-
-#ifdef notyet
- vm_pagequeue_assert_locked(pq);
-#endif
- pq->pq_cnt += addend;
- atomic_add_int(pq->pq_vcnt, addend);
-}
-#define vm_pagequeue_cnt_inc(pq) vm_pagequeue_cnt_add((pq), 1)
-#define vm_pagequeue_cnt_dec(pq) vm_pagequeue_cnt_add((pq), -1)
#endif /* _KERNEL */
-extern struct mtx_padalign vm_page_queue_free_mtx;
extern struct mtx_padalign pa_lock[];
#if defined(__arm__)
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index 51c7af7e1146..87163053d052 100644
--- a/sys/vm/vm_pageout.c
+++ b/sys/vm/vm_pageout.c
@@ -110,6 +110,7 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/vm_phys.h>
+#include <vm/vm_pagequeue.h>
#include <vm/swap_pager.h>
#include <vm/vm_extern.h>
#include <vm/uma.h>
@@ -147,19 +148,7 @@ SDT_PROBE_DEFINE(vm, , , vm__lowmem_scan);
#define VM_LAUNDER_RATE 10
#define VM_INACT_SCAN_RATE 2
-int vm_pageout_deficit; /* Estimated number of pages deficit */
-u_int vm_pageout_wakeup_thresh;
static int vm_pageout_oom_seq = 12;
-static bool vm_pageout_wanted; /* Event on which pageout daemon sleeps */
-bool vm_pages_needed; /* Are threads waiting for free pages? */
-
-/* Pending request for dirty page laundering. */
-static enum {
- VM_LAUNDRY_IDLE,
- VM_LAUNDRY_BACKGROUND,
- VM_LAUNDRY_SHORTFALL
-} vm_laundry_request = VM_LAUNDRY_IDLE;
-static int vm_inactq_scans;
static int vm_pageout_update_period;
static int disable_swap_pageouts;
@@ -173,10 +162,6 @@ SYSCTL_INT(_vm, OID_AUTO, panic_on_oom,
CTLFLAG_RWTUN, &vm_panic_on_oom, 0,
"panic on out of memory instead of killing the largest process");
-SYSCTL_INT(_vm, OID_AUTO, pageout_wakeup_thresh,
- CTLFLAG_RWTUN, &vm_pageout_wakeup_thresh, 0,
- "free page threshold for waking up the pageout daemon");
-
SYSCTL_INT(_vm, OID_AUTO, pageout_update_period,
CTLFLAG_RWTUN, &vm_pageout_update_period, 0,
"Maximum active LRU update period");
@@ -200,11 +185,6 @@ SYSCTL_INT(_vm, OID_AUTO, act_scan_laundry_weight, CTLFLAG_RWTUN,
&act_scan_laundry_weight, 0,
"weight given to clean vs. dirty pages in active queue scans");
-static u_int vm_background_launder_target;
-SYSCTL_UINT(_vm, OID_AUTO, background_launder_target, CTLFLAG_RWTUN,
- &vm_background_launder_target, 0,
- "background laundering target, in pages");
-
static u_int vm_background_launder_rate = 4096;
SYSCTL_UINT(_vm, OID_AUTO, background_launder_rate, CTLFLAG_RWTUN,
&vm_background_launder_rate, 0,
@@ -959,18 +939,18 @@ isqrt(u_int num)
static void
vm_pageout_laundry_worker(void *arg)
{
- struct vm_domain *domain;
+ struct vm_domain *vmd;
struct vm_pagequeue *pq;
uint64_t nclean, ndirty;
u_int inactq_scans, last_launder;
- int domidx, last_target, launder, shortfall, shortfall_cycle, target;
+ int domain, last_target, launder, shortfall, shortfall_cycle, target;
bool in_shortfall;
- domidx = (uintptr_t)arg;
- domain = &vm_dom[domidx];
- pq = &domain->vmd_pagequeues[PQ_LAUNDRY];
- KASSERT(domain->vmd_segs != 0, ("domain without segments"));
- vm_pageout_init_marker(&domain->vmd_laundry_marker, PQ_LAUNDRY);
+ domain = (uintptr_t)arg;
+ vmd = VM_DOMAIN(domain);
+ pq = &vmd->vmd_pagequeues[PQ_LAUNDRY];
+ KASSERT(vmd->vmd_segs != 0, ("domain without segments"));
+ vm_pageout_init_marker(&vmd->vmd_laundry_marker, PQ_LAUNDRY);
shortfall = 0;
in_shortfall = false;
@@ -982,9 +962,9 @@ vm_pageout_laundry_worker(void *arg)
/*
* Calls to these handlers are serialized by the swap syscall lock.
*/
- (void)EVENTHANDLER_REGISTER(swapon, vm_pageout_swapon, domain,
+ (void)EVENTHANDLER_REGISTER(swapon, vm_pageout_swapon, vmd,
EVENTHANDLER_PRI_ANY);
- (void)EVENTHANDLER_REGISTER(swapoff, vm_pageout_swapoff, domain,
+ (void)EVENTHANDLER_REGISTER(swapoff, vm_pageout_swapoff, vmd,
EVENTHANDLER_PRI_ANY);
/*
@@ -1006,7 +986,7 @@ vm_pageout_laundry_worker(void *arg)
target = shortfall;
} else if (!in_shortfall)
goto trybackground;
- else if (shortfall_cycle == 0 || vm_laundry_target() <= 0) {
+ else if (shortfall_cycle == 0 || vm_laundry_target(vmd) <= 0) {
/*
* We recently entered shortfall and began laundering
* pages. If we have completed that laundering run
@@ -1040,11 +1020,12 @@ vm_pageout_laundry_worker(void *arg)
* memory pressure required to trigger laundering decreases.
*/
trybackground:
- nclean = vm_cnt.v_inactive_count + vm_cnt.v_free_count;
- ndirty = vm_cnt.v_laundry_count;
+ nclean = vmd->vmd_free_count +
+ vmd->vmd_pagequeues[PQ_INACTIVE].pq_cnt;
+ ndirty = vmd->vmd_pagequeues[PQ_LAUNDRY].pq_cnt;
if (target == 0 && inactq_scans != last_launder &&
ndirty * isqrt(inactq_scans - last_launder) >= nclean) {
- target = vm_background_launder_target;
+ target = vmd->vmd_background_launder_target;
}
/*
@@ -1076,7 +1057,7 @@ dolaundry:
* pages could exceed "target" by the maximum size of
* a cluster minus one.
*/
- target -= min(vm_pageout_launder(domain, launder,
+ target -= min(vm_pageout_launder(vmd, launder,
in_shortfall), target);
pause("laundp", hz / VM_LAUNDER_RATE);
}
@@ -1087,8 +1068,8 @@ dolaundry:
* kicks us.
*/
vm_pagequeue_lock(pq);
- if (target == 0 && vm_laundry_request == VM_LAUNDRY_IDLE)
- (void)mtx_sleep(&vm_laundry_request,
+ if (target == 0 && vmd->vmd_laundry_request == VM_LAUNDRY_IDLE)
+ (void)mtx_sleep(&vmd->vmd_laundry_request,
vm_pagequeue_lockptr(pq), PVM, "launds", 0);
/*
@@ -1096,16 +1077,17 @@ dolaundry:
* a shortfall laundering unless we're already in the middle of
* one. This may preempt a background laundering.
*/
- if (vm_laundry_request == VM_LAUNDRY_SHORTFALL &&
+ if (vmd->vmd_laundry_request == VM_LAUNDRY_SHORTFALL &&
(!in_shortfall || shortfall_cycle == 0)) {
- shortfall = vm_laundry_target() + vm_pageout_deficit;
+ shortfall = vm_laundry_target(vmd) +
+ vmd->vmd_pageout_deficit;
target = 0;
} else
shortfall = 0;
if (target == 0)
- vm_laundry_request = VM_LAUNDRY_IDLE;
- inactq_scans = vm_inactq_scans;
+ vmd->vmd_laundry_request = VM_LAUNDRY_IDLE;
+ inactq_scans = vmd->vmd_inactq_scans;
vm_pagequeue_unlock(pq);
}
}
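
The trybackground computation above compares the domain's dirty page count (PQ_LAUNDRY), scaled by the integer square root of the number of inactive-queue scans since the last laundering pass, against its clean page count (vmd_free_count plus PQ_INACTIVE). The stand-alone sketch below is illustrative only: the counts are made up and the local bit-by-bit square root merely stands in for the isqrt() defined earlier in this file. It shows how the trigger fires more readily as scans accumulate:

#include <stdio.h>

/* Bit-by-bit integer square root; a stand-in for isqrt() in vm_pageout.c. */
static unsigned int
isqrt_demo(unsigned int num)
{
	unsigned int bit, root, tmp;

	bit = 1u << 30;
	while (bit > num)
		bit >>= 2;
	root = 0;
	while (bit != 0) {
		tmp = root + bit;
		root >>= 1;
		if (num >= tmp) {
			num -= tmp;
			root += bit;
		}
		bit >>= 2;
	}
	return (root);
}

int
main(void)
{
	/* Hypothetical per-domain counts, in pages. */
	unsigned long long nclean = 200000;	/* vmd_free_count + inactive */
	unsigned long long ndirty = 30000;	/* PQ_LAUNDRY queue length */
	unsigned int scans;

	for (scans = 1; scans <= 64; scans *= 2)
		printf("%2u scans since last launder -> background launder: %s\n",
		    scans, ndirty * isqrt_demo(scans) >= nclean ? "yes" : "no");
	return (0);
}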
@@ -1134,7 +1116,7 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
* If we need to reclaim memory ask kernel caches to return
* some. We rate limit to avoid thrashing.
*/
- if (vmd == &vm_dom[0] && pass > 0 &&
+ if (vmd == VM_DOMAIN(0) && pass > 0 &&
(time_uptime - lowmem_uptime) >= lowmem_period) {
/*
* Decrease registered cache sizes.
@@ -1163,8 +1145,8 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
* the page daemon and this calculation.
*/
if (pass > 0) {
- deficit = atomic_readandclear_int(&vm_pageout_deficit);
- page_shortage = vm_paging_target() + deficit;
+ deficit = atomic_readandclear_int(&vmd->vmd_pageout_deficit);
+ page_shortage = vm_paging_target(vmd) + deficit;
} else
page_shortage = deficit = 0;
starting_page_shortage = page_shortage;
@@ -1357,18 +1339,20 @@ drop_page:
* keep count.
*/
if (starting_page_shortage > 0) {
- pq = &vm_dom[0].vmd_pagequeues[PQ_LAUNDRY];
+ pq = &vmd->vmd_pagequeues[PQ_LAUNDRY];
vm_pagequeue_lock(pq);
- if (vm_laundry_request == VM_LAUNDRY_IDLE &&
+ if (vmd->vmd_laundry_request == VM_LAUNDRY_IDLE &&
(pq->pq_cnt > 0 || atomic_load_acq_int(&swapdev_enabled))) {
if (page_shortage > 0) {
- vm_laundry_request = VM_LAUNDRY_SHORTFALL;
+ vmd->vmd_laundry_request = VM_LAUNDRY_SHORTFALL;
VM_CNT_INC(v_pdshortfalls);
- } else if (vm_laundry_request != VM_LAUNDRY_SHORTFALL)
- vm_laundry_request = VM_LAUNDRY_BACKGROUND;
- wakeup(&vm_laundry_request);
+ } else if (vmd->vmd_laundry_request !=
+ VM_LAUNDRY_SHORTFALL)
+ vmd->vmd_laundry_request =
+ VM_LAUNDRY_BACKGROUND;
+ wakeup(&vmd->vmd_laundry_request);
}
- vm_inactq_scans++;
+ vmd->vmd_inactq_scans++;
vm_pagequeue_unlock(pq);
}
@@ -1397,9 +1381,9 @@ drop_page:
* more aggressively, improving the effectiveness of clustering and
* ensuring that they can eventually be reused.
*/
- inactq_shortage = vm_cnt.v_inactive_target - (vm_cnt.v_inactive_count +
- vm_cnt.v_laundry_count / act_scan_laundry_weight) +
- vm_paging_target() + deficit + addl_page_shortage;
+ inactq_shortage = vmd->vmd_inactive_target - (pq->pq_cnt +
+ vmd->vmd_pagequeues[PQ_LAUNDRY].pq_cnt / act_scan_laundry_weight) +
+ vm_paging_target(vmd) + deficit + addl_page_shortage;
inactq_shortage *= act_scan_laundry_weight;
pq = &vmd->vmd_pagequeues[PQ_ACTIVE];
@@ -1742,6 +1726,8 @@ vm_pageout_oom(int shortage)
}
sx_sunlock(&allproc_lock);
if (bigproc != NULL) {
+ int i;
+
if (vm_panic_on_oom != 0)
panic("out of swap space");
PROC_LOCK(bigproc);
@@ -1749,19 +1735,20 @@ vm_pageout_oom(int shortage)
sched_nice(bigproc, PRIO_MIN);
_PRELE(bigproc);
PROC_UNLOCK(bigproc);
- wakeup(&vm_cnt.v_free_count);
+ for (i = 0; i < vm_ndomains; i++)
+ wakeup(&VM_DOMAIN(i)->vmd_free_count);
}
}
static void
vm_pageout_worker(void *arg)
{
- struct vm_domain *domain;
- int domidx, pass;
+ struct vm_domain *vmd;
+ int domain, pass;
bool target_met;
- domidx = (uintptr_t)arg;
- domain = &vm_dom[domidx];
+ domain = (uintptr_t)arg;
+ vmd = VM_DOMAIN(domain);
pass = 0;
target_met = true;
@@ -1771,18 +1758,18 @@ vm_pageout_worker(void *arg)
* is allocated.
*/
- KASSERT(domain->vmd_segs != 0, ("domain without segments"));
- domain->vmd_last_active_scan = ticks;
- vm_pageout_init_marker(&domain->vmd_marker, PQ_INACTIVE);
- vm_pageout_init_marker(&domain->vmd_inacthead, PQ_INACTIVE);
- TAILQ_INSERT_HEAD(&domain->vmd_pagequeues[PQ_INACTIVE].pq_pl,
- &domain->vmd_inacthead, plinks.q);
+ KASSERT(vmd->vmd_segs != 0, ("domain without segments"));
+ vmd->vmd_last_active_scan = ticks;
+ vm_pageout_init_marker(&vmd->vmd_marker, PQ_INACTIVE);
+ vm_pageout_init_marker(&vmd->vmd_inacthead, PQ_INACTIVE);
+ TAILQ_INSERT_HEAD(&vmd->vmd_pagequeues[PQ_INACTIVE].pq_pl,
+ &vmd->vmd_inacthead, plinks.q);
/*
* The pageout daemon worker is never done, so loop forever.
*/
while (TRUE) {
- mtx_lock(&vm_page_queue_free_mtx);
+ vm_domain_free_lock(vmd);
/*
* Generally, after a level >= 1 scan, if there are enough
@@ -1796,34 +1783,34 @@ vm_pageout_worker(void *arg)
* thread will, nonetheless, wait until another page is freed
* or this wakeup is performed.
*/
- if (vm_pages_needed && !vm_page_count_min()) {
- vm_pages_needed = false;
- wakeup(&vm_cnt.v_free_count);
+ if (vmd->vmd_pages_needed && !vm_paging_min(vmd)) {
+ vmd->vmd_pages_needed = false;
+ wakeup(&vmd->vmd_free_count);
}
/*
- * Do not clear vm_pageout_wanted until we reach our free page
+ * Do not clear vmd_pageout_wanted until we reach our free page
* target. Otherwise, we may be awakened over and over again,
* wasting CPU time.
*/
- if (vm_pageout_wanted && target_met)
- vm_pageout_wanted = false;
+ if (vmd->vmd_pageout_wanted && target_met)
+ vmd->vmd_pageout_wanted = false;
/*
* Might the page daemon receive a wakeup call?
*/
- if (vm_pageout_wanted) {
+ if (vmd->vmd_pageout_wanted) {
/*
- * No. Either vm_pageout_wanted was set by another
+ * No. Either vmd_pageout_wanted was set by another
* thread during the previous scan, which must have
- * been a level 0 scan, or vm_pageout_wanted was
+ * been a level 0 scan, or vmd_pageout_wanted was
* already set and the scan failed to free enough
* pages. If we haven't yet performed a level >= 1
* (page reclamation) scan, then increase the level
* and scan again now. Otherwise, sleep a bit and
* try again later.
*/
- mtx_unlock(&vm_page_queue_free_mtx);
+ vm_domain_free_unlock(vmd);
if (pass >= 1)
pause("pwait", hz / VM_INACT_SCAN_RATE);
pass++;
@@ -1834,20 +1821,20 @@ vm_pageout_worker(void *arg)
* sleep until the next wakeup or until pages need to
* have their reference stats updated.
*/
- if (vm_pages_needed) {
- mtx_unlock(&vm_page_queue_free_mtx);
+ if (vmd->vmd_pages_needed) {
+ vm_domain_free_unlock(vmd);
if (pass == 0)
pass++;
- } else if (mtx_sleep(&vm_pageout_wanted,
- &vm_page_queue_free_mtx, PDROP | PVM, "psleep",
- hz) == 0) {
+ } else if (mtx_sleep(&vmd->vmd_pageout_wanted,
+ vm_domain_free_lockptr(vmd), PDROP | PVM,
+ "psleep", hz) == 0) {
VM_CNT_INC(v_pdwakeups);
pass = 1;
} else
pass = 0;
}
- target_met = vm_pageout_scan(domain, pass);
+ target_met = vm_pageout_scan(vmd, pass);
}
}
@@ -1855,41 +1842,76 @@ vm_pageout_worker(void *arg)
* vm_pageout_init initialises basic pageout daemon settings.
*/
static void
-vm_pageout_init(void)
+vm_pageout_init_domain(int domain)
{
- /*
- * Initialize some paging parameters.
- */
- vm_cnt.v_interrupt_free_min = 2;
- if (vm_cnt.v_page_count < 2000)
- vm_pageout_page_count = 8;
+ struct vm_domain *vmd;
+
+ vmd = VM_DOMAIN(domain);
+ vmd->vmd_interrupt_free_min = 2;
/*
* v_free_reserved needs to include enough for the largest
* swap pager structures plus enough for any pv_entry structs
* when paging.
*/
- if (vm_cnt.v_page_count > 1024)
- vm_cnt.v_free_min = 4 + (vm_cnt.v_page_count - 1024) / 200;
+ if (vmd->vmd_page_count > 1024)
+ vmd->vmd_free_min = 4 + (vmd->vmd_page_count - 1024) / 200;
else
- vm_cnt.v_free_min = 4;
- vm_cnt.v_pageout_free_min = (2*MAXBSIZE)/PAGE_SIZE +
- vm_cnt.v_interrupt_free_min;
- vm_cnt.v_free_reserved = vm_pageout_page_count +
- vm_cnt.v_pageout_free_min + (vm_cnt.v_page_count / 768);
- vm_cnt.v_free_severe = vm_cnt.v_free_min / 2;
- vm_cnt.v_free_target = 4 * vm_cnt.v_free_min + vm_cnt.v_free_reserved;
- vm_cnt.v_free_min += vm_cnt.v_free_reserved;
- vm_cnt.v_free_severe += vm_cnt.v_free_reserved;
- vm_cnt.v_inactive_target = (3 * vm_cnt.v_free_target) / 2;
- if (vm_cnt.v_inactive_target > vm_cnt.v_free_count / 3)
- vm_cnt.v_inactive_target = vm_cnt.v_free_count / 3;
+ vmd->vmd_free_min = 4;
+ vmd->vmd_pageout_free_min = (2*MAXBSIZE)/PAGE_SIZE +
+ vmd->vmd_interrupt_free_min;
+ vmd->vmd_free_reserved = vm_pageout_page_count +
+ vmd->vmd_pageout_free_min + (vmd->vmd_page_count / 768);
+ vmd->vmd_free_severe = vmd->vmd_free_min / 2;
+ vmd->vmd_free_target = 4 * vmd->vmd_free_min + vmd->vmd_free_reserved;
+ vmd->vmd_free_min += vmd->vmd_free_reserved;
+ vmd->vmd_free_severe += vmd->vmd_free_reserved;
+ vmd->vmd_inactive_target = (3 * vmd->vmd_free_target) / 2;
+ if (vmd->vmd_inactive_target > vmd->vmd_free_count / 3)
+ vmd->vmd_inactive_target = vmd->vmd_free_count / 3;
/*
* Set the default wakeup threshold to be 10% above the minimum
* page limit. This keeps the steady state out of shortfall.
*/
- vm_pageout_wakeup_thresh = (vm_cnt.v_free_min / 10) * 11;
+ vmd->vmd_pageout_wakeup_thresh = (vmd->vmd_free_min / 10) * 11;
+
+ /*
+ * Target amount of memory to move out of the laundry queue during a
+ * background laundering. This is proportional to the amount of system
+ * memory.
+ */
+ vmd->vmd_background_launder_target = (vmd->vmd_free_target -
+ vmd->vmd_free_min) / 10;
+}
+
+static void
+vm_pageout_init(void)
+{
+ u_int freecount;
+ int i;
+
+ /*
+ * Initialize some paging parameters.
+ */
+ if (vm_cnt.v_page_count < 2000)
+ vm_pageout_page_count = 8;
+
+ freecount = 0;
+ for (i = 0; i < vm_ndomains; i++) {
+ struct vm_domain *vmd;
+
+ vm_pageout_init_domain(i);
+ vmd = VM_DOMAIN(i);
+ vm_cnt.v_free_reserved += vmd->vmd_free_reserved;
+ vm_cnt.v_free_target += vmd->vmd_free_target;
+ vm_cnt.v_free_min += vmd->vmd_free_min;
+ vm_cnt.v_inactive_target += vmd->vmd_inactive_target;
+ vm_cnt.v_pageout_free_min += vmd->vmd_pageout_free_min;
+ vm_cnt.v_interrupt_free_min += vmd->vmd_interrupt_free_min;
+ vm_cnt.v_free_severe += vmd->vmd_free_severe;
+ freecount += vmd->vmd_free_count;
+ }
/*
* Set interval in seconds for active scan. We want to visit each
@@ -1899,17 +1921,8 @@ vm_pageout_init(void)
if (vm_pageout_update_period == 0)
vm_pageout_update_period = 600;
- /* XXX does not really belong here */
if (vm_page_max_wired == 0)
- vm_page_max_wired = vm_cnt.v_free_count / 3;
-
- /*
- * Target amount of memory to move out of the laundry queue during a
- * background laundering. This is proportional to the amount of system
- * memory.
- */
- vm_background_launder_target = (vm_cnt.v_free_target -
- vm_cnt.v_free_min) / 10;
+ vm_page_max_wired = freecount / 3;
}
/*
@@ -1933,6 +1946,12 @@ vm_pageout(void)
panic("starting pageout for domain %d, error %d\n",
i, error);
}
+ error = kthread_add(vm_pageout_laundry_worker,
+ (void *)(uintptr_t)i, curproc, NULL, 0, 0,
+ "laundry: dom%d", i);
+ if (error != 0)
+ panic("starting laundry for domain %d, error %d",
+ i, error);
}
error = kthread_add(uma_reclaim_worker, NULL, curproc, NULL,
0, 0, "uma");
@@ -1945,14 +1964,16 @@ vm_pageout(void)
* Perform an advisory wakeup of the page daemon.
*/
void
-pagedaemon_wakeup(void)
+pagedaemon_wakeup(int domain)
{
+ struct vm_domain *vmd;
- mtx_assert(&vm_page_queue_free_mtx, MA_NOTOWNED);
+ vmd = VM_DOMAIN(domain);
+ vm_domain_free_assert_unlocked(vmd);
- if (!vm_pageout_wanted && curthread->td_proc != pageproc) {
- vm_pageout_wanted = true;
- wakeup(&vm_pageout_wanted);
+ if (!vmd->vmd_pageout_wanted && curthread->td_proc != pageproc) {
+ vmd->vmd_pageout_wanted = true;
+ wakeup(&vmd->vmd_pageout_wanted);
}
}
@@ -1962,22 +1983,26 @@ pagedaemon_wakeup(void)
* This function returns with the free queues mutex unlocked.
*/
void
-pagedaemon_wait(int pri, const char *wmesg)
+pagedaemon_wait(int domain, int pri, const char *wmesg)
{
+ struct vm_domain *vmd;
- mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
+ vmd = VM_DOMAIN(domain);
+ vm_domain_free_assert_locked(vmd);
/*
- * vm_pageout_wanted may have been set by an advisory wakeup, but if the
- * page daemon is running on a CPU, the wakeup will have been lost.
+ * vmd_pageout_wanted may have been set by an advisory wakeup, but if
+ * the page daemon is running on a CPU, the wakeup will have been lost.
* Thus, deliver a potentially spurious wakeup to ensure that the page
* daemon has been notified of the shortage.
*/
- if (!vm_pageout_wanted || !vm_pages_needed) {
- vm_pageout_wanted = true;
- wakeup(&vm_pageout_wanted);
+ if (!vmd->vmd_pageout_wanted || !vmd->vmd_pages_needed) {
+ vmd->vmd_pageout_wanted = true;
+ wakeup(&vmd->vmd_pageout_wanted);
}
- vm_pages_needed = true;
- msleep(&vm_cnt.v_free_count, &vm_page_queue_free_mtx, PDROP | pri,
+ vmd->vmd_pages_needed = true;
+ vmd->vmd_waiters++;
+ msleep(&vmd->vmd_free_count, vm_domain_free_lockptr(vmd), PDROP | pri,
wmesg, 0);
+ vmd->vmd_waiters--;
}
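
To see what one domain's copy of these thresholds looks like in practice, here is a stand-alone sketch of the arithmetic in vm_pageout_init_domain() above for a hypothetical 1 GB domain (262144 pages of 4 KB). The MAXBSIZE (64 KB) and vm_pageout_page_count (32) values are assumptions made for the example, not values taken from this diff, and the inactive-target cap to one third of the free count is noted only in a comment:

#include <stdio.h>

#define	DEMO_PAGE_COUNT		262144u	/* hypothetical domain: 1 GB / 4 KB */
#define	DEMO_PAGE_SIZE		4096u	/* assumed */
#define	DEMO_MAXBSIZE		65536u	/* assumed */
#define	DEMO_PAGEOUT_PAGES	32u	/* stands in for vm_pageout_page_count */

int
main(void)
{
	unsigned int interrupt_free_min, pageout_free_min, free_min;
	unsigned int free_reserved, free_severe, free_target;
	unsigned int inactive_target, wakeup_thresh, launder_target;

	/* Same formulas as vm_pageout_init_domain(), with demo constants. */
	interrupt_free_min = 2;
	free_min = 4 + (DEMO_PAGE_COUNT - 1024) / 200;
	pageout_free_min = (2 * DEMO_MAXBSIZE) / DEMO_PAGE_SIZE +
	    interrupt_free_min;
	free_reserved = DEMO_PAGEOUT_PAGES + pageout_free_min +
	    DEMO_PAGE_COUNT / 768;
	free_severe = free_min / 2;
	free_target = 4 * free_min + free_reserved;
	free_min += free_reserved;
	free_severe += free_reserved;
	inactive_target = (3 * free_target) / 2;	/* also capped at 1/3 free */
	wakeup_thresh = (free_min / 10) * 11;
	launder_target = (free_target - free_min) / 10;

	printf("free_min %u, free_target %u, free_reserved %u, free_severe %u\n",
	    free_min, free_target, free_reserved, free_severe);
	printf("inactive_target %u, wakeup_thresh %u, launder_target %u\n",
	    inactive_target, wakeup_thresh, launder_target);
	return (0);
}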
diff --git a/sys/vm/vm_pageout.h b/sys/vm/vm_pageout.h
index 60ee00d9e820..0894cd1541aa 100644
--- a/sys/vm/vm_pageout.h
+++ b/sys/vm/vm_pageout.h
@@ -74,9 +74,7 @@
*/
extern int vm_page_max_wired;
-extern int vm_pageout_deficit;
extern int vm_pageout_page_count;
-extern bool vm_pages_needed;
#define VM_OOM_MEM 1
#define VM_OOM_SWAPZ 2
@@ -95,12 +93,15 @@ extern bool vm_pages_needed;
* Signal pageout-daemon and wait for it.
*/
-void pagedaemon_wait(int pri, const char *wmesg);
-void pagedaemon_wakeup(void);
+void pagedaemon_wait(int domain, int pri, const char *wmesg);
+void pagedaemon_wakeup(int domain);
#define VM_WAIT vm_wait()
#define VM_WAITPFAULT vm_waitpfault()
void vm_wait(void);
void vm_waitpfault(void);
+void vm_wait_domain(int domain);
+void vm_wait_min(void);
+void vm_wait_severe(void);
#ifdef _KERNEL
int vm_pageout_flush(vm_page_t *, int, int, int, int *, boolean_t *);
diff --git a/sys/vm/vm_pagequeue.h b/sys/vm/vm_pagequeue.h
new file mode 100644
index 000000000000..d7bff9bc5eff
--- /dev/null
+++ b/sys/vm/vm_pagequeue.h
@@ -0,0 +1,235 @@
+/*-
+ * SPDX-License-Identifier: (BSD-3-Clause AND MIT-CMU)
+ *
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)vm_page.h 8.2 (Berkeley) 12/13/93
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _VM_PAGEQUEUE_
+#define _VM_PAGEQUEUE_
+
+#ifdef _KERNEL
+struct vm_pagequeue {
+ struct mtx pq_mutex;
+ struct pglist pq_pl;
+ int pq_cnt;
+ const char * const pq_name;
+} __aligned(CACHE_LINE_SIZE);
+
+
+struct vm_domain {
+ struct vm_pagequeue vmd_pagequeues[PQ_COUNT];
+ struct mtx_padalign vmd_free_mtx;
+ struct vmem *vmd_kernel_arena;
+ u_int vmd_domain; /* Domain number. */
+ u_int vmd_page_count;
+ long vmd_segs; /* bitmask of the segments */
+
+ /* Paging control variables, locked by domain_free_mtx. */
+ u_int vmd_free_count;
+ boolean_t vmd_oom;
+ int vmd_oom_seq;
+ int vmd_last_active_scan;
+ struct vm_page vmd_laundry_marker;
+ struct vm_page vmd_marker; /* marker for pagedaemon private use */
+ struct vm_page vmd_inacthead; /* marker for LRU-defeating insertions */
+
+ int vmd_pageout_pages_needed; /* page daemon waiting for pages? */
+ int vmd_pageout_deficit; /* Estimated number of pages deficit */
+ int vmd_waiters; /* Pageout waiters. */
+ bool vmd_pages_needed; /* Are threads waiting for free pages? */
+ bool vmd_pageout_wanted; /* pageout daemon wait channel */
+ bool vmd_minset; /* Are we in vm_min_domains? */
+ bool vmd_severeset; /* Are we in vm_severe_domains? */
+ int vmd_inactq_scans;
+ enum {
+ VM_LAUNDRY_IDLE = 0,
+ VM_LAUNDRY_BACKGROUND,
+ VM_LAUNDRY_SHORTFALL
+ } vmd_laundry_request;
+
+ /* Paging thresholds. */
+ u_int vmd_background_launder_target;
+ u_int vmd_free_reserved; /* (c) pages reserved for deadlock */
+ u_int vmd_free_target; /* (c) pages desired free */
+ u_int vmd_free_min; /* (c) pages desired free */
+ u_int vmd_inactive_target; /* (c) pages desired inactive */
+ u_int vmd_pageout_free_min; /* (c) min pages reserved for kernel */
+ u_int vmd_pageout_wakeup_thresh;/* (c) min pages to wake pagedaemon */
+ u_int vmd_interrupt_free_min; /* (c) reserved pages for int code */
+ u_int vmd_free_severe; /* (c) severe page depletion point */
+} __aligned(CACHE_LINE_SIZE);
+
+extern struct vm_domain vm_dom[MAXMEMDOM];
+
+#define VM_DOMAIN(n) (&vm_dom[(n)])
+
+#define vm_pagequeue_assert_locked(pq) mtx_assert(&(pq)->pq_mutex, MA_OWNED)
+#define vm_pagequeue_lock(pq) mtx_lock(&(pq)->pq_mutex)
+#define vm_pagequeue_lockptr(pq) (&(pq)->pq_mutex)
+#define vm_pagequeue_unlock(pq) mtx_unlock(&(pq)->pq_mutex)
+
+#define vm_domain_free_assert_locked(n) \
+ mtx_assert(vm_domain_free_lockptr((n)), MA_OWNED)
+#define vm_domain_free_assert_unlocked(n) \
+ mtx_assert(vm_domain_free_lockptr((n)), MA_NOTOWNED)
+#define vm_domain_free_lock(d) \
+ mtx_lock(vm_domain_free_lockptr((d)))
+#define vm_domain_free_lockptr(d) \
+ (&(d)->vmd_free_mtx)
+#define vm_domain_free_unlock(d) \
+ mtx_unlock(vm_domain_free_lockptr((d)))
+
+static __inline void
+vm_pagequeue_cnt_add(struct vm_pagequeue *pq, int addend)
+{
+
+#ifdef notyet
+ vm_pagequeue_assert_locked(pq);
+#endif
+ pq->pq_cnt += addend;
+}
+#define vm_pagequeue_cnt_inc(pq) vm_pagequeue_cnt_add((pq), 1)
+#define vm_pagequeue_cnt_dec(pq) vm_pagequeue_cnt_add((pq), -1)
+
+void vm_domain_set(struct vm_domain *vmd);
+int vm_domain_available(struct vm_domain *vmd, int req, int npages);
+
+/*
+ * vm_pagequeue_domain:
+ *
+ * Return the memory domain the page belongs to.
+ */
+static inline struct vm_domain *
+vm_pagequeue_domain(vm_page_t m)
+{
+
+ return (VM_DOMAIN(vm_phys_domain(m)));
+}
+
+/*
+ * Return the number of pages we need to free up or cache.
+ * A positive number indicates that we do not have enough free pages.
+ */
+static inline int
+vm_paging_target(struct vm_domain *vmd)
+{
+
+ return (vmd->vmd_free_target - vmd->vmd_free_count);
+}
+
+/*
+ * Returns TRUE if the pagedaemon needs to be woken up.
+ */
+static inline int
+vm_paging_needed(struct vm_domain *vmd, u_int free_count)
+{
+
+ return (free_count < vmd->vmd_pageout_wakeup_thresh);
+}
+
+/*
+ * Returns TRUE if the domain is below the min paging target.
+ */
+static inline int
+vm_paging_min(struct vm_domain *vmd)
+{
+
+ return (vmd->vmd_free_min > vmd->vmd_free_count);
+}
+
+/*
+ * Returns TRUE if the domain is below the severe paging target.
+ */
+static inline int
+vm_paging_severe(struct vm_domain *vmd)
+{
+
+ return (vmd->vmd_free_severe > vmd->vmd_free_count);
+}
+
+/*
+ * Return the number of pages we need to launder.
+ * A positive number indicates that we have a shortfall of clean pages.
+ */
+static inline int
+vm_laundry_target(struct vm_domain *vmd)
+{
+
+ return (vm_paging_target(vmd));
+}
+
+static inline u_int
+vm_domain_freecnt_adj(struct vm_domain *vmd, int adj)
+{
+ u_int ret;
+
+ vm_domain_free_assert_locked(vmd);
+ ret = vmd->vmd_free_count += adj;
+ if ((!vmd->vmd_minset && vm_paging_min(vmd)) ||
+ (!vmd->vmd_severeset && vm_paging_severe(vmd)))
+ vm_domain_set(vmd);
+
+ return (ret);
+}
+
+
+#endif /* _KERNEL */
+#endif /* !_VM_PAGEQUEUE_ */
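
The header above provides everything a caller needs for the lock/adjust/wake pattern that replaces the global free-queue mutex. The fragment below is an illustrative sketch only, not part of this patch; it follows the same shape as vm_reserv_extend() later in this diff, and the vm_phys_alloc_pages() call is assumed here merely as a placeholder for taking a page off the domain's free lists:

/* Illustrative sketch, not from the patch. */
static vm_page_t
example_alloc_page_from_domain(int domain)
{
	struct vm_domain *vmd;
	vm_page_t m;
	u_int free_count;

	vmd = VM_DOMAIN(domain);
	vm_domain_free_lock(vmd);
	/* Placeholder: pull one 0-order page from this domain's free lists. */
	m = vm_phys_alloc_pages(domain, VM_FREEPOOL_DEFAULT, 0);
	if (m != NULL)
		free_count = vm_domain_freecnt_adj(vmd, -1);
	else
		free_count = vmd->vmd_free_count;
	vm_domain_free_unlock(vmd);

	/* Wake this domain's pagedaemon outside of its free lock. */
	if (vm_paging_needed(vmd, free_count))
		pagedaemon_wakeup(domain);
	return (m);
}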
diff --git a/sys/vm/vm_phys.c b/sys/vm/vm_phys.c
index 3c0f42353c1a..7270d36e452c 100644
--- a/sys/vm/vm_phys.c
+++ b/sys/vm/vm_phys.c
@@ -67,6 +67,7 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>
+#include <vm/vm_pagequeue.h>
_Static_assert(sizeof(long) * NBBY >= VM_PHYSSEG_MAX,
"Too many physsegs.");
@@ -653,7 +654,7 @@ vm_phys_alloc_freelist_pages(int domain, int freelist, int pool, int order)
if (flind < 0)
return (NULL);
- mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
+ vm_domain_free_assert_locked(VM_DOMAIN(domain));
fl = &vm_phys_free_queues[domain][flind][pool][0];
for (oind = order; oind < VM_NFREEORDER; oind++) {
m = TAILQ_FIRST(&fl[oind].pl);
@@ -906,8 +907,8 @@ vm_phys_free_pages(vm_page_t m, int order)
m, m->pool));
KASSERT(order < VM_NFREEORDER,
("vm_phys_free_pages: order %d is out of range", order));
- mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
seg = &vm_phys_segs[m->segind];
+ vm_domain_free_assert_locked(VM_DOMAIN(seg->domain));
if (order < VM_NFREEORDER - 1) {
pa = VM_PAGE_TO_PHYS(m);
do {
@@ -945,7 +946,7 @@ vm_phys_free_contig(vm_page_t m, u_long npages)
* Avoid unnecessary coalescing by freeing the pages in the largest
* possible power-of-two-sized subsets.
*/
- mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
+ vm_domain_free_assert_locked(vm_pagequeue_domain(m));
for (;; npages -= n) {
/*
* Unsigned "min" is used here so that "order" is assigned
@@ -1051,14 +1052,13 @@ vm_phys_unfree_page(vm_page_t m)
vm_page_t m_set, m_tmp;
int order;
- mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
-
/*
* First, find the contiguous, power of two-sized set of free
* physical pages containing the given physical page "m" and
* assign it to "m_set".
*/
seg = &vm_phys_segs[m->segind];
+ vm_domain_free_assert_locked(VM_DOMAIN(seg->domain));
for (m_set = m, order = 0; m_set->order == VM_NFREEORDER &&
order < VM_NFREEORDER - 1; ) {
order++;
@@ -1122,7 +1122,7 @@ vm_phys_alloc_contig(int domain, u_long npages, vm_paddr_t low, vm_paddr_t high,
KASSERT(npages > 0, ("npages is 0"));
KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
- mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
+ vm_domain_free_assert_locked(VM_DOMAIN(domain));
if (low >= high)
return (NULL);
m_run = NULL;
@@ -1167,7 +1167,7 @@ vm_phys_alloc_seg_contig(struct vm_phys_seg *seg, u_long npages,
KASSERT(npages > 0, ("npages is 0"));
KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
- mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
+ vm_domain_free_assert_locked(VM_DOMAIN(seg->domain));
/* Compute the queue that is the best fit for npages. */
for (order = 0; (1 << order) < npages; order++);
/* Search for a run satisfying the specified conditions. */
diff --git a/sys/vm/vm_phys.h b/sys/vm/vm_phys.h
index b6a1402d4994..f94b03938533 100644
--- a/sys/vm/vm_phys.h
+++ b/sys/vm/vm_phys.h
@@ -96,12 +96,12 @@ int vm_phys_mem_affinity(int f, int t);
/*
*
- * vm_phys_domidx:
+ * vm_phys_domain:
*
* Return the index of the domain the page belongs to.
*/
static inline int
-vm_phys_domidx(vm_page_t m)
+vm_phys_domain(vm_page_t m)
{
#ifdef NUMA
int domn, segind;
@@ -117,26 +117,5 @@ vm_phys_domidx(vm_page_t m)
#endif
}
-/*
- * vm_phys_domain:
- *
- * Return the memory domain the page belongs to.
- */
-static inline struct vm_domain *
-vm_phys_domain(vm_page_t m)
-{
-
- return (&vm_dom[vm_phys_domidx(m)]);
-}
-
-static inline u_int
-vm_phys_freecnt_adj(vm_page_t m, int adj)
-{
-
- mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
- vm_phys_domain(m)->vmd_free_count += adj;
- return (vm_cnt.v_free_count += adj);
-}
-
#endif /* _KERNEL */
#endif /* !_VM_PHYS_H_ */
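
With vm_phys_freecnt_adj() and the struct-returning vm_phys_domain() removed, callers locate a page's domain through vm_pagequeue_domain() and adjust that domain's counter under its own free lock. The following hedged sketch (not from the patch) frees a single 0-order page under the new scheme:

/* Illustrative sketch, not from the patch. */
static void
example_free_page_to_domain(vm_page_t m)
{
	struct vm_domain *vmd;

	vmd = vm_pagequeue_domain(m);	/* formerly vm_phys_domain(m) */
	vm_domain_free_lock(vmd);
	vm_phys_free_pages(m, 0);
	vm_domain_freecnt_adj(vmd, 1);	/* formerly vm_phys_freecnt_adj(m, 1) */
	vm_domain_free_unlock(vmd);
}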
diff --git a/sys/vm/vm_reserv.c b/sys/vm/vm_reserv.c
index 8b37e228f104..da7c36297302 100644
--- a/sys/vm/vm_reserv.c
+++ b/sys/vm/vm_reserv.c
@@ -59,7 +59,9 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_param.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
+#include <vm/vm_pageout.h>
#include <vm/vm_phys.h>
+#include <vm/vm_pagequeue.h>
#include <vm/vm_radix.h>
#include <vm/vm_reserv.h>
@@ -163,17 +165,21 @@ popmap_is_set(popmap_t popmap[], int i)
* object's list of reservations.
*
* A partially populated reservation can be broken and reclaimed at any time.
+ *
+ * f - vm_domain_free_lock
+ * o - vm_reserv_object_lock
+ * c - constant after boot
*/
struct vm_reserv {
- TAILQ_ENTRY(vm_reserv) partpopq;
- LIST_ENTRY(vm_reserv) objq;
- vm_object_t object; /* containing object */
- vm_pindex_t pindex; /* offset within object */
- vm_page_t pages; /* first page of a superpage */
- int domain; /* NUMA domain */
- int popcnt; /* # of pages in use */
- char inpartpopq;
- popmap_t popmap[NPOPMAP]; /* bit vector of used pages */
+ TAILQ_ENTRY(vm_reserv) partpopq; /* (f) per-domain queue. */
+ LIST_ENTRY(vm_reserv) objq; /* (o, f) object queue */
+ vm_object_t object; /* (o, f) containing object */
+ vm_pindex_t pindex; /* (o, f) offset in object */
+ vm_page_t pages; /* (c) first page */
+ int domain; /* (c) NUMA domain. */
+ int popcnt; /* (f) # of pages in use */
+ char inpartpopq; /* (f) */
+ popmap_t popmap[NPOPMAP]; /* (f) bit vector, used pages */
};
/*
@@ -234,6 +240,25 @@ static long vm_reserv_reclaimed;
SYSCTL_LONG(_vm_reserv, OID_AUTO, reclaimed, CTLFLAG_RD,
&vm_reserv_reclaimed, 0, "Cumulative number of reclaimed reservations");
+/*
+ * The object lock pool is used to synchronize the rvq. We cannot use a
+ * pool mutex because it is required before malloc works.
+ *
+ * The "hash" function could be made faster without divide and modulo.
+ */
+#define VM_RESERV_OBJ_LOCK_COUNT MAXCPU
+
+struct mtx_padalign vm_reserv_object_mtx[VM_RESERV_OBJ_LOCK_COUNT];
+
+#define vm_reserv_object_lock_idx(object) \
+ (((uintptr_t)object / sizeof(*object)) % VM_RESERV_OBJ_LOCK_COUNT)
+#define vm_reserv_object_lock_ptr(object) \
+ &vm_reserv_object_mtx[vm_reserv_object_lock_idx((object))]
+#define vm_reserv_object_lock(object) \
+ mtx_lock(vm_reserv_object_lock_ptr((object)))
+#define vm_reserv_object_unlock(object) \
+ mtx_unlock(vm_reserv_object_lock_ptr((object)))
+
static void vm_reserv_break(vm_reserv_t rv, vm_page_t m);
static void vm_reserv_depopulate(vm_reserv_t rv, int index);
static vm_reserv_t vm_reserv_from_page(vm_page_t m);
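
The lock pool above maps an object pointer to one of MAXCPU padded mutexes by dividing the address by the object size and taking the result modulo the pool size. The small stand-alone sketch below, with a made-up object type and a pool of 64 locks, models vm_reserv_object_lock_idx() only; it is not code from the patch, but it shows why objects adjacent in memory tend to land on different locks:

#include <stdint.h>
#include <stdio.h>

#define	DEMO_LOCK_COUNT	64			/* stands in for MAXCPU */

struct demo_object { char pad[256]; };		/* hypothetical vm_object stand-in */

/* Same idea as vm_reserv_object_lock_idx(): pointer / size, modulo pool. */
static unsigned int
demo_lock_idx(const struct demo_object *o)
{

	return (((uintptr_t)o / sizeof(*o)) % DEMO_LOCK_COUNT);
}

int
main(void)
{
	static struct demo_object objs[4];
	unsigned int i;

	/* Objects that are adjacent in memory map to distinct locks. */
	for (i = 0; i < 4; i++)
		printf("object %u -> lock %u\n", i, demo_lock_idx(&objs[i]));
	return (0);
}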
@@ -288,12 +313,12 @@ sysctl_vm_reserv_partpopq(SYSCTL_HANDLER_ARGS)
for (level = -1; level <= VM_NRESERVLEVEL - 2; level++) {
counter = 0;
unused_pages = 0;
- mtx_lock(&vm_page_queue_free_mtx);
+ vm_domain_free_lock(VM_DOMAIN(domain));
TAILQ_FOREACH(rv, &vm_rvq_partpop[domain], partpopq) {
counter++;
unused_pages += VM_LEVEL_0_NPAGES - rv->popcnt;
}
- mtx_unlock(&vm_page_queue_free_mtx);
+ vm_domain_free_unlock(VM_DOMAIN(domain));
sbuf_printf(&sbuf, "%6d, %7d, %6dK, %6d\n",
domain, level,
unused_pages * ((int)PAGE_SIZE / 1024), counter);
@@ -305,6 +330,49 @@ sysctl_vm_reserv_partpopq(SYSCTL_HANDLER_ARGS)
}
/*
+ * Remove a reservation from the object's objq.
+ */
+static void
+vm_reserv_remove(vm_reserv_t rv)
+{
+ vm_object_t object;
+
+ KASSERT(rv->object != NULL,
+ ("vm_reserv_remove: reserv %p is free", rv));
+ KASSERT(!rv->inpartpopq,
+ ("vm_reserv_remove: reserv %p's inpartpopq is TRUE", rv));
+ object = rv->object;
+ vm_reserv_object_lock(object);
+ LIST_REMOVE(rv, objq);
+ rv->object = NULL;
+ vm_reserv_object_unlock(object);
+}
+
+/*
+ * Insert a new reservation into the object's objq.
+ */
+static void
+vm_reserv_insert(vm_reserv_t rv, vm_object_t object, vm_pindex_t pindex)
+{
+ int i;
+
+ KASSERT(rv->object == NULL,
+ ("vm_reserv_insert: reserv %p isn't free", rv));
+ KASSERT(rv->popcnt == 0,
+ ("vm_reserv_insert: reserv %p's popcnt is corrupted", rv));
+ KASSERT(!rv->inpartpopq,
+ ("vm_reserv_insert: reserv %p's inpartpopq is TRUE", rv));
+ for (i = 0; i < NPOPMAP; i++)
+ KASSERT(rv->popmap[i] == 0,
+ ("vm_reserv_insert: reserv %p's popmap is corrupted", rv));
+ vm_reserv_object_lock(object);
+ rv->pindex = pindex;
+ rv->object = object;
+ LIST_INSERT_HEAD(&object->rvq, rv, objq);
+ vm_reserv_object_unlock(object);
+}
+
+/*
* Reduces the given reservation's population count. If the population count
* becomes zero, the reservation is destroyed. Additionally, moves the
* reservation to the tail of the partially populated reservation queue if the
@@ -316,7 +384,7 @@ static void
vm_reserv_depopulate(vm_reserv_t rv, int index)
{
- mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
+ vm_domain_free_assert_locked(VM_DOMAIN(rv->domain));
KASSERT(rv->object != NULL,
("vm_reserv_depopulate: reserv %p is free", rv));
KASSERT(popmap_is_set(rv->popmap, index),
@@ -339,9 +407,7 @@ vm_reserv_depopulate(vm_reserv_t rv, int index)
popmap_clear(rv->popmap, index);
rv->popcnt--;
if (rv->popcnt == 0) {
- LIST_REMOVE(rv, objq);
- rv->object = NULL;
- rv->domain = -1;
+ vm_reserv_remove(rv);
vm_phys_free_pages(rv->pages, VM_LEVEL_0_ORDER);
vm_reserv_freed++;
} else {
@@ -361,6 +427,43 @@ vm_reserv_from_page(vm_page_t m)
}
/*
+ * Returns an existing reservation, or NULL, and initializes the successor pointer.
+ */
+static vm_reserv_t
+vm_reserv_from_object(vm_object_t object, vm_pindex_t pindex,
+ vm_page_t mpred, vm_page_t *msuccp)
+{
+ vm_reserv_t rv;
+ vm_page_t msucc;
+
+ msucc = NULL;
+ if (mpred != NULL) {
+ KASSERT(mpred->object == object,
+ ("vm_reserv_from_object: object doesn't contain mpred"));
+ KASSERT(mpred->pindex < pindex,
+ ("vm_reserv_from_object: mpred doesn't precede pindex"));
+ rv = vm_reserv_from_page(mpred);
+ if (rv->object == object && vm_reserv_has_pindex(rv, pindex))
+ goto found;
+ msucc = TAILQ_NEXT(mpred, listq);
+ } else
+ msucc = TAILQ_FIRST(&object->memq);
+ if (msucc != NULL) {
+ KASSERT(msucc->pindex > pindex,
+ ("vm_reserv_from_object: msucc doesn't succeed pindex"));
+ rv = vm_reserv_from_page(msucc);
+ if (rv->object == object && vm_reserv_has_pindex(rv, pindex))
+ goto found;
+ }
+ rv = NULL;
+
+found:
+ *msuccp = msucc;
+
+ return (rv);
+}
+
+/*
* Returns TRUE if the given reservation contains the given page index and
* FALSE otherwise.
*/
@@ -381,7 +484,7 @@ static void
vm_reserv_populate(vm_reserv_t rv, int index)
{
- mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
+ vm_domain_free_assert_locked(VM_DOMAIN(rv->domain));
KASSERT(rv->object != NULL,
("vm_reserv_populate: reserv %p is free", rv));
KASSERT(popmap_is_clear(rv->popmap, index),
@@ -423,6 +526,100 @@ vm_reserv_populate(vm_reserv_t rv, int index)
* The object and free page queue must be locked.
*/
vm_page_t
+vm_reserv_extend_contig(int req, vm_object_t object, vm_pindex_t pindex,
+ int domain, u_long npages, vm_paddr_t low, vm_paddr_t high,
+ u_long alignment, vm_paddr_t boundary, vm_page_t mpred)
+{
+ struct vm_domain *vmd;
+ vm_paddr_t pa, size;
+ vm_page_t m, msucc;
+ vm_reserv_t rv;
+ int i, index;
+
+ VM_OBJECT_ASSERT_WLOCKED(object);
+ KASSERT(npages != 0, ("vm_reserv_alloc_contig: npages is 0"));
+
+ /*
+ * Is a reservation fundamentally impossible?
+ */
+ if (pindex < VM_RESERV_INDEX(object, pindex) ||
+ pindex + npages > object->size || object->resident_page_count == 0)
+ return (NULL);
+
+ /*
+ * All reservations of a particular size have the same alignment.
+ * Assuming that the first page is allocated from a reservation, the
+ * least significant bits of its physical address can be determined
+ * from its offset from the beginning of the reservation and the size
+ * of the reservation.
+ *
+ * Could the specified index within a reservation of the smallest
+ * possible size satisfy the alignment and boundary requirements?
+ */
+ pa = VM_RESERV_INDEX(object, pindex) << PAGE_SHIFT;
+ if ((pa & (alignment - 1)) != 0)
+ return (NULL);
+ size = npages << PAGE_SHIFT;
+ if (((pa ^ (pa + size - 1)) & ~(boundary - 1)) != 0)
+ return (NULL);
+
+ /*
+ * Look for an existing reservation.
+ */
+ rv = vm_reserv_from_object(object, pindex, mpred, &msucc);
+ if (rv == NULL)
+ return (NULL);
+ KASSERT(object != kernel_object || rv->domain == domain,
+ ("vm_reserv_extend_contig: Domain mismatch from reservation."));
+ index = VM_RESERV_INDEX(object, pindex);
+ /* Does the allocation fit within the reservation? */
+ if (index + npages > VM_LEVEL_0_NPAGES)
+ return (NULL);
+ domain = rv->domain;
+ vmd = VM_DOMAIN(domain);
+ vm_domain_free_lock(vmd);
+ if (rv->object != object || !vm_domain_available(vmd, req, npages)) {
+ m = NULL;
+ goto out;
+ }
+ m = &rv->pages[index];
+ pa = VM_PAGE_TO_PHYS(m);
+ if (pa < low || pa + size > high || (pa & (alignment - 1)) != 0 ||
+ ((pa ^ (pa + size - 1)) & ~(boundary - 1)) != 0) {
+ m = NULL;
+ goto out;
+ }
+ /* Handle vm_page_rename(m, new_object, ...). */
+ for (i = 0; i < npages; i++) {
+ if (popmap_is_set(rv->popmap, index + i)) {
+ m = NULL;
+ goto out;
+ }
+ }
+ for (i = 0; i < npages; i++)
+ vm_reserv_populate(rv, index + i);
+ vm_domain_freecnt_adj(vmd, -npages);
+out:
+ vm_domain_free_unlock(vmd);
+ return (m);
+}
+
+/*
+ * Allocates a contiguous set of physical pages of the given size "npages"
+ * from existing or newly created reservations. All of the physical pages
+ * must be at or above the given physical address "low" and below the given
+ * physical address "high". The given value "alignment" determines the
+ * alignment of the first physical page in the set. If the given value
+ * "boundary" is non-zero, then the set of physical pages cannot cross any
+ * physical address boundary that is a multiple of that value. Both
+ * "alignment" and "boundary" must be a power of two.
+ *
+ * The page "mpred" must immediately precede the offset "pindex" within the
+ * specified object.
+ *
+ * The object and free page queue must be locked.
+ */
+vm_page_t
vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex, int domain,
u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
vm_paddr_t boundary, vm_page_t mpred)
@@ -434,7 +631,7 @@ vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex, int domain,
u_long allocpages, maxpages, minpages;
int i, index, n;
- mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
+ vm_domain_free_assert_locked(VM_DOMAIN(domain));
VM_OBJECT_ASSERT_WLOCKED(object);
KASSERT(npages != 0, ("vm_reserv_alloc_contig: npages is 0"));
@@ -463,52 +660,48 @@ vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex, int domain,
return (NULL);
/*
- * Look for an existing reservation.
+ * Callers should have extended an existing reservation prior to
+ * calling this function. If a reservation exists, it is
+ * incompatible with the allocation.
*/
- if (mpred != NULL) {
- KASSERT(mpred->object == object,
- ("vm_reserv_alloc_contig: object doesn't contain mpred"));
- KASSERT(mpred->pindex < pindex,
- ("vm_reserv_alloc_contig: mpred doesn't precede pindex"));
- rv = vm_reserv_from_page(mpred);
- if (rv->object == object && vm_reserv_has_pindex(rv, pindex))
- goto found;
- msucc = TAILQ_NEXT(mpred, listq);
- } else
- msucc = TAILQ_FIRST(&object->memq);
- if (msucc != NULL) {
- KASSERT(msucc->pindex > pindex,
- ("vm_reserv_alloc_contig: msucc doesn't succeed pindex"));
- rv = vm_reserv_from_page(msucc);
- if (rv->object == object && vm_reserv_has_pindex(rv, pindex))
- goto found;
- }
+ rv = vm_reserv_from_object(object, pindex, mpred, &msucc);
+ if (rv != NULL)
+ return (NULL);
/*
* Could at least one reservation fit between the first index to the
* left that can be used ("leftcap") and the first index to the right
* that cannot be used ("rightcap")?
+ *
+ * We must synchronize with the reserv object lock to protect the
+ * pindex/object of the resulting reservations against rename while
+ * we are inspecting.
*/
first = pindex - VM_RESERV_INDEX(object, pindex);
+ minpages = VM_RESERV_INDEX(object, pindex) + npages;
+ maxpages = roundup2(minpages, VM_LEVEL_0_NPAGES);
+ allocpages = maxpages;
+ vm_reserv_object_lock(object);
if (mpred != NULL) {
if ((rv = vm_reserv_from_page(mpred))->object != object)
leftcap = mpred->pindex + 1;
else
leftcap = rv->pindex + VM_LEVEL_0_NPAGES;
- if (leftcap > first)
+ if (leftcap > first) {
+ vm_reserv_object_unlock(object);
return (NULL);
+ }
}
- minpages = VM_RESERV_INDEX(object, pindex) + npages;
- maxpages = roundup2(minpages, VM_LEVEL_0_NPAGES);
- allocpages = maxpages;
if (msucc != NULL) {
if ((rv = vm_reserv_from_page(msucc))->object != object)
rightcap = msucc->pindex;
else
rightcap = rv->pindex;
if (first + maxpages > rightcap) {
- if (maxpages == VM_LEVEL_0_NPAGES)
+ if (maxpages == VM_LEVEL_0_NPAGES) {
+ vm_reserv_object_unlock(object);
return (NULL);
+ }
/*
* At least one reservation will fit between "leftcap"
@@ -519,6 +712,7 @@ vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex, int domain,
allocpages = minpages;
}
}
+ vm_reserv_object_unlock(object);
/*
* Would the last new reservation extend past the end of the object?
@@ -549,7 +743,7 @@ vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex, int domain,
VM_LEVEL_0_SIZE), boundary > VM_LEVEL_0_SIZE ? boundary : 0);
if (m == NULL)
return (NULL);
- KASSERT(vm_phys_domidx(m) == domain,
+ KASSERT(vm_phys_domain(m) == domain,
("vm_reserv_alloc_contig: Page domain does not match requested."));
/*
@@ -565,22 +759,7 @@ vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex, int domain,
KASSERT(rv->pages == m,
("vm_reserv_alloc_contig: reserv %p's pages is corrupted",
rv));
- KASSERT(rv->object == NULL,
- ("vm_reserv_alloc_contig: reserv %p isn't free", rv));
- LIST_INSERT_HEAD(&object->rvq, rv, objq);
- rv->object = object;
- rv->pindex = first;
- rv->domain = domain;
- KASSERT(rv->popcnt == 0,
- ("vm_reserv_alloc_contig: reserv %p's popcnt is corrupted",
- rv));
- KASSERT(!rv->inpartpopq,
- ("vm_reserv_alloc_contig: reserv %p's inpartpopq is TRUE",
- rv));
- for (i = 0; i < NPOPMAP; i++)
- KASSERT(rv->popmap[i] == 0,
- ("vm_reserv_alloc_contig: reserv %p's popmap is corrupted",
- rv));
+ vm_reserv_insert(rv, object, first);
n = ulmin(VM_LEVEL_0_NPAGES - index, npages);
for (i = 0; i < n; i++)
vm_reserv_populate(rv, index + i);
@@ -594,31 +773,70 @@ vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex, int domain,
allocpages -= VM_LEVEL_0_NPAGES;
} while (allocpages >= VM_LEVEL_0_NPAGES);
return (m_ret);
+}
+
+/*
+ * Attempts to extend an existing reservation and allocate the page to the
+ * object.
+ *
+ * The page "mpred" must immediately precede the offset "pindex" within the
+ * specified object.
+ *
+ * The object must be locked.
+ */
+vm_page_t
+vm_reserv_extend(int req, vm_object_t object, vm_pindex_t pindex, int domain,
+ vm_page_t mpred)
+{
+ struct vm_domain *vmd;
+ vm_page_t m, msucc;
+ vm_reserv_t rv;
+ int index, free_count;
+
+ VM_OBJECT_ASSERT_WLOCKED(object);
/*
- * Found a matching reservation.
+ * Could a reservation currently exist?
*/
-found:
- index = VM_RESERV_INDEX(object, pindex);
- /* Does the allocation fit within the reservation? */
- if (index + npages > VM_LEVEL_0_NPAGES)
+ if (pindex < VM_RESERV_INDEX(object, pindex) ||
+ pindex >= object->size || object->resident_page_count == 0)
return (NULL);
- m = &rv->pages[index];
- pa = VM_PAGE_TO_PHYS(m);
- if (pa < low || pa + size > high || (pa & (alignment - 1)) != 0 ||
- ((pa ^ (pa + size - 1)) & ~(boundary - 1)) != 0)
+
+ /*
+ * Look for an existing reservation.
+ */
+ rv = vm_reserv_from_object(object, pindex, mpred, &msucc);
+ if (rv == NULL)
return (NULL);
- /* Handle vm_page_rename(m, new_object, ...). */
- for (i = 0; i < npages; i++)
- if (popmap_is_set(rv->popmap, index + i))
- return (NULL);
- for (i = 0; i < npages; i++)
- vm_reserv_populate(rv, index + i);
+
+ KASSERT(object != kernel_object || rv->domain == domain,
+ ("vm_reserv_extend: Domain mismatch from reservation."));
+ domain = rv->domain;
+ vmd = VM_DOMAIN(domain);
+ index = VM_RESERV_INDEX(object, pindex);
+ m = &rv->pages[index];
+ vm_domain_free_lock(vmd);
+ if (vm_domain_available(vmd, req, 1) == 0 ||
+ /* Handle reclaim race. */
+ rv->object != object ||
+ /* Handle vm_page_rename(m, new_object, ...). */
+ popmap_is_set(rv->popmap, index))
+ m = NULL;
+ if (m != NULL) {
+ vm_reserv_populate(rv, index);
+ free_count = vm_domain_freecnt_adj(vmd, -1);
+ } else
+ free_count = vmd->vmd_free_count;
+ vm_domain_free_unlock(vmd);
+
+ if (vm_paging_needed(vmd, free_count))
+ pagedaemon_wakeup(domain);
+
return (m);
}
/*
- * Allocates a page from an existing or newly created reservation.
+ * Allocates a page from an existing reservation.
*
* The page "mpred" must immediately precede the offset "pindex" within the
* specified object.
@@ -632,9 +850,9 @@ vm_reserv_alloc_page(vm_object_t object, vm_pindex_t pindex, int domain,
vm_page_t m, msucc;
vm_pindex_t first, leftcap, rightcap;
vm_reserv_t rv;
- int i, index;
+ int index;
- mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
+ vm_domain_free_assert_locked(VM_DOMAIN(domain));
VM_OBJECT_ASSERT_WLOCKED(object);
/*
@@ -645,48 +863,45 @@ vm_reserv_alloc_page(vm_object_t object, vm_pindex_t pindex, int domain,
return (NULL);
/*
- * Look for an existing reservation.
+ * Callers should have extended an existing reservation prior to
+ * calling this function. If a reservation exists, it is
+ * incompatible with the allocation.
*/
- if (mpred != NULL) {
- KASSERT(mpred->object == object,
- ("vm_reserv_alloc_page: object doesn't contain mpred"));
- KASSERT(mpred->pindex < pindex,
- ("vm_reserv_alloc_page: mpred doesn't precede pindex"));
- rv = vm_reserv_from_page(mpred);
- if (rv->object == object && vm_reserv_has_pindex(rv, pindex))
- goto found;
- msucc = TAILQ_NEXT(mpred, listq);
- } else
- msucc = TAILQ_FIRST(&object->memq);
- if (msucc != NULL) {
- KASSERT(msucc->pindex > pindex,
- ("vm_reserv_alloc_page: msucc doesn't succeed pindex"));
- rv = vm_reserv_from_page(msucc);
- if (rv->object == object && vm_reserv_has_pindex(rv, pindex))
- goto found;
- }
+ rv = vm_reserv_from_object(object, pindex, mpred, &msucc);
+ if (rv != NULL)
+ return (NULL);
/*
* Could a reservation fit between the first index to the left that
* can be used and the first index to the right that cannot be used?
+ *
+ * We must synchronize with the reserv object lock to protect the
+ * pindex/object of the resulting reservations against rename while
+ * we are inspecting.
*/
first = pindex - VM_RESERV_INDEX(object, pindex);
+ vm_reserv_object_lock(object);
if (mpred != NULL) {
if ((rv = vm_reserv_from_page(mpred))->object != object)
leftcap = mpred->pindex + 1;
else
leftcap = rv->pindex + VM_LEVEL_0_NPAGES;
- if (leftcap > first)
+ if (leftcap > first) {
+ vm_reserv_object_unlock(object);
return (NULL);
+ }
}
if (msucc != NULL) {
if ((rv = vm_reserv_from_page(msucc))->object != object)
rightcap = msucc->pindex;
else
rightcap = rv->pindex;
- if (first + VM_LEVEL_0_NPAGES > rightcap)
+ if (first + VM_LEVEL_0_NPAGES > rightcap) {
+ vm_reserv_object_unlock(object);
return (NULL);
+ }
}
+ vm_reserv_object_unlock(object);
/*
* Would a new reservation extend past the end of the object?
@@ -712,37 +927,10 @@ vm_reserv_alloc_page(vm_object_t object, vm_pindex_t pindex, int domain,
rv = vm_reserv_from_page(m);
KASSERT(rv->pages == m,
("vm_reserv_alloc_page: reserv %p's pages is corrupted", rv));
- KASSERT(rv->object == NULL,
- ("vm_reserv_alloc_page: reserv %p isn't free", rv));
- LIST_INSERT_HEAD(&object->rvq, rv, objq);
- rv->object = object;
- rv->pindex = first;
- rv->domain = domain;
- KASSERT(rv->popcnt == 0,
- ("vm_reserv_alloc_page: reserv %p's popcnt is corrupted", rv));
- KASSERT(!rv->inpartpopq,
- ("vm_reserv_alloc_page: reserv %p's inpartpopq is TRUE", rv));
- for (i = 0; i < NPOPMAP; i++)
- KASSERT(rv->popmap[i] == 0,
- ("vm_reserv_alloc_page: reserv %p's popmap is corrupted",
- rv));
+ vm_reserv_insert(rv, object, first);
index = VM_RESERV_INDEX(object, pindex);
vm_reserv_populate(rv, index);
return (&rv->pages[index]);
-
- /*
- * Found a matching reservation.
- */
-found:
- index = VM_RESERV_INDEX(object, pindex);
- m = &rv->pages[index];
- KASSERT(object != kernel_object || vm_phys_domidx(m) == domain,
- ("vm_reserv_alloc_page: Domain mismatch from reservation."));
- /* Handle vm_page_rename(m, new_object, ...). */
- if (popmap_is_set(rv->popmap, index))
- return (NULL);
- vm_reserv_populate(rv, index);
- return (m);
}
/*
@@ -759,14 +947,8 @@ vm_reserv_break(vm_reserv_t rv, vm_page_t m)
{
int begin_zeroes, hi, i, lo;
- mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
- KASSERT(rv->object != NULL,
- ("vm_reserv_break: reserv %p is free", rv));
- KASSERT(!rv->inpartpopq,
- ("vm_reserv_break: reserv %p's inpartpopq is TRUE", rv));
- LIST_REMOVE(rv, objq);
- rv->object = NULL;
- rv->domain = -1;
+ vm_domain_free_assert_locked(VM_DOMAIN(rv->domain));
+ vm_reserv_remove(rv);
if (m != NULL) {
/*
* Since the reservation is being broken, there is no harm in
@@ -830,9 +1012,26 @@ void
vm_reserv_break_all(vm_object_t object)
{
vm_reserv_t rv;
+ struct vm_domain *vmd;
- mtx_lock(&vm_page_queue_free_mtx);
+ /*
+ * This access of object->rvq is unsynchronized so that the
+ * object rvq lock can nest after the domain_free lock. We
+ * must check for races in the results. However, the object
+ * lock prevents new additions, so we are guaranteed that when
+ * it returns NULL the object is properly empty.
+ */
+ vmd = NULL;
while ((rv = LIST_FIRST(&object->rvq)) != NULL) {
+ if (vmd != VM_DOMAIN(rv->domain)) {
+ if (vmd != NULL)
+ vm_domain_free_unlock(vmd);
+ vmd = VM_DOMAIN(rv->domain);
+ vm_domain_free_lock(vmd);
+ }
+ /* Reclaim race. */
+ if (rv->object != object)
+ continue;
KASSERT(rv->object == object,
("vm_reserv_break_all: reserv %p is corrupted", rv));
if (rv->inpartpopq) {
@@ -841,7 +1040,8 @@ vm_reserv_break_all(vm_object_t object)
}
vm_reserv_break(rv, NULL);
}
- mtx_unlock(&vm_page_queue_free_mtx);
+ if (vmd != NULL)
+ vm_domain_free_unlock(vmd);
}
/*
@@ -855,8 +1055,8 @@ vm_reserv_free_page(vm_page_t m)
{
vm_reserv_t rv;
- mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
rv = vm_reserv_from_page(m);
+ vm_domain_free_assert_locked(VM_DOMAIN(rv->domain));
if (rv->object == NULL)
return (FALSE);
vm_reserv_depopulate(rv, m - rv->pages);
@@ -886,6 +1086,8 @@ vm_reserv_init(void)
while (paddr + VM_LEVEL_0_SIZE <= seg->end) {
vm_reserv_array[paddr >> VM_LEVEL_0_SHIFT].pages =
PHYS_TO_VM_PAGE(paddr);
+ vm_reserv_array[paddr >> VM_LEVEL_0_SHIFT].domain =
+ seg->domain;
paddr += VM_LEVEL_0_SIZE;
}
}
@@ -902,8 +1104,8 @@ vm_reserv_is_page_free(vm_page_t m)
{
vm_reserv_t rv;
- mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
rv = vm_reserv_from_page(m);
+ vm_domain_free_assert_locked(VM_DOMAIN(rv->domain));
if (rv->object == NULL)
return (false);
return (popmap_is_clear(rv->popmap, m - rv->pages));
@@ -945,7 +1147,7 @@ static void
vm_reserv_reclaim(vm_reserv_t rv)
{
- mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
+ vm_domain_free_assert_locked(VM_DOMAIN(rv->domain));
KASSERT(rv->inpartpopq,
("vm_reserv_reclaim: reserv %p's inpartpopq is FALSE", rv));
KASSERT(rv->domain >= 0 && rv->domain < vm_ndomains,
@@ -969,7 +1171,7 @@ vm_reserv_reclaim_inactive(int domain)
{
vm_reserv_t rv;
- mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
+ vm_domain_free_assert_locked(VM_DOMAIN(domain));
if ((rv = TAILQ_FIRST(&vm_rvq_partpop[domain])) != NULL) {
vm_reserv_reclaim(rv);
return (TRUE);
@@ -993,7 +1195,7 @@ vm_reserv_reclaim_contig(int domain, u_long npages, vm_paddr_t low,
vm_reserv_t rv;
int hi, i, lo, low_index, next_free;
- mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
+ vm_domain_free_assert_locked(VM_DOMAIN(domain));
if (npages > VM_LEVEL_0_NPAGES - 1)
return (FALSE);
size = npages << PAGE_SHIFT;
@@ -1084,14 +1286,19 @@ vm_reserv_rename(vm_page_t m, vm_object_t new_object, vm_object_t old_object,
VM_OBJECT_ASSERT_WLOCKED(new_object);
rv = vm_reserv_from_page(m);
if (rv->object == old_object) {
- mtx_lock(&vm_page_queue_free_mtx);
+ vm_domain_free_lock(VM_DOMAIN(rv->domain));
if (rv->object == old_object) {
+ vm_reserv_object_lock(old_object);
+ rv->object = NULL;
LIST_REMOVE(rv, objq);
- LIST_INSERT_HEAD(&new_object->rvq, rv, objq);
+ vm_reserv_object_unlock(old_object);
+ vm_reserv_object_lock(new_object);
rv->object = new_object;
rv->pindex -= old_object_offset;
+ LIST_INSERT_HEAD(&new_object->rvq, rv, objq);
+ vm_reserv_object_unlock(new_object);
}
- mtx_unlock(&vm_page_queue_free_mtx);
+ vm_domain_free_unlock(VM_DOMAIN(rv->domain));
}
}
@@ -1121,6 +1328,7 @@ vm_reserv_startup(vm_offset_t *vaddr, vm_paddr_t end, vm_paddr_t high_water)
{
vm_paddr_t new_end;
size_t size;
+ int i;
/*
* Calculate the size (in bytes) of the reservation array. Round up
@@ -1140,6 +1348,10 @@ vm_reserv_startup(vm_offset_t *vaddr, vm_paddr_t end, vm_paddr_t high_water)
VM_PROT_READ | VM_PROT_WRITE);
bzero(vm_reserv_array, size);
+ for (i = 0; i < VM_RESERV_OBJ_LOCK_COUNT; i++)
+ mtx_init(&vm_reserv_object_mtx[i], "resv obj lock", NULL,
+ MTX_DEF);
+
/*
* Return the next available physical address.
*/
diff --git a/sys/vm/vm_reserv.h b/sys/vm/vm_reserv.h
index 9be3a73ea252..91d2bf6aa389 100644
--- a/sys/vm/vm_reserv.h
+++ b/sys/vm/vm_reserv.h
@@ -50,8 +50,14 @@
vm_page_t vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex,
int domain, u_long npages, vm_paddr_t low, vm_paddr_t high,
u_long alignment, vm_paddr_t boundary, vm_page_t mpred);
+vm_page_t vm_reserv_extend_contig(int req, vm_object_t object,
+ vm_pindex_t pindex, int domain, u_long npages,
+ vm_paddr_t low, vm_paddr_t high, u_long alignment,
+ vm_paddr_t boundary, vm_page_t mpred);
vm_page_t vm_reserv_alloc_page(vm_object_t object, vm_pindex_t pindex,
int domain, vm_page_t mpred);
+vm_page_t vm_reserv_extend(int req, vm_object_t object,
+ vm_pindex_t pindex, int domain, vm_page_t mpred);
void vm_reserv_break_all(vm_object_t object);
boolean_t vm_reserv_free_page(vm_page_t m);
void vm_reserv_init(void);
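
The header now exposes a two-step allocation path: vm_reserv_extend() and vm_reserv_extend_contig() try to grow an existing reservation while the caller holds only the object lock (they take and drop the domain free lock internally), and only if that fails does the caller take the domain free lock and fall back to vm_reserv_alloc_page() or vm_reserv_alloc_contig(). The sketch below is a hypothetical caller written to illustrate that split, not one of the actual call sites updated elsewhere in this commit; the pagedaemon wakeup and the fallback to the physical allocator on the slow path are omitted for brevity:

/* Illustrative sketch, not from the patch. */
static vm_page_t
example_reserv_alloc(int req, vm_object_t object, vm_pindex_t pindex,
    int domain, vm_page_t mpred)
{
	struct vm_domain *vmd;
	vm_page_t m;

	VM_OBJECT_ASSERT_WLOCKED(object);

	/* Fast path: extend an existing reservation; no free lock held here. */
	m = vm_reserv_extend(req, object, pindex, domain, mpred);
	if (m != NULL)
		return (m);

	/* Slow path: create a new reservation under the domain free lock. */
	vmd = VM_DOMAIN(domain);
	vm_domain_free_lock(vmd);
	if (vm_domain_available(vmd, req, 1) != 0)
		m = vm_reserv_alloc_page(object, pindex, domain, mpred);
	else
		m = NULL;
	if (m != NULL)
		vm_domain_freecnt_adj(vmd, -1);
	vm_domain_free_unlock(vmd);
	/* A real caller would fall back to the physical allocator here. */
	return (m);
}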
diff --git a/sys/vm/vm_swapout.c b/sys/vm/vm_swapout.c
index ea76c1ee0d69..97d9eb474e73 100644
--- a/sys/vm/vm_swapout.c
+++ b/sys/vm/vm_swapout.c
@@ -650,7 +650,7 @@ swapper(void)
loop:
if (vm_page_count_min()) {
- VM_WAIT;
+ vm_wait_min();
goto loop;
}
diff --git a/sys/vm/vnode_pager.c b/sys/vm/vnode_pager.c
index 966897243814..ca982eb52fc2 100644
--- a/sys/vm/vnode_pager.c
+++ b/sys/vm/vnode_pager.c
@@ -1167,7 +1167,7 @@ vnode_pager_putpages(vm_object_t object, vm_page_t *m, int count,
 * daemon up. This should probably be addressed XXX.
*/
- if (vm_cnt.v_free_count < vm_cnt.v_pageout_free_min)
+ if (vm_page_count_min())
flags |= VM_PAGER_PUT_SYNC;
/*