author	Andrew Gallatin <gallatin@FreeBSD.org>	2018-10-01 14:14:21 +0000
committer	Andrew Gallatin <gallatin@FreeBSD.org>	2018-10-01 14:14:21 +0000
commit	30c5525b3cd28170e736202510ab64d51aa703bf
tree	bc1ffae70e460a396beda9f42a5fb363ab5c13b5 /sys/vm
parent	15a087e55119214fbe129fbbef9a4e49bab6bc90
Allow empty NUMA memory domains to support Threadripper2
The AMD Threadripper 2990WX is basically a slightly crippled Epyc. Rather than having 4 memory controllers, one per NUMA domain, it has only 2 memory controllers enabled. This means that only 2 of the 4 NUMA domains can be populated with physical memory, and the others are empty.

Add support to FreeBSD for empty NUMA domains by:

- creating empty memory domains when parsing the SRAT table, rather than failing to parse the table
- not running the pageout daemon threads in empty domains
- adding defensive code to UMA to avoid allocating from empty domains
- adding defensive code to cpuset to avoid binding to an empty domain

Thanks to Jeff for suggesting this strategy.

Reviewed by:	alc, markj
Approved by:	re (gjb@)
Differential Revision:	https://reviews.freebsd.org/D1683
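The defensive pattern the UMA hunks below rely on is a round-robin walk over vm_ndomains that skips any domain whose page count is zero, giving up after one full lap. The following is a minimal standalone sketch of that idea in userland C; the vm_dom array contents and the next_nonempty_domain() helper are simplified stand-ins for illustration, not the kernel's actual definitions.

/*
 * Sketch of the "skip empty NUMA domains" round-robin used in
 * keg_fetch_slab() and uma_prealloc() below.  The domain layout and
 * next_nonempty_domain() are illustrative only.
 */
#include <stdio.h>

#define MAXMEMDOM 4

struct vm_domain {
	long vmd_page_count;	/* 0 => no physical memory attached */
};

/* Threadripper2-like layout: two populated domains, two empty ones. */
static struct vm_domain vm_dom[MAXMEMDOM] = {
	{ 1024 }, { 0 }, { 2048 }, { 0 }
};
static int vm_ndomains = MAXMEMDOM;

#define VM_DOMAIN_EMPTY(n)	(vm_dom[(n)].vmd_page_count == 0)

/*
 * Advance round-robin from 'cur', skipping empty domains.  If every
 * domain is empty, stop after one full lap, mirroring the
 * "&& domain != start" guard in the diff.
 */
static int
next_nonempty_domain(int cur)
{
	int start, domain;

	start = domain = cur;
	do {
		domain = (domain + 1) % vm_ndomains;
	} while (VM_DOMAIN_EMPTY(domain) && domain != start);
	return (domain);
}

int
main(void)
{
	int domain = 0;

	/* Allocations alternate between the two populated domains. */
	for (int i = 0; i < 6; i++) {
		domain = next_nonempty_domain(domain);
		printf("allocation %d -> domain %d\n", i, domain);
	}
	return (0);
}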
Notes: svn path=/head/; revision=339043
Diffstat (limited to 'sys/vm')
-rw-r--r--	sys/vm/uma_core.c	42
-rw-r--r--	sys/vm/vm_kern.c	2
-rw-r--r--	sys/vm/vm_pageout.c	7
-rw-r--r--	sys/vm/vm_pagequeue.h	3
4 files changed, 45 insertions, 9 deletions
diff --git a/sys/vm/uma_core.c b/sys/vm/uma_core.c
index 937e78452021..837eb0787915 100644
--- a/sys/vm/uma_core.c
+++ b/sys/vm/uma_core.c
@@ -84,6 +84,7 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_pageout.h>
#include <vm/vm_param.h>
#include <vm/vm_phys.h>
+#include <vm/vm_pagequeue.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
@@ -2469,9 +2470,11 @@ zalloc_start:
if (bucket != NULL)
bucket_free(zone, bucket, udata);
- if (zone->uz_flags & UMA_ZONE_NUMA)
+ if (zone->uz_flags & UMA_ZONE_NUMA) {
domain = PCPU_GET(domain);
- else
+ if (VM_DOMAIN_EMPTY(domain))
+ domain = UMA_ANYDOMAIN;
+ } else
domain = UMA_ANYDOMAIN;
/* Short-circuit for zones without buckets and low memory. */
@@ -2647,7 +2650,11 @@ keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int rdomain, int flags)
rdomain = 0;
rr = rdomain == UMA_ANYDOMAIN;
if (rr) {
- keg->uk_cursor = (keg->uk_cursor + 1) % vm_ndomains;
+ start = keg->uk_cursor;
+ do {
+ keg->uk_cursor = (keg->uk_cursor + 1) % vm_ndomains;
+ domain = keg->uk_cursor;
+ } while (VM_DOMAIN_EMPTY(domain) && domain != start);
domain = start = keg->uk_cursor;
/* Only block on the second pass. */
if ((flags & (M_WAITOK | M_NOVM)) == M_WAITOK)
@@ -2698,8 +2705,11 @@ again:
LIST_INSERT_HEAD(&dom->ud_part_slab, slab, us_link);
return (slab);
}
- if (rr)
- domain = (domain + 1) % vm_ndomains;
+ if (rr) {
+ do {
+ domain = (domain + 1) % vm_ndomains;
+ } while (VM_DOMAIN_EMPTY(domain) && domain != start);
+ }
} while (domain != start);
/* Retry domain scan with blocking. */
@@ -2903,6 +2913,8 @@ zone_alloc_bucket(uma_zone_t zone, void *udata, int domain, int flags)
uma_bucket_t bucket;
int max;
+ CTR1(KTR_UMA, "zone_alloc:_bucket domain %d)", domain);
+
/* Don't wait for buckets, preserve caller's NOVM setting. */
bucket = bucket_alloc(zone, udata, M_NOWAIT | (flags & M_NOVM));
if (bucket == NULL)
@@ -2970,6 +2982,11 @@ zone_alloc_item(uma_zone_t zone, void *udata, int domain, int flags)
item = NULL;
+ if (domain != UMA_ANYDOMAIN) {
+ /* avoid allocs targeting empty domains */
+ if (VM_DOMAIN_EMPTY(domain))
+ domain = UMA_ANYDOMAIN;
+ }
if (zone->uz_import(zone->uz_arg, &item, 1, domain, flags) != 1)
goto fail;
atomic_add_long(&zone->uz_allocs, 1);
@@ -3139,9 +3156,11 @@ zfree_start:
/* We are no longer associated with this CPU. */
critical_exit();
- if ((zone->uz_flags & UMA_ZONE_NUMA) != 0)
+ if ((zone->uz_flags & UMA_ZONE_NUMA) != 0) {
domain = PCPU_GET(domain);
- else
+ if (VM_DOMAIN_EMPTY(domain))
+ domain = UMA_ANYDOMAIN;
+ } else
domain = 0;
zdom = &zone->uz_domain[0];
@@ -3588,7 +3607,9 @@ uma_prealloc(uma_zone_t zone, int items)
dom = &keg->uk_domain[slab->us_domain];
LIST_INSERT_HEAD(&dom->ud_free_slab, slab, us_link);
slabs--;
- domain = (domain + 1) % vm_ndomains;
+ do {
+ domain = (domain + 1) % vm_ndomains;
+ } while (VM_DOMAIN_EMPTY(domain));
}
KEG_UNLOCK(keg);
}
@@ -3678,6 +3699,11 @@ uma_large_malloc_domain(vm_size_t size, int domain, int wait)
vm_offset_t addr;
uma_slab_t slab;
+ if (domain != UMA_ANYDOMAIN) {
+ /* avoid allocs targeting empty domains */
+ if (VM_DOMAIN_EMPTY(domain))
+ domain = UMA_ANYDOMAIN;
+ }
slab = zone_alloc_item(slabzone, NULL, domain, wait);
if (slab == NULL)
return (NULL);
diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c
index 9774e0b4f22d..88fbc74848df 100644
--- a/sys/vm/vm_kern.c
+++ b/sys/vm/vm_kern.c
@@ -502,6 +502,8 @@ kmem_back(vm_object_t object, vm_offset_t addr, vm_size_t size, int flags)
*/
if (vm_ndomains > 1) {
domain = (addr >> KVA_QUANTUM_SHIFT) % vm_ndomains;
+ while (VM_DOMAIN_EMPTY(domain))
+ domain++;
next = roundup2(addr + 1, KVA_QUANTUM);
if (next > end || next < start)
next = end;
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index c084de7aca44..5c309a04a402 100644
--- a/sys/vm/vm_pageout.c
+++ b/sys/vm/vm_pageout.c
@@ -2082,6 +2082,13 @@ vm_pageout(void)
if (error != 0)
panic("starting laundry for domain 0, error %d", error);
for (i = 1; i < vm_ndomains; i++) {
+ if (VM_DOMAIN_EMPTY(i)) {
+ if (bootverbose)
+ printf("domain %d empty; skipping pageout\n",
+ i);
+ continue;
+ }
+
error = kthread_add(vm_pageout_worker, (void *)(uintptr_t)i,
curproc, NULL, 0, 0, "dom%d", i);
if (error != 0) {
diff --git a/sys/vm/vm_pagequeue.h b/sys/vm/vm_pagequeue.h
index ac04f2b3ea61..758e51d8ef6e 100644
--- a/sys/vm/vm_pagequeue.h
+++ b/sys/vm/vm_pagequeue.h
@@ -151,7 +151,8 @@ struct vm_domain {
extern struct vm_domain vm_dom[MAXMEMDOM];
-#define VM_DOMAIN(n) (&vm_dom[(n)])
+#define VM_DOMAIN(n) (&vm_dom[(n)])
+#define VM_DOMAIN_EMPTY(n) (vm_dom[(n)].vmd_page_count == 0)
#define vm_pagequeue_assert_locked(pq) mtx_assert(&(pq)->pq_mutex, MA_OWNED)
#define vm_pagequeue_lock(pq) mtx_lock(&(pq)->pq_mutex)