diff options
author | Andrew Gallatin <gallatin@FreeBSD.org> | 2018-10-01 14:14:21 +0000 |
---|---|---|
committer | Andrew Gallatin <gallatin@FreeBSD.org> | 2018-10-01 14:14:21 +0000 |
commit | 30c5525b3cd28170e736202510ab64d51aa703bf (patch) | |
tree | bc1ffae70e460a396beda9f42a5fb363ab5c13b5 /sys/vm | |
parent | 15a087e55119214fbe129fbbef9a4e49bab6bc90 (diff) | |
download | src-30c5525b3cd28170e736202510ab64d51aa703bf.tar.gz src-30c5525b3cd28170e736202510ab64d51aa703bf.zip |
Allow empty NUMA memory domains to support Threadripper2
The AMD Threadripper 2990WX is basically a slightly crippled Epyc.
Rather than having 4 memory controllers, one per NUMA domain, it has
only 2 memory controllers enabled. This means that only 2 of the
4 NUMA domains can be populated with physical memory, and the
others are empty.
Add support to FreeBSD for empty NUMA domains by:
- creating empty memory domains when parsing the SRAT table,
rather than failing to parse the table
- not running the pageout daemon threads in empty domains
- adding defensive code to UMA to avoid allocating from empty domains
- adding defensive code to cpuset to avoid binding to an empty domain
Thanks to Jeff for suggesting this strategy.
Reviewed by: alc, markj
Approved by: re (gjb@)
Differential Revision: https://reviews.freebsd.org/D16994
Notes
Notes:
svn path=/head/; revision=339043
Diffstat (limited to 'sys/vm')
-rw-r--r-- | sys/vm/uma_core.c | 42 | ||||
-rw-r--r-- | sys/vm/vm_kern.c | 2 | ||||
-rw-r--r-- | sys/vm/vm_pageout.c | 7 | ||||
-rw-r--r-- | sys/vm/vm_pagequeue.h | 3 |
4 files changed, 45 insertions, 9 deletions
diff --git a/sys/vm/uma_core.c b/sys/vm/uma_core.c index 937e78452021..837eb0787915 100644 --- a/sys/vm/uma_core.c +++ b/sys/vm/uma_core.c @@ -84,6 +84,7 @@ __FBSDID("$FreeBSD$"); #include <vm/vm_pageout.h> #include <vm/vm_param.h> #include <vm/vm_phys.h> +#include <vm/vm_pagequeue.h> #include <vm/vm_map.h> #include <vm/vm_kern.h> #include <vm/vm_extern.h> @@ -2469,9 +2470,11 @@ zalloc_start: if (bucket != NULL) bucket_free(zone, bucket, udata); - if (zone->uz_flags & UMA_ZONE_NUMA) + if (zone->uz_flags & UMA_ZONE_NUMA) { domain = PCPU_GET(domain); - else + if (VM_DOMAIN_EMPTY(domain)) + domain = UMA_ANYDOMAIN; + } else domain = UMA_ANYDOMAIN; /* Short-circuit for zones without buckets and low memory. */ @@ -2647,7 +2650,11 @@ keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int rdomain, int flags) rdomain = 0; rr = rdomain == UMA_ANYDOMAIN; if (rr) { - keg->uk_cursor = (keg->uk_cursor + 1) % vm_ndomains; + start = keg->uk_cursor; + do { + keg->uk_cursor = (keg->uk_cursor + 1) % vm_ndomains; + domain = keg->uk_cursor; + } while (VM_DOMAIN_EMPTY(domain) && domain != start); domain = start = keg->uk_cursor; /* Only block on the second pass. */ if ((flags & (M_WAITOK | M_NOVM)) == M_WAITOK) @@ -2698,8 +2705,11 @@ again: LIST_INSERT_HEAD(&dom->ud_part_slab, slab, us_link); return (slab); } - if (rr) - domain = (domain + 1) % vm_ndomains; + if (rr) { + do { + domain = (domain + 1) % vm_ndomains; + } while (VM_DOMAIN_EMPTY(domain) && domain != start); + } } while (domain != start); /* Retry domain scan with blocking. */ @@ -2903,6 +2913,8 @@ zone_alloc_bucket(uma_zone_t zone, void *udata, int domain, int flags) uma_bucket_t bucket; int max; + CTR1(KTR_UMA, "zone_alloc:_bucket domain %d)", domain); + /* Don't wait for buckets, preserve caller's NOVM setting. 
*/ bucket = bucket_alloc(zone, udata, M_NOWAIT | (flags & M_NOVM)); if (bucket == NULL) @@ -2970,6 +2982,11 @@ zone_alloc_item(uma_zone_t zone, void *udata, int domain, int flags) item = NULL; + if (domain != UMA_ANYDOMAIN) { + /* avoid allocs targeting empty domains */ + if (VM_DOMAIN_EMPTY(domain)) + domain = UMA_ANYDOMAIN; + } if (zone->uz_import(zone->uz_arg, &item, 1, domain, flags) != 1) goto fail; atomic_add_long(&zone->uz_allocs, 1); @@ -3139,9 +3156,11 @@ zfree_start: /* We are no longer associated with this CPU. */ critical_exit(); - if ((zone->uz_flags & UMA_ZONE_NUMA) != 0) + if ((zone->uz_flags & UMA_ZONE_NUMA) != 0) { domain = PCPU_GET(domain); - else + if (VM_DOMAIN_EMPTY(domain)) + domain = UMA_ANYDOMAIN; + } else domain = 0; zdom = &zone->uz_domain[0]; @@ -3588,7 +3607,9 @@ uma_prealloc(uma_zone_t zone, int items) dom = &keg->uk_domain[slab->us_domain]; LIST_INSERT_HEAD(&dom->ud_free_slab, slab, us_link); slabs--; - domain = (domain + 1) % vm_ndomains; + do { + domain = (domain + 1) % vm_ndomains; + } while (VM_DOMAIN_EMPTY(domain)); } KEG_UNLOCK(keg); } @@ -3678,6 +3699,11 @@ uma_large_malloc_domain(vm_size_t size, int domain, int wait) vm_offset_t addr; uma_slab_t slab; + if (domain != UMA_ANYDOMAIN) { + /* avoid allocs targeting empty domains */ + if (VM_DOMAIN_EMPTY(domain)) + domain = UMA_ANYDOMAIN; + } slab = zone_alloc_item(slabzone, NULL, domain, wait); if (slab == NULL) return (NULL); diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c index 9774e0b4f22d..88fbc74848df 100644 --- a/sys/vm/vm_kern.c +++ b/sys/vm/vm_kern.c @@ -502,6 +502,8 @@ kmem_back(vm_object_t object, vm_offset_t addr, vm_size_t size, int flags) */ if (vm_ndomains > 1) { domain = (addr >> KVA_QUANTUM_SHIFT) % vm_ndomains; + while (VM_DOMAIN_EMPTY(domain)) + domain++; next = roundup2(addr + 1, KVA_QUANTUM); if (next > end || next < start) next = end; diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c index c084de7aca44..5c309a04a402 100644 --- a/sys/vm/vm_pageout.c +++ 
b/sys/vm/vm_pageout.c @@ -2082,6 +2082,13 @@ vm_pageout(void) if (error != 0) panic("starting laundry for domain 0, error %d", error); for (i = 1; i < vm_ndomains; i++) { + if (VM_DOMAIN_EMPTY(i)) { + if (bootverbose) + printf("domain %d empty; skipping pageout\n", + i); + continue; + } + error = kthread_add(vm_pageout_worker, (void *)(uintptr_t)i, curproc, NULL, 0, 0, "dom%d", i); if (error != 0) { diff --git a/sys/vm/vm_pagequeue.h b/sys/vm/vm_pagequeue.h index ac04f2b3ea61..758e51d8ef6e 100644 --- a/sys/vm/vm_pagequeue.h +++ b/sys/vm/vm_pagequeue.h @@ -151,7 +151,8 @@ struct vm_domain { extern struct vm_domain vm_dom[MAXMEMDOM]; -#define VM_DOMAIN(n) (&vm_dom[(n)]) +#define VM_DOMAIN(n) (&vm_dom[(n)]) +#define VM_DOMAIN_EMPTY(n) (vm_dom[(n)].vmd_page_count == 0) #define vm_pagequeue_assert_locked(pq) mtx_assert(&(pq)->pq_mutex, MA_OWNED) #define vm_pagequeue_lock(pq) mtx_lock(&(pq)->pq_mutex) |