aboutsummaryrefslogtreecommitdiff
path: root/sys/vm
diff options
context:
space:
mode:
authorMark Johnston <markj@FreeBSD.org>2018-11-13 19:44:40 +0000
committerMark Johnston <markj@FreeBSD.org>2018-11-13 19:44:40 +0000
commit0f9b7bf37aef3218bf09cf3869f9748e68d74be3 (patch)
treefaff8d6b587afc3878fe5e232ac4ee4a0b75b85d /sys/vm
parenta82296c2dfa2caae222e98315ee382a361aef5ad (diff)
downloadsrc-0f9b7bf37aef3218bf09cf3869f9748e68d74be3.tar.gz
src-0f9b7bf37aef3218bf09cf3869f9748e68d74be3.zip
Add accounting to per-domain UMA full bucket caches.
In particular, track the current size of the cache and maintain an estimate of its working set size. This will be used to decide how much to shrink various caches when the kernel attempts to reclaim pages. As a secondary effect, it makes statistics aggregation (done by, e.g., vmstat -z) cheaper since sysctl_vm_zone_stats() no longer needs to iterate over lists of cached buckets. Discussed with: alc, glebius, jeff Tested by: pho (previous version) MFC after: 1 month Differential Revision: https://reviews.freebsd.org/D16666
Notes
Notes: svn path=/head/; revision=340405
Diffstat (limited to 'sys/vm')
-rw-r--r--sys/vm/uma_core.c113
-rw-r--r--sys/vm/uma_int.h7
2 files changed, 84 insertions, 36 deletions
diff --git a/sys/vm/uma_core.c b/sys/vm/uma_core.c
index 9db39ea2ac80..7d14586a31cd 100644
--- a/sys/vm/uma_core.c
+++ b/sys/vm/uma_core.c
@@ -459,6 +459,36 @@ bucket_zone_drain(void)
zone_drain(ubz->ubz_zone);
}
+static uma_bucket_t
+zone_try_fetch_bucket(uma_zone_t zone, uma_zone_domain_t zdom, const bool ws)
+{
+ uma_bucket_t bucket;
+
+ ZONE_LOCK_ASSERT(zone);
+
+ if ((bucket = LIST_FIRST(&zdom->uzd_buckets)) != NULL) {
+ MPASS(zdom->uzd_nitems >= bucket->ub_cnt);
+ LIST_REMOVE(bucket, ub_link);
+ zdom->uzd_nitems -= bucket->ub_cnt;
+ if (ws && zdom->uzd_imin > zdom->uzd_nitems)
+ zdom->uzd_imin = zdom->uzd_nitems;
+ }
+ return (bucket);
+}
+
+static void
+zone_put_bucket(uma_zone_t zone, uma_zone_domain_t zdom, uma_bucket_t bucket,
+ const bool ws)
+{
+
+ ZONE_LOCK_ASSERT(zone);
+
+ LIST_INSERT_HEAD(&zdom->uzd_buckets, bucket, ub_link);
+ zdom->uzd_nitems += bucket->ub_cnt;
+ if (ws && zdom->uzd_imax < zdom->uzd_nitems)
+ zdom->uzd_imax = zdom->uzd_nitems;
+}
+
static void
zone_log_warning(uma_zone_t zone)
{
@@ -509,6 +539,23 @@ uma_timeout(void *unused)
}
/*
+ * Update the working set size estimate for the zone's bucket cache.
+ * The constants chosen here are somewhat arbitrary. With an update period of
+ * 20s (UMA_TIMEOUT), this estimate is dominated by zone activity over the
+ * last 100s.
+ */
+static void
+zone_domain_update_wss(uma_zone_domain_t zdom)
+{
+ long wss;
+
+ MPASS(zdom->uzd_imax >= zdom->uzd_imin);
+ wss = zdom->uzd_imax - zdom->uzd_imin;
+ zdom->uzd_imax = zdom->uzd_imin = zdom->uzd_nitems;
+ zdom->uzd_wss = (3 * wss + 2 * zdom->uzd_wss) / 5;
+}
+
+/*
* Routine to perform timeout driven calculations. This expands the
* hashes and does per cpu statistics aggregation.
*
@@ -560,8 +607,14 @@ keg_timeout(uma_keg_t keg)
static void
zone_timeout(uma_zone_t zone)
{
+ int i;
zone_foreach_keg(zone, &keg_timeout);
+
+ ZONE_LOCK(zone);
+ for (i = 0; i < vm_ndomains; i++)
+ zone_domain_update_wss(&zone->uz_domain[i]);
+ ZONE_UNLOCK(zone);
}
/*
@@ -772,16 +825,16 @@ cache_drain_safe_cpu(uma_zone_t zone)
cache = &zone->uz_cpu[curcpu];
if (cache->uc_allocbucket) {
if (cache->uc_allocbucket->ub_cnt != 0)
- LIST_INSERT_HEAD(&zone->uz_domain[domain].uzd_buckets,
- cache->uc_allocbucket, ub_link);
+ zone_put_bucket(zone, &zone->uz_domain[domain],
+ cache->uc_allocbucket, false);
else
b1 = cache->uc_allocbucket;
cache->uc_allocbucket = NULL;
}
if (cache->uc_freebucket) {
if (cache->uc_freebucket->ub_cnt != 0)
- LIST_INSERT_HEAD(&zone->uz_domain[domain].uzd_buckets,
- cache->uc_freebucket, ub_link);
+ zone_put_bucket(zone, &zone->uz_domain[domain],
+ cache->uc_freebucket, false);
else
b2 = cache->uc_freebucket;
cache->uc_freebucket = NULL;
@@ -844,8 +897,8 @@ bucket_cache_drain(uma_zone_t zone)
*/
for (i = 0; i < vm_ndomains; i++) {
zdom = &zone->uz_domain[i];
- while ((bucket = LIST_FIRST(&zdom->uzd_buckets)) != NULL) {
- LIST_REMOVE(bucket, ub_link);
+ while ((bucket = zone_try_fetch_bucket(zone, zdom, false)) !=
+ NULL) {
ZONE_UNLOCK(zone);
bucket_drain(zone, bucket);
bucket_free(zone, bucket, NULL);
@@ -2523,11 +2576,9 @@ zalloc_start:
zdom = &zone->uz_domain[0];
else
zdom = &zone->uz_domain[domain];
- if ((bucket = LIST_FIRST(&zdom->uzd_buckets)) != NULL) {
+ if ((bucket = zone_try_fetch_bucket(zone, zdom, true)) != NULL) {
KASSERT(bucket->ub_cnt != 0,
("uma_zalloc_arg: Returning an empty bucket."));
-
- LIST_REMOVE(bucket, ub_link);
cache->uc_allocbucket = bucket;
ZONE_UNLOCK(zone);
goto zalloc_start;
@@ -2556,6 +2607,7 @@ zalloc_start:
critical_enter();
cpu = curcpu;
cache = &zone->uz_cpu[cpu];
+
/*
* See if we lost the race or were migrated. Cache the
* initialized bucket to make this less likely or claim
@@ -2565,6 +2617,7 @@ zalloc_start:
((zone->uz_flags & UMA_ZONE_NUMA) == 0 ||
domain == PCPU_GET(domain))) {
cache->uc_allocbucket = bucket;
+ zdom->uzd_imax += bucket->ub_cnt;
} else if ((zone->uz_flags & UMA_ZONE_NOBUCKETCACHE) != 0) {
critical_exit();
ZONE_UNLOCK(zone);
@@ -2572,7 +2625,7 @@ zalloc_start:
bucket_free(zone, bucket, udata);
goto zalloc_restart;
} else
- LIST_INSERT_HEAD(&zdom->uzd_buckets, bucket, ub_link);
+ zone_put_bucket(zone, zdom, bucket, false);
ZONE_UNLOCK(zone);
goto zalloc_start;
}
@@ -3200,7 +3253,7 @@ zfree_start:
bucket_free(zone, bucket, udata);
goto zfree_restart;
} else
- LIST_INSERT_HEAD(&zdom->uzd_buckets, bucket, ub_link);
+ zone_put_bucket(zone, zdom, bucket, true);
}
/*
@@ -3649,6 +3702,7 @@ uma_reclaim_locked(bool kmem_danger)
cache_drain_safe(NULL);
zone_foreach(zone_drain);
}
+
/*
* Some slabs may have been freed but this zone will be visited early
* we visit again so that we can free pages that are empty once other
@@ -3882,7 +3936,7 @@ uma_print_zone(uma_zone_t zone)
* directly so that we don't have to.
*/
static void
-uma_zone_sumstat(uma_zone_t z, int *cachefreep, uint64_t *allocsp,
+uma_zone_sumstat(uma_zone_t z, long *cachefreep, uint64_t *allocsp,
uint64_t *freesp, uint64_t *sleepsp)
{
uma_cache_t cache;
@@ -3937,7 +3991,6 @@ sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS)
struct uma_stream_header ush;
struct uma_type_header uth;
struct uma_percpu_stat *ups;
- uma_bucket_t bucket;
uma_zone_domain_t zdom;
struct sbuf sbuf;
uma_cache_t cache;
@@ -3997,9 +4050,7 @@ sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS)
for (i = 0; i < vm_ndomains; i++) {
zdom = &z->uz_domain[i];
- LIST_FOREACH(bucket, &zdom->uzd_buckets,
- ub_link)
- uth.uth_zone_free += bucket->ub_cnt;
+ uth.uth_zone_free += zdom->uzd_nitems;
}
uth.uth_allocs = z->uz_allocs;
uth.uth_frees = z->uz_frees;
@@ -4199,12 +4250,11 @@ uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item)
#ifdef DDB
DB_SHOW_COMMAND(uma, db_show_uma)
{
- uma_bucket_t bucket;
uma_keg_t kz;
uma_zone_t z;
- uma_zone_domain_t zdom;
uint64_t allocs, frees, sleeps;
- int cachefree, i;
+ long cachefree;
+ int i;
db_printf("%18s %8s %8s %8s %12s %8s %8s\n", "Zone", "Size", "Used",
"Free", "Requests", "Sleeps", "Bucket");
@@ -4221,13 +4271,10 @@ DB_SHOW_COMMAND(uma, db_show_uma)
if (!((z->uz_flags & UMA_ZONE_SECONDARY) &&
(LIST_FIRST(&kz->uk_zones) != z)))
cachefree += kz->uk_free;
- for (i = 0; i < vm_ndomains; i++) {
- zdom = &z->uz_domain[i];
- LIST_FOREACH(bucket, &zdom->uzd_buckets,
- ub_link)
- cachefree += bucket->ub_cnt;
- }
- db_printf("%18s %8ju %8jd %8d %12ju %8ju %8u\n",
+ for (i = 0; i < vm_ndomains; i++)
+ cachefree += z->uz_domain[i].uzd_nitems;
+
+ db_printf("%18s %8ju %8jd %8ld %12ju %8ju %8u\n",
z->uz_name, (uintmax_t)kz->uk_size,
(intmax_t)(allocs - frees), cachefree,
(uintmax_t)allocs, sleeps, z->uz_count);
@@ -4239,22 +4286,18 @@ DB_SHOW_COMMAND(uma, db_show_uma)
DB_SHOW_COMMAND(umacache, db_show_umacache)
{
- uma_bucket_t bucket;
uma_zone_t z;
- uma_zone_domain_t zdom;
uint64_t allocs, frees;
- int cachefree, i;
+ long cachefree;
+ int i;
db_printf("%18s %8s %8s %8s %12s %8s\n", "Zone", "Size", "Used", "Free",
"Requests", "Bucket");
LIST_FOREACH(z, &uma_cachezones, uz_link) {
uma_zone_sumstat(z, &cachefree, &allocs, &frees, NULL);
- for (i = 0; i < vm_ndomains; i++) {
- zdom = &z->uz_domain[i];
- LIST_FOREACH(bucket, &zdom->uzd_buckets, ub_link)
- cachefree += bucket->ub_cnt;
- }
- db_printf("%18s %8ju %8jd %8d %12ju %8u\n",
+ for (i = 0; i < vm_ndomains; i++)
+ cachefree += z->uz_domain[i].uzd_nitems;
+ db_printf("%18s %8ju %8jd %8ld %12ju %8u\n",
z->uz_name, (uintmax_t)z->uz_size,
(intmax_t)(allocs - frees), cachefree,
(uintmax_t)allocs, z->uz_count);
diff --git a/sys/vm/uma_int.h b/sys/vm/uma_int.h
index 5b28df3701bf..a97f51734ef6 100644
--- a/sys/vm/uma_int.h
+++ b/sys/vm/uma_int.h
@@ -304,6 +304,10 @@ typedef struct uma_klink *uma_klink_t;
struct uma_zone_domain {
LIST_HEAD(,uma_bucket) uzd_buckets; /* full buckets */
+ long uzd_nitems; /* total item count */
+ long uzd_imax; /* maximum item count this period */
+ long uzd_imin; /* minimum item count this period */
+ long uzd_wss; /* working set size estimate */
};
typedef struct uma_zone_domain * uma_zone_domain_t;
@@ -423,11 +427,12 @@ void uma_large_free(uma_slab_t slab);
mtx_init(&(z)->uz_lock, (z)->uz_name, \
"UMA zone", MTX_DEF | MTX_DUPOK); \
} while (0)
-
+
#define ZONE_LOCK(z) mtx_lock((z)->uz_lockptr)
#define ZONE_TRYLOCK(z) mtx_trylock((z)->uz_lockptr)
#define ZONE_UNLOCK(z) mtx_unlock((z)->uz_lockptr)
#define ZONE_LOCK_FINI(z) mtx_destroy(&(z)->uz_lock)
+#define ZONE_LOCK_ASSERT(z) mtx_assert((z)->uz_lockptr, MA_OWNED)
/*
* Find a slab within a hash table. This is used for OFFPAGE zones to lookup