author     Martin Matuska <mm@FreeBSD.org>  2023-09-02 10:32:48 +0000
committer  Martin Matuska <mm@FreeBSD.org>  2023-09-02 10:33:26 +0000
commit     2ad756a6bbb30fc98ee9000fba5bceec916a6c70 (patch)
tree       e9b4d857d72cf082b29cb918ae8de328e0c69324 /sys/contrib/openzfs/module
parent     f4296cfb409a48de00bfa60e76f686c2b031876f (diff)
parent     95f71c019d7c3e3b728a9b05e2117ce6b09f1b87 (diff)
zfs: merge openzfs/zfs@95f71c019

Notable upstream pull request merges:
  #15018 Increase limit of redaction list by using spill block
  #15161 Make zoned/jailed zfsprops(7) make more sense
  #15216 Relax error reporting in zpool import and zpool split
  #15218 Selectable block allocators
  #15227 ZIL: Tune some assertions
  #15228 ZIL: Revert zl_lock scope reduction
  #15233 ZIL: Change ZIOs issue order

Obtained from:  OpenZFS
OpenZFS commit: 95f71c019d7c3e3b728a9b05e2117ce6b09f1b87
Diffstat (limited to 'sys/contrib/openzfs/module')
-rw-r--r--  sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c   18
-rw-r--r--  sys/contrib/openzfs/module/os/linux/spl/spl-proc.c      36
-rw-r--r--  sys/contrib/openzfs/module/os/linux/zfs/spa_misc_os.c   12
-rw-r--r--  sys/contrib/openzfs/module/zcommon/zfeature_common.c    12
-rw-r--r--  sys/contrib/openzfs/module/zfs/dbuf.c                    2
-rw-r--r--  sys/contrib/openzfs/module/zfs/dmu_redact.c             17
-rw-r--r--  sys/contrib/openzfs/module/zfs/dnode.c                   1
-rw-r--r--  sys/contrib/openzfs/module/zfs/dsl_bookmark.c           67
-rw-r--r--  sys/contrib/openzfs/module/zfs/dsl_destroy.c            10
-rw-r--r--  sys/contrib/openzfs/module/zfs/metaslab.c               98
-rw-r--r--  sys/contrib/openzfs/module/zfs/spa.c                    14
-rw-r--r--  sys/contrib/openzfs/module/zfs/spa_misc.c                6
-rw-r--r--  sys/contrib/openzfs/module/zfs/zil.c                    49
13 files changed, 241 insertions, 101 deletions
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c
index 8ae2f23c3ecf..ba9a95e4a66d 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c
@@ -503,6 +503,24 @@ SYSCTL_UINT(_vfs_zfs_zfetch, OID_AUTO, max_idistance,
/* metaslab.c */
+int
+param_set_active_allocator(SYSCTL_HANDLER_ARGS)
+{
+ char buf[16];
+ int rc;
+
+ if (req->newptr == NULL)
+ strlcpy(buf, zfs_active_allocator, sizeof (buf));
+
+ rc = sysctl_handle_string(oidp, buf, sizeof (buf), req);
+ if (rc || req->newptr == NULL)
+ return (rc);
+ if (strcmp(buf, zfs_active_allocator) == 0)
+ return (0);
+
+ return (param_set_active_allocator_common(buf));
+}
+
/*
* In pools where the log space map feature is not enabled we touch
* multiple metaslabs (and their respective space maps) with each
diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-proc.c b/sys/contrib/openzfs/module/os/linux/spl/spl-proc.c
index bcc356ae55b6..5cb5a6dadb05 100644
--- a/sys/contrib/openzfs/module/os/linux/spl/spl-proc.c
+++ b/sys/contrib/openzfs/module/os/linux/spl/spl-proc.c
@@ -659,6 +659,21 @@ static struct ctl_table spl_root[] = {
};
#endif
+static void spl_proc_cleanup(void)
+{
+ remove_proc_entry("kstat", proc_spl);
+ remove_proc_entry("slab", proc_spl_kmem);
+ remove_proc_entry("kmem", proc_spl);
+ remove_proc_entry("taskq-all", proc_spl);
+ remove_proc_entry("taskq", proc_spl);
+ remove_proc_entry("spl", NULL);
+
+ if (spl_header) {
+ unregister_sysctl_table(spl_header);
+ spl_header = NULL;
+ }
+}
+
int
spl_proc_init(void)
{
@@ -723,15 +738,8 @@ spl_proc_init(void)
goto out;
}
out:
- if (rc) {
- remove_proc_entry("kstat", proc_spl);
- remove_proc_entry("slab", proc_spl_kmem);
- remove_proc_entry("kmem", proc_spl);
- remove_proc_entry("taskq-all", proc_spl);
- remove_proc_entry("taskq", proc_spl);
- remove_proc_entry("spl", NULL);
- unregister_sysctl_table(spl_header);
- }
+ if (rc)
+ spl_proc_cleanup();
return (rc);
}
@@ -739,13 +747,5 @@ out:
void
spl_proc_fini(void)
{
- remove_proc_entry("kstat", proc_spl);
- remove_proc_entry("slab", proc_spl_kmem);
- remove_proc_entry("kmem", proc_spl);
- remove_proc_entry("taskq-all", proc_spl);
- remove_proc_entry("taskq", proc_spl);
- remove_proc_entry("spl", NULL);
-
- ASSERT(spl_header != NULL);
- unregister_sysctl_table(spl_header);
+ spl_proc_cleanup();
}
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/spa_misc_os.c b/sys/contrib/openzfs/module/os/linux/zfs/spa_misc_os.c
index 3efc8b9644fd..c8cbedcd5157 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/spa_misc_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/spa_misc_os.c
@@ -103,6 +103,18 @@ param_set_slop_shift(const char *buf, zfs_kernel_param_t *kp)
return (0);
}
+int
+param_set_active_allocator(const char *val, zfs_kernel_param_t *kp)
+{
+ int error;
+
+ error = -param_set_active_allocator_common(val);
+ if (error == 0)
+ error = param_set_charp(val, kp);
+
+ return (error);
+}
+
const char *
spa_history_zone(void)
{
diff --git a/sys/contrib/openzfs/module/zcommon/zfeature_common.c b/sys/contrib/openzfs/module/zcommon/zfeature_common.c
index 4c9b7ed72a0f..2c74d10f43ff 100644
--- a/sys/contrib/openzfs/module/zcommon/zfeature_common.c
+++ b/sys/contrib/openzfs/module/zcommon/zfeature_common.c
@@ -737,6 +737,18 @@ zpool_feature_init(void)
ZFEATURE_FLAG_MOS, ZFEATURE_TYPE_BOOLEAN, NULL,
sfeatures);
+ {
+ static const spa_feature_t redact_list_spill_deps[] = {
+ SPA_FEATURE_REDACTION_BOOKMARKS,
+ SPA_FEATURE_NONE
+ };
+ zfeature_register(SPA_FEATURE_REDACTION_LIST_SPILL,
+ "com.delphix:redaction_list_spill", "redaction_list_spill",
+ "Support for increased number of redaction_snapshot "
+ "arguments in zfs redact.", 0, ZFEATURE_TYPE_BOOLEAN,
+ redact_list_spill_deps, sfeatures);
+ }
+
zfs_mod_list_supported_free(sfeatures);
}
diff --git a/sys/contrib/openzfs/module/zfs/dbuf.c b/sys/contrib/openzfs/module/zfs/dbuf.c
index b7453578a76f..f2831a0e8abf 100644
--- a/sys/contrib/openzfs/module/zfs/dbuf.c
+++ b/sys/contrib/openzfs/module/zfs/dbuf.c
@@ -2701,7 +2701,7 @@ dmu_buf_will_clone(dmu_buf_t *db_fake, dmu_tx_t *tx)
*/
mutex_enter(&db->db_mtx);
VERIFY(!dbuf_undirty(db, tx));
- ASSERT0(dbuf_find_dirty_eq(db, tx->tx_txg));
+ ASSERT3P(dbuf_find_dirty_eq(db, tx->tx_txg), ==, NULL);
if (db->db_buf != NULL) {
arc_buf_destroy(db->db_buf, db);
db->db_buf = NULL;
diff --git a/sys/contrib/openzfs/module/zfs/dmu_redact.c b/sys/contrib/openzfs/module/zfs/dmu_redact.c
index 6bd35713ff18..5ac14edfca12 100644
--- a/sys/contrib/openzfs/module/zfs/dmu_redact.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_redact.c
@@ -746,7 +746,7 @@ perform_thread_merge(bqueue_t *q, uint32_t num_threads,
bqueue_enqueue(q, record, sizeof (*record));
return (0);
}
- redact_nodes = kmem_zalloc(num_threads *
+ redact_nodes = vmem_zalloc(num_threads *
sizeof (*redact_nodes), KM_SLEEP);
avl_create(&start_tree, redact_node_compare_start,
@@ -820,7 +820,7 @@ perform_thread_merge(bqueue_t *q, uint32_t num_threads,
avl_destroy(&start_tree);
avl_destroy(&end_tree);
- kmem_free(redact_nodes, num_threads * sizeof (*redact_nodes));
+ vmem_free(redact_nodes, num_threads * sizeof (*redact_nodes));
if (current_record != NULL)
bqueue_enqueue(q, current_record, sizeof (*current_record));
return (err);
@@ -1030,7 +1030,7 @@ dmu_redact_snap(const char *snapname, nvlist_t *redactnvl,
numsnaps = fnvlist_num_pairs(redactnvl);
if (numsnaps > 0)
- args = kmem_zalloc(numsnaps * sizeof (*args), KM_SLEEP);
+ args = vmem_zalloc(numsnaps * sizeof (*args), KM_SLEEP);
nvpair_t *pair = NULL;
for (int i = 0; i < numsnaps; i++) {
@@ -1079,7 +1079,7 @@ dmu_redact_snap(const char *snapname, nvlist_t *redactnvl,
kmem_free(newredactbook,
sizeof (char) * ZFS_MAX_DATASET_NAME_LEN);
if (args != NULL)
- kmem_free(args, numsnaps * sizeof (*args));
+ vmem_free(args, numsnaps * sizeof (*args));
return (SET_ERROR(ENAMETOOLONG));
}
err = dsl_bookmark_lookup(dp, newredactbook, NULL, &bookmark);
@@ -1119,7 +1119,7 @@ dmu_redact_snap(const char *snapname, nvlist_t *redactnvl,
} else {
uint64_t *guids = NULL;
if (numsnaps > 0) {
- guids = kmem_zalloc(numsnaps * sizeof (uint64_t),
+ guids = vmem_zalloc(numsnaps * sizeof (uint64_t),
KM_SLEEP);
}
for (int i = 0; i < numsnaps; i++) {
@@ -1131,10 +1131,9 @@ dmu_redact_snap(const char *snapname, nvlist_t *redactnvl,
dp = NULL;
err = dsl_bookmark_create_redacted(newredactbook, snapname,
numsnaps, guids, FTAG, &new_rl);
- kmem_free(guids, numsnaps * sizeof (uint64_t));
- if (err != 0) {
+ vmem_free(guids, numsnaps * sizeof (uint64_t));
+ if (err != 0)
goto out;
- }
}
for (int i = 0; i < numsnaps; i++) {
@@ -1188,7 +1187,7 @@ out:
}
if (args != NULL)
- kmem_free(args, numsnaps * sizeof (*args));
+ vmem_free(args, numsnaps * sizeof (*args));
if (dp != NULL)
dsl_pool_rele(dp, FTAG);
if (ds != NULL) {
diff --git a/sys/contrib/openzfs/module/zfs/dnode.c b/sys/contrib/openzfs/module/zfs/dnode.c
index 7cf03264dce2..79fd02dcb9aa 100644
--- a/sys/contrib/openzfs/module/zfs/dnode.c
+++ b/sys/contrib/openzfs/module/zfs/dnode.c
@@ -720,6 +720,7 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
ASSERT(DMU_OT_IS_VALID(ot));
ASSERT((bonustype == DMU_OT_NONE && bonuslen == 0) ||
(bonustype == DMU_OT_SA && bonuslen == 0) ||
+ (bonustype == DMU_OTN_UINT64_METADATA && bonuslen == 0) ||
(bonustype != DMU_OT_NONE && bonuslen != 0));
ASSERT(DMU_OT_IS_VALID(bonustype));
ASSERT3U(bonuslen, <=, DN_SLOTS_TO_BONUSLEN(dn_slots));
diff --git a/sys/contrib/openzfs/module/zfs/dsl_bookmark.c b/sys/contrib/openzfs/module/zfs/dsl_bookmark.c
index e04796a0814f..03d9420dbdb9 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_bookmark.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_bookmark.c
@@ -34,6 +34,7 @@
#include <sys/dsl_bookmark.h>
#include <zfs_namecheck.h>
#include <sys/dmu_send.h>
+#include <sys/dbuf.h>
static int
dsl_bookmark_hold_ds(dsl_pool_t *dp, const char *fullname,
@@ -459,25 +460,42 @@ dsl_bookmark_create_sync_impl_snap(const char *bookmark, const char *snapshot,
SPA_FEATURE_REDACTED_DATASETS, &dsnumsnaps, &dsredactsnaps);
if (redaction_list != NULL || bookmark_redacted) {
redaction_list_t *local_rl;
+ boolean_t spill = B_FALSE;
if (bookmark_redacted) {
redact_snaps = dsredactsnaps;
num_redact_snaps = dsnumsnaps;
}
+ int bonuslen = sizeof (redaction_list_phys_t) +
+ num_redact_snaps * sizeof (uint64_t);
+ if (bonuslen > dmu_bonus_max())
+ spill = B_TRUE;
dbn->dbn_phys.zbm_redaction_obj = dmu_object_alloc(mos,
DMU_OTN_UINT64_METADATA, SPA_OLD_MAXBLOCKSIZE,
- DMU_OTN_UINT64_METADATA, sizeof (redaction_list_phys_t) +
- num_redact_snaps * sizeof (uint64_t), tx);
+ DMU_OTN_UINT64_METADATA, spill ? 0 : bonuslen, tx);
spa_feature_incr(dp->dp_spa,
SPA_FEATURE_REDACTION_BOOKMARKS, tx);
+ if (spill) {
+ spa_feature_incr(dp->dp_spa,
+ SPA_FEATURE_REDACTION_LIST_SPILL, tx);
+ }
VERIFY0(dsl_redaction_list_hold_obj(dp,
dbn->dbn_phys.zbm_redaction_obj, tag, &local_rl));
dsl_redaction_list_long_hold(dp, local_rl, tag);
- ASSERT3U((local_rl)->rl_dbuf->db_size, >=,
- sizeof (redaction_list_phys_t) + num_redact_snaps *
- sizeof (uint64_t));
- dmu_buf_will_dirty(local_rl->rl_dbuf, tx);
+ if (!spill) {
+ ASSERT3U(local_rl->rl_bonus->db_size, >=, bonuslen);
+ dmu_buf_will_dirty(local_rl->rl_bonus, tx);
+ } else {
+ dmu_buf_t *db;
+ VERIFY0(dmu_spill_hold_by_bonus(local_rl->rl_bonus,
+ DB_RF_MUST_SUCCEED, FTAG, &db));
+ dmu_buf_will_fill(db, tx);
+ VERIFY0(dbuf_spill_set_blksz(db, P2ROUNDUP(bonuslen,
+ SPA_MINBLOCKSIZE), tx));
+ local_rl->rl_phys = db->db_data;
+ local_rl->rl_dbuf = db;
+ }
memcpy(local_rl->rl_phys->rlp_snaps, redact_snaps,
sizeof (uint64_t) * num_redact_snaps);
local_rl->rl_phys->rlp_num_snaps = num_redact_snaps;
@@ -636,11 +654,15 @@ dsl_bookmark_create_redacted_check(void *arg, dmu_tx_t *tx)
SPA_FEATURE_REDACTION_BOOKMARKS))
return (SET_ERROR(ENOTSUP));
/*
- * If the list of redact snaps will not fit in the bonus buffer with
- * the furthest reached object and offset, fail.
+ * If the list of redact snaps will not fit in the bonus buffer (or
+ * spill block, with the REDACTION_LIST_SPILL feature) with the
+ * furthest reached object and offset, fail.
*/
- if (dbcra->dbcra_numsnaps > (dmu_bonus_max() -
- sizeof (redaction_list_phys_t)) / sizeof (uint64_t))
+ uint64_t snaplimit = ((spa_feature_is_enabled(dp->dp_spa,
+ SPA_FEATURE_REDACTION_LIST_SPILL) ? spa_maxblocksize(dp->dp_spa) :
+ dmu_bonus_max()) -
+ sizeof (redaction_list_phys_t)) / sizeof (uint64_t);
+ if (dbcra->dbcra_numsnaps > snaplimit)
return (SET_ERROR(E2BIG));
if (dsl_bookmark_create_nvl_validate_pair(
@@ -1040,6 +1062,14 @@ dsl_bookmark_destroy_sync_impl(dsl_dataset_t *ds, const char *name,
}
if (dbn->dbn_phys.zbm_redaction_obj != 0) {
+ dnode_t *rl;
+ VERIFY0(dnode_hold(mos,
+ dbn->dbn_phys.zbm_redaction_obj, FTAG, &rl));
+ if (rl->dn_have_spill) {
+ spa_feature_decr(dmu_objset_spa(mos),
+ SPA_FEATURE_REDACTION_LIST_SPILL, tx);
+ }
+ dnode_rele(rl, FTAG);
VERIFY0(dmu_object_free(mos,
dbn->dbn_phys.zbm_redaction_obj, tx));
spa_feature_decr(dmu_objset_spa(mos),
@@ -1213,7 +1243,9 @@ redaction_list_evict_sync(void *rlu)
void
dsl_redaction_list_rele(redaction_list_t *rl, const void *tag)
{
- dmu_buf_rele(rl->rl_dbuf, tag);
+ if (rl->rl_bonus != rl->rl_dbuf)
+ dmu_buf_rele(rl->rl_dbuf, tag);
+ dmu_buf_rele(rl->rl_bonus, tag);
}
int
@@ -1221,7 +1253,7 @@ dsl_redaction_list_hold_obj(dsl_pool_t *dp, uint64_t rlobj, const void *tag,
redaction_list_t **rlp)
{
objset_t *mos = dp->dp_meta_objset;
- dmu_buf_t *dbuf;
+ dmu_buf_t *dbuf, *spill_dbuf;
redaction_list_t *rl;
int err;
@@ -1236,13 +1268,18 @@ dsl_redaction_list_hold_obj(dsl_pool_t *dp, uint64_t rlobj, const void *tag,
redaction_list_t *winner = NULL;
rl = kmem_zalloc(sizeof (redaction_list_t), KM_SLEEP);
- rl->rl_dbuf = dbuf;
+ rl->rl_bonus = dbuf;
+ if (dmu_spill_hold_existing(dbuf, tag, &spill_dbuf) == 0) {
+ rl->rl_dbuf = spill_dbuf;
+ } else {
+ rl->rl_dbuf = dbuf;
+ }
rl->rl_object = rlobj;
- rl->rl_phys = dbuf->db_data;
+ rl->rl_phys = rl->rl_dbuf->db_data;
rl->rl_mos = dp->dp_meta_objset;
zfs_refcount_create(&rl->rl_longholds);
dmu_buf_init_user(&rl->rl_dbu, redaction_list_evict_sync, NULL,
- &rl->rl_dbuf);
+ &rl->rl_bonus);
if ((winner = dmu_buf_set_user_ie(dbuf, &rl->rl_dbu)) != NULL) {
kmem_free(rl, sizeof (*rl));
rl = winner;
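
[Editor's note] The dsl_bookmark.c changes above size a redaction list as a fixed header plus one GUID per redaction snapshot, and fall back to a spill block (the new REDACTION_LIST_SPILL feature) when that no longer fits in the dnode bonus buffer. Below is a minimal standalone sketch of that sizing decision; the struct layout and the capacity constant are illustrative stand-ins, not the values returned by dmu_bonus_max() or spa_maxblocksize().

#include <stdint.h>
#include <stdio.h>

/* Stand-in capacity; the real limit comes from dmu_bonus_max(). */
#define EXAMPLE_BONUS_MAX	320u

/* Simplified stand-in for the fixed header stored ahead of the GUID array. */
typedef struct example_redaction_list_phys {
	uint64_t rlp_last_object;
	uint64_t rlp_last_blkid;
	uint64_t rlp_num_entries;
	uint64_t rlp_num_snaps;
} example_redaction_list_phys_t;

/* Mirror of the bonuslen test in dsl_bookmark_create_sync_impl_snap(). */
static int
example_needs_spill(uint64_t num_redact_snaps)
{
	uint64_t bonuslen = sizeof (example_redaction_list_phys_t) +
	    num_redact_snaps * sizeof (uint64_t);
	return (bonuslen > EXAMPLE_BONUS_MAX);
}

int
main(void)
{
	for (uint64_t n = 16; n <= 64; n += 16) {
		printf("%llu redaction snapshots -> %s\n",
		    (unsigned long long)n,
		    example_needs_spill(n) ? "spill block" : "bonus buffer");
	}
	return (0);
}

With the stand-in capacity above, anything past 36 GUIDs would spill; the create-time check in dsl_bookmark_create_redacted_check() applies the same arithmetic against the spill-block capacity when the feature is enabled.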
diff --git a/sys/contrib/openzfs/module/zfs/dsl_destroy.c b/sys/contrib/openzfs/module/zfs/dsl_destroy.c
index 053f26878cf1..d9d88a981e05 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_destroy.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_destroy.c
@@ -1125,6 +1125,16 @@ dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx)
while ((dbn = avl_destroy_nodes(&ds->ds_bookmarks, &cookie)) !=
NULL) {
if (dbn->dbn_phys.zbm_redaction_obj != 0) {
+ dnode_t *rl;
+ VERIFY0(dnode_hold(mos,
+ dbn->dbn_phys.zbm_redaction_obj, FTAG,
+ &rl));
+ if (rl->dn_have_spill) {
+ spa_feature_decr(dmu_objset_spa(mos),
+ SPA_FEATURE_REDACTION_LIST_SPILL,
+ tx);
+ }
+ dnode_rele(rl, FTAG);
VERIFY0(dmu_object_free(mos,
dbn->dbn_phys.zbm_redaction_obj, tx));
spa_feature_decr(dmu_objset_spa(mos),
diff --git a/sys/contrib/openzfs/module/zfs/metaslab.c b/sys/contrib/openzfs/module/zfs/metaslab.c
index 20dc934593f1..dd4ff77e6f5d 100644
--- a/sys/contrib/openzfs/module/zfs/metaslab.c
+++ b/sys/contrib/openzfs/module/zfs/metaslab.c
@@ -40,8 +40,6 @@
#include <sys/zap.h>
#include <sys/btree.h>
-#define WITH_DF_BLOCK_ALLOCATOR
-
#define GANG_ALLOCATION(flags) \
((flags) & (METASLAB_GANG_CHILD | METASLAB_GANG_HEADER))
@@ -1622,9 +1620,6 @@ metaslab_block_find(zfs_btree_t *t, range_tree_t *rt, uint64_t start,
return (rs);
}
-#if defined(WITH_DF_BLOCK_ALLOCATOR) || \
- defined(WITH_CF_BLOCK_ALLOCATOR)
-
/*
* This is a helper function that can be used by the allocator to find a
* suitable block to allocate. This will search the specified B-tree looking
@@ -1659,9 +1654,74 @@ metaslab_block_picker(range_tree_t *rt, uint64_t *cursor, uint64_t size,
*cursor = 0;
return (-1ULL);
}
-#endif /* WITH_DF/CF_BLOCK_ALLOCATOR */
-#if defined(WITH_DF_BLOCK_ALLOCATOR)
+static uint64_t metaslab_df_alloc(metaslab_t *msp, uint64_t size);
+static uint64_t metaslab_cf_alloc(metaslab_t *msp, uint64_t size);
+static uint64_t metaslab_ndf_alloc(metaslab_t *msp, uint64_t size);
+metaslab_ops_t *metaslab_allocator(spa_t *spa);
+
+static metaslab_ops_t metaslab_allocators[] = {
+ { "dynamic", metaslab_df_alloc },
+ { "cursor", metaslab_cf_alloc },
+ { "new-dynamic", metaslab_ndf_alloc },
+};
+
+static int
+spa_find_allocator_byname(const char *val)
+{
+ int a = ARRAY_SIZE(metaslab_allocators) - 1;
+ if (strcmp("new-dynamic", val) == 0)
+ return (-1); /* remove when ndf is working */
+ for (; a >= 0; a--) {
+ if (strcmp(val, metaslab_allocators[a].msop_name) == 0)
+ return (a);
+ }
+ return (-1);
+}
+
+void
+spa_set_allocator(spa_t *spa, const char *allocator)
+{
+ int a = spa_find_allocator_byname(allocator);
+ if (a < 0) a = 0;
+ spa->spa_active_allocator = a;
+ zfs_dbgmsg("spa allocator: %s\n", metaslab_allocators[a].msop_name);
+}
+
+int
+spa_get_allocator(spa_t *spa)
+{
+ return (spa->spa_active_allocator);
+}
+
+#if defined(_KERNEL)
+int
+param_set_active_allocator_common(const char *val)
+{
+ char *p;
+
+ if (val == NULL)
+ return (SET_ERROR(EINVAL));
+
+ if ((p = strchr(val, '\n')) != NULL)
+ *p = '\0';
+
+ int a = spa_find_allocator_byname(val);
+ if (a < 0)
+ return (SET_ERROR(EINVAL));
+
+ zfs_active_allocator = metaslab_allocators[a].msop_name;
+ return (0);
+}
+#endif
+
+metaslab_ops_t *
+metaslab_allocator(spa_t *spa)
+{
+ int allocator = spa_get_allocator(spa);
+ return (&metaslab_allocators[allocator]);
+}
+
/*
* ==========================================================================
* Dynamic Fit (df) block allocator
@@ -1736,12 +1796,6 @@ metaslab_df_alloc(metaslab_t *msp, uint64_t size)
return (offset);
}
-const metaslab_ops_t zfs_metaslab_ops = {
- metaslab_df_alloc
-};
-#endif /* WITH_DF_BLOCK_ALLOCATOR */
-
-#if defined(WITH_CF_BLOCK_ALLOCATOR)
/*
* ==========================================================================
* Cursor fit block allocator -
@@ -1784,12 +1838,6 @@ metaslab_cf_alloc(metaslab_t *msp, uint64_t size)
return (offset);
}
-const metaslab_ops_t zfs_metaslab_ops = {
- metaslab_cf_alloc
-};
-#endif /* WITH_CF_BLOCK_ALLOCATOR */
-
-#if defined(WITH_NDF_BLOCK_ALLOCATOR)
/*
* ==========================================================================
* New dynamic fit allocator -
@@ -1846,12 +1894,6 @@ metaslab_ndf_alloc(metaslab_t *msp, uint64_t size)
return (-1ULL);
}
-const metaslab_ops_t zfs_metaslab_ops = {
- metaslab_ndf_alloc
-};
-#endif /* WITH_NDF_BLOCK_ALLOCATOR */
-
-
/*
* ==========================================================================
* Metaslabs
@@ -6232,3 +6274,9 @@ ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, try_hard_before_gang, INT,
ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, find_max_tries, UINT, ZMOD_RW,
"Normally only consider this many of the best metaslabs in each vdev");
+
+/* BEGIN CSTYLED */
+ZFS_MODULE_PARAM_CALL(zfs, zfs_, active_allocator,
+ param_set_active_allocator, param_get_charp, ZMOD_RW,
+ "SPA active allocator");
+/* END CSTYLED */
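
[Editor's note] The metaslab.c changes above replace the compile-time WITH_*_BLOCK_ALLOCATOR selection with a runtime table keyed by name, looked up when the pool is activated and falling back to the first entry ("dynamic") for unknown names. The following is a minimal sketch of that lookup-with-fallback shape, using stand-in allocator functions in place of the real metaslab_df/cf/ndf_alloc entry points.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Stand-in allocator entry points; the real table binds metaslab_df_alloc,
   metaslab_cf_alloc and metaslab_ndf_alloc. */
static uint64_t example_df_alloc(uint64_t size)  { return (size); }
static uint64_t example_cf_alloc(uint64_t size)  { return (size); }
static uint64_t example_ndf_alloc(uint64_t size) { return (size); }

typedef struct example_metaslab_ops {
	const char *msop_name;
	uint64_t (*msop_alloc)(uint64_t size);
} example_metaslab_ops_t;

static const example_metaslab_ops_t example_allocators[] = {
	{ "dynamic",     example_df_alloc },
	{ "cursor",      example_cf_alloc },
	{ "new-dynamic", example_ndf_alloc },
};

/* Same shape as spa_find_allocator_byname(): scan the table, -1 if unknown. */
static int
example_find_allocator_byname(const char *val)
{
	int n = (int)(sizeof (example_allocators) /
	    sizeof (example_allocators[0]));
	for (int a = 0; a < n; a++) {
		if (strcmp(val, example_allocators[a].msop_name) == 0)
			return (a);
	}
	return (-1);
}

int
main(void)
{
	const char *requested[] = { "cursor", "bogus" };
	for (int i = 0; i < 2; i++) {
		/* Unknown names fall back to index 0, as spa_set_allocator()
		   does. */
		int a = example_find_allocator_byname(requested[i]);
		if (a < 0)
			a = 0;
		printf("%s -> %s (%llu)\n", requested[i],
		    example_allocators[a].msop_name,
		    (unsigned long long)example_allocators[a].msop_alloc(512));
	}
	return (0);
}

Note that the upstream lookup currently still rejects "new-dynamic" until that allocator is ready, and the selection is surfaced through the zfs_active_allocator module parameter registered just above, handled per platform by param_set_active_allocator in sysctl_os.c and spa_misc_os.c.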
diff --git a/sys/contrib/openzfs/module/zfs/spa.c b/sys/contrib/openzfs/module/zfs/spa.c
index 88ee4ea9f458..cda62f939c1e 100644
--- a/sys/contrib/openzfs/module/zfs/spa.c
+++ b/sys/contrib/openzfs/module/zfs/spa.c
@@ -1295,24 +1295,26 @@ spa_thread(void *arg)
}
#endif
+extern metaslab_ops_t *metaslab_allocator(spa_t *spa);
+
/*
* Activate an uninitialized pool.
*/
static void
spa_activate(spa_t *spa, spa_mode_t mode)
{
+ metaslab_ops_t *msp = metaslab_allocator(spa);
ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED);
spa->spa_state = POOL_STATE_ACTIVE;
spa->spa_mode = mode;
spa->spa_read_spacemaps = spa_mode_readable_spacemaps;
- spa->spa_normal_class = metaslab_class_create(spa, &zfs_metaslab_ops);
- spa->spa_log_class = metaslab_class_create(spa, &zfs_metaslab_ops);
- spa->spa_embedded_log_class =
- metaslab_class_create(spa, &zfs_metaslab_ops);
- spa->spa_special_class = metaslab_class_create(spa, &zfs_metaslab_ops);
- spa->spa_dedup_class = metaslab_class_create(spa, &zfs_metaslab_ops);
+ spa->spa_normal_class = metaslab_class_create(spa, msp);
+ spa->spa_log_class = metaslab_class_create(spa, msp);
+ spa->spa_embedded_log_class = metaslab_class_create(spa, msp);
+ spa->spa_special_class = metaslab_class_create(spa, msp);
+ spa->spa_dedup_class = metaslab_class_create(spa, msp);
/* Try to create a covering process */
mutex_enter(&spa->spa_proc_lock);
diff --git a/sys/contrib/openzfs/module/zfs/spa_misc.c b/sys/contrib/openzfs/module/zfs/spa_misc.c
index 3b355e0debcc..413476196b9f 100644
--- a/sys/contrib/openzfs/module/zfs/spa_misc.c
+++ b/sys/contrib/openzfs/module/zfs/spa_misc.c
@@ -389,6 +389,11 @@ static const uint64_t spa_min_slop = 128ULL * 1024 * 1024;
static const uint64_t spa_max_slop = 128ULL * 1024 * 1024 * 1024;
static const int spa_allocators = 4;
+/*
+ * Spa active allocator.
+ * Valid values are zfs_active_allocator=<dynamic|cursor|new-dynamic>.
+ */
+const char *zfs_active_allocator = "dynamic";
void
spa_load_failed(spa_t *spa, const char *fmt, ...)
@@ -710,6 +715,7 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)
spa->spa_deadman_synctime = MSEC2NSEC(zfs_deadman_synctime_ms);
spa->spa_deadman_ziotime = MSEC2NSEC(zfs_deadman_ziotime_ms);
spa_set_deadman_failmode(spa, zfs_deadman_failmode);
+ spa_set_allocator(spa, zfs_active_allocator);
zfs_refcount_create(&spa->spa_refcount);
spa_config_lock_init(spa);
diff --git a/sys/contrib/openzfs/module/zfs/zil.c b/sys/contrib/openzfs/module/zfs/zil.c
index f2d279e36a96..b30676b42d88 100644
--- a/sys/contrib/openzfs/module/zfs/zil.c
+++ b/sys/contrib/openzfs/module/zfs/zil.c
@@ -814,17 +814,17 @@ static void
zil_free_lwb(zilog_t *zilog, lwb_t *lwb)
{
ASSERT(MUTEX_HELD(&zilog->zl_lock));
- ASSERT(!MUTEX_HELD(&lwb->lwb_vdev_lock));
- VERIFY(list_is_empty(&lwb->lwb_waiters));
- VERIFY(list_is_empty(&lwb->lwb_itxs));
- ASSERT(avl_is_empty(&lwb->lwb_vdev_tree));
+ ASSERT(lwb->lwb_state == LWB_STATE_NEW ||
+ lwb->lwb_state == LWB_STATE_FLUSH_DONE);
ASSERT3P(lwb->lwb_child_zio, ==, NULL);
ASSERT3P(lwb->lwb_write_zio, ==, NULL);
ASSERT3P(lwb->lwb_root_zio, ==, NULL);
ASSERT3U(lwb->lwb_alloc_txg, <=, spa_syncing_txg(zilog->zl_spa));
ASSERT3U(lwb->lwb_max_txg, <=, spa_syncing_txg(zilog->zl_spa));
- ASSERT(lwb->lwb_state == LWB_STATE_NEW ||
- lwb->lwb_state == LWB_STATE_FLUSH_DONE);
+ VERIFY(list_is_empty(&lwb->lwb_itxs));
+ VERIFY(list_is_empty(&lwb->lwb_waiters));
+ ASSERT(avl_is_empty(&lwb->lwb_vdev_tree));
+ ASSERT(!MUTEX_HELD(&lwb->lwb_vdev_lock));
/*
* Clear the zilog's field to indicate this lwb is no longer
@@ -1329,6 +1329,9 @@ zil_lwb_add_block(lwb_t *lwb, const blkptr_t *bp)
int ndvas = BP_GET_NDVAS(bp);
int i;
+ ASSERT3S(lwb->lwb_state, !=, LWB_STATE_WRITE_DONE);
+ ASSERT3S(lwb->lwb_state, !=, LWB_STATE_FLUSH_DONE);
+
if (zil_nocacheflush)
return;
@@ -1408,15 +1411,9 @@ zil_lwb_flush_vdevs_done(zio_t *zio)
zilog_t *zilog = lwb->lwb_zilog;
zil_commit_waiter_t *zcw;
itx_t *itx;
- uint64_t txg;
- list_t itxs, waiters;
spa_config_exit(zilog->zl_spa, SCL_STATE, lwb);
- list_create(&itxs, sizeof (itx_t), offsetof(itx_t, itx_node));
- list_create(&waiters, sizeof (zil_commit_waiter_t),
- offsetof(zil_commit_waiter_t, zcw_node));
-
hrtime_t t = gethrtime() - lwb->lwb_issued_timestamp;
mutex_enter(&zilog->zl_lock);
@@ -1425,6 +1422,9 @@ zil_lwb_flush_vdevs_done(zio_t *zio)
lwb->lwb_root_zio = NULL;
+ ASSERT3S(lwb->lwb_state, ==, LWB_STATE_WRITE_DONE);
+ lwb->lwb_state = LWB_STATE_FLUSH_DONE;
+
if (zilog->zl_last_lwb_opened == lwb) {
/*
* Remember the highest committed log sequence number
@@ -1435,22 +1435,13 @@ zil_lwb_flush_vdevs_done(zio_t *zio)
zilog->zl_commit_lr_seq = zilog->zl_lr_seq;
}
- list_move_tail(&itxs, &lwb->lwb_itxs);
- list_move_tail(&waiters, &lwb->lwb_waiters);
- txg = lwb->lwb_issued_txg;
-
- ASSERT3S(lwb->lwb_state, ==, LWB_STATE_WRITE_DONE);
- lwb->lwb_state = LWB_STATE_FLUSH_DONE;
-
- mutex_exit(&zilog->zl_lock);
-
- while ((itx = list_remove_head(&itxs)) != NULL)
+ while ((itx = list_remove_head(&lwb->lwb_itxs)) != NULL)
zil_itx_destroy(itx);
- list_destroy(&itxs);
- while ((zcw = list_remove_head(&waiters)) != NULL) {
+ while ((zcw = list_remove_head(&lwb->lwb_waiters)) != NULL) {
mutex_enter(&zcw->zcw_lock);
+ ASSERT3P(zcw->zcw_lwb, ==, lwb);
zcw->zcw_lwb = NULL;
/*
* We expect any ZIO errors from child ZIOs to have been
@@ -1475,7 +1466,11 @@ zil_lwb_flush_vdevs_done(zio_t *zio)
mutex_exit(&zcw->zcw_lock);
}
- list_destroy(&waiters);
+
+ uint64_t txg = lwb->lwb_issued_txg;
+
+ /* Once we drop the lock, lwb may be freed by zil_sync(). */
+ mutex_exit(&zilog->zl_lock);
mutex_enter(&zilog->zl_lwb_io_lock);
ASSERT3U(zilog->zl_lwb_inflight[txg & TXG_MASK], >, 0);
@@ -1929,10 +1924,10 @@ next_lwb:
BP_GET_LSIZE(&lwb->lwb_blk));
}
lwb->lwb_issued_timestamp = gethrtime();
- zio_nowait(lwb->lwb_root_zio);
- zio_nowait(lwb->lwb_write_zio);
if (lwb->lwb_child_zio)
zio_nowait(lwb->lwb_child_zio);
+ zio_nowait(lwb->lwb_write_zio);
+ zio_nowait(lwb->lwb_root_zio);
/*
* If nlwb was ready when we gave it the block pointer,