diff options
author    | Martin Matuska <mm@FreeBSD.org> | 2023-09-02 10:32:48 +0000
committer | Martin Matuska <mm@FreeBSD.org> | 2023-09-02 10:33:26 +0000
commit    | 2ad756a6bbb30fc98ee9000fba5bceec916a6c70 (patch)
tree      | e9b4d857d72cf082b29cb918ae8de328e0c69324 /sys/contrib/openzfs/module
parent    | f4296cfb409a48de00bfa60e76f686c2b031876f (diff)
parent    | 95f71c019d7c3e3b728a9b05e2117ce6b09f1b87 (diff)
zfs: merge openzfs/zfs@95f71c019
Notable upstream pull request merges:
#15018 Increase limit of redaction list by using spill block
#15161 Make zoned/jailed zfsprops(7) make more sense
#15216 Relax error reporting in zpool import and zpool split
#15218 Selectable block allocators
#15227 ZIL: Tune some assertions
#15228 ZIL: Revert zl_lock scope reduction
#15233 ZIL: Change ZIOs issue order
Obtained from: OpenZFS
OpenZFS commit: 95f71c019d7c3e3b728a9b05e2117ce6b09f1b87
Diffstat (limited to 'sys/contrib/openzfs/module')
-rw-r--r-- sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c  | 18
-rw-r--r-- sys/contrib/openzfs/module/os/linux/spl/spl-proc.c     | 36
-rw-r--r-- sys/contrib/openzfs/module/os/linux/zfs/spa_misc_os.c  | 12
-rw-r--r-- sys/contrib/openzfs/module/zcommon/zfeature_common.c   | 12
-rw-r--r-- sys/contrib/openzfs/module/zfs/dbuf.c                  |  2
-rw-r--r-- sys/contrib/openzfs/module/zfs/dmu_redact.c            | 17
-rw-r--r-- sys/contrib/openzfs/module/zfs/dnode.c                 |  1
-rw-r--r-- sys/contrib/openzfs/module/zfs/dsl_bookmark.c          | 67
-rw-r--r-- sys/contrib/openzfs/module/zfs/dsl_destroy.c           | 10
-rw-r--r-- sys/contrib/openzfs/module/zfs/metaslab.c              | 98
-rw-r--r-- sys/contrib/openzfs/module/zfs/spa.c                   | 14
-rw-r--r-- sys/contrib/openzfs/module/zfs/spa_misc.c              |  6
-rw-r--r-- sys/contrib/openzfs/module/zfs/zil.c                   | 49
13 files changed, 241 insertions, 101 deletions
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c index 8ae2f23c3ecf..ba9a95e4a66d 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c @@ -503,6 +503,24 @@ SYSCTL_UINT(_vfs_zfs_zfetch, OID_AUTO, max_idistance, /* metaslab.c */ +int +param_set_active_allocator(SYSCTL_HANDLER_ARGS) +{ + char buf[16]; + int rc; + + if (req->newptr == NULL) + strlcpy(buf, zfs_active_allocator, sizeof (buf)); + + rc = sysctl_handle_string(oidp, buf, sizeof (buf), req); + if (rc || req->newptr == NULL) + return (rc); + if (strcmp(buf, zfs_active_allocator) == 0) + return (0); + + return (param_set_active_allocator_common(buf)); +} + /* * In pools where the log space map feature is not enabled we touch * multiple metaslabs (and their respective space maps) with each diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-proc.c b/sys/contrib/openzfs/module/os/linux/spl/spl-proc.c index bcc356ae55b6..5cb5a6dadb05 100644 --- a/sys/contrib/openzfs/module/os/linux/spl/spl-proc.c +++ b/sys/contrib/openzfs/module/os/linux/spl/spl-proc.c @@ -659,6 +659,21 @@ static struct ctl_table spl_root[] = { }; #endif +static void spl_proc_cleanup(void) +{ + remove_proc_entry("kstat", proc_spl); + remove_proc_entry("slab", proc_spl_kmem); + remove_proc_entry("kmem", proc_spl); + remove_proc_entry("taskq-all", proc_spl); + remove_proc_entry("taskq", proc_spl); + remove_proc_entry("spl", NULL); + + if (spl_header) { + unregister_sysctl_table(spl_header); + spl_header = NULL; + } +} + int spl_proc_init(void) { @@ -723,15 +738,8 @@ spl_proc_init(void) goto out; } out: - if (rc) { - remove_proc_entry("kstat", proc_spl); - remove_proc_entry("slab", proc_spl_kmem); - remove_proc_entry("kmem", proc_spl); - remove_proc_entry("taskq-all", proc_spl); - remove_proc_entry("taskq", proc_spl); - remove_proc_entry("spl", NULL); - unregister_sysctl_table(spl_header); - } + if 
(rc) + spl_proc_cleanup(); return (rc); } @@ -739,13 +747,5 @@ out: void spl_proc_fini(void) { - remove_proc_entry("kstat", proc_spl); - remove_proc_entry("slab", proc_spl_kmem); - remove_proc_entry("kmem", proc_spl); - remove_proc_entry("taskq-all", proc_spl); - remove_proc_entry("taskq", proc_spl); - remove_proc_entry("spl", NULL); - - ASSERT(spl_header != NULL); - unregister_sysctl_table(spl_header); + spl_proc_cleanup(); } diff --git a/sys/contrib/openzfs/module/os/linux/zfs/spa_misc_os.c b/sys/contrib/openzfs/module/os/linux/zfs/spa_misc_os.c index 3efc8b9644fd..c8cbedcd5157 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/spa_misc_os.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/spa_misc_os.c @@ -103,6 +103,18 @@ param_set_slop_shift(const char *buf, zfs_kernel_param_t *kp) return (0); } +int +param_set_active_allocator(const char *val, zfs_kernel_param_t *kp) +{ + int error; + + error = -param_set_active_allocator_common(val); + if (error == 0) + error = param_set_charp(val, kp); + + return (error); +} + const char * spa_history_zone(void) { diff --git a/sys/contrib/openzfs/module/zcommon/zfeature_common.c b/sys/contrib/openzfs/module/zcommon/zfeature_common.c index 4c9b7ed72a0f..2c74d10f43ff 100644 --- a/sys/contrib/openzfs/module/zcommon/zfeature_common.c +++ b/sys/contrib/openzfs/module/zcommon/zfeature_common.c @@ -737,6 +737,18 @@ zpool_feature_init(void) ZFEATURE_FLAG_MOS, ZFEATURE_TYPE_BOOLEAN, NULL, sfeatures); + { + static const spa_feature_t redact_list_spill_deps[] = { + SPA_FEATURE_REDACTION_BOOKMARKS, + SPA_FEATURE_NONE + }; + zfeature_register(SPA_FEATURE_REDACTION_LIST_SPILL, + "com.delphix:redaction_list_spill", "redaction_list_spill", + "Support for increased number of redaction_snapshot " + "arguments in zfs redact.", 0, ZFEATURE_TYPE_BOOLEAN, + redact_list_spill_deps, sfeatures); + } + zfs_mod_list_supported_free(sfeatures); } diff --git a/sys/contrib/openzfs/module/zfs/dbuf.c b/sys/contrib/openzfs/module/zfs/dbuf.c index 
b7453578a76f..f2831a0e8abf 100644 --- a/sys/contrib/openzfs/module/zfs/dbuf.c +++ b/sys/contrib/openzfs/module/zfs/dbuf.c @@ -2701,7 +2701,7 @@ dmu_buf_will_clone(dmu_buf_t *db_fake, dmu_tx_t *tx) */ mutex_enter(&db->db_mtx); VERIFY(!dbuf_undirty(db, tx)); - ASSERT0(dbuf_find_dirty_eq(db, tx->tx_txg)); + ASSERT3P(dbuf_find_dirty_eq(db, tx->tx_txg), ==, NULL); if (db->db_buf != NULL) { arc_buf_destroy(db->db_buf, db); db->db_buf = NULL; diff --git a/sys/contrib/openzfs/module/zfs/dmu_redact.c b/sys/contrib/openzfs/module/zfs/dmu_redact.c index 6bd35713ff18..5ac14edfca12 100644 --- a/sys/contrib/openzfs/module/zfs/dmu_redact.c +++ b/sys/contrib/openzfs/module/zfs/dmu_redact.c @@ -746,7 +746,7 @@ perform_thread_merge(bqueue_t *q, uint32_t num_threads, bqueue_enqueue(q, record, sizeof (*record)); return (0); } - redact_nodes = kmem_zalloc(num_threads * + redact_nodes = vmem_zalloc(num_threads * sizeof (*redact_nodes), KM_SLEEP); avl_create(&start_tree, redact_node_compare_start, @@ -820,7 +820,7 @@ perform_thread_merge(bqueue_t *q, uint32_t num_threads, avl_destroy(&start_tree); avl_destroy(&end_tree); - kmem_free(redact_nodes, num_threads * sizeof (*redact_nodes)); + vmem_free(redact_nodes, num_threads * sizeof (*redact_nodes)); if (current_record != NULL) bqueue_enqueue(q, current_record, sizeof (*current_record)); return (err); @@ -1030,7 +1030,7 @@ dmu_redact_snap(const char *snapname, nvlist_t *redactnvl, numsnaps = fnvlist_num_pairs(redactnvl); if (numsnaps > 0) - args = kmem_zalloc(numsnaps * sizeof (*args), KM_SLEEP); + args = vmem_zalloc(numsnaps * sizeof (*args), KM_SLEEP); nvpair_t *pair = NULL; for (int i = 0; i < numsnaps; i++) { @@ -1079,7 +1079,7 @@ dmu_redact_snap(const char *snapname, nvlist_t *redactnvl, kmem_free(newredactbook, sizeof (char) * ZFS_MAX_DATASET_NAME_LEN); if (args != NULL) - kmem_free(args, numsnaps * sizeof (*args)); + vmem_free(args, numsnaps * sizeof (*args)); return (SET_ERROR(ENAMETOOLONG)); } err = dsl_bookmark_lookup(dp, 
newredactbook, NULL, &bookmark); @@ -1119,7 +1119,7 @@ dmu_redact_snap(const char *snapname, nvlist_t *redactnvl, } else { uint64_t *guids = NULL; if (numsnaps > 0) { - guids = kmem_zalloc(numsnaps * sizeof (uint64_t), + guids = vmem_zalloc(numsnaps * sizeof (uint64_t), KM_SLEEP); } for (int i = 0; i < numsnaps; i++) { @@ -1131,10 +1131,9 @@ dmu_redact_snap(const char *snapname, nvlist_t *redactnvl, dp = NULL; err = dsl_bookmark_create_redacted(newredactbook, snapname, numsnaps, guids, FTAG, &new_rl); - kmem_free(guids, numsnaps * sizeof (uint64_t)); - if (err != 0) { + vmem_free(guids, numsnaps * sizeof (uint64_t)); + if (err != 0) goto out; - } } for (int i = 0; i < numsnaps; i++) { @@ -1188,7 +1187,7 @@ out: } if (args != NULL) - kmem_free(args, numsnaps * sizeof (*args)); + vmem_free(args, numsnaps * sizeof (*args)); if (dp != NULL) dsl_pool_rele(dp, FTAG); if (ds != NULL) { diff --git a/sys/contrib/openzfs/module/zfs/dnode.c b/sys/contrib/openzfs/module/zfs/dnode.c index 7cf03264dce2..79fd02dcb9aa 100644 --- a/sys/contrib/openzfs/module/zfs/dnode.c +++ b/sys/contrib/openzfs/module/zfs/dnode.c @@ -720,6 +720,7 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs, ASSERT(DMU_OT_IS_VALID(ot)); ASSERT((bonustype == DMU_OT_NONE && bonuslen == 0) || (bonustype == DMU_OT_SA && bonuslen == 0) || + (bonustype == DMU_OTN_UINT64_METADATA && bonuslen == 0) || (bonustype != DMU_OT_NONE && bonuslen != 0)); ASSERT(DMU_OT_IS_VALID(bonustype)); ASSERT3U(bonuslen, <=, DN_SLOTS_TO_BONUSLEN(dn_slots)); diff --git a/sys/contrib/openzfs/module/zfs/dsl_bookmark.c b/sys/contrib/openzfs/module/zfs/dsl_bookmark.c index e04796a0814f..03d9420dbdb9 100644 --- a/sys/contrib/openzfs/module/zfs/dsl_bookmark.c +++ b/sys/contrib/openzfs/module/zfs/dsl_bookmark.c @@ -34,6 +34,7 @@ #include <sys/dsl_bookmark.h> #include <zfs_namecheck.h> #include <sys/dmu_send.h> +#include <sys/dbuf.h> static int dsl_bookmark_hold_ds(dsl_pool_t *dp, const char *fullname, @@ -459,25 +460,42 
@@ dsl_bookmark_create_sync_impl_snap(const char *bookmark, const char *snapshot, SPA_FEATURE_REDACTED_DATASETS, &dsnumsnaps, &dsredactsnaps); if (redaction_list != NULL || bookmark_redacted) { redaction_list_t *local_rl; + boolean_t spill = B_FALSE; if (bookmark_redacted) { redact_snaps = dsredactsnaps; num_redact_snaps = dsnumsnaps; } + int bonuslen = sizeof (redaction_list_phys_t) + + num_redact_snaps * sizeof (uint64_t); + if (bonuslen > dmu_bonus_max()) + spill = B_TRUE; dbn->dbn_phys.zbm_redaction_obj = dmu_object_alloc(mos, DMU_OTN_UINT64_METADATA, SPA_OLD_MAXBLOCKSIZE, - DMU_OTN_UINT64_METADATA, sizeof (redaction_list_phys_t) + - num_redact_snaps * sizeof (uint64_t), tx); + DMU_OTN_UINT64_METADATA, spill ? 0 : bonuslen, tx); spa_feature_incr(dp->dp_spa, SPA_FEATURE_REDACTION_BOOKMARKS, tx); + if (spill) { + spa_feature_incr(dp->dp_spa, + SPA_FEATURE_REDACTION_LIST_SPILL, tx); + } VERIFY0(dsl_redaction_list_hold_obj(dp, dbn->dbn_phys.zbm_redaction_obj, tag, &local_rl)); dsl_redaction_list_long_hold(dp, local_rl, tag); - ASSERT3U((local_rl)->rl_dbuf->db_size, >=, - sizeof (redaction_list_phys_t) + num_redact_snaps * - sizeof (uint64_t)); - dmu_buf_will_dirty(local_rl->rl_dbuf, tx); + if (!spill) { + ASSERT3U(local_rl->rl_bonus->db_size, >=, bonuslen); + dmu_buf_will_dirty(local_rl->rl_bonus, tx); + } else { + dmu_buf_t *db; + VERIFY0(dmu_spill_hold_by_bonus(local_rl->rl_bonus, + DB_RF_MUST_SUCCEED, FTAG, &db)); + dmu_buf_will_fill(db, tx); + VERIFY0(dbuf_spill_set_blksz(db, P2ROUNDUP(bonuslen, + SPA_MINBLOCKSIZE), tx)); + local_rl->rl_phys = db->db_data; + local_rl->rl_dbuf = db; + } memcpy(local_rl->rl_phys->rlp_snaps, redact_snaps, sizeof (uint64_t) * num_redact_snaps); local_rl->rl_phys->rlp_num_snaps = num_redact_snaps; @@ -636,11 +654,15 @@ dsl_bookmark_create_redacted_check(void *arg, dmu_tx_t *tx) SPA_FEATURE_REDACTION_BOOKMARKS)) return (SET_ERROR(ENOTSUP)); /* - * If the list of redact snaps will not fit in the bonus buffer with - * the furthest 
reached object and offset, fail. + * If the list of redact snaps will not fit in the bonus buffer (or + * spill block, with the REDACTION_LIST_SPILL feature) with the + * furthest reached object and offset, fail. */ - if (dbcra->dbcra_numsnaps > (dmu_bonus_max() - - sizeof (redaction_list_phys_t)) / sizeof (uint64_t)) + uint64_t snaplimit = ((spa_feature_is_enabled(dp->dp_spa, + SPA_FEATURE_REDACTION_LIST_SPILL) ? spa_maxblocksize(dp->dp_spa) : + dmu_bonus_max()) - + sizeof (redaction_list_phys_t)) / sizeof (uint64_t); + if (dbcra->dbcra_numsnaps > snaplimit) return (SET_ERROR(E2BIG)); if (dsl_bookmark_create_nvl_validate_pair( @@ -1040,6 +1062,14 @@ dsl_bookmark_destroy_sync_impl(dsl_dataset_t *ds, const char *name, } if (dbn->dbn_phys.zbm_redaction_obj != 0) { + dnode_t *rl; + VERIFY0(dnode_hold(mos, + dbn->dbn_phys.zbm_redaction_obj, FTAG, &rl)); + if (rl->dn_have_spill) { + spa_feature_decr(dmu_objset_spa(mos), + SPA_FEATURE_REDACTION_LIST_SPILL, tx); + } + dnode_rele(rl, FTAG); VERIFY0(dmu_object_free(mos, dbn->dbn_phys.zbm_redaction_obj, tx)); spa_feature_decr(dmu_objset_spa(mos), @@ -1213,7 +1243,9 @@ redaction_list_evict_sync(void *rlu) void dsl_redaction_list_rele(redaction_list_t *rl, const void *tag) { - dmu_buf_rele(rl->rl_dbuf, tag); + if (rl->rl_bonus != rl->rl_dbuf) + dmu_buf_rele(rl->rl_dbuf, tag); + dmu_buf_rele(rl->rl_bonus, tag); } int @@ -1221,7 +1253,7 @@ dsl_redaction_list_hold_obj(dsl_pool_t *dp, uint64_t rlobj, const void *tag, redaction_list_t **rlp) { objset_t *mos = dp->dp_meta_objset; - dmu_buf_t *dbuf; + dmu_buf_t *dbuf, *spill_dbuf; redaction_list_t *rl; int err; @@ -1236,13 +1268,18 @@ dsl_redaction_list_hold_obj(dsl_pool_t *dp, uint64_t rlobj, const void *tag, redaction_list_t *winner = NULL; rl = kmem_zalloc(sizeof (redaction_list_t), KM_SLEEP); - rl->rl_dbuf = dbuf; + rl->rl_bonus = dbuf; + if (dmu_spill_hold_existing(dbuf, tag, &spill_dbuf) == 0) { + rl->rl_dbuf = spill_dbuf; + } else { + rl->rl_dbuf = dbuf; + } rl->rl_object = 
rlobj; - rl->rl_phys = dbuf->db_data; + rl->rl_phys = rl->rl_dbuf->db_data; rl->rl_mos = dp->dp_meta_objset; zfs_refcount_create(&rl->rl_longholds); dmu_buf_init_user(&rl->rl_dbu, redaction_list_evict_sync, NULL, - &rl->rl_dbuf); + &rl->rl_bonus); if ((winner = dmu_buf_set_user_ie(dbuf, &rl->rl_dbu)) != NULL) { kmem_free(rl, sizeof (*rl)); rl = winner; diff --git a/sys/contrib/openzfs/module/zfs/dsl_destroy.c b/sys/contrib/openzfs/module/zfs/dsl_destroy.c index 053f26878cf1..d9d88a981e05 100644 --- a/sys/contrib/openzfs/module/zfs/dsl_destroy.c +++ b/sys/contrib/openzfs/module/zfs/dsl_destroy.c @@ -1125,6 +1125,16 @@ dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx) while ((dbn = avl_destroy_nodes(&ds->ds_bookmarks, &cookie)) != NULL) { if (dbn->dbn_phys.zbm_redaction_obj != 0) { + dnode_t *rl; + VERIFY0(dnode_hold(mos, + dbn->dbn_phys.zbm_redaction_obj, FTAG, + &rl)); + if (rl->dn_have_spill) { + spa_feature_decr(dmu_objset_spa(mos), + SPA_FEATURE_REDACTION_LIST_SPILL, + tx); + } + dnode_rele(rl, FTAG); VERIFY0(dmu_object_free(mos, dbn->dbn_phys.zbm_redaction_obj, tx)); spa_feature_decr(dmu_objset_spa(mos), diff --git a/sys/contrib/openzfs/module/zfs/metaslab.c b/sys/contrib/openzfs/module/zfs/metaslab.c index 20dc934593f1..dd4ff77e6f5d 100644 --- a/sys/contrib/openzfs/module/zfs/metaslab.c +++ b/sys/contrib/openzfs/module/zfs/metaslab.c @@ -40,8 +40,6 @@ #include <sys/zap.h> #include <sys/btree.h> -#define WITH_DF_BLOCK_ALLOCATOR - #define GANG_ALLOCATION(flags) \ ((flags) & (METASLAB_GANG_CHILD | METASLAB_GANG_HEADER)) @@ -1622,9 +1620,6 @@ metaslab_block_find(zfs_btree_t *t, range_tree_t *rt, uint64_t start, return (rs); } -#if defined(WITH_DF_BLOCK_ALLOCATOR) || \ - defined(WITH_CF_BLOCK_ALLOCATOR) - /* * This is a helper function that can be used by the allocator to find a * suitable block to allocate. 
This will search the specified B-tree looking @@ -1659,9 +1654,74 @@ metaslab_block_picker(range_tree_t *rt, uint64_t *cursor, uint64_t size, *cursor = 0; return (-1ULL); } -#endif /* WITH_DF/CF_BLOCK_ALLOCATOR */ -#if defined(WITH_DF_BLOCK_ALLOCATOR) +static uint64_t metaslab_df_alloc(metaslab_t *msp, uint64_t size); +static uint64_t metaslab_cf_alloc(metaslab_t *msp, uint64_t size); +static uint64_t metaslab_ndf_alloc(metaslab_t *msp, uint64_t size); +metaslab_ops_t *metaslab_allocator(spa_t *spa); + +static metaslab_ops_t metaslab_allocators[] = { + { "dynamic", metaslab_df_alloc }, + { "cursor", metaslab_cf_alloc }, + { "new-dynamic", metaslab_ndf_alloc }, +}; + +static int +spa_find_allocator_byname(const char *val) +{ + int a = ARRAY_SIZE(metaslab_allocators) - 1; + if (strcmp("new-dynamic", val) == 0) + return (-1); /* remove when ndf is working */ + for (; a >= 0; a--) { + if (strcmp(val, metaslab_allocators[a].msop_name) == 0) + return (a); + } + return (-1); +} + +void +spa_set_allocator(spa_t *spa, const char *allocator) +{ + int a = spa_find_allocator_byname(allocator); + if (a < 0) a = 0; + spa->spa_active_allocator = a; + zfs_dbgmsg("spa allocator: %s\n", metaslab_allocators[a].msop_name); +} + +int +spa_get_allocator(spa_t *spa) +{ + return (spa->spa_active_allocator); +} + +#if defined(_KERNEL) +int +param_set_active_allocator_common(const char *val) +{ + char *p; + + if (val == NULL) + return (SET_ERROR(EINVAL)); + + if ((p = strchr(val, '\n')) != NULL) + *p = '\0'; + + int a = spa_find_allocator_byname(val); + if (a < 0) + return (SET_ERROR(EINVAL)); + + zfs_active_allocator = metaslab_allocators[a].msop_name; + return (0); +} +#endif + +metaslab_ops_t * +metaslab_allocator(spa_t *spa) +{ + int allocator = spa_get_allocator(spa); + return (&metaslab_allocators[allocator]); +} + /* * ========================================================================== * Dynamic Fit (df) block allocator @@ -1736,12 +1796,6 @@ metaslab_df_alloc(metaslab_t *msp, 
uint64_t size) return (offset); } -const metaslab_ops_t zfs_metaslab_ops = { - metaslab_df_alloc -}; -#endif /* WITH_DF_BLOCK_ALLOCATOR */ - -#if defined(WITH_CF_BLOCK_ALLOCATOR) /* * ========================================================================== * Cursor fit block allocator - @@ -1784,12 +1838,6 @@ metaslab_cf_alloc(metaslab_t *msp, uint64_t size) return (offset); } -const metaslab_ops_t zfs_metaslab_ops = { - metaslab_cf_alloc -}; -#endif /* WITH_CF_BLOCK_ALLOCATOR */ - -#if defined(WITH_NDF_BLOCK_ALLOCATOR) /* * ========================================================================== * New dynamic fit allocator - @@ -1846,12 +1894,6 @@ metaslab_ndf_alloc(metaslab_t *msp, uint64_t size) return (-1ULL); } -const metaslab_ops_t zfs_metaslab_ops = { - metaslab_ndf_alloc -}; -#endif /* WITH_NDF_BLOCK_ALLOCATOR */ - - /* * ========================================================================== * Metaslabs @@ -6232,3 +6274,9 @@ ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, try_hard_before_gang, INT, ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, find_max_tries, UINT, ZMOD_RW, "Normally only consider this many of the best metaslabs in each vdev"); + +/* BEGIN CSTYLED */ +ZFS_MODULE_PARAM_CALL(zfs, zfs_, active_allocator, + param_set_active_allocator, param_get_charp, ZMOD_RW, + "SPA active allocator"); +/* END CSTYLED */ diff --git a/sys/contrib/openzfs/module/zfs/spa.c b/sys/contrib/openzfs/module/zfs/spa.c index 88ee4ea9f458..cda62f939c1e 100644 --- a/sys/contrib/openzfs/module/zfs/spa.c +++ b/sys/contrib/openzfs/module/zfs/spa.c @@ -1295,24 +1295,26 @@ spa_thread(void *arg) } #endif +extern metaslab_ops_t *metaslab_allocator(spa_t *spa); + /* * Activate an uninitialized pool. 
*/ static void spa_activate(spa_t *spa, spa_mode_t mode) { + metaslab_ops_t *msp = metaslab_allocator(spa); ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED); spa->spa_state = POOL_STATE_ACTIVE; spa->spa_mode = mode; spa->spa_read_spacemaps = spa_mode_readable_spacemaps; - spa->spa_normal_class = metaslab_class_create(spa, &zfs_metaslab_ops); - spa->spa_log_class = metaslab_class_create(spa, &zfs_metaslab_ops); - spa->spa_embedded_log_class = - metaslab_class_create(spa, &zfs_metaslab_ops); - spa->spa_special_class = metaslab_class_create(spa, &zfs_metaslab_ops); - spa->spa_dedup_class = metaslab_class_create(spa, &zfs_metaslab_ops); + spa->spa_normal_class = metaslab_class_create(spa, msp); + spa->spa_log_class = metaslab_class_create(spa, msp); + spa->spa_embedded_log_class = metaslab_class_create(spa, msp); + spa->spa_special_class = metaslab_class_create(spa, msp); + spa->spa_dedup_class = metaslab_class_create(spa, msp); /* Try to create a covering process */ mutex_enter(&spa->spa_proc_lock); diff --git a/sys/contrib/openzfs/module/zfs/spa_misc.c b/sys/contrib/openzfs/module/zfs/spa_misc.c index 3b355e0debcc..413476196b9f 100644 --- a/sys/contrib/openzfs/module/zfs/spa_misc.c +++ b/sys/contrib/openzfs/module/zfs/spa_misc.c @@ -389,6 +389,11 @@ static const uint64_t spa_min_slop = 128ULL * 1024 * 1024; static const uint64_t spa_max_slop = 128ULL * 1024 * 1024 * 1024; static const int spa_allocators = 4; +/* + * Spa active allocator. + * Valid values are zfs_active_allocator=<dynamic|cursor|new-dynamic>. + */ +const char *zfs_active_allocator = "dynamic"; void spa_load_failed(spa_t *spa, const char *fmt, ...) 
@@ -710,6 +715,7 @@ spa_add(const char *name, nvlist_t *config, const char *altroot) spa->spa_deadman_synctime = MSEC2NSEC(zfs_deadman_synctime_ms); spa->spa_deadman_ziotime = MSEC2NSEC(zfs_deadman_ziotime_ms); spa_set_deadman_failmode(spa, zfs_deadman_failmode); + spa_set_allocator(spa, zfs_active_allocator); zfs_refcount_create(&spa->spa_refcount); spa_config_lock_init(spa); diff --git a/sys/contrib/openzfs/module/zfs/zil.c b/sys/contrib/openzfs/module/zfs/zil.c index f2d279e36a96..b30676b42d88 100644 --- a/sys/contrib/openzfs/module/zfs/zil.c +++ b/sys/contrib/openzfs/module/zfs/zil.c @@ -814,17 +814,17 @@ static void zil_free_lwb(zilog_t *zilog, lwb_t *lwb) { ASSERT(MUTEX_HELD(&zilog->zl_lock)); - ASSERT(!MUTEX_HELD(&lwb->lwb_vdev_lock)); - VERIFY(list_is_empty(&lwb->lwb_waiters)); - VERIFY(list_is_empty(&lwb->lwb_itxs)); - ASSERT(avl_is_empty(&lwb->lwb_vdev_tree)); + ASSERT(lwb->lwb_state == LWB_STATE_NEW || + lwb->lwb_state == LWB_STATE_FLUSH_DONE); ASSERT3P(lwb->lwb_child_zio, ==, NULL); ASSERT3P(lwb->lwb_write_zio, ==, NULL); ASSERT3P(lwb->lwb_root_zio, ==, NULL); ASSERT3U(lwb->lwb_alloc_txg, <=, spa_syncing_txg(zilog->zl_spa)); ASSERT3U(lwb->lwb_max_txg, <=, spa_syncing_txg(zilog->zl_spa)); - ASSERT(lwb->lwb_state == LWB_STATE_NEW || - lwb->lwb_state == LWB_STATE_FLUSH_DONE); + VERIFY(list_is_empty(&lwb->lwb_itxs)); + VERIFY(list_is_empty(&lwb->lwb_waiters)); + ASSERT(avl_is_empty(&lwb->lwb_vdev_tree)); + ASSERT(!MUTEX_HELD(&lwb->lwb_vdev_lock)); /* * Clear the zilog's field to indicate this lwb is no longer @@ -1329,6 +1329,9 @@ zil_lwb_add_block(lwb_t *lwb, const blkptr_t *bp) int ndvas = BP_GET_NDVAS(bp); int i; + ASSERT3S(lwb->lwb_state, !=, LWB_STATE_WRITE_DONE); + ASSERT3S(lwb->lwb_state, !=, LWB_STATE_FLUSH_DONE); + if (zil_nocacheflush) return; @@ -1408,15 +1411,9 @@ zil_lwb_flush_vdevs_done(zio_t *zio) zilog_t *zilog = lwb->lwb_zilog; zil_commit_waiter_t *zcw; itx_t *itx; - uint64_t txg; - list_t itxs, waiters; spa_config_exit(zilog->zl_spa, 
SCL_STATE, lwb); - list_create(&itxs, sizeof (itx_t), offsetof(itx_t, itx_node)); - list_create(&waiters, sizeof (zil_commit_waiter_t), - offsetof(zil_commit_waiter_t, zcw_node)); - hrtime_t t = gethrtime() - lwb->lwb_issued_timestamp; mutex_enter(&zilog->zl_lock); @@ -1425,6 +1422,9 @@ zil_lwb_flush_vdevs_done(zio_t *zio) lwb->lwb_root_zio = NULL; + ASSERT3S(lwb->lwb_state, ==, LWB_STATE_WRITE_DONE); + lwb->lwb_state = LWB_STATE_FLUSH_DONE; + if (zilog->zl_last_lwb_opened == lwb) { /* * Remember the highest committed log sequence number @@ -1435,22 +1435,13 @@ zil_lwb_flush_vdevs_done(zio_t *zio) zilog->zl_commit_lr_seq = zilog->zl_lr_seq; } - list_move_tail(&itxs, &lwb->lwb_itxs); - list_move_tail(&waiters, &lwb->lwb_waiters); - txg = lwb->lwb_issued_txg; - - ASSERT3S(lwb->lwb_state, ==, LWB_STATE_WRITE_DONE); - lwb->lwb_state = LWB_STATE_FLUSH_DONE; - - mutex_exit(&zilog->zl_lock); - - while ((itx = list_remove_head(&itxs)) != NULL) + while ((itx = list_remove_head(&lwb->lwb_itxs)) != NULL) zil_itx_destroy(itx); - list_destroy(&itxs); - while ((zcw = list_remove_head(&waiters)) != NULL) { + while ((zcw = list_remove_head(&lwb->lwb_waiters)) != NULL) { mutex_enter(&zcw->zcw_lock); + ASSERT3P(zcw->zcw_lwb, ==, lwb); zcw->zcw_lwb = NULL; /* * We expect any ZIO errors from child ZIOs to have been @@ -1475,7 +1466,11 @@ zil_lwb_flush_vdevs_done(zio_t *zio) mutex_exit(&zcw->zcw_lock); } - list_destroy(&waiters); + + uint64_t txg = lwb->lwb_issued_txg; + + /* Once we drop the lock, lwb may be freed by zil_sync(). 
*/ + mutex_exit(&zilog->zl_lock); mutex_enter(&zilog->zl_lwb_io_lock); ASSERT3U(zilog->zl_lwb_inflight[txg & TXG_MASK], >, 0); @@ -1929,10 +1924,10 @@ next_lwb: BP_GET_LSIZE(&lwb->lwb_blk)); } lwb->lwb_issued_timestamp = gethrtime(); - zio_nowait(lwb->lwb_root_zio); - zio_nowait(lwb->lwb_write_zio); if (lwb->lwb_child_zio) zio_nowait(lwb->lwb_child_zio); + zio_nowait(lwb->lwb_write_zio); + zio_nowait(lwb->lwb_root_zio); /* * If nlwb was ready when we gave it the block pointer, |