diff options
37 files changed, 406 insertions, 178 deletions
diff --git a/sys/contrib/openzfs/cmd/zdb/zdb.c b/sys/contrib/openzfs/cmd/zdb/zdb.c index 4b9921d47b81..87499cdc95cb 100644 --- a/sys/contrib/openzfs/cmd/zdb/zdb.c +++ b/sys/contrib/openzfs/cmd/zdb/zdb.c @@ -5293,8 +5293,18 @@ dump_one_objset(const char *dsname, void *arg) avl_first(&dmu_objset_ds(os)->ds_bookmarks); dbn != NULL; dbn = AVL_NEXT(&dmu_objset_ds(os)->ds_bookmarks, dbn)) { mos_obj_refd(dbn->dbn_phys.zbm_redaction_obj); - if (dbn->dbn_phys.zbm_redaction_obj != 0) - global_feature_count[SPA_FEATURE_REDACTION_BOOKMARKS]++; + if (dbn->dbn_phys.zbm_redaction_obj != 0) { + global_feature_count[ + SPA_FEATURE_REDACTION_BOOKMARKS]++; + objset_t *mos = os->os_spa->spa_meta_objset; + dnode_t *rl; + VERIFY0(dnode_hold(mos, + dbn->dbn_phys.zbm_redaction_obj, FTAG, &rl)); + if (rl->dn_have_spill) { + global_feature_count[ + SPA_FEATURE_REDACTION_LIST_SPILL]++; + } + } if (dbn->dbn_phys.zbm_flags & ZBM_FLAG_HAS_FBN) global_feature_count[SPA_FEATURE_BOOKMARK_WRITTEN]++; } @@ -8135,6 +8145,7 @@ dump_zpool(spa_t *spa) for (spa_feature_t f = 0; f < SPA_FEATURES; f++) global_feature_count[f] = UINT64_MAX; global_feature_count[SPA_FEATURE_REDACTION_BOOKMARKS] = 0; + global_feature_count[SPA_FEATURE_REDACTION_LIST_SPILL] = 0; global_feature_count[SPA_FEATURE_BOOKMARK_WRITTEN] = 0; global_feature_count[SPA_FEATURE_LIVELIST] = 0; diff --git a/sys/contrib/openzfs/cmd/zed/zed.d/Makefile.am b/sys/contrib/openzfs/cmd/zed/zed.d/Makefile.am index c65b43fb027e..812558cf6d0f 100644 --- a/sys/contrib/openzfs/cmd/zed/zed.d/Makefile.am +++ b/sys/contrib/openzfs/cmd/zed/zed.d/Makefile.am @@ -16,6 +16,7 @@ dist_zedexec_SCRIPTS = \ %D%/scrub_finish-notify.sh \ %D%/statechange-led.sh \ %D%/statechange-notify.sh \ + %D%/statechange-slot_off.sh \ %D%/trim_finish-notify.sh \ %D%/vdev_attach-led.sh \ %D%/vdev_clear-led.sh @@ -35,6 +36,7 @@ zedconfdefaults = \ scrub_finish-notify.sh \ statechange-led.sh \ statechange-notify.sh \ + statechange-slot_off.sh \ vdev_attach-led.sh \ vdev_clear-led.sh diff --git a/sys/contrib/openzfs/cmd/zed/zed.d/statechange-slot_off.sh b/sys/contrib/openzfs/cmd/zed/zed.d/statechange-slot_off.sh index d6f3c94a4197..150012abe71a 100755 --- a/sys/contrib/openzfs/cmd/zed/zed.d/statechange-slot_off.sh +++ b/sys/contrib/openzfs/cmd/zed/zed.d/statechange-slot_off.sh @@ -1,4 +1,5 @@ #!/bin/sh +# shellcheck disable=SC3014,SC2154,SC2086,SC2034 # # Turn off disk's enclosure slot if it becomes FAULTED. # @@ -43,15 +44,17 @@ if [ ! -f "$ZEVENT_VDEV_ENC_SYSFS_PATH/power_status" ] ; then exit 4 fi -echo "off" | tee "$ZEVENT_VDEV_ENC_SYSFS_PATH/power_status" - -# Wait for sysfs for report that the slot is off. It can take ~400ms on some -# enclosures. +# Turn off the slot and wait for sysfs to report that the slot is off. +# It can take ~400ms on some enclosures and multiple retries may be needed. for i in $(seq 1 20) ; do - if [ "$(cat $ZEVENT_VDEV_ENC_SYSFS_PATH/power_status)" == "off" ] ; then - break - fi - sleep 0.1 + echo "off" | tee "$ZEVENT_VDEV_ENC_SYSFS_PATH/power_status" + + for j in $(seq 1 5) ; do + if [ "$(cat $ZEVENT_VDEV_ENC_SYSFS_PATH/power_status)" == "off" ] ; then + break 2 + fi + sleep 0.1 + done done if [ "$(cat $ZEVENT_VDEV_ENC_SYSFS_PATH/power_status)" != "off" ] ; then diff --git a/sys/contrib/openzfs/cmd/zfs/zfs_main.c b/sys/contrib/openzfs/cmd/zfs/zfs_main.c index afd330630f69..47fbb8474c9c 100644 --- a/sys/contrib/openzfs/cmd/zfs/zfs_main.c +++ b/sys/contrib/openzfs/cmd/zfs/zfs_main.c @@ -3978,6 +3978,10 @@ zfs_do_redact(int argc, char **argv) (void) fprintf(stderr, gettext("potentially invalid redaction " "snapshot; full dataset names required\n")); break; + case ESRCH: + (void) fprintf(stderr, gettext("attempted to resume redaction " + " with a mismatched redaction list\n")); + break; default: (void) fprintf(stderr, gettext("internal error: %s\n"), strerror(errno)); diff --git a/sys/contrib/openzfs/cmd/zpool/zpool_main.c b/sys/contrib/openzfs/cmd/zpool/zpool_main.c index 10a3b5b14fc9..6d0dae8d8b05 100644 --- a/sys/contrib/openzfs/cmd/zpool/zpool_main.c +++ b/sys/contrib/openzfs/cmd/zpool/zpool_main.c @@ -3143,6 +3143,7 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts, nvlist_t *props, int flags) { int ret = 0; + int ms_status = 0; zpool_handle_t *zhp; const char *name; uint64_t version; @@ -3232,10 +3233,15 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts, ret = 1; if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL && - !(flags & ZFS_IMPORT_ONLY) && - zpool_enable_datasets(zhp, mntopts, 0) != 0) { - zpool_close(zhp); - return (1); + !(flags & ZFS_IMPORT_ONLY)) { + ms_status = zpool_enable_datasets(zhp, mntopts, 0); + if (ms_status == EZFS_SHAREFAILED) { + (void) fprintf(stderr, gettext("Import was " + "successful, but unable to share some datasets")); + } else if (ms_status == EZFS_MOUNTFAILED) { + (void) fprintf(stderr, gettext("Import was " + "successful, but unable to mount some datasets")); + } } zpool_close(zhp); @@ -6755,6 +6761,7 @@ zpool_do_split(int argc, char **argv) char *mntopts = NULL; splitflags_t flags; int c, ret = 0; + int ms_status = 0; boolean_t loadkeys = B_FALSE; zpool_handle_t *zhp; nvlist_t *config, *props = NULL; @@ -6891,13 +6898,18 @@ zpool_do_split(int argc, char **argv) ret = 1; } - if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL && - zpool_enable_datasets(zhp, mntopts, 0) != 0) { - ret = 1; - (void) fprintf(stderr, gettext("Split was successful, but " - "the datasets could not all be mounted\n")); - (void) fprintf(stderr, gettext("Try doing '%s' with a " - "different altroot\n"), "zpool import"); + if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL) { + ms_status = zpool_enable_datasets(zhp, mntopts, 0); + if (ms_status == EZFS_SHAREFAILED) { + (void) fprintf(stderr, gettext("Split was successful, " + "datasets are mounted but sharing of some datasets " + "has failed\n")); + } else if (ms_status == EZFS_MOUNTFAILED) { + (void) fprintf(stderr, gettext("Split was successful" + ", but some datasets could not be mounted\n")); + (void) fprintf(stderr, gettext("Try doing '%s' with a " + "different altroot\n"), "zpool import"); + } } zpool_close(zhp); nvlist_free(config); diff --git a/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.install b/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.install index 0f58508f0062..301d8f67b3af 100644 --- a/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.install +++ b/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.install @@ -60,7 +60,6 @@ usr/share/man/man8/zfs-get.8 usr/share/man/man8/zfs-groupspace.8 usr/share/man/man8/zfs-hold.8 usr/share/man/man8/zfs-inherit.8 -usr/share/man/man8/zfs-jail.8 usr/share/man/man8/zfs-list.8 usr/share/man/man8/zfs-load-key.8 usr/share/man/man8/zfs-mount-generator.8 @@ -80,7 +79,6 @@ usr/share/man/man8/zfs-set.8 usr/share/man/man8/zfs-share.8 usr/share/man/man8/zfs-snapshot.8 usr/share/man/man8/zfs-unallow.8 -usr/share/man/man8/zfs-unjail.8 usr/share/man/man8/zfs-unload-key.8 usr/share/man/man8/zfs-unmount.8 usr/share/man/man8/zfs-unzone.8 diff --git a/sys/contrib/openzfs/include/libzfs.h b/sys/contrib/openzfs/include/libzfs.h index a7037e3e6266..fa05b7921bb5 100644 --- a/sys/contrib/openzfs/include/libzfs.h +++ b/sys/contrib/openzfs/include/libzfs.h @@ -156,6 +156,7 @@ typedef enum zfs_error { EZFS_NOT_USER_NAMESPACE, /* a file is not a user namespace */ EZFS_CKSUM, /* insufficient replicas */ EZFS_RESUME_EXISTS, /* Resume on existing dataset without force */ + EZFS_SHAREFAILED, /* filesystem share failed */ EZFS_UNKNOWN } zfs_error_t; diff --git a/sys/contrib/openzfs/include/os/freebsd/spl/sys/mod_os.h b/sys/contrib/openzfs/include/os/freebsd/spl/sys/mod_os.h index 77ce75ca3f11..08d983c51f1e 100644 --- a/sys/contrib/openzfs/include/os/freebsd/spl/sys/mod_os.h +++ b/sys/contrib/openzfs/include/os/freebsd/spl/sys/mod_os.h @@ -73,6 +73,9 @@ #define param_set_deadman_failmode_args(var) \ CTLTYPE_STRING, NULL, 0, param_set_deadman_failmode, "A" +#define param_set_active_allocator_args(var) \ + CTLTYPE_STRING, NULL, 0, param_set_active_allocator, "A" + #define param_set_deadman_synctime_args(var) \ CTLTYPE_U64, NULL, 0, param_set_deadman_synctime, "QU" diff --git a/sys/contrib/openzfs/include/os/linux/kernel/linux/simd_x86.h b/sys/contrib/openzfs/include/os/linux/kernel/linux/simd_x86.h index 1d77f0487a30..699b8a571824 100644 --- a/sys/contrib/openzfs/include/os/linux/kernel/linux/simd_x86.h +++ b/sys/contrib/openzfs/include/os/linux/kernel/linux/simd_x86.h @@ -147,6 +147,15 @@ #error "Toolchain needs to support the XSAVE assembler instruction" #endif +#ifndef XFEATURE_MASK_XTILE +/* + * For kernels where this doesn't exist yet, we still don't want to break + * by save/restoring this broken nonsense. + * See issue #14989 or Intel errata SPR4 for why + */ +#define XFEATURE_MASK_XTILE 0x60000 +#endif + #include <linux/mm.h> #include <linux/slab.h> @@ -315,18 +324,18 @@ kfpu_begin(void) uint8_t *state = zfs_kfpu_fpregs[smp_processor_id()]; #if defined(HAVE_XSAVES) if (static_cpu_has(X86_FEATURE_XSAVES)) { - kfpu_do_xsave("xsaves", state, ~0); + kfpu_do_xsave("xsaves", state, ~XFEATURE_MASK_XTILE); return; } #endif #if defined(HAVE_XSAVEOPT) if (static_cpu_has(X86_FEATURE_XSAVEOPT)) { - kfpu_do_xsave("xsaveopt", state, ~0); + kfpu_do_xsave("xsaveopt", state, ~XFEATURE_MASK_XTILE); return; } #endif if (static_cpu_has(X86_FEATURE_XSAVE)) { - kfpu_do_xsave("xsave", state, ~0); + kfpu_do_xsave("xsave", state, ~XFEATURE_MASK_XTILE); } else if (static_cpu_has(X86_FEATURE_FXSR)) { kfpu_save_fxsr(state); } else { @@ -376,12 +385,12 @@ kfpu_end(void) uint8_t *state = zfs_kfpu_fpregs[smp_processor_id()]; #if defined(HAVE_XSAVES) if (static_cpu_has(X86_FEATURE_XSAVES)) { - kfpu_do_xrstor("xrstors", state, ~0); + kfpu_do_xrstor("xrstors", state, ~XFEATURE_MASK_XTILE); goto out; } #endif if (static_cpu_has(X86_FEATURE_XSAVE)) { - kfpu_do_xrstor("xrstor", state, ~0); + kfpu_do_xrstor("xrstor", state, ~XFEATURE_MASK_XTILE); } else if (static_cpu_has(X86_FEATURE_FXSR)) { kfpu_restore_fxsr(state); } else { diff --git a/sys/contrib/openzfs/include/sys/dsl_bookmark.h b/sys/contrib/openzfs/include/sys/dsl_bookmark.h index 353c5c2d260f..d4e559a09037 100644 --- a/sys/contrib/openzfs/include/sys/dsl_bookmark.h +++ b/sys/contrib/openzfs/include/sys/dsl_bookmark.h @@ -72,6 +72,7 @@ typedef struct redaction_list_phys { typedef struct redaction_list { dmu_buf_user_t rl_dbu; redaction_list_phys_t *rl_phys; + dmu_buf_t *rl_bonus; dmu_buf_t *rl_dbuf; uint64_t rl_object; zfs_refcount_t rl_longholds; diff --git a/sys/contrib/openzfs/include/sys/metaslab.h b/sys/contrib/openzfs/include/sys/metaslab.h index 0df6e5f81fc1..815b5d0c9cf1 100644 --- a/sys/contrib/openzfs/include/sys/metaslab.h +++ b/sys/contrib/openzfs/include/sys/metaslab.h @@ -39,6 +39,7 @@ extern "C" { typedef struct metaslab_ops { + const char *msop_name; uint64_t (*msop_alloc)(metaslab_t *, uint64_t); } metaslab_ops_t; diff --git a/sys/contrib/openzfs/include/sys/spa.h b/sys/contrib/openzfs/include/sys/spa.h index b90855687411..18062d3f2a95 100644 --- a/sys/contrib/openzfs/include/sys/spa.h +++ b/sys/contrib/openzfs/include/sys/spa.h @@ -1056,6 +1056,8 @@ extern uint64_t spa_deadman_synctime(spa_t *spa); extern uint64_t spa_deadman_ziotime(spa_t *spa); extern uint64_t spa_dirty_data(spa_t *spa); extern spa_autotrim_t spa_get_autotrim(spa_t *spa); +extern int spa_get_allocator(spa_t *spa); +extern void spa_set_allocator(spa_t *spa, const char *allocator); /* Miscellaneous support routines */ extern void spa_load_failed(spa_t *spa, const char *fmt, ...) @@ -1207,6 +1209,7 @@ int param_set_deadman_ziotime(ZFS_MODULE_PARAM_ARGS); int param_set_deadman_synctime(ZFS_MODULE_PARAM_ARGS); int param_set_slop_shift(ZFS_MODULE_PARAM_ARGS); int param_set_deadman_failmode(ZFS_MODULE_PARAM_ARGS); +int param_set_active_allocator(ZFS_MODULE_PARAM_ARGS); #ifdef ZFS_DEBUG #define dprintf_bp(bp, fmt, ...) do { \ diff --git a/sys/contrib/openzfs/include/sys/spa_impl.h b/sys/contrib/openzfs/include/sys/spa_impl.h index 588c72f6e4fa..1a04bedc3137 100644 --- a/sys/contrib/openzfs/include/sys/spa_impl.h +++ b/sys/contrib/openzfs/include/sys/spa_impl.h @@ -263,6 +263,7 @@ struct spa { */ spa_alloc_t *spa_allocs; int spa_alloc_count; + int spa_active_allocator; /* selectable allocator */ spa_aux_vdev_t spa_spares; /* hot spares */ spa_aux_vdev_t spa_l2cache; /* L2ARC cache devices */ @@ -467,6 +468,8 @@ extern int param_set_deadman_failmode_common(const char *val); extern void spa_set_deadman_synctime(hrtime_t ns); extern void spa_set_deadman_ziotime(hrtime_t ns); extern const char *spa_history_zone(void); +extern const char *zfs_active_allocator; +extern int param_set_active_allocator_common(const char *val); #ifdef __cplusplus } diff --git a/sys/contrib/openzfs/include/zfeature_common.h b/sys/contrib/openzfs/include/zfeature_common.h index 7066c699e203..1025c44738ba 100644 --- a/sys/contrib/openzfs/include/zfeature_common.h +++ b/sys/contrib/openzfs/include/zfeature_common.h @@ -80,6 +80,7 @@ typedef enum spa_feature { SPA_FEATURE_BLAKE3, SPA_FEATURE_BLOCK_CLONING, SPA_FEATURE_AVZ_V2, + SPA_FEATURE_REDACTION_LIST_SPILL, SPA_FEATURES } spa_feature_t; diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs.abi b/sys/contrib/openzfs/lib/libzfs/libzfs.abi index 6e53bcb41a87..0a8e9bcbd74d 100644 --- a/sys/contrib/openzfs/lib/libzfs/libzfs.abi +++ b/sys/contrib/openzfs/lib/libzfs/libzfs.abi @@ -596,7 +596,7 @@ <elf-symbol name='fletcher_4_superscalar_ops' size='128' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='libzfs_config_ops' size='16' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='sa_protocol_names' size='16' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> - <elf-symbol name='spa_feature_table' size='2184' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> + <elf-symbol name='spa_feature_table' size='2240' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='zfeature_checks_disable' size='4' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='zfs_deleg_perm_tab' size='512' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='zfs_history_event_names' size='328' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> @@ -5809,7 +5809,8 @@ <enumerator name='SPA_FEATURE_BLAKE3' value='36'/> <enumerator name='SPA_FEATURE_BLOCK_CLONING' value='37'/> <enumerator name='SPA_FEATURE_AVZ_V2' value='38'/> - <enumerator name='SPA_FEATURES' value='39'/> + <enumerator name='SPA_FEATURE_REDACTION_LIST_SPILL' value='39'/> + <enumerator name='SPA_FEATURES' value='40'/> </enum-decl> <typedef-decl name='spa_feature_t' type-id='33ecb627' id='d6618c78'/> <qualified-type-def type-id='22cce67b' const='yes' id='d2816df0'/> @@ -8706,8 +8707,8 @@ </function-decl> </abi-instr> <abi-instr address-size='64' path='module/zcommon/zfeature_common.c' language='LANG_C99'> - <array-type-def dimensions='1' type-id='83f29ca2' size-in-bits='17472' id='dd432c71'> - <subrange length='39' type-id='7359adad' id='ae4a9561'/> + <array-type-def dimensions='1' type-id='83f29ca2' size-in-bits='17920' id='dd432c71'> + <subrange length='40' type-id='7359adad' id='ae4a9561'/> </array-type-def> <enum-decl name='zfeature_flags' id='6db816a4'> <underlying-type type-id='9cac1fee'/> diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_mount.c b/sys/contrib/openzfs/lib/libzfs/libzfs_mount.c index 5d1fe651c97e..b38ad88096b2 100644 --- a/sys/contrib/openzfs/lib/libzfs/libzfs_mount.c +++ b/sys/contrib/openzfs/lib/libzfs/libzfs_mount.c @@ -1300,7 +1300,7 @@ zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags) zfs_foreach_mountpoint(zhp->zpool_hdl, cb.cb_handles, cb.cb_used, zfs_mount_one, &ms, B_TRUE); if (ms.ms_mntstatus != 0) - ret = ms.ms_mntstatus; + ret = EZFS_MOUNTFAILED; /* * Share all filesystems that need to be shared. This needs to be @@ -1311,7 +1311,7 @@ zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags) zfs_foreach_mountpoint(zhp->zpool_hdl, cb.cb_handles, cb.cb_used, zfs_share_one, &ms, B_FALSE); if (ms.ms_mntstatus != 0) - ret = ms.ms_mntstatus; + ret = EZFS_SHAREFAILED; else zfs_commit_shares(NULL); diff --git a/sys/contrib/openzfs/man/Makefile.am b/sys/contrib/openzfs/man/Makefile.am index 2973520324fd..36c1aede106e 100644 --- a/sys/contrib/openzfs/man/Makefile.am +++ b/sys/contrib/openzfs/man/Makefile.am @@ -38,7 +38,6 @@ dist_man_MANS = \ %D%/man8/zfs-groupspace.8 \ %D%/man8/zfs-hold.8 \ %D%/man8/zfs-inherit.8 \ - %D%/man8/zfs-jail.8 \ %D%/man8/zfs-list.8 \ %D%/man8/zfs-load-key.8 \ %D%/man8/zfs-mount.8 \ @@ -57,14 +56,11 @@ dist_man_MANS = \ %D%/man8/zfs-share.8 \ %D%/man8/zfs-snapshot.8 \ %D%/man8/zfs-unallow.8 \ - %D%/man8/zfs-unjail.8 \ %D%/man8/zfs-unload-key.8 \ %D%/man8/zfs-unmount.8 \ - %D%/man8/zfs-unzone.8 \ %D%/man8/zfs-upgrade.8 \ %D%/man8/zfs-userspace.8 \ %D%/man8/zfs-wait.8 \ - %D%/man8/zfs-zone.8 \ %D%/man8/zfs_ids_to_path.8 \ %D%/man8/zgenhostid.8 \ %D%/man8/zinject.8 \ @@ -104,6 +100,18 @@ dist_man_MANS = \ %D%/man8/zstreamdump.8 \ %D%/man8/zpool_influxdb.8 +if BUILD_FREEBSD +dist_man_MANS += \ + %D%/man8/zfs-jail.8 \ + %D%/man8/zfs-unjail.8 +endif + +if BUILD_LINUX +dist_man_MANS += \ + %D%/man8/zfs-unzone.8 \ + %D%/man8/zfs-zone.8 +endif + nodist_man_MANS = \ %D%/man8/zed.8 \ %D%/man8/zfs-mount-generator.8 diff --git a/sys/contrib/openzfs/man/man7/zfsprops.7 b/sys/contrib/openzfs/man/man7/zfsprops.7 index 8f6b919cfc0b..51ddd85eb79e 100644 --- a/sys/contrib/openzfs/man/man7/zfsprops.7 +++ b/sys/contrib/openzfs/man/man7/zfsprops.7 @@ -38,7 +38,7 @@ .\" Copyright (c) 2019, Kjeld Schouten-Lebbing .\" Copyright (c) 2022 Hewlett Packard Enterprise Development LP. .\" -.Dd April 18, 2023 +.Dd August 8, 2023 .Dt ZFSPROPS 7 .Os . @@ -1916,13 +1916,15 @@ See for more information. Jails are a .Fx -feature and are not relevant on other platforms. -The default value is -.Sy off . -.It Sy zoned Ns = Ns Sy on Ns | Ns Sy off +feature and this property is not available on other platforms. +.It Sy zoned Ns = Ns Sy off Ns | Ns Sy on Controls whether the dataset is managed from a non-global zone or namespace. -The default value is -.Sy off . +See +.Xr zfs-zone 8 +for more information. +Zoning is a +Linux +feature and this property is not available on other platforms. .El .Pp The following three properties cannot be changed after the file system is diff --git a/sys/contrib/openzfs/man/man7/zpool-features.7 b/sys/contrib/openzfs/man/man7/zpool-features.7 index b901ce6c2935..3c7b0b345d96 100644 --- a/sys/contrib/openzfs/man/man7/zpool-features.7 +++ b/sys/contrib/openzfs/man/man7/zpool-features.7 @@ -947,6 +947,18 @@ once all filesystems that have ever had their property set to .Sy zstd are destroyed. +. +.feature com.delphix redaction_list_spill no redaction_bookmarks +This feature enables the redaction list created by zfs redact to store +many more entries. +It becomes +.Sy active +when a redaction list is created with more than 36 entries, +and returns to being +.Sy enabled +when no long redaction lists remain in the pool. +For more information about redacted sends, see +.Xr zfs-send 8 . .El . .Sh SEE ALSO diff --git a/sys/contrib/openzfs/man/man8/zpool.8 b/sys/contrib/openzfs/man/man8/zpool.8 index e8eadffa6fcf..4c4020bdd810 100644 --- a/sys/contrib/openzfs/man/man8/zpool.8 +++ b/sys/contrib/openzfs/man/man8/zpool.8 @@ -110,9 +110,10 @@ Removes ZFS label information from the specified .It Xo .Xr zpool-attach 8 Ns / Ns Xr zpool-detach 8 .Xc -Increases or decreases redundancy by -.Cm attach Ns ing or -.Cm detach Ns ing a device on an existing vdev (virtual device). +Converts a non-redundant disk into a mirror, or increases +the redundancy level of an existing mirror +.Cm ( attach Ns ), or performs the inverse operation ( +.Cm detach Ns ). .It Xo .Xr zpool-add 8 Ns / Ns Xr zpool-remove 8 .Xc @@ -233,16 +234,16 @@ Invalid command line options were specified. .El . .Sh EXAMPLES -.\" Examples 1, 2, 3, 4, 11, 12 are shared with zpool-create.8. -.\" Examples 5, 13 are shared with zpool-add.8. -.\" Examples 6, 15 are shared with zpool-list.8. -.\" Examples 7 are shared with zpool-destroy.8. -.\" Examples 8 are shared with zpool-export.8. -.\" Examples 9 are shared with zpool-import.8. -.\" Examples 10 are shared with zpool-upgrade.8. -.\" Examples 14 are shared with zpool-remove.8. -.\" Examples 16 are shared with zpool-status.8. -.\" Examples 13, 16 are also shared with zpool-iostat.8. +.\" Examples 1, 2, 3, 4, 12, 13 are shared with zpool-create.8. +.\" Examples 6, 14 are shared with zpool-add.8. +.\" Examples 7, 16 are shared with zpool-list.8. +.\" Examples 8 are shared with zpool-destroy.8. +.\" Examples 9 are shared with zpool-export.8. +.\" Examples 10 are shared with zpool-import.8. +.\" Examples 11 are shared with zpool-upgrade.8. +.\" Examples 15 are shared with zpool-remove.8. +.\" Examples 17 are shared with zpool-status.8. +.\" Examples 14, 17 are also shared with zpool-iostat.8. .\" Make sure to update them omnidirectionally .Ss Example 1 : No Creating a RAID-Z Storage Pool The following command creates a pool with a single raidz root vdev that @@ -264,14 +265,21 @@ While not recommended, a pool based on files can be useful for experimental purposes. .Dl # Nm zpool Cm create Ar tank Pa /path/to/file/a /path/to/file/b . -.Ss Example 5 : No Adding a Mirror to a ZFS Storage Pool +.Ss Example 5 : No Making a non-mirrored ZFS Storage Pool mirrored +The following command converts an existing single device +.Ar sda +into a mirror by attaching a second device to it, +.Ar sdb . +.Dl # Nm zpool Cm attach Ar tank Pa sda sdb +. +.Ss Example 6 : No Adding a Mirror to a ZFS Storage Pool The following command adds two mirrored disks to the pool .Ar tank , assuming the pool is already made up of two-way mirrors. The additional space is immediately available to any datasets within the pool. .Dl # Nm zpool Cm add Ar tank Sy mirror Pa sda sdb . -.Ss Example 6 : No Listing Available ZFS Storage Pools +.Ss Example 7 : No Listing Available ZFS Storage Pools The following command lists all available pools on the system. In this case, the pool .Ar zion @@ -285,19 +293,19 @@ tank 61.5G 20.0G 41.5G - 48% 32% 1.00x ONLINE - zion - - - - - - - FAULTED - .Ed . -.Ss Example 7 : No Destroying a ZFS Storage Pool +.Ss Example 8 : No Destroying a ZFS Storage Pool The following command destroys the pool .Ar tank and any datasets contained within: .Dl # Nm zpool Cm destroy Fl f Ar tank . -.Ss Example 8 : No Exporting a ZFS Storage Pool +.Ss Example 9 : No Exporting a ZFS Storage Pool The following command exports the devices in pool .Ar tank so that they can be relocated or later imported: .Dl # Nm zpool Cm export Ar tank . -.Ss Example 9 : No Importing a ZFS Storage Pool +.Ss Example 10 : No Importing a ZFS Storage Pool The following command displays available pools, and then imports the pool .Ar tank for use on the system. @@ -318,7 +326,7 @@ config: .No # Nm zpool Cm import Ar tank .Ed . -.Ss Example 10 : No Upgrading All ZFS Storage Pools to the Current Version +.Ss Example 11 : No Upgrading All ZFS Storage Pools to the Current Version The following command upgrades all ZFS Storage pools to the current version of the software: .Bd -literal -compact -offset Ds @@ -326,7 +334,7 @@ the software: This system is currently running ZFS version 2. .Ed . -.Ss Example 11 : No Managing Hot Spares +.Ss Example 12 : No Managing Hot Spares The following command creates a new pool with an available hot spare: .Dl # Nm zpool Cm create Ar tank Sy mirror Pa sda sdb Sy spare Pa sdc .Pp @@ -341,12 +349,12 @@ The hot spare can be permanently removed from the pool using the following command: .Dl # Nm zpool Cm remove Ar tank Pa sdc . -.Ss Example 12 : No Creating a ZFS Pool with Mirrored Separate Intent Logs +.Ss Example 13 : No Creating a ZFS Pool with Mirrored Separate Intent Logs The following command creates a ZFS storage pool consisting of two, two-way mirrors and mirrored log devices: .Dl # Nm zpool Cm create Ar pool Sy mirror Pa sda sdb Sy mirror Pa sdc sdd Sy log mirror Pa sde sdf . -.Ss Example 13 : No Adding Cache Devices to a ZFS Pool +.Ss Example 14 : No Adding Cache Devices to a ZFS Pool The following command adds two disks for use as cache devices to a ZFS storage pool: .Dl # Nm zpool Cm add Ar pool Sy cache Pa sdc sdd @@ -359,7 +367,7 @@ Capacity and reads can be monitored using the subcommand as follows: .Dl # Nm zpool Cm iostat Fl v Ar pool 5 . -.Ss Example 14 : No Removing a Mirrored top-level (Log or Data) Device +.Ss Example 15 : No Removing a Mirrored top-level (Log or Data) Device The following commands remove the mirrored log device .Sy mirror-2 and mirrored top-level data device @@ -394,7 +402,7 @@ The command to remove the mirrored data .Ar mirror-1 No is : .Dl # Nm zpool Cm remove Ar tank mirror-1 . -.Ss Example 15 : No Displaying expanded space on a device +.Ss Example 16 : No Displaying expanded space on a device The following command displays the detailed information for the pool .Ar data . This pool is comprised of a single raidz vdev where one of its devices @@ -411,7 +419,7 @@ data 23.9G 14.6G 9.30G - 48% 61% 1.00x ONLINE - sdc - - - - - .Ed . -.Ss Example 16 : No Adding output columns +.Ss Example 17 : No Adding output columns Additional columns can be added to the .Nm zpool Cm status No and Nm zpool Cm iostat No output with Fl c . .Bd -literal -compact -offset Ds diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c index 8ae2f23c3ecf..ba9a95e4a66d 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c @@ -503,6 +503,24 @@ SYSCTL_UINT(_vfs_zfs_zfetch, OID_AUTO, max_idistance, /* metaslab.c */ +int +param_set_active_allocator(SYSCTL_HANDLER_ARGS) +{ + char buf[16]; + int rc; + + if (req->newptr == NULL) + strlcpy(buf, zfs_active_allocator, sizeof (buf)); + + rc = sysctl_handle_string(oidp, buf, sizeof (buf), req); + if (rc || req->newptr == NULL) + return (rc); + if (strcmp(buf, zfs_active_allocator) == 0) + return (0); + + return (param_set_active_allocator_common(buf)); +} + /* * In pools where the log space map feature is not enabled we touch * multiple metaslabs (and their respective space maps) with each diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-proc.c b/sys/contrib/openzfs/module/os/linux/spl/spl-proc.c index bcc356ae55b6..5cb5a6dadb05 100644 --- a/sys/contrib/openzfs/module/os/linux/spl/spl-proc.c +++ b/sys/contrib/openzfs/module/os/linux/spl/spl-proc.c @@ -659,6 +659,21 @@ static struct ctl_table spl_root[] = { }; #endif +static void spl_proc_cleanup(void) +{ + remove_proc_entry("kstat", proc_spl); + remove_proc_entry("slab", proc_spl_kmem); + remove_proc_entry("kmem", proc_spl); + remove_proc_entry("taskq-all", proc_spl); + remove_proc_entry("taskq", proc_spl); + remove_proc_entry("spl", NULL); + + if (spl_header) { + unregister_sysctl_table(spl_header); + spl_header = NULL; + } +} + int spl_proc_init(void) { @@ -723,15 +738,8 @@ spl_proc_init(void) goto out; } out: - if (rc) { - remove_proc_entry("kstat", proc_spl); - remove_proc_entry("slab", proc_spl_kmem); - remove_proc_entry("kmem", proc_spl); - remove_proc_entry("taskq-all", proc_spl); - remove_proc_entry("taskq", proc_spl); - remove_proc_entry("spl", NULL); - unregister_sysctl_table(spl_header); - } + if (rc) + spl_proc_cleanup(); return (rc); } @@ -739,13 +747,5 @@ out: void spl_proc_fini(void) { - remove_proc_entry("kstat", proc_spl); - remove_proc_entry("slab", proc_spl_kmem); - remove_proc_entry("kmem", proc_spl); - remove_proc_entry("taskq-all", proc_spl); - remove_proc_entry("taskq", proc_spl); - remove_proc_entry("spl", NULL); - - ASSERT(spl_header != NULL); - unregister_sysctl_table(spl_header); + spl_proc_cleanup(); } diff --git a/sys/contrib/openzfs/module/os/linux/zfs/spa_misc_os.c b/sys/contrib/openzfs/module/os/linux/zfs/spa_misc_os.c index 3efc8b9644fd..c8cbedcd5157 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/spa_misc_os.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/spa_misc_os.c @@ -103,6 +103,18 @@ param_set_slop_shift(const char *buf, zfs_kernel_param_t *kp) return (0); } +int +param_set_active_allocator(const char *val, zfs_kernel_param_t *kp) +{ + int error; + + error = -param_set_active_allocator_common(val); + if (error == 0) + error = param_set_charp(val, kp); + + return (error); +} + const char * spa_history_zone(void) { diff --git a/sys/contrib/openzfs/module/zcommon/zfeature_common.c b/sys/contrib/openzfs/module/zcommon/zfeature_common.c index 4c9b7ed72a0f..2c74d10f43ff 100644 --- a/sys/contrib/openzfs/module/zcommon/zfeature_common.c +++ b/sys/contrib/openzfs/module/zcommon/zfeature_common.c @@ -737,6 +737,18 @@ zpool_feature_init(void) ZFEATURE_FLAG_MOS, ZFEATURE_TYPE_BOOLEAN, NULL, sfeatures); + { + static const spa_feature_t redact_list_spill_deps[] = { + SPA_FEATURE_REDACTION_BOOKMARKS, + SPA_FEATURE_NONE + }; + zfeature_register(SPA_FEATURE_REDACTION_LIST_SPILL, + "com.delphix:redaction_list_spill", "redaction_list_spill", + "Support for increased number of redaction_snapshot " + "arguments in zfs redact.", 0, ZFEATURE_TYPE_BOOLEAN, + redact_list_spill_deps, sfeatures); + } + zfs_mod_list_supported_free(sfeatures); } diff --git a/sys/contrib/openzfs/module/zfs/dbuf.c b/sys/contrib/openzfs/module/zfs/dbuf.c index b7453578a76f..f2831a0e8abf 100644 --- a/sys/contrib/openzfs/module/zfs/dbuf.c +++ b/sys/contrib/openzfs/module/zfs/dbuf.c @@ -2701,7 +2701,7 @@ dmu_buf_will_clone(dmu_buf_t *db_fake, dmu_tx_t *tx) */ mutex_enter(&db->db_mtx); VERIFY(!dbuf_undirty(db, tx)); - ASSERT0(dbuf_find_dirty_eq(db, tx->tx_txg)); + ASSERT3P(dbuf_find_dirty_eq(db, tx->tx_txg), ==, NULL); if (db->db_buf != NULL) { arc_buf_destroy(db->db_buf, db); db->db_buf = NULL; diff --git a/sys/contrib/openzfs/module/zfs/dmu_redact.c b/sys/contrib/openzfs/module/zfs/dmu_redact.c index 6bd35713ff18..5ac14edfca12 100644 --- a/sys/contrib/openzfs/module/zfs/dmu_redact.c +++ b/sys/contrib/openzfs/module/zfs/dmu_redact.c @@ -746,7 +746,7 @@ perform_thread_merge(bqueue_t *q, uint32_t num_threads, bqueue_enqueue(q, record, sizeof (*record)); return (0); } - redact_nodes = kmem_zalloc(num_threads * + redact_nodes = vmem_zalloc(num_threads * sizeof (*redact_nodes), KM_SLEEP); avl_create(&start_tree, redact_node_compare_start, @@ -820,7 +820,7 @@ perform_thread_merge(bqueue_t *q, uint32_t num_threads, avl_destroy(&start_tree); avl_destroy(&end_tree); - kmem_free(redact_nodes, num_threads * sizeof (*redact_nodes)); + vmem_free(redact_nodes, num_threads * sizeof (*redact_nodes)); if (current_record != NULL) bqueue_enqueue(q, current_record, sizeof (*current_record)); return (err); @@ -1030,7 +1030,7 @@ dmu_redact_snap(const char *snapname, nvlist_t *redactnvl, numsnaps = fnvlist_num_pairs(redactnvl); if (numsnaps > 0) - args = kmem_zalloc(numsnaps * sizeof (*args), KM_SLEEP); + args = vmem_zalloc(numsnaps * sizeof (*args), KM_SLEEP); nvpair_t *pair = NULL; for (int i = 0; i < numsnaps; i++) { @@ -1079,7 +1079,7 @@ dmu_redact_snap(const char *snapname, nvlist_t *redactnvl, kmem_free(newredactbook, sizeof (char) * ZFS_MAX_DATASET_NAME_LEN); if (args != NULL) - kmem_free(args, numsnaps * sizeof (*args)); + vmem_free(args, numsnaps * sizeof (*args)); return (SET_ERROR(ENAMETOOLONG)); } err = dsl_bookmark_lookup(dp, newredactbook, NULL, &bookmark); @@ -1119,7 +1119,7 @@ dmu_redact_snap(const char *snapname, nvlist_t *redactnvl, } else { uint64_t *guids = NULL; if (numsnaps > 0) { - guids = kmem_zalloc(numsnaps * sizeof (uint64_t), + guids = vmem_zalloc(numsnaps * sizeof (uint64_t), KM_SLEEP); } for (int i = 0; i < numsnaps; i++) { @@ -1131,10 +1131,9 @@ dmu_redact_snap(const char *snapname, nvlist_t *redactnvl, dp = NULL; err = dsl_bookmark_create_redacted(newredactbook, snapname, numsnaps, guids, FTAG, &new_rl); - kmem_free(guids, numsnaps * sizeof (uint64_t)); - if (err != 0) { + vmem_free(guids, numsnaps * sizeof (uint64_t)); + if (err != 0) goto out; - } } for (int i = 0; i < numsnaps; i++) { @@ -1188,7 +1187,7 @@ out: } if (args != NULL) - kmem_free(args, numsnaps * sizeof (*args)); + vmem_free(args, numsnaps * sizeof (*args)); if (dp != NULL) dsl_pool_rele(dp, FTAG); if (ds != NULL) { diff --git a/sys/contrib/openzfs/module/zfs/dnode.c b/sys/contrib/openzfs/module/zfs/dnode.c index 7cf03264dce2..79fd02dcb9aa 100644 --- a/sys/contrib/openzfs/module/zfs/dnode.c +++ b/sys/contrib/openzfs/module/zfs/dnode.c @@ -720,6 +720,7 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs, ASSERT(DMU_OT_IS_VALID(ot)); ASSERT((bonustype == DMU_OT_NONE && bonuslen == 0) || (bonustype == DMU_OT_SA && bonuslen == 0) || + (bonustype == DMU_OTN_UINT64_METADATA && bonuslen == 0) || (bonustype != DMU_OT_NONE && bonuslen != 0)); ASSERT(DMU_OT_IS_VALID(bonustype)); ASSERT3U(bonuslen, <=, DN_SLOTS_TO_BONUSLEN(dn_slots)); diff --git a/sys/contrib/openzfs/module/zfs/dsl_bookmark.c b/sys/contrib/openzfs/module/zfs/dsl_bookmark.c index e04796a0814f..03d9420dbdb9 100644 --- a/sys/contrib/openzfs/module/zfs/dsl_bookmark.c +++ b/sys/contrib/openzfs/module/zfs/dsl_bookmark.c @@ -34,6 +34,7 @@ #include <sys/dsl_bookmark.h> #include <zfs_namecheck.h> #include <sys/dmu_send.h> +#include <sys/dbuf.h> static int dsl_bookmark_hold_ds(dsl_pool_t *dp, const char *fullname, @@ -459,25 +460,42 @@ dsl_bookmark_create_sync_impl_snap(const char *bookmark, const char *snapshot, SPA_FEATURE_REDACTED_DATASETS, &dsnumsnaps, &dsredactsnaps); if (redaction_list != NULL || bookmark_redacted) { redaction_list_t *local_rl; + boolean_t spill = B_FALSE; if (bookmark_redacted) { redact_snaps = dsredactsnaps; num_redact_snaps = dsnumsnaps; } + int bonuslen = sizeof (redaction_list_phys_t) + + num_redact_snaps * sizeof (uint64_t); + if (bonuslen > dmu_bonus_max()) + spill = B_TRUE; dbn->dbn_phys.zbm_redaction_obj = dmu_object_alloc(mos, DMU_OTN_UINT64_METADATA, SPA_OLD_MAXBLOCKSIZE, - DMU_OTN_UINT64_METADATA, sizeof (redaction_list_phys_t) + - num_redact_snaps * sizeof (uint64_t), tx); + DMU_OTN_UINT64_METADATA, spill ? 0 : bonuslen, tx); spa_feature_incr(dp->dp_spa, SPA_FEATURE_REDACTION_BOOKMARKS, tx); + if (spill) { + spa_feature_incr(dp->dp_spa, + SPA_FEATURE_REDACTION_LIST_SPILL, tx); + } VERIFY0(dsl_redaction_list_hold_obj(dp, dbn->dbn_phys.zbm_redaction_obj, tag, &local_rl)); dsl_redaction_list_long_hold(dp, local_rl, tag); - ASSERT3U((local_rl)->rl_dbuf->db_size, >=, - sizeof (redaction_list_phys_t) + num_redact_snaps * - sizeof (uint64_t)); - dmu_buf_will_dirty(local_rl->rl_dbuf, tx); + if (!spill) { + ASSERT3U(local_rl->rl_bonus->db_size, >=, bonuslen); + dmu_buf_will_dirty(local_rl->rl_bonus, tx); + } else { + dmu_buf_t *db; + VERIFY0(dmu_spill_hold_by_bonus(local_rl->rl_bonus, + DB_RF_MUST_SUCCEED, FTAG, &db)); + dmu_buf_will_fill(db, tx); + VERIFY0(dbuf_spill_set_blksz(db, P2ROUNDUP(bonuslen, + SPA_MINBLOCKSIZE), tx)); + local_rl->rl_phys = db->db_data; + local_rl->rl_dbuf = db; + } memcpy(local_rl->rl_phys->rlp_snaps, redact_snaps, sizeof (uint64_t) * num_redact_snaps); local_rl->rl_phys->rlp_num_snaps = num_redact_snaps; @@ -636,11 +654,15 @@ dsl_bookmark_create_redacted_check(void *arg, dmu_tx_t *tx) SPA_FEATURE_REDACTION_BOOKMARKS)) return (SET_ERROR(ENOTSUP)); /* - * If the list of redact snaps will not fit in the bonus buffer with - * the furthest reached object and offset, fail. + * If the list of redact snaps will not fit in the bonus buffer (or + * spill block, with the REDACTION_LIST_SPILL feature) with the + * furthest reached object and offset, fail. */ - if (dbcra->dbcra_numsnaps > (dmu_bonus_max() - - sizeof (redaction_list_phys_t)) / sizeof (uint64_t)) + uint64_t snaplimit = ((spa_feature_is_enabled(dp->dp_spa, + SPA_FEATURE_REDACTION_LIST_SPILL) ? spa_maxblocksize(dp->dp_spa) : + dmu_bonus_max()) - + sizeof (redaction_list_phys_t)) / sizeof (uint64_t); + if (dbcra->dbcra_numsnaps > snaplimit) return (SET_ERROR(E2BIG)); if (dsl_bookmark_create_nvl_validate_pair( @@ -1040,6 +1062,14 @@ dsl_bookmark_destroy_sync_impl(dsl_dataset_t *ds, const char *name, } if (dbn->dbn_phys.zbm_redaction_obj != 0) { + dnode_t *rl; + VERIFY0(dnode_hold(mos, + dbn->dbn_phys.zbm_redaction_obj, FTAG, &rl)); + if (rl->dn_have_spill) { + spa_feature_decr(dmu_objset_spa(mos), + SPA_FEATURE_REDACTION_LIST_SPILL, tx); + } + dnode_rele(rl, FTAG); VERIFY0(dmu_object_free(mos, dbn->dbn_phys.zbm_redaction_obj, tx)); spa_feature_decr(dmu_objset_spa(mos), @@ -1213,7 +1243,9 @@ redaction_list_evict_sync(void *rlu) void dsl_redaction_list_rele(redaction_list_t *rl, const void *tag) { - dmu_buf_rele(rl->rl_dbuf, tag); + if (rl->rl_bonus != rl->rl_dbuf) + dmu_buf_rele(rl->rl_dbuf, tag); + dmu_buf_rele(rl->rl_bonus, tag); } int @@ -1221,7 +1253,7 @@ dsl_redaction_list_hold_obj(dsl_pool_t *dp, uint64_t rlobj, const void *tag, redaction_list_t **rlp) { objset_t *mos = dp->dp_meta_objset; - dmu_buf_t *dbuf; + dmu_buf_t *dbuf, *spill_dbuf; redaction_list_t *rl; int err; @@ -1236,13 +1268,18 @@ dsl_redaction_list_hold_obj(dsl_pool_t *dp, uint64_t rlobj, const void *tag, redaction_list_t *winner = NULL; rl = kmem_zalloc(sizeof (redaction_list_t), KM_SLEEP); - rl->rl_dbuf = dbuf; + rl->rl_bonus = dbuf; + if (dmu_spill_hold_existing(dbuf, tag, &spill_dbuf) == 0) { + rl->rl_dbuf = spill_dbuf; + } else { + rl->rl_dbuf = dbuf; + } rl->rl_object = rlobj; - rl->rl_phys = dbuf->db_data; + rl->rl_phys = rl->rl_dbuf->db_data; rl->rl_mos = dp->dp_meta_objset; zfs_refcount_create(&rl->rl_longholds); dmu_buf_init_user(&rl->rl_dbu, redaction_list_evict_sync, NULL, - &rl->rl_dbuf); + &rl->rl_bonus); if ((winner = dmu_buf_set_user_ie(dbuf, &rl->rl_dbu)) != NULL) { kmem_free(rl, sizeof (*rl)); rl = winner; diff --git a/sys/contrib/openzfs/module/zfs/dsl_destroy.c b/sys/contrib/openzfs/module/zfs/dsl_destroy.c index 053f26878cf1..d9d88a981e05 100644 --- a/sys/contrib/openzfs/module/zfs/dsl_destroy.c +++ b/sys/contrib/openzfs/module/zfs/dsl_destroy.c @@ -1125,6 +1125,16 @@ dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx) while ((dbn = avl_destroy_nodes(&ds->ds_bookmarks, &cookie)) != NULL) { if (dbn->dbn_phys.zbm_redaction_obj != 0) { + dnode_t *rl; + VERIFY0(dnode_hold(mos, + dbn->dbn_phys.zbm_redaction_obj, FTAG, + &rl)); + if (rl->dn_have_spill) { + spa_feature_decr(dmu_objset_spa(mos), + SPA_FEATURE_REDACTION_LIST_SPILL, + tx); + } + dnode_rele(rl, FTAG); VERIFY0(dmu_object_free(mos, dbn->dbn_phys.zbm_redaction_obj, tx)); spa_feature_decr(dmu_objset_spa(mos), diff --git a/sys/contrib/openzfs/module/zfs/metaslab.c b/sys/contrib/openzfs/module/zfs/metaslab.c index 20dc934593f1..dd4ff77e6f5d 100644 --- a/sys/contrib/openzfs/module/zfs/metaslab.c +++ b/sys/contrib/openzfs/module/zfs/metaslab.c @@ -40,8 +40,6 @@ #include <sys/zap.h> #include <sys/btree.h> -#define WITH_DF_BLOCK_ALLOCATOR - #define GANG_ALLOCATION(flags) \ ((flags) & (METASLAB_GANG_CHILD | METASLAB_GANG_HEADER)) @@ -1622,9 +1620,6 @@ metaslab_block_find(zfs_btree_t *t, range_tree_t *rt, uint64_t start, return (rs); } -#if defined(WITH_DF_BLOCK_ALLOCATOR) || \ - defined(WITH_CF_BLOCK_ALLOCATOR) - /* * This is a helper function that can be used by the allocator to find a * suitable block to allocate. This will search the specified B-tree looking @@ -1659,9 +1654,74 @@ metaslab_block_picker(range_tree_t *rt, uint64_t *cursor, uint64_t size, *cursor = 0; return (-1ULL); } -#endif /* WITH_DF/CF_BLOCK_ALLOCATOR */ -#if defined(WITH_DF_BLOCK_ALLOCATOR) +static uint64_t metaslab_df_alloc(metaslab_t *msp, uint64_t size); +static uint64_t metaslab_cf_alloc(metaslab_t *msp, uint64_t size); +static uint64_t metaslab_ndf_alloc(metaslab_t *msp, uint64_t size); +metaslab_ops_t *metaslab_allocator(spa_t *spa); + +static metaslab_ops_t metaslab_allocators[] = { + { "dynamic", metaslab_df_alloc }, + { "cursor", metaslab_cf_alloc }, + { "new-dynamic", metaslab_ndf_alloc }, +}; + +static int +spa_find_allocator_byname(const char *val) +{ + int a = ARRAY_SIZE(metaslab_allocators) - 1; + if (strcmp("new-dynamic", val) == 0) + return (-1); /* remove when ndf is working */ + for (; a >= 0; a--) { + if (strcmp(val, metaslab_allocators[a].msop_name) == 0) + return (a); + } + return (-1); +} + +void +spa_set_allocator(spa_t *spa, const char *allocator) +{ + int a = spa_find_allocator_byname(allocator); + if (a < 0) a = 0; + spa->spa_active_allocator = a; + zfs_dbgmsg("spa allocator: %s\n", metaslab_allocators[a].msop_name); +} + +int +spa_get_allocator(spa_t *spa) +{ + return (spa->spa_active_allocator); +} + +#if defined(_KERNEL) +int +param_set_active_allocator_common(const char *val) +{ + char *p; + + if (val == NULL) + return (SET_ERROR(EINVAL)); + + if ((p = strchr(val, '\n')) != NULL) + *p = '\0'; + + int a = spa_find_allocator_byname(val); + if (a < 0) + return (SET_ERROR(EINVAL)); + + zfs_active_allocator = metaslab_allocators[a].msop_name; + return (0); +} +#endif + +metaslab_ops_t * +metaslab_allocator(spa_t *spa) +{ + int allocator = spa_get_allocator(spa); + return (&metaslab_allocators[allocator]); +} + /* * ========================================================================== * Dynamic Fit (df) block allocator @@ -1736,12 +1796,6 @@ metaslab_df_alloc(metaslab_t *msp, uint64_t size) return (offset); } -const metaslab_ops_t zfs_metaslab_ops = { - metaslab_df_alloc -}; -#endif /* WITH_DF_BLOCK_ALLOCATOR */ - -#if defined(WITH_CF_BLOCK_ALLOCATOR) /* * ========================================================================== * Cursor fit block allocator - @@ -1784,12 +1838,6 @@ metaslab_cf_alloc(metaslab_t *msp, uint64_t size) return (offset); } -const metaslab_ops_t zfs_metaslab_ops = { - metaslab_cf_alloc -}; -#endif /* WITH_CF_BLOCK_ALLOCATOR */ - -#if defined(WITH_NDF_BLOCK_ALLOCATOR) /* * ========================================================================== * New dynamic fit allocator - @@ -1846,12 +1894,6 @@ metaslab_ndf_alloc(metaslab_t *msp, uint64_t size) return (-1ULL); } -const metaslab_ops_t zfs_metaslab_ops = { - metaslab_ndf_alloc -}; -#endif /* WITH_NDF_BLOCK_ALLOCATOR */ - - /* * ========================================================================== * Metaslabs @@ -6232,3 +6274,9 @@ ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, try_hard_before_gang, INT, ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, find_max_tries, UINT, ZMOD_RW, "Normally only consider this many of the best metaslabs in each vdev"); + +/* BEGIN CSTYLED */ +ZFS_MODULE_PARAM_CALL(zfs, zfs_, active_allocator, + param_set_active_allocator, param_get_charp, ZMOD_RW, + "SPA active allocator"); +/* END CSTYLED */ diff --git a/sys/contrib/openzfs/module/zfs/spa.c b/sys/contrib/openzfs/module/zfs/spa.c index 88ee4ea9f458..cda62f939c1e 100644 --- a/sys/contrib/openzfs/module/zfs/spa.c +++ b/sys/contrib/openzfs/module/zfs/spa.c @@ -1295,24 +1295,26 @@ spa_thread(void *arg) } #endif +extern metaslab_ops_t *metaslab_allocator(spa_t *spa); + /* * Activate an uninitialized pool. */ static void spa_activate(spa_t *spa, spa_mode_t mode) { + metaslab_ops_t *msp = metaslab_allocator(spa); ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED); spa->spa_state = POOL_STATE_ACTIVE; spa->spa_mode = mode; spa->spa_read_spacemaps = spa_mode_readable_spacemaps; - spa->spa_normal_class = metaslab_class_create(spa, &zfs_metaslab_ops); - spa->spa_log_class = metaslab_class_create(spa, &zfs_metaslab_ops); - spa->spa_embedded_log_class = - metaslab_class_create(spa, &zfs_metaslab_ops); - spa->spa_special_class = metaslab_class_create(spa, &zfs_metaslab_ops); - spa->spa_dedup_class = metaslab_class_create(spa, &zfs_metaslab_ops); + spa->spa_normal_class = metaslab_class_create(spa, msp); + spa->spa_log_class = metaslab_class_create(spa, msp); + spa->spa_embedded_log_class = metaslab_class_create(spa, msp); + spa->spa_special_class = metaslab_class_create(spa, msp); + spa->spa_dedup_class = metaslab_class_create(spa, msp); /* Try to create a covering process */ mutex_enter(&spa->spa_proc_lock); diff --git a/sys/contrib/openzfs/module/zfs/spa_misc.c b/sys/contrib/openzfs/module/zfs/spa_misc.c index 3b355e0debcc..413476196b9f 100644 --- a/sys/contrib/openzfs/module/zfs/spa_misc.c +++ b/sys/contrib/openzfs/module/zfs/spa_misc.c @@ -389,6 +389,11 @@ static const uint64_t spa_min_slop = 128ULL * 1024 * 1024; static const uint64_t spa_max_slop = 128ULL * 1024 * 1024 * 1024; static const int spa_allocators = 4; +/* + * Spa active allocator. + * Valid values are zfs_active_allocator=<dynamic|cursor|new-dynamic>. + */ +const char *zfs_active_allocator = "dynamic"; void spa_load_failed(spa_t *spa, const char *fmt, ...) @@ -710,6 +715,7 @@ spa_add(const char *name, nvlist_t *config, const char *altroot) spa->spa_deadman_synctime = MSEC2NSEC(zfs_deadman_synctime_ms); spa->spa_deadman_ziotime = MSEC2NSEC(zfs_deadman_ziotime_ms); spa_set_deadman_failmode(spa, zfs_deadman_failmode); + spa_set_allocator(spa, zfs_active_allocator); zfs_refcount_create(&spa->spa_refcount); spa_config_lock_init(spa); diff --git a/sys/contrib/openzfs/module/zfs/zil.c b/sys/contrib/openzfs/module/zfs/zil.c index f2d279e36a96..b30676b42d88 100644 --- a/sys/contrib/openzfs/module/zfs/zil.c +++ b/sys/contrib/openzfs/module/zfs/zil.c @@ -814,17 +814,17 @@ static void zil_free_lwb(zilog_t *zilog, lwb_t *lwb) { ASSERT(MUTEX_HELD(&zilog->zl_lock)); - ASSERT(!MUTEX_HELD(&lwb->lwb_vdev_lock)); - VERIFY(list_is_empty(&lwb->lwb_waiters)); - VERIFY(list_is_empty(&lwb->lwb_itxs)); - ASSERT(avl_is_empty(&lwb->lwb_vdev_tree)); + ASSERT(lwb->lwb_state == LWB_STATE_NEW || + lwb->lwb_state == LWB_STATE_FLUSH_DONE); ASSERT3P(lwb->lwb_child_zio, ==, NULL); ASSERT3P(lwb->lwb_write_zio, ==, NULL); ASSERT3P(lwb->lwb_root_zio, ==, NULL); ASSERT3U(lwb->lwb_alloc_txg, <=, spa_syncing_txg(zilog->zl_spa)); ASSERT3U(lwb->lwb_max_txg, <=, spa_syncing_txg(zilog->zl_spa)); - ASSERT(lwb->lwb_state == LWB_STATE_NEW || - lwb->lwb_state == LWB_STATE_FLUSH_DONE); + VERIFY(list_is_empty(&lwb->lwb_itxs)); + VERIFY(list_is_empty(&lwb->lwb_waiters)); + ASSERT(avl_is_empty(&lwb->lwb_vdev_tree)); + ASSERT(!MUTEX_HELD(&lwb->lwb_vdev_lock)); /* * Clear the zilog's field to indicate this lwb is no longer @@ -1329,6 +1329,9 @@ zil_lwb_add_block(lwb_t *lwb, const blkptr_t *bp) int ndvas = BP_GET_NDVAS(bp); int i; + ASSERT3S(lwb->lwb_state, !=, LWB_STATE_WRITE_DONE); + ASSERT3S(lwb->lwb_state, !=, LWB_STATE_FLUSH_DONE); + if (zil_nocacheflush) return; @@ -1408,15 +1411,9 @@ zil_lwb_flush_vdevs_done(zio_t *zio) zilog_t *zilog = lwb->lwb_zilog; zil_commit_waiter_t *zcw; itx_t *itx; - uint64_t txg; - list_t itxs, waiters; spa_config_exit(zilog->zl_spa, SCL_STATE, lwb); - list_create(&itxs, sizeof (itx_t), offsetof(itx_t, itx_node)); - list_create(&waiters, sizeof (zil_commit_waiter_t), - offsetof(zil_commit_waiter_t, zcw_node)); - hrtime_t t = gethrtime() - lwb->lwb_issued_timestamp; mutex_enter(&zilog->zl_lock); @@ -1425,6 +1422,9 @@ zil_lwb_flush_vdevs_done(zio_t *zio) lwb->lwb_root_zio = NULL; + ASSERT3S(lwb->lwb_state, ==, LWB_STATE_WRITE_DONE); + lwb->lwb_state = LWB_STATE_FLUSH_DONE; + if (zilog->zl_last_lwb_opened == lwb) { /* * Remember the highest committed log sequence number @@ -1435,22 +1435,13 @@ zil_lwb_flush_vdevs_done(zio_t *zio) zilog->zl_commit_lr_seq = zilog->zl_lr_seq; } - list_move_tail(&itxs, &lwb->lwb_itxs); - list_move_tail(&waiters, &lwb->lwb_waiters); - txg = lwb->lwb_issued_txg; - - ASSERT3S(lwb->lwb_state, ==, LWB_STATE_WRITE_DONE); - lwb->lwb_state = LWB_STATE_FLUSH_DONE; - - mutex_exit(&zilog->zl_lock); - - while ((itx = list_remove_head(&itxs)) != NULL) + while ((itx = list_remove_head(&lwb->lwb_itxs)) != NULL) zil_itx_destroy(itx); - list_destroy(&itxs); - while ((zcw = list_remove_head(&waiters)) != NULL) { + while ((zcw = list_remove_head(&lwb->lwb_waiters)) != NULL) { mutex_enter(&zcw->zcw_lock); + ASSERT3P(zcw->zcw_lwb, ==, lwb); zcw->zcw_lwb = NULL; /* * We expect any ZIO errors from child ZIOs to have been @@ -1475,7 +1466,11 @@ zil_lwb_flush_vdevs_done(zio_t *zio) mutex_exit(&zcw->zcw_lock); } - list_destroy(&waiters); + + uint64_t txg = lwb->lwb_issued_txg; + + /* Once we drop the lock, lwb may be freed by zil_sync(). */ + mutex_exit(&zilog->zl_lock); mutex_enter(&zilog->zl_lwb_io_lock); ASSERT3U(zilog->zl_lwb_inflight[txg & TXG_MASK], >, 0); @@ -1929,10 +1924,10 @@ next_lwb: BP_GET_LSIZE(&lwb->lwb_blk)); } lwb->lwb_issued_timestamp = gethrtime(); - zio_nowait(lwb->lwb_root_zio); - zio_nowait(lwb->lwb_write_zio); if (lwb->lwb_child_zio) zio_nowait(lwb->lwb_child_zio); + zio_nowait(lwb->lwb_write_zio); + zio_nowait(lwb->lwb_root_zio); /* * If nlwb was ready when we gave it the block pointer, diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg index 160a0ca2e6db..4248578cde16 100644 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg @@ -86,6 +86,7 @@ typeset -a properties=( "feature@log_spacemap" "feature@device_rebuild" "feature@draid" + "feature@redaction_list_spill" ) if is_linux || is_freebsd; then diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redacted_send/redacted_many_clones.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redacted_send/redacted_many_clones.ksh index 3386643b295e..f2150be3bc95 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redacted_send/redacted_many_clones.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redacted_send/redacted_many_clones.ksh @@ -27,7 +27,6 @@ # second (the last block in the file) is common to them all. # 2. Verify a redacted stream with a reasonable redaction list length can # be correctly processed. -# 3. Verify that if the list is too long, the send fails gracefully. # typeset ds_name="many_clones" @@ -56,13 +55,18 @@ for i in {1..64}; do log_must zfs snapshot ${clone}$i@snap done -# The limit isn't necessarily 32 snapshots. The maximum number of snapshots in +# The limit isn't necessarily 64 snapshots. The maximum number of snapshots in # the redacted list is determined in dsl_bookmark_create_redacted_check(). -log_must zfs redact $sendfs@snap book1 $clone{1..32}@snap +log_must zfs redact $sendfs@snap book1 $clone{1..64}@snap log_must eval "zfs send --redact book1 $sendfs@snap >$stream" log_must eval "zfs recv $recvfs <$stream" compare_files $sendfs $recvfs "f2" "$RANGE8" -log_mustnot zfs redact $sendfs@snap book2 $clone{1..64}@snap +rls_value="$(zpool get -H -o value feature@redaction_list_spill $POOL)" +if [ "$rls_value" = "active" ]; then + log_note "redaction_list_spill feature active" +else + log_fail "redaction_list_spill feature not active" +fi log_pass "Redacted send can deal with a large redaction list." diff --git a/sys/modules/zfs/zfs_config.h b/sys/modules/zfs/zfs_config.h index 08092e5e169c..49b699b24018 100644 --- a/sys/modules/zfs/zfs_config.h +++ b/sys/modules/zfs/zfs_config.h @@ -1095,7 +1095,7 @@ /* #undef ZFS_IS_GPL_COMPATIBLE */ /* Define the project alias string. */ -#define ZFS_META_ALIAS "zfs-2.2.99-FreeBSD_g804414aad" +#define ZFS_META_ALIAS "zfs-2.2.99-FreeBSD_g95f71c019" /* Define the project author. */ #define ZFS_META_AUTHOR "OpenZFS" @@ -1125,7 +1125,7 @@ #define ZFS_META_NAME "zfs" /* Define the project release. */ -#define ZFS_META_RELEASE "FreeBSD_g804414aad" +#define ZFS_META_RELEASE "FreeBSD_g95f71c019" /* Define the project version. */ #define ZFS_META_VERSION "2.2.99" diff --git a/sys/modules/zfs/zfs_gitrev.h b/sys/modules/zfs/zfs_gitrev.h index 00a5acf6e4e4..f477478a7f7d 100644 --- a/sys/modules/zfs/zfs_gitrev.h +++ b/sys/modules/zfs/zfs_gitrev.h @@ -1 +1 @@ -#define ZFS_META_GITREV "zfs-2.2.99-68-g804414aad" +#define ZFS_META_GITREV "zfs-2.2.99-81-g95f71c019" |