-rw-r--r--  sys/contrib/openzfs/cmd/zed/agents/zfs_mod.c | 4
-rw-r--r--  sys/contrib/openzfs/cmd/zed/zed_event.c | 31
-rw-r--r--  sys/contrib/openzfs/cmd/zpool/compatibility.d/grub2 | 1
-rwxr-xr-x  sys/contrib/openzfs/cmd/zpool/zpool.d/ses | 12
-rw-r--r--  sys/contrib/openzfs/cmd/zpool/zpool_vdev.c | 4
-rw-r--r--  sys/contrib/openzfs/configure.ac | 2
-rw-r--r--  sys/contrib/openzfs/include/libzutil.h | 2
-rw-r--r--  sys/contrib/openzfs/include/os/freebsd/spl/sys/taskq.h | 6
-rw-r--r--  sys/contrib/openzfs/include/os/linux/spl/sys/taskq.h | 2
-rw-r--r--  sys/contrib/openzfs/include/sys/dataset_kstats.h | 1
-rw-r--r--  sys/contrib/openzfs/include/sys/spa.h | 5
-rw-r--r--  sys/contrib/openzfs/include/sys/spa_impl.h | 12
-rw-r--r--  sys/contrib/openzfs/include/sys/vdev_impl.h | 5
-rw-r--r--  sys/contrib/openzfs/include/sys/zfs_context.h | 2
-rw-r--r--  sys/contrib/openzfs/include/sys/zio.h | 6
-rw-r--r--  sys/contrib/openzfs/lib/libzfs/libzfs.abi | 7
-rw-r--r--  sys/contrib/openzfs/lib/libzpool/taskq.c | 30
-rw-r--r--  sys/contrib/openzfs/lib/libzutil/os/freebsd/zutil_import_os.c | 9
-rw-r--r--  sys/contrib/openzfs/lib/libzutil/os/linux/zutil_import_os.c | 17
-rw-r--r--  sys/contrib/openzfs/man/man4/zfs.4 | 22
-rw-r--r--  sys/contrib/openzfs/module/os/freebsd/spl/spl_kstat.c | 38
-rw-r--r--  sys/contrib/openzfs/module/os/freebsd/spl/spl_taskq.c | 112
-rw-r--r--  sys/contrib/openzfs/module/os/freebsd/zfs/kmod_core.c | 3
-rw-r--r--  sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c | 1
-rw-r--r--  sys/contrib/openzfs/module/os/linux/spl/spl-taskq.c | 36
-rw-r--r--  sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c | 2
-rw-r--r--  sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c | 2
-rw-r--r--  sys/contrib/openzfs/module/zfs/abd.c | 9
-rw-r--r--  sys/contrib/openzfs/module/zfs/dataset_kstats.c | 12
-rw-r--r--  sys/contrib/openzfs/module/zfs/dbuf.c | 9
-rw-r--r--  sys/contrib/openzfs/module/zfs/dmu_objset.c | 138
-rw-r--r--  sys/contrib/openzfs/module/zfs/dnode_sync.c | 1
-rw-r--r--  sys/contrib/openzfs/module/zfs/dsl_dataset.c | 5
-rw-r--r--  sys/contrib/openzfs/module/zfs/dsl_pool.c | 31
-rw-r--r--  sys/contrib/openzfs/module/zfs/sa.c | 3
-rw-r--r--  sys/contrib/openzfs/module/zfs/spa.c | 190
-rw-r--r--  sys/contrib/openzfs/module/zfs/spa_misc.c | 14
-rw-r--r--  sys/contrib/openzfs/module/zfs/vdev_queue.c | 7
-rw-r--r--  sys/contrib/openzfs/module/zfs/zio.c | 43
-rw-r--r--  sys/contrib/openzfs/rpm/generic/zfs-dkms.spec.in | 90
-rw-r--r--  sys/modules/zfs/zfs_config.h | 4
-rw-r--r--  sys/modules/zfs/zfs_gitrev.h | 2
42 files changed, 755 insertions, 177 deletions
diff --git a/sys/contrib/openzfs/cmd/zed/agents/zfs_mod.c b/sys/contrib/openzfs/cmd/zed/agents/zfs_mod.c
index 9636c99fc85f..69163b80bd5a 100644
--- a/sys/contrib/openzfs/cmd/zed/agents/zfs_mod.c
+++ b/sys/contrib/openzfs/cmd/zed/agents/zfs_mod.c
@@ -233,8 +233,12 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
}
(void) nvlist_lookup_string(vdev, ZPOOL_CONFIG_PHYS_PATH, &physpath);
+
+ update_vdev_config_dev_sysfs_path(vdev, path,
+ ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH);
(void) nvlist_lookup_string(vdev, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH,
&enc_sysfs_path);
+
(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk);
(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_OFFLINE, &offline);
(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_FAULTED, &faulted);
diff --git a/sys/contrib/openzfs/cmd/zed/zed_event.c b/sys/contrib/openzfs/cmd/zed/zed_event.c
index c60d5a4bc22e..7e5867692234 100644
--- a/sys/contrib/openzfs/cmd/zed/zed_event.c
+++ b/sys/contrib/openzfs/cmd/zed/zed_event.c
@@ -35,6 +35,7 @@
#include "zed_strings.h"
#include "agents/zfs_agents.h"
+#include <libzutil.h>
#define MAXBUF 4096
@@ -922,6 +923,25 @@ _zed_event_add_time_strings(uint64_t eid, zed_strings_t *zsp, int64_t etime[])
}
}
+
+static void
+_zed_event_update_enc_sysfs_path(nvlist_t *nvl)
+{
+ const char *vdev_path;
+
+ if (nvlist_lookup_string(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_PATH,
+ &vdev_path) != 0) {
+ return; /* some other kind of event, ignore it */
+ }
+
+ if (vdev_path == NULL) {
+ return;
+ }
+
+ update_vdev_config_dev_sysfs_path(nvl, vdev_path,
+ FM_EREPORT_PAYLOAD_ZFS_VDEV_ENC_SYSFS_PATH);
+}
+
/*
* Service the next zevent, blocking until one is available.
*/
@@ -969,6 +989,17 @@ zed_event_service(struct zed_conf *zcp)
zed_log_msg(LOG_WARNING,
"Failed to lookup zevent class (eid=%llu)", eid);
} else {
+ /*
+ * Special case: If we can dynamically detect an enclosure sysfs
+ * path, then use that value rather than the one stored in the
+ * vd->vdev_enc_sysfs_path. There have been rare cases where
+ * vd->vdev_enc_sysfs_path becomes outdated. However, there
+	 * will be other times when we cannot dynamically detect the
+ * sysfs path (like if a disk disappears) and have to rely on
+ * the old value for things like turning on the fault LED.
+ */
+ _zed_event_update_enc_sysfs_path(nvl);
+
/* let internal modules see this event first */
zfs_agent_post_event(class, NULL, nvl);
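
The helper used above rewrites the enclosure key in place: on Linux it adds the key when a sysfs path can be resolved for the device and removes the key otherwise (the FreeBSD version further down is a stub). A minimal caller sketch, not part of the patch, assuming libzutil/libnvpair headers and a populated vdev nvlist:

    #include <libzutil.h>
    #include <libnvpair.h>
    #include <sys/fs/zfs.h>
    #include <stdio.h>

    /* Sketch only: refresh the enclosure key for one vdev, then read
     * back whatever the helper left in the nvlist. */
    static void
    show_enc_path(nvlist_t *vdev, const char *dev_path)
    {
            const char *enc = NULL;

            update_vdev_config_dev_sysfs_path(vdev, dev_path,
                ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH);
            if (nvlist_lookup_string(vdev,
                ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH, &enc) == 0)
                    (void) printf("enclosure sysfs path: %s\n", enc);
            else
                    (void) printf("no enclosure path detected\n");
    }
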
diff --git a/sys/contrib/openzfs/cmd/zpool/compatibility.d/grub2 b/sys/contrib/openzfs/cmd/zpool/compatibility.d/grub2
index 8c3a3ce11d94..6d60e643593b 100644
--- a/sys/contrib/openzfs/cmd/zpool/compatibility.d/grub2
+++ b/sys/contrib/openzfs/cmd/zpool/compatibility.d/grub2
@@ -14,7 +14,6 @@ large_blocks
livelist
log_spacemap
lz4_compress
-obsolete_counts
project_quota
resilver_defer
spacemap_histogram
diff --git a/sys/contrib/openzfs/cmd/zpool/zpool.d/ses b/sys/contrib/openzfs/cmd/zpool/zpool.d/ses
index 638145c95d47..19ef92ad67b2 100755
--- a/sys/contrib/openzfs/cmd/zpool/zpool.d/ses
+++ b/sys/contrib/openzfs/cmd/zpool/zpool.d/ses
@@ -33,10 +33,18 @@ for i in $scripts ; do
val=""
case $i in
enc)
- val=$(ls "$VDEV_ENC_SYSFS_PATH/../../" 2>/dev/null)
+ if echo "$VDEV_ENC_SYSFS_PATH" | grep -q '/sys/bus/pci/slots' ; then
+ val="$VDEV_ENC_SYSFS_PATH"
+ else
+ val="$(ls """$VDEV_ENC_SYSFS_PATH/../../""" 2>/dev/null)"
+ fi
;;
slot)
- val=$(cat "$VDEV_ENC_SYSFS_PATH/slot" 2>/dev/null)
+ if echo "$VDEV_ENC_SYSFS_PATH" | grep -q '/sys/bus/pci/slots' ; then
+ val="$(basename """$VDEV_ENC_SYSFS_PATH""")"
+ else
+ val="$(cat """$VDEV_ENC_SYSFS_PATH/slot""" 2>/dev/null)"
+ fi
;;
encdev)
val=$(ls "$VDEV_ENC_SYSFS_PATH/../device/scsi_generic" 2>/dev/null)
diff --git a/sys/contrib/openzfs/cmd/zpool/zpool_vdev.c b/sys/contrib/openzfs/cmd/zpool/zpool_vdev.c
index 3d0fc089c32f..fbd4b81dfacc 100644
--- a/sys/contrib/openzfs/cmd/zpool/zpool_vdev.c
+++ b/sys/contrib/openzfs/cmd/zpool/zpool_vdev.c
@@ -372,6 +372,10 @@ make_leaf_vdev(nvlist_t *props, const char *arg, boolean_t is_primary)
verify(nvlist_add_string(vdev, ZPOOL_CONFIG_PATH, path) == 0);
verify(nvlist_add_string(vdev, ZPOOL_CONFIG_TYPE, type) == 0);
+ /* Lookup and add the enclosure sysfs path (if exists) */
+ update_vdev_config_dev_sysfs_path(vdev, path,
+ ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH);
+
if (strcmp(type, VDEV_TYPE_DISK) == 0)
verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK,
(uint64_t)wholedisk) == 0);
diff --git a/sys/contrib/openzfs/configure.ac b/sys/contrib/openzfs/configure.ac
index 4c75616e4299..99db7a1e35f6 100644
--- a/sys/contrib/openzfs/configure.ac
+++ b/sys/contrib/openzfs/configure.ac
@@ -44,7 +44,7 @@ AM_INIT_AUTOMAKE([subdir-objects foreign])
# Remove default macros from config.h:
# PACKAGE, PACKAGE_{BUGREPORT,NAME,STRING,TARNAME,VERSION}, STDC_HEADERS, VERSION
AC_CONFIG_HEADERS([zfs_config.h], [
- sed -nri~ -e '/^$/be' -e 'N;N;/#define (PACKAGE|VERSION|STDC_HEADERS)/d' -e ':e' -e 'p' zfs_config.h && rm zfs_config.h~ || exit])
+ $SED -nri~ -e '/^$/be' -e 'N;N;/#define (PACKAGE|VERSION|STDC_HEADERS)/d' -e ':e' -e 'p' zfs_config.h && rm zfs_config.h~ || exit])
LT_INIT
AC_PROG_INSTALL
diff --git a/sys/contrib/openzfs/include/libzutil.h b/sys/contrib/openzfs/include/libzutil.h
index 053b1ed4b52a..9842c225b6f0 100644
--- a/sys/contrib/openzfs/include/libzutil.h
+++ b/sys/contrib/openzfs/include/libzutil.h
@@ -208,6 +208,8 @@ int for_each_vdev_cb(void *zhp, nvlist_t *nv, pool_vdev_iter_f func,
int for_each_vdev_in_nvlist(nvlist_t *nvroot, pool_vdev_iter_f func,
void *data);
void update_vdevs_config_dev_sysfs_path(nvlist_t *config);
+_LIBZUTIL_H void update_vdev_config_dev_sysfs_path(nvlist_t *nv,
+ const char *path, const char *key);
#ifdef __cplusplus
}
#endif
diff --git a/sys/contrib/openzfs/include/os/freebsd/spl/sys/taskq.h b/sys/contrib/openzfs/include/os/freebsd/spl/sys/taskq.h
index b23a939b3aa7..fa6ff84a0134 100644
--- a/sys/contrib/openzfs/include/os/freebsd/spl/sys/taskq.h
+++ b/sys/contrib/openzfs/include/os/freebsd/spl/sys/taskq.h
@@ -42,6 +42,7 @@ extern "C" {
typedef struct taskq {
struct taskqueue *tq_queue;
+ int tq_nthreads;
} taskq_t;
typedef uintptr_t taskqid_t;
@@ -81,7 +82,6 @@ typedef struct taskq_ent {
#define TASKQID_INVALID ((taskqid_t)0)
-#define taskq_init_ent(x)
extern taskq_t *system_taskq;
/* Global dynamic task queue for long delay */
extern taskq_t *system_delay_taskq;
@@ -92,7 +92,10 @@ extern taskqid_t taskq_dispatch_delay(taskq_t *, task_func_t, void *,
extern void taskq_dispatch_ent(taskq_t *, task_func_t, void *, uint_t,
taskq_ent_t *);
extern int taskq_empty_ent(taskq_ent_t *);
+extern void taskq_init_ent(taskq_ent_t *);
taskq_t *taskq_create(const char *, int, pri_t, int, int, uint_t);
+taskq_t *taskq_create_synced(const char *, int, pri_t, int, int, uint_t,
+ kthread_t ***);
taskq_t *taskq_create_instance(const char *, int, int, pri_t, int, int, uint_t);
taskq_t *taskq_create_proc(const char *, int, pri_t, int, int,
struct proc *, uint_t);
@@ -117,6 +120,7 @@ void taskq_resume(taskq_t *);
#endif /* _KERNEL */
#ifdef _STANDALONE
+typedef void taskq_t;
typedef int taskq_ent_t;
#define taskq_init_ent(x)
#endif /* _STANDALONE */
diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/taskq.h b/sys/contrib/openzfs/include/os/linux/spl/sys/taskq.h
index 6c1b4377a98a..aa5860c56e83 100644
--- a/sys/contrib/openzfs/include/os/linux/spl/sys/taskq.h
+++ b/sys/contrib/openzfs/include/os/linux/spl/sys/taskq.h
@@ -150,6 +150,8 @@ extern void taskq_dispatch_ent(taskq_t *, task_func_t, void *, uint_t,
extern int taskq_empty_ent(taskq_ent_t *);
extern void taskq_init_ent(taskq_ent_t *);
extern taskq_t *taskq_create(const char *, int, pri_t, int, int, uint_t);
+extern taskq_t *taskq_create_synced(const char *, int, pri_t, int, int, uint_t,
+ kthread_t ***);
extern void taskq_destroy(taskq_t *);
extern void taskq_wait_id(taskq_t *, taskqid_t);
extern void taskq_wait_outstanding(taskq_t *, taskqid_t);
diff --git a/sys/contrib/openzfs/include/sys/dataset_kstats.h b/sys/contrib/openzfs/include/sys/dataset_kstats.h
index 40cf5258a2e7..c81a07f0c116 100644
--- a/sys/contrib/openzfs/include/sys/dataset_kstats.h
+++ b/sys/contrib/openzfs/include/sys/dataset_kstats.h
@@ -71,6 +71,7 @@ typedef struct dataset_kstats {
int dataset_kstats_create(dataset_kstats_t *, objset_t *);
void dataset_kstats_destroy(dataset_kstats_t *);
+void dataset_kstats_rename(dataset_kstats_t *dk, const char *);
void dataset_kstats_update_write_kstats(dataset_kstats_t *, int64_t);
void dataset_kstats_update_read_kstats(dataset_kstats_t *, int64_t);
diff --git a/sys/contrib/openzfs/include/sys/spa.h b/sys/contrib/openzfs/include/sys/spa.h
index 88ef510b744b..cef7933df441 100644
--- a/sys/contrib/openzfs/include/sys/spa.h
+++ b/sys/contrib/openzfs/include/sys/spa.h
@@ -825,6 +825,11 @@ extern void spa_sync_allpools(void);
extern uint_t zfs_sync_pass_deferred_free;
+/* spa sync taskqueues */
+taskq_t *spa_sync_tq_create(spa_t *spa, const char *name);
+void spa_sync_tq_destroy(spa_t *spa);
+void spa_select_allocator(zio_t *zio);
+
/* spa namespace global mutex */
extern kmutex_t spa_namespace_lock;
diff --git a/sys/contrib/openzfs/include/sys/spa_impl.h b/sys/contrib/openzfs/include/sys/spa_impl.h
index 094258d47a48..b1eb06f94fcc 100644
--- a/sys/contrib/openzfs/include/sys/spa_impl.h
+++ b/sys/contrib/openzfs/include/sys/spa_impl.h
@@ -188,6 +188,12 @@ typedef struct spa_taskqs {
taskq_t **stqs_taskq;
} spa_taskqs_t;
+/* one for each thread in the spa sync taskq */
+typedef struct spa_syncthread_info {
+ kthread_t *sti_thread;
+ taskq_t *sti_wr_iss_tq; /* assigned wr_iss taskq */
+} spa_syncthread_info_t;
+
typedef enum spa_all_vdev_zap_action {
AVZ_ACTION_NONE = 0,
AVZ_ACTION_DESTROY, /* Destroy all per-vdev ZAPs and the AVZ. */
@@ -265,6 +271,10 @@ struct spa {
int spa_alloc_count;
int spa_active_allocator; /* selectable allocator */
+ /* per-allocator sync thread taskqs */
+ taskq_t *spa_sync_tq;
+ spa_syncthread_info_t *spa_syncthreads;
+
spa_aux_vdev_t spa_spares; /* hot spares */
spa_aux_vdev_t spa_l2cache; /* L2ARC cache devices */
nvlist_t *spa_label_features; /* Features for reading MOS */
@@ -456,7 +466,7 @@ extern char *spa_config_path;
extern const char *zfs_deadman_failmode;
extern uint_t spa_slop_shift;
extern void spa_taskq_dispatch_ent(spa_t *spa, zio_type_t t, zio_taskq_type_t q,
- task_func_t *func, void *arg, uint_t flags, taskq_ent_t *ent);
+ task_func_t *func, void *arg, uint_t flags, taskq_ent_t *ent, zio_t *zio);
extern void spa_taskq_dispatch_sync(spa_t *, zio_type_t t, zio_taskq_type_t q,
task_func_t *func, void *arg, uint_t flags);
extern void spa_load_spares(spa_t *spa);
diff --git a/sys/contrib/openzfs/include/sys/vdev_impl.h b/sys/contrib/openzfs/include/sys/vdev_impl.h
index ad9dc3aefd8e..3f2312c23438 100644
--- a/sys/contrib/openzfs/include/sys/vdev_impl.h
+++ b/sys/contrib/openzfs/include/sys/vdev_impl.h
@@ -131,7 +131,10 @@ typedef const struct vdev_ops {
* Virtual device properties
*/
typedef union vdev_queue_class {
- list_t vqc_list;
+ struct {
+ ulong_t vqc_list_numnodes;
+ list_t vqc_list;
+ };
avl_tree_t vqc_tree;
} vdev_queue_class_t;
diff --git a/sys/contrib/openzfs/include/sys/zfs_context.h b/sys/contrib/openzfs/include/sys/zfs_context.h
index 750ca612b962..9ec2f73b366c 100644
--- a/sys/contrib/openzfs/include/sys/zfs_context.h
+++ b/sys/contrib/openzfs/include/sys/zfs_context.h
@@ -496,6 +496,8 @@ extern taskq_t *system_taskq;
extern taskq_t *system_delay_taskq;
extern taskq_t *taskq_create(const char *, int, pri_t, int, int, uint_t);
+extern taskq_t *taskq_create_synced(const char *, int, pri_t, int, int, uint_t,
+ kthread_t ***);
#define taskq_create_proc(a, b, c, d, e, p, f) \
(taskq_create(a, b, c, d, e, f))
#define taskq_create_sysdc(a, b, d, e, p, dc, f) \
diff --git a/sys/contrib/openzfs/include/sys/zio.h b/sys/contrib/openzfs/include/sys/zio.h
index e1f4d5c04499..25a4b221f05e 100644
--- a/sys/contrib/openzfs/include/sys/zio.h
+++ b/sys/contrib/openzfs/include/sys/zio.h
@@ -223,6 +223,9 @@ typedef uint64_t zio_flag_t;
#define ZIO_FLAG_REEXECUTED (1ULL << 29)
#define ZIO_FLAG_DELEGATED (1ULL << 30)
+#define ZIO_ALLOCATOR_NONE (-1)
+#define ZIO_HAS_ALLOCATOR(zio) ((zio)->io_allocator != ZIO_ALLOCATOR_NONE)
+
#define ZIO_FLAG_MUSTSUCCEED 0
#define ZIO_FLAG_RAW (ZIO_FLAG_RAW_COMPRESS | ZIO_FLAG_RAW_ENCRYPT)
@@ -526,6 +529,9 @@ struct zio {
/* Taskq dispatching state */
taskq_ent_t io_tqent;
+
+ /* write issue taskq selection, based upon sync thread */
+ taskq_t *io_wr_iss_tq;
};
enum blk_verify_flag {
diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs.abi b/sys/contrib/openzfs/lib/libzfs/libzfs.abi
index f8b0d395161b..bcdcff97666e 100644
--- a/sys/contrib/openzfs/lib/libzfs/libzfs.abi
+++ b/sys/contrib/openzfs/lib/libzfs/libzfs.abi
@@ -260,6 +260,7 @@
<elf-symbol name='tpool_suspended' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='tpool_wait' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='update_vdev_config_dev_strs' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+ <elf-symbol name='update_vdev_config_dev_sysfs_path' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='use_color' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='vdev_expand_proplist' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='vdev_name_to_prop' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
@@ -8332,6 +8333,12 @@
<parameter type-id='b59d7dce' name='buflen'/>
<return type-id='95e97e5e'/>
</function-decl>
+ <function-decl name='update_vdev_config_dev_sysfs_path' mangled-name='update_vdev_config_dev_sysfs_path' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='update_vdev_config_dev_sysfs_path'>
+ <parameter type-id='5ce45b60' name='nv'/>
+ <parameter type-id='80f4b756' name='path'/>
+ <parameter type-id='80f4b756' name='key'/>
+ <return type-id='48b5725f'/>
+ </function-decl>
<function-type size-in-bits='64' id='2ec2411e'>
<parameter type-id='eaa32e2f'/>
<parameter type-id='5ce45b60'/>
diff --git a/sys/contrib/openzfs/lib/libzpool/taskq.c b/sys/contrib/openzfs/lib/libzpool/taskq.c
index a2e457ef9e60..99a181ec3c93 100644
--- a/sys/contrib/openzfs/lib/libzpool/taskq.c
+++ b/sys/contrib/openzfs/lib/libzpool/taskq.c
@@ -337,6 +337,36 @@ taskq_destroy(taskq_t *tq)
kmem_free(tq, sizeof (taskq_t));
}
+/*
+ * Create a taskq with a specified number of pool threads. Allocate
+ * and return an array of nthreads kthread_t pointers, one for each
+ * thread in the pool. The array is not ordered and must be freed
+ * by the caller.
+ */
+taskq_t *
+taskq_create_synced(const char *name, int nthreads, pri_t pri,
+ int minalloc, int maxalloc, uint_t flags, kthread_t ***ktpp)
+{
+ taskq_t *tq;
+ kthread_t **kthreads = kmem_zalloc(sizeof (*kthreads) * nthreads,
+ KM_SLEEP);
+
+ (void) pri; (void) minalloc; (void) maxalloc;
+
+ flags &= ~(TASKQ_DYNAMIC | TASKQ_THREADS_CPU_PCT | TASKQ_DC_BATCH);
+
+ tq = taskq_create(name, nthreads, minclsyspri, nthreads, INT_MAX,
+ flags | TASKQ_PREPOPULATE);
+ VERIFY(tq != NULL);
+ VERIFY(tq->tq_nthreads == nthreads);
+
+ for (int i = 0; i < nthreads; i++) {
+ kthreads[i] = tq->tq_threadlist[i];
+ }
+ *ktpp = kthreads;
+ return (tq);
+}
+
int
taskq_member(taskq_t *tq, kthread_t *t)
{
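
A hypothetical usage sketch for the new interface (only the taskq routines are real; the rest is illustrative): the returned array identifies the pool's worker threads, is unordered, and must be freed by the caller.

    kthread_t **kthreads;
    taskq_t *tq = taskq_create_synced("example_tq", 4, minclsyspri,
        4, INT_MAX, 0, &kthreads);

    /* Remember the workers, e.g. for later comparison with curthread. */
    for (int i = 0; i < 4; i++)
            record_sync_thread(kthreads[i]);        /* hypothetical */

    kmem_free(kthreads, sizeof (*kthreads) * 4);
    taskq_destroy(tq);
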
diff --git a/sys/contrib/openzfs/lib/libzutil/os/freebsd/zutil_import_os.c b/sys/contrib/openzfs/lib/libzutil/os/freebsd/zutil_import_os.c
index 19ba58e79a03..a134c173bc89 100644
--- a/sys/contrib/openzfs/lib/libzutil/os/freebsd/zutil_import_os.c
+++ b/sys/contrib/openzfs/lib/libzutil/os/freebsd/zutil_import_os.c
@@ -250,6 +250,15 @@ zfs_dev_flush(int fd)
}
void
+update_vdev_config_dev_sysfs_path(nvlist_t *nv, const char *path,
+ const char *key)
+{
+ (void) nv;
+ (void) path;
+ (void) key;
+}
+
+void
update_vdevs_config_dev_sysfs_path(nvlist_t *config)
{
(void) config;
diff --git a/sys/contrib/openzfs/lib/libzutil/os/linux/zutil_import_os.c b/sys/contrib/openzfs/lib/libzutil/os/linux/zutil_import_os.c
index 44ed697dd490..fbfae4f7e685 100644
--- a/sys/contrib/openzfs/lib/libzutil/os/linux/zutil_import_os.c
+++ b/sys/contrib/openzfs/lib/libzutil/os/linux/zutil_import_os.c
@@ -766,9 +766,12 @@ no_dev:
* Rescan the enclosure sysfs path for turning on enclosure LEDs and store it
* in the nvlist * (if applicable). Like:
* vdev_enc_sysfs_path: '/sys/class/enclosure/11:0:1:0/SLOT 4'
+ *
+ * key: The nvlist_t name (like ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH)
*/
-static void
-update_vdev_config_dev_sysfs_path(nvlist_t *nv, const char *path)
+void
+update_vdev_config_dev_sysfs_path(nvlist_t *nv, const char *path,
+ const char *key)
{
char *upath, *spath;
@@ -777,9 +780,9 @@ update_vdev_config_dev_sysfs_path(nvlist_t *nv, const char *path)
spath = zfs_get_enclosure_sysfs_path(upath);
if (spath) {
- nvlist_add_string(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH, spath);
+ (void) nvlist_add_string(nv, key, spath);
} else {
- nvlist_remove_all(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH);
+ (void) nvlist_remove_all(nv, key);
}
free(upath);
@@ -799,7 +802,8 @@ sysfs_path_pool_vdev_iter_f(void *hdl_data, nvlist_t *nv, void *data)
return (1);
/* Rescan our enclosure sysfs path for this vdev */
- update_vdev_config_dev_sysfs_path(nv, path);
+ update_vdev_config_dev_sysfs_path(nv, path,
+ ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH);
return (0);
}
@@ -888,7 +892,8 @@ update_vdev_config_dev_strs(nvlist_t *nv)
(void) nvlist_add_string(nv, ZPOOL_CONFIG_PHYS_PATH,
vds.vds_devphys);
}
- update_vdev_config_dev_sysfs_path(nv, path);
+ update_vdev_config_dev_sysfs_path(nv, path,
+ ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH);
} else {
/* Clear out any stale entries. */
(void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID);
diff --git a/sys/contrib/openzfs/man/man4/zfs.4 b/sys/contrib/openzfs/man/man4/zfs.4
index ddad00be412d..f9824ac170ea 100644
--- a/sys/contrib/openzfs/man/man4/zfs.4
+++ b/sys/contrib/openzfs/man/man4/zfs.4
@@ -496,6 +496,13 @@ If we have less than this amount of free space,
most ZPL operations (e.g. write, create) will return
.Sy ENOSPC .
.
+.It Sy spa_num_allocators Ns = Ns Sy 4 Pq int
+Determines the number of block allocators to use per spa instance.
+Capped by the number of actual CPUs in the system.
+.Pp
+Note that setting this value too high could result in performance
+degradation and/or excess fragmentation.
+.
.It Sy spa_upgrade_errlog_limit Ns = Ns Sy 0 Pq uint
Limits the number of on-disk error log entries that will be converted to the
new format when enabling the
@@ -1974,13 +1981,6 @@ and may need to load new metaslabs to satisfy these allocations.
.It Sy zfs_sync_pass_rewrite Ns = Ns Sy 2 Pq uint
Rewrite new block pointers starting in this pass.
.
-.It Sy zfs_sync_taskq_batch_pct Ns = Ns Sy 75 Ns % Pq int
-This controls the number of threads used by
-.Sy dp_sync_taskq .
-The default value of
-.Sy 75%
-will create a maximum of one thread per CPU.
-.
.It Sy zfs_trim_extent_bytes_max Ns = Ns Sy 134217728 Ns B Po 128 MiB Pc Pq uint
Maximum size of TRIM command.
Larger ranges will be split into chunks no larger than this value before
@@ -2265,6 +2265,14 @@ If
.Sy 0 ,
generate a system-dependent value close to 6 threads per taskq.
.
+.It Sy zio_taskq_wr_iss_ncpus Ns = Ns Sy 0 Pq uint
+Determines the number of CPUs to run write issue taskqs.
+.Pp
+When 0 (the default), the value to use is computed internally
+as the number of actual CPUs in the system divided by the
+.Sy spa_num_allocators
+value.
+.
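
A worked example of the sizing described above, mirroring the ZTI_MODE_SYNC branch of spa_taskqs_init() from later in this commit (inputs hypothetical): 32 CPUs and 4 allocators yield 4 write issue taskqs at 20% of online CPUs each.

    #include <stdio.h>

    #define MAX(a, b) ((a) > (b) ? (a) : (b))
    #define MIN(a, b) ((a) < (b) ? (a) : (b))

    int
    main(void)
    {
            int boot_ncpus = 32;                    /* hypothetical */
            int spa_alloc_count = 4;                /* spa_num_allocators */
            int zio_taskq_wr_iss_ncpus = 0;         /* default */
            int zio_taskq_batch_pct = 80;           /* default */
            int count, value;

            if (zio_taskq_wr_iss_ncpus == 0)
                    count = MAX(boot_ncpus / spa_alloc_count, 1);
            else
                    count = MAX(1,
                        boot_ncpus / MAX(1, zio_taskq_wr_iss_ncpus));
            count = MAX(count, (zio_taskq_batch_pct + 99) / 100);
            count = MIN(count, spa_alloc_count);

            value = (zio_taskq_batch_pct + count / 2) / count;
            value = MIN(value, 100);

            /* Prints: 4 taskqs at 20% of online CPUs each */
            printf("%d taskqs at %d%% of online CPUs each\n",
                count, value);
            return (0);
    }
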
.It Sy zvol_inhibit_dev Ns = Ns Sy 0 Ns | Ns 1 Pq uint
Do not create zvol device nodes.
This may slightly improve startup time on
diff --git a/sys/contrib/openzfs/module/os/freebsd/spl/spl_kstat.c b/sys/contrib/openzfs/module/os/freebsd/spl/spl_kstat.c
index 9f5f92e194ec..43cd4da02e30 100644
--- a/sys/contrib/openzfs/module/os/freebsd/spl/spl_kstat.c
+++ b/sys/contrib/openzfs/module/os/freebsd/spl/spl_kstat.c
@@ -187,19 +187,18 @@ kstat_sysctl_dataset_string(SYSCTL_HANDLER_ARGS)
static int
kstat_sysctl_io(SYSCTL_HANDLER_ARGS)
{
- struct sbuf *sb;
+ struct sbuf sb;
kstat_t *ksp = arg1;
kstat_io_t *kip = ksp->ks_data;
int rc;
- sb = sbuf_new_auto();
- if (sb == NULL)
- return (ENOMEM);
+ sbuf_new_for_sysctl(&sb, NULL, 0, req);
+
/* Update the aggsums before reading */
(void) ksp->ks_update(ksp, KSTAT_READ);
/* though wlentime & friends are signed, they will never be negative */
- sbuf_printf(sb,
+ sbuf_printf(&sb,
"%-8llu %-8llu %-8u %-8u %-8llu %-8llu "
"%-8llu %-8llu %-8llu %-8llu %-8u %-8u\n",
kip->nread, kip->nwritten,
@@ -207,25 +206,21 @@ kstat_sysctl_io(SYSCTL_HANDLER_ARGS)
kip->wtime, kip->wlentime, kip->wlastupdate,
kip->rtime, kip->rlentime, kip->rlastupdate,
kip->wcnt, kip->rcnt);
- rc = sbuf_finish(sb);
- if (rc == 0)
- rc = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb));
- sbuf_delete(sb);
+ rc = sbuf_finish(&sb);
+ sbuf_delete(&sb);
return (rc);
}
static int
kstat_sysctl_raw(SYSCTL_HANDLER_ARGS)
{
- struct sbuf *sb;
+ struct sbuf sb;
void *data;
kstat_t *ksp = arg1;
void *(*addr_op)(kstat_t *ksp, loff_t index);
int n, has_header, rc = 0;
- sb = sbuf_new_auto();
- if (sb == NULL)
- return (ENOMEM);
+ sbuf_new_for_sysctl(&sb, NULL, PAGE_SIZE, req);
if (ksp->ks_raw_ops.addr)
addr_op = ksp->ks_raw_ops.addr;
@@ -258,8 +253,10 @@ restart_headers:
if (has_header) {
if (rc == ENOMEM && !kstat_resize_raw(ksp))
goto restart_headers;
- if (rc == 0)
- sbuf_printf(sb, "\n%s", ksp->ks_raw_buf);
+ if (rc == 0) {
+ sbuf_cat(&sb, "\n");
+ sbuf_cat(&sb, ksp->ks_raw_buf);
+ }
}
while ((data = addr_op(ksp, n)) != NULL) {
@@ -270,22 +267,19 @@ restart:
if (rc == ENOMEM && !kstat_resize_raw(ksp))
goto restart;
if (rc == 0)
- sbuf_printf(sb, "%s", ksp->ks_raw_buf);
+ sbuf_cat(&sb, ksp->ks_raw_buf);
} else {
ASSERT3U(ksp->ks_ndata, ==, 1);
- sbuf_hexdump(sb, ksp->ks_data,
+ sbuf_hexdump(&sb, ksp->ks_data,
ksp->ks_data_size, NULL, 0);
}
n++;
}
free(ksp->ks_raw_buf, M_TEMP);
mutex_exit(ksp->ks_lock);
- sbuf_trim(sb);
- rc = sbuf_finish(sb);
- if (rc == 0)
- rc = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb));
- sbuf_delete(sb);
+ rc = sbuf_finish(&sb);
+ sbuf_delete(&sb);
return (rc);
}
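
The conversion above replaces heap-allocated sbufs with stack sbufs bound to the sysctl request, so output drains straight to userspace and the explicit ENOMEM/SYSCTL_OUT handling disappears. A minimal handler in the same style (a sketch, assuming a FreeBSD kernel module context):

    #include <sys/param.h>
    #include <sys/sbuf.h>
    #include <sys/sysctl.h>

    static int
    example_sysctl_handler(SYSCTL_HANDLER_ARGS)
    {
            struct sbuf sb;
            int rc;

            /* Bind the sbuf to the request; the drain callback flushes
             * to userspace, so no allocation failure check is needed. */
            sbuf_new_for_sysctl(&sb, NULL, 0, req);
            sbuf_printf(&sb, "example value: %d\n", 42);
            rc = sbuf_finish(&sb);
            sbuf_delete(&sb);
            return (rc);
    }
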
diff --git a/sys/contrib/openzfs/module/os/freebsd/spl/spl_taskq.c b/sys/contrib/openzfs/module/os/freebsd/spl/spl_taskq.c
index 3fba5ed3c228..a005debaf6e3 100644
--- a/sys/contrib/openzfs/module/os/freebsd/spl/spl_taskq.c
+++ b/sys/contrib/openzfs/module/os/freebsd/spl/spl_taskq.c
@@ -220,6 +220,7 @@ taskq_create_impl(const char *name, int nthreads, pri_t pri,
nthreads = MAX((mp_ncpus * nthreads) / 100, 1);
tq = kmem_alloc(sizeof (*tq), KM_SLEEP);
+ tq->tq_nthreads = nthreads;
tq->tq_queue = taskqueue_create(name, M_WAITOK,
taskqueue_thread_enqueue, &tq->tq_queue);
taskqueue_set_callback(tq->tq_queue, TASKQUEUE_CALLBACK_TYPE_INIT,
@@ -254,6 +255,87 @@ taskq_destroy(taskq_t *tq)
kmem_free(tq, sizeof (*tq));
}
+static void taskq_sync_assign(void *arg);
+
+typedef struct taskq_sync_arg {
+ kthread_t *tqa_thread;
+ kcondvar_t tqa_cv;
+ kmutex_t tqa_lock;
+ int tqa_ready;
+} taskq_sync_arg_t;
+
+static void
+taskq_sync_assign(void *arg)
+{
+ taskq_sync_arg_t *tqa = arg;
+
+ mutex_enter(&tqa->tqa_lock);
+ tqa->tqa_thread = curthread;
+ tqa->tqa_ready = 1;
+ cv_signal(&tqa->tqa_cv);
+ while (tqa->tqa_ready == 1)
+ cv_wait(&tqa->tqa_cv, &tqa->tqa_lock);
+ mutex_exit(&tqa->tqa_lock);
+}
+
+/*
+ * Create a taskq with a specified number of pool threads. Allocate
+ * and return an array of nthreads kthread_t pointers, one for each
+ * thread in the pool. The array is not ordered and must be freed
+ * by the caller.
+ */
+taskq_t *
+taskq_create_synced(const char *name, int nthreads, pri_t pri,
+ int minalloc, int maxalloc, uint_t flags, kthread_t ***ktpp)
+{
+ taskq_t *tq;
+ taskq_sync_arg_t *tqs = kmem_zalloc(sizeof (*tqs) * nthreads, KM_SLEEP);
+ kthread_t **kthreads = kmem_zalloc(sizeof (*kthreads) * nthreads,
+ KM_SLEEP);
+
+ flags &= ~(TASKQ_DYNAMIC | TASKQ_THREADS_CPU_PCT | TASKQ_DC_BATCH);
+
+ tq = taskq_create(name, nthreads, minclsyspri, nthreads, INT_MAX,
+ flags | TASKQ_PREPOPULATE);
+ VERIFY(tq != NULL);
+ VERIFY(tq->tq_nthreads == nthreads);
+
+ /* spawn all syncthreads */
+ for (int i = 0; i < nthreads; i++) {
+ cv_init(&tqs[i].tqa_cv, NULL, CV_DEFAULT, NULL);
+ mutex_init(&tqs[i].tqa_lock, NULL, MUTEX_DEFAULT, NULL);
+ (void) taskq_dispatch(tq, taskq_sync_assign,
+ &tqs[i], TQ_FRONT);
+ }
+
+ /* wait on all syncthreads to start */
+ for (int i = 0; i < nthreads; i++) {
+ mutex_enter(&tqs[i].tqa_lock);
+ while (tqs[i].tqa_ready == 0)
+ cv_wait(&tqs[i].tqa_cv, &tqs[i].tqa_lock);
+ mutex_exit(&tqs[i].tqa_lock);
+ }
+
+ /* let all syncthreads resume, finish */
+ for (int i = 0; i < nthreads; i++) {
+ mutex_enter(&tqs[i].tqa_lock);
+ tqs[i].tqa_ready = 2;
+ cv_broadcast(&tqs[i].tqa_cv);
+ mutex_exit(&tqs[i].tqa_lock);
+ }
+ taskq_wait(tq);
+
+ for (int i = 0; i < nthreads; i++) {
+ kthreads[i] = tqs[i].tqa_thread;
+ mutex_destroy(&tqs[i].tqa_lock);
+ cv_destroy(&tqs[i].tqa_cv);
+ }
+ kmem_free(tqs, sizeof (*tqs) * nthreads);
+
+ *ktpp = kthreads;
+ return (tq);
+}
+
int
taskq_member(taskq_t *tq, kthread_t *thread)
{
@@ -398,22 +480,34 @@ void
taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint32_t flags,
taskq_ent_t *task)
{
- int prio;
-
/*
* If TQ_FRONT is given, we want higher priority for this task, so it
* can go at the front of the queue.
*/
- prio = !!(flags & TQ_FRONT);
- task->tqent_id = 0;
+ task->tqent_task.ta_priority = !!(flags & TQ_FRONT);
task->tqent_func = func;
task->tqent_arg = arg;
-
- TASK_INIT(&task->tqent_task, prio, taskq_run_ent, task);
taskqueue_enqueue(tq->tq_queue, &task->tqent_task);
}
void
+taskq_init_ent(taskq_ent_t *task)
+{
+ TASK_INIT(&task->tqent_task, 0, taskq_run_ent, task);
+ task->tqent_func = NULL;
+ task->tqent_arg = NULL;
+ task->tqent_id = 0;
+ task->tqent_type = NORMAL_TASK;
+ task->tqent_rc = 0;
+}
+
+int
+taskq_empty_ent(taskq_ent_t *task)
+{
+ return (task->tqent_task.ta_pending == 0);
+}
+
+void
taskq_wait(taskq_t *tq)
{
taskqueue_quiesce(tq->tq_queue);
@@ -439,9 +533,3 @@ taskq_wait_outstanding(taskq_t *tq, taskqid_t id __unused)
{
taskqueue_drain_all(tq->tq_queue);
}
-
-int
-taskq_empty_ent(taskq_ent_t *t)
-{
- return (t->tqent_task.ta_pending == 0);
-}
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/kmod_core.c b/sys/contrib/openzfs/module/os/freebsd/zfs/kmod_core.c
index 9a268573528c..00c1acf57710 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/kmod_core.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/kmod_core.c
@@ -65,6 +65,7 @@ __FBSDID("$FreeBSD$");
#include <sys/spa_impl.h>
#include <sys/stat.h>
#include <sys/sunddi.h>
+#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/taskqueue.h>
#include <sys/uio.h>
@@ -336,6 +337,8 @@ static moduledata_t zfs_mod = {
EVENTHANDLER_DEFINE(mountroot, spa_boot_init, NULL, 0);
#endif
+FEATURE(zfs, "OpenZFS support");
+
DECLARE_MODULE(zfsctrl, zfs_mod, SI_SUB_CLOCKS, SI_ORDER_ANY);
MODULE_VERSION(zfsctrl, 1);
#if __FreeBSD_version > 1300092
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c
index 0830e1c26546..6a7c2d2811b1 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c
@@ -1319,6 +1319,7 @@ zvol_os_rename_minor(zvol_state_t *zv, const char *newname)
}
}
strlcpy(zv->zv_name, newname, sizeof (zv->zv_name));
+ dataset_kstats_rename(&zv->zv_kstat, newname);
}
/*
diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-taskq.c b/sys/contrib/openzfs/module/os/linux/spl/spl-taskq.c
index d18f935b167c..79a1a8e5a5aa 100644
--- a/sys/contrib/openzfs/module/os/linux/spl/spl-taskq.c
+++ b/sys/contrib/openzfs/module/os/linux/spl/spl-taskq.c
@@ -1262,6 +1262,42 @@ taskq_destroy(taskq_t *tq)
}
EXPORT_SYMBOL(taskq_destroy);
+/*
+ * Create a taskq with a specified number of pool threads. Allocate
+ * and return an array of nthreads kthread_t pointers, one for each
+ * thread in the pool. The array is not ordered and must be freed
+ * by the caller.
+ */
+taskq_t *
+taskq_create_synced(const char *name, int nthreads, pri_t pri,
+ int minalloc, int maxalloc, uint_t flags, kthread_t ***ktpp)
+{
+ taskq_t *tq;
+ taskq_thread_t *tqt;
+ int i = 0;
+ kthread_t **kthreads = kmem_zalloc(sizeof (*kthreads) * nthreads,
+ KM_SLEEP);
+
+ flags &= ~(TASKQ_DYNAMIC | TASKQ_THREADS_CPU_PCT | TASKQ_DC_BATCH);
+
+ /* taskq_create spawns all the threads before returning */
+ tq = taskq_create(name, nthreads, minclsyspri, nthreads, INT_MAX,
+ flags | TASKQ_PREPOPULATE);
+ VERIFY(tq != NULL);
+ VERIFY(tq->tq_nthreads == nthreads);
+
+ list_for_each_entry(tqt, &tq->tq_thread_list, tqt_thread_list) {
+ kthreads[i] = tqt->tqt_thread;
+ i++;
+ }
+
+ ASSERT3S(i, ==, nthreads);
+ *ktpp = kthreads;
+
+ return (tq);
+}
+EXPORT_SYMBOL(taskq_create_synced);
+
static unsigned int spl_taskq_kick = 0;
/*
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c
index a1db5c57c18b..2792bc027213 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c
@@ -1488,7 +1488,7 @@ zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent)
* read-only flag, pretend it was set, as done for snapshots.
*/
if (!canwrite)
- vfs->vfs_readonly = true;
+ vfs->vfs_readonly = B_TRUE;
error = zfsvfs_create(osname, vfs->vfs_readonly, &zfsvfs);
if (error) {
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c
index fd0fd2c36dcd..384f5785a498 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c
@@ -1528,6 +1528,8 @@ zvol_os_rename_minor(zvol_state_t *zv, const char *newname)
*/
set_disk_ro(zv->zv_zso->zvo_disk, !readonly);
set_disk_ro(zv->zv_zso->zvo_disk, readonly);
+
+ dataset_kstats_rename(&zv->zv_kstat, newname);
}
void
diff --git a/sys/contrib/openzfs/module/zfs/abd.c b/sys/contrib/openzfs/module/zfs/abd.c
index bcc6ddd5e81b..0a2411a2d572 100644
--- a/sys/contrib/openzfs/module/zfs/abd.c
+++ b/sys/contrib/openzfs/module/zfs/abd.c
@@ -1025,7 +1025,7 @@ abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd, size_t off,
size_t len, dlen;
struct abd_iter caiters[3];
struct abd_iter daiter;
- void *caddrs[3];
+ void *caddrs[3], *daddr;
unsigned long flags __maybe_unused = 0;
abd_t *c_cabds[3];
abd_t *c_dabd = NULL;
@@ -1057,10 +1057,13 @@ abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd, size_t off,
if (dsize > 0) {
IMPLY(abd_is_gang(dabd), c_dabd != NULL);
abd_iter_map(&daiter);
+ daddr = daiter.iter_mapaddr;
len = MIN(daiter.iter_mapsize, len);
dlen = len;
- } else
+ } else {
+ daddr = NULL;
dlen = 0;
+ }
/* must be progressive */
ASSERT3U(len, >, 0);
@@ -1070,7 +1073,7 @@ abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd, size_t off,
*/
ASSERT3U(((uint64_t)len & 511ULL), ==, 0);
- func_raidz_gen(caddrs, daiter.iter_mapaddr, len, dlen);
+ func_raidz_gen(caddrs, daddr, len, dlen);
for (i = parity-1; i >= 0; i--) {
abd_iter_unmap(&caiters[i]);
diff --git a/sys/contrib/openzfs/module/zfs/dataset_kstats.c b/sys/contrib/openzfs/module/zfs/dataset_kstats.c
index 767a461e0026..2ac058fd2c93 100644
--- a/sys/contrib/openzfs/module/zfs/dataset_kstats.c
+++ b/sys/contrib/openzfs/module/zfs/dataset_kstats.c
@@ -199,6 +199,18 @@ dataset_kstats_destroy(dataset_kstats_t *dk)
}
void
+dataset_kstats_rename(dataset_kstats_t *dk, const char *name)
+{
+ dataset_kstat_values_t *dkv = dk->dk_kstats->ks_data;
+ char *ds_name;
+
+ ds_name = KSTAT_NAMED_STR_PTR(&dkv->dkv_ds_name);
+ ASSERT3S(ds_name, !=, NULL);
+ (void) strlcpy(ds_name, name,
+ KSTAT_NAMED_STR_BUFLEN(&dkv->dkv_ds_name));
+}
+
+void
dataset_kstats_update_write_kstats(dataset_kstats_t *dk,
int64_t nwritten)
{
diff --git a/sys/contrib/openzfs/module/zfs/dbuf.c b/sys/contrib/openzfs/module/zfs/dbuf.c
index c0c2692c113a..0a179fffb16a 100644
--- a/sys/contrib/openzfs/module/zfs/dbuf.c
+++ b/sys/contrib/openzfs/module/zfs/dbuf.c
@@ -4587,6 +4587,10 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
}
}
+/*
+ * Syncs out a range of dirty records for indirect or leaf dbufs. May be
+ * called recursively from dbuf_sync_indirect().
+ */
void
dbuf_sync_list(list_t *list, int level, dmu_tx_t *tx)
{
@@ -5005,7 +5009,10 @@ dbuf_remap(dnode_t *dn, dmu_buf_impl_t *db, dmu_tx_t *tx)
}
-/* Issue I/O to commit a dirty buffer to disk. */
+/*
+ * Populate dr->dr_zio with a zio to commit a dirty buffer to disk.
+ * Caller is responsible for issuing the zio_[no]wait(dr->dr_zio).
+ */
static void
dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
{
diff --git a/sys/contrib/openzfs/module/zfs/dmu_objset.c b/sys/contrib/openzfs/module/zfs/dmu_objset.c
index 76e65b5506a9..f098e1daa44b 100644
--- a/sys/contrib/openzfs/module/zfs/dmu_objset.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_objset.c
@@ -1639,28 +1639,90 @@ dmu_objset_write_done(zio_t *zio, arc_buf_t *abuf, void *arg)
kmem_free(bp, sizeof (*bp));
}
+typedef struct sync_objset_arg {
+ zio_t *soa_zio;
+ objset_t *soa_os;
+ dmu_tx_t *soa_tx;
+ kmutex_t soa_mutex;
+ int soa_count;
+ taskq_ent_t soa_tq_ent;
+} sync_objset_arg_t;
+
typedef struct sync_dnodes_arg {
- multilist_t *sda_list;
- int sda_sublist_idx;
- multilist_t *sda_newlist;
- dmu_tx_t *sda_tx;
+ multilist_t *sda_list;
+ int sda_sublist_idx;
+ multilist_t *sda_newlist;
+ sync_objset_arg_t *sda_soa;
} sync_dnodes_arg_t;
+static void sync_meta_dnode_task(void *arg);
+
static void
sync_dnodes_task(void *arg)
{
sync_dnodes_arg_t *sda = arg;
+ sync_objset_arg_t *soa = sda->sda_soa;
+ objset_t *os = soa->soa_os;
multilist_sublist_t *ms =
multilist_sublist_lock(sda->sda_list, sda->sda_sublist_idx);
- dmu_objset_sync_dnodes(ms, sda->sda_tx);
+ dmu_objset_sync_dnodes(ms, soa->soa_tx);
multilist_sublist_unlock(ms);
kmem_free(sda, sizeof (*sda));
+
+ mutex_enter(&soa->soa_mutex);
+ ASSERT(soa->soa_count != 0);
+ if (--soa->soa_count != 0) {
+ mutex_exit(&soa->soa_mutex);
+ return;
+ }
+ mutex_exit(&soa->soa_mutex);
+
+ taskq_dispatch_ent(dmu_objset_pool(os)->dp_sync_taskq,
+ sync_meta_dnode_task, soa, TQ_FRONT, &soa->soa_tq_ent);
}
+/*
+ * Issue the zio_nowait() for all dirty record zios on the meta dnode,
+ * then trigger the callback for the zil_sync. This runs once for each
+ * objset, only after any/all sublists in the objset have been synced.
+ */
+static void
+sync_meta_dnode_task(void *arg)
+{
+ sync_objset_arg_t *soa = arg;
+ objset_t *os = soa->soa_os;
+ dmu_tx_t *tx = soa->soa_tx;
+ int txgoff = tx->tx_txg & TXG_MASK;
+ dbuf_dirty_record_t *dr;
+
+ ASSERT0(soa->soa_count);
+
+ list_t *list = &DMU_META_DNODE(os)->dn_dirty_records[txgoff];
+ while ((dr = list_remove_head(list)) != NULL) {
+ ASSERT0(dr->dr_dbuf->db_level);
+ zio_nowait(dr->dr_zio);
+ }
+
+ /* Enable dnode backfill if enough objects have been freed. */
+ if (os->os_freed_dnodes >= dmu_rescan_dnode_threshold) {
+ os->os_rescan_dnodes = B_TRUE;
+ os->os_freed_dnodes = 0;
+ }
+
+ /*
+ * Free intent log blocks up to this tx.
+ */
+ zil_sync(os->os_zil, tx);
+ os->os_phys->os_zil_header = os->os_zil_header;
+ zio_nowait(soa->soa_zio);
+
+ mutex_destroy(&soa->soa_mutex);
+ kmem_free(soa, sizeof (*soa));
+}
/* called from dsl */
void
@@ -1670,8 +1732,6 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
zbookmark_phys_t zb;
zio_prop_t zp;
zio_t *zio;
- list_t *list;
- dbuf_dirty_record_t *dr;
int num_sublists;
multilist_t *ml;
blkptr_t *blkptr_copy = kmem_alloc(sizeof (*os->os_rootbp), KM_SLEEP);
@@ -1758,39 +1818,49 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
offsetof(dnode_t, dn_dirty_link[txgoff]));
}
+ /*
+ * zio_nowait(zio) is done after any/all sublist and meta dnode
+ * zios have been nowaited, and the zil_sync() has been performed.
+ * The soa is freed at the end of sync_meta_dnode_task.
+ */
+ sync_objset_arg_t *soa = kmem_alloc(sizeof (*soa), KM_SLEEP);
+ soa->soa_zio = zio;
+ soa->soa_os = os;
+ soa->soa_tx = tx;
+ taskq_init_ent(&soa->soa_tq_ent);
+ mutex_init(&soa->soa_mutex, NULL, MUTEX_DEFAULT, NULL);
+
ml = &os->os_dirty_dnodes[txgoff];
- num_sublists = multilist_get_num_sublists(ml);
+ soa->soa_count = num_sublists = multilist_get_num_sublists(ml);
+
for (int i = 0; i < num_sublists; i++) {
if (multilist_sublist_is_empty_idx(ml, i))
- continue;
- sync_dnodes_arg_t *sda = kmem_alloc(sizeof (*sda), KM_SLEEP);
- sda->sda_list = ml;
- sda->sda_sublist_idx = i;
- sda->sda_tx = tx;
- (void) taskq_dispatch(dmu_objset_pool(os)->dp_sync_taskq,
- sync_dnodes_task, sda, 0);
- /* callback frees sda */
- }
- taskq_wait(dmu_objset_pool(os)->dp_sync_taskq);
-
- list = &DMU_META_DNODE(os)->dn_dirty_records[txgoff];
- while ((dr = list_remove_head(list)) != NULL) {
- ASSERT0(dr->dr_dbuf->db_level);
- zio_nowait(dr->dr_zio);
+ soa->soa_count--;
}
- /* Enable dnode backfill if enough objects have been freed. */
- if (os->os_freed_dnodes >= dmu_rescan_dnode_threshold) {
- os->os_rescan_dnodes = B_TRUE;
- os->os_freed_dnodes = 0;
+ if (soa->soa_count == 0) {
+ taskq_dispatch_ent(dmu_objset_pool(os)->dp_sync_taskq,
+ sync_meta_dnode_task, soa, TQ_FRONT, &soa->soa_tq_ent);
+ } else {
+ /*
+ * Sync sublists in parallel. The last to finish
+ * (i.e., when soa->soa_count reaches zero) must
+ * dispatch sync_meta_dnode_task.
+ */
+ for (int i = 0; i < num_sublists; i++) {
+ if (multilist_sublist_is_empty_idx(ml, i))
+ continue;
+ sync_dnodes_arg_t *sda =
+ kmem_alloc(sizeof (*sda), KM_SLEEP);
+ sda->sda_list = ml;
+ sda->sda_sublist_idx = i;
+ sda->sda_soa = soa;
+ (void) taskq_dispatch(
+ dmu_objset_pool(os)->dp_sync_taskq,
+ sync_dnodes_task, sda, 0);
+ /* sync_dnodes_task frees sda */
+ }
}
-
- /*
- * Free intent log blocks up to this tx.
- */
- zil_sync(os->os_zil, tx);
- os->os_phys->os_zil_header = os->os_zil_header;
- zio_nowait(zio);
}
boolean_t
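
The restructuring above replaces a blocking taskq_wait() with a count-based fan-in: every sublist task decrements soa_count under soa_mutex, and only the last one to finish dispatches sync_meta_dnode_task. The same pattern in isolation, as a hedged sketch with hypothetical names:

    typedef struct fan_in {
            kmutex_t fi_lock;
            int fi_count;           /* outstanding subtasks */
    } fan_in_t;

    extern void run_followup(void); /* hypothetical follow-up work */

    static void
    subtask_done(fan_in_t *fi)
    {
            mutex_enter(&fi->fi_lock);
            ASSERT(fi->fi_count != 0);
            if (--fi->fi_count != 0) {
                    mutex_exit(&fi->fi_lock);
                    return;
            }
            mutex_exit(&fi->fi_lock);

            /* Last finisher: no other subtask can still touch the
             * shared state, so the follow-up may run safely. */
            run_followup();
    }
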
diff --git a/sys/contrib/openzfs/module/zfs/dnode_sync.c b/sys/contrib/openzfs/module/zfs/dnode_sync.c
index 8e39af83bb0a..8cffbdb9d20b 100644
--- a/sys/contrib/openzfs/module/zfs/dnode_sync.c
+++ b/sys/contrib/openzfs/module/zfs/dnode_sync.c
@@ -627,6 +627,7 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx)
/*
* Write out the dnode's dirty buffers.
+ * Does not wait for zio completions.
*/
void
dnode_sync(dnode_t *dn, dmu_tx_t *tx)
diff --git a/sys/contrib/openzfs/module/zfs/dsl_dataset.c b/sys/contrib/openzfs/module/zfs/dsl_dataset.c
index d6db61729223..62a1649d3786 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_dataset.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_dataset.c
@@ -2069,8 +2069,9 @@ dsl_dataset_snapshot_tmp(const char *fsname, const char *snapname,
return (error);
}
+/* Nonblocking dataset sync. Assumes dataset:objset is always 1:1 */
void
-dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx)
+dsl_dataset_sync(dsl_dataset_t *ds, zio_t *rio, dmu_tx_t *tx)
{
ASSERT(dmu_tx_is_syncing(tx));
ASSERT(ds->ds_objset != NULL);
@@ -2098,7 +2099,7 @@ dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx)
ds->ds_resume_bytes[tx->tx_txg & TXG_MASK] = 0;
}
- dmu_objset_sync(ds->ds_objset, zio, tx);
+ dmu_objset_sync(ds->ds_objset, rio, tx);
}
/*
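
The zio parameter becomes rio to stress that dsl_dataset_sync() only attaches children under a root zio owned by dsl_pool_sync(); nothing blocks until the root is waited on. The pattern, as a minimal hedged sketch assuming an open spa_t *spa:

    /* zio_null() creates a no-op child under rio; zio_wait() on the
     * root returns only after every child in the tree completes. */
    zio_t *rio = zio_root(spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
    for (int i = 0; i < 8; i++)
            zio_nowait(zio_null(rio, spa, NULL, NULL, NULL, 0));
    VERIFY0(zio_wait(rio));
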
diff --git a/sys/contrib/openzfs/module/zfs/dsl_pool.c b/sys/contrib/openzfs/module/zfs/dsl_pool.c
index 17b971248283..370c6a010dca 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_pool.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_pool.c
@@ -141,11 +141,6 @@ uint_t zfs_delay_min_dirty_percent = 60;
uint64_t zfs_delay_scale = 1000 * 1000 * 1000 / 2000;
/*
- * This determines the number of threads used by the dp_sync_taskq.
- */
-static int zfs_sync_taskq_batch_pct = 75;
-
-/*
* These tunables determine the behavior of how zil_itxg_clean() is
* called via zil_clean() in the context of spa_sync(). When an itxg
* list needs to be cleaned, TQ_NOSLEEP will be used when dispatching.
@@ -214,9 +209,7 @@ dsl_pool_open_impl(spa_t *spa, uint64_t txg)
txg_list_create(&dp->dp_early_sync_tasks, spa,
offsetof(dsl_sync_task_t, dst_node));
- dp->dp_sync_taskq = taskq_create("dp_sync_taskq",
- zfs_sync_taskq_batch_pct, minclsyspri, 1, INT_MAX,
- TASKQ_THREADS_CPU_PCT);
+ dp->dp_sync_taskq = spa_sync_tq_create(spa, "dp_sync_taskq");
dp->dp_zil_clean_taskq = taskq_create("dp_zil_clean_taskq",
zfs_zil_clean_taskq_nthr_pct, minclsyspri,
@@ -409,7 +402,7 @@ dsl_pool_close(dsl_pool_t *dp)
txg_list_destroy(&dp->dp_dirty_dirs);
taskq_destroy(dp->dp_zil_clean_taskq);
- taskq_destroy(dp->dp_sync_taskq);
+ spa_sync_tq_destroy(dp->dp_spa);
/*
* We can't set retry to TRUE since we're explicitly specifying
@@ -674,7 +667,7 @@ dsl_early_sync_task_verify(dsl_pool_t *dp, uint64_t txg)
void
dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
{
- zio_t *zio;
+ zio_t *rio; /* root zio for all dirty dataset syncs */
dmu_tx_t *tx;
dsl_dir_t *dd;
dsl_dataset_t *ds;
@@ -704,9 +697,10 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
}
/*
- * Write out all dirty blocks of dirty datasets.
+ * Write out all dirty blocks of dirty datasets. Note, this could
+	 * create a very large (10,000+ node) zio tree.
*/
- zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
+ rio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
while ((ds = txg_list_remove(&dp->dp_dirty_datasets, txg)) != NULL) {
/*
* We must not sync any non-MOS datasets twice, because
@@ -715,9 +709,9 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
*/
ASSERT(!list_link_active(&ds->ds_synced_link));
list_insert_tail(&synced_datasets, ds);
- dsl_dataset_sync(ds, zio, tx);
+ dsl_dataset_sync(ds, rio, tx);
}
- VERIFY0(zio_wait(zio));
+ VERIFY0(zio_wait(rio));
/*
* Update the long range free counter after
@@ -748,13 +742,13 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
* user accounting information (and we won't get confused
* about which blocks are part of the snapshot).
*/
- zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
+ rio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
while ((ds = txg_list_remove(&dp->dp_dirty_datasets, txg)) != NULL) {
objset_t *os = ds->ds_objset;
ASSERT(list_link_active(&ds->ds_synced_link));
dmu_buf_rele(ds->ds_dbuf, ds);
- dsl_dataset_sync(ds, zio, tx);
+ dsl_dataset_sync(ds, rio, tx);
/*
* Release any key mappings created by calls to
@@ -767,7 +761,7 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
key_mapping_rele(dp->dp_spa, ds->ds_key_mapping, ds);
}
}
- VERIFY0(zio_wait(zio));
+ VERIFY0(zio_wait(rio));
/*
* Now that the datasets have been completely synced, we can
@@ -1481,9 +1475,6 @@ ZFS_MODULE_PARAM(zfs, zfs_, dirty_data_sync_percent, UINT, ZMOD_RW,
ZFS_MODULE_PARAM(zfs, zfs_, delay_scale, U64, ZMOD_RW,
"How quickly delay approaches infinity");
-ZFS_MODULE_PARAM(zfs, zfs_, sync_taskq_batch_pct, INT, ZMOD_RW,
- "Max percent of CPUs that are used to sync dirty data");
-
ZFS_MODULE_PARAM(zfs_zil, zfs_zil_, clean_taskq_nthr_pct, INT, ZMOD_RW,
"Max percent of CPUs that are used per dp_sync_taskq");
diff --git a/sys/contrib/openzfs/module/zfs/sa.c b/sys/contrib/openzfs/module/zfs/sa.c
index f9daaabbed3e..0ae4c331dd36 100644
--- a/sys/contrib/openzfs/module/zfs/sa.c
+++ b/sys/contrib/openzfs/module/zfs/sa.c
@@ -23,6 +23,7 @@
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013, 2017 by Delphix. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
+ * Copyright 2023 RackTop Systems, Inc.
*/
#include <sys/zfs_context.h>
@@ -369,7 +370,7 @@ sa_attr_op(sa_handle_t *hdl, sa_bulk_attr_t *bulk, int count,
if (bulk[i].sa_data) {
SA_COPY_DATA(bulk[i].sa_data_func,
bulk[i].sa_addr, bulk[i].sa_data,
- bulk[i].sa_size);
+ MIN(bulk[i].sa_size, bulk[i].sa_length));
}
continue;
diff --git a/sys/contrib/openzfs/module/zfs/spa.c b/sys/contrib/openzfs/module/zfs/spa.c
index aa97144f16e4..68f367c1c744 100644
--- a/sys/contrib/openzfs/module/zfs/spa.c
+++ b/sys/contrib/openzfs/module/zfs/spa.c
@@ -99,6 +99,7 @@
#include "zfs_prop.h"
#include "zfs_comutil.h"
+#include <cityhash.h>
/*
* spa_thread() existed on Illumos as a parent thread for the various worker
@@ -128,16 +129,16 @@ int zfs_ccw_retry_interval = 300;
typedef enum zti_modes {
ZTI_MODE_FIXED, /* value is # of threads (min 1) */
- ZTI_MODE_BATCH, /* cpu-intensive; value is ignored */
ZTI_MODE_SCALE, /* Taskqs scale with CPUs. */
+ ZTI_MODE_SYNC, /* sync thread assigned */
ZTI_MODE_NULL, /* don't create a taskq */
ZTI_NMODES
} zti_modes_t;
#define ZTI_P(n, q) { ZTI_MODE_FIXED, (n), (q) }
#define ZTI_PCT(n) { ZTI_MODE_ONLINE_PERCENT, (n), 1 }
-#define ZTI_BATCH { ZTI_MODE_BATCH, 0, 1 }
#define ZTI_SCALE { ZTI_MODE_SCALE, 0, 1 }
+#define ZTI_SYNC { ZTI_MODE_SYNC, 0, 1 }
#define ZTI_NULL { ZTI_MODE_NULL, 0, 0 }
#define ZTI_N(n) ZTI_P(n, 1)
@@ -158,14 +159,14 @@ static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = {
* initializing a pool, we use this table to create an appropriately sized
* taskq. Some operations are low volume and therefore have a small, static
* number of threads assigned to their taskqs using the ZTI_N(#) or ZTI_ONE
- * macros. Other operations process a large amount of data; the ZTI_BATCH
+ * macros. Other operations process a large amount of data; the ZTI_SCALE
* macro causes us to create a taskq oriented for throughput. Some operations
* are so high frequency and short-lived that the taskq itself can become a
* point of lock contention. The ZTI_P(#, #) macro indicates that we need an
* additional degree of parallelism specified by the number of threads per-
* taskq and the number of taskqs; when dispatching an event in this case, the
- * particular taskq is chosen at random. ZTI_SCALE is similar to ZTI_BATCH,
- * but with number of taskqs also scaling with number of CPUs.
+ * particular taskq is chosen at random. ZTI_SCALE uses a number of taskqs
+ * that scales with the number of CPUs.
*
* The different taskq priorities are to handle the different contexts (issue
* and interrupt) and then to reserve threads for ZIO_PRIORITY_NOW I/Os that
@@ -175,7 +176,7 @@ static const zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = {
/* ISSUE ISSUE_HIGH INTR INTR_HIGH */
{ ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* NULL */
{ ZTI_N(8), ZTI_NULL, ZTI_SCALE, ZTI_NULL }, /* READ */
- { ZTI_BATCH, ZTI_N(5), ZTI_SCALE, ZTI_N(5) }, /* WRITE */
+ { ZTI_SYNC, ZTI_N(5), ZTI_SCALE, ZTI_N(5) }, /* WRITE */
{ ZTI_SCALE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* FREE */
{ ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* CLAIM */
{ ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* IOCTL */
@@ -206,6 +207,8 @@ static const uint_t zio_taskq_basedc = 80; /* base duty cycle */
static const boolean_t spa_create_process = B_TRUE; /* no process => no sysdc */
#endif
+static uint_t zio_taskq_wr_iss_ncpus = 0;
+
/*
* Report any spa_load_verify errors found, but do not fail spa_load.
* This is used by zdb to analyze non-idle pools.
@@ -1054,21 +1057,34 @@ spa_taskqs_init(spa_t *spa, zio_type_t t, zio_taskq_type_t q)
uint_t count = ztip->zti_count;
spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q];
uint_t cpus, flags = TASKQ_DYNAMIC;
-#ifdef HAVE_SYSDC
- boolean_t batch = B_FALSE;
-#endif
switch (mode) {
case ZTI_MODE_FIXED:
ASSERT3U(value, >, 0);
break;
- case ZTI_MODE_BATCH:
-#ifdef HAVE_SYSDC
- batch = B_TRUE;
-#endif
+ case ZTI_MODE_SYNC:
+
+ /*
+ * Create one wr_iss taskq for every 'zio_taskq_wr_iss_ncpus',
+ * not to exceed the number of spa allocators.
+ */
+ if (zio_taskq_wr_iss_ncpus == 0) {
+ count = MAX(boot_ncpus / spa->spa_alloc_count, 1);
+ } else {
+ count = MAX(1,
+ boot_ncpus / MAX(1, zio_taskq_wr_iss_ncpus));
+ }
+ count = MAX(count, (zio_taskq_batch_pct + 99) / 100);
+ count = MIN(count, spa->spa_alloc_count);
+
+ /*
+ * zio_taskq_batch_pct is unbounded and may exceed 100%, but no
+ * single taskq may have more threads than 100% of online cpus.
+ */
+ value = (zio_taskq_batch_pct + count / 2) / count;
+ value = MIN(value, 100);
flags |= TASKQ_THREADS_CPU_PCT;
- value = MIN(zio_taskq_batch_pct, 100);
break;
case ZTI_MODE_SCALE:
@@ -1115,7 +1131,7 @@ spa_taskqs_init(spa_t *spa, zio_type_t t, zio_taskq_type_t q)
default:
panic("unrecognized mode for %s_%s taskq (%u:%u) in "
- "spa_activate()",
+ "spa_taskqs_init()",
zio_type_name[t], zio_taskq_types[q], mode, value);
break;
}
@@ -1137,9 +1153,6 @@ spa_taskqs_init(spa_t *spa, zio_type_t t, zio_taskq_type_t q)
#ifdef HAVE_SYSDC
if (zio_taskq_sysdc && spa->spa_proc != &p0) {
- if (batch)
- flags |= TASKQ_DC_BATCH;
-
(void) zio_taskq_basedc;
tq = taskq_create_sysdc(name, value, 50, INT_MAX,
spa->spa_proc, zio_taskq_basedc, flags);
@@ -1200,12 +1213,11 @@ spa_taskqs_fini(spa_t *spa, zio_type_t t, zio_taskq_type_t q)
/*
* Dispatch a task to the appropriate taskq for the ZFS I/O type and priority.
* Note that a type may have multiple discrete taskqs to avoid lock contention
- * on the taskq itself. In that case we choose which taskq at random by using
- * the low bits of gethrtime().
+ * on the taskq itself.
*/
-void
-spa_taskq_dispatch_ent(spa_t *spa, zio_type_t t, zio_taskq_type_t q,
- task_func_t *func, void *arg, uint_t flags, taskq_ent_t *ent)
+static taskq_t *
+spa_taskq_dispatch_select(spa_t *spa, zio_type_t t, zio_taskq_type_t q,
+ zio_t *zio)
{
spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q];
taskq_t *tq;
@@ -1213,12 +1225,27 @@ spa_taskq_dispatch_ent(spa_t *spa, zio_type_t t, zio_taskq_type_t q,
ASSERT3P(tqs->stqs_taskq, !=, NULL);
ASSERT3U(tqs->stqs_count, !=, 0);
+ if ((t == ZIO_TYPE_WRITE) && (q == ZIO_TASKQ_ISSUE) &&
+ (zio != NULL) && (zio->io_wr_iss_tq != NULL)) {
+ /* dispatch to assigned write issue taskq */
+ tq = zio->io_wr_iss_tq;
+ return (tq);
+ }
+
if (tqs->stqs_count == 1) {
tq = tqs->stqs_taskq[0];
} else {
tq = tqs->stqs_taskq[((uint64_t)gethrtime()) % tqs->stqs_count];
}
+ return (tq);
+}
+void
+spa_taskq_dispatch_ent(spa_t *spa, zio_type_t t, zio_taskq_type_t q,
+ task_func_t *func, void *arg, uint_t flags, taskq_ent_t *ent,
+ zio_t *zio)
+{
+ taskq_t *tq = spa_taskq_dispatch_select(spa, t, q, zio);
taskq_dispatch_ent(tq, func, arg, flags, ent);
}
@@ -1229,20 +1256,8 @@ void
spa_taskq_dispatch_sync(spa_t *spa, zio_type_t t, zio_taskq_type_t q,
task_func_t *func, void *arg, uint_t flags)
{
- spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q];
- taskq_t *tq;
- taskqid_t id;
-
- ASSERT3P(tqs->stqs_taskq, !=, NULL);
- ASSERT3U(tqs->stqs_count, !=, 0);
-
- if (tqs->stqs_count == 1) {
- tq = tqs->stqs_taskq[0];
- } else {
- tq = tqs->stqs_taskq[((uint64_t)gethrtime()) % tqs->stqs_count];
- }
-
- id = taskq_dispatch(tq, func, arg, flags);
+ taskq_t *tq = spa_taskq_dispatch_select(spa, t, q, NULL);
+ taskqid_t id = taskq_dispatch(tq, func, arg, flags);
if (id)
taskq_wait_id(tq, id);
}
@@ -9649,6 +9664,104 @@ spa_sync_allpools(void)
mutex_exit(&spa_namespace_lock);
}
+taskq_t *
+spa_sync_tq_create(spa_t *spa, const char *name)
+{
+ kthread_t **kthreads;
+
+ ASSERT(spa->spa_sync_tq == NULL);
+ ASSERT3S(spa->spa_alloc_count, <=, boot_ncpus);
+
+ /*
+ * - do not allow more allocators than cpus.
+ * - there may be more cpus than allocators.
+ * - do not allow more sync taskq threads than allocators or cpus.
+ */
+ int nthreads = spa->spa_alloc_count;
+ spa->spa_syncthreads = kmem_zalloc(sizeof (spa_syncthread_info_t) *
+ nthreads, KM_SLEEP);
+
+ spa->spa_sync_tq = taskq_create_synced(name, nthreads, minclsyspri,
+ nthreads, INT_MAX, TASKQ_PREPOPULATE, &kthreads);
+ VERIFY(spa->spa_sync_tq != NULL);
+ VERIFY(kthreads != NULL);
+
+ spa_taskqs_t *tqs =
+ &spa->spa_zio_taskq[ZIO_TYPE_WRITE][ZIO_TASKQ_ISSUE];
+
+ spa_syncthread_info_t *ti = spa->spa_syncthreads;
+ for (int i = 0, w = 0; i < nthreads; i++, w++, ti++) {
+ ti->sti_thread = kthreads[i];
+ if (w == tqs->stqs_count) {
+ w = 0;
+ }
+ ti->sti_wr_iss_tq = tqs->stqs_taskq[w];
+ }
+
+ kmem_free(kthreads, sizeof (*kthreads) * nthreads);
+ return (spa->spa_sync_tq);
+}
+
+void
+spa_sync_tq_destroy(spa_t *spa)
+{
+ ASSERT(spa->spa_sync_tq != NULL);
+
+ taskq_wait(spa->spa_sync_tq);
+ taskq_destroy(spa->spa_sync_tq);
+ kmem_free(spa->spa_syncthreads,
+ sizeof (spa_syncthread_info_t) * spa->spa_alloc_count);
+ spa->spa_sync_tq = NULL;
+}
+
+void
+spa_select_allocator(zio_t *zio)
+{
+ zbookmark_phys_t *bm = &zio->io_bookmark;
+ spa_t *spa = zio->io_spa;
+
+ ASSERT(zio->io_type == ZIO_TYPE_WRITE);
+
+ /*
+ * A gang block (for example) may have inherited its parent's
+ * allocator, in which case there is nothing further to do here.
+ */
+ if (ZIO_HAS_ALLOCATOR(zio))
+ return;
+
+ ASSERT(spa != NULL);
+ ASSERT(bm != NULL);
+
+ /*
+ * First try to use an allocator assigned to the syncthread, and set
+ * the corresponding write issue taskq for the allocator.
+ * Note, we must have an open pool to do this.
+ */
+ if (spa->spa_sync_tq != NULL) {
+ spa_syncthread_info_t *ti = spa->spa_syncthreads;
+ for (int i = 0; i < spa->spa_alloc_count; i++, ti++) {
+ if (ti->sti_thread == curthread) {
+ zio->io_allocator = i;
+ zio->io_wr_iss_tq = ti->sti_wr_iss_tq;
+ return;
+ }
+ }
+ }
+
+ /*
+ * We want to try to use as many allocators as possible to help improve
+ * performance, but we also want logically adjacent IOs to be physically
+ * adjacent to improve sequential read performance. We chunk each object
+ * into 2^20 block regions, and then hash based on the objset, object,
+ * level, and region to accomplish both of these goals.
+ */
+ uint64_t hv = cityhash4(bm->zb_objset, bm->zb_object, bm->zb_level,
+ bm->zb_blkid >> 20);
+
+ zio->io_allocator = (uint_t)hv % spa->spa_alloc_count;
+ zio->io_wr_iss_tq = NULL;
+}
+
/*
* ==========================================================================
* Miscellaneous routines
@@ -10242,3 +10355,6 @@ ZFS_MODULE_PARAM(zfs_livelist_condense, zfs_livelist_condense_, new_alloc, INT,
"Whether extra ALLOC blkptrs were added to a livelist entry while it "
"was being condensed");
/* END CSTYLED */
+
+ZFS_MODULE_PARAM(zfs_zio, zio_, taskq_wr_iss_ncpus, UINT, ZMOD_RW,
+ "Number of CPUs to run write issue taskqs");
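
The fallback hash keeps logically adjacent writes on one allocator: shifting the block id right by 20 bits means blocks 0 through 2^20 - 1 of an object share a region and hash identically. The arithmetic spelled out, as a sketch of the fallback path (bm and spa come from the zio, as above):

    uint64_t region = bm->zb_blkid >> 20;   /* 2^20 blocks per region */
    uint64_t hv = cityhash4(bm->zb_objset, bm->zb_object,
        bm->zb_level, region);
    uint_t allocator = (uint_t)hv % spa->spa_alloc_count;
    /* e.g., with a 128 KiB recordsize, one region covers
     * 1048576 * 128 KiB = 128 GiB of logically contiguous data. */
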
diff --git a/sys/contrib/openzfs/module/zfs/spa_misc.c b/sys/contrib/openzfs/module/zfs/spa_misc.c
index c7472f972cc2..3990af98c732 100644
--- a/sys/contrib/openzfs/module/zfs/spa_misc.c
+++ b/sys/contrib/openzfs/module/zfs/spa_misc.c
@@ -388,7 +388,11 @@ uint_t spa_asize_inflation = 24;
uint_t spa_slop_shift = 5;
static const uint64_t spa_min_slop = 128ULL * 1024 * 1024;
static const uint64_t spa_max_slop = 128ULL * 1024 * 1024 * 1024;
-static const int spa_allocators = 4;
+
+/*
+ * Number of allocators to use, per spa instance
+ */
+static int spa_num_allocators = 4;
/*
 * Spa active allocator.
 */
@@ -730,7 +734,9 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)
if (altroot)
spa->spa_root = spa_strdup(altroot);
- spa->spa_alloc_count = spa_allocators;
+ /* Do not allow more allocators than CPUs. */
+ spa->spa_alloc_count = MIN(MAX(spa_num_allocators, 1), boot_ncpus);
+
spa->spa_allocs = kmem_zalloc(spa->spa_alloc_count *
sizeof (spa_alloc_t), KM_SLEEP);
for (int i = 0; i < spa->spa_alloc_count; i++) {
@@ -739,6 +745,7 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)
avl_create(&spa->spa_allocs[i].spaa_tree, zio_bookmark_compare,
sizeof (zio_t), offsetof(zio_t, io_queue_node.a));
}
+
avl_create(&spa->spa_metaslabs_by_flushed, metaslab_sort_by_flushed,
sizeof (metaslab_t), offsetof(metaslab_t, ms_spa_txg_node));
avl_create(&spa->spa_sm_logs_by_txg, spa_log_sm_sort_by_txg,
@@ -3009,3 +3016,6 @@ ZFS_MODULE_PARAM(zfs, zfs_, special_class_metadata_reserve_pct, UINT, ZMOD_RW,
ZFS_MODULE_PARAM_CALL(zfs_spa, spa_, slop_shift, param_set_slop_shift,
param_get_uint, ZMOD_RW, "Reserved free space in pool");
+
+ZFS_MODULE_PARAM(zfs, spa_, num_allocators, INT, ZMOD_RW,
+ "Number of allocators per spa, capped by ncpus");
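
Editor's note: the clamp in spa_add() above bounds the tunable on both sides: at least one allocator, at most boot_ncpus. A trivial standalone illustration with hypothetical values (the default tunable of 4 on a 2-CPU machine yields 2 allocators):

#include <stdio.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))

int
main(void)
{
	int spa_num_allocators = 4;	/* hypothetical tunable value */
	int boot_ncpus = 2;		/* hypothetical CPU count */

	/* Same clamp as spa_add(): at least 1, at most ncpus. */
	int alloc_count = MIN(MAX(spa_num_allocators, 1), boot_ncpus);
	printf("spa_alloc_count = %d\n", alloc_count);	/* prints 2 */
	return (0);
}
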
diff --git a/sys/contrib/openzfs/module/zfs/vdev_queue.c b/sys/contrib/openzfs/module/zfs/vdev_queue.c
index 08d918467d03..092b3f375be0 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_queue.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_queue.c
@@ -273,8 +273,10 @@ vdev_queue_class_add(vdev_queue_t *vq, zio_t *zio)
{
zio_priority_t p = zio->io_priority;
vq->vq_cqueued |= 1U << p;
- if (vdev_queue_class_fifo(p))
+ if (vdev_queue_class_fifo(p)) {
list_insert_tail(&vq->vq_class[p].vqc_list, zio);
+ vq->vq_class[p].vqc_list_numnodes++;
+ }
else
avl_add(&vq->vq_class[p].vqc_tree, zio);
}
@@ -288,6 +290,7 @@ vdev_queue_class_remove(vdev_queue_t *vq, zio_t *zio)
list_t *list = &vq->vq_class[p].vqc_list;
list_remove(list, zio);
empty = list_is_empty(list);
+ vq->vq_class[p].vqc_list_numnodes--;
} else {
avl_tree_t *tree = &vq->vq_class[p].vqc_tree;
avl_remove(tree, zio);
@@ -1069,7 +1072,7 @@ vdev_queue_class_length(vdev_t *vd, zio_priority_t p)
{
vdev_queue_t *vq = &vd->vdev_queue;
if (vdev_queue_class_fifo(p))
- return (list_is_empty(&vq->vq_class[p].vqc_list) == 0);
+ return (vq->vq_class[p].vqc_list_numnodes);
else
return (avl_numnodes(&vq->vq_class[p].vqc_tree));
}
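
Editor's note: two things change in vdev_queue.c. First, vdev_queue_class_length() previously returned list_is_empty(...) == 0, which evaluates to 0 or 1, so FIFO classes reported a depth of at most one rather than the actual queue depth. Second, counting entries on demand would otherwise require an O(n) list walk. Maintaining vqc_list_numnodes alongside the list fixes both. A minimal sketch of the pattern with illustrative names (not the kernel's list_t API):

#include <stddef.h>

struct node {
	struct node *next;
};

struct counted_list {
	struct node *head;
	struct node *tail;
	size_t numnodes;	/* bumped on insert, dropped on remove */
};

static void
cl_insert_tail(struct counted_list *cl, struct node *n)
{
	n->next = NULL;
	if (cl->tail != NULL)
		cl->tail->next = n;
	else
		cl->head = n;
	cl->tail = n;
	cl->numnodes++;
}

static size_t
cl_length(const struct counted_list *cl)
{
	/* O(1), and a real depth rather than a 0/1 emptiness test */
	return (cl->numnodes);
}
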
diff --git a/sys/contrib/openzfs/module/zfs/zio.c b/sys/contrib/openzfs/module/zfs/zio.c
index 4eb276352a23..2f5b423ee72e 100644
--- a/sys/contrib/openzfs/module/zfs/zio.c
+++ b/sys/contrib/openzfs/module/zfs/zio.c
@@ -899,6 +899,7 @@ zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
zio->io_orig_stage = zio->io_stage = stage;
zio->io_orig_pipeline = zio->io_pipeline = pipeline;
zio->io_pipeline_trace = ZIO_STAGE_OPEN;
+ zio->io_allocator = ZIO_ALLOCATOR_NONE;
zio->io_state[ZIO_WAIT_READY] = (stage >= ZIO_STAGE_READY) ||
(pipeline & ZIO_STAGE_READY) == 0;
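
Editor's note: initializing io_allocator to ZIO_ALLOCATOR_NONE at creation turns the field into a late-bound slot: writes are assigned an allocator exactly once (via spa_select_allocator() in zio_wait()/zio_nowait() below, or inherited by gang children), and the ZIO_HAS_ALLOCATOR() assertions added throughout the pipeline verify the binding. A simplified sketch of the sentinel pattern, with SKETCH_* stand-ins for the real macros and types:

#include <assert.h>

#define SKETCH_ALLOCATOR_NONE	(-1)
#define SKETCH_HAS_ALLOCATOR(z)	((z)->io_allocator != SKETCH_ALLOCATOR_NONE)

struct sketch_zio {
	int io_is_write;
	int io_allocator;	/* starts at the sentinel */
};

static void
sketch_issue(struct sketch_zio *z, int chosen)
{
	/* chosen: a valid allocator index from the hash or syncthread map */
	if (z->io_is_write && !SKETCH_HAS_ALLOCATOR(z))
		z->io_allocator = chosen;	/* bind exactly once */
	/* later stages mirror the new ZIO_HAS_ALLOCATOR() asserts */
	assert(!z->io_is_write || SKETCH_HAS_ALLOCATOR(z));
}
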
@@ -2007,7 +2008,7 @@ zio_taskq_dispatch(zio_t *zio, zio_taskq_type_t q, boolean_t cutinline)
*/
ASSERT(taskq_empty_ent(&zio->io_tqent));
spa_taskq_dispatch_ent(spa, t, q, zio_execute, zio, flags,
- &zio->io_tqent);
+ &zio->io_tqent, zio);
}
static boolean_t
@@ -2032,8 +2033,8 @@ zio_taskq_member(zio_t *zio, zio_taskq_type_t q)
static zio_t *
zio_issue_async(zio_t *zio)
{
+ ASSERT((zio->io_type != ZIO_TYPE_WRITE) || ZIO_HAS_ALLOCATOR(zio));
zio_taskq_dispatch(zio, ZIO_TASKQ_ISSUE, B_FALSE);
-
return (NULL);
}
@@ -2347,6 +2348,9 @@ zio_wait(zio_t *zio)
ASSERT0(zio->io_queued_timestamp);
zio->io_queued_timestamp = gethrtime();
+ if (zio->io_type == ZIO_TYPE_WRITE) {
+ spa_select_allocator(zio);
+ }
__zio_execute(zio);
mutex_enter(&zio->io_lock);
@@ -2399,6 +2403,9 @@ zio_nowait(zio_t *zio)
ASSERT0(zio->io_queued_timestamp);
zio->io_queued_timestamp = gethrtime();
+ if (zio->io_type == ZIO_TYPE_WRITE) {
+ spa_select_allocator(zio);
+ }
__zio_execute(zio);
}
@@ -2864,6 +2871,13 @@ zio_gang_issue(zio_t *zio)
}
static void
+zio_gang_inherit_allocator(zio_t *pio, zio_t *cio)
+{
+ cio->io_allocator = pio->io_allocator;
+ cio->io_wr_iss_tq = pio->io_wr_iss_tq;
+}
+
+static void
zio_write_gang_member_ready(zio_t *zio)
{
zio_t *pio = zio_unique_parent(zio);
@@ -2934,6 +2948,7 @@ zio_write_gang_block(zio_t *pio, metaslab_class_t *mc)
gbh_copies = MIN(2, spa_max_replication(spa));
}
+ ASSERT(ZIO_HAS_ALLOCATOR(pio));
int flags = METASLAB_HINTBP_FAVOR | METASLAB_GANG_HEADER;
if (pio->io_flags & ZIO_FLAG_IO_ALLOCATING) {
ASSERT(pio->io_priority == ZIO_PRIORITY_ASYNC_WRITE);
@@ -2997,6 +3012,8 @@ zio_write_gang_block(zio_t *pio, metaslab_class_t *mc)
zio_write_gang_done, NULL, pio->io_priority,
ZIO_GANG_CHILD_FLAGS(pio), &pio->io_bookmark);
+ zio_gang_inherit_allocator(pio, zio);
+
/*
* Create and nowait the gang children.
*/
@@ -3027,6 +3044,8 @@ zio_write_gang_block(zio_t *pio, metaslab_class_t *mc)
zio_write_gang_done, &gn->gn_child[g], pio->io_priority,
ZIO_GANG_CHILD_FLAGS(pio), &pio->io_bookmark);
+ zio_gang_inherit_allocator(zio, cio);
+
if (pio->io_flags & ZIO_FLAG_IO_ALLOCATING) {
ASSERT(pio->io_priority == ZIO_PRIORITY_ASYNC_WRITE);
ASSERT(has_data);
@@ -3539,6 +3558,7 @@ zio_io_to_allocate(spa_t *spa, int allocator)
return (NULL);
ASSERT(IO_IS_ALLOCATING(zio));
+ ASSERT(ZIO_HAS_ALLOCATOR(zio));
/*
* Try to place a reservation for this zio. If we're unable to
@@ -3575,21 +3595,12 @@ zio_dva_throttle(zio_t *zio)
}
ASSERT(zio->io_type == ZIO_TYPE_WRITE);
+ ASSERT(ZIO_HAS_ALLOCATOR(zio));
ASSERT(zio->io_child_type > ZIO_CHILD_GANG);
ASSERT3U(zio->io_queued_timestamp, >, 0);
ASSERT(zio->io_stage == ZIO_STAGE_DVA_THROTTLE);
- zbookmark_phys_t *bm = &zio->io_bookmark;
- /*
- * We want to try to use as many allocators as possible to help improve
- * performance, but we also want logically adjacent IOs to be physically
- * adjacent to improve sequential read performance. We chunk each object
- * into 2^20 block regions, and then hash based on the objset, object,
- * level, and region to accomplish both of these goals.
- */
- int allocator = (uint_t)cityhash4(bm->zb_objset, bm->zb_object,
- bm->zb_level, bm->zb_blkid >> 20) % spa->spa_alloc_count;
- zio->io_allocator = allocator;
+ int allocator = zio->io_allocator;
zio->io_metaslab_class = mc;
mutex_enter(&spa->spa_allocs[allocator].spaa_lock);
avl_add(&spa->spa_allocs[allocator].spaa_tree, zio);
@@ -3663,6 +3674,7 @@ zio_dva_allocate(zio_t *zio)
* sync write performance. If a log allocation fails, we will fall
* back to spa_sync() which is abysmal for performance.
*/
+ ASSERT(ZIO_HAS_ALLOCATOR(zio));
error = metaslab_alloc(spa, mc, zio->io_size, bp,
zio->io_prop.zp_copies, zio->io_txg, NULL, flags,
&zio->io_alloc_list, zio, zio->io_allocator);
@@ -4515,6 +4527,7 @@ zio_ready(zio_t *zio)
ASSERT(IO_IS_ALLOCATING(zio));
ASSERT(zio->io_priority == ZIO_PRIORITY_ASYNC_WRITE);
ASSERT(zio->io_metaslab_class != NULL);
+ ASSERT(ZIO_HAS_ALLOCATOR(zio));
/*
* We were unable to allocate anything, unreserve and
@@ -4601,6 +4614,7 @@ zio_dva_throttle_done(zio_t *zio)
}
ASSERT(IO_IS_ALLOCATING(pio));
+ ASSERT(ZIO_HAS_ALLOCATOR(pio));
ASSERT3P(zio, !=, zio->io_logical);
ASSERT(zio->io_logical != NULL);
ASSERT(!(zio->io_flags & ZIO_FLAG_IO_REPAIR));
@@ -4663,6 +4677,7 @@ zio_done(zio_t *zio)
ASSERT(zio->io_type == ZIO_TYPE_WRITE);
ASSERT(zio->io_priority == ZIO_PRIORITY_ASYNC_WRITE);
ASSERT(zio->io_bp != NULL);
+ ASSERT(ZIO_HAS_ALLOCATOR(zio));
metaslab_group_alloc_verify(zio->io_spa, zio->io_bp, zio,
zio->io_allocator);
@@ -4928,7 +4943,7 @@ zio_done(zio_t *zio)
ASSERT(taskq_empty_ent(&zio->io_tqent));
spa_taskq_dispatch_ent(zio->io_spa,
ZIO_TYPE_CLAIM, ZIO_TASKQ_ISSUE,
- zio_reexecute, zio, 0, &zio->io_tqent);
+ zio_reexecute, zio, 0, &zio->io_tqent, NULL);
}
return (NULL);
}
diff --git a/sys/contrib/openzfs/rpm/generic/zfs-dkms.spec.in b/sys/contrib/openzfs/rpm/generic/zfs-dkms.spec.in
index 23c3ed6ff408..d56967d7a8b1 100644
--- a/sys/contrib/openzfs/rpm/generic/zfs-dkms.spec.in
+++ b/sys/contrib/openzfs/rpm/generic/zfs-dkms.spec.in
@@ -24,6 +24,7 @@ BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n)
BuildArch: noarch
Requires: dkms >= 2.2.0.3
+Requires(pre): dkms >= 2.2.0.3
Requires(post): dkms >= 2.2.0.3
Requires(preun): dkms >= 2.2.0.3
Requires: gcc, make, perl, diffutils
@@ -68,9 +69,92 @@ fi
%defattr(-,root,root)
/usr/src/%{module}-%{version}
+%pre
+echo "Running pre installation script: $0. Parameters: $*"
+# We don't want any other versions lingering around in dkms.
+# Tests with 'dnf' showed that in case of a reinstall or upgrade,
+# the preun scriptlet removed the version we are trying to install.
+# Because of this, find all zfs dkms sources in /var/lib/dkms and
+# remove them if a matching version is registered in dkms.
+
+dkms_root=/var/lib/dkms
+if [ -d ${dkms_root}/%{module} ]; then
+ cd ${dkms_root}/%{module}
+ for x in [[:digit:]]*; do
+ [ -d "$x" ] || continue
+ otherver="$x"
+ opath="${dkms_root}/%{module}/${otherver}"
+ if [ "$otherver" != %{version} ]; then
+			# This is a workaround for a broken 'dkms status' that we caused in a previous version.
+			# One day it may no longer be needed, but it does not hurt to keep it.
+ if dkms status -m %{module} -v "$otherver" 2>&1 | grep "${opath}/source/dkms.conf does not exist"
+ then
+ echo "ERROR: dkms status is broken!" >&2
+ if [ -L "${opath}/source" -a ! -d "${opath}/source" ]
+ then
+ echo "Trying to fix it by removing the symlink: ${opath}/source" >&2
+ echo "You should manually remove ${opath}" >&2
+ rm -f "${opath}/source" || echo "Removal failed!" >&2
+ fi
+ fi
+ if [ `dkms status -m %{module} -v "$otherver" | grep -c %{module}` -gt 0 ]; then
+ echo "Removing old %{module} dkms modules version $otherver from all kernels."
+ dkms remove -m %{module} -v "$otherver" --all ||:
+ fi
+ fi
+ done
+fi
+
+# Uninstall this version of zfs dkms modules before installation of the package.
+if [ `dkms status -m %{module} -v %{version} | grep -c %{module}` -gt 0 ]; then
+ echo "Removing %{module} dkms modules version %{version} from all kernels."
+ dkms remove -m %{module} -v %{version} --all ||:
+fi
+
+%post
+echo "Running post installation script: $0. Parameters: $*"
+# Add the module to dkms, as recommended in the dkms man page.
+# This is generally rpm specific, but it may also help if we have a
+# broken 'dkms status': if the sources are available and only the
+# symlink pointing to them is missing, this will resolve the situation.
+echo "Adding %{module} dkms modules version %{version} to dkms."
+dkms add -m %{module} -v %{version} %{!?not_rpm:--rpm_safe_upgrade} ||:
+
+# After installing the package, have dkms install this zfs version for the
+# current kernel. Force the overwriting of old modules to avoid diff warnings
+# in 'dkms status', to overwrite newer versions in case of a downgrade, and
+# to overwrite any previously backed-up versions that have been restored.
+echo "Installing %{module} dkms modules version %{version} for the current kernel."
+dkms install --force -m %{module} -v %{version} ||:
+
%preun
-dkms remove -m %{module} -v %{version} --all
+dkms_root="/var/lib/dkms/%{module}/%{version}"
+echo "Running pre uninstall script: $0. Parameters: $*"
+# In case of an upgrade we do nothing. See the comment above in the %pre scriptlet.
+if [ "$1" = "1" -o "$1" = "upgrade" ] ; then
+ echo "This is an upgrade. Skipping pre uninstall action."
+ exit 0
+fi
+
+# Check whether we are uninstalling the package; in that case, remove the
+# dkms modules. For rpm packages the first parameter is '0'; for deb
+# packages it is 'remove' or 'purge'.
+if [ "$1" = "0" -o "$1" = "remove" -o "$1" = "purge" ] ; then
+ if [ `dkms status -m %{module} -v %{version} | grep -c %{module}` -gt 0 ]; then
+ echo "Removing %{module} dkms modules version %{version} from all kernels."
+ dkms remove -m %{module} -v %{version} --all %{!?not_rpm:--rpm_safe_upgrade} && exit 0
+ fi
+ # If removing the modules failed, it might be because of the broken 'dkms status'.
+ if dkms status -m %{module} -v %{version} 2>&1 | grep "${dkms_root}/source/dkms.conf does not exist"
+ then
+ echo "ERROR: dkms status is broken!" >&2
+ echo "You should manually remove ${dkms_root}" >&2
+ echo "WARNING: installed modules in /lib/modules/`uname -r`/extra could not be removed automatically!" >&2
+ fi
+else
+ echo "Script parameter $1 did not match any removal condition."
+fi
-%posttrans
-/usr/lib/dkms/common.postinst %{module} %{version}
+exit 0
diff --git a/sys/modules/zfs/zfs_config.h b/sys/modules/zfs/zfs_config.h
index 5344d981882b..dd1a926d286c 100644
--- a/sys/modules/zfs/zfs_config.h
+++ b/sys/modules/zfs/zfs_config.h
@@ -1110,7 +1110,7 @@
/* #undef ZFS_IS_GPL_COMPATIBLE */
/* Define the project alias string. */
-#define ZFS_META_ALIAS "zfs-2.2.99-184-FreeBSD_g41e55b476"
+#define ZFS_META_ALIAS "zfs-2.2.99-197-FreeBSD_g9198de8f1"
/* Define the project author. */
#define ZFS_META_AUTHOR "OpenZFS"
@@ -1140,7 +1140,7 @@
#define ZFS_META_NAME "zfs"
/* Define the project release. */
-#define ZFS_META_RELEASE "184-FreeBSD_g41e55b476"
+#define ZFS_META_RELEASE "197-FreeBSD_g9198de8f1"
/* Define the project version. */
#define ZFS_META_VERSION "2.2.99"
diff --git a/sys/modules/zfs/zfs_gitrev.h b/sys/modules/zfs/zfs_gitrev.h
index 9501947797c0..86525c162b79 100644
--- a/sys/modules/zfs/zfs_gitrev.h
+++ b/sys/modules/zfs/zfs_gitrev.h
@@ -1 +1 @@
-#define ZFS_META_GITREV "zfs-2.2.99-184-g41e55b476"
+#define ZFS_META_GITREV "zfs-2.2.99-197-g9198de8f1"