aboutsummaryrefslogtreecommitdiff
path: root/sys
diff options
context:
space:
mode:
Diffstat (limited to 'sys')
-rw-r--r--sys/conf/files1
-rw-r--r--sys/contrib/openzfs/cmd/Makefile.am2
-rw-r--r--sys/contrib/openzfs/cmd/zdb/zdb.c18
-rw-r--r--sys/contrib/openzfs/config/Rules.am4
-rw-r--r--sys/contrib/openzfs/include/Makefile.am1
-rw-r--r--sys/contrib/openzfs/include/sys/ddt.h199
-rw-r--r--sys/contrib/openzfs/include/sys/ddt_impl.h95
-rw-r--r--sys/contrib/openzfs/include/sys/spa.h2
-rw-r--r--sys/contrib/openzfs/lib/libzfs/os/linux/libzfs_pool_os.c10
-rw-r--r--sys/contrib/openzfs/lib/libzpool/Makefile.am1
-rw-r--r--sys/contrib/openzfs/module/Kbuild.in1
-rw-r--r--sys/contrib/openzfs/module/Makefile.bsd2
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/zfs_acl.c4
-rw-r--r--sys/contrib/openzfs/module/zfs/ddt.c584
-rw-r--r--sys/contrib/openzfs/module/zfs/ddt_stats.c212
-rw-r--r--sys/contrib/openzfs/module/zfs/ddt_zap.c136
-rw-r--r--sys/contrib/openzfs/module/zfs/dsl_scan.c4
-rwxr-xr-xsys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in22
-rw-r--r--sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib27
-rw-r--r--sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_common.kshlib6
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_cross_dataset.ksh5
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_cross_enc_dataset.ksh5
-rw-r--r--sys/modules/zfs/Makefile2
-rw-r--r--sys/modules/zfs/zfs_config.h4
-rw-r--r--sys/modules/zfs/zfs_gitrev.h2
25 files changed, 858 insertions, 491 deletions
diff --git a/sys/conf/files b/sys/conf/files
index 3c87c0b88528..e57c82238380 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -268,6 +268,7 @@ contrib/openzfs/module/zfs/dbuf.c optional zfs compile-with "${ZFS_C}"
contrib/openzfs/module/zfs/dbuf_stats.c optional zfs compile-with "${ZFS_C}"
contrib/openzfs/module/zfs/dataset_kstats.c optional zfs compile-with "${ZFS_C}"
contrib/openzfs/module/zfs/ddt.c optional zfs compile-with "${ZFS_C}"
+contrib/openzfs/module/zfs/ddt_stats.c optional zfs compile-with "${ZFS_C}"
contrib/openzfs/module/zfs/ddt_zap.c optional zfs compile-with "${ZFS_C}"
contrib/openzfs/module/zfs/dmu.c optional zfs compile-with "${ZFS_C}"
contrib/openzfs/module/zfs/dmu_diff.c optional zfs compile-with "${ZFS_C}"
diff --git a/sys/contrib/openzfs/cmd/Makefile.am b/sys/contrib/openzfs/cmd/Makefile.am
index 6d6de4adb42a..2bd9d039f20e 100644
--- a/sys/contrib/openzfs/cmd/Makefile.am
+++ b/sys/contrib/openzfs/cmd/Makefile.am
@@ -39,8 +39,6 @@ zhack_LDADD = \
ztest_CFLAGS = $(AM_CFLAGS) $(KERNEL_CFLAGS)
-# Get rid of compiler warning for unchecked truncating snprintfs on gcc 7.1.1
-ztest_CFLAGS += $(NO_FORMAT_TRUNCATION)
ztest_CPPFLAGS = $(AM_CPPFLAGS) $(FORCEDEBUG_CPPFLAGS)
sbin_PROGRAMS += ztest
diff --git a/sys/contrib/openzfs/cmd/zdb/zdb.c b/sys/contrib/openzfs/cmd/zdb/zdb.c
index b857e61bd04f..4880c8048726 100644
--- a/sys/contrib/openzfs/cmd/zdb/zdb.c
+++ b/sys/contrib/openzfs/cmd/zdb/zdb.c
@@ -74,6 +74,7 @@
#include <sys/arc.h>
#include <sys/arc_impl.h>
#include <sys/ddt.h>
+#include <sys/ddt_impl.h>
#include <sys/zfeature.h>
#include <sys/abd.h>
#include <sys/blkptr.h>
@@ -1904,7 +1905,7 @@ dump_dedup_ratio(const ddt_stat_t *dds)
}
static void
-dump_ddt(ddt_t *ddt, enum ddt_type type, enum ddt_class class)
+dump_ddt(ddt_t *ddt, ddt_type_t type, ddt_class_t class)
{
char name[DDT_NAMELEN];
ddt_entry_t dde;
@@ -1964,8 +1965,10 @@ dump_all_ddts(spa_t *spa)
for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
ddt_t *ddt = spa->spa_ddt[c];
- for (enum ddt_type type = 0; type < DDT_TYPES; type++) {
- for (enum ddt_class class = 0; class < DDT_CLASSES;
+ if (!ddt)
+ continue;
+ for (ddt_type_t type = 0; type < DDT_TYPES; type++) {
+ for (ddt_class_t class = 0; class < DDT_CLASSES;
class++) {
dump_ddt(ddt, type, class);
}
@@ -6061,6 +6064,8 @@ zdb_ddt_leak_init(spa_t *spa, zdb_cb_t *zcb)
return;
ASSERT(ddt_phys_total_refcnt(&dde) > 1);
+ ddt_t *ddt = spa->spa_ddt[ddb.ddb_checksum];
+ VERIFY(ddt);
for (p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
if (ddp->ddp_phys_birth == 0)
@@ -6075,7 +6080,7 @@ zdb_ddt_leak_init(spa_t *spa, zdb_cb_t *zcb)
zcb->zcb_dedup_blocks++;
}
}
- ddt_t *ddt = spa->spa_ddt[ddb.ddb_checksum];
+
ddt_enter(ddt);
VERIFY(ddt_lookup(ddt, &blk, B_TRUE) != NULL);
ddt_exit(ddt);
@@ -7121,6 +7126,7 @@ dump_block_stats(spa_t *spa)
}
typedef struct zdb_ddt_entry {
+ /* key must be first for ddt_key_compare */
ddt_key_t zdde_key;
uint64_t zdde_ref_blocks;
uint64_t zdde_ref_lsize;
@@ -7181,7 +7187,7 @@ dump_simulated_ddt(spa_t *spa)
ddt_histogram_t ddh_total = {{{0}}};
ddt_stat_t dds_total = {0};
- avl_create(&t, ddt_entry_compare,
+ avl_create(&t, ddt_key_compare,
sizeof (zdb_ddt_entry_t), offsetof(zdb_ddt_entry_t, zdde_node));
spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
@@ -7947,6 +7953,8 @@ dump_mos_leaks(spa_t *spa)
for (uint64_t cksum = 0;
cksum < ZIO_CHECKSUM_FUNCTIONS; cksum++) {
ddt_t *ddt = spa->spa_ddt[cksum];
+ if (!ddt)
+ continue;
mos_obj_refd(ddt->ddt_object[type][class]);
}
}
diff --git a/sys/contrib/openzfs/config/Rules.am b/sys/contrib/openzfs/config/Rules.am
index 2e463ae6083a..30c5f353cd23 100644
--- a/sys/contrib/openzfs/config/Rules.am
+++ b/sys/contrib/openzfs/config/Rules.am
@@ -21,7 +21,9 @@ AM_CFLAGS += $(IMPLICIT_FALLTHROUGH)
AM_CFLAGS += $(DEBUG_CFLAGS)
AM_CFLAGS += $(ASAN_CFLAGS)
AM_CFLAGS += $(UBSAN_CFLAGS)
-AM_CFLAGS += $(CODE_COVERAGE_CFLAGS) $(NO_FORMAT_ZERO_LENGTH)
+AM_CFLAGS += $(CODE_COVERAGE_CFLAGS)
+AM_CFLAGS += $(NO_FORMAT_ZERO_LENGTH)
+AM_CFLAGS += $(NO_FORMAT_TRUNCATION)
if BUILD_FREEBSD
AM_CFLAGS += -fPIC -Werror -Wno-unknown-pragmas -Wno-enum-conversion
AM_CFLAGS += -include $(top_srcdir)/include/os/freebsd/spl/sys/ccompile.h
diff --git a/sys/contrib/openzfs/include/Makefile.am b/sys/contrib/openzfs/include/Makefile.am
index cb28a2d6c96c..fa725c2e7a5f 100644
--- a/sys/contrib/openzfs/include/Makefile.am
+++ b/sys/contrib/openzfs/include/Makefile.am
@@ -37,6 +37,7 @@ COMMON_H = \
sys/dataset_kstats.h \
sys/dbuf.h \
sys/ddt.h \
+ sys/ddt_impl.h \
sys/dmu.h \
sys/dmu_impl.h \
sys/dmu_objset.h \
diff --git a/sys/contrib/openzfs/include/sys/ddt.h b/sys/contrib/openzfs/include/sys/ddt.h
index 6378c042c705..726f1a3902eb 100644
--- a/sys/contrib/openzfs/include/sys/ddt.h
+++ b/sys/contrib/openzfs/include/sys/ddt.h
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016 by Delphix. All rights reserved.
+ * Copyright (c) 2023, Klara Inc.
*/
#ifndef _SYS_DDT_H
@@ -39,32 +40,50 @@ extern "C" {
struct abd;
/*
- * On-disk DDT formats, in the desired search order (newest version first).
+ * DDT on-disk storage object types. Each one corresponds to a specific
+ * implementation, see ddt_ops_t. The value itself is not stored on disk.
+ *
+ * When searching for an entry, object types will be searched in this order.
+ *
+ * Note that DDT_TYPES is used as the "no type" for new entries that have not
+ * yet been written to a storage object.
*/
-enum ddt_type {
- DDT_TYPE_ZAP = 0,
+typedef enum {
+ DDT_TYPE_ZAP = 0, /* ZAP storage object, ddt_zap */
DDT_TYPES
-};
+} ddt_type_t;
+
+_Static_assert(DDT_TYPES <= UINT8_MAX,
+ "ddt_type_t must fit in a uint8_t");
+
+/* New and updated entries receive this type, see ddt_sync_entry() */
+#define DDT_TYPE_DEFAULT (DDT_TYPE_ZAP)
/*
- * DDT classes, in the desired search order (highest replication level first).
+ * DDT storage classes. Each class has a separate storage object for each type.
+ * The value itself is not stored on disk.
+ *
+ * When searching for an entry, object classes will be searched in this order.
+ *
+ * Note that DDT_CLASSES is used as the "no class" for new entries that have not
+ * yet been written to a storage object.
*/
-enum ddt_class {
- DDT_CLASS_DITTO = 0,
- DDT_CLASS_DUPLICATE,
- DDT_CLASS_UNIQUE,
+typedef enum {
+ DDT_CLASS_DITTO = 0, /* entry has ditto blocks (obsolete) */
+ DDT_CLASS_DUPLICATE, /* entry has multiple references */
+ DDT_CLASS_UNIQUE, /* entry has a single reference */
DDT_CLASSES
-};
-
-#define DDT_TYPE_CURRENT 0
+} ddt_class_t;
-#define DDT_COMPRESS_BYTEORDER_MASK 0x80
-#define DDT_COMPRESS_FUNCTION_MASK 0x7f
+_Static_assert(DDT_CLASSES < UINT8_MAX,
+ "ddt_class_t must fit in a uint8_t");
/*
- * On-disk ddt entry: key (name) and physical storage (value).
+ * The "key" part of an on-disk entry. This is the unique "name" for a block,
+ * that is, the parts of the block pointer that will always be the same for
+ * the same data.
*/
-typedef struct ddt_key {
+typedef struct {
zio_cksum_t ddk_cksum; /* 256-bit block checksum */
/*
* Encoded with logical & physical size, encryption, and compression,
@@ -76,6 +95,10 @@ typedef struct ddt_key {
uint64_t ddk_prop;
} ddt_key_t;
+/*
+ * Macros for accessing parts of a ddt_key_t. These are similar to their BP_*
+ * counterparts.
+ */
#define DDK_GET_LSIZE(ddk) \
BF64_GET_SB((ddk)->ddk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1)
#define DDK_SET_LSIZE(ddk, x) \
@@ -92,18 +115,25 @@ typedef struct ddt_key {
#define DDK_GET_CRYPT(ddk) BF64_GET((ddk)->ddk_prop, 39, 1)
#define DDK_SET_CRYPT(ddk, x) BF64_SET((ddk)->ddk_prop, 39, 1, x)
-#define DDT_KEY_WORDS (sizeof (ddt_key_t) / sizeof (uint64_t))
-
-#define DDE_GET_NDVAS(dde) (DDK_GET_CRYPT(&dde->dde_key) \
- ? SPA_DVAS_PER_BP - 1 : SPA_DVAS_PER_BP)
-
-typedef struct ddt_phys {
+/*
+ * The "value" part for an on-disk entry. These are the "physical"
+ * characteristics of the stored block, such as its location on disk (DVAs),
+ * birth txg and ref count.
+ *
+ * Note that an entry has an array of four ddt_phys_t, one for each number of
+ * DVAs (copies= property) and another for additional "ditto" copies. Most
+ * users of ddt_phys_t will handle indexing into or counting the phys they
+ * want.
+ */
+typedef struct {
dva_t ddp_dva[SPA_DVAS_PER_BP];
uint64_t ddp_refcnt;
uint64_t ddp_phys_birth;
} ddt_phys_t;
/*
+ * Named indexes into the ddt_phys_t array in each entry.
+ *
* Note, we no longer generate new DDT_PHYS_DITTO-type blocks. However,
* we maintain the ability to free existing dedup-ditto blocks.
*/
@@ -116,99 +146,83 @@ enum ddt_phys_type {
};
/*
- * In-core ddt entry
+ * A "live" entry, holding changes to an entry made this txg, and other data to
+ * support loading, updating and repairing the entry.
*/
-struct ddt_entry {
- ddt_key_t dde_key;
- ddt_phys_t dde_phys[DDT_PHYS_TYPES];
+
+/* State flags for dde_flags */
+#define DDE_FLAG_LOADED (1 << 0) /* entry ready for use */
+
+typedef struct {
+ /* key must be first for ddt_key_compare */
+ ddt_key_t dde_key; /* ddt_tree key */
+ ddt_phys_t dde_phys[DDT_PHYS_TYPES]; /* on-disk data */
+
+ /* in-flight update IOs */
zio_t *dde_lead_zio[DDT_PHYS_TYPES];
+
+ /* copy of data after a repair read, to be rewritten */
struct abd *dde_repair_abd;
- enum ddt_type dde_type;
- enum ddt_class dde_class;
- uint8_t dde_loading;
- uint8_t dde_loaded;
- kcondvar_t dde_cv;
- avl_node_t dde_node;
-};
+
+ /* storage type and class the entry was loaded from */
+ ddt_type_t dde_type;
+ ddt_class_t dde_class;
+
+ uint8_t dde_flags; /* load state flags */
+ kcondvar_t dde_cv; /* signaled when load completes */
+
+ avl_node_t dde_node; /* ddt_tree node */
+} ddt_entry_t;
/*
- * In-core ddt
+ * In-core DDT object. This covers all entries and stats for the whole pool
+ * for a given checksum type.
*/
-struct ddt {
- kmutex_t ddt_lock;
- avl_tree_t ddt_tree;
- avl_tree_t ddt_repair_tree;
- enum zio_checksum ddt_checksum;
- spa_t *ddt_spa;
- objset_t *ddt_os;
- uint64_t ddt_stat_object;
+typedef struct {
+ kmutex_t ddt_lock; /* protects changes to all fields */
+
+ avl_tree_t ddt_tree; /* "live" (changed) entries this txg */
+
+ avl_tree_t ddt_repair_tree; /* entries being repaired */
+
+ enum zio_checksum ddt_checksum; /* checksum algorithm in use */
+ spa_t *ddt_spa; /* pool this ddt is on */
+ objset_t *ddt_os; /* ddt objset (always MOS) */
+
+ /* per-type/per-class entry store objects */
uint64_t ddt_object[DDT_TYPES][DDT_CLASSES];
+
+ /* object ids for whole-ddt and per-type/per-class stats */
+ uint64_t ddt_stat_object;
+ ddt_object_t ddt_object_stats[DDT_TYPES][DDT_CLASSES];
+
+ /* type/class stats by power-2-sized referenced blocks */
ddt_histogram_t ddt_histogram[DDT_TYPES][DDT_CLASSES];
ddt_histogram_t ddt_histogram_cache[DDT_TYPES][DDT_CLASSES];
- ddt_object_t ddt_object_stats[DDT_TYPES][DDT_CLASSES];
- avl_node_t ddt_node;
-};
+} ddt_t;
/*
- * In-core and on-disk bookmark for DDT walks
+ * In-core and on-disk bookmark for DDT walks. This is a cursor for ddt_walk(),
+ * and is stable across calls, even if the DDT is updated, the pool is
+ * restarted or loaded on another system, or OpenZFS is upgraded.
*/
-typedef struct ddt_bookmark {
+typedef struct {
uint64_t ddb_class;
uint64_t ddb_type;
uint64_t ddb_checksum;
uint64_t ddb_cursor;
} ddt_bookmark_t;
-/*
- * Ops vector to access a specific DDT object type.
- */
-typedef struct ddt_ops {
- char ddt_op_name[32];
- int (*ddt_op_create)(objset_t *os, uint64_t *object, dmu_tx_t *tx,
- boolean_t prehash);
- int (*ddt_op_destroy)(objset_t *os, uint64_t object, dmu_tx_t *tx);
- int (*ddt_op_lookup)(objset_t *os, uint64_t object, ddt_entry_t *dde);
- void (*ddt_op_prefetch)(objset_t *os, uint64_t object,
- ddt_entry_t *dde);
- int (*ddt_op_update)(objset_t *os, uint64_t object, ddt_entry_t *dde,
- dmu_tx_t *tx);
- int (*ddt_op_remove)(objset_t *os, uint64_t object, ddt_entry_t *dde,
- dmu_tx_t *tx);
- int (*ddt_op_walk)(objset_t *os, uint64_t object, ddt_entry_t *dde,
- uint64_t *walk);
- int (*ddt_op_count)(objset_t *os, uint64_t object, uint64_t *count);
-} ddt_ops_t;
-
-#define DDT_NAMELEN 107
-
-extern void ddt_object_name(ddt_t *ddt, enum ddt_type type,
- enum ddt_class clazz, char *name);
-extern int ddt_object_walk(ddt_t *ddt, enum ddt_type type,
- enum ddt_class clazz, uint64_t *walk, ddt_entry_t *dde);
-extern int ddt_object_count(ddt_t *ddt, enum ddt_type type,
- enum ddt_class clazz, uint64_t *count);
-extern int ddt_object_info(ddt_t *ddt, enum ddt_type type,
- enum ddt_class clazz, dmu_object_info_t *);
-extern boolean_t ddt_object_exists(ddt_t *ddt, enum ddt_type type,
- enum ddt_class clazz);
-
extern void ddt_bp_fill(const ddt_phys_t *ddp, blkptr_t *bp,
uint64_t txg);
extern void ddt_bp_create(enum zio_checksum checksum, const ddt_key_t *ddk,
const ddt_phys_t *ddp, blkptr_t *bp);
-extern void ddt_key_fill(ddt_key_t *ddk, const blkptr_t *bp);
-
extern void ddt_phys_fill(ddt_phys_t *ddp, const blkptr_t *bp);
extern void ddt_phys_clear(ddt_phys_t *ddp);
extern void ddt_phys_addref(ddt_phys_t *ddp);
extern void ddt_phys_decref(ddt_phys_t *ddp);
-extern void ddt_phys_free(ddt_t *ddt, ddt_key_t *ddk, ddt_phys_t *ddp,
- uint64_t txg);
extern ddt_phys_t *ddt_phys_select(const ddt_entry_t *dde, const blkptr_t *bp);
-extern uint64_t ddt_phys_total_refcnt(const ddt_entry_t *dde);
-
-extern void ddt_stat_add(ddt_stat_t *dst, const ddt_stat_t *src, uint64_t neg);
extern void ddt_histogram_add(ddt_histogram_t *dst, const ddt_histogram_t *src);
extern void ddt_histogram_stat(ddt_stat_t *dds, const ddt_histogram_t *ddh);
@@ -220,9 +234,6 @@ extern void ddt_get_dedup_stats(spa_t *spa, ddt_stat_t *dds_total);
extern uint64_t ddt_get_dedup_dspace(spa_t *spa);
extern uint64_t ddt_get_pool_dedup_ratio(spa_t *spa);
-extern size_t ddt_compress(void *src, uchar_t *dst, size_t s_len, size_t d_len);
-extern void ddt_decompress(uchar_t *src, void *dst, size_t s_len, size_t d_len);
-
extern ddt_t *ddt_select(spa_t *spa, const blkptr_t *bp);
extern void ddt_enter(ddt_t *ddt);
extern void ddt_exit(ddt_t *ddt);
@@ -232,26 +243,22 @@ extern ddt_entry_t *ddt_lookup(ddt_t *ddt, const blkptr_t *bp, boolean_t add);
extern void ddt_prefetch(spa_t *spa, const blkptr_t *bp);
extern void ddt_remove(ddt_t *ddt, ddt_entry_t *dde);
-extern boolean_t ddt_class_contains(spa_t *spa, enum ddt_class max_class,
+extern boolean_t ddt_class_contains(spa_t *spa, ddt_class_t max_class,
const blkptr_t *bp);
extern ddt_entry_t *ddt_repair_start(ddt_t *ddt, const blkptr_t *bp);
extern void ddt_repair_done(ddt_t *ddt, ddt_entry_t *dde);
-extern int ddt_entry_compare(const void *x1, const void *x2);
+extern int ddt_key_compare(const void *x1, const void *x2);
extern void ddt_create(spa_t *spa);
extern int ddt_load(spa_t *spa);
extern void ddt_unload(spa_t *spa);
extern void ddt_sync(spa_t *spa, uint64_t txg);
extern int ddt_walk(spa_t *spa, ddt_bookmark_t *ddb, ddt_entry_t *dde);
-extern int ddt_object_update(ddt_t *ddt, enum ddt_type type,
- enum ddt_class clazz, ddt_entry_t *dde, dmu_tx_t *tx);
extern boolean_t ddt_addref(spa_t *spa, const blkptr_t *bp);
-extern const ddt_ops_t ddt_zap_ops;
-
#ifdef __cplusplus
}
#endif
diff --git a/sys/contrib/openzfs/include/sys/ddt_impl.h b/sys/contrib/openzfs/include/sys/ddt_impl.h
new file mode 100644
index 000000000000..52b927b7519d
--- /dev/null
+++ b/sys/contrib/openzfs/include/sys/ddt_impl.h
@@ -0,0 +1,95 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or https://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 by Delphix. All rights reserved.
+ * Copyright (c) 2023, Klara Inc.
+ */
+
+#ifndef _SYS_DDT_IMPL_H
+#define _SYS_DDT_IMPL_H
+
+#include <sys/ddt.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Ops vector to access a specific DDT object type.
+ */
+typedef struct {
+ char ddt_op_name[32];
+ int (*ddt_op_create)(objset_t *os, uint64_t *object, dmu_tx_t *tx,
+ boolean_t prehash);
+ int (*ddt_op_destroy)(objset_t *os, uint64_t object, dmu_tx_t *tx);
+ int (*ddt_op_lookup)(objset_t *os, uint64_t object,
+ const ddt_key_t *ddk, ddt_phys_t *phys, size_t psize);
+ int (*ddt_op_contains)(objset_t *os, uint64_t object,
+ const ddt_key_t *ddk);
+ void (*ddt_op_prefetch)(objset_t *os, uint64_t object,
+ const ddt_key_t *ddk);
+ int (*ddt_op_update)(objset_t *os, uint64_t object,
+ const ddt_key_t *ddk, const ddt_phys_t *phys, size_t psize,
+ dmu_tx_t *tx);
+ int (*ddt_op_remove)(objset_t *os, uint64_t object,
+ const ddt_key_t *ddk, dmu_tx_t *tx);
+ int (*ddt_op_walk)(objset_t *os, uint64_t object, uint64_t *walk,
+ ddt_key_t *ddk, ddt_phys_t *phys, size_t psize);
+ int (*ddt_op_count)(objset_t *os, uint64_t object, uint64_t *count);
+} ddt_ops_t;
+
+extern const ddt_ops_t ddt_zap_ops;
+
+extern void ddt_stat_update(ddt_t *ddt, ddt_entry_t *dde, uint64_t neg);
+
+/*
+ * These are only exposed so that zdb can access them. Try not to use them
+ * outside of the DDT implementation proper, and if you do, consider moving
+ * them up.
+ */
+
+/*
+ * Enough room to expand DMU_POOL_DDT format for all possible DDT
+ * checksum/class/type combinations.
+ */
+#define DDT_NAMELEN 32
+
+extern uint64_t ddt_phys_total_refcnt(const ddt_entry_t *dde);
+
+extern void ddt_key_fill(ddt_key_t *ddk, const blkptr_t *bp);
+
+extern void ddt_stat_add(ddt_stat_t *dst, const ddt_stat_t *src, uint64_t neg);
+
+extern void ddt_object_name(ddt_t *ddt, ddt_type_t type, ddt_class_t clazz,
+ char *name);
+extern int ddt_object_walk(ddt_t *ddt, ddt_type_t type, ddt_class_t clazz,
+ uint64_t *walk, ddt_entry_t *dde);
+extern int ddt_object_count(ddt_t *ddt, ddt_type_t type, ddt_class_t clazz,
+ uint64_t *count);
+extern int ddt_object_info(ddt_t *ddt, ddt_type_t type, ddt_class_t clazz,
+ dmu_object_info_t *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_DDT_IMPL_H */
diff --git a/sys/contrib/openzfs/include/sys/spa.h b/sys/contrib/openzfs/include/sys/spa.h
index 08099cd4fa29..cada3c841037 100644
--- a/sys/contrib/openzfs/include/sys/spa.h
+++ b/sys/contrib/openzfs/include/sys/spa.h
@@ -62,8 +62,6 @@ typedef struct metaslab_class metaslab_class_t;
typedef struct zio zio_t;
typedef struct zilog zilog_t;
typedef struct spa_aux_vdev spa_aux_vdev_t;
-typedef struct ddt ddt_t;
-typedef struct ddt_entry ddt_entry_t;
typedef struct zbookmark_phys zbookmark_phys_t;
typedef struct zbookmark_err_phys zbookmark_err_phys_t;
diff --git a/sys/contrib/openzfs/lib/libzfs/os/linux/libzfs_pool_os.c b/sys/contrib/openzfs/lib/libzfs/os/linux/libzfs_pool_os.c
index 401151b1afb5..86eef3255bc2 100644
--- a/sys/contrib/openzfs/lib/libzfs/os/linux/libzfs_pool_os.c
+++ b/sys/contrib/openzfs/lib/libzfs/os/linux/libzfs_pool_os.c
@@ -273,6 +273,16 @@ zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, const char *name)
vtoc->efi_parts[0].p_start = start_block;
vtoc->efi_parts[0].p_size = slice_size;
+ if (vtoc->efi_parts[0].p_size * vtoc->efi_lbasize < SPA_MINDEVSIZE) {
+ (void) close(fd);
+ efi_free(vtoc);
+
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
+ "label '%s': partition would be less than the minimum "
+ "device size (64M)"), path);
+ return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
+ }
+
/*
* Why we use V_USR: V_BACKUP confuses users, and is considered
* disposable by some EFI utilities (since EFI doesn't have a backup
diff --git a/sys/contrib/openzfs/lib/libzpool/Makefile.am b/sys/contrib/openzfs/lib/libzpool/Makefile.am
index 3c986a707d2d..42f3404db5a9 100644
--- a/sys/contrib/openzfs/lib/libzpool/Makefile.am
+++ b/sys/contrib/openzfs/lib/libzpool/Makefile.am
@@ -79,6 +79,7 @@ nodist_libzpool_la_SOURCES = \
module/zfs/dbuf.c \
module/zfs/dbuf_stats.c \
module/zfs/ddt.c \
+ module/zfs/ddt_stats.c \
module/zfs/ddt_zap.c \
module/zfs/dmu.c \
module/zfs/dmu_diff.c \
diff --git a/sys/contrib/openzfs/module/Kbuild.in b/sys/contrib/openzfs/module/Kbuild.in
index fb22bfe733c0..7e08374fa2b9 100644
--- a/sys/contrib/openzfs/module/Kbuild.in
+++ b/sys/contrib/openzfs/module/Kbuild.in
@@ -326,6 +326,7 @@ ZFS_OBJS := \
dbuf.o \
dbuf_stats.o \
ddt.o \
+ ddt_stats.o \
ddt_zap.o \
dmu.o \
dmu_diff.o \
diff --git a/sys/contrib/openzfs/module/Makefile.bsd b/sys/contrib/openzfs/module/Makefile.bsd
index 0c4d8bfe1159..e9ad69fc50a2 100644
--- a/sys/contrib/openzfs/module/Makefile.bsd
+++ b/sys/contrib/openzfs/module/Makefile.bsd
@@ -252,6 +252,7 @@ SRCS+= abd.c \
bqueue.c \
dataset_kstats.c \
ddt.c \
+ ddt_stats.c \
ddt_zap.c \
dmu.c \
dmu_diff.c \
@@ -420,6 +421,7 @@ CFLAGS.gcc+= -Wno-pointer-to-int-cast
CFLAGS.abd.c= -Wno-cast-qual
CFLAGS.ddt.c= -Wno-cast-qual
+CFLAGS.ddt_zap.c= -Wno-cast-qual
CFLAGS.dmu.c= -Wno-cast-qual
CFLAGS.dmu_traverse.c= -Wno-cast-qual
CFLAGS.dnode.c= ${NO_WUNUSED_BUT_SET_VARIABLE}
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_acl.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_acl.c
index a1fd3c9856cc..48abbc010917 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_acl.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_acl.c
@@ -1921,8 +1921,8 @@ zfs_acl_ids_create(znode_t *dzp, int flag, vattr_t *vap, cred_t *cr,
zfsvfs->z_acl_inherit != ZFS_ACL_PASSTHROUGH &&
zfsvfs->z_acl_inherit != ZFS_ACL_PASSTHROUGH_X)
trim = B_TRUE;
- zfs_acl_chmod(vap->va_mode, acl_ids->z_mode, B_FALSE,
- trim, acl_ids->z_aclp);
+ zfs_acl_chmod(S_ISDIR(vap->va_mode), acl_ids->z_mode,
+ B_FALSE, trim, acl_ids->z_aclp);
}
}
diff --git a/sys/contrib/openzfs/module/zfs/ddt.c b/sys/contrib/openzfs/module/zfs/ddt.c
index 1fb198219904..de8640e58a2c 100644
--- a/sys/contrib/openzfs/module/zfs/ddt.c
+++ b/sys/contrib/openzfs/module/zfs/ddt.c
@@ -23,6 +23,7 @@
* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2016 by Delphix. All rights reserved.
* Copyright (c) 2022 by Pawel Jakub Dawidek
+ * Copyright (c) 2023, Klara Inc.
*/
#include <sys/zfs_context.h>
@@ -30,15 +31,119 @@
#include <sys/spa_impl.h>
#include <sys/zio.h>
#include <sys/ddt.h>
+#include <sys/ddt_impl.h>
#include <sys/zap.h>
#include <sys/dmu_tx.h>
#include <sys/arc.h>
#include <sys/dsl_pool.h>
#include <sys/zio_checksum.h>
-#include <sys/zio_compress.h>
#include <sys/dsl_scan.h>
#include <sys/abd.h>
+/*
+ * # DDT: Deduplication tables
+ *
+ * The dedup subsystem provides block-level deduplication. When enabled, blocks
+ * to be written will have the dedup (D) bit set, which causes them to be
+ * tracked in a "dedup table", or DDT. If a block has been seen before (exists
+ * in the DDT), instead of being written, it will instead be made to reference
+ * the existing on-disk data, and a refcount bumped in the DDT instead.
+ *
+ * ## Dedup tables and entries
+ *
+ * Conceptually, a DDT is a dictionary or map. Each entry has a "key"
+ * (ddt_key_t) made up of a block's checksum and certain properties, and a
+ * "value" (one or more ddt_phys_t) containing valid DVAs for the block's data,
+ * birth time and refcount. Together these are enough to track references to a
+ * specific block, to build a valid block pointer to reference that block (for
+ * freeing, scrubbing, etc), and to fill a new block pointer with the missing
+ * pieces to make it seem like it was written.
+ *
+ * There's a single DDT (ddt_t) for each checksum type, held in spa_ddt[].
+ * Within each DDT, there can be multiple storage "types" (ddt_type_t, on-disk
+ * object data formats, each with their own implementations) and "classes"
+ * (ddt_class_t, instance of a storage type object, for entries with a specific
+ * characteristic). An entry (key) will only ever exist on one of these objects
+ * at any given time, but may be moved from one to another if their type or
+ * class changes.
+ *
+ * The DDT is driven by the write IO pipeline (zio_ddt_write()). When a block
+ * is to be written, before DVAs have been allocated, ddt_lookup() is called to
+ * see if the block has been seen before. If it's not found, the write proceeds
+ * as normal, and after it succeeds, a new entry is created. If it is found, we
+ * fill the BP with the DVAs from the entry, increment the refcount and cause
+ * the write IO to return immediately.
+ *
+ * Each ddt_phys_t slot in the entry represents a separate dedup block for the
+ * same content/checksum. The slot is selected based on the zp_copies parameter
+ * the block is written with, that is, the number of DVAs in the block. The
+ * "ditto" slot (DDT_PHYS_DITTO) was used for the now-removed "dedupditto"
+ * feature. These are no longer written, and will be freed if encountered on
+ * old pools.
+ *
+ * ## Lifetime of an entry
+ *
+ * A DDT can be enormous, and typically is not held in memory all at once.
+ * Instead, the changes to an entry are tracked in memory, and written down to
+ * disk at the end of each txg.
+ *
+ * A "live" in-memory entry (ddt_entry_t) is a node on the live tree
+ * (ddt_tree). At the start of a txg, ddt_tree is empty. When an entry is
+ * required for IO, ddt_lookup() is called. If an entry already exists on
+ * ddt_tree, it is returned. Otherwise, a new one is created, and the
+ * type/class objects for the DDT are searched for that key. If it's found, its
+ * value is copied into the live entry. If not, an empty entry is created.
+ *
+ * The live entry will be modified during the txg, usually by modifying the
+ * refcount, but sometimes by adding or updating DVAs. At the end of the txg
+ * (during spa_sync()), type and class are recalculated for the entry (see
+ * ddt_sync_entry()), and the entry is written to the appropriate storage
+ * object and (if necessary), removed from an old one. ddt_tree is cleared and
+ * the next txg can start.
+ *
+ * ## Repair IO
+ *
+ * If a read on a dedup block fails, but there are other copies of the block in
+ * the other ddt_phys_t slots, reads will be issued for those instead
+ * (zio_ddt_read_start()). If one of those succeeds, the read is returned to
+ * the caller, and a copy is stashed on the entry's dde_repair_abd.
+ *
+ * During the end-of-txg sync, any entries with a dde_repair_abd get a
+ * "rewrite" write issued for the original block pointer, with the data read
+ * from the alternate block. If the block is actually damaged, this will invoke
+ * the pool's "self-healing" mechanism, and repair the block.
+ *
+ * ## Scanning (scrub/resilver)
+ *
+ * If dedup is active, the scrub machinery will walk the dedup table first, and
+ * scrub all blocks with refcnt > 1 first. After that it will move on to the
+ * regular top-down scrub, and exclude the refcnt > 1 blocks when it sees them.
+ * In this way, heavily deduplicated blocks are only scrubbed once. See the
+ * commentary on dsl_scan_ddt() for more details.
+ *
+ * Walking the DDT is done via ddt_walk(). The current position is stored in a
+ * ddt_bookmark_t, which represents a stable position in the storage object.
+ * This bookmark is stored by the scan machinery, and must reference the same
+ * position on the object even if the object changes, the pool is exported, or
+ * OpenZFS is upgraded.
+ *
+ * ## Interaction with block cloning
+ *
+ * If block cloning and dedup are both enabled on a pool, BRT will look for the
+ * dedup bit on an incoming block pointer. If set, it will call into the DDT
+ * (ddt_addref()) to add a reference to the block, instead of adding a
+ * reference to the BRT. See brt_pending_apply().
+ */
+
+/*
+ * These are the only checksums valid for dedup. They must match the list
+ * from dedup_table in zfs_prop.c
+ */
+#define DDT_CHECKSUM_VALID(c) \
+ (c == ZIO_CHECKSUM_SHA256 || c == ZIO_CHECKSUM_SHA512 || \
+ c == ZIO_CHECKSUM_SKEIN || c == ZIO_CHECKSUM_EDONR || \
+ c == ZIO_CHECKSUM_BLAKE3)
+
static kmem_cache_t *ddt_cache;
static kmem_cache_t *ddt_entry_cache;
@@ -58,7 +163,7 @@ static const char *const ddt_class_name[DDT_CLASSES] = {
};
static void
-ddt_object_create(ddt_t *ddt, enum ddt_type type, enum ddt_class class,
+ddt_object_create(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
dmu_tx_t *tx)
{
spa_t *spa = ddt->ddt_spa;
@@ -70,20 +175,20 @@ ddt_object_create(ddt_t *ddt, enum ddt_type type, enum ddt_class class,
ddt_object_name(ddt, type, class, name);
- ASSERT(*objectp == 0);
- VERIFY(ddt_ops[type]->ddt_op_create(os, objectp, tx, prehash) == 0);
- ASSERT(*objectp != 0);
+ ASSERT3U(*objectp, ==, 0);
+ VERIFY0(ddt_ops[type]->ddt_op_create(os, objectp, tx, prehash));
+ ASSERT3U(*objectp, !=, 0);
- VERIFY(zap_add(os, DMU_POOL_DIRECTORY_OBJECT, name,
- sizeof (uint64_t), 1, objectp, tx) == 0);
+ VERIFY0(zap_add(os, DMU_POOL_DIRECTORY_OBJECT, name,
+ sizeof (uint64_t), 1, objectp, tx));
- VERIFY(zap_add(os, spa->spa_ddt_stat_object, name,
+ VERIFY0(zap_add(os, spa->spa_ddt_stat_object, name,
sizeof (uint64_t), sizeof (ddt_histogram_t) / sizeof (uint64_t),
- &ddt->ddt_histogram[type][class], tx) == 0);
+ &ddt->ddt_histogram[type][class], tx));
}
static void
-ddt_object_destroy(ddt_t *ddt, enum ddt_type type, enum ddt_class class,
+ddt_object_destroy(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
dmu_tx_t *tx)
{
spa_t *spa = ddt->ddt_spa;
@@ -94,19 +199,20 @@ ddt_object_destroy(ddt_t *ddt, enum ddt_type type, enum ddt_class class,
ddt_object_name(ddt, type, class, name);
- ASSERT(*objectp != 0);
+ ASSERT3U(*objectp, !=, 0);
ASSERT(ddt_histogram_empty(&ddt->ddt_histogram[type][class]));
- VERIFY(ddt_object_count(ddt, type, class, &count) == 0 && count == 0);
- VERIFY(zap_remove(os, DMU_POOL_DIRECTORY_OBJECT, name, tx) == 0);
- VERIFY(zap_remove(os, spa->spa_ddt_stat_object, name, tx) == 0);
- VERIFY(ddt_ops[type]->ddt_op_destroy(os, *objectp, tx) == 0);
+ VERIFY0(ddt_object_count(ddt, type, class, &count));
+ VERIFY0(count);
+ VERIFY0(zap_remove(os, DMU_POOL_DIRECTORY_OBJECT, name, tx));
+ VERIFY0(zap_remove(os, spa->spa_ddt_stat_object, name, tx));
+ VERIFY0(ddt_ops[type]->ddt_op_destroy(os, *objectp, tx));
memset(&ddt->ddt_object_stats[type][class], 0, sizeof (ddt_object_t));
*objectp = 0;
}
static int
-ddt_object_load(ddt_t *ddt, enum ddt_type type, enum ddt_class class)
+ddt_object_load(ddt_t *ddt, ddt_type_t type, ddt_class_t class)
{
ddt_object_t *ddo = &ddt->ddt_object_stats[type][class];
dmu_object_info_t doi;
@@ -146,7 +252,7 @@ ddt_object_load(ddt_t *ddt, enum ddt_type type, enum ddt_class class)
}
static void
-ddt_object_sync(ddt_t *ddt, enum ddt_type type, enum ddt_class class,
+ddt_object_sync(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
dmu_tx_t *tx)
{
ddt_object_t *ddo = &ddt->ddt_object_stats[type][class];
@@ -156,75 +262,95 @@ ddt_object_sync(ddt_t *ddt, enum ddt_type type, enum ddt_class class,
ddt_object_name(ddt, type, class, name);
- VERIFY(zap_update(ddt->ddt_os, ddt->ddt_spa->spa_ddt_stat_object, name,
+ VERIFY0(zap_update(ddt->ddt_os, ddt->ddt_spa->spa_ddt_stat_object, name,
sizeof (uint64_t), sizeof (ddt_histogram_t) / sizeof (uint64_t),
- &ddt->ddt_histogram[type][class], tx) == 0);
+ &ddt->ddt_histogram[type][class], tx));
/*
* Cache DDT statistics; this is the only time they'll change.
*/
- VERIFY(ddt_object_info(ddt, type, class, &doi) == 0);
- VERIFY(ddt_object_count(ddt, type, class, &count) == 0);
+ VERIFY0(ddt_object_info(ddt, type, class, &doi));
+ VERIFY0(ddt_object_count(ddt, type, class, &count));
ddo->ddo_count = count;
ddo->ddo_dspace = doi.doi_physical_blocks_512 << 9;
ddo->ddo_mspace = doi.doi_fill_count * doi.doi_data_block_size;
}
+/*
+ * Tell whether the table has a store object recorded for the given
+ * type/class slot: the result is true exactly when
+ * ddt_object[type][class] is non-zero.
+ */
+static boolean_t
+ddt_object_exists(ddt_t *ddt, ddt_type_t type, ddt_class_t class)
+{
+	return (ddt->ddt_object[type][class] != 0);
+}
+
static int
-ddt_object_lookup(ddt_t *ddt, enum ddt_type type, enum ddt_class class,
+ddt_object_lookup(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
ddt_entry_t *dde)
{
if (!ddt_object_exists(ddt, type, class))
return (SET_ERROR(ENOENT));
return (ddt_ops[type]->ddt_op_lookup(ddt->ddt_os,
- ddt->ddt_object[type][class], dde));
+ ddt->ddt_object[type][class], &dde->dde_key,
+ dde->dde_phys, sizeof (dde->dde_phys)));
+}
+
+/*
+ * Ask the store object for (type, class) whether it holds an entry for the
+ * key ddk.  Only the key is handed to the backend; no ddt_phys_t buffer is
+ * passed, so no entry contents are returned to the caller.
+ *
+ * Returns ENOENT when no object exists for that type/class slot; otherwise
+ * returns whatever the type's ddt_op_contains operation returns.
+ */
+static int
+ddt_object_contains(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
+ const ddt_key_t *ddk)
+{
+ /* No object for this slot means the key cannot be stored there. */
+ if (!ddt_object_exists(ddt, type, class))
+ return (SET_ERROR(ENOENT));
+
+ return (ddt_ops[type]->ddt_op_contains(ddt->ddt_os,
+ ddt->ddt_object[type][class], ddk));
}
static void
-ddt_object_prefetch(ddt_t *ddt, enum ddt_type type, enum ddt_class class,
- ddt_entry_t *dde)
+ddt_object_prefetch(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
+ const ddt_key_t *ddk)
{
if (!ddt_object_exists(ddt, type, class))
return;
ddt_ops[type]->ddt_op_prefetch(ddt->ddt_os,
- ddt->ddt_object[type][class], dde);
+ ddt->ddt_object[type][class], ddk);
}
-int
-ddt_object_update(ddt_t *ddt, enum ddt_type type, enum ddt_class class,
+static int
+ddt_object_update(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
ddt_entry_t *dde, dmu_tx_t *tx)
{
ASSERT(ddt_object_exists(ddt, type, class));
return (ddt_ops[type]->ddt_op_update(ddt->ddt_os,
- ddt->ddt_object[type][class], dde, tx));
+ ddt->ddt_object[type][class], &dde->dde_key, dde->dde_phys,
+ sizeof (dde->dde_phys), tx));
}
static int
-ddt_object_remove(ddt_t *ddt, enum ddt_type type, enum ddt_class class,
- ddt_entry_t *dde, dmu_tx_t *tx)
+ddt_object_remove(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
+ const ddt_key_t *ddk, dmu_tx_t *tx)
{
ASSERT(ddt_object_exists(ddt, type, class));
return (ddt_ops[type]->ddt_op_remove(ddt->ddt_os,
- ddt->ddt_object[type][class], dde, tx));
+ ddt->ddt_object[type][class], ddk, tx));
}
int
-ddt_object_walk(ddt_t *ddt, enum ddt_type type, enum ddt_class class,
+ddt_object_walk(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
uint64_t *walk, ddt_entry_t *dde)
{
ASSERT(ddt_object_exists(ddt, type, class));
return (ddt_ops[type]->ddt_op_walk(ddt->ddt_os,
- ddt->ddt_object[type][class], dde, walk));
+ ddt->ddt_object[type][class], walk, &dde->dde_key,
+ dde->dde_phys, sizeof (dde->dde_phys)));
}
int
-ddt_object_count(ddt_t *ddt, enum ddt_type type, enum ddt_class class,
+ddt_object_count(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
uint64_t *count)
{
ASSERT(ddt_object_exists(ddt, type, class));
@@ -234,7 +360,7 @@ ddt_object_count(ddt_t *ddt, enum ddt_type type, enum ddt_class class,
}
int
-ddt_object_info(ddt_t *ddt, enum ddt_type type, enum ddt_class class,
+ddt_object_info(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
dmu_object_info_t *doi)
{
if (!ddt_object_exists(ddt, type, class))
@@ -244,14 +370,8 @@ ddt_object_info(ddt_t *ddt, enum ddt_type type, enum ddt_class class,
doi));
}
-boolean_t
-ddt_object_exists(ddt_t *ddt, enum ddt_type type, enum ddt_class class)
-{
- return (!!ddt->ddt_object[type][class]);
-}
-
void
-ddt_object_name(ddt_t *ddt, enum ddt_type type, enum ddt_class class,
+ddt_object_name(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
char *name)
{
(void) snprintf(name, DDT_NAMELEN, DMU_POOL_DDT,
@@ -262,7 +382,7 @@ ddt_object_name(ddt_t *ddt, enum ddt_type type, enum ddt_class class,
void
ddt_bp_fill(const ddt_phys_t *ddp, blkptr_t *bp, uint64_t txg)
{
- ASSERT(txg != 0);
+ ASSERT3U(txg, !=, 0);
for (int d = 0; d < SPA_DVAS_PER_BP; d++)
bp->blk_dva[d] = ddp->ddp_dva[d];
@@ -313,7 +433,7 @@ ddt_key_fill(ddt_key_t *ddk, const blkptr_t *bp)
void
ddt_phys_fill(ddt_phys_t *ddp, const blkptr_t *bp)
{
- ASSERT(ddp->ddp_phys_birth == 0);
+ ASSERT0(ddp->ddp_phys_birth);
for (int d = 0; d < SPA_DVAS_PER_BP; d++)
ddp->ddp_dva[d] = bp->blk_dva[d];
@@ -336,12 +456,12 @@ void
ddt_phys_decref(ddt_phys_t *ddp)
{
if (ddp) {
- ASSERT(ddp->ddp_refcnt > 0);
+ ASSERT3U(ddp->ddp_refcnt, >, 0);
ddp->ddp_refcnt--;
}
}
-void
+static void
ddt_phys_free(ddt_t *ddt, ddt_key_t *ddk, ddt_phys_t *ddp, uint64_t txg)
{
blkptr_t blk;
@@ -382,221 +502,10 @@ ddt_phys_total_refcnt(const ddt_entry_t *dde)
return (refcnt);
}
-static void
-ddt_stat_generate(ddt_t *ddt, ddt_entry_t *dde, ddt_stat_t *dds)
-{
- spa_t *spa = ddt->ddt_spa;
- ddt_phys_t *ddp = dde->dde_phys;
- ddt_key_t *ddk = &dde->dde_key;
- uint64_t lsize = DDK_GET_LSIZE(ddk);
- uint64_t psize = DDK_GET_PSIZE(ddk);
-
- memset(dds, 0, sizeof (*dds));
-
- for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
- uint64_t dsize = 0;
- uint64_t refcnt = ddp->ddp_refcnt;
-
- if (ddp->ddp_phys_birth == 0)
- continue;
-
- for (int d = 0; d < DDE_GET_NDVAS(dde); d++)
- dsize += dva_get_dsize_sync(spa, &ddp->ddp_dva[d]);
-
- dds->dds_blocks += 1;
- dds->dds_lsize += lsize;
- dds->dds_psize += psize;
- dds->dds_dsize += dsize;
-
- dds->dds_ref_blocks += refcnt;
- dds->dds_ref_lsize += lsize * refcnt;
- dds->dds_ref_psize += psize * refcnt;
- dds->dds_ref_dsize += dsize * refcnt;
- }
-}
-
-void
-ddt_stat_add(ddt_stat_t *dst, const ddt_stat_t *src, uint64_t neg)
-{
- const uint64_t *s = (const uint64_t *)src;
- uint64_t *d = (uint64_t *)dst;
- uint64_t *d_end = (uint64_t *)(dst + 1);
-
- ASSERT(neg == 0 || neg == -1ULL); /* add or subtract */
-
- for (int i = 0; i < d_end - d; i++)
- d[i] += (s[i] ^ neg) - neg;
-}
-
-static void
-ddt_stat_update(ddt_t *ddt, ddt_entry_t *dde, uint64_t neg)
-{
- ddt_stat_t dds;
- ddt_histogram_t *ddh;
- int bucket;
-
- ddt_stat_generate(ddt, dde, &dds);
-
- bucket = highbit64(dds.dds_ref_blocks) - 1;
- ASSERT(bucket >= 0);
-
- ddh = &ddt->ddt_histogram[dde->dde_type][dde->dde_class];
-
- ddt_stat_add(&ddh->ddh_stat[bucket], &dds, neg);
-}
-
-void
-ddt_histogram_add(ddt_histogram_t *dst, const ddt_histogram_t *src)
-{
- for (int h = 0; h < 64; h++)
- ddt_stat_add(&dst->ddh_stat[h], &src->ddh_stat[h], 0);
-}
-
-void
-ddt_histogram_stat(ddt_stat_t *dds, const ddt_histogram_t *ddh)
-{
- memset(dds, 0, sizeof (*dds));
-
- for (int h = 0; h < 64; h++)
- ddt_stat_add(dds, &ddh->ddh_stat[h], 0);
-}
-
-boolean_t
-ddt_histogram_empty(const ddt_histogram_t *ddh)
-{
- const uint64_t *s = (const uint64_t *)ddh;
- const uint64_t *s_end = (const uint64_t *)(ddh + 1);
-
- while (s < s_end)
- if (*s++ != 0)
- return (B_FALSE);
-
- return (B_TRUE);
-}
-
-void
-ddt_get_dedup_object_stats(spa_t *spa, ddt_object_t *ddo_total)
-{
- /* Sum the statistics we cached in ddt_object_sync(). */
- for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
- ddt_t *ddt = spa->spa_ddt[c];
- for (enum ddt_type type = 0; type < DDT_TYPES; type++) {
- for (enum ddt_class class = 0; class < DDT_CLASSES;
- class++) {
- ddt_object_t *ddo =
- &ddt->ddt_object_stats[type][class];
- ddo_total->ddo_count += ddo->ddo_count;
- ddo_total->ddo_dspace += ddo->ddo_dspace;
- ddo_total->ddo_mspace += ddo->ddo_mspace;
- }
- }
- }
-
- /* ... and compute the averages. */
- if (ddo_total->ddo_count != 0) {
- ddo_total->ddo_dspace /= ddo_total->ddo_count;
- ddo_total->ddo_mspace /= ddo_total->ddo_count;
- }
-}
-
-void
-ddt_get_dedup_histogram(spa_t *spa, ddt_histogram_t *ddh)
-{
- for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
- ddt_t *ddt = spa->spa_ddt[c];
- for (enum ddt_type type = 0; type < DDT_TYPES && ddt; type++) {
- for (enum ddt_class class = 0; class < DDT_CLASSES;
- class++) {
- ddt_histogram_add(ddh,
- &ddt->ddt_histogram_cache[type][class]);
- }
- }
- }
-}
-
-void
-ddt_get_dedup_stats(spa_t *spa, ddt_stat_t *dds_total)
-{
- ddt_histogram_t *ddh_total;
-
- ddh_total = kmem_zalloc(sizeof (ddt_histogram_t), KM_SLEEP);
- ddt_get_dedup_histogram(spa, ddh_total);
- ddt_histogram_stat(dds_total, ddh_total);
- kmem_free(ddh_total, sizeof (ddt_histogram_t));
-}
-
-uint64_t
-ddt_get_dedup_dspace(spa_t *spa)
-{
- ddt_stat_t dds_total;
-
- if (spa->spa_dedup_dspace != ~0ULL)
- return (spa->spa_dedup_dspace);
-
- memset(&dds_total, 0, sizeof (ddt_stat_t));
-
- /* Calculate and cache the stats */
- ddt_get_dedup_stats(spa, &dds_total);
- spa->spa_dedup_dspace = dds_total.dds_ref_dsize - dds_total.dds_dsize;
- return (spa->spa_dedup_dspace);
-}
-
-uint64_t
-ddt_get_pool_dedup_ratio(spa_t *spa)
-{
- ddt_stat_t dds_total = { 0 };
-
- ddt_get_dedup_stats(spa, &dds_total);
- if (dds_total.dds_dsize == 0)
- return (100);
-
- return (dds_total.dds_ref_dsize * 100 / dds_total.dds_dsize);
-}
-
-size_t
-ddt_compress(void *src, uchar_t *dst, size_t s_len, size_t d_len)
-{
- uchar_t *version = dst++;
- int cpfunc = ZIO_COMPRESS_ZLE;
- zio_compress_info_t *ci = &zio_compress_table[cpfunc];
- size_t c_len;
-
- ASSERT(d_len >= s_len + 1); /* no compression plus version byte */
-
- c_len = ci->ci_compress(src, dst, s_len, d_len - 1, ci->ci_level);
-
- if (c_len == s_len) {
- cpfunc = ZIO_COMPRESS_OFF;
- memcpy(dst, src, s_len);
- }
-
- *version = cpfunc;
- if (ZFS_HOST_BYTEORDER)
- *version |= DDT_COMPRESS_BYTEORDER_MASK;
-
- return (c_len + 1);
-}
-
-void
-ddt_decompress(uchar_t *src, void *dst, size_t s_len, size_t d_len)
-{
- uchar_t version = *src++;
- int cpfunc = version & DDT_COMPRESS_FUNCTION_MASK;
- zio_compress_info_t *ci = &zio_compress_table[cpfunc];
-
- if (ci->ci_decompress != NULL)
- (void) ci->ci_decompress(src, dst, s_len, d_len, ci->ci_level);
- else
- memcpy(dst, src, d_len);
-
- if (((version & DDT_COMPRESS_BYTEORDER_MASK) != 0) !=
- (ZFS_HOST_BYTEORDER != 0))
- byteswap_uint64_array(dst, d_len);
-}
-
ddt_t *
ddt_select(spa_t *spa, const blkptr_t *bp)
{
+ ASSERT(DDT_CHECKSUM_VALID(BP_GET_CHECKSUM(bp)));
return (spa->spa_ddt[BP_GET_CHECKSUM(bp)]);
}
@@ -645,10 +554,10 @@ ddt_alloc(const ddt_key_t *ddk)
static void
ddt_free(ddt_entry_t *dde)
{
- ASSERT(!dde->dde_loading);
+ ASSERT(dde->dde_flags & DDE_FLAG_LOADED);
for (int p = 0; p < DDT_PHYS_TYPES; p++)
- ASSERT(dde->dde_lead_zio[p] == NULL);
+ ASSERT3P(dde->dde_lead_zio[p], ==, NULL);
if (dde->dde_repair_abd != NULL)
abd_free(dde->dde_repair_abd);
@@ -669,36 +578,48 @@ ddt_remove(ddt_t *ddt, ddt_entry_t *dde)
ddt_entry_t *
ddt_lookup(ddt_t *ddt, const blkptr_t *bp, boolean_t add)
{
- ddt_entry_t *dde, dde_search;
- enum ddt_type type;
- enum ddt_class class;
+ ddt_key_t search;
+ ddt_entry_t *dde;
+ ddt_type_t type;
+ ddt_class_t class;
avl_index_t where;
int error;
ASSERT(MUTEX_HELD(&ddt->ddt_lock));
- ddt_key_fill(&dde_search.dde_key, bp);
+ ddt_key_fill(&search, bp);
- dde = avl_find(&ddt->ddt_tree, &dde_search, &where);
- if (dde == NULL) {
- if (!add)
- return (NULL);
- dde = ddt_alloc(&dde_search.dde_key);
- avl_insert(&ddt->ddt_tree, dde, where);
- }
+ /* Find an existing live entry */
+ dde = avl_find(&ddt->ddt_tree, &search, &where);
+ if (dde != NULL) {
+ /* Found it. If it's already loaded, we can just return it. */
+ if (dde->dde_flags & DDE_FLAG_LOADED)
+ return (dde);
- while (dde->dde_loading)
- cv_wait(&dde->dde_cv, &ddt->ddt_lock);
+ /* Someone else is loading it, wait for it. */
+ while (!(dde->dde_flags & DDE_FLAG_LOADED))
+ cv_wait(&dde->dde_cv, &ddt->ddt_lock);
- if (dde->dde_loaded)
return (dde);
+ }
+
+ /* Not found. */
+ if (!add)
+ return (NULL);
- dde->dde_loading = B_TRUE;
+ /* Time to make a new entry. */
+ dde = ddt_alloc(&search);
+ avl_insert(&ddt->ddt_tree, dde, where);
+ /*
+ * ddt_tree is now stable, so unlock and let everyone else keep moving.
+ * Anyone landing on this entry will find it without DDE_FLAG_LOADED,
+ * and go to sleep waiting for it above.
+ */
ddt_exit(ddt);
+ /* Search all store objects for the entry. */
error = ENOENT;
-
for (type = 0; type < DDT_TYPES; type++) {
for (class = 0; class < DDT_CLASSES; class++) {
error = ddt_object_lookup(ddt, type, class, dde);
@@ -713,17 +634,16 @@ ddt_lookup(ddt_t *ddt, const blkptr_t *bp, boolean_t add)
ddt_enter(ddt);
- ASSERT(dde->dde_loaded == B_FALSE);
- ASSERT(dde->dde_loading == B_TRUE);
+ ASSERT(!(dde->dde_flags & DDE_FLAG_LOADED));
dde->dde_type = type; /* will be DDT_TYPES if no entry found */
dde->dde_class = class; /* will be DDT_CLASSES if no entry found */
- dde->dde_loaded = B_TRUE;
- dde->dde_loading = B_FALSE;
if (error == 0)
ddt_stat_update(ddt, dde, -1ULL);
+ /* Entry loaded, everyone can proceed now */
+ dde->dde_flags |= DDE_FLAG_LOADED;
cv_broadcast(&dde->dde_cv);
return (dde);
@@ -733,7 +653,7 @@ void
ddt_prefetch(spa_t *spa, const blkptr_t *bp)
{
ddt_t *ddt;
- ddt_entry_t dde;
+ ddt_key_t ddk;
if (!zfs_dedup_prefetch || bp == NULL || !BP_GET_DEDUP(bp))
return;
@@ -744,17 +664,18 @@ ddt_prefetch(spa_t *spa, const blkptr_t *bp)
* Thus no locking is required as the DDT can't disappear on us.
*/
ddt = ddt_select(spa, bp);
- ddt_key_fill(&dde.dde_key, bp);
+ ddt_key_fill(&ddk, bp);
- for (enum ddt_type type = 0; type < DDT_TYPES; type++) {
- for (enum ddt_class class = 0; class < DDT_CLASSES; class++) {
- ddt_object_prefetch(ddt, type, class, &dde);
+ for (ddt_type_t type = 0; type < DDT_TYPES; type++) {
+ for (ddt_class_t class = 0; class < DDT_CLASSES; class++) {
+ ddt_object_prefetch(ddt, type, class, &ddk);
}
}
}
/*
- * Opaque struct used for ddt_key comparison
+ * Key comparison. Any struct wanting to make use of this function must have
+ * the key as the first element.
*/
#define DDT_KEY_CMP_LEN (sizeof (ddt_key_t) / sizeof (uint16_t))
@@ -763,12 +684,10 @@ typedef struct ddt_key_cmp {
} ddt_key_cmp_t;
int
-ddt_entry_compare(const void *x1, const void *x2)
+ddt_key_compare(const void *x1, const void *x2)
{
- const ddt_entry_t *dde1 = x1;
- const ddt_entry_t *dde2 = x2;
- const ddt_key_cmp_t *k1 = (const ddt_key_cmp_t *)&dde1->dde_key;
- const ddt_key_cmp_t *k2 = (const ddt_key_cmp_t *)&dde2->dde_key;
+ const ddt_key_cmp_t *k1 = (const ddt_key_cmp_t *)x1;
+ const ddt_key_cmp_t *k2 = (const ddt_key_cmp_t *)x2;
int32_t cmp = 0;
for (int i = 0; i < DDT_KEY_CMP_LEN; i++) {
@@ -789,9 +708,9 @@ ddt_table_alloc(spa_t *spa, enum zio_checksum c)
memset(ddt, 0, sizeof (ddt_t));
mutex_init(&ddt->ddt_lock, NULL, MUTEX_DEFAULT, NULL);
- avl_create(&ddt->ddt_tree, ddt_entry_compare,
+ avl_create(&ddt->ddt_tree, ddt_key_compare,
sizeof (ddt_entry_t), offsetof(ddt_entry_t, dde_node));
- avl_create(&ddt->ddt_repair_tree, ddt_entry_compare,
+ avl_create(&ddt->ddt_repair_tree, ddt_key_compare,
sizeof (ddt_entry_t), offsetof(ddt_entry_t, dde_node));
ddt->ddt_checksum = c;
ddt->ddt_spa = spa;
@@ -803,8 +722,8 @@ ddt_table_alloc(spa_t *spa, enum zio_checksum c)
static void
ddt_table_free(ddt_t *ddt)
{
- ASSERT(avl_numnodes(&ddt->ddt_tree) == 0);
- ASSERT(avl_numnodes(&ddt->ddt_repair_tree) == 0);
+ ASSERT0(avl_numnodes(&ddt->ddt_tree));
+ ASSERT0(avl_numnodes(&ddt->ddt_repair_tree));
avl_destroy(&ddt->ddt_tree);
avl_destroy(&ddt->ddt_repair_tree);
mutex_destroy(&ddt->ddt_lock);
@@ -816,8 +735,10 @@ ddt_create(spa_t *spa)
{
spa->spa_dedup_checksum = ZIO_DEDUPCHECKSUM;
- for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++)
- spa->spa_ddt[c] = ddt_table_alloc(spa, c);
+ for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
+ if (DDT_CHECKSUM_VALID(c))
+ spa->spa_ddt[c] = ddt_table_alloc(spa, c);
+ }
}
int
@@ -835,9 +756,12 @@ ddt_load(spa_t *spa)
return (error == ENOENT ? 0 : error);
for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
+ if (!DDT_CHECKSUM_VALID(c))
+ continue;
+
ddt_t *ddt = spa->spa_ddt[c];
- for (enum ddt_type type = 0; type < DDT_TYPES; type++) {
- for (enum ddt_class class = 0; class < DDT_CLASSES;
+ for (ddt_type_t type = 0; type < DDT_TYPES; type++) {
+ for (ddt_class_t class = 0; class < DDT_CLASSES;
class++) {
error = ddt_object_load(ddt, type, class);
if (error != 0 && error != ENOENT)
@@ -868,10 +792,10 @@ ddt_unload(spa_t *spa)
}
boolean_t
-ddt_class_contains(spa_t *spa, enum ddt_class max_class, const blkptr_t *bp)
+ddt_class_contains(spa_t *spa, ddt_class_t max_class, const blkptr_t *bp)
{
ddt_t *ddt;
- ddt_entry_t *dde;
+ ddt_key_t ddk;
if (!BP_GET_DEDUP(bp))
return (B_FALSE);
@@ -880,20 +804,16 @@ ddt_class_contains(spa_t *spa, enum ddt_class max_class, const blkptr_t *bp)
return (B_TRUE);
ddt = spa->spa_ddt[BP_GET_CHECKSUM(bp)];
- dde = kmem_cache_alloc(ddt_entry_cache, KM_SLEEP);
- ddt_key_fill(&(dde->dde_key), bp);
+ ddt_key_fill(&ddk, bp);
- for (enum ddt_type type = 0; type < DDT_TYPES; type++) {
- for (enum ddt_class class = 0; class <= max_class; class++) {
- if (ddt_object_lookup(ddt, type, class, dde) == 0) {
- kmem_cache_free(ddt_entry_cache, dde);
+ for (ddt_type_t type = 0; type < DDT_TYPES; type++) {
+ for (ddt_class_t class = 0; class <= max_class; class++) {
+ if (ddt_object_contains(ddt, type, class, &ddk) == 0)
return (B_TRUE);
- }
}
}
- kmem_cache_free(ddt_entry_cache, dde);
return (B_FALSE);
}
@@ -907,8 +827,8 @@ ddt_repair_start(ddt_t *ddt, const blkptr_t *bp)
dde = ddt_alloc(&ddk);
- for (enum ddt_type type = 0; type < DDT_TYPES; type++) {
- for (enum ddt_class class = 0; class < DDT_CLASSES; class++) {
+ for (ddt_type_t type = 0; type < DDT_TYPES; type++) {
+ for (ddt_class_t class = 0; class < DDT_CLASSES; class++) {
/*
* We can only do repair if there are multiple copies
* of the block. For anything in the UNIQUE class,
@@ -1007,19 +927,18 @@ ddt_sync_entry(ddt_t *ddt, ddt_entry_t *dde, dmu_tx_t *tx, uint64_t txg)
dsl_pool_t *dp = ddt->ddt_spa->spa_dsl_pool;
ddt_phys_t *ddp = dde->dde_phys;
ddt_key_t *ddk = &dde->dde_key;
- enum ddt_type otype = dde->dde_type;
- enum ddt_type ntype = DDT_TYPE_CURRENT;
- enum ddt_class oclass = dde->dde_class;
- enum ddt_class nclass;
+ ddt_type_t otype = dde->dde_type;
+ ddt_type_t ntype = DDT_TYPE_DEFAULT;
+ ddt_class_t oclass = dde->dde_class;
+ ddt_class_t nclass;
uint64_t total_refcnt = 0;
- ASSERT(dde->dde_loaded);
- ASSERT(!dde->dde_loading);
+ ASSERT(dde->dde_flags & DDE_FLAG_LOADED);
for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
- ASSERT(dde->dde_lead_zio[p] == NULL);
+ ASSERT3P(dde->dde_lead_zio[p], ==, NULL);
if (ddp->ddp_phys_birth == 0) {
- ASSERT(ddp->ddp_refcnt == 0);
+ ASSERT0(ddp->ddp_refcnt);
continue;
}
if (p == DDT_PHYS_DITTO) {
@@ -1044,8 +963,9 @@ ddt_sync_entry(ddt_t *ddt, ddt_entry_t *dde, dmu_tx_t *tx, uint64_t txg)
if (otype != DDT_TYPES &&
(otype != ntype || oclass != nclass || total_refcnt == 0)) {
- VERIFY(ddt_object_remove(ddt, otype, oclass, dde, tx) == 0);
- ASSERT(ddt_object_lookup(ddt, otype, oclass, dde) == ENOENT);
+ VERIFY0(ddt_object_remove(ddt, otype, oclass, ddk, tx));
+ ASSERT3U(
+ ddt_object_contains(ddt, otype, oclass, ddk), ==, ENOENT);
}
if (total_refcnt != 0) {
@@ -1054,7 +974,7 @@ ddt_sync_entry(ddt_t *ddt, ddt_entry_t *dde, dmu_tx_t *tx, uint64_t txg)
ddt_stat_update(ddt, dde, 0);
if (!ddt_object_exists(ddt, ntype, nclass))
ddt_object_create(ddt, ntype, nclass, tx);
- VERIFY(ddt_object_update(ddt, ntype, nclass, dde, tx) == 0);
+ VERIFY0(ddt_object_update(ddt, ntype, nclass, dde, tx));
/*
* If the class changes, the order that we scan this bp
@@ -1080,7 +1000,7 @@ ddt_sync_table(ddt_t *ddt, dmu_tx_t *tx, uint64_t txg)
if (avl_numnodes(&ddt->ddt_tree) == 0)
return;
- ASSERT(spa->spa_uberblock.ub_version >= SPA_VERSION_DEDUP);
+ ASSERT3U(spa->spa_uberblock.ub_version, >=, SPA_VERSION_DEDUP);
if (spa->spa_ddt_stat_object == 0) {
spa->spa_ddt_stat_object = zap_create_link(ddt->ddt_os,
@@ -1093,17 +1013,17 @@ ddt_sync_table(ddt_t *ddt, dmu_tx_t *tx, uint64_t txg)
ddt_free(dde);
}
- for (enum ddt_type type = 0; type < DDT_TYPES; type++) {
+ for (ddt_type_t type = 0; type < DDT_TYPES; type++) {
uint64_t add, count = 0;
- for (enum ddt_class class = 0; class < DDT_CLASSES; class++) {
+ for (ddt_class_t class = 0; class < DDT_CLASSES; class++) {
if (ddt_object_exists(ddt, type, class)) {
ddt_object_sync(ddt, type, class, tx);
- VERIFY(ddt_object_count(ddt, type, class,
- &add) == 0);
+ VERIFY0(ddt_object_count(ddt, type, class,
+ &add));
count += add;
}
}
- for (enum ddt_class class = 0; class < DDT_CLASSES; class++) {
+ for (ddt_class_t class = 0; class < DDT_CLASSES; class++) {
if (count == 0 && ddt_object_exists(ddt, type, class))
ddt_object_destroy(ddt, type, class, tx);
}
@@ -1121,7 +1041,7 @@ ddt_sync(spa_t *spa, uint64_t txg)
dmu_tx_t *tx;
zio_t *rio;
- ASSERT(spa_syncing_txg(spa) == txg);
+ ASSERT3U(spa_syncing_txg(spa), ==, txg);
tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg);
@@ -1158,6 +1078,8 @@ ddt_walk(spa_t *spa, ddt_bookmark_t *ddb, ddt_entry_t *dde)
do {
do {
ddt_t *ddt = spa->spa_ddt[ddb->ddb_checksum];
+ if (ddt == NULL)
+ continue;
int error = ENOENT;
if (ddt_object_exists(ddt, ddb->ddb_type,
ddb->ddb_class)) {
@@ -1201,7 +1123,7 @@ ddt_addref(spa_t *spa, const blkptr_t *bp)
ddt_enter(ddt);
dde = ddt_lookup(ddt, bp, B_TRUE);
- ASSERT(dde != NULL);
+ ASSERT3P(dde, !=, NULL);
if (dde->dde_type < DDT_TYPES) {
ddt_phys_t *ddp;
diff --git a/sys/contrib/openzfs/module/zfs/ddt_stats.c b/sys/contrib/openzfs/module/zfs/ddt_stats.c
new file mode 100644
index 000000000000..af5365a1d114
--- /dev/null
+++ b/sys/contrib/openzfs/module/zfs/ddt_stats.c
@@ -0,0 +1,212 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or https://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
+ * Copyright (c) 2022 by Pawel Jakub Dawidek
+ * Copyright (c) 2023, Klara Inc.
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/spa.h>
+#include <sys/spa_impl.h>
+#include <sys/ddt.h>
+#include <sys/ddt_impl.h>
+
+/*
+ * Compute the statistics contributed by a single DDT entry into *dds.
+ *
+ * *dds is cleared first.  Each phys slot of the entry with a non-zero
+ * ddp_phys_birth counts as one block and contributes the key's logical and
+ * physical sizes plus the sum of dva_get_dsize_sync() over its DVAs.  The
+ * dds_ref_* fields receive the same quantities multiplied by that slot's
+ * ddp_refcnt.
+ */
+static void
+ddt_stat_generate(ddt_t *ddt, ddt_entry_t *dde, ddt_stat_t *dds)
+{
+	ddt_key_t *key = &dde->dde_key;
+	uint64_t lsize = DDK_GET_LSIZE(key);
+	uint64_t psize = DDK_GET_PSIZE(key);
+
+	memset(dds, 0, sizeof (*dds));
+
+	for (int p = 0; p < DDT_PHYS_TYPES; p++) {
+		ddt_phys_t *phys = &dde->dde_phys[p];
+		uint64_t refcnt = phys->ddp_refcnt;
+		uint64_t dsize = 0;
+
+		/* A slot with no birth txg holds no block. */
+		if (phys->ddp_phys_birth == 0)
+			continue;
+
+		/* With the key's crypt flag set, the last DVA is skipped. */
+		int ndvas = SPA_DVAS_PER_BP;
+		if (DDK_GET_CRYPT(key))
+			ndvas--;
+
+		for (int d = 0; d < ndvas; d++) {
+			dsize += dva_get_dsize_sync(ddt->ddt_spa,
+			    &phys->ddp_dva[d]);
+		}
+
+		/* One physical block ... */
+		dds->dds_blocks++;
+		dds->dds_lsize += lsize;
+		dds->dds_psize += psize;
+		dds->dds_dsize += dsize;
+
+		/* ... referenced refcnt times. */
+		dds->dds_ref_blocks += refcnt;
+		dds->dds_ref_lsize += refcnt * lsize;
+		dds->dds_ref_psize += refcnt * psize;
+		dds->dds_ref_dsize += refcnt * dsize;
+	}
+}
+
+/*
+ * Add src into dst, or subtract it, treating both structures as flat
+ * arrays of uint64_t words.
+ *
+ * neg selects the operation and must be 0 (add) or -1ULL (subtract).  For
+ * neg == 0 the term (word ^ neg) - neg is just word; for neg == -1ULL it
+ * is ~word + 1, the two's-complement negation of word, so the same
+ * expression serves both cases without a branch.
+ */
+void
+ddt_stat_add(ddt_stat_t *dst, const ddt_stat_t *src, uint64_t neg)
+{
+	const uint64_t *in = (const uint64_t *)src;
+	uint64_t *out = (uint64_t *)dst;
+	uint64_t *out_end = (uint64_t *)(dst + 1);
+
+	ASSERT(neg == 0 || neg == -1ULL);	/* add or subtract */
+
+	while (out < out_end)
+		*out++ += (*in++ ^ neg) - neg;
+}
+
+/*
+ * Fold one entry's statistics into the histogram kept for the entry's
+ * type and class.
+ *
+ * The statistics come from ddt_stat_generate() and land in the histogram
+ * bucket indexed by highbit64(dds_ref_blocks) - 1.  neg is passed through
+ * to ddt_stat_add() unchanged: 0 adds the entry's statistics, -1ULL
+ * subtracts them.
+ */
+void
+ddt_stat_update(ddt_t *ddt, ddt_entry_t *dde, uint64_t neg)
+{
+	ddt_stat_t dds;
+	ddt_histogram_t *ddh;
+	int bucket;
+
+	ddt_stat_generate(ddt, dde, &dds);
+
+	bucket = highbit64(dds.dds_ref_blocks) - 1;
+
+	/*
+	 * bucket is a signed int, so this has to be a signed comparison.
+	 * An unsigned "bucket >= 0" check can never fail, which would let
+	 * a negative index reach ddh_stat[] unnoticed in debug builds.
+	 */
+	ASSERT3S(bucket, >=, 0);
+
+	ddh = &ddt->ddt_histogram[dde->dde_type][dde->dde_class];
+
+	ddt_stat_add(&ddh->ddh_stat[bucket], &dds, neg);
+}
+
+/*
+ * Add each of the 64 buckets of histogram src into the matching bucket
+ * of histogram dst.
+ */
+void
+ddt_histogram_add(ddt_histogram_t *dst, const ddt_histogram_t *src)
+{
+	int bucket = 0;
+
+	while (bucket < 64) {
+		ddt_stat_add(&dst->ddh_stat[bucket],
+		    &src->ddh_stat[bucket], 0);
+		bucket++;
+	}
+}
+
+/*
+ * Collapse a histogram into a single ddt_stat_t: *dds is cleared and then
+ * receives the sum of all 64 buckets of ddh.
+ */
+void
+ddt_histogram_stat(ddt_stat_t *dds, const ddt_histogram_t *ddh)
+{
+	int bucket = 0;
+
+	memset(dds, 0, sizeof (*dds));
+
+	while (bucket < 64) {
+		ddt_stat_add(dds, &ddh->ddh_stat[bucket], 0);
+		bucket++;
+	}
+}
+
+/*
+ * Return B_TRUE when every uint64_t word of the histogram is zero, and
+ * B_FALSE as soon as a non-zero word is found.
+ */
+boolean_t
+ddt_histogram_empty(const ddt_histogram_t *ddh)
+{
+	const uint64_t *end = (const uint64_t *)(ddh + 1);
+
+	for (const uint64_t *word = (const uint64_t *)ddh; word < end;
+	    word++) {
+		if (*word != 0)
+			return (B_FALSE);
+	}
+
+	return (B_TRUE);
+}
+
+/*
+ * Accumulate the per-object statistics of every dedup table in the pool
+ * into *ddo_total, then turn the two space totals into per-entry averages.
+ *
+ * *ddo_total is not cleared here; the sums are added to whatever values it
+ * already holds.  The division is skipped when the entry count is zero.
+ */
+void
+ddt_get_dedup_object_stats(spa_t *spa, ddt_object_t *ddo_total)
+{
+	/* Add up the figures cached by ddt_object_sync(). */
+	for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
+		ddt_t *ddt = spa->spa_ddt[c];
+
+		/* No table for this checksum: nothing to add. */
+		if (ddt == NULL)
+			continue;
+
+		for (ddt_type_t type = 0; type < DDT_TYPES; type++) {
+			for (ddt_class_t class = 0; class < DDT_CLASSES;
+			    class++) {
+				const ddt_object_t *cached =
+				    &ddt->ddt_object_stats[type][class];
+
+				ddo_total->ddo_count += cached->ddo_count;
+				ddo_total->ddo_dspace += cached->ddo_dspace;
+				ddo_total->ddo_mspace += cached->ddo_mspace;
+			}
+		}
+	}
+
+	/* With no entries there is nothing to average. */
+	if (ddo_total->ddo_count == 0)
+		return;
+
+	ddo_total->ddo_dspace /= ddo_total->ddo_count;
+	ddo_total->ddo_mspace /= ddo_total->ddo_count;
+}
+
+/*
+ * Add the cached histogram of every type/class of every dedup table in
+ * the pool into *ddh.  Checksums that have no table are skipped.  *ddh is
+ * not cleared here; the caller supplies the starting values.
+ */
+void
+ddt_get_dedup_histogram(spa_t *spa, ddt_histogram_t *ddh)
+{
+	for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
+		ddt_t *ddt = spa->spa_ddt[c];
+
+		if (ddt != NULL) {
+			for (ddt_type_t type = 0; type < DDT_TYPES; type++) {
+				for (ddt_class_t class = 0;
+				    class < DDT_CLASSES; class++) {
+					const ddt_histogram_t *cached =
+					    &ddt->ddt_histogram_cache[type]
+					    [class];
+
+					ddt_histogram_add(ddh, cached);
+				}
+			}
+		}
+	}
+}
+
+/*
+ * Produce pool-wide dedup statistics in *dds_total.
+ *
+ * A zero-filled scratch histogram is allocated, filled by
+ * ddt_get_dedup_histogram(), summed into *dds_total by
+ * ddt_histogram_stat(), and then freed.
+ */
+void
+ddt_get_dedup_stats(spa_t *spa, ddt_stat_t *dds_total)
+{
+	ddt_histogram_t *scratch = kmem_zalloc(sizeof (*scratch), KM_SLEEP);
+
+	ddt_get_dedup_histogram(spa, scratch);
+	ddt_histogram_stat(dds_total, scratch);
+
+	kmem_free(scratch, sizeof (*scratch));
+}
+
+/*
+ * Return the pool's dedup dspace figure: referenced dsize minus stored
+ * dsize from the pool-wide dedup statistics.
+ *
+ * The result is cached in spa->spa_dedup_dspace.  Any cached value other
+ * than ~0ULL is returned as-is; only ~0ULL triggers a recalculation.
+ */
+uint64_t
+ddt_get_dedup_dspace(spa_t *spa)
+{
+	if (spa->spa_dedup_dspace == ~0ULL) {
+		ddt_stat_t totals;
+
+		memset(&totals, 0, sizeof (totals));
+
+		/* Calculate and cache the stats */
+		ddt_get_dedup_stats(spa, &totals);
+		spa->spa_dedup_dspace =
+		    totals.dds_ref_dsize - totals.dds_dsize;
+	}
+
+	return (spa->spa_dedup_dspace);
+}
+
+/*
+ * Return the pool's dedup ratio as a percentage: referenced dsize * 100
+ * divided by stored dsize.  When the stored dsize is zero the ratio is
+ * reported as 100.
+ */
+uint64_t
+ddt_get_pool_dedup_ratio(spa_t *spa)
+{
+	ddt_stat_t totals = { 0 };
+	uint64_t ratio = 100;
+
+	ddt_get_dedup_stats(spa, &totals);
+
+	/* Guard the division: nothing stored means a neutral 100. */
+	if (totals.dds_dsize != 0)
+		ratio = totals.dds_ref_dsize * 100 / totals.dds_dsize;
+
+	return (ratio);
+}
diff --git a/sys/contrib/openzfs/module/zfs/ddt_zap.c b/sys/contrib/openzfs/module/zfs/ddt_zap.c
index 8f6397a6d108..741554de3c60 100644
--- a/sys/contrib/openzfs/module/zfs/ddt_zap.c
+++ b/sys/contrib/openzfs/module/zfs/ddt_zap.c
@@ -28,12 +28,61 @@
#include <sys/spa.h>
#include <sys/zio.h>
#include <sys/ddt.h>
+#include <sys/ddt_impl.h>
#include <sys/zap.h>
#include <sys/dmu_tx.h>
+#include <sys/zio_compress.h>
static unsigned int ddt_zap_default_bs = 15;
static unsigned int ddt_zap_default_ibs = 15;
+#define DDT_ZAP_COMPRESS_BYTEORDER_MASK 0x80
+#define DDT_ZAP_COMPRESS_FUNCTION_MASK 0x7f
+
+#define DDT_KEY_WORDS (sizeof (ddt_key_t) / sizeof (uint64_t))
+
+/*
+ * Encode s_len bytes from src into dst for storage as a ZAP value.
+ *
+ * The first byte of dst is a "version" byte: its low seven bits
+ * (DDT_ZAP_COMPRESS_FUNCTION_MASK) carry the compression function used and
+ * its top bit (DDT_ZAP_COMPRESS_BYTEORDER_MASK) is set when
+ * ZFS_HOST_BYTEORDER is non-zero.  The payload follows that byte.
+ *
+ * dst must have room for at least s_len + 1 bytes (asserted below).
+ * Returns the payload length reported by the compressor plus one for the
+ * version byte.
+ */
+static size_t
+ddt_zap_compress(const void *src, uchar_t *dst, size_t s_len, size_t d_len)
+{
+ /* Reserve the first output byte for the version/flags byte. */
+ uchar_t *version = dst++;
+ int cpfunc = ZIO_COMPRESS_ZLE;
+ zio_compress_info_t *ci = &zio_compress_table[cpfunc];
+ size_t c_len;
+
+ ASSERT3U(d_len, >=, s_len + 1); /* no compression plus version byte */
+
+ /* The compressor only gets the space left after the version byte. */
+ c_len = ci->ci_compress((void *)src, dst, s_len, d_len - 1,
+ ci->ci_level);
+
+ /*
+ * A result as long as the input is stored raw and tagged as
+ * ZIO_COMPRESS_OFF instead.  Presumably ci_compress returns s_len
+ * when it could not shrink the data -- confirm against the
+ * compressor's contract.
+ */
+ if (c_len == s_len) {
+ cpfunc = ZIO_COMPRESS_OFF;
+ memcpy(dst, src, s_len);
+ }
+
+ /* Record the function actually used, plus the host byte order. */
+ *version = cpfunc;
+ if (ZFS_HOST_BYTEORDER)
+ *version |= DDT_ZAP_COMPRESS_BYTEORDER_MASK;
+
+ return (c_len + 1);
+}
+
+/*
+ * Decode a stored ZAP value (version byte followed by payload) from src
+ * into dst, which receives d_len bytes.
+ *
+ * The low seven bits of the version byte select the entry of
+ * zio_compress_table to use.  If that entry has no decompress callback,
+ * the payload is copied verbatim.  When the byte-order flag in the version
+ * byte differs from the host's ZFS_HOST_BYTEORDER, the decoded data is
+ * byte-swapped as an array of uint64_t.
+ *
+ * NOTE(review): s_len is handed to the decompressor unchanged even though
+ * src has already been advanced past the version byte -- confirm whether
+ * the callback should be given s_len - 1.
+ * NOTE(review): the raw-copy path copies d_len bytes regardless of s_len
+ * -- confirm callers always supply at least d_len payload bytes for
+ * values tagged as uncompressed.
+ */
+static void
+ddt_zap_decompress(uchar_t *src, void *dst, size_t s_len, size_t d_len)
+{
+ /* Consume the leading version/flags byte; src now points at payload. */
+ uchar_t version = *src++;
+ int cpfunc = version & DDT_ZAP_COMPRESS_FUNCTION_MASK;
+ zio_compress_info_t *ci = &zio_compress_table[cpfunc];
+
+ /* The decompressor's return value is deliberately discarded here. */
+ if (ci->ci_decompress != NULL)
+ (void) ci->ci_decompress(src, dst, s_len, d_len, ci->ci_level);
+ else
+ memcpy(dst, src, d_len);
+
+ /* Swap only when the stored byte order differs from the host's. */
+ if (((version & DDT_ZAP_COMPRESS_BYTEORDER_MASK) != 0) !=
+ (ZFS_HOST_BYTEORDER != 0))
+ byteswap_uint64_array(dst, d_len);
+}
+
static int
ddt_zap_create(objset_t *os, uint64_t *objectp, dmu_tx_t *tx, boolean_t prehash)
{
@@ -45,8 +94,10 @@ ddt_zap_create(objset_t *os, uint64_t *objectp, dmu_tx_t *tx, boolean_t prehash)
*objectp = zap_create_flags(os, 0, flags, DMU_OT_DDT_ZAP,
ddt_zap_default_bs, ddt_zap_default_ibs,
DMU_OT_NONE, 0, tx);
+ if (*objectp == 0)
+ return (SET_ERROR(ENOTSUP));
- return (*objectp == 0 ? SET_ERROR(ENOTSUP) : 0);
+ return (0);
}
static int
@@ -56,63 +107,75 @@ ddt_zap_destroy(objset_t *os, uint64_t object, dmu_tx_t *tx)
}
static int
-ddt_zap_lookup(objset_t *os, uint64_t object, ddt_entry_t *dde)
+ddt_zap_lookup(objset_t *os, uint64_t object,
+ const ddt_key_t *ddk, ddt_phys_t *phys, size_t psize)
{
uchar_t *cbuf;
uint64_t one, csize;
int error;
- cbuf = kmem_alloc(sizeof (dde->dde_phys) + 1, KM_SLEEP);
-
- error = zap_length_uint64(os, object, (uint64_t *)&dde->dde_key,
+ error = zap_length_uint64(os, object, (uint64_t *)ddk,
DDT_KEY_WORDS, &one, &csize);
if (error)
- goto out;
+ return (error);
- ASSERT(one == 1);
- ASSERT(csize <= (sizeof (dde->dde_phys) + 1));
+ ASSERT3U(one, ==, 1);
+ ASSERT3U(csize, <=, psize + 1);
- error = zap_lookup_uint64(os, object, (uint64_t *)&dde->dde_key,
+ cbuf = kmem_alloc(csize, KM_SLEEP);
+
+ error = zap_lookup_uint64(os, object, (uint64_t *)ddk,
DDT_KEY_WORDS, 1, csize, cbuf);
- if (error)
- goto out;
+ if (error == 0)
+ ddt_zap_decompress(cbuf, phys, csize, psize);
- ddt_decompress(cbuf, dde->dde_phys, csize, sizeof (dde->dde_phys));
-out:
- kmem_free(cbuf, sizeof (dde->dde_phys) + 1);
+ kmem_free(cbuf, csize);
return (error);
}
+/*
+ * Probe the ZAP object for the key ddk without fetching its value.
+ *
+ * The key is presented to zap_length_uint64() as DDT_KEY_WORDS uint64_t
+ * words, with NULL for both length outputs, and that call's return value
+ * is passed straight back to the caller.
+ */
+static int
+ddt_zap_contains(objset_t *os, uint64_t object, const ddt_key_t *ddk)
+{
+	uint64_t *key_words = (uint64_t *)ddk;
+	int error;
+
+	error = zap_length_uint64(os, object, key_words, DDT_KEY_WORDS,
+	    NULL, NULL);
+
+	return (error);
+}
+
static void
-ddt_zap_prefetch(objset_t *os, uint64_t object, ddt_entry_t *dde)
+ddt_zap_prefetch(objset_t *os, uint64_t object, const ddt_key_t *ddk)
{
- (void) zap_prefetch_uint64(os, object, (uint64_t *)&dde->dde_key,
- DDT_KEY_WORDS);
+ (void) zap_prefetch_uint64(os, object, (uint64_t *)ddk, DDT_KEY_WORDS);
}
static int
-ddt_zap_update(objset_t *os, uint64_t object, ddt_entry_t *dde, dmu_tx_t *tx)
+ddt_zap_update(objset_t *os, uint64_t object, const ddt_key_t *ddk,
+ const ddt_phys_t *phys, size_t psize, dmu_tx_t *tx)
{
- uchar_t cbuf[sizeof (dde->dde_phys) + 1];
- uint64_t csize;
+ const size_t cbuf_size = psize + 1;
+
+ uchar_t *cbuf = kmem_alloc(cbuf_size, KM_SLEEP);
+
+ uint64_t csize = ddt_zap_compress(phys, cbuf, psize, cbuf_size);
- csize = ddt_compress(dde->dde_phys, cbuf,
- sizeof (dde->dde_phys), sizeof (cbuf));
+ int error = zap_update_uint64(os, object, (uint64_t *)ddk,
+ DDT_KEY_WORDS, 1, csize, cbuf, tx);
- return (zap_update_uint64(os, object, (uint64_t *)&dde->dde_key,
- DDT_KEY_WORDS, 1, csize, cbuf, tx));
+ kmem_free(cbuf, cbuf_size);
+
+ return (error);
}
static int
-ddt_zap_remove(objset_t *os, uint64_t object, ddt_entry_t *dde, dmu_tx_t *tx)
+ddt_zap_remove(objset_t *os, uint64_t object, const ddt_key_t *ddk,
+ dmu_tx_t *tx)
{
- return (zap_remove_uint64(os, object, (uint64_t *)&dde->dde_key,
+ return (zap_remove_uint64(os, object, (uint64_t *)ddk,
DDT_KEY_WORDS, tx));
}
static int
-ddt_zap_walk(objset_t *os, uint64_t object, ddt_entry_t *dde, uint64_t *walk)
+ddt_zap_walk(objset_t *os, uint64_t object, uint64_t *walk, ddt_key_t *ddk,
+ ddt_phys_t *phys, size_t psize)
{
zap_cursor_t zc;
zap_attribute_t za;
@@ -131,17 +194,23 @@ ddt_zap_walk(objset_t *os, uint64_t object, ddt_entry_t *dde, uint64_t *walk)
zap_cursor_init_serialized(&zc, os, object, *walk);
}
if ((error = zap_cursor_retrieve(&zc, &za)) == 0) {
- uchar_t cbuf[sizeof (dde->dde_phys) + 1];
uint64_t csize = za.za_num_integers;
- ASSERT(za.za_integer_length == 1);
+
+ ASSERT3U(za.za_integer_length, ==, 1);
+ ASSERT3U(csize, <=, psize + 1);
+
+ uchar_t *cbuf = kmem_alloc(csize, KM_SLEEP);
+
error = zap_lookup_uint64(os, object, (uint64_t *)za.za_name,
DDT_KEY_WORDS, 1, csize, cbuf);
- ASSERT(error == 0);
+ ASSERT0(error);
if (error == 0) {
- ddt_decompress(cbuf, dde->dde_phys, csize,
- sizeof (dde->dde_phys));
- dde->dde_key = *(ddt_key_t *)za.za_name;
+ ddt_zap_decompress(cbuf, phys, csize, psize);
+ *ddk = *(ddt_key_t *)za.za_name;
}
+
+ kmem_free(cbuf, csize);
+
zap_cursor_advance(&zc);
*walk = zap_cursor_serialize(&zc);
}
@@ -160,6 +229,7 @@ const ddt_ops_t ddt_zap_ops = {
ddt_zap_create,
ddt_zap_destroy,
ddt_zap_lookup,
+ ddt_zap_contains,
ddt_zap_prefetch,
ddt_zap_update,
ddt_zap_remove,
diff --git a/sys/contrib/openzfs/module/zfs/dsl_scan.c b/sys/contrib/openzfs/module/zfs/dsl_scan.c
index d04149f560a4..060a5cc36d70 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_scan.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_scan.c
@@ -203,7 +203,7 @@ static uint_t zfs_scan_checkpoint_intval = 7200; /* in seconds */
int zfs_scan_suspend_progress = 0; /* set to prevent scans from progressing */
static int zfs_no_scrub_io = B_FALSE; /* set to disable scrub i/o */
static int zfs_no_scrub_prefetch = B_FALSE; /* set to disable scrub prefetch */
-static const enum ddt_class zfs_scrub_ddt_class_max = DDT_CLASS_DUPLICATE;
+static const ddt_class_t zfs_scrub_ddt_class_max = DDT_CLASS_DUPLICATE;
/* max number of blocks to free in a single TXG */
static uint64_t zfs_async_block_max_blocks = UINT64_MAX;
/* max number of dedup blocks to free in a single TXG */
@@ -2962,7 +2962,7 @@ dsl_scan_ddt_entry(dsl_scan_t *scn, enum zio_checksum checksum,
* If there are N references to a deduped block, we don't want to scrub it
* N times -- ideally, we should scrub it exactly once.
*
- * We leverage the fact that the dde's replication class (enum ddt_class)
+ * We leverage the fact that the dde's replication class (ddt_class_t)
* is ordered from highest replication class (DDT_CLASS_DITTO) to lowest
* (DDT_CLASS_UNIQUE) so that we may walk the DDT in that order.
*
diff --git a/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in b/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in
index edfdd47ee6d7..ecc50f487152 100755
--- a/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in
+++ b/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in
@@ -138,7 +138,11 @@ idmap_reason = 'Idmapped mount needs kernel 5.12+'
# copy_file_range() is not supported by all kernels
#
cfr_reason = 'Kernel copy_file_range support required'
-cfr_cross_reason = 'copy_file_range(2) cross-filesystem needs kernel 5.3+'
+
+if sys.platform.startswith('freebsd'):
+ cfr_cross_reason = 'copy_file_range(2) cross-filesystem needs FreeBSD 14+'
+else:
+ cfr_cross_reason = 'copy_file_range(2) cross-filesystem needs kernel 5.3+'
#
# These tests are known to fail, thus we use this list to prevent these
@@ -268,6 +272,22 @@ if sys.platform.startswith('freebsd'):
'pool_checkpoint/checkpoint_indirect': ['FAIL', 12623],
'resilver/resilver_restart_001': ['FAIL', known_reason],
'snapshot/snapshot_002_pos': ['FAIL', '14831'],
+ 'bclone/bclone_crossfs_corner_cases': ['SKIP', cfr_cross_reason],
+ 'bclone/bclone_crossfs_corner_cases_limited':
+ ['SKIP', cfr_cross_reason],
+ 'bclone/bclone_crossfs_data': ['SKIP', cfr_cross_reason],
+ 'bclone/bclone_crossfs_embedded': ['SKIP', cfr_cross_reason],
+ 'bclone/bclone_crossfs_hole': ['SKIP', cfr_cross_reason],
+ 'bclone/bclone_diffprops_all': ['SKIP', cfr_cross_reason],
+ 'bclone/bclone_diffprops_checksum': ['SKIP', cfr_cross_reason],
+ 'bclone/bclone_diffprops_compress': ['SKIP', cfr_cross_reason],
+ 'bclone/bclone_diffprops_copies': ['SKIP', cfr_cross_reason],
+ 'bclone/bclone_diffprops_recordsize': ['SKIP', cfr_cross_reason],
+ 'bclone/bclone_prop_sync': ['SKIP', cfr_cross_reason],
+ 'block_cloning/block_cloning_cross_enc_dataset':
+ ['SKIP', cfr_cross_reason],
+ 'block_cloning/block_cloning_copyfilerange_cross_dataset':
+ ['SKIP', cfr_cross_reason]
})
elif sys.platform.startswith('linux'):
maybe.update({
diff --git a/sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib b/sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib
index b4d2b91dd476..dfab48d2cdaf 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib
+++ b/sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib
@@ -61,13 +61,8 @@ function compare_version_gte
[ "$(printf "$1\n$2" | sort -V | tail -n1)" = "$1" ]
}
-# Linux kernel version comparison function
-#
-# $1 Linux version ("4.10", "2.6.32") or blank for installed Linux version
-#
-# Used for comparison: if [ $(linux_version) -ge $(linux_version "2.6.32") ]
-#
-function linux_version
+# Helper function used by linux_version() and freebsd_version()
+function kernel_version
{
typeset ver="$1"
@@ -83,6 +78,24 @@ function linux_version
echo $((version * 100000 + major * 1000 + minor))
}
+# Linux kernel version comparison function
+#
+# $1 Linux version ("4.10", "2.6.32") or blank for installed Linux version
+#
+# Used for comparison: if [ $(linux_version) -ge $(linux_version "2.6.32") ]
+function linux_version {
+ kernel_version "$1"
+}
+
+# FreeBSD version comparison function
+#
+# $1 FreeBSD version ("13.2", "14.0") or blank for installed FreeBSD version
+#
+# Used for comparison: if [ $(freebsd_version) -ge $(freebsd_version "13.2") ]
+function freebsd_version {
+ kernel_version "$1"
+}
+
# Determine if this is a Linux test system
#
# Return 0 if platform Linux, 1 if otherwise
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_common.kshlib b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_common.kshlib
index beba01c0ed26..3b8eaea5bb54 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_common.kshlib
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_common.kshlib
@@ -42,6 +42,12 @@ function verify_crossfs_block_cloning
if is_linux && [[ $(linux_version) -lt $(linux_version "5.3") ]]; then
log_unsupported "copy_file_range can't copy cross-filesystem before Linux 5.3"
fi
+
+ # Cross dataset block cloning only supported on FreeBSD 14+
+ # https://github.com/freebsd/freebsd-src/commit/969071be938c
+ if is_freebsd && [ $(freebsd_version) -lt $(freebsd_version 14.0) ] ; then
+ log_unsupported "Cloning across datasets not supported in $(uname -r)"
+ fi
}
# Unused.
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_cross_dataset.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_cross_dataset.ksh
index 43323c207a62..ad83d30291ac 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_cross_dataset.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_cross_dataset.ksh
@@ -26,12 +26,11 @@
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/block_cloning/block_cloning.kshlib
+. $STF_SUITE/tests/functional/bclone/bclone_common.kshlib
verify_runnable "global"
-if is_linux && [[ $(linux_version) -lt $(linux_version "5.3") ]]; then
- log_unsupported "copy_file_range can't copy cross-filesystem before Linux 5.3"
-fi
+verify_crossfs_block_cloning
claim="The copy_file_range syscall can clone across datasets."
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_cross_enc_dataset.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_cross_enc_dataset.ksh
index 34d3d2692555..702e23267f7e 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_cross_enc_dataset.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_cross_enc_dataset.ksh
@@ -26,12 +26,11 @@
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/block_cloning/block_cloning.kshlib
+. $STF_SUITE/tests/functional/bclone/bclone_common.kshlib
verify_runnable "global"
-if is_linux && [[ $(linux_version) -lt $(linux_version "5.3") ]]; then
- log_unsupported "copy_file_range can't copy cross-filesystem before Linux 5.3"
-fi
+verify_crossfs_block_cloning
claim="Block cloning across encrypted datasets."
diff --git a/sys/modules/zfs/Makefile b/sys/modules/zfs/Makefile
index 056e6a015971..654f0044a8fc 100644
--- a/sys/modules/zfs/Makefile
+++ b/sys/modules/zfs/Makefile
@@ -238,6 +238,7 @@ SRCS+= abd.c \
bqueue.c \
dataset_kstats.c \
ddt.c \
+ ddt_stats.c \
ddt_zap.c \
dmu.c \
dmu_diff.c \
@@ -389,6 +390,7 @@ CFLAGS.gcc+= -Wno-pointer-to-int-cast
CFLAGS.abd.c= -Wno-cast-qual
CFLAGS.ddt.c= -Wno-cast-qual
+CFLAGS.ddt_zap.c= -Wno-cast-qual
CFLAGS.dmu.c= -Wno-cast-qual
CFLAGS.dmu_traverse.c= -Wno-cast-qual
CFLAGS.dnode.c= ${NO_WUNUSED_BUT_SET_VARIABLE}
diff --git a/sys/modules/zfs/zfs_config.h b/sys/modules/zfs/zfs_config.h
index 5a3e421ab67e..d3fa8b6c5aff 100644
--- a/sys/modules/zfs/zfs_config.h
+++ b/sys/modules/zfs/zfs_config.h
@@ -1155,7 +1155,7 @@
/* #undef ZFS_IS_GPL_COMPATIBLE */
/* Define the project alias string. */
-#define ZFS_META_ALIAS "zfs-2.2.99-345-FreeBSD_ge0bd8118d"
+#define ZFS_META_ALIAS "zfs-2.2.99-365-FreeBSD_g8f2f6cd2a"
/* Define the project author. */
#define ZFS_META_AUTHOR "OpenZFS"
@@ -1185,7 +1185,7 @@
#define ZFS_META_NAME "zfs"
/* Define the project release. */
-#define ZFS_META_RELEASE "345-FreeBSD_ge0bd8118d"
+#define ZFS_META_RELEASE "365-FreeBSD_g8f2f6cd2a"
/* Define the project version. */
#define ZFS_META_VERSION "2.2.99"
diff --git a/sys/modules/zfs/zfs_gitrev.h b/sys/modules/zfs/zfs_gitrev.h
index 9370fe080d71..821a84187a9c 100644
--- a/sys/modules/zfs/zfs_gitrev.h
+++ b/sys/modules/zfs/zfs_gitrev.h
@@ -1 +1 @@
-#define ZFS_META_GITREV "zfs-2.2.99-345-ge0bd8118d"
+#define ZFS_META_GITREV "zfs-2.2.99-365-g8f2f6cd2a-dirty"