Diffstat (limited to 'sys')
63 files changed, 2001 insertions, 1294 deletions
diff --git a/sys/contrib/openzfs/cmd/zdb/zdb.c b/sys/contrib/openzfs/cmd/zdb/zdb.c index aba99fabbbb9..5e8f282e96c3 100644 --- a/sys/contrib/openzfs/cmd/zdb/zdb.c +++ b/sys/contrib/openzfs/cmd/zdb/zdb.c @@ -122,7 +122,7 @@ static int flagbits[256]; static uint64_t max_inflight_bytes = 256 * 1024 * 1024; /* 256MB */ static int leaked_objects = 0; -static range_tree_t *mos_refd_objs; +static zfs_range_tree_t *mos_refd_objs; static spa_t *spa; static objset_t *os; static boolean_t kernel_init_done; @@ -325,7 +325,7 @@ typedef struct metaslab_verify { /* * What's currently allocated for this metaslab. */ - range_tree_t *mv_allocated; + zfs_range_tree_t *mv_allocated; } metaslab_verify_t; typedef void ll_iter_t(dsl_deadlist_t *ll, void *arg); @@ -417,7 +417,7 @@ metaslab_spacemap_validation_cb(space_map_entry_t *sme, void *arg) uint64_t txg = sme->sme_txg; if (sme->sme_type == SM_ALLOC) { - if (range_tree_contains(mv->mv_allocated, + if (zfs_range_tree_contains(mv->mv_allocated, offset, size)) { (void) printf("ERROR: DOUBLE ALLOC: " "%llu [%llx:%llx] " @@ -426,11 +426,11 @@ metaslab_spacemap_validation_cb(space_map_entry_t *sme, void *arg) (u_longlong_t)size, (u_longlong_t)mv->mv_vdid, (u_longlong_t)mv->mv_msid); } else { - range_tree_add(mv->mv_allocated, + zfs_range_tree_add(mv->mv_allocated, offset, size); } } else { - if (!range_tree_contains(mv->mv_allocated, + if (!zfs_range_tree_contains(mv->mv_allocated, offset, size)) { (void) printf("ERROR: DOUBLE FREE: " "%llu [%llx:%llx] " @@ -439,7 +439,7 @@ metaslab_spacemap_validation_cb(space_map_entry_t *sme, void *arg) (u_longlong_t)size, (u_longlong_t)mv->mv_vdid, (u_longlong_t)mv->mv_msid); } else { - range_tree_remove(mv->mv_allocated, + zfs_range_tree_remove(mv->mv_allocated, offset, size); } } @@ -614,11 +614,11 @@ livelist_metaslab_validate(spa_t *spa) (longlong_t)vd->vdev_ms_count); uint64_t shift, start; - range_seg_type_t type = + zfs_range_seg_type_t type = metaslab_calculate_range_tree_type(vd, m, &start, &shift); metaslab_verify_t mv; - mv.mv_allocated = range_tree_create(NULL, + mv.mv_allocated = zfs_range_tree_create(NULL, type, NULL, start, shift); mv.mv_vdid = vd->vdev_id; mv.mv_msid = m->ms_id; @@ -633,8 +633,8 @@ livelist_metaslab_validate(spa_t *spa) spacemap_check_ms_sm(m->ms_sm, &mv); spacemap_check_sm_log(spa, &mv); - range_tree_vacate(mv.mv_allocated, NULL, NULL); - range_tree_destroy(mv.mv_allocated); + zfs_range_tree_vacate(mv.mv_allocated, NULL, NULL); + zfs_range_tree_destroy(mv.mv_allocated); zfs_btree_clear(&mv.mv_livelist_allocs); zfs_btree_destroy(&mv.mv_livelist_allocs); } @@ -1633,9 +1633,9 @@ static void dump_metaslab_stats(metaslab_t *msp) { char maxbuf[32]; - range_tree_t *rt = msp->ms_allocatable; + zfs_range_tree_t *rt = msp->ms_allocatable; zfs_btree_t *t = &msp->ms_allocatable_by_size; - int free_pct = range_tree_space(rt) * 100 / msp->ms_size; + int free_pct = zfs_range_tree_space(rt) * 100 / msp->ms_size; /* max sure nicenum has enough space */ _Static_assert(sizeof (maxbuf) >= NN_NUMBUF_SZ, "maxbuf truncated"); @@ -1646,7 +1646,7 @@ dump_metaslab_stats(metaslab_t *msp) "segments", zfs_btree_numnodes(t), "maxsize", maxbuf, "freepct", free_pct); (void) printf("\tIn-memory histogram:\n"); - dump_histogram(rt->rt_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0); + dump_histogram(rt->rt_histogram, ZFS_RANGE_TREE_HISTOGRAM_SIZE, 0); } static void @@ -1668,7 +1668,7 @@ dump_metaslab(metaslab_t *msp) if (dump_opt['m'] > 2 && !dump_opt['L']) { mutex_enter(&msp->ms_lock); VERIFY0(metaslab_load(msp)); - 
range_tree_stat_verify(msp->ms_allocatable); + zfs_range_tree_stat_verify(msp->ms_allocatable); dump_metaslab_stats(msp); metaslab_unload(msp); mutex_exit(&msp->ms_lock); @@ -1769,7 +1769,8 @@ dump_metaslab_groups(spa_t *spa, boolean_t show_special) (void) printf("%3llu%%\n", (u_longlong_t)mg->mg_fragmentation); } - dump_histogram(mg->mg_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0); + dump_histogram(mg->mg_histogram, + ZFS_RANGE_TREE_HISTOGRAM_SIZE, 0); } (void) printf("\tpool %s\tfragmentation", spa_name(spa)); @@ -1778,7 +1779,7 @@ dump_metaslab_groups(spa_t *spa, boolean_t show_special) (void) printf("\t%3s\n", "-"); else (void) printf("\t%3llu%%\n", (u_longlong_t)fragmentation); - dump_histogram(mc->mc_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0); + dump_histogram(mc->mc_histogram, ZFS_RANGE_TREE_HISTOGRAM_SIZE, 0); } static void @@ -2292,12 +2293,12 @@ dump_dtl(vdev_t *vd, int indent) required ? "DTL-required" : "DTL-expendable"); for (int t = 0; t < DTL_TYPES; t++) { - range_tree_t *rt = vd->vdev_dtl[t]; - if (range_tree_space(rt) == 0) + zfs_range_tree_t *rt = vd->vdev_dtl[t]; + if (zfs_range_tree_space(rt) == 0) continue; (void) snprintf(prefix, sizeof (prefix), "\t%*s%s", indent + 2, "", name[t]); - range_tree_walk(rt, dump_dtl_seg, prefix); + zfs_range_tree_walk(rt, dump_dtl_seg, prefix); if (dump_opt['d'] > 5 && vd->vdev_children == 0) dump_spacemap(spa->spa_meta_objset, vd->vdev_dtl_sm); @@ -6258,9 +6259,9 @@ load_unflushed_svr_segs_cb(spa_t *spa, space_map_entry_t *sme, return (0); if (sme->sme_type == SM_ALLOC) - range_tree_add(svr->svr_allocd_segs, offset, size); + zfs_range_tree_add(svr->svr_allocd_segs, offset, size); else - range_tree_remove(svr->svr_allocd_segs, offset, size); + zfs_range_tree_remove(svr->svr_allocd_segs, offset, size); return (0); } @@ -6314,18 +6315,20 @@ zdb_claim_removing(spa_t *spa, zdb_cb_t *zcb) vdev_t *vd = vdev_lookup_top(spa, svr->svr_vdev_id); vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping; - ASSERT0(range_tree_space(svr->svr_allocd_segs)); + ASSERT0(zfs_range_tree_space(svr->svr_allocd_segs)); - range_tree_t *allocs = range_tree_create(NULL, RANGE_SEG64, NULL, 0, 0); + zfs_range_tree_t *allocs = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64, + NULL, 0, 0); for (uint64_t msi = 0; msi < vd->vdev_ms_count; msi++) { metaslab_t *msp = vd->vdev_ms[msi]; - ASSERT0(range_tree_space(allocs)); + ASSERT0(zfs_range_tree_space(allocs)); if (msp->ms_sm != NULL) VERIFY0(space_map_load(msp->ms_sm, allocs, SM_ALLOC)); - range_tree_vacate(allocs, range_tree_add, svr->svr_allocd_segs); + zfs_range_tree_vacate(allocs, zfs_range_tree_add, + svr->svr_allocd_segs); } - range_tree_destroy(allocs); + zfs_range_tree_destroy(allocs); iterate_through_spacemap_logs(spa, load_unflushed_svr_segs_cb, svr); @@ -6334,12 +6337,12 @@ zdb_claim_removing(spa_t *spa, zdb_cb_t *zcb) * because we have not allocated mappings for * it yet. */ - range_tree_clear(svr->svr_allocd_segs, + zfs_range_tree_clear(svr->svr_allocd_segs, vdev_indirect_mapping_max_offset(vim), vd->vdev_asize - vdev_indirect_mapping_max_offset(vim)); - zcb->zcb_removing_size += range_tree_space(svr->svr_allocd_segs); - range_tree_vacate(svr->svr_allocd_segs, claim_segment_cb, vd); + zcb->zcb_removing_size += zfs_range_tree_space(svr->svr_allocd_segs); + zfs_range_tree_vacate(svr->svr_allocd_segs, claim_segment_cb, vd); spa_config_exit(spa, SCL_CONFIG, FTAG); } @@ -6442,7 +6445,8 @@ checkpoint_sm_exclude_entry_cb(space_map_entry_t *sme, void *arg) * also verify that the entry is there to begin with. 
*/ mutex_enter(&ms->ms_lock); - range_tree_remove(ms->ms_allocatable, sme->sme_offset, sme->sme_run); + zfs_range_tree_remove(ms->ms_allocatable, sme->sme_offset, + sme->sme_run); mutex_exit(&ms->ms_lock); cseea->cseea_checkpoint_size += sme->sme_run; @@ -6573,9 +6577,9 @@ load_unflushed_cb(spa_t *spa, space_map_entry_t *sme, uint64_t txg, void *arg) return (0); if (*uic_maptype == sme->sme_type) - range_tree_add(ms->ms_allocatable, offset, size); + zfs_range_tree_add(ms->ms_allocatable, offset, size); else - range_tree_remove(ms->ms_allocatable, offset, size); + zfs_range_tree_remove(ms->ms_allocatable, offset, size); return (0); } @@ -6609,7 +6613,7 @@ load_concrete_ms_allocatable_trees(spa_t *spa, maptype_t maptype) (longlong_t)vd->vdev_ms_count); mutex_enter(&msp->ms_lock); - range_tree_vacate(msp->ms_allocatable, NULL, NULL); + zfs_range_tree_vacate(msp->ms_allocatable, NULL, NULL); /* * We don't want to spend the CPU manipulating the @@ -6642,7 +6646,7 @@ load_indirect_ms_allocatable_tree(vdev_t *vd, metaslab_t *msp, vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping; mutex_enter(&msp->ms_lock); - range_tree_vacate(msp->ms_allocatable, NULL, NULL); + zfs_range_tree_vacate(msp->ms_allocatable, NULL, NULL); /* * We don't want to spend the CPU manipulating the @@ -6666,7 +6670,7 @@ load_indirect_ms_allocatable_tree(vdev_t *vd, metaslab_t *msp, */ ASSERT3U(ent_offset + ent_len, <=, msp->ms_start + msp->ms_size); - range_tree_add(msp->ms_allocatable, ent_offset, ent_len); + zfs_range_tree_add(msp->ms_allocatable, ent_offset, ent_len); } if (!msp->ms_loaded) @@ -6812,7 +6816,7 @@ zdb_check_for_obsolete_leaks(vdev_t *vd, zdb_cb_t *zcb) for (uint64_t inner_offset = 0; inner_offset < DVA_GET_ASIZE(&vimep->vimep_dst); inner_offset += 1ULL << vd->vdev_ashift) { - if (range_tree_contains(msp->ms_allocatable, + if (zfs_range_tree_contains(msp->ms_allocatable, offset + inner_offset, 1ULL << vd->vdev_ashift)) { obsolete_bytes += 1ULL << vd->vdev_ashift; } @@ -6895,10 +6899,10 @@ zdb_leak_fini(spa_t *spa, zdb_cb_t *zcb) * not referenced, which is not a bug. */ if (vd->vdev_ops == &vdev_indirect_ops) { - range_tree_vacate(msp->ms_allocatable, + zfs_range_tree_vacate(msp->ms_allocatable, NULL, NULL); } else { - range_tree_vacate(msp->ms_allocatable, + zfs_range_tree_vacate(msp->ms_allocatable, zdb_leak, vd); } if (msp->ms_loaded) { @@ -7796,7 +7800,7 @@ verify_checkpoint_sm_entry_cb(space_map_entry_t *sme, void *arg) * their respective ms_allocateable trees should not contain them. */ mutex_enter(&ms->ms_lock); - range_tree_verify_not_present(ms->ms_allocatable, + zfs_range_tree_verify_not_present(ms->ms_allocatable, sme->sme_offset, sme->sme_run); mutex_exit(&ms->ms_lock); @@ -7947,8 +7951,9 @@ verify_checkpoint_ms_spacemaps(spa_t *checkpoint, spa_t *current) * This way we ensure that none of the blocks that * are part of the checkpoint were freed by mistake. 
*/ - range_tree_walk(ckpoint_msp->ms_allocatable, - (range_tree_func_t *)range_tree_verify_not_present, + zfs_range_tree_walk(ckpoint_msp->ms_allocatable, + (zfs_range_tree_func_t *) + zfs_range_tree_verify_not_present, current_msp->ms_allocatable); } } @@ -8088,7 +8093,7 @@ static void mos_obj_refd(uint64_t obj) { if (obj != 0 && mos_refd_objs != NULL) - range_tree_add(mos_refd_objs, obj, 1); + zfs_range_tree_add(mos_refd_objs, obj, 1); } /* @@ -8098,8 +8103,8 @@ static void mos_obj_refd_multiple(uint64_t obj) { if (obj != 0 && mos_refd_objs != NULL && - !range_tree_contains(mos_refd_objs, obj, 1)) - range_tree_add(mos_refd_objs, obj, 1); + !zfs_range_tree_contains(mos_refd_objs, obj, 1)) + zfs_range_tree_add(mos_refd_objs, obj, 1); } static void @@ -8296,8 +8301,8 @@ dump_mos_leaks(spa_t *spa) */ uint64_t object = 0; while (dmu_object_next(mos, &object, B_FALSE, 0) == 0) { - if (range_tree_contains(mos_refd_objs, object, 1)) { - range_tree_remove(mos_refd_objs, object, 1); + if (zfs_range_tree_contains(mos_refd_objs, object, 1)) { + zfs_range_tree_remove(mos_refd_objs, object, 1); } else { dmu_object_info_t doi; const char *name; @@ -8315,11 +8320,11 @@ dump_mos_leaks(spa_t *spa) rv = 2; } } - (void) range_tree_walk(mos_refd_objs, mos_leaks_cb, NULL); - if (!range_tree_is_empty(mos_refd_objs)) + (void) zfs_range_tree_walk(mos_refd_objs, mos_leaks_cb, NULL); + if (!zfs_range_tree_is_empty(mos_refd_objs)) rv = 2; - range_tree_vacate(mos_refd_objs, NULL, NULL); - range_tree_destroy(mos_refd_objs); + zfs_range_tree_vacate(mos_refd_objs, NULL, NULL); + zfs_range_tree_destroy(mos_refd_objs); return (rv); } @@ -8441,8 +8446,8 @@ dump_zpool(spa_t *spa) if (dump_opt['d'] || dump_opt['i']) { spa_feature_t f; - mos_refd_objs = range_tree_create(NULL, RANGE_SEG64, NULL, 0, - 0); + mos_refd_objs = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64, + NULL, 0, 0); dump_objset(dp->dp_meta_objset); if (dump_opt['d'] >= 3) { diff --git a/sys/contrib/openzfs/cmd/zpool/zpool_main.c b/sys/contrib/openzfs/cmd/zpool/zpool_main.c index 506427a10672..5fcf0991de66 100644 --- a/sys/contrib/openzfs/cmd/zpool/zpool_main.c +++ b/sys/contrib/openzfs/cmd/zpool/zpool_main.c @@ -8481,19 +8481,19 @@ zpool_do_scrub(int argc, char **argv) if (is_pause && is_stop) { (void) fprintf(stderr, gettext("invalid option " - "combination :-s and -p are mutually exclusive\n")); + "combination: -s and -p are mutually exclusive\n")); usage(B_FALSE); } else if (is_pause && is_txg_continue) { (void) fprintf(stderr, gettext("invalid option " - "combination :-p and -C are mutually exclusive\n")); + "combination: -p and -C are mutually exclusive\n")); usage(B_FALSE); } else if (is_stop && is_txg_continue) { (void) fprintf(stderr, gettext("invalid option " - "combination :-s and -C are mutually exclusive\n")); + "combination: -s and -C are mutually exclusive\n")); usage(B_FALSE); } else if (is_error_scrub && is_txg_continue) { (void) fprintf(stderr, gettext("invalid option " - "combination :-e and -C are mutually exclusive\n")); + "combination: -e and -C are mutually exclusive\n")); usage(B_FALSE); } else { if (is_error_scrub) diff --git a/sys/contrib/openzfs/config/kernel-automount.m4 b/sys/contrib/openzfs/config/kernel-automount.m4 index 52f1931b748e..b5f1392d0fcd 100644 --- a/sys/contrib/openzfs/config/kernel-automount.m4 +++ b/sys/contrib/openzfs/config/kernel-automount.m4 @@ -5,7 +5,7 @@ dnl # solution to handling automounts. 
Prior to this cifs/nfs clients dnl # which required automount support would abuse the follow_link() dnl # operation on directories for this purpose. dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_AUTOMOUNT], [ +AC_DEFUN([ZFS_AC_KERNEL_SRC_D_AUTOMOUNT], [ ZFS_LINUX_TEST_SRC([dentry_operations_d_automount], [ #include <linux/dcache.h> static struct vfsmount *d_automount(struct path *p) { return NULL; } @@ -15,7 +15,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_AUTOMOUNT], [ ]) ]) -AC_DEFUN([ZFS_AC_KERNEL_AUTOMOUNT], [ +AC_DEFUN([ZFS_AC_KERNEL_D_AUTOMOUNT], [ AC_MSG_CHECKING([whether dops->d_automount() exists]) ZFS_LINUX_TEST_RESULT([dentry_operations_d_automount], [ AC_MSG_RESULT(yes) @@ -23,3 +23,40 @@ AC_DEFUN([ZFS_AC_KERNEL_AUTOMOUNT], [ ZFS_LINUX_TEST_ERROR([dops->d_automount()]) ]) ]) + +dnl # +dnl # 6.14 API change +dnl # dops->d_revalidate now has four args. +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_D_REVALIDATE_4ARGS], [ + ZFS_LINUX_TEST_SRC([dentry_operations_d_revalidate_4args], [ + #include <linux/dcache.h> + static int d_revalidate(struct inode *dir, + const struct qstr *name, struct dentry *dentry, + unsigned int fl) { return 0; } + struct dentry_operations dops __attribute__ ((unused)) = { + .d_revalidate = d_revalidate, + }; + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_D_REVALIDATE_4ARGS], [ + AC_MSG_CHECKING([whether dops->d_revalidate() takes 4 args]) + ZFS_LINUX_TEST_RESULT([dentry_operations_d_revalidate_4args], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_D_REVALIDATE_4ARGS, 1, + [dops->d_revalidate() takes 4 args]) + ],[ + AC_MSG_RESULT(no) + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_SRC_AUTOMOUNT], [ + ZFS_AC_KERNEL_SRC_D_AUTOMOUNT + ZFS_AC_KERNEL_SRC_D_REVALIDATE_4ARGS +]) + +AC_DEFUN([ZFS_AC_KERNEL_AUTOMOUNT], [ + ZFS_AC_KERNEL_D_AUTOMOUNT + ZFS_AC_KERNEL_D_REVALIDATE_4ARGS +]) diff --git a/sys/contrib/openzfs/config/kernel-vfs-iov_iter.m4 b/sys/contrib/openzfs/config/kernel-vfs-iov_iter.m4 index a223343030db..dc4e11cef2e9 100644 --- a/sys/contrib/openzfs/config/kernel-vfs-iov_iter.m4 +++ b/sys/contrib/openzfs/config/kernel-vfs-iov_iter.m4 @@ -21,6 +21,20 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_IOV_ITER], [ __attribute__((unused)) enum iter_type i = iov_iter_type(&iter); ]) + ZFS_LINUX_TEST_SRC([iov_iter_get_pages2], [ + #include <linux/uio.h> + ],[ + struct iov_iter iter = { 0 }; + struct page **pages = NULL; + size_t maxsize = 4096; + unsigned maxpages = 1; + size_t start; + size_t ret __attribute__ ((unused)); + + ret = iov_iter_get_pages2(&iter, pages, maxsize, maxpages, + &start); + ]) + ZFS_LINUX_TEST_SRC([iter_is_ubuf], [ #include <linux/uio.h> ],[ @@ -64,6 +78,19 @@ AC_DEFUN([ZFS_AC_KERNEL_VFS_IOV_ITER], [ AC_MSG_RESULT(no) ]) + + dnl # + dnl # Kernel 6.0 changed iov_iter_get_pages() to iov_iter_get_pages2(). + dnl # + AC_MSG_CHECKING([whether iov_iter_get_pages2() is available]) + ZFS_LINUX_TEST_RESULT([iov_iter_get_pages2], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_IOV_ITER_GET_PAGES2, 1, + [iov_iter_get_pages2() is available]) + ],[ + AC_MSG_RESULT(no) + ]) + dnl # dnl # Kernel 6.0 introduced the ITER_UBUF iov_iter type. iter_is_ubuf() dnl # was also added to determine if the iov_iter is an ITER_UBUF. diff --git a/sys/contrib/openzfs/contrib/pam_zfs_key/pam_zfs_key.c b/sys/contrib/openzfs/contrib/pam_zfs_key/pam_zfs_key.c index 08a8640669b3..c617a6e6b370 100644 --- a/sys/contrib/openzfs/contrib/pam_zfs_key/pam_zfs_key.c +++ b/sys/contrib/openzfs/contrib/pam_zfs_key/pam_zfs_key.c @@ -63,6 +63,7 @@ pam_syslog(pam_handle_t *pamh, int loglevel, const char *fmt, ...) 
#include <sys/file.h> #include <sys/wait.h> #include <pwd.h> +#include <lib/libzfs/libzfs_impl.h> #include <sys/mman.h> @@ -370,67 +371,6 @@ change_key(pam_handle_t *pamh, const char *ds_name, return (0); } -static int -decrypt_mount(pam_handle_t *pamh, const char *ds_name, - const char *passphrase, boolean_t noop) -{ - zfs_handle_t *ds = zfs_open(g_zfs, ds_name, ZFS_TYPE_FILESYSTEM); - if (ds == NULL) { - pam_syslog(pamh, LOG_ERR, "dataset %s not found", ds_name); - return (-1); - } - pw_password_t *key = prepare_passphrase(pamh, ds, passphrase, NULL); - if (key == NULL) { - zfs_close(ds); - return (-1); - } - int ret = lzc_load_key(ds_name, noop, (uint8_t *)key->value, - WRAPPING_KEY_LEN); - pw_free(key); - if (ret && ret != EEXIST) { - pam_syslog(pamh, LOG_ERR, "load_key failed: %d", ret); - zfs_close(ds); - return (-1); - } - if (noop) { - goto out; - } - ret = zfs_mount(ds, NULL, 0); - if (ret) { - pam_syslog(pamh, LOG_ERR, "mount failed: %d", ret); - zfs_close(ds); - return (-1); - } -out: - zfs_close(ds); - return (0); -} - -static int -unmount_unload(pam_handle_t *pamh, const char *ds_name, boolean_t force) -{ - zfs_handle_t *ds = zfs_open(g_zfs, ds_name, ZFS_TYPE_FILESYSTEM); - if (ds == NULL) { - pam_syslog(pamh, LOG_ERR, "dataset %s not found", ds_name); - return (-1); - } - int ret = zfs_unmount(ds, NULL, force ? MS_FORCE : 0); - if (ret) { - pam_syslog(pamh, LOG_ERR, "zfs_unmount failed with: %d", ret); - zfs_close(ds); - return (-1); - } - - ret = lzc_unload_key(ds_name); - if (ret) { - pam_syslog(pamh, LOG_ERR, "unload_key failed with: %d", ret); - zfs_close(ds); - return (-1); - } - zfs_close(ds); - return (0); -} - typedef struct { char *homes_prefix; char *runstatedir; @@ -443,6 +383,7 @@ typedef struct { boolean_t unmount_and_unload; boolean_t force_unmount; boolean_t recursive_homes; + boolean_t mount_recursively; } zfs_key_config_t; static int @@ -481,6 +422,7 @@ zfs_key_config_load(pam_handle_t *pamh, zfs_key_config_t *config, config->unmount_and_unload = B_TRUE; config->force_unmount = B_FALSE; config->recursive_homes = B_FALSE; + config->mount_recursively = B_FALSE; config->dsname = NULL; config->homedir = NULL; for (int c = 0; c < argc; c++) { @@ -500,6 +442,8 @@ zfs_key_config_load(pam_handle_t *pamh, zfs_key_config_t *config, config->force_unmount = B_TRUE; } else if (strcmp(argv[c], "recursive_homes") == 0) { config->recursive_homes = B_TRUE; + } else if (strcmp(argv[c], "mount_recursively") == 0) { + config->mount_recursively = B_TRUE; } else if (strcmp(argv[c], "prop_mountpoint") == 0) { if (config->homedir == NULL) config->homedir = strdup(entry->pw_dir); @@ -508,6 +452,217 @@ zfs_key_config_load(pam_handle_t *pamh, zfs_key_config_t *config, return (PAM_SUCCESS); } +typedef struct { + pam_handle_t *pamh; + zfs_key_config_t *target; +} mount_umount_dataset_data_t; + +static int +mount_dataset(zfs_handle_t *zhp, void *data) +{ + mount_umount_dataset_data_t *mount_umount_dataset_data = data; + + zfs_key_config_t *target = mount_umount_dataset_data->target; + pam_handle_t *pamh = mount_umount_dataset_data->pamh; + + /* Refresh properties to get the latest key status */ + zfs_refresh_properties(zhp); + + int ret = 0; + + /* Check if dataset type is filesystem */ + if (zhp->zfs_type != ZFS_TYPE_FILESYSTEM) { + pam_syslog(pamh, LOG_DEBUG, + "dataset is not filesystem: %s, skipping.", + zfs_get_name(zhp)); + return (0); + } + + /* Check if encryption key is available */ + if (zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS) == + ZFS_KEYSTATUS_UNAVAILABLE) { + 
pam_syslog(pamh, LOG_WARNING, + "key unavailable for: %s, skipping", + zfs_get_name(zhp)); + return (0); + } + + /* Check if prop canmount is on */ + if (zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT) != ZFS_CANMOUNT_ON) { + pam_syslog(pamh, LOG_INFO, + "canmount is not on for: %s, skipping", + zfs_get_name(zhp)); + return (0); + } + + /* Get mountpoint prop for check */ + char mountpoint[ZFS_MAXPROPLEN]; + if ((ret = zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mountpoint, + sizeof (mountpoint), NULL, NULL, 0, 1)) != 0) { + pam_syslog(pamh, LOG_ERR, + "failed to get mountpoint prop: %d", ret); + return (-1); + } + + /* Check if mountpoint isn't none or legacy */ + if (strcmp(mountpoint, ZFS_MOUNTPOINT_NONE) == 0 || + strcmp(mountpoint, ZFS_MOUNTPOINT_LEGACY) == 0) { + pam_syslog(pamh, LOG_INFO, + "mountpoint is none or legacy for: %s, skipping", + zfs_get_name(zhp)); + return (0); + } + + /* Don't mount the dataset if already mounted */ + if (zfs_is_mounted(zhp, NULL)) { + pam_syslog(pamh, LOG_INFO, "already mounted: %s", + zfs_get_name(zhp)); + return (0); + } + + /* Mount the dataset */ + ret = zfs_mount(zhp, NULL, 0); + if (ret) { + pam_syslog(pamh, LOG_ERR, + "zfs_mount failed for %s with: %d", zfs_get_name(zhp), + ret); + return (ret); + } + + /* Recursively mount children if the recursive flag is set */ + if (target->mount_recursively) { + ret = zfs_iter_filesystems_v2(zhp, 0, mount_dataset, data); + if (ret != 0) { + pam_syslog(pamh, LOG_ERR, + "child iteration failed: %d", ret); + return (-1); + } + } + + return (ret); +} + +static int +umount_dataset(zfs_handle_t *zhp, void *data) +{ + mount_umount_dataset_data_t *mount_umount_dataset_data = data; + + zfs_key_config_t *target = mount_umount_dataset_data->target; + pam_handle_t *pamh = mount_umount_dataset_data->pamh; + + int ret = 0; + /* Recursively umount children if the recursive flag is set */ + if (target->mount_recursively) { + ret = zfs_iter_filesystems_v2(zhp, 0, umount_dataset, data); + if (ret != 0) { + pam_syslog(pamh, LOG_ERR, + "child iteration failed: %d", ret); + return (-1); + } + } + + /* Check if dataset type is filesystem */ + if (zhp->zfs_type != ZFS_TYPE_FILESYSTEM) { + pam_syslog(pamh, LOG_DEBUG, + "dataset is not filesystem: %s, skipping", + zfs_get_name(zhp)); + return (0); + } + + /* Don't umount the dataset if already unmounted */ + if (zfs_is_mounted(zhp, NULL) == 0) { + pam_syslog(pamh, LOG_INFO, "already unmounted: %s", + zfs_get_name(zhp)); + return (0); + } + + /* Unmount the dataset */ + ret = zfs_unmount(zhp, NULL, target->force_unmount ? 
MS_FORCE : 0); + if (ret) { + pam_syslog(pamh, LOG_ERR, + "zfs_unmount failed for %s with: %d", zfs_get_name(zhp), + ret); + return (ret); + } + + return (ret); +} + +static int +decrypt_mount(pam_handle_t *pamh, zfs_key_config_t *config, const char *ds_name, + const char *passphrase, boolean_t noop) +{ + zfs_handle_t *ds = zfs_open(g_zfs, ds_name, ZFS_TYPE_FILESYSTEM); + if (ds == NULL) { + pam_syslog(pamh, LOG_ERR, "dataset %s not found", ds_name); + return (-1); + } + pw_password_t *key = prepare_passphrase(pamh, ds, passphrase, NULL); + if (key == NULL) { + zfs_close(ds); + return (-1); + } + int ret = lzc_load_key(ds_name, noop, (uint8_t *)key->value, + WRAPPING_KEY_LEN); + pw_free(key); + if (ret && ret != EEXIST) { + pam_syslog(pamh, LOG_ERR, "load_key failed: %d", ret); + zfs_close(ds); + return (-1); + } + + if (noop) { + zfs_close(ds); + return (0); + } + + mount_umount_dataset_data_t data; + data.pamh = pamh; + data.target = config; + + ret = mount_dataset(ds, &data); + if (ret != 0) { + pam_syslog(pamh, LOG_ERR, "mount failed: %d", ret); + zfs_close(ds); + return (-1); + } + + zfs_close(ds); + return (0); +} + +static int +unmount_unload(pam_handle_t *pamh, const char *ds_name, + zfs_key_config_t *target) +{ + zfs_handle_t *ds = zfs_open(g_zfs, ds_name, ZFS_TYPE_FILESYSTEM); + if (ds == NULL) { + pam_syslog(pamh, LOG_ERR, "dataset %s not found", ds_name); + return (-1); + } + + mount_umount_dataset_data_t data; + data.pamh = pamh; + data.target = target; + + int ret = umount_dataset(ds, &data); + if (ret) { + pam_syslog(pamh, LOG_ERR, + "unmount_dataset failed with: %d", ret); + zfs_close(ds); + return (-1); + } + + ret = lzc_unload_key(ds_name); + if (ret) { + pam_syslog(pamh, LOG_ERR, "unload_key failed with: %d", ret); + zfs_close(ds); + return (-1); + } + zfs_close(ds); + return (0); +} + static void zfs_key_config_free(zfs_key_config_t *config) { @@ -548,7 +703,7 @@ find_dsname_by_prop_value(zfs_handle_t *zhp, void *data) } static char * -zfs_key_config_get_dataset(zfs_key_config_t *config) +zfs_key_config_get_dataset(pam_handle_t *pamh, zfs_key_config_t *config) { if (config->homedir != NULL && config->homes_prefix != NULL) { @@ -559,7 +714,7 @@ zfs_key_config_get_dataset(zfs_key_config_t *config) zfs_handle_t *zhp = zfs_open(g_zfs, config->homes_prefix, ZFS_TYPE_FILESYSTEM); if (zhp == NULL) { - pam_syslog(NULL, LOG_ERR, + pam_syslog(pamh, LOG_ERR, "dataset %s not found", config->homes_prefix); return (NULL); @@ -697,13 +852,13 @@ pam_sm_authenticate(pam_handle_t *pamh, int flags, zfs_key_config_free(&config); return (PAM_SERVICE_ERR); } - char *dataset = zfs_key_config_get_dataset(&config); + char *dataset = zfs_key_config_get_dataset(pamh, &config); if (!dataset) { pam_zfs_free(); zfs_key_config_free(&config); return (PAM_SERVICE_ERR); } - if (decrypt_mount(pamh, dataset, token->value, B_TRUE) == -1) { + if (decrypt_mount(pamh, &config, dataset, token->value, B_TRUE) == -1) { free(dataset); pam_zfs_free(); zfs_key_config_free(&config); @@ -749,7 +904,7 @@ pam_sm_chauthtok(pam_handle_t *pamh, int flags, zfs_key_config_free(&config); return (PAM_SERVICE_ERR); } - char *dataset = zfs_key_config_get_dataset(&config); + char *dataset = zfs_key_config_get_dataset(pamh, &config); if (!dataset) { pam_zfs_free(); zfs_key_config_free(&config); @@ -763,7 +918,7 @@ pam_sm_chauthtok(pam_handle_t *pamh, int flags, zfs_key_config_free(&config); return (PAM_SERVICE_ERR); } - if (decrypt_mount(pamh, dataset, + if (decrypt_mount(pamh, &config, dataset, old_token->value, B_TRUE) == -1) { 
pam_syslog(pamh, LOG_ERR, "old token mismatch"); @@ -784,7 +939,7 @@ pam_sm_chauthtok(pam_handle_t *pamh, int flags, pw_clear(pamh, OLD_PASSWORD_VAR_NAME); return (PAM_SERVICE_ERR); } - char *dataset = zfs_key_config_get_dataset(&config); + char *dataset = zfs_key_config_get_dataset(pamh, &config); if (!dataset) { pam_zfs_free(); zfs_key_config_free(&config); @@ -793,7 +948,7 @@ pam_sm_chauthtok(pam_handle_t *pamh, int flags, return (PAM_SERVICE_ERR); } int was_loaded = is_key_loaded(pamh, dataset); - if (!was_loaded && decrypt_mount(pamh, dataset, + if (!was_loaded && decrypt_mount(pamh, &config, dataset, old_token->value, B_FALSE) == -1) { free(dataset); pam_zfs_free(); @@ -804,7 +959,7 @@ pam_sm_chauthtok(pam_handle_t *pamh, int flags, } int changed = change_key(pamh, dataset, token->value); if (!was_loaded) { - unmount_unload(pamh, dataset, config.force_unmount); + unmount_unload(pamh, dataset, &config); } free(dataset); pam_zfs_free(); @@ -856,13 +1011,14 @@ pam_sm_open_session(pam_handle_t *pamh, int flags, zfs_key_config_free(&config); return (PAM_SERVICE_ERR); } - char *dataset = zfs_key_config_get_dataset(&config); + char *dataset = zfs_key_config_get_dataset(pamh, &config); if (!dataset) { pam_zfs_free(); zfs_key_config_free(&config); return (PAM_SERVICE_ERR); } - if (decrypt_mount(pamh, dataset, token->value, B_FALSE) == -1) { + if (decrypt_mount(pamh, &config, dataset, + token->value, B_FALSE) == -1) { free(dataset); pam_zfs_free(); zfs_key_config_free(&config); @@ -910,13 +1066,13 @@ pam_sm_close_session(pam_handle_t *pamh, int flags, zfs_key_config_free(&config); return (PAM_SERVICE_ERR); } - char *dataset = zfs_key_config_get_dataset(&config); + char *dataset = zfs_key_config_get_dataset(pamh, &config); if (!dataset) { pam_zfs_free(); zfs_key_config_free(&config); return (PAM_SESSION_ERR); } - if (unmount_unload(pamh, dataset, config.force_unmount) == -1) { + if (unmount_unload(pamh, dataset, &config) == -1) { free(dataset); pam_zfs_free(); zfs_key_config_free(&config); diff --git a/sys/contrib/openzfs/include/os/freebsd/spl/sys/mod_os.h b/sys/contrib/openzfs/include/os/freebsd/spl/sys/mod_os.h index df7be6fc13f6..1479242de53b 100644 --- a/sys/contrib/openzfs/include/os/freebsd/spl/sys/mod_os.h +++ b/sys/contrib/openzfs/include/os/freebsd/spl/sys/mod_os.h @@ -94,6 +94,9 @@ #define param_set_max_auto_ashift_args(var) \ CTLTYPE_UINT, NULL, 0, param_set_max_auto_ashift, "IU" +#define param_set_raidz_impl_args(var) \ + CTLTYPE_STRING, NULL, 0, param_set_raidz_impl, "A" + #define spa_taskq_read_param_set_args(var) \ CTLTYPE_STRING, NULL, 0, spa_taskq_read_param, "A" diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/uio.h b/sys/contrib/openzfs/include/os/linux/spl/sys/uio.h index 9e7afea2ab34..fcb4a464c9e4 100644 --- a/sys/contrib/openzfs/include/os/linux/spl/sys/uio.h +++ b/sys/contrib/openzfs/include/os/linux/spl/sys/uio.h @@ -63,6 +63,7 @@ typedef enum zfs_uio_seg { typedef struct { struct page **pages; /* Mapped pages */ long npages; /* Number of mapped pages */ + boolean_t pinned; /* Whether FOLL_PIN was used */ } zfs_uio_dio_t; typedef struct zfs_uio { @@ -199,4 +200,13 @@ zfs_uio_iov_iter_init(zfs_uio_t *uio, struct iov_iter *iter, offset_t offset, #define zfs_uio_iov_iter_type(iter) (iter)->type #endif +#if defined(HAVE_ITER_IS_UBUF) +#define zfs_user_backed_iov_iter(iter) \ + (iter_is_ubuf((iter)) || \ + (zfs_uio_iov_iter_type((iter)) == ITER_IOVEC)) +#else +#define zfs_user_backed_iov_iter(iter) \ + (zfs_uio_iov_iter_type((iter)) == ITER_IOVEC) +#endif + #endif 
/* SPL_UIO_H */ diff --git a/sys/contrib/openzfs/include/sys/dnode.h b/sys/contrib/openzfs/include/sys/dnode.h index 5d0f0fb26d02..b6d3e2c918c5 100644 --- a/sys/contrib/openzfs/include/sys/dnode.h +++ b/sys/contrib/openzfs/include/sys/dnode.h @@ -335,7 +335,7 @@ struct dnode { /* protected by dn_mtx: */ kmutex_t dn_mtx; list_t dn_dirty_records[TXG_SIZE]; - struct range_tree *dn_free_ranges[TXG_SIZE]; + struct zfs_range_tree *dn_free_ranges[TXG_SIZE]; uint64_t dn_allocated_txg; uint64_t dn_free_txg; uint64_t dn_assigned_txg; diff --git a/sys/contrib/openzfs/include/sys/metaslab.h b/sys/contrib/openzfs/include/sys/metaslab.h index 815b5d0c9cf1..0171cd0fe0f8 100644 --- a/sys/contrib/openzfs/include/sys/metaslab.h +++ b/sys/contrib/openzfs/include/sys/metaslab.h @@ -139,7 +139,7 @@ void metaslab_set_selected_txg(metaslab_t *, uint64_t); extern int metaslab_debug_load; -range_seg_type_t metaslab_calculate_range_tree_type(vdev_t *vdev, +zfs_range_seg_type_t metaslab_calculate_range_tree_type(vdev_t *vdev, metaslab_t *msp, uint64_t *start, uint64_t *shift); #ifdef __cplusplus diff --git a/sys/contrib/openzfs/include/sys/metaslab_impl.h b/sys/contrib/openzfs/include/sys/metaslab_impl.h index 4f434291ddbf..9c35f27ff0b4 100644 --- a/sys/contrib/openzfs/include/sys/metaslab_impl.h +++ b/sys/contrib/openzfs/include/sys/metaslab_impl.h @@ -200,7 +200,7 @@ struct metaslab_class { uint64_t mc_deferred; /* total deferred frees */ uint64_t mc_space; /* total space (alloc + free) */ uint64_t mc_dspace; /* total deflated space */ - uint64_t mc_histogram[RANGE_TREE_HISTOGRAM_SIZE]; + uint64_t mc_histogram[ZFS_RANGE_TREE_HISTOGRAM_SIZE]; /* * List of all loaded metaslabs in the class, sorted in order of most @@ -290,7 +290,7 @@ struct metaslab_group { uint64_t mg_allocations; uint64_t mg_failed_allocations; uint64_t mg_fragmentation; - uint64_t mg_histogram[RANGE_TREE_HISTOGRAM_SIZE]; + uint64_t mg_histogram[ZFS_RANGE_TREE_HISTOGRAM_SIZE]; int mg_ms_disabled; boolean_t mg_disabled_updating; @@ -398,8 +398,8 @@ struct metaslab { uint64_t ms_size; uint64_t ms_fragmentation; - range_tree_t *ms_allocating[TXG_SIZE]; - range_tree_t *ms_allocatable; + zfs_range_tree_t *ms_allocating[TXG_SIZE]; + zfs_range_tree_t *ms_allocatable; uint64_t ms_allocated_this_txg; uint64_t ms_allocating_total; @@ -408,10 +408,12 @@ struct metaslab { * ms_free*tree only have entries while syncing, and are empty * between syncs. */ - range_tree_t *ms_freeing; /* to free this syncing txg */ - range_tree_t *ms_freed; /* already freed this syncing txg */ - range_tree_t *ms_defer[TXG_DEFER_SIZE]; - range_tree_t *ms_checkpointing; /* to add to the checkpoint */ + zfs_range_tree_t *ms_freeing; /* to free this syncing txg */ + /* already freed this syncing txg */ + zfs_range_tree_t *ms_freed; + zfs_range_tree_t *ms_defer[TXG_DEFER_SIZE]; + /* to add to the checkpoint */ + zfs_range_tree_t *ms_checkpointing; /* * The ms_trim tree is the set of allocatable segments which are @@ -421,7 +423,7 @@ struct metaslab { * is unloaded. Its purpose is to aggregate freed ranges to * facilitate efficient trimming. */ - range_tree_t *ms_trim; + zfs_range_tree_t *ms_trim; boolean_t ms_condensing; /* condensing? */ boolean_t ms_condense_wanted; @@ -542,8 +544,8 @@ struct metaslab { * Allocs and frees that are committed to the vdev log spacemap but * not yet to this metaslab's spacemap. 
*/ - range_tree_t *ms_unflushed_allocs; - range_tree_t *ms_unflushed_frees; + zfs_range_tree_t *ms_unflushed_allocs; + zfs_range_tree_t *ms_unflushed_frees; /* * We have flushed entries up to but not including this TXG. In diff --git a/sys/contrib/openzfs/include/sys/range_tree.h b/sys/contrib/openzfs/include/sys/range_tree.h index d6f60e795288..23eea3210c98 100644 --- a/sys/contrib/openzfs/include/sys/range_tree.h +++ b/sys/contrib/openzfs/include/sys/range_tree.h @@ -37,25 +37,25 @@ extern "C" { #endif -#define RANGE_TREE_HISTOGRAM_SIZE 64 +#define ZFS_RANGE_TREE_HISTOGRAM_SIZE 64 -typedef struct range_tree_ops range_tree_ops_t; +typedef struct zfs_range_tree_ops zfs_range_tree_ops_t; -typedef enum range_seg_type { - RANGE_SEG32, - RANGE_SEG64, - RANGE_SEG_GAP, - RANGE_SEG_NUM_TYPES, -} range_seg_type_t; +typedef enum zfs_range_seg_type { + ZFS_RANGE_SEG32, + ZFS_RANGE_SEG64, + ZFS_RANGE_SEG_GAP, + ZFS_RANGE_SEG_NUM_TYPES, +} zfs_range_seg_type_t; /* * Note: the range_tree may not be accessed concurrently; consumers * must provide external locking if required. */ -typedef struct range_tree { +typedef struct zfs_range_tree { zfs_btree_t rt_root; /* offset-ordered segment b-tree */ uint64_t rt_space; /* sum of all segments in the map */ - range_seg_type_t rt_type; /* type of range_seg_t in use */ + zfs_range_seg_type_t rt_type; /* type of zfs_range_seg_t in use */ /* * All data that is stored in the range tree must have a start higher * than or equal to rt_start, and all sizes and offsets must be @@ -63,7 +63,7 @@ typedef struct range_tree { */ uint8_t rt_shift; uint64_t rt_start; - const range_tree_ops_t *rt_ops; + const zfs_range_tree_ops_t *rt_ops; void *rt_arg; uint64_t rt_gap; /* allowable inter-segment gap */ @@ -72,61 +72,61 @@ typedef struct range_tree { * rt_histogram[i], contains the number of ranges whose size is: * 2^i <= size of range in bytes < 2^(i+1) */ - uint64_t rt_histogram[RANGE_TREE_HISTOGRAM_SIZE]; -} range_tree_t; + uint64_t rt_histogram[ZFS_RANGE_TREE_HISTOGRAM_SIZE]; +} zfs_range_tree_t; -typedef struct range_seg32 { +typedef struct zfs_range_seg32 { uint32_t rs_start; /* starting offset of this segment */ uint32_t rs_end; /* ending offset (non-inclusive) */ -} range_seg32_t; +} zfs_range_seg32_t; /* * Extremely large metaslabs, vdev-wide trees, and dnode-wide trees may * require 64-bit integers for ranges. */ -typedef struct range_seg64 { +typedef struct zfs_range_seg64 { uint64_t rs_start; /* starting offset of this segment */ uint64_t rs_end; /* ending offset (non-inclusive) */ -} range_seg64_t; +} zfs_range_seg64_t; -typedef struct range_seg_gap { +typedef struct zfs_range_seg_gap { uint64_t rs_start; /* starting offset of this segment */ uint64_t rs_end; /* ending offset (non-inclusive) */ uint64_t rs_fill; /* actual fill if gap mode is on */ -} range_seg_gap_t; +} zfs_range_seg_gap_t; /* * This type needs to be the largest of the range segs, since it will be stack * allocated and then cast the actual type to do tree operations. */ -typedef range_seg_gap_t range_seg_max_t; +typedef zfs_range_seg_gap_t zfs_range_seg_max_t; /* * This is just for clarity of code purposes, so we can make it clear that a * pointer is to a range seg of some type; when we need to do the actual math, * we'll figure out the real type. 
*/ -typedef void range_seg_t; - -struct range_tree_ops { - void (*rtop_create)(range_tree_t *rt, void *arg); - void (*rtop_destroy)(range_tree_t *rt, void *arg); - void (*rtop_add)(range_tree_t *rt, void *rs, void *arg); - void (*rtop_remove)(range_tree_t *rt, void *rs, void *arg); - void (*rtop_vacate)(range_tree_t *rt, void *arg); +typedef void zfs_range_seg_t; + +struct zfs_range_tree_ops { + void (*rtop_create)(zfs_range_tree_t *rt, void *arg); + void (*rtop_destroy)(zfs_range_tree_t *rt, void *arg); + void (*rtop_add)(zfs_range_tree_t *rt, void *rs, void *arg); + void (*rtop_remove)(zfs_range_tree_t *rt, void *rs, void *arg); + void (*rtop_vacate)(zfs_range_tree_t *rt, void *arg); }; static inline uint64_t -rs_get_start_raw(const range_seg_t *rs, const range_tree_t *rt) +zfs_rs_get_start_raw(const zfs_range_seg_t *rs, const zfs_range_tree_t *rt) { - ASSERT3U(rt->rt_type, <=, RANGE_SEG_NUM_TYPES); + ASSERT3U(rt->rt_type, <=, ZFS_RANGE_SEG_NUM_TYPES); switch (rt->rt_type) { - case RANGE_SEG32: - return (((const range_seg32_t *)rs)->rs_start); - case RANGE_SEG64: - return (((const range_seg64_t *)rs)->rs_start); - case RANGE_SEG_GAP: - return (((const range_seg_gap_t *)rs)->rs_start); + case ZFS_RANGE_SEG32: + return (((const zfs_range_seg32_t *)rs)->rs_start); + case ZFS_RANGE_SEG64: + return (((const zfs_range_seg64_t *)rs)->rs_start); + case ZFS_RANGE_SEG_GAP: + return (((const zfs_range_seg_gap_t *)rs)->rs_start); default: VERIFY(0); return (0); @@ -134,16 +134,16 @@ rs_get_start_raw(const range_seg_t *rs, const range_tree_t *rt) } static inline uint64_t -rs_get_end_raw(const range_seg_t *rs, const range_tree_t *rt) +zfs_rs_get_end_raw(const zfs_range_seg_t *rs, const zfs_range_tree_t *rt) { - ASSERT3U(rt->rt_type, <=, RANGE_SEG_NUM_TYPES); + ASSERT3U(rt->rt_type, <=, ZFS_RANGE_SEG_NUM_TYPES); switch (rt->rt_type) { - case RANGE_SEG32: - return (((const range_seg32_t *)rs)->rs_end); - case RANGE_SEG64: - return (((const range_seg64_t *)rs)->rs_end); - case RANGE_SEG_GAP: - return (((const range_seg_gap_t *)rs)->rs_end); + case ZFS_RANGE_SEG32: + return (((const zfs_range_seg32_t *)rs)->rs_end); + case ZFS_RANGE_SEG64: + return (((const zfs_range_seg64_t *)rs)->rs_end); + case ZFS_RANGE_SEG_GAP: + return (((const zfs_range_seg_gap_t *)rs)->rs_end); default: VERIFY(0); return (0); @@ -151,20 +151,20 @@ rs_get_end_raw(const range_seg_t *rs, const range_tree_t *rt) } static inline uint64_t -rs_get_fill_raw(const range_seg_t *rs, const range_tree_t *rt) +zfs_rs_get_fill_raw(const zfs_range_seg_t *rs, const zfs_range_tree_t *rt) { - ASSERT3U(rt->rt_type, <=, RANGE_SEG_NUM_TYPES); + ASSERT3U(rt->rt_type, <=, ZFS_RANGE_SEG_NUM_TYPES); switch (rt->rt_type) { - case RANGE_SEG32: { - const range_seg32_t *r32 = (const range_seg32_t *)rs; + case ZFS_RANGE_SEG32: { + const zfs_range_seg32_t *r32 = (const zfs_range_seg32_t *)rs; return (r32->rs_end - r32->rs_start); } - case RANGE_SEG64: { - const range_seg64_t *r64 = (const range_seg64_t *)rs; + case ZFS_RANGE_SEG64: { + const zfs_range_seg64_t *r64 = (const zfs_range_seg64_t *)rs; return (r64->rs_end - r64->rs_start); } - case RANGE_SEG_GAP: - return (((const range_seg_gap_t *)rs)->rs_fill); + case ZFS_RANGE_SEG_GAP: + return (((const zfs_range_seg_gap_t *)rs)->rs_fill); default: VERIFY(0); return (0); @@ -173,37 +173,37 @@ rs_get_fill_raw(const range_seg_t *rs, const range_tree_t *rt) } static inline uint64_t -rs_get_start(const range_seg_t *rs, const range_tree_t *rt) +zfs_rs_get_start(const zfs_range_seg_t *rs, const zfs_range_tree_t *rt) { - 
return ((rs_get_start_raw(rs, rt) << rt->rt_shift) + rt->rt_start); + return ((zfs_rs_get_start_raw(rs, rt) << rt->rt_shift) + rt->rt_start); } static inline uint64_t -rs_get_end(const range_seg_t *rs, const range_tree_t *rt) +zfs_rs_get_end(const zfs_range_seg_t *rs, const zfs_range_tree_t *rt) { - return ((rs_get_end_raw(rs, rt) << rt->rt_shift) + rt->rt_start); + return ((zfs_rs_get_end_raw(rs, rt) << rt->rt_shift) + rt->rt_start); } static inline uint64_t -rs_get_fill(const range_seg_t *rs, const range_tree_t *rt) +zfs_rs_get_fill(const zfs_range_seg_t *rs, const zfs_range_tree_t *rt) { - return (rs_get_fill_raw(rs, rt) << rt->rt_shift); + return (zfs_rs_get_fill_raw(rs, rt) << rt->rt_shift); } static inline void -rs_set_start_raw(range_seg_t *rs, range_tree_t *rt, uint64_t start) +zfs_rs_set_start_raw(zfs_range_seg_t *rs, zfs_range_tree_t *rt, uint64_t start) { - ASSERT3U(rt->rt_type, <=, RANGE_SEG_NUM_TYPES); + ASSERT3U(rt->rt_type, <=, ZFS_RANGE_SEG_NUM_TYPES); switch (rt->rt_type) { - case RANGE_SEG32: + case ZFS_RANGE_SEG32: ASSERT3U(start, <=, UINT32_MAX); - ((range_seg32_t *)rs)->rs_start = (uint32_t)start; + ((zfs_range_seg32_t *)rs)->rs_start = (uint32_t)start; break; - case RANGE_SEG64: - ((range_seg64_t *)rs)->rs_start = start; + case ZFS_RANGE_SEG64: + ((zfs_range_seg64_t *)rs)->rs_start = start; break; - case RANGE_SEG_GAP: - ((range_seg_gap_t *)rs)->rs_start = start; + case ZFS_RANGE_SEG_GAP: + ((zfs_range_seg_gap_t *)rs)->rs_start = start; break; default: VERIFY(0); @@ -211,19 +211,19 @@ rs_set_start_raw(range_seg_t *rs, range_tree_t *rt, uint64_t start) } static inline void -rs_set_end_raw(range_seg_t *rs, range_tree_t *rt, uint64_t end) +zfs_rs_set_end_raw(zfs_range_seg_t *rs, zfs_range_tree_t *rt, uint64_t end) { - ASSERT3U(rt->rt_type, <=, RANGE_SEG_NUM_TYPES); + ASSERT3U(rt->rt_type, <=, ZFS_RANGE_SEG_NUM_TYPES); switch (rt->rt_type) { - case RANGE_SEG32: + case ZFS_RANGE_SEG32: ASSERT3U(end, <=, UINT32_MAX); - ((range_seg32_t *)rs)->rs_end = (uint32_t)end; + ((zfs_range_seg32_t *)rs)->rs_end = (uint32_t)end; break; - case RANGE_SEG64: - ((range_seg64_t *)rs)->rs_end = end; + case ZFS_RANGE_SEG64: + ((zfs_range_seg64_t *)rs)->rs_end = end; break; - case RANGE_SEG_GAP: - ((range_seg_gap_t *)rs)->rs_end = end; + case ZFS_RANGE_SEG_GAP: + ((zfs_range_seg_gap_t *)rs)->rs_end = end; break; default: VERIFY(0); @@ -231,18 +231,19 @@ rs_set_end_raw(range_seg_t *rs, range_tree_t *rt, uint64_t end) } static inline void -rs_set_fill_raw(range_seg_t *rs, range_tree_t *rt, uint64_t fill) +zfs_zfs_rs_set_fill_raw(zfs_range_seg_t *rs, zfs_range_tree_t *rt, + uint64_t fill) { - ASSERT3U(rt->rt_type, <=, RANGE_SEG_NUM_TYPES); + ASSERT3U(rt->rt_type, <=, ZFS_RANGE_SEG_NUM_TYPES); switch (rt->rt_type) { - case RANGE_SEG32: + case ZFS_RANGE_SEG32: /* fall through */ - case RANGE_SEG64: - ASSERT3U(fill, ==, rs_get_end_raw(rs, rt) - rs_get_start_raw(rs, - rt)); + case ZFS_RANGE_SEG64: + ASSERT3U(fill, ==, zfs_rs_get_end_raw(rs, rt) - + zfs_rs_get_start_raw(rs, rt)); break; - case RANGE_SEG_GAP: - ((range_seg_gap_t *)rs)->rs_fill = fill; + case ZFS_RANGE_SEG_GAP: + ((zfs_range_seg_gap_t *)rs)->rs_fill = fill; break; default: VERIFY(0); @@ -250,67 +251,73 @@ rs_set_fill_raw(range_seg_t *rs, range_tree_t *rt, uint64_t fill) } static inline void -rs_set_start(range_seg_t *rs, range_tree_t *rt, uint64_t start) +zfs_rs_set_start(zfs_range_seg_t *rs, zfs_range_tree_t *rt, uint64_t start) { ASSERT3U(start, >=, rt->rt_start); ASSERT(IS_P2ALIGNED(start, 1ULL << rt->rt_shift)); - 
rs_set_start_raw(rs, rt, (start - rt->rt_start) >> rt->rt_shift); + zfs_rs_set_start_raw(rs, rt, (start - rt->rt_start) >> rt->rt_shift); } static inline void -rs_set_end(range_seg_t *rs, range_tree_t *rt, uint64_t end) +zfs_rs_set_end(zfs_range_seg_t *rs, zfs_range_tree_t *rt, uint64_t end) { ASSERT3U(end, >=, rt->rt_start); ASSERT(IS_P2ALIGNED(end, 1ULL << rt->rt_shift)); - rs_set_end_raw(rs, rt, (end - rt->rt_start) >> rt->rt_shift); + zfs_rs_set_end_raw(rs, rt, (end - rt->rt_start) >> rt->rt_shift); } static inline void -rs_set_fill(range_seg_t *rs, range_tree_t *rt, uint64_t fill) +zfs_rs_set_fill(zfs_range_seg_t *rs, zfs_range_tree_t *rt, uint64_t fill) { ASSERT(IS_P2ALIGNED(fill, 1ULL << rt->rt_shift)); - rs_set_fill_raw(rs, rt, fill >> rt->rt_shift); + zfs_zfs_rs_set_fill_raw(rs, rt, fill >> rt->rt_shift); } -typedef void range_tree_func_t(void *arg, uint64_t start, uint64_t size); +typedef void zfs_range_tree_func_t(void *arg, uint64_t start, uint64_t size); -range_tree_t *range_tree_create_gap(const range_tree_ops_t *ops, - range_seg_type_t type, void *arg, uint64_t start, uint64_t shift, +zfs_range_tree_t *zfs_range_tree_create_gap(const zfs_range_tree_ops_t *ops, + zfs_range_seg_type_t type, void *arg, uint64_t start, uint64_t shift, uint64_t gap); -range_tree_t *range_tree_create(const range_tree_ops_t *ops, - range_seg_type_t type, void *arg, uint64_t start, uint64_t shift); -void range_tree_destroy(range_tree_t *rt); -boolean_t range_tree_contains(range_tree_t *rt, uint64_t start, uint64_t size); -range_seg_t *range_tree_find(range_tree_t *rt, uint64_t start, uint64_t size); -boolean_t range_tree_find_in(range_tree_t *rt, uint64_t start, uint64_t size, - uint64_t *ostart, uint64_t *osize); -void range_tree_verify_not_present(range_tree_t *rt, +zfs_range_tree_t *zfs_range_tree_create(const zfs_range_tree_ops_t *ops, + zfs_range_seg_type_t type, void *arg, uint64_t start, uint64_t shift); +void zfs_range_tree_destroy(zfs_range_tree_t *rt); +boolean_t zfs_range_tree_contains(zfs_range_tree_t *rt, uint64_t start, + uint64_t size); +zfs_range_seg_t *zfs_range_tree_find(zfs_range_tree_t *rt, uint64_t start, + uint64_t size); +boolean_t zfs_range_tree_find_in(zfs_range_tree_t *rt, uint64_t start, + uint64_t size, uint64_t *ostart, uint64_t *osize); +void zfs_range_tree_verify_not_present(zfs_range_tree_t *rt, uint64_t start, uint64_t size); -void range_tree_resize_segment(range_tree_t *rt, range_seg_t *rs, +void zfs_range_tree_resize_segment(zfs_range_tree_t *rt, zfs_range_seg_t *rs, uint64_t newstart, uint64_t newsize); -uint64_t range_tree_space(range_tree_t *rt); -uint64_t range_tree_numsegs(range_tree_t *rt); -boolean_t range_tree_is_empty(range_tree_t *rt); -void range_tree_swap(range_tree_t **rtsrc, range_tree_t **rtdst); -void range_tree_stat_verify(range_tree_t *rt); -uint64_t range_tree_min(range_tree_t *rt); -uint64_t range_tree_max(range_tree_t *rt); -uint64_t range_tree_span(range_tree_t *rt); - -void range_tree_add(void *arg, uint64_t start, uint64_t size); -void range_tree_remove(void *arg, uint64_t start, uint64_t size); -void range_tree_remove_fill(range_tree_t *rt, uint64_t start, uint64_t size); -void range_tree_adjust_fill(range_tree_t *rt, range_seg_t *rs, int64_t delta); -void range_tree_clear(range_tree_t *rt, uint64_t start, uint64_t size); - -void range_tree_vacate(range_tree_t *rt, range_tree_func_t *func, void *arg); -void range_tree_walk(range_tree_t *rt, range_tree_func_t *func, void *arg); -range_seg_t *range_tree_first(range_tree_t *rt); - -void 
range_tree_remove_xor_add_segment(uint64_t start, uint64_t end, - range_tree_t *removefrom, range_tree_t *addto); -void range_tree_remove_xor_add(range_tree_t *rt, range_tree_t *removefrom, - range_tree_t *addto); +uint64_t zfs_range_tree_space(zfs_range_tree_t *rt); +uint64_t zfs_range_tree_numsegs(zfs_range_tree_t *rt); +boolean_t zfs_range_tree_is_empty(zfs_range_tree_t *rt); +void zfs_range_tree_swap(zfs_range_tree_t **rtsrc, zfs_range_tree_t **rtdst); +void zfs_range_tree_stat_verify(zfs_range_tree_t *rt); +uint64_t zfs_range_tree_min(zfs_range_tree_t *rt); +uint64_t zfs_range_tree_max(zfs_range_tree_t *rt); +uint64_t zfs_range_tree_span(zfs_range_tree_t *rt); + +void zfs_range_tree_add(void *arg, uint64_t start, uint64_t size); +void zfs_range_tree_remove(void *arg, uint64_t start, uint64_t size); +void zfs_range_tree_remove_fill(zfs_range_tree_t *rt, uint64_t start, + uint64_t size); +void zfs_range_tree_adjust_fill(zfs_range_tree_t *rt, zfs_range_seg_t *rs, + int64_t delta); +void zfs_range_tree_clear(zfs_range_tree_t *rt, uint64_t start, uint64_t size); + +void zfs_range_tree_vacate(zfs_range_tree_t *rt, zfs_range_tree_func_t *func, + void *arg); +void zfs_range_tree_walk(zfs_range_tree_t *rt, zfs_range_tree_func_t *func, + void *arg); +zfs_range_seg_t *zfs_range_tree_first(zfs_range_tree_t *rt); + +void zfs_range_tree_remove_xor_add_segment(uint64_t start, uint64_t end, + zfs_range_tree_t *removefrom, zfs_range_tree_t *addto); +void zfs_range_tree_remove_xor_add(zfs_range_tree_t *rt, + zfs_range_tree_t *removefrom, zfs_range_tree_t *addto); #ifdef __cplusplus } diff --git a/sys/contrib/openzfs/include/sys/space_map.h b/sys/contrib/openzfs/include/sys/space_map.h index 14c5beccee55..2861b25e41ee 100644 --- a/sys/contrib/openzfs/include/sys/space_map.h +++ b/sys/contrib/openzfs/include/sys/space_map.h @@ -207,28 +207,28 @@ boolean_t sm_entry_is_double_word(uint64_t e); typedef int (*sm_cb_t)(space_map_entry_t *sme, void *arg); -int space_map_load(space_map_t *sm, range_tree_t *rt, maptype_t maptype); -int space_map_load_length(space_map_t *sm, range_tree_t *rt, maptype_t maptype, - uint64_t length); +int space_map_load(space_map_t *sm, zfs_range_tree_t *rt, maptype_t maptype); +int space_map_load_length(space_map_t *sm, zfs_range_tree_t *rt, + maptype_t maptype, uint64_t length); int space_map_iterate(space_map_t *sm, uint64_t length, sm_cb_t callback, void *arg); int space_map_incremental_destroy(space_map_t *sm, sm_cb_t callback, void *arg, dmu_tx_t *tx); -boolean_t space_map_histogram_verify(space_map_t *sm, range_tree_t *rt); +boolean_t space_map_histogram_verify(space_map_t *sm, zfs_range_tree_t *rt); void space_map_histogram_clear(space_map_t *sm); -void space_map_histogram_add(space_map_t *sm, range_tree_t *rt, +void space_map_histogram_add(space_map_t *sm, zfs_range_tree_t *rt, dmu_tx_t *tx); uint64_t space_map_object(space_map_t *sm); int64_t space_map_allocated(space_map_t *sm); uint64_t space_map_length(space_map_t *sm); -uint64_t space_map_entries(space_map_t *sm, range_tree_t *rt); +uint64_t space_map_entries(space_map_t *sm, zfs_range_tree_t *rt); uint64_t space_map_nblocks(space_map_t *sm); -void space_map_write(space_map_t *sm, range_tree_t *rt, maptype_t maptype, +void space_map_write(space_map_t *sm, zfs_range_tree_t *rt, maptype_t maptype, uint64_t vdev_id, dmu_tx_t *tx); -uint64_t space_map_estimate_optimal_size(space_map_t *sm, range_tree_t *rt, +uint64_t space_map_estimate_optimal_size(space_map_t *sm, zfs_range_tree_t *rt, uint64_t vdev_id); void 
space_map_truncate(space_map_t *sm, int blocksize, dmu_tx_t *tx); uint64_t space_map_alloc(objset_t *os, int blocksize, dmu_tx_t *tx); diff --git a/sys/contrib/openzfs/include/sys/space_reftree.h b/sys/contrib/openzfs/include/sys/space_reftree.h index b7a846aec624..e9a44ecf46b3 100644 --- a/sys/contrib/openzfs/include/sys/space_reftree.h +++ b/sys/contrib/openzfs/include/sys/space_reftree.h @@ -46,8 +46,8 @@ void space_reftree_create(avl_tree_t *t); void space_reftree_destroy(avl_tree_t *t); void space_reftree_add_seg(avl_tree_t *t, uint64_t start, uint64_t end, int64_t refcnt); -void space_reftree_add_map(avl_tree_t *t, range_tree_t *rt, int64_t refcnt); -void space_reftree_generate_map(avl_tree_t *t, range_tree_t *rt, +void space_reftree_add_map(avl_tree_t *t, zfs_range_tree_t *rt, int64_t refcnt); +void space_reftree_generate_map(avl_tree_t *t, zfs_range_tree_t *rt, int64_t minref); #ifdef __cplusplus diff --git a/sys/contrib/openzfs/include/sys/vdev.h b/sys/contrib/openzfs/include/sys/vdev.h index 38f62b07dc59..6ab7ac40bb07 100644 --- a/sys/contrib/openzfs/include/sys/vdev.h +++ b/sys/contrib/openzfs/include/sys/vdev.h @@ -106,12 +106,12 @@ extern void vdev_expand(vdev_t *vd, uint64_t txg); extern void vdev_split(vdev_t *vd); extern void vdev_deadman(vdev_t *vd, const char *tag); -typedef void vdev_xlate_func_t(void *arg, range_seg64_t *physical_rs); +typedef void vdev_xlate_func_t(void *arg, zfs_range_seg64_t *physical_rs); -extern boolean_t vdev_xlate_is_empty(range_seg64_t *rs); -extern void vdev_xlate(vdev_t *vd, const range_seg64_t *logical_rs, - range_seg64_t *physical_rs, range_seg64_t *remain_rs); -extern void vdev_xlate_walk(vdev_t *vd, const range_seg64_t *logical_rs, +extern boolean_t vdev_xlate_is_empty(zfs_range_seg64_t *rs); +extern void vdev_xlate(vdev_t *vd, const zfs_range_seg64_t *logical_rs, + zfs_range_seg64_t *physical_rs, zfs_range_seg64_t *remain_rs); +extern void vdev_xlate_walk(vdev_t *vd, const zfs_range_seg64_t *logical_rs, vdev_xlate_func_t *func, void *arg); extern void vdev_get_stats_ex(vdev_t *vd, vdev_stat_t *vs, vdev_stat_ex_t *vsx); diff --git a/sys/contrib/openzfs/include/sys/vdev_impl.h b/sys/contrib/openzfs/include/sys/vdev_impl.h index abd66b8abc96..315e2fc88410 100644 --- a/sys/contrib/openzfs/include/sys/vdev_impl.h +++ b/sys/contrib/openzfs/include/sys/vdev_impl.h @@ -91,8 +91,8 @@ typedef void vdev_remap_func_t(vdev_t *vd, uint64_t offset, uint64_t size, * Given a target vdev, translates the logical range "in" to the physical * range "res" */ -typedef void vdev_xlation_func_t(vdev_t *cvd, const range_seg64_t *logical, - range_seg64_t *physical, range_seg64_t *remain); +typedef void vdev_xlation_func_t(vdev_t *cvd, const zfs_range_seg64_t *logical, + zfs_range_seg64_t *physical, zfs_range_seg64_t *remain); typedef uint64_t vdev_rebuild_asize_func_t(vdev_t *vd, uint64_t start, uint64_t size, uint64_t max_segment); typedef void vdev_metaslab_init_func_t(vdev_t *vd, uint64_t *startp, @@ -299,7 +299,8 @@ struct vdev { kcondvar_t vdev_initialize_cv; uint64_t vdev_initialize_offset[TXG_SIZE]; uint64_t vdev_initialize_last_offset; - range_tree_t *vdev_initialize_tree; /* valid while initializing */ + /* valid while initializing */ + zfs_range_tree_t *vdev_initialize_tree; uint64_t vdev_initialize_bytes_est; uint64_t vdev_initialize_bytes_done; uint64_t vdev_initialize_action_time; /* start and end time */ @@ -375,7 +376,7 @@ struct vdev { * from multiple zio threads. 
*/ kmutex_t vdev_obsolete_lock; - range_tree_t *vdev_obsolete_segments; + zfs_range_tree_t *vdev_obsolete_segments; space_map_t *vdev_obsolete_sm; /* @@ -388,7 +389,7 @@ struct vdev { /* * Leaf vdev state. */ - range_tree_t *vdev_dtl[DTL_TYPES]; /* dirty time logs */ + zfs_range_tree_t *vdev_dtl[DTL_TYPES]; /* dirty time logs */ space_map_t *vdev_dtl_sm; /* dirty time log space map */ txg_node_t vdev_dtl_node; /* per-txg dirty DTL linkage */ uint64_t vdev_dtl_object; /* DTL object */ @@ -615,8 +616,8 @@ extern vdev_ops_t vdev_indirect_ops; /* * Common size functions */ -extern void vdev_default_xlate(vdev_t *vd, const range_seg64_t *logical_rs, - range_seg64_t *physical_rs, range_seg64_t *remain_rs); +extern void vdev_default_xlate(vdev_t *vd, const zfs_range_seg64_t *logical_rs, + zfs_range_seg64_t *physical_rs, zfs_range_seg64_t *remain_rs); extern uint64_t vdev_default_asize(vdev_t *vd, uint64_t psize, uint64_t txg); extern uint64_t vdev_default_min_asize(vdev_t *vd); extern uint64_t vdev_get_min_asize(vdev_t *vd); @@ -645,6 +646,10 @@ extern int vdev_obsolete_counts_are_precise(vdev_t *vd, boolean_t *are_precise); int vdev_checkpoint_sm_object(vdev_t *vd, uint64_t *sm_obj); void vdev_metaslab_group_create(vdev_t *vd); uint64_t vdev_best_ashift(uint64_t logical, uint64_t a, uint64_t b); +#if defined(__linux__) +int param_get_raidz_impl(char *buf, zfs_kernel_param_t *kp); +#endif +int param_set_raidz_impl(ZFS_MODULE_PARAM_ARGS); /* * Vdev ashift optimization tunables diff --git a/sys/contrib/openzfs/include/sys/vdev_raidz.h b/sys/contrib/openzfs/include/sys/vdev_raidz.h index 64f484e9aa13..ed042aedbdbc 100644 --- a/sys/contrib/openzfs/include/sys/vdev_raidz.h +++ b/sys/contrib/openzfs/include/sys/vdev_raidz.h @@ -66,6 +66,8 @@ extern const zio_vsd_ops_t vdev_raidz_vsd_ops; /* * vdev_raidz_math interface */ +/* Required, but not used, by ZFS_MODULE_PARAM_CALL */ +extern uint32_t zfs_vdev_raidz_impl; void vdev_raidz_math_init(void); void vdev_raidz_math_fini(void); const struct raidz_impl_ops *vdev_raidz_math_get_ops(void); @@ -73,6 +75,7 @@ int vdev_raidz_math_generate(struct raidz_map *, struct raidz_row *); int vdev_raidz_math_reconstruct(struct raidz_map *, struct raidz_row *, const int *, const int *, const int); int vdev_raidz_impl_set(const char *); +int vdev_raidz_impl_get(char *buffer, size_t size); typedef struct vdev_raidz_expand { uint64_t vre_vdev_id; diff --git a/sys/contrib/openzfs/include/sys/vdev_rebuild.h b/sys/contrib/openzfs/include/sys/vdev_rebuild.h index 55ec6c570316..b7664a822bb3 100644 --- a/sys/contrib/openzfs/include/sys/vdev_rebuild.h +++ b/sys/contrib/openzfs/include/sys/vdev_rebuild.h @@ -65,7 +65,8 @@ typedef struct vdev_rebuild_phys { typedef struct vdev_rebuild { vdev_t *vr_top_vdev; /* top-level vdev to rebuild */ metaslab_t *vr_scan_msp; /* scanning disabled metaslab */ - range_tree_t *vr_scan_tree; /* scan ranges (in metaslab) */ + /* scan ranges (in metaslab) */ + zfs_range_tree_t *vr_scan_tree; kmutex_t vr_io_lock; /* inflight IO lock */ kcondvar_t vr_io_cv; /* inflight IO cv */ diff --git a/sys/contrib/openzfs/include/sys/vdev_removal.h b/sys/contrib/openzfs/include/sys/vdev_removal.h index 70b743f4ec6b..8e6005a94260 100644 --- a/sys/contrib/openzfs/include/sys/vdev_removal.h +++ b/sys/contrib/openzfs/include/sys/vdev_removal.h @@ -35,7 +35,7 @@ typedef struct spa_vdev_removal { /* Thread performing a vdev removal. */ kthread_t *svr_thread; /* Segments left to copy from the current metaslab. 
*/ - range_tree_t *svr_allocd_segs; + zfs_range_tree_t *svr_allocd_segs; kmutex_t svr_lock; kcondvar_t svr_cv; boolean_t svr_thread_exit; @@ -49,7 +49,7 @@ typedef struct spa_vdev_removal { * Ranges that were freed while a mapping was in flight. This is * a subset of the ranges covered by vdev_im_new_segments. */ - range_tree_t *svr_frees[TXG_SIZE]; + zfs_range_tree_t *svr_frees[TXG_SIZE]; /* * Number of bytes which we have finished our work for diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_dataset.c b/sys/contrib/openzfs/lib/libzfs/libzfs_dataset.c index 231bbbd92dbf..06fa52b00e05 100644 --- a/sys/contrib/openzfs/lib/libzfs/libzfs_dataset.c +++ b/sys/contrib/openzfs/lib/libzfs/libzfs_dataset.c @@ -4673,6 +4673,7 @@ zfs_rename(zfs_handle_t *zhp, const char *target, renameflags_t flags) changelist_rename(cl, zfs_get_name(zhp), target); ret = changelist_postfix(cl); } + (void) strlcpy(zhp->zfs_name, target, sizeof (zhp->zfs_name)); } error: diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_sendrecv.c b/sys/contrib/openzfs/lib/libzfs/libzfs_sendrecv.c index b9780720e5a3..97920ce6f21c 100644 --- a/sys/contrib/openzfs/lib/libzfs/libzfs_sendrecv.c +++ b/sys/contrib/openzfs/lib/libzfs/libzfs_sendrecv.c @@ -3376,66 +3376,78 @@ created_before(libzfs_handle_t *hdl, avl_tree_t *avl, */ static int recv_fix_encryption_hierarchy(libzfs_handle_t *hdl, const char *top_zfs, - nvlist_t *stream_nv) + nvlist_t *stream_nv, avl_tree_t *stream_avl) { int err; nvpair_t *fselem = NULL; - nvlist_t *stream_fss; + nvlist_t *local_nv; + avl_tree_t *local_avl; + boolean_t recursive; + + recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") == + ENOENT); - stream_fss = fnvlist_lookup_nvlist(stream_nv, "fss"); + /* Using top_zfs, gather the nvlists for all local filesystems. */ + if ((err = gather_nvlist(hdl, top_zfs, NULL, NULL, + recursive, B_TRUE, B_FALSE, recursive, B_FALSE, B_FALSE, B_FALSE, + B_FALSE, B_TRUE, &local_nv, &local_avl)) != 0) + return (err); - while ((fselem = nvlist_next_nvpair(stream_fss, fselem)) != NULL) { + /* + * Go through the nvlists of the local filesystems and check for + * encryption roots. 
+ */ + while ((fselem = nvlist_next_nvpair(local_nv, fselem)) != NULL) { zfs_handle_t *zhp = NULL; uint64_t crypt; - nvlist_t *snaps, *props, *stream_nvfs = NULL; - nvpair_t *snapel = NULL; + nvlist_t *stream_props, *snaps, *stream_nvfs = NULL, + *nvfs = NULL; boolean_t is_encroot, is_clone, stream_encroot; - char *cp; - const char *stream_keylocation = NULL; + const char *stream_keylocation = NULL, *fsname; char keylocation[MAXNAMELEN]; - char fsname[ZFS_MAX_DATASET_NAME_LEN]; - - keylocation[0] = '\0'; - stream_nvfs = fnvpair_value_nvlist(fselem); - snaps = fnvlist_lookup_nvlist(stream_nvfs, "snaps"); - props = fnvlist_lookup_nvlist(stream_nvfs, "props"); - stream_encroot = nvlist_exists(stream_nvfs, "is_encroot"); - - /* find a snapshot from the stream that exists locally */ - err = ENOENT; - while ((snapel = nvlist_next_nvpair(snaps, snapel)) != NULL) { - uint64_t guid; - - guid = fnvpair_value_uint64(snapel); - err = guid_to_name(hdl, top_zfs, guid, B_FALSE, - fsname); - if (err == 0) - break; - } - - if (err != 0) - continue; - - cp = strchr(fsname, '@'); - if (cp != NULL) - *cp = '\0'; + nvpair_t *snapelem; + nvfs = fnvpair_value_nvlist(fselem); + snaps = fnvlist_lookup_nvlist(nvfs, "snaps"); + fsname = fnvlist_lookup_string(nvfs, "name"); zhp = zfs_open(hdl, fsname, ZFS_TYPE_DATASET); if (zhp == NULL) { err = ENOENT; goto error; } - crypt = zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION); - is_clone = zhp->zfs_dmustats.dds_origin[0] != '\0'; - (void) zfs_crypto_get_encryption_root(zhp, &is_encroot, NULL); - /* we don't need to do anything for unencrypted datasets */ + crypt = zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION); if (crypt == ZIO_CRYPT_OFF) { zfs_close(zhp); continue; } + is_clone = zhp->zfs_dmustats.dds_origin[0] != '\0'; + (void) zfs_crypto_get_encryption_root(zhp, &is_encroot, NULL); + keylocation[0] = '\0'; + + /* + * Go through the snapshots of the local filesystem and find + * the stream's filesystem. 
+ */ + for (snapelem = nvlist_next_nvpair(snaps, NULL); + snapelem; snapelem = nvlist_next_nvpair(snaps, snapelem)) { + uint64_t thisguid; + + thisguid = fnvpair_value_uint64(snapelem); + stream_nvfs = fsavl_find(stream_avl, thisguid, NULL); + + if (stream_nvfs != NULL) + break; + } + + if (stream_nvfs == NULL) + continue; + + stream_props = fnvlist_lookup_nvlist(stream_nvfs, "props"); + stream_encroot = nvlist_exists(stream_nvfs, "is_encroot"); + /* * If the dataset is flagged as an encryption root, was not * received as a clone and is not currently an encryption root, @@ -3451,7 +3463,7 @@ recv_fix_encryption_hierarchy(libzfs_handle_t *hdl, const char *top_zfs, } } - stream_keylocation = fnvlist_lookup_string(props, + stream_keylocation = fnvlist_lookup_string(stream_props, zfs_prop_to_name(ZFS_PROP_KEYLOCATION)); /* @@ -3518,14 +3530,14 @@ recv_incremental_replication(libzfs_handle_t *hdl, const char *tofs, boolean_t needagain, progress, recursive; const char *s1, *s2; + if (flags->dryrun) + return (0); + fromsnap = fnvlist_lookup_string(stream_nv, "fromsnap"); recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") == ENOENT); - if (flags->dryrun) - return (0); - again: needagain = progress = B_FALSE; @@ -3999,9 +4011,9 @@ zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname, stream_nv, stream_avl, NULL); } - if (raw && softerr == 0 && *top_zfs != NULL) { + if (raw && *top_zfs != NULL && !flags->dryrun) { softerr = recv_fix_encryption_hierarchy(hdl, *top_zfs, - stream_nv); + stream_nv, stream_avl); } out: diff --git a/sys/contrib/openzfs/man/man4/zfs.4 b/sys/contrib/openzfs/man/man4/zfs.4 index dd0b3d848fe9..9d83357fcc6d 100644 --- a/sys/contrib/openzfs/man/man4/zfs.4 +++ b/sys/contrib/openzfs/man/man4/zfs.4 @@ -1778,7 +1778,7 @@ Normally disabled because these datasets may be missing key data. .It Sy zfs_min_metaslabs_to_flush Ns = Ns Sy 1 Pq u64 Minimum number of metaslabs to flush per dirty TXG. . -.It Sy zfs_metaslab_fragmentation_threshold Ns = Ns Sy 70 Ns % Pq uint +.It Sy zfs_metaslab_fragmentation_threshold Ns = Ns Sy 77 Ns % Pq uint Allow metaslabs to keep their active state as long as their fragmentation percentage is no more than this value. An active metaslab that exceeds this threshold diff --git a/sys/contrib/openzfs/man/man8/zfs-destroy.8 b/sys/contrib/openzfs/man/man8/zfs-destroy.8 index 247c561322bf..97596b28444b 100644 --- a/sys/contrib/openzfs/man/man8/zfs-destroy.8 +++ b/sys/contrib/openzfs/man/man8/zfs-destroy.8 @@ -101,18 +101,25 @@ behavior for mounted file systems in use. .Ar filesystem Ns | Ns Ar volume Ns @ Ns Ar snap Ns .Oo % Ns Ar snap Ns Oo , Ns Ar snap Ns Oo % Ns Ar snap Oc Oc Oc Ns … .Xc -The given snapshots are destroyed immediately if and only if the +Attempts to destroy the given snapshot(s). +This will fail if any clones of the snapshot exist or if the snapshot is held. +In this case, by default, .Nm zfs Cm destroy -command without the +will have no effect and exit in error. +If the .Fl d -option would have destroyed it. -Such immediate destruction would occur, for example, if the snapshot had no -clones and the user-initiated reference count were zero. +option is applied, the command will instead mark the given snapshot for +automatic destruction as soon as it becomes eligible. +While marked for destruction, a snapshot remains visible, and the user may +create new clones from it and place new holds on it. .Pp -If a snapshot does not qualify for immediate destruction, it is marked for -deferred deletion. 
-In this state, it exists as a usable, visible snapshot until both of the -preconditions listed above are met, at which point it is destroyed. +The read-only snapshot properties +.Sy defer_destroy +and +.Sy userrefs +are used by +.Nm zfs Cm destroy +to determine eligibility and marked status. .Pp An inclusive range of snapshots may be specified by separating the first and last snapshots with a percent sign. @@ -137,8 +144,9 @@ If this flag is specified, the .Fl d flag will have no effect. .It Fl d -Destroy immediately. -If a snapshot cannot be destroyed now, mark it for deferred destruction. +Rather than returning error if the given snapshot is ineligible for immediate +destruction, mark it for deferred, automatic destruction once it becomes +eligible. .It Fl n Do a dry-run .Pq Qq No-op @@ -223,4 +231,5 @@ renames the remaining snapshots, and then creates a new snapshot, as follows: . .Sh SEE ALSO .Xr zfs-create 8 , -.Xr zfs-hold 8 +.Xr zfs-hold 8 , +.Xr zfsprops 8 diff --git a/sys/contrib/openzfs/module/Kbuild.in b/sys/contrib/openzfs/module/Kbuild.in index fc14d5cb535e..5190afc506f9 100644 --- a/sys/contrib/openzfs/module/Kbuild.in +++ b/sys/contrib/openzfs/module/Kbuild.in @@ -447,6 +447,7 @@ ZFS_OBJS_OS := \ trace.o \ vdev_disk.o \ vdev_file.o \ + vdev_raidz.o \ vdev_label_os.o \ zfs_acl.o \ zfs_ctldir.o \ diff --git a/sys/contrib/openzfs/module/Makefile.in b/sys/contrib/openzfs/module/Makefile.in index f76e94afa410..a65cbfce1a90 100644 --- a/sys/contrib/openzfs/module/Makefile.in +++ b/sys/contrib/openzfs/module/Makefile.in @@ -93,7 +93,7 @@ modules_install-Linux: modules_uninstall-Linux-legacy $(if @KERNEL_ARCH@,ARCH=@KERNEL_ARCH@) \ KERNELRELEASE=@LINUX_VERSION@ @# Remove extraneous build products when packaging - if [ -n "$(DESTDIR)" ]; then \ + if [ -n "$(DESTDIR)" ] && [ "$(DONT_DELETE_MODULES_FILES)" != "1" ]; then \ find $(KMODDIR) -name 'modules.*' -delete; \ fi @# Debian ships tiny fake System.map files that are @@ -106,7 +106,7 @@ modules_install-Linux: modules_uninstall-Linux-legacy { [ -f "$$sysmap" ] && [ $$(wc -l < "$$sysmap") -ge 100 ]; } || \ sysmap=$(INSTALL_MOD_PATH)/usr/lib/debug/boot/System.map-@LINUX_VERSION@; \ if [ -f $$sysmap ]; then \ - depmod -ae -F $$sysmap @LINUX_VERSION@; \ + depmod -ae -F $$sysmap @LINUX_VERSION@ -b $(INSTALL_MOD_PATH)/; \ fi modules_install-FreeBSD: diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c index 7350b8a6d49f..bddb25a07204 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c @@ -680,6 +680,27 @@ param_set_deadman_failmode(SYSCTL_HANDLER_ARGS) } int +param_set_raidz_impl(SYSCTL_HANDLER_ARGS) +{ + const size_t bufsize = 128; + char *buf; + int rc; + + buf = malloc(bufsize, M_SOLARIS, M_WAITOK | M_ZERO); + if (req->newptr == NULL) + vdev_raidz_impl_get(buf, bufsize); + + rc = sysctl_handle_string(oidp, buf, bufsize, req); + if (rc || req->newptr == NULL) { + free(buf, M_SOLARIS); + return (rc); + } + rc = vdev_raidz_impl_set(buf); + free(buf, M_SOLARIS); + return (rc); +} + +int param_set_slop_shift(SYSCTL_HANDLER_ARGS) { int val; diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-kmem-cache.c b/sys/contrib/openzfs/module/os/linux/spl/spl-kmem-cache.c index 33c7d0879741..da5513c50189 100644 --- a/sys/contrib/openzfs/module/os/linux/spl/spl-kmem-cache.c +++ b/sys/contrib/openzfs/module/os/linux/spl/spl-kmem-cache.c @@ -727,6 +727,7 @@ spl_kmem_cache_create(const char *name, size_t 
size, size_t align, rc = percpu_counter_init(&skc->skc_linux_alloc, 0, GFP_KERNEL); if (rc != 0) { + kfree(skc->skc_name); kfree(skc); return (NULL); } diff --git a/sys/contrib/openzfs/module/os/linux/zfs/vdev_raidz.c b/sys/contrib/openzfs/module/os/linux/zfs/vdev_raidz.c new file mode 100644 index 000000000000..0b34ca52fb90 --- /dev/null +++ b/sys/contrib/openzfs/module/os/linux/zfs/vdev_raidz.c @@ -0,0 +1,42 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or https://opensource.org/licenses/CDDL-1.0. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* Copyright (C) 2025 ConnectWise */ + +#include <sys/zfs_context.h> +#include <sys/spa.h> +#include <sys/zio.h> +#include <sys/vdev_impl.h> +#include <sys/vdev_raidz.h> + +int +param_get_raidz_impl(char *buf, zfs_kernel_param_t *kp) +{ + return (vdev_raidz_impl_get(buf, PAGE_SIZE)); +} + +int +param_set_raidz_impl(const char *val, zfs_kernel_param_t *kp) +{ + int error; + + error = vdev_raidz_impl_set(val); + return (error); +} diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_uio.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_uio.c index db85b626f12a..1a815c62b19a 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_uio.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_uio.c @@ -404,7 +404,6 @@ zfs_uio_page_aligned(zfs_uio_t *uio) return (aligned); } - #if defined(HAVE_ZERO_PAGE_GPL_ONLY) || !defined(_LP64) #define ZFS_MARKEED_PAGE 0x0 #define IS_ZFS_MARKED_PAGE(_p) 0 @@ -441,7 +440,6 @@ zfs_unmark_page(struct page *page) } #endif /* HAVE_ZERO_PAGE_GPL_ONLY || !_LP64 */ -#if !defined(HAVE_PIN_USER_PAGES_UNLOCKED) static void zfs_uio_dio_check_for_zero_page(zfs_uio_t *uio) { @@ -473,7 +471,6 @@ zfs_uio_dio_check_for_zero_page(zfs_uio_t *uio) } } } -#endif void zfs_uio_free_dio_pages(zfs_uio_t *uio, zfs_uio_rw_t rw) @@ -482,21 +479,24 @@ zfs_uio_free_dio_pages(zfs_uio_t *uio, zfs_uio_rw_t rw) ASSERT(uio->uio_extflg & UIO_DIRECT); ASSERT3P(uio->uio_dio.pages, !=, NULL); + if (uio->uio_dio.pinned) { #if defined(HAVE_PIN_USER_PAGES_UNLOCKED) - unpin_user_pages(uio->uio_dio.pages, uio->uio_dio.npages); -#else - for (long i = 0; i < uio->uio_dio.npages; i++) { - struct page *p = uio->uio_dio.pages[i]; + unpin_user_pages(uio->uio_dio.pages, uio->uio_dio.npages); +#endif + } else { + for (long i = 0; i < uio->uio_dio.npages; i++) { + struct page *p = uio->uio_dio.pages[i]; - if (IS_ZFS_MARKED_PAGE(p)) { - zfs_unmark_page(p); - __free_page(p); - continue; - } + if (IS_ZFS_MARKED_PAGE(p)) { + zfs_unmark_page(p); + __free_page(p); + continue; + } - put_page(p); + put_page(p); + } } -#endif + vmem_free(uio->uio_dio.pages, uio->uio_dio.npages * sizeof (struct page *)); } @@ -523,6 +523,7 @@ zfs_uio_pin_user_pages(zfs_uio_t *uio, zfs_uio_rw_t rw) if (len == 0) return (0); + uio->uio_dio.pinned = B_TRUE; #if defined(HAVE_ITER_IS_UBUF) if 
(iter_is_ubuf(uio->uio_iter)) { nr_pages = DIV_ROUND_UP(len, PAGE_SIZE); @@ -569,8 +570,8 @@ zfs_uio_pin_user_pages(zfs_uio_t *uio, zfs_uio_rw_t rw) return (0); } +#endif -#else static int zfs_uio_get_dio_pages_iov_iter(zfs_uio_t *uio, zfs_uio_rw_t rw) { @@ -581,9 +582,15 @@ zfs_uio_get_dio_pages_iov_iter(zfs_uio_t *uio, zfs_uio_rw_t rw) unsigned maxpages = DIV_ROUND_UP(wanted, PAGE_SIZE); while (wanted) { +#if defined(HAVE_IOV_ITER_GET_PAGES2) + cnt = iov_iter_get_pages2(uio->uio_iter, + &uio->uio_dio.pages[uio->uio_dio.npages], + wanted, maxpages, &start); +#else cnt = iov_iter_get_pages(uio->uio_iter, &uio->uio_dio.pages[uio->uio_dio.npages], wanted, maxpages, &start); +#endif if (cnt < 0) { iov_iter_revert(uio->uio_iter, rollback); return (SET_ERROR(-cnt)); @@ -595,7 +602,12 @@ zfs_uio_get_dio_pages_iov_iter(zfs_uio_t *uio, zfs_uio_rw_t rw) uio->uio_dio.npages += DIV_ROUND_UP(cnt, PAGE_SIZE); rollback += cnt; wanted -= cnt; +#if !defined(HAVE_IOV_ITER_GET_PAGES2) + /* + * iov_iter_get_pages2() advances the iov_iter on success. + */ iov_iter_advance(uio->uio_iter, cnt); +#endif } ASSERT3U(rollback, ==, uio->uio_resid - uio->uio_skip); @@ -603,7 +615,6 @@ zfs_uio_get_dio_pages_iov_iter(zfs_uio_t *uio, zfs_uio_rw_t rw) return (0); } -#endif /* HAVE_PIN_USER_PAGES_UNLOCKED */ /* * This function pins user pages. In the event that the user pages were not @@ -621,7 +632,10 @@ zfs_uio_get_dio_pages_alloc(zfs_uio_t *uio, zfs_uio_rw_t rw) if (uio->uio_segflg == UIO_ITER) { uio->uio_dio.pages = vmem_alloc(size, KM_SLEEP); #if defined(HAVE_PIN_USER_PAGES_UNLOCKED) - error = zfs_uio_pin_user_pages(uio, rw); + if (zfs_user_backed_iov_iter(uio->uio_iter)) + error = zfs_uio_pin_user_pages(uio, rw); + else + error = zfs_uio_get_dio_pages_iov_iter(uio, rw); #else error = zfs_uio_get_dio_pages_iov_iter(uio, rw); #endif @@ -632,22 +646,24 @@ zfs_uio_get_dio_pages_alloc(zfs_uio_t *uio, zfs_uio_rw_t rw) ASSERT3S(uio->uio_dio.npages, >=, 0); if (error) { + if (uio->uio_dio.pinned) { #if defined(HAVE_PIN_USER_PAGES_UNLOCKED) - unpin_user_pages(uio->uio_dio.pages, uio->uio_dio.npages); -#else - for (long i = 0; i < uio->uio_dio.npages; i++) - put_page(uio->uio_dio.pages[i]); + unpin_user_pages(uio->uio_dio.pages, + uio->uio_dio.npages); #endif + } else { + for (long i = 0; i < uio->uio_dio.npages; i++) + put_page(uio->uio_dio.pages[i]); + } + vmem_free(uio->uio_dio.pages, size); return (error); } else { ASSERT3S(uio->uio_dio.npages, ==, npages); } -#if !defined(HAVE_PIN_USER_PAGES_UNLOCKED) - if (rw == UIO_WRITE) + if (rw == UIO_WRITE && !uio->uio_dio.pinned) zfs_uio_dio_check_for_zero_page(uio); -#endif uio->uio_extflg |= UIO_DIRECT; diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_ctldir.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_ctldir.c index fe64bc710387..11438ab61475 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_ctldir.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_ctldir.c @@ -189,8 +189,14 @@ zpl_snapdir_automount(struct path *path) * as of the 3.18 kernel revaliding the mountpoint dentry will result in * the snapshot being immediately unmounted. 
*/ +#ifdef HAVE_D_REVALIDATE_4ARGS +static int +zpl_snapdir_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) +#else static int zpl_snapdir_revalidate(struct dentry *dentry, unsigned int flags) +#endif { return (!!dentry->d_inode); } diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c index 7c9aae6a66af..7c5d567c3239 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c @@ -202,7 +202,16 @@ static int zvol_blk_mq_alloc_tag_set(zvol_state_t *zv) * We need BLK_MQ_F_BLOCKING here since we do blocking calls in * zvol_request_impl() */ - zso->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING; + zso->tag_set.flags = BLK_MQ_F_BLOCKING; + +#ifdef BLK_MQ_F_SHOULD_MERGE + /* + * Linux 6.14 removed BLK_MQ_F_SHOULD_MERGE and made it implicit. + * For older kernels, we set it. + */ + zso->tag_set.flags |= BLK_MQ_F_SHOULD_MERGE; +#endif + zso->tag_set.driver_data = zv; return (blk_mq_alloc_tag_set(&zso->tag_set)); diff --git a/sys/contrib/openzfs/module/zfs/arc.c b/sys/contrib/openzfs/module/zfs/arc.c index bd6f076dfbbd..1f653d953113 100644 --- a/sys/contrib/openzfs/module/zfs/arc.c +++ b/sys/contrib/openzfs/module/zfs/arc.c @@ -6031,6 +6031,7 @@ top: acb->acb_compressed = compressed_read; acb->acb_encrypted = encrypted_read; acb->acb_noauth = noauth_read; + acb->acb_nobuf = no_buf; acb->acb_zb = *zb; ASSERT3P(hdr->b_l1hdr.b_acb, ==, NULL); diff --git a/sys/contrib/openzfs/module/zfs/dbuf.c b/sys/contrib/openzfs/module/zfs/dbuf.c index 90395cad6e45..5212751f9a63 100644 --- a/sys/contrib/openzfs/module/zfs/dbuf.c +++ b/sys/contrib/openzfs/module/zfs/dbuf.c @@ -2193,7 +2193,7 @@ dbuf_dirty_lightweight(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx) mutex_enter(&dn->dn_mtx); int txgoff = tx->tx_txg & TXG_MASK; if (dn->dn_free_ranges[txgoff] != NULL) { - range_tree_clear(dn->dn_free_ranges[txgoff], blkid, 1); + zfs_range_tree_clear(dn->dn_free_ranges[txgoff], blkid, 1); } if (dn->dn_nlevels == 1) { @@ -2400,7 +2400,7 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) db->db_blkid != DMU_SPILL_BLKID) { mutex_enter(&dn->dn_mtx); if (dn->dn_free_ranges[txgoff] != NULL) { - range_tree_clear(dn->dn_free_ranges[txgoff], + zfs_range_tree_clear(dn->dn_free_ranges[txgoff], db->db_blkid, 1); } mutex_exit(&dn->dn_mtx); diff --git a/sys/contrib/openzfs/module/zfs/dnode.c b/sys/contrib/openzfs/module/zfs/dnode.c index ecc6761f8fa4..ce2c79dbfaa3 100644 --- a/sys/contrib/openzfs/module/zfs/dnode.c +++ b/sys/contrib/openzfs/module/zfs/dnode.c @@ -2435,11 +2435,11 @@ done: { int txgoff = tx->tx_txg & TXG_MASK; if (dn->dn_free_ranges[txgoff] == NULL) { - dn->dn_free_ranges[txgoff] = range_tree_create(NULL, - RANGE_SEG64, NULL, 0, 0); + dn->dn_free_ranges[txgoff] = zfs_range_tree_create(NULL, + ZFS_RANGE_SEG64, NULL, 0, 0); } - range_tree_clear(dn->dn_free_ranges[txgoff], blkid, nblks); - range_tree_add(dn->dn_free_ranges[txgoff], blkid, nblks); + zfs_range_tree_clear(dn->dn_free_ranges[txgoff], blkid, nblks); + zfs_range_tree_add(dn->dn_free_ranges[txgoff], blkid, nblks); } dprintf_dnode(dn, "blkid=%llu nblks=%llu txg=%llu\n", (u_longlong_t)blkid, (u_longlong_t)nblks, @@ -2482,7 +2482,7 @@ dnode_block_freed(dnode_t *dn, uint64_t blkid) mutex_enter(&dn->dn_mtx); for (i = 0; i < TXG_SIZE; i++) { if (dn->dn_free_ranges[i] != NULL && - range_tree_contains(dn->dn_free_ranges[i], blkid, 1)) + zfs_range_tree_contains(dn->dn_free_ranges[i], blkid, 1)) break; 
} mutex_exit(&dn->dn_mtx); diff --git a/sys/contrib/openzfs/module/zfs/dnode_sync.c b/sys/contrib/openzfs/module/zfs/dnode_sync.c index 122d7d0d17d8..c82f45145d4b 100644 --- a/sys/contrib/openzfs/module/zfs/dnode_sync.c +++ b/sys/contrib/openzfs/module/zfs/dnode_sync.c @@ -720,7 +720,7 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx) dn->dn_maxblkid == 0 || list_head(list) != NULL || dn->dn_next_blksz[txgoff] >> SPA_MINBLOCKSHIFT == dnp->dn_datablkszsec || - !range_tree_is_empty(dn->dn_free_ranges[txgoff])); + !zfs_range_tree_is_empty(dn->dn_free_ranges[txgoff])); dnp->dn_datablkszsec = dn->dn_next_blksz[txgoff] >> SPA_MINBLOCKSHIFT; dn->dn_next_blksz[txgoff] = 0; @@ -786,21 +786,22 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx) dsfra.dsfra_free_indirects = freeing_dnode; mutex_enter(&dn->dn_mtx); if (freeing_dnode) { - ASSERT(range_tree_contains(dn->dn_free_ranges[txgoff], - 0, dn->dn_maxblkid + 1)); + ASSERT(zfs_range_tree_contains( + dn->dn_free_ranges[txgoff], 0, + dn->dn_maxblkid + 1)); } /* * Because dnode_sync_free_range() must drop dn_mtx during its - * processing, using it as a callback to range_tree_vacate() is - * not safe. No other operations (besides destroy) are allowed - * once range_tree_vacate() has begun, and dropping dn_mtx - * would leave a window open for another thread to observe that - * invalid (and unsafe) state. + * processing, using it as a callback to zfs_range_tree_vacate() + * is not safe. No other operations (besides destroy) are + * allowed once zfs_range_tree_vacate() has begun, and dropping + * dn_mtx would leave a window open for another thread to + * observe that invalid (and unsafe) state. */ - range_tree_walk(dn->dn_free_ranges[txgoff], + zfs_range_tree_walk(dn->dn_free_ranges[txgoff], dnode_sync_free_range, &dsfra); - range_tree_vacate(dn->dn_free_ranges[txgoff], NULL, NULL); - range_tree_destroy(dn->dn_free_ranges[txgoff]); + zfs_range_tree_vacate(dn->dn_free_ranges[txgoff], NULL, NULL); + zfs_range_tree_destroy(dn->dn_free_ranges[txgoff]); dn->dn_free_ranges[txgoff] = NULL; mutex_exit(&dn->dn_mtx); } diff --git a/sys/contrib/openzfs/module/zfs/dsl_pool.c b/sys/contrib/openzfs/module/zfs/dsl_pool.c index 39f97d7547c6..b98ff69191de 100644 --- a/sys/contrib/openzfs/module/zfs/dsl_pool.c +++ b/sys/contrib/openzfs/module/zfs/dsl_pool.c @@ -660,8 +660,8 @@ dsl_early_sync_task_verify(dsl_pool_t *dp, uint64_t txg) for (ms = txg_list_head(tl, TXG_CLEAN(txg)); ms; ms = txg_list_next(tl, ms, TXG_CLEAN(txg))) { - VERIFY(range_tree_is_empty(ms->ms_freeing)); - VERIFY(range_tree_is_empty(ms->ms_checkpointing)); + VERIFY(zfs_range_tree_is_empty(ms->ms_freeing)); + VERIFY(zfs_range_tree_is_empty(ms->ms_checkpointing)); } } diff --git a/sys/contrib/openzfs/module/zfs/dsl_scan.c b/sys/contrib/openzfs/module/zfs/dsl_scan.c index 3eba4cb35cc6..5977f8c82b45 100644 --- a/sys/contrib/openzfs/module/zfs/dsl_scan.c +++ b/sys/contrib/openzfs/module/zfs/dsl_scan.c @@ -321,7 +321,7 @@ struct dsl_scan_io_queue { zio_t *q_zio; /* scn_zio_root child for waiting on IO */ /* trees used for sorting I/Os and extents of I/Os */ - range_tree_t *q_exts_by_addr; + zfs_range_tree_t *q_exts_by_addr; zfs_btree_t q_exts_by_size; avl_tree_t q_sios_by_addr; uint64_t q_sio_memused; @@ -814,7 +814,8 @@ dsl_scan_sync_state(dsl_scan_t *scn, dmu_tx_t *tx, state_sync_type_t sync_type) ASSERT3P(avl_first(&q->q_sios_by_addr), ==, NULL); ASSERT3P(zfs_btree_first(&q->q_exts_by_size, NULL), ==, NULL); - ASSERT3P(range_tree_first(q->q_exts_by_addr), ==, NULL); + ASSERT3P(zfs_range_tree_first(q->q_exts_by_addr), ==, + 
NULL); mutex_exit(&vd->vdev_scan_io_queue_lock); } @@ -1601,8 +1602,8 @@ dsl_scan_should_clear(dsl_scan_t *scn) * # of extents in exts_by_addr = # in exts_by_size. * B-tree efficiency is ~75%, but can be as low as 50%. */ - mused += zfs_btree_numnodes(&queue->q_exts_by_size) * - ((sizeof (range_seg_gap_t) + sizeof (uint64_t)) * + mused += zfs_btree_numnodes(&queue->q_exts_by_size) * (( + sizeof (zfs_range_seg_gap_t) + sizeof (uint64_t)) * 3 / 2) + queue->q_sio_memused; } mutex_exit(&tvd->vdev_scan_io_queue_lock); @@ -3277,13 +3278,14 @@ scan_io_queue_issue(dsl_scan_io_queue_t *queue, list_t *io_list) /* * This function removes sios from an IO queue which reside within a given - * range_seg_t and inserts them (in offset order) into a list. Note that + * zfs_range_seg_t and inserts them (in offset order) into a list. Note that * we only ever return a maximum of 32 sios at once. If there are more sios * to process within this segment that did not make it onto the list we * return B_TRUE and otherwise B_FALSE. */ static boolean_t -scan_io_queue_gather(dsl_scan_io_queue_t *queue, range_seg_t *rs, list_t *list) +scan_io_queue_gather(dsl_scan_io_queue_t *queue, zfs_range_seg_t *rs, + list_t *list) { scan_io_t *srch_sio, *sio, *next_sio; avl_index_t idx; @@ -3295,7 +3297,7 @@ scan_io_queue_gather(dsl_scan_io_queue_t *queue, range_seg_t *rs, list_t *list) srch_sio = sio_alloc(1); srch_sio->sio_nr_dvas = 1; - SIO_SET_OFFSET(srch_sio, rs_get_start(rs, queue->q_exts_by_addr)); + SIO_SET_OFFSET(srch_sio, zfs_rs_get_start(rs, queue->q_exts_by_addr)); /* * The exact start of the extent might not contain any matching zios, @@ -3307,11 +3309,11 @@ scan_io_queue_gather(dsl_scan_io_queue_t *queue, range_seg_t *rs, list_t *list) if (sio == NULL) sio = avl_nearest(&queue->q_sios_by_addr, idx, AVL_AFTER); - while (sio != NULL && SIO_GET_OFFSET(sio) < rs_get_end(rs, + while (sio != NULL && SIO_GET_OFFSET(sio) < zfs_rs_get_end(rs, queue->q_exts_by_addr) && num_sios <= 32) { - ASSERT3U(SIO_GET_OFFSET(sio), >=, rs_get_start(rs, + ASSERT3U(SIO_GET_OFFSET(sio), >=, zfs_rs_get_start(rs, queue->q_exts_by_addr)); - ASSERT3U(SIO_GET_END_OFFSET(sio), <=, rs_get_end(rs, + ASSERT3U(SIO_GET_END_OFFSET(sio), <=, zfs_rs_get_end(rs, queue->q_exts_by_addr)); next_sio = AVL_NEXT(&queue->q_sios_by_addr, sio); @@ -3332,19 +3334,20 @@ scan_io_queue_gather(dsl_scan_io_queue_t *queue, range_seg_t *rs, list_t *list) * in the segment we update it to reflect the work we were able to * complete. Otherwise, we remove it from the range tree entirely. 
*/ - if (sio != NULL && SIO_GET_OFFSET(sio) < rs_get_end(rs, + if (sio != NULL && SIO_GET_OFFSET(sio) < zfs_rs_get_end(rs, queue->q_exts_by_addr)) { - range_tree_adjust_fill(queue->q_exts_by_addr, rs, + zfs_range_tree_adjust_fill(queue->q_exts_by_addr, rs, -bytes_issued); - range_tree_resize_segment(queue->q_exts_by_addr, rs, - SIO_GET_OFFSET(sio), rs_get_end(rs, + zfs_range_tree_resize_segment(queue->q_exts_by_addr, rs, + SIO_GET_OFFSET(sio), zfs_rs_get_end(rs, queue->q_exts_by_addr) - SIO_GET_OFFSET(sio)); queue->q_last_ext_addr = SIO_GET_OFFSET(sio); return (B_TRUE); } else { - uint64_t rstart = rs_get_start(rs, queue->q_exts_by_addr); - uint64_t rend = rs_get_end(rs, queue->q_exts_by_addr); - range_tree_remove(queue->q_exts_by_addr, rstart, rend - rstart); + uint64_t rstart = zfs_rs_get_start(rs, queue->q_exts_by_addr); + uint64_t rend = zfs_rs_get_end(rs, queue->q_exts_by_addr); + zfs_range_tree_remove(queue->q_exts_by_addr, rstart, rend - + rstart); queue->q_last_ext_addr = -1; return (B_FALSE); } @@ -3361,11 +3364,11 @@ scan_io_queue_gather(dsl_scan_io_queue_t *queue, range_seg_t *rs, list_t *list) * memory limit. * 3) Otherwise we don't select any extents. */ -static range_seg_t * +static zfs_range_seg_t * scan_io_queue_fetch_ext(dsl_scan_io_queue_t *queue) { dsl_scan_t *scn = queue->q_scn; - range_tree_t *rt = queue->q_exts_by_addr; + zfs_range_tree_t *rt = queue->q_exts_by_addr; ASSERT(MUTEX_HELD(&queue->q_vd->vdev_scan_io_queue_lock)); ASSERT(scn->scn_is_sorted); @@ -3384,7 +3387,7 @@ scan_io_queue_fetch_ext(dsl_scan_io_queue_t *queue) */ if ((zfs_scan_issue_strategy < 1 && scn->scn_checkpointing) || zfs_scan_issue_strategy == 1) - return (range_tree_first(rt)); + return (zfs_range_tree_first(rt)); /* * Try to continue previous extent if it is not completed yet. After @@ -3393,10 +3396,10 @@ scan_io_queue_fetch_ext(dsl_scan_io_queue_t *queue) */ uint64_t start; uint64_t size = 1ULL << rt->rt_shift; - range_seg_t *addr_rs; + zfs_range_seg_t *addr_rs; if (queue->q_last_ext_addr != -1) { start = queue->q_last_ext_addr; - addr_rs = range_tree_find(rt, start, size); + addr_rs = zfs_range_tree_find(rt, start, size); if (addr_rs != NULL) return (addr_rs); } @@ -3413,10 +3416,10 @@ scan_io_queue_fetch_ext(dsl_scan_io_queue_t *queue) * We need to get the original entry in the by_addr tree so we can * modify it. 
*/ - addr_rs = range_tree_find(rt, start, size); + addr_rs = zfs_range_tree_find(rt, start, size); ASSERT3P(addr_rs, !=, NULL); - ASSERT3U(rs_get_start(addr_rs, rt), ==, start); - ASSERT3U(rs_get_end(addr_rs, rt), >, start); + ASSERT3U(zfs_rs_get_start(addr_rs, rt), ==, start); + ASSERT3U(zfs_rs_get_end(addr_rs, rt), >, start); return (addr_rs); } @@ -3426,7 +3429,7 @@ scan_io_queues_run_one(void *arg) dsl_scan_io_queue_t *queue = arg; kmutex_t *q_lock = &queue->q_vd->vdev_scan_io_queue_lock; boolean_t suspended = B_FALSE; - range_seg_t *rs; + zfs_range_seg_t *rs; scan_io_t *sio; zio_t *zio; list_t sio_list; @@ -4723,7 +4726,7 @@ scan_io_queue_insert_impl(dsl_scan_io_queue_t *queue, scan_io_t *sio) } avl_insert(&queue->q_sios_by_addr, sio, idx); queue->q_sio_memused += SIO_GET_MUSED(sio); - range_tree_add(queue->q_exts_by_addr, SIO_GET_OFFSET(sio), + zfs_range_tree_add(queue->q_exts_by_addr, SIO_GET_OFFSET(sio), SIO_GET_ASIZE(sio)); } @@ -4983,7 +4986,7 @@ ZFS_BTREE_FIND_IN_BUF_FUNC(ext_size_find_in_buf, uint64_t, ext_size_compare) static void -ext_size_create(range_tree_t *rt, void *arg) +ext_size_create(zfs_range_tree_t *rt, void *arg) { (void) rt; zfs_btree_t *size_tree = arg; @@ -4993,7 +4996,7 @@ ext_size_create(range_tree_t *rt, void *arg) } static void -ext_size_destroy(range_tree_t *rt, void *arg) +ext_size_destroy(zfs_range_tree_t *rt, void *arg) { (void) rt; zfs_btree_t *size_tree = arg; @@ -5003,7 +5006,7 @@ ext_size_destroy(range_tree_t *rt, void *arg) } static uint64_t -ext_size_value(range_tree_t *rt, range_seg_gap_t *rsg) +ext_size_value(zfs_range_tree_t *rt, zfs_range_seg_gap_t *rsg) { (void) rt; uint64_t size = rsg->rs_end - rsg->rs_start; @@ -5014,25 +5017,25 @@ ext_size_value(range_tree_t *rt, range_seg_gap_t *rsg) } static void -ext_size_add(range_tree_t *rt, range_seg_t *rs, void *arg) +ext_size_add(zfs_range_tree_t *rt, zfs_range_seg_t *rs, void *arg) { zfs_btree_t *size_tree = arg; - ASSERT3U(rt->rt_type, ==, RANGE_SEG_GAP); - uint64_t v = ext_size_value(rt, (range_seg_gap_t *)rs); + ASSERT3U(rt->rt_type, ==, ZFS_RANGE_SEG_GAP); + uint64_t v = ext_size_value(rt, (zfs_range_seg_gap_t *)rs); zfs_btree_add(size_tree, &v); } static void -ext_size_remove(range_tree_t *rt, range_seg_t *rs, void *arg) +ext_size_remove(zfs_range_tree_t *rt, zfs_range_seg_t *rs, void *arg) { zfs_btree_t *size_tree = arg; - ASSERT3U(rt->rt_type, ==, RANGE_SEG_GAP); - uint64_t v = ext_size_value(rt, (range_seg_gap_t *)rs); + ASSERT3U(rt->rt_type, ==, ZFS_RANGE_SEG_GAP); + uint64_t v = ext_size_value(rt, (zfs_range_seg_gap_t *)rs); zfs_btree_remove(size_tree, &v); } static void -ext_size_vacate(range_tree_t *rt, void *arg) +ext_size_vacate(zfs_range_tree_t *rt, void *arg) { zfs_btree_t *size_tree = arg; zfs_btree_clear(size_tree); @@ -5041,7 +5044,7 @@ ext_size_vacate(range_tree_t *rt, void *arg) ext_size_create(rt, arg); } -static const range_tree_ops_t ext_size_ops = { +static const zfs_range_tree_ops_t ext_size_ops = { .rtop_create = ext_size_create, .rtop_destroy = ext_size_destroy, .rtop_add = ext_size_add, @@ -5073,8 +5076,9 @@ scan_io_queue_create(vdev_t *vd) q->q_sio_memused = 0; q->q_last_ext_addr = -1; cv_init(&q->q_zio_cv, NULL, CV_DEFAULT, NULL); - q->q_exts_by_addr = range_tree_create_gap(&ext_size_ops, RANGE_SEG_GAP, - &q->q_exts_by_size, 0, vd->vdev_ashift, zfs_scan_max_ext_gap); + q->q_exts_by_addr = zfs_range_tree_create_gap(&ext_size_ops, + ZFS_RANGE_SEG_GAP, &q->q_exts_by_size, 0, vd->vdev_ashift, + zfs_scan_max_ext_gap); avl_create(&q->q_sios_by_addr, sio_addr_compare, sizeof 
(scan_io_t), offsetof(scan_io_t, sio_nodes.sio_addr_node)); @@ -5099,15 +5103,15 @@ dsl_scan_io_queue_destroy(dsl_scan_io_queue_t *queue) atomic_add_64(&scn->scn_queues_pending, -1); while ((sio = avl_destroy_nodes(&queue->q_sios_by_addr, &cookie)) != NULL) { - ASSERT(range_tree_contains(queue->q_exts_by_addr, + ASSERT(zfs_range_tree_contains(queue->q_exts_by_addr, SIO_GET_OFFSET(sio), SIO_GET_ASIZE(sio))); queue->q_sio_memused -= SIO_GET_MUSED(sio); sio_free(sio); } ASSERT0(queue->q_sio_memused); - range_tree_vacate(queue->q_exts_by_addr, NULL, queue); - range_tree_destroy(queue->q_exts_by_addr); + zfs_range_tree_vacate(queue->q_exts_by_addr, NULL, queue); + zfs_range_tree_destroy(queue->q_exts_by_addr); avl_destroy(&queue->q_sios_by_addr); cv_destroy(&queue->q_zio_cv); @@ -5184,10 +5188,10 @@ dsl_scan_freed_dva(spa_t *spa, const blkptr_t *bp, int dva_i) * 1) Cold, just sitting in the queue of zio's to be issued at * some point in the future. In this case, all we do is * remove the zio from the q_sios_by_addr tree, decrement - * its data volume from the containing range_seg_t and + * its data volume from the containing zfs_range_seg_t and * resort the q_exts_by_size tree to reflect that the - * range_seg_t has lost some of its 'fill'. We don't shorten - * the range_seg_t - this is usually rare enough not to be + * zfs_range_seg_t has lost some of its 'fill'. We don't shorten + * the zfs_range_seg_t - this is usually rare enough not to be * worth the extra hassle of trying keep track of precise * extent boundaries. * 2) Hot, where the zio is currently in-flight in @@ -5211,8 +5215,9 @@ dsl_scan_freed_dva(spa_t *spa, const blkptr_t *bp, int dva_i) atomic_add_64(&scn->scn_queues_pending, -1); queue->q_sio_memused -= SIO_GET_MUSED(sio); - ASSERT(range_tree_contains(queue->q_exts_by_addr, start, size)); - range_tree_remove_fill(queue->q_exts_by_addr, start, size); + ASSERT(zfs_range_tree_contains(queue->q_exts_by_addr, start, + size)); + zfs_range_tree_remove_fill(queue->q_exts_by_addr, start, size); /* count the block as though we skipped it */ sio2bp(sio, &tmpbp); diff --git a/sys/contrib/openzfs/module/zfs/metaslab.c b/sys/contrib/openzfs/module/zfs/metaslab.c index 7affbfac9dc7..35bd968f68ce 100644 --- a/sys/contrib/openzfs/module/zfs/metaslab.c +++ b/sys/contrib/openzfs/module/zfs/metaslab.c @@ -146,7 +146,7 @@ static uint_t zfs_mg_fragmentation_threshold = 95; * active metaslab that exceeds this threshold will no longer keep its active * status allowing better metaslabs to be selected. */ -static uint_t zfs_metaslab_fragmentation_threshold = 70; +static uint_t zfs_metaslab_fragmentation_threshold = 77; /* * When set will load all metaslabs when pool is first opened. 
@@ -347,7 +347,8 @@ static uint64_t metaslab_weight_from_range_tree(metaslab_t *msp); static void metaslab_flush_update(metaslab_t *, dmu_tx_t *); static unsigned int metaslab_idx_func(multilist_t *, void *); static void metaslab_evict(metaslab_t *, uint64_t); -static void metaslab_rt_add(range_tree_t *rt, range_seg_t *rs, void *arg); +static void metaslab_rt_add(zfs_range_tree_t *rt, zfs_range_seg_t *rs, + void *arg); kmem_cache_t *metaslab_alloc_trace_cache; typedef struct metaslab_stats { @@ -517,7 +518,7 @@ metaslab_class_histogram_verify(metaslab_class_t *mc) if ((zfs_flags & ZFS_DEBUG_HISTOGRAM_VERIFY) == 0) return; - mc_hist = kmem_zalloc(sizeof (uint64_t) * RANGE_TREE_HISTOGRAM_SIZE, + mc_hist = kmem_zalloc(sizeof (uint64_t) * ZFS_RANGE_TREE_HISTOGRAM_SIZE, KM_SLEEP); mutex_enter(&mc->mc_lock); @@ -537,16 +538,16 @@ metaslab_class_histogram_verify(metaslab_class_t *mc) IMPLY(mg == mg->mg_vd->vdev_log_mg, mc == spa_embedded_log_class(mg->mg_vd->vdev_spa)); - for (i = 0; i < RANGE_TREE_HISTOGRAM_SIZE; i++) + for (i = 0; i < ZFS_RANGE_TREE_HISTOGRAM_SIZE; i++) mc_hist[i] += mg->mg_histogram[i]; } - for (i = 0; i < RANGE_TREE_HISTOGRAM_SIZE; i++) { + for (i = 0; i < ZFS_RANGE_TREE_HISTOGRAM_SIZE; i++) { VERIFY3U(mc_hist[i], ==, mc->mc_histogram[i]); } mutex_exit(&mc->mc_lock); - kmem_free(mc_hist, sizeof (uint64_t) * RANGE_TREE_HISTOGRAM_SIZE); + kmem_free(mc_hist, sizeof (uint64_t) * ZFS_RANGE_TREE_HISTOGRAM_SIZE); } /* @@ -1028,10 +1029,10 @@ metaslab_group_histogram_verify(metaslab_group_t *mg) if ((zfs_flags & ZFS_DEBUG_HISTOGRAM_VERIFY) == 0) return; - mg_hist = kmem_zalloc(sizeof (uint64_t) * RANGE_TREE_HISTOGRAM_SIZE, + mg_hist = kmem_zalloc(sizeof (uint64_t) * ZFS_RANGE_TREE_HISTOGRAM_SIZE, KM_SLEEP); - ASSERT3U(RANGE_TREE_HISTOGRAM_SIZE, >=, + ASSERT3U(ZFS_RANGE_TREE_HISTOGRAM_SIZE, >=, SPACE_MAP_HISTOGRAM_SIZE + ashift); mutex_enter(&mg->mg_lock); @@ -1048,12 +1049,12 @@ metaslab_group_histogram_verify(metaslab_group_t *mg) } } - for (int i = 0; i < RANGE_TREE_HISTOGRAM_SIZE; i ++) + for (int i = 0; i < ZFS_RANGE_TREE_HISTOGRAM_SIZE; i ++) VERIFY3U(mg_hist[i], ==, mg->mg_histogram[i]); mutex_exit(&mg->mg_lock); - kmem_free(mg_hist, sizeof (uint64_t) * RANGE_TREE_HISTOGRAM_SIZE); + kmem_free(mg_hist, sizeof (uint64_t) * ZFS_RANGE_TREE_HISTOGRAM_SIZE); } static void @@ -1175,9 +1176,8 @@ metaslab_group_sort(metaslab_group_t *mg, metaslab_t *msp, uint64_t weight) } /* - * Calculate the fragmentation for a given metaslab group. We can use - * a simple average here since all metaslabs within the group must have - * the same size. The return value will be a value between 0 and 100 + * Calculate the fragmentation for a given metaslab group. Weight metaslabs + * on the amount of free space. The return value will be between 0 and 100 * (inclusive), or ZFS_FRAG_INVALID if less than half of the metaslab in this * group have a fragmentation metric. */ @@ -1186,24 +1186,29 @@ metaslab_group_fragmentation(metaslab_group_t *mg) { vdev_t *vd = mg->mg_vd; uint64_t fragmentation = 0; - uint64_t valid_ms = 0; + uint64_t valid_ms = 0, total_ms = 0; + uint64_t free, total_free = 0; for (int m = 0; m < vd->vdev_ms_count; m++) { metaslab_t *msp = vd->vdev_ms[m]; - if (msp->ms_fragmentation == ZFS_FRAG_INVALID) - continue; if (msp->ms_group != mg) continue; + total_ms++; + if (msp->ms_fragmentation == ZFS_FRAG_INVALID) + continue; valid_ms++; - fragmentation += msp->ms_fragmentation; + free = (msp->ms_size - metaslab_allocated_space(msp)) / + SPA_MINBLOCKSIZE; /* To prevent overflows. 
*/ + total_free += free; + fragmentation += msp->ms_fragmentation * free; } - if (valid_ms <= mg->mg_vd->vdev_ms_count / 2) + if (valid_ms < (total_ms + 1) / 2 || total_free == 0) return (ZFS_FRAG_INVALID); - fragmentation /= valid_ms; + fragmentation /= total_free; ASSERT3U(fragmentation, <=, 100); return (fragmentation); } @@ -1343,8 +1348,8 @@ __attribute__((always_inline)) inline static int metaslab_rangesize32_compare(const void *x1, const void *x2) { - const range_seg32_t *r1 = x1; - const range_seg32_t *r2 = x2; + const zfs_range_seg32_t *r1 = x1; + const zfs_range_seg32_t *r2 = x2; uint64_t rs_size1 = r1->rs_end - r1->rs_start; uint64_t rs_size2 = r2->rs_end - r2->rs_start; @@ -1362,8 +1367,8 @@ __attribute__((always_inline)) inline static int metaslab_rangesize64_compare(const void *x1, const void *x2) { - const range_seg64_t *r1 = x1; - const range_seg64_t *r2 = x2; + const zfs_range_seg64_t *r1 = x1; + const zfs_range_seg64_t *r2 = x2; uint64_t rs_size1 = r1->rs_end - r1->rs_start; uint64_t rs_size2 = r2->rs_end - r2->rs_start; @@ -1379,7 +1384,7 @@ typedef struct metaslab_rt_arg { } metaslab_rt_arg_t; struct mssa_arg { - range_tree_t *rt; + zfs_range_tree_t *rt; metaslab_rt_arg_t *mra; }; @@ -1387,16 +1392,16 @@ static void metaslab_size_sorted_add(void *arg, uint64_t start, uint64_t size) { struct mssa_arg *mssap = arg; - range_tree_t *rt = mssap->rt; + zfs_range_tree_t *rt = mssap->rt; metaslab_rt_arg_t *mrap = mssap->mra; - range_seg_max_t seg = {0}; - rs_set_start(&seg, rt, start); - rs_set_end(&seg, rt, start + size); + zfs_range_seg_max_t seg = {0}; + zfs_rs_set_start(&seg, rt, start); + zfs_rs_set_end(&seg, rt, start + size); metaslab_rt_add(rt, &seg, mrap); } static void -metaslab_size_tree_full_load(range_tree_t *rt) +metaslab_size_tree_full_load(zfs_range_tree_t *rt) { metaslab_rt_arg_t *mrap = rt->rt_arg; METASLABSTAT_BUMP(metaslabstat_reload_tree); @@ -1405,22 +1410,23 @@ metaslab_size_tree_full_load(range_tree_t *rt) struct mssa_arg arg = {0}; arg.rt = rt; arg.mra = mrap; - range_tree_walk(rt, metaslab_size_sorted_add, &arg); + zfs_range_tree_walk(rt, metaslab_size_sorted_add, &arg); } ZFS_BTREE_FIND_IN_BUF_FUNC(metaslab_rt_find_rangesize32_in_buf, - range_seg32_t, metaslab_rangesize32_compare) + zfs_range_seg32_t, metaslab_rangesize32_compare) ZFS_BTREE_FIND_IN_BUF_FUNC(metaslab_rt_find_rangesize64_in_buf, - range_seg64_t, metaslab_rangesize64_compare) + zfs_range_seg64_t, metaslab_rangesize64_compare) /* * Create any block allocator specific components. The current allocators - * rely on using both a size-ordered range_tree_t and an array of uint64_t's. + * rely on using both a size-ordered zfs_range_tree_t and an array of + * uint64_t's. 
*/ static void -metaslab_rt_create(range_tree_t *rt, void *arg) +metaslab_rt_create(zfs_range_tree_t *rt, void *arg) { metaslab_rt_arg_t *mrap = arg; zfs_btree_t *size_tree = mrap->mra_bt; @@ -1429,13 +1435,13 @@ metaslab_rt_create(range_tree_t *rt, void *arg) int (*compare) (const void *, const void *); bt_find_in_buf_f bt_find; switch (rt->rt_type) { - case RANGE_SEG32: - size = sizeof (range_seg32_t); + case ZFS_RANGE_SEG32: + size = sizeof (zfs_range_seg32_t); compare = metaslab_rangesize32_compare; bt_find = metaslab_rt_find_rangesize32_in_buf; break; - case RANGE_SEG64: - size = sizeof (range_seg64_t); + case ZFS_RANGE_SEG64: + size = sizeof (zfs_range_seg64_t); compare = metaslab_rangesize64_compare; bt_find = metaslab_rt_find_rangesize64_in_buf; break; @@ -1447,7 +1453,7 @@ metaslab_rt_create(range_tree_t *rt, void *arg) } static void -metaslab_rt_destroy(range_tree_t *rt, void *arg) +metaslab_rt_destroy(zfs_range_tree_t *rt, void *arg) { (void) rt; metaslab_rt_arg_t *mrap = arg; @@ -1458,12 +1464,12 @@ metaslab_rt_destroy(range_tree_t *rt, void *arg) } static void -metaslab_rt_add(range_tree_t *rt, range_seg_t *rs, void *arg) +metaslab_rt_add(zfs_range_tree_t *rt, zfs_range_seg_t *rs, void *arg) { metaslab_rt_arg_t *mrap = arg; zfs_btree_t *size_tree = mrap->mra_bt; - if (rs_get_end(rs, rt) - rs_get_start(rs, rt) < + if (zfs_rs_get_end(rs, rt) - zfs_rs_get_start(rs, rt) < (1ULL << mrap->mra_floor_shift)) return; @@ -1471,12 +1477,12 @@ metaslab_rt_add(range_tree_t *rt, range_seg_t *rs, void *arg) } static void -metaslab_rt_remove(range_tree_t *rt, range_seg_t *rs, void *arg) +metaslab_rt_remove(zfs_range_tree_t *rt, zfs_range_seg_t *rs, void *arg) { metaslab_rt_arg_t *mrap = arg; zfs_btree_t *size_tree = mrap->mra_bt; - if (rs_get_end(rs, rt) - rs_get_start(rs, rt) < (1ULL << + if (zfs_rs_get_end(rs, rt) - zfs_rs_get_start(rs, rt) < (1ULL << mrap->mra_floor_shift)) return; @@ -1484,7 +1490,7 @@ metaslab_rt_remove(range_tree_t *rt, range_seg_t *rs, void *arg) } static void -metaslab_rt_vacate(range_tree_t *rt, void *arg) +metaslab_rt_vacate(zfs_range_tree_t *rt, void *arg) { metaslab_rt_arg_t *mrap = arg; zfs_btree_t *size_tree = mrap->mra_bt; @@ -1494,7 +1500,7 @@ metaslab_rt_vacate(range_tree_t *rt, void *arg) metaslab_rt_create(rt, arg); } -static const range_tree_ops_t metaslab_rt_ops = { +static const zfs_range_tree_ops_t metaslab_rt_ops = { .rtop_create = metaslab_rt_create, .rtop_destroy = metaslab_rt_destroy, .rtop_add = metaslab_rt_add, @@ -1515,7 +1521,7 @@ uint64_t metaslab_largest_allocatable(metaslab_t *msp) { zfs_btree_t *t = &msp->ms_allocatable_by_size; - range_seg_t *rs; + zfs_range_seg_t *rs; if (t == NULL) return (0); @@ -1526,7 +1532,7 @@ metaslab_largest_allocatable(metaslab_t *msp) if (rs == NULL) return (0); - return (rs_get_end(rs, msp->ms_allocatable) - rs_get_start(rs, + return (zfs_rs_get_end(rs, msp->ms_allocatable) - zfs_rs_get_start(rs, msp->ms_allocatable)); } @@ -1544,7 +1550,7 @@ metaslab_largest_unflushed_free(metaslab_t *msp) if (zfs_btree_numnodes(&msp->ms_unflushed_frees_by_size) == 0) metaslab_size_tree_full_load(msp->ms_unflushed_frees); - range_seg_t *rs = zfs_btree_last(&msp->ms_unflushed_frees_by_size, + zfs_range_seg_t *rs = zfs_btree_last(&msp->ms_unflushed_frees_by_size, NULL); if (rs == NULL) return (0); @@ -1572,13 +1578,13 @@ metaslab_largest_unflushed_free(metaslab_t *msp) * the largest segment; there may be other usable chunks in the * largest segment, but we ignore them. 
*/ - uint64_t rstart = rs_get_start(rs, msp->ms_unflushed_frees); - uint64_t rsize = rs_get_end(rs, msp->ms_unflushed_frees) - rstart; + uint64_t rstart = zfs_rs_get_start(rs, msp->ms_unflushed_frees); + uint64_t rsize = zfs_rs_get_end(rs, msp->ms_unflushed_frees) - rstart; for (int t = 0; t < TXG_DEFER_SIZE; t++) { uint64_t start = 0; uint64_t size = 0; - boolean_t found = range_tree_find_in(msp->ms_defer[t], rstart, - rsize, &start, &size); + boolean_t found = zfs_range_tree_find_in(msp->ms_defer[t], + rstart, rsize, &start, &size); if (found) { if (rstart == start) return (0); @@ -1588,7 +1594,7 @@ metaslab_largest_unflushed_free(metaslab_t *msp) uint64_t start = 0; uint64_t size = 0; - boolean_t found = range_tree_find_in(msp->ms_freed, rstart, + boolean_t found = zfs_range_tree_find_in(msp->ms_freed, rstart, rsize, &start, &size); if (found) rsize = start - rstart; @@ -1596,15 +1602,15 @@ metaslab_largest_unflushed_free(metaslab_t *msp) return (rsize); } -static range_seg_t * -metaslab_block_find(zfs_btree_t *t, range_tree_t *rt, uint64_t start, +static zfs_range_seg_t * +metaslab_block_find(zfs_btree_t *t, zfs_range_tree_t *rt, uint64_t start, uint64_t size, zfs_btree_index_t *where) { - range_seg_t *rs; - range_seg_max_t rsearch; + zfs_range_seg_t *rs; + zfs_range_seg_max_t rsearch; - rs_set_start(&rsearch, rt, start); - rs_set_end(&rsearch, rt, start + size); + zfs_rs_set_start(&rsearch, rt, start); + zfs_rs_set_end(&rsearch, rt, start + size); rs = zfs_btree_find(t, &rsearch, where); if (rs == NULL) { @@ -1620,24 +1626,25 @@ metaslab_block_find(zfs_btree_t *t, range_tree_t *rt, uint64_t start, * for a block that matches the specified criteria. */ static uint64_t -metaslab_block_picker(range_tree_t *rt, uint64_t *cursor, uint64_t size, +metaslab_block_picker(zfs_range_tree_t *rt, uint64_t *cursor, uint64_t size, uint64_t max_search) { if (*cursor == 0) *cursor = rt->rt_start; zfs_btree_t *bt = &rt->rt_root; zfs_btree_index_t where; - range_seg_t *rs = metaslab_block_find(bt, rt, *cursor, size, &where); + zfs_range_seg_t *rs = metaslab_block_find(bt, rt, *cursor, size, + &where); uint64_t first_found; int count_searched = 0; if (rs != NULL) - first_found = rs_get_start(rs, rt); + first_found = zfs_rs_get_start(rs, rt); - while (rs != NULL && (rs_get_start(rs, rt) - first_found <= + while (rs != NULL && (zfs_rs_get_start(rs, rt) - first_found <= max_search || count_searched < metaslab_min_search_count)) { - uint64_t offset = rs_get_start(rs, rt); - if (offset + size <= rs_get_end(rs, rt)) { + uint64_t offset = zfs_rs_get_start(rs, rt); + if (offset + size <= zfs_rs_get_end(rs, rt)) { *cursor = offset + size; return (offset); } @@ -1748,8 +1755,8 @@ metaslab_df_alloc(metaslab_t *msp, uint64_t size) */ uint64_t align = size & -size; uint64_t *cursor = &msp->ms_lbas[highbit64(align) - 1]; - range_tree_t *rt = msp->ms_allocatable; - uint_t free_pct = range_tree_space(rt) * 100 / msp->ms_size; + zfs_range_tree_t *rt = msp->ms_allocatable; + uint_t free_pct = zfs_range_tree_space(rt) * 100 / msp->ms_size; uint64_t offset; ASSERT(MUTEX_HELD(&msp->ms_lock)); @@ -1767,7 +1774,7 @@ metaslab_df_alloc(metaslab_t *msp, uint64_t size) } if (offset == -1) { - range_seg_t *rs; + zfs_range_seg_t *rs; if (zfs_btree_numnodes(&msp->ms_allocatable_by_size) == 0) metaslab_size_tree_full_load(msp->ms_allocatable); @@ -1780,9 +1787,9 @@ metaslab_df_alloc(metaslab_t *msp, uint64_t size) rs = metaslab_block_find(&msp->ms_allocatable_by_size, rt, msp->ms_start, size, &where); } - if (rs != NULL && 
rs_get_start(rs, rt) + size <= rs_get_end(rs, - rt)) { - offset = rs_get_start(rs, rt); + if (rs != NULL && zfs_rs_get_start(rs, rt) + size <= + zfs_rs_get_end(rs, rt)) { + offset = zfs_rs_get_start(rs, rt); *cursor = offset + size; } } @@ -1802,7 +1809,7 @@ metaslab_df_alloc(metaslab_t *msp, uint64_t size) static uint64_t metaslab_cf_alloc(metaslab_t *msp, uint64_t size) { - range_tree_t *rt = msp->ms_allocatable; + zfs_range_tree_t *rt = msp->ms_allocatable; zfs_btree_t *t = &msp->ms_allocatable_by_size; uint64_t *cursor = &msp->ms_lbas[0]; uint64_t *cursor_end = &msp->ms_lbas[1]; @@ -1813,17 +1820,17 @@ metaslab_cf_alloc(metaslab_t *msp, uint64_t size) ASSERT3U(*cursor_end, >=, *cursor); if ((*cursor + size) > *cursor_end) { - range_seg_t *rs; + zfs_range_seg_t *rs; if (zfs_btree_numnodes(t) == 0) metaslab_size_tree_full_load(msp->ms_allocatable); rs = zfs_btree_last(t, NULL); - if (rs == NULL || (rs_get_end(rs, rt) - rs_get_start(rs, rt)) < - size) + if (rs == NULL || (zfs_rs_get_end(rs, rt) - + zfs_rs_get_start(rs, rt)) < size) return (-1ULL); - *cursor = rs_get_start(rs, rt); - *cursor_end = rs_get_end(rs, rt); + *cursor = zfs_rs_get_start(rs, rt); + *cursor_end = zfs_rs_get_end(rs, rt); } offset = *cursor; @@ -1851,10 +1858,10 @@ static uint64_t metaslab_ndf_alloc(metaslab_t *msp, uint64_t size) { zfs_btree_t *t = &msp->ms_allocatable->rt_root; - range_tree_t *rt = msp->ms_allocatable; + zfs_range_tree_t *rt = msp->ms_allocatable; zfs_btree_index_t where; - range_seg_t *rs; - range_seg_max_t rsearch; + zfs_range_seg_t *rs; + zfs_range_seg_max_t rsearch; uint64_t hbit = highbit64(size); uint64_t *cursor = &msp->ms_lbas[hbit - 1]; uint64_t max_size = metaslab_largest_allocatable(msp); @@ -1864,15 +1871,16 @@ metaslab_ndf_alloc(metaslab_t *msp, uint64_t size) if (max_size < size) return (-1ULL); - rs_set_start(&rsearch, rt, *cursor); - rs_set_end(&rsearch, rt, *cursor + size); + zfs_rs_set_start(&rsearch, rt, *cursor); + zfs_rs_set_end(&rsearch, rt, *cursor + size); rs = zfs_btree_find(t, &rsearch, &where); - if (rs == NULL || (rs_get_end(rs, rt) - rs_get_start(rs, rt)) < size) { + if (rs == NULL || (zfs_rs_get_end(rs, rt) - zfs_rs_get_start(rs, rt)) < + size) { t = &msp->ms_allocatable_by_size; - rs_set_start(&rsearch, rt, 0); - rs_set_end(&rsearch, rt, MIN(max_size, 1ULL << (hbit + + zfs_rs_set_start(&rsearch, rt, 0); + zfs_rs_set_end(&rsearch, rt, MIN(max_size, 1ULL << (hbit + metaslab_ndf_clump_shift))); rs = zfs_btree_find(t, &rsearch, &where); @@ -1881,9 +1889,9 @@ metaslab_ndf_alloc(metaslab_t *msp, uint64_t size) ASSERT(rs != NULL); } - if ((rs_get_end(rs, rt) - rs_get_start(rs, rt)) >= size) { - *cursor = rs_get_start(rs, rt) + size; - return (rs_get_start(rs, rt)); + if ((zfs_rs_get_end(rs, rt) - zfs_rs_get_start(rs, rt)) >= size) { + *cursor = zfs_rs_get_start(rs, rt) + size; + return (zfs_rs_get_start(rs, rt)); } return (-1ULL); } @@ -1973,12 +1981,12 @@ metaslab_verify_space(metaslab_t *msp, uint64_t txg) ASSERT3S(space_map_allocated(msp->ms_sm), >=, 0); ASSERT3U(space_map_allocated(msp->ms_sm), >=, - range_tree_space(msp->ms_unflushed_frees)); + zfs_range_tree_space(msp->ms_unflushed_frees)); ASSERT3U(metaslab_allocated_space(msp), ==, space_map_allocated(msp->ms_sm) + - range_tree_space(msp->ms_unflushed_allocs) - - range_tree_space(msp->ms_unflushed_frees)); + zfs_range_tree_space(msp->ms_unflushed_allocs) - + zfs_range_tree_space(msp->ms_unflushed_frees)); sm_free_space = msp->ms_size - metaslab_allocated_space(msp); @@ -1988,17 +1996,19 @@ 
metaslab_verify_space(metaslab_t *msp, uint64_t txg) */ for (int t = 0; t < TXG_CONCURRENT_STATES; t++) { allocating += - range_tree_space(msp->ms_allocating[(txg + t) & TXG_MASK]); + zfs_range_tree_space(msp->ms_allocating[(txg + t) & + TXG_MASK]); } ASSERT3U(allocating + msp->ms_allocated_this_txg, ==, msp->ms_allocating_total); ASSERT3U(msp->ms_deferspace, ==, - range_tree_space(msp->ms_defer[0]) + - range_tree_space(msp->ms_defer[1])); + zfs_range_tree_space(msp->ms_defer[0]) + + zfs_range_tree_space(msp->ms_defer[1])); - msp_free_space = range_tree_space(msp->ms_allocatable) + allocating + - msp->ms_deferspace + range_tree_space(msp->ms_freed); + msp_free_space = zfs_range_tree_space(msp->ms_allocatable) + + allocating + msp->ms_deferspace + + zfs_range_tree_space(msp->ms_freed); VERIFY3U(sm_free_space, ==, msp_free_space); } @@ -2019,7 +2029,7 @@ metaslab_aux_histograms_clear(metaslab_t *msp) static void metaslab_aux_histogram_add(uint64_t *histogram, uint64_t shift, - range_tree_t *rt) + zfs_range_tree_t *rt) { /* * This is modeled after space_map_histogram_add(), so refer to that @@ -2029,7 +2039,7 @@ metaslab_aux_histogram_add(uint64_t *histogram, uint64_t shift, * from the space map histogram. */ int idx = 0; - for (int i = shift; i < RANGE_TREE_HISTOGRAM_SIZE; i++) { + for (int i = shift; i < ZFS_RANGE_TREE_HISTOGRAM_SIZE; i++) { ASSERT3U(i, >=, idx + shift); histogram[idx] += rt->rt_histogram[i] << (i - idx - shift); @@ -2167,7 +2177,7 @@ metaslab_verify_weight_and_frag(metaslab_t *msp) /* some extra verification for in-core tree if you can */ if (msp->ms_loaded) { - range_tree_stat_verify(msp->ms_allocatable); + zfs_range_tree_stat_verify(msp->ms_allocatable); VERIFY(space_map_histogram_verify(msp->ms_sm, msp->ms_allocatable)); } @@ -2355,8 +2365,8 @@ metaslab_load_impl(metaslab_t *msp) struct mssa_arg arg = {0}; arg.rt = msp->ms_allocatable; arg.mra = mrap; - range_tree_walk(msp->ms_allocatable, metaslab_size_sorted_add, - &arg); + zfs_range_tree_walk(msp->ms_allocatable, + metaslab_size_sorted_add, &arg); } else { /* * Add the size-sorted tree first, since we don't need to load @@ -2370,7 +2380,7 @@ metaslab_load_impl(metaslab_t *msp) * all the space in the metaslab as free and add it to the * ms_allocatable tree. */ - range_tree_add(msp->ms_allocatable, + zfs_range_tree_add(msp->ms_allocatable, msp->ms_start, msp->ms_size); if (msp->ms_new) { @@ -2381,8 +2391,10 @@ metaslab_load_impl(metaslab_t *msp) * expect any unflushed allocs or frees from previous * TXGs. */ - ASSERT(range_tree_is_empty(msp->ms_unflushed_allocs)); - ASSERT(range_tree_is_empty(msp->ms_unflushed_frees)); + ASSERT(zfs_range_tree_is_empty( + msp->ms_unflushed_allocs)); + ASSERT(zfs_range_tree_is_empty( + msp->ms_unflushed_frees)); } } @@ -2412,10 +2424,10 @@ metaslab_load_impl(metaslab_t *msp) * away so any manipulations we do below have a clear view * of what is allocated and what is free. */ - range_tree_walk(msp->ms_unflushed_allocs, - range_tree_remove, msp->ms_allocatable); - range_tree_walk(msp->ms_unflushed_frees, - range_tree_add, msp->ms_allocatable); + zfs_range_tree_walk(msp->ms_unflushed_allocs, + zfs_range_tree_remove, msp->ms_allocatable); + zfs_range_tree_walk(msp->ms_unflushed_frees, + zfs_range_tree_add, msp->ms_allocatable); ASSERT3P(msp->ms_group, !=, NULL); spa_t *spa = msp->ms_group->mg_vd->vdev_spa; @@ -2443,8 +2455,8 @@ metaslab_load_impl(metaslab_t *msp) * correctly doesn't contain any segments that exist * in ms_freed [see ms_synced_length]. 
*/ - range_tree_walk(msp->ms_freed, - range_tree_remove, msp->ms_allocatable); + zfs_range_tree_walk(msp->ms_freed, + zfs_range_tree_remove, msp->ms_allocatable); } /* @@ -2462,8 +2474,8 @@ metaslab_load_impl(metaslab_t *msp) * code path. */ for (int t = 0; t < TXG_DEFER_SIZE; t++) { - range_tree_walk(msp->ms_defer[t], - range_tree_remove, msp->ms_allocatable); + zfs_range_tree_walk(msp->ms_defer[t], + zfs_range_tree_remove, msp->ms_allocatable); } /* @@ -2498,11 +2510,11 @@ metaslab_load_impl(metaslab_t *msp) (u_longlong_t)msp->ms_group->mg_vd->vdev_id, (u_longlong_t)msp->ms_id, (u_longlong_t)space_map_length(msp->ms_sm), - (u_longlong_t)range_tree_space(msp->ms_unflushed_allocs), - (u_longlong_t)range_tree_space(msp->ms_unflushed_frees), - (u_longlong_t)range_tree_space(msp->ms_freed), - (u_longlong_t)range_tree_space(msp->ms_defer[0]), - (u_longlong_t)range_tree_space(msp->ms_defer[1]), + (u_longlong_t)zfs_range_tree_space(msp->ms_unflushed_allocs), + (u_longlong_t)zfs_range_tree_space(msp->ms_unflushed_frees), + (u_longlong_t)zfs_range_tree_space(msp->ms_freed), + (u_longlong_t)zfs_range_tree_space(msp->ms_defer[0]), + (u_longlong_t)zfs_range_tree_space(msp->ms_defer[1]), (longlong_t)((load_start - msp->ms_unload_time) / 1000000), (longlong_t)((load_end - load_start) / 1000000), (u_longlong_t)msp->ms_max_size, @@ -2584,7 +2596,7 @@ metaslab_unload(metaslab_t *msp) if (!msp->ms_loaded) return; - range_tree_vacate(msp->ms_allocatable, NULL, NULL); + zfs_range_tree_vacate(msp->ms_allocatable, NULL, NULL); msp->ms_loaded = B_FALSE; msp->ms_unload_time = gethrtime(); @@ -2640,7 +2652,7 @@ metaslab_unload(metaslab_t *msp) * the vdev_ms_shift - the vdev_ashift is less than 32, we can store * the ranges using two uint32_ts, rather than two uint64_ts. */ -range_seg_type_t +zfs_range_seg_type_t metaslab_calculate_range_tree_type(vdev_t *vdev, metaslab_t *msp, uint64_t *start, uint64_t *shift) { @@ -2648,11 +2660,11 @@ metaslab_calculate_range_tree_type(vdev_t *vdev, metaslab_t *msp, !zfs_metaslab_force_large_segs) { *shift = vdev->vdev_ashift; *start = msp->ms_start; - return (RANGE_SEG32); + return (ZFS_RANGE_SEG32); } else { *shift = 0; *start = 0; - return (RANGE_SEG64); + return (ZFS_RANGE_SEG64); } } @@ -2738,32 +2750,33 @@ metaslab_init(metaslab_group_t *mg, uint64_t id, uint64_t object, } uint64_t shift, start; - range_seg_type_t type = + zfs_range_seg_type_t type = metaslab_calculate_range_tree_type(vd, ms, &start, &shift); - ms->ms_allocatable = range_tree_create(NULL, type, NULL, start, shift); + ms->ms_allocatable = zfs_range_tree_create(NULL, type, NULL, start, + shift); for (int t = 0; t < TXG_SIZE; t++) { - ms->ms_allocating[t] = range_tree_create(NULL, type, + ms->ms_allocating[t] = zfs_range_tree_create(NULL, type, NULL, start, shift); } - ms->ms_freeing = range_tree_create(NULL, type, NULL, start, shift); - ms->ms_freed = range_tree_create(NULL, type, NULL, start, shift); + ms->ms_freeing = zfs_range_tree_create(NULL, type, NULL, start, shift); + ms->ms_freed = zfs_range_tree_create(NULL, type, NULL, start, shift); for (int t = 0; t < TXG_DEFER_SIZE; t++) { - ms->ms_defer[t] = range_tree_create(NULL, type, NULL, + ms->ms_defer[t] = zfs_range_tree_create(NULL, type, NULL, start, shift); } ms->ms_checkpointing = - range_tree_create(NULL, type, NULL, start, shift); + zfs_range_tree_create(NULL, type, NULL, start, shift); ms->ms_unflushed_allocs = - range_tree_create(NULL, type, NULL, start, shift); + zfs_range_tree_create(NULL, type, NULL, start, shift); metaslab_rt_arg_t *mrap = 
kmem_zalloc(sizeof (*mrap), KM_SLEEP); mrap->mra_bt = &ms->ms_unflushed_frees_by_size; mrap->mra_floor_shift = metaslab_by_size_min_shift; - ms->ms_unflushed_frees = range_tree_create(&metaslab_rt_ops, + ms->ms_unflushed_frees = zfs_range_tree_create(&metaslab_rt_ops, type, mrap, start, shift); - ms->ms_trim = range_tree_create(NULL, type, NULL, start, shift); + ms->ms_trim = zfs_range_tree_create(NULL, type, NULL, start, shift); metaslab_group_add(mg, ms); metaslab_set_fragmentation(ms, B_FALSE); @@ -2817,8 +2830,8 @@ metaslab_fini_flush_data(metaslab_t *msp) uint64_t metaslab_unflushed_changes_memused(metaslab_t *ms) { - return ((range_tree_numsegs(ms->ms_unflushed_allocs) + - range_tree_numsegs(ms->ms_unflushed_frees)) * + return ((zfs_range_tree_numsegs(ms->ms_unflushed_allocs) + + zfs_range_tree_numsegs(ms->ms_unflushed_frees)) * ms->ms_unflushed_allocs->rt_root.bt_elem_size); } @@ -2851,33 +2864,33 @@ metaslab_fini(metaslab_t *msp) metaslab_unload(msp); - range_tree_destroy(msp->ms_allocatable); - range_tree_destroy(msp->ms_freeing); - range_tree_destroy(msp->ms_freed); + zfs_range_tree_destroy(msp->ms_allocatable); + zfs_range_tree_destroy(msp->ms_freeing); + zfs_range_tree_destroy(msp->ms_freed); ASSERT3U(spa->spa_unflushed_stats.sus_memused, >=, metaslab_unflushed_changes_memused(msp)); spa->spa_unflushed_stats.sus_memused -= metaslab_unflushed_changes_memused(msp); - range_tree_vacate(msp->ms_unflushed_allocs, NULL, NULL); - range_tree_destroy(msp->ms_unflushed_allocs); - range_tree_destroy(msp->ms_checkpointing); - range_tree_vacate(msp->ms_unflushed_frees, NULL, NULL); - range_tree_destroy(msp->ms_unflushed_frees); + zfs_range_tree_vacate(msp->ms_unflushed_allocs, NULL, NULL); + zfs_range_tree_destroy(msp->ms_unflushed_allocs); + zfs_range_tree_destroy(msp->ms_checkpointing); + zfs_range_tree_vacate(msp->ms_unflushed_frees, NULL, NULL); + zfs_range_tree_destroy(msp->ms_unflushed_frees); for (int t = 0; t < TXG_SIZE; t++) { - range_tree_destroy(msp->ms_allocating[t]); + zfs_range_tree_destroy(msp->ms_allocating[t]); } for (int t = 0; t < TXG_DEFER_SIZE; t++) { - range_tree_destroy(msp->ms_defer[t]); + zfs_range_tree_destroy(msp->ms_defer[t]); } ASSERT0(msp->ms_deferspace); for (int t = 0; t < TXG_SIZE; t++) ASSERT(!txg_list_member(&vd->vdev_ms_list, msp, t)); - range_tree_vacate(msp->ms_trim, NULL, NULL); - range_tree_destroy(msp->ms_trim); + zfs_range_tree_vacate(msp->ms_trim, NULL, NULL); + zfs_range_tree_destroy(msp->ms_trim); mutex_exit(&msp->ms_lock); cv_destroy(&msp->ms_load_cv); @@ -2889,8 +2902,6 @@ metaslab_fini(metaslab_t *msp) kmem_free(msp, sizeof (metaslab_t)); } -#define FRAGMENTATION_TABLE_SIZE 17 - /* * This table defines a segment size based fragmentation metric that will * allow each metaslab to derive its own fragmentation value. This is done @@ -2901,33 +2912,40 @@ metaslab_fini(metaslab_t *msp) * us the fragmentation metric. This means that a high fragmentation metric * equates to most of the free space being comprised of small segments. * Conversely, if the metric is low, then most of the free space is in - * large segments. A 10% change in fragmentation equates to approximately - * double the number of segments. + * large segments. * - * This table defines 0% fragmented space using 16MB segments. Testing has - * shown that segments that are greater than or equal to 16MB do not suffer - * from drastic performance problems. Using this value, we derive the rest - * of the table. 
Since the fragmentation value is never stored on disk, it - * is possible to change these calculations in the future. + * This table defines 0% fragmented space using 512M segments. Using this value, + * we derive the rest of the table. This table originally went up to 16MB, but + * with larger recordsizes, larger ashifts, and use of raidz3, it is possible + * to have significantly larger allocations than were previously possible. + * Since the fragmentation value is never stored on disk, it is possible to + * change these calculations in the future. */ -static const int zfs_frag_table[FRAGMENTATION_TABLE_SIZE] = { +static const int zfs_frag_table[] = { 100, /* 512B */ - 100, /* 1K */ - 98, /* 2K */ - 95, /* 4K */ - 90, /* 8K */ - 80, /* 16K */ - 70, /* 32K */ - 60, /* 64K */ - 50, /* 128K */ - 40, /* 256K */ - 30, /* 512K */ - 20, /* 1M */ - 15, /* 2M */ - 10, /* 4M */ - 5, /* 8M */ - 0 /* 16M */ + 99, /* 1K */ + 97, /* 2K */ + 93, /* 4K */ + 88, /* 8K */ + 83, /* 16K */ + 77, /* 32K */ + 71, /* 64K */ + 64, /* 128K */ + 57, /* 256K */ + 50, /* 512K */ + 43, /* 1M */ + 36, /* 2M */ + 29, /* 4M */ + 23, /* 8M */ + 17, /* 16M */ + 12, /* 32M */ + 7, /* 64M */ + 3, /* 128M */ + 1, /* 256M */ + 0, /* 512M */ }; +#define FRAGMENTATION_TABLE_SIZE \ + (sizeof (zfs_frag_table)/(sizeof (zfs_frag_table[0]))) /* * Calculate the metaslab's fragmentation metric and set ms_fragmentation. @@ -3096,7 +3114,7 @@ metaslab_weight_from_range_tree(metaslab_t *msp) ASSERT(msp->ms_loaded); - for (int i = RANGE_TREE_HISTOGRAM_SIZE - 1; i >= SPA_MINBLOCKSHIFT; + for (int i = ZFS_RANGE_TREE_HISTOGRAM_SIZE - 1; i >= SPA_MINBLOCKSHIFT; i--) { uint8_t shift = msp->ms_group->mg_vd->vdev_ashift; int max_idx = SPACE_MAP_HISTOGRAM_SIZE + shift - 1; @@ -3440,7 +3458,7 @@ metaslab_activate(metaslab_t *msp, int allocator, uint64_t activation_weight) * lock. */ if (msp->ms_weight == 0) { - ASSERT0(range_tree_space(msp->ms_allocatable)); + ASSERT0(zfs_range_tree_space(msp->ms_allocatable)); return (SET_ERROR(ENOSPC)); } @@ -3499,7 +3517,7 @@ metaslab_passivate(metaslab_t *msp, uint64_t weight) */ ASSERT(!WEIGHT_IS_SPACEBASED(msp->ms_weight) || size >= SPA_MINBLOCKSIZE || - range_tree_space(msp->ms_allocatable) == 0); + zfs_range_tree_space(msp->ms_allocatable) == 0); ASSERT0(weight & METASLAB_ACTIVE_MASK); ASSERT(msp->ms_activation_weight != 0); @@ -3630,7 +3648,7 @@ metaslab_should_condense(metaslab_t *msp) * We always condense metaslabs that are empty and metaslabs for * which a condense request has been made. */ - if (range_tree_numsegs(msp->ms_allocatable) == 0 || + if (zfs_range_tree_numsegs(msp->ms_allocatable) == 0 || msp->ms_condense_wanted) return (B_TRUE); @@ -3654,7 +3672,7 @@ metaslab_should_condense(metaslab_t *msp) static void metaslab_condense(metaslab_t *msp, dmu_tx_t *tx) { - range_tree_t *condense_tree; + zfs_range_tree_t *condense_tree; space_map_t *sm = msp->ms_sm; uint64_t txg = dmu_tx_get_txg(tx); spa_t *spa = msp->ms_group->mg_vd->vdev_spa; @@ -3706,41 +3724,41 @@ metaslab_condense(metaslab_t *msp, dmu_tx_t *tx) * metaslab_flush_update(). 
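The fragmentation table above is consumed by weighting each space map histogram bucket's free space by its table entry and averaging the result. The following is a simplified, hypothetical sketch of that calculation, not the code in this patch; it assumes zfs_frag_table and FRAGMENTATION_TABLE_SIZE are in scope and that hist[i] counts free segments of roughly 2^(i + shift) bytes:

static int
example_fragmentation_pct(const uint64_t *hist, int nbuckets, int shift)
{
	uint64_t total = 0, weighted = 0;

	for (int i = 0; i < nbuckets; i++) {
		if (hist[i] == 0)
			continue;
		/* Clamp to the table: entry 0 is 512B (2^9), the last is 512M. */
		int idx = i + shift - 9;
		if (idx < 0)
			idx = 0;
		if (idx > FRAGMENTATION_TABLE_SIZE - 1)
			idx = FRAGMENTATION_TABLE_SIZE - 1;
		uint64_t space = hist[i] << (i + shift);	/* bytes in this bucket */
		total += space;
		weighted += space * zfs_frag_table[idx];
	}
	return (total == 0 ? 0 : (int)(weighted / total));
}

A pool whose free space sits mostly in small buckets lands near the top of the table and yields a high percentage; free space concentrated in 512M-or-larger segments yields a value near zero.
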
*/ ASSERT3U(spa_sync_pass(spa), ==, 1); - ASSERT(range_tree_is_empty(msp->ms_freed)); /* since it is pass 1 */ + ASSERT(zfs_range_tree_is_empty(msp->ms_freed)); /* since it is pass 1 */ zfs_dbgmsg("condensing: txg %llu, msp[%llu] %px, vdev id %llu, " "spa %s, smp size %llu, segments %llu, forcing condense=%s", (u_longlong_t)txg, (u_longlong_t)msp->ms_id, msp, (u_longlong_t)msp->ms_group->mg_vd->vdev_id, spa->spa_name, (u_longlong_t)space_map_length(msp->ms_sm), - (u_longlong_t)range_tree_numsegs(msp->ms_allocatable), + (u_longlong_t)zfs_range_tree_numsegs(msp->ms_allocatable), msp->ms_condense_wanted ? "TRUE" : "FALSE"); msp->ms_condense_wanted = B_FALSE; - range_seg_type_t type; + zfs_range_seg_type_t type; uint64_t shift, start; type = metaslab_calculate_range_tree_type(msp->ms_group->mg_vd, msp, &start, &shift); - condense_tree = range_tree_create(NULL, type, NULL, start, shift); + condense_tree = zfs_range_tree_create(NULL, type, NULL, start, shift); for (int t = 0; t < TXG_DEFER_SIZE; t++) { - range_tree_walk(msp->ms_defer[t], - range_tree_add, condense_tree); + zfs_range_tree_walk(msp->ms_defer[t], + zfs_range_tree_add, condense_tree); } for (int t = 0; t < TXG_CONCURRENT_STATES; t++) { - range_tree_walk(msp->ms_allocating[(txg + t) & TXG_MASK], - range_tree_add, condense_tree); + zfs_range_tree_walk(msp->ms_allocating[(txg + t) & TXG_MASK], + zfs_range_tree_add, condense_tree); } ASSERT3U(spa->spa_unflushed_stats.sus_memused, >=, metaslab_unflushed_changes_memused(msp)); spa->spa_unflushed_stats.sus_memused -= metaslab_unflushed_changes_memused(msp); - range_tree_vacate(msp->ms_unflushed_allocs, NULL, NULL); - range_tree_vacate(msp->ms_unflushed_frees, NULL, NULL); + zfs_range_tree_vacate(msp->ms_unflushed_allocs, NULL, NULL); + zfs_range_tree_vacate(msp->ms_unflushed_frees, NULL, NULL); /* * We're about to drop the metaslab's lock thus allowing other @@ -3780,17 +3798,17 @@ metaslab_condense(metaslab_t *msp, dmu_tx_t *tx) * followed by FREES (due to space_map_write() in metaslab_sync()) for * sync pass 1. 
*/ - range_tree_t *tmp_tree = range_tree_create(NULL, type, NULL, start, - shift); - range_tree_add(tmp_tree, msp->ms_start, msp->ms_size); + zfs_range_tree_t *tmp_tree = zfs_range_tree_create(NULL, type, NULL, + start, shift); + zfs_range_tree_add(tmp_tree, msp->ms_start, msp->ms_size); space_map_write(sm, tmp_tree, SM_ALLOC, SM_NO_VDEVID, tx); space_map_write(sm, msp->ms_allocatable, SM_FREE, SM_NO_VDEVID, tx); space_map_write(sm, condense_tree, SM_FREE, SM_NO_VDEVID, tx); - range_tree_vacate(condense_tree, NULL, NULL); - range_tree_destroy(condense_tree); - range_tree_vacate(tmp_tree, NULL, NULL); - range_tree_destroy(tmp_tree); + zfs_range_tree_vacate(condense_tree, NULL, NULL); + zfs_range_tree_destroy(condense_tree); + zfs_range_tree_vacate(tmp_tree, NULL, NULL); + zfs_range_tree_destroy(tmp_tree); mutex_enter(&msp->ms_lock); msp->ms_condensing = B_FALSE; @@ -3803,8 +3821,8 @@ metaslab_unflushed_add(metaslab_t *msp, dmu_tx_t *tx) spa_t *spa = msp->ms_group->mg_vd->vdev_spa; ASSERT(spa_syncing_log_sm(spa) != NULL); ASSERT(msp->ms_sm != NULL); - ASSERT(range_tree_is_empty(msp->ms_unflushed_allocs)); - ASSERT(range_tree_is_empty(msp->ms_unflushed_frees)); + ASSERT(zfs_range_tree_is_empty(msp->ms_unflushed_allocs)); + ASSERT(zfs_range_tree_is_empty(msp->ms_unflushed_frees)); mutex_enter(&spa->spa_flushed_ms_lock); metaslab_set_unflushed_txg(msp, spa_syncing_txg(spa), tx); @@ -3824,8 +3842,8 @@ metaslab_unflushed_bump(metaslab_t *msp, dmu_tx_t *tx, boolean_t dirty) ASSERT(msp->ms_sm != NULL); ASSERT(metaslab_unflushed_txg(msp) != 0); ASSERT3P(avl_find(&spa->spa_metaslabs_by_flushed, msp, NULL), ==, msp); - ASSERT(range_tree_is_empty(msp->ms_unflushed_allocs)); - ASSERT(range_tree_is_empty(msp->ms_unflushed_frees)); + ASSERT(zfs_range_tree_is_empty(msp->ms_unflushed_allocs)); + ASSERT(zfs_range_tree_is_empty(msp->ms_unflushed_frees)); VERIFY3U(tx->tx_txg, <=, spa_final_dirty_txg(spa)); @@ -3945,7 +3963,7 @@ metaslab_flush(metaslab_t *msp, dmu_tx_t *tx) space_map_histogram_clear(msp->ms_sm); space_map_histogram_add(msp->ms_sm, msp->ms_allocatable, tx); - ASSERT(range_tree_is_empty(msp->ms_freed)); + ASSERT(zfs_range_tree_is_empty(msp->ms_freed)); for (int t = 0; t < TXG_DEFER_SIZE; t++) { space_map_histogram_add(msp->ms_sm, msp->ms_defer[t], tx); @@ -3987,8 +4005,10 @@ metaslab_flush(metaslab_t *msp, dmu_tx_t *tx) spa_name(spa), (u_longlong_t)msp->ms_group->mg_vd->vdev_id, (u_longlong_t)msp->ms_id, - (u_longlong_t)range_tree_space(msp->ms_unflushed_allocs), - (u_longlong_t)range_tree_space(msp->ms_unflushed_frees), + (u_longlong_t)zfs_range_tree_space( + msp->ms_unflushed_allocs), + (u_longlong_t)zfs_range_tree_space( + msp->ms_unflushed_frees), (u_longlong_t)(sm_len_after - sm_len_before)); } @@ -3996,8 +4016,8 @@ metaslab_flush(metaslab_t *msp, dmu_tx_t *tx) metaslab_unflushed_changes_memused(msp)); spa->spa_unflushed_stats.sus_memused -= metaslab_unflushed_changes_memused(msp); - range_tree_vacate(msp->ms_unflushed_allocs, NULL, NULL); - range_tree_vacate(msp->ms_unflushed_frees, NULL, NULL); + zfs_range_tree_vacate(msp->ms_unflushed_allocs, NULL, NULL); + zfs_range_tree_vacate(msp->ms_unflushed_frees, NULL, NULL); metaslab_verify_space(msp, dmu_tx_get_txg(tx)); metaslab_verify_weight_and_frag(msp); @@ -4022,7 +4042,7 @@ metaslab_sync(metaslab_t *msp, uint64_t txg) vdev_t *vd = mg->mg_vd; spa_t *spa = vd->vdev_spa; objset_t *mos = spa_meta_objset(spa); - range_tree_t *alloctree = msp->ms_allocating[txg & TXG_MASK]; + zfs_range_tree_t *alloctree = msp->ms_allocating[txg & TXG_MASK]; 
dmu_tx_t *tx; ASSERT(!vd->vdev_ishole); @@ -4031,11 +4051,11 @@ metaslab_sync(metaslab_t *msp, uint64_t txg) * This metaslab has just been added so there's no work to do now. */ if (msp->ms_new) { - ASSERT0(range_tree_space(alloctree)); - ASSERT0(range_tree_space(msp->ms_freeing)); - ASSERT0(range_tree_space(msp->ms_freed)); - ASSERT0(range_tree_space(msp->ms_checkpointing)); - ASSERT0(range_tree_space(msp->ms_trim)); + ASSERT0(zfs_range_tree_space(alloctree)); + ASSERT0(zfs_range_tree_space(msp->ms_freeing)); + ASSERT0(zfs_range_tree_space(msp->ms_freed)); + ASSERT0(zfs_range_tree_space(msp->ms_checkpointing)); + ASSERT0(zfs_range_tree_space(msp->ms_trim)); return; } @@ -4050,9 +4070,9 @@ metaslab_sync(metaslab_t *msp, uint64_t txg) * we preserve the utility of the VERIFY statements in all other * cases. */ - if (range_tree_is_empty(alloctree) && - range_tree_is_empty(msp->ms_freeing) && - range_tree_is_empty(msp->ms_checkpointing) && + if (zfs_range_tree_is_empty(alloctree) && + zfs_range_tree_is_empty(msp->ms_freeing) && + zfs_range_tree_is_empty(msp->ms_checkpointing) && !(msp->ms_loaded && msp->ms_condense_wanted && txg <= spa_final_dirty_txg(spa))) return; @@ -4094,12 +4114,12 @@ metaslab_sync(metaslab_t *msp, uint64_t txg) msp->ms_start, msp->ms_size, vd->vdev_ashift)); ASSERT(msp->ms_sm != NULL); - ASSERT(range_tree_is_empty(msp->ms_unflushed_allocs)); - ASSERT(range_tree_is_empty(msp->ms_unflushed_frees)); + ASSERT(zfs_range_tree_is_empty(msp->ms_unflushed_allocs)); + ASSERT(zfs_range_tree_is_empty(msp->ms_unflushed_frees)); ASSERT0(metaslab_allocated_space(msp)); } - if (!range_tree_is_empty(msp->ms_checkpointing) && + if (!zfs_range_tree_is_empty(msp->ms_checkpointing) && vd->vdev_checkpoint_sm == NULL) { ASSERT(spa_has_checkpoint(spa)); @@ -4161,9 +4181,9 @@ metaslab_sync(metaslab_t *msp, uint64_t txg) metaslab_unflushed_changes_memused(msp)); spa->spa_unflushed_stats.sus_memused -= metaslab_unflushed_changes_memused(msp); - range_tree_remove_xor_add(alloctree, + zfs_range_tree_remove_xor_add(alloctree, msp->ms_unflushed_frees, msp->ms_unflushed_allocs); - range_tree_remove_xor_add(msp->ms_freeing, + zfs_range_tree_remove_xor_add(msp->ms_freeing, msp->ms_unflushed_allocs, msp->ms_unflushed_frees); spa->spa_unflushed_stats.sus_memused += metaslab_unflushed_changes_memused(msp); @@ -4177,12 +4197,12 @@ metaslab_sync(metaslab_t *msp, uint64_t txg) mutex_enter(&msp->ms_lock); } - msp->ms_allocated_space += range_tree_space(alloctree); + msp->ms_allocated_space += zfs_range_tree_space(alloctree); ASSERT3U(msp->ms_allocated_space, >=, - range_tree_space(msp->ms_freeing)); - msp->ms_allocated_space -= range_tree_space(msp->ms_freeing); + zfs_range_tree_space(msp->ms_freeing)); + msp->ms_allocated_space -= zfs_range_tree_space(msp->ms_freeing); - if (!range_tree_is_empty(msp->ms_checkpointing)) { + if (!zfs_range_tree_is_empty(msp->ms_checkpointing)) { ASSERT(spa_has_checkpoint(spa)); ASSERT3P(vd->vdev_checkpoint_sm, !=, NULL); @@ -4198,13 +4218,13 @@ metaslab_sync(metaslab_t *msp, uint64_t txg) mutex_enter(&msp->ms_lock); spa->spa_checkpoint_info.sci_dspace += - range_tree_space(msp->ms_checkpointing); + zfs_range_tree_space(msp->ms_checkpointing); vd->vdev_stat.vs_checkpoint_space += - range_tree_space(msp->ms_checkpointing); + zfs_range_tree_space(msp->ms_checkpointing); ASSERT3U(vd->vdev_stat.vs_checkpoint_space, ==, -space_map_allocated(vd->vdev_checkpoint_sm)); - range_tree_vacate(msp->ms_checkpointing, NULL, NULL); + zfs_range_tree_vacate(msp->ms_checkpointing, NULL, NULL); } if 
(msp->ms_loaded) { @@ -4264,20 +4284,20 @@ metaslab_sync(metaslab_t *msp, uint64_t txg) * get appended to the ms_sm) so their ranges can be reused as usual. */ if (spa_sync_pass(spa) == 1) { - range_tree_swap(&msp->ms_freeing, &msp->ms_freed); + zfs_range_tree_swap(&msp->ms_freeing, &msp->ms_freed); ASSERT0(msp->ms_allocated_this_txg); } else { - range_tree_vacate(msp->ms_freeing, - range_tree_add, msp->ms_freed); + zfs_range_tree_vacate(msp->ms_freeing, + zfs_range_tree_add, msp->ms_freed); } - msp->ms_allocated_this_txg += range_tree_space(alloctree); - range_tree_vacate(alloctree, NULL, NULL); + msp->ms_allocated_this_txg += zfs_range_tree_space(alloctree); + zfs_range_tree_vacate(alloctree, NULL, NULL); - ASSERT0(range_tree_space(msp->ms_allocating[txg & TXG_MASK])); - ASSERT0(range_tree_space(msp->ms_allocating[TXG_CLEAN(txg) + ASSERT0(zfs_range_tree_space(msp->ms_allocating[txg & TXG_MASK])); + ASSERT0(zfs_range_tree_space(msp->ms_allocating[TXG_CLEAN(txg) & TXG_MASK])); - ASSERT0(range_tree_space(msp->ms_freeing)); - ASSERT0(range_tree_space(msp->ms_checkpointing)); + ASSERT0(zfs_range_tree_space(msp->ms_freeing)); + ASSERT0(zfs_range_tree_space(msp->ms_checkpointing)); mutex_exit(&msp->ms_lock); @@ -4301,7 +4321,7 @@ metaslab_evict(metaslab_t *msp, uint64_t txg) return; for (int t = 1; t < TXG_CONCURRENT_STATES; t++) { - VERIFY0(range_tree_space( + VERIFY0(zfs_range_tree_space( msp->ms_allocating[(txg + t) & TXG_MASK])); } if (msp->ms_allocator != -1) @@ -4321,7 +4341,7 @@ metaslab_sync_done(metaslab_t *msp, uint64_t txg) metaslab_group_t *mg = msp->ms_group; vdev_t *vd = mg->mg_vd; spa_t *spa = vd->vdev_spa; - range_tree_t **defer_tree; + zfs_range_tree_t **defer_tree; int64_t alloc_delta, defer_delta; boolean_t defer_allowed = B_TRUE; @@ -4335,11 +4355,11 @@ metaslab_sync_done(metaslab_t *msp, uint64_t txg) /* there should be no allocations nor frees at this point */ VERIFY0(msp->ms_allocated_this_txg); - VERIFY0(range_tree_space(msp->ms_freed)); + VERIFY0(zfs_range_tree_space(msp->ms_freed)); } - ASSERT0(range_tree_space(msp->ms_freeing)); - ASSERT0(range_tree_space(msp->ms_checkpointing)); + ASSERT0(zfs_range_tree_space(msp->ms_freeing)); + ASSERT0(zfs_range_tree_space(msp->ms_checkpointing)); defer_tree = &msp->ms_defer[txg % TXG_DEFER_SIZE]; @@ -4352,13 +4372,13 @@ metaslab_sync_done(metaslab_t *msp, uint64_t txg) defer_delta = 0; alloc_delta = msp->ms_allocated_this_txg - - range_tree_space(msp->ms_freed); + zfs_range_tree_space(msp->ms_freed); if (defer_allowed) { - defer_delta = range_tree_space(msp->ms_freed) - - range_tree_space(*defer_tree); + defer_delta = zfs_range_tree_space(msp->ms_freed) - + zfs_range_tree_space(*defer_tree); } else { - defer_delta -= range_tree_space(*defer_tree); + defer_delta -= zfs_range_tree_space(*defer_tree); } metaslab_space_update(vd, mg->mg_class, alloc_delta + defer_delta, defer_delta, 0); @@ -4385,13 +4405,14 @@ metaslab_sync_done(metaslab_t *msp, uint64_t txg) * frees not being trimmed. 
*/ if (spa_get_autotrim(spa) == SPA_AUTOTRIM_ON) { - range_tree_walk(*defer_tree, range_tree_add, msp->ms_trim); + zfs_range_tree_walk(*defer_tree, zfs_range_tree_add, + msp->ms_trim); if (!defer_allowed) { - range_tree_walk(msp->ms_freed, range_tree_add, + zfs_range_tree_walk(msp->ms_freed, zfs_range_tree_add, msp->ms_trim); } } else { - range_tree_vacate(msp->ms_trim, NULL, NULL); + zfs_range_tree_vacate(msp->ms_trim, NULL, NULL); } /* @@ -4400,13 +4421,13 @@ metaslab_sync_done(metaslab_t *msp, uint64_t txg) * the defer_tree -- this is safe to do because we've * just emptied out the defer_tree. */ - range_tree_vacate(*defer_tree, - msp->ms_loaded ? range_tree_add : NULL, msp->ms_allocatable); + zfs_range_tree_vacate(*defer_tree, + msp->ms_loaded ? zfs_range_tree_add : NULL, msp->ms_allocatable); if (defer_allowed) { - range_tree_swap(&msp->ms_freed, defer_tree); + zfs_range_tree_swap(&msp->ms_freed, defer_tree); } else { - range_tree_vacate(msp->ms_freed, - msp->ms_loaded ? range_tree_add : NULL, + zfs_range_tree_vacate(msp->ms_freed, + msp->ms_loaded ? zfs_range_tree_add : NULL, msp->ms_allocatable); } @@ -4437,10 +4458,10 @@ metaslab_sync_done(metaslab_t *msp, uint64_t txg) */ metaslab_recalculate_weight_and_sort(msp); - ASSERT0(range_tree_space(msp->ms_allocating[txg & TXG_MASK])); - ASSERT0(range_tree_space(msp->ms_freeing)); - ASSERT0(range_tree_space(msp->ms_freed)); - ASSERT0(range_tree_space(msp->ms_checkpointing)); + ASSERT0(zfs_range_tree_space(msp->ms_allocating[txg & TXG_MASK])); + ASSERT0(zfs_range_tree_space(msp->ms_freeing)); + ASSERT0(zfs_range_tree_space(msp->ms_freed)); + ASSERT0(zfs_range_tree_space(msp->ms_checkpointing)); msp->ms_allocating_total -= msp->ms_allocated_this_txg; msp->ms_allocated_this_txg = 0; mutex_exit(&msp->ms_lock); @@ -4452,8 +4473,8 @@ metaslab_sync_reassess(metaslab_group_t *mg) spa_t *spa = mg->mg_class->mc_spa; spa_config_enter(spa, SCL_ALLOC, FTAG, RW_READER); - metaslab_group_alloc_update(mg); mg->mg_fragmentation = metaslab_group_fragmentation(mg); + metaslab_group_alloc_update(mg); /* * Preload the next potential metaslabs but only on active @@ -4648,7 +4669,7 @@ static uint64_t metaslab_block_alloc(metaslab_t *msp, uint64_t size, uint64_t txg) { uint64_t start; - range_tree_t *rt = msp->ms_allocatable; + zfs_range_tree_t *rt = msp->ms_allocatable; metaslab_class_t *mc = msp->ms_group->mg_class; ASSERT(MUTEX_HELD(&msp->ms_lock)); @@ -4663,14 +4684,15 @@ metaslab_block_alloc(metaslab_t *msp, uint64_t size, uint64_t txg) VERIFY0(P2PHASE(start, 1ULL << vd->vdev_ashift)); VERIFY0(P2PHASE(size, 1ULL << vd->vdev_ashift)); - VERIFY3U(range_tree_space(rt) - size, <=, msp->ms_size); - range_tree_remove(rt, start, size); - range_tree_clear(msp->ms_trim, start, size); + VERIFY3U(zfs_range_tree_space(rt) - size, <=, msp->ms_size); + zfs_range_tree_remove(rt, start, size); + zfs_range_tree_clear(msp->ms_trim, start, size); - if (range_tree_is_empty(msp->ms_allocating[txg & TXG_MASK])) + if (zfs_range_tree_is_empty(msp->ms_allocating[txg & TXG_MASK])) vdev_dirty(mg->mg_vd, VDD_METASLAB, msp, txg); - range_tree_add(msp->ms_allocating[txg & TXG_MASK], start, size); + zfs_range_tree_add(msp->ms_allocating[txg & TXG_MASK], start, + size); msp->ms_allocating_total += size; /* Track the last successful allocation */ @@ -5390,16 +5412,16 @@ metaslab_free_concrete(vdev_t *vd, uint64_t offset, uint64_t asize, metaslab_check_free_impl(vd, offset, asize); mutex_enter(&msp->ms_lock); - if (range_tree_is_empty(msp->ms_freeing) && - 
range_tree_is_empty(msp->ms_checkpointing)) { + if (zfs_range_tree_is_empty(msp->ms_freeing) && + zfs_range_tree_is_empty(msp->ms_checkpointing)) { vdev_dirty(vd, VDD_METASLAB, msp, spa_syncing_txg(spa)); } if (checkpoint) { ASSERT(spa_has_checkpoint(spa)); - range_tree_add(msp->ms_checkpointing, offset, asize); + zfs_range_tree_add(msp->ms_checkpointing, offset, asize); } else { - range_tree_add(msp->ms_freeing, offset, asize); + zfs_range_tree_add(msp->ms_freeing, offset, asize); } mutex_exit(&msp->ms_lock); } @@ -5623,18 +5645,18 @@ metaslab_unalloc_dva(spa_t *spa, const dva_t *dva, uint64_t txg) msp = vd->vdev_ms[offset >> vd->vdev_ms_shift]; mutex_enter(&msp->ms_lock); - range_tree_remove(msp->ms_allocating[txg & TXG_MASK], + zfs_range_tree_remove(msp->ms_allocating[txg & TXG_MASK], offset, size); msp->ms_allocating_total -= size; VERIFY(!msp->ms_condensing); VERIFY3U(offset, >=, msp->ms_start); VERIFY3U(offset + size, <=, msp->ms_start + msp->ms_size); - VERIFY3U(range_tree_space(msp->ms_allocatable) + size, <=, + VERIFY3U(zfs_range_tree_space(msp->ms_allocatable) + size, <=, msp->ms_size); VERIFY0(P2PHASE(offset, 1ULL << vd->vdev_ashift)); VERIFY0(P2PHASE(size, 1ULL << vd->vdev_ashift)); - range_tree_add(msp->ms_allocatable, offset, size); + zfs_range_tree_add(msp->ms_allocatable, offset, size); mutex_exit(&msp->ms_lock); } @@ -5730,7 +5752,7 @@ metaslab_claim_concrete(vdev_t *vd, uint64_t offset, uint64_t size, } if (error == 0 && - !range_tree_contains(msp->ms_allocatable, offset, size)) + !zfs_range_tree_contains(msp->ms_allocatable, offset, size)) error = SET_ERROR(ENOENT); if (error || txg == 0) { /* txg == 0 indicates dry run */ @@ -5741,10 +5763,10 @@ metaslab_claim_concrete(vdev_t *vd, uint64_t offset, uint64_t size, VERIFY(!msp->ms_condensing); VERIFY0(P2PHASE(offset, 1ULL << vd->vdev_ashift)); VERIFY0(P2PHASE(size, 1ULL << vd->vdev_ashift)); - VERIFY3U(range_tree_space(msp->ms_allocatable) - size, <=, + VERIFY3U(zfs_range_tree_space(msp->ms_allocatable) - size, <=, msp->ms_size); - range_tree_remove(msp->ms_allocatable, offset, size); - range_tree_clear(msp->ms_trim, offset, size); + zfs_range_tree_remove(msp->ms_allocatable, offset, size); + zfs_range_tree_clear(msp->ms_trim, offset, size); if (spa_writeable(spa)) { /* don't dirty if we're zdb(8) */ metaslab_class_t *mc = msp->ms_group->mg_class; @@ -5756,9 +5778,9 @@ metaslab_claim_concrete(vdev_t *vd, uint64_t offset, uint64_t size, } multilist_sublist_unlock(mls); - if (range_tree_is_empty(msp->ms_allocating[txg & TXG_MASK])) + if (zfs_range_tree_is_empty(msp->ms_allocating[txg & TXG_MASK])) vdev_dirty(vd, VDD_METASLAB, msp, txg); - range_tree_add(msp->ms_allocating[txg & TXG_MASK], + zfs_range_tree_add(msp->ms_allocating[txg & TXG_MASK], offset, size); msp->ms_allocating_total += size; } @@ -6015,7 +6037,7 @@ metaslab_check_free_impl(vdev_t *vd, uint64_t offset, uint64_t size) mutex_enter(&msp->ms_lock); if (msp->ms_loaded) { - range_tree_verify_not_present(msp->ms_allocatable, + zfs_range_tree_verify_not_present(msp->ms_allocatable, offset, size); } @@ -6027,15 +6049,16 @@ metaslab_check_free_impl(vdev_t *vd, uint64_t offset, uint64_t size) * allocated and freed in the same sync pass within the same txg. * Unfortunately there are places (e.g. the ZIL) where we allocate a * segment but then we free part of it within the same txg - * [see zil_sync()]. Thus, we don't call range_tree_verify() in the + * [see zil_sync()]. Thus, we don't call zfs_range_tree_verify() in the * current allocating tree. 
*/ - range_tree_verify_not_present(msp->ms_freeing, offset, size); - range_tree_verify_not_present(msp->ms_checkpointing, offset, size); - range_tree_verify_not_present(msp->ms_freed, offset, size); + zfs_range_tree_verify_not_present(msp->ms_freeing, offset, size); + zfs_range_tree_verify_not_present(msp->ms_checkpointing, offset, size); + zfs_range_tree_verify_not_present(msp->ms_freed, offset, size); for (int j = 0; j < TXG_DEFER_SIZE; j++) - range_tree_verify_not_present(msp->ms_defer[j], offset, size); - range_tree_verify_not_present(msp->ms_trim, offset, size); + zfs_range_tree_verify_not_present(msp->ms_defer[j], offset, + size); + zfs_range_tree_verify_not_present(msp->ms_trim, offset, size); mutex_exit(&msp->ms_lock); } diff --git a/sys/contrib/openzfs/module/zfs/range_tree.c b/sys/contrib/openzfs/module/zfs/range_tree.c index 5174e2c46633..8bb9a0724e61 100644 --- a/sys/contrib/openzfs/module/zfs/range_tree.c +++ b/sys/contrib/openzfs/module/zfs/range_tree.c @@ -42,11 +42,11 @@ * splitting in response to range add/remove requests. * * A range tree starts out completely empty, with no segments in it. - * Adding an allocation via range_tree_add to the range tree can either: + * Adding an allocation via zfs_range_tree_add to the range tree can either: * 1) create a new extent * 2) extend an adjacent extent * 3) merge two adjacent extents - * Conversely, removing an allocation via range_tree_remove can: + * Conversely, removing an allocation via zfs_range_tree_remove can: * 1) completely remove an extent * 2) shorten an extent (if the allocation was near one of its ends) * 3) split an extent into two extents, in effect punching a hole @@ -54,16 +54,16 @@ * A range tree is also capable of 'bridging' gaps when adding * allocations. This is useful for cases when close proximity of * allocations is an important detail that needs to be represented - * in the range tree. See range_tree_set_gap(). The default behavior + * in the range tree. See zfs_range_tree_set_gap(). The default behavior * is not to bridge gaps (i.e. the maximum allowed gap size is 0). * - * In order to traverse a range tree, use either the range_tree_walk() - * or range_tree_vacate() functions. + * In order to traverse a range tree, use either the zfs_range_tree_walk() + * or zfs_range_tree_vacate() functions. * * To obtain more accurate information on individual segment * operations that the range tree performs "under the hood", you can - * specify a set of callbacks by passing a range_tree_ops_t structure - * to the range_tree_create function. Any callbacks that are non-NULL + * specify a set of callbacks by passing a zfs_range_tree_ops_t structure + * to the zfs_range_tree_create function. Any callbacks that are non-NULL * are then called at the appropriate times. 
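As a usage sketch of the renamed API described above, the following exercises the merge and split behaviour; it assumes the ZFS kernel headers and ASSERT macros rather than a standalone program, and the function name is hypothetical:

static void
example_range_tree_usage(void)
{
	zfs_range_tree_t *rt;

	/* No ops callbacks, 64-bit segments, no start offset or shift. */
	rt = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64, NULL, 0, 0);

	zfs_range_tree_add(rt, 0x1000, 0x1000);	/* creates a new extent      */
	zfs_range_tree_add(rt, 0x2000, 0x1000);	/* extends that extent       */
	zfs_range_tree_add(rt, 0x4000, 0x1000);	/* creates a second extent   */
	zfs_range_tree_add(rt, 0x3000, 0x1000);	/* merges the two extents    */
	ASSERT3U(zfs_range_tree_numsegs(rt), ==, 1);
	ASSERT3U(zfs_range_tree_space(rt), ==, 0x4000);

	/* Removing from the middle punches a hole, splitting the extent. */
	zfs_range_tree_remove(rt, 0x2000, 0x1000);
	ASSERT3U(zfs_range_tree_numsegs(rt), ==, 2);
	ASSERT(!zfs_range_tree_contains(rt, 0x2000, 0x1000));

	/* A tree must be emptied before it is destroyed. */
	zfs_range_tree_vacate(rt, NULL, NULL);
	zfs_range_tree_destroy(rt);
}
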
* * The range tree code also supports a special variant of range trees @@ -76,19 +76,19 @@ */ static inline void -rs_copy(range_seg_t *src, range_seg_t *dest, range_tree_t *rt) +zfs_rs_copy(zfs_range_seg_t *src, zfs_range_seg_t *dest, zfs_range_tree_t *rt) { - ASSERT3U(rt->rt_type, <, RANGE_SEG_NUM_TYPES); + ASSERT3U(rt->rt_type, <, ZFS_RANGE_SEG_NUM_TYPES); size_t size = 0; switch (rt->rt_type) { - case RANGE_SEG32: - size = sizeof (range_seg32_t); + case ZFS_RANGE_SEG32: + size = sizeof (zfs_range_seg32_t); break; - case RANGE_SEG64: - size = sizeof (range_seg64_t); + case ZFS_RANGE_SEG64: + size = sizeof (zfs_range_seg64_t); break; - case RANGE_SEG_GAP: - size = sizeof (range_seg_gap_t); + case ZFS_RANGE_SEG_GAP: + size = sizeof (zfs_range_seg_gap_t); break; default: __builtin_unreachable(); @@ -97,23 +97,24 @@ rs_copy(range_seg_t *src, range_seg_t *dest, range_tree_t *rt) } void -range_tree_stat_verify(range_tree_t *rt) +zfs_range_tree_stat_verify(zfs_range_tree_t *rt) { - range_seg_t *rs; + zfs_range_seg_t *rs; zfs_btree_index_t where; - uint64_t hist[RANGE_TREE_HISTOGRAM_SIZE] = { 0 }; + uint64_t hist[ZFS_RANGE_TREE_HISTOGRAM_SIZE] = { 0 }; int i; for (rs = zfs_btree_first(&rt->rt_root, &where); rs != NULL; rs = zfs_btree_next(&rt->rt_root, &where, &where)) { - uint64_t size = rs_get_end(rs, rt) - rs_get_start(rs, rt); + uint64_t size = zfs_rs_get_end(rs, rt) - + zfs_rs_get_start(rs, rt); int idx = highbit64(size) - 1; hist[idx]++; ASSERT3U(hist[idx], !=, 0); } - for (i = 0; i < RANGE_TREE_HISTOGRAM_SIZE; i++) { + for (i = 0; i < ZFS_RANGE_TREE_HISTOGRAM_SIZE; i++) { if (hist[i] != rt->rt_histogram[i]) { zfs_dbgmsg("i=%d, hist=%px, hist=%llu, rt_hist=%llu", i, hist, (u_longlong_t)hist[i], @@ -124,9 +125,9 @@ range_tree_stat_verify(range_tree_t *rt) } static void -range_tree_stat_incr(range_tree_t *rt, range_seg_t *rs) +zfs_range_tree_stat_incr(zfs_range_tree_t *rt, zfs_range_seg_t *rs) { - uint64_t size = rs_get_end(rs, rt) - rs_get_start(rs, rt); + uint64_t size = zfs_rs_get_end(rs, rt) - zfs_rs_get_start(rs, rt); int idx = highbit64(size) - 1; ASSERT(size != 0); @@ -138,9 +139,9 @@ range_tree_stat_incr(range_tree_t *rt, range_seg_t *rs) } static void -range_tree_stat_decr(range_tree_t *rt, range_seg_t *rs) +zfs_range_tree_stat_decr(zfs_range_tree_t *rt, zfs_range_seg_t *rs) { - uint64_t size = rs_get_end(rs, rt) - rs_get_start(rs, rt); + uint64_t size = zfs_rs_get_end(rs, rt) - zfs_rs_get_start(rs, rt); int idx = highbit64(size) - 1; ASSERT(size != 0); @@ -153,10 +154,10 @@ range_tree_stat_decr(range_tree_t *rt, range_seg_t *rs) __attribute__((always_inline)) inline static int -range_tree_seg32_compare(const void *x1, const void *x2) +zfs_range_tree_seg32_compare(const void *x1, const void *x2) { - const range_seg32_t *r1 = x1; - const range_seg32_t *r2 = x2; + const zfs_range_seg32_t *r1 = x1; + const zfs_range_seg32_t *r2 = x2; ASSERT3U(r1->rs_start, <=, r1->rs_end); ASSERT3U(r2->rs_start, <=, r2->rs_end); @@ -166,10 +167,10 @@ range_tree_seg32_compare(const void *x1, const void *x2) __attribute__((always_inline)) inline static int -range_tree_seg64_compare(const void *x1, const void *x2) +zfs_range_tree_seg64_compare(const void *x1, const void *x2) { - const range_seg64_t *r1 = x1; - const range_seg64_t *r2 = x2; + const zfs_range_seg64_t *r1 = x1; + const zfs_range_seg64_t *r2 = x2; ASSERT3U(r1->rs_start, <=, r1->rs_end); ASSERT3U(r2->rs_start, <=, r2->rs_end); @@ -179,10 +180,10 @@ range_tree_seg64_compare(const void *x1, const void *x2) __attribute__((always_inline)) inline 
static int -range_tree_seg_gap_compare(const void *x1, const void *x2) +zfs_range_tree_seg_gap_compare(const void *x1, const void *x2) { - const range_seg_gap_t *r1 = x1; - const range_seg_gap_t *r2 = x2; + const zfs_range_seg_gap_t *r1 = x1; + const zfs_range_seg_gap_t *r2 = x2; ASSERT3U(r1->rs_start, <=, r1->rs_end); ASSERT3U(r2->rs_start, <=, r2->rs_end); @@ -190,41 +191,42 @@ range_tree_seg_gap_compare(const void *x1, const void *x2) return ((r1->rs_start >= r2->rs_end) - (r1->rs_end <= r2->rs_start)); } -ZFS_BTREE_FIND_IN_BUF_FUNC(range_tree_seg32_find_in_buf, range_seg32_t, - range_tree_seg32_compare) +ZFS_BTREE_FIND_IN_BUF_FUNC(zfs_range_tree_seg32_find_in_buf, zfs_range_seg32_t, + zfs_range_tree_seg32_compare) -ZFS_BTREE_FIND_IN_BUF_FUNC(range_tree_seg64_find_in_buf, range_seg64_t, - range_tree_seg64_compare) +ZFS_BTREE_FIND_IN_BUF_FUNC(zfs_range_tree_seg64_find_in_buf, zfs_range_seg64_t, + zfs_range_tree_seg64_compare) -ZFS_BTREE_FIND_IN_BUF_FUNC(range_tree_seg_gap_find_in_buf, range_seg_gap_t, - range_tree_seg_gap_compare) +ZFS_BTREE_FIND_IN_BUF_FUNC(zfs_range_tree_seg_gap_find_in_buf, + zfs_range_seg_gap_t, zfs_range_tree_seg_gap_compare) -range_tree_t * -range_tree_create_gap(const range_tree_ops_t *ops, range_seg_type_t type, - void *arg, uint64_t start, uint64_t shift, uint64_t gap) +zfs_range_tree_t * +zfs_range_tree_create_gap(const zfs_range_tree_ops_t *ops, + zfs_range_seg_type_t type, void *arg, uint64_t start, uint64_t shift, + uint64_t gap) { - range_tree_t *rt = kmem_zalloc(sizeof (range_tree_t), KM_SLEEP); + zfs_range_tree_t *rt = kmem_zalloc(sizeof (zfs_range_tree_t), KM_SLEEP); ASSERT3U(shift, <, 64); - ASSERT3U(type, <=, RANGE_SEG_NUM_TYPES); + ASSERT3U(type, <=, ZFS_RANGE_SEG_NUM_TYPES); size_t size; int (*compare) (const void *, const void *); bt_find_in_buf_f bt_find; switch (type) { - case RANGE_SEG32: - size = sizeof (range_seg32_t); - compare = range_tree_seg32_compare; - bt_find = range_tree_seg32_find_in_buf; + case ZFS_RANGE_SEG32: + size = sizeof (zfs_range_seg32_t); + compare = zfs_range_tree_seg32_compare; + bt_find = zfs_range_tree_seg32_find_in_buf; break; - case RANGE_SEG64: - size = sizeof (range_seg64_t); - compare = range_tree_seg64_compare; - bt_find = range_tree_seg64_find_in_buf; + case ZFS_RANGE_SEG64: + size = sizeof (zfs_range_seg64_t); + compare = zfs_range_tree_seg64_compare; + bt_find = zfs_range_tree_seg64_find_in_buf; break; - case RANGE_SEG_GAP: - size = sizeof (range_seg_gap_t); - compare = range_tree_seg_gap_compare; - bt_find = range_tree_seg_gap_find_in_buf; + case ZFS_RANGE_SEG_GAP: + size = sizeof (zfs_range_seg_gap_t); + compare = zfs_range_tree_seg_gap_compare; + bt_find = zfs_range_tree_seg_gap_find_in_buf; break; default: panic("Invalid range seg type %d", type); @@ -244,15 +246,15 @@ range_tree_create_gap(const range_tree_ops_t *ops, range_seg_type_t type, return (rt); } -range_tree_t * -range_tree_create(const range_tree_ops_t *ops, range_seg_type_t type, - void *arg, uint64_t start, uint64_t shift) +zfs_range_tree_t * +zfs_range_tree_create(const zfs_range_tree_ops_t *ops, + zfs_range_seg_type_t type, void *arg, uint64_t start, uint64_t shift) { - return (range_tree_create_gap(ops, type, arg, start, shift, 0)); + return (zfs_range_tree_create_gap(ops, type, arg, start, shift, 0)); } void -range_tree_destroy(range_tree_t *rt) +zfs_range_tree_destroy(zfs_range_tree_t *rt) { VERIFY0(rt->rt_space); @@ -264,36 +266,37 @@ range_tree_destroy(range_tree_t *rt) } void -range_tree_adjust_fill(range_tree_t *rt, range_seg_t *rs, 
int64_t delta) +zfs_range_tree_adjust_fill(zfs_range_tree_t *rt, zfs_range_seg_t *rs, + int64_t delta) { - if (delta < 0 && delta * -1 >= rs_get_fill(rs, rt)) { + if (delta < 0 && delta * -1 >= zfs_rs_get_fill(rs, rt)) { zfs_panic_recover("zfs: attempting to decrease fill to or " "below 0; probable double remove in segment [%llx:%llx]", - (longlong_t)rs_get_start(rs, rt), - (longlong_t)rs_get_end(rs, rt)); + (longlong_t)zfs_rs_get_start(rs, rt), + (longlong_t)zfs_rs_get_end(rs, rt)); } - if (rs_get_fill(rs, rt) + delta > rs_get_end(rs, rt) - - rs_get_start(rs, rt)) { + if (zfs_rs_get_fill(rs, rt) + delta > zfs_rs_get_end(rs, rt) - + zfs_rs_get_start(rs, rt)) { zfs_panic_recover("zfs: attempting to increase fill beyond " "max; probable double add in segment [%llx:%llx]", - (longlong_t)rs_get_start(rs, rt), - (longlong_t)rs_get_end(rs, rt)); + (longlong_t)zfs_rs_get_start(rs, rt), + (longlong_t)zfs_rs_get_end(rs, rt)); } if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL) rt->rt_ops->rtop_remove(rt, rs, rt->rt_arg); - rs_set_fill(rs, rt, rs_get_fill(rs, rt) + delta); + zfs_rs_set_fill(rs, rt, zfs_rs_get_fill(rs, rt) + delta); if (rt->rt_ops != NULL && rt->rt_ops->rtop_add != NULL) rt->rt_ops->rtop_add(rt, rs, rt->rt_arg); } static void -range_tree_add_impl(void *arg, uint64_t start, uint64_t size, uint64_t fill) +zfs_range_tree_add_impl(void *arg, uint64_t start, uint64_t size, uint64_t fill) { - range_tree_t *rt = arg; + zfs_range_tree_t *rt = arg; zfs_btree_index_t where; - range_seg_t *rs_before, *rs_after, *rs; - range_seg_max_t tmp, rsearch; + zfs_range_seg_t *rs_before, *rs_after, *rs; + zfs_range_seg_max_t tmp, rsearch; uint64_t end = start + size, gap = rt->rt_gap; uint64_t bridge_size = 0; boolean_t merge_before, merge_after; @@ -302,8 +305,8 @@ range_tree_add_impl(void *arg, uint64_t start, uint64_t size, uint64_t fill) ASSERT3U(fill, <=, size); ASSERT3U(start + size, >, start); - rs_set_start(&rsearch, rt, start); - rs_set_end(&rsearch, rt, end); + zfs_rs_set_start(&rsearch, rt, start); + zfs_rs_set_end(&rsearch, rt, end); rs = zfs_btree_find(&rt->rt_root, &rsearch, &where); /* @@ -321,26 +324,26 @@ range_tree_add_impl(void *arg, uint64_t start, uint64_t size, uint64_t fill) (longlong_t)start, (longlong_t)size); return; } - uint64_t rstart = rs_get_start(rs, rt); - uint64_t rend = rs_get_end(rs, rt); + uint64_t rstart = zfs_rs_get_start(rs, rt); + uint64_t rend = zfs_rs_get_end(rs, rt); if (rstart <= start && rend >= end) { - range_tree_adjust_fill(rt, rs, fill); + zfs_range_tree_adjust_fill(rt, rs, fill); return; } if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL) rt->rt_ops->rtop_remove(rt, rs, rt->rt_arg); - range_tree_stat_decr(rt, rs); + zfs_range_tree_stat_decr(rt, rs); rt->rt_space -= rend - rstart; - fill += rs_get_fill(rs, rt); + fill += zfs_rs_get_fill(rs, rt); start = MIN(start, rstart); end = MAX(end, rend); size = end - start; zfs_btree_remove(&rt->rt_root, rs); - range_tree_add_impl(rt, start, size, fill); + zfs_range_tree_add_impl(rt, start, size, fill); return; } @@ -355,15 +358,15 @@ range_tree_add_impl(void *arg, uint64_t start, uint64_t size, uint64_t fill) rs_before = zfs_btree_prev(&rt->rt_root, &where, &where_before); rs_after = zfs_btree_next(&rt->rt_root, &where, &where_after); - merge_before = (rs_before != NULL && rs_get_end(rs_before, rt) >= + merge_before = (rs_before != NULL && zfs_rs_get_end(rs_before, rt) >= start - gap); - merge_after = (rs_after != NULL && rs_get_start(rs_after, rt) <= end + - gap); + merge_after = (rs_after != NULL 
&& zfs_rs_get_start(rs_after, rt) <= + end + gap); if (merge_before && gap != 0) - bridge_size += start - rs_get_end(rs_before, rt); + bridge_size += start - zfs_rs_get_end(rs_before, rt); if (merge_after && gap != 0) - bridge_size += rs_get_start(rs_after, rt) - end; + bridge_size += zfs_rs_get_start(rs_after, rt) - end; if (merge_before && merge_after) { if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL) { @@ -371,13 +374,13 @@ range_tree_add_impl(void *arg, uint64_t start, uint64_t size, uint64_t fill) rt->rt_ops->rtop_remove(rt, rs_after, rt->rt_arg); } - range_tree_stat_decr(rt, rs_before); - range_tree_stat_decr(rt, rs_after); + zfs_range_tree_stat_decr(rt, rs_before); + zfs_range_tree_stat_decr(rt, rs_after); - rs_copy(rs_after, &tmp, rt); - uint64_t before_start = rs_get_start_raw(rs_before, rt); - uint64_t before_fill = rs_get_fill(rs_before, rt); - uint64_t after_fill = rs_get_fill(rs_after, rt); + zfs_rs_copy(rs_after, &tmp, rt); + uint64_t before_start = zfs_rs_get_start_raw(rs_before, rt); + uint64_t before_fill = zfs_rs_get_fill(rs_before, rt); + uint64_t after_fill = zfs_rs_get_fill(rs_after, rt); zfs_btree_remove_idx(&rt->rt_root, &where_before); /* @@ -386,76 +389,76 @@ range_tree_add_impl(void *arg, uint64_t start, uint64_t size, uint64_t fill) */ rs_after = zfs_btree_find(&rt->rt_root, &tmp, &where_after); ASSERT3P(rs_after, !=, NULL); - rs_set_start_raw(rs_after, rt, before_start); - rs_set_fill(rs_after, rt, after_fill + before_fill + fill); + zfs_rs_set_start_raw(rs_after, rt, before_start); + zfs_rs_set_fill(rs_after, rt, after_fill + before_fill + fill); rs = rs_after; } else if (merge_before) { if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL) rt->rt_ops->rtop_remove(rt, rs_before, rt->rt_arg); - range_tree_stat_decr(rt, rs_before); + zfs_range_tree_stat_decr(rt, rs_before); - uint64_t before_fill = rs_get_fill(rs_before, rt); - rs_set_end(rs_before, rt, end); - rs_set_fill(rs_before, rt, before_fill + fill); + uint64_t before_fill = zfs_rs_get_fill(rs_before, rt); + zfs_rs_set_end(rs_before, rt, end); + zfs_rs_set_fill(rs_before, rt, before_fill + fill); rs = rs_before; } else if (merge_after) { if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL) rt->rt_ops->rtop_remove(rt, rs_after, rt->rt_arg); - range_tree_stat_decr(rt, rs_after); + zfs_range_tree_stat_decr(rt, rs_after); - uint64_t after_fill = rs_get_fill(rs_after, rt); - rs_set_start(rs_after, rt, start); - rs_set_fill(rs_after, rt, after_fill + fill); + uint64_t after_fill = zfs_rs_get_fill(rs_after, rt); + zfs_rs_set_start(rs_after, rt, start); + zfs_rs_set_fill(rs_after, rt, after_fill + fill); rs = rs_after; } else { rs = &tmp; - rs_set_start(rs, rt, start); - rs_set_end(rs, rt, end); - rs_set_fill(rs, rt, fill); + zfs_rs_set_start(rs, rt, start); + zfs_rs_set_end(rs, rt, end); + zfs_rs_set_fill(rs, rt, fill); zfs_btree_add_idx(&rt->rt_root, rs, &where); } if (gap != 0) { - ASSERT3U(rs_get_fill(rs, rt), <=, rs_get_end(rs, rt) - - rs_get_start(rs, rt)); + ASSERT3U(zfs_rs_get_fill(rs, rt), <=, zfs_rs_get_end(rs, rt) - + zfs_rs_get_start(rs, rt)); } else { - ASSERT3U(rs_get_fill(rs, rt), ==, rs_get_end(rs, rt) - - rs_get_start(rs, rt)); + ASSERT3U(zfs_rs_get_fill(rs, rt), ==, zfs_rs_get_end(rs, rt) - + zfs_rs_get_start(rs, rt)); } if (rt->rt_ops != NULL && rt->rt_ops->rtop_add != NULL) rt->rt_ops->rtop_add(rt, rs, rt->rt_arg); - range_tree_stat_incr(rt, rs); + zfs_range_tree_stat_incr(rt, rs); rt->rt_space += size + bridge_size; } void -range_tree_add(void *arg, uint64_t start, 
uint64_t size) +zfs_range_tree_add(void *arg, uint64_t start, uint64_t size) { - range_tree_add_impl(arg, start, size, size); + zfs_range_tree_add_impl(arg, start, size, size); } static void -range_tree_remove_impl(range_tree_t *rt, uint64_t start, uint64_t size, +zfs_range_tree_remove_impl(zfs_range_tree_t *rt, uint64_t start, uint64_t size, boolean_t do_fill) { zfs_btree_index_t where; - range_seg_t *rs; - range_seg_max_t rsearch, rs_tmp; + zfs_range_seg_t *rs; + zfs_range_seg_max_t rsearch, rs_tmp; uint64_t end = start + size; boolean_t left_over, right_over; VERIFY3U(size, !=, 0); VERIFY3U(size, <=, rt->rt_space); - if (rt->rt_type == RANGE_SEG64) + if (rt->rt_type == ZFS_RANGE_SEG64) ASSERT3U(start + size, >, start); - rs_set_start(&rsearch, rt, start); - rs_set_end(&rsearch, rt, end); + zfs_rs_set_start(&rsearch, rt, start); + zfs_rs_set_end(&rsearch, rt, end); rs = zfs_btree_find(&rt->rt_root, &rsearch, &where); /* Make sure we completely overlap with someone */ @@ -474,49 +477,49 @@ range_tree_remove_impl(range_tree_t *rt, uint64_t start, uint64_t size, */ if (rt->rt_gap != 0) { if (do_fill) { - if (rs_get_fill(rs, rt) == size) { - start = rs_get_start(rs, rt); - end = rs_get_end(rs, rt); + if (zfs_rs_get_fill(rs, rt) == size) { + start = zfs_rs_get_start(rs, rt); + end = zfs_rs_get_end(rs, rt); size = end - start; } else { - range_tree_adjust_fill(rt, rs, -size); + zfs_range_tree_adjust_fill(rt, rs, -size); return; } - } else if (rs_get_start(rs, rt) != start || - rs_get_end(rs, rt) != end) { + } else if (zfs_rs_get_start(rs, rt) != start || + zfs_rs_get_end(rs, rt) != end) { zfs_panic_recover("zfs: freeing partial segment of " "gap tree (offset=%llx size=%llx) of " "(offset=%llx size=%llx)", (longlong_t)start, (longlong_t)size, - (longlong_t)rs_get_start(rs, rt), - (longlong_t)rs_get_end(rs, rt) - rs_get_start(rs, - rt)); + (longlong_t)zfs_rs_get_start(rs, rt), + (longlong_t)zfs_rs_get_end(rs, rt) - + zfs_rs_get_start(rs, rt)); return; } } - VERIFY3U(rs_get_start(rs, rt), <=, start); - VERIFY3U(rs_get_end(rs, rt), >=, end); + VERIFY3U(zfs_rs_get_start(rs, rt), <=, start); + VERIFY3U(zfs_rs_get_end(rs, rt), >=, end); - left_over = (rs_get_start(rs, rt) != start); - right_over = (rs_get_end(rs, rt) != end); + left_over = (zfs_rs_get_start(rs, rt) != start); + right_over = (zfs_rs_get_end(rs, rt) != end); - range_tree_stat_decr(rt, rs); + zfs_range_tree_stat_decr(rt, rs); if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL) rt->rt_ops->rtop_remove(rt, rs, rt->rt_arg); if (left_over && right_over) { - range_seg_max_t newseg; - rs_set_start(&newseg, rt, end); - rs_set_end_raw(&newseg, rt, rs_get_end_raw(rs, rt)); - rs_set_fill(&newseg, rt, rs_get_end(rs, rt) - end); - range_tree_stat_incr(rt, &newseg); + zfs_range_seg_max_t newseg; + zfs_rs_set_start(&newseg, rt, end); + zfs_rs_set_end_raw(&newseg, rt, zfs_rs_get_end_raw(rs, rt)); + zfs_rs_set_fill(&newseg, rt, zfs_rs_get_end(rs, rt) - end); + zfs_range_tree_stat_incr(rt, &newseg); // This modifies the buffer already inside the range tree - rs_set_end(rs, rt, start); + zfs_rs_set_end(rs, rt, start); - rs_copy(rs, &rs_tmp, rt); + zfs_rs_copy(rs, &rs_tmp, rt); if (zfs_btree_next(&rt->rt_root, &where, &where) != NULL) zfs_btree_add_idx(&rt->rt_root, &newseg, &where); else @@ -526,12 +529,12 @@ range_tree_remove_impl(range_tree_t *rt, uint64_t start, uint64_t size, rt->rt_ops->rtop_add(rt, &newseg, rt->rt_arg); } else if (left_over) { // This modifies the buffer already inside the range tree - rs_set_end(rs, rt, start); - 
rs_copy(rs, &rs_tmp, rt); + zfs_rs_set_end(rs, rt, start); + zfs_rs_copy(rs, &rs_tmp, rt); } else if (right_over) { // This modifies the buffer already inside the range tree - rs_set_start(rs, rt, end); - rs_copy(rs, &rs_tmp, rt); + zfs_rs_set_start(rs, rt, end); + zfs_rs_copy(rs, &rs_tmp, rt); } else { zfs_btree_remove_idx(&rt->rt_root, &where); rs = NULL; @@ -543,9 +546,9 @@ range_tree_remove_impl(range_tree_t *rt, uint64_t start, uint64_t size, * the size, since we do not support removing partial segments * of range trees with gaps. */ - rs_set_fill_raw(rs, rt, rs_get_end_raw(rs, rt) - - rs_get_start_raw(rs, rt)); - range_tree_stat_incr(rt, &rs_tmp); + zfs_zfs_rs_set_fill_raw(rs, rt, zfs_rs_get_end_raw(rs, rt) - + zfs_rs_get_start_raw(rs, rt)); + zfs_range_tree_stat_incr(rt, &rs_tmp); if (rt->rt_ops != NULL && rt->rt_ops->rtop_add != NULL) rt->rt_ops->rtop_add(rt, &rs_tmp, rt->rt_arg); @@ -555,76 +558,78 @@ range_tree_remove_impl(range_tree_t *rt, uint64_t start, uint64_t size, } void -range_tree_remove(void *arg, uint64_t start, uint64_t size) +zfs_range_tree_remove(void *arg, uint64_t start, uint64_t size) { - range_tree_remove_impl(arg, start, size, B_FALSE); + zfs_range_tree_remove_impl(arg, start, size, B_FALSE); } void -range_tree_remove_fill(range_tree_t *rt, uint64_t start, uint64_t size) +zfs_range_tree_remove_fill(zfs_range_tree_t *rt, uint64_t start, uint64_t size) { - range_tree_remove_impl(rt, start, size, B_TRUE); + zfs_range_tree_remove_impl(rt, start, size, B_TRUE); } void -range_tree_resize_segment(range_tree_t *rt, range_seg_t *rs, +zfs_range_tree_resize_segment(zfs_range_tree_t *rt, zfs_range_seg_t *rs, uint64_t newstart, uint64_t newsize) { - int64_t delta = newsize - (rs_get_end(rs, rt) - rs_get_start(rs, rt)); + int64_t delta = newsize - (zfs_rs_get_end(rs, rt) - + zfs_rs_get_start(rs, rt)); - range_tree_stat_decr(rt, rs); + zfs_range_tree_stat_decr(rt, rs); if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL) rt->rt_ops->rtop_remove(rt, rs, rt->rt_arg); - rs_set_start(rs, rt, newstart); - rs_set_end(rs, rt, newstart + newsize); + zfs_rs_set_start(rs, rt, newstart); + zfs_rs_set_end(rs, rt, newstart + newsize); - range_tree_stat_incr(rt, rs); + zfs_range_tree_stat_incr(rt, rs); if (rt->rt_ops != NULL && rt->rt_ops->rtop_add != NULL) rt->rt_ops->rtop_add(rt, rs, rt->rt_arg); rt->rt_space += delta; } -static range_seg_t * -range_tree_find_impl(range_tree_t *rt, uint64_t start, uint64_t size) +static zfs_range_seg_t * +zfs_range_tree_find_impl(zfs_range_tree_t *rt, uint64_t start, uint64_t size) { - range_seg_max_t rsearch; + zfs_range_seg_max_t rsearch; uint64_t end = start + size; VERIFY(size != 0); - rs_set_start(&rsearch, rt, start); - rs_set_end(&rsearch, rt, end); + zfs_rs_set_start(&rsearch, rt, start); + zfs_rs_set_end(&rsearch, rt, end); return (zfs_btree_find(&rt->rt_root, &rsearch, NULL)); } -range_seg_t * -range_tree_find(range_tree_t *rt, uint64_t start, uint64_t size) +zfs_range_seg_t * +zfs_range_tree_find(zfs_range_tree_t *rt, uint64_t start, uint64_t size) { - if (rt->rt_type == RANGE_SEG64) + if (rt->rt_type == ZFS_RANGE_SEG64) ASSERT3U(start + size, >, start); - range_seg_t *rs = range_tree_find_impl(rt, start, size); - if (rs != NULL && rs_get_start(rs, rt) <= start && - rs_get_end(rs, rt) >= start + size) { + zfs_range_seg_t *rs = zfs_range_tree_find_impl(rt, start, size); + if (rs != NULL && zfs_rs_get_start(rs, rt) <= start && + zfs_rs_get_end(rs, rt) >= start + size) { return (rs); } return (NULL); } void 
-range_tree_verify_not_present(range_tree_t *rt, uint64_t off, uint64_t size) +zfs_range_tree_verify_not_present(zfs_range_tree_t *rt, uint64_t off, + uint64_t size) { - range_seg_t *rs = range_tree_find(rt, off, size); + zfs_range_seg_t *rs = zfs_range_tree_find(rt, off, size); if (rs != NULL) panic("segment already in tree; rs=%p", (void *)rs); } boolean_t -range_tree_contains(range_tree_t *rt, uint64_t start, uint64_t size) +zfs_range_tree_contains(zfs_range_tree_t *rt, uint64_t start, uint64_t size) { - return (range_tree_find(rt, start, size) != NULL); + return (zfs_range_tree_find(rt, start, size) != NULL); } /* @@ -633,31 +638,32 @@ range_tree_contains(range_tree_t *rt, uint64_t start, uint64_t size) * isn't. */ boolean_t -range_tree_find_in(range_tree_t *rt, uint64_t start, uint64_t size, +zfs_range_tree_find_in(zfs_range_tree_t *rt, uint64_t start, uint64_t size, uint64_t *ostart, uint64_t *osize) { - if (rt->rt_type == RANGE_SEG64) + if (rt->rt_type == ZFS_RANGE_SEG64) ASSERT3U(start + size, >, start); - range_seg_max_t rsearch; - rs_set_start(&rsearch, rt, start); - rs_set_end_raw(&rsearch, rt, rs_get_start_raw(&rsearch, rt) + 1); + zfs_range_seg_max_t rsearch; + zfs_rs_set_start(&rsearch, rt, start); + zfs_rs_set_end_raw(&rsearch, rt, zfs_rs_get_start_raw(&rsearch, rt) + + 1); zfs_btree_index_t where; - range_seg_t *rs = zfs_btree_find(&rt->rt_root, &rsearch, &where); + zfs_range_seg_t *rs = zfs_btree_find(&rt->rt_root, &rsearch, &where); if (rs != NULL) { *ostart = start; - *osize = MIN(size, rs_get_end(rs, rt) - start); + *osize = MIN(size, zfs_rs_get_end(rs, rt) - start); return (B_TRUE); } rs = zfs_btree_next(&rt->rt_root, &where, &where); - if (rs == NULL || rs_get_start(rs, rt) > start + size) + if (rs == NULL || zfs_rs_get_start(rs, rt) > start + size) return (B_FALSE); - *ostart = rs_get_start(rs, rt); - *osize = MIN(start + size, rs_get_end(rs, rt)) - - rs_get_start(rs, rt); + *ostart = zfs_rs_get_start(rs, rt); + *osize = MIN(start + size, zfs_rs_get_end(rs, rt)) - + zfs_rs_get_start(rs, rt); return (B_TRUE); } @@ -666,29 +672,29 @@ range_tree_find_in(range_tree_t *rt, uint64_t start, uint64_t size, * it is currently in the tree. 
*/ void -range_tree_clear(range_tree_t *rt, uint64_t start, uint64_t size) +zfs_range_tree_clear(zfs_range_tree_t *rt, uint64_t start, uint64_t size) { - range_seg_t *rs; + zfs_range_seg_t *rs; if (size == 0) return; - if (rt->rt_type == RANGE_SEG64) + if (rt->rt_type == ZFS_RANGE_SEG64) ASSERT3U(start + size, >, start); - while ((rs = range_tree_find_impl(rt, start, size)) != NULL) { - uint64_t free_start = MAX(rs_get_start(rs, rt), start); - uint64_t free_end = MIN(rs_get_end(rs, rt), start + size); - range_tree_remove(rt, free_start, free_end - free_start); + while ((rs = zfs_range_tree_find_impl(rt, start, size)) != NULL) { + uint64_t free_start = MAX(zfs_rs_get_start(rs, rt), start); + uint64_t free_end = MIN(zfs_rs_get_end(rs, rt), start + size); + zfs_range_tree_remove(rt, free_start, free_end - free_start); } } void -range_tree_swap(range_tree_t **rtsrc, range_tree_t **rtdst) +zfs_range_tree_swap(zfs_range_tree_t **rtsrc, zfs_range_tree_t **rtdst) { - range_tree_t *rt; + zfs_range_tree_t *rt; - ASSERT0(range_tree_space(*rtdst)); + ASSERT0(zfs_range_tree_space(*rtdst)); ASSERT0(zfs_btree_numnodes(&(*rtdst)->rt_root)); rt = *rtsrc; @@ -697,19 +703,20 @@ range_tree_swap(range_tree_t **rtsrc, range_tree_t **rtdst) } void -range_tree_vacate(range_tree_t *rt, range_tree_func_t *func, void *arg) +zfs_range_tree_vacate(zfs_range_tree_t *rt, zfs_range_tree_func_t *func, + void *arg) { if (rt->rt_ops != NULL && rt->rt_ops->rtop_vacate != NULL) rt->rt_ops->rtop_vacate(rt, rt->rt_arg); if (func != NULL) { - range_seg_t *rs; + zfs_range_seg_t *rs; zfs_btree_index_t *cookie = NULL; while ((rs = zfs_btree_destroy_nodes(&rt->rt_root, &cookie)) != NULL) { - func(arg, rs_get_start(rs, rt), rs_get_end(rs, rt) - - rs_get_start(rs, rt)); + func(arg, zfs_rs_get_start(rs, rt), + zfs_rs_get_end(rs, rt) - zfs_rs_get_start(rs, rt)); } } else { zfs_btree_clear(&rt->rt_root); @@ -720,39 +727,40 @@ range_tree_vacate(range_tree_t *rt, range_tree_func_t *func, void *arg) } void -range_tree_walk(range_tree_t *rt, range_tree_func_t *func, void *arg) +zfs_range_tree_walk(zfs_range_tree_t *rt, zfs_range_tree_func_t *func, + void *arg) { zfs_btree_index_t where; - for (range_seg_t *rs = zfs_btree_first(&rt->rt_root, &where); + for (zfs_range_seg_t *rs = zfs_btree_first(&rt->rt_root, &where); rs != NULL; rs = zfs_btree_next(&rt->rt_root, &where, &where)) { - func(arg, rs_get_start(rs, rt), rs_get_end(rs, rt) - - rs_get_start(rs, rt)); + func(arg, zfs_rs_get_start(rs, rt), zfs_rs_get_end(rs, rt) - + zfs_rs_get_start(rs, rt)); } } -range_seg_t * -range_tree_first(range_tree_t *rt) +zfs_range_seg_t * +zfs_range_tree_first(zfs_range_tree_t *rt) { return (zfs_btree_first(&rt->rt_root, NULL)); } uint64_t -range_tree_space(range_tree_t *rt) +zfs_range_tree_space(zfs_range_tree_t *rt) { return (rt->rt_space); } uint64_t -range_tree_numsegs(range_tree_t *rt) +zfs_range_tree_numsegs(zfs_range_tree_t *rt) { return ((rt == NULL) ? 0 : zfs_btree_numnodes(&rt->rt_root)); } boolean_t -range_tree_is_empty(range_tree_t *rt) +zfs_range_tree_is_empty(zfs_range_tree_t *rt) { ASSERT(rt != NULL); - return (range_tree_space(rt) == 0); + return (zfs_range_tree_space(rt) == 0); } /* @@ -760,46 +768,46 @@ range_tree_is_empty(range_tree_t *rt) * from removefrom. Add non-overlapping leftovers to addto. 
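A hypothetical illustration of that contract, assuming removefrom and addto are already-locked trees: if removefrom currently holds the segment [0x1000, 0x3000), processing the segment [0x2000, 0x4000) trims the overlapping portion out of removefrom and adds only the non-overlapping leftover to addto.

	/* Before: removefrom = { [0x1000, 0x3000) }, addto = { } */
	zfs_range_tree_remove_xor_add_segment(0x2000, 0x4000,
	    removefrom, addto);
	/* After:  removefrom = { [0x1000, 0x2000) },
	 *         addto     = { [0x3000, 0x4000) } */
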
*/ void -range_tree_remove_xor_add_segment(uint64_t start, uint64_t end, - range_tree_t *removefrom, range_tree_t *addto) +zfs_range_tree_remove_xor_add_segment(uint64_t start, uint64_t end, + zfs_range_tree_t *removefrom, zfs_range_tree_t *addto) { zfs_btree_index_t where; - range_seg_max_t starting_rs; - rs_set_start(&starting_rs, removefrom, start); - rs_set_end_raw(&starting_rs, removefrom, rs_get_start_raw(&starting_rs, - removefrom) + 1); + zfs_range_seg_max_t starting_rs; + zfs_rs_set_start(&starting_rs, removefrom, start); + zfs_rs_set_end_raw(&starting_rs, removefrom, + zfs_rs_get_start_raw(&starting_rs, removefrom) + 1); - range_seg_t *curr = zfs_btree_find(&removefrom->rt_root, + zfs_range_seg_t *curr = zfs_btree_find(&removefrom->rt_root, &starting_rs, &where); if (curr == NULL) curr = zfs_btree_next(&removefrom->rt_root, &where, &where); - range_seg_t *next; + zfs_range_seg_t *next; for (; curr != NULL; curr = next) { if (start == end) return; VERIFY3U(start, <, end); /* there is no overlap */ - if (end <= rs_get_start(curr, removefrom)) { - range_tree_add(addto, start, end - start); + if (end <= zfs_rs_get_start(curr, removefrom)) { + zfs_range_tree_add(addto, start, end - start); return; } - uint64_t overlap_start = MAX(rs_get_start(curr, removefrom), + uint64_t overlap_start = MAX(zfs_rs_get_start(curr, removefrom), start); - uint64_t overlap_end = MIN(rs_get_end(curr, removefrom), + uint64_t overlap_end = MIN(zfs_rs_get_end(curr, removefrom), end); uint64_t overlap_size = overlap_end - overlap_start; ASSERT3S(overlap_size, >, 0); - range_seg_max_t rs; - rs_copy(curr, &rs, removefrom); + zfs_range_seg_max_t rs; + zfs_rs_copy(curr, &rs, removefrom); - range_tree_remove(removefrom, overlap_start, overlap_size); + zfs_range_tree_remove(removefrom, overlap_start, overlap_size); if (start < overlap_start) - range_tree_add(addto, start, overlap_start - start); + zfs_range_tree_add(addto, start, overlap_start - start); start = overlap_end; next = zfs_btree_find(&removefrom->rt_root, &rs, &where); @@ -814,7 +822,7 @@ range_tree_remove_xor_add_segment(uint64_t start, uint64_t end, * area to process. */ if (next != NULL) { - ASSERT(start == end || start == rs_get_end(&rs, + ASSERT(start == end || start == zfs_rs_get_end(&rs, removefrom)); } @@ -824,7 +832,7 @@ range_tree_remove_xor_add_segment(uint64_t start, uint64_t end, if (start != end) { VERIFY3U(start, <, end); - range_tree_add(addto, start, end - start); + zfs_range_tree_add(addto, start, end - start); } else { VERIFY3U(start, ==, end); } @@ -835,33 +843,33 @@ range_tree_remove_xor_add_segment(uint64_t start, uint64_t end, * from removefrom. Otherwise, add it to addto. */ void -range_tree_remove_xor_add(range_tree_t *rt, range_tree_t *removefrom, - range_tree_t *addto) +zfs_range_tree_remove_xor_add(zfs_range_tree_t *rt, + zfs_range_tree_t *removefrom, zfs_range_tree_t *addto) { zfs_btree_index_t where; - for (range_seg_t *rs = zfs_btree_first(&rt->rt_root, &where); rs; + for (zfs_range_seg_t *rs = zfs_btree_first(&rt->rt_root, &where); rs; rs = zfs_btree_next(&rt->rt_root, &where, &where)) { - range_tree_remove_xor_add_segment(rs_get_start(rs, rt), - rs_get_end(rs, rt), removefrom, addto); + zfs_range_tree_remove_xor_add_segment(zfs_rs_get_start(rs, rt), + zfs_rs_get_end(rs, rt), removefrom, addto); } } uint64_t -range_tree_min(range_tree_t *rt) +zfs_range_tree_min(zfs_range_tree_t *rt) { - range_seg_t *rs = zfs_btree_first(&rt->rt_root, NULL); - return (rs != NULL ? 
rs_get_start(rs, rt) : 0); + zfs_range_seg_t *rs = zfs_btree_first(&rt->rt_root, NULL); + return (rs != NULL ? zfs_rs_get_start(rs, rt) : 0); } uint64_t -range_tree_max(range_tree_t *rt) +zfs_range_tree_max(zfs_range_tree_t *rt) { - range_seg_t *rs = zfs_btree_last(&rt->rt_root, NULL); - return (rs != NULL ? rs_get_end(rs, rt) : 0); + zfs_range_seg_t *rs = zfs_btree_last(&rt->rt_root, NULL); + return (rs != NULL ? zfs_rs_get_end(rs, rt) : 0); } uint64_t -range_tree_span(range_tree_t *rt) +zfs_range_tree_span(zfs_range_tree_t *rt) { - return (range_tree_max(rt) - range_tree_min(rt)); + return (zfs_range_tree_max(rt) - zfs_range_tree_min(rt)); } diff --git a/sys/contrib/openzfs/module/zfs/spa.c b/sys/contrib/openzfs/module/zfs/spa.c index 956bae46ef1b..bdeef0959da7 100644 --- a/sys/contrib/openzfs/module/zfs/spa.c +++ b/sys/contrib/openzfs/module/zfs/spa.c @@ -9869,7 +9869,7 @@ vdev_indirect_state_sync_verify(vdev_t *vd) * happen in syncing context, the obsolete segments * tree must be empty when we start syncing. */ - ASSERT0(range_tree_space(vd->vdev_obsolete_segments)); + ASSERT0(zfs_range_tree_space(vd->vdev_obsolete_segments)); } /* diff --git a/sys/contrib/openzfs/module/zfs/spa_checkpoint.c b/sys/contrib/openzfs/module/zfs/spa_checkpoint.c index 4c3721c159be..5fbf474b0ece 100644 --- a/sys/contrib/openzfs/module/zfs/spa_checkpoint.c +++ b/sys/contrib/openzfs/module/zfs/spa_checkpoint.c @@ -235,9 +235,9 @@ spa_checkpoint_discard_sync_callback(space_map_entry_t *sme, void *arg) * potentially save ourselves from future headaches. */ mutex_enter(&ms->ms_lock); - if (range_tree_is_empty(ms->ms_freeing)) + if (zfs_range_tree_is_empty(ms->ms_freeing)) vdev_dirty(vd, VDD_METASLAB, ms, sdc->sdc_txg); - range_tree_add(ms->ms_freeing, sme->sme_offset, sme->sme_run); + zfs_range_tree_add(ms->ms_freeing, sme->sme_offset, sme->sme_run); mutex_exit(&ms->ms_lock); ASSERT3U(vd->vdev_spa->spa_checkpoint_info.sci_dspace, >=, diff --git a/sys/contrib/openzfs/module/zfs/spa_log_spacemap.c b/sys/contrib/openzfs/module/zfs/spa_log_spacemap.c index a95152608578..5eb4d043be41 100644 --- a/sys/contrib/openzfs/module/zfs/spa_log_spacemap.c +++ b/sys/contrib/openzfs/module/zfs/spa_log_spacemap.c @@ -1108,11 +1108,11 @@ spa_ld_log_sm_cb(space_map_entry_t *sme, void *arg) switch (sme->sme_type) { case SM_ALLOC: - range_tree_remove_xor_add_segment(offset, offset + size, + zfs_range_tree_remove_xor_add_segment(offset, offset + size, ms->ms_unflushed_frees, ms->ms_unflushed_allocs); break; case SM_FREE: - range_tree_remove_xor_add_segment(offset, offset + size, + zfs_range_tree_remove_xor_add_segment(offset, offset + size, ms->ms_unflushed_allocs, ms->ms_unflushed_frees); break; default: @@ -1251,14 +1251,14 @@ out: m != NULL; m = AVL_NEXT(&spa->spa_metaslabs_by_flushed, m)) { mutex_enter(&m->ms_lock); m->ms_allocated_space = space_map_allocated(m->ms_sm) + - range_tree_space(m->ms_unflushed_allocs) - - range_tree_space(m->ms_unflushed_frees); + zfs_range_tree_space(m->ms_unflushed_allocs) - + zfs_range_tree_space(m->ms_unflushed_frees); vdev_t *vd = m->ms_group->mg_vd; metaslab_space_update(vd, m->ms_group->mg_class, - range_tree_space(m->ms_unflushed_allocs), 0, 0); + zfs_range_tree_space(m->ms_unflushed_allocs), 0, 0); metaslab_space_update(vd, m->ms_group->mg_class, - -range_tree_space(m->ms_unflushed_frees), 0, 0); + -zfs_range_tree_space(m->ms_unflushed_frees), 0, 0); ASSERT0(m->ms_weight & METASLAB_ACTIVE_MASK); metaslab_recalculate_weight_and_sort(m); @@ -1317,8 +1317,8 @@ spa_ld_unflushed_txgs(vdev_t *vd) 
ms->ms_unflushed_txg = entry.msp_unflushed_txg; ms->ms_unflushed_dirty = B_FALSE; - ASSERT(range_tree_is_empty(ms->ms_unflushed_allocs)); - ASSERT(range_tree_is_empty(ms->ms_unflushed_frees)); + ASSERT(zfs_range_tree_is_empty(ms->ms_unflushed_allocs)); + ASSERT(zfs_range_tree_is_empty(ms->ms_unflushed_frees)); if (ms->ms_unflushed_txg != 0) { mutex_enter(&spa->spa_flushed_ms_lock); avl_add(&spa->spa_metaslabs_by_flushed, ms); diff --git a/sys/contrib/openzfs/module/zfs/space_map.c b/sys/contrib/openzfs/module/zfs/space_map.c index a336ff41eadb..36e15b8d73af 100644 --- a/sys/contrib/openzfs/module/zfs/space_map.c +++ b/sys/contrib/openzfs/module/zfs/space_map.c @@ -393,7 +393,7 @@ space_map_incremental_destroy(space_map_t *sm, sm_cb_t callback, void *arg, typedef struct space_map_load_arg { space_map_t *smla_sm; - range_tree_t *smla_rt; + zfs_range_tree_t *smla_rt; maptype_t smla_type; } space_map_load_arg_t; @@ -402,11 +402,13 @@ space_map_load_callback(space_map_entry_t *sme, void *arg) { space_map_load_arg_t *smla = arg; if (sme->sme_type == smla->smla_type) { - VERIFY3U(range_tree_space(smla->smla_rt) + sme->sme_run, <=, + VERIFY3U(zfs_range_tree_space(smla->smla_rt) + sme->sme_run, <=, smla->smla_sm->sm_size); - range_tree_add(smla->smla_rt, sme->sme_offset, sme->sme_run); + zfs_range_tree_add(smla->smla_rt, sme->sme_offset, + sme->sme_run); } else { - range_tree_remove(smla->smla_rt, sme->sme_offset, sme->sme_run); + zfs_range_tree_remove(smla->smla_rt, sme->sme_offset, + sme->sme_run); } return (0); @@ -417,15 +419,15 @@ space_map_load_callback(space_map_entry_t *sme, void *arg) * read the first 'length' bytes of the spacemap. */ int -space_map_load_length(space_map_t *sm, range_tree_t *rt, maptype_t maptype, +space_map_load_length(space_map_t *sm, zfs_range_tree_t *rt, maptype_t maptype, uint64_t length) { space_map_load_arg_t smla; - VERIFY0(range_tree_space(rt)); + VERIFY0(zfs_range_tree_space(rt)); if (maptype == SM_FREE) - range_tree_add(rt, sm->sm_start, sm->sm_size); + zfs_range_tree_add(rt, sm->sm_start, sm->sm_size); smla.smla_rt = rt; smla.smla_sm = sm; @@ -434,7 +436,7 @@ space_map_load_length(space_map_t *sm, range_tree_t *rt, maptype_t maptype, space_map_load_callback, &smla); if (err != 0) - range_tree_vacate(rt, NULL, NULL); + zfs_range_tree_vacate(rt, NULL, NULL); return (err); } @@ -444,7 +446,7 @@ space_map_load_length(space_map_t *sm, range_tree_t *rt, maptype_t maptype, * are added to the range tree, other segment types are removed. */ int -space_map_load(space_map_t *sm, range_tree_t *rt, maptype_t maptype) +space_map_load(space_map_t *sm, zfs_range_tree_t *rt, maptype_t maptype) { return (space_map_load_length(sm, rt, maptype, space_map_length(sm))); } @@ -460,7 +462,7 @@ space_map_histogram_clear(space_map_t *sm) } boolean_t -space_map_histogram_verify(space_map_t *sm, range_tree_t *rt) +space_map_histogram_verify(space_map_t *sm, zfs_range_tree_t *rt) { /* * Verify that the in-core range tree does not have any @@ -474,7 +476,7 @@ space_map_histogram_verify(space_map_t *sm, range_tree_t *rt) } void -space_map_histogram_add(space_map_t *sm, range_tree_t *rt, dmu_tx_t *tx) +space_map_histogram_add(space_map_t *sm, zfs_range_tree_t *rt, dmu_tx_t *tx) { int idx = 0; @@ -495,7 +497,7 @@ space_map_histogram_add(space_map_t *sm, range_tree_t *rt, dmu_tx_t *tx) * map only cares about allocatable blocks (minimum of sm_shift) we * can safely ignore all ranges in the range tree smaller than sm_shift. 
*/ - for (int i = sm->sm_shift; i < RANGE_TREE_HISTOGRAM_SIZE; i++) { + for (int i = sm->sm_shift; i < ZFS_RANGE_TREE_HISTOGRAM_SIZE; i++) { /* * Since the largest histogram bucket in the space map is @@ -667,7 +669,7 @@ space_map_write_seg(space_map_t *sm, uint64_t rstart, uint64_t rend, * take effect. */ static void -space_map_write_impl(space_map_t *sm, range_tree_t *rt, maptype_t maptype, +space_map_write_impl(space_map_t *sm, zfs_range_tree_t *rt, maptype_t maptype, uint64_t vdev_id, dmu_tx_t *tx) { spa_t *spa = tx->tx_pool->dp_spa; @@ -700,12 +702,12 @@ space_map_write_impl(space_map_t *sm, range_tree_t *rt, maptype_t maptype, zfs_btree_t *t = &rt->rt_root; zfs_btree_index_t where; - for (range_seg_t *rs = zfs_btree_first(t, &where); rs != NULL; + for (zfs_range_seg_t *rs = zfs_btree_first(t, &where); rs != NULL; rs = zfs_btree_next(t, &where, &where)) { - uint64_t offset = (rs_get_start(rs, rt) - sm->sm_start) >> - sm->sm_shift; - uint64_t length = (rs_get_end(rs, rt) - rs_get_start(rs, rt)) >> + uint64_t offset = (zfs_rs_get_start(rs, rt) - sm->sm_start) >> sm->sm_shift; + uint64_t length = (zfs_rs_get_end(rs, rt) - + zfs_rs_get_start(rs, rt)) >> sm->sm_shift; uint8_t words = 1; /* @@ -730,8 +732,9 @@ space_map_write_impl(space_map_t *sm, range_tree_t *rt, maptype_t maptype, random_in_range(100) == 0))) words = 2; - space_map_write_seg(sm, rs_get_start(rs, rt), rs_get_end(rs, - rt), maptype, vdev_id, words, &db, FTAG, tx); + space_map_write_seg(sm, zfs_rs_get_start(rs, rt), + zfs_rs_get_end(rs, rt), maptype, vdev_id, words, &db, + FTAG, tx); } dmu_buf_rele(db, FTAG); @@ -753,7 +756,7 @@ space_map_write_impl(space_map_t *sm, range_tree_t *rt, maptype_t maptype, * for synchronizing writes to the space map. */ void -space_map_write(space_map_t *sm, range_tree_t *rt, maptype_t maptype, +space_map_write(space_map_t *sm, zfs_range_tree_t *rt, maptype_t maptype, uint64_t vdev_id, dmu_tx_t *tx) { ASSERT(dsl_pool_sync_context(dmu_objset_pool(sm->sm_os))); @@ -768,18 +771,18 @@ space_map_write(space_map_t *sm, range_tree_t *rt, maptype_t maptype, */ sm->sm_phys->smp_object = sm->sm_object; - if (range_tree_is_empty(rt)) { + if (zfs_range_tree_is_empty(rt)) { VERIFY3U(sm->sm_object, ==, sm->sm_phys->smp_object); return; } if (maptype == SM_ALLOC) - sm->sm_phys->smp_alloc += range_tree_space(rt); + sm->sm_phys->smp_alloc += zfs_range_tree_space(rt); else - sm->sm_phys->smp_alloc -= range_tree_space(rt); + sm->sm_phys->smp_alloc -= zfs_range_tree_space(rt); uint64_t nodes = zfs_btree_numnodes(&rt->rt_root); - uint64_t rt_space = range_tree_space(rt); + uint64_t rt_space = zfs_range_tree_space(rt); space_map_write_impl(sm, rt, maptype, vdev_id, tx); @@ -788,7 +791,7 @@ space_map_write(space_map_t *sm, range_tree_t *rt, maptype_t maptype, * while we were in the middle of writing it out. */ VERIFY3U(nodes, ==, zfs_btree_numnodes(&rt->rt_root)); - VERIFY3U(range_tree_space(rt), ==, rt_space); + VERIFY3U(zfs_range_tree_space(rt), ==, rt_space); } static int @@ -960,7 +963,7 @@ space_map_free(space_map_t *sm, dmu_tx_t *tx) * the given space map. 
*/ uint64_t -space_map_estimate_optimal_size(space_map_t *sm, range_tree_t *rt, +space_map_estimate_optimal_size(space_map_t *sm, zfs_range_tree_t *rt, uint64_t vdev_id) { spa_t *spa = dmu_objset_spa(sm->sm_os); @@ -1047,7 +1050,7 @@ space_map_estimate_optimal_size(space_map_t *sm, range_tree_t *rt, size += histogram[idx] * entry_size; if (!spa_feature_is_enabled(spa, SPA_FEATURE_SPACEMAP_V2)) { - for (; idx < RANGE_TREE_HISTOGRAM_SIZE; idx++) { + for (; idx < ZFS_RANGE_TREE_HISTOGRAM_SIZE; idx++) { ASSERT3U(idx, >=, single_entry_max_bucket); entries_for_seg = 1ULL << (idx - single_entry_max_bucket); @@ -1064,7 +1067,7 @@ space_map_estimate_optimal_size(space_map_t *sm, range_tree_t *rt, for (; idx <= double_entry_max_bucket; idx++) size += histogram[idx] * 2 * sizeof (uint64_t); - for (; idx < RANGE_TREE_HISTOGRAM_SIZE; idx++) { + for (; idx < ZFS_RANGE_TREE_HISTOGRAM_SIZE; idx++) { ASSERT3U(idx, >=, double_entry_max_bucket); entries_for_seg = 1ULL << (idx - double_entry_max_bucket); size += histogram[idx] * diff --git a/sys/contrib/openzfs/module/zfs/space_reftree.c b/sys/contrib/openzfs/module/zfs/space_reftree.c index ee11e162dd5b..baa741395e0c 100644 --- a/sys/contrib/openzfs/module/zfs/space_reftree.c +++ b/sys/contrib/openzfs/module/zfs/space_reftree.c @@ -107,14 +107,14 @@ space_reftree_add_seg(avl_tree_t *t, uint64_t start, uint64_t end, * Convert (or add) a range tree into a reference tree. */ void -space_reftree_add_map(avl_tree_t *t, range_tree_t *rt, int64_t refcnt) +space_reftree_add_map(avl_tree_t *t, zfs_range_tree_t *rt, int64_t refcnt) { zfs_btree_index_t where; - for (range_seg_t *rs = zfs_btree_first(&rt->rt_root, &where); rs; rs = - zfs_btree_next(&rt->rt_root, &where, &where)) { - space_reftree_add_seg(t, rs_get_start(rs, rt), rs_get_end(rs, - rt), refcnt); + for (zfs_range_seg_t *rs = zfs_btree_first(&rt->rt_root, &where); rs; + rs = zfs_btree_next(&rt->rt_root, &where, &where)) { + space_reftree_add_seg(t, zfs_rs_get_start(rs, rt), + zfs_rs_get_end(rs, rt), refcnt); } } @@ -123,13 +123,13 @@ space_reftree_add_map(avl_tree_t *t, range_tree_t *rt, int64_t refcnt) * all members of the reference tree for which refcnt >= minref. 
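The function declared just below is what vdev_dtl_reassess_impl() (later in this diff) uses to intersect child DTLs: each child's missing ranges are added to a reference tree with refcnt 1, and only regions covered by at least minref children are written back out. A hypothetical sketch of that pattern, with space_reftree_create() assumed from the existing reftree code:

static zfs_range_tree_t *
range_tree_intersect(zfs_range_tree_t *a, zfs_range_tree_t *b)
{
	avl_tree_t reftree;
	zfs_range_tree_t *out;

	out = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64, NULL, 0, 0);

	space_reftree_create(&reftree);
	space_reftree_add_map(&reftree, a, 1);
	space_reftree_add_map(&reftree, b, 1);
	/* Keep only ranges referenced by both inputs (refcnt >= 2). */
	space_reftree_generate_map(&reftree, out, 2);
	space_reftree_destroy(&reftree);

	return (out);
}
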
*/ void -space_reftree_generate_map(avl_tree_t *t, range_tree_t *rt, int64_t minref) +space_reftree_generate_map(avl_tree_t *t, zfs_range_tree_t *rt, int64_t minref) { uint64_t start = -1ULL; int64_t refcnt = 0; space_ref_t *sr; - range_tree_vacate(rt, NULL, NULL); + zfs_range_tree_vacate(rt, NULL, NULL); for (sr = avl_first(t); sr != NULL; sr = AVL_NEXT(t, sr)) { refcnt += sr->sr_refcnt; @@ -142,7 +142,8 @@ space_reftree_generate_map(avl_tree_t *t, range_tree_t *rt, int64_t minref) uint64_t end = sr->sr_offset; ASSERT(start <= end); if (end > start) - range_tree_add(rt, start, end - start); + zfs_range_tree_add(rt, start, end - + start); start = -1ULL; } } diff --git a/sys/contrib/openzfs/module/zfs/vdev.c b/sys/contrib/openzfs/module/zfs/vdev.c index 5df2f77e5780..74e36c0300f0 100644 --- a/sys/contrib/openzfs/module/zfs/vdev.c +++ b/sys/contrib/openzfs/module/zfs/vdev.c @@ -294,8 +294,8 @@ vdev_get_mg(vdev_t *vd, metaslab_class_t *mc) } void -vdev_default_xlate(vdev_t *vd, const range_seg64_t *logical_rs, - range_seg64_t *physical_rs, range_seg64_t *remain_rs) +vdev_default_xlate(vdev_t *vd, const zfs_range_seg64_t *logical_rs, + zfs_range_seg64_t *physical_rs, zfs_range_seg64_t *remain_rs) { (void) vd, (void) remain_rs; @@ -677,8 +677,8 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops) rw_init(&vd->vdev_indirect_rwlock, NULL, RW_DEFAULT, NULL); mutex_init(&vd->vdev_obsolete_lock, NULL, MUTEX_DEFAULT, NULL); - vd->vdev_obsolete_segments = range_tree_create(NULL, RANGE_SEG64, NULL, - 0, 0); + vd->vdev_obsolete_segments = zfs_range_tree_create(NULL, + ZFS_RANGE_SEG64, NULL, 0, 0); /* * Initialize rate limit structs for events. We rate limit ZIO delay @@ -732,8 +732,8 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops) cv_init(&vd->vdev_rebuild_cv, NULL, CV_DEFAULT, NULL); for (int t = 0; t < DTL_TYPES; t++) { - vd->vdev_dtl[t] = range_tree_create(NULL, RANGE_SEG64, NULL, 0, - 0); + vd->vdev_dtl[t] = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64, + NULL, 0, 0); } txg_list_create(&vd->vdev_ms_list, spa, @@ -1155,8 +1155,8 @@ vdev_free(vdev_t *vd) mutex_enter(&vd->vdev_dtl_lock); space_map_close(vd->vdev_dtl_sm); for (int t = 0; t < DTL_TYPES; t++) { - range_tree_vacate(vd->vdev_dtl[t], NULL, NULL); - range_tree_destroy(vd->vdev_dtl[t]); + zfs_range_tree_vacate(vd->vdev_dtl[t], NULL, NULL); + zfs_range_tree_destroy(vd->vdev_dtl[t]); } mutex_exit(&vd->vdev_dtl_lock); @@ -1173,7 +1173,7 @@ vdev_free(vdev_t *vd) space_map_close(vd->vdev_obsolete_sm); vd->vdev_obsolete_sm = NULL; } - range_tree_destroy(vd->vdev_obsolete_segments); + zfs_range_tree_destroy(vd->vdev_obsolete_segments); rw_destroy(&vd->vdev_indirect_rwlock); mutex_destroy(&vd->vdev_obsolete_lock); @@ -1283,7 +1283,7 @@ vdev_top_transfer(vdev_t *svd, vdev_t *tvd) tvd->vdev_indirect_config = svd->vdev_indirect_config; tvd->vdev_indirect_mapping = svd->vdev_indirect_mapping; tvd->vdev_indirect_births = svd->vdev_indirect_births; - range_tree_swap(&svd->vdev_obsolete_segments, + zfs_range_tree_swap(&svd->vdev_obsolete_segments, &tvd->vdev_obsolete_segments); tvd->vdev_obsolete_sm = svd->vdev_obsolete_sm; svd->vdev_indirect_config.vic_mapping_object = 0; @@ -1677,7 +1677,7 @@ vdev_metaslab_fini(vdev_t *vd) vd->vdev_ms = NULL; vd->vdev_ms_count = 0; - for (int i = 0; i < RANGE_TREE_HISTOGRAM_SIZE; i++) { + for (int i = 0; i < ZFS_RANGE_TREE_HISTOGRAM_SIZE; i++) { ASSERT0(mg->mg_histogram[i]); if (vd->vdev_log_mg != NULL) ASSERT0(vd->vdev_log_mg->mg_histogram[i]); @@ -2969,22 +2969,22 @@ 
vdev_dirty_leaves(vdev_t *vd, int flags, uint64_t txg) void vdev_dtl_dirty(vdev_t *vd, vdev_dtl_type_t t, uint64_t txg, uint64_t size) { - range_tree_t *rt = vd->vdev_dtl[t]; + zfs_range_tree_t *rt = vd->vdev_dtl[t]; ASSERT(t < DTL_TYPES); ASSERT(vd != vd->vdev_spa->spa_root_vdev); ASSERT(spa_writeable(vd->vdev_spa)); mutex_enter(&vd->vdev_dtl_lock); - if (!range_tree_contains(rt, txg, size)) - range_tree_add(rt, txg, size); + if (!zfs_range_tree_contains(rt, txg, size)) + zfs_range_tree_add(rt, txg, size); mutex_exit(&vd->vdev_dtl_lock); } boolean_t vdev_dtl_contains(vdev_t *vd, vdev_dtl_type_t t, uint64_t txg, uint64_t size) { - range_tree_t *rt = vd->vdev_dtl[t]; + zfs_range_tree_t *rt = vd->vdev_dtl[t]; boolean_t dirty = B_FALSE; ASSERT(t < DTL_TYPES); @@ -2999,8 +2999,8 @@ vdev_dtl_contains(vdev_t *vd, vdev_dtl_type_t t, uint64_t txg, uint64_t size) * always checksummed. */ mutex_enter(&vd->vdev_dtl_lock); - if (!range_tree_is_empty(rt)) - dirty = range_tree_contains(rt, txg, size); + if (!zfs_range_tree_is_empty(rt)) + dirty = zfs_range_tree_contains(rt, txg, size); mutex_exit(&vd->vdev_dtl_lock); return (dirty); @@ -3009,11 +3009,11 @@ vdev_dtl_contains(vdev_t *vd, vdev_dtl_type_t t, uint64_t txg, uint64_t size) boolean_t vdev_dtl_empty(vdev_t *vd, vdev_dtl_type_t t) { - range_tree_t *rt = vd->vdev_dtl[t]; + zfs_range_tree_t *rt = vd->vdev_dtl[t]; boolean_t empty; mutex_enter(&vd->vdev_dtl_lock); - empty = range_tree_is_empty(rt); + empty = zfs_range_tree_is_empty(rt); mutex_exit(&vd->vdev_dtl_lock); return (empty); @@ -3060,10 +3060,10 @@ static uint64_t vdev_dtl_min(vdev_t *vd) { ASSERT(MUTEX_HELD(&vd->vdev_dtl_lock)); - ASSERT3U(range_tree_space(vd->vdev_dtl[DTL_MISSING]), !=, 0); + ASSERT3U(zfs_range_tree_space(vd->vdev_dtl[DTL_MISSING]), !=, 0); ASSERT0(vd->vdev_children); - return (range_tree_min(vd->vdev_dtl[DTL_MISSING]) - 1); + return (zfs_range_tree_min(vd->vdev_dtl[DTL_MISSING]) - 1); } /* @@ -3073,10 +3073,10 @@ static uint64_t vdev_dtl_max(vdev_t *vd) { ASSERT(MUTEX_HELD(&vd->vdev_dtl_lock)); - ASSERT3U(range_tree_space(vd->vdev_dtl[DTL_MISSING]), !=, 0); + ASSERT3U(zfs_range_tree_space(vd->vdev_dtl[DTL_MISSING]), !=, 0); ASSERT0(vd->vdev_children); - return (range_tree_max(vd->vdev_dtl[DTL_MISSING])); + return (zfs_range_tree_max(vd->vdev_dtl[DTL_MISSING])); } /* @@ -3098,7 +3098,7 @@ vdev_dtl_should_excise(vdev_t *vd, boolean_t rebuild_done) if (vd->vdev_resilver_deferred) return (B_FALSE); - if (range_tree_is_empty(vd->vdev_dtl[DTL_MISSING])) + if (zfs_range_tree_is_empty(vd->vdev_dtl[DTL_MISSING])) return (B_TRUE); if (rebuild_done) { @@ -3187,7 +3187,7 @@ vdev_dtl_reassess_impl(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, } if (scrub_txg != 0 && - !range_tree_is_empty(vd->vdev_dtl[DTL_MISSING])) { + !zfs_range_tree_is_empty(vd->vdev_dtl[DTL_MISSING])) { wasempty = B_FALSE; zfs_dbgmsg("guid:%llu txg:%llu scrub:%llu started:%d " "dtl:%llu/%llu errors:%llu", @@ -3243,7 +3243,8 @@ vdev_dtl_reassess_impl(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, vd->vdev_dtl[DTL_MISSING], 1); space_reftree_destroy(&reftree); - if (!range_tree_is_empty(vd->vdev_dtl[DTL_MISSING])) { + if (!zfs_range_tree_is_empty( + vd->vdev_dtl[DTL_MISSING])) { zfs_dbgmsg("update DTL_MISSING:%llu/%llu", (u_longlong_t)vdev_dtl_min(vd), (u_longlong_t)vdev_dtl_max(vd)); @@ -3251,12 +3252,13 @@ vdev_dtl_reassess_impl(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, zfs_dbgmsg("DTL_MISSING is now empty"); } } - range_tree_vacate(vd->vdev_dtl[DTL_PARTIAL], NULL, NULL); - 
range_tree_walk(vd->vdev_dtl[DTL_MISSING], - range_tree_add, vd->vdev_dtl[DTL_PARTIAL]); + zfs_range_tree_vacate(vd->vdev_dtl[DTL_PARTIAL], NULL, NULL); + zfs_range_tree_walk(vd->vdev_dtl[DTL_MISSING], + zfs_range_tree_add, vd->vdev_dtl[DTL_PARTIAL]); if (scrub_done) - range_tree_vacate(vd->vdev_dtl[DTL_SCRUB], NULL, NULL); - range_tree_vacate(vd->vdev_dtl[DTL_OUTAGE], NULL, NULL); + zfs_range_tree_vacate(vd->vdev_dtl[DTL_SCRUB], NULL, + NULL); + zfs_range_tree_vacate(vd->vdev_dtl[DTL_OUTAGE], NULL, NULL); /* * For the faulting case, treat members of a replacing vdev @@ -3267,10 +3269,10 @@ vdev_dtl_reassess_impl(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, if (!vdev_readable(vd) || (faulting && vd->vdev_parent != NULL && vd->vdev_parent->vdev_ops == &vdev_replacing_ops)) { - range_tree_add(vd->vdev_dtl[DTL_OUTAGE], 0, -1ULL); + zfs_range_tree_add(vd->vdev_dtl[DTL_OUTAGE], 0, -1ULL); } else { - range_tree_walk(vd->vdev_dtl[DTL_MISSING], - range_tree_add, vd->vdev_dtl[DTL_OUTAGE]); + zfs_range_tree_walk(vd->vdev_dtl[DTL_MISSING], + zfs_range_tree_add, vd->vdev_dtl[DTL_OUTAGE]); } /* @@ -3279,8 +3281,8 @@ vdev_dtl_reassess_impl(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, * the top level so that we persist the change. */ if (txg != 0 && - range_tree_is_empty(vd->vdev_dtl[DTL_MISSING]) && - range_tree_is_empty(vd->vdev_dtl[DTL_OUTAGE])) { + zfs_range_tree_is_empty(vd->vdev_dtl[DTL_MISSING]) && + zfs_range_tree_is_empty(vd->vdev_dtl[DTL_OUTAGE])) { if (vd->vdev_rebuild_txg != 0) { vd->vdev_rebuild_txg = 0; vdev_config_dirty(vd->vdev_top); @@ -3374,7 +3376,7 @@ vdev_dtl_load(vdev_t *vd) { spa_t *spa = vd->vdev_spa; objset_t *mos = spa->spa_meta_objset; - range_tree_t *rt; + zfs_range_tree_t *rt; int error = 0; if (vd->vdev_ops->vdev_op_leaf && vd->vdev_dtl_object != 0) { @@ -3392,17 +3394,17 @@ vdev_dtl_load(vdev_t *vd) return (error); ASSERT(vd->vdev_dtl_sm != NULL); - rt = range_tree_create(NULL, RANGE_SEG64, NULL, 0, 0); + rt = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64, NULL, 0, 0); error = space_map_load(vd->vdev_dtl_sm, rt, SM_ALLOC); if (error == 0) { mutex_enter(&vd->vdev_dtl_lock); - range_tree_walk(rt, range_tree_add, + zfs_range_tree_walk(rt, zfs_range_tree_add, vd->vdev_dtl[DTL_MISSING]); mutex_exit(&vd->vdev_dtl_lock); } - range_tree_vacate(rt, NULL, NULL); - range_tree_destroy(rt); + zfs_range_tree_vacate(rt, NULL, NULL); + zfs_range_tree_destroy(rt); return (error); } @@ -3496,9 +3498,9 @@ static void vdev_dtl_sync(vdev_t *vd, uint64_t txg) { spa_t *spa = vd->vdev_spa; - range_tree_t *rt = vd->vdev_dtl[DTL_MISSING]; + zfs_range_tree_t *rt = vd->vdev_dtl[DTL_MISSING]; objset_t *mos = spa->spa_meta_objset; - range_tree_t *rtsync; + zfs_range_tree_t *rtsync; dmu_tx_t *tx; uint64_t object = space_map_object(vd->vdev_dtl_sm); @@ -3540,17 +3542,17 @@ vdev_dtl_sync(vdev_t *vd, uint64_t txg) ASSERT(vd->vdev_dtl_sm != NULL); } - rtsync = range_tree_create(NULL, RANGE_SEG64, NULL, 0, 0); + rtsync = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64, NULL, 0, 0); mutex_enter(&vd->vdev_dtl_lock); - range_tree_walk(rt, range_tree_add, rtsync); + zfs_range_tree_walk(rt, zfs_range_tree_add, rtsync); mutex_exit(&vd->vdev_dtl_lock); space_map_truncate(vd->vdev_dtl_sm, zfs_vdev_dtl_sm_blksz, tx); space_map_write(vd->vdev_dtl_sm, rtsync, SM_ALLOC, SM_NO_VDEVID, tx); - range_tree_vacate(rtsync, NULL, NULL); + zfs_range_tree_vacate(rtsync, NULL, NULL); - range_tree_destroy(rtsync); + zfs_range_tree_destroy(rtsync); /* * If the object for the space map has changed then dirty @@ -3620,7 +3622,7 @@ 
vdev_resilver_needed(vdev_t *vd, uint64_t *minp, uint64_t *maxp) if (vd->vdev_children == 0) { mutex_enter(&vd->vdev_dtl_lock); - if (!range_tree_is_empty(vd->vdev_dtl[DTL_MISSING]) && + if (!zfs_range_tree_is_empty(vd->vdev_dtl[DTL_MISSING]) && vdev_writeable(vd)) { thismin = vdev_dtl_min(vd); @@ -4064,7 +4066,7 @@ vdev_sync(vdev_t *vd, uint64_t txg) ASSERT3U(txg, ==, spa->spa_syncing_txg); dmu_tx_t *tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg); - if (range_tree_space(vd->vdev_obsolete_segments) > 0) { + if (zfs_range_tree_space(vd->vdev_obsolete_segments) > 0) { ASSERT(vd->vdev_removing || vd->vdev_ops == &vdev_indirect_ops); @@ -5687,7 +5689,7 @@ vdev_clear_resilver_deferred(vdev_t *vd, dmu_tx_t *tx) } boolean_t -vdev_xlate_is_empty(range_seg64_t *rs) +vdev_xlate_is_empty(zfs_range_seg64_t *rs) { return (rs->rs_start == rs->rs_end); } @@ -5701,8 +5703,8 @@ vdev_xlate_is_empty(range_seg64_t *rs) * specific translation function to do the real conversion. */ void -vdev_xlate(vdev_t *vd, const range_seg64_t *logical_rs, - range_seg64_t *physical_rs, range_seg64_t *remain_rs) +vdev_xlate(vdev_t *vd, const zfs_range_seg64_t *logical_rs, + zfs_range_seg64_t *physical_rs, zfs_range_seg64_t *remain_rs) { /* * Walk up the vdev tree @@ -5734,7 +5736,7 @@ vdev_xlate(vdev_t *vd, const range_seg64_t *logical_rs, * range into its physical and any remaining components by calling * the vdev specific translate function. */ - range_seg64_t intermediate = { 0 }; + zfs_range_seg64_t intermediate = { 0 }; pvd->vdev_ops->vdev_op_xlate(vd, physical_rs, &intermediate, remain_rs); physical_rs->rs_start = intermediate.rs_start; @@ -5742,12 +5744,12 @@ vdev_xlate(vdev_t *vd, const range_seg64_t *logical_rs, } void -vdev_xlate_walk(vdev_t *vd, const range_seg64_t *logical_rs, +vdev_xlate_walk(vdev_t *vd, const zfs_range_seg64_t *logical_rs, vdev_xlate_func_t *func, void *arg) { - range_seg64_t iter_rs = *logical_rs; - range_seg64_t physical_rs; - range_seg64_t remain_rs; + zfs_range_seg64_t iter_rs = *logical_rs; + zfs_range_seg64_t physical_rs; + zfs_range_seg64_t remain_rs; while (!vdev_xlate_is_empty(&iter_rs)) { @@ -6580,3 +6582,7 @@ ZFS_MODULE_PARAM_CALL(zfs_vdev, zfs_vdev_, max_auto_ashift, param_set_max_auto_ashift, param_get_uint, ZMOD_RW, "Maximum ashift used when optimizing for logical -> physical sector " "size on new top-level vdevs"); + +ZFS_MODULE_PARAM_CALL(zfs_vdev, zfs_vdev_, raidz_impl, + param_set_raidz_impl, param_get_raidz_impl, ZMOD_RW, + "RAIDZ implementation"); diff --git a/sys/contrib/openzfs/module/zfs/vdev_draid.c b/sys/contrib/openzfs/module/zfs/vdev_draid.c index 419c8ac5bb28..45f8bcfbd4ed 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_draid.c +++ b/sys/contrib/openzfs/module/zfs/vdev_draid.c @@ -1823,7 +1823,7 @@ static void vdev_draid_io_verify(vdev_t *vd, raidz_row_t *rr, int col) { #ifdef ZFS_DEBUG - range_seg64_t logical_rs, physical_rs, remain_rs; + zfs_range_seg64_t logical_rs, physical_rs, remain_rs; logical_rs.rs_start = rr->rr_offset; logical_rs.rs_end = logical_rs.rs_start + vdev_draid_asize(vd, rr->rr_size, 0); @@ -2080,8 +2080,8 @@ vdev_draid_state_change(vdev_t *vd, int faulted, int degraded) } static void -vdev_draid_xlate(vdev_t *cvd, const range_seg64_t *logical_rs, - range_seg64_t *physical_rs, range_seg64_t *remain_rs) +vdev_draid_xlate(vdev_t *cvd, const zfs_range_seg64_t *logical_rs, + zfs_range_seg64_t *physical_rs, zfs_range_seg64_t *remain_rs) { vdev_t *raidvd = cvd->vdev_parent; ASSERT(raidvd->vdev_ops == &vdev_draid_ops); diff --git 
a/sys/contrib/openzfs/module/zfs/vdev_indirect.c b/sys/contrib/openzfs/module/zfs/vdev_indirect.c index cd24f97ae7cd..46c1fed6d2c6 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_indirect.c +++ b/sys/contrib/openzfs/module/zfs/vdev_indirect.c @@ -333,7 +333,7 @@ vdev_indirect_mark_obsolete(vdev_t *vd, uint64_t offset, uint64_t size) if (spa_feature_is_enabled(spa, SPA_FEATURE_OBSOLETE_COUNTS)) { mutex_enter(&vd->vdev_obsolete_lock); - range_tree_add(vd->vdev_obsolete_segments, offset, size); + zfs_range_tree_add(vd->vdev_obsolete_segments, offset, size); mutex_exit(&vd->vdev_obsolete_lock); vdev_dirty(vd, 0, NULL, spa_syncing_txg(spa)); } @@ -816,7 +816,7 @@ vdev_indirect_sync_obsolete(vdev_t *vd, dmu_tx_t *tx) vdev_indirect_config_t *vic __maybe_unused = &vd->vdev_indirect_config; ASSERT3U(vic->vic_mapping_object, !=, 0); - ASSERT(range_tree_space(vd->vdev_obsolete_segments) > 0); + ASSERT(zfs_range_tree_space(vd->vdev_obsolete_segments) > 0); ASSERT(vd->vdev_removing || vd->vdev_ops == &vdev_indirect_ops); ASSERT(spa_feature_is_enabled(spa, SPA_FEATURE_OBSOLETE_COUNTS)); @@ -845,7 +845,7 @@ vdev_indirect_sync_obsolete(vdev_t *vd, dmu_tx_t *tx) space_map_write(vd->vdev_obsolete_sm, vd->vdev_obsolete_segments, SM_ALLOC, SM_NO_VDEVID, tx); - range_tree_vacate(vd->vdev_obsolete_segments, NULL, NULL); + zfs_range_tree_vacate(vd->vdev_obsolete_segments, NULL, NULL); } int diff --git a/sys/contrib/openzfs/module/zfs/vdev_initialize.c b/sys/contrib/openzfs/module/zfs/vdev_initialize.c index 0a7323f58df2..f6e2662bd40f 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_initialize.c +++ b/sys/contrib/openzfs/module/zfs/vdev_initialize.c @@ -330,13 +330,14 @@ vdev_initialize_block_free(abd_t *data) static int vdev_initialize_ranges(vdev_t *vd, abd_t *data) { - range_tree_t *rt = vd->vdev_initialize_tree; + zfs_range_tree_t *rt = vd->vdev_initialize_tree; zfs_btree_t *bt = &rt->rt_root; zfs_btree_index_t where; - for (range_seg_t *rs = zfs_btree_first(bt, &where); rs != NULL; + for (zfs_range_seg_t *rs = zfs_btree_first(bt, &where); rs != NULL; rs = zfs_btree_next(bt, &where, &where)) { - uint64_t size = rs_get_end(rs, rt) - rs_get_start(rs, rt); + uint64_t size = zfs_rs_get_end(rs, rt) - + zfs_rs_get_start(rs, rt); /* Split range into legally-sized physical chunks */ uint64_t writes_required = @@ -346,7 +347,7 @@ vdev_initialize_ranges(vdev_t *vd, abd_t *data) int error; error = vdev_initialize_write(vd, - VDEV_LABEL_START_SIZE + rs_get_start(rs, rt) + + VDEV_LABEL_START_SIZE + zfs_rs_get_start(rs, rt) + (w * zfs_initialize_chunk_size), MIN(size - (w * zfs_initialize_chunk_size), zfs_initialize_chunk_size), data); @@ -358,7 +359,7 @@ vdev_initialize_ranges(vdev_t *vd, abd_t *data) } static void -vdev_initialize_xlate_last_rs_end(void *arg, range_seg64_t *physical_rs) +vdev_initialize_xlate_last_rs_end(void *arg, zfs_range_seg64_t *physical_rs) { uint64_t *last_rs_end = (uint64_t *)arg; @@ -367,7 +368,7 @@ vdev_initialize_xlate_last_rs_end(void *arg, range_seg64_t *physical_rs) } static void -vdev_initialize_xlate_progress(void *arg, range_seg64_t *physical_rs) +vdev_initialize_xlate_progress(void *arg, zfs_range_seg64_t *physical_rs) { vdev_t *vd = (vdev_t *)arg; @@ -406,7 +407,7 @@ vdev_initialize_calculate_progress(vdev_t *vd) * on our vdev. We use this to determine if we are * in the middle of this metaslab range. 
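The xlate callbacks renamed above all share one shape: vdev_xlate_walk() hands each translated physical extent of a logical range to a function taking a zfs_range_seg64_t. A minimal hypothetical callback and driver, mirroring vdev_initialize_xlate_progress(); the function names here are invented:

static void
sum_physical_bytes(void *arg, zfs_range_seg64_t *physical_rs)
{
	uint64_t *total = arg;

	/* Translation can yield an empty segment; skip it. */
	if (physical_rs->rs_end <= physical_rs->rs_start)
		return;

	*total += physical_rs->rs_end - physical_rs->rs_start;
}

static uint64_t
physical_size_of(vdev_t *vd, uint64_t start, uint64_t size)
{
	zfs_range_seg64_t logical_rs;
	uint64_t total = 0;

	logical_rs.rs_start = start;
	logical_rs.rs_end = start + size;
	vdev_xlate_walk(vd, &logical_rs, sum_physical_bytes, &total);

	return (total);
}
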
*/ - range_seg64_t logical_rs, physical_rs, remain_rs; + zfs_range_seg64_t logical_rs, physical_rs, remain_rs; logical_rs.rs_start = msp->ms_start; logical_rs.rs_end = msp->ms_start + msp->ms_size; @@ -440,13 +441,13 @@ vdev_initialize_calculate_progress(vdev_t *vd) VERIFY0(metaslab_load(msp)); zfs_btree_index_t where; - range_tree_t *rt = msp->ms_allocatable; - for (range_seg_t *rs = + zfs_range_tree_t *rt = msp->ms_allocatable; + for (zfs_range_seg_t *rs = zfs_btree_first(&rt->rt_root, &where); rs; rs = zfs_btree_next(&rt->rt_root, &where, &where)) { - logical_rs.rs_start = rs_get_start(rs, rt); - logical_rs.rs_end = rs_get_end(rs, rt); + logical_rs.rs_start = zfs_rs_get_start(rs, rt); + logical_rs.rs_end = zfs_rs_get_end(rs, rt); vdev_xlate_walk(vd, &logical_rs, vdev_initialize_xlate_progress, vd); @@ -480,7 +481,7 @@ vdev_initialize_load(vdev_t *vd) } static void -vdev_initialize_xlate_range_add(void *arg, range_seg64_t *physical_rs) +vdev_initialize_xlate_range_add(void *arg, zfs_range_seg64_t *physical_rs) { vdev_t *vd = arg; @@ -503,7 +504,7 @@ vdev_initialize_xlate_range_add(void *arg, range_seg64_t *physical_rs) ASSERT3U(physical_rs->rs_end, >, physical_rs->rs_start); - range_tree_add(vd->vdev_initialize_tree, physical_rs->rs_start, + zfs_range_tree_add(vd->vdev_initialize_tree, physical_rs->rs_start, physical_rs->rs_end - physical_rs->rs_start); } @@ -515,7 +516,7 @@ static void vdev_initialize_range_add(void *arg, uint64_t start, uint64_t size) { vdev_t *vd = arg; - range_seg64_t logical_rs; + zfs_range_seg64_t logical_rs; logical_rs.rs_start = start; logical_rs.rs_end = start + size; @@ -539,8 +540,8 @@ vdev_initialize_thread(void *arg) abd_t *deadbeef = vdev_initialize_block_alloc(); - vd->vdev_initialize_tree = range_tree_create(NULL, RANGE_SEG64, NULL, - 0, 0); + vd->vdev_initialize_tree = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64, + NULL, 0, 0); for (uint64_t i = 0; !vd->vdev_detached && i < vd->vdev_top->vdev_ms_count; i++) { @@ -563,15 +564,15 @@ vdev_initialize_thread(void *arg) unload_when_done = B_TRUE; VERIFY0(metaslab_load(msp)); - range_tree_walk(msp->ms_allocatable, vdev_initialize_range_add, - vd); + zfs_range_tree_walk(msp->ms_allocatable, + vdev_initialize_range_add, vd); mutex_exit(&msp->ms_lock); error = vdev_initialize_ranges(vd, deadbeef); metaslab_enable(msp, B_TRUE, unload_when_done); spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); - range_tree_vacate(vd->vdev_initialize_tree, NULL, NULL); + zfs_range_tree_vacate(vd->vdev_initialize_tree, NULL, NULL); if (error != 0) break; } @@ -584,7 +585,7 @@ vdev_initialize_thread(void *arg) } mutex_exit(&vd->vdev_initialize_io_lock); - range_tree_destroy(vd->vdev_initialize_tree); + zfs_range_tree_destroy(vd->vdev_initialize_tree); vdev_initialize_block_free(deadbeef); vd->vdev_initialize_tree = NULL; diff --git a/sys/contrib/openzfs/module/zfs/vdev_label.c b/sys/contrib/openzfs/module/zfs/vdev_label.c index 9d12bc2eb0a2..2c4e0c1c4848 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_label.c +++ b/sys/contrib/openzfs/module/zfs/vdev_label.c @@ -643,7 +643,8 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats, * will be combined with adjacent allocated segments * as a single mapping. 
*/ - for (int i = 0; i < RANGE_TREE_HISTOGRAM_SIZE; i++) { + for (int i = 0; i < ZFS_RANGE_TREE_HISTOGRAM_SIZE; + i++) { if (i + 1 < highbit64(vdev_removal_max_span) - 1) { to_alloc += diff --git a/sys/contrib/openzfs/module/zfs/vdev_raidz.c b/sys/contrib/openzfs/module/zfs/vdev_raidz.c index 6103f780e6bc..59225e766ba1 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_raidz.c +++ b/sys/contrib/openzfs/module/zfs/vdev_raidz.c @@ -2305,7 +2305,7 @@ vdev_raidz_io_verify(zio_t *zio, raidz_map_t *rm, raidz_row_t *rr, int col) { (void) rm; #ifdef ZFS_DEBUG - range_seg64_t logical_rs, physical_rs, remain_rs; + zfs_range_seg64_t logical_rs, physical_rs, remain_rs; logical_rs.rs_start = rr->rr_offset; logical_rs.rs_end = logical_rs.rs_start + vdev_raidz_asize(zio->io_vd, rr->rr_size, @@ -3650,8 +3650,8 @@ vdev_raidz_need_resilver(vdev_t *vd, const dva_t *dva, size_t psize, } static void -vdev_raidz_xlate(vdev_t *cvd, const range_seg64_t *logical_rs, - range_seg64_t *physical_rs, range_seg64_t *remain_rs) +vdev_raidz_xlate(vdev_t *cvd, const zfs_range_seg64_t *logical_rs, + zfs_range_seg64_t *physical_rs, zfs_range_seg64_t *remain_rs) { (void) remain_rs; @@ -3953,18 +3953,18 @@ vdev_raidz_expand_child_replacing(vdev_t *raidz_vd) } static boolean_t -raidz_reflow_impl(vdev_t *vd, vdev_raidz_expand_t *vre, range_tree_t *rt, +raidz_reflow_impl(vdev_t *vd, vdev_raidz_expand_t *vre, zfs_range_tree_t *rt, dmu_tx_t *tx) { spa_t *spa = vd->vdev_spa; uint_t ashift = vd->vdev_top->vdev_ashift; - range_seg_t *rs = range_tree_first(rt); + zfs_range_seg_t *rs = zfs_range_tree_first(rt); if (rt == NULL) return (B_FALSE); - uint64_t offset = rs_get_start(rs, rt); + uint64_t offset = zfs_rs_get_start(rs, rt); ASSERT(IS_P2ALIGNED(offset, 1 << ashift)); - uint64_t size = rs_get_end(rs, rt) - offset; + uint64_t size = zfs_rs_get_end(rs, rt) - offset; ASSERT3U(size, >=, 1 << ashift); ASSERT(IS_P2ALIGNED(size, 1 << ashift)); @@ -4001,7 +4001,7 @@ raidz_reflow_impl(vdev_t *vd, vdev_raidz_expand_t *vre, range_tree_t *rt, uint_t blocks = MIN(size >> ashift, next_overwrite_blkid - blkid); size = (uint64_t)blocks << ashift; - range_tree_remove(rt, offset, size); + zfs_range_tree_remove(rt, offset, size); uint_t reads = MIN(blocks, old_children); uint_t writes = MIN(blocks, vd->vdev_children); @@ -4553,12 +4553,13 @@ spa_raidz_expand_thread(void *arg, zthr_t *zthr) * space (e.g. in ms_defer), and it's fine to copy that too. */ uint64_t shift, start; - range_seg_type_t type = metaslab_calculate_range_tree_type( + zfs_range_seg_type_t type = metaslab_calculate_range_tree_type( raidvd, msp, &start, &shift); - range_tree_t *rt = range_tree_create(NULL, type, NULL, + zfs_range_tree_t *rt = zfs_range_tree_create(NULL, type, NULL, start, shift); - range_tree_add(rt, msp->ms_start, msp->ms_size); - range_tree_walk(msp->ms_allocatable, range_tree_remove, rt); + zfs_range_tree_add(rt, msp->ms_start, msp->ms_size); + zfs_range_tree_walk(msp->ms_allocatable, zfs_range_tree_remove, + rt); mutex_exit(&msp->ms_lock); /* @@ -4572,8 +4573,8 @@ spa_raidz_expand_thread(void *arg, zthr_t *zthr) int sectorsz = 1 << raidvd->vdev_ashift; uint64_t ms_last_offset = msp->ms_start + msp->ms_size - sectorsz; - if (!range_tree_contains(rt, ms_last_offset, sectorsz)) { - range_tree_add(rt, ms_last_offset, sectorsz); + if (!zfs_range_tree_contains(rt, ms_last_offset, sectorsz)) { + zfs_range_tree_add(rt, ms_last_offset, sectorsz); } /* @@ -4582,12 +4583,12 @@ spa_raidz_expand_thread(void *arg, zthr_t *zthr) * discard any state that we have already processed. 
*/ if (vre->vre_offset > msp->ms_start) { - range_tree_clear(rt, msp->ms_start, + zfs_range_tree_clear(rt, msp->ms_start, vre->vre_offset - msp->ms_start); } while (!zthr_iscancelled(zthr) && - !range_tree_is_empty(rt) && + !zfs_range_tree_is_empty(rt) && vre->vre_failed_offset == UINT64_MAX) { /* @@ -4649,8 +4650,8 @@ spa_raidz_expand_thread(void *arg, zthr_t *zthr) spa_config_exit(spa, SCL_CONFIG, FTAG); metaslab_enable(msp, B_FALSE, B_FALSE); - range_tree_vacate(rt, NULL, NULL); - range_tree_destroy(rt); + zfs_range_tree_vacate(rt, NULL, NULL); + zfs_range_tree_destroy(rt); spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); raidvd = vdev_lookup_top(spa, vre->vre_vdev_id); diff --git a/sys/contrib/openzfs/module/zfs/vdev_raidz_math.c b/sys/contrib/openzfs/module/zfs/vdev_raidz_math.c index e12b96170f55..340d32b61bf8 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_raidz_math.c +++ b/sys/contrib/openzfs/module/zfs/vdev_raidz_math.c @@ -81,7 +81,7 @@ static boolean_t raidz_math_initialized = B_FALSE; #define RAIDZ_IMPL_READ(i) (*(volatile uint32_t *) &(i)) -static uint32_t zfs_vdev_raidz_impl = IMPL_SCALAR; +uint32_t zfs_vdev_raidz_impl = IMPL_SCALAR; static uint32_t user_sel_impl = IMPL_FASTEST; /* Hold all supported implementations */ @@ -633,16 +633,10 @@ vdev_raidz_impl_set(const char *val) return (err); } -#if defined(_KERNEL) && defined(__linux__) - -static int -zfs_vdev_raidz_impl_set(const char *val, zfs_kernel_param_t *kp) -{ - return (vdev_raidz_impl_set(val)); -} +#if defined(_KERNEL) -static int -zfs_vdev_raidz_impl_get(char *buffer, zfs_kernel_param_t *kp) +int +vdev_raidz_impl_get(char *buffer, size_t size) { int i, cnt = 0; char *fmt; @@ -653,21 +647,18 @@ zfs_vdev_raidz_impl_get(char *buffer, zfs_kernel_param_t *kp) /* list mandatory options */ for (i = 0; i < ARRAY_SIZE(math_impl_opts) - 2; i++) { fmt = (impl == math_impl_opts[i].sel) ? "[%s] " : "%s "; - cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt, + cnt += kmem_scnprintf(buffer + cnt, size - cnt, fmt, math_impl_opts[i].name); } /* list all supported implementations */ for (i = 0; i < raidz_supp_impl_cnt; i++) { fmt = (i == impl) ? 
"[%s] " : "%s "; - cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt, + cnt += kmem_scnprintf(buffer + cnt, size - cnt, fmt, raidz_supp_impl[i]->name); } return (cnt); } -module_param_call(zfs_vdev_raidz_impl, zfs_vdev_raidz_impl_set, - zfs_vdev_raidz_impl_get, NULL, 0644); -MODULE_PARM_DESC(zfs_vdev_raidz_impl, "Select raidz implementation."); #endif diff --git a/sys/contrib/openzfs/module/zfs/vdev_rebuild.c b/sys/contrib/openzfs/module/zfs/vdev_rebuild.c index f80ed1b401f9..7ca1b1f846b6 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_rebuild.c +++ b/sys/contrib/openzfs/module/zfs/vdev_rebuild.c @@ -641,10 +641,10 @@ vdev_rebuild_ranges(vdev_rebuild_t *vr) zfs_btree_index_t idx; int error; - for (range_seg_t *rs = zfs_btree_first(t, &idx); rs != NULL; + for (zfs_range_seg_t *rs = zfs_btree_first(t, &idx); rs != NULL; rs = zfs_btree_next(t, &idx, &idx)) { - uint64_t start = rs_get_start(rs, vr->vr_scan_tree); - uint64_t size = rs_get_end(rs, vr->vr_scan_tree) - start; + uint64_t start = zfs_rs_get_start(rs, vr->vr_scan_tree); + uint64_t size = zfs_rs_get_end(rs, vr->vr_scan_tree) - start; /* * zfs_scan_suspend_progress can be set to disable rebuild @@ -786,7 +786,8 @@ vdev_rebuild_thread(void *arg) vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys; vr->vr_top_vdev = vd; vr->vr_scan_msp = NULL; - vr->vr_scan_tree = range_tree_create(NULL, RANGE_SEG64, NULL, 0, 0); + vr->vr_scan_tree = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64, NULL, + 0, 0); mutex_init(&vr->vr_io_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&vr->vr_io_cv, NULL, CV_DEFAULT, NULL); @@ -833,7 +834,7 @@ vdev_rebuild_thread(void *arg) break; } - ASSERT0(range_tree_space(vr->vr_scan_tree)); + ASSERT0(zfs_range_tree_space(vr->vr_scan_tree)); /* Disable any new allocations to this metaslab */ spa_config_exit(spa, SCL_CONFIG, FTAG); @@ -848,7 +849,7 @@ vdev_rebuild_thread(void *arg) * on disk and therefore will be rebuilt. */ for (int j = 0; j < TXG_SIZE; j++) { - if (range_tree_space(msp->ms_allocating[j])) { + if (zfs_range_tree_space(msp->ms_allocating[j])) { mutex_exit(&msp->ms_lock); mutex_exit(&msp->ms_sync_lock); txg_wait_synced(dsl, 0); @@ -869,21 +870,21 @@ vdev_rebuild_thread(void *arg) vr->vr_scan_tree, SM_ALLOC)); for (int i = 0; i < TXG_SIZE; i++) { - ASSERT0(range_tree_space( + ASSERT0(zfs_range_tree_space( msp->ms_allocating[i])); } - range_tree_walk(msp->ms_unflushed_allocs, - range_tree_add, vr->vr_scan_tree); - range_tree_walk(msp->ms_unflushed_frees, - range_tree_remove, vr->vr_scan_tree); + zfs_range_tree_walk(msp->ms_unflushed_allocs, + zfs_range_tree_add, vr->vr_scan_tree); + zfs_range_tree_walk(msp->ms_unflushed_frees, + zfs_range_tree_remove, vr->vr_scan_tree); /* * Remove ranges which have already been rebuilt based * on the last offset. This can happen when restarting * a scan after exporting and re-importing the pool. */ - range_tree_clear(vr->vr_scan_tree, 0, + zfs_range_tree_clear(vr->vr_scan_tree, 0, vrp->vrp_last_offset); } @@ -904,7 +905,7 @@ vdev_rebuild_thread(void *arg) * Walk the allocated space map and issue the rebuild I/O. 
*/ error = vdev_rebuild_ranges(vr); - range_tree_vacate(vr->vr_scan_tree, NULL, NULL); + zfs_range_tree_vacate(vr->vr_scan_tree, NULL, NULL); spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); metaslab_enable(msp, B_FALSE, B_FALSE); @@ -913,7 +914,7 @@ vdev_rebuild_thread(void *arg) break; } - range_tree_destroy(vr->vr_scan_tree); + zfs_range_tree_destroy(vr->vr_scan_tree); spa_config_exit(spa, SCL_CONFIG, FTAG); /* Wait for any remaining rebuild I/O to complete */ diff --git a/sys/contrib/openzfs/module/zfs/vdev_removal.c b/sys/contrib/openzfs/module/zfs/vdev_removal.c index 08c85a874803..1970c5425854 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_removal.c +++ b/sys/contrib/openzfs/module/zfs/vdev_removal.c @@ -369,12 +369,13 @@ spa_vdev_removal_create(vdev_t *vd) spa_vdev_removal_t *svr = kmem_zalloc(sizeof (*svr), KM_SLEEP); mutex_init(&svr->svr_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&svr->svr_cv, NULL, CV_DEFAULT, NULL); - svr->svr_allocd_segs = range_tree_create(NULL, RANGE_SEG64, NULL, 0, 0); + svr->svr_allocd_segs = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64, + NULL, 0, 0); svr->svr_vdev_id = vd->vdev_id; for (int i = 0; i < TXG_SIZE; i++) { - svr->svr_frees[i] = range_tree_create(NULL, RANGE_SEG64, NULL, - 0, 0); + svr->svr_frees[i] = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64, + NULL, 0, 0); list_create(&svr->svr_new_segments[i], sizeof (vdev_indirect_mapping_entry_t), offsetof(vdev_indirect_mapping_entry_t, vime_node)); @@ -389,11 +390,11 @@ spa_vdev_removal_destroy(spa_vdev_removal_t *svr) for (int i = 0; i < TXG_SIZE; i++) { ASSERT0(svr->svr_bytes_done[i]); ASSERT0(svr->svr_max_offset_to_sync[i]); - range_tree_destroy(svr->svr_frees[i]); + zfs_range_tree_destroy(svr->svr_frees[i]); list_destroy(&svr->svr_new_segments[i]); } - range_tree_destroy(svr->svr_allocd_segs); + zfs_range_tree_destroy(svr->svr_allocd_segs); mutex_destroy(&svr->svr_lock); cv_destroy(&svr->svr_cv); kmem_free(svr, sizeof (*svr)); @@ -475,11 +476,11 @@ vdev_remove_initiate_sync(void *arg, dmu_tx_t *tx) * be copied. */ spa->spa_removing_phys.sr_to_copy -= - range_tree_space(ms->ms_freeing); + zfs_range_tree_space(ms->ms_freeing); - ASSERT0(range_tree_space(ms->ms_freed)); + ASSERT0(zfs_range_tree_space(ms->ms_freed)); for (int t = 0; t < TXG_SIZE; t++) - ASSERT0(range_tree_space(ms->ms_allocating[t])); + ASSERT0(zfs_range_tree_space(ms->ms_allocating[t])); } /* @@ -770,7 +771,7 @@ free_from_removing_vdev(vdev_t *vd, uint64_t offset, uint64_t size) * completed the copy and synced the mapping (see * vdev_mapping_sync). */ - range_tree_add(svr->svr_frees[txgoff], + zfs_range_tree_add(svr->svr_frees[txgoff], offset, inflight_size); size -= inflight_size; offset += inflight_size; @@ -806,7 +807,8 @@ free_from_removing_vdev(vdev_t *vd, uint64_t offset, uint64_t size) uint64_t, size); if (svr->svr_allocd_segs != NULL) - range_tree_clear(svr->svr_allocd_segs, offset, size); + zfs_range_tree_clear(svr->svr_allocd_segs, offset, + size); /* * Since we now do not need to copy this data, for @@ -915,7 +917,7 @@ vdev_mapping_sync(void *arg, dmu_tx_t *tx) * mapping entries were in flight. 
*/ mutex_enter(&svr->svr_lock); - range_tree_vacate(svr->svr_frees[txg & TXG_MASK], + zfs_range_tree_vacate(svr->svr_frees[txg & TXG_MASK], free_mapped_segment_cb, vd); ASSERT3U(svr->svr_max_offset_to_sync[txg & TXG_MASK], >=, vdev_indirect_mapping_max_offset(vim)); @@ -929,7 +931,7 @@ typedef struct vdev_copy_segment_arg { spa_t *vcsa_spa; dva_t *vcsa_dest_dva; uint64_t vcsa_txg; - range_tree_t *vcsa_obsolete_segs; + zfs_range_tree_t *vcsa_obsolete_segs; } vdev_copy_segment_arg_t; static void @@ -966,9 +968,9 @@ spa_vdev_copy_segment_done(zio_t *zio) { vdev_copy_segment_arg_t *vcsa = zio->io_private; - range_tree_vacate(vcsa->vcsa_obsolete_segs, + zfs_range_tree_vacate(vcsa->vcsa_obsolete_segs, unalloc_seg, vcsa); - range_tree_destroy(vcsa->vcsa_obsolete_segs); + zfs_range_tree_destroy(vcsa->vcsa_obsolete_segs); kmem_free(vcsa, sizeof (*vcsa)); spa_config_exit(zio->io_spa, SCL_STATE, zio->io_spa); @@ -1119,7 +1121,7 @@ spa_vdev_copy_one_child(vdev_copy_arg_t *vca, zio_t *nzio, * read from the old location and write to the new location. */ static int -spa_vdev_copy_segment(vdev_t *vd, range_tree_t *segs, +spa_vdev_copy_segment(vdev_t *vd, zfs_range_tree_t *segs, uint64_t maxalloc, uint64_t txg, vdev_copy_arg_t *vca, zio_alloc_list_t *zal) { @@ -1128,28 +1130,28 @@ spa_vdev_copy_segment(vdev_t *vd, range_tree_t *segs, spa_vdev_removal_t *svr = spa->spa_vdev_removal; vdev_indirect_mapping_entry_t *entry; dva_t dst = {{ 0 }}; - uint64_t start = range_tree_min(segs); + uint64_t start = zfs_range_tree_min(segs); ASSERT0(P2PHASE(start, 1 << spa->spa_min_ashift)); ASSERT3U(maxalloc, <=, SPA_MAXBLOCKSIZE); ASSERT0(P2PHASE(maxalloc, 1 << spa->spa_min_ashift)); - uint64_t size = range_tree_span(segs); - if (range_tree_span(segs) > maxalloc) { + uint64_t size = zfs_range_tree_span(segs); + if (zfs_range_tree_span(segs) > maxalloc) { /* * We can't allocate all the segments. Prefer to end * the allocation at the end of a segment, thus avoiding * additional split blocks. */ - range_seg_max_t search; + zfs_range_seg_max_t search; zfs_btree_index_t where; - rs_set_start(&search, segs, start + maxalloc); - rs_set_end(&search, segs, start + maxalloc); + zfs_rs_set_start(&search, segs, start + maxalloc); + zfs_rs_set_end(&search, segs, start + maxalloc); (void) zfs_btree_find(&segs->rt_root, &search, &where); - range_seg_t *rs = zfs_btree_prev(&segs->rt_root, &where, + zfs_range_seg_t *rs = zfs_btree_prev(&segs->rt_root, &where, &where); if (rs != NULL) { - size = rs_get_end(rs, segs) - start; + size = zfs_rs_get_end(rs, segs) - start; } else { /* * There are no segments that end before maxalloc. @@ -1182,27 +1184,27 @@ spa_vdev_copy_segment(vdev_t *vd, range_tree_t *segs, * relative to the start of the range to be copied (i.e. relative to the * local variable "start"). 
*/ - range_tree_t *obsolete_segs = range_tree_create(NULL, RANGE_SEG64, NULL, - 0, 0); + zfs_range_tree_t *obsolete_segs = zfs_range_tree_create(NULL, + ZFS_RANGE_SEG64, NULL, 0, 0); zfs_btree_index_t where; - range_seg_t *rs = zfs_btree_first(&segs->rt_root, &where); - ASSERT3U(rs_get_start(rs, segs), ==, start); - uint64_t prev_seg_end = rs_get_end(rs, segs); + zfs_range_seg_t *rs = zfs_btree_first(&segs->rt_root, &where); + ASSERT3U(zfs_rs_get_start(rs, segs), ==, start); + uint64_t prev_seg_end = zfs_rs_get_end(rs, segs); while ((rs = zfs_btree_next(&segs->rt_root, &where, &where)) != NULL) { - if (rs_get_start(rs, segs) >= start + size) { + if (zfs_rs_get_start(rs, segs) >= start + size) { break; } else { - range_tree_add(obsolete_segs, + zfs_range_tree_add(obsolete_segs, prev_seg_end - start, - rs_get_start(rs, segs) - prev_seg_end); + zfs_rs_get_start(rs, segs) - prev_seg_end); } - prev_seg_end = rs_get_end(rs, segs); + prev_seg_end = zfs_rs_get_end(rs, segs); } /* We don't end in the middle of an obsolete range */ ASSERT3U(start + size, <=, prev_seg_end); - range_tree_clear(segs, start, size); + zfs_range_tree_clear(segs, start, size); /* * We can't have any padding of the allocated size, otherwise we will @@ -1216,7 +1218,8 @@ spa_vdev_copy_segment(vdev_t *vd, range_tree_t *segs, DVA_MAPPING_SET_SRC_OFFSET(&entry->vime_mapping, start); entry->vime_mapping.vimep_dst = dst; if (spa_feature_is_enabled(spa, SPA_FEATURE_OBSOLETE_COUNTS)) { - entry->vime_obsolete_count = range_tree_space(obsolete_segs); + entry->vime_obsolete_count = + zfs_range_tree_space(obsolete_segs); } vdev_copy_segment_arg_t *vcsa = kmem_zalloc(sizeof (*vcsa), KM_SLEEP); @@ -1455,30 +1458,31 @@ spa_vdev_copy_impl(vdev_t *vd, spa_vdev_removal_t *svr, vdev_copy_arg_t *vca, * allocated segments that we are copying. We may also be copying * free segments (of up to vdev_removal_max_span bytes). */ - range_tree_t *segs = range_tree_create(NULL, RANGE_SEG64, NULL, 0, 0); + zfs_range_tree_t *segs = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64, + NULL, 0, 0); for (;;) { - range_tree_t *rt = svr->svr_allocd_segs; - range_seg_t *rs = range_tree_first(rt); + zfs_range_tree_t *rt = svr->svr_allocd_segs; + zfs_range_seg_t *rs = zfs_range_tree_first(rt); if (rs == NULL) break; uint64_t seg_length; - if (range_tree_is_empty(segs)) { + if (zfs_range_tree_is_empty(segs)) { /* need to truncate the first seg based on max_alloc */ - seg_length = MIN(rs_get_end(rs, rt) - rs_get_start(rs, - rt), *max_alloc); + seg_length = MIN(zfs_rs_get_end(rs, rt) - + zfs_rs_get_start(rs, rt), *max_alloc); } else { - if (rs_get_start(rs, rt) - range_tree_max(segs) > - vdev_removal_max_span) { + if (zfs_rs_get_start(rs, rt) - zfs_range_tree_max(segs) + > vdev_removal_max_span) { /* * Including this segment would cause us to * copy a larger unneeded chunk than is allowed. */ break; - } else if (rs_get_end(rs, rt) - range_tree_min(segs) > - *max_alloc) { + } else if (zfs_rs_get_end(rs, rt) - + zfs_range_tree_min(segs) > *max_alloc) { /* * This additional segment would extend past * max_alloc. 
Rather than splitting this @@ -1486,19 +1490,19 @@ spa_vdev_copy_impl(vdev_t *vd, spa_vdev_removal_t *svr, vdev_copy_arg_t *vca, */ break; } else { - seg_length = rs_get_end(rs, rt) - - rs_get_start(rs, rt); + seg_length = zfs_rs_get_end(rs, rt) - + zfs_rs_get_start(rs, rt); } } - range_tree_add(segs, rs_get_start(rs, rt), seg_length); - range_tree_remove(svr->svr_allocd_segs, - rs_get_start(rs, rt), seg_length); + zfs_range_tree_add(segs, zfs_rs_get_start(rs, rt), seg_length); + zfs_range_tree_remove(svr->svr_allocd_segs, + zfs_rs_get_start(rs, rt), seg_length); } - if (range_tree_is_empty(segs)) { + if (zfs_range_tree_is_empty(segs)) { mutex_exit(&svr->svr_lock); - range_tree_destroy(segs); + zfs_range_tree_destroy(segs); return; } @@ -1507,20 +1511,20 @@ spa_vdev_copy_impl(vdev_t *vd, spa_vdev_removal_t *svr, vdev_copy_arg_t *vca, svr, tx); } - svr->svr_max_offset_to_sync[txg & TXG_MASK] = range_tree_max(segs); + svr->svr_max_offset_to_sync[txg & TXG_MASK] = zfs_range_tree_max(segs); /* * Note: this is the amount of *allocated* space * that we are taking care of each txg. */ - svr->svr_bytes_done[txg & TXG_MASK] += range_tree_space(segs); + svr->svr_bytes_done[txg & TXG_MASK] += zfs_range_tree_space(segs); mutex_exit(&svr->svr_lock); zio_alloc_list_t zal; metaslab_trace_init(&zal); uint64_t thismax = SPA_MAXBLOCKSIZE; - while (!range_tree_is_empty(segs)) { + while (!zfs_range_tree_is_empty(segs)) { int error = spa_vdev_copy_segment(vd, segs, thismax, txg, vca, &zal); @@ -1537,7 +1541,7 @@ spa_vdev_copy_impl(vdev_t *vd, spa_vdev_removal_t *svr, vdev_copy_arg_t *vca, ASSERT3U(spa->spa_max_ashift, >=, SPA_MINBLOCKSHIFT); ASSERT3U(spa->spa_max_ashift, ==, spa->spa_min_ashift); uint64_t attempted = - MIN(range_tree_span(segs), thismax); + MIN(zfs_range_tree_span(segs), thismax); thismax = P2ROUNDUP(attempted / 2, 1 << spa->spa_max_ashift); /* @@ -1557,7 +1561,7 @@ spa_vdev_copy_impl(vdev_t *vd, spa_vdev_removal_t *svr, vdev_copy_arg_t *vca, } } metaslab_trace_fini(&zal); - range_tree_destroy(segs); + zfs_range_tree_destroy(segs); } /* @@ -1628,7 +1632,7 @@ spa_vdev_remove_thread(void *arg) metaslab_t *msp = vd->vdev_ms[msi]; ASSERT3U(msi, <=, vd->vdev_ms_count); - ASSERT0(range_tree_space(svr->svr_allocd_segs)); + ASSERT0(zfs_range_tree_space(svr->svr_allocd_segs)); mutex_enter(&msp->ms_sync_lock); mutex_enter(&msp->ms_lock); @@ -1637,7 +1641,7 @@ spa_vdev_remove_thread(void *arg) * Assert nothing in flight -- ms_*tree is empty. */ for (int i = 0; i < TXG_SIZE; i++) { - ASSERT0(range_tree_space(msp->ms_allocating[i])); + ASSERT0(zfs_range_tree_space(msp->ms_allocating[i])); } /* @@ -1653,19 +1657,20 @@ spa_vdev_remove_thread(void *arg) VERIFY0(space_map_load(msp->ms_sm, svr->svr_allocd_segs, SM_ALLOC)); - range_tree_walk(msp->ms_unflushed_allocs, - range_tree_add, svr->svr_allocd_segs); - range_tree_walk(msp->ms_unflushed_frees, - range_tree_remove, svr->svr_allocd_segs); - range_tree_walk(msp->ms_freeing, - range_tree_remove, svr->svr_allocd_segs); + zfs_range_tree_walk(msp->ms_unflushed_allocs, + zfs_range_tree_add, svr->svr_allocd_segs); + zfs_range_tree_walk(msp->ms_unflushed_frees, + zfs_range_tree_remove, svr->svr_allocd_segs); + zfs_range_tree_walk(msp->ms_freeing, + zfs_range_tree_remove, svr->svr_allocd_segs); /* * When we are resuming from a paused removal (i.e. * when importing a pool with a removal in progress), * discard any state that we have already processed. 
*/ - range_tree_clear(svr->svr_allocd_segs, 0, start_offset); + zfs_range_tree_clear(svr->svr_allocd_segs, 0, + start_offset); } mutex_exit(&msp->ms_lock); mutex_exit(&msp->ms_sync_lock); @@ -1677,7 +1682,7 @@ spa_vdev_remove_thread(void *arg) (u_longlong_t)msp->ms_id); while (!svr->svr_thread_exit && - !range_tree_is_empty(svr->svr_allocd_segs)) { + !zfs_range_tree_is_empty(svr->svr_allocd_segs)) { mutex_exit(&svr->svr_lock); @@ -1756,7 +1761,7 @@ spa_vdev_remove_thread(void *arg) if (svr->svr_thread_exit) { mutex_enter(&svr->svr_lock); - range_tree_vacate(svr->svr_allocd_segs, NULL, NULL); + zfs_range_tree_vacate(svr->svr_allocd_segs, NULL, NULL); svr->svr_thread = NULL; cv_broadcast(&svr->svr_cv); mutex_exit(&svr->svr_lock); @@ -1776,7 +1781,7 @@ spa_vdev_remove_thread(void *arg) spa_vdev_remove_cancel_impl(spa); } } else { - ASSERT0(range_tree_space(svr->svr_allocd_segs)); + ASSERT0(zfs_range_tree_space(svr->svr_allocd_segs)); vdev_remove_complete(spa); } @@ -1885,7 +1890,7 @@ spa_vdev_remove_cancel_sync(void *arg, dmu_tx_t *tx) if (msp->ms_start >= vdev_indirect_mapping_max_offset(vim)) break; - ASSERT0(range_tree_space(svr->svr_allocd_segs)); + ASSERT0(zfs_range_tree_space(svr->svr_allocd_segs)); mutex_enter(&msp->ms_lock); @@ -1893,22 +1898,22 @@ spa_vdev_remove_cancel_sync(void *arg, dmu_tx_t *tx) * Assert nothing in flight -- ms_*tree is empty. */ for (int i = 0; i < TXG_SIZE; i++) - ASSERT0(range_tree_space(msp->ms_allocating[i])); + ASSERT0(zfs_range_tree_space(msp->ms_allocating[i])); for (int i = 0; i < TXG_DEFER_SIZE; i++) - ASSERT0(range_tree_space(msp->ms_defer[i])); - ASSERT0(range_tree_space(msp->ms_freed)); + ASSERT0(zfs_range_tree_space(msp->ms_defer[i])); + ASSERT0(zfs_range_tree_space(msp->ms_freed)); if (msp->ms_sm != NULL) { mutex_enter(&svr->svr_lock); VERIFY0(space_map_load(msp->ms_sm, svr->svr_allocd_segs, SM_ALLOC)); - range_tree_walk(msp->ms_unflushed_allocs, - range_tree_add, svr->svr_allocd_segs); - range_tree_walk(msp->ms_unflushed_frees, - range_tree_remove, svr->svr_allocd_segs); - range_tree_walk(msp->ms_freeing, - range_tree_remove, svr->svr_allocd_segs); + zfs_range_tree_walk(msp->ms_unflushed_allocs, + zfs_range_tree_add, svr->svr_allocd_segs); + zfs_range_tree_walk(msp->ms_unflushed_frees, + zfs_range_tree_remove, svr->svr_allocd_segs); + zfs_range_tree_walk(msp->ms_freeing, + zfs_range_tree_remove, svr->svr_allocd_segs); /* * Clear everything past what has been synced, @@ -1918,7 +1923,7 @@ spa_vdev_remove_cancel_sync(void *arg, dmu_tx_t *tx) uint64_t sm_end = msp->ms_sm->sm_start + msp->ms_sm->sm_size; if (sm_end > syncd) - range_tree_clear(svr->svr_allocd_segs, + zfs_range_tree_clear(svr->svr_allocd_segs, syncd, sm_end - syncd); mutex_exit(&svr->svr_lock); @@ -1926,7 +1931,7 @@ spa_vdev_remove_cancel_sync(void *arg, dmu_tx_t *tx) mutex_exit(&msp->ms_lock); mutex_enter(&svr->svr_lock); - range_tree_vacate(svr->svr_allocd_segs, + zfs_range_tree_vacate(svr->svr_allocd_segs, free_mapped_segment_cb, vd); mutex_exit(&svr->svr_lock); } @@ -1935,7 +1940,7 @@ spa_vdev_remove_cancel_sync(void *arg, dmu_tx_t *tx) * Note: this must happen after we invoke free_mapped_segment_cb, * because it adds to the obsolete_segments. 
*/ - range_tree_vacate(vd->vdev_obsolete_segments, NULL, NULL); + zfs_range_tree_vacate(vd->vdev_obsolete_segments, NULL, NULL); ASSERT3U(vic->vic_mapping_object, ==, vdev_indirect_mapping_object(vd->vdev_indirect_mapping)); diff --git a/sys/contrib/openzfs/module/zfs/vdev_trim.c b/sys/contrib/openzfs/module/zfs/vdev_trim.c index 9cf10332e8bf..1ca0b23c0ee4 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_trim.c +++ b/sys/contrib/openzfs/module/zfs/vdev_trim.c @@ -149,7 +149,7 @@ typedef struct trim_args { */ vdev_t *trim_vdev; /* Leaf vdev to TRIM */ metaslab_t *trim_msp; /* Disabled metaslab */ - range_tree_t *trim_tree; /* TRIM ranges (in metaslab) */ + zfs_range_tree_t *trim_tree; /* TRIM ranges (in metaslab) */ trim_type_t trim_type; /* Manual or auto TRIM */ uint64_t trim_extent_bytes_max; /* Maximum TRIM I/O size */ uint64_t trim_extent_bytes_min; /* Minimum TRIM I/O size */ @@ -601,10 +601,10 @@ vdev_trim_ranges(trim_args_t *ta) ta->trim_start_time = gethrtime(); ta->trim_bytes_done = 0; - for (range_seg_t *rs = zfs_btree_first(t, &idx); rs != NULL; + for (zfs_range_seg_t *rs = zfs_btree_first(t, &idx); rs != NULL; rs = zfs_btree_next(t, &idx, &idx)) { - uint64_t size = rs_get_end(rs, ta->trim_tree) - rs_get_start(rs, - ta->trim_tree); + uint64_t size = zfs_rs_get_end(rs, ta->trim_tree) - + zfs_rs_get_start(rs, ta->trim_tree); if (extent_bytes_min && size < extent_bytes_min) { spa_iostats_trim_add(spa, ta->trim_type, @@ -617,7 +617,7 @@ vdev_trim_ranges(trim_args_t *ta) for (uint64_t w = 0; w < writes_required; w++) { error = vdev_trim_range(ta, VDEV_LABEL_START_SIZE + - rs_get_start(rs, ta->trim_tree) + + zfs_rs_get_start(rs, ta->trim_tree) + (w *extent_bytes_max), MIN(size - (w * extent_bytes_max), extent_bytes_max)); if (error != 0) { @@ -645,7 +645,7 @@ done: } static void -vdev_trim_xlate_last_rs_end(void *arg, range_seg64_t *physical_rs) +vdev_trim_xlate_last_rs_end(void *arg, zfs_range_seg64_t *physical_rs) { uint64_t *last_rs_end = (uint64_t *)arg; @@ -654,7 +654,7 @@ vdev_trim_xlate_last_rs_end(void *arg, range_seg64_t *physical_rs) } static void -vdev_trim_xlate_progress(void *arg, range_seg64_t *physical_rs) +vdev_trim_xlate_progress(void *arg, zfs_range_seg64_t *physical_rs) { vdev_t *vd = (vdev_t *)arg; @@ -696,7 +696,7 @@ vdev_trim_calculate_progress(vdev_t *vd) * on our vdev. We use this to determine if we are * in the middle of this metaslab range. 
*/ - range_seg64_t logical_rs, physical_rs, remain_rs; + zfs_range_seg64_t logical_rs, physical_rs, remain_rs; logical_rs.rs_start = msp->ms_start; logical_rs.rs_end = msp->ms_start + msp->ms_size; @@ -729,13 +729,13 @@ vdev_trim_calculate_progress(vdev_t *vd) */ VERIFY0(metaslab_load(msp)); - range_tree_t *rt = msp->ms_allocatable; + zfs_range_tree_t *rt = msp->ms_allocatable; zfs_btree_t *bt = &rt->rt_root; zfs_btree_index_t idx; - for (range_seg_t *rs = zfs_btree_first(bt, &idx); + for (zfs_range_seg_t *rs = zfs_btree_first(bt, &idx); rs != NULL; rs = zfs_btree_next(bt, &idx, &idx)) { - logical_rs.rs_start = rs_get_start(rs, rt); - logical_rs.rs_end = rs_get_end(rs, rt); + logical_rs.rs_start = zfs_rs_get_start(rs, rt); + logical_rs.rs_end = zfs_rs_get_end(rs, rt); vdev_xlate_walk(vd, &logical_rs, vdev_trim_xlate_progress, vd); @@ -807,7 +807,7 @@ vdev_trim_load(vdev_t *vd) } static void -vdev_trim_xlate_range_add(void *arg, range_seg64_t *physical_rs) +vdev_trim_xlate_range_add(void *arg, zfs_range_seg64_t *physical_rs) { trim_args_t *ta = arg; vdev_t *vd = ta->trim_vdev; @@ -832,7 +832,7 @@ vdev_trim_xlate_range_add(void *arg, range_seg64_t *physical_rs) ASSERT3U(physical_rs->rs_end, >, physical_rs->rs_start); - range_tree_add(ta->trim_tree, physical_rs->rs_start, + zfs_range_tree_add(ta->trim_tree, physical_rs->rs_start, physical_rs->rs_end - physical_rs->rs_start); } @@ -845,7 +845,7 @@ vdev_trim_range_add(void *arg, uint64_t start, uint64_t size) { trim_args_t *ta = arg; vdev_t *vd = ta->trim_vdev; - range_seg64_t logical_rs; + zfs_range_seg64_t logical_rs; logical_rs.rs_start = start; logical_rs.rs_end = start + size; @@ -858,7 +858,8 @@ vdev_trim_range_add(void *arg, uint64_t start, uint64_t size) metaslab_t *msp = ta->trim_msp; VERIFY0(metaslab_load(msp)); VERIFY3B(msp->ms_loaded, ==, B_TRUE); - VERIFY(range_tree_contains(msp->ms_allocatable, start, size)); + VERIFY(zfs_range_tree_contains(msp->ms_allocatable, start, + size)); } ASSERT(vd->vdev_ops->vdev_op_leaf); @@ -900,7 +901,7 @@ vdev_trim_thread(void *arg) ta.trim_vdev = vd; ta.trim_extent_bytes_max = zfs_trim_extent_bytes_max; ta.trim_extent_bytes_min = zfs_trim_extent_bytes_min; - ta.trim_tree = range_tree_create(NULL, RANGE_SEG64, NULL, 0, 0); + ta.trim_tree = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64, NULL, 0, 0); ta.trim_type = TRIM_TYPE_MANUAL; ta.trim_flags = 0; @@ -946,22 +947,23 @@ vdev_trim_thread(void *arg) } ta.trim_msp = msp; - range_tree_walk(msp->ms_allocatable, vdev_trim_range_add, &ta); - range_tree_vacate(msp->ms_trim, NULL, NULL); + zfs_range_tree_walk(msp->ms_allocatable, vdev_trim_range_add, + &ta); + zfs_range_tree_vacate(msp->ms_trim, NULL, NULL); mutex_exit(&msp->ms_lock); error = vdev_trim_ranges(&ta); metaslab_enable(msp, B_TRUE, B_FALSE); spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); - range_tree_vacate(ta.trim_tree, NULL, NULL); + zfs_range_tree_vacate(ta.trim_tree, NULL, NULL); if (error != 0) break; } spa_config_exit(spa, SCL_CONFIG, FTAG); - range_tree_destroy(ta.trim_tree); + zfs_range_tree_destroy(ta.trim_tree); mutex_enter(&vd->vdev_trim_lock); if (!vd->vdev_trim_exit_wanted) { @@ -1204,7 +1206,7 @@ vdev_trim_range_verify(void *arg, uint64_t start, uint64_t size) VERIFY3B(msp->ms_loaded, ==, B_TRUE); VERIFY3U(msp->ms_disabled, >, 0); - VERIFY(range_tree_contains(msp->ms_allocatable, start, size)); + VERIFY(zfs_range_tree_contains(msp->ms_allocatable, start, size)); } /* @@ -1261,7 +1263,7 @@ vdev_autotrim_thread(void *arg) for (uint64_t i = shift % txgs_per_trim; i < 
vd->vdev_ms_count; i += txgs_per_trim) { metaslab_t *msp = vd->vdev_ms[i]; - range_tree_t *trim_tree; + zfs_range_tree_t *trim_tree; boolean_t issued_trim = B_FALSE; boolean_t wait_aborted = B_FALSE; @@ -1276,7 +1278,7 @@ vdev_autotrim_thread(void *arg) * or when there are no recent frees to trim. */ if (msp->ms_sm == NULL || - range_tree_is_empty(msp->ms_trim)) { + zfs_range_tree_is_empty(msp->ms_trim)) { mutex_exit(&msp->ms_lock); metaslab_enable(msp, B_FALSE, B_FALSE); continue; @@ -1302,10 +1304,10 @@ vdev_autotrim_thread(void *arg) * Allocate an empty range tree which is swapped in * for the existing ms_trim tree while it is processed. */ - trim_tree = range_tree_create(NULL, RANGE_SEG64, NULL, - 0, 0); - range_tree_swap(&msp->ms_trim, &trim_tree); - ASSERT(range_tree_is_empty(msp->ms_trim)); + trim_tree = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64, + NULL, 0, 0); + zfs_range_tree_swap(&msp->ms_trim, &trim_tree); + ASSERT(zfs_range_tree_is_empty(msp->ms_trim)); /* * There are two cases when constructing the per-vdev @@ -1357,9 +1359,9 @@ vdev_autotrim_thread(void *arg) if (!cvd->vdev_ops->vdev_op_leaf) continue; - ta->trim_tree = range_tree_create(NULL, - RANGE_SEG64, NULL, 0, 0); - range_tree_walk(trim_tree, + ta->trim_tree = zfs_range_tree_create(NULL, + ZFS_RANGE_SEG64, NULL, 0, 0); + zfs_range_tree_walk(trim_tree, vdev_trim_range_add, ta); } @@ -1406,13 +1408,13 @@ vdev_autotrim_thread(void *arg) mutex_enter(&msp->ms_lock); VERIFY0(metaslab_load(msp)); VERIFY3P(tap[0].trim_msp, ==, msp); - range_tree_walk(trim_tree, + zfs_range_tree_walk(trim_tree, vdev_trim_range_verify, &tap[0]); mutex_exit(&msp->ms_lock); } - range_tree_vacate(trim_tree, NULL, NULL); - range_tree_destroy(trim_tree); + zfs_range_tree_vacate(trim_tree, NULL, NULL); + zfs_range_tree_destroy(trim_tree); /* * Wait for couples of kicks, to ensure the trim io is @@ -1434,8 +1436,9 @@ vdev_autotrim_thread(void *arg) if (ta->trim_tree == NULL) continue; - range_tree_vacate(ta->trim_tree, NULL, NULL); - range_tree_destroy(ta->trim_tree); + zfs_range_tree_vacate(ta->trim_tree, NULL, + NULL); + zfs_range_tree_destroy(ta->trim_tree); } kmem_free(tap, sizeof (trim_args_t) * children); @@ -1474,7 +1477,7 @@ vdev_autotrim_thread(void *arg) metaslab_t *msp = vd->vdev_ms[i]; mutex_enter(&msp->ms_lock); - range_tree_vacate(msp->ms_trim, NULL, NULL); + zfs_range_tree_vacate(msp->ms_trim, NULL, NULL); mutex_exit(&msp->ms_lock); } } @@ -1585,7 +1588,7 @@ vdev_trim_l2arc_thread(void *arg) spa_t *spa = vd->vdev_spa; l2arc_dev_t *dev = l2arc_vdev_get(vd); trim_args_t ta = {0}; - range_seg64_t physical_rs; + zfs_range_seg64_t physical_rs; ASSERT(vdev_is_concrete(vd)); spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); @@ -1596,7 +1599,7 @@ vdev_trim_l2arc_thread(void *arg) vd->vdev_trim_secure = 0; ta.trim_vdev = vd; - ta.trim_tree = range_tree_create(NULL, RANGE_SEG64, NULL, 0, 0); + ta.trim_tree = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64, NULL, 0, 0); ta.trim_type = TRIM_TYPE_MANUAL; ta.trim_extent_bytes_max = zfs_trim_extent_bytes_max; ta.trim_extent_bytes_min = SPA_MINBLOCKSIZE; @@ -1606,7 +1609,7 @@ vdev_trim_l2arc_thread(void *arg) physical_rs.rs_end = vd->vdev_trim_bytes_est = vdev_get_min_asize(vd); - range_tree_add(ta.trim_tree, physical_rs.rs_start, + zfs_range_tree_add(ta.trim_tree, physical_rs.rs_start, physical_rs.rs_end - physical_rs.rs_start); mutex_enter(&vd->vdev_trim_lock); @@ -1622,8 +1625,8 @@ vdev_trim_l2arc_thread(void *arg) } mutex_exit(&vd->vdev_trim_io_lock); - range_tree_vacate(ta.trim_tree, NULL, NULL); - 
range_tree_destroy(ta.trim_tree); + zfs_range_tree_vacate(ta.trim_tree, NULL, NULL); + zfs_range_tree_destroy(ta.trim_tree); mutex_enter(&vd->vdev_trim_lock); if (!vd->vdev_trim_exit_wanted && vdev_writeable(vd)) { @@ -1719,7 +1722,7 @@ int vdev_trim_simple(vdev_t *vd, uint64_t start, uint64_t size) { trim_args_t ta = {0}; - range_seg64_t physical_rs; + zfs_range_seg64_t physical_rs; int error; physical_rs.rs_start = start; physical_rs.rs_end = start + size; @@ -1731,7 +1734,7 @@ vdev_trim_simple(vdev_t *vd, uint64_t start, uint64_t size) ASSERT(!vd->vdev_top->vdev_rz_expanding); ta.trim_vdev = vd; - ta.trim_tree = range_tree_create(NULL, RANGE_SEG64, NULL, 0, 0); + ta.trim_tree = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64, NULL, 0, 0); ta.trim_type = TRIM_TYPE_SIMPLE; ta.trim_extent_bytes_max = zfs_trim_extent_bytes_max; ta.trim_extent_bytes_min = SPA_MINBLOCKSIZE; @@ -1740,7 +1743,7 @@ vdev_trim_simple(vdev_t *vd, uint64_t start, uint64_t size) ASSERT3U(physical_rs.rs_end, >=, physical_rs.rs_start); if (physical_rs.rs_end > physical_rs.rs_start) { - range_tree_add(ta.trim_tree, physical_rs.rs_start, + zfs_range_tree_add(ta.trim_tree, physical_rs.rs_start, physical_rs.rs_end - physical_rs.rs_start); } else { ASSERT3U(physical_rs.rs_end, ==, physical_rs.rs_start); @@ -1754,8 +1757,8 @@ vdev_trim_simple(vdev_t *vd, uint64_t start, uint64_t size) } mutex_exit(&vd->vdev_trim_io_lock); - range_tree_vacate(ta.trim_tree, NULL, NULL); - range_tree_destroy(ta.trim_tree); + zfs_range_tree_vacate(ta.trim_tree, NULL, NULL); + zfs_range_tree_destroy(ta.trim_tree); return (error); } diff --git a/sys/contrib/openzfs/module/zfs/zio.c b/sys/contrib/openzfs/module/zfs/zio.c index bd6752f00ac5..b071ac17ed1f 100644 --- a/sys/contrib/openzfs/module/zfs/zio.c +++ b/sys/contrib/openzfs/module/zfs/zio.c @@ -23,7 +23,7 @@ * Copyright (c) 2011, 2022 by Delphix. All rights reserved. * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2017, Intel Corporation. - * Copyright (c) 2019, 2023, 2024, Klara Inc. + * Copyright (c) 2019, 2023, 2024, 2025, Klara, Inc. * Copyright (c) 2019, Allan Jude * Copyright (c) 2021, Datto, Inc. * Copyright (c) 2021, 2024 by George Melikov. All rights reserved. 
@@ -145,10 +145,53 @@ static const int zio_buf_debug_limit = 16384; static const int zio_buf_debug_limit = 0; #endif +typedef struct zio_stats { + kstat_named_t ziostat_total_allocations; + kstat_named_t ziostat_alloc_class_fallbacks; + kstat_named_t ziostat_gang_writes; + kstat_named_t ziostat_gang_multilevel; +} zio_stats_t; + +static zio_stats_t zio_stats = { + { "total_allocations", KSTAT_DATA_UINT64 }, + { "alloc_class_fallbacks", KSTAT_DATA_UINT64 }, + { "gang_writes", KSTAT_DATA_UINT64 }, + { "gang_multilevel", KSTAT_DATA_UINT64 }, +}; + +struct { + wmsum_t ziostat_total_allocations; + wmsum_t ziostat_alloc_class_fallbacks; + wmsum_t ziostat_gang_writes; + wmsum_t ziostat_gang_multilevel; +} ziostat_sums; + +#define ZIOSTAT_BUMP(stat) wmsum_add(&ziostat_sums.stat, 1); + +static kstat_t *zio_ksp; + static inline void __zio_execute(zio_t *zio); static void zio_taskq_dispatch(zio_t *, zio_taskq_type_t, boolean_t); +static int +zio_kstats_update(kstat_t *ksp, int rw) +{ + zio_stats_t *zs = ksp->ks_data; + if (rw == KSTAT_WRITE) + return (EACCES); + + zs->ziostat_total_allocations.value.ui64 = + wmsum_value(&ziostat_sums.ziostat_total_allocations); + zs->ziostat_alloc_class_fallbacks.value.ui64 = + wmsum_value(&ziostat_sums.ziostat_alloc_class_fallbacks); + zs->ziostat_gang_writes.value.ui64 = + wmsum_value(&ziostat_sums.ziostat_gang_writes); + zs->ziostat_gang_multilevel.value.ui64 = + wmsum_value(&ziostat_sums.ziostat_gang_multilevel); + return (0); +} + void zio_init(void) { @@ -159,6 +202,19 @@ zio_init(void) zio_link_cache = kmem_cache_create("zio_link_cache", sizeof (zio_link_t), 0, NULL, NULL, NULL, NULL, NULL, 0); + wmsum_init(&ziostat_sums.ziostat_total_allocations, 0); + wmsum_init(&ziostat_sums.ziostat_alloc_class_fallbacks, 0); + wmsum_init(&ziostat_sums.ziostat_gang_writes, 0); + wmsum_init(&ziostat_sums.ziostat_gang_multilevel, 0); + zio_ksp = kstat_create("zfs", 0, "zio_stats", + "misc", KSTAT_TYPE_NAMED, sizeof (zio_stats) / + sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL); + if (zio_ksp != NULL) { + zio_ksp->ks_data = &zio_stats; + zio_ksp->ks_update = zio_kstats_update; + kstat_install(zio_ksp); + } + for (c = 0; c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; c++) { size_t size = (c + 1) << SPA_MINBLOCKSHIFT; size_t align, cflags, data_cflags; @@ -286,6 +342,16 @@ zio_fini(void) VERIFY3P(zio_data_buf_cache[i], ==, NULL); } + if (zio_ksp != NULL) { + kstat_delete(zio_ksp); + zio_ksp = NULL; + } + + wmsum_fini(&ziostat_sums.ziostat_total_allocations); + wmsum_fini(&ziostat_sums.ziostat_alloc_class_fallbacks); + wmsum_fini(&ziostat_sums.ziostat_gang_writes); + wmsum_fini(&ziostat_sums.ziostat_gang_multilevel); + kmem_cache_destroy(zio_link_cache); kmem_cache_destroy(zio_cache); @@ -2537,13 +2603,29 @@ zio_reexecute(void *arg) pio->io_state[ZIO_WAIT_READY] = (pio->io_stage >= ZIO_STAGE_READY) || (pio->io_pipeline & ZIO_STAGE_READY) == 0; pio->io_state[ZIO_WAIT_DONE] = (pio->io_stage >= ZIO_STAGE_DONE); + + /* + * It's possible for a failed ZIO to be a descendant of more than one + * ZIO tree. When reexecuting it, we have to be sure to add its wait + * states to all parent wait counts. + * + * Those parents, in turn, may have other children that are currently + * active, usually because they've already been reexecuted after + * resuming. Those children may be executing and may call + * zio_notify_parent() at the same time as we're updating our parent's + * counts. To avoid races while updating the counts, we take + * gio->io_lock before each update. 
+ */ zio_link_t *zl = NULL; while ((gio = zio_walk_parents(pio, &zl)) != NULL) { + mutex_enter(&gio->io_lock); for (int w = 0; w < ZIO_WAIT_TYPES; w++) { gio->io_children[pio->io_child_type][w] += !pio->io_state[w]; } + mutex_exit(&gio->io_lock); } + for (int c = 0; c < ZIO_CHILD_TYPES; c++) pio->io_child_error[c] = 0; @@ -4037,6 +4119,7 @@ zio_dva_allocate(zio_t *zio) mc = spa_preferred_class(spa, zio); zio->io_metaslab_class = mc; } + ZIOSTAT_BUMP(ziostat_total_allocations); /* * Try allocating the block in the usual metaslab class. @@ -4102,6 +4185,7 @@ zio_dva_allocate(zio_t *zio) error); } + ZIOSTAT_BUMP(ziostat_alloc_class_fallbacks); error = metaslab_alloc(spa, mc, zio->io_size, bp, zio->io_prop.zp_copies, zio->io_txg, NULL, flags, &zio->io_alloc_list, zio, zio->io_allocator); @@ -4114,6 +4198,9 @@ zio_dva_allocate(zio_t *zio) spa_name(spa), zio, (u_longlong_t)zio->io_size, error); } + ZIOSTAT_BUMP(ziostat_gang_writes); + if (flags & METASLAB_GANG_CHILD) + ZIOSTAT_BUMP(ziostat_gang_multilevel); return (zio_write_gang_block(zio, mc)); } if (error != 0) { @@ -4205,6 +4292,7 @@ zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg, blkptr_t *new_bp, int flags = METASLAB_ZIL; int allocator = (uint_t)cityhash1(os->os_dsl_dataset->ds_object) % spa->spa_alloc_count; + ZIOSTAT_BUMP(ziostat_total_allocations); error = metaslab_alloc(spa, spa_log_class(spa), size, new_bp, 1, txg, NULL, flags, &io_alloc_list, NULL, allocator); *slog = (error == 0); @@ -4214,6 +4302,7 @@ zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg, blkptr_t *new_bp, &io_alloc_list, NULL, allocator); } if (error != 0) { + ZIOSTAT_BUMP(ziostat_alloc_class_fallbacks); error = metaslab_alloc(spa, spa_normal_class(spa), size, new_bp, 1, txg, NULL, flags, &io_alloc_list, NULL, allocator); @@ -4406,16 +4495,6 @@ zio_vdev_io_start(zio_t *zio) zio->io_type == ZIO_TYPE_WRITE || zio->io_type == ZIO_TYPE_TRIM)) { - if (zio_handle_device_injection(vd, zio, ENOSYS) != 0) { - /* - * "no-op" injections return success, but do no actual - * work. Just skip the remaining vdev stages. - */ - zio_vdev_io_bypass(zio); - zio_interrupt(zio); - return (NULL); - } - if ((zio = vdev_queue_io(zio)) == NULL) return (NULL); @@ -4425,6 +4504,15 @@ zio_vdev_io_start(zio_t *zio) return (NULL); } zio->io_delay = gethrtime(); + + if (zio_handle_device_injection(vd, zio, ENOSYS) != 0) { + /* + * "no-op" injections return success, but do no actual + * work. Just return it. 
+ */ + zio_delay_interrupt(zio); + return (NULL); + } } vd->vdev_ops->vdev_op_io_start(zio); diff --git a/sys/contrib/openzfs/tests/runfiles/freebsd.run b/sys/contrib/openzfs/tests/runfiles/freebsd.run index e1ae0c6b7721..943c8eab2715 100644 --- a/sys/contrib/openzfs/tests/runfiles/freebsd.run +++ b/sys/contrib/openzfs/tests/runfiles/freebsd.run @@ -27,8 +27,8 @@ tests = ['zfs_jail_001_pos'] tags = ['functional', 'cli_root', 'zfs_jail'] [tests/functional/pam:FreeBSD] -tests = ['pam_basic', 'pam_change_unmounted', 'pam_nounmount', 'pam_recursive', - 'pam_short_password'] +tests = ['pam_basic', 'pam_change_unmounted', 'pam_mount_recursively', + 'pam_nounmount', 'pam_recursive', 'pam_short_password'] tags = ['functional', 'pam'] [tests/functional/direct:FreeBSD] diff --git a/sys/contrib/openzfs/tests/runfiles/linux.run b/sys/contrib/openzfs/tests/runfiles/linux.run index e55ec583d2cc..275772f2820e 100644 --- a/sys/contrib/openzfs/tests/runfiles/linux.run +++ b/sys/contrib/openzfs/tests/runfiles/linux.run @@ -103,7 +103,7 @@ tests = ['devices_001_pos', 'devices_002_neg', 'devices_003_pos'] tags = ['functional', 'devices'] [tests/functional/direct:Linux] -tests = ['dio_write_verify'] +tests = ['dio_loopback_dev', 'dio_write_verify'] tags = ['functional', 'direct'] [tests/functional/events:Linux] @@ -169,8 +169,8 @@ tests = ['umount_unlinked_drain'] tags = ['functional', 'mount'] [tests/functional/pam:Linux] -tests = ['pam_basic', 'pam_change_unmounted', 'pam_nounmount', 'pam_recursive', - 'pam_short_password'] +tests = ['pam_basic', 'pam_change_unmounted', 'pam_mount_recursively', + 'pam_nounmount', 'pam_recursive', 'pam_short_password'] tags = ['functional', 'pam'] [tests/functional/procfs:Linux] diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am b/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am index d0eb4c30db48..dcefb26a4036 100644 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am @@ -1476,6 +1476,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/direct/dio_dedup.ksh \ functional/direct/dio_encryption.ksh \ functional/direct/dio_grow_block.ksh \ + functional/direct/dio_loopback_dev.ksh \ functional/direct/dio_max_recordsize.ksh \ functional/direct/dio_mixed.ksh \ functional/direct/dio_mmap.ksh \ @@ -1695,6 +1696,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/pam/cleanup.ksh \ functional/pam/pam_basic.ksh \ functional/pam/pam_change_unmounted.ksh \ + functional/pam/pam_mount_recursively.ksh \ functional/pam/pam_nounmount.ksh \ functional/pam/pam_recursive.ksh \ functional/pam/pam_short_password.ksh \ diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/direct/dio_loopback_dev.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/direct/dio_loopback_dev.ksh new file mode 100755 index 000000000000..7186eba5aafc --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/direct/dio_loopback_dev.ksh @@ -0,0 +1,78 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. 
+# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2025 by Triad National Security, LLC. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/direct/dio.cfg +. $STF_SUITE/tests/functional/direct/dio.kshlib + +# +# DESCRIPTION: +# Verify Direct I/O reads work with loopback devices using direct=always. +# +# STRATEGY: +# 1. Create raidz zpool. +# 2. Create dataset with the direct dataset property set to always. +# 3. Create an empty file in dataset and setup loop device on it. +# 4. Read from loopback device. +# + +verify_runnable "global" + +function cleanup +{ + if [[ -n $lofidev ]]; then + losetup -d $lofidev + fi + dio_cleanup +} + +log_assert "Verify loopback devices with Direct I/O." + +if ! is_linux; then + log_unsupported "This is just a check for Linux Direct I/O" +fi + +log_onexit cleanup + +# Create zpool +log_must truncate -s $MINVDEVSIZE $DIO_VDEVS +log_must create_pool $TESTPOOL1 "raidz" $DIO_VDEVS + +# Creating dataset with direct=always +log_must eval "zfs create -o direct=always $TESTPOOL1/$TESTFS1" +mntpt=$(get_prop mountpoint $TESTPOOL1/$TESTFS1) + +# Getting a loopback device +lofidev=$(losetup -f) + +# Create loopback device +log_must truncate -s 1M "$mntpt/temp_file" +log_must losetup $lofidev "$mntpt/temp_file" + +# Read from looback device to make sure Direct I/O works with loopback device +log_must dd if=$lofidev of=/dev/null count=1 bs=4k + +log_pass "Verified loopback devices for Direct I/O." diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/pam/cleanup.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/pam/cleanup.ksh index bfb98cd30707..5bb6e518edb0 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/pam/cleanup.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/pam/cleanup.ksh @@ -26,5 +26,6 @@ rmconfig destroy_pool $TESTPOOL del_user ${username} del_user ${username}rec +del_user ${username}mrec del_group pamtestgroup log_must rm -rf "$runstatedir" diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/pam/pam_mount_recursively.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/pam/pam_mount_recursively.ksh new file mode 100755 index 000000000000..93683da7d7db --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/pam/pam_mount_recursively.ksh @@ -0,0 +1,90 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +. 
$STF_SUITE/tests/functional/pam/utilities.kshlib + +if [ -n "$ASAN_OPTIONS" ]; then + export LD_PRELOAD=$(ldd "$(command -v zfs)" | awk '/libasan\.so/ {print $3}') +fi + +username="${username}mrec" + +# Set up a deeper hierarchy, a mountpoint that doesn't interfere with other tests, +# and a user which references that mountpoint +log_must zfs create "$TESTPOOL/mrec" +log_must zfs create -o mountpoint="$TESTDIR/mrec" "$TESTPOOL/mrec/pam" +echo "recurpass" | zfs create -o encryption=aes-256-gcm -o keyformat=passphrase \ + -o keylocation=prompt "$TESTPOOL/mrec/pam/${username}" +log_must zfs create "$TESTPOOL/mrec/pam/${username}/deep" +log_must zfs create "$TESTPOOL/mrec/pam/${username}/deep/deeper" +log_must zfs create -o mountpoint=none "$TESTPOOL/mrec/pam/${username}/deep/none" +log_must zfs create -o canmount=noauto "$TESTPOOL/mrec/pam/${username}/deep/noauto" +log_must zfs create -o canmount=off "$TESTPOOL/mrec/pam/${username}/deep/off" +log_must zfs unmount "$TESTPOOL/mrec/pam/${username}" +log_must zfs unload-key "$TESTPOOL/mrec/pam/${username}" +log_must add_user pamtestgroup ${username} "$TESTDIR/mrec" + +function keystatus { + log_must [ "$(get_prop keystatus "$TESTPOOL/mrec/pam/${username}")" = "$1" ] +} + +log_mustnot ismounted "$TESTPOOL/mrec/pam/${username}" +keystatus unavailable + +function test_session { + echo "recurpass" | pamtester ${pamservice} ${username} open_session + references 1 + log_must ismounted "$TESTPOOL/mrec/pam/${username}" + log_must ismounted "$TESTPOOL/mrec/pam/${username}/deep" + log_must ismounted "$TESTPOOL/mrec/pam/${username}/deep/deeper" + log_mustnot ismounted "$TESTPOOL/mrec/pam/${username}/deep/none" + log_mustnot ismounted "$TESTPOOL/mrec/pam/${username}/deep/noauto" + log_mustnot ismounted "$TESTPOOL/mrec/pam/${username}/deep/off" + keystatus available + + log_must pamtester ${pamservice} ${username} close_session + references 0 + log_mustnot ismounted "$TESTPOOL/mrec/pam/${username}" + log_mustnot ismounted "$TESTPOOL/mrec/pam/${username}/deep" + log_mustnot ismounted "$TESTPOOL/mrec/pam/${username}/deep/deeper" + log_mustnot ismounted "$TESTPOOL/mrec/pam/${username}/deep/none" + log_mustnot ismounted "$TESTPOOL/mrec/pam/${username}/deep/noauto" + log_mustnot ismounted "$TESTPOOL/mrec/pam/${username}/deep/off" + keystatus unavailable +} + +genconfig "homes=$TESTPOOL/mrec/pam mount_recursively runstatedir=${runstatedir}" +test_session + +genconfig "homes=$TESTPOOL/mrec/pam prop_mountpoint mount_recursively runstatedir=${runstatedir}" +test_session + +genconfig "homes=$TESTPOOL/mrec recursive_homes prop_mountpoint mount_recursively runstatedir=${runstatedir}" +test_session + +genconfig "homes=$TESTPOOL recursive_homes prop_mountpoint mount_recursively runstatedir=${runstatedir}" +test_session + +genconfig "homes=* recursive_homes prop_mountpoint mount_recursively runstatedir=${runstatedir}" +test_session + +log_pass "done." 
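For context, the mount_recursively flag exercised by the new pam_mount_recursively.ksh test above is an option of the pam_zfs_key PAM module, used together with the existing homes=, prop_mountpoint, recursive_homes and runstatedir= options that the test's genconfig calls pass through. A minimal sketch of how it might be wired into a real PAM stack is shown below; the service file, dataset name and runstatedir path are illustrative assumptions and are not part of this change, only the option names come from the test.

# Illustrative sketch only: append pam_zfs_key entries, with the new
# mount_recursively option, to an assumed /etc/pam.d/login service file.
# rpool/home and /run/pam_zfs_key are placeholders.
cat >> /etc/pam.d/login <<'EOF'
auth     optional  pam_zfs_key.so homes=rpool/home prop_mountpoint mount_recursively runstatedir=/run/pam_zfs_key
session  optional  pam_zfs_key.so homes=rpool/home prop_mountpoint mount_recursively runstatedir=/run/pam_zfs_key
EOF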
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/rsend/send_encrypted_hierarchy.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/rsend/send_encrypted_hierarchy.ksh index 8417afc88d33..6dd4ae46f947 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/rsend/send_encrypted_hierarchy.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/rsend/send_encrypted_hierarchy.ksh @@ -61,16 +61,17 @@ log_must eval "zfs receive -d -F $POOL2 < $BACKDIR/fs-before-R" dstds=$(get_dst_ds $POOL/$FS $POOL2) log_must cmp_ds_subs $POOL/$FS $dstds -log_must verify_encryption_root $POOL/$FS $POOL/$FS -log_must verify_keylocation $POOL/$FS "prompt" -log_must verify_origin $POOL/$FS "-" +log_must verify_encryption_root $POOL2/$FS $POOL2/$FS +log_must verify_keylocation $POOL2/$FS "prompt" +log_must verify_origin $POOL2/$FS "-" -log_must verify_encryption_root $POOL/clone $POOL/$FS -log_must verify_keylocation $POOL/clone "none" -log_must verify_origin $POOL/clone "$POOL/$FS@snap" +log_must verify_encryption_root $POOL2/clone $POOL2/$FS +log_must verify_keylocation $POOL2/clone "none" +log_must verify_origin $POOL2/clone "$POOL2/$FS@snap" log_must verify_encryption_root $POOL/$FS/child $POOL/$FS -log_must verify_keylocation $POOL/$FS/child "none" +log_must verify_encryption_root $POOL2/$FS/child $POOL2/$FS +log_must verify_keylocation $POOL2/$FS/child "none" # Alter the hierarchy and re-send log_must eval "echo $PASSPHRASE1 | zfs change-key -o keyformat=passphrase" \ @@ -93,4 +94,20 @@ log_must verify_origin $POOL/clone "-" log_must verify_encryption_root $POOL/$FS/child $POOL/$FS/child log_must verify_keylocation $POOL/$FS/child "prompt" +log_must verify_encryption_root $POOL2 "-" +log_must verify_encryption_root $POOL2/clone $POOL2/clone +log_must verify_encryption_root $POOL2/$FS $POOL2/clone +log_must verify_encryption_root $POOL2/$FS/child $POOL2/$FS/child + +log_must verify_keylocation $POOL2 "none" +log_must verify_keylocation $POOL2/clone "prompt" +log_must verify_keylocation $POOL2/$FS "none" +log_must verify_keylocation $POOL2/$FS/child "prompt" + +log_must verify_origin $POOL2 "-" +log_must verify_origin $POOL2/clone "-" +log_must verify_origin $POOL2/$FS "$POOL2/clone@snap" +log_must verify_origin $POOL2/$FS/child "-" +log_must zfs list + log_pass "Raw recursive sends preserve filesystem structure." diff --git a/sys/modules/zfs/zfs_config.h b/sys/modules/zfs/zfs_config.h index b6cb1ab0652e..fcf5949c8ca6 100644 --- a/sys/modules/zfs/zfs_config.h +++ b/sys/modules/zfs/zfs_config.h @@ -254,6 +254,9 @@ /* Define to 1 if you have the <dlfcn.h> header file. */ #define HAVE_DLFCN_H 1 +/* dops->d_revalidate() takes 4 args */ +/* #undef HAVE_D_REVALIDATE_4ARGS */ + /* Define to 1 if you have the 'execvpe' function. */ #define HAVE_EXECVPE 1 @@ -401,6 +404,9 @@ /* iops->symlink() takes struct user_namespace* */ /* #undef HAVE_IOPS_SYMLINK_USERNS */ +/* iov_iter_get_pages2() is available */ +/* #undef HAVE_IOV_ITER_GET_PAGES2 */ + /* iov_iter_type() is available */ /* #undef HAVE_IOV_ITER_TYPE */ @@ -787,7 +793,7 @@ /* #undef ZFS_DEVICE_MINOR */ /* Define the project alias string. */ -#define ZFS_META_ALIAS "zfs-2.3.99-170-FreeBSD_g34205715e" +#define ZFS_META_ALIAS "zfs-2.3.99-189-FreeBSD_g6a2f7b384" /* Define the project author. */ #define ZFS_META_AUTHOR "OpenZFS" @@ -817,7 +823,7 @@ #define ZFS_META_NAME "zfs" /* Define the project release. 
 */ -#define ZFS_META_RELEASE "170-FreeBSD_g34205715e" +#define ZFS_META_RELEASE "189-FreeBSD_g6a2f7b384" /* Define the project version. */ #define ZFS_META_VERSION "2.3.99" diff --git a/sys/modules/zfs/zfs_gitrev.h b/sys/modules/zfs/zfs_gitrev.h index 0a2a21e6ddf1..d6c39ea2840a 100644 --- a/sys/modules/zfs/zfs_gitrev.h +++ b/sys/modules/zfs/zfs_gitrev.h @@ -1 +1 @@ -#define ZFS_META_GITREV "zfs-2.3.99-170-g34205715e" +#define ZFS_META_GITREV "zfs-2.3.99-189-g6a2f7b384"
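A practical note on the zio_stats kstat added in zio.c above: since it is registered via kstat_create("zfs", 0, "zio_stats", "misc", ...), the four counters should be readable from userland through the usual OpenZFS kstat paths once the updated module is loaded. The node names below follow those conventions and are assumptions rather than something stated in this diff.

# Linux: module kstats for the "zfs" module normally appear under /proc/spl/kstat/zfs/
cat /proc/spl/kstat/zfs/zio_stats

# FreeBSD: named kstats are bridged to sysctl as kstat.zfs.misc.<name>
sysctl kstat.zfs.misc.zio_stats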