aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--etc/mtree/BSD.tests.dist2
-rw-r--r--share/mk/bsd.README3
-rw-r--r--share/mk/plain.test.mk11
-rw-r--r--sys/contrib/openzfs/.github/workflows/codeql.yml2
-rwxr-xr-xsys/contrib/openzfs/.github/workflows/scripts/qemu-1-setup.sh18
-rwxr-xr-xsys/contrib/openzfs/.github/workflows/scripts/qemu-2-start.sh32
-rwxr-xr-xsys/contrib/openzfs/.github/workflows/scripts/qemu-5-setup.sh17
-rwxr-xr-xsys/contrib/openzfs/.github/workflows/scripts/qemu-9-summary-page.sh4
-rw-r--r--sys/contrib/openzfs/.github/workflows/zfs-qemu.yml14
-rw-r--r--sys/contrib/openzfs/META4
-rw-r--r--sys/contrib/openzfs/cmd/zdb/zdb.c4
-rw-r--r--sys/contrib/openzfs/cmd/zfs/zfs_main.c30
-rw-r--r--sys/contrib/openzfs/cmd/zpool/zpool_main.c18
-rw-r--r--sys/contrib/openzfs/cmd/ztest.c19
-rw-r--r--sys/contrib/openzfs/config/deb.am20
-rw-r--r--sys/contrib/openzfs/config/user.m42
-rw-r--r--sys/contrib/openzfs/contrib/debian/Makefile.am8
-rw-r--r--sys/contrib/openzfs/contrib/debian/clean4
-rw-r--r--sys/contrib/openzfs/contrib/debian/control26
-rw-r--r--sys/contrib/openzfs/contrib/debian/openzfs-libzfs6.docs (renamed from sys/contrib/openzfs/contrib/debian/openzfs-libzfs4.docs)0
-rw-r--r--sys/contrib/openzfs/contrib/debian/openzfs-libzfs6.install.in (renamed from sys/contrib/openzfs/contrib/debian/openzfs-libzfs4.install.in)0
-rw-r--r--sys/contrib/openzfs/contrib/debian/openzfs-libzpool6.docs (renamed from sys/contrib/openzfs/contrib/debian/openzfs-libzpool5.docs)0
-rw-r--r--sys/contrib/openzfs/contrib/debian/openzfs-libzpool6.install.in (renamed from sys/contrib/openzfs/contrib/debian/openzfs-libzpool5.install.in)0
-rw-r--r--sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.install2
-rw-r--r--sys/contrib/openzfs/contrib/initramfs/scripts/zfs5
-rw-r--r--sys/contrib/openzfs/include/libzutil.h4
-rw-r--r--sys/contrib/openzfs/include/os/linux/spl/sys/taskq.h3
-rw-r--r--sys/contrib/openzfs/include/os/linux/zfs/sys/abd_os.h12
-rw-r--r--sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_vfsops_os.h1
-rw-r--r--sys/contrib/openzfs/include/sys/fm/fs/zfs.h3
-rw-r--r--sys/contrib/openzfs/include/sys/vdev_raidz.h2
-rw-r--r--sys/contrib/openzfs/include/sys/zio.h29
-rw-r--r--sys/contrib/openzfs/lib/libspl/backtrace.c219
-rw-r--r--sys/contrib/openzfs/lib/libzfs/Makefile.am2
-rw-r--r--sys/contrib/openzfs/lib/libzfs/libzfs.abi2
-rw-r--r--sys/contrib/openzfs/lib/libzfs/libzfs_pool.c2
-rw-r--r--sys/contrib/openzfs/lib/libzpool/Makefile.am2
-rw-r--r--sys/contrib/openzfs/lib/libzpool/zfs_debug.c18
-rw-r--r--sys/contrib/openzfs/man/man4/zfs.42
-rw-r--r--sys/contrib/openzfs/man/man5/vdev_id.conf.510
-rw-r--r--sys/contrib/openzfs/man/man8/zfs-list.82
-rw-r--r--sys/contrib/openzfs/man/man8/zfs-mount.82
-rw-r--r--sys/contrib/openzfs/man/man8/zfs-program.82
-rw-r--r--sys/contrib/openzfs/man/man8/zfs-set.82
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-events.85
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-get.84
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-list.82
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-status.810
-rw-r--r--sys/contrib/openzfs/module/os/freebsd/zfs/abd_os.c41
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/abd_os.c4
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c109
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c6
-rw-r--r--sys/contrib/openzfs/module/zcommon/zfs_valstr.c1
-rw-r--r--sys/contrib/openzfs/module/zfs/dmu_direct.c2
-rw-r--r--sys/contrib/openzfs/module/zfs/dsl_dataset.c11
-rw-r--r--sys/contrib/openzfs/module/zfs/vdev_draid.c2
-rw-r--r--sys/contrib/openzfs/module/zfs/vdev_indirect.c14
-rw-r--r--sys/contrib/openzfs/module/zfs/vdev_mirror.c21
-rw-r--r--sys/contrib/openzfs/module/zfs/vdev_raidz.c44
-rw-r--r--sys/contrib/openzfs/module/zfs/zfs_vnops.c33
-rw-r--r--sys/contrib/openzfs/module/zfs/zio.c120
-rw-r--r--sys/contrib/openzfs/rpm/generic/zfs.spec.in45
-rw-r--r--sys/contrib/openzfs/tests/runfiles/common.run4
-rw-r--r--sys/contrib/openzfs/tests/zfs-tests/cmd/manipulate_user_buffer.c180
-rw-r--r--sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am1
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/json/json_sanity.ksh51
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_parallel_pos.ksh4
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/dedup/dedup_quota.ksh2
-rw-r--r--sys/contrib/openzfs/tests/zfs-tests/tests/functional/direct/dio.kshlib10
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/direct/dio_read_verify.ksh107
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/direct/dio_write_stable_pages.ksh6
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/direct/dio_write_verify.ksh18
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/replacement/resilver_restart_001.ksh3
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_snapdev.ksh13
-rwxr-xr-xsys/contrib/openzfs/udev/vdev_id18
-rw-r--r--sys/kern/subr_pctrie.c2
-rw-r--r--sys/modules/zfs/zfs_config.h15
-rw-r--r--sys/modules/zfs/zfs_gitrev.h2
-rw-r--r--tests/sys/kern/Makefile1
-rw-r--r--tests/sys/kern/tty/Makefile12
-rw-r--r--tests/sys/kern/tty/fionread.c21
-rw-r--r--tests/sys/kern/tty/readsz.c130
-rw-r--r--tests/sys/kern/tty/test_canon.orch102
-rw-r--r--tests/sys/kern/tty/test_canon_fullbuf.orch23
-rw-r--r--tests/sys/kern/tty/test_ncanon.orch39
-rw-r--r--tests/sys/kern/tty/test_recanon.orch90
86 files changed, 1547 insertions, 345 deletions
diff --git a/etc/mtree/BSD.tests.dist b/etc/mtree/BSD.tests.dist
index 8cac5e8d55e2..221e4b32a89b 100644
--- a/etc/mtree/BSD.tests.dist
+++ b/etc/mtree/BSD.tests.dist
@@ -854,6 +854,8 @@
..
pipe
..
+ tty
+ ..
..
kqueue
libkqueue
diff --git a/share/mk/bsd.README b/share/mk/bsd.README
index 70d489a9f59f..e957e9cdd48b 100644
--- a/share/mk/bsd.README
+++ b/share/mk/bsd.README
@@ -701,6 +701,9 @@ PLAIN_TESTS_C The names of the plain (legacy) programs to build.
PLAIN_TESTS_CXX The names of the plain (legacy) test programs to build.
+PLAIN_TESTS_PORCH The names of the plain (legacy) porch(1)-based
+ test programs to build.
+
PLAIN_TESTS_SH The names of the plain (legacy) test programs to build.
TAP_PERL_INTERPRETER
diff --git a/share/mk/plain.test.mk b/share/mk/plain.test.mk
index cc6d268185da..485a78f8ea47 100644
--- a/share/mk/plain.test.mk
+++ b/share/mk/plain.test.mk
@@ -43,6 +43,17 @@ TEST_INTERFACE.${_T}= plain
.endfor
.endif
+.if !empty(PLAIN_TESTS_PORCH)
+SCRIPTS+= ${PLAIN_TESTS_PORCH:S/$/.orch/}
+_TESTS+= ${PLAIN_TESTS_PORCH}
+.for _T in ${PLAIN_TESTS_PORCH}
+SCRIPTSDIR_${_T}.orch= ${TESTSDIR}
+
+TEST_INTERFACE.${_T}= plain
+TEST_METADATA.${_T}+= required_programs="porch"
+.endfor
+.endif
+
.if !empty(PLAIN_TESTS_SH)
SCRIPTS+= ${PLAIN_TESTS_SH}
_TESTS+= ${PLAIN_TESTS_SH}
diff --git a/sys/contrib/openzfs/.github/workflows/codeql.yml b/sys/contrib/openzfs/.github/workflows/codeql.yml
index 2656a20fea0d..e975d7dd00b9 100644
--- a/sys/contrib/openzfs/.github/workflows/codeql.yml
+++ b/sys/contrib/openzfs/.github/workflows/codeql.yml
@@ -11,7 +11,7 @@ concurrency:
jobs:
analyze:
name: Analyze
- runs-on: ubuntu-latest
+ runs-on: ubuntu-22.04
permissions:
actions: read
contents: read
diff --git a/sys/contrib/openzfs/.github/workflows/scripts/qemu-1-setup.sh b/sys/contrib/openzfs/.github/workflows/scripts/qemu-1-setup.sh
index ebd80a2f98c1..f838da34efff 100755
--- a/sys/contrib/openzfs/.github/workflows/scripts/qemu-1-setup.sh
+++ b/sys/contrib/openzfs/.github/workflows/scripts/qemu-1-setup.sh
@@ -18,19 +18,21 @@ ssh-keygen -t ed25519 -f ~/.ssh/id_ed25519 -q -N ""
# we expect RAM shortage
cat << EOF | sudo tee /etc/ksmtuned.conf > /dev/null
+# /etc/ksmtuned.conf - Configuration file for ksmtuned
# https://docs.redhat.com/en/documentation/red_hat_enterprise_linux/7/html/virtualization_tuning_and_optimization_guide/chap-ksm
KSM_MONITOR_INTERVAL=60
# Millisecond sleep between ksm scans for 16Gb server.
# Smaller servers sleep more, bigger sleep less.
-KSM_SLEEP_MSEC=10
-KSM_NPAGES_BOOST=300
-KSM_NPAGES_DECAY=-50
-KSM_NPAGES_MIN=64
-KSM_NPAGES_MAX=2048
-
-KSM_THRES_COEF=25
-KSM_THRES_CONST=2048
+KSM_SLEEP_MSEC=30
+
+KSM_NPAGES_BOOST=0
+KSM_NPAGES_DECAY=0
+KSM_NPAGES_MIN=1000
+KSM_NPAGES_MAX=25000
+
+KSM_THRES_COEF=80
+KSM_THRES_CONST=8192
LOGFILE=/var/log/ksmtuned.log
DEBUG=1
diff --git a/sys/contrib/openzfs/.github/workflows/scripts/qemu-2-start.sh b/sys/contrib/openzfs/.github/workflows/scripts/qemu-2-start.sh
index 923c38c0f937..84e13832d10f 100755
--- a/sys/contrib/openzfs/.github/workflows/scripts/qemu-2-start.sh
+++ b/sys/contrib/openzfs/.github/workflows/scripts/qemu-2-start.sh
@@ -14,7 +14,7 @@ OSv=$OS
# compressed with .zst extension
REPO="https://github.com/mcmilk/openzfs-freebsd-images"
-FREEBSD="$REPO/releases/download/v2024-09-16"
+FREEBSD="$REPO/releases/download/v2024-10-05"
URLzs=""
# Ubuntu mirrors
@@ -62,33 +62,45 @@ case "$OS" in
OSv="fedora39"
URL="https://download.fedoraproject.org/pub/fedora/linux/releases/40/Cloud/x86_64/images/Fedora-Cloud-Base-Generic.x86_64-40-1.14.qcow2"
;;
- freebsd13r)
- OSNAME="FreeBSD 13.4-RELEASE"
+ freebsd13-3r)
+ OSNAME="FreeBSD 13.3-RELEASE"
OSv="freebsd13.0"
- URLzs="$FREEBSD/amd64-freebsd-13.4-RELEASE.qcow2.zst"
+ URLzs="$FREEBSD/amd64-freebsd-13.3-RELEASE.qcow2.zst"
BASH="/usr/local/bin/bash"
NIC="rtl8139"
;;
- freebsd13)
- OSNAME="FreeBSD 13.4-STABLE"
+ freebsd13-4r)
+ OSNAME="FreeBSD 13.4-RELEASE"
OSv="freebsd13.0"
- URLzs="$FREEBSD/amd64-freebsd-13.4-STABLE.qcow2.zst"
+ URLzs="$FREEBSD/amd64-freebsd-13.4-RELEASE.qcow2.zst"
BASH="/usr/local/bin/bash"
NIC="rtl8139"
;;
- freebsd14r)
+ freebsd14-0r)
+ OSNAME="FreeBSD 14.0-RELEASE"
+ OSv="freebsd14.0"
+ URLzs="$FREEBSD/amd64-freebsd-14.0-RELEASE.qcow2.zst"
+ BASH="/usr/local/bin/bash"
+ ;;
+ freebsd14-1r)
OSNAME="FreeBSD 14.1-RELEASE"
OSv="freebsd14.0"
URLzs="$FREEBSD/amd64-freebsd-14.1-RELEASE.qcow2.zst"
BASH="/usr/local/bin/bash"
;;
- freebsd14)
+ freebsd13-4s)
+ OSNAME="FreeBSD 13.4-STABLE"
+ OSv="freebsd13.0"
+ URLzs="$FREEBSD/amd64-freebsd-13.4-STABLE.qcow2.zst"
+ BASH="/usr/local/bin/bash"
+ ;;
+ freebsd14-1s)
OSNAME="FreeBSD 14.1-STABLE"
OSv="freebsd14.0"
URLzs="$FREEBSD/amd64-freebsd-14.1-STABLE.qcow2.zst"
BASH="/usr/local/bin/bash"
;;
- freebsd15)
+ freebsd15-0c)
OSNAME="FreeBSD 15.0-CURRENT"
OSv="freebsd14.0"
URLzs="$FREEBSD/amd64-freebsd-15.0-CURRENT.qcow2.zst"
diff --git a/sys/contrib/openzfs/.github/workflows/scripts/qemu-5-setup.sh b/sys/contrib/openzfs/.github/workflows/scripts/qemu-5-setup.sh
index 7acb67a27920..bc40e8894b22 100755
--- a/sys/contrib/openzfs/.github/workflows/scripts/qemu-5-setup.sh
+++ b/sys/contrib/openzfs/.github/workflows/scripts/qemu-5-setup.sh
@@ -14,17 +14,21 @@ PID=$(pidof /usr/bin/qemu-system-x86_64)
tail --pid=$PID -f /dev/null
sudo virsh undefine openzfs
-# definitions of per operating system
+# default values per test vm:
+VMs=2
+CPU=2
+
+# cpu pinning
+CPUSET=("0,1" "2,3")
+
case "$OS" in
freebsd*)
- VMs=2
- CPU=3
+ # FreeBSD can't be optimized via ksmtuned
RAM=6
;;
*)
- VMs=2
- CPU=3
- RAM=7
+ # Linux can be optimized via ksmtuned
+ RAM=8
;;
esac
@@ -73,6 +77,7 @@ EOF
--cpu host-passthrough \
--virt-type=kvm --hvm \
--vcpus=$CPU,sockets=1 \
+ --cpuset=${CPUSET[$((i-1))]} \
--memory $((1024*RAM)) \
--memballoon model=virtio \
--graphics none \
diff --git a/sys/contrib/openzfs/.github/workflows/scripts/qemu-9-summary-page.sh b/sys/contrib/openzfs/.github/workflows/scripts/qemu-9-summary-page.sh
index cae287ac1073..737dda01b565 100755
--- a/sys/contrib/openzfs/.github/workflows/scripts/qemu-9-summary-page.sh
+++ b/sys/contrib/openzfs/.github/workflows/scripts/qemu-9-summary-page.sh
@@ -11,12 +11,10 @@ function output() {
}
function outfile() {
- test -s "$1" || return
cat "$1" >> "out-$logfile.md"
}
function outfile_plain() {
- test -s "$1" || return
output "<pre>"
cat "$1" >> "out-$logfile.md"
output "</pre>"
@@ -45,6 +43,8 @@ if [ ! -f out-1.md ]; then
tar xf "$tarfile"
test -s env.txt || continue
source env.txt
+ # when uname.txt is there, the other files are also ok
+ test -s uname.txt || continue
output "\n## Functional Tests: $OSNAME\n"
outfile_plain uname.txt
outfile_plain summary.txt
diff --git a/sys/contrib/openzfs/.github/workflows/zfs-qemu.yml b/sys/contrib/openzfs/.github/workflows/zfs-qemu.yml
index 8922701f9899..f819e9938e31 100644
--- a/sys/contrib/openzfs/.github/workflows/zfs-qemu.yml
+++ b/sys/contrib/openzfs/.github/workflows/zfs-qemu.yml
@@ -22,8 +22,8 @@ jobs:
- name: Generate OS config and CI type
id: os
run: |
- FULL_OS='["almalinux8", "almalinux9", "centos-stream9", "debian11", "debian12", "fedora39", "fedora40", "freebsd13", "freebsd13r", "freebsd14", "freebsd14r", "ubuntu20", "ubuntu22", "ubuntu24"]'
- QUICK_OS='["almalinux8", "almalinux9", "debian12", "fedora40", "freebsd13", "freebsd14", "ubuntu24"]'
+ FULL_OS='["almalinux8", "almalinux9", "centos-stream9", "debian11", "debian12", "fedora39", "fedora40", "freebsd13-4r", "freebsd14-0r", "freebsd14-1s", "ubuntu20", "ubuntu22", "ubuntu24"]'
+ QUICK_OS='["almalinux8", "almalinux9", "debian12", "fedora40", "freebsd13-3r", "freebsd14-1r", "ubuntu24"]'
# determine CI type when running on PR
ci_type="full"
if ${{ github.event_name == 'pull_request' }}; then
@@ -46,10 +46,12 @@ jobs:
strategy:
fail-fast: false
matrix:
- # all:
- # os: [almalinux8, almalinux9, archlinux, centos-stream9, fedora39, fedora40, debian11, debian12, freebsd13, freebsd13r, freebsd14, freebsd14r, freebsd15, ubuntu20, ubuntu22, ubuntu24]
- # openzfs:
- # os: [almalinux8, almalinux9, centos-stream9, debian11, debian12, fedora39, fedora40, freebsd13, freebsd13r, freebsd14, freebsd14r, ubuntu20, ubuntu22, ubuntu24]
+ # rhl: almalinux8, almalinux9, centos-stream9, fedora39, fedora40
+ # debian: debian11, debian12, ubuntu20, ubuntu22, ubuntu24
+ # misc: archlinux, tumbleweed
+ # FreeBSD Release: freebsd13-3r, freebsd13-4r, freebsd14-0r, freebsd14-1r
+ # FreeBSD Stable: freebsd13-4s, freebsd14-1s
+ # FreeBSD Current: freebsd15-0c
os: ${{ fromJson(needs.test-config.outputs.test_os) }}
runs-on: ubuntu-24.04
steps:
diff --git a/sys/contrib/openzfs/META b/sys/contrib/openzfs/META
index c2eff4ba0bbd..185cca4a44d4 100644
--- a/sys/contrib/openzfs/META
+++ b/sys/contrib/openzfs/META
@@ -1,8 +1,8 @@
Meta: 1
Name: zfs
Branch: 1.0
-Version: 2.3.0
-Release: rc1
+Version: 2.3.99
+Release: 1
Release-Tags: relext
License: CDDL
Author: OpenZFS
diff --git a/sys/contrib/openzfs/cmd/zdb/zdb.c b/sys/contrib/openzfs/cmd/zdb/zdb.c
index a72b80a93732..2121f70b2b9a 100644
--- a/sys/contrib/openzfs/cmd/zdb/zdb.c
+++ b/sys/contrib/openzfs/cmd/zdb/zdb.c
@@ -4266,6 +4266,10 @@ dump_uberblock(uberblock_t *ub, const char *header, const char *footer)
(void) printf("\ttimestamp = %llu UTC = %s",
(u_longlong_t)ub->ub_timestamp, ctime(&timestamp));
+ char blkbuf[BP_SPRINTF_LEN];
+ snprintf_blkptr(blkbuf, sizeof (blkbuf), &ub->ub_rootbp);
+ (void) printf("\tbp = %s\n", blkbuf);
+
(void) printf("\tmmp_magic = %016llx\n",
(u_longlong_t)ub->ub_mmp_magic);
if (MMP_VALID(ub)) {
diff --git a/sys/contrib/openzfs/cmd/zfs/zfs_main.c b/sys/contrib/openzfs/cmd/zfs/zfs_main.c
index 34c693fbcb0f..4116cdb51852 100644
--- a/sys/contrib/openzfs/cmd/zfs/zfs_main.c
+++ b/sys/contrib/openzfs/cmd/zfs/zfs_main.c
@@ -2162,6 +2162,7 @@ zfs_do_get(int argc, char **argv)
cb.cb_type = ZFS_TYPE_DATASET;
struct option long_options[] = {
+ {"json", no_argument, NULL, 'j'},
{"json-int", no_argument, NULL, ZFS_OPTION_JSON_NUMS_AS_INT},
{0, 0, 0, 0}
};
@@ -3852,6 +3853,7 @@ zfs_do_list(int argc, char **argv)
nvlist_t *data = NULL;
struct option long_options[] = {
+ {"json", no_argument, NULL, 'j'},
{"json-int", no_argument, NULL, ZFS_OPTION_JSON_NUMS_AS_INT},
{0, 0, 0, 0}
};
@@ -7436,9 +7438,15 @@ share_mount(int op, int argc, char **argv)
uint_t nthr;
jsobj = data = item = NULL;
+ struct option long_options[] = {
+ {"json", no_argument, NULL, 'j'},
+ {0, 0, 0, 0}
+ };
+
/* check options */
- while ((c = getopt(argc, argv, op == OP_MOUNT ? ":ajRlvo:Of" : "al"))
- != -1) {
+ while ((c = getopt_long(argc, argv,
+ op == OP_MOUNT ? ":ajRlvo:Of" : "al",
+ op == OP_MOUNT ? long_options : NULL, NULL)) != -1) {
switch (c) {
case 'a':
do_all = 1;
@@ -8374,8 +8382,14 @@ zfs_do_channel_program(int argc, char **argv)
boolean_t sync_flag = B_TRUE, json_output = B_FALSE;
zpool_handle_t *zhp;
+ struct option long_options[] = {
+ {"json", no_argument, NULL, 'j'},
+ {0, 0, 0, 0}
+ };
+
/* check options */
- while ((c = getopt(argc, argv, "nt:m:j")) != -1) {
+ while ((c = getopt_long(argc, argv, "nt:m:j", long_options,
+ NULL)) != -1) {
switch (c) {
case 't':
case 'm': {
@@ -9083,7 +9097,13 @@ zfs_do_version(int argc, char **argv)
int c;
nvlist_t *jsobj = NULL, *zfs_ver = NULL;
boolean_t json = B_FALSE;
- while ((c = getopt(argc, argv, "j")) != -1) {
+
+ struct option long_options[] = {
+ {"json", no_argument, NULL, 'j'},
+ {0, 0, 0, 0}
+ };
+
+ while ((c = getopt_long(argc, argv, "j", long_options, NULL)) != -1) {
switch (c) {
case 'j':
json = B_TRUE;
@@ -9187,7 +9207,7 @@ main(int argc, char **argv)
* Special case '-V|--version'
*/
if ((strcmp(cmdname, "-V") == 0) || (strcmp(cmdname, "--version") == 0))
- return (zfs_do_version(argc, argv));
+ return (zfs_version_print() != 0);
/*
* Special case 'help'
diff --git a/sys/contrib/openzfs/cmd/zpool/zpool_main.c b/sys/contrib/openzfs/cmd/zpool/zpool_main.c
index aa7da68aa683..ea180f6b705e 100644
--- a/sys/contrib/openzfs/cmd/zpool/zpool_main.c
+++ b/sys/contrib/openzfs/cmd/zpool/zpool_main.c
@@ -7340,6 +7340,7 @@ zpool_do_list(int argc, char **argv)
current_prop_type = ZFS_TYPE_POOL;
struct option long_options[] = {
+ {"json", no_argument, NULL, 'j'},
{"json-int", no_argument, NULL, ZPOOL_OPTION_JSON_NUMS_AS_INT},
{"json-pool-key-guid", no_argument, NULL,
ZPOOL_OPTION_POOL_KEY_GUID},
@@ -9224,6 +9225,12 @@ vdev_stats_nvlist(zpool_handle_t *zhp, status_cbdata_t *cb, nvlist_t *nv,
}
}
+ if (cb->cb_print_dio_verify) {
+ nice_num_str_nvlist(vds, "dio_verify_errors",
+ vs->vs_dio_verify_errors, cb->cb_literal,
+ cb->cb_json_as_int, ZFS_NICENUM_1024);
+ }
+
if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT,
&notpresent) == 0) {
nice_num_str_nvlist(vds, ZPOOL_CONFIG_NOT_PRESENT,
@@ -10975,6 +10982,7 @@ zpool_do_status(int argc, char **argv)
struct option long_options[] = {
{"power", no_argument, NULL, ZPOOL_OPTION_POWER},
+ {"json", no_argument, NULL, 'j'},
{"json-int", no_argument, NULL, ZPOOL_OPTION_JSON_NUMS_AS_INT},
{"json-flat-vdevs", no_argument, NULL,
ZPOOL_OPTION_JSON_FLAT_VDEVS},
@@ -12583,6 +12591,7 @@ zpool_do_get(int argc, char **argv)
current_prop_type = cb.cb_type;
struct option long_options[] = {
+ {"json", no_argument, NULL, 'j'},
{"json-int", no_argument, NULL, ZPOOL_OPTION_JSON_NUMS_AS_INT},
{"json-pool-key-guid", no_argument, NULL,
ZPOOL_OPTION_POOL_KEY_GUID},
@@ -13497,7 +13506,12 @@ zpool_do_version(int argc, char **argv)
int c;
nvlist_t *jsobj = NULL, *zfs_ver = NULL;
boolean_t json = B_FALSE;
- while ((c = getopt(argc, argv, "j")) != -1) {
+
+ struct option long_options[] = {
+ {"json", no_argument, NULL, 'j'},
+ {0, 0, 0, 0}
+ };
+
+ while ((c = getopt_long(argc, argv, "j", long_options, NULL)) != -1) {
switch (c) {
case 'j':
json = B_TRUE;
@@ -13613,7 +13627,7 @@ main(int argc, char **argv)
* Special case '-V|--version'
*/
if ((strcmp(cmdname, "-V") == 0) || (strcmp(cmdname, "--version") == 0))
- return (zpool_do_version(argc, argv));
+ return (zfs_version_print() != 0);
/*
* Special case 'help'
diff --git a/sys/contrib/openzfs/cmd/ztest.c b/sys/contrib/openzfs/cmd/ztest.c
index 523f280aae1a..4a7959ebfca5 100644
--- a/sys/contrib/openzfs/cmd/ztest.c
+++ b/sys/contrib/openzfs/cmd/ztest.c
@@ -6717,6 +6717,17 @@ out:
*
* Only after a full scrub has been completed is it safe to start injecting
* data corruption. See the comment in zfs_fault_inject().
+ *
+ * EBUSY may be returned for the following six cases. It's the caller's
+ * responsibility to handle them accordingly.
+ *
+ * Current state Requested
+ * 1. Normal Scrub Running Normal Scrub or Error Scrub
+ * 2. Normal Scrub Paused Error Scrub
+ * 3. Normal Scrub Paused Pause Normal Scrub
+ * 4. Error Scrub Running Normal Scrub or Error Scrub
+ * 5. Error Scrub Paused Pause Error Scrub
+ * 6. Resilvering Anything else
*/
static int
ztest_scrub_impl(spa_t *spa)
@@ -8082,8 +8093,14 @@ ztest_raidz_expand_check(spa_t *spa)
(void) printf("verifying an interrupted raidz "
"expansion using a pool scrub ...\n");
}
+
/* Will fail here if there is non-recoverable corruption detected */
- VERIFY0(ztest_scrub_impl(spa));
+ int error = ztest_scrub_impl(spa);
+ if (error == EBUSY)
+ error = 0;
+
+ VERIFY0(error);
+
if (ztest_opts.zo_verbose >= 1) {
(void) printf("raidz expansion scrub check complete\n");
}
diff --git a/sys/contrib/openzfs/config/deb.am b/sys/contrib/openzfs/config/deb.am
index 4d86a1b70615..9e58e1905b73 100644
--- a/sys/contrib/openzfs/config/deb.am
+++ b/sys/contrib/openzfs/config/deb.am
@@ -58,9 +58,9 @@ deb-utils: deb-local rpm-utils-initramfs
pkg1=$${name}-$${version}.$${arch}.rpm; \
pkg2=libnvpair3-$${version}.$${arch}.rpm; \
pkg3=libuutil3-$${version}.$${arch}.rpm; \
- pkg4=libzfs5-$${version}.$${arch}.rpm; \
- pkg5=libzpool5-$${version}.$${arch}.rpm; \
- pkg6=libzfs5-devel-$${version}.$${arch}.rpm; \
+ pkg4=libzfs6-$${version}.$${arch}.rpm; \
+ pkg5=libzpool6-$${version}.$${arch}.rpm; \
+ pkg6=libzfs6-devel-$${version}.$${arch}.rpm; \
pkg7=$${name}-test-$${version}.$${arch}.rpm; \
pkg8=$${name}-dracut-$${version}.noarch.rpm; \
pkg9=$${name}-initramfs-$${version}.$${arch}.rpm; \
@@ -72,7 +72,7 @@ deb-utils: deb-local rpm-utils-initramfs
path_prepend=`mktemp -d /tmp/intercept.XXXXXX`; \
echo "#!$(SHELL)" > $${path_prepend}/dh_shlibdeps; \
echo "`which dh_shlibdeps` -- \
- -xlibuutil3linux -xlibnvpair3linux -xlibzfs5linux -xlibzpool5linux" \
+ -xlibuutil3linux -xlibnvpair3linux -xlibzfs6linux -xlibzpool6linux" \
>> $${path_prepend}/dh_shlibdeps; \
## These -x arguments are passed to dpkg-shlibdeps, which exclude the
## Debianized packages from the auto-generated dependencies of the new debs,
@@ -93,13 +93,17 @@ debian:
cp -r contrib/debian debian; chmod +x debian/rules;
native-deb-utils: native-deb-local debian
+ while [ -f debian/deb-build.lock ]; do sleep 1; done; \
+ echo "native-deb-utils" > debian/deb-build.lock; \
cp contrib/debian/control debian/control; \
- $(DPKGBUILD) -b -rfakeroot -us -uc;
+ $(DPKGBUILD) -b -rfakeroot -us -uc; \
+ $(RM) -f debian/deb-build.lock
native-deb-kmod: native-deb-local debian
+ while [ -f debian/deb-build.lock ]; do sleep 1; done; \
+ echo "native-deb-kmod" > debian/deb-build.lock; \
sh scripts/make_gitrev.sh; \
- fakeroot debian/rules override_dh_binary-modules;
+ fakeroot debian/rules override_dh_binary-modules; \
+ $(RM) -f debian/deb-build.lock
native-deb: native-deb-utils native-deb-kmod
-
-.NOTPARALLEL: native-deb native-deb-utils native-deb-kmod
diff --git a/sys/contrib/openzfs/config/user.m4 b/sys/contrib/openzfs/config/user.m4
index badd920d2b8a..4e31745a2abc 100644
--- a/sys/contrib/openzfs/config/user.m4
+++ b/sys/contrib/openzfs/config/user.m4
@@ -33,7 +33,7 @@ AC_DEFUN([ZFS_AC_CONFIG_USER], [
ZFS_AC_CONFIG_USER_MAKEDEV_IN_MKDEV
ZFS_AC_CONFIG_USER_ZFSEXEC
- AC_CHECK_FUNCS([execvpe issetugid mlockall strlcat strlcpy gettid])
+ AC_CHECK_FUNCS([execvpe issetugid mlockall strerror_l strlcat strlcpy gettid])
AC_SUBST(RM)
])
diff --git a/sys/contrib/openzfs/contrib/debian/Makefile.am b/sys/contrib/openzfs/contrib/debian/Makefile.am
index f76b59645ead..99d512312df6 100644
--- a/sys/contrib/openzfs/contrib/debian/Makefile.am
+++ b/sys/contrib/openzfs/contrib/debian/Makefile.am
@@ -12,14 +12,14 @@ dist_noinst_DATA += %D%/openzfs-libpam-zfs.postinst
dist_noinst_DATA += %D%/openzfs-libpam-zfs.prerm
dist_noinst_DATA += %D%/openzfs-libuutil3.docs
dist_noinst_DATA += %D%/openzfs-libuutil3.install.in
-dist_noinst_DATA += %D%/openzfs-libzfs4.docs
-dist_noinst_DATA += %D%/openzfs-libzfs4.install.in
+dist_noinst_DATA += %D%/openzfs-libzfs6.docs
+dist_noinst_DATA += %D%/openzfs-libzfs6.install.in
dist_noinst_DATA += %D%/openzfs-libzfsbootenv1.docs
dist_noinst_DATA += %D%/openzfs-libzfsbootenv1.install.in
dist_noinst_DATA += %D%/openzfs-libzfs-dev.docs
dist_noinst_DATA += %D%/openzfs-libzfs-dev.install.in
-dist_noinst_DATA += %D%/openzfs-libzpool5.docs
-dist_noinst_DATA += %D%/openzfs-libzpool5.install.in
+dist_noinst_DATA += %D%/openzfs-libzpool6.docs
+dist_noinst_DATA += %D%/openzfs-libzpool6.install.in
dist_noinst_DATA += %D%/openzfs-python3-pyzfs.install
dist_noinst_DATA += %D%/openzfs-zfs-dkms.config
dist_noinst_DATA += %D%/openzfs-zfs-dkms.dkms
diff --git a/sys/contrib/openzfs/contrib/debian/clean b/sys/contrib/openzfs/contrib/debian/clean
index 3100d693aeba..4f52d01b8108 100644
--- a/sys/contrib/openzfs/contrib/debian/clean
+++ b/sys/contrib/openzfs/contrib/debian/clean
@@ -6,6 +6,6 @@ contrib/pyzfs/libzfs_core/bindings/__pycache__/
contrib/pyzfs/pyzfs.egg-info/
debian/openzfs-libnvpair3.install
debian/openzfs-libuutil3.install
-debian/openzfs-libzfs4.install
+debian/openzfs-libzfs6.install
debian/openzfs-libzfs-dev.install
-debian/openzfs-libzpool5.install
+debian/openzfs-libzpool6.install
diff --git a/sys/contrib/openzfs/contrib/debian/control b/sys/contrib/openzfs/contrib/debian/control
index e56fbf0f1c93..6829c0ccdf93 100644
--- a/sys/contrib/openzfs/contrib/debian/control
+++ b/sys/contrib/openzfs/contrib/debian/control
@@ -78,9 +78,9 @@ Architecture: linux-any
Depends: libssl-dev | libssl1.0-dev,
openzfs-libnvpair3 (= ${binary:Version}),
openzfs-libuutil3 (= ${binary:Version}),
- openzfs-libzfs4 (= ${binary:Version}),
+ openzfs-libzfs6 (= ${binary:Version}),
openzfs-libzfsbootenv1 (= ${binary:Version}),
- openzfs-libzpool5 (= ${binary:Version}),
+ openzfs-libzpool6 (= ${binary:Version}),
${misc:Depends}
Replaces: libzfslinux-dev
Conflicts: libzfslinux-dev
@@ -90,18 +90,18 @@ Description: OpenZFS filesystem development files for Linux
libraries of OpenZFS filesystem.
.
This package includes the development files of libnvpair3, libuutil3,
- libzpool5 and libzfs4.
+ libzpool6 and libzfs6.
-Package: openzfs-libzfs4
+Package: openzfs-libzfs6
Section: contrib/libs
Architecture: linux-any
Depends: ${misc:Depends}, ${shlibs:Depends}
# The libcurl4 is loaded through dlopen("libcurl.so.4").
# https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=988521
Recommends: libcurl4
-Breaks: libzfs2, libzfs4
-Replaces: libzfs2, libzfs4, libzfs4linux
-Conflicts: libzfs4linux
+Breaks: libzfs2, libzfs4, libzfs4linux, libzfs6linux
+Replaces: libzfs2, libzfs4, libzfs4linux, libzfs6linux
+Conflicts: libzfs6linux
Description: OpenZFS filesystem library for Linux - general support
OpenZFS is a storage platform that encompasses the functionality of
traditional filesystems and volume managers. It supports data checksums,
@@ -123,13 +123,13 @@ Description: OpenZFS filesystem library for Linux - label info support
.
The zfsbootenv library provides support for modifying ZFS label information.
-Package: openzfs-libzpool5
+Package: openzfs-libzpool6
Section: contrib/libs
Architecture: linux-any
Depends: ${misc:Depends}, ${shlibs:Depends}
-Breaks: libzpool2, libzpool5
-Replaces: libzpool2, libzpool5, libzpool5linux
-Conflicts: libzpool5linux
+Breaks: libzpool2, libzpool5, libzpool5linux, libzpool6linux
+Replaces: libzpool2, libzpool5, libzpool5linux, libzpool6linux
+Conflicts: libzpool6linux
Description: OpenZFS pool library for Linux
OpenZFS is a storage platform that encompasses the functionality of
traditional filesystems and volume managers. It supports data checksums,
@@ -246,8 +246,8 @@ Architecture: linux-any
Pre-Depends: ${misc:Pre-Depends}
Depends: openzfs-libnvpair3 (= ${binary:Version}),
openzfs-libuutil3 (= ${binary:Version}),
- openzfs-libzfs4 (= ${binary:Version}),
- openzfs-libzpool5 (= ${binary:Version}),
+ openzfs-libzfs6 (= ${binary:Version}),
+ openzfs-libzpool6 (= ${binary:Version}),
python3,
${misc:Depends},
${shlibs:Depends}
diff --git a/sys/contrib/openzfs/contrib/debian/openzfs-libzfs4.docs b/sys/contrib/openzfs/contrib/debian/openzfs-libzfs6.docs
index 4302f1b2ab6a..4302f1b2ab6a 100644
--- a/sys/contrib/openzfs/contrib/debian/openzfs-libzfs4.docs
+++ b/sys/contrib/openzfs/contrib/debian/openzfs-libzfs6.docs
diff --git a/sys/contrib/openzfs/contrib/debian/openzfs-libzfs4.install.in b/sys/contrib/openzfs/contrib/debian/openzfs-libzfs6.install.in
index 6765aaee59cc..6765aaee59cc 100644
--- a/sys/contrib/openzfs/contrib/debian/openzfs-libzfs4.install.in
+++ b/sys/contrib/openzfs/contrib/debian/openzfs-libzfs6.install.in
diff --git a/sys/contrib/openzfs/contrib/debian/openzfs-libzpool5.docs b/sys/contrib/openzfs/contrib/debian/openzfs-libzpool6.docs
index 4302f1b2ab6a..4302f1b2ab6a 100644
--- a/sys/contrib/openzfs/contrib/debian/openzfs-libzpool5.docs
+++ b/sys/contrib/openzfs/contrib/debian/openzfs-libzpool6.docs
diff --git a/sys/contrib/openzfs/contrib/debian/openzfs-libzpool5.install.in b/sys/contrib/openzfs/contrib/debian/openzfs-libzpool6.install.in
index b9e872df9ba8..b9e872df9ba8 100644
--- a/sys/contrib/openzfs/contrib/debian/openzfs-libzpool5.install.in
+++ b/sys/contrib/openzfs/contrib/debian/openzfs-libzpool6.install.in
diff --git a/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.install b/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.install
index d51e4ef003e6..546745930bff 100644
--- a/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.install
+++ b/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.install
@@ -98,6 +98,7 @@ usr/share/man/man8/zpool-attach.8
usr/share/man/man8/zpool-checkpoint.8
usr/share/man/man8/zpool-clear.8
usr/share/man/man8/zpool-create.8
+usr/share/man/man8/zpool-ddtprune.8
usr/share/man/man8/zpool-destroy.8
usr/share/man/man8/zpool-detach.8
usr/share/man/man8/zpool-ddtprune.8
@@ -113,6 +114,7 @@ usr/share/man/man8/zpool-list.8
usr/share/man/man8/zpool-offline.8
usr/share/man/man8/zpool-online.8
usr/share/man/man8/zpool-prefetch.8
+usr/share/man/man8/zpool-prefetch.8
usr/share/man/man8/zpool-reguid.8
usr/share/man/man8/zpool-remove.8
usr/share/man/man8/zpool-reopen.8
diff --git a/sys/contrib/openzfs/contrib/initramfs/scripts/zfs b/sys/contrib/openzfs/contrib/initramfs/scripts/zfs
index 0a2bd2efda7a..c569b2528368 100644
--- a/sys/contrib/openzfs/contrib/initramfs/scripts/zfs
+++ b/sys/contrib/openzfs/contrib/initramfs/scripts/zfs
@@ -344,7 +344,7 @@ mount_fs()
# Need the _original_ datasets mountpoint!
mountpoint=$(get_fs_value "$fs" mountpoint)
- ZFS_CMD="mount -o zfsutil -t zfs"
+ ZFS_CMD="mount.zfs -o zfsutil"
if [ "$mountpoint" = "legacy" ] || [ "$mountpoint" = "none" ]; then
# Can't use the mountpoint property. Might be one of our
# clones. Check the 'org.zol:mountpoint' property set in
@@ -359,9 +359,8 @@ mount_fs()
# isn't the root fs.
return 0
fi
- # Don't use mount.zfs -o zfsutils for legacy mountpoint
if [ "$mountpoint" = "legacy" ]; then
- ZFS_CMD="mount -t zfs"
+ ZFS_CMD="mount.zfs"
fi
# Last hail-mary: Hope 'rootmnt' is set!
mountpoint=""
diff --git a/sys/contrib/openzfs/include/libzutil.h b/sys/contrib/openzfs/include/libzutil.h
index e2108ceeaa44..f8712340cc5e 100644
--- a/sys/contrib/openzfs/include/libzutil.h
+++ b/sys/contrib/openzfs/include/libzutil.h
@@ -276,7 +276,11 @@ _LIBZUTIL_H void update_vdev_config_dev_sysfs_path(nvlist_t *nv,
* Thread-safe strerror() for use in ZFS libraries
*/
static inline char *zfs_strerror(int errnum) {
+#ifdef HAVE_STRERROR_L
return (strerror_l(errnum, uselocale(0)));
+#else
+ return (strerror(errnum));
+#endif
}
#ifdef __cplusplus
diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/taskq.h b/sys/contrib/openzfs/include/os/linux/spl/sys/taskq.h
index f63a397f293d..4cb3a055c3c4 100644
--- a/sys/contrib/openzfs/include/os/linux/spl/sys/taskq.h
+++ b/sys/contrib/openzfs/include/os/linux/spl/sys/taskq.h
@@ -38,8 +38,7 @@
#include <sys/rwlock.h>
#include <sys/wait.h>
#include <sys/wmsum.h>
-
-typedef struct kstat_s kstat_t;
+#include <sys/kstat.h>
#define TASKQ_NAMELEN 31
diff --git a/sys/contrib/openzfs/include/os/linux/zfs/sys/abd_os.h b/sys/contrib/openzfs/include/os/linux/zfs/sys/abd_os.h
index 606e8bf682e8..3eed968e90c0 100644
--- a/sys/contrib/openzfs/include/os/linux/zfs/sys/abd_os.h
+++ b/sys/contrib/openzfs/include/os/linux/zfs/sys/abd_os.h
@@ -30,6 +30,8 @@
extern "C" {
#endif
+struct abd;
+
struct abd_scatter {
uint_t abd_offset;
uint_t abd_nents;
@@ -41,10 +43,8 @@ struct abd_linear {
struct scatterlist *abd_sgl; /* for LINEAR_PAGE */
};
-typedef struct abd abd_t;
-
typedef int abd_iter_page_func_t(struct page *, size_t, size_t, void *);
-int abd_iterate_page_func(abd_t *, size_t, size_t, abd_iter_page_func_t *,
+int abd_iterate_page_func(struct abd *, size_t, size_t, abd_iter_page_func_t *,
void *);
/*
@@ -52,11 +52,11 @@ int abd_iterate_page_func(abd_t *, size_t, size_t, abd_iter_page_func_t *,
* Note: these are only needed to support vdev_classic. See comment in
* vdev_disk.c.
*/
-unsigned int abd_bio_map_off(struct bio *, abd_t *, unsigned int, size_t);
-unsigned long abd_nr_pages_off(abd_t *, unsigned int, size_t);
+unsigned int abd_bio_map_off(struct bio *, struct abd *, unsigned int, size_t);
+unsigned long abd_nr_pages_off(struct abd *, unsigned int, size_t);
__attribute__((malloc))
-abd_t *abd_alloc_from_pages(struct page **, unsigned long, uint64_t);
+struct abd *abd_alloc_from_pages(struct page **, unsigned long, uint64_t);
#ifdef __cplusplus
}
diff --git a/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_vfsops_os.h b/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_vfsops_os.h
index 7067eb17900d..30aa3a103d33 100644
--- a/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_vfsops_os.h
+++ b/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_vfsops_os.h
@@ -69,6 +69,7 @@ typedef struct vfs {
boolean_t vfs_do_relatime;
boolean_t vfs_nbmand;
boolean_t vfs_do_nbmand;
+ kmutex_t vfs_mntpt_lock;
} vfs_t;
typedef struct zfs_mnt {
diff --git a/sys/contrib/openzfs/include/sys/fm/fs/zfs.h b/sys/contrib/openzfs/include/sys/fm/fs/zfs.h
index 55b150c044ee..43d6e3f96ea3 100644
--- a/sys/contrib/openzfs/include/sys/fm/fs/zfs.h
+++ b/sys/contrib/openzfs/include/sys/fm/fs/zfs.h
@@ -42,7 +42,8 @@ extern "C" {
#define FM_EREPORT_ZFS_DATA "data"
#define FM_EREPORT_ZFS_DELAY "delay"
#define FM_EREPORT_ZFS_DEADMAN "deadman"
-#define FM_EREPORT_ZFS_DIO_VERIFY "dio_verify"
+#define FM_EREPORT_ZFS_DIO_VERIFY_WR "dio_verify_wr"
+#define FM_EREPORT_ZFS_DIO_VERIFY_RD "dio_verify_rd"
#define FM_EREPORT_ZFS_POOL "zpool"
#define FM_EREPORT_ZFS_DEVICE_UNKNOWN "vdev.unknown"
#define FM_EREPORT_ZFS_DEVICE_OPEN_FAILED "vdev.open_failed"
diff --git a/sys/contrib/openzfs/include/sys/vdev_raidz.h b/sys/contrib/openzfs/include/sys/vdev_raidz.h
index a34bc00ca4df..64f484e9aa13 100644
--- a/sys/contrib/openzfs/include/sys/vdev_raidz.h
+++ b/sys/contrib/openzfs/include/sys/vdev_raidz.h
@@ -57,7 +57,7 @@ void vdev_raidz_reconstruct(struct raidz_map *, const int *, int);
void vdev_raidz_child_done(zio_t *);
void vdev_raidz_io_done(zio_t *);
void vdev_raidz_checksum_error(zio_t *, struct raidz_col *, abd_t *);
-struct raidz_row *vdev_raidz_row_alloc(int);
+struct raidz_row *vdev_raidz_row_alloc(int, zio_t *);
void vdev_raidz_reflow_copy_scratch(spa_t *);
void raidz_dtl_reassessed(vdev_t *);
diff --git a/sys/contrib/openzfs/include/sys/zio.h b/sys/contrib/openzfs/include/sys/zio.h
index f9409433e031..46f5d68aed4a 100644
--- a/sys/contrib/openzfs/include/sys/zio.h
+++ b/sys/contrib/openzfs/include/sys/zio.h
@@ -208,25 +208,25 @@ typedef uint64_t zio_flag_t;
#define ZIO_FLAG_PROBE (1ULL << 16)
#define ZIO_FLAG_TRYHARD (1ULL << 17)
#define ZIO_FLAG_OPTIONAL (1ULL << 18)
-
+#define ZIO_FLAG_DIO_READ (1ULL << 19)
#define ZIO_FLAG_VDEV_INHERIT (ZIO_FLAG_DONT_QUEUE - 1)
/*
* Flags not inherited by any children.
*/
-#define ZIO_FLAG_DONT_QUEUE (1ULL << 19) /* must be first for INHERIT */
-#define ZIO_FLAG_DONT_PROPAGATE (1ULL << 20)
-#define ZIO_FLAG_IO_BYPASS (1ULL << 21)
-#define ZIO_FLAG_IO_REWRITE (1ULL << 22)
-#define ZIO_FLAG_RAW_COMPRESS (1ULL << 23)
-#define ZIO_FLAG_RAW_ENCRYPT (1ULL << 24)
-#define ZIO_FLAG_GANG_CHILD (1ULL << 25)
-#define ZIO_FLAG_DDT_CHILD (1ULL << 26)
-#define ZIO_FLAG_GODFATHER (1ULL << 27)
-#define ZIO_FLAG_NOPWRITE (1ULL << 28)
-#define ZIO_FLAG_REEXECUTED (1ULL << 29)
-#define ZIO_FLAG_DELEGATED (1ULL << 30)
-#define ZIO_FLAG_DIO_CHKSUM_ERR (1ULL << 31)
+#define ZIO_FLAG_DONT_QUEUE (1ULL << 20) /* must be first for INHERIT */
+#define ZIO_FLAG_DONT_PROPAGATE (1ULL << 21)
+#define ZIO_FLAG_IO_BYPASS (1ULL << 22)
+#define ZIO_FLAG_IO_REWRITE (1ULL << 23)
+#define ZIO_FLAG_RAW_COMPRESS (1ULL << 24)
+#define ZIO_FLAG_RAW_ENCRYPT (1ULL << 25)
+#define ZIO_FLAG_GANG_CHILD (1ULL << 26)
+#define ZIO_FLAG_DDT_CHILD (1ULL << 27)
+#define ZIO_FLAG_GODFATHER (1ULL << 28)
+#define ZIO_FLAG_NOPWRITE (1ULL << 29)
+#define ZIO_FLAG_REEXECUTED (1ULL << 30)
+#define ZIO_FLAG_DELEGATED (1ULL << 31)
+#define ZIO_FLAG_DIO_CHKSUM_ERR (1ULL << 32)
#define ZIO_ALLOCATOR_NONE (-1)
#define ZIO_HAS_ALLOCATOR(zio) ((zio)->io_allocator != ZIO_ALLOCATOR_NONE)
@@ -647,6 +647,7 @@ extern void zio_vdev_io_redone(zio_t *zio);
extern void zio_change_priority(zio_t *pio, zio_priority_t priority);
extern void zio_checksum_verified(zio_t *zio);
+extern void zio_dio_chksum_verify_error_report(zio_t *zio);
extern int zio_worst_error(int e1, int e2);
extern enum zio_checksum zio_checksum_select(enum zio_checksum child,
diff --git a/sys/contrib/openzfs/lib/libspl/backtrace.c b/sys/contrib/openzfs/lib/libspl/backtrace.c
index d26d742106e2..6e8b3b12122d 100644
--- a/sys/contrib/openzfs/lib/libspl/backtrace.c
+++ b/sys/contrib/openzfs/lib/libspl/backtrace.c
@@ -25,19 +25,32 @@
#include <sys/backtrace.h>
#include <sys/types.h>
+#include <sys/debug.h>
#include <unistd.h>
/*
- * libspl_backtrace() must be safe to call from inside a signal hander. This
- * mostly means it must not allocate, and so we can't use things like printf.
+ * Output helpers. libspl_backtrace() must not block, must be thread-safe and
+ * must be safe to call from a signal handler. At least, that means not having
+ * printf, so we end up having to call write() directly on the fd. That's
+ * awkward, as we always have to pass through a length, and some systems will
+ * complain if we don't consume the return. So we have some macros to make
+ * things a little more palatable.
*/
+#define spl_bt_write_n(fd, s, n) \
+ do { ssize_t r __maybe_unused = write(fd, s, n); } while (0)
+#define spl_bt_write(fd, s) spl_bt_write_n(fd, s, sizeof (s) - 1)
#if defined(HAVE_LIBUNWIND)
#define UNW_LOCAL_ONLY
#include <libunwind.h>
+/*
+ * Convert `v` to ASCII hex characters. The bottom `n` nybbles (4-bits ie one
+ * hex digit) will be written, up to `buflen`. The buffer will not be
+ * null-terminated. Returns the number of digits written.
+ */
static size_t
-libspl_u64_to_hex_str(uint64_t v, size_t digits, char *buf, size_t buflen)
+spl_bt_u64_to_hex_str(uint64_t v, size_t n, char *buf, size_t buflen)
{
static const char hexdigits[] = {
'0', '1', '2', '3', '4', '5', '6', '7',
@@ -45,10 +58,10 @@ libspl_u64_to_hex_str(uint64_t v, size_t digits, char *buf, size_t buflen)
};
size_t pos = 0;
- boolean_t want = (digits == 0);
+ boolean_t want = (n == 0);
for (int i = 15; i >= 0; i--) {
const uint64_t d = v >> (i * 4) & 0xf;
- if (!want && (d != 0 || digits > i))
+ if (!want && (d != 0 || n > i))
want = B_TRUE;
if (want) {
buf[pos++] = hexdigits[d];
@@ -62,40 +75,181 @@ libspl_u64_to_hex_str(uint64_t v, size_t digits, char *buf, size_t buflen)
void
libspl_backtrace(int fd)
{
- ssize_t ret __attribute__((unused));
unw_context_t uc;
unw_cursor_t cp;
- unw_word_t loc;
+ unw_word_t v;
char buf[128];
size_t n;
+ int err;
- ret = write(fd, "Call trace:\n", 12);
+ /* Snapshot the current frame and state. */
unw_getcontext(&uc);
+
+ /*
+ * TODO: walk back to the frame that tripped the assertion / the place
+ * where the signal was received.
+ */
+
+ /*
+ * Register dump. We're going to loop over all the registers in the
+ * top frame, and show them, with names, in a nice three-column
+ * layout, which keeps us within 80 columns.
+ */
+ spl_bt_write(fd, "Registers:\n");
+
+ /* Initialise a frame cursor, starting at the current frame */
unw_init_local(&cp, &uc);
- while (unw_step(&cp) > 0) {
- unw_get_reg(&cp, UNW_REG_IP, &loc);
- ret = write(fd, " [0x", 5);
- n = libspl_u64_to_hex_str(loc, 10, buf, sizeof (buf));
- ret = write(fd, buf, n);
- ret = write(fd, "] ", 2);
- unw_get_proc_name(&cp, buf, sizeof (buf), &loc);
- for (n = 0; n < sizeof (buf) && buf[n] != '\0'; n++) {}
- ret = write(fd, buf, n);
- ret = write(fd, "+0x", 3);
- n = libspl_u64_to_hex_str(loc, 2, buf, sizeof (buf));
- ret = write(fd, buf, n);
+
+ /*
+ * libunwind's list of possible registers for this architecture is an
+ * enum, unw_regnum_t. UNW_TDEP_LAST_REG is the highest-numbered
+ * register in that list, however, not all register numbers in this
+ * range are defined by the architecture, and not all defined registers
+ * will be present on every implementation of that architecture.
+ * Moreover, libunwind provides nice names for most, but not all
+ * registers, but these are hardcoded; a name being available does not
+ * mean that register is available.
+ *
+ * So, we have to pull this all together here. We try to get the value
+ * of every possible register. If we get a value for it, then the
+ * register must exist, and so we get its name. If libunwind has no
+ * name for it, we synthesize something. These cases should be rare,
+ * and they're usually for uninteresting or niche registers, so it
+ * shouldn't really matter. We can see the value, and that's the main
+ * thing.
+ */
+ uint_t cols = 0;
+ for (uint_t regnum = 0; regnum <= UNW_TDEP_LAST_REG; regnum++) {
+ /*
+ * Get the value. Any error probably means the register
+ * doesn't exist, and we skip it.
+ */
+ if (unw_get_reg(&cp, regnum, &v) < 0)
+ continue;
+
+ /*
+ * Register name. If libunwind doesn't have a name for it,
+ * it will return "???". As a shortcut, we just treat '?'
+ * as an alternate end-of-string character.
+ */
+ const char *name = unw_regname(regnum);
+ for (n = 0; name[n] != '\0' && name[n] != '?'; n++) {}
+ if (n == 0) {
+ /*
+ * No valid name, so make one of the form "?xx", where
+ * "xx" is the two-char hex of libunwind's register
+ * number.
+ */
+ buf[0] = '?';
+ n = spl_bt_u64_to_hex_str(regnum, 2,
+ &buf[1], sizeof (buf)-1) + 1;
+ name = buf;
+ }
+
+ /*
+ * Two spaces of padding before each column, plus extra
+ * spaces to align register names shorter than three chars.
+ */
+ spl_bt_write_n(fd, " ", 5-MIN(n, 3));
+
+ /* Register name and column punctuation */
+ spl_bt_write_n(fd, name, n);
+ spl_bt_write(fd, ": 0x");
+
+ /*
+ * Convert register value (from unw_get_reg()) to hex. We're
+ * assuming that all registers are 64-bits wide, which is
+ * probably fine for any general-purpose registers on any
+ * machine currently in use. A more generic way would be to
+ * look at the width of unw_word_t, but that would also
+ * complicate the column code a bit. This is fine.
+ */
+ n = spl_bt_u64_to_hex_str(v, 16, buf, sizeof (buf));
+ spl_bt_write_n(fd, buf, n);
+
+ /* Every third column, emit a newline */
+ if (!(++cols % 3))
+ spl_bt_write(fd, "\n");
+ }
+
+ /* If we finished before the third column, emit a newline. */
+ if (cols % 3)
+ spl_bt_write(fd, "\n");
+
+ /* Now the main event, the backtrace. */
+ spl_bt_write(fd, "Call trace:\n");
+
+ /* Reset the cursor to the top again. */
+ unw_init_local(&cp, &uc);
+
+ do {
+ /*
+ * Getting the IP should never fail; libunwind handles it
+ * specially, because it's used a lot internally. Still, no
+ * point being silly about it, as the last thing we want is
+ * our crash handler to crash. So if it ever does fail, we'll
+ * show an error line, but keep going to the next frame.
+ */
+ if (unw_get_reg(&cp, UNW_REG_IP, &v) < 0) {
+ spl_bt_write(fd, " [couldn't get IP register; "
+ "corrupt frame?]");
+ continue;
+ }
+
+ /* IP & punctuation */
+ n = spl_bt_u64_to_hex_str(v, 16, buf, sizeof (buf));
+ spl_bt_write(fd, " [0x");
+ spl_bt_write_n(fd, buf, n);
+ spl_bt_write(fd, "] ");
+
+ /*
+ * Function ("procedure") name for the current frame. `v`
+ * receives the offset from the named function to the IP, which
+ * we show as a "+offset" suffix.
+ *
+ * If libunwind can't determine the name, we just show "???"
+ * instead. We've already displayed the IP above; that will
+ * have to do.
+ *
+ * unw_get_proc_name() will return ENOMEM if the buffer is too
+ * small, after truncating the name to fit. So we treat that as a
+ * success and use whatever is in the buffer.
+ */
+ err = unw_get_proc_name(&cp, buf, sizeof (buf), &v);
+ if (err == 0 || err == -UNW_ENOMEM) {
+ for (n = 0; n < sizeof (buf) && buf[n] != '\0'; n++) {}
+ spl_bt_write_n(fd, buf, n);
+
+ /* Offset from proc name */
+ spl_bt_write(fd, "+0x");
+ n = spl_bt_u64_to_hex_str(v, 2, buf, sizeof (buf));
+ spl_bt_write_n(fd, buf, n);
+ } else
+ spl_bt_write(fd, "???");
+
#ifdef HAVE_LIBUNWIND_ELF
- ret = write(fd, " (in ", 5);
- unw_get_elf_filename(&cp, buf, sizeof (buf), &loc);
- for (n = 0; n < sizeof (buf) && buf[n] != '\0'; n++) {}
- ret = write(fd, buf, n);
- ret = write(fd, " +0x", 4);
- n = libspl_u64_to_hex_str(loc, 2, buf, sizeof (buf));
- ret = write(fd, buf, n);
- ret = write(fd, ")", 1);
+ /*
+ * Newer libunwind has unw_get_elf_filename(), which gets
+ * the name of the ELF object that the frame was executing in.
+ * Like `unw_get_proc_name()`, `v` receives the offset within
+ * the file, and UNW_ENOMEM indicates that a truncated filename
+ * was left in the buffer.
+ */
+ err = unw_get_elf_filename(&cp, buf, sizeof (buf), &v);
+ if (err == 0 || err == -UNW_ENOMEM) {
+ for (n = 0; n < sizeof (buf) && buf[n] != '\0'; n++) {}
+ spl_bt_write(fd, " (in ");
+ spl_bt_write_n(fd, buf, n);
+
+ /* Offset within file */
+ spl_bt_write(fd, " +0x");
+ n = spl_bt_u64_to_hex_str(v, 2, buf, sizeof (buf));
+ spl_bt_write_n(fd, buf, n);
+ spl_bt_write(fd, ")");
+ }
#endif
- ret = write(fd, "\n", 1);
- }
+ spl_bt_write(fd, "\n");
+ } while (unw_step(&cp) > 0);
}
#elif defined(HAVE_BACKTRACE)
#include <execinfo.h>
@@ -103,15 +257,12 @@ libspl_backtrace(int fd)
void
libspl_backtrace(int fd)
{
- ssize_t ret __attribute__((unused));
void *btptrs[64];
size_t nptrs = backtrace(btptrs, 64);
- ret = write(fd, "Call trace:\n", 12);
+ spl_bt_write(fd, "Call trace:\n");
backtrace_symbols_fd(btptrs, nptrs, fd);
}
#else
-#include <sys/debug.h>
-
void
libspl_backtrace(int fd __maybe_unused)
{
diff --git a/sys/contrib/openzfs/lib/libzfs/Makefile.am b/sys/contrib/openzfs/lib/libzfs/Makefile.am
index a976faaf9913..5f8963dccd1a 100644
--- a/sys/contrib/openzfs/lib/libzfs/Makefile.am
+++ b/sys/contrib/openzfs/lib/libzfs/Makefile.am
@@ -70,7 +70,7 @@ if BUILD_FREEBSD
libzfs_la_LIBADD += -lutil -lgeom
endif
-libzfs_la_LDFLAGS += -version-info 5:0:1
+libzfs_la_LDFLAGS += -version-info 6:0:0
pkgconfig_DATA += %D%/libzfs.pc
diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs.abi b/sys/contrib/openzfs/lib/libzfs/libzfs.abi
index 1a96460c2b84..ac9ae233c72d 100644
--- a/sys/contrib/openzfs/lib/libzfs/libzfs.abi
+++ b/sys/contrib/openzfs/lib/libzfs/libzfs.abi
@@ -1,4 +1,4 @@
-<abi-corpus version='2.0' architecture='elf-amd-x86_64' soname='libzfs.so.4'>
+<abi-corpus version='2.0' architecture='elf-amd-x86_64' soname='libzfs.so.6'>
<elf-needed>
<dependency name='libzfs_core.so.3'/>
<dependency name='libnvpair.so.3'/>
diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_pool.c b/sys/contrib/openzfs/lib/libzfs/libzfs_pool.c
index 14410b153130..44f2c6f19dff 100644
--- a/sys/contrib/openzfs/lib/libzfs/libzfs_pool.c
+++ b/sys/contrib/openzfs/lib/libzfs/libzfs_pool.c
@@ -2796,7 +2796,7 @@ zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func, pool_scrub_cmd_t cmd)
}
/*
- * With EBUSY, five cases are possible:
+ * With EBUSY, six cases are possible:
*
* Current state Requested
* 1. Normal Scrub Running Normal Scrub or Error Scrub
diff --git a/sys/contrib/openzfs/lib/libzpool/Makefile.am b/sys/contrib/openzfs/lib/libzpool/Makefile.am
index 397959c679e9..404b737c204d 100644
--- a/sys/contrib/openzfs/lib/libzpool/Makefile.am
+++ b/sys/contrib/openzfs/lib/libzpool/Makefile.am
@@ -212,7 +212,7 @@ if BUILD_FREEBSD
libzpool_la_LIBADD += -lgeom
endif
-libzpool_la_LDFLAGS += -version-info 5:0:0
+libzpool_la_LDFLAGS += -version-info 6:0:0
if TARGET_CPU_POWERPC
module/zfs/libzpool_la-vdev_raidz_math_powerpc_altivec.$(OBJEXT) : CFLAGS += -maltivec
diff --git a/sys/contrib/openzfs/lib/libzpool/zfs_debug.c b/sys/contrib/openzfs/lib/libzpool/zfs_debug.c
index df49a9a33fe8..82c7229932f0 100644
--- a/sys/contrib/openzfs/lib/libzpool/zfs_debug.c
+++ b/sys/contrib/openzfs/lib/libzpool/zfs_debug.c
@@ -35,9 +35,25 @@ typedef struct zfs_dbgmsg {
static list_t zfs_dbgmsgs;
static kmutex_t zfs_dbgmsgs_lock;
+static uint_t zfs_dbgmsg_size = 0;
+static uint_t zfs_dbgmsg_maxsize = 4<<20; /* 4MB */
int zfs_dbgmsg_enable = B_TRUE;
+static void
+zfs_dbgmsg_purge(uint_t max_size)
+{
+ while (zfs_dbgmsg_size > max_size) {
+ zfs_dbgmsg_t *zdm = list_remove_head(&zfs_dbgmsgs);
+ if (zdm == NULL)
+ return;
+
+ uint_t size = zdm->zdm_size;
+ kmem_free(zdm, size);
+ zfs_dbgmsg_size -= size;
+ }
+}
+
void
zfs_dbgmsg_init(void)
{
@@ -74,6 +90,8 @@ __zfs_dbgmsg(char *buf)
mutex_enter(&zfs_dbgmsgs_lock);
list_insert_tail(&zfs_dbgmsgs, zdm);
+ zfs_dbgmsg_size += size;
+ zfs_dbgmsg_purge(zfs_dbgmsg_maxsize);
mutex_exit(&zfs_dbgmsgs_lock);
}
diff --git a/sys/contrib/openzfs/man/man4/zfs.4 b/sys/contrib/openzfs/man/man4/zfs.4
index e19573d5ee14..c9f6ed0dece3 100644
--- a/sys/contrib/openzfs/man/man4/zfs.4
+++ b/sys/contrib/openzfs/man/man4/zfs.4
@@ -436,7 +436,7 @@ write.
It can also help to identify if reported checksum errors are tied to Direct I/O
writes.
Each verify error causes a
-.Sy dio_verify
+.Sy dio_verify_wr
zevent.
Direct Write I/O checkum verify errors can be seen with
.Nm zpool Cm status Fl d .
diff --git a/sys/contrib/openzfs/man/man5/vdev_id.conf.5 b/sys/contrib/openzfs/man/man5/vdev_id.conf.5
index a2d38add4ee0..aaf91825a6c6 100644
--- a/sys/contrib/openzfs/man/man5/vdev_id.conf.5
+++ b/sys/contrib/openzfs/man/man5/vdev_id.conf.5
@@ -92,6 +92,11 @@ before a generic mapping for the same slot.
In this way a custom mapping may be applied to a particular channel
and a default mapping applied to the others.
.
+.It Sy zpad_slot Ar digits
+Pad slot numbers with zeros to make them
+.Ar digits
+long, which can help to make disk names a consistent length and easier to sort.
+.
.It Sy multipath Sy yes Ns | Ns Sy no
Specifies whether
.Xr vdev_id 8
@@ -122,7 +127,7 @@ device is connected to.
The default is
.Sy 4 .
.
-.It Sy slot Sy bay Ns | Ns Sy phy Ns | Ns Sy port Ns | Ns Sy id Ns | Ns Sy lun Ns | Ns Sy ses
+.It Sy slot Sy bay Ns | Ns Sy phy Ns | Ns Sy port Ns | Ns Sy id Ns | Ns Sy lun Ns | Ns Sy bay_lun Ns | Ns Sy ses
Specifies from which element of a SAS identifier the slot number is
taken.
The default is
@@ -138,6 +143,9 @@ use the SAS port as the slot number.
use the scsi id as the slot number.
.It Sy lun
use the scsi lun as the slot number.
+.It Sy bay_lun
+read the slot number from the bay identifier and append the lun number.
+Useful for multi-lun multi-actuator hard drives.
.It Sy ses
use the SCSI Enclosure Services (SES) enclosure device slot number,
as reported by
diff --git a/sys/contrib/openzfs/man/man8/zfs-list.8 b/sys/contrib/openzfs/man/man8/zfs-list.8
index b49def08b72b..0fa7e9cc2613 100644
--- a/sys/contrib/openzfs/man/man8/zfs-list.8
+++ b/sys/contrib/openzfs/man/man8/zfs-list.8
@@ -71,7 +71,7 @@ The following fields are displayed:
Used for scripting mode.
Do not print headers and separate fields by a single tab instead of arbitrary
white space.
-.It Fl j Op Ar --json-int
+.It Fl j , -json Op Ar --json-int
Print the output in JSON format.
Specify
.Sy --json-int
diff --git a/sys/contrib/openzfs/man/man8/zfs-mount.8 b/sys/contrib/openzfs/man/man8/zfs-mount.8
index 6116fbaab77f..921e2499e31b 100644
--- a/sys/contrib/openzfs/man/man8/zfs-mount.8
+++ b/sys/contrib/openzfs/man/man8/zfs-mount.8
@@ -59,7 +59,7 @@
.Xc
Displays all ZFS file systems currently mounted.
.Bl -tag -width "-j"
-.It Fl j
+.It Fl j , -json
Displays all mounted file systems in JSON format.
.El
.It Xo
diff --git a/sys/contrib/openzfs/man/man8/zfs-program.8 b/sys/contrib/openzfs/man/man8/zfs-program.8
index 928620362be7..460cd2e11cf3 100644
--- a/sys/contrib/openzfs/man/man8/zfs-program.8
+++ b/sys/contrib/openzfs/man/man8/zfs-program.8
@@ -50,7 +50,7 @@ and any attempts to access or modify other pools will cause an error.
.
.Sh OPTIONS
.Bl -tag -width "-t"
-.It Fl j
+.It Fl j , -json
Display channel program output in JSON format.
When this flag is specified and standard output is empty -
channel program encountered an error.
diff --git a/sys/contrib/openzfs/man/man8/zfs-set.8 b/sys/contrib/openzfs/man/man8/zfs-set.8
index 204450d72ec9..f1718608b44b 100644
--- a/sys/contrib/openzfs/man/man8/zfs-set.8
+++ b/sys/contrib/openzfs/man/man8/zfs-set.8
@@ -130,7 +130,7 @@ The value
can be used to display all properties that apply to the given dataset's type
.Pq Sy filesystem , volume , snapshot , No or Sy bookmark .
.Bl -tag -width "-s source"
-.It Fl j Op Ar --json-int
+.It Fl j , -json Op Ar --json-int
Display the output in JSON format.
Specify
.Sy --json-int
diff --git a/sys/contrib/openzfs/man/man8/zpool-events.8 b/sys/contrib/openzfs/man/man8/zpool-events.8
index 234612baea8d..01f849845bd9 100644
--- a/sys/contrib/openzfs/man/man8/zpool-events.8
+++ b/sys/contrib/openzfs/man/man8/zpool-events.8
@@ -98,7 +98,10 @@ This can be an indicator of problems with the underlying storage device.
The number of delay events is ratelimited by the
.Sy zfs_slow_io_events_per_second
module parameter.
-.It Sy dio_verify
+.It Sy dio_verify_rd
+Issued when there was a checksum verify error after a Direct I/O read has been
+issued.
+.It Sy dio_verify_wr
Issued when there was a checksum verify error after a Direct I/O write has been
issued.
This event can only take place if the module parameter
diff --git a/sys/contrib/openzfs/man/man8/zpool-get.8 b/sys/contrib/openzfs/man/man8/zpool-get.8
index 5384906f17f2..1be83526d22d 100644
--- a/sys/contrib/openzfs/man/man8/zpool-get.8
+++ b/sys/contrib/openzfs/man/man8/zpool-get.8
@@ -98,7 +98,7 @@ See the
.Xr zpoolprops 7
manual page for more information on the available pool properties.
.Bl -tag -compact -offset Ds -width "-o field"
-.It Fl j Op Ar --json-int, --json-pool-key-guid
+.It Fl j , -json Op Ar --json-int, --json-pool-key-guid
Display the list of properties in JSON format.
Specify
.Sy --json-int
@@ -157,7 +157,7 @@ See the
.Xr vdevprops 7
manual page for more information on the available pool properties.
.Bl -tag -compact -offset Ds -width "-o field"
-.It Fl j Op Ar --json-int
+.It Fl j , -json Op Ar --json-int
Display the list of properties in JSON format.
Specify
.Sy --json-int
diff --git a/sys/contrib/openzfs/man/man8/zpool-list.8 b/sys/contrib/openzfs/man/man8/zpool-list.8
index b0ee659701d4..6d3478a68711 100644
--- a/sys/contrib/openzfs/man/man8/zpool-list.8
+++ b/sys/contrib/openzfs/man/man8/zpool-list.8
@@ -59,7 +59,7 @@ is specified, the command exits after
.Ar count
reports are printed.
.Bl -tag -width Ds
-.It Fl j Op Ar --json-int, --json-pool-key-guid
+.It Fl j , -json Op Ar --json-int, --json-pool-key-guid
Display the list of pools in JSON format.
Specify
.Sy --json-int
diff --git a/sys/contrib/openzfs/man/man8/zpool-status.8 b/sys/contrib/openzfs/man/man8/zpool-status.8
index 868fc4414dbb..b9b54185d050 100644
--- a/sys/contrib/openzfs/man/man8/zpool-status.8
+++ b/sys/contrib/openzfs/man/man8/zpool-status.8
@@ -70,7 +70,7 @@ See the
option of
.Nm zpool Cm iostat
for complete details.
-.It Fl j Op Ar --json-int, --json-flat-vdevs, --json-pool-key-guid
+.It Fl j , -json Op Ar --json-int, --json-flat-vdevs, --json-pool-key-guid
Display the status for ZFS pools in JSON format.
Specify
.Sy --json-int
@@ -82,14 +82,18 @@ Specify
.Sy --json-pool-key-guid
to set pool GUID as key for pool objects instead of pool names.
.It Fl d
-Display the number of Direct I/O write checksum verify errors that have occured
-on a top-level VDEV.
+Display the number of Direct I/O read/write checksum verify errors that have
+occurred on a top-level VDEV.
See
.Sx zfs_vdev_direct_write_verify
in
.Xr zfs 4
for details about the conditions that can cause Direct I/O write checksum
verify failures to occur.
+Direct I/O read checksum verify errors can also occur if the contents of the
+buffer are being manipulated after the I/O has been issued and is in flight.
+In the case of Direct I/O read checksum verify errors, the I/O will be reissued
+through the ARC.
.It Fl D
Display a histogram of deduplication statistics, showing the allocated
.Pq physically present on disk
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/abd_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/abd_os.c
index f20dc5d8c325..ab4f49a4ec5a 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/abd_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/abd_os.c
@@ -620,9 +620,16 @@ abd_borrow_buf_copy(abd_t *abd, size_t n)
/*
* Return a borrowed raw buffer to an ABD. If the ABD is scattered, this will
- * no change the contents of the ABD and will ASSERT that you didn't modify
- * the buffer since it was borrowed. If you want any changes you made to buf to
- * be copied back to abd, use abd_return_buf_copy() instead.
+ * not change the contents of the ABD. If you want any changes you made to
+ * buf to be copied back to abd, use abd_return_buf_copy() instead. If the
+ * ABD is not constructed from user pages from Direct I/O then an ASSERT
+ * checks to make sure the contents of the buffer have not changed since it was
+ * borrowed. We can not ASSERT the contents of the buffer have not changed if
+ * it is composed of user pages. While Direct I/O write pages are placed under
+ * write protection and can not be changed, this is not the case for Direct I/O
+ * reads. The pages of a Direct I/O read could be manipulated at any time.
+ * Checksum verifications in the ZIO pipeline check for this issue and handle
+ * it by returning an error on checksum verification failure.
*/
void
abd_return_buf(abd_t *abd, void *buf, size_t n)
@@ -632,8 +639,34 @@ abd_return_buf(abd_t *abd, void *buf, size_t n)
#ifdef ZFS_DEBUG
(void) zfs_refcount_remove_many(&abd->abd_children, n, buf);
#endif
- if (abd_is_linear(abd)) {
+ if (abd_is_from_pages(abd)) {
+ if (!abd_is_linear_page(abd))
+ zio_buf_free(buf, n);
+ } else if (abd_is_linear(abd)) {
ASSERT3P(buf, ==, abd_to_buf(abd));
+ } else if (abd_is_gang(abd)) {
+#ifdef ZFS_DEBUG
+ /*
+ * We have to be careful with gang ABD's that we do not ASSERT
+ * for any ABD's that contain user pages from Direct I/O. See
+ * the comment above about Direct I/O read buffers possibly
+ * being manipulated. In order to handle this, we just iterate
+ * through the gang ABD and only verify ABD's that are not from
+ * user pages.
+ */
+ void *cmp_buf = buf;
+
+ for (abd_t *cabd = list_head(&ABD_GANG(abd).abd_gang_chain);
+ cabd != NULL;
+ cabd = list_next(&ABD_GANG(abd).abd_gang_chain, cabd)) {
+ if (!abd_is_from_pages(cabd)) {
+ ASSERT0(abd_cmp_buf(cabd, cmp_buf,
+ cabd->abd_size));
+ }
+ cmp_buf = (char *)cmp_buf + cabd->abd_size;
+ }
+#endif
+ zio_buf_free(buf, n);
} else {
ASSERT0(abd_cmp_buf(abd, buf, n));
zio_buf_free(buf, n);
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/abd_os.c b/sys/contrib/openzfs/module/os/linux/zfs/abd_os.c
index 03362b1ee860..303af48cf3af 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/abd_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/abd_os.c
@@ -1008,7 +1008,9 @@ abd_borrow_buf_copy(abd_t *abd, size_t n)
* borrowed. We can not ASSERT that the contents of the buffer have not changed
* if it is composed of user pages because the pages can not be placed under
* write protection and the user could have possibly changed the contents in
- * the pages at any time.
+ * the pages at any time. This is also an issue for Direct I/O reads. Checksum
+ * verifications in the ZIO pipeline check for this issue and handle it by
+ * returning an error on checksum verification failure.
*/
void
abd_return_buf(abd_t *abd, void *buf, size_t n)
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c
index 8a42a075cd25..f60d6ae91e0b 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c
@@ -767,9 +767,6 @@ zfsctl_snapshot_path_objset(zfsvfs_t *zfsvfs, uint64_t objsetid,
uint64_t id, pos = 0;
int error = 0;
- if (zfsvfs->z_vfs->vfs_mntpoint == NULL)
- return (SET_ERROR(ENOENT));
-
cookie = spl_fstrans_mark();
snapname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
@@ -786,8 +783,14 @@ zfsctl_snapshot_path_objset(zfsvfs_t *zfsvfs, uint64_t objsetid,
break;
}
- snprintf(full_path, path_len, "%s/.zfs/snapshot/%s",
- zfsvfs->z_vfs->vfs_mntpoint, snapname);
+ mutex_enter(&zfsvfs->z_vfs->vfs_mntpt_lock);
+ if (zfsvfs->z_vfs->vfs_mntpoint != NULL) {
+ snprintf(full_path, path_len, "%s/.zfs/snapshot/%s",
+ zfsvfs->z_vfs->vfs_mntpoint, snapname);
+ } else
+ error = SET_ERROR(ENOENT);
+ mutex_exit(&zfsvfs->z_vfs->vfs_mntpt_lock);
+
out:
kmem_free(snapname, ZFS_MAX_DATASET_NAME_LEN);
spl_fstrans_unmark(cookie);
@@ -1050,6 +1053,66 @@ exportfs_flush(void)
}
/*
+ * Returns the path in char format for given struct path. Uses
+ * d_path exported by kernel to convert struct path to char
+ * format. Returns the correct path for mountpoints and chroot
+ * environments.
+ *
+ * If chroot environment has directories that are mounted with
+ * --bind or --rbind flag, d_path returns the complete path inside
+ * chroot environment but does not return the absolute path, i.e.
+ * the path to chroot environment is missing.
+ */
+static int
+get_root_path(struct path *path, char *buff, int len)
+{
+ char *path_buffer, *path_ptr;
+ int error = 0;
+
+ path_get(path);
+ path_buffer = kmem_zalloc(len, KM_SLEEP);
+ path_ptr = d_path(path, path_buffer, len);
+ if (IS_ERR(path_ptr))
+ error = SET_ERROR(-PTR_ERR(path_ptr));
+ else
+ strcpy(buff, path_ptr);
+
+ kmem_free(path_buffer, len);
+ path_put(path);
+ return (error);
+}
+
+/*
+ * Returns if the current process root is chrooted or not. Linux
+ * kernel exposes the task_struct for current process and init.
+ * Since init process root points to actual root filesystem when
+ * Linux runtime is reached, we can compare the current process
+ * root with init process root to determine if root of the current
+ * process is different from init, which can reliably determine if
+ * current process is in chroot context or not.
+ */
+static int
+is_current_chrooted(void)
+{
+ struct task_struct *curr = current, *global = &init_task;
+ struct path cr_root, gl_root;
+
+ task_lock(curr);
+ get_fs_root(curr->fs, &cr_root);
+ task_unlock(curr);
+
+ task_lock(global);
+ get_fs_root(global->fs, &gl_root);
+ task_unlock(global);
+
+ int chrooted = !path_equal(&cr_root, &gl_root);
+ path_put(&gl_root);
+ path_put(&cr_root);
+
+ return (chrooted);
+}
+
+/*
* Attempt to unmount a snapshot by making a call to user space.
* There is no assurance that this can or will succeed, is just a
* best effort. In the case where it does fail, perhaps because
@@ -1123,14 +1186,50 @@ zfsctl_snapshot_mount(struct path *path, int flags)
if (error)
goto error;
+ if (is_current_chrooted() == 0) {
+ /*
+ * Current process is not in chroot context
+ */
+
+ char *m = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
+ struct path mnt_path;
+ mnt_path.mnt = path->mnt;
+ mnt_path.dentry = path->mnt->mnt_root;
+
+ /*
+ * Get path to current mountpoint
+ */
+ error = get_root_path(&mnt_path, m, MAXPATHLEN);
+ if (error != 0) {
+ kmem_free(m, MAXPATHLEN);
+ goto error;
+ }
+ mutex_enter(&zfsvfs->z_vfs->vfs_mntpt_lock);
+ if (zfsvfs->z_vfs->vfs_mntpoint != NULL) {
+ /*
+ * If current mountpoint and vfs_mntpoint are not same,
+ * store current mountpoint in vfs_mntpoint.
+ */
+ if (strcmp(zfsvfs->z_vfs->vfs_mntpoint, m) != 0) {
+ kmem_strfree(zfsvfs->z_vfs->vfs_mntpoint);
+ zfsvfs->z_vfs->vfs_mntpoint = kmem_strdup(m);
+ }
+ } else
+ zfsvfs->z_vfs->vfs_mntpoint = kmem_strdup(m);
+ mutex_exit(&zfsvfs->z_vfs->vfs_mntpt_lock);
+ kmem_free(m, MAXPATHLEN);
+ }
+
/*
* Construct a mount point path from sb of the ctldir inode and dirent
* name, instead of from d_path(), so that chroot'd process doesn't fail
* on mount.zfs(8).
*/
+ mutex_enter(&zfsvfs->z_vfs->vfs_mntpt_lock);
snprintf(full_path, MAXPATHLEN, "%s/.zfs/snapshot/%s",
zfsvfs->z_vfs->vfs_mntpoint ? zfsvfs->z_vfs->vfs_mntpoint : "",
dname(dentry));
+ mutex_exit(&zfsvfs->z_vfs->vfs_mntpt_lock);
snprintf(options, 7, "%s",
zfs_snapshot_no_setuid ? "nosuid" : "suid");
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c
index de3e8c89cfdd..3c53a8a315c3 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c
@@ -115,7 +115,7 @@ zfsvfs_vfs_free(vfs_t *vfsp)
if (vfsp != NULL) {
if (vfsp->vfs_mntpoint != NULL)
kmem_strfree(vfsp->vfs_mntpoint);
-
+ mutex_destroy(&vfsp->vfs_mntpt_lock);
kmem_free(vfsp, sizeof (vfs_t));
}
}
@@ -197,10 +197,11 @@ zfsvfs_parse_option(char *option, int token, substring_t *args, vfs_t *vfsp)
vfsp->vfs_do_nbmand = B_TRUE;
break;
case TOKEN_MNTPOINT:
+ if (vfsp->vfs_mntpoint != NULL)
+ kmem_strfree(vfsp->vfs_mntpoint);
vfsp->vfs_mntpoint = match_strdup(&args[0]);
if (vfsp->vfs_mntpoint == NULL)
return (SET_ERROR(ENOMEM));
-
break;
default:
break;
@@ -219,6 +220,7 @@ zfsvfs_parse_options(char *mntopts, vfs_t **vfsp)
int error;
tmp_vfsp = kmem_zalloc(sizeof (vfs_t), KM_SLEEP);
+ mutex_init(&tmp_vfsp->vfs_mntpt_lock, NULL, MUTEX_DEFAULT, NULL);
if (mntopts != NULL) {
substring_t args[MAX_OPT_ARGS];
diff --git a/sys/contrib/openzfs/module/zcommon/zfs_valstr.c b/sys/contrib/openzfs/module/zcommon/zfs_valstr.c
index 622323bbbd5f..43bccea14a85 100644
--- a/sys/contrib/openzfs/module/zcommon/zfs_valstr.c
+++ b/sys/contrib/openzfs/module/zcommon/zfs_valstr.c
@@ -206,6 +206,7 @@ _VALSTR_BITFIELD_IMPL(zio_flag,
{ '.', "PR", "PROBE" },
{ '.', "TH", "TRYHARD" },
{ '.', "OP", "OPTIONAL" },
+ { '.', "RD", "DIO_READ" },
{ '.', "DQ", "DONT_QUEUE" },
{ '.', "DP", "DONT_PROPAGATE" },
{ '.', "BY", "IO_BYPASS" },
diff --git a/sys/contrib/openzfs/module/zfs/dmu_direct.c b/sys/contrib/openzfs/module/zfs/dmu_direct.c
index 91a7fd8df464..ed96e7515bc7 100644
--- a/sys/contrib/openzfs/module/zfs/dmu_direct.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_direct.c
@@ -330,7 +330,7 @@ dmu_read_abd(dnode_t *dn, uint64_t offset, uint64_t size,
*/
zio_t *cio = zio_read(rio, spa, bp, mbuf, db->db.db_size,
dmu_read_abd_done, NULL, ZIO_PRIORITY_SYNC_READ,
- ZIO_FLAG_CANFAIL, &zb);
+ ZIO_FLAG_CANFAIL | ZIO_FLAG_DIO_READ, &zb);
mutex_exit(&db->db_mtx);
zfs_racct_read(spa, db->db.db_size, 1, flags);
diff --git a/sys/contrib/openzfs/module/zfs/dsl_dataset.c b/sys/contrib/openzfs/module/zfs/dsl_dataset.c
index 6a9ed891093b..2248f644bee7 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_dataset.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_dataset.c
@@ -2987,6 +2987,7 @@ dsl_dataset_rename_snapshot_sync_impl(dsl_pool_t *dp,
dsl_dataset_t *ds;
uint64_t val;
dmu_tx_t *tx = ddrsa->ddrsa_tx;
+ char *oldname, *newname;
int error;
error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_oldsnapname, &val);
@@ -3011,8 +3012,14 @@ dsl_dataset_rename_snapshot_sync_impl(dsl_pool_t *dp,
VERIFY0(zap_add(dp->dp_meta_objset,
dsl_dataset_phys(hds)->ds_snapnames_zapobj,
ds->ds_snapname, 8, 1, &ds->ds_object, tx));
- zvol_rename_minors(dp->dp_spa, ddrsa->ddrsa_oldsnapname,
- ddrsa->ddrsa_newsnapname, B_TRUE);
+
+ oldname = kmem_asprintf("%s@%s", ddrsa->ddrsa_fsname,
+ ddrsa->ddrsa_oldsnapname);
+ newname = kmem_asprintf("%s@%s", ddrsa->ddrsa_fsname,
+ ddrsa->ddrsa_newsnapname);
+ zvol_rename_minors(dp->dp_spa, oldname, newname, B_TRUE);
+ kmem_strfree(oldname);
+ kmem_strfree(newname);
dsl_dataset_rele(ds, FTAG);
return (0);
diff --git a/sys/contrib/openzfs/module/zfs/vdev_draid.c b/sys/contrib/openzfs/module/zfs/vdev_draid.c
index 13bb33cc6871..419c8ac5bb28 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_draid.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_draid.c
@@ -1026,7 +1026,7 @@ vdev_draid_map_alloc_row(zio_t *zio, raidz_row_t **rrp, uint64_t io_offset,
ASSERT3U(vdc->vdc_nparity, >, 0);
- raidz_row_t *rr = vdev_raidz_row_alloc(groupwidth);
+ raidz_row_t *rr = vdev_raidz_row_alloc(groupwidth, zio);
rr->rr_bigcols = bc;
rr->rr_firstdatacol = vdc->vdc_nparity;
#ifdef ZFS_DEBUG
diff --git a/sys/contrib/openzfs/module/zfs/vdev_indirect.c b/sys/contrib/openzfs/module/zfs/vdev_indirect.c
index acb725696674..e3dba0257b21 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_indirect.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_indirect.c
@@ -34,6 +34,7 @@
#include <sys/zap.h>
#include <sys/abd.h>
#include <sys/zthr.h>
+#include <sys/fm/fs/zfs.h>
/*
* An indirect vdev corresponds to a vdev that has been removed. Since
@@ -1832,6 +1833,19 @@ vdev_indirect_io_done(zio_t *zio)
zio_bad_cksum_t zbc;
int ret = zio_checksum_error(zio, &zbc);
+ /*
+ * Any Direct I/O read that has a checksum error must be treated as
+ * suspicious as the contents of the buffer could be getting
+ * manipulated while the I/O is taking place. The checksum verify error
+ * will be reported to the top-level VDEV.
+ */
+ if (zio->io_flags & ZIO_FLAG_DIO_READ && ret == ECKSUM) {
+ zio->io_error = ret;
+ zio->io_flags |= ZIO_FLAG_DIO_CHKSUM_ERR;
+ zio_dio_chksum_verify_error_report(zio);
+ ret = 0;
+ }
+
if (ret == 0) {
zio_checksum_verified(zio);
return;
diff --git a/sys/contrib/openzfs/module/zfs/vdev_mirror.c b/sys/contrib/openzfs/module/zfs/vdev_mirror.c
index 102eacb03349..65a840bf9728 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_mirror.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_mirror.c
@@ -765,6 +765,27 @@ vdev_mirror_io_done(zio_t *zio)
ASSERT(zio->io_type == ZIO_TYPE_READ);
/*
+ * Any Direct I/O read that has a checksum error must be treated as
+ * suspicious as the contents of the buffer could be getting
+ * manipulated while the I/O is taking place. The checksum verify error
+ * will be reported to the top-level Mirror VDEV.
+ *
+ * There will be no attempt at reading any additional data copies. If
+ * the buffer is still being manipulated while attempting to read from
+ * another child, there exists a possibility that the checksum could be
+ * verified as valid. However, the buffer contents could again get
+ * manipulated after verifying the checksum. This would lead to bad data
+ * being written out during self healing.
+ */
+ if ((zio->io_flags & ZIO_FLAG_DIO_READ) &&
+ (zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR)) {
+ zio_dio_chksum_verify_error_report(zio);
+ zio->io_error = vdev_mirror_worst_error(mm);
+ ASSERT3U(zio->io_error, ==, ECKSUM);
+ return;
+ }
+
+ /*
* If we don't have a good copy yet, keep trying other children.
*/
if (good_copies == 0 && (c = vdev_mirror_child_select(zio)) != -1) {
diff --git a/sys/contrib/openzfs/module/zfs/vdev_raidz.c b/sys/contrib/openzfs/module/zfs/vdev_raidz.c
index 15c8b8ca6016..5e330626be2b 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_raidz.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_raidz.c
@@ -433,7 +433,7 @@ const zio_vsd_ops_t vdev_raidz_vsd_ops = {
};
raidz_row_t *
-vdev_raidz_row_alloc(int cols)
+vdev_raidz_row_alloc(int cols, zio_t *zio)
{
raidz_row_t *rr =
kmem_zalloc(offsetof(raidz_row_t, rr_col[cols]), KM_SLEEP);
@@ -445,7 +445,17 @@ vdev_raidz_row_alloc(int cols)
raidz_col_t *rc = &rr->rr_col[c];
rc->rc_shadow_devidx = INT_MAX;
rc->rc_shadow_offset = UINT64_MAX;
- rc->rc_allow_repair = 1;
+ /*
+ * We can not allow self healing to take place for Direct I/O
+ * reads. There is nothing that stops the buffer contents from
+ * being manipulated while the I/O is in flight. It is possible
+ * that the checksum could be verified on the buffer and then
+ * the contents of that buffer are manipulated afterwards. This
+ * could lead to bad data being written out during self
+ * healing.
+ */
+ if (!(zio->io_flags & ZIO_FLAG_DIO_READ))
+ rc->rc_allow_repair = 1;
}
return (rr);
}
@@ -619,7 +629,7 @@ vdev_raidz_map_alloc(zio_t *zio, uint64_t ashift, uint64_t dcols,
}
ASSERT3U(acols, <=, scols);
- rr = vdev_raidz_row_alloc(scols);
+ rr = vdev_raidz_row_alloc(scols, zio);
rm->rm_row[0] = rr;
rr->rr_cols = acols;
rr->rr_bigcols = bc;
@@ -765,7 +775,7 @@ vdev_raidz_map_alloc_expanded(zio_t *zio,
for (uint64_t row = 0; row < rows; row++) {
boolean_t row_use_scratch = B_FALSE;
- raidz_row_t *rr = vdev_raidz_row_alloc(cols);
+ raidz_row_t *rr = vdev_raidz_row_alloc(cols, zio);
rm->rm_row[row] = rr;
/* The starting RAIDZ (parent) vdev sector of the row. */
@@ -2633,6 +2643,20 @@ raidz_checksum_verify(zio_t *zio)
raidz_map_t *rm = zio->io_vsd;
int ret = zio_checksum_error(zio, &zbc);
+ /*
+ * Any Direct I/O read that has a checksum error must be treated as
+ * suspicious as the contents of the buffer could be getting
+ * manipulated while the I/O is taking place. The checksum verify error
+ * will be reported to the top-level RAIDZ VDEV.
+ */
+ if (zio->io_flags & ZIO_FLAG_DIO_READ && ret == ECKSUM) {
+ zio->io_error = ret;
+ zio->io_flags |= ZIO_FLAG_DIO_CHKSUM_ERR;
+ zio_dio_chksum_verify_error_report(zio);
+ zio_checksum_verified(zio);
+ return (0);
+ }
+
if (ret != 0 && zbc.zbc_injected != 0)
rm->rm_ecksuminjected = 1;
@@ -2776,6 +2800,11 @@ vdev_raidz_io_done_verified(zio_t *zio, raidz_row_t *rr)
(rc->rc_error == 0 || rc->rc_size == 0)) {
continue;
}
+ /*
+ * We do not allow self healing for Direct I/O reads.
+ * See comment in vdev_raidz_row_alloc().
+ */
+ ASSERT0(zio->io_flags & ZIO_FLAG_DIO_READ);
zfs_dbgmsg("zio=%px repairing c=%u devidx=%u "
"offset=%llx",
@@ -2979,6 +3008,8 @@ raidz_reconstruct(zio_t *zio, int *ltgts, int ntgts, int nparity)
/* Check for success */
if (raidz_checksum_verify(zio) == 0) {
+ if (zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR)
+ return (0);
/* Reconstruction succeeded - report errors */
for (int i = 0; i < rm->rm_nrows; i++) {
@@ -3379,7 +3410,6 @@ vdev_raidz_io_done_unrecoverable(zio_t *zio)
zio_bad_cksum_t zbc;
zbc.zbc_has_cksum = 0;
zbc.zbc_injected = rm->rm_ecksuminjected;
-
mutex_enter(&cvd->vdev_stat_lock);
cvd->vdev_stat.vs_checksum_errors++;
mutex_exit(&cvd->vdev_stat_lock);
@@ -3444,6 +3474,9 @@ vdev_raidz_io_done(zio_t *zio)
}
if (raidz_checksum_verify(zio) == 0) {
+ if (zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR)
+ goto done;
+
for (int i = 0; i < rm->rm_nrows; i++) {
raidz_row_t *rr = rm->rm_row[i];
vdev_raidz_io_done_verified(zio, rr);
@@ -3538,6 +3571,7 @@ vdev_raidz_io_done(zio_t *zio)
}
}
}
+done:
if (rm->rm_lr != NULL) {
zfs_rangelock_exit(rm->rm_lr);
rm->rm_lr = NULL;
diff --git a/sys/contrib/openzfs/module/zfs/zfs_vnops.c b/sys/contrib/openzfs/module/zfs/zfs_vnops.c
index a96f508ffac0..0799f17758bf 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_vnops.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_vnops.c
@@ -77,8 +77,15 @@ static int zfs_bclone_wait_dirty = 0;
* Enable Direct I/O. If this setting is 0, then all I/O requests will be
* directed through the ARC acting as though the dataset property direct was
* set to disabled.
+ *
+ * Disabled by default on FreeBSD until a potential range locking issue in
+ * zfs_getpages() can be resolved.
*/
+#ifdef __FreeBSD__
static int zfs_dio_enabled = 0;
+#else
+static int zfs_dio_enabled = 1;
+#endif
/*
@@ -303,6 +310,7 @@ zfs_read(struct znode *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
(void) cr;
int error = 0;
boolean_t frsync = B_FALSE;
+ boolean_t dio_checksum_failure = B_FALSE;
zfsvfs_t *zfsvfs = ZTOZSB(zp);
if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
@@ -424,8 +432,26 @@ zfs_read(struct znode *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
if (error) {
/* convert checksum errors into IO errors */
- if (error == ECKSUM)
- error = SET_ERROR(EIO);
+ if (error == ECKSUM) {
+ /*
+ * If a Direct I/O read returned a checksum
+ * verify error, then it must be treated as
+ * suspicious. The contents of the buffer could
+ * have been manipulated while the I/O was in
+ * flight. In this case, the remainder of I/O
+ * request will just be reissued through the
+ * ARC.
+ */
+ if (uio->uio_extflg & UIO_DIRECT) {
+ dio_checksum_failure = B_TRUE;
+ uio->uio_extflg &= ~UIO_DIRECT;
+ n += dio_remaining_resid;
+ dio_remaining_resid = 0;
+ continue;
+ } else {
+ error = SET_ERROR(EIO);
+ }
+ }
#if defined(__linux__)
/*
@@ -472,6 +498,9 @@ zfs_read(struct znode *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
out:
zfs_rangelock_exit(lr);
+ if (dio_checksum_failure == B_TRUE)
+ uio->uio_extflg |= UIO_DIRECT;
+
/*
* Cleanup for Direct I/O if requested.
*/
diff --git a/sys/contrib/openzfs/module/zfs/zio.c b/sys/contrib/openzfs/module/zfs/zio.c
index b26f5e80abfb..a5daf73d59ba 100644
--- a/sys/contrib/openzfs/module/zfs/zio.c
+++ b/sys/contrib/openzfs/module/zfs/zio.c
@@ -804,11 +804,11 @@ zio_notify_parent(zio_t *pio, zio_t *zio, enum zio_wait_type wait,
pio->io_reexecute |= zio->io_reexecute;
ASSERT3U(*countp, >, 0);
- if (zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR) {
- ASSERT3U(*errorp, ==, EIO);
- ASSERT3U(pio->io_child_type, ==, ZIO_CHILD_LOGICAL);
+ /*
+ * Propagate the Direct I/O checksum verify failure to the parent.
+ */
+ if (zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR)
pio->io_flags |= ZIO_FLAG_DIO_CHKSUM_ERR;
- }
(*countp)--;
@@ -1573,6 +1573,14 @@ zio_vdev_child_io(zio_t *pio, blkptr_t *bp, vdev_t *vd, uint64_t offset,
*/
pipeline |= ZIO_STAGE_CHECKSUM_VERIFY;
pio->io_pipeline &= ~ZIO_STAGE_CHECKSUM_VERIFY;
+ /*
+ * We never allow the mirror VDEV to attempt reading from any
+ * additional data copies after the first Direct I/O checksum
+ * verify failure. This is to avoid bad data being written out
+ * through the mirror during self healing. See comment in
+ * vdev_mirror_io_done() for more details.
+ */
+ ASSERT0(pio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR);
} else if (type == ZIO_TYPE_WRITE &&
pio->io_prop.zp_direct_write == B_TRUE) {
/*
@@ -4555,18 +4563,18 @@ zio_vdev_io_assess(zio_t *zio)
}
/*
- * If a Direct I/O write checksum verify error has occurred then this
- * I/O should not attempt to be issued again. Instead the EIO will
- * be returned.
+ * If a Direct I/O operation has a checksum verify error then this I/O
+ * should not attempt to be issued again.
*/
if (zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR) {
- ASSERT3U(zio->io_child_type, ==, ZIO_CHILD_LOGICAL);
- ASSERT3U(zio->io_error, ==, EIO);
+ if (zio->io_type == ZIO_TYPE_WRITE) {
+ ASSERT3U(zio->io_child_type, ==, ZIO_CHILD_LOGICAL);
+ ASSERT3U(zio->io_error, ==, EIO);
+ }
zio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
return (zio);
}
-
if (zio_injection_enabled && zio->io_error == 0)
zio->io_error = zio_handle_fault_injection(zio, EIO);
@@ -4864,16 +4872,40 @@ zio_checksum_verify(zio_t *zio)
ASSERT3U(zio->io_prop.zp_checksum, ==, ZIO_CHECKSUM_LABEL);
}
+ ASSERT0(zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR);
+ IMPLY(zio->io_flags & ZIO_FLAG_DIO_READ,
+ !(zio->io_flags & ZIO_FLAG_SPECULATIVE));
+
if ((error = zio_checksum_error(zio, &info)) != 0) {
zio->io_error = error;
if (error == ECKSUM &&
!(zio->io_flags & ZIO_FLAG_SPECULATIVE)) {
- mutex_enter(&zio->io_vd->vdev_stat_lock);
- zio->io_vd->vdev_stat.vs_checksum_errors++;
- mutex_exit(&zio->io_vd->vdev_stat_lock);
- (void) zfs_ereport_start_checksum(zio->io_spa,
- zio->io_vd, &zio->io_bookmark, zio,
- zio->io_offset, zio->io_size, &info);
+ if (zio->io_flags & ZIO_FLAG_DIO_READ) {
+ zio->io_flags |= ZIO_FLAG_DIO_CHKSUM_ERR;
+ zio_t *pio = zio_unique_parent(zio);
+ /*
+ * Any Direct I/O read that has a checksum
+ * error must be treated as suspicious as the
+ * contents of the buffer could be getting
+ * manipulated while the I/O is taking place.
+ *
+ * The checksum verify error will only be
+ * reported here for disk and file VDEV's and
+ * will be reported on those that the failure
+ * occurred on. Other types of VDEV's report the
+ * verify failure in their own code paths.
+ */
+ if (pio->io_child_type == ZIO_CHILD_LOGICAL) {
+ zio_dio_chksum_verify_error_report(zio);
+ }
+ } else {
+ mutex_enter(&zio->io_vd->vdev_stat_lock);
+ zio->io_vd->vdev_stat.vs_checksum_errors++;
+ mutex_exit(&zio->io_vd->vdev_stat_lock);
+ (void) zfs_ereport_start_checksum(zio->io_spa,
+ zio->io_vd, &zio->io_bookmark, zio,
+ zio->io_offset, zio->io_size, &info);
+ }
}
}
@@ -4899,22 +4931,8 @@ zio_dio_checksum_verify(zio_t *zio)
if ((error = zio_checksum_error(zio, NULL)) != 0) {
zio->io_error = error;
if (error == ECKSUM) {
- mutex_enter(&zio->io_vd->vdev_stat_lock);
- zio->io_vd->vdev_stat.vs_dio_verify_errors++;
- mutex_exit(&zio->io_vd->vdev_stat_lock);
- zio->io_error = SET_ERROR(EIO);
zio->io_flags |= ZIO_FLAG_DIO_CHKSUM_ERR;
-
- /*
- * The EIO error must be propagated up to the logical
- * parent ZIO in zio_notify_parent() so it can be
- * returned to dmu_write_abd().
- */
- zio->io_flags &= ~ZIO_FLAG_DONT_PROPAGATE;
-
- (void) zfs_ereport_post(FM_EREPORT_ZFS_DIO_VERIFY,
- zio->io_spa, zio->io_vd, &zio->io_bookmark,
- zio, 0);
+ zio_dio_chksum_verify_error_report(zio);
}
}
@@ -4933,6 +4951,39 @@ zio_checksum_verified(zio_t *zio)
}
/*
+ * Report Direct I/O checksum verify error and create ZED event.
+ */
+void
+zio_dio_chksum_verify_error_report(zio_t *zio)
+{
+ ASSERT(zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR);
+
+ if (zio->io_child_type == ZIO_CHILD_LOGICAL)
+ return;
+
+ mutex_enter(&zio->io_vd->vdev_stat_lock);
+ zio->io_vd->vdev_stat.vs_dio_verify_errors++;
+ mutex_exit(&zio->io_vd->vdev_stat_lock);
+ if (zio->io_type == ZIO_TYPE_WRITE) {
+ /*
+ * Convert checksum error for writes into EIO.
+ */
+ zio->io_error = SET_ERROR(EIO);
+ /*
+ * Report dio_verify_wr ZED event.
+ */
+ (void) zfs_ereport_post(FM_EREPORT_ZFS_DIO_VERIFY_WR,
+ zio->io_spa, zio->io_vd, &zio->io_bookmark, zio, 0);
+ } else {
+ /*
+ * Report dio_verify_rd ZED event.
+ */
+ (void) zfs_ereport_post(FM_EREPORT_ZFS_DIO_VERIFY_RD,
+ zio->io_spa, zio->io_vd, &zio->io_bookmark, zio, 0);
+ }
+}
+
+/*
* ==========================================================================
* Error rank. Error are ranked in the order 0, ENXIO, ECKSUM, EIO, other.
* An error of 0 indicates success. ENXIO indicates whole-device failure,
@@ -5343,10 +5394,9 @@ zio_done(zio_t *zio)
if (zio->io_reexecute) {
/*
- * A Direct I/O write that has a checksum verify error should
- * not attempt to reexecute. Instead, EAGAIN should just be
- * propagated back up so the write can be attempt to be issued
- * through the ARC.
+ * A Direct I/O operation that has a checksum verify error
+ * should not attempt to reexecute. Instead, the error should
+ * just be propagated back.
*/
ASSERT(!(zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR));
diff --git a/sys/contrib/openzfs/rpm/generic/zfs.spec.in b/sys/contrib/openzfs/rpm/generic/zfs.spec.in
index c7a00c61f6bb..d0d850af2629 100644
--- a/sys/contrib/openzfs/rpm/generic/zfs.spec.in
+++ b/sys/contrib/openzfs/rpm/generic/zfs.spec.in
@@ -99,10 +99,10 @@ License: @ZFS_META_LICENSE@
URL: https://github.com/openzfs/zfs
Source0: %{name}-%{version}.tar.gz
BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n)
-Requires: libzpool5%{?_isa} = %{version}-%{release}
+Requires: libzpool6%{?_isa} = %{version}-%{release}
Requires: libnvpair3%{?_isa} = %{version}-%{release}
Requires: libuutil3%{?_isa} = %{version}-%{release}
-Requires: libzfs5%{?_isa} = %{version}-%{release}
+Requires: libzfs6%{?_isa} = %{version}-%{release}
Requires: %{name}-kmod = %{version}
Provides: %{name}-kmod-common = %{version}-%{release}
Obsoletes: spl <= %{version}
@@ -150,21 +150,22 @@ Requires: sysstat
%description
This package contains the core ZFS command line utilities.
-%package -n libzpool5
+%package -n libzpool6
Summary: Native ZFS pool library for Linux
Group: System Environment/Kernel
Obsoletes: libzpool2 <= %{version}
Obsoletes: libzpool4 <= %{version}
+Obsoletes: libzpool5 <= %{version}
-%description -n libzpool5
+%description -n libzpool6
This package contains the zpool library, which provides support
for managing zpools
%if %{defined ldconfig_scriptlets}
-%ldconfig_scriptlets -n libzpool5
+%ldconfig_scriptlets -n libzpool6
%else
-%post -n libzpool5 -p /sbin/ldconfig
-%postun -n libzpool5 -p /sbin/ldconfig
+%post -n libzpool6 -p /sbin/ldconfig
+%postun -n libzpool6 -p /sbin/ldconfig
%endif
%package -n libnvpair3
@@ -211,37 +212,39 @@ This library provides a variety of compatibility functions for OpenZFS:
# The library version is encoded in the package name. When updating the
# version information it is important to add an obsoletes line below for
# the previous version of the package.
-%package -n libzfs5
+%package -n libzfs6
Summary: Native ZFS filesystem library for Linux
Group: System Environment/Kernel
Obsoletes: libzfs2 <= %{version}
Obsoletes: libzfs4 <= %{version}
+Obsoletes: libzfs5 <= %{version}
-%description -n libzfs5
+%description -n libzfs6
This package provides support for managing ZFS filesystems
%if %{defined ldconfig_scriptlets}
-%ldconfig_scriptlets -n libzfs5
+%ldconfig_scriptlets -n libzfs6
%else
-%post -n libzfs5 -p /sbin/ldconfig
-%postun -n libzfs5 -p /sbin/ldconfig
+%post -n libzfs6 -p /sbin/ldconfig
+%postun -n libzfs6 -p /sbin/ldconfig
%endif
-%package -n libzfs5-devel
+%package -n libzfs6-devel
Summary: Development headers
Group: System Environment/Kernel
-Requires: libzfs5%{?_isa} = %{version}-%{release}
-Requires: libzpool5%{?_isa} = %{version}-%{release}
+Requires: libzfs6%{?_isa} = %{version}-%{release}
+Requires: libzpool6%{?_isa} = %{version}-%{release}
Requires: libnvpair3%{?_isa} = %{version}-%{release}
Requires: libuutil3%{?_isa} = %{version}-%{release}
-Provides: libzpool5-devel = %{version}-%{release}
+Provides: libzpool6-devel = %{version}-%{release}
Provides: libnvpair3-devel = %{version}-%{release}
Provides: libuutil3-devel = %{version}-%{release}
Obsoletes: zfs-devel <= %{version}
Obsoletes: libzfs2-devel <= %{version}
Obsoletes: libzfs4-devel <= %{version}
+Obsoletes: libzfs5-devel <= %{version}
-%description -n libzfs5-devel
+%description -n libzfs6-devel
This package contains the header files needed for building additional
applications against the ZFS libraries.
@@ -290,7 +293,7 @@ Summary: Python %{python_version} wrapper for libzfs_core
Group: Development/Languages/Python
License: Apache-2.0
BuildArch: noarch
-Requires: libzfs5 = %{version}-%{release}
+Requires: libzfs6 = %{version}-%{release}
Requires: libnvpair3 = %{version}-%{release}
Requires: libffi
Requires: python%{__python_pkg_version}
@@ -534,7 +537,7 @@ systemctl --system daemon-reload >/dev/null || true
%config(noreplace) %{_bashcompletiondir}/zfs
%config(noreplace) %{_bashcompletiondir}/zpool
-%files -n libzpool5
+%files -n libzpool6
%{_libdir}/libzpool.so.*
%files -n libnvpair3
@@ -543,10 +546,10 @@ systemctl --system daemon-reload >/dev/null || true
%files -n libuutil3
%{_libdir}/libuutil.so.*
-%files -n libzfs5
+%files -n libzfs6
%{_libdir}/libzfs*.so.*
-%files -n libzfs5-devel
+%files -n libzfs6-devel
%{_pkgconfigdir}/libzfs.pc
%{_pkgconfigdir}/libzfsbootenv.pc
%{_pkgconfigdir}/libzfs_core.pc
diff --git a/sys/contrib/openzfs/tests/runfiles/common.run b/sys/contrib/openzfs/tests/runfiles/common.run
index f89a4b3e0aae..fc4adc42d00a 100644
--- a/sys/contrib/openzfs/tests/runfiles/common.run
+++ b/sys/contrib/openzfs/tests/runfiles/common.run
@@ -697,8 +697,8 @@ tags = ['functional', 'delegate']
tests = ['dio_aligned_block', 'dio_async_always', 'dio_async_fio_ioengines',
'dio_compression', 'dio_dedup', 'dio_encryption', 'dio_grow_block',
'dio_max_recordsize', 'dio_mixed', 'dio_mmap', 'dio_overwrites',
- 'dio_property', 'dio_random', 'dio_recordsize', 'dio_unaligned_block',
- 'dio_unaligned_filesize']
+ 'dio_property', 'dio_random', 'dio_read_verify', 'dio_recordsize',
+ 'dio_unaligned_block', 'dio_unaligned_filesize']
tags = ['functional', 'direct']
[tests/functional/exec]
diff --git a/sys/contrib/openzfs/tests/zfs-tests/cmd/manipulate_user_buffer.c b/sys/contrib/openzfs/tests/zfs-tests/cmd/manipulate_user_buffer.c
index 714f42200557..173581094443 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/cmd/manipulate_user_buffer.c
+++ b/sys/contrib/openzfs/tests/zfs-tests/cmd/manipulate_user_buffer.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright (c) 2022 by Triad National Security, LLC.
+ * Copyright (c) 2024 by Triad National Security, LLC.
*/
#include <sys/types.h>
@@ -39,51 +39,59 @@
#define MIN(a, b) ((a) < (b)) ? (a) : (b)
#endif
-static char *outputfile = NULL;
+static char *filename = NULL;
static int blocksize = 131072; /* 128K */
-static int wr_err_expected = 0;
+static int err_expected = 0;
+static int read_op = 0;
+static int write_op = 0;
static int numblocks = 100;
static char *execname = NULL;
static int print_usage = 0;
static int randompattern = 0;
-static int ofd;
+static int fd;
char *buf = NULL;
typedef struct {
- int entire_file_written;
+ int entire_file_completed;
} pthread_args_t;
static void
usage(void)
{
(void) fprintf(stderr,
- "usage %s -o outputfile [-b blocksize] [-e wr_error_expected]\n"
- " [-n numblocks] [-p randpattern] [-h help]\n"
+ "usage %s -f filename [-b blocksize] [-e wr_error_expected]\n"
+ " [-n numblocks] [-p randompattern] -r read_op \n"
+ " -w write_op [-h help]\n"
"\n"
"Testing whether checksum verify works correctly for O_DIRECT.\n"
"when manipulating the contents of a userspace buffer.\n"
"\n"
- " outputfile: File to write to.\n"
- " blocksize: Size of each block to write (must be at \n"
- " least >= 512).\n"
- " wr_err_expected: Whether pwrite() is expected to return EIO\n"
- " while manipulating the contents of the\n"
- " buffer.\n"
- " numblocks: Total number of blocksized blocks to\n"
- " write.\n"
- " randpattern: Fill data buffer with random data. Default\n"
- " behavior is to fill the buffer with the \n"
- " known data pattern (0xdeadbeef).\n"
+ " filename: File to read or write to.\n"
+ " blocksize: Size of each block to write (must be at \n"
+ " least >= 512).\n"
+ " err_expected: Whether write() is expected to return EIO\n"
+ " while manipulating the contents of the\n"
+ " buffer.\n"
+ " numblocks: Total number of blocksized blocks to\n"
+ " write.\n"
+ " read_op: Perform reads to the filename file while\n"
+ " while manipulating the buffer contents\n"
+ " write_op: Perform writes to the filename file while\n"
+ " manipulating the buffer contents\n"
+ " randompattern: Fill data buffer with random data for \n"
+ " writes. Default behavior is to fill the \n"
+ " buffer with known data pattern (0xdeadbeef)\n"
" help: Print usage information and exit.\n"
"\n"
" Required parameters:\n"
- " outputfile\n"
+ " filename\n"
+ " read_op or write_op\n"
"\n"
" Default Values:\n"
" blocksize -> 131072\n"
" wr_err_expexted -> false\n"
" numblocks -> 100\n"
- " randpattern -> false\n",
+ " randompattern -> false\n",
execname);
(void) exit(1);
}
@@ -97,16 +105,21 @@ parse_options(int argc, char *argv[])
extern int optind, optopt;
execname = argv[0];
- while ((c = getopt(argc, argv, "b:ehn:o:p")) != -1) {
+ while ((c = getopt(argc, argv, "b:ef:hn:rw")) != -1) {
switch (c) {
case 'b':
blocksize = atoi(optarg);
break;
case 'e':
- wr_err_expected = 1;
+ err_expected = 1;
break;
+ case 'f':
+ filename = optarg;
+ break;
+
+
case 'h':
print_usage = 1;
break;
@@ -115,12 +128,12 @@ parse_options(int argc, char *argv[])
numblocks = atoi(optarg);
break;
- case 'o':
- outputfile = optarg;
+ case 'r':
+ read_op = 1;
break;
- case 'p':
- randompattern = 1;
+ case 'w':
+ write_op = 1;
break;
case ':':
@@ -141,7 +154,8 @@ parse_options(int argc, char *argv[])
if (errflag || print_usage == 1)
(void) usage();
- if (blocksize < 512 || outputfile == NULL || numblocks <= 0) {
+ if (blocksize < 512 || filename == NULL || numblocks <= 0 ||
+ (read_op == 0 && write_op == 0)) {
(void) fprintf(stderr,
"Required paramater(s) missing or invalid.\n");
(void) usage();
@@ -160,10 +174,10 @@ write_thread(void *arg)
ssize_t wrote = 0;
pthread_args_t *args = (pthread_args_t *)arg;
- while (!args->entire_file_written) {
- wrote = pwrite(ofd, buf, blocksize, offset);
+ while (!args->entire_file_completed) {
+ wrote = pwrite(fd, buf, blocksize, offset);
if (wrote != blocksize) {
- if (wr_err_expected)
+ if (err_expected)
assert(errno == EIO);
else
exit(2);
@@ -173,7 +187,35 @@ write_thread(void *arg)
left -= blocksize;
if (left == 0)
- args->entire_file_written = 1;
+ args->entire_file_completed = 1;
+ }
+
+ pthread_exit(NULL);
+}
+
+/*
+ * Read blocksize * numblocks from the file using O_DIRECT.
+ */
+static void *
+read_thread(void *arg)
+{
+ size_t offset = 0;
+ int total_data = blocksize * numblocks;
+ int left = total_data;
+ ssize_t read = 0;
+ pthread_args_t *args = (pthread_args_t *)arg;
+
+ while (!args->entire_file_completed) {
+ read = pread(fd, buf, blocksize, offset);
+ if (read != blocksize) {
+ exit(2);
+ }
+
+ offset = ((offset + blocksize) % total_data);
+ left -= blocksize;
+
+ if (left == 0)
+ args->entire_file_completed = 1;
}
pthread_exit(NULL);
@@ -189,7 +231,7 @@ manipulate_buf_thread(void *arg)
char rand_char;
pthread_args_t *args = (pthread_args_t *)arg;
- while (!args->entire_file_written) {
+ while (!args->entire_file_completed) {
rand_offset = (rand() % blocksize);
rand_char = (rand() % (126 - 33) + 33);
buf[rand_offset] = rand_char;
@@ -202,9 +244,9 @@ int
main(int argc, char *argv[])
{
const char *datapattern = "0xdeadbeef";
- int ofd_flags = O_WRONLY | O_CREAT | O_DIRECT;
+ int fd_flags = O_DIRECT;
mode_t mode = S_IRUSR | S_IWUSR;
- pthread_t write_thr;
+ pthread_t io_thr;
pthread_t manipul_thr;
int left = blocksize;
int offset = 0;
@@ -213,9 +255,15 @@ main(int argc, char *argv[])
parse_options(argc, argv);
- ofd = open(outputfile, ofd_flags, mode);
- if (ofd == -1) {
- (void) fprintf(stderr, "%s, %s\n", execname, outputfile);
+ if (write_op) {
+ fd_flags |= (O_WRONLY | O_CREAT);
+ } else {
+ fd_flags |= O_RDONLY;
+ }
+
+ fd = open(filename, fd_flags, mode);
+ if (fd == -1) {
+ (void) fprintf(stderr, "%s, %s\n", execname, filename);
perror("open");
exit(2);
}
@@ -228,24 +276,22 @@ main(int argc, char *argv[])
exit(2);
}
- if (!randompattern) {
- /* Putting known data pattern in buffer */
- while (left) {
- size_t amt = MIN(strlen(datapattern), left);
- memcpy(&buf[offset], datapattern, amt);
- offset += amt;
- left -= amt;
+ if (write_op) {
+ if (!randompattern) {
+ /* Putting known data pattern in buffer */
+ while (left) {
+ size_t amt = MIN(strlen(datapattern), left);
+ memcpy(&buf[offset], datapattern, amt);
+ offset += amt;
+ left -= amt;
+ }
+ } else {
+ /* Putting random data in buffer */
+ for (int i = 0; i < blocksize; i++)
+ buf[i] = rand();
}
- } else {
- /* Putting random data in buffer */
- for (int i = 0; i < blocksize; i++)
- buf[i] = rand();
}
- /*
- * Writing using O_DIRECT while manipulating the buffer contents until
- * the entire file is written.
- */
if ((rc = pthread_create(&manipul_thr, NULL, manipulate_buf_thread,
&args))) {
fprintf(stderr, "error: pthreads_create, manipul_thr, "
@@ -253,18 +299,34 @@ main(int argc, char *argv[])
exit(2);
}
- if ((rc = pthread_create(&write_thr, NULL, write_thread, &args))) {
- fprintf(stderr, "error: pthreads_create, write_thr, "
- "rc: %d\n", rc);
- exit(2);
+ if (write_op) {
+ /*
+ * Writing using O_DIRECT while manipulating the buffer contents
+ * until the entire file is written.
+ */
+ if ((rc = pthread_create(&io_thr, NULL, write_thread, &args))) {
+ fprintf(stderr, "error: pthreads_create, io_thr, "
+ "rc: %d\n", rc);
+ exit(2);
+ }
+ } else {
+ /*
+ * Reading using O_DIRECT while manipulating the buffer contents
+ * until the entire file is read.
+ */
+ if ((rc = pthread_create(&io_thr, NULL, read_thread, &args))) {
+ fprintf(stderr, "error: pthreads_create, io_thr, "
+ "rc: %d\n", rc);
+ exit(2);
+ }
}
- pthread_join(write_thr, NULL);
+ pthread_join(io_thr, NULL);
pthread_join(manipul_thr, NULL);
- assert(args.entire_file_written == 1);
+ assert(args.entire_file_completed == 1);
- (void) close(ofd);
+ (void) close(fd);
free(buf);
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am b/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am
index 206ee8ac1542..bc767b9f624f 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am
@@ -1477,6 +1477,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/direct/dio_overwrites.ksh \
functional/direct/dio_property.ksh \
functional/direct/dio_random.ksh \
+ functional/direct/dio_read_verify.ksh \
functional/direct/dio_recordsize.ksh \
functional/direct/dio_unaligned_block.ksh \
functional/direct/dio_unaligned_filesize.ksh \
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/json/json_sanity.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/json/json_sanity.ksh
index e598dd57181e..d092a3b0e828 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/json/json_sanity.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/json/json_sanity.ksh
@@ -30,28 +30,39 @@
# STRATEGY:
# 1. Run different zfs/zpool -j commands and check for valid JSON
+#
+# -j and --json mean the same thing. Each command will be run twice, replacing
+# JSONFLAG with the flag under test.
list=(
- "zpool status -j -g --json-int --json-flat-vdevs --json-pool-key-guid"
- "zpool status -p -j -g --json-int --json-flat-vdevs --json-pool-key-guid"
- "zpool status -j -c upath"
- "zpool status -j"
- "zpool status -j testpool1"
- "zpool list -j"
- "zpool list -j -g"
- "zpool list -j -o fragmentation"
- "zpool get -j size"
- "zpool get -j all"
- "zpool version -j"
- "zfs list -j"
- "zfs list -j testpool1"
- "zfs get -j all"
- "zfs get -j available"
- "zfs mount -j"
- "zfs version -j"
+ "zpool status JSONFLAG -g --json-int --json-flat-vdevs --json-pool-key-guid"
+ "zpool status -p JSONFLAG -g --json-int --json-flat-vdevs --json-pool-key-guid"
+ "zpool status JSONFLAG -c upath"
+ "zpool status JSONFLAG"
+ "zpool status JSONFLAG testpool1"
+ "zpool list JSONFLAG"
+ "zpool list JSONFLAG -g"
+ "zpool list JSONFLAG -o fragmentation"
+ "zpool get JSONFLAG size"
+ "zpool get JSONFLAG all"
+ "zpool version JSONFLAG"
+ "zfs list JSONFLAG"
+ "zfs list JSONFLAG testpool1"
+ "zfs get JSONFLAG all"
+ "zfs get JSONFLAG available"
+ "zfs mount JSONFLAG"
+ "zfs version JSONFLAG"
)
-for cmd in "${list[@]}" ; do
- log_must eval "$cmd | jq > /dev/null"
-done
+#
+# Run every command template in $list with the JSONFLAG placeholder replaced
+# by the flag under test, piping each command's output through jq.
+#
+# $1 - JSON flag to substitute for JSONFLAG (e.g. -j or --json)
+#
+function run_json_tests
+{
+	typeset flag=$1
+	# Keep the loop state local so it does not leak into the caller's scope.
+	typeset template cmd
+
+	for template in "${list[@]}" ; do
+		# Substitute into a copy rather than rewriting the loop variable.
+		cmd=${template//JSONFLAG/$flag}
+		log_must eval "$cmd | jq > /dev/null"
+	done
+}
+
+log_must run_json_tests -j
+log_must run_json_tests --json
log_pass "zpool and zfs commands outputted valid JSON"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_parallel_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_parallel_pos.ksh
index 71b2437a37ec..57a7412e37cc 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_parallel_pos.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_parallel_pos.ksh
@@ -113,7 +113,7 @@ wait
parallel_time=$SECONDS
log_note "asyncronously imported 4 pools in $parallel_time seconds"
-log_must test $parallel_time -lt $(($sequential_time / 3))
+log_must test $parallel_time -lt $(($sequential_time / 2))
#
# export pools with import delay injectors
@@ -132,6 +132,6 @@ log_must zpool import -a -d $DEVICE_DIR -f
parallel_time=$SECONDS
log_note "asyncronously imported 4 pools in $parallel_time seconds"
-log_must test $parallel_time -lt $(($sequential_time / 3))
+log_must test $parallel_time -lt $(($sequential_time / 2))
log_pass "Pool imports occur in parallel"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/dedup/dedup_quota.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/dedup/dedup_quota.ksh
index 326152b510a9..b1657648b5a1 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/dedup/dedup_quota.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/dedup/dedup_quota.ksh
@@ -221,7 +221,7 @@ function ddt_dedup_vdev_limit
# For here, we just set the entry count a little higher than what we
# expect to allow for some instability.
#
- log_must test $(ddt_entries) -le 600000
+ log_must test $(ddt_entries) -le 650000
do_clean
}
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/direct/dio.kshlib b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/direct/dio.kshlib
index 3a70cf293967..5b3f893e1ce1 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/direct/dio.kshlib
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/direct/dio.kshlib
@@ -84,8 +84,9 @@ function get_zpool_status_chksum_verify_failures # pool_name vdev_type
function get_zed_dio_verify_events # pool
{
typeset pool=$1
+ typeset op=$2
- val=$(zpool events $pool | grep -c dio_verify)
+ val=$(zpool events $pool | grep -c "dio_verify_${op}")
echo "$val"
}
@@ -96,11 +97,12 @@ function get_zed_dio_verify_events # pool
# zpool events
# After getting that counts will clear the out the ZPool errors and events
#
-function check_dio_write_chksum_verify_failures # pool vdev_type expect_errors
+function check_dio_chksum_verify_failures # pool vdev_type expect_errors op
{
typeset pool=$1
typeset vdev_type=$2
typeset expect_errors=$3
+ typeset op=$4
typeset note_str="expecting none"
if [[ $expect_errors -ne 0 ]]; then
@@ -108,10 +110,10 @@ function check_dio_write_chksum_verify_failures # pool vdev_type expect_errors
fi
log_note "Checking for Direct I/O write checksum verify errors \
- $note_str on ZPool: $pool"
+ $note_str on ZPool: $pool with $vdev_type"
status_failures=$(get_zpool_status_chksum_verify_failures $pool $vdev_type)
- zed_dio_verify_events=$(get_zed_dio_verify_events $pool)
+ zed_dio_verify_events=$(get_zed_dio_verify_events $pool $op)
if [[ $expect_errors -ne 0 ]]; then
if [[ $status_failures -eq 0 ||
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/direct/dio_read_verify.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/direct/dio_read_verify.ksh
new file mode 100755
index 000000000000..456d429b1d99
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/direct/dio_read_verify.ksh
@@ -0,0 +1,107 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2024 by Triad National Security, LLC.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/direct/dio.cfg
+. $STF_SUITE/tests/functional/direct/dio.kshlib
+
+#
+# DESCRIPTION:
+# Verify checksum verify works for Direct I/O reads.
+#
+# STRATEGY:
+# 1. Create a zpool from each vdev type.
+# 2. Start a Direct I/O read workload while manipulating the user buffer
+# contents.
+# 3. Verify there are Direct I/O read verify failures using
+# zpool status -d and checking for zevents. We also make sure there
+# are no reported data errors.
+#
+
+verify_runnable "global"
+
+log_assert "Verify checksum verify works for Direct I/O reads."
+
+log_onexit dio_cleanup
+
+NUMBLOCKS=300
+BS=$((128 * 1024)) # 128k
+
+log_must truncate -s $MINVDEVSIZE $DIO_VDEVS
+
+# We will verify that there are no checksum errors for every Direct I/O read
+# while manipulating the buffer contents while the I/O is still in flight and
+# also that Direct I/O checksum verify failures and dio_verify_rd zevents are
+# reported.
+
+
+for type in "" "mirror" "raidz" "draid"; do
+	# An empty type creates a plain striped pool; give it a printable name.
+	typeset vdev_type=$type
+	if [[ "${vdev_type}" == "" ]]; then
+		vdev_type="stripe"
+	fi
+
+	log_note "Verifying every Direct I/O read verify with VDEV type \
+	    ${vdev_type}"
+
+	create_pool $TESTPOOL1 $type $DIO_VDEVS
+	log_must eval "zfs create -o recordsize=128k -o compression=off \
+	    $TESTPOOL1/$TESTFS1"
+
+	mntpnt=$(get_prop mountpoint $TESTPOOL1/$TESTFS1)
+	prev_dio_rd=$(get_iostats_stat $TESTPOOL1 direct_read_count)
+	prev_arc_rd=$(get_iostats_stat $TESTPOOL1 arc_read_count)
+
+	# Create the file before trying to manipulate the contents
+	log_must stride_dd -o "$mntpnt/direct-write.iso" -i /dev/urandom \
+	    -b $BS -c $NUMBLOCKS -D
+	# Manipulate the buffer contents while reading the file with Direct I/O
+	log_must manipulate_user_buffer -f "$mntpnt/direct-write.iso" \
+	    -n $NUMBLOCKS -b $BS -r
+
+	# Getting new Direct I/O and ARC read counts.
+	curr_dio_rd=$(get_iostats_stat $TESTPOOL1 direct_read_count)
+	curr_arc_rd=$(get_iostats_stat $TESTPOOL1 arc_read_count)
+	total_dio_rd=$((curr_dio_rd - prev_dio_rd))
+	total_arc_rd=$((curr_arc_rd - prev_arc_rd))
+
+	# Check the pool this iteration created and read from ($TESTPOOL1).
+	log_note "Making sure there are no checksum errors with the ZPool"
+	log_must check_pool_status $TESTPOOL1 "errors" "No known data errors"
+
+	log_note "Making sure we have Direct I/O and ARC reads logged"
+	if [[ $total_dio_rd -lt 1 ]]; then
+		log_fail "No Direct I/O reads $total_dio_rd"
+	fi
+	if [[ $total_arc_rd -lt 1 ]]; then
+		log_fail "No ARC reads $total_arc_rd"
+	fi
+
+	log_note "Making sure we have Direct I/O read checksum verifies with ZPool"
+	check_dio_chksum_verify_failures "$TESTPOOL1" "$vdev_type" 1 "rd"
+	destroy_pool $TESTPOOL1
+done
+
+log_pass "Verified checksum verify works for Direct I/O reads."
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/direct/dio_write_stable_pages.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/direct/dio_write_stable_pages.ksh
index efc9ee639184..ccdabc678a68 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/direct/dio_write_stable_pages.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/direct/dio_write_stable_pages.ksh
@@ -46,7 +46,7 @@ verify_runnable "global"
function cleanup
{
log_must rm -f "$mntpnt/direct-write.iso"
- check_dio_write_chksum_verify_failures $TESTPOOL "raidz" 0
+ check_dio_chksum_verify_failures $TESTPOOL "raidz" 0 "wr"
}
log_assert "Verify stable pages work for Direct I/O writes."
@@ -76,8 +76,8 @@ do
# Manipulate the user's buffer while running O_DIRECT write
# workload with the buffer.
- log_must manipulate_user_buffer -o "$mntpnt/direct-write.iso" \
- -n $NUMBLOCKS -b $BS
+ log_must manipulate_user_buffer -f "$mntpnt/direct-write.iso" \
+ -n $NUMBLOCKS -b $BS -w
# Reading back the contents of the file
log_must stride_dd -i $mntpnt/direct-write.iso -o /dev/null \
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/direct/dio_write_verify.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/direct/dio_write_verify.ksh
index 536459a35e6c..4eb9efe95ef1 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/direct/dio_write_verify.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/direct/dio_write_verify.ksh
@@ -91,8 +91,8 @@ log_must set_tunable32 VDEV_DIRECT_WR_VERIFY 0
log_note "Verifying no panics for Direct I/O writes with compression"
log_must zfs set compression=on $TESTPOOL/$TESTFS
prev_dio_wr=$(get_iostats_stat $TESTPOOL direct_write_count)
-log_must manipulate_user_buffer -o "$mntpnt/direct-write.iso" -n $NUMBLOCKS \
- -b $BS
+log_must manipulate_user_buffer -f "$mntpnt/direct-write.iso" -n $NUMBLOCKS \
+ -b $BS -w
curr_dio_wr=$(get_iostats_stat $TESTPOOL direct_write_count)
total_dio_wr=$((curr_dio_wr - prev_dio_wr))
@@ -116,8 +116,8 @@ for i in $(seq 1 $ITERATIONS); do
$i of $ITERATIONS with zfs_vdev_direct_write_verify=0"
prev_dio_wr=$(get_iostats_stat $TESTPOOL direct_write_count)
- log_must manipulate_user_buffer -o "$mntpnt/direct-write.iso" \
- -n $NUMBLOCKS -b $BS
+ log_must manipulate_user_buffer -f "$mntpnt/direct-write.iso" \
+ -n $NUMBLOCKS -b $BS -w
# Reading file back to verify checksum errors
filesize=$(get_file_size "$mntpnt/direct-write.iso")
@@ -144,7 +144,7 @@ for i in $(seq 1 $ITERATIONS); do
fi
log_note "Making sure we have no Direct I/O write checksum verifies \
with ZPool"
- check_dio_write_chksum_verify_failures $TESTPOOL "raidz" 0
+ check_dio_chksum_verify_failures $TESTPOOL "raidz" 0 "wr"
log_must rm -f "$mntpnt/direct-write.iso"
done
@@ -166,8 +166,8 @@ for i in $(seq 1 $ITERATIONS); do
$ITERATIONS with zfs_vdev_direct_write_verify=1"
prev_dio_wr=$(get_iostats_stat $TESTPOOL direct_write_count)
- log_must manipulate_user_buffer -o "$mntpnt/direct-write.iso" \
- -n $NUMBLOCKS -b $BS -e
+ log_must manipulate_user_buffer -f "$mntpnt/direct-write.iso" \
+ -n $NUMBLOCKS -b $BS -e -w
# Reading file back to verify there no are checksum errors
filesize=$(get_file_size "$mntpnt/direct-write.iso")
@@ -175,7 +175,7 @@ for i in $(seq 1 $ITERATIONS); do
log_must stride_dd -i "$mntpnt/direct-write.iso" -o /dev/null -b $BS \
-c $num_blocks
- # Getting new Direct I/O and ARC Write counts.
+ # Getting new Direct I/O write counts.
curr_dio_wr=$(get_iostats_stat $TESTPOOL direct_write_count)
total_dio_wr=$((curr_dio_wr - prev_dio_wr))
@@ -188,7 +188,7 @@ for i in $(seq 1 $ITERATIONS); do
fi
log_note "Making sure we have Direct I/O write checksum verifies with ZPool"
- check_dio_write_chksum_verify_failures "$TESTPOOL" "raidz" 1
+ check_dio_chksum_verify_failures "$TESTPOOL" "raidz" 1 "wr"
done
log_must rm -f "$mntpnt/direct-write.iso"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/replacement/resilver_restart_001.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/replacement/resilver_restart_001.ksh
index 629870adf9ea..101fa410bdb6 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/replacement/resilver_restart_001.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/replacement/resilver_restart_001.ksh
@@ -48,6 +48,8 @@ function cleanup
log_must set_tunable32 RESILVER_MIN_TIME_MS $ORIG_RESILVER_MIN_TIME
log_must set_tunable32 SCAN_SUSPEND_PROGRESS \
$ORIG_SCAN_SUSPEND_PROGRESS
+ log_must set_tunable32 RESILVER_DEFER_PERCENT \
+ $ORIG_RESILVER_DEFER_PERCENT
log_must set_tunable32 ZEVENT_LEN_MAX $ORIG_ZFS_ZEVENT_LEN_MAX
log_must zinject -c all
destroy_pool $TESTPOOL1
@@ -90,6 +92,7 @@ log_assert "Check for unnecessary resilver restarts"
ORIG_RESILVER_MIN_TIME=$(get_tunable RESILVER_MIN_TIME_MS)
ORIG_SCAN_SUSPEND_PROGRESS=$(get_tunable SCAN_SUSPEND_PROGRESS)
+ORIG_RESILVER_DEFER_PERCENT=$(get_tunable RESILVER_DEFER_PERCENT)
ORIG_ZFS_ZEVENT_LEN_MAX=$(get_tunable ZEVENT_LEN_MAX)
set -A RESTARTS -- '1' '2' '2' '2'
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_snapdev.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_snapdev.ksh
index 1fc2d2780b1c..af780b628ce4 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_snapdev.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_snapdev.ksh
@@ -117,5 +117,18 @@ log_must zfs set snapdev=visible $TESTPOOL
verify_inherited 'snapdev' 'hidden' $SUBZVOL $VOLFS
blockdev_missing $SUBSNAPDEV
blockdev_exists $SNAPDEV
+log_must zfs destroy $SNAP
+
+# 4. Verify "rename" is correctly reflected when "snapdev=visible"
+# 4.1 First create a snapshot and verify the device is present
+log_must zfs snapshot $SNAP
+log_must zfs set snapdev=visible $ZVOL
+blockdev_exists $SNAPDEV
+# 4.2 rename the snapshot and verify the devices are updated
+log_must zfs rename $SNAP $SNAP-new
+blockdev_missing $SNAPDEV
+blockdev_exists $SNAPDEV-new
+# 4.3 cleanup
+log_must zfs destroy $SNAP-new
log_pass "ZFS volume property 'snapdev' works as expected"
diff --git a/sys/contrib/openzfs/udev/vdev_id b/sys/contrib/openzfs/udev/vdev_id
index 7b5aab141997..5897fea3ed66 100755
--- a/sys/contrib/openzfs/udev/vdev_id
+++ b/sys/contrib/openzfs/udev/vdev_id
@@ -124,6 +124,7 @@ TOPOLOGY=
BAY=
ENCL_ID=""
UNIQ_ENCL_ID=""
+ZPAD=1
usage() {
cat << EOF
@@ -154,7 +155,7 @@ map_slot() {
if [ -z "$MAPPED_SLOT" ] ; then
MAPPED_SLOT=$LINUX_SLOT
fi
- printf "%d" "${MAPPED_SLOT}"
+ printf "%0${ZPAD}d" "${MAPPED_SLOT}"
}
map_channel() {
@@ -430,6 +431,15 @@ sas_handler() {
d=$(eval echo '$'{$i})
SLOT=$(echo "$d" | sed -e 's/^.*://')
;;
+ "bay_lun")
+ # Like 'bay' but with the LUN number appended. Added for SAS
+ # multi-actuator HDDs, where one physical drive has multiple
+ # LUNs, thus multiple logical drives share the same bay number
+ i=$((i + 2))
+ d=$(eval echo '$'{$i})
+ LUN="-lun$(echo "$d" | sed -e 's/^.*://')"
+ SLOT=$(cat "$end_device_dir/bay_identifier" 2>/dev/null)
+ ;;
"ses")
# look for this SAS path in all SCSI Enclosure Services
# (SES) enclosures
@@ -460,7 +470,7 @@ sas_handler() {
if [ -z "$CHAN" ] ; then
return
fi
- echo "${CHAN}"-"${JBOD}"-"${SLOT}${PART}"
+ echo "${CHAN}"-"${JBOD}"-"${SLOT}${LUN}${PART}"
else
CHAN=$(map_channel "$PCI_ID" "$PORT")
SLOT=$(map_slot "$SLOT" "$CHAN")
@@ -468,7 +478,7 @@ sas_handler() {
if [ -z "$CHAN" ] ; then
return
fi
- echo "${CHAN}${SLOT}${PART}"
+ echo "${CHAN}${SLOT}${LUN}${PART}"
fi
}
@@ -748,6 +758,8 @@ if [ -z "$BAY" ] ; then
BAY=$(awk '($1 == "slot") {print $2; exit}' "$CONFIG")
fi
+ZPAD=$(awk '($1 == "zpad_slot") {print $2; exit}' "$CONFIG")
+
TOPOLOGY=${TOPOLOGY:-sas_direct}
# Should we create /dev/by-enclosure symlinks?
diff --git a/sys/kern/subr_pctrie.c b/sys/kern/subr_pctrie.c
index 50216287845f..ea1c1cf881d2 100644
--- a/sys/kern/subr_pctrie.c
+++ b/sys/kern/subr_pctrie.c
@@ -958,7 +958,7 @@ pctrie_iter_lookup_le(struct pctrie_iter *it, uint64_t index)
/*
* If no such node was found, and instead this path leads only to nodes
- * > index, back up to find a subtrie with the least value > index.
+ * > index, back up to find a subtrie with the greatest value < index.
*/
if (pctrie_isleaf(node) ?
(m = pctrie_toval(node)) == NULL || *m > index :
diff --git a/sys/modules/zfs/zfs_config.h b/sys/modules/zfs/zfs_config.h
index 5dc43281242f..91bb885be72f 100644
--- a/sys/modules/zfs/zfs_config.h
+++ b/sys/modules/zfs/zfs_config.h
@@ -687,15 +687,6 @@
/* All required iov_iter interfaces are available */
/* #undef HAVE_VFS_IOV_ITER */
-/* fops->iterate() is available */
-/* #undef HAVE_VFS_ITERATE */
-
-/* fops->iterate_shared() is available */
-/* #undef HAVE_VFS_ITERATE_SHARED */
-
-/* fops->readdir() is available */
-/* #undef HAVE_VFS_READDIR */
-
/* address_space_operations->readpages exists */
/* #undef HAVE_VFS_READPAGES */
@@ -801,7 +792,7 @@
/* #undef ZFS_DEVICE_MINOR */
/* Define the project alias string. */
-#define ZFS_META_ALIAS "zfs-2.3.0-rc1-FreeBSD_g3a9fca901"
+#define ZFS_META_ALIAS "zfs-2.3.99-31-FreeBSD_gb2f6de7b5"
/* Define the project author. */
#define ZFS_META_AUTHOR "OpenZFS"
@@ -831,10 +822,10 @@
#define ZFS_META_NAME "zfs"
/* Define the project release. */
-#define ZFS_META_RELEASE "rc1-FreeBSD_g3a9fca901"
+#define ZFS_META_RELEASE "31-FreeBSD_gb2f6de7b5"
/* Define the project version. */
-#define ZFS_META_VERSION "2.3.0"
+#define ZFS_META_VERSION "2.3.99"
/* count is located in percpu_ref.data */
/* #undef ZFS_PERCPU_REF_COUNT_IN_DATA */
diff --git a/sys/modules/zfs/zfs_gitrev.h b/sys/modules/zfs/zfs_gitrev.h
index 3a1480d8dc66..e090ccb924f2 100644
--- a/sys/modules/zfs/zfs_gitrev.h
+++ b/sys/modules/zfs/zfs_gitrev.h
@@ -1 +1 @@
-#define ZFS_META_GITREV "zfs-2.3.0-rc1-0-g3a9fca901"
+#define ZFS_META_GITREV "zfs-2.3.99-31-gb2f6de7b5"
diff --git a/tests/sys/kern/Makefile b/tests/sys/kern/Makefile
index e334ff64da41..933c1c9aa10e 100644
--- a/tests/sys/kern/Makefile
+++ b/tests/sys/kern/Makefile
@@ -127,6 +127,7 @@ WARNS?= 3
TESTS_SUBDIRS+= acct
TESTS_SUBDIRS+= execve
TESTS_SUBDIRS+= pipe
+TESTS_SUBDIRS+= tty
.include <netbsd-tests.test.mk>
diff --git a/tests/sys/kern/tty/Makefile b/tests/sys/kern/tty/Makefile
new file mode 100644
index 000000000000..c362793a8b64
--- /dev/null
+++ b/tests/sys/kern/tty/Makefile
@@ -0,0 +1,12 @@
+TESTSDIR= ${TESTSBASE}/sys/kern/tty
+BINDIR= ${TESTSDIR}
+
+PLAIN_TESTS_PORCH+= test_canon
+PLAIN_TESTS_PORCH+= test_canon_fullbuf
+PLAIN_TESTS_PORCH+= test_ncanon
+PLAIN_TESTS_PORCH+= test_recanon
+
+PROGS+= fionread
+PROGS+= readsz
+
+.include <bsd.test.mk>
diff --git a/tests/sys/kern/tty/fionread.c b/tests/sys/kern/tty/fionread.c
new file mode 100644
index 000000000000..929d613f883b
--- /dev/null
+++ b/tests/sys/kern/tty/fionread.c
@@ -0,0 +1,21 @@
+/*-
+ * Copyright (c) 2024 Kyle Evans <kevans@FreeBSD.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <sys/ioctl.h>
+
+#include <assert.h>
+#include <stdio.h>
+#include <unistd.h>
+
+/*
+ * Query FIONREAD on stdin and print the reported byte count to stdout with
+ * no trailing newline.  Exits 1 if the ioctl fails.
+ */
+int
+main(void)
+{
+	int nb;
+
+	/*
+	 * Do not wrap the ioctl in assert(): building with NDEBUG would
+	 * drop the call entirely and leave nb uninitialized.
+	 */
+	if (ioctl(STDIN_FILENO, FIONREAD, &nb) != 0) {
+		perror("ioctl(FIONREAD)");
+		return (1);
+	}
+	printf("%d", nb);
+	return (0);
+}
diff --git a/tests/sys/kern/tty/readsz.c b/tests/sys/kern/tty/readsz.c
new file mode 100644
index 000000000000..95dafa02472f
--- /dev/null
+++ b/tests/sys/kern/tty/readsz.c
@@ -0,0 +1,130 @@
+/*-
+ * Copyright (c) 2024 Kyle Evans <kevans@FreeBSD.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <sys/param.h>
+
+#include <err.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+/*
+ * Print the command-line synopsis to stderr and exit with status 1.
+ */
+static void
+usage(void)
+{
+	const char *progname = getprogname();
+
+	fprintf(stderr,
+	    "usage: %s [-b bytes | -c lines | -e] [-s buffer-size]\n",
+	    progname);
+	exit(1);
+}
+
+/*
+ * Write all len bytes of buf to stdout, resuming after short writes and
+ * EINTR.  Any other failure is fatal.
+ */
+static void
+write_all(const char *buf, size_t len)
+{
+	ssize_t nw;
+
+	while (len > 0) {
+		nw = write(STDOUT_FILENO, buf, len);
+		if (nw == -1 && errno == EINTR)
+			continue;
+		if (nw == -1)
+			err(1, "write");
+		/* Guard against spinning forever if no progress is made. */
+		if (nw == 0)
+			errx(1, "write: no progress");
+		buf += nw;
+		len -= (size_t)nw;
+	}
+}
+
+/*
+ * Copy stdin to stdout using read(2) calls of a controlled size, stopping
+ * after a byte count (-b), a number of read() calls (-c), or at EOF (-e,
+ * the default).  Hitting EOF before the -b/-c target is met is an error.
+ */
+int
+main(int argc, char *argv[])
+{
+	char *buf;
+	const char *errstr;
+	/* reps is only meaningful in byte/count mode; start it defined. */
+	size_t bufsz = 0, reps = 0;
+	ssize_t ret;
+	enum { MODE_BYTES, MODE_COUNT, MODE_EOF } mode;
+	int ch;
+
+	/*
+	 * -b specifies number of bytes.
+	 * -c specifies number of read() calls.
+	 * -e specifies eof (default)
+	 * -s to pass a buffer size
+	 *
+	 * Reading N lines is the same as -c with a high buffer size.
+	 */
+	mode = MODE_EOF;
+	while ((ch = getopt(argc, argv, "b:c:es:")) != -1) {
+		switch (ch) {
+		case 'b':
+			mode = MODE_BYTES;
+			reps = strtonum(optarg, 0, SSIZE_MAX, &errstr);
+			if (errstr != NULL)
+				errx(1, "strtonum: %s", errstr);
+			break;
+		case 'c':
+			mode = MODE_COUNT;
+			reps = strtonum(optarg, 1, SSIZE_MAX, &errstr);
+			if (errstr != NULL)
+				errx(1, "strtonum: %s", errstr);
+			break;
+		case 'e':
+			mode = MODE_EOF;
+			break;
+		case 's':
+			bufsz = strtonum(optarg, 1, SSIZE_MAX, &errstr);
+			if (errstr != NULL)
+				errx(1, "strtonum: %s", errstr);
+			break;
+		default:
+			usage();
+		}
+	}
+
+	/* No -s given: size the buffer from the byte target or LINE_MAX. */
+	if (bufsz == 0) {
+		if (mode == MODE_BYTES)
+			bufsz = reps;
+		else
+			bufsz = LINE_MAX;
+	}
+
+	buf = malloc(bufsz);
+	if (buf == NULL)
+		err(1, "malloc");
+
+	for (;;) {
+		size_t readsz;
+
+		/*
+		 * Be careful not to over-read if we're in byte-mode.  In
+		 * every other mode, we'll read as much as we can.
+		 */
+		if (mode == MODE_BYTES)
+			readsz = MIN(bufsz, reps);
+		else
+			readsz = bufsz;
+
+		ret = read(STDIN_FILENO, buf, readsz);
+		if (ret == -1 && errno == EINTR)
+			continue;
+		if (ret == -1)
+			err(1, "read");
+		if (ret == 0) {
+			if (mode == MODE_EOF)
+				return (0);
+			errx(1, "premature EOF");
+		}
+
+		/*
+		 * Write out what we've got.  The result must not be dropped:
+		 * a short or failed write would silently truncate the output
+		 * that the tests compare against.
+		 */
+		write_all(buf, (size_t)ret);
+
+		/*
+		 * Bail out if we've hit our metric (byte mode / count mode).
+		 */
+		switch (mode) {
+		case MODE_BYTES:
+			reps -= ret;
+			if (reps == 0)
+				return (0);
+			break;
+		case MODE_COUNT:
+			reps--;
+			if (reps == 0)
+				return (0);
+			break;
+		default:
+			break;
+		}
+	}
+
+	return (0);
+}
diff --git a/tests/sys/kern/tty/test_canon.orch b/tests/sys/kern/tty/test_canon.orch
new file mode 100644
index 000000000000..28018edfdcd6
--- /dev/null
+++ b/tests/sys/kern/tty/test_canon.orch
@@ -0,0 +1,102 @@
+#!/usr/bin/env -S porch -f
+--
+-- Copyright (c) 2024 Kyle Evans <kevans@FreeBSD.org>
+--
+-- SPDX-License-Identifier: BSD-2-Clause
+--
+
+timeout(3)
+
+spawn("cat")
+
+write "Complete\r"
+match "Complete\r"
+
+write "Basic\rIncomplete"
+match "Basic\r"
+
+-- We shouldn't see any of the "Incomplete" line
+fail(function()
+end)
+
+match "Incomp" {
+ callback = function()
+ exit(1)
+ end
+}
+
+fail(nil)
+
+-- Pushing a ^D along should force a flush of the tty, cat(1) will write the
+-- result without a trailing newline.
+write " line^D"
+match "Incomplete line$"
+
+-- Erase!
+write "Dog^H^D"
+match "Do$"
+
+-- More erase!
+write "Cat Dog^W^D"
+match "Cat $"
+
+write "^D"
+eof()
+
+-- Spawn a fresh fionread, send it `str`, and expect `expected` in its output.
+local function fionread_test(str, expected)
+	spawn("fionread")
+	write(str)
+
+	match(expected)
+end
+
+-- Incomplete line
+fionread_test("Hello", "0")
+-- VEOF does not count
+fionread_test("Hello^D", "5")
+-- VEOF still doesn't count, even if the next line is an extra VEOF later
+fionread_test("Hello^D^D", "5")
+-- read(2) definitely won't return the second incomplete line
+fionread_test("Hello^Dther", "5")
+-- read(2) also won't return a second complete line at once
+fionread_test("Hello^Dthere^D", "5")
+-- Finally, send a VEOF to terminate a blank line and signal EOF in read(2)
+fionread_test("^D", "0")
+
+-- \r will instead show up in the input stream to the application, so we must
+-- make sure those are counted where VEOF generally wouldn't be.
+fionread_test("Hello\r", "6")
+fionread_test("Hello\rther", "6")
+fionread_test("Hello\rthere\r", "6")
+fionread_test("\r", "1")
+
+-- Spawn readsz with the argument list `arg`, feed it `str`, and expect
+-- `expected` in its output.  `str` is either one string, written in one go,
+-- or a table of exactly two strings written as two separate chunks.
+local function readsz_test(str, arg, expected)
+	spawn("readsz", table.unpack(arg))
+
+	if type(str) ~= "table" then
+		write(str)
+	else
+		assert(#str == 2)
+
+		local first, second = str[1], str[2]
+
+		write(first)
+		release()
+
+		-- Pause so readsz can consume the partial input before the rest
+		-- is sent along.
+		sleep(1)
+		write(second)
+	end
+
+	match(expected)
+end
+
+readsz_test("partial", {"-b", 3}, "^$")
+readsz_test("partial^D", {"-b", 3}, "^par$")
+readsz_test("partial^D", {"-c", 1}, "^partial$")
+for s = 1, #"partial" do
+ readsz_test("partial^D", {"-s", s}, "^partial$")
+end
+-- Send part of the line, release and pause, then finish it.
+readsz_test({"par", "tial^D"}, {"-c", 1}, "^partial$")
+-- line is incomplete, so we'll just see the "partial" even if we want two
+readsz_test("partial^Dline", {"-c", 2}, "^partial$")
+readsz_test("partial^Dline^D", {"-c", 1}, "^partial$")
+readsz_test("partial^Dline^D", {"-c", 2}, "^partialline$")
diff --git a/tests/sys/kern/tty/test_canon_fullbuf.orch b/tests/sys/kern/tty/test_canon_fullbuf.orch
new file mode 100644
index 000000000000..1833703e4f45
--- /dev/null
+++ b/tests/sys/kern/tty/test_canon_fullbuf.orch
@@ -0,0 +1,23 @@
+#!/usr/bin/env -S porch -f
+--
+-- Copyright (c) 2024 Kyle Evans <kevans@FreeBSD.org>
+--
+-- SPDX-License-Identifier: BSD-2-Clause
+--
+
+timeout(3)
+
+local TTYINQ_DATASIZE = 128
+local scream = string.rep("A", TTYINQ_DATASIZE - 1)
+
+spawn("cat")
+
+-- Fill up a whole block with screaming + VEOF
+write(scream .. "^D")
+match(scream .. "$")
+
+scream = scream .. "A"
+
+-- Now fill up the next block, but spill the VEOF over to a third block.
+write(scream .. "^D")
+match(scream .. "$")
diff --git a/tests/sys/kern/tty/test_ncanon.orch b/tests/sys/kern/tty/test_ncanon.orch
new file mode 100644
index 000000000000..14a34d82fa9a
--- /dev/null
+++ b/tests/sys/kern/tty/test_ncanon.orch
@@ -0,0 +1,39 @@
+#!/usr/bin/env -S porch -f
+--
+-- Copyright (c) 2024 Kyle Evans <kevans@FreeBSD.org>
+--
+-- SPDX-License-Identifier: BSD-2-Clause
+--
+
+timeout(3)
+
+-- Spawn the given command line, then clear ICANON in the tty's lflag so the
+-- tests below run against a non-canonical tty.
+local function spawn_one(...)
+	spawn(...)
+
+	local icanon = tty.lflag.ICANON
+	stty("lflag", 0, icanon)
+end
+
+-- We can send one byte...
+spawn_one("readsz", "-c", 1)
+write "H"
+match "^H$"
+
+-- or many.
+spawn_one("readsz", "-c", 1)
+write "Hello"
+match "^Hello$"
+
+-- VEOF is a normal character here, passed through as-is.
+spawn_one("readsz", "-c", 1)
+write "Hello^D"
+match "^Hello\x04$"
+spawn_one("readsz", "-c", 1)
+write "^D"
+match "^\x04$"
+
+-- Confirm that FIONREAD agrees that VEOF will be returned, even if it was sent
+-- while the tty was still in canonical mode.
+spawn("fionread")
+write "^D"
+stty("lflag", 0, tty.lflag.ICANON)
+match "^1$"
diff --git a/tests/sys/kern/tty/test_recanon.orch b/tests/sys/kern/tty/test_recanon.orch
new file mode 100644
index 000000000000..e3943495ca5d
--- /dev/null
+++ b/tests/sys/kern/tty/test_recanon.orch
@@ -0,0 +1,90 @@
+#!/usr/bin/env -S porch -f
+--
+-- Copyright (c) 2024 Kyle Evans <kevans@FreeBSD.org>
+--
+-- SPDX-License-Identifier: BSD-2-Clause
+--
+
+timeout(3)
+
+local TTYINQ_DATASIZE = 128
+local scream = string.rep("A", TTYINQ_DATASIZE - 1)
+
+-- Take the tty out of canonical mode: set no lflag bits, unset ICANON.
+local function ncanon()
+	local icanon = tty.lflag.ICANON
+
+	stty("lflag", nil, icanon)
+end
+
+-- Put the tty back into canonical mode by setting ICANON in lflag.
+local function canon()
+	local icanon = tty.lflag.ICANON
+
+	stty("lflag", icanon)
+end
+
+spawn("readsz", "-e")
+ncanon()
+
+-- Fill up a whole block with screaming + VEOF; when it gets recanonicalized,
+-- the next line should be pointing to the beginning of the next block.
+write(scream .. "^D")
+
+canon()
+match(scream .. "$")
+
+-- The same as above, but spilling VEOF over to the next block.
+spawn("readsz", "-e")
+ncanon()
+
+write(scream .. "A^D")
+
+canon()
+match(scream .. "A$")
+
+-- We'll do it again, except with one character spilled over to the next block
+-- before we recanonicalize. We should then have the scream, followed by a
+-- partial line containing the spill over.
+spawn("cat")
+ncanon()
+
+write(scream .. "^DZ")
+
+canon()
+match(scream .. "$")
+
+-- Sending "B^D" should give us "ZB" to make sure that we didn't lose anything
+-- at the beginning of the next block.
+
+write("B^D")
+match("^ZB$")
+
+-- Next we'll VEOF at the beginning.
+spawn("readsz", "-e")
+ncanon()
+
+write("^D")
+match("^$")
+
+-- Finally, we'll trigger recanonicalization with an empty buffer. This one is
+-- just about avoiding a panic.
+spawn("true")
+
+ncanon()
+canon()
+release()
+eof()
+
+spawn("readsz", "-c", "1")
+
+write("Test^Dfoo")
+ncanon()
+
+match("^Test\x04foo$")
+
+-- Finally, swap VEOF out with ^F; before recent changes, we would remain
+-- canonicalized at Test^D and the kernel would block on it unless a short
+-- buffer was used since VEOF would not appear within the canonicalized bit.
+spawn("readsz", "-c", 1)
+
+write("Test^DLine^F")
+stty("cc", {
+ VEOF = "^F"
+})
+
+match("^Test\x04Line$")