aboutsummaryrefslogtreecommitdiff
path: root/sys/contrib
diff options
context:
space:
mode:
author Martin Matuska <mm@FreeBSD.org> 2022-06-23 15:47:42 +0000
committer Martin Matuska <mm@FreeBSD.org> 2022-06-23 15:49:33 +0000
commit 1f1e2261e341e6ca6862f82261066ef1705f0a7a (patch)
tree a716a986e2b73d9496b08386e621059ac76e0e0e /sys/contrib
parent a320e9dd51e6e58715a1390f2e00790a94017dfb (diff)
parent deb1213098e2dc10e6eee5e5c57bb40584e096a6 (diff)
download src-1f1e2261e341e6ca6862f82261066ef1705f0a7a.tar.gz
src-1f1e2261e341e6ca6862f82261066ef1705f0a7a.zip
zfs: merge openzfs/zfs@deb121309
Notable upstream pull request merges:
  #12918 Introduce BLAKE3 checksums as an OpenZFS feature
  #13553 Reduce ZIO io_lock contention on sorted scrub
  #13537 Improve sorted scan memory accounting
  #13540 AVL: Remove obsolete branching optimizations
  #13563 FreeBSD: Improve crypto_dispatch() handling

Obtained from: OpenZFS
OpenZFS commit: deb1213098e2dc10e6eee5e5c57bb40584e096a6
Diffstat (limited to 'sys/contrib')
-rw-r--r--sys/contrib/openzfs/AUTHORS1
-rw-r--r--sys/contrib/openzfs/cmd/zfs/zfs_iter.c6
-rw-r--r--sys/contrib/openzfs/cmd/zfs/zfs_main.c68
-rw-r--r--sys/contrib/openzfs/cmd/zpool/zpool_main.c4
-rw-r--r--sys/contrib/openzfs/cmd/ztest.c89
-rw-r--r--sys/contrib/openzfs/config/always-arch.m42
-rw-r--r--sys/contrib/openzfs/config/kernel-add-disk.m43
-rw-r--r--sys/contrib/openzfs/config/kernel-blk-queue.m432
-rw-r--r--sys/contrib/openzfs/config/kernel-user-ns-inum.m423
-rw-r--r--sys/contrib/openzfs/config/kernel.m42
-rwxr-xr-xsys/contrib/openzfs/contrib/dracut/90zfs/module-setup.sh.in3
-rw-r--r--sys/contrib/openzfs/contrib/pyzfs/libzfs_core/_constants.py1
-rw-r--r--sys/contrib/openzfs/etc/Makefile.am3
-rw-r--r--sys/contrib/openzfs/etc/systemd/system/zfs-trim-monthly@.timer.in12
-rw-r--r--sys/contrib/openzfs/etc/systemd/system/zfs-trim-weekly@.timer.in12
-rw-r--r--sys/contrib/openzfs/etc/systemd/system/zfs-trim@.service.in15
-rw-r--r--sys/contrib/openzfs/include/Makefile.am2
-rw-r--r--sys/contrib/openzfs/include/libzfs.h10
-rw-r--r--sys/contrib/openzfs/include/os/freebsd/spl/sys/ccompile.h2
-rw-r--r--sys/contrib/openzfs/include/os/linux/kernel/linux/blkdev_compat.h111
-rw-r--r--sys/contrib/openzfs/include/os/linux/kernel/linux/simd_powerpc.h34
-rw-r--r--sys/contrib/openzfs/include/os/linux/kernel/linux/vfs_compat.h16
-rw-r--r--sys/contrib/openzfs/include/os/linux/spl/sys/uio.h39
-rw-r--r--sys/contrib/openzfs/include/os/linux/spl/sys/zone.h31
-rw-r--r--sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_context_os.h5
-rw-r--r--sys/contrib/openzfs/include/sys/blake3.h125
-rw-r--r--sys/contrib/openzfs/include/sys/fs/zfs.h6
-rw-r--r--sys/contrib/openzfs/include/sys/zfs_chksum.h48
-rw-r--r--sys/contrib/openzfs/include/sys/zfs_ioctl.h3
-rw-r--r--sys/contrib/openzfs/include/sys/zio.h1
-rw-r--r--sys/contrib/openzfs/include/sys/zio_checksum.h12
-rw-r--r--sys/contrib/openzfs/include/zfeature_common.h1
-rw-r--r--sys/contrib/openzfs/lib/libicp/Makefile.am25
-rw-r--r--sys/contrib/openzfs/lib/libspl/include/sys/simd.h18
-rw-r--r--sys/contrib/openzfs/lib/libspl/include/sys/types.h2
-rw-r--r--sys/contrib/openzfs/lib/libspl/include/zone.h12
-rw-r--r--sys/contrib/openzfs/lib/libspl/os/linux/zone.c32
-rw-r--r--sys/contrib/openzfs/lib/libuutil/libuutil.abi2
-rw-r--r--sys/contrib/openzfs/lib/libzfs/libzfs.abi18
-rw-r--r--sys/contrib/openzfs/lib/libzfs/libzfs_crypto.c10
-rw-r--r--sys/contrib/openzfs/lib/libzfs/libzfs_dataset.c64
-rw-r--r--sys/contrib/openzfs/lib/libzfs/libzfs_diff.c2
-rw-r--r--sys/contrib/openzfs/lib/libzfs/libzfs_impl.h4
-rw-r--r--sys/contrib/openzfs/lib/libzfs/libzfs_pool.c299
-rw-r--r--sys/contrib/openzfs/lib/libzfs/libzfs_sendrecv.c36
-rw-r--r--sys/contrib/openzfs/lib/libzfs/libzfs_util.c12
-rw-r--r--sys/contrib/openzfs/lib/libzfs/os/freebsd/libzfs_compat.c2
-rw-r--r--sys/contrib/openzfs/lib/libzfs/os/linux/libzfs_pool_os.c2
-rw-r--r--sys/contrib/openzfs/lib/libzfs/os/linux/libzfs_util_os.c71
-rw-r--r--sys/contrib/openzfs/lib/libzfs_core/libzfs_core.abi2
-rw-r--r--sys/contrib/openzfs/lib/libzpool/Makefile.am2
-rw-r--r--sys/contrib/openzfs/man/Makefile.am2
-rw-r--r--sys/contrib/openzfs/man/man4/zfs.469
-rw-r--r--sys/contrib/openzfs/man/man7/zfsprops.710
-rw-r--r--sys/contrib/openzfs/man/man7/zpool-features.78
l---------sys/contrib/openzfs/man/man8/zfs-unzone.81
-rw-r--r--sys/contrib/openzfs/man/man8/zfs-zone.8116
-rw-r--r--sys/contrib/openzfs/man/man8/zpool-trim.821
-rw-r--r--sys/contrib/openzfs/module/Kbuild.in38
-rw-r--r--sys/contrib/openzfs/module/Makefile.bsd34
-rw-r--r--sys/contrib/openzfs/module/avl/avl.c24
-rw-r--r--sys/contrib/openzfs/module/icp/algs/blake3/blake3.c732
-rw-r--r--sys/contrib/openzfs/module/icp/algs/blake3/blake3_generic.c202
-rw-r--r--sys/contrib/openzfs/module/icp/algs/blake3/blake3_impl.c284
-rw-r--r--sys/contrib/openzfs/module/icp/algs/blake3/blake3_impl.h213
-rw-r--r--sys/contrib/openzfs/module/icp/algs/blake3/blake3_x86-64.c248
-rw-r--r--sys/contrib/openzfs/module/icp/asm-aarch64/blake3/b3_aarch64_sse2.S2450
-rw-r--r--sys/contrib/openzfs/module/icp/asm-aarch64/blake3/b3_aarch64_sse41.S2463
-rw-r--r--sys/contrib/openzfs/module/icp/asm-ppc64/blake3/b3_ppc64le_sse2.S2823
-rw-r--r--sys/contrib/openzfs/module/icp/asm-ppc64/blake3/b3_ppc64le_sse41.S3064
-rw-r--r--sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_avx2.S1845
-rw-r--r--sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_avx512.S2618
-rw-r--r--sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_sse2.S2323
-rw-r--r--sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_sse41.S2058
-rw-r--r--sys/contrib/openzfs/module/os/freebsd/zfs/crypto_os.c41
-rw-r--r--sys/contrib/openzfs/module/os/linux/spl/spl-generic.c6
-rw-r--r--sys/contrib/openzfs/module/os/linux/spl/spl-zone.c424
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/policy.c2
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/zfs_ioctl_os.c47
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/zfs_uio.c154
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c20
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/zpl_ctldir.c17
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c1
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/zpl_xattr.c5
-rw-r--r--sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c632
-rw-r--r--sys/contrib/openzfs/module/zcommon/zfeature_common.c31
-rw-r--r--sys/contrib/openzfs/module/zcommon/zfs_prop.c8
-rw-r--r--sys/contrib/openzfs/module/zfs/blake3_zfs.c117
-rw-r--r--sys/contrib/openzfs/module/zfs/dsl_prop.c10
-rw-r--r--sys/contrib/openzfs/module/zfs/dsl_scan.c26
-rw-r--r--sys/contrib/openzfs/module/zfs/spa_misc.c3
-rw-r--r--sys/contrib/openzfs/module/zfs/vdev.c6
-rw-r--r--sys/contrib/openzfs/module/zfs/zcp_synctask.c2
-rw-r--r--sys/contrib/openzfs/module/zfs/zfs_chksum.c323
-rw-r--r--sys/contrib/openzfs/module/zfs/zfs_ioctl.c14
-rw-r--r--sys/contrib/openzfs/module/zfs/zio_checksum.c6
-rw-r--r--sys/contrib/openzfs/tests/runfiles/common.run10
-rw-r--r--sys/contrib/openzfs/tests/runfiles/linux.run8
-rw-r--r--sys/contrib/openzfs/tests/zfs-tests/cmd/.gitignore1
-rw-r--r--sys/contrib/openzfs/tests/zfs-tests/cmd/Makefile.am6
-rw-r--r--sys/contrib/openzfs/tests/zfs-tests/cmd/checksum/blake3_test.c575
-rw-r--r--sys/contrib/openzfs/tests/zfs-tests/cmd/checksum/edonr_test.c3
-rw-r--r--sys/contrib/openzfs/tests/zfs-tests/cmd/checksum/sha2_test.c3
-rw-r--r--sys/contrib/openzfs/tests/zfs-tests/cmd/checksum/skein_test.c3
-rw-r--r--sys/contrib/openzfs/tests/zfs-tests/include/commands.cfg5
-rw-r--r--sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib45
-rw-r--r--sys/contrib/openzfs/tests/zfs-tests/include/properties.shlib2
-rw-r--r--sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg1
-rw-r--r--sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am9
-rw-r--r--sys/contrib/openzfs/tests/zfs-tests/tests/functional/checksum/default.cfg2
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/checksum/run_blake3_test.ksh30
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/checksum_001_pos.ksh2
-rw-r--r--sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg1
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/user_namespace/user_namespace_001.ksh5
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/user_namespace/user_namespace_002.ksh115
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/user_namespace/user_namespace_003.ksh97
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/user_namespace/user_namespace_004.ksh67
-rw-r--r--sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_common.shlib11
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_fua.ksh96
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_trim.ksh136
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_stress/cleanup.ksh36
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_stress/setup.ksh36
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_stress/zvol_stress.ksh169
123 files changed, 25842 insertions, 510 deletions
diff --git a/sys/contrib/openzfs/AUTHORS b/sys/contrib/openzfs/AUTHORS
index aab8bf29c99f..86083ba87715 100644
--- a/sys/contrib/openzfs/AUTHORS
+++ b/sys/contrib/openzfs/AUTHORS
@@ -285,6 +285,7 @@ CONTRIBUTORS:
Tim Connors <tconnors@rather.puzzling.org>
Tim Crawford <tcrawford@datto.com>
Tim Haley <Tim.Haley@Sun.COM>
+ Tino Reichardt <milky-zfs@mcmilk.de>
Tobin Harding <me@tobin.cc>
Tom Caputi <tcaputi@datto.com>
Tom Matthews <tom@axiom-partners.com>
diff --git a/sys/contrib/openzfs/cmd/zfs/zfs_iter.c b/sys/contrib/openzfs/cmd/zfs/zfs_iter.c
index d44bc2ad7621..c0426d4f81ea 100644
--- a/sys/contrib/openzfs/cmd/zfs/zfs_iter.c
+++ b/sys/contrib/openzfs/cmd/zfs/zfs_iter.c
@@ -174,7 +174,7 @@ zfs_add_sort_column(zfs_sort_column_t **sc, const char *name,
zfs_sort_column_t *col;
zfs_prop_t prop;
- if ((prop = zfs_name_to_prop(name)) == ZPROP_INVAL &&
+ if ((prop = zfs_name_to_prop(name)) == ZPROP_USERPROP &&
!zfs_prop_user(name))
return (-1);
@@ -182,7 +182,7 @@ zfs_add_sort_column(zfs_sort_column_t **sc, const char *name,
col->sc_prop = prop;
col->sc_reverse = reverse;
- if (prop == ZPROP_INVAL) {
+ if (prop == ZPROP_USERPROP) {
col->sc_user_prop = safe_malloc(strlen(name) + 1);
(void) strcpy(col->sc_user_prop, name);
}
@@ -311,7 +311,7 @@ zfs_sort(const void *larg, const void *rarg, void *data)
* Otherwise, we compare 'lnum' and 'rnum'.
*/
lstr = rstr = NULL;
- if (psc->sc_prop == ZPROP_INVAL) {
+ if (psc->sc_prop == ZPROP_USERPROP) {
nvlist_t *luser, *ruser;
nvlist_t *lval, *rval;
diff --git a/sys/contrib/openzfs/cmd/zfs/zfs_main.c b/sys/contrib/openzfs/cmd/zfs/zfs_main.c
index 6282d894638e..3cb88b6a1a15 100644
--- a/sys/contrib/openzfs/cmd/zfs/zfs_main.c
+++ b/sys/contrib/openzfs/cmd/zfs/zfs_main.c
@@ -127,6 +127,11 @@ static int zfs_do_jail(int argc, char **argv);
static int zfs_do_unjail(int argc, char **argv);
#endif
+#ifdef __linux__
+static int zfs_do_zone(int argc, char **argv);
+static int zfs_do_unzone(int argc, char **argv);
+#endif
+
/*
* Enable a reasonable set of defaults for libumem debugging on DEBUG builds.
*/
@@ -184,6 +189,8 @@ typedef enum {
HELP_JAIL,
HELP_UNJAIL,
HELP_WAIT,
+ HELP_ZONE,
+ HELP_UNZONE,
} zfs_help_t;
typedef struct zfs_command {
@@ -254,6 +261,11 @@ static zfs_command_t command_table[] = {
{ "jail", zfs_do_jail, HELP_JAIL },
{ "unjail", zfs_do_unjail, HELP_UNJAIL },
#endif
+
+#ifdef __linux__
+ { "zone", zfs_do_zone, HELP_ZONE },
+ { "unzone", zfs_do_unzone, HELP_UNZONE },
+#endif
};
#define NCOMMAND (sizeof (command_table) / sizeof (command_table[0]))
@@ -415,6 +427,10 @@ get_usage(zfs_help_t idx)
return (gettext("\tunjail <jailid|jailname> <filesystem>\n"));
case HELP_WAIT:
return (gettext("\twait [-t <activity>] <filesystem>\n"));
+ case HELP_ZONE:
+ return (gettext("\tzone <nsfile> <filesystem>\n"));
+ case HELP_UNZONE:
+ return (gettext("\tunzone <nsfile> <filesystem>\n"));
default:
__builtin_unreachable();
}
@@ -1901,7 +1917,7 @@ get_callback(zfs_handle_t *zhp, void *data)
pl == cbp->cb_proplist)
continue;
- if (pl->pl_prop != ZPROP_INVAL) {
+ if (pl->pl_prop != ZPROP_USERPROP) {
if (zfs_prop_get(zhp, pl->pl_prop, buf,
sizeof (buf), &sourcetype, source,
sizeof (source),
@@ -2291,7 +2307,7 @@ zfs_do_inherit(int argc, char **argv)
argc--;
argv++;
- if ((prop = zfs_name_to_prop(propname)) != ZPROP_INVAL) {
+ if ((prop = zfs_name_to_prop(propname)) != ZPROP_USERPROP) {
if (zfs_prop_readonly(prop)) {
(void) fprintf(stderr, gettext(
"%s property is read-only\n"),
@@ -3427,7 +3443,7 @@ print_header(list_cbdata_t *cb)
}
right_justify = B_FALSE;
- if (pl->pl_prop != ZPROP_INVAL) {
+ if (pl->pl_prop != ZPROP_USERPROP) {
header = zfs_prop_column_name(pl->pl_prop);
right_justify = zfs_prop_align_right(pl->pl_prop);
} else {
@@ -3478,7 +3494,7 @@ print_dataset(zfs_handle_t *zhp, list_cbdata_t *cb)
sizeof (property));
propstr = property;
right_justify = zfs_prop_align_right(pl->pl_prop);
- } else if (pl->pl_prop != ZPROP_INVAL) {
+ } else if (pl->pl_prop != ZPROP_USERPROP) {
if (zfs_prop_get(zhp, pl->pl_prop, property,
sizeof (property), NULL, NULL, 0,
cb->cb_literal) != 0)
@@ -8692,6 +8708,50 @@ main(int argc, char **argv)
return (ret);
}
+/*
+ * zfs zone nsfile filesystem
+ *
+ * Add or delete the given dataset to/from the namespace.
+ */
+#ifdef __linux__
+static int
+zfs_do_zone_impl(int argc, char **argv, boolean_t attach)
+{
+ zfs_handle_t *zhp;
+ int ret;
+
+ if (argc < 3) {
+ (void) fprintf(stderr, gettext("missing argument(s)\n"));
+ usage(B_FALSE);
+ }
+ if (argc > 3) {
+ (void) fprintf(stderr, gettext("too many arguments\n"));
+ usage(B_FALSE);
+ }
+
+ zhp = zfs_open(g_zfs, argv[2], ZFS_TYPE_FILESYSTEM);
+ if (zhp == NULL)
+ return (1);
+
+ ret = (zfs_userns(zhp, argv[1], attach) != 0);
+
+ zfs_close(zhp);
+ return (ret);
+}
+
+static int
+zfs_do_zone(int argc, char **argv)
+{
+ return (zfs_do_zone_impl(argc, argv, B_TRUE));
+}
+
+static int
+zfs_do_unzone(int argc, char **argv)
+{
+ return (zfs_do_zone_impl(argc, argv, B_FALSE));
+}
+#endif
+
#ifdef __FreeBSD__
#include <sys/jail.h>
#include <jail.h>
diff --git a/sys/contrib/openzfs/cmd/zpool/zpool_main.c b/sys/contrib/openzfs/cmd/zpool/zpool_main.c
index 598d8b1bbb65..a0aeb8c06bd7 100644
--- a/sys/contrib/openzfs/cmd/zpool/zpool_main.c
+++ b/sys/contrib/openzfs/cmd/zpool/zpool_main.c
@@ -5946,7 +5946,7 @@ print_header(list_cbdata_t *cb)
first = B_FALSE;
right_justify = B_FALSE;
- if (pl->pl_prop != ZPROP_INVAL) {
+ if (pl->pl_prop != ZPROP_USERPROP) {
header = zpool_prop_column_name(pl->pl_prop);
right_justify = zpool_prop_align_right(pl->pl_prop);
} else {
@@ -6004,7 +6004,7 @@ print_pool(zpool_handle_t *zhp, list_cbdata_t *cb)
}
right_justify = B_FALSE;
- if (pl->pl_prop != ZPROP_INVAL) {
+ if (pl->pl_prop != ZPROP_USERPROP) {
if (zpool_get_prop(zhp, pl->pl_prop, property,
sizeof (property), NULL, cb->cb_literal) != 0)
propstr = "-";
diff --git a/sys/contrib/openzfs/cmd/ztest.c b/sys/contrib/openzfs/cmd/ztest.c
index ca05cf26511e..95f6107ff420 100644
--- a/sys/contrib/openzfs/cmd/ztest.c
+++ b/sys/contrib/openzfs/cmd/ztest.c
@@ -121,6 +121,7 @@
#include <sys/zfeature.h>
#include <sys/dsl_userhold.h>
#include <sys/abd.h>
+#include <sys/blake3.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
@@ -417,6 +418,7 @@ ztest_func_t ztest_device_removal;
ztest_func_t ztest_spa_checkpoint_create_discard;
ztest_func_t ztest_initialize;
ztest_func_t ztest_trim;
+ztest_func_t ztest_blake3;
ztest_func_t ztest_fletcher;
ztest_func_t ztest_fletcher_incr;
ztest_func_t ztest_verify_dnode_bt;
@@ -470,6 +472,7 @@ ztest_info_t ztest_info[] = {
ZTI_INIT(ztest_spa_checkpoint_create_discard, 1, &zopt_rarely),
ZTI_INIT(ztest_initialize, 1, &zopt_sometimes),
ZTI_INIT(ztest_trim, 1, &zopt_sometimes),
+ ZTI_INIT(ztest_blake3, 1, &zopt_rarely),
ZTI_INIT(ztest_fletcher, 1, &zopt_rarely),
ZTI_INIT(ztest_fletcher_incr, 1, &zopt_rarely),
ZTI_INIT(ztest_verify_dnode_bt, 1, &zopt_sometimes),
@@ -6374,6 +6377,92 @@ ztest_reguid(ztest_ds_t *zd, uint64_t id)
}
void
+ztest_blake3(ztest_ds_t *zd, uint64_t id)
+{
+ (void) zd, (void) id;
+ hrtime_t end = gethrtime() + NANOSEC;
+ zio_cksum_salt_t salt;
+ void *salt_ptr = &salt.zcs_bytes;
+ struct abd *abd_data, *abd_meta;
+ void *buf, *templ;
+ int i, *ptr;
+ uint32_t size;
+ BLAKE3_CTX ctx;
+
+ size = ztest_random_blocksize();
+ buf = umem_alloc(size, UMEM_NOFAIL);
+ abd_data = abd_alloc(size, B_FALSE);
+ abd_meta = abd_alloc(size, B_TRUE);
+
+ for (i = 0, ptr = buf; i < size / sizeof (*ptr); i++, ptr++)
+ *ptr = ztest_random(UINT_MAX);
+ memset(salt_ptr, 'A', 32);
+
+ abd_copy_from_buf_off(abd_data, buf, 0, size);
+ abd_copy_from_buf_off(abd_meta, buf, 0, size);
+
+ while (gethrtime() <= end) {
+ int run_count = 100;
+ zio_cksum_t zc_ref1, zc_ref2;
+ zio_cksum_t zc_res1, zc_res2;
+
+ void *ref1 = &zc_ref1;
+ void *ref2 = &zc_ref2;
+ void *res1 = &zc_res1;
+ void *res2 = &zc_res2;
+
+ /* BLAKE3_KEY_LEN = 32 */
+ VERIFY0(blake3_set_impl_name("generic"));
+ templ = abd_checksum_blake3_tmpl_init(&salt);
+ Blake3_InitKeyed(&ctx, salt_ptr);
+ Blake3_Update(&ctx, buf, size);
+ Blake3_Final(&ctx, ref1);
+ zc_ref2 = zc_ref1;
+ ZIO_CHECKSUM_BSWAP(&zc_ref2);
+ abd_checksum_blake3_tmpl_free(templ);
+
+ VERIFY0(blake3_set_impl_name("cycle"));
+ while (run_count-- > 0) {
+
+ /* Test current implementation */
+ Blake3_InitKeyed(&ctx, salt_ptr);
+ Blake3_Update(&ctx, buf, size);
+ Blake3_Final(&ctx, res1);
+ zc_res2 = zc_res1;
+ ZIO_CHECKSUM_BSWAP(&zc_res2);
+
+ VERIFY0(memcmp(ref1, res1, 32));
+ VERIFY0(memcmp(ref2, res2, 32));
+
+ /* Test ABD - data */
+ templ = abd_checksum_blake3_tmpl_init(&salt);
+ abd_checksum_blake3_native(abd_data, size,
+ templ, &zc_res1);
+ abd_checksum_blake3_byteswap(abd_data, size,
+ templ, &zc_res2);
+
+ VERIFY0(memcmp(ref1, res1, 32));
+ VERIFY0(memcmp(ref2, res2, 32));
+
+ /* Test ABD - metadata */
+ abd_checksum_blake3_native(abd_meta, size,
+ templ, &zc_res1);
+ abd_checksum_blake3_byteswap(abd_meta, size,
+ templ, &zc_res2);
+ abd_checksum_blake3_tmpl_free(templ);
+
+ VERIFY0(memcmp(ref1, res1, 32));
+ VERIFY0(memcmp(ref2, res2, 32));
+
+ }
+ }
+
+ abd_free(abd_data);
+ abd_free(abd_meta);
+ umem_free(buf, size);
+}
+
+void
ztest_fletcher(ztest_ds_t *zd, uint64_t id)
{
(void) zd, (void) id;
diff --git a/sys/contrib/openzfs/config/always-arch.m4 b/sys/contrib/openzfs/config/always-arch.m4
index 02c8e4775b95..f7090a4826ba 100644
--- a/sys/contrib/openzfs/config/always-arch.m4
+++ b/sys/contrib/openzfs/config/always-arch.m4
@@ -30,6 +30,8 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_ARCH], [
;;
esac
+ AM_CONDITIONAL([TARGET_CPU_AARCH64], test $TARGET_CPU = aarch64)
AM_CONDITIONAL([TARGET_CPU_X86_64], test $TARGET_CPU = x86_64)
AM_CONDITIONAL([TARGET_CPU_POWERPC], test $TARGET_CPU = powerpc)
+ AM_CONDITIONAL([TARGET_CPU_SPARC64], test $TARGET_CPU = sparc64)
])
diff --git a/sys/contrib/openzfs/config/kernel-add-disk.m4 b/sys/contrib/openzfs/config/kernel-add-disk.m4
index 44a8a5fd25b6..86d81ea325b9 100644
--- a/sys/contrib/openzfs/config/kernel-add-disk.m4
+++ b/sys/contrib/openzfs/config/kernel-add-disk.m4
@@ -7,8 +7,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_ADD_DISK], [
#include <linux/blkdev.h>
], [
struct gendisk *disk = NULL;
- int err = add_disk(disk);
- err = err;
+ int error __attribute__ ((unused)) = add_disk(disk);
])
])
diff --git a/sys/contrib/openzfs/config/kernel-blk-queue.m4 b/sys/contrib/openzfs/config/kernel-blk-queue.m4
index 6f42b98125cd..29b0a28290ab 100644
--- a/sys/contrib/openzfs/config/kernel-blk-queue.m4
+++ b/sys/contrib/openzfs/config/kernel-blk-queue.m4
@@ -359,6 +359,36 @@ AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_MAX_SEGMENTS], [
])
])
+dnl #
+dnl # See if kernel supports block multi-queue and blk_status_t.
+dnl # blk_status_t represents the new status codes introduced in the 4.13
+dnl # kernel patch:
+dnl #
+dnl # block: introduce new block status code type
+dnl #
+dnl # We do not currently support the "old" block multi-queue interfaces from
+dnl # prior kernels.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_MQ], [
+ ZFS_LINUX_TEST_SRC([blk_mq], [
+ #include <linux/blk-mq.h>
+ ], [
+ struct blk_mq_tag_set tag_set __attribute__ ((unused)) = {0};
+ (void) blk_mq_alloc_tag_set(&tag_set);
+ return BLK_STS_OK;
+ ], [])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BLK_MQ], [
+ AC_MSG_CHECKING([whether block multiqueue with blk_status_t is available])
+ ZFS_LINUX_TEST_RESULT([blk_mq], [
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_BLK_MQ, 1, [block multiqueue is available])
+ ], [
+ AC_MSG_RESULT(no)
+ ])
+])
+
AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE], [
ZFS_AC_KERNEL_SRC_BLK_QUEUE_PLUG
ZFS_AC_KERNEL_SRC_BLK_QUEUE_BDI
@@ -370,6 +400,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE], [
ZFS_AC_KERNEL_SRC_BLK_QUEUE_FLUSH
ZFS_AC_KERNEL_SRC_BLK_QUEUE_MAX_HW_SECTORS
ZFS_AC_KERNEL_SRC_BLK_QUEUE_MAX_SEGMENTS
+ ZFS_AC_KERNEL_SRC_BLK_MQ
])
AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE], [
@@ -383,4 +414,5 @@ AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE], [
ZFS_AC_KERNEL_BLK_QUEUE_FLUSH
ZFS_AC_KERNEL_BLK_QUEUE_MAX_HW_SECTORS
ZFS_AC_KERNEL_BLK_QUEUE_MAX_SEGMENTS
+ ZFS_AC_KERNEL_BLK_MQ
])
diff --git a/sys/contrib/openzfs/config/kernel-user-ns-inum.m4 b/sys/contrib/openzfs/config/kernel-user-ns-inum.m4
new file mode 100644
index 000000000000..2207a4aa6921
--- /dev/null
+++ b/sys/contrib/openzfs/config/kernel-user-ns-inum.m4
@@ -0,0 +1,23 @@
+dnl #
+dnl # 3.18 API change
+dnl # struct user_namespace inum moved from .proc_inum to .ns.inum.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_USER_NS_COMMON_INUM], [
+ ZFS_LINUX_TEST_SRC([user_ns_common_inum], [
+ #include <linux/user_namespace.h>
+ ], [
+ struct user_namespace uns;
+ uns.ns.inum = 0;
+ ])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_USER_NS_COMMON_INUM], [
+ AC_MSG_CHECKING([whether user_namespace->ns.inum exists])
+ ZFS_LINUX_TEST_RESULT([user_ns_common_inum], [
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_USER_NS_COMMON_INUM, 1,
+ [user_namespace->ns.inum exists])
+ ],[
+ AC_MSG_RESULT(no)
+ ])
+])
diff --git a/sys/contrib/openzfs/config/kernel.m4 b/sys/contrib/openzfs/config/kernel.m4
index 9530367507d6..1f274cbe4f30 100644
--- a/sys/contrib/openzfs/config/kernel.m4
+++ b/sys/contrib/openzfs/config/kernel.m4
@@ -145,6 +145,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
ZFS_AC_KERNEL_SRC_KTHREAD
ZFS_AC_KERNEL_SRC_ZERO_PAGE
ZFS_AC_KERNEL_SRC___COPY_FROM_USER_INATOMIC
+ ZFS_AC_KERNEL_SRC_USER_NS_COMMON_INUM
AC_MSG_CHECKING([for available kernel interfaces])
ZFS_LINUX_TEST_COMPILE_ALL([kabi])
@@ -263,6 +264,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
ZFS_AC_KERNEL_KTHREAD
ZFS_AC_KERNEL_ZERO_PAGE
ZFS_AC_KERNEL___COPY_FROM_USER_INATOMIC
+ ZFS_AC_KERNEL_USER_NS_COMMON_INUM
])
dnl #
diff --git a/sys/contrib/openzfs/contrib/dracut/90zfs/module-setup.sh.in b/sys/contrib/openzfs/contrib/dracut/90zfs/module-setup.sh.in
index f2145861d497..81d7d2abe496 100755
--- a/sys/contrib/openzfs/contrib/dracut/90zfs/module-setup.sh.in
+++ b/sys/contrib/openzfs/contrib/dracut/90zfs/module-setup.sh.in
@@ -83,8 +83,7 @@ install() {
for _service in \
"zfs-import-scan.service" \
- "zfs-import-cache.service" \
- "zfs-load-module.service"; do
+ "zfs-import-cache.service"; do
inst_simple "${systemdsystemunitdir}/${_service}"
systemctl -q --root "${initdir}" add-wants zfs-import.target "${_service}"
done
diff --git a/sys/contrib/openzfs/contrib/pyzfs/libzfs_core/_constants.py b/sys/contrib/openzfs/contrib/pyzfs/libzfs_core/_constants.py
index 3273652f758a..7ee2ef87df3e 100644
--- a/sys/contrib/openzfs/contrib/pyzfs/libzfs_core/_constants.py
+++ b/sys/contrib/openzfs/contrib/pyzfs/libzfs_core/_constants.py
@@ -100,6 +100,7 @@ zfs_errno = enum_with_offset(1024, [
'ZFS_ERR_REBUILD_IN_PROGRESS',
'ZFS_ERR_BADPROP',
'ZFS_ERR_VDEV_NOTSUP',
+ 'ZFS_ERR_NOT_USER_NAMESPACE',
],
{}
)
diff --git a/sys/contrib/openzfs/etc/Makefile.am b/sys/contrib/openzfs/etc/Makefile.am
index 53064eb6f6ea..b4b3ae1f5798 100644
--- a/sys/contrib/openzfs/etc/Makefile.am
+++ b/sys/contrib/openzfs/etc/Makefile.am
@@ -59,6 +59,9 @@ systemdunit_DATA = \
%D%/systemd/system/zfs-scrub-monthly@.timer \
%D%/systemd/system/zfs-scrub-weekly@.timer \
%D%/systemd/system/zfs-scrub@.service \
+ %D%/systemd/system/zfs-trim-monthly@.timer \
+ %D%/systemd/system/zfs-trim-weekly@.timer \
+ %D%/systemd/system/zfs-trim@.service \
%D%/systemd/system/zfs-share.service \
%D%/systemd/system/zfs-volume-wait.service \
%D%/systemd/system/zfs-volumes.target \
diff --git a/sys/contrib/openzfs/etc/systemd/system/zfs-trim-monthly@.timer.in b/sys/contrib/openzfs/etc/systemd/system/zfs-trim-monthly@.timer.in
new file mode 100644
index 000000000000..8c13ffb304b3
--- /dev/null
+++ b/sys/contrib/openzfs/etc/systemd/system/zfs-trim-monthly@.timer.in
@@ -0,0 +1,12 @@
+[Unit]
+Description=Monthly zpool trim timer for %i
+Documentation=man:zpool-trim(8)
+
+[Timer]
+OnCalendar=monthly
+Persistent=true
+RandomizedDelaySec=1h
+Unit=zfs-trim@%i.service
+
+[Install]
+WantedBy=timers.target
diff --git a/sys/contrib/openzfs/etc/systemd/system/zfs-trim-weekly@.timer.in b/sys/contrib/openzfs/etc/systemd/system/zfs-trim-weekly@.timer.in
new file mode 100644
index 000000000000..dced3d88b5c9
--- /dev/null
+++ b/sys/contrib/openzfs/etc/systemd/system/zfs-trim-weekly@.timer.in
@@ -0,0 +1,12 @@
+[Unit]
+Description=Weekly zpool trim timer for %i
+Documentation=man:zpool-trim(8)
+
+[Timer]
+OnCalendar=weekly
+Persistent=true
+RandomizedDelaySec=1h
+Unit=zfs-trim@%i.service
+
+[Install]
+WantedBy=timers.target
diff --git a/sys/contrib/openzfs/etc/systemd/system/zfs-trim@.service.in b/sys/contrib/openzfs/etc/systemd/system/zfs-trim@.service.in
new file mode 100644
index 000000000000..423fb448c16f
--- /dev/null
+++ b/sys/contrib/openzfs/etc/systemd/system/zfs-trim@.service.in
@@ -0,0 +1,15 @@
+[Unit]
+Description=zpool trim on %i
+Documentation=man:zpool-trim(8)
+Requires=zfs.target
+After=zfs.target
+ConditionACPower=true
+ConditionPathIsDirectory=/sys/module/zfs
+
+[Service]
+EnvironmentFile=-@initconfdir@/zfs
+ExecStart=/bin/sh -c '\
+if @sbindir@/zpool status %i | grep -q "(trimming)"; then\
+exec @sbindir@/zpool wait -t trim %i;\
+else exec @sbindir@/zpool trim -w %i; fi'
+ExecStop=-/bin/sh -c '@sbindir@/zpool trim -s %i 2>/dev/null || true'
diff --git a/sys/contrib/openzfs/include/Makefile.am b/sys/contrib/openzfs/include/Makefile.am
index eee989d4a150..1a7f67e9c440 100644
--- a/sys/contrib/openzfs/include/Makefile.am
+++ b/sys/contrib/openzfs/include/Makefile.am
@@ -23,6 +23,7 @@ COMMON_H = \
sys/avl.h \
sys/avl_impl.h \
sys/bitops.h \
+ sys/blake3.h \
sys/blkptr.h \
sys/bplist.h \
sys/bpobj.h \
@@ -117,6 +118,7 @@ COMMON_H = \
sys/zfeature.h \
sys/zfs_acl.h \
sys/zfs_bootenv.h \
+ sys/zfs_chksum.h \
sys/zfs_context.h \
sys/zfs_debug.h \
sys/zfs_delay.h \
diff --git a/sys/contrib/openzfs/include/libzfs.h b/sys/contrib/openzfs/include/libzfs.h
index 2c2aa3faf14b..fe420de4d4de 100644
--- a/sys/contrib/openzfs/include/libzfs.h
+++ b/sys/contrib/openzfs/include/libzfs.h
@@ -150,6 +150,7 @@ typedef enum zfs_error {
EZFS_EXPORT_IN_PROGRESS, /* currently exporting the pool */
EZFS_REBUILDING, /* resilvering (sequential reconstrution) */
EZFS_VDEV_NOTSUP, /* ops not supported for this type of vdev */
+ EZFS_NOT_USER_NAMESPACE, /* a file is not a user namespace */
EZFS_UNKNOWN
} zfs_error_t;
@@ -979,6 +980,15 @@ _LIBZFS_H int zpool_nextboot(libzfs_handle_t *, uint64_t, uint64_t,
#endif /* __FreeBSD__ */
+#ifdef __linux__
+
+/*
+ * Add or delete the given filesystem to/from the given user namespace.
+ */
+_LIBZFS_H int zfs_userns(zfs_handle_t *zhp, const char *nspath, int attach);
+
+#endif
+
#ifdef __cplusplus
}
#endif
diff --git a/sys/contrib/openzfs/include/os/freebsd/spl/sys/ccompile.h b/sys/contrib/openzfs/include/os/freebsd/spl/sys/ccompile.h
index a46a3a18be14..90b077a7be4e 100644
--- a/sys/contrib/openzfs/include/os/freebsd/spl/sys/ccompile.h
+++ b/sys/contrib/openzfs/include/os/freebsd/spl/sys/ccompile.h
@@ -74,10 +74,12 @@ extern "C" {
#ifndef LOCORE
#ifndef HAVE_RPC_TYPES
+#ifndef _KERNEL
typedef int bool_t;
typedef int enum_t;
#endif
#endif
+#endif
#ifndef __cplusplus
#define __init
diff --git a/sys/contrib/openzfs/include/os/linux/kernel/linux/blkdev_compat.h b/sys/contrib/openzfs/include/os/linux/kernel/linux/blkdev_compat.h
index fd91560a3cc4..7964937a0f4d 100644
--- a/sys/contrib/openzfs/include/os/linux/kernel/linux/blkdev_compat.h
+++ b/sys/contrib/openzfs/include/os/linux/kernel/linux/blkdev_compat.h
@@ -34,6 +34,11 @@
#include <linux/hdreg.h>
#include <linux/major.h>
#include <linux/msdos_fs.h> /* for SECTOR_* */
+#include <linux/bio.h>
+
+#ifdef HAVE_BLK_MQ
+#include <linux/blk-mq.h>
+#endif
#ifndef HAVE_BLK_QUEUE_FLAG_SET
static inline void
@@ -608,4 +613,110 @@ blk_generic_alloc_queue(make_request_fn make_request, int node_id)
}
#endif /* !HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS */
+/*
+ * All the io_*() helper functions below can operate on a bio, or a rq, but
+ * not both. The older submit_bio() codepath will pass a bio, and the
+ * newer blk-mq codepath will pass a rq.
+ */
+static inline int
+io_data_dir(struct bio *bio, struct request *rq)
+{
+#ifdef HAVE_BLK_MQ
+ if (rq != NULL) {
+ if (op_is_write(req_op(rq))) {
+ return (WRITE);
+ } else {
+ return (READ);
+ }
+ }
+#else
+ ASSERT3P(rq, ==, NULL);
+#endif
+ return (bio_data_dir(bio));
+}
+
+static inline int
+io_is_flush(struct bio *bio, struct request *rq)
+{
+#ifdef HAVE_BLK_MQ
+ if (rq != NULL)
+ return (req_op(rq) == REQ_OP_FLUSH);
+#else
+ ASSERT3P(rq, ==, NULL);
+#endif
+ return (bio_is_flush(bio));
+}
+
+static inline int
+io_is_discard(struct bio *bio, struct request *rq)
+{
+#ifdef HAVE_BLK_MQ
+ if (rq != NULL)
+ return (req_op(rq) == REQ_OP_DISCARD);
+#else
+ ASSERT3P(rq, ==, NULL);
+#endif
+ return (bio_is_discard(bio));
+}
+
+static inline int
+io_is_secure_erase(struct bio *bio, struct request *rq)
+{
+#ifdef HAVE_BLK_MQ
+ if (rq != NULL)
+ return (req_op(rq) == REQ_OP_SECURE_ERASE);
+#else
+ ASSERT3P(rq, ==, NULL);
+#endif
+ return (bio_is_secure_erase(bio));
+}
+
+static inline int
+io_is_fua(struct bio *bio, struct request *rq)
+{
+#ifdef HAVE_BLK_MQ
+ if (rq != NULL)
+ return (rq->cmd_flags & REQ_FUA);
+#else
+ ASSERT3P(rq, ==, NULL);
+#endif
+ return (bio_is_fua(bio));
+}
+
+
+static inline uint64_t
+io_offset(struct bio *bio, struct request *rq)
+{
+#ifdef HAVE_BLK_MQ
+ if (rq != NULL)
+ return (blk_rq_pos(rq) << 9);
+#else
+ ASSERT3P(rq, ==, NULL);
+#endif
+ return (BIO_BI_SECTOR(bio) << 9);
+}
+
+static inline uint64_t
+io_size(struct bio *bio, struct request *rq)
+{
+#ifdef HAVE_BLK_MQ
+ if (rq != NULL)
+ return (blk_rq_bytes(rq));
+#else
+ ASSERT3P(rq, ==, NULL);
+#endif
+ return (BIO_BI_SIZE(bio));
+}
+
+static inline int
+io_has_data(struct bio *bio, struct request *rq)
+{
+#ifdef HAVE_BLK_MQ
+ if (rq != NULL)
+ return (bio_has_data(rq->bio));
+#else
+ ASSERT3P(rq, ==, NULL);
+#endif
+ return (bio_has_data(bio));
+}
#endif /* _ZFS_BLKDEV_H */
diff --git a/sys/contrib/openzfs/include/os/linux/kernel/linux/simd_powerpc.h b/sys/contrib/openzfs/include/os/linux/kernel/linux/simd_powerpc.h
index 108cef22f56f..31e51ea20a1d 100644
--- a/sys/contrib/openzfs/include/os/linux/kernel/linux/simd_powerpc.h
+++ b/sys/contrib/openzfs/include/os/linux/kernel/linux/simd_powerpc.h
@@ -57,25 +57,45 @@
#include <sys/types.h>
#include <linux/version.h>
-#define kfpu_allowed() 1
-#define kfpu_begin() \
- { \
- preempt_disable(); \
- enable_kernel_altivec(); \
- }
+#define kfpu_allowed() 1
+
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 5, 0)
#define kfpu_end() \
{ \
+ disable_kernel_vsx(); \
disable_kernel_altivec(); \
preempt_enable(); \
}
+#define kfpu_begin() \
+ { \
+ preempt_disable(); \
+ enable_kernel_altivec(); \
+ enable_kernel_vsx(); \
+ }
#else
-/* seems that before 4.5 no-one bothered disabling ... */
+/* seems that before 4.5 no-one bothered */
+#define kfpu_begin()
#define kfpu_end() preempt_enable()
#endif
#define kfpu_init() 0
#define kfpu_fini() ((void) 0)
+/*
+ * Check if VSX instruction set is available
+ *
+ * Reads the machine state register with mfmsr inside a
+ * kfpu_begin()/kfpu_end() section and reports whether bit 0x800000 is set.
+ */
+static inline boolean_t
+zfs_vsx_available(void)
+{
+ boolean_t res;
+ /* msr is 64 bits wide on powerpc64 and 32 bits wide otherwise */
+#if defined(__powerpc64__)
+ u64 msr;
+#else
+ u32 msr;
+#endif
+ kfpu_begin();
+ __asm volatile("mfmsr %0" : "=r"(msr));
+ /* NOTE(review): 0x800000 is presumably the MSR[VSX] bit -- confirm */
+ res = (msr & 0x800000) != 0;
+ kfpu_end();
+ return (res);
+}
+
/*
* Check if AltiVec instruction set is available
*/
diff --git a/sys/contrib/openzfs/include/os/linux/kernel/linux/vfs_compat.h b/sys/contrib/openzfs/include/os/linux/kernel/linux/vfs_compat.h
index 91e908598fbb..b5ff1559ece6 100644
--- a/sys/contrib/openzfs/include/os/linux/kernel/linux/vfs_compat.h
+++ b/sys/contrib/openzfs/include/os/linux/kernel/linux/vfs_compat.h
@@ -296,11 +296,7 @@ static inline struct dentry *file_dentry(const struct file *f)
static inline uid_t zfs_uid_read_impl(struct inode *ip)
{
-#ifdef HAVE_SUPER_USER_NS
- return (from_kuid(ip->i_sb->s_user_ns, ip->i_uid));
-#else
return (from_kuid(kcred->user_ns, ip->i_uid));
-#endif
}
static inline uid_t zfs_uid_read(struct inode *ip)
@@ -310,11 +306,7 @@ static inline uid_t zfs_uid_read(struct inode *ip)
static inline gid_t zfs_gid_read_impl(struct inode *ip)
{
-#ifdef HAVE_SUPER_USER_NS
- return (from_kgid(ip->i_sb->s_user_ns, ip->i_gid));
-#else
return (from_kgid(kcred->user_ns, ip->i_gid));
-#endif
}
static inline gid_t zfs_gid_read(struct inode *ip)
@@ -324,20 +316,12 @@ static inline gid_t zfs_gid_read(struct inode *ip)
static inline void zfs_uid_write(struct inode *ip, uid_t uid)
{
-#ifdef HAVE_SUPER_USER_NS
- ip->i_uid = make_kuid(ip->i_sb->s_user_ns, uid);
-#else
ip->i_uid = make_kuid(kcred->user_ns, uid);
-#endif
}
static inline void zfs_gid_write(struct inode *ip, gid_t gid)
{
-#ifdef HAVE_SUPER_USER_NS
- ip->i_gid = make_kgid(ip->i_sb->s_user_ns, gid);
-#else
ip->i_gid = make_kgid(kcred->user_ns, gid);
-#endif
}
/*
diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/uio.h b/sys/contrib/openzfs/include/os/linux/spl/sys/uio.h
index 439eec986236..fe2b5c07a018 100644
--- a/sys/contrib/openzfs/include/os/linux/spl/sys/uio.h
+++ b/sys/contrib/openzfs/include/os/linux/spl/sys/uio.h
@@ -69,9 +69,20 @@ typedef struct zfs_uio {
uint16_t uio_fmode;
uint16_t uio_extflg;
ssize_t uio_resid;
+
size_t uio_skip;
+
+ struct request *rq;
+
+ /*
+ * Used for saving rq_for_each_segment() state between calls
+ * to zfs_uiomove_bvec_rq().
+ */
+ struct req_iterator iter;
+ struct bio_vec bv;
} zfs_uio_t;
+
#define zfs_uio_segflg(u) (u)->uio_segflg
#define zfs_uio_offset(u) (u)->uio_loffset
#define zfs_uio_resid(u) (u)->uio_resid
@@ -116,17 +127,33 @@ zfs_uio_iovec_init(zfs_uio_t *uio, const struct iovec *iov,
}
+/*
+ * Initialize a UIO_BVEC uio from either a bio or a request.
+ *
+ * Exactly one of bio and rq is expected to be non-NULL.  For a bio the
+ * bio_vec array, remaining vector count and skip are taken from the bio.
+ * For a request the uio starts with no bio_vec array and a zeroed
+ * req_iterator.  Offset and size come from io_offset()/io_size() in both
+ * cases.
+ */
static inline void
-zfs_uio_bvec_init(zfs_uio_t *uio, struct bio *bio)
+zfs_uio_bvec_init(zfs_uio_t *uio, struct bio *bio, struct request *rq)
{
- uio->uio_bvec = &bio->bi_io_vec[BIO_BI_IDX(bio)];
- uio->uio_iovcnt = bio->bi_vcnt - BIO_BI_IDX(bio);
- uio->uio_loffset = BIO_BI_SECTOR(bio) << 9;
+	/* Either bio or rq will be set, but not both */
+	ASSERT((bio != NULL) != (rq != NULL));
+
+	if (bio != NULL) {
+		uio->uio_iovcnt = bio->bi_vcnt - BIO_BI_IDX(bio);
+		uio->uio_bvec = &bio->bi_io_vec[BIO_BI_IDX(bio)];
+		uio->uio_skip = BIO_BI_SKIP(bio);
+	} else {
+		/* Request case: start from a zeroed iterator. */
+		uio->uio_bvec = NULL;
+		uio->uio_iovcnt = 0;
+		uio->uio_skip = 0;
+		memset(&uio->iter, 0, sizeof (uio->iter));
+	}
+
+	uio->uio_loffset = io_offset(bio, rq);
uio->uio_segflg = UIO_BVEC;
uio->uio_fault_disable = B_FALSE;
uio->uio_fmode = 0;
uio->uio_extflg = 0;
- uio->uio_resid = BIO_BI_SIZE(bio);
- uio->uio_skip = BIO_BI_SKIP(bio);
+	uio->uio_resid = io_size(bio, rq);
+	uio->rq = rq;
}
#if defined(HAVE_VFS_IOV_ITER)
diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/zone.h b/sys/contrib/openzfs/include/os/linux/spl/sys/zone.h
index 00e30f690c38..5978a6285fa1 100644
--- a/sys/contrib/openzfs/include/os/linux/spl/sys/zone.h
+++ b/sys/contrib/openzfs/include/os/linux/spl/sys/zone.h
@@ -25,11 +25,34 @@
#define _SPL_ZONE_H
#include <sys/byteorder.h>
+#include <sys/cred.h>
-#define GLOBAL_ZONEID 0
+#include <linux/cred.h>
+#include <linux/user_namespace.h>
-#define zone_dataset_visible(x, y) (1)
-#define crgetzoneid(x) (GLOBAL_ZONEID)
-#define INGLOBALZONE(z) (1)
+/*
+ * Attach the given dataset to the given user namespace.
+ */
+extern int zone_dataset_attach(cred_t *, const char *, int);
+
+/*
+ * Detach the given dataset from the given user namespace.
+ */
+extern int zone_dataset_detach(cred_t *, const char *, int);
+
+/*
+ * Returns true if the named pool/dataset is visible in the current zone.
+ */
+extern int zone_dataset_visible(const char *dataset, int *write);
+
+int spl_zone_init(void);
+void spl_zone_fini(void);
+
+extern unsigned int crgetzoneid(const cred_t *);
+extern unsigned int global_zoneid(void);
+extern boolean_t inglobalzone(proc_t *);
+
+#define INGLOBALZONE(x) inglobalzone(x)
+#define GLOBAL_ZONEID global_zoneid()
#endif /* SPL_ZONE_H */
diff --git a/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_context_os.h b/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_context_os.h
index 9e4260558285..ac8bccab4f0d 100644
--- a/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_context_os.h
+++ b/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_context_os.h
@@ -32,4 +32,9 @@
#define HAVE_LARGE_STACKS 1
#endif
+#if defined(CONFIG_UML)
+#undef setjmp
+#undef longjmp
+#endif
+
#endif
diff --git a/sys/contrib/openzfs/include/sys/blake3.h b/sys/contrib/openzfs/include/sys/blake3.h
new file mode 100644
index 000000000000..b3391c5f2349
--- /dev/null
+++ b/sys/contrib/openzfs/include/sys/blake3.h
@@ -0,0 +1,125 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3
+ * Copyright (c) 2019-2020 Samuel Neves and Jack O'Connor
+ * Copyright (c) 2021 Tino Reichardt <milky-zfs@mcmilk.de>
+ */
+
+#ifndef BLAKE3_H
+#define BLAKE3_H
+
+#ifdef _KERNEL
+#include <sys/types.h>
+#else
+#include <stdint.h>
+#include <stdlib.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define BLAKE3_KEY_LEN 32
+#define BLAKE3_OUT_LEN 32
+#define BLAKE3_MAX_DEPTH 54
+#define BLAKE3_BLOCK_LEN 64
+#define BLAKE3_CHUNK_LEN 1024
+
+/*
+ * This struct is a private implementation detail.
+ * It has to be here because it's part of BLAKE3_CTX below.
+ */
+/* NOTE(review): field roles below are inferred from their names -- confirm */
+typedef struct {
+ uint32_t cv[8]; /* 8 x 32 bits, i.e. BLAKE3_OUT_LEN bytes */
+ uint64_t chunk_counter;
+ uint8_t buf[BLAKE3_BLOCK_LEN]; /* holds one BLAKE3_BLOCK_LEN-byte block */
+ uint8_t buf_len; /* presumably the number of bytes used in buf */
+ uint8_t blocks_compressed;
+ uint8_t flags;
+} blake3_chunk_state_t;
+
+/*
+ * Hashing context passed to the Blake3_*() functions declared below.
+ */
+typedef struct {
+ uint32_t key[8]; /* 8 x 32 bits, i.e. BLAKE3_KEY_LEN bytes */
+ blake3_chunk_state_t chunk;
+ uint8_t cv_stack_len; /* presumably the number of entries in cv_stack */
+
+ /*
+ * The stack size is MAX_DEPTH + 1 because we do lazy merging. For
+ * example, with 7 chunks, we have 3 entries in the stack. Adding an
+ * 8th chunk requires a 4th entry, rather than merging everything down
+ * to 1, because we don't know whether more input is coming. This is
+ * different from how the reference implementation does things.
+ */
+ uint8_t cv_stack[(BLAKE3_MAX_DEPTH + 1) * BLAKE3_OUT_LEN];
+
+ /* const blake3_impl_ops_t *ops */
+ const void *ops;
+} BLAKE3_CTX;
+
+/* init the context for hash operation */
+void Blake3_Init(BLAKE3_CTX *ctx);
+
+/* init the context for a MAC and/or tree hash operation */
+void Blake3_InitKeyed(BLAKE3_CTX *ctx, const uint8_t key[BLAKE3_KEY_LEN]);
+
+/* process the input bytes */
+void Blake3_Update(BLAKE3_CTX *ctx, const void *input, size_t input_len);
+
+/* finalize the hash computation and output the result */
+void Blake3_Final(const BLAKE3_CTX *ctx, uint8_t *out);
+
+/* finalize the hash computation and output out_len bytes at offset seek */
+void Blake3_FinalSeek(const BLAKE3_CTX *ctx, uint64_t seek, uint8_t *out,
+ size_t out_len);
+
+/* these are pre-allocated contexts */
+extern void **blake3_per_cpu_ctx;
+extern void blake3_per_cpu_ctx_init(void);
+extern void blake3_per_cpu_ctx_fini(void);
+
+/* return number of supported implementations */
+extern int blake3_get_impl_count(void);
+
+/* return id of selected implementation */
+extern int blake3_get_impl_id(void);
+
+/* return name of selected implementation */
+extern const char *blake3_get_impl_name(void);
+
+/* setup id as fastest implementation */
+extern void blake3_set_impl_fastest(uint32_t id);
+
+/* set implementation by id */
+extern void blake3_set_impl_id(uint32_t id);
+
+/* set implementation by name */
+extern int blake3_set_impl_name(const char *name);
+
+/* set startup implementation */
+extern void blake3_setup_impl(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* BLAKE3_H */
diff --git a/sys/contrib/openzfs/include/sys/fs/zfs.h b/sys/contrib/openzfs/include/sys/fs/zfs.h
index 9cd1e32cd053..f013e6b20603 100644
--- a/sys/contrib/openzfs/include/sys/fs/zfs.h
+++ b/sys/contrib/openzfs/include/sys/fs/zfs.h
@@ -93,6 +93,7 @@ typedef enum dmu_objset_type {
typedef enum {
ZPROP_CONT = -2,
ZPROP_INVAL = -1,
+ ZPROP_USERPROP = ZPROP_INVAL,
ZFS_PROP_TYPE = 0,
ZFS_PROP_CREATION,
ZFS_PROP_USED,
@@ -310,7 +311,7 @@ typedef int (*zprop_func)(int, void *);
*/
typedef enum {
VDEV_PROP_INVAL = -1,
-#define VDEV_PROP_USER VDEV_PROP_INVAL
+ VDEV_PROP_USERPROP = VDEV_PROP_INVAL,
VDEV_PROP_NAME,
VDEV_PROP_CAPACITY,
VDEV_PROP_STATE,
@@ -1450,7 +1451,9 @@ typedef enum zfs_ioc {
ZFS_IOC_EVENTS_SEEK, /* 0x83 (Linux) */
ZFS_IOC_NEXTBOOT, /* 0x84 (FreeBSD) */
ZFS_IOC_JAIL, /* 0x85 (FreeBSD) */
+ ZFS_IOC_USERNS_ATTACH = ZFS_IOC_JAIL, /* 0x85 (Linux) */
ZFS_IOC_UNJAIL, /* 0x86 (FreeBSD) */
+ ZFS_IOC_USERNS_DETACH = ZFS_IOC_UNJAIL, /* 0x86 (Linux) */
ZFS_IOC_SET_BOOTENV, /* 0x87 */
ZFS_IOC_GET_BOOTENV, /* 0x88 */
ZFS_IOC_LAST
@@ -1531,6 +1534,7 @@ typedef enum {
ZFS_ERR_REBUILD_IN_PROGRESS,
ZFS_ERR_BADPROP,
ZFS_ERR_VDEV_NOTSUP,
+ ZFS_ERR_NOT_USER_NAMESPACE,
} zfs_errno_t;
/*
diff --git a/sys/contrib/openzfs/include/sys/zfs_chksum.h b/sys/contrib/openzfs/include/sys/zfs_chksum.h
new file mode 100644
index 000000000000..cfd07bd0ffe7
--- /dev/null
+++ b/sys/contrib/openzfs/include/sys/zfs_chksum.h
@@ -0,0 +1,48 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2021 Tino Reichardt <milky-zfs@mcmilk.de>
+ */
+
+#ifndef _ZFS_CHKSUM_H
+#define _ZFS_CHKSUM_H
+
+#ifdef _KERNEL
+#include <sys/types.h>
+#else
+#include <stdint.h>
+#include <stdlib.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Benchmark the chksums of ZFS when the module is loading */
+void chksum_init(void);
+void chksum_fini(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ZFS_CHKSUM_H */
diff --git a/sys/contrib/openzfs/include/sys/zfs_ioctl.h b/sys/contrib/openzfs/include/sys/zfs_ioctl.h
index 4fb15636ecb8..94522179676a 100644
--- a/sys/contrib/openzfs/include/sys/zfs_ioctl.h
+++ b/sys/contrib/openzfs/include/sys/zfs_ioctl.h
@@ -124,6 +124,7 @@ typedef enum drr_headertype {
* default use of "zfs send" won't encounter the bug mentioned above.
*/
#define DMU_BACKUP_FEATURE_SWITCH_TO_LARGE_BLOCKS (1 << 27)
+#define DMU_BACKUP_FEATURE_BLAKE3 (1 << 28)
/*
* Mask of all supported backup features
@@ -134,7 +135,7 @@ typedef enum drr_headertype {
DMU_BACKUP_FEATURE_COMPRESSED | DMU_BACKUP_FEATURE_LARGE_DNODE | \
DMU_BACKUP_FEATURE_RAW | DMU_BACKUP_FEATURE_HOLDS | \
DMU_BACKUP_FEATURE_REDACTED | DMU_BACKUP_FEATURE_SWITCH_TO_LARGE_BLOCKS | \
- DMU_BACKUP_FEATURE_ZSTD)
+ DMU_BACKUP_FEATURE_ZSTD | DMU_BACKUP_FEATURE_BLAKE3)
/* Are all features in the given flag word currently supported? */
#define DMU_STREAM_SUPPORTED(x) (!((x) & ~DMU_BACKUP_FEATURE_MASK))
diff --git a/sys/contrib/openzfs/include/sys/zio.h b/sys/contrib/openzfs/include/sys/zio.h
index 7b78f08787bf..4b624165f8b3 100644
--- a/sys/contrib/openzfs/include/sys/zio.h
+++ b/sys/contrib/openzfs/include/sys/zio.h
@@ -89,6 +89,7 @@ enum zio_checksum {
ZIO_CHECKSUM_SHA512,
ZIO_CHECKSUM_SKEIN,
ZIO_CHECKSUM_EDONR,
+ ZIO_CHECKSUM_BLAKE3,
ZIO_CHECKSUM_FUNCTIONS
};
diff --git a/sys/contrib/openzfs/include/sys/zio_checksum.h b/sys/contrib/openzfs/include/sys/zio_checksum.h
index 9a73a626229b..a2ce5081644c 100644
--- a/sys/contrib/openzfs/include/sys/zio_checksum.h
+++ b/sys/contrib/openzfs/include/sys/zio_checksum.h
@@ -21,7 +21,8 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2016 by Delphix. All rights reserved.
- * Copyright Saso Kiselkov 2013, All rights reserved.
+ * Copyright (c) 2013 Saso Kiselkov, All rights reserved.
+ * Copyright (c) 2021 Tino Reichardt <milky-zfs@mcmilk.de>
*/
#ifndef _SYS_ZIO_CHECKSUM_H
@@ -107,6 +108,8 @@ _SYS_ZIO_CHECKSUM_H zio_checksum_info_t
/*
* Checksum routines.
*/
+
+/* SHA2 */
extern zio_checksum_t abd_checksum_SHA256;
extern zio_checksum_t abd_checksum_SHA512_native;
extern zio_checksum_t abd_checksum_SHA512_byteswap;
@@ -123,6 +126,13 @@ extern zio_checksum_t abd_checksum_edonr_byteswap;
extern zio_checksum_tmpl_init_t abd_checksum_edonr_tmpl_init;
extern zio_checksum_tmpl_free_t abd_checksum_edonr_tmpl_free;
+/* BLAKE3 */
+extern zio_checksum_t abd_checksum_blake3_native;
+extern zio_checksum_t abd_checksum_blake3_byteswap;
+extern zio_checksum_tmpl_init_t abd_checksum_blake3_tmpl_init;
+extern zio_checksum_tmpl_free_t abd_checksum_blake3_tmpl_free;
+
+/* Fletcher 4 */
_SYS_ZIO_CHECKSUM_H zio_abd_checksum_func_t fletcher_4_abd_ops;
extern zio_checksum_t abd_fletcher_4_native;
extern zio_checksum_t abd_fletcher_4_byteswap;
diff --git a/sys/contrib/openzfs/include/zfeature_common.h b/sys/contrib/openzfs/include/zfeature_common.h
index d4d636f9c266..d98345fe6850 100644
--- a/sys/contrib/openzfs/include/zfeature_common.h
+++ b/sys/contrib/openzfs/include/zfeature_common.h
@@ -77,6 +77,7 @@ typedef enum spa_feature {
SPA_FEATURE_DRAID,
SPA_FEATURE_ZILSAXATTR,
SPA_FEATURE_HEAD_ERRLOG,
+ SPA_FEATURE_BLAKE3,
SPA_FEATURES
} spa_feature_t;
diff --git a/sys/contrib/openzfs/lib/libicp/Makefile.am b/sys/contrib/openzfs/lib/libicp/Makefile.am
index 304f49e39005..b7f1d0e1b1e4 100644
--- a/sys/contrib/openzfs/lib/libicp/Makefile.am
+++ b/sys/contrib/openzfs/lib/libicp/Makefile.am
@@ -13,6 +13,10 @@ nodist_libicp_la_SOURCES = \
module/icp/algs/aes/aes_impl_x86-64.c \
module/icp/algs/aes/aes_impl.c \
module/icp/algs/aes/aes_modes.c \
+ module/icp/algs/blake3/blake3.c \
+ module/icp/algs/blake3/blake3_generic.c \
+ module/icp/algs/blake3/blake3_impl.c \
+ module/icp/algs/blake3/blake3_x86-64.c \
module/icp/algs/edonr/edonr.c \
module/icp/algs/modes/modes.c \
module/icp/algs/modes/cbc.c \
@@ -36,15 +40,30 @@ nodist_libicp_la_SOURCES = \
module/icp/core/kcf_mech_tabs.c \
module/icp/core/kcf_prov_tabs.c
-if TARGET_CPU_X86_64
+if TARGET_CPU_AARCH64
+nodist_libicp_la_SOURCES += \
+ module/icp/asm-aarch64/blake3/b3_aarch64_sse2.S \
+ module/icp/asm-aarch64/blake3/b3_aarch64_sse41.S
+endif
+
+if TARGET_CPU_POWERPC
nodist_libicp_la_SOURCES += \
- module/icp/asm-x86_64/aes/aeskey.c
+ module/icp/asm-ppc64/blake3/b3_ppc64le_sse2.S \
+ module/icp/asm-ppc64/blake3/b3_ppc64le_sse41.S
+endif
+
+if TARGET_CPU_X86_64
nodist_libicp_la_SOURCES += \
+ module/icp/asm-x86_64/aes/aeskey.c \
module/icp/asm-x86_64/aes/aes_amd64.S \
module/icp/asm-x86_64/aes/aes_aesni.S \
module/icp/asm-x86_64/modes/gcm_pclmulqdq.S \
module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S \
module/icp/asm-x86_64/modes/ghash-x86_64.S \
module/icp/asm-x86_64/sha2/sha256_impl.S \
- module/icp/asm-x86_64/sha2/sha512_impl.S
+ module/icp/asm-x86_64/sha2/sha512_impl.S \
+ module/icp/asm-x86_64/blake3/blake3_avx2.S \
+ module/icp/asm-x86_64/blake3/blake3_avx512.S \
+ module/icp/asm-x86_64/blake3/blake3_sse2.S \
+ module/icp/asm-x86_64/blake3/blake3_sse41.S
endif
diff --git a/sys/contrib/openzfs/lib/libspl/include/sys/simd.h b/sys/contrib/openzfs/lib/libspl/include/sys/simd.h
index 6ef836c16e5c..6a6d8b7c6191 100644
--- a/sys/contrib/openzfs/lib/libspl/include/sys/simd.h
+++ b/sys/contrib/openzfs/lib/libspl/include/sys/simd.h
@@ -491,6 +491,24 @@ zfs_altivec_available(void)
#endif
return (has_altivec);
}
+/*
+ * Check if VSX instruction set is available
+ *
+ * When built with __ALTIVEC__ defined and not on FreeBSD, this probes at
+ * run time: it executes an xssubsp instruction with a temporary SIGILL
+ * handler installed and reports B_TRUE only if execution continues past it.
+ * In every other build it returns B_FALSE.
+ */
+static inline boolean_t
+zfs_vsx_available(void)
+{
+ boolean_t has_vsx = B_FALSE;
+#if defined(__ALTIVEC__) && !defined(__FreeBSD__)
+ sighandler_t savesig;
+ /* Install the probe handler and remember the previous one. */
+ savesig = signal(SIGILL, sigillhandler);
+ /* NOTE(review): sigillhandler presumably longjmp()s to env -- confirm */
+ if (setjmp(env)) {
+ /* setjmp() returned non-zero: the probe instruction did not complete. */
+ signal(SIGILL, savesig);
+ has_vsx = B_FALSE;
+ } else {
+ /* Probe instruction; reaching the next line means no SIGILL jump. */
+ __asm__ __volatile__("xssubsp 0,0,0\n");
+ signal(SIGILL, savesig);
+ has_vsx = B_TRUE;
+ }
+#endif
+ return (has_vsx);
+}
#else
#define kfpu_allowed() 0
diff --git a/sys/contrib/openzfs/lib/libspl/include/sys/types.h b/sys/contrib/openzfs/lib/libspl/include/sys/types.h
index f32c2188a111..8dc38ae3394f 100644
--- a/sys/contrib/openzfs/lib/libspl/include/sys/types.h
+++ b/sys/contrib/openzfs/lib/libspl/include/sys/types.h
@@ -44,7 +44,7 @@
#include <inttypes.h>
#endif /* HAVE_INTTYPES */
-typedef int zoneid_t;
+typedef uint_t zoneid_t;
typedef int projid_t;
/*
diff --git a/sys/contrib/openzfs/lib/libspl/include/zone.h b/sys/contrib/openzfs/lib/libspl/include/zone.h
index b0ac2d9bc610..0af4e7a2fa49 100644
--- a/sys/contrib/openzfs/lib/libspl/include/zone.h
+++ b/sys/contrib/openzfs/lib/libspl/include/zone.h
@@ -33,7 +33,17 @@
extern "C" {
#endif
-#define GLOBAL_ZONEID 0
+#ifdef __FreeBSD__
+#define GLOBAL_ZONEID 0
+#else
+/*
+ * Hardcoded in the kernel's root user namespace. A "better" way to get
+ * this would be by using ioctl_ns(2), but this would need to be performed
+ * recursively on NS_GET_PARENT and then NS_GET_USERNS. Also, that's only
+ * supported since Linux 4.9.
+ */
+#define GLOBAL_ZONEID 4026531837U
+#endif
extern zoneid_t getzoneid(void);
diff --git a/sys/contrib/openzfs/lib/libspl/os/linux/zone.c b/sys/contrib/openzfs/lib/libspl/os/linux/zone.c
index 393a16ad5cdd..65c02dfe7aab 100644
--- a/sys/contrib/openzfs/lib/libspl/os/linux/zone.c
+++ b/sys/contrib/openzfs/lib/libspl/os/linux/zone.c
@@ -23,10 +23,40 @@
* Use is subject to license terms.
*/
+#include <unistd.h>
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <string.h>
+
#include <zone.h>
+/*
+ * Return the zone id of the calling process, which on Linux is the number
+ * found between the brackets of the /proc/self/ns/user link target.
+ *
+ * getzoneid() has no way to report an error, so 0 is returned when the
+ * link cannot be read, contains no '[', or holds a number that does not
+ * fit in zoneid_t.
+ */
zoneid_t
getzoneid(void)
{
- return (GLOBAL_ZONEID);
+	static const char nspath[] = "/proc/self/ns/user";
+	char buf[128] = { '\0' };
+	char *cp;
+	unsigned long n;
+	ssize_t r;
+
+	/* readlink() does not NUL-terminate; leave room and do it here. */
+	r = readlink(nspath, buf, sizeof (buf) - 1);
+	if (r < 0)
+		return (0);
+	buf[r] = '\0';
+
+	cp = strchr(buf, '[');
+	if (cp == NULL)
+		return (0);
+	cp++;
+
+	/* strtoul() only sets errno on failure, so clear it first. */
+	errno = 0;
+	n = strtoul(cp, NULL, 10);
+	if (errno != 0)
+		return (0);
+
+	/* Reject values that would be truncated by the narrowing cast. */
+	if ((unsigned long)(zoneid_t)n != n)
+		return (0);
+
+	return ((zoneid_t)n);
}
diff --git a/sys/contrib/openzfs/lib/libuutil/libuutil.abi b/sys/contrib/openzfs/lib/libuutil/libuutil.abi
index 86220b44b229..766d8843000d 100644
--- a/sys/contrib/openzfs/lib/libuutil/libuutil.abi
+++ b/sys/contrib/openzfs/lib/libuutil/libuutil.abi
@@ -1081,7 +1081,7 @@
</function-decl>
</abi-instr>
<abi-instr address-size='64' path='os/linux/zone.c' language='LANG_C99'>
- <typedef-decl name='zoneid_t' type-id='95e97e5e' id='4da03624'/>
+ <typedef-decl name='zoneid_t' type-id='3502e3ff' id='4da03624'/>
<function-decl name='getzoneid' mangled-name='getzoneid' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='getzoneid'>
<return type-id='4da03624'/>
</function-decl>
diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs.abi b/sys/contrib/openzfs/lib/libzfs/libzfs.abi
index 8a71da95148e..fb5e01b82c40 100644
--- a/sys/contrib/openzfs/lib/libzfs/libzfs.abi
+++ b/sys/contrib/openzfs/lib/libzfs/libzfs.abi
@@ -433,6 +433,7 @@
<elf-symbol name='zfs_unmountall' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_unshare' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_unshareall' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+ <elf-symbol name='zfs_userns' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_userspace' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_valid_proplist' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_version_kernel' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
@@ -583,7 +584,7 @@
<elf-symbol name='fletcher_4_superscalar_ops' size='64' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='libzfs_config_ops' size='16' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='sa_protocol_names' size='16' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
- <elf-symbol name='spa_feature_table' size='2016' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+ <elf-symbol name='spa_feature_table' size='2072' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfeature_checks_disable' size='4' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_deleg_perm_tab' size='512' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_history_event_names' size='328' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
@@ -1537,7 +1538,7 @@
</function-decl>
</abi-instr>
<abi-instr address-size='64' path='lib/libspl/os/linux/zone.c' language='LANG_C99'>
- <typedef-decl name='zoneid_t' type-id='95e97e5e' id='4da03624'/>
+ <typedef-decl name='zoneid_t' type-id='3502e3ff' id='4da03624'/>
<function-decl name='getzoneid' mangled-name='getzoneid' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='getzoneid'>
<return type-id='4da03624'/>
</function-decl>
@@ -4414,6 +4415,12 @@
<function-decl name='zfs_version_kernel' mangled-name='zfs_version_kernel' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_version_kernel'>
<return type-id='26a90f95'/>
</function-decl>
+ <function-decl name='zfs_userns' mangled-name='zfs_userns' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_userns'>
+ <parameter type-id='9200a744' name='zhp'/>
+ <parameter type-id='80f4b756' name='nspath'/>
+ <parameter type-id='95e97e5e' name='attach'/>
+ <return type-id='95e97e5e'/>
+ </function-decl>
</abi-instr>
<abi-instr address-size='64' path='lib/libzutil/os/linux/zutil_device_path_os.c' language='LANG_C99'>
<function-decl name='zfs_append_partition' mangled-name='zfs_append_partition' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_append_partition'>
@@ -4770,8 +4777,8 @@
</function-decl>
</abi-instr>
<abi-instr address-size='64' path='module/zcommon/zfeature_common.c' language='LANG_C99'>
- <array-type-def dimensions='1' type-id='83f29ca2' size-in-bits='16128' id='9d5e9e2e'>
- <subrange length='36' type-id='7359adad' id='ae666bde'/>
+ <array-type-def dimensions='1' type-id='83f29ca2' size-in-bits='16576' id='9d5e9e2e'>
+ <subrange length='37' type-id='7359adad' id='ae666bde'/>
</array-type-def>
<enum-decl name='spa_feature' id='33ecb627'>
<underlying-type type-id='9cac1fee'/>
@@ -4812,7 +4819,8 @@
<enumerator name='SPA_FEATURE_DRAID' value='33'/>
<enumerator name='SPA_FEATURE_ZILSAXATTR' value='34'/>
<enumerator name='SPA_FEATURE_HEAD_ERRLOG' value='35'/>
- <enumerator name='SPA_FEATURES' value='36'/>
+ <enumerator name='SPA_FEATURE_BLAKE3' value='36'/>
+ <enumerator name='SPA_FEATURES' value='37'/>
</enum-decl>
<typedef-decl name='spa_feature_t' type-id='33ecb627' id='d6618c78'/>
<enum-decl name='zfeature_flags' id='6db816a4'>
diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_crypto.c b/sys/contrib/openzfs/lib/libzfs/libzfs_crypto.c
index 1effe74f33d6..737b3f4dccd7 100644
--- a/sys/contrib/openzfs/lib/libzfs/libzfs_crypto.c
+++ b/sys/contrib/openzfs/lib/libzfs/libzfs_crypto.c
@@ -1003,7 +1003,7 @@ zfs_crypto_create(libzfs_handle_t *hdl, char *parent_name, nvlist_t *props,
uint_t *wkeylen_out)
{
int ret;
- char errbuf[1024];
+ char errbuf[ERRBUFLEN];
uint64_t crypt = ZIO_CRYPT_INHERIT, pcrypt = ZIO_CRYPT_INHERIT;
uint64_t keyformat = ZFS_KEYFORMAT_NONE;
char *keylocation = NULL;
@@ -1174,7 +1174,7 @@ zfs_crypto_clone_check(libzfs_handle_t *hdl, zfs_handle_t *origin_zhp,
char *parent_name, nvlist_t *props)
{
(void) origin_zhp, (void) parent_name;
- char errbuf[1024];
+ char errbuf[ERRBUFLEN];
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "Encryption clone error"));
@@ -1276,7 +1276,7 @@ zfs_crypto_load_key(zfs_handle_t *zhp, boolean_t noop,
const char *alt_keylocation)
{
int ret, attempts = 0;
- char errbuf[1024];
+ char errbuf[ERRBUFLEN];
uint64_t keystatus, iters = 0, salt = 0;
uint64_t keyformat = ZFS_KEYFORMAT_NONE;
char prop_keylocation[MAXNAMELEN];
@@ -1444,7 +1444,7 @@ int
zfs_crypto_unload_key(zfs_handle_t *zhp)
{
int ret;
- char errbuf[1024];
+ char errbuf[ERRBUFLEN];
char prop_encroot[MAXNAMELEN];
uint64_t keystatus, keyformat;
boolean_t is_encroot;
@@ -1580,7 +1580,7 @@ int
zfs_crypto_rewrap(zfs_handle_t *zhp, nvlist_t *raw_props, boolean_t inheritkey)
{
int ret;
- char errbuf[1024];
+ char errbuf[ERRBUFLEN];
boolean_t is_encroot;
nvlist_t *props = NULL;
uint8_t *wkeydata = NULL;
diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_dataset.c b/sys/contrib/openzfs/lib/libzfs/libzfs_dataset.c
index 1f6addcae58a..24d9b81f4976 100644
--- a/sys/contrib/openzfs/lib/libzfs/libzfs_dataset.c
+++ b/sys/contrib/openzfs/lib/libzfs/libzfs_dataset.c
@@ -678,7 +678,7 @@ zfs_handle_t *
zfs_open(libzfs_handle_t *hdl, const char *path, int types)
{
zfs_handle_t *zhp;
- char errbuf[1024];
+ char errbuf[ERRBUFLEN];
char *bookp;
(void) snprintf(errbuf, sizeof (errbuf),
@@ -1022,7 +1022,7 @@ zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl,
const char *propname = nvpair_name(elem);
prop = zfs_name_to_prop(propname);
- if (prop == ZPROP_INVAL && zfs_prop_user(propname)) {
+ if (prop == ZPROP_USERPROP && zfs_prop_user(propname)) {
/*
* This is a user property: make sure it's a
* string, and that it's less than ZAP_MAXNAMELEN.
@@ -1061,7 +1061,7 @@ zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl,
goto error;
}
- if (prop == ZPROP_INVAL && zfs_prop_userquota(propname)) {
+ if (prop == ZPROP_USERPROP && zfs_prop_userquota(propname)) {
zfs_userquota_prop_t uqtype;
char *newpropname = NULL;
char domain[128];
@@ -1143,7 +1143,8 @@ zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl,
}
free(newpropname);
continue;
- } else if (prop == ZPROP_INVAL && zfs_prop_written(propname)) {
+ } else if (prop == ZPROP_USERPROP &&
+ zfs_prop_written(propname)) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"'%s' is readonly"),
propname);
@@ -1716,7 +1717,7 @@ int
zfs_prop_set(zfs_handle_t *zhp, const char *propname, const char *propval)
{
int ret = -1;
- char errbuf[1024];
+ char errbuf[ERRBUFLEN];
libzfs_handle_t *hdl = zhp->zfs_hdl;
nvlist_t *nvl = NULL;
@@ -1750,7 +1751,7 @@ zfs_prop_set_list(zfs_handle_t *zhp, nvlist_t *props)
int ret = -1;
prop_changelist_t **cls = NULL;
int cl_idx;
- char errbuf[1024];
+ char errbuf[ERRBUFLEN];
libzfs_handle_t *hdl = zhp->zfs_hdl;
nvlist_t *nvl;
int nvl_len = 0;
@@ -1930,14 +1931,14 @@ zfs_prop_inherit(zfs_handle_t *zhp, const char *propname, boolean_t received)
int ret;
prop_changelist_t *cl;
libzfs_handle_t *hdl = zhp->zfs_hdl;
- char errbuf[1024];
+ char errbuf[ERRBUFLEN];
zfs_prop_t prop;
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"cannot inherit %s for '%s'"), propname, zhp->zfs_name);
zc.zc_cookie = received;
- if ((prop = zfs_name_to_prop(propname)) == ZPROP_INVAL) {
+ if ((prop = zfs_name_to_prop(propname)) == ZPROP_USERPROP) {
/*
* For user properties, the amount of work we have to do is very
* small, so just do it here.
@@ -2356,7 +2357,7 @@ zfs_prop_get_recvd(zfs_handle_t *zhp, const char *propname, char *propbuf,
prop = zfs_name_to_prop(propname);
- if (prop != ZPROP_INVAL) {
+ if (prop != ZPROP_USERPROP) {
uint64_t cookie;
if (!nvlist_exists(zhp->zfs_recvd_props, propname))
return (-1);
@@ -3402,7 +3403,7 @@ check_parents(libzfs_handle_t *hdl, const char *path, uint64_t *zoned,
char parent[ZFS_MAX_DATASET_NAME_LEN];
char *slash;
zfs_handle_t *zhp;
- char errbuf[1024];
+ char errbuf[ERRBUFLEN];
uint64_t is_zoned;
(void) snprintf(errbuf, sizeof (errbuf),
@@ -3580,7 +3581,7 @@ zfs_create_ancestors(libzfs_handle_t *hdl, const char *path)
{
int prefix;
char *path_copy;
- char errbuf[1024];
+ char errbuf[ERRBUFLEN];
int rc = 0;
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
@@ -3624,7 +3625,7 @@ zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type,
zpool_handle_t *zpool_handle;
uint8_t *wkeydata = NULL;
uint_t wkeylen = 0;
- char errbuf[1024];
+ char errbuf[ERRBUFLEN];
char parent[ZFS_MAX_DATASET_NAME_LEN];
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
@@ -3897,7 +3898,7 @@ zfs_destroy_snaps_nvl(libzfs_handle_t *hdl, nvlist_t *snaps, boolean_t defer)
}
if (nvlist_empty(errlist)) {
- char errbuf[1024];
+ char errbuf[ERRBUFLEN];
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot destroy snapshots"));
@@ -3905,7 +3906,7 @@ zfs_destroy_snaps_nvl(libzfs_handle_t *hdl, nvlist_t *snaps, boolean_t defer)
}
for (pair = nvlist_next_nvpair(errlist, NULL);
pair != NULL; pair = nvlist_next_nvpair(errlist, pair)) {
- char errbuf[1024];
+ char errbuf[ERRBUFLEN];
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot destroy snapshot %s"),
nvpair_name(pair));
@@ -3934,7 +3935,7 @@ zfs_clone(zfs_handle_t *zhp, const char *target, nvlist_t *props)
{
char parent[ZFS_MAX_DATASET_NAME_LEN];
int ret;
- char errbuf[1024];
+ char errbuf[ERRBUFLEN];
libzfs_handle_t *hdl = zhp->zfs_hdl;
uint64_t zoned;
@@ -4018,7 +4019,7 @@ zfs_promote(zfs_handle_t *zhp)
libzfs_handle_t *hdl = zhp->zfs_hdl;
char snapname[ZFS_MAX_DATASET_NAME_LEN];
int ret;
- char errbuf[1024];
+ char errbuf[ERRBUFLEN];
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"cannot promote '%s'"), zhp->zfs_name);
@@ -4100,7 +4101,7 @@ int
zfs_snapshot_nvl(libzfs_handle_t *hdl, nvlist_t *snaps, nvlist_t *props)
{
int ret;
- char errbuf[1024];
+ char errbuf[ERRBUFLEN];
nvpair_t *elem;
nvlist_t *errors;
zpool_handle_t *zpool_hdl;
@@ -4185,7 +4186,7 @@ zfs_snapshot(libzfs_handle_t *hdl, const char *path, boolean_t recursive,
char fsname[ZFS_MAX_DATASET_NAME_LEN];
char *cp;
zfs_handle_t *zhp;
- char errbuf[1024];
+ char errbuf[ERRBUFLEN];
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"cannot snapshot %s"), path);
@@ -4328,7 +4329,7 @@ zfs_rollback(zfs_handle_t *zhp, zfs_handle_t *snap, boolean_t force)
*/
err = lzc_rollback_to(zhp->zfs_name, snap->zfs_name);
if (err != 0) {
- char errbuf[1024];
+ char errbuf[ERRBUFLEN];
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot rollback '%s'"),
@@ -4387,7 +4388,7 @@ zfs_rename(zfs_handle_t *zhp, const char *target, renameflags_t flags)
char parent[ZFS_MAX_DATASET_NAME_LEN];
char property[ZFS_MAXPROPLEN];
libzfs_handle_t *hdl = zhp->zfs_hdl;
- char errbuf[1024];
+ char errbuf[ERRBUFLEN];
/* if we have the same exact name, just return success */
if (strcmp(zhp->zfs_name, target) == 0)
@@ -4635,7 +4636,7 @@ zfs_expand_proplist(zfs_handle_t *zhp, zprop_list_t **plp, boolean_t received,
*/
start = plp;
while (*start != NULL) {
- if ((*start)->pl_prop == ZPROP_INVAL)
+ if ((*start)->pl_prop == ZPROP_USERPROP)
break;
start = &(*start)->pl_next;
}
@@ -4656,7 +4657,7 @@ zfs_expand_proplist(zfs_handle_t *zhp, zprop_list_t **plp, boolean_t received,
entry = zfs_alloc(hdl, sizeof (zprop_list_t));
entry->pl_user_prop =
zfs_strdup(hdl, nvpair_name(elem));
- entry->pl_prop = ZPROP_INVAL;
+ entry->pl_prop = ZPROP_USERPROP;
entry->pl_width = strlen(nvpair_name(elem));
entry->pl_all = B_TRUE;
*last = entry;
@@ -4671,7 +4672,7 @@ zfs_expand_proplist(zfs_handle_t *zhp, zprop_list_t **plp, boolean_t received,
if (entry->pl_fixed && !literal)
continue;
- if (entry->pl_prop != ZPROP_INVAL) {
+ if (entry->pl_prop != ZPROP_USERPROP) {
if (zfs_prop_get(zhp, entry->pl_prop,
buf, sizeof (buf), NULL, NULL, 0, literal) == 0) {
if (strlen(buf) > entry->pl_width)
@@ -4720,13 +4721,14 @@ zfs_prune_proplist(zfs_handle_t *zhp, uint8_t *props)
next = nvlist_next_nvpair(zhp->zfs_props, curr);
/*
- * User properties will result in ZPROP_INVAL, and since we
+ * User properties will result in ZPROP_USERPROP (an alias
+ * for ZPROP_INVAL), and since we
* only know how to prune standard ZFS properties, we always
* leave these in the list. This can also happen if we
* encounter an unknown DSL property (when running older
* software, for example).
*/
- if (zfs_prop != ZPROP_INVAL && props[zfs_prop] == B_FALSE)
+ if (zfs_prop != ZPROP_USERPROP && props[zfs_prop] == B_FALSE)
(void) nvlist_remove(zhp->zfs_props,
nvpair_name(curr), nvpair_type(curr));
curr = next;
@@ -4902,7 +4904,7 @@ zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag,
(void) zfs_hold_one(zfs_handle_dup(zhp), &ha);
if (nvlist_empty(ha.nvl)) {
- char errbuf[1024];
+ char errbuf[ERRBUFLEN];
fnvlist_free(ha.nvl);
ret = ENOENT;
@@ -4926,7 +4928,7 @@ zfs_hold_nvl(zfs_handle_t *zhp, int cleanup_fd, nvlist_t *holds)
int ret;
nvlist_t *errors;
libzfs_handle_t *hdl = zhp->zfs_hdl;
- char errbuf[1024];
+ char errbuf[ERRBUFLEN];
nvpair_t *elem;
errors = NULL;
@@ -5028,7 +5030,7 @@ zfs_release(zfs_handle_t *zhp, const char *snapname, const char *tag,
nvlist_t *errors = NULL;
nvpair_t *elem;
libzfs_handle_t *hdl = zhp->zfs_hdl;
- char errbuf[1024];
+ char errbuf[ERRBUFLEN];
ha.nvl = fnvlist_alloc();
ha.snapname = snapname;
@@ -5108,7 +5110,7 @@ zfs_get_fsacl(zfs_handle_t *zhp, nvlist_t **nvl)
int nvsz = 2048;
void *nvbuf;
int err = 0;
- char errbuf[1024];
+ char errbuf[ERRBUFLEN];
assert(zhp->zfs_type == ZFS_TYPE_VOLUME ||
zhp->zfs_type == ZFS_TYPE_FILESYSTEM);
@@ -5172,7 +5174,7 @@ zfs_set_fsacl(zfs_handle_t *zhp, boolean_t un, nvlist_t *nvl)
zfs_cmd_t zc = {"\0"};
libzfs_handle_t *hdl = zhp->zfs_hdl;
char *nvbuf;
- char errbuf[1024];
+ char errbuf[ERRBUFLEN];
size_t nvsz;
int err;
@@ -5224,7 +5226,7 @@ int
zfs_get_holds(zfs_handle_t *zhp, nvlist_t **nvl)
{
int err;
- char errbuf[1024];
+ char errbuf[ERRBUFLEN];
err = lzc_get_holds(zhp->zfs_name, nvl);
diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_diff.c b/sys/contrib/openzfs/lib/libzfs/libzfs_diff.c
index a414024bb427..f0e9a262e6bf 100644
--- a/sys/contrib/openzfs/lib/libzfs/libzfs_diff.c
+++ b/sys/contrib/openzfs/lib/libzfs/libzfs_diff.c
@@ -709,7 +709,7 @@ zfs_show_diffs(zfs_handle_t *zhp, int outfd, const char *fromsnap,
const char *tosnap, int flags)
{
zfs_cmd_t zc = {"\0"};
- char errbuf[1024];
+ char errbuf[ERRBUFLEN];
differ_info_t di = { 0 };
pthread_t tid;
int pipefd[2];
diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_impl.h b/sys/contrib/openzfs/lib/libzfs/libzfs_impl.h
index 926e14a3716c..38104fb5cfa4 100644
--- a/sys/contrib/openzfs/lib/libzfs/libzfs_impl.h
+++ b/sys/contrib/openzfs/lib/libzfs/libzfs_impl.h
@@ -44,6 +44,8 @@
extern "C" {
#endif
+#define ERRBUFLEN 1024
+
struct libzfs_handle {
int libzfs_error;
int libzfs_fd;
@@ -208,7 +210,7 @@ typedef struct differ_info {
char *ds;
char *dsmnt;
char *tmpsnap;
- char errbuf[1024];
+ char errbuf[ERRBUFLEN];
boolean_t isclone;
boolean_t scripted;
boolean_t classify;
diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_pool.c b/sys/contrib/openzfs/lib/libzfs/libzfs_pool.c
index 730e6db53f1b..8333e0e786eb 100644
--- a/sys/contrib/openzfs/lib/libzfs/libzfs_pool.c
+++ b/sys/contrib/openzfs/lib/libzfs/libzfs_pool.c
@@ -776,7 +776,7 @@ zpool_set_prop(zpool_handle_t *zhp, const char *propname, const char *propval)
{
zfs_cmd_t zc = {"\0"};
int ret = -1;
- char errbuf[1024];
+ char errbuf[ERRBUFLEN];
nvlist_t *nvl = NULL;
nvlist_t *realprops;
uint64_t version;
@@ -854,7 +854,7 @@ zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp,
for (i = 0; i < SPA_FEATURES; i++) {
zprop_list_t *entry = zfs_alloc(hdl,
sizeof (zprop_list_t));
- entry->pl_prop = ZPROP_INVAL;
+ entry->pl_prop = ZPROP_USERPROP;
entry->pl_user_prop = zfs_asprintf(hdl, "feature@%s",
spa_feature_table[i].fi_uname);
entry->pl_width = strlen(entry->pl_user_prop);
@@ -898,7 +898,7 @@ zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp,
}
entry = zfs_alloc(hdl, sizeof (zprop_list_t));
- entry->pl_prop = ZPROP_INVAL;
+ entry->pl_prop = ZPROP_USERPROP;
entry->pl_user_prop = propname;
entry->pl_width = strlen(entry->pl_user_prop);
entry->pl_all = B_TRUE;
@@ -911,7 +911,7 @@ zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp,
if (entry->pl_fixed && !literal)
continue;
- if (entry->pl_prop != ZPROP_INVAL &&
+ if (entry->pl_prop != ZPROP_USERPROP &&
zpool_get_prop(zhp, entry->pl_prop, buf, sizeof (buf),
NULL, literal) == 0) {
if (strlen(buf) > entry->pl_width)
@@ -967,7 +967,7 @@ vdev_expand_proplist(zpool_handle_t *zhp, const char *vdevname,
/* Skip properties that are not user defined */
if ((prop = vdev_name_to_prop(propname)) !=
- VDEV_PROP_USER)
+ VDEV_PROP_USERPROP)
continue;
if (nvpair_value_nvlist(elem, &propval) != 0)
@@ -1368,14 +1368,14 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
nvlist_t *hidden_args = NULL;
uint8_t *wkeydata = NULL;
uint_t wkeylen = 0;
- char msg[1024];
+ char errbuf[ERRBUFLEN];
int ret = -1;
- (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
+ (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"cannot create '%s'"), pool);
if (!zpool_name_valid(hdl, B_FALSE, pool))
- return (zfs_error(hdl, EZFS_INVALIDNAME, msg));
+ return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
zcmd_write_conf_nvlist(hdl, &zc, nvroot);
@@ -1383,7 +1383,7 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
prop_flags_t flags = { .create = B_TRUE, .import = B_FALSE };
if ((zc_props = zpool_valid_proplist(hdl, pool, props,
- SPA_VERSION_1, flags, msg)) == NULL) {
+ SPA_VERSION_1, flags, errbuf)) == NULL) {
goto create_failed;
}
}
@@ -1397,7 +1397,7 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
strcmp(zonestr, "on") == 0);
if ((zc_fsprops = zfs_valid_proplist(hdl, ZFS_TYPE_FILESYSTEM,
- fsprops, zoned, NULL, NULL, B_TRUE, msg)) == NULL) {
+ fsprops, zoned, NULL, NULL, B_TRUE, errbuf)) == NULL) {
goto create_failed;
}
@@ -1407,7 +1407,7 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"%s property requires a special vdev"),
zfs_prop_to_name(ZFS_PROP_SPECIAL_SMALL_BLOCKS));
- (void) zfs_error(hdl, EZFS_BADPROP, msg);
+ (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
goto create_failed;
}
@@ -1417,7 +1417,7 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
}
if (zfs_crypto_create(hdl, NULL, zc_fsprops, props, B_TRUE,
&wkeydata, &wkeylen) != 0) {
- zfs_error(hdl, EZFS_CRYPTOFAILED, msg);
+ zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf);
goto create_failed;
}
if (nvlist_add_nvlist(zc_props,
@@ -1465,7 +1465,7 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
"one or more vdevs refer to the same device, or "
"one of\nthe devices is part of an active md or "
"lvm device"));
- return (zfs_error(hdl, EZFS_BADDEV, msg));
+ return (zfs_error(hdl, EZFS_BADDEV, errbuf));
case ERANGE:
/*
@@ -1480,7 +1480,7 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
*/
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"record size invalid"));
- return (zfs_error(hdl, EZFS_BADPROP, msg));
+ return (zfs_error(hdl, EZFS_BADPROP, errbuf));
case EOVERFLOW:
/*
@@ -1499,12 +1499,12 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
"one or more devices is less than the "
"minimum size (%s)"), buf);
}
- return (zfs_error(hdl, EZFS_BADDEV, msg));
+ return (zfs_error(hdl, EZFS_BADDEV, errbuf));
case ENOSPC:
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"one or more devices is out of space"));
- return (zfs_error(hdl, EZFS_BADDEV, msg));
+ return (zfs_error(hdl, EZFS_BADDEV, errbuf));
case EINVAL:
if (zpool_has_draid_vdev(nvroot) &&
@@ -1512,13 +1512,14 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"dRAID vdevs are unsupported by the "
"kernel"));
- return (zfs_error(hdl, EZFS_BADDEV, msg));
+ return (zfs_error(hdl, EZFS_BADDEV, errbuf));
} else {
- return (zpool_standard_error(hdl, errno, msg));
+ return (zpool_standard_error(hdl, errno,
+ errbuf));
}
default:
- return (zpool_standard_error(hdl, errno, msg));
+ return (zpool_standard_error(hdl, errno, errbuf));
}
}
@@ -1542,7 +1543,7 @@ zpool_destroy(zpool_handle_t *zhp, const char *log_str)
zfs_cmd_t zc = {"\0"};
zfs_handle_t *zfp = NULL;
libzfs_handle_t *hdl = zhp->zpool_hdl;
- char msg[1024];
+ char errbuf[ERRBUFLEN];
if (zhp->zpool_state == POOL_STATE_ACTIVE &&
(zfp = zfs_open(hdl, zhp->zpool_name, ZFS_TYPE_FILESYSTEM)) == NULL)
@@ -1552,15 +1553,15 @@ zpool_destroy(zpool_handle_t *zhp, const char *log_str)
zc.zc_history = (uint64_t)(uintptr_t)log_str;
if (zfs_ioctl(hdl, ZFS_IOC_POOL_DESTROY, &zc) != 0) {
- (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
+ (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"cannot destroy '%s'"), zhp->zpool_name);
if (errno == EROFS) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"one or more devices is read only"));
- (void) zfs_error(hdl, EZFS_BADDEV, msg);
+ (void) zfs_error(hdl, EZFS_BADDEV, errbuf);
} else {
- (void) zpool_standard_error(hdl, errno, msg);
+ (void) zpool_standard_error(hdl, errno, errbuf);
}
if (zfp)
@@ -1583,14 +1584,14 @@ int
zpool_checkpoint(zpool_handle_t *zhp)
{
libzfs_handle_t *hdl = zhp->zpool_hdl;
- char msg[1024];
+ char errbuf[ERRBUFLEN];
int error;
error = lzc_pool_checkpoint(zhp->zpool_name);
if (error != 0) {
- (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
+ (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"cannot checkpoint '%s'"), zhp->zpool_name);
- (void) zpool_standard_error(hdl, error, msg);
+ (void) zpool_standard_error(hdl, error, errbuf);
return (-1);
}
@@ -1604,14 +1605,14 @@ int
zpool_discard_checkpoint(zpool_handle_t *zhp)
{
libzfs_handle_t *hdl = zhp->zpool_hdl;
- char msg[1024];
+ char errbuf[ERRBUFLEN];
int error;
error = lzc_pool_checkpoint_discard(zhp->zpool_name);
if (error != 0) {
- (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
+ (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"cannot discard checkpoint in '%s'"), zhp->zpool_name);
- (void) zpool_standard_error(hdl, error, msg);
+ (void) zpool_standard_error(hdl, error, errbuf);
return (-1);
}
@@ -1628,11 +1629,11 @@ zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
zfs_cmd_t zc = {"\0"};
int ret;
libzfs_handle_t *hdl = zhp->zpool_hdl;
- char msg[1024];
+ char errbuf[ERRBUFLEN];
nvlist_t **spares, **l2cache;
uint_t nspares, nl2cache;
- (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
+ (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"cannot add to '%s'"), zhp->zpool_name);
if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
@@ -1641,7 +1642,7 @@ zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
&spares, &nspares) == 0) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
"upgraded to add hot spares"));
- return (zfs_error(hdl, EZFS_BADVERSION, msg));
+ return (zfs_error(hdl, EZFS_BADVERSION, errbuf));
}
if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
@@ -1650,7 +1651,7 @@ zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
&l2cache, &nl2cache) == 0) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
"upgraded to add cache devices"));
- return (zfs_error(hdl, EZFS_BADVERSION, msg));
+ return (zfs_error(hdl, EZFS_BADVERSION, errbuf));
}
zcmd_write_conf_nvlist(hdl, &zc, nvroot);
@@ -1667,7 +1668,7 @@ zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
*/
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"one or more vdevs refer to the same device"));
- (void) zfs_error(hdl, EZFS_BADDEV, msg);
+ (void) zfs_error(hdl, EZFS_BADDEV, errbuf);
break;
case EINVAL:
@@ -1684,7 +1685,7 @@ zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
"raidz or dRAID vdevs"));
}
- (void) zfs_error(hdl, EZFS_BADDEV, msg);
+ (void) zfs_error(hdl, EZFS_BADDEV, errbuf);
break;
case EOVERFLOW:
@@ -1704,17 +1705,17 @@ zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
"device is less than the minimum "
"size (%s)"), buf);
}
- (void) zfs_error(hdl, EZFS_BADDEV, msg);
+ (void) zfs_error(hdl, EZFS_BADDEV, errbuf);
break;
case ENOTSUP:
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"pool must be upgraded to add these vdevs"));
- (void) zfs_error(hdl, EZFS_BADVERSION, msg);
+ (void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
break;
default:
- (void) zpool_standard_error(hdl, errno, msg);
+ (void) zpool_standard_error(hdl, errno, errbuf);
}
ret = -1;
@@ -2009,7 +2010,7 @@ zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
char *origname;
int ret;
int error = 0;
- char errbuf[1024];
+ char errbuf[ERRBUFLEN];
origname = fnvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME);
@@ -2516,11 +2517,11 @@ zpool_trim(zpool_handle_t *zhp, pool_trim_func_t cmd_type, nvlist_t *vds,
goto out;
}
} else {
- char msg[1024];
+ char errbuf[ERRBUFLEN];
- (void) snprintf(msg, sizeof (msg),
+ (void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "operation failed"));
- zpool_standard_error(zhp->zpool_hdl, err, msg);
+ zpool_standard_error(zhp->zpool_hdl, err, errbuf);
retval = -1;
goto out;
}
@@ -2545,7 +2546,7 @@ int
zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func, pool_scrub_cmd_t cmd)
{
zfs_cmd_t zc = {"\0"};
- char msg[1024];
+ char errbuf[ERRBUFLEN];
int err;
libzfs_handle_t *hdl = zhp->zpool_hdl;
@@ -2568,21 +2569,22 @@ zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func, pool_scrub_cmd_t cmd)
if (func == POOL_SCAN_SCRUB) {
if (cmd == POOL_SCRUB_PAUSE) {
- (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
- "cannot pause scrubbing %s"), zc.zc_name);
+ (void) snprintf(errbuf, sizeof (errbuf),
+ dgettext(TEXT_DOMAIN, "cannot pause scrubbing %s"),
+ zc.zc_name);
} else {
assert(cmd == POOL_SCRUB_NORMAL);
- (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
- "cannot scrub %s"), zc.zc_name);
+ (void) snprintf(errbuf, sizeof (errbuf),
+ dgettext(TEXT_DOMAIN, "cannot scrub %s"),
+ zc.zc_name);
}
} else if (func == POOL_SCAN_RESILVER) {
assert(cmd == POOL_SCRUB_NORMAL);
- (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
+ (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"cannot restart resilver on %s"), zc.zc_name);
} else if (func == POOL_SCAN_NONE) {
- (void) snprintf(msg, sizeof (msg),
- dgettext(TEXT_DOMAIN, "cannot cancel scrubbing %s"),
- zc.zc_name);
+ (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+ "cannot cancel scrubbing %s"), zc.zc_name);
} else {
assert(!"unexpected result");
}
@@ -2599,18 +2601,19 @@ zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func, pool_scrub_cmd_t cmd)
if (ps && ps->pss_func == POOL_SCAN_SCRUB &&
ps->pss_state == DSS_SCANNING) {
if (cmd == POOL_SCRUB_PAUSE)
- return (zfs_error(hdl, EZFS_SCRUB_PAUSED, msg));
+ return (zfs_error(hdl, EZFS_SCRUB_PAUSED,
+ errbuf));
else
- return (zfs_error(hdl, EZFS_SCRUBBING, msg));
+ return (zfs_error(hdl, EZFS_SCRUBBING, errbuf));
} else {
- return (zfs_error(hdl, EZFS_RESILVERING, msg));
+ return (zfs_error(hdl, EZFS_RESILVERING, errbuf));
}
} else if (err == ENOENT) {
- return (zfs_error(hdl, EZFS_NO_SCRUB, msg));
+ return (zfs_error(hdl, EZFS_NO_SCRUB, errbuf));
} else if (err == ENOTSUP && func == POOL_SCAN_RESILVER) {
- return (zfs_error(hdl, EZFS_NO_RESILVER_DEFER, msg));
+ return (zfs_error(hdl, EZFS_NO_RESILVER_DEFER, errbuf));
} else {
- return (zpool_standard_error(hdl, err, msg));
+ return (zpool_standard_error(hdl, err, errbuf));
}
}
@@ -3087,28 +3090,28 @@ zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags,
vdev_state_t *newstate)
{
zfs_cmd_t zc = {"\0"};
- char msg[1024];
+ char errbuf[ERRBUFLEN];
nvlist_t *tgt;
boolean_t avail_spare, l2cache, islog;
libzfs_handle_t *hdl = zhp->zpool_hdl;
if (flags & ZFS_ONLINE_EXPAND) {
- (void) snprintf(msg, sizeof (msg),
+ (void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot expand %s"), path);
} else {
- (void) snprintf(msg, sizeof (msg),
+ (void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot online %s"), path);
}
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
&islog)) == NULL)
- return (zfs_error(hdl, EZFS_NODEVICE, msg));
+ return (zfs_error(hdl, EZFS_NODEVICE, errbuf));
zc.zc_guid = fnvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID);
if (avail_spare)
- return (zfs_error(hdl, EZFS_ISSPARE, msg));
+ return (zfs_error(hdl, EZFS_ISSPARE, errbuf));
#ifndef __FreeBSD__
char *pathname;
@@ -3126,7 +3129,7 @@ zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags,
if (l2cache) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"cannot expand cache devices"));
- return (zfs_error(hdl, EZFS_VDEVNOTSUP, msg));
+ return (zfs_error(hdl, EZFS_VDEVNOTSUP, errbuf));
}
if (wholedisk) {
@@ -3139,12 +3142,12 @@ zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags,
sizeof (buf));
if (error != 0)
return (zfs_error(hdl, EZFS_NODEVICE,
- msg));
+ errbuf));
fullpath = buf;
}
- error = zpool_relabel_disk(hdl, fullpath, msg);
+ error = zpool_relabel_disk(hdl, fullpath, errbuf);
if (error != 0)
return (error);
}
@@ -3159,9 +3162,9 @@ zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags,
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "was split "
"from this pool into a new one. Use '%s' "
"instead"), "zpool detach");
- return (zfs_error(hdl, EZFS_POSTSPLIT_ONLINE, msg));
+ return (zfs_error(hdl, EZFS_POSTSPLIT_ONLINE, errbuf));
}
- return (zpool_standard_error(hdl, errno, msg));
+ return (zpool_standard_error(hdl, errno, errbuf));
}
*newstate = zc.zc_cookie;
@@ -3175,23 +3178,23 @@ int
zpool_vdev_offline(zpool_handle_t *zhp, const char *path, boolean_t istmp)
{
zfs_cmd_t zc = {"\0"};
- char msg[1024];
+ char errbuf[ERRBUFLEN];
nvlist_t *tgt;
boolean_t avail_spare, l2cache;
libzfs_handle_t *hdl = zhp->zpool_hdl;
- (void) snprintf(msg, sizeof (msg),
+ (void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot offline %s"), path);
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
NULL)) == NULL)
- return (zfs_error(hdl, EZFS_NODEVICE, msg));
+ return (zfs_error(hdl, EZFS_NODEVICE, errbuf));
zc.zc_guid = fnvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID);
if (avail_spare)
- return (zfs_error(hdl, EZFS_ISSPARE, msg));
+ return (zfs_error(hdl, EZFS_ISSPARE, errbuf));
zc.zc_cookie = VDEV_STATE_OFFLINE;
zc.zc_obj = istmp ? ZFS_OFFLINE_TEMPORARY : 0;
@@ -3205,16 +3208,16 @@ zpool_vdev_offline(zpool_handle_t *zhp, const char *path, boolean_t istmp)
/*
* There are no other replicas of this device.
*/
- return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
+ return (zfs_error(hdl, EZFS_NOREPLICAS, errbuf));
case EEXIST:
/*
* The log device has unplayed logs
*/
- return (zfs_error(hdl, EZFS_UNPLAYED_LOGS, msg));
+ return (zfs_error(hdl, EZFS_UNPLAYED_LOGS, errbuf));
default:
- return (zpool_standard_error(hdl, errno, msg));
+ return (zpool_standard_error(hdl, errno, errbuf));
}
}
@@ -3225,10 +3228,10 @@ int
zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
{
zfs_cmd_t zc = {"\0"};
- char msg[1024];
+ char errbuf[ERRBUFLEN];
libzfs_handle_t *hdl = zhp->zpool_hdl;
- (void) snprintf(msg, sizeof (msg),
+ (void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot fault %llu"), (u_longlong_t)guid);
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
@@ -3245,10 +3248,10 @@ zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
/*
* There are no other replicas of this device.
*/
- return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
+ return (zfs_error(hdl, EZFS_NOREPLICAS, errbuf));
default:
- return (zpool_standard_error(hdl, errno, msg));
+ return (zpool_standard_error(hdl, errno, errbuf));
}
}
@@ -3260,10 +3263,10 @@ int
zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
{
zfs_cmd_t zc = {"\0"};
- char msg[1024];
+ char errbuf[ERRBUFLEN];
libzfs_handle_t *hdl = zhp->zpool_hdl;
- (void) snprintf(msg, sizeof (msg),
+ (void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot degrade %llu"), (u_longlong_t)guid);
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
@@ -3274,7 +3277,7 @@ zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
return (0);
- return (zpool_standard_error(hdl, errno, msg));
+ return (zpool_standard_error(hdl, errno, errbuf));
}
/*
@@ -3312,7 +3315,7 @@ zpool_vdev_attach(zpool_handle_t *zhp, const char *old_disk,
const char *new_disk, nvlist_t *nvroot, int replacing, boolean_t rebuild)
{
zfs_cmd_t zc = {"\0"};
- char msg[1024];
+ char errbuf[ERRBUFLEN];
int ret;
nvlist_t *tgt;
boolean_t avail_spare, l2cache, islog;
@@ -3324,22 +3327,22 @@ zpool_vdev_attach(zpool_handle_t *zhp, const char *old_disk,
libzfs_handle_t *hdl = zhp->zpool_hdl;
if (replacing)
- (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
+ (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"cannot replace %s with %s"), old_disk, new_disk);
else
- (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
+ (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"cannot attach %s to %s"), new_disk, old_disk);
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
if ((tgt = zpool_find_vdev(zhp, old_disk, &avail_spare, &l2cache,
&islog)) == NULL)
- return (zfs_error(hdl, EZFS_NODEVICE, msg));
+ return (zfs_error(hdl, EZFS_NODEVICE, errbuf));
if (avail_spare)
- return (zfs_error(hdl, EZFS_ISSPARE, msg));
+ return (zfs_error(hdl, EZFS_ISSPARE, errbuf));
if (l2cache)
- return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
+ return (zfs_error(hdl, EZFS_ISL2CACHE, errbuf));
zc.zc_guid = fnvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID);
zc.zc_cookie = replacing;
@@ -3349,14 +3352,14 @@ zpool_vdev_attach(zpool_handle_t *zhp, const char *old_disk,
zfeature_lookup_guid("org.openzfs:device_rebuild", NULL) != 0) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"the loaded zfs module doesn't support device rebuilds"));
- return (zfs_error(hdl, EZFS_POOL_NOTSUP, msg));
+ return (zfs_error(hdl, EZFS_POOL_NOTSUP, errbuf));
}
if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
&child, &children) != 0 || children != 1) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"new device must be a single disk"));
- return (zfs_error(hdl, EZFS_INVALCONFIG, msg));
+ return (zfs_error(hdl, EZFS_INVALCONFIG, errbuf));
}
config_root = fnvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
@@ -3377,7 +3380,7 @@ zpool_vdev_attach(zpool_handle_t *zhp, const char *old_disk,
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"can only be replaced by another hot spare"));
free(newname);
- return (zfs_error(hdl, EZFS_BADTARGET, msg));
+ return (zfs_error(hdl, EZFS_BADTARGET, errbuf));
}
free(newname);
@@ -3435,7 +3438,7 @@ zpool_vdev_attach(zpool_handle_t *zhp, const char *old_disk,
"disks"));
}
}
- (void) zfs_error(hdl, EZFS_BADTARGET, msg);
+ (void) zfs_error(hdl, EZFS_BADTARGET, errbuf);
break;
case EINVAL:
@@ -3444,14 +3447,14 @@ zpool_vdev_attach(zpool_handle_t *zhp, const char *old_disk,
*/
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"new device must be a single disk"));
- (void) zfs_error(hdl, EZFS_INVALCONFIG, msg);
+ (void) zfs_error(hdl, EZFS_INVALCONFIG, errbuf);
break;
case EBUSY:
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "%s is busy, "
"or device removal is in progress"),
new_disk);
- (void) zfs_error(hdl, EZFS_BADDEV, msg);
+ (void) zfs_error(hdl, EZFS_BADDEV, errbuf);
break;
case EOVERFLOW:
@@ -3460,7 +3463,7 @@ zpool_vdev_attach(zpool_handle_t *zhp, const char *old_disk,
*/
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"device is too small"));
- (void) zfs_error(hdl, EZFS_BADDEV, msg);
+ (void) zfs_error(hdl, EZFS_BADDEV, errbuf);
break;
case EDOM:
@@ -3470,18 +3473,18 @@ zpool_vdev_attach(zpool_handle_t *zhp, const char *old_disk,
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"new device has a different optimal sector size; use the "
"option '-o ashift=N' to override the optimal size"));
- (void) zfs_error(hdl, EZFS_BADDEV, msg);
+ (void) zfs_error(hdl, EZFS_BADDEV, errbuf);
break;
case ENAMETOOLONG:
/*
* The resulting top-level vdev spec won't fit in the label.
*/
- (void) zfs_error(hdl, EZFS_DEVOVERFLOW, msg);
+ (void) zfs_error(hdl, EZFS_DEVOVERFLOW, errbuf);
break;
default:
- (void) zpool_standard_error(hdl, errno, msg);
+ (void) zpool_standard_error(hdl, errno, errbuf);
}
return (-1);
@@ -3494,24 +3497,24 @@ int
zpool_vdev_detach(zpool_handle_t *zhp, const char *path)
{
zfs_cmd_t zc = {"\0"};
- char msg[1024];
+ char errbuf[ERRBUFLEN];
nvlist_t *tgt;
boolean_t avail_spare, l2cache;
libzfs_handle_t *hdl = zhp->zpool_hdl;
- (void) snprintf(msg, sizeof (msg),
+ (void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot detach %s"), path);
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
NULL)) == NULL)
- return (zfs_error(hdl, EZFS_NODEVICE, msg));
+ return (zfs_error(hdl, EZFS_NODEVICE, errbuf));
if (avail_spare)
- return (zfs_error(hdl, EZFS_ISSPARE, msg));
+ return (zfs_error(hdl, EZFS_ISSPARE, errbuf));
if (l2cache)
- return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
+ return (zfs_error(hdl, EZFS_ISL2CACHE, errbuf));
zc.zc_guid = fnvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID);
@@ -3526,18 +3529,18 @@ zpool_vdev_detach(zpool_handle_t *zhp, const char *path)
*/
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "only "
"applicable to mirror and replacing vdevs"));
- (void) zfs_error(hdl, EZFS_BADTARGET, msg);
+ (void) zfs_error(hdl, EZFS_BADTARGET, errbuf);
break;
case EBUSY:
/*
* There are no other replicas of this device.
*/
- (void) zfs_error(hdl, EZFS_NOREPLICAS, msg);
+ (void) zfs_error(hdl, EZFS_NOREPLICAS, errbuf);
break;
default:
- (void) zpool_standard_error(hdl, errno, msg);
+ (void) zpool_standard_error(hdl, errno, errbuf);
}
return (-1);
@@ -3592,7 +3595,7 @@ zpool_vdev_split(zpool_handle_t *zhp, char *newname, nvlist_t **newroot,
nvlist_t *props, splitflags_t flags)
{
zfs_cmd_t zc = {"\0"};
- char msg[1024], *bias;
+ char errbuf[ERRBUFLEN], *bias;
nvlist_t *tree, *config, **child, **newchild, *newconfig = NULL;
nvlist_t **varray = NULL, *zc_props = NULL;
uint_t c, children, newchildren, lastlog = 0, vcount, found = 0;
@@ -3601,11 +3604,11 @@ zpool_vdev_split(zpool_handle_t *zhp, char *newname, nvlist_t **newroot,
boolean_t freelist = B_FALSE, memory_err = B_TRUE;
int retval = 0;
- (void) snprintf(msg, sizeof (msg),
+ (void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "Unable to split %s"), zhp->zpool_name);
if (!zpool_name_valid(hdl, B_FALSE, newname))
- return (zfs_error(hdl, EZFS_INVALIDNAME, msg));
+ return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
if ((config = zpool_get_config(zhp, NULL)) == NULL) {
(void) fprintf(stderr, gettext("Internal error: unable to "
@@ -3619,7 +3622,7 @@ zpool_vdev_split(zpool_handle_t *zhp, char *newname, nvlist_t **newroot,
if (props) {
prop_flags_t flags = { .create = B_FALSE, .import = B_TRUE };
if ((zc_props = zpool_valid_proplist(hdl, zhp->zpool_name,
- props, vers, flags, msg)) == NULL)
+ props, vers, flags, errbuf)) == NULL)
return (-1);
(void) nvlist_lookup_uint64(zc_props,
zpool_prop_to_name(ZPOOL_PROP_READONLY), &readonly);
@@ -3691,7 +3694,7 @@ zpool_vdev_split(zpool_handle_t *zhp, char *newname, nvlist_t **newroot,
} else if (strcmp(type, VDEV_TYPE_MIRROR) != 0) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"Source pool must be composed only of mirrors\n"));
- retval = zfs_error(hdl, EZFS_INVALCONFIG, msg);
+ retval = zfs_error(hdl, EZFS_INVALCONFIG, errbuf);
goto out;
}
@@ -3739,7 +3742,7 @@ zpool_vdev_split(zpool_handle_t *zhp, char *newname, nvlist_t **newroot,
if (found != newchildren) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "Device list must "
"include at most one disk from each mirror"));
- retval = zfs_error(hdl, EZFS_INVALCONFIG, msg);
+ retval = zfs_error(hdl, EZFS_INVALCONFIG, errbuf);
goto out;
}
@@ -3793,7 +3796,7 @@ zpool_vdev_split(zpool_handle_t *zhp, char *newname, nvlist_t **newroot,
zcmd_write_src_nvlist(hdl, &zc, zc_props);
if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SPLIT, &zc) != 0) {
- retval = zpool_standard_error(hdl, errno, msg);
+ retval = zpool_standard_error(hdl, errno, errbuf);
goto out;
}
@@ -3832,31 +3835,31 @@ int
zpool_vdev_remove(zpool_handle_t *zhp, const char *path)
{
zfs_cmd_t zc = {"\0"};
- char msg[1024];
+ char errbuf[ERRBUFLEN];
nvlist_t *tgt;
boolean_t avail_spare, l2cache, islog;
libzfs_handle_t *hdl = zhp->zpool_hdl;
uint64_t version;
- (void) snprintf(msg, sizeof (msg),
+ (void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot remove %s"), path);
if (zpool_is_draid_spare(path)) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"dRAID spares cannot be removed"));
- return (zfs_error(hdl, EZFS_NODEVICE, msg));
+ return (zfs_error(hdl, EZFS_NODEVICE, errbuf));
}
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
&islog)) == NULL)
- return (zfs_error(hdl, EZFS_NODEVICE, msg));
+ return (zfs_error(hdl, EZFS_NODEVICE, errbuf));
version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL);
if (islog && version < SPA_VERSION_HOLES) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"pool must be upgraded to support log removal"));
- return (zfs_error(hdl, EZFS_BADVERSION, msg));
+ return (zfs_error(hdl, EZFS_BADVERSION, errbuf));
}
zc.zc_guid = fnvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID);
@@ -3870,7 +3873,7 @@ zpool_vdev_remove(zpool_handle_t *zhp, const char *path)
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"invalid config; all top-level vdevs must "
"have the same sector size and not be raidz."));
- (void) zfs_error(hdl, EZFS_INVALCONFIG, msg);
+ (void) zfs_error(hdl, EZFS_INVALCONFIG, errbuf);
break;
case EBUSY:
@@ -3881,21 +3884,21 @@ zpool_vdev_remove(zpool_handle_t *zhp, const char *path)
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"Pool busy; removal may already be in progress"));
}
- (void) zfs_error(hdl, EZFS_BUSY, msg);
+ (void) zfs_error(hdl, EZFS_BUSY, errbuf);
break;
case EACCES:
if (islog) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"Mount encrypted datasets to replay logs."));
- (void) zfs_error(hdl, EZFS_BUSY, msg);
+ (void) zfs_error(hdl, EZFS_BUSY, errbuf);
} else {
- (void) zpool_standard_error(hdl, errno, msg);
+ (void) zpool_standard_error(hdl, errno, errbuf);
}
break;
default:
- (void) zpool_standard_error(hdl, errno, msg);
+ (void) zpool_standard_error(hdl, errno, errbuf);
}
return (-1);
}
@@ -3904,10 +3907,10 @@ int
zpool_vdev_remove_cancel(zpool_handle_t *zhp)
{
zfs_cmd_t zc = {{0}};
- char msg[1024];
+ char errbuf[ERRBUFLEN];
libzfs_handle_t *hdl = zhp->zpool_hdl;
- (void) snprintf(msg, sizeof (msg),
+ (void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot cancel removal"));
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
@@ -3916,25 +3919,25 @@ zpool_vdev_remove_cancel(zpool_handle_t *zhp)
if (zfs_ioctl(hdl, ZFS_IOC_VDEV_REMOVE, &zc) == 0)
return (0);
- return (zpool_standard_error(hdl, errno, msg));
+ return (zpool_standard_error(hdl, errno, errbuf));
}
int
zpool_vdev_indirect_size(zpool_handle_t *zhp, const char *path,
uint64_t *sizep)
{
- char msg[1024];
+ char errbuf[ERRBUFLEN];
nvlist_t *tgt;
boolean_t avail_spare, l2cache, islog;
libzfs_handle_t *hdl = zhp->zpool_hdl;
- (void) snprintf(msg, sizeof (msg),
+ (void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot determine indirect size of %s"),
path);
if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
&islog)) == NULL)
- return (zfs_error(hdl, EZFS_NODEVICE, msg));
+ return (zfs_error(hdl, EZFS_NODEVICE, errbuf));
if (avail_spare || l2cache || islog) {
*sizep = 0;
@@ -3944,7 +3947,7 @@ zpool_vdev_indirect_size(zpool_handle_t *zhp, const char *path,
if (nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_INDIRECT_SIZE, sizep) != 0) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"indirect size not available"));
- return (zfs_error(hdl, EINVAL, msg));
+ return (zfs_error(hdl, EINVAL, errbuf));
}
return (0);
}
@@ -3956,7 +3959,7 @@ int
zpool_clear(zpool_handle_t *zhp, const char *path, nvlist_t *rewindnvl)
{
zfs_cmd_t zc = {"\0"};
- char msg[1024];
+ char errbuf[ERRBUFLEN];
nvlist_t *tgt;
zpool_load_policy_t policy;
boolean_t avail_spare, l2cache;
@@ -3965,11 +3968,11 @@ zpool_clear(zpool_handle_t *zhp, const char *path, nvlist_t *rewindnvl)
int error;
if (path)
- (void) snprintf(msg, sizeof (msg),
+ (void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
path);
else
- (void) snprintf(msg, sizeof (msg),
+ (void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
zhp->zpool_name);
@@ -3977,14 +3980,14 @@ zpool_clear(zpool_handle_t *zhp, const char *path, nvlist_t *rewindnvl)
if (path) {
if ((tgt = zpool_find_vdev(zhp, path, &avail_spare,
&l2cache, NULL)) == NULL)
- return (zfs_error(hdl, EZFS_NODEVICE, msg));
+ return (zfs_error(hdl, EZFS_NODEVICE, errbuf));
/*
* Don't allow error clearing for hot spares. Do allow
* error clearing for l2cache devices.
*/
if (avail_spare)
- return (zfs_error(hdl, EZFS_ISSPARE, msg));
+ return (zfs_error(hdl, EZFS_ISSPARE, errbuf));
zc.zc_guid = fnvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID);
}
@@ -4014,7 +4017,7 @@ zpool_clear(zpool_handle_t *zhp, const char *path, nvlist_t *rewindnvl)
}
zcmd_free_nvlists(&zc);
- return (zpool_standard_error(hdl, errno, msg));
+ return (zpool_standard_error(hdl, errno, errbuf));
}
/*
@@ -4024,10 +4027,10 @@ int
zpool_vdev_clear(zpool_handle_t *zhp, uint64_t guid)
{
zfs_cmd_t zc = {"\0"};
- char msg[1024];
+ char errbuf[ERRBUFLEN];
libzfs_handle_t *hdl = zhp->zpool_hdl;
- (void) snprintf(msg, sizeof (msg),
+ (void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot clear errors for %llx"),
(u_longlong_t)guid);
@@ -4038,7 +4041,7 @@ zpool_vdev_clear(zpool_handle_t *zhp, uint64_t guid)
if (zfs_ioctl(hdl, ZFS_IOC_CLEAR, &zc) == 0)
return (0);
- return (zpool_standard_error(hdl, errno, msg));
+ return (zpool_standard_error(hdl, errno, errbuf));
}
/*
@@ -4047,18 +4050,18 @@ zpool_vdev_clear(zpool_handle_t *zhp, uint64_t guid)
int
zpool_reguid(zpool_handle_t *zhp)
{
- char msg[1024];
+ char errbuf[ERRBUFLEN];
libzfs_handle_t *hdl = zhp->zpool_hdl;
zfs_cmd_t zc = {"\0"};
- (void) snprintf(msg, sizeof (msg),
+ (void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot reguid '%s'"), zhp->zpool_name);
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
if (zfs_ioctl(hdl, ZFS_IOC_POOL_REGUID, &zc) == 0)
return (0);
- return (zpool_standard_error(hdl, errno, msg));
+ return (zpool_standard_error(hdl, errno, errbuf));
}
/*
@@ -4998,7 +5001,7 @@ zpool_vdev_guid(zpool_handle_t *zhp, const char *vdevname, uint64_t *vdev_guid)
verify(zhp != NULL);
if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) {
- char errbuf[1024];
+ char errbuf[ERRBUFLEN];
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "pool is in an unavailable state"));
return (zfs_error(zhp->zpool_hdl, EZFS_POOLUNAVAIL, errbuf));
@@ -5006,7 +5009,7 @@ zpool_vdev_guid(zpool_handle_t *zhp, const char *vdevname, uint64_t *vdev_guid)
if ((tgt = zpool_find_vdev(zhp, vdevname, &avail_spare, &l2cache,
NULL)) == NULL) {
- char errbuf[1024];
+ char errbuf[ERRBUFLEN];
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "can not find %s in %s"),
vdevname, zhp->zpool_name);
@@ -5030,7 +5033,7 @@ zpool_get_vdev_prop_value(nvlist_t *nvprop, vdev_prop_t prop, char *prop_name,
uint64_t intval;
zprop_source_t src = ZPROP_SRC_NONE;
- if (prop == VDEV_PROP_USER) {
+ if (prop == VDEV_PROP_USERPROP) {
/* user property, prop_name must contain the property name */
assert(prop_name != NULL);
if (nvlist_lookup_nvlist(nvprop, prop_name, &nv) == 0) {
@@ -5192,7 +5195,7 @@ zpool_get_vdev_prop(zpool_handle_t *zhp, const char *vdevname, vdev_prop_t prop,
fnvlist_add_uint64(reqnvl, ZPOOL_VDEV_PROPS_GET_VDEV, vdev_guid);
- if (prop != VDEV_PROP_USER) {
+ if (prop != VDEV_PROP_USERPROP) {
/* prop_name overrides prop value */
if (prop_name != NULL)
prop = vdev_name_to_prop(prop_name);
@@ -5216,7 +5219,7 @@ zpool_get_vdev_prop(zpool_handle_t *zhp, const char *vdevname, vdev_prop_t prop,
ret = zpool_get_vdev_prop_value(retprops, prop, prop_name, buf,
len, srctype, literal);
} else {
- char errbuf[1024];
+ char errbuf[ERRBUFLEN];
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot get vdev property %s from"
" %s in %s"), prop_name, vdevname, zhp->zpool_name);
@@ -5254,7 +5257,7 @@ zpool_get_all_vdev_props(zpool_handle_t *zhp, const char *vdevname,
nvlist_free(nvl);
if (ret) {
- char errbuf[1024];
+ char errbuf[ERRBUFLEN];
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot get vdev properties for"
" %s in %s"), vdevname, zhp->zpool_name);
@@ -5295,7 +5298,7 @@ zpool_set_vdev_prop(zpool_handle_t *zhp, const char *vdevname,
return (no_memory(zhp->zpool_hdl));
}
- char errbuf[1024];
+ char errbuf[ERRBUFLEN];
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot set property %s for %s on %s"),
propname, vdevname, zhp->zpool_name);
diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_sendrecv.c b/sys/contrib/openzfs/lib/libzfs/libzfs_sendrecv.c
index 6017b0e43a51..a27446f54da7 100644
--- a/sys/contrib/openzfs/lib/libzfs/libzfs_sendrecv.c
+++ b/sys/contrib/openzfs/lib/libzfs/libzfs_sendrecv.c
@@ -734,7 +734,7 @@ zfs_send_space(zfs_handle_t *zhp, const char *snapname, const char *from,
if (error == 0)
return (0);
- char errbuf[1024];
+ char errbuf[ERRBUFLEN];
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"warning: cannot estimate space for '%s'"), snapname);
@@ -804,7 +804,7 @@ dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, uint64_t fromsnap_obj,
}
if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SEND, &zc) != 0) {
- char errbuf[1024];
+ char errbuf[ERRBUFLEN];
int error = errno;
(void) snprintf(errbuf, sizeof (errbuf), "%s '%s'",
@@ -1615,7 +1615,7 @@ find_redact_book(libzfs_handle_t *hdl, const char *path,
const uint64_t *redact_snap_guids, int num_redact_snaps,
char **bookname)
{
- char errbuf[1024];
+ char errbuf[ERRBUFLEN];
nvlist_t *bmarks;
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
@@ -1679,7 +1679,7 @@ static int
zfs_send_resume_impl_cb_impl(libzfs_handle_t *hdl, sendflags_t *flags,
int outfd, nvlist_t *resume_nvl)
{
- char errbuf[1024];
+ char errbuf[ERRBUFLEN];
char *toname;
char *fromname = NULL;
uint64_t resumeobj, resumeoff, toguid, fromguid, bytes;
@@ -1827,7 +1827,7 @@ zfs_send_resume_impl_cb_impl(libzfs_handle_t *hdl, sendflags_t *flags,
if (flags->progress && send_progress_thread_exit(hdl, tid))
return (-1);
- char errbuf[1024];
+ char errbuf[ERRBUFLEN];
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"warning: cannot send '%s'"), zhp->zfs_name);
@@ -1907,7 +1907,7 @@ zfs_send_resume(libzfs_handle_t *hdl, sendflags_t *flags, int outfd,
const char *resume_token)
{
int ret;
- char errbuf[1024];
+ char errbuf[ERRBUFLEN];
nvlist_t *resume_nvl;
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
@@ -1938,7 +1938,7 @@ zfs_send_saved(zfs_handle_t *zhp, sendflags_t *flags, int outfd,
uint64_t saved_guid = 0, resume_guid = 0;
uint64_t obj = 0, off = 0, bytes = 0;
char token_buf[ZFS_MAXPROPLEN];
- char errbuf[1024];
+ char errbuf[ERRBUFLEN];
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"saved send failed"));
@@ -2062,7 +2062,7 @@ send_prelim_records(zfs_handle_t *zhp, const char *from, int fd,
/* short name of snap we are sending */
char *tosnap = "";
- char errbuf[1024];
+ char errbuf[ERRBUFLEN];
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"warning: cannot send '%s'"), zhp->zfs_name);
if (zhp->zfs_type == ZFS_TYPE_FILESYSTEM && zfs_prop_get_int(zhp,
@@ -2187,7 +2187,7 @@ zfs_send_cb_impl(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
sendflags_t *flags, int outfd, snapfilter_cb_t filter_func,
void *cb_arg, nvlist_t **debugnvp)
{
- char errbuf[1024];
+ char errbuf[ERRBUFLEN];
send_dump_data_t sdd = { 0 };
int err = 0;
nvlist_t *fss = NULL;
@@ -2366,9 +2366,9 @@ zfs_send_cb_impl(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
* there was some error, because it might not be totally
* failed.
*/
- err = send_conclusion_record(outfd, NULL);
- if (err != 0)
- return (zfs_standard_error(zhp->zfs_hdl, err, errbuf));
+ int err2 = send_conclusion_record(outfd, NULL);
+ if (err2 != 0)
+ return (zfs_standard_error(zhp->zfs_hdl, err2, errbuf));
}
return (err || sdd.err);
@@ -2510,7 +2510,7 @@ zfs_send_one_cb_impl(zfs_handle_t *zhp, const char *from, int fd,
pthread_t ptid;
progress_arg_t pa = { 0 };
- char errbuf[1024];
+ char errbuf[ERRBUFLEN];
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"warning: cannot send '%s'"), name);
@@ -3654,7 +3654,7 @@ zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname,
char *cp;
char tofs[ZFS_MAX_DATASET_NAME_LEN];
char sendfs[ZFS_MAX_DATASET_NAME_LEN];
- char errbuf[1024];
+ char errbuf[ERRBUFLEN];
dmu_replay_record_t drre;
int error;
boolean_t anyerr = B_FALSE;
@@ -3871,7 +3871,7 @@ recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap)
dmu_replay_record_t *drr;
void *buf = zfs_alloc(hdl, SPA_MAXBLOCKSIZE);
uint64_t payload_size;
- char errbuf[1024];
+ char errbuf[ERRBUFLEN];
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"cannot receive"));
@@ -4239,7 +4239,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
int ioctl_err, ioctl_errno, err;
char *cp;
struct drr_begin *drrb = &drr->drr_u.drr_begin;
- char errbuf[1024];
+ char errbuf[ERRBUFLEN];
const char *chopprefix;
boolean_t newfs = B_FALSE;
boolean_t stream_wantsnewfs, stream_resumingnewfs;
@@ -5107,7 +5107,7 @@ zfs_receive_checkprops(libzfs_handle_t *hdl, nvlist_t *props,
name = nvpair_name(nvp);
prop = zfs_name_to_prop(name);
- if (prop == ZPROP_INVAL) {
+ if (prop == ZPROP_USERPROP) {
if (!zfs_prop_user(name)) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"%s: invalid property '%s'"), errbuf, name);
@@ -5151,7 +5151,7 @@ zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap,
int err;
dmu_replay_record_t drr, drr_noswap;
struct drr_begin *drrb = &drr.drr_u.drr_begin;
- char errbuf[1024];
+ char errbuf[ERRBUFLEN];
zio_cksum_t zcksum = { { 0 } };
uint64_t featureflags;
int hdrtype;
diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_util.c b/sys/contrib/openzfs/lib/libzfs/libzfs_util.c
index 1c067e214800..3ab82c350fd5 100644
--- a/sys/contrib/openzfs/lib/libzfs/libzfs_util.c
+++ b/sys/contrib/openzfs/lib/libzfs/libzfs_util.c
@@ -299,6 +299,9 @@ libzfs_error_description(libzfs_handle_t *hdl)
case EZFS_VDEV_NOTSUP:
return (dgettext(TEXT_DOMAIN, "operation not supported "
"on this type of vdev"));
+ case EZFS_NOT_USER_NAMESPACE:
+ return (dgettext(TEXT_DOMAIN, "the provided file "
+ "was not a user namespace file"));
case EZFS_UNKNOWN:
return (dgettext(TEXT_DOMAIN, "unknown error"));
default:
@@ -485,6 +488,9 @@ zfs_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...)
case ZFS_ERR_BADPROP:
zfs_verror(hdl, EZFS_BADPROP, fmt, ap);
break;
+ case ZFS_ERR_NOT_USER_NAMESPACE:
+ zfs_verror(hdl, EZFS_NOT_USER_NAMESPACE, fmt, ap);
+ break;
default:
zfs_error_aux(hdl, "%s", strerror(error));
zfs_verror(hdl, EZFS_UNKNOWN, fmt, ap);
@@ -1276,7 +1282,7 @@ zprop_print_headers(zprop_get_cbdata_t *cbp, zfs_type_t type)
/*
* 'PROPERTY' column
*/
- if (pl->pl_prop != ZPROP_INVAL) {
+ if (pl->pl_prop != ZPROP_USERPROP) {
const char *propname = (type == ZFS_TYPE_POOL) ?
zpool_prop_to_name(pl->pl_prop) :
((type == ZFS_TYPE_VDEV) ?
@@ -1749,7 +1755,7 @@ addlist(libzfs_handle_t *hdl, const char *propname, zprop_list_t **listp,
* Return failure if no property table entry was found and this isn't
* a user-defined property.
*/
- if (prop == ZPROP_INVAL && ((type == ZFS_TYPE_POOL &&
+ if (prop == ZPROP_USERPROP && ((type == ZFS_TYPE_POOL &&
!zpool_prop_feature(propname) &&
!zpool_prop_unsupported(propname)) ||
((type == ZFS_TYPE_DATASET) && !zfs_prop_user(propname) &&
@@ -1764,7 +1770,7 @@ addlist(libzfs_handle_t *hdl, const char *propname, zprop_list_t **listp,
zprop_list_t *entry = zfs_alloc(hdl, sizeof (*entry));
entry->pl_prop = prop;
- if (prop == ZPROP_INVAL) {
+ if (prop == ZPROP_USERPROP) {
entry->pl_user_prop = zfs_strdup(hdl, propname);
entry->pl_width = strlen(propname);
} else {
diff --git a/sys/contrib/openzfs/lib/libzfs/os/freebsd/libzfs_compat.c b/sys/contrib/openzfs/lib/libzfs/os/freebsd/libzfs_compat.c
index 289c6703c2dd..b08b7e2c439b 100644
--- a/sys/contrib/openzfs/lib/libzfs/os/freebsd/libzfs_compat.c
+++ b/sys/contrib/openzfs/lib/libzfs/os/freebsd/libzfs_compat.c
@@ -193,8 +193,6 @@ execvpe(const char *name, char * const argv[], char * const envp[])
return (execvPe(name, path, argv, envp));
}
-#define ERRBUFLEN 1024
-
static __thread char errbuf[ERRBUFLEN];
const char *
diff --git a/sys/contrib/openzfs/lib/libzfs/os/linux/libzfs_pool_os.c b/sys/contrib/openzfs/lib/libzfs/os/linux/libzfs_pool_os.c
index 570615cfd48c..b135e0146962 100644
--- a/sys/contrib/openzfs/lib/libzfs/os/linux/libzfs_pool_os.c
+++ b/sys/contrib/openzfs/lib/libzfs/os/linux/libzfs_pool_os.c
@@ -216,7 +216,7 @@ zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, const char *name)
size_t resv = EFI_MIN_RESV_SIZE;
uint64_t slice_size;
diskaddr_t start_block;
- char errbuf[1024];
+ char errbuf[ERRBUFLEN];
/* prepare an error message just in case */
(void) snprintf(errbuf, sizeof (errbuf),
diff --git a/sys/contrib/openzfs/lib/libzfs/os/linux/libzfs_util_os.c b/sys/contrib/openzfs/lib/libzfs/os/linux/libzfs_util_os.c
index 9d6f574a5546..0ab79be4e721 100644
--- a/sys/contrib/openzfs/lib/libzfs/os/linux/libzfs_util_os.c
+++ b/sys/contrib/openzfs/lib/libzfs/os/linux/libzfs_util_os.c
@@ -19,6 +19,9 @@
* CDDL HEADER END
*/
+/*
+ * Copyright (c) 2021 Klara, Inc.
+ */
#include <alloca.h>
#include <errno.h>
@@ -207,3 +210,71 @@ zfs_version_kernel(void)
ret[read - 1] = '\0';
return (ret);
}
+
+/*
+ * Add or delete the given filesystem to/from the given user namespace.
+ */
+int
+zfs_userns(zfs_handle_t *zhp, const char *nspath, int attach)
+{
+ libzfs_handle_t *hdl = zhp->zfs_hdl;
+ zfs_cmd_t zc = {"\0"};
+ char errbuf[1024];
+ unsigned long cmd;
+ int ret;
+
+ if (attach) {
+ (void) snprintf(errbuf, sizeof (errbuf),
+ dgettext(TEXT_DOMAIN, "cannot add '%s' to namespace"),
+ zhp->zfs_name);
+ } else {
+ (void) snprintf(errbuf, sizeof (errbuf),
+ dgettext(TEXT_DOMAIN, "cannot remove '%s' from namespace"),
+ zhp->zfs_name);
+ }
+
+ switch (zhp->zfs_type) {
+ case ZFS_TYPE_VOLUME:
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "volumes can not be namespaced"));
+ return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
+ case ZFS_TYPE_SNAPSHOT:
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "snapshots can not be namespaced"));
+ return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
+ case ZFS_TYPE_BOOKMARK:
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "bookmarks can not be namespaced"));
+ return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
+ case ZFS_TYPE_VDEV:
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "vdevs can not be namespaced"));
+ return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
+ case ZFS_TYPE_INVALID:
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "invalid zfs_type_t: ZFS_TYPE_INVALID"));
+ return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
+ case ZFS_TYPE_POOL:
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "pools can not be namespaced"));
+ return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
+ case ZFS_TYPE_FILESYSTEM:
+ zfs_fallthrough;
+ }
+ assert(zhp->zfs_type == ZFS_TYPE_FILESYSTEM);
+
+ (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+ zc.zc_objset_type = DMU_OST_ZFS;
+ zc.zc_cleanup_fd = open(nspath, O_RDONLY);
+ if (zc.zc_cleanup_fd < 0) {
+ return (zfs_error(hdl, EZFS_NOT_USER_NAMESPACE, errbuf));
+ }
+
+ cmd = attach ? ZFS_IOC_USERNS_ATTACH : ZFS_IOC_USERNS_DETACH;
+ if ((ret = zfs_ioctl(hdl, cmd, &zc)) != 0)
+ zfs_standard_error(hdl, errno, errbuf);
+
+ (void) close(zc.zc_cleanup_fd);
+
+ return (ret);
+}
diff --git a/sys/contrib/openzfs/lib/libzfs_core/libzfs_core.abi b/sys/contrib/openzfs/lib/libzfs_core/libzfs_core.abi
index 266007e4dcad..fae98469a04f 100644
--- a/sys/contrib/openzfs/lib/libzfs_core/libzfs_core.abi
+++ b/sys/contrib/openzfs/lib/libzfs_core/libzfs_core.abi
@@ -939,7 +939,7 @@
</function-decl>
</abi-instr>
<abi-instr address-size='64' path='os/linux/zone.c' language='LANG_C99'>
- <typedef-decl name='zoneid_t' type-id='95e97e5e' id='4da03624'/>
+ <typedef-decl name='zoneid_t' type-id='3502e3ff' id='4da03624'/>
<function-decl name='getzoneid' mangled-name='getzoneid' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='getzoneid'>
<return type-id='4da03624'/>
</function-decl>
diff --git a/sys/contrib/openzfs/lib/libzpool/Makefile.am b/sys/contrib/openzfs/lib/libzpool/Makefile.am
index 60eb30749847..eaa920e56106 100644
--- a/sys/contrib/openzfs/lib/libzpool/Makefile.am
+++ b/sys/contrib/openzfs/lib/libzpool/Makefile.am
@@ -67,6 +67,7 @@ nodist_libzpool_la_SOURCES = \
module/zfs/abd.c \
module/zfs/aggsum.c \
module/zfs/arc.c \
+ module/zfs/blake3_zfs.c \
module/zfs/blkptr.c \
module/zfs/bplist.c \
module/zfs/bpobj.c \
@@ -171,6 +172,7 @@ nodist_libzpool_la_SOURCES = \
module/zfs/zcp_synctask.c \
module/zfs/zfeature.c \
module/zfs/zfs_byteswap.c \
+ module/zfs/zfs_chksum.c \
module/zfs/zfs_fm.c \
module/zfs/zfs_fuid.c \
module/zfs/zfs_ratelimit.c \
diff --git a/sys/contrib/openzfs/man/Makefile.am b/sys/contrib/openzfs/man/Makefile.am
index 8fa21d2fd23e..12f818372f37 100644
--- a/sys/contrib/openzfs/man/Makefile.am
+++ b/sys/contrib/openzfs/man/Makefile.am
@@ -59,9 +59,11 @@ dist_man_MANS = \
%D%/man8/zfs-unjail.8 \
%D%/man8/zfs-unload-key.8 \
%D%/man8/zfs-unmount.8 \
+ %D%/man8/zfs-unzone.8 \
%D%/man8/zfs-upgrade.8 \
%D%/man8/zfs-userspace.8 \
%D%/man8/zfs-wait.8 \
+ %D%/man8/zfs-zone.8 \
%D%/man8/zfs_ids_to_path.8 \
%D%/man8/zgenhostid.8 \
%D%/man8/zinject.8 \
diff --git a/sys/contrib/openzfs/man/man4/zfs.4 b/sys/contrib/openzfs/man/man4/zfs.4
index a086e1a5d56c..a7e5408e5e37 100644
--- a/sys/contrib/openzfs/man/man4/zfs.4
+++ b/sys/contrib/openzfs/man/man4/zfs.4
@@ -2248,9 +2248,74 @@ for each I/O submitter.
When unset, requests are handled asynchronously by a thread pool.
The number of requests which can be handled concurrently is controlled by
.Sy zvol_threads .
+.Sy zvol_request_sync
+is ignored when running on a kernel that supports block multiqueue
+.Pq Li blk-mq .
.
-.It Sy zvol_threads Ns = Ns Sy 32 Pq uint
-Max number of threads which can handle zvol I/O requests concurrently.
+.It Sy zvol_threads Ns = Ns Sy 0 Pq uint
+The number of system wide threads to use for processing zvol block IOs.
+If
+.Sy 0
+(the default) then internally set
+.Sy zvol_threads
+to the number of CPUs present or 32 (whichever is greater).
+.
+.It Sy zvol_blk_mq_threads Ns = Ns Sy 0 Pq uint
+The number of threads per zvol to use for queuing IO requests.
+This parameter will only appear if your kernel supports
+.Li blk-mq
+and is only read and assigned to a zvol at zvol load time.
+If
+.Sy 0
+(the default) then internally set
+.Sy zvol_blk_mq_threads
+to the number of CPUs present.
+.
+.It Sy zvol_use_blk_mq Ns = Ns Sy 0 Ns | Ns 1 Pq uint
+Set to
+.Sy 1
+to use the
+.Li blk-mq
+API for zvols.
+Set to
+.Sy 0
+(the default) to use the legacy zvol APIs.
+This setting can give better or worse zvol performance depending on
+the workload.
+This parameter will only appear if your kernel supports
+.Li blk-mq
+and is only read and assigned to a zvol at zvol load time.
+.
+.It Sy zvol_blk_mq_blocks_per_thread Ns = Ns Sy 8 Pq uint
+If
+.Sy zvol_use_blk_mq
+is enabled, then process this number of
+.Sy volblocksize Ns -sized blocks per zvol thread.
+This tunable can be used to favor better performance for zvol reads (lower
+values) or writes (higher values).
+If set to
+.Sy 0 ,
+then the zvol layer will process the maximum number of blocks
+per thread that it can.
+This parameter will only appear if your kernel supports
+.Li blk-mq
+and is only applied at each zvol's load time.
+.
+.It Sy zvol_blk_mq_queue_depth Ns = Ns Sy 0 Pq uint
+The queue_depth value for the zvol
+.Li blk-mq
+interface.
+This parameter will only appear if your kernel supports
+.Li blk-mq
+and is only applied at each zvol's load time.
+If
+.Sy 0
+(the default) then use the kernel's default queue depth.
+Values are clamped to the kernel's
+.Dv BLKDEV_MIN_RQ
+and
+.Dv BLKDEV_MAX_RQ Ns / Ns Dv BLKDEV_DEFAULT_RQ
+limits.
.
.It Sy zvol_volmode Ns = Ns Sy 1 Pq uint
Defines zvol block devices behaviour when
diff --git a/sys/contrib/openzfs/man/man7/zfsprops.7 b/sys/contrib/openzfs/man/man7/zfsprops.7
index 2694938aa206..4d6fc613c851 100644
--- a/sys/contrib/openzfs/man/man7/zfsprops.7
+++ b/sys/contrib/openzfs/man/man7/zfsprops.7
@@ -743,7 +743,7 @@ This property is not inherited.
.It Xo
.Sy checksum Ns = Ns Sy on Ns | Ns Sy off Ns | Ns Sy fletcher2 Ns | Ns
.Sy fletcher4 Ns | Ns Sy sha256 Ns | Ns Sy noparity Ns | Ns
-.Sy sha512 Ns | Ns Sy skein Ns | Ns Sy edonr
+.Sy sha512 Ns | Ns Sy skein Ns | Ns Sy edonr Ns | Ns Sy blake3
.Xc
Controls the checksum used to verify data integrity.
The default value is
@@ -768,8 +768,9 @@ a recommended practice.
The
.Sy sha512 ,
.Sy skein ,
+.Sy edonr ,
and
-.Sy edonr
+.Sy blake3
checksum algorithms require enabling the appropriate features on the pool.
.Pp
Please see
@@ -984,7 +985,7 @@ mount options.
.It Xo
.Sy dedup Ns = Ns Sy off Ns | Ns Sy on Ns | Ns Sy verify Ns | Ns
.Sy sha256 Ns Oo , Ns Sy verify Oc Ns | Ns Sy sha512 Ns Oo , Ns Sy verify Oc Ns | Ns Sy skein Ns Oo , Ns Sy verify Oc Ns | Ns
-.Sy edonr , Ns Sy verify
+.Sy edonr , Ns Sy verify Ns | Ns Sy blake3 Ns Oo , Ns Sy verify Oc Ns
.Xc
Configures deduplication for a dataset.
The default value is
@@ -1884,8 +1885,7 @@ feature and are not relevant on other platforms.
The default value is
.Sy off .
.It Sy zoned Ns = Ns Sy on Ns | Ns Sy off
-Controls whether the dataset is managed from a non-global zone.
-Zones are a Solaris feature and are not relevant on other platforms.
+Controls whether the dataset is managed from a non-global zone or namespace.
The default value is
.Sy off .
.El
diff --git a/sys/contrib/openzfs/man/man7/zpool-features.7 b/sys/contrib/openzfs/man/man7/zpool-features.7
index b92109c4ac98..df9e64701e37 100644
--- a/sys/contrib/openzfs/man/man7/zpool-features.7
+++ b/sys/contrib/openzfs/man/man7/zpool-features.7
@@ -326,6 +326,12 @@ while
.Sy freeing
is non-zero.
.
+.feature org.openzfs blake3 no extensible_dataset
+This feature enables the use of the BLAKE3 hash algorithm for checksum and dedup.
+BLAKE3 is a secure hash algorithm focused on high performance.
+.Pp
+.checksum-spiel blake3
+.
.feature com.delphix bookmarks yes extensible_dataset
This feature enables use of the
.Nm zfs Cm bookmark
@@ -436,6 +442,8 @@ in ZFS, which means that the checksum is pre-seeded with a secret
to be checksummed.
Thus the produced checksums are unique to a given pool,
preventing hash collision attacks on systems with dedup.
+.Pp
+.checksum-spiel edonr
.
.feature com.delphix embedded_data no
This feature improves the performance and compression ratio of
diff --git a/sys/contrib/openzfs/man/man8/zfs-unzone.8 b/sys/contrib/openzfs/man/man8/zfs-unzone.8
new file mode 120000
index 000000000000..9052b28aa880
--- /dev/null
+++ b/sys/contrib/openzfs/man/man8/zfs-unzone.8
@@ -0,0 +1 @@
+zfs-zone.8 \ No newline at end of file
diff --git a/sys/contrib/openzfs/man/man8/zfs-zone.8 b/sys/contrib/openzfs/man/man8/zfs-zone.8
new file mode 100644
index 000000000000..2f975dde6799
--- /dev/null
+++ b/sys/contrib/openzfs/man/man8/zfs-zone.8
@@ -0,0 +1,116 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2009 Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright 2011 Joshua M. Clulow <josh@sysmgr.org>
+.\" Copyright (c) 2011, 2019 by Delphix. All rights reserved.
+.\" Copyright (c) 2011, Pawel Jakub Dawidek <pjd@FreeBSD.org>
+.\" Copyright (c) 2012, Glen Barber <gjb@FreeBSD.org>
+.\" Copyright (c) 2012, Bryan Drewery <bdrewery@FreeBSD.org>
+.\" Copyright (c) 2013, Steven Hartland <smh@FreeBSD.org>
+.\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
+.\" Copyright (c) 2014 by Adam Stevko. All rights reserved.
+.\" Copyright (c) 2014 Integros [integros.com]
+.\" Copyright (c) 2014, Xin LI <delphij@FreeBSD.org>
+.\" Copyright (c) 2014-2015, The FreeBSD Foundation, All Rights Reserved.
+.\" Copyright (c) 2016 Nexenta Systems, Inc. All Rights Reserved.
+.\" Copyright 2019 Richard Laager. All rights reserved.
+.\" Copyright 2018 Nexenta Systems, Inc.
+.\" Copyright 2019 Joyent, Inc.
+.\" Copyright 2021 Klara, Inc.
+.\"
+.Dd June 3, 2022
+.Dt ZFS-ZONE 8
+.Os
+.
+.Sh NAME
+.Nm zfs-zone ,
+.Nm zfs-unzone
+.Nd attach and detach ZFS filesystems to user namespaces
+.Sh SYNOPSIS
+.Nm zfs Cm zone
+.Ar nsfile
+.Ar filesystem
+.Nm zfs Cm unzone
+.Ar nsfile
+.Ar filesystem
+.
+.Sh DESCRIPTION
+.Bl -tag -width ""
+.It Xo
+.Nm zfs
+.Cm zone
+.Ar nsfile
+.Ar filesystem
+.Xc
+Attach the specified
+.Ar filesystem
+to the user namespace identified by
+.Ar nsfile .
+From now on this file system tree can be managed from within a user namespace
+if the
+.Sy zoned
+property has been set.
+.Pp
+You cannot attach a zoned dataset's children to another user namespace.
+You also cannot attach the root file system
+of the user namespace or any dataset
+which needs to be mounted before the zfs service
+is run inside the user namespace,
+as it would be attached unmounted until it is
+mounted from the service inside the user namespace.
+.Pp
+To allow management of the dataset from within a user namespace, the
+.Sy zoned
+property has to be set and the user namespace needs access to the
+.Pa /dev/zfs
+device.
+The
+.Sy quota
+property cannot be changed from within a user namespace.
+.Pp
+After a dataset is attached to a user namespace and the
+.Sy zoned
+property is set,
+a zoned file system cannot be mounted outside the user namespace,
+since the user namespace administrator might have set the mount point
+to an unacceptable value.
+.It Xo
+.Nm zfs
+.Cm unzone
+.Ar nsfile
+.Ar filesystem
+.Xc
+Detach the specified
+.Ar filesystem
+from the user namespace identified by
+.Ar nsfile .
+.El
+.Sh EXAMPLES
+.Ss Example 1 : No Delegating a Dataset to a User Namespace
+The following example delegates the
+.Ar tank/users
+dataset to a user namespace identified by user namespace file
+.Pa /proc/1234/ns/user .
+.Dl # Nm zfs Cm zone Ar /proc/1234/ns/user Ar tank/users
+.
+.Sh SEE ALSO
+.Xr zfsprops 7
diff --git a/sys/contrib/openzfs/man/man8/zpool-trim.8 b/sys/contrib/openzfs/man/man8/zpool-trim.8
index d9a7b4400301..ad0909a3b08d 100644
--- a/sys/contrib/openzfs/man/man8/zpool-trim.8
+++ b/sys/contrib/openzfs/man/man8/zpool-trim.8
@@ -84,8 +84,29 @@ with no flags on the relevant target devices.
.It Fl w , -wait
Wait until the devices are done being trimmed before returning.
.El
+.Sh PERIODIC TRIM
+On machines using systemd, trim timers can be enabled on a per-pool basis.
+.Nm weekly
+and
+.Nm monthly
+timer units are provided.
+.Bl -tag -width Ds
+.It Xo
+.Xc
+.Nm systemctl
+.Cm enable
+.Cm zfs-trim-\fIweekly\fB@\fIrpool\fB.timer
+.Cm --now
+.It Xo
+.Xc
+.Nm systemctl
+.Cm enable
+.Cm zfs-trim-\fImonthly\fB@\fIotherpool\fB.timer
+.Cm --now
+.El
.
.Sh SEE ALSO
+.Xr systemd.timer 5 ,
.Xr zpoolprops 7 ,
.Xr zpool-initialize 8 ,
.Xr zpool-wait 8
diff --git a/sys/contrib/openzfs/module/Kbuild.in b/sys/contrib/openzfs/module/Kbuild.in
index 11099999fb87..4803952cbfed 100644
--- a/sys/contrib/openzfs/module/Kbuild.in
+++ b/sys/contrib/openzfs/module/Kbuild.in
@@ -65,7 +65,8 @@ SPL_OBJS := \
spl-tsd.o \
spl-vmem.o \
spl-xdr.o \
- spl-zlib.o
+ spl-zlib.o \
+ spl-zone.o
spl-objs += $(addprefix os/linux/spl/,$(SPL_OBJS))
@@ -75,6 +76,10 @@ ICP_OBJS := \
algs/aes/aes_impl.o \
algs/aes/aes_impl_generic.o \
algs/aes/aes_modes.o \
+ algs/blake3/blake3.o \
+ algs/blake3/blake3_generic.o \
+ algs/blake3/blake3_impl.o \
+ algs/blake3/blake3_x86-64.o \
algs/edonr/edonr.o \
algs/modes/cbc.o \
algs/modes/ccm.o \
@@ -105,23 +110,45 @@ ICP_OBJS_X86_64 := \
asm-x86_64/aes/aes_aesni.o \
asm-x86_64/aes/aes_amd64.o \
asm-x86_64/aes/aeskey.o \
+ asm-x86_64/blake3/blake3_avx2.o \
+ asm-x86_64/blake3/blake3_avx512.o \
+ asm-x86_64/blake3/blake3_sse2.o \
+ asm-x86_64/blake3/blake3_sse41.o \
asm-x86_64/modes/aesni-gcm-x86_64.o \
asm-x86_64/modes/gcm_pclmulqdq.o \
asm-x86_64/modes/ghash-x86_64.o \
asm-x86_64/sha2/sha256_impl.o \
asm-x86_64/sha2/sha512_impl.o
+
ICP_OBJS_X86 := \
algs/aes/aes_impl_aesni.o \
algs/aes/aes_impl_x86-64.o \
algs/modes/gcm_pclmulqdq.o
+
+ICP_OBJS_ARM64 := \
+ asm-aarch64/blake3/b3_aarch64_sse2.o \
+ asm-aarch64/blake3/b3_aarch64_sse41.o
+
+
+ICP_OBJS_PPC_PPC64 := \
+ asm-ppc64/blake3/b3_ppc64le_sse2.o \
+ asm-ppc64/blake3/b3_ppc64le_sse41.o
+
zfs-objs += $(addprefix icp/,$(ICP_OBJS))
zfs-$(CONFIG_X86) += $(addprefix icp/,$(ICP_OBJS_X86))
+zfs-$(CONFIG_UML_X86)+= $(addprefix icp/,$(ICP_OBJS_X86))
zfs-$(CONFIG_X86_64) += $(addprefix icp/,$(ICP_OBJS_X86_64))
+zfs-$(CONFIG_ARM64) += $(addprefix icp/,$(ICP_OBJS_ARM64))
+zfs-$(CONFIG_PPC) += $(addprefix icp/,$(ICP_OBJS_PPC_PPC64))
+zfs-$(CONFIG_PPC64) += $(addprefix icp/,$(ICP_OBJS_PPC_PPC64))
+
+$(addprefix $(obj)/icp/,$(ICP_OBJS) $(ICP_OBJS_X86) $(ICP_OBJS_X86_64) \
+ $(ICP_OBJS_ARM64) $(ICP_OBJS_PPC_PPC64)) : asflags-y += -I$(icp_include)
-$(addprefix $(obj)/icp/,$(ICP_OBJS) $(ICP_OBJS_X86) $(ICP_OBJS_X86_64)) : asflags-y += -I$(icp_include)
-$(addprefix $(obj)/icp/,$(ICP_OBJS) $(ICP_OBJS_X86) $(ICP_OBJS_X86_64)) : ccflags-y += -I$(icp_include)
+$(addprefix $(obj)/icp/,$(ICP_OBJS) $(ICP_OBJS_X86) $(ICP_OBJS_X86_64) \
+ $(ICP_OBJS_ARM64) $(ICP_OBJS_PPC_PPC64)) : ccflags-y += -I$(icp_include)
# Suppress objtool "can't find jump dest instruction at" warnings. They
# are caused by the constants which are defined in the text section of the
@@ -129,6 +156,7 @@ $(addprefix $(obj)/icp/,$(ICP_OBJS) $(ICP_OBJS_X86) $(ICP_OBJS_X86_64)) : ccflag
# utility tries to interpret them as opcodes and obviously fails doing so.
OBJECT_FILES_NON_STANDARD_aesni-gcm-x86_64.o := y
OBJECT_FILES_NON_STANDARD_ghash-x86_64.o := y
+
# Suppress objtool "unsupported stack pointer realignment" warnings. We are
# not using a DRAP register while aligning the stack to a 64 byte boundary.
# See #6950 for the reasoning.
@@ -205,6 +233,7 @@ ZCOMMON_OBJS_ARM64 := \
zfs-objs += $(addprefix zcommon/,$(ZCOMMON_OBJS))
zfs-$(CONFIG_X86) += $(addprefix zcommon/,$(ZCOMMON_OBJS_X86))
+zfs-$(CONFIG_UML_X86)+= $(addprefix zcommon/,$(ZCOMMON_OBJS_X86))
zfs-$(CONFIG_ARM64) += $(addprefix zcommon/,$(ZCOMMON_OBJS_ARM64))
@@ -261,6 +290,7 @@ ZFS_OBJS := \
abd.o \
aggsum.o \
arc.o \
+ blake3_zfs.o \
blkptr.o \
bplist.o \
bpobj.o \
@@ -358,6 +388,7 @@ ZFS_OBJS := \
zcp_synctask.o \
zfeature.o \
zfs_byteswap.o \
+ zfs_chksum.o \
zfs_fm.o \
zfs_fuid.o \
zfs_ioctl.o \
@@ -428,6 +459,7 @@ ZFS_OBJS_PPC_PPC64 := \
zfs-objs += $(addprefix zfs/,$(ZFS_OBJS)) $(addprefix os/linux/zfs/,$(ZFS_OBJS_OS))
zfs-$(CONFIG_X86) += $(addprefix zfs/,$(ZFS_OBJS_X86))
+zfs-$(CONFIG_UML_X86)+= $(addprefix zfs/,$(ZFS_OBJS_X86))
zfs-$(CONFIG_ARM64) += $(addprefix zfs/,$(ZFS_OBJS_ARM64))
zfs-$(CONFIG_PPC) += $(addprefix zfs/,$(ZFS_OBJS_PPC_PPC64))
zfs-$(CONFIG_PPC64) += $(addprefix zfs/,$(ZFS_OBJS_PPC_PPC64))
diff --git a/sys/contrib/openzfs/module/Makefile.bsd b/sys/contrib/openzfs/module/Makefile.bsd
index 61f02152d334..589ca60b29be 100644
--- a/sys/contrib/openzfs/module/Makefile.bsd
+++ b/sys/contrib/openzfs/module/Makefile.bsd
@@ -10,6 +10,10 @@ INCDIR=${.CURDIR:H}/include
KMOD= openzfs
.PATH: ${SRCDIR}/avl \
+ ${SRCDIR}/icp/algs/blake3 \
+ ${SRCDIR}/icp/asm-aarch64/blake3 \
+ ${SRCDIR}/icp/asm-ppc64/blake3 \
+ ${SRCDIR}/icp/asm-x86_64/blake3 \
${SRCDIR}/lua \
${SRCDIR}/nvpair \
${SRCDIR}/icp/algs/edonr \
@@ -31,6 +35,7 @@ CFLAGS+= -I${INCDIR}/os/freebsd
CFLAGS+= -I${INCDIR}/os/freebsd/spl
CFLAGS+= -I${INCDIR}/os/freebsd/zfs
CFLAGS+= -I${SRCDIR}/zstd/include
+CFLAGS+= -I${SRCDIR}/icp/include
CFLAGS+= -include ${INCDIR}/os/freebsd/spl/sys/ccompile.h
CFLAGS+= -D__KERNEL__ -DFREEBSD_NAMECACHE -DBUILDING_ZFS -D__BSD_VISIBLE=1 \
@@ -38,7 +43,8 @@ CFLAGS+= -D__KERNEL__ -DFREEBSD_NAMECACHE -DBUILDING_ZFS -D__BSD_VISIBLE=1 \
-D_SYS_VMEM_H_ -DKDTRACE_HOOKS -DSMP -DCOMPAT_FREEBSD11
.if ${MACHINE_ARCH} == "amd64"
-CFLAGS+= -DHAVE_AVX2 -DHAVE_AVX -D__x86_64 -DHAVE_SSE2 -DHAVE_AVX512F -DHAVE_SSSE3
+CFLAGS+= -D__x86_64 -DHAVE_SSE2 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 \
+ -DHAVE_AVX -DHAVE_AVX2 -DHAVE_AVX512F -DHAVE_AVX512VL
.endif
.if defined(WITH_DEBUG) && ${WITH_DEBUG} == "true"
@@ -73,12 +79,32 @@ CFLAGS+= -DBITS_PER_LONG=64
SRCS= vnode_if.h device_if.h bus_if.h
-# avl
+#avl
SRCS+= avl.c
# icp
SRCS+= edonr.c
+#icp/algs/blake3
+SRCS+= blake3.c \
+ blake3_generic.c \
+ blake3_impl.c \
+ blake3_x86-64.c
+
+#icp/asm-aarch64/blake3
+SRCS+= b3_aarch64_sse2.S \
+ b3_aarch64_sse41.S
+
+#icp/asm-ppc64/blake3
+SRCS+= b3_ppc64le_sse2.S \
+ b3_ppc64le_sse41.S
+
+#icp/asm-x86_64/blake3
+SRCS+= blake3_avx2.S \
+ blake3_avx512.S \
+ blake3_sse2.S \
+ blake3_sse41.S
+
#lua
SRCS+= lapi.c \
lauxlib.c \
@@ -189,6 +215,7 @@ SRCS+= zfeature_common.c \
SRCS+= abd.c \
aggsum.c \
arc.c \
+ blake3_zfs.c \
blkptr.c \
bplist.c \
bpobj.c \
@@ -291,6 +318,7 @@ SRCS+= abd.c \
zcp_synctask.c \
zfeature.c \
zfs_byteswap.c \
+ zfs_chksum.c \
zfs_file_os.c \
zfs_fm.c \
zfs_fuid.c \
@@ -337,8 +365,6 @@ SRCS+= zfs_zstd.c \
zstd_decompress.c \
zstd_decompress_block.c
-
-
beforeinstall:
.if ${MK_DEBUG_FILES} != "no"
mtree -eu \
diff --git a/sys/contrib/openzfs/module/avl/avl.c b/sys/contrib/openzfs/module/avl/avl.c
index 69cb8bf6815b..c45be4793fa8 100644
--- a/sys/contrib/openzfs/module/avl/avl.c
+++ b/sys/contrib/openzfs/module/avl/avl.c
@@ -109,21 +109,6 @@
#include <sys/mod.h>
/*
- * Small arrays to translate between balance (or diff) values and child indices.
- *
- * Code that deals with binary tree data structures will randomly use
- * left and right children when examining a tree. C "if()" statements
- * which evaluate randomly suffer from very poor hardware branch prediction.
- * In this code we avoid some of the branch mispredictions by using the
- * following translation arrays. They replace random branches with an
- * additional memory reference. Since the translation arrays are both very
- * small the data should remain efficiently in cache.
- */
-static const int avl_child2balance[] = {-1, 1};
-static const int avl_balance2child[] = {0, 0, 1};
-
-
-/*
* Walk from one node to the previous valued node (ie. an infix walk
* towards the left). At any given node we do one of 2 things:
*
@@ -278,8 +263,7 @@ avl_find(avl_tree_t *tree, const void *value, avl_index_t *where)
#endif
return (AVL_NODE2DATA(node, off));
}
- child = avl_balance2child[1 + diff];
-
+ child = (diff > 0);
}
if (where != NULL)
@@ -527,7 +511,7 @@ avl_insert(avl_tree_t *tree, void *new_data, avl_index_t where)
* Compute the new balance
*/
old_balance = AVL_XBALANCE(node);
- new_balance = old_balance + avl_child2balance[which_child];
+ new_balance = old_balance + (which_child ? 1 : -1);
/*
* If we introduced equal balance, then we are done immediately
@@ -693,7 +677,7 @@ avl_remove(avl_tree_t *tree, void *data)
* choose node to swap from whichever side is taller
*/
old_balance = AVL_XBALANCE(delete);
- left = avl_balance2child[old_balance + 1];
+ left = (old_balance > 0);
right = 1 - left;
/*
@@ -777,7 +761,7 @@ avl_remove(avl_tree_t *tree, void *data)
*/
node = parent;
old_balance = AVL_XBALANCE(node);
- new_balance = old_balance - avl_child2balance[which_child];
+ new_balance = old_balance - (which_child ? 1 : -1);
parent = AVL_XPARENT(node);
which_child = AVL_XCHILD(node);
diff --git a/sys/contrib/openzfs/module/icp/algs/blake3/blake3.c b/sys/contrib/openzfs/module/icp/algs/blake3/blake3.c
new file mode 100644
index 000000000000..8c9c06eb9d9f
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/algs/blake3/blake3.c
@@ -0,0 +1,732 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3
+ * Copyright (c) 2019-2020 Samuel Neves and Jack O'Connor
+ * Copyright (c) 2021-2022 Tino Reichardt <milky-zfs@mcmilk.de>
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/blake3.h>
+
+#include "blake3_impl.h"
+
+/*
+ * blake3_compress_subtree_wide() needs a 1056 byte stack frame, so this
+ * pragma silences gcc's -Wframe-larger-than warning for the rest of the file.
+ */
+#if defined(__GNUC__)
+#pragma GCC diagnostic ignored "-Wframe-larger-than="
+#endif
+
+/* for internal use only */
+typedef struct {
+ uint32_t input_cv[8];
+ uint64_t counter;
+ uint8_t block[BLAKE3_BLOCK_LEN];
+ uint8_t block_len;
+ uint8_t flags;
+} output_t;
+
+/* internal flags */
+enum blake3_flags {
+ CHUNK_START = 1 << 0,
+ CHUNK_END = 1 << 1,
+ PARENT = 1 << 2,
+ ROOT = 1 << 3,
+ KEYED_HASH = 1 << 4,
+ DERIVE_KEY_CONTEXT = 1 << 5,
+ DERIVE_KEY_MATERIAL = 1 << 6,
+};
+
+/* internal start */
+static void chunk_state_init(blake3_chunk_state_t *ctx,
+ const uint32_t key[8], uint8_t flags)
+{
+ memcpy(ctx->cv, key, BLAKE3_KEY_LEN);
+ ctx->chunk_counter = 0;
+ memset(ctx->buf, 0, BLAKE3_BLOCK_LEN);
+ ctx->buf_len = 0;
+ ctx->blocks_compressed = 0;
+ ctx->flags = flags;
+}
+
+static void chunk_state_reset(blake3_chunk_state_t *ctx,
+ const uint32_t key[8], uint64_t chunk_counter)
+{
+ memcpy(ctx->cv, key, BLAKE3_KEY_LEN);
+ ctx->chunk_counter = chunk_counter;
+ ctx->blocks_compressed = 0;
+ memset(ctx->buf, 0, BLAKE3_BLOCK_LEN);
+ ctx->buf_len = 0;
+}
+
+static size_t chunk_state_len(const blake3_chunk_state_t *ctx)
+{
+ return (BLAKE3_BLOCK_LEN * (size_t)ctx->blocks_compressed) +
+ ((size_t)ctx->buf_len);
+}
+
+static size_t chunk_state_fill_buf(blake3_chunk_state_t *ctx,
+ const uint8_t *input, size_t input_len)
+{
+ size_t take = BLAKE3_BLOCK_LEN - ((size_t)ctx->buf_len);
+ if (take > input_len) {
+ take = input_len;
+ }
+ uint8_t *dest = ctx->buf + ((size_t)ctx->buf_len);
+ memcpy(dest, input, take);
+ ctx->buf_len += (uint8_t)take;
+ return (take);
+}
+
+static uint8_t chunk_state_maybe_start_flag(const blake3_chunk_state_t *ctx)
+{
+ if (ctx->blocks_compressed == 0) {
+ return (CHUNK_START);
+ } else {
+ return (0);
+ }
+}
+
+static output_t make_output(const uint32_t input_cv[8],
+ const uint8_t *block, uint8_t block_len,
+ uint64_t counter, uint8_t flags)
+{
+ output_t ret;
+ memcpy(ret.input_cv, input_cv, 32);
+ memcpy(ret.block, block, BLAKE3_BLOCK_LEN);
+ ret.block_len = block_len;
+ ret.counter = counter;
+ ret.flags = flags;
+ return (ret);
+}
+
+/*
+ * Chaining values within a given chunk (specifically the compress_in_place
+ * interface) are represented as words. This avoids unnecessary bytes<->words
+ * conversion overhead in the portable implementation. However, the hash_many
+ * interface handles both user input and parent node blocks, so it accepts
+ * bytes. For that reason, chaining values in the CV stack are represented as
+ * bytes.
+ */
+static void output_chaining_value(const blake3_impl_ops_t *ops,
+ const output_t *ctx, uint8_t cv[32])
+{
+ uint32_t cv_words[8];
+ memcpy(cv_words, ctx->input_cv, 32);
+ ops->compress_in_place(cv_words, ctx->block, ctx->block_len,
+ ctx->counter, ctx->flags);
+ store_cv_words(cv, cv_words);
+}
+
+static void output_root_bytes(const blake3_impl_ops_t *ops, const output_t *ctx,
+ uint64_t seek, uint8_t *out, size_t out_len)
+{
+ uint64_t output_block_counter = seek / 64;
+ size_t offset_within_block = seek % 64;
+ uint8_t wide_buf[64];
+ while (out_len > 0) {
+ ops->compress_xof(ctx->input_cv, ctx->block, ctx->block_len,
+ output_block_counter, ctx->flags | ROOT, wide_buf);
+ size_t available_bytes = 64 - offset_within_block;
+ size_t memcpy_len;
+ if (out_len > available_bytes) {
+ memcpy_len = available_bytes;
+ } else {
+ memcpy_len = out_len;
+ }
+ memcpy(out, wide_buf + offset_within_block, memcpy_len);
+ out += memcpy_len;
+ out_len -= memcpy_len;
+ output_block_counter += 1;
+ offset_within_block = 0;
+ }
+}
+
+static void chunk_state_update(const blake3_impl_ops_t *ops,
+ blake3_chunk_state_t *ctx, const uint8_t *input, size_t input_len)
+{
+ if (ctx->buf_len > 0) {
+ size_t take = chunk_state_fill_buf(ctx, input, input_len);
+ input += take;
+ input_len -= take;
+ if (input_len > 0) {
+ ops->compress_in_place(ctx->cv, ctx->buf,
+ BLAKE3_BLOCK_LEN, ctx->chunk_counter,
+ ctx->flags|chunk_state_maybe_start_flag(ctx));
+ ctx->blocks_compressed += 1;
+ ctx->buf_len = 0;
+ memset(ctx->buf, 0, BLAKE3_BLOCK_LEN);
+ }
+ }
+
+ while (input_len > BLAKE3_BLOCK_LEN) {
+ ops->compress_in_place(ctx->cv, input, BLAKE3_BLOCK_LEN,
+ ctx->chunk_counter,
+ ctx->flags|chunk_state_maybe_start_flag(ctx));
+ ctx->blocks_compressed += 1;
+ input += BLAKE3_BLOCK_LEN;
+ input_len -= BLAKE3_BLOCK_LEN;
+ }
+
+ size_t take = chunk_state_fill_buf(ctx, input, input_len);
+ input += take;
+ input_len -= take;
+}
+
+static output_t chunk_state_output(const blake3_chunk_state_t *ctx)
+{
+ uint8_t block_flags =
+ ctx->flags | chunk_state_maybe_start_flag(ctx) | CHUNK_END;
+ return (make_output(ctx->cv, ctx->buf, ctx->buf_len, ctx->chunk_counter,
+ block_flags));
+}
+
+static output_t parent_output(const uint8_t block[BLAKE3_BLOCK_LEN],
+ const uint32_t key[8], uint8_t flags)
+{
+ return (make_output(key, block, BLAKE3_BLOCK_LEN, 0, flags | PARENT));
+}
+
+/*
+ * Given some input larger than one chunk, return the number of bytes that
+ * should go in the left subtree. This is the largest power-of-2 number of
+ * chunks that leaves at least 1 byte for the right subtree.
+ */
+static size_t left_len(size_t content_len)
+{
+ /*
+ * Subtract 1 to reserve at least one byte for the right side.
+ * The caller should only pass a content_len that is greater than
+ * BLAKE3_CHUNK_LEN.
+ */
+ size_t full_chunks = (content_len - 1) / BLAKE3_CHUNK_LEN;
+ return (round_down_to_power_of_2(full_chunks) * BLAKE3_CHUNK_LEN);
+}
+
+/*
+ * Use SIMD parallelism to hash up to MAX_SIMD_DEGREE chunks at the same time
+ * on a single thread. Write out the chunk chaining values and return the
+ * number of chunks hashed. These chunks are never the root and never empty;
+ * those cases use a different codepath.
+ */
+static size_t compress_chunks_parallel(const blake3_impl_ops_t *ops,
+ const uint8_t *input, size_t input_len, const uint32_t key[8],
+ uint64_t chunk_counter, uint8_t flags, uint8_t *out)
+{
+ const uint8_t *chunks_array[MAX_SIMD_DEGREE];
+ size_t input_position = 0;
+ size_t chunks_array_len = 0;
+ while (input_len - input_position >= BLAKE3_CHUNK_LEN) {
+ chunks_array[chunks_array_len] = &input[input_position];
+ input_position += BLAKE3_CHUNK_LEN;
+ chunks_array_len += 1;
+ }
+
+ ops->hash_many(chunks_array, chunks_array_len, BLAKE3_CHUNK_LEN /
+ BLAKE3_BLOCK_LEN, key, chunk_counter, B_TRUE, flags, CHUNK_START,
+ CHUNK_END, out);
+
+ /*
+ * Hash the remaining partial chunk, if there is one. Note that the
+ * empty chunk (meaning the empty message) is a different codepath.
+ */
+ if (input_len > input_position) {
+ uint64_t counter = chunk_counter + (uint64_t)chunks_array_len;
+ blake3_chunk_state_t chunk_state;
+ chunk_state_init(&chunk_state, key, flags);
+ chunk_state.chunk_counter = counter;
+ chunk_state_update(ops, &chunk_state, &input[input_position],
+ input_len - input_position);
+ output_t output = chunk_state_output(&chunk_state);
+ output_chaining_value(ops, &output, &out[chunks_array_len *
+ BLAKE3_OUT_LEN]);
+ return (chunks_array_len + 1);
+ } else {
+ return (chunks_array_len);
+ }
+}
+
+/*
+ * Use SIMD parallelism to hash up to MAX_SIMD_DEGREE parents at the same time
+ * on a single thread. Write out the parent chaining values and return the
+ * number of parents hashed. (If there's an odd input chaining value left over,
+ * return it as an additional output.) These parents are never the root and
+ * never empty; those cases use a different codepath.
+ */
+static size_t compress_parents_parallel(const blake3_impl_ops_t *ops,
+ const uint8_t *child_chaining_values, size_t num_chaining_values,
+ const uint32_t key[8], uint8_t flags, uint8_t *out)
+{
+ const uint8_t *parents_array[MAX_SIMD_DEGREE_OR_2];
+ size_t parents_array_len = 0;
+
+ while (num_chaining_values - (2 * parents_array_len) >= 2) {
+ parents_array[parents_array_len] = &child_chaining_values[2 *
+ parents_array_len * BLAKE3_OUT_LEN];
+ parents_array_len += 1;
+ }
+
+ ops->hash_many(parents_array, parents_array_len, 1, key, 0, B_FALSE,
+ flags | PARENT, 0, 0, out);
+
+ /* If there's an odd child left over, it becomes an output. */
+ if (num_chaining_values > 2 * parents_array_len) {
+ memcpy(&out[parents_array_len * BLAKE3_OUT_LEN],
+ &child_chaining_values[2 * parents_array_len *
+ BLAKE3_OUT_LEN], BLAKE3_OUT_LEN);
+ return (parents_array_len + 1);
+ } else {
+ return (parents_array_len);
+ }
+}
+
+/*
+ * The wide helper function returns (writes out) an array of chaining values
+ * and returns the length of that array. The number of chaining values returned
+ * is the dynamically detected SIMD degree, at most MAX_SIMD_DEGREE. Or fewer,
+ * if the input is shorter than that many chunks. The reason for maintaining a
+ * wide array of chaining values going back up the tree, is to allow the
+ * implementation to hash as many parents in parallel as possible.
+ *
+ * As a special case when the SIMD degree is 1, this function will still return
+ * at least 2 outputs. This guarantees that this function doesn't perform the
+ * root compression. (If it did, it would use the wrong flags, and also we
+ * wouldn't be able to implement extendable output.) Note that this function is
+ * not used when the whole input is only 1 chunk long; that's a different
+ * codepath.
+ *
+ * Why not just have the caller split the input on the first update(), instead
+ * of implementing this special rule? Because we don't want to limit SIMD or
+ * multi-threading parallelism for that update().
+ */
+static size_t blake3_compress_subtree_wide(const blake3_impl_ops_t *ops,
+ const uint8_t *input, size_t input_len, const uint32_t key[8],
+ uint64_t chunk_counter, uint8_t flags, uint8_t *out)
+{
+ /*
+ * Note that the single chunk case does *not* bump the SIMD degree up
+ * to 2 when it is 1. If this implementation adds multi-threading in
+ * the future, this gives us the option of multi-threading even the
+ * 2-chunk case, which can help performance on smaller platforms.
+ */
+ if (input_len <= (size_t)(ops->degree * BLAKE3_CHUNK_LEN)) {
+ return (compress_chunks_parallel(ops, input, input_len, key,
+ chunk_counter, flags, out));
+ }
+
+
+ /*
+ * With more than simd_degree chunks, we need to recurse. Start by
+ * dividing the input into left and right subtrees. (Note that this is
+ * only optimal as long as the SIMD degree is a power of 2. If we ever
+ * get a SIMD degree of 3 or something, we'll need a more complicated
+ * strategy.)
+ */
+ size_t left_input_len = left_len(input_len);
+ size_t right_input_len = input_len - left_input_len;
+ const uint8_t *right_input = &input[left_input_len];
+ uint64_t right_chunk_counter = chunk_counter +
+ (uint64_t)(left_input_len / BLAKE3_CHUNK_LEN);
+
+ /*
+ * Make space for the child outputs. Here we use MAX_SIMD_DEGREE_OR_2
+ * to account for the special case of returning 2 outputs when the
+ * SIMD degree is 1.
+ */
+ uint8_t cv_array[2 * MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN];
+ size_t degree = ops->degree;
+ if (left_input_len > BLAKE3_CHUNK_LEN && degree == 1) {
+
+ /*
+ * The special case: We always use a degree of at least two,
+ * to make sure there are two outputs. Except, as noted above,
+ * at the chunk level, where we allow degree=1. (Note that the
+ * 1-chunk-input case is a different codepath.)
+ */
+ degree = 2;
+ }
+ uint8_t *right_cvs = &cv_array[degree * BLAKE3_OUT_LEN];
+
+ /*
+ * Recurse! If this implementation adds multi-threading support in the
+ * future, this is where it will go.
+ */
+ size_t left_n = blake3_compress_subtree_wide(ops, input, left_input_len,
+ key, chunk_counter, flags, cv_array);
+ size_t right_n = blake3_compress_subtree_wide(ops, right_input,
+ right_input_len, key, right_chunk_counter, flags, right_cvs);
+
+ /*
+ * The special case again. If simd_degree=1, then we'll have left_n=1
+ * and right_n=1. Rather than compressing them into a single output,
+ * return them directly, to make sure we always have at least two
+ * outputs.
+ */
+ if (left_n == 1) {
+ memcpy(out, cv_array, 2 * BLAKE3_OUT_LEN);
+ return (2);
+ }
+
+ /* Otherwise, do one layer of parent node compression. */
+ size_t num_chaining_values = left_n + right_n;
+ return compress_parents_parallel(ops, cv_array,
+ num_chaining_values, key, flags, out);
+}
+
+/*
+ * Hash a subtree with compress_subtree_wide(), and then condense the resulting
+ * list of chaining values down to a single parent node. Don't compress that
+ * last parent node, however. Instead, return its message bytes (the
+ * concatenated chaining values of its children). This is necessary when the
+ * first call to update() supplies a complete subtree, because the topmost
+ * parent node of that subtree could end up being the root. It's also necessary
+ * for extended output in the general case.
+ *
+ * As with compress_subtree_wide(), this function is not used on inputs of 1
+ * chunk or less. That's a different codepath.
+ */
+static void compress_subtree_to_parent_node(const blake3_impl_ops_t *ops,
+ const uint8_t *input, size_t input_len, const uint32_t key[8],
+ uint64_t chunk_counter, uint8_t flags, uint8_t out[2 * BLAKE3_OUT_LEN])
+{
+ uint8_t cv_array[MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN];
+ size_t num_cvs = blake3_compress_subtree_wide(ops, input, input_len,
+ key, chunk_counter, flags, cv_array);
+
+ /*
+ * If MAX_SIMD_DEGREE is greater than 2 and there's enough input,
+ * compress_subtree_wide() returns more than 2 chaining values. Condense
+ * them into 2 by forming parent nodes repeatedly.
+ */
+ uint8_t out_array[MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN / 2];
+ while (num_cvs > 2) {
+ num_cvs = compress_parents_parallel(ops, cv_array, num_cvs, key,
+ flags, out_array);
+ memcpy(cv_array, out_array, num_cvs * BLAKE3_OUT_LEN);
+ }
+ memcpy(out, cv_array, 2 * BLAKE3_OUT_LEN);
+}
+
+static void hasher_init_base(BLAKE3_CTX *ctx, const uint32_t key[8],
+ uint8_t flags)
+{
+ memcpy(ctx->key, key, BLAKE3_KEY_LEN);
+ chunk_state_init(&ctx->chunk, key, flags);
+ ctx->cv_stack_len = 0;
+ ctx->ops = blake3_impl_get_ops();
+}
+
+/*
+ * As described in hasher_push_cv() below, we do "lazy merging", delaying
+ * merges until right before the next CV is about to be added. This is
+ * different from the reference implementation. Another difference is that we
+ * aren't always merging 1 chunk at a time. Instead, each CV might represent
+ * any power-of-two number of chunks, as long as the smaller-above-larger
+ * stack order is maintained. Instead of the "count the trailing 0-bits"
+ * algorithm described in the spec, we use a "count the total number of
+ * 1-bits" variant that doesn't require us to retain the subtree size of the
+ * CV on top of the stack. The principle is the same: each CV that should
+ * remain in the stack is represented by a 1-bit in the total number of chunks
+ * (or bytes) so far.
+ */
+static void hasher_merge_cv_stack(BLAKE3_CTX *ctx, uint64_t total_len)
+{
+ size_t post_merge_stack_len = (size_t)popcnt(total_len);
+ while (ctx->cv_stack_len > post_merge_stack_len) {
+ uint8_t *parent_node =
+ &ctx->cv_stack[(ctx->cv_stack_len - 2) * BLAKE3_OUT_LEN];
+ output_t output =
+ parent_output(parent_node, ctx->key, ctx->chunk.flags);
+ output_chaining_value(ctx->ops, &output, parent_node);
+ ctx->cv_stack_len -= 1;
+ }
+}
+
+/*
+ * In reference_impl.rs, we merge the new CV with existing CVs from the stack
+ * before pushing it. We can do that because we know more input is coming, so
+ * we know none of the merges are root.
+ *
+ * This setting is different. We want to feed as much input as possible to
+ * compress_subtree_wide(), without setting aside anything for the chunk_state.
+ * If the user gives us 64 KiB, we want to parallelize over all 64 KiB at once
+ * as a single subtree, if at all possible.
+ *
+ * This leads to two problems:
+ * 1) This 64 KiB input might be the only call that ever gets made to update.
+ * In this case, the root node of the 64 KiB subtree would be the root node
+ * of the whole tree, and it would need to be ROOT finalized. We can't
+ * compress it until we know.
+ * 2) This 64 KiB input might complete a larger tree, whose root node is
+ * similarly going to be the root of the whole tree. For example, maybe
+ * we have 192 KiB (that is, 128 + 64) hashed so far. We can't compress the
+ * node at the root of the 256 KiB subtree until we know how to finalize it.
+ *
+ * The second problem is solved with "lazy merging". That is, when we're about
+ * to add a CV to the stack, we don't merge it with anything first, as the
+ * reference impl does. Instead we do merges using the *previous* CV that was
+ * added, which is sitting on top of the stack, and we put the new CV
+ * (unmerged) on top of the stack afterwards. This guarantees that we never
+ * merge the root node until finalize().
+ *
+ * Solving the first problem requires an additional tool,
+ * compress_subtree_to_parent_node(). That function always returns the top
+ * *two* chaining values of the subtree it's compressing. We then do lazy
+ * merging with each of them separately, so that the second CV will always
+ * remain unmerged. (That also helps us support extendable output when we're
+ * hashing an input all-at-once.)
+ */
+static void hasher_push_cv(BLAKE3_CTX *ctx, uint8_t new_cv[BLAKE3_OUT_LEN],
+ uint64_t chunk_counter)
+{
+ hasher_merge_cv_stack(ctx, chunk_counter);
+ memcpy(&ctx->cv_stack[ctx->cv_stack_len * BLAKE3_OUT_LEN], new_cv,
+ BLAKE3_OUT_LEN);
+ ctx->cv_stack_len += 1;
+}
+
+void
+Blake3_Init(BLAKE3_CTX *ctx)
+{
+ hasher_init_base(ctx, BLAKE3_IV, 0);
+}
+
+void
+Blake3_InitKeyed(BLAKE3_CTX *ctx, const uint8_t key[BLAKE3_KEY_LEN])
+{
+ uint32_t key_words[8];
+ load_key_words(key, key_words);
+ hasher_init_base(ctx, key_words, KEYED_HASH);
+}
+
+static void
+Blake3_Update2(BLAKE3_CTX *ctx, const void *input, size_t input_len)
+{
+ /*
+ * Explicitly checking for zero avoids causing UB by passing a null
+ * pointer to memcpy. This comes up in practice with things like:
+ * std::vector<uint8_t> v;
+ * blake3_hasher_update(&hasher, v.data(), v.size());
+ */
+ if (input_len == 0) {
+ return;
+ }
+
+ const uint8_t *input_bytes = (const uint8_t *)input;
+
+ /*
+ * If we have some partial chunk bytes in the internal chunk_state, we
+ * need to finish that chunk first.
+ */
+ if (chunk_state_len(&ctx->chunk) > 0) {
+ size_t take = BLAKE3_CHUNK_LEN - chunk_state_len(&ctx->chunk);
+ if (take > input_len) {
+ take = input_len;
+ }
+ chunk_state_update(ctx->ops, &ctx->chunk, input_bytes, take);
+ input_bytes += take;
+ input_len -= take;
+ /*
+ * If we've filled the current chunk and there's more coming,
+ * finalize this chunk and proceed. In this case we know it's
+ * not the root.
+ */
+ if (input_len > 0) {
+ output_t output = chunk_state_output(&ctx->chunk);
+ uint8_t chunk_cv[32];
+ output_chaining_value(ctx->ops, &output, chunk_cv);
+ hasher_push_cv(ctx, chunk_cv, ctx->chunk.chunk_counter);
+ chunk_state_reset(&ctx->chunk, ctx->key,
+ ctx->chunk.chunk_counter + 1);
+ } else {
+ return;
+ }
+ }
+
+ /*
+ * Now the chunk_state is clear, and we have more input. If there's
+ * more than a single chunk (so, definitely not the root chunk), hash
+ * the largest whole subtree we can, with the full benefits of SIMD
+ * (and maybe in the future, multi-threading) parallelism. Two
+ * restrictions:
+ * - The subtree has to be a power-of-2 number of chunks. Only
+ * subtrees along the right edge can be incomplete, and we don't know
+ * where the right edge is going to be until we get to finalize().
+ * - The subtree must evenly divide the total number of chunks up
+ * until this point (if total is not 0). If the current incomplete
+ * subtree is only waiting for 1 more chunk, we can't hash a subtree
+ * of 4 chunks. We have to complete the current subtree first.
+ * Because we might need to break up the input to form powers of 2, or
+ * to evenly divide what we already have, this part runs in a loop.
+ */
+ while (input_len > BLAKE3_CHUNK_LEN) {
+ size_t subtree_len = round_down_to_power_of_2(input_len);
+ uint64_t count_so_far =
+ ctx->chunk.chunk_counter * BLAKE3_CHUNK_LEN;
+ /*
+ * Shrink the subtree_len until it evenly divides the count so
+ * far. We know that subtree_len itself is a power of 2, so we
+ * can use a bitmasking trick instead of an actual remainder
+ * operation. (Note that if the caller consistently passes
+ * power-of-2 inputs of the same size, as is hopefully
+ * typical, this loop condition will always fail, and
+ * subtree_len will always be the full length of the input.)
+ *
+ * An aside: We don't have to shrink subtree_len quite this
+ * much. For example, if count_so_far is 1, we could pass 2
+ * chunks to compress_subtree_to_parent_node. Since we'll get
+ * 2 CVs back, we'll still get the right answer in the end,
+ * and we might get to use 2-way SIMD parallelism. The problem
+ * with this optimization, is that it gets us stuck always
+ * hashing 2 chunks. The total number of chunks will remain
+ * odd, and we'll never graduate to higher degrees of
+ * parallelism. See
+ * https://github.com/BLAKE3-team/BLAKE3/issues/69.
+ */
+ while ((((uint64_t)(subtree_len - 1)) & count_so_far) != 0) {
+ subtree_len /= 2;
+ }
+ /*
+ * The shrunken subtree_len might now be 1 chunk long. If so,
+ * hash that one chunk by itself. Otherwise, compress the
+ * subtree into a pair of CVs.
+ */
+ uint64_t subtree_chunks = subtree_len / BLAKE3_CHUNK_LEN;
+ if (subtree_len <= BLAKE3_CHUNK_LEN) {
+ blake3_chunk_state_t chunk_state;
+ chunk_state_init(&chunk_state, ctx->key,
+ ctx->chunk.flags);
+ chunk_state.chunk_counter = ctx->chunk.chunk_counter;
+ chunk_state_update(ctx->ops, &chunk_state, input_bytes,
+ subtree_len);
+ output_t output = chunk_state_output(&chunk_state);
+ uint8_t cv[BLAKE3_OUT_LEN];
+ output_chaining_value(ctx->ops, &output, cv);
+ hasher_push_cv(ctx, cv, chunk_state.chunk_counter);
+ } else {
+ /*
+ * This is the high-performance happy path, though
+ * getting here depends on the caller giving us a long
+ * enough input.
+ */
+ uint8_t cv_pair[2 * BLAKE3_OUT_LEN];
+ compress_subtree_to_parent_node(ctx->ops, input_bytes,
+ subtree_len, ctx->key, ctx-> chunk.chunk_counter,
+ ctx->chunk.flags, cv_pair);
+ hasher_push_cv(ctx, cv_pair, ctx->chunk.chunk_counter);
+ hasher_push_cv(ctx, &cv_pair[BLAKE3_OUT_LEN],
+ ctx->chunk.chunk_counter + (subtree_chunks / 2));
+ }
+ ctx->chunk.chunk_counter += subtree_chunks;
+ input_bytes += subtree_len;
+ input_len -= subtree_len;
+ }
+
+ /*
+ * If there's any remaining input less than a full chunk, add it to
+ * the chunk state. In that case, also do a final merge loop to make
+ * sure the subtree stack doesn't contain any unmerged pairs. The
+ * remaining input means we know these merges are non-root. This merge
+ * loop isn't strictly necessary here, because hasher_push_cv()
+ * already does its own merge loop, but it simplifies
+ * Blake3_FinalSeek() below.
+ */
+ if (input_len > 0) {
+ chunk_state_update(ctx->ops, &ctx->chunk, input_bytes,
+ input_len);
+ hasher_merge_cv_stack(ctx, ctx->chunk.chunk_counter);
+ }
+}
+
+void
+Blake3_Update(BLAKE3_CTX *ctx, const void *input, size_t todo)
+{
+ size_t done = 0;
+ const uint8_t *data = input;
+ const size_t block_max = 1024 * 64;
+
+ /* feed at most block_max bytes per call to keep stack usage small */
+ while (todo != 0) {
+ size_t block = (todo >= block_max) ? block_max : todo;
+ Blake3_Update2(ctx, data + done, block);
+ done += block;
+ todo -= block;
+ }
+}
+
+void
+Blake3_Final(const BLAKE3_CTX *ctx, uint8_t *out)
+{
+ Blake3_FinalSeek(ctx, 0, out, BLAKE3_OUT_LEN);
+}
+
+void
+Blake3_FinalSeek(const BLAKE3_CTX *ctx, uint64_t seek, uint8_t *out,
+ size_t out_len)
+{
+ /*
+ * Explicitly checking for zero avoids causing UB by passing a null
+ * pointer to memcpy. This comes up in practice with things like:
+ * std::vector<uint8_t> v;
+ * blake3_hasher_finalize(&hasher, v.data(), v.size());
+ */
+ if (out_len == 0) {
+ return;
+ }
+ /* If the subtree stack is empty, then the current chunk is the root. */
+ if (ctx->cv_stack_len == 0) {
+ output_t output = chunk_state_output(&ctx->chunk);
+ output_root_bytes(ctx->ops, &output, seek, out, out_len);
+ return;
+ }
+ /*
+ * If there are any bytes in the chunk state, finalize that chunk and
+ * do a roll-up merge between that chunk hash and every subtree in the
+ * stack. In this case, the extra merge loop at the end of
+ * Blake3_Update2() guarantees that none of the subtrees in the
+ * stack need to be merged with each other first. Otherwise, if there
+ * are no bytes in the chunk state, then the top of the stack is a
+ * chunk hash, and we start the merge from that.
+ */
+ output_t output;
+ size_t cvs_remaining;
+ if (chunk_state_len(&ctx->chunk) > 0) {
+ cvs_remaining = ctx->cv_stack_len;
+ output = chunk_state_output(&ctx->chunk);
+ } else {
+ /* There are always at least 2 CVs in the stack in this case. */
+ cvs_remaining = ctx->cv_stack_len - 2;
+ output = parent_output(&ctx->cv_stack[cvs_remaining * 32],
+ ctx->key, ctx->chunk.flags);
+ }
+ while (cvs_remaining > 0) {
+ cvs_remaining -= 1;
+ uint8_t parent_block[BLAKE3_BLOCK_LEN];
+ memcpy(parent_block, &ctx->cv_stack[cvs_remaining * 32], 32);
+ output_chaining_value(ctx->ops, &output, &parent_block[32]);
+ output = parent_output(parent_block, ctx->key,
+ ctx->chunk.flags);
+ }
+ output_root_bytes(ctx->ops, &output, seek, out, out_len);
+}
diff --git a/sys/contrib/openzfs/module/icp/algs/blake3/blake3_generic.c b/sys/contrib/openzfs/module/icp/algs/blake3/blake3_generic.c
new file mode 100644
index 000000000000..6ff9a845ccdc
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/algs/blake3/blake3_generic.c
@@ -0,0 +1,202 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3
+ * Copyright (c) 2019-2020 Samuel Neves and Jack O'Connor
+ * Copyright (c) 2021-2022 Tino Reichardt <milky-zfs@mcmilk.de>
+ */
+
+#include <sys/zfs_context.h>
+#include "blake3_impl.h"
+
+/*
+ * Rotate the 32-bit value x right by n bits. Both arguments are evaluated
+ * twice, and n must stay in 1..31 so that neither shift count reaches 32;
+ * g() below only passes 16, 12, 8 and 7.
+ */
+#define rotr32(x, n) (((x) >> (n)) | ((x) << (32 - (n))))
+/*
+ * Mixing step: updates the four state words selected by the indices a, b, c
+ * and d in place, adding message word x in the first half and y in the
+ * second half. Statement order matters, since every line consumes the word
+ * written by the line before it.
+ */
+static inline void g(uint32_t *state, size_t a, size_t b, size_t c, size_t d,
+ uint32_t x, uint32_t y)
+{
+ state[a] = state[a] + state[b] + x;
+ state[d] = rotr32(state[d] ^ state[a], 16);
+ state[c] = state[c] + state[d];
+ state[b] = rotr32(state[b] ^ state[c], 12);
+ state[a] = state[a] + state[b] + y;
+ state[d] = rotr32(state[d] ^ state[a], 8);
+ state[c] = state[c] + state[d];
+ state[b] = rotr32(state[b] ^ state[c], 7);
+}
+
+/*
+ * Run one round over the 16-word state: eight g() calls, each fed two of
+ * the sixteen message words in msg. `round` selects the row of
+ * BLAKE3_MSG_SCHEDULE that decides which message words are used, so it
+ * must be in 0..6 (the table has seven rows).
+ */
+static inline void round_fn(uint32_t state[16], const uint32_t *msg,
+ size_t round)
+{
+ /* Select the message schedule based on the round. */
+ const uint8_t *schedule = BLAKE3_MSG_SCHEDULE[round];
+
+ /* Mix the columns. */
+ g(state, 0, 4, 8, 12, msg[schedule[0]], msg[schedule[1]]);
+ g(state, 1, 5, 9, 13, msg[schedule[2]], msg[schedule[3]]);
+ g(state, 2, 6, 10, 14, msg[schedule[4]], msg[schedule[5]]);
+ g(state, 3, 7, 11, 15, msg[schedule[6]], msg[schedule[7]]);
+
+ /* Mix the rows. */
+ g(state, 0, 5, 10, 15, msg[schedule[8]], msg[schedule[9]]);
+ g(state, 1, 6, 11, 12, msg[schedule[10]], msg[schedule[11]]);
+ g(state, 2, 7, 8, 13, msg[schedule[12]], msg[schedule[13]]);
+ g(state, 3, 4, 9, 14, msg[schedule[14]], msg[schedule[15]]);
+}
+
+/*
+ * Build the 16-word compression state from the chaining value, the IV, the
+ * block counter, the block length and the flags, then run the seven rounds
+ * over it using the message words decoded from `block`. The mixed state is
+ * left in `state`; the callers derive their output from it.
+ */
+static inline void compress_pre(uint32_t state[16], const uint32_t cv[8],
+    const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+    uint64_t counter, uint8_t flags)
+{
+	uint32_t m[16];
+	size_t i;
+
+	/* Decode the block into sixteen 32-bit message words. */
+	for (i = 0; i < 16; i++)
+		m[i] = load32(block + 4 * i);
+
+	/* Words 0-7 come from the chaining value. */
+	for (i = 0; i < 8; i++)
+		state[i] = cv[i];
+
+	/* Words 8-11 are the first four IV words. */
+	for (i = 0; i < 4; i++)
+		state[8 + i] = BLAKE3_IV[i];
+
+	/* Words 12-15: counter (low, high), block length and flags. */
+	state[12] = counter_low(counter);
+	state[13] = counter_high(counter);
+	state[14] = (uint32_t)block_len;
+	state[15] = (uint32_t)flags;
+
+	/* Seven rounds, one per row of the message schedule. */
+	for (i = 0; i < 7; i++)
+		round_fn(state, &m[0], i);
+}
+
+/*
+ * Compress one block and overwrite cv with the new chaining value, which
+ * is the XOR of the lower and upper halves of the mixed state.
+ */
+static inline void blake3_compress_in_place_generic(uint32_t cv[8],
+    const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+    uint64_t counter, uint8_t flags)
+{
+	uint32_t mixed[16];
+	size_t w;
+
+	compress_pre(mixed, cv, block, block_len, counter, flags);
+	for (w = 0; w < 8; w++)
+		cv[w] = mixed[w] ^ mixed[w + 8];
+}
+
+/*
+ * Hash `blocks` consecutive BLAKE3_BLOCK_LEN-byte blocks starting at input,
+ * seeding the chaining value from key, and store the resulting chaining
+ * value into out. flags_start is applied to the first block only and
+ * flags_end to the last block only; `flags` is applied to every block.
+ */
+static inline void hash_one_generic(const uint8_t *input, size_t blocks,
+    const uint32_t key[8], uint64_t counter, uint8_t flags,
+    uint8_t flags_start, uint8_t flags_end, uint8_t out[BLAKE3_OUT_LEN])
+{
+	uint32_t chain[8];
+	uint8_t cur_flags = flags | flags_start;
+
+	memcpy(chain, key, BLAKE3_KEY_LEN);
+	for (; blocks > 0; blocks--, input += BLAKE3_BLOCK_LEN) {
+		/* The final block additionally carries the end flags. */
+		if (blocks == 1)
+			cur_flags |= flags_end;
+		blake3_compress_in_place_generic(chain, input,
+		    BLAKE3_BLOCK_LEN, counter, cur_flags);
+		/* Start flags apply to the first block only. */
+		cur_flags = flags;
+	}
+	store_cv_words(out, chain);
+}
+
+/*
+ * Compress one block without modifying cv and emit 64 output bytes: the
+ * first 32 are state[i] ^ state[i + 8], the last 32 are state[i + 8] ^ cv[i],
+ * each word written via store32().
+ */
+static inline void blake3_compress_xof_generic(const uint32_t cv[8],
+    const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+    uint64_t counter, uint8_t flags, uint8_t out[64])
+{
+	uint32_t mixed[16];
+	size_t w;
+
+	compress_pre(mixed, cv, block, block_len, counter, flags);
+
+	/* Lower half of the output. */
+	for (w = 0; w < 8; w++)
+		store32(&out[w * 4], mixed[w] ^ mixed[w + 8]);
+
+	/* Upper half of the output feeds the input chaining value back in. */
+	for (w = 0; w < 8; w++)
+		store32(&out[(w + 8) * 4], mixed[w + 8] ^ cv[w]);
+}
+
+/*
+ * Hash num_inputs independent inputs of `blocks` blocks each, one after the
+ * other, writing BLAKE3_OUT_LEN bytes per input to consecutive slots of
+ * out. When increment_counter is set, the counter grows by one per input.
+ */
+static inline void blake3_hash_many_generic(const uint8_t * const *inputs,
+    size_t num_inputs, size_t blocks, const uint32_t key[8], uint64_t counter,
+    boolean_t increment_counter, uint8_t flags, uint8_t flags_start,
+    uint8_t flags_end, uint8_t *out)
+{
+	size_t n;
+
+	for (n = 0; n < num_inputs; n++) {
+		hash_one_generic(inputs[n], blocks, key, counter, flags,
+		    flags_start, flags_end, &out[n * BLAKE3_OUT_LEN]);
+		if (increment_counter)
+			counter += 1;
+	}
+}
+
+/* The portable C implementation has no hardware requirement: always B_TRUE. */
+static inline boolean_t blake3_is_generic_supported(void)
+{
+ return (B_TRUE);
+}
+
+/*
+ * Operations table for the portable C implementation defined in this file.
+ * It is the first entry of blake3_impls[] and the initial value of
+ * blake3_selected_impl in blake3_impl.c.
+ */
+const blake3_impl_ops_t blake3_generic_impl = {
+ .compress_in_place = blake3_compress_in_place_generic,
+ .compress_xof = blake3_compress_xof_generic,
+ .hash_many = blake3_hash_many_generic,
+ .is_supported = blake3_is_generic_supported,
+ .degree = 4,
+ .name = "generic"
+};
diff --git a/sys/contrib/openzfs/module/icp/algs/blake3/blake3_impl.c b/sys/contrib/openzfs/module/icp/algs/blake3/blake3_impl.c
new file mode 100644
index 000000000000..c3809a2827be
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/algs/blake3/blake3_impl.c
@@ -0,0 +1,284 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2021-2022 Tino Reichardt <milky-zfs@mcmilk.de>
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/zio_checksum.h>
+
+#include "blake3_impl.h"
+
+/*
+ * All implementations compiled into this build; the generic one is always
+ * present at index 0. The preprocessor guards mirror the ones on the
+ * matching extern declarations in blake3_impl.h. Implementation ids used by
+ * the functions below count only the entries whose is_supported() returns
+ * true, so an id is not necessarily an index into this array.
+ */
+static const blake3_impl_ops_t *const blake3_impls[] = {
+ &blake3_generic_impl,
+#if defined(__aarch64__) || \
+ (defined(__x86_64) && defined(HAVE_SSE2)) || \
+ (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
+ &blake3_sse2_impl,
+#endif
+#if defined(__aarch64__) || \
+ (defined(__x86_64) && defined(HAVE_SSE4_1)) || \
+ (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
+ &blake3_sse41_impl,
+#endif
+#if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2)
+ &blake3_avx2_impl,
+#endif
+#if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL)
+ &blake3_avx512_impl,
+#endif
+};
+
+/* this pointer holds current ops for implementation */
+static const blake3_impl_ops_t *blake3_selected_impl = &blake3_generic_impl;
+
+/* special implementation selections */
+#define IMPL_FASTEST (UINT32_MAX)
+#define IMPL_CYCLE (UINT32_MAX-1)
+#define IMPL_USER (UINT32_MAX-2)
+#define IMPL_PARAM (UINT32_MAX-3)
+
+#define IMPL_READ(i) (*(volatile uint32_t *) &(i))
+static uint32_t icp_blake3_impl = IMPL_FASTEST;
+
+#define BLAKE3_IMPL_NAME_MAX 16
+
+/* id of fastest implementation */
+static uint32_t blake3_fastest_id = 0;
+
+/* currently used id */
+static uint32_t blake3_current_id = 0;
+
+/* id of module parameter (-1 == unused) */
+static int blake3_param_id = -1;
+
+/*
+ * Return the number of entries in blake3_impls[] whose is_supported()
+ * callback reports true. The result is computed on first use and cached.
+ *
+ * The tally is accumulated in a local and stored to the cache once at the
+ * end, so a caller racing with the first computation sees either 0 (and
+ * recounts) or the final value, never a partially built count.
+ */
+int
+blake3_get_impl_count(void)
+{
+	static int impls = 0;
+	int count = 0;
+	size_t i;
+
+	if (impls)
+		return (impls);
+
+	for (i = 0; i < ARRAY_SIZE(blake3_impls); i++) {
+		if (blake3_impls[i]->is_supported())
+			count++;
+	}
+
+	impls = count;
+	return (count);
+}
+
+/*
+ * Return the id of the currently selected implementation, i.e. the value of
+ * blake3_current_id (a uint32_t, returned here as int).
+ */
+int
+blake3_get_impl_id(void)
+{
+ return (blake3_current_id);
+}
+
+/*
+ * Return the name string of the currently selected implementation. The
+ * string belongs to the implementation's ops table and must not be freed.
+ */
+const char *
+blake3_get_impl_name(void)
+{
+ return (blake3_selected_impl->name);
+}
+
+/*
+ * Record `id` as the fastest implementation. This only stores the value; it
+ * takes effect the next time blake3_set_impl_id(IMPL_FASTEST) runs. The id
+ * is not validated here.
+ */
+void
+blake3_set_impl_fastest(uint32_t id)
+{
+ blake3_fastest_id = id;
+}
+
+/*
+ * Select the implementation with the given id.
+ *
+ * id is either a position among the supported entries of blake3_impls[]
+ * (0 .. blake3_get_impl_count() - 1) or one of the special values
+ * IMPL_FASTEST (use blake3_fastest_id) and IMPL_CYCLE (advance to the next
+ * supported implementation, wrapping around). If no supported entry has
+ * that id, the current selection is left unchanged.
+ *
+ * The next id for IMPL_CYCLE is computed without modifying
+ * blake3_current_id, so that variable only ever holds an id that has been
+ * matched against a supported implementation.
+ */
+void
+blake3_set_impl_id(uint32_t id)
+{
+	uint32_t cid = 0;
+	size_t i;
+
+	/* select fastest */
+	if (id == IMPL_FASTEST)
+		id = blake3_fastest_id;
+
+	/* select next or first */
+	if (id == IMPL_CYCLE)
+		id = (blake3_current_id + 1) % blake3_get_impl_count();
+
+	/* 0..N for the real impl; unsupported entries get no id */
+	for (i = 0; i < ARRAY_SIZE(blake3_impls); i++) {
+		if (!blake3_impls[i]->is_supported())
+			continue;
+		if (cid == id) {
+			blake3_current_id = cid;
+			blake3_selected_impl = blake3_impls[i];
+			return;
+		}
+		cid++;
+	}
+}
+
+/*
+ * Select an implementation by name.
+ *
+ * "fastest" and "cycle" switch icp_blake3_impl to the matching mode and
+ * apply it immediately. Any other name is looked up among the supported
+ * entries of blake3_impls[]: while icp_blake3_impl is IMPL_PARAM the
+ * matching id is only recorded in blake3_param_id, otherwise the
+ * implementation is selected right away.
+ *
+ * Returns 0 on success, -EINVAL if no supported implementation has that
+ * name.
+ *
+ * NOTE(review): on a direct (non-IMPL_PARAM) selection icp_blake3_impl is
+ * left as it was, so a previously set "cycle" mode stays active - confirm
+ * this is intended.
+ */
+int
+blake3_set_impl_name(const char *name)
+{
+	int idx, supported_id = 0;
+
+	if (strcmp(name, "fastest") == 0) {
+		atomic_swap_32(&icp_blake3_impl, IMPL_FASTEST);
+		blake3_set_impl_id(IMPL_FASTEST);
+		return (0);
+	}
+
+	if (strcmp(name, "cycle") == 0) {
+		atomic_swap_32(&icp_blake3_impl, IMPL_CYCLE);
+		blake3_set_impl_id(IMPL_CYCLE);
+		return (0);
+	}
+
+	for (idx = 0; idx < ARRAY_SIZE(blake3_impls); idx++) {
+		const blake3_impl_ops_t *ops = blake3_impls[idx];
+
+		if (!ops->is_supported())
+			continue;
+
+		/* Only supported entries consume an id. */
+		if (strcmp(name, ops->name) != 0) {
+			supported_id++;
+			continue;
+		}
+
+		if (icp_blake3_impl == IMPL_PARAM) {
+			/* Defer the switch; just remember the request. */
+			blake3_param_id = supported_id;
+		} else {
+			blake3_selected_impl = ops;
+			blake3_current_id = supported_id;
+		}
+		return (0);
+	}
+
+	return (-EINVAL);
+}
+
+/*
+ * Apply the selection mode currently stored in icp_blake3_impl:
+ *  - IMPL_PARAM: select the id recorded in blake3_param_id, then switch
+ *    the mode to IMPL_USER. blake3_param_id starts out as -1, which
+ *    converts to UINT32_MAX (IMPL_FASTEST) when passed as a uint32_t.
+ *  - IMPL_FASTEST / IMPL_CYCLE: select accordingly.
+ *  - anything else: re-select blake3_current_id.
+ */
+void
+blake3_setup_impl(void)
+{
+	const uint32_t mode = IMPL_READ(icp_blake3_impl);
+
+	if (mode == IMPL_PARAM) {
+		blake3_set_impl_id(blake3_param_id);
+		atomic_swap_32(&icp_blake3_impl, IMPL_USER);
+	} else if (mode == IMPL_FASTEST) {
+		blake3_set_impl_id(IMPL_FASTEST);
+	} else if (mode == IMPL_CYCLE) {
+		blake3_set_impl_id(IMPL_CYCLE);
+	} else {
+		blake3_set_impl_id(blake3_current_id);
+	}
+}
+
+/*
+ * Return the ops table of the selected implementation. In IMPL_CYCLE mode
+ * the selection is advanced to the next supported implementation first, so
+ * successive calls return different tables.
+ */
+const blake3_impl_ops_t *
+blake3_impl_get_ops(void)
+{
+ /* each call to ops will cycle */
+ if (icp_blake3_impl == IMPL_CYCLE)
+ blake3_set_impl_id(IMPL_CYCLE);
+
+ return (blake3_selected_impl);
+}
+
+#if defined(_KERNEL)
+void **blake3_per_cpu_ctx;
+
+/*
+ * Allocate the table of per-CPU context pointers (max_ncpus entries) and
+ * one BLAKE3_CTX for every slot. All allocations use KM_SLEEP.
+ */
+void
+blake3_per_cpu_ctx_init(void)
+{
+	int cpu;
+
+	/* The table itself: one pointer per possible CPU. */
+	blake3_per_cpu_ctx = kmem_alloc(max_ncpus * sizeof (void *), KM_SLEEP);
+
+	/* One context behind every pointer. */
+	for (cpu = 0; cpu < max_ncpus; cpu++) {
+		void *ctx = kmem_alloc(sizeof (BLAKE3_CTX), KM_SLEEP);
+
+		blake3_per_cpu_ctx[cpu] = ctx;
+	}
+}
+
+/*
+ * Release everything allocated by blake3_per_cpu_ctx_init(). Each context
+ * and the pointer table are zeroed before being freed. The global pointer
+ * is reset to NULL afterwards so that it never dangles, and a call without
+ * a live table (never initialised, or already torn down) is a no-op.
+ */
+void
+blake3_per_cpu_ctx_fini(void)
+{
+	if (blake3_per_cpu_ctx == NULL)
+		return;
+
+	for (int i = 0; i < max_ncpus; i++) {
+		memset(blake3_per_cpu_ctx[i], 0, sizeof (BLAKE3_CTX));
+		kmem_free(blake3_per_cpu_ctx[i], sizeof (BLAKE3_CTX));
+	}
+	memset(blake3_per_cpu_ctx, 0, max_ncpus * sizeof (void *));
+	kmem_free(blake3_per_cpu_ctx, max_ncpus * sizeof (void *));
+	blake3_per_cpu_ctx = NULL;
+}
+#endif
+
+#if defined(_KERNEL) && defined(__linux__)
+/*
+ * Module parameter setter: parse the requested implementation name and
+ * hand it to blake3_set_impl_name() in IMPL_PARAM mode.
+ *
+ * name is copied into a local buffer and trailing whitespace is stripped.
+ * Empty names and names of BLAKE3_IMPL_NAME_MAX or more characters are
+ * rejected. kp is unused.
+ *
+ * Returns 0 on success or -EINVAL. On failure the selection mode that was
+ * active before the call is put back, so a rejected write does not leave
+ * icp_blake3_impl stuck in IMPL_PARAM.
+ */
+static int
+icp_blake3_impl_set(const char *name, zfs_kernel_param_t *kp)
+{
+	char req_name[BLAKE3_IMPL_NAME_MAX];
+	uint32_t prev_impl;
+	size_t i;
+	int err;
+
+	/* sanitize input */
+	i = strnlen(name, BLAKE3_IMPL_NAME_MAX);
+	if (i == 0 || i >= BLAKE3_IMPL_NAME_MAX)
+		return (-EINVAL);
+
+	strlcpy(req_name, name, BLAKE3_IMPL_NAME_MAX);
+	while (i > 0 && isspace(req_name[i-1]))
+		i--;
+	req_name[i] = '\0';
+
+	/* atomic_swap_32() hands back the value it replaced. */
+	prev_impl = atomic_swap_32(&icp_blake3_impl, IMPL_PARAM);
+	err = blake3_set_impl_name(req_name);
+	if (err != 0)
+		atomic_swap_32(&icp_blake3_impl, prev_impl);
+
+	return (err);
+}
+
+/*
+ * Module parameter getter: write a space-separated list of the selectable
+ * values ("cycle", "fastest", then every supported implementation name)
+ * into buffer, with the active one enclosed in square brackets. kp is
+ * unused.
+ *
+ * Returns the number of characters written, not counting the terminating
+ * NUL. The fixed labels are printed through an explicit "%s" format
+ * instead of being used as the format string themselves.
+ */
+static int
+icp_blake3_impl_get(char *buffer, zfs_kernel_param_t *kp)
+{
+	int i, cid, cnt = 0;
+	const char *label;
+
+	/* cycling */
+	label = (icp_blake3_impl == IMPL_CYCLE) ? "[cycle] " : "cycle ";
+	cnt += sprintf(buffer + cnt, "%s", label);
+
+	/* fastest one */
+	label = (icp_blake3_impl == IMPL_FASTEST) ? "[fastest] " : "fastest ";
+	cnt += sprintf(buffer + cnt, "%s", label);
+
+	/* user selected */
+	for (i = 0, cid = 0; i < ARRAY_SIZE(blake3_impls); i++) {
+		if (!blake3_impls[i]->is_supported())
+			continue;
+		if (icp_blake3_impl == IMPL_USER && cid == blake3_current_id) {
+			cnt += sprintf(buffer + cnt, "[%s] ",
+			    blake3_impls[i]->name);
+		} else {
+			cnt += sprintf(buffer + cnt, "%s ",
+			    blake3_impls[i]->name);
+		}
+		cid++;
+	}
+
+	buffer[cnt] = 0;
+
+	return (cnt);
+}
+
+module_param_call(icp_blake3_impl, icp_blake3_impl_set, icp_blake3_impl_get,
+ NULL, 0644);
+MODULE_PARM_DESC(icp_blake3_impl, "Select BLAKE3 implementation.");
+#endif
diff --git a/sys/contrib/openzfs/module/icp/algs/blake3/blake3_impl.h b/sys/contrib/openzfs/module/icp/algs/blake3/blake3_impl.h
new file mode 100644
index 000000000000..7b40cc4d3f02
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/algs/blake3/blake3_impl.h
@@ -0,0 +1,213 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3
+ * Copyright (c) 2019-2020 Samuel Neves and Jack O'Connor
+ * Copyright (c) 2021-2022 Tino Reichardt <milky-zfs@mcmilk.de>
+ */
+
+#ifndef BLAKE3_IMPL_H
+#define BLAKE3_IMPL_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/types.h>
+#include <sys/blake3.h>
+#include <sys/simd.h>
+
+/*
+ * Methods used to define BLAKE3 assembler implementations
+ */
+typedef void (*blake3_compress_in_place_f)(uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN],
+ uint8_t block_len, uint64_t counter,
+ uint8_t flags);
+
+typedef void (*blake3_compress_xof_f)(const uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+ uint64_t counter, uint8_t flags, uint8_t out[64]);
+
+typedef void (*blake3_hash_many_f)(const uint8_t * const *inputs,
+ size_t num_inputs, size_t blocks, const uint32_t key[8],
+ uint64_t counter, boolean_t increment_counter, uint8_t flags,
+ uint8_t flags_start, uint8_t flags_end, uint8_t *out);
+
+typedef boolean_t (*blake3_is_supported_f)(void);
+
+/*
+ * Operations table describing one BLAKE3 implementation. One constant
+ * instance exists per implementation (generic, sse2, sse41, avx2, avx512).
+ */
+typedef struct blake3_impl_ops {
+ /* compress one block, updating the chaining value in place */
+ blake3_compress_in_place_f compress_in_place;
+ /* compress one block into a 64-byte output buffer */
+ blake3_compress_xof_f compress_xof;
+ /* hash several equally sized inputs in one call */
+ blake3_hash_many_f hash_many;
+ /* reports whether this implementation can be used right now */
+ blake3_is_supported_f is_supported;
+ /* presumably how many inputs hash_many handles in parallel - TODO confirm */
+ int degree;
+ /* name used for selection by string and for reporting */
+ const char *name;
+} blake3_impl_ops_t;
+
+/* Return selected BLAKE3 implementation ops */
+extern const blake3_impl_ops_t *blake3_impl_get_ops(void);
+
+extern const blake3_impl_ops_t blake3_generic_impl;
+
+#if defined(__aarch64__) || \
+ (defined(__x86_64) && defined(HAVE_SSE2)) || \
+ (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
+extern const blake3_impl_ops_t blake3_sse2_impl;
+#endif
+
+#if defined(__aarch64__) || \
+ (defined(__x86_64) && defined(HAVE_SSE4_1)) || \
+ (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
+extern const blake3_impl_ops_t blake3_sse41_impl;
+#endif
+
+#if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2)
+extern const blake3_impl_ops_t blake3_avx2_impl;
+#endif
+
+#if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL)
+extern const blake3_impl_ops_t blake3_avx512_impl;
+#endif
+
+#if defined(__x86_64)
+#define MAX_SIMD_DEGREE 16
+#else
+#define MAX_SIMD_DEGREE 4
+#endif
+
+#define MAX_SIMD_DEGREE_OR_2 (MAX_SIMD_DEGREE > 2 ? MAX_SIMD_DEGREE : 2)
+
+static const uint32_t BLAKE3_IV[8] = {
+ 0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL,
+ 0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL};
+
+static const uint8_t BLAKE3_MSG_SCHEDULE[7][16] = {
+ {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
+ {2, 6, 3, 10, 7, 0, 4, 13, 1, 11, 12, 5, 9, 14, 15, 8},
+ {3, 4, 10, 12, 13, 2, 7, 14, 6, 5, 9, 0, 11, 15, 8, 1},
+ {10, 7, 12, 9, 14, 3, 13, 15, 4, 0, 11, 2, 5, 8, 1, 6},
+ {12, 13, 9, 11, 15, 10, 14, 8, 7, 2, 5, 3, 0, 1, 6, 4},
+ {9, 14, 11, 5, 8, 12, 15, 1, 13, 3, 0, 10, 2, 6, 4, 7},
+ {11, 15, 5, 0, 1, 9, 8, 6, 14, 10, 2, 12, 3, 4, 7, 13},
+};
+
+/*
+ * Find index of the highest set bit (0 = least significant bit).
+ *
+ * x must be non-zero on the compiler-intrinsic paths: __builtin_clzll(0) is
+ * undefined. The portable fallback at the bottom returns 0 for x == 0.
+ * round_down_to_power_of_2() passes x | 1, which is never zero.
+ */
+static inline unsigned int highest_one(uint64_t x) {
+#if defined(__GNUC__) || defined(__clang__)
+ /* clz counts leading zeros; 63 ^ clz equals 63 - clz for 0..63 */
+ return (63 ^ __builtin_clzll(x));
+#elif defined(_MSC_VER) && defined(IS_X86_64)
+ unsigned long index;
+ _BitScanReverse64(&index, x);
+ return (index);
+#elif defined(_MSC_VER) && defined(IS_X86_32)
+ /* no 64-bit scan available: look at the upper half first */
+ if (x >> 32) {
+ unsigned long index;
+ _BitScanReverse(&index, x >> 32);
+ return (32 + index);
+ } else {
+ unsigned long index;
+ _BitScanReverse(&index, x);
+ return (index);
+ }
+#else
+ /* portable binary search over halves of decreasing width */
+ unsigned int c = 0;
+ if (x & 0xffffffff00000000ULL) { x >>= 32; c += 32; }
+ if (x & 0x00000000ffff0000ULL) { x >>= 16; c += 16; }
+ if (x & 0x000000000000ff00ULL) { x >>= 8; c += 8; }
+ if (x & 0x00000000000000f0ULL) { x >>= 4; c += 4; }
+ if (x & 0x000000000000000cULL) { x >>= 2; c += 2; }
+ if (x & 0x0000000000000002ULL) { c += 1; }
+ return (c);
+#endif
+}
+
+/*
+ * Count the number of 1 bits in x. Each iteration clears the lowest set
+ * bit, so the loop runs once per set bit.
+ */
+static inline unsigned int popcnt(uint64_t x) {
+	unsigned int ones;
+
+	for (ones = 0; x != 0; x &= x - 1)
+		ones++;
+
+	return (ones);
+}
+
+/*
+ * Return the largest power of two that does not exceed x. Bit 0 is forced
+ * on before the lookup, so x == 0 yields 1.
+ */
+static inline uint64_t round_down_to_power_of_2(uint64_t x) {
+	const unsigned int top_bit = highest_one(x | 1);
+
+	return (1ULL << top_bit);
+}
+
+/* Lower 32 bits of the 64-bit block counter. */
+static inline uint32_t counter_low(uint64_t counter) {
+	const uint64_t lower = counter & 0xffffffffULL;
+
+	return ((uint32_t)lower);
+}
+
+/* Upper 32 bits of the 64-bit block counter. */
+static inline uint32_t counter_high(uint64_t counter) {
+	const uint64_t upper = counter >> 32;
+
+	return ((uint32_t)upper);
+}
+
+/*
+ * Read a 32-bit little-endian word from the four bytes at src. Works one
+ * byte at a time, so src needs no particular alignment.
+ */
+static inline uint32_t load32(const void *src) {
+	const uint8_t *bytes = (const uint8_t *)src;
+	uint32_t word = 0;
+	int i;
+
+	/* Fold in the most significant byte first. */
+	for (i = 3; i >= 0; i--)
+		word = (word << 8) | (uint32_t)bytes[i];
+
+	return (word);
+}
+
+/* Decode the BLAKE3_KEY_LEN-byte key into eight 32-bit words via load32(). */
+static inline void load_key_words(const uint8_t key[BLAKE3_KEY_LEN],
+    uint32_t key_words[8]) {
+	size_t w;
+
+	for (w = 0; w < 8; w++)
+		key_words[w] = load32(&key[w * 4]);
+}
+
+/*
+ * Write w to the four bytes at dst in little-endian order, one byte at a
+ * time (dst needs no particular alignment).
+ */
+static inline void store32(void *dst, uint32_t w) {
+	uint8_t *bytes = (uint8_t *)dst;
+	unsigned int i;
+
+	for (i = 0; i < 4; i++)
+		bytes[i] = (uint8_t)(w >> (8 * i));
+}
+
+/* Encode the eight chaining-value words into 32 bytes via store32(). */
+static inline void store_cv_words(uint8_t bytes_out[32], uint32_t cv_words[8]) {
+	size_t w;
+
+	for (w = 0; w < 8; w++)
+		store32(&bytes_out[w * 4], cv_words[w]);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* BLAKE3_IMPL_H */
diff --git a/sys/contrib/openzfs/module/icp/algs/blake3/blake3_x86-64.c b/sys/contrib/openzfs/module/icp/algs/blake3/blake3_x86-64.c
new file mode 100644
index 000000000000..48715e2128d2
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/algs/blake3/blake3_x86-64.c
@@ -0,0 +1,248 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2021-2022 Tino Reichardt <milky-zfs@mcmilk.de>
+ */
+
+#include "blake3_impl.h"
+
+/*
+ * "sse2" implementation. Built on aarch64, on x86_64 with HAVE_SSE2 and on
+ * little-endian PPC64; the same condition guards its declaration in
+ * blake3_impl.h.
+ */
+#if defined(__aarch64__) || \
+ (defined(__x86_64) && defined(HAVE_SSE2)) || \
+ (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
+
+/* Entry points implemented outside this file (assembly sources). */
+extern void zfs_blake3_compress_in_place_sse2(uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+ uint64_t counter, uint8_t flags);
+
+extern void zfs_blake3_compress_xof_sse2(const uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+ uint64_t counter, uint8_t flags, uint8_t out[64]);
+
+extern void zfs_blake3_hash_many_sse2(const uint8_t * const *inputs,
+ size_t num_inputs, size_t blocks, const uint32_t key[8],
+ uint64_t counter, boolean_t increment_counter, uint8_t flags,
+ uint8_t flags_start, uint8_t flags_end, uint8_t *out);
+
+/*
+ * Each wrapper below forwards its arguments unchanged and brackets the
+ * call with kfpu_begin()/kfpu_end().
+ */
+static void blake3_compress_in_place_sse2(uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+ uint64_t counter, uint8_t flags) {
+ kfpu_begin();
+ zfs_blake3_compress_in_place_sse2(cv, block, block_len, counter,
+ flags);
+ kfpu_end();
+}
+
+static void blake3_compress_xof_sse2(const uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+ uint64_t counter, uint8_t flags, uint8_t out[64]) {
+ kfpu_begin();
+ zfs_blake3_compress_xof_sse2(cv, block, block_len, counter, flags,
+ out);
+ kfpu_end();
+}
+
+static void blake3_hash_many_sse2(const uint8_t * const *inputs,
+ size_t num_inputs, size_t blocks, const uint32_t key[8],
+ uint64_t counter, boolean_t increment_counter, uint8_t flags,
+ uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
+ kfpu_begin();
+ zfs_blake3_hash_many_sse2(inputs, num_inputs, blocks, key, counter,
+ increment_counter, flags, flags_start, flags_end, out);
+ kfpu_end();
+}
+
+/*
+ * Usable when kfpu_allowed() holds and, on x86_64 / PPC64, the CPU feature
+ * check (SSE2 / VSX) passes. Other architectures only check kfpu_allowed().
+ */
+static boolean_t blake3_is_sse2_supported(void)
+{
+#if defined(__x86_64)
+ return (kfpu_allowed() && zfs_sse2_available());
+#elif defined(__PPC64__)
+ return (kfpu_allowed() && zfs_vsx_available());
+#else
+ return (kfpu_allowed());
+#endif
+}
+
+/* Operations table registered in blake3_impls[] under the name "sse2". */
+const blake3_impl_ops_t blake3_sse2_impl = {
+ .compress_in_place = blake3_compress_in_place_sse2,
+ .compress_xof = blake3_compress_xof_sse2,
+ .hash_many = blake3_hash_many_sse2,
+ .is_supported = blake3_is_sse2_supported,
+ .degree = 4,
+ .name = "sse2"
+};
+#endif
+
+/*
+ * "sse41" implementation. Built on aarch64, on x86_64 with HAVE_SSE4_1 and
+ * on little-endian PPC64. The x86_64 condition tests HAVE_SSE4_1 so that it
+ * matches the extern declaration in blake3_impl.h and the table entry in
+ * blake3_impl.c; with HAVE_SSE2 here, a toolchain offering SSE2 but not
+ * SSE4.1 would compile this section without a declaration for
+ * blake3_sse41_impl.
+ */
+#if defined(__aarch64__) || \
+    (defined(__x86_64) && defined(HAVE_SSE4_1)) || \
+    (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
+
+/* Entry points implemented outside this file (assembly sources). */
+extern void zfs_blake3_compress_in_place_sse41(uint32_t cv[8],
+    const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+    uint64_t counter, uint8_t flags);
+
+extern void zfs_blake3_compress_xof_sse41(const uint32_t cv[8],
+    const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+    uint64_t counter, uint8_t flags, uint8_t out[64]);
+
+extern void zfs_blake3_hash_many_sse41(const uint8_t * const *inputs,
+    size_t num_inputs, size_t blocks, const uint32_t key[8],
+    uint64_t counter, boolean_t increment_counter, uint8_t flags,
+    uint8_t flags_start, uint8_t flags_end, uint8_t *out);
+
+/*
+ * Each wrapper below forwards its arguments unchanged and brackets the
+ * call with kfpu_begin()/kfpu_end().
+ */
+static void blake3_compress_in_place_sse41(uint32_t cv[8],
+    const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+    uint64_t counter, uint8_t flags) {
+	kfpu_begin();
+	zfs_blake3_compress_in_place_sse41(cv, block, block_len, counter,
+	    flags);
+	kfpu_end();
+}
+
+static void blake3_compress_xof_sse41(const uint32_t cv[8],
+    const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+    uint64_t counter, uint8_t flags, uint8_t out[64]) {
+	kfpu_begin();
+	zfs_blake3_compress_xof_sse41(cv, block, block_len, counter, flags,
+	    out);
+	kfpu_end();
+}
+
+static void blake3_hash_many_sse41(const uint8_t * const *inputs,
+    size_t num_inputs, size_t blocks, const uint32_t key[8],
+    uint64_t counter, boolean_t increment_counter, uint8_t flags,
+    uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
+	kfpu_begin();
+	zfs_blake3_hash_many_sse41(inputs, num_inputs, blocks, key, counter,
+	    increment_counter, flags, flags_start, flags_end, out);
+	kfpu_end();
+}
+
+/*
+ * Usable when kfpu_allowed() holds and, on x86_64 / PPC64, the CPU feature
+ * check (SSE4.1 / VSX) passes. Other architectures only check
+ * kfpu_allowed().
+ */
+static boolean_t blake3_is_sse41_supported(void)
+{
+#if defined(__x86_64)
+	return (kfpu_allowed() && zfs_sse4_1_available());
+#elif defined(__PPC64__)
+	return (kfpu_allowed() && zfs_vsx_available());
+#else
+	return (kfpu_allowed());
+#endif
+}
+
+/* Operations table registered in blake3_impls[] under the name "sse41". */
+const blake3_impl_ops_t blake3_sse41_impl = {
+	.compress_in_place = blake3_compress_in_place_sse41,
+	.compress_xof = blake3_compress_xof_sse41,
+	.hash_many = blake3_hash_many_sse41,
+	.is_supported = blake3_is_sse41_supported,
+	.degree = 4,
+	.name = "sse41"
+};
+#endif
+
+/*
+ * "avx2" implementation (x86_64 with HAVE_SSE4_1 and HAVE_AVX2). Only
+ * hash_many has an AVX2 routine; the two single-block compress operations
+ * reuse the SSE4.1 wrappers defined earlier in this file.
+ */
+#if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2)
+extern void zfs_blake3_hash_many_avx2(const uint8_t * const *inputs,
+ size_t num_inputs, size_t blocks, const uint32_t key[8],
+ uint64_t counter, boolean_t increment_counter, uint8_t flags,
+ uint8_t flags_start, uint8_t flags_end, uint8_t *out);
+
+/* Forwards to the external routine between kfpu_begin() and kfpu_end(). */
+static void blake3_hash_many_avx2(const uint8_t * const *inputs,
+ size_t num_inputs, size_t blocks, const uint32_t key[8],
+ uint64_t counter, boolean_t increment_counter, uint8_t flags,
+ uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
+ kfpu_begin();
+ zfs_blake3_hash_many_avx2(inputs, num_inputs, blocks, key, counter,
+ increment_counter, flags, flags_start, flags_end, out);
+ kfpu_end();
+}
+
+/* Needs SSE4.1 as well as AVX2 because the compress ops are the SSE4.1 ones. */
+static boolean_t blake3_is_avx2_supported(void)
+{
+ return (kfpu_allowed() && zfs_sse4_1_available() &&
+ zfs_avx2_available());
+}
+
+/* Operations table registered in blake3_impls[] under the name "avx2". */
+const blake3_impl_ops_t blake3_avx2_impl = {
+ .compress_in_place = blake3_compress_in_place_sse41,
+ .compress_xof = blake3_compress_xof_sse41,
+ .hash_many = blake3_hash_many_avx2,
+ .is_supported = blake3_is_avx2_supported,
+ .degree = 8,
+ .name = "avx2"
+};
+#endif
+
+/*
+ * "avx512" implementation (x86_64 with HAVE_AVX512F and HAVE_AVX512VL).
+ * All three operations have their own external routine.
+ */
+#if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL)
+extern void zfs_blake3_compress_in_place_avx512(uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+ uint64_t counter, uint8_t flags);
+
+extern void zfs_blake3_compress_xof_avx512(const uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+ uint64_t counter, uint8_t flags, uint8_t out[64]);
+
+extern void zfs_blake3_hash_many_avx512(const uint8_t * const *inputs,
+ size_t num_inputs, size_t blocks, const uint32_t key[8],
+ uint64_t counter, boolean_t increment_counter, uint8_t flags,
+ uint8_t flags_start, uint8_t flags_end, uint8_t *out);
+
+/*
+ * Each wrapper below forwards its arguments unchanged and brackets the
+ * call with kfpu_begin()/kfpu_end().
+ */
+static void blake3_compress_in_place_avx512(uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+ uint64_t counter, uint8_t flags) {
+ kfpu_begin();
+ zfs_blake3_compress_in_place_avx512(cv, block, block_len, counter,
+ flags);
+ kfpu_end();
+}
+
+static void blake3_compress_xof_avx512(const uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+ uint64_t counter, uint8_t flags, uint8_t out[64]) {
+ kfpu_begin();
+ zfs_blake3_compress_xof_avx512(cv, block, block_len, counter, flags,
+ out);
+ kfpu_end();
+}
+
+static void blake3_hash_many_avx512(const uint8_t * const *inputs,
+ size_t num_inputs, size_t blocks, const uint32_t key[8],
+ uint64_t counter, boolean_t increment_counter, uint8_t flags,
+ uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
+ kfpu_begin();
+ zfs_blake3_hash_many_avx512(inputs, num_inputs, blocks, key, counter,
+ increment_counter, flags, flags_start, flags_end, out);
+ kfpu_end();
+}
+
+/* Usable when kfpu_allowed() holds and both AVX512F and AVX512VL are present. */
+static boolean_t blake3_is_avx512_supported(void)
+{
+ return (kfpu_allowed() && zfs_avx512f_available() &&
+ zfs_avx512vl_available());
+}
+
+/* Operations table registered in blake3_impls[] under the name "avx512". */
+const blake3_impl_ops_t blake3_avx512_impl = {
+ .compress_in_place = blake3_compress_in_place_avx512,
+ .compress_xof = blake3_compress_xof_avx512,
+ .hash_many = blake3_hash_many_avx512,
+ .is_supported = blake3_is_avx512_supported,
+ .degree = 16,
+ .name = "avx512"
+};
+#endif
diff --git a/sys/contrib/openzfs/module/icp/asm-aarch64/blake3/b3_aarch64_sse2.S b/sys/contrib/openzfs/module/icp/asm-aarch64/blake3/b3_aarch64_sse2.S
new file mode 100644
index 000000000000..59a4d9afd437
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/asm-aarch64/blake3/b3_aarch64_sse2.S
@@ -0,0 +1,2450 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3
+ * Copyright (c) 2019-2022 Samuel Neves and Matthew Krupcale
+ * Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
+ *
+ * This is converted assembly: SSE2 -> ARMv8-A
+ * Used tools: SIMDe https://github.com/simd-everywhere/simde
+ */
+
+#if defined(__aarch64__)
+ .text
+ .section .rodata.cst16,"aM",@progbits,16
+ .p2align 4
+/*
+ * 16-byte constants used by zfs_blake3_compress_in_place_sse2.
+ *
+ * .LCPI0_0: 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, i.e. the
+ *           first four BLAKE3 IV words; loaded into v7.
+ * .LCPI0_1: bit-select mask with only bits 96..127 set (32-bit lane 3
+ *           in little-endian lane order); loaded into v16.
+ * .LCPI0_2: bit-select mask with bits 0..95 set (32-bit lanes 0-2);
+ *           loaded into v1.
+ */
+.LCPI0_0:
+ .word 1779033703
+ .word 3144134277
+ .word 1013904242
+ .word 2773480762
+.LCPI0_1:
+ .xword 0
+ .xword -4294967296
+.LCPI0_2:
+ .xword -1
+ .xword 4294967295
+ .text
+/*
+ * zfs_blake3_compress_in_place_sse2
+ *
+ * Register use on entry, as read by the code below (the C prototype is
+ * not part of this file):
+ *   x0 - 32-byte buffer: loaded at the top, overwritten with the
+ *        32-byte result by the final stp
+ *   x1 - 64 bytes of input, read only
+ *   w2 - only the low 8 bits are used
+ *   x3 - 64-bit value, split into its low and high 32-bit words
+ *   w4 - only the low 8 bits are used
+ * NOTE(review): presumably these are cv, block, block_len, counter and
+ * flags, matching the compress_in_place hooks of the other BLAKE3
+ * implementations - confirm against the prototype.
+ *
+ * The routine is a branch-free leaf: it makes no calls, does not touch
+ * the stack, and uses only x8-x12, v0-v7 and v16-v26.
+ *
+ * Recurring idioms (the code is generated and fully unrolled):
+ *   rev32 Vd.8h, Vn.8h            rotate every 32-bit lane by 16
+ *   ushr #12 / shl #20 / orr      rotate every 32-bit lane right by 12
+ *   ushr #8  / shl #24 / orr      rotate every 32-bit lane right by 8
+ *   ushr #7  / shl #25 / orr      rotate every 32-bit lane right by 7
+ *   ext Vd.16b, Vn.16b, Vn.16b    rotate the four lanes of a row
+ *   uzp/zip/ext/bsl/bit/bif       shuffle the message words between steps
+ * The rotate-by-16 appears 14 times, which is consistent with BLAKE3's
+ * 7 rounds of one column step plus one diagonal step each.
+ */
+ .globl zfs_blake3_compress_in_place_sse2
+ .p2align 2
+ .type zfs_blake3_compress_in_place_sse2,@function
+zfs_blake3_compress_in_place_sse2:
+ .cfi_startproc
+ ldp q3, q2, [x0]  /* v3 = bytes 0-15 at x0, v2 = bytes 16-31 at x0 */
+ ldp q5, q6, [x1]  /* v5, v6 = bytes 0-31 at x1 */
+ add x10, x1, #32
+ lsr x11, x3, #32  /* w11 = high 32 bits of x3 */
+ fmov s4, w3  /* v4 lane 0 = low 32 bits of x3 */
+ ld2 { v17.4s, v18.4s }, [x10]  /* bytes 32-63 at x1, de-interleaved: even words -> v17, odd words -> v18 */
+ adrp x10, .LCPI0_2
+ and w8, w2, #0xff
+ mov v4.s[1], w11  /* v4 lane 1 = high 32 bits of x3 */
+ ldr q1, [x10, :lo12:.LCPI0_2]  /* v1 = lanes 0-2 select mask */
+ and w9, w4, #0xff
+ adrp x12, .LCPI0_0
+ mov v4.s[2], w8  /* v4 lane 2 = w2 & 0xff */
+ uzp1 v19.4s, v5.4s, v6.4s
+ add v3.4s, v2.4s, v3.4s
+ ldr q7, [x12, :lo12:.LCPI0_0]  /* v7 = constant row .LCPI0_0 */
+ mov v4.s[3], w9  /* v4 lane 3 = w4 & 0xff */
+ add v3.4s, v3.4s, v19.4s
+ uzp2 v5.4s, v5.4s, v6.4s
+ ext v21.16b, v18.16b, v18.16b, #12
+ uzp1 v6.4s, v19.4s, v19.4s
+ ext v22.16b, v19.16b, v19.16b, #12
+ eor v4.16b, v3.16b, v4.16b
+ ext v20.16b, v17.16b, v17.16b, #12
+ ext v6.16b, v6.16b, v19.16b, #8
+ ext v19.16b, v19.16b, v22.16b, #12
+ zip1 v22.2d, v21.2d, v5.2d
+ rev32 v24.8h, v4.8h  /* rotate each 32-bit lane by 16 */
+ mov v4.16b, v1.16b
+ zip2 v23.4s, v5.4s, v21.4s
+ uzp2 v6.4s, v6.4s, v5.4s
+ bsl v4.16b, v22.16b, v20.16b
+ add v3.4s, v3.4s, v5.4s
+ zip1 v5.4s, v23.4s, v20.4s
+ zip1 v22.4s, v20.4s, v23.4s
+ add v23.4s, v24.4s, v7.4s
+ ext v7.16b, v6.16b, v6.16b, #4
+ ext v25.16b, v4.16b, v4.16b, #12
+ ext v5.16b, v22.16b, v5.16b, #8
+ eor v2.16b, v23.16b, v2.16b
+ uzp1 v4.4s, v4.4s, v25.4s
+ uzp1 v22.4s, v7.4s, v7.4s
+ ext v25.16b, v7.16b, v7.16b, #12
+ ext v22.16b, v22.16b, v7.16b, #8
+ ext v7.16b, v7.16b, v25.16b, #12
+ ushr v25.4s, v2.4s, #12
+ shl v2.4s, v2.4s, #20
+ orr v2.16b, v2.16b, v25.16b  /* v2 = v2 rotated right by 12 per lane */
+ add v3.4s, v3.4s, v2.4s
+ eor v24.16b, v3.16b, v24.16b
+ add v3.4s, v3.4s, v17.4s
+ ushr v17.4s, v24.4s, #8
+ shl v18.4s, v24.4s, #24
+ orr v17.16b, v18.16b, v17.16b  /* v17 = v24 rotated right by 8 per lane */
+ add v18.4s, v17.4s, v23.4s
+ eor v2.16b, v18.16b, v2.16b
+ ushr v23.4s, v2.4s, #7
+ shl v2.4s, v2.4s, #25
+ ext v3.16b, v3.16b, v3.16b, #12
+ orr v2.16b, v2.16b, v23.16b  /* v2 = v2 rotated right by 7 per lane */
+ ext v17.16b, v17.16b, v17.16b, #8
+ add v3.4s, v2.4s, v3.4s
+ adrp x11, .LCPI0_1
+ eor v17.16b, v3.16b, v17.16b
+ ldr q16, [x11, :lo12:.LCPI0_1]  /* v16 = lane 3 select mask */
+ ext v18.16b, v18.16b, v18.16b, #4
+ rev32 v24.8h, v17.8h
+ movi v0.2d, #0xffffffff00000000  /* v0 = select mask for 32-bit lanes 1 and 3 */
+ add v23.4s, v3.4s, v21.4s
+ mov v21.s[1], v20.s[2]
+ add v20.4s, v18.4s, v24.4s
+ bit v19.16b, v21.16b, v0.16b
+ eor v3.16b, v20.16b, v2.16b
+ uzp2 v2.4s, v22.4s, v19.4s
+ zip1 v17.2d, v5.2d, v19.2d
+ zip2 v18.4s, v19.4s, v5.4s
+ ushr v21.4s, v3.4s, #12
+ shl v3.4s, v3.4s, #20
+ ext v22.16b, v2.16b, v2.16b, #4
+ bsl v16.16b, v4.16b, v17.16b
+ zip1 v17.4s, v18.4s, v4.4s
+ zip1 v18.4s, v4.4s, v18.4s
+ orr v21.16b, v3.16b, v21.16b
+ ext v25.16b, v16.16b, v16.16b, #12
+ ext v3.16b, v18.16b, v17.16b, #8
+ uzp1 v18.4s, v22.4s, v22.4s
+ ext v26.16b, v22.16b, v22.16b, #12
+ add v23.4s, v23.4s, v21.4s
+ uzp1 v17.4s, v16.4s, v25.4s
+ ext v16.16b, v18.16b, v22.16b, #8
+ ext v18.16b, v22.16b, v26.16b, #12
+ eor v22.16b, v23.16b, v24.16b
+ add v6.4s, v23.4s, v6.4s
+ ushr v23.4s, v22.4s, #8
+ shl v22.4s, v22.4s, #24
+ orr v22.16b, v22.16b, v23.16b
+ add v20.4s, v22.4s, v20.4s
+ eor v21.16b, v20.16b, v21.16b
+ ushr v23.4s, v21.4s, #7
+ shl v21.4s, v21.4s, #25
+ ext v6.16b, v6.16b, v6.16b, #4
+ orr v21.16b, v21.16b, v23.16b
+ ext v22.16b, v22.16b, v22.16b, #8
+ add v6.4s, v21.4s, v6.4s
+ eor v22.16b, v6.16b, v22.16b
+ ext v20.16b, v20.16b, v20.16b, #12
+ add v6.4s, v6.4s, v19.4s
+ rev32 v19.8h, v22.8h
+ add v20.4s, v20.4s, v19.4s
+ eor v21.16b, v20.16b, v21.16b
+ ushr v22.4s, v21.4s, #12
+ shl v21.4s, v21.4s, #20
+ orr v21.16b, v21.16b, v22.16b
+ add v6.4s, v6.4s, v21.4s
+ eor v19.16b, v6.16b, v19.16b
+ ushr v22.4s, v19.4s, #8
+ shl v19.4s, v19.4s, #24
+ orr v19.16b, v19.16b, v22.16b
+ add v20.4s, v19.4s, v20.4s
+ eor v21.16b, v20.16b, v21.16b
+ ext v6.16b, v6.16b, v6.16b, #12
+ ushr v22.4s, v21.4s, #7
+ shl v21.4s, v21.4s, #25
+ add v6.4s, v6.4s, v4.4s
+ orr v21.16b, v21.16b, v22.16b
+ ext v19.16b, v19.16b, v19.16b, #8
+ add v6.4s, v6.4s, v21.4s
+ eor v19.16b, v6.16b, v19.16b
+ ext v20.16b, v20.16b, v20.16b, #4
+ rev32 v19.8h, v19.8h
+ add v20.4s, v20.4s, v19.4s
+ add v6.4s, v6.4s, v5.4s
+ mov v5.s[1], v4.s[2]
+ eor v4.16b, v20.16b, v21.16b
+ ushr v21.4s, v4.4s, #12
+ shl v4.4s, v4.4s, #20
+ orr v21.16b, v4.16b, v21.16b
+ add v6.4s, v6.4s, v21.4s
+ eor v19.16b, v6.16b, v19.16b
+ add v2.4s, v6.4s, v2.4s
+ ushr v6.4s, v19.4s, #8
+ shl v19.4s, v19.4s, #24
+ orr v6.16b, v19.16b, v6.16b
+ add v19.4s, v6.4s, v20.4s
+ eor v20.16b, v19.16b, v21.16b
+ ushr v21.4s, v20.4s, #7
+ shl v20.4s, v20.4s, #25
+ ext v2.16b, v2.16b, v2.16b, #4
+ orr v20.16b, v20.16b, v21.16b
+ ext v6.16b, v6.16b, v6.16b, #8
+ add v2.4s, v20.4s, v2.4s
+ eor v6.16b, v2.16b, v6.16b
+ ext v19.16b, v19.16b, v19.16b, #12
+ rev32 v6.8h, v6.8h
+ add v19.4s, v19.4s, v6.4s
+ mov v22.16b, v0.16b
+ eor v20.16b, v19.16b, v20.16b
+ bsl v22.16b, v5.16b, v7.16b
+ ushr v21.4s, v20.4s, #12
+ shl v20.4s, v20.4s, #20
+ add v2.4s, v2.4s, v22.4s
+ orr v20.16b, v20.16b, v21.16b
+ add v2.4s, v2.4s, v20.4s
+ eor v6.16b, v2.16b, v6.16b
+ ushr v21.4s, v6.4s, #8
+ shl v6.4s, v6.4s, #24
+ orr v6.16b, v6.16b, v21.16b
+ add v19.4s, v6.4s, v19.4s
+ eor v20.16b, v19.16b, v20.16b
+ ext v2.16b, v2.16b, v2.16b, #12
+ ushr v21.4s, v20.4s, #7
+ shl v20.4s, v20.4s, #25
+ add v2.4s, v2.4s, v17.4s
+ orr v20.16b, v20.16b, v21.16b
+ ext v6.16b, v6.16b, v6.16b, #8
+ add v2.4s, v2.4s, v20.4s
+ eor v6.16b, v2.16b, v6.16b
+ uzp2 v5.4s, v16.4s, v22.4s
+ zip1 v7.2d, v3.2d, v22.2d
+ zip2 v16.4s, v22.4s, v3.4s
+ ext v19.16b, v19.16b, v19.16b, #4
+ rev32 v22.8h, v6.8h
+ ext v23.16b, v5.16b, v5.16b, #4
+ bif v7.16b, v17.16b, v1.16b
+ zip1 v24.4s, v16.4s, v17.4s
+ zip1 v16.4s, v17.4s, v16.4s
+ add v21.4s, v2.4s, v3.4s
+ mov v3.s[1], v17.s[2]
+ add v17.4s, v19.4s, v22.4s
+ mov v19.16b, v0.16b
+ ext v25.16b, v7.16b, v7.16b, #12
+ ext v4.16b, v16.16b, v24.16b, #8
+ uzp1 v16.4s, v23.4s, v23.4s
+ bsl v19.16b, v3.16b, v18.16b
+ eor v2.16b, v17.16b, v20.16b
+ uzp1 v7.4s, v7.4s, v25.4s
+ ext v25.16b, v16.16b, v23.16b, #8
+ zip1 v3.2d, v4.2d, v19.2d
+ ushr v20.4s, v2.4s, #12
+ shl v2.4s, v2.4s, #20
+ ext v24.16b, v23.16b, v23.16b, #12
+ uzp2 v6.4s, v25.4s, v19.4s
+ zip2 v18.4s, v19.4s, v4.4s
+ bif v3.16b, v7.16b, v1.16b
+ orr v20.16b, v2.16b, v20.16b
+ ext v16.16b, v23.16b, v24.16b, #12
+ ext v23.16b, v6.16b, v6.16b, #4
+ zip1 v24.4s, v18.4s, v7.4s
+ zip1 v18.4s, v7.4s, v18.4s
+ ext v25.16b, v3.16b, v3.16b, #12
+ add v21.4s, v21.4s, v20.4s
+ ext v2.16b, v18.16b, v24.16b, #8
+ uzp1 v18.4s, v23.4s, v23.4s
+ ext v24.16b, v23.16b, v23.16b, #12
+ uzp1 v3.4s, v3.4s, v25.4s
+ eor v22.16b, v21.16b, v22.16b
+ ext v25.16b, v18.16b, v23.16b, #8
+ dup v18.4s, v2.s[3]
+ ext v23.16b, v23.16b, v24.16b, #12
+ add v5.4s, v21.4s, v5.4s
+ trn1 v21.4s, v3.4s, v3.4s
+ ushr v24.4s, v22.4s, #8
+ shl v22.4s, v22.4s, #24
+ ext v18.16b, v21.16b, v18.16b, #8
+ orr v21.16b, v22.16b, v24.16b
+ add v17.4s, v21.4s, v17.4s
+ eor v20.16b, v17.16b, v20.16b
+ ushr v22.4s, v20.4s, #7
+ shl v20.4s, v20.4s, #25
+ ext v5.16b, v5.16b, v5.16b, #4
+ orr v20.16b, v20.16b, v22.16b
+ ext v21.16b, v21.16b, v21.16b, #8
+ add v5.4s, v20.4s, v5.4s
+ eor v21.16b, v5.16b, v21.16b
+ ext v17.16b, v17.16b, v17.16b, #12
+ add v5.4s, v5.4s, v19.4s
+ rev32 v19.8h, v21.8h
+ add v17.4s, v17.4s, v19.4s
+ eor v20.16b, v17.16b, v20.16b
+ ushr v21.4s, v20.4s, #12
+ shl v20.4s, v20.4s, #20
+ orr v20.16b, v20.16b, v21.16b
+ add v5.4s, v5.4s, v20.4s
+ eor v19.16b, v5.16b, v19.16b
+ ushr v21.4s, v19.4s, #8
+ shl v19.4s, v19.4s, #24
+ orr v19.16b, v19.16b, v21.16b
+ add v17.4s, v19.4s, v17.4s
+ eor v20.16b, v17.16b, v20.16b
+ ext v5.16b, v5.16b, v5.16b, #12
+ ushr v21.4s, v20.4s, #7
+ shl v20.4s, v20.4s, #25
+ add v5.4s, v5.4s, v7.4s
+ orr v20.16b, v20.16b, v21.16b
+ ext v19.16b, v19.16b, v19.16b, #8
+ add v5.4s, v5.4s, v20.4s
+ eor v19.16b, v5.16b, v19.16b
+ ext v17.16b, v17.16b, v17.16b, #4
+ rev32 v22.8h, v19.8h
+ add v21.4s, v5.4s, v4.4s
+ mov v4.s[1], v7.s[2]
+ add v19.4s, v17.4s, v22.4s
+ bit v16.16b, v4.16b, v0.16b
+ eor v5.16b, v19.16b, v20.16b
+ uzp2 v4.4s, v25.4s, v16.4s
+ zip1 v7.2d, v2.2d, v16.2d
+ zip2 v17.4s, v16.4s, v2.4s
+ ushr v20.4s, v5.4s, #12
+ shl v5.4s, v5.4s, #20
+ ext v24.16b, v4.16b, v4.16b, #4
+ bif v7.16b, v3.16b, v1.16b
+ zip1 v25.4s, v17.4s, v3.4s
+ zip1 v17.4s, v3.4s, v17.4s
+ orr v20.16b, v5.16b, v20.16b
+ ext v26.16b, v7.16b, v7.16b, #12
+ ext v5.16b, v17.16b, v25.16b, #8
+ uzp1 v17.4s, v24.4s, v24.4s
+ ext v25.16b, v24.16b, v24.16b, #12
+ bit v23.16b, v18.16b, v0.16b
+ add v21.4s, v21.4s, v20.4s
+ uzp1 v7.4s, v7.4s, v26.4s
+ ext v26.16b, v17.16b, v24.16b, #8
+ ext v17.16b, v24.16b, v25.16b, #12
+ eor v22.16b, v21.16b, v22.16b
+ add v6.4s, v21.4s, v6.4s
+ zip1 v21.2d, v5.2d, v23.2d
+ zip2 v24.4s, v23.4s, v5.4s
+ bif v21.16b, v7.16b, v1.16b
+ zip1 v1.4s, v24.4s, v7.4s
+ zip1 v24.4s, v7.4s, v24.4s
+ ext v1.16b, v24.16b, v1.16b, #8
+ ushr v24.4s, v22.4s, #8
+ shl v22.4s, v22.4s, #24
+ orr v22.16b, v22.16b, v24.16b
+ add v19.4s, v22.4s, v19.4s
+ ext v24.16b, v21.16b, v21.16b, #12
+ eor v20.16b, v19.16b, v20.16b
+ uzp1 v21.4s, v21.4s, v24.4s
+ ushr v24.4s, v20.4s, #7
+ shl v20.4s, v20.4s, #25
+ orr v20.16b, v20.16b, v24.16b
+ ext v6.16b, v6.16b, v6.16b, #4
+ ext v22.16b, v22.16b, v22.16b, #8
+ add v6.4s, v20.4s, v6.4s
+ eor v22.16b, v6.16b, v22.16b
+ ext v19.16b, v19.16b, v19.16b, #12
+ add v6.4s, v6.4s, v16.4s
+ rev32 v16.8h, v22.8h
+ add v19.4s, v19.4s, v16.4s
+ eor v20.16b, v19.16b, v20.16b
+ ushr v22.4s, v20.4s, #12
+ shl v20.4s, v20.4s, #20
+ orr v20.16b, v20.16b, v22.16b
+ add v6.4s, v6.4s, v20.4s
+ eor v16.16b, v6.16b, v16.16b
+ ext v6.16b, v6.16b, v6.16b, #12
+ add v3.4s, v6.4s, v3.4s
+ ushr v6.4s, v16.4s, #8
+ shl v16.4s, v16.4s, #24
+ orr v6.16b, v16.16b, v6.16b
+ add v16.4s, v6.4s, v19.4s
+ eor v19.16b, v16.16b, v20.16b
+ ushr v20.4s, v19.4s, #7
+ shl v19.4s, v19.4s, #25
+ orr v19.16b, v19.16b, v20.16b
+ ext v6.16b, v6.16b, v6.16b, #8
+ add v3.4s, v3.4s, v19.4s
+ eor v6.16b, v3.16b, v6.16b
+ ext v16.16b, v16.16b, v16.16b, #4
+ add v2.4s, v3.4s, v2.4s
+ rev32 v3.8h, v6.8h
+ add v6.4s, v16.4s, v3.4s
+ eor v16.16b, v6.16b, v19.16b
+ ushr v19.4s, v16.4s, #12
+ shl v16.4s, v16.4s, #20
+ orr v16.16b, v16.16b, v19.16b
+ add v2.4s, v2.4s, v16.4s
+ eor v3.16b, v2.16b, v3.16b
+ add v2.4s, v2.4s, v4.4s
+ ushr v4.4s, v3.4s, #8
+ shl v3.4s, v3.4s, #24
+ orr v3.16b, v3.16b, v4.16b
+ add v4.4s, v3.4s, v6.4s
+ eor v6.16b, v4.16b, v16.16b
+ ushr v16.4s, v6.4s, #7
+ shl v6.4s, v6.4s, #25
+ ext v2.16b, v2.16b, v2.16b, #4
+ orr v6.16b, v6.16b, v16.16b
+ ext v3.16b, v3.16b, v3.16b, #8
+ add v2.4s, v6.4s, v2.4s
+ eor v3.16b, v2.16b, v3.16b
+ ext v4.16b, v4.16b, v4.16b, #12
+ rev32 v3.8h, v3.8h
+ add v4.4s, v4.4s, v3.4s
+ eor v6.16b, v4.16b, v6.16b
+ ushr v16.4s, v6.4s, #12
+ shl v6.4s, v6.4s, #20
+ add v2.4s, v2.4s, v23.4s
+ orr v6.16b, v6.16b, v16.16b
+ add v2.4s, v2.4s, v6.4s
+ eor v3.16b, v2.16b, v3.16b
+ ushr v16.4s, v3.4s, #8
+ shl v3.4s, v3.4s, #24
+ orr v3.16b, v3.16b, v16.16b
+ add v4.4s, v3.4s, v4.4s
+ eor v6.16b, v4.16b, v6.16b
+ ext v2.16b, v2.16b, v2.16b, #12
+ ushr v16.4s, v6.4s, #7
+ shl v6.4s, v6.4s, #25
+ add v2.4s, v2.4s, v7.4s
+ orr v6.16b, v6.16b, v16.16b
+ ext v3.16b, v3.16b, v3.16b, #8
+ add v2.4s, v2.4s, v6.4s
+ eor v3.16b, v2.16b, v3.16b
+ ext v4.16b, v4.16b, v4.16b, #4
+ rev32 v3.8h, v3.8h
+ add v2.4s, v2.4s, v5.4s
+ mov v5.s[1], v7.s[2]
+ add v4.4s, v4.4s, v3.4s
+ bsl v0.16b, v5.16b, v17.16b
+ eor v5.16b, v4.16b, v6.16b
+ ushr v6.4s, v5.4s, #12
+ shl v5.4s, v5.4s, #20
+ orr v5.16b, v5.16b, v6.16b
+ add v2.4s, v2.4s, v5.4s
+ eor v3.16b, v2.16b, v3.16b
+ ushr v6.4s, v3.4s, #8
+ shl v3.4s, v3.4s, #24
+ orr v3.16b, v3.16b, v6.16b
+ add v4.4s, v3.4s, v4.4s
+ uzp2 v18.4s, v26.4s, v18.4s
+ eor v5.16b, v4.16b, v5.16b
+ add v2.4s, v2.4s, v18.4s
+ ushr v6.4s, v5.4s, #7
+ shl v5.4s, v5.4s, #25
+ ext v2.16b, v2.16b, v2.16b, #4
+ orr v5.16b, v5.16b, v6.16b
+ ext v3.16b, v3.16b, v3.16b, #8
+ add v2.4s, v5.4s, v2.4s
+ eor v3.16b, v2.16b, v3.16b
+ ext v4.16b, v4.16b, v4.16b, #12
+ add v0.4s, v2.4s, v0.4s
+ rev32 v2.8h, v3.8h
+ add v3.4s, v4.4s, v2.4s
+ eor v4.16b, v3.16b, v5.16b
+ ushr v5.4s, v4.4s, #12
+ shl v4.4s, v4.4s, #20
+ orr v4.16b, v4.16b, v5.16b
+ add v0.4s, v0.4s, v4.4s
+ eor v2.16b, v0.16b, v2.16b
+ ushr v5.4s, v2.4s, #8
+ shl v2.4s, v2.4s, #24
+ orr v2.16b, v2.16b, v5.16b
+ add v3.4s, v2.4s, v3.4s
+ eor v4.16b, v3.16b, v4.16b
+ ext v0.16b, v0.16b, v0.16b, #12
+ ushr v5.4s, v4.4s, #7
+ shl v4.4s, v4.4s, #25
+ add v0.4s, v0.4s, v21.4s
+ orr v4.16b, v4.16b, v5.16b
+ ext v2.16b, v2.16b, v2.16b, #8
+ add v0.4s, v0.4s, v4.4s
+ eor v2.16b, v0.16b, v2.16b
+ ext v3.16b, v3.16b, v3.16b, #4
+ add v0.4s, v0.4s, v1.4s
+ rev32 v1.8h, v2.8h
+ add v2.4s, v3.4s, v1.4s
+ eor v3.16b, v2.16b, v4.16b
+ ushr v4.4s, v3.4s, #12
+ shl v3.4s, v3.4s, #20
+ orr v3.16b, v3.16b, v4.16b
+ add v0.4s, v0.4s, v3.4s
+ eor v1.16b, v0.16b, v1.16b
+ ushr v4.4s, v1.4s, #8
+ shl v1.4s, v1.4s, #24
+ orr v1.16b, v1.16b, v4.16b
+ add v2.4s, v1.4s, v2.4s
+ eor v3.16b, v2.16b, v3.16b
+ ext v0.16b, v0.16b, v0.16b, #4
+ ext v2.16b, v2.16b, v2.16b, #12
+ ushr v4.4s, v3.4s, #7
+ shl v3.4s, v3.4s, #25
+ ext v1.16b, v1.16b, v1.16b, #8
+ eor v0.16b, v2.16b, v0.16b  /* result bytes 0-15: XOR of two state rows */
+ orr v2.16b, v3.16b, v4.16b
+ eor v1.16b, v2.16b, v1.16b  /* result bytes 16-31: XOR of the other two rows */
+ stp q0, q1, [x0]  /* overwrite the 32 bytes at x0 with the result */
+ ret
+.Lfunc_end0:
+ .size zfs_blake3_compress_in_place_sse2, .Lfunc_end0-zfs_blake3_compress_in_place_sse2
+ .cfi_endproc
+
+ .section .rodata.cst16,"aM",@progbits,16
+ .p2align 4
+/*
+ * 16-byte constants used by zfs_blake3_compress_xof_sse2; the values
+ * are identical to .LCPI0_0 - .LCPI0_2.
+ *
+ * .LCPI1_0: 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, i.e. the
+ *           first four BLAKE3 IV words; loaded into v7.
+ * .LCPI1_1: bit-select mask with only bits 96..127 set (32-bit lane 3
+ *           in little-endian lane order); loaded into v16.
+ * .LCPI1_2: bit-select mask with bits 0..95 set (32-bit lanes 0-2);
+ *           loaded into v1.
+ */
+.LCPI1_0:
+ .word 1779033703
+ .word 3144134277
+ .word 1013904242
+ .word 2773480762
+.LCPI1_1:
+ .xword 0
+ .xword -4294967296
+.LCPI1_2:
+ .xword -1
+ .xword 4294967295
+ .text
+/*
+ * zfs_blake3_compress_xof_sse2
+ *
+ * Register use on entry, as read by the code below (the C prototype is
+ * not part of this file):
+ *   x0 - 32-byte buffer, read at the top and again at the end; never
+ *        written
+ *   x1 - 64 bytes of input, read only
+ *   w2 - only the low 8 bits are used
+ *   x3 - 64-bit value, split into its low and high 32-bit words
+ *   w4 - only the low 8 bits are used
+ *   x5 - 64-byte output buffer, written only
+ * NOTE(review): presumably these are cv, block, block_len, counter,
+ * flags and out, matching the compress_xof hooks of the other BLAKE3
+ * implementations - confirm against the prototype.
+ *
+ * The instruction stream up to the last rotate is the same as in
+ * zfs_blake3_compress_in_place_sse2 (see that routine for the rotate
+ * and shuffle idioms). Only the tail differs: it stores 64 bytes to x5,
+ * the upper 32 of which are state rows XORed with the 32 bytes re-read
+ * from x0.
+ *
+ * The routine is a branch-free leaf: it makes no calls, does not touch
+ * the stack, and uses only x8-x12, v0-v7 and v16-v26.
+ */
+ .globl zfs_blake3_compress_xof_sse2
+ .p2align 2
+ .type zfs_blake3_compress_xof_sse2,@function
+zfs_blake3_compress_xof_sse2:
+ .cfi_startproc
+ ldp q3, q2, [x0]  /* v3 = bytes 0-15 at x0, v2 = bytes 16-31 at x0 */
+ ldp q5, q6, [x1]  /* v5, v6 = bytes 0-31 at x1 */
+ add x10, x1, #32
+ lsr x11, x3, #32  /* w11 = high 32 bits of x3 */
+ fmov s4, w3  /* v4 lane 0 = low 32 bits of x3 */
+ ld2 { v17.4s, v18.4s }, [x10]  /* bytes 32-63 at x1, de-interleaved: even words -> v17, odd words -> v18 */
+ adrp x10, .LCPI1_2
+ and w8, w2, #0xff
+ mov v4.s[1], w11  /* v4 lane 1 = high 32 bits of x3 */
+ ldr q1, [x10, :lo12:.LCPI1_2]  /* v1 = lanes 0-2 select mask */
+ and w9, w4, #0xff
+ adrp x12, .LCPI1_0
+ mov v4.s[2], w8  /* v4 lane 2 = w2 & 0xff */
+ uzp1 v19.4s, v5.4s, v6.4s
+ add v3.4s, v2.4s, v3.4s
+ ldr q7, [x12, :lo12:.LCPI1_0]  /* v7 = constant row .LCPI1_0 */
+ mov v4.s[3], w9  /* v4 lane 3 = w4 & 0xff */
+ add v3.4s, v3.4s, v19.4s
+ uzp2 v5.4s, v5.4s, v6.4s
+ ext v21.16b, v18.16b, v18.16b, #12
+ uzp1 v6.4s, v19.4s, v19.4s
+ ext v22.16b, v19.16b, v19.16b, #12
+ eor v4.16b, v3.16b, v4.16b
+ ext v20.16b, v17.16b, v17.16b, #12
+ ext v6.16b, v6.16b, v19.16b, #8
+ ext v19.16b, v19.16b, v22.16b, #12
+ zip1 v22.2d, v21.2d, v5.2d
+ rev32 v24.8h, v4.8h  /* rotate each 32-bit lane by 16 */
+ mov v4.16b, v1.16b
+ zip2 v23.4s, v5.4s, v21.4s
+ uzp2 v6.4s, v6.4s, v5.4s
+ bsl v4.16b, v22.16b, v20.16b
+ add v3.4s, v3.4s, v5.4s
+ zip1 v5.4s, v23.4s, v20.4s
+ zip1 v22.4s, v20.4s, v23.4s
+ add v23.4s, v24.4s, v7.4s
+ ext v7.16b, v6.16b, v6.16b, #4
+ ext v25.16b, v4.16b, v4.16b, #12
+ ext v5.16b, v22.16b, v5.16b, #8
+ eor v2.16b, v23.16b, v2.16b
+ uzp1 v4.4s, v4.4s, v25.4s
+ uzp1 v22.4s, v7.4s, v7.4s
+ ext v25.16b, v7.16b, v7.16b, #12
+ ext v22.16b, v22.16b, v7.16b, #8
+ ext v7.16b, v7.16b, v25.16b, #12
+ ushr v25.4s, v2.4s, #12
+ shl v2.4s, v2.4s, #20
+ orr v2.16b, v2.16b, v25.16b  /* v2 = v2 rotated right by 12 per lane */
+ add v3.4s, v3.4s, v2.4s
+ eor v24.16b, v3.16b, v24.16b
+ add v3.4s, v3.4s, v17.4s
+ ushr v17.4s, v24.4s, #8
+ shl v18.4s, v24.4s, #24
+ orr v17.16b, v18.16b, v17.16b  /* v17 = v24 rotated right by 8 per lane */
+ add v18.4s, v17.4s, v23.4s
+ eor v2.16b, v18.16b, v2.16b
+ ushr v23.4s, v2.4s, #7
+ shl v2.4s, v2.4s, #25
+ ext v3.16b, v3.16b, v3.16b, #12
+ orr v2.16b, v2.16b, v23.16b  /* v2 = v2 rotated right by 7 per lane */
+ ext v17.16b, v17.16b, v17.16b, #8
+ add v3.4s, v2.4s, v3.4s
+ adrp x11, .LCPI1_1
+ eor v17.16b, v3.16b, v17.16b
+ ldr q16, [x11, :lo12:.LCPI1_1]  /* v16 = lane 3 select mask */
+ ext v18.16b, v18.16b, v18.16b, #4
+ rev32 v24.8h, v17.8h
+ movi v0.2d, #0xffffffff00000000  /* v0 = select mask for 32-bit lanes 1 and 3 */
+ add v23.4s, v3.4s, v21.4s
+ mov v21.s[1], v20.s[2]
+ add v20.4s, v18.4s, v24.4s
+ bit v19.16b, v21.16b, v0.16b
+ eor v3.16b, v20.16b, v2.16b
+ uzp2 v2.4s, v22.4s, v19.4s
+ zip1 v17.2d, v5.2d, v19.2d
+ zip2 v18.4s, v19.4s, v5.4s
+ ushr v21.4s, v3.4s, #12
+ shl v3.4s, v3.4s, #20
+ ext v22.16b, v2.16b, v2.16b, #4
+ bsl v16.16b, v4.16b, v17.16b
+ zip1 v17.4s, v18.4s, v4.4s
+ zip1 v18.4s, v4.4s, v18.4s
+ orr v21.16b, v3.16b, v21.16b
+ ext v25.16b, v16.16b, v16.16b, #12
+ ext v3.16b, v18.16b, v17.16b, #8
+ uzp1 v18.4s, v22.4s, v22.4s
+ ext v26.16b, v22.16b, v22.16b, #12
+ add v23.4s, v23.4s, v21.4s
+ uzp1 v17.4s, v16.4s, v25.4s
+ ext v16.16b, v18.16b, v22.16b, #8
+ ext v18.16b, v22.16b, v26.16b, #12
+ eor v22.16b, v23.16b, v24.16b
+ add v6.4s, v23.4s, v6.4s
+ ushr v23.4s, v22.4s, #8
+ shl v22.4s, v22.4s, #24
+ orr v22.16b, v22.16b, v23.16b
+ add v20.4s, v22.4s, v20.4s
+ eor v21.16b, v20.16b, v21.16b
+ ushr v23.4s, v21.4s, #7
+ shl v21.4s, v21.4s, #25
+ ext v6.16b, v6.16b, v6.16b, #4
+ orr v21.16b, v21.16b, v23.16b
+ ext v22.16b, v22.16b, v22.16b, #8
+ add v6.4s, v21.4s, v6.4s
+ eor v22.16b, v6.16b, v22.16b
+ ext v20.16b, v20.16b, v20.16b, #12
+ add v6.4s, v6.4s, v19.4s
+ rev32 v19.8h, v22.8h
+ add v20.4s, v20.4s, v19.4s
+ eor v21.16b, v20.16b, v21.16b
+ ushr v22.4s, v21.4s, #12
+ shl v21.4s, v21.4s, #20
+ orr v21.16b, v21.16b, v22.16b
+ add v6.4s, v6.4s, v21.4s
+ eor v19.16b, v6.16b, v19.16b
+ ushr v22.4s, v19.4s, #8
+ shl v19.4s, v19.4s, #24
+ orr v19.16b, v19.16b, v22.16b
+ add v20.4s, v19.4s, v20.4s
+ eor v21.16b, v20.16b, v21.16b
+ ext v6.16b, v6.16b, v6.16b, #12
+ ushr v22.4s, v21.4s, #7
+ shl v21.4s, v21.4s, #25
+ add v6.4s, v6.4s, v4.4s
+ orr v21.16b, v21.16b, v22.16b
+ ext v19.16b, v19.16b, v19.16b, #8
+ add v6.4s, v6.4s, v21.4s
+ eor v19.16b, v6.16b, v19.16b
+ ext v20.16b, v20.16b, v20.16b, #4
+ rev32 v19.8h, v19.8h
+ add v20.4s, v20.4s, v19.4s
+ add v6.4s, v6.4s, v5.4s
+ mov v5.s[1], v4.s[2]
+ eor v4.16b, v20.16b, v21.16b
+ ushr v21.4s, v4.4s, #12
+ shl v4.4s, v4.4s, #20
+ orr v21.16b, v4.16b, v21.16b
+ add v6.4s, v6.4s, v21.4s
+ eor v19.16b, v6.16b, v19.16b
+ add v2.4s, v6.4s, v2.4s
+ ushr v6.4s, v19.4s, #8
+ shl v19.4s, v19.4s, #24
+ orr v6.16b, v19.16b, v6.16b
+ add v19.4s, v6.4s, v20.4s
+ eor v20.16b, v19.16b, v21.16b
+ ushr v21.4s, v20.4s, #7
+ shl v20.4s, v20.4s, #25
+ ext v2.16b, v2.16b, v2.16b, #4
+ orr v20.16b, v20.16b, v21.16b
+ ext v6.16b, v6.16b, v6.16b, #8
+ add v2.4s, v20.4s, v2.4s
+ eor v6.16b, v2.16b, v6.16b
+ ext v19.16b, v19.16b, v19.16b, #12
+ rev32 v6.8h, v6.8h
+ add v19.4s, v19.4s, v6.4s
+ mov v22.16b, v0.16b
+ eor v20.16b, v19.16b, v20.16b
+ bsl v22.16b, v5.16b, v7.16b
+ ushr v21.4s, v20.4s, #12
+ shl v20.4s, v20.4s, #20
+ add v2.4s, v2.4s, v22.4s
+ orr v20.16b, v20.16b, v21.16b
+ add v2.4s, v2.4s, v20.4s
+ eor v6.16b, v2.16b, v6.16b
+ ushr v21.4s, v6.4s, #8
+ shl v6.4s, v6.4s, #24
+ orr v6.16b, v6.16b, v21.16b
+ add v19.4s, v6.4s, v19.4s
+ eor v20.16b, v19.16b, v20.16b
+ ext v2.16b, v2.16b, v2.16b, #12
+ ushr v21.4s, v20.4s, #7
+ shl v20.4s, v20.4s, #25
+ add v2.4s, v2.4s, v17.4s
+ orr v20.16b, v20.16b, v21.16b
+ ext v6.16b, v6.16b, v6.16b, #8
+ add v2.4s, v2.4s, v20.4s
+ eor v6.16b, v2.16b, v6.16b
+ uzp2 v5.4s, v16.4s, v22.4s
+ zip1 v7.2d, v3.2d, v22.2d
+ zip2 v16.4s, v22.4s, v3.4s
+ ext v19.16b, v19.16b, v19.16b, #4
+ rev32 v22.8h, v6.8h
+ ext v23.16b, v5.16b, v5.16b, #4
+ bif v7.16b, v17.16b, v1.16b
+ zip1 v24.4s, v16.4s, v17.4s
+ zip1 v16.4s, v17.4s, v16.4s
+ add v21.4s, v2.4s, v3.4s
+ mov v3.s[1], v17.s[2]
+ add v17.4s, v19.4s, v22.4s
+ mov v19.16b, v0.16b
+ ext v25.16b, v7.16b, v7.16b, #12
+ ext v4.16b, v16.16b, v24.16b, #8
+ uzp1 v16.4s, v23.4s, v23.4s
+ bsl v19.16b, v3.16b, v18.16b
+ eor v2.16b, v17.16b, v20.16b
+ uzp1 v7.4s, v7.4s, v25.4s
+ ext v25.16b, v16.16b, v23.16b, #8
+ zip1 v3.2d, v4.2d, v19.2d
+ ushr v20.4s, v2.4s, #12
+ shl v2.4s, v2.4s, #20
+ ext v24.16b, v23.16b, v23.16b, #12
+ uzp2 v6.4s, v25.4s, v19.4s
+ zip2 v18.4s, v19.4s, v4.4s
+ bif v3.16b, v7.16b, v1.16b
+ orr v20.16b, v2.16b, v20.16b
+ ext v16.16b, v23.16b, v24.16b, #12
+ ext v23.16b, v6.16b, v6.16b, #4
+ zip1 v24.4s, v18.4s, v7.4s
+ zip1 v18.4s, v7.4s, v18.4s
+ ext v25.16b, v3.16b, v3.16b, #12
+ add v21.4s, v21.4s, v20.4s
+ ext v2.16b, v18.16b, v24.16b, #8
+ uzp1 v18.4s, v23.4s, v23.4s
+ ext v24.16b, v23.16b, v23.16b, #12
+ uzp1 v3.4s, v3.4s, v25.4s
+ eor v22.16b, v21.16b, v22.16b
+ ext v25.16b, v18.16b, v23.16b, #8
+ dup v18.4s, v2.s[3]
+ ext v23.16b, v23.16b, v24.16b, #12
+ add v5.4s, v21.4s, v5.4s
+ trn1 v21.4s, v3.4s, v3.4s
+ ushr v24.4s, v22.4s, #8
+ shl v22.4s, v22.4s, #24
+ ext v18.16b, v21.16b, v18.16b, #8
+ orr v21.16b, v22.16b, v24.16b
+ add v17.4s, v21.4s, v17.4s
+ eor v20.16b, v17.16b, v20.16b
+ ushr v22.4s, v20.4s, #7
+ shl v20.4s, v20.4s, #25
+ ext v5.16b, v5.16b, v5.16b, #4
+ orr v20.16b, v20.16b, v22.16b
+ ext v21.16b, v21.16b, v21.16b, #8
+ add v5.4s, v20.4s, v5.4s
+ eor v21.16b, v5.16b, v21.16b
+ ext v17.16b, v17.16b, v17.16b, #12
+ add v5.4s, v5.4s, v19.4s
+ rev32 v19.8h, v21.8h
+ add v17.4s, v17.4s, v19.4s
+ eor v20.16b, v17.16b, v20.16b
+ ushr v21.4s, v20.4s, #12
+ shl v20.4s, v20.4s, #20
+ orr v20.16b, v20.16b, v21.16b
+ add v5.4s, v5.4s, v20.4s
+ eor v19.16b, v5.16b, v19.16b
+ ushr v21.4s, v19.4s, #8
+ shl v19.4s, v19.4s, #24
+ orr v19.16b, v19.16b, v21.16b
+ add v17.4s, v19.4s, v17.4s
+ eor v20.16b, v17.16b, v20.16b
+ ext v5.16b, v5.16b, v5.16b, #12
+ ushr v21.4s, v20.4s, #7
+ shl v20.4s, v20.4s, #25
+ add v5.4s, v5.4s, v7.4s
+ orr v20.16b, v20.16b, v21.16b
+ ext v19.16b, v19.16b, v19.16b, #8
+ add v5.4s, v5.4s, v20.4s
+ eor v19.16b, v5.16b, v19.16b
+ ext v17.16b, v17.16b, v17.16b, #4
+ rev32 v22.8h, v19.8h
+ add v21.4s, v5.4s, v4.4s
+ mov v4.s[1], v7.s[2]
+ add v19.4s, v17.4s, v22.4s
+ bit v16.16b, v4.16b, v0.16b
+ eor v5.16b, v19.16b, v20.16b
+ uzp2 v4.4s, v25.4s, v16.4s
+ zip1 v7.2d, v2.2d, v16.2d
+ zip2 v17.4s, v16.4s, v2.4s
+ ushr v20.4s, v5.4s, #12
+ shl v5.4s, v5.4s, #20
+ ext v24.16b, v4.16b, v4.16b, #4
+ bif v7.16b, v3.16b, v1.16b
+ zip1 v25.4s, v17.4s, v3.4s
+ zip1 v17.4s, v3.4s, v17.4s
+ orr v20.16b, v5.16b, v20.16b
+ ext v26.16b, v7.16b, v7.16b, #12
+ ext v5.16b, v17.16b, v25.16b, #8
+ uzp1 v17.4s, v24.4s, v24.4s
+ ext v25.16b, v24.16b, v24.16b, #12
+ bit v23.16b, v18.16b, v0.16b
+ add v21.4s, v21.4s, v20.4s
+ uzp1 v7.4s, v7.4s, v26.4s
+ ext v26.16b, v17.16b, v24.16b, #8
+ ext v17.16b, v24.16b, v25.16b, #12
+ eor v22.16b, v21.16b, v22.16b
+ add v6.4s, v21.4s, v6.4s
+ zip1 v21.2d, v5.2d, v23.2d
+ zip2 v24.4s, v23.4s, v5.4s
+ bif v21.16b, v7.16b, v1.16b
+ zip1 v1.4s, v24.4s, v7.4s
+ zip1 v24.4s, v7.4s, v24.4s
+ ext v1.16b, v24.16b, v1.16b, #8
+ ushr v24.4s, v22.4s, #8
+ shl v22.4s, v22.4s, #24
+ orr v22.16b, v22.16b, v24.16b
+ add v19.4s, v22.4s, v19.4s
+ ext v24.16b, v21.16b, v21.16b, #12
+ eor v20.16b, v19.16b, v20.16b
+ uzp1 v21.4s, v21.4s, v24.4s
+ ushr v24.4s, v20.4s, #7
+ shl v20.4s, v20.4s, #25
+ orr v20.16b, v20.16b, v24.16b
+ ext v6.16b, v6.16b, v6.16b, #4
+ ext v22.16b, v22.16b, v22.16b, #8
+ add v6.4s, v20.4s, v6.4s
+ eor v22.16b, v6.16b, v22.16b
+ ext v19.16b, v19.16b, v19.16b, #12
+ add v6.4s, v6.4s, v16.4s
+ rev32 v16.8h, v22.8h
+ add v19.4s, v19.4s, v16.4s
+ eor v20.16b, v19.16b, v20.16b
+ ushr v22.4s, v20.4s, #12
+ shl v20.4s, v20.4s, #20
+ orr v20.16b, v20.16b, v22.16b
+ add v6.4s, v6.4s, v20.4s
+ eor v16.16b, v6.16b, v16.16b
+ ext v6.16b, v6.16b, v6.16b, #12
+ add v3.4s, v6.4s, v3.4s
+ ushr v6.4s, v16.4s, #8
+ shl v16.4s, v16.4s, #24
+ orr v6.16b, v16.16b, v6.16b
+ add v16.4s, v6.4s, v19.4s
+ eor v19.16b, v16.16b, v20.16b
+ ushr v20.4s, v19.4s, #7
+ shl v19.4s, v19.4s, #25
+ orr v19.16b, v19.16b, v20.16b
+ ext v6.16b, v6.16b, v6.16b, #8
+ add v3.4s, v3.4s, v19.4s
+ eor v6.16b, v3.16b, v6.16b
+ ext v16.16b, v16.16b, v16.16b, #4
+ add v2.4s, v3.4s, v2.4s
+ rev32 v3.8h, v6.8h
+ add v6.4s, v16.4s, v3.4s
+ eor v16.16b, v6.16b, v19.16b
+ ushr v19.4s, v16.4s, #12
+ shl v16.4s, v16.4s, #20
+ orr v16.16b, v16.16b, v19.16b
+ add v2.4s, v2.4s, v16.4s
+ eor v3.16b, v2.16b, v3.16b
+ add v2.4s, v2.4s, v4.4s
+ ushr v4.4s, v3.4s, #8
+ shl v3.4s, v3.4s, #24
+ orr v3.16b, v3.16b, v4.16b
+ add v4.4s, v3.4s, v6.4s
+ eor v6.16b, v4.16b, v16.16b
+ ushr v16.4s, v6.4s, #7
+ shl v6.4s, v6.4s, #25
+ ext v2.16b, v2.16b, v2.16b, #4
+ orr v6.16b, v6.16b, v16.16b
+ ext v3.16b, v3.16b, v3.16b, #8
+ add v2.4s, v6.4s, v2.4s
+ eor v3.16b, v2.16b, v3.16b
+ ext v4.16b, v4.16b, v4.16b, #12
+ rev32 v3.8h, v3.8h
+ add v4.4s, v4.4s, v3.4s
+ eor v6.16b, v4.16b, v6.16b
+ ushr v16.4s, v6.4s, #12
+ shl v6.4s, v6.4s, #20
+ add v2.4s, v2.4s, v23.4s
+ orr v6.16b, v6.16b, v16.16b
+ add v2.4s, v2.4s, v6.4s
+ eor v3.16b, v2.16b, v3.16b
+ ushr v16.4s, v3.4s, #8
+ shl v3.4s, v3.4s, #24
+ orr v3.16b, v3.16b, v16.16b
+ add v4.4s, v3.4s, v4.4s
+ eor v6.16b, v4.16b, v6.16b
+ ext v2.16b, v2.16b, v2.16b, #12
+ ushr v16.4s, v6.4s, #7
+ shl v6.4s, v6.4s, #25
+ add v2.4s, v2.4s, v7.4s
+ orr v6.16b, v6.16b, v16.16b
+ ext v3.16b, v3.16b, v3.16b, #8
+ add v2.4s, v2.4s, v6.4s
+ eor v3.16b, v2.16b, v3.16b
+ ext v4.16b, v4.16b, v4.16b, #4
+ rev32 v3.8h, v3.8h
+ add v2.4s, v2.4s, v5.4s
+ mov v5.s[1], v7.s[2]
+ add v4.4s, v4.4s, v3.4s
+ bsl v0.16b, v5.16b, v17.16b
+ eor v5.16b, v4.16b, v6.16b
+ ushr v6.4s, v5.4s, #12
+ shl v5.4s, v5.4s, #20
+ orr v5.16b, v5.16b, v6.16b
+ add v2.4s, v2.4s, v5.4s
+ eor v3.16b, v2.16b, v3.16b
+ ushr v6.4s, v3.4s, #8
+ shl v3.4s, v3.4s, #24
+ orr v3.16b, v3.16b, v6.16b
+ add v4.4s, v3.4s, v4.4s
+ uzp2 v18.4s, v26.4s, v18.4s
+ eor v5.16b, v4.16b, v5.16b
+ add v2.4s, v2.4s, v18.4s
+ ushr v6.4s, v5.4s, #7
+ shl v5.4s, v5.4s, #25
+ ext v2.16b, v2.16b, v2.16b, #4
+ orr v5.16b, v5.16b, v6.16b
+ ext v3.16b, v3.16b, v3.16b, #8
+ add v2.4s, v5.4s, v2.4s
+ eor v3.16b, v2.16b, v3.16b
+ ext v4.16b, v4.16b, v4.16b, #12
+ add v0.4s, v2.4s, v0.4s
+ rev32 v2.8h, v3.8h
+ add v3.4s, v4.4s, v2.4s
+ eor v4.16b, v3.16b, v5.16b
+ ushr v5.4s, v4.4s, #12
+ shl v4.4s, v4.4s, #20
+ orr v4.16b, v4.16b, v5.16b
+ add v0.4s, v0.4s, v4.4s
+ eor v2.16b, v0.16b, v2.16b
+ ushr v5.4s, v2.4s, #8
+ shl v2.4s, v2.4s, #24
+ orr v2.16b, v2.16b, v5.16b
+ add v3.4s, v2.4s, v3.4s
+ eor v4.16b, v3.16b, v4.16b
+ ext v0.16b, v0.16b, v0.16b, #12
+ ushr v5.4s, v4.4s, #7
+ shl v4.4s, v4.4s, #25
+ add v0.4s, v0.4s, v21.4s
+ orr v4.16b, v4.16b, v5.16b
+ ext v2.16b, v2.16b, v2.16b, #8
+ add v0.4s, v0.4s, v4.4s
+ eor v2.16b, v0.16b, v2.16b
+ ext v3.16b, v3.16b, v3.16b, #4
+ add v0.4s, v0.4s, v1.4s
+ rev32 v1.8h, v2.8h
+ add v2.4s, v3.4s, v1.4s
+ eor v3.16b, v2.16b, v4.16b
+ ushr v4.4s, v3.4s, #12
+ shl v3.4s, v3.4s, #20
+ orr v3.16b, v3.16b, v4.16b
+ add v0.4s, v0.4s, v3.4s
+ eor v1.16b, v0.16b, v1.16b
+ ushr v4.4s, v1.4s, #8
+ shl v1.4s, v1.4s, #24
+ orr v1.16b, v1.16b, v4.16b
+ add v2.4s, v1.4s, v2.4s
+ eor v3.16b, v2.16b, v3.16b
+ ushr v4.4s, v3.4s, #7
+ shl v3.4s, v3.4s, #25
+ ext v0.16b, v0.16b, v0.16b, #4
+ ext v1.16b, v1.16b, v1.16b, #8
+ ext v2.16b, v2.16b, v2.16b, #12
+ orr v3.16b, v3.16b, v4.16b
+ eor v0.16b, v2.16b, v0.16b
+ eor v3.16b, v3.16b, v1.16b
+ stp q0, q3, [x5]  /* output bytes 0-31 */
+ ldr q0, [x0]  /* re-read bytes 0-15 at x0 */
+ eor v0.16b, v0.16b, v2.16b
+ str q0, [x5, #32]  /* output bytes 32-47 = v2 XOR bytes 0-15 at x0 */
+ ldr q0, [x0, #16]  /* re-read bytes 16-31 at x0 */
+ eor v0.16b, v0.16b, v1.16b
+ str q0, [x5, #48]  /* output bytes 48-63 = v1 XOR bytes 16-31 at x0 */
+ ret
+.Lfunc_end1:
+ .size zfs_blake3_compress_xof_sse2, .Lfunc_end1-zfs_blake3_compress_xof_sse2
+ .cfi_endproc
+
+ .section .rodata.cst16,"aM",@progbits,16
+ .p2align 4
+.LCPI2_0:
+ .word 0
+ .word 1
+ .word 2
+ .word 3
+ .text
+ .globl zfs_blake3_hash_many_sse2
+ .p2align 2
+ .type zfs_blake3_hash_many_sse2,@function
+zfs_blake3_hash_many_sse2:
+ .cfi_startproc
+ stp d15, d14, [sp, #-160]!
+ stp d13, d12, [sp, #16]
+ stp d11, d10, [sp, #32]
+ stp d9, d8, [sp, #48]
+ stp x29, x30, [sp, #64]
+ stp x28, x27, [sp, #80]
+ stp x26, x25, [sp, #96]
+ stp x24, x23, [sp, #112]
+ stp x22, x21, [sp, #128]
+ stp x20, x19, [sp, #144]
+ mov x29, sp
+ sub sp, sp, #384
+ .cfi_def_cfa w29, 160
+ .cfi_offset w19, -8
+ .cfi_offset w20, -16
+ .cfi_offset w21, -24
+ .cfi_offset w22, -32
+ .cfi_offset w23, -40
+ .cfi_offset w24, -48
+ .cfi_offset w25, -56
+ .cfi_offset w26, -64
+ .cfi_offset w27, -72
+ .cfi_offset w28, -80
+ .cfi_offset w30, -88
+ .cfi_offset w29, -96
+ .cfi_offset b8, -104
+ .cfi_offset b9, -112
+ .cfi_offset b10, -120
+ .cfi_offset b11, -128
+ .cfi_offset b12, -136
+ .cfi_offset b13, -144
+ .cfi_offset b14, -152
+ .cfi_offset b15, -160
+ ldr x26, [x29, #168]
+ ldrb w27, [x29, #160]
+ mov w19, w6
+ mov x20, x4
+ mov x22, x2
+ mov x28, x1
+ cmp x1, #4
+ mov x24, x0
+ str x3, [sp, #40]
+ b.lo .LBB2_8
+ adrp x9, .LCPI2_0
+ ldr q0, [x9, :lo12:.LCPI2_0]
+ sbfx w11, w5, #0, #1
+ dup v1.4s, w11
+ mov w9, #58983
+ mov w10, #44677
+ and v0.16b, v1.16b, v0.16b
+ mov w11, #62322
+ mov w12, #62778
+ orr w8, w7, w19
+ movk w9, #27145, lsl #16
+ movk w10, #47975, lsl #16
+ movk w11, #15470, lsl #16
+ str q0, [sp, #16]
+ orr v0.4s, #128, lsl #24
+ movk w12, #42319, lsl #16
+ str q0, [sp]
+.LBB2_2:
+ ldr x0, [sp, #40]
+ mov x13, x0
+ ld1r { v20.4s }, [x13], #4
+ add x14, x0, #8
+ add x15, x0, #12
+ add x16, x0, #16
+ add x17, x0, #20
+ add x18, x0, #24
+ add x0, x0, #28
+ ld1r { v17.4s }, [x14]
+ ld1r { v6.4s }, [x15]
+ ld1r { v8.4s }, [x16]
+ ld1r { v9.4s }, [x17]
+ ld1r { v31.4s }, [x18]
+ ld1r { v26.4s }, [x13]
+ ld1r { v15.4s }, [x0]
+ cbz x22, .LBB2_7
+ ldr q1, [sp, #16]
+ dup v0.4s, w20
+ ldp x13, x14, [x24]
+ ldp x15, x16, [x24, #16]
+ add v1.4s, v0.4s, v1.4s
+ movi v0.4s, #128, lsl #24
+ str q1, [sp, #64]
+ eor v0.16b, v1.16b, v0.16b
+ ldr q1, [sp]
+ lsr x18, x20, #32
+ mov x17, xzr
+ cmgt v0.4s, v1.4s, v0.4s
+ dup v1.4s, w18
+ sub v0.4s, v1.4s, v0.4s
+ mov w18, w8
+ str q0, [sp, #48]
+.LBB2_4:
+ mov w2, #16
+ bfi x2, x17, #6, #58
+ ldr q1, [x13, x2]
+ ldr q3, [x14, x2]
+ ldr q2, [x15, x2]
+ ldr q4, [x16, x2]
+ mov w2, #32
+ bfi x2, x17, #6, #58
+ ldr q5, [x13, x2]
+ ldr q18, [x14, x2]
+ ldr q19, [x15, x2]
+ ldr q23, [x16, x2]
+ mov w2, #48
+ lsl x3, x17, #6
+ bfi x2, x17, #6, #58
+ add x17, x17, #1
+ ldr q0, [x13, x3]
+ ldr q21, [x14, x3]
+ ldr q7, [x15, x3]
+ ldr q16, [x16, x3]
+ cmp x17, x22
+ ldr q13, [x13, x2]
+ ldr q14, [x14, x2]
+ ldr q29, [x15, x2]
+ ldr q10, [x16, x2]
+ csel w2, w27, wzr, eq
+ orr w18, w2, w18
+ mov x0, xzr
+ and w18, w18, #0xff
+ add x3, x3, #256
+.LBB2_5:
+ ldr x2, [x24, x0]
+ add x0, x0, #8
+ cmp x0, #32
+ add x2, x2, x3
+ prfm pldl1keep, [x2]
+ b.ne .LBB2_5
+ dup v22.4s, w18
+ str q22, [sp, #192]
+ zip1 v27.4s, v0.4s, v21.4s
+ zip2 v21.4s, v0.4s, v21.4s
+ zip1 v0.4s, v7.4s, v16.4s
+ zip2 v22.4s, v7.4s, v16.4s
+ zip1 v7.4s, v1.4s, v3.4s
+ zip1 v25.4s, v2.4s, v4.4s
+ zip2 v16.4s, v2.4s, v4.4s
+ zip1 v11.4s, v19.4s, v23.4s
+ zip2 v12.4s, v19.4s, v23.4s
+ zip1 v19.4s, v13.4s, v14.4s
+ zip2 v23.4s, v13.4s, v14.4s
+ zip1 v13.4s, v29.4s, v10.4s
+ zip2 v14.4s, v29.4s, v10.4s
+ add v10.4s, v20.4s, v8.4s
+ add v2.4s, v26.4s, v9.4s
+ ext v20.16b, v22.16b, v21.16b, #8
+ ext v26.16b, v25.16b, v7.16b, #8
+ zip2 v24.4s, v1.4s, v3.4s
+ add v1.4s, v6.4s, v15.4s
+ ext v6.16b, v0.16b, v27.16b, #8
+ ext v20.16b, v21.16b, v20.16b, #8
+ mov v21.d[1], v22.d[0]
+ ext v22.16b, v7.16b, v26.16b, #8
+ mov v7.d[1], v25.d[0]
+ add v3.4s, v17.4s, v31.4s
+ str q1, [sp, #144]
+ ext v1.16b, v27.16b, v6.16b, #8
+ mov v6.16b, v7.16b
+ zip1 v28.4s, v5.4s, v18.4s
+ stur q1, [x29, #-80]
+ mov v1.16b, v27.16b
+ mov v27.16b, v24.16b
+ add v3.4s, v3.4s, v6.4s
+ ldr q6, [sp, #64]
+ ext v29.16b, v16.16b, v24.16b, #8
+ mov v1.d[1], v0.d[0]
+ ext v0.16b, v11.16b, v28.16b, #8
+ mov v27.d[1], v16.d[0]
+ ext v16.16b, v14.16b, v23.16b, #8
+ stur q7, [x29, #-144]
+ ext v7.16b, v24.16b, v29.16b, #8
+ ext v29.16b, v28.16b, v0.16b, #8
+ ext v0.16b, v23.16b, v16.16b, #8
+ mov v23.d[1], v14.d[0]
+ stp q0, q23, [sp, #80]
+ add v0.4s, v10.4s, v1.4s
+ eor v16.16b, v0.16b, v6.16b
+ ldr q6, [sp, #48]
+ add v2.4s, v2.4s, v21.4s
+ mov v28.d[1], v11.d[0]
+ zip2 v18.4s, v5.4s, v18.4s
+ eor v10.16b, v2.16b, v6.16b
+ movi v6.4s, #64
+ eor v11.16b, v3.16b, v6.16b
+ ldr q6, [sp, #144]
+ dup v17.4s, w9
+ ext v30.16b, v12.16b, v18.16b, #8
+ rev32 v16.8h, v16.8h
+ dup v5.4s, w10
+ ext v25.16b, v18.16b, v30.16b, #8
+ mov v30.16b, v23.16b
+ mov v23.16b, v1.16b
+ str q1, [sp, #160]
+ rev32 v10.8h, v10.8h
+ add v1.4s, v16.4s, v17.4s
+ add v17.4s, v6.4s, v27.4s
+ ldr q6, [sp, #192]
+ dup v4.4s, w11
+ rev32 v11.8h, v11.8h
+ add v5.4s, v10.4s, v5.4s
+ eor v8.16b, v1.16b, v8.16b
+ stur q21, [x29, #-128]
+ mov v18.d[1], v12.d[0]
+ add v4.4s, v11.4s, v4.4s
+ eor v9.16b, v5.16b, v9.16b
+ ushr v12.4s, v8.4s, #12
+ shl v8.4s, v8.4s, #20
+ ldur q21, [x29, #-80]
+ ext v26.16b, v13.16b, v19.16b, #8
+ eor v31.16b, v4.16b, v31.16b
+ orr v8.16b, v8.16b, v12.16b
+ ushr v12.4s, v9.4s, #12
+ shl v9.4s, v9.4s, #20
+ ext v26.16b, v19.16b, v26.16b, #8
+ mov v19.d[1], v13.d[0]
+ orr v9.16b, v9.16b, v12.16b
+ ushr v12.4s, v31.4s, #12
+ shl v31.4s, v31.4s, #20
+ eor v13.16b, v17.16b, v6.16b
+ orr v31.16b, v31.16b, v12.16b
+ dup v12.4s, w12
+ rev32 v13.8h, v13.8h
+ add v12.4s, v13.4s, v12.4s
+ add v0.4s, v0.4s, v21.4s
+ eor v14.16b, v12.16b, v15.16b
+ add v0.4s, v0.4s, v8.4s
+ add v2.4s, v2.4s, v20.4s
+ ushr v15.4s, v14.4s, #12
+ shl v14.4s, v14.4s, #20
+ eor v16.16b, v0.16b, v16.16b
+ add v2.4s, v2.4s, v9.4s
+ add v3.4s, v3.4s, v22.4s
+ orr v14.16b, v14.16b, v15.16b
+ ushr v15.4s, v16.4s, #8
+ shl v16.4s, v16.4s, #24
+ eor v10.16b, v2.16b, v10.16b
+ add v3.4s, v3.4s, v31.4s
+ add v17.4s, v17.4s, v7.4s
+ orr v16.16b, v16.16b, v15.16b
+ ushr v15.4s, v10.4s, #8
+ shl v10.4s, v10.4s, #24
+ eor v11.16b, v3.16b, v11.16b
+ add v17.4s, v17.4s, v14.4s
+ orr v10.16b, v10.16b, v15.16b
+ ushr v15.4s, v11.4s, #8
+ shl v11.4s, v11.4s, #24
+ eor v13.16b, v17.16b, v13.16b
+ add v1.4s, v16.4s, v1.4s
+ orr v11.16b, v11.16b, v15.16b
+ ushr v15.4s, v13.4s, #8
+ shl v13.4s, v13.4s, #24
+ eor v8.16b, v1.16b, v8.16b
+ add v5.4s, v10.4s, v5.4s
+ orr v13.16b, v13.16b, v15.16b
+ ushr v15.4s, v8.4s, #7
+ shl v8.4s, v8.4s, #25
+ eor v9.16b, v5.16b, v9.16b
+ add v4.4s, v11.4s, v4.4s
+ orr v8.16b, v8.16b, v15.16b
+ ushr v15.4s, v9.4s, #7
+ shl v9.4s, v9.4s, #25
+ eor v31.16b, v4.16b, v31.16b
+ add v12.4s, v13.4s, v12.4s
+ orr v9.16b, v9.16b, v15.16b
+ ushr v15.4s, v31.4s, #7
+ shl v31.4s, v31.4s, #25
+ eor v14.16b, v12.16b, v14.16b
+ add v0.4s, v0.4s, v28.4s
+ orr v31.16b, v31.16b, v15.16b
+ ushr v15.4s, v14.4s, #7
+ shl v14.4s, v14.4s, #25
+ add v0.4s, v0.4s, v9.4s
+ add v2.4s, v2.4s, v18.4s
+ orr v14.16b, v14.16b, v15.16b
+ eor v13.16b, v0.16b, v13.16b
+ add v2.4s, v2.4s, v31.4s
+ add v3.4s, v3.4s, v19.4s
+ rev32 v13.8h, v13.8h
+ eor v16.16b, v2.16b, v16.16b
+ add v3.4s, v3.4s, v14.4s
+ add v17.4s, v17.4s, v30.4s
+ add v4.4s, v4.4s, v13.4s
+ rev32 v16.8h, v16.8h
+ eor v10.16b, v3.16b, v10.16b
+ add v17.4s, v17.4s, v8.4s
+ eor v9.16b, v4.16b, v9.16b
+ add v12.4s, v12.4s, v16.4s
+ rev32 v10.8h, v10.8h
+ eor v11.16b, v17.16b, v11.16b
+ mov v24.16b, v7.16b
+ stur q7, [x29, #-112]
+ ushr v15.4s, v9.4s, #12
+ shl v9.4s, v9.4s, #20
+ eor v31.16b, v12.16b, v31.16b
+ add v1.4s, v1.4s, v10.4s
+ rev32 v11.8h, v11.8h
+ mov v7.16b, v26.16b
+ add v3.4s, v3.4s, v26.4s
+ ldr q26, [sp, #80]
+ orr v9.16b, v9.16b, v15.16b
+ ushr v15.4s, v31.4s, #12
+ shl v31.4s, v31.4s, #20
+ eor v14.16b, v1.16b, v14.16b
+ add v5.4s, v5.4s, v11.4s
+ add v0.4s, v0.4s, v29.4s
+ orr v31.16b, v31.16b, v15.16b
+ ushr v15.4s, v14.4s, #12
+ shl v14.4s, v14.4s, #20
+ eor v8.16b, v5.16b, v8.16b
+ add v0.4s, v0.4s, v9.4s
+ add v2.4s, v2.4s, v25.4s
+ orr v14.16b, v14.16b, v15.16b
+ ushr v15.4s, v8.4s, #12
+ shl v8.4s, v8.4s, #20
+ eor v13.16b, v0.16b, v13.16b
+ add v2.4s, v2.4s, v31.4s
+ orr v8.16b, v8.16b, v15.16b
+ ushr v15.4s, v13.4s, #8
+ shl v13.4s, v13.4s, #24
+ eor v16.16b, v2.16b, v16.16b
+ add v3.4s, v3.4s, v14.4s
+ add v17.4s, v17.4s, v26.4s
+ orr v13.16b, v13.16b, v15.16b
+ ushr v15.4s, v16.4s, #8
+ shl v16.4s, v16.4s, #24
+ eor v10.16b, v3.16b, v10.16b
+ add v17.4s, v17.4s, v8.4s
+ orr v16.16b, v16.16b, v15.16b
+ ushr v15.4s, v10.4s, #8
+ shl v10.4s, v10.4s, #24
+ eor v11.16b, v17.16b, v11.16b
+ add v4.4s, v13.4s, v4.4s
+ orr v10.16b, v10.16b, v15.16b
+ ushr v15.4s, v11.4s, #8
+ shl v11.4s, v11.4s, #24
+ eor v9.16b, v4.16b, v9.16b
+ add v12.4s, v16.4s, v12.4s
+ str q22, [sp, #128]
+ orr v11.16b, v11.16b, v15.16b
+ ushr v15.4s, v9.4s, #7
+ shl v9.4s, v9.4s, #25
+ eor v31.16b, v12.16b, v31.16b
+ add v1.4s, v10.4s, v1.4s
+ ldur q22, [x29, #-128]
+ orr v9.16b, v9.16b, v15.16b
+ ushr v15.4s, v31.4s, #7
+ shl v31.4s, v31.4s, #25
+ eor v14.16b, v1.16b, v14.16b
+ add v5.4s, v11.4s, v5.4s
+ orr v31.16b, v31.16b, v15.16b
+ ushr v15.4s, v14.4s, #7
+ shl v14.4s, v14.4s, #25
+ eor v8.16b, v5.16b, v8.16b
+ mov v6.16b, v18.16b
+ orr v14.16b, v14.16b, v15.16b
+ ushr v15.4s, v8.4s, #7
+ shl v8.4s, v8.4s, #25
+ ldur q18, [x29, #-144]
+ orr v8.16b, v8.16b, v15.16b
+ add v0.4s, v0.4s, v22.4s
+ add v0.4s, v0.4s, v8.4s
+ add v2.4s, v2.4s, v20.4s
+ eor v16.16b, v0.16b, v16.16b
+ add v2.4s, v2.4s, v9.4s
+ add v3.4s, v3.4s, v24.4s
+ rev32 v16.8h, v16.8h
+ eor v10.16b, v2.16b, v10.16b
+ add v3.4s, v3.4s, v31.4s
+ add v17.4s, v17.4s, v18.4s
+ add v1.4s, v1.4s, v16.4s
+ rev32 v10.8h, v10.8h
+ eor v11.16b, v3.16b, v11.16b
+ add v17.4s, v17.4s, v14.4s
+ eor v8.16b, v1.16b, v8.16b
+ add v5.4s, v5.4s, v10.4s
+ rev32 v11.8h, v11.8h
+ eor v13.16b, v17.16b, v13.16b
+ ushr v15.4s, v8.4s, #12
+ shl v8.4s, v8.4s, #20
+ eor v9.16b, v5.16b, v9.16b
+ add v4.4s, v4.4s, v11.4s
+ rev32 v13.8h, v13.8h
+ orr v8.16b, v8.16b, v15.16b
+ ushr v15.4s, v9.4s, #12
+ shl v9.4s, v9.4s, #20
+ eor v31.16b, v4.16b, v31.16b
+ add v12.4s, v12.4s, v13.4s
+ add v0.4s, v0.4s, v27.4s
+ orr v9.16b, v9.16b, v15.16b
+ ushr v15.4s, v31.4s, #12
+ shl v31.4s, v31.4s, #20
+ eor v14.16b, v12.16b, v14.16b
+ add v0.4s, v0.4s, v8.4s
+ add v2.4s, v2.4s, v6.4s
+ orr v31.16b, v31.16b, v15.16b
+ ushr v15.4s, v14.4s, #12
+ shl v14.4s, v14.4s, #20
+ eor v16.16b, v0.16b, v16.16b
+ add v2.4s, v2.4s, v9.4s
+ add v3.4s, v3.4s, v23.4s
+ orr v14.16b, v14.16b, v15.16b
+ ushr v15.4s, v16.4s, #8
+ shl v16.4s, v16.4s, #24
+ eor v10.16b, v2.16b, v10.16b
+ add v3.4s, v3.4s, v31.4s
+ add v17.4s, v17.4s, v7.4s
+ orr v16.16b, v16.16b, v15.16b
+ ushr v15.4s, v10.4s, #8
+ shl v10.4s, v10.4s, #24
+ eor v11.16b, v3.16b, v11.16b
+ add v17.4s, v17.4s, v14.4s
+ orr v10.16b, v10.16b, v15.16b
+ ushr v15.4s, v11.4s, #8
+ shl v11.4s, v11.4s, #24
+ eor v13.16b, v17.16b, v13.16b
+ add v1.4s, v16.4s, v1.4s
+ orr v11.16b, v11.16b, v15.16b
+ ushr v15.4s, v13.4s, #8
+ shl v13.4s, v13.4s, #24
+ eor v8.16b, v1.16b, v8.16b
+ add v5.4s, v10.4s, v5.4s
+ orr v13.16b, v13.16b, v15.16b
+ ushr v15.4s, v8.4s, #7
+ shl v8.4s, v8.4s, #25
+ eor v9.16b, v5.16b, v9.16b
+ add v4.4s, v11.4s, v4.4s
+ orr v8.16b, v8.16b, v15.16b
+ ushr v15.4s, v9.4s, #7
+ shl v9.4s, v9.4s, #25
+ eor v31.16b, v4.16b, v31.16b
+ add v12.4s, v13.4s, v12.4s
+ orr v9.16b, v9.16b, v15.16b
+ ushr v15.4s, v31.4s, #7
+ shl v31.4s, v31.4s, #25
+ eor v14.16b, v12.16b, v14.16b
+ add v0.4s, v0.4s, v21.4s
+ orr v31.16b, v31.16b, v15.16b
+ ushr v15.4s, v14.4s, #7
+ shl v14.4s, v14.4s, #25
+ add v0.4s, v0.4s, v9.4s
+ add v2.4s, v2.4s, v19.4s
+ orr v14.16b, v14.16b, v15.16b
+ eor v13.16b, v0.16b, v13.16b
+ add v2.4s, v2.4s, v31.4s
+ add v3.4s, v3.4s, v29.4s
+ str q28, [sp, #112]
+ rev32 v13.8h, v13.8h
+ eor v16.16b, v2.16b, v16.16b
+ add v3.4s, v3.4s, v14.4s
+ add v17.4s, v17.4s, v26.4s
+ add v4.4s, v4.4s, v13.4s
+ rev32 v16.8h, v16.8h
+ eor v10.16b, v3.16b, v10.16b
+ add v17.4s, v17.4s, v8.4s
+ ldp q28, q23, [sp, #112]
+ eor v9.16b, v4.16b, v9.16b
+ add v12.4s, v12.4s, v16.4s
+ rev32 v10.8h, v10.8h
+ eor v11.16b, v17.16b, v11.16b
+ ldr q21, [sp, #96]
+ ushr v15.4s, v9.4s, #12
+ shl v9.4s, v9.4s, #20
+ eor v31.16b, v12.16b, v31.16b
+ add v1.4s, v1.4s, v10.4s
+ rev32 v11.8h, v11.8h
+ orr v9.16b, v9.16b, v15.16b
+ ushr v15.4s, v31.4s, #12
+ shl v31.4s, v31.4s, #20
+ eor v14.16b, v1.16b, v14.16b
+ add v5.4s, v5.4s, v11.4s
+ add v0.4s, v0.4s, v25.4s
+ orr v31.16b, v31.16b, v15.16b
+ ushr v15.4s, v14.4s, #12
+ shl v14.4s, v14.4s, #20
+ eor v8.16b, v5.16b, v8.16b
+ add v0.4s, v0.4s, v9.4s
+ add v2.4s, v2.4s, v23.4s
+ orr v14.16b, v14.16b, v15.16b
+ ushr v15.4s, v8.4s, #12
+ shl v8.4s, v8.4s, #20
+ eor v13.16b, v0.16b, v13.16b
+ add v2.4s, v2.4s, v31.4s
+ add v3.4s, v3.4s, v21.4s
+ orr v8.16b, v8.16b, v15.16b
+ ushr v15.4s, v13.4s, #8
+ shl v13.4s, v13.4s, #24
+ eor v16.16b, v2.16b, v16.16b
+ add v3.4s, v3.4s, v14.4s
+ add v17.4s, v17.4s, v28.4s
+ orr v13.16b, v13.16b, v15.16b
+ ushr v15.4s, v16.4s, #8
+ shl v16.4s, v16.4s, #24
+ eor v10.16b, v3.16b, v10.16b
+ add v17.4s, v17.4s, v8.4s
+ orr v16.16b, v16.16b, v15.16b
+ ushr v15.4s, v10.4s, #8
+ shl v10.4s, v10.4s, #24
+ eor v11.16b, v17.16b, v11.16b
+ add v4.4s, v13.4s, v4.4s
+ orr v10.16b, v10.16b, v15.16b
+ ushr v15.4s, v11.4s, #8
+ shl v11.4s, v11.4s, #24
+ eor v9.16b, v4.16b, v9.16b
+ add v12.4s, v16.4s, v12.4s
+ orr v11.16b, v11.16b, v15.16b
+ ushr v15.4s, v9.4s, #7
+ shl v9.4s, v9.4s, #25
+ eor v31.16b, v12.16b, v31.16b
+ add v1.4s, v10.4s, v1.4s
+ orr v9.16b, v9.16b, v15.16b
+ ushr v15.4s, v31.4s, #7
+ shl v31.4s, v31.4s, #25
+ eor v14.16b, v1.16b, v14.16b
+ add v5.4s, v11.4s, v5.4s
+ orr v31.16b, v31.16b, v15.16b
+ ushr v15.4s, v14.4s, #7
+ shl v14.4s, v14.4s, #25
+ eor v8.16b, v5.16b, v8.16b
+ mov v30.16b, v29.16b
+ mov v29.16b, v25.16b
+ orr v14.16b, v14.16b, v15.16b
+ ushr v15.4s, v8.4s, #7
+ shl v8.4s, v8.4s, #25
+ ldur q25, [x29, #-112]
+ orr v8.16b, v8.16b, v15.16b
+ add v0.4s, v0.4s, v20.4s
+ add v0.4s, v0.4s, v8.4s
+ add v2.4s, v2.4s, v6.4s
+ eor v16.16b, v0.16b, v16.16b
+ add v2.4s, v2.4s, v9.4s
+ add v3.4s, v3.4s, v7.4s
+ rev32 v16.8h, v16.8h
+ eor v10.16b, v2.16b, v10.16b
+ add v3.4s, v3.4s, v31.4s
+ add v17.4s, v17.4s, v25.4s
+ add v1.4s, v1.4s, v16.4s
+ rev32 v10.8h, v10.8h
+ eor v11.16b, v3.16b, v11.16b
+ add v17.4s, v17.4s, v14.4s
+ eor v8.16b, v1.16b, v8.16b
+ add v5.4s, v5.4s, v10.4s
+ rev32 v11.8h, v11.8h
+ eor v13.16b, v17.16b, v13.16b
+ ushr v15.4s, v8.4s, #12
+ shl v8.4s, v8.4s, #20
+ eor v9.16b, v5.16b, v9.16b
+ add v4.4s, v4.4s, v11.4s
+ rev32 v13.8h, v13.8h
+ orr v8.16b, v8.16b, v15.16b
+ ushr v15.4s, v9.4s, #12
+ shl v9.4s, v9.4s, #20
+ eor v31.16b, v4.16b, v31.16b
+ add v12.4s, v12.4s, v13.4s
+ add v0.4s, v0.4s, v18.4s
+ orr v9.16b, v9.16b, v15.16b
+ ushr v15.4s, v31.4s, #12
+ shl v31.4s, v31.4s, #20
+ eor v14.16b, v12.16b, v14.16b
+ add v0.4s, v0.4s, v8.4s
+ add v2.4s, v2.4s, v19.4s
+ orr v31.16b, v31.16b, v15.16b
+ ushr v15.4s, v14.4s, #12
+ shl v14.4s, v14.4s, #20
+ eor v16.16b, v0.16b, v16.16b
+ add v2.4s, v2.4s, v9.4s
+ add v3.4s, v3.4s, v22.4s
+ orr v14.16b, v14.16b, v15.16b
+ ushr v15.4s, v16.4s, #8
+ shl v16.4s, v16.4s, #24
+ eor v10.16b, v2.16b, v10.16b
+ add v3.4s, v3.4s, v31.4s
+ add v17.4s, v17.4s, v21.4s
+ orr v16.16b, v16.16b, v15.16b
+ ushr v15.4s, v10.4s, #8
+ shl v10.4s, v10.4s, #24
+ eor v11.16b, v3.16b, v11.16b
+ add v17.4s, v17.4s, v14.4s
+ orr v10.16b, v10.16b, v15.16b
+ ushr v15.4s, v11.4s, #8
+ shl v11.4s, v11.4s, #24
+ eor v13.16b, v17.16b, v13.16b
+ add v1.4s, v16.4s, v1.4s
+ orr v11.16b, v11.16b, v15.16b
+ ushr v15.4s, v13.4s, #8
+ shl v13.4s, v13.4s, #24
+ eor v8.16b, v1.16b, v8.16b
+ add v5.4s, v10.4s, v5.4s
+ orr v13.16b, v13.16b, v15.16b
+ ushr v15.4s, v8.4s, #7
+ shl v8.4s, v8.4s, #25
+ eor v9.16b, v5.16b, v9.16b
+ add v4.4s, v11.4s, v4.4s
+ orr v8.16b, v8.16b, v15.16b
+ ushr v15.4s, v9.4s, #7
+ shl v9.4s, v9.4s, #25
+ eor v31.16b, v4.16b, v31.16b
+ add v12.4s, v13.4s, v12.4s
+ orr v9.16b, v9.16b, v15.16b
+ ushr v15.4s, v31.4s, #7
+ shl v31.4s, v31.4s, #25
+ eor v14.16b, v12.16b, v14.16b
+ add v0.4s, v0.4s, v27.4s
+ orr v31.16b, v31.16b, v15.16b
+ ushr v15.4s, v14.4s, #7
+ shl v14.4s, v14.4s, #25
+ add v0.4s, v0.4s, v9.4s
+ add v2.4s, v2.4s, v30.4s
+ orr v14.16b, v14.16b, v15.16b
+ eor v13.16b, v0.16b, v13.16b
+ add v2.4s, v2.4s, v31.4s
+ add v3.4s, v3.4s, v29.4s
+ rev32 v13.8h, v13.8h
+ eor v16.16b, v2.16b, v16.16b
+ add v3.4s, v3.4s, v14.4s
+ add v17.4s, v17.4s, v28.4s
+ add v4.4s, v4.4s, v13.4s
+ rev32 v16.8h, v16.8h
+ eor v10.16b, v3.16b, v10.16b
+ add v17.4s, v17.4s, v8.4s
+ eor v9.16b, v4.16b, v9.16b
+ add v12.4s, v12.4s, v16.4s
+ rev32 v10.8h, v10.8h
+ eor v11.16b, v17.16b, v11.16b
+ ushr v15.4s, v9.4s, #12
+ shl v9.4s, v9.4s, #20
+ eor v31.16b, v12.16b, v31.16b
+ add v1.4s, v1.4s, v10.4s
+ rev32 v11.8h, v11.8h
+ ldr q24, [sp, #160]
+ orr v9.16b, v9.16b, v15.16b
+ ushr v15.4s, v31.4s, #12
+ shl v31.4s, v31.4s, #20
+ eor v14.16b, v1.16b, v14.16b
+ add v5.4s, v5.4s, v11.4s
+ stur q7, [x29, #-64]
+ orr v31.16b, v31.16b, v15.16b
+ ushr v15.4s, v14.4s, #12
+ shl v14.4s, v14.4s, #20
+ eor v8.16b, v5.16b, v8.16b
+ mov v7.16b, v26.16b
+ add v3.4s, v3.4s, v26.4s
+ ldur q26, [x29, #-80]
+ orr v14.16b, v14.16b, v15.16b
+ ushr v15.4s, v8.4s, #12
+ shl v8.4s, v8.4s, #20
+ add v0.4s, v0.4s, v23.4s
+ orr v8.16b, v8.16b, v15.16b
+ add v15.4s, v0.4s, v9.4s
+ add v2.4s, v2.4s, v24.4s
+ eor v0.16b, v15.16b, v13.16b
+ add v2.4s, v2.4s, v31.4s
+ ushr v13.4s, v0.4s, #8
+ shl v0.4s, v0.4s, #24
+ eor v16.16b, v2.16b, v16.16b
+ add v3.4s, v3.4s, v14.4s
+ add v17.4s, v17.4s, v26.4s
+ orr v0.16b, v0.16b, v13.16b
+ ushr v13.4s, v16.4s, #8
+ shl v16.4s, v16.4s, #24
+ eor v10.16b, v3.16b, v10.16b
+ add v17.4s, v17.4s, v8.4s
+ orr v16.16b, v16.16b, v13.16b
+ ushr v13.4s, v10.4s, #8
+ shl v10.4s, v10.4s, #24
+ eor v11.16b, v17.16b, v11.16b
+ add v4.4s, v0.4s, v4.4s
+ orr v10.16b, v10.16b, v13.16b
+ ushr v13.4s, v11.4s, #8
+ shl v11.4s, v11.4s, #24
+ eor v9.16b, v4.16b, v9.16b
+ add v12.4s, v16.4s, v12.4s
+ orr v11.16b, v11.16b, v13.16b
+ ushr v13.4s, v9.4s, #7
+ shl v9.4s, v9.4s, #25
+ eor v31.16b, v12.16b, v31.16b
+ orr v9.16b, v9.16b, v13.16b
+ ushr v13.4s, v31.4s, #7
+ shl v31.4s, v31.4s, #25
+ add v1.4s, v10.4s, v1.4s
+ orr v31.16b, v31.16b, v13.16b
+ eor v13.16b, v1.16b, v14.16b
+ add v5.4s, v11.4s, v5.4s
+ ushr v14.4s, v13.4s, #7
+ shl v13.4s, v13.4s, #25
+ eor v8.16b, v5.16b, v8.16b
+ orr v13.16b, v13.16b, v14.16b
+ ushr v14.4s, v8.4s, #7
+ shl v8.4s, v8.4s, #25
+ stur q6, [x29, #-96]
+ orr v8.16b, v8.16b, v14.16b
+ add v14.4s, v15.4s, v6.4s
+ ldur q6, [x29, #-64]
+ mov v18.16b, v19.16b
+ add v14.4s, v14.4s, v8.4s
+ add v2.4s, v2.4s, v18.4s
+ eor v16.16b, v14.16b, v16.16b
+ add v2.4s, v2.4s, v9.4s
+ add v3.4s, v3.4s, v21.4s
+ rev32 v16.8h, v16.8h
+ eor v10.16b, v2.16b, v10.16b
+ add v3.4s, v3.4s, v31.4s
+ add v17.4s, v17.4s, v6.4s
+ add v1.4s, v1.4s, v16.4s
+ rev32 v10.8h, v10.8h
+ eor v11.16b, v3.16b, v11.16b
+ add v17.4s, v17.4s, v13.4s
+ eor v8.16b, v1.16b, v8.16b
+ add v5.4s, v5.4s, v10.4s
+ rev32 v11.8h, v11.8h
+ eor v0.16b, v17.16b, v0.16b
+ ushr v15.4s, v8.4s, #12
+ shl v8.4s, v8.4s, #20
+ eor v9.16b, v5.16b, v9.16b
+ add v4.4s, v4.4s, v11.4s
+ rev32 v0.8h, v0.8h
+ str q27, [sp, #176]
+ mov v27.16b, v30.16b
+ orr v8.16b, v8.16b, v15.16b
+ ushr v15.4s, v9.4s, #12
+ shl v9.4s, v9.4s, #20
+ eor v31.16b, v4.16b, v31.16b
+ add v12.4s, v12.4s, v0.4s
+ add v14.4s, v14.4s, v25.4s
+ orr v9.16b, v9.16b, v15.16b
+ ushr v15.4s, v31.4s, #12
+ shl v31.4s, v31.4s, #20
+ eor v13.16b, v12.16b, v13.16b
+ add v14.4s, v14.4s, v8.4s
+ add v2.4s, v2.4s, v27.4s
+ orr v31.16b, v31.16b, v15.16b
+ ushr v15.4s, v13.4s, #12
+ shl v13.4s, v13.4s, #20
+ eor v16.16b, v14.16b, v16.16b
+ add v2.4s, v2.4s, v9.4s
+ add v3.4s, v3.4s, v20.4s
+ orr v13.16b, v13.16b, v15.16b
+ ushr v15.4s, v16.4s, #8
+ shl v16.4s, v16.4s, #24
+ eor v10.16b, v2.16b, v10.16b
+ add v3.4s, v3.4s, v31.4s
+ add v17.4s, v17.4s, v7.4s
+ orr v16.16b, v16.16b, v15.16b
+ ushr v15.4s, v10.4s, #8
+ shl v10.4s, v10.4s, #24
+ eor v11.16b, v3.16b, v11.16b
+ add v17.4s, v17.4s, v13.4s
+ mov v30.16b, v23.16b
+ orr v10.16b, v10.16b, v15.16b
+ ushr v15.4s, v11.4s, #8
+ shl v11.4s, v11.4s, #24
+ eor v0.16b, v17.16b, v0.16b
+ add v1.4s, v16.4s, v1.4s
+ ldur q23, [x29, #-144]
+ orr v11.16b, v11.16b, v15.16b
+ ushr v15.4s, v0.4s, #8
+ shl v0.4s, v0.4s, #24
+ eor v8.16b, v1.16b, v8.16b
+ add v5.4s, v10.4s, v5.4s
+ orr v0.16b, v0.16b, v15.16b
+ ushr v15.4s, v8.4s, #7
+ shl v8.4s, v8.4s, #25
+ eor v9.16b, v5.16b, v9.16b
+ add v4.4s, v11.4s, v4.4s
+ orr v8.16b, v8.16b, v15.16b
+ ushr v15.4s, v9.4s, #7
+ shl v9.4s, v9.4s, #25
+ eor v31.16b, v4.16b, v31.16b
+ add v12.4s, v0.4s, v12.4s
+ orr v9.16b, v9.16b, v15.16b
+ ushr v15.4s, v31.4s, #7
+ shl v31.4s, v31.4s, #25
+ eor v13.16b, v12.16b, v13.16b
+ add v14.4s, v14.4s, v23.4s
+ orr v31.16b, v31.16b, v15.16b
+ ushr v15.4s, v13.4s, #7
+ shl v13.4s, v13.4s, #25
+ add v14.4s, v14.4s, v9.4s
+ add v2.4s, v2.4s, v29.4s
+ orr v13.16b, v13.16b, v15.16b
+ eor v0.16b, v14.16b, v0.16b
+ add v2.4s, v2.4s, v31.4s
+ add v3.4s, v3.4s, v30.4s
+ rev32 v0.8h, v0.8h
+ eor v16.16b, v2.16b, v16.16b
+ add v3.4s, v3.4s, v13.4s
+ add v17.4s, v17.4s, v26.4s
+ add v4.4s, v4.4s, v0.4s
+ rev32 v16.8h, v16.8h
+ eor v10.16b, v3.16b, v10.16b
+ add v17.4s, v17.4s, v8.4s
+ ldur q22, [x29, #-128]
+ eor v9.16b, v4.16b, v9.16b
+ add v12.4s, v12.4s, v16.4s
+ rev32 v10.8h, v10.8h
+ eor v11.16b, v17.16b, v11.16b
+ ushr v15.4s, v9.4s, #12
+ shl v9.4s, v9.4s, #20
+ eor v31.16b, v12.16b, v31.16b
+ add v1.4s, v1.4s, v10.4s
+ rev32 v11.8h, v11.8h
+ ldr q26, [sp, #176]
+ orr v9.16b, v9.16b, v15.16b
+ ushr v15.4s, v31.4s, #12
+ shl v31.4s, v31.4s, #20
+ eor v13.16b, v1.16b, v13.16b
+ add v5.4s, v5.4s, v11.4s
+ add v14.4s, v14.4s, v24.4s
+ orr v31.16b, v31.16b, v15.16b
+ ushr v15.4s, v13.4s, #12
+ shl v13.4s, v13.4s, #20
+ eor v8.16b, v5.16b, v8.16b
+ add v14.4s, v14.4s, v9.4s
+ add v2.4s, v2.4s, v22.4s
+ orr v13.16b, v13.16b, v15.16b
+ ushr v15.4s, v8.4s, #12
+ shl v8.4s, v8.4s, #20
+ eor v0.16b, v14.16b, v0.16b
+ add v2.4s, v2.4s, v31.4s
+ add v3.4s, v3.4s, v28.4s
+ orr v8.16b, v8.16b, v15.16b
+ ushr v15.4s, v0.4s, #8
+ shl v0.4s, v0.4s, #24
+ eor v16.16b, v2.16b, v16.16b
+ add v3.4s, v3.4s, v13.4s
+ add v17.4s, v17.4s, v26.4s
+ orr v0.16b, v0.16b, v15.16b
+ ushr v15.4s, v16.4s, #8
+ shl v16.4s, v16.4s, #24
+ eor v10.16b, v3.16b, v10.16b
+ add v17.4s, v17.4s, v8.4s
+ orr v16.16b, v16.16b, v15.16b
+ ushr v15.4s, v10.4s, #8
+ shl v10.4s, v10.4s, #24
+ eor v11.16b, v17.16b, v11.16b
+ add v4.4s, v0.4s, v4.4s
+ orr v10.16b, v10.16b, v15.16b
+ ushr v15.4s, v11.4s, #8
+ shl v11.4s, v11.4s, #24
+ eor v9.16b, v4.16b, v9.16b
+ add v12.4s, v16.4s, v12.4s
+ orr v11.16b, v11.16b, v15.16b
+ ushr v15.4s, v9.4s, #7
+ shl v9.4s, v9.4s, #25
+ eor v31.16b, v12.16b, v31.16b
+ add v1.4s, v10.4s, v1.4s
+ orr v9.16b, v9.16b, v15.16b
+ ushr v15.4s, v31.4s, #7
+ shl v31.4s, v31.4s, #25
+ eor v13.16b, v1.16b, v13.16b
+ add v5.4s, v11.4s, v5.4s
+ orr v31.16b, v31.16b, v15.16b
+ ushr v15.4s, v13.4s, #7
+ shl v13.4s, v13.4s, #25
+ eor v8.16b, v5.16b, v8.16b
+ orr v13.16b, v13.16b, v15.16b
+ ushr v15.4s, v8.4s, #7
+ shl v8.4s, v8.4s, #25
+ orr v8.16b, v8.16b, v15.16b
+ add v14.4s, v14.4s, v18.4s
+ add v14.4s, v14.4s, v8.4s
+ add v2.4s, v2.4s, v27.4s
+ eor v16.16b, v14.16b, v16.16b
+ add v2.4s, v2.4s, v9.4s
+ add v3.4s, v3.4s, v7.4s
+ rev32 v16.8h, v16.8h
+ eor v10.16b, v2.16b, v10.16b
+ add v3.4s, v3.4s, v31.4s
+ add v17.4s, v17.4s, v21.4s
+ add v1.4s, v1.4s, v16.4s
+ rev32 v10.8h, v10.8h
+ eor v11.16b, v3.16b, v11.16b
+ add v17.4s, v17.4s, v13.4s
+ eor v8.16b, v1.16b, v8.16b
+ add v5.4s, v5.4s, v10.4s
+ rev32 v11.8h, v11.8h
+ eor v0.16b, v17.16b, v0.16b
+ add v14.4s, v14.4s, v6.4s
+ ldur q6, [x29, #-96]
+ ushr v15.4s, v8.4s, #12
+ shl v8.4s, v8.4s, #20
+ eor v9.16b, v5.16b, v9.16b
+ add v4.4s, v4.4s, v11.4s
+ rev32 v0.8h, v0.8h
+ stur q20, [x29, #-160]
+ mov v20.16b, v29.16b
+ orr v8.16b, v8.16b, v15.16b
+ ushr v15.4s, v9.4s, #12
+ shl v9.4s, v9.4s, #20
+ eor v31.16b, v4.16b, v31.16b
+ add v12.4s, v12.4s, v0.4s
+ mov v19.16b, v29.16b
+ orr v9.16b, v9.16b, v15.16b
+ ushr v15.4s, v31.4s, #12
+ shl v31.4s, v31.4s, #20
+ eor v13.16b, v12.16b, v13.16b
+ add v14.4s, v14.4s, v8.4s
+ add v2.4s, v2.4s, v20.4s
+ mov v19.16b, v28.16b
+ orr v31.16b, v31.16b, v15.16b
+ ushr v15.4s, v13.4s, #12
+ shl v13.4s, v13.4s, #20
+ eor v16.16b, v14.16b, v16.16b
+ add v2.4s, v2.4s, v9.4s
+ add v3.4s, v3.4s, v6.4s
+ orr v13.16b, v13.16b, v15.16b
+ ushr v15.4s, v16.4s, #8
+ shl v16.4s, v16.4s, #24
+ eor v10.16b, v2.16b, v10.16b
+ add v3.4s, v3.4s, v31.4s
+ add v17.4s, v17.4s, v19.4s
+ orr v16.16b, v16.16b, v15.16b
+ ushr v15.4s, v10.4s, #8
+ shl v10.4s, v10.4s, #24
+ eor v11.16b, v3.16b, v11.16b
+ add v17.4s, v17.4s, v13.4s
+ orr v10.16b, v10.16b, v15.16b
+ ushr v15.4s, v11.4s, #8
+ shl v11.4s, v11.4s, #24
+ eor v0.16b, v17.16b, v0.16b
+ add v1.4s, v16.4s, v1.4s
+ orr v11.16b, v11.16b, v15.16b
+ ushr v15.4s, v0.4s, #8
+ shl v0.4s, v0.4s, #24
+ eor v8.16b, v1.16b, v8.16b
+ add v5.4s, v10.4s, v5.4s
+ orr v0.16b, v0.16b, v15.16b
+ ushr v15.4s, v8.4s, #7
+ shl v8.4s, v8.4s, #25
+ eor v9.16b, v5.16b, v9.16b
+ add v4.4s, v11.4s, v4.4s
+ orr v8.16b, v8.16b, v15.16b
+ ushr v15.4s, v9.4s, #7
+ shl v9.4s, v9.4s, #25
+ eor v31.16b, v4.16b, v31.16b
+ add v12.4s, v0.4s, v12.4s
+ orr v9.16b, v9.16b, v15.16b
+ ushr v15.4s, v31.4s, #7
+ shl v31.4s, v31.4s, #25
+ eor v13.16b, v12.16b, v13.16b
+ add v14.4s, v14.4s, v25.4s
+ orr v31.16b, v31.16b, v15.16b
+ ushr v15.4s, v13.4s, #7
+ shl v13.4s, v13.4s, #25
+ add v14.4s, v14.4s, v9.4s
+ add v2.4s, v2.4s, v30.4s
+ orr v13.16b, v13.16b, v15.16b
+ eor v0.16b, v14.16b, v0.16b
+ add v2.4s, v2.4s, v31.4s
+ add v3.4s, v3.4s, v24.4s
+ rev32 v0.8h, v0.8h
+ eor v16.16b, v2.16b, v16.16b
+ add v3.4s, v3.4s, v13.4s
+ add v17.4s, v17.4s, v26.4s
+ mov v29.16b, v27.16b
+ add v4.4s, v4.4s, v0.4s
+ rev32 v16.8h, v16.8h
+ eor v10.16b, v3.16b, v10.16b
+ add v17.4s, v17.4s, v8.4s
+ ldur q27, [x29, #-160]
+ eor v9.16b, v4.16b, v9.16b
+ add v12.4s, v12.4s, v16.4s
+ rev32 v10.8h, v10.8h
+ eor v11.16b, v17.16b, v11.16b
+ ldur q6, [x29, #-80]
+ ushr v15.4s, v9.4s, #12
+ shl v9.4s, v9.4s, #20
+ eor v31.16b, v12.16b, v31.16b
+ add v1.4s, v1.4s, v10.4s
+ rev32 v11.8h, v11.8h
+ orr v9.16b, v9.16b, v15.16b
+ ushr v15.4s, v31.4s, #12
+ shl v31.4s, v31.4s, #20
+ eor v13.16b, v1.16b, v13.16b
+ add v5.4s, v5.4s, v11.4s
+ add v14.4s, v14.4s, v22.4s
+ orr v31.16b, v31.16b, v15.16b
+ ushr v15.4s, v13.4s, #12
+ shl v13.4s, v13.4s, #20
+ eor v8.16b, v5.16b, v8.16b
+ add v14.4s, v14.4s, v9.4s
+ add v2.4s, v2.4s, v27.4s
+ orr v13.16b, v13.16b, v15.16b
+ ushr v15.4s, v8.4s, #12
+ shl v8.4s, v8.4s, #20
+ eor v0.16b, v14.16b, v0.16b
+ add v2.4s, v2.4s, v31.4s
+ add v3.4s, v3.4s, v6.4s
+ orr v8.16b, v8.16b, v15.16b
+ ushr v15.4s, v0.4s, #8
+ shl v0.4s, v0.4s, #24
+ eor v16.16b, v2.16b, v16.16b
+ add v3.4s, v3.4s, v13.4s
+ add v17.4s, v17.4s, v23.4s
+ orr v0.16b, v0.16b, v15.16b
+ ushr v15.4s, v16.4s, #8
+ shl v16.4s, v16.4s, #24
+ eor v10.16b, v3.16b, v10.16b
+ add v17.4s, v17.4s, v8.4s
+ orr v16.16b, v16.16b, v15.16b
+ ushr v15.4s, v10.4s, #8
+ shl v10.4s, v10.4s, #24
+ eor v11.16b, v17.16b, v11.16b
+ add v4.4s, v0.4s, v4.4s
+ orr v10.16b, v10.16b, v15.16b
+ ushr v15.4s, v11.4s, #8
+ shl v11.4s, v11.4s, #24
+ eor v9.16b, v4.16b, v9.16b
+ add v12.4s, v16.4s, v12.4s
+ orr v11.16b, v11.16b, v15.16b
+ ushr v15.4s, v9.4s, #7
+ shl v9.4s, v9.4s, #25
+ eor v31.16b, v12.16b, v31.16b
+ add v1.4s, v10.4s, v1.4s
+ orr v9.16b, v9.16b, v15.16b
+ ushr v15.4s, v31.4s, #7
+ shl v31.4s, v31.4s, #25
+ eor v13.16b, v1.16b, v13.16b
+ add v5.4s, v11.4s, v5.4s
+ orr v31.16b, v31.16b, v15.16b
+ ushr v15.4s, v13.4s, #7
+ shl v13.4s, v13.4s, #25
+ eor v8.16b, v5.16b, v8.16b
+ orr v13.16b, v13.16b, v15.16b
+ ushr v15.4s, v8.4s, #7
+ shl v8.4s, v8.4s, #25
+ orr v8.16b, v8.16b, v15.16b
+ add v14.4s, v14.4s, v29.4s
+ add v14.4s, v14.4s, v8.4s
+ add v2.4s, v2.4s, v20.4s
+ mov v28.16b, v7.16b
+ eor v16.16b, v14.16b, v16.16b
+ add v2.4s, v2.4s, v9.4s
+ add v3.4s, v3.4s, v19.4s
+ rev32 v16.8h, v16.8h
+ eor v10.16b, v2.16b, v10.16b
+ add v3.4s, v3.4s, v31.4s
+ add v17.4s, v17.4s, v28.4s
+ add v1.4s, v1.4s, v16.4s
+ rev32 v10.8h, v10.8h
+ eor v11.16b, v3.16b, v11.16b
+ add v17.4s, v17.4s, v13.4s
+ eor v8.16b, v1.16b, v8.16b
+ add v5.4s, v5.4s, v10.4s
+ rev32 v11.8h, v11.8h
+ eor v0.16b, v17.16b, v0.16b
+ ushr v15.4s, v8.4s, #12
+ shl v8.4s, v8.4s, #20
+ eor v9.16b, v5.16b, v9.16b
+ add v4.4s, v4.4s, v11.4s
+ rev32 v0.8h, v0.8h
+ orr v8.16b, v8.16b, v15.16b
+ ushr v15.4s, v9.4s, #12
+ shl v9.4s, v9.4s, #20
+ eor v31.16b, v4.16b, v31.16b
+ add v12.4s, v12.4s, v0.4s
+ add v14.4s, v14.4s, v21.4s
+ orr v9.16b, v9.16b, v15.16b
+ ushr v15.4s, v31.4s, #12
+ shl v31.4s, v31.4s, #20
+ eor v13.16b, v12.16b, v13.16b
+ add v14.4s, v14.4s, v8.4s
+ add v2.4s, v2.4s, v30.4s
+ orr v31.16b, v31.16b, v15.16b
+ ushr v15.4s, v13.4s, #12
+ shl v13.4s, v13.4s, #20
+ eor v16.16b, v14.16b, v16.16b
+ add v2.4s, v2.4s, v9.4s
+ orr v13.16b, v13.16b, v15.16b
+ ushr v15.4s, v16.4s, #8
+ shl v16.4s, v16.4s, #24
+ eor v10.16b, v2.16b, v10.16b
+ orr v16.16b, v16.16b, v15.16b
+ ushr v15.4s, v10.4s, #8
+ shl v10.4s, v10.4s, #24
+ add v3.4s, v3.4s, v18.4s
+ orr v10.16b, v10.16b, v15.16b
+ add v15.4s, v3.4s, v31.4s
+ eor v3.16b, v15.16b, v11.16b
+ ushr v11.4s, v3.4s, #8
+ shl v3.4s, v3.4s, #24
+ orr v11.16b, v3.16b, v11.16b
+ add v3.4s, v17.4s, v6.4s
+ add v17.4s, v3.4s, v13.4s
+ eor v0.16b, v17.16b, v0.16b
+ ushr v3.4s, v0.4s, #8
+ shl v0.4s, v0.4s, #24
+ add v1.4s, v16.4s, v1.4s
+ orr v0.16b, v0.16b, v3.16b
+ eor v3.16b, v1.16b, v8.16b
+ ushr v8.4s, v3.4s, #7
+ shl v3.4s, v3.4s, #25
+ add v5.4s, v10.4s, v5.4s
+ orr v8.16b, v3.16b, v8.16b
+ eor v3.16b, v5.16b, v9.16b
+ add v4.4s, v11.4s, v4.4s
+ ushr v9.4s, v3.4s, #7
+ shl v3.4s, v3.4s, #25
+ eor v31.16b, v4.16b, v31.16b
+ mov v7.16b, v23.16b
+ mov v23.16b, v28.16b
+ mov v28.16b, v6.16b
+ orr v3.16b, v3.16b, v9.16b
+ ushr v9.4s, v31.4s, #7
+ shl v31.4s, v31.4s, #25
+ ldur q6, [x29, #-64]
+ orr v31.16b, v31.16b, v9.16b
+ add v9.4s, v0.4s, v12.4s
+ eor v12.16b, v9.16b, v13.16b
+ ushr v13.4s, v12.4s, #7
+ shl v12.4s, v12.4s, #25
+ orr v12.16b, v12.16b, v13.16b
+ add v13.4s, v14.4s, v6.4s
+ add v13.4s, v13.4s, v3.4s
+ eor v0.16b, v13.16b, v0.16b
+ add v2.4s, v2.4s, v24.4s
+ rev32 v14.8h, v0.8h
+ add v0.4s, v2.4s, v31.4s
+ add v6.4s, v4.4s, v14.4s
+ eor v2.16b, v0.16b, v16.16b
+ eor v3.16b, v6.16b, v3.16b
+ rev32 v16.8h, v2.8h
+ ushr v4.4s, v3.4s, #12
+ shl v3.4s, v3.4s, #20
+ add v2.4s, v9.4s, v16.4s
+ orr v4.16b, v3.16b, v4.16b
+ eor v3.16b, v2.16b, v31.16b
+ ushr v31.4s, v3.4s, #12
+ shl v3.4s, v3.4s, #20
+ orr v3.16b, v3.16b, v31.16b
+ add v31.4s, v15.4s, v22.4s
+ add v31.4s, v31.4s, v12.4s
+ add v17.4s, v17.4s, v7.4s
+ eor v9.16b, v31.16b, v10.16b
+ add v17.4s, v17.4s, v8.4s
+ rev32 v9.8h, v9.8h
+ eor v11.16b, v17.16b, v11.16b
+ add v1.4s, v1.4s, v9.4s
+ rev32 v11.8h, v11.8h
+ eor v10.16b, v1.16b, v12.16b
+ add v5.4s, v5.4s, v11.4s
+ ushr v12.4s, v10.4s, #12
+ shl v10.4s, v10.4s, #20
+ eor v8.16b, v5.16b, v8.16b
+ orr v10.16b, v10.16b, v12.16b
+ ushr v12.4s, v8.4s, #12
+ shl v8.4s, v8.4s, #20
+ orr v8.16b, v8.16b, v12.16b
+ add v12.4s, v13.4s, v27.4s
+ add v12.4s, v12.4s, v4.4s
+ eor v13.16b, v12.16b, v14.16b
+ ldur q14, [x29, #-96]
+ mov v25.16b, v29.16b
+ add v29.4s, v12.4s, v20.4s
+ add v20.4s, v31.4s, v26.4s
+ add v0.4s, v0.4s, v14.4s
+ add v0.4s, v0.4s, v3.4s
+ eor v16.16b, v0.16b, v16.16b
+ add v0.4s, v0.4s, v30.4s
+ ldur q30, [x29, #-112]
+ add v20.4s, v20.4s, v10.4s
+ eor v31.16b, v20.16b, v9.16b
+ add v20.4s, v20.4s, v28.4s
+ add v17.4s, v17.4s, v30.4s
+ add v17.4s, v17.4s, v8.4s
+ eor v9.16b, v17.16b, v11.16b
+ ushr v28.4s, v13.4s, #8
+ shl v11.4s, v13.4s, #24
+ orr v28.16b, v11.16b, v28.16b
+ ushr v11.4s, v16.4s, #8
+ shl v16.4s, v16.4s, #24
+ orr v16.16b, v16.16b, v11.16b
+ ushr v11.4s, v31.4s, #8
+ shl v31.4s, v31.4s, #24
+ add v6.4s, v28.4s, v6.4s
+ orr v31.16b, v31.16b, v11.16b
+ ushr v11.4s, v9.4s, #8
+ shl v9.4s, v9.4s, #24
+ add v2.4s, v16.4s, v2.4s
+ eor v4.16b, v6.16b, v4.16b
+ orr v9.16b, v9.16b, v11.16b
+ add v1.4s, v31.4s, v1.4s
+ eor v3.16b, v2.16b, v3.16b
+ ushr v11.4s, v4.4s, #7
+ shl v4.4s, v4.4s, #25
+ add v5.4s, v9.4s, v5.4s
+ eor v10.16b, v1.16b, v10.16b
+ orr v4.16b, v4.16b, v11.16b
+ ushr v11.4s, v3.4s, #7
+ shl v3.4s, v3.4s, #25
+ eor v8.16b, v5.16b, v8.16b
+ orr v3.16b, v3.16b, v11.16b
+ ushr v11.4s, v10.4s, #7
+ shl v10.4s, v10.4s, #25
+ orr v10.16b, v10.16b, v11.16b
+ ushr v11.4s, v8.4s, #7
+ shl v8.4s, v8.4s, #25
+ orr v8.16b, v8.16b, v11.16b
+ add v29.4s, v29.4s, v8.4s
+ eor v16.16b, v29.16b, v16.16b
+ add v0.4s, v0.4s, v4.4s
+ mov v12.16b, v26.16b
+ add v17.4s, v17.4s, v19.4s
+ add v26.4s, v29.4s, v23.4s
+ eor v29.16b, v0.16b, v31.16b
+ add v20.4s, v20.4s, v3.4s
+ rev32 v16.8h, v16.8h
+ stur q18, [x29, #-176]
+ mov v18.16b, v27.16b
+ add v0.4s, v0.4s, v24.4s
+ eor v27.16b, v20.16b, v9.16b
+ add v17.4s, v17.4s, v10.4s
+ rev32 v24.8h, v29.8h
+ add v1.4s, v1.4s, v16.4s
+ add v20.4s, v20.4s, v25.4s
+ eor v25.16b, v17.16b, v28.16b
+ rev32 v27.8h, v27.8h
+ add v5.4s, v5.4s, v24.4s
+ eor v28.16b, v1.16b, v8.16b
+ rev32 v25.8h, v25.8h
+ add v6.4s, v6.4s, v27.4s
+ eor v4.16b, v5.16b, v4.16b
+ ushr v31.4s, v28.4s, #12
+ shl v28.4s, v28.4s, #20
+ add v2.4s, v2.4s, v25.4s
+ eor v3.16b, v6.16b, v3.16b
+ orr v28.16b, v28.16b, v31.16b
+ ushr v31.4s, v4.4s, #12
+ shl v4.4s, v4.4s, #20
+ eor v29.16b, v2.16b, v10.16b
+ orr v4.16b, v4.16b, v31.16b
+ ushr v31.4s, v3.4s, #12
+ shl v3.4s, v3.4s, #20
+ add v26.4s, v26.4s, v28.4s
+ orr v3.16b, v3.16b, v31.16b
+ ushr v31.4s, v29.4s, #12
+ shl v29.4s, v29.4s, #20
+ eor v16.16b, v26.16b, v16.16b
+ add v0.4s, v0.4s, v4.4s
+ add v17.4s, v17.4s, v12.4s
+ orr v29.16b, v29.16b, v31.16b
+ eor v24.16b, v0.16b, v24.16b
+ add v0.4s, v0.4s, v22.4s
+ add v20.4s, v20.4s, v3.4s
+ ushr v22.4s, v16.4s, #8
+ shl v16.4s, v16.4s, #24
+ add v23.4s, v26.4s, v21.4s
+ eor v21.16b, v20.16b, v27.16b
+ add v17.4s, v17.4s, v29.4s
+ orr v16.16b, v16.16b, v22.16b
+ ushr v22.4s, v24.4s, #8
+ shl v24.4s, v24.4s, #24
+ eor v25.16b, v17.16b, v25.16b
+ orr v22.16b, v24.16b, v22.16b
+ ushr v24.4s, v21.4s, #8
+ shl v21.4s, v21.4s, #24
+ orr v21.16b, v21.16b, v24.16b
+ ushr v24.4s, v25.4s, #8
+ shl v25.4s, v25.4s, #24
+ add v1.4s, v16.4s, v1.4s
+ orr v24.16b, v25.16b, v24.16b
+ add v5.4s, v22.4s, v5.4s
+ eor v25.16b, v1.16b, v28.16b
+ add v6.4s, v21.4s, v6.4s
+ eor v4.16b, v5.16b, v4.16b
+ ushr v27.4s, v25.4s, #7
+ shl v25.4s, v25.4s, #25
+ add v2.4s, v24.4s, v2.4s
+ eor v3.16b, v6.16b, v3.16b
+ orr v25.16b, v25.16b, v27.16b
+ ushr v27.4s, v4.4s, #7
+ shl v4.4s, v4.4s, #25
+ ldur q19, [x29, #-176]
+ eor v26.16b, v2.16b, v29.16b
+ orr v4.16b, v4.16b, v27.16b
+ ushr v27.4s, v3.4s, #7
+ shl v3.4s, v3.4s, #25
+ orr v3.16b, v3.16b, v27.16b
+ ushr v27.4s, v26.4s, #7
+ shl v26.4s, v26.4s, #25
+ add v20.4s, v20.4s, v18.4s
+ add v17.4s, v17.4s, v30.4s
+ orr v26.16b, v26.16b, v27.16b
+ add v0.4s, v0.4s, v3.4s
+ eor v16.16b, v0.16b, v16.16b
+ add v0.4s, v0.4s, v19.4s
+ add v19.4s, v20.4s, v26.4s
+ add v17.4s, v17.4s, v25.4s
+ eor v20.16b, v19.16b, v22.16b
+ add v7.4s, v19.4s, v7.4s
+ eor v19.16b, v17.16b, v21.16b
+ ldur q21, [x29, #-64]
+ add v23.4s, v23.4s, v4.4s
+ eor v24.16b, v23.16b, v24.16b
+ rev32 v16.8h, v16.8h
+ add v17.4s, v17.4s, v21.4s
+ rev32 v21.8h, v24.8h
+ add v6.4s, v6.4s, v21.4s
+ rev32 v20.8h, v20.8h
+ add v2.4s, v2.4s, v16.4s
+ eor v4.16b, v6.16b, v4.16b
+ rev32 v19.8h, v19.8h
+ add v1.4s, v1.4s, v20.4s
+ eor v3.16b, v2.16b, v3.16b
+ ushr v24.4s, v4.4s, #12
+ shl v4.4s, v4.4s, #20
+ add v5.4s, v5.4s, v19.4s
+ eor v22.16b, v1.16b, v26.16b
+ orr v4.16b, v4.16b, v24.16b
+ ushr v24.4s, v3.4s, #12
+ shl v3.4s, v3.4s, #20
+ add v18.4s, v23.4s, v14.4s
+ eor v23.16b, v5.16b, v25.16b
+ orr v3.16b, v3.16b, v24.16b
+ ushr v24.4s, v22.4s, #12
+ shl v22.4s, v22.4s, #20
+ orr v22.16b, v22.16b, v24.16b
+ ushr v24.4s, v23.4s, #12
+ shl v23.4s, v23.4s, #20
+ orr v23.16b, v23.16b, v24.16b
+ add v18.4s, v18.4s, v4.4s
+ add v0.4s, v0.4s, v3.4s
+ add v24.4s, v17.4s, v23.4s
+ eor v17.16b, v18.16b, v21.16b
+ add v7.4s, v7.4s, v22.4s
+ eor v16.16b, v0.16b, v16.16b
+ ushr v21.4s, v17.4s, #8
+ shl v17.4s, v17.4s, #24
+ eor v20.16b, v7.16b, v20.16b
+ orr v21.16b, v17.16b, v21.16b
+ ushr v17.4s, v16.4s, #8
+ shl v16.4s, v16.4s, #24
+ eor v19.16b, v24.16b, v19.16b
+ orr v16.16b, v16.16b, v17.16b
+ ushr v17.4s, v20.4s, #8
+ shl v20.4s, v20.4s, #24
+ orr v25.16b, v20.16b, v17.16b
+ ushr v17.4s, v19.4s, #8
+ shl v19.4s, v19.4s, #24
+ orr v19.16b, v19.16b, v17.16b
+ add v1.4s, v25.4s, v1.4s
+ eor v22.16b, v1.16b, v22.16b
+ eor v20.16b, v1.16b, v18.16b
+ add v1.4s, v19.4s, v5.4s
+ eor v26.16b, v1.16b, v0.16b
+ add v0.4s, v21.4s, v6.4s
+ eor v5.16b, v1.16b, v23.16b
+ eor v1.16b, v0.16b, v4.16b
+ eor v17.16b, v0.16b, v7.16b
+ add v0.4s, v16.4s, v2.4s
+ eor v2.16b, v0.16b, v3.16b
+ eor v6.16b, v0.16b, v24.16b
+ ushr v0.4s, v1.4s, #7
+ shl v1.4s, v1.4s, #25
+ orr v0.16b, v1.16b, v0.16b
+ ushr v1.4s, v2.4s, #7
+ shl v2.4s, v2.4s, #25
+ orr v1.16b, v2.16b, v1.16b
+ ushr v2.4s, v22.4s, #7
+ shl v3.4s, v22.4s, #25
+ orr v2.16b, v3.16b, v2.16b
+ ushr v3.4s, v5.4s, #7
+ shl v4.4s, v5.4s, #25
+ orr v3.16b, v4.16b, v3.16b
+ eor v8.16b, v16.16b, v3.16b
+ eor v9.16b, v25.16b, v0.16b
+ eor v31.16b, v1.16b, v19.16b
+ cmp x17, x22
+ eor v15.16b, v2.16b, v21.16b
+ mov w18, w19
+ b.ne .LBB2_4
+.LBB2_7:
+ zip1 v0.4s, v20.4s, v26.4s
+ zip2 v1.4s, v20.4s, v26.4s
+ zip1 v2.4s, v17.4s, v6.4s
+ zip2 v3.4s, v17.4s, v6.4s
+ zip1 v4.4s, v8.4s, v9.4s
+ zip2 v5.4s, v8.4s, v9.4s
+ zip1 v6.4s, v31.4s, v15.4s
+ zip2 v7.4s, v31.4s, v15.4s
+ add x13, x20, #4
+ tst w5, #0x1
+ sub x28, x28, #4
+ zip1 v16.2d, v0.2d, v2.2d
+ zip2 v0.2d, v0.2d, v2.2d
+ zip1 v2.2d, v1.2d, v3.2d
+ zip2 v1.2d, v1.2d, v3.2d
+ zip1 v3.2d, v4.2d, v6.2d
+ zip2 v4.2d, v4.2d, v6.2d
+ zip1 v6.2d, v5.2d, v7.2d
+ zip2 v5.2d, v5.2d, v7.2d
+ add x24, x24, #32
+ csel x20, x13, x20, ne
+ cmp x28, #3
+ stp q16, q3, [x26]
+ stp q0, q4, [x26, #32]
+ stp q2, q6, [x26, #64]
+ stp q1, q5, [x26, #96]
+ add x26, x26, #128
+ b.hi .LBB2_2
+.LBB2_8:
+ cbz x28, .LBB2_16
+ orr w8, w7, w19
+ and x21, x5, #0x1
+ stur w8, [x29, #-64]
+.LBB2_10:
+ ldr x8, [sp, #40]
+ ldr x25, [x24]
+ ldur w4, [x29, #-64]
+ ldp q1, q0, [x8]
+ mov x8, x22
+ stp q1, q0, [x29, #-48]
+.LBB2_11:
+ subs x23, x8, #1
+ b.eq .LBB2_13
+ cbnz x8, .LBB2_14
+ b .LBB2_15
+.LBB2_13:
+ orr w4, w4, w27
+.LBB2_14:
+ sub x0, x29, #48
+ mov w2, #64
+ mov x1, x25
+ mov x3, x20
+ bl zfs_blake3_compress_in_place_sse2
+ add x25, x25, #64
+ mov x8, x23
+ mov w4, w19
+ b .LBB2_11
+.LBB2_15:
+ ldp q0, q1, [x29, #-48]
+ add x20, x20, x21
+ add x24, x24, #8
+ subs x28, x28, #1
+ stp q0, q1, [x26], #32
+ b.ne .LBB2_10
+.LBB2_16:
+ add sp, sp, #384
+ ldp x20, x19, [sp, #144]
+ ldp x22, x21, [sp, #128]
+ ldp x24, x23, [sp, #112]
+ ldp x26, x25, [sp, #96]
+ ldp x28, x27, [sp, #80]
+ ldp x29, x30, [sp, #64]
+ ldp d9, d8, [sp, #48]
+ ldp d11, d10, [sp, #32]
+ ldp d13, d12, [sp, #16]
+ ldp d15, d14, [sp], #160
+ ret
+.Lfunc_end2:
+ .size zfs_blake3_hash_many_sse2, .Lfunc_end2-zfs_blake3_hash_many_sse2
+ .cfi_endproc
+ .section ".note.GNU-stack","",@progbits
+#endif
diff --git a/sys/contrib/openzfs/module/icp/asm-aarch64/blake3/b3_aarch64_sse41.S b/sys/contrib/openzfs/module/icp/asm-aarch64/blake3/b3_aarch64_sse41.S
new file mode 100644
index 000000000000..eb6946400b8a
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/asm-aarch64/blake3/b3_aarch64_sse41.S
@@ -0,0 +1,2463 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3
+ * Copyright (c) 2019-2022 Samuel Neves
+ * Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
+ *
+ * This is converted assembly: SSE4.1 -> ARMv8-A
+ * Used tools: SIMDe https://github.com/simd-everywhere/simde
+ */
+
+#if defined(__aarch64__)
+ .text
+ .section .rodata.cst16,"aM",@progbits,16 /* 16-byte constants loaded by zfs_blake3_compress_in_place_sse41 */
+ .p2align 4
+.LCPI0_0: /* tbl index: per 32-bit lane take source bytes 2,3,0,1, i.e. rotate each lane by 16 bits */
+ .byte 2
+ .byte 3
+ .byte 0
+ .byte 1
+ .byte 6
+ .byte 7
+ .byte 4
+ .byte 5
+ .byte 10
+ .byte 11
+ .byte 8
+ .byte 9
+ .byte 14
+ .byte 15
+ .byte 12
+ .byte 13
+.LCPI0_1: /* four words added to the rotated fourth state vector to form the third one; presumably BLAKE3 IV[0..3] */
+ .word 1779033703 /* 0x6A09E667 */
+ .word 3144134277 /* 0xBB67AE85 */
+ .word 1013904242 /* 0x3C6EF372 */
+ .word 2773480762 /* 0xA54FF53A */
+.LCPI0_2: /* tbl index: per 32-bit lane take source bytes 1,2,3,0, i.e. rotate each lane right by 8 bits (little-endian lanes) */
+ .byte 1
+ .byte 2
+ .byte 3
+ .byte 0
+ .byte 5
+ .byte 6
+ .byte 7
+ .byte 4
+ .byte 9
+ .byte 10
+ .byte 11
+ .byte 8
+ .byte 13
+ .byte 14
+ .byte 15
+ .byte 12
+.LCPI0_3: /* two-register tbl index (16..31 address the second register): lanes 0,2 from the first, lanes 1,3 from the second */
+ .byte 0
+ .byte 1
+ .byte 2
+ .byte 3
+ .byte 20
+ .byte 21
+ .byte 22
+ .byte 23
+ .byte 8
+ .byte 9
+ .byte 10
+ .byte 11
+ .byte 28
+ .byte 29
+ .byte 30
+ .byte 31
+.LCPI0_4: /* two-register tbl index: lanes 0,1,2 from the first register, lane 3 from the second */
+ .byte 0
+ .byte 1
+ .byte 2
+ .byte 3
+ .byte 4
+ .byte 5
+ .byte 6
+ .byte 7
+ .byte 8
+ .byte 9
+ .byte 10
+ .byte 11
+ .byte 28
+ .byte 29
+ .byte 30
+ .byte 31
+ .text
+ .globl zfs_blake3_compress_in_place_sse41 /* compression routine: mixes the 32 bytes at x0 with the 64 bytes at x1 and writes 32 bytes back to x0 */
+ .p2align 2
+ .type zfs_blake3_compress_in_place_sse41,@function /* NOTE(review): argument roles presumed from the BLAKE3 API (x0 = cv, x1 = block, w2 = block_len, x3 = counter, w4 = flags) -- confirm against the C prototype */
+zfs_blake3_compress_in_place_sse41: /* leaf routine: no stack accesses and no calls; the add/xor/rotate group (rotations 16, 12, 8, 7) appears 14 times */
+ .cfi_startproc
+ ldp q7, q6, [x0] /* v7 = bytes 0..15 at x0 (state row 0), v6 = bytes 16..31 (state row 1) */
+ ldp q17, q18, [x1] /* first 32 bytes of the block at x1 */
+ add x12, x1, #32
+ ld2 { v4.4s, v5.4s }, [x12] /* block bytes 32..63, de-interleaved: v4 = even-indexed words, v5 = odd-indexed words */
+ lsr x10, x3, #32 /* upper 32 bits of x3 */
+ fmov s16, w3 /* v16 lane 0 = lower 32 bits of x3 */
+ adrp x13, .LCPI0_0
+ adrp x11, .LCPI0_1
+ and w8, w2, #0xff /* only the low byte of w2 is used */
+ mov v16.s[1], w10
+ ldr q0, [x13, :lo12:.LCPI0_0] /* v0 = tbl index for the 16-bit lane rotate */
+ ldr q20, [x11, :lo12:.LCPI0_1] /* v20 = constant words used to build state row 2 */
+ adrp x11, .LCPI0_4
+ and w9, w4, #0xff /* only the low byte of w4 is used */
+ ldr q2, [x11, :lo12:.LCPI0_4] /* v2 = lane-blend index (lane 3 from the second table register) */
+ mov v16.s[2], w8
+ uzp1 v21.4s, v17.4s, v18.4s /* even-indexed words of block bytes 0..31 */
+ add v7.4s, v6.4s, v7.4s /* row0 += row1 */
+ adrp x12, .LCPI0_3
+ mov v16.s[3], w9 /* v16 = { x3 low, x3 high, w2 & 0xff, w4 & 0xff } (state row 3) */
+ uzp2 v18.4s, v17.4s, v18.4s /* odd-indexed words of block bytes 0..31 */
+ add v7.4s, v7.4s, v21.4s /* row0 += message words */
+ ext v17.16b, v5.16b, v5.16b, #12
+ ldr q3, [x12, :lo12:.LCPI0_3] /* v3 = lane-blend index (lanes 1,3 from the second table register) */
+ ext v24.16b, v4.16b, v4.16b, #12
+ eor v16.16b, v7.16b, v16.16b /* row3 ^= row0 */
+ mov v27.16b, v17.16b
+ uzp1 v19.4s, v21.4s, v21.4s
+ ext v25.16b, v21.16b, v21.16b, #12
+ zip2 v28.4s, v18.4s, v17.4s
+ tbl v29.16b, { v16.16b }, v0.16b /* rotate each 32-bit lane of row3 by 16 */
+ mov v27.s[1], v24.s[2]
+ zip1 v23.2d, v17.2d, v18.2d
+ ext v19.16b, v19.16b, v21.16b, #8
+ add v22.4s, v29.4s, v20.4s /* row2 = constants + row3 */
+ ext v26.16b, v21.16b, v25.16b, #12
+ tbl v20.16b, { v23.16b, v24.16b }, v2.16b
+ zip1 v21.4s, v28.4s, v24.4s
+ zip1 v23.4s, v24.4s, v28.4s
+ uzp2 v19.4s, v19.4s, v18.4s
+ eor v24.16b, v22.16b, v6.16b /* row1 ^ row2 */
+ ext v25.16b, v20.16b, v20.16b, #12
+ ext v6.16b, v23.16b, v21.16b, #8
+ add v7.4s, v7.4s, v18.4s /* row0 += message words */
+ ext v18.16b, v19.16b, v19.16b, #4
+ tbl v16.16b, { v26.16b, v27.16b }, v3.16b
+ uzp1 v21.4s, v20.4s, v25.4s
+ mov v26.16b, v6.16b
+ ext v23.16b, v18.16b, v18.16b, #12
+ mov v26.s[1], v21.s[2]
+ adrp x10, .LCPI0_2
+ ext v25.16b, v18.16b, v23.16b, #12
+ uzp1 v23.4s, v18.4s, v18.4s
+ ldr q1, [x10, :lo12:.LCPI0_2] /* v1 = tbl index for the 8-bit lane rotate */
+ ext v18.16b, v23.16b, v18.16b, #8
+ ushr v23.4s, v24.4s, #12
+ shl v24.4s, v24.4s, #20
+ orr v23.16b, v24.16b, v23.16b /* ushr #12 | shl #20: each lane rotated right by 12 */
+ add v7.4s, v7.4s, v23.4s /* row0 += row1 */
+ eor v27.16b, v29.16b, v7.16b /* row3 ^ row0 */
+ add v4.4s, v7.4s, v4.4s
+ tbl v7.16b, { v25.16b, v26.16b }, v3.16b
+ tbl v26.16b, { v27.16b }, v1.16b /* rotate each lane right by 8 */
+ add v22.4s, v22.4s, v26.4s /* row2 += row3 */
+ uzp2 v18.4s, v18.4s, v16.4s
+ eor v23.16b, v23.16b, v22.16b /* row1 ^= row2 */
+ ext v5.16b, v18.16b, v18.16b, #4
+ ushr v27.4s, v23.4s, #7
+ shl v23.4s, v23.4s, #25
+ uzp1 v25.4s, v5.4s, v5.4s
+ orr v23.16b, v23.16b, v27.16b /* ushr #7 | shl #25: each lane rotated right by 7 */
+ ext v28.16b, v4.16b, v4.16b, #12 /* ext of a register with itself rotates its lanes (here by 12 bytes) */
+ ext v4.16b, v25.16b, v5.16b, #8
+ ext v25.16b, v26.16b, v26.16b, #8 /* row3 lanes rotated by 8 bytes */
+ add v26.4s, v28.4s, v23.4s
+ eor v25.16b, v26.16b, v25.16b
+ ext v22.16b, v22.16b, v22.16b, #4 /* row2 lanes rotated by 4 bytes */
+ tbl v25.16b, { v25.16b }, v0.16b
+ add v22.4s, v22.4s, v25.4s
+ eor v23.16b, v23.16b, v22.16b
+ add v17.4s, v26.4s, v17.4s
+ ushr v26.4s, v23.4s, #12
+ shl v23.4s, v23.4s, #20
+ orr v23.16b, v23.16b, v26.16b
+ add v17.4s, v17.4s, v23.4s
+ eor v25.16b, v25.16b, v17.16b
+ add v17.4s, v17.4s, v19.4s
+ tbl v19.16b, { v25.16b }, v1.16b
+ add v22.4s, v22.4s, v19.4s
+ eor v23.16b, v23.16b, v22.16b
+ ushr v25.4s, v23.4s, #7
+ shl v23.4s, v23.4s, #25
+ ext v17.16b, v17.16b, v17.16b, #4
+ orr v23.16b, v23.16b, v25.16b
+ ext v19.16b, v19.16b, v19.16b, #8
+ add v17.4s, v17.4s, v23.4s
+ eor v19.16b, v17.16b, v19.16b
+ ext v22.16b, v22.16b, v22.16b, #12
+ tbl v19.16b, { v19.16b }, v0.16b
+ add v22.4s, v22.4s, v19.4s
+ eor v23.16b, v23.16b, v22.16b
+ ushr v25.4s, v23.4s, #12
+ shl v23.4s, v23.4s, #20
+ add v17.4s, v17.4s, v16.4s
+ orr v23.16b, v23.16b, v25.16b
+ add v17.4s, v17.4s, v23.4s
+ ext v25.16b, v17.16b, v17.16b, #12
+ eor v17.16b, v19.16b, v17.16b
+ tbl v17.16b, { v17.16b }, v1.16b
+ add v19.4s, v22.4s, v17.4s
+ eor v22.16b, v23.16b, v19.16b
+ add v25.4s, v25.4s, v21.4s
+ zip1 v20.2d, v6.2d, v16.2d
+ ushr v23.4s, v22.4s, #7
+ shl v22.4s, v22.4s, #25
+ zip2 v24.4s, v16.4s, v6.4s
+ tbl v26.16b, { v20.16b, v21.16b }, v2.16b
+ orr v22.16b, v22.16b, v23.16b
+ zip1 v16.4s, v24.4s, v21.4s
+ zip1 v20.4s, v21.4s, v24.4s
+ ext v21.16b, v26.16b, v26.16b, #12
+ ext v17.16b, v17.16b, v17.16b, #8
+ add v25.4s, v25.4s, v22.4s
+ ext v16.16b, v20.16b, v16.16b, #8
+ uzp1 v21.4s, v26.4s, v21.4s
+ eor v26.16b, v25.16b, v17.16b
+ ext v19.16b, v19.16b, v19.16b, #4
+ tbl v26.16b, { v26.16b }, v0.16b
+ mov v29.16b, v16.16b
+ add v19.4s, v19.4s, v26.4s
+ ext v27.16b, v5.16b, v5.16b, #12
+ mov v29.s[1], v21.s[2]
+ eor v22.16b, v22.16b, v19.16b
+ ext v28.16b, v5.16b, v27.16b, #12
+ ushr v27.4s, v22.4s, #12
+ shl v22.4s, v22.4s, #20
+ add v6.4s, v25.4s, v6.4s
+ orr v22.16b, v22.16b, v27.16b
+ add v6.4s, v6.4s, v22.4s
+ eor v26.16b, v26.16b, v6.16b
+ add v6.4s, v6.4s, v18.4s
+ tbl v18.16b, { v26.16b }, v1.16b
+ add v19.4s, v19.4s, v18.4s
+ eor v22.16b, v22.16b, v19.16b
+ ushr v26.4s, v22.4s, #7
+ shl v22.4s, v22.4s, #25
+ ext v6.16b, v6.16b, v6.16b, #4
+ orr v22.16b, v22.16b, v26.16b
+ ext v18.16b, v18.16b, v18.16b, #8
+ add v6.4s, v6.4s, v22.4s
+ eor v18.16b, v6.16b, v18.16b
+ ext v19.16b, v19.16b, v19.16b, #12
+ tbl v18.16b, { v18.16b }, v0.16b
+ add v19.4s, v19.4s, v18.4s
+ eor v22.16b, v22.16b, v19.16b
+ ushr v26.4s, v22.4s, #12
+ shl v22.4s, v22.4s, #20
+ add v6.4s, v6.4s, v7.4s
+ orr v22.16b, v22.16b, v26.16b
+ add v6.4s, v6.4s, v22.4s
+ ext v26.16b, v6.16b, v6.16b, #12
+ eor v6.16b, v18.16b, v6.16b
+ uzp2 v4.4s, v4.4s, v7.4s
+ zip2 v25.4s, v7.4s, v16.4s
+ add v26.4s, v26.4s, v21.4s
+ zip1 v20.2d, v16.2d, v7.2d
+ tbl v6.16b, { v6.16b }, v1.16b
+ ext v24.16b, v4.16b, v4.16b, #4
+ tbl v27.16b, { v20.16b, v21.16b }, v2.16b
+ zip1 v7.4s, v25.4s, v21.4s
+ zip1 v20.4s, v21.4s, v25.4s
+ add v18.4s, v19.4s, v6.4s
+ uzp1 v5.4s, v24.4s, v24.4s
+ ext v21.16b, v27.16b, v27.16b, #12
+ ext v7.16b, v20.16b, v7.16b, #8
+ eor v19.16b, v22.16b, v18.16b
+ ext v5.16b, v5.16b, v24.16b, #8
+ tbl v17.16b, { v28.16b, v29.16b }, v3.16b
+ uzp1 v21.4s, v27.4s, v21.4s
+ mov v28.16b, v7.16b
+ ushr v22.4s, v19.4s, #7
+ shl v19.4s, v19.4s, #25
+ ext v23.16b, v24.16b, v24.16b, #12
+ uzp2 v5.4s, v5.4s, v17.4s
+ mov v28.s[1], v21.s[2]
+ orr v19.16b, v19.16b, v22.16b
+ ext v27.16b, v24.16b, v23.16b, #12
+ ext v23.16b, v5.16b, v5.16b, #4
+ ext v6.16b, v6.16b, v6.16b, #8
+ ext v25.16b, v18.16b, v18.16b, #4
+ add v18.4s, v26.4s, v19.4s
+ uzp1 v24.4s, v23.4s, v23.4s
+ eor v6.16b, v18.16b, v6.16b
+ ext v24.16b, v24.16b, v23.16b, #8
+ add v16.4s, v18.4s, v16.4s
+ tbl v18.16b, { v27.16b, v28.16b }, v3.16b
+ tbl v27.16b, { v6.16b }, v0.16b
+ uzp2 v6.4s, v24.4s, v18.4s
+ add v24.4s, v25.4s, v27.4s
+ eor v19.16b, v19.16b, v24.16b
+ ushr v25.4s, v19.4s, #12
+ shl v19.4s, v19.4s, #20
+ orr v19.16b, v19.16b, v25.16b
+ add v16.4s, v16.4s, v19.4s
+ eor v25.16b, v27.16b, v16.16b
+ add v4.4s, v16.4s, v4.4s
+ tbl v16.16b, { v25.16b }, v1.16b
+ add v24.4s, v24.4s, v16.4s
+ eor v19.16b, v19.16b, v24.16b
+ ushr v25.4s, v19.4s, #7
+ shl v19.4s, v19.4s, #25
+ ext v4.16b, v4.16b, v4.16b, #4
+ orr v19.16b, v19.16b, v25.16b
+ ext v16.16b, v16.16b, v16.16b, #8
+ add v4.4s, v4.4s, v19.4s
+ eor v16.16b, v4.16b, v16.16b
+ ext v24.16b, v24.16b, v24.16b, #12
+ tbl v25.16b, { v16.16b }, v0.16b
+ add v24.4s, v24.4s, v25.4s
+ eor v16.16b, v19.16b, v24.16b
+ ushr v19.4s, v16.4s, #12
+ shl v16.4s, v16.4s, #20
+ add v4.4s, v4.4s, v17.4s
+ orr v19.16b, v16.16b, v19.16b
+ add v27.4s, v4.4s, v19.4s
+ eor v25.16b, v25.16b, v27.16b
+ tbl v25.16b, { v25.16b }, v1.16b
+ add v24.4s, v24.4s, v25.4s
+ zip2 v26.4s, v17.4s, v7.4s
+ ext v4.16b, v27.16b, v27.16b, #12
+ eor v19.16b, v19.16b, v24.16b
+ add v28.4s, v4.4s, v21.4s
+ zip1 v20.2d, v7.2d, v17.2d
+ zip1 v4.4s, v26.4s, v21.4s
+ zip1 v17.4s, v21.4s, v26.4s
+ ushr v26.4s, v19.4s, #7
+ shl v19.4s, v19.4s, #25
+ orr v19.16b, v19.16b, v26.16b
+ ext v25.16b, v25.16b, v25.16b, #8
+ add v27.4s, v28.4s, v19.4s
+ eor v25.16b, v27.16b, v25.16b
+ ext v24.16b, v24.16b, v24.16b, #4
+ tbl v25.16b, { v25.16b }, v0.16b
+ add v24.4s, v24.4s, v25.4s
+ eor v19.16b, v19.16b, v24.16b
+ add v7.4s, v27.4s, v7.4s
+ ushr v27.4s, v19.4s, #12
+ shl v19.4s, v19.4s, #20
+ orr v19.16b, v19.16b, v27.16b
+ add v7.4s, v7.4s, v19.4s
+ eor v25.16b, v25.16b, v7.16b
+ add v5.4s, v7.4s, v5.4s
+ tbl v7.16b, { v25.16b }, v1.16b
+ add v24.4s, v24.4s, v7.4s
+ eor v19.16b, v19.16b, v24.16b
+ ushr v25.4s, v19.4s, #7
+ shl v19.4s, v19.4s, #25
+ ext v5.16b, v5.16b, v5.16b, #4
+ orr v19.16b, v19.16b, v25.16b
+ ext v7.16b, v7.16b, v7.16b, #8
+ add v5.4s, v5.4s, v19.4s
+ eor v7.16b, v5.16b, v7.16b
+ ext v24.16b, v24.16b, v24.16b, #12
+ tbl v7.16b, { v7.16b }, v0.16b
+ add v24.4s, v24.4s, v7.4s
+ eor v19.16b, v19.16b, v24.16b
+ ushr v25.4s, v19.4s, #12
+ shl v19.4s, v19.4s, #20
+ tbl v16.16b, { v20.16b, v21.16b }, v2.16b
+ add v5.4s, v5.4s, v18.4s
+ orr v19.16b, v19.16b, v25.16b
+ ext v20.16b, v16.16b, v16.16b, #12
+ ext v4.16b, v17.16b, v4.16b, #8
+ add v5.4s, v5.4s, v19.4s
+ uzp1 v21.4s, v16.4s, v20.4s
+ mov v17.16b, v4.16b
+ ext v25.16b, v5.16b, v5.16b, #12
+ mov v17.s[1], v21.s[2]
+ add v25.4s, v25.4s, v21.4s
+ zip1 v20.2d, v4.2d, v18.2d
+ ext v22.16b, v23.16b, v23.16b, #12
+ zip2 v26.4s, v18.4s, v4.4s
+ tbl v18.16b, { v20.16b, v21.16b }, v2.16b
+ eor v5.16b, v7.16b, v5.16b
+ ext v16.16b, v23.16b, v22.16b, #12
+ ext v22.16b, v6.16b, v6.16b, #4
+ zip1 v27.4s, v26.4s, v21.4s
+ zip1 v20.4s, v21.4s, v26.4s
+ ext v21.16b, v18.16b, v18.16b, #12
+ tbl v5.16b, { v5.16b }, v1.16b
+ ext v20.16b, v20.16b, v27.16b, #8
+ uzp1 v27.4s, v18.4s, v21.4s
+ uzp1 v18.4s, v22.4s, v22.4s
+ add v21.4s, v24.4s, v5.4s
+ ext v18.16b, v18.16b, v22.16b, #8
+ eor v19.16b, v19.16b, v21.16b
+ tbl v7.16b, { v16.16b, v17.16b }, v3.16b
+ uzp2 v18.4s, v18.4s, v17.4s
+ zip2 v16.4s, v16.4s, v20.4s
+ ushr v17.4s, v19.4s, #7
+ shl v19.4s, v19.4s, #25
+ orr v17.16b, v19.16b, v17.16b
+ ext v5.16b, v5.16b, v5.16b, #8
+ add v19.4s, v25.4s, v17.4s
+ eor v5.16b, v19.16b, v5.16b
+ ext v21.16b, v21.16b, v21.16b, #4
+ tbl v5.16b, { v5.16b }, v0.16b
+ add v4.4s, v19.4s, v4.4s
+ add v19.4s, v21.4s, v5.4s
+ eor v17.16b, v17.16b, v19.16b
+ ushr v21.4s, v17.4s, #12
+ shl v17.4s, v17.4s, #20
+ orr v17.16b, v17.16b, v21.16b
+ add v4.4s, v4.4s, v17.4s
+ eor v5.16b, v5.16b, v4.16b
+ tbl v5.16b, { v5.16b }, v1.16b
+ add v4.4s, v4.4s, v6.4s
+ add v6.4s, v19.4s, v5.4s
+ eor v17.16b, v17.16b, v6.16b
+ ushr v19.4s, v17.4s, #7
+ shl v17.4s, v17.4s, #25
+ ext v4.16b, v4.16b, v4.16b, #4
+ orr v17.16b, v17.16b, v19.16b
+ ext v5.16b, v5.16b, v5.16b, #8
+ add v4.4s, v4.4s, v17.4s
+ eor v5.16b, v4.16b, v5.16b
+ ext v6.16b, v6.16b, v6.16b, #12
+ tbl v5.16b, { v5.16b }, v0.16b
+ add v6.4s, v6.4s, v5.4s
+ eor v17.16b, v17.16b, v6.16b
+ ushr v19.4s, v17.4s, #12
+ shl v17.4s, v17.4s, #20
+ add v4.4s, v4.4s, v7.4s
+ orr v17.16b, v17.16b, v19.16b
+ add v4.4s, v4.4s, v17.4s
+ eor v5.16b, v5.16b, v4.16b
+ tbl v5.16b, { v5.16b }, v1.16b
+ mov v29.16b, v20.16b
+ ext v4.16b, v4.16b, v4.16b, #12
+ add v6.4s, v6.4s, v5.4s
+ mov v29.s[1], v27.s[2]
+ add v4.4s, v4.4s, v27.4s
+ zip1 v26.2d, v20.2d, v7.2d
+ zip1 v7.4s, v16.4s, v27.4s
+ zip1 v16.4s, v27.4s, v16.4s
+ eor v17.16b, v17.16b, v6.16b
+ ext v7.16b, v16.16b, v7.16b, #8
+ ushr v16.4s, v17.4s, #7
+ shl v17.4s, v17.4s, #25
+ orr v16.16b, v17.16b, v16.16b
+ ext v5.16b, v5.16b, v5.16b, #8
+ add v4.4s, v4.4s, v16.4s
+ eor v5.16b, v4.16b, v5.16b
+ ext v6.16b, v6.16b, v6.16b, #4
+ tbl v5.16b, { v5.16b }, v0.16b
+ add v6.4s, v6.4s, v5.4s
+ eor v16.16b, v16.16b, v6.16b
+ ushr v17.4s, v16.4s, #12
+ shl v16.4s, v16.4s, #20
+ add v4.4s, v4.4s, v20.4s
+ orr v16.16b, v16.16b, v17.16b
+ add v4.4s, v4.4s, v16.4s
+ eor v5.16b, v5.16b, v4.16b
+ tbl v5.16b, { v5.16b }, v1.16b
+ add v6.4s, v6.4s, v5.4s
+ eor v16.16b, v16.16b, v6.16b
+ add v4.4s, v4.4s, v18.4s
+ ushr v17.4s, v16.4s, #7
+ shl v16.4s, v16.4s, #25
+ ext v23.16b, v22.16b, v22.16b, #12
+ ext v4.16b, v4.16b, v4.16b, #4
+ orr v16.16b, v16.16b, v17.16b
+ ext v28.16b, v22.16b, v23.16b, #12
+ ext v5.16b, v5.16b, v5.16b, #8
+ add v4.4s, v16.4s, v4.4s
+ tbl v3.16b, { v28.16b, v29.16b }, v3.16b /* last use of the blend index: v3 is reused as a data register from here on */
+ eor v5.16b, v4.16b, v5.16b
+ ext v6.16b, v6.16b, v6.16b, #12
+ add v3.4s, v4.4s, v3.4s
+ tbl v4.16b, { v5.16b }, v0.16b
+ add v5.4s, v6.4s, v4.4s
+ eor v6.16b, v16.16b, v5.16b
+ ushr v16.4s, v6.4s, #12
+ shl v6.4s, v6.4s, #20
+ orr v6.16b, v6.16b, v16.16b
+ tbl v2.16b, { v26.16b, v27.16b }, v2.16b /* last use of the blend index: v2 is reused as a data register from here on */
+ add v3.4s, v3.4s, v6.4s
+ ext v19.16b, v2.16b, v2.16b, #12
+ eor v4.16b, v4.16b, v3.16b
+ uzp1 v2.4s, v2.4s, v19.4s
+ ext v3.16b, v3.16b, v3.16b, #12
+ tbl v4.16b, { v4.16b }, v1.16b
+ add v2.4s, v3.4s, v2.4s
+ add v3.4s, v5.4s, v4.4s
+ eor v5.16b, v6.16b, v3.16b
+ ushr v6.4s, v5.4s, #7
+ shl v5.4s, v5.4s, #25
+ orr v5.16b, v5.16b, v6.16b
+ ext v4.16b, v4.16b, v4.16b, #8
+ add v2.4s, v2.4s, v5.4s
+ eor v4.16b, v2.16b, v4.16b
+ ext v3.16b, v3.16b, v3.16b, #4
+ tbl v0.16b, { v4.16b }, v0.16b /* final 16-bit rotate; the index register v0 now holds row 3 */
+ add v3.4s, v3.4s, v0.4s
+ eor v4.16b, v5.16b, v3.16b
+ ushr v5.4s, v4.4s, #12
+ shl v4.4s, v4.4s, #20
+ add v2.4s, v2.4s, v7.4s
+ orr v4.16b, v4.16b, v5.16b
+ add v2.4s, v2.4s, v4.4s
+ eor v0.16b, v0.16b, v2.16b
+ tbl v0.16b, { v0.16b }, v1.16b /* final 8-bit rotate; v1 is free to be overwritten next */
+ add v1.4s, v3.4s, v0.4s /* v1 = row 2 */
+ eor v3.16b, v4.16b, v1.16b
+ ext v2.16b, v2.16b, v2.16b, #4 /* undo the lane rotation of row 0 */
+ ext v1.16b, v1.16b, v1.16b, #12 /* undo the lane rotation of row 2 */
+ ushr v4.4s, v3.4s, #7
+ shl v3.4s, v3.4s, #25
+ ext v0.16b, v0.16b, v0.16b, #8 /* undo the lane rotation of row 3 */
+ eor v1.16b, v2.16b, v1.16b /* first output half = row0 ^ row2 */
+ orr v2.16b, v3.16b, v4.16b /* v2 = row 1 after its final rotate right by 7 */
+ eor v0.16b, v2.16b, v0.16b /* second output half = row1 ^ row3 */
+ stp q1, q0, [x0] /* overwrite the 32 bytes at x0 with the result */
+ ret
+.Lfunc_end0:
+ .size zfs_blake3_compress_in_place_sse41, .Lfunc_end0-zfs_blake3_compress_in_place_sse41
+ .cfi_endproc
+
+ .section .rodata.cst16,"aM",@progbits,16 /* 16-byte constants loaded by zfs_blake3_compress_xof_sse41; same values as .LCPI0_0-.LCPI0_4 */
+ .p2align 4
+.LCPI1_0: /* tbl index: per 32-bit lane take source bytes 2,3,0,1, i.e. rotate each lane by 16 bits */
+ .byte 2
+ .byte 3
+ .byte 0
+ .byte 1
+ .byte 6
+ .byte 7
+ .byte 4
+ .byte 5
+ .byte 10
+ .byte 11
+ .byte 8
+ .byte 9
+ .byte 14
+ .byte 15
+ .byte 12
+ .byte 13
+.LCPI1_1: /* four words added to the rotated fourth state vector to form the third one; presumably BLAKE3 IV[0..3] */
+ .word 1779033703 /* 0x6A09E667 */
+ .word 3144134277 /* 0xBB67AE85 */
+ .word 1013904242 /* 0x3C6EF372 */
+ .word 2773480762 /* 0xA54FF53A */
+.LCPI1_2: /* tbl index: per 32-bit lane take source bytes 1,2,3,0, i.e. rotate each lane right by 8 bits (little-endian lanes) */
+ .byte 1
+ .byte 2
+ .byte 3
+ .byte 0
+ .byte 5
+ .byte 6
+ .byte 7
+ .byte 4
+ .byte 9
+ .byte 10
+ .byte 11
+ .byte 8
+ .byte 13
+ .byte 14
+ .byte 15
+ .byte 12
+.LCPI1_3: /* two-register tbl index (16..31 address the second register): lanes 0,2 from the first, lanes 1,3 from the second */
+ .byte 0
+ .byte 1
+ .byte 2
+ .byte 3
+ .byte 20
+ .byte 21
+ .byte 22
+ .byte 23
+ .byte 8
+ .byte 9
+ .byte 10
+ .byte 11
+ .byte 28
+ .byte 29
+ .byte 30
+ .byte 31
+.LCPI1_4: /* two-register tbl index: lanes 0,1,2 from the first register, lane 3 from the second */
+ .byte 0
+ .byte 1
+ .byte 2
+ .byte 3
+ .byte 4
+ .byte 5
+ .byte 6
+ .byte 7
+ .byte 8
+ .byte 9
+ .byte 10
+ .byte 11
+ .byte 28
+ .byte 29
+ .byte 30
+ .byte 31
+ .text
+ .globl zfs_blake3_compress_xof_sse41 /* compression routine: mixes the 32 bytes at x0 with the 64 bytes at x1 and writes 64 bytes to x5; x0 is only read */
+ .p2align 2
+ .type zfs_blake3_compress_xof_sse41,@function /* NOTE(review): argument roles presumed from the BLAKE3 API (x0 = cv, x1 = block, w2 = block_len, x3 = counter, w4 = flags, x5 = out[64]) -- confirm against the C prototype */
+zfs_blake3_compress_xof_sse41: /* leaf routine: no stack accesses and no calls; the add/xor/rotate group (rotations 16, 12, 8, 7) appears 14 times */
+ .cfi_startproc
+ ldp q7, q6, [x0] /* v7 = bytes 0..15 at x0 (state row 0), v6 = bytes 16..31 (state row 1) */
+ ldp q17, q18, [x1] /* first 32 bytes of the block at x1 */
+ add x12, x1, #32
+ ld2 { v4.4s, v5.4s }, [x12] /* block bytes 32..63, de-interleaved: v4 = even-indexed words, v5 = odd-indexed words */
+ lsr x10, x3, #32 /* upper 32 bits of x3 */
+ fmov s16, w3 /* v16 lane 0 = lower 32 bits of x3 */
+ adrp x13, .LCPI1_0
+ adrp x11, .LCPI1_1
+ and w8, w2, #0xff /* only the low byte of w2 is used */
+ mov v16.s[1], w10
+ ldr q0, [x13, :lo12:.LCPI1_0] /* v0 = tbl index for the 16-bit lane rotate */
+ ldr q20, [x11, :lo12:.LCPI1_1] /* v20 = constant words used to build state row 2 */
+ adrp x11, .LCPI1_4
+ and w9, w4, #0xff /* only the low byte of w4 is used */
+ ldr q2, [x11, :lo12:.LCPI1_4] /* v2 = lane-blend index (lane 3 from the second table register) */
+ mov v16.s[2], w8
+ uzp1 v21.4s, v17.4s, v18.4s /* even-indexed words of block bytes 0..31 */
+ add v7.4s, v6.4s, v7.4s /* row0 += row1 */
+ adrp x12, .LCPI1_3
+ mov v16.s[3], w9 /* v16 = { x3 low, x3 high, w2 & 0xff, w4 & 0xff } (state row 3) */
+ uzp2 v18.4s, v17.4s, v18.4s /* odd-indexed words of block bytes 0..31 */
+ add v7.4s, v7.4s, v21.4s /* row0 += message words */
+ ext v17.16b, v5.16b, v5.16b, #12
+ ldr q3, [x12, :lo12:.LCPI1_3] /* v3 = lane-blend index (lanes 1,3 from the second table register) */
+ ext v24.16b, v4.16b, v4.16b, #12
+ eor v16.16b, v7.16b, v16.16b /* row3 ^= row0 */
+ mov v27.16b, v17.16b
+ uzp1 v19.4s, v21.4s, v21.4s
+ ext v25.16b, v21.16b, v21.16b, #12
+ zip2 v28.4s, v18.4s, v17.4s
+ tbl v29.16b, { v16.16b }, v0.16b /* rotate each 32-bit lane of row3 by 16 */
+ mov v27.s[1], v24.s[2]
+ zip1 v23.2d, v17.2d, v18.2d
+ ext v19.16b, v19.16b, v21.16b, #8
+ add v22.4s, v29.4s, v20.4s /* row2 = constants + row3 */
+ ext v26.16b, v21.16b, v25.16b, #12
+ tbl v20.16b, { v23.16b, v24.16b }, v2.16b
+ zip1 v21.4s, v28.4s, v24.4s
+ zip1 v23.4s, v24.4s, v28.4s
+ uzp2 v19.4s, v19.4s, v18.4s
+ eor v24.16b, v22.16b, v6.16b /* row1 ^ row2 */
+ ext v25.16b, v20.16b, v20.16b, #12
+ ext v6.16b, v23.16b, v21.16b, #8
+ add v7.4s, v7.4s, v18.4s /* row0 += message words */
+ ext v18.16b, v19.16b, v19.16b, #4
+ tbl v16.16b, { v26.16b, v27.16b }, v3.16b
+ uzp1 v21.4s, v20.4s, v25.4s
+ mov v26.16b, v6.16b
+ ext v23.16b, v18.16b, v18.16b, #12
+ mov v26.s[1], v21.s[2]
+ adrp x10, .LCPI1_2
+ ext v25.16b, v18.16b, v23.16b, #12
+ uzp1 v23.4s, v18.4s, v18.4s
+ ldr q1, [x10, :lo12:.LCPI1_2] /* v1 = tbl index for the 8-bit lane rotate */
+ ext v18.16b, v23.16b, v18.16b, #8
+ ushr v23.4s, v24.4s, #12
+ shl v24.4s, v24.4s, #20
+ orr v23.16b, v24.16b, v23.16b /* ushr #12 | shl #20: each lane rotated right by 12 */
+ add v7.4s, v7.4s, v23.4s /* row0 += row1 */
+ eor v27.16b, v29.16b, v7.16b /* row3 ^ row0 */
+ add v4.4s, v7.4s, v4.4s
+ tbl v7.16b, { v25.16b, v26.16b }, v3.16b
+ tbl v26.16b, { v27.16b }, v1.16b /* rotate each lane right by 8 */
+ add v22.4s, v22.4s, v26.4s /* row2 += row3 */
+ uzp2 v18.4s, v18.4s, v16.4s
+ eor v23.16b, v23.16b, v22.16b /* row1 ^= row2 */
+ ext v5.16b, v18.16b, v18.16b, #4
+ ushr v27.4s, v23.4s, #7
+ shl v23.4s, v23.4s, #25
+ uzp1 v25.4s, v5.4s, v5.4s
+ orr v23.16b, v23.16b, v27.16b /* ushr #7 | shl #25: each lane rotated right by 7 */
+ ext v28.16b, v4.16b, v4.16b, #12 /* ext of a register with itself rotates its lanes (here by 12 bytes) */
+ ext v4.16b, v25.16b, v5.16b, #8
+ ext v25.16b, v26.16b, v26.16b, #8 /* row3 lanes rotated by 8 bytes */
+ add v26.4s, v28.4s, v23.4s
+ eor v25.16b, v26.16b, v25.16b
+ ext v22.16b, v22.16b, v22.16b, #4 /* row2 lanes rotated by 4 bytes */
+ tbl v25.16b, { v25.16b }, v0.16b
+ add v22.4s, v22.4s, v25.4s
+ eor v23.16b, v23.16b, v22.16b
+ add v17.4s, v26.4s, v17.4s
+ ushr v26.4s, v23.4s, #12
+ shl v23.4s, v23.4s, #20
+ orr v23.16b, v23.16b, v26.16b
+ add v17.4s, v17.4s, v23.4s
+ eor v25.16b, v25.16b, v17.16b
+ add v17.4s, v17.4s, v19.4s
+ tbl v19.16b, { v25.16b }, v1.16b
+ add v22.4s, v22.4s, v19.4s
+ eor v23.16b, v23.16b, v22.16b
+ ushr v25.4s, v23.4s, #7
+ shl v23.4s, v23.4s, #25
+ ext v17.16b, v17.16b, v17.16b, #4
+ orr v23.16b, v23.16b, v25.16b
+ ext v19.16b, v19.16b, v19.16b, #8
+ add v17.4s, v17.4s, v23.4s
+ eor v19.16b, v17.16b, v19.16b
+ ext v22.16b, v22.16b, v22.16b, #12
+ tbl v19.16b, { v19.16b }, v0.16b
+ add v22.4s, v22.4s, v19.4s
+ eor v23.16b, v23.16b, v22.16b
+ ushr v25.4s, v23.4s, #12
+ shl v23.4s, v23.4s, #20
+ add v17.4s, v17.4s, v16.4s
+ orr v23.16b, v23.16b, v25.16b
+ add v17.4s, v17.4s, v23.4s
+ ext v25.16b, v17.16b, v17.16b, #12
+ eor v17.16b, v19.16b, v17.16b
+ tbl v17.16b, { v17.16b }, v1.16b
+ add v19.4s, v22.4s, v17.4s
+ eor v22.16b, v23.16b, v19.16b
+ add v25.4s, v25.4s, v21.4s
+ zip1 v20.2d, v6.2d, v16.2d
+ ushr v23.4s, v22.4s, #7
+ shl v22.4s, v22.4s, #25
+ zip2 v24.4s, v16.4s, v6.4s
+ tbl v26.16b, { v20.16b, v21.16b }, v2.16b
+ orr v22.16b, v22.16b, v23.16b
+ zip1 v16.4s, v24.4s, v21.4s
+ zip1 v20.4s, v21.4s, v24.4s
+ ext v21.16b, v26.16b, v26.16b, #12
+ ext v17.16b, v17.16b, v17.16b, #8
+ add v25.4s, v25.4s, v22.4s
+ ext v16.16b, v20.16b, v16.16b, #8
+ uzp1 v21.4s, v26.4s, v21.4s
+ eor v26.16b, v25.16b, v17.16b
+ ext v19.16b, v19.16b, v19.16b, #4
+ tbl v26.16b, { v26.16b }, v0.16b
+ mov v29.16b, v16.16b
+ add v19.4s, v19.4s, v26.4s
+ ext v27.16b, v5.16b, v5.16b, #12
+ mov v29.s[1], v21.s[2]
+ eor v22.16b, v22.16b, v19.16b
+ ext v28.16b, v5.16b, v27.16b, #12
+ ushr v27.4s, v22.4s, #12
+ shl v22.4s, v22.4s, #20
+ add v6.4s, v25.4s, v6.4s
+ orr v22.16b, v22.16b, v27.16b
+ add v6.4s, v6.4s, v22.4s
+ eor v26.16b, v26.16b, v6.16b
+ add v6.4s, v6.4s, v18.4s
+ tbl v18.16b, { v26.16b }, v1.16b
+ add v19.4s, v19.4s, v18.4s
+ eor v22.16b, v22.16b, v19.16b
+ ushr v26.4s, v22.4s, #7
+ shl v22.4s, v22.4s, #25
+ ext v6.16b, v6.16b, v6.16b, #4
+ orr v22.16b, v22.16b, v26.16b
+ ext v18.16b, v18.16b, v18.16b, #8
+ add v6.4s, v6.4s, v22.4s
+ eor v18.16b, v6.16b, v18.16b
+ ext v19.16b, v19.16b, v19.16b, #12
+ tbl v18.16b, { v18.16b }, v0.16b
+ add v19.4s, v19.4s, v18.4s
+ eor v22.16b, v22.16b, v19.16b
+ ushr v26.4s, v22.4s, #12
+ shl v22.4s, v22.4s, #20
+ add v6.4s, v6.4s, v7.4s
+ orr v22.16b, v22.16b, v26.16b
+ add v6.4s, v6.4s, v22.4s
+ ext v26.16b, v6.16b, v6.16b, #12
+ eor v6.16b, v18.16b, v6.16b
+ uzp2 v4.4s, v4.4s, v7.4s
+ zip2 v25.4s, v7.4s, v16.4s
+ add v26.4s, v26.4s, v21.4s
+ zip1 v20.2d, v16.2d, v7.2d
+ tbl v6.16b, { v6.16b }, v1.16b
+ ext v24.16b, v4.16b, v4.16b, #4
+ tbl v27.16b, { v20.16b, v21.16b }, v2.16b
+ zip1 v7.4s, v25.4s, v21.4s
+ zip1 v20.4s, v21.4s, v25.4s
+ add v18.4s, v19.4s, v6.4s
+ uzp1 v5.4s, v24.4s, v24.4s
+ ext v21.16b, v27.16b, v27.16b, #12
+ ext v7.16b, v20.16b, v7.16b, #8
+ eor v19.16b, v22.16b, v18.16b
+ ext v5.16b, v5.16b, v24.16b, #8
+ tbl v17.16b, { v28.16b, v29.16b }, v3.16b
+ uzp1 v21.4s, v27.4s, v21.4s
+ mov v28.16b, v7.16b
+ ushr v22.4s, v19.4s, #7
+ shl v19.4s, v19.4s, #25
+ ext v23.16b, v24.16b, v24.16b, #12
+ uzp2 v5.4s, v5.4s, v17.4s
+ mov v28.s[1], v21.s[2]
+ orr v19.16b, v19.16b, v22.16b
+ ext v27.16b, v24.16b, v23.16b, #12
+ ext v23.16b, v5.16b, v5.16b, #4
+ ext v6.16b, v6.16b, v6.16b, #8
+ ext v25.16b, v18.16b, v18.16b, #4
+ add v18.4s, v26.4s, v19.4s
+ uzp1 v24.4s, v23.4s, v23.4s
+ eor v6.16b, v18.16b, v6.16b
+ ext v24.16b, v24.16b, v23.16b, #8
+ add v16.4s, v18.4s, v16.4s
+ tbl v18.16b, { v27.16b, v28.16b }, v3.16b
+ tbl v27.16b, { v6.16b }, v0.16b
+ uzp2 v6.4s, v24.4s, v18.4s
+ add v24.4s, v25.4s, v27.4s
+ eor v19.16b, v19.16b, v24.16b
+ ushr v25.4s, v19.4s, #12
+ shl v19.4s, v19.4s, #20
+ orr v19.16b, v19.16b, v25.16b
+ add v16.4s, v16.4s, v19.4s
+ eor v25.16b, v27.16b, v16.16b
+ add v4.4s, v16.4s, v4.4s
+ tbl v16.16b, { v25.16b }, v1.16b
+ add v24.4s, v24.4s, v16.4s
+ eor v19.16b, v19.16b, v24.16b
+ ushr v25.4s, v19.4s, #7
+ shl v19.4s, v19.4s, #25
+ ext v4.16b, v4.16b, v4.16b, #4
+ orr v19.16b, v19.16b, v25.16b
+ ext v16.16b, v16.16b, v16.16b, #8
+ add v4.4s, v4.4s, v19.4s
+ eor v16.16b, v4.16b, v16.16b
+ ext v24.16b, v24.16b, v24.16b, #12
+ tbl v25.16b, { v16.16b }, v0.16b
+ add v24.4s, v24.4s, v25.4s
+ eor v16.16b, v19.16b, v24.16b
+ ushr v19.4s, v16.4s, #12
+ shl v16.4s, v16.4s, #20
+ add v4.4s, v4.4s, v17.4s
+ orr v19.16b, v16.16b, v19.16b
+ add v27.4s, v4.4s, v19.4s
+ eor v25.16b, v25.16b, v27.16b
+ tbl v25.16b, { v25.16b }, v1.16b
+ add v24.4s, v24.4s, v25.4s
+ zip2 v26.4s, v17.4s, v7.4s
+ ext v4.16b, v27.16b, v27.16b, #12
+ eor v19.16b, v19.16b, v24.16b
+ add v28.4s, v4.4s, v21.4s
+ zip1 v20.2d, v7.2d, v17.2d
+ zip1 v4.4s, v26.4s, v21.4s
+ zip1 v17.4s, v21.4s, v26.4s
+ ushr v26.4s, v19.4s, #7
+ shl v19.4s, v19.4s, #25
+ orr v19.16b, v19.16b, v26.16b
+ ext v25.16b, v25.16b, v25.16b, #8
+ add v27.4s, v28.4s, v19.4s
+ eor v25.16b, v27.16b, v25.16b
+ ext v24.16b, v24.16b, v24.16b, #4
+ tbl v25.16b, { v25.16b }, v0.16b
+ add v24.4s, v24.4s, v25.4s
+ eor v19.16b, v19.16b, v24.16b
+ add v7.4s, v27.4s, v7.4s
+ ushr v27.4s, v19.4s, #12
+ shl v19.4s, v19.4s, #20
+ orr v19.16b, v19.16b, v27.16b
+ add v7.4s, v7.4s, v19.4s
+ eor v25.16b, v25.16b, v7.16b
+ add v5.4s, v7.4s, v5.4s
+ tbl v7.16b, { v25.16b }, v1.16b
+ add v24.4s, v24.4s, v7.4s
+ eor v19.16b, v19.16b, v24.16b
+ ushr v25.4s, v19.4s, #7
+ shl v19.4s, v19.4s, #25
+ ext v5.16b, v5.16b, v5.16b, #4
+ orr v19.16b, v19.16b, v25.16b
+ ext v7.16b, v7.16b, v7.16b, #8
+ add v5.4s, v5.4s, v19.4s
+ eor v7.16b, v5.16b, v7.16b
+ ext v24.16b, v24.16b, v24.16b, #12
+ tbl v7.16b, { v7.16b }, v0.16b
+ add v24.4s, v24.4s, v7.4s
+ eor v19.16b, v19.16b, v24.16b
+ ushr v25.4s, v19.4s, #12
+ shl v19.4s, v19.4s, #20
+ tbl v16.16b, { v20.16b, v21.16b }, v2.16b
+ add v5.4s, v5.4s, v18.4s
+ orr v19.16b, v19.16b, v25.16b
+ ext v20.16b, v16.16b, v16.16b, #12
+ ext v4.16b, v17.16b, v4.16b, #8
+ add v5.4s, v5.4s, v19.4s
+ uzp1 v21.4s, v16.4s, v20.4s
+ mov v17.16b, v4.16b
+ ext v25.16b, v5.16b, v5.16b, #12
+ mov v17.s[1], v21.s[2]
+ add v25.4s, v25.4s, v21.4s
+ zip1 v20.2d, v4.2d, v18.2d
+ ext v22.16b, v23.16b, v23.16b, #12
+ zip2 v26.4s, v18.4s, v4.4s
+ tbl v18.16b, { v20.16b, v21.16b }, v2.16b
+ eor v5.16b, v7.16b, v5.16b
+ ext v16.16b, v23.16b, v22.16b, #12
+ ext v22.16b, v6.16b, v6.16b, #4
+ zip1 v27.4s, v26.4s, v21.4s
+ zip1 v20.4s, v21.4s, v26.4s
+ ext v21.16b, v18.16b, v18.16b, #12
+ tbl v5.16b, { v5.16b }, v1.16b
+ ext v20.16b, v20.16b, v27.16b, #8
+ uzp1 v27.4s, v18.4s, v21.4s
+ uzp1 v18.4s, v22.4s, v22.4s
+ add v21.4s, v24.4s, v5.4s
+ ext v18.16b, v18.16b, v22.16b, #8
+ eor v19.16b, v19.16b, v21.16b
+ tbl v7.16b, { v16.16b, v17.16b }, v3.16b
+ uzp2 v18.4s, v18.4s, v17.4s
+ zip2 v16.4s, v16.4s, v20.4s
+ ushr v17.4s, v19.4s, #7
+ shl v19.4s, v19.4s, #25
+ orr v17.16b, v19.16b, v17.16b
+ ext v5.16b, v5.16b, v5.16b, #8
+ add v19.4s, v25.4s, v17.4s
+ eor v5.16b, v19.16b, v5.16b
+ ext v21.16b, v21.16b, v21.16b, #4
+ tbl v5.16b, { v5.16b }, v0.16b
+ add v4.4s, v19.4s, v4.4s
+ add v19.4s, v21.4s, v5.4s
+ eor v17.16b, v17.16b, v19.16b
+ ushr v21.4s, v17.4s, #12
+ shl v17.4s, v17.4s, #20
+ orr v17.16b, v17.16b, v21.16b
+ add v4.4s, v4.4s, v17.4s
+ eor v5.16b, v5.16b, v4.16b
+ tbl v5.16b, { v5.16b }, v1.16b
+ add v4.4s, v4.4s, v6.4s
+ add v6.4s, v19.4s, v5.4s
+ eor v17.16b, v17.16b, v6.16b
+ ushr v19.4s, v17.4s, #7
+ shl v17.4s, v17.4s, #25
+ ext v4.16b, v4.16b, v4.16b, #4
+ orr v17.16b, v17.16b, v19.16b
+ ext v5.16b, v5.16b, v5.16b, #8
+ add v4.4s, v4.4s, v17.4s
+ eor v5.16b, v4.16b, v5.16b
+ ext v6.16b, v6.16b, v6.16b, #12
+ tbl v5.16b, { v5.16b }, v0.16b
+ add v6.4s, v6.4s, v5.4s
+ eor v17.16b, v17.16b, v6.16b
+ ushr v19.4s, v17.4s, #12
+ shl v17.4s, v17.4s, #20
+ add v4.4s, v4.4s, v7.4s
+ orr v17.16b, v17.16b, v19.16b
+ add v4.4s, v4.4s, v17.4s
+ eor v5.16b, v5.16b, v4.16b
+ tbl v5.16b, { v5.16b }, v1.16b
+ mov v29.16b, v20.16b
+ ext v4.16b, v4.16b, v4.16b, #12
+ add v6.4s, v6.4s, v5.4s
+ mov v29.s[1], v27.s[2]
+ add v4.4s, v4.4s, v27.4s
+ zip1 v26.2d, v20.2d, v7.2d
+ zip1 v7.4s, v16.4s, v27.4s
+ zip1 v16.4s, v27.4s, v16.4s
+ eor v17.16b, v17.16b, v6.16b
+ ext v7.16b, v16.16b, v7.16b, #8
+ ushr v16.4s, v17.4s, #7
+ shl v17.4s, v17.4s, #25
+ orr v16.16b, v17.16b, v16.16b
+ ext v5.16b, v5.16b, v5.16b, #8
+ add v4.4s, v4.4s, v16.4s
+ eor v5.16b, v4.16b, v5.16b
+ ext v6.16b, v6.16b, v6.16b, #4
+ tbl v5.16b, { v5.16b }, v0.16b
+ add v6.4s, v6.4s, v5.4s
+ eor v16.16b, v16.16b, v6.16b
+ ushr v17.4s, v16.4s, #12
+ shl v16.4s, v16.4s, #20
+ add v4.4s, v4.4s, v20.4s
+ orr v16.16b, v16.16b, v17.16b
+ add v4.4s, v4.4s, v16.4s
+ eor v5.16b, v5.16b, v4.16b
+ tbl v5.16b, { v5.16b }, v1.16b
+ add v6.4s, v6.4s, v5.4s
+ eor v16.16b, v16.16b, v6.16b
+ add v4.4s, v4.4s, v18.4s
+ ushr v17.4s, v16.4s, #7
+ shl v16.4s, v16.4s, #25
+ ext v23.16b, v22.16b, v22.16b, #12
+ ext v4.16b, v4.16b, v4.16b, #4
+ orr v16.16b, v16.16b, v17.16b
+ ext v28.16b, v22.16b, v23.16b, #12
+ ext v5.16b, v5.16b, v5.16b, #8
+ add v4.4s, v16.4s, v4.4s
+ tbl v3.16b, { v28.16b, v29.16b }, v3.16b /* last use of the blend index: v3 is reused as a data register from here on */
+ eor v5.16b, v4.16b, v5.16b
+ ext v6.16b, v6.16b, v6.16b, #12
+ add v3.4s, v4.4s, v3.4s
+ tbl v4.16b, { v5.16b }, v0.16b
+ add v5.4s, v6.4s, v4.4s
+ eor v6.16b, v16.16b, v5.16b
+ ushr v16.4s, v6.4s, #12
+ shl v6.4s, v6.4s, #20
+ orr v6.16b, v6.16b, v16.16b
+ tbl v2.16b, { v26.16b, v27.16b }, v2.16b /* last use of the blend index: v2 is reused as a data register from here on */
+ add v3.4s, v3.4s, v6.4s
+ ext v19.16b, v2.16b, v2.16b, #12
+ eor v4.16b, v4.16b, v3.16b
+ uzp1 v2.4s, v2.4s, v19.4s
+ ext v3.16b, v3.16b, v3.16b, #12
+ tbl v4.16b, { v4.16b }, v1.16b
+ add v2.4s, v3.4s, v2.4s
+ add v3.4s, v5.4s, v4.4s
+ eor v5.16b, v6.16b, v3.16b
+ ushr v6.4s, v5.4s, #7
+ shl v5.4s, v5.4s, #25
+ orr v5.16b, v5.16b, v6.16b
+ ext v4.16b, v4.16b, v4.16b, #8
+ add v2.4s, v2.4s, v5.4s
+ eor v4.16b, v2.16b, v4.16b
+ ext v3.16b, v3.16b, v3.16b, #4
+ tbl v0.16b, { v4.16b }, v0.16b /* final 16-bit rotate; the index register v0 now holds row 3 */
+ add v3.4s, v3.4s, v0.4s
+ eor v4.16b, v5.16b, v3.16b
+ ushr v5.4s, v4.4s, #12
+ shl v4.4s, v4.4s, #20
+ add v2.4s, v2.4s, v7.4s
+ orr v4.16b, v4.16b, v5.16b
+ add v2.4s, v2.4s, v4.4s
+ eor v0.16b, v0.16b, v2.16b
+ tbl v0.16b, { v0.16b }, v1.16b /* final 8-bit rotate; v1 is free to be overwritten next */
+ add v1.4s, v3.4s, v0.4s /* v1 = row 2 */
+ eor v3.16b, v4.16b, v1.16b
+ ushr v4.4s, v3.4s, #7
+ shl v3.4s, v3.4s, #25
+ ext v2.16b, v2.16b, v2.16b, #4 /* undo the lane rotation of row 0 */
+ ext v0.16b, v0.16b, v0.16b, #8 /* undo the lane rotation of row 3 */
+ ext v1.16b, v1.16b, v1.16b, #12 /* undo the lane rotation of row 2 */
+ orr v3.16b, v3.16b, v4.16b /* v3 = row 1 after its final rotate right by 7 */
+ eor v2.16b, v2.16b, v1.16b /* row0 ^ row2 */
+ eor v3.16b, v3.16b, v0.16b /* row1 ^ row3 */
+ stp q2, q3, [x5] /* output bytes 0..31 */
+ ldr q2, [x0] /* reload bytes 0..15 of the input at x0 */
+ eor v1.16b, v2.16b, v1.16b /* row2 ^ input bytes 0..15 */
+ str q1, [x5, #32] /* output bytes 32..47 */
+ ldr q1, [x0, #16] /* reload bytes 16..31 of the input at x0 */
+ eor v0.16b, v1.16b, v0.16b /* row3 ^ input bytes 16..31 */
+ str q0, [x5, #48] /* output bytes 48..63 */
+ ret
+.Lfunc_end1:
+ .size zfs_blake3_compress_xof_sse41, .Lfunc_end1-zfs_blake3_compress_xof_sse41
+ .cfi_endproc
+
+ .section .rodata.cst16,"aM",@progbits,16 /* 16-byte constants referenced by zfs_blake3_hash_many_sse41 */
+ .p2align 4
+.LCPI2_0: /* lane indices 0..3; zfs_blake3_hash_many_sse41 ANDs them with a mask broadcast from bit 0 of w5 */
+ .word 0
+ .word 1
+ .word 2
+ .word 3
+.LCPI2_1: /* byte pattern 2,3,0,1 per 32-bit lane; as a tbl index this rotates each lane by 16 bits */
+ .byte 2
+ .byte 3
+ .byte 0
+ .byte 1
+ .byte 6
+ .byte 7
+ .byte 4
+ .byte 5
+ .byte 10
+ .byte 11
+ .byte 8
+ .byte 9
+ .byte 14
+ .byte 15
+ .byte 12
+ .byte 13
+.LCPI2_2: /* byte pattern 1,2,3,0 per 32-bit lane; as a tbl index this rotates each lane right by 8 bits (little-endian lanes) */
+ .byte 1
+ .byte 2
+ .byte 3
+ .byte 0
+ .byte 5
+ .byte 6
+ .byte 7
+ .byte 4
+ .byte 9
+ .byte 10
+ .byte 11
+ .byte 8
+ .byte 13
+ .byte 14
+ .byte 15
+ .byte 12
+ .text
+ .globl zfs_blake3_hash_many_sse41
+ .p2align 2
+ .type zfs_blake3_hash_many_sse41,@function
+zfs_blake3_hash_many_sse41:
+ .cfi_startproc
+ stp d15, d14, [sp, #-160]!
+ stp d13, d12, [sp, #16]
+ stp d11, d10, [sp, #32]
+ stp d9, d8, [sp, #48]
+ stp x29, x30, [sp, #64]
+ stp x28, x27, [sp, #80]
+ stp x26, x25, [sp, #96]
+ stp x24, x23, [sp, #112]
+ stp x22, x21, [sp, #128]
+ stp x20, x19, [sp, #144]
+ mov x29, sp
+ sub sp, sp, #448
+ .cfi_def_cfa w29, 160
+ .cfi_offset w19, -8
+ .cfi_offset w20, -16
+ .cfi_offset w21, -24
+ .cfi_offset w22, -32
+ .cfi_offset w23, -40
+ .cfi_offset w24, -48
+ .cfi_offset w25, -56
+ .cfi_offset w26, -64
+ .cfi_offset w27, -72
+ .cfi_offset w28, -80
+ .cfi_offset w30, -88
+ .cfi_offset w29, -96
+ .cfi_offset b8, -104
+ .cfi_offset b9, -112
+ .cfi_offset b10, -120
+ .cfi_offset b11, -128
+ .cfi_offset b12, -136
+ .cfi_offset b13, -144
+ .cfi_offset b14, -152
+ .cfi_offset b15, -160
+ ldr x26, [x29, #168]
+ ldrb w27, [x29, #160]
+ mov w19, w6
+ mov x20, x4
+ mov x22, x2
+ mov x28, x1
+ cmp x1, #4
+ mov x24, x0
+ str x3, [sp, #40]
+ b.lo .LBB2_8
+ adrp x11, .LCPI2_0
+ ldr q0, [x11, :lo12:.LCPI2_0]
+ sbfx w13, w5, #0, #1
+ dup v1.4s, w13
+ mov w10, #58983
+ mov w11, #44677
+ mov w12, #62322
+ and v0.16b, v1.16b, v0.16b
+ mov w13, #62778
+ orr w8, w7, w19
+ adrp x9, .LCPI2_1
+ movk w10, #27145, lsl #16
+ movk w11, #47975, lsl #16
+ movk w12, #15470, lsl #16
+ movk w13, #42319, lsl #16
+ str q0, [sp, #16]
+ orr v0.4s, #128, lsl #24
+ adrp x14, .LCPI2_2
+ str q0, [sp]
+.LBB2_2:
+ ldr x2, [sp, #40]
+ mov x15, x2
+ ld1r { v7.4s }, [x15], #4
+ add x16, x2, #8
+ add x17, x2, #12
+ add x18, x2, #16
+ add x0, x2, #20
+ add x3, x2, #24
+ add x2, x2, #28
+ ld1r { v6.4s }, [x16]
+ ld1r { v17.4s }, [x17]
+ ld1r { v10.4s }, [x18]
+ ld1r { v11.4s }, [x0]
+ ld1r { v19.4s }, [x3]
+ ld1r { v18.4s }, [x15]
+ ld1r { v16.4s }, [x2]
+ cbz x22, .LBB2_7
+ ldr q1, [sp, #16]
+ dup v0.4s, w20
+ ldp x15, x16, [x24]
+ ldp x17, x18, [x24, #16]
+ add v1.4s, v0.4s, v1.4s
+ movi v0.4s, #128, lsl #24
+ str q1, [sp, #64]
+ eor v0.16b, v1.16b, v0.16b
+ ldr q1, [sp]
+ lsr x2, x20, #32
+ mov x0, xzr
+ mov w6, w8
+ cmgt v0.4s, v1.4s, v0.4s
+ dup v1.4s, w2
+ sub v0.4s, v1.4s, v0.4s
+ str q0, [sp, #48]
+.LBB2_4:
+ mov w4, #16
+ stp q16, q17, [sp, #192]
+ bfi x4, x0, #6, #58
+ ldr q1, [x15, x4]
+ ldr q3, [x16, x4]
+ ldr q2, [x17, x4]
+ ldr q4, [x18, x4]
+ mov w4, #32
+ bfi x4, x0, #6, #58
+ ldr q5, [x15, x4]
+ ldr q20, [x16, x4]
+ ldr q21, [x17, x4]
+ ldr q22, [x18, x4]
+ mov w4, #48
+ lsl x3, x0, #6
+ bfi x4, x0, #6, #58
+ add x0, x0, #1
+ ldr q0, [x15, x3]
+ ldr q23, [x16, x3]
+ ldr q16, [x17, x3]
+ ldr q17, [x18, x3]
+ cmp x0, x22
+ ldr q25, [x15, x4]
+ ldr q14, [x16, x4]
+ ldr q28, [x17, x4]
+ ldr q31, [x18, x4]
+ csel w4, w27, wzr, eq
+ orr w4, w4, w6
+ mov x2, xzr
+ and w6, w4, #0xff
+ add x3, x3, #256
+.LBB2_5:
+ ldr x4, [x24, x2]
+ add x2, x2, #8
+ cmp x2, #32
+ add x4, x4, x3
+ prfm pldl1keep, [x4]
+ b.ne .LBB2_5
+ zip1 v29.4s, v0.4s, v23.4s
+ zip2 v23.4s, v0.4s, v23.4s
+ zip1 v0.4s, v16.4s, v17.4s
+ zip2 v24.4s, v16.4s, v17.4s
+ zip1 v9.4s, v1.4s, v3.4s
+ zip2 v26.4s, v1.4s, v3.4s
+ zip1 v27.4s, v2.4s, v4.4s
+ zip2 v17.4s, v2.4s, v4.4s
+ zip1 v12.4s, v21.4s, v22.4s
+ zip2 v13.4s, v21.4s, v22.4s
+ add v2.4s, v7.4s, v10.4s
+ add v1.4s, v18.4s, v11.4s
+ ext v7.16b, v0.16b, v29.16b, #8
+ ext v22.16b, v24.16b, v23.16b, #8
+ zip1 v30.4s, v5.4s, v20.4s
+ zip2 v20.4s, v5.4s, v20.4s
+ stp q1, q2, [sp, #112]
+ ext v2.16b, v29.16b, v7.16b, #8
+ mov v29.d[1], v0.d[0]
+ ext v18.16b, v23.16b, v22.16b, #8
+ mov v23.d[1], v24.d[0]
+ zip1 v21.4s, v25.4s, v14.4s
+ zip2 v4.4s, v25.4s, v14.4s
+ zip1 v14.4s, v28.4s, v31.4s
+ zip2 v15.4s, v28.4s, v31.4s
+ add v8.4s, v6.4s, v19.4s
+ ext v28.16b, v27.16b, v9.16b, #8
+ ext v31.16b, v17.16b, v26.16b, #8
+ stur q2, [x29, #-208]
+ mov v7.16b, v29.16b
+ ext v0.16b, v12.16b, v30.16b, #8
+ stp q23, q29, [x29, #-80]
+ mov v2.16b, v19.16b
+ ext v19.16b, v13.16b, v20.16b, #8
+ mov v29.16b, v9.16b
+ ext v25.16b, v9.16b, v28.16b, #8
+ mov v29.d[1], v27.d[0]
+ ext v24.16b, v26.16b, v31.16b, #8
+ mov v26.d[1], v17.d[0]
+ ext v17.16b, v15.16b, v4.16b, #8
+ ext v27.16b, v30.16b, v0.16b, #8
+ ext v0.16b, v20.16b, v19.16b, #8
+ stp q0, q25, [sp, #80]
+ ext v0.16b, v4.16b, v17.16b, #8
+ str q0, [sp, #224]
+ ldr q0, [sp, #128]
+ mov v6.16b, v23.16b
+ mov v22.16b, v4.16b
+ ldr q16, [x9, :lo12:.LCPI2_1]
+ add v17.4s, v0.4s, v7.4s
+ ldr q0, [sp, #112]
+ mov v30.d[1], v12.d[0]
+ add v7.4s, v8.4s, v29.4s
+ mov v20.d[1], v13.d[0]
+ add v4.4s, v0.4s, v6.4s
+ ldr q0, [sp, #64]
+ dup v3.4s, w12
+ ext v28.16b, v14.16b, v21.16b, #8
+ dup v1.4s, w10
+ eor v19.16b, v17.16b, v0.16b
+ ldr q0, [sp, #48]
+ ext v23.16b, v21.16b, v28.16b, #8
+ mov v21.d[1], v14.d[0]
+ tbl v14.16b, { v19.16b }, v16.16b
+ eor v12.16b, v4.16b, v0.16b
+ movi v0.4s, #64
+ eor v13.16b, v7.16b, v0.16b
+ tbl v13.16b, { v13.16b }, v16.16b
+ add v6.4s, v13.4s, v3.4s
+ dup v5.4s, w11
+ tbl v12.16b, { v12.16b }, v16.16b
+ add v1.4s, v14.4s, v1.4s
+ eor v9.16b, v6.16b, v2.16b
+ ldp q2, q0, [sp, #192]
+ add v5.4s, v12.4s, v5.4s
+ eor v19.16b, v1.16b, v10.16b
+ eor v10.16b, v5.16b, v11.16b
+ ushr v11.4s, v19.4s, #12
+ shl v19.4s, v19.4s, #20
+ orr v11.16b, v19.16b, v11.16b
+ ushr v19.4s, v10.4s, #12
+ shl v10.4s, v10.4s, #20
+ mov v22.d[1], v15.d[0]
+ orr v10.16b, v10.16b, v19.16b
+ ushr v19.4s, v9.4s, #12
+ shl v9.4s, v9.4s, #20
+ add v15.4s, v0.4s, v2.4s
+ orr v9.16b, v9.16b, v19.16b
+ dup v19.4s, w6
+ add v15.4s, v15.4s, v26.4s
+ eor v19.16b, v15.16b, v19.16b
+ tbl v3.16b, { v19.16b }, v16.16b
+ dup v19.4s, w13
+ add v8.4s, v3.4s, v19.4s
+ ldur q31, [x29, #-208]
+ eor v19.16b, v8.16b, v2.16b
+ ushr v0.4s, v19.4s, #12
+ shl v19.4s, v19.4s, #20
+ orr v2.16b, v19.16b, v0.16b
+ ldr q19, [x14, :lo12:.LCPI2_2]
+ add v17.4s, v17.4s, v31.4s
+ add v17.4s, v17.4s, v11.4s
+ eor v14.16b, v14.16b, v17.16b
+ tbl v14.16b, { v14.16b }, v19.16b
+ add v1.4s, v1.4s, v14.4s
+ eor v11.16b, v1.16b, v11.16b
+ add v4.4s, v4.4s, v18.4s
+ ushr v0.4s, v11.4s, #7
+ shl v11.4s, v11.4s, #25
+ add v4.4s, v4.4s, v10.4s
+ orr v0.16b, v11.16b, v0.16b
+ eor v11.16b, v12.16b, v4.16b
+ tbl v11.16b, { v11.16b }, v19.16b
+ add v5.4s, v5.4s, v11.4s
+ eor v10.16b, v5.16b, v10.16b
+ add v7.4s, v7.4s, v25.4s
+ ushr v12.4s, v10.4s, #7
+ shl v10.4s, v10.4s, #25
+ add v7.4s, v7.4s, v9.4s
+ orr v10.16b, v10.16b, v12.16b
+ eor v12.16b, v13.16b, v7.16b
+ tbl v12.16b, { v12.16b }, v19.16b
+ add v6.4s, v6.4s, v12.4s
+ eor v9.16b, v6.16b, v9.16b
+ ushr v13.4s, v9.4s, #7
+ shl v9.4s, v9.4s, #25
+ orr v9.16b, v9.16b, v13.16b
+ add v13.4s, v15.4s, v24.4s
+ add v13.4s, v13.4s, v2.4s
+ eor v3.16b, v3.16b, v13.16b
+ tbl v3.16b, { v3.16b }, v19.16b
+ add v8.4s, v8.4s, v3.4s
+ eor v2.16b, v8.16b, v2.16b
+ add v17.4s, v17.4s, v30.4s
+ ushr v15.4s, v2.4s, #7
+ shl v2.4s, v2.4s, #25
+ add v17.4s, v17.4s, v10.4s
+ add v4.4s, v4.4s, v20.4s
+ orr v2.16b, v2.16b, v15.16b
+ eor v3.16b, v3.16b, v17.16b
+ add v4.4s, v4.4s, v9.4s
+ add v7.4s, v7.4s, v21.4s
+ tbl v3.16b, { v3.16b }, v16.16b
+ eor v14.16b, v14.16b, v4.16b
+ add v7.4s, v7.4s, v2.4s
+ add v13.4s, v13.4s, v22.4s
+ mov v28.16b, v26.16b
+ stur q26, [x29, #-112]
+ mov v26.16b, v18.16b
+ mov v18.16b, v24.16b
+ stur q24, [x29, #-160]
+ add v6.4s, v6.4s, v3.4s
+ mov v24.16b, v20.16b
+ tbl v14.16b, { v14.16b }, v16.16b
+ eor v11.16b, v11.16b, v7.16b
+ add v13.4s, v13.4s, v0.4s
+ ldr q20, [sp, #80]
+ eor v10.16b, v6.16b, v10.16b
+ add v8.4s, v8.4s, v14.4s
+ tbl v11.16b, { v11.16b }, v16.16b
+ eor v12.16b, v12.16b, v13.16b
+ stp q30, q22, [x29, #-192]
+ ushr v15.4s, v10.4s, #12
+ shl v10.4s, v10.4s, #20
+ eor v9.16b, v8.16b, v9.16b
+ add v1.4s, v1.4s, v11.4s
+ tbl v12.16b, { v12.16b }, v16.16b
+ mov v30.16b, v27.16b
+ add v17.4s, v17.4s, v27.4s
+ ldr q27, [sp, #224]
+ orr v10.16b, v10.16b, v15.16b
+ ushr v15.4s, v9.4s, #12
+ shl v9.4s, v9.4s, #20
+ eor v2.16b, v1.16b, v2.16b
+ add v5.4s, v5.4s, v12.4s
+ orr v9.16b, v9.16b, v15.16b
+ ushr v15.4s, v2.4s, #12
+ shl v2.4s, v2.4s, #20
+ eor v0.16b, v5.16b, v0.16b
+ add v17.4s, v17.4s, v10.4s
+ add v4.4s, v4.4s, v20.4s
+ orr v2.16b, v2.16b, v15.16b
+ ushr v15.4s, v0.4s, #12
+ shl v0.4s, v0.4s, #20
+ eor v3.16b, v3.16b, v17.16b
+ add v4.4s, v4.4s, v9.4s
+ add v7.4s, v7.4s, v23.4s
+ orr v0.16b, v0.16b, v15.16b
+ tbl v3.16b, { v3.16b }, v19.16b
+ eor v14.16b, v14.16b, v4.16b
+ add v7.4s, v7.4s, v2.4s
+ add v13.4s, v13.4s, v27.4s
+ add v6.4s, v6.4s, v3.4s
+ tbl v14.16b, { v14.16b }, v19.16b
+ eor v11.16b, v11.16b, v7.16b
+ add v13.4s, v13.4s, v0.4s
+ eor v10.16b, v6.16b, v10.16b
+ add v8.4s, v8.4s, v14.4s
+ tbl v11.16b, { v11.16b }, v19.16b
+ eor v12.16b, v12.16b, v13.16b
+ stur q21, [x29, #-144]
+ ushr v15.4s, v10.4s, #7
+ shl v10.4s, v10.4s, #25
+ eor v9.16b, v8.16b, v9.16b
+ add v1.4s, v1.4s, v11.4s
+ tbl v12.16b, { v12.16b }, v19.16b
+ ldur q21, [x29, #-80]
+ orr v10.16b, v10.16b, v15.16b
+ ushr v15.4s, v9.4s, #7
+ shl v9.4s, v9.4s, #25
+ eor v2.16b, v1.16b, v2.16b
+ add v5.4s, v5.4s, v12.4s
+ orr v9.16b, v9.16b, v15.16b
+ ushr v15.4s, v2.4s, #7
+ shl v2.4s, v2.4s, #25
+ eor v0.16b, v5.16b, v0.16b
+ orr v2.16b, v2.16b, v15.16b
+ ushr v15.4s, v0.4s, #7
+ shl v0.4s, v0.4s, #25
+ orr v0.16b, v0.16b, v15.16b
+ add v17.4s, v17.4s, v21.4s
+ add v17.4s, v17.4s, v0.4s
+ add v4.4s, v4.4s, v26.4s
+ eor v14.16b, v14.16b, v17.16b
+ add v4.4s, v4.4s, v10.4s
+ add v7.4s, v7.4s, v18.4s
+ tbl v14.16b, { v14.16b }, v16.16b
+ eor v11.16b, v11.16b, v4.16b
+ add v7.4s, v7.4s, v9.4s
+ add v13.4s, v13.4s, v29.4s
+ add v1.4s, v1.4s, v14.4s
+ tbl v11.16b, { v11.16b }, v16.16b
+ eor v12.16b, v12.16b, v7.16b
+ add v13.4s, v13.4s, v2.4s
+ eor v0.16b, v0.16b, v1.16b
+ add v5.4s, v5.4s, v11.4s
+ tbl v12.16b, { v12.16b }, v16.16b
+ eor v3.16b, v3.16b, v13.16b
+ ldur q22, [x29, #-64]
+ ushr v15.4s, v0.4s, #12
+ shl v0.4s, v0.4s, #20
+ eor v10.16b, v5.16b, v10.16b
+ add v6.4s, v6.4s, v12.4s
+ tbl v3.16b, { v3.16b }, v16.16b
+ orr v0.16b, v0.16b, v15.16b
+ ushr v15.4s, v10.4s, #12
+ shl v10.4s, v10.4s, #20
+ eor v9.16b, v6.16b, v9.16b
+ add v8.4s, v8.4s, v3.4s
+ add v17.4s, v17.4s, v28.4s
+ orr v10.16b, v10.16b, v15.16b
+ ushr v15.4s, v9.4s, #12
+ shl v9.4s, v9.4s, #20
+ eor v2.16b, v8.16b, v2.16b
+ add v17.4s, v17.4s, v0.4s
+ add v4.4s, v4.4s, v24.4s
+ orr v9.16b, v9.16b, v15.16b
+ ushr v15.4s, v2.4s, #12
+ shl v2.4s, v2.4s, #20
+ eor v14.16b, v14.16b, v17.16b
+ add v4.4s, v4.4s, v10.4s
+ add v7.4s, v7.4s, v22.4s
+ orr v2.16b, v2.16b, v15.16b
+ tbl v14.16b, { v14.16b }, v19.16b
+ eor v11.16b, v11.16b, v4.16b
+ add v7.4s, v7.4s, v9.4s
+ add v13.4s, v13.4s, v23.4s
+ add v1.4s, v1.4s, v14.4s
+ tbl v11.16b, { v11.16b }, v19.16b
+ eor v12.16b, v12.16b, v7.16b
+ add v13.4s, v13.4s, v2.4s
+ eor v0.16b, v0.16b, v1.16b
+ add v5.4s, v5.4s, v11.4s
+ tbl v12.16b, { v12.16b }, v19.16b
+ eor v3.16b, v3.16b, v13.16b
+ ldur q22, [x29, #-144]
+ ushr v15.4s, v0.4s, #7
+ shl v0.4s, v0.4s, #25
+ eor v10.16b, v5.16b, v10.16b
+ add v6.4s, v6.4s, v12.4s
+ tbl v3.16b, { v3.16b }, v19.16b
+ orr v0.16b, v0.16b, v15.16b
+ ushr v15.4s, v10.4s, #7
+ shl v10.4s, v10.4s, #25
+ eor v9.16b, v6.16b, v9.16b
+ add v8.4s, v8.4s, v3.4s
+ orr v10.16b, v10.16b, v15.16b
+ ushr v15.4s, v9.4s, #7
+ shl v9.4s, v9.4s, #25
+ eor v2.16b, v8.16b, v2.16b
+ add v17.4s, v17.4s, v31.4s
+ orr v9.16b, v9.16b, v15.16b
+ ushr v15.4s, v2.4s, #7
+ shl v2.4s, v2.4s, #25
+ add v17.4s, v17.4s, v10.4s
+ add v4.4s, v4.4s, v22.4s
+ orr v2.16b, v2.16b, v15.16b
+ eor v3.16b, v3.16b, v17.16b
+ add v4.4s, v4.4s, v9.4s
+ add v7.4s, v7.4s, v30.4s
+ tbl v3.16b, { v3.16b }, v16.16b
+ eor v14.16b, v14.16b, v4.16b
+ add v7.4s, v7.4s, v2.4s
+ add v13.4s, v13.4s, v27.4s
+ add v6.4s, v6.4s, v3.4s
+ tbl v14.16b, { v14.16b }, v16.16b
+ eor v11.16b, v11.16b, v7.16b
+ add v13.4s, v13.4s, v0.4s
+ ldr q27, [sp, #96]
+ mov v21.16b, v26.16b
+ stur q26, [x29, #-96]
+ mov v28.16b, v31.16b
+ eor v10.16b, v6.16b, v10.16b
+ add v8.4s, v8.4s, v14.4s
+ tbl v11.16b, { v11.16b }, v16.16b
+ eor v12.16b, v12.16b, v13.16b
+ ldp q31, q26, [x29, #-192]
+ ushr v15.4s, v10.4s, #12
+ shl v10.4s, v10.4s, #20
+ eor v9.16b, v8.16b, v9.16b
+ add v1.4s, v1.4s, v11.4s
+ tbl v12.16b, { v12.16b }, v16.16b
+ orr v10.16b, v10.16b, v15.16b
+ ushr v15.4s, v9.4s, #12
+ shl v9.4s, v9.4s, #20
+ eor v2.16b, v1.16b, v2.16b
+ add v5.4s, v5.4s, v12.4s
+ add v17.4s, v17.4s, v20.4s
+ orr v9.16b, v9.16b, v15.16b
+ ushr v15.4s, v2.4s, #12
+ shl v2.4s, v2.4s, #20
+ eor v0.16b, v5.16b, v0.16b
+ add v17.4s, v17.4s, v10.4s
+ add v4.4s, v4.4s, v27.4s
+ orr v2.16b, v2.16b, v15.16b
+ ushr v15.4s, v0.4s, #12
+ shl v0.4s, v0.4s, #20
+ eor v3.16b, v3.16b, v17.16b
+ add v4.4s, v4.4s, v9.4s
+ add v7.4s, v7.4s, v26.4s
+ orr v0.16b, v0.16b, v15.16b
+ tbl v3.16b, { v3.16b }, v19.16b
+ eor v14.16b, v14.16b, v4.16b
+ add v7.4s, v7.4s, v2.4s
+ add v13.4s, v13.4s, v31.4s
+ add v6.4s, v6.4s, v3.4s
+ tbl v14.16b, { v14.16b }, v19.16b
+ eor v11.16b, v11.16b, v7.16b
+ add v13.4s, v13.4s, v0.4s
+ eor v10.16b, v6.16b, v10.16b
+ add v8.4s, v8.4s, v14.4s
+ tbl v11.16b, { v11.16b }, v19.16b
+ eor v12.16b, v12.16b, v13.16b
+ ushr v15.4s, v10.4s, #7
+ shl v10.4s, v10.4s, #25
+ eor v9.16b, v8.16b, v9.16b
+ add v1.4s, v1.4s, v11.4s
+ tbl v12.16b, { v12.16b }, v19.16b
+ orr v10.16b, v10.16b, v15.16b
+ ushr v15.4s, v9.4s, #7
+ shl v9.4s, v9.4s, #25
+ eor v2.16b, v1.16b, v2.16b
+ add v5.4s, v5.4s, v12.4s
+ orr v9.16b, v9.16b, v15.16b
+ ushr v15.4s, v2.4s, #7
+ shl v2.4s, v2.4s, #25
+ eor v0.16b, v5.16b, v0.16b
+ mov v18.16b, v24.16b
+ mov v24.16b, v20.16b
+ orr v2.16b, v2.16b, v15.16b
+ ushr v15.4s, v0.4s, #7
+ shl v0.4s, v0.4s, #25
+ ldur q20, [x29, #-160]
+ orr v0.16b, v0.16b, v15.16b
+ add v17.4s, v17.4s, v21.4s
+ add v17.4s, v17.4s, v0.4s
+ add v4.4s, v4.4s, v18.4s
+ eor v14.16b, v14.16b, v17.16b
+ add v4.4s, v4.4s, v10.4s
+ add v7.4s, v7.4s, v23.4s
+ tbl v14.16b, { v14.16b }, v16.16b
+ eor v11.16b, v11.16b, v4.16b
+ add v7.4s, v7.4s, v9.4s
+ add v13.4s, v13.4s, v20.4s
+ add v1.4s, v1.4s, v14.4s
+ tbl v11.16b, { v11.16b }, v16.16b
+ eor v12.16b, v12.16b, v7.16b
+ add v13.4s, v13.4s, v2.4s
+ eor v0.16b, v0.16b, v1.16b
+ add v5.4s, v5.4s, v11.4s
+ tbl v12.16b, { v12.16b }, v16.16b
+ eor v3.16b, v3.16b, v13.16b
+ ldur q25, [x29, #-80]
+ ushr v15.4s, v0.4s, #12
+ shl v0.4s, v0.4s, #20
+ eor v10.16b, v5.16b, v10.16b
+ add v6.4s, v6.4s, v12.4s
+ tbl v3.16b, { v3.16b }, v16.16b
+ orr v0.16b, v0.16b, v15.16b
+ ushr v15.4s, v10.4s, #12
+ shl v10.4s, v10.4s, #20
+ eor v9.16b, v6.16b, v9.16b
+ add v8.4s, v8.4s, v3.4s
+ add v17.4s, v17.4s, v29.4s
+ orr v10.16b, v10.16b, v15.16b
+ ushr v15.4s, v9.4s, #12
+ shl v9.4s, v9.4s, #20
+ eor v2.16b, v8.16b, v2.16b
+ add v17.4s, v17.4s, v0.4s
+ add v4.4s, v4.4s, v22.4s
+ orr v9.16b, v9.16b, v15.16b
+ ushr v15.4s, v2.4s, #12
+ shl v2.4s, v2.4s, #20
+ eor v14.16b, v14.16b, v17.16b
+ add v4.4s, v4.4s, v10.4s
+ add v7.4s, v7.4s, v25.4s
+ orr v2.16b, v2.16b, v15.16b
+ tbl v14.16b, { v14.16b }, v19.16b
+ eor v11.16b, v11.16b, v4.16b
+ add v7.4s, v7.4s, v9.4s
+ add v13.4s, v13.4s, v26.4s
+ add v1.4s, v1.4s, v14.4s
+ tbl v11.16b, { v11.16b }, v19.16b
+ eor v12.16b, v12.16b, v7.16b
+ add v13.4s, v13.4s, v2.4s
+ ldur q25, [x29, #-112]
+ eor v0.16b, v0.16b, v1.16b
+ add v5.4s, v5.4s, v11.4s
+ tbl v12.16b, { v12.16b }, v19.16b
+ eor v3.16b, v3.16b, v13.16b
+ ushr v15.4s, v0.4s, #7
+ shl v0.4s, v0.4s, #25
+ eor v10.16b, v5.16b, v10.16b
+ add v6.4s, v6.4s, v12.4s
+ tbl v3.16b, { v3.16b }, v19.16b
+ orr v0.16b, v0.16b, v15.16b
+ ushr v15.4s, v10.4s, #7
+ shl v10.4s, v10.4s, #25
+ eor v9.16b, v6.16b, v9.16b
+ add v8.4s, v8.4s, v3.4s
+ orr v10.16b, v10.16b, v15.16b
+ ushr v15.4s, v9.4s, #7
+ shl v9.4s, v9.4s, #25
+ eor v2.16b, v8.16b, v2.16b
+ add v17.4s, v17.4s, v25.4s
+ orr v9.16b, v9.16b, v15.16b
+ ushr v15.4s, v2.4s, #7
+ shl v2.4s, v2.4s, #25
+ add v17.4s, v17.4s, v10.4s
+ add v4.4s, v4.4s, v30.4s
+ orr v2.16b, v2.16b, v15.16b
+ eor v3.16b, v3.16b, v17.16b
+ add v4.4s, v4.4s, v9.4s
+ add v7.4s, v7.4s, v24.4s
+ tbl v3.16b, { v3.16b }, v16.16b
+ eor v14.16b, v14.16b, v4.16b
+ add v7.4s, v7.4s, v2.4s
+ add v13.4s, v13.4s, v31.4s
+ add v6.4s, v6.4s, v3.4s
+ tbl v14.16b, { v14.16b }, v16.16b
+ eor v11.16b, v11.16b, v7.16b
+ add v13.4s, v13.4s, v0.4s
+ ldur q25, [x29, #-64]
+ eor v10.16b, v6.16b, v10.16b
+ add v8.4s, v8.4s, v14.4s
+ tbl v11.16b, { v11.16b }, v16.16b
+ eor v12.16b, v12.16b, v13.16b
+ ldr q31, [sp, #224]
+ ushr v15.4s, v10.4s, #12
+ shl v10.4s, v10.4s, #20
+ eor v9.16b, v8.16b, v9.16b
+ add v1.4s, v1.4s, v11.4s
+ tbl v12.16b, { v12.16b }, v16.16b
+ orr v10.16b, v10.16b, v15.16b
+ ushr v15.4s, v9.4s, #12
+ shl v9.4s, v9.4s, #20
+ eor v2.16b, v1.16b, v2.16b
+ add v5.4s, v5.4s, v12.4s
+ add v17.4s, v17.4s, v27.4s
+ orr v9.16b, v9.16b, v15.16b
+ ushr v15.4s, v2.4s, #12
+ shl v2.4s, v2.4s, #20
+ eor v0.16b, v5.16b, v0.16b
+ add v17.4s, v17.4s, v10.4s
+ add v4.4s, v4.4s, v25.4s
+ orr v2.16b, v2.16b, v15.16b
+ ushr v15.4s, v0.4s, #12
+ shl v0.4s, v0.4s, #20
+ eor v3.16b, v3.16b, v17.16b
+ add v4.4s, v4.4s, v9.4s
+ add v7.4s, v7.4s, v31.4s
+ orr v0.16b, v0.16b, v15.16b
+ tbl v3.16b, { v3.16b }, v19.16b
+ eor v14.16b, v14.16b, v4.16b
+ add v7.4s, v7.4s, v2.4s
+ add v13.4s, v13.4s, v28.4s
+ add v6.4s, v6.4s, v3.4s
+ tbl v14.16b, { v14.16b }, v19.16b
+ eor v11.16b, v11.16b, v7.16b
+ add v13.4s, v13.4s, v0.4s
+ eor v10.16b, v6.16b, v10.16b
+ add v8.4s, v8.4s, v14.4s
+ tbl v11.16b, { v11.16b }, v19.16b
+ eor v12.16b, v12.16b, v13.16b
+ ushr v15.4s, v10.4s, #7
+ shl v10.4s, v10.4s, #25
+ eor v9.16b, v8.16b, v9.16b
+ add v1.4s, v1.4s, v11.4s
+ tbl v12.16b, { v12.16b }, v19.16b
+ orr v10.16b, v10.16b, v15.16b
+ ushr v15.4s, v9.4s, #7
+ shl v9.4s, v9.4s, #25
+ eor v2.16b, v1.16b, v2.16b
+ add v5.4s, v5.4s, v12.4s
+ orr v9.16b, v9.16b, v15.16b
+ ushr v15.4s, v2.4s, #7
+ shl v2.4s, v2.4s, #25
+ eor v0.16b, v5.16b, v0.16b
+ orr v2.16b, v2.16b, v15.16b
+ ushr v15.4s, v0.4s, #7
+ shl v0.4s, v0.4s, #25
+ orr v0.16b, v0.16b, v15.16b
+ add v17.4s, v17.4s, v18.4s
+ add v17.4s, v17.4s, v0.4s
+ add v4.4s, v4.4s, v22.4s
+ eor v14.16b, v14.16b, v17.16b
+ add v4.4s, v4.4s, v10.4s
+ add v7.4s, v7.4s, v26.4s
+ tbl v14.16b, { v14.16b }, v16.16b
+ eor v11.16b, v11.16b, v4.16b
+ add v7.4s, v7.4s, v9.4s
+ add v13.4s, v13.4s, v23.4s
+ add v1.4s, v1.4s, v14.4s
+ tbl v11.16b, { v11.16b }, v16.16b
+ eor v12.16b, v12.16b, v7.16b
+ add v13.4s, v13.4s, v2.4s
+ mov v21.16b, v29.16b
+ stur q29, [x29, #-128]
+ mov v29.16b, v30.16b
+ mov v30.16b, v27.16b
+ mov v27.16b, v18.16b
+ str q18, [sp, #176]
+ eor v0.16b, v0.16b, v1.16b
+ mov v18.16b, v22.16b
+ add v5.4s, v5.4s, v11.4s
+ tbl v12.16b, { v12.16b }, v16.16b
+ eor v3.16b, v3.16b, v13.16b
+ ldur q22, [x29, #-96]
+ ushr v15.4s, v0.4s, #12
+ shl v0.4s, v0.4s, #20
+ eor v10.16b, v5.16b, v10.16b
+ add v6.4s, v6.4s, v12.4s
+ tbl v3.16b, { v3.16b }, v16.16b
+ orr v0.16b, v0.16b, v15.16b
+ ushr v15.4s, v10.4s, #12
+ shl v10.4s, v10.4s, #20
+ eor v9.16b, v6.16b, v9.16b
+ add v8.4s, v8.4s, v3.4s
+ add v17.4s, v17.4s, v20.4s
+ orr v10.16b, v10.16b, v15.16b
+ ushr v15.4s, v9.4s, #12
+ shl v9.4s, v9.4s, #20
+ eor v2.16b, v8.16b, v2.16b
+ add v17.4s, v17.4s, v0.4s
+ add v4.4s, v4.4s, v29.4s
+ orr v9.16b, v9.16b, v15.16b
+ ushr v15.4s, v2.4s, #12
+ shl v2.4s, v2.4s, #20
+ eor v14.16b, v14.16b, v17.16b
+ add v4.4s, v4.4s, v10.4s
+ add v7.4s, v7.4s, v22.4s
+ orr v2.16b, v2.16b, v15.16b
+ tbl v14.16b, { v14.16b }, v19.16b
+ eor v11.16b, v11.16b, v4.16b
+ add v7.4s, v7.4s, v9.4s
+ add v13.4s, v13.4s, v31.4s
+ add v1.4s, v1.4s, v14.4s
+ tbl v11.16b, { v11.16b }, v19.16b
+ eor v12.16b, v12.16b, v7.16b
+ add v13.4s, v13.4s, v2.4s
+ eor v0.16b, v0.16b, v1.16b
+ add v5.4s, v5.4s, v11.4s
+ tbl v12.16b, { v12.16b }, v19.16b
+ eor v3.16b, v3.16b, v13.16b
+ ushr v15.4s, v0.4s, #7
+ shl v0.4s, v0.4s, #25
+ eor v10.16b, v5.16b, v10.16b
+ add v6.4s, v6.4s, v12.4s
+ tbl v3.16b, { v3.16b }, v19.16b
+ orr v0.16b, v0.16b, v15.16b
+ ushr v15.4s, v10.4s, #7
+ shl v10.4s, v10.4s, #25
+ eor v9.16b, v6.16b, v9.16b
+ add v8.4s, v8.4s, v3.4s
+ orr v10.16b, v10.16b, v15.16b
+ ushr v15.4s, v9.4s, #7
+ shl v9.4s, v9.4s, #25
+ eor v2.16b, v8.16b, v2.16b
+ add v17.4s, v17.4s, v21.4s
+ orr v9.16b, v9.16b, v15.16b
+ ushr v15.4s, v2.4s, #7
+ shl v2.4s, v2.4s, #25
+ add v17.4s, v17.4s, v10.4s
+ add v4.4s, v4.4s, v24.4s
+ orr v2.16b, v2.16b, v15.16b
+ eor v3.16b, v3.16b, v17.16b
+ add v4.4s, v4.4s, v9.4s
+ add v7.4s, v7.4s, v30.4s
+ tbl v3.16b, { v3.16b }, v16.16b
+ eor v14.16b, v14.16b, v4.16b
+ add v7.4s, v7.4s, v2.4s
+ add v13.4s, v13.4s, v28.4s
+ add v6.4s, v6.4s, v3.4s
+ mov v22.16b, v24.16b
+ tbl v14.16b, { v14.16b }, v16.16b
+ eor v11.16b, v11.16b, v7.16b
+ add v13.4s, v13.4s, v0.4s
+ ldur q24, [x29, #-80]
+ eor v10.16b, v6.16b, v10.16b
+ add v8.4s, v8.4s, v14.4s
+ mov v21.16b, v30.16b
+ tbl v11.16b, { v11.16b }, v16.16b
+ eor v12.16b, v12.16b, v13.16b
+ ldur q30, [x29, #-192]
+ mov v20.16b, v29.16b
+ ushr v15.4s, v10.4s, #12
+ shl v10.4s, v10.4s, #20
+ eor v9.16b, v8.16b, v9.16b
+ add v1.4s, v1.4s, v11.4s
+ tbl v12.16b, { v12.16b }, v16.16b
+ ldur q29, [x29, #-112]
+ orr v10.16b, v10.16b, v15.16b
+ ushr v15.4s, v9.4s, #12
+ shl v9.4s, v9.4s, #20
+ eor v2.16b, v1.16b, v2.16b
+ add v5.4s, v5.4s, v12.4s
+ add v17.4s, v17.4s, v25.4s
+ orr v9.16b, v9.16b, v15.16b
+ ushr v15.4s, v2.4s, #12
+ shl v2.4s, v2.4s, #20
+ eor v0.16b, v5.16b, v0.16b
+ add v17.4s, v17.4s, v10.4s
+ add v4.4s, v4.4s, v24.4s
+ orr v2.16b, v2.16b, v15.16b
+ ushr v15.4s, v0.4s, #12
+ shl v0.4s, v0.4s, #20
+ eor v3.16b, v3.16b, v17.16b
+ add v4.4s, v4.4s, v9.4s
+ add v7.4s, v7.4s, v30.4s
+ orr v0.16b, v0.16b, v15.16b
+ tbl v3.16b, { v3.16b }, v19.16b
+ eor v14.16b, v14.16b, v4.16b
+ add v7.4s, v7.4s, v2.4s
+ add v13.4s, v13.4s, v29.4s
+ add v6.4s, v6.4s, v3.4s
+ tbl v14.16b, { v14.16b }, v19.16b
+ eor v11.16b, v11.16b, v7.16b
+ add v13.4s, v13.4s, v0.4s
+ eor v10.16b, v6.16b, v10.16b
+ add v8.4s, v8.4s, v14.4s
+ tbl v11.16b, { v11.16b }, v19.16b
+ eor v12.16b, v12.16b, v13.16b
+ ushr v15.4s, v10.4s, #7
+ shl v10.4s, v10.4s, #25
+ eor v9.16b, v8.16b, v9.16b
+ add v1.4s, v1.4s, v11.4s
+ tbl v12.16b, { v12.16b }, v19.16b
+ orr v10.16b, v10.16b, v15.16b
+ ushr v15.4s, v9.4s, #7
+ shl v9.4s, v9.4s, #25
+ eor v2.16b, v1.16b, v2.16b
+ add v5.4s, v5.4s, v12.4s
+ orr v9.16b, v9.16b, v15.16b
+ ushr v15.4s, v2.4s, #7
+ shl v2.4s, v2.4s, #25
+ eor v0.16b, v5.16b, v0.16b
+ orr v2.16b, v2.16b, v15.16b
+ ushr v15.4s, v0.4s, #7
+ shl v0.4s, v0.4s, #25
+ orr v0.16b, v0.16b, v15.16b
+ add v17.4s, v17.4s, v18.4s
+ add v17.4s, v17.4s, v0.4s
+ add v4.4s, v4.4s, v20.4s
+ eor v14.16b, v14.16b, v17.16b
+ add v4.4s, v4.4s, v10.4s
+ add v7.4s, v7.4s, v31.4s
+ tbl v14.16b, { v14.16b }, v16.16b
+ eor v11.16b, v11.16b, v4.16b
+ add v7.4s, v7.4s, v9.4s
+ add v13.4s, v13.4s, v26.4s
+ add v1.4s, v1.4s, v14.4s
+ tbl v11.16b, { v11.16b }, v16.16b
+ eor v12.16b, v12.16b, v7.16b
+ add v13.4s, v13.4s, v2.4s
+ eor v0.16b, v0.16b, v1.16b
+ add v5.4s, v5.4s, v11.4s
+ tbl v12.16b, { v12.16b }, v16.16b
+ eor v3.16b, v3.16b, v13.16b
+ ushr v15.4s, v0.4s, #12
+ shl v0.4s, v0.4s, #20
+ eor v10.16b, v5.16b, v10.16b
+ add v6.4s, v6.4s, v12.4s
+ tbl v3.16b, { v3.16b }, v16.16b
+ orr v0.16b, v0.16b, v15.16b
+ ushr v15.4s, v10.4s, #12
+ shl v10.4s, v10.4s, #20
+ eor v9.16b, v6.16b, v9.16b
+ add v8.4s, v8.4s, v3.4s
+ add v17.4s, v17.4s, v23.4s
+ orr v10.16b, v10.16b, v15.16b
+ ushr v15.4s, v9.4s, #12
+ shl v9.4s, v9.4s, #20
+ eor v2.16b, v8.16b, v2.16b
+ add v17.4s, v17.4s, v0.4s
+ add v4.4s, v4.4s, v22.4s
+ orr v9.16b, v9.16b, v15.16b
+ ushr v15.4s, v2.4s, #12
+ shl v2.4s, v2.4s, #20
+ eor v14.16b, v14.16b, v17.16b
+ add v4.4s, v4.4s, v10.4s
+ add v7.4s, v7.4s, v27.4s
+ orr v2.16b, v2.16b, v15.16b
+ tbl v14.16b, { v14.16b }, v19.16b
+ eor v11.16b, v11.16b, v4.16b
+ add v7.4s, v7.4s, v9.4s
+ add v13.4s, v13.4s, v30.4s
+ add v1.4s, v1.4s, v14.4s
+ tbl v11.16b, { v11.16b }, v19.16b
+ eor v12.16b, v12.16b, v7.16b
+ add v13.4s, v13.4s, v2.4s
+ ldur q27, [x29, #-160]
+ eor v0.16b, v0.16b, v1.16b
+ add v5.4s, v5.4s, v11.4s
+ tbl v12.16b, { v12.16b }, v19.16b
+ eor v3.16b, v3.16b, v13.16b
+ ushr v15.4s, v0.4s, #7
+ shl v0.4s, v0.4s, #25
+ eor v10.16b, v5.16b, v10.16b
+ add v6.4s, v6.4s, v12.4s
+ tbl v3.16b, { v3.16b }, v19.16b
+ orr v0.16b, v0.16b, v15.16b
+ ushr v15.4s, v10.4s, #7
+ shl v10.4s, v10.4s, #25
+ eor v9.16b, v6.16b, v9.16b
+ add v8.4s, v8.4s, v3.4s
+ orr v10.16b, v10.16b, v15.16b
+ ushr v15.4s, v9.4s, #7
+ shl v9.4s, v9.4s, #25
+ eor v2.16b, v8.16b, v2.16b
+ add v17.4s, v17.4s, v27.4s
+ mov v28.16b, v25.16b
+ orr v9.16b, v9.16b, v15.16b
+ ushr v15.4s, v2.4s, #7
+ shl v2.4s, v2.4s, #25
+ add v17.4s, v17.4s, v10.4s
+ add v4.4s, v4.4s, v21.4s
+ orr v2.16b, v2.16b, v15.16b
+ eor v3.16b, v3.16b, v17.16b
+ add v4.4s, v4.4s, v9.4s
+ add v7.4s, v7.4s, v28.4s
+ tbl v3.16b, { v3.16b }, v16.16b
+ eor v14.16b, v14.16b, v4.16b
+ add v7.4s, v7.4s, v2.4s
+ add v13.4s, v13.4s, v29.4s
+ mov v25.16b, v31.16b
+ add v6.4s, v6.4s, v3.4s
+ tbl v14.16b, { v14.16b }, v16.16b
+ eor v11.16b, v11.16b, v7.16b
+ add v13.4s, v13.4s, v0.4s
+ ldur q31, [x29, #-96]
+ eor v10.16b, v6.16b, v10.16b
+ add v8.4s, v8.4s, v14.4s
+ tbl v11.16b, { v11.16b }, v16.16b
+ eor v12.16b, v12.16b, v13.16b
+ ldur q28, [x29, #-208]
+ mov v18.16b, v20.16b
+ str q20, [sp, #144]
+ ushr v15.4s, v10.4s, #12
+ shl v10.4s, v10.4s, #20
+ eor v9.16b, v8.16b, v9.16b
+ add v1.4s, v1.4s, v11.4s
+ tbl v12.16b, { v12.16b }, v16.16b
+ ldur q20, [x29, #-128]
+ orr v10.16b, v10.16b, v15.16b
+ ushr v15.4s, v9.4s, #12
+ shl v9.4s, v9.4s, #20
+ eor v2.16b, v1.16b, v2.16b
+ add v5.4s, v5.4s, v12.4s
+ add v17.4s, v17.4s, v24.4s
+ orr v9.16b, v9.16b, v15.16b
+ ushr v15.4s, v2.4s, #12
+ shl v2.4s, v2.4s, #20
+ eor v0.16b, v5.16b, v0.16b
+ add v17.4s, v17.4s, v10.4s
+ add v4.4s, v4.4s, v31.4s
+ orr v2.16b, v2.16b, v15.16b
+ ushr v15.4s, v0.4s, #12
+ shl v0.4s, v0.4s, #20
+ eor v3.16b, v3.16b, v17.16b
+ add v4.4s, v4.4s, v9.4s
+ add v7.4s, v7.4s, v28.4s
+ orr v0.16b, v0.16b, v15.16b
+ tbl v3.16b, { v3.16b }, v19.16b
+ eor v14.16b, v14.16b, v4.16b
+ add v7.4s, v7.4s, v2.4s
+ add v13.4s, v13.4s, v20.4s
+ add v6.4s, v6.4s, v3.4s
+ tbl v14.16b, { v14.16b }, v19.16b
+ eor v11.16b, v11.16b, v7.16b
+ add v13.4s, v13.4s, v0.4s
+ eor v10.16b, v6.16b, v10.16b
+ add v8.4s, v8.4s, v14.4s
+ tbl v11.16b, { v11.16b }, v19.16b
+ eor v12.16b, v12.16b, v13.16b
+ ushr v15.4s, v10.4s, #7
+ shl v10.4s, v10.4s, #25
+ eor v9.16b, v8.16b, v9.16b
+ add v1.4s, v1.4s, v11.4s
+ tbl v12.16b, { v12.16b }, v19.16b
+ orr v10.16b, v10.16b, v15.16b
+ ushr v15.4s, v9.4s, #7
+ shl v9.4s, v9.4s, #25
+ eor v2.16b, v1.16b, v2.16b
+ add v5.4s, v5.4s, v12.4s
+ orr v9.16b, v9.16b, v15.16b
+ ushr v15.4s, v2.4s, #7
+ shl v2.4s, v2.4s, #25
+ eor v0.16b, v5.16b, v0.16b
+ orr v2.16b, v2.16b, v15.16b
+ ushr v15.4s, v0.4s, #7
+ shl v0.4s, v0.4s, #25
+ orr v0.16b, v0.16b, v15.16b
+ add v17.4s, v17.4s, v18.4s
+ add v17.4s, v17.4s, v0.4s
+ add v4.4s, v4.4s, v22.4s
+ eor v14.16b, v14.16b, v17.16b
+ add v4.4s, v4.4s, v10.4s
+ add v7.4s, v7.4s, v30.4s
+ tbl v14.16b, { v14.16b }, v16.16b
+ eor v11.16b, v11.16b, v4.16b
+ add v7.4s, v7.4s, v9.4s
+ add v13.4s, v13.4s, v25.4s
+ add v1.4s, v1.4s, v14.4s
+ tbl v11.16b, { v11.16b }, v16.16b
+ eor v12.16b, v12.16b, v7.16b
+ add v13.4s, v13.4s, v2.4s
+ eor v0.16b, v0.16b, v1.16b
+ add v5.4s, v5.4s, v11.4s
+ tbl v12.16b, { v12.16b }, v16.16b
+ eor v3.16b, v3.16b, v13.16b
+ add v17.4s, v17.4s, v26.4s
+ mov v26.16b, v21.16b
+ add v4.4s, v4.4s, v21.4s
+ ldur q21, [x29, #-144]
+ ushr v15.4s, v0.4s, #12
+ shl v0.4s, v0.4s, #20
+ eor v10.16b, v5.16b, v10.16b
+ add v6.4s, v6.4s, v12.4s
+ tbl v3.16b, { v3.16b }, v16.16b
+ orr v0.16b, v0.16b, v15.16b
+ ushr v15.4s, v10.4s, #12
+ shl v10.4s, v10.4s, #20
+ eor v9.16b, v6.16b, v9.16b
+ add v8.4s, v8.4s, v3.4s
+ orr v10.16b, v10.16b, v15.16b
+ ushr v15.4s, v9.4s, #12
+ shl v9.4s, v9.4s, #20
+ eor v2.16b, v8.16b, v2.16b
+ add v17.4s, v17.4s, v0.4s
+ orr v9.16b, v9.16b, v15.16b
+ ushr v15.4s, v2.4s, #12
+ shl v2.4s, v2.4s, #20
+ eor v14.16b, v14.16b, v17.16b
+ add v4.4s, v4.4s, v10.4s
+ add v7.4s, v7.4s, v21.4s
+ orr v2.16b, v2.16b, v15.16b
+ tbl v14.16b, { v14.16b }, v19.16b
+ eor v11.16b, v11.16b, v4.16b
+ add v7.4s, v7.4s, v9.4s
+ add v13.4s, v13.4s, v28.4s
+ add v1.4s, v1.4s, v14.4s
+ tbl v11.16b, { v11.16b }, v19.16b
+ eor v12.16b, v12.16b, v7.16b
+ add v13.4s, v13.4s, v2.4s
+ str q23, [sp, #160]
+ eor v0.16b, v0.16b, v1.16b
+ add v5.4s, v5.4s, v11.4s
+ tbl v12.16b, { v12.16b }, v19.16b
+ eor v3.16b, v3.16b, v13.16b
+ add v17.4s, v17.4s, v23.4s
+ ldur q23, [x29, #-64]
+ ushr v15.4s, v0.4s, #7
+ shl v0.4s, v0.4s, #25
+ eor v10.16b, v5.16b, v10.16b
+ add v6.4s, v6.4s, v12.4s
+ tbl v3.16b, { v3.16b }, v19.16b
+ orr v0.16b, v0.16b, v15.16b
+ ushr v15.4s, v10.4s, #7
+ shl v10.4s, v10.4s, #25
+ eor v9.16b, v6.16b, v9.16b
+ add v8.4s, v8.4s, v3.4s
+ orr v10.16b, v10.16b, v15.16b
+ ushr v15.4s, v9.4s, #7
+ shl v9.4s, v9.4s, #25
+ eor v2.16b, v8.16b, v2.16b
+ orr v9.16b, v9.16b, v15.16b
+ ushr v15.4s, v2.4s, #7
+ shl v2.4s, v2.4s, #25
+ add v17.4s, v17.4s, v10.4s
+ add v4.4s, v4.4s, v23.4s
+ orr v2.16b, v2.16b, v15.16b
+ eor v3.16b, v3.16b, v17.16b
+ add v4.4s, v4.4s, v9.4s
+ add v7.4s, v7.4s, v24.4s
+ tbl v3.16b, { v3.16b }, v16.16b
+ eor v14.16b, v14.16b, v4.16b
+ add v7.4s, v7.4s, v2.4s
+ add v6.4s, v6.4s, v3.4s
+ tbl v14.16b, { v14.16b }, v16.16b
+ eor v11.16b, v11.16b, v7.16b
+ add v13.4s, v13.4s, v20.4s
+ eor v10.16b, v6.16b, v10.16b
+ add v8.4s, v8.4s, v14.4s
+ tbl v11.16b, { v11.16b }, v16.16b
+ add v13.4s, v13.4s, v0.4s
+ ldr q20, [sp, #176]
+ ushr v15.4s, v10.4s, #12
+ shl v10.4s, v10.4s, #20
+ eor v9.16b, v8.16b, v9.16b
+ add v1.4s, v1.4s, v11.4s
+ eor v12.16b, v12.16b, v13.16b
+ orr v10.16b, v10.16b, v15.16b
+ ushr v15.4s, v9.4s, #12
+ shl v9.4s, v9.4s, #20
+ eor v2.16b, v1.16b, v2.16b
+ tbl v12.16b, { v12.16b }, v16.16b
+ orr v9.16b, v9.16b, v15.16b
+ ushr v15.4s, v2.4s, #12
+ shl v2.4s, v2.4s, #20
+ add v5.4s, v5.4s, v12.4s
+ add v17.4s, v17.4s, v31.4s
+ orr v2.16b, v2.16b, v15.16b
+ eor v0.16b, v5.16b, v0.16b
+ add v17.4s, v17.4s, v10.4s
+ add v4.4s, v4.4s, v20.4s
+ add v7.4s, v7.4s, v29.4s
+ ushr v15.4s, v0.4s, #12
+ shl v0.4s, v0.4s, #20
+ eor v3.16b, v3.16b, v17.16b
+ add v4.4s, v4.4s, v9.4s
+ add v7.4s, v7.4s, v2.4s
+ orr v0.16b, v0.16b, v15.16b
+ mov v15.16b, v31.16b
+ add v17.4s, v17.4s, v22.4s
+ eor v31.16b, v14.16b, v4.16b
+ eor v22.16b, v11.16b, v7.16b
+ add v11.4s, v13.4s, v27.4s
+ tbl v3.16b, { v3.16b }, v19.16b
+ add v11.4s, v11.4s, v0.4s
+ tbl v31.16b, { v31.16b }, v19.16b
+ add v6.4s, v6.4s, v3.4s
+ eor v12.16b, v12.16b, v11.16b
+ tbl v22.16b, { v22.16b }, v19.16b
+ add v8.4s, v8.4s, v31.4s
+ eor v10.16b, v6.16b, v10.16b
+ add v30.4s, v11.4s, v30.4s
+ tbl v11.16b, { v12.16b }, v19.16b
+ add v1.4s, v1.4s, v22.4s
+ eor v9.16b, v8.16b, v9.16b
+ ushr v12.4s, v10.4s, #7
+ shl v10.4s, v10.4s, #25
+ add v5.4s, v5.4s, v11.4s
+ eor v2.16b, v1.16b, v2.16b
+ orr v10.16b, v10.16b, v12.16b
+ ushr v12.4s, v9.4s, #7
+ shl v9.4s, v9.4s, #25
+ eor v0.16b, v5.16b, v0.16b
+ orr v9.16b, v9.16b, v12.16b
+ ushr v12.4s, v2.4s, #7
+ shl v2.4s, v2.4s, #25
+ orr v2.16b, v2.16b, v12.16b
+ ushr v12.4s, v0.4s, #7
+ shl v0.4s, v0.4s, #25
+ orr v0.16b, v0.16b, v12.16b
+ add v4.4s, v4.4s, v26.4s
+ add v17.4s, v17.4s, v0.4s
+ add v7.4s, v7.4s, v28.4s
+ mov v18.16b, v27.16b
+ eor v31.16b, v31.16b, v17.16b
+ add v4.4s, v4.4s, v10.4s
+ add v27.4s, v30.4s, v2.4s
+ eor v22.16b, v22.16b, v4.16b
+ add v7.4s, v7.4s, v9.4s
+ eor v3.16b, v3.16b, v27.16b
+ add v26.4s, v27.4s, v29.4s
+ tbl v27.16b, { v31.16b }, v16.16b
+ eor v28.16b, v11.16b, v7.16b
+ tbl v22.16b, { v22.16b }, v16.16b
+ add v1.4s, v1.4s, v27.4s
+ add v4.4s, v4.4s, v23.4s
+ ldr q23, [sp, #144]
+ tbl v28.16b, { v28.16b }, v16.16b
+ tbl v3.16b, { v3.16b }, v16.16b
+ add v5.4s, v5.4s, v22.4s
+ eor v0.16b, v0.16b, v1.16b
+ add v6.4s, v6.4s, v28.4s
+ add v29.4s, v8.4s, v3.4s
+ eor v30.16b, v5.16b, v10.16b
+ ushr v8.4s, v0.4s, #12
+ shl v0.4s, v0.4s, #20
+ eor v31.16b, v6.16b, v9.16b
+ orr v0.16b, v0.16b, v8.16b
+ ushr v8.4s, v30.4s, #12
+ shl v30.4s, v30.4s, #20
+ eor v2.16b, v29.16b, v2.16b
+ orr v30.16b, v30.16b, v8.16b
+ ushr v8.4s, v31.4s, #12
+ shl v31.4s, v31.4s, #20
+ add v17.4s, v17.4s, v25.4s
+ add v7.4s, v7.4s, v23.4s
+ orr v31.16b, v31.16b, v8.16b
+ ushr v8.4s, v2.4s, #12
+ shl v2.4s, v2.4s, #20
+ ldur q23, [x29, #-176]
+ orr v2.16b, v2.16b, v8.16b
+ add v17.4s, v17.4s, v0.4s
+ eor v27.16b, v27.16b, v17.16b
+ add v4.4s, v4.4s, v30.4s
+ add v25.4s, v26.4s, v2.4s
+ eor v22.16b, v22.16b, v4.16b
+ add v4.4s, v4.4s, v24.4s
+ add v7.4s, v7.4s, v31.4s
+ eor v3.16b, v3.16b, v25.16b
+ add v24.4s, v25.4s, v18.4s
+ tbl v25.16b, { v27.16b }, v19.16b
+ add v17.4s, v17.4s, v23.4s
+ eor v23.16b, v28.16b, v7.16b
+ tbl v22.16b, { v22.16b }, v19.16b
+ add v1.4s, v1.4s, v25.4s
+ tbl v23.16b, { v23.16b }, v19.16b
+ tbl v3.16b, { v3.16b }, v19.16b
+ add v5.4s, v5.4s, v22.4s
+ eor v0.16b, v0.16b, v1.16b
+ add v6.4s, v6.4s, v23.4s
+ add v26.4s, v29.4s, v3.4s
+ eor v27.16b, v5.16b, v30.16b
+ ushr v29.4s, v0.4s, #7
+ shl v0.4s, v0.4s, #25
+ eor v28.16b, v6.16b, v31.16b
+ orr v0.16b, v0.16b, v29.16b
+ ushr v29.4s, v27.4s, #7
+ shl v27.4s, v27.4s, #25
+ eor v2.16b, v26.16b, v2.16b
+ orr v27.16b, v27.16b, v29.16b
+ ushr v29.4s, v28.4s, #7
+ shl v28.4s, v28.4s, #25
+ ldur q18, [x29, #-128]
+ orr v28.16b, v28.16b, v29.16b
+ ushr v29.4s, v2.4s, #7
+ shl v2.4s, v2.4s, #25
+ add v7.4s, v7.4s, v15.4s
+ orr v2.16b, v2.16b, v29.16b
+ add v17.4s, v17.4s, v27.4s
+ add v4.4s, v4.4s, v28.4s
+ add v7.4s, v7.4s, v2.4s
+ eor v3.16b, v3.16b, v17.16b
+ add v17.4s, v17.4s, v20.4s
+ eor v20.16b, v25.16b, v4.16b
+ add v4.4s, v4.4s, v21.4s
+ eor v21.16b, v22.16b, v7.16b
+ add v7.4s, v7.4s, v18.4s
+ add v18.4s, v24.4s, v0.4s
+ eor v22.16b, v23.16b, v18.16b
+ ldr q23, [sp, #160]
+ tbl v3.16b, { v3.16b }, v16.16b
+ tbl v20.16b, { v20.16b }, v16.16b
+ add v6.4s, v6.4s, v3.4s
+ add v18.4s, v18.4s, v23.4s
+ tbl v21.16b, { v21.16b }, v16.16b
+ tbl v16.16b, { v22.16b }, v16.16b
+ add v22.4s, v26.4s, v20.4s
+ eor v23.16b, v6.16b, v27.16b
+ add v1.4s, v1.4s, v21.4s
+ eor v24.16b, v22.16b, v28.16b
+ ushr v25.4s, v23.4s, #12
+ shl v23.4s, v23.4s, #20
+ add v5.4s, v5.4s, v16.4s
+ eor v2.16b, v1.16b, v2.16b
+ orr v23.16b, v23.16b, v25.16b
+ ushr v25.4s, v24.4s, #12
+ shl v24.4s, v24.4s, #20
+ eor v0.16b, v5.16b, v0.16b
+ orr v24.16b, v24.16b, v25.16b
+ ushr v25.4s, v2.4s, #12
+ shl v2.4s, v2.4s, #20
+ orr v2.16b, v2.16b, v25.16b
+ ushr v25.4s, v0.4s, #12
+ shl v0.4s, v0.4s, #20
+ orr v0.16b, v0.16b, v25.16b
+ add v25.4s, v7.4s, v2.4s
+ add v26.4s, v18.4s, v0.4s
+ eor v18.16b, v21.16b, v25.16b
+ add v17.4s, v17.4s, v23.4s
+ add v4.4s, v4.4s, v24.4s
+ eor v16.16b, v16.16b, v26.16b
+ tbl v21.16b, { v18.16b }, v19.16b
+ eor v3.16b, v3.16b, v17.16b
+ eor v7.16b, v20.16b, v4.16b
+ tbl v16.16b, { v16.16b }, v19.16b
+ add v1.4s, v1.4s, v21.4s
+ tbl v3.16b, { v3.16b }, v19.16b
+ tbl v20.16b, { v7.16b }, v19.16b
+ eor v2.16b, v1.16b, v2.16b
+ eor v7.16b, v1.16b, v17.16b
+ add v1.4s, v5.4s, v16.4s
+ eor v0.16b, v1.16b, v0.16b
+ eor v18.16b, v1.16b, v4.16b
+ add v1.4s, v6.4s, v3.4s
+ eor v4.16b, v1.16b, v23.16b
+ eor v6.16b, v25.16b, v1.16b
+ add v1.4s, v22.4s, v20.4s
+ eor v5.16b, v1.16b, v24.16b
+ eor v17.16b, v26.16b, v1.16b
+ ushr v1.4s, v4.4s, #7
+ shl v4.4s, v4.4s, #25
+ orr v1.16b, v4.16b, v1.16b
+ ushr v4.4s, v5.4s, #7
+ shl v5.4s, v5.4s, #25
+ orr v4.16b, v5.16b, v4.16b
+ ushr v5.4s, v2.4s, #7
+ shl v2.4s, v2.4s, #25
+ orr v2.16b, v2.16b, v5.16b
+ ushr v5.4s, v0.4s, #7
+ shl v0.4s, v0.4s, #25
+ orr v0.16b, v0.16b, v5.16b
+ eor v10.16b, v0.16b, v20.16b
+ eor v11.16b, v1.16b, v21.16b
+ eor v19.16b, v4.16b, v16.16b
+ cmp x0, x22
+ eor v16.16b, v2.16b, v3.16b
+ mov w6, w19
+ b.ne .LBB2_4
+.LBB2_7:
+ zip1 v0.4s, v7.4s, v18.4s
+ zip2 v1.4s, v7.4s, v18.4s
+ zip1 v2.4s, v6.4s, v17.4s
+ zip2 v3.4s, v6.4s, v17.4s
+ zip1 v4.4s, v10.4s, v11.4s
+ zip2 v5.4s, v10.4s, v11.4s
+ zip1 v6.4s, v19.4s, v16.4s
+ zip2 v7.4s, v19.4s, v16.4s
+ add x15, x20, #4
+ tst w5, #0x1
+ sub x28, x28, #4
+ zip1 v16.2d, v0.2d, v2.2d
+ zip2 v0.2d, v0.2d, v2.2d
+ zip1 v2.2d, v1.2d, v3.2d
+ zip2 v1.2d, v1.2d, v3.2d
+ zip1 v3.2d, v4.2d, v6.2d
+ zip2 v4.2d, v4.2d, v6.2d
+ zip1 v6.2d, v5.2d, v7.2d
+ zip2 v5.2d, v5.2d, v7.2d
+ add x24, x24, #32
+ csel x20, x15, x20, ne
+ cmp x28, #3
+ stp q16, q3, [x26]
+ stp q0, q4, [x26, #32]
+ stp q2, q6, [x26, #64]
+ stp q1, q5, [x26, #96]
+ add x26, x26, #128
+ b.hi .LBB2_2
+.LBB2_8:
+ cbz x28, .LBB2_16
+ orr w8, w7, w19
+ and x21, x5, #0x1
+ stur w8, [x29, #-64]
+.LBB2_10:
+ ldr x8, [sp, #40]
+ ldr x25, [x24]
+ ldur w4, [x29, #-64]
+ ldp q1, q0, [x8]
+ mov x8, x22
+ stp q1, q0, [x29, #-48]
+.LBB2_11:
+ subs x23, x8, #1
+ b.eq .LBB2_13
+ cbnz x8, .LBB2_14
+ b .LBB2_15
+.LBB2_13:
+ orr w4, w4, w27
+.LBB2_14:
+ sub x0, x29, #48
+ mov w2, #64
+ mov x1, x25
+ mov x3, x20
+ bl zfs_blake3_compress_in_place_sse41
+ add x25, x25, #64
+ mov x8, x23
+ mov w4, w19
+ b .LBB2_11
+.LBB2_15:
+ ldp q0, q1, [x29, #-48]
+ add x20, x20, x21
+ add x24, x24, #8
+ subs x28, x28, #1
+ stp q0, q1, [x26], #32
+ b.ne .LBB2_10
+.LBB2_16:
+ add sp, sp, #448
+ ldp x20, x19, [sp, #144]
+ ldp x22, x21, [sp, #128]
+ ldp x24, x23, [sp, #112]
+ ldp x26, x25, [sp, #96]
+ ldp x28, x27, [sp, #80]
+ ldp x29, x30, [sp, #64]
+ ldp d9, d8, [sp, #48]
+ ldp d11, d10, [sp, #32]
+ ldp d13, d12, [sp, #16]
+ ldp d15, d14, [sp], #160
+ ret
+.Lfunc_end2:
+ .size zfs_blake3_hash_many_sse41, .Lfunc_end2-zfs_blake3_hash_many_sse41
+ .cfi_endproc
+ .section ".note.GNU-stack","",@progbits
+#endif
diff --git a/sys/contrib/openzfs/module/icp/asm-ppc64/blake3/b3_ppc64le_sse2.S b/sys/contrib/openzfs/module/icp/asm-ppc64/blake3/b3_ppc64le_sse2.S
new file mode 100644
index 000000000000..9deba202fde8
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/asm-ppc64/blake3/b3_ppc64le_sse2.S
@@ -0,0 +1,2823 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3
+ * Copyright (c) 2019-2022 Samuel Neves and Matthew Krupcale
+ * Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
+ *
+ * This is converted assembly: SSE2 -> POWER8 PPC64 Little Endian
+ * Used tools: SIMDe https://github.com/simd-everywhere/simde
+ */
+
+#if (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
+ .text
+ .abiversion 2
+ .section .rodata.cst16,"aM",@progbits,16
+ .p2align 4
+.LCPI0_0:
+ .byte 29
+ .byte 28
+ .byte 31
+ .byte 30
+ .byte 25
+ .byte 24
+ .byte 27
+ .byte 26
+ .byte 21
+ .byte 20
+ .byte 23
+ .byte 22
+ .byte 17
+ .byte 16
+ .byte 19
+ .byte 18
+.LCPI0_1:
+ .long 1779033703
+ .long 3144134277
+ .long 1013904242
+ .long 2773480762
+.LCPI0_2:
+ .byte 27
+ .byte 26
+ .byte 25
+ .byte 24
+ .byte 19
+ .byte 18
+ .byte 17
+ .byte 16
+ .byte 11
+ .byte 10
+ .byte 9
+ .byte 8
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+.LCPI0_3:
+ .byte 19
+ .byte 18
+ .byte 17
+ .byte 16
+ .byte 11
+ .byte 10
+ .byte 9
+ .byte 8
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+ .byte 27
+ .byte 26
+ .byte 25
+ .byte 24
+.LCPI0_4:
+ .byte 23
+ .byte 22
+ .byte 21
+ .byte 20
+ .byte 27
+ .byte 26
+ .byte 25
+ .byte 24
+ .byte 11
+ .byte 10
+ .byte 9
+ .byte 8
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+.LCPI0_5:
+ .byte 23
+ .byte 22
+ .byte 21
+ .byte 20
+ .byte 23
+ .byte 22
+ .byte 21
+ .byte 20
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+.LCPI0_6:
+ .short 1
+ .short 2
+ .short 4
+ .short 8
+ .short 16
+ .short 32
+ .short 64
+ .short 128
+.LCPI0_7:
+ .short 0
+ .short 0
+ .short 4
+ .short 8
+ .short 0
+ .short 0
+ .short 64
+ .short 128
+.LCPI0_8:
+ .byte 19
+ .byte 18
+ .byte 17
+ .byte 16
+ .byte 19
+ .byte 18
+ .byte 17
+ .byte 16
+ .byte 31
+ .byte 30
+ .byte 29
+ .byte 28
+ .byte 31
+ .byte 30
+ .byte 29
+ .byte 28
+.LCPI0_9:
+ .short 0
+ .short 0
+ .short 0
+ .short 0
+ .short 0
+ .short 0
+ .short 64
+ .short 128
+.LCPI0_10:
+ .byte 31
+ .byte 30
+ .byte 29
+ .byte 28
+ .byte 7
+ .byte 6
+ .byte 5
+ .byte 4
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+ .byte 27
+ .byte 26
+ .byte 25
+ .byte 24
+.LCPI0_11:
+ .byte 31
+ .byte 30
+ .byte 29
+ .byte 28
+ .byte 23
+ .byte 22
+ .byte 21
+ .byte 20
+ .byte 19
+ .byte 18
+ .byte 17
+ .byte 16
+ .byte 27
+ .byte 26
+ .byte 25
+ .byte 24
+.LCPI0_12:
+ .byte 27
+ .byte 26
+ .byte 25
+ .byte 24
+ .byte 11
+ .byte 10
+ .byte 9
+ .byte 8
+ .byte 15
+ .byte 14
+ .byte 13
+ .byte 12
+ .byte 31
+ .byte 30
+ .byte 29
+ .byte 28
+.LCPI0_13:
+ .byte 31
+ .byte 30
+ .byte 29
+ .byte 28
+ .byte 15
+ .byte 14
+ .byte 13
+ .byte 12
+ .byte 11
+ .byte 10
+ .byte 9
+ .byte 8
+ .byte 27
+ .byte 26
+ .byte 25
+ .byte 24
+.LCPI0_14:
+ .byte 27
+ .byte 26
+ .byte 25
+ .byte 24
+ .byte 11
+ .byte 10
+ .byte 9
+ .byte 8
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+ .byte 23
+ .byte 22
+ .byte 21
+ .byte 20
+ .text
+ .globl zfs_blake3_compress_in_place_sse2
+ .p2align 2
+ .type zfs_blake3_compress_in_place_sse2,@function
+zfs_blake3_compress_in_place_sse2: /* Rewrites the 32-byte state at r3 in place, using the 64-byte block at r4 and the scalars in r5, r6 and r7. */
+.Lfunc_begin0: /* In: r3 = 32-byte state (loaded on entry, stored on exit), r4 = 64-byte input block, r5/r6/r7 = scalars moved into vector registers below. */
+ .cfi_startproc /* NOTE(review): the callers in this file pass r5 = 64, r6 = a counter and r7 = 8-bit flags; presumably block_len/counter/flags of the BLAKE3 reference - confirm against the C prototype. */
+.Lfunc_gep0:
+ addis 2, 12, .TOC.-.Lfunc_gep0@ha /* global entry: derive the TOC pointer (r2) from the entry address in r12 */
+ addi 2, 2, .TOC.-.Lfunc_gep0@l
+.Lfunc_lep0:
+ .localentry zfs_blake3_compress_in_place_sse2, .Lfunc_lep0-.Lfunc_gep0
+ li 8, -64
+ mtvsrd 35, 5 /* r5 -> VSR35 (v3) */
+ li 5, 16 /* r5 is reused as the +16 byte offset for the state accesses */
+ lfdx 0, 0, 4 /* block bytes 0-7 */
+ vspltisw 12, 9 /* v12 = 9 in every word; used below to build the rotate count 25 */
+ stxvd2x 60, 1, 8 /* save VSR60 (v28) at r1-64; v28-v31 are stored below the stack pointer, no frame is allocated */
+ li 8, -48
+ mtvsrd 36, 7 /* r7 -> VSR36 (v4) */
+ lfd 2, 16(4) /* block bytes 16-23 */
+ stxvd2x 61, 1, 8 /* save v29 at r1-48 */
+ li 8, -32
+ lfd 1, 8(4) /* block bytes 8-15 */
+ mtvsrwz 37, 6 /* low 32 bits of r6 -> VSR37 (v5) */
+ rldicl 6, 6, 32, 32 /* r6 = upper 32 bits of the original r6 */
+ addis 7, 2, .LCPI0_2@toc@ha
+ stxvd2x 62, 1, 8 /* save v30 at r1-32 */
+ li 8, -16
+ addi 7, 7, .LCPI0_2@toc@l
+ stxvd2x 63, 1, 8 /* save v31 at r1-16 */
+ li 8, 0
+ lvx 9, 0, 7 /* v9 = permute mask .LCPI0_2 */
+ li 7, 48
+ mtvsrd 34, 8 /* VSR34 (v2) = 0 */
+ xxmrghd 32, 1, 0
+ lxvd2x 0, 0, 3 /* state bytes 0-15 */
+ lxvd2x 1, 3, 5 /* state bytes 16-31 */
+ lfd 3, 24(4) /* block bytes 24-31 */
+ addis 8, 2, .LCPI0_5@toc@ha
+ vmrghb 3, 2, 3
+ addi 8, 8, .LCPI0_5@toc@l
+ vmrghb 4, 2, 4
+ vspltb 2, 2, 7
+ xxmrghd 33, 3, 2
+ vpkudum 7, 1, 0
+ vmrglh 3, 2, 3
+ vmrglh 2, 2, 4
+ mtvsrwz 36, 6 /* upper half of the original r6 -> VSR36 (v4) */
+ addis 6, 2, .LCPI0_0@toc@ha
+ addi 6, 6, .LCPI0_0@toc@l
+ vperm 10, 1, 0, 9
+ vmrghw 4, 4, 5
+ xxswapd 37, 1
+ lxvd2x 1, 4, 7 /* block bytes 48-63 */
+ addis 7, 2, .LCPI0_8@toc@ha
+ addi 7, 7, .LCPI0_8@toc@l
+ vmrglw 2, 2, 3
+ xxswapd 35, 0
+ xxswapd 41, 1
+ xxspltd 62, 42, 1
+ vadduwm 3, 7, 3
+ vadduwm 6, 3, 5
+ xxmrgld 36, 34, 36
+ lvx 2, 0, 6 /* v2 = permute mask .LCPI0_0, used by the "vperm x, x, x, 2" shuffles below (NOTE(review): looks like a 16-bit rotate of each word - confirm) */
+ addis 6, 2, .LCPI0_1@toc@ha
+ addi 6, 6, .LCPI0_1@toc@l
+ xxlxor 35, 38, 36
+ lvx 4, 0, 6 /* v4 = the four 32-bit constants at .LCPI0_1 */
+ li 6, 32
+ lxvd2x 0, 4, 6 /* block bytes 32-47 */
+ addis 4, 2, .LCPI0_3@toc@ha
+ addis 6, 2, .LCPI0_7@toc@ha
+ vperm 8, 3, 3, 2
+ vspltisw 3, 10
+ addi 4, 4, .LCPI0_3@toc@l
+ addi 6, 6, .LCPI0_7@toc@l
+ vadduwm 3, 3, 3 /* v3 = 10 + 10 = 20 in every word: vrlw rotate-left count (same as a right rotate by 12) */
+ vadduwm 11, 8, 4
+ xxlxor 36, 43, 37
+ vadduwm 5, 6, 10
+ vrlw 0, 4, 3
+ vspltisw 4, 12
+ vadduwm 4, 4, 4 /* v4 = 12 + 12 = 24 in every word: rotate-left count (same as a right rotate by 8) */
+ vadduwm 1, 0, 5
+ xxlxor 37, 33, 40
+ xxswapd 40, 0
+ vrlw 6, 5, 4
+ vspltisw 5, -16
+ vpkudum 13, 9, 8
+ vsubuwm 5, 12, 5 /* v5 = 9 - (-16) = 25 in every word: rotate-left count (same as a right rotate by 7) */
+ lvx 12, 0, 4
+ addis 4, 2, .LCPI0_4@toc@ha
+ addi 4, 4, .LCPI0_4@toc@l
+ vadduwm 11, 6, 11
+ xxswapd 0, 38
+ vadduwm 1, 1, 13
+ xxsldwi 50, 45, 45, 1
+ xxlxor 32, 43, 32
+ xxsldwi 43, 43, 43, 3
+ xxsldwi 33, 33, 33, 1
+ vperm 12, 8, 9, 12
+ vrlw 0, 0, 5
+ vadduwm 1, 0, 1
+ xxlxor 38, 33, 0
+ vadduwm 1, 1, 12
+ vperm 6, 6, 6, 2
+ vadduwm 15, 6, 11
+ lvx 11, 0, 4
+ addis 4, 2, .LCPI0_6@toc@ha
+ addi 4, 4, .LCPI0_6@toc@l
+ xxlxor 32, 47, 32
+ lvx 17, 0, 4
+ addis 4, 2, .LCPI0_9@toc@ha
+ vperm 14, 10, 7, 11
+ addi 4, 4, .LCPI0_9@toc@l
+ vrlw 0, 0, 3
+ vadduwm 1, 0, 1
+ xxlxor 38, 33, 38
+ vrlw 6, 6, 4
+ vadduwm 8, 6, 15
+ xxswapd 0, 38
+ lvx 6, 0, 8
+ xxlxor 32, 40, 32
+ xxsldwi 40, 40, 40, 1
+ vperm 13, 12, 18, 6
+ vrlw 9, 0, 5
+ vadduwm 0, 1, 14
+ lvx 1, 0, 7
+ xxsldwi 46, 46, 46, 3
+ xxsldwi 32, 32, 32, 3
+ vperm 7, 7, 7, 1
+ vadduwm 15, 9, 0
+ xxlxor 32, 47, 0
+ vperm 16, 0, 0, 2
+ lvx 0, 0, 6
+ addis 6, 2, .LCPI0_10@toc@ha
+ vcmpequh 0, 0, 17
+ vadduwm 19, 16, 8
+ xxlxor 40, 51, 41
+ xxsel 45, 39, 45, 32
+ vrlw 31, 8, 3
+ lvx 8, 0, 4
+ addis 4, 2, .LCPI0_11@toc@ha
+ addi 4, 4, .LCPI0_11@toc@l
+ vcmpequh 7, 8, 17
+ vadduwm 8, 15, 13
+ vadduwm 15, 31, 8
+ lvx 8, 0, 4
+ addi 4, 6, .LCPI0_10@toc@l
+ lvx 17, 0, 4
+ addis 4, 2, .LCPI0_12@toc@ha
+ xxlxor 41, 47, 48
+ xxsldwi 47, 47, 47, 1
+ addi 4, 4, .LCPI0_12@toc@l
+ xxlnor 48, 39, 39
+ vrlw 29, 9, 4
+ vperm 9, 16, 16, 8
+ xxland 48, 50, 39
+ vperm 17, 30, 12, 17
+ vperm 16, 16, 16, 8
+ vmrghw 12, 12, 10
+ lvx 10, 0, 4
+ addis 4, 2, .LCPI0_13@toc@ha
+ vadduwm 19, 29, 19
+ addi 4, 4, .LCPI0_13@toc@l
+ xxlxor 63, 51, 63
+ xxsldwi 51, 51, 51, 3
+ xxland 0, 49, 41
+ vrlw 17, 31, 5
+ xxlor 48, 0, 48
+ xxswapd 0, 61
+ vperm 18, 12, 18, 10
+ vadduwm 15, 15, 16
+ xxland 60, 48, 39
+ vadduwm 15, 17, 15
+ vperm 28, 28, 28, 8
+ xxlxor 63, 47, 0
+ vadduwm 15, 15, 18
+ vperm 31, 31, 31, 2
+ vperm 30, 18, 16, 6
+ vadduwm 19, 31, 19
+ xxlxor 44, 51, 49
+ vrlw 12, 12, 3
+ vadduwm 15, 12, 15
+ xxlxor 49, 47, 63
+ vperm 31, 13, 14, 11
+ vrlw 17, 17, 4
+ vperm 14, 14, 14, 1
+ vadduwm 15, 15, 31
+ vadduwm 19, 17, 19
+ xxswapd 0, 49
+ xxsldwi 47, 47, 47, 3
+ xxsel 46, 46, 62, 32
+ xxlxor 44, 51, 44
+ xxsldwi 51, 51, 51, 1
+ vrlw 12, 12, 5
+ vadduwm 15, 12, 15
+ xxlxor 49, 47, 0
+ vperm 17, 17, 17, 2
+ vadduwm 19, 17, 19
+ xxlxor 44, 51, 44
+ vrlw 29, 12, 3
+ vadduwm 12, 15, 14
+ vadduwm 15, 29, 12
+ lvx 12, 0, 4
+ addis 4, 2, .LCPI0_14@toc@ha
+ addi 4, 4, .LCPI0_14@toc@l
+ xxlxor 49, 47, 49
+ xxsldwi 47, 47, 47, 1
+ vperm 30, 13, 18, 12
+ vrlw 17, 17, 4
+ vmrghw 13, 18, 13
+ xxland 0, 62, 41
+ vadduwm 19, 17, 19
+ vperm 16, 13, 16, 10
+ xxlxor 61, 51, 61
+ xxsldwi 50, 51, 51, 3
+ xxsldwi 51, 63, 63, 3
+ vrlw 30, 29, 5
+ xxlor 61, 60, 0
+ xxswapd 0, 49
+ vperm 31, 14, 19, 11
+ vadduwm 15, 15, 29
+ vperm 19, 19, 19, 1
+ vadduwm 15, 30, 15
+ xxlxor 49, 47, 0
+ vadduwm 15, 15, 16
+ vperm 17, 17, 17, 2
+ vadduwm 18, 17, 18
+ xxlxor 45, 50, 62
+ vperm 30, 16, 29, 6
+ vrlw 13, 13, 3
+ vadduwm 15, 13, 15
+ xxlxor 49, 47, 49
+ vadduwm 15, 15, 31
+ xxsldwi 63, 63, 63, 3
+ vrlw 17, 17, 4
+ xxsldwi 47, 47, 47, 3
+ vadduwm 18, 17, 18
+ xxswapd 0, 49
+ xxlxor 45, 50, 45
+ xxsldwi 50, 50, 50, 1
+ vrlw 13, 13, 5
+ vadduwm 15, 13, 15
+ xxlxor 49, 47, 0
+ vperm 17, 17, 17, 2
+ vadduwm 18, 17, 18
+ xxlxor 45, 50, 45
+ vrlw 28, 13, 3
+ xxsel 45, 51, 62, 32
+ xxland 51, 61, 39
+ vperm 30, 14, 16, 12
+ vadduwm 15, 15, 13
+ vperm 19, 19, 19, 8
+ vmrghw 14, 16, 14
+ vadduwm 15, 28, 15
+ xxlxor 49, 47, 49
+ xxsldwi 47, 47, 47, 1
+ xxland 0, 62, 41
+ vrlw 17, 17, 4
+ xxlor 51, 51, 0
+ vadduwm 15, 15, 19
+ vadduwm 18, 17, 18
+ xxswapd 0, 49
+ xxlxor 60, 50, 60
+ xxsldwi 48, 50, 50, 3
+ vperm 18, 14, 29, 10
+ vrlw 30, 28, 5
+ vperm 29, 18, 19, 6
+ vadduwm 15, 30, 15
+ xxlxor 49, 47, 0
+ vadduwm 15, 15, 18
+ vperm 17, 17, 17, 2
+ vadduwm 16, 17, 16
+ xxlxor 46, 48, 62
+ vperm 30, 13, 31, 11
+ vrlw 14, 14, 3
+ vperm 31, 31, 31, 1
+ vadduwm 15, 14, 15
+ xxlxor 49, 47, 49
+ vadduwm 15, 15, 30
+ vrlw 17, 17, 4
+ xxsldwi 47, 47, 47, 3
+ vadduwm 16, 17, 16
+ xxswapd 0, 49
+ xxlxor 46, 48, 46
+ xxsldwi 48, 48, 48, 1
+ vrlw 14, 14, 5
+ vadduwm 15, 14, 15
+ xxlxor 49, 47, 0
+ vperm 17, 17, 17, 2
+ vadduwm 16, 17, 16
+ xxlxor 46, 48, 46
+ vrlw 28, 14, 3
+ xxsel 46, 63, 61, 32
+ xxland 63, 51, 39
+ vperm 29, 13, 18, 12
+ vadduwm 15, 15, 14
+ vperm 31, 31, 31, 8
+ vmrghw 13, 18, 13
+ vadduwm 15, 28, 15
+ xxlxor 49, 47, 49
+ xxsldwi 47, 47, 47, 1
+ xxland 0, 61, 41
+ vrlw 17, 17, 4
+ xxlor 63, 63, 0
+ vperm 13, 13, 19, 10
+ xxsldwi 51, 62, 62, 3
+ vadduwm 15, 15, 31
+ vperm 30, 14, 19, 11
+ vadduwm 16, 17, 16
+ xxswapd 0, 49
+ xxlxor 60, 48, 60
+ xxsldwi 48, 48, 48, 3
+ vrlw 29, 28, 5
+ vadduwm 15, 29, 15
+ xxlxor 49, 47, 0
+ vadduwm 15, 15, 13
+ vperm 17, 17, 17, 2
+ vadduwm 16, 17, 16
+ xxlxor 50, 48, 61
+ vrlw 18, 18, 3
+ vadduwm 15, 18, 15
+ xxlxor 49, 47, 49
+ vadduwm 15, 15, 30
+ vrlw 17, 17, 4
+ xxsldwi 47, 47, 47, 3
+ vadduwm 11, 17, 16
+ xxswapd 0, 49
+ xxlxor 48, 43, 50
+ xxsldwi 43, 43, 43, 1
+ vperm 18, 19, 19, 1
+ vrlw 16, 16, 5
+ vperm 19, 13, 31, 6
+ vadduwm 15, 16, 15
+ xxlxor 49, 47, 0
+ vperm 17, 17, 17, 2
+ vadduwm 29, 17, 11
+ xxlxor 43, 61, 48
+ vrlw 16, 11, 3
+ xxsel 43, 50, 51, 32
+ xxland 50, 63, 39
+ vperm 19, 14, 13, 12
+ vadduwm 15, 15, 11
+ vperm 18, 18, 18, 8
+ vmrghw 13, 13, 14
+ vadduwm 15, 16, 15
+ xxlxor 49, 47, 49
+ xxsldwi 47, 47, 47, 1
+ xxland 0, 51, 41
+ lvx 19, 0, 4
+ vrlw 17, 17, 4
+ xxlor 50, 50, 0
+ vperm 13, 13, 31, 10
+ xxsldwi 63, 62, 62, 3
+ vadduwm 15, 15, 18
+ vperm 19, 11, 31, 19
+ vadduwm 29, 17, 29
+ xxswapd 0, 49
+ vperm 1, 31, 31, 1
+ xxlxor 48, 61, 48
+ xxsldwi 46, 61, 61, 3
+ vperm 6, 13, 18, 6
+ vrlw 16, 16, 5
+ xxsel 32, 33, 38, 32
+ xxland 38, 50, 39
+ vadduwm 15, 16, 15
+ vperm 7, 11, 13, 12
+ xxlxor 49, 47, 0
+ vadduwm 15, 15, 13
+ vperm 17, 17, 17, 2
+ vperm 6, 6, 6, 8
+ vadduwm 14, 17, 14
+ xxlxor 48, 46, 48
+ vrlw 16, 16, 3
+ vadduwm 15, 16, 15
+ xxlxor 49, 47, 49
+ xxsldwi 47, 47, 47, 3
+ vrlw 17, 17, 4
+ vadduwm 15, 15, 19
+ vadduwm 14, 17, 14
+ xxswapd 0, 49
+ xxlxor 48, 46, 48
+ xxsldwi 46, 46, 46, 1
+ vrlw 16, 16, 5
+ vadduwm 15, 16, 15
+ xxlxor 49, 47, 0
+ vadduwm 0, 15, 0
+ vperm 17, 17, 17, 2
+ xxland 0, 39, 41
+ xxlor 38, 38, 0
+ vadduwm 14, 17, 14
+ xxlxor 48, 46, 48
+ vrlw 16, 16, 3
+ vadduwm 0, 16, 0
+ xxlxor 33, 32, 49
+ xxsldwi 32, 32, 32, 1
+ vrlw 1, 1, 4
+ vadduwm 0, 0, 6
+ vadduwm 8, 1, 14
+ xxswapd 0, 33
+ xxlxor 44, 40, 48
+ xxsldwi 38, 40, 40, 3
+ vrlw 7, 12, 5
+ vadduwm 0, 7, 0
+ xxlxor 33, 32, 0
+ vperm 2, 1, 1, 2
+ vmrghw 1, 13, 11
+ vadduwm 6, 2, 6
+ vperm 1, 1, 18, 10
+ xxlxor 39, 38, 39
+ vrlw 3, 7, 3
+ vadduwm 0, 0, 1
+ vadduwm 0, 3, 0
+ xxlxor 34, 32, 34
+ xxsldwi 0, 32, 32, 3
+ vrlw 2, 2, 4
+ vadduwm 4, 2, 6
+ xxswapd 2, 34
+ xxlxor 35, 36, 35
+ xxsldwi 1, 36, 36, 1
+ vrlw 3, 3, 5
+ xxlxor 0, 1, 0 /* XOR two working vectors to form the first half of the new state */
+ xxswapd 0, 0
+ xxlxor 1, 35, 2 /* XOR two working vectors to form the second half of the new state */
+ stxvd2x 0, 0, 3 /* write state bytes 0-15 back through r3 */
+ xxswapd 1, 1
+ stxvd2x 1, 3, 5 /* write state bytes 16-31 back (r5 is still 16) */
+ li 3, -16
+ lxvd2x 63, 1, 3 /* reload v31..v28 from the slots written in the prologue */
+ li 3, -32
+ lxvd2x 62, 1, 3
+ li 3, -48
+ lxvd2x 61, 1, 3
+ li 3, -64
+ lxvd2x 60, 1, 3
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end0:
+ .size zfs_blake3_compress_in_place_sse2, .Lfunc_end0-.Lfunc_begin0
+ .cfi_endproc
+
+ .section .rodata.cst16,"aM",@progbits,16
+ .p2align 4
+.LCPI1_0:
+ .byte 29
+ .byte 28
+ .byte 31
+ .byte 30
+ .byte 25
+ .byte 24
+ .byte 27
+ .byte 26
+ .byte 21
+ .byte 20
+ .byte 23
+ .byte 22
+ .byte 17
+ .byte 16
+ .byte 19
+ .byte 18
+.LCPI1_1:
+ .long 1779033703
+ .long 3144134277
+ .long 1013904242
+ .long 2773480762
+.LCPI1_2:
+ .byte 27
+ .byte 26
+ .byte 25
+ .byte 24
+ .byte 19
+ .byte 18
+ .byte 17
+ .byte 16
+ .byte 11
+ .byte 10
+ .byte 9
+ .byte 8
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+.LCPI1_3:
+ .byte 19
+ .byte 18
+ .byte 17
+ .byte 16
+ .byte 11
+ .byte 10
+ .byte 9
+ .byte 8
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+ .byte 27
+ .byte 26
+ .byte 25
+ .byte 24
+.LCPI1_4:
+ .byte 23
+ .byte 22
+ .byte 21
+ .byte 20
+ .byte 27
+ .byte 26
+ .byte 25
+ .byte 24
+ .byte 11
+ .byte 10
+ .byte 9
+ .byte 8
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+.LCPI1_5:
+ .byte 23
+ .byte 22
+ .byte 21
+ .byte 20
+ .byte 23
+ .byte 22
+ .byte 21
+ .byte 20
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+.LCPI1_6:
+ .short 1
+ .short 2
+ .short 4
+ .short 8
+ .short 16
+ .short 32
+ .short 64
+ .short 128
+.LCPI1_7:
+ .short 0
+ .short 0
+ .short 4
+ .short 8
+ .short 0
+ .short 0
+ .short 64
+ .short 128
+.LCPI1_8:
+ .byte 19
+ .byte 18
+ .byte 17
+ .byte 16
+ .byte 19
+ .byte 18
+ .byte 17
+ .byte 16
+ .byte 31
+ .byte 30
+ .byte 29
+ .byte 28
+ .byte 31
+ .byte 30
+ .byte 29
+ .byte 28
+.LCPI1_9:
+ .short 0
+ .short 0
+ .short 0
+ .short 0
+ .short 0
+ .short 0
+ .short 64
+ .short 128
+.LCPI1_10:
+ .byte 31
+ .byte 30
+ .byte 29
+ .byte 28
+ .byte 7
+ .byte 6
+ .byte 5
+ .byte 4
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+ .byte 27
+ .byte 26
+ .byte 25
+ .byte 24
+.LCPI1_11:
+ .byte 31
+ .byte 30
+ .byte 29
+ .byte 28
+ .byte 23
+ .byte 22
+ .byte 21
+ .byte 20
+ .byte 19
+ .byte 18
+ .byte 17
+ .byte 16
+ .byte 27
+ .byte 26
+ .byte 25
+ .byte 24
+.LCPI1_12:
+ .byte 27
+ .byte 26
+ .byte 25
+ .byte 24
+ .byte 11
+ .byte 10
+ .byte 9
+ .byte 8
+ .byte 15
+ .byte 14
+ .byte 13
+ .byte 12
+ .byte 31
+ .byte 30
+ .byte 29
+ .byte 28
+.LCPI1_13:
+ .byte 31
+ .byte 30
+ .byte 29
+ .byte 28
+ .byte 15
+ .byte 14
+ .byte 13
+ .byte 12
+ .byte 11
+ .byte 10
+ .byte 9
+ .byte 8
+ .byte 27
+ .byte 26
+ .byte 25
+ .byte 24
+.LCPI1_14:
+ .byte 27
+ .byte 26
+ .byte 25
+ .byte 24
+ .byte 11
+ .byte 10
+ .byte 9
+ .byte 8
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+ .byte 23
+ .byte 22
+ .byte 21
+ .byte 20
+ .text
+ .globl zfs_blake3_compress_xof_sse2
+ .p2align 2
+ .type zfs_blake3_compress_xof_sse2,@function
+zfs_blake3_compress_xof_sse2: /* Same mixing sequence as zfs_blake3_compress_in_place_sse2, but writes 64 bytes of output through r8 and leaves the state at r3 unmodified. */
+.Lfunc_begin1: /* In: r3 = 32-byte state (read only), r4 = 64-byte input block, r5/r6/r7 = scalars moved into vector registers below, r8 = 64-byte output buffer. */
+ .cfi_startproc /* NOTE(review): r5/r6/r7 are presumably block_len/counter/flags as in the BLAKE3 reference - confirm against the C prototype. */
+.Lfunc_gep1:
+ addis 2, 12, .TOC.-.Lfunc_gep1@ha /* global entry: derive the TOC pointer (r2) from the entry address in r12 */
+ addi 2, 2, .TOC.-.Lfunc_gep1@l
+.Lfunc_lep1:
+ .localentry zfs_blake3_compress_xof_sse2, .Lfunc_lep1-.Lfunc_gep1
+ li 9, -80
+ mtvsrd 35, 5 /* r5 -> VSR35 (v3) */
+ li 5, 16 /* r5 is reused as the +16 byte offset */
+ lfdx 0, 0, 4 /* block bytes 0-7 */
+ addis 10, 2, .LCPI1_2@toc@ha
+ vspltisw 12, 9 /* v12 = 9 in every word; used below to build the rotate count 25 */
+ std 30, -16(1) /* save r30 at r1-16; like v28-v31 it is stored below the stack pointer, no frame is allocated */
+ addis 12, 2, .LCPI1_8@toc@ha
+ addis 30, 2, .LCPI1_5@toc@ha
+ addis 11, 2, .LCPI1_7@toc@ha
+ stxvd2x 60, 1, 9 /* save VSR60 (v28) at r1-80 */
+ li 9, -64
+ mtvsrd 36, 7 /* r7 -> VSR36 (v4) */
+ lfd 2, 16(4) /* block bytes 16-23 */
+ addi 10, 10, .LCPI1_2@toc@l
+ addi 12, 12, .LCPI1_8@toc@l
+ addi 11, 11, .LCPI1_7@toc@l
+ stxvd2x 61, 1, 9 /* save v29 at r1-64 */
+ li 9, -48
+ lfd 3, 24(4) /* block bytes 24-31 */
+ mtvsrwz 37, 6 /* low 32 bits of r6 -> VSR37 (v5) */
+ rldicl 6, 6, 32, 32 /* r6 = upper 32 bits of the original r6 */
+ lvx 9, 0, 10 /* v9 = permute mask .LCPI1_2 */
+ stxvd2x 62, 1, 9 /* save v30 at r1-48 */
+ li 9, -32
+ li 10, 32
+ stxvd2x 63, 1, 9 /* save v31 at r1-32 */
+ li 9, 0
+ mtvsrd 34, 9 /* VSR34 (v2) = 0 */
+ xxmrghd 33, 3, 2
+ lfd 1, 8(4) /* block bytes 8-15 */
+ vmrghb 3, 2, 3
+ vmrghb 4, 2, 4
+ vspltb 2, 2, 7
+ xxmrghd 32, 1, 0
+ lxvd2x 0, 0, 3 /* state bytes 0-15 */
+ lxvd2x 1, 3, 5 /* state bytes 16-31 */
+ vpkudum 7, 1, 0
+ vmrglh 3, 2, 3
+ vmrglh 2, 2, 4
+ mtvsrwz 36, 6 /* upper half of the original r6 -> VSR36 (v4) */
+ addis 6, 2, .LCPI1_0@toc@ha
+ addi 6, 6, .LCPI1_0@toc@l
+ vperm 10, 1, 0, 9
+ vmrghw 4, 4, 5
+ xxswapd 37, 1
+ vmrglw 2, 2, 3
+ xxswapd 35, 0
+ lxvd2x 0, 4, 10 /* block bytes 32-47 */
+ xxspltd 62, 42, 1
+ vadduwm 3, 7, 3
+ vadduwm 6, 3, 5
+ xxmrgld 36, 34, 36
+ lvx 2, 0, 6 /* v2 = permute mask .LCPI1_0, used by the "vperm x, x, x, 2" shuffles below (NOTE(review): looks like a 16-bit rotate of each word - confirm) */
+ addis 6, 2, .LCPI1_1@toc@ha
+ addi 6, 6, .LCPI1_1@toc@l
+ xxlxor 35, 38, 36
+ lvx 4, 0, 6 /* v4 = the four 32-bit constants at .LCPI1_1 */
+ li 6, 48
+ lxvd2x 1, 4, 6 /* block bytes 48-63 */
+ addis 4, 2, .LCPI1_3@toc@ha
+ vperm 8, 3, 3, 2
+ vspltisw 3, 10
+ addi 4, 4, .LCPI1_3@toc@l
+ xxswapd 41, 1
+ vadduwm 3, 3, 3 /* v3 = 10 + 10 = 20 in every word: vrlw rotate-left count (same as a right rotate by 12) */
+ vadduwm 11, 8, 4
+ xxlxor 36, 43, 37
+ vadduwm 5, 6, 10
+ vrlw 0, 4, 3
+ vspltisw 4, 12
+ vadduwm 4, 4, 4 /* v4 = 12 + 12 = 24 in every word: rotate-left count (same as a right rotate by 8) */
+ vadduwm 1, 0, 5
+ xxlxor 37, 33, 40
+ xxswapd 40, 0
+ vrlw 6, 5, 4
+ vspltisw 5, -16
+ vpkudum 13, 9, 8
+ vsubuwm 5, 12, 5 /* v5 = 9 - (-16) = 25 in every word: rotate-left count (same as a right rotate by 7) */
+ lvx 12, 0, 4
+ addis 4, 2, .LCPI1_4@toc@ha
+ addi 4, 4, .LCPI1_4@toc@l
+ vadduwm 11, 6, 11
+ xxswapd 0, 38
+ vadduwm 1, 1, 13
+ xxsldwi 50, 45, 45, 1
+ xxlxor 32, 43, 32
+ xxsldwi 43, 43, 43, 3
+ xxsldwi 33, 33, 33, 1
+ vperm 12, 8, 9, 12
+ vrlw 0, 0, 5
+ vadduwm 1, 0, 1
+ xxlxor 38, 33, 0
+ vadduwm 1, 1, 12
+ vperm 6, 6, 6, 2
+ vadduwm 15, 6, 11
+ lvx 11, 0, 4
+ addis 4, 2, .LCPI1_6@toc@ha
+ addi 4, 4, .LCPI1_6@toc@l
+ xxlxor 32, 47, 32
+ lvx 17, 0, 4
+ addi 4, 30, .LCPI1_5@toc@l
+ vperm 14, 10, 7, 11
+ vrlw 0, 0, 3
+ vadduwm 1, 0, 1
+ xxlxor 38, 33, 38
+ vrlw 6, 6, 4
+ vadduwm 8, 6, 15
+ xxswapd 0, 38
+ lvx 6, 0, 4
+ addis 4, 2, .LCPI1_9@toc@ha
+ addi 4, 4, .LCPI1_9@toc@l
+ xxlxor 32, 40, 32
+ xxsldwi 40, 40, 40, 1
+ vperm 13, 12, 18, 6
+ vrlw 9, 0, 5
+ vadduwm 0, 1, 14
+ lvx 1, 0, 12
+ xxsldwi 46, 46, 46, 3
+ xxsldwi 32, 32, 32, 3
+ vperm 7, 7, 7, 1
+ vadduwm 15, 9, 0
+ xxlxor 32, 47, 0
+ vperm 16, 0, 0, 2
+ lvx 0, 0, 11
+ addis 11, 2, .LCPI1_10@toc@ha
+ vcmpequh 0, 0, 17
+ vadduwm 19, 16, 8
+ xxlxor 40, 51, 41
+ xxsel 45, 39, 45, 32
+ vrlw 31, 8, 3
+ lvx 8, 0, 4
+ addis 4, 2, .LCPI1_11@toc@ha
+ addi 4, 4, .LCPI1_11@toc@l
+ vcmpequh 7, 8, 17
+ vadduwm 8, 15, 13
+ vadduwm 15, 31, 8
+ lvx 8, 0, 4
+ addi 4, 11, .LCPI1_10@toc@l
+ lvx 17, 0, 4
+ addis 4, 2, .LCPI1_12@toc@ha
+ xxlxor 41, 47, 48
+ xxsldwi 47, 47, 47, 1
+ addi 4, 4, .LCPI1_12@toc@l
+ xxlnor 48, 39, 39
+ vrlw 29, 9, 4
+ vperm 9, 16, 16, 8
+ xxland 48, 50, 39
+ vperm 17, 30, 12, 17
+ vperm 16, 16, 16, 8
+ vmrghw 12, 12, 10
+ lvx 10, 0, 4
+ addis 4, 2, .LCPI1_13@toc@ha
+ vadduwm 19, 29, 19
+ addi 4, 4, .LCPI1_13@toc@l
+ xxlxor 63, 51, 63
+ xxsldwi 51, 51, 51, 3
+ xxland 0, 49, 41
+ vrlw 17, 31, 5
+ xxlor 48, 0, 48
+ xxswapd 0, 61
+ vperm 18, 12, 18, 10
+ vadduwm 15, 15, 16
+ xxland 60, 48, 39
+ vadduwm 15, 17, 15
+ vperm 28, 28, 28, 8
+ xxlxor 63, 47, 0
+ vadduwm 15, 15, 18
+ vperm 31, 31, 31, 2
+ vperm 30, 18, 16, 6
+ vadduwm 19, 31, 19
+ xxlxor 44, 51, 49
+ vrlw 12, 12, 3
+ vadduwm 15, 12, 15
+ xxlxor 49, 47, 63
+ vperm 31, 13, 14, 11
+ vrlw 17, 17, 4
+ vperm 14, 14, 14, 1
+ vadduwm 15, 15, 31
+ vadduwm 19, 17, 19
+ xxswapd 0, 49
+ xxsldwi 47, 47, 47, 3
+ xxsel 46, 46, 62, 32
+ xxlxor 44, 51, 44
+ xxsldwi 51, 51, 51, 1
+ vrlw 12, 12, 5
+ vadduwm 15, 12, 15
+ xxlxor 49, 47, 0
+ vperm 17, 17, 17, 2
+ vadduwm 19, 17, 19
+ xxlxor 44, 51, 44
+ vrlw 29, 12, 3
+ vadduwm 12, 15, 14
+ vadduwm 15, 29, 12
+ lvx 12, 0, 4
+ addis 4, 2, .LCPI1_14@toc@ha
+ addi 4, 4, .LCPI1_14@toc@l
+ xxlxor 49, 47, 49
+ xxsldwi 47, 47, 47, 1
+ vperm 30, 13, 18, 12
+ vrlw 17, 17, 4
+ vmrghw 13, 18, 13
+ xxland 0, 62, 41
+ vadduwm 19, 17, 19
+ vperm 16, 13, 16, 10
+ xxlxor 61, 51, 61
+ xxsldwi 50, 51, 51, 3
+ xxsldwi 51, 63, 63, 3
+ vrlw 30, 29, 5
+ xxlor 61, 60, 0
+ xxswapd 0, 49
+ vperm 31, 14, 19, 11
+ vadduwm 15, 15, 29
+ vperm 19, 19, 19, 1
+ vadduwm 15, 30, 15
+ xxlxor 49, 47, 0
+ vadduwm 15, 15, 16
+ vperm 17, 17, 17, 2
+ vadduwm 18, 17, 18
+ xxlxor 45, 50, 62
+ vperm 30, 16, 29, 6
+ vrlw 13, 13, 3
+ vadduwm 15, 13, 15
+ xxlxor 49, 47, 49
+ vadduwm 15, 15, 31
+ xxsldwi 63, 63, 63, 3
+ vrlw 17, 17, 4
+ xxsldwi 47, 47, 47, 3
+ vadduwm 18, 17, 18
+ xxswapd 0, 49
+ xxlxor 45, 50, 45
+ xxsldwi 50, 50, 50, 1
+ vrlw 13, 13, 5
+ vadduwm 15, 13, 15
+ xxlxor 49, 47, 0
+ vperm 17, 17, 17, 2
+ vadduwm 18, 17, 18
+ xxlxor 45, 50, 45
+ vrlw 28, 13, 3
+ xxsel 45, 51, 62, 32
+ xxland 51, 61, 39
+ vperm 30, 14, 16, 12
+ vadduwm 15, 15, 13
+ vperm 19, 19, 19, 8
+ vmrghw 14, 16, 14
+ vadduwm 15, 28, 15
+ xxlxor 49, 47, 49
+ xxsldwi 47, 47, 47, 1
+ xxland 0, 62, 41
+ vrlw 17, 17, 4
+ xxlor 51, 51, 0
+ vadduwm 15, 15, 19
+ vadduwm 18, 17, 18
+ xxswapd 0, 49
+ xxlxor 60, 50, 60
+ xxsldwi 48, 50, 50, 3
+ vperm 18, 14, 29, 10
+ vrlw 30, 28, 5
+ vperm 29, 18, 19, 6
+ vadduwm 15, 30, 15
+ xxlxor 49, 47, 0
+ vadduwm 15, 15, 18
+ vperm 17, 17, 17, 2
+ vadduwm 16, 17, 16
+ xxlxor 46, 48, 62
+ vperm 30, 13, 31, 11
+ vrlw 14, 14, 3
+ vperm 31, 31, 31, 1
+ vadduwm 15, 14, 15
+ xxlxor 49, 47, 49
+ vadduwm 15, 15, 30
+ vrlw 17, 17, 4
+ xxsldwi 47, 47, 47, 3
+ vadduwm 16, 17, 16
+ xxswapd 0, 49
+ xxlxor 46, 48, 46
+ xxsldwi 48, 48, 48, 1
+ vrlw 14, 14, 5
+ vadduwm 15, 14, 15
+ xxlxor 49, 47, 0
+ vperm 17, 17, 17, 2
+ vadduwm 16, 17, 16
+ xxlxor 46, 48, 46
+ vrlw 28, 14, 3
+ xxsel 46, 63, 61, 32
+ xxland 63, 51, 39
+ vperm 29, 13, 18, 12
+ vadduwm 15, 15, 14
+ vperm 31, 31, 31, 8
+ vmrghw 13, 18, 13
+ vadduwm 15, 28, 15
+ xxlxor 49, 47, 49
+ xxsldwi 47, 47, 47, 1
+ xxland 0, 61, 41
+ vrlw 17, 17, 4
+ xxlor 63, 63, 0
+ vperm 13, 13, 19, 10
+ xxsldwi 51, 62, 62, 3
+ vadduwm 15, 15, 31
+ vperm 30, 14, 19, 11
+ vadduwm 16, 17, 16
+ xxswapd 0, 49
+ xxlxor 60, 48, 60
+ xxsldwi 48, 48, 48, 3
+ vrlw 29, 28, 5
+ vadduwm 15, 29, 15
+ xxlxor 49, 47, 0
+ vadduwm 15, 15, 13
+ vperm 17, 17, 17, 2
+ vadduwm 16, 17, 16
+ xxlxor 50, 48, 61
+ vrlw 18, 18, 3
+ vadduwm 15, 18, 15
+ xxlxor 49, 47, 49
+ vadduwm 15, 15, 30
+ vrlw 17, 17, 4
+ xxsldwi 47, 47, 47, 3
+ vadduwm 11, 17, 16
+ xxswapd 0, 49
+ xxlxor 48, 43, 50
+ xxsldwi 43, 43, 43, 1
+ vperm 18, 19, 19, 1
+ vrlw 16, 16, 5
+ vperm 19, 13, 31, 6
+ vadduwm 15, 16, 15
+ xxlxor 49, 47, 0
+ vperm 17, 17, 17, 2
+ vadduwm 29, 17, 11
+ xxlxor 43, 61, 48
+ vrlw 16, 11, 3
+ xxsel 43, 50, 51, 32
+ xxland 50, 63, 39
+ vperm 19, 14, 13, 12
+ vadduwm 15, 15, 11
+ vperm 18, 18, 18, 8
+ vmrghw 13, 13, 14
+ vadduwm 15, 16, 15
+ xxlxor 49, 47, 49
+ xxsldwi 47, 47, 47, 1
+ xxland 0, 51, 41
+ lvx 19, 0, 4
+ vrlw 17, 17, 4
+ xxlor 50, 50, 0
+ vperm 13, 13, 31, 10
+ xxsldwi 63, 62, 62, 3
+ vadduwm 15, 15, 18
+ vperm 19, 11, 31, 19
+ vadduwm 29, 17, 29
+ xxswapd 0, 49
+ vperm 1, 31, 31, 1
+ xxlxor 48, 61, 48
+ xxsldwi 46, 61, 61, 3
+ vperm 6, 13, 18, 6
+ vrlw 16, 16, 5
+ xxsel 32, 33, 38, 32
+ xxland 38, 50, 39
+ vadduwm 15, 16, 15
+ vperm 7, 11, 13, 12
+ xxlxor 49, 47, 0
+ vadduwm 15, 15, 13
+ vperm 17, 17, 17, 2
+ vperm 6, 6, 6, 8
+ vadduwm 14, 17, 14
+ xxlxor 48, 46, 48
+ vrlw 16, 16, 3
+ vadduwm 15, 16, 15
+ xxlxor 49, 47, 49
+ xxsldwi 47, 47, 47, 3
+ vrlw 17, 17, 4
+ vadduwm 15, 15, 19
+ vadduwm 14, 17, 14
+ xxswapd 0, 49
+ xxlxor 48, 46, 48
+ xxsldwi 46, 46, 46, 1
+ vrlw 16, 16, 5
+ vadduwm 15, 16, 15
+ xxlxor 49, 47, 0
+ vadduwm 0, 15, 0
+ vperm 17, 17, 17, 2
+ xxland 0, 39, 41
+ xxlor 38, 38, 0
+ vadduwm 14, 17, 14
+ xxlxor 48, 46, 48
+ vrlw 16, 16, 3
+ vadduwm 0, 16, 0
+ xxlxor 33, 32, 49
+ xxsldwi 32, 32, 32, 1
+ vrlw 1, 1, 4
+ vadduwm 0, 0, 6
+ vadduwm 8, 1, 14
+ xxswapd 0, 33
+ xxlxor 44, 40, 48
+ xxsldwi 38, 40, 40, 3
+ vrlw 7, 12, 5
+ vadduwm 0, 7, 0
+ xxlxor 33, 32, 0
+ vperm 2, 1, 1, 2
+ vmrghw 1, 13, 11
+ vadduwm 6, 2, 6
+ vperm 1, 1, 18, 10
+ xxlxor 39, 38, 39
+ vrlw 3, 7, 3
+ vadduwm 0, 0, 1
+ vadduwm 0, 3, 0
+ xxlxor 34, 32, 34
+ xxsldwi 0, 32, 32, 3
+ vrlw 2, 2, 4
+ vadduwm 4, 2, 6
+ xxswapd 2, 34
+ xxlxor 35, 36, 35
+ xxsldwi 1, 36, 36, 1
+ vrlw 3, 3, 5
+ xxlxor 0, 1, 0
+ xxswapd 0, 0
+ xxlxor 3, 35, 2
+ stxvd2x 0, 0, 8 /* output bytes 0-15 */
+ xxswapd 3, 3
+ stxvd2x 3, 8, 5 /* output bytes 16-31 (r5 is still 16) */
+ lfdx 0, 0, 3 /* reload state bytes 0-15 from r3 ... */
+ lfd 3, 8(3)
+ xxmrghd 34, 3, 0
+ xxlxor 0, 1, 34 /* ... and XOR them with a working vector */
+ xxswapd 0, 0
+ stxvd2x 0, 8, 10 /* output bytes 32-47 (r10 is 32) */
+ lfd 0, 16(3) /* reload state bytes 16-31 from r3 ... */
+ lfd 1, 24(3)
+ li 3, -32
+ xxmrghd 34, 1, 0
+ xxlxor 0, 2, 34 /* ... and XOR them with a working vector */
+ xxswapd 0, 0
+ stxvd2x 0, 8, 6 /* output bytes 48-63 (r6 is 48) */
+ lxvd2x 63, 1, 3 /* reload v31..v28 and r30 from the slots written in the prologue */
+ li 3, -48
+ ld 30, -16(1)
+ lxvd2x 62, 1, 3
+ li 3, -64
+ lxvd2x 61, 1, 3
+ li 3, -80
+ lxvd2x 60, 1, 3
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end1:
+ .size zfs_blake3_compress_xof_sse2, .Lfunc_end1-.Lfunc_begin1
+ .cfi_endproc
+
+ .globl zfs_blake3_hash_many_sse2
+ .p2align 2
+ .type zfs_blake3_hash_many_sse2,@function
+zfs_blake3_hash_many_sse2: /* Hashes r4 inputs: four at a time through blake3_hash4_sse2, then any remainder one by one through zfs_blake3_compress_in_place_sse2. */
+.Lfunc_begin2: /* In: r3 = array of input pointers, r4 = number of inputs, r5 = 64-byte blocks per input, r6 = 32-byte starting state, r7 = counter value. */
+ .cfi_startproc /* In (cont.): bit 0 of r8 = advance the counter per input, r9 = flags for every block, r10 = extra flags for an input's first block. */
+.Lfunc_gep2: /* Two further arguments are read from the caller's frame: offset 96 = extra flags for an input's last block, offset 104 = output pointer. */
+ addis 2, 12, .TOC.-.Lfunc_gep2@ha /* global entry: derive the TOC pointer (r2) from the entry address in r12 */
+ addi 2, 2, .TOC.-.Lfunc_gep2@l
+.Lfunc_lep2:
+ .localentry zfs_blake3_hash_many_sse2, .Lfunc_lep2-.Lfunc_gep2
+ mfocrf 12, 32 /* copy CR field 2 into r12; cr2 is overwritten below and restored on exit */
+ mflr 0
+ std 0, 16(1) /* save LR in the caller's frame */
+ stw 12, 8(1) /* save the CR image in the caller's frame */
+ stdu 1, -256(1) /* allocate a 256-byte stack frame */
+ .cfi_def_cfa_offset 256
+ .cfi_offset lr, 16
+ .cfi_offset r17, -120
+ .cfi_offset r18, -112
+ .cfi_offset r19, -104
+ .cfi_offset r20, -96
+ .cfi_offset r21, -88
+ .cfi_offset r22, -80
+ .cfi_offset r23, -72
+ .cfi_offset r24, -64
+ .cfi_offset r25, -56
+ .cfi_offset r26, -48
+ .cfi_offset r27, -40
+ .cfi_offset r28, -32
+ .cfi_offset r29, -24
+ .cfi_offset r30, -16
+ .cfi_offset cr2, 8
+ std 26, 208(1)
+ mr 26, 4 /* r26 = inputs still to process */
+ cmpldi 1, 4, 4 /* cr1: input count compared with 4 (unsigned) */
+ andi. 4, 8, 1 /* cr0.gt is set when bit 0 of r8 is set */
+ std 18, 144(1)
+ std 19, 152(1)
+ crmove 8, 1 /* copy cr0.gt into cr2.lt (CR bit 8), which the isel instructions below test */
+ ld 19, 360(1) /* r19 = output pointer (256 + 104: caller frame offset 104) */
+ lwz 18, 352(1) /* r18 = last-block flags (256 + 96: caller frame offset 96) */
+ std 24, 192(1)
+ std 25, 200(1)
+ std 27, 216(1)
+ std 28, 224(1)
+ mr 24, 10 /* r24 = first-block flags */
+ mr 28, 6 /* r28 = starting state */
+ mr 27, 5 /* r27 = blocks per input */
+ mr 25, 3 /* r25 = cursor into the input pointer array */
+ std 29, 232(1)
+ std 30, 240(1)
+ mr 30, 9 /* r30 = per-block flags */
+ mr 29, 7 /* r29 = counter */
+ std 17, 136(1)
+ std 20, 160(1)
+ std 21, 168(1)
+ std 22, 176(1)
+ std 23, 184(1)
+ blt 1, .LBB2_3 /* fewer than four inputs: skip the 4-wide loop */
+ li 3, 0
+ li 4, 1
+ clrldi 23, 30, 32 /* r23/r21/r20 = the three flag arguments zero-extended from 32 bits for the call */
+ isel 22, 4, 3, 8 /* r22 = 1 if CR bit 8 is set (counter should advance), else 0 */
+ clrldi 21, 24, 32
+ clrldi 20, 18, 32
+.LBB2_2: /* 4-wide loop: one blake3_hash4_sse2 call per group of four inputs */
+ mr 3, 25
+ mr 4, 27
+ mr 5, 28
+ mr 6, 29
+ mr 7, 22
+ mr 8, 23
+ mr 9, 21
+ mr 10, 20
+ std 19, 32(1) /* output pointer is passed in this frame at offset 32; the callee loads it from 432(1) after its own 400-byte stdu */
+ bl blake3_hash4_sse2
+ addi 26, 26, -4 /* four inputs consumed */
+ addi 3, 29, 4
+ addi 25, 25, 32 /* step past four 8-byte input pointers */
+ addi 19, 19, 128 /* step the output pointer by 128 bytes */
+ cmpldi 26, 3
+ isel 29, 3, 29, 8 /* counter += 4 only when CR bit 8 is set */
+ bgt 0, .LBB2_2 /* repeat while more than three inputs remain */
+.LBB2_3:
+ cmpldi 26, 0
+ beq 0, .LBB2_11 /* nothing left: go to the epilogue */
+ li 3, 0
+ li 4, 1
+ or 21, 24, 30 /* r21 = flags for an input's first block (first-block flags OR per-block flags) */
+ li 20, 16
+ addi 24, 1, 96 /* r24 = 32-byte scratch state at offset 96 of this frame */
+ isel 22, 4, 3, 8 /* r22 = per-input counter increment (1 or 0) */
+.LBB2_5: /* per-input loop for the remaining inputs */
+ lxvd2x 0, 28, 20
+ ld 23, 0(25) /* r23 = current input pointer */
+ mr 17, 27 /* r17 = blocks left in this input */
+ mr 3, 21
+ stxvd2x 0, 24, 20
+ lxvd2x 0, 0, 28 /* together the two load/store pairs copy the 32-byte starting state into the scratch buffer */
+ stxvd2x 0, 0, 24
+.LBB2_6: /* per-block loop */
+ cmpldi 17, 1
+ beq 0, .LBB2_8 /* last block: OR in the last-block flags first */
+ cmpldi 17, 0
+ bne 0, .LBB2_9
+ b .LBB2_10 /* no blocks left: store the result */
+.LBB2_8:
+ or 3, 3, 18
+.LBB2_9:
+ clrldi 7, 3, 56 /* only the low 8 bits of the flags are passed on */
+ mr 3, 24
+ mr 4, 23
+ li 5, 64 /* third argument is always 64 */
+ mr 6, 29
+ bl zfs_blake3_compress_in_place_sse2
+ addi 23, 23, 64 /* next 64-byte block */
+ addi 17, 17, -1
+ mr 3, 30 /* later blocks start from the per-block flags only */
+ b .LBB2_6
+.LBB2_10:
+ lxvd2x 0, 24, 20
+ addi 26, 26, -1
+ add 29, 29, 22 /* advance the counter by 0 or 1 */
+ addi 25, 25, 8 /* next input pointer */
+ cmpldi 26, 0
+ stxvd2x 0, 19, 20 /* together the two load/store pairs copy the 32-byte scratch state to the output */
+ lxvd2x 0, 0, 24
+ stxvd2x 0, 0, 19
+ addi 19, 19, 32 /* step the output pointer by 32 bytes */
+ bne 0, .LBB2_5
+.LBB2_11: /* epilogue: reload r17-r30, pop the frame, restore CR field 2 and LR */
+ ld 30, 240(1)
+ ld 29, 232(1)
+ ld 28, 224(1)
+ ld 27, 216(1)
+ ld 26, 208(1)
+ ld 25, 200(1)
+ ld 24, 192(1)
+ ld 23, 184(1)
+ ld 22, 176(1)
+ ld 21, 168(1)
+ ld 20, 160(1)
+ ld 19, 152(1)
+ ld 18, 144(1)
+ ld 17, 136(1)
+ addi 1, 1, 256
+ ld 0, 16(1)
+ lwz 12, 8(1)
+ mtocrf 32, 12
+ mtlr 0
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end2:
+ .size zfs_blake3_hash_many_sse2, .Lfunc_end2-.Lfunc_begin2
+ .cfi_endproc
+
+ .section .rodata.cst16,"aM",@progbits,16
+ .p2align 4
+.LCPI3_0:
+ .quad 4294967296
+ .quad 12884901890
+.LCPI3_1:
+ .byte 29
+ .byte 28
+ .byte 31
+ .byte 30
+ .byte 25
+ .byte 24
+ .byte 27
+ .byte 26
+ .byte 21
+ .byte 20
+ .byte 23
+ .byte 22
+ .byte 17
+ .byte 16
+ .byte 19
+ .byte 18
+.LCPI3_2:
+ .long 1779033703
+ .long 1779033703
+ .long 1779033703
+ .long 1779033703
+.LCPI3_3:
+ .long 3144134277
+ .long 3144134277
+ .long 3144134277
+ .long 3144134277
+.LCPI3_4:
+ .long 1013904242
+ .long 1013904242
+ .long 1013904242
+ .long 1013904242
+.LCPI3_5:
+ .long 2773480762
+ .long 2773480762
+ .long 2773480762
+ .long 2773480762
+ .text
+ .p2align 2
+ .type blake3_hash4_sse2,@function
+blake3_hash4_sse2:
+.Lfunc_begin3:
+ .cfi_startproc
+.Lfunc_gep3:
+ addis 2, 12, .TOC.-.Lfunc_gep3@ha
+ addi 2, 2, .TOC.-.Lfunc_gep3@l
+.Lfunc_lep3:
+ .localentry blake3_hash4_sse2, .Lfunc_lep3-.Lfunc_gep3
+ stdu 1, -400(1)
+ .cfi_def_cfa_offset 400
+ .cfi_offset r22, -152
+ .cfi_offset r23, -144
+ .cfi_offset r24, -136
+ .cfi_offset r25, -128
+ .cfi_offset r26, -120
+ .cfi_offset r27, -112
+ .cfi_offset r28, -104
+ .cfi_offset r29, -96
+ .cfi_offset r30, -88
+ .cfi_offset f23, -72
+ .cfi_offset f24, -64
+ .cfi_offset f25, -56
+ .cfi_offset f26, -48
+ .cfi_offset f27, -40
+ .cfi_offset f28, -32
+ .cfi_offset f29, -24
+ .cfi_offset f30, -16
+ .cfi_offset f31, -8
+ .cfi_offset v20, -352
+ .cfi_offset v21, -336
+ .cfi_offset v22, -320
+ .cfi_offset v23, -304
+ .cfi_offset v24, -288
+ .cfi_offset v25, -272
+ .cfi_offset v26, -256
+ .cfi_offset v27, -240
+ .cfi_offset v28, -224
+ .cfi_offset v29, -208
+ .cfi_offset v30, -192
+ .cfi_offset v31, -176
+ li 11, 48
+ li 0, 8
+ std 30, 312(1)
+ li 30, 12
+ li 12, 4
+ lfiwzx 0, 0, 5
+ stxvd2x 52, 1, 11
+ li 11, 64
+ lfiwzx 2, 5, 0
+ li 0, 20
+ lfiwzx 3, 5, 30
+ stxvd2x 53, 1, 11
+ li 11, 80
+ li 30, 24
+ lfiwzx 4, 5, 0
+ li 0, 28
+ stxvd2x 54, 1, 11
+ li 11, 96
+ lfiwzx 1, 5, 12
+ lfiwzx 6, 5, 30
+ xxspltw 45, 0, 1
+ cmpldi 4, 0
+ std 22, 248(1)
+ stxvd2x 55, 1, 11
+ li 11, 112
+ lfiwzx 7, 5, 0
+ xxspltw 40, 2, 1
+ std 23, 256(1)
+ xxspltw 38, 3, 1
+ xxspltw 50, 4, 1
+ std 24, 264(1)
+ std 25, 272(1)
+ std 26, 280(1)
+ xxspltw 54, 7, 1
+ std 27, 288(1)
+ std 28, 296(1)
+ std 29, 304(1)
+ stxvd2x 56, 1, 11
+ li 11, 128
+ stfd 23, 328(1)
+ stxvd2x 57, 1, 11
+ li 11, 144
+ stfd 24, 336(1)
+ stxvd2x 58, 1, 11
+ li 11, 160
+ stfd 25, 344(1)
+ stxvd2x 59, 1, 11
+ li 11, 176
+ xxspltw 59, 1, 1
+ stxvd2x 60, 1, 11
+ li 11, 192
+ stfd 26, 352(1)
+ stxvd2x 61, 1, 11
+ li 11, 208
+ stfd 27, 360(1)
+ stxvd2x 62, 1, 11
+ li 11, 224
+ xxspltw 62, 6, 1
+ stxvd2x 63, 1, 11
+ li 11, 16
+ stfd 28, 368(1)
+ lfiwzx 5, 5, 11
+ ld 5, 432(1)
+ stfd 29, 376(1)
+ stfd 30, 384(1)
+ stfd 31, 392(1)
+ xxspltw 61, 5, 1
+ beq 0, .LBB3_5
+ addis 30, 2, .LCPI3_0@toc@ha
+ neg 7, 7
+ xxleqv 34, 34, 34
+ addis 28, 2, .LCPI3_2@toc@ha
+ addis 27, 2, .LCPI3_3@toc@ha
+ addis 26, 2, .LCPI3_4@toc@ha
+ addis 25, 2, .LCPI3_5@toc@ha
+ ld 29, 24(3)
+ addi 0, 30, .LCPI3_0@toc@l
+ mtfprwz 1, 7
+ addis 7, 2, .LCPI3_1@toc@ha
+ ld 30, 16(3)
+ lxvd2x 0, 0, 0
+ mtfprwz 2, 6
+ rldicl 6, 6, 32, 32
+ addi 0, 7, .LCPI3_1@toc@l
+ ld 7, 8(3)
+ vslw 2, 2, 2
+ lvx 5, 0, 0
+ addi 0, 28, .LCPI3_2@toc@l
+ addi 28, 27, .LCPI3_3@toc@l
+ addi 27, 26, .LCPI3_4@toc@l
+ addi 26, 25, .LCPI3_5@toc@l
+ or 25, 9, 8
+ li 9, 0
+ xxspltw 36, 2, 1
+ xxswapd 35, 0
+ xxspltw 0, 1, 1
+ xxland 35, 0, 35
+ mtfprwz 0, 6
+ ld 6, 0(3)
+ addi 3, 3, -8
+ vadduwm 4, 3, 4
+ xxlor 35, 35, 34
+ xxlxor 34, 36, 34
+ xxlor 9, 36, 36
+ vspltisw 4, 4
+ vcmpgtsw 2, 3, 2
+ xxspltw 35, 0, 1
+ xxlor 10, 36, 36
+ vsubuwm 2, 3, 2
+ xxlor 11, 34, 34
+ lvx 2, 0, 0
+ li 0, 32
+ xxlor 12, 34, 34
+ lvx 2, 0, 28
+ li 28, 48
+ xxlor 13, 34, 34
+ lvx 2, 0, 27
+ li 27, 0
+ xxlor 31, 34, 34
+ lvx 2, 0, 26
+ xxlor 30, 34, 34
+.LBB3_2:
+ mr 26, 27
+ addi 27, 27, 1
+ xxlor 28, 40, 40
+ cmpld 27, 4
+ sldi 26, 26, 6
+ xxlor 24, 45, 45
+ iseleq 24, 10, 9
+ add 23, 6, 26
+ add 22, 30, 26
+ lxvd2x 0, 6, 26
+ lxvd2x 1, 7, 26
+ or 25, 24, 25
+ add 24, 7, 26
+ lxvd2x 2, 30, 26
+ lxvd2x 3, 29, 26
+ xxlor 29, 38, 38
+ lxvd2x 4, 23, 11
+ lxvd2x 6, 24, 11
+ clrlwi 25, 25, 24
+ lxvd2x 7, 22, 11
+ lxvd2x 8, 23, 0
+ mtfprd 5, 25
+ add 25, 29, 26
+ xxswapd 34, 0
+ lxvd2x 0, 25, 11
+ xxswapd 36, 1
+ xxswapd 33, 2
+ lxvd2x 1, 24, 0
+ lxvd2x 2, 22, 0
+ xxswapd 39, 3
+ xxswapd 32, 4
+ lxvd2x 3, 25, 0
+ lxvd2x 4, 23, 28
+ xxswapd 49, 6
+ xxswapd 51, 7
+ lxvd2x 6, 24, 28
+ xxswapd 58, 8
+ lxvd2x 7, 22, 28
+ lxvd2x 8, 25, 28
+ xxswapd 60, 0
+ mr 25, 3
+ xxswapd 57, 1
+ xxswapd 53, 2
+ xxswapd 52, 3
+ xxswapd 56, 4
+ xxswapd 55, 6
+ xxswapd 0, 5
+ xxswapd 40, 7
+ xxswapd 41, 8
+ mtctr 12
+.LBB3_3:
+ ldu 24, 8(25)
+ add 24, 24, 26
+ addi 24, 24, 256
+ dcbt 0, 24
+ bdnz .LBB3_3
+ vmrgew 3, 4, 2
+ vspltisw 31, 9
+ mr 25, 8
+ vmrglw 10, 4, 2
+ vspltisw 14, 10
+ vmrghw 6, 4, 2
+ xxspltw 0, 0, 3
+ vmrgew 4, 17, 0
+ vmrglw 11, 17, 0
+ vmrghw 16, 17, 0
+ vmrgew 0, 25, 26
+ vmrgew 13, 7, 1
+ vmrglw 2, 7, 1
+ vmrghw 7, 7, 1
+ xxlor 25, 36, 36
+ vmrgew 4, 28, 19
+ xxlor 26, 32, 32
+ vmrglw 0, 25, 26
+ vmrglw 1, 28, 19
+ xxmrgld 47, 34, 42
+ xxlor 44, 28, 28
+ vmrghw 25, 25, 26
+ xxlor 23, 36, 36
+ vmrghw 4, 28, 19
+ vspltisw 19, -16
+ xxlor 5, 32, 32
+ vmrgew 0, 20, 21
+ xxmrgld 34, 33, 43
+ vmrglw 28, 20, 21
+ vmrghw 21, 20, 21
+ vmrglw 20, 23, 24
+ vmrghw 26, 23, 24
+ vmrglw 17, 9, 8
+ xxlor 8, 32, 32
+ vmrgew 0, 23, 24
+ xxmrgld 56, 39, 38
+ vmrgew 23, 9, 8
+ xxlor 33, 24, 24
+ xxlor 2, 34, 34
+ vadduwm 11, 15, 1
+ xxmrgld 33, 36, 48
+ xxlor 6, 47, 47
+ xxlor 27, 32, 32
+ vmrghw 0, 9, 8
+ vspltisw 9, 12
+ vsubuwm 8, 31, 19
+ xxmrgld 51, 23, 25
+ vadduwm 31, 2, 12
+ xxlor 34, 10, 10
+ vadduwm 10, 14, 14
+ vslw 15, 2, 2
+ xxlor 34, 29, 29
+ vadduwm 14, 24, 27
+ xxlor 24, 48, 48
+ vadduwm 16, 1, 2
+ xxmrgld 34, 45, 35
+ vadduwm 31, 31, 30
+ xxmrghd 36, 36, 24
+ vadduwm 11, 11, 29
+ vadduwm 14, 14, 18
+ vadduwm 13, 16, 22
+ xxlxor 47, 63, 47
+ xxlor 1, 9, 9
+ xxlor 1, 11, 11
+ xxlxor 48, 43, 9
+ vadduwm 11, 11, 2
+ xxlor 7, 34, 34
+ xxmrghd 34, 39, 38
+ xxlxor 39, 46, 11
+ xxlor 1, 50, 50
+ xxlxor 50, 45, 0
+ vperm 15, 15, 15, 5
+ vperm 16, 16, 16, 5
+ vperm 7, 7, 7, 5
+ vperm 18, 18, 18, 5
+ xxlor 4, 33, 33
+ xxlor 33, 31, 31
+ vadduwm 14, 14, 2
+ xxlor 3, 34, 34
+ xxlor 34, 12, 12
+ xxlor 35, 13, 13
+ vadduwm 6, 15, 1
+ xxlor 33, 30, 30
+ vadduwm 2, 16, 2
+ vadduwm 3, 7, 3
+ vadduwm 12, 18, 1
+ xxlxor 59, 34, 61
+ xxlxor 61, 35, 1
+ xxlxor 33, 38, 62
+ xxlxor 62, 44, 54
+ vrlw 22, 27, 10
+ vrlw 29, 29, 10
+ vrlw 1, 1, 10
+ vrlw 30, 30, 10
+ vadduwm 31, 31, 19
+ vadduwm 13, 13, 4
+ vadduwm 11, 22, 11
+ vadduwm 14, 29, 14
+ vadduwm 31, 1, 31
+ vadduwm 13, 30, 13
+ vadduwm 9, 9, 9
+ xxlor 1, 36, 36
+ xxlxor 48, 43, 48
+ xxlxor 36, 46, 39
+ xxmrgld 39, 60, 5
+ xxlxor 47, 63, 47
+ xxlxor 50, 45, 50
+ vrlw 16, 16, 9
+ vrlw 28, 4, 9
+ xxmrgld 36, 53, 57
+ vrlw 15, 15, 9
+ xxmrghd 57, 53, 57
+ vrlw 18, 18, 9
+ vadduwm 14, 14, 4
+ xxlor 0, 36, 36
+ xxmrgld 36, 49, 52
+ vadduwm 2, 16, 2
+ xxmrgld 49, 8, 26
+ vadduwm 3, 28, 3
+ vadduwm 6, 15, 6
+ vadduwm 12, 18, 12
+ xxlxor 54, 34, 54
+ xxlxor 61, 35, 61
+ xxlxor 33, 38, 33
+ xxlxor 62, 44, 62
+ vrlw 29, 29, 8
+ vrlw 20, 1, 8
+ xxmrgld 33, 55, 27
+ vrlw 30, 30, 8
+ vrlw 22, 22, 8
+ vadduwm 11, 11, 7
+ xxlor 5, 39, 39
+ xxmrgld 39, 32, 58
+ vadduwm 31, 31, 4
+ vadduwm 11, 29, 11
+ vadduwm 13, 13, 7
+ vadduwm 14, 20, 14
+ vadduwm 31, 30, 31
+ vadduwm 13, 22, 13
+ xxlor 28, 36, 36
+ xxlxor 50, 43, 50
+ xxlxor 48, 46, 48
+ xxlxor 36, 63, 60
+ xxlxor 47, 45, 47
+ vperm 18, 18, 18, 5
+ vperm 16, 16, 16, 5
+ vperm 4, 4, 4, 5
+ vperm 15, 15, 15, 5
+ vadduwm 11, 11, 17
+ vmr 28, 17
+ xxmrghd 49, 32, 58
+ vadduwm 6, 18, 6
+ vadduwm 12, 16, 12
+ vadduwm 21, 4, 2
+ vadduwm 3, 15, 3
+ xxlxor 34, 38, 61
+ xxlxor 61, 44, 52
+ xxlxor 62, 53, 62
+ xxlxor 54, 35, 54
+ vrlw 20, 2, 10
+ vrlw 29, 29, 10
+ vrlw 0, 30, 10
+ vrlw 30, 22, 10
+ vadduwm 14, 14, 25
+ vadduwm 31, 31, 1
+ vadduwm 13, 13, 17
+ vadduwm 11, 20, 11
+ vadduwm 14, 29, 14
+ vadduwm 31, 0, 31
+ vadduwm 13, 30, 13
+ xxlxor 50, 43, 50
+ xxlxor 48, 46, 48
+ xxlxor 36, 63, 36
+ xxlxor 47, 45, 47
+ vrlw 18, 18, 9
+ vrlw 16, 16, 9
+ vrlw 4, 4, 9
+ vrlw 15, 15, 9
+ vadduwm 11, 11, 24
+ xxlor 8, 56, 56
+ vadduwm 6, 18, 6
+ vadduwm 12, 16, 12
+ vadduwm 24, 4, 21
+ vadduwm 3, 15, 3
+ xxlxor 55, 38, 52
+ xxlxor 61, 44, 61
+ xxlxor 62, 35, 62
+ xxlxor 32, 56, 32
+ vrlw 30, 30, 8
+ vrlw 23, 23, 8
+ vrlw 29, 29, 8
+ vrlw 0, 0, 8
+ xxlor 25, 51, 51
+ vmr 26, 17
+ xxlor 49, 3, 3
+ xxlor 52, 1, 1
+ xxlor 51, 2, 2
+ vadduwm 14, 14, 17
+ vadduwm 31, 31, 20
+ vadduwm 13, 13, 19
+ vadduwm 11, 30, 11
+ vadduwm 14, 23, 14
+ vadduwm 31, 29, 31
+ vadduwm 13, 0, 13
+ xxlxor 48, 43, 48
+ xxlxor 36, 46, 36
+ xxlxor 47, 63, 47
+ xxlxor 50, 45, 50
+ vperm 16, 16, 16, 5
+ vperm 4, 4, 4, 5
+ vperm 15, 15, 15, 5
+ vperm 18, 18, 18, 5
+ xxlor 29, 39, 39
+ xxlor 59, 4, 4
+ vadduwm 24, 16, 24
+ vadduwm 3, 4, 3
+ vadduwm 6, 15, 6
+ vadduwm 12, 18, 12
+ xxlxor 62, 56, 62
+ xxlxor 55, 35, 55
+ xxlxor 61, 38, 61
+ xxlxor 32, 44, 32
+ vrlw 30, 30, 10
+ vrlw 23, 23, 10
+ vrlw 29, 29, 10
+ vrlw 0, 0, 10
+ xxlor 53, 0, 0
+ xxlor 39, 6, 6
+ vadduwm 11, 11, 27
+ vadduwm 14, 14, 21
+ vadduwm 31, 31, 7
+ vadduwm 13, 13, 1
+ vadduwm 11, 30, 11
+ vadduwm 14, 23, 14
+ vadduwm 31, 29, 31
+ vadduwm 13, 0, 13
+ xxlxor 48, 43, 48
+ xxlxor 36, 46, 36
+ xxlxor 47, 63, 47
+ xxlxor 50, 45, 50
+ vrlw 16, 16, 9
+ vrlw 4, 4, 9
+ vrlw 15, 15, 9
+ vrlw 18, 18, 9
+ xxlor 34, 7, 7
+ vadduwm 31, 31, 28
+ vadduwm 24, 16, 24
+ vadduwm 3, 4, 3
+ vadduwm 6, 15, 6
+ vadduwm 12, 18, 12
+ xxlxor 62, 56, 62
+ xxlxor 55, 35, 55
+ xxlxor 61, 38, 61
+ xxlxor 32, 44, 32
+ vrlw 23, 23, 8
+ vrlw 29, 29, 8
+ vrlw 0, 0, 8
+ vrlw 30, 30, 8
+ vadduwm 11, 11, 2
+ xxlor 34, 28, 28
+ vadduwm 13, 13, 26
+ vadduwm 14, 14, 2
+ vadduwm 11, 23, 11
+ vadduwm 14, 29, 14
+ vadduwm 31, 0, 31
+ vadduwm 13, 30, 13
+ xxlxor 50, 43, 50
+ xxlxor 48, 46, 48
+ xxlxor 36, 63, 36
+ xxlxor 47, 45, 47
+ vperm 18, 18, 18, 5
+ vperm 16, 16, 16, 5
+ vperm 4, 4, 4, 5
+ vperm 15, 15, 15, 5
+ xxlor 2, 58, 58
+ xxlor 39, 25, 25
+ vadduwm 6, 18, 6
+ vadduwm 12, 16, 12
+ vadduwm 24, 4, 24
+ vadduwm 3, 15, 3
+ xxlxor 55, 38, 55
+ xxlxor 61, 44, 61
+ xxlxor 32, 56, 32
+ xxlxor 62, 35, 62
+ vrlw 23, 23, 10
+ vrlw 29, 29, 10
+ vrlw 0, 0, 10
+ vrlw 30, 30, 10
+ xxlor 54, 29, 29
+ xxlor 58, 5, 5
+ vadduwm 11, 11, 25
+ vadduwm 14, 14, 7
+ vadduwm 31, 31, 22
+ vadduwm 13, 13, 26
+ vadduwm 11, 23, 11
+ vadduwm 14, 29, 14
+ vadduwm 31, 0, 31
+ vadduwm 13, 30, 13
+ xxlxor 50, 43, 50
+ xxlxor 48, 46, 48
+ xxlxor 36, 63, 36
+ xxlxor 47, 45, 47
+ vrlw 18, 18, 9
+ vrlw 16, 16, 9
+ vrlw 4, 4, 9
+ vrlw 15, 15, 9
+ vadduwm 11, 11, 17
+ vadduwm 14, 14, 21
+ vadduwm 6, 18, 6
+ vadduwm 12, 16, 12
+ vadduwm 24, 4, 24
+ vadduwm 3, 15, 3
+ xxlxor 55, 38, 55
+ xxlxor 61, 44, 61
+ xxlxor 62, 35, 62
+ xxlxor 32, 56, 32
+ vrlw 30, 30, 8
+ vrlw 23, 23, 8
+ vrlw 29, 29, 8
+ vrlw 0, 0, 8
+ vadduwm 31, 31, 1
+ vadduwm 13, 13, 20
+ vadduwm 11, 30, 11
+ vadduwm 14, 23, 14
+ vadduwm 31, 29, 31
+ vadduwm 13, 0, 13
+ xxlxor 48, 43, 48
+ xxlxor 36, 46, 36
+ xxlxor 47, 63, 47
+ xxlxor 50, 45, 50
+ vperm 16, 16, 16, 5
+ vperm 4, 4, 4, 5
+ vperm 15, 15, 15, 5
+ vperm 18, 18, 18, 5
+ xxlor 0, 33, 33
+ xxlor 33, 8, 8
+ vadduwm 24, 16, 24
+ vadduwm 3, 4, 3
+ vadduwm 6, 15, 6
+ vadduwm 12, 18, 12
+ xxlxor 62, 56, 62
+ xxlxor 55, 35, 55
+ xxlxor 61, 38, 61
+ xxlxor 32, 44, 32
+ vrlw 30, 30, 10
+ vrlw 23, 23, 10
+ vrlw 29, 29, 10
+ vrlw 0, 0, 10
+ vadduwm 11, 11, 19
+ vadduwm 14, 14, 2
+ vadduwm 31, 31, 1
+ vadduwm 13, 13, 22
+ vadduwm 11, 30, 11
+ vadduwm 14, 23, 14
+ vadduwm 31, 29, 31
+ vadduwm 13, 0, 13
+ xxlxor 48, 43, 48
+ xxlxor 36, 46, 36
+ xxlxor 47, 63, 47
+ xxlxor 50, 45, 50
+ vrlw 16, 16, 9
+ vrlw 4, 4, 9
+ vrlw 15, 15, 9
+ vrlw 18, 18, 9
+ vadduwm 11, 11, 27
+ vadduwm 14, 14, 28
+ vadduwm 24, 16, 24
+ vadduwm 3, 4, 3
+ vadduwm 6, 15, 6
+ vadduwm 12, 18, 12
+ xxlxor 62, 56, 62
+ xxlxor 55, 35, 55
+ xxlxor 61, 38, 61
+ xxlxor 32, 44, 32
+ vrlw 23, 23, 8
+ vrlw 29, 29, 8
+ vrlw 0, 0, 8
+ vrlw 30, 30, 8
+ vadduwm 31, 31, 25
+ vadduwm 13, 13, 26
+ vadduwm 11, 23, 11
+ vadduwm 14, 29, 14
+ vadduwm 31, 0, 31
+ vadduwm 13, 30, 13
+ xxlxor 50, 43, 50
+ xxlxor 48, 46, 48
+ xxlxor 36, 63, 36
+ xxlxor 47, 45, 47
+ vperm 18, 18, 18, 5
+ vperm 16, 16, 16, 5
+ vperm 4, 4, 4, 5
+ vperm 15, 15, 15, 5
+ xxlor 3, 7, 7
+ vadduwm 11, 11, 7
+ vadduwm 6, 18, 6
+ vadduwm 12, 16, 12
+ vadduwm 24, 4, 24
+ vadduwm 3, 15, 3
+ xxlxor 55, 38, 55
+ xxlxor 61, 44, 61
+ xxlxor 32, 56, 32
+ xxlxor 62, 35, 62
+ vrlw 23, 23, 10
+ vrlw 29, 29, 10
+ vrlw 0, 0, 10
+ vrlw 30, 30, 10
+ xxlor 33, 6, 6
+ xxlor 58, 2, 2
+ xxlor 39, 3, 3
+ vadduwm 14, 14, 1
+ vadduwm 31, 31, 26
+ vadduwm 13, 13, 7
+ vadduwm 11, 23, 11
+ vadduwm 14, 29, 14
+ vadduwm 31, 0, 31
+ vadduwm 13, 30, 13
+ xxlxor 50, 43, 50
+ xxlxor 48, 46, 48
+ xxlxor 36, 63, 36
+ xxlxor 47, 45, 47
+ vrlw 18, 18, 9
+ vrlw 16, 16, 9
+ vrlw 4, 4, 9
+ vrlw 15, 15, 9
+ xxlor 52, 0, 0
+ vadduwm 11, 11, 21
+ vadduwm 6, 18, 6
+ vadduwm 12, 16, 12
+ vadduwm 24, 4, 24
+ vadduwm 3, 15, 3
+ xxlxor 55, 38, 55
+ xxlxor 61, 44, 61
+ xxlxor 62, 35, 62
+ xxlxor 32, 56, 32
+ vrlw 30, 30, 8
+ vrlw 23, 23, 8
+ vrlw 29, 29, 8
+ vrlw 0, 0, 8
+ vadduwm 14, 14, 2
+ vadduwm 31, 31, 22
+ vadduwm 13, 13, 20
+ vadduwm 11, 30, 11
+ vadduwm 14, 23, 14
+ vadduwm 31, 29, 31
+ vadduwm 13, 0, 13
+ xxlxor 48, 43, 48
+ xxlxor 36, 46, 36
+ xxlxor 47, 63, 47
+ xxlxor 50, 45, 50
+ vperm 16, 16, 16, 5
+ vperm 4, 4, 4, 5
+ vperm 15, 15, 15, 5
+ vperm 18, 18, 18, 5
+ xxlor 7, 49, 49
+ vmr 17, 2
+ vadduwm 24, 16, 24
+ vadduwm 3, 4, 3
+ vadduwm 6, 15, 6
+ vadduwm 12, 18, 12
+ xxlxor 62, 56, 62
+ xxlxor 55, 35, 55
+ xxlxor 61, 38, 61
+ xxlxor 32, 44, 32
+ vrlw 30, 30, 10
+ vrlw 23, 23, 10
+ vrlw 29, 29, 10
+ vrlw 0, 0, 10
+ xxlor 54, 1, 1
+ xxlor 34, 7, 7
+ vadduwm 11, 11, 22
+ vadduwm 14, 14, 28
+ vadduwm 31, 31, 2
+ vadduwm 13, 13, 26
+ vadduwm 11, 30, 11
+ vadduwm 14, 23, 14
+ vadduwm 31, 29, 31
+ vadduwm 13, 0, 13
+ xxlxor 48, 43, 48
+ xxlxor 36, 46, 36
+ xxlxor 47, 63, 47
+ xxlxor 50, 45, 50
+ vrlw 16, 16, 9
+ vrlw 4, 4, 9
+ vrlw 15, 15, 9
+ vrlw 18, 18, 9
+ xxlor 59, 25, 25
+ vadduwm 11, 11, 19
+ vadduwm 24, 16, 24
+ vadduwm 3, 4, 3
+ vadduwm 6, 15, 6
+ vadduwm 12, 18, 12
+ xxlxor 62, 56, 62
+ xxlxor 55, 35, 55
+ xxlxor 61, 38, 61
+ xxlxor 32, 44, 32
+ vrlw 23, 23, 8
+ vrlw 29, 29, 8
+ vrlw 0, 0, 8
+ vrlw 30, 30, 8
+ vadduwm 14, 14, 25
+ vadduwm 31, 31, 27
+ vadduwm 13, 13, 7
+ vadduwm 11, 23, 11
+ vadduwm 14, 29, 14
+ vadduwm 31, 0, 31
+ vadduwm 13, 30, 13
+ xxlxor 50, 43, 50
+ xxlxor 48, 46, 48
+ xxlxor 36, 63, 36
+ xxlxor 47, 45, 47
+ vperm 18, 18, 18, 5
+ vperm 16, 16, 16, 5
+ vperm 4, 4, 4, 5
+ vperm 15, 15, 15, 5
+ vmr 2, 19
+ xxlor 0, 7, 7
+ vadduwm 6, 18, 6
+ vadduwm 12, 16, 12
+ vadduwm 24, 4, 24
+ vadduwm 3, 15, 3
+ xxlxor 55, 38, 55
+ xxlxor 61, 44, 61
+ xxlxor 32, 56, 32
+ xxlxor 62, 35, 62
+ vrlw 23, 23, 10
+ vrlw 29, 29, 10
+ vrlw 0, 0, 10
+ vrlw 30, 30, 10
+ xxlor 1, 51, 51
+ xxlor 7, 39, 39
+ xxlor 51, 8, 8
+ xxlor 39, 5, 5
+ xxlor 34, 4, 4
+ vadduwm 11, 11, 1
+ vadduwm 14, 14, 19
+ vadduwm 31, 31, 7
+ vadduwm 13, 13, 2
+ vadduwm 11, 23, 11
+ vadduwm 14, 29, 14
+ vadduwm 31, 0, 31
+ vadduwm 13, 30, 13
+ xxlxor 50, 43, 50
+ xxlxor 48, 46, 48
+ xxlxor 36, 63, 36
+ xxlxor 47, 45, 47
+ vrlw 18, 18, 9
+ vrlw 16, 16, 9
+ vrlw 4, 4, 9
+ vrlw 15, 15, 9
+ xxlor 2, 53, 53
+ vmr 21, 28
+ vadduwm 6, 18, 6
+ vadduwm 12, 16, 12
+ vadduwm 24, 4, 24
+ vadduwm 3, 15, 3
+ xxlxor 55, 38, 55
+ xxlxor 61, 44, 61
+ xxlxor 62, 35, 62
+ xxlxor 32, 56, 32
+ vrlw 30, 30, 8
+ vrlw 23, 23, 8
+ vrlw 29, 29, 8
+ vrlw 0, 0, 8
+ xxlor 53, 29, 29
+ vadduwm 11, 11, 17
+ vadduwm 14, 14, 28
+ vadduwm 31, 31, 26
+ vadduwm 13, 13, 21
+ vadduwm 11, 30, 11
+ vadduwm 14, 23, 14
+ vadduwm 31, 29, 31
+ vadduwm 13, 0, 13
+ xxlxor 48, 43, 48
+ xxlxor 36, 46, 36
+ xxlxor 47, 63, 47
+ xxlxor 50, 45, 50
+ vperm 16, 16, 16, 5
+ vperm 4, 4, 4, 5
+ vperm 15, 15, 15, 5
+ vperm 18, 18, 18, 5
+ vadduwm 11, 11, 20
+ xxlor 5, 52, 52
+ vadduwm 24, 16, 24
+ vadduwm 3, 4, 3
+ vadduwm 6, 15, 6
+ vadduwm 12, 18, 12
+ xxlxor 62, 56, 62
+ xxlxor 55, 35, 55
+ xxlxor 61, 38, 61
+ xxlxor 32, 44, 32
+ vrlw 30, 30, 10
+ vrlw 23, 23, 10
+ vrlw 29, 29, 10
+ vrlw 0, 0, 10
+ xxlor 52, 2, 2
+ vadduwm 14, 14, 25
+ vadduwm 31, 31, 20
+ vadduwm 13, 13, 7
+ vadduwm 11, 30, 11
+ vadduwm 14, 23, 14
+ vadduwm 31, 29, 31
+ vadduwm 13, 0, 13
+ xxlxor 48, 43, 48
+ xxlxor 36, 46, 36
+ xxlxor 47, 63, 47
+ xxlxor 50, 45, 50
+ vrlw 16, 16, 9
+ vrlw 4, 4, 9
+ vrlw 15, 15, 9
+ vrlw 18, 18, 9
+ vadduwm 11, 11, 22
+ vadduwm 14, 14, 27
+ vadduwm 24, 16, 24
+ vadduwm 3, 4, 3
+ vadduwm 6, 15, 6
+ vadduwm 12, 18, 12
+ xxlxor 62, 56, 62
+ xxlxor 55, 35, 55
+ xxlxor 61, 38, 61
+ xxlxor 32, 44, 32
+ vrlw 23, 23, 8
+ vrlw 29, 29, 8
+ vrlw 0, 0, 8
+ vrlw 30, 30, 8
+ vadduwm 31, 31, 1
+ vadduwm 13, 13, 2
+ vadduwm 11, 23, 11
+ vadduwm 14, 29, 14
+ vadduwm 31, 0, 31
+ vadduwm 13, 30, 13
+ xxlxor 50, 43, 50
+ xxlxor 48, 46, 48
+ xxlxor 36, 63, 36
+ xxlxor 47, 45, 47
+ vperm 18, 18, 18, 5
+ vperm 16, 16, 16, 5
+ vperm 4, 4, 4, 5
+ vperm 15, 15, 15, 5
+ xxlor 3, 29, 29
+ xxlor 4, 49, 49
+ vadduwm 6, 18, 6
+ vadduwm 12, 16, 12
+ vadduwm 24, 4, 24
+ vadduwm 3, 15, 3
+ xxlxor 55, 38, 55
+ xxlxor 61, 44, 61
+ xxlxor 32, 56, 32
+ xxlxor 62, 35, 62
+ vrlw 23, 23, 10
+ vrlw 29, 29, 10
+ vrlw 0, 0, 10
+ vrlw 30, 30, 10
+ vmr 17, 28
+ xxlor 2, 54, 54
+ xxlor 3, 34, 34
+ xxlor 34, 8, 8
+ xxlor 51, 0, 0
+ xxlor 60, 7, 7
+ xxlor 54, 1, 1
+ vadduwm 11, 11, 2
+ vadduwm 14, 14, 19
+ vadduwm 31, 31, 28
+ vadduwm 13, 13, 22
+ vadduwm 11, 23, 11
+ vadduwm 14, 29, 14
+ vadduwm 31, 0, 31
+ vadduwm 13, 30, 13
+ xxlxor 50, 43, 50
+ xxlxor 48, 46, 48
+ xxlxor 36, 63, 36
+ xxlxor 47, 45, 47
+ vrlw 18, 18, 9
+ vrlw 16, 16, 9
+ vrlw 4, 4, 9
+ vrlw 15, 15, 9
+ vadduwm 11, 11, 17
+ vadduwm 14, 14, 25
+ vadduwm 6, 18, 6
+ vadduwm 12, 16, 12
+ vadduwm 24, 4, 24
+ vadduwm 3, 15, 3
+ xxlxor 55, 38, 55
+ xxlxor 61, 44, 61
+ xxlxor 62, 35, 62
+ xxlxor 32, 56, 32
+ vrlw 30, 30, 8
+ vrlw 23, 23, 8
+ vrlw 29, 29, 8
+ vrlw 0, 0, 8
+ vadduwm 31, 31, 7
+ vadduwm 13, 13, 26
+ vadduwm 11, 30, 11
+ vadduwm 14, 23, 14
+ vadduwm 31, 29, 31
+ vadduwm 13, 0, 13
+ xxlxor 48, 43, 48
+ xxlxor 36, 46, 36
+ xxlxor 47, 63, 47
+ xxlxor 50, 45, 50
+ vperm 16, 16, 16, 5
+ vperm 4, 4, 4, 5
+ vperm 15, 15, 15, 5
+ vperm 18, 18, 18, 5
+ xxlor 6, 39, 39
+ xxlor 39, 4, 4
+ vadduwm 24, 16, 24
+ vadduwm 3, 4, 3
+ vadduwm 6, 15, 6
+ vadduwm 12, 18, 12
+ xxlxor 62, 56, 62
+ xxlxor 55, 35, 55
+ xxlxor 61, 38, 61
+ xxlxor 32, 44, 32
+ vrlw 30, 30, 10
+ vrlw 23, 23, 10
+ vrlw 29, 29, 10
+ vrlw 0, 0, 10
+ vadduwm 11, 11, 21
+ vadduwm 14, 14, 27
+ vadduwm 31, 31, 7
+ vadduwm 13, 13, 28
+ vadduwm 11, 30, 11
+ vadduwm 14, 23, 14
+ vadduwm 31, 29, 31
+ vadduwm 13, 0, 13
+ xxlxor 48, 43, 48
+ xxlxor 36, 46, 36
+ xxlxor 47, 63, 47
+ xxlxor 50, 45, 50
+ vrlw 16, 16, 9
+ vrlw 4, 4, 9
+ vrlw 15, 15, 9
+ vrlw 18, 18, 9
+ xxlor 0, 49, 49
+ xxlor 49, 5, 5
+ vadduwm 24, 16, 24
+ vadduwm 3, 4, 3
+ vadduwm 6, 15, 6
+ vadduwm 12, 18, 12
+ xxlxor 62, 56, 62
+ xxlxor 55, 35, 55
+ xxlxor 61, 38, 61
+ xxlxor 32, 44, 32
+ vrlw 23, 23, 8
+ vrlw 29, 29, 8
+ vrlw 0, 0, 8
+ vrlw 30, 30, 8
+ vadduwm 11, 11, 17
+ vadduwm 14, 14, 1
+ vadduwm 31, 31, 2
+ vadduwm 13, 13, 22
+ vadduwm 11, 23, 11
+ vadduwm 14, 29, 14
+ vadduwm 31, 0, 31
+ vadduwm 13, 30, 13
+ xxlxor 50, 43, 50
+ xxlxor 48, 46, 48
+ xxlxor 36, 63, 36
+ xxlxor 47, 45, 47
+ vperm 18, 18, 18, 5
+ vperm 16, 16, 16, 5
+ vperm 4, 4, 4, 5
+ vperm 15, 15, 15, 5
+ xxlor 34, 3, 3
+ xxlor 49, 2, 2
+ vadduwm 6, 18, 6
+ vadduwm 12, 16, 12
+ vadduwm 24, 4, 24
+ vadduwm 3, 15, 3
+ xxlxor 55, 38, 55
+ xxlxor 61, 44, 61
+ xxlxor 32, 56, 32
+ xxlxor 62, 35, 62
+ vrlw 23, 23, 10
+ vrlw 29, 29, 10
+ vrlw 0, 0, 10
+ vrlw 30, 30, 10
+ vadduwm 11, 11, 19
+ vadduwm 14, 14, 20
+ vadduwm 31, 31, 2
+ vadduwm 13, 13, 17
+ vadduwm 11, 23, 11
+ vadduwm 14, 29, 14
+ vadduwm 31, 0, 31
+ vadduwm 13, 30, 13
+ xxlxor 50, 43, 50
+ xxlxor 48, 46, 48
+ xxlxor 36, 63, 36
+ xxlxor 47, 45, 47
+ vrlw 18, 18, 9
+ vrlw 16, 16, 9
+ vrlw 4, 4, 9
+ vrlw 15, 15, 9
+ vadduwm 14, 14, 27
+ vadduwm 11, 11, 25
+ vadduwm 6, 18, 6
+ vadduwm 12, 16, 12
+ vadduwm 27, 4, 24
+ vadduwm 3, 15, 3
+ xxlxor 57, 38, 55
+ xxlxor 61, 44, 61
+ xxlxor 62, 35, 62
+ xxlxor 32, 59, 32
+ xxlor 39, 7, 7
+ vrlw 30, 30, 8
+ vrlw 25, 25, 8
+ vrlw 29, 29, 8
+ vrlw 0, 0, 8
+ xxlor 1, 58, 58
+ vmr 26, 19
+ vadduwm 19, 31, 7
+ xxlor 39, 6, 6
+ vadduwm 11, 30, 11
+ vadduwm 7, 13, 7
+ vadduwm 13, 25, 14
+ vadduwm 14, 29, 19
+ vadduwm 7, 0, 7
+ xxlxor 48, 43, 48
+ xxlxor 36, 45, 36
+ xxlxor 47, 46, 47
+ xxlxor 50, 39, 50
+ vperm 16, 16, 16, 5
+ vperm 4, 4, 4, 5
+ vperm 15, 15, 15, 5
+ vperm 18, 18, 18, 5
+ xxlor 51, 1, 1
+ vadduwm 13, 13, 1
+ vadduwm 11, 11, 19
+ vadduwm 19, 16, 27
+ vadduwm 3, 4, 3
+ vadduwm 6, 15, 6
+ vadduwm 12, 18, 12
+ xxlxor 63, 51, 62
+ xxlxor 62, 35, 57
+ xxlxor 61, 38, 61
+ xxlxor 32, 44, 32
+ vrlw 31, 31, 10
+ vrlw 30, 30, 10
+ vrlw 29, 29, 10
+ vrlw 0, 0, 10
+ xxlor 33, 0, 0
+ vadduwm 7, 7, 2
+ vadduwm 14, 14, 1
+ vadduwm 11, 31, 11
+ vadduwm 13, 30, 13
+ vadduwm 14, 29, 14
+ vadduwm 7, 0, 7
+ xxlxor 48, 43, 48
+ xxlxor 36, 45, 36
+ xxlxor 47, 46, 47
+ xxlxor 50, 39, 50
+ vrlw 16, 16, 9
+ vrlw 4, 4, 9
+ vrlw 15, 15, 9
+ vrlw 18, 18, 9
+ xxlor 60, 8, 8
+ vadduwm 1, 11, 21
+ vadduwm 11, 13, 28
+ vadduwm 13, 16, 19
+ vadduwm 3, 4, 3
+ vadduwm 6, 15, 6
+ vadduwm 12, 18, 12
+ xxlxor 51, 45, 63
+ xxlxor 63, 35, 62
+ xxlxor 62, 38, 61
+ xxlxor 32, 44, 32
+ vrlw 31, 31, 8
+ vrlw 30, 30, 8
+ vrlw 0, 0, 8
+ vrlw 19, 19, 8
+ vadduwm 14, 14, 26
+ vadduwm 7, 7, 17
+ vadduwm 1, 31, 1
+ vadduwm 11, 30, 11
+ vadduwm 14, 0, 14
+ vadduwm 7, 19, 7
+ xxlxor 50, 33, 50
+ xxlxor 48, 43, 48
+ xxlxor 36, 46, 36
+ xxlxor 47, 39, 47
+ vperm 18, 18, 18, 5
+ vperm 16, 16, 16, 5
+ vperm 4, 4, 4, 5
+ vperm 15, 15, 15, 5
+ xxlor 34, 4, 4
+ vadduwm 14, 14, 22
+ vadduwm 6, 18, 6
+ vadduwm 12, 16, 12
+ vadduwm 13, 4, 13
+ vadduwm 3, 15, 3
+ xxlxor 49, 38, 63
+ xxlxor 63, 44, 62
+ xxlxor 32, 45, 32
+ xxlxor 51, 35, 51
+ vrlw 17, 17, 10
+ vrlw 31, 31, 10
+ vrlw 0, 0, 10
+ vrlw 10, 19, 10
+ vadduwm 11, 11, 2
+ xxlor 34, 5, 5
+ vadduwm 1, 1, 20
+ vadduwm 2, 7, 2
+ vadduwm 7, 31, 11
+ vadduwm 11, 0, 14
+ vadduwm 2, 10, 2
+ vadduwm 1, 17, 1
+ xxlxor 36, 43, 36
+ xxlxor 46, 34, 47
+ vrlw 4, 4, 9
+ vrlw 14, 14, 9
+ xxlxor 47, 33, 50
+ xxlxor 48, 39, 48
+ vrlw 15, 15, 9
+ vrlw 9, 16, 9
+ vadduwm 13, 4, 13
+ vadduwm 3, 14, 3
+ xxlxor 32, 45, 32
+ xxlxor 45, 45, 33
+ xxlxor 33, 35, 42
+ xxlxor 59, 35, 39
+ vadduwm 3, 15, 6
+ vadduwm 6, 9, 12
+ xxlxor 39, 35, 49
+ xxlxor 42, 38, 63
+ vrlw 1, 1, 8
+ vrlw 7, 7, 8
+ vrlw 10, 10, 8
+ vrlw 0, 0, 8
+ xxlxor 40, 35, 43
+ xxlxor 38, 38, 34
+ xxlxor 61, 33, 41
+ xxlxor 50, 39, 36
+ xxlxor 62, 42, 46
+ xxlxor 54, 32, 47
+ bne 0, .LBB3_2
+.LBB3_5:
+ vmrglw 2, 27, 13
+ li 3, 32
+ li 4, 48
+ vmrglw 4, 6, 8
+ vmrglw 0, 18, 29
+ vmrglw 1, 22, 30
+ vmrghw 3, 27, 13
+ vmrghw 5, 6, 8
+ vmrghw 6, 18, 29
+ vmrghw 7, 22, 30
+ xxmrgld 40, 36, 34
+ xxmrghd 34, 36, 34
+ xxmrgld 41, 33, 32
+ xxswapd 0, 40
+ xxmrgld 36, 37, 35
+ xxmrghd 35, 37, 35
+ xxmrghd 37, 33, 32
+ xxswapd 1, 41
+ xxmrgld 32, 39, 38
+ xxmrghd 33, 39, 38
+ xxswapd 2, 34
+ xxswapd 4, 36
+ xxswapd 3, 37
+ stxvd2x 0, 0, 5
+ xxswapd 5, 32
+ stxvd2x 1, 5, 11
+ xxswapd 0, 35
+ xxswapd 1, 33
+ stxvd2x 2, 5, 3
+ li 3, 64
+ stxvd2x 3, 5, 4
+ li 4, 80
+ stxvd2x 4, 5, 3
+ li 3, 96
+ stxvd2x 5, 5, 4
+ li 4, 112
+ stxvd2x 0, 5, 3
+ stxvd2x 1, 5, 4
+ li 3, 224
+ lxvd2x 63, 1, 3
+ li 3, 208
+ lfd 31, 392(1)
+ ld 30, 312(1)
+ ld 29, 304(1)
+ lxvd2x 62, 1, 3
+ li 3, 192
+ lfd 30, 384(1)
+ ld 28, 296(1)
+ ld 27, 288(1)
+ lxvd2x 61, 1, 3
+ li 3, 176
+ lfd 29, 376(1)
+ ld 26, 280(1)
+ ld 25, 272(1)
+ lxvd2x 60, 1, 3
+ li 3, 160
+ lfd 28, 368(1)
+ ld 24, 264(1)
+ ld 23, 256(1)
+ lxvd2x 59, 1, 3
+ li 3, 144
+ lfd 27, 360(1)
+ ld 22, 248(1)
+ lxvd2x 58, 1, 3
+ li 3, 128
+ lfd 26, 352(1)
+ lxvd2x 57, 1, 3
+ li 3, 112
+ lfd 25, 344(1)
+ lxvd2x 56, 1, 3
+ li 3, 96
+ lfd 24, 336(1)
+ lxvd2x 55, 1, 3
+ li 3, 80
+ lfd 23, 328(1)
+ lxvd2x 54, 1, 3
+ li 3, 64
+ lxvd2x 53, 1, 3
+ li 3, 48
+ lxvd2x 52, 1, 3
+ addi 1, 1, 400
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end3:
+ .size blake3_hash4_sse2, .Lfunc_end3-.Lfunc_begin3
+ .cfi_endproc
+ .section ".note.GNU-stack","",@progbits
+#endif
diff --git a/sys/contrib/openzfs/module/icp/asm-ppc64/blake3/b3_ppc64le_sse41.S b/sys/contrib/openzfs/module/icp/asm-ppc64/blake3/b3_ppc64le_sse41.S
new file mode 100644
index 000000000000..a8b2627f12b0
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/asm-ppc64/blake3/b3_ppc64le_sse41.S
@@ -0,0 +1,3064 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3
+ * Copyright (c) 2019-2022 Samuel Neves
+ * Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
+ *
+ * This is converted assembly: SSE4.1 -> POWER8 PPC64 Little Endian
+ * Used tools: SIMDe https://github.com/simd-everywhere/simde
+ */
+
+#if (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
+ .text
+ .abiversion 2
+ .section .rodata.cst16,"aM",@progbits,16
+ .p2align 4
+.LCPI0_0: /* vperm control vector: loaded into v6 and used once, by "vperm 2, 1, 2, 6" in zfs_blake3_compress_in_place_sse41 */
+ .byte 31
+ .byte 14
+ .byte 13
+ .byte 12
+ .byte 30
+ .byte 10
+ .byte 9
+ .byte 8
+ .byte 29
+ .byte 6
+ .byte 5
+ .byte 4
+ .byte 28
+ .byte 2
+ .byte 1
+ .byte 0
+.LCPI0_1: /* loaded into v4 and only compared against zero ("vcmpgtsb 2, 1, 4") to build mask v2; NOTE(review): presumably the x86 byte-shuffle control for a 16-bit rotate, kept by the SIMDe conversion - confirm */
+ .byte 2
+ .byte 3
+ .byte 0
+ .byte 1
+ .byte 6
+ .byte 7
+ .byte 4
+ .byte 5
+ .byte 10
+ .byte 11
+ .byte 8
+ .byte 9
+ .byte 14
+ .byte 15
+ .byte 12
+ .byte 13
+.LCPI0_2: /* vperm control vector: loaded into v3 and used by every "vperm x, x, x, 3" step of zfs_blake3_compress_in_place_sse41 (each followed by xxlandc with mask v2) */
+ .byte 29
+ .byte 28
+ .byte 31
+ .byte 30
+ .byte 25
+ .byte 24
+ .byte 27
+ .byte 26
+ .byte 21
+ .byte 20
+ .byte 23
+ .byte 22
+ .byte 17
+ .byte 16
+ .byte 19
+ .byte 18
+.LCPI0_3: /* four 32-bit constants, loaded into v10 and added with "vadduwm 8, 12, 10"; NOTE(review): presumably BLAKE3 IV words 0..3 - confirm against the C reference */
+ .long 1779033703 /* 0x6A09E667 */
+ .long 3144134277 /* 0xBB67AE85 */
+ .long 1013904242 /* 0x3C6EF372 */
+ .long 2773480762 /* 0xA54FF53A */
+.LCPI0_4: /* vperm control vector: loaded into v11 and used once, by "vperm 11, 0, 5, 11" in zfs_blake3_compress_in_place_sse41 */
+ .byte 27
+ .byte 26
+ .byte 25
+ .byte 24
+ .byte 19
+ .byte 18
+ .byte 17
+ .byte 16
+ .byte 11
+ .byte 10
+ .byte 9
+ .byte 8
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+.LCPI0_5: /* loaded into v5 and only compared against zero ("vcmpgtsb 5, 1, 5") to build mask v5; NOTE(review): presumably the x86 byte-shuffle control for an 8-bit rotate, kept by the SIMDe conversion - confirm */
+ .byte 1
+ .byte 2
+ .byte 3
+ .byte 0
+ .byte 5
+ .byte 6
+ .byte 7
+ .byte 4
+ .byte 9
+ .byte 10
+ .byte 11
+ .byte 8
+ .byte 13
+ .byte 14
+ .byte 15
+ .byte 12
+.LCPI0_6: /* vperm control vector: loaded into v0 and used by every "vperm x, x, x, 0" step of zfs_blake3_compress_in_place_sse41 (each followed by xxlandc with mask v5) */
+ .byte 30
+ .byte 29
+ .byte 28
+ .byte 31
+ .byte 26
+ .byte 25
+ .byte 24
+ .byte 27
+ .byte 22
+ .byte 21
+ .byte 20
+ .byte 23
+ .byte 18
+ .byte 17
+ .byte 16
+ .byte 19
+.LCPI0_7: /* vperm control vector: loaded into v14 and used once, by "vperm 14, 10, 12, 14" in zfs_blake3_compress_in_place_sse41 */
+ .byte 19
+ .byte 18
+ .byte 17
+ .byte 16
+ .byte 11
+ .byte 10
+ .byte 9
+ .byte 8
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+ .byte 27
+ .byte 26
+ .byte 25
+ .byte 24
+.LCPI0_8: /* vperm control vector: loaded into v6 and used as the control operand of the "vperm ..., 6" two-source shuffles in zfs_blake3_compress_in_place_sse41 */
+ .byte 23
+ .byte 22
+ .byte 21
+ .byte 20
+ .byte 27
+ .byte 26
+ .byte 25
+ .byte 24
+ .byte 11
+ .byte 10
+ .byte 9
+ .byte 8
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+.LCPI0_9: /* vperm control vector: loaded into v8 and used as the control operand of the "vperm ..., 8" two-source shuffles in zfs_blake3_compress_in_place_sse41 */
+ .byte 31
+ .byte 31
+ .byte 31
+ .byte 31
+ .byte 23
+ .byte 22
+ .byte 21
+ .byte 20
+ .byte 31
+ .byte 31
+ .byte 31
+ .byte 31
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+.LCPI0_10: /* vperm control vector: loaded into v7 and used as the control operand of the "vperm x, y, y, 7" shuffles in zfs_blake3_compress_in_place_sse41 */
+ .byte 19
+ .byte 18
+ .byte 17
+ .byte 16
+ .byte 31
+ .byte 31
+ .byte 31
+ .byte 31
+ .byte 31
+ .byte 30
+ .byte 29
+ .byte 28
+ .byte 31
+ .byte 31
+ .byte 31
+ .byte 31
+.LCPI0_11: /* vperm control vector: loaded into v9 and used as the control operand of the "vperm ..., 9" two-source shuffles in zfs_blake3_compress_in_place_sse41 */
+ .byte 31
+ .byte 30
+ .byte 29
+ .byte 28
+ .byte 11
+ .byte 10
+ .byte 9
+ .byte 8
+ .byte 23
+ .byte 22
+ .byte 21
+ .byte 20
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+.LCPI0_12: /* vperm control vector: loaded into v10 (replacing .LCPI0_3 there) and used as the control operand of the "vperm ..., 10" shuffles in zfs_blake3_compress_in_place_sse41 */
+ .byte 31
+ .byte 30
+ .byte 29
+ .byte 28
+ .byte 23
+ .byte 22
+ .byte 21
+ .byte 20
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+ .byte 27
+ .byte 26
+ .byte 25
+ .byte 24
+.LCPI0_13: /* vperm control vector: loaded into v11 (replacing the .LCPI0_4 result there) and used as the control operand of the "vperm ..., 11" shuffles in zfs_blake3_compress_in_place_sse41 */
+ .byte 27
+ .byte 26
+ .byte 25
+ .byte 24
+ .byte 11
+ .byte 10
+ .byte 9
+ .byte 8
+ .byte 15
+ .byte 14
+ .byte 13
+ .byte 12
+ .byte 31
+ .byte 30
+ .byte 29
+ .byte 28
+.LCPI0_14: /* vperm control vector: loaded into v16 late in zfs_blake3_compress_in_place_sse41 and used once, by "vperm 16, 14, 30, 16" */
+ .byte 23
+ .byte 22
+ .byte 21
+ .byte 20
+ .byte 23
+ .byte 22
+ .byte 21
+ .byte 20
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+ .text
+ .globl zfs_blake3_compress_in_place_sse41 /* Exported entry point. As coded below it reads 32 bytes at r3 and 64 bytes at r4, consumes r5, r6 and r7, and writes 32 bytes back to r3. */
+ .p2align 2
+ .type zfs_blake3_compress_in_place_sse41,@function
+zfs_blake3_compress_in_place_sse41: /* NOTE(review): presumably (cv, block, block_len, counter, flags) in r3..r7 as in upstream BLAKE3 compress_in_place - confirm against the C prototype */
+.Lfunc_begin0:
+ .cfi_startproc
+.Lfunc_gep0:
+ addis 2, 12, .TOC.-.Lfunc_gep0@ha /* global entry: derive the TOC pointer r2 from r12 (high part) */
+ addi 2, 2, .TOC.-.Lfunc_gep0@l /* ... low part */
+.Lfunc_lep0:
+ .localentry zfs_blake3_compress_in_place_sse41, .Lfunc_lep0-.Lfunc_gep0
+ li 8, -64 /* offset for saving vs60; r1 is never adjusted in this function, so the saves sit below the stack pointer */
+ mtvsrd 34, 5 /* r5 -> vs34 (v2) */
+ li 5, 16 /* r5 = 16: index of the second 16 bytes at r3 (load below, store at the end) */
+ lfdx 0, 0, 4 /* f0 = 8 bytes at r4+0 */
+ vspltisw 13, -16 /* v13 = -16 in every word; used to form the rotate count 25 below */
+ stxvd2x 60, 1, 8 /* save vs60 (v28) at r1-64 */
+ li 8, -48
+ mtvsrd 35, 7 /* r7 -> vs35 (v3) */
+ lfd 2, 16(4) /* f2 = 8 bytes at r4+16 */
+ lfd 3, 24(4) /* f3 = 8 bytes at r4+24 */
+ addis 7, 2, .LCPI0_0@toc@ha
+ stxvd2x 61, 1, 8 /* save vs61 (v29) at r1-48 */
+ li 8, -32
+ mtvsrwz 36, 6 /* low 32 bits of r6 -> vs36 (v4) */
+ rldicl 6, 6, 32, 32 /* r6 = high 32 bits of the original r6 */
+ stxvd2x 62, 1, 8 /* save vs62 (v30) at r1-32 */
+ li 8, -16
+ vmrghb 2, 3, 2
+ stxvd2x 63, 1, 8 /* save vs63 (v31) at r1-16 */
+ mtvsrwz 35, 6 /* high 32 bits of the original r6 -> vs35 (v3) */
+ addi 6, 7, .LCPI0_0@toc@l
+ addis 7, 2, .LCPI0_2@toc@ha
+ lfd 1, 8(4) /* f1 = 8 bytes at r4+8 */
+ xxmrghd 32, 3, 2
+ lvx 6, 0, 6 /* v6 = .LCPI0_0 */
+ xxlxor 33, 33, 33 /* v1 = 0 */
+ addis 6, 2, .LCPI0_1@toc@ha
+ addi 7, 7, .LCPI0_2@toc@l
+ vmrghw 3, 3, 4
+ addi 6, 6, .LCPI0_1@toc@l
+ vspltisw 14, 9 /* v14 = 9 in every word */
+ xxmrghd 37, 1, 0
+ lxvd2x 0, 0, 3 /* load 16 bytes at r3+0 */
+ lxvd2x 1, 3, 5 /* load 16 bytes at r3+16 */
+ vperm 2, 1, 2, 6
+ vpkudum 9, 0, 5
+ xxswapd 36, 0
+ xxswapd 38, 1
+ xxmrgld 34, 34, 35
+ lvx 3, 0, 7 /* v3 = .LCPI0_2: control for the "vperm x, x, x, 3" steps */
+ addis 7, 2, .LCPI0_4@toc@ha
+ addi 7, 7, .LCPI0_4@toc@l
+ vadduwm 4, 9, 4
+ lvx 11, 0, 7 /* v11 = .LCPI0_4 */
+ addis 7, 2, .LCPI0_6@toc@ha
+ addi 7, 7, .LCPI0_6@toc@l
+ vadduwm 7, 4, 6
+ lvx 4, 0, 6 /* v4 = .LCPI0_1 */
+ addis 6, 2, .LCPI0_3@toc@ha
+ addi 6, 6, .LCPI0_3@toc@l
+ vperm 11, 0, 5, 11
+ lvx 0, 0, 7 /* v0 = .LCPI0_6: control for the "vperm x, x, x, 0" steps */
+ li 7, 48
+ xxlxor 40, 39, 34
+ lvx 10, 0, 6 /* v10 = .LCPI0_3 (four 32-bit constants) */
+ addis 6, 2, .LCPI0_5@toc@ha
+ lxvd2x 1, 4, 7 /* load 16 bytes at r4+48 */
+ vcmpgtsb 2, 1, 4 /* v2 = bytewise (0 > .LCPI0_1); cleared out of each "vperm ..., 3" result with xxlandc */
+ addi 6, 6, .LCPI0_5@toc@l
+ vperm 4, 8, 8, 3
+ vspltisw 8, 10 /* v8 = 10 in every word */
+ xxlandc 44, 36, 34
+ vadduwm 4, 8, 8 /* v4 = 20 in every word: vrlw count (rotate left 20 = rotate right 12 on 32-bit words) */
+ vadduwm 8, 12, 10
+ xxlxor 37, 40, 38
+ vrlw 6, 5, 4
+ vadduwm 5, 7, 11
+ vadduwm 7, 6, 5
+ lvx 5, 0, 6 /* v5 = .LCPI0_5 */
+ li 6, 32
+ lxvd2x 0, 4, 6 /* load 16 bytes at r4+32; last read through r4 before it is reused as a scratch pointer */
+ addis 4, 2, .LCPI0_7@toc@ha
+ addis 6, 2, .LCPI0_9@toc@ha
+ xxlxor 42, 39, 44
+ xxswapd 44, 1
+ addi 4, 4, .LCPI0_7@toc@l
+ addi 6, 6, .LCPI0_9@toc@l
+ vcmpgtsb 5, 1, 5 /* v5 = bytewise (0 > .LCPI0_5); cleared out of each "vperm ..., 0" result with xxlandc */
+ vperm 1, 10, 10, 0
+ xxswapd 42, 0
+ vpkudum 16, 12, 10
+ xxlandc 47, 33, 37
+ vsubuwm 1, 14, 13 /* v1 = 9 - (-16) = 25 in every word: vrlw count (rotate left 25 = rotate right 7 on 32-bit words) */
+ lvx 14, 0, 4 /* v14 = .LCPI0_7 */
+ addis 4, 2, .LCPI0_8@toc@ha
+ vadduwm 8, 15, 8
+ xxswapd 45, 47
+ addi 4, 4, .LCPI0_8@toc@l
+ vadduwm 7, 7, 16
+ xxsldwi 48, 48, 48, 1
+ xxlxor 38, 40, 38
+ xxsldwi 40, 40, 40, 3
+ xxsldwi 39, 39, 39, 1
+ vperm 14, 10, 12, 14
+ vrlw 6, 6, 1
+ vadduwm 7, 6, 7
+ xxlxor 45, 39, 45
+ vperm 13, 13, 13, 3
+ xxlandc 45, 45, 34
+ vadduwm 8, 13, 8
+ xxlxor 38, 40, 38
+ vrlw 10, 6, 4
+ vadduwm 6, 7, 14
+ vadduwm 7, 10, 6
+ xxlxor 38, 39, 45
+ vperm 12, 6, 6, 0
+ lvx 6, 0, 4 /* v6 = .LCPI0_8 */
+ addis 4, 2, .LCPI0_10@toc@ha
+ addi 4, 4, .LCPI0_10@toc@l
+ vperm 13, 11, 9, 6
+ xxlandc 44, 44, 37
+ vadduwm 15, 12, 8
+ vadduwm 7, 7, 13
+ xxsldwi 45, 45, 45, 3
+ xxlxor 40, 47, 42
+ xxsldwi 47, 47, 47, 1
+ xxsldwi 39, 39, 39, 3
+ vrlw 10, 8, 1
+ xxswapd 40, 44
+ vadduwm 17, 10, 7
+ lvx 7, 0, 4 /* v7 = .LCPI0_10 */
+ addis 4, 2, .LCPI0_11@toc@ha
+ addi 4, 4, .LCPI0_11@toc@l
+ xxlxor 44, 49, 40
+ lvx 8, 0, 6 /* v8 = .LCPI0_9 */
+ vperm 18, 9, 9, 7
+ lvx 9, 0, 4 /* v9 = .LCPI0_11 */
+ addis 4, 2, .LCPI0_12@toc@ha
+ vperm 12, 12, 12, 3
+ addi 4, 4, .LCPI0_12@toc@l
+ vperm 19, 14, 16, 8
+ xxlandc 63, 44, 34
+ vperm 12, 19, 18, 9
+ vadduwm 15, 31, 15
+ xxlxor 42, 47, 42
+ vrlw 18, 10, 4
+ vadduwm 10, 17, 12
+ vadduwm 17, 18, 10
+ xxlxor 42, 49, 63
+ xxmrgld 63, 43, 46
+ xxsldwi 49, 49, 49, 1
+ vmrghw 14, 14, 11
+ vperm 19, 10, 10, 0
+ lvx 10, 0, 4 /* v10 = .LCPI0_12 */
+ addis 4, 2, .LCPI0_13@toc@ha
+ addi 4, 4, .LCPI0_13@toc@l
+ lvx 11, 0, 4 /* v11 = .LCPI0_13 */
+ addis 4, 2, .LCPI0_14@toc@ha
+ vperm 31, 16, 31, 10
+ addi 4, 4, .LCPI0_14@toc@l
+ vperm 14, 14, 16, 11
+ xxlandc 51, 51, 37
+ vadduwm 15, 19, 15
+ xxswapd 51, 51
+ vadduwm 17, 17, 31
+ xxlxor 50, 47, 50
+ xxsldwi 47, 47, 47, 3
+ vperm 30, 14, 31, 8
+ vrlw 18, 18, 1
+ vadduwm 17, 18, 17
+ xxlxor 51, 49, 51
+ vadduwm 17, 17, 14
+ vperm 19, 19, 19, 3
+ xxlandc 51, 51, 34
+ vadduwm 15, 19, 15
+ xxlxor 48, 47, 50
+ vrlw 16, 16, 4
+ vadduwm 17, 16, 17
+ xxlxor 50, 49, 51
+ vperm 19, 12, 13, 6
+ vperm 18, 18, 18, 0
+ vperm 13, 13, 13, 7
+ vadduwm 17, 17, 19
+ xxlandc 50, 50, 37
+ xxsldwi 49, 49, 49, 3
+ vperm 13, 30, 13, 9
+ vadduwm 15, 18, 15
+ xxswapd 50, 50
+ xxmrgld 62, 44, 46
+ vmrghw 12, 14, 12
+ xxlxor 48, 47, 48
+ xxsldwi 47, 47, 47, 1
+ vrlw 16, 16, 1
+ vperm 30, 31, 30, 10
+ vperm 12, 12, 31, 11
+ vadduwm 17, 16, 17
+ xxlxor 50, 49, 50
+ vadduwm 17, 17, 13
+ vperm 18, 18, 18, 3
+ vperm 31, 12, 30, 8
+ xxlandc 50, 50, 34
+ vadduwm 15, 18, 15
+ xxlxor 48, 47, 48
+ vrlw 16, 16, 4
+ vadduwm 17, 16, 17
+ xxlxor 50, 49, 50
+ xxsldwi 49, 49, 49, 1
+ vperm 18, 18, 18, 0
+ vadduwm 17, 17, 30
+ xxlandc 50, 50, 37
+ vadduwm 15, 18, 15
+ xxswapd 50, 50
+ xxlxor 48, 47, 48
+ xxsldwi 46, 47, 47, 3
+ vrlw 16, 16, 1
+ vadduwm 17, 16, 17
+ xxlxor 50, 49, 50
+ vadduwm 17, 17, 12
+ vperm 18, 18, 18, 3
+ xxlandc 47, 50, 34
+ xxsldwi 50, 51, 51, 3
+ vadduwm 14, 15, 14
+ vperm 19, 13, 18, 6
+ xxlxor 48, 46, 48
+ vperm 18, 18, 18, 7
+ vrlw 16, 16, 4
+ vadduwm 17, 16, 17
+ xxlxor 47, 49, 47
+ vadduwm 17, 17, 19
+ vperm 15, 15, 15, 0
+ xxsldwi 49, 49, 49, 3
+ xxlandc 47, 47, 37
+ vadduwm 14, 15, 14
+ xxswapd 47, 47
+ xxlxor 48, 46, 48
+ xxsldwi 46, 46, 46, 1
+ vrlw 16, 16, 1
+ vadduwm 17, 16, 17
+ xxlxor 47, 49, 47
+ vperm 15, 15, 15, 3
+ xxlandc 47, 47, 34
+ vadduwm 29, 15, 14
+ vperm 14, 31, 18, 9
+ xxmrgld 50, 45, 44
+ xxlxor 48, 61, 48
+ vmrghw 12, 12, 13
+ vrlw 16, 16, 4
+ vperm 18, 30, 18, 10
+ vadduwm 17, 17, 14
+ vadduwm 17, 16, 17
+ xxlxor 47, 49, 47
+ xxsldwi 49, 49, 49, 1
+ vperm 15, 15, 15, 0
+ vadduwm 17, 17, 18
+ xxlandc 47, 47, 37
+ vadduwm 31, 15, 29
+ xxswapd 47, 47
+ xxlxor 48, 63, 48
+ xxsldwi 45, 63, 63, 3
+ vperm 31, 12, 30, 11
+ vrlw 16, 16, 1
+ vadduwm 17, 16, 17
+ xxlxor 47, 49, 47
+ vperm 15, 15, 15, 3
+ xxlandc 47, 47, 34
+ vadduwm 13, 15, 13
+ xxlxor 44, 45, 48
+ vadduwm 16, 17, 31
+ xxsldwi 49, 51, 51, 3
+ vrlw 12, 12, 4
+ vperm 19, 14, 17, 6
+ vadduwm 16, 12, 16
+ xxlxor 47, 48, 47
+ vperm 15, 15, 15, 0
+ xxlandc 47, 47, 37
+ vadduwm 13, 15, 13
+ xxswapd 47, 47
+ xxlxor 44, 45, 44
+ xxsldwi 45, 45, 45, 1
+ vrlw 30, 12, 1
+ vadduwm 12, 16, 19
+ xxsldwi 44, 44, 44, 3
+ vadduwm 16, 30, 12
+ xxlxor 44, 48, 47
+ vperm 15, 17, 17, 7
+ vperm 12, 12, 12, 3
+ vperm 17, 31, 18, 8
+ xxlandc 61, 44, 34
+ vperm 12, 17, 15, 9
+ vadduwm 13, 29, 13
+ xxlxor 47, 45, 62
+ xxmrgld 62, 46, 63
+ vmrghw 14, 31, 14
+ vrlw 15, 15, 4
+ vadduwm 16, 16, 12
+ vperm 30, 18, 30, 10
+ vperm 14, 14, 18, 11
+ xxsldwi 50, 51, 51, 3
+ vadduwm 16, 15, 16
+ xxlxor 49, 48, 61
+ xxsldwi 48, 48, 48, 1
+ vperm 19, 12, 18, 6
+ vperm 17, 17, 17, 0
+ vadduwm 16, 16, 30
+ xxmrgld 60, 44, 46
+ vmrghw 12, 14, 12
+ vperm 28, 30, 28, 10
+ xxlandc 49, 49, 37
+ vadduwm 13, 17, 13
+ xxswapd 49, 49
+ vperm 12, 12, 30, 11
+ xxlxor 47, 45, 47
+ xxsldwi 45, 45, 45, 3
+ vrlw 15, 15, 1
+ vperm 8, 12, 28, 8
+ vadduwm 16, 15, 16
+ xxlxor 49, 48, 49
+ vadduwm 16, 16, 14
+ vperm 17, 17, 17, 3
+ xxlandc 49, 49, 34
+ vadduwm 13, 17, 13
+ xxlxor 47, 45, 47
+ vrlw 15, 15, 4
+ vadduwm 16, 15, 16
+ xxlxor 49, 48, 49
+ vperm 17, 17, 17, 0
+ xxlandc 49, 49, 37
+ vadduwm 31, 17, 13
+ xxlxor 45, 63, 47
+ vrlw 15, 13, 1
+ vadduwm 13, 16, 19
+ xxswapd 48, 49
+ xxsldwi 51, 51, 51, 3
+ xxsldwi 45, 45, 45, 3
+ vadduwm 17, 15, 13
+ xxlxor 45, 49, 48
+ lvx 16, 0, 4 /* v16 = .LCPI0_14 (r4 was pointed at it earlier) */
+ vperm 29, 13, 13, 3
+ vperm 13, 18, 18, 7
+ xxsldwi 50, 63, 63, 1
+ vperm 16, 14, 30, 16
+ vperm 7, 19, 19, 7
+ xxlandc 63, 61, 34
+ vadduwm 18, 31, 18
+ vperm 29, 16, 13, 9
+ xxlxor 47, 50, 47
+ vperm 6, 16, 19, 6
+ vrlw 15, 15, 4
+ vperm 7, 8, 7, 9
+ vadduwm 17, 17, 29
+ xxmrgld 41, 61, 44
+ vadduwm 17, 15, 17
+ vperm 9, 28, 9, 10
+ xxlxor 63, 49, 63
+ xxsldwi 49, 49, 49, 1
+ vperm 31, 31, 31, 0
+ vadduwm 17, 17, 28
+ xxlandc 63, 63, 37
+ vadduwm 18, 31, 18
+ xxswapd 63, 63
+ xxlxor 47, 50, 47
+ xxsldwi 46, 50, 50, 3
+ vrlw 15, 15, 1
+ vadduwm 17, 15, 17
+ xxlxor 63, 49, 63
+ vadduwm 17, 17, 12
+ vperm 31, 31, 31, 3
+ xxlandc 50, 63, 34
+ vadduwm 14, 18, 14
+ xxlxor 47, 46, 47
+ vrlw 15, 15, 4
+ vadduwm 17, 15, 17
+ xxlxor 50, 49, 50
+ vadduwm 6, 17, 6
+ vperm 18, 18, 18, 0
+ xxsldwi 38, 38, 38, 3
+ xxlandc 50, 50, 37
+ vadduwm 14, 18, 14
+ xxswapd 48, 50
+ xxlxor 47, 46, 47
+ xxsldwi 46, 46, 46, 1
+ vrlw 15, 15, 1
+ vadduwm 6, 15, 6
+ xxlxor 48, 38, 48
+ vadduwm 6, 6, 7
+ vperm 16, 16, 16, 3
+ xxlandc 48, 48, 34
+ vadduwm 14, 16, 14
+ xxlxor 40, 46, 47
+ vrlw 8, 8, 4
+ vadduwm 6, 8, 6
+ xxlxor 39, 38, 48
+ xxsldwi 38, 38, 38, 1
+ vperm 7, 7, 7, 0
+ vadduwm 6, 6, 9
+ xxlandc 39, 39, 37
+ vadduwm 14, 7, 14
+ xxswapd 39, 39
+ xxlxor 40, 46, 40
+ xxsldwi 41, 46, 46, 3
+ vrlw 8, 8, 1
+ vadduwm 6, 8, 6
+ xxlxor 39, 38, 39
+ vperm 3, 7, 7, 3
+ vmrghw 7, 12, 13
+ xxlandc 34, 35, 34
+ vperm 7, 7, 28, 11
+ vadduwm 3, 2, 9
+ xxlxor 40, 35, 40
+ vrlw 4, 8, 4
+ vadduwm 6, 6, 7
+ vadduwm 6, 4, 6
+ xxlxor 34, 38, 34
+ xxsldwi 0, 38, 38, 3
+ vperm 2, 2, 2, 0
+ xxlandc 34, 34, 37
+ vadduwm 3, 2, 3
+ xxswapd 34, 34
+ xxlxor 36, 35, 36
+ xxsldwi 1, 35, 35, 1
+ vrlw 4, 4, 1
+ xxlxor 0, 1, 0 /* first 16 output bytes = XOR of two of the final working vectors */
+ xxswapd 0, 0
+ xxlxor 1, 36, 34 /* second 16 output bytes = XOR of the other two */
+ stxvd2x 0, 0, 3 /* store 16 result bytes at r3+0 */
+ xxswapd 1, 1
+ stxvd2x 1, 3, 5 /* store 16 result bytes at r3+16 (r5 is still 16) */
+ li 3, -16 /* r3 is scratch from here on: offsets for restoring vs63..vs60 */
+ lxvd2x 63, 1, 3 /* restore vs63 (v31) from r1-16 */
+ li 3, -32
+ lxvd2x 62, 1, 3 /* restore vs62 (v30) from r1-32 */
+ li 3, -48
+ lxvd2x 61, 1, 3 /* restore vs61 (v29) from r1-48 */
+ li 3, -64
+ lxvd2x 60, 1, 3 /* restore vs60 (v28) from r1-64 */
+ blr /* return to caller */
+ .long 0
+ .quad 0
+.Lfunc_end0:
+ .size zfs_blake3_compress_in_place_sse41, .Lfunc_end0-.Lfunc_begin0
+ .cfi_endproc
+
+ /*
+ * 16-byte constants for zfs_blake3_compress_xof_sse41 (defined right
+ * after this pool). That routine forms each address from the TOC and
+ * fetches the table with lvx.
+ *
+ * The byte tables .LCPI1_0, _2, _4, _6 .. _14 end up as the control
+ * (last) operand of vperm. .LCPI1_1 and .LCPI1_5 are only compared
+ * against a zero vector with vcmpgtsb. .LCPI1_3 holds four 32-bit
+ * words that are added in with vadduwm.
+ */
+ .section .rodata.cst16,"aM",@progbits,16
+ .p2align 4
+.LCPI1_0:
+ .byte 31
+ .byte 14
+ .byte 13
+ .byte 12
+ .byte 30
+ .byte 10
+ .byte 9
+ .byte 8
+ .byte 29
+ .byte 6
+ .byte 5
+ .byte 4
+ .byte 28
+ .byte 2
+ .byte 1
+ .byte 0
+ /* Compared with zero (vcmpgtsb); every entry is non-negative. */
+.LCPI1_1:
+ .byte 2
+ .byte 3
+ .byte 0
+ .byte 1
+ .byte 6
+ .byte 7
+ .byte 4
+ .byte 5
+ .byte 10
+ .byte 11
+ .byte 8
+ .byte 9
+ .byte 14
+ .byte 15
+ .byte 12
+ .byte 13
+ /*
+ * vperm control whose indices stay inside each aligned group of four
+ * bytes (28-31, 24-27, 20-23, 16-19), swapping the two halves of
+ * every 32-bit word.
+ */
+.LCPI1_2:
+ .byte 29
+ .byte 28
+ .byte 31
+ .byte 30
+ .byte 25
+ .byte 24
+ .byte 27
+ .byte 26
+ .byte 21
+ .byte 20
+ .byte 23
+ .byte 22
+ .byte 17
+ .byte 16
+ .byte 19
+ .byte 18
+ /*
+ * 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A.
+ * NOTE(review): these match the first four BLAKE3 IV words - confirm
+ * against the reference implementation.
+ */
+.LCPI1_3:
+ .long 1779033703
+ .long 3144134277
+ .long 1013904242
+ .long 2773480762
+.LCPI1_4:
+ .byte 27
+ .byte 26
+ .byte 25
+ .byte 24
+ .byte 19
+ .byte 18
+ .byte 17
+ .byte 16
+ .byte 11
+ .byte 10
+ .byte 9
+ .byte 8
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+ /* Compared with zero (vcmpgtsb); every entry is non-negative. */
+.LCPI1_5:
+ .byte 1
+ .byte 2
+ .byte 3
+ .byte 0
+ .byte 5
+ .byte 6
+ .byte 7
+ .byte 4
+ .byte 9
+ .byte 10
+ .byte 11
+ .byte 8
+ .byte 13
+ .byte 14
+ .byte 15
+ .byte 12
+ /*
+ * vperm control whose indices stay inside each aligned group of four
+ * bytes, moving every byte of a 32-bit word by one position.
+ */
+.LCPI1_6:
+ .byte 30
+ .byte 29
+ .byte 28
+ .byte 31
+ .byte 26
+ .byte 25
+ .byte 24
+ .byte 27
+ .byte 22
+ .byte 21
+ .byte 20
+ .byte 23
+ .byte 18
+ .byte 17
+ .byte 16
+ .byte 19
+ /*
+ * .LCPI1_7 .. .LCPI1_14: vperm controls that pick whole 4-byte groups
+ * out of a pair of source vectors.
+ * NOTE(review): presumably the per-round message word shuffles -
+ * confirm against the C source this file was generated from.
+ */
+.LCPI1_7:
+ .byte 19
+ .byte 18
+ .byte 17
+ .byte 16
+ .byte 11
+ .byte 10
+ .byte 9
+ .byte 8
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+ .byte 27
+ .byte 26
+ .byte 25
+ .byte 24
+.LCPI1_8:
+ .byte 23
+ .byte 22
+ .byte 21
+ .byte 20
+ .byte 27
+ .byte 26
+ .byte 25
+ .byte 24
+ .byte 11
+ .byte 10
+ .byte 9
+ .byte 8
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+.LCPI1_9:
+ .byte 31
+ .byte 31
+ .byte 31
+ .byte 31
+ .byte 23
+ .byte 22
+ .byte 21
+ .byte 20
+ .byte 31
+ .byte 31
+ .byte 31
+ .byte 31
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+.LCPI1_10:
+ .byte 19
+ .byte 18
+ .byte 17
+ .byte 16
+ .byte 31
+ .byte 31
+ .byte 31
+ .byte 31
+ .byte 31
+ .byte 30
+ .byte 29
+ .byte 28
+ .byte 31
+ .byte 31
+ .byte 31
+ .byte 31
+.LCPI1_11:
+ .byte 31
+ .byte 30
+ .byte 29
+ .byte 28
+ .byte 11
+ .byte 10
+ .byte 9
+ .byte 8
+ .byte 23
+ .byte 22
+ .byte 21
+ .byte 20
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+.LCPI1_12:
+ .byte 31
+ .byte 30
+ .byte 29
+ .byte 28
+ .byte 23
+ .byte 22
+ .byte 21
+ .byte 20
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+ .byte 27
+ .byte 26
+ .byte 25
+ .byte 24
+.LCPI1_13:
+ .byte 27
+ .byte 26
+ .byte 25
+ .byte 24
+ .byte 11
+ .byte 10
+ .byte 9
+ .byte 8
+ .byte 15
+ .byte 14
+ .byte 13
+ .byte 12
+ .byte 31
+ .byte 30
+ .byte 29
+ .byte 28
+.LCPI1_14:
+ .byte 23
+ .byte 22
+ .byte 21
+ .byte 20
+ .byte 23
+ .byte 22
+ .byte 21
+ .byte 20
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+ /*
+ * zfs_blake3_compress_xof_sse41
+ *
+ * Leaf routine: no stack frame is created and no call is made.
+ * VSR60-VSR63 (v28-v31) are saved in the 64 bytes just below r1 on
+ * entry and reloaded before the return.
+ *
+ * Incoming registers as used by the code:
+ * r3 - base of 32 bytes read on entry (offsets 0 and 16) and read
+ * again just before the last two output stores
+ * r4 - base of 64 bytes read at offsets 0 .. 63
+ * r5 - value moved into a vector register, then r5 is reused as the
+ * constant 16
+ * r6 - 64-bit value, split into its low and high 32-bit words
+ * r7 - value moved into a vector register, then r7 is reused as a
+ * scratch pointer and finally as the constant 32
+ * r8 - base of the 64-byte output (stores at offsets 0, 16, 32, 48)
+ *
+ * NOTE(review): these presumably correspond to cv, block, block_len,
+ * counter, flags and out of the BLAKE3 compress_xof prototype -
+ * confirm against the C declaration.
+ */
+ .text
+ .globl zfs_blake3_compress_xof_sse41
+ .p2align 2
+ .type zfs_blake3_compress_xof_sse41,@function
+zfs_blake3_compress_xof_sse41:
+.Lfunc_begin1:
+ .cfi_startproc
+.Lfunc_gep1:
+ /* Global entry: derive the TOC pointer (r2) from the entry address in r12. */
+ addis 2, 12, .TOC.-.Lfunc_gep1@ha
+ addi 2, 2, .TOC.-.Lfunc_gep1@l
+.Lfunc_lep1:
+ .localentry zfs_blake3_compress_xof_sse41, .Lfunc_lep1-.Lfunc_gep1
+ /*
+ * Save VSR60..VSR63 at r1-64 .. r1-16, interleaved with moving the
+ * scalar arguments into vector registers and loading the 64 bytes
+ * at r4 and the 32 bytes at r3.
+ */
+ li 9, -64
+ mtvsrd 34, 5
+ li 5, 16
+ lfdx 0, 0, 4
+ /* v13 = -16 in every word; combined with v14 = 9 further down. */
+ vspltisw 13, -16
+ addis 11, 2, .LCPI1_9@toc@ha
+ stxvd2x 60, 1, 9
+ li 9, -48
+ mtvsrd 35, 7
+ lfd 1, 8(4)
+ lfd 2, 16(4)
+ addis 7, 2, .LCPI1_0@toc@ha
+ stxvd2x 61, 1, 9
+ li 9, -32
+ mtvsrwz 36, 6
+ /* r6 = upper 32 bits of the original r6. */
+ rldicl 6, 6, 32, 32
+ stxvd2x 62, 1, 9
+ li 9, -16
+ vmrghb 2, 3, 2
+ stxvd2x 63, 1, 9
+ mtvsrwz 35, 6
+ addi 6, 7, .LCPI1_0@toc@l
+ addis 7, 2, .LCPI1_2@toc@ha
+ lfd 3, 24(4)
+ xxmrghd 37, 1, 0
+ lvx 6, 0, 6
+ /* v1 = 0; used as the zero operand of the vcmpgtsb compares below. */
+ xxlxor 33, 33, 33
+ lxvd2x 0, 0, 3
+ addis 6, 2, .LCPI1_1@toc@ha
+ addi 7, 7, .LCPI1_2@toc@l
+ vmrghw 3, 3, 4
+ lxvd2x 1, 3, 5
+ addi 6, 6, .LCPI1_1@toc@l
+ vspltisw 14, 9
+ xxmrghd 32, 3, 2
+ xxswapd 36, 0
+ vperm 2, 1, 2, 6
+ xxswapd 38, 1
+ vpkudum 9, 0, 5
+ xxmrgld 34, 34, 35
+ lvx 3, 0, 7
+ addis 7, 2, .LCPI1_4@toc@ha
+ addi 7, 7, .LCPI1_4@toc@l
+ vadduwm 4, 9, 4
+ lvx 11, 0, 7
+ addis 7, 2, .LCPI1_6@toc@ha
+ addi 7, 7, .LCPI1_6@toc@l
+ vadduwm 7, 4, 6
+ lvx 4, 0, 6
+ addis 6, 2, .LCPI1_3@toc@ha
+ addi 6, 6, .LCPI1_3@toc@l
+ vperm 11, 0, 5, 11
+ lvx 0, 0, 7
+ li 7, 32
+ xxlxor 40, 39, 34
+ lvx 10, 0, 6
+ addis 6, 2, .LCPI1_5@toc@ha
+ lxvd2x 0, 4, 7
+ vcmpgtsb 2, 1, 4
+ addi 6, 6, .LCPI1_5@toc@l
+ vperm 4, 8, 8, 3
+ /*
+ * v4 = 10 + 10 = 20 in every word: the rotate count for the
+ * "vrlw x, y, 4" instructions that follow (left by 20 is the same
+ * as right by 12 on a 32-bit word).
+ */
+ vspltisw 8, 10
+ xxlandc 44, 36, 34
+ vadduwm 4, 8, 8
+ vadduwm 8, 12, 10
+ xxlxor 37, 40, 38
+ vrlw 6, 5, 4
+ vadduwm 5, 7, 11
+ vadduwm 7, 6, 5
+ lvx 5, 0, 6
+ li 6, 48
+ lxvd2x 1, 4, 6
+ addis 4, 2, .LCPI1_7@toc@ha
+ xxlxor 42, 39, 44
+ addi 4, 4, .LCPI1_7@toc@l
+ vcmpgtsb 5, 1, 5
+ vperm 1, 10, 10, 0
+ xxswapd 42, 0
+ xxswapd 44, 1
+ vpkudum 16, 12, 10
+ xxlandc 47, 33, 37
+ /*
+ * v1 = 9 - (-16) = 25 in every word: the rotate count for the
+ * "vrlw x, y, 1" instructions that follow (left by 25 is the same
+ * as right by 7 on a 32-bit word).
+ */
+ vsubuwm 1, 14, 13
+ lvx 14, 0, 4
+ addis 4, 2, .LCPI1_8@toc@ha
+ vadduwm 8, 15, 8
+ xxswapd 45, 47
+ addi 4, 4, .LCPI1_8@toc@l
+ xxlxor 38, 40, 38
+ xxsldwi 40, 40, 40, 3
+ vadduwm 7, 7, 16
+ xxsldwi 48, 48, 48, 1
+ vrlw 6, 6, 1
+ xxsldwi 39, 39, 39, 1
+ vperm 14, 10, 12, 14
+ vadduwm 7, 6, 7
+ xxlxor 45, 39, 45
+ vperm 13, 13, 13, 3
+ xxlandc 45, 45, 34
+ vadduwm 8, 13, 8
+ xxlxor 38, 40, 38
+ vrlw 10, 6, 4
+ vadduwm 6, 7, 14
+ vadduwm 7, 10, 6
+ xxlxor 38, 39, 45
+ vperm 12, 6, 6, 0
+ lvx 6, 0, 4
+ addis 4, 2, .LCPI1_10@toc@ha
+ addi 4, 4, .LCPI1_10@toc@l
+ vperm 13, 11, 9, 6
+ xxlandc 44, 44, 37
+ vadduwm 15, 12, 8
+ vadduwm 7, 7, 13
+ xxsldwi 45, 45, 45, 3
+ xxlxor 40, 47, 42
+ xxsldwi 47, 47, 47, 1
+ xxsldwi 39, 39, 39, 3
+ vrlw 10, 8, 1
+ xxswapd 40, 44
+ vadduwm 17, 10, 7
+ lvx 7, 0, 4
+ addi 4, 11, .LCPI1_9@toc@l
+ xxlxor 44, 49, 40
+ lvx 8, 0, 4
+ addis 4, 2, .LCPI1_11@toc@ha
+ vperm 18, 9, 9, 7
+ addi 4, 4, .LCPI1_11@toc@l
+ vperm 12, 12, 12, 3
+ lvx 9, 0, 4
+ addis 4, 2, .LCPI1_12@toc@ha
+ vperm 19, 14, 16, 8
+ addi 4, 4, .LCPI1_12@toc@l
+ xxlandc 63, 44, 34
+ vperm 12, 19, 18, 9
+ vadduwm 15, 31, 15
+ xxlxor 42, 47, 42
+ vrlw 18, 10, 4
+ vadduwm 10, 17, 12
+ vadduwm 17, 18, 10
+ xxlxor 42, 49, 63
+ xxmrgld 63, 43, 46
+ xxsldwi 49, 49, 49, 1
+ vmrghw 14, 14, 11
+ vperm 19, 10, 10, 0
+ lvx 10, 0, 4
+ addis 4, 2, .LCPI1_13@toc@ha
+ addi 4, 4, .LCPI1_13@toc@l
+ lvx 11, 0, 4
+ /* r4 keeps the address of .LCPI1_14 until the lvx 16 much further down. */
+ addis 4, 2, .LCPI1_14@toc@ha
+ vperm 31, 16, 31, 10
+ addi 4, 4, .LCPI1_14@toc@l
+ vperm 14, 14, 16, 11
+ xxlandc 51, 51, 37
+ vadduwm 15, 19, 15
+ xxswapd 51, 51
+ vadduwm 17, 17, 31
+ xxlxor 50, 47, 50
+ xxsldwi 47, 47, 47, 3
+ vperm 30, 14, 31, 8
+ vrlw 18, 18, 1
+ vadduwm 17, 18, 17
+ xxlxor 51, 49, 51
+ vadduwm 17, 17, 14
+ vperm 19, 19, 19, 3
+ xxlandc 51, 51, 34
+ vadduwm 15, 19, 15
+ xxlxor 48, 47, 50
+ vrlw 16, 16, 4
+ vadduwm 17, 16, 17
+ xxlxor 50, 49, 51
+ vperm 19, 12, 13, 6
+ vperm 18, 18, 18, 0
+ vperm 13, 13, 13, 7
+ vadduwm 17, 17, 19
+ xxlandc 50, 50, 37
+ xxsldwi 49, 49, 49, 3
+ vperm 13, 30, 13, 9
+ vadduwm 15, 18, 15
+ xxswapd 50, 50
+ xxmrgld 62, 44, 46
+ vmrghw 12, 14, 12
+ xxlxor 48, 47, 48
+ xxsldwi 47, 47, 47, 1
+ vrlw 16, 16, 1
+ vperm 30, 31, 30, 10
+ vperm 12, 12, 31, 11
+ vadduwm 17, 16, 17
+ xxlxor 50, 49, 50
+ vadduwm 17, 17, 13
+ vperm 18, 18, 18, 3
+ vperm 31, 12, 30, 8
+ xxlandc 50, 50, 34
+ vadduwm 15, 18, 15
+ xxlxor 48, 47, 48
+ vrlw 16, 16, 4
+ vadduwm 17, 16, 17
+ xxlxor 50, 49, 50
+ xxsldwi 49, 49, 49, 1
+ vperm 18, 18, 18, 0
+ vadduwm 17, 17, 30
+ xxlandc 50, 50, 37
+ vadduwm 15, 18, 15
+ xxswapd 50, 50
+ xxlxor 48, 47, 48
+ xxsldwi 46, 47, 47, 3
+ vrlw 16, 16, 1
+ vadduwm 17, 16, 17
+ xxlxor 50, 49, 50
+ vadduwm 17, 17, 12
+ vperm 18, 18, 18, 3
+ xxlandc 47, 50, 34
+ xxsldwi 50, 51, 51, 3
+ vadduwm 14, 15, 14
+ vperm 19, 13, 18, 6
+ xxlxor 48, 46, 48
+ vperm 18, 18, 18, 7
+ vrlw 16, 16, 4
+ vadduwm 17, 16, 17
+ xxlxor 47, 49, 47
+ vadduwm 17, 17, 19
+ vperm 15, 15, 15, 0
+ xxsldwi 49, 49, 49, 3
+ xxlandc 47, 47, 37
+ vadduwm 14, 15, 14
+ xxswapd 47, 47
+ xxlxor 48, 46, 48
+ xxsldwi 46, 46, 46, 1
+ vrlw 16, 16, 1
+ vadduwm 17, 16, 17
+ xxlxor 47, 49, 47
+ vperm 15, 15, 15, 3
+ xxlandc 47, 47, 34
+ vadduwm 29, 15, 14
+ vperm 14, 31, 18, 9
+ xxmrgld 50, 45, 44
+ xxlxor 48, 61, 48
+ vmrghw 12, 12, 13
+ vrlw 16, 16, 4
+ vperm 18, 30, 18, 10
+ vadduwm 17, 17, 14
+ vadduwm 17, 16, 17
+ xxlxor 47, 49, 47
+ xxsldwi 49, 49, 49, 1
+ vperm 15, 15, 15, 0
+ vadduwm 17, 17, 18
+ xxlandc 47, 47, 37
+ vadduwm 31, 15, 29
+ xxswapd 47, 47
+ xxlxor 48, 63, 48
+ xxsldwi 45, 63, 63, 3
+ vperm 31, 12, 30, 11
+ vrlw 16, 16, 1
+ vadduwm 17, 16, 17
+ xxlxor 47, 49, 47
+ vperm 15, 15, 15, 3
+ xxlandc 47, 47, 34
+ vadduwm 13, 15, 13
+ xxlxor 44, 45, 48
+ vadduwm 16, 17, 31
+ xxsldwi 49, 51, 51, 3
+ vrlw 12, 12, 4
+ vperm 19, 14, 17, 6
+ vadduwm 16, 12, 16
+ xxlxor 47, 48, 47
+ vperm 15, 15, 15, 0
+ xxlandc 47, 47, 37
+ vadduwm 13, 15, 13
+ xxswapd 47, 47
+ xxlxor 44, 45, 44
+ xxsldwi 45, 45, 45, 1
+ vrlw 30, 12, 1
+ vadduwm 12, 16, 19
+ xxsldwi 44, 44, 44, 3
+ vadduwm 16, 30, 12
+ xxlxor 44, 48, 47
+ vperm 15, 17, 17, 7
+ vperm 12, 12, 12, 3
+ vperm 17, 31, 18, 8
+ xxlandc 61, 44, 34
+ vperm 12, 17, 15, 9
+ vadduwm 13, 29, 13
+ xxlxor 47, 45, 62
+ xxmrgld 62, 46, 63
+ vmrghw 14, 31, 14
+ vrlw 15, 15, 4
+ vadduwm 16, 16, 12
+ vperm 30, 18, 30, 10
+ vperm 14, 14, 18, 11
+ xxsldwi 50, 51, 51, 3
+ vadduwm 16, 15, 16
+ xxlxor 49, 48, 61
+ xxsldwi 48, 48, 48, 1
+ vperm 19, 12, 18, 6
+ vperm 17, 17, 17, 0
+ vadduwm 16, 16, 30
+ xxmrgld 60, 44, 46
+ vmrghw 12, 14, 12
+ vperm 28, 30, 28, 10
+ xxlandc 49, 49, 37
+ vadduwm 13, 17, 13
+ xxswapd 49, 49
+ vperm 12, 12, 30, 11
+ xxlxor 47, 45, 47
+ xxsldwi 45, 45, 45, 3
+ vrlw 15, 15, 1
+ vperm 8, 12, 28, 8
+ vadduwm 16, 15, 16
+ xxlxor 49, 48, 49
+ vadduwm 16, 16, 14
+ vperm 17, 17, 17, 3
+ xxlandc 49, 49, 34
+ vadduwm 13, 17, 13
+ xxlxor 47, 45, 47
+ vrlw 15, 15, 4
+ vadduwm 16, 15, 16
+ xxlxor 49, 48, 49
+ vperm 17, 17, 17, 0
+ xxlandc 49, 49, 37
+ vadduwm 31, 17, 13
+ xxlxor 45, 63, 47
+ vrlw 15, 13, 1
+ vadduwm 13, 16, 19
+ xxswapd 48, 49
+ xxsldwi 51, 51, 51, 3
+ xxsldwi 45, 45, 45, 3
+ vadduwm 17, 15, 13
+ xxlxor 45, 49, 48
+ /* v16 = .LCPI1_14 (address was left in r4 earlier). */
+ lvx 16, 0, 4
+ vperm 29, 13, 13, 3
+ vperm 13, 18, 18, 7
+ xxsldwi 50, 63, 63, 1
+ vperm 16, 14, 30, 16
+ vperm 7, 19, 19, 7
+ xxlandc 63, 61, 34
+ vadduwm 18, 31, 18
+ vperm 29, 16, 13, 9
+ xxlxor 47, 50, 47
+ vperm 6, 16, 19, 6
+ vrlw 15, 15, 4
+ vperm 7, 8, 7, 9
+ vadduwm 17, 17, 29
+ xxmrgld 41, 61, 44
+ vadduwm 17, 15, 17
+ vperm 9, 28, 9, 10
+ xxlxor 63, 49, 63
+ xxsldwi 49, 49, 49, 1
+ vperm 31, 31, 31, 0
+ vadduwm 17, 17, 28
+ xxlandc 63, 63, 37
+ vadduwm 18, 31, 18
+ xxswapd 63, 63
+ xxlxor 47, 50, 47
+ xxsldwi 46, 50, 50, 3
+ vrlw 15, 15, 1
+ vadduwm 17, 15, 17
+ xxlxor 63, 49, 63
+ vadduwm 17, 17, 12
+ vperm 31, 31, 31, 3
+ xxlandc 50, 63, 34
+ vadduwm 14, 18, 14
+ xxlxor 47, 46, 47
+ vrlw 15, 15, 4
+ vadduwm 17, 15, 17
+ xxlxor 50, 49, 50
+ vadduwm 6, 17, 6
+ vperm 18, 18, 18, 0
+ xxsldwi 38, 38, 38, 3
+ xxlandc 50, 50, 37
+ vadduwm 14, 18, 14
+ xxswapd 48, 50
+ xxlxor 47, 46, 47
+ xxsldwi 46, 46, 46, 1
+ vrlw 15, 15, 1
+ vadduwm 6, 15, 6
+ xxlxor 48, 38, 48
+ vadduwm 6, 6, 7
+ vperm 16, 16, 16, 3
+ xxlandc 48, 48, 34
+ vadduwm 14, 16, 14
+ xxlxor 40, 46, 47
+ vrlw 8, 8, 4
+ vadduwm 6, 8, 6
+ xxlxor 39, 38, 48
+ xxsldwi 38, 38, 38, 1
+ vperm 7, 7, 7, 0
+ vadduwm 6, 6, 9
+ xxlandc 39, 39, 37
+ vadduwm 14, 7, 14
+ xxswapd 39, 39
+ xxlxor 40, 46, 40
+ xxsldwi 41, 46, 46, 3
+ vrlw 8, 8, 1
+ vadduwm 6, 8, 6
+ xxlxor 39, 38, 39
+ vperm 3, 7, 7, 3
+ vmrghw 7, 12, 13
+ xxlandc 34, 35, 34
+ vperm 7, 7, 28, 11
+ vadduwm 3, 2, 9
+ xxlxor 40, 35, 40
+ vrlw 4, 8, 4
+ vadduwm 6, 6, 7
+ vadduwm 6, 4, 6
+ xxlxor 34, 38, 34
+ xxsldwi 0, 38, 38, 3
+ vperm 2, 2, 2, 0
+ xxlandc 34, 34, 37
+ vadduwm 3, 2, 3
+ xxswapd 34, 34
+ xxlxor 36, 35, 36
+ xxsldwi 1, 35, 35, 1
+ vrlw 4, 4, 1
+ /*
+ * Write the 64-byte result at r8. r5, r7 and r6 hold the offsets
+ * 16, 32 and 48 at this point.
+ */
+ xxlxor 0, 1, 0
+ xxswapd 0, 0
+ xxlxor 2, 36, 34
+ stxvd2x 0, 0, 8
+ xxswapd 2, 2
+ stxvd2x 2, 8, 5
+ /* Bytes 32..47 of the output: VSR1 XOR the first 16 bytes at r3. */
+ lfdx 0, 0, 3
+ lfd 2, 8(3)
+ xxmrghd 35, 2, 0
+ xxlxor 0, 1, 35
+ xxswapd 0, 0
+ stxvd2x 0, 8, 7
+ /* Bytes 48..63 of the output: VSR34 XOR bytes 16..31 at r3. */
+ lfd 0, 16(3)
+ lfd 1, 24(3)
+ /* r3 is no longer needed as a pointer; reuse it for the restore offsets. */
+ li 3, -16
+ xxmrghd 35, 1, 0
+ xxlxor 0, 34, 35
+ xxswapd 0, 0
+ stxvd2x 0, 8, 6
+ /* Reload VSR63..VSR60 from below the stack pointer and return. */
+ lxvd2x 63, 1, 3
+ li 3, -32
+ lxvd2x 62, 1, 3
+ li 3, -48
+ lxvd2x 61, 1, 3
+ li 3, -64
+ lxvd2x 60, 1, 3
+ blr
+ /* NOTE(review): filler words placed after the return; never executed. */
+ .long 0
+ .quad 0
+.Lfunc_end1:
+ .size zfs_blake3_compress_xof_sse41, .Lfunc_end1-.Lfunc_begin1
+ .cfi_endproc
+
+ /*
+ * zfs_blake3_hash_many_sse41
+ *
+ * Driver that processes a list of inputs: groups of four go through
+ * blake3_hash4_sse41, any remainder goes one input at a time through
+ * zfs_blake3_compress_in_place_sse41.
+ *
+ * Incoming values as used by the code (kept in non-volatile registers):
+ * r3 -> r25 array of 8-byte input pointers
+ * r4 -> r26 number of inputs still to process
+ * r5 -> r27 number of 64-byte steps per input
+ * r6 -> r28 32 bytes copied to a stack buffer before each
+ * single-input pass; passed through to the 4-wide helper
+ * r7 -> r29 64-bit value handed to the helpers; advanced per input
+ * only when bit 0 of r8 is set
+ * r8 only bit 0 is examined; the result is parked in CR bit 8
+ * r9 -> r30 flag bits used for every step
+ * r10 -> r24 extra flag bits ORed in for the first step of an input
+ * 96(caller r1) -> r18 extra flag bits ORed in for the last step
+ * 104(caller r1) -> r19 output pointer, 32 bytes written per input
+ *
+ * NOTE(review): these presumably correspond to inputs, num_inputs,
+ * blocks, key, counter, increment_counter, flags, flags_start,
+ * flags_end and out of the BLAKE3 hash_many prototype - confirm
+ * against the C declaration.
+ */
+ .globl zfs_blake3_hash_many_sse41
+ .p2align 2
+ .type zfs_blake3_hash_many_sse41,@function
+zfs_blake3_hash_many_sse41:
+.Lfunc_begin2:
+ .cfi_startproc
+.Lfunc_gep2:
+ /* Global entry: derive the TOC pointer (r2) from the entry address in r12. */
+ addis 2, 12, .TOC.-.Lfunc_gep2@ha
+ addi 2, 2, .TOC.-.Lfunc_gep2@l
+.Lfunc_lep2:
+ .localentry zfs_blake3_hash_many_sse41, .Lfunc_lep2-.Lfunc_gep2
+ /*
+ * Prologue: save CR field 2 and LR in the caller frame, then open a
+ * 256-byte frame. CR field 2 is saved because CR bit 8 is written
+ * below and must be restored on exit.
+ */
+ mfocrf 12, 32
+ mflr 0
+ std 0, 16(1)
+ stw 12, 8(1)
+ stdu 1, -256(1)
+ .cfi_def_cfa_offset 256
+ .cfi_offset lr, 16
+ .cfi_offset r17, -120
+ .cfi_offset r18, -112
+ .cfi_offset r19, -104
+ .cfi_offset r20, -96
+ .cfi_offset r21, -88
+ .cfi_offset r22, -80
+ .cfi_offset r23, -72
+ .cfi_offset r24, -64
+ .cfi_offset r25, -56
+ .cfi_offset r26, -48
+ .cfi_offset r27, -40
+ .cfi_offset r28, -32
+ .cfi_offset r29, -24
+ .cfi_offset r30, -16
+ .cfi_offset cr2, 8
+ std 26, 208(1)
+ mr 26, 4
+ /* cr1: compare the input count with 4 (consumed by the blt below). */
+ cmpldi 1, 4, 4
+ /* cr0.gt = (r8 & 1) != 0, then copied into CR bit 8 (cr2.lt). */
+ andi. 4, 8, 1
+ std 18, 144(1)
+ std 19, 152(1)
+ crmove 8, 1
+ /* Stack-passed arguments: 360 - 256 = 104 and 352 - 256 = 96 in the caller frame. */
+ ld 19, 360(1)
+ lwz 18, 352(1)
+ std 24, 192(1)
+ std 25, 200(1)
+ std 27, 216(1)
+ std 28, 224(1)
+ mr 24, 10
+ mr 28, 6
+ mr 27, 5
+ mr 25, 3
+ std 29, 232(1)
+ std 30, 240(1)
+ mr 30, 9
+ mr 29, 7
+ std 17, 136(1)
+ std 20, 160(1)
+ std 21, 168(1)
+ std 22, 176(1)
+ std 23, 184(1)
+ /* Fewer than four inputs: skip the 4-wide loop. */
+ blt 1, .LBB2_3
+ li 3, 0
+ li 4, 1
+ /* Loop-invariant helper arguments, each zero-extended from 32 bits. */
+ clrldi 23, 30, 32
+ /* r22 = 1 if CR bit 8 is set, otherwise 0. */
+ isel 22, 4, 3, 8
+ clrldi 21, 24, 32
+ clrldi 20, 18, 32
+.LBB2_2:
+ /* 4-wide pass: eight register arguments plus the output pointer at 32(r1). */
+ mr 3, 25
+ mr 4, 27
+ mr 5, 28
+ mr 6, 29
+ mr 7, 22
+ mr 8, 23
+ mr 9, 21
+ mr 10, 20
+ std 19, 32(1)
+ bl blake3_hash4_sse41
+ /*
+ * Advance past four inputs: count -= 4, pointer array += 4 * 8,
+ * output += 4 * 32, and r29 += 4 only when CR bit 8 is set.
+ */
+ addi 26, 26, -4
+ addi 3, 29, 4
+ addi 25, 25, 32
+ addi 19, 19, 128
+ cmpldi 26, 3
+ isel 29, 3, 29, 8
+ /* Repeat while more than three inputs remain. */
+ bgt 0, .LBB2_2
+.LBB2_3:
+ /* Remainder: nothing left means straight to the epilogue. */
+ cmpldi 26, 0
+ beq 0, .LBB2_11
+ li 3, 0
+ li 4, 1
+ /* r21 = flag bits for the first step of every input (r24 | r30). */
+ or 21, 24, 30
+ li 20, 16
+ /* r24 now points at a 32-byte scratch buffer at r1 + 96. */
+ addi 24, 1, 96
+ /* r22 = per-input increment for r29: 1 if CR bit 8 is set, else 0. */
+ isel 22, 4, 3, 8
+.LBB2_5:
+ /*
+ * Start of one input: copy the 32 bytes at r28 into the scratch
+ * buffer, fetch this input pointer into r23, reset the step
+ * counter (r17) and select the first-step flag bits.
+ */
+ lxvd2x 0, 28, 20
+ ld 23, 0(25)
+ mr 17, 27
+ mr 3, 21
+ stxvd2x 0, 24, 20
+ lxvd2x 0, 0, 28
+ stxvd2x 0, 0, 24
+.LBB2_6:
+ /*
+ * Step dispatch on the remaining count in r17:
+ * 1 -> OR in the last-step bits, then run the step
+ * 0 -> this input is finished
+ * otherwise run the step with the bits already in r3
+ */
+ cmpldi 17, 1
+ beq 0, .LBB2_8
+ cmpldi 17, 0
+ bne 0, .LBB2_9
+ b .LBB2_10
+.LBB2_8:
+ or 3, 3, 18
+.LBB2_9:
+ /*
+ * One step: r3 = scratch buffer, r4 = current 64-byte chunk,
+ * r5 = 64, r6 = r29, r7 = low 8 bits of the flag word.
+ */
+ clrldi 7, 3, 56
+ mr 3, 24
+ mr 4, 23
+ li 5, 64
+ mr 6, 29
+ bl zfs_blake3_compress_in_place_sse41
+ addi 23, 23, 64
+ addi 17, 17, -1
+ /* Later steps start from the plain flag bits in r30. */
+ mr 3, 30
+ b .LBB2_6
+.LBB2_10:
+ /*
+ * Input finished: copy the 32-byte scratch buffer to the output at
+ * r19, then advance to the next input pointer and output slot.
+ */
+ lxvd2x 0, 24, 20
+ addi 26, 26, -1
+ add 29, 29, 22
+ addi 25, 25, 8
+ cmpldi 26, 0
+ stxvd2x 0, 19, 20
+ lxvd2x 0, 0, 24
+ stxvd2x 0, 0, 19
+ addi 19, 19, 32
+ bne 0, .LBB2_5
+.LBB2_11:
+ /* Epilogue: restore r17-r30, pop the frame, restore CR field 2 and LR. */
+ ld 30, 240(1)
+ ld 29, 232(1)
+ ld 28, 224(1)
+ ld 27, 216(1)
+ ld 26, 208(1)
+ ld 25, 200(1)
+ ld 24, 192(1)
+ ld 23, 184(1)
+ ld 22, 176(1)
+ ld 21, 168(1)
+ ld 20, 160(1)
+ ld 19, 152(1)
+ ld 18, 144(1)
+ ld 17, 136(1)
+ addi 1, 1, 256
+ ld 0, 16(1)
+ lwz 12, 8(1)
+ mtocrf 32, 12
+ mtlr 0
+ blr
+ /* NOTE(review): filler words placed after the return; never executed. */
+ .long 0
+ .quad 0
+.Lfunc_end2:
+ .size zfs_blake3_hash_many_sse41, .Lfunc_end2-.Lfunc_begin2
+ .cfi_endproc
+
+ /*
+ * 16-byte constants for blake3_hash4_sse41 (defined right after this
+ * pool). Its setup code forms each address from the TOC and loads the
+ * table with lvx (lxvd2x for .LCPI3_0).
+ */
+ .section .rodata.cst16,"aM",@progbits,16
+ .p2align 4
+ /*
+ * Two doublewords, 0x0000000100000000 and 0x0000000300000002; read as
+ * little-endian 32-bit words that is 0, 1, 2, 3.
+ * NOTE(review): presumably the per-lane offsets added to the counter -
+ * confirm against the C source.
+ */
+.LCPI3_0:
+ .quad 4294967296
+ .quad 12884901890
+ /* Compared with zero (vcmpgtsb) in the setup code; every entry is non-negative. */
+.LCPI3_1:
+ .byte 2
+ .byte 3
+ .byte 0
+ .byte 1
+ .byte 6
+ .byte 7
+ .byte 4
+ .byte 5
+ .byte 10
+ .byte 11
+ .byte 8
+ .byte 9
+ .byte 14
+ .byte 15
+ .byte 12
+ .byte 13
+ /* Compared with zero (vcmpgtsb) in the setup code; every entry is non-negative. */
+.LCPI3_2:
+ .byte 1
+ .byte 2
+ .byte 3
+ .byte 0
+ .byte 5
+ .byte 6
+ .byte 7
+ .byte 4
+ .byte 9
+ .byte 10
+ .byte 11
+ .byte 8
+ .byte 13
+ .byte 14
+ .byte 15
+ .byte 12
+ /*
+ * vperm control whose indices stay inside each aligned group of four
+ * bytes, swapping the two halves of every 32-bit word.
+ */
+.LCPI3_3:
+ .byte 29
+ .byte 28
+ .byte 31
+ .byte 30
+ .byte 25
+ .byte 24
+ .byte 27
+ .byte 26
+ .byte 21
+ .byte 20
+ .byte 23
+ .byte 22
+ .byte 17
+ .byte 16
+ .byte 19
+ .byte 18
+ /*
+ * .LCPI3_4 .. .LCPI3_7: one 32-bit word repeated in all four lanes
+ * (0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A).
+ * NOTE(review): these match the first four BLAKE3 IV words - confirm
+ * against the reference implementation.
+ */
+.LCPI3_4:
+ .long 1779033703
+ .long 1779033703
+ .long 1779033703
+ .long 1779033703
+.LCPI3_5:
+ .long 3144134277
+ .long 3144134277
+ .long 3144134277
+ .long 3144134277
+.LCPI3_6:
+ .long 1013904242
+ .long 1013904242
+ .long 1013904242
+ .long 1013904242
+.LCPI3_7:
+ .long 2773480762
+ .long 2773480762
+ .long 2773480762
+ .long 2773480762
+ /*
+ * vperm control whose indices stay inside each aligned group of four
+ * bytes, moving every byte of a 32-bit word by one position.
+ */
+.LCPI3_8:
+ .byte 30
+ .byte 29
+ .byte 28
+ .byte 31
+ .byte 26
+ .byte 25
+ .byte 24
+ .byte 27
+ .byte 22
+ .byte 21
+ .byte 20
+ .byte 23
+ .byte 18
+ .byte 17
+ .byte 16
+ .byte 19
+ .text
+ .p2align 2
+ .type blake3_hash4_sse41,@function
+blake3_hash4_sse41:
+.Lfunc_begin3:
+ .cfi_startproc
+.Lfunc_gep3:
+ addis 2, 12, .TOC.-.Lfunc_gep3@ha
+ addi 2, 2, .TOC.-.Lfunc_gep3@l
+.Lfunc_lep3:
+ .localentry blake3_hash4_sse41, .Lfunc_lep3-.Lfunc_gep3
+ stdu 1, -416(1)
+ .cfi_def_cfa_offset 416
+ .cfi_offset r22, -176
+ .cfi_offset r23, -168
+ .cfi_offset r24, -160
+ .cfi_offset r25, -152
+ .cfi_offset r26, -144
+ .cfi_offset r27, -136
+ .cfi_offset r28, -128
+ .cfi_offset r29, -120
+ .cfi_offset r30, -112
+ .cfi_offset f20, -96
+ .cfi_offset f21, -88
+ .cfi_offset f22, -80
+ .cfi_offset f23, -72
+ .cfi_offset f24, -64
+ .cfi_offset f25, -56
+ .cfi_offset f26, -48
+ .cfi_offset f27, -40
+ .cfi_offset f28, -32
+ .cfi_offset f29, -24
+ .cfi_offset f30, -16
+ .cfi_offset f31, -8
+ .cfi_offset v20, -368
+ .cfi_offset v21, -352
+ .cfi_offset v22, -336
+ .cfi_offset v23, -320
+ .cfi_offset v24, -304
+ .cfi_offset v25, -288
+ .cfi_offset v26, -272
+ .cfi_offset v27, -256
+ .cfi_offset v28, -240
+ .cfi_offset v29, -224
+ .cfi_offset v30, -208
+ .cfi_offset v31, -192
+ li 11, 48
+ li 0, 8
+ std 30, 304(1)
+ li 30, 12
+ li 12, 4
+ lfiwzx 0, 0, 5
+ stxvd2x 52, 1, 11
+ li 11, 64
+ lfiwzx 2, 5, 0
+ li 0, 20
+ lfiwzx 3, 5, 30
+ stxvd2x 53, 1, 11
+ li 11, 80
+ li 30, 24
+ lfiwzx 4, 5, 0
+ li 0, 28
+ stxvd2x 54, 1, 11
+ li 11, 96
+ lfiwzx 1, 5, 12
+ lfiwzx 6, 5, 30
+ xxspltw 47, 0, 1
+ cmpldi 4, 0
+ std 22, 240(1)
+ stxvd2x 55, 1, 11
+ li 11, 112
+ lfiwzx 7, 5, 0
+ xxspltw 40, 2, 1
+ std 23, 248(1)
+ xxspltw 39, 3, 1
+ std 24, 256(1)
+ std 25, 264(1)
+ xxspltw 51, 1, 1
+ xxspltw 43, 6, 1
+ std 26, 272(1)
+ xxspltw 41, 7, 1
+ std 27, 280(1)
+ std 28, 288(1)
+ std 29, 296(1)
+ stxvd2x 56, 1, 11
+ li 11, 128
+ stfd 20, 320(1)
+ stxvd2x 57, 1, 11
+ li 11, 144
+ stfd 21, 328(1)
+ stxvd2x 58, 1, 11
+ li 11, 160
+ stfd 22, 336(1)
+ stxvd2x 59, 1, 11
+ li 11, 176
+ stfd 23, 344(1)
+ stxvd2x 60, 1, 11
+ li 11, 192
+ stfd 24, 352(1)
+ stxvd2x 61, 1, 11
+ li 11, 208
+ stfd 25, 360(1)
+ stxvd2x 62, 1, 11
+ li 11, 224
+ stfd 26, 368(1)
+ stxvd2x 63, 1, 11
+ li 11, 16
+ xxspltw 63, 4, 1
+ lfiwzx 5, 5, 11
+ ld 5, 448(1)
+ stfd 27, 376(1)
+ stfd 28, 384(1)
+ stfd 29, 392(1)
+ stfd 30, 400(1)
+ stfd 31, 408(1)
+ xxspltw 50, 5, 1
+ beq 0, .LBB3_5
+ addis 30, 2, .LCPI3_0@toc@ha
+ neg 7, 7
+ xxleqv 34, 34, 34
+ addis 28, 2, .LCPI3_5@toc@ha
+ addis 27, 2, .LCPI3_6@toc@ha
+ addis 26, 2, .LCPI3_7@toc@ha
+ addis 29, 2, .LCPI3_4@toc@ha
+ addis 25, 2, .LCPI3_8@toc@ha
+ addi 0, 30, .LCPI3_0@toc@l
+ mtfprwz 2, 7
+ addis 7, 2, .LCPI3_1@toc@ha
+ addis 30, 2, .LCPI3_3@toc@ha
+ addi 24, 29, .LCPI3_4@toc@l
+ ld 29, 24(3)
+ lxvd2x 1, 0, 0
+ mtfprwz 0, 6
+ rldicl 6, 6, 32, 32
+ addi 0, 30, .LCPI3_3@toc@l
+ ld 30, 16(3)
+ xxspltw 2, 2, 1
+ vslw 2, 2, 2
+ xxspltw 37, 0, 1
+ mtfprwz 0, 6
+ addi 6, 7, .LCPI3_1@toc@l
+ addis 7, 2, .LCPI3_2@toc@ha
+ xxswapd 35, 1
+ xxlxor 36, 36, 36
+ xxspltw 33, 0, 1
+ xxland 35, 2, 35
+ vadduwm 0, 3, 5
+ lvx 5, 0, 6
+ addi 6, 7, .LCPI3_2@toc@l
+ ld 7, 8(3)
+ xxlor 35, 35, 34
+ xxlxor 34, 32, 34
+ xxlor 9, 32, 32
+ lvx 0, 0, 6
+ ld 6, 0(3)
+ addi 3, 3, -8
+ vcmpgtsw 2, 3, 2
+ lvx 3, 0, 0
+ addi 0, 28, .LCPI3_5@toc@l
+ addi 28, 27, .LCPI3_6@toc@l
+ addi 27, 26, .LCPI3_7@toc@l
+ addi 26, 25, .LCPI3_8@toc@l
+ or 25, 9, 8
+ li 9, 0
+ vcmpgtsb 5, 4, 5
+ vcmpgtsb 0, 4, 0
+ xxlor 11, 35, 35
+ lvx 3, 0, 24
+ xxlor 12, 35, 35
+ vsubuwm 2, 1, 2
+ xxlnor 10, 37, 37
+ xxlor 13, 34, 34
+ lvx 2, 0, 0
+ li 0, 32
+ xxlnor 31, 32, 32
+ xxlor 30, 34, 34
+ lvx 2, 0, 28
+ li 28, 48
+ xxlor 29, 34, 34
+ lvx 2, 0, 27
+ li 27, 0
+ xxlor 28, 34, 34
+ lvx 2, 0, 26
+ xxlor 27, 34, 34
+.LBB3_2:
+ mr 26, 27
+ addi 27, 27, 1
+ xxlor 23, 39, 39
+ cmpld 27, 4
+ sldi 26, 26, 6
+ xxlor 24, 40, 40
+ iseleq 24, 10, 9
+ add 23, 6, 26
+ add 22, 30, 26
+ lxvd2x 0, 6, 26
+ lxvd2x 1, 7, 26
+ or 25, 24, 25
+ add 24, 7, 26
+ lxvd2x 2, 30, 26
+ lxvd2x 3, 29, 26
+ xxlor 26, 47, 47
+ lxvd2x 4, 23, 11
+ lxvd2x 6, 24, 11
+ clrlwi 25, 25, 24
+ xxlor 25, 51, 51
+ lxvd2x 7, 22, 11
+ lxvd2x 8, 23, 0
+ mtfprd 5, 25
+ add 25, 29, 26
+ xxswapd 34, 0
+ lxvd2x 0, 25, 11
+ xxswapd 38, 1
+ xxswapd 32, 2
+ lxvd2x 1, 24, 0
+ lxvd2x 2, 22, 0
+ xxswapd 40, 3
+ xxswapd 39, 4
+ lxvd2x 3, 25, 0
+ lxvd2x 4, 23, 28
+ xxswapd 60, 6
+ xxswapd 47, 7
+ lxvd2x 6, 24, 28
+ xxswapd 57, 8
+ lxvd2x 7, 22, 28
+ lxvd2x 8, 25, 28
+ xxswapd 58, 0
+ mr 25, 3
+ xxswapd 53, 1
+ xxswapd 56, 2
+ xxswapd 52, 3
+ xxswapd 55, 4
+ xxswapd 54, 6
+ xxswapd 0, 5
+ xxswapd 42, 7
+ xxswapd 48, 8
+ mtctr 12
+.LBB3_3:
+ ldu 24, 8(25)
+ add 24, 24, 26
+ addi 24, 24, 256
+ dcbt 0, 24
+ bdnz .LBB3_3
+ vmrgew 4, 28, 7
+ vspltisw 14, 9
+ mr 25, 8
+ vmrgew 27, 6, 2
+ vspltisw 17, 4
+ vmrglw 12, 6, 2
+ vspltisw 19, 10
+ vmrghw 30, 6, 2
+ xxspltw 0, 0, 3
+ vmrglw 2, 8, 0
+ vmrghw 13, 8, 0
+ xxlor 7, 36, 36
+ vmrgew 4, 21, 25
+ vmrglw 29, 28, 7
+ vmrghw 1, 28, 7
+ vmrglw 28, 26, 15
+ xxmrgld 37, 34, 44
+ vmrgew 7, 26, 15
+ vmrghw 15, 26, 15
+ xxlor 21, 36, 36
+ vmrglw 4, 21, 25
+ vmrghw 21, 21, 25
+ vmrglw 25, 20, 24
+ xxmrgld 34, 60, 61
+ vmrghw 26, 20, 24
+ xxlor 38, 26, 26
+ vmrgew 3, 8, 0
+ xxlor 5, 36, 36
+ vmrgew 4, 20, 24
+ vspltisw 24, -16
+ vmrglw 20, 22, 23
+ xxmrgld 57, 57, 5
+ vmrglw 8, 16, 10
+ vmrghw 0, 16, 10
+ vadduwm 12, 19, 19
+ xxlor 8, 37, 37
+ xxlor 20, 36, 36
+ vmrgew 4, 22, 23
+ vmrghw 23, 22, 23
+ xxmrgld 40, 40, 52
+ vmrgew 22, 16, 10
+ vsubuwm 10, 14, 24
+ vslw 14, 17, 17
+ vadduwm 17, 5, 6
+ xxmrgld 37, 47, 33
+ xxlor 22, 36, 36
+ xxmrgld 36, 45, 62
+ xxlor 38, 25, 25
+ xxlor 2, 34, 34
+ vadduwm 19, 4, 6
+ xxmrgld 38, 39, 7
+ xxlor 3, 36, 36
+ xxmrghd 39, 47, 33
+ xxlor 36, 24, 24
+ xxmrgld 33, 58, 53
+ vadduwm 17, 17, 18
+ vadduwm 29, 2, 4
+ xxmrgld 36, 35, 59
+ xxlor 34, 23, 23
+ xxmrghd 35, 45, 62
+ xxlor 1, 9, 9
+ vadduwm 28, 5, 2
+ xxlor 1, 13, 13
+ vadduwm 19, 19, 31
+ vadduwm 24, 29, 11
+ vadduwm 28, 28, 9
+ xxlxor 61, 49, 9
+ xxlor 1, 41, 41
+ xxlor 41, 11, 11
+ xxlxor 34, 51, 13
+ vperm 29, 29, 29, 9
+ xxlxor 46, 56, 46
+ vperm 2, 2, 2, 9
+ xxlxor 59, 60, 0
+ vperm 14, 14, 14, 9
+ vperm 30, 27, 27, 9
+ vadduwm 19, 19, 3
+ xxlor 4, 35, 35
+ xxland 61, 61, 10
+ xxlor 35, 12, 12
+ xxland 34, 34, 10
+ vadduwm 27, 29, 3
+ xxlor 35, 30, 30
+ vadduwm 17, 17, 4
+ xxlor 26, 36, 36
+ xxland 46, 46, 10
+ vadduwm 3, 2, 3
+ xxlor 36, 29, 29
+ xxland 62, 62, 10
+ xxlxor 45, 59, 50
+ xxlxor 50, 35, 63
+ vadduwm 31, 14, 4
+ xxlor 36, 28, 28
+ xxlor 6, 37, 37
+ vadduwm 16, 30, 4
+ xxlxor 43, 63, 43
+ xxlxor 37, 48, 1
+ vrlw 4, 13, 12
+ vrlw 18, 18, 12
+ vrlw 11, 11, 12
+ vrlw 5, 5, 12
+ vadduwm 15, 24, 6
+ vadduwm 28, 28, 7
+ vadduwm 17, 4, 17
+ vadduwm 19, 18, 19
+ vadduwm 15, 11, 15
+ vadduwm 28, 5, 28
+ xxlor 25, 38, 38
+ xxlxor 61, 49, 61
+ xxlxor 34, 51, 34
+ xxlxor 46, 47, 46
+ xxlxor 62, 60, 62
+ xxlor 38, 27, 27
+ vadduwm 19, 19, 1
+ vperm 29, 29, 29, 6
+ vperm 2, 2, 2, 6
+ vperm 24, 14, 14, 6
+ vperm 30, 30, 30, 6
+ xxlor 5, 33, 33
+ vadduwm 17, 17, 25
+ xxland 61, 61, 31
+ xxland 34, 34, 31
+ xxland 56, 56, 31
+ xxland 62, 62, 31
+ vadduwm 27, 29, 27
+ vadduwm 3, 2, 3
+ vadduwm 31, 24, 31
+ vadduwm 16, 30, 16
+ xxlxor 36, 59, 36
+ xxlxor 50, 35, 50
+ xxlxor 43, 63, 43
+ xxlxor 37, 48, 37
+ vrlw 1, 18, 10
+ xxmrgld 50, 32, 55
+ vrlw 11, 11, 10
+ xxmrghd 55, 32, 55
+ vrlw 5, 5, 10
+ vrlw 4, 4, 10
+ vadduwm 15, 15, 8
+ vadduwm 28, 28, 18
+ vadduwm 17, 1, 17
+ vadduwm 19, 11, 19
+ vadduwm 15, 5, 15
+ vadduwm 28, 4, 28
+ xxlor 7, 57, 57
+ xxlxor 62, 49, 62
+ xxlxor 61, 51, 61
+ xxlxor 57, 47, 34
+ xxlxor 34, 60, 56
+ vperm 24, 30, 30, 9
+ xxmrgld 62, 20, 21
+ vperm 29, 29, 29, 9
+ vperm 25, 25, 25, 9
+ vperm 2, 2, 2, 9
+ vmr 14, 8
+ xxmrghd 40, 58, 53
+ xxmrgld 58, 54, 22
+ vadduwm 17, 17, 30
+ xxland 56, 56, 10
+ vadduwm 21, 19, 8
+ xxland 61, 61, 10
+ xxland 51, 57, 10
+ xxland 34, 34, 10
+ vadduwm 31, 24, 31
+ vadduwm 16, 29, 16
+ vadduwm 27, 19, 27
+ vadduwm 3, 2, 3
+ xxlxor 33, 63, 33
+ xxlxor 43, 48, 43
+ xxlxor 37, 59, 37
+ xxlxor 36, 35, 36
+ vrlw 1, 1, 12
+ vrlw 11, 11, 12
+ vrlw 5, 5, 12
+ vrlw 4, 4, 12
+ vadduwm 0, 15, 26
+ vadduwm 15, 28, 23
+ vadduwm 17, 1, 17
+ vadduwm 28, 11, 21
+ vadduwm 0, 5, 0
+ vadduwm 15, 4, 15
+ xxlxor 56, 49, 56
+ xxlxor 61, 60, 61
+ xxlxor 51, 32, 51
+ xxlxor 34, 47, 34
+ vperm 24, 24, 24, 6
+ vperm 29, 29, 29, 6
+ vperm 19, 19, 19, 6
+ vperm 2, 2, 2, 6
+ vmr 13, 8
+ xxlor 53, 3, 3
+ xxland 56, 56, 31
+ xxland 61, 61, 31
+ xxland 51, 51, 31
+ xxland 34, 34, 31
+ vadduwm 31, 24, 31
+ vadduwm 16, 29, 16
+ vadduwm 27, 19, 27
+ vadduwm 3, 2, 3
+ xxlxor 33, 63, 33
+ xxlxor 43, 48, 43
+ xxlxor 36, 35, 36
+ xxlxor 37, 59, 37
+ vrlw 4, 4, 10
+ vrlw 1, 1, 10
+ vrlw 11, 11, 10
+ vrlw 5, 5, 10
+ xxlor 52, 4, 4
+ xxlor 40, 2, 2
+ vadduwm 17, 17, 21
+ vadduwm 28, 28, 20
+ vadduwm 0, 0, 7
+ vadduwm 15, 15, 8
+ vadduwm 17, 4, 17
+ vadduwm 28, 1, 28
+ vadduwm 0, 11, 0
+ vadduwm 15, 5, 15
+ xxlxor 61, 49, 61
+ xxlxor 51, 60, 51
+ xxlxor 34, 32, 34
+ xxlxor 56, 47, 56
+ vperm 29, 29, 29, 9
+ vperm 19, 19, 19, 9
+ vperm 2, 2, 2, 9
+ vperm 24, 24, 24, 9
+ vmr 25, 26
+ xxlor 3, 39, 39
+ xxland 61, 61, 10
+ xxland 51, 51, 10
+ xxland 34, 34, 10
+ xxland 56, 56, 10
+ vadduwm 27, 29, 27
+ vadduwm 3, 19, 3
+ vadduwm 31, 2, 31
+ vadduwm 16, 24, 16
+ xxlxor 36, 59, 36
+ xxlxor 33, 35, 33
+ xxlxor 43, 63, 43
+ xxlxor 37, 48, 37
+ vrlw 4, 4, 12
+ vrlw 1, 1, 12
+ vrlw 11, 11, 12
+ vrlw 5, 5, 12
+ xxlor 54, 6, 6
+ xxlor 58, 5, 5
+ xxlor 39, 8, 8
+ vadduwm 17, 17, 22
+ vadduwm 28, 28, 26
+ vadduwm 0, 0, 7
+ vadduwm 15, 15, 25
+ vadduwm 17, 4, 17
+ vadduwm 28, 1, 28
+ vadduwm 0, 11, 0
+ vadduwm 15, 5, 15
+ xxlxor 61, 49, 61
+ xxlxor 51, 60, 51
+ xxlxor 34, 32, 34
+ xxlxor 56, 47, 56
+ vperm 29, 29, 29, 6
+ vperm 19, 19, 19, 6
+ vperm 2, 2, 2, 6
+ vperm 24, 24, 24, 6
+ xxlor 39, 26, 26
+ vadduwm 28, 28, 14
+ xxland 61, 61, 31
+ xxland 51, 51, 31
+ xxland 34, 34, 31
+ xxland 56, 56, 31
+ vadduwm 27, 29, 27
+ vadduwm 3, 19, 3
+ vadduwm 31, 2, 31
+ vadduwm 16, 24, 16
+ xxlxor 36, 59, 36
+ xxlxor 33, 35, 33
+ xxlxor 43, 63, 43
+ xxlxor 37, 48, 37
+ vrlw 1, 1, 10
+ vrlw 11, 11, 10
+ vrlw 5, 5, 10
+ vrlw 4, 4, 10
+ vadduwm 17, 17, 7
+ vadduwm 0, 0, 30
+ vadduwm 15, 15, 23
+ vadduwm 17, 1, 17
+ vadduwm 28, 11, 28
+ vadduwm 0, 5, 0
+ vadduwm 15, 4, 15
+ xxlxor 56, 49, 56
+ xxlxor 61, 60, 61
+ xxlxor 51, 32, 51
+ xxlxor 34, 47, 34
+ vperm 24, 24, 24, 9
+ vperm 29, 29, 29, 9
+ vperm 19, 19, 19, 9
+ vperm 2, 2, 2, 9
+ xxlor 24, 55, 55
+ vadduwm 17, 17, 13
+ xxland 56, 56, 10
+ xxland 61, 61, 10
+ xxland 51, 51, 10
+ xxland 34, 34, 10
+ vadduwm 31, 24, 31
+ vadduwm 16, 29, 16
+ vadduwm 27, 19, 27
+ vadduwm 3, 2, 3
+ xxlxor 33, 63, 33
+ xxlxor 43, 48, 43
+ xxlxor 37, 59, 37
+ xxlxor 36, 35, 36
+ vrlw 1, 1, 12
+ vrlw 11, 11, 12
+ vrlw 5, 5, 12
+ vrlw 4, 4, 12
+ vmr 23, 13
+ xxlor 45, 25, 25
+ xxlor 39, 7, 7
+ vadduwm 28, 28, 13
+ vadduwm 0, 0, 18
+ vadduwm 15, 15, 7
+ vadduwm 17, 1, 17
+ vadduwm 28, 11, 28
+ vadduwm 0, 5, 0
+ vadduwm 15, 4, 15
+ xxlxor 56, 49, 56
+ xxlxor 61, 60, 61
+ xxlxor 51, 32, 51
+ xxlxor 34, 47, 34
+ vperm 24, 24, 24, 6
+ vperm 29, 29, 29, 6
+ vperm 19, 19, 19, 6
+ vperm 2, 2, 2, 6
+ xxlor 2, 46, 46
+ xxlor 46, 3, 3
+ xxland 56, 56, 31
+ xxland 61, 61, 31
+ xxland 51, 51, 31
+ xxland 34, 34, 31
+ vadduwm 31, 24, 31
+ vadduwm 16, 29, 16
+ vadduwm 27, 19, 27
+ vadduwm 3, 2, 3
+ xxlxor 33, 63, 33
+ xxlxor 43, 48, 43
+ xxlxor 36, 35, 36
+ xxlxor 37, 59, 37
+ vrlw 4, 4, 10
+ vrlw 1, 1, 10
+ vrlw 11, 11, 10
+ vrlw 5, 5, 10
+ vadduwm 17, 17, 20
+ vadduwm 28, 28, 26
+ vadduwm 0, 0, 25
+ vadduwm 15, 15, 14
+ vadduwm 17, 4, 17
+ vadduwm 28, 1, 28
+ vadduwm 0, 11, 0
+ vadduwm 15, 5, 15
+ xxlxor 61, 49, 61
+ xxlxor 51, 60, 51
+ xxlxor 34, 32, 34
+ xxlxor 56, 47, 56
+ vperm 29, 29, 29, 9
+ vperm 19, 19, 19, 9
+ vperm 2, 2, 2, 9
+ vperm 24, 24, 24, 9
+ xxlor 52, 2, 2
+ vadduwm 17, 17, 8
+ xxland 61, 61, 10
+ xxland 51, 51, 10
+ xxland 34, 34, 10
+ xxland 56, 56, 10
+ vadduwm 27, 29, 27
+ vadduwm 3, 19, 3
+ vadduwm 31, 2, 31
+ vadduwm 16, 24, 16
+ xxlxor 36, 59, 36
+ xxlxor 33, 35, 33
+ xxlxor 43, 63, 43
+ xxlxor 37, 48, 37
+ vrlw 4, 4, 12
+ vrlw 1, 1, 12
+ vrlw 11, 11, 12
+ vrlw 5, 5, 12
+ vadduwm 28, 28, 20
+ vadduwm 0, 0, 21
+ vadduwm 15, 15, 18
+ vadduwm 17, 4, 17
+ vadduwm 28, 1, 28
+ vadduwm 0, 11, 0
+ vadduwm 15, 5, 15
+ xxlxor 61, 49, 61
+ xxlxor 51, 60, 51
+ xxlxor 34, 32, 34
+ xxlxor 56, 47, 56
+ vperm 29, 29, 29, 6
+ vperm 19, 19, 19, 6
+ vperm 2, 2, 2, 6
+ vperm 24, 24, 24, 6
+ vadduwm 17, 17, 22
+ vadduwm 28, 28, 30
+ xxland 61, 61, 31
+ xxland 51, 51, 31
+ xxland 34, 34, 31
+ xxland 56, 56, 31
+ vadduwm 27, 29, 27
+ vadduwm 3, 19, 3
+ vadduwm 31, 2, 31
+ vadduwm 16, 24, 16
+ xxlxor 36, 59, 36
+ xxlxor 33, 35, 33
+ xxlxor 43, 63, 43
+ xxlxor 37, 48, 37
+ vrlw 1, 1, 10
+ vrlw 11, 11, 10
+ vrlw 5, 5, 10
+ vrlw 4, 4, 10
+ vadduwm 0, 0, 23
+ vadduwm 15, 15, 7
+ vadduwm 17, 1, 17
+ vadduwm 28, 11, 28
+ vadduwm 0, 5, 0
+ vadduwm 15, 4, 15
+ xxlxor 56, 49, 56
+ xxlxor 61, 60, 61
+ xxlxor 51, 32, 51
+ xxlxor 34, 47, 34
+ vperm 24, 24, 24, 9
+ vperm 29, 29, 29, 9
+ vperm 19, 19, 19, 9
+ vperm 2, 2, 2, 9
+ xxlor 5, 4, 4
+ xxlor 4, 58, 58
+ xxland 56, 56, 10
+ xxland 61, 61, 10
+ xxland 51, 51, 10
+ xxland 34, 34, 10
+ vadduwm 31, 24, 31
+ vadduwm 16, 29, 16
+ vadduwm 27, 19, 27
+ vadduwm 3, 2, 3
+ xxlxor 33, 63, 33
+ xxlxor 43, 48, 43
+ xxlxor 37, 59, 37
+ xxlxor 36, 35, 36
+ vrlw 1, 1, 12
+ vrlw 11, 11, 12
+ vrlw 5, 5, 12
+ vrlw 4, 4, 12
+ xxlor 39, 8, 8
+ xxlor 54, 24, 24
+ xxlor 58, 26, 26
+ vadduwm 17, 17, 13
+ vadduwm 28, 28, 7
+ vadduwm 0, 0, 22
+ vadduwm 15, 15, 26
+ vadduwm 17, 1, 17
+ vadduwm 28, 11, 28
+ vadduwm 0, 5, 0
+ vadduwm 15, 4, 15
+ xxlxor 56, 49, 56
+ xxlxor 61, 60, 61
+ xxlxor 51, 32, 51
+ xxlxor 34, 47, 34
+ vperm 24, 24, 24, 6
+ vperm 29, 29, 29, 6
+ vperm 19, 19, 19, 6
+ vperm 2, 2, 2, 6
+ xxlor 3, 53, 53
+ xxlor 53, 4, 4
+ xxland 56, 56, 31
+ xxland 61, 61, 31
+ xxland 51, 51, 31
+ xxland 34, 34, 31
+ vadduwm 31, 24, 31
+ vadduwm 16, 29, 16
+ vadduwm 27, 19, 27
+ vadduwm 3, 2, 3
+ xxlxor 33, 63, 33
+ xxlxor 43, 48, 43
+ xxlxor 36, 35, 36
+ xxlxor 37, 59, 37
+ vrlw 4, 4, 10
+ vrlw 1, 1, 10
+ vrlw 11, 11, 10
+ vrlw 5, 5, 10
+ vadduwm 17, 17, 21
+ vadduwm 28, 28, 20
+ vadduwm 0, 0, 18
+ vadduwm 15, 15, 25
+ vadduwm 17, 4, 17
+ vadduwm 28, 1, 28
+ vadduwm 0, 11, 0
+ vadduwm 15, 5, 15
+ xxlxor 61, 49, 61
+ xxlxor 51, 60, 51
+ xxlxor 34, 32, 34
+ xxlxor 56, 47, 56
+ vperm 29, 29, 29, 9
+ vperm 19, 19, 19, 9
+ vperm 2, 2, 2, 9
+ vperm 24, 24, 24, 9
+ xxlor 2, 55, 55
+ vmr 23, 18
+ xxland 61, 61, 10
+ xxland 51, 51, 10
+ xxland 34, 34, 10
+ xxland 56, 56, 10
+ vadduwm 27, 29, 27
+ vadduwm 3, 19, 3
+ vadduwm 31, 2, 31
+ vadduwm 16, 24, 16
+ xxlxor 36, 59, 36
+ xxlxor 33, 35, 33
+ xxlxor 43, 63, 43
+ xxlxor 37, 48, 37
+ vrlw 4, 4, 12
+ vrlw 1, 1, 12
+ vrlw 11, 11, 12
+ vrlw 5, 5, 12
+ xxlor 50, 5, 5
+ vadduwm 17, 17, 14
+ vadduwm 28, 28, 30
+ vadduwm 0, 0, 18
+ vadduwm 15, 15, 22
+ vadduwm 17, 4, 17
+ vadduwm 28, 1, 28
+ vadduwm 0, 11, 0
+ vadduwm 15, 5, 15
+ xxlxor 61, 49, 61
+ xxlxor 51, 60, 51
+ xxlxor 34, 32, 34
+ xxlxor 56, 47, 56
+ vperm 29, 29, 29, 6
+ vperm 19, 19, 19, 6
+ vperm 2, 2, 2, 6
+ vperm 24, 24, 24, 6
+ xxlor 25, 40, 40
+ vmr 8, 13
+ xxland 61, 61, 31
+ xxland 51, 51, 31
+ xxland 34, 34, 31
+ xxland 56, 56, 31
+ vadduwm 27, 29, 27
+ vadduwm 3, 19, 3
+ vadduwm 31, 2, 31
+ vadduwm 16, 24, 16
+ xxlxor 36, 59, 36
+ xxlxor 33, 35, 33
+ xxlxor 43, 63, 43
+ xxlxor 37, 48, 37
+ xxlor 45, 25, 25
+ vrlw 1, 1, 10
+ vrlw 11, 11, 10
+ vrlw 5, 5, 10
+ vrlw 4, 4, 10
+ vadduwm 17, 17, 13
+ xxlor 45, 2, 2
+ vadduwm 0, 0, 8
+ vadduwm 28, 28, 13
+ vadduwm 15, 15, 26
+ vadduwm 17, 1, 17
+ vadduwm 28, 11, 28
+ vadduwm 0, 5, 0
+ vadduwm 15, 4, 15
+ xxlxor 56, 49, 56
+ xxlxor 61, 60, 61
+ xxlxor 51, 32, 51
+ xxlxor 34, 47, 34
+ vperm 24, 24, 24, 9
+ vperm 29, 29, 29, 9
+ vperm 19, 19, 19, 9
+ vperm 2, 2, 2, 9
+ xxlor 4, 57, 57
+ xxlor 26, 46, 46
+ xxland 56, 56, 10
+ xxland 61, 61, 10
+ xxland 51, 51, 10
+ xxland 34, 34, 10
+ vadduwm 31, 24, 31
+ vadduwm 16, 29, 16
+ vadduwm 27, 19, 27
+ vadduwm 3, 2, 3
+ xxlxor 33, 63, 33
+ xxlxor 43, 48, 43
+ xxlxor 37, 59, 37
+ xxlxor 36, 35, 36
+ vrlw 1, 1, 12
+ vrlw 11, 11, 12
+ vrlw 5, 5, 12
+ vrlw 4, 4, 12
+ xxlor 8, 62, 62
+ xxlor 57, 3, 3
+ xxlor 46, 7, 7
+ xxlor 62, 6, 6
+ vadduwm 17, 17, 7
+ vadduwm 28, 28, 25
+ vadduwm 0, 0, 14
+ vadduwm 15, 15, 30
+ vadduwm 17, 1, 17
+ vadduwm 28, 11, 28
+ vadduwm 0, 5, 0
+ vadduwm 15, 4, 15
+ xxlxor 56, 49, 56
+ xxlxor 61, 60, 61
+ xxlxor 51, 32, 51
+ xxlxor 34, 47, 34
+ vperm 24, 24, 24, 6
+ vperm 29, 29, 29, 6
+ vperm 19, 19, 19, 6
+ vperm 2, 2, 2, 6
+ vadduwm 17, 17, 20
+ xxlor 3, 52, 52
+ xxland 56, 56, 31
+ xxland 61, 61, 31
+ xxland 51, 51, 31
+ xxland 34, 34, 31
+ vadduwm 31, 24, 31
+ vadduwm 16, 29, 16
+ vadduwm 27, 19, 27
+ vadduwm 3, 2, 3
+ xxlxor 33, 63, 33
+ xxlxor 43, 48, 43
+ xxlxor 36, 35, 36
+ xxlxor 37, 59, 37
+ vrlw 4, 4, 10
+ vrlw 1, 1, 10
+ vrlw 11, 11, 10
+ vrlw 5, 5, 10
+ xxlor 52, 8, 8
+ vadduwm 0, 0, 22
+ vadduwm 28, 28, 20
+ vadduwm 15, 15, 23
+ vadduwm 17, 4, 17
+ vadduwm 28, 1, 28
+ vadduwm 0, 11, 0
+ vadduwm 15, 5, 15
+ xxlxor 61, 49, 61
+ xxlxor 51, 60, 51
+ xxlxor 34, 32, 34
+ xxlxor 56, 47, 56
+ vperm 29, 29, 29, 9
+ vperm 19, 19, 19, 9
+ vperm 2, 2, 2, 9
+ vperm 24, 24, 24, 9
+ xxlor 6, 55, 55
+ xxlor 55, 4, 4
+ xxland 61, 61, 10
+ xxland 51, 51, 10
+ xxland 34, 34, 10
+ xxland 56, 56, 10
+ vadduwm 27, 29, 27
+ vadduwm 3, 19, 3
+ vadduwm 31, 2, 31
+ vadduwm 16, 24, 16
+ xxlxor 36, 59, 36
+ xxlxor 33, 35, 33
+ xxlxor 43, 63, 43
+ xxlxor 37, 48, 37
+ vrlw 4, 4, 12
+ vrlw 1, 1, 12
+ vrlw 11, 11, 12
+ vrlw 5, 5, 12
+ vadduwm 17, 17, 23
+ vadduwm 28, 28, 13
+ vadduwm 0, 0, 21
+ vadduwm 15, 15, 14
+ vadduwm 17, 4, 17
+ vadduwm 28, 1, 28
+ vadduwm 0, 11, 0
+ vadduwm 15, 5, 15
+ xxlxor 61, 49, 61
+ xxlxor 51, 60, 51
+ xxlxor 34, 32, 34
+ xxlxor 56, 47, 56
+ vperm 29, 29, 29, 6
+ vperm 19, 19, 19, 6
+ vperm 2, 2, 2, 6
+ vperm 24, 24, 24, 6
+ xxlor 4, 53, 53
+ xxlor 53, 26, 26
+ xxland 61, 61, 31
+ xxland 51, 51, 31
+ xxland 34, 34, 31
+ xxland 56, 56, 31
+ vadduwm 27, 29, 27
+ vadduwm 3, 19, 3
+ vadduwm 31, 2, 31
+ vadduwm 16, 24, 16
+ xxlxor 36, 59, 36
+ xxlxor 33, 35, 33
+ xxlxor 43, 63, 43
+ xxlxor 37, 48, 37
+ vrlw 1, 1, 10
+ vrlw 11, 11, 10
+ vrlw 5, 5, 10
+ vrlw 4, 4, 10
+ vadduwm 17, 17, 21
+ vadduwm 28, 28, 8
+ vadduwm 0, 0, 7
+ vadduwm 15, 15, 30
+ vadduwm 17, 1, 17
+ vadduwm 28, 11, 28
+ vadduwm 0, 5, 0
+ vadduwm 15, 4, 15
+ xxlxor 56, 49, 56
+ xxlxor 61, 60, 61
+ xxlxor 51, 32, 51
+ xxlxor 34, 47, 34
+ vperm 24, 24, 24, 9
+ vperm 29, 29, 29, 9
+ vperm 19, 19, 19, 9
+ vperm 2, 2, 2, 9
+ xxlor 5, 25, 25
+ xxlor 2, 58, 58
+ xxland 56, 56, 10
+ xxland 61, 61, 10
+ xxland 51, 51, 10
+ xxland 34, 34, 10
+ vadduwm 31, 24, 31
+ vadduwm 16, 29, 16
+ vadduwm 27, 19, 27
+ vadduwm 3, 2, 3
+ xxlxor 33, 63, 33
+ xxlxor 43, 48, 43
+ xxlxor 37, 59, 37
+ xxlxor 36, 35, 36
+ vrlw 1, 1, 12
+ vrlw 11, 11, 12
+ vrlw 5, 5, 12
+ vrlw 4, 4, 12
+ vmr 22, 26
+ vadduwm 0, 0, 26
+ xxlor 58, 5, 5
+ vadduwm 17, 17, 25
+ vadduwm 28, 28, 18
+ vadduwm 15, 15, 26
+ vadduwm 17, 1, 17
+ vadduwm 28, 11, 28
+ vadduwm 0, 5, 0
+ vadduwm 15, 4, 15
+ xxlxor 56, 49, 56
+ xxlxor 61, 60, 61
+ xxlxor 51, 32, 51
+ xxlxor 34, 47, 34
+ vperm 24, 24, 24, 6
+ vperm 29, 29, 29, 6
+ vperm 19, 19, 19, 6
+ vperm 2, 2, 2, 6
+ xxlor 7, 24, 24
+ xxlor 8, 57, 57
+ xxland 56, 56, 31
+ xxland 61, 61, 31
+ xxland 51, 51, 31
+ xxland 34, 34, 31
+ vadduwm 31, 24, 31
+ vadduwm 16, 29, 16
+ vadduwm 27, 19, 27
+ vadduwm 3, 2, 3
+ xxlxor 33, 63, 33
+ xxlxor 43, 48, 43
+ xxlxor 36, 35, 36
+ xxlxor 37, 59, 37
+ vrlw 4, 4, 10
+ vrlw 1, 1, 10
+ vrlw 11, 11, 10
+ vrlw 5, 5, 10
+ xxlor 57, 7, 7
+ vadduwm 17, 17, 20
+ vadduwm 28, 28, 13
+ vadduwm 0, 0, 14
+ vadduwm 15, 15, 25
+ vadduwm 17, 4, 17
+ vadduwm 28, 1, 28
+ vadduwm 0, 11, 0
+ vadduwm 15, 5, 15
+ xxlxor 61, 49, 61
+ xxlxor 51, 60, 51
+ xxlxor 34, 32, 34
+ xxlxor 56, 47, 56
+ vperm 29, 29, 29, 9
+ vperm 19, 19, 19, 9
+ vperm 2, 2, 2, 9
+ vperm 24, 24, 24, 9
+ xxlor 5, 52, 52
+ xxlor 23, 45, 45
+ xxland 61, 61, 10
+ xxland 51, 51, 10
+ xxland 34, 34, 10
+ xxland 56, 56, 10
+ vadduwm 27, 29, 27
+ vadduwm 3, 19, 3
+ vadduwm 31, 2, 31
+ vadduwm 16, 24, 16
+ xxlxor 36, 59, 36
+ xxlxor 33, 35, 33
+ xxlxor 43, 63, 43
+ xxlxor 37, 48, 37
+ vrlw 4, 4, 12
+ vrlw 1, 1, 12
+ vrlw 11, 11, 12
+ vrlw 5, 5, 12
+ xxlor 52, 6, 6
+ vadduwm 28, 28, 8
+ vmr 13, 8
+ xxlor 40, 3, 3
+ vadduwm 17, 17, 20
+ vadduwm 0, 0, 8
+ vadduwm 15, 15, 22
+ vadduwm 17, 4, 17
+ vadduwm 28, 1, 28
+ vadduwm 0, 11, 0
+ vadduwm 15, 5, 15
+ xxlxor 61, 49, 61
+ xxlxor 51, 60, 51
+ xxlxor 34, 32, 34
+ xxlxor 56, 47, 56
+ vperm 29, 29, 29, 6
+ vperm 19, 19, 19, 6
+ vperm 2, 2, 2, 6
+ vperm 24, 24, 24, 6
+ xxlor 25, 39, 39
+ vmr 7, 30
+ xxland 61, 61, 31
+ xxland 51, 51, 31
+ xxland 34, 34, 31
+ xxland 56, 56, 31
+ vadduwm 27, 29, 27
+ vadduwm 3, 19, 3
+ vadduwm 31, 2, 31
+ vadduwm 16, 24, 16
+ xxlxor 36, 59, 36
+ xxlxor 33, 35, 33
+ xxlxor 43, 63, 43
+ xxlxor 37, 48, 37
+ vrlw 1, 1, 10
+ vrlw 11, 11, 10
+ vrlw 5, 5, 10
+ vrlw 4, 4, 10
+ vmr 30, 18
+ xxlor 24, 46, 46
+ xxlor 46, 25, 25
+ xxlor 50, 8, 8
+ vadduwm 17, 17, 23
+ vadduwm 28, 28, 14
+ vadduwm 0, 0, 18
+ vadduwm 15, 15, 26
+ vadduwm 17, 1, 17
+ vadduwm 28, 11, 28
+ vadduwm 0, 5, 0
+ vadduwm 15, 4, 15
+ xxlxor 56, 49, 56
+ xxlxor 61, 60, 61
+ xxlxor 51, 32, 51
+ xxlxor 34, 47, 34
+ vperm 24, 24, 24, 9
+ vperm 29, 29, 29, 9
+ vperm 19, 19, 19, 9
+ vperm 2, 2, 2, 9
+ xxlor 6, 58, 58
+ xxlor 58, 4, 4
+ xxland 56, 56, 10
+ xxland 61, 61, 10
+ xxland 51, 51, 10
+ xxland 34, 34, 10
+ vadduwm 31, 24, 31
+ vadduwm 16, 29, 16
+ vadduwm 27, 19, 27
+ vadduwm 3, 2, 3
+ xxlxor 33, 63, 33
+ xxlxor 43, 48, 43
+ xxlxor 37, 59, 37
+ xxlxor 36, 35, 36
+ vrlw 1, 1, 12
+ vrlw 11, 11, 12
+ vrlw 5, 5, 12
+ vrlw 4, 4, 12
+ vadduwm 17, 17, 30
+ vadduwm 28, 28, 26
+ vadduwm 0, 0, 7
+ vadduwm 15, 15, 21
+ vadduwm 17, 1, 17
+ vadduwm 28, 11, 28
+ vadduwm 0, 5, 0
+ vadduwm 15, 4, 15
+ xxlxor 56, 49, 56
+ xxlxor 61, 60, 61
+ xxlxor 51, 32, 51
+ xxlxor 34, 47, 34
+ vperm 24, 24, 24, 6
+ vperm 29, 29, 29, 6
+ vperm 19, 19, 19, 6
+ vperm 2, 2, 2, 6
+ xxlor 40, 23, 23
+ vadduwm 13, 28, 13
+ vadduwm 8, 17, 8
+ xxland 49, 56, 31
+ xxland 61, 61, 31
+ xxland 51, 51, 31
+ xxland 34, 34, 31
+ vadduwm 31, 17, 31
+ vadduwm 16, 29, 16
+ vadduwm 28, 19, 27
+ vadduwm 3, 2, 3
+ xxlxor 33, 63, 33
+ xxlxor 43, 48, 43
+ xxlxor 36, 35, 36
+ xxlxor 37, 60, 37
+ vrlw 4, 4, 10
+ vrlw 1, 1, 10
+ vrlw 11, 11, 10
+ vrlw 5, 5, 10
+ xxlor 2, 55, 55
+ vmr 23, 30
+ xxlor 62, 24, 24
+ vadduwm 0, 0, 22
+ vadduwm 15, 15, 30
+ vadduwm 8, 4, 8
+ vadduwm 13, 1, 13
+ vadduwm 0, 11, 0
+ vadduwm 15, 5, 15
+ xxlxor 61, 40, 61
+ xxlxor 51, 45, 51
+ xxlxor 34, 32, 34
+ xxlxor 49, 47, 49
+ vperm 29, 29, 29, 9
+ vperm 19, 19, 19, 9
+ vperm 2, 2, 2, 9
+ vperm 17, 17, 17, 9
+ vadduwm 13, 13, 14
+ xxlor 46, 5, 5
+ xxland 61, 61, 10
+ xxland 51, 51, 10
+ xxland 34, 34, 10
+ xxland 49, 49, 10
+ vadduwm 28, 29, 28
+ vadduwm 3, 19, 3
+ vadduwm 31, 2, 31
+ vadduwm 16, 17, 16
+ xxlxor 36, 60, 36
+ xxlxor 33, 35, 33
+ xxlxor 43, 63, 43
+ xxlxor 37, 48, 37
+ vrlw 4, 4, 12
+ vrlw 1, 1, 12
+ vrlw 11, 11, 12
+ vrlw 5, 5, 12
+ vadduwm 8, 8, 25
+ vadduwm 0, 0, 14
+ vadduwm 15, 15, 7
+ vadduwm 8, 4, 8
+ vadduwm 13, 1, 13
+ vadduwm 0, 11, 0
+ vadduwm 15, 5, 15
+ xxlxor 62, 40, 61
+ xxlxor 51, 45, 51
+ xxlxor 34, 32, 34
+ xxlxor 49, 47, 49
+ vperm 30, 30, 30, 6
+ vperm 19, 19, 19, 6
+ vperm 2, 2, 2, 6
+ vperm 17, 17, 17, 6
+ vadduwm 29, 8, 20
+ vadduwm 8, 13, 18
+ xxland 45, 62, 31
+ xxland 51, 51, 31
+ xxland 34, 34, 31
+ xxland 49, 49, 31
+ vadduwm 30, 13, 28
+ vadduwm 3, 19, 3
+ vadduwm 31, 2, 31
+ vadduwm 16, 17, 16
+ xxlxor 36, 62, 36
+ xxlxor 33, 35, 33
+ xxlxor 43, 63, 43
+ xxlxor 37, 48, 37
+ vrlw 1, 1, 10
+ vrlw 11, 11, 10
+ vrlw 5, 5, 10
+ vrlw 4, 4, 10
+ vadduwm 0, 0, 23
+ vadduwm 7, 15, 21
+ vadduwm 29, 1, 29
+ vadduwm 8, 11, 8
+ vadduwm 0, 5, 0
+ vadduwm 7, 4, 7
+ xxlxor 47, 61, 49
+ xxlxor 45, 40, 45
+ xxlxor 49, 32, 51
+ xxlxor 34, 39, 34
+ vperm 15, 15, 15, 9
+ vperm 13, 13, 13, 9
+ vperm 17, 17, 17, 9
+ vperm 2, 2, 2, 9
+ xxlor 46, 3, 3
+ vadduwm 9, 29, 26
+ vadduwm 8, 8, 14
+ xxland 46, 47, 10
+ xxland 45, 45, 10
+ xxland 47, 49, 10
+ xxland 34, 34, 10
+ vadduwm 17, 14, 31
+ vadduwm 16, 13, 16
+ vadduwm 18, 15, 30
+ vadduwm 3, 2, 3
+ xxlxor 33, 49, 33
+ xxlxor 43, 48, 43
+ xxlxor 37, 50, 37
+ xxlxor 36, 35, 36
+ vrlw 1, 1, 12
+ vrlw 11, 11, 12
+ vrlw 5, 5, 12
+ vrlw 4, 4, 12
+ xxlor 44, 6, 6
+ xxlor 0, 10, 10
+ vadduwm 0, 0, 12
+ xxlor 44, 2, 2
+ vadduwm 9, 1, 9
+ vadduwm 7, 7, 12
+ vadduwm 8, 11, 8
+ vadduwm 7, 4, 7
+ vadduwm 0, 5, 0
+ xxlxor 34, 39, 34
+ xxlxor 44, 32, 47
+ vperm 2, 2, 2, 6
+ xxlxor 46, 41, 46
+ xxlxor 45, 40, 45
+ vperm 12, 12, 12, 6
+ vperm 14, 14, 14, 6
+ vperm 13, 13, 13, 6
+ xxland 34, 34, 31
+ xxlor 1, 31, 31
+ vadduwm 3, 2, 3
+ xxland 44, 44, 31
+ xxlxor 36, 35, 36
+ xxlxor 51, 35, 40
+ xxland 35, 46, 31
+ xxland 38, 45, 31
+ vadduwm 15, 12, 18
+ vadduwm 8, 3, 17
+ vadduwm 13, 6, 16
+ xxlxor 37, 47, 37
+ xxlxor 33, 40, 33
+ xxlxor 43, 45, 43
+ vrlw 4, 4, 10
+ vrlw 1, 1, 10
+ vrlw 11, 11, 10
+ vrlw 5, 5, 10
+ xxlxor 47, 47, 41
+ xxlxor 40, 40, 32
+ xxlxor 39, 45, 39
+ xxlxor 50, 36, 38
+ xxlxor 63, 33, 44
+ xxlxor 43, 43, 34
+ xxlxor 41, 37, 35
+ bne 0, .LBB3_2
+.LBB3_5:
+ vmrglw 2, 19, 15
+ li 3, 32
+ li 4, 48
+ vmrglw 4, 7, 8
+ vmrglw 0, 31, 18
+ vmrglw 1, 9, 11
+ vmrghw 3, 19, 15
+ vmrghw 5, 7, 8
+ vmrghw 6, 31, 18
+ vmrghw 7, 9, 11
+ xxmrgld 40, 36, 34
+ xxmrghd 34, 36, 34
+ xxmrgld 41, 33, 32
+ xxswapd 0, 40
+ xxmrgld 36, 37, 35
+ xxmrghd 35, 37, 35
+ xxmrghd 37, 33, 32
+ xxswapd 1, 41
+ xxmrgld 32, 39, 38
+ xxmrghd 33, 39, 38
+ xxswapd 2, 34
+ xxswapd 4, 36
+ xxswapd 3, 37
+ stxvd2x 0, 0, 5
+ xxswapd 5, 32
+ stxvd2x 1, 5, 11
+ xxswapd 0, 35
+ xxswapd 1, 33
+ stxvd2x 2, 5, 3
+ li 3, 64
+ stxvd2x 3, 5, 4
+ li 4, 80
+ stxvd2x 4, 5, 3
+ li 3, 96
+ stxvd2x 5, 5, 4
+ li 4, 112
+ stxvd2x 0, 5, 3
+ stxvd2x 1, 5, 4
+ li 3, 224
+ lxvd2x 63, 1, 3
+ li 3, 208
+ lfd 31, 408(1)
+ ld 30, 304(1)
+ ld 29, 296(1)
+ lxvd2x 62, 1, 3
+ li 3, 192
+ lfd 30, 400(1)
+ ld 28, 288(1)
+ ld 27, 280(1)
+ lxvd2x 61, 1, 3
+ li 3, 176
+ lfd 29, 392(1)
+ ld 26, 272(1)
+ ld 25, 264(1)
+ lxvd2x 60, 1, 3
+ li 3, 160
+ lfd 28, 384(1)
+ ld 24, 256(1)
+ ld 23, 248(1)
+ lxvd2x 59, 1, 3
+ li 3, 144
+ lfd 27, 376(1)
+ ld 22, 240(1)
+ lxvd2x 58, 1, 3
+ li 3, 128
+ lfd 26, 368(1)
+ lxvd2x 57, 1, 3
+ li 3, 112
+ lfd 25, 360(1)
+ lxvd2x 56, 1, 3
+ li 3, 96
+ lfd 24, 352(1)
+ lxvd2x 55, 1, 3
+ li 3, 80
+ lfd 23, 344(1)
+ lxvd2x 54, 1, 3
+ li 3, 64
+ lfd 22, 336(1)
+ lxvd2x 53, 1, 3
+ li 3, 48
+ lfd 21, 328(1)
+ lxvd2x 52, 1, 3
+ lfd 20, 320(1)
+ addi 1, 1, 416
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end3:
+ .size blake3_hash4_sse41, .Lfunc_end3-.Lfunc_begin3
+ .cfi_endproc
+ .section ".note.GNU-stack","",@progbits
+#endif
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_avx2.S b/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_avx2.S
new file mode 100644
index 000000000000..b15d8fc7744e
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_avx2.S
@@ -0,0 +1,1845 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3
+ * Copyright (c) 2019-2020 Samuel Neves
+ * Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
+ */
+
+#if defined(HAVE_AVX2)
+
+#define _ASM
+#include <sys/asm_linkage.h>
+
+#if defined(__ELF__) && defined(__CET__) && defined(__has_include)
+#if __has_include(<cet.h>)
+#include <cet.h>
+#endif
+#endif
+
+#if !defined(_CET_ENDBR)
+#define _CET_ENDBR
+#endif
+
+.intel_syntax noprefix
+.global zfs_blake3_hash_many_avx2
+.text
+
+.type zfs_blake3_hash_many_avx2,@function
+.p2align 6
+zfs_blake3_hash_many_avx2:
+ _CET_ENDBR
+ push r15
+ push r14
+ push r13
+ push r12
+ push rbx
+ push rbp
+ mov rbp, rsp
+ sub rsp, 680
+ and rsp, 0xFFFFFFFFFFFFFFC0
+ neg r9d
+ vmovd xmm0, r9d
+ vpbroadcastd ymm0, xmm0
+ vmovdqa ymmword ptr [rsp+0x280], ymm0
+ vpand ymm1, ymm0, ymmword ptr [ADD0+rip]
+ vpand ymm2, ymm0, ymmword ptr [ADD1+rip]
+ vmovdqa ymmword ptr [rsp+0x220], ymm2
+ vmovd xmm2, r8d
+ vpbroadcastd ymm2, xmm2
+ vpaddd ymm2, ymm2, ymm1
+ vmovdqa ymmword ptr [rsp+0x240], ymm2
+ vpxor ymm1, ymm1, ymmword ptr [CMP_MSB_MASK+rip]
+ vpxor ymm2, ymm2, ymmword ptr [CMP_MSB_MASK+rip]
+ vpcmpgtd ymm2, ymm1, ymm2
+ shr r8, 32
+ vmovd xmm3, r8d
+ vpbroadcastd ymm3, xmm3
+ vpsubd ymm3, ymm3, ymm2
+ vmovdqa ymmword ptr [rsp+0x260], ymm3
+ shl rdx, 6
+ mov qword ptr [rsp+0x2A0], rdx
+ cmp rsi, 8
+ jc 3f
+2:
+ vpbroadcastd ymm0, dword ptr [rcx]
+ vpbroadcastd ymm1, dword ptr [rcx+0x4]
+ vpbroadcastd ymm2, dword ptr [rcx+0x8]
+ vpbroadcastd ymm3, dword ptr [rcx+0xC]
+ vpbroadcastd ymm4, dword ptr [rcx+0x10]
+ vpbroadcastd ymm5, dword ptr [rcx+0x14]
+ vpbroadcastd ymm6, dword ptr [rcx+0x18]
+ vpbroadcastd ymm7, dword ptr [rcx+0x1C]
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+0x8]
+ mov r10, qword ptr [rdi+0x10]
+ mov r11, qword ptr [rdi+0x18]
+ mov r12, qword ptr [rdi+0x20]
+ mov r13, qword ptr [rdi+0x28]
+ mov r14, qword ptr [rdi+0x30]
+ mov r15, qword ptr [rdi+0x38]
+ movzx eax, byte ptr [rbp+0x38]
+ movzx ebx, byte ptr [rbp+0x40]
+ or eax, ebx
+ xor edx, edx
+.p2align 5
+9:
+ movzx ebx, byte ptr [rbp+0x48]
+ or ebx, eax
+ add rdx, 64
+ cmp rdx, qword ptr [rsp+0x2A0]
+ cmove eax, ebx
+ mov dword ptr [rsp+0x200], eax
+ vmovups xmm8, xmmword ptr [r8+rdx-0x40]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x40], 0x01
+ vmovups xmm9, xmmword ptr [r9+rdx-0x40]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x40], 0x01
+ vunpcklpd ymm12, ymm8, ymm9
+ vunpckhpd ymm13, ymm8, ymm9
+ vmovups xmm10, xmmword ptr [r10+rdx-0x40]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x40], 0x01
+ vmovups xmm11, xmmword ptr [r11+rdx-0x40]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x40], 0x01
+ vunpcklpd ymm14, ymm10, ymm11
+ vunpckhpd ymm15, ymm10, ymm11
+ vshufps ymm8, ymm12, ymm14, 136
+ vmovaps ymmword ptr [rsp], ymm8
+ vshufps ymm9, ymm12, ymm14, 221
+ vmovaps ymmword ptr [rsp+0x20], ymm9
+ vshufps ymm10, ymm13, ymm15, 136
+ vmovaps ymmword ptr [rsp+0x40], ymm10
+ vshufps ymm11, ymm13, ymm15, 221
+ vmovaps ymmword ptr [rsp+0x60], ymm11
+ vmovups xmm8, xmmword ptr [r8+rdx-0x30]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x30], 0x01
+ vmovups xmm9, xmmword ptr [r9+rdx-0x30]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x30], 0x01
+ vunpcklpd ymm12, ymm8, ymm9
+ vunpckhpd ymm13, ymm8, ymm9
+ vmovups xmm10, xmmword ptr [r10+rdx-0x30]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x30], 0x01
+ vmovups xmm11, xmmword ptr [r11+rdx-0x30]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x30], 0x01
+ vunpcklpd ymm14, ymm10, ymm11
+ vunpckhpd ymm15, ymm10, ymm11
+ vshufps ymm8, ymm12, ymm14, 136
+ vmovaps ymmword ptr [rsp+0x80], ymm8
+ vshufps ymm9, ymm12, ymm14, 221
+ vmovaps ymmword ptr [rsp+0xA0], ymm9
+ vshufps ymm10, ymm13, ymm15, 136
+ vmovaps ymmword ptr [rsp+0xC0], ymm10
+ vshufps ymm11, ymm13, ymm15, 221
+ vmovaps ymmword ptr [rsp+0xE0], ymm11
+ vmovups xmm8, xmmword ptr [r8+rdx-0x20]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x20], 0x01
+ vmovups xmm9, xmmword ptr [r9+rdx-0x20]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x20], 0x01
+ vunpcklpd ymm12, ymm8, ymm9
+ vunpckhpd ymm13, ymm8, ymm9
+ vmovups xmm10, xmmword ptr [r10+rdx-0x20]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x20], 0x01
+ vmovups xmm11, xmmword ptr [r11+rdx-0x20]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x20], 0x01
+ vunpcklpd ymm14, ymm10, ymm11
+ vunpckhpd ymm15, ymm10, ymm11
+ vshufps ymm8, ymm12, ymm14, 136
+ vmovaps ymmword ptr [rsp+0x100], ymm8
+ vshufps ymm9, ymm12, ymm14, 221
+ vmovaps ymmword ptr [rsp+0x120], ymm9
+ vshufps ymm10, ymm13, ymm15, 136
+ vmovaps ymmword ptr [rsp+0x140], ymm10
+ vshufps ymm11, ymm13, ymm15, 221
+ vmovaps ymmword ptr [rsp+0x160], ymm11
+ vmovups xmm8, xmmword ptr [r8+rdx-0x10]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x10], 0x01
+ vmovups xmm9, xmmword ptr [r9+rdx-0x10]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x10], 0x01
+ vunpcklpd ymm12, ymm8, ymm9
+ vunpckhpd ymm13, ymm8, ymm9
+ vmovups xmm10, xmmword ptr [r10+rdx-0x10]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x10], 0x01
+ vmovups xmm11, xmmword ptr [r11+rdx-0x10]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x10], 0x01
+ vunpcklpd ymm14, ymm10, ymm11
+ vunpckhpd ymm15, ymm10, ymm11
+ vshufps ymm8, ymm12, ymm14, 136
+ vmovaps ymmword ptr [rsp+0x180], ymm8
+ vshufps ymm9, ymm12, ymm14, 221
+ vmovaps ymmword ptr [rsp+0x1A0], ymm9
+ vshufps ymm10, ymm13, ymm15, 136
+ vmovaps ymmword ptr [rsp+0x1C0], ymm10
+ vshufps ymm11, ymm13, ymm15, 221
+ vmovaps ymmword ptr [rsp+0x1E0], ymm11
+ vpbroadcastd ymm15, dword ptr [rsp+0x200]
+ prefetcht0 [r8+rdx+0x80]
+ prefetcht0 [r12+rdx+0x80]
+ prefetcht0 [r9+rdx+0x80]
+ prefetcht0 [r13+rdx+0x80]
+ prefetcht0 [r10+rdx+0x80]
+ prefetcht0 [r14+rdx+0x80]
+ prefetcht0 [r11+rdx+0x80]
+ prefetcht0 [r15+rdx+0x80]
+ vpaddd ymm0, ymm0, ymmword ptr [rsp]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x40]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x80]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0xC0]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm0, ymmword ptr [rsp+0x240]
+ vpxor ymm13, ymm1, ymmword ptr [rsp+0x260]
+ vpxor ymm14, ymm2, ymmword ptr [BLAKE3_BLOCK_LEN+rip]
+ vpxor ymm15, ymm3, ymm15
+ vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [BLAKE3_IV_0+rip]
+ vpaddd ymm9, ymm13, ymmword ptr [BLAKE3_IV_1+rip]
+ vpaddd ymm10, ymm14, ymmword ptr [BLAKE3_IV_2+rip]
+ vpaddd ymm11, ymm15, ymmword ptr [BLAKE3_IV_3+rip]
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x20]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x60]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0xA0]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0xE0]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x100]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x140]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x180]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1C0]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x120]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x160]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x1A0]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1E0]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x40]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x60]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0xE0]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x80]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0xC0]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x140]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1A0]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x20]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x180]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x120]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1E0]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x160]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0xA0]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x1C0]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x100]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x60]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x140]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x1A0]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0xE0]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x80]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x180]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x40]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1C0]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0xC0]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x120]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x160]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x100]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0xA0]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x1E0]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x20]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x140]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x180]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x1C0]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1A0]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0xE0]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x120]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x60]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1E0]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x80]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x160]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0xA0]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x20]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x40]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x100]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0xC0]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x180]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x120]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x1E0]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1C0]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x1A0]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x160]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x140]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x100]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0xE0]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0xA0]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0xC0]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x40]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x60]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x20]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x80]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x120]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x160]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x100]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1E0]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x1C0]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0xA0]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x180]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x20]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x1A0]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x40]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x80]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x60]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x140]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0xC0]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0xE0]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x160]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0xA0]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x20]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x100]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x1E0]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x120]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0xC0]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x1C0]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x40]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x60]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0xE0]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x140]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x180]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x80]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1A0]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vpxor ymm0, ymm0, ymm8
+ vpxor ymm1, ymm1, ymm9
+ vpxor ymm2, ymm2, ymm10
+ vpxor ymm3, ymm3, ymm11
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpxor ymm4, ymm4, ymm12
+ vpxor ymm5, ymm5, ymm13
+ vpxor ymm6, ymm6, ymm14
+ vpxor ymm7, ymm7, ymm15
+ movzx eax, byte ptr [rbp+0x38]
+ jne 9b
+ mov rbx, qword ptr [rbp+0x50]
+ vunpcklps ymm8, ymm0, ymm1
+ vunpcklps ymm9, ymm2, ymm3
+ vunpckhps ymm10, ymm0, ymm1
+ vunpcklps ymm11, ymm4, ymm5
+ vunpcklps ymm0, ymm6, ymm7
+ vshufps ymm12, ymm8, ymm9, 78
+ vblendps ymm1, ymm8, ymm12, 0xCC
+ vshufps ymm8, ymm11, ymm0, 78
+ vunpckhps ymm13, ymm2, ymm3
+ vblendps ymm2, ymm11, ymm8, 0xCC
+ vblendps ymm3, ymm12, ymm9, 0xCC
+ vperm2f128 ymm12, ymm1, ymm2, 0x20
+ vmovups ymmword ptr [rbx], ymm12
+ vunpckhps ymm14, ymm4, ymm5
+ vblendps ymm4, ymm8, ymm0, 0xCC
+ vunpckhps ymm15, ymm6, ymm7
+ vperm2f128 ymm7, ymm3, ymm4, 0x20
+ vmovups ymmword ptr [rbx+0x20], ymm7
+ vshufps ymm5, ymm10, ymm13, 78
+ vblendps ymm6, ymm5, ymm13, 0xCC
+ vshufps ymm13, ymm14, ymm15, 78
+ vblendps ymm10, ymm10, ymm5, 0xCC
+ vblendps ymm14, ymm14, ymm13, 0xCC
+ vperm2f128 ymm8, ymm10, ymm14, 0x20
+ vmovups ymmword ptr [rbx+0x40], ymm8
+ vblendps ymm15, ymm13, ymm15, 0xCC
+ vperm2f128 ymm13, ymm6, ymm15, 0x20
+ vmovups ymmword ptr [rbx+0x60], ymm13
+ vperm2f128 ymm9, ymm1, ymm2, 0x31
+ vperm2f128 ymm11, ymm3, ymm4, 0x31
+ vmovups ymmword ptr [rbx+0x80], ymm9
+ vperm2f128 ymm14, ymm10, ymm14, 0x31
+ vperm2f128 ymm15, ymm6, ymm15, 0x31
+ vmovups ymmword ptr [rbx+0xA0], ymm11
+ vmovups ymmword ptr [rbx+0xC0], ymm14
+ vmovups ymmword ptr [rbx+0xE0], ymm15
+ vmovdqa ymm0, ymmword ptr [rsp+0x220]
+ vpaddd ymm1, ymm0, ymmword ptr [rsp+0x240]
+ vmovdqa ymmword ptr [rsp+0x240], ymm1
+ vpxor ymm0, ymm0, ymmword ptr [CMP_MSB_MASK+rip]
+ vpxor ymm2, ymm1, ymmword ptr [CMP_MSB_MASK+rip]
+ vpcmpgtd ymm2, ymm0, ymm2
+ vmovdqa ymm0, ymmword ptr [rsp+0x260]
+ vpsubd ymm2, ymm0, ymm2
+ vmovdqa ymmword ptr [rsp+0x260], ymm2
+ add rdi, 64
+ add rbx, 256
+ mov qword ptr [rbp+0x50], rbx
+ sub rsi, 8
+ cmp rsi, 8
+ jnc 2b
+ test rsi, rsi
+ jnz 3f
+4:
+ vzeroupper
+ mov rsp, rbp
+ pop rbp
+ pop rbx
+ pop r12
+ pop r13
+ pop r14
+ pop r15
+ ret
+.p2align 5
+3:
+ mov rbx, qword ptr [rbp+0x50]
+ mov r15, qword ptr [rsp+0x2A0]
+ movzx r13d, byte ptr [rbp+0x38]
+ movzx r12d, byte ptr [rbp+0x48]
+ test rsi, 0x4
+ je 3f
+ vbroadcasti128 ymm0, xmmword ptr [rcx]
+ vbroadcasti128 ymm1, xmmword ptr [rcx+0x10]
+ vmovdqa ymm8, ymm0
+ vmovdqa ymm9, ymm1
+ vbroadcasti128 ymm12, xmmword ptr [rsp+0x240]
+ vbroadcasti128 ymm13, xmmword ptr [rsp+0x260]
+ vpunpckldq ymm14, ymm12, ymm13
+ vpunpckhdq ymm15, ymm12, ymm13
+ vpermq ymm14, ymm14, 0x50
+ vpermq ymm15, ymm15, 0x50
+ vbroadcasti128 ymm12, xmmword ptr [BLAKE3_BLOCK_LEN+rip]
+ vpblendd ymm14, ymm14, ymm12, 0x44
+ vpblendd ymm15, ymm15, ymm12, 0x44
+ vmovdqa ymmword ptr [rsp], ymm14
+ vmovdqa ymmword ptr [rsp+0x20], ymm15
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+0x8]
+ mov r10, qword ptr [rdi+0x10]
+ mov r11, qword ptr [rdi+0x18]
+ movzx eax, byte ptr [rbp+0x40]
+ or eax, r13d
+ xor edx, edx
+.p2align 5
+2:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ mov dword ptr [rsp+0x200], eax
+ vmovups ymm2, ymmword ptr [r8+rdx-0x40]
+ vinsertf128 ymm2, ymm2, xmmword ptr [r9+rdx-0x40], 0x01
+ vmovups ymm3, ymmword ptr [r8+rdx-0x30]
+ vinsertf128 ymm3, ymm3, xmmword ptr [r9+rdx-0x30], 0x01
+ vshufps ymm4, ymm2, ymm3, 136
+ vshufps ymm5, ymm2, ymm3, 221
+ vmovups ymm2, ymmword ptr [r8+rdx-0x20]
+ vinsertf128 ymm2, ymm2, xmmword ptr [r9+rdx-0x20], 0x01
+ vmovups ymm3, ymmword ptr [r8+rdx-0x10]
+ vinsertf128 ymm3, ymm3, xmmword ptr [r9+rdx-0x10], 0x01
+ vshufps ymm6, ymm2, ymm3, 136
+ vshufps ymm7, ymm2, ymm3, 221
+ vpshufd ymm6, ymm6, 0x93
+ vpshufd ymm7, ymm7, 0x93
+ vmovups ymm10, ymmword ptr [r10+rdx-0x40]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r11+rdx-0x40], 0x01
+ vmovups ymm11, ymmword ptr [r10+rdx-0x30]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r11+rdx-0x30], 0x01
+ vshufps ymm12, ymm10, ymm11, 136
+ vshufps ymm13, ymm10, ymm11, 221
+ vmovups ymm10, ymmword ptr [r10+rdx-0x20]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r11+rdx-0x20], 0x01
+ vmovups ymm11, ymmword ptr [r10+rdx-0x10]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r11+rdx-0x10], 0x01
+ vshufps ymm14, ymm10, ymm11, 136
+ vshufps ymm15, ymm10, ymm11, 221
+ vpshufd ymm14, ymm14, 0x93
+ vpshufd ymm15, ymm15, 0x93
+ prefetcht0 [r8+rdx+0x80]
+ prefetcht0 [r9+rdx+0x80]
+ prefetcht0 [r10+rdx+0x80]
+ prefetcht0 [r11+rdx+0x80]
+ vpbroadcastd ymm2, dword ptr [rsp+0x200]
+ vmovdqa ymm3, ymmword ptr [rsp]
+ vmovdqa ymm11, ymmword ptr [rsp+0x20]
+ vpblendd ymm3, ymm3, ymm2, 0x88
+ vpblendd ymm11, ymm11, ymm2, 0x88
+ vbroadcasti128 ymm2, xmmword ptr [BLAKE3_IV+rip]
+ vmovdqa ymm10, ymm2
+ mov al, 7
+9:
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm8, ymm8, ymm12
+ vmovdqa ymmword ptr [rsp+0x40], ymm4
+ nop
+ vmovdqa ymmword ptr [rsp+0x60], ymm12
+ nop
+ vpaddd ymm0, ymm0, ymm1
+ vpaddd ymm8, ymm8, ymm9
+ vpxor ymm3, ymm3, ymm0
+ vpxor ymm11, ymm11, ymm8
+ vbroadcasti128 ymm4, xmmword ptr [ROT16+rip]
+ vpshufb ymm3, ymm3, ymm4
+ vpshufb ymm11, ymm11, ymm4
+ vpaddd ymm2, ymm2, ymm3
+ vpaddd ymm10, ymm10, ymm11
+ vpxor ymm1, ymm1, ymm2
+ vpxor ymm9, ymm9, ymm10
+ vpsrld ymm4, ymm1, 12
+ vpslld ymm1, ymm1, 20
+ vpor ymm1, ymm1, ymm4
+ vpsrld ymm4, ymm9, 12
+ vpslld ymm9, ymm9, 20
+ vpor ymm9, ymm9, ymm4
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm0, ymm0, ymm1
+ vpaddd ymm8, ymm8, ymm9
+ vmovdqa ymmword ptr [rsp+0x80], ymm5
+ vmovdqa ymmword ptr [rsp+0xA0], ymm13
+ vpxor ymm3, ymm3, ymm0
+ vpxor ymm11, ymm11, ymm8
+ vbroadcasti128 ymm4, xmmword ptr [ROT8+rip]
+ vpshufb ymm3, ymm3, ymm4
+ vpshufb ymm11, ymm11, ymm4
+ vpaddd ymm2, ymm2, ymm3
+ vpaddd ymm10, ymm10, ymm11
+ vpxor ymm1, ymm1, ymm2
+ vpxor ymm9, ymm9, ymm10
+ vpsrld ymm4, ymm1, 7
+ vpslld ymm1, ymm1, 25
+ vpor ymm1, ymm1, ymm4
+ vpsrld ymm4, ymm9, 7
+ vpslld ymm9, ymm9, 25
+ vpor ymm9, ymm9, ymm4
+ vpshufd ymm0, ymm0, 0x93
+ vpshufd ymm8, ymm8, 0x93
+ vpshufd ymm3, ymm3, 0x4E
+ vpshufd ymm11, ymm11, 0x4E
+ vpshufd ymm2, ymm2, 0x39
+ vpshufd ymm10, ymm10, 0x39
+ vpaddd ymm0, ymm0, ymm6
+ vpaddd ymm8, ymm8, ymm14
+ vpaddd ymm0, ymm0, ymm1
+ vpaddd ymm8, ymm8, ymm9
+ vpxor ymm3, ymm3, ymm0
+ vpxor ymm11, ymm11, ymm8
+ vbroadcasti128 ymm4, xmmword ptr [ROT16+rip]
+ vpshufb ymm3, ymm3, ymm4
+ vpshufb ymm11, ymm11, ymm4
+ vpaddd ymm2, ymm2, ymm3
+ vpaddd ymm10, ymm10, ymm11
+ vpxor ymm1, ymm1, ymm2
+ vpxor ymm9, ymm9, ymm10
+ vpsrld ymm4, ymm1, 12
+ vpslld ymm1, ymm1, 20
+ vpor ymm1, ymm1, ymm4
+ vpsrld ymm4, ymm9, 12
+ vpslld ymm9, ymm9, 20
+ vpor ymm9, ymm9, ymm4
+ vpaddd ymm0, ymm0, ymm7
+ vpaddd ymm8, ymm8, ymm15
+ vpaddd ymm0, ymm0, ymm1
+ vpaddd ymm8, ymm8, ymm9
+ vpxor ymm3, ymm3, ymm0
+ vpxor ymm11, ymm11, ymm8
+ vbroadcasti128 ymm4, xmmword ptr [ROT8+rip]
+ vpshufb ymm3, ymm3, ymm4
+ vpshufb ymm11, ymm11, ymm4
+ vpaddd ymm2, ymm2, ymm3
+ vpaddd ymm10, ymm10, ymm11
+ vpxor ymm1, ymm1, ymm2
+ vpxor ymm9, ymm9, ymm10
+ vpsrld ymm4, ymm1, 7
+ vpslld ymm1, ymm1, 25
+ vpor ymm1, ymm1, ymm4
+ vpsrld ymm4, ymm9, 7
+ vpslld ymm9, ymm9, 25
+ vpor ymm9, ymm9, ymm4
+ vpshufd ymm0, ymm0, 0x39
+ vpshufd ymm8, ymm8, 0x39
+ vpshufd ymm3, ymm3, 0x4E
+ vpshufd ymm11, ymm11, 0x4E
+ vpshufd ymm2, ymm2, 0x93
+ vpshufd ymm10, ymm10, 0x93
+ dec al
+ je 9f
+ vmovdqa ymm4, ymmword ptr [rsp+0x40]
+ vmovdqa ymm5, ymmword ptr [rsp+0x80]
+ vshufps ymm12, ymm4, ymm5, 214
+ vpshufd ymm13, ymm4, 0x0F
+ vpshufd ymm4, ymm12, 0x39
+ vshufps ymm12, ymm6, ymm7, 250
+ vpblendd ymm13, ymm13, ymm12, 0xAA
+ vpunpcklqdq ymm12, ymm7, ymm5
+ vpblendd ymm12, ymm12, ymm6, 0x88
+ vpshufd ymm12, ymm12, 0x78
+ vpunpckhdq ymm5, ymm5, ymm7
+ vpunpckldq ymm6, ymm6, ymm5
+ vpshufd ymm7, ymm6, 0x1E
+ vmovdqa ymmword ptr [rsp+0x40], ymm13
+ vmovdqa ymmword ptr [rsp+0x80], ymm12
+ vmovdqa ymm12, ymmword ptr [rsp+0x60]
+ vmovdqa ymm13, ymmword ptr [rsp+0xA0]
+ vshufps ymm5, ymm12, ymm13, 214
+ vpshufd ymm6, ymm12, 0x0F
+ vpshufd ymm12, ymm5, 0x39
+ vshufps ymm5, ymm14, ymm15, 250
+ vpblendd ymm6, ymm6, ymm5, 0xAA
+ vpunpcklqdq ymm5, ymm15, ymm13
+ vpblendd ymm5, ymm5, ymm14, 0x88
+ vpshufd ymm5, ymm5, 0x78
+ vpunpckhdq ymm13, ymm13, ymm15
+ vpunpckldq ymm14, ymm14, ymm13
+ vpshufd ymm15, ymm14, 0x1E
+ vmovdqa ymm13, ymm6
+ vmovdqa ymm14, ymm5
+ vmovdqa ymm5, ymmword ptr [rsp+0x40]
+ vmovdqa ymm6, ymmword ptr [rsp+0x80]
+ jmp 9b
+9:
+ vpxor ymm0, ymm0, ymm2
+ vpxor ymm1, ymm1, ymm3
+ vpxor ymm8, ymm8, ymm10
+ vpxor ymm9, ymm9, ymm11
+ mov eax, r13d
+ cmp rdx, r15
+ jne 2b
+ vmovdqu xmmword ptr [rbx], xmm0
+ vmovdqu xmmword ptr [rbx+0x10], xmm1
+ vextracti128 xmmword ptr [rbx+0x20], ymm0, 0x01
+ vextracti128 xmmword ptr [rbx+0x30], ymm1, 0x01
+ vmovdqu xmmword ptr [rbx+0x40], xmm8
+ vmovdqu xmmword ptr [rbx+0x50], xmm9
+ vextracti128 xmmword ptr [rbx+0x60], ymm8, 0x01
+ vextracti128 xmmword ptr [rbx+0x70], ymm9, 0x01
+ vmovaps xmm8, xmmword ptr [rsp+0x280]
+ vmovaps xmm0, xmmword ptr [rsp+0x240]
+ vmovaps xmm1, xmmword ptr [rsp+0x250]
+ vmovaps xmm2, xmmword ptr [rsp+0x260]
+ vmovaps xmm3, xmmword ptr [rsp+0x270]
+ vblendvps xmm0, xmm0, xmm1, xmm8
+ vblendvps xmm2, xmm2, xmm3, xmm8
+ vmovaps xmmword ptr [rsp+0x240], xmm0
+ vmovaps xmmword ptr [rsp+0x260], xmm2
+ add rbx, 128
+ add rdi, 32
+ sub rsi, 4
+3:
+ test rsi, 0x2
+ je 3f
+ vbroadcasti128 ymm0, xmmword ptr [rcx]
+ vbroadcasti128 ymm1, xmmword ptr [rcx+0x10]
+ vmovd xmm13, dword ptr [rsp+0x240]
+ vpinsrd xmm13, xmm13, dword ptr [rsp+0x260], 1
+ vpinsrd xmm13, xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
+ vmovd xmm14, dword ptr [rsp+0x244]
+ vpinsrd xmm14, xmm14, dword ptr [rsp+0x264], 1
+ vpinsrd xmm14, xmm14, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
+ vinserti128 ymm13, ymm13, xmm14, 0x01
+ vbroadcasti128 ymm14, xmmword ptr [ROT16+rip]
+ vbroadcasti128 ymm15, xmmword ptr [ROT8+rip]
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+0x8]
+ movzx eax, byte ptr [rbp+0x40]
+ or eax, r13d
+ xor edx, edx
+.p2align 5
+2:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ mov dword ptr [rsp+0x200], eax
+ vbroadcasti128 ymm2, xmmword ptr [BLAKE3_IV+rip]
+ vpbroadcastd ymm8, dword ptr [rsp+0x200]
+ vpblendd ymm3, ymm13, ymm8, 0x88
+ vmovups ymm8, ymmword ptr [r8+rdx-0x40]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r9+rdx-0x40], 0x01
+ vmovups ymm9, ymmword ptr [r8+rdx-0x30]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r9+rdx-0x30], 0x01
+ vshufps ymm4, ymm8, ymm9, 136
+ vshufps ymm5, ymm8, ymm9, 221
+ vmovups ymm8, ymmword ptr [r8+rdx-0x20]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r9+rdx-0x20], 0x01
+ vmovups ymm9, ymmword ptr [r8+rdx-0x10]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r9+rdx-0x10], 0x01
+ vshufps ymm6, ymm8, ymm9, 136
+ vshufps ymm7, ymm8, ymm9, 221
+ vpshufd ymm6, ymm6, 0x93
+ vpshufd ymm7, ymm7, 0x93
+ mov al, 7
+9:
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm0, ymm0, ymm1
+ vpxor ymm3, ymm3, ymm0
+ vpshufb ymm3, ymm3, ymm14
+ vpaddd ymm2, ymm2, ymm3
+ vpxor ymm1, ymm1, ymm2
+ vpsrld ymm8, ymm1, 12
+ vpslld ymm1, ymm1, 20
+ vpor ymm1, ymm1, ymm8
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm0, ymm0, ymm1
+ vpxor ymm3, ymm3, ymm0
+ vpshufb ymm3, ymm3, ymm15
+ vpaddd ymm2, ymm2, ymm3
+ vpxor ymm1, ymm1, ymm2
+ vpsrld ymm8, ymm1, 7
+ vpslld ymm1, ymm1, 25
+ vpor ymm1, ymm1, ymm8
+ vpshufd ymm0, ymm0, 0x93
+ vpshufd ymm3, ymm3, 0x4E
+ vpshufd ymm2, ymm2, 0x39
+ vpaddd ymm0, ymm0, ymm6
+ vpaddd ymm0, ymm0, ymm1
+ vpxor ymm3, ymm3, ymm0
+ vpshufb ymm3, ymm3, ymm14
+ vpaddd ymm2, ymm2, ymm3
+ vpxor ymm1, ymm1, ymm2
+ vpsrld ymm8, ymm1, 12
+ vpslld ymm1, ymm1, 20
+ vpor ymm1, ymm1, ymm8
+ vpaddd ymm0, ymm0, ymm7
+ vpaddd ymm0, ymm0, ymm1
+ vpxor ymm3, ymm3, ymm0
+ vpshufb ymm3, ymm3, ymm15
+ vpaddd ymm2, ymm2, ymm3
+ vpxor ymm1, ymm1, ymm2
+ vpsrld ymm8, ymm1, 7
+ vpslld ymm1, ymm1, 25
+ vpor ymm1, ymm1, ymm8
+ vpshufd ymm0, ymm0, 0x39
+ vpshufd ymm3, ymm3, 0x4E
+ vpshufd ymm2, ymm2, 0x93
+ dec al
+ jz 9f
+ vshufps ymm8, ymm4, ymm5, 214
+ vpshufd ymm9, ymm4, 0x0F
+ vpshufd ymm4, ymm8, 0x39
+ vshufps ymm8, ymm6, ymm7, 250
+ vpblendd ymm9, ymm9, ymm8, 0xAA
+ vpunpcklqdq ymm8, ymm7, ymm5
+ vpblendd ymm8, ymm8, ymm6, 0x88
+ vpshufd ymm8, ymm8, 0x78
+ vpunpckhdq ymm5, ymm5, ymm7
+ vpunpckldq ymm6, ymm6, ymm5
+ vpshufd ymm7, ymm6, 0x1E
+ vmovdqa ymm5, ymm9
+ vmovdqa ymm6, ymm8
+ jmp 9b
+9:
+ vpxor ymm0, ymm0, ymm2
+ vpxor ymm1, ymm1, ymm3
+ mov eax, r13d
+ cmp rdx, r15
+ jne 2b
+ vmovdqu xmmword ptr [rbx], xmm0
+ vmovdqu xmmword ptr [rbx+0x10], xmm1
+ vextracti128 xmmword ptr [rbx+0x20], ymm0, 0x01
+ vextracti128 xmmword ptr [rbx+0x30], ymm1, 0x01
+ vmovaps ymm8, ymmword ptr [rsp+0x280]
+ vmovaps ymm0, ymmword ptr [rsp+0x240]
+ vmovups ymm1, ymmword ptr [rsp+0x248]
+ vmovaps ymm2, ymmword ptr [rsp+0x260]
+ vmovups ymm3, ymmword ptr [rsp+0x268]
+ vblendvps ymm0, ymm0, ymm1, ymm8
+ vblendvps ymm2, ymm2, ymm3, ymm8
+ vmovaps ymmword ptr [rsp+0x240], ymm0
+ vmovaps ymmword ptr [rsp+0x260], ymm2
+ add rbx, 64
+ add rdi, 16
+ sub rsi, 2
+3:
+ test rsi, 0x1
+ je 4b
+ vmovdqu xmm0, xmmword ptr [rcx]
+ vmovdqu xmm1, xmmword ptr [rcx+0x10]
+ vmovd xmm3, dword ptr [rsp+0x240]
+ vpinsrd xmm3, xmm3, dword ptr [rsp+0x260], 1
+ vpinsrd xmm13, xmm3, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
+ vmovdqa xmm14, xmmword ptr [ROT16+rip]
+ vmovdqa xmm15, xmmword ptr [ROT8+rip]
+ mov r8, qword ptr [rdi]
+ movzx eax, byte ptr [rbp+0x40]
+ or eax, r13d
+ xor edx, edx
+.p2align 5
+2:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ vmovdqa xmm2, xmmword ptr [BLAKE3_IV+rip]
+ vmovdqa xmm3, xmm13
+ vpinsrd xmm3, xmm3, eax, 3
+ vmovups xmm8, xmmword ptr [r8+rdx-0x40]
+ vmovups xmm9, xmmword ptr [r8+rdx-0x30]
+ vshufps xmm4, xmm8, xmm9, 136
+ vshufps xmm5, xmm8, xmm9, 221
+ vmovups xmm8, xmmword ptr [r8+rdx-0x20]
+ vmovups xmm9, xmmword ptr [r8+rdx-0x10]
+ vshufps xmm6, xmm8, xmm9, 136
+ vshufps xmm7, xmm8, xmm9, 221
+ vpshufd xmm6, xmm6, 0x93
+ vpshufd xmm7, xmm7, 0x93
+ mov al, 7
+9:
+ vpaddd xmm0, xmm0, xmm4
+ vpaddd xmm0, xmm0, xmm1
+ vpxor xmm3, xmm3, xmm0
+ vpshufb xmm3, xmm3, xmm14
+ vpaddd xmm2, xmm2, xmm3
+ vpxor xmm1, xmm1, xmm2
+ vpsrld xmm8, xmm1, 12
+ vpslld xmm1, xmm1, 20
+ vpor xmm1, xmm1, xmm8
+ vpaddd xmm0, xmm0, xmm5
+ vpaddd xmm0, xmm0, xmm1
+ vpxor xmm3, xmm3, xmm0
+ vpshufb xmm3, xmm3, xmm15
+ vpaddd xmm2, xmm2, xmm3
+ vpxor xmm1, xmm1, xmm2
+ vpsrld xmm8, xmm1, 7
+ vpslld xmm1, xmm1, 25
+ vpor xmm1, xmm1, xmm8
+ vpshufd xmm0, xmm0, 0x93
+ vpshufd xmm3, xmm3, 0x4E
+ vpshufd xmm2, xmm2, 0x39
+ vpaddd xmm0, xmm0, xmm6
+ vpaddd xmm0, xmm0, xmm1
+ vpxor xmm3, xmm3, xmm0
+ vpshufb xmm3, xmm3, xmm14
+ vpaddd xmm2, xmm2, xmm3
+ vpxor xmm1, xmm1, xmm2
+ vpsrld xmm8, xmm1, 12
+ vpslld xmm1, xmm1, 20
+ vpor xmm1, xmm1, xmm8
+ vpaddd xmm0, xmm0, xmm7
+ vpaddd xmm0, xmm0, xmm1
+ vpxor xmm3, xmm3, xmm0
+ vpshufb xmm3, xmm3, xmm15
+ vpaddd xmm2, xmm2, xmm3
+ vpxor xmm1, xmm1, xmm2
+ vpsrld xmm8, xmm1, 7
+ vpslld xmm1, xmm1, 25
+ vpor xmm1, xmm1, xmm8
+ vpshufd xmm0, xmm0, 0x39
+ vpshufd xmm3, xmm3, 0x4E
+ vpshufd xmm2, xmm2, 0x93
+ dec al
+ jz 9f
+ vshufps xmm8, xmm4, xmm5, 214
+ vpshufd xmm9, xmm4, 0x0F
+ vpshufd xmm4, xmm8, 0x39
+ vshufps xmm8, xmm6, xmm7, 250
+ vpblendd xmm9, xmm9, xmm8, 0xAA
+ vpunpcklqdq xmm8, xmm7, xmm5
+ vpblendd xmm8, xmm8, xmm6, 0x88
+ vpshufd xmm8, xmm8, 0x78
+ vpunpckhdq xmm5, xmm5, xmm7
+ vpunpckldq xmm6, xmm6, xmm5
+ vpshufd xmm7, xmm6, 0x1E
+ vmovdqa xmm5, xmm9
+ vmovdqa xmm6, xmm8
+ jmp 9b
+9:
+ vpxor xmm0, xmm0, xmm2
+ vpxor xmm1, xmm1, xmm3
+ mov eax, r13d
+ cmp rdx, r15
+ jne 2b
+ vmovdqu xmmword ptr [rbx], xmm0
+ vmovdqu xmmword ptr [rbx+0x10], xmm1
+ jmp 4b
+
+.size zfs_blake3_hash_many_avx2, . - zfs_blake3_hash_many_avx2
+
+#ifdef __APPLE__
+.static_data
+#else
+.section .rodata /* read-only constant tables; the AVX2 code addresses them RIP-relative */
+#endif
+
+.p2align 6 /* 64-byte base alignment: ROT16, ROT8 and BLAKE3_IV are read with vmovdqa (aligned load), so their alignment depends on this plus the exact size and order of the entries below -- do not reorder or resize */
+ADD0: /* eight 32-bit values 0..7; the use is outside this excerpt -- presumably per-lane counter offsets, TODO confirm */
+ .long 0, 1, 2, 3, 4, 5, 6, 7
+ADD1: /* eight copies of 8; the use is outside this excerpt -- presumably the counter step per 8-input iteration, TODO confirm */
+ .long 8, 8, 8, 8, 8, 8, 8, 8
+BLAKE3_IV_0: /* first word of BLAKE3_IV replicated across eight 32-bit lanes */
+ .long 0x6A09E667, 0x6A09E667, 0x6A09E667, 0x6A09E667
+ .long 0x6A09E667, 0x6A09E667, 0x6A09E667, 0x6A09E667
+BLAKE3_IV_1: /* second word of BLAKE3_IV replicated across eight 32-bit lanes */
+ .long 0xBB67AE85, 0xBB67AE85, 0xBB67AE85, 0xBB67AE85
+ .long 0xBB67AE85, 0xBB67AE85, 0xBB67AE85, 0xBB67AE85
+BLAKE3_IV_2: /* third word of BLAKE3_IV replicated across eight 32-bit lanes */
+ .long 0x3C6EF372, 0x3C6EF372, 0x3C6EF372, 0x3C6EF372
+ .long 0x3C6EF372, 0x3C6EF372, 0x3C6EF372, 0x3C6EF372
+BLAKE3_IV_3: /* fourth word of BLAKE3_IV replicated across eight 32-bit lanes */
+ .long 0xA54FF53A, 0xA54FF53A, 0xA54FF53A, 0xA54FF53A
+ .long 0xA54FF53A, 0xA54FF53A, 0xA54FF53A, 0xA54FF53A
+BLAKE3_BLOCK_LEN: /* 0x40 = 64, eight copies; the 4/2/1-input paths blend or insert it into the vector that also carries the words from [rsp+0x240] and [rsp+0x260]; equals the 64-byte step of the block loops */
+ .long 0x00000040, 0x00000040, 0x00000040, 0x00000040
+ .long 0x00000040, 0x00000040, 0x00000040, 0x00000040
+ROT16: /* vpshufb control (16 bytes): swaps the two 16-bit halves of every dword, i.e. rotate by 16 */
+ .byte 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13
+ROT8: /* vpshufb control (16 bytes): rotates every dword right by 8 bits */
+ .byte 1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8, 13, 14, 15, 12
+CMP_MSB_MASK: /* sign-bit mask XORed into both operands before vpcmpgtd so the signed compare orders them as unsigned; the result is subtracted from [rsp+0x260] to carry into it after the add to [rsp+0x240] */
+ .long 0x80000000, 0x80000000, 0x80000000, 0x80000000
+ .long 0x80000000, 0x80000000, 0x80000000, 0x80000000
+BLAKE3_IV: /* the four IV words as one 16-byte row; loaded or broadcast into xmm2/ymm2 (and copied to ymm10) at the start of each block in the 4/2/1-input paths */
+ .long 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A
+#endif /* HAVE_AVX2 */
+
+#ifdef __ELF__
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_avx512.S b/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_avx512.S
new file mode 100644
index 000000000000..d02c5e7ec92f
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_avx512.S
@@ -0,0 +1,2618 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3
+ * Copyright (c) 2019-2020 Samuel Neves
+ * Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
+ */
+
+#if defined(HAVE_AVX512F) && defined(HAVE_AVX512VL)
+
+#define _ASM
+#include <sys/asm_linkage.h> /* _ASM is defined just above; presumably it marks this include as coming from assembly - confirm in the header */
+
+#if defined(__ELF__) && defined(__CET__) && defined(__has_include) /* probe for cet.h only on ELF builds with __CET__ set and a preprocessor that has __has_include */
+#if __has_include(<cet.h>)
+#include <cet.h> /* NOTE(review): expected to supply _CET_ENDBR when present - confirm */
+#endif
+#endif
+
+#if !defined(_CET_ENDBR) /* fallback: define _CET_ENDBR as empty so its use at function entry expands to nothing */
+#define _CET_ENDBR
+#endif
+
+.intel_syntax noprefix /* Intel operand order (destination first); register names written without a % prefix */
+.global zfs_blake3_hash_many_avx512 /* export the three entry points provided by this file */
+.global zfs_blake3_compress_in_place_avx512
+.global zfs_blake3_compress_xof_avx512
+.text
+
+.type zfs_blake3_hash_many_avx512,@function /* tag each exported symbol as a function in the symbol table */
+.type zfs_blake3_compress_xof_avx512,@function
+.type zfs_blake3_compress_in_place_avx512,@function
+
+.p2align 6
+zfs_blake3_hash_many_avx512:
+ _CET_ENDBR
+ push r15
+ push r14
+ push r13
+ push r12
+ push rbx
+ push rbp
+ mov rbp, rsp
+ sub rsp, 144
+ and rsp, 0xFFFFFFFFFFFFFFC0
+ neg r9
+ kmovw k1, r9d
+ vmovd xmm0, r8d
+ vpbroadcastd ymm0, xmm0
+ shr r8, 32
+ vmovd xmm1, r8d
+ vpbroadcastd ymm1, xmm1
+ vmovdqa ymm4, ymm1
+ vmovdqa ymm5, ymm1
+ vpaddd ymm2, ymm0, ymmword ptr [ADD0+rip]
+ vpaddd ymm3, ymm0, ymmword ptr [ADD0+32+rip]
+ vpcmpltud k2, ymm2, ymm0
+ vpcmpltud k3, ymm3, ymm0
+ vpaddd ymm4 {k2}, ymm4, dword ptr [ADD1+rip] {1to8}
+ vpaddd ymm5 {k3}, ymm5, dword ptr [ADD1+rip] {1to8}
+ knotw k2, k1
+ vmovdqa32 ymm2 {k2}, ymm0
+ vmovdqa32 ymm3 {k2}, ymm0
+ vmovdqa32 ymm4 {k2}, ymm1
+ vmovdqa32 ymm5 {k2}, ymm1
+ vmovdqa ymmword ptr [rsp], ymm2
+ vmovdqa ymmword ptr [rsp+0x1*0x20], ymm3
+ vmovdqa ymmword ptr [rsp+0x2*0x20], ymm4
+ vmovdqa ymmword ptr [rsp+0x3*0x20], ymm5
+ shl rdx, 6
+ mov qword ptr [rsp+0x80], rdx
+ cmp rsi, 16
+ jc 3f
+2:
+ vpbroadcastd zmm0, dword ptr [rcx]
+ vpbroadcastd zmm1, dword ptr [rcx+0x1*0x4]
+ vpbroadcastd zmm2, dword ptr [rcx+0x2*0x4]
+ vpbroadcastd zmm3, dword ptr [rcx+0x3*0x4]
+ vpbroadcastd zmm4, dword ptr [rcx+0x4*0x4]
+ vpbroadcastd zmm5, dword ptr [rcx+0x5*0x4]
+ vpbroadcastd zmm6, dword ptr [rcx+0x6*0x4]
+ vpbroadcastd zmm7, dword ptr [rcx+0x7*0x4]
+ movzx eax, byte ptr [rbp+0x38]
+ movzx ebx, byte ptr [rbp+0x40]
+ or eax, ebx
+ xor edx, edx
+.p2align 5
+9:
+ movzx ebx, byte ptr [rbp+0x48]
+ or ebx, eax
+ add rdx, 64
+ cmp rdx, qword ptr [rsp+0x80]
+ cmove eax, ebx
+ mov dword ptr [rsp+0x88], eax
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+0x8]
+ mov r10, qword ptr [rdi+0x10]
+ mov r11, qword ptr [rdi+0x18]
+ mov r12, qword ptr [rdi+0x40]
+ mov r13, qword ptr [rdi+0x48]
+ mov r14, qword ptr [rdi+0x50]
+ mov r15, qword ptr [rdi+0x58]
+ vmovdqu32 ymm16, ymmword ptr [rdx+r8-0x2*0x20]
+ vinserti32x8 zmm16, zmm16, ymmword ptr [rdx+r12-0x2*0x20], 0x01
+ vmovdqu32 ymm17, ymmword ptr [rdx+r9-0x2*0x20]
+ vinserti32x8 zmm17, zmm17, ymmword ptr [rdx+r13-0x2*0x20], 0x01
+ vpunpcklqdq zmm8, zmm16, zmm17
+ vpunpckhqdq zmm9, zmm16, zmm17
+ vmovdqu32 ymm18, ymmword ptr [rdx+r10-0x2*0x20]
+ vinserti32x8 zmm18, zmm18, ymmword ptr [rdx+r14-0x2*0x20], 0x01
+ vmovdqu32 ymm19, ymmword ptr [rdx+r11-0x2*0x20]
+ vinserti32x8 zmm19, zmm19, ymmword ptr [rdx+r15-0x2*0x20], 0x01
+ vpunpcklqdq zmm10, zmm18, zmm19
+ vpunpckhqdq zmm11, zmm18, zmm19
+ mov r8, qword ptr [rdi+0x20]
+ mov r9, qword ptr [rdi+0x28]
+ mov r10, qword ptr [rdi+0x30]
+ mov r11, qword ptr [rdi+0x38]
+ mov r12, qword ptr [rdi+0x60]
+ mov r13, qword ptr [rdi+0x68]
+ mov r14, qword ptr [rdi+0x70]
+ mov r15, qword ptr [rdi+0x78]
+ vmovdqu32 ymm16, ymmword ptr [rdx+r8-0x2*0x20]
+ vinserti32x8 zmm16, zmm16, ymmword ptr [rdx+r12-0x2*0x20], 0x01
+ vmovdqu32 ymm17, ymmword ptr [rdx+r9-0x2*0x20]
+ vinserti32x8 zmm17, zmm17, ymmword ptr [rdx+r13-0x2*0x20], 0x01
+ vpunpcklqdq zmm12, zmm16, zmm17
+ vpunpckhqdq zmm13, zmm16, zmm17
+ vmovdqu32 ymm18, ymmword ptr [rdx+r10-0x2*0x20]
+ vinserti32x8 zmm18, zmm18, ymmword ptr [rdx+r14-0x2*0x20], 0x01
+ vmovdqu32 ymm19, ymmword ptr [rdx+r11-0x2*0x20]
+ vinserti32x8 zmm19, zmm19, ymmword ptr [rdx+r15-0x2*0x20], 0x01
+ vpunpcklqdq zmm14, zmm18, zmm19
+ vpunpckhqdq zmm15, zmm18, zmm19
+ vmovdqa32 zmm27, zmmword ptr [INDEX0+rip]
+ vmovdqa32 zmm31, zmmword ptr [INDEX1+rip]
+ vshufps zmm16, zmm8, zmm10, 136
+ vshufps zmm17, zmm12, zmm14, 136
+ vmovdqa32 zmm20, zmm16
+ vpermt2d zmm16, zmm27, zmm17
+ vpermt2d zmm20, zmm31, zmm17
+ vshufps zmm17, zmm8, zmm10, 221
+ vshufps zmm30, zmm12, zmm14, 221
+ vmovdqa32 zmm21, zmm17
+ vpermt2d zmm17, zmm27, zmm30
+ vpermt2d zmm21, zmm31, zmm30
+ vshufps zmm18, zmm9, zmm11, 136
+ vshufps zmm8, zmm13, zmm15, 136
+ vmovdqa32 zmm22, zmm18
+ vpermt2d zmm18, zmm27, zmm8
+ vpermt2d zmm22, zmm31, zmm8
+ vshufps zmm19, zmm9, zmm11, 221
+ vshufps zmm8, zmm13, zmm15, 221
+ vmovdqa32 zmm23, zmm19
+ vpermt2d zmm19, zmm27, zmm8
+ vpermt2d zmm23, zmm31, zmm8
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+0x8]
+ mov r10, qword ptr [rdi+0x10]
+ mov r11, qword ptr [rdi+0x18]
+ mov r12, qword ptr [rdi+0x40]
+ mov r13, qword ptr [rdi+0x48]
+ mov r14, qword ptr [rdi+0x50]
+ mov r15, qword ptr [rdi+0x58]
+ vmovdqu32 ymm24, ymmword ptr [r8+rdx-0x1*0x20]
+ vinserti32x8 zmm24, zmm24, ymmword ptr [r12+rdx-0x1*0x20], 0x01
+ vmovdqu32 ymm25, ymmword ptr [r9+rdx-0x1*0x20]
+ vinserti32x8 zmm25, zmm25, ymmword ptr [r13+rdx-0x1*0x20], 0x01
+ vpunpcklqdq zmm8, zmm24, zmm25
+ vpunpckhqdq zmm9, zmm24, zmm25
+ vmovdqu32 ymm24, ymmword ptr [r10+rdx-0x1*0x20]
+ vinserti32x8 zmm24, zmm24, ymmword ptr [r14+rdx-0x1*0x20], 0x01
+ vmovdqu32 ymm25, ymmword ptr [r11+rdx-0x1*0x20]
+ vinserti32x8 zmm25, zmm25, ymmword ptr [r15+rdx-0x1*0x20], 0x01
+ vpunpcklqdq zmm10, zmm24, zmm25
+ vpunpckhqdq zmm11, zmm24, zmm25
+ prefetcht0 [r8+rdx+0x80]
+ prefetcht0 [r12+rdx+0x80]
+ prefetcht0 [r9+rdx+0x80]
+ prefetcht0 [r13+rdx+0x80]
+ prefetcht0 [r10+rdx+0x80]
+ prefetcht0 [r14+rdx+0x80]
+ prefetcht0 [r11+rdx+0x80]
+ prefetcht0 [r15+rdx+0x80]
+ mov r8, qword ptr [rdi+0x20]
+ mov r9, qword ptr [rdi+0x28]
+ mov r10, qword ptr [rdi+0x30]
+ mov r11, qword ptr [rdi+0x38]
+ mov r12, qword ptr [rdi+0x60]
+ mov r13, qword ptr [rdi+0x68]
+ mov r14, qword ptr [rdi+0x70]
+ mov r15, qword ptr [rdi+0x78]
+ vmovdqu32 ymm24, ymmword ptr [r8+rdx-0x1*0x20]
+ vinserti32x8 zmm24, zmm24, ymmword ptr [r12+rdx-0x1*0x20], 0x01
+ vmovdqu32 ymm25, ymmword ptr [r9+rdx-0x1*0x20]
+ vinserti32x8 zmm25, zmm25, ymmword ptr [r13+rdx-0x1*0x20], 0x01
+ vpunpcklqdq zmm12, zmm24, zmm25
+ vpunpckhqdq zmm13, zmm24, zmm25
+ vmovdqu32 ymm24, ymmword ptr [r10+rdx-0x1*0x20]
+ vinserti32x8 zmm24, zmm24, ymmword ptr [r14+rdx-0x1*0x20], 0x01
+ vmovdqu32 ymm25, ymmword ptr [r11+rdx-0x1*0x20]
+ vinserti32x8 zmm25, zmm25, ymmword ptr [r15+rdx-0x1*0x20], 0x01
+ vpunpcklqdq zmm14, zmm24, zmm25
+ vpunpckhqdq zmm15, zmm24, zmm25
+ prefetcht0 [r8+rdx+0x80]
+ prefetcht0 [r12+rdx+0x80]
+ prefetcht0 [r9+rdx+0x80]
+ prefetcht0 [r13+rdx+0x80]
+ prefetcht0 [r10+rdx+0x80]
+ prefetcht0 [r14+rdx+0x80]
+ prefetcht0 [r11+rdx+0x80]
+ prefetcht0 [r15+rdx+0x80]
+ vshufps zmm24, zmm8, zmm10, 136
+ vshufps zmm30, zmm12, zmm14, 136
+ vmovdqa32 zmm28, zmm24
+ vpermt2d zmm24, zmm27, zmm30
+ vpermt2d zmm28, zmm31, zmm30
+ vshufps zmm25, zmm8, zmm10, 221
+ vshufps zmm30, zmm12, zmm14, 221
+ vmovdqa32 zmm29, zmm25
+ vpermt2d zmm25, zmm27, zmm30
+ vpermt2d zmm29, zmm31, zmm30
+ vshufps zmm26, zmm9, zmm11, 136
+ vshufps zmm8, zmm13, zmm15, 136
+ vmovdqa32 zmm30, zmm26
+ vpermt2d zmm26, zmm27, zmm8
+ vpermt2d zmm30, zmm31, zmm8
+ vshufps zmm8, zmm9, zmm11, 221
+ vshufps zmm10, zmm13, zmm15, 221
+ vpermi2d zmm27, zmm8, zmm10
+ vpermi2d zmm31, zmm8, zmm10
+ vpbroadcastd zmm8, dword ptr [BLAKE3_IV_0+rip]
+ vpbroadcastd zmm9, dword ptr [BLAKE3_IV_1+rip]
+ vpbroadcastd zmm10, dword ptr [BLAKE3_IV_2+rip]
+ vpbroadcastd zmm11, dword ptr [BLAKE3_IV_3+rip]
+ vmovdqa32 zmm12, zmmword ptr [rsp]
+ vmovdqa32 zmm13, zmmword ptr [rsp+0x1*0x40]
+ vpbroadcastd zmm14, dword ptr [BLAKE3_BLOCK_LEN+rip]
+ vpbroadcastd zmm15, dword ptr [rsp+0x22*0x4]
+ vpaddd zmm0, zmm0, zmm16
+ vpaddd zmm1, zmm1, zmm18
+ vpaddd zmm2, zmm2, zmm20
+ vpaddd zmm3, zmm3, zmm22
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vprord zmm15, zmm15, 16
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 12
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vpaddd zmm0, zmm0, zmm17
+ vpaddd zmm1, zmm1, zmm19
+ vpaddd zmm2, zmm2, zmm21
+ vpaddd zmm3, zmm3, zmm23
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vprord zmm15, zmm15, 8
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 7
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vpaddd zmm0, zmm0, zmm24
+ vpaddd zmm1, zmm1, zmm26
+ vpaddd zmm2, zmm2, zmm28
+ vpaddd zmm3, zmm3, zmm30
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 16
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vprord zmm4, zmm4, 12
+ vpaddd zmm0, zmm0, zmm25
+ vpaddd zmm1, zmm1, zmm27
+ vpaddd zmm2, zmm2, zmm29
+ vpaddd zmm3, zmm3, zmm31
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 8
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vprord zmm4, zmm4, 7
+ vpaddd zmm0, zmm0, zmm18
+ vpaddd zmm1, zmm1, zmm19
+ vpaddd zmm2, zmm2, zmm23
+ vpaddd zmm3, zmm3, zmm20
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vprord zmm15, zmm15, 16
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 12
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vpaddd zmm0, zmm0, zmm22
+ vpaddd zmm1, zmm1, zmm26
+ vpaddd zmm2, zmm2, zmm16
+ vpaddd zmm3, zmm3, zmm29
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vprord zmm15, zmm15, 8
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 7
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vpaddd zmm0, zmm0, zmm17
+ vpaddd zmm1, zmm1, zmm28
+ vpaddd zmm2, zmm2, zmm25
+ vpaddd zmm3, zmm3, zmm31
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 16
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vprord zmm4, zmm4, 12
+ vpaddd zmm0, zmm0, zmm27
+ vpaddd zmm1, zmm1, zmm21
+ vpaddd zmm2, zmm2, zmm30
+ vpaddd zmm3, zmm3, zmm24
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 8
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vprord zmm4, zmm4, 7
+ vpaddd zmm0, zmm0, zmm19
+ vpaddd zmm1, zmm1, zmm26
+ vpaddd zmm2, zmm2, zmm29
+ vpaddd zmm3, zmm3, zmm23
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vprord zmm15, zmm15, 16
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 12
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vpaddd zmm0, zmm0, zmm20
+ vpaddd zmm1, zmm1, zmm28
+ vpaddd zmm2, zmm2, zmm18
+ vpaddd zmm3, zmm3, zmm30
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vprord zmm15, zmm15, 8
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 7
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vpaddd zmm0, zmm0, zmm22
+ vpaddd zmm1, zmm1, zmm25
+ vpaddd zmm2, zmm2, zmm27
+ vpaddd zmm3, zmm3, zmm24
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 16
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vprord zmm4, zmm4, 12
+ vpaddd zmm0, zmm0, zmm21
+ vpaddd zmm1, zmm1, zmm16
+ vpaddd zmm2, zmm2, zmm31
+ vpaddd zmm3, zmm3, zmm17
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 8
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vprord zmm4, zmm4, 7
+ vpaddd zmm0, zmm0, zmm26
+ vpaddd zmm1, zmm1, zmm28
+ vpaddd zmm2, zmm2, zmm30
+ vpaddd zmm3, zmm3, zmm29
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vprord zmm15, zmm15, 16
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 12
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vpaddd zmm0, zmm0, zmm23
+ vpaddd zmm1, zmm1, zmm25
+ vpaddd zmm2, zmm2, zmm19
+ vpaddd zmm3, zmm3, zmm31
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vprord zmm15, zmm15, 8
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 7
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vpaddd zmm0, zmm0, zmm20
+ vpaddd zmm1, zmm1, zmm27
+ vpaddd zmm2, zmm2, zmm21
+ vpaddd zmm3, zmm3, zmm17
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 16
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vprord zmm4, zmm4, 12
+ vpaddd zmm0, zmm0, zmm16
+ vpaddd zmm1, zmm1, zmm18
+ vpaddd zmm2, zmm2, zmm24
+ vpaddd zmm3, zmm3, zmm22
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 8
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vprord zmm4, zmm4, 7
+ vpaddd zmm0, zmm0, zmm28
+ vpaddd zmm1, zmm1, zmm25
+ vpaddd zmm2, zmm2, zmm31
+ vpaddd zmm3, zmm3, zmm30
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vprord zmm15, zmm15, 16
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 12
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vpaddd zmm0, zmm0, zmm29
+ vpaddd zmm1, zmm1, zmm27
+ vpaddd zmm2, zmm2, zmm26
+ vpaddd zmm3, zmm3, zmm24
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vprord zmm15, zmm15, 8
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 7
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vpaddd zmm0, zmm0, zmm23
+ vpaddd zmm1, zmm1, zmm21
+ vpaddd zmm2, zmm2, zmm16
+ vpaddd zmm3, zmm3, zmm22
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 16
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vprord zmm4, zmm4, 12
+ vpaddd zmm0, zmm0, zmm18
+ vpaddd zmm1, zmm1, zmm19
+ vpaddd zmm2, zmm2, zmm17
+ vpaddd zmm3, zmm3, zmm20
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 8
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vprord zmm4, zmm4, 7
+ vpaddd zmm0, zmm0, zmm25
+ vpaddd zmm1, zmm1, zmm27
+ vpaddd zmm2, zmm2, zmm24
+ vpaddd zmm3, zmm3, zmm31
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vprord zmm15, zmm15, 16
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 12
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vpaddd zmm0, zmm0, zmm30
+ vpaddd zmm1, zmm1, zmm21
+ vpaddd zmm2, zmm2, zmm28
+ vpaddd zmm3, zmm3, zmm17
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vprord zmm15, zmm15, 8
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 7
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vpaddd zmm0, zmm0, zmm29
+ vpaddd zmm1, zmm1, zmm16
+ vpaddd zmm2, zmm2, zmm18
+ vpaddd zmm3, zmm3, zmm20
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 16
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vprord zmm4, zmm4, 12
+ vpaddd zmm0, zmm0, zmm19
+ vpaddd zmm1, zmm1, zmm26
+ vpaddd zmm2, zmm2, zmm22
+ vpaddd zmm3, zmm3, zmm23
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 8
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vprord zmm4, zmm4, 7
+ vpaddd zmm0, zmm0, zmm27
+ vpaddd zmm1, zmm1, zmm21
+ vpaddd zmm2, zmm2, zmm17
+ vpaddd zmm3, zmm3, zmm24
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vprord zmm15, zmm15, 16
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 12
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vpaddd zmm0, zmm0, zmm31
+ vpaddd zmm1, zmm1, zmm16
+ vpaddd zmm2, zmm2, zmm25
+ vpaddd zmm3, zmm3, zmm22
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vprord zmm15, zmm15, 8
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 7
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vpaddd zmm0, zmm0, zmm30
+ vpaddd zmm1, zmm1, zmm18
+ vpaddd zmm2, zmm2, zmm19
+ vpaddd zmm3, zmm3, zmm23
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 16
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vprord zmm4, zmm4, 12
+ vpaddd zmm0, zmm0, zmm26
+ vpaddd zmm1, zmm1, zmm28
+ vpaddd zmm2, zmm2, zmm20
+ vpaddd zmm3, zmm3, zmm29
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 8
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vprord zmm4, zmm4, 7
+ vpxord zmm0, zmm0, zmm8
+ vpxord zmm1, zmm1, zmm9
+ vpxord zmm2, zmm2, zmm10
+ vpxord zmm3, zmm3, zmm11
+ vpxord zmm4, zmm4, zmm12
+ vpxord zmm5, zmm5, zmm13
+ vpxord zmm6, zmm6, zmm14
+ vpxord zmm7, zmm7, zmm15
+ movzx eax, byte ptr [rbp+0x38]
+ jne 9b
+ mov rbx, qword ptr [rbp+0x50]
+ vpunpckldq zmm16, zmm0, zmm1
+ vpunpckhdq zmm17, zmm0, zmm1
+ vpunpckldq zmm18, zmm2, zmm3
+ vpunpckhdq zmm19, zmm2, zmm3
+ vpunpckldq zmm20, zmm4, zmm5
+ vpunpckhdq zmm21, zmm4, zmm5
+ vpunpckldq zmm22, zmm6, zmm7
+ vpunpckhdq zmm23, zmm6, zmm7
+ vpunpcklqdq zmm0, zmm16, zmm18
+ vpunpckhqdq zmm1, zmm16, zmm18
+ vpunpcklqdq zmm2, zmm17, zmm19
+ vpunpckhqdq zmm3, zmm17, zmm19
+ vpunpcklqdq zmm4, zmm20, zmm22
+ vpunpckhqdq zmm5, zmm20, zmm22
+ vpunpcklqdq zmm6, zmm21, zmm23
+ vpunpckhqdq zmm7, zmm21, zmm23
+ vshufi32x4 zmm16, zmm0, zmm4, 0x88
+ vshufi32x4 zmm17, zmm1, zmm5, 0x88
+ vshufi32x4 zmm18, zmm2, zmm6, 0x88
+ vshufi32x4 zmm19, zmm3, zmm7, 0x88
+ vshufi32x4 zmm20, zmm0, zmm4, 0xDD
+ vshufi32x4 zmm21, zmm1, zmm5, 0xDD
+ vshufi32x4 zmm22, zmm2, zmm6, 0xDD
+ vshufi32x4 zmm23, zmm3, zmm7, 0xDD
+ vshufi32x4 zmm0, zmm16, zmm17, 0x88
+ vshufi32x4 zmm1, zmm18, zmm19, 0x88
+ vshufi32x4 zmm2, zmm20, zmm21, 0x88
+ vshufi32x4 zmm3, zmm22, zmm23, 0x88
+ vshufi32x4 zmm4, zmm16, zmm17, 0xDD
+ vshufi32x4 zmm5, zmm18, zmm19, 0xDD
+ vshufi32x4 zmm6, zmm20, zmm21, 0xDD
+ vshufi32x4 zmm7, zmm22, zmm23, 0xDD
+ vmovdqu32 zmmword ptr [rbx], zmm0
+ vmovdqu32 zmmword ptr [rbx+0x1*0x40], zmm1
+ vmovdqu32 zmmword ptr [rbx+0x2*0x40], zmm2
+ vmovdqu32 zmmword ptr [rbx+0x3*0x40], zmm3
+ vmovdqu32 zmmword ptr [rbx+0x4*0x40], zmm4
+ vmovdqu32 zmmword ptr [rbx+0x5*0x40], zmm5
+ vmovdqu32 zmmword ptr [rbx+0x6*0x40], zmm6
+ vmovdqu32 zmmword ptr [rbx+0x7*0x40], zmm7
+ vmovdqa32 zmm0, zmmword ptr [rsp]
+ vmovdqa32 zmm1, zmmword ptr [rsp+0x1*0x40]
+ vmovdqa32 zmm2, zmm0
+ vpaddd zmm2{k1}, zmm0, dword ptr [ADD16+rip] {1to16}
+ vpcmpltud k2, zmm2, zmm0
+ vpaddd zmm1 {k2}, zmm1, dword ptr [ADD1+rip] {1to16}
+ vmovdqa32 zmmword ptr [rsp], zmm2
+ vmovdqa32 zmmword ptr [rsp+0x1*0x40], zmm1
+ add rdi, 128
+ add rbx, 512
+ mov qword ptr [rbp+0x50], rbx
+ sub rsi, 16
+ cmp rsi, 16
+ jnc 2b
+ test rsi, rsi
+ jnz 3f
+4:
+ vzeroupper
+ mov rsp, rbp
+ pop rbp
+ pop rbx
+ pop r12
+ pop r13
+ pop r14
+ pop r15
+ ret
+.p2align 6
+3:
+ test esi, 0x8
+ je 3f
+ vpbroadcastd ymm0, dword ptr [rcx]
+ vpbroadcastd ymm1, dword ptr [rcx+0x4]
+ vpbroadcastd ymm2, dword ptr [rcx+0x8]
+ vpbroadcastd ymm3, dword ptr [rcx+0xC]
+ vpbroadcastd ymm4, dword ptr [rcx+0x10]
+ vpbroadcastd ymm5, dword ptr [rcx+0x14]
+ vpbroadcastd ymm6, dword ptr [rcx+0x18]
+ vpbroadcastd ymm7, dword ptr [rcx+0x1C]
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+0x8]
+ mov r10, qword ptr [rdi+0x10]
+ mov r11, qword ptr [rdi+0x18]
+ mov r12, qword ptr [rdi+0x20]
+ mov r13, qword ptr [rdi+0x28]
+ mov r14, qword ptr [rdi+0x30]
+ mov r15, qword ptr [rdi+0x38]
+ movzx eax, byte ptr [rbp+0x38]
+ movzx ebx, byte ptr [rbp+0x40]
+ or eax, ebx
+ xor edx, edx
+2:
+ movzx ebx, byte ptr [rbp+0x48]
+ or ebx, eax
+ add rdx, 64
+ cmp rdx, qword ptr [rsp+0x80]
+ cmove eax, ebx
+ mov dword ptr [rsp+0x88], eax
+ vmovups xmm8, xmmword ptr [r8+rdx-0x40]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x40], 0x01
+ vmovups xmm9, xmmword ptr [r9+rdx-0x40]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x40], 0x01
+ vunpcklpd ymm12, ymm8, ymm9
+ vunpckhpd ymm13, ymm8, ymm9
+ vmovups xmm10, xmmword ptr [r10+rdx-0x40]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x40], 0x01
+ vmovups xmm11, xmmword ptr [r11+rdx-0x40]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x40], 0x01
+ vunpcklpd ymm14, ymm10, ymm11
+ vunpckhpd ymm15, ymm10, ymm11
+ vshufps ymm16, ymm12, ymm14, 136
+ vshufps ymm17, ymm12, ymm14, 221
+ vshufps ymm18, ymm13, ymm15, 136
+ vshufps ymm19, ymm13, ymm15, 221
+ vmovups xmm8, xmmword ptr [r8+rdx-0x30]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x30], 0x01
+ vmovups xmm9, xmmword ptr [r9+rdx-0x30]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x30], 0x01
+ vunpcklpd ymm12, ymm8, ymm9
+ vunpckhpd ymm13, ymm8, ymm9
+ vmovups xmm10, xmmword ptr [r10+rdx-0x30]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x30], 0x01
+ vmovups xmm11, xmmword ptr [r11+rdx-0x30]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x30], 0x01
+ vunpcklpd ymm14, ymm10, ymm11
+ vunpckhpd ymm15, ymm10, ymm11
+ vshufps ymm20, ymm12, ymm14, 136
+ vshufps ymm21, ymm12, ymm14, 221
+ vshufps ymm22, ymm13, ymm15, 136
+ vshufps ymm23, ymm13, ymm15, 221
+ vmovups xmm8, xmmword ptr [r8+rdx-0x20]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x20], 0x01
+ vmovups xmm9, xmmword ptr [r9+rdx-0x20]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x20], 0x01
+ vunpcklpd ymm12, ymm8, ymm9
+ vunpckhpd ymm13, ymm8, ymm9
+ vmovups xmm10, xmmword ptr [r10+rdx-0x20]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x20], 0x01
+ vmovups xmm11, xmmword ptr [r11+rdx-0x20]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x20], 0x01
+ vunpcklpd ymm14, ymm10, ymm11
+ vunpckhpd ymm15, ymm10, ymm11
+ vshufps ymm24, ymm12, ymm14, 136
+ vshufps ymm25, ymm12, ymm14, 221
+ vshufps ymm26, ymm13, ymm15, 136
+ vshufps ymm27, ymm13, ymm15, 221
+ vmovups xmm8, xmmword ptr [r8+rdx-0x10]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x10], 0x01
+ vmovups xmm9, xmmword ptr [r9+rdx-0x10]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x10], 0x01
+ vunpcklpd ymm12, ymm8, ymm9
+ vunpckhpd ymm13, ymm8, ymm9
+ vmovups xmm10, xmmword ptr [r10+rdx-0x10]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x10], 0x01
+ vmovups xmm11, xmmword ptr [r11+rdx-0x10]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x10], 0x01
+ vunpcklpd ymm14, ymm10, ymm11
+ vunpckhpd ymm15, ymm10, ymm11
+ vshufps ymm28, ymm12, ymm14, 136
+ vshufps ymm29, ymm12, ymm14, 221
+ vshufps ymm30, ymm13, ymm15, 136
+ vshufps ymm31, ymm13, ymm15, 221
+ vpbroadcastd ymm8, dword ptr [BLAKE3_IV_0+rip]
+ vpbroadcastd ymm9, dword ptr [BLAKE3_IV_1+rip]
+ vpbroadcastd ymm10, dword ptr [BLAKE3_IV_2+rip]
+ vpbroadcastd ymm11, dword ptr [BLAKE3_IV_3+rip]
+ vmovdqa ymm12, ymmword ptr [rsp]
+ vmovdqa ymm13, ymmword ptr [rsp+0x40]
+ vpbroadcastd ymm14, dword ptr [BLAKE3_BLOCK_LEN+rip]
+ vpbroadcastd ymm15, dword ptr [rsp+0x88]
+ vpaddd ymm0, ymm0, ymm16
+ vpaddd ymm1, ymm1, ymm18
+ vpaddd ymm2, ymm2, ymm20
+ vpaddd ymm3, ymm3, ymm22
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vprord ymm15, ymm15, 16
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 12
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vpaddd ymm0, ymm0, ymm17
+ vpaddd ymm1, ymm1, ymm19
+ vpaddd ymm2, ymm2, ymm21
+ vpaddd ymm3, ymm3, ymm23
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vprord ymm15, ymm15, 8
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 7
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vpaddd ymm0, ymm0, ymm24
+ vpaddd ymm1, ymm1, ymm26
+ vpaddd ymm2, ymm2, ymm28
+ vpaddd ymm3, ymm3, ymm30
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 16
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vprord ymm4, ymm4, 12
+ vpaddd ymm0, ymm0, ymm25
+ vpaddd ymm1, ymm1, ymm27
+ vpaddd ymm2, ymm2, ymm29
+ vpaddd ymm3, ymm3, ymm31
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 8
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vprord ymm4, ymm4, 7
+ vpaddd ymm0, ymm0, ymm18
+ vpaddd ymm1, ymm1, ymm19
+ vpaddd ymm2, ymm2, ymm23
+ vpaddd ymm3, ymm3, ymm20
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vprord ymm15, ymm15, 16
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 12
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vpaddd ymm0, ymm0, ymm22
+ vpaddd ymm1, ymm1, ymm26
+ vpaddd ymm2, ymm2, ymm16
+ vpaddd ymm3, ymm3, ymm29
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vprord ymm15, ymm15, 8
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 7
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vpaddd ymm0, ymm0, ymm17
+ vpaddd ymm1, ymm1, ymm28
+ vpaddd ymm2, ymm2, ymm25
+ vpaddd ymm3, ymm3, ymm31
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 16
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vprord ymm4, ymm4, 12
+ vpaddd ymm0, ymm0, ymm27
+ vpaddd ymm1, ymm1, ymm21
+ vpaddd ymm2, ymm2, ymm30
+ vpaddd ymm3, ymm3, ymm24
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 8
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vprord ymm4, ymm4, 7
+ vpaddd ymm0, ymm0, ymm19
+ vpaddd ymm1, ymm1, ymm26
+ vpaddd ymm2, ymm2, ymm29
+ vpaddd ymm3, ymm3, ymm23
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vprord ymm15, ymm15, 16
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 12
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vpaddd ymm0, ymm0, ymm20
+ vpaddd ymm1, ymm1, ymm28
+ vpaddd ymm2, ymm2, ymm18
+ vpaddd ymm3, ymm3, ymm30
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vprord ymm15, ymm15, 8
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 7
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vpaddd ymm0, ymm0, ymm22
+ vpaddd ymm1, ymm1, ymm25
+ vpaddd ymm2, ymm2, ymm27
+ vpaddd ymm3, ymm3, ymm24
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 16
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vprord ymm4, ymm4, 12
+ vpaddd ymm0, ymm0, ymm21
+ vpaddd ymm1, ymm1, ymm16
+ vpaddd ymm2, ymm2, ymm31
+ vpaddd ymm3, ymm3, ymm17
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 8
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vprord ymm4, ymm4, 7
+ vpaddd ymm0, ymm0, ymm26
+ vpaddd ymm1, ymm1, ymm28
+ vpaddd ymm2, ymm2, ymm30
+ vpaddd ymm3, ymm3, ymm29
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vprord ymm15, ymm15, 16
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 12
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vpaddd ymm0, ymm0, ymm23
+ vpaddd ymm1, ymm1, ymm25
+ vpaddd ymm2, ymm2, ymm19
+ vpaddd ymm3, ymm3, ymm31
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vprord ymm15, ymm15, 8
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 7
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vpaddd ymm0, ymm0, ymm20
+ vpaddd ymm1, ymm1, ymm27
+ vpaddd ymm2, ymm2, ymm21
+ vpaddd ymm3, ymm3, ymm17
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 16
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vprord ymm4, ymm4, 12
+ vpaddd ymm0, ymm0, ymm16
+ vpaddd ymm1, ymm1, ymm18
+ vpaddd ymm2, ymm2, ymm24
+ vpaddd ymm3, ymm3, ymm22
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 8
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vprord ymm4, ymm4, 7
+ vpaddd ymm0, ymm0, ymm28
+ vpaddd ymm1, ymm1, ymm25
+ vpaddd ymm2, ymm2, ymm31
+ vpaddd ymm3, ymm3, ymm30
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vprord ymm15, ymm15, 16
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 12
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vpaddd ymm0, ymm0, ymm29
+ vpaddd ymm1, ymm1, ymm27
+ vpaddd ymm2, ymm2, ymm26
+ vpaddd ymm3, ymm3, ymm24
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vprord ymm15, ymm15, 8
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 7
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vpaddd ymm0, ymm0, ymm23
+ vpaddd ymm1, ymm1, ymm21
+ vpaddd ymm2, ymm2, ymm16
+ vpaddd ymm3, ymm3, ymm22
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 16
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vprord ymm4, ymm4, 12
+ vpaddd ymm0, ymm0, ymm18
+ vpaddd ymm1, ymm1, ymm19
+ vpaddd ymm2, ymm2, ymm17
+ vpaddd ymm3, ymm3, ymm20
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 8
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vprord ymm4, ymm4, 7
+ vpaddd ymm0, ymm0, ymm25
+ vpaddd ymm1, ymm1, ymm27
+ vpaddd ymm2, ymm2, ymm24
+ vpaddd ymm3, ymm3, ymm31
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vprord ymm15, ymm15, 16
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 12
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vpaddd ymm0, ymm0, ymm30
+ vpaddd ymm1, ymm1, ymm21
+ vpaddd ymm2, ymm2, ymm28
+ vpaddd ymm3, ymm3, ymm17
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vprord ymm15, ymm15, 8
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 7
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vpaddd ymm0, ymm0, ymm29
+ vpaddd ymm1, ymm1, ymm16
+ vpaddd ymm2, ymm2, ymm18
+ vpaddd ymm3, ymm3, ymm20
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 16
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vprord ymm4, ymm4, 12
+ vpaddd ymm0, ymm0, ymm19
+ vpaddd ymm1, ymm1, ymm26
+ vpaddd ymm2, ymm2, ymm22
+ vpaddd ymm3, ymm3, ymm23
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 8
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vprord ymm4, ymm4, 7
+ vpaddd ymm0, ymm0, ymm27
+ vpaddd ymm1, ymm1, ymm21
+ vpaddd ymm2, ymm2, ymm17
+ vpaddd ymm3, ymm3, ymm24
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vprord ymm15, ymm15, 16
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 12
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vpaddd ymm0, ymm0, ymm31
+ vpaddd ymm1, ymm1, ymm16
+ vpaddd ymm2, ymm2, ymm25
+ vpaddd ymm3, ymm3, ymm22
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vprord ymm15, ymm15, 8
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 7
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vpaddd ymm0, ymm0, ymm30
+ vpaddd ymm1, ymm1, ymm18
+ vpaddd ymm2, ymm2, ymm19
+ vpaddd ymm3, ymm3, ymm23
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 16
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vprord ymm4, ymm4, 12
+ vpaddd ymm0, ymm0, ymm26
+ vpaddd ymm1, ymm1, ymm28
+ vpaddd ymm2, ymm2, ymm20
+ vpaddd ymm3, ymm3, ymm29
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 8
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vprord ymm4, ymm4, 7
+ vpxor ymm0, ymm0, ymm8
+ vpxor ymm1, ymm1, ymm9
+ vpxor ymm2, ymm2, ymm10
+ vpxor ymm3, ymm3, ymm11
+ vpxor ymm4, ymm4, ymm12
+ vpxor ymm5, ymm5, ymm13
+ vpxor ymm6, ymm6, ymm14
+ vpxor ymm7, ymm7, ymm15
+ movzx eax, byte ptr [rbp+0x38]
+ jne 2b
+ mov rbx, qword ptr [rbp+0x50]
+ vunpcklps ymm8, ymm0, ymm1
+ vunpcklps ymm9, ymm2, ymm3
+ vunpckhps ymm10, ymm0, ymm1
+ vunpcklps ymm11, ymm4, ymm5
+ vunpcklps ymm0, ymm6, ymm7
+ vshufps ymm12, ymm8, ymm9, 78
+ vblendps ymm1, ymm8, ymm12, 0xCC
+ vshufps ymm8, ymm11, ymm0, 78
+ vunpckhps ymm13, ymm2, ymm3
+ vblendps ymm2, ymm11, ymm8, 0xCC
+ vblendps ymm3, ymm12, ymm9, 0xCC
+ vperm2f128 ymm12, ymm1, ymm2, 0x20
+ vmovups ymmword ptr [rbx], ymm12
+ vunpckhps ymm14, ymm4, ymm5
+ vblendps ymm4, ymm8, ymm0, 0xCC
+ vunpckhps ymm15, ymm6, ymm7
+ vperm2f128 ymm7, ymm3, ymm4, 0x20
+ vmovups ymmword ptr [rbx+0x20], ymm7
+ vshufps ymm5, ymm10, ymm13, 78
+ vblendps ymm6, ymm5, ymm13, 0xCC
+ vshufps ymm13, ymm14, ymm15, 78
+ vblendps ymm10, ymm10, ymm5, 0xCC
+ vblendps ymm14, ymm14, ymm13, 0xCC
+ vperm2f128 ymm8, ymm10, ymm14, 0x20
+ vmovups ymmword ptr [rbx+0x40], ymm8
+ vblendps ymm15, ymm13, ymm15, 0xCC
+ vperm2f128 ymm13, ymm6, ymm15, 0x20
+ vmovups ymmword ptr [rbx+0x60], ymm13
+ vperm2f128 ymm9, ymm1, ymm2, 0x31
+ vperm2f128 ymm11, ymm3, ymm4, 0x31
+ vmovups ymmword ptr [rbx+0x80], ymm9
+ vperm2f128 ymm14, ymm10, ymm14, 0x31
+ vperm2f128 ymm15, ymm6, ymm15, 0x31
+ vmovups ymmword ptr [rbx+0xA0], ymm11
+ vmovups ymmword ptr [rbx+0xC0], ymm14
+ vmovups ymmword ptr [rbx+0xE0], ymm15
+ vmovdqa ymm0, ymmword ptr [rsp]
+ vmovdqa ymm2, ymmword ptr [rsp+0x2*0x20]
+ vmovdqa32 ymm0 {k1}, ymmword ptr [rsp+0x1*0x20]
+ vmovdqa32 ymm2 {k1}, ymmword ptr [rsp+0x3*0x20]
+ vmovdqa ymmword ptr [rsp], ymm0
+ vmovdqa ymmword ptr [rsp+0x2*0x20], ymm2
+ add rbx, 256
+ mov qword ptr [rbp+0x50], rbx
+ add rdi, 64
+ sub rsi, 8
+3:
+ mov rbx, qword ptr [rbp+0x50]
+ mov r15, qword ptr [rsp+0x80]
+ movzx r13, byte ptr [rbp+0x38]
+ movzx r12, byte ptr [rbp+0x48]
+ test esi, 0x4
+ je 3f
+ vbroadcasti32x4 zmm0, xmmword ptr [rcx]
+ vbroadcasti32x4 zmm1, xmmword ptr [rcx+0x1*0x10]
+ vmovdqa xmm12, xmmword ptr [rsp]
+ vmovdqa xmm13, xmmword ptr [rsp+0x4*0x10]
+ vpunpckldq xmm14, xmm12, xmm13
+ vpunpckhdq xmm15, xmm12, xmm13
+ vpermq ymm14, ymm14, 0xDC
+ vpermq ymm15, ymm15, 0xDC
+ vpbroadcastd zmm12, dword ptr [BLAKE3_BLOCK_LEN+rip]
+ vinserti32x8 zmm13, zmm14, ymm15, 0x01
+ mov eax, 17476
+ kmovw k2, eax
+ vpblendmd zmm13 {k2}, zmm13, zmm12
+ vbroadcasti32x4 zmm15, xmmword ptr [BLAKE3_IV+rip]
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+0x8]
+ mov r10, qword ptr [rdi+0x10]
+ mov r11, qword ptr [rdi+0x18]
+ mov eax, 43690
+ kmovw k3, eax
+ mov eax, 34952
+ kmovw k4, eax
+ movzx eax, byte ptr [rbp+0x40]
+ or eax, r13d
+ xor edx, edx
+.p2align 5
+2:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ mov dword ptr [rsp+0x88], eax
+ vmovdqa32 zmm2, zmm15
+ vpbroadcastd zmm8, dword ptr [rsp+0x22*0x4]
+ vpblendmd zmm3 {k4}, zmm13, zmm8
+ vmovups zmm8, zmmword ptr [r8+rdx-0x1*0x40]
+ vinserti32x4 zmm8, zmm8, xmmword ptr [r9+rdx-0x4*0x10], 0x01
+ vinserti32x4 zmm8, zmm8, xmmword ptr [r10+rdx-0x4*0x10], 0x02
+ vinserti32x4 zmm8, zmm8, xmmword ptr [r11+rdx-0x4*0x10], 0x03
+ vmovups zmm9, zmmword ptr [r8+rdx-0x30]
+ vinserti32x4 zmm9, zmm9, xmmword ptr [r9+rdx-0x3*0x10], 0x01
+ vinserti32x4 zmm9, zmm9, xmmword ptr [r10+rdx-0x3*0x10], 0x02
+ vinserti32x4 zmm9, zmm9, xmmword ptr [r11+rdx-0x3*0x10], 0x03
+ vshufps zmm4, zmm8, zmm9, 136
+ vshufps zmm5, zmm8, zmm9, 221
+ vmovups zmm8, zmmword ptr [r8+rdx-0x20]
+ vinserti32x4 zmm8, zmm8, xmmword ptr [r9+rdx-0x2*0x10], 0x01
+ vinserti32x4 zmm8, zmm8, xmmword ptr [r10+rdx-0x2*0x10], 0x02
+ vinserti32x4 zmm8, zmm8, xmmword ptr [r11+rdx-0x2*0x10], 0x03
+ vmovups zmm9, zmmword ptr [r8+rdx-0x10]
+ vinserti32x4 zmm9, zmm9, xmmword ptr [r9+rdx-0x1*0x10], 0x01
+ vinserti32x4 zmm9, zmm9, xmmword ptr [r10+rdx-0x1*0x10], 0x02
+ vinserti32x4 zmm9, zmm9, xmmword ptr [r11+rdx-0x1*0x10], 0x03
+ vshufps zmm6, zmm8, zmm9, 136
+ vshufps zmm7, zmm8, zmm9, 221
+ vpshufd zmm6, zmm6, 0x93
+ vpshufd zmm7, zmm7, 0x93
+ mov al, 7
+9:
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm0, zmm0, zmm1
+ vpxord zmm3, zmm3, zmm0
+ vprord zmm3, zmm3, 16
+ vpaddd zmm2, zmm2, zmm3
+ vpxord zmm1, zmm1, zmm2
+ vprord zmm1, zmm1, 12
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm0, zmm0, zmm1
+ vpxord zmm3, zmm3, zmm0
+ vprord zmm3, zmm3, 8
+ vpaddd zmm2, zmm2, zmm3
+ vpxord zmm1, zmm1, zmm2
+ vprord zmm1, zmm1, 7
+ vpshufd zmm0, zmm0, 0x93
+ vpshufd zmm3, zmm3, 0x4E
+ vpshufd zmm2, zmm2, 0x39
+ vpaddd zmm0, zmm0, zmm6
+ vpaddd zmm0, zmm0, zmm1
+ vpxord zmm3, zmm3, zmm0
+ vprord zmm3, zmm3, 16
+ vpaddd zmm2, zmm2, zmm3
+ vpxord zmm1, zmm1, zmm2
+ vprord zmm1, zmm1, 12
+ vpaddd zmm0, zmm0, zmm7
+ vpaddd zmm0, zmm0, zmm1
+ vpxord zmm3, zmm3, zmm0
+ vprord zmm3, zmm3, 8
+ vpaddd zmm2, zmm2, zmm3
+ vpxord zmm1, zmm1, zmm2
+ vprord zmm1, zmm1, 7
+ vpshufd zmm0, zmm0, 0x39
+ vpshufd zmm3, zmm3, 0x4E
+ vpshufd zmm2, zmm2, 0x93
+ dec al
+ jz 9f
+ vshufps zmm8, zmm4, zmm5, 214
+ vpshufd zmm9, zmm4, 0x0F
+ vpshufd zmm4, zmm8, 0x39
+ vshufps zmm8, zmm6, zmm7, 250
+ vpblendmd zmm9 {k3}, zmm9, zmm8
+ vpunpcklqdq zmm8, zmm7, zmm5
+ vpblendmd zmm8 {k4}, zmm8, zmm6
+ vpshufd zmm8, zmm8, 0x78
+ vpunpckhdq zmm5, zmm5, zmm7
+ vpunpckldq zmm6, zmm6, zmm5
+ vpshufd zmm7, zmm6, 0x1E
+ vmovdqa32 zmm5, zmm9
+ vmovdqa32 zmm6, zmm8
+ jmp 9b
+9:
+ vpxord zmm0, zmm0, zmm2
+ vpxord zmm1, zmm1, zmm3
+ mov eax, r13d
+ cmp rdx, r15
+ jne 2b
+ vmovdqu xmmword ptr [rbx], xmm0
+ vmovdqu xmmword ptr [rbx+0x10], xmm1
+ vextracti128 xmmword ptr [rbx+0x20], ymm0, 0x01
+ vextracti128 xmmword ptr [rbx+0x30], ymm1, 0x01
+ vextracti32x4 xmmword ptr [rbx+0x4*0x10], zmm0, 0x02
+ vextracti32x4 xmmword ptr [rbx+0x5*0x10], zmm1, 0x02
+ vextracti32x4 xmmword ptr [rbx+0x6*0x10], zmm0, 0x03
+ vextracti32x4 xmmword ptr [rbx+0x7*0x10], zmm1, 0x03
+ vmovdqa xmm0, xmmword ptr [rsp]
+ vmovdqa xmm2, xmmword ptr [rsp+0x40]
+ vmovdqa32 xmm0 {k1}, xmmword ptr [rsp+0x1*0x10]
+ vmovdqa32 xmm2 {k1}, xmmword ptr [rsp+0x5*0x10]
+ vmovdqa xmmword ptr [rsp], xmm0
+ vmovdqa xmmword ptr [rsp+0x40], xmm2
+ add rbx, 128
+ add rdi, 32
+ sub rsi, 4
+3:
+ test esi, 0x2
+ je 3f
+ vbroadcasti128 ymm0, xmmword ptr [rcx]
+ vbroadcasti128 ymm1, xmmword ptr [rcx+0x10]
+ vmovd xmm13, dword ptr [rsp]
+ vpinsrd xmm13, xmm13, dword ptr [rsp+0x40], 1
+ vpinsrd xmm13, xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
+ vmovd xmm14, dword ptr [rsp+0x4]
+ vpinsrd xmm14, xmm14, dword ptr [rsp+0x44], 1
+ vpinsrd xmm14, xmm14, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
+ vinserti128 ymm13, ymm13, xmm14, 0x01
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+0x8]
+ movzx eax, byte ptr [rbp+0x40]
+ or eax, r13d
+ xor edx, edx
+.p2align 5
+2:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ mov dword ptr [rsp+0x88], eax
+ vbroadcasti128 ymm2, xmmword ptr [BLAKE3_IV+rip]
+ vpbroadcastd ymm8, dword ptr [rsp+0x88]
+ vpblendd ymm3, ymm13, ymm8, 0x88
+ vmovups ymm8, ymmword ptr [r8+rdx-0x40]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r9+rdx-0x40], 0x01
+ vmovups ymm9, ymmword ptr [r8+rdx-0x30]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r9+rdx-0x30], 0x01
+ vshufps ymm4, ymm8, ymm9, 136
+ vshufps ymm5, ymm8, ymm9, 221
+ vmovups ymm8, ymmword ptr [r8+rdx-0x20]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r9+rdx-0x20], 0x01
+ vmovups ymm9, ymmword ptr [r8+rdx-0x10]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r9+rdx-0x10], 0x01
+ vshufps ymm6, ymm8, ymm9, 136
+ vshufps ymm7, ymm8, ymm9, 221
+ vpshufd ymm6, ymm6, 0x93
+ vpshufd ymm7, ymm7, 0x93
+ mov al, 7
+9:
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm0, ymm0, ymm1
+ vpxord ymm3, ymm3, ymm0
+ vprord ymm3, ymm3, 16
+ vpaddd ymm2, ymm2, ymm3
+ vpxord ymm1, ymm1, ymm2
+ vprord ymm1, ymm1, 12
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm0, ymm0, ymm1
+ vpxord ymm3, ymm3, ymm0
+ vprord ymm3, ymm3, 8
+ vpaddd ymm2, ymm2, ymm3
+ vpxord ymm1, ymm1, ymm2
+ vprord ymm1, ymm1, 7
+ vpshufd ymm0, ymm0, 0x93
+ vpshufd ymm3, ymm3, 0x4E
+ vpshufd ymm2, ymm2, 0x39
+ vpaddd ymm0, ymm0, ymm6
+ vpaddd ymm0, ymm0, ymm1
+ vpxord ymm3, ymm3, ymm0
+ vprord ymm3, ymm3, 16
+ vpaddd ymm2, ymm2, ymm3
+ vpxord ymm1, ymm1, ymm2
+ vprord ymm1, ymm1, 12
+ vpaddd ymm0, ymm0, ymm7
+ vpaddd ymm0, ymm0, ymm1
+ vpxord ymm3, ymm3, ymm0
+ vprord ymm3, ymm3, 8
+ vpaddd ymm2, ymm2, ymm3
+ vpxord ymm1, ymm1, ymm2
+ vprord ymm1, ymm1, 7
+ vpshufd ymm0, ymm0, 0x39
+ vpshufd ymm3, ymm3, 0x4E
+ vpshufd ymm2, ymm2, 0x93
+ dec al
+ jz 9f
+ vshufps ymm8, ymm4, ymm5, 214
+ vpshufd ymm9, ymm4, 0x0F
+ vpshufd ymm4, ymm8, 0x39
+ vshufps ymm8, ymm6, ymm7, 250
+ vpblendd ymm9, ymm9, ymm8, 0xAA
+ vpunpcklqdq ymm8, ymm7, ymm5
+ vpblendd ymm8, ymm8, ymm6, 0x88
+ vpshufd ymm8, ymm8, 0x78
+ vpunpckhdq ymm5, ymm5, ymm7
+ vpunpckldq ymm6, ymm6, ymm5
+ vpshufd ymm7, ymm6, 0x1E
+ vmovdqa ymm5, ymm9
+ vmovdqa ymm6, ymm8
+ jmp 9b
+9:
+ vpxor ymm0, ymm0, ymm2
+ vpxor ymm1, ymm1, ymm3
+ mov eax, r13d
+ cmp rdx, r15
+ jne 2b
+ vmovdqu xmmword ptr [rbx], xmm0
+ vmovdqu xmmword ptr [rbx+0x10], xmm1
+ vextracti128 xmmword ptr [rbx+0x20], ymm0, 0x01
+ vextracti128 xmmword ptr [rbx+0x30], ymm1, 0x01
+ vmovdqa xmm0, xmmword ptr [rsp]
+ vmovdqa xmm2, xmmword ptr [rsp+0x4*0x10]
+ vmovdqu32 xmm0 {k1}, xmmword ptr [rsp+0x8]
+ vmovdqu32 xmm2 {k1}, xmmword ptr [rsp+0x48]
+ vmovdqa xmmword ptr [rsp], xmm0
+ vmovdqa xmmword ptr [rsp+0x4*0x10], xmm2
+ add rbx, 64
+ add rdi, 16
+ sub rsi, 2
+3:
+ test esi, 0x1
+ je 4b
+ vmovdqu xmm0, xmmword ptr [rcx]
+ vmovdqu xmm1, xmmword ptr [rcx+0x10]
+ vmovd xmm14, dword ptr [rsp]
+ vpinsrd xmm14, xmm14, dword ptr [rsp+0x40], 1
+ vpinsrd xmm14, xmm14, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
+ vmovdqa xmm15, xmmword ptr [BLAKE3_IV+rip]
+ mov r8, qword ptr [rdi]
+ movzx eax, byte ptr [rbp+0x40]
+ or eax, r13d
+ xor edx, edx
+.p2align 5
+2:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ vpinsrd xmm3, xmm14, eax, 3
+ vmovdqa xmm2, xmm15
+ vmovups xmm8, xmmword ptr [r8+rdx-0x40]
+ vmovups xmm9, xmmword ptr [r8+rdx-0x30]
+ vshufps xmm4, xmm8, xmm9, 136
+ vshufps xmm5, xmm8, xmm9, 221
+ vmovups xmm8, xmmword ptr [r8+rdx-0x20]
+ vmovups xmm9, xmmword ptr [r8+rdx-0x10]
+ vshufps xmm6, xmm8, xmm9, 136
+ vshufps xmm7, xmm8, xmm9, 221
+ vpshufd xmm6, xmm6, 0x93
+ vpshufd xmm7, xmm7, 0x93
+ mov al, 7
+9:
+ vpaddd xmm0, xmm0, xmm4
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 16
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 12
+ vpaddd xmm0, xmm0, xmm5
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 8
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 7
+ vpshufd xmm0, xmm0, 0x93
+ vpshufd xmm3, xmm3, 0x4E
+ vpshufd xmm2, xmm2, 0x39
+ vpaddd xmm0, xmm0, xmm6
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 16
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 12
+ vpaddd xmm0, xmm0, xmm7
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 8
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 7
+ vpshufd xmm0, xmm0, 0x39
+ vpshufd xmm3, xmm3, 0x4E
+ vpshufd xmm2, xmm2, 0x93
+ dec al
+ jz 9f
+ vshufps xmm8, xmm4, xmm5, 214
+ vpshufd xmm9, xmm4, 0x0F
+ vpshufd xmm4, xmm8, 0x39
+ vshufps xmm8, xmm6, xmm7, 250
+ vpblendd xmm9, xmm9, xmm8, 0xAA
+ vpunpcklqdq xmm8, xmm7, xmm5
+ vpblendd xmm8, xmm8, xmm6, 0x88
+ vpshufd xmm8, xmm8, 0x78
+ vpunpckhdq xmm5, xmm5, xmm7
+ vpunpckldq xmm6, xmm6, xmm5
+ vpshufd xmm7, xmm6, 0x1E
+ vmovdqa xmm5, xmm9
+ vmovdqa xmm6, xmm8
+ jmp 9b
+9:
+ vpxor xmm0, xmm0, xmm2
+ vpxor xmm1, xmm1, xmm3
+ mov eax, r13d
+ cmp rdx, r15
+ jne 2b
+ vmovdqu xmmword ptr [rbx], xmm0
+ vmovdqu xmmword ptr [rbx+0x10], xmm1
+ jmp 4b
+.size zfs_blake3_hash_many_avx512, . - zfs_blake3_hash_many_avx512 /* taken right after the last instruction of hash_many, before alignment padding, so the symbol size spans that routine only */
+.p2align 6
+zfs_blake3_compress_in_place_avx512: /* one compression pass: the 32 bytes at [rdi] are the input state and are overwritten with the result */
+ _CET_ENDBR
+ vmovdqu xmm0, xmmword ptr [rdi] /* row 0 = bytes 0..15 at [rdi] */
+ vmovdqu xmm1, xmmword ptr [rdi+0x10] /* row 1 = bytes 16..31 at [rdi] */
+ movzx eax, r8b /* NOTE(review): r8b is presumably the flags byte, dl the block length and rcx the 64-bit counter - confirm against the C prototype */
+ movzx edx, dl
+ shl rax, 32
+ add rdx, rax /* rdx = dl | (r8b << 32) */
+ vmovq xmm3, rcx
+ vmovq xmm4, rdx
+ vpunpcklqdq xmm3, xmm3, xmm4 /* row 3 = { rcx, rdx }, i.e. dwords rcx.lo, rcx.hi, dl, r8b */
+ vmovaps xmm2, xmmword ptr [BLAKE3_IV+rip] /* row 2 = the 16 bytes at BLAKE3_IV (aligned load) */
+ vmovups xmm8, xmmword ptr [rsi]
+ vmovups xmm9, xmmword ptr [rsi+0x10]
+ vshufps xmm4, xmm8, xmm9, 136 /* xmm4 = dwords 0, 2, 4, 6 of the 64 bytes at [rsi] */
+ vshufps xmm5, xmm8, xmm9, 221 /* xmm5 = dwords 1, 3, 5, 7 */
+ vmovups xmm8, xmmword ptr [rsi+0x20]
+ vmovups xmm9, xmmword ptr [rsi+0x30]
+ vshufps xmm6, xmm8, xmm9, 136 /* xmm6 = dwords 8, 10, 12, 14 */
+ vshufps xmm7, xmm8, xmm9, 221 /* xmm7 = dwords 9, 11, 13, 15 */
+ vpshufd xmm6, xmm6, 0x93 /* lane-rotate to 14, 8, 10, 12 */
+ vpshufd xmm7, xmm7, 0x93 /* lane-rotate to 15, 9, 11, 13 */
+ mov al, 7 /* round counter: the body at label 9 runs 7 times (the old rax value was already folded into rdx) */
+9:
+ vpaddd xmm0, xmm0, xmm4 /* first half of the round: row0 += words held in xmm4 */
+ vpaddd xmm0, xmm0, xmm1 /* row0 += row1 */
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 16 /* row3 = (row3 ^ row0) rotated right by 16 */
+ vpaddd xmm2, xmm2, xmm3 /* row2 += row3 */
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 12 /* row1 = (row1 ^ row2) rotated right by 12 */
+ vpaddd xmm0, xmm0, xmm5 /* same add/xor/rotate chain with the xmm5 words, rotations 8 and 7 */
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 8
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 7
+ vpshufd xmm0, xmm0, 0x93 /* rotate the lanes of rows 0, 3 and 2 (row 1 stays put) so the second half pairs different lanes */
+ vpshufd xmm3, xmm3, 0x4E
+ vpshufd xmm2, xmm2, 0x39
+ vpaddd xmm0, xmm0, xmm6 /* second half of the round: same chain with the xmm6 and xmm7 words */
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 16
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 12
+ vpaddd xmm0, xmm0, xmm7
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 8
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 7
+ vpshufd xmm0, xmm0, 0x39 /* undo the lane rotation (0x39 inverts 0x93, 0x4E is its own inverse) */
+ vpshufd xmm3, xmm3, 0x4E
+ vpshufd xmm2, xmm2, 0x93
+ dec al
+ jz 9f /* after the 7th round skip the message reshuffle */
+ vshufps xmm8, xmm4, xmm5, 214 /* rebuild xmm4..xmm7 from their current contents for the next round; xmm8 and xmm9 are scratch */
+ vpshufd xmm9, xmm4, 0x0F
+ vpshufd xmm4, xmm8, 0x39
+ vshufps xmm8, xmm6, xmm7, 250
+ vpblendd xmm9, xmm9, xmm8, 0xAA
+ vpunpcklqdq xmm8, xmm7, xmm5
+ vpblendd xmm8, xmm8, xmm6, 0x88
+ vpshufd xmm8, xmm8, 0x78
+ vpunpckhdq xmm5, xmm5, xmm7
+ vpunpckldq xmm6, xmm6, xmm5
+ vpshufd xmm7, xmm6, 0x1E
+ vmovdqa xmm5, xmm9
+ vmovdqa xmm6, xmm8
+ jmp 9b
+9:
+ vpxor xmm0, xmm0, xmm2 /* result bytes 0..15 = row0 ^ row2 */
+ vpxor xmm1, xmm1, xmm3 /* result bytes 16..31 = row1 ^ row3 */
+ vmovdqu xmmword ptr [rdi], xmm0 /* write the result over the input state */
+ vmovdqu xmmword ptr [rdi+0x10], xmm1
+ ret
+.size zfs_blake3_compress_in_place_avx512, . - zfs_blake3_compress_in_place_avx512 /* size ends at this routine's ret */
+
+.p2align 6
+zfs_blake3_compress_xof_avx512: /* same compression as above, but the full 64-byte result is written to [r9] and [rdi] is only read */
+ _CET_ENDBR
+ vmovdqu xmm0, xmmword ptr [rdi] /* row 0 = bytes 0..15 at [rdi] */
+ vmovdqu xmm1, xmmword ptr [rdi+0x10] /* row 1 = bytes 16..31 at [rdi] */
+ movzx eax, r8b /* NOTE(review): r8b is presumably the flags byte, dl the block length and rcx the 64-bit counter - confirm against the C prototype */
+ movzx edx, dl
+ shl rax, 32
+ add rdx, rax /* rdx = dl | (r8b << 32) */
+ vmovq xmm3, rcx
+ vmovq xmm4, rdx
+ vpunpcklqdq xmm3, xmm3, xmm4 /* row 3 = { rcx, rdx }, i.e. dwords rcx.lo, rcx.hi, dl, r8b */
+ vmovaps xmm2, xmmword ptr [BLAKE3_IV+rip] /* row 2 = the 16 bytes at BLAKE3_IV (aligned load) */
+ vmovups xmm8, xmmword ptr [rsi]
+ vmovups xmm9, xmmword ptr [rsi+0x10]
+ vshufps xmm4, xmm8, xmm9, 136 /* xmm4 = dwords 0, 2, 4, 6 of the 64 bytes at [rsi] */
+ vshufps xmm5, xmm8, xmm9, 221 /* xmm5 = dwords 1, 3, 5, 7 */
+ vmovups xmm8, xmmword ptr [rsi+0x20]
+ vmovups xmm9, xmmword ptr [rsi+0x30]
+ vshufps xmm6, xmm8, xmm9, 136 /* xmm6 = dwords 8, 10, 12, 14 */
+ vshufps xmm7, xmm8, xmm9, 221 /* xmm7 = dwords 9, 11, 13, 15 */
+ vpshufd xmm6, xmm6, 0x93 /* lane-rotate to 14, 8, 10, 12 */
+ vpshufd xmm7, xmm7, 0x93 /* lane-rotate to 15, 9, 11, 13 */
+ mov al, 7 /* round counter: the body at label 9 runs 7 times (the old rax value was already folded into rdx) */
+9:
+ vpaddd xmm0, xmm0, xmm4 /* first half of the round: row0 += words held in xmm4 */
+ vpaddd xmm0, xmm0, xmm1 /* row0 += row1 */
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 16 /* row3 = (row3 ^ row0) rotated right by 16 */
+ vpaddd xmm2, xmm2, xmm3 /* row2 += row3 */
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 12 /* row1 = (row1 ^ row2) rotated right by 12 */
+ vpaddd xmm0, xmm0, xmm5 /* same add/xor/rotate chain with the xmm5 words, rotations 8 and 7 */
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 8
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 7
+ vpshufd xmm0, xmm0, 0x93 /* rotate the lanes of rows 0, 3 and 2 (row 1 stays put) so the second half pairs different lanes */
+ vpshufd xmm3, xmm3, 0x4E
+ vpshufd xmm2, xmm2, 0x39
+ vpaddd xmm0, xmm0, xmm6 /* second half of the round: same chain with the xmm6 and xmm7 words */
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 16
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 12
+ vpaddd xmm0, xmm0, xmm7
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 8
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 7
+ vpshufd xmm0, xmm0, 0x39 /* undo the lane rotation (0x39 inverts 0x93, 0x4E is its own inverse) */
+ vpshufd xmm3, xmm3, 0x4E
+ vpshufd xmm2, xmm2, 0x93
+ dec al
+ jz 9f /* after the 7th round skip the message reshuffle */
+ vshufps xmm8, xmm4, xmm5, 214 /* rebuild xmm4..xmm7 from their current contents for the next round; xmm8 and xmm9 are scratch */
+ vpshufd xmm9, xmm4, 0x0F
+ vpshufd xmm4, xmm8, 0x39
+ vshufps xmm8, xmm6, xmm7, 250
+ vpblendd xmm9, xmm9, xmm8, 0xAA
+ vpunpcklqdq xmm8, xmm7, xmm5
+ vpblendd xmm8, xmm8, xmm6, 0x88
+ vpshufd xmm8, xmm8, 0x78
+ vpunpckhdq xmm5, xmm5, xmm7
+ vpunpckldq xmm6, xmm6, xmm5
+ vpshufd xmm7, xmm6, 0x1E
+ vmovdqa xmm5, xmm9
+ vmovdqa xmm6, xmm8
+ jmp 9b
+9:
+ vpxor xmm0, xmm0, xmm2 /* output bytes 0..15 = row0 ^ row2 */
+ vpxor xmm1, xmm1, xmm3 /* output bytes 16..31 = row1 ^ row3 */
+ vpxor xmm2, xmm2, [rdi] /* output bytes 32..47 = row2 ^ input state bytes 0..15 */
+ vpxor xmm3, xmm3, [rdi+0x10] /* output bytes 48..63 = row3 ^ input state bytes 16..31 */
+ vmovdqu xmmword ptr [r9], xmm0 /* all 64 output bytes are stored at [r9] */
+ vmovdqu xmmword ptr [r9+0x10], xmm1
+ vmovdqu xmmword ptr [r9+0x20], xmm2
+ vmovdqu xmmword ptr [r9+0x30], xmm3
+ ret
+.size zfs_blake3_compress_xof_avx512, . - zfs_blake3_compress_xof_avx512 /* size ends at this routine's ret */
+
+
+#ifdef __APPLE__
+.static_data
+#else
+.section .rodata
+#endif
+
+.p2align 6 /* 64-byte alignment for the tables that follow */
+INDEX0: /* NOTE(review): the consumers of INDEX0, INDEX1, ADD0, ADD1 and ADD16 are outside this excerpt - confirm their use against the hash_many code */
+ .long 0, 1, 2, 3, 16, 17, 18, 19
+ .long 8, 9, 10, 11, 24, 25, 26, 27
+INDEX1:
+ .long 4, 5, 6, 7, 20, 21, 22, 23
+ .long 12, 13, 14, 15, 28, 29, 30, 31
+ADD0: /* the sequence 0..15, one dword each */
+ .long 0, 1, 2, 3, 4, 5, 6, 7
+ .long 8, 9, 10, 11, 12, 13, 14, 15
+ADD1: .long 1
+
+ADD16: .long 16
+BLAKE3_BLOCK_LEN: /* the constant 64, fetched by the code as a single dword operand */
+ .long 64
+.p2align 6 /* realign: BLAKE3_IV is fetched with aligned 16-byte loads (vmovaps / vmovdqa) */
+BLAKE3_IV: /* BLAKE3_IV names the whole 16-byte table; BLAKE3_IV_0..3 name its individual dwords */
+BLAKE3_IV_0:
+ .long 0x6A09E667
+BLAKE3_IV_1:
+ .long 0xBB67AE85
+BLAKE3_IV_2:
+ .long 0x3C6EF372
+BLAKE3_IV_3:
+ .long 0xA54FF53A
+
+#endif /* HAVE_AVX512 */
+
+#ifdef __ELF__
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_sse2.S b/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_sse2.S
new file mode 100644
index 000000000000..39d23ee233df
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_sse2.S
@@ -0,0 +1,2323 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3
+ * Copyright (c) 2019-2020 Samuel Neves and Matthew Krupcale
+ * Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
+ */
+
+#if defined(HAVE_SSE2)
+
+#define _ASM
+#include <sys/asm_linkage.h>
+
+#if defined(__ELF__) && defined(__CET__) && defined(__has_include)
+#if __has_include(<cet.h>)
+#include <cet.h>
+#endif
+#endif
+
+#if !defined(_CET_ENDBR)
+#define _CET_ENDBR
+#endif
+
+.intel_syntax noprefix
+.global zfs_blake3_hash_many_sse2
+.global zfs_blake3_compress_in_place_sse2
+.global zfs_blake3_compress_xof_sse2
+
+.text
+.type zfs_blake3_hash_many_sse2,@function
+.type zfs_blake3_compress_in_place_sse2,@function
+.type zfs_blake3_compress_xof_sse2,@function
+
+ .p2align 6
+zfs_blake3_hash_many_sse2:
+ _CET_ENDBR
+ push r15
+ push r14
+ push r13
+ push r12
+ push rbx
+ push rbp
+ mov rbp, rsp
+ sub rsp, 360
+ and rsp, 0xFFFFFFFFFFFFFFC0
+ neg r9d
+ movd xmm0, r9d
+ pshufd xmm0, xmm0, 0x00
+ movdqa xmmword ptr [rsp+0x130], xmm0
+ movdqa xmm1, xmm0
+ pand xmm1, xmmword ptr [ADD0+rip]
+ pand xmm0, xmmword ptr [ADD1+rip]
+ movdqa xmmword ptr [rsp+0x150], xmm0
+ movd xmm0, r8d
+ pshufd xmm0, xmm0, 0x00
+ paddd xmm0, xmm1
+ movdqa xmmword ptr [rsp+0x110], xmm0
+ pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip]
+ pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip]
+ pcmpgtd xmm1, xmm0
+ shr r8, 32
+ movd xmm2, r8d
+ pshufd xmm2, xmm2, 0x00
+ psubd xmm2, xmm1
+ movdqa xmmword ptr [rsp+0x120], xmm2
+ mov rbx, qword ptr [rbp+0x50]
+ mov r15, rdx
+ shl r15, 6
+ movzx r13d, byte ptr [rbp+0x38]
+ movzx r12d, byte ptr [rbp+0x48]
+ cmp rsi, 4
+ jc 3f
+2:
+ movdqu xmm3, xmmword ptr [rcx]
+ pshufd xmm0, xmm3, 0x00
+ pshufd xmm1, xmm3, 0x55
+ pshufd xmm2, xmm3, 0xAA
+ pshufd xmm3, xmm3, 0xFF
+ movdqu xmm7, xmmword ptr [rcx+0x10]
+ pshufd xmm4, xmm7, 0x00
+ pshufd xmm5, xmm7, 0x55
+ pshufd xmm6, xmm7, 0xAA
+ pshufd xmm7, xmm7, 0xFF
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+0x8]
+ mov r10, qword ptr [rdi+0x10]
+ mov r11, qword ptr [rdi+0x18]
+ movzx eax, byte ptr [rbp+0x40]
+ or eax, r13d
+ xor edx, edx
+9:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ movdqu xmm8, xmmword ptr [r8+rdx-0x40]
+ movdqu xmm9, xmmword ptr [r9+rdx-0x40]
+ movdqu xmm10, xmmword ptr [r10+rdx-0x40]
+ movdqu xmm11, xmmword ptr [r11+rdx-0x40]
+ movdqa xmm12, xmm8
+ punpckldq xmm8, xmm9
+ punpckhdq xmm12, xmm9
+ movdqa xmm14, xmm10
+ punpckldq xmm10, xmm11
+ punpckhdq xmm14, xmm11
+ movdqa xmm9, xmm8
+ punpcklqdq xmm8, xmm10
+ punpckhqdq xmm9, xmm10
+ movdqa xmm13, xmm12
+ punpcklqdq xmm12, xmm14
+ punpckhqdq xmm13, xmm14
+ movdqa xmmword ptr [rsp], xmm8
+ movdqa xmmword ptr [rsp+0x10], xmm9
+ movdqa xmmword ptr [rsp+0x20], xmm12
+ movdqa xmmword ptr [rsp+0x30], xmm13
+ movdqu xmm8, xmmword ptr [r8+rdx-0x30]
+ movdqu xmm9, xmmword ptr [r9+rdx-0x30]
+ movdqu xmm10, xmmword ptr [r10+rdx-0x30]
+ movdqu xmm11, xmmword ptr [r11+rdx-0x30]
+ movdqa xmm12, xmm8
+ punpckldq xmm8, xmm9
+ punpckhdq xmm12, xmm9
+ movdqa xmm14, xmm10
+ punpckldq xmm10, xmm11
+ punpckhdq xmm14, xmm11
+ movdqa xmm9, xmm8
+ punpcklqdq xmm8, xmm10
+ punpckhqdq xmm9, xmm10
+ movdqa xmm13, xmm12
+ punpcklqdq xmm12, xmm14
+ punpckhqdq xmm13, xmm14
+ movdqa xmmword ptr [rsp+0x40], xmm8
+ movdqa xmmword ptr [rsp+0x50], xmm9
+ movdqa xmmword ptr [rsp+0x60], xmm12
+ movdqa xmmword ptr [rsp+0x70], xmm13
+ movdqu xmm8, xmmword ptr [r8+rdx-0x20]
+ movdqu xmm9, xmmword ptr [r9+rdx-0x20]
+ movdqu xmm10, xmmword ptr [r10+rdx-0x20]
+ movdqu xmm11, xmmword ptr [r11+rdx-0x20]
+ movdqa xmm12, xmm8
+ punpckldq xmm8, xmm9
+ punpckhdq xmm12, xmm9
+ movdqa xmm14, xmm10
+ punpckldq xmm10, xmm11
+ punpckhdq xmm14, xmm11
+ movdqa xmm9, xmm8
+ punpcklqdq xmm8, xmm10
+ punpckhqdq xmm9, xmm10
+ movdqa xmm13, xmm12
+ punpcklqdq xmm12, xmm14
+ punpckhqdq xmm13, xmm14
+ movdqa xmmword ptr [rsp+0x80], xmm8
+ movdqa xmmword ptr [rsp+0x90], xmm9
+ movdqa xmmword ptr [rsp+0xA0], xmm12
+ movdqa xmmword ptr [rsp+0xB0], xmm13
+ movdqu xmm8, xmmword ptr [r8+rdx-0x10]
+ movdqu xmm9, xmmword ptr [r9+rdx-0x10]
+ movdqu xmm10, xmmword ptr [r10+rdx-0x10]
+ movdqu xmm11, xmmword ptr [r11+rdx-0x10]
+ movdqa xmm12, xmm8
+ punpckldq xmm8, xmm9
+ punpckhdq xmm12, xmm9
+ movdqa xmm14, xmm10
+ punpckldq xmm10, xmm11
+ punpckhdq xmm14, xmm11
+ movdqa xmm9, xmm8
+ punpcklqdq xmm8, xmm10
+ punpckhqdq xmm9, xmm10
+ movdqa xmm13, xmm12
+ punpcklqdq xmm12, xmm14
+ punpckhqdq xmm13, xmm14
+ movdqa xmmword ptr [rsp+0xC0], xmm8
+ movdqa xmmword ptr [rsp+0xD0], xmm9
+ movdqa xmmword ptr [rsp+0xE0], xmm12
+ movdqa xmmword ptr [rsp+0xF0], xmm13
+ movdqa xmm9, xmmword ptr [BLAKE3_IV_1+rip]
+ movdqa xmm10, xmmword ptr [BLAKE3_IV_2+rip]
+ movdqa xmm11, xmmword ptr [BLAKE3_IV_3+rip]
+ movdqa xmm12, xmmword ptr [rsp+0x110]
+ movdqa xmm13, xmmword ptr [rsp+0x120]
+ movdqa xmm14, xmmword ptr [BLAKE3_BLOCK_LEN+rip]
+ movd xmm15, eax
+ pshufd xmm15, xmm15, 0x00
+ prefetcht0 [r8+rdx+0x80]
+ prefetcht0 [r9+rdx+0x80]
+ prefetcht0 [r10+rdx+0x80]
+ prefetcht0 [r11+rdx+0x80]
+ paddd xmm0, xmmword ptr [rsp]
+ paddd xmm1, xmmword ptr [rsp+0x20]
+ paddd xmm2, xmmword ptr [rsp+0x40]
+ paddd xmm3, xmmword ptr [rsp+0x60]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ pshuflw xmm12, xmm12, 0xB1
+ pshufhw xmm12, xmm12, 0xB1
+ pshuflw xmm13, xmm13, 0xB1
+ pshufhw xmm13, xmm13, 0xB1
+ pshuflw xmm14, xmm14, 0xB1
+ pshufhw xmm14, xmm14, 0xB1
+ pshuflw xmm15, xmm15, 0xB1
+ pshufhw xmm15, xmm15, 0xB1
+ movdqa xmm8, xmmword ptr [BLAKE3_IV_0+rip]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x10]
+ paddd xmm1, xmmword ptr [rsp+0x30]
+ paddd xmm2, xmmword ptr [rsp+0x50]
+ paddd xmm3, xmmword ptr [rsp+0x70]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x80]
+ paddd xmm1, xmmword ptr [rsp+0xA0]
+ paddd xmm2, xmmword ptr [rsp+0xC0]
+ paddd xmm3, xmmword ptr [rsp+0xE0]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ pshuflw xmm15, xmm15, 0xB1
+ pshufhw xmm15, xmm15, 0xB1
+ pshuflw xmm12, xmm12, 0xB1
+ pshufhw xmm12, xmm12, 0xB1
+ pshuflw xmm13, xmm13, 0xB1
+ pshufhw xmm13, xmm13, 0xB1
+ pshuflw xmm14, xmm14, 0xB1
+ pshufhw xmm14, xmm14, 0xB1
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x90]
+ paddd xmm1, xmmword ptr [rsp+0xB0]
+ paddd xmm2, xmmword ptr [rsp+0xD0]
+ paddd xmm3, xmmword ptr [rsp+0xF0]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x20]
+ paddd xmm1, xmmword ptr [rsp+0x30]
+ paddd xmm2, xmmword ptr [rsp+0x70]
+ paddd xmm3, xmmword ptr [rsp+0x40]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ pshuflw xmm12, xmm12, 0xB1
+ pshufhw xmm12, xmm12, 0xB1
+ pshuflw xmm13, xmm13, 0xB1
+ pshufhw xmm13, xmm13, 0xB1
+ pshuflw xmm14, xmm14, 0xB1
+ pshufhw xmm14, xmm14, 0xB1
+ pshuflw xmm15, xmm15, 0xB1
+ pshufhw xmm15, xmm15, 0xB1
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x60]
+ paddd xmm1, xmmword ptr [rsp+0xA0]
+ paddd xmm2, xmmword ptr [rsp]
+ paddd xmm3, xmmword ptr [rsp+0xD0]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x10]
+ paddd xmm1, xmmword ptr [rsp+0xC0]
+ paddd xmm2, xmmword ptr [rsp+0x90]
+ paddd xmm3, xmmword ptr [rsp+0xF0]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ pshuflw xmm15, xmm15, 0xB1
+ pshufhw xmm15, xmm15, 0xB1
+ pshuflw xmm12, xmm12, 0xB1
+ pshufhw xmm12, xmm12, 0xB1
+ pshuflw xmm13, xmm13, 0xB1
+ pshufhw xmm13, xmm13, 0xB1
+ pshuflw xmm14, xmm14, 0xB1
+ pshufhw xmm14, xmm14, 0xB1
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xB0]
+ paddd xmm1, xmmword ptr [rsp+0x50]
+ paddd xmm2, xmmword ptr [rsp+0xE0]
+ paddd xmm3, xmmword ptr [rsp+0x80]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x30]
+ paddd xmm1, xmmword ptr [rsp+0xA0]
+ paddd xmm2, xmmword ptr [rsp+0xD0]
+ paddd xmm3, xmmword ptr [rsp+0x70]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ pshuflw xmm12, xmm12, 0xB1
+ pshufhw xmm12, xmm12, 0xB1
+ pshuflw xmm13, xmm13, 0xB1
+ pshufhw xmm13, xmm13, 0xB1
+ pshuflw xmm14, xmm14, 0xB1
+ pshufhw xmm14, xmm14, 0xB1
+ pshuflw xmm15, xmm15, 0xB1
+ pshufhw xmm15, xmm15, 0xB1
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x40]
+ paddd xmm1, xmmword ptr [rsp+0xC0]
+ paddd xmm2, xmmword ptr [rsp+0x20]
+ paddd xmm3, xmmword ptr [rsp+0xE0]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x60]
+ paddd xmm1, xmmword ptr [rsp+0x90]
+ paddd xmm2, xmmword ptr [rsp+0xB0]
+ paddd xmm3, xmmword ptr [rsp+0x80]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ pshuflw xmm15, xmm15, 0xB1
+ pshufhw xmm15, xmm15, 0xB1
+ pshuflw xmm12, xmm12, 0xB1
+ pshufhw xmm12, xmm12, 0xB1
+ pshuflw xmm13, xmm13, 0xB1
+ pshufhw xmm13, xmm13, 0xB1
+ pshuflw xmm14, xmm14, 0xB1
+ pshufhw xmm14, xmm14, 0xB1
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x50]
+ paddd xmm1, xmmword ptr [rsp]
+ paddd xmm2, xmmword ptr [rsp+0xF0]
+ paddd xmm3, xmmword ptr [rsp+0x10]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xA0]
+ paddd xmm1, xmmword ptr [rsp+0xC0]
+ paddd xmm2, xmmword ptr [rsp+0xE0]
+ paddd xmm3, xmmword ptr [rsp+0xD0]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ pshuflw xmm12, xmm12, 0xB1
+ pshufhw xmm12, xmm12, 0xB1
+ pshuflw xmm13, xmm13, 0xB1
+ pshufhw xmm13, xmm13, 0xB1
+ pshuflw xmm14, xmm14, 0xB1
+ pshufhw xmm14, xmm14, 0xB1
+ pshuflw xmm15, xmm15, 0xB1
+ pshufhw xmm15, xmm15, 0xB1
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x70]
+ paddd xmm1, xmmword ptr [rsp+0x90]
+ paddd xmm2, xmmword ptr [rsp+0x30]
+ paddd xmm3, xmmword ptr [rsp+0xF0]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x40]
+ paddd xmm1, xmmword ptr [rsp+0xB0]
+ paddd xmm2, xmmword ptr [rsp+0x50]
+ paddd xmm3, xmmword ptr [rsp+0x10]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ pshuflw xmm15, xmm15, 0xB1
+ pshufhw xmm15, xmm15, 0xB1
+ pshuflw xmm12, xmm12, 0xB1
+ pshufhw xmm12, xmm12, 0xB1
+ pshuflw xmm13, xmm13, 0xB1
+ pshufhw xmm13, xmm13, 0xB1
+ pshuflw xmm14, xmm14, 0xB1
+ pshufhw xmm14, xmm14, 0xB1
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp]
+ paddd xmm1, xmmword ptr [rsp+0x20]
+ paddd xmm2, xmmword ptr [rsp+0x80]
+ paddd xmm3, xmmword ptr [rsp+0x60]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xC0]
+ paddd xmm1, xmmword ptr [rsp+0x90]
+ paddd xmm2, xmmword ptr [rsp+0xF0]
+ paddd xmm3, xmmword ptr [rsp+0xE0]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ pshuflw xmm12, xmm12, 0xB1
+ pshufhw xmm12, xmm12, 0xB1
+ pshuflw xmm13, xmm13, 0xB1
+ pshufhw xmm13, xmm13, 0xB1
+ pshuflw xmm14, xmm14, 0xB1
+ pshufhw xmm14, xmm14, 0xB1
+ pshuflw xmm15, xmm15, 0xB1
+ pshufhw xmm15, xmm15, 0xB1
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xD0]
+ paddd xmm1, xmmword ptr [rsp+0xB0]
+ paddd xmm2, xmmword ptr [rsp+0xA0]
+ paddd xmm3, xmmword ptr [rsp+0x80]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x70]
+ paddd xmm1, xmmword ptr [rsp+0x50]
+ paddd xmm2, xmmword ptr [rsp]
+ paddd xmm3, xmmword ptr [rsp+0x60]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ pshuflw xmm15, xmm15, 0xB1
+ pshufhw xmm15, xmm15, 0xB1
+ pshuflw xmm12, xmm12, 0xB1
+ pshufhw xmm12, xmm12, 0xB1
+ pshuflw xmm13, xmm13, 0xB1
+ pshufhw xmm13, xmm13, 0xB1
+ pshuflw xmm14, xmm14, 0xB1
+ pshufhw xmm14, xmm14, 0xB1
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x20]
+ paddd xmm1, xmmword ptr [rsp+0x30]
+ paddd xmm2, xmmword ptr [rsp+0x10]
+ paddd xmm3, xmmword ptr [rsp+0x40]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x90]
+ paddd xmm1, xmmword ptr [rsp+0xB0]
+ paddd xmm2, xmmword ptr [rsp+0x80]
+ paddd xmm3, xmmword ptr [rsp+0xF0]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ pshuflw xmm12, xmm12, 0xB1
+ pshufhw xmm12, xmm12, 0xB1
+ pshuflw xmm13, xmm13, 0xB1
+ pshufhw xmm13, xmm13, 0xB1
+ pshuflw xmm14, xmm14, 0xB1
+ pshufhw xmm14, xmm14, 0xB1
+ pshuflw xmm15, xmm15, 0xB1
+ pshufhw xmm15, xmm15, 0xB1
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xE0]
+ paddd xmm1, xmmword ptr [rsp+0x50]
+ paddd xmm2, xmmword ptr [rsp+0xC0]
+ paddd xmm3, xmmword ptr [rsp+0x10]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xD0]
+ paddd xmm1, xmmword ptr [rsp]
+ paddd xmm2, xmmword ptr [rsp+0x20]
+ paddd xmm3, xmmword ptr [rsp+0x40]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ pshuflw xmm15, xmm15, 0xB1
+ pshufhw xmm15, xmm15, 0xB1
+ pshuflw xmm12, xmm12, 0xB1
+ pshufhw xmm12, xmm12, 0xB1
+ pshuflw xmm13, xmm13, 0xB1
+ pshufhw xmm13, xmm13, 0xB1
+ pshuflw xmm14, xmm14, 0xB1
+ pshufhw xmm14, xmm14, 0xB1
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x30]
+ paddd xmm1, xmmword ptr [rsp+0xA0]
+ paddd xmm2, xmmword ptr [rsp+0x60]
+ paddd xmm3, xmmword ptr [rsp+0x70]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xB0]
+ paddd xmm1, xmmword ptr [rsp+0x50]
+ paddd xmm2, xmmword ptr [rsp+0x10]
+ paddd xmm3, xmmword ptr [rsp+0x80]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ pshuflw xmm12, xmm12, 0xB1
+ pshufhw xmm12, xmm12, 0xB1
+ pshuflw xmm13, xmm13, 0xB1
+ pshufhw xmm13, xmm13, 0xB1
+ pshuflw xmm14, xmm14, 0xB1
+ pshufhw xmm14, xmm14, 0xB1
+ pshuflw xmm15, xmm15, 0xB1
+ pshufhw xmm15, xmm15, 0xB1
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xF0]
+ paddd xmm1, xmmword ptr [rsp]
+ paddd xmm2, xmmword ptr [rsp+0x90]
+ paddd xmm3, xmmword ptr [rsp+0x60]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xE0]
+ paddd xmm1, xmmword ptr [rsp+0x20]
+ paddd xmm2, xmmword ptr [rsp+0x30]
+ paddd xmm3, xmmword ptr [rsp+0x70]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ pshuflw xmm15, xmm15, 0xB1
+ pshufhw xmm15, xmm15, 0xB1
+ pshuflw xmm12, xmm12, 0xB1
+ pshufhw xmm12, xmm12, 0xB1
+ pshuflw xmm13, xmm13, 0xB1
+ pshufhw xmm13, xmm13, 0xB1
+ pshuflw xmm14, xmm14, 0xB1
+ pshufhw xmm14, xmm14, 0xB1
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xA0]
+ paddd xmm1, xmmword ptr [rsp+0xC0]
+ paddd xmm2, xmmword ptr [rsp+0x40]
+ paddd xmm3, xmmword ptr [rsp+0xD0]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ pxor xmm0, xmm8
+ pxor xmm1, xmm9
+ pxor xmm2, xmm10
+ pxor xmm3, xmm11
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ pxor xmm4, xmm12
+ pxor xmm5, xmm13
+ pxor xmm6, xmm14
+ pxor xmm7, xmm15
+ mov eax, r13d
+ jne 9b
+ movdqa xmm9, xmm0
+ punpckldq xmm0, xmm1
+ punpckhdq xmm9, xmm1
+ movdqa xmm11, xmm2
+ punpckldq xmm2, xmm3
+ punpckhdq xmm11, xmm3
+ movdqa xmm1, xmm0
+ punpcklqdq xmm0, xmm2
+ punpckhqdq xmm1, xmm2
+ movdqa xmm3, xmm9
+ punpcklqdq xmm9, xmm11
+ punpckhqdq xmm3, xmm11
+ movdqu xmmword ptr [rbx], xmm0
+ movdqu xmmword ptr [rbx+0x20], xmm1
+ movdqu xmmword ptr [rbx+0x40], xmm9
+ movdqu xmmword ptr [rbx+0x60], xmm3
+ movdqa xmm9, xmm4
+ punpckldq xmm4, xmm5
+ punpckhdq xmm9, xmm5
+ movdqa xmm11, xmm6
+ punpckldq xmm6, xmm7
+ punpckhdq xmm11, xmm7
+ movdqa xmm5, xmm4
+ punpcklqdq xmm4, xmm6
+ punpckhqdq xmm5, xmm6
+ movdqa xmm7, xmm9
+ punpcklqdq xmm9, xmm11
+ punpckhqdq xmm7, xmm11
+ movdqu xmmword ptr [rbx+0x10], xmm4
+ movdqu xmmword ptr [rbx+0x30], xmm5
+ movdqu xmmword ptr [rbx+0x50], xmm9
+ movdqu xmmword ptr [rbx+0x70], xmm7
+ movdqa xmm1, xmmword ptr [rsp+0x110]
+ movdqa xmm0, xmm1
+ paddd xmm1, xmmword ptr [rsp+0x150]
+ movdqa xmmword ptr [rsp+0x110], xmm1
+ pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip]
+ pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip]
+ pcmpgtd xmm0, xmm1
+ movdqa xmm1, xmmword ptr [rsp+0x120]
+ psubd xmm1, xmm0
+ movdqa xmmword ptr [rsp+0x120], xmm1
+ add rbx, 128
+ add rdi, 32
+ sub rsi, 4
+ cmp rsi, 4
+ jnc 2b
+ test rsi, rsi
+ jnz 3f
+4:
+ mov rsp, rbp
+ pop rbp
+ pop rbx
+ pop r12
+ pop r13
+ pop r14
+ pop r15
+ ret
+.p2align 5
+3:
+ test esi, 0x2
+ je 3f
+ movups xmm0, xmmword ptr [rcx]
+ movups xmm1, xmmword ptr [rcx+0x10]
+ movaps xmm8, xmm0
+ movaps xmm9, xmm1
+ movd xmm13, dword ptr [rsp+0x110]
+ movd xmm14, dword ptr [rsp+0x120]
+ punpckldq xmm13, xmm14
+ movaps xmmword ptr [rsp], xmm13
+ movd xmm14, dword ptr [rsp+0x114]
+ movd xmm13, dword ptr [rsp+0x124]
+ punpckldq xmm14, xmm13
+ movaps xmmword ptr [rsp+0x10], xmm14
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+0x8]
+ movzx eax, byte ptr [rbp+0x40]
+ or eax, r13d
+ xor edx, edx
+2:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
+ movaps xmm10, xmm2
+ movups xmm4, xmmword ptr [r8+rdx-0x40]
+ movups xmm5, xmmword ptr [r8+rdx-0x30]
+ movaps xmm3, xmm4
+ shufps xmm4, xmm5, 136
+ shufps xmm3, xmm5, 221
+ movaps xmm5, xmm3
+ movups xmm6, xmmword ptr [r8+rdx-0x20]
+ movups xmm7, xmmword ptr [r8+rdx-0x10]
+ movaps xmm3, xmm6
+ shufps xmm6, xmm7, 136
+ pshufd xmm6, xmm6, 0x93
+ shufps xmm3, xmm7, 221
+ pshufd xmm7, xmm3, 0x93
+ movups xmm12, xmmword ptr [r9+rdx-0x40]
+ movups xmm13, xmmword ptr [r9+rdx-0x30]
+ movaps xmm11, xmm12
+ shufps xmm12, xmm13, 136
+ shufps xmm11, xmm13, 221
+ movaps xmm13, xmm11
+ movups xmm14, xmmword ptr [r9+rdx-0x20]
+ movups xmm15, xmmword ptr [r9+rdx-0x10]
+ movaps xmm11, xmm14
+ shufps xmm14, xmm15, 136
+ pshufd xmm14, xmm14, 0x93
+ shufps xmm11, xmm15, 221
+ pshufd xmm15, xmm11, 0x93
+ shl rax, 0x20
+ or rax, 0x40
+ movq xmm3, rax
+ movdqa xmmword ptr [rsp+0x20], xmm3
+ movaps xmm3, xmmword ptr [rsp]
+ movaps xmm11, xmmword ptr [rsp+0x10]
+ punpcklqdq xmm3, xmmword ptr [rsp+0x20]
+ punpcklqdq xmm11, xmmword ptr [rsp+0x20]
+ mov al, 7
+9:
+ paddd xmm0, xmm4
+ paddd xmm8, xmm12
+ movaps xmmword ptr [rsp+0x20], xmm4
+ movaps xmmword ptr [rsp+0x30], xmm12
+ paddd xmm0, xmm1
+ paddd xmm8, xmm9
+ pxor xmm3, xmm0
+ pxor xmm11, xmm8
+ pshuflw xmm3, xmm3, 0xB1
+ pshufhw xmm3, xmm3, 0xB1
+ pshuflw xmm11, xmm11, 0xB1
+ pshufhw xmm11, xmm11, 0xB1
+ paddd xmm2, xmm3
+ paddd xmm10, xmm11
+ pxor xmm1, xmm2
+ pxor xmm9, xmm10
+ movdqa xmm4, xmm1
+ pslld xmm1, 20
+ psrld xmm4, 12
+ por xmm1, xmm4
+ movdqa xmm4, xmm9
+ pslld xmm9, 20
+ psrld xmm4, 12
+ por xmm9, xmm4
+ paddd xmm0, xmm5
+ paddd xmm8, xmm13
+ movaps xmmword ptr [rsp+0x40], xmm5
+ movaps xmmword ptr [rsp+0x50], xmm13
+ paddd xmm0, xmm1
+ paddd xmm8, xmm9
+ pxor xmm3, xmm0
+ pxor xmm11, xmm8
+ movdqa xmm13, xmm3
+ psrld xmm3, 8
+ pslld xmm13, 24
+ pxor xmm3, xmm13
+ movdqa xmm13, xmm11
+ psrld xmm11, 8
+ pslld xmm13, 24
+ pxor xmm11, xmm13
+ paddd xmm2, xmm3
+ paddd xmm10, xmm11
+ pxor xmm1, xmm2
+ pxor xmm9, xmm10
+ movdqa xmm4, xmm1
+ pslld xmm1, 25
+ psrld xmm4, 7
+ por xmm1, xmm4
+ movdqa xmm4, xmm9
+ pslld xmm9, 25
+ psrld xmm4, 7
+ por xmm9, xmm4
+ pshufd xmm0, xmm0, 0x93
+ pshufd xmm8, xmm8, 0x93
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm11, xmm11, 0x4E
+ pshufd xmm2, xmm2, 0x39
+ pshufd xmm10, xmm10, 0x39
+ paddd xmm0, xmm6
+ paddd xmm8, xmm14
+ paddd xmm0, xmm1
+ paddd xmm8, xmm9
+ pxor xmm3, xmm0
+ pxor xmm11, xmm8
+ pshuflw xmm3, xmm3, 0xB1
+ pshufhw xmm3, xmm3, 0xB1
+ pshuflw xmm11, xmm11, 0xB1
+ pshufhw xmm11, xmm11, 0xB1
+ paddd xmm2, xmm3
+ paddd xmm10, xmm11
+ pxor xmm1, xmm2
+ pxor xmm9, xmm10
+ movdqa xmm4, xmm1
+ pslld xmm1, 20
+ psrld xmm4, 12
+ por xmm1, xmm4
+ movdqa xmm4, xmm9
+ pslld xmm9, 20
+ psrld xmm4, 12
+ por xmm9, xmm4
+ paddd xmm0, xmm7
+ paddd xmm8, xmm15
+ paddd xmm0, xmm1
+ paddd xmm8, xmm9
+ pxor xmm3, xmm0
+ pxor xmm11, xmm8
+ movdqa xmm13, xmm3
+ psrld xmm3, 8
+ pslld xmm13, 24
+ pxor xmm3, xmm13
+ movdqa xmm13, xmm11
+ psrld xmm11, 8
+ pslld xmm13, 24
+ pxor xmm11, xmm13
+ paddd xmm2, xmm3
+ paddd xmm10, xmm11
+ pxor xmm1, xmm2
+ pxor xmm9, xmm10
+ movdqa xmm4, xmm1
+ pslld xmm1, 25
+ psrld xmm4, 7
+ por xmm1, xmm4
+ movdqa xmm4, xmm9
+ pslld xmm9, 25
+ psrld xmm4, 7
+ por xmm9, xmm4
+ pshufd xmm0, xmm0, 0x39
+ pshufd xmm8, xmm8, 0x39
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm11, xmm11, 0x4E
+ pshufd xmm2, xmm2, 0x93
+ pshufd xmm10, xmm10, 0x93
+ dec al
+ je 9f
+ movdqa xmm12, xmmword ptr [rsp+0x20]
+ movdqa xmm5, xmmword ptr [rsp+0x40]
+ pshufd xmm13, xmm12, 0x0F
+ shufps xmm12, xmm5, 214
+ pshufd xmm4, xmm12, 0x39
+ movdqa xmm12, xmm6
+ shufps xmm12, xmm7, 250
+ pand xmm13, xmmword ptr [PBLENDW_0x33_MASK+rip]
+ pand xmm12, xmmword ptr [PBLENDW_0xCC_MASK+rip]
+ por xmm13, xmm12
+ movdqa xmmword ptr [rsp+0x20], xmm13
+ movdqa xmm12, xmm7
+ punpcklqdq xmm12, xmm5
+ movdqa xmm13, xmm6
+ pand xmm12, xmmword ptr [PBLENDW_0x3F_MASK+rip]
+ pand xmm13, xmmword ptr [PBLENDW_0xC0_MASK+rip]
+ por xmm12, xmm13
+ pshufd xmm12, xmm12, 0x78
+ punpckhdq xmm5, xmm7
+ punpckldq xmm6, xmm5
+ pshufd xmm7, xmm6, 0x1E
+ movdqa xmmword ptr [rsp+0x40], xmm12
+ movdqa xmm5, xmmword ptr [rsp+0x30]
+ movdqa xmm13, xmmword ptr [rsp+0x50]
+ pshufd xmm6, xmm5, 0x0F
+ shufps xmm5, xmm13, 214
+ pshufd xmm12, xmm5, 0x39
+ movdqa xmm5, xmm14
+ shufps xmm5, xmm15, 250
+ pand xmm6, xmmword ptr [PBLENDW_0x33_MASK+rip]
+ pand xmm5, xmmword ptr [PBLENDW_0xCC_MASK+rip]
+ por xmm6, xmm5
+ movdqa xmm5, xmm15
+ punpcklqdq xmm5, xmm13
+ movdqa xmmword ptr [rsp+0x30], xmm2
+ movdqa xmm2, xmm14
+ pand xmm5, xmmword ptr [PBLENDW_0x3F_MASK+rip]
+ pand xmm2, xmmword ptr [PBLENDW_0xC0_MASK+rip]
+ por xmm5, xmm2
+ movdqa xmm2, xmmword ptr [rsp+0x30]
+ pshufd xmm5, xmm5, 0x78
+ punpckhdq xmm13, xmm15
+ punpckldq xmm14, xmm13
+ pshufd xmm15, xmm14, 0x1E
+ movdqa xmm13, xmm6
+ movdqa xmm14, xmm5
+ movdqa xmm5, xmmword ptr [rsp+0x20]
+ movdqa xmm6, xmmword ptr [rsp+0x40]
+ jmp 9b
+9:
+ pxor xmm0, xmm2
+ pxor xmm1, xmm3
+ pxor xmm8, xmm10
+ pxor xmm9, xmm11
+ mov eax, r13d
+ cmp rdx, r15
+ jne 2b
+ movups xmmword ptr [rbx], xmm0
+ movups xmmword ptr [rbx+0x10], xmm1
+ movups xmmword ptr [rbx+0x20], xmm8
+ movups xmmword ptr [rbx+0x30], xmm9
+ mov eax, dword ptr [rsp+0x130]
+ neg eax
+ mov r10d, dword ptr [rsp+0x110+8*rax]
+ mov r11d, dword ptr [rsp+0x120+8*rax]
+ mov dword ptr [rsp+0x110], r10d
+ mov dword ptr [rsp+0x120], r11d
+ add rdi, 16
+ add rbx, 64
+ sub rsi, 2
+3:
+ test esi, 0x1
+ je 4b
+ movups xmm0, xmmword ptr [rcx]
+ movups xmm1, xmmword ptr [rcx+0x10]
+ movd xmm13, dword ptr [rsp+0x110]
+ movd xmm14, dword ptr [rsp+0x120]
+ punpckldq xmm13, xmm14
+ mov r8, qword ptr [rdi]
+ movzx eax, byte ptr [rbp+0x40]
+ or eax, r13d
+ xor edx, edx
+2:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
+ shl rax, 32
+ or rax, 64
+ movq xmm12, rax
+ movdqa xmm3, xmm13
+ punpcklqdq xmm3, xmm12
+ movups xmm4, xmmword ptr [r8+rdx-0x40]
+ movups xmm5, xmmword ptr [r8+rdx-0x30]
+ movaps xmm8, xmm4
+ shufps xmm4, xmm5, 136
+ shufps xmm8, xmm5, 221
+ movaps xmm5, xmm8
+ movups xmm6, xmmword ptr [r8+rdx-0x20]
+ movups xmm7, xmmword ptr [r8+rdx-0x10]
+ movaps xmm8, xmm6
+ shufps xmm6, xmm7, 136
+ pshufd xmm6, xmm6, 0x93
+ shufps xmm8, xmm7, 221
+ pshufd xmm7, xmm8, 0x93
+ mov al, 7
+9:
+ paddd xmm0, xmm4
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshuflw xmm3, xmm3, 0xB1
+ pshufhw xmm3, xmm3, 0xB1
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm5
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ movdqa xmm14, xmm3
+ psrld xmm3, 8
+ pslld xmm14, 24
+ pxor xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 0x93
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm2, xmm2, 0x39
+ paddd xmm0, xmm6
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshuflw xmm3, xmm3, 0xB1
+ pshufhw xmm3, xmm3, 0xB1
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm7
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ movdqa xmm14, xmm3
+ psrld xmm3, 8
+ pslld xmm14, 24
+ pxor xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 0x39
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm2, xmm2, 0x93
+ dec al
+ jz 9f
+ movdqa xmm8, xmm4
+ shufps xmm8, xmm5, 214
+ pshufd xmm9, xmm4, 0x0F
+ pshufd xmm4, xmm8, 0x39
+ movdqa xmm8, xmm6
+ shufps xmm8, xmm7, 250
+ pand xmm9, xmmword ptr [PBLENDW_0x33_MASK+rip]
+ pand xmm8, xmmword ptr [PBLENDW_0xCC_MASK+rip]
+ por xmm9, xmm8
+ movdqa xmm8, xmm7
+ punpcklqdq xmm8, xmm5
+ movdqa xmm10, xmm6
+ pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK+rip]
+ pand xmm10, xmmword ptr [PBLENDW_0xC0_MASK+rip]
+ por xmm8, xmm10
+ pshufd xmm8, xmm8, 0x78
+ punpckhdq xmm5, xmm7
+ punpckldq xmm6, xmm5
+ pshufd xmm7, xmm6, 0x1E
+ movdqa xmm5, xmm9
+ movdqa xmm6, xmm8
+ jmp 9b
+9:
+ pxor xmm0, xmm2
+ pxor xmm1, xmm3
+ mov eax, r13d
+ cmp rdx, r15
+ jne 2b
+ movups xmmword ptr [rbx], xmm0
+ movups xmmword ptr [rbx+0x10], xmm1
+ jmp 4b
+
+.p2align 6
+zfs_blake3_compress_in_place_sse2: /* Runs 7 rounds over one 64-byte block at [rsi]; the 32 bytes at [rdi] are the input and are overwritten with the result. */
+ _CET_ENDBR /* NOTE(review): presumably SysV args rdi=cv, rsi=block, rdx=block_len, rcx=counter, r8=flags - confirm against the C prototype. */
+ movups xmm0, xmmword ptr [rdi] /* row0 = bytes 0..15 at rdi */
+ movups xmm1, xmmword ptr [rdi+0x10] /* row1 = bytes 16..31 at rdi */
+ movaps xmm2, xmmword ptr [BLAKE3_IV+rip] /* row2 = the four BLAKE3_IV dwords */
+ shl r8, 32 /* move the low 32 bits of r8 into the high half */
+ add rdx, r8 /* rdx = rdx + (r8 << 32); the full 64-bit rdx is used as-is (assumes its upper bits are zero - TODO confirm) */
+ movq xmm3, rcx /* low qword of row3 = rcx */
+ movq xmm4, rdx
+ punpcklqdq xmm3, xmm4 /* row3 = { rcx, rdx } */
+ movups xmm4, xmmword ptr [rsi] /* message dwords m0..m3 (lanes listed low to high below) */
+ movups xmm5, xmmword ptr [rsi+0x10] /* m4..m7 */
+ movaps xmm8, xmm4
+ shufps xmm4, xmm5, 136 /* 0x88: xmm4 = (m0, m2, m4, m6) */
+ shufps xmm8, xmm5, 221 /* 0xDD: xmm8 = (m1, m3, m5, m7) */
+ movaps xmm5, xmm8
+ movups xmm6, xmmword ptr [rsi+0x20] /* m8..m11 */
+ movups xmm7, xmmword ptr [rsi+0x30] /* m12..m15 */
+ movaps xmm8, xmm6
+ shufps xmm6, xmm7, 136 /* (m8, m10, m12, m14) */
+ pshufd xmm6, xmm6, 0x93 /* rotate lanes up by one: xmm6 = (m14, m8, m10, m12) */
+ shufps xmm8, xmm7, 221 /* (m9, m11, m13, m15) */
+ pshufd xmm7, xmm8, 0x93 /* xmm7 = (m15, m9, m11, m13) */
+ mov al, 7 /* al = number of rounds left */
+9: /* top of the round loop */
+ paddd xmm0, xmm4 /* row0 += message vector xmm4 */
+ paddd xmm0, xmm1 /* row0 += row1 */
+ pxor xmm3, xmm0 /* row3 ^= row0 */
+ pshuflw xmm3, xmm3, 0xB1 /* 0xB1 swaps adjacent 16-bit words (low qword) ... */
+ pshufhw xmm3, xmm3, 0xB1 /* ... and high qword: every dword of row3 is rotated by 16 bits */
+ paddd xmm2, xmm3 /* row2 += row3 */
+ pxor xmm1, xmm2 /* row1 ^= row2 */
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11 /* row1 = row1 rotated right by 12 bits per dword */
+ paddd xmm0, xmm5 /* row0 += message vector xmm5 */
+ paddd xmm0, xmm1 /* row0 += row1 */
+ pxor xmm3, xmm0 /* row3 ^= row0 */
+ movdqa xmm14, xmm3
+ psrld xmm3, 8
+ pslld xmm14, 24
+ pxor xmm3, xmm14 /* row3 rotated right by 8 bits (the two halves share no bits, so pxor acts as or) */
+ paddd xmm2, xmm3 /* row2 += row3 */
+ pxor xmm1, xmm2 /* row1 ^= row2 */
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11 /* row1 = row1 rotated right by 7 bits per dword */
+ pshufd xmm0, xmm0, 0x93 /* rotate row0 lanes up by one */
+ pshufd xmm3, xmm3, 0x4E /* swap the 64-bit halves of row3 */
+ pshufd xmm2, xmm2, 0x39 /* rotate row2 lanes down by one */
+ paddd xmm0, xmm6 /* second half of the round: same sequence with message vector xmm6 ... */
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshuflw xmm3, xmm3, 0xB1
+ pshufhw xmm3, xmm3, 0xB1 /* row3 rotated by 16 bits */
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11 /* row1 rotated right by 12 bits */
+ paddd xmm0, xmm7 /* ... and then with message vector xmm7 */
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ movdqa xmm14, xmm3
+ psrld xmm3, 8
+ pslld xmm14, 24
+ pxor xmm3, xmm14 /* row3 rotated right by 8 bits */
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11 /* row1 rotated right by 7 bits */
+ pshufd xmm0, xmm0, 0x39 /* inverse of the 0x93 rotation applied to row0 above */
+ pshufd xmm3, xmm3, 0x4E /* swap halves again (self-inverse) */
+ pshufd xmm2, xmm2, 0x93 /* inverse of the 0x39 rotation applied to row2 above */
+ dec al
+ jz 9f /* after the 7th round skip the message reshuffle and finish */
+ movdqa xmm8, xmm4 /* rearrange xmm4..xmm7 for the next round; xmm8..xmm10 are scratch. NOTE(review): presumably the BLAKE3 message permutation - confirm against the reference. */
+ shufps xmm8, xmm5, 214
+ pshufd xmm9, xmm4, 0x0F
+ pshufd xmm4, xmm8, 0x39 /* new xmm4 */
+ movdqa xmm8, xmm6
+ shufps xmm8, xmm7, 250
+ pand xmm9, xmmword ptr [PBLENDW_0x33_MASK+rip] /* keep dwords 0 and 2 of xmm9 */
+ pand xmm8, xmmword ptr [PBLENDW_0xCC_MASK+rip] /* keep dwords 1 and 3 of xmm8 */
+ por xmm9, xmm8 /* merge the two: becomes the new xmm5 below */
+ movdqa xmm8, xmm7
+ punpcklqdq xmm8, xmm5
+ movdqa xmm10, xmm6
+ pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK+rip] /* keep dwords 0..2 of xmm8 */
+ pand xmm10, xmmword ptr [PBLENDW_0xC0_MASK+rip] /* keep dword 3 of xmm10 */
+ por xmm8, xmm10
+ pshufd xmm8, xmm8, 0x78 /* becomes the new xmm6 below */
+ punpckhdq xmm5, xmm7
+ punpckldq xmm6, xmm5
+ pshufd xmm7, xmm6, 0x1E /* new xmm7 */
+ movdqa xmm5, xmm9 /* new xmm5 */
+ movdqa xmm6, xmm8 /* new xmm6 */
+ jmp 9b /* next round */
+9: /* all rounds done */
+ pxor xmm0, xmm2 /* row0 ^= row2 */
+ pxor xmm1, xmm3 /* row1 ^= row3 */
+ movups xmmword ptr [rdi], xmm0 /* write the 32-byte result back over the input at rdi */
+ movups xmmword ptr [rdi+0x10], xmm1
+ ret
+
+.p2align 6
+zfs_blake3_compress_xof_sse2: /* Runs 7 rounds over one 64-byte block at [rsi] using the 32 bytes at [rdi]; writes 64 bytes to [r9] and leaves [rdi] unmodified. */
+ _CET_ENDBR /* NOTE(review): presumably SysV args rdi=cv, rsi=block, rdx=block_len, rcx=counter, r8=flags, r9=out - confirm against the C prototype. */
+ movups xmm0, xmmword ptr [rdi] /* row0 = bytes 0..15 at rdi */
+ movups xmm1, xmmword ptr [rdi+0x10] /* row1 = bytes 16..31 at rdi */
+ movaps xmm2, xmmword ptr [BLAKE3_IV+rip] /* row2 = the four BLAKE3_IV dwords */
+ movzx eax, r8b /* rax = low byte of r8, zero-extended */
+ movzx edx, dl /* rdx = low byte of rdx, zero-extended */
+ shl rax, 32
+ add rdx, rax /* rdx = dl + (r8b << 32) */
+ movq xmm3, rcx /* low qword of row3 = rcx */
+ movq xmm4, rdx
+ punpcklqdq xmm3, xmm4 /* row3 = { rcx, rdx } */
+ movups xmm4, xmmword ptr [rsi] /* message dwords m0..m3 (lanes listed low to high below) */
+ movups xmm5, xmmword ptr [rsi+0x10] /* m4..m7 */
+ movaps xmm8, xmm4
+ shufps xmm4, xmm5, 136 /* 0x88: xmm4 = (m0, m2, m4, m6) */
+ shufps xmm8, xmm5, 221 /* 0xDD: xmm8 = (m1, m3, m5, m7) */
+ movaps xmm5, xmm8
+ movups xmm6, xmmword ptr [rsi+0x20] /* m8..m11 */
+ movups xmm7, xmmword ptr [rsi+0x30] /* m12..m15 */
+ movaps xmm8, xmm6
+ shufps xmm6, xmm7, 136 /* (m8, m10, m12, m14) */
+ pshufd xmm6, xmm6, 0x93 /* rotate lanes up by one: xmm6 = (m14, m8, m10, m12) */
+ shufps xmm8, xmm7, 221 /* (m9, m11, m13, m15) */
+ pshufd xmm7, xmm8, 0x93 /* xmm7 = (m15, m9, m11, m13) */
+ mov al, 7 /* al = number of rounds left (rax is free again after the add above) */
+9: /* top of the round loop */
+ paddd xmm0, xmm4 /* row0 += message vector xmm4 */
+ paddd xmm0, xmm1 /* row0 += row1 */
+ pxor xmm3, xmm0 /* row3 ^= row0 */
+ pshuflw xmm3, xmm3, 0xB1 /* 0xB1 swaps adjacent 16-bit words (low qword) ... */
+ pshufhw xmm3, xmm3, 0xB1 /* ... and high qword: every dword of row3 is rotated by 16 bits */
+ paddd xmm2, xmm3 /* row2 += row3 */
+ pxor xmm1, xmm2 /* row1 ^= row2 */
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11 /* row1 = row1 rotated right by 12 bits per dword */
+ paddd xmm0, xmm5 /* row0 += message vector xmm5 */
+ paddd xmm0, xmm1 /* row0 += row1 */
+ pxor xmm3, xmm0 /* row3 ^= row0 */
+ movdqa xmm14, xmm3
+ psrld xmm3, 8
+ pslld xmm14, 24
+ pxor xmm3, xmm14 /* row3 rotated right by 8 bits (the two halves share no bits, so pxor acts as or) */
+ paddd xmm2, xmm3 /* row2 += row3 */
+ pxor xmm1, xmm2 /* row1 ^= row2 */
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11 /* row1 = row1 rotated right by 7 bits per dword */
+ pshufd xmm0, xmm0, 0x93 /* rotate row0 lanes up by one */
+ pshufd xmm3, xmm3, 0x4E /* swap the 64-bit halves of row3 */
+ pshufd xmm2, xmm2, 0x39 /* rotate row2 lanes down by one */
+ paddd xmm0, xmm6 /* second half of the round: same sequence with message vector xmm6 ... */
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshuflw xmm3, xmm3, 0xB1
+ pshufhw xmm3, xmm3, 0xB1 /* row3 rotated by 16 bits */
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11 /* row1 rotated right by 12 bits */
+ paddd xmm0, xmm7 /* ... and then with message vector xmm7 */
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ movdqa xmm14, xmm3
+ psrld xmm3, 8
+ pslld xmm14, 24
+ pxor xmm3, xmm14 /* row3 rotated right by 8 bits */
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11 /* row1 rotated right by 7 bits */
+ pshufd xmm0, xmm0, 0x39 /* inverse of the 0x93 rotation applied to row0 above */
+ pshufd xmm3, xmm3, 0x4E /* swap halves again (self-inverse) */
+ pshufd xmm2, xmm2, 0x93 /* inverse of the 0x39 rotation applied to row2 above */
+ dec al
+ jz 9f /* after the 7th round skip the message reshuffle and finish */
+ movdqa xmm8, xmm4 /* rearrange xmm4..xmm7 for the next round; xmm8..xmm10 are scratch. NOTE(review): presumably the BLAKE3 message permutation - confirm against the reference. */
+ shufps xmm8, xmm5, 214
+ pshufd xmm9, xmm4, 0x0F
+ pshufd xmm4, xmm8, 0x39 /* new xmm4 */
+ movdqa xmm8, xmm6
+ shufps xmm8, xmm7, 250
+ pand xmm9, xmmword ptr [PBLENDW_0x33_MASK+rip] /* keep dwords 0 and 2 of xmm9 */
+ pand xmm8, xmmword ptr [PBLENDW_0xCC_MASK+rip] /* keep dwords 1 and 3 of xmm8 */
+ por xmm9, xmm8 /* merge the two: becomes the new xmm5 below */
+ movdqa xmm8, xmm7
+ punpcklqdq xmm8, xmm5
+ movdqa xmm10, xmm6
+ pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK+rip] /* keep dwords 0..2 of xmm8 */
+ pand xmm10, xmmword ptr [PBLENDW_0xC0_MASK+rip] /* keep dword 3 of xmm10 */
+ por xmm8, xmm10
+ pshufd xmm8, xmm8, 0x78 /* becomes the new xmm6 below */
+ punpckhdq xmm5, xmm7
+ punpckldq xmm6, xmm5
+ pshufd xmm7, xmm6, 0x1E /* new xmm7 */
+ movdqa xmm5, xmm9 /* new xmm5 */
+ movdqa xmm6, xmm8 /* new xmm6 */
+ jmp 9b /* next round */
+9: /* all rounds done */
+ movdqu xmm4, xmmword ptr [rdi] /* reload the original 32 input bytes from rdi (message registers are no longer needed) */
+ movdqu xmm5, xmmword ptr [rdi+0x10]
+ pxor xmm0, xmm2 /* out[0..15] = row0 ^ row2 */
+ pxor xmm1, xmm3 /* out[16..31] = row1 ^ row3 */
+ pxor xmm2, xmm4 /* out[32..47] = row2 ^ original bytes 0..15 at rdi */
+ pxor xmm3, xmm5 /* out[48..63] = row3 ^ original bytes 16..31 at rdi */
+ movups xmmword ptr [r9], xmm0 /* store the 64-byte result at r9 */
+ movups xmmword ptr [r9+0x10], xmm1
+ movups xmmword ptr [r9+0x20], xmm2
+ movups xmmword ptr [r9+0x30], xmm3
+ ret
+
+.size zfs_blake3_hash_many_sse2, . - zfs_blake3_hash_many_sse2 /* '.' is the current location, past the end of all three routines, so each size below runs from its label to this same point */
+.size zfs_blake3_compress_in_place_sse2, . - zfs_blake3_compress_in_place_sse2
+.size zfs_blake3_compress_xof_sse2, . - zfs_blake3_compress_xof_sse2
+
+#ifdef __APPLE__
+.static_data
+#else
+.section .rodata
+#endif
+.p2align 6 /* align the constant table to 2^6 = 64 bytes */
+BLAKE3_IV: /* four 32-bit IV words; loaded as one 16-byte vector, e.g. movaps xmm2, [BLAKE3_IV+rip] */
+ .long 0x6A09E667, 0xBB67AE85
+ .long 0x3C6EF372, 0xA54FF53A
+ADD0: /* per-lane values 0, 1, 2, 3 */
+ .long 0, 1, 2, 3
+ADD1: /* the value 4 in every lane */
+ .long 4, 4, 4, 4
+BLAKE3_IV_0: /* BLAKE3_IV word 0 repeated in all four lanes */
+ .long 0x6A09E667, 0x6A09E667, 0x6A09E667, 0x6A09E667
+BLAKE3_IV_1: /* BLAKE3_IV word 1 repeated in all four lanes */
+ .long 0xBB67AE85, 0xBB67AE85, 0xBB67AE85, 0xBB67AE85
+BLAKE3_IV_2: /* BLAKE3_IV word 2 repeated in all four lanes */
+ .long 0x3C6EF372, 0x3C6EF372, 0x3C6EF372, 0x3C6EF372
+BLAKE3_IV_3: /* BLAKE3_IV word 3 repeated in all four lanes */
+ .long 0xA54FF53A, 0xA54FF53A, 0xA54FF53A, 0xA54FF53A
+BLAKE3_BLOCK_LEN: /* the value 64 in every lane */
+ .long 64, 64, 64, 64
+CMP_MSB_MASK: /* sign bit of every dword; the hash_many routine XORs it into both operands before pcmpgtd so the signed compare orders them as unsigned values */
+ .long 0x80000000, 0x80000000, 0x80000000, 0x80000000
+PBLENDW_0x33_MASK: /* lane masks used with pand/por to merge two registers; presumably named after the pblendw immediates they stand in for. This one keeps dwords 0 and 2. */
+ .long 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF, 0x00000000
+PBLENDW_0xCC_MASK: /* keeps dwords 1 and 3 (complement of the 0x33 mask) */
+ .long 0x00000000, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF
+PBLENDW_0x3F_MASK: /* keeps dwords 0, 1 and 2 */
+ .long 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000
+PBLENDW_0xC0_MASK: /* keeps dword 3 only (complement of the 0x3F mask) */
+ .long 0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF
+
+#endif /* HAVE_SSE2 */
+
+#ifdef __ELF__
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_sse41.S b/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_sse41.S
new file mode 100644
index 000000000000..1c40236f0628
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_sse41.S
@@ -0,0 +1,2058 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3
+ * Copyright (c) 2019-2020 Samuel Neves
+ * Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
+ */
+
+#if defined(HAVE_SSE4_1)
+
+#define _ASM
+#include <sys/asm_linkage.h>
+
+#if defined(__ELF__) && defined(__CET__) && defined(__has_include)
+#if __has_include(<cet.h>)
+#include <cet.h>
+#endif /* __has_include(<cet.h>) */
+#endif /* __ELF__ && __CET__ && __has_include */
+
+#if !defined(_CET_ENDBR)
+#define _CET_ENDBR /* fallback: expands to nothing when no earlier header defined it */
+#endif /* !_CET_ENDBR */
+
+.intel_syntax noprefix /* Intel operand order (destination first), register names without the % prefix */
+.global zfs_blake3_compress_in_place_sse41 /* make the three SSE4.1 entry points visible to the linker */
+.global zfs_blake3_compress_xof_sse41
+.global zfs_blake3_hash_many_sse41
+
+.text
+.type zfs_blake3_hash_many_sse41,@function /* tag the three symbols as function symbols */
+.type zfs_blake3_compress_in_place_sse41,@function
+.type zfs_blake3_compress_xof_sse41,@function
+
+.p2align 6
+zfs_blake3_hash_many_sse41:
+ _CET_ENDBR
+ push r15
+ push r14
+ push r13
+ push r12
+ push rbx
+ push rbp
+ mov rbp, rsp
+ sub rsp, 360
+ and rsp, 0xFFFFFFFFFFFFFFC0
+ neg r9d
+ movd xmm0, r9d
+ pshufd xmm0, xmm0, 0x00
+ movdqa xmmword ptr [rsp+0x130], xmm0
+ movdqa xmm1, xmm0
+ pand xmm1, xmmword ptr [ADD0+rip]
+ pand xmm0, xmmword ptr [ADD1+rip]
+ movdqa xmmword ptr [rsp+0x150], xmm0
+ movd xmm0, r8d
+ pshufd xmm0, xmm0, 0x00
+ paddd xmm0, xmm1
+ movdqa xmmword ptr [rsp+0x110], xmm0
+ pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip]
+ pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip]
+ pcmpgtd xmm1, xmm0
+ shr r8, 32
+ movd xmm2, r8d
+ pshufd xmm2, xmm2, 0x00
+ psubd xmm2, xmm1
+ movdqa xmmword ptr [rsp+0x120], xmm2
+ mov rbx, qword ptr [rbp+0x50]
+ mov r15, rdx
+ shl r15, 6
+ movzx r13d, byte ptr [rbp+0x38]
+ movzx r12d, byte ptr [rbp+0x48]
+ cmp rsi, 4
+ jc 3f
+2:
+ movdqu xmm3, xmmword ptr [rcx]
+ pshufd xmm0, xmm3, 0x00
+ pshufd xmm1, xmm3, 0x55
+ pshufd xmm2, xmm3, 0xAA
+ pshufd xmm3, xmm3, 0xFF
+ movdqu xmm7, xmmword ptr [rcx+0x10]
+ pshufd xmm4, xmm7, 0x00
+ pshufd xmm5, xmm7, 0x55
+ pshufd xmm6, xmm7, 0xAA
+ pshufd xmm7, xmm7, 0xFF
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+0x8]
+ mov r10, qword ptr [rdi+0x10]
+ mov r11, qword ptr [rdi+0x18]
+ movzx eax, byte ptr [rbp+0x40]
+ or eax, r13d
+ xor edx, edx
+9:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ movdqu xmm8, xmmword ptr [r8+rdx-0x40]
+ movdqu xmm9, xmmword ptr [r9+rdx-0x40]
+ movdqu xmm10, xmmword ptr [r10+rdx-0x40]
+ movdqu xmm11, xmmword ptr [r11+rdx-0x40]
+ movdqa xmm12, xmm8
+ punpckldq xmm8, xmm9
+ punpckhdq xmm12, xmm9
+ movdqa xmm14, xmm10
+ punpckldq xmm10, xmm11
+ punpckhdq xmm14, xmm11
+ movdqa xmm9, xmm8
+ punpcklqdq xmm8, xmm10
+ punpckhqdq xmm9, xmm10
+ movdqa xmm13, xmm12
+ punpcklqdq xmm12, xmm14
+ punpckhqdq xmm13, xmm14
+ movdqa xmmword ptr [rsp], xmm8
+ movdqa xmmword ptr [rsp+0x10], xmm9
+ movdqa xmmword ptr [rsp+0x20], xmm12
+ movdqa xmmword ptr [rsp+0x30], xmm13
+ movdqu xmm8, xmmword ptr [r8+rdx-0x30]
+ movdqu xmm9, xmmword ptr [r9+rdx-0x30]
+ movdqu xmm10, xmmword ptr [r10+rdx-0x30]
+ movdqu xmm11, xmmword ptr [r11+rdx-0x30]
+ movdqa xmm12, xmm8
+ punpckldq xmm8, xmm9
+ punpckhdq xmm12, xmm9
+ movdqa xmm14, xmm10
+ punpckldq xmm10, xmm11
+ punpckhdq xmm14, xmm11
+ movdqa xmm9, xmm8
+ punpcklqdq xmm8, xmm10
+ punpckhqdq xmm9, xmm10
+ movdqa xmm13, xmm12
+ punpcklqdq xmm12, xmm14
+ punpckhqdq xmm13, xmm14
+ movdqa xmmword ptr [rsp+0x40], xmm8
+ movdqa xmmword ptr [rsp+0x50], xmm9
+ movdqa xmmword ptr [rsp+0x60], xmm12
+ movdqa xmmword ptr [rsp+0x70], xmm13
+ movdqu xmm8, xmmword ptr [r8+rdx-0x20]
+ movdqu xmm9, xmmword ptr [r9+rdx-0x20]
+ movdqu xmm10, xmmword ptr [r10+rdx-0x20]
+ movdqu xmm11, xmmword ptr [r11+rdx-0x20]
+ movdqa xmm12, xmm8
+ punpckldq xmm8, xmm9
+ punpckhdq xmm12, xmm9
+ movdqa xmm14, xmm10
+ punpckldq xmm10, xmm11
+ punpckhdq xmm14, xmm11
+ movdqa xmm9, xmm8
+ punpcklqdq xmm8, xmm10
+ punpckhqdq xmm9, xmm10
+ movdqa xmm13, xmm12
+ punpcklqdq xmm12, xmm14
+ punpckhqdq xmm13, xmm14
+ movdqa xmmword ptr [rsp+0x80], xmm8
+ movdqa xmmword ptr [rsp+0x90], xmm9
+ movdqa xmmword ptr [rsp+0xA0], xmm12
+ movdqa xmmword ptr [rsp+0xB0], xmm13
+ movdqu xmm8, xmmword ptr [r8+rdx-0x10]
+ movdqu xmm9, xmmword ptr [r9+rdx-0x10]
+ movdqu xmm10, xmmword ptr [r10+rdx-0x10]
+ movdqu xmm11, xmmword ptr [r11+rdx-0x10]
+ movdqa xmm12, xmm8
+ punpckldq xmm8, xmm9
+ punpckhdq xmm12, xmm9
+ movdqa xmm14, xmm10
+ punpckldq xmm10, xmm11
+ punpckhdq xmm14, xmm11
+ movdqa xmm9, xmm8
+ punpcklqdq xmm8, xmm10
+ punpckhqdq xmm9, xmm10
+ movdqa xmm13, xmm12
+ punpcklqdq xmm12, xmm14
+ punpckhqdq xmm13, xmm14
+ movdqa xmmword ptr [rsp+0xC0], xmm8
+ movdqa xmmword ptr [rsp+0xD0], xmm9
+ movdqa xmmword ptr [rsp+0xE0], xmm12
+ movdqa xmmword ptr [rsp+0xF0], xmm13
+ movdqa xmm9, xmmword ptr [BLAKE3_IV_1+rip]
+ movdqa xmm10, xmmword ptr [BLAKE3_IV_2+rip]
+ movdqa xmm11, xmmword ptr [BLAKE3_IV_3+rip]
+ movdqa xmm12, xmmword ptr [rsp+0x110]
+ movdqa xmm13, xmmword ptr [rsp+0x120]
+ movdqa xmm14, xmmword ptr [BLAKE3_BLOCK_LEN+rip]
+ movd xmm15, eax
+ pshufd xmm15, xmm15, 0x00
+ prefetcht0 [r8+rdx+0x80]
+ prefetcht0 [r9+rdx+0x80]
+ prefetcht0 [r10+rdx+0x80]
+ prefetcht0 [r11+rdx+0x80]
+ paddd xmm0, xmmword ptr [rsp]
+ paddd xmm1, xmmword ptr [rsp+0x20]
+ paddd xmm2, xmmword ptr [rsp+0x40]
+ paddd xmm3, xmmword ptr [rsp+0x60]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT16+rip]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [BLAKE3_IV_0+rip]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x10]
+ paddd xmm1, xmmword ptr [rsp+0x30]
+ paddd xmm2, xmmword ptr [rsp+0x50]
+ paddd xmm3, xmmword ptr [rsp+0x70]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT8+rip]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x80]
+ paddd xmm1, xmmword ptr [rsp+0xA0]
+ paddd xmm2, xmmword ptr [rsp+0xC0]
+ paddd xmm3, xmmword ptr [rsp+0xE0]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT16+rip]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x90]
+ paddd xmm1, xmmword ptr [rsp+0xB0]
+ paddd xmm2, xmmword ptr [rsp+0xD0]
+ paddd xmm3, xmmword ptr [rsp+0xF0]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT8+rip]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x20]
+ paddd xmm1, xmmword ptr [rsp+0x30]
+ paddd xmm2, xmmword ptr [rsp+0x70]
+ paddd xmm3, xmmword ptr [rsp+0x40]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT16+rip]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x60]
+ paddd xmm1, xmmword ptr [rsp+0xA0]
+ paddd xmm2, xmmword ptr [rsp]
+ paddd xmm3, xmmword ptr [rsp+0xD0]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT8+rip]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x10]
+ paddd xmm1, xmmword ptr [rsp+0xC0]
+ paddd xmm2, xmmword ptr [rsp+0x90]
+ paddd xmm3, xmmword ptr [rsp+0xF0]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT16+rip]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xB0]
+ paddd xmm1, xmmword ptr [rsp+0x50]
+ paddd xmm2, xmmword ptr [rsp+0xE0]
+ paddd xmm3, xmmword ptr [rsp+0x80]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT8+rip]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x30]
+ paddd xmm1, xmmword ptr [rsp+0xA0]
+ paddd xmm2, xmmword ptr [rsp+0xD0]
+ paddd xmm3, xmmword ptr [rsp+0x70]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT16+rip]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x40]
+ paddd xmm1, xmmword ptr [rsp+0xC0]
+ paddd xmm2, xmmword ptr [rsp+0x20]
+ paddd xmm3, xmmword ptr [rsp+0xE0]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT8+rip]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x60]
+ paddd xmm1, xmmword ptr [rsp+0x90]
+ paddd xmm2, xmmword ptr [rsp+0xB0]
+ paddd xmm3, xmmword ptr [rsp+0x80]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT16+rip]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x50]
+ paddd xmm1, xmmword ptr [rsp]
+ paddd xmm2, xmmword ptr [rsp+0xF0]
+ paddd xmm3, xmmword ptr [rsp+0x10]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT8+rip]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xA0]
+ paddd xmm1, xmmword ptr [rsp+0xC0]
+ paddd xmm2, xmmword ptr [rsp+0xE0]
+ paddd xmm3, xmmword ptr [rsp+0xD0]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT16+rip]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x70]
+ paddd xmm1, xmmword ptr [rsp+0x90]
+ paddd xmm2, xmmword ptr [rsp+0x30]
+ paddd xmm3, xmmword ptr [rsp+0xF0]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT8+rip]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x40]
+ paddd xmm1, xmmword ptr [rsp+0xB0]
+ paddd xmm2, xmmword ptr [rsp+0x50]
+ paddd xmm3, xmmword ptr [rsp+0x10]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT16+rip]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp]
+ paddd xmm1, xmmword ptr [rsp+0x20]
+ paddd xmm2, xmmword ptr [rsp+0x80]
+ paddd xmm3, xmmword ptr [rsp+0x60]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT8+rip]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xC0]
+ paddd xmm1, xmmword ptr [rsp+0x90]
+ paddd xmm2, xmmword ptr [rsp+0xF0]
+ paddd xmm3, xmmword ptr [rsp+0xE0]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT16+rip]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xD0]
+ paddd xmm1, xmmword ptr [rsp+0xB0]
+ paddd xmm2, xmmword ptr [rsp+0xA0]
+ paddd xmm3, xmmword ptr [rsp+0x80]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT8+rip]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x70]
+ paddd xmm1, xmmword ptr [rsp+0x50]
+ paddd xmm2, xmmword ptr [rsp]
+ paddd xmm3, xmmword ptr [rsp+0x60]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT16+rip]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x20]
+ paddd xmm1, xmmword ptr [rsp+0x30]
+ paddd xmm2, xmmword ptr [rsp+0x10]
+ paddd xmm3, xmmword ptr [rsp+0x40]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT8+rip]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x90]
+ paddd xmm1, xmmword ptr [rsp+0xB0]
+ paddd xmm2, xmmword ptr [rsp+0x80]
+ paddd xmm3, xmmword ptr [rsp+0xF0]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT16+rip]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xE0]
+ paddd xmm1, xmmword ptr [rsp+0x50]
+ paddd xmm2, xmmword ptr [rsp+0xC0]
+ paddd xmm3, xmmword ptr [rsp+0x10]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT8+rip]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xD0]
+ paddd xmm1, xmmword ptr [rsp]
+ paddd xmm2, xmmword ptr [rsp+0x20]
+ paddd xmm3, xmmword ptr [rsp+0x40]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT16+rip]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x30]
+ paddd xmm1, xmmword ptr [rsp+0xA0]
+ paddd xmm2, xmmword ptr [rsp+0x60]
+ paddd xmm3, xmmword ptr [rsp+0x70]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT8+rip]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xB0]
+ paddd xmm1, xmmword ptr [rsp+0x50]
+ paddd xmm2, xmmword ptr [rsp+0x10]
+ paddd xmm3, xmmword ptr [rsp+0x80]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT16+rip]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xF0]
+ paddd xmm1, xmmword ptr [rsp]
+ paddd xmm2, xmmword ptr [rsp+0x90]
+ paddd xmm3, xmmword ptr [rsp+0x60]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT8+rip]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xE0]
+ paddd xmm1, xmmword ptr [rsp+0x20]
+ paddd xmm2, xmmword ptr [rsp+0x30]
+ paddd xmm3, xmmword ptr [rsp+0x70]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT16+rip]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xA0]
+ paddd xmm1, xmmword ptr [rsp+0xC0]
+ paddd xmm2, xmmword ptr [rsp+0x40]
+ paddd xmm3, xmmword ptr [rsp+0xD0]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT8+rip]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ pxor xmm0, xmm8
+ pxor xmm1, xmm9
+ pxor xmm2, xmm10
+ pxor xmm3, xmm11
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ pxor xmm4, xmm12
+ pxor xmm5, xmm13
+ pxor xmm6, xmm14
+ pxor xmm7, xmm15
+ mov eax, r13d
+ jne 9b
+ movdqa xmm9, xmm0
+ punpckldq xmm0, xmm1
+ punpckhdq xmm9, xmm1
+ movdqa xmm11, xmm2
+ punpckldq xmm2, xmm3
+ punpckhdq xmm11, xmm3
+ movdqa xmm1, xmm0
+ punpcklqdq xmm0, xmm2
+ punpckhqdq xmm1, xmm2
+ movdqa xmm3, xmm9
+ punpcklqdq xmm9, xmm11
+ punpckhqdq xmm3, xmm11
+ movdqu xmmword ptr [rbx], xmm0
+ movdqu xmmword ptr [rbx+0x20], xmm1
+ movdqu xmmword ptr [rbx+0x40], xmm9
+ movdqu xmmword ptr [rbx+0x60], xmm3
+ movdqa xmm9, xmm4
+ punpckldq xmm4, xmm5
+ punpckhdq xmm9, xmm5
+ movdqa xmm11, xmm6
+ punpckldq xmm6, xmm7
+ punpckhdq xmm11, xmm7
+ movdqa xmm5, xmm4
+ punpcklqdq xmm4, xmm6
+ punpckhqdq xmm5, xmm6
+ movdqa xmm7, xmm9
+ punpcklqdq xmm9, xmm11
+ punpckhqdq xmm7, xmm11
+ movdqu xmmword ptr [rbx+0x10], xmm4
+ movdqu xmmword ptr [rbx+0x30], xmm5
+ movdqu xmmword ptr [rbx+0x50], xmm9
+ movdqu xmmword ptr [rbx+0x70], xmm7
+ movdqa xmm1, xmmword ptr [rsp+0x110]
+ movdqa xmm0, xmm1
+ paddd xmm1, xmmword ptr [rsp+0x150]
+ movdqa xmmword ptr [rsp+0x110], xmm1
+ pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip]
+ pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip]
+ pcmpgtd xmm0, xmm1
+ movdqa xmm1, xmmword ptr [rsp+0x120]
+ psubd xmm1, xmm0
+ movdqa xmmword ptr [rsp+0x120], xmm1
+ add rbx, 128
+ add rdi, 32
+ sub rsi, 4
+ cmp rsi, 4
+ jnc 2b
+ test rsi, rsi
+ jnz 3f
+4:
+ mov rsp, rbp
+ pop rbp
+ pop rbx
+ pop r12
+ pop r13
+ pop r14
+ pop r15
+ ret
+.p2align 5
+3:
+ test esi, 0x2
+ je 3f
+ movups xmm0, xmmword ptr [rcx]
+ movups xmm1, xmmword ptr [rcx+0x10]
+ movaps xmm8, xmm0
+ movaps xmm9, xmm1
+ movd xmm13, dword ptr [rsp+0x110]
+ pinsrd xmm13, dword ptr [rsp+0x120], 1
+ pinsrd xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
+ movaps xmmword ptr [rsp], xmm13
+ movd xmm14, dword ptr [rsp+0x114]
+ pinsrd xmm14, dword ptr [rsp+0x124], 1
+ pinsrd xmm14, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
+ movaps xmmword ptr [rsp+0x10], xmm14
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+0x8]
+ movzx eax, byte ptr [rbp+0x40]
+ or eax, r13d
+ xor edx, edx
+2:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
+ movaps xmm10, xmm2
+ movups xmm4, xmmword ptr [r8+rdx-0x40]
+ movups xmm5, xmmword ptr [r8+rdx-0x30]
+ movaps xmm3, xmm4
+ shufps xmm4, xmm5, 136
+ shufps xmm3, xmm5, 221
+ movaps xmm5, xmm3
+ movups xmm6, xmmword ptr [r8+rdx-0x20]
+ movups xmm7, xmmword ptr [r8+rdx-0x10]
+ movaps xmm3, xmm6
+ shufps xmm6, xmm7, 136
+ pshufd xmm6, xmm6, 0x93
+ shufps xmm3, xmm7, 221
+ pshufd xmm7, xmm3, 0x93
+ movups xmm12, xmmword ptr [r9+rdx-0x40]
+ movups xmm13, xmmword ptr [r9+rdx-0x30]
+ movaps xmm11, xmm12
+ shufps xmm12, xmm13, 136
+ shufps xmm11, xmm13, 221
+ movaps xmm13, xmm11
+ movups xmm14, xmmword ptr [r9+rdx-0x20]
+ movups xmm15, xmmword ptr [r9+rdx-0x10]
+ movaps xmm11, xmm14
+ shufps xmm14, xmm15, 136
+ pshufd xmm14, xmm14, 0x93
+ shufps xmm11, xmm15, 221
+ pshufd xmm15, xmm11, 0x93
+ movaps xmm3, xmmword ptr [rsp]
+ movaps xmm11, xmmword ptr [rsp+0x10]
+ pinsrd xmm3, eax, 3
+ pinsrd xmm11, eax, 3
+ mov al, 7
+9:
+ paddd xmm0, xmm4
+ paddd xmm8, xmm12
+ movaps xmmword ptr [rsp+0x20], xmm4
+ movaps xmmword ptr [rsp+0x30], xmm12
+ paddd xmm0, xmm1
+ paddd xmm8, xmm9
+ pxor xmm3, xmm0
+ pxor xmm11, xmm8
+ movaps xmm12, xmmword ptr [ROT16+rip]
+ pshufb xmm3, xmm12
+ pshufb xmm11, xmm12
+ paddd xmm2, xmm3
+ paddd xmm10, xmm11
+ pxor xmm1, xmm2
+ pxor xmm9, xmm10
+ movdqa xmm4, xmm1
+ pslld xmm1, 20
+ psrld xmm4, 12
+ por xmm1, xmm4
+ movdqa xmm4, xmm9
+ pslld xmm9, 20
+ psrld xmm4, 12
+ por xmm9, xmm4
+ paddd xmm0, xmm5
+ paddd xmm8, xmm13
+ movaps xmmword ptr [rsp+0x40], xmm5
+ movaps xmmword ptr [rsp+0x50], xmm13
+ paddd xmm0, xmm1
+ paddd xmm8, xmm9
+ pxor xmm3, xmm0
+ pxor xmm11, xmm8
+ movaps xmm13, xmmword ptr [ROT8+rip]
+ pshufb xmm3, xmm13
+ pshufb xmm11, xmm13
+ paddd xmm2, xmm3
+ paddd xmm10, xmm11
+ pxor xmm1, xmm2
+ pxor xmm9, xmm10
+ movdqa xmm4, xmm1
+ pslld xmm1, 25
+ psrld xmm4, 7
+ por xmm1, xmm4
+ movdqa xmm4, xmm9
+ pslld xmm9, 25
+ psrld xmm4, 7
+ por xmm9, xmm4
+ pshufd xmm0, xmm0, 0x93
+ pshufd xmm8, xmm8, 0x93
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm11, xmm11, 0x4E
+ pshufd xmm2, xmm2, 0x39
+ pshufd xmm10, xmm10, 0x39
+ paddd xmm0, xmm6
+ paddd xmm8, xmm14
+ paddd xmm0, xmm1
+ paddd xmm8, xmm9
+ pxor xmm3, xmm0
+ pxor xmm11, xmm8
+ pshufb xmm3, xmm12
+ pshufb xmm11, xmm12
+ paddd xmm2, xmm3
+ paddd xmm10, xmm11
+ pxor xmm1, xmm2
+ pxor xmm9, xmm10
+ movdqa xmm4, xmm1
+ pslld xmm1, 20
+ psrld xmm4, 12
+ por xmm1, xmm4
+ movdqa xmm4, xmm9
+ pslld xmm9, 20
+ psrld xmm4, 12
+ por xmm9, xmm4
+ paddd xmm0, xmm7
+ paddd xmm8, xmm15
+ paddd xmm0, xmm1
+ paddd xmm8, xmm9
+ pxor xmm3, xmm0
+ pxor xmm11, xmm8
+ pshufb xmm3, xmm13
+ pshufb xmm11, xmm13
+ paddd xmm2, xmm3
+ paddd xmm10, xmm11
+ pxor xmm1, xmm2
+ pxor xmm9, xmm10
+ movdqa xmm4, xmm1
+ pslld xmm1, 25
+ psrld xmm4, 7
+ por xmm1, xmm4
+ movdqa xmm4, xmm9
+ pslld xmm9, 25
+ psrld xmm4, 7
+ por xmm9, xmm4
+ pshufd xmm0, xmm0, 0x39
+ pshufd xmm8, xmm8, 0x39
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm11, xmm11, 0x4E
+ pshufd xmm2, xmm2, 0x93
+ pshufd xmm10, xmm10, 0x93
+ dec al
+ je 9f
+ movdqa xmm12, xmmword ptr [rsp+0x20]
+ movdqa xmm5, xmmword ptr [rsp+0x40]
+ pshufd xmm13, xmm12, 0x0F
+ shufps xmm12, xmm5, 214
+ pshufd xmm4, xmm12, 0x39
+ movdqa xmm12, xmm6
+ shufps xmm12, xmm7, 250
+ pblendw xmm13, xmm12, 0xCC
+ movdqa xmm12, xmm7
+ punpcklqdq xmm12, xmm5
+ pblendw xmm12, xmm6, 0xC0
+ pshufd xmm12, xmm12, 0x78
+ punpckhdq xmm5, xmm7
+ punpckldq xmm6, xmm5
+ pshufd xmm7, xmm6, 0x1E
+ movdqa xmmword ptr [rsp+0x20], xmm13
+ movdqa xmmword ptr [rsp+0x40], xmm12
+ movdqa xmm5, xmmword ptr [rsp+0x30]
+ movdqa xmm13, xmmword ptr [rsp+0x50]
+ pshufd xmm6, xmm5, 0x0F
+ shufps xmm5, xmm13, 214
+ pshufd xmm12, xmm5, 0x39
+ movdqa xmm5, xmm14
+ shufps xmm5, xmm15, 250
+ pblendw xmm6, xmm5, 0xCC
+ movdqa xmm5, xmm15
+ punpcklqdq xmm5, xmm13
+ pblendw xmm5, xmm14, 0xC0
+ pshufd xmm5, xmm5, 0x78
+ punpckhdq xmm13, xmm15
+ punpckldq xmm14, xmm13
+ pshufd xmm15, xmm14, 0x1E
+ movdqa xmm13, xmm6
+ movdqa xmm14, xmm5
+ movdqa xmm5, xmmword ptr [rsp+0x20]
+ movdqa xmm6, xmmword ptr [rsp+0x40]
+ jmp 9b
+9:
+ pxor xmm0, xmm2
+ pxor xmm1, xmm3
+ pxor xmm8, xmm10
+ pxor xmm9, xmm11
+ mov eax, r13d
+ cmp rdx, r15
+ jne 2b
+ movups xmmword ptr [rbx], xmm0
+ movups xmmword ptr [rbx+0x10], xmm1
+ movups xmmword ptr [rbx+0x20], xmm8
+ movups xmmword ptr [rbx+0x30], xmm9
+ movdqa xmm0, xmmword ptr [rsp+0x130]
+ movdqa xmm1, xmmword ptr [rsp+0x110]
+ movdqa xmm2, xmmword ptr [rsp+0x120]
+ movdqu xmm3, xmmword ptr [rsp+0x118]
+ movdqu xmm4, xmmword ptr [rsp+0x128]
+ blendvps xmm1, xmm3, xmm0
+ blendvps xmm2, xmm4, xmm0
+ movdqa xmmword ptr [rsp+0x110], xmm1
+ movdqa xmmword ptr [rsp+0x120], xmm2
+ add rdi, 16
+ add rbx, 64
+ sub rsi, 2
+3:
+ test esi, 0x1
+ je 4b
+ movups xmm0, xmmword ptr [rcx]
+ movups xmm1, xmmword ptr [rcx+0x10]
+ movd xmm13, dword ptr [rsp+0x110]
+ pinsrd xmm13, dword ptr [rsp+0x120], 1
+ pinsrd xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
+ movaps xmm14, xmmword ptr [ROT8+rip]
+ movaps xmm15, xmmword ptr [ROT16+rip]
+ mov r8, qword ptr [rdi]
+ movzx eax, byte ptr [rbp+0x40]
+ or eax, r13d
+ xor edx, edx
+2:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
+ movaps xmm3, xmm13
+ pinsrd xmm3, eax, 3
+ movups xmm4, xmmword ptr [r8+rdx-0x40]
+ movups xmm5, xmmword ptr [r8+rdx-0x30]
+ movaps xmm8, xmm4
+ shufps xmm4, xmm5, 136
+ shufps xmm8, xmm5, 221
+ movaps xmm5, xmm8
+ movups xmm6, xmmword ptr [r8+rdx-0x20]
+ movups xmm7, xmmword ptr [r8+rdx-0x10]
+ movaps xmm8, xmm6
+ shufps xmm6, xmm7, 136
+ pshufd xmm6, xmm6, 0x93
+ shufps xmm8, xmm7, 221
+ pshufd xmm7, xmm8, 0x93
+ mov al, 7
+9:
+ paddd xmm0, xmm4
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm15
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm5
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 0x93
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm2, xmm2, 0x39
+ paddd xmm0, xmm6
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm15
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm7
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 0x39
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm2, xmm2, 0x93
+ dec al
+ jz 9f
+ movdqa xmm8, xmm4
+ shufps xmm8, xmm5, 214
+ pshufd xmm9, xmm4, 0x0F
+ pshufd xmm4, xmm8, 0x39
+ movdqa xmm8, xmm6
+ shufps xmm8, xmm7, 250
+ pblendw xmm9, xmm8, 0xCC
+ movdqa xmm8, xmm7
+ punpcklqdq xmm8, xmm5
+ pblendw xmm8, xmm6, 0xC0
+ pshufd xmm8, xmm8, 0x78
+ punpckhdq xmm5, xmm7
+ punpckldq xmm6, xmm5
+ pshufd xmm7, xmm6, 0x1E
+ movdqa xmm5, xmm9
+ movdqa xmm6, xmm8
+ jmp 9b
+9:
+ pxor xmm0, xmm2
+ pxor xmm1, xmm3
+ mov eax, r13d
+ cmp rdx, r15
+ jne 2b
+ movups xmmword ptr [rbx], xmm0
+ movups xmmword ptr [rbx+0x10], xmm1
+ jmp 4b
+.p2align 6
+/*
+ * zfs_blake3_compress_in_place_sse41
+ *
+ * Runs the round body at label 9 seven times over a 4x4 state of 32-bit
+ * words held in xmm0-xmm3, then stores xmm0^xmm2 and xmm1^xmm3 back to the
+ * 32 bytes at [rdi].
+ *
+ * Registers as read by the code:
+ *   rdi      32-byte buffer: loaded into xmm0/xmm1, overwritten with result
+ *   rsi      64 input bytes, loaded unaligned into xmm4-xmm7
+ *   rcx      64-bit value placed in the low half of xmm3
+ *   rdx, r8  combined as rdx + (r8 << 32) into the high half of xmm3
+ * Modifies al, rdx, r8 and xmm0-xmm9, xmm11, xmm14, xmm15.
+ *
+ * NOTE(review): presumably the arguments are (cv, block, block_len, counter,
+ * flags) passed per the SysV AMD64 convention - confirm against the C
+ * prototype.
+ * NOTE(review): unlike zfs_blake3_compress_xof_sse41, rdx and r8 are not
+ * zero-extended from their low byte here, so this appears to rely on the
+ * caller passing clean upper bits - confirm.
+ */
+zfs_blake3_compress_in_place_sse41:
+ _CET_ENDBR
+ /* State rows: xmm0/xmm1 from [rdi], xmm2 = BLAKE3_IV, xmm3 built below. */
+ movups xmm0, xmmword ptr [rdi]
+ movups xmm1, xmmword ptr [rdi+0x10]
+ movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
+ shl r8, 32
+ add rdx, r8
+ movq xmm3, rcx
+ movq xmm4, rdx
+ punpcklqdq xmm3, xmm4
+ /*
+  * De-interleave the input: xmm4/xmm5 get the even/odd 32-bit words of the
+  * first 32 bytes; xmm6/xmm7 get the even/odd words of the last 32 bytes,
+  * each rotated by one lane (pshufd 0x93).
+  */
+ movups xmm4, xmmword ptr [rsi]
+ movups xmm5, xmmword ptr [rsi+0x10]
+ movaps xmm8, xmm4
+ shufps xmm4, xmm5, 136
+ shufps xmm8, xmm5, 221
+ movaps xmm5, xmm8
+ movups xmm6, xmmword ptr [rsi+0x20]
+ movups xmm7, xmmword ptr [rsi+0x30]
+ movaps xmm8, xmm6
+ shufps xmm6, xmm7, 136
+ pshufd xmm6, xmm6, 0x93
+ shufps xmm8, xmm7, 221
+ pshufd xmm7, xmm8, 0x93
+ /* pshufb masks that rotate every 32-bit lane right by 8 and by 16 bits. */
+ movaps xmm14, xmmword ptr [ROT8+rip]
+ movaps xmm15, xmmword ptr [ROT16+rip]
+ /* al counts the remaining passes through the round body (7 in total). */
+ mov al, 7
+9:
+ /* First half: add xmm4, rotate by 16 (pshufb) and by 12 (shift pair). */
+ paddd xmm0, xmm4
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm15
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ /* Add xmm5, rotate by 8 (pshufb) and by 7 (shift pair). */
+ paddd xmm0, xmm5
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ /* Rotate the lanes of xmm0, xmm3 and xmm2; xmm1 stays in place. */
+ pshufd xmm0, xmm0, 0x93
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm2, xmm2, 0x39
+ /* Second half: same sequence with xmm6 and xmm7 as the added words. */
+ paddd xmm0, xmm6
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm15
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm7
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ /* Undo the lane rotation applied before the second half. */
+ pshufd xmm0, xmm0, 0x39
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm2, xmm2, 0x93
+ dec al
+ jz 9f
+ /* Not the last pass: permute the words in xmm4-xmm7 for the next one. */
+ movdqa xmm8, xmm4
+ shufps xmm8, xmm5, 214
+ pshufd xmm9, xmm4, 0x0F
+ pshufd xmm4, xmm8, 0x39
+ movdqa xmm8, xmm6
+ shufps xmm8, xmm7, 250
+ pblendw xmm9, xmm8, 0xCC
+ movdqa xmm8, xmm7
+ punpcklqdq xmm8, xmm5
+ pblendw xmm8, xmm6, 0xC0
+ pshufd xmm8, xmm8, 0x78
+ punpckhdq xmm5, xmm7
+ punpckldq xmm6, xmm5
+ pshufd xmm7, xmm6, 0x1E
+ movdqa xmm5, xmm9
+ movdqa xmm6, xmm8
+ jmp 9b
+9:
+ /* Fold the two state halves together and write 32 bytes back to [rdi]. */
+ pxor xmm0, xmm2
+ pxor xmm1, xmm3
+ movups xmmword ptr [rdi], xmm0
+ movups xmmword ptr [rdi+0x10], xmm1
+ ret
+.p2align 6
+/*
+ * zfs_blake3_compress_xof_sse41
+ *
+ * Runs the round body at label 9 seven times over a 4x4 state of 32-bit
+ * words held in xmm0-xmm3 and writes 64 bytes to [r9]: the first 32 are
+ * xmm0^xmm2 and xmm1^xmm3, the last 32 are xmm2 and xmm3 XORed with the
+ * original 32 bytes at [rdi].  The buffer at [rdi] is only read.
+ *
+ * Registers as read by the code:
+ *   rdi      32-byte input buffer, loaded into xmm0/xmm1
+ *   rsi      64 input bytes, loaded unaligned into xmm4-xmm7
+ *   rcx      64-bit value placed in the low half of xmm3
+ *   dl, r8b  zero-extended and combined as dl + (r8b << 32) into the high
+ *            half of xmm3
+ *   r9       64-byte output buffer
+ * Modifies rax, rdx and xmm0-xmm9, xmm11, xmm14, xmm15.
+ *
+ * NOTE(review): presumably the arguments are (cv, block, block_len, counter,
+ * flags, out) passed per the SysV AMD64 convention - confirm against the C
+ * prototype.
+ */
+zfs_blake3_compress_xof_sse41:
+ _CET_ENDBR
+ /* State rows: xmm0/xmm1 from [rdi], xmm2 = BLAKE3_IV, xmm3 built below. */
+ movups xmm0, xmmword ptr [rdi]
+ movups xmm1, xmmword ptr [rdi+0x10]
+ movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
+ movzx eax, r8b
+ movzx edx, dl
+ shl rax, 32
+ add rdx, rax
+ movq xmm3, rcx
+ movq xmm4, rdx
+ punpcklqdq xmm3, xmm4
+ /*
+  * De-interleave the input: xmm4/xmm5 get the even/odd 32-bit words of the
+  * first 32 bytes; xmm6/xmm7 get the even/odd words of the last 32 bytes,
+  * each rotated by one lane (pshufd 0x93).
+  */
+ movups xmm4, xmmword ptr [rsi]
+ movups xmm5, xmmword ptr [rsi+0x10]
+ movaps xmm8, xmm4
+ shufps xmm4, xmm5, 136
+ shufps xmm8, xmm5, 221
+ movaps xmm5, xmm8
+ movups xmm6, xmmword ptr [rsi+0x20]
+ movups xmm7, xmmword ptr [rsi+0x30]
+ movaps xmm8, xmm6
+ shufps xmm6, xmm7, 136
+ pshufd xmm6, xmm6, 0x93
+ shufps xmm8, xmm7, 221
+ pshufd xmm7, xmm8, 0x93
+ /* pshufb masks that rotate every 32-bit lane right by 8 and by 16 bits. */
+ movaps xmm14, xmmword ptr [ROT8+rip]
+ movaps xmm15, xmmword ptr [ROT16+rip]
+ /* al counts the remaining passes through the round body (7 in total). */
+ mov al, 7
+9:
+ /* First half: add xmm4, rotate by 16 (pshufb) and by 12 (shift pair). */
+ paddd xmm0, xmm4
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm15
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ /* Add xmm5, rotate by 8 (pshufb) and by 7 (shift pair). */
+ paddd xmm0, xmm5
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ /* Rotate the lanes of xmm0, xmm3 and xmm2; xmm1 stays in place. */
+ pshufd xmm0, xmm0, 0x93
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm2, xmm2, 0x39
+ /* Second half: same sequence with xmm6 and xmm7 as the added words. */
+ paddd xmm0, xmm6
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm15
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm7
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ /* Undo the lane rotation applied before the second half. */
+ pshufd xmm0, xmm0, 0x39
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm2, xmm2, 0x93
+ dec al
+ jz 9f
+ /* Not the last pass: permute the words in xmm4-xmm7 for the next one. */
+ movdqa xmm8, xmm4
+ shufps xmm8, xmm5, 214
+ pshufd xmm9, xmm4, 0x0F
+ pshufd xmm4, xmm8, 0x39
+ movdqa xmm8, xmm6
+ shufps xmm8, xmm7, 250
+ pblendw xmm9, xmm8, 0xCC
+ movdqa xmm8, xmm7
+ punpcklqdq xmm8, xmm5
+ pblendw xmm8, xmm6, 0xC0
+ pshufd xmm8, xmm8, 0x78
+ punpckhdq xmm5, xmm7
+ punpckldq xmm6, xmm5
+ pshufd xmm7, xmm6, 0x1E
+ movdqa xmm5, xmm9
+ movdqa xmm6, xmm8
+ jmp 9b
+9:
+ /* Reload the original 32 bytes at [rdi]; they feed the upper output half. */
+ movdqu xmm4, xmmword ptr [rdi]
+ movdqu xmm5, xmmword ptr [rdi+0x10]
+ pxor xmm0, xmm2
+ pxor xmm1, xmm3
+ pxor xmm2, xmm4
+ pxor xmm3, xmm5
+ /* Emit all four rows (64 bytes) to the output buffer at [r9]. */
+ movups xmmword ptr [r9], xmm0
+ movups xmmword ptr [r9+0x10], xmm1
+ movups xmmword ptr [r9+0x20], xmm2
+ movups xmmword ptr [r9+0x30], xmm3
+ ret
+
+.size zfs_blake3_hash_many_sse41, . - zfs_blake3_hash_many_sse41
+.size zfs_blake3_compress_in_place_sse41, . - zfs_blake3_compress_in_place_sse41
+.size zfs_blake3_compress_xof_sse41, . - zfs_blake3_compress_xof_sse41
+
+#ifdef __APPLE__
+.static_data
+#else
+.section .rodata
+#endif
+.p2align 6
+BLAKE3_IV:
+ .long 0x6A09E667, 0xBB67AE85
+ .long 0x3C6EF372, 0xA54FF53A
+ROT16:
+ .byte 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13
+ROT8:
+ .byte 1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8, 13, 14, 15, 12
+ADD0:
+ .long 0, 1, 2, 3
+ADD1:
+ .long 4, 4, 4, 4
+BLAKE3_IV_0:
+ .long 0x6A09E667, 0x6A09E667, 0x6A09E667, 0x6A09E667
+BLAKE3_IV_1:
+ .long 0xBB67AE85, 0xBB67AE85, 0xBB67AE85, 0xBB67AE85
+BLAKE3_IV_2:
+ .long 0x3C6EF372, 0x3C6EF372, 0x3C6EF372, 0x3C6EF372
+BLAKE3_IV_3:
+ .long 0xA54FF53A, 0xA54FF53A, 0xA54FF53A, 0xA54FF53A
+BLAKE3_BLOCK_LEN:
+ .long 64, 64, 64, 64
+CMP_MSB_MASK:
+ .long 0x80000000, 0x80000000, 0x80000000, 0x80000000
+
+#endif /* HAVE_SSE4_1 */
+
+#ifdef __ELF__
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/crypto_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/crypto_os.c
index c4d5f8761f5a..1f139ea5b807 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/crypto_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/crypto_os.c
@@ -149,6 +149,13 @@ freebsd_zfs_crypt_done(struct cryptop *crp)
return (0);
}
+/*
+ * Completion callback installed by zfs_crypto_dispatch() when the session
+ * is not asynchronous.  The dispatcher does not sleep waiting for such
+ * requests, so there is nothing to signal here; crp is unused and the
+ * callback always reports success.
+ */
+static int
+freebsd_zfs_crypt_done_sync(struct cryptop *crp)
+{
+
+ return (0);
+}
+
void
freebsd_crypt_freesession(freebsd_crypt_session_t *sess)
{
@@ -158,26 +165,36 @@ freebsd_crypt_freesession(freebsd_crypt_session_t *sess)
}
static int
-zfs_crypto_dispatch(freebsd_crypt_session_t *session, struct cryptop *crp)
+zfs_crypto_dispatch(freebsd_crypt_session_t *session, struct cryptop *crp)
{
int error;
crp->crp_opaque = session;
- crp->crp_callback = freebsd_zfs_crypt_done;
for (;;) {
+#if __FreeBSD_version < 1400004
+ boolean_t async = ((crypto_ses2caps(crp->crp_session) &
+ CRYPTOCAP_F_SYNC) == 0);
+#else
+ boolean_t async = !CRYPTO_SESS_SYNC(crp->crp_session);
+#endif
+ crp->crp_callback = async ? freebsd_zfs_crypt_done :
+ freebsd_zfs_crypt_done_sync;
error = crypto_dispatch(crp);
- if (error)
- break;
- mtx_lock(&session->fs_lock);
- while (session->fs_done == false)
- msleep(crp, &session->fs_lock, 0,
- "zfs_crypto", 0);
- mtx_unlock(&session->fs_lock);
+ if (error == 0) {
+ if (async) {
+ mtx_lock(&session->fs_lock);
+ while (session->fs_done == false) {
+ msleep(crp, &session->fs_lock, 0,
+ "zfs_crypto", 0);
+ }
+ mtx_unlock(&session->fs_lock);
+ }
+ error = crp->crp_etype;
+ }
- if (crp->crp_etype == ENOMEM) {
+ if (error == ENOMEM) {
pause("zcrnomem", 1);
- } else if (crp->crp_etype != EAGAIN) {
- error = crp->crp_etype;
+ } else if (error != EAGAIN) {
break;
}
crp->crp_etype = 0;
diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-generic.c b/sys/contrib/openzfs/module/os/linux/spl/spl-generic.c
index f99a2f966660..5179100d1665 100644
--- a/sys/contrib/openzfs/module/os/linux/spl/spl-generic.c
+++ b/sys/contrib/openzfs/module/os/linux/spl/spl-generic.c
@@ -780,8 +780,13 @@ spl_init(void)
if ((rc = spl_zlib_init()))
goto out7;
+ if ((rc = spl_zone_init()))
+ goto out8;
+
return (rc);
+out8:
+ spl_zlib_fini();
out7:
spl_kstat_fini();
out6:
@@ -801,6 +806,7 @@ out1:
static void __exit
spl_fini(void)
{
+ spl_zone_fini();
spl_zlib_fini();
spl_kstat_fini();
spl_proc_fini();
diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-zone.c b/sys/contrib/openzfs/module/os/linux/spl/spl-zone.c
new file mode 100644
index 000000000000..b8a8b7cd8cd8
--- /dev/null
+++ b/sys/contrib/openzfs/module/os/linux/spl/spl-zone.c
@@ -0,0 +1,424 @@
+/*
+ * Copyright (c) 2021 Klara Systems, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/mutex.h>
+#include <sys/sysmacros.h>
+#include <sys/kmem.h>
+#include <linux/file.h>
+#include <linux/magic.h>
+#include <sys/zone.h>
+
+#if defined(CONFIG_USER_NS)
+#include <linux/statfs.h>
+#include <linux/proc_ns.h>
+#endif
+
+static kmutex_t zone_datasets_lock;
+static struct list_head zone_datasets;
+
+/*
+ * One record per user namespace that currently has datasets attached.
+ * Records are linked on the global zone_datasets list.
+ * zone_dataset_attach() creates a record, taking a reference on the
+ * namespace, the first time a dataset is attached to it, and
+ * zone_dataset_detach() frees the record once its last dataset is removed.
+ */
+typedef struct zone_datasets {
+ struct list_head zds_list; /* zone_datasets linkage */
+ struct user_namespace *zds_userns; /* namespace reference */
+ struct list_head zds_datasets; /* datasets for the namespace */
+} zone_datasets_t;
+
+/*
+ * One dataset attached to a namespace.  The structure is allocated with
+ * zd_dsnamelen + 1 bytes of trailing storage so that zd_dsname can hold the
+ * NUL-terminated dataset name.
+ */
+typedef struct zone_dataset {
+ struct list_head zd_list; /* zone_dataset linkage */
+ size_t zd_dsnamelen; /* length of name */
+ char zd_dsname[0]; /* name of the member dataset */
+} zone_dataset_t;
+
+#if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM)
+/*
+ * Returns:
+ * - 0 on success
+ * - EBADF if it cannot open the provided file descriptor
+ * - ENOTTY if the file itself is not a user namespace file. We want to
+ * intercept this error in the ZFS layer. We cannot just return one of the
+ * ZFS_ERR_* errors here as we want to preserve the separation of the ZFS
+ * and the SPL layers.
+ */
+static int
+user_ns_get(int fd, struct user_namespace **userns)
+{
+	struct kstatfs fsinfo;
+	struct file *fp;
+	struct ns_common *nsc;
+	int rc = ENOTTY;
+
+	fp = fget(fd);
+	if (fp == NULL)
+		return (EBADF);
+
+	/*
+	 * Anything other than an nsfs file that refers to a user namespace
+	 * is reported as ENOTTY; only a full match hands back the namespace.
+	 */
+	if (vfs_statfs(&fp->f_path, &fsinfo) == 0 &&
+	    fsinfo.f_type == NSFS_MAGIC) {
+		nsc = get_proc_ns(file_inode(fp));
+		if (nsc->ops->type == CLONE_NEWUSER) {
+			*userns = container_of(nsc, struct user_namespace, ns);
+			rc = 0;
+		}
+	}
+
+	/* The file reference taken by fget() is dropped on every path. */
+	fput(fp);
+
+	return (rc);
+}
+#endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */
+
+/*
+ * Return the inode number that identifies a user namespace; it is used as
+ * the zone ID.  The field holding it depends on the kernel's layout.
+ */
+static unsigned int
+user_ns_zoneid(struct user_namespace *user_ns)
+{
+#if defined(HAVE_USER_NS_COMMON_INUM)
+	return (user_ns->ns.inum);
+#else
+	return (user_ns->proc_inum);
+#endif
+}
+
+/*
+ * Find the record on the global zone_datasets list whose namespace has
+ * zone ID 'nsinum'.  Returns NULL when no such record exists.
+ */
+static struct zone_datasets *
+zone_datasets_lookup(unsigned int nsinum)
+{
+	zone_datasets_t *cur;
+	zone_datasets_t *match = NULL;
+
+	list_for_each_entry(cur, &zone_datasets, zds_list) {
+		if (user_ns_zoneid(cur->zds_userns) != nsinum)
+			continue;
+		match = cur;
+		break;
+	}
+
+	return (match);
+}
+
+#if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM)
+/*
+ * Find the entry of 'zds' whose name is exactly the first 'dsnamelen' bytes
+ * of 'dataset'.  Returns NULL when the dataset is not attached.
+ */
+static struct zone_dataset *
+zone_dataset_lookup(zone_datasets_t *zds, const char *dataset, size_t dsnamelen)
+{
+	zone_dataset_t *cur;
+
+	/* A match needs both the same length and the same leading bytes. */
+	list_for_each_entry(cur, &zds->zds_datasets, zd_list) {
+		if (cur->zd_dsnamelen == dsnamelen &&
+		    strncmp(cur->zd_dsname, dataset, dsnamelen) == 0)
+			return (cur);
+	}
+
+	return (NULL);
+}
+
+/*
+ * Only the global root uid may attach or detach datasets: returns 0 for
+ * that uid and EPERM for everyone else.
+ */
+static int
+zone_dataset_cred_check(cred_t *cred)
+{
+
+	return (uid_eq(cred->uid, GLOBAL_ROOT_UID) ? 0 : EPERM);
+}
+#endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */
+
+/*
+ * Validate a dataset name and report its usable length through 'dsnamelen'.
+ * Returns ENOENT for an empty name or one that starts with '/', otherwise 0.
+ * A single trailing '/' is not counted in the reported length.
+ */
+static int
+zone_dataset_name_check(const char *dataset, size_t *dsnamelen)
+{
+	size_t len;
+
+	switch (dataset[0]) {
+	case '\0':
+	case '/':
+		return (ENOENT);
+	default:
+		break;
+	}
+
+	/* The name is non-empty here, so len - 1 is a valid index. */
+	len = strlen(dataset);
+	if (dataset[len - 1] == '/')
+		len--;
+	*dsnamelen = len;
+
+	return (0);
+}
+
+/*
+ * Attach 'dataset' to the user namespace referred to by 'userns_fd'.
+ *
+ * Returns 0 on success, EPERM if 'cred' is not the global root uid, ENOENT
+ * for an empty name or one starting with '/', the error from user_ns_get()
+ * when the descriptor is not usable, EEXIST if the dataset is already
+ * attached to that namespace, and ENXIO when built without the required
+ * user namespace support.
+ */
+int
+zone_dataset_attach(cred_t *cred, const char *dataset, int userns_fd)
+{
+#if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM)
+ struct user_namespace *userns;
+ zone_datasets_t *zds;
+ zone_dataset_t *zd;
+ int error;
+ size_t dsnamelen;
+
+ if ((error = zone_dataset_cred_check(cred)) != 0)
+ return (error);
+ if ((error = zone_dataset_name_check(dataset, &dsnamelen)) != 0)
+ return (error);
+ if ((error = user_ns_get(userns_fd, &userns)) != 0)
+ return (error);
+
+ mutex_enter(&zone_datasets_lock);
+ zds = zone_datasets_lookup(user_ns_zoneid(userns));
+ if (zds == NULL) {
+ /* First dataset for this namespace: create its record. */
+ zds = kmem_alloc(sizeof (zone_datasets_t), KM_SLEEP);
+ INIT_LIST_HEAD(&zds->zds_list);
+ INIT_LIST_HEAD(&zds->zds_datasets);
+ zds->zds_userns = userns;
+ /*
+ * Lock the namespace by increasing its refcount to prevent
+ * the namespace ID from being reused.
+ */
+ get_user_ns(userns);
+ list_add_tail(&zds->zds_list, &zone_datasets);
+ } else {
+ /* Known namespace: refuse to attach the same dataset twice. */
+ zd = zone_dataset_lookup(zds, dataset, dsnamelen);
+ if (zd != NULL) {
+ mutex_exit(&zone_datasets_lock);
+ return (EEXIST);
+ }
+ }
+
+ /*
+ * dsnamelen excludes a trailing slash, so copy exactly that many bytes
+ * and terminate the stored name explicitly.
+ */
+ zd = kmem_alloc(sizeof (zone_dataset_t) + dsnamelen + 1, KM_SLEEP);
+ zd->zd_dsnamelen = dsnamelen;
+ strncpy(zd->zd_dsname, dataset, dsnamelen);
+ zd->zd_dsname[dsnamelen] = '\0';
+ INIT_LIST_HEAD(&zd->zd_list);
+ list_add_tail(&zd->zd_list, &zds->zds_datasets);
+
+ mutex_exit(&zone_datasets_lock);
+ return (0);
+#else
+ return (ENXIO);
+#endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */
+}
+EXPORT_SYMBOL(zone_dataset_attach);
+
+/*
+ * Detach 'dataset' from the user namespace referred to by 'userns_fd'.
+ *
+ * Returns 0 on success, EPERM if 'cred' is not the global root uid, ENOENT
+ * for an invalid name or when the dataset is not attached to that
+ * namespace, the error from user_ns_get() when the descriptor is not
+ * usable, and ENXIO when built without the required user namespace support.
+ */
+int
+zone_dataset_detach(cred_t *cred, const char *dataset, int userns_fd)
+{
+#if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM)
+ struct user_namespace *userns;
+ zone_datasets_t *zds;
+ zone_dataset_t *zd;
+ int error;
+ size_t dsnamelen;
+
+ if ((error = zone_dataset_cred_check(cred)) != 0)
+ return (error);
+ if ((error = zone_dataset_name_check(dataset, &dsnamelen)) != 0)
+ return (error);
+ if ((error = user_ns_get(userns_fd, &userns)) != 0)
+ return (error);
+
+ mutex_enter(&zone_datasets_lock);
+ zds = zone_datasets_lookup(user_ns_zoneid(userns));
+ if (zds != NULL)
+ zd = zone_dataset_lookup(zds, dataset, dsnamelen);
+ /* zd is only read when zds != NULL, thanks to the short-circuit. */
+ if (zds == NULL || zd == NULL) {
+ mutex_exit(&zone_datasets_lock);
+ return (ENOENT);
+ }
+
+ list_del(&zd->zd_list);
+ kmem_free(zd, sizeof (*zd) + zd->zd_dsnamelen + 1);
+
+ /* Prune the namespace entry if it has no more delegations. */
+ if (list_empty(&zds->zds_datasets)) {
+ /*
+ * Decrease the refcount now that the namespace is no longer
+ * used. It is no longer necessary to prevent the namespace ID
+ * from being reused.
+ */
+ put_user_ns(userns);
+ list_del(&zds->zds_list);
+ kmem_free(zds, sizeof (*zds));
+ }
+
+ mutex_exit(&zone_datasets_lock);
+ return (0);
+#else
+ return (ENXIO);
+#endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */
+}
+EXPORT_SYMBOL(zone_dataset_detach);
+
+/*
+ * A dataset is visible if:
+ * - It is a parent of a namespace entry.
+ * - It is one of the namespace entries.
+ * - It is a child of a namespace entry.
+ *
+ * A dataset is writable if:
+ * - It is one of the namespace entries.
+ * - It is a child of a namespace entry.
+ *
+ * The parent datasets of namespace entries are visible and
+ * read-only to provide a path back to the root of the pool.
+ */
+int
+zone_dataset_visible(const char *dataset, int *write)
+{
+	zone_datasets_t *zds;
+	zone_dataset_t *zd;
+	size_t dsnamelen, zd_len;
+	int visible = 0;
+	int writable = 0;
+
+	/* Report read-only until a match proves otherwise. */
+	if (write != NULL)
+		*write = 0;
+	if (zone_dataset_name_check(dataset, &dsnamelen) != 0)
+		return (0);
+	if (INGLOBALZONE(curproc)) {
+		/* The global zone sees, and may modify, every dataset. */
+		if (write != NULL)
+			*write = 1;
+		return (1);
+	}
+
+	mutex_enter(&zone_datasets_lock);
+	zds = zone_datasets_lookup(crgetzoneid(curproc->cred));
+	if (zds != NULL) {
+		list_for_each_entry(zd, &zds->zds_datasets, zd_list) {
+			const char *entry = zd->zd_dsname;
+
+			zd_len = strlen(entry);
+			if (zd_len > dsnamelen) {
+				/*
+				 * The entry is longer than the dataset name:
+				 * the dataset can only be a parent of the
+				 * entry, which makes it visible but leaves it
+				 * read-only.
+				 */
+				if (memcmp(entry, dataset, dsnamelen) != 0 ||
+				    entry[dsnamelen] != '/')
+					continue;
+			} else {
+				/*
+				 * Same length: the dataset must be the entry
+				 * itself.  Shorter entry: the dataset must be
+				 * a child, i.e. the entry followed by '/'.
+				 * Both cases are writable.
+				 */
+				if (memcmp(entry, dataset, zd_len) != 0)
+					continue;
+				if (zd_len < dsnamelen &&
+				    dataset[zd_len] != '/')
+					continue;
+				writable = 1;
+			}
+			visible = 1;
+			break;
+		}
+	}
+	mutex_exit(&zone_datasets_lock);
+
+	if (writable && write != NULL)
+		*write = 1;
+	return (visible);
+}
+EXPORT_SYMBOL(zone_dataset_visible);
+
+/*
+ * Zone ID of the initial user namespace, or 0 when the kernel is built
+ * without CONFIG_USER_NS.
+ */
+unsigned int
+global_zoneid(void)
+{
+#if defined(CONFIG_USER_NS)
+	return (user_ns_zoneid(&init_user_ns));
+#else
+	return (0);
+#endif
+}
+EXPORT_SYMBOL(global_zoneid);
+
+/*
+ * Zone ID of the user namespace that 'cr' belongs to, or 0 when the kernel
+ * is built without CONFIG_USER_NS.
+ */
+unsigned int
+crgetzoneid(const cred_t *cr)
+{
+#if defined(CONFIG_USER_NS)
+	return (user_ns_zoneid(cr->user_ns));
+#else
+	return (0);
+#endif
+}
+EXPORT_SYMBOL(crgetzoneid);
+
+/*
+ * A process is in the global zone when its credentials belong to the
+ * initial user namespace.  Without CONFIG_USER_NS every process is.
+ */
+boolean_t
+inglobalzone(proc_t *proc)
+{
+	boolean_t global = B_TRUE;
+
+#if defined(CONFIG_USER_NS)
+	global = (proc->cred->user_ns == &init_user_ns);
+#endif
+
+	return (global);
+}
+EXPORT_SYMBOL(inglobalzone);
+
+/*
+ * Set up the (initially empty) global list of per-namespace records and the
+ * mutex that protects it.  Always returns 0.
+ */
+int
+spl_zone_init(void)
+{
+	INIT_LIST_HEAD(&zone_datasets);
+	mutex_init(&zone_datasets_lock, NULL, MUTEX_DEFAULT, NULL);
+
+	return (0);
+}
+
+/*
+ * Tear down the zone bookkeeping: free every attached dataset and every
+ * per-namespace record, then destroy the list lock.
+ */
+void
+spl_zone_fini(void)
+{
+	zone_datasets_t *zds;
+	zone_dataset_t *zd;
+
+	/*
+	 * It would be better to assert an empty zone_datasets, but since
+	 * there's no automatic mechanism for cleaning them up if the user
+	 * namespace is destroyed, just do it here, since spl is about to go
+	 * out of context.
+	 */
+	while (!list_empty(&zone_datasets)) {
+		zds = list_entry(zone_datasets.next, zone_datasets_t, zds_list);
+		while (!list_empty(&zds->zds_datasets)) {
+			zd = list_entry(zds->zds_datasets.next,
+			    zone_dataset_t, zd_list);
+			list_del(&zd->zd_list);
+			kmem_free(zd, sizeof (*zd) + zd->zd_dsnamelen + 1);
+		}
+		/*
+		 * zone_dataset_attach() takes exactly one namespace reference
+		 * per record, not one per dataset, so drop exactly one here.
+		 * Dropping it inside the loop above would over-release the
+		 * namespace whenever it has more than one dataset attached.
+		 */
+		put_user_ns(zds->zds_userns);
+		list_del(&zds->zds_list);
+		kmem_free(zds, sizeof (*zds));
+	}
+	mutex_destroy(&zone_datasets_lock);
+}
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/policy.c b/sys/contrib/openzfs/module/os/linux/zfs/policy.c
index 5a52092bb90a..ab00d2ae14d2 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/policy.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/policy.c
@@ -61,7 +61,7 @@ priv_policy_ns(const cred_t *cr, int capability, int err,
static int
priv_policy(const cred_t *cr, int capability, int err)
{
- return (priv_policy_ns(cr, capability, err, NULL));
+ return (priv_policy_ns(cr, capability, err, cr->user_ns));
}
static int
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_ioctl_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_ioctl_os.c
index c65702e1a053..67b864aa77a9 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_ioctl_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_ioctl_os.c
@@ -37,6 +37,7 @@
* Copyright 2017 RackTop Systems.
* Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
* Copyright (c) 2019 Datto Inc.
+ * Copyright (c) 2021 Klara, Inc.
*/
#include <sys/types.h>
@@ -150,6 +151,48 @@ out:
}
+/*
+ * ioctl handler: attach the dataset named in zc->zc_name to the user
+ * namespace referred to by the descriptor in zc->zc_cleanup_fd.
+ */
+static int
+zfs_ioc_userns_attach(zfs_cmd_t *zc)
+{
+	int rc;
+
+	if (zc == NULL)
+		return (SET_ERROR(EINVAL));
+
+	rc = zone_dataset_attach(CRED(), zc->zc_name, zc->zc_cleanup_fd);
+
+	/*
+	 * The SPL layer knows nothing about ZFS_ERR_* codes and reports "not
+	 * a user namespace file" as ENOTTY (see user_ns_get() in spl-zone.c);
+	 * map that to the ZFS-specific error here.
+	 */
+	return (rc == ENOTTY ? ZFS_ERR_NOT_USER_NAMESPACE : rc);
+}
+
+/*
+ * ioctl handler: detach the dataset named in zc->zc_name from the user
+ * namespace referred to by the descriptor in zc->zc_cleanup_fd.
+ */
+static int
+zfs_ioc_userns_detach(zfs_cmd_t *zc)
+{
+	int rc;
+
+	if (zc == NULL)
+		return (SET_ERROR(EINVAL));
+
+	rc = zone_dataset_detach(CRED(), zc->zc_name, zc->zc_cleanup_fd);
+
+	/*
+	 * The SPL layer knows nothing about ZFS_ERR_* codes and reports "not
+	 * a user namespace file" as ENOTTY (see user_ns_get() in spl-zone.c);
+	 * map that to the ZFS-specific error here.
+	 */
+	return (rc == ENOTTY ? ZFS_ERR_NOT_USER_NAMESPACE : rc);
+}
+
uint64_t
zfs_max_nvlist_src_size_os(void)
{
@@ -168,6 +211,10 @@ zfs_ioctl_update_mount_cache(const char *dsname)
void
zfs_ioctl_init_os(void)
{
+ zfs_ioctl_register_dataset_nolog(ZFS_IOC_USERNS_ATTACH,
+ zfs_ioc_userns_attach, zfs_secpolicy_config, POOL_CHECK_NONE);
+ zfs_ioctl_register_dataset_nolog(ZFS_IOC_USERNS_DETACH,
+ zfs_ioc_userns_detach, zfs_secpolicy_config, POOL_CHECK_NONE);
}
#ifdef CONFIG_COMPAT
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_uio.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_uio.c
index 4f31bcb5959d..abb6dbe67cdf 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_uio.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_uio.c
@@ -126,7 +126,7 @@ zfs_uiomove_iov(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
}
static int
-zfs_uiomove_bvec(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
+zfs_uiomove_bvec_impl(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
{
const struct bio_vec *bv = uio->uio_bvec;
size_t skip = uio->uio_skip;
@@ -137,10 +137,13 @@ zfs_uiomove_bvec(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
cnt = MIN(bv->bv_len - skip, n);
paddr = zfs_kmap_atomic(bv->bv_page);
- if (rw == UIO_READ)
+ if (rw == UIO_READ) {
+ /* Copy from buffer 'p' to the bvec data */
memcpy(paddr + bv->bv_offset + skip, p, cnt);
- else
+ } else {
+ /* Copy from bvec data to buffer 'p' */
memcpy(p, paddr + bv->bv_offset + skip, cnt);
+ }
zfs_kunmap_atomic(paddr);
skip += cnt;
@@ -158,6 +161,141 @@ zfs_uiomove_bvec(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
return (0);
}
+#ifdef HAVE_BLK_MQ
+/*
+ * Copy 'cnt' bytes between buffer 'p' and the page behind 'bv', starting
+ * 'skip' bytes past bv->bv_offset. The page is mapped with
+ * zfs_kmap_atomic() for the duration of the copy.
+ */
+static void
+zfs_copy_bvec(void *p, size_t skip, size_t cnt, zfs_uio_rw_t rw,
+ struct bio_vec *bv)
+{
+ void *page_va = zfs_kmap_atomic(bv->bv_page);
+ void *seg = page_va + bv->bv_offset + skip;
+
+ if (rw != UIO_READ) {
+ /* Anything but UIO_READ: bvec data -> buffer 'p' */
+ memcpy(p, seg, cnt);
+ } else {
+ /* UIO_READ: buffer 'p' -> bvec data */
+ memcpy(seg, p, cnt);
+ }
+ zfs_kunmap_atomic(page_va);
+}
+
+/*
+ * Copy 'n' bytes of data between the buffer p[] and the data represented
+ * by the request in the uio.
+ *
+ * p: buffer to copy to or from
+ * n: number of bytes to copy
+ * rw: handed to zfs_copy_bvec() to select the copy direction
+ * uio: supplies the request (uio->rq); uio_loffset and uio_resid are
+ * advanced by the number of bytes copied, and uio->iter / uio->bv
+ * hold the saved iterator state between calls
+ *
+ * Always returns 0. If the segment loop runs to completion without
+ * copying anything, uio->uio_resid is set to 0.
+ */
+static int
+zfs_uiomove_bvec_rq(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
+{
+ struct request *rq = uio->rq;
+ struct bio_vec bv;
+ struct req_iterator iter;
+ size_t this_seg_start; /* logical offset */
+ size_t this_seg_end; /* logical offset */
+ size_t skip_in_seg;
+ size_t copy_from_seg;
+ size_t orig_loffset;
+ int copied = 0;
+
+ /*
+ * Get the original logical offset of this entire request (because
+ * uio->uio_loffset will be modified over time).
+ */
+ orig_loffset = io_offset(NULL, rq);
+ this_seg_start = orig_loffset;
+
+ rq_for_each_segment(bv, rq, iter) {
+ if (uio->iter.bio) {
+ /*
+ * If uio->iter.bio is present, then we know we've saved
+ * uio->iter from a previous call to this function, and
+ * we can skip ahead in this rq_for_each_segment() loop
+ * to where we last left off. That way, we don't need
+ * to iterate over tons of segments we've already
+ * processed - we can just restore the "saved state".
+ *
+ * NOTE(review): the 'continue' moves on to the segment
+ * after the saved one and treats uio->uio_loffset as
+ * its start, which looks like it assumes the previous
+ * call consumed the saved segment completely - confirm.
+ */
+ iter = uio->iter;
+ bv = uio->bv;
+ this_seg_start = uio->uio_loffset;
+ memset(&uio->iter, 0, sizeof (uio->iter));
+ continue;
+ }
+
+ /*
+ * Lookup what the logical offset of the last byte of this
+ * segment is.
+ */
+ this_seg_end = this_seg_start + bv.bv_len - 1;
+
+ /*
+ * We only need to operate on segments that have data we're
+ * copying.
+ */
+ if (uio->uio_loffset >= this_seg_start &&
+ uio->uio_loffset <= this_seg_end) {
+ /*
+ * Some, or all, of the data in this segment needs to be
+ * copied.
+ */
+
+ /*
+ * We may not be copying from the first byte in the
+ * segment. Figure out how many bytes to skip copying
+ * from the beginning of this segment.
+ */
+ skip_in_seg = uio->uio_loffset - this_seg_start;
+
+ /*
+ * Calculate the total number of bytes from this
+ * segment that we will be copying.
+ */
+ copy_from_seg = MIN(bv.bv_len - skip_in_seg, n);
+
+ /* Copy the bytes */
+ zfs_copy_bvec(p, skip_in_seg, copy_from_seg, rw, &bv);
+ p = ((char *)p) + copy_from_seg;
+
+ n -= copy_from_seg;
+ uio->uio_resid -= copy_from_seg;
+ uio->uio_loffset += copy_from_seg;
+ copied = 1; /* We copied some data */
+ }
+
+ if (n == 0) {
+ /*
+ * All done copying. Save our 'iter' value to the uio.
+ * This allows us to "save our state" and skip ahead in
+ * the rq_for_each_segment() loop the next time we call
+ * zfs_uiomove_bvec_rq() on this uio (which we
+ * will be doing for any remaining data in the uio).
+ */
+ uio->iter = iter; /* make a copy of the struct data */
+ uio->bv = bv;
+ return (0);
+ }
+
+ this_seg_start = this_seg_end + 1;
+ }
+
+ if (!copied) {
+ /* Didn't copy anything */
+ uio->uio_resid = 0;
+ }
+ return (0);
+}
+#endif
+
+/*
+ * Entry point for UIO_BVEC copies. A uio that carries a struct request is
+ * routed to zfs_uiomove_bvec_rq() (only possible with HAVE_BLK_MQ); every
+ * other uio goes through zfs_uiomove_bvec_impl().
+ */
+static int
+zfs_uiomove_bvec(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
+{
+#ifndef HAVE_BLK_MQ
+ /* Without blk-mq support no uio should ever carry a request. */
+ ASSERT3P(uio->rq, ==, NULL);
+#else
+ if (uio->rq != NULL)
+ return (zfs_uiomove_bvec_rq(p, n, rw, uio));
+#endif
+ return (zfs_uiomove_bvec_impl(p, n, rw, uio));
+}
+
#if defined(HAVE_VFS_IOV_ITER)
static int
zfs_uiomove_iter(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio,
@@ -300,8 +438,14 @@ zfs_uioskip(zfs_uio_t *uio, size_t n)
{
if (n > uio->uio_resid)
return;
-
- if (uio->uio_segflg == UIO_BVEC) {
+ /*
+ * When using a uio with a struct request, we simply
+ * use uio_loffset as a pointer to the next logical byte to
+ * copy in the request. We don't have to do any fancy
+ * accounting with uio_bvec/uio_iovcnt since we don't use
+ * them.
+ */
+ if (uio->uio_segflg == UIO_BVEC && uio->rq == NULL) {
uio->uio_skip += n;
while (uio->uio_iovcnt &&
uio->uio_skip >= uio->uio_bvec->bv_len) {
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c
index 81a059651e8a..a67ba821d06f 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c
@@ -1453,14 +1453,34 @@ zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent)
int error = 0;
zfsvfs_t *zfsvfs = NULL;
vfs_t *vfs = NULL;
+ int canwrite;
+ int dataset_visible_zone;
ASSERT(zm);
ASSERT(osname);
+ dataset_visible_zone = zone_dataset_visible(osname, &canwrite);
+
+ /*
+ * Refuse to mount a filesystem if we are in a namespace and the
+ * dataset is not visible or writable in that namespace.
+ */
+ if (!INGLOBALZONE(curproc) &&
+ (!dataset_visible_zone || !canwrite)) {
+ return (SET_ERROR(EPERM));
+ }
+
error = zfsvfs_parse_options(zm->mnt_data, &vfs);
if (error)
return (error);
+ /*
+ * If a non-writable filesystem is being mounted without the
+ * read-only flag, pretend it was set, as done for snapshots.
+ */
+ if (!canwrite)
+ vfs->vfs_readonly = true;
+
error = zfsvfs_create(osname, vfs->vfs_readonly, &zfsvfs);
if (error) {
zfsvfs_vfs_free(vfs);
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_ctldir.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_ctldir.c
index f4e1ab7aef08..d5c222120a9d 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_ctldir.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_ctldir.c
@@ -32,6 +32,9 @@
#include <sys/zfs_vnops.h>
#include <sys/zfs_ctldir.h>
#include <sys/zpl.h>
+#include <sys/dmu.h>
+#include <sys/dsl_dataset.h>
+#include <sys/zap.h>
/*
* Common open routine. Disallow any write access.
@@ -411,6 +414,20 @@ zpl_snapdir_getattr_impl(const struct path *path, struct kstat *stat,
#endif
stat->nlink = stat->size = 2;
+
+ dsl_dataset_t *ds = dmu_objset_ds(zfsvfs->z_os);
+ if (dsl_dataset_phys(ds)->ds_snapnames_zapobj != 0) {
+ uint64_t snap_count;
+ int err = zap_count(
+ dmu_objset_pool(ds->ds_objset)->dp_meta_objset,
+ dsl_dataset_phys(ds)->ds_snapnames_zapobj, &snap_count);
+ if (err != 0) {
+ ZPL_EXIT(zfsvfs);
+ return (-err);
+ }
+ stat->nlink += snap_count;
+ }
+
stat->ctime = stat->mtime = dmu_objset_snap_cmtime(zfsvfs->z_os);
stat->atime = current_time(ip);
ZPL_EXIT(zfsvfs);
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c
index c2fd3fee1401..b18efde9b18a 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c
@@ -360,6 +360,7 @@ const struct super_operations zpl_super_operations = {
struct file_system_type zpl_fs_type = {
.owner = THIS_MODULE,
.name = ZFS_DRIVER,
+ .fs_flags = FS_USERNS_MOUNT, /* Linux VFS flag: mountable by the root of a user namespace */
.mount = zpl_mount,
.kill_sb = zpl_kill_sb,
};
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_xattr.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_xattr.c
index c53bf3c2ab25..c30ce1c98439 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_xattr.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_xattr.c
@@ -83,6 +83,7 @@
#include <sys/zap.h>
#include <sys/vfs.h>
#include <sys/zpl.h>
+#include <linux/vfs_compat.h>
enum xattr_permission {
XAPERM_DENY,
@@ -1495,7 +1496,9 @@ zpl_xattr_permission(xattr_filldir_t *xf, const char *name, int name_len)
return (perm);
}
-#if !defined(HAVE_POSIX_ACL_RELEASE) || defined(HAVE_POSIX_ACL_RELEASE_GPL_ONLY)
+#if defined(CONFIG_FS_POSIX_ACL) && \
+ (!defined(HAVE_POSIX_ACL_RELEASE) || \
+ defined(HAVE_POSIX_ACL_RELEASE_GPL_ONLY))
struct acl_rel_struct {
struct acl_rel_struct *next;
struct posix_acl *acl;
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c
index 39441700ae8c..acbab55d03ef 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c
@@ -41,20 +41,77 @@
#include <linux/blkdev_compat.h>
#include <linux/task_io_accounting_ops.h>
+#ifdef HAVE_BLK_MQ
+#include <linux/blk-mq.h>
+#endif
+
+static void zvol_request_impl(zvol_state_t *zv, struct bio *bio,
+ struct request *rq, boolean_t force_sync);
+
static unsigned int zvol_major = ZVOL_MAJOR;
static unsigned int zvol_request_sync = 0;
static unsigned int zvol_prefetch_bytes = (128 * 1024);
static unsigned long zvol_max_discard_blocks = 16384;
-static unsigned int zvol_threads = 32;
#ifndef HAVE_BLKDEV_GET_ERESTARTSYS
static const unsigned int zvol_open_timeout_ms = 1000;
#endif
+static unsigned int zvol_threads = 0;
+#ifdef HAVE_BLK_MQ
+static unsigned int zvol_blk_mq_threads = 0;
+static unsigned int zvol_blk_mq_actual_threads;
+static boolean_t zvol_use_blk_mq = B_FALSE;
+
+/*
+ * The maximum number of volblocksize blocks to process per thread. Typically,
+ * write heavy workloads perform better with higher values here, and read
+ * heavy workloads perform better with lower values, but that's not a hard
+ * and fast rule. It's basically a knob to tune between "less overhead with
+ * less parallelism" and "more overhead, but more parallelism".
+ *
+ * '8' was chosen as a reasonable, balanced, default based off of sequential
+ * read and write tests to a zvol in an NVMe pool (with 16 CPUs).
+ */
+static unsigned int zvol_blk_mq_blocks_per_thread = 8;
+#endif
+
+#ifndef BLKDEV_DEFAULT_RQ
+/* BLKDEV_MAX_RQ was renamed to BLKDEV_DEFAULT_RQ in the 5.16 kernel */
+#define BLKDEV_DEFAULT_RQ BLKDEV_MAX_RQ
+#endif
+
+/*
+ * Finalize our BIO or request.
+ *
+ * With HAVE_BLK_MQ: a non-NULL 'bio' is completed with BIO_END_IO();
+ * otherwise 'rq' is completed with blk_mq_end_request(), with 'error'
+ * converted by errno_to_bi_status(). Without HAVE_BLK_MQ only the bio path
+ * exists. The 'zv' argument is not used by either expansion.
+ */
+#ifdef HAVE_BLK_MQ
+#define END_IO(zv, bio, rq, error) do { \
+ if (bio) { \
+ BIO_END_IO(bio, error); \
+ } else { \
+ blk_mq_end_request(rq, errno_to_bi_status(error)); \
+ } \
+} while (0)
+#else
+#define END_IO(zv, bio, rq, error) BIO_END_IO(bio, error)
+#endif
+
+#ifdef HAVE_BLK_MQ
+static unsigned int zvol_blk_mq_queue_depth = BLKDEV_DEFAULT_RQ;
+static unsigned int zvol_actual_blk_mq_queue_depth;
+#endif
+
struct zvol_state_os {
struct gendisk *zvo_disk; /* generic disk */
struct request_queue *zvo_queue; /* request queue */
dev_t zvo_dev; /* device id */
+
+#ifdef HAVE_BLK_MQ
+ struct blk_mq_tag_set tag_set; /* filled in by zvol_blk_mq_alloc_tag_set() */
+#endif
+
+ /* Set from the global 'zvol_use_blk_mq' at zvol load */
+ boolean_t use_blk_mq;
};
taskq_t *zvol_taskq;
@@ -63,8 +120,14 @@ static struct ida zvol_ida;
typedef struct zv_request_stack {
zvol_state_t *zv;
struct bio *bio; /* set on the bio path (zvol_submit_bio/zvol_request), else NULL */
+ struct request *rq; /* set on the blk-mq path (zvol_mq_queue_rq), else NULL */
} zv_request_t;
+typedef struct zv_work { /* NOTE(review): no user of this type is visible in this part of the file */
+ struct request *rq;
+ struct work_struct work;
+} zv_work_t;
+
typedef struct zv_request_task {
zv_request_t zvr;
taskq_ent_t ent;
@@ -86,6 +149,62 @@ zv_request_task_free(zv_request_task_t *task)
kmem_free(task, sizeof (*task));
}
+#ifdef HAVE_BLK_MQ
+
+/*
+ * This is called when a new block multiqueue request comes in. A request
+ * contains one or more BIOs.
+ *
+ * hctx: hardware queue context the request arrived on (not used here)
+ * bd: queue data; bd->rq is the request to process
+ *
+ * Returns BLK_STS_OK once the request has been handed to
+ * zvol_request_impl(), or BLK_STS_IOERR for passthrough requests.
+ */
+static blk_status_t zvol_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
+ const struct blk_mq_queue_data *bd)
+{
+ struct request *rq = bd->rq;
+ zvol_state_t *zv = rq->q->queuedata;
+
+ /* Tell the kernel that we are starting to process this request */
+ blk_mq_start_request(rq);
+
+ if (blk_rq_is_passthrough(rq)) {
+ /*
+ * Skip non filesystem request. Do not call
+ * blk_mq_end_request() here: blk-mq itself ends a request
+ * whose ->queue_rq() returns an error status, so ending it
+ * in both places would complete the request twice.
+ */
+ return (BLK_STS_IOERR);
+ }
+
+ zvol_request_impl(zv, NULL, rq, 0);
+
+ /* Acknowledge to the kernel that we got this request */
+ return (BLK_STS_OK);
+}
+
+static struct blk_mq_ops zvol_blk_mq_queue_ops = {
+ .queue_rq = zvol_mq_queue_rq, /* invoked for each new blk-mq request */
+};
+
+/*
+ * Fill in the zvol's blk-mq tag set (zv->zv_zso->tag_set) and register it
+ * with blk_mq_alloc_tag_set(), whose return value is passed back unchanged.
+ */
+static int zvol_blk_mq_alloc_tag_set(zvol_state_t *zv)
+{
+ struct blk_mq_tag_set *ts = &zv->zv_zso->tag_set;
+
+ /* Start from an all-zero tag set. */
+ memset(ts, 0, sizeof (*ts));
+
+ ts->ops = &zvol_blk_mq_queue_ops;
+ ts->driver_data = zv;
+ ts->numa_node = NUMA_NO_NODE;
+ ts->cmd_size = 0;
+
+ /* Sized from the effective values computed in zvol_init(). */
+ ts->nr_hw_queues = zvol_blk_mq_actual_threads;
+ ts->queue_depth = zvol_actual_blk_mq_queue_depth;
+
+ /*
+ * zvol_request_impl() makes blocking calls, which is why
+ * BLK_MQ_F_BLOCKING has to be set here.
+ */
+ ts->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING;
+
+ return (blk_mq_alloc_tag_set(ts));
+}
+#endif /* HAVE_BLK_MQ */
+
/*
* Given a path, return TRUE if path is a ZVOL.
*/
@@ -107,38 +226,51 @@ static void
zvol_write(zv_request_t *zvr)
{
struct bio *bio = zvr->bio;
+ struct request *rq = zvr->rq;
int error = 0;
zfs_uio_t uio;
-
- zfs_uio_bvec_init(&uio, bio);
-
zvol_state_t *zv = zvr->zv;
+ struct request_queue *q;
+ struct gendisk *disk;
+ unsigned long start_time = 0;
+ boolean_t acct = B_FALSE;
+
ASSERT3P(zv, !=, NULL);
ASSERT3U(zv->zv_open_count, >, 0);
ASSERT3P(zv->zv_zilog, !=, NULL);
+ q = zv->zv_zso->zvo_queue;
+ disk = zv->zv_zso->zvo_disk;
+
/* bio marked as FLUSH need to flush before write */
- if (bio_is_flush(bio))
+ if (io_is_flush(bio, rq))
zil_commit(zv->zv_zilog, ZVOL_OBJ);
/* Some requests are just for flush and nothing else. */
- if (uio.uio_resid == 0) {
+ if (io_size(bio, rq) == 0) {
rw_exit(&zv->zv_suspend_lock);
- BIO_END_IO(bio, 0);
+ END_IO(zv, bio, rq, 0);
return;
}
- struct request_queue *q = zv->zv_zso->zvo_queue;
- struct gendisk *disk = zv->zv_zso->zvo_disk;
+ zfs_uio_bvec_init(&uio, bio, rq);
+
ssize_t start_resid = uio.uio_resid;
- unsigned long start_time;
- boolean_t acct = blk_queue_io_stat(q);
- if (acct)
- start_time = blk_generic_start_io_acct(q, disk, WRITE, bio);
+ /*
+ * With use_blk_mq, accounting is done by blk_mq_start_request()
+ * and blk_mq_end_request(), so we can skip it here.
+ */
+ if (bio) {
+ acct = blk_queue_io_stat(q);
+ if (acct) {
+ start_time = blk_generic_start_io_acct(q, disk, WRITE,
+ bio);
+ }
+ }
boolean_t sync =
- bio_is_fua(bio) || zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS;
+ io_is_fua(bio, rq) || zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS;
zfs_locked_range_t *lr = zfs_rangelock_enter(&zv->zv_rangelock,
uio.uio_loffset, uio.uio_resid, RL_WRITER);
@@ -180,10 +312,11 @@ zvol_write(zv_request_t *zvr)
rw_exit(&zv->zv_suspend_lock);
- if (acct)
+ if (bio && acct) {
blk_generic_end_io_acct(q, disk, WRITE, bio, start_time);
+ }
- BIO_END_IO(bio, -error);
+ END_IO(zv, bio, rq, -error);
}
static void
@@ -198,27 +331,33 @@ static void
zvol_discard(zv_request_t *zvr)
{
struct bio *bio = zvr->bio;
+ struct request *rq = zvr->rq;
zvol_state_t *zv = zvr->zv;
- uint64_t start = BIO_BI_SECTOR(bio) << 9;
- uint64_t size = BIO_BI_SIZE(bio);
+ uint64_t start = io_offset(bio, rq);
+ uint64_t size = io_size(bio, rq);
uint64_t end = start + size;
boolean_t sync;
int error = 0;
dmu_tx_t *tx;
+ struct request_queue *q = zv->zv_zso->zvo_queue;
+ struct gendisk *disk = zv->zv_zso->zvo_disk;
+ unsigned long start_time = 0;
+
+ boolean_t acct = blk_queue_io_stat(q);
ASSERT3P(zv, !=, NULL);
ASSERT3U(zv->zv_open_count, >, 0);
ASSERT3P(zv->zv_zilog, !=, NULL);
- struct request_queue *q = zv->zv_zso->zvo_queue;
- struct gendisk *disk = zv->zv_zso->zvo_disk;
- unsigned long start_time;
-
- boolean_t acct = blk_queue_io_stat(q);
- if (acct)
- start_time = blk_generic_start_io_acct(q, disk, WRITE, bio);
+ if (bio) {
+ acct = blk_queue_io_stat(q);
+ if (acct) {
+ start_time = blk_generic_start_io_acct(q, disk, WRITE,
+ bio);
+ }
+ }
- sync = bio_is_fua(bio) || zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS;
+ sync = io_is_fua(bio, rq) || zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS;
if (end > zv->zv_volsize) {
error = SET_ERROR(EIO);
@@ -231,7 +370,7 @@ zvol_discard(zv_request_t *zvr)
* the unaligned parts which is slow (read-modify-write) and useless
* since we are not freeing any space by doing so.
*/
- if (!bio_is_secure_erase(bio)) {
+ if (!io_is_secure_erase(bio, rq)) {
start = P2ROUNDUP(start, zv->zv_volblocksize);
end = P2ALIGN(end, zv->zv_volblocksize);
size = end - start;
@@ -262,10 +401,12 @@ zvol_discard(zv_request_t *zvr)
unlock:
rw_exit(&zv->zv_suspend_lock);
- if (acct)
- blk_generic_end_io_acct(q, disk, WRITE, bio, start_time);
+ if (bio && acct) {
+ blk_generic_end_io_acct(q, disk, WRITE, bio,
+ start_time);
+ }
- BIO_END_IO(bio, -error);
+ END_IO(zv, bio, rq, -error);
}
static void
@@ -280,28 +421,41 @@ static void
zvol_read(zv_request_t *zvr)
{
struct bio *bio = zvr->bio;
+ struct request *rq = zvr->rq;
int error = 0;
zfs_uio_t uio;
-
- zfs_uio_bvec_init(&uio, bio);
-
+ boolean_t acct = B_FALSE;
zvol_state_t *zv = zvr->zv;
+ struct request_queue *q;
+ struct gendisk *disk;
+ unsigned long start_time = 0;
+
ASSERT3P(zv, !=, NULL);
ASSERT3U(zv->zv_open_count, >, 0);
- struct request_queue *q = zv->zv_zso->zvo_queue;
- struct gendisk *disk = zv->zv_zso->zvo_disk;
+ zfs_uio_bvec_init(&uio, bio, rq);
+
+ q = zv->zv_zso->zvo_queue;
+ disk = zv->zv_zso->zvo_disk;
+
ssize_t start_resid = uio.uio_resid;
- unsigned long start_time;
- boolean_t acct = blk_queue_io_stat(q);
- if (acct)
- start_time = blk_generic_start_io_acct(q, disk, READ, bio);
+ /*
+ * When blk-mq is being used, accounting is done by
+ * blk_mq_start_request() and blk_mq_end_request().
+ */
+ if (bio) {
+ acct = blk_queue_io_stat(q);
+ if (acct)
+ start_time = blk_generic_start_io_acct(q, disk, READ,
+ bio);
+ }
zfs_locked_range_t *lr = zfs_rangelock_enter(&zv->zv_rangelock,
uio.uio_loffset, uio.uio_resid, RL_READER);
uint64_t volsize = zv->zv_volsize;
+
while (uio.uio_resid > 0 && uio.uio_loffset < volsize) {
uint64_t bytes = MIN(uio.uio_resid, DMU_MAX_ACCESS >> 1);
@@ -325,10 +479,11 @@ zvol_read(zv_request_t *zvr)
rw_exit(&zv->zv_suspend_lock);
- if (acct)
+ if (bio && acct) {
blk_generic_end_io_acct(q, disk, READ, bio, start_time);
+ }
- BIO_END_IO(bio, -error);
+ END_IO(zv, bio, rq, -error);
}
static void
@@ -339,52 +494,49 @@ zvol_read_task(void *arg)
zv_request_task_free(task);
}
-#ifdef HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS
-#ifdef HAVE_BDEV_SUBMIT_BIO_RETURNS_VOID
+
+/*
+ * Process a BIO or request
+ *
+ * Either 'bio' or 'rq' should be set depending on if we are processing a
+ * bio or a request (both should not be set).
+ *
+ * force_sync: Set to 0 to defer processing to a background taskq
+ * Set to 1 to process data synchronously
+ */
static void
-zvol_submit_bio(struct bio *bio)
-#else
-static blk_qc_t
-zvol_submit_bio(struct bio *bio)
-#endif
-#else
-static MAKE_REQUEST_FN_RET
-zvol_request(struct request_queue *q, struct bio *bio)
-#endif
+zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq,
+ boolean_t force_sync)
{
-#ifdef HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS
-#if defined(HAVE_BIO_BDEV_DISK)
- struct request_queue *q = bio->bi_bdev->bd_disk->queue;
-#else
- struct request_queue *q = bio->bi_disk->queue;
-#endif
-#endif
- zvol_state_t *zv = q->queuedata;
fstrans_cookie_t cookie = spl_fstrans_mark();
- uint64_t offset = BIO_BI_SECTOR(bio) << 9;
- uint64_t size = BIO_BI_SIZE(bio);
- int rw = bio_data_dir(bio);
+ uint64_t offset = io_offset(bio, rq);
+ uint64_t size = io_size(bio, rq);
+ int rw = io_data_dir(bio, rq);
- if (bio_has_data(bio) && offset + size > zv->zv_volsize) {
- printk(KERN_INFO
- "%s: bad access: offset=%llu, size=%lu\n",
+ if (zvol_request_sync)
+ force_sync = 1;
+
+ zv_request_t zvr = {
+ .zv = zv,
+ .bio = bio,
+ .rq = rq,
+ };
+
+ if (io_has_data(bio, rq) && offset + size > zv->zv_volsize) {
+ printk(KERN_INFO "%s: bad access: offset=%llu, size=%lu\n",
zv->zv_zso->zvo_disk->disk_name,
(long long unsigned)offset,
(long unsigned)size);
- BIO_END_IO(bio, -SET_ERROR(EIO));
+ END_IO(zv, bio, rq, -SET_ERROR(EIO));
goto out;
}
- zv_request_t zvr = {
- .zv = zv,
- .bio = bio,
- };
zv_request_task_t *task;
if (rw == WRITE) {
if (unlikely(zv->zv_flags & ZVOL_RDONLY)) {
- BIO_END_IO(bio, -SET_ERROR(EROFS));
+ END_IO(zv, bio, rq, -SET_ERROR(EROFS));
goto out;
}
@@ -421,7 +573,7 @@ zvol_request(struct request_queue *q, struct bio *bio)
* i/o may be a ZIL write (via zil_commit()), or a read of an
* indirect block, or a read of a data block (if this is a
* partial-block write). We will indicate that the i/o is
- * complete by calling BIO_END_IO() from the taskq callback.
+ * complete by calling END_IO() from the taskq callback.
*
* This design allows the calling thread to continue and
* initiate more concurrent operations by calling
@@ -441,12 +593,12 @@ zvol_request(struct request_queue *q, struct bio *bio)
* of one i/o at a time per zvol. However, an even better
* design would be for zvol_request() to initiate the zio
* directly, and then be notified by the zio_done callback,
- * which would call BIO_END_IO(). Unfortunately, the DMU/ZIL
+ * which would call END_IO(). Unfortunately, the DMU/ZIL
* interfaces lack this functionality (they block waiting for
* the i/o to complete).
*/
- if (bio_is_discard(bio) || bio_is_secure_erase(bio)) {
- if (zvol_request_sync) {
+ if (io_is_discard(bio, rq) || io_is_secure_erase(bio, rq)) {
+ if (force_sync) {
zvol_discard(&zvr);
} else {
task = zv_request_task_create(zvr);
@@ -454,7 +606,7 @@ zvol_request(struct request_queue *q, struct bio *bio)
zvol_discard_task, task, 0, &task->ent);
}
} else {
- if (zvol_request_sync) {
+ if (force_sync) {
zvol_write(&zvr);
} else {
task = zv_request_task_create(zvr);
@@ -469,14 +621,14 @@ zvol_request(struct request_queue *q, struct bio *bio)
* data and require no additional handling.
*/
if (size == 0) {
- BIO_END_IO(bio, 0);
+ END_IO(zv, bio, rq, 0);
goto out;
}
rw_enter(&zv->zv_suspend_lock, RW_READER);
/* See comment in WRITE case above. */
- if (zvol_request_sync) {
+ if (force_sync) {
zvol_read(&zvr);
} else {
task = zv_request_task_create(zvr);
@@ -487,8 +639,33 @@ zvol_request(struct request_queue *q, struct bio *bio)
out:
spl_fstrans_unmark(cookie);
-#if (defined(HAVE_MAKE_REQUEST_FN_RET_QC) || \
- defined(HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS)) && \
+}
+
+#ifdef HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS
+#ifdef HAVE_BDEV_SUBMIT_BIO_RETURNS_VOID
+static void
+zvol_submit_bio(struct bio *bio)
+#else
+static blk_qc_t
+zvol_submit_bio(struct bio *bio)
+#endif
+#else
+static MAKE_REQUEST_FN_RET
+zvol_request(struct request_queue *q, struct bio *bio)
+#endif
+{
+#ifdef HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS
+#if defined(HAVE_BIO_BDEV_DISK)
+ struct request_queue *q = bio->bi_bdev->bd_disk->queue;
+#else
+ struct request_queue *q = bio->bi_disk->queue;
+#endif
+#endif
+ zvol_state_t *zv = q->queuedata;
+
+ zvol_request_impl(zv, bio, NULL, 0);
+#if defined(HAVE_MAKE_REQUEST_FN_RET_QC) || \
+ defined(HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS) && \
!defined(HAVE_BDEV_SUBMIT_BIO_RETURNS_VOID)
return (BLK_QC_T_NONE);
#endif
@@ -805,6 +982,27 @@ zvol_getgeo(struct block_device *bdev, struct hd_geometry *geo)
return (0);
}
+/*
+ * Why have two separate block_device_operations structs?
+ *
+ * Normally we'd just have one, and assign 'submit_bio' as needed. However,
+ * it's possible the user's kernel is built with CONSTIFY_PLUGIN, meaning we
+ * can't just change submit_bio dynamically at runtime. So just create two
+ * separate structs to get around this.
+ *
+ * zvol_ops_blk_mq is the variant zvol_alloc() installs when the zvol uses
+ * blk-mq; it sets no 'submit_bio' member.
+ */
+static const struct block_device_operations zvol_ops_blk_mq = {
+ .open = zvol_open,
+ .release = zvol_release,
+ .ioctl = zvol_ioctl,
+ .compat_ioctl = zvol_compat_ioctl,
+ .check_events = zvol_check_events,
+#ifdef HAVE_BLOCK_DEVICE_OPERATIONS_REVALIDATE_DISK
+ .revalidate_disk = zvol_revalidate_disk,
+#endif
+ .getgeo = zvol_getgeo,
+ .owner = THIS_MODULE,
+};
+
static const struct block_device_operations zvol_ops = {
.open = zvol_open,
.release = zvol_release,
@@ -821,6 +1019,87 @@ static const struct block_device_operations zvol_ops = {
#endif
};
+/*
+ * Allocate the gendisk and request queue for a zvol that does not use
+ * blk-mq, storing them in zso->zvo_disk and zso->zvo_queue.
+ *
+ * Returns 0 on success and 1 if either allocation fails.
+ */
+static int
+zvol_alloc_non_blk_mq(struct zvol_state_os *zso)
+{
+#if defined(HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS) && \
+ defined(HAVE_BLK_ALLOC_DISK)
+ /* blk_alloc_disk() hands back a disk that already has its queue. */
+ zso->zvo_disk = blk_alloc_disk(NUMA_NO_NODE);
+ if (zso->zvo_disk == NULL)
+ return (1);
+
+ zso->zvo_disk->minors = ZVOL_MINORS;
+ zso->zvo_queue = zso->zvo_disk->queue;
+#else
+ /* Queue and disk are allocated separately, then tied together. */
+#if defined(HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS)
+ zso->zvo_queue = blk_alloc_queue(NUMA_NO_NODE);
+#else
+ zso->zvo_queue = blk_generic_alloc_queue(zvol_request, NUMA_NO_NODE);
+#endif
+ if (zso->zvo_queue == NULL)
+ return (1);
+
+ zso->zvo_disk = alloc_disk(ZVOL_MINORS);
+ if (zso->zvo_disk == NULL) {
+ blk_cleanup_queue(zso->zvo_queue);
+ return (1);
+ }
+
+ zso->zvo_disk->queue = zso->zvo_queue;
+#endif
+ return (0);
+}
+
+/*
+ * Allocate the blk-mq tag set, request queue and gendisk for a zvol, storing
+ * the latter two in zso->zvo_queue and zso->zvo_disk.
+ *
+ * Returns 0 on success and 1 on failure. On failure everything allocated
+ * here has been released again and the failed pointer is reset to NULL.
+ * Without HAVE_BLK_MQ this does nothing and returns 0.
+ */
+static int
+zvol_alloc_blk_mq(zvol_state_t *zv)
+{
+#ifdef HAVE_BLK_MQ
+ struct zvol_state_os *zso = zv->zv_zso;
+
+ /* Allocate our blk-mq tag_set */
+ if (zvol_blk_mq_alloc_tag_set(zv) != 0)
+ return (1);
+
+#if defined(HAVE_BLK_ALLOC_DISK)
+ /*
+ * blk_mq_alloc_disk() reports failure with an ERR_PTR() rather than
+ * NULL, so both have to be checked for.
+ */
+ zso->zvo_disk = blk_mq_alloc_disk(&zso->tag_set, zv);
+ if (zso->zvo_disk == NULL || IS_ERR(zso->zvo_disk)) {
+ zso->zvo_disk = NULL;
+ blk_mq_free_tag_set(&zso->tag_set);
+ return (1);
+ }
+ zso->zvo_queue = zso->zvo_disk->queue;
+ zso->zvo_disk->minors = ZVOL_MINORS;
+#else
+ /*
+ * Allocate the queue first, so that a later alloc_disk() failure has
+ * a valid queue to clean up and a queue failure has no disk to leak.
+ */
+ zso->zvo_queue = blk_mq_init_queue(&zso->tag_set);
+ if (IS_ERR(zso->zvo_queue)) {
+ zso->zvo_queue = NULL;
+ blk_mq_free_tag_set(&zso->tag_set);
+ return (1);
+ }
+
+ zso->zvo_disk = alloc_disk(ZVOL_MINORS);
+ if (zso->zvo_disk == NULL) {
+ blk_cleanup_queue(zso->zvo_queue);
+ zso->zvo_queue = NULL;
+ blk_mq_free_tag_set(&zso->tag_set);
+ return (1);
+ }
+
+ /* Our queue is now created, assign it to our disk */
+ zso->zvo_disk->queue = zso->zvo_queue;
+
+#endif
+#endif
+ return (0);
+}
+
/*
* Allocate memory for a new zvol_state_t and setup the required
* request queue and generic disk structures for the block device.
@@ -831,6 +1110,7 @@ zvol_alloc(dev_t dev, const char *name)
zvol_state_t *zv;
struct zvol_state_os *zso;
uint64_t volmode;
+ int ret;
if (dsl_prop_get_integer(name, "volmode", &volmode, NULL) != 0)
return (NULL);
@@ -849,48 +1129,44 @@ zvol_alloc(dev_t dev, const char *name)
list_link_init(&zv->zv_next);
mutex_init(&zv->zv_state_lock, NULL, MUTEX_DEFAULT, NULL);
-#ifdef HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS
-#ifdef HAVE_BLK_ALLOC_DISK
- zso->zvo_disk = blk_alloc_disk(NUMA_NO_NODE);
- if (zso->zvo_disk == NULL)
- goto out_kmem;
-
- zso->zvo_disk->minors = ZVOL_MINORS;
- zso->zvo_queue = zso->zvo_disk->queue;
-#else
- zso->zvo_queue = blk_alloc_queue(NUMA_NO_NODE);
- if (zso->zvo_queue == NULL)
- goto out_kmem;
+#ifdef HAVE_BLK_MQ
+ zv->zv_zso->use_blk_mq = zvol_use_blk_mq;
+#endif
- zso->zvo_disk = alloc_disk(ZVOL_MINORS);
- if (zso->zvo_disk == NULL) {
- blk_cleanup_queue(zso->zvo_queue);
- goto out_kmem;
+ /*
+ * The block layer has 3 interfaces for getting BIOs:
+ *
+ * 1. blk-mq request queues (new)
+ * 2. submit_bio() (oldest)
+ * 3. regular request queues (old).
+ *
+ * Each of those interfaces has two permutations:
+ *
+ * a) We have blk_alloc_disk()/blk_mq_alloc_disk(), which allocates
+ * both the disk and its queue (5.14 kernel or newer)
+ *
+ * b) We don't have blk_*alloc_disk(), and have to allocate the
+ * disk and the queue separately. (5.13 kernel or older)
+ */
+ if (zv->zv_zso->use_blk_mq) {
+ ret = zvol_alloc_blk_mq(zv);
+ zso->zvo_disk->fops = &zvol_ops_blk_mq;
+ } else {
+ ret = zvol_alloc_non_blk_mq(zso);
+ zso->zvo_disk->fops = &zvol_ops;
}
-
- zso->zvo_disk->queue = zso->zvo_queue;
-#endif /* HAVE_BLK_ALLOC_DISK */
-#else
- zso->zvo_queue = blk_generic_alloc_queue(zvol_request, NUMA_NO_NODE);
- if (zso->zvo_queue == NULL)
+ if (ret != 0)
goto out_kmem;
- zso->zvo_disk = alloc_disk(ZVOL_MINORS);
- if (zso->zvo_disk == NULL) {
- blk_cleanup_queue(zso->zvo_queue);
- goto out_kmem;
- }
-
- zso->zvo_disk->queue = zso->zvo_queue;
-#endif /* HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS */
-
blk_queue_set_write_cache(zso->zvo_queue, B_TRUE, B_TRUE);
/* Limit read-ahead to a single page to prevent over-prefetching. */
blk_queue_set_read_ahead(zso->zvo_queue, 1);
- /* Disable write merging in favor of the ZIO pipeline. */
- blk_queue_flag_set(QUEUE_FLAG_NOMERGES, zso->zvo_queue);
+ if (!zv->zv_zso->use_blk_mq) {
+ /* Disable write merging in favor of the ZIO pipeline. */
+ blk_queue_flag_set(QUEUE_FLAG_NOMERGES, zso->zvo_queue);
+ }
/* Enable /proc/diskstats */
blk_queue_flag_set(QUEUE_FLAG_IO_STAT, zso->zvo_queue);
@@ -918,7 +1194,6 @@ zvol_alloc(dev_t dev, const char *name)
}
zso->zvo_disk->first_minor = (dev & MINORMASK);
- zso->zvo_disk->fops = &zvol_ops;
zso->zvo_disk->private_data = zv;
snprintf(zso->zvo_disk->disk_name, DISK_NAME_LEN, "%s%d",
ZVOL_DEV_NAME, (dev & MINORMASK));
@@ -963,6 +1238,11 @@ zvol_os_free(zvol_state_t *zv)
put_disk(zv->zv_zso->zvo_disk);
#endif
+#ifdef HAVE_BLK_MQ
+ if (zv->zv_zso->use_blk_mq)
+ blk_mq_free_tag_set(&zv->zv_zso->tag_set);
+#endif
+
ida_simple_remove(&zvol_ida,
MINOR(zv->zv_zso->zvo_dev) >> ZVOL_MINOR_BITS);
@@ -1044,8 +1324,69 @@ zvol_os_create_minor(const char *name)
blk_queue_max_hw_sectors(zv->zv_zso->zvo_queue,
(DMU_MAX_ACCESS / 4) >> 9);
- blk_queue_max_segments(zv->zv_zso->zvo_queue, UINT16_MAX);
- blk_queue_max_segment_size(zv->zv_zso->zvo_queue, UINT_MAX);
+
+ if (zv->zv_zso->use_blk_mq) {
+ /*
+ * IO requests can be really big (1MB). When an IO request
+ * comes in, it is passed off to zvol_read() or zvol_write()
+ * in a new thread, where it is chunked up into 'volblocksize'
+ * sized pieces and processed. So for example, if the request
+ * is a 1MB write and your volblocksize is 128k, one zvol_write
+ * thread will take that request and sequentially do eight 128k
+ * IOs. This is due to the fact that the thread needs to lock
+ * each volblocksize sized block. So you might be wondering:
+ * "instead of passing the whole 1MB request to one thread,
+ * why not pass eight individual 128k chunks to eight threads
+ * and process the whole write in parallel?" The short answer is
+ * that there's a sweet spot number of chunks that balances
+ * the greater parallelism with the added overhead of more
+ * threads. The sweet spot can be different depending on if you
+ * have a read or write heavy workload. Writes typically want
+ * high chunk counts while reads typically want lower ones. On
+ * a test pool with 6 NVMe drives in a 3x 2-disk mirror
+ * configuration, with volblocksize=8k, the sweet spot for good
+ * sequential reads and writes was at 8 chunks.
+ */
+
+ /*
+ * Below we tell the kernel how big we want our requests
+ * to be. You would think that blk_queue_io_opt() would be
+ * used to do this since it is used to "set optimal request
+ * size for the queue", but that doesn't seem to do
+ * anything - the kernel still gives you huge requests
+ * with tons of little PAGE_SIZE segments contained within it.
+ *
+ * Knowing that the kernel will just give you PAGE_SIZE segments
+ * no matter what, you can say "ok, I want PAGE_SIZE byte
+ * segments, and I want 'N' of them per request", where N is
+ * the correct number of segments for the volblocksize and
+ * number of chunks you want.
+ */
+#ifdef HAVE_BLK_MQ
+ if (zvol_blk_mq_blocks_per_thread != 0) {
+ unsigned int chunks;
+ chunks = MIN(zvol_blk_mq_blocks_per_thread, UINT16_MAX);
+
+ blk_queue_max_segment_size(zv->zv_zso->zvo_queue,
+ PAGE_SIZE);
+ blk_queue_max_segments(zv->zv_zso->zvo_queue,
+ (zv->zv_volblocksize * chunks) / PAGE_SIZE);
+ } else {
+ /*
+ * Special case: zvol_blk_mq_blocks_per_thread = 0
+ * Max everything out.
+ */
+ blk_queue_max_segments(zv->zv_zso->zvo_queue,
+ UINT16_MAX);
+ blk_queue_max_segment_size(zv->zv_zso->zvo_queue,
+ UINT_MAX);
+ }
+#endif
+ } else {
+ blk_queue_max_segments(zv->zv_zso->zvo_queue, UINT16_MAX);
+ blk_queue_max_segment_size(zv->zv_zso->zvo_queue, UINT_MAX);
+ }
+
blk_queue_physical_block_size(zv->zv_zso->zvo_queue,
zv->zv_volblocksize);
blk_queue_io_opt(zv->zv_zso->zvo_queue, zv->zv_volblocksize);
@@ -1167,19 +1508,54 @@ int
zvol_init(void)
{
int error;
- int threads = MIN(MAX(zvol_threads, 1), 1024);
+
+ /*
+ * zvol_threads is the module param the user passes in.
+ *
+ * zvol_actual_threads is what we use internally, since the user can
+ * pass zvol_threads = 0 to mean "use all the CPUs" (the default).
+ */
+ static unsigned int zvol_actual_threads;
+
+ if (zvol_threads == 0) {
+ /*
+ * See dde9380a1 for why 32 was chosen here. This should
+ * probably be refined to be some multiple of the number
+ * of CPUs.
+ */
+ zvol_actual_threads = MAX(num_online_cpus(), 32);
+ } else {
+ zvol_actual_threads = MIN(MAX(zvol_threads, 1), 1024);
+ }
error = register_blkdev(zvol_major, ZVOL_DRIVER);
if (error) {
printk(KERN_INFO "ZFS: register_blkdev() failed %d\n", error);
return (error);
}
- zvol_taskq = taskq_create(ZVOL_DRIVER, threads, maxclsyspri,
- threads * 2, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
+
+#ifdef HAVE_BLK_MQ
+ if (zvol_blk_mq_queue_depth == 0) {
+ zvol_actual_blk_mq_queue_depth = BLKDEV_DEFAULT_RQ;
+ } else {
+ zvol_actual_blk_mq_queue_depth =
+ MAX(zvol_blk_mq_queue_depth, BLKDEV_MIN_RQ);
+ }
+
+ if (zvol_blk_mq_threads == 0) {
+ zvol_blk_mq_actual_threads = num_online_cpus();
+ } else {
+ zvol_blk_mq_actual_threads = MIN(MAX(zvol_blk_mq_threads, 1),
+ 1024);
+ }
+#endif
+ zvol_taskq = taskq_create(ZVOL_DRIVER, zvol_actual_threads, maxclsyspri,
+ zvol_actual_threads, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
if (zvol_taskq == NULL) {
unregister_blkdev(zvol_major, ZVOL_DRIVER);
return (-ENOMEM);
}
+
zvol_init_impl();
ida_init(&zvol_ida);
return (0);
@@ -1202,7 +1578,8 @@ module_param(zvol_major, uint, 0444);
MODULE_PARM_DESC(zvol_major, "Major number for zvol device");
module_param(zvol_threads, uint, 0444);
-MODULE_PARM_DESC(zvol_threads, "Max number of threads to handle I/O requests");
+MODULE_PARM_DESC(zvol_threads, "Number of threads to handle I/O requests. Set"
+ " to 0 to use all active CPUs");
module_param(zvol_request_sync, uint, 0644);
MODULE_PARM_DESC(zvol_request_sync, "Synchronously handle bio requests");
@@ -1215,4 +1592,17 @@ MODULE_PARM_DESC(zvol_prefetch_bytes, "Prefetch N bytes at zvol start+end");
module_param(zvol_volmode, uint, 0644);
MODULE_PARM_DESC(zvol_volmode, "Default volmode property value");
+
+#ifdef HAVE_BLK_MQ
+module_param(zvol_blk_mq_queue_depth, uint, 0644);
+MODULE_PARM_DESC(zvol_blk_mq_queue_depth, "Default blk-mq queue depth");
+
+module_param(zvol_use_blk_mq, uint, 0644);
+MODULE_PARM_DESC(zvol_use_blk_mq, "Use the blk-mq API for zvols");
+
+module_param(zvol_blk_mq_blocks_per_thread, uint, 0644);
+MODULE_PARM_DESC(zvol_blk_mq_blocks_per_thread,
+ "Process volblocksize blocks per thread");
+#endif
+
/* END CSTYLED */
diff --git a/sys/contrib/openzfs/module/zcommon/zfeature_common.c b/sys/contrib/openzfs/module/zcommon/zfeature_common.c
index f09389e6d02e..4df09884aa91 100644
--- a/sys/contrib/openzfs/module/zcommon/zfeature_common.c
+++ b/sys/contrib/openzfs/module/zcommon/zfeature_common.c
@@ -696,16 +696,15 @@ zpool_feature_init(void)
ZFEATURE_FLAG_MOS, ZFEATURE_TYPE_BOOLEAN, NULL, sfeatures);
{
-
- static const spa_feature_t zilsaxattr_deps[] = {
- SPA_FEATURE_EXTENSIBLE_DATASET,
- SPA_FEATURE_NONE
- };
- zfeature_register(SPA_FEATURE_ZILSAXATTR,
- "org.openzfs:zilsaxattr", "zilsaxattr",
- "Support for xattr=sa extended attribute logging in ZIL.",
- ZFEATURE_FLAG_PER_DATASET | ZFEATURE_FLAG_READONLY_COMPAT,
- ZFEATURE_TYPE_BOOLEAN, zilsaxattr_deps, sfeatures);
+ static const spa_feature_t zilsaxattr_deps[] = {
+ SPA_FEATURE_EXTENSIBLE_DATASET,
+ SPA_FEATURE_NONE
+ };
+ zfeature_register(SPA_FEATURE_ZILSAXATTR,
+ "org.openzfs:zilsaxattr", "zilsaxattr",
+ "Support for xattr=sa extended attribute logging in ZIL.",
+ ZFEATURE_FLAG_PER_DATASET | ZFEATURE_FLAG_READONLY_COMPAT,
+ ZFEATURE_TYPE_BOOLEAN, zilsaxattr_deps, sfeatures);
}
zfeature_register(SPA_FEATURE_HEAD_ERRLOG,
@@ -714,6 +713,18 @@ zpool_feature_init(void)
ZFEATURE_FLAG_ACTIVATE_ON_ENABLE, ZFEATURE_TYPE_BOOLEAN, NULL,
sfeatures);
+ {
+ static const spa_feature_t blake3_deps[] = {
+ SPA_FEATURE_EXTENSIBLE_DATASET,
+ SPA_FEATURE_NONE
+ };
+ zfeature_register(SPA_FEATURE_BLAKE3,
+ "org.openzfs:blake3", "blake3",
+ "BLAKE3 hash algorithm.",
+ ZFEATURE_FLAG_PER_DATASET, ZFEATURE_TYPE_BOOLEAN,
+ blake3_deps, sfeatures);
+ }
+
zfs_mod_list_supported_free(sfeatures);
}
diff --git a/sys/contrib/openzfs/module/zcommon/zfs_prop.c b/sys/contrib/openzfs/module/zcommon/zfs_prop.c
index 500d80a33b6b..32475611e11f 100644
--- a/sys/contrib/openzfs/module/zcommon/zfs_prop.c
+++ b/sys/contrib/openzfs/module/zcommon/zfs_prop.c
@@ -84,6 +84,7 @@ zfs_prop_init(void)
{ "sha512", ZIO_CHECKSUM_SHA512 },
{ "skein", ZIO_CHECKSUM_SKEIN },
{ "edonr", ZIO_CHECKSUM_EDONR },
+ { "blake3", ZIO_CHECKSUM_BLAKE3 },
{ NULL }
};
@@ -102,6 +103,9 @@ zfs_prop_init(void)
ZIO_CHECKSUM_SKEIN | ZIO_CHECKSUM_VERIFY },
{ "edonr,verify",
ZIO_CHECKSUM_EDONR | ZIO_CHECKSUM_VERIFY },
+ { "blake3", ZIO_CHECKSUM_BLAKE3 },
+ { "blake3,verify",
+ ZIO_CHECKSUM_BLAKE3 | ZIO_CHECKSUM_VERIFY },
{ NULL }
};
@@ -394,12 +398,12 @@ zfs_prop_init(void)
ZIO_CHECKSUM_DEFAULT, PROP_INHERIT, ZFS_TYPE_FILESYSTEM |
ZFS_TYPE_VOLUME,
"on | off | fletcher2 | fletcher4 | sha256 | sha512 | skein"
- " | edonr",
+ " | edonr | blake3",
"CHECKSUM", checksum_table, sfeatures);
zprop_register_index(ZFS_PROP_DEDUP, "dedup", ZIO_CHECKSUM_OFF,
PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
"on | off | verify | sha256[,verify] | sha512[,verify] | "
- "skein[,verify] | edonr,verify",
+ "skein[,verify] | edonr,verify | blake3[,verify]",
"DEDUP", dedup_table, sfeatures);
zprop_register_index(ZFS_PROP_COMPRESSION, "compression",
ZIO_COMPRESS_DEFAULT, PROP_INHERIT,
diff --git a/sys/contrib/openzfs/module/zfs/blake3_zfs.c b/sys/contrib/openzfs/module/zfs/blake3_zfs.c
new file mode 100644
index 000000000000..7560f30fd4e4
--- /dev/null
+++ b/sys/contrib/openzfs/module/zfs/blake3_zfs.c
@@ -0,0 +1,117 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2022 Tino Reichardt <milky-zfs@mcmilk.de>
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/zio_checksum.h>
+#include <sys/blake3.h>
+#include <sys/abd.h>
+
+static int
+blake3_incremental(void *buf, size_t size, void *arg)
+{
+ BLAKE3_CTX *ctx = arg;
+
+ Blake3_Update(ctx, buf, size);
+
+ return (0);
+}
+
+/*
+ * Computes a native 256-bit BLAKE3 MAC checksum. Please note that this
+ * function requires the presence of a ctx_template that should be allocated
+ * using abd_checksum_blake3_tmpl_init.
+ */
+void
+abd_checksum_blake3_native(abd_t *abd, uint64_t size, const void *ctx_template,
+ zio_cksum_t *zcp)
+{
+ ASSERT(ctx_template != 0);
+
+#if defined(_KERNEL)
+ BLAKE3_CTX *ctx = blake3_per_cpu_ctx[CPU_SEQID_UNSTABLE];
+#else
+ BLAKE3_CTX *ctx = kmem_alloc(sizeof (*ctx), KM_SLEEP);
+#endif
+
+ memcpy(ctx, ctx_template, sizeof (*ctx));
+ (void) abd_iterate_func(abd, 0, size, blake3_incremental, ctx);
+ Blake3_Final(ctx, (uint8_t *)zcp);
+
+#if !defined(_KERNEL)
+ memset(ctx, 0, sizeof (*ctx));
+ kmem_free(ctx, sizeof (*ctx));
+#endif
+}
+
+/*
+ * Byteswapped version of abd_checksum_blake3_native. This just invokes
+ * the native checksum function and byteswaps the resulting checksum (since
+ * BLAKE3 is internally endian-insensitive).
+ */
+void
+abd_checksum_blake3_byteswap(abd_t *abd, uint64_t size,
+ const void *ctx_template, zio_cksum_t *zcp)
+{
+ zio_cksum_t tmp;
+
+ ASSERT(ctx_template != 0);
+
+ abd_checksum_blake3_native(abd, size, ctx_template, &tmp);
+ zcp->zc_word[0] = BSWAP_64(tmp.zc_word[0]);
+ zcp->zc_word[1] = BSWAP_64(tmp.zc_word[1]);
+ zcp->zc_word[2] = BSWAP_64(tmp.zc_word[2]);
+ zcp->zc_word[3] = BSWAP_64(tmp.zc_word[3]);
+}
+
+/*
+ * Allocates a BLAKE3 MAC template suitable for using in BLAKE3 MAC checksum
+ * computations and returns a pointer to it.
+ */
+void *
+abd_checksum_blake3_tmpl_init(const zio_cksum_salt_t *salt)
+{
+ BLAKE3_CTX *ctx;
+
+ ASSERT(sizeof (salt->zcs_bytes) == 32);
+
+ /* init reference object */
+ ctx = kmem_zalloc(sizeof (*ctx), KM_SLEEP);
+ Blake3_InitKeyed(ctx, salt->zcs_bytes);
+
+ return (ctx);
+}
+
+/*
+ * Frees a BLAKE3 context template previously allocated using
+ * abd_checksum_blake3_tmpl_init.
+ */
+void
+abd_checksum_blake3_tmpl_free(void *ctx_template)
+{
+ BLAKE3_CTX *ctx = ctx_template;
+
+ memset(ctx, 0, sizeof (*ctx));
+ kmem_free(ctx, sizeof (*ctx));
+}
diff --git a/sys/contrib/openzfs/module/zfs/dsl_prop.c b/sys/contrib/openzfs/module/zfs/dsl_prop.c
index cfdafd2f4436..51d689355ef0 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_prop.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_prop.c
@@ -88,7 +88,7 @@ dsl_prop_get_dd(dsl_dir_t *dd, const char *propname,
setpoint[0] = '\0';
prop = zfs_name_to_prop(propname);
- inheritable = (prop == ZPROP_INVAL || zfs_prop_inheritable(prop));
+ inheritable = (prop == ZPROP_USERPROP || zfs_prop_inheritable(prop));
inheritstr = kmem_asprintf("%s%s", propname, ZPROP_INHERIT_SUFFIX);
recvdstr = kmem_asprintf("%s%s", propname, ZPROP_RECVD_SUFFIX);
@@ -168,7 +168,7 @@ dsl_prop_get_ds(dsl_dataset_t *ds, const char *propname,
uint64_t zapobj;
ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool));
- inheritable = (prop == ZPROP_INVAL || zfs_prop_inheritable(prop));
+ inheritable = (prop == ZPROP_USERPROP || zfs_prop_inheritable(prop));
zapobj = dsl_dataset_phys(ds)->ds_props_obj;
if (zapobj != 0) {
@@ -1055,12 +1055,12 @@ dsl_prop_get_all_impl(objset_t *mos, uint64_t propobj,
prop = zfs_name_to_prop(propname);
/* Skip non-inheritable properties. */
- if ((flags & DSL_PROP_GET_INHERITING) && prop != ZPROP_INVAL &&
- !zfs_prop_inheritable(prop))
+ if ((flags & DSL_PROP_GET_INHERITING) &&
+ prop != ZPROP_USERPROP && !zfs_prop_inheritable(prop))
continue;
/* Skip properties not valid for this type. */
- if ((flags & DSL_PROP_GET_SNAPSHOT) && prop != ZPROP_INVAL &&
+ if ((flags & DSL_PROP_GET_SNAPSHOT) && prop != ZPROP_USERPROP &&
!zfs_prop_valid_for_type(prop, ZFS_TYPE_SNAPSHOT, B_FALSE))
continue;
diff --git a/sys/contrib/openzfs/module/zfs/dsl_scan.c b/sys/contrib/openzfs/module/zfs/dsl_scan.c
index 89448f0ecea2..d8152939d8e1 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_scan.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_scan.c
@@ -280,6 +280,7 @@ typedef struct scan_io {
struct dsl_scan_io_queue {
dsl_scan_t *q_scn; /* associated dsl_scan_t */
vdev_t *q_vd; /* top-level vdev that this queue represents */
+ zio_t *q_zio; /* scn_zio_root child for waiting on IO */
/* trees used for sorting I/Os and extents of I/Os */
range_tree_t *q_exts_by_addr;
@@ -1276,9 +1277,12 @@ dsl_scan_should_clear(dsl_scan_t *scn)
mutex_enter(&tvd->vdev_scan_io_queue_lock);
queue = tvd->vdev_scan_io_queue;
if (queue != NULL) {
- /* # extents in exts_by_size = # in exts_by_addr */
+ /*
+ * # of extents in exts_by_size = # in exts_by_addr.
+ * B-tree efficiency is ~75%, but can be as low as 50%.
+ */
mused += zfs_btree_numnodes(&queue->q_exts_by_size) *
- sizeof (range_seg_gap_t) + queue->q_sio_memused;
+ 3 * sizeof (range_seg_gap_t) + queue->q_sio_memused;
}
mutex_exit(&tvd->vdev_scan_io_queue_lock);
}
@@ -3033,15 +3037,19 @@ scan_io_queues_run_one(void *arg)
dsl_scan_io_queue_t *queue = arg;
kmutex_t *q_lock = &queue->q_vd->vdev_scan_io_queue_lock;
boolean_t suspended = B_FALSE;
- range_seg_t *rs = NULL;
- scan_io_t *sio = NULL;
+ range_seg_t *rs;
+ scan_io_t *sio;
+ zio_t *zio;
list_t sio_list;
ASSERT(queue->q_scn->scn_is_sorted);
list_create(&sio_list, sizeof (scan_io_t),
offsetof(scan_io_t, sio_nodes.sio_list_node));
+ zio = zio_null(queue->q_scn->scn_zio_root, queue->q_scn->scn_dp->dp_spa,
+ NULL, NULL, NULL, ZIO_FLAG_CANFAIL);
mutex_enter(q_lock);
+ queue->q_zio = zio;
/* Calculate maximum in-flight bytes for this vdev. */
queue->q_maxinflight_bytes = MAX(1, zfs_scan_vdev_limit *
@@ -3108,7 +3116,9 @@ scan_io_queues_run_one(void *arg)
scan_io_queue_insert_impl(queue, sio);
}
+ queue->q_zio = NULL;
mutex_exit(q_lock);
+ zio_nowait(zio);
list_destroy(&sio_list);
}
@@ -4073,6 +4083,7 @@ scan_exec_io(dsl_pool_t *dp, const blkptr_t *bp, int zio_flags,
dsl_scan_t *scn = dp->dp_scan;
size_t size = BP_GET_PSIZE(bp);
abd_t *data = abd_alloc_for_io(size, B_FALSE);
+ zio_t *pio;
if (queue == NULL) {
ASSERT3U(scn->scn_maxinflight_bytes, >, 0);
@@ -4081,6 +4092,7 @@ scan_exec_io(dsl_pool_t *dp, const blkptr_t *bp, int zio_flags,
cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
spa->spa_scrub_inflight += BP_GET_PSIZE(bp);
mutex_exit(&spa->spa_scrub_lock);
+ pio = scn->scn_zio_root;
} else {
kmutex_t *q_lock = &queue->q_vd->vdev_scan_io_queue_lock;
@@ -4089,12 +4101,14 @@ scan_exec_io(dsl_pool_t *dp, const blkptr_t *bp, int zio_flags,
while (queue->q_inflight_bytes >= queue->q_maxinflight_bytes)
cv_wait(&queue->q_zio_cv, q_lock);
queue->q_inflight_bytes += BP_GET_PSIZE(bp);
+ pio = queue->q_zio;
mutex_exit(q_lock);
}
+ ASSERT(pio != NULL);
count_block(scn, dp->dp_blkstats, bp);
- zio_nowait(zio_read(scn->scn_zio_root, spa, bp, data, size,
- dsl_scan_scrub_done, queue, ZIO_PRIORITY_SCRUB, zio_flags, zb));
+ zio_nowait(zio_read(pio, spa, bp, data, size, dsl_scan_scrub_done,
+ queue, ZIO_PRIORITY_SCRUB, zio_flags, zb));
}
/*
diff --git a/sys/contrib/openzfs/module/zfs/spa_misc.c b/sys/contrib/openzfs/module/zfs/spa_misc.c
index 12aec4a568eb..c57c69bd70e1 100644
--- a/sys/contrib/openzfs/module/zfs/spa_misc.c
+++ b/sys/contrib/openzfs/module/zfs/spa_misc.c
@@ -30,6 +30,7 @@
*/
#include <sys/zfs_context.h>
+#include <sys/zfs_chksum.h>
#include <sys/spa_impl.h>
#include <sys/zio.h>
#include <sys/zio_checksum.h>
@@ -2417,6 +2418,7 @@ spa_init(spa_mode_t mode)
vdev_raidz_math_init();
vdev_file_init();
zfs_prop_init();
+ chksum_init();
zpool_prop_init();
zpool_feature_init();
spa_config_load();
@@ -2438,6 +2440,7 @@ spa_fini(void)
vdev_cache_stat_fini();
vdev_mirror_stat_fini();
vdev_raidz_math_fini();
+ chksum_fini();
zil_fini();
dmu_fini();
zio_fini();
diff --git a/sys/contrib/openzfs/module/zfs/vdev.c b/sys/contrib/openzfs/module/zfs/vdev.c
index ce7f020a0d86..2dcb97b7feb4 100644
--- a/sys/contrib/openzfs/module/zfs/vdev.c
+++ b/sys/contrib/openzfs/module/zfs/vdev.c
@@ -5496,7 +5496,7 @@ vdev_props_set_sync(void *arg, dmu_tx_t *tx)
}
switch (prop = vdev_name_to_prop(propname)) {
- case VDEV_PROP_USER:
+ case VDEV_PROP_USERPROP:
if (vdev_prop_user(propname)) {
strval = fnvpair_value_string(elem);
if (strlen(strval) == 0) {
@@ -5580,7 +5580,7 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
uint64_t intval = 0;
char *strval = NULL;
- if (prop == VDEV_PROP_USER && !vdev_prop_user(propname)) {
+ if (prop == VDEV_PROP_USERPROP && !vdev_prop_user(propname)) {
error = EINVAL;
goto end;
}
@@ -5937,7 +5937,7 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
case VDEV_PROP_COMMENT:
/* Exists in the ZAP below */
/* FALLTHRU */
- case VDEV_PROP_USER:
+ case VDEV_PROP_USERPROP:
/* User Properites */
src = ZPROP_SRC_LOCAL;
diff --git a/sys/contrib/openzfs/module/zfs/zcp_synctask.c b/sys/contrib/openzfs/module/zfs/zcp_synctask.c
index 403856ae3571..24210117eca0 100644
--- a/sys/contrib/openzfs/module/zfs/zcp_synctask.c
+++ b/sys/contrib/openzfs/module/zfs/zcp_synctask.c
@@ -325,7 +325,7 @@ zcp_synctask_inherit_prop_check(void *arg, dmu_tx_t *tx)
zcp_inherit_prop_arg_t *args = arg;
zfs_prop_t prop = zfs_name_to_prop(args->zipa_prop);
- if (prop == ZPROP_INVAL) {
+ if (prop == ZPROP_USERPROP) {
if (zfs_prop_user(args->zipa_prop))
return (0);
diff --git a/sys/contrib/openzfs/module/zfs/zfs_chksum.c b/sys/contrib/openzfs/module/zfs/zfs_chksum.c
new file mode 100644
index 000000000000..639784287d72
--- /dev/null
+++ b/sys/contrib/openzfs/module/zfs/zfs_chksum.c
@@ -0,0 +1,323 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2021 Tino Reichardt <milky-zfs@mcmilk.de>
+ */
+
+#include <sys/types.h>
+#include <sys/spa.h>
+#include <sys/zio_checksum.h>
+#include <sys/zfs_context.h>
+#include <sys/zfs_chksum.h>
+
+#include <sys/blake3.h>
+
+static kstat_t *chksum_kstat = NULL;
+
+typedef struct {
+ const char *name;
+ const char *impl;
+ uint64_t bs1k;
+ uint64_t bs4k;
+ uint64_t bs16k;
+ uint64_t bs64k;
+ uint64_t bs256k;
+ uint64_t bs1m;
+ uint64_t bs4m;
+ zio_cksum_salt_t salt;
+ zio_checksum_t *(func);
+ zio_checksum_tmpl_init_t *(init);
+ zio_checksum_tmpl_free_t *(free);
+} chksum_stat_t;
+
+static int chksum_stat_cnt = 0;
+static chksum_stat_t *chksum_stat_data = 0;
+
+/*
+ * i3-1005G1 test output:
+ *
+ * implementation 1k 4k 16k 64k 256k 1m 4m
+ * fletcher-4 5421 15001 26468 32555 34720 32801 18847
+ * edonr-generic 1196 1602 1761 1749 1762 1759 1751
+ * skein-generic 546 591 608 615 619 612 616
+ * sha256-generic 246 270 274 274 277 275 276
+ * sha256-avx 262 296 304 307 307 307 306
+ * sha256-sha-ni 769 1072 1172 1220 1219 1232 1228
+ * sha256-openssl 240 300 316 314 304 285 276
+ * sha512-generic 333 374 385 392 391 393 392
+ * sha512-openssl 353 441 467 476 472 467 426
+ * sha512-avx 362 444 473 475 479 476 478
+ * sha512-avx2 394 500 530 538 543 545 542
+ * blake3-generic 308 313 313 313 312 313 312
+ * blake3-sse2 402 1289 1423 1446 1432 1458 1413
+ * blake3-sse41 427 1470 1625 1704 1679 1607 1629
+ * blake3-avx2 428 1920 3095 3343 3356 3318 3204
+ * blake3-avx512 473 2687 4905 5836 5844 5643 5374
+ */
+static int
+chksum_stat_kstat_headers(char *buf, size_t size)
+{
+ ssize_t off = 0;
+
+ off += snprintf(buf + off, size, "%-23s", "implementation");
+ off += snprintf(buf + off, size - off, "%8s", "1k");
+ off += snprintf(buf + off, size - off, "%8s", "4k");
+ off += snprintf(buf + off, size - off, "%8s", "16k");
+ off += snprintf(buf + off, size - off, "%8s", "64k");
+ off += snprintf(buf + off, size - off, "%8s", "256k");
+ off += snprintf(buf + off, size - off, "%8s", "1m");
+ (void) snprintf(buf + off, size - off, "%8s\n", "4m");
+
+ return (0);
+}
+
+static int
+chksum_stat_kstat_data(char *buf, size_t size, void *data)
+{
+ chksum_stat_t *cs;
+ ssize_t off = 0;
+ char b[24];
+
+ cs = (chksum_stat_t *)data;
+ snprintf(b, 23, "%s-%s", cs->name, cs->impl);
+ off += snprintf(buf + off, size - off, "%-23s", b);
+ off += snprintf(buf + off, size - off, "%8llu",
+ (u_longlong_t)cs->bs1k);
+ off += snprintf(buf + off, size - off, "%8llu",
+ (u_longlong_t)cs->bs4k);
+ off += snprintf(buf + off, size - off, "%8llu",
+ (u_longlong_t)cs->bs16k);
+ off += snprintf(buf + off, size - off, "%8llu",
+ (u_longlong_t)cs->bs64k);
+ off += snprintf(buf + off, size - off, "%8llu",
+ (u_longlong_t)cs->bs256k);
+ off += snprintf(buf + off, size - off, "%8llu",
+ (u_longlong_t)cs->bs1m);
+ (void) snprintf(buf + off, size - off, "%8llu\n",
+ (u_longlong_t)cs->bs4m);
+
+ return (0);
+}
+
+static void *
+chksum_stat_kstat_addr(kstat_t *ksp, loff_t n)
+{
+ if (n < chksum_stat_cnt)
+ ksp->ks_private = (void *)(chksum_stat_data + n);
+ else
+ ksp->ks_private = NULL;
+
+ return (ksp->ks_private);
+}
+
+static void
+chksum_run(chksum_stat_t *cs, abd_t *abd, void *ctx, int round,
+ uint64_t *result)
+{
+ hrtime_t start;
+ uint64_t run_bw, run_time_ns, run_count = 0, size = 0;
+ uint32_t l, loops = 0;
+ zio_cksum_t zcp;
+
+ switch (round) {
+ case 1: /* 1k */
+ size = 1<<10; loops = 128; break;
+ case 2: /* 4k */
+ size = 1<<12; loops = 64; break;
+ case 3: /* 16k */
+ size = 1<<14; loops = 32; break;
+ case 4: /* 64k */
+ size = 1<<16; loops = 16; break;
+ case 5: /* 256k */
+ size = 1<<18; loops = 8; break;
+ case 6: /* 1m */
+ size = 1<<20; loops = 4; break;
+ case 7: /* 4m */
+ size = 1<<22; loops = 1; break;
+ }
+
+ kpreempt_disable();
+ start = gethrtime();
+ do {
+ for (l = 0; l < loops; l++, run_count++)
+ cs->func(abd, size, ctx, &zcp);
+
+ run_time_ns = gethrtime() - start;
+ } while (run_time_ns < MSEC2NSEC(1));
+ kpreempt_enable();
+
+ run_bw = size * run_count * NANOSEC;
+ run_bw /= run_time_ns; /* B/s */
+ *result = run_bw/1024/1024; /* MiB/s */
+}
+
+static void
+chksum_benchit(chksum_stat_t *cs)
+{
+ abd_t *abd;
+ void *ctx = 0;
+ void *salt = &cs->salt.zcs_bytes;
+
+ /* allocate test memory via default abd interface */
+ abd = abd_alloc_linear(1<<22, B_FALSE);
+ memset(salt, 0, sizeof (cs->salt.zcs_bytes));
+ if (cs->init) {
+ ctx = cs->init(&cs->salt);
+ }
+
+ chksum_run(cs, abd, ctx, 1, &cs->bs1k);
+ chksum_run(cs, abd, ctx, 2, &cs->bs4k);
+ chksum_run(cs, abd, ctx, 3, &cs->bs16k);
+ chksum_run(cs, abd, ctx, 4, &cs->bs64k);
+ chksum_run(cs, abd, ctx, 5, &cs->bs256k);
+ chksum_run(cs, abd, ctx, 6, &cs->bs1m);
+ chksum_run(cs, abd, ctx, 7, &cs->bs4m);
+
+ /* free up temp memory */
+ if (cs->free) {
+ cs->free(ctx);
+ }
+ abd_free(abd);
+}
+
+/*
+ * Initialize and benchmark all supported implementations.
+ */
+static void
+chksum_benchmark(void)
+{
+
+#ifndef _KERNEL
+ /* we need the benchmark only for the kernel module */
+ return;
+#endif
+
+ chksum_stat_t *cs;
+ int cbid = 0, id;
+ uint64_t max = 0;
+
+ /* space for the benchmark times */
+ chksum_stat_cnt = 4;
+ chksum_stat_cnt += blake3_get_impl_count();
+ chksum_stat_data = (chksum_stat_t *)kmem_zalloc(
+ sizeof (chksum_stat_t) * chksum_stat_cnt, KM_SLEEP);
+
+ /* edonr */
+ cs = &chksum_stat_data[cbid++];
+ cs->init = abd_checksum_edonr_tmpl_init;
+ cs->func = abd_checksum_edonr_native;
+ cs->free = abd_checksum_edonr_tmpl_free;
+ cs->name = "edonr";
+ cs->impl = "generic";
+ chksum_benchit(cs);
+
+ /* skein */
+ cs = &chksum_stat_data[cbid++];
+ cs->init = abd_checksum_skein_tmpl_init;
+ cs->func = abd_checksum_skein_native;
+ cs->free = abd_checksum_skein_tmpl_free;
+ cs->name = "skein";
+ cs->impl = "generic";
+ chksum_benchit(cs);
+
+ /* sha256 */
+ cs = &chksum_stat_data[cbid++];
+ cs->init = 0;
+ cs->func = abd_checksum_SHA256;
+ cs->free = 0;
+ cs->name = "sha256";
+ cs->impl = "generic";
+ chksum_benchit(cs);
+
+ /* sha512 */
+ cs = &chksum_stat_data[cbid++];
+ cs->init = 0;
+ cs->func = abd_checksum_SHA512_native;
+ cs->free = 0;
+ cs->name = "sha512";
+ cs->impl = "generic";
+ chksum_benchit(cs);
+
+ /* blake3 */
+ for (id = 0; id < blake3_get_impl_count(); id++) {
+ blake3_set_impl_id(id);
+ cs = &chksum_stat_data[cbid++];
+ cs->init = abd_checksum_blake3_tmpl_init;
+ cs->func = abd_checksum_blake3_native;
+ cs->free = abd_checksum_blake3_tmpl_free;
+ cs->name = "blake3";
+ cs->impl = blake3_get_impl_name();
+ chksum_benchit(cs);
+ if (cs->bs256k > max) {
+ max = cs->bs256k;
+ blake3_set_impl_fastest(id);
+ }
+ }
+}
+
+void
+chksum_init(void)
+{
+#ifdef _KERNEL
+ blake3_per_cpu_ctx_init();
+#endif
+
+ /* Benchmark supported implementations */
+ chksum_benchmark();
+
+ /* Install kstats for all implementations */
+ chksum_kstat = kstat_create("zfs", 0, "chksum_bench", "misc",
+ KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
+
+ if (chksum_kstat != NULL) {
+ chksum_kstat->ks_data = NULL;
+ chksum_kstat->ks_ndata = UINT32_MAX;
+ kstat_set_raw_ops(chksum_kstat,
+ chksum_stat_kstat_headers,
+ chksum_stat_kstat_data,
+ chksum_stat_kstat_addr);
+ kstat_install(chksum_kstat);
+ }
+
+ /* setup implementations */
+ blake3_setup_impl();
+}
+
+void
+chksum_fini(void)
+{
+ if (chksum_kstat != NULL) {
+ kstat_delete(chksum_kstat);
+ chksum_kstat = NULL;
+ }
+
+ if (chksum_stat_cnt) {
+ kmem_free(chksum_stat_data,
+ sizeof (chksum_stat_t) * chksum_stat_cnt);
+ chksum_stat_cnt = 0;
+ chksum_stat_data = 0;
+ }
+
+#ifdef _KERNEL
+ blake3_per_cpu_ctx_fini();
+#endif
+}
diff --git a/sys/contrib/openzfs/module/zfs/zfs_ioctl.c b/sys/contrib/openzfs/module/zfs/zfs_ioctl.c
index b3f32d64f3ef..35aec5226233 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_ioctl.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_ioctl.c
@@ -1104,7 +1104,7 @@ zfs_secpolicy_inherit_prop(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
(void) innvl;
zfs_prop_t prop = zfs_name_to_prop(zc->zc_value);
- if (prop == ZPROP_INVAL) {
+ if (prop == ZPROP_USERPROP) {
if (!zfs_prop_user(zc->zc_value))
return (SET_ERROR(EINVAL));
return (zfs_secpolicy_write_perms(zc->zc_name,
@@ -2406,7 +2406,7 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source,
const char *strval = NULL;
int err = -1;
- if (prop == ZPROP_INVAL) {
+ if (prop == ZPROP_USERPROP) {
if (zfs_prop_userquota(propname))
return (zfs_prop_set_userquota(dsname, pair));
return (-1);
@@ -2577,7 +2577,7 @@ retry:
/* inherited properties are expected to be booleans */
if (nvpair_type(propval) != DATA_TYPE_BOOLEAN)
err = SET_ERROR(EINVAL);
- } else if (err == 0 && prop == ZPROP_INVAL) {
+ } else if (err == 0 && prop == ZPROP_USERPROP) {
if (zfs_prop_user(propname)) {
if (nvpair_type(propval) != DATA_TYPE_STRING)
err = SET_ERROR(EINVAL);
@@ -2853,11 +2853,11 @@ zfs_ioc_inherit_prop(zfs_cmd_t *zc)
* and reservation to the received or default values even though
* they are not considered inheritable.
*/
- if (prop != ZPROP_INVAL && !zfs_prop_inheritable(prop))
+ if (prop != ZPROP_USERPROP && !zfs_prop_inheritable(prop))
return (SET_ERROR(EINVAL));
}
- if (prop == ZPROP_INVAL) {
+ if (prop == ZPROP_USERPROP) {
if (!zfs_prop_user(propname))
return (SET_ERROR(EINVAL));
@@ -4488,7 +4488,7 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
uint64_t intval, compval;
int err;
- if (prop == ZPROP_INVAL) {
+ if (prop == ZPROP_USERPROP) {
if (zfs_prop_user(propname)) {
if ((err = zfs_secpolicy_write_perms(dsname,
ZFS_DELEG_PERM_USERPROP, cr)))
@@ -5034,7 +5034,7 @@ zfs_ioc_recv_impl(char *tofs, char *tosnap, char *origin, nvlist_t *recvprops,
/* -x property */
const char *name = nvpair_name(nvp);
zfs_prop_t prop = zfs_name_to_prop(name);
- if (prop != ZPROP_INVAL) {
+ if (prop != ZPROP_USERPROP) {
if (!zfs_prop_inheritable(prop))
continue;
} else if (!zfs_prop_user(name))
diff --git a/sys/contrib/openzfs/module/zfs/zio_checksum.c b/sys/contrib/openzfs/module/zfs/zio_checksum.c
index d89e5765326f..3c5cdf604100 100644
--- a/sys/contrib/openzfs/module/zfs/zio_checksum.c
+++ b/sys/contrib/openzfs/module/zfs/zio_checksum.c
@@ -195,6 +195,10 @@ zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS] = {
abd_checksum_edonr_tmpl_init, abd_checksum_edonr_tmpl_free,
ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_SALTED |
ZCHECKSUM_FLAG_NOPWRITE, "edonr"},
+ {{abd_checksum_blake3_native, abd_checksum_blake3_byteswap},
+ abd_checksum_blake3_tmpl_init, abd_checksum_blake3_tmpl_free,
+ ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_DEDUP |
+ ZCHECKSUM_FLAG_SALTED | ZCHECKSUM_FLAG_NOPWRITE, "blake3"},
};
/*
@@ -207,6 +211,8 @@ zio_checksum_to_feature(enum zio_checksum cksum)
VERIFY((cksum & ~ZIO_CHECKSUM_MASK) == 0);
switch (cksum) {
+ case ZIO_CHECKSUM_BLAKE3:
+ return (SPA_FEATURE_BLAKE3);
case ZIO_CHECKSUM_SHA512:
return (SPA_FEATURE_SHA512);
case ZIO_CHECKSUM_SKEIN:
diff --git a/sys/contrib/openzfs/tests/runfiles/common.run b/sys/contrib/openzfs/tests/runfiles/common.run
index 4ff46e7af35f..89ee0d3cb7b6 100644
--- a/sys/contrib/openzfs/tests/runfiles/common.run
+++ b/sys/contrib/openzfs/tests/runfiles/common.run
@@ -113,8 +113,8 @@ tests = ['tst.destroy_fs', 'tst.destroy_snap', 'tst.get_count_and_limit',
tags = ['functional', 'channel_program', 'synctask_core']
[tests/functional/checksum]
-tests = ['run_edonr_test', 'run_sha2_test', 'run_skein_test', 'filetest_001_pos',
- 'filetest_002_pos']
+tests = ['run_edonr_test', 'run_sha2_test', 'run_skein_test', 'run_blake3_test',
+ 'filetest_001_pos', 'filetest_002_pos']
tags = ['functional', 'checksum']
[tests/functional/clean_mirror]
@@ -937,9 +937,13 @@ tags = ['functional', 'zvol', 'zvol_cli']
[tests/functional/zvol/zvol_misc]
tests = ['zvol_misc_002_pos', 'zvol_misc_hierarchy', 'zvol_misc_rename_inuse',
- 'zvol_misc_snapdev', 'zvol_misc_volmode', 'zvol_misc_zil']
+ 'zvol_misc_snapdev', 'zvol_misc_trim', 'zvol_misc_volmode', 'zvol_misc_zil']
tags = ['functional', 'zvol', 'zvol_misc']
+[tests/functional/zvol/zvol_stress]
+tests = ['zvol_stress']
+tags = ['functional', 'zvol', 'zvol_stress']
+
[tests/functional/zvol/zvol_swap]
tests = ['zvol_swap_001_pos', 'zvol_swap_002_pos', 'zvol_swap_004_pos']
tags = ['functional', 'zvol', 'zvol_swap']
diff --git a/sys/contrib/openzfs/tests/runfiles/linux.run b/sys/contrib/openzfs/tests/runfiles/linux.run
index 3985da146044..9b32e73afb1e 100644
--- a/sys/contrib/openzfs/tests/runfiles/linux.run
+++ b/sys/contrib/openzfs/tests/runfiles/linux.run
@@ -177,10 +177,16 @@ tests = ['upgrade_projectquota_001_pos']
tags = ['functional', 'upgrade']
[tests/functional/user_namespace:Linux]
-tests = ['user_namespace_001']
+tests = ['user_namespace_001', 'user_namespace_002', 'user_namespace_003',
+ 'user_namespace_004']
tags = ['functional', 'user_namespace']
[tests/functional/userquota:Linux]
tests = ['groupspace_001_pos', 'groupspace_002_pos', 'groupspace_003_pos',
'userquota_013_pos', 'userspace_003_pos']
tags = ['functional', 'userquota']
+
+[tests/functional/zvol/zvol_misc:Linux]
+tests = ['zvol_misc_fua']
+tags = ['functional', 'zvol', 'zvol_misc']
+
diff --git a/sys/contrib/openzfs/tests/zfs-tests/cmd/.gitignore b/sys/contrib/openzfs/tests/zfs-tests/cmd/.gitignore
index 1830cab76fee..20d1382532bd 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/cmd/.gitignore
+++ b/sys/contrib/openzfs/tests/zfs-tests/cmd/.gitignore
@@ -42,6 +42,7 @@
/ereports
/zfs_diff-socket
/dosmode_readonly_write
+/blake3_test
/edonr_test
/skein_test
/sha2_test
diff --git a/sys/contrib/openzfs/tests/zfs-tests/cmd/Makefile.am b/sys/contrib/openzfs/tests/zfs-tests/cmd/Makefile.am
index e3c9874dcd54..3c8faf5afbbb 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/cmd/Makefile.am
+++ b/sys/contrib/openzfs/tests/zfs-tests/cmd/Makefile.am
@@ -98,15 +98,19 @@ scripts_zfs_tests_bin_PROGRAMS += %D%/ereports
libzfs.la
-scripts_zfs_tests_bin_PROGRAMS += %D%/edonr_test %D%/skein_test %D%/sha2_test
+scripts_zfs_tests_bin_PROGRAMS += %D%/edonr_test %D%/skein_test \
+ %D%/sha2_test %D%/blake3_test
%C%_skein_test_SOURCES = %D%/checksum/skein_test.c
%C%_sha2_test_SOURCES = %D%/checksum/sha2_test.c
%C%_edonr_test_SOURCES = %D%/checksum/edonr_test.c
+%C%_blake3_test_SOURCES = %D%/checksum/blake3_test.c
%C%_skein_test_LDADD = \
libicp.la \
+ libspl.la \
libspl_assert.la
%C%_sha2_test_LDADD = $(%C%_skein_test_LDADD)
%C%_edonr_test_LDADD = $(%C%_skein_test_LDADD)
+%C%_blake3_test_LDADD = $(%C%_skein_test_LDADD)
if BUILD_LINUX
diff --git a/sys/contrib/openzfs/tests/zfs-tests/cmd/checksum/blake3_test.c b/sys/contrib/openzfs/tests/zfs-tests/cmd/checksum/blake3_test.c
new file mode 100644
index 000000000000..55d268f5f8b7
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/cmd/checksum/blake3_test.c
@@ -0,0 +1,575 @@
+
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
+ */
+
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <sys/time.h>
+#include <sys/blake3.h>
+
+/*
+ * set it to a define for debugging
+ */
+#undef BLAKE3_DEBUG
+
+/*
+ * One BLAKE3 test vector, a C version of:
+ * https://github.com/BLAKE3-team/BLAKE3/tree/master/test_vectors
+ */
+typedef struct {
+ /* number of leading bytes of the test message that get hashed */
+ const int input_len;
+
+ /* expected digest of plain hashing, as a lowercase hex string */
+ const char *hash;
+
+ /* expected digest of keyed ("salted") hashing, also as hex */
+ const char *shash;
+} blake3_test_t;
+
+/* hex chars per reference digest below (BLAKE3 output length is variable) */
+#define TEST_DIGEST_LEN 262
+
+/*
+ * 32-character key handed to Blake3_InitKeyed() for the salted hashes
+ */
+static const char *salt = "whats the Elvish word for friend";
+
+static blake3_test_t TestArray[] = {
+ {
+ 0,
+ "af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262e0"
+ "0f03e7b69af26b7faaf09fcd333050338ddfe085b8cc869ca98b206c08243a26f5"
+ "487789e8f660afe6c99ef9e0c52b92e7393024a80459cf91f476f9ffdbda7001c2"
+ "2e159b402631f277ca96f2defdf1078282314e763699a31c5363165421cce14d",
+ "92b2b75604ed3c761f9d6f62392c8a9227ad0ea3f09573e783f1498a4ed60d26b1"
+ "8171a2f22a4b94822c701f107153dba24918c4bae4d2945c20ece13387627d3b73"
+ "cbf97b797d5e59948c7ef788f54372df45e45e4293c7dc18c1d41144a9758be589"
+ "60856be1eabbe22c2653190de560ca3b2ac4aa692a9210694254c371e851bc8f",
+ },
+ {
+ 1,
+ "2d3adedff11b61f14c886e35afa036736dcd87a74d27b5c1510225d0f592e213c3"
+ "a6cb8bf623e20cdb535f8d1a5ffb86342d9c0b64aca3bce1d31f60adfa137b358a"
+ "d4d79f97b47c3d5e79f179df87a3b9776ef8325f8329886ba42f07fb138bb502f4"
+ "081cbcec3195c5871e6c23e2cc97d3c69a613eba131e5f1351f3f1da786545e5",
+ "6d7878dfff2f485635d39013278ae14f1454b8c0a3a2d34bc1ab38228a80c95b65"
+ "68c0490609413006fbd428eb3fd14e7756d90f73a4725fad147f7bf70fd61c4e0c"
+ "f7074885e92b0e3f125978b4154986d4fb202a3f331a3fb6cf349a3a70e49990f9"
+ "8fe4289761c8602c4e6ab1138d31d3b62218078b2f3ba9a88e1d08d0dd4cea11",
+ },
+ {
+ 2,
+ "7b7015bb92cf0b318037702a6cdd81dee41224f734684c2c122cd6359cb1ee63d8"
+ "386b22e2ddc05836b7c1bb693d92af006deb5ffbc4c70fb44d0195d0c6f252faac"
+ "61659ef86523aa16517f87cb5f1340e723756ab65efb2f91964e14391de2a43226"
+ "3a6faf1d146937b35a33621c12d00be8223a7f1919cec0acd12097ff3ab00ab1",
+ "5392ddae0e0a69d5f40160462cbd9bd889375082ff224ac9c758802b7a6fd20a9f"
+ "fbf7efd13e989a6c246f96d3a96b9d279f2c4e63fb0bdff633957acf50ee1a5f65"
+ "8be144bab0f6f16500dee4aa5967fc2c586d85a04caddec90fffb7633f46a60786"
+ "024353b9e5cebe277fcd9514217fee2267dcda8f7b31697b7c54fab6a939bf8f",
+ },
+ {
+ 3,
+ "e1be4d7a8ab5560aa4199eea339849ba8e293d55ca0a81006726d184519e647f5b"
+ "49b82f805a538c68915c1ae8035c900fd1d4b13902920fd05e1450822f36de9454"
+ "b7e9996de4900c8e723512883f93f4345f8a58bfe64ee38d3ad71ab027765d25cd"
+ "d0e448328a8e7a683b9a6af8b0af94fa09010d9186890b096a08471e4230a134",
+ "39e67b76b5a007d4921969779fe666da67b5213b096084ab674742f0d5ec62b9b9"
+ "142d0fab08e1b161efdbb28d18afc64d8f72160c958e53a950cdecf91c1a1bbab1"
+ "a9c0f01def762a77e2e8545d4dec241e98a89b6db2e9a5b070fc110caae2622690"
+ "bd7b76c02ab60750a3ea75426a6bb8803c370ffe465f07fb57def95df772c39f",
+ },
+ {
+ 4,
+ "f30f5ab28fe047904037f77b6da4fea1e27241c5d132638d8bedce9d40494f328f"
+ "603ba4564453e06cdcee6cbe728a4519bbe6f0d41e8a14b5b225174a566dbfa61b"
+ "56afb1e452dc08c804f8c3143c9e2cc4a31bb738bf8c1917b55830c6e657972117"
+ "01dc0b98daa1faeaa6ee9e56ab606ce03a1a881e8f14e87a4acf4646272cfd12",
+ "7671dde590c95d5ac9616651ff5aa0a27bee5913a348e053b8aa9108917fe07011"
+ "6c0acff3f0d1fa97ab38d813fd46506089118147d83393019b068a55d646251ecf"
+ "81105f798d76a10ae413f3d925787d6216a7eb444e510fd56916f1d753a5544ecf"
+ "0072134a146b2615b42f50c179f56b8fae0788008e3e27c67482349e249cb86a",
+ },
+ {
+ 5,
+ "b40b44dfd97e7a84a996a91af8b85188c66c126940ba7aad2e7ae6b385402aa2eb"
+ "cfdac6c5d32c31209e1f81a454751280db64942ce395104e1e4eaca62607de1c2c"
+ "a748251754ea5bbe8c20150e7f47efd57012c63b3c6a6632dc1c7cd15f3e1c9999"
+ "04037d60fac2eb9397f2adbe458d7f264e64f1e73aa927b30988e2aed2f03620",
+ "73ac69eecf286894d8102018a6fc729f4b1f4247d3703f69bdc6a5fe3e0c84616a"
+ "b199d1f2f3e53bffb17f0a2209fe8b4f7d4c7bae59c2bc7d01f1ff94c67588cc6b"
+ "38fa6024886f2c078bfe09b5d9e6584cd6c521c3bb52f4de7687b37117a2dbbec0"
+ "d59e92fa9a8cc3240d4432f91757aabcae03e87431dac003e7d73574bfdd8218",
+ },
+ {
+ 6,
+ "06c4e8ffb6872fad96f9aaca5eee1553eb62aed0ad7198cef42e87f6a616c84461"
+ "1a30c4e4f37fe2fe23c0883cde5cf7059d88b657c7ed2087e3d210925ede716435"
+ "d6d5d82597a1e52b9553919e804f5656278bd739880692c94bff2824d8e0b48cac"
+ "1d24682699e4883389dc4f2faa2eb3b4db6e39debd5061ff3609916f3e07529a",
+ "82d3199d0013035682cc7f2a399d4c212544376a839aa863a0f4c91220ca7a6dc2"
+ "ffb3aa05f2631f0fa9ac19b6e97eb7e6669e5ec254799350c8b8d189e880780084"
+ "2a5383c4d907c932f34490aaf00064de8cdb157357bde37c1504d2960034930887"
+ "603abc5ccb9f5247f79224baff6120a3c622a46d7b1bcaee02c5025460941256",
+ },
+ {
+ 7,
+ "3f8770f387faad08faa9d8414e9f449ac68e6ff0417f673f602a646a891419fe66"
+ "036ef6e6d1a8f54baa9fed1fc11c77cfb9cff65bae915045027046ebe0c01bf5a9"
+ "41f3bb0f73791d3fc0b84370f9f30af0cd5b0fc334dd61f70feb60dad785f070fe"
+ "f1f343ed933b49a5ca0d16a503f599a365a4296739248b28d1a20b0e2cc8975c",
+ "af0a7ec382aedc0cfd626e49e7628bc7a353a4cb108855541a5651bf64fbb28a7c"
+ "5035ba0f48a9c73dabb2be0533d02e8fd5d0d5639a18b2803ba6bf527e1d145d5f"
+ "d6406c437b79bcaad6c7bdf1cf4bd56a893c3eb9510335a7a798548c6753f74617"
+ "bede88bef924ba4b334f8852476d90b26c5dc4c3668a2519266a562c6c8034a6",
+ },
+ {
+ 8,
+ "2351207d04fc16ade43ccab08600939c7c1fa70a5c0aaca76063d04c3228eaeb72"
+ "5d6d46ceed8f785ab9f2f9b06acfe398c6699c6129da084cb531177445a682894f"
+ "9685eaf836999221d17c9a64a3a057000524cd2823986db378b074290a1a9b93a2"
+ "2e135ed2c14c7e20c6d045cd00b903400374126676ea78874d79f2dd7883cf5c",
+ "be2f5495c61cba1bb348a34948c004045e3bd4dae8f0fe82bf44d0da245a060048"
+ "eb5e68ce6dea1eb0229e144f578b3aa7e9f4f85febd135df8525e6fe40c6f0340d"
+ "13dd09b255ccd5112a94238f2be3c0b5b7ecde06580426a93e0708555a265305ab"
+ "f86d874e34b4995b788e37a823491f25127a502fe0704baa6bfdf04e76c13276",
+ },
+ {
+ 63,
+ "e9bc37a594daad83be9470df7f7b3798297c3d834ce80ba85d6e207627b7db7b11"
+ "97012b1e7d9af4d7cb7bdd1f3bb49a90a9b5dec3ea2bbc6eaebce77f4e470cbf46"
+ "87093b5352f04e4a4570fba233164e6acc36900e35d185886a827f7ea9bdc1e5c3"
+ "ce88b095a200e62c10c043b3e9bc6cb9b6ac4dfa51794b02ace9f98779040755",
+ "bb1eb5d4afa793c1ebdd9fb08def6c36d10096986ae0cfe148cd101170ce37aea0"
+ "5a63d74a840aecd514f654f080e51ac50fd617d22610d91780fe6b07a26b0847ab"
+ "b38291058c97474ef6ddd190d30fc318185c09ca1589d2024f0a6f16d45f116783"
+ "77483fa5c005b2a107cb9943e5da634e7046855eaa888663de55d6471371d55d",
+ },
+ {
+ 64,
+ "4eed7141ea4a5cd4b788606bd23f46e212af9cacebacdc7d1f4c6dc7f2511b98fc"
+ "9cc56cb831ffe33ea8e7e1d1df09b26efd2767670066aa82d023b1dfe8ab1b2b7f"
+ "bb5b97592d46ffe3e05a6a9b592e2949c74160e4674301bc3f97e04903f8c6cf95"
+ "b863174c33228924cdef7ae47559b10b294acd660666c4538833582b43f82d74",
+ "ba8ced36f327700d213f120b1a207a3b8c04330528586f414d09f2f7d9ccb7e682"
+ "44c26010afc3f762615bbac552a1ca909e67c83e2fd5478cf46b9e811efccc93f7"
+ "7a21b17a152ebaca1695733fdb086e23cd0eb48c41c034d52523fc21236e5d8c92"
+ "55306e48d52ba40b4dac24256460d56573d1312319afcf3ed39d72d0bfc69acb",
+ },
+ {
+ 65,
+ "de1e5fa0be70df6d2be8fffd0e99ceaa8eb6e8c93a63f2d8d1c30ecb6b263dee0e"
+ "16e0a4749d6811dd1d6d1265c29729b1b75a9ac346cf93f0e1d7296dfcfd4313b3"
+ "a227faaaaf7757cc95b4e87a49be3b8a270a12020233509b1c3632b3485eef309d"
+ "0abc4a4a696c9decc6e90454b53b000f456a3f10079072baaf7a981653221f2c",
+ "c0a4edefa2d2accb9277c371ac12fcdbb52988a86edc54f0716e1591b4326e72d5"
+ "e795f46a596b02d3d4bfb43abad1e5d19211152722ec1f20fef2cd413e3c22f2fc"
+ "5da3d73041275be6ede3517b3b9f0fc67ade5956a672b8b75d96cb43294b904149"
+ "7de92637ed3f2439225e683910cb3ae923374449ca788fb0f9bea92731bc26ad",
+ },
+ {
+ 127,
+ "d81293fda863f008c09e92fc382a81f5a0b4a1251cba1634016a0f86a6bd640de3"
+ "137d477156d1fde56b0cf36f8ef18b44b2d79897bece12227539ac9ae0a5119da4"
+ "7644d934d26e74dc316145dcb8bb69ac3f2e05c242dd6ee06484fcb0e956dc4435"
+ "5b452c5e2bbb5e2b66e99f5dd443d0cbcaaafd4beebaed24ae2f8bb672bcef78",
+ "c64200ae7dfaf35577ac5a9521c47863fb71514a3bcad18819218b818de85818ee"
+ "7a317aaccc1458f78d6f65f3427ec97d9c0adb0d6dacd4471374b621b7b5f35cd5"
+ "4663c64dbe0b9e2d95632f84c611313ea5bd90b71ce97b3cf645776f3adc11e27d"
+ "135cbadb9875c2bf8d3ae6b02f8a0206aba0c35bfe42574011931c9a255ce6dc",
+ },
+ {
+ 128,
+ "f17e570564b26578c33bb7f44643f539624b05df1a76c81f30acd548c44b45efa6"
+ "9faba091427f9c5c4caa873aa07828651f19c55bad85c47d1368b11c6fd99e47ec"
+ "ba5820a0325984d74fe3e4058494ca12e3f1d3293d0010a9722f7dee64f71246f7"
+ "5e9361f44cc8e214a100650db1313ff76a9f93ec6e84edb7add1cb4a95019b0c",
+ "b04fe15577457267ff3b6f3c947d93be581e7e3a4b018679125eaf86f6a628ecd8"
+ "6bbe0001f10bda47e6077b735016fca8119da11348d93ca302bbd125bde0db2b50"
+ "edbe728a620bb9d3e6f706286aedea973425c0b9eedf8a38873544cf91badf49ad"
+ "92a635a93f71ddfcee1eae536c25d1b270956be16588ef1cfef2f1d15f650bd5",
+ },
+ {
+ 129,
+ "683aaae9f3c5ba37eaaf072aed0f9e30bac0865137bae68b1fde4ca2aebdcb12f9"
+ "6ffa7b36dd78ba321be7e842d364a62a42e3746681c8bace18a4a8a79649285c71"
+ "27bf8febf125be9de39586d251f0d41da20980b70d35e3dac0eee59e468a894fa7"
+ "e6a07129aaad09855f6ad4801512a116ba2b7841e6cfc99ad77594a8f2d181a7",
+ "d4a64dae6cdccbac1e5287f54f17c5f985105457c1a2ec1878ebd4b57e20d38f1c"
+ "9db018541eec241b748f87725665b7b1ace3e0065b29c3bcb232c90e37897fa5aa"
+ "ee7e1e8a2ecfcd9b51463e42238cfdd7fee1aecb3267fa7f2128079176132a412c"
+ "d8aaf0791276f6b98ff67359bd8652ef3a203976d5ff1cd41885573487bcd683",
+ },
+ {
+ 1023,
+ "10108970eeda3eb932baac1428c7a2163b0e924c9a9e25b35bba72b28f70bd11a1"
+ "82d27a591b05592b15607500e1e8dd56bc6c7fc063715b7a1d737df5bad3339c56"
+ "778957d870eb9717b57ea3d9fb68d1b55127bba6a906a4a24bbd5acb2d123a37b2"
+ "8f9e9a81bbaae360d58f85e5fc9d75f7c370a0cc09b6522d9c8d822f2f28f485",
+ "c951ecdf03288d0fcc96ee3413563d8a6d3589547f2c2fb36d9786470f1b9d6e89"
+ "0316d2e6d8b8c25b0a5b2180f94fb1a158ef508c3cde45e2966bd796a696d3e13e"
+ "fd86259d756387d9becf5c8bf1ce2192b87025152907b6d8cc33d17826d8b7b9bc"
+ "97e38c3c85108ef09f013e01c229c20a83d9e8efac5b37470da28575fd755a10",
+ },
+ {
+ 1024,
+ "42214739f095a406f3fc83deb889744ac00df831c10daa55189b5d121c855af71c"
+ "f8107265ecdaf8505b95d8fcec83a98a6a96ea5109d2c179c47a387ffbb404756f"
+ "6eeae7883b446b70ebb144527c2075ab8ab204c0086bb22b7c93d465efc57f8d91"
+ "7f0b385c6df265e77003b85102967486ed57db5c5ca170ba441427ed9afa684e",
+ "75c46f6f3d9eb4f55ecaaee480db732e6c2105546f1e675003687c31719c7ba4a7"
+ "8bc838c72852d4f49c864acb7adafe2478e824afe51c8919d06168414c265f298a"
+ "8094b1ad813a9b8614acabac321f24ce61c5a5346eb519520d38ecc43e89b50002"
+ "36df0597243e4d2493fd626730e2ba17ac4d8824d09d1a4a8f57b8227778e2de",
+ },
+ {
+ 1025,
+ "d00278ae47eb27b34faecf67b4fe263f82d5412916c1ffd97c8cb7fb814b8444f4"
+ "c4a22b4b399155358a994e52bf255de60035742ec71bd08ac275a1b51cc6bfe332"
+ "b0ef84b409108cda080e6269ed4b3e2c3f7d722aa4cdc98d16deb554e5627be8f9"
+ "55c98e1d5f9565a9194cad0c4285f93700062d9595adb992ae68ff12800ab67a",
+ "357dc55de0c7e382c900fd6e320acc04146be01db6a8ce7210b7189bd664ea6936"
+ "2396b77fdc0d2634a552970843722066c3c15902ae5097e00ff53f1e116f1cd535"
+ "2720113a837ab2452cafbde4d54085d9cf5d21ca613071551b25d52e69d6c81123"
+ "872b6f19cd3bc1333edf0c52b94de23ba772cf82636cff4542540a7738d5b930",
+ },
+ {
+ 2048,
+ "e776b6028c7cd22a4d0ba182a8bf62205d2ef576467e838ed6f2529b85fba24a9a"
+ "60bf80001410ec9eea6698cd537939fad4749edd484cb541aced55cd9bf54764d0"
+ "63f23f6f1e32e12958ba5cfeb1bf618ad094266d4fc3c968c2088f677454c288c6"
+ "7ba0dba337b9d91c7e1ba586dc9a5bc2d5e90c14f53a8863ac75655461cea8f9",
+ "879cf1fa2ea0e79126cb1063617a05b6ad9d0b696d0d757cf053439f60a99dd101"
+ "73b961cd574288194b23ece278c330fbb8585485e74967f31352a8183aa782b2b2"
+ "2f26cdcadb61eed1a5bc144b8198fbb0c13abbf8e3192c145d0a5c21633b0ef860"
+ "54f42809df823389ee40811a5910dcbd1018af31c3b43aa55201ed4edaac74fe",
+ },
+ {
+ 2049,
+ "5f4d72f40d7a5f82b15ca2b2e44b1de3c2ef86c426c95c1af0b687952256303096"
+ "de31d71d74103403822a2e0bc1eb193e7aecc9643a76b7bbc0c9f9c52e8783aae9"
+ "8764ca468962b5c2ec92f0c74eb5448d519713e09413719431c802f948dd5d9042"
+ "5a4ecdadece9eb178d80f26efccae630734dff63340285adec2aed3b51073ad3",
+ "9f29700902f7c86e514ddc4df1e3049f258b2472b6dd5267f61bf13983b78dd5f9"
+ "a88abfefdfa1e00b418971f2b39c64ca621e8eb37fceac57fd0c8fc8e117d43b81"
+ "447be22d5d8186f8f5919ba6bcc6846bd7d50726c06d245672c2ad4f61702c6464"
+ "99ee1173daa061ffe15bf45a631e2946d616a4c345822f1151284712f76b2b0e",
+ },
+ {
+ 3072,
+ "b98cb0ff3623be03326b373de6b9095218513e64f1ee2edd2525c7ad1e5cffd29a"
+ "3f6b0b978d6608335c09dc94ccf682f9951cdfc501bfe47b9c9189a6fc7b404d12"
+ "0258506341a6d802857322fbd20d3e5dae05b95c88793fa83db1cb08e7d8008d15"
+ "99b6209d78336e24839724c191b2a52a80448306e0daa84a3fdb566661a37e11",
+ "044a0e7b172a312dc02a4c9a818c036ffa2776368d7f528268d2e6b5df19177022"
+ "f302d0529e4174cc507c463671217975e81dab02b8fdeb0d7ccc7568dd22574c78"
+ "3a76be215441b32e91b9a904be8ea81f7a0afd14bad8ee7c8efc305ace5d3dd61b"
+ "996febe8da4f56ca0919359a7533216e2999fc87ff7d8f176fbecb3d6f34278b",
+ },
+ {
+ 3073,
+ "7124b49501012f81cc7f11ca069ec9226cecb8a2c850cfe644e327d22d3e1cd39a"
+ "27ae3b79d68d89da9bf25bc27139ae65a324918a5f9b7828181e52cf373c84f35b"
+ "639b7fccbb985b6f2fa56aea0c18f531203497b8bbd3a07ceb5926f1cab74d14bd"
+ "66486d9a91eba99059a98bd1cd25876b2af5a76c3e9eed554ed72ea952b603bf",
+ "68dede9bef00ba89e43f31a6825f4cf433389fedae75c04ee9f0cf16a427c95a96"
+ "d6da3fe985054d3478865be9a092250839a697bbda74e279e8a9e69f0025e4cfdd"
+ "d6cfb434b1cd9543aaf97c635d1b451a4386041e4bb100f5e45407cbbc24fa53ea"
+ "2de3536ccb329e4eb9466ec37093a42cf62b82903c696a93a50b702c80f3c3c5",
+ },
+ {
+ 4096,
+ "015094013f57a5277b59d8475c0501042c0b642e531b0a1c8f58d2163229e96902"
+ "89e9409ddb1b99768eafe1623da896faf7e1114bebeadc1be30829b6f8af707d85"
+ "c298f4f0ff4d9438aef948335612ae921e76d411c3a9111df62d27eaf871959ae0"
+ "062b5492a0feb98ef3ed4af277f5395172dbe5c311918ea0074ce0036454f620",
+ "befc660aea2f1718884cd8deb9902811d332f4fc4a38cf7c7300d597a081bfc0bb"
+ "b64a36edb564e01e4b4aaf3b060092a6b838bea44afebd2deb8298fa562b7b597c"
+ "757b9df4c911c3ca462e2ac89e9a787357aaf74c3b56d5c07bc93ce899568a3eb1"
+ "7d9250c20f6c5f6c1e792ec9a2dcb715398d5a6ec6d5c54f586a00403a1af1de",
+ },
+ {
+ 4097,
+ "9b4052b38f1c5fc8b1f9ff7ac7b27cd242487b3d890d15c96a1c25b8aa0fb99505"
+ "f91b0b5600a11251652eacfa9497b31cd3c409ce2e45cfe6c0a016967316c426bd"
+ "26f619eab5d70af9a418b845c608840390f361630bd497b1ab44019316357c61db"
+ "e091ce72fc16dc340ac3d6e009e050b3adac4b5b2c92e722cffdc46501531956",
+ "00df940cd36bb9fa7cbbc3556744e0dbc8191401afe70520ba292ee3ca80abbc60"
+ "6db4976cfdd266ae0abf667d9481831ff12e0caa268e7d3e57260c0824115a54ce"
+ "595ccc897786d9dcbf495599cfd90157186a46ec800a6763f1c59e36197e9939e9"
+ "00809f7077c102f888caaf864b253bc41eea812656d46742e4ea42769f89b83f",
+ },
+ {
+ 5120,
+ "9cadc15fed8b5d854562b26a9536d9707cadeda9b143978f319ab34230535833ac"
+ "c61c8fdc114a2010ce8038c853e121e1544985133fccdd0a2d507e8e615e611e9a"
+ "0ba4f47915f49e53d721816a9198e8b30f12d20ec3689989175f1bf7a300eee0d9"
+ "321fad8da232ece6efb8e9fd81b42ad161f6b9550a069e66b11b40487a5f5059",
+ "2c493e48e9b9bf31e0553a22b23503c0a3388f035cece68eb438d22fa1943e209b"
+ "4dc9209cd80ce7c1f7c9a744658e7e288465717ae6e56d5463d4f80cdb2ef56495"
+ "f6a4f5487f69749af0c34c2cdfa857f3056bf8d807336a14d7b89bf62bef2fb54f"
+ "9af6a546f818dc1e98b9e07f8a5834da50fa28fb5874af91bf06020d1bf0120e",
+ },
+ {
+ 5121,
+ "628bd2cb2004694adaab7bbd778a25df25c47b9d4155a55f8fbd79f2fe154cff96"
+ "adaab0613a6146cdaabe498c3a94e529d3fc1da2bd08edf54ed64d40dcd6777647"
+ "eac51d8277d70219a9694334a68bc8f0f23e20b0ff70ada6f844542dfa32cd4204"
+ "ca1846ef76d811cdb296f65e260227f477aa7aa008bac878f72257484f2b6c95",
+ "6ccf1c34753e7a044db80798ecd0782a8f76f33563accaddbfbb2e0ea4b2d0240d"
+ "07e63f13667a8d1490e5e04f13eb617aea16a8c8a5aaed1ef6fbde1b0515e3c810"
+ "50b361af6ead126032998290b563e3caddeaebfab592e155f2e161fb7cba939092"
+ "133f23f9e65245e58ec23457b78a2e8a125588aad6e07d7f11a85b88d375b72d",
+ },
+ {
+ 6144,
+ "3e2e5b74e048f3add6d21faab3f83aa44d3b2278afb83b80b3c35164ebeca2054d"
+ "742022da6fdda444ebc384b04a54c3ac5839b49da7d39f6d8a9db03deab32aade1"
+ "56c1c0311e9b3435cde0ddba0dce7b26a376cad121294b689193508dd63151603c"
+ "6ddb866ad16c2ee41585d1633a2cea093bea714f4c5d6b903522045b20395c83",
+ "3d6b6d21281d0ade5b2b016ae4034c5dec10ca7e475f90f76eac7138e9bc8f1dc3"
+ "5754060091dc5caf3efabe0603c60f45e415bb3407db67e6beb3d11cf8e4f79075"
+ "61f05dace0c15807f4b5f389c841eb114d81a82c02a00b57206b1d11fa6e803486"
+ "b048a5ce87105a686dee041207e095323dfe172df73deb8c9532066d88f9da7e",
+ },
+ {
+ 6145,
+ "f1323a8631446cc50536a9f705ee5cb619424d46887f3c376c695b70e0f0507f18"
+ "a2cfdd73c6e39dd75ce7c1c6e3ef238fd54465f053b25d21044ccb2093beb01501"
+ "5532b108313b5829c3621ce324b8e14229091b7c93f32db2e4e63126a377d2a63a"
+ "3597997d4f1cba59309cb4af240ba70cebff9a23d5e3ff0cdae2cfd54e070022",
+ "9ac301e9e39e45e3250a7e3b3df701aa0fb6889fbd80eeecf28dbc6300fbc539f3"
+ "c184ca2f59780e27a576c1d1fb9772e99fd17881d02ac7dfd39675aca918453283"
+ "ed8c3169085ef4a466b91c1649cc341dfdee60e32231fc34c9c4e0b9a2ba87ca8f"
+ "372589c744c15fd6f985eec15e98136f25beeb4b13c4e43dc84abcc79cd4646c",
+ },
+ {
+ 7168,
+ "61da957ec2499a95d6b8023e2b0e604ec7f6b50e80a9678b89d2628e99ada77a57"
+ "07c321c83361793b9af62a40f43b523df1c8633cecb4cd14d00bdc79c78fca5165"
+ "b863893f6d38b02ff7236c5a9a8ad2dba87d24c547cab046c29fc5bc1ed142e1de"
+ "4763613bb162a5a538e6ef05ed05199d751f9eb58d332791b8d73fb74e4fce95",
+ "b42835e40e9d4a7f42ad8cc04f85a963a76e18198377ed84adddeaecacc6f3fca2"
+ "f01d5277d69bb681c70fa8d36094f73ec06e452c80d2ff2257ed82e7ba34840098"
+ "9a65ee8daa7094ae0933e3d2210ac6395c4af24f91c2b590ef87d7788d7066ea3e"
+ "aebca4c08a4f14b9a27644f99084c3543711b64a070b94f2c9d1d8a90d035d52",
+ },
+ {
+ 7169,
+ "a003fc7a51754a9b3c7fae0367ab3d782dccf28855a03d435f8cfe74605e781798"
+ "a8b20534be1ca9eb2ae2df3fae2ea60e48c6fb0b850b1385b5de0fe460dbe9d9f9"
+ "b0d8db4435da75c601156df9d047f4ede008732eb17adc05d96180f8a735485228"
+ "40779e6062d643b79478a6e8dbce68927f36ebf676ffa7d72d5f68f050b119c8",
+ "ed9b1a922c046fdb3d423ae34e143b05ca1bf28b710432857bf738bcedbfa5113c"
+ "9e28d72fcbfc020814ce3f5d4fc867f01c8f5b6caf305b3ea8a8ba2da3ab69fabc"
+ "b438f19ff11f5378ad4484d75c478de425fb8e6ee809b54eec9bdb184315dc8566"
+ "17c09f5340451bf42fd3270a7b0b6566169f242e533777604c118a6358250f54",
+ },
+ {
+ 8192,
+ "aae792484c8efe4f19e2ca7d371d8c467ffb10748d8a5a1ae579948f718a2a635f"
+ "e51a27db045a567c1ad51be5aa34c01c6651c4d9b5b5ac5d0fd58cf18dd61a4777"
+ "8566b797a8c67df7b1d60b97b19288d2d877bb2df417ace009dcb0241ca1257d62"
+ "712b6a4043b4ff33f690d849da91ea3bf711ed583cb7b7a7da2839ba71309bbf",
+ "dc9637c8845a770b4cbf76b8daec0eebf7dc2eac11498517f08d44c8fc00d58a48"
+ "34464159dcbc12a0ba0c6d6eb41bac0ed6585cabfe0aca36a375e6c5480c22afdc"
+ "40785c170f5a6b8a1107dbee282318d00d915ac9ed1143ad40765ec120042ee121"
+ "cd2baa36250c618adaf9e27260fda2f94dea8fb6f08c04f8f10c78292aa46102",
+ },
+ {
+ 8193,
+ "bab6c09cb8ce8cf459261398d2e7aef35700bf488116ceb94a36d0f5f1b7bc3bb2"
+ "282aa69be089359ea1154b9a9286c4a56af4de975a9aa4a5c497654914d279bea6"
+ "0bb6d2cf7225a2fa0ff5ef56bbe4b149f3ed15860f78b4e2ad04e158e375c1e0c0"
+ "b551cd7dfc82f1b155c11b6b3ed51ec9edb30d133653bb5709d1dbd55f4e1ff6",
+ "954a2a75420c8d6547e3ba5b98d963e6fa6491addc8c023189cc519821b4a1f5f0"
+ "3228648fd983aef045c2fa8290934b0866b615f585149587dda229903996532883"
+ "5a2b18f1d63b7e300fc76ff260b571839fe44876a4eae66cbac8c67694411ed7e0"
+ "9df51068a22c6e67d6d3dd2cca8ff12e3275384006c80f4db68023f24eebba57",
+ },
+ {
+ 16384,
+ "f875d6646de28985646f34ee13be9a576fd515f76b5b0a26bb324735041ddde49d"
+ "764c270176e53e97bdffa58d549073f2c660be0e81293767ed4e4929f9ad34bbb3"
+ "9a529334c57c4a381ffd2a6d4bfdbf1482651b172aa883cc13408fa67758a3e475"
+ "03f93f87720a3177325f7823251b85275f64636a8f1d599c2e49722f42e93893",
+ "9e9fc4eb7cf081ea7c47d1807790ed211bfec56aa25bb7037784c13c4b707b0df9"
+ "e601b101e4cf63a404dfe50f2e1865bb12edc8fca166579ce0c70dba5a5c0fc960"
+ "ad6f3772183416a00bd29d4c6e651ea7620bb100c9449858bf14e1ddc9ecd35725"
+ "581ca5b9160de04060045993d972571c3e8f71e9d0496bfa744656861b169d65",
+ },
+ {
+ 31744,
+ "62b6960e1a44bcc1eb1a611a8d6235b6b4b78f32e7abc4fb4c6cdcce94895c4786"
+ "0cc51f2b0c28a7b77304bd55fe73af663c02d3f52ea053ba43431ca5bab7bfea2f"
+ "5e9d7121770d88f70ae9649ea713087d1914f7f312147e247f87eb2d4ffef0ac97"
+ "8bf7b6579d57d533355aa20b8b77b13fd09748728a5cc327a8ec470f4013226f",
+ "efa53b389ab67c593dba624d898d0f7353ab99e4ac9d42302ee64cbf9939a4193a"
+ "7258db2d9cd32a7a3ecfce46144114b15c2fcb68a618a976bd74515d47be08b628"
+ "be420b5e830fade7c080e351a076fbc38641ad80c736c8a18fe3c66ce12f95c61c"
+ "2462a9770d60d0f77115bbcd3782b593016a4e728d4c06cee4505cb0c08a42ec",
+ },
+ {
+ 102400,
+ "bc3e3d41a1146b069abffad3c0d44860cf664390afce4d9661f7902e7943e085e0"
+ "1c59dab908c04c3342b816941a26d69c2605ebee5ec5291cc55e15b76146e6745f"
+ "0601156c3596cb75065a9c57f35585a52e1ac70f69131c23d611ce11ee4ab1ec2c"
+ "009012d236648e77be9295dd0426f29b764d65de58eb7d01dd42248204f45f8e",
+ "1c35d1a5811083fd7119f5d5d1ba027b4d01c0c6c49fb6ff2cf75393ea5db4a7f9"
+ "dbdd3e1d81dcbca3ba241bb18760f207710b751846faaeb9dff8262710999a59b2"
+ "aa1aca298a032d94eacfadf1aa192418eb54808db23b56e34213266aa08499a16b"
+ "354f018fc4967d05f8b9d2ad87a7278337be9693fc638a3bfdbe314574ee6fc4",
+ },
+ {
+ 0, 0, 0
+ }
+};
+
+#ifdef BLAKE3_DEBUG
+#define dprintf printf /* debug build: trace calls are plain printf */
+#else
+#define dprintf(...) /* default build: trace calls expand to nothing */
+#endif
+
+/* Map a nibble value (0..15) to its lowercase hexadecimal character. */
+static char
+fmt_tohex(char c)
+{
+	return ((char)(c < 10 ? c + '0' : c - 10 + 'a'));
+}
+
+/* Hex-encode len src bytes to dest (no NUL); NULL dest just sizes it. */
+static size_t
+fmt_hexdump(char *dest, const char *src, size_t len)
+{
+	const unsigned char *bytes = (const unsigned char *)src;
+
+	if (dest == NULL)
+		return ((len > ((size_t)-1) / 2) ? (size_t)-1 : len * 2);
+	for (size_t n = 0; n < len; n++) {
+		dest[2 * n] = fmt_tohex(bytes[n] >> 4);
+		dest[2 * n + 1] = fmt_tohex(bytes[n] & 15);
+	}
+	return (2 * len);
+}
+
+int
+main(int argc, char *argv[])
+{
+	boolean_t failed = B_FALSE;	/* sticky: some vector mismatched */
+	uint8_t buffer[102400];		/* sized for the largest test vector */
+	uint64_t cpu_mhz = 0;		/* from argv[1]; 0 = no CPB estimate */
+	int id, i, j;
+
+	if (argc == 2)
+		cpu_mhz = atoi(argv[1]);
+
+	/* fill test message with the repeating byte pattern 0, 1, ..., 250 */
+	for (i = 0, j = 0; i < (int)sizeof (buffer); i++, j++) {
+		if (j == 251)
+			j = 0;
+		buffer[i] = (uint8_t)j;
+	}
+	/* Phase 1: every implementation must reproduce every test vector. */
+	(void) printf("Running algorithm correctness tests:\n");
+	for (id = 0; id < blake3_get_impl_count(); id++) {
+		blake3_set_impl_id(id);
+		const char *name = blake3_get_impl_name();
+		dprintf("Result for BLAKE3-%s:\n", name);
+		for (i = 0; TestArray[i].hash; i++) {
+			blake3_test_t *cur = &TestArray[i];
+			boolean_t mismatch = B_FALSE;	/* this vector only */
+			BLAKE3_CTX ctx;
+			uint8_t digest[TEST_DIGEST_LEN / 2];
+			char result[TEST_DIGEST_LEN + 1] = { 0 };
+
+			/* default hashing; every hex digit is compared */
+			Blake3_Init(&ctx);
+			Blake3_Update(&ctx, buffer, cur->input_len);
+			Blake3_FinalSeek(&ctx, 0, digest, sizeof (digest));
+			fmt_hexdump(result, (char *)digest, sizeof (digest));
+			if (memcmp(result, cur->hash, TEST_DIGEST_LEN) != 0)
+				failed = mismatch = B_TRUE;
+
+			dprintf("HASH-res: %s\n", result);
+			dprintf("HASH-ref: %s\n", cur->hash);
+
+			/* salted hashing: same input, keyed with salt */
+			Blake3_InitKeyed(&ctx, (const uint8_t *)salt);
+			Blake3_Update(&ctx, buffer, cur->input_len);
+			Blake3_FinalSeek(&ctx, 0, digest, sizeof (digest));
+			fmt_hexdump(result, (char *)digest, sizeof (digest));
+			if (memcmp(result, cur->shash, TEST_DIGEST_LEN) != 0)
+				failed = mismatch = B_TRUE;
+
+			dprintf("SHASH-res: %s\n", result);
+			dprintf("SHASH-ref: %s\n", cur->shash);
+
+			printf("BLAKE3-%s Message (inlen=%d)\tResult: %s\n",
+			    name, cur->input_len, mismatch ? "FAILED!" : "OK");
+		}
+	}
+	/* Phase 2 (benchmark) runs only if phase 1 passed; otherwise exit 1. */
+	if (failed)
+		return (1);
+
+#define BLAKE3_PERF_TEST(impl, diglen) \
+	do { \
+		BLAKE3_CTX ctx; \
+		uint8_t digest[(diglen) / 8]; \
+		uint8_t block[131072]; \
+		uint64_t delta; \
+		double cpb = 0; \
+		int i; \
+		struct timeval start, end; \
+		memset(block, 0, sizeof (block)); \
+		(void) gettimeofday(&start, NULL); \
+		Blake3_Init(&ctx); \
+		for (i = 0; i < 8192; i++) \
+			Blake3_Update(&ctx, block, sizeof (block)); \
+		Blake3_Final(&ctx, digest); \
+		(void) gettimeofday(&end, NULL); \
+		delta = (end.tv_sec * 1000000llu + end.tv_usec) - \
+		    (start.tv_sec * 1000000llu + start.tv_usec); \
+		if (cpu_mhz != 0) { \
+			cpb = (cpu_mhz * 1e6 * ((double)delta / \
+			    1000000)) / (8192 * 128 * 1024); \
+		} \
+		(void) printf("BLAKE3-%s %llu us (%.02f CPB)\n", impl, \
+		    (u_longlong_t)delta, cpb); \
+	} while (0)
+
+	printf("Running performance tests (hashing 1024 MiB of data):\n");
+	for (id = 0; id < blake3_get_impl_count(); id++) {
+		blake3_set_impl_id(id);
+		const char *name = blake3_get_impl_name();
+		BLAKE3_PERF_TEST(name, 256);
+	}
+
+	return (0);
+}
diff --git a/sys/contrib/openzfs/tests/zfs-tests/cmd/checksum/edonr_test.c b/sys/contrib/openzfs/tests/zfs-tests/cmd/checksum/edonr_test.c
index c6365a4147e6..3a0a48533c53 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/cmd/checksum/edonr_test.c
+++ b/sys/contrib/openzfs/tests/zfs-tests/cmd/checksum/edonr_test.c
@@ -28,9 +28,6 @@
* gettimeofday due to -D_KERNEL (we can do this since we're actually
* running in userspace, but we need -D_KERNEL for the remaining Edon-R code).
*/
-#ifdef _KERNEL
-#undef _KERNEL
-#endif
#include <sys/edonr.h>
#include <stdlib.h>
diff --git a/sys/contrib/openzfs/tests/zfs-tests/cmd/checksum/sha2_test.c b/sys/contrib/openzfs/tests/zfs-tests/cmd/checksum/sha2_test.c
index dc4173e1059a..bb355311091e 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/cmd/checksum/sha2_test.c
+++ b/sys/contrib/openzfs/tests/zfs-tests/cmd/checksum/sha2_test.c
@@ -28,9 +28,6 @@
* gettimeofday due to -D_KERNEL (we can do this since we're actually
* running in userspace, but we need -D_KERNEL for the remaining SHA2 code).
*/
-#ifdef _KERNEL
-#undef _KERNEL
-#endif
#include <stdarg.h>
#include <stdlib.h>
diff --git a/sys/contrib/openzfs/tests/zfs-tests/cmd/checksum/skein_test.c b/sys/contrib/openzfs/tests/zfs-tests/cmd/checksum/skein_test.c
index 99b47b4532fc..13611c860c42 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/cmd/checksum/skein_test.c
+++ b/sys/contrib/openzfs/tests/zfs-tests/cmd/checksum/skein_test.c
@@ -28,9 +28,6 @@
* gettimeofday due to -D_KERNEL (we can do this since we're actually
* running in userspace, but we need -D_KERNEL for the remaining Skein code).
*/
-#ifdef _KERNEL
-#undef _KERNEL
-#endif
#include <sys/skein.h>
#include <stdlib.h>
diff --git a/sys/contrib/openzfs/tests/zfs-tests/include/commands.cfg b/sys/contrib/openzfs/tests/zfs-tests/include/commands.cfg
index 9dc2b4d0e08b..47357dca57fb 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/include/commands.cfg
+++ b/sys/contrib/openzfs/tests/zfs-tests/include/commands.cfg
@@ -120,10 +120,12 @@ export SYSTEM_FILES_FREEBSD='chflags
showmount
swapctl
sysctl
+ trim
uncompress'
export SYSTEM_FILES_LINUX='attr
blkid
+ blkdiscard
blockdev
chattr
exportfs
@@ -144,11 +146,13 @@ export SYSTEM_FILES_LINUX='attr
mkswap
modprobe
mpstat
+ nsenter
parted
perf
setfattr
sha256sum
udevadm
+ unshare
useradd
userdel
usermod
@@ -212,6 +216,7 @@ export ZFSTEST_FILES='badsend
zed_fd_spill-zedlet
suid_write_to_file
cp_files
+ blake3_test
edonr_test
skein_test
sha2_test
diff --git a/sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib b/sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib
index 51d4e225f10f..cb20318f44c5 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib
+++ b/sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib
@@ -2770,20 +2770,22 @@ function is_te_enabled
svcs -H -o state labeld 2>/dev/null | grep -q "enabled"
}
+# Print the number of CPUs to stdout (cross-platform).
+#
+# Linux counts the lines of /proc/cpuinfo that start with "processor",
+# FreeBSD queries the kern.smp.cpus sysctl, and any other platform counts
+# the lines printed by psrinfo.  The exit status is that of the last
+# command run.
+function get_num_cpus
+{
+	if is_linux; then
+		grep -c '^processor' /proc/cpuinfo
+		return
+	fi
+
+	if is_freebsd; then
+		sysctl -n kern.smp.cpus
+		return
+	fi
+
+	psrinfo | wc -l
+}
+
# Utility function to determine if a system has multiple cpus.
function is_mp
{
- case "$UNAME" in
- Linux)
- (($(grep -c '^processor' /proc/cpuinfo) > 1))
- ;;
- FreeBSD)
- sysctl -n kern.smp.cpus
- ;;
- *)
- (($(psrinfo | wc -l) > 1))
- ;;
- esac
+ [[ $(get_num_cpus) -gt 1 ]]
}
function get_cpu_freq
@@ -3320,14 +3322,23 @@ function get_tunable_impl
{
typeset name="$1"
typeset module="${2:-zfs}"
+ typeset check_only="$3"
eval "typeset tunable=\$$name"
case "$tunable" in
UNSUPPORTED)
- log_unsupported "Tunable '$name' is unsupported on $UNAME"
+ if [ -z "$check_only" ] ; then
+ log_unsupported "Tunable '$name' is unsupported on $UNAME"
+ else
+ return 1
+ fi
;;
"")
- log_fail "Tunable '$name' must be added to tunables.cfg"
+ if [ -z "$check_only" ] ; then
+ log_fail "Tunable '$name' must be added to tunables.cfg"
+ else
+ return 1
+ fi
;;
*)
;;
@@ -3347,6 +3358,14 @@ function get_tunable_impl
esac
}
+# Does a tunable exist?  Returns non-zero, without logging, if the tunable
+# is UNSUPPORTED on this platform or is missing from tunables.cfg.
+# $1: Tunable name
+function tunable_exists
+{
+	get_tunable_impl "$1" "zfs" 1
+}
+
#
# Compute MD5 digest for given file or stdin if no file given.
# Note: file path must not contain spaces
diff --git a/sys/contrib/openzfs/tests/zfs-tests/include/properties.shlib b/sys/contrib/openzfs/tests/zfs-tests/include/properties.shlib
index ba82f96202b2..14b3f4415b7d 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/include/properties.shlib
+++ b/sys/contrib/openzfs/tests/zfs-tests/include/properties.shlib
@@ -17,7 +17,7 @@
typeset -a compress_prop_vals=('off' 'lzjb' 'lz4' 'gzip' 'zle' 'zstd')
typeset -a checksum_prop_vals=('on' 'off' 'fletcher2' 'fletcher4' 'sha256'
- 'noparity' 'sha512' 'skein')
+ 'noparity' 'sha512' 'skein' 'blake3')
if ! is_freebsd; then
checksum_prop_vals+=('edonr')
fi
diff --git a/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg b/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg
index d3838cb7c8ed..d6a2fe5db7c6 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg
+++ b/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg
@@ -87,6 +87,7 @@ VDEV_VALIDATE_SKIP vdev.validate_skip vdev_validate_skip
VOL_INHIBIT_DEV UNSUPPORTED zvol_inhibit_dev
VOL_MODE vol.mode zvol_volmode
VOL_RECURSIVE vol.recursive UNSUPPORTED
+VOL_USE_BLK_MQ UNSUPPORTED zvol_use_blk_mq
XATTR_COMPAT xattr_compat zfs_xattr_compat
ZEVENT_LEN_MAX zevent.len_max zfs_zevent_len_max
ZEVENT_RETAIN_MAX zevent.retain_max zfs_zevent_retain_max
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am b/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am
index a91a24d16680..e65a8bba2c2c 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am
@@ -545,6 +545,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/checksum/cleanup.ksh \
functional/checksum/filetest_001_pos.ksh \
functional/checksum/filetest_002_pos.ksh \
+ functional/checksum/run_blake3_test.ksh \
functional/checksum/run_edonr_test.ksh \
functional/checksum/run_sha2_test.ksh \
functional/checksum/run_skein_test.ksh \
@@ -1894,6 +1895,9 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/user_namespace/cleanup.ksh \
functional/user_namespace/setup.ksh \
functional/user_namespace/user_namespace_001.ksh \
+ functional/user_namespace/user_namespace_002.ksh \
+ functional/user_namespace/user_namespace_003.ksh \
+ functional/user_namespace/user_namespace_004.ksh \
functional/userquota/cleanup.ksh \
functional/userquota/groupspace_001_pos.ksh \
functional/userquota/groupspace_002_pos.ksh \
@@ -1965,11 +1969,16 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/zvol/zvol_misc/zvol_misc_004_pos.ksh \
functional/zvol/zvol_misc/zvol_misc_005_neg.ksh \
functional/zvol/zvol_misc/zvol_misc_006_pos.ksh \
+ functional/zvol/zvol_misc/zvol_misc_fua.ksh \
functional/zvol/zvol_misc/zvol_misc_hierarchy.ksh \
functional/zvol/zvol_misc/zvol_misc_rename_inuse.ksh \
functional/zvol/zvol_misc/zvol_misc_snapdev.ksh \
+ functional/zvol/zvol_misc/zvol_misc_trim.ksh \
functional/zvol/zvol_misc/zvol_misc_volmode.ksh \
functional/zvol/zvol_misc/zvol_misc_zil.ksh \
+ functional/zvol/zvol_stress/cleanup.ksh \
+ functional/zvol/zvol_stress/setup.ksh \
+ functional/zvol/zvol_stress/zvol_stress.ksh \
functional/zvol/zvol_swap/cleanup.ksh \
functional/zvol/zvol_swap/setup.ksh \
functional/zvol/zvol_swap/zvol_swap_001_pos.ksh \
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/checksum/default.cfg b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/checksum/default.cfg
index afb956093d8a..a7e143e75ea3 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/checksum/default.cfg
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/checksum/default.cfg
@@ -30,4 +30,4 @@
. $STF_SUITE/include/libtest.shlib
-set -A CHECKSUM_TYPES "fletcher2" "fletcher4" "sha256" "sha512" "skein" "edonr"
+set -A CHECKSUM_TYPES "fletcher2" "fletcher4" "blake3" "sha256" "sha512" "skein" "edonr"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/checksum/run_blake3_test.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/checksum/run_blake3_test.ksh
new file mode 100755
index 000000000000..cf1ca70328e1
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/checksum/run_blake3_test.ksh
@@ -0,0 +1,30 @@
+#!/bin/ksh -p
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2015, 2016 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# Description:
+# Run the tests for the BLAKE3 hash algorithm.
+#
+
+log_assert "Run the tests for the BLAKE3 hash algorithm."
+
+freq=$(get_cpu_freq)
+log_must blake3_test $freq
+
+log_pass "BLAKE3 tests passed."
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/checksum_001_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/checksum_001_pos.ksh
index 27003b21b556..cab7c185e16a 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/checksum_001_pos.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/checksum_001_pos.ksh
@@ -46,7 +46,7 @@
verify_runnable "both"
set -A dataset "$TESTPOOL" "$TESTPOOL/$TESTFS" "$TESTPOOL/$TESTVOL"
-set -A values "on" "off" "fletcher2" "fletcher4" "sha256" "sha512" "skein" "edonr" "noparity"
+set -A values "on" "off" "fletcher2" "fletcher4" "sha256" "sha512" "skein" "edonr" "blake3" "noparity"
log_assert "Setting a valid checksum on a file system, volume," \
"it should be successful."
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg
index 4ea5725e040e..7849ed22634e 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg
@@ -99,5 +99,6 @@ if is_linux || is_freebsd; then
"feature@zstd_compress"
"feature@zilsaxattr"
"feature@head_errlog"
+ "feature@blake3"
)
fi
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/user_namespace/user_namespace_001.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/user_namespace/user_namespace_001.ksh
index 3d19c4273e24..39aad91d0c61 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/user_namespace/user_namespace_001.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/user_namespace/user_namespace_001.ksh
@@ -47,6 +47,11 @@ function cleanup
done
}
+unshare -Urm echo test
+if [ "$?" -ne "0" ]; then
+ log_unsupported "Failed to create user namespace"
+fi
+
log_onexit cleanup
log_assert "Check root in user namespaces"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/user_namespace/user_namespace_002.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/user_namespace/user_namespace_002.ksh
new file mode 100755
index 000000000000..a5f76014ab85
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/user_namespace/user_namespace_002.ksh
@@ -0,0 +1,115 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+. $STF_SUITE/tests/functional/user_namespace/user_namespace_common.kshlib
+
+#
+# DESCRIPTION:
+# Regression test for delegation of datasets to user namespaces.
+#
+# STRATEGY:
+# 1. Delegate a dataset to a user namespace.
+# 2. Check that 'zfs list' is only able to see inside the delegation.
+# 3. Check that 'zfs create' is able to create only inside the delegation.
+# 4. Check that the filesystems can be mounted inside the delegation,
+# and that file permissions are appropriate.
+# 5. Check that 'zfs destroy' is able to destroy only inside the delegation.
+# 6. Check that 'zfs unzone' has a desirable effect.
+#
+
+verify_runnable "both"
+
+user_ns_cleanup() {
+ if [ -n "$proc_ns_added" ]; then
+ log_must zfs unzone $proc_ns_added $TESTPOOL/userns
+ fi
+ if [ -n "$unshared_pid" ]; then
+ kill -9 $unshared_pid
+ # Give it a sec to make the global cleanup more reliable.
+ sleep 1
+ fi
+ log_must zfs destroy -r $TESTPOOL/userns
+}
+
+log_onexit user_ns_cleanup
+
+log_assert "Check zfs/zpool command delegation in user namespaces"
+
+# Create the baseline datasets.
+log_must zfs create -o zoned=on $TESTPOOL/userns
+log_must zfs create -o zoned=on $TESTPOOL/userns/testds
+# Partial match should be denied; hence we also set this to be 'zoned'.
+log_must zfs create -o zoned=on $TESTPOOL/user
+
+# 1. Create a user namespace with a cloned mount namespace, then delegate.
+unshare -Urm echo test
+if [ "$?" -ne "0" ]; then
+ log_unsupported "Failed to create user namespace"
+fi
+unshare -Urm /usr/bin/sleep 1h &
+unshared_pid=$!
+if [ "$?" -ne "0" ]; then
+ log_unsupported "Failed to create user namespace"
+fi
+proc_ns=/proc/$unshared_pid/ns/user
+sleep 2 # Wait for unshare to acquire user namespace
+log_note "unshare: child=${unshared_pid} proc_ns=${proc_ns}"
+
+NSENTER="nsenter -t $unshared_pid --all"
+
+$NSENTER echo test
+if [ "$?" -ne "0" ]; then
+ log_unsupported "Failed to enter user namespace"
+fi
+
+# 1b. Pre-test by checking that 'zone' does something new.
+list="$($NSENTER zfs list -r -H -o name | tr '\n' ' ')"
+log_must test -z "$list"
+log_must zfs zone $proc_ns $TESTPOOL/userns
+proc_ns_added="$proc_ns" # read by user_ns_cleanup to unzone on exit
+
+# 2. 'zfs list'
+list="$($NSENTER zfs list -r -H -o name $TESTPOOL | tr '\n' ' ')"
+log_must test "$list" = "$TESTPOOL $TESTPOOL/userns $TESTPOOL/userns/testds "
+
+# 3. 'zfs create'
+log_must $NSENTER zfs create $TESTPOOL/userns/created
+log_mustnot $NSENTER zfs create $TESTPOOL/user/created
+
+# 4. Check file permissions (create mounts the filesystem). The 'permissions'
+# check is simply, does it get mapped to user namespace's root/root?
+log_must $NSENTER df -h /$TESTPOOL/userns/created
+log_must $NSENTER mkfile 8192 /$TESTPOOL/userns/created/testfile
+uidgid=$($NSENTER stat -c '%u %g' /$TESTPOOL/userns/created/testfile)
+log_must test "${uidgid}" = "0 0"
+
+# 5. 'zfs destroy'
+log_must $NSENTER zfs destroy $TESTPOOL/userns/created
+log_mustnot $NSENTER zfs destroy $TESTPOOL/user
+
+# 6. 'zfs unzone' should have an effect
+log_must zfs unzone $proc_ns $TESTPOOL/userns
+proc_ns_added=""
+list="$($NSENTER zfs list -r -H -o name | tr '\n' ' ')"
+log_must test -z "$list"
+
+log_pass "Check zfs/zpool command delegation in user namespaces"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/user_namespace/user_namespace_003.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/user_namespace/user_namespace_003.ksh
new file mode 100755
index 000000000000..20a7f6677d20
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/user_namespace/user_namespace_003.ksh
@@ -0,0 +1,97 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+. $STF_SUITE/tests/functional/user_namespace/user_namespace_common.kshlib
+
+#
+# DESCRIPTION:
+# Regression test for delegation of datasets to user namespaces.
+#
+# STRATEGY:
+# 1. Delegate two datasets with distinctive names to a user namespace.
+# 2. Check that 'zfs list' is not able to see datasets outside of the
+# delegation, which have a prefix matching one of the delegated sets.
+# Also, check that all the delegated sets are visible.
+#
+
+verify_runnable "both"
+
+user_ns_cleanup() {
+ if [ -n "$proc_ns_added" ]; then
+ log_must zfs unzone $proc_ns_added $TESTPOOL/userns
+ log_must zfs unzone $proc_ns_added $TESTPOOL/otheruserns
+ fi
+ if [ -n "$unshared_pid" ]; then
+ kill -9 $unshared_pid
+ # Give it a sec to make the global cleanup more reliable.
+ sleep 1
+ fi
+ log_must zfs destroy -r $TESTPOOL/userns
+ log_must zfs destroy -r $TESTPOOL/usernsisitnot
+ log_must zfs destroy -r $TESTPOOL/otheruserns
+}
+
+log_onexit user_ns_cleanup
+
+log_assert "Check zfs list command handling of dataset visibility in user namespaces"
+
+# Create the baseline dataset.
+log_must zfs create -o zoned=on $TESTPOOL/userns
+# Datasets with a prefix matching the delegated dataset should not be
+# automatically considered visible.
+log_must zfs create -o zoned=on $TESTPOOL/usernsisitnot
+# All delegated datasets should be visible.
+log_must zfs create -o zoned=on $TESTPOOL/otheruserns
+
+# 1. Create a user namespace with a cloned mount namespace, then delegate.
+unshare -Urm echo test
+if [ "$?" -ne "0" ]; then
+ log_unsupported "Failed to create user namespace"
+fi
+unshare -Urm /usr/bin/sleep 1h &
+unshared_pid=$!
+if [ "$?" -ne "0" ]; then
+ log_unsupported "Failed to create user namespace"
+fi
+proc_ns=/proc/$unshared_pid/ns/user
+sleep 2 # Wait for unshare to acquire user namespace
+log_note "unshare: child=${unshared_pid} proc_ns=${proc_ns}"
+
+NSENTER="nsenter -t $unshared_pid --all"
+
+$NSENTER echo test
+if [ "$?" -ne "0" ]; then
+ log_unsupported "Failed to enter user namespace"
+fi
+
+# 1b. Pre-test by checking that 'zone' does something new.
+list="$($NSENTER zfs list -r -H -o name | tr '\n' ' ')"
+log_must test -z "$list"
+log_must zfs zone $proc_ns $TESTPOOL/userns
+log_must zfs zone $proc_ns $TESTPOOL/otheruserns
+proc_ns_added="$proc_ns" # read by user_ns_cleanup to unzone both datasets
+
+# 2. 'zfs list'
+list="$($NSENTER zfs list -r -H -o name $TESTPOOL | tr '\n' ' ')"
+log_must test "$list" = "$TESTPOOL $TESTPOOL/otheruserns $TESTPOOL/userns "
+
+log_pass "Check zfs list command handling of dataset visibility in user namespaces"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/user_namespace/user_namespace_004.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/user_namespace/user_namespace_004.ksh
new file mode 100755
index 000000000000..6edb0413c98a
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/user_namespace/user_namespace_004.ksh
@@ -0,0 +1,67 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+. $STF_SUITE/tests/functional/user_namespace/user_namespace_common.kshlib
+
+#
+# DESCRIPTION:
+# Regression test for safeguards around the delegation of datasets to
+# user namespaces.
+#
+# STRATEGY:
+# 1. Check that 'zfs zone' correctly handles the case of the first
+# argument being a non-namespace file.
+# 2. Check that 'zfs zone' correctly handles the case of the first
+# argument being a non-namespace and non-existent file.
+#
+
+verify_runnable "both"
+
+user_ns_cleanup() {
+ if [ -n "$temp_file" ]; then
+ log_must rm -f "$temp_file"
+ fi
+
+ log_must zfs destroy -r "$TESTPOOL/userns"
+}
+
+log_onexit user_ns_cleanup
+
+log_assert "Check zfs zone command handling of non-namespace files"
+
+# Pass if user namespaces are not supported.
+unshare -Urm echo test
+if [ "$?" -ne "0" ]; then
+ log_unsupported "Failed to create user namespace"
+fi
+
+# Create the baseline datasets.
+log_must zfs create -o zoned=on "$TESTPOOL/userns"
+
+# 1. Try to pass a non-namespace file to zfs zone.
+temp_file="$(TMPDIR=$TEST_BASE_DIR mktemp)"
+log_mustnot zfs zone "$temp_file" "$TESTPOOL/userns"
+
+# 2. Try to pass a non-namespace and non-existent file to zfs zone.
+log_mustnot zfs zone "$TEST_BASE_DIR/nonexistent" "$TESTPOOL/userns"
+
+log_pass "Check zfs zone command handling of non-namespace files"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_common.shlib b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_common.shlib
index c0fd90f58eaf..c04559fe337b 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_common.shlib
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_common.shlib
@@ -128,3 +128,14 @@ function is_zvol_dumpified
zdb -dddd $volume 2 | grep -q "dumpsize"
}
+
+# enable/disable blk-mq (if available)
+#
+# $1: 1 = enable, 0 = disable
+function set_blk_mq
+{
+ # Not all kernels support blk-mq
+ if tunable_exists VOL_USE_BLK_MQ ; then
+ log_must set_tunable32 VOL_USE_BLK_MQ $1
+ fi
+}
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_fua.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_fua.ksh
new file mode 100755
index 000000000000..e44107030f3c
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_fua.ksh
@@ -0,0 +1,96 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2022 by Lawrence Livermore National Security, LLC.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/zvol/zvol_common.shlib
+
+#
+# DESCRIPTION:
+# Verify that a zvol Force Unit Access (FUA) write works.
+#
+# STRATEGY:
+# 1. dd write 5MB of data with "oflag=dsync,direct" to a zvol. Those flags
+# together do a FUA write.
+# 2. Verify the data is correct.
+# 3. Repeat 1-2 for both the blk-mq and non-blk-mq cases.
+
+verify_runnable "global"
+
+if ! is_physical_device $DISKS; then
+ log_unsupported "This directory cannot be run on raw files."
+fi
+
+if ! is_linux ; then
+ log_unsupported "Only linux supports dd with oflag=dsync for FUA writes"
+fi
+
+typeset datafile1="$(mktemp zvol_misc_fua1.XXXXXX)"
+typeset datafile2="$(mktemp zvol_misc_fua2.XXXXXX)"
+typeset zvolpath=${ZVOL_DEVDIR}/$TESTPOOL/$TESTVOL
+
+function cleanup
+{
+ rm -f "$datafile1" "$datafile2" # do_test may already have removed them
+}
+
+function do_test {
+ # Wait for udev to create symlinks to our zvol
+ block_device_wait $zvolpath
+
+ # Create a data file
+ log_must dd if=/dev/urandom of="$datafile1" bs=1M count=5
+
+ # Write the data to our zvol using FUA
+ log_must dd if=$datafile1 of=$zvolpath oflag=dsync,direct bs=1M count=5
+
+ # Extract data from our zvol
+ log_must dd if=$zvolpath of="$datafile2" bs=1M count=5
+
+ # Compare the data we expect with what's on our zvol. diff will return
+ # non-zero if they differ.
+ log_must diff $datafile1 $datafile2
+
+ log_must rm $datafile1 $datafile2
+}
+
+log_assert "Verify that a ZFS volume can do Force Unit Access (FUA)"
+log_onexit cleanup
+
+log_must zfs set compression=off $TESTPOOL/$TESTVOL
+
+log_note "Testing without blk-mq"
+
+set_blk_mq 0
+log_must zpool export $TESTPOOL
+log_must zpool import $TESTPOOL
+do_test
+
+set_blk_mq 1
+log_must zpool export $TESTPOOL
+log_must zpool import $TESTPOOL
+do_test
+
+log_pass "ZFS volume FUA works"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_trim.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_trim.ksh
new file mode 100755
index 000000000000..2e417a0e6676
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_trim.ksh
@@ -0,0 +1,136 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2022 by Lawrence Livermore National Security, LLC.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/include/math.shlib
+. $STF_SUITE/tests/functional/zvol/zvol_common.shlib
+
+#
+# DESCRIPTION:
+# Verify we can TRIM a zvol
+#
+# STRATEGY:
+# 1. TRIM the entire zvol to remove data from older tests
+# 2. Create a 5MB data file
+# 3. Write the file to the zvol
+# 4. Observe 5MB of used space on the zvol
+# 5. TRIM the first 1MB and last 2MB of the 5MB block of data.
+# 6. Observe 2MB of used space on the zvol
+# 7. Verify the trimmed regions are zero'd on the zvol
+
+verify_runnable "global"
+
+if is_linux ; then
+ # We need '--force' here since the prior tests may leave a filesystem
+ # on the zvol, and blkdiscard will see that filesystem and print a
+ # warning unless you force it.
+ #
+ # Only blkdiscard >= v2.36 supports --force, so we need to
+ # check for it.
+ if blkdiscard --help | grep -q '\-\-force' ; then
+ trimcmd='blkdiscard --force'
+ else
+ trimcmd='blkdiscard'
+ fi
+else
+ # By default, FreeBSD 'trim' always does a dry-run. '-f' makes
+ # it perform the actual operation.
+ trimcmd='trim -f'
+fi
+
+if ! is_physical_device $DISKS; then
+ log_unsupported "This directory cannot be run on raw files."
+fi
+
+typeset datafile1="$(mktemp zvol_misc_flags1.XXXXXX)"
+typeset datafile2="$(mktemp zvol_misc_flags2.XXXXXX)"
+typeset zvolpath=${ZVOL_DEVDIR}/$TESTPOOL/$TESTVOL
+
+function cleanup
+{
+ rm -f "$datafile1" "$datafile2" # do_test may already have removed them
+}
+
+function do_test {
+ # Wait for udev to create symlinks to our zvol
+ block_device_wait $zvolpath
+
+ # Create a data file
+ log_must dd if=/dev/urandom of="$datafile1" bs=1M count=5
+
+ # Write to zvol
+ log_must dd if=$datafile1 of=$zvolpath conv=fsync
+
+ # Record how much space we've used (should be 5MB, with 128k
+ # of tolerance).
+ before="$(get_prop refer $TESTPOOL/$TESTVOL)"
+ log_must within_tolerance $before 5242880 131072
+
+ # We currently have 5MB of random data on the zvol.
+ # Trim the first 1MB and also trim 2MB at offset 3MB.
+ log_must $trimcmd -l $((1 * 1048576)) $zvolpath
+ log_must $trimcmd -o $((3 * 1048576)) -l $((2 * 1048576)) $zvolpath
+ sync_pool
+
+ # After trimming 3MB, the zvol should have 2MB of data (with 128k of
+ # tolerance).
+ after="$(get_prop refer $TESTPOOL/$TESTVOL)"
+ log_must within_tolerance $after 2097152 131072
+
+ # Make the same holes in our test data
+ log_must dd if=/dev/zero of="$datafile1" bs=1M count=1 conv=notrunc
+ log_must dd if=/dev/zero of="$datafile1" bs=1M count=2 seek=3 conv=notrunc
+
+ # Extract data from our zvol
+ log_must dd if=$zvolpath of="$datafile2" bs=1M count=5
+
+ # Compare the data we expect with what's on our zvol. diff will return
+ # non-zero if they differ.
+ log_must diff $datafile1 $datafile2
+
+ log_must rm $datafile1 $datafile2
+}
+
+log_assert "Verify that a ZFS volume can be TRIMed"
+log_onexit cleanup
+
+log_must zfs set compression=off $TESTPOOL/$TESTVOL
+
+# Remove old data from previous tests
+log_must $trimcmd $zvolpath
+
+
+set_blk_mq 1
+log_must zpool export $TESTPOOL
+log_must zpool import $TESTPOOL
+do_test
+
+set_blk_mq 0
+log_must zpool export $TESTPOOL
+log_must zpool import $TESTPOOL
+do_test
+
+log_pass "ZFS volumes can be trimmed"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_stress/cleanup.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_stress/cleanup.ksh
new file mode 100755
index 000000000000..b81a372638e3
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_stress/cleanup.ksh
@@ -0,0 +1,36 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# Copyright (c) 2013 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+verify_runnable "global"
+
+default_cleanup
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_stress/setup.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_stress/setup.ksh
new file mode 100755
index 000000000000..9e70fc47b89b
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_stress/setup.ksh
@@ -0,0 +1,36 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# Copyright (c) 2013 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+verify_runnable "global"
+
+default_setup "$DISKS"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_stress/zvol_stress.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_stress/zvol_stress.ksh
new file mode 100755
index 000000000000..c1aadcac3bf5
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_stress/zvol_stress.ksh
@@ -0,0 +1,169 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright (c) 2022 by Lawrence Livermore National Security, LLC.
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/reservation/reservation.shlib
+. $STF_SUITE/tests/functional/zvol/zvol_common.shlib
+
+#
+# DESCRIPTION:
+# Stress test multithreaded transfers to multiple zvols. Also verify
+# zvol errors show up in zpool status.
+#
+# STRATEGY:
+#
+# For both the normal submit_bio() codepath and the blk-mq codepath, do
+# the following:
+#
+# 1. Create one zvol per CPU
+# 2. In parallel, spawn an fio "write and verify" for each zvol
+# 3. Inject write errors
+# 4. Write to one of the zvols with dd and verify the errors
+#
+
+verify_runnable "global"
+
+num_zvols=$(get_num_cpus)
+
+# If we were making one big zvol from all the pool space, it would
+# be this big:
+biggest_zvol_size_possible=$(largest_volsize_from_pool $TESTPOOL)
+
+# Crude calculation: take the biggest zvol size we could possibly
+# create, knock 10% off it (for overhead) and divide by the number
+# of ZVOLs we want to make.
+#
+# Round the value down using floor()
+typeset -f each_zvol_size=$(( floor($biggest_zvol_size_possible * 0.9 / \
+ $num_zvols )))
+
+typeset tmpdir="$(mktemp -d zvol_stress_fio_state.XXXXXX)"
+
+function create_zvols
+{
+ log_note "Creating $num_zvols zvols that are ${each_zvol_size}B each"
+ for i in $(seq $num_zvols) ; do
+ log_must zfs create -V $each_zvol_size $TESTPOOL/testvol$i
+ block_device_wait "$ZVOL_DEVDIR/$TESTPOOL/testvol$i"
+ done
+}
+
+function destroy_zvols
+{
+ for i in $(seq $num_zvols) ; do
+ log_must_busy zfs destroy $TESTPOOL/testvol$i
+ done
+}
+
+function do_zvol_stress
+{
+ # Write 10% of each zvol, or 50MB, whichever is less
+ zvol_write_size=$((each_zvol_size / 10))
+ if [ $zvol_write_size -gt $((50 * 1048576)) ] ; then
+ zvol_write_size=$((50 * 1048576))
+ fi
+ zvol_write_size_mb=$(($zvol_write_size / 1048576))
+
+ if is_linux ; then
+ engine=libaio
+ else
+ engine=psync
+ fi
+
+ # Spawn off one fio per zvol in parallel
+ pids=""
+ for i in $(seq $num_zvols) ; do
+ # Spawn one fio per zvol as its own process
+ fio --ioengine=$engine --name=zvol_stress$i --direct=0 \
+ --filename="$ZVOL_DEVDIR/$TESTPOOL/testvol$i" --bs=1048576 \
+ --iodepth=10 --readwrite=randwrite --size=${zvol_write_size} \
+ --verify_async=2 --numjobs=1 --verify=sha1 \
+ --verify_fatal=1 \
+ --continue_on_error=none \
+ --error_dump=1 \
+ --exitall_on_error \
+ --aux-path="$tmpdir" --do_verify=1 &
+ pids="$pids $!"
+ done
+
+ # Wait for all the spawned fios to finish and look for errors
+ # zvols are numbered from 1 (seq above), so the index starts at 1 too
+ i=1
+ for pid in $pids ; do
+ log_note "waiting on $pid"
+ if ! wait $pid ; then
+ log_fail "fio error on $TESTPOOL/testvol$i"
+ fi
+ i=$(($i + 1))
+ done
+}
+
+function cleanup
+{
+ log_must zinject -c all
+ log_must zpool clear $TESTPOOL
+ destroy_zvols
+ set_blk_mq 0
+
+ # Remove all fio's leftover state files
+ if [ -n "$tmpdir" ] ; then
+ log_must rm -fd "$tmpdir"/*.state "$tmpdir"
+ fi
+}
+
+log_onexit cleanup
+
+log_assert "Stress test zvols"
+
+set_blk_mq 0
+create_zvols
+# Do some fio write/verifies in parallel
+do_zvol_stress
+destroy_zvols
+
+# Enable blk-mq (block multi-queue), and re-run the same test
+set_blk_mq 1
+create_zvols
+do_zvol_stress
+
+# Inject some errors, and verify we see some IO errors in zpool status
+for DISK in $DISKS ; do
+ log_must zinject -d $DISK -f 10 -e io -T write $TESTPOOL
+done
+log_must dd if=/dev/zero of=$ZVOL_DEVDIR/$TESTPOOL/testvol1 bs=512 count=50
+log_must zinject -c all
+
+# We should see write errors
+typeset -i write_errors=$(zpool status -p | awk '
+ !NF { isvdev = 0 }
+ isvdev { errors += $4 }
+ /CKSUM$/ { isvdev = 1 }
+ END { print errors }
+')
+
+if [ $write_errors -eq 0 ] ; then
+ log_fail "Expected to see some write errors"
+else
+ log_note "Correctly saw $write_errors write errors"
+fi
+log_pass "Done with zvol_stress"