From 0ae142b64039ba741ff69605d2a110074454a0ce Mon Sep 17 00:00:00 2001 From: Brooks Davis Date: Mon, 7 Nov 2022 18:09:41 +0000 Subject: [PATCH 1/8] nvpair: Always use XDR encoding Recievers are agnostic to stream format and just call nvpair_unpack or nvpair_xunpack which decode what ever stream type is received. It is easier to make a system with different unpacked sizes work with XDR than with native where streams are unpacked in place. This change provides forward compatability with systems were the size of pointers exceeds 64-bits. For example, a CheriBSD pure-capability kernel targeting Morello should be able to work with a FreeBSD aarch64 zfs or zpool command build with this change applied. Signed-off-by: Brooks Davis --- lib/libzfs/libzfs_dataset.c | 4 ++-- lib/libzfs/libzfs_util.c | 2 +- module/nvpair/fnvpair.c | 6 +++--- module/nvpair/nvpair.c | 3 +-- module/zfs/fm.c | 4 ++-- module/zfs/spa_history.c | 2 +- 6 files changed, 10 insertions(+), 11 deletions(-) diff --git a/lib/libzfs/libzfs_dataset.c b/lib/libzfs/libzfs_dataset.c index 87bc4ea66c..ff2212cd8c 100644 --- a/lib/libzfs/libzfs_dataset.c +++ b/lib/libzfs/libzfs_dataset.c @@ -5218,12 +5218,12 @@ zfs_set_fsacl(zfs_handle_t *zhp, boolean_t un, nvlist_t *nvl) assert(zhp->zfs_type == ZFS_TYPE_VOLUME || zhp->zfs_type == ZFS_TYPE_FILESYSTEM); - err = nvlist_size(nvl, &nvsz, NV_ENCODE_NATIVE); + err = nvlist_size(nvl, &nvsz, NV_ENCODE_XDR); assert(err == 0); nvbuf = malloc(nvsz); - err = nvlist_pack(nvl, &nvbuf, &nvsz, NV_ENCODE_NATIVE, 0); + err = nvlist_pack(nvl, &nvbuf, &nvsz, NV_ENCODE_XDR, 0); assert(err == 0); zc.zc_nvlist_src_size = nvsz; diff --git a/lib/libzfs/libzfs_util.c b/lib/libzfs/libzfs_util.c index b4679dbb36..86ed001ad7 100644 --- a/lib/libzfs/libzfs_util.c +++ b/lib/libzfs/libzfs_util.c @@ -1207,7 +1207,7 @@ zcmd_write_nvlist_com(libzfs_handle_t *hdl, uint64_t *outnv, uint64_t *outlen, size_t len = fnvlist_size(nvl); packed = zfs_alloc(hdl, len); - verify(nvlist_pack(nvl, &packed, &len, NV_ENCODE_NATIVE, 0) == 0); + verify(nvlist_pack(nvl, &packed, &len, NV_ENCODE_XDR, 0) == 0); *outnv = (uint64_t)(uintptr_t)packed; *outlen = len; diff --git a/module/nvpair/fnvpair.c b/module/nvpair/fnvpair.c index 86b0b4cdfc..effc4594cc 100644 --- a/module/nvpair/fnvpair.c +++ b/module/nvpair/fnvpair.c @@ -40,7 +40,7 @@ * functions, which can return the requested value (rather than filling in * a pointer). * - * These functions use NV_UNIQUE_NAME, encoding NV_ENCODE_NATIVE, and allocate + * These functions use NV_UNIQUE_NAME, encoding NV_ENCODE_XDR, and allocate * with KM_SLEEP. * * More wrappers should be added as needed -- for example @@ -65,7 +65,7 @@ size_t fnvlist_size(nvlist_t *nvl) { size_t size; - VERIFY0(nvlist_size(nvl, &size, NV_ENCODE_NATIVE)); + VERIFY0(nvlist_size(nvl, &size, NV_ENCODE_XDR)); return (size); } @@ -77,7 +77,7 @@ char * fnvlist_pack(nvlist_t *nvl, size_t *sizep) { char *packed = 0; - VERIFY3U(nvlist_pack(nvl, &packed, sizep, NV_ENCODE_NATIVE, + VERIFY3U(nvlist_pack(nvl, &packed, sizep, NV_ENCODE_XDR, KM_SLEEP), ==, 0); return (packed); } diff --git a/module/nvpair/nvpair.c b/module/nvpair/nvpair.c index c8161f116b..18301150d1 100644 --- a/module/nvpair/nvpair.c +++ b/module/nvpair/nvpair.c @@ -2753,8 +2753,7 @@ nvlist_xunpack(char *buf, size_t buflen, nvlist_t **nvlp, nv_alloc_t *nva) if ((err = nvlist_xalloc(&nvl, 0, nva)) != 0) return (err); - if ((err = nvlist_common(nvl, buf, &buflen, NV_ENCODE_NATIVE, - NVS_OP_DECODE)) != 0) + if ((err = nvlist_common(nvl, buf, &buflen, -1, NVS_OP_DECODE)) != 0) nvlist_free(nvl); else *nvlp = nvl; diff --git a/module/zfs/fm.c b/module/zfs/fm.c index 3f05d75977..bf8458b768 100644 --- a/module/zfs/fm.c +++ b/module/zfs/fm.c @@ -226,7 +226,7 @@ zfs_zevent_post(nvlist_t *nvl, nvlist_t *detector, zevent_cb_t *cb) goto out; } - error = nvlist_size(nvl, &nvl_size, NV_ENCODE_NATIVE); + error = nvlist_size(nvl, &nvl_size, NV_ENCODE_XDR); if (error) { atomic_inc_64(&erpt_kstat_data.erpt_dropped.value.ui64); goto out; @@ -337,7 +337,7 @@ zfs_zevent_next(zfs_zevent_t *ze, nvlist_t **event, uint64_t *event_size, } } - VERIFY(nvlist_size(ev->ev_nvl, &size, NV_ENCODE_NATIVE) == 0); + VERIFY(nvlist_size(ev->ev_nvl, &size, NV_ENCODE_XDR) == 0); if (size > *event_size) { *event_size = size; error = ENOMEM; diff --git a/module/zfs/spa_history.c b/module/zfs/spa_history.c index 6d468e7164..508a329614 100644 --- a/module/zfs/spa_history.c +++ b/module/zfs/spa_history.c @@ -330,7 +330,7 @@ spa_history_log_sync(void *arg, dmu_tx_t *tx) fnvlist_lookup_string(nvl, ZPOOL_HIST_IOCTL)); } - VERIFY3U(nvlist_pack(nvl, &record_packed, &reclen, NV_ENCODE_NATIVE, + VERIFY3U(nvlist_pack(nvl, &record_packed, &reclen, NV_ENCODE_XDR, KM_SLEEP), ==, 0); mutex_enter(&spa->spa_history_lock); From 6cbd8f4289bbc406531845cfe4f838162b1303b9 Mon Sep 17 00:00:00 2001 From: Brooks Davis Date: Fri, 18 Nov 2022 00:47:05 +0000 Subject: [PATCH 2/8] Don't switch fm.c to NV_ENCODE_XDR nvlist_size is being used to determine event memory use rather than to size a buffer and AFACT events never leave the kernel. --- module/zfs/fm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/module/zfs/fm.c b/module/zfs/fm.c index bf8458b768..3f05d75977 100644 --- a/module/zfs/fm.c +++ b/module/zfs/fm.c @@ -226,7 +226,7 @@ zfs_zevent_post(nvlist_t *nvl, nvlist_t *detector, zevent_cb_t *cb) goto out; } - error = nvlist_size(nvl, &nvl_size, NV_ENCODE_XDR); + error = nvlist_size(nvl, &nvl_size, NV_ENCODE_NATIVE); if (error) { atomic_inc_64(&erpt_kstat_data.erpt_dropped.value.ui64); goto out; @@ -337,7 +337,7 @@ zfs_zevent_next(zfs_zevent_t *ze, nvlist_t **event, uint64_t *event_size, } } - VERIFY(nvlist_size(ev->ev_nvl, &size, NV_ENCODE_XDR) == 0); + VERIFY(nvlist_size(ev->ev_nvl, &size, NV_ENCODE_NATIVE) == 0); if (size > *event_size) { *event_size = size; error = ENOMEM; From db0d791f8333cf3d40ba027e9a0e6a3b38b35fcb Mon Sep 17 00:00:00 2001 From: Brooks Davis Date: Fri, 18 Nov 2022 00:54:36 +0000 Subject: [PATCH 3/8] Don't mix fnvlist_size and nvlist_pack fnvlist_size implies an encoding format while nvlist_pack takes an explict one. --- lib/libzfs/libzfs_util.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/libzfs/libzfs_util.c b/lib/libzfs/libzfs_util.c index 86ed001ad7..8ea08bdf15 100644 --- a/lib/libzfs/libzfs_util.c +++ b/lib/libzfs/libzfs_util.c @@ -1203,8 +1203,9 @@ zcmd_write_nvlist_com(libzfs_handle_t *hdl, uint64_t *outnv, uint64_t *outlen, nvlist_t *nvl) { char *packed; + size_t len; - size_t len = fnvlist_size(nvl); + verify(nvlist_size(nvl, &len, NV_ENCODE_XDR) == 0); packed = zfs_alloc(hdl, len); verify(nvlist_pack(nvl, &packed, &len, NV_ENCODE_XDR, 0) == 0); From 8a30bd4541252a4ad3d586b4756cea437ab150bd Mon Sep 17 00:00:00 2001 From: Brooks Davis Date: Fri, 18 Nov 2022 17:54:23 +0000 Subject: [PATCH 4/8] Switch fnvpair back to native to assess impact --- module/nvpair/fnvpair.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/module/nvpair/fnvpair.c b/module/nvpair/fnvpair.c index effc4594cc..86b0b4cdfc 100644 --- a/module/nvpair/fnvpair.c +++ b/module/nvpair/fnvpair.c @@ -40,7 +40,7 @@ * functions, which can return the requested value (rather than filling in * a pointer). * - * These functions use NV_UNIQUE_NAME, encoding NV_ENCODE_XDR, and allocate + * These functions use NV_UNIQUE_NAME, encoding NV_ENCODE_NATIVE, and allocate * with KM_SLEEP. * * More wrappers should be added as needed -- for example @@ -65,7 +65,7 @@ size_t fnvlist_size(nvlist_t *nvl) { size_t size; - VERIFY0(nvlist_size(nvl, &size, NV_ENCODE_XDR)); + VERIFY0(nvlist_size(nvl, &size, NV_ENCODE_NATIVE)); return (size); } @@ -77,7 +77,7 @@ char * fnvlist_pack(nvlist_t *nvl, size_t *sizep) { char *packed = 0; - VERIFY3U(nvlist_pack(nvl, &packed, sizep, NV_ENCODE_XDR, + VERIFY3U(nvlist_pack(nvl, &packed, sizep, NV_ENCODE_NATIVE, KM_SLEEP), ==, 0); return (packed); } From 4003f9b805207cabe0479cfbc28f33fc6fc92d4c Mon Sep 17 00:00:00 2001 From: Brooks Davis Date: Fri, 18 Nov 2022 19:09:28 +0000 Subject: [PATCH 5/8] fnvlist: add _xdr variants of _size and _pack This allows code to be incrementally converted to use XDR packed nvlists while still using the simplified fnvlist API. Signed-off-by: Brooks Davis --- include/sys/nvpair.h | 2 ++ module/nvpair/fnvpair.c | 17 +++++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/include/sys/nvpair.h b/include/sys/nvpair.h index 09b3a81f7c..548e5a72e4 100644 --- a/include/sys/nvpair.h +++ b/include/sys/nvpair.h @@ -310,7 +310,9 @@ _SYS_NVPAIR_H int nvpair_value_double(const nvpair_t *, double *); _SYS_NVPAIR_H nvlist_t *fnvlist_alloc(void); _SYS_NVPAIR_H void fnvlist_free(nvlist_t *); _SYS_NVPAIR_H size_t fnvlist_size(nvlist_t *); +_SYS_NVPAIR_H size_t fnvlist_size_xdr(nvlist_t *); _SYS_NVPAIR_H char *fnvlist_pack(nvlist_t *, size_t *); +_SYS_NVPAIR_H char *fnvlist_pack_xdr(nvlist_t *, size_t *); _SYS_NVPAIR_H void fnvlist_pack_free(char *, size_t); _SYS_NVPAIR_H nvlist_t *fnvlist_unpack(char *, size_t); _SYS_NVPAIR_H nvlist_t *fnvlist_dup(const nvlist_t *); diff --git a/module/nvpair/fnvpair.c b/module/nvpair/fnvpair.c index 86b0b4cdfc..301346e73d 100644 --- a/module/nvpair/fnvpair.c +++ b/module/nvpair/fnvpair.c @@ -69,6 +69,14 @@ fnvlist_size(nvlist_t *nvl) return (size); } +size_t +fnvlist_size_xdr(nvlist_t *nvl) +{ + size_t size; + VERIFY0(nvlist_size(nvl, &size, NV_ENCODE_XDR)); + return (size); +} + /* * Returns allocated buffer of size *sizep. Caller must free the buffer with * fnvlist_pack_free(). @@ -82,6 +90,15 @@ fnvlist_pack(nvlist_t *nvl, size_t *sizep) return (packed); } +char * +fnvlist_pack_xdr(nvlist_t *nvl, size_t *sizep) +{ + char *packed = 0; + VERIFY3U(nvlist_pack(nvl, &packed, sizep, NV_ENCODE_XDR, + KM_SLEEP), ==, 0); + return (packed); +} + void fnvlist_pack_free(char *pack, size_t size) { From 4bfc16ced8456b2d662c54c2e6d06b508c63ac2d Mon Sep 17 00:00:00 2001 From: Brooks Davis Date: Fri, 18 Nov 2022 19:16:52 +0000 Subject: [PATCH 6/8] Store the pool config in XDR format --- module/zfs/spa_config.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/module/zfs/spa_config.c b/module/zfs/spa_config.c index 5165c37040..60d1f1b6fd 100644 --- a/module/zfs/spa_config.c +++ b/module/zfs/spa_config.c @@ -206,7 +206,7 @@ spa_config_write(spa_config_dirent_t *dp, nvlist_t *nvl) /* * Pack the configuration into a buffer. */ - buf = fnvlist_pack(nvl, &buflen); + buf = fnvlist_pack_xdr(nvl, &buflen); temp = kmem_zalloc(MAXPATHLEN, KM_SLEEP); /* From aa876d6ec3f56b8b9250db6e6c3054400af7b689 Mon Sep 17 00:00:00 2001 From: Brooks Davis Date: Mon, 21 Nov 2022 17:47:47 +0000 Subject: [PATCH 7/8] Don't use XDR for resume tokens It appears this leads to performace regressions. --- module/zfs/dsl_dataset.c | 1 + 1 file changed, 1 insertion(+) diff --git a/module/zfs/dsl_dataset.c b/module/zfs/dsl_dataset.c index 4da4effca6..7fb0f7b0ff 100644 --- a/module/zfs/dsl_dataset.c +++ b/module/zfs/dsl_dataset.c @@ -2439,6 +2439,7 @@ get_receive_resume_token_impl(dsl_dataset_t *ds) redact_snaps, num_redact_snaps); kmem_free(redact_snaps, int_size * num_redact_snaps); } + /* XXX: using XDR seems to cause performace problems */ packed = fnvlist_pack(token_nv, &packed_size); fnvlist_free(token_nv); compressed = kmem_alloc(packed_size, KM_SLEEP); From 1e36c5b9ff40914e47560134cabc8bc11b080063 Mon Sep 17 00:00:00 2001 From: Brooks Davis Date: Fri, 18 Nov 2022 22:52:16 +0000 Subject: [PATCH 8/8] Use XDR packing for sent streams --- module/zfs/dmu_send.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c index ccb7eb2075..47d977b1f7 100644 --- a/module/zfs/dmu_send.c +++ b/module/zfs/dmu_send.c @@ -2527,7 +2527,7 @@ dmu_send_impl(struct dmu_send_params *dspp) } if (!nvlist_empty(nvl)) { - payload = fnvlist_pack(nvl, &payload_len); + payload = fnvlist_pack_xdr(nvl, &payload_len); drr->drr_payloadlen = payload_len; }