From c71806c3b6795b5fa9bd6cbb186a5e875a0d8d8f Mon Sep 17 00:00:00 2001 From: Ameer Hamza Date: Thu, 12 Oct 2023 03:31:11 +0500 Subject: [PATCH 1/6] zvol: Cleanup set property zvol_set_volmode() and zvol_set_snapdev() share a common code path. Merge this shared code path into zvol_set_common(). Signed-off-by: Ameer Hamza --- include/sys/zvol.h | 3 +- module/zfs/zfs_ioctl.c | 4 +- module/zfs/zvol.c | 137 +++++++++++------------------------------ 3 files changed, 37 insertions(+), 107 deletions(-) diff --git a/include/sys/zvol.h b/include/sys/zvol.h index 15a3034aef..5e92a3367b 100644 --- a/include/sys/zvol.h +++ b/include/sys/zvol.h @@ -50,8 +50,7 @@ extern int zvol_get_stats(objset_t *, nvlist_t *); extern boolean_t zvol_is_zvol(const char *); extern void zvol_create_cb(objset_t *, void *, cred_t *, dmu_tx_t *); extern int zvol_set_volsize(const char *, uint64_t); -extern int zvol_set_snapdev(const char *, zprop_source_t, uint64_t); -extern int zvol_set_volmode(const char *, zprop_source_t, uint64_t); +extern int zvol_set_common(const char *, zfs_prop_t, zprop_source_t, uint64_t); extern zvol_state_handle_t *zvol_suspend(const char *); extern int zvol_resume(zvol_state_handle_t *); extern void *zvol_tag(zvol_state_handle_t *); diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index 2738385e26..d780b9f3da 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -2524,10 +2524,8 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source, err = zvol_set_volsize(dsname, intval); break; case ZFS_PROP_SNAPDEV: - err = zvol_set_snapdev(dsname, source, intval); - break; case ZFS_PROP_VOLMODE: - err = zvol_set_volmode(dsname, source, intval); + err = zvol_set_common(dsname, prop, source, intval); break; case ZFS_PROP_VERSION: { diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c index 53dcb4dee4..023a6386d6 100644 --- a/module/zfs/zvol.c +++ b/module/zfs/zvol.c @@ -1541,6 +1541,7 @@ typedef struct zvol_set_prop_int_arg { const char *zsda_name; uint64_t zsda_value; zprop_source_t zsda_source; + zfs_prop_t zsda_prop; dmu_tx_t *zsda_tx; } zvol_set_prop_int_arg_t; @@ -1549,7 +1550,7 @@ typedef struct zvol_set_prop_int_arg { * conditions are imposed. */ static int -zvol_set_snapdev_check(void *arg, dmu_tx_t *tx) +zvol_set_common_check(void *arg, dmu_tx_t *tx) { zvol_set_prop_int_arg_t *zsda = arg; dsl_pool_t *dp = dmu_tx_pool(tx); @@ -1566,17 +1567,33 @@ zvol_set_snapdev_check(void *arg, dmu_tx_t *tx) } static int -zvol_set_snapdev_sync_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg) +zvol_set_common_sync_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg) { - (void) arg; + zvol_set_prop_int_arg_t *zsda = arg; char dsname[MAXNAMELEN]; zvol_task_t *task; - uint64_t snapdev; + uint64_t prop; + const char *prop_name = zfs_prop_to_name(zsda->zsda_prop); dsl_dataset_name(ds, dsname); - if (dsl_prop_get_int_ds(ds, "snapdev", &snapdev) != 0) + + if (dsl_prop_get_int_ds(ds, prop_name, &prop) != 0) return (0); - task = zvol_task_alloc(ZVOL_ASYNC_SET_SNAPDEV, dsname, NULL, snapdev); + + switch (zsda->zsda_prop) { + case ZFS_PROP_VOLMODE: + task = zvol_task_alloc(ZVOL_ASYNC_SET_VOLMODE, dsname, + NULL, prop); + break; + case ZFS_PROP_SNAPDEV: + task = zvol_task_alloc(ZVOL_ASYNC_SET_SNAPDEV, dsname, + NULL, prop); + break; + default: + task = NULL; + break; + } + if (task == NULL) return (0); @@ -1586,14 +1603,14 @@ zvol_set_snapdev_sync_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg) } /* - * Traverse all child datasets and apply snapdev appropriately. + * Traverse all child datasets and apply the property appropriately. * We call dsl_prop_set_sync_impl() here to set the value only on the toplevel - * dataset and read the effective "snapdev" on every child in the callback + * dataset and read the effective "property" on every child in the callback * function: this is because the value is not guaranteed to be the same in the * whole dataset hierarchy. */ static void -zvol_set_snapdev_sync(void *arg, dmu_tx_t *tx) +zvol_set_common_sync(void *arg, dmu_tx_t *tx) { zvol_set_prop_int_arg_t *zsda = arg; dsl_pool_t *dp = dmu_tx_pool(tx); @@ -1606,115 +1623,31 @@ zvol_set_snapdev_sync(void *arg, dmu_tx_t *tx) error = dsl_dataset_hold(dp, zsda->zsda_name, FTAG, &ds); if (error == 0) { - dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_SNAPDEV), + dsl_prop_set_sync_impl(ds, zfs_prop_to_name(zsda->zsda_prop), zsda->zsda_source, sizeof (zsda->zsda_value), 1, &zsda->zsda_value, zsda->zsda_tx); dsl_dataset_rele(ds, FTAG); } - dmu_objset_find_dp(dp, dd->dd_object, zvol_set_snapdev_sync_cb, + + dmu_objset_find_dp(dp, dd->dd_object, zvol_set_common_sync_cb, zsda, DS_FIND_CHILDREN); dsl_dir_rele(dd, FTAG); } int -zvol_set_snapdev(const char *ddname, zprop_source_t source, uint64_t snapdev) +zvol_set_common(const char *ddname, zfs_prop_t prop, zprop_source_t source, + uint64_t val) { zvol_set_prop_int_arg_t zsda; zsda.zsda_name = ddname; zsda.zsda_source = source; - zsda.zsda_value = snapdev; + zsda.zsda_value = val; + zsda.zsda_prop = prop; - return (dsl_sync_task(ddname, zvol_set_snapdev_check, - zvol_set_snapdev_sync, &zsda, 0, ZFS_SPACE_CHECK_NONE)); -} - -/* - * Sanity check the dataset for safe use by the sync task. No additional - * conditions are imposed. - */ -static int -zvol_set_volmode_check(void *arg, dmu_tx_t *tx) -{ - zvol_set_prop_int_arg_t *zsda = arg; - dsl_pool_t *dp = dmu_tx_pool(tx); - dsl_dir_t *dd; - int error; - - error = dsl_dir_hold(dp, zsda->zsda_name, FTAG, &dd, NULL); - if (error != 0) - return (error); - - dsl_dir_rele(dd, FTAG); - - return (error); -} - -static int -zvol_set_volmode_sync_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg) -{ - (void) arg; - char dsname[MAXNAMELEN]; - zvol_task_t *task; - uint64_t volmode; - - dsl_dataset_name(ds, dsname); - if (dsl_prop_get_int_ds(ds, "volmode", &volmode) != 0) - return (0); - task = zvol_task_alloc(ZVOL_ASYNC_SET_VOLMODE, dsname, NULL, volmode); - if (task == NULL) - return (0); - - (void) taskq_dispatch(dp->dp_spa->spa_zvol_taskq, zvol_task_cb, - task, TQ_SLEEP); - return (0); -} - -/* - * Traverse all child datasets and apply volmode appropriately. - * We call dsl_prop_set_sync_impl() here to set the value only on the toplevel - * dataset and read the effective "volmode" on every child in the callback - * function: this is because the value is not guaranteed to be the same in the - * whole dataset hierarchy. - */ -static void -zvol_set_volmode_sync(void *arg, dmu_tx_t *tx) -{ - zvol_set_prop_int_arg_t *zsda = arg; - dsl_pool_t *dp = dmu_tx_pool(tx); - dsl_dir_t *dd; - dsl_dataset_t *ds; - int error; - - VERIFY0(dsl_dir_hold(dp, zsda->zsda_name, FTAG, &dd, NULL)); - zsda->zsda_tx = tx; - - error = dsl_dataset_hold(dp, zsda->zsda_name, FTAG, &ds); - if (error == 0) { - dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_VOLMODE), - zsda->zsda_source, sizeof (zsda->zsda_value), 1, - &zsda->zsda_value, zsda->zsda_tx); - dsl_dataset_rele(ds, FTAG); - } - - dmu_objset_find_dp(dp, dd->dd_object, zvol_set_volmode_sync_cb, - zsda, DS_FIND_CHILDREN); - - dsl_dir_rele(dd, FTAG); -} - -int -zvol_set_volmode(const char *ddname, zprop_source_t source, uint64_t volmode) -{ - zvol_set_prop_int_arg_t zsda; - - zsda.zsda_name = ddname; - zsda.zsda_source = source; - zsda.zsda_value = volmode; - - return (dsl_sync_task(ddname, zvol_set_volmode_check, - zvol_set_volmode_sync, &zsda, 0, ZFS_SPACE_CHECK_NONE)); + return (dsl_sync_task(ddname, zvol_set_common_check, + zvol_set_common_sync, &zsda, 0, ZFS_SPACE_CHECK_NONE)); } void From 1438208f3c4c55f6aa524495d68b6ac0a2dbf424 Mon Sep 17 00:00:00 2001 From: Ameer Hamza Date: Wed, 25 Oct 2023 02:53:27 +0500 Subject: [PATCH 2/6] zvol: Implement zvol threading as a Property Currently, zvol threading can be switched through the zvol_request_sync module parameter system-wide. By making it a zvol property, zvol threading can be switched per zvol. Signed-off-by: Ameer Hamza --- include/sys/fs/zfs.h | 1 + include/sys/zvol.h | 1 + include/sys/zvol_impl.h | 1 + lib/libzfs/libzfs.abi | 3 ++- man/man7/zfsprops.7 | 12 ++++++++++++ module/os/linux/zfs/zvol_os.c | 9 ++++++++- module/zcommon/zfs_prop.c | 3 +++ module/zfs/zfs_ioctl.c | 9 +++++++++ module/zfs/zvol.c | 14 ++++++++++++++ 9 files changed, 51 insertions(+), 2 deletions(-) diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index bc940e8a79..d8b6ff3fbd 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -191,6 +191,7 @@ typedef enum { ZFS_PROP_REDACTED, ZFS_PROP_REDACT_SNAPS, ZFS_PROP_SNAPSHOTS_CHANGED, + ZFS_PROP_VOLTHREADING, ZFS_NUM_PROPS } zfs_prop_t; diff --git a/include/sys/zvol.h b/include/sys/zvol.h index 5e92a3367b..c9eefbeb48 100644 --- a/include/sys/zvol.h +++ b/include/sys/zvol.h @@ -50,6 +50,7 @@ extern int zvol_get_stats(objset_t *, nvlist_t *); extern boolean_t zvol_is_zvol(const char *); extern void zvol_create_cb(objset_t *, void *, cred_t *, dmu_tx_t *); extern int zvol_set_volsize(const char *, uint64_t); +extern int zvol_set_volthreading(const char *, boolean_t); extern int zvol_set_common(const char *, zfs_prop_t, zprop_source_t, uint64_t); extern zvol_state_handle_t *zvol_suspend(const char *); extern int zvol_resume(zvol_state_handle_t *); diff --git a/include/sys/zvol_impl.h b/include/sys/zvol_impl.h index 3243917bcd..de4f6dec86 100644 --- a/include/sys/zvol_impl.h +++ b/include/sys/zvol_impl.h @@ -58,6 +58,7 @@ typedef struct zvol_state { atomic_t zv_suspend_ref; /* refcount for suspend */ krwlock_t zv_suspend_lock; /* suspend lock */ struct zvol_state_os *zv_zso; /* private platform state */ + boolean_t zv_threading; /* volthreading property */ } zvol_state_t; diff --git a/lib/libzfs/libzfs.abi b/lib/libzfs/libzfs.abi index 2d612a16b2..fdc36487dc 100644 --- a/lib/libzfs/libzfs.abi +++ b/lib/libzfs/libzfs.abi @@ -1867,7 +1867,8 @@ - + + diff --git a/man/man7/zfsprops.7 b/man/man7/zfsprops.7 index f8554c81c8..069be5e383 100644 --- a/man/man7/zfsprops.7 +++ b/man/man7/zfsprops.7 @@ -1192,6 +1192,18 @@ are equivalent to the and .Sy noexec mount options. +.It Sy volthreading Ns = Ns Sy on Ns | Ns Sy off +Controls internal zvol threading. +The value +.Sy off +disables zvol threading, and zvol relies on application threads. +The default value is +.Sy on , +which enables threading within a zvol. +Please note that this property will be overridden by +.Sy zvol_request_sync +module parameter. +This property is only applicable to Linux. .It Sy filesystem_limit Ns = Ns Ar count Ns | Ns Sy none Limits the number of filesystems and volumes that can exist under this point in the dataset tree. diff --git a/module/os/linux/zfs/zvol_os.c b/module/os/linux/zfs/zvol_os.c index f94ce69fb9..a7bcda252f 100644 --- a/module/os/linux/zfs/zvol_os.c +++ b/module/os/linux/zfs/zvol_os.c @@ -512,7 +512,7 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq, uint64_t size = io_size(bio, rq); int rw = io_data_dir(bio, rq); - if (zvol_request_sync) + if (zvol_request_sync || zv->zv_threading == B_FALSE) force_sync = 1; zv_request_t zvr = { @@ -1304,6 +1304,7 @@ zvol_os_create_minor(const char *name) int error = 0; int idx; uint64_t hash = zvol_name_hash(name); + uint64_t volthreading; bool replayed_zil = B_FALSE; if (zvol_inhibit_dev) @@ -1350,6 +1351,12 @@ zvol_os_create_minor(const char *name) zv->zv_volsize = volsize; zv->zv_objset = os; + /* Default */ + zv->zv_threading = B_TRUE; + if (dsl_prop_get_integer(name, "volthreading", &volthreading, NULL) + == 0) + zv->zv_threading = volthreading; + set_capacity(zv->zv_zso->zvo_disk, zv->zv_volsize >> 9); blk_queue_max_hw_sectors(zv->zv_zso->zvo_queue, diff --git a/module/zcommon/zfs_prop.c b/module/zcommon/zfs_prop.c index 00b5393a89..f63c2ad2f6 100644 --- a/module/zcommon/zfs_prop.c +++ b/module/zcommon/zfs_prop.c @@ -611,6 +611,9 @@ zfs_prop_init(void) ZVOL_DEFAULT_BLOCKSIZE, PROP_ONETIME, ZFS_TYPE_VOLUME, "512 to 128k, power of 2", "VOLBLOCK", B_FALSE, sfeatures); + zprop_register_index(ZFS_PROP_VOLTHREADING, "volthreading", + 1, PROP_DEFAULT, ZFS_TYPE_VOLUME, "on | off", "zvol threading", + boolean_table, sfeatures); zprop_register_number(ZFS_PROP_USEDSNAP, "usedbysnapshots", 0, PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "", "USEDSNAP", B_FALSE, sfeatures); diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index d780b9f3da..72d5fc8192 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -2523,6 +2523,15 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source, case ZFS_PROP_VOLSIZE: err = zvol_set_volsize(dsname, intval); break; + case ZFS_PROP_VOLTHREADING: + err = zvol_set_volthreading(dsname, intval); + /* + * Set err to -1 to force the zfs_set_prop_nvlist code down the + * default path to set the value in the nvlist. + */ + if (err == 0) + err = -1; + break; case ZFS_PROP_SNAPDEV: case ZFS_PROP_VOLMODE: err = zvol_set_common(dsname, prop, source, intval); diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c index 023a6386d6..c576e34696 100644 --- a/module/zfs/zvol.c +++ b/module/zfs/zvol.c @@ -369,6 +369,20 @@ out: return (SET_ERROR(error)); } +/* + * Update volthreading. + */ +int +zvol_set_volthreading(const char *name, boolean_t value) +{ + zvol_state_t *zv = zvol_find_by_name(name, RW_NONE); + if (zv == NULL) + return (ENOENT); + zv->zv_threading = value; + mutex_exit(&zv->zv_state_lock); + return (0); +} + /* * Sanity check volume block size. */ From 510560c66f1a1b8156433db5dda2d331a6524f5a Mon Sep 17 00:00:00 2001 From: Ameer Hamza Date: Wed, 18 Oct 2023 00:19:58 +0500 Subject: [PATCH 3/6] zvol: fix delayed update to block device ro entry The change in the zvol readonly property does not update the block device readonly entry until the first IO to the ZVOL. This patch addresses the issue by updating the block device readonly property from the set property IOCTL call. Signed-off-by: Ameer Hamza --- include/sys/zvol.h | 1 + module/zfs/zfs_ioctl.c | 9 +++++++++ module/zfs/zvol.c | 20 ++++++++++++++++++++ 3 files changed, 30 insertions(+) diff --git a/include/sys/zvol.h b/include/sys/zvol.h index c9eefbeb48..c79fe1d9ad 100644 --- a/include/sys/zvol.h +++ b/include/sys/zvol.h @@ -52,6 +52,7 @@ extern void zvol_create_cb(objset_t *, void *, cred_t *, dmu_tx_t *); extern int zvol_set_volsize(const char *, uint64_t); extern int zvol_set_volthreading(const char *, boolean_t); extern int zvol_set_common(const char *, zfs_prop_t, zprop_source_t, uint64_t); +extern int zvol_set_ro(const char *, boolean_t); extern zvol_state_handle_t *zvol_suspend(const char *); extern int zvol_resume(zvol_state_handle_t *); extern void *zvol_tag(zvol_state_handle_t *); diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index 72d5fc8192..01e16e102f 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -2536,6 +2536,15 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source, case ZFS_PROP_VOLMODE: err = zvol_set_common(dsname, prop, source, intval); break; + case ZFS_PROP_READONLY: + err = zvol_set_ro(dsname, intval); + /* + * Set err to -1 to force the zfs_set_prop_nvlist code down the + * default path to set the value in the nvlist. + */ + if (err == 0) + err = -1; + break; case ZFS_PROP_VERSION: { zfsvfs_t *zfsvfs; diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c index c576e34696..e661c55d18 100644 --- a/module/zfs/zvol.c +++ b/module/zfs/zvol.c @@ -383,6 +383,26 @@ zvol_set_volthreading(const char *name, boolean_t value) return (0); } +/* + * Update zvol ro property. + */ +int +zvol_set_ro(const char *name, boolean_t value) +{ + zvol_state_t *zv = zvol_find_by_name(name, RW_NONE); + if (zv == NULL) + return (-1); + if (value) { + zvol_os_set_disk_ro(zv, 1); + zv->zv_flags |= ZVOL_RDONLY; + } else { + zvol_os_set_disk_ro(zv, 0); + zv->zv_flags &= ~ZVOL_RDONLY; + } + mutex_exit(&zv->zv_state_lock); + return (0); +} + /* * Sanity check volume block size. */ From 3870483ffe3fc5245f7ccca9c603c2be4dfdde86 Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Tue, 12 Dec 2023 15:53:59 -0500 Subject: [PATCH 4/6] dbuf: Handle arcbuf assignment after block cloning In some cases dbuf_assign_arcbuf() may be called on a block that was recently cloned. If it happened in current TXG we must undo the block cloning first, since the only one dirty record per TXG can't and shouldn't mean both cloning and overwrite same time. Reviewed-by: Kay Pedersen Reviewed-by: Brian Behlendorf Signed-off-by: Alexander Motin Sponsored by: iXsystems, Inc. Closes #15653 --- module/zfs/dbuf.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index 5a7fe42b60..7691cd85f6 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -2930,7 +2930,8 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx) while (db->db_state == DB_READ || db->db_state == DB_FILL) cv_wait(&db->db_changed, &db->db_mtx); - ASSERT(db->db_state == DB_CACHED || db->db_state == DB_UNCACHED); + ASSERT(db->db_state == DB_CACHED || db->db_state == DB_UNCACHED || + db->db_state == DB_NOFILL); if (db->db_state == DB_CACHED && zfs_refcount_count(&db->db_holds) - 1 > db->db_dirtycnt) { @@ -2967,6 +2968,15 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx) arc_buf_destroy(db->db_buf, db); } db->db_buf = NULL; + } else if (db->db_state == DB_NOFILL) { + /* + * We will be completely replacing the cloned block. In case + * it was cloned in this transaction group, let's undirty the + * pending clone and mark the block as uncached. This will be + * as if the clone was never done. + */ + VERIFY(!dbuf_undirty(db, tx)); + db->db_state = DB_UNCACHED; } ASSERT(db->db_buf == NULL); dbuf_set_data(db, buf); From d44f56191347c50ee7ce28f20bc545342f23c5c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dag-Erling=20Sm=C3=B8rgrav?= Date: Wed, 30 Aug 2023 17:13:09 +0200 Subject: [PATCH 5/6] Add VERIFY0P() and ASSERT0P() macros. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These macros are similar to VERIFY0() and ASSERT0() but are intended for pointers, and therefore use uintptr_t instead of int64_t. Reviewed-by: Brian Behlendorf Reviewed-by: Kay Pedersen Reviewed-by: Alexander Motin Signed-off-by: Dag-Erling Smørgrav Closes #15225 --- include/os/freebsd/spl/sys/debug.h | 13 +++++++++++++ include/os/linux/spl/sys/debug.h | 13 +++++++++++++ lib/libspl/include/assert.h | 11 +++++++++++ 3 files changed, 37 insertions(+) diff --git a/include/os/freebsd/spl/sys/debug.h b/include/os/freebsd/spl/sys/debug.h index 3e67cf0e9a..c788b54d5a 100644 --- a/include/os/freebsd/spl/sys/debug.h +++ b/include/os/freebsd/spl/sys/debug.h @@ -39,12 +39,14 @@ * ASSERT3U() - Assert unsigned X OP Y is true, if not panic. * ASSERT3P() - Assert pointer X OP Y is true, if not panic. * ASSERT0() - Assert value is zero, if not panic. + * ASSERT0P() - Assert pointer is null, if not panic. * VERIFY() - Verify X is true, if not panic. * VERIFY3B() - Verify boolean X OP Y is true, if not panic. * VERIFY3S() - Verify signed X OP Y is true, if not panic. * VERIFY3U() - Verify unsigned X OP Y is true, if not panic. * VERIFY3P() - Verify pointer X OP Y is true, if not panic. * VERIFY0() - Verify value is zero, if not panic. + * VERIFY0P() - Verify pointer is null, if not panic. */ #ifndef _SPL_DEBUG_H @@ -136,6 +138,15 @@ spl_assert(const char *buf, const char *file, const char *func, int line) (long long) (_verify3_right)); \ } while (0) +#define VERIFY0P(RIGHT) do { \ + const uintptr_t _verify0_right = (uintptr_t)(RIGHT); \ + if (unlikely(!(0 == _verify0_right))) \ + spl_panic(__FILE__, __FUNCTION__, __LINE__, \ + "VERIFY0P(" #RIGHT ") " \ + "failed (NULL == %p)\n", \ + (void *)_verify0_right); \ + } while (0) + /* * Debugging disabled (--disable-debug) */ @@ -151,6 +162,7 @@ spl_assert(const char *buf, const char *file, const char *func, int line) #define ASSERT3P(x, y, z) \ ((void) sizeof ((uintptr_t)(x)), (void) sizeof ((uintptr_t)(z))) #define ASSERT0(x) ((void) sizeof ((uintptr_t)(x))) +#define ASSERT0P(x) ((void) sizeof ((uintptr_t)(x))) #define IMPLY(A, B) \ ((void) sizeof ((uintptr_t)(A)), (void) sizeof ((uintptr_t)(B))) #define EQUIV(A, B) \ @@ -166,6 +178,7 @@ spl_assert(const char *buf, const char *file, const char *func, int line) #define ASSERT3U VERIFY3U #define ASSERT3P VERIFY3P #define ASSERT0 VERIFY0 +#define ASSERT0P VERIFY0P #define ASSERT VERIFY #define IMPLY(A, B) \ ((void)(likely((!(A)) || (B)) || \ diff --git a/include/os/linux/spl/sys/debug.h b/include/os/linux/spl/sys/debug.h index 007238574f..82338d212e 100644 --- a/include/os/linux/spl/sys/debug.h +++ b/include/os/linux/spl/sys/debug.h @@ -34,12 +34,14 @@ * ASSERT3U() - Assert unsigned X OP Y is true, if not panic. * ASSERT3P() - Assert pointer X OP Y is true, if not panic. * ASSERT0() - Assert value is zero, if not panic. + * ASSERT0P() - Assert pointer is null, if not panic. * VERIFY() - Verify X is true, if not panic. * VERIFY3B() - Verify boolean X OP Y is true, if not panic. * VERIFY3S() - Verify signed X OP Y is true, if not panic. * VERIFY3U() - Verify unsigned X OP Y is true, if not panic. * VERIFY3P() - Verify pointer X OP Y is true, if not panic. * VERIFY0() - Verify value is zero, if not panic. + * VERIFY0P() - Verify pointer is null, if not panic. */ #ifndef _SPL_DEBUG_H @@ -140,6 +142,15 @@ spl_assert(const char *buf, const char *file, const char *func, int line) (long long) (_verify3_right)); \ } while (0) +#define VERIFY0P(RIGHT) do { \ + const uintptr_t _verify0_right = (uintptr_t)(RIGHT); \ + if (unlikely(!(0 == _verify0_right))) \ + spl_panic(__FILE__, __FUNCTION__, __LINE__, \ + "VERIFY0P(" #RIGHT ") " \ + "failed (NULL == %px)\n", \ + (void *)_verify0_right); \ + } while (0) + #define VERIFY_IMPLY(A, B) \ ((void)(likely((!(A)) || (B)) || \ spl_assert("(" #A ") implies (" #B ")", \ @@ -165,6 +176,7 @@ spl_assert(const char *buf, const char *file, const char *func, int line) #define ASSERT3P(x, y, z) \ ((void) sizeof ((uintptr_t)(x)), (void) sizeof ((uintptr_t)(z))) #define ASSERT0(x) ((void) sizeof ((uintptr_t)(x))) +#define ASSERT0P(x) ((void) sizeof ((uintptr_t)(x))) #define IMPLY(A, B) \ ((void) sizeof ((uintptr_t)(A)), (void) sizeof ((uintptr_t)(B))) #define EQUIV(A, B) \ @@ -180,6 +192,7 @@ spl_assert(const char *buf, const char *file, const char *func, int line) #define ASSERT3U VERIFY3U #define ASSERT3P VERIFY3P #define ASSERT0 VERIFY0 +#define ASSERT0P VERIFY0P #define ASSERT VERIFY #define IMPLY VERIFY_IMPLY #define EQUIV VERIFY_EQUIV diff --git a/lib/libspl/include/assert.h b/lib/libspl/include/assert.h index c5bf0f0cc8..79e1016eab 100644 --- a/lib/libspl/include/assert.h +++ b/lib/libspl/include/assert.h @@ -120,6 +120,15 @@ do { \ (u_longlong_t)__left); \ } while (0) +#define VERIFY0P(LEFT) \ +do { \ + const uintptr_t __left = (uintptr_t)(LEFT); \ + if (!(__left == 0)) \ + libspl_assertf(__FILE__, __FUNCTION__, __LINE__, \ + "%s == 0 (%p == 0)", #LEFT, \ + (void *)__left); \ +} while (0) + #ifdef assert #undef assert #endif @@ -134,6 +143,7 @@ do { \ #define ASSERT3P(x, y, z) \ ((void) sizeof ((uintptr_t)(x)), (void) sizeof ((uintptr_t)(z))) #define ASSERT0(x) ((void) sizeof ((uintptr_t)(x))) +#define ASSERT0P(x) ((void) sizeof ((uintptr_t)(x))) #define ASSERT(x) ((void) sizeof ((uintptr_t)(x))) #define assert(x) ((void) sizeof ((uintptr_t)(x))) #define IMPLY(A, B) \ @@ -146,6 +156,7 @@ do { \ #define ASSERT3U VERIFY3U #define ASSERT3P VERIFY3P #define ASSERT0 VERIFY0 +#define ASSERT0P VERIFY0P #define ASSERT VERIFY #define assert VERIFY #define IMPLY(A, B) \ From 758c8daefcc78d53b219dd3ca357a958fbecd840 Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Tue, 12 Dec 2023 15:59:24 -0500 Subject: [PATCH 6/6] dbuf: Set dr_data when unoverriding after clone Block cloning normally creates dirty record without dr_data. But if the block is read after cloning, it is moved into DB_CACHED state and receives the data buffer. If after that we call dbuf_unoverride() to convert the dirty record into normal write, we should give it the data buffer from dbuf and release one. Reviewed-by: Kay Pedersen Reviewed-by: Brian Behlendorf Signed-off-by: Alexander Motin Sponsored by: iXsystems, Inc. Closes #15654 Closes #15656 --- module/zfs/dbuf.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index 7691cd85f6..e77461fb9c 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -1904,7 +1904,6 @@ dbuf_unoverride(dbuf_dirty_record_t *dr) dmu_buf_impl_t *db = dr->dr_dbuf; blkptr_t *bp = &dr->dt.dl.dr_overridden_by; uint64_t txg = dr->dr_txg; - boolean_t release; ASSERT(MUTEX_HELD(&db->db_mtx)); /* @@ -1925,7 +1924,10 @@ dbuf_unoverride(dbuf_dirty_record_t *dr) if (!BP_IS_HOLE(bp) && !dr->dt.dl.dr_nopwrite) zio_free(db->db_objset->os_spa, txg, bp); - release = !dr->dt.dl.dr_brtwrite; + if (dr->dt.dl.dr_brtwrite) { + ASSERT0P(dr->dt.dl.dr_data); + dr->dt.dl.dr_data = db->db_buf; + } dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN; dr->dt.dl.dr_nopwrite = B_FALSE; dr->dt.dl.dr_brtwrite = B_FALSE; @@ -1939,7 +1941,7 @@ dbuf_unoverride(dbuf_dirty_record_t *dr) * the buf thawed to save the effort of freezing & * immediately re-thawing it. */ - if (release) + if (dr->dt.dl.dr_data) arc_release(dr->dt.dl.dr_data, db); }