Merge pull request #198 from truenas/NAS-125539

NAS-125539 / None / Block Cloning Fixes and ZVOL Patch Backports
This commit is contained in:
Alexander Motin 2023-12-13 17:15:23 -05:00 committed by GitHub
commit b88422e3f2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 171 additions and 113 deletions

View File

@ -39,12 +39,14 @@
* ASSERT3U() - Assert unsigned X OP Y is true, if not panic.
* ASSERT3P() - Assert pointer X OP Y is true, if not panic.
* ASSERT0() - Assert value is zero, if not panic.
* ASSERT0P() - Assert pointer is null, if not panic.
* VERIFY() - Verify X is true, if not panic.
* VERIFY3B() - Verify boolean X OP Y is true, if not panic.
* VERIFY3S() - Verify signed X OP Y is true, if not panic.
* VERIFY3U() - Verify unsigned X OP Y is true, if not panic.
* VERIFY3P() - Verify pointer X OP Y is true, if not panic.
* VERIFY0() - Verify value is zero, if not panic.
* VERIFY0P() - Verify pointer is null, if not panic.
*/
#ifndef _SPL_DEBUG_H
@ -136,6 +138,15 @@ spl_assert(const char *buf, const char *file, const char *func, int line)
(long long) (_verify3_right)); \
} while (0)
#define VERIFY0P(RIGHT) do { \
const uintptr_t _verify0_right = (uintptr_t)(RIGHT); \
if (unlikely(!(0 == _verify0_right))) \
spl_panic(__FILE__, __FUNCTION__, __LINE__, \
"VERIFY0P(" #RIGHT ") " \
"failed (NULL == %p)\n", \
(void *)_verify0_right); \
} while (0)
/*
* Debugging disabled (--disable-debug)
*/
@ -151,6 +162,7 @@ spl_assert(const char *buf, const char *file, const char *func, int line)
#define ASSERT3P(x, y, z) \
((void) sizeof ((uintptr_t)(x)), (void) sizeof ((uintptr_t)(z)))
#define ASSERT0(x) ((void) sizeof ((uintptr_t)(x)))
#define ASSERT0P(x) ((void) sizeof ((uintptr_t)(x)))
#define IMPLY(A, B) \
((void) sizeof ((uintptr_t)(A)), (void) sizeof ((uintptr_t)(B)))
#define EQUIV(A, B) \
@ -166,6 +178,7 @@ spl_assert(const char *buf, const char *file, const char *func, int line)
#define ASSERT3U VERIFY3U
#define ASSERT3P VERIFY3P
#define ASSERT0 VERIFY0
#define ASSERT0P VERIFY0P
#define ASSERT VERIFY
#define IMPLY(A, B) \
((void)(likely((!(A)) || (B)) || \

View File

@ -34,12 +34,14 @@
* ASSERT3U() - Assert unsigned X OP Y is true, if not panic.
* ASSERT3P() - Assert pointer X OP Y is true, if not panic.
* ASSERT0() - Assert value is zero, if not panic.
* ASSERT0P() - Assert pointer is null, if not panic.
* VERIFY() - Verify X is true, if not panic.
* VERIFY3B() - Verify boolean X OP Y is true, if not panic.
* VERIFY3S() - Verify signed X OP Y is true, if not panic.
* VERIFY3U() - Verify unsigned X OP Y is true, if not panic.
* VERIFY3P() - Verify pointer X OP Y is true, if not panic.
* VERIFY0() - Verify value is zero, if not panic.
* VERIFY0P() - Verify pointer is null, if not panic.
*/
#ifndef _SPL_DEBUG_H
@ -140,6 +142,15 @@ spl_assert(const char *buf, const char *file, const char *func, int line)
(long long) (_verify3_right)); \
} while (0)
#define VERIFY0P(RIGHT) do { \
const uintptr_t _verify0_right = (uintptr_t)(RIGHT); \
if (unlikely(!(0 == _verify0_right))) \
spl_panic(__FILE__, __FUNCTION__, __LINE__, \
"VERIFY0P(" #RIGHT ") " \
"failed (NULL == %px)\n", \
(void *)_verify0_right); \
} while (0)
#define VERIFY_IMPLY(A, B) \
((void)(likely((!(A)) || (B)) || \
spl_assert("(" #A ") implies (" #B ")", \
@ -165,6 +176,7 @@ spl_assert(const char *buf, const char *file, const char *func, int line)
#define ASSERT3P(x, y, z) \
((void) sizeof ((uintptr_t)(x)), (void) sizeof ((uintptr_t)(z)))
#define ASSERT0(x) ((void) sizeof ((uintptr_t)(x)))
#define ASSERT0P(x) ((void) sizeof ((uintptr_t)(x)))
#define IMPLY(A, B) \
((void) sizeof ((uintptr_t)(A)), (void) sizeof ((uintptr_t)(B)))
#define EQUIV(A, B) \
@ -180,6 +192,7 @@ spl_assert(const char *buf, const char *file, const char *func, int line)
#define ASSERT3U VERIFY3U
#define ASSERT3P VERIFY3P
#define ASSERT0 VERIFY0
#define ASSERT0P VERIFY0P
#define ASSERT VERIFY
#define IMPLY VERIFY_IMPLY
#define EQUIV VERIFY_EQUIV

View File

@ -191,6 +191,7 @@ typedef enum {
ZFS_PROP_REDACTED,
ZFS_PROP_REDACT_SNAPS,
ZFS_PROP_SNAPSHOTS_CHANGED,
ZFS_PROP_VOLTHREADING,
ZFS_NUM_PROPS
} zfs_prop_t;

View File

@ -50,8 +50,9 @@ extern int zvol_get_stats(objset_t *, nvlist_t *);
extern boolean_t zvol_is_zvol(const char *);
extern void zvol_create_cb(objset_t *, void *, cred_t *, dmu_tx_t *);
extern int zvol_set_volsize(const char *, uint64_t);
extern int zvol_set_snapdev(const char *, zprop_source_t, uint64_t);
extern int zvol_set_volmode(const char *, zprop_source_t, uint64_t);
extern int zvol_set_volthreading(const char *, boolean_t);
extern int zvol_set_common(const char *, zfs_prop_t, zprop_source_t, uint64_t);
extern int zvol_set_ro(const char *, boolean_t);
extern zvol_state_handle_t *zvol_suspend(const char *);
extern int zvol_resume(zvol_state_handle_t *);
extern void *zvol_tag(zvol_state_handle_t *);

View File

@ -58,6 +58,7 @@ typedef struct zvol_state {
atomic_t zv_suspend_ref; /* refcount for suspend */
krwlock_t zv_suspend_lock; /* suspend lock */
struct zvol_state_os *zv_zso; /* private platform state */
boolean_t zv_threading; /* volthreading property */
} zvol_state_t;

View File

@ -120,6 +120,15 @@ do { \
(u_longlong_t)__left); \
} while (0)
#define VERIFY0P(LEFT) \
do { \
const uintptr_t __left = (uintptr_t)(LEFT); \
if (!(__left == 0)) \
libspl_assertf(__FILE__, __FUNCTION__, __LINE__, \
"%s == 0 (%p == 0)", #LEFT, \
(void *)__left); \
} while (0)
#ifdef assert
#undef assert
#endif
@ -134,6 +143,7 @@ do { \
#define ASSERT3P(x, y, z) \
((void) sizeof ((uintptr_t)(x)), (void) sizeof ((uintptr_t)(z)))
#define ASSERT0(x) ((void) sizeof ((uintptr_t)(x)))
#define ASSERT0P(x) ((void) sizeof ((uintptr_t)(x)))
#define ASSERT(x) ((void) sizeof ((uintptr_t)(x)))
#define assert(x) ((void) sizeof ((uintptr_t)(x)))
#define IMPLY(A, B) \
@ -146,6 +156,7 @@ do { \
#define ASSERT3U VERIFY3U
#define ASSERT3P VERIFY3P
#define ASSERT0 VERIFY0
#define ASSERT0P VERIFY0P
#define ASSERT VERIFY
#define assert VERIFY
#define IMPLY(A, B) \

View File

@ -1867,7 +1867,8 @@
<enumerator name='ZFS_PROP_REDACTED' value='93'/>
<enumerator name='ZFS_PROP_REDACT_SNAPS' value='94'/>
<enumerator name='ZFS_PROP_SNAPSHOTS_CHANGED' value='95'/>
<enumerator name='ZFS_NUM_PROPS' value='96'/>
<enumerator name='ZFS_PROP_VOLTHREADING' value='96'/>
<enumerator name='ZFS_NUM_PROPS' value='97'/>
</enum-decl>
<typedef-decl name='zfs_prop_t' type-id='4b000d60' id='58603c44'/>
<enum-decl name='zprop_source_t' naming-typedef-id='a2256d42' id='5903f80e'>

View File

@ -1192,6 +1192,18 @@ are equivalent to the
and
.Sy noexec
mount options.
.It Sy volthreading Ns = Ns Sy on Ns | Ns Sy off
Controls internal zvol threading.
The value
.Sy off
disables zvol threading, and zvol relies on application threads.
The default value is
.Sy on ,
which enables threading within a zvol.
Please note that this property will be overridden by
.Sy zvol_request_sync
module parameter.
This property is only applicable to Linux.
.It Sy filesystem_limit Ns = Ns Ar count Ns | Ns Sy none
Limits the number of filesystems and volumes that can exist under this point in
the dataset tree.

View File

@ -512,7 +512,7 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq,
uint64_t size = io_size(bio, rq);
int rw = io_data_dir(bio, rq);
if (zvol_request_sync)
if (zvol_request_sync || zv->zv_threading == B_FALSE)
force_sync = 1;
zv_request_t zvr = {
@ -1304,6 +1304,7 @@ zvol_os_create_minor(const char *name)
int error = 0;
int idx;
uint64_t hash = zvol_name_hash(name);
uint64_t volthreading;
bool replayed_zil = B_FALSE;
if (zvol_inhibit_dev)
@ -1350,6 +1351,12 @@ zvol_os_create_minor(const char *name)
zv->zv_volsize = volsize;
zv->zv_objset = os;
/* Default */
zv->zv_threading = B_TRUE;
if (dsl_prop_get_integer(name, "volthreading", &volthreading, NULL)
== 0)
zv->zv_threading = volthreading;
set_capacity(zv->zv_zso->zvo_disk, zv->zv_volsize >> 9);
blk_queue_max_hw_sectors(zv->zv_zso->zvo_queue,

View File

@ -611,6 +611,9 @@ zfs_prop_init(void)
ZVOL_DEFAULT_BLOCKSIZE, PROP_ONETIME,
ZFS_TYPE_VOLUME, "512 to 128k, power of 2", "VOLBLOCK", B_FALSE,
sfeatures);
zprop_register_index(ZFS_PROP_VOLTHREADING, "volthreading",
1, PROP_DEFAULT, ZFS_TYPE_VOLUME, "on | off", "zvol threading",
boolean_table, sfeatures);
zprop_register_number(ZFS_PROP_USEDSNAP, "usedbysnapshots", 0,
PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size>",
"USEDSNAP", B_FALSE, sfeatures);

View File

@ -1904,7 +1904,6 @@ dbuf_unoverride(dbuf_dirty_record_t *dr)
dmu_buf_impl_t *db = dr->dr_dbuf;
blkptr_t *bp = &dr->dt.dl.dr_overridden_by;
uint64_t txg = dr->dr_txg;
boolean_t release;
ASSERT(MUTEX_HELD(&db->db_mtx));
/*
@ -1925,7 +1924,10 @@ dbuf_unoverride(dbuf_dirty_record_t *dr)
if (!BP_IS_HOLE(bp) && !dr->dt.dl.dr_nopwrite)
zio_free(db->db_objset->os_spa, txg, bp);
release = !dr->dt.dl.dr_brtwrite;
if (dr->dt.dl.dr_brtwrite) {
ASSERT0P(dr->dt.dl.dr_data);
dr->dt.dl.dr_data = db->db_buf;
}
dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN;
dr->dt.dl.dr_nopwrite = B_FALSE;
dr->dt.dl.dr_brtwrite = B_FALSE;
@ -1939,7 +1941,7 @@ dbuf_unoverride(dbuf_dirty_record_t *dr)
* the buf thawed to save the effort of freezing &
* immediately re-thawing it.
*/
if (release)
if (dr->dt.dl.dr_data)
arc_release(dr->dt.dl.dr_data, db);
}
@ -2930,7 +2932,8 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx)
while (db->db_state == DB_READ || db->db_state == DB_FILL)
cv_wait(&db->db_changed, &db->db_mtx);
ASSERT(db->db_state == DB_CACHED || db->db_state == DB_UNCACHED);
ASSERT(db->db_state == DB_CACHED || db->db_state == DB_UNCACHED ||
db->db_state == DB_NOFILL);
if (db->db_state == DB_CACHED &&
zfs_refcount_count(&db->db_holds) - 1 > db->db_dirtycnt) {
@ -2967,6 +2970,15 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx)
arc_buf_destroy(db->db_buf, db);
}
db->db_buf = NULL;
} else if (db->db_state == DB_NOFILL) {
/*
* We will be completely replacing the cloned block. In case
* it was cloned in this transaction group, let's undirty the
* pending clone and mark the block as uncached. This will be
* as if the clone was never done.
*/
VERIFY(!dbuf_undirty(db, tx));
db->db_state = DB_UNCACHED;
}
ASSERT(db->db_buf == NULL);
dbuf_set_data(db, buf);

View File

@ -2523,11 +2523,27 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source,
case ZFS_PROP_VOLSIZE:
err = zvol_set_volsize(dsname, intval);
break;
case ZFS_PROP_SNAPDEV:
err = zvol_set_snapdev(dsname, source, intval);
case ZFS_PROP_VOLTHREADING:
err = zvol_set_volthreading(dsname, intval);
/*
* Set err to -1 to force the zfs_set_prop_nvlist code down the
* default path to set the value in the nvlist.
*/
if (err == 0)
err = -1;
break;
case ZFS_PROP_SNAPDEV:
case ZFS_PROP_VOLMODE:
err = zvol_set_volmode(dsname, source, intval);
err = zvol_set_common(dsname, prop, source, intval);
break;
case ZFS_PROP_READONLY:
err = zvol_set_ro(dsname, intval);
/*
* Set err to -1 to force the zfs_set_prop_nvlist code down the
* default path to set the value in the nvlist.
*/
if (err == 0)
err = -1;
break;
case ZFS_PROP_VERSION:
{

View File

@ -369,6 +369,40 @@ out:
return (SET_ERROR(error));
}
/*
* Update volthreading.
*/
int
zvol_set_volthreading(const char *name, boolean_t value)
{
zvol_state_t *zv = zvol_find_by_name(name, RW_NONE);
if (zv == NULL)
return (ENOENT);
zv->zv_threading = value;
mutex_exit(&zv->zv_state_lock);
return (0);
}
/*
* Update zvol ro property.
*/
int
zvol_set_ro(const char *name, boolean_t value)
{
zvol_state_t *zv = zvol_find_by_name(name, RW_NONE);
if (zv == NULL)
return (-1);
if (value) {
zvol_os_set_disk_ro(zv, 1);
zv->zv_flags |= ZVOL_RDONLY;
} else {
zvol_os_set_disk_ro(zv, 0);
zv->zv_flags &= ~ZVOL_RDONLY;
}
mutex_exit(&zv->zv_state_lock);
return (0);
}
/*
* Sanity check volume block size.
*/
@ -1541,6 +1575,7 @@ typedef struct zvol_set_prop_int_arg {
const char *zsda_name;
uint64_t zsda_value;
zprop_source_t zsda_source;
zfs_prop_t zsda_prop;
dmu_tx_t *zsda_tx;
} zvol_set_prop_int_arg_t;
@ -1549,7 +1584,7 @@ typedef struct zvol_set_prop_int_arg {
* conditions are imposed.
*/
static int
zvol_set_snapdev_check(void *arg, dmu_tx_t *tx)
zvol_set_common_check(void *arg, dmu_tx_t *tx)
{
zvol_set_prop_int_arg_t *zsda = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
@ -1566,17 +1601,33 @@ zvol_set_snapdev_check(void *arg, dmu_tx_t *tx)
}
static int
zvol_set_snapdev_sync_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg)
zvol_set_common_sync_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg)
{
(void) arg;
zvol_set_prop_int_arg_t *zsda = arg;
char dsname[MAXNAMELEN];
zvol_task_t *task;
uint64_t snapdev;
uint64_t prop;
const char *prop_name = zfs_prop_to_name(zsda->zsda_prop);
dsl_dataset_name(ds, dsname);
if (dsl_prop_get_int_ds(ds, "snapdev", &snapdev) != 0)
if (dsl_prop_get_int_ds(ds, prop_name, &prop) != 0)
return (0);
task = zvol_task_alloc(ZVOL_ASYNC_SET_SNAPDEV, dsname, NULL, snapdev);
switch (zsda->zsda_prop) {
case ZFS_PROP_VOLMODE:
task = zvol_task_alloc(ZVOL_ASYNC_SET_VOLMODE, dsname,
NULL, prop);
break;
case ZFS_PROP_SNAPDEV:
task = zvol_task_alloc(ZVOL_ASYNC_SET_SNAPDEV, dsname,
NULL, prop);
break;
default:
task = NULL;
break;
}
if (task == NULL)
return (0);
@ -1586,14 +1637,14 @@ zvol_set_snapdev_sync_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg)
}
/*
* Traverse all child datasets and apply snapdev appropriately.
* Traverse all child datasets and apply the property appropriately.
* We call dsl_prop_set_sync_impl() here to set the value only on the toplevel
* dataset and read the effective "snapdev" on every child in the callback
* dataset and read the effective "property" on every child in the callback
* function: this is because the value is not guaranteed to be the same in the
* whole dataset hierarchy.
*/
static void
zvol_set_snapdev_sync(void *arg, dmu_tx_t *tx)
zvol_set_common_sync(void *arg, dmu_tx_t *tx)
{
zvol_set_prop_int_arg_t *zsda = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
@ -1606,115 +1657,31 @@ zvol_set_snapdev_sync(void *arg, dmu_tx_t *tx)
error = dsl_dataset_hold(dp, zsda->zsda_name, FTAG, &ds);
if (error == 0) {
dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_SNAPDEV),
dsl_prop_set_sync_impl(ds, zfs_prop_to_name(zsda->zsda_prop),
zsda->zsda_source, sizeof (zsda->zsda_value), 1,
&zsda->zsda_value, zsda->zsda_tx);
dsl_dataset_rele(ds, FTAG);
}
dmu_objset_find_dp(dp, dd->dd_object, zvol_set_snapdev_sync_cb,
dmu_objset_find_dp(dp, dd->dd_object, zvol_set_common_sync_cb,
zsda, DS_FIND_CHILDREN);
dsl_dir_rele(dd, FTAG);
}
int
zvol_set_snapdev(const char *ddname, zprop_source_t source, uint64_t snapdev)
zvol_set_common(const char *ddname, zfs_prop_t prop, zprop_source_t source,
uint64_t val)
{
zvol_set_prop_int_arg_t zsda;
zsda.zsda_name = ddname;
zsda.zsda_source = source;
zsda.zsda_value = snapdev;
zsda.zsda_value = val;
zsda.zsda_prop = prop;
return (dsl_sync_task(ddname, zvol_set_snapdev_check,
zvol_set_snapdev_sync, &zsda, 0, ZFS_SPACE_CHECK_NONE));
}
/*
* Sanity check the dataset for safe use by the sync task. No additional
* conditions are imposed.
*/
static int
zvol_set_volmode_check(void *arg, dmu_tx_t *tx)
{
zvol_set_prop_int_arg_t *zsda = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
dsl_dir_t *dd;
int error;
error = dsl_dir_hold(dp, zsda->zsda_name, FTAG, &dd, NULL);
if (error != 0)
return (error);
dsl_dir_rele(dd, FTAG);
return (error);
}
static int
zvol_set_volmode_sync_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg)
{
(void) arg;
char dsname[MAXNAMELEN];
zvol_task_t *task;
uint64_t volmode;
dsl_dataset_name(ds, dsname);
if (dsl_prop_get_int_ds(ds, "volmode", &volmode) != 0)
return (0);
task = zvol_task_alloc(ZVOL_ASYNC_SET_VOLMODE, dsname, NULL, volmode);
if (task == NULL)
return (0);
(void) taskq_dispatch(dp->dp_spa->spa_zvol_taskq, zvol_task_cb,
task, TQ_SLEEP);
return (0);
}
/*
* Traverse all child datasets and apply volmode appropriately.
* We call dsl_prop_set_sync_impl() here to set the value only on the toplevel
* dataset and read the effective "volmode" on every child in the callback
* function: this is because the value is not guaranteed to be the same in the
* whole dataset hierarchy.
*/
static void
zvol_set_volmode_sync(void *arg, dmu_tx_t *tx)
{
zvol_set_prop_int_arg_t *zsda = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
dsl_dir_t *dd;
dsl_dataset_t *ds;
int error;
VERIFY0(dsl_dir_hold(dp, zsda->zsda_name, FTAG, &dd, NULL));
zsda->zsda_tx = tx;
error = dsl_dataset_hold(dp, zsda->zsda_name, FTAG, &ds);
if (error == 0) {
dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_VOLMODE),
zsda->zsda_source, sizeof (zsda->zsda_value), 1,
&zsda->zsda_value, zsda->zsda_tx);
dsl_dataset_rele(ds, FTAG);
}
dmu_objset_find_dp(dp, dd->dd_object, zvol_set_volmode_sync_cb,
zsda, DS_FIND_CHILDREN);
dsl_dir_rele(dd, FTAG);
}
int
zvol_set_volmode(const char *ddname, zprop_source_t source, uint64_t volmode)
{
zvol_set_prop_int_arg_t zsda;
zsda.zsda_name = ddname;
zsda.zsda_source = source;
zsda.zsda_value = volmode;
return (dsl_sync_task(ddname, zvol_set_volmode_check,
zvol_set_volmode_sync, &zsda, 0, ZFS_SPACE_CHECK_NONE));
return (dsl_sync_task(ddname, zvol_set_common_check,
zvol_set_common_sync, &zsda, 0, ZFS_SPACE_CHECK_NONE));
}
void