Update linux-kernel-disk ZVOL implementation
The ZVOL interfaces changed significantly with the latest update. I've updated the Linux version of the code to handle this, and the net result is a simpler implementation, which is good! Plus, I'm relatively sure the ZIL integration is right this time, although it needs some serious crash testing to verify that. This commit also makes minor additions to vdev_disk for the .hold and .rele callbacks. Currently they do nothing, and I may be able to simply stub them out with NULLs for Linux, since opening the device in Linux should have much the same effect. More investigation is needed, though, since the ZFS interface may make some demands here that I'm overlooking.
This commit is contained in:
parent
2a00f10bf8
commit
800b7a03e1
|
@ -208,7 +208,7 @@ struct spa {
|
||||||
kmutex_t spa_proc_lock; /* protects spa_proc* */
|
kmutex_t spa_proc_lock; /* protects spa_proc* */
|
||||||
kcondvar_t spa_proc_cv; /* spa_proc_state transitions */
|
kcondvar_t spa_proc_cv; /* spa_proc_state transitions */
|
||||||
spa_proc_state_t spa_proc_state; /* see definition */
|
spa_proc_state_t spa_proc_state; /* see definition */
|
||||||
struct proc *spa_proc; /* "zpool-poolname" process */
|
proc_t *spa_proc; /* "zpool-poolname" process */
|
||||||
uint64_t spa_did; /* if procp != p0, did of t1 */
|
uint64_t spa_did; /* if procp != p0, did of t1 */
|
||||||
boolean_t spa_autoreplace; /* autoreplace set in open */
|
boolean_t spa_autoreplace; /* autoreplace set in open */
|
||||||
int spa_vdev_locks; /* locks grabbed */
|
int spa_vdev_locks; /* locks grabbed */
|
||||||
|
|
|
@ -33,6 +33,7 @@
|
||||||
#include <sys/zfs_vfsops.h>
|
#include <sys/zfs_vfsops.h>
|
||||||
#endif
|
#endif
|
||||||
#include <sys/avl.h>
|
#include <sys/avl.h>
|
||||||
|
#include <sys/list.h>
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
extern "C" {
|
extern "C" {
|
||||||
|
|
|
@ -1072,6 +1072,12 @@ vdev_open_child(void *arg)
|
||||||
boolean_t
|
boolean_t
|
||||||
vdev_uses_zvols(vdev_t *vd)
|
vdev_uses_zvols(vdev_t *vd)
|
||||||
{
|
{
|
||||||
|
/*
|
||||||
|
* NOTE: Disabled because under Linux I've chosen not to put all the zvols
|
||||||
|
* in their own directory. This could be changed or this code can be updated
|
||||||
|
* to perhaps run an ioctl() on the vdev path to determine if it is a zvol.
|
||||||
|
*/
|
||||||
|
#if 0
|
||||||
int c;
|
int c;
|
||||||
|
|
||||||
if (vd->vdev_path && strncmp(vd->vdev_path, ZVOL_DIR,
|
if (vd->vdev_path && strncmp(vd->vdev_path, ZVOL_DIR,
|
||||||
|
@ -1080,6 +1086,7 @@ vdev_uses_zvols(vdev_t *vd)
|
||||||
for (c = 0; c < vd->vdev_children; c++)
|
for (c = 0; c < vd->vdev_children; c++)
|
||||||
if (vdev_uses_zvols(vd->vdev_child[c]))
|
if (vdev_uses_zvols(vd->vdev_child[c]))
|
||||||
return (B_TRUE);
|
return (B_TRUE);
|
||||||
|
#endif
|
||||||
return (B_FALSE);
|
return (B_FALSE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -550,6 +550,35 @@ vdev_disk_io_done(zio_t *zio)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
vdev_disk_hold(vdev_t *vd)
|
||||||
|
{
|
||||||
|
ASSERT(spa_config_held(vd->vdev_spa, SCL_STATE, RW_WRITER));
|
||||||
|
|
||||||
|
/* We must have a pathname, and it must be absolute. */
|
||||||
|
if (vd->vdev_path == NULL || vd->vdev_path[0] != '/')
|
||||||
|
return;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Only prefetch path and devid info if the device has
|
||||||
|
* never been opened.
|
||||||
|
*/
|
||||||
|
if (vd->vdev_tsd != NULL)
|
||||||
|
return;
|
||||||
|
|
||||||
|
/* XXX: Implement me as a vnode lookup for the device */
|
||||||
|
vd->vdev_name_vp = NULL;
|
||||||
|
vd->vdev_devid_vp = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
vdev_disk_rele(vdev_t *vd)
|
||||||
|
{
|
||||||
|
ASSERT(spa_config_held(vd->vdev_spa, SCL_STATE, RW_WRITER));
|
||||||
|
|
||||||
|
/* XXX: Implement me as a vnode rele for the device */
|
||||||
|
}
|
||||||
|
|
||||||
vdev_ops_t vdev_disk_ops = {
|
vdev_ops_t vdev_disk_ops = {
|
||||||
vdev_disk_open,
|
vdev_disk_open,
|
||||||
vdev_disk_close,
|
vdev_disk_close,
|
||||||
|
@ -557,6 +586,8 @@ vdev_ops_t vdev_disk_ops = {
|
||||||
vdev_disk_io_start,
|
vdev_disk_io_start,
|
||||||
vdev_disk_io_done,
|
vdev_disk_io_done,
|
||||||
NULL,
|
NULL,
|
||||||
|
vdev_disk_hold,
|
||||||
|
vdev_disk_rele,
|
||||||
VDEV_TYPE_DISK, /* name of this vdev type */
|
VDEV_TYPE_DISK, /* name of this vdev type */
|
||||||
B_TRUE /* leaf vdev */
|
B_TRUE /* leaf vdev */
|
||||||
};
|
};
|
||||||
|
|
|
@ -1965,8 +1965,7 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source,
|
||||||
err = dsl_dataset_set_reservation(dsname, source, intval);
|
err = dsl_dataset_set_reservation(dsname, source, intval);
|
||||||
break;
|
break;
|
||||||
case ZFS_PROP_VOLSIZE:
|
case ZFS_PROP_VOLSIZE:
|
||||||
err = zvol_set_volsize(dsname, ddi_driver_major(zfs_dip),
|
err = zvol_set_volsize(dsname, intval);
|
||||||
intval);
|
|
||||||
break;
|
break;
|
||||||
case ZFS_PROP_VERSION:
|
case ZFS_PROP_VERSION:
|
||||||
{
|
{
|
||||||
|
@ -2832,9 +2831,18 @@ zfs_ioc_create(zfs_cmd_t *zc)
|
||||||
if (error == 0) {
|
if (error == 0) {
|
||||||
error = zfs_set_prop_nvlist(zc->zc_name, ZPROP_SRC_LOCAL,
|
error = zfs_set_prop_nvlist(zc->zc_name, ZPROP_SRC_LOCAL,
|
||||||
nvprops, NULL);
|
nvprops, NULL);
|
||||||
|
if (error != 0) {
|
||||||
|
(void) dmu_objset_destroy(zc->zc_name, B_FALSE);
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (type == DMU_OST_ZVOL) {
|
||||||
|
error = zvol_create_minor(zc->zc_name);
|
||||||
if (error != 0)
|
if (error != 0)
|
||||||
(void) dmu_objset_destroy(zc->zc_name, B_FALSE);
|
(void) dmu_objset_destroy(zc->zc_name, B_FALSE);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
out:
|
||||||
nvlist_free(nvprops);
|
nvlist_free(nvprops);
|
||||||
return (error);
|
return (error);
|
||||||
}
|
}
|
||||||
|
|
|
@ -51,6 +51,7 @@ unsigned int zvol_threads = 0;
|
||||||
static taskq_t *zvol_taskq;
|
static taskq_t *zvol_taskq;
|
||||||
static kmutex_t zvol_state_lock;
|
static kmutex_t zvol_state_lock;
|
||||||
static list_t zvol_state_list;
|
static list_t zvol_state_list;
|
||||||
|
static char *zvol_tag = "zvol_tag";
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The in-core state of each volume.
|
* The in-core state of each volume.
|
||||||
|
@ -59,11 +60,12 @@ typedef struct zvol_state {
|
||||||
uint64_t zv_volsize; /* advertised space */
|
uint64_t zv_volsize; /* advertised space */
|
||||||
uint64_t zv_volblocksize;/* volume block size */
|
uint64_t zv_volblocksize;/* volume block size */
|
||||||
objset_t *zv_objset; /* objset handle */
|
objset_t *zv_objset; /* objset handle */
|
||||||
uint32_t zv_mode; /* DS_MODE_* at open time */
|
uint32_t zv_flags; /* ZVOL_* flags */
|
||||||
uint32_t zv_open_count; /* open counts */
|
uint32_t zv_open_count; /* open counts */
|
||||||
uint32_t zv_changed; /* disk changed */
|
uint32_t zv_changed; /* disk changed */
|
||||||
zilog_t *zv_zilog; /* ZIL handle */
|
zilog_t *zv_zilog; /* ZIL handle */
|
||||||
znode_t zv_znode; /* for range locking */
|
znode_t zv_znode; /* for range locking */
|
||||||
|
dmu_buf_t *zv_dbuf; /* bonus handle */
|
||||||
dev_t zv_dev; /* device id */
|
dev_t zv_dev; /* device id */
|
||||||
struct gendisk *zv_disk; /* generic disk */
|
struct gendisk *zv_disk; /* generic disk */
|
||||||
struct request_queue *zv_queue; /* request queue */
|
struct request_queue *zv_queue; /* request queue */
|
||||||
|
@ -71,6 +73,8 @@ typedef struct zvol_state {
|
||||||
list_node_t zv_next; /* next zvol_state_t linkage */
|
list_node_t zv_next; /* next zvol_state_t linkage */
|
||||||
} zvol_state_t;
|
} zvol_state_t;
|
||||||
|
|
||||||
|
#define ZVOL_RDONLY 0x1
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Find the next available range of ZVOL_MINORS minor numbers. The
|
* Find the next available range of ZVOL_MINORS minor numbers. The
|
||||||
* zvol_state_list is kept in ascending minor order so we simply need
|
* zvol_state_list is kept in ascending minor order so we simply need
|
||||||
|
@ -197,15 +201,6 @@ zvol_get_stats(objset_t *os, nvlist_t *nv)
|
||||||
return (error);
|
return (error);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Notification handler for objset readonly property changes.
|
|
||||||
*/
|
|
||||||
static void
|
|
||||||
zvol_readonly_changed_cb(void *arg, uint64_t value)
|
|
||||||
{
|
|
||||||
set_disk_ro(((zvol_state_t *)arg)->zv_disk, !!value);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Sanity check volume size.
|
* Sanity check volume size.
|
||||||
*/
|
*/
|
||||||
|
@ -278,40 +273,44 @@ int
|
||||||
zvol_set_volsize(const char *name, uint64_t volsize)
|
zvol_set_volsize(const char *name, uint64_t volsize)
|
||||||
{
|
{
|
||||||
zvol_state_t *zv;
|
zvol_state_t *zv;
|
||||||
int error;
|
|
||||||
dmu_object_info_t doi;
|
dmu_object_info_t doi;
|
||||||
uint64_t old_volsize = 0ULL;
|
objset_t *os = NULL;
|
||||||
zvol_state_t state = { 0 };
|
zvol_state_t state = { 0 };
|
||||||
|
uint64_t old_volsize = 0ULL;
|
||||||
|
uint64_t readonly;
|
||||||
|
int error;
|
||||||
|
|
||||||
mutex_enter(&zvol_state_lock);
|
mutex_enter(&zvol_state_lock);
|
||||||
|
|
||||||
zv = zvol_find_by_name(name);
|
zv = zvol_find_by_name(name);
|
||||||
if (zv == NULL) {
|
if (zv == NULL) {
|
||||||
/*
|
error = dmu_objset_hold(name, FTAG, &os);
|
||||||
* If we are doing a "zfs clone -o volsize=", then the
|
if (error)
|
||||||
* minor node won't exist yet.
|
|
||||||
*/
|
|
||||||
error = dmu_objset_open(name, DMU_OST_ZVOL, DS_MODE_OWNER,
|
|
||||||
&state.zv_objset);
|
|
||||||
if (error != 0)
|
|
||||||
goto out;
|
goto out;
|
||||||
zv = &state;
|
zv = &state;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
VERIFY(dsl_prop_get_integer(name, "readonly", &readonly, NULL) == 0);
|
||||||
|
if (readonly) {
|
||||||
|
error = EROFS;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
old_volsize = zv->zv_volsize;
|
old_volsize = zv->zv_volsize;
|
||||||
|
|
||||||
if ((error = dmu_object_info(zv->zv_objset, ZVOL_OBJ, &doi)) != 0 ||
|
if ((error = dmu_object_info(os, ZVOL_OBJ, &doi)) != 0 ||
|
||||||
(error = zvol_check_volsize(volsize,doi.doi_data_block_size)) != 0)
|
(error = zvol_check_volsize(volsize,doi.doi_data_block_size)) != 0)
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
if (get_disk_ro(zv->zv_disk) || (zv->zv_mode & DS_MODE_READONLY)) {
|
if (get_disk_ro(zv->zv_disk) || (zv->zv_flags & ZVOL_RDONLY)) {
|
||||||
error = EROFS;
|
error = EROFS;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
error = zvol_update_volsize(zv, volsize);
|
error = zvol_update_volsize(zv, volsize);
|
||||||
out:
|
out:
|
||||||
if (state.zv_objset)
|
if (os)
|
||||||
dmu_objset_close(state.zv_objset);
|
dmu_objset_rele(os, FTAG);
|
||||||
|
|
||||||
mutex_exit(&zvol_state_lock);
|
mutex_exit(&zvol_state_lock);
|
||||||
|
|
||||||
|
@ -348,7 +347,7 @@ zvol_set_volblocksize(const char *name, uint64_t volblocksize)
|
||||||
if (zv == NULL)
|
if (zv == NULL)
|
||||||
return (ENXIO);
|
return (ENXIO);
|
||||||
|
|
||||||
if (get_disk_ro(zv->zv_disk) || (zv->zv_mode & DS_MODE_READONLY))
|
if (get_disk_ro(zv->zv_disk) || (zv->zv_flags & ZVOL_RDONLY))
|
||||||
return (EROFS);
|
return (EROFS);
|
||||||
|
|
||||||
tx = dmu_tx_create(zv->zv_objset);
|
tx = dmu_tx_create(zv->zv_objset);
|
||||||
|
@ -441,16 +440,9 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx,
|
||||||
zilog_t *zilog = zv->zv_zilog;
|
zilog_t *zilog = zv->zv_zilog;
|
||||||
boolean_t slogging;
|
boolean_t slogging;
|
||||||
|
|
||||||
if (zil_disable)
|
if (zil_replaying(zilog, tx))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
if (zilog->zl_replay) {
|
|
||||||
dsl_dataset_dirty(dmu_objset_ds(zilog->zl_os), tx);
|
|
||||||
zilog->zl_replayed_seq[dmu_tx_get_txg(tx) & TXG_MASK] =
|
|
||||||
zilog->zl_replaying_seq;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
slogging = spa_has_slogs(zilog->zl_spa);
|
slogging = spa_has_slogs(zilog->zl_spa);
|
||||||
|
|
||||||
while (size) {
|
while (size) {
|
||||||
|
@ -480,8 +472,7 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx,
|
||||||
lr = (lr_write_t *)&itx->itx_lr;
|
lr = (lr_write_t *)&itx->itx_lr;
|
||||||
if (write_state == WR_COPIED && dmu_read(zv->zv_objset,
|
if (write_state == WR_COPIED && dmu_read(zv->zv_objset,
|
||||||
ZVOL_OBJ, offset, len, lr+1, DMU_READ_NO_PREFETCH) != 0) {
|
ZVOL_OBJ, offset, len, lr+1, DMU_READ_NO_PREFETCH) != 0) {
|
||||||
kmem_free(itx, offsetof(itx_t, itx_lr) +
|
zil_itx_destroy(itx);
|
||||||
itx->itx_lr.lrc_reclen);
|
|
||||||
itx = zil_itx_create(TX_WRITE, sizeof (*lr));
|
itx = zil_itx_create(TX_WRITE, sizeof (*lr));
|
||||||
lr = (lr_write_t *)&itx->itx_lr;
|
lr = (lr_write_t *)&itx->itx_lr;
|
||||||
write_state = WR_NEED_COPY;
|
write_state = WR_NEED_COPY;
|
||||||
|
@ -493,8 +484,7 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx,
|
||||||
lr->lr_foid = ZVOL_OBJ;
|
lr->lr_foid = ZVOL_OBJ;
|
||||||
lr->lr_offset = offset;
|
lr->lr_offset = offset;
|
||||||
lr->lr_length = len;
|
lr->lr_length = len;
|
||||||
lr->lr_blkoff = offset -
|
lr->lr_blkoff = 0;
|
||||||
P2ALIGN_TYPED(offset, blocksize, uint64_t);
|
|
||||||
BP_ZERO(&lr->lr_blkptr);
|
BP_ZERO(&lr->lr_blkptr);
|
||||||
|
|
||||||
itx->itx_private = zv;
|
itx->itx_private = zv;
|
||||||
|
@ -520,13 +510,10 @@ zvol_write(void *arg)
|
||||||
zvol_state_t *zv = q->queuedata;
|
zvol_state_t *zv = q->queuedata;
|
||||||
uint64_t offset = blk_rq_pos(req) << 9;
|
uint64_t offset = blk_rq_pos(req) << 9;
|
||||||
uint64_t size = blk_rq_bytes(req);
|
uint64_t size = blk_rq_bytes(req);
|
||||||
int sync = 0, error = 0;
|
int error = 0;
|
||||||
dmu_tx_t *tx;
|
dmu_tx_t *tx;
|
||||||
rl_t *rl;
|
rl_t *rl;
|
||||||
|
|
||||||
if (rq_is_sync(req) && !zil_disable)
|
|
||||||
sync = 1;
|
|
||||||
|
|
||||||
rl = zfs_range_lock(&zv->zv_znode, offset, size, RL_WRITER);
|
rl = zfs_range_lock(&zv->zv_znode, offset, size, RL_WRITER);
|
||||||
|
|
||||||
tx = dmu_tx_create(zv->zv_objset);
|
tx = dmu_tx_create(zv->zv_objset);
|
||||||
|
@ -541,13 +528,14 @@ zvol_write(void *arg)
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
dmu_write_req(zv->zv_objset, ZVOL_OBJ, req, tx);
|
error = dmu_write_req(zv->zv_objset, ZVOL_OBJ, req, tx);
|
||||||
zvol_log_write(zv, tx, offset, size, sync);
|
if (error == 0)
|
||||||
|
zvol_log_write(zv, tx, offset, size, rq_is_sync(req));
|
||||||
|
|
||||||
dmu_tx_commit(tx);
|
dmu_tx_commit(tx);
|
||||||
zfs_range_unlock(rl);
|
zfs_range_unlock(rl);
|
||||||
|
|
||||||
if (sync)
|
if (rq_is_sync(req))
|
||||||
zil_commit(zv->zv_zilog, UINT64_MAX, ZVOL_OBJ);
|
zil_commit(zv->zv_zilog, UINT64_MAX, ZVOL_OBJ);
|
||||||
|
|
||||||
blk_end_request(req, -error, size);
|
blk_end_request(req, -error, size);
|
||||||
|
@ -643,7 +631,7 @@ zvol_request(struct request_queue *q)
|
||||||
break;
|
break;
|
||||||
case WRITE:
|
case WRITE:
|
||||||
if (unlikely(get_disk_ro(zv->zv_disk)) ||
|
if (unlikely(get_disk_ro(zv->zv_disk)) ||
|
||||||
unlikely(zv->zv_mode & DS_MODE_READONLY)) {
|
unlikely(zv->zv_flags & ZVOL_RDONLY)) {
|
||||||
__blk_end_request(req, -EROFS, size);
|
__blk_end_request(req, -EROFS, size);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -659,6 +647,77 @@ zvol_request(struct request_queue *q)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
zvol_get_done(zgd_t *zgd, int error)
|
||||||
|
{
|
||||||
|
if (zgd->zgd_db)
|
||||||
|
dmu_buf_rele(zgd->zgd_db, zgd);
|
||||||
|
|
||||||
|
zfs_range_unlock(zgd->zgd_rl);
|
||||||
|
|
||||||
|
if (error == 0 && zgd->zgd_bp)
|
||||||
|
zil_add_block(zgd->zgd_zilog, zgd->zgd_bp);
|
||||||
|
|
||||||
|
kmem_free(zgd, sizeof (zgd_t));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Get data to generate a TX_WRITE intent log record.
|
||||||
|
*/
|
||||||
|
static int
|
||||||
|
zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
|
||||||
|
{
|
||||||
|
zvol_state_t *zv = arg;
|
||||||
|
objset_t *os = zv->zv_objset;
|
||||||
|
uint64_t offset = lr->lr_offset;
|
||||||
|
uint64_t size = lr->lr_length;
|
||||||
|
dmu_buf_t *db;
|
||||||
|
zgd_t *zgd;
|
||||||
|
int error;
|
||||||
|
|
||||||
|
ASSERT(zio != NULL);
|
||||||
|
ASSERT(size != 0);
|
||||||
|
|
||||||
|
zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP);
|
||||||
|
zgd->zgd_zilog = zv->zv_zilog;
|
||||||
|
zgd->zgd_rl = zfs_range_lock(&zv->zv_znode, offset, size, RL_READER);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Write records come in two flavors: immediate and indirect.
|
||||||
|
* For small writes it's cheaper to store the data with the
|
||||||
|
* log record (immediate); for large writes it's cheaper to
|
||||||
|
* sync the data and get a pointer to it (indirect) so that
|
||||||
|
* we don't have to write the data twice.
|
||||||
|
*/
|
||||||
|
if (buf != NULL) { /* immediate write */
|
||||||
|
error = dmu_read(os, ZVOL_OBJ, offset, size, buf,
|
||||||
|
DMU_READ_NO_PREFETCH);
|
||||||
|
} else {
|
||||||
|
size = zv->zv_volblocksize;
|
||||||
|
offset = P2ALIGN_TYPED(offset, size, uint64_t);
|
||||||
|
error = dmu_buf_hold(os, ZVOL_OBJ, offset, zgd, &db,
|
||||||
|
DMU_READ_NO_PREFETCH);
|
||||||
|
if (error == 0) {
|
||||||
|
zgd->zgd_db = db;
|
||||||
|
zgd->zgd_bp = &lr->lr_blkptr;
|
||||||
|
|
||||||
|
ASSERT(db != NULL);
|
||||||
|
ASSERT(db->db_offset == offset);
|
||||||
|
ASSERT(db->db_size == size);
|
||||||
|
|
||||||
|
error = dmu_sync(zio, lr->lr_common.lrc_txg,
|
||||||
|
zvol_get_done, zgd);
|
||||||
|
|
||||||
|
if (error == 0)
|
||||||
|
return (0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
zvol_get_done(zgd, error);
|
||||||
|
|
||||||
|
return (error);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The zvol_state_t's are inserted in increasing MINOR(dev_t) order.
|
* The zvol_state_t's are inserted in increasing MINOR(dev_t) order.
|
||||||
*/
|
*/
|
||||||
|
@ -688,27 +747,94 @@ zvol_remove(zvol_state_t *zv_remove)
|
||||||
list_remove(&zvol_state_list, zv_remove);
|
list_remove(&zvol_state_list, zv_remove);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
zvol_first_open(zvol_state_t *zv)
|
||||||
|
{
|
||||||
|
objset_t *os;
|
||||||
|
uint64_t volsize;
|
||||||
|
int error;
|
||||||
|
uint64_t readonly;
|
||||||
|
|
||||||
|
/* lie and say we're read-only */
|
||||||
|
error = dmu_objset_own(zv->zv_disk->disk_name,
|
||||||
|
DMU_OST_ZVOL, B_TRUE, zvol_tag, &os);
|
||||||
|
if (error)
|
||||||
|
return (-error);
|
||||||
|
|
||||||
|
error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize);
|
||||||
|
if (error) {
|
||||||
|
dmu_objset_disown(os, zvol_tag);
|
||||||
|
return (-error);
|
||||||
|
}
|
||||||
|
|
||||||
|
zv->zv_objset = os;
|
||||||
|
error = dmu_bonus_hold(os, ZVOL_OBJ, zvol_tag, &zv->zv_dbuf);
|
||||||
|
if (error) {
|
||||||
|
dmu_objset_disown(os, zvol_tag);
|
||||||
|
return (-error);
|
||||||
|
}
|
||||||
|
|
||||||
|
set_capacity(zv->zv_disk, volsize >> 9);
|
||||||
|
zv->zv_volsize = volsize;
|
||||||
|
zv->zv_zilog = zil_open(os, zvol_get_data);
|
||||||
|
|
||||||
|
VERIFY(dsl_prop_get_integer(zv->zv_disk->disk_name,
|
||||||
|
"readonly", &readonly, NULL) == 0);
|
||||||
|
if (readonly || dmu_objset_is_snapshot(os)) {
|
||||||
|
set_disk_ro(zv->zv_disk, 1);
|
||||||
|
zv->zv_flags |= ZVOL_RDONLY;
|
||||||
|
} else {
|
||||||
|
set_disk_ro(zv->zv_disk, 0);
|
||||||
|
zv->zv_flags &= ~ZVOL_RDONLY;
|
||||||
|
}
|
||||||
|
|
||||||
|
return (-error);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
zvol_last_close(zvol_state_t *zv)
|
||||||
|
{
|
||||||
|
zil_close(zv->zv_zilog);
|
||||||
|
zv->zv_zilog = NULL;
|
||||||
|
dmu_buf_rele(zv->zv_dbuf, zvol_tag);
|
||||||
|
zv->zv_dbuf = NULL;
|
||||||
|
dmu_objset_disown(zv->zv_objset, zvol_tag);
|
||||||
|
zv->zv_objset = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
zvol_open(struct block_device *bdev, fmode_t flag)
|
zvol_open(struct block_device *bdev, fmode_t flag)
|
||||||
{
|
{
|
||||||
zvol_state_t *zv = bdev->bd_disk->private_data;
|
zvol_state_t *zv = bdev->bd_disk->private_data;
|
||||||
|
int error = 0;
|
||||||
|
|
||||||
mutex_enter(&zvol_state_lock);
|
mutex_enter(&zvol_state_lock);
|
||||||
ASSERT3P(zv, !=, NULL);
|
ASSERT3P(zv, !=, NULL);
|
||||||
ASSERT3P(zv->zv_objset, !=, NULL);
|
|
||||||
|
if (zv->zv_open_count == 0) {
|
||||||
|
error = zvol_first_open(zv);
|
||||||
|
if (error)
|
||||||
|
goto out_mutex;
|
||||||
|
}
|
||||||
|
|
||||||
if ((flag & FMODE_WRITE) &&
|
if ((flag & FMODE_WRITE) &&
|
||||||
(get_disk_ro(zv->zv_disk) || (zv->zv_mode & DS_MODE_READONLY))) {
|
(get_disk_ro(zv->zv_disk) || (zv->zv_flags & ZVOL_RDONLY))) {
|
||||||
mutex_exit(&zvol_state_lock);
|
error = -EROFS;
|
||||||
return (-EROFS);
|
goto out_open_count;
|
||||||
}
|
}
|
||||||
|
|
||||||
zv->zv_open_count++;
|
zv->zv_open_count++;
|
||||||
|
|
||||||
|
out_open_count:
|
||||||
|
if (zv->zv_open_count == 0)
|
||||||
|
zvol_last_close(zv);
|
||||||
|
|
||||||
|
out_mutex:
|
||||||
mutex_exit(&zvol_state_lock);
|
mutex_exit(&zvol_state_lock);
|
||||||
|
|
||||||
check_disk_change(bdev);
|
check_disk_change(bdev);
|
||||||
|
|
||||||
return (0);
|
return (error);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
|
@ -720,88 +846,14 @@ zvol_release(struct gendisk *disk, fmode_t mode)
|
||||||
ASSERT3P(zv, !=, NULL);
|
ASSERT3P(zv, !=, NULL);
|
||||||
ASSERT3U(zv->zv_open_count, >, 0);
|
ASSERT3U(zv->zv_open_count, >, 0);
|
||||||
zv->zv_open_count--;
|
zv->zv_open_count--;
|
||||||
|
if (zv->zv_open_count == 0)
|
||||||
|
zvol_last_close(zv);
|
||||||
|
|
||||||
mutex_exit(&zvol_state_lock);
|
mutex_exit(&zvol_state_lock);
|
||||||
|
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
|
||||||
zvol_get_done(dmu_buf_t *db, void *vzgd)
|
|
||||||
{
|
|
||||||
zgd_t *zgd = (zgd_t *)vzgd;
|
|
||||||
rl_t *rl = zgd->zgd_rl;
|
|
||||||
|
|
||||||
dmu_buf_rele(db, vzgd);
|
|
||||||
zfs_range_unlock(rl);
|
|
||||||
zil_add_block(zgd->zgd_zilog, zgd->zgd_bp);
|
|
||||||
kmem_free(zgd, sizeof (zgd_t));
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Get data to generate a TX_WRITE intent log record.
|
|
||||||
*/
|
|
||||||
static int
|
|
||||||
zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
|
|
||||||
{
|
|
||||||
zvol_state_t *zv = arg;
|
|
||||||
objset_t *os = zv->zv_objset;
|
|
||||||
dmu_buf_t *db;
|
|
||||||
rl_t *rl;
|
|
||||||
zgd_t *zgd;
|
|
||||||
uint64_t boff; /* block starting offset */
|
|
||||||
int dlen = lr->lr_length; /* length of user data */
|
|
||||||
int error;
|
|
||||||
|
|
||||||
ASSERT(zio);
|
|
||||||
ASSERT(dlen != 0);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Write records come in two flavors: immediate and indirect.
|
|
||||||
* For small writes it's cheaper to store the data with the
|
|
||||||
* log record (immediate); for large writes it's cheaper to
|
|
||||||
* sync the data and get a pointer to it (indirect) so that
|
|
||||||
* we don't have to write the data twice.
|
|
||||||
*/
|
|
||||||
if (buf != NULL) /* immediate write */
|
|
||||||
return (dmu_read(os, ZVOL_OBJ, lr->lr_offset, dlen, buf,
|
|
||||||
DMU_READ_NO_PREFETCH));
|
|
||||||
|
|
||||||
zgd = (zgd_t *)kmem_alloc(sizeof (zgd_t), KM_SLEEP);
|
|
||||||
zgd->zgd_zilog = zv->zv_zilog;
|
|
||||||
zgd->zgd_bp = &lr->lr_blkptr;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Lock the range of the block to ensure that when the data is
|
|
||||||
* written out and its checksum is being calculated that no other
|
|
||||||
* thread can change the block.
|
|
||||||
*/
|
|
||||||
boff = P2ALIGN_TYPED(lr->lr_offset, zv->zv_volblocksize, uint64_t);
|
|
||||||
rl = zfs_range_lock(&zv->zv_znode, boff, zv->zv_volblocksize,
|
|
||||||
RL_READER);
|
|
||||||
zgd->zgd_rl = rl;
|
|
||||||
|
|
||||||
VERIFY3S(dmu_buf_hold(os, ZVOL_OBJ, lr->lr_offset, zgd, &db), ==, 0);
|
|
||||||
error = dmu_sync(zio, db, &lr->lr_blkptr,
|
|
||||||
lr->lr_common.lrc_txg, zvol_get_done, zgd);
|
|
||||||
if (error == 0)
|
|
||||||
zil_add_block(zv->zv_zilog, &lr->lr_blkptr);
|
|
||||||
/*
|
|
||||||
* If we get EINPROGRESS, then we need to wait for a
|
|
||||||
* write IO initiated by dmu_sync() to complete before
|
|
||||||
* we can release this dbuf. We will finish everything
|
|
||||||
* up in the zvol_get_done() callback.
|
|
||||||
*/
|
|
||||||
if (error == EINPROGRESS)
|
|
||||||
return (0);
|
|
||||||
|
|
||||||
dmu_buf_rele(db, zgd);
|
|
||||||
zfs_range_unlock(rl);
|
|
||||||
kmem_free(zgd, sizeof (zgd_t));
|
|
||||||
|
|
||||||
return (error);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
static int
|
static int
|
||||||
zvol_ioctl(struct block_device *bdev, fmode_t mode,
|
zvol_ioctl(struct block_device *bdev, fmode_t mode,
|
||||||
unsigned int cmd, unsigned long arg)
|
unsigned int cmd, unsigned long arg)
|
||||||
|
@ -1026,9 +1078,7 @@ zvol_create_minor(const char *name)
|
||||||
zvol_state_t *zv;
|
zvol_state_t *zv;
|
||||||
objset_t *os;
|
objset_t *os;
|
||||||
dmu_object_info_t doi;
|
dmu_object_info_t doi;
|
||||||
uint64_t volsize;
|
|
||||||
unsigned minor = 0;
|
unsigned minor = 0;
|
||||||
int ds_mode = DS_MODE_OWNER;
|
|
||||||
int error = 0;
|
int error = 0;
|
||||||
|
|
||||||
mutex_enter(&zvol_state_lock);
|
mutex_enter(&zvol_state_lock);
|
||||||
|
@ -1039,57 +1089,40 @@ zvol_create_minor(const char *name)
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Snapshot may only be read-only */
|
error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, zvol_tag, &os);
|
||||||
if (strchr(name, '@') != 0)
|
|
||||||
ds_mode |= DS_MODE_READONLY;
|
|
||||||
|
|
||||||
error = dmu_objset_open(name, DMU_OST_ZVOL, ds_mode, &os);
|
|
||||||
if (error)
|
if (error)
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
error = dmu_object_info(os, ZVOL_OBJ, &doi);
|
error = dmu_object_info(os, ZVOL_OBJ, &doi);
|
||||||
if (error)
|
if (error)
|
||||||
goto out_dmu_objset_close;
|
goto out_dmu_objset_disown;
|
||||||
|
|
||||||
error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize);
|
|
||||||
if (error)
|
|
||||||
goto out_dmu_objset_close;
|
|
||||||
|
|
||||||
error = zvol_find_minor(&minor);
|
error = zvol_find_minor(&minor);
|
||||||
if (error)
|
if (error)
|
||||||
goto out_dmu_objset_close;
|
goto out_dmu_objset_disown;
|
||||||
|
|
||||||
zv = zvol_alloc(MKDEV(zvol_major, minor), name);
|
zv = zvol_alloc(MKDEV(zvol_major, minor), name);
|
||||||
if (zv == NULL) {
|
if (zv == NULL) {
|
||||||
error = EAGAIN;
|
error = EAGAIN;
|
||||||
goto out_dmu_objset_close;
|
goto out_dmu_objset_disown;
|
||||||
}
|
}
|
||||||
|
|
||||||
set_disk_ro(zv->zv_disk, !!(ds_mode & DS_MODE_READONLY));
|
if (dmu_objset_is_snapshot(os))
|
||||||
set_capacity(zv->zv_disk, volsize >> 9);
|
zv->zv_flags |= ZVOL_RDONLY;
|
||||||
|
|
||||||
zv->zv_volsize = volsize;
|
|
||||||
zv->zv_volblocksize = doi.doi_data_block_size;
|
zv->zv_volblocksize = doi.doi_data_block_size;
|
||||||
zv->zv_objset = os;
|
|
||||||
zv->zv_mode = ds_mode;
|
if (zil_replay_disable)
|
||||||
zv->zv_zilog = zil_open(os, zvol_get_data);
|
zil_destroy(dmu_objset_zil(os), B_FALSE);
|
||||||
|
else
|
||||||
zil_replay(os, zv, zvol_replay_vector);
|
zil_replay(os, zv, zvol_replay_vector);
|
||||||
|
|
||||||
error = dsl_prop_register(dmu_objset_ds(zv->zv_objset), "readonly",
|
|
||||||
zvol_readonly_changed_cb, zv);
|
|
||||||
if (error)
|
|
||||||
goto out_zvol_alloc;
|
|
||||||
|
|
||||||
zvol_insert(zv);
|
zvol_insert(zv);
|
||||||
mutex_exit(&zvol_state_lock);
|
|
||||||
add_disk(zv->zv_disk);
|
add_disk(zv->zv_disk);
|
||||||
|
error = 0;
|
||||||
|
|
||||||
return 0;
|
out_dmu_objset_disown:
|
||||||
|
dmu_objset_disown(os, zvol_tag);
|
||||||
out_zvol_alloc:
|
|
||||||
zvol_free(zv);
|
|
||||||
out_dmu_objset_close:
|
|
||||||
dmu_objset_close(os);
|
|
||||||
out:
|
out:
|
||||||
mutex_exit(&zvol_state_lock);
|
mutex_exit(&zvol_state_lock);
|
||||||
|
|
||||||
|
@ -1137,14 +1170,6 @@ zvol_remove_minor(const char *name)
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
error = dsl_prop_unregister(dmu_objset_ds(zv->zv_objset),
|
|
||||||
"readonly", zvol_readonly_changed_cb, zv);
|
|
||||||
if (error)
|
|
||||||
goto out;
|
|
||||||
|
|
||||||
zil_close(zv->zv_zilog);
|
|
||||||
dmu_objset_close(zv->zv_objset);
|
|
||||||
|
|
||||||
zvol_remove(zv);
|
zvol_remove(zv);
|
||||||
zvol_free(zv);
|
zvol_free(zv);
|
||||||
out:
|
out:
|
||||||
|
@ -1158,20 +1183,14 @@ out:
|
||||||
* zvol_fini() which means the module reference count must have
|
* zvol_fini() which means the module reference count must have
|
||||||
* dropped to zero and none of the zvol devices may be open.
|
* dropped to zero and none of the zvol devices may be open.
|
||||||
*/
|
*/
|
||||||
static void
|
void
|
||||||
zvol_remove_minors(void)
|
zvol_remove_minors(const char *name)
|
||||||
{
|
{
|
||||||
zvol_state_t *zv;
|
zvol_state_t *zv;
|
||||||
|
|
||||||
mutex_enter(&zvol_state_lock);
|
mutex_enter(&zvol_state_lock);
|
||||||
while ((zv = list_head(&zvol_state_list)) != NULL) {
|
while ((zv = list_head(&zvol_state_list)) != NULL) {
|
||||||
ASSERT3U(zv->zv_open_count, ==, 0);
|
ASSERT3U(zv->zv_open_count, ==, 0);
|
||||||
|
|
||||||
(void)dsl_prop_unregister(dmu_objset_ds(zv->zv_objset),
|
|
||||||
"readonly", zvol_readonly_changed_cb, zv);
|
|
||||||
zil_close(zv->zv_zilog);
|
|
||||||
dmu_objset_close(zv->zv_objset);
|
|
||||||
|
|
||||||
zvol_remove(zv);
|
zvol_remove(zv);
|
||||||
zvol_free(zv);
|
zvol_free(zv);
|
||||||
}
|
}
|
||||||
|
@ -1215,7 +1234,6 @@ zvol_init(void)
|
||||||
void
|
void
|
||||||
zvol_fini(void)
|
zvol_fini(void)
|
||||||
{
|
{
|
||||||
zvol_remove_minors();
|
|
||||||
blk_unregister_region(MKDEV(zvol_major, 0), 1UL << MINORBITS);
|
blk_unregister_region(MKDEV(zvol_major, 0), 1UL << MINORBITS);
|
||||||
unregister_blkdev(zvol_major, ZVOL_DRIVER);
|
unregister_blkdev(zvol_major, ZVOL_DRIVER);
|
||||||
taskq_destroy(zvol_taskq);
|
taskq_destroy(zvol_taskq);
|
||||||
|
|
Loading…
Reference in New Issue