zfs initialize performance enhancements
PROBLEM ======== When invoking "zpool initialize" on a pool the command will create a thread to initialize each disk. Unfortunately, it does this serially across many transaction groups which can result in commands taking a long time to return to the user and may appear hung. The same thing is true when trying to suspend/cancel the operation. SOLUTION ========= This change refactors the way we invoke the initialize interface to ensure we can start or stop the initialization in just a few transaction groups. When stopping or cancelling a vdev initialization perform it in two phases. First signal each vdev initialization thread that it should exit, then after all threads have been signaled wait for them to exit. On a pool with 40 leaf vdevs this reduces the vdev initialize stop/cancel time from ~10 minutes to under a second. The reason for this is spa_vdev_initialize() no longer needs to wait on multiple full TXGs per leaf vdev being stopped. This commit additionally adds some missing checks for the passed "initialize_vdevs" input nvlist. The contents of the user provided input "initialize_vdevs" nvlist must be validated to ensure all values are uint64s. This is done in zfs_ioc_pool_initialize() in order to keep all of these checks in a single location. Updated the innvl and outnvl comments to match the formatting used for all other new style ioctls. Reviewed by: Matt Ahrens <mahrens@delphix.com> Reviewed-by: loli10K <ezomori.nozomu@gmail.com> Reviewed-by: Tim Chase <tim@chase2k.com> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: George Wilson <george.wilson@delphix.com> Closes #8230
This commit is contained in:
parent
619f097693
commit
c10d37dd9f
|
@ -6404,7 +6404,14 @@ ztest_initialize(ztest_ds_t *zd, uint64_t id)
|
||||||
spa_config_exit(spa, SCL_VDEV, FTAG);
|
spa_config_exit(spa, SCL_VDEV, FTAG);
|
||||||
|
|
||||||
uint64_t cmd = ztest_random(POOL_INITIALIZE_FUNCS);
|
uint64_t cmd = ztest_random(POOL_INITIALIZE_FUNCS);
|
||||||
error = spa_vdev_initialize(spa, guid, cmd);
|
|
||||||
|
nvlist_t *vdev_guids = fnvlist_alloc();
|
||||||
|
nvlist_t *vdev_errlist = fnvlist_alloc();
|
||||||
|
fnvlist_add_uint64(vdev_guids, path, guid);
|
||||||
|
error = spa_vdev_initialize(spa, vdev_guids, cmd, vdev_errlist);
|
||||||
|
fnvlist_free(vdev_guids);
|
||||||
|
fnvlist_free(vdev_errlist);
|
||||||
|
|
||||||
switch (cmd) {
|
switch (cmd) {
|
||||||
case POOL_INITIALIZE_CANCEL:
|
case POOL_INITIALIZE_CANCEL:
|
||||||
if (ztest_opts.zo_verbose >= 4) {
|
if (ztest_opts.zo_verbose >= 4) {
|
||||||
|
|
|
@ -788,7 +788,8 @@ extern int spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid,
|
||||||
int replace_done);
|
int replace_done);
|
||||||
extern int spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare);
|
extern int spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare);
|
||||||
extern boolean_t spa_vdev_remove_active(spa_t *spa);
|
extern boolean_t spa_vdev_remove_active(spa_t *spa);
|
||||||
extern int spa_vdev_initialize(spa_t *spa, uint64_t guid, uint64_t cmd_type);
|
extern int spa_vdev_initialize(spa_t *spa, nvlist_t *nv, uint64_t cmd_type,
|
||||||
|
nvlist_t *vdev_errlist);
|
||||||
extern int spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath);
|
extern int spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath);
|
||||||
extern int spa_vdev_setfru(spa_t *spa, uint64_t guid, const char *newfru);
|
extern int spa_vdev_setfru(spa_t *spa, uint64_t guid, const char *newfru);
|
||||||
extern int spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config,
|
extern int spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config,
|
||||||
|
|
|
@ -263,6 +263,7 @@ struct vdev {
|
||||||
|
|
||||||
boolean_t vdev_initialize_exit_wanted;
|
boolean_t vdev_initialize_exit_wanted;
|
||||||
vdev_initializing_state_t vdev_initialize_state;
|
vdev_initializing_state_t vdev_initialize_state;
|
||||||
|
list_node_t vdev_initialize_node;
|
||||||
kthread_t *vdev_initialize_thread;
|
kthread_t *vdev_initialize_thread;
|
||||||
/* Protects vdev_initialize_thread and vdev_initialize_state. */
|
/* Protects vdev_initialize_thread and vdev_initialize_state. */
|
||||||
kmutex_t vdev_initialize_lock;
|
kmutex_t vdev_initialize_lock;
|
||||||
|
|
|
@ -26,15 +26,18 @@
|
||||||
#ifndef _SYS_VDEV_INITIALIZE_H
|
#ifndef _SYS_VDEV_INITIALIZE_H
|
||||||
#define _SYS_VDEV_INITIALIZE_H
|
#define _SYS_VDEV_INITIALIZE_H
|
||||||
|
|
||||||
|
#include <sys/spa.h>
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
extern void vdev_initialize(vdev_t *vd);
|
extern void vdev_initialize(vdev_t *vd);
|
||||||
extern void vdev_initialize_stop(vdev_t *vd,
|
extern void vdev_initialize_stop(vdev_t *vd,
|
||||||
vdev_initializing_state_t tgt_state);
|
vdev_initializing_state_t tgt_state, list_t *vd_list);
|
||||||
extern void vdev_initialize_stop_all(vdev_t *vd,
|
extern void vdev_initialize_stop_all(vdev_t *vd,
|
||||||
vdev_initializing_state_t tgt_state);
|
vdev_initializing_state_t tgt_state);
|
||||||
|
extern void vdev_initialize_stop_wait(spa_t *spa, list_t *vd_list);
|
||||||
extern void vdev_initialize_restart(vdev_t *vd);
|
extern void vdev_initialize_restart(vdev_t *vd);
|
||||||
extern void vdev_xlate(vdev_t *vd, const range_seg_t *logical_rs,
|
extern void vdev_xlate(vdev_t *vd, const range_seg_t *logical_rs,
|
||||||
range_seg_t *physical_rs);
|
range_seg_t *physical_rs);
|
||||||
|
|
|
@ -6381,32 +6381,24 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done)
|
||||||
return (error);
|
return (error);
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
static int
|
||||||
spa_vdev_initialize(spa_t *spa, uint64_t guid, uint64_t cmd_type)
|
spa_vdev_initialize_impl(spa_t *spa, uint64_t guid, uint64_t cmd_type,
|
||||||
|
list_t *vd_list)
|
||||||
{
|
{
|
||||||
/*
|
ASSERT(MUTEX_HELD(&spa_namespace_lock));
|
||||||
* We hold the namespace lock through the whole function
|
|
||||||
* to prevent any changes to the pool while we're starting or
|
|
||||||
* stopping initialization. The config and state locks are held so that
|
|
||||||
* we can properly assess the vdev state before we commit to
|
|
||||||
* the initializing operation.
|
|
||||||
*/
|
|
||||||
mutex_enter(&spa_namespace_lock);
|
|
||||||
spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER);
|
spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER);
|
||||||
|
|
||||||
/* Look up vdev and ensure it's a leaf. */
|
/* Look up vdev and ensure it's a leaf. */
|
||||||
vdev_t *vd = spa_lookup_by_guid(spa, guid, B_FALSE);
|
vdev_t *vd = spa_lookup_by_guid(spa, guid, B_FALSE);
|
||||||
if (vd == NULL || vd->vdev_detached) {
|
if (vd == NULL || vd->vdev_detached) {
|
||||||
spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
|
spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
|
||||||
mutex_exit(&spa_namespace_lock);
|
|
||||||
return (SET_ERROR(ENODEV));
|
return (SET_ERROR(ENODEV));
|
||||||
} else if (!vd->vdev_ops->vdev_op_leaf || !vdev_is_concrete(vd)) {
|
} else if (!vd->vdev_ops->vdev_op_leaf || !vdev_is_concrete(vd)) {
|
||||||
spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
|
spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
|
||||||
mutex_exit(&spa_namespace_lock);
|
|
||||||
return (SET_ERROR(EINVAL));
|
return (SET_ERROR(EINVAL));
|
||||||
} else if (!vdev_writeable(vd)) {
|
} else if (!vdev_writeable(vd)) {
|
||||||
spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
|
spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
|
||||||
mutex_exit(&spa_namespace_lock);
|
|
||||||
return (SET_ERROR(EROFS));
|
return (SET_ERROR(EROFS));
|
||||||
}
|
}
|
||||||
mutex_enter(&vd->vdev_initialize_lock);
|
mutex_enter(&vd->vdev_initialize_lock);
|
||||||
|
@ -6423,18 +6415,15 @@ spa_vdev_initialize(spa_t *spa, uint64_t guid, uint64_t cmd_type)
|
||||||
(vd->vdev_initialize_thread != NULL ||
|
(vd->vdev_initialize_thread != NULL ||
|
||||||
vd->vdev_top->vdev_removing)) {
|
vd->vdev_top->vdev_removing)) {
|
||||||
mutex_exit(&vd->vdev_initialize_lock);
|
mutex_exit(&vd->vdev_initialize_lock);
|
||||||
mutex_exit(&spa_namespace_lock);
|
|
||||||
return (SET_ERROR(EBUSY));
|
return (SET_ERROR(EBUSY));
|
||||||
} else if (cmd_type == POOL_INITIALIZE_CANCEL &&
|
} else if (cmd_type == POOL_INITIALIZE_CANCEL &&
|
||||||
(vd->vdev_initialize_state != VDEV_INITIALIZE_ACTIVE &&
|
(vd->vdev_initialize_state != VDEV_INITIALIZE_ACTIVE &&
|
||||||
vd->vdev_initialize_state != VDEV_INITIALIZE_SUSPENDED)) {
|
vd->vdev_initialize_state != VDEV_INITIALIZE_SUSPENDED)) {
|
||||||
mutex_exit(&vd->vdev_initialize_lock);
|
mutex_exit(&vd->vdev_initialize_lock);
|
||||||
mutex_exit(&spa_namespace_lock);
|
|
||||||
return (SET_ERROR(ESRCH));
|
return (SET_ERROR(ESRCH));
|
||||||
} else if (cmd_type == POOL_INITIALIZE_SUSPEND &&
|
} else if (cmd_type == POOL_INITIALIZE_SUSPEND &&
|
||||||
vd->vdev_initialize_state != VDEV_INITIALIZE_ACTIVE) {
|
vd->vdev_initialize_state != VDEV_INITIALIZE_ACTIVE) {
|
||||||
mutex_exit(&vd->vdev_initialize_lock);
|
mutex_exit(&vd->vdev_initialize_lock);
|
||||||
mutex_exit(&spa_namespace_lock);
|
|
||||||
return (SET_ERROR(ESRCH));
|
return (SET_ERROR(ESRCH));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -6443,23 +6432,65 @@ spa_vdev_initialize(spa_t *spa, uint64_t guid, uint64_t cmd_type)
|
||||||
vdev_initialize(vd);
|
vdev_initialize(vd);
|
||||||
break;
|
break;
|
||||||
case POOL_INITIALIZE_CANCEL:
|
case POOL_INITIALIZE_CANCEL:
|
||||||
vdev_initialize_stop(vd, VDEV_INITIALIZE_CANCELED);
|
vdev_initialize_stop(vd, VDEV_INITIALIZE_CANCELED, vd_list);
|
||||||
break;
|
break;
|
||||||
case POOL_INITIALIZE_SUSPEND:
|
case POOL_INITIALIZE_SUSPEND:
|
||||||
vdev_initialize_stop(vd, VDEV_INITIALIZE_SUSPENDED);
|
vdev_initialize_stop(vd, VDEV_INITIALIZE_SUSPENDED, vd_list);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
panic("invalid cmd_type %llu", (unsigned long long)cmd_type);
|
panic("invalid cmd_type %llu", (unsigned long long)cmd_type);
|
||||||
}
|
}
|
||||||
mutex_exit(&vd->vdev_initialize_lock);
|
mutex_exit(&vd->vdev_initialize_lock);
|
||||||
|
|
||||||
|
return (0);
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
spa_vdev_initialize(spa_t *spa, nvlist_t *nv, uint64_t cmd_type,
|
||||||
|
nvlist_t *vdev_errlist)
|
||||||
|
{
|
||||||
|
int total_errors = 0;
|
||||||
|
list_t vd_list;
|
||||||
|
|
||||||
|
list_create(&vd_list, sizeof (vdev_t),
|
||||||
|
offsetof(vdev_t, vdev_initialize_node));
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We hold the namespace lock through the whole function
|
||||||
|
* to prevent any changes to the pool while we're starting or
|
||||||
|
* stopping initialization. The config and state locks are held so that
|
||||||
|
* we can properly assess the vdev state before we commit to
|
||||||
|
* the initializing operation.
|
||||||
|
*/
|
||||||
|
mutex_enter(&spa_namespace_lock);
|
||||||
|
|
||||||
|
for (nvpair_t *pair = nvlist_next_nvpair(nv, NULL);
|
||||||
|
pair != NULL; pair = nvlist_next_nvpair(nv, pair)) {
|
||||||
|
uint64_t vdev_guid = fnvpair_value_uint64(pair);
|
||||||
|
|
||||||
|
int error = spa_vdev_initialize_impl(spa, vdev_guid, cmd_type,
|
||||||
|
&vd_list);
|
||||||
|
if (error != 0) {
|
||||||
|
char guid_as_str[MAXNAMELEN];
|
||||||
|
|
||||||
|
(void) snprintf(guid_as_str, sizeof (guid_as_str),
|
||||||
|
"%llu", (unsigned long long)vdev_guid);
|
||||||
|
fnvlist_add_int64(vdev_errlist, guid_as_str, error);
|
||||||
|
total_errors++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Wait for all initialize threads to stop. */
|
||||||
|
vdev_initialize_stop_wait(spa, &vd_list);
|
||||||
|
|
||||||
/* Sync out the initializing state */
|
/* Sync out the initializing state */
|
||||||
txg_wait_synced(spa->spa_dsl_pool, 0);
|
txg_wait_synced(spa->spa_dsl_pool, 0);
|
||||||
mutex_exit(&spa_namespace_lock);
|
mutex_exit(&spa_namespace_lock);
|
||||||
|
|
||||||
return (0);
|
list_destroy(&vd_list);
|
||||||
}
|
|
||||||
|
|
||||||
|
return (total_errors);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Split a set of devices from their mirrors, and create a new pool from them.
|
* Split a set of devices from their mirrors, and create a new pool from them.
|
||||||
|
@ -6669,18 +6700,25 @@ spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config,
|
||||||
spa_activate(newspa, spa_mode_global);
|
spa_activate(newspa, spa_mode_global);
|
||||||
spa_async_suspend(newspa);
|
spa_async_suspend(newspa);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Temporarily stop the initializing activity. We set the state to
|
||||||
|
* ACTIVE so that we know to resume the initializing once the split
|
||||||
|
* has completed.
|
||||||
|
*/
|
||||||
|
list_t vd_list;
|
||||||
|
list_create(&vd_list, sizeof (vdev_t),
|
||||||
|
offsetof(vdev_t, vdev_initialize_node));
|
||||||
|
|
||||||
for (c = 0; c < children; c++) {
|
for (c = 0; c < children; c++) {
|
||||||
if (vml[c] != NULL) {
|
if (vml[c] != NULL) {
|
||||||
/*
|
|
||||||
* Temporarily stop the initializing activity. We set
|
|
||||||
* the state to ACTIVE so that we know to resume
|
|
||||||
* the initializing once the split has completed.
|
|
||||||
*/
|
|
||||||
mutex_enter(&vml[c]->vdev_initialize_lock);
|
mutex_enter(&vml[c]->vdev_initialize_lock);
|
||||||
vdev_initialize_stop(vml[c], VDEV_INITIALIZE_ACTIVE);
|
vdev_initialize_stop(vml[c], VDEV_INITIALIZE_ACTIVE,
|
||||||
|
&vd_list);
|
||||||
mutex_exit(&vml[c]->vdev_initialize_lock);
|
mutex_exit(&vml[c]->vdev_initialize_lock);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
vdev_initialize_stop_wait(spa, &vd_list);
|
||||||
|
list_destroy(&vd_list);
|
||||||
|
|
||||||
newspa->spa_config_source = SPA_CONFIG_SRC_SPLIT;
|
newspa->spa_config_source = SPA_CONFIG_SRC_SPLIT;
|
||||||
|
|
||||||
|
|
|
@ -1197,7 +1197,8 @@ spa_vdev_config_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error, char *tag)
|
||||||
ASSERT(!vd->vdev_detached || vd->vdev_dtl_sm == NULL);
|
ASSERT(!vd->vdev_detached || vd->vdev_dtl_sm == NULL);
|
||||||
if (vd->vdev_ops->vdev_op_leaf) {
|
if (vd->vdev_ops->vdev_op_leaf) {
|
||||||
mutex_enter(&vd->vdev_initialize_lock);
|
mutex_enter(&vd->vdev_initialize_lock);
|
||||||
vdev_initialize_stop(vd, VDEV_INITIALIZE_CANCELED);
|
vdev_initialize_stop(vd, VDEV_INITIALIZE_CANCELED,
|
||||||
|
NULL);
|
||||||
mutex_exit(&vd->vdev_initialize_lock);
|
mutex_exit(&vd->vdev_initialize_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -530,6 +530,7 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
|
||||||
|
|
||||||
list_link_init(&vd->vdev_config_dirty_node);
|
list_link_init(&vd->vdev_config_dirty_node);
|
||||||
list_link_init(&vd->vdev_state_dirty_node);
|
list_link_init(&vd->vdev_state_dirty_node);
|
||||||
|
list_link_init(&vd->vdev_initialize_node);
|
||||||
mutex_init(&vd->vdev_dtl_lock, NULL, MUTEX_NOLOCKDEP, NULL);
|
mutex_init(&vd->vdev_dtl_lock, NULL, MUTEX_NOLOCKDEP, NULL);
|
||||||
mutex_init(&vd->vdev_stat_lock, NULL, MUTEX_DEFAULT, NULL);
|
mutex_init(&vd->vdev_stat_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||||
mutex_init(&vd->vdev_probe_lock, NULL, MUTEX_DEFAULT, NULL);
|
mutex_init(&vd->vdev_probe_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||||
|
|
|
@ -702,18 +702,51 @@ vdev_initialize(vdev_t *vd)
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Stop initializng a device, with the resultant initialing state being
|
* Wait for the initialize thread to be terminated (cancelled or stopped).
|
||||||
* tgt_state. Blocks until the initializing thread has exited.
|
*/
|
||||||
* Caller must hold vdev_initialize_lock and must not be writing to the spa
|
static void
|
||||||
* config, as the initializing thread may try to enter the config as a reader
|
vdev_initialize_stop_wait_impl(vdev_t *vd)
|
||||||
* before exiting.
|
{
|
||||||
|
ASSERT(MUTEX_HELD(&vd->vdev_initialize_lock));
|
||||||
|
|
||||||
|
while (vd->vdev_initialize_thread != NULL)
|
||||||
|
cv_wait(&vd->vdev_initialize_cv, &vd->vdev_initialize_lock);
|
||||||
|
|
||||||
|
ASSERT3P(vd->vdev_initialize_thread, ==, NULL);
|
||||||
|
vd->vdev_initialize_exit_wanted = B_FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Wait for vdev initialize threads which were either to cleanly exit.
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
vdev_initialize_stop(vdev_t *vd, vdev_initializing_state_t tgt_state)
|
vdev_initialize_stop_wait(spa_t *spa, list_t *vd_list)
|
||||||
{
|
{
|
||||||
ASSERTV(spa_t *spa = vd->vdev_spa);
|
vdev_t *vd;
|
||||||
ASSERT(!spa_config_held(spa, SCL_CONFIG | SCL_STATE, RW_WRITER));
|
|
||||||
|
|
||||||
|
ASSERT(MUTEX_HELD(&spa_namespace_lock));
|
||||||
|
|
||||||
|
while ((vd = list_remove_head(vd_list)) != NULL) {
|
||||||
|
mutex_enter(&vd->vdev_initialize_lock);
|
||||||
|
vdev_initialize_stop_wait_impl(vd);
|
||||||
|
mutex_exit(&vd->vdev_initialize_lock);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Stop initializing a device, with the resultant initialing state being
|
||||||
|
* tgt_state. For blocking behavior pass NULL for vd_list. Otherwise, when
|
||||||
|
* a list_t is provided the stopping vdev is inserted in to the list. Callers
|
||||||
|
* are then required to call vdev_initialize_stop_wait() to block for all the
|
||||||
|
* initialization threads to exit. The caller must hold vdev_initialize_lock
|
||||||
|
* and must not be writing to the spa config, as the initializing thread may
|
||||||
|
* try to enter the config as a reader before exiting.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
vdev_initialize_stop(vdev_t *vd, vdev_initializing_state_t tgt_state,
|
||||||
|
list_t *vd_list)
|
||||||
|
{
|
||||||
|
ASSERT(!spa_config_held(vd->vdev_spa, SCL_CONFIG|SCL_STATE, RW_WRITER));
|
||||||
ASSERT(MUTEX_HELD(&vd->vdev_initialize_lock));
|
ASSERT(MUTEX_HELD(&vd->vdev_initialize_lock));
|
||||||
ASSERT(vd->vdev_ops->vdev_op_leaf);
|
ASSERT(vd->vdev_ops->vdev_op_leaf);
|
||||||
ASSERT(vdev_is_concrete(vd));
|
ASSERT(vdev_is_concrete(vd));
|
||||||
|
@ -729,25 +762,29 @@ vdev_initialize_stop(vdev_t *vd, vdev_initializing_state_t tgt_state)
|
||||||
|
|
||||||
vdev_initialize_change_state(vd, tgt_state);
|
vdev_initialize_change_state(vd, tgt_state);
|
||||||
vd->vdev_initialize_exit_wanted = B_TRUE;
|
vd->vdev_initialize_exit_wanted = B_TRUE;
|
||||||
while (vd->vdev_initialize_thread != NULL)
|
|
||||||
cv_wait(&vd->vdev_initialize_cv, &vd->vdev_initialize_lock);
|
|
||||||
|
|
||||||
ASSERT3P(vd->vdev_initialize_thread, ==, NULL);
|
if (vd_list == NULL) {
|
||||||
vd->vdev_initialize_exit_wanted = B_FALSE;
|
vdev_initialize_stop_wait_impl(vd);
|
||||||
|
} else {
|
||||||
|
ASSERT(MUTEX_HELD(&spa_namespace_lock));
|
||||||
|
list_insert_tail(vd_list, vd);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
vdev_initialize_stop_all_impl(vdev_t *vd, vdev_initializing_state_t tgt_state)
|
vdev_initialize_stop_all_impl(vdev_t *vd, vdev_initializing_state_t tgt_state,
|
||||||
|
list_t *vd_list)
|
||||||
{
|
{
|
||||||
if (vd->vdev_ops->vdev_op_leaf && vdev_is_concrete(vd)) {
|
if (vd->vdev_ops->vdev_op_leaf && vdev_is_concrete(vd)) {
|
||||||
mutex_enter(&vd->vdev_initialize_lock);
|
mutex_enter(&vd->vdev_initialize_lock);
|
||||||
vdev_initialize_stop(vd, tgt_state);
|
vdev_initialize_stop(vd, tgt_state, vd_list);
|
||||||
mutex_exit(&vd->vdev_initialize_lock);
|
mutex_exit(&vd->vdev_initialize_lock);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (uint64_t i = 0; i < vd->vdev_children; i++) {
|
for (uint64_t i = 0; i < vd->vdev_children; i++) {
|
||||||
vdev_initialize_stop_all_impl(vd->vdev_child[i], tgt_state);
|
vdev_initialize_stop_all_impl(vd->vdev_child[i], tgt_state,
|
||||||
|
vd_list);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -758,12 +795,23 @@ vdev_initialize_stop_all_impl(vdev_t *vd, vdev_initializing_state_t tgt_state)
|
||||||
void
|
void
|
||||||
vdev_initialize_stop_all(vdev_t *vd, vdev_initializing_state_t tgt_state)
|
vdev_initialize_stop_all(vdev_t *vd, vdev_initializing_state_t tgt_state)
|
||||||
{
|
{
|
||||||
vdev_initialize_stop_all_impl(vd, tgt_state);
|
spa_t *spa = vd->vdev_spa;
|
||||||
|
list_t vd_list;
|
||||||
|
|
||||||
|
ASSERT(MUTEX_HELD(&spa_namespace_lock));
|
||||||
|
|
||||||
|
list_create(&vd_list, sizeof (vdev_t),
|
||||||
|
offsetof(vdev_t, vdev_initialize_node));
|
||||||
|
|
||||||
|
vdev_initialize_stop_all_impl(vd, tgt_state, &vd_list);
|
||||||
|
vdev_initialize_stop_wait(spa, &vd_list);
|
||||||
|
|
||||||
if (vd->vdev_spa->spa_sync_on) {
|
if (vd->vdev_spa->spa_sync_on) {
|
||||||
/* Make sure that our state has been synced to disk */
|
/* Make sure that our state has been synced to disk */
|
||||||
txg_wait_synced(spa_get_dsl(vd->vdev_spa), 0);
|
txg_wait_synced(spa_get_dsl(vd->vdev_spa), 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
list_destroy(&vd_list);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
@ -808,9 +856,10 @@ vdev_initialize_restart(vdev_t *vd)
|
||||||
#if defined(_KERNEL)
|
#if defined(_KERNEL)
|
||||||
EXPORT_SYMBOL(vdev_initialize_restart);
|
EXPORT_SYMBOL(vdev_initialize_restart);
|
||||||
EXPORT_SYMBOL(vdev_xlate);
|
EXPORT_SYMBOL(vdev_xlate);
|
||||||
EXPORT_SYMBOL(vdev_initialize_stop_all);
|
|
||||||
EXPORT_SYMBOL(vdev_initialize);
|
EXPORT_SYMBOL(vdev_initialize);
|
||||||
EXPORT_SYMBOL(vdev_initialize_stop);
|
EXPORT_SYMBOL(vdev_initialize_stop);
|
||||||
|
EXPORT_SYMBOL(vdev_initialize_stop_all);
|
||||||
|
EXPORT_SYMBOL(vdev_initialize_stop_wait);
|
||||||
|
|
||||||
/* CSTYLED */
|
/* CSTYLED */
|
||||||
module_param(zfs_initialize_value, ulong, 0644);
|
module_param(zfs_initialize_value, ulong, 0644);
|
||||||
|
|
|
@ -1899,7 +1899,7 @@ spa_vdev_remove_log(vdev_t *vd, uint64_t *txg)
|
||||||
spa_vdev_config_exit(spa, NULL, *txg, 0, FTAG);
|
spa_vdev_config_exit(spa, NULL, *txg, 0, FTAG);
|
||||||
|
|
||||||
/* Stop initializing */
|
/* Stop initializing */
|
||||||
(void) vdev_initialize_stop_all(vd, VDEV_INITIALIZE_CANCELED);
|
vdev_initialize_stop_all(vd, VDEV_INITIALIZE_CANCELED);
|
||||||
|
|
||||||
*txg = spa_vdev_config_enter(spa);
|
*txg = spa_vdev_config_enter(spa);
|
||||||
|
|
||||||
|
|
|
@ -3846,73 +3846,68 @@ zfs_ioc_destroy(zfs_cmd_t *zc)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* innvl: {
|
* innvl: {
|
||||||
* vdevs: {
|
* "initialize_command" -> POOL_INITIALIZE_{CANCEL|DO|SUSPEND} (uint64)
|
||||||
* guid 1, guid 2, ...
|
* "initialize_vdevs": { -> guids to initialize (nvlist)
|
||||||
|
* "vdev_path_1": vdev_guid_1, (uint64),
|
||||||
|
* "vdev_path_2": vdev_guid_2, (uint64),
|
||||||
|
* ...
|
||||||
* },
|
* },
|
||||||
* func: POOL_INITIALIZE_{CANCEL|DO|SUSPEND}
|
|
||||||
* }
|
* }
|
||||||
*
|
*
|
||||||
* outnvl: {
|
* outnvl: {
|
||||||
* [func: EINVAL (if provided command type didn't make sense)],
|
* "initialize_vdevs": { -> initialization errors (nvlist)
|
||||||
* [vdevs: {
|
* "vdev_path_1": errno, see function body for possible errnos (uint64)
|
||||||
* guid1: errno, (see function body for possible errnos)
|
* "vdev_path_2": errno, ... (uint64)
|
||||||
* ...
|
* ...
|
||||||
* }]
|
* }
|
||||||
* }
|
* }
|
||||||
*
|
*
|
||||||
|
* EINVAL is returned for an unknown commands or if any of the provided vdev
|
||||||
|
* guids have be specified with a type other than uint64.
|
||||||
*/
|
*/
|
||||||
static const zfs_ioc_key_t zfs_keys_pool_initialize[] = {
|
static const zfs_ioc_key_t zfs_keys_pool_initialize[] = {
|
||||||
{ZPOOL_INITIALIZE_COMMAND, DATA_TYPE_UINT64, 0},
|
{ZPOOL_INITIALIZE_COMMAND, DATA_TYPE_UINT64, 0},
|
||||||
{ZPOOL_INITIALIZE_VDEVS, DATA_TYPE_NVLIST, 0}
|
{ZPOOL_INITIALIZE_VDEVS, DATA_TYPE_NVLIST, 0}
|
||||||
};
|
};
|
||||||
|
|
||||||
static int
|
static int
|
||||||
zfs_ioc_pool_initialize(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
|
zfs_ioc_pool_initialize(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
|
||||||
{
|
{
|
||||||
spa_t *spa;
|
|
||||||
int error;
|
|
||||||
|
|
||||||
error = spa_open(poolname, &spa, FTAG);
|
|
||||||
if (error != 0)
|
|
||||||
return (error);
|
|
||||||
|
|
||||||
uint64_t cmd_type;
|
uint64_t cmd_type;
|
||||||
if (nvlist_lookup_uint64(innvl, ZPOOL_INITIALIZE_COMMAND,
|
if (nvlist_lookup_uint64(innvl, ZPOOL_INITIALIZE_COMMAND,
|
||||||
&cmd_type) != 0) {
|
&cmd_type) != 0) {
|
||||||
spa_close(spa, FTAG);
|
|
||||||
return (SET_ERROR(EINVAL));
|
return (SET_ERROR(EINVAL));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!(cmd_type == POOL_INITIALIZE_CANCEL ||
|
if (!(cmd_type == POOL_INITIALIZE_CANCEL ||
|
||||||
cmd_type == POOL_INITIALIZE_DO ||
|
cmd_type == POOL_INITIALIZE_DO ||
|
||||||
cmd_type == POOL_INITIALIZE_SUSPEND)) {
|
cmd_type == POOL_INITIALIZE_SUSPEND)) {
|
||||||
spa_close(spa, FTAG);
|
|
||||||
return (SET_ERROR(EINVAL));
|
return (SET_ERROR(EINVAL));
|
||||||
}
|
}
|
||||||
|
|
||||||
nvlist_t *vdev_guids;
|
nvlist_t *vdev_guids;
|
||||||
if (nvlist_lookup_nvlist(innvl, ZPOOL_INITIALIZE_VDEVS,
|
if (nvlist_lookup_nvlist(innvl, ZPOOL_INITIALIZE_VDEVS,
|
||||||
&vdev_guids) != 0) {
|
&vdev_guids) != 0) {
|
||||||
spa_close(spa, FTAG);
|
|
||||||
return (SET_ERROR(EINVAL));
|
return (SET_ERROR(EINVAL));
|
||||||
}
|
}
|
||||||
|
|
||||||
nvlist_t *vdev_errlist = fnvlist_alloc();
|
|
||||||
int total_errors = 0;
|
|
||||||
|
|
||||||
for (nvpair_t *pair = nvlist_next_nvpair(vdev_guids, NULL);
|
for (nvpair_t *pair = nvlist_next_nvpair(vdev_guids, NULL);
|
||||||
pair != NULL; pair = nvlist_next_nvpair(vdev_guids, pair)) {
|
pair != NULL; pair = nvlist_next_nvpair(vdev_guids, pair)) {
|
||||||
uint64_t vdev_guid = fnvpair_value_uint64(pair);
|
uint64_t vdev_guid;
|
||||||
|
if (nvpair_value_uint64(pair, &vdev_guid) != 0) {
|
||||||
error = spa_vdev_initialize(spa, vdev_guid, cmd_type);
|
return (SET_ERROR(EINVAL));
|
||||||
if (error != 0) {
|
|
||||||
char guid_as_str[MAXNAMELEN];
|
|
||||||
|
|
||||||
(void) snprintf(guid_as_str, sizeof (guid_as_str),
|
|
||||||
"%llu", (unsigned long long)vdev_guid);
|
|
||||||
fnvlist_add_int64(vdev_errlist, guid_as_str, error);
|
|
||||||
total_errors++;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
spa_t *spa;
|
||||||
|
int error = spa_open(poolname, &spa, FTAG);
|
||||||
|
if (error != 0)
|
||||||
|
return (error);
|
||||||
|
|
||||||
|
nvlist_t *vdev_errlist = fnvlist_alloc();
|
||||||
|
int total_errors = spa_vdev_initialize(spa, vdev_guids, cmd_type,
|
||||||
|
vdev_errlist);
|
||||||
|
|
||||||
if (fnvlist_size(vdev_errlist) > 0) {
|
if (fnvlist_size(vdev_errlist) > 0) {
|
||||||
fnvlist_add_nvlist(outnvl, ZPOOL_INITIALIZE_VDEVS,
|
fnvlist_add_nvlist(outnvl, ZPOOL_INITIALIZE_VDEVS,
|
||||||
vdev_errlist);
|
vdev_errlist);
|
||||||
|
|
Loading…
Reference in New Issue