Race condition between spa async threads and export
In the past we've seen multiple race conditions that have to do with open-context threads async threads and concurrent calls to spa_export()/spa_destroy() (including the one referenced in issue #9015). This patch ensures that only one thread can execute the main body of spa_export_common() at a time, with subsequent threads returning with a new error code created just for this situation, eliminating this way any race condition bugs introduced by concurrent calls to this function. Reviewed by: Matt Ahrens <matt@delphix.com> Reviewed by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Serapheim Dimitropoulos <serapheim@delphix.com> Closes #9015 Closes #9044
This commit is contained in:
parent
1c44a5c97f
commit
43a8536260
|
@ -2719,8 +2719,24 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id)
|
|||
VERIFY3U(EEXIST, ==,
|
||||
spa_create(zo->zo_pool, nvroot, NULL, NULL, NULL));
|
||||
nvlist_free(nvroot);
|
||||
|
||||
/*
|
||||
* We open a reference to the spa and then we try to export it
|
||||
* expecting one of the following errors:
|
||||
*
|
||||
* EBUSY
|
||||
* Because of the reference we just opened.
|
||||
*
|
||||
* ZFS_ERR_EXPORT_IN_PROGRESS
|
||||
* For the case that there is another ztest thread doing
|
||||
* an export concurrently.
|
||||
*/
|
||||
VERIFY3U(0, ==, spa_open(zo->zo_pool, &spa, FTAG));
|
||||
VERIFY3U(EBUSY, ==, spa_destroy(zo->zo_pool));
|
||||
int error = spa_destroy(zo->zo_pool);
|
||||
if (error != EBUSY && error != ZFS_ERR_EXPORT_IN_PROGRESS) {
|
||||
fatal(0, "spa_destroy(%s) returned unexpected value %d",
|
||||
spa->spa_name, error);
|
||||
}
|
||||
spa_close(spa, FTAG);
|
||||
|
||||
(void) pthread_rwlock_unlock(&ztest_name_lock);
|
||||
|
|
|
@ -147,6 +147,7 @@ typedef enum zfs_error {
|
|||
EZFS_NO_TRIM, /* no active trim */
|
||||
EZFS_TRIM_NOTSUP, /* device does not support trim */
|
||||
EZFS_NO_RESILVER_DEFER, /* pool doesn't support resilver_defer */
|
||||
EZFS_EXPORT_IN_PROGRESS, /* currently exporting the pool */
|
||||
EZFS_UNKNOWN
|
||||
} zfs_error_t;
|
||||
|
||||
|
|
|
@ -1324,6 +1324,7 @@ typedef enum {
|
|||
ZFS_ERR_FROM_IVSET_GUID_MISMATCH,
|
||||
ZFS_ERR_SPILL_BLOCK_FLAG_MISSING,
|
||||
ZFS_ERR_UNKNOWN_SEND_STREAM_FEATURE,
|
||||
ZFS_ERR_EXPORT_IN_PROGRESS,
|
||||
} zfs_errno_t;
|
||||
|
||||
/*
|
||||
|
|
|
@ -220,6 +220,7 @@ struct spa {
|
|||
spa_taskqs_t spa_zio_taskq[ZIO_TYPES][ZIO_TASKQ_TYPES];
|
||||
dsl_pool_t *spa_dsl_pool;
|
||||
boolean_t spa_is_initializing; /* true while opening pool */
|
||||
boolean_t spa_is_exporting; /* true while exporting pool */
|
||||
metaslab_class_t *spa_normal_class; /* normal data class */
|
||||
metaslab_class_t *spa_log_class; /* intent log data class */
|
||||
metaslab_class_t *spa_special_class; /* special allocation class */
|
||||
|
|
|
@ -303,6 +303,8 @@ libzfs_error_description(libzfs_handle_t *hdl)
|
|||
case EZFS_NO_RESILVER_DEFER:
|
||||
return (dgettext(TEXT_DOMAIN, "this action requires the "
|
||||
"resilver_defer feature"));
|
||||
case EZFS_EXPORT_IN_PROGRESS:
|
||||
return (dgettext(TEXT_DOMAIN, "pool export in progress"));
|
||||
case EZFS_UNKNOWN:
|
||||
return (dgettext(TEXT_DOMAIN, "unknown error"));
|
||||
default:
|
||||
|
@ -599,6 +601,9 @@ zpool_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...)
|
|||
case ZFS_ERR_VDEV_TOO_BIG:
|
||||
zfs_verror(hdl, EZFS_VDEV_TOO_BIG, fmt, ap);
|
||||
break;
|
||||
case ZFS_ERR_EXPORT_IN_PROGRESS:
|
||||
zfs_verror(hdl, EZFS_EXPORT_IN_PROGRESS, fmt, ap);
|
||||
break;
|
||||
case ZFS_ERR_IOC_CMD_UNAVAIL:
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "the loaded zfs "
|
||||
"module does not support this operation. A reboot may "
|
||||
|
|
|
@ -5790,6 +5790,13 @@ spa_export_common(char *pool, int new_state, nvlist_t **oldconfig,
|
|||
return (SET_ERROR(ENOENT));
|
||||
}
|
||||
|
||||
if (spa->spa_is_exporting) {
|
||||
/* the pool is being exported by another thread */
|
||||
mutex_exit(&spa_namespace_lock);
|
||||
return (SET_ERROR(ZFS_ERR_EXPORT_IN_PROGRESS));
|
||||
}
|
||||
spa->spa_is_exporting = B_TRUE;
|
||||
|
||||
/*
|
||||
* Put a hold on the pool, drop the namespace lock, stop async tasks,
|
||||
* reacquire the namespace lock, and see if we can export.
|
||||
|
@ -5825,6 +5832,7 @@ spa_export_common(char *pool, int new_state, nvlist_t **oldconfig,
|
|||
(spa->spa_inject_ref != 0 &&
|
||||
new_state != POOL_STATE_UNINITIALIZED)) {
|
||||
spa_async_resume(spa);
|
||||
spa->spa_is_exporting = B_FALSE;
|
||||
mutex_exit(&spa_namespace_lock);
|
||||
return (SET_ERROR(EBUSY));
|
||||
}
|
||||
|
@ -5839,6 +5847,7 @@ spa_export_common(char *pool, int new_state, nvlist_t **oldconfig,
|
|||
if (!force && new_state == POOL_STATE_EXPORTED &&
|
||||
spa_has_active_shared_spare(spa)) {
|
||||
spa_async_resume(spa);
|
||||
spa->spa_is_exporting = B_FALSE;
|
||||
mutex_exit(&spa_namespace_lock);
|
||||
return (SET_ERROR(EXDEV));
|
||||
}
|
||||
|
@ -5890,9 +5899,16 @@ export_spa:
|
|||
if (!hardforce)
|
||||
spa_write_cachefile(spa, B_TRUE, B_TRUE);
|
||||
spa_remove(spa);
|
||||
} else {
|
||||
/*
|
||||
* If spa_remove() is not called for this spa_t and
|
||||
* there is any possibility that it can be reused,
|
||||
* we make sure to reset the exporting flag.
|
||||
*/
|
||||
spa->spa_is_exporting = B_FALSE;
|
||||
}
|
||||
mutex_exit(&spa_namespace_lock);
|
||||
|
||||
mutex_exit(&spa_namespace_lock);
|
||||
return (0);
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue