Prevent race in blkptr_verify against device removal
When we check the vdev of the blkptr in zfs_blkptr_verify, we can run into a race condition where that vdev is temporarily unavailable. This happens when a device removal operation is in progress: the old vdev_t has been removed from the array, but the new indirect vdev has not yet been inserted. We hold the spa_config_lock while doing our sensitive verification. To ensure that we don't deadlock, we only grab the lock if we don't already hold the config lock as a writer. In addition, I had to const the tags of the refcounts and the spa_config_lock arguments. Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Serapheim Dimitropoulos <serapheim@delphix.com> Signed-off-by: Paul Dagnelie <pcd@delphix.com> Closes #9112
This commit is contained in:
parent
8e556c5ebc
commit
dc04a8c757
|
@ -44,7 +44,7 @@ extern "C" {
|
||||||
#ifdef ZFS_DEBUG
|
#ifdef ZFS_DEBUG
|
||||||
typedef struct reference {
|
typedef struct reference {
|
||||||
list_node_t ref_link;
|
list_node_t ref_link;
|
||||||
void *ref_holder;
|
const void *ref_holder;
|
||||||
uint64_t ref_number;
|
uint64_t ref_number;
|
||||||
uint8_t *ref_removed;
|
uint8_t *ref_removed;
|
||||||
} reference_t;
|
} reference_t;
|
||||||
|
@ -70,16 +70,17 @@ void zfs_refcount_destroy(zfs_refcount_t *);
|
||||||
void zfs_refcount_destroy_many(zfs_refcount_t *, uint64_t);
|
void zfs_refcount_destroy_many(zfs_refcount_t *, uint64_t);
|
||||||
int zfs_refcount_is_zero(zfs_refcount_t *);
|
int zfs_refcount_is_zero(zfs_refcount_t *);
|
||||||
int64_t zfs_refcount_count(zfs_refcount_t *);
|
int64_t zfs_refcount_count(zfs_refcount_t *);
|
||||||
int64_t zfs_refcount_add(zfs_refcount_t *, void *);
|
int64_t zfs_refcount_add(zfs_refcount_t *, const void *);
|
||||||
int64_t zfs_refcount_remove(zfs_refcount_t *, void *);
|
int64_t zfs_refcount_remove(zfs_refcount_t *, const void *);
|
||||||
int64_t zfs_refcount_add_many(zfs_refcount_t *, uint64_t, void *);
|
int64_t zfs_refcount_add_many(zfs_refcount_t *, uint64_t, const void *);
|
||||||
int64_t zfs_refcount_remove_many(zfs_refcount_t *, uint64_t, void *);
|
int64_t zfs_refcount_remove_many(zfs_refcount_t *, uint64_t, const void *);
|
||||||
void zfs_refcount_transfer(zfs_refcount_t *, zfs_refcount_t *);
|
void zfs_refcount_transfer(zfs_refcount_t *, zfs_refcount_t *);
|
||||||
void zfs_refcount_transfer_ownership(zfs_refcount_t *, void *, void *);
|
void zfs_refcount_transfer_ownership(zfs_refcount_t *, const void *,
|
||||||
|
const void *);
|
||||||
void zfs_refcount_transfer_ownership_many(zfs_refcount_t *, uint64_t,
|
void zfs_refcount_transfer_ownership_many(zfs_refcount_t *, uint64_t,
|
||||||
void *, void *);
|
const void *, const void *);
|
||||||
boolean_t zfs_refcount_held(zfs_refcount_t *, void *);
|
boolean_t zfs_refcount_held(zfs_refcount_t *, const void *);
|
||||||
boolean_t zfs_refcount_not_held(zfs_refcount_t *, void *);
|
boolean_t zfs_refcount_not_held(zfs_refcount_t *, const void *);
|
||||||
|
|
||||||
void zfs_refcount_init(void);
|
void zfs_refcount_init(void);
|
||||||
void zfs_refcount_fini(void);
|
void zfs_refcount_fini(void);
|
||||||
|
|
|
@ -1007,8 +1007,8 @@ extern int spa_import_progress_set_state(uint64_t pool_guid,
|
||||||
|
|
||||||
/* Pool configuration locks */
|
/* Pool configuration locks */
|
||||||
extern int spa_config_tryenter(spa_t *spa, int locks, void *tag, krw_t rw);
|
extern int spa_config_tryenter(spa_t *spa, int locks, void *tag, krw_t rw);
|
||||||
extern void spa_config_enter(spa_t *spa, int locks, void *tag, krw_t rw);
|
extern void spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw);
|
||||||
extern void spa_config_exit(spa_t *spa, int locks, void *tag);
|
extern void spa_config_exit(spa_t *spa, int locks, const void *tag);
|
||||||
extern int spa_config_held(spa_t *spa, int locks, krw_t rw);
|
extern int spa_config_held(spa_t *spa, int locks, krw_t rw);
|
||||||
|
|
||||||
/* Pool vdev add/remove lock */
|
/* Pool vdev add/remove lock */
|
||||||
|
@ -1123,7 +1123,6 @@ extern boolean_t spa_has_checkpoint(spa_t *spa);
|
||||||
extern boolean_t spa_importing_readonly_checkpoint(spa_t *spa);
|
extern boolean_t spa_importing_readonly_checkpoint(spa_t *spa);
|
||||||
extern boolean_t spa_suspend_async_destroy(spa_t *spa);
|
extern boolean_t spa_suspend_async_destroy(spa_t *spa);
|
||||||
extern uint64_t spa_min_claim_txg(spa_t *spa);
|
extern uint64_t spa_min_claim_txg(spa_t *spa);
|
||||||
extern void zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp);
|
|
||||||
extern boolean_t zfs_dva_valid(spa_t *spa, const dva_t *dva,
|
extern boolean_t zfs_dva_valid(spa_t *spa, const dva_t *dva,
|
||||||
const blkptr_t *bp);
|
const blkptr_t *bp);
|
||||||
typedef void (*spa_remap_cb_t)(uint64_t vdev, uint64_t offset, uint64_t size,
|
typedef void (*spa_remap_cb_t)(uint64_t vdev, uint64_t offset, uint64_t size,
|
||||||
|
|
|
@ -121,7 +121,7 @@ zfs_refcount_count(zfs_refcount_t *rc)
|
||||||
}
|
}
|
||||||
|
|
||||||
int64_t
|
int64_t
|
||||||
zfs_refcount_add_many(zfs_refcount_t *rc, uint64_t number, void *holder)
|
zfs_refcount_add_many(zfs_refcount_t *rc, uint64_t number, const void *holder)
|
||||||
{
|
{
|
||||||
reference_t *ref = NULL;
|
reference_t *ref = NULL;
|
||||||
int64_t count;
|
int64_t count;
|
||||||
|
@ -143,13 +143,14 @@ zfs_refcount_add_many(zfs_refcount_t *rc, uint64_t number, void *holder)
|
||||||
}
|
}
|
||||||
|
|
||||||
int64_t
|
int64_t
|
||||||
zfs_refcount_add(zfs_refcount_t *rc, void *holder)
|
zfs_refcount_add(zfs_refcount_t *rc, const void *holder)
|
||||||
{
|
{
|
||||||
return (zfs_refcount_add_many(rc, 1, holder));
|
return (zfs_refcount_add_many(rc, 1, holder));
|
||||||
}
|
}
|
||||||
|
|
||||||
int64_t
|
int64_t
|
||||||
zfs_refcount_remove_many(zfs_refcount_t *rc, uint64_t number, void *holder)
|
zfs_refcount_remove_many(zfs_refcount_t *rc, uint64_t number,
|
||||||
|
const void *holder)
|
||||||
{
|
{
|
||||||
reference_t *ref;
|
reference_t *ref;
|
||||||
int64_t count;
|
int64_t count;
|
||||||
|
@ -197,7 +198,7 @@ zfs_refcount_remove_many(zfs_refcount_t *rc, uint64_t number, void *holder)
|
||||||
}
|
}
|
||||||
|
|
||||||
int64_t
|
int64_t
|
||||||
zfs_refcount_remove(zfs_refcount_t *rc, void *holder)
|
zfs_refcount_remove(zfs_refcount_t *rc, const void *holder)
|
||||||
{
|
{
|
||||||
return (zfs_refcount_remove_many(rc, 1, holder));
|
return (zfs_refcount_remove_many(rc, 1, holder));
|
||||||
}
|
}
|
||||||
|
@ -235,7 +236,7 @@ zfs_refcount_transfer(zfs_refcount_t *dst, zfs_refcount_t *src)
|
||||||
|
|
||||||
void
|
void
|
||||||
zfs_refcount_transfer_ownership_many(zfs_refcount_t *rc, uint64_t number,
|
zfs_refcount_transfer_ownership_many(zfs_refcount_t *rc, uint64_t number,
|
||||||
void *current_holder, void *new_holder)
|
const void *current_holder, const void *new_holder)
|
||||||
{
|
{
|
||||||
reference_t *ref;
|
reference_t *ref;
|
||||||
boolean_t found = B_FALSE;
|
boolean_t found = B_FALSE;
|
||||||
|
@ -260,8 +261,8 @@ zfs_refcount_transfer_ownership_many(zfs_refcount_t *rc, uint64_t number,
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
zfs_refcount_transfer_ownership(zfs_refcount_t *rc, void *current_holder,
|
zfs_refcount_transfer_ownership(zfs_refcount_t *rc, const void *current_holder,
|
||||||
void *new_holder)
|
const void *new_holder)
|
||||||
{
|
{
|
||||||
return (zfs_refcount_transfer_ownership_many(rc, 1, current_holder,
|
return (zfs_refcount_transfer_ownership_many(rc, 1, current_holder,
|
||||||
new_holder));
|
new_holder));
|
||||||
|
@ -273,7 +274,7 @@ zfs_refcount_transfer_ownership(zfs_refcount_t *rc, void *current_holder,
|
||||||
* might be held.
|
* might be held.
|
||||||
*/
|
*/
|
||||||
boolean_t
|
boolean_t
|
||||||
zfs_refcount_held(zfs_refcount_t *rc, void *holder)
|
zfs_refcount_held(zfs_refcount_t *rc, const void *holder)
|
||||||
{
|
{
|
||||||
reference_t *ref;
|
reference_t *ref;
|
||||||
|
|
||||||
|
@ -301,7 +302,7 @@ zfs_refcount_held(zfs_refcount_t *rc, void *holder)
|
||||||
* since the reference might not be held.
|
* since the reference might not be held.
|
||||||
*/
|
*/
|
||||||
boolean_t
|
boolean_t
|
||||||
zfs_refcount_not_held(zfs_refcount_t *rc, void *holder)
|
zfs_refcount_not_held(zfs_refcount_t *rc, const void *holder)
|
||||||
{
|
{
|
||||||
reference_t *ref;
|
reference_t *ref;
|
||||||
|
|
||||||
|
|
|
@ -484,7 +484,7 @@ spa_config_tryenter(spa_t *spa, int locks, void *tag, krw_t rw)
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
spa_config_enter(spa_t *spa, int locks, void *tag, krw_t rw)
|
spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw)
|
||||||
{
|
{
|
||||||
int wlocks_held = 0;
|
int wlocks_held = 0;
|
||||||
|
|
||||||
|
@ -517,7 +517,7 @@ spa_config_enter(spa_t *spa, int locks, void *tag, krw_t rw)
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
spa_config_exit(spa_t *spa, int locks, void *tag)
|
spa_config_exit(spa_t *spa, int locks, const void *tag)
|
||||||
{
|
{
|
||||||
for (int i = SCL_LOCKS - 1; i >= 0; i--) {
|
for (int i = SCL_LOCKS - 1; i >= 0; i--) {
|
||||||
spa_config_lock_t *scl = &spa->spa_config_lock[i];
|
spa_config_lock_t *scl = &spa->spa_config_lock[i];
|
||||||
|
|
|
@ -881,8 +881,8 @@ zio_root(spa_t *spa, zio_done_func_t *done, void *private, enum zio_flag flags)
|
||||||
return (zio_null(NULL, spa, NULL, done, private, flags));
|
return (zio_null(NULL, spa, NULL, done, private, flags));
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
static void
|
||||||
zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp)
|
zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp, boolean_t config_held)
|
||||||
{
|
{
|
||||||
if (!DMU_OT_IS_VALID(BP_GET_TYPE(bp))) {
|
if (!DMU_OT_IS_VALID(BP_GET_TYPE(bp))) {
|
||||||
zfs_panic_recover("blkptr at %p has invalid TYPE %llu",
|
zfs_panic_recover("blkptr at %p has invalid TYPE %llu",
|
||||||
|
@ -921,6 +921,10 @@ zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp)
|
||||||
if (!spa->spa_trust_config)
|
if (!spa->spa_trust_config)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
if (!config_held)
|
||||||
|
spa_config_enter(spa, SCL_VDEV, bp, RW_READER);
|
||||||
|
else
|
||||||
|
ASSERT(spa_config_held(spa, SCL_VDEV, RW_WRITER));
|
||||||
/*
|
/*
|
||||||
* Pool-specific checks.
|
* Pool-specific checks.
|
||||||
*
|
*
|
||||||
|
@ -969,6 +973,8 @@ zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp)
|
||||||
bp, i, (longlong_t)offset);
|
bp, i, (longlong_t)offset);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (!config_held)
|
||||||
|
spa_config_exit(spa, SCL_VDEV, bp);
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean_t
|
boolean_t
|
||||||
|
@ -1008,7 +1014,7 @@ zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
|
||||||
{
|
{
|
||||||
zio_t *zio;
|
zio_t *zio;
|
||||||
|
|
||||||
zfs_blkptr_verify(spa, bp);
|
zfs_blkptr_verify(spa, bp, flags & ZIO_FLAG_CONFIG_WRITER);
|
||||||
|
|
||||||
zio = zio_create(pio, spa, BP_PHYSICAL_BIRTH(bp), bp,
|
zio = zio_create(pio, spa, BP_PHYSICAL_BIRTH(bp), bp,
|
||||||
data, size, size, done, private,
|
data, size, size, done, private,
|
||||||
|
@ -1101,7 +1107,7 @@ void
|
||||||
zio_free(spa_t *spa, uint64_t txg, const blkptr_t *bp)
|
zio_free(spa_t *spa, uint64_t txg, const blkptr_t *bp)
|
||||||
{
|
{
|
||||||
|
|
||||||
zfs_blkptr_verify(spa, bp);
|
zfs_blkptr_verify(spa, bp, B_FALSE);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The check for EMBEDDED is a performance optimization. We
|
* The check for EMBEDDED is a performance optimization. We
|
||||||
|
@ -1171,7 +1177,7 @@ zio_claim(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
|
||||||
{
|
{
|
||||||
zio_t *zio;
|
zio_t *zio;
|
||||||
|
|
||||||
zfs_blkptr_verify(spa, bp);
|
zfs_blkptr_verify(spa, bp, flags & ZIO_FLAG_CONFIG_WRITER);
|
||||||
|
|
||||||
if (BP_IS_EMBEDDED(bp))
|
if (BP_IS_EMBEDDED(bp))
|
||||||
return (zio_null(pio, spa, NULL, NULL, NULL, 0));
|
return (zio_null(pio, spa, NULL, NULL, NULL, 0));
|
||||||
|
|
Loading…
Reference in New Issue