From 93fd9101c9cc46879d6512cbe3363723d0c5d7c1 Mon Sep 17 00:00:00 2001 From: Paul Dagnelie Date: Tue, 13 Aug 2019 20:24:43 -0700 Subject: [PATCH] Prevent race in blkptr_verify against device removal When we check the vdev of the blkptr in zfs_blkptr_verify, we can run into a race condition where that vdev is temporarily unavailable. This happens when a device removal operation and the old vdev_t has been removed from the array, but the new indirect vdev has not yet been inserted. We hold the spa_config_lock while doing our sensitive verification. To ensure that we don't deadlock, we only grab the lock if we don't have config_writer held. In addition, I had to const the tags of the refcounts and the spa_config_lock arguments. Reviewed-by: Brian Behlendorf Reviewed-by: Serapheim Dimitropoulos Signed-off-by: Paul Dagnelie Closes #9112 --- include/sys/refcount.h | 19 ++++++++++--------- include/sys/spa.h | 5 ++--- module/zfs/refcount.c | 19 ++++++++++--------- module/zfs/spa_misc.c | 4 ++-- module/zfs/zio.c | 16 +++++++++++----- 5 files changed, 35 insertions(+), 28 deletions(-) diff --git a/include/sys/refcount.h b/include/sys/refcount.h index e982faeba0..c8f5862303 100644 --- a/include/sys/refcount.h +++ b/include/sys/refcount.h @@ -44,7 +44,7 @@ extern "C" { #ifdef ZFS_DEBUG typedef struct reference { list_node_t ref_link; - void *ref_holder; + const void *ref_holder; uint64_t ref_number; uint8_t *ref_removed; } reference_t; @@ -70,16 +70,17 @@ void zfs_refcount_destroy(zfs_refcount_t *); void zfs_refcount_destroy_many(zfs_refcount_t *, uint64_t); int zfs_refcount_is_zero(zfs_refcount_t *); int64_t zfs_refcount_count(zfs_refcount_t *); -int64_t zfs_refcount_add(zfs_refcount_t *, void *); -int64_t zfs_refcount_remove(zfs_refcount_t *, void *); -int64_t zfs_refcount_add_many(zfs_refcount_t *, uint64_t, void *); -int64_t zfs_refcount_remove_many(zfs_refcount_t *, uint64_t, void *); +int64_t zfs_refcount_add(zfs_refcount_t *, const void *); +int64_t zfs_refcount_remove(zfs_refcount_t *, const void *); +int64_t zfs_refcount_add_many(zfs_refcount_t *, uint64_t, const void *); +int64_t zfs_refcount_remove_many(zfs_refcount_t *, uint64_t, const void *); void zfs_refcount_transfer(zfs_refcount_t *, zfs_refcount_t *); -void zfs_refcount_transfer_ownership(zfs_refcount_t *, void *, void *); +void zfs_refcount_transfer_ownership(zfs_refcount_t *, const void *, + const void *); void zfs_refcount_transfer_ownership_many(zfs_refcount_t *, uint64_t, - void *, void *); -boolean_t zfs_refcount_held(zfs_refcount_t *, void *); -boolean_t zfs_refcount_not_held(zfs_refcount_t *, void *); + const void *, const void *); +boolean_t zfs_refcount_held(zfs_refcount_t *, const void *); +boolean_t zfs_refcount_not_held(zfs_refcount_t *, const void *); void zfs_refcount_init(void); void zfs_refcount_fini(void); diff --git a/include/sys/spa.h b/include/sys/spa.h index d43801de5d..42bf9dcc10 100644 --- a/include/sys/spa.h +++ b/include/sys/spa.h @@ -976,8 +976,8 @@ extern int spa_import_progress_set_state(uint64_t pool_guid, /* Pool configuration locks */ extern int spa_config_tryenter(spa_t *spa, int locks, void *tag, krw_t rw); -extern void spa_config_enter(spa_t *spa, int locks, void *tag, krw_t rw); -extern void spa_config_exit(spa_t *spa, int locks, void *tag); +extern void spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw); +extern void spa_config_exit(spa_t *spa, int locks, const void *tag); extern int spa_config_held(spa_t *spa, int locks, krw_t rw); /* Pool vdev add/remove lock */ @@ -1091,7 +1091,6 @@ extern boolean_t spa_has_checkpoint(spa_t *spa); extern boolean_t spa_importing_readonly_checkpoint(spa_t *spa); extern boolean_t spa_suspend_async_destroy(spa_t *spa); extern uint64_t spa_min_claim_txg(spa_t *spa); -extern void zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp); extern boolean_t zfs_dva_valid(spa_t *spa, const dva_t *dva, const blkptr_t *bp); typedef void (*spa_remap_cb_t)(uint64_t vdev, uint64_t offset, uint64_t size, diff --git a/module/zfs/refcount.c b/module/zfs/refcount.c index bcaa6d3875..a7e46d3790 100644 --- a/module/zfs/refcount.c +++ b/module/zfs/refcount.c @@ -121,7 +121,7 @@ zfs_refcount_count(zfs_refcount_t *rc) } int64_t -zfs_refcount_add_many(zfs_refcount_t *rc, uint64_t number, void *holder) +zfs_refcount_add_many(zfs_refcount_t *rc, uint64_t number, const void *holder) { reference_t *ref = NULL; int64_t count; @@ -143,13 +143,14 @@ zfs_refcount_add_many(zfs_refcount_t *rc, uint64_t number, void *holder) } int64_t -zfs_refcount_add(zfs_refcount_t *rc, void *holder) +zfs_refcount_add(zfs_refcount_t *rc, const void *holder) { return (zfs_refcount_add_many(rc, 1, holder)); } int64_t -zfs_refcount_remove_many(zfs_refcount_t *rc, uint64_t number, void *holder) +zfs_refcount_remove_many(zfs_refcount_t *rc, uint64_t number, + const void *holder) { reference_t *ref; int64_t count; @@ -197,7 +198,7 @@ zfs_refcount_remove_many(zfs_refcount_t *rc, uint64_t number, void *holder) } int64_t -zfs_refcount_remove(zfs_refcount_t *rc, void *holder) +zfs_refcount_remove(zfs_refcount_t *rc, const void *holder) { return (zfs_refcount_remove_many(rc, 1, holder)); } @@ -235,7 +236,7 @@ zfs_refcount_transfer(zfs_refcount_t *dst, zfs_refcount_t *src) void zfs_refcount_transfer_ownership_many(zfs_refcount_t *rc, uint64_t number, - void *current_holder, void *new_holder) + const void *current_holder, const void *new_holder) { reference_t *ref; boolean_t found = B_FALSE; @@ -260,8 +261,8 @@ zfs_refcount_transfer_ownership_many(zfs_refcount_t *rc, uint64_t number, } void -zfs_refcount_transfer_ownership(zfs_refcount_t *rc, void *current_holder, - void *new_holder) +zfs_refcount_transfer_ownership(zfs_refcount_t *rc, const void *current_holder, + const void *new_holder) { return (zfs_refcount_transfer_ownership_many(rc, 1, current_holder, new_holder)); @@ -273,7 +274,7 @@ zfs_refcount_transfer_ownership(zfs_refcount_t *rc, void *current_holder, * might be held. */ boolean_t -zfs_refcount_held(zfs_refcount_t *rc, void *holder) +zfs_refcount_held(zfs_refcount_t *rc, const void *holder) { reference_t *ref; @@ -301,7 +302,7 @@ zfs_refcount_held(zfs_refcount_t *rc, void *holder) * since the reference might not be held. */ boolean_t -zfs_refcount_not_held(zfs_refcount_t *rc, void *holder) +zfs_refcount_not_held(zfs_refcount_t *rc, const void *holder) { reference_t *ref; diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index 185b702014..ecdb3c6151 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -484,7 +484,7 @@ spa_config_tryenter(spa_t *spa, int locks, void *tag, krw_t rw) } void -spa_config_enter(spa_t *spa, int locks, void *tag, krw_t rw) +spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw) { int wlocks_held = 0; @@ -517,7 +517,7 @@ spa_config_enter(spa_t *spa, int locks, void *tag, krw_t rw) } void -spa_config_exit(spa_t *spa, int locks, void *tag) +spa_config_exit(spa_t *spa, int locks, const void *tag) { for (int i = SCL_LOCKS - 1; i >= 0; i--) { spa_config_lock_t *scl = &spa->spa_config_lock[i]; diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 5638f53193..aac0392a4a 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -881,8 +881,8 @@ zio_root(spa_t *spa, zio_done_func_t *done, void *private, enum zio_flag flags) return (zio_null(NULL, spa, NULL, done, private, flags)); } -void -zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp) +static void +zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp, boolean_t config_held) { if (!DMU_OT_IS_VALID(BP_GET_TYPE(bp))) { zfs_panic_recover("blkptr at %p has invalid TYPE %llu", @@ -921,6 +921,10 @@ zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp) if (!spa->spa_trust_config) return; + if (!config_held) + spa_config_enter(spa, SCL_VDEV, bp, RW_READER); + else + ASSERT(spa_config_held(spa, SCL_VDEV, RW_WRITER)); /* * Pool-specific checks. * @@ -969,6 +973,8 @@ zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp) bp, i, (longlong_t)offset); } } + if (!config_held) + spa_config_exit(spa, SCL_VDEV, bp); } boolean_t @@ -1008,7 +1014,7 @@ zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, { zio_t *zio; - zfs_blkptr_verify(spa, bp); + zfs_blkptr_verify(spa, bp, flags & ZIO_FLAG_CONFIG_WRITER); zio = zio_create(pio, spa, BP_PHYSICAL_BIRTH(bp), bp, data, size, size, done, private, @@ -1101,7 +1107,7 @@ void zio_free(spa_t *spa, uint64_t txg, const blkptr_t *bp) { - zfs_blkptr_verify(spa, bp); + zfs_blkptr_verify(spa, bp, B_FALSE); /* * The check for EMBEDDED is a performance optimization. We @@ -1166,7 +1172,7 @@ zio_claim(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, { zio_t *zio; - zfs_blkptr_verify(spa, bp); + zfs_blkptr_verify(spa, bp, flags & ZIO_FLAG_CONFIG_WRITER); if (BP_IS_EMBEDDED(bp)) return (zio_null(pio, spa, NULL, NULL, NULL, 0));