Guarantee that spa_load_guid is unique
The zpool reguid feature introduced spa_load_guid, a transient value used for
runtime identification purposes in the ARC. This value is not the same as the
spa's persistent pool guid. However, the value was seeded from
spa_generate_guid(), which does not check for uniqueness against the
spa_load_guid of other pools. Although it is extremely rare, two different
pools could end up sharing the same spa_load_guid value. This change
guarantees that the value is always unique and, additionally, that it is not
still in use by an async ARC flush task.

Sponsored-by: Klara, Inc.
Sponsored-by: Wasabi Technology, Inc.
Signed-off-by: Don Brady <don.brady@klarasystems.com>
commit 52b68cd342
parent e360d9bbca
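The fix amounts to a retry loop when seeding the load guid: keep drawing random
64-bit values until one is found that is non-zero, not used by any imported
pool, and not still referenced by an in-flight async ARC flush. The following
is a minimal userspace sketch of that pattern, not the kernel implementation
itself; guid_in_use() and random64() are hypothetical stand-ins for the
spa_load_guid_exists()/arc_async_flush_guid_inuse() checks and the
random_get_pseudo_bytes() call that appear in the diff below.

/*
 * Sketch only: mimics the retry-until-unique seeding of spa_load_guid.
 * guid_in_use() and random64() are illustrative stand-ins, not ZFS APIs.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Pretend these are load guids already claimed by imported pools. */
static const uint64_t live_guids[] = { 0xdeadbeefULL, 0x1122334455667788ULL };

static int
guid_in_use(uint64_t guid)
{
        /* The real checks also cover guids held by pending async ARC flushes. */
        for (size_t i = 0; i < sizeof (live_guids) / sizeof (live_guids[0]); i++) {
                if (live_guids[i] == guid)
                        return (1);
        }
        return (0);
}

static uint64_t
random64(void)
{
        /* Weak RNG, for illustration; stands in for random_get_pseudo_bytes(). */
        return (((uint64_t)rand() << 32) ^ (uint64_t)rand());
}

static uint64_t
generate_load_guid(void)
{
        uint64_t guid;

        /* Zero is reserved; any collision simply forces another draw. */
        do {
                guid = random64();
        } while (guid == 0 || guid_in_use(guid));

        return (guid);
}

int
main(void)
{
        printf("load guid: 0x%llx\n", (unsigned long long)generate_load_guid());
        return (0);
}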
include/sys/arc.h

@@ -333,6 +333,7 @@ void arc_flush(spa_t *spa, boolean_t retry);
 void arc_flush_async(spa_t *spa);
 void arc_tempreserve_clear(uint64_t reserve);
 int arc_tempreserve_space(spa_t *spa, uint64_t reserve, uint64_t txg);
+boolean_t arc_async_flush_guid_inuse(uint64_t load_guid);

 uint64_t arc_all_memory(void);
 uint64_t arc_default_max(uint64_t min, uint64_t allmem);
include/sys/spa.h

@@ -1090,6 +1090,7 @@ extern boolean_t spa_guid_exists(uint64_t pool_guid, uint64_t device_guid);
 extern char *spa_strdup(const char *);
 extern void spa_strfree(char *);
 extern uint64_t spa_generate_guid(spa_t *spa);
+extern uint64_t spa_generate_load_guid(void);
 extern void snprintf_blkptr(char *buf, size_t buflen, const blkptr_t *bp);
 extern void spa_freeze(spa_t *spa);
 extern int spa_change_guid(spa_t *spa);
module/zfs/arc.c
@@ -775,6 +775,21 @@ static buf_hash_table_t buf_hash_table;

 uint64_t zfs_crc64_table[256];

+/*
+ * Asynchronous ARC flush
+ *
+ * We track these in a list for arc_async_flush_guid_inuse()
+ */
+static list_t arc_async_flush_list;
+static kmutex_t arc_async_flush_lock;
+
+typedef struct arc_async_flush {
+        uint64_t af_spa_guid;
+        taskqid_t af_task_id;
+        list_node_t af_node;
+} arc_async_flush_t;
+
 /*
  * Level 2 ARC
  */
@@ -4424,19 +4439,50 @@ arc_flush(spa_t *spa, boolean_t retry)
         arc_flush_impl(spa != NULL ? spa_load_guid(spa) : 0, retry);
 }

+static arc_async_flush_t *
+arc_async_flush_add(uint64_t spa_guid, taskqid_t task_id)
+{
+        arc_async_flush_t *af = kmem_alloc(sizeof (*af), KM_SLEEP);
+        af->af_spa_guid = spa_guid;
+        af->af_task_id = task_id;
+        list_link_init(&af->af_node);
+
+        mutex_enter(&arc_async_flush_lock);
+        list_insert_tail(&arc_async_flush_list, af);
+        mutex_exit(&arc_async_flush_lock);
+
+        return (af);
+}
+
+static void
+arc_async_flush_remove(uint64_t spa_guid, taskqid_t task_id)
+{
+        mutex_enter(&arc_async_flush_lock);
+        for (arc_async_flush_t *af = list_head(&arc_async_flush_list);
+            af != NULL; af = list_next(&arc_async_flush_list, af)) {
+                if (af->af_spa_guid == spa_guid && af->af_task_id == task_id) {
+                        list_remove(&arc_async_flush_list, af);
+                        kmem_free(af, sizeof (*af));
+                        break;
+                }
+        }
+        mutex_exit(&arc_async_flush_lock);
+}
+
 static void
 arc_flush_task(void *arg)
 {
-        uint64_t guid = *((uint64_t *)arg);
+        arc_async_flush_t *af = arg;
         hrtime_t start_time = gethrtime();
+        uint64_t spa_guid = af->af_spa_guid;

-        arc_flush_impl(guid, B_FALSE);
-        kmem_free(arg, sizeof (uint64_t *));
+        arc_flush_impl(spa_guid, B_FALSE);
+        arc_async_flush_remove(spa_guid, af->af_task_id);

         uint64_t elaspsed = NSEC2MSEC(gethrtime() - start_time);
         if (elaspsed > 0) {
                 zfs_dbgmsg("spa %llu arc flushed in %llu ms",
-                    (u_longlong_t)guid, (u_longlong_t)elaspsed);
+                    (u_longlong_t)spa_guid, (u_longlong_t)elaspsed);
         }
 }
@@ -4453,17 +4499,46 @@ arc_flush_task(void *arg)
 void
 arc_flush_async(spa_t *spa)
 {
-        uint64_t *guidp = kmem_alloc(sizeof (uint64_t *), KM_SLEEP);
+        uint64_t spa_guid = spa_load_guid(spa);
+        arc_async_flush_t *af = arc_async_flush_add(spa_guid, TASKQID_INVALID);

-        *guidp = spa_load_guid(spa);
+        /*
+         * Note that arc_flush_task() needs arc_async_flush_lock to remove af
+         * list node. So by holding the lock we avoid a race for af removal
+         * with our use here.
+         */
+        mutex_enter(&arc_async_flush_lock);
+        taskqid_t tid = af->af_task_id = taskq_dispatch(arc_flush_taskq,
+            arc_flush_task, af, TQ_SLEEP);
+        mutex_exit(&arc_async_flush_lock);

-        if (taskq_dispatch(arc_flush_taskq, arc_flush_task, guidp,
-            TQ_SLEEP) == TASKQID_INVALID) {
-                arc_flush_impl(*guidp, B_FALSE);
-                kmem_free(guidp, sizeof (uint64_t *));
+        /*
+         * unlikely, but if we couldn't dispatch then use an inline flush
+         */
+        if (tid == TASKQID_INVALID) {
+                arc_async_flush_remove(spa_guid, TASKQID_INVALID);
+                arc_flush_impl(spa_guid, B_FALSE);
         }
 }

+/*
+ * Check if a guid is still in-use as part of an async teardown task
+ */
+boolean_t
+arc_async_flush_guid_inuse(uint64_t spa_guid)
+{
+        mutex_enter(&arc_async_flush_lock);
+        for (arc_async_flush_t *af = list_head(&arc_async_flush_list);
+            af != NULL; af = list_next(&arc_async_flush_list, af)) {
+                if (af->af_spa_guid == spa_guid) {
+                        mutex_exit(&arc_async_flush_lock);
+                        return (B_TRUE);
+                }
+        }
+        mutex_exit(&arc_async_flush_lock);
+        return (B_FALSE);
+}
+
 uint64_t
 arc_reduce_target_size(uint64_t to_free)
 {
@@ -7802,6 +7877,9 @@ arc_init(void)
         arc_prune_taskq = taskq_create("arc_prune", zfs_arc_prune_task_threads,
             defclsyspri, 100, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC);

+        list_create(&arc_async_flush_list, sizeof (arc_async_flush_t),
+            offsetof(arc_async_flush_t, af_node));
+        mutex_init(&arc_async_flush_lock, NULL, MUTEX_DEFAULT, NULL);
         arc_flush_taskq = taskq_create("arc_flush", 75, defclsyspri,
             1, INT_MAX, TASKQ_DYNAMIC | TASKQ_THREADS_CPU_PCT);

@@ -7885,6 +7963,9 @@ arc_fini(void)
         taskq_wait(arc_prune_taskq);
         taskq_destroy(arc_prune_taskq);

+        list_destroy(&arc_async_flush_list);
+        mutex_destroy(&arc_async_flush_lock);
+
         mutex_enter(&arc_prune_mtx);
         while ((p = list_remove_head(&arc_prune_list)) != NULL) {
                 (void) zfs_refcount_remove(&p->p_refcnt, &arc_prune_list);
@@ -9801,6 +9882,7 @@ typedef struct {
         l2arc_dev_t *rva_l2arc_dev;
         uint64_t rva_spa_gid;
         uint64_t rva_vdev_gid;
+        taskqid_t rva_task_id;
 } remove_vdev_args_t;

 static void
@@ -9831,6 +9913,10 @@ l2arc_device_teardown(void *arg)
                     (u_longlong_t)rva->rva_vdev_gid,
                     (u_longlong_t)elaspsed);
         }
+
+        if (rva->rva_task_id != TASKQID_INVALID)
+                arc_async_flush_remove(rva->rva_spa_gid, rva->rva_task_id);
+
         kmem_free(rva, sizeof (remove_vdev_args_t));
 }
@@ -9889,11 +9975,22 @@ l2arc_remove_vdev(vdev_t *vd)
         }
         mutex_exit(&l2arc_dev_mtx);

         /*
          * If possible, the teardown is completed asynchronously
          */
-        if (!asynchronous || taskq_dispatch(arc_flush_taskq,
-            l2arc_device_teardown, rva, TQ_SLEEP) == TASKQID_INVALID) {
+        if (!asynchronous) {
                 l2arc_device_teardown(rva);
+                return;
         }
+
+        uint64_t spa_guid = spa_load_guid(spa);
+        arc_async_flush_t *af = arc_async_flush_add(spa_guid, TASKQID_INVALID);
+
+        mutex_enter(&arc_async_flush_lock);
+        taskqid_t tid = taskq_dispatch(arc_flush_taskq, l2arc_device_teardown,
+            rva, TQ_SLEEP);
+        rva->rva_task_id = af->af_task_id = tid;
+        mutex_exit(&arc_async_flush_lock);
+
+        if (tid == TASKQID_INVALID) {
+                arc_async_flush_remove(spa_guid, TASKQID_INVALID);
+                l2arc_device_teardown(rva);
+        }
 }
module/zfs/spa_misc.c

@@ -1588,6 +1588,34 @@ spa_generate_guid(spa_t *spa)
         return (guid);
 }

+static boolean_t
+spa_load_guid_exists(uint64_t guid)
+{
+        avl_tree_t *t = &spa_namespace_avl;
+
+        ASSERT(MUTEX_HELD(&spa_namespace_lock));
+
+        for (spa_t *spa = avl_first(t); spa != NULL; spa = AVL_NEXT(t, spa)) {
+                if (spa_load_guid(spa) == guid)
+                        return (B_TRUE);
+        }
+
+        return (arc_async_flush_guid_inuse(guid));
+}
+
+uint64_t
+spa_generate_load_guid(void)
+{
+        uint64_t guid;
+
+        do {
+                (void) random_get_pseudo_bytes((void *)&guid,
+                    sizeof (guid));
+        } while (guid == 0 || spa_load_guid_exists(guid));
+
+        return (guid);
+}
+
 void
 snprintf_blkptr(char *buf, size_t buflen, const blkptr_t *bp)
 {
module/zfs/vdev.c

@@ -631,7 +631,7 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
         if (spa->spa_root_vdev == NULL) {
                 ASSERT(ops == &vdev_root_ops);
                 spa->spa_root_vdev = vd;
-                spa->spa_load_guid = spa_generate_guid(NULL);
+                spa->spa_load_guid = spa_generate_load_guid();
         }

         if (guid == 0 && ops != &vdev_hole_ops) {