OpenZFS 6676 - Race between unique_insert() and unique_remove() causes ZFS fsid change

Authored by: Josef 'Jeff' Sipek <josef.sipek@nexenta.com>
Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Dan Vatca <dan.vatca@gmail.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Sebastien Roy <sebastien.roy@delphix.com>
Approved by: Robert Mustacchi <rm@joyent.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Ported-by: George Melikov <mail@gmelikov.ru>

OpenZFS-issue: https://www.illumos.org/issues/6676
OpenZFS-commit: https://github.com/openzfs/openzfs/commit/40510e8
Closes #5667
This commit is contained in:
George Melikov 2017-01-27 01:43:28 +03:00 committed by Brian Behlendorf
parent aeacdefedc
commit 39efbde7c5
9 changed files with 90 additions and 35 deletions

View File

@ -549,8 +549,14 @@ typedef struct dmu_buf_user {
*/ */
taskq_ent_t dbu_tqent; taskq_ent_t dbu_tqent;
/* This instance's eviction function pointer. */ /*
dmu_buf_evict_func_t *dbu_evict_func; * This instance's eviction function pointers.
*
* dbu_evict_func_sync is called synchronously and then
* dbu_evict_func_async is executed asynchronously on a taskq.
*/
dmu_buf_evict_func_t *dbu_evict_func_sync;
dmu_buf_evict_func_t *dbu_evict_func_async;
#ifdef ZFS_DEBUG #ifdef ZFS_DEBUG
/* /*
* Pointer to user's dbuf pointer. NULL for clients that do * Pointer to user's dbuf pointer. NULL for clients that do
@ -572,12 +578,16 @@ typedef struct dmu_buf_user {
*/ */
/*ARGSUSED*/ /*ARGSUSED*/
static inline void static inline void
dmu_buf_init_user(dmu_buf_user_t *dbu, dmu_buf_evict_func_t *evict_func, dmu_buf_init_user(dmu_buf_user_t *dbu, dmu_buf_evict_func_t *evict_func_sync,
dmu_buf_t **clear_on_evict_dbufp) dmu_buf_evict_func_t *evict_func_async, dmu_buf_t **clear_on_evict_dbufp)
{ {
ASSERT(dbu->dbu_evict_func == NULL); ASSERT(dbu->dbu_evict_func_sync == NULL);
ASSERT(evict_func != NULL); ASSERT(dbu->dbu_evict_func_async == NULL);
dbu->dbu_evict_func = evict_func;
/* must have at least one evict func */
IMPLY(evict_func_sync == NULL, evict_func_async != NULL);
dbu->dbu_evict_func_sync = evict_func_sync;
dbu->dbu_evict_func_async = evict_func_async;
taskq_init_ent(&dbu->dbu_tqent); taskq_init_ent(&dbu->dbu_tqent);
#ifdef ZFS_DEBUG #ifdef ZFS_DEBUG
dbu->dbu_clear_on_evict_dbufp = clear_on_evict_dbufp; dbu->dbu_clear_on_evict_dbufp = clear_on_evict_dbufp;

View File

@ -198,7 +198,7 @@ boolean_t zap_match(zap_name_t *zn, const char *matchname);
int zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx, int zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
krw_t lti, boolean_t fatreader, boolean_t adding, void *tag, zap_t **zapp); krw_t lti, boolean_t fatreader, boolean_t adding, void *tag, zap_t **zapp);
void zap_unlockdir(zap_t *zap, void *tag); void zap_unlockdir(zap_t *zap, void *tag);
void zap_evict(void *dbu); void zap_evict_sync(void *dbu);
zap_name_t *zap_name_alloc(zap_t *zap, const char *key, matchtype_t mt); zap_name_t *zap_name_alloc(zap_t *zap, const char *key, matchtype_t mt);
void zap_name_free(zap_name_t *zn); void zap_name_free(zap_name_t *zn);
int zap_hashbits(zap_t *zap); int zap_hashbits(zap_t *zap);

View File

@ -85,7 +85,9 @@ static void dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx);
#ifndef __lint #ifndef __lint
extern inline void dmu_buf_init_user(dmu_buf_user_t *dbu, extern inline void dmu_buf_init_user(dmu_buf_user_t *dbu,
dmu_buf_evict_func_t *evict_func, dmu_buf_t **clear_on_evict_dbufp); dmu_buf_evict_func_t *evict_func_sync,
dmu_buf_evict_func_t *evict_func_async,
dmu_buf_t **clear_on_evict_dbufp);
#endif /* ! __lint */ #endif /* ! __lint */
/* /*
@ -385,6 +387,7 @@ static void
dbuf_evict_user(dmu_buf_impl_t *db) dbuf_evict_user(dmu_buf_impl_t *db)
{ {
dmu_buf_user_t *dbu = db->db_user; dmu_buf_user_t *dbu = db->db_user;
boolean_t has_async;
ASSERT(MUTEX_HELD(&db->db_mtx)); ASSERT(MUTEX_HELD(&db->db_mtx));
@ -400,11 +403,24 @@ dbuf_evict_user(dmu_buf_impl_t *db)
#endif #endif
/* /*
* Invoke the callback from a taskq to avoid lock order reversals * There are two eviction callbacks - one that we call synchronously
* and limit stack depth. * and one that we invoke via a taskq. The async one is useful for
* avoiding lock order reversals and limiting stack depth.
*
* Note that if we have a sync callback but no async callback,
* it's likely that the sync callback will free the structure
* containing the dbu. In that case we need to take care to not
* dereference dbu after calling the sync evict func.
*/ */
taskq_dispatch_ent(dbu_evict_taskq, dbu->dbu_evict_func, dbu, 0, has_async = (dbu->dbu_evict_func_async != NULL);
&dbu->dbu_tqent);
if (dbu->dbu_evict_func_sync != NULL)
dbu->dbu_evict_func_sync(dbu);
if (has_async) {
taskq_dispatch_ent(dbu_evict_taskq, dbu->dbu_evict_func_async,
dbu, 0, &dbu->dbu_tqent);
}
} }
boolean_t boolean_t

View File

@ -1021,7 +1021,7 @@ dnode_special_open(objset_t *os, dnode_phys_t *dnp, uint64_t object,
} }
static void static void
dnode_buf_pageout(void *dbu) dnode_buf_evict_async(void *dbu)
{ {
dnode_children_t *children_dnodes = dbu; dnode_children_t *children_dnodes = dbu;
int i; int i;
@ -1271,8 +1271,8 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots,
for (i = 0; i < epb; i++) { for (i = 0; i < epb; i++) {
zrl_init(&dnh[i].dnh_zrlock); zrl_init(&dnh[i].dnh_zrlock);
} }
dmu_buf_init_user(&children_dnodes->dnc_dbu, dmu_buf_init_user(&children_dnodes->dnc_dbu, NULL,
dnode_buf_pageout, NULL); dnode_buf_evict_async, NULL);
winner = dmu_buf_set_user(&db->db, &children_dnodes->dnc_dbu); winner = dmu_buf_set_user(&db->db, &children_dnodes->dnc_dbu);
if (winner != NULL) { if (winner != NULL) {

View File

@ -20,7 +20,7 @@
*/ */
/* /*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2015 by Delphix. All rights reserved. * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
* Copyright (c) 2014, Joyent, Inc. All rights reserved. * Copyright (c) 2014, Joyent, Inc. All rights reserved.
* Copyright (c) 2014 RackTop Systems. * Copyright (c) 2014 RackTop Systems.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
@ -273,8 +273,24 @@ dsl_dataset_block_freeable(dsl_dataset_t *ds, const blkptr_t *bp,
return (B_TRUE); return (B_TRUE);
} }
/*
* We have to release the fsid syncronously or we risk that a subsequent
* mount of the same dataset will fail to unique_insert the fsid. This
* failure would manifest itself as the fsid of this dataset changing
* between mounts which makes NFS clients quite unhappy.
*/
static void static void
dsl_dataset_evict(void *dbu) dsl_dataset_evict_sync(void *dbu)
{
dsl_dataset_t *ds = dbu;
ASSERT(ds->ds_owner == NULL);
unique_remove(ds->ds_fsid_guid);
}
static void
dsl_dataset_evict_async(void *dbu)
{ {
dsl_dataset_t *ds = dbu; dsl_dataset_t *ds = dbu;
@ -282,8 +298,6 @@ dsl_dataset_evict(void *dbu)
ds->ds_dbuf = NULL; ds->ds_dbuf = NULL;
unique_remove(ds->ds_fsid_guid);
if (ds->ds_objset != NULL) if (ds->ds_objset != NULL)
dmu_objset_evict(ds->ds_objset); dmu_objset_evict(ds->ds_objset);
@ -525,7 +539,8 @@ dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
ds->ds_reserved = ds->ds_quota = 0; ds->ds_reserved = ds->ds_quota = 0;
} }
dmu_buf_init_user(&ds->ds_dbu, dsl_dataset_evict, &ds->ds_dbuf); dmu_buf_init_user(&ds->ds_dbu, dsl_dataset_evict_sync,
dsl_dataset_evict_async, &ds->ds_dbuf);
if (err == 0) if (err == 0)
winner = dmu_buf_set_user_ie(dbuf, &ds->ds_dbu); winner = dmu_buf_set_user_ie(dbuf, &ds->ds_dbu);
@ -548,6 +563,16 @@ dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
} else { } else {
ds->ds_fsid_guid = ds->ds_fsid_guid =
unique_insert(dsl_dataset_phys(ds)->ds_fsid_guid); unique_insert(dsl_dataset_phys(ds)->ds_fsid_guid);
if (ds->ds_fsid_guid !=
dsl_dataset_phys(ds)->ds_fsid_guid) {
zfs_dbgmsg("ds_fsid_guid changed from "
"%llx to %llx for pool %s dataset id %llu",
(long long)
dsl_dataset_phys(ds)->ds_fsid_guid,
(long long)ds->ds_fsid_guid,
spa_name(dp->dp_spa),
dsobj);
}
} }
} }
ASSERT3P(ds->ds_dbuf, ==, dbuf); ASSERT3P(ds->ds_dbuf, ==, dbuf);

View File

@ -20,7 +20,7 @@
*/ */
/* /*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2014 by Delphix. All rights reserved. * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
* Copyright (c) 2013 Martin Matuska. All rights reserved. * Copyright (c) 2013 Martin Matuska. All rights reserved.
* Copyright (c) 2014 Joyent, Inc. All rights reserved. * Copyright (c) 2014 Joyent, Inc. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
@ -129,7 +129,7 @@ extern inline dsl_dir_phys_t *dsl_dir_phys(dsl_dir_t *dd);
static uint64_t dsl_dir_space_towrite(dsl_dir_t *dd); static uint64_t dsl_dir_space_towrite(dsl_dir_t *dd);
static void static void
dsl_dir_evict(void *dbu) dsl_dir_evict_async(void *dbu)
{ {
dsl_dir_t *dd = dbu; dsl_dir_t *dd = dbu;
int t; int t;
@ -237,7 +237,8 @@ dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj,
dmu_buf_rele(origin_bonus, FTAG); dmu_buf_rele(origin_bonus, FTAG);
} }
dmu_buf_init_user(&dd->dd_dbu, dsl_dir_evict, &dd->dd_dbuf); dmu_buf_init_user(&dd->dd_dbu, NULL, dsl_dir_evict_async,
&dd->dd_dbuf);
winner = dmu_buf_set_user_ie(dbuf, &dd->dd_dbu); winner = dmu_buf_set_user_ie(dbuf, &dd->dd_dbu);
if (winner != NULL) { if (winner != NULL) {
if (dd->dd_parent) if (dd->dd_parent)

View File

@ -21,7 +21,7 @@
/* /*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved. * Copyright (c) 2016 by Delphix. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/ */
@ -1297,7 +1297,7 @@ sa_build_index(sa_handle_t *hdl, sa_buf_type_t buftype)
/*ARGSUSED*/ /*ARGSUSED*/
static void static void
sa_evict(void *dbu) sa_evict_sync(void *dbu)
{ {
panic("evicting sa dbuf\n"); panic("evicting sa dbuf\n");
} }
@ -1394,7 +1394,8 @@ sa_handle_get_from_db(objset_t *os, dmu_buf_t *db, void *userp,
sa_handle_t *winner = NULL; sa_handle_t *winner = NULL;
handle = kmem_cache_alloc(sa_cache, KM_SLEEP); handle = kmem_cache_alloc(sa_cache, KM_SLEEP);
handle->sa_dbu.dbu_evict_func = NULL; handle->sa_dbu.dbu_evict_func_sync = NULL;
handle->sa_dbu.dbu_evict_func_async = NULL;
handle->sa_userp = userp; handle->sa_userp = userp;
handle->sa_bonus = db; handle->sa_bonus = db;
handle->sa_os = os; handle->sa_os = os;
@ -1405,7 +1406,8 @@ sa_handle_get_from_db(objset_t *os, dmu_buf_t *db, void *userp,
error = sa_build_index(handle, SA_BONUS); error = sa_build_index(handle, SA_BONUS);
if (hdl_type == SA_HDL_SHARED) { if (hdl_type == SA_HDL_SHARED) {
dmu_buf_init_user(&handle->sa_dbu, sa_evict, NULL); dmu_buf_init_user(&handle->sa_dbu, sa_evict_sync, NULL,
NULL);
winner = dmu_buf_set_user_ie(db, &handle->sa_dbu); winner = dmu_buf_set_user_ie(db, &handle->sa_dbu);
} }

View File

@ -81,7 +81,8 @@ fzap_upgrade(zap_t *zap, dmu_tx_t *tx, zap_flags_t flags)
ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
zap->zap_ismicro = FALSE; zap->zap_ismicro = FALSE;
zap->zap_dbu.dbu_evict_func = zap_evict; zap->zap_dbu.dbu_evict_func_sync = zap_evict_sync;
zap->zap_dbu.dbu_evict_func_async = NULL;
mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, 0, 0); mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, 0, 0);
zap->zap_f.zap_block_shift = highbit64(zap->zap_dbuf->db_size) - 1; zap->zap_f.zap_block_shift = highbit64(zap->zap_dbuf->db_size) - 1;
@ -399,7 +400,7 @@ zap_allocate_blocks(zap_t *zap, int nblocks)
} }
static void static void
zap_leaf_pageout(void *dbu) zap_leaf_evict_sync(void *dbu)
{ {
zap_leaf_t *l = dbu; zap_leaf_t *l = dbu;
@ -423,7 +424,7 @@ zap_create_leaf(zap_t *zap, dmu_tx_t *tx)
VERIFY(0 == dmu_buf_hold(zap->zap_objset, zap->zap_object, VERIFY(0 == dmu_buf_hold(zap->zap_objset, zap->zap_object,
l->l_blkid << FZAP_BLOCK_SHIFT(zap), NULL, &l->l_dbuf, l->l_blkid << FZAP_BLOCK_SHIFT(zap), NULL, &l->l_dbuf,
DMU_READ_NO_PREFETCH)); DMU_READ_NO_PREFETCH));
dmu_buf_init_user(&l->l_dbu, zap_leaf_pageout, &l->l_dbuf); dmu_buf_init_user(&l->l_dbu, zap_leaf_evict_sync, NULL, &l->l_dbuf);
winner = dmu_buf_set_user(l->l_dbuf, &l->l_dbu); winner = dmu_buf_set_user(l->l_dbuf, &l->l_dbu);
ASSERT(winner == NULL); ASSERT(winner == NULL);
dmu_buf_will_dirty(l->l_dbuf, tx); dmu_buf_will_dirty(l->l_dbuf, tx);
@ -470,13 +471,13 @@ zap_open_leaf(uint64_t blkid, dmu_buf_t *db)
l->l_bs = highbit64(db->db_size) - 1; l->l_bs = highbit64(db->db_size) - 1;
l->l_dbuf = db; l->l_dbuf = db;
dmu_buf_init_user(&l->l_dbu, zap_leaf_pageout, &l->l_dbuf); dmu_buf_init_user(&l->l_dbu, zap_leaf_evict_sync, NULL, &l->l_dbuf);
winner = dmu_buf_set_user(db, &l->l_dbu); winner = dmu_buf_set_user(db, &l->l_dbu);
rw_exit(&l->l_rwlock); rw_exit(&l->l_rwlock);
if (winner != NULL) { if (winner != NULL) {
/* someone else set it first */ /* someone else set it first */
zap_leaf_pageout(&l->l_dbu); zap_leaf_evict_sync(&l->l_dbu);
l = winner; l = winner;
} }

View File

@ -392,7 +392,7 @@ mzap_open(objset_t *os, uint64_t obj, dmu_buf_t *db)
* it, because zap_lockdir() checks zap_ismicro without the lock * it, because zap_lockdir() checks zap_ismicro without the lock
* held. * held.
*/ */
dmu_buf_init_user(&zap->zap_dbu, zap_evict, &zap->zap_dbuf); dmu_buf_init_user(&zap->zap_dbu, zap_evict_sync, NULL, &zap->zap_dbuf);
winner = dmu_buf_set_user(db, &zap->zap_dbu); winner = dmu_buf_set_user(db, &zap->zap_dbu);
if (winner != NULL) if (winner != NULL)
@ -774,7 +774,7 @@ zap_destroy(objset_t *os, uint64_t zapobj, dmu_tx_t *tx)
} }
void void
zap_evict(void *dbu) zap_evict_sync(void *dbu)
{ {
zap_t *zap = dbu; zap_t *zap = dbu;