From cc9bb3e58e926d4e056a89046301c1349755957b Mon Sep 17 00:00:00 2001 From: George Melikov Date: Fri, 27 Jan 2017 22:43:42 +0300 Subject: [PATCH] OpenZFS 7254 - ztest failed assertion in ztest_dataset_dirobj_verify: dirobjs + 1 == usedobjs Authored by: Paul Dagnelie Reviewed by: George Wilson Reviewed by: Prakash Surya Reviewed by: Matthew Ahrens Reviewed by: Steve Gonczi Approved by: Robert Mustacchi Reviewed-by: Brian Behlendorf Ported-by: George Melikov OpenZFS-issue: https://www.illumos.org/issues/7254 OpenZFS-commit: https://github.com/openzfs/openzfs/commit/c166b69 Closes #5670 --- include/sys/dmu_objset.h | 9 +++++++-- include/sys/dsl_dataset.h | 2 ++ module/zfs/dbuf.c | 35 +++++++++++++++++++++++++++++------ module/zfs/dmu_objset.c | 15 +++++++++++++-- module/zfs/dmu_send.c | 4 ++++ module/zfs/dsl_dataset.c | 25 +++++++++++++++++++++---- module/zfs/dsl_destroy.c | 6 ++++++ module/zfs/dsl_pool.c | 6 +++++- module/zfs/dsl_scan.c | 2 ++ 9 files changed, 89 insertions(+), 15 deletions(-) diff --git a/include/sys/dmu_objset.h b/include/sys/dmu_objset.h index ed3cbf4989..940785a53d 100644 --- a/include/sys/dmu_objset.h +++ b/include/sys/dmu_objset.h @@ -20,8 +20,8 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2015 by Delphix. All rights reserved. * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. - * Copyright (c) 2012, 2014 by Delphix. All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. */ @@ -104,9 +104,14 @@ struct objset { zfs_redundant_metadata_type_t os_redundant_metadata; int os_recordsize; + /* + * Pointer is constant; the blkptr it points to is protected by + * os_dsl_dataset->ds_bp_rwlock + */ + blkptr_t *os_rootbp; + /* no lock needed: */ struct dmu_tx *os_synctx; /* XXX sketchy */ - blkptr_t *os_rootbp; zil_header_t os_zil_header; list_t os_synced_dnodes; uint64_t os_flags; diff --git a/include/sys/dsl_dataset.h b/include/sys/dsl_dataset.h index e46bd5f25a..fc87cbe865 100644 --- a/include/sys/dsl_dataset.h +++ b/include/sys/dsl_dataset.h @@ -38,6 +38,7 @@ #include #include #include +#include #include #ifdef __cplusplus @@ -149,6 +150,7 @@ typedef struct dsl_dataset_phys { typedef struct dsl_dataset { dmu_buf_user_t ds_dbu; + rrwlock_t ds_bp_rwlock; /* Protects ds_phys->ds_bp */ /* Immutable: */ struct dsl_dir *ds_dir; diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index ebb95ddb8f..01a7610915 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -1583,10 +1583,18 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) * objects may be dirtied in syncing context, but only if they * were already pre-dirtied in open context. */ +#ifdef DEBUG + if (dn->dn_objset->os_dsl_dataset != NULL) { + rrw_enter(&dn->dn_objset->os_dsl_dataset->ds_bp_rwlock, + RW_READER, FTAG); + } ASSERT(!dmu_tx_is_syncing(tx) || BP_IS_HOLE(dn->dn_objset->os_rootbp) || DMU_OBJECT_IS_SPECIAL(dn->dn_object) || dn->dn_objset->os_dsl_dataset == NULL); + if (dn->dn_objset->os_dsl_dataset != NULL) + rrw_exit(&dn->dn_objset->os_dsl_dataset->ds_bp_rwlock, FTAG); +#endif /* * We make this assert for private objects as well, but after we * check if we're already dirty. They are allowed to re-dirty @@ -1611,12 +1619,21 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) * Don't set dirtyctx to SYNC if we're just modifying this as we * initialize the objset. */ - if (dn->dn_dirtyctx == DN_UNDIRTIED && - !BP_IS_HOLE(dn->dn_objset->os_rootbp)) { - dn->dn_dirtyctx = - (dmu_tx_is_syncing(tx) ? DN_DIRTY_SYNC : DN_DIRTY_OPEN); - ASSERT(dn->dn_dirtyctx_firstset == NULL); - dn->dn_dirtyctx_firstset = kmem_alloc(1, KM_SLEEP); + if (dn->dn_dirtyctx == DN_UNDIRTIED) { + if (dn->dn_objset->os_dsl_dataset != NULL) { + rrw_enter(&dn->dn_objset->os_dsl_dataset->ds_bp_rwlock, + RW_READER, FTAG); + } + if (!BP_IS_HOLE(dn->dn_objset->os_rootbp)) { + dn->dn_dirtyctx = (dmu_tx_is_syncing(tx) ? + DN_DIRTY_SYNC : DN_DIRTY_OPEN); + ASSERT(dn->dn_dirtyctx_firstset == NULL); + dn->dn_dirtyctx_firstset = kmem_alloc(1, KM_SLEEP); + } + if (dn->dn_objset->os_dsl_dataset != NULL) { + rrw_exit(&dn->dn_objset->os_dsl_dataset->ds_bp_rwlock, + FTAG); + } } mutex_exit(&dn->dn_mtx); @@ -1661,8 +1678,14 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) * this assertion only if we're not already dirty. */ os = dn->dn_objset; +#ifdef DEBUG + if (dn->dn_objset->os_dsl_dataset != NULL) + rrw_enter(&os->os_dsl_dataset->ds_bp_rwlock, RW_READER, FTAG); ASSERT(!dmu_tx_is_syncing(tx) || DMU_OBJECT_IS_SPECIAL(dn->dn_object) || os->os_dsl_dataset == NULL || BP_IS_HOLE(os->os_rootbp)); + if (dn->dn_objset->os_dsl_dataset != NULL) + rrw_exit(&os->os_dsl_dataset->ds_bp_rwlock, FTAG); +#endif ASSERT(db->db.db_size != 0); dprintf_dbuf(db, "size=%llx\n", (u_longlong_t)db->db.db_size); diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c index 47e1b2d089..f09cd9e3c0 100644 --- a/module/zfs/dmu_objset.c +++ b/module/zfs/dmu_objset.c @@ -545,8 +545,10 @@ dmu_objset_from_ds(dsl_dataset_t *ds, objset_t **osp) mutex_enter(&ds->ds_opening_lock); if (ds->ds_objset == NULL) { objset_t *os; + rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); err = dmu_objset_open_impl(dsl_dataset_get_spa(ds), ds, dsl_dataset_get_blkptr(ds), &os); + rrw_exit(&ds->ds_bp_rwlock, FTAG); if (err == 0) { mutex_enter(&ds->ds_lock); @@ -945,9 +947,11 @@ dmu_objset_create_sync(void *arg, dmu_tx_t *tx) doca->doca_cred, tx); VERIFY0(dsl_dataset_hold_obj(pdd->dd_pool, obj, FTAG, &ds)); + rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); bp = dsl_dataset_get_blkptr(ds); os = dmu_objset_create_impl(pdd->dd_pool->dp_spa, ds, bp, doca->doca_type, tx); + rrw_exit(&ds->ds_bp_rwlock, FTAG); if (doca->doca_userfunc != NULL) { doca->doca_userfunc(os, doca->doca_userarg, @@ -1179,7 +1183,6 @@ dmu_objset_write_ready(zio_t *zio, arc_buf_t *abuf, void *arg) dnode_phys_t *dnp = &os->os_phys->os_meta_dnode; ASSERT(!BP_IS_EMBEDDED(bp)); - ASSERT3P(bp, ==, os->os_rootbp); ASSERT3U(BP_GET_TYPE(bp), ==, DMU_OT_OBJSET); ASSERT0(BP_GET_LEVEL(bp)); @@ -1192,6 +1195,11 @@ dmu_objset_write_ready(zio_t *zio, arc_buf_t *abuf, void *arg) bp->blk_fill = 0; for (i = 0; i < dnp->dn_nblkptr; i++) bp->blk_fill += BP_GET_FILL(&dnp->dn_blkptr[i]); + if (os->os_dsl_dataset != NULL) + rrw_enter(&os->os_dsl_dataset->ds_bp_rwlock, RW_WRITER, FTAG); + *os->os_rootbp = *bp; + if (os->os_dsl_dataset != NULL) + rrw_exit(&os->os_dsl_dataset->ds_bp_rwlock, FTAG); } /* ARGSUSED */ @@ -1211,6 +1219,7 @@ dmu_objset_write_done(zio_t *zio, arc_buf_t *abuf, void *arg) (void) dsl_dataset_block_kill(ds, bp_orig, tx, B_TRUE); dsl_dataset_block_born(ds, bp, tx); } + kmem_free(bp, sizeof (*bp)); } /* called from dsl */ @@ -1224,6 +1233,8 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx) list_t *list; list_t *newlist = NULL; dbuf_dirty_record_t *dr; + blkptr_t *blkptr_copy = kmem_alloc(sizeof (*os->os_rootbp), KM_SLEEP); + *blkptr_copy = *os->os_rootbp; dprintf_ds(os->os_dsl_dataset, "txg=%llu\n", tx->tx_txg); @@ -1251,7 +1262,7 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx) dmu_write_policy(os, NULL, 0, 0, ZIO_COMPRESS_INHERIT, &zp); zio = arc_write(pio, os->os_spa, tx->tx_txg, - os->os_rootbp, os->os_phys_buf, DMU_OS_IS_L2CACHEABLE(os), + blkptr_copy, os->os_phys_buf, DMU_OS_IS_L2CACHEABLE(os), &zp, dmu_objset_write_ready, NULL, NULL, dmu_objset_write_done, os, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb); diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c index e31f6e3c04..6e117651ba 100644 --- a/module/zfs/dmu_send.c +++ b/module/zfs/dmu_send.c @@ -1613,10 +1613,12 @@ dmu_recv_begin_sync(void *arg, dmu_tx_t *tx) * If we actually created a non-clone, we need to create the * objset in our new dataset. */ + rrw_enter(&newds->ds_bp_rwlock, RW_READER, FTAG); if (BP_IS_HOLE(dsl_dataset_get_blkptr(newds))) { (void) dmu_objset_create_impl(dp->dp_spa, newds, dsl_dataset_get_blkptr(newds), drrb->drr_type, tx); } + rrw_exit(&newds->ds_bp_rwlock, FTAG); drba->drba_cookie->drc_ds = newds; @@ -1759,7 +1761,9 @@ dmu_recv_resume_begin_sync(void *arg, dmu_tx_t *tx) dmu_buf_will_dirty(ds->ds_dbuf, tx); dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_INCONSISTENT; + rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); ASSERT(!BP_IS_HOLE(dsl_dataset_get_blkptr(ds))); + rrw_exit(&ds->ds_bp_rwlock, FTAG); drba->drba_cookie->drc_ds = ds; diff --git a/module/zfs/dsl_dataset.c b/module/zfs/dsl_dataset.c index fb6feeec9d..b6afb33a48 100644 --- a/module/zfs/dsl_dataset.c +++ b/module/zfs/dsl_dataset.c @@ -319,6 +319,7 @@ dsl_dataset_evict_async(void *dbu) mutex_destroy(&ds->ds_opening_lock); mutex_destroy(&ds->ds_sendstream_lock); refcount_destroy(&ds->ds_longholds); + rrw_destroy(&ds->ds_bp_rwlock); kmem_free(ds, sizeof (dsl_dataset_t)); } @@ -455,6 +456,7 @@ dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag, mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&ds->ds_sendstream_lock, NULL, MUTEX_DEFAULT, NULL); + rrw_init(&ds->ds_bp_rwlock, B_FALSE); refcount_create(&ds->ds_longholds); bplist_create(&ds->ds_pending_deadlist); @@ -862,7 +864,9 @@ dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, dsl_dataset_phys(origin)->ds_compressed_bytes; dsphys->ds_uncompressed_bytes = dsl_dataset_phys(origin)->ds_uncompressed_bytes; + rrw_enter(&origin->ds_bp_rwlock, RW_READER, FTAG); dsphys->ds_bp = dsl_dataset_phys(origin)->ds_bp; + rrw_exit(&origin->ds_bp_rwlock, FTAG); /* * Inherit flags that describe the dataset's contents @@ -1384,7 +1388,9 @@ dsl_dataset_snapshot_sync_impl(dsl_dataset_t *ds, const char *snapname, dsphys->ds_uncompressed_bytes = dsl_dataset_phys(ds)->ds_uncompressed_bytes; dsphys->ds_flags = dsl_dataset_phys(ds)->ds_flags; + rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); dsphys->ds_bp = dsl_dataset_phys(ds)->ds_bp; + rrw_exit(&ds->ds_bp_rwlock, FTAG); dmu_buf_rele(dbuf, FTAG); for (f = 0; f < SPA_FEATURES; f++) { @@ -1980,18 +1986,25 @@ dsl_dataset_space(dsl_dataset_t *ds, else *availbytesp = 0; } + rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); *usedobjsp = BP_GET_FILL(&dsl_dataset_phys(ds)->ds_bp); + rrw_exit(&ds->ds_bp_rwlock, FTAG); *availobjsp = DN_MAX_OBJECT - *usedobjsp; } boolean_t dsl_dataset_modified_since_snap(dsl_dataset_t *ds, dsl_dataset_t *snap) { - ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool)); + ASSERTV(dsl_pool_t *dp = ds->ds_dir->dd_pool); + uint64_t birth; + + ASSERT(dsl_pool_config_held(dp)); if (snap == NULL) return (B_FALSE); - if (dsl_dataset_phys(ds)->ds_bp.blk_birth > - dsl_dataset_phys(snap)->ds_creation_txg) { + rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); + birth = dsl_dataset_get_blkptr(ds)->blk_birth; + rrw_exit(&ds->ds_bp_rwlock, FTAG); + if (birth > dsl_dataset_phys(snap)->ds_creation_txg) { objset_t *os, *os_snap; /* * It may be that only the ZIL differs, because it was @@ -2948,6 +2961,7 @@ dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone, spa_feature_t f; dsl_pool_t *dp = dmu_tx_pool(tx); int64_t unused_refres_delta; + blkptr_t tmp; ASSERT(clone->ds_reserved == 0); /* @@ -3030,11 +3044,14 @@ dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone, /* swap blkptrs */ { - blkptr_t tmp; + rrw_enter(&clone->ds_bp_rwlock, RW_WRITER, FTAG); + rrw_enter(&origin_head->ds_bp_rwlock, RW_WRITER, FTAG); tmp = dsl_dataset_phys(origin_head)->ds_bp; dsl_dataset_phys(origin_head)->ds_bp = dsl_dataset_phys(clone)->ds_bp; dsl_dataset_phys(clone)->ds_bp = tmp; + rrw_exit(&origin_head->ds_bp_rwlock, FTAG); + rrw_exit(&clone->ds_bp_rwlock, FTAG); } /* set dd_*_bytes */ diff --git a/module/zfs/dsl_destroy.c b/module/zfs/dsl_destroy.c index 716081ba3a..d980f7d1fd 100644 --- a/module/zfs/dsl_destroy.c +++ b/module/zfs/dsl_destroy.c @@ -255,7 +255,9 @@ dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx) ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock)); + rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); ASSERT3U(dsl_dataset_phys(ds)->ds_bp.blk_birth, <=, tx->tx_txg); + rrw_exit(&ds->ds_bp_rwlock, FTAG); ASSERT(refcount_is_zero(&ds->ds_longholds)); if (defer && @@ -728,7 +730,9 @@ dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx) ASSERT3U(dsl_dataset_phys(ds)->ds_num_children, <=, 1); ASSERT(ds->ds_prev == NULL || dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj != ds->ds_object); + rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); ASSERT3U(dsl_dataset_phys(ds)->ds_bp.blk_birth, <=, tx->tx_txg); + rrw_exit(&ds->ds_bp_rwlock, FTAG); ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock)); /* We need to log before removing it from the namespace. */ @@ -819,10 +823,12 @@ dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx) ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || dsl_dataset_phys(ds)->ds_unique_bytes == used); + rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); bptree_add(mos, dp->dp_bptree_obj, &dsl_dataset_phys(ds)->ds_bp, dsl_dataset_phys(ds)->ds_prev_snap_txg, used, comp, uncomp, tx); + rrw_exit(&ds->ds_bp_rwlock, FTAG); dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, -used, -comp, -uncomp, tx); dsl_dir_diduse_space(dp->dp_free_dir, DD_USED_HEAD, diff --git a/module/zfs/dsl_pool.c b/module/zfs/dsl_pool.c index 1a62fba2c6..2ff3ae4568 100644 --- a/module/zfs/dsl_pool.c +++ b/module/zfs/dsl_pool.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2014 by Delphix. All rights reserved. + * Copyright (c) 2011, 2015 by Delphix. All rights reserved. * Copyright (c) 2013 Steven Hartland. All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. */ @@ -402,8 +402,10 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg) /* create the root objset */ VERIFY0(dsl_dataset_hold_obj(dp, obj, FTAG, &ds)); + rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); VERIFY(NULL != (os = dmu_objset_create_impl(dp->dp_spa, ds, dsl_dataset_get_blkptr(ds), DMU_OST_ZFS, tx))); + rrw_exit(&ds->ds_bp_rwlock, FTAG); #ifdef _KERNEL zfs_create_fs(os, kcred, zplprops, tx); #endif @@ -718,7 +720,9 @@ upgrade_clones_cb(dsl_pool_t *dp, dsl_dataset_t *hds, void *arg) * The $ORIGIN can't have any data, or the accounting * will be wrong. */ + rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); ASSERT0(dsl_dataset_phys(prev)->ds_bp.blk_birth); + rrw_exit(&ds->ds_bp_rwlock, FTAG); /* The origin doesn't get attached to itself */ if (ds->ds_object == prev->ds_object) { diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index c097a35ac1..f5ef2268d2 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -1142,7 +1142,9 @@ dsl_scan_visitds(dsl_scan_t *scn, uint64_t dsobj, dmu_tx_t *tx) * Iterate over the bps in this ds. */ dmu_buf_will_dirty(ds->ds_dbuf, tx); + rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); dsl_scan_visit_rootbp(scn, ds, &dsl_dataset_phys(ds)->ds_bp, tx); + rrw_exit(&ds->ds_bp_rwlock, FTAG); dsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); dsl_dataset_name(ds, dsname);