From 905edb405da278ccb019c656408f82796a344510 Mon Sep 17 00:00:00 2001 From: Matthew Ahrens Date: Sat, 11 Jul 2015 02:19:41 +0200 Subject: [PATCH] Illumos 5347 - idle pool may run itself out of space 5347 idle pool may run itself out of space Reviewed by: Alex Reece Reviewed by: George Wilson Reviewed by: Steven Hartland Reviewed by: Richard Elling Approved by: Dan McDonald References: https://github.com/illumos/illumos-gate/commit/231aab8 https://github.com/illumos/illumos-gate/commit/4a92375 3642 https://www.illumos.org/issues/5347 https://github.com/zfsonlinux/zfs/commit/89b1cd6 (partial commit & fix) https://github.com/zfsonlinux/zfs/commit/fbeddd6 Illumos 4390 https://github.com/zfsonlinux/zfs/commit/2696dfa Illumos 3642, 3643 Porting notes: This is completing the partial fix from FreeBSD Ported-by: kernelOfTruth kerneloftruth@gmail.com Signed-off-by: Brian Behlendorf Closes #3586 --- include/sys/uberblock.h | 7 ++++-- module/zfs/dsl_scan.c | 10 +++++--- module/zfs/spa.c | 51 ++++++++++++++++++++++++++++------------- module/zfs/uberblock.c | 8 +++---- 4 files changed, 51 insertions(+), 25 deletions(-) diff --git a/include/sys/uberblock.h b/include/sys/uberblock.h index b5bb915731..21e7ae0de7 100644 --- a/include/sys/uberblock.h +++ b/include/sys/uberblock.h @@ -22,6 +22,9 @@ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright (c) 2014 by Delphix. All rights reserved. + */ #ifndef _SYS_UBERBLOCK_H #define _SYS_UBERBLOCK_H @@ -36,8 +39,8 @@ extern "C" { typedef struct uberblock uberblock_t; -extern int uberblock_verify(uberblock_t *ub); -extern int uberblock_update(uberblock_t *ub, vdev_t *rvd, uint64_t txg); +extern int uberblock_verify(uberblock_t *); +extern boolean_t uberblock_update(uberblock_t *, vdev_t *, uint64_t); #ifdef __cplusplus } diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index 0489359710..b989e76338 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -1528,11 +1528,15 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) dp->dp_bptree_obj, tx)); dp->dp_bptree_obj = 0; scn->scn_async_destroying = B_FALSE; + scn->scn_async_stalled = B_FALSE; } else { /* - * If we didn't make progress, mark the async destroy as - * stalled, so that we will not initiate a spa_sync() on - * its behalf. + * If we didn't make progress, mark the async + * destroy as stalled, so that we will not initiate + * a spa_sync() on its behalf. Note that we only + * check this if we are not finished, because if the + * bptree had no blocks for us to visit, we can + * finish without "making progress". */ scn->scn_async_stalled = (scn->scn_visited_this_txg == 0); diff --git a/module/zfs/spa.c b/module/zfs/spa.c index b5e024c3f2..d8eaf9979f 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -6393,21 +6393,6 @@ spa_sync(spa_t *spa, uint64_t txg) } } - /* - * If anything has changed in this txg, or if someone is waiting - * for this txg to sync (eg, spa_vdev_remove()), push the - * deferred frees from the previous txg. If not, leave them - * alone so that we don't generate work on an otherwise idle - * system. - */ - if (!txg_list_empty(&dp->dp_dirty_datasets, txg) || - !txg_list_empty(&dp->dp_dirty_dirs, txg) || - !txg_list_empty(&dp->dp_sync_tasks, txg) || - ((dsl_scan_active(dp->dp_scan) || - txg_sync_waiting(dp)) && !spa_shutting_down(spa))) { - spa_sync_deferred_frees(spa, tx); - } - /* * Iterate to convergence. */ @@ -6425,6 +6410,11 @@ spa_sync(spa_t *spa, uint64_t txg) if (pass < zfs_sync_pass_deferred_free) { spa_sync_frees(spa, free_bpl, tx); } else { + /* + * We can not defer frees in pass 1, because + * we sync the deferred frees later in pass 1. + */ + ASSERT3U(pass, >, 1); bplist_iterate(free_bpl, bpobj_enqueue_cb, &spa->spa_deferred_bpobj, tx); } @@ -6435,8 +6425,37 @@ spa_sync(spa_t *spa, uint64_t txg) while ((vd = txg_list_remove(&spa->spa_vdev_txg_list, txg))) vdev_sync(vd, txg); - if (pass == 1) + if (pass == 1) { spa_sync_upgrades(spa, tx); + ASSERT3U(txg, >=, + spa->spa_uberblock.ub_rootbp.blk_birth); + /* + * Note: We need to check if the MOS is dirty + * because we could have marked the MOS dirty + * without updating the uberblock (e.g. if we + * have sync tasks but no dirty user data). We + * need to check the uberblock's rootbp because + * it is updated if we have synced out dirty + * data (though in this case the MOS will most + * likely also be dirty due to second order + * effects, we don't want to rely on that here). + */ + if (spa->spa_uberblock.ub_rootbp.blk_birth < txg && + !dmu_objset_is_dirty(mos, txg)) { + /* + * Nothing changed on the first pass, + * therefore this TXG is a no-op. Avoid + * syncing deferred frees, so that we + * can keep this TXG as a no-op. + */ + ASSERT(txg_list_empty(&dp->dp_dirty_datasets, + txg)); + ASSERT(txg_list_empty(&dp->dp_dirty_dirs, txg)); + ASSERT(txg_list_empty(&dp->dp_sync_tasks, txg)); + break; + } + spa_sync_deferred_frees(spa, tx); + } } while (dmu_objset_is_dirty(mos, txg)); diff --git a/module/zfs/uberblock.c b/module/zfs/uberblock.c index a07dc00ae1..f8bdecdf57 100644 --- a/module/zfs/uberblock.c +++ b/module/zfs/uberblock.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2013, 2014 by Delphix. All rights reserved. */ #include @@ -40,10 +40,10 @@ uberblock_verify(uberblock_t *ub) } /* - * Update the uberblock and return a boolean value indicating whether - * anything changed in this transaction group. + * Update the uberblock and return TRUE if anything changed in this + * transaction group. */ -int +boolean_t uberblock_update(uberblock_t *ub, vdev_t *rvd, uint64_t txg) { ASSERT(ub->ub_txg < txg);