From 89b1cd6581528c576bd4ff7f713f671b23b051b5 Mon Sep 17 00:00:00 2001 From: smh Date: Thu, 16 Oct 2014 02:23:27 +0000 Subject: [PATCH] Prevent ZFS leaking pool free space When processing async destroys, ZFS would leak space every txg timeout (5 seconds by default), if no writes occurred, until the pool is totally full. At this point it would be unfixable without a pool recreation. In addition, if the machine was rebooted with the pool in this state, the pool would fail to import on boot, hanging indefinitely, as the import process requires the ability to write data to the pool. Any attempts to query the pool status during the hung import would not return, as the import holds the pool lock. The only way to import such a pool would be to specify -o readonly=on to zpool import. zdb -bb can be used to check for "deferred free" size, which is where this lost space will be counted. References: https://github.com/freebsd/freebsd/commit/48431b7 http://svnweb.freebsd.org/base?view=revision&revision=273158 https://reviews.csiden.org/r/132/ Porting notes: This issue was filed as illumos 5347 and a more comprehensive fix is under review. Once that change is finalized it will be integrated; in the meantime the FreeBSD fix has been merged to prevent the issue. Ported by: Tim Chase Signed-off-by: Matthew Ahrens mahrens@delphix.com Signed-off-by: Brian Behlendorf Closes #2896 --- module/zfs/dsl_scan.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index eeec76f787..0e16002b37 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -1493,13 +1493,6 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) "traverse_dataset_destroyed()", err); } - /* - * If we didn't make progress, mark the async destroy as - * stalled, so that we will not initiate a spa_sync() on - * its behalf. 
- */ - scn->scn_async_stalled = (scn->scn_visited_this_txg == 0); - if (bptree_is_empty(dp->dp_meta_objset, dp->dp_bptree_obj)) { /* finished; deactivate async destroy feature */ spa_feature_decr(spa, SPA_FEATURE_ASYNC_DESTROY, tx); @@ -1512,6 +1505,14 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) dp->dp_bptree_obj, tx)); dp->dp_bptree_obj = 0; scn->scn_async_destroying = B_FALSE; + } else { + /* + * If we didn't make progress, mark the async destroy as + * stalled, so that we will not initiate a spa_sync() on + * its behalf. + */ + scn->scn_async_stalled = + (scn->scn_visited_this_txg == 0); } } if (scn->scn_visited_this_txg) {