From 65282ee9e06b130f1f0169baf5d9bf0dd8fc1ef9 Mon Sep 17 00:00:00 2001 From: Alek P Date: Tue, 12 Feb 2019 12:01:08 -0800 Subject: [PATCH] Freeing throttle should account for holes Deletion throttle currently does not account for holes in a file. This means that it can activate when it shouldn't. To fix it we switch the throttle to be based on the number of L1 blocks we will have to dirty when freeing Reviewed by: Tom Caputi Reviewed by: Matt Ahrens Reviewed-by: Brian Behlendorf Signed-off-by: Alek Pinchuk Closes #7725 Closes #7888 --- man/man5/zfs-module-parameters.5 | 12 +++++----- module/zfs/dmu.c | 41 ++++++++++++++++++++++++-------- 2 files changed, 37 insertions(+), 16 deletions(-) diff --git a/man/man5/zfs-module-parameters.5 b/man/man5/zfs-module-parameters.5 index f6b0431830..dc9fcc6b42 100644 --- a/man/man5/zfs-module-parameters.5 +++ b/man/man5/zfs-module-parameters.5 @@ -1,7 +1,7 @@ '\" te .\" Copyright (c) 2013 by Turbo Fredriksson . All rights reserved. -.\" Copyright (c) 2017 Datto Inc. .\" Copyright (c) 2018 by Delphix. All rights reserved. +.\" Copyright (c) 2019 Datto Inc. .\" The contents of this file are subject to the terms of the Common Development .\" and Distribution License (the "License"). You may not use this file except .\" in compliance with the License. You can obtain a copy of the license at @@ -14,7 +14,7 @@ .\" CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your .\" own identifying information: .\" Portions Copyright [yyyy] [name of copyright owner] -.TH ZFS-MODULE-PARAMETERS 5 "Oct 28, 2017" +.TH ZFS-MODULE-PARAMETERS 5 "Feb 8, 2019" .SH NAME zfs\-module\-parameters \- ZFS module parameters .SH DESCRIPTION @@ -1970,12 +1970,12 @@ Default value: \fB52,428,800\fR. \fBzfs_per_txg_dirty_frees_percent \fR (ulong) .ad .RS 12n -Tunable to control percentage of dirtied blocks from frees in one TXG. -After this threshold is crossed, additional dirty blocks from frees -wait until the next TXG. +Tunable to control percentage of dirtied indirect blocks from frees allowed +into one TXG. After this threshold is crossed, additional frees will wait until +the next TXG. A value of zero will disable this throttle. .sp -Default value: \fB30\fR and \fB0\fR to disable. +Default value: \fB5\fR, set to \fB0\fR to disable. .RE .sp diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index 231ed3053e..36fc8815cd 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -25,6 +25,7 @@ * Copyright (c) 2013, Joyent, Inc. All rights reserved. * Copyright (c) 2016, Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2015 by Chunwei Chen. All rights reserved. + * Copyright (c) 2019 Datto Inc. */ #include @@ -61,12 +62,12 @@ int zfs_nopwrite_enabled = 1; /* - * Tunable to control percentage of dirtied blocks from frees in one TXG. - * After this threshold is crossed, additional dirty blocks from frees - * wait until the next TXG. + * Tunable to control percentage of dirtied L1 blocks from frees allowed into + * one TXG. After this threshold is crossed, additional dirty blocks from frees + * will wait until the next TXG. * A value of zero will disable this throttle. */ -unsigned long zfs_per_txg_dirty_frees_percent = 30; +unsigned long zfs_per_txg_dirty_frees_percent = 5; /* * Enable/disable forcing txg sync when dirty in dmu_offset_next. @@ -709,11 +710,13 @@ dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset, * * On input, *start should be the first offset that does not need to be * freed (e.g. "offset + length"). On return, *start will be the first - * offset that should be freed. + * offset that should be freed and l1blks is set to the number of level 1 + * indirect blocks found within the chunk. */ static int -get_next_chunk(dnode_t *dn, uint64_t *start, uint64_t minimum) +get_next_chunk(dnode_t *dn, uint64_t *start, uint64_t minimum, uint64_t *l1blks) { + uint64_t blks; uint64_t maxblks = DMU_MAX_ACCESS >> (dn->dn_indblkshift + 1); /* bytes of data covered by a level-1 indirect block */ uint64_t iblkrange = @@ -723,11 +726,16 @@ get_next_chunk(dnode_t *dn, uint64_t *start, uint64_t minimum) if (*start - minimum <= iblkrange * maxblks) { *start = minimum; + /* + * Assume full L1 blocks and 128k recordsize to approximate the + * expected number of L1 blocks in this chunk + */ + *l1blks = minimum / (1024 * 128 * 1024); return (0); } ASSERT(ISP2(iblkrange)); - for (uint64_t blks = 0; *start > minimum && blks < maxblks; blks++) { + for (blks = 0; *start > minimum && blks < maxblks; blks++) { int err; /* @@ -745,6 +753,7 @@ get_next_chunk(dnode_t *dn, uint64_t *start, uint64_t minimum) *start = minimum; break; } else if (err != 0) { + *l1blks = blks; return (err); } @@ -753,6 +762,7 @@ get_next_chunk(dnode_t *dn, uint64_t *start, uint64_t minimum) } if (*start < minimum) *start = minimum; + *l1blks = blks; return (0); } @@ -792,7 +802,7 @@ dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset, dirty_frees_threshold = zfs_per_txg_dirty_frees_percent * zfs_dirty_data_max / 100; else - dirty_frees_threshold = zfs_dirty_data_max / 4; + dirty_frees_threshold = zfs_dirty_data_max / 20; if (length == DMU_OBJECT_END || offset + length > object_size) length = object_size - offset; @@ -800,6 +810,7 @@ dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset, while (length != 0) { uint64_t chunk_end, chunk_begin, chunk_len; uint64_t long_free_dirty_all_txgs = 0; + uint64_t l1blks; dmu_tx_t *tx; if (dmu_objset_zfs_unmounting(dn->dn_objset)) @@ -808,7 +819,7 @@ dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset, chunk_end = chunk_begin = offset + length; /* move chunk_begin backwards to the beginning of this chunk */ - err = get_next_chunk(dn, &chunk_begin, offset); + err = get_next_chunk(dn, &chunk_begin, offset, &l1blks); if (err) return (err); ASSERT3U(chunk_begin, >=, offset); @@ -849,9 +860,19 @@ dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset, return (err); } + /* + * In order to prevent unnecessary write throttling, for each + * TXG, we track the cumulative size of L1 blocks being dirtied + * in dnode_free_range() below. We compare this number to a + * tunable threshold, past which we prevent new L1 dirty freeing + * blocks from being added into the open TXG. See + * dmu_free_long_range_impl() for details. The threshold + * prevents write throttle activation due to dirty freeing L1 + * blocks taking up a large percentage of zfs_dirty_data_max. + */ mutex_enter(&dp->dp_lock); dp->dp_long_free_dirty_pertxg[dmu_tx_get_txg(tx) & TXG_MASK] += - chunk_len; + l1blks << dn->dn_indblkshift; mutex_exit(&dp->dp_lock); DTRACE_PROBE3(free__long__range, uint64_t, long_free_dirty_all_txgs, uint64_t, chunk_len,