Default zfs_max_recordsize to 16M

Increase the default allowed maximum recordsize from 1M to 16M.
As described in the zfs(4) man page, there are significant costs
which need to be considered before using very large blocks.
However, there are scenarios where they make good sense and
it should no longer be necessary to artificially restrict their
use behind a module option.

Note that for 32-bit platforms we continue to leave this
restriction in place due to the limited virtual address space
available (256-512MB).  On these systems only a handful
of blocks could be cached at any one time, severely impacting
performance and potentially stability.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Signed-off-by: Rich Ercolani <rincebrain@gmail.com>
Closes #12830
Closes #13302
This commit is contained in:
Rich Ercolani 2022-04-28 18:12:24 -04:00 committed by GitHub
parent 63b18e4097
commit f2330bd156
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 20 additions and 28 deletions

View File

@ -1469,15 +1469,15 @@ feature uses to estimate incoming log blocks.
.It Sy zfs_max_logsm_summary_length Ns = Ns Sy 10 Pq ulong .It Sy zfs_max_logsm_summary_length Ns = Ns Sy 10 Pq ulong
Maximum number of rows allowed in the summary of the spacemap log. Maximum number of rows allowed in the summary of the spacemap log.
. .
.It Sy zfs_max_recordsize Ns = Ns Sy 1048576 Po 1MB Pc Pq int .It Sy zfs_max_recordsize Ns = Ns Sy 16777216 Po 16MB Pc Pq int
We currently support block sizes from We currently support block sizes from
.Em 512B No to Em 16MB . .Em 512B No to Em 16MB .
The benefits of larger blocks, and thus larger I/O, The benefits of larger blocks, and thus larger I/O,
need to be weighed against the cost of COWing a giant block to modify one byte. need to be weighed against the cost of COWing a giant block to modify one byte.
Additionally, very large blocks can have an impact on I/O latency, Additionally, very large blocks can have an impact on I/O latency,
and also potentially on the memory allocator. and also potentially on the memory allocator.
Therefore, we do not allow the recordsize to be set larger than this tunable. Therefore, we formerly forbade creating blocks larger than 1M.
Larger blocks can be created by changing it, Larger blocks could be created by changing this tunable,
and pools with larger blocks can always be imported and used, and pools with larger blocks can always be imported and used,
regardless of this setting. regardless of this setting.
. .

View File

@ -73,12 +73,19 @@
* The SPA supports block sizes up to 16MB. However, very large blocks * The SPA supports block sizes up to 16MB. However, very large blocks
* can have an impact on i/o latency (e.g. tying up a spinning disk for * can have an impact on i/o latency (e.g. tying up a spinning disk for
* ~300ms), and also potentially on the memory allocator. Therefore, * ~300ms), and also potentially on the memory allocator. Therefore,
* we do not allow the recordsize to be set larger than zfs_max_recordsize * we did not allow the recordsize to be set larger than zfs_max_recordsize
* (default 1MB). Larger blocks can be created by changing this tunable, * (former default: 1MB). Larger blocks could be created by changing this
* and pools with larger blocks can always be imported and used, regardless * tunable, and pools with larger blocks could always be imported and used,
* of this setting. * regardless of this setting.
*
* We do, however, still limit it by default to 1M on 32-bit (_ILP32)
* platforms, because the limited kernel virtual address space there (e.g.
* Linux's 3/1 memory split on x86_32) doesn't leave much room for 16M chunks.
*/ */
#ifdef _ILP32
int zfs_max_recordsize = 1 * 1024 * 1024; int zfs_max_recordsize = 1 * 1024 * 1024;
#else
int zfs_max_recordsize = 16 * 1024 * 1024;
#endif
static int zfs_allow_redacted_dataset_mount = 0; static int zfs_allow_redacted_dataset_mount = 0;
#define SWITCH64(x, y) \ #define SWITCH64(x, y) \
@ -4964,13 +4971,7 @@ dsl_dataset_oldest_snapshot(spa_t *spa, uint64_t head_ds, uint64_t min_txg,
return (0); return (0);
} }
#if defined(_LP64) ZFS_MODULE_PARAM(zfs, zfs_, max_recordsize, INT, ZMOD_RW,
#define RECORDSIZE_PERM ZMOD_RW
#else
/* Limited to 1M on 32-bit platforms due to lack of virtual address space */
#define RECORDSIZE_PERM ZMOD_RD
#endif
ZFS_MODULE_PARAM(zfs, zfs_, max_recordsize, INT, RECORDSIZE_PERM,
"Max allowed record size"); "Max allowed record size");
ZFS_MODULE_PARAM(zfs, zfs_, allow_redacted_dataset_mount, INT, ZMOD_RW, ZFS_MODULE_PARAM(zfs, zfs_, allow_redacted_dataset_mount, INT, ZMOD_RW,

View File

@ -166,15 +166,6 @@ zio_init(void)
cflags = (zio_exclude_metadata || size > zio_buf_debug_limit) ? cflags = (zio_exclude_metadata || size > zio_buf_debug_limit) ?
KMC_NODEBUG : 0; KMC_NODEBUG : 0;
#if defined(_ILP32) && defined(_KERNEL)
/*
* Cache size limited to 1M on 32-bit platforms until ARC
* buffers no longer require virtual address space.
*/
if (size > zfs_max_recordsize)
break;
#endif
while (!ISP2(p2)) while (!ISP2(p2))
p2 &= p2 - 1; p2 &= p2 - 1;

View File

@ -35,7 +35,7 @@ log_must disk_setup
log_must zpool create $TESTPOOL raidz $ZPOOL_DISKS special mirror \ log_must zpool create $TESTPOOL raidz $ZPOOL_DISKS special mirror \
$CLASS_DISK0 $CLASS_DISK1 $CLASS_DISK0 $CLASS_DISK1
for value in 256 1025 2097152 for value in 256 1025 33554432
do do
log_mustnot zfs set special_small_blocks=$value $TESTPOOL log_mustnot zfs set special_small_blocks=$value $TESTPOOL
done done

View File

@ -56,7 +56,7 @@ set -A args "ab" "-?" "-cV" "-Vc" "-c -V" "c" "V" "--c" "-e" "-s" \
"-blah" "-cV 12k" "-s -cV 1P" "-sc" "-Vs 5g" "-o" "--o" "-O" "--O" \ "-blah" "-cV 12k" "-s -cV 1P" "-sc" "-Vs 5g" "-o" "--o" "-O" "--O" \
"-o QuOta=none" "-o quota=non" "-o quota=abcd" "-o quota=0" "-o quota=" \ "-o QuOta=none" "-o quota=non" "-o quota=abcd" "-o quota=0" "-o quota=" \
"-o ResErVaTi0n=none" "-o reserV=none" "-o reservation=abcd" "-o reserv=" \ "-o ResErVaTi0n=none" "-o reserV=none" "-o reservation=abcd" "-o reserv=" \
"-o recorDSize=64k" "-o recordsize=2048K" "-o recordsize=2M" \ "-o recorDSize=64k" "-o recordsize=32768K" "-o recordsize=32M" \
"-o recordsize=256" "-o recsize=" "-o recsize=zero" "-o recordsize=0" \ "-o recordsize=256" "-o recsize=" "-o recsize=zero" "-o recordsize=0" \
"-o mountPoint=/tmp/tmpfile$$" "-o mountpoint=non0" "-o mountpoint=" \ "-o mountPoint=/tmp/tmpfile$$" "-o mountpoint=non0" "-o mountpoint=" \
"-o mountpoint=LEGACY" "-o mounpoint=none" \ "-o mountpoint=LEGACY" "-o mounpoint=none" \

View File

@ -78,7 +78,7 @@ while (( i < ${#dataset[@]} )); do
(( j += 1 )) (( j += 1 ))
done done
# Additional recordsize # Additional recordsize
set_n_check_prop "2048K" "recordsize" "${dataset[i]}" false set_n_check_prop "32768K" "recordsize" "${dataset[i]}" false
set_n_check_prop "128B" "recordsize" "${dataset[i]}" false set_n_check_prop "128B" "recordsize" "${dataset[i]}" false
(( i += 1 )) (( i += 1 ))
done done

View File

@ -52,7 +52,7 @@ log_onexit cleanup
set -A args "QuOta=none" "quota=non" "quota=abcd" "quota=0" "quota=" \ set -A args "QuOta=none" "quota=non" "quota=abcd" "quota=0" "quota=" \
"ResErVaTi0n=none" "reserV=none" "reservation=abcd" "reserv=" \ "ResErVaTi0n=none" "reserV=none" "reservation=abcd" "reserv=" \
"recorDSize=64k" "recordsize=2M" "recordsize=2048K" \ "recorDSize=64k" "recordsize=32M" "recordsize=32768K" \
"recordsize=256" "recsize=" "recsize=zero" "recordsize=0" \ "recordsize=256" "recsize=" "recsize=zero" "recordsize=0" \
"mountPoint=/tmp/tmpfile$$" "mountpoint=non0" "mountpoint=" \ "mountPoint=/tmp/tmpfile$$" "mountpoint=non0" "mountpoint=" \
"mountpoint=LEGACY" "mounpoint=none" \ "mountpoint=LEGACY" "mounpoint=none" \