From 2ecc2dfe42707d8569e30f3b6a4526a0a825d479 Mon Sep 17 00:00:00 2001 From: Rob N Date: Wed, 17 Jan 2024 09:01:17 +1100 Subject: [PATCH 01/45] Linux 6.7 compat: zfs_setattr fix atime update In db4fc559c I messed up and changed this bit of code to set the inode atime to an uninitialised value, when actually it was just supposed to load the atime from the inode to be stored in the SA. This changes it to what it should have been. Ensure times change by the right amount Previously, we only checked if the times changed at all, which missed a bug where the atime was being set to an undefined value. Now ensure the times change by two seconds (or thereabouts), ensuring we catch cases where we set the time to something bonkers. Reviewed-by: Brian Behlendorf Signed-off-by: Rob Norris Sponsored-by: https://despairlabs.com/sponsor/ Closes #15762 Closes #15773 --- module/os/linux/zfs/zfs_vnops_os.c | 3 +-- tests/zfs-tests/cmd/ctime.c | 14 +++++++++++--- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/module/os/linux/zfs/zfs_vnops_os.c b/module/os/linux/zfs/zfs_vnops_os.c index 65d1d786ae..9ea8ad5f4a 100644 --- a/module/os/linux/zfs/zfs_vnops_os.c +++ b/module/os/linux/zfs/zfs_vnops_os.c @@ -2435,9 +2435,8 @@ top: if ((mask & ATTR_ATIME) || zp->z_atime_dirty) { zp->z_atime_dirty = B_FALSE; - inode_timespec_t tmp_atime; + inode_timespec_t tmp_atime = zpl_inode_get_atime(ip); ZFS_TIME_ENCODE(&tmp_atime, atime); - zpl_inode_set_atime_to_ts(ZTOI(zp), tmp_atime); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, &atime, sizeof (atime)); } diff --git a/tests/zfs-tests/cmd/ctime.c b/tests/zfs-tests/cmd/ctime.c index 0f5d81aea6..5ff1cea8a8 100644 --- a/tests/zfs-tests/cmd/ctime.c +++ b/tests/zfs-tests/cmd/ctime.c @@ -362,12 +362,20 @@ main(void) return (1); } - if (t1 == t2) { - (void) fprintf(stderr, "%s: t1(%ld) == t2(%ld)\n", + + /* + * Ideally, time change would be exactly two seconds, but allow + * a little slack in case of scheduling delays or similar. 
+ */ + long delta = (long)t2 - (long)t1; + if (delta < 2 || delta > 4) { + (void) fprintf(stderr, + "%s: BAD time change: t1(%ld), t2(%ld)\n", timetest_table[i].name, (long)t1, (long)t2); return (1); } else { - (void) fprintf(stderr, "%s: t1(%ld) != t2(%ld)\n", + (void) fprintf(stderr, + "%s: good time change: t1(%ld), t2(%ld)\n", timetest_table[i].name, (long)t1, (long)t2); } } From 07cf973fe9e4b99b9c0a89038301fc9ad26f7e95 Mon Sep 17 00:00:00 2001 From: Kevin Jin <33590050+jxdking@users.noreply.github.com> Date: Wed, 17 Jan 2024 12:03:58 -0500 Subject: [PATCH 02/45] Autotrim High Load Average Fix Switch from cv_wait() to cv_wait_idle() in vdev_autotrim_wait_kick(), which should mitigate the high load average while waiting. Reviewed-by: Brian Atkinson Reviewed-by: Brian Behlendorf Reviewed-by: Alexander Motin Signed-off-by: jxdking Closes #15781 --- module/zfs/vdev_trim.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/module/zfs/vdev_trim.c b/module/zfs/vdev_trim.c index 03e17db024..d96b75e5ed 100644 --- a/module/zfs/vdev_trim.c +++ b/module/zfs/vdev_trim.c @@ -194,7 +194,8 @@ vdev_autotrim_wait_kick(vdev_t *vd, int num_of_kick) for (int i = 0; i < num_of_kick; i++) { if (vd->vdev_autotrim_exit_wanted) break; - cv_wait(&vd->vdev_autotrim_kick_cv, &vd->vdev_autotrim_lock); + cv_wait_idle(&vd->vdev_autotrim_kick_cv, + &vd->vdev_autotrim_lock); } boolean_t exit_wanted = vd->vdev_autotrim_exit_wanted; mutex_exit(&vd->vdev_autotrim_lock); From 387f003be3052ee1ea53cef7fdbc0babd2392c68 Mon Sep 17 00:00:00 2001 From: Ameer Hamza Date: Thu, 7 Dec 2023 01:18:43 +0500 Subject: [PATCH 03/45] ZTS: block_cloning: Use numeric sort for get_same_blocks Reviewed-by: Kay Pedersen Reviewed-by: Brian Behlendorf Reviewed-by: Alexander Motin Signed-off-by: Ameer Hamza Closes #15614 --- .../tests/functional/block_cloning/block_cloning.kshlib | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/tests/zfs-tests/tests/functional/block_cloning/block_cloning.kshlib b/tests/zfs-tests/tests/functional/block_cloning/block_cloning.kshlib index 526bd54a2b..50f3a3d262 100644 --- a/tests/zfs-tests/tests/functional/block_cloning/block_cloning.kshlib +++ b/tests/zfs-tests/tests/functional/block_cloning/block_cloning.kshlib @@ -53,6 +53,6 @@ function get_same_blocks awk '/ L0 / { print l++ " " $3 " " $7 }' > $zdbout.a zdb $KEY -vvvvv $3 -O $4 | \ awk '/ L0 / { print l++ " " $3 " " $7 }' > $zdbout.b - echo $(sort $zdbout.a $zdbout.b | uniq -d | cut -f1 -d' ') + echo $(sort -n $zdbout.a $zdbout.b | uniq -d | cut -f1 -d' ') } From d8b0b6032b5f46bf26f7796db5659d68f96485c0 Mon Sep 17 00:00:00 2001 From: Ameer Hamza Date: Fri, 1 Dec 2023 01:14:56 +0500 Subject: [PATCH 04/45] ZTS: Add test cases for block cloning replay Reviewed-by: Kay Pedersen Reviewed-by: Brian Behlendorf Reviewed-by: Alexander Motin Signed-off-by: Ameer Hamza Closes #15614 --- tests/runfiles/linux.run | 3 +- tests/test-runner/bin/zts-report.py.in | 5 +- tests/zfs-tests/tests/Makefile.am | 2 + .../block_cloning/block_cloning_replay.ksh | 131 +++++++++++++++++ .../block_cloning_replay_encrypted.ksh | 133 ++++++++++++++++++ 5 files changed, 272 insertions(+), 2 deletions(-) create mode 100755 tests/zfs-tests/tests/functional/block_cloning/block_cloning_replay.ksh create mode 100755 tests/zfs-tests/tests/functional/block_cloning/block_cloning_replay_encrypted.ksh diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index fb78d96fb5..17ba233524 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -43,7 +43,8 @@ tests = ['block_cloning_copyfilerange', 'block_cloning_copyfilerange_partial', 'block_cloning_disabled_ficlonerange', 'block_cloning_copyfilerange_cross_dataset', 'block_cloning_cross_enc_dataset', - 'block_cloning_copyfilerange_fallback_same_txg'] + 'block_cloning_copyfilerange_fallback_same_txg', + 'block_cloning_replay', 'block_cloning_replay_encrypted'] tags = 
['functional', 'block_cloning'] [tests/functional/chattr:Linux] diff --git a/tests/test-runner/bin/zts-report.py.in b/tests/test-runner/bin/zts-report.py.in index b188a101c2..3b5eeacb6b 100755 --- a/tests/test-runner/bin/zts-report.py.in +++ b/tests/test-runner/bin/zts-report.py.in @@ -301,6 +301,10 @@ elif sys.platform.startswith('linux'): ['SKIP', cfr_reason], 'block_cloning/block_cloning_copyfilerange_fallback': ['SKIP', cfr_reason], + 'block_cloning/block_cloning_replay': + ['SKIP', cfr_reason], + 'block_cloning/block_cloning_replay_encrypted': + ['SKIP', cfr_reason], 'block_cloning/block_cloning_copyfilerange_cross_dataset': ['SKIP', cfr_cross_reason], 'block_cloning/block_cloning_copyfilerange_fallback_same_txg': @@ -309,7 +313,6 @@ elif sys.platform.startswith('linux'): ['SKIP', cfr_cross_reason], }) - # Not all Github actions runners have scsi_debug module, so we may skip # some tests which use it. if os.environ.get('CI') == 'true': diff --git a/tests/zfs-tests/tests/Makefile.am b/tests/zfs-tests/tests/Makefile.am index 21b830126b..88573a15ed 100644 --- a/tests/zfs-tests/tests/Makefile.am +++ b/tests/zfs-tests/tests/Makefile.am @@ -452,6 +452,8 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/block_cloning/block_cloning_ficlonerange.ksh \ functional/block_cloning/block_cloning_ficlonerange_partial.ksh \ functional/block_cloning/block_cloning_cross_enc_dataset.ksh \ + functional/block_cloning/block_cloning_replay.ksh \ + functional/block_cloning/block_cloning_replay_encrypted.ksh \ functional/bootfs/bootfs_001_pos.ksh \ functional/bootfs/bootfs_002_neg.ksh \ functional/bootfs/bootfs_003_pos.ksh \ diff --git a/tests/zfs-tests/tests/functional/block_cloning/block_cloning_replay.ksh b/tests/zfs-tests/tests/functional/block_cloning/block_cloning_replay.ksh new file mode 100755 index 0000000000..1fdf379ed2 --- /dev/null +++ b/tests/zfs-tests/tests/functional/block_cloning/block_cloning_replay.ksh @@ -0,0 +1,131 @@ +#!/bin/ksh -p +# +# CDDL HEADER 
START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/block_cloning/block_cloning.kshlib + +# +# DESCRIPTION: +# Verify slogs are replayed correctly for cloned files. This +# test is ported from slog_replay tests for block cloning. +# +# STRATEGY: +# 1. Create an empty file system (TESTFS) +# 2. Create regular files and sync +# 3. Freeze TESTFS +# 4. Clone the file +# 5. Unmount filesystem +# +# 6. Remount TESTFS +# 7. Compare clone file with the original file +# + +verify_runnable "global" + +if [[ $(linux_version) -lt $(linux_version "4.5") ]]; then + log_unsupported "copy_file_range not available before Linux 4.5" +fi + +export VDIR=$TEST_BASE_DIR/disk-bclone +export VDEV="$VDIR/a $VDIR/b $VDIR/c" +export LDEV="$VDIR/e $VDIR/f" +log_must rm -rf $VDIR +log_must mkdir -p $VDIR +log_must truncate -s $MINVDEVSIZE $VDEV $LDEV + +claim="The slogs are replayed correctly for cloned files." + +log_assert $claim + +function cleanup +{ + datasetexists $TESTPOOL && destroy_pool $TESTPOOL + rm -rf $TESTDIR $VDIR $VDIR2 +} + +log_onexit cleanup + +# +# 1. 
Create an empty file system (TESTFS) +# +log_must zpool create -o feature@block_cloning=enabled $TESTPOOL $VDEV \ + log mirror $LDEV +log_must zfs create $TESTPOOL/$TESTFS + +# +# 2. TX_WRITE: Create two files and sync txg +# +log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/file1 \ + oflag=sync bs=128k count=4 +log_must zfs set recordsize=16K $TESTPOOL/$TESTFS +log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/file2 \ + oflag=sync bs=16K count=2048 +sync_pool $TESTPOOL + +# +# 3. Checkpoint for ZIL Replay +# +log_must zpool freeze $TESTPOOL + +# +# 4. TX_CLONE_RANGE: Clone the file +# +log_must clonefile -c /$TESTPOOL/$TESTFS/file1 /$TESTPOOL/$TESTFS/clone1 +log_must clonefile -c /$TESTPOOL/$TESTFS/file2 /$TESTPOOL/$TESTFS/clone2 + +# +# 5. Unmount filesystem and export the pool +# +# At this stage TESTFS is frozen, the intent log contains a complete set +# of deltas to replay for clone files. +# +log_must zfs unmount /$TESTPOOL/$TESTFS + +log_note "Verify transactions to replay:" +log_must zdb -iv $TESTPOOL/$TESTFS + +log_must zpool export $TESTPOOL + +# +# 6. Remount TESTFS +# +# Import the pool to unfreeze it and claim log blocks. It has to be +# `zpool import -f` because we can't write a frozen pool's labels! +# +log_must zpool import -f -d $VDIR $TESTPOOL + +# +# 7. 
Compare clone file with the original file +# +log_must have_same_content /$TESTPOOL/$TESTFS/file1 /$TESTPOOL/$TESTFS/clone1 +log_must have_same_content /$TESTPOOL/$TESTFS/file2 /$TESTPOOL/$TESTFS/clone2 + +typeset blocks=$(get_same_blocks $TESTPOOL/$TESTFS file1 \ + $TESTPOOL/$TESTFS clone1) +log_must [ "$blocks" = "0 1 2 3" ] + +typeset blocks=$(get_same_blocks $TESTPOOL/$TESTFS file2 \ + $TESTPOOL/$TESTFS clone2) +log_must [ "$blocks" = "$(seq -s " " 0 2047)" ] + +log_pass $claim diff --git a/tests/zfs-tests/tests/functional/block_cloning/block_cloning_replay_encrypted.ksh b/tests/zfs-tests/tests/functional/block_cloning/block_cloning_replay_encrypted.ksh new file mode 100755 index 0000000000..f9f687c83e --- /dev/null +++ b/tests/zfs-tests/tests/functional/block_cloning/block_cloning_replay_encrypted.ksh @@ -0,0 +1,133 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/block_cloning/block_cloning.kshlib + +# +# DESCRIPTION: +# Verify slogs are replayed correctly for encrypted cloned files. +# This test is ported from slog_replay tests for block cloning. +# +# STRATEGY: +# 1. Create an encrypted file system (TESTFS) +# 2. 
Create regular files and sync +# 3. Freeze TESTFS +# 4. Clone the file +# 5. Unmount filesystem +# +# 6. Remount encrypted TESTFS +# 7. Compare clone file with the original file +# + +verify_runnable "global" + +if [[ $(linux_version) -lt $(linux_version "4.5") ]]; then + log_unsupported "copy_file_range not available before Linux 4.5" +fi + +export VDIR=$TEST_BASE_DIR/disk-bclone +export VDEV="$VDIR/a $VDIR/b $VDIR/c" +export LDEV="$VDIR/e $VDIR/f" +log_must rm -rf $VDIR +log_must mkdir -p $VDIR +log_must truncate -s $MINVDEVSIZE $VDEV $LDEV +export PASSPHRASE="password" + +claim="The slogs are replayed correctly for encrypted cloned files." + +log_assert $claim + +function cleanup +{ + datasetexists $TESTPOOL && destroy_pool $TESTPOOL + rm -rf $TESTDIR $VDIR $VDIR2 +} + +log_onexit cleanup + +# +# 1. Create an encrypted file system (TESTFS) +# +log_must zpool create -o feature@block_cloning=enabled $TESTPOOL $VDEV \ + log mirror $LDEV +log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \ + "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS" + +# +# 2. TX_WRITE: Create two files and sync txg +# +log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/file1 \ + oflag=sync bs=128k count=4 +log_must zfs set recordsize=16K $TESTPOOL/$TESTFS +log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/file2 \ + oflag=sync bs=16K count=2048 +sync_pool $TESTPOOL + +# +# 3. Checkpoint for ZIL Replay +# +log_must zpool freeze $TESTPOOL + +# +# 4. TX_CLONE_RANGE: Clone the file +# +log_must clonefile -c /$TESTPOOL/$TESTFS/file1 /$TESTPOOL/$TESTFS/clone1 +log_must clonefile -c /$TESTPOOL/$TESTFS/file2 /$TESTPOOL/$TESTFS/clone2 + +# +# 5. Unmount filesystem and export the pool +# +# At this stage TESTFS is frozen, the intent log contains a complete set +# of deltas to replay for clone files. +# +log_must zfs unmount /$TESTPOOL/$TESTFS + +log_note "Verify transactions to replay:" +log_must zdb -iv $TESTPOOL/$TESTFS + +log_must zpool export $TESTPOOL + +# +# 6. 
Remount TESTFS +# +# Import the pool to unfreeze it and claim log blocks. It has to be +# `zpool import -f` because we can't write a frozen pool's labels! +# +log_must eval "echo $PASSPHRASE | zpool import -l -f -d $VDIR $TESTPOOL" + +# +# 7. Compare clone file with the original file +# +log_must have_same_content /$TESTPOOL/$TESTFS/file1 /$TESTPOOL/$TESTFS/clone1 +log_must have_same_content /$TESTPOOL/$TESTFS/file2 /$TESTPOOL/$TESTFS/clone2 + +typeset blocks=$(get_same_blocks $TESTPOOL/$TESTFS file1 \ + $TESTPOOL/$TESTFS clone1 $PASSPHRASE) +log_must [ "$blocks" = "0 1 2 3" ] + +typeset blocks=$(get_same_blocks $TESTPOOL/$TESTFS file2 \ + $TESTPOOL/$TESTFS clone2 $PASSPHRASE) +log_must [ "$blocks" = "$(seq -s " " 0 2047)" ] + +log_pass $claim From f94a77951dd3a1861cb39a4a386c5677d68f25a0 Mon Sep 17 00:00:00 2001 From: Umer Saleem Date: Sat, 16 Dec 2023 03:18:27 +0500 Subject: [PATCH 05/45] Test LWB buffer overflow for block cloning PR#15634 removes 128K into 2x68K LWB split optimization, since it was found to cause LWB buffer overflow while trying to write 128KB TX_CLONE_RANGE record with 1022 block pointers into 68KB buffer, with multiple VDEVs ZIL. This commit adds a test for this particular scenario by writing a maximum-sized TX_CLONE_RANGE record with 1022 block pointers into 68KB buffer, with two SLOG devices. 
Reviewed-by: Brian Behlendorf Reviewed-by: Alexander Motin Reviewed-by: Ameer Hamza Signed-off-by: Umer Saleem Closes #15672 --- tests/runfiles/linux.run | 3 +- tests/test-runner/bin/zts-report.py.in | 2 + tests/zfs-tests/tests/Makefile.am | 1 + .../block_cloning_lwb_buffer_overflow.ksh | 89 +++++++++++++++++++ 4 files changed, 94 insertions(+), 1 deletion(-) create mode 100755 tests/zfs-tests/tests/functional/block_cloning/block_cloning_lwb_buffer_overflow.ksh diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index 17ba233524..c7c17f2717 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -44,7 +44,8 @@ tests = ['block_cloning_copyfilerange', 'block_cloning_copyfilerange_partial', 'block_cloning_copyfilerange_cross_dataset', 'block_cloning_cross_enc_dataset', 'block_cloning_copyfilerange_fallback_same_txg', - 'block_cloning_replay', 'block_cloning_replay_encrypted'] + 'block_cloning_replay', 'block_cloning_replay_encrypted', + 'block_cloning_lwb_buffer_overflow'] tags = ['functional', 'block_cloning'] [tests/functional/chattr:Linux] diff --git a/tests/test-runner/bin/zts-report.py.in b/tests/test-runner/bin/zts-report.py.in index 3b5eeacb6b..708b7be917 100755 --- a/tests/test-runner/bin/zts-report.py.in +++ b/tests/test-runner/bin/zts-report.py.in @@ -305,6 +305,8 @@ elif sys.platform.startswith('linux'): ['SKIP', cfr_reason], 'block_cloning/block_cloning_replay_encrypted': ['SKIP', cfr_reason], + 'block_cloning/block_cloning_lwb_buffer_overflow': + ['SKIP', cfr_reason], 'block_cloning/block_cloning_copyfilerange_cross_dataset': ['SKIP', cfr_cross_reason], 'block_cloning/block_cloning_copyfilerange_fallback_same_txg': diff --git a/tests/zfs-tests/tests/Makefile.am b/tests/zfs-tests/tests/Makefile.am index 88573a15ed..7f5af6530e 100644 --- a/tests/zfs-tests/tests/Makefile.am +++ b/tests/zfs-tests/tests/Makefile.am @@ -454,6 +454,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ 
functional/block_cloning/block_cloning_cross_enc_dataset.ksh \ functional/block_cloning/block_cloning_replay.ksh \ functional/block_cloning/block_cloning_replay_encrypted.ksh \ + functional/block_cloning/block_cloning_lwb_buffer_overflow.ksh \ functional/bootfs/bootfs_001_pos.ksh \ functional/bootfs/bootfs_002_neg.ksh \ functional/bootfs/bootfs_003_pos.ksh \ diff --git a/tests/zfs-tests/tests/functional/block_cloning/block_cloning_lwb_buffer_overflow.ksh b/tests/zfs-tests/tests/functional/block_cloning/block_cloning_lwb_buffer_overflow.ksh new file mode 100755 index 0000000000..0ae76b7e54 --- /dev/null +++ b/tests/zfs-tests/tests/functional/block_cloning/block_cloning_lwb_buffer_overflow.ksh @@ -0,0 +1,89 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023 by iXsystems, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/block_cloning/block_cloning.kshlib + +# +# DESCRIPTION: +# Test for LWB buffer overflow with multiple VDEVs ZIL when 128KB +# block write is split into two 68KB ones, trying to write maximum +# sizes 128KB TX_CLONE_RANGE record with 1022 block pointers into +# 68KB buffer. +# +# STRATEGY: +# 1. 
Create a pool with multiple VDEVs ZIL +# 2. Write maximum sizes TX_CLONE_RANGE record with 1022 block +# pointers into 68KB buffer +# 3. Sync TXG +# 4. Clone the file +# 5. Synchronize cached writes +# + +verify_runnable "global" + +if [[ $(linux_version) -lt $(linux_version "4.5") ]]; then + log_unsupported "copy_file_range not available before Linux 4.5" +fi + +VDIR=$TEST_BASE_DIR/disk-bclone +VDEV="$VDIR/a $VDIR/b $VDIR/c" +LDEV="$VDIR/e $VDIR/f" + +function cleanup +{ + datasetexists $TESTPOOL && destroy_pool $TESTPOOL + rm -rf $VDIR +} + +log_onexit cleanup + +log_assert "Test for LWB buffer overflow with multiple VDEVs ZIL" + +log_must rm -rf $VDIR +log_must mkdir -p $VDIR +log_must truncate -s $MINVDEVSIZE $VDEV $LDEV + +log_must zpool create -o feature@block_cloning=enabled $TESTPOOL $VDEV \ + log mirror $LDEV +log_must zfs create -o recordsize=32K $TESTPOOL/$TESTFS +# Each ZIL log entry can fit 130816 bytes for a block cloning operation, +# so it can store 1022 block pointers. When LWB optimization is enabled, +# an assert is hit when 128KB block write is split into two 68KB ones +# for 2 SLOG devices +log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/file1 bs=32K count=1022 \ + conv=fsync +sync_pool $TESTPOOL +log_must clonefile -c /$TESTPOOL/$TESTFS/file1 /$TESTPOOL/$TESTFS/file2 +log_must sync + +sync_pool $TESTPOOL +log_must have_same_content /$TESTPOOL/$TESTFS/file1 /$TESTPOOL/$TESTFS/file2 +typeset blocks=$(get_same_blocks $TESTPOOL/$TESTFS file1 $TESTPOOL/$TESTFS file2) +log_must [ "$blocks" = "$(seq -s " " 0 1021)" ] + +log_pass "LWB buffer overflow is not triggered with multiple VDEVs ZIL" + From c16d103422806ed503cc6186fa098b1e8ee10c79 Mon Sep 17 00:00:00 2001 From: Pawel Jakub Dawidek Date: Tue, 26 Dec 2023 12:01:53 -0800 Subject: [PATCH 06/45] Block cloning tests. The tests mostly focus on testing various corner cases. The tests take a long time to run, so for the common.run runfile we randomly select a hundred tests. 
To run all the bclone tests, bclone.run runfile should be used. Reviewed-by: Brian Behlendorf Signed-off-by: Pawel Jakub Dawidek Closes #15631 --- tests/Makefile.am | 1 + tests/runfiles/bclone.run | 46 +++ tests/runfiles/common.run | 18 + tests/test-runner/bin/zts-report.py.in | 73 ++-- tests/zfs-tests/cmd/Makefile.am | 2 +- tests/zfs-tests/cmd/clonefile.c | 80 +++-- tests/zfs-tests/include/commands.cfg | 3 +- tests/zfs-tests/include/math.shlib | 13 +- tests/zfs-tests/tests/Makefile.am | 21 ++ tests/zfs-tests/tests/functional/bclone/TODO | 4 + .../tests/functional/bclone/bclone.cfg | 32 ++ .../functional/bclone/bclone_common.kshlib | 280 ++++++++++++++++ .../bclone/bclone_corner_cases.kshlib | 315 ++++++++++++++++++ .../bclone/bclone_crossfs_corner_cases.ksh | 45 +++ .../bclone_crossfs_corner_cases_limited.ksh | 45 +++ .../functional/bclone/bclone_crossfs_data.ksh | 46 +++ .../bclone/bclone_crossfs_embedded.ksh | 50 +++ .../functional/bclone/bclone_crossfs_hole.ksh | 45 +++ .../bclone/bclone_diffprops_all.ksh | 86 +++++ .../bclone/bclone_diffprops_checksum.ksh | 62 ++++ .../bclone/bclone_diffprops_compress.ksh | 59 ++++ .../bclone/bclone_diffprops_copies.ksh | 59 ++++ .../bclone/bclone_diffprops_recordsize.ksh | 65 ++++ .../functional/bclone/bclone_prop_sync.ksh | 66 ++++ .../bclone/bclone_samefs_corner_cases.ksh | 42 +++ .../bclone_samefs_corner_cases_limited.ksh | 42 +++ .../functional/bclone/bclone_samefs_data.ksh | 44 +++ .../bclone/bclone_samefs_embedded.ksh | 48 +++ .../functional/bclone/bclone_samefs_hole.ksh | 44 +++ .../tests/functional/bclone/cleanup.ksh | 37 ++ .../tests/functional/bclone/setup.ksh | 45 +++ .../functional/redundancy/redundancy.kshlib | 22 -- 32 files changed, 1767 insertions(+), 73 deletions(-) create mode 100644 tests/runfiles/bclone.run create mode 100644 tests/zfs-tests/tests/functional/bclone/TODO create mode 100644 tests/zfs-tests/tests/functional/bclone/bclone.cfg create mode 100644 
tests/zfs-tests/tests/functional/bclone/bclone_common.kshlib create mode 100644 tests/zfs-tests/tests/functional/bclone/bclone_corner_cases.kshlib create mode 100755 tests/zfs-tests/tests/functional/bclone/bclone_crossfs_corner_cases.ksh create mode 100755 tests/zfs-tests/tests/functional/bclone/bclone_crossfs_corner_cases_limited.ksh create mode 100755 tests/zfs-tests/tests/functional/bclone/bclone_crossfs_data.ksh create mode 100755 tests/zfs-tests/tests/functional/bclone/bclone_crossfs_embedded.ksh create mode 100755 tests/zfs-tests/tests/functional/bclone/bclone_crossfs_hole.ksh create mode 100755 tests/zfs-tests/tests/functional/bclone/bclone_diffprops_all.ksh create mode 100755 tests/zfs-tests/tests/functional/bclone/bclone_diffprops_checksum.ksh create mode 100755 tests/zfs-tests/tests/functional/bclone/bclone_diffprops_compress.ksh create mode 100755 tests/zfs-tests/tests/functional/bclone/bclone_diffprops_copies.ksh create mode 100755 tests/zfs-tests/tests/functional/bclone/bclone_diffprops_recordsize.ksh create mode 100755 tests/zfs-tests/tests/functional/bclone/bclone_prop_sync.ksh create mode 100755 tests/zfs-tests/tests/functional/bclone/bclone_samefs_corner_cases.ksh create mode 100755 tests/zfs-tests/tests/functional/bclone/bclone_samefs_corner_cases_limited.ksh create mode 100755 tests/zfs-tests/tests/functional/bclone/bclone_samefs_data.ksh create mode 100755 tests/zfs-tests/tests/functional/bclone/bclone_samefs_embedded.ksh create mode 100755 tests/zfs-tests/tests/functional/bclone/bclone_samefs_hole.ksh create mode 100755 tests/zfs-tests/tests/functional/bclone/cleanup.ksh create mode 100755 tests/zfs-tests/tests/functional/bclone/setup.ksh diff --git a/tests/Makefile.am b/tests/Makefile.am index 2e633041ab..12e9c9f9da 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -16,6 +16,7 @@ dist_scripts_test_runner_include_DATA = \ scripts_runfilesdir = $(datadir)/$(PACKAGE)/runfiles dist_scripts_runfiles_DATA = \ + %D%/runfiles/bclone.run \ 
%D%/runfiles/common.run \ %D%/runfiles/freebsd.run \ %D%/runfiles/linux.run \ diff --git a/tests/runfiles/bclone.run b/tests/runfiles/bclone.run new file mode 100644 index 0000000000..3d0f545d92 --- /dev/null +++ b/tests/runfiles/bclone.run @@ -0,0 +1,46 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# This run file contains all of the common functional tests. When +# adding a new test consider also adding it to the sanity.run file +# if the new test runs to completion in only a few seconds. +# +# Approximate run time: 5 hours +# + +[DEFAULT] +pre = setup +quiet = False +pre_user = root +user = root +timeout = 28800 +post_user = root +post = cleanup +failsafe_user = root +failsafe = callbacks/zfs_failsafe +outputdir = /var/tmp/test_results +tags = ['bclone'] + +[tests/functional/bclone] +tests = ['bclone_crossfs_corner_cases', + 'bclone_crossfs_data', + 'bclone_crossfs_embedded', + 'bclone_crossfs_hole', + 'bclone_diffprops_all', + 'bclone_diffprops_checksum', + 'bclone_diffprops_compress', + 'bclone_diffprops_copies', + 'bclone_diffprops_recordsize', + 'bclone_prop_sync', + 'bclone_samefs_corner_cases', + 'bclone_samefs_data', + 'bclone_samefs_embedded', + 'bclone_samefs_hole'] +tags = ['bclone'] diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run index ef787c65c0..f94a5fba9e 100644 --- a/tests/runfiles/common.run +++ b/tests/runfiles/common.run @@ -53,6 +53,24 @@ tags = ['functional', 'arc'] tests = ['atime_001_pos', 'atime_002_neg', 'root_atime_off', 'root_atime_on'] tags = ['functional', 'atime'] +[tests/functional/bclone] +tests = ['bclone_crossfs_corner_cases_limited', + 
'bclone_crossfs_data', + 'bclone_crossfs_embedded', + 'bclone_crossfs_hole', + 'bclone_diffprops_all', + 'bclone_diffprops_checksum', + 'bclone_diffprops_compress', + 'bclone_diffprops_copies', + 'bclone_diffprops_recordsize', + 'bclone_prop_sync', + 'bclone_samefs_corner_cases_limited', + 'bclone_samefs_data', + 'bclone_samefs_embedded', + 'bclone_samefs_hole'] +tags = ['functional', 'bclone'] +timeout = 7200 + [tests/functional/bootfs] tests = ['bootfs_001_pos', 'bootfs_002_neg', 'bootfs_003_pos', 'bootfs_004_neg', 'bootfs_005_neg', 'bootfs_006_pos', 'bootfs_007_pos', diff --git a/tests/test-runner/bin/zts-report.py.in b/tests/test-runner/bin/zts-report.py.in index 708b7be917..7bf4d05d54 100755 --- a/tests/test-runner/bin/zts-report.py.in +++ b/tests/test-runner/bin/zts-report.py.in @@ -263,13 +263,50 @@ if sys.platform.startswith('freebsd'): 'cli_root/zpool_import/zpool_import_012_pos': ['FAIL', known_reason], 'delegate/zfs_allow_003_pos': ['FAIL', known_reason], 'inheritance/inherit_001_pos': ['FAIL', 11829], - 'resilver/resilver_restart_001': ['FAIL', known_reason], 'pool_checkpoint/checkpoint_big_rewind': ['FAIL', 12622], 'pool_checkpoint/checkpoint_indirect': ['FAIL', 12623], + 'resilver/resilver_restart_001': ['FAIL', known_reason], 'snapshot/snapshot_002_pos': ['FAIL', '14831'], }) elif sys.platform.startswith('linux'): maybe.update({ + 'bclone/bclone_crossfs_corner_cases': ['SKIP', cfr_cross_reason], + 'bclone/bclone_crossfs_corner_cases_limited': + ['SKIP', cfr_cross_reason], + 'bclone/bclone_crossfs_data': ['SKIP', cfr_cross_reason], + 'bclone/bclone_crossfs_embedded': ['SKIP', cfr_cross_reason], + 'bclone/bclone_crossfs_hole': ['SKIP', cfr_cross_reason], + 'bclone/bclone_diffprops_all': ['SKIP', cfr_cross_reason], + 'bclone/bclone_diffprops_checksum': ['SKIP', cfr_cross_reason], + 'bclone/bclone_diffprops_compress': ['SKIP', cfr_cross_reason], + 'bclone/bclone_diffprops_copies': ['SKIP', cfr_cross_reason], + 'bclone/bclone_diffprops_recordsize': 
['SKIP', cfr_cross_reason], + 'bclone/bclone_prop_sync': ['SKIP', cfr_cross_reason], + 'bclone/bclone_samefs_corner_cases': ['SKIP', cfr_reason], + 'bclone/bclone_samefs_corner_cases_limited': ['SKIP', cfr_reason], + 'bclone/bclone_samefs_data': ['SKIP', cfr_reason], + 'bclone/bclone_samefs_embedded': ['SKIP', cfr_reason], + 'bclone/bclone_samefs_hole': ['SKIP', cfr_reason], + 'block_cloning/block_cloning_copyfilerange': + ['SKIP', cfr_reason], + 'block_cloning/block_cloning_copyfilerange_cross_dataset': + ['SKIP', cfr_cross_reason], + 'block_cloning/block_cloning_copyfilerange_fallback': + ['SKIP', cfr_reason], + 'block_cloning/block_cloning_copyfilerange_fallback_same_txg': + ['SKIP', cfr_cross_reason], + 'block_cloning/block_cloning_copyfilerange_partial': + ['SKIP', cfr_reason], + 'block_cloning/block_cloning_cross_enc_dataset': + ['SKIP', cfr_cross_reason], + 'block_cloning/block_cloning_disabled_copyfilerange': + ['SKIP', cfr_reason], + 'block_cloning/block_cloning_lwb_buffer_overflow': + ['SKIP', cfr_reason], + 'block_cloning/block_cloning_replay': + ['SKIP', cfr_reason], + 'block_cloning/block_cloning_replay_encrypted': + ['SKIP', cfr_reason], 'cli_root/zfs_rename/zfs_rename_002_pos': ['FAIL', known_reason], 'cli_root/zpool_reopen/zpool_reopen_003_pos': ['FAIL', known_reason], 'fault/auto_online_002_pos': ['FAIL', 11889], @@ -278,41 +315,21 @@ elif sys.platform.startswith('linux'): 'fault/auto_spare_multiple': ['FAIL', 11889], 'fault/auto_spare_shared': ['FAIL', 11889], 'fault/decompress_fault': ['FAIL', 11889], + 'idmap_mount/idmap_mount_001': ['SKIP', idmap_reason], + 'idmap_mount/idmap_mount_002': ['SKIP', idmap_reason], + 'idmap_mount/idmap_mount_003': ['SKIP', idmap_reason], + 'idmap_mount/idmap_mount_004': ['SKIP', idmap_reason], + 'idmap_mount/idmap_mount_005': ['SKIP', idmap_reason], 'io/io_uring': ['SKIP', 'io_uring support required'], 'limits/filesystem_limit': ['SKIP', known_reason], 'limits/snapshot_limit': ['SKIP', known_reason], 
'mmp/mmp_active_import': ['FAIL', known_reason], 'mmp/mmp_exported_import': ['FAIL', known_reason], 'mmp/mmp_inactive_import': ['FAIL', known_reason], - 'zvol/zvol_misc/zvol_misc_snapdev': ['FAIL', 12621], - 'zvol/zvol_misc/zvol_misc_volmode': ['FAIL', known_reason], 'zvol/zvol_misc/zvol_misc_fua': ['SKIP', 14872], + 'zvol/zvol_misc/zvol_misc_snapdev': ['FAIL', 12621], 'zvol/zvol_misc/zvol_misc_trim': ['SKIP', 14872], - 'idmap_mount/idmap_mount_001': ['SKIP', idmap_reason], - 'idmap_mount/idmap_mount_002': ['SKIP', idmap_reason], - 'idmap_mount/idmap_mount_003': ['SKIP', idmap_reason], - 'idmap_mount/idmap_mount_004': ['SKIP', idmap_reason], - 'idmap_mount/idmap_mount_005': ['SKIP', idmap_reason], - 'block_cloning/block_cloning_disabled_copyfilerange': - ['SKIP', cfr_reason], - 'block_cloning/block_cloning_copyfilerange': - ['SKIP', cfr_reason], - 'block_cloning/block_cloning_copyfilerange_partial': - ['SKIP', cfr_reason], - 'block_cloning/block_cloning_copyfilerange_fallback': - ['SKIP', cfr_reason], - 'block_cloning/block_cloning_replay': - ['SKIP', cfr_reason], - 'block_cloning/block_cloning_replay_encrypted': - ['SKIP', cfr_reason], - 'block_cloning/block_cloning_lwb_buffer_overflow': - ['SKIP', cfr_reason], - 'block_cloning/block_cloning_copyfilerange_cross_dataset': - ['SKIP', cfr_cross_reason], - 'block_cloning/block_cloning_copyfilerange_fallback_same_txg': - ['SKIP', cfr_cross_reason], - 'block_cloning/block_cloning_cross_enc_dataset': - ['SKIP', cfr_cross_reason], + 'zvol/zvol_misc/zvol_misc_volmode': ['FAIL', known_reason], }) # Not all Github actions runners have scsi_debug module, so we may skip diff --git a/tests/zfs-tests/cmd/Makefile.am b/tests/zfs-tests/cmd/Makefile.am index 9bdb3c2097..1b915ae98c 100644 --- a/tests/zfs-tests/cmd/Makefile.am +++ b/tests/zfs-tests/cmd/Makefile.am @@ -2,6 +2,7 @@ scripts_zfs_tests_bindir = $(datadir)/$(PACKAGE)/zfs-tests/bin scripts_zfs_tests_bin_PROGRAMS = %D%/chg_usr_exec +scripts_zfs_tests_bin_PROGRAMS += 
%D%/clonefile scripts_zfs_tests_bin_PROGRAMS += %D%/cp_files scripts_zfs_tests_bin_PROGRAMS += %D%/ctime scripts_zfs_tests_bin_PROGRAMS += %D%/dir_rd_update @@ -119,7 +120,6 @@ scripts_zfs_tests_bin_PROGRAMS += %D%/renameat2 scripts_zfs_tests_bin_PROGRAMS += %D%/xattrtest scripts_zfs_tests_bin_PROGRAMS += %D%/zed_fd_spill-zedlet scripts_zfs_tests_bin_PROGRAMS += %D%/idmap_util -scripts_zfs_tests_bin_PROGRAMS += %D%/clonefile %C%_idmap_util_LDADD = libspl.la diff --git a/tests/zfs-tests/cmd/clonefile.c b/tests/zfs-tests/cmd/clonefile.c index 696dc471d8..d002cd9b58 100644 --- a/tests/zfs-tests/cmd/clonefile.c +++ b/tests/zfs-tests/cmd/clonefile.c @@ -59,6 +59,10 @@ #endif #endif /* __NR_copy_file_range */ +#ifdef __FreeBSD__ +#define loff_t off_t +#endif + ssize_t copy_file_range(int, loff_t *, int, loff_t *, size_t, unsigned int) __attribute__((weak)); @@ -140,7 +144,7 @@ usage(void) " FICLONERANGE:\n" " clonefile -r \n" " copy_file_range:\n" - " clonefile -f \n" + " clonefile -f [ ]\n" " FIDEDUPERANGE:\n" " clonefile -d \n"); return (1); @@ -179,13 +183,29 @@ main(int argc, char **argv) } } - if (mode == CF_MODE_NONE || (argc-optind) < 2 || - (mode != CF_MODE_CLONE && (argc-optind) < 5)) - return (usage()); + switch (mode) { + case CF_MODE_NONE: + return (usage()); + case CF_MODE_CLONE: + if ((argc-optind) != 2) + return (usage()); + break; + case CF_MODE_CLONERANGE: + case CF_MODE_DEDUPERANGE: + if ((argc-optind) != 5) + return (usage()); + break; + case CF_MODE_COPYFILERANGE: + if ((argc-optind) != 2 && (argc-optind) != 5) + return (usage()); + break; + default: + abort(); + } loff_t soff = 0, doff = 0; - size_t len = 0; - if (mode != CF_MODE_CLONE) { + size_t len = SSIZE_MAX; + if ((argc-optind) == 5) { soff = strtoull(argv[optind+2], NULL, 10); if (soff == ULLONG_MAX) { fprintf(stderr, "invalid source offset"); @@ -196,10 +216,15 @@ main(int argc, char **argv) fprintf(stderr, "invalid dest offset"); return (1); } - len = strtoull(argv[optind+4], NULL, 10); - if 
(len == ULLONG_MAX) { - fprintf(stderr, "invalid length"); - return (1); + if (mode == CF_MODE_COPYFILERANGE && + strcmp(argv[optind+4], "all") == 0) { + len = SSIZE_MAX; + } else { + len = strtoull(argv[optind+4], NULL, 10); + if (len == ULLONG_MAX) { + fprintf(stderr, "invalid length"); + return (1); + } } } @@ -237,13 +262,15 @@ main(int argc, char **argv) abort(); } - off_t spos = lseek(sfd, 0, SEEK_CUR); - off_t slen = lseek(sfd, 0, SEEK_END); - off_t dpos = lseek(dfd, 0, SEEK_CUR); - off_t dlen = lseek(dfd, 0, SEEK_END); + if (!quiet) { + off_t spos = lseek(sfd, 0, SEEK_CUR); + off_t slen = lseek(sfd, 0, SEEK_END); + off_t dpos = lseek(dfd, 0, SEEK_CUR); + off_t dlen = lseek(dfd, 0, SEEK_END); - fprintf(stderr, "file offsets: src=%lu/%lu; dst=%lu/%lu\n", spos, slen, - dpos, dlen); + fprintf(stderr, "file offsets: src=%lu/%lu; dst=%lu/%lu\n", + spos, slen, dpos, dlen); + } close(dfd); close(sfd); @@ -254,7 +281,8 @@ main(int argc, char **argv) int do_clone(int sfd, int dfd) { - fprintf(stderr, "using FICLONE\n"); + if (!quiet) + fprintf(stderr, "using FICLONE\n"); int err = ioctl(dfd, CF_FICLONE, sfd); if (err < 0) { fprintf(stderr, "ioctl(FICLONE): %s\n", strerror(errno)); @@ -266,7 +294,8 @@ do_clone(int sfd, int dfd) int do_clonerange(int sfd, int dfd, loff_t soff, loff_t doff, size_t len) { - fprintf(stderr, "using FICLONERANGE\n"); + if (!quiet) + fprintf(stderr, "using FICLONERANGE\n"); cf_file_clone_range_t fcr = { .src_fd = sfd, .src_offset = soff, @@ -284,12 +313,22 @@ do_clonerange(int sfd, int dfd, loff_t soff, loff_t doff, size_t len) int do_copyfilerange(int sfd, int dfd, loff_t soff, loff_t doff, size_t len) { - fprintf(stderr, "using copy_file_range\n"); + if (!quiet) + fprintf(stderr, "using copy_file_range\n"); ssize_t copied = cf_copy_file_range(sfd, &soff, dfd, &doff, len, 0); if (copied < 0) { fprintf(stderr, "copy_file_range: %s\n", strerror(errno)); return (1); } + if (len == SSIZE_MAX) { + struct stat sb; + + if (fstat(sfd, &sb) < 0) { + 
fprintf(stderr, "fstat(sfd): %s\n", strerror(errno)); + return (1); + } + len = sb.st_size; + } if (copied != len) { fprintf(stderr, "copy_file_range: copied less than requested: " "requested=%lu; copied=%lu\n", len, copied); @@ -301,7 +340,8 @@ do_copyfilerange(int sfd, int dfd, loff_t soff, loff_t doff, size_t len) int do_deduperange(int sfd, int dfd, loff_t soff, loff_t doff, size_t len) { - fprintf(stderr, "using FIDEDUPERANGE\n"); + if (!quiet) + fprintf(stderr, "using FIDEDUPERANGE\n"); char buf[sizeof (cf_file_dedupe_range_t)+ sizeof (cf_file_dedupe_range_info_t)] = {0}; diff --git a/tests/zfs-tests/include/commands.cfg b/tests/zfs-tests/include/commands.cfg index 648f2203df..c6f74cd81a 100644 --- a/tests/zfs-tests/include/commands.cfg +++ b/tests/zfs-tests/include/commands.cfg @@ -98,7 +98,8 @@ export SYSTEM_FILES_COMMON='awk uname uniq vmstat - wc' + wc + xargs' export SYSTEM_FILES_FREEBSD='chflags compress diff --git a/tests/zfs-tests/include/math.shlib b/tests/zfs-tests/include/math.shlib index da1e77e5fb..2b5e60180f 100644 --- a/tests/zfs-tests/include/math.shlib +++ b/tests/zfs-tests/include/math.shlib @@ -123,10 +123,21 @@ function verify_ne # # # $1 lower bound # $2 upper bound +# [$3 how many] function random_int_between { typeset -i min=$1 typeset -i max=$2 + typeset -i count + typeset -i i - echo $(( (RANDOM % (max - min + 1)) + min )) + if [[ -z "$3" ]]; then + count=1 + else + count=$3 + fi + + for (( i = 0; i < $count; i++ )); do + echo $(( (RANDOM % (max - min + 1)) + min )) + done } diff --git a/tests/zfs-tests/tests/Makefile.am b/tests/zfs-tests/tests/Makefile.am index 7f5af6530e..33e97d22b6 100644 --- a/tests/zfs-tests/tests/Makefile.am +++ b/tests/zfs-tests/tests/Makefile.am @@ -90,6 +90,9 @@ nobase_dist_datadir_zfs_tests_tests_DATA += \ functional/alloc_class/alloc_class.kshlib \ functional/atime/atime.cfg \ functional/atime/atime_common.kshlib \ + functional/bclone/bclone.cfg \ + functional/bclone/bclone_common.kshlib \ + 
functional/bclone/bclone_corner_cases.kshlib \ functional/block_cloning/block_cloning.kshlib \ functional/cache/cache.cfg \ functional/cache/cache.kshlib \ @@ -438,6 +441,24 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/atime/root_atime_on.ksh \ functional/atime/root_relatime_on.ksh \ functional/atime/setup.ksh \ + functional/bclone/bclone_crossfs_corner_cases.ksh \ + functional/bclone/bclone_crossfs_corner_cases_limited.ksh \ + functional/bclone/bclone_crossfs_data.ksh \ + functional/bclone/bclone_crossfs_embedded.ksh \ + functional/bclone/bclone_crossfs_hole.ksh \ + functional/bclone/bclone_diffprops_all.ksh \ + functional/bclone/bclone_diffprops_checksum.ksh \ + functional/bclone/bclone_diffprops_compress.ksh \ + functional/bclone/bclone_diffprops_copies.ksh \ + functional/bclone/bclone_diffprops_recordsize.ksh \ + functional/bclone/bclone_prop_sync.ksh \ + functional/bclone/bclone_samefs_corner_cases.ksh \ + functional/bclone/bclone_samefs_corner_cases_limited.ksh \ + functional/bclone/bclone_samefs_data.ksh \ + functional/bclone/bclone_samefs_embedded.ksh \ + functional/bclone/bclone_samefs_hole.ksh \ + functional/bclone/cleanup.ksh \ + functional/bclone/setup.ksh \ functional/block_cloning/cleanup.ksh \ functional/block_cloning/setup.ksh \ functional/block_cloning/block_cloning_copyfilerange_cross_dataset.ksh \ diff --git a/tests/zfs-tests/tests/functional/bclone/TODO b/tests/zfs-tests/tests/functional/bclone/TODO new file mode 100644 index 0000000000..7cd4ee898f --- /dev/null +++ b/tests/zfs-tests/tests/functional/bclone/TODO @@ -0,0 +1,4 @@ +- If dedup enabled, block_cloning uses dedup. 
+- check when block cloning isn't supposed to work +- check block cloning between two different pools +- block cloning from a snapshot diff --git a/tests/zfs-tests/tests/functional/bclone/bclone.cfg b/tests/zfs-tests/tests/functional/bclone/bclone.cfg new file mode 100644 index 0000000000..f72d17c1be --- /dev/null +++ b/tests/zfs-tests/tests/functional/bclone/bclone.cfg @@ -0,0 +1,32 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023 by Pawel Jakub Dawidek +# + +# TODO: We should calculate that based on ashift. +export MINBLOCKSIZE=512 + +export TESTSRCFS="$TESTPOOL/$TESTFS/src" +export TESTDSTFS="$TESTPOOL/$TESTFS/dst" +export TESTSRCDIR="$TESTDIR/src" +export TESTDSTDIR="$TESTDIR/dst" diff --git a/tests/zfs-tests/tests/functional/bclone/bclone_common.kshlib b/tests/zfs-tests/tests/functional/bclone/bclone_common.kshlib new file mode 100644 index 0000000000..beba01c0ed --- /dev/null +++ b/tests/zfs-tests/tests/functional/bclone/bclone_common.kshlib @@ -0,0 +1,280 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023 by Pawel Jakub Dawidek +# + +. $STF_SUITE/tests/functional/bclone/bclone.cfg + +export RECORDSIZE=$(zfs get -Hp -o value recordsize $TESTPOOL/$TESTFS) + +MINBLKSIZE1=512 +MINBLKSIZE2=1024 + +function verify_block_cloning +{ + if is_linux && [[ $(linux_version) -lt $(linux_version "4.5") ]]; then + log_unsupported "copy_file_range not available before Linux 4.5" + fi +} + +function verify_crossfs_block_cloning +{ + if is_linux && [[ $(linux_version) -lt $(linux_version "5.3") ]]; then + log_unsupported "copy_file_range can't copy cross-filesystem before Linux 5.3" + fi +} + +# Unused. 
+function size_to_dsize +{ + typeset -r size=$1 + typeset -r dir=$2 + + typeset -r dataset=$(df $dir | tail -1 | awk '{print $1}') + typeset -r recordsize=$(get_prop recordsize $dataset) + typeset -r copies=$(get_prop copies $dataset) + typeset dsize + + if [[ $size -le $recordsize ]]; then + dsize=$(( ((size - 1) / MINBLOCKSIZE + 1) * MINBLOCKSIZE )) + else + dsize=$(( ((size - 1) / recordsize + 1) * recordsize )) + fi + dsize=$((dsize*copies)) + + echo $dsize +} + +function test_file_integrity +{ + typeset -r original_checksum=$1 + typeset -r clone=$2 + typeset -r filesize=$3 + + typeset -r clone_checksum=$(sha256digest $clone) + + if [[ $original_checksum != $clone_checksum ]]; then + log_fail "Clone $clone is corrupted with file size $filesize" + fi +} + +function verify_pool_prop_eq +{ + typeset -r prop=$1 + typeset -r expected=$2 + + typeset -r value=$(get_pool_prop $prop $TESTPOOL) + if [[ $value != $expected ]]; then + log_fail "Pool property $prop is incorrect: expected $expected, got $value" + fi +} + +function verify_pool_props +{ + typeset -r dsize=$1 + typeset -r ratio=$2 + + if [[ $dsize -eq 0 ]]; then + verify_pool_prop_eq bcloneused 0 + verify_pool_prop_eq bclonesaved 0 + verify_pool_prop_eq bcloneratio 1.00 + else + if [[ $ratio -eq 1 ]]; then + verify_pool_prop_eq bcloneused 0 + else + verify_pool_prop_eq bcloneused $dsize + fi + verify_pool_prop_eq bclonesaved $((dsize*(ratio-1))) + verify_pool_prop_eq bcloneratio "${ratio}.00" + fi +} + +# Function to test file copying and integrity check. +function bclone_test +{ + typeset -r datatype=$1 + typeset filesize=$2 + typeset -r embedded=$3 + typeset -r srcdir=$4 + typeset -r dstdir=$5 + typeset dsize + + typeset -r original="${srcdir}/original" + typeset -r clone="${dstdir}/clone" + + log_note "Testing file copy with datatype $datatype, file size $filesize, embedded $embedded" + + # Create a test file with known content. 
+ case $datatype in + random|text) + sync_pool $TESTPOOL + if [[ $datatype = "random" ]]; then + dd if=/dev/urandom of=$original bs=$filesize count=1 2>/dev/null + else + filesize=$(((filesize/4)*4)) + dd if=/dev/urandom bs=$(((filesize/4)*3)) count=1 | \ + openssl base64 -A > $original + fi + sync_pool $TESTPOOL + clonefile -f $original "${clone}-tmp" + sync_pool $TESTPOOL + # It is hard to predict block sizes that will be used, + # so just do one clone and take it from bcloneused. + filesize=$(zpool get -Hp -o value bcloneused $TESTPOOL) + if [[ $embedded = "false" ]]; then + log_must test $filesize -gt 0 + fi + rm -f "${clone}-tmp" + sync_pool $TESTPOOL + dsize=$filesize + ;; + hole) + log_must truncate_test -s $filesize -f $original + dsize=0 + ;; + *) + log_fail "Unknown datatype $datatype" + ;; + esac + if [[ $embedded = "true" ]]; then + dsize=0 + fi + + typeset -r original_checksum=$(sha256digest $original) + + sync_pool $TESTPOOL + + # Create a first clone of the entire file. + clonefile -f $original "${clone}0" + # Try to clone the clone in the same transaction group. + clonefile -f "${clone}0" "${clone}2" + + # Clone the original again... + clonefile -f $original "${clone}1" + # ...and overwrite it in the same transaction group. + clonefile -f $original "${clone}1" + + # Clone the clone... + clonefile -f "${clone}1" "${clone}3" + sync_pool $TESTPOOL + # ...and overwrite in the new transaction group. + clonefile -f "${clone}1" "${clone}3" + + sync_pool $TESTPOOL + + # Test removal of the pending clones (before they are committed to disk). + clonefile -f $original "${clone}4" + clonefile -f "${clone}4" "${clone}5" + rm -f "${clone}4" "${clone}5" + + # Clone into one file, but remove another file, but with the same data in + # the same transaction group. 
+ clonefile -f $original "${clone}5" + sync_pool $TESTPOOL + clonefile -f $original "${clone}4" + rm -f "${clone}5" + test_file_integrity $original_checksum "${clone}4" $filesize + sync_pool $TESTPOOL + test_file_integrity $original_checksum "${clone}4" $filesize + + clonefile -f "${clone}4" "${clone}5" + # Verify integrity of the cloned file before it is committed to disk. + test_file_integrity $original_checksum "${clone}5" $filesize + + sync_pool $TESTPOOL + + # Verify integrity in the new transaction group. + test_file_integrity $original_checksum "${clone}0" $filesize + test_file_integrity $original_checksum "${clone}1" $filesize + test_file_integrity $original_checksum "${clone}2" $filesize + test_file_integrity $original_checksum "${clone}3" $filesize + test_file_integrity $original_checksum "${clone}4" $filesize + test_file_integrity $original_checksum "${clone}5" $filesize + + verify_pool_props $dsize 7 + + # Clear cache and test after fresh import. + log_must zpool export $TESTPOOL + log_must zpool import $TESTPOOL + + # Cloned uncached file. + clonefile -f $original "${clone}6" + # Cloned uncached clone. + clonefile -f "${clone}6" "${clone}7" + + # Cache the file. 
+ cat $original >/dev/null + clonefile -f $original "${clone}8" + clonefile -f "${clone}8" "${clone}9" + + test_file_integrity $original_checksum "${clone}6" $filesize + test_file_integrity $original_checksum "${clone}7" $filesize + test_file_integrity $original_checksum "${clone}8" $filesize + test_file_integrity $original_checksum "${clone}9" $filesize + + sync_pool $TESTPOOL + + verify_pool_props $dsize 11 + + log_must zpool export $TESTPOOL + log_must zpool import $TESTPOOL + + test_file_integrity $original_checksum "${clone}0" $filesize + test_file_integrity $original_checksum "${clone}1" $filesize + test_file_integrity $original_checksum "${clone}2" $filesize + test_file_integrity $original_checksum "${clone}3" $filesize + test_file_integrity $original_checksum "${clone}4" $filesize + test_file_integrity $original_checksum "${clone}5" $filesize + test_file_integrity $original_checksum "${clone}6" $filesize + test_file_integrity $original_checksum "${clone}7" $filesize + test_file_integrity $original_checksum "${clone}8" $filesize + test_file_integrity $original_checksum "${clone}9" $filesize + + rm -f $original + rm -f "${clone}1" "${clone}3" "${clone}5" "${clone}7" + + sync_pool $TESTPOOL + + test_file_integrity $original_checksum "${clone}0" $filesize + test_file_integrity $original_checksum "${clone}2" $filesize + test_file_integrity $original_checksum "${clone}4" $filesize + test_file_integrity $original_checksum "${clone}6" $filesize + test_file_integrity $original_checksum "${clone}8" $filesize + test_file_integrity $original_checksum "${clone}9" $filesize + + verify_pool_props $dsize 6 + + rm -f "${clone}0" "${clone}2" "${clone}4" "${clone}8" "${clone}9" + + sync_pool $TESTPOOL + + test_file_integrity $original_checksum "${clone}6" $filesize + + verify_pool_props $dsize 1 + + rm -f "${clone}6" + + sync_pool $TESTPOOL + + verify_pool_props $dsize 1 +} diff --git a/tests/zfs-tests/tests/functional/bclone/bclone_corner_cases.kshlib 
b/tests/zfs-tests/tests/functional/bclone/bclone_corner_cases.kshlib new file mode 100644 index 0000000000..ddfbfc999c --- /dev/null +++ b/tests/zfs-tests/tests/functional/bclone/bclone_corner_cases.kshlib @@ -0,0 +1,315 @@ +#! /bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023 by Pawel Jakub Dawidek +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/include/math.shlib +. $STF_SUITE/tests/functional/bclone/bclone_common.kshlib + +function first_half_checksum +{ + typeset -r file=$1 + + dd if=$file bs=$HALFRECORDSIZE count=1 2>/dev/null | sha256digest +} + +function second_half_checksum +{ + typeset -r file=$1 + + dd if=$file bs=$HALFRECORDSIZE count=1 skip=1 2>/dev/null | sha256digest +} + +function bclone_corner_cases_init +{ + typeset -r srcdir=$1 + typeset -r dstdir=$2 + + export RECORDSIZE=4096 + export HALFRECORDSIZE=$((RECORDSIZE / 2)) + + export CLONE="$dstdir/clone0" + export ORIG0="$srcdir/orig0" + export ORIG1="$srcdir/orig1" + export ORIG2="$srcdir/orig2" + + # Create source files. 
+ log_must dd if=/dev/urandom of="$ORIG0" bs=$RECORDSIZE count=1 + log_must dd if=/dev/urandom of="$ORIG1" bs=$RECORDSIZE count=1 + log_must dd if=/dev/urandom of="$ORIG2" bs=$RECORDSIZE count=1 + + export FIRST_HALF_ORIG0_CHECKSUM=$(first_half_checksum $ORIG0) + export FIRST_HALF_ORIG1_CHECKSUM=$(first_half_checksum $ORIG1) + export FIRST_HALF_ORIG2_CHECKSUM=$(first_half_checksum $ORIG2) + export SECOND_HALF_ORIG0_CHECKSUM=$(second_half_checksum $ORIG0) + export SECOND_HALF_ORIG1_CHECKSUM=$(second_half_checksum $ORIG1) + export SECOND_HALF_ORIG2_CHECKSUM=$(second_half_checksum $ORIG2) + export ZEROS_CHECKSUM=$(dd if=/dev/zero bs=$HALFRECORDSIZE count=1 | sha256digest) + export FIRST_HALF_CHECKSUM="" + export SECOND_HALF_CHECKSUM="" +} + +function cache_clone +{ + typeset -r cached=$1 + + case "$cached" in + "cached") + dd if=$CLONE of=/dev/null bs=$RECORDSIZE 2>/dev/null + ;; + "uncached") + ;; + *) + log_fail "invalid cached: $cached" + ;; + esac +} + +function create_existing +{ + typeset -r existing=$1 + + case "$existing" in + "no") + ;; + "small empty") + log_must truncate_test -s $HALFRECORDSIZE -f $CLONE + ;; + "full empty") + log_must truncate_test -s $RECORDSIZE -f $CLONE + ;; + "small data") + log_must dd if=/dev/urandom of=$CLONE bs=$HALFRECORDSIZE count=1 \ + 2>/dev/null + ;; + "full data") + log_must dd if=/dev/urandom of=$CLONE bs=$RECORDSIZE count=1 2>/dev/null + ;; + *) + log_fail "invalid existing: $existing" + ;; + esac +} + +function create_clone +{ + typeset -r clone=$1 + typeset -r file=$2 + + case "$clone" in + "no") + ;; + "yes") + clonefile -f $file $CLONE + case "$file" in + $ORIG0) + FIRST_HALF_CHECKSUM=$FIRST_HALF_ORIG0_CHECKSUM + SECOND_HALF_CHECKSUM=$SECOND_HALF_ORIG0_CHECKSUM + ;; + $ORIG2) + FIRST_HALF_CHECKSUM=$FIRST_HALF_ORIG2_CHECKSUM + SECOND_HALF_CHECKSUM=$SECOND_HALF_ORIG2_CHECKSUM + ;; + *) + log_fail "invalid file: $file" + ;; + esac + ;; + *) + log_fail "invalid clone: $clone" + ;; + esac +} + +function overwrite_clone +{ + 
typeset -r overwrite=$1 + + case "$overwrite" in + "no") + ;; + "free") + log_must truncate_test -s 0 -f $CLONE + log_must truncate_test -s $RECORDSIZE -f $CLONE + FIRST_HALF_CHECKSUM=$ZEROS_CHECKSUM + SECOND_HALF_CHECKSUM=$ZEROS_CHECKSUM + ;; + "full") + log_must dd if=$ORIG1 of=$CLONE bs=$RECORDSIZE count=1 2>/dev/null + FIRST_HALF_CHECKSUM=$FIRST_HALF_ORIG1_CHECKSUM + SECOND_HALF_CHECKSUM=$SECOND_HALF_ORIG1_CHECKSUM + ;; + "first half") + log_must dd if=$ORIG1 of=$CLONE bs=$HALFRECORDSIZE skip=0 seek=0 \ + count=1 conv=notrunc 2>/dev/null + FIRST_HALF_CHECKSUM=$FIRST_HALF_ORIG1_CHECKSUM + ;; + "second half") + log_must dd if=$ORIG1 of=$CLONE bs=$HALFRECORDSIZE skip=1 seek=1 \ + count=1 conv=notrunc 2>/dev/null + SECOND_HALF_CHECKSUM=$SECOND_HALF_ORIG1_CHECKSUM + ;; + *) + log_fail "invalid overwrite: $overwrite" + ;; + esac +} + +function checksum_compare +{ + typeset -r compare=$1 + typeset first_half_calculated_checksum second_half_calculated_checksum + + case "$compare" in + "no") + ;; + "yes") + first_half_calculated_checksum=$(first_half_checksum $CLONE) + second_half_calculated_checksum=$(second_half_checksum $CLONE) + + if [[ $first_half_calculated_checksum != $FIRST_HALF_CHECKSUM ]] || \ + [[ $second_half_calculated_checksum != $SECOND_HALF_CHECKSUM ]]; then + return 1 + fi + ;; + *) + log_fail "invalid compare: $compare" + ;; + esac +} + +function bclone_corner_cases_test +{ + typeset cached existing + typeset first_clone first_overwrite + typeset read_after read_before + typeset second_clone second_overwrite + typeset -r srcdir=$1 + typeset -r dstdir=$2 + typeset limit=$3 + typeset -i count=0 + + if [[ $srcdir != "count" ]]; then + if [[ -n "$limit" ]]; then + typeset -r total_count=$(bclone_corner_cases_test count) + limit=$(random_int_between 1 $total_count $((limit*2)) | sort -nu | head -n $limit | xargs) + fi + bclone_corner_cases_init $srcdir $dstdir + fi + + # + # (create) / (cache) / (clone) / (overwrite) / (read) / (clone) / (overwrite) / 
(read) / read next txg + # + for existing in "no" "small empty" "full empty" "small data" "full data"; do + for cached in "uncached" "cached"; do + for first_clone in "no" "yes"; do + for first_overwrite in "no" "free" "full" "first half" "second half"; do + for read_before in "no" "yes"; do + for second_clone in "no" "yes"; do + for second_overwrite in "no" "free" "full" "first half" "second half"; do + for read_after in "no" "yes"; do + if [[ $first_clone = "no" ]] && \ + [[ $second_clone = "no" ]]; then + continue + fi + if [[ $first_clone = "no" ]] && \ + [[ $read_before = "yes" ]]; then + continue + fi + if [[ $second_clone = "no" ]] && \ + [[ $read_before = "yes" ]] && \ + [[ $read_after = "yes" ]]; then + continue + fi + + count=$((count+1)) + + if [[ $srcdir = "count" ]]; then + # Just counting. + continue + fi + + if [[ -n "$limit" ]]; then + if ! echo " $limit " | grep -q " $count "; then + continue + fi + fi + + FIRST_HALF_CHECKSUM="" + SECOND_HALF_CHECKSUM="" + + log_must zpool export $TESTPOOL + log_must zpool import $TESTPOOL + + create_existing "$existing" + + log_must zpool export $TESTPOOL + log_must zpool import $TESTPOOL + + cache_clone "$cached" + + create_clone "$first_clone" "$ORIG0" + + overwrite_clone "$first_overwrite" + + if checksum_compare $read_before; then + log_note "existing: $existing / cached: $cached / first_clone: $first_clone / first_overwrite: $first_overwrite / read_before: $read_before" + else + log_fail "FAIL: existing: $existing / cached: $cached / first_clone: $first_clone / first_overwrite: $first_overwrite / read_before: $read_before" + fi + + create_clone "$second_clone" "$ORIG2" + + overwrite_clone "$second_overwrite" + + if checksum_compare $read_after; then + log_note "existing: $existing / cached: $cached / first_clone: $first_clone / first_overwrite: $first_overwrite / read_before: $read_before / second_clone: $second_clone / read_after: $read_after" + else + log_fail "FAIL: existing: $existing / cached: $cached / 
first_clone: $first_clone / first_overwrite: $first_overwrite / read_before: $read_before / second_clone: $second_clone / read_after: $read_after" + fi + + log_must zpool export $TESTPOOL + log_must zpool import $TESTPOOL + + if checksum_compare "yes"; then + log_note "existing: $existing / cached: $cached / first_clone: $first_clone / first_overwrite: $first_overwrite / read_before: $read_before / second_clone: $second_clone / read_after: $read_after / read_next_txg" + else + log_fail "FAIL: existing: $existing / cached: $cached / first_clone: $first_clone / first_overwrite: $first_overwrite / read_before: $read_before / second_clone: $second_clone / read_after: $read_after / read_next_txg" + fi + + rm -f "$CLONE" + done + done + done + done + done + done + done + done + + if [[ $srcdir = "count" ]]; then + echo $count + fi +} diff --git a/tests/zfs-tests/tests/functional/bclone/bclone_crossfs_corner_cases.ksh b/tests/zfs-tests/tests/functional/bclone/bclone_crossfs_corner_cases.ksh new file mode 100755 index 0000000000..35188cddb0 --- /dev/null +++ b/tests/zfs-tests/tests/functional/bclone/bclone_crossfs_corner_cases.ksh @@ -0,0 +1,45 @@ +#! /bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023 by Pawel Jakub Dawidek +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/bclone/bclone_corner_cases.kshlib + +verify_runnable "both" + +verify_block_cloning +verify_crossfs_block_cloning + +log_assert "Verify various corner cases in block cloning across datasets" + +# Disable compression to make sure we won't use embedded blocks. +log_must zfs set compress=off $TESTSRCFS +log_must zfs set recordsize=$RECORDSIZE $TESTSRCFS +log_must zfs set compress=off $TESTDSTFS +log_must zfs set recordsize=$RECORDSIZE $TESTDSTFS + +bclone_corner_cases_test $TESTSRCDIR $TESTDSTDIR + +log_pass diff --git a/tests/zfs-tests/tests/functional/bclone/bclone_crossfs_corner_cases_limited.ksh b/tests/zfs-tests/tests/functional/bclone/bclone_crossfs_corner_cases_limited.ksh new file mode 100755 index 0000000000..1fc1bbd07f --- /dev/null +++ b/tests/zfs-tests/tests/functional/bclone/bclone_crossfs_corner_cases_limited.ksh @@ -0,0 +1,45 @@ +#! /bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023 by Pawel Jakub Dawidek +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/bclone/bclone_corner_cases.kshlib + +verify_runnable "both" + +verify_block_cloning +verify_crossfs_block_cloning + +log_assert "Verify various corner cases in block cloning across datasets" + +# Disable compression to make sure we won't use embedded blocks. +log_must zfs set compress=off $TESTSRCFS +log_must zfs set recordsize=$RECORDSIZE $TESTSRCFS +log_must zfs set compress=off $TESTDSTFS +log_must zfs set recordsize=$RECORDSIZE $TESTDSTFS + +bclone_corner_cases_test $TESTSRCDIR $TESTDSTDIR 100 + +log_pass diff --git a/tests/zfs-tests/tests/functional/bclone/bclone_crossfs_data.ksh b/tests/zfs-tests/tests/functional/bclone/bclone_crossfs_data.ksh new file mode 100755 index 0000000000..e2fe25d451 --- /dev/null +++ b/tests/zfs-tests/tests/functional/bclone/bclone_crossfs_data.ksh @@ -0,0 +1,46 @@ +#! /bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023 by Pawel Jakub Dawidek +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/bclone/bclone_common.kshlib + +verify_runnable "both" + +verify_block_cloning +verify_crossfs_block_cloning + +log_assert "Verify block cloning properly clones regular files across datasets" + +# Disable compression to make sure we won't use embedded blocks. +log_must zfs set compress=off $TESTSRCFS +log_must zfs set compress=off $TESTDSTFS + +for filesize in 1 107 113 511 512 513 4095 4096 4097 131071 131072 131073 \ + 1048575 1048576 1048577 4194303 4194304 4194305; do + bclone_test random $filesize false $TESTSRCDIR $TESTDSTDIR +done + +log_pass diff --git a/tests/zfs-tests/tests/functional/bclone/bclone_crossfs_embedded.ksh b/tests/zfs-tests/tests/functional/bclone/bclone_crossfs_embedded.ksh new file mode 100755 index 0000000000..6a6fe1d309 --- /dev/null +++ b/tests/zfs-tests/tests/functional/bclone/bclone_crossfs_embedded.ksh @@ -0,0 +1,50 @@ +#! /bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023 by Pawel Jakub Dawidek +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/bclone/bclone_common.kshlib + +verify_runnable "both" + +verify_block_cloning +verify_crossfs_block_cloning + +log_assert "Verify block cloning properly clones small files (with embedded blocks) across datasets" + +# Enable ZLE compression so that we know the maximum amount of data we +# can store in a BP. +log_must zfs set compress=zle $TESTSRCFS +log_must zfs set compress=zle $TESTDSTFS + +# Test BP_IS_EMBEDDED(). +# Maximum embedded payload size is 112 bytes, but the buffer is extended to +# 512 bytes first and then compressed. 107 random bytes followed by 405 zeros +# gives exactly 112 bytes after compression with ZLE. +for filesize in 1 2 4 8 16 32 64 96 107; do + bclone_test random $filesize true $TESTSRCDIR $TESTDSTDIR +done + +log_pass diff --git a/tests/zfs-tests/tests/functional/bclone/bclone_crossfs_hole.ksh b/tests/zfs-tests/tests/functional/bclone/bclone_crossfs_hole.ksh new file mode 100755 index 0000000000..d4c33d6da3 --- /dev/null +++ b/tests/zfs-tests/tests/functional/bclone/bclone_crossfs_hole.ksh @@ -0,0 +1,45 @@ +#! /bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. 
+# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023 by Pawel Jakub Dawidek +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/bclone/bclone_common.kshlib + +verify_runnable "both" + +verify_block_cloning +verify_crossfs_block_cloning + +log_assert "Verify block cloning properly clones sparse files (files with holes) across datasets" + +# Compression doesn't matter here. + +# Test BP_IS_HOLE(). +for filesize in 1 511 512 513 4095 4096 4097 131071 131072 131073 \ + 1048575 1048576 1048577 4194303 4194304 4194305; do + bclone_test hole $filesize false $TESTSRCDIR $TESTDSTDIR +done + +log_pass diff --git a/tests/zfs-tests/tests/functional/bclone/bclone_diffprops_all.ksh b/tests/zfs-tests/tests/functional/bclone/bclone_diffprops_all.ksh new file mode 100755 index 0000000000..a5e7282fe6 --- /dev/null +++ b/tests/zfs-tests/tests/functional/bclone/bclone_diffprops_all.ksh @@ -0,0 +1,86 @@ +#! /bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023 by Pawel Jakub Dawidek +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/include/math.shlib +. $STF_SUITE/tests/functional/bclone/bclone_common.kshlib + +verify_runnable "both" + +verify_block_cloning +verify_crossfs_block_cloning + +log_assert "Verify block cloning across datasets with different properties" + +log_must zfs set checksum=off $TESTSRCFS +log_must zfs set compress=off $TESTSRCFS +log_must zfs set copies=1 $TESTSRCFS +log_must zfs set recordsize=131072 $TESTSRCFS +log_must zfs set checksum=fletcher2 $TESTDSTFS +log_must zfs set compress=lz4 $TESTDSTFS +log_must zfs set copies=3 $TESTDSTFS +log_must zfs set recordsize=8192 $TESTDSTFS + +FILESIZE=$(random_int_between 2 32767) +FILESIZE=$((FILESIZE * 64)) +bclone_test text $FILESIZE false $TESTSRCDIR $TESTDSTDIR + +log_must zfs set checksum=sha256 $TESTSRCFS +log_must zfs set compress=zstd $TESTSRCFS +log_must zfs set copies=2 $TESTSRCFS +log_must zfs set recordsize=262144 $TESTSRCFS +log_must zfs set checksum=off $TESTDSTFS +log_must zfs set compress=off $TESTDSTFS +log_must zfs set copies=1 $TESTDSTFS +log_must zfs set recordsize=131072 $TESTDSTFS + +FILESIZE=$(random_int_between 2 32767) +FILESIZE=$((FILESIZE * 64)) +bclone_test text $FILESIZE false $TESTSRCDIR $TESTDSTDIR + +log_must zfs set checksum=sha512 $TESTSRCFS +log_must zfs set compress=gzip $TESTSRCFS +log_must zfs set copies=2 $TESTSRCFS +log_must zfs set recordsize=512 $TESTSRCFS +log_must zfs set checksum=fletcher4 $TESTDSTFS +log_must zfs set compress=lzjb $TESTDSTFS +log_must zfs set copies=3 $TESTDSTFS +log_must zfs set recordsize=16384 $TESTDSTFS + +FILESIZE=$(random_int_between 2 32767) +FILESIZE=$((FILESIZE * 64)) +bclone_test text $FILESIZE false $TESTSRCDIR $TESTDSTDIR + 
+log_must zfs inherit checksum $TESTSRCFS +log_must zfs inherit compress $TESTSRCFS +log_must zfs inherit copies $TESTSRCFS +log_must zfs inherit recordsize $TESTSRCFS +log_must zfs inherit checksum $TESTDSTFS +log_must zfs inherit compress $TESTDSTFS +log_must zfs inherit copies $TESTDSTFS +log_must zfs inherit recordsize $TESTDSTFS + +log_pass diff --git a/tests/zfs-tests/tests/functional/bclone/bclone_diffprops_checksum.ksh b/tests/zfs-tests/tests/functional/bclone/bclone_diffprops_checksum.ksh new file mode 100755 index 0000000000..7e064a0dfd --- /dev/null +++ b/tests/zfs-tests/tests/functional/bclone/bclone_diffprops_checksum.ksh @@ -0,0 +1,62 @@ +#! /bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023 by Pawel Jakub Dawidek +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/include/math.shlib +. $STF_SUITE/include/properties.shlib +. 
$STF_SUITE/tests/functional/bclone/bclone_common.kshlib + +verify_runnable "both" + +verify_block_cloning +verify_crossfs_block_cloning + +log_assert "Verify block cloning across datasets with different checksum properties" + +log_must zfs set compress=off $TESTSRCFS +log_must zfs set compress=off $TESTDSTFS + +for srcprop in "${checksum_prop_vals[@]}"; do + for dstprop in "${checksum_prop_vals[@]}"; do + if [[ $srcprop == $dstprop ]]; then + continue + fi + log_must zfs set checksum=$srcprop $TESTSRCFS + log_must zfs set checksum=$dstprop $TESTDSTFS + # 15*8=120, which is greater than 113, so we are sure the data won't + # be embedded into BP. + # 32767*8=262136, which is larger than a single default recordsize of + # 131072. + FILESIZE=$(random_int_between 15 32767) + FILESIZE=$((FILESIZE * 8)) + bclone_test random $FILESIZE false $TESTSRCDIR $TESTDSTDIR + done +done + +log_must zfs inherit checksum $TESTSRCFS +log_must zfs inherit checksum $TESTDSTFS + +log_pass diff --git a/tests/zfs-tests/tests/functional/bclone/bclone_diffprops_compress.ksh b/tests/zfs-tests/tests/functional/bclone/bclone_diffprops_compress.ksh new file mode 100755 index 0000000000..e1d6e59492 --- /dev/null +++ b/tests/zfs-tests/tests/functional/bclone/bclone_diffprops_compress.ksh @@ -0,0 +1,59 @@ +#! /bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023 by Pawel Jakub Dawidek +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/include/math.shlib +. $STF_SUITE/include/properties.shlib +. $STF_SUITE/tests/functional/bclone/bclone_common.kshlib + +verify_runnable "both" + +verify_block_cloning +verify_crossfs_block_cloning + +log_assert "Verify block cloning across datasets with different compression properties" + +for srcprop in "${compress_prop_vals[@]}"; do + for dstprop in "${compress_prop_vals[@]}"; do + if [[ $srcprop == $dstprop ]]; then + continue + fi + log_must zfs set compress=$srcprop $TESTSRCFS + log_must zfs set compress=$dstprop $TESTDSTFS + # 15*8=120, which is greater than 113, so we are sure the data won't + # be embedded into BP. + # 32767*8=262136, which is larger than a single default recordsize of + # 131072. + FILESIZE=$(random_int_between 15 32767) + FILESIZE=$((FILESIZE * 8)) + bclone_test text $FILESIZE false $TESTSRCDIR $TESTDSTDIR + done +done + +log_must zfs inherit compress $TESTSRCFS +log_must zfs inherit compress $TESTDSTFS + +log_pass diff --git a/tests/zfs-tests/tests/functional/bclone/bclone_diffprops_copies.ksh b/tests/zfs-tests/tests/functional/bclone/bclone_diffprops_copies.ksh new file mode 100755 index 0000000000..ac823e1ec3 --- /dev/null +++ b/tests/zfs-tests/tests/functional/bclone/bclone_diffprops_copies.ksh @@ -0,0 +1,59 @@ +#! /bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. 
+# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023 by Pawel Jakub Dawidek +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/include/math.shlib +. $STF_SUITE/include/properties.shlib +. $STF_SUITE/tests/functional/bclone/bclone_common.kshlib + +verify_runnable "both" + +verify_block_cloning +verify_crossfs_block_cloning + +log_assert "Verify block cloning across datasets with different copies properties" + +log_must zfs set compress=off $TESTSRCFS +log_must zfs set compress=off $TESTDSTFS + +for srcprop in "${copies_prop_vals[@]}"; do + for dstprop in "${copies_prop_vals[@]}"; do + log_must zfs set copies=$srcprop $TESTSRCFS + log_must zfs set copies=$dstprop $TESTDSTFS + # 15*8=120, which is greater than 113, so we are sure the data won't + # be embedded into BP. + # 32767*8=262136, which is larger than a single default recordsize of + # 131072. + FILESIZE=$(random_int_between 15 32767) + FILESIZE=$((FILESIZE * 8)) + bclone_test random $FILESIZE false $TESTSRCDIR $TESTDSTDIR + done +done + +log_must zfs inherit copies $TESTSRCFS +log_must zfs inherit copies $TESTDSTFS + +log_pass diff --git a/tests/zfs-tests/tests/functional/bclone/bclone_diffprops_recordsize.ksh b/tests/zfs-tests/tests/functional/bclone/bclone_diffprops_recordsize.ksh new file mode 100755 index 0000000000..d833e61231 --- /dev/null +++ b/tests/zfs-tests/tests/functional/bclone/bclone_diffprops_recordsize.ksh @@ -0,0 +1,65 @@ +#! 
/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023 by Pawel Jakub Dawidek +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/include/math.shlib +. $STF_SUITE/include/properties.shlib +. $STF_SUITE/tests/functional/bclone/bclone_common.kshlib + +verify_runnable "both" + +verify_block_cloning +verify_crossfs_block_cloning + +log_assert "Verify block cloning across datasets with different recordsize properties" + +log_must zfs set compress=off $TESTSRCFS +log_must zfs set compress=off $TESTDSTFS + +# recsize_prop_vals[] array contains too many entries and the tests take too +# long. Let's use only a subset of them. +typeset -a bclone_recsize_prop_vals=('512' '4096' '131072' '1048576') + +for srcprop in "${bclone_recsize_prop_vals[@]}"; do + for dstprop in "${bclone_recsize_prop_vals[@]}"; do + if [[ $srcprop == $dstprop ]]; then + continue + fi + log_must zfs set recordsize=$srcprop $TESTSRCFS + log_must zfs set recordsize=$dstprop $TESTDSTFS + # 2*64=128, which is greater than 113, so we are sure the data won't + # be embedded into BP. + # 32767*64=2097088, which is larger than the largest recordsize (1MB). 
+ FILESIZE=$(random_int_between 2 32767) + FILESIZE=$((FILESIZE * 64)) + bclone_test random $FILESIZE false $TESTSRCDIR $TESTDSTDIR + done +done + +log_must zfs inherit recordsize $TESTSRCFS +log_must zfs inherit recordsize $TESTDSTFS + +log_pass diff --git a/tests/zfs-tests/tests/functional/bclone/bclone_prop_sync.ksh b/tests/zfs-tests/tests/functional/bclone/bclone_prop_sync.ksh new file mode 100755 index 0000000000..f8aa1c875c --- /dev/null +++ b/tests/zfs-tests/tests/functional/bclone/bclone_prop_sync.ksh @@ -0,0 +1,66 @@ +#! /bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023 by Pawel Jakub Dawidek +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/include/math.shlib +. $STF_SUITE/include/properties.shlib +. $STF_SUITE/tests/functional/bclone/bclone_common.kshlib + +verify_runnable "both" + +verify_block_cloning +verify_crossfs_block_cloning + +log_assert "Verify block cloning with all sync property settings" + +log_must zfs set compress=zle $TESTSRCFS +log_must zfs set compress=zle $TESTDSTFS + +for prop in "${sync_prop_vals[@]}"; do + log_must zfs set sync=$prop $TESTSRCFS + # 32767*8=262136, which is larger than a single default recordsize of + # 131072. 
+ FILESIZE=$(random_int_between 1 32767) + FILESIZE=$((FILESIZE * 8)) + bclone_test random $FILESIZE false $TESTSRCDIR $TESTSRCDIR +done + +for srcprop in "${sync_prop_vals[@]}"; do + log_must zfs set sync=$srcprop $TESTSRCFS + for dstprop in "${sync_prop_vals[@]}"; do + log_must zfs set sync=$dstprop $TESTDSTFS + # 32767*8=262136, which is larger than a single default recordsize of + # 131072. + FILESIZE=$(random_int_between 1 32767) + FILESIZE=$((FILESIZE * 8)) + bclone_test random $FILESIZE false $TESTSRCDIR $TESTDSTDIR + done +done + +log_must zfs inherit sync $TESTSRCFS +log_must zfs inherit sync $TESTDSTFS + +log_pass diff --git a/tests/zfs-tests/tests/functional/bclone/bclone_samefs_corner_cases.ksh b/tests/zfs-tests/tests/functional/bclone/bclone_samefs_corner_cases.ksh new file mode 100755 index 0000000000..4aa2914da2 --- /dev/null +++ b/tests/zfs-tests/tests/functional/bclone/bclone_samefs_corner_cases.ksh @@ -0,0 +1,42 @@ +#! /bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023 by Pawel Jakub Dawidek +# + +. $STF_SUITE/include/libtest.shlib +. 
$STF_SUITE/tests/functional/bclone/bclone_corner_cases.kshlib + +verify_runnable "both" + +verify_block_cloning + +log_assert "Verify various corner cases in block cloning within the same dataset" + +# Disable compression to make sure we won't use embedded blocks. +log_must zfs set compress=off $TESTSRCFS +log_must zfs set recordsize=$RECORDSIZE $TESTSRCFS + +bclone_corner_cases_test $TESTSRCDIR $TESTSRCDIR + +log_pass diff --git a/tests/zfs-tests/tests/functional/bclone/bclone_samefs_corner_cases_limited.ksh b/tests/zfs-tests/tests/functional/bclone/bclone_samefs_corner_cases_limited.ksh new file mode 100755 index 0000000000..b4737700eb --- /dev/null +++ b/tests/zfs-tests/tests/functional/bclone/bclone_samefs_corner_cases_limited.ksh @@ -0,0 +1,42 @@ +#! /bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023 by Pawel Jakub Dawidek +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/bclone/bclone_corner_cases.kshlib + +verify_runnable "both" + +verify_block_cloning + +log_assert "Verify various corner cases in block cloning within the same dataset" + +# Disable compression to make sure we won't use embedded blocks. 
+log_must zfs set compress=off $TESTSRCFS +log_must zfs set recordsize=$RECORDSIZE $TESTSRCFS + +bclone_corner_cases_test $TESTSRCDIR $TESTSRCDIR 100 + +log_pass diff --git a/tests/zfs-tests/tests/functional/bclone/bclone_samefs_data.ksh b/tests/zfs-tests/tests/functional/bclone/bclone_samefs_data.ksh new file mode 100755 index 0000000000..e964f7bbf6 --- /dev/null +++ b/tests/zfs-tests/tests/functional/bclone/bclone_samefs_data.ksh @@ -0,0 +1,44 @@ +#! /bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023 by Pawel Jakub Dawidek +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/bclone/bclone_common.kshlib + +verify_runnable "both" + +verify_block_cloning + +log_assert "Verify block cloning properly clones regular files within the same dataset" + +# Disable compression to make sure we won't use embedded blocks. 
+log_must zfs set compress=off $TESTSRCFS + +for filesize in 1 107 113 511 512 513 4095 4096 4097 131071 131072 131073 \ + 1048575 1048576 1048577 4194303 4194304 4194305; do + bclone_test random $filesize false $TESTSRCDIR $TESTSRCDIR +done + +log_pass diff --git a/tests/zfs-tests/tests/functional/bclone/bclone_samefs_embedded.ksh b/tests/zfs-tests/tests/functional/bclone/bclone_samefs_embedded.ksh new file mode 100755 index 0000000000..df393a8780 --- /dev/null +++ b/tests/zfs-tests/tests/functional/bclone/bclone_samefs_embedded.ksh @@ -0,0 +1,48 @@ +#! /bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023 by Pawel Jakub Dawidek +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/bclone/bclone_common.kshlib + +verify_runnable "both" + +verify_block_cloning + +log_assert "Verify block cloning properly clones small files (with embedded blocks) within the same dataset" + +# Enable ZLE compression so that we know the maximum amount of data we +# can store in a BP. +log_must zfs set compress=zle $TESTSRCFS + +# Test BP_IS_EMBEDDED(). +# Maximum embedded payload size is 112 bytes, but the buffer is extended to +# 512 bytes first and then compressed. 
107 random bytes followed by 405 zeros +# gives exactly 112 bytes after compression with ZLE. +for filesize in 1 2 4 8 16 32 64 96 107; do + bclone_test random $filesize true $TESTSRCDIR $TESTSRCDIR +done + +log_pass diff --git a/tests/zfs-tests/tests/functional/bclone/bclone_samefs_hole.ksh b/tests/zfs-tests/tests/functional/bclone/bclone_samefs_hole.ksh new file mode 100755 index 0000000000..3c6e345e6e --- /dev/null +++ b/tests/zfs-tests/tests/functional/bclone/bclone_samefs_hole.ksh @@ -0,0 +1,44 @@ +#! /bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023 by Pawel Jakub Dawidek +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/bclone/bclone_common.kshlib + +verify_runnable "both" + +verify_block_cloning + +log_assert "Verify block cloning properly clones sparse files (files with holes) within the same dataset" + +# Compression doesn't matter here. + +# Test BP_IS_HOLE(). 
+for filesize in 1 511 512 513 4095 4096 4097 131071 131072 131073 \ + 1048575 1048576 1048577 4194303 4194304 4194305; do + bclone_test hole $filesize false $TESTSRCDIR $TESTSRCDIR +done + +log_pass diff --git a/tests/zfs-tests/tests/functional/bclone/cleanup.ksh b/tests/zfs-tests/tests/functional/bclone/cleanup.ksh new file mode 100755 index 0000000000..df6d9c08fe --- /dev/null +++ b/tests/zfs-tests/tests/functional/bclone/cleanup.ksh @@ -0,0 +1,37 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# Copyright (c) 2013 by Delphix. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. 
$STF_SUITE/tests/functional/bclone/bclone.cfg + +log_must zfs destroy $TESTSRCFS +log_must zfs destroy $TESTDSTFS +default_cleanup diff --git a/tests/zfs-tests/tests/functional/bclone/setup.ksh b/tests/zfs-tests/tests/functional/bclone/setup.ksh new file mode 100755 index 0000000000..c68719ee72 --- /dev/null +++ b/tests/zfs-tests/tests/functional/bclone/setup.ksh @@ -0,0 +1,45 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# Copyright (c) 2023 by Pawel Jakub Dawidek +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/bclone/bclone.cfg + +if ! 
command -v clonefile > /dev/null ; then + log_unsupported "clonefile program required to test block cloning" +fi + +DISK=${DISKS%% *} + +default_setup_noexit $DISK "true" +log_must zpool set feature@block_cloning=enabled $TESTPOOL +log_must zfs create $TESTSRCFS +log_must zfs create $TESTDSTFS +log_pass diff --git a/tests/zfs-tests/tests/functional/redundancy/redundancy.kshlib b/tests/zfs-tests/tests/functional/redundancy/redundancy.kshlib index 30818050a0..297c6a073b 100644 --- a/tests/zfs-tests/tests/functional/redundancy/redundancy.kshlib +++ b/tests/zfs-tests/tests/functional/redundancy/redundancy.kshlib @@ -44,28 +44,6 @@ function cleanup done } -# -# Get random number between min and max number. -# -# $1 Minimal value -# $2 Maximal value -# -function random -{ - typeset -i min=$1 - typeset -i max=$2 - typeset -i value - - while true; do - ((value = RANDOM % (max + 1))) - if ((value >= min)); then - break - fi - done - - echo $value -} - # # Get the number of checksum errors for the pool. 
# From 83c0ccc7cf5494090621ab7038386b8a4750e560 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Fri, 12 Jan 2024 11:57:13 -0800 Subject: [PATCH 07/45] Enable block_cloning tests on FreeBSD Reviewed-by: Brian Behlendorf Signed-off-by: Pawel Jakub Dawidek Closes #15749 --- tests/runfiles/common.run | 11 +++++++++++ tests/runfiles/linux.run | 14 +++----------- .../block_cloning/block_cloning_copyfilerange.ksh | 2 +- .../block_cloning_copyfilerange_cross_dataset.ksh | 2 +- .../block_cloning_copyfilerange_fallback.ksh | 2 +- ...ock_cloning_copyfilerange_fallback_same_txg.ksh | 2 +- .../block_cloning_copyfilerange_partial.ksh | 2 +- .../block_cloning_cross_enc_dataset.ksh | 2 +- .../block_cloning_disabled_copyfilerange.ksh | 2 +- .../block_cloning_lwb_buffer_overflow.ksh | 7 ++++--- .../block_cloning/block_cloning_replay.ksh | 9 +++++---- .../block_cloning_replay_encrypted.ksh | 9 +++++---- 12 files changed, 35 insertions(+), 29 deletions(-) diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run index f94a5fba9e..13d8312833 100644 --- a/tests/runfiles/common.run +++ b/tests/runfiles/common.run @@ -71,6 +71,17 @@ tests = ['bclone_crossfs_corner_cases_limited', tags = ['functional', 'bclone'] timeout = 7200 +[tests/functional/block_cloning] +tests = ['block_cloning_copyfilerange', 'block_cloning_copyfilerange_partial', + 'block_cloning_copyfilerange_fallback', + 'block_cloning_disabled_copyfilerange', + 'block_cloning_copyfilerange_cross_dataset', + 'block_cloning_cross_enc_dataset', + 'block_cloning_copyfilerange_fallback_same_txg', + 'block_cloning_replay', 'block_cloning_replay_encrypted', + 'block_cloning_lwb_buffer_overflow'] +tags = ['functional', 'block_cloning'] + [tests/functional/bootfs] tests = ['bootfs_001_pos', 'bootfs_002_neg', 'bootfs_003_pos', 'bootfs_004_neg', 'bootfs_005_neg', 'bootfs_006_pos', 'bootfs_007_pos', diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index c7c17f2717..6a4cd3fe69 100644 --- 
a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -35,17 +35,9 @@ tests = ['atime_003_pos', 'root_relatime_on'] tags = ['functional', 'atime'] [tests/functional/block_cloning:Linux] -tests = ['block_cloning_copyfilerange', 'block_cloning_copyfilerange_partial', - 'block_cloning_copyfilerange_fallback', - 'block_cloning_ficlone', 'block_cloning_ficlonerange', - 'block_cloning_ficlonerange_partial', - 'block_cloning_disabled_copyfilerange', 'block_cloning_disabled_ficlone', - 'block_cloning_disabled_ficlonerange', - 'block_cloning_copyfilerange_cross_dataset', - 'block_cloning_cross_enc_dataset', - 'block_cloning_copyfilerange_fallback_same_txg', - 'block_cloning_replay', 'block_cloning_replay_encrypted', - 'block_cloning_lwb_buffer_overflow'] +tests = ['block_cloning_ficlone', 'block_cloning_ficlonerange', + 'block_cloning_ficlonerange_partial', 'block_cloning_disabled_ficlone', + 'block_cloning_disabled_ficlonerange'] tags = ['functional', 'block_cloning'] [tests/functional/chattr:Linux] diff --git a/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange.ksh b/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange.ksh index 43ea47b0ef..0599739abe 100755 --- a/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange.ksh +++ b/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange.ksh @@ -29,7 +29,7 @@ verify_runnable "global" -if [[ $(linux_version) -lt $(linux_version "4.5") ]]; then +if is_linux && [[ $(linux_version) -lt $(linux_version "4.5") ]]; then log_unsupported "copy_file_range not available before Linux 4.5" fi diff --git a/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_cross_dataset.ksh b/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_cross_dataset.ksh index 74e6b04903..43323c207a 100755 --- a/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_cross_dataset.ksh +++ 
b/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_cross_dataset.ksh @@ -29,7 +29,7 @@ verify_runnable "global" -if [[ $(linux_version) -lt $(linux_version "5.3") ]]; then +if is_linux && [[ $(linux_version) -lt $(linux_version "5.3") ]]; then log_unsupported "copy_file_range can't copy cross-filesystem before Linux 5.3" fi diff --git a/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_fallback.ksh b/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_fallback.ksh index 9a96eacd60..475910be74 100755 --- a/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_fallback.ksh +++ b/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_fallback.ksh @@ -30,7 +30,7 @@ verify_runnable "global" -if [[ $(linux_version) -lt $(linux_version "4.5") ]]; then +if is_linux && [[ $(linux_version) -lt $(linux_version "4.5") ]]; then log_unsupported "copy_file_range not available before Linux 4.5" fi diff --git a/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_fallback_same_txg.ksh b/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_fallback_same_txg.ksh index e52b34ec8a..00982f68db 100755 --- a/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_fallback_same_txg.ksh +++ b/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_fallback_same_txg.ksh @@ -30,7 +30,7 @@ verify_runnable "global" -if [[ $(linux_version) -lt $(linux_version "4.5") ]]; then +if is_linux && [[ $(linux_version) -lt $(linux_version "4.5") ]]; then log_unsupported "copy_file_range not available before Linux 4.5" fi diff --git a/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_partial.ksh b/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_partial.ksh index a5da0a0bd3..38c46e4741 100755 --- 
a/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_partial.ksh +++ b/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_partial.ksh @@ -29,7 +29,7 @@ verify_runnable "global" -if [[ $(linux_version) -lt $(linux_version "4.5") ]]; then +if is_linux && [[ $(linux_version) -lt $(linux_version "4.5") ]]; then log_unsupported "copy_file_range not available before Linux 4.5" fi diff --git a/tests/zfs-tests/tests/functional/block_cloning/block_cloning_cross_enc_dataset.ksh b/tests/zfs-tests/tests/functional/block_cloning/block_cloning_cross_enc_dataset.ksh index fe8f0867b9..34d3d26925 100755 --- a/tests/zfs-tests/tests/functional/block_cloning/block_cloning_cross_enc_dataset.ksh +++ b/tests/zfs-tests/tests/functional/block_cloning/block_cloning_cross_enc_dataset.ksh @@ -29,7 +29,7 @@ verify_runnable "global" -if [[ $(linux_version) -lt $(linux_version "5.3") ]]; then +if is_linux && [[ $(linux_version) -lt $(linux_version "5.3") ]]; then log_unsupported "copy_file_range can't copy cross-filesystem before Linux 5.3" fi diff --git a/tests/zfs-tests/tests/functional/block_cloning/block_cloning_disabled_copyfilerange.ksh b/tests/zfs-tests/tests/functional/block_cloning/block_cloning_disabled_copyfilerange.ksh index d21b625113..3d916ab921 100755 --- a/tests/zfs-tests/tests/functional/block_cloning/block_cloning_disabled_copyfilerange.ksh +++ b/tests/zfs-tests/tests/functional/block_cloning/block_cloning_disabled_copyfilerange.ksh @@ -29,7 +29,7 @@ verify_runnable "global" -if [[ $(linux_version) -lt $(linux_version "4.5") ]]; then +if is_linux && [[ $(linux_version) -lt $(linux_version "4.5") ]]; then log_unsupported "copy_file_range not available before Linux 4.5" fi diff --git a/tests/zfs-tests/tests/functional/block_cloning/block_cloning_lwb_buffer_overflow.ksh b/tests/zfs-tests/tests/functional/block_cloning/block_cloning_lwb_buffer_overflow.ksh index 0ae76b7e54..919f320dea 100755 --- 
a/tests/zfs-tests/tests/functional/block_cloning/block_cloning_lwb_buffer_overflow.ksh +++ b/tests/zfs-tests/tests/functional/block_cloning/block_cloning_lwb_buffer_overflow.ksh @@ -45,7 +45,7 @@ verify_runnable "global" -if [[ $(linux_version) -lt $(linux_version "4.5") ]]; then +if is_linux && [[ $(linux_version) -lt $(linux_version "4.5") ]]; then log_unsupported "copy_file_range not available before Linux 4.5" fi @@ -77,13 +77,14 @@ log_must zfs create -o recordsize=32K $TESTPOOL/$TESTFS log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/file1 bs=32K count=1022 \ conv=fsync sync_pool $TESTPOOL -log_must clonefile -c /$TESTPOOL/$TESTFS/file1 /$TESTPOOL/$TESTFS/file2 +log_must clonefile -f /$TESTPOOL/$TESTFS/file1 /$TESTPOOL/$TESTFS/file2 log_must sync sync_pool $TESTPOOL log_must have_same_content /$TESTPOOL/$TESTFS/file1 /$TESTPOOL/$TESTFS/file2 typeset blocks=$(get_same_blocks $TESTPOOL/$TESTFS file1 $TESTPOOL/$TESTFS file2) -log_must [ "$blocks" = "$(seq -s " " 0 1021)" ] +# FreeBSD's seq(1) leaves a trailing space, remove it with sed(1). +log_must [ "$blocks" = "$(seq -s " " 0 1021 | sed 's/ $//')" ] log_pass "LWB buffer overflow is not triggered with multiple VDEVs ZIL" diff --git a/tests/zfs-tests/tests/functional/block_cloning/block_cloning_replay.ksh b/tests/zfs-tests/tests/functional/block_cloning/block_cloning_replay.ksh index 1fdf379ed2..5301520046 100755 --- a/tests/zfs-tests/tests/functional/block_cloning/block_cloning_replay.ksh +++ b/tests/zfs-tests/tests/functional/block_cloning/block_cloning_replay.ksh @@ -42,7 +42,7 @@ verify_runnable "global" -if [[ $(linux_version) -lt $(linux_version "4.5") ]]; then +if is_linux && [[ $(linux_version) -lt $(linux_version "4.5") ]]; then log_unsupported "copy_file_range not available before Linux 4.5" fi @@ -90,8 +90,8 @@ log_must zpool freeze $TESTPOOL # # 4. 
TX_CLONE_RANGE: Clone the file # -log_must clonefile -c /$TESTPOOL/$TESTFS/file1 /$TESTPOOL/$TESTFS/clone1 -log_must clonefile -c /$TESTPOOL/$TESTFS/file2 /$TESTPOOL/$TESTFS/clone2 +log_must clonefile -f /$TESTPOOL/$TESTFS/file1 /$TESTPOOL/$TESTFS/clone1 +log_must clonefile -f /$TESTPOOL/$TESTFS/file2 /$TESTPOOL/$TESTFS/clone2 # # 5. Unmount filesystem and export the pool @@ -126,6 +126,7 @@ log_must [ "$blocks" = "0 1 2 3" ] typeset blocks=$(get_same_blocks $TESTPOOL/$TESTFS file2 \ $TESTPOOL/$TESTFS clone2) -log_must [ "$blocks" = "$(seq -s " " 0 2047)" ] +# FreeBSD's seq(1) leaves a trailing space, remove it with sed(1). +log_must [ "$blocks" = "$(seq -s " " 0 2047 | sed 's/ $//')" ] log_pass $claim diff --git a/tests/zfs-tests/tests/functional/block_cloning/block_cloning_replay_encrypted.ksh b/tests/zfs-tests/tests/functional/block_cloning/block_cloning_replay_encrypted.ksh index f9f687c83e..0967415b7b 100755 --- a/tests/zfs-tests/tests/functional/block_cloning/block_cloning_replay_encrypted.ksh +++ b/tests/zfs-tests/tests/functional/block_cloning/block_cloning_replay_encrypted.ksh @@ -42,7 +42,7 @@ verify_runnable "global" -if [[ $(linux_version) -lt $(linux_version "4.5") ]]; then +if is_linux && [[ $(linux_version) -lt $(linux_version "4.5") ]]; then log_unsupported "copy_file_range not available before Linux 4.5" fi @@ -92,8 +92,8 @@ log_must zpool freeze $TESTPOOL # # 4. TX_CLONE_RANGE: Clone the file # -log_must clonefile -c /$TESTPOOL/$TESTFS/file1 /$TESTPOOL/$TESTFS/clone1 -log_must clonefile -c /$TESTPOOL/$TESTFS/file2 /$TESTPOOL/$TESTFS/clone2 +log_must clonefile -f /$TESTPOOL/$TESTFS/file1 /$TESTPOOL/$TESTFS/clone1 +log_must clonefile -f /$TESTPOOL/$TESTFS/file2 /$TESTPOOL/$TESTFS/clone2 # # 5. 
Unmount filesystem and export the pool @@ -128,6 +128,7 @@ log_must [ "$blocks" = "0 1 2 3" ] typeset blocks=$(get_same_blocks $TESTPOOL/$TESTFS file2 \ $TESTPOOL/$TESTFS clone2 $PASSPHRASE) -log_must [ "$blocks" = "$(seq -s " " 0 2047)" ] +# FreeBSD's seq(1) leaves a trailing space, remove it with sed(1). +log_must [ "$blocks" = "$(seq -s " " 0 2047 | sed 's/ $//')" ] log_pass $claim From d2f7b2e55767f8b84bcca79cf508f89c0471a92a Mon Sep 17 00:00:00 2001 From: Umer Saleem Date: Wed, 17 Jan 2024 02:15:10 +0500 Subject: [PATCH 08/45] ZTS: Test for clone, mmap and write for block cloning For block cloning, if we mmap the cloned file and write from the map into the file, it triggers a panic in dbuf_redirty() on Linux. The same scenario causes data corruption on FreeBSD. Both these issues are fixed under PR#15656 and PR#15665. It would be good to add a test for this scenario in ZTS. The test program and issue was produced by @robn. Reviewed-by: Pawel Jakub Dawidek Reviewed-by: Brian Behlendorf Reviewed-by: Alexander Motin Reviewed-by: Ameer Hamza Signed-off-by: Umer Saleem Closes #15717 --- tests/runfiles/common.run | 2 +- tests/test-runner/bin/zts-report.py.in | 2 + tests/zfs-tests/cmd/.gitignore | 1 + tests/zfs-tests/cmd/Makefile.am | 1 + tests/zfs-tests/cmd/clone_mmap_write.c | 123 ++++++++++++++++++ tests/zfs-tests/include/commands.cfg | 1 + tests/zfs-tests/tests/Makefile.am | 1 + .../block_cloning_clone_mmap_write.ksh | 79 +++++++++++ 8 files changed, 209 insertions(+), 1 deletion(-) create mode 100644 tests/zfs-tests/cmd/clone_mmap_write.c create mode 100755 tests/zfs-tests/tests/functional/block_cloning/block_cloning_clone_mmap_write.ksh diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run index 13d8312833..f320c54239 100644 --- a/tests/runfiles/common.run +++ b/tests/runfiles/common.run @@ -79,7 +79,7 @@ tests = ['block_cloning_copyfilerange', 'block_cloning_copyfilerange_partial', 'block_cloning_cross_enc_dataset', 
'block_cloning_copyfilerange_fallback_same_txg', 'block_cloning_replay', 'block_cloning_replay_encrypted', - 'block_cloning_lwb_buffer_overflow'] + 'block_cloning_lwb_buffer_overflow', 'block_cloning_clone_mmap_write'] tags = ['functional', 'block_cloning'] [tests/functional/bootfs] diff --git a/tests/test-runner/bin/zts-report.py.in b/tests/test-runner/bin/zts-report.py.in index 7bf4d05d54..c84f75cd80 100755 --- a/tests/test-runner/bin/zts-report.py.in +++ b/tests/test-runner/bin/zts-report.py.in @@ -287,6 +287,8 @@ elif sys.platform.startswith('linux'): 'bclone/bclone_samefs_data': ['SKIP', cfr_reason], 'bclone/bclone_samefs_embedded': ['SKIP', cfr_reason], 'bclone/bclone_samefs_hole': ['SKIP', cfr_reason], + 'block_cloning/block_cloning_clone_mmap_write': + ['SKIP', cfr_reason], 'block_cloning/block_cloning_copyfilerange': ['SKIP', cfr_reason], 'block_cloning/block_cloning_copyfilerange_cross_dataset': diff --git a/tests/zfs-tests/cmd/.gitignore b/tests/zfs-tests/cmd/.gitignore index 5f53b68719..a696fd3871 100644 --- a/tests/zfs-tests/cmd/.gitignore +++ b/tests/zfs-tests/cmd/.gitignore @@ -2,6 +2,7 @@ /btree_test /chg_usr_exec /clonefile +/clone_mmap_write /devname2devid /dir_rd_update /draid diff --git a/tests/zfs-tests/cmd/Makefile.am b/tests/zfs-tests/cmd/Makefile.am index 1b915ae98c..379dc5e236 100644 --- a/tests/zfs-tests/cmd/Makefile.am +++ b/tests/zfs-tests/cmd/Makefile.am @@ -3,6 +3,7 @@ scripts_zfs_tests_bindir = $(datadir)/$(PACKAGE)/zfs-tests/bin scripts_zfs_tests_bin_PROGRAMS = %D%/chg_usr_exec scripts_zfs_tests_bin_PROGRAMS += %D%/clonefile +scripts_zfs_tests_bin_PROGRAMS += %D%/clone_mmap_write scripts_zfs_tests_bin_PROGRAMS += %D%/cp_files scripts_zfs_tests_bin_PROGRAMS += %D%/ctime scripts_zfs_tests_bin_PROGRAMS += %D%/dir_rd_update diff --git a/tests/zfs-tests/cmd/clone_mmap_write.c b/tests/zfs-tests/cmd/clone_mmap_write.c new file mode 100644 index 0000000000..6a5cd8721c --- /dev/null +++ b/tests/zfs-tests/cmd/clone_mmap_write.c @@ -0,0 +1,123 
@@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or https://opensource.org/licenses/CDDL-1.0. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * This program clones the file, mmap it, and writes from the map into + * file. This scenario triggers a panic on Linux in dbuf_redirty(), + * which is fixed under PR#15656. On FreeBSD, the same test causes data + * corruption, which is fixed by PR#15665. + * + * It would be good to test for this scenario in ZTS. This program and + * issue was initially produced by @robn. 
+ */ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <string.h> +#include <errno.h> +#include <sys/stat.h> +#include <sys/mman.h> + +#ifdef __FreeBSD__ +#define loff_t off_t +#endif + +ssize_t +copy_file_range(int, loff_t *, int, loff_t *, size_t, unsigned int) + __attribute__((weak)); + +static int +open_file(const char *source) +{ + int fd; + if ((fd = open(source, O_RDWR | O_APPEND)) < 0) { + (void) fprintf(stderr, "Error opening %s\n", source); + exit(1); + } + sync(); + return (fd); +} + +static int +clone_file(int sfd, long long size, const char *dest) +{ + int dfd; + + if ((dfd = open(dest, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR)) < 0) { + (void) fprintf(stderr, "Error opening %s\n", dest); + exit(1); + } + + if (copy_file_range(sfd, 0, dfd, 0, size, 0) < 0) { + (void) fprintf(stderr, "copy_file_range failed\n"); + exit(1); + } + + return (dfd); +} + +static void * +map_file(int fd, long long size) +{ + void *p = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, 0); + if (p == MAP_FAILED) { + (void) fprintf(stderr, "mmap failed\n"); + exit(1); + } + + return (p); +} + +static void +map_write(void *p, int fd) +{ + if (pwrite(fd, p, 1024*128, 0) < 0) { + (void) fprintf(stderr, "write failed\n"); + exit(1); + } +} + +int +main(int argc, char **argv) +{ + int sfd, dfd; + void *p; + struct stat sb; + if (argc != 3) { + (void) printf("usage: %s <input source file> " + "<clone destination file>\n", argv[0]); + exit(1); + } + sfd = open_file(argv[1]); + if (fstat(sfd, &sb) == -1) { + (void) fprintf(stderr, "fstat failed\n"); + exit(1); + } + dfd = clone_file(sfd, sb.st_size, argv[2]); + p = map_file(dfd, sb.st_size); + map_write(p, dfd); + return (0); +} diff --git a/tests/zfs-tests/include/commands.cfg b/tests/zfs-tests/include/commands.cfg index c6f74cd81a..797078ed3a 100644 --- a/tests/zfs-tests/include/commands.cfg +++ b/tests/zfs-tests/include/commands.cfg @@ -185,6 +185,7 @@ export ZFSTEST_FILES='badsend btree_test chg_usr_exec clonefile + clone_mmap_write devname2devid dir_rd_update draid diff --git
a/tests/zfs-tests/tests/Makefile.am b/tests/zfs-tests/tests/Makefile.am index 33e97d22b6..aeff66627a 100644 --- a/tests/zfs-tests/tests/Makefile.am +++ b/tests/zfs-tests/tests/Makefile.am @@ -461,6 +461,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/bclone/setup.ksh \ functional/block_cloning/cleanup.ksh \ functional/block_cloning/setup.ksh \ + functional/block_cloning/block_cloning_clone_mmap_write.ksh \ functional/block_cloning/block_cloning_copyfilerange_cross_dataset.ksh \ functional/block_cloning/block_cloning_copyfilerange_fallback.ksh \ functional/block_cloning/block_cloning_copyfilerange_fallback_same_txg.ksh \ diff --git a/tests/zfs-tests/tests/functional/block_cloning/block_cloning_clone_mmap_write.ksh b/tests/zfs-tests/tests/functional/block_cloning/block_cloning_clone_mmap_write.ksh new file mode 100755 index 0000000000..6215b3178e --- /dev/null +++ b/tests/zfs-tests/tests/functional/block_cloning/block_cloning_clone_mmap_write.ksh @@ -0,0 +1,79 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +. $STF_SUITE/include/libtest.shlib +. 
$STF_SUITE/tests/functional/block_cloning/block_cloning.kshlib + +# +# DESCRIPTION: +# A PANIC is triggered in dbuf_redirty() if we clone a file, mmap it +# and write from the map into the file. PR#15656 fixes this scenario. +# This scenario also causes data corruption on FreeBSD, which is fixed +# by PR#15665. +# +# STRATEGY: +# 1. Create a pool +# 2. Create a test file +# 3. Clone, mmap and write to the file using clone_mmap_write +# 4. Synchronize cached writes +# 5. Verify data is correctly written to the disk +# + +verify_runnable "global" + +if is_linux && [[ $(linux_version) -lt $(linux_version "4.5") ]]; then + log_unsupported "copy_file_range not available before Linux 4.5" +fi + +VDIR=$TEST_BASE_DIR/disk-bclone +VDEV="$VDIR/a" + +function cleanup +{ + datasetexists $TESTPOOL && destroy_pool $TESTPOOL + rm -rf $VDIR +} + +log_onexit cleanup + +log_assert "Test for clone, mmap and write scenario" + +log_must rm -rf $VDIR +log_must mkdir -p $VDIR +log_must truncate -s 1G $VDEV + +log_must zpool create -o feature@block_cloning=enabled $TESTPOOL $VDEV +log_must zfs create $TESTPOOL/$TESTFS + +log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/file bs=1M count=512 +log_must clone_mmap_write /$TESTPOOL/$TESTFS/file /$TESTPOOL/$TESTFS/clone + +sync_pool $TESTPOOL +log_must sync + +log_must have_same_content /$TESTPOOL/$TESTFS/file /$TESTPOOL/$TESTFS/clone +blocks=$(get_same_blocks $TESTPOOL/$TESTFS file $TESTPOOL/$TESTFS clone) +# FreeBSD's seq(1) leaves a trailing space, remove it with sed(1). +log_must [ "$blocks" = "$(seq -s " " 1 4095 | sed 's/ $//')" ] + +log_pass "Clone, mmap and write does not cause data corruption or " \ + "trigger panic" From ef527958c6a1fc07177636465194625ef8e64083 Mon Sep 17 00:00:00 2001 From: Pawel Jakub Dawidek Date: Wed, 17 Jan 2024 08:51:07 -0800 Subject: [PATCH 09/45] Fix cloning into mmaped and cached file.
If the destination file is mmaped and the mmaped region was already read, so it is cached, we need to update mmaped pages after successful clone using update_pages(). Reviewed-by: Alexander Motin Reviewed-by: Brian Behlendorf Pointed out by: Ka Ho Ng Signed-off-by: Pawel Jakub Dawidek Closes #15772 --- module/zfs/zfs_vnops.c | 4 + tests/runfiles/common.run | 4 +- tests/test-runner/bin/zts-report.py.in | 1 + tests/zfs-tests/cmd/.gitignore | 1 + tests/zfs-tests/cmd/Makefile.am | 1 + tests/zfs-tests/cmd/clone_mmap_cached.c | 146 ++++++++++++++++++ tests/zfs-tests/include/commands.cfg | 1 + tests/zfs-tests/tests/Makefile.am | 1 + .../block_cloning_clone_mmap_cached.ksh | 86 +++++++++++ .../tests/functional/block_cloning/setup.ksh | 3 + 10 files changed, 247 insertions(+), 1 deletion(-) create mode 100644 tests/zfs-tests/cmd/clone_mmap_cached.c create mode 100755 tests/zfs-tests/tests/functional/block_cloning/block_cloning_clone_mmap_cached.ksh diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c index 812e42f645..aa61575a6a 100644 --- a/module/zfs/zfs_vnops.c +++ b/module/zfs/zfs_vnops.c @@ -1355,6 +1355,10 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp, break; } + if (zn_has_cached_data(outzp, outoff, outoff + size - 1)) { + update_pages(outzp, outoff, size, outos); + } + zfs_clear_setid_bits_if_necessary(outzfsvfs, outzp, cr, &clear_setid_bits_txg, tx); diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run index f320c54239..33f30b0055 100644 --- a/tests/runfiles/common.run +++ b/tests/runfiles/common.run @@ -72,7 +72,9 @@ tags = ['functional', 'bclone'] timeout = 7200 [tests/functional/block_cloning] -tests = ['block_cloning_copyfilerange', 'block_cloning_copyfilerange_partial', +tests = ['block_cloning_clone_mmap_cached', + 'block_cloning_copyfilerange', + 'block_cloning_copyfilerange_partial', 'block_cloning_copyfilerange_fallback', 'block_cloning_disabled_copyfilerange', 'block_cloning_copyfilerange_cross_dataset', diff 
--git a/tests/test-runner/bin/zts-report.py.in b/tests/test-runner/bin/zts-report.py.in index c84f75cd80..ae4aa62754 100755 --- a/tests/test-runner/bin/zts-report.py.in +++ b/tests/test-runner/bin/zts-report.py.in @@ -287,6 +287,7 @@ elif sys.platform.startswith('linux'): 'bclone/bclone_samefs_data': ['SKIP', cfr_reason], 'bclone/bclone_samefs_embedded': ['SKIP', cfr_reason], 'bclone/bclone_samefs_hole': ['SKIP', cfr_reason], + 'block_cloning/block_cloning_clone_mmap_cached': ['SKIP', cfr_reason], 'block_cloning/block_cloning_clone_mmap_write': ['SKIP', cfr_reason], 'block_cloning/block_cloning_copyfilerange': diff --git a/tests/zfs-tests/cmd/.gitignore b/tests/zfs-tests/cmd/.gitignore index a696fd3871..0ed0a69eb0 100644 --- a/tests/zfs-tests/cmd/.gitignore +++ b/tests/zfs-tests/cmd/.gitignore @@ -2,6 +2,7 @@ /btree_test /chg_usr_exec /clonefile +/clone_mmap_cached /clone_mmap_write /devname2devid /dir_rd_update diff --git a/tests/zfs-tests/cmd/Makefile.am b/tests/zfs-tests/cmd/Makefile.am index 379dc5e236..23848a82ff 100644 --- a/tests/zfs-tests/cmd/Makefile.am +++ b/tests/zfs-tests/cmd/Makefile.am @@ -3,6 +3,7 @@ scripts_zfs_tests_bindir = $(datadir)/$(PACKAGE)/zfs-tests/bin scripts_zfs_tests_bin_PROGRAMS = %D%/chg_usr_exec scripts_zfs_tests_bin_PROGRAMS += %D%/clonefile +scripts_zfs_tests_bin_PROGRAMS += %D%/clone_mmap_cached scripts_zfs_tests_bin_PROGRAMS += %D%/clone_mmap_write scripts_zfs_tests_bin_PROGRAMS += %D%/cp_files scripts_zfs_tests_bin_PROGRAMS += %D%/ctime diff --git a/tests/zfs-tests/cmd/clone_mmap_cached.c b/tests/zfs-tests/cmd/clone_mmap_cached.c new file mode 100644 index 0000000000..c1cdf796cf --- /dev/null +++ b/tests/zfs-tests/cmd/clone_mmap_cached.c @@ -0,0 +1,146 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or https://opensource.org/licenses/CDDL-1.0. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2024 by Pawel Jakub Dawidek + */ + +#include <sys/mman.h> +#include <sys/stat.h> + +#include <assert.h> +#include <errno.h> +#include <fcntl.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#ifdef __FreeBSD__ +#define loff_t off_t +#endif + +ssize_t +copy_file_range(int, loff_t *, int, loff_t *, size_t, unsigned int) + __attribute__((weak)); + +static void * +mmap_file(int fd, size_t size) +{ + void *p; + + p = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, 0); + if (p == MAP_FAILED) { + (void) fprintf(stderr, "mmap failed: %s\n", strerror(errno)); + exit(2); + } + + return (p); +} + +static void +usage(const char *progname) +{ + + /* + * -i cache input before copy_file_range(2). + * -o cache output before copy_file_range(2). 
+ */ + (void) fprintf(stderr, "usage: %s [-io] <input> <output>\n", progname); + exit(3); +} + +int +main(int argc, char *argv[]) +{ + int dfd, sfd; + size_t dsize, ssize; + void *dmem, *smem, *ptr; + off_t doff, soff; + struct stat sb; + bool cache_input, cache_output; + const char *progname; + int c; + + progname = argv[0]; + cache_input = cache_output = false; + + while ((c = getopt(argc, argv, "io")) != -1) { + switch (c) { + case 'i': + cache_input = true; + break; + case 'o': + cache_output = true; + break; + default: + usage(progname); + } + } + argc -= optind; + argv += optind; + + if (argc != 2) { + usage(progname); + } + + sfd = open(argv[0], O_RDONLY); + if (fstat(sfd, &sb) == -1) { + (void) fprintf(stderr, "fstat failed: %s\n", strerror(errno)); + exit(2); + } + ssize = sb.st_size; + smem = mmap_file(sfd, ssize); + + dfd = open(argv[1], O_RDWR); + if (fstat(dfd, &sb) == -1) { + (void) fprintf(stderr, "fstat failed: %s\n", strerror(errno)); + exit(2); + } + dsize = sb.st_size; + dmem = mmap_file(dfd, dsize); + + /* + * Hopefully it won't be compiled out. + */ + if (cache_input) { + ptr = malloc(ssize); + assert(ptr != NULL); + memcpy(ptr, smem, ssize); + free(ptr); + } + if (cache_output) { + ptr = malloc(dsize); + assert(ptr != NULL); + memcpy(ptr, dmem, dsize); + free(ptr); + } + + soff = doff = 0; + if (copy_file_range(sfd, &soff, dfd, &doff, ssize, 0) < 0) { + (void) fprintf(stderr, "copy_file_range failed: %s\n", + strerror(errno)); + exit(2); + } + + exit(memcmp(smem, dmem, ssize) == 0 ? 
0 : 1); +} diff --git a/tests/zfs-tests/include/commands.cfg b/tests/zfs-tests/include/commands.cfg index 797078ed3a..daa7945516 100644 --- a/tests/zfs-tests/include/commands.cfg +++ b/tests/zfs-tests/include/commands.cfg @@ -185,6 +185,7 @@ export ZFSTEST_FILES='badsend btree_test chg_usr_exec clonefile + clone_mmap_cached clone_mmap_write devname2devid dir_rd_update diff --git a/tests/zfs-tests/tests/Makefile.am b/tests/zfs-tests/tests/Makefile.am index aeff66627a..1c3dfc77ea 100644 --- a/tests/zfs-tests/tests/Makefile.am +++ b/tests/zfs-tests/tests/Makefile.am @@ -461,6 +461,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/bclone/setup.ksh \ functional/block_cloning/cleanup.ksh \ functional/block_cloning/setup.ksh \ + functional/block_cloning/block_cloning_clone_mmap_cached.ksh \ functional/block_cloning/block_cloning_clone_mmap_write.ksh \ functional/block_cloning/block_cloning_copyfilerange_cross_dataset.ksh \ functional/block_cloning/block_cloning_copyfilerange_fallback.ksh \ diff --git a/tests/zfs-tests/tests/functional/block_cloning/block_cloning_clone_mmap_cached.ksh b/tests/zfs-tests/tests/functional/block_cloning/block_cloning_clone_mmap_cached.ksh new file mode 100755 index 0000000000..b0ef8ec995 --- /dev/null +++ b/tests/zfs-tests/tests/functional/block_cloning/block_cloning_clone_mmap_cached.ksh @@ -0,0 +1,86 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/block_cloning/block_cloning.kshlib + +# +# DESCRIPTION: +# When the destination file is mmaped and is already cached we need to +# update mmaped pages after successful clone. +# +# STRATEGY: +# 1. Create a pool. +# 2. Create two test files with random content. +# 3. mmap the files, read them and clone from one to the other using +# clone_mmap_cached. +# 4. clone_mmap_cached also verifies if the content of the destination +# file was updated while reading it from mmaped memory. +# + +verify_runnable "global" + +if is_linux && [[ $(linux_version) -lt $(linux_version "4.5") ]]; then + log_unsupported "copy_file_range not available before Linux 4.5" +fi + +VDIR=$TEST_BASE_DIR/disk-bclone +VDEV="$VDIR/a" + +function cleanup +{ + datasetexists $TESTPOOL && destroy_pool $TESTPOOL + rm -rf $VDIR +} + +log_onexit cleanup + +log_assert "Test for clone into mmaped and cached file" + +log_must rm -rf $VDIR +log_must mkdir -p $VDIR +log_must truncate -s 1G $VDEV + +log_must zpool create -o feature@block_cloning=enabled $TESTPOOL $VDEV +log_must zfs create $TESTPOOL/$TESTFS + +for opts in "--" "-i" "-o" "-io" +do + log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/src bs=1M count=1 + log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/dst bs=1M count=1 + + # Clear cache. + log_must zpool export $TESTPOOL + log_must zpool import -d $VDIR $TESTPOOL + + log_must clone_mmap_cached $opts /$TESTPOOL/$TESTFS/src /$TESTPOOL/$TESTFS/dst + + sync_pool $TESTPOOL + log_must sync + + log_must have_same_content /$TESTPOOL/$TESTFS/src /$TESTPOOL/$TESTFS/dst + blocks=$(get_same_blocks $TESTPOOL/$TESTFS src $TESTPOOL/$TESTFS dst) + # FreeBSD's seq(1) leaves a trailing space, remove it with sed(1). 
+ log_must [ "$blocks" = "$(seq -s " " 0 7 | sed 's/ $//')" ] +done + +log_pass "Clone properly updates mmapped and cached pages" diff --git a/tests/zfs-tests/tests/functional/block_cloning/setup.ksh b/tests/zfs-tests/tests/functional/block_cloning/setup.ksh index 58441bf8f3..a9b13f062a 100755 --- a/tests/zfs-tests/tests/functional/block_cloning/setup.ksh +++ b/tests/zfs-tests/tests/functional/block_cloning/setup.ksh @@ -30,6 +30,9 @@ if ! command -v clonefile > /dev/null ; then log_unsupported "clonefile program required to test block cloning" fi +if ! command -v clone_mmap_cached > /dev/null ; then + log_unsupported "clone_mmap_cached program required to test block cloning" +fi verify_runnable "global" From c1161e28513410a3f566a0e10b48e54b11b19e59 Mon Sep 17 00:00:00 2001 From: Tino Reichardt Date: Wed, 17 Jan 2024 18:06:14 +0100 Subject: [PATCH 10/45] fix: variable type with zfs-tests/cmd/clonefile.c Compiling on arm64 freebsd-13.2 and arm64 almalinux-8 brings currently this error: ``` CC tests/zfs-tests/cmd/clonefile.o tests/zfs-tests/cmd/clonefile.c:166:43: error: result of comparison of \ constant -1 with expression of type 'char' is always true \ [-Werror,-Wtautological-constant-out-of-range-compare] while ((c = getopt(argc, argv, "crfdq")) != -1) { ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ^ ~~ 1 error generated. gmake[2]: *** [Makefile:8675: tests/zfs-tests/cmd/clonefile.o] Error 1 ``` Fix: use correct variable type `int`. 
Reviewed-by: Brian Behlendorf Reviewed-by: Rob Norris Signed-off-by: Tino Reichardt Closes #15783 --- tests/zfs-tests/cmd/clonefile.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/zfs-tests/cmd/clonefile.c b/tests/zfs-tests/cmd/clonefile.c index d002cd9b58..bc30bb7798 100644 --- a/tests/zfs-tests/cmd/clonefile.c +++ b/tests/zfs-tests/cmd/clonefile.c @@ -162,7 +162,7 @@ main(int argc, char **argv) { cf_mode_t mode = CF_MODE_NONE; - char c; + int c; while ((c = getopt(argc, argv, "crfdq")) != -1) { switch (c) { case 'c': From 9e0304c363d7bcc2330b252299edd84a6d4dabbc Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Mon, 22 Jan 2024 16:15:03 -0800 Subject: [PATCH 11/45] ZTS: Apply zfs_bclone_enabled to bclone tests If block cloning is disabled by default then enable it when running the bclone tests. Follow up to #15529. Reviewed-by: Brian Atkinson Signed-off-by: Brian Behlendorf Closes #15796 --- tests/zfs-tests/tests/functional/bclone/cleanup.ksh | 9 ++++++++- tests/zfs-tests/tests/functional/bclone/setup.ksh | 5 +++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/tests/zfs-tests/tests/functional/bclone/cleanup.ksh b/tests/zfs-tests/tests/functional/bclone/cleanup.ksh index df6d9c08fe..0021ccb57a 100755 --- a/tests/zfs-tests/tests/functional/bclone/cleanup.ksh +++ b/tests/zfs-tests/tests/functional/bclone/cleanup.ksh @@ -34,4 +34,11 @@ log_must zfs destroy $TESTSRCFS log_must zfs destroy $TESTDSTFS -default_cleanup + +default_cleanup_noexit + +if tunable_exists BCLONE_ENABLED ; then + log_must restore_tunable BCLONE_ENABLED +fi + +log_pass diff --git a/tests/zfs-tests/tests/functional/bclone/setup.ksh b/tests/zfs-tests/tests/functional/bclone/setup.ksh index c68719ee72..9d26088c5a 100755 --- a/tests/zfs-tests/tests/functional/bclone/setup.ksh +++ b/tests/zfs-tests/tests/functional/bclone/setup.ksh @@ -36,6 +36,11 @@ if ! 
command -v clonefile > /dev/null ; then log_unsupported "clonefile program required to test block cloning" fi +if tunable_exists BCLONE_ENABLED ; then + log_must save_tunable BCLONE_ENABLED + log_must set_tunable32 BCLONE_ENABLED 1 +fi + DISK=${DISKS%% *} default_setup_noexit $DISK "true" From 3425484eb907d489c315cced2a1fdea08ef03fc4 Mon Sep 17 00:00:00 2001 From: Pawel Jakub Dawidek Date: Tue, 23 Jan 2024 15:03:48 -0800 Subject: [PATCH 12/45] Fix file descriptor leak on pool import. Descriptor leak can be easily reproduced by doing: # zpool import tank # sysctl kern.openfiles # zpool export tank; zpool import tank # sysctl kern.openfiles We were leaking four file descriptors on every import. Similar leak most likely existed when using file-based VDEVs. External-issue: https://reviews.freebsd.org/D43529 Reviewed-by: Brian Behlendorf Signed-off-by: Pawel Jakub Dawidek Closes #15630 --- module/os/freebsd/zfs/zfs_file_os.c | 63 +++++++++++++++++++++++------ 1 file changed, 51 insertions(+), 12 deletions(-) diff --git a/module/os/freebsd/zfs/zfs_file_os.c b/module/os/freebsd/zfs/zfs_file_os.c index 60c9ff0581..f7f2be2cf9 100644 --- a/module/os/freebsd/zfs/zfs_file_os.c +++ b/module/os/freebsd/zfs/zfs_file_os.c @@ -53,26 +53,65 @@ int zfs_file_open(const char *path, int flags, int mode, zfs_file_t **fpp) { struct thread *td; - int rc, fd; + struct vnode *vp; + struct file *fp; + struct nameidata nd; + int error; td = curthread; pwd_ensure_dirs(); - /* 12.x doesn't take a const char * */ - rc = kern_openat(td, AT_FDCWD, __DECONST(char *, path), - UIO_SYSSPACE, flags, mode); - if (rc) - return (SET_ERROR(rc)); - fd = td->td_retval[0]; - td->td_retval[0] = 0; - if (fget(curthread, fd, &cap_no_rights, fpp)) - kern_close(td, fd); + + KASSERT((flags & (O_EXEC | O_PATH)) == 0, + ("invalid flags: 0x%x", flags)); + KASSERT((flags & O_ACCMODE) != O_ACCMODE, + ("invalid flags: 0x%x", flags)); + flags = FFLAGS(flags); + + error = falloc_noinstall(td, &fp); + if (error != 0) { + 
return (error); + } + fp->f_flag = flags & FMASK; + +#if __FreeBSD_version >= 1400043 + NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path); +#else + NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, td); +#endif + error = vn_open(&nd, &flags, mode, fp); + if (error != 0) { + falloc_abort(td, fp); + return (SET_ERROR(error)); + } + NDFREE_PNBUF(&nd); + vp = nd.ni_vp; + fp->f_vnode = vp; + if (fp->f_ops == &badfileops) { + finit_vnode(fp, flags, NULL, &vnops); + } + VOP_UNLOCK(vp); + if (vp->v_type != VREG) { + zfs_file_close(fp); + return (SET_ERROR(EACCES)); + } + + if (flags & O_TRUNC) { + error = fo_truncate(fp, 0, td->td_ucred, td); + if (error != 0) { + zfs_file_close(fp); + return (SET_ERROR(error)); + } + } + + *fpp = fp; + return (0); } void zfs_file_close(zfs_file_t *fp) { - fo_close(fp, curthread); + fdrop(fp, curthread); } static int @@ -263,7 +302,7 @@ zfs_file_get(int fd) void zfs_file_put(zfs_file_t *fp) { - fdrop(fp, curthread); + zfs_file_close(fp); } loff_t From 4d4972ed98a83a4b3a404f53782d5b351b8ef8cf Mon Sep 17 00:00:00 2001 From: Rich Ercolani <214141+rincebrain@users.noreply.github.com> Date: Fri, 12 Jan 2024 15:17:26 -0500 Subject: [PATCH 13/45] Stop wasting time on malloc in snprintf_zstd_header Profiling zdb -vvvvv on datasets with a lot of zstd blocks, we find ourselves spending quite a lot of time on malloc/free, because we allocate a 16M abd each call, and never free it, so we're leaking 16M per call as well. This seems sub-optimal. So let's just keep the buffer around and reuse it. 
Reviewed-by: Brian Behlendorf Reviewed-by: Rob Norris Signed-off-by: Rich Ercolani Closes #15721 --- cmd/zdb/zdb.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index 3fc9fd2a9d..70c85a87ad 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -2360,7 +2360,7 @@ static void snprintf_zstd_header(spa_t *spa, char *blkbuf, size_t buflen, const blkptr_t *bp) { - abd_t *pabd; + static abd_t *pabd = NULL; void *buf; zio_t *zio; zfs_zstdhdr_t zstd_hdr; @@ -2391,7 +2391,8 @@ snprintf_zstd_header(spa_t *spa, char *blkbuf, size_t buflen, return; } - pabd = abd_alloc_for_io(SPA_MAXBLOCKSIZE, B_FALSE); + if (!pabd) + pabd = abd_alloc_for_io(SPA_MAXBLOCKSIZE, B_FALSE); zio = zio_root(spa, NULL, NULL, 0); /* Decrypt but don't decompress so we can read the compression header */ From 7bccf98a731d717515ba83f728be337e2b21b9bc Mon Sep 17 00:00:00 2001 From: Rich Ercolani <214141+rincebrain@users.noreply.github.com> Date: Fri, 12 Jan 2024 14:55:17 -0500 Subject: [PATCH 14/45] Make zdb -R scale less poorly zdb -R with :d tries to use gzip decompression 9 times per size. There's absolutely no reason for that, they're all the same decompressor. Reviewed-by: Brian Atkinson Reviewed-by: Brian Behlendorf Signed-off-by: Rich Ercolani Closes #15726 --- cmd/zdb/zdb.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index 70c85a87ad..19b0d61f09 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -8491,6 +8491,14 @@ zdb_decompress_block(abd_t *pabd, void *buf, void *lbuf, uint64_t lsize, *cfuncp++ = ZIO_COMPRESS_LZ4; *cfuncp++ = ZIO_COMPRESS_LZJB; mask |= ZIO_COMPRESS_MASK(LZ4) | ZIO_COMPRESS_MASK(LZJB); + /* + * Every gzip level has the same decompressor, no need to + * run it 9 times per bruteforce attempt. 
+ */ + mask |= ZIO_COMPRESS_MASK(GZIP_2) | ZIO_COMPRESS_MASK(GZIP_3); + mask |= ZIO_COMPRESS_MASK(GZIP_4) | ZIO_COMPRESS_MASK(GZIP_5); + mask |= ZIO_COMPRESS_MASK(GZIP_6) | ZIO_COMPRESS_MASK(GZIP_7); + mask |= ZIO_COMPRESS_MASK(GZIP_8) | ZIO_COMPRESS_MASK(GZIP_9); for (int c = 0; c < ZIO_COMPRESS_FUNCTIONS; c++) if (((1ULL << c) & mask) == 0) *cfuncp++ = c; From 22e4f08c30f97d208a6d1ae8b8943071de340431 Mon Sep 17 00:00:00 2001 From: Mark Johnston Date: Tue, 9 Jan 2024 10:57:29 -0500 Subject: [PATCH 15/45] Linux: Defer loading the object set in zfs_setattr() We need to wait until after having done a zfs_enter() to load some fields from the zfsvfs structure. Otherwise a use-after-free is possible in the face of a concurrent rollback. Other functions in this file are careful to avoid this bug, I believe this is the only instance. Reviewed-by: Brian Atkinson Reviewed-by: Brian Behlendorf Signed-off-by: Mark Johnston Closes #15752 --- module/os/linux/zfs/zfs_vnops_os.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/module/os/linux/zfs/zfs_vnops_os.c b/module/os/linux/zfs/zfs_vnops_os.c index 9ea8ad5f4a..ecfa4b54e2 100644 --- a/module/os/linux/zfs/zfs_vnops_os.c +++ b/module/os/linux/zfs/zfs_vnops_os.c @@ -1853,7 +1853,7 @@ zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr, zidmap_t *mnt_ns) { struct inode *ip; zfsvfs_t *zfsvfs = ZTOZSB(zp); - objset_t *os = zfsvfs->z_os; + objset_t *os; zilog_t *zilog; dmu_tx_t *tx; vattr_t oldva; @@ -1885,6 +1885,7 @@ zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr, zidmap_t *mnt_ns) if ((err = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0) return (err); ip = ZTOI(zp); + os = zfsvfs->z_os; /* * If this is a xvattr_t, then get a pointer to the structure of From 8b1c6db3d2f35cf13c5f35374b0ec37216881ec7 Mon Sep 17 00:00:00 2001 From: Mark Johnston Date: Tue, 9 Jan 2024 18:57:09 -0500 Subject: [PATCH 16/45] Fix a potential use-after-free in zfs_setsecattr() In general, VOPs must not load the 
"z_log" field until having called zfs_enter_verify_zp(). Reviewed-by: Brian Atkinson Reviewed-by: Brian Behlendorf Signed-off-by: Mark Johnston Closes #15752 --- module/zfs/zfs_vnops.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c index aa61575a6a..e6ae574ad0 100644 --- a/module/zfs/zfs_vnops.c +++ b/module/zfs/zfs_vnops.c @@ -801,11 +801,11 @@ zfs_setsecattr(znode_t *zp, vsecattr_t *vsecp, int flag, cred_t *cr) zfsvfs_t *zfsvfs = ZTOZSB(zp); int error; boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; - zilog_t *zilog = zfsvfs->z_log; + zilog_t *zilog; if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0) return (error); - + zilog = zfsvfs->z_log; error = zfs_setacl(zp, vsecp, skipaclchk, cr); if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) From 4db88c37cc4ebcf0bb00ea9574cf66c9ccf2409c Mon Sep 17 00:00:00 2001 From: Stefan Lendl <1321542+stfl@users.noreply.github.com> Date: Fri, 12 Jan 2024 21:05:11 +0100 Subject: [PATCH 17/45] fix(mount): do not truncate shares not zfs mount When running zfs share -a resetting the exports.d/zfs.exports makes sense the get a clean state. Truncating was also called with zfs mount which would not populate the file again. Add test to verify shares persist after mount -a. 
Reviewed-by: Brian Behlendorf Signed-off-by: Stefan Lendl Closes #15607 Closes #15660 --- cmd/zfs/zfs_main.c | 3 +- tests/runfiles/common.run | 3 +- tests/zfs-tests/tests/Makefile.am | 1 + .../zfs_share/zfs_share_after_mount.ksh | 62 +++++++++++++++++++ 4 files changed, 67 insertions(+), 2 deletions(-) create mode 100755 tests/zfs-tests/tests/functional/cli_root/zfs_share/zfs_share_after_mount.ksh diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c index 5644869cf3..67b191d72e 100644 --- a/cmd/zfs/zfs_main.c +++ b/cmd/zfs/zfs_main.c @@ -7230,7 +7230,8 @@ share_mount(int op, int argc, char **argv) pthread_mutex_init(&share_mount_state.sm_lock, NULL); /* For a 'zfs share -a' operation start with a clean slate. */ - zfs_truncate_shares(NULL); + if (op == OP_SHARE) + zfs_truncate_shares(NULL); /* * libshare isn't mt-safe, so only do the operation in parallel diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run index 33f30b0055..f93bfb4333 100644 --- a/tests/runfiles/common.run +++ b/tests/runfiles/common.run @@ -318,7 +318,8 @@ tags = ['functional', 'cli_root', 'zfs_set'] [tests/functional/cli_root/zfs_share] tests = ['zfs_share_001_pos', 'zfs_share_002_pos', 'zfs_share_003_pos', 'zfs_share_004_pos', 'zfs_share_006_pos', 'zfs_share_008_neg', - 'zfs_share_010_neg', 'zfs_share_011_pos', 'zfs_share_concurrent_shares'] + 'zfs_share_010_neg', 'zfs_share_011_pos', 'zfs_share_concurrent_shares', + 'zfs_share_after_mount'] tags = ['functional', 'cli_root', 'zfs_share'] [tests/functional/cli_root/zfs_snapshot] diff --git a/tests/zfs-tests/tests/Makefile.am b/tests/zfs-tests/tests/Makefile.am index 1c3dfc77ea..19174c71fb 100644 --- a/tests/zfs-tests/tests/Makefile.am +++ b/tests/zfs-tests/tests/Makefile.am @@ -914,6 +914,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/cli_root/zfs_share/zfs_share_012_pos.ksh \ functional/cli_root/zfs_share/zfs_share_013_pos.ksh \ functional/cli_root/zfs_share/zfs_share_concurrent_shares.ksh \ + 
functional/cli_root/zfs_share/zfs_share_after_mount.ksh \ functional/cli_root/zfs_snapshot/cleanup.ksh \ functional/cli_root/zfs_snapshot/setup.ksh \ functional/cli_root/zfs_snapshot/zfs_snapshot_001_neg.ksh \ diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_share/zfs_share_after_mount.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_share/zfs_share_after_mount.ksh new file mode 100755 index 0000000000..0d4b66ea85 --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_share/zfs_share_after_mount.ksh @@ -0,0 +1,62 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023 by Proxmox. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# DESCRIPTION: +# Verify that nfs shares persist after zfs mount -a +# +# STRATEGY: +# 1. Verify that the filesystem is not shared. +# 2. Enable the 'sharenfs' property +# 3. Verify filesystem is shared +# 4. Invoke 'zfs mount -a' +# 5. 
Verify filesystem is still shared + +verify_runnable "global" + +function cleanup +{ + log_must zfs set sharenfs=off $TESTPOOL/$TESTFS + is_shared $TESTPOOL/$TESTFS && \ + log_must unshare_fs $TESTPOOL/$TESTFS + log_must zfs share -a +} + + +log_onexit cleanup + +cleanup + +log_must zfs set sharenfs="on" $TESTPOOL/$TESTFS +log_must is_shared $TESTPOOL/$TESTFS +log_must is_exported $TESTPOOL/$TESTFS + +log_must zfs mount -a +log_must is_shared $TESTPOOL/$TESTFS +log_must is_exported $TESTPOOL/$TESTFS + +log_pass "Verify that nfs shares persist after zfs mount -a" From 509526ad2103adddc18c1b6d7b514d0c36b682ef Mon Sep 17 00:00:00 2001 From: Benjamin Sherman Date: Fri, 12 Jan 2024 14:33:41 -0600 Subject: [PATCH 18/45] fix: preserve linux kmod signature in zfs-kmod rpm spec This change provides rpm spec macros to sign the zfs and spl kmods as the final step after the %install scriptlet. This is needed since the find-debuginfo.sh script strips out debug symbols plus signatures. Kernel module signing only occurs when the required files are present as typically required in the Linux source tree: - certs/signing_key.pem - certs/signing_key.x509 The method for overriding the default __spec_install_post macro is inspired by (and largely copied from) the Fedora kernel.spec. Reviewed-by: Tony Hutter Reviewed-by: Tino Reichardt Signed-off-by: Benjamin Sherman Closes #15744 --- rpm/generic/zfs-kmod.spec.in | 24 ++++++++++++++++++++++++ rpm/redhat/zfs-kmod.spec.in | 24 ++++++++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/rpm/generic/zfs-kmod.spec.in b/rpm/generic/zfs-kmod.spec.in index 3c73e2ff2d..4cc075585d 100644 --- a/rpm/generic/zfs-kmod.spec.in +++ b/rpm/generic/zfs-kmod.spec.in @@ -150,6 +150,30 @@ for kernel_version in %{?kernel_versions}; do done +# Module signing (modsign) +# +# This must be run _after_ find-debuginfo.sh runs, otherwise that will strip +# the signature off of the modules. 
+# (Based on Fedora's kernel.spec workaround) +%define __modsign_install_post \ + sign_pem="%{ksrc}/certs/signing_key.pem"; \ + sign_x509="%{ksrc}/certs/signing_key.x509"; \ + if [ -f "${sign_x509}" ]\ + then \ + echo "Signing kernel modules ..."; \ + for kmod in $(find ${RPM_BUILD_ROOT}%{kmodinstdir_prefix}/*/extra/ -name \*.ko); do \ + %{ksrc}/scripts/sign-file sha256 ${sign_pem} ${sign_x509} ${kmod}; \ + done \ + fi \ +%{nil} + +# hack to ensure signing happens after find-debuginfo.sh runs +%define __spec_install_post \ + %{?__debug_package:%{__debug_install_post}}\ + %{__arch_install_post}\ + %{__os_install_post}\ + %{__modsign_install_post} + %install rm -rf ${RPM_BUILD_ROOT} diff --git a/rpm/redhat/zfs-kmod.spec.in b/rpm/redhat/zfs-kmod.spec.in index f59551c0b4..9c836786ba 100644 --- a/rpm/redhat/zfs-kmod.spec.in +++ b/rpm/redhat/zfs-kmod.spec.in @@ -72,6 +72,30 @@ fi %{?kernel_llvm} make %{?_smp_mflags} +# Module signing (modsign) +# +# This must be run _after_ find-debuginfo.sh runs, otherwise that will strip +# the signature off of the modules. 
+# (Based on Fedora's kernel.spec workaround) +%define __modsign_install_post \ + sign_pem="%{ksrc}/certs/signing_key.pem"; \ + sign_x509="%{ksrc}/certs/signing_key.x509"; \ + if [ -f "${sign_x509}" ]\ + then \ + echo "Signing kernel modules ..."; \ + for kmod in $(find %{buildroot}/lib/modules/%{kverrel}/extra/ -name \*.ko); do \ + %{ksrc}/scripts/sign-file sha256 ${sign_pem} ${sign_x509} ${kmod}; \ + done \ + fi \ +%{nil} + +# hack to ensure signing happens after find-debuginfo.sh runs +%define __spec_install_post \ + %{?__debug_package:%{__debug_install_post}}\ + %{__arch_install_post}\ + %{__os_install_post}\ + %{__modsign_install_post} + %install make install \ DESTDIR=${RPM_BUILD_ROOT} \ From 2006ac1f4a52419d08641324ba56ecc5d0bbaf6f Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Fri, 12 Jan 2024 12:35:29 -0800 Subject: [PATCH 19/45] Fix "out of memory" error Drop the no_memory() call from zpool_in_use() when reading the label fails and instead return the error to the caller. This prevents a misleading "internal error: out of memory" error when the label can't be read. This will result in is_spare() returning B_FALSE instead of aborting, which is already safely handled. Furthermore, on Linux it's possible for EREMOTEIO to be returned by an NVMe device if the device has been low-level formatted and not rescanned. In this case we want to fall back to the legacy scanning method and read any of the labels we can. 
Reviewed-by: Brian Atkinson Signed-off-by: Brian Behlendorf Issue #13538 Closes #15747 --- lib/libzfs/libzfs_import.c | 4 +--- lib/libzutil/zutil_import.c | 15 +++++++++++++-- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/lib/libzfs/libzfs_import.c b/lib/libzfs/libzfs_import.c index 2a7c5a76a0..e2d40a7b3b 100644 --- a/lib/libzfs/libzfs_import.c +++ b/lib/libzfs/libzfs_import.c @@ -291,10 +291,8 @@ zpool_in_use(libzfs_handle_t *hdl, int fd, pool_state_t *state, char **namestr, *inuse = B_FALSE; - if (zpool_read_label(fd, &config, NULL) != 0) { - (void) no_memory(hdl); + if (zpool_read_label(fd, &config, NULL) != 0) return (-1); - } if (config == NULL) return (0); diff --git a/lib/libzutil/zutil_import.c b/lib/libzutil/zutil_import.c index 19d8a47428..bafe50e5f9 100644 --- a/lib/libzutil/zutil_import.c +++ b/lib/libzutil/zutil_import.c @@ -1056,10 +1056,21 @@ zpool_read_label(int fd, nvlist_t **config, int *num_labels) case EINVAL: break; case EINPROGRESS: - // This shouldn't be possible to - // encounter, die if we do. + /* + * This shouldn't be possible to + * encounter, die if we do. + */ ASSERT(B_FALSE); zfs_fallthrough; + case EREMOTEIO: + /* + * May be returned by an NVMe device + * which is visible in /dev/ but due + * to a low-level format change, or + * other error, needs to be rescanned. + * Try the slow method. + */ + zfs_fallthrough; case EOPNOTSUPP: case ENOSYS: do_slow = B_TRUE; From 52cee9a3eb0a691ce915a6f46d23f575351d8b4d Mon Sep 17 00:00:00 2001 From: Ameer Hamza Date: Thu, 4 Jan 2024 19:02:50 +0500 Subject: [PATCH 20/45] fix: Uber block label not always found for aux vdevs When spare or l2cache (aux) vdev is added during pool creation, spa->spa_uberblock is not dumped until that point. Subsequently, the aux label is never synchronized after its initial creation, resulting in the uberblock label remaining undumped. The uberblock is crucial for lib_blkid in identifying the ZFS partition type. 
To address this issue, we now ensure sync of the uberblock label once if it's not dumped initially. Reviewed-by: Umer Saleem Reviewed-by: Alexander Motin Signed-off-by: Ameer Hamza Closes #15737 --- include/sys/spa_impl.h | 1 + module/zfs/vdev_label.c | 31 +++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h index cdf65c3713..c7ecd3d0cc 100644 --- a/include/sys/spa_impl.h +++ b/include/sys/spa_impl.h @@ -266,6 +266,7 @@ struct spa { spa_aux_vdev_t spa_spares; /* hot spares */ spa_aux_vdev_t spa_l2cache; /* L2ARC cache devices */ + boolean_t spa_aux_sync_uber; /* need to sync aux uber */ nvlist_t *spa_label_features; /* Features for reading MOS */ uint64_t spa_config_object; /* MOS object for pool config */ uint64_t spa_config_generation; /* config generation number */ diff --git a/module/zfs/vdev_label.c b/module/zfs/vdev_label.c index a2e5524a83..21348f95a4 100644 --- a/module/zfs/vdev_label.c +++ b/module/zfs/vdev_label.c @@ -1148,6 +1148,14 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason) */ VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_ASHIFT, vd->vdev_ashift) == 0); + + /* + * When spare or l2cache (aux) vdev is added during pool + * creation, spa->spa_uberblock is not written until this + * point. Write it on next config sync. 
+ */ + if (uberblock_verify(&spa->spa_uberblock)) + spa->spa_aux_sync_uber = B_TRUE; } else { uint64_t txg = 0ULL; @@ -1749,6 +1757,16 @@ vdev_uberblock_sync_list(vdev_t **svd, int svdcount, uberblock_t *ub, int flags) for (int v = 0; v < svdcount; v++) vdev_uberblock_sync(zio, &good_writes, ub, svd[v], flags); + if (spa->spa_aux_sync_uber) { + for (int v = 0; v < spa->spa_spares.sav_count; v++) { + vdev_uberblock_sync(zio, &good_writes, ub, + spa->spa_spares.sav_vdevs[v], flags); + } + for (int v = 0; v < spa->spa_l2cache.sav_count; v++) { + vdev_uberblock_sync(zio, &good_writes, ub, + spa->spa_l2cache.sav_vdevs[v], flags); + } + } (void) zio_wait(zio); /* @@ -1763,6 +1781,19 @@ vdev_uberblock_sync_list(vdev_t **svd, int svdcount, uberblock_t *ub, int flags) zio_flush(zio, svd[v]); } } + if (spa->spa_aux_sync_uber) { + spa->spa_aux_sync_uber = B_FALSE; + for (int v = 0; v < spa->spa_spares.sav_count; v++) { + if (vdev_writeable(spa->spa_spares.sav_vdevs[v])) { + zio_flush(zio, spa->spa_spares.sav_vdevs[v]); + } + } + for (int v = 0; v < spa->spa_l2cache.sav_count; v++) { + if (vdev_writeable(spa->spa_l2cache.sav_vdevs[v])) { + zio_flush(zio, spa->spa_l2cache.sav_vdevs[v]); + } + } + } (void) zio_wait(zio); From eb4a36bcef41f2f73a74bbfcd7fb46152df7b0e6 Mon Sep 17 00:00:00 2001 From: Ameer Hamza Date: Thu, 4 Jan 2024 19:32:53 +0500 Subject: [PATCH 21/45] Extend aux label to add path information Pool import logic uses vdev paths, so it makes sense to add path information on AUX vdev as well. 
Reviewed-by: Umer Saleem Reviewed-by: Alexander Motin Signed-off-by: Ameer Hamza Closes #15737 --- module/zfs/vdev_label.c | 54 +++++++++++++++++++++++------------------ 1 file changed, 31 insertions(+), 23 deletions(-) diff --git a/module/zfs/vdev_label.c b/module/zfs/vdev_label.c index 21348f95a4..737d8b33e1 100644 --- a/module/zfs/vdev_label.c +++ b/module/zfs/vdev_label.c @@ -1023,6 +1023,10 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason) int error; uint64_t spare_guid = 0, l2cache_guid = 0; int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL; + boolean_t reason_spare = (reason == VDEV_LABEL_SPARE || (reason == + VDEV_LABEL_REMOVE && vd->vdev_isspare)); + boolean_t reason_l2cache = (reason == VDEV_LABEL_L2CACHE || (reason == + VDEV_LABEL_REMOVE && vd->vdev_isl2cache)); ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); @@ -1108,34 +1112,20 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason) * really part of an active pool just yet. The labels will * be written again with a meaningful txg by spa_sync(). */ - if (reason == VDEV_LABEL_SPARE || - (reason == VDEV_LABEL_REMOVE && vd->vdev_isspare)) { + if (reason_spare || reason_l2cache) { /* - * For inactive hot spares, we generate a special label that - * identifies as a mutually shared hot spare. We write the - * label if we are adding a hot spare, or if we are removing an - * active hot spare (in which case we want to revert the - * labels). + * For inactive hot spares and level 2 ARC devices, we generate + * a special label that identifies as a mutually shared hot + * spare or l2cache device. We write the label in case of + * addition or removal of hot spare or l2cache vdev (in which + * case we want to revert the labels). 
*/ VERIFY(nvlist_alloc(&label, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_VERSION, spa_version(spa)) == 0); VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_POOL_STATE, - POOL_STATE_SPARE) == 0); - VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_GUID, - vd->vdev_guid) == 0); - } else if (reason == VDEV_LABEL_L2CACHE || - (reason == VDEV_LABEL_REMOVE && vd->vdev_isl2cache)) { - /* - * For level 2 ARC devices, add a special label. - */ - VERIFY(nvlist_alloc(&label, NV_UNIQUE_NAME, KM_SLEEP) == 0); - - VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_VERSION, - spa_version(spa)) == 0); - VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_POOL_STATE, - POOL_STATE_L2CACHE) == 0); + reason_spare ? POOL_STATE_SPARE : POOL_STATE_L2CACHE) == 0); VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_GUID, vd->vdev_guid) == 0); @@ -1146,8 +1136,26 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason) * spa->spa_l2cache->sav_config (populated in * spa_ld_open_aux_vdevs()). 
*/ - VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_ASHIFT, - vd->vdev_ashift) == 0); + if (reason_l2cache) { + VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_ASHIFT, + vd->vdev_ashift) == 0); + } + + /* + * Add path information to help find it during pool import + */ + if (vd->vdev_path != NULL) { + VERIFY(nvlist_add_string(label, ZPOOL_CONFIG_PATH, + vd->vdev_path) == 0); + } + if (vd->vdev_devid != NULL) { + VERIFY(nvlist_add_string(label, ZPOOL_CONFIG_DEVID, + vd->vdev_devid) == 0); + } + if (vd->vdev_physpath != NULL) { + VERIFY(nvlist_add_string(label, ZPOOL_CONFIG_PHYS_PATH, + vd->vdev_physpath) == 0); + } /* * When spare or l2cache (aux) vdev is added during pool From a2e71db66434ea27a57e3add5fbda35ecd0722d6 Mon Sep 17 00:00:00 2001 From: Ameer Hamza Date: Thu, 4 Jan 2024 19:35:04 +0500 Subject: [PATCH 22/45] Add path handling for aux vdevs in `label_path` If the AUX vdev is added using UUID, importing the pool falls back to opening the AUX vdev by disk name instead of UUID due to the absence of path information for AUX vdevs. Since AUX labels now have path information, this PR adds path handling for them in `label_path`. Reviewed-by: Umer Saleem Reviewed-by: Alexander Motin Signed-off-by: Ameer Hamza Closes #15737 --- lib/libzutil/zutil_import.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/lib/libzutil/zutil_import.c b/lib/libzutil/zutil_import.c index bafe50e5f9..f7ef69a1d9 100644 --- a/lib/libzutil/zutil_import.c +++ b/lib/libzutil/zutil_import.c @@ -1221,13 +1221,26 @@ label_paths(libpc_handle_t *hdl, nvlist_t *label, const char **path, nvlist_t *nvroot; uint64_t pool_guid; uint64_t vdev_guid; + uint64_t state; *path = NULL; *devid = NULL; + if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, &vdev_guid) != 0) + return (ENOENT); + + /* + * In case of spare or l2cache, we directly return path/devid from the + * label. 
+ */ + if (!(nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_STATE, &state)) && + (state == POOL_STATE_SPARE || state == POOL_STATE_L2CACHE)) { + (void) nvlist_lookup_string(label, ZPOOL_CONFIG_PATH, path); + (void) nvlist_lookup_string(label, ZPOOL_CONFIG_DEVID, devid); + return (0); + } if (nvlist_lookup_nvlist(label, ZPOOL_CONFIG_VDEV_TREE, &nvroot) || - nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_GUID, &pool_guid) || - nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, &vdev_guid)) + nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_GUID, &pool_guid)) return (ENOENT); return (label_paths_impl(hdl, nvroot, pool_guid, vdev_guid, path, From 6b64acc157ec713f1e3d0b1980a528e874341e52 Mon Sep 17 00:00:00 2001 From: youzhongyang Date: Tue, 16 Jan 2024 16:30:58 -0500 Subject: [PATCH 23/45] Make spl_kmem_cache size check consistent On Linux x86_64, kmem cache can have size up to 4M, however increasing spl_kmem_cache_slab_limit can lead to crash due to the size check inconsistency. Reviewed-by: Brian Behlendorf Signed-off-by: Youzhong Yang Closes #15757 --- module/os/linux/spl/spl-kmem-cache.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/module/os/linux/spl/spl-kmem-cache.c b/module/os/linux/spl/spl-kmem-cache.c index a2920c7466..4b15081715 100644 --- a/module/os/linux/spl/spl-kmem-cache.c +++ b/module/os/linux/spl/spl-kmem-cache.c @@ -91,7 +91,8 @@ MODULE_PARM_DESC(spl_kmem_cache_max_size, "Maximum size of slab in MB"); * of 16K was determined to be optimal for architectures using 4K pages and * to also work well on architecutres using larger 64K page sizes. 
*/ -static unsigned int spl_kmem_cache_slab_limit = 16384; +static unsigned int spl_kmem_cache_slab_limit = + SPL_MAX_KMEM_ORDER_NR_PAGES * PAGE_SIZE; module_param(spl_kmem_cache_slab_limit, uint, 0644); MODULE_PARM_DESC(spl_kmem_cache_slab_limit, "Objects less than N bytes use the Linux slab"); @@ -783,7 +784,7 @@ spl_kmem_cache_create(const char *name, size_t size, size_t align, } else { unsigned long slabflags = 0; - if (size > (SPL_MAX_KMEM_ORDER_NR_PAGES * PAGE_SIZE)) + if (size > spl_kmem_cache_slab_limit) goto out; #if defined(SLAB_USERCOPY) From 424d06a29886603de2e33ec7aaae6607b17819ff Mon Sep 17 00:00:00 2001 From: Lalufu Date: Tue, 16 Jan 2024 22:32:59 +0100 Subject: [PATCH 24/45] Make sure all necessary RPM path macros are defined When building (s)rpm files through the Makefile, a directory structure is created in /tmp to hold the various files. In case the user running the command has overridden some of the RPM path settings through their user profile (for example in `~/.rpmmacros`), these paths do not line up with the configuration, and the build fails. Make sure all paths used are properly defined. Reviewed-by: Brian Behlendorf Signed-off-by: Ralf Ertzinger Closes #15756 --- config/rpm.am | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/config/rpm.am b/config/rpm.am index 13bd54a625..85c56c0b2e 100644 --- a/config/rpm.am +++ b/config/rpm.am @@ -83,6 +83,11 @@ srpm-common: rpm-local || exit 1; \ LANG=C $(RPMBUILD) \ --define "_tmppath $$rpmbuild/TMP" \ + --define "_builddir $$rpmbuild/BUILD" \ + --define "_rpmdir $$rpmbuild/RPMS" \ + --define "_srcrpmdir $$rpmbuild/SRPMS" \ + --define "_specdir $$rpmbuild/SPECS" \ + --define "_sourcedir $$rpmbuild/SOURCES" \ --define "_topdir $$rpmbuild" \ $(def) -bs $$rpmbuild/SPECS/$$rpmspec || exit 1; \ cp $$rpmbuild/SRPMS/$$rpmpkg . 
|| exit 1; \ @@ -99,6 +104,11 @@ rpm-common: rpm-local || exit 1; \ LANG=C ${RPMBUILD} \ --define "_tmppath $$rpmbuild/TMP" \ + --define "_builddir $$rpmbuild/BUILD" \ + --define "_rpmdir $$rpmbuild/RPMS" \ + --define "_srcrpmdir $$rpmbuild/SRPMS" \ + --define "_specdir $$rpmbuild/SPECS" \ + --define "_sourcedir $$rpmbuild/SOURCES" \ --define "_topdir $$rpmbuild" \ $(def) --rebuild $$rpmpkg || exit 1; \ cp $$rpmbuild/RPMS/*/* . || exit 1; \ From 276be5357cf33a266a676fca1f22924655da1ba3 Mon Sep 17 00:00:00 2001 From: Tino Reichardt Date: Wed, 17 Jan 2024 18:05:12 +0100 Subject: [PATCH 25/45] linux spl: fix typo in top comment of spl-condvar.c Credential Implementation -> Condition Variables Implementation Reviewed-by: Brian Atkinson Reviewed-by: Brian Behlendorf Signed-off-by: Tino Reichardt Closes #15782 --- module/os/linux/spl/spl-condvar.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/module/os/linux/spl/spl-condvar.c b/module/os/linux/spl/spl-condvar.c index e87954714e..5898789ad5 100644 --- a/module/os/linux/spl/spl-condvar.c +++ b/module/os/linux/spl/spl-condvar.c @@ -20,7 +20,7 @@ * You should have received a copy of the GNU General Public License along * with the SPL. If not, see . * - * Solaris Porting Layer (SPL) Credential Implementation. + * Solaris Porting Layer (SPL) Condition Variables Implementation. */ #include From 09a79613640bd96f2ac39967557a8ed602cd04a5 Mon Sep 17 00:00:00 2001 From: Val Packett Date: Fri, 19 Jan 2024 18:01:26 -0300 Subject: [PATCH 26/45] FreeBSD: Fix bootstrapping tools under Linux/musl musl libc has deprecated LFS64 aliases, so bootstrapping FreeBSD tools under musl distros has been failing with stat64 errors. Apply the aliases under non-glibc Linux to fix this problem. 
Reviewed-by: Richard Yao Reviewed-by: Brian Behlendorf Signed-off-by: Val Packett Closes #15780 --- lib/libspl/include/os/freebsd/sys/stat.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/libspl/include/os/freebsd/sys/stat.h b/lib/libspl/include/os/freebsd/sys/stat.h index 88773cceb9..af488244bd 100644 --- a/lib/libspl/include/os/freebsd/sys/stat.h +++ b/lib/libspl/include/os/freebsd/sys/stat.h @@ -76,8 +76,12 @@ fstat64_blk(int fd, struct stat64 *st) /* * Only Intel-based Macs have a separate stat64; Arm-based Macs are like * FreeBSD and have a full 64-bit stat from the start. + * + * On Linux, musl libc is full 64-bit too and has deprecated its own version + * of these defines since version 1.2.4. */ -#if defined(__APPLE__) && !(defined(__i386__) || defined(__x86_64__)) +#if (defined(__APPLE__) && !(defined(__i386__) || defined(__x86_64__))) || \ + (defined(__linux__) && !defined(__GLIBC__)) #define stat64 stat #define fstat64 fstat #endif From cfa29b994594dd4261117aa9c685adc6274485a8 Mon Sep 17 00:00:00 2001 From: Tino Reichardt Date: Fri, 26 Jan 2024 22:36:59 +0100 Subject: [PATCH 27/45] ZTS: Apply small changes for speeding up the tests The Github Action Runner got some new hardware metrics. We should use the provided and empty disk which is pre-mounted at /mnt now. Disk1: 89GiB -> rootfs + bootfs with ~80MB/s -> don't care Disk2: 64GiB -> /mnt with 420MB/s -> new testing ssd This commit will mount the new disk to /var/tmp and provide hopefully some speedups within our testings. 
Reviewed-by: Brian Behlendorf Reviewed-by: George Melikov Reviewed-by: Andrew Innes Signed-off-by: Tino Reichardt Closes #15811 --- .../workflows/scripts/setup-dependencies.sh | 35 ++++++++----------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/.github/workflows/scripts/setup-dependencies.sh b/.github/workflows/scripts/setup-dependencies.sh index 440d5e8e5a..b40f9290f9 100755 --- a/.github/workflows/scripts/setup-dependencies.sh +++ b/.github/workflows/scripts/setup-dependencies.sh @@ -55,29 +55,24 @@ function mod_install() { cat /proc/spl/kstat/zfs/chksum_bench echo "::endgroup::" - echo "::group::Reclaim and report disk space" - # remove 4GiB of images - sudo systemd-run docker system prune --force --all --volumes + echo "::group::Optimize storage for ZFS testings" + # remove swap and umount fast storage + # 89GiB -> rootfs + bootfs with ~80MB/s -> don't care + # 64GiB -> /mnt with 420MB/s -> new testing ssd + sudo swapoff -a - # remove unused software - sudo systemd-run --wait rm -rf \ - "$AGENT_TOOLSDIRECTORY" \ - /opt/* \ - /usr/local/* \ - /usr/share/az* \ - /usr/share/dotnet \ - /usr/share/gradle* \ - /usr/share/miniconda \ - /usr/share/swift \ - /var/lib/gems \ - /var/lib/mysql \ - /var/lib/snapd - - # trim the cleaned space - sudo fstrim / + # this one is fast and mounted @ /mnt + # -> we reformat with ext4 + move it to /var/tmp + DEV="/dev/disk/azure/resource-part1" + sudo umount /mnt + sudo mkfs.ext4 -O ^has_journal -F $DEV + sudo mount -o noatime,barrier=0 $DEV /var/tmp + sudo chmod 1777 /var/tmp # disk usage afterwards - df -h / + sudo df -h / + sudo df -h /var/tmp + sudo fstrim -a echo "::endgroup::" } From 9da745f5de73487e14e6dfd65130b1677f84518a Mon Sep 17 00:00:00 2001 From: Richard Yao Date: Fri, 26 Jan 2024 17:11:33 -0500 Subject: [PATCH 28/45] Switch to CodeQL to detect prohibited function use The LLVM/Clang developers pointed out that using the CPP to detect use of functions that our QA policies prohibit risks invoking 
undefined behavior. To resolve this, we configure CodeQL to detect forbidden function usage. Note that cpp in the context of CodeQL refers to C/C++, rather than the C PreProcessor, which C++ also uses. It really should have been written cxx, but that ship sailed a long time ago. This misuse of the term cpp is retained in the CodeQL configuration for consistency with upstream CodeQL. As a side benefit, verbose make no longer is a wall of text showing a bunch of CPP macros, which can make debugging slightly easier. Reviewed-by: Brian Behlendorf Signed-off-by: Richard Yao Closes #15819 Closes #14134 --- .github/codeql-cpp.yml | 4 ++ .github/codeql-python.yml | 4 ++ .../cpp/deprecatedFunctionUsage.ql | 59 +++++++++++++++++++ .github/codeql/custom-queries/cpp/qlpack.yml | 4 ++ .github/workflows/codeql.yml | 1 + config/Rules.am | 15 ----- 6 files changed, 72 insertions(+), 15 deletions(-) create mode 100644 .github/codeql-cpp.yml create mode 100644 .github/codeql-python.yml create mode 100644 .github/codeql/custom-queries/cpp/deprecatedFunctionUsage.ql create mode 100644 .github/codeql/custom-queries/cpp/qlpack.yml diff --git a/.github/codeql-cpp.yml b/.github/codeql-cpp.yml new file mode 100644 index 0000000000..88b8c60860 --- /dev/null +++ b/.github/codeql-cpp.yml @@ -0,0 +1,4 @@ +name: "Custom CodeQL Analysis" + +queries: + - uses: ./.github/codeql/custom-queries/cpp/deprecatedFunctionUsage.ql diff --git a/.github/codeql-python.yml b/.github/codeql-python.yml new file mode 100644 index 0000000000..93cb4a435e --- /dev/null +++ b/.github/codeql-python.yml @@ -0,0 +1,4 @@ +name: "Custom CodeQL Analysis" + +paths-ignore: + - tests diff --git a/.github/codeql/custom-queries/cpp/deprecatedFunctionUsage.ql b/.github/codeql/custom-queries/cpp/deprecatedFunctionUsage.ql new file mode 100644 index 0000000000..eb4b7bd629 --- /dev/null +++ b/.github/codeql/custom-queries/cpp/deprecatedFunctionUsage.ql @@ -0,0 +1,59 @@ +/** + * @name Deprecated function usage detection + * 
@description Detects functions whose usage is banned from the OpenZFS + * codebase due to QA concerns. + * @kind problem + * @severity error + * @id cpp/deprecated-function-usage +*/ + +import cpp + +predicate isDeprecatedFunction(Function f) { + f.getName() = "strtok" or + f.getName() = "__xpg_basename" or + f.getName() = "basename" or + f.getName() = "dirname" or + f.getName() = "bcopy" or + f.getName() = "bcmp" or + f.getName() = "bzero" or + f.getName() = "asctime" or + f.getName() = "asctime_r" or + f.getName() = "gmtime" or + f.getName() = "localtime" or + f.getName() = "strncpy" + +} + +string getReplacementMessage(Function f) { + if f.getName() = "strtok" then + result = "Use strtok_r(3) instead!" + else if f.getName() = "__xpg_basename" then + result = "basename(3) is underspecified. Use zfs_basename() instead!" + else if f.getName() = "basename" then + result = "basename(3) is underspecified. Use zfs_basename() instead!" + else if f.getName() = "dirname" then + result = "dirname(3) is underspecified. Use zfs_dirnamelen() instead!" + else if f.getName() = "bcopy" then + result = "bcopy(3) is deprecated. Use memcpy(3)/memmove(3) instead!" + else if f.getName() = "bcmp" then + result = "bcmp(3) is deprecated. Use memcmp(3) instead!" + else if f.getName() = "bzero" then + result = "bzero(3) is deprecated. Use memset(3) instead!" + else if f.getName() = "asctime" then + result = "Use strftime(3) instead!" + else if f.getName() = "asctime_r" then + result = "Use strftime(3) instead!" + else if f.getName() = "gmtime" then + result = "gmtime(3) isn't thread-safe. Use gmtime_r(3) instead!" + else if f.getName() = "localtime" then + result = "localtime(3) isn't thread-safe. Use localtime_r(3) instead!" + else + result = "strncpy(3) is deprecated. Use strlcpy(3) instead!" 
+} + +from FunctionCall fc, Function f +where + fc.getTarget() = f and + isDeprecatedFunction(f) +select fc, getReplacementMessage(f) diff --git a/.github/codeql/custom-queries/cpp/qlpack.yml b/.github/codeql/custom-queries/cpp/qlpack.yml new file mode 100644 index 0000000000..cbe0f1cbe3 --- /dev/null +++ b/.github/codeql/custom-queries/cpp/qlpack.yml @@ -0,0 +1,4 @@ +name: openzfs-cpp-queries +version: 0.0.0 +libraryPathDependencies: codeql-cpp +suites: openzfs-cpp-suite diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 037f8aca0e..7ccfc14925 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -29,6 +29,7 @@ jobs: - name: Initialize CodeQL uses: github/codeql-action/init@v2 with: + config-file: .github/codeql-${{ matrix.language }}.yml languages: ${{ matrix.language }} - name: Autobuild diff --git a/config/Rules.am b/config/Rules.am index 7c266964f3..2e463ae608 100644 --- a/config/Rules.am +++ b/config/Rules.am @@ -42,21 +42,6 @@ AM_CPPFLAGS += $(DEBUG_CPPFLAGS) AM_CPPFLAGS += $(CODE_COVERAGE_CPPFLAGS) AM_CPPFLAGS += -DTEXT_DOMAIN=\"zfs-@ac_system_l@-user\" -AM_CPPFLAGS_NOCHECK = -D"strtok(...)=strtok(__VA_ARGS__) __attribute__((deprecated(\"Use strtok_r(3) instead!\")))" -AM_CPPFLAGS_NOCHECK += -D"__xpg_basename(...)=__xpg_basename(__VA_ARGS__) __attribute__((deprecated(\"basename(3) is underspecified. Use zfs_basename() instead!\")))" -AM_CPPFLAGS_NOCHECK += -D"basename(...)=basename(__VA_ARGS__) __attribute__((deprecated(\"basename(3) is underspecified. Use zfs_basename() instead!\")))" -AM_CPPFLAGS_NOCHECK += -D"dirname(...)=dirname(__VA_ARGS__) __attribute__((deprecated(\"dirname(3) is underspecified. Use zfs_dirnamelen() instead!\")))" -AM_CPPFLAGS_NOCHECK += -D"bcopy(...)=__attribute__((deprecated(\"bcopy(3) is deprecated. Use memcpy(3)/memmove(3) instead!\"))) bcopy(__VA_ARGS__)" -AM_CPPFLAGS_NOCHECK += -D"bcmp(...)=__attribute__((deprecated(\"bcmp(3) is deprecated. 
Use memcmp(3) instead!\"))) bcmp(__VA_ARGS__)" -AM_CPPFLAGS_NOCHECK += -D"bzero(...)=__attribute__((deprecated(\"bzero(3) is deprecated. Use memset(3) instead!\"))) bzero(__VA_ARGS__)" -AM_CPPFLAGS_NOCHECK += -D"asctime(...)=__attribute__((deprecated(\"Use strftime(3) instead!\"))) asctime(__VA_ARGS__)" -AM_CPPFLAGS_NOCHECK += -D"asctime_r(...)=__attribute__((deprecated(\"Use strftime(3) instead!\"))) asctime_r(__VA_ARGS__)" -AM_CPPFLAGS_NOCHECK += -D"gmtime(...)=__attribute__((deprecated(\"gmtime(3) isn't thread-safe. Use gmtime_r(3) instead!\"))) gmtime(__VA_ARGS__)" -AM_CPPFLAGS_NOCHECK += -D"localtime(...)=__attribute__((deprecated(\"localtime(3) isn't thread-safe. Use localtime_r(3) instead!\"))) localtime(__VA_ARGS__)" -AM_CPPFLAGS_NOCHECK += -D"strncpy(...)=__attribute__((deprecated(\"strncpy(3) is deprecated. Use strlcpy(3) instead!\"))) strncpy(__VA_ARGS__)" - -AM_CPPFLAGS += $(AM_CPPFLAGS_NOCHECK) - if ASAN_ENABLED AM_CPPFLAGS += -DZFS_ASAN_ENABLED endif From 9ad150446fad14b1de6baf2b8bdef4a8965e6030 Mon Sep 17 00:00:00 2001 From: Tino Reichardt Date: Fri, 26 Jan 2024 23:22:26 +0100 Subject: [PATCH 29/45] ZTS: Update deprecated Github Action version numbers GitHub Actions is transitioning from Node 16 to Node 20. So we need to update these: - actions/checkout@v3 -> v4 - actions/download-artifact@v3 -> v4 - actions/upload-artifact@v3 -> v4 and some minor changes Update also the documentation of the testings workflow. 
Reviewed-by: Brian Behlendorf Reviewed-by: George Melikov Reviewed-by: Andrew Innes Signed-off-by: Tino Reichardt Closes #15820 --- .github/workflows/README.md | 41 +++++++++++-------- .github/workflows/checkstyle.yaml | 4 +- .github/workflows/codeql.yml | 2 +- .github/workflows/scripts/generate-summary.sh | 2 +- .github/workflows/zfs-linux-tests.yml | 22 +++++----- .github/workflows/zfs-linux.yml | 8 ++-- 6 files changed, 42 insertions(+), 37 deletions(-) diff --git a/.github/workflows/README.md b/.github/workflows/README.md index 8255dd2108..ab0555dcdd 100644 --- a/.github/workflows/README.md +++ b/.github/workflows/README.md @@ -4,44 +4,49 @@ ```mermaid flowchart TB subgraph CleanUp and Summary - Part1-20.04-->CleanUp+nice+Summary - Part2-20.04-->CleanUp+nice+Summary - PartN-20.04-->CleanUp+nice+Summary - Part1-22.04-->CleanUp+nice+Summary - Part2-22.04-->CleanUp+nice+Summary - PartN-22.04-->CleanUp+nice+Summary + sanity-checks-20.04-->CleanUp+Summary + Part1-20.04-->CleanUp+Summary + Part2-20.04-->CleanUp+Summary + Part3-20.04-->CleanUp+Summary + Part4-20.04-->CleanUp+Summary + Part1-22.04-->CleanUp+Summary + Part2-22.04-->CleanUp+Summary + Part3-22.04-->CleanUp+Summary + Part4-22.04-->CleanUp+Summary + sanity-checks-22.04-->CleanUp+Summary end subgraph Functional Testings + sanity-checks-20.04 + zloop-checks-20.04 functional-testing-20.04-->Part1-20.04 functional-testing-20.04-->Part2-20.04 - functional-testing-20.04-->PartN-20.04 + functional-testing-20.04-->Part3-20.04 + functional-testing-20.04-->Part4-20.04 functional-testing-22.04-->Part1-22.04 functional-testing-22.04-->Part2-22.04 - functional-testing-22.04-->PartN-22.04 -end - -subgraph Sanity and zloop Testings - sanity-checks-20.04-->functional-testing-20.04 - sanity-checks-22.04-->functional-testing-22.04 - zloop-checks-20.04-->functional - zloop-checks-22.04-->functional + functional-testing-22.04-->Part3-22.04 + functional-testing-22.04-->Part4-22.04 + sanity-checks-22.04 + zloop-checks-22.04 end 
subgraph Code Checking + Building + Build-Ubuntu-20.04-->sanity-checks-20.04 + Build-Ubuntu-20.04-->zloop-checks-20.04 + Build-Ubuntu-20.04-->functional-testing-20.04 codeql.yml checkstyle.yml - Build-Ubuntu-20.04-->sanity-checks-20.04 Build-Ubuntu-22.04-->sanity-checks-22.04 - Build-Ubuntu-20.04-->zloop-checks-20.04 Build-Ubuntu-22.04-->zloop-checks-22.04 + Build-Ubuntu-22.04-->functional-testing-22.04 end ``` 1) build zfs modules for Ubuntu 20.04 and 22.04 (~15m) 2) 2x zloop test (~10m) + 2x sanity test (~25m) -3) functional testings in parts 1..5 (each ~1h) +3) 4x functional testings in parts 1..4 (each ~1h) 4) cleanup and create summary - content of summary depends on the results of the steps diff --git a/.github/workflows/checkstyle.yaml b/.github/workflows/checkstyle.yaml index b0fdc570d4..abcb358fc0 100644 --- a/.github/workflows/checkstyle.yaml +++ b/.github/workflows/checkstyle.yaml @@ -8,7 +8,7 @@ jobs: checkstyle: runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: ref: ${{ github.event.pull_request.head.sha }} - name: Install dependencies @@ -52,7 +52,7 @@ jobs: if: failure() && steps.CheckABI.outcome == 'failure' run: | find -name *.abi | tar -cf abi_files.tar -T - - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 if: failure() && steps.CheckABI.outcome == 'failure' with: name: New ABI files (use only if you're sure about interface changes) diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 7ccfc14925..e015b2cb71 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -24,7 +24,7 @@ jobs: echo "MAKEFLAGS=-j$(nproc)" >> $GITHUB_ENV - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Initialize CodeQL uses: github/codeql-action/init@v2 diff --git a/.github/workflows/scripts/generate-summary.sh b/.github/workflows/scripts/generate-summary.sh index cd5ea3421c..b5d89208a5 100755 --- 
a/.github/workflows/scripts/generate-summary.sh +++ b/.github/workflows/scripts/generate-summary.sh @@ -87,7 +87,7 @@ function summarize_f() { output "\n## $headline\n" rm -rf testfiles for i in $(seq 1 $FUNCTIONAL_PARTS); do - tarfile="$2/part$i.tar" + tarfile="$2-part$i/part$i.tar" check_tarfile "$tarfile" check_logfile "testfiles/log" done diff --git a/.github/workflows/zfs-linux-tests.yml b/.github/workflows/zfs-linux-tests.yml index c4fe930d09..753f3cd021 100644 --- a/.github/workflows/zfs-linux-tests.yml +++ b/.github/workflows/zfs-linux-tests.yml @@ -13,10 +13,10 @@ jobs: zloop: runs-on: ubuntu-${{ inputs.os }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: ref: ${{ github.event.pull_request.head.sha }} - - uses: actions/download-artifact@v3 + - uses: actions/download-artifact@v4 with: name: modules-${{ inputs.os }} - name: Install modules @@ -34,7 +34,7 @@ jobs: if: failure() run: | sudo chmod +r -R /var/tmp/zloop/ - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 if: failure() with: name: Zpool-logs-${{ inputs.os }} @@ -43,7 +43,7 @@ jobs: !/var/tmp/zloop/*/vdev/ retention-days: 14 if-no-files-found: ignore - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 if: failure() with: name: Zpool-files-${{ inputs.os }} @@ -55,10 +55,10 @@ jobs: sanity: runs-on: ubuntu-${{ inputs.os }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: ref: ${{ github.event.pull_request.head.sha }} - - uses: actions/download-artifact@v3 + - uses: actions/download-artifact@v4 with: name: modules-${{ inputs.os }} - name: Install modules @@ -77,7 +77,7 @@ jobs: RESPATH="/var/tmp/test_results" mv -f $RESPATH/current $RESPATH/testfiles tar cf $RESPATH/sanity.tar -h -C $RESPATH testfiles - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 if: success() || failure() with: name: Logs-${{ inputs.os }}-sanity @@ -91,10 +91,10 @@ jobs: matrix: tests: [ part1, part2, part3, 
part4 ] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: ref: ${{ github.event.pull_request.head.sha }} - - uses: actions/download-artifact@v3 + - uses: actions/download-artifact@v4 with: name: modules-${{ inputs.os }} - name: Install modules @@ -116,9 +116,9 @@ jobs: RESPATH="/var/tmp/test_results" mv -f $RESPATH/current $RESPATH/testfiles tar cf $RESPATH/${{ matrix.tests }}.tar -h -C $RESPATH testfiles - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 if: success() || failure() with: - name: Logs-${{ inputs.os }}-functional + name: Logs-${{ inputs.os }}-functional-${{ matrix.tests }} path: /var/tmp/test_results/${{ matrix.tests }}.tar if-no-files-found: ignore diff --git a/.github/workflows/zfs-linux.yml b/.github/workflows/zfs-linux.yml index be3908deb9..e6b705c860 100644 --- a/.github/workflows/zfs-linux.yml +++ b/.github/workflows/zfs-linux.yml @@ -14,14 +14,14 @@ jobs: os: [20.04, 22.04] runs-on: ubuntu-${{ matrix.os }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: ref: ${{ github.event.pull_request.head.sha }} - name: Build modules run: .github/workflows/scripts/setup-dependencies.sh build - name: Prepare modules upload run: tar czf modules-${{ matrix.os }}.tgz *.deb .github tests/test-runner tests/ImageOS.txt - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: modules-${{ matrix.os }} path: modules-${{ matrix.os }}.tgz @@ -44,7 +44,7 @@ jobs: runs-on: ubuntu-22.04 needs: testings steps: - - uses: actions/download-artifact@v3 + - uses: actions/download-artifact@v4 - name: Generating summary run: | tar xzf modules-22.04/modules-22.04.tgz .github tests @@ -58,7 +58,7 @@ jobs: run: .github/workflows/scripts/generate-summary.sh 3 - name: Summary for errors #4 run: .github/workflows/scripts/generate-summary.sh 4 - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: Summary Files path: Summary/ From 
dd3a0a27157bb918e6e216b698fbdc22c3c3cc0d Mon Sep 17 00:00:00 2001 From: Ameer Hamza Date: Sat, 27 Jan 2024 03:24:35 +0500 Subject: [PATCH 30/45] Update vdev devid and physpath if changed between imports If devid or physpath for a vdev changes between imports, ensure it is updated to the new value. Reviewed-by: Brian Behlendorf Reviewed-by: Alexander Motin Signed-off-by: Ameer Hamza Closes #15816 --- module/zfs/vdev.c | 40 +++++++++++++++++++++++++++------------- 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index afb01c0ef7..e1ca1aecc9 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -2484,23 +2484,37 @@ vdev_validate(vdev_t *vd) return (0); } +static void +vdev_update_path(const char *prefix, char *svd, char **dvd, uint64_t guid) +{ + if (svd != NULL && *dvd != NULL) { + if (strcmp(svd, *dvd) != 0) { + zfs_dbgmsg("vdev_copy_path: vdev %llu: %s changed " + "from '%s' to '%s'", (u_longlong_t)guid, prefix, + *dvd, svd); + spa_strfree(*dvd); + *dvd = spa_strdup(svd); + } + } else if (svd != NULL) { + *dvd = spa_strdup(svd); + zfs_dbgmsg("vdev_copy_path: vdev %llu: path set to '%s'", + (u_longlong_t)guid, *dvd); + } +} + static void vdev_copy_path_impl(vdev_t *svd, vdev_t *dvd) { char *old, *new; - if (svd->vdev_path != NULL && dvd->vdev_path != NULL) { - if (strcmp(svd->vdev_path, dvd->vdev_path) != 0) { - zfs_dbgmsg("vdev_copy_path: vdev %llu: path changed " - "from '%s' to '%s'", (u_longlong_t)dvd->vdev_guid, - dvd->vdev_path, svd->vdev_path); - spa_strfree(dvd->vdev_path); - dvd->vdev_path = spa_strdup(svd->vdev_path); - } - } else if (svd->vdev_path != NULL) { - dvd->vdev_path = spa_strdup(svd->vdev_path); - zfs_dbgmsg("vdev_copy_path: vdev %llu: path set to '%s'", - (u_longlong_t)dvd->vdev_guid, dvd->vdev_path); - } + + vdev_update_path("vdev_path", svd->vdev_path, &dvd->vdev_path, + dvd->vdev_guid); + + vdev_update_path("vdev_devid", svd->vdev_devid, &dvd->vdev_devid, + dvd->vdev_guid); + + 
vdev_update_path("vdev_physpath", svd->vdev_physpath, + &dvd->vdev_physpath, dvd->vdev_guid); /* * Our enclosure sysfs path may have changed between imports From 0606ce20555a2392d9172e37d5e2ff3cdab5c1bd Mon Sep 17 00:00:00 2001 From: Rob N Date: Sat, 27 Jan 2024 09:41:31 +1100 Subject: [PATCH 31/45] zpool wait: print timestamp before the header list, status and iostat all display the -T timestamp before the header, but wait showed it after. Make it be like the others. Reported-by: Kyle Evans Reviewed-by: Brian Behlendorf Signed-off-by: Rob Norris Closes #15825 --- cmd/zpool/zpool_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index 5507f9d3fd..5f96dc8d00 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -10752,6 +10752,9 @@ print_wait_status_row(wait_data_t *wd, zpool_handle_t *zhp, int row) col_widths[i] = MAX(strlen(headers[i]), 6) + 2; } + if (timestamp_fmt != NODATE) + print_timestamp(timestamp_fmt); + /* Print header if appropriate */ int term_height = terminal_height(); boolean_t reprint_header = (!wd->wd_headers_once && term_height > 0 && @@ -10819,9 +10822,6 @@ print_wait_status_row(wait_data_t *wd, zpool_handle_t *zhp, int row) if (vdev_any_spare_replacing(nvroot)) bytes_rem[ZPOOL_WAIT_REPLACE] = bytes_rem[ZPOOL_WAIT_RESILVER]; - if (timestamp_fmt != NODATE) - print_timestamp(timestamp_fmt); - for (i = 0; i < ZPOOL_WAIT_NUM_ACTIVITIES; i++) { char buf[64]; if (!wd->wd_enabled[i]) From 7cd666d54b122e2e1ca2fb4519ff75fc8d488a43 Mon Sep 17 00:00:00 2001 From: Andrew Innes Date: Tue, 30 Jan 2024 01:16:02 +0800 Subject: [PATCH 32/45] Move nodes into correct subgraphs Reviewed-by: Brian Behlendorf Reviewed-by: George Melikov Reviewed-by: Tino Reichardt Signed-off-by: Andrew Innes Closes #15828 --- .github/workflows/README.md | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/.github/workflows/README.md 
b/.github/workflows/README.md index ab0555dcdd..eef47dae3d 100644 --- a/.github/workflows/README.md +++ b/.github/workflows/README.md @@ -4,16 +4,7 @@ ```mermaid flowchart TB subgraph CleanUp and Summary - sanity-checks-20.04-->CleanUp+Summary - Part1-20.04-->CleanUp+Summary - Part2-20.04-->CleanUp+Summary - Part3-20.04-->CleanUp+Summary - Part4-20.04-->CleanUp+Summary - Part1-22.04-->CleanUp+Summary - Part2-22.04-->CleanUp+Summary - Part3-22.04-->CleanUp+Summary - Part4-22.04-->CleanUp+Summary - sanity-checks-22.04-->CleanUp+Summary + CleanUp+Summary end subgraph Functional Testings @@ -32,15 +23,29 @@ subgraph Functional Testings end subgraph Code Checking + Building + Build-Ubuntu-20.04 + codeql.yml + checkstyle.yml + Build-Ubuntu-22.04 +end + Build-Ubuntu-20.04-->sanity-checks-20.04 Build-Ubuntu-20.04-->zloop-checks-20.04 Build-Ubuntu-20.04-->functional-testing-20.04 - codeql.yml - checkstyle.yml Build-Ubuntu-22.04-->sanity-checks-22.04 Build-Ubuntu-22.04-->zloop-checks-22.04 Build-Ubuntu-22.04-->functional-testing-22.04 -end + + sanity-checks-20.04-->CleanUp+Summary + Part1-20.04-->CleanUp+Summary + Part2-20.04-->CleanUp+Summary + Part3-20.04-->CleanUp+Summary + Part4-20.04-->CleanUp+Summary + Part1-22.04-->CleanUp+Summary + Part2-22.04-->CleanUp+Summary + Part3-22.04-->CleanUp+Summary + Part4-22.04-->CleanUp+Summary + sanity-checks-22.04-->CleanUp+Summary ``` From dd0874cf7ea3e67130662180fea0e40f54108abb Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Mon, 29 Jan 2024 09:41:26 -0800 Subject: [PATCH 33/45] ZTS: Allow longer run time for zdb_args_pos The zdb_args_pos test may take slightly longer than 600 seconds to run on some of the CI builders. To prevent this from causing failures allow up to 1200 seconds for tests in this group. 
Reviewed-by: Tony Hutter Signed-off-by: Brian Behlendorf Closes #15826 --- tests/runfiles/common.run | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run index f93bfb4333..85f29c8220 100644 --- a/tests/runfiles/common.run +++ b/tests/runfiles/common.run @@ -163,6 +163,7 @@ tests = ['zdb_002_pos', 'zdb_003_pos', 'zdb_004_pos', 'zdb_005_pos', pre = post = tags = ['functional', 'cli_root', 'zdb'] +timeout = 1200 [tests/functional/cli_root/zfs] tests = ['zfs_001_neg', 'zfs_002_pos'] From acc7cd8e99da50d775296694c42b2127e42a75b7 Mon Sep 17 00:00:00 2001 From: Chris Davidson Date: Mon, 29 Jan 2024 12:44:08 -0500 Subject: [PATCH 34/45] Update man pages to time(1) from time(2) zpool-iostat.8: Updated time(2) -> time(1) to align to manual page zpool-list.8: Updated time(2) -> time(1) to align to manual page zpool-status.8: Updated time(2) -> time(1) to align to manual page zpool-wait.8: Update time(2) -> time(1) to align to manual page Reviewed-by: Brian Behlendorf Signed-off-by: Christopher Davidson Closes #15823 --- man/man8/zpool-iostat.8 | 2 +- man/man8/zpool-list.8 | 2 +- man/man8/zpool-status.8 | 2 +- man/man8/zpool-wait.8 | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/man/man8/zpool-iostat.8 b/man/man8/zpool-iostat.8 index 34f7243d5a..e1d2a4b4ff 100644 --- a/man/man8/zpool-iostat.8 +++ b/man/man8/zpool-iostat.8 @@ -146,7 +146,7 @@ Specify .Sy u for a printed representation of the internal representation of time. See -.Xr time 2 . +.Xr time 1 . Specify .Sy d for standard date format. diff --git a/man/man8/zpool-list.8 b/man/man8/zpool-list.8 index 9e905d52dd..c60c47f5eb 100644 --- a/man/man8/zpool-list.8 +++ b/man/man8/zpool-list.8 @@ -95,7 +95,7 @@ Specify .Sy u for a printed representation of the internal representation of time. See -.Xr time 2 . +.Xr time 1 . Specify .Sy d for standard date format. 
diff --git a/man/man8/zpool-status.8 b/man/man8/zpool-status.8 index 8f9580cf08..10424b9f5b 100644 --- a/man/man8/zpool-status.8 +++ b/man/man8/zpool-status.8 @@ -110,7 +110,7 @@ Specify .Sy u for a printed representation of the internal representation of time. See -.Xr time 2 . +.Xr time 1 . Specify .Sy d for standard date format. diff --git a/man/man8/zpool-wait.8 b/man/man8/zpool-wait.8 index 683b014142..4fa4cb2356 100644 --- a/man/man8/zpool-wait.8 +++ b/man/man8/zpool-wait.8 @@ -97,7 +97,7 @@ Specify .Sy u for a printed representation of the internal representation of time. See -.Xr time 2 . +.Xr time 1 . Specify .Sy d for standard date format. From ab653603f8e113208539fcc1426321cdbb17451d Mon Sep 17 00:00:00 2001 From: Paul Dagnelie Date: Mon, 29 Jan 2024 10:36:42 -0800 Subject: [PATCH 35/45] Don't assert mg_initialized due to device addition race During device removal stress tests, we noticed that we were tripping the assertion that mg_initialized was true. After investigation, it was determined that the mg in question was the embedded log metaslab group for a newly added vdev; the normal mg had been initialized (by metaslab_sync_reassess, via vdev_sync_done). However, because the spa config alloc lock is not held as writer across both calls to metaslab_sync_reassess, it is possible for an allocation to happen between the two metaslab_groups being initialized. Because the metaslab code doesn't check the group in question, just the vdev's main mg, it is possible to get past the initial check in vdev_allocatable and later fail due to the assertion. We simply remove the assertions. We could also consider locking the ALLOC lock around the reassess calls in vdev_sync_done, but that risks deadlocks. We could check the actual target mg in vdev_allocatable, but that risks racing with a passivation that comes in after that check but before the assertion. 
We still won't be able to actually allocate from the metaslab group if no metaslabs are ready, so this change shouldn't break anything. Reviewed-by: Brian Behlendorf Reviewed-by: George Wilson Signed-off-by: Paul Dagnelie Closes #15818 --- module/zfs/metaslab.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c index 599d7ffa0c..5809a832bc 100644 --- a/module/zfs/metaslab.c +++ b/module/zfs/metaslab.c @@ -5061,7 +5061,6 @@ metaslab_group_alloc(metaslab_group_t *mg, zio_alloc_list_t *zal, int allocator, boolean_t try_hard) { uint64_t offset; - ASSERT(mg->mg_initialized); offset = metaslab_group_alloc_normal(mg, zal, asize, txg, want_unique, dva, d, allocator, try_hard); @@ -5212,8 +5211,6 @@ top: goto next; } - ASSERT(mg->mg_initialized); - /* * Avoid writing single-copy data to an unhealthy, * non-redundant vdev, unless we've already tried all From 621dfaff5ce1673ca1edce82e44cb70b2e00316e Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Mon, 29 Jan 2024 11:35:43 -0800 Subject: [PATCH 36/45] Linux 6.7 compat: META Update the META file to reflect compatibility with the 6.7 kernel. Reviewed-by: Tony Hutter Signed-off-by: Brian Behlendorf Closes #15833 --- META | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/META b/META index 93045ec3ab..05337a9c50 100644 --- a/META +++ b/META @@ -6,5 +6,5 @@ Release: 1 Release-Tags: relext License: CDDL Author: OpenZFS -Linux-Maximum: 6.6 +Linux-Maximum: 6.7 Linux-Minimum: 3.10 From 64afc4e66edf6a740f1c7ab808a452e42d964eb7 Mon Sep 17 00:00:00 2001 From: Rob Norris Date: Tue, 23 Jan 2024 10:50:53 +1100 Subject: [PATCH 37/45] Linux 6.8 compat: make test functions static The kernel is now being compiled with -Wmissing-prototypes. Most of our test stub functions had no prototype, and failed to compile. Since they don't need to be visible anywhere else, just make them all static. 
Reviewed-by: Brian Behlendorf Signed-off-by: Rob Norris Sponsored-by: https://despairlabs.com/sponsor/ Closes #15805 --- config/kernel-acl.m4 | 14 +++++++------- config/kernel-automount.m4 | 2 +- config/kernel-bio.m4 | 2 +- config/kernel-block-device-operations.m4 | 8 ++++---- config/kernel-commit-metadata.m4 | 2 +- config/kernel-dentry-operations.m4 | 2 +- config/kernel-dirty-inode.m4 | 2 +- config/kernel-encode-fh-inode.m4 | 2 +- config/kernel-evict-inode.m4 | 2 +- config/kernel-fallocate.m4 | 2 +- config/kernel-fsync.m4 | 4 ++-- config/kernel-get-link.m4 | 8 ++++---- config/kernel-inode-create.m4 | 6 +++--- config/kernel-inode-getattr.m4 | 8 ++++---- config/kernel-inode-lookup.m4 | 2 +- config/kernel-inode-permission.m4 | 4 ++-- config/kernel-inode-setattr.m4 | 6 +++--- config/kernel-make-request-fn.m4 | 8 ++++---- config/kernel-mkdir.m4 | 6 +++--- config/kernel-mknod.m4 | 4 ++-- config/kernel-proc-operations.m4 | 10 +++++----- config/kernel-put-link.m4 | 4 ++-- config/kernel-rename.m4 | 10 +++++----- config/kernel-show-options.m4 | 2 +- config/kernel-shrink.m4 | 13 +++++-------- config/kernel-symlink.m4 | 4 ++-- config/kernel-timer.m4 | 4 ++-- config/kernel-tmpfile.m4 | 8 ++++---- config/kernel-vfs-direct_IO.m4 | 8 ++++---- config/kernel-vfs-iterate.m4 | 6 +++--- config/kernel-vfs-rw-iterate.m4 | 4 ++-- config/kernel-writepage_t.m4 | 2 +- config/kernel-xattr-handler.m4 | 24 ++++++++++++------------ 33 files changed, 95 insertions(+), 98 deletions(-) diff --git a/config/kernel-acl.m4 b/config/kernel-acl.m4 index be08c3c607..3ae5dc6b6d 100644 --- a/config/kernel-acl.m4 +++ b/config/kernel-acl.m4 @@ -172,7 +172,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_GET_ACL], [ ZFS_LINUX_TEST_SRC([inode_operations_get_acl], [ #include - struct posix_acl *get_acl_fn(struct inode *inode, int type) + static struct posix_acl *get_acl_fn(struct inode *inode, int type) { return NULL; } static const struct inode_operations @@ -184,7 +184,7 @@ 
AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_GET_ACL], [ ZFS_LINUX_TEST_SRC([inode_operations_get_acl_rcu], [ #include - struct posix_acl *get_acl_fn(struct inode *inode, int type, + static struct posix_acl *get_acl_fn(struct inode *inode, int type, bool rcu) { return NULL; } static const struct inode_operations @@ -196,7 +196,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_GET_ACL], [ ZFS_LINUX_TEST_SRC([inode_operations_get_inode_acl], [ #include - struct posix_acl *get_inode_acl_fn(struct inode *inode, int type, + static struct posix_acl *get_inode_acl_fn(struct inode *inode, int type, bool rcu) { return NULL; } static const struct inode_operations @@ -243,7 +243,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_SET_ACL], [ ZFS_LINUX_TEST_SRC([inode_operations_set_acl_mnt_idmap_dentry], [ #include - int set_acl_fn(struct mnt_idmap *idmap, + static int set_acl_fn(struct mnt_idmap *idmap, struct dentry *dent, struct posix_acl *acl, int type) { return 0; } @@ -255,7 +255,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_SET_ACL], [ ZFS_LINUX_TEST_SRC([inode_operations_set_acl_userns_dentry], [ #include - int set_acl_fn(struct user_namespace *userns, + static int set_acl_fn(struct user_namespace *userns, struct dentry *dent, struct posix_acl *acl, int type) { return 0; } @@ -267,7 +267,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_SET_ACL], [ ZFS_LINUX_TEST_SRC([inode_operations_set_acl_userns], [ #include - int set_acl_fn(struct user_namespace *userns, + static int set_acl_fn(struct user_namespace *userns, struct inode *inode, struct posix_acl *acl, int type) { return 0; } @@ -279,7 +279,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_SET_ACL], [ ZFS_LINUX_TEST_SRC([inode_operations_set_acl], [ #include - int set_acl_fn(struct inode *inode, struct posix_acl *acl, + static int set_acl_fn(struct inode *inode, struct posix_acl *acl, int type) { return 0; } static const struct inode_operations diff --git a/config/kernel-automount.m4 b/config/kernel-automount.m4 
index f7bb63c681..52f1931b74 100644 --- a/config/kernel-automount.m4 +++ b/config/kernel-automount.m4 @@ -8,7 +8,7 @@ dnl # AC_DEFUN([ZFS_AC_KERNEL_SRC_AUTOMOUNT], [ ZFS_LINUX_TEST_SRC([dentry_operations_d_automount], [ #include - struct vfsmount *d_automount(struct path *p) { return NULL; } + static struct vfsmount *d_automount(struct path *p) { return NULL; } struct dentry_operations dops __attribute__ ((unused)) = { .d_automount = d_automount, }; diff --git a/config/kernel-bio.m4 b/config/kernel-bio.m4 index 18620ca5b7..b22c1a3de7 100644 --- a/config/kernel-bio.m4 +++ b/config/kernel-bio.m4 @@ -247,7 +247,7 @@ dnl # AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_END_IO_T_ARGS], [ ZFS_LINUX_TEST_SRC([bio_end_io_t_args], [ #include - void wanted_end_io(struct bio *bio) { return; } + static void wanted_end_io(struct bio *bio) { return; } bio_end_io_t *end_io __attribute__ ((unused)) = wanted_end_io; ], []) ]) diff --git a/config/kernel-block-device-operations.m4 b/config/kernel-block-device-operations.m4 index d13c1337b1..4ff20b9c41 100644 --- a/config/kernel-block-device-operations.m4 +++ b/config/kernel-block-device-operations.m4 @@ -5,7 +5,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS], [ ZFS_LINUX_TEST_SRC([block_device_operations_check_events], [ #include - unsigned int blk_check_events(struct gendisk *disk, + static unsigned int blk_check_events(struct gendisk *disk, unsigned int clearing) { (void) disk, (void) clearing; return (0); @@ -34,7 +34,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID], [ ZFS_LINUX_TEST_SRC([block_device_operations_release_void], [ #include - void blk_release(struct gendisk *g, fmode_t mode) { + static void blk_release(struct gendisk *g, fmode_t mode) { (void) g, (void) mode; return; } @@ -56,7 +56,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS_RELEASE_1ARG], [ ZFS_LINUX_TEST_SRC([block_device_operations_release_void_1arg], [ #include - void blk_release(struct gendisk *g) { + static void 
blk_release(struct gendisk *g) { (void) g; return; } @@ -96,7 +96,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS_REVALIDATE_DISK], [ ZFS_LINUX_TEST_SRC([block_device_operations_revalidate_disk], [ #include - int blk_revalidate_disk(struct gendisk *disk) { + static int blk_revalidate_disk(struct gendisk *disk) { (void) disk; return(0); } diff --git a/config/kernel-commit-metadata.m4 b/config/kernel-commit-metadata.m4 index 7df9b98029..49bffbf609 100644 --- a/config/kernel-commit-metadata.m4 +++ b/config/kernel-commit-metadata.m4 @@ -7,7 +7,7 @@ dnl # AC_DEFUN([ZFS_AC_KERNEL_SRC_COMMIT_METADATA], [ ZFS_LINUX_TEST_SRC([export_operations_commit_metadata], [ #include - int commit_metadata(struct inode *inode) { return 0; } + static int commit_metadata(struct inode *inode) { return 0; } static struct export_operations eops __attribute__ ((unused))={ .commit_metadata = commit_metadata, }; diff --git a/config/kernel-dentry-operations.m4 b/config/kernel-dentry-operations.m4 index dd470d7607..500f61e26a 100644 --- a/config/kernel-dentry-operations.m4 +++ b/config/kernel-dentry-operations.m4 @@ -98,7 +98,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_D_REVALIDATE_NAMEIDATA], [ #include #include - int revalidate (struct dentry *dentry, + static int revalidate (struct dentry *dentry, struct nameidata *nidata) { return 0; } static const struct dentry_operations diff --git a/config/kernel-dirty-inode.m4 b/config/kernel-dirty-inode.m4 index dc7667fa48..2ef8658748 100644 --- a/config/kernel-dirty-inode.m4 +++ b/config/kernel-dirty-inode.m4 @@ -8,7 +8,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_DIRTY_INODE], [ ZFS_LINUX_TEST_SRC([dirty_inode_with_flags], [ #include - void dirty_inode(struct inode *a, int b) { return; } + static void dirty_inode(struct inode *a, int b) { return; } static const struct super_operations sops __attribute__ ((unused)) = { diff --git a/config/kernel-encode-fh-inode.m4 b/config/kernel-encode-fh-inode.m4 index 9d4ba5f0f6..b3ec040b5e 100644 --- 
a/config/kernel-encode-fh-inode.m4 +++ b/config/kernel-encode-fh-inode.m4 @@ -7,7 +7,7 @@ dnl # AC_DEFUN([ZFS_AC_KERNEL_SRC_ENCODE_FH_WITH_INODE], [ ZFS_LINUX_TEST_SRC([export_operations_encode_fh], [ #include - int encode_fh(struct inode *inode, __u32 *fh, int *max_len, + static int encode_fh(struct inode *inode, __u32 *fh, int *max_len, struct inode *parent) { return 0; } static struct export_operations eops __attribute__ ((unused))={ .encode_fh = encode_fh, diff --git a/config/kernel-evict-inode.m4 b/config/kernel-evict-inode.m4 index 66f10492de..87082c9a28 100644 --- a/config/kernel-evict-inode.m4 +++ b/config/kernel-evict-inode.m4 @@ -6,7 +6,7 @@ dnl # AC_DEFUN([ZFS_AC_KERNEL_SRC_EVICT_INODE], [ ZFS_LINUX_TEST_SRC([evict_inode], [ #include - void evict_inode (struct inode * t) { return; } + static void evict_inode (struct inode * t) { return; } static struct super_operations sops __attribute__ ((unused)) = { .evict_inode = evict_inode, }; diff --git a/config/kernel-fallocate.m4 b/config/kernel-fallocate.m4 index 815602d3e2..95186dada4 100644 --- a/config/kernel-fallocate.m4 +++ b/config/kernel-fallocate.m4 @@ -11,7 +11,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_FALLOCATE], [ ZFS_LINUX_TEST_SRC([file_fallocate], [ #include - long test_fallocate(struct file *file, int mode, + static long test_fallocate(struct file *file, int mode, loff_t offset, loff_t len) { return 0; } static const struct file_operations diff --git a/config/kernel-fsync.m4 b/config/kernel-fsync.m4 index d198191d3a..c155f8af81 100644 --- a/config/kernel-fsync.m4 +++ b/config/kernel-fsync.m4 @@ -5,7 +5,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_FSYNC], [ ZFS_LINUX_TEST_SRC([fsync_without_dentry], [ #include - int test_fsync(struct file *f, int x) { return 0; } + static int test_fsync(struct file *f, int x) { return 0; } static const struct file_operations fops __attribute__ ((unused)) = { @@ -16,7 +16,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_FSYNC], [ ZFS_LINUX_TEST_SRC([fsync_range], [ #include - int test_fsync(struct 
file *f, loff_t a, loff_t b, int c) + static int test_fsync(struct file *f, loff_t a, loff_t b, int c) { return 0; } static const struct file_operations diff --git a/config/kernel-get-link.m4 b/config/kernel-get-link.m4 index e4f478e37c..1f8f5b0c8b 100644 --- a/config/kernel-get-link.m4 +++ b/config/kernel-get-link.m4 @@ -5,7 +5,7 @@ dnl # AC_DEFUN([ZFS_AC_KERNEL_SRC_GET_LINK], [ ZFS_LINUX_TEST_SRC([inode_operations_get_link], [ #include - const char *get_link(struct dentry *de, struct inode *ip, + static const char *get_link(struct dentry *de, struct inode *ip, struct delayed_call *done) { return "symlink"; } static struct inode_operations iops __attribute__ ((unused)) = { @@ -15,7 +15,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_GET_LINK], [ ZFS_LINUX_TEST_SRC([inode_operations_get_link_cookie], [ #include - const char *get_link(struct dentry *de, struct + static const char *get_link(struct dentry *de, struct inode *ip, void **cookie) { return "symlink"; } static struct inode_operations iops __attribute__ ((unused)) = { @@ -25,7 +25,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_GET_LINK], [ ZFS_LINUX_TEST_SRC([inode_operations_follow_link], [ #include - const char *follow_link(struct dentry *de, + static const char *follow_link(struct dentry *de, void **cookie) { return "symlink"; } static struct inode_operations iops __attribute__ ((unused)) = { @@ -35,7 +35,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_GET_LINK], [ ZFS_LINUX_TEST_SRC([inode_operations_follow_link_nameidata], [ #include - void *follow_link(struct dentry *de, struct + static void *follow_link(struct dentry *de, struct nameidata *nd) { return (void *)NULL; } static struct inode_operations iops __attribute__ ((unused)) = { diff --git a/config/kernel-inode-create.m4 b/config/kernel-inode-create.m4 index 9e9e431809..95f8aa2d52 100644 --- a/config/kernel-inode-create.m4 +++ b/config/kernel-inode-create.m4 @@ -7,7 +7,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_CREATE], [ #include #include - int inode_create(struct mnt_idmap *idmap, + static int 
inode_create(struct mnt_idmap *idmap, struct inode *inode ,struct dentry *dentry, umode_t umode, bool flag) { return 0; } @@ -25,7 +25,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_CREATE], [ #include #include - int inode_create(struct user_namespace *userns, + static int inode_create(struct user_namespace *userns, struct inode *inode ,struct dentry *dentry, umode_t umode, bool flag) { return 0; } @@ -42,7 +42,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_CREATE], [ #include #include - int inode_create(struct inode *inode ,struct dentry *dentry, + static int inode_create(struct inode *inode ,struct dentry *dentry, umode_t umode, bool flag) { return 0; } static const struct inode_operations diff --git a/config/kernel-inode-getattr.m4 b/config/kernel-inode-getattr.m4 index c8bfb07862..5f7ce1ad9a 100644 --- a/config/kernel-inode-getattr.m4 +++ b/config/kernel-inode-getattr.m4 @@ -7,7 +7,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_GETATTR], [ ZFS_LINUX_TEST_SRC([inode_operations_getattr_mnt_idmap], [ #include - int test_getattr( + static int test_getattr( struct mnt_idmap *idmap, const struct path *p, struct kstat *k, u32 request_mask, unsigned int query_flags) @@ -28,7 +28,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_GETATTR], [ ZFS_LINUX_TEST_SRC([inode_operations_getattr_userns], [ #include - int test_getattr( + static int test_getattr( struct user_namespace *userns, const struct path *p, struct kstat *k, u32 request_mask, unsigned int query_flags) @@ -47,7 +47,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_GETATTR], [ ZFS_LINUX_TEST_SRC([inode_operations_getattr_path], [ #include - int test_getattr( + static int test_getattr( const struct path *p, struct kstat *k, u32 request_mask, unsigned int query_flags) { return 0; } @@ -61,7 +61,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_GETATTR], [ ZFS_LINUX_TEST_SRC([inode_operations_getattr_vfsmount], [ #include - int test_getattr( + static int test_getattr( struct vfsmount *mnt, struct dentry *d, struct kstat *k) { return 0; } diff --git a/config/kernel-inode-lookup.m4 
b/config/kernel-inode-lookup.m4 index 1a56e69b04..c737305642 100644 --- a/config/kernel-inode-lookup.m4 +++ b/config/kernel-inode-lookup.m4 @@ -6,7 +6,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_LOOKUP_FLAGS], [ #include #include - struct dentry *inode_lookup(struct inode *inode, + static struct dentry *inode_lookup(struct inode *inode, struct dentry *dentry, unsigned int flags) { return NULL; } static const struct inode_operations iops diff --git a/config/kernel-inode-permission.m4 b/config/kernel-inode-permission.m4 index 01d23635b0..aef4005c40 100644 --- a/config/kernel-inode-permission.m4 +++ b/config/kernel-inode-permission.m4 @@ -8,7 +8,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_PERMISSION], [ #include #include - int inode_permission(struct mnt_idmap *idmap, + static int inode_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { return 0; } static const struct inode_operations @@ -25,7 +25,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_PERMISSION], [ #include #include - int inode_permission(struct user_namespace *userns, + static int inode_permission(struct user_namespace *userns, struct inode *inode, int mask) { return 0; } static const struct inode_operations diff --git a/config/kernel-inode-setattr.m4 b/config/kernel-inode-setattr.m4 index 45755b4eb2..69289e897b 100644 --- a/config/kernel-inode-setattr.m4 +++ b/config/kernel-inode-setattr.m4 @@ -7,7 +7,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_SETATTR], [ ZFS_LINUX_TEST_SRC([inode_operations_setattr_mnt_idmap], [ #include - int test_setattr( + static int test_setattr( struct mnt_idmap *idmap, struct dentry *de, struct iattr *ia) { return 0; } @@ -27,7 +27,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_SETATTR], [ ZFS_LINUX_TEST_SRC([inode_operations_setattr_userns], [ #include - int test_setattr( + static int test_setattr( struct user_namespace *userns, struct dentry *de, struct iattr *ia) { return 0; } @@ -41,7 +41,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_SETATTR], [ ZFS_LINUX_TEST_SRC([inode_operations_setattr], [ #include - int 
test_setattr( + static int test_setattr( struct dentry *de, struct iattr *ia) { return 0; } diff --git a/config/kernel-make-request-fn.m4 b/config/kernel-make-request-fn.m4 index f17416acca..4d20dd45c4 100644 --- a/config/kernel-make-request-fn.m4 +++ b/config/kernel-make-request-fn.m4 @@ -4,7 +4,7 @@ dnl # AC_DEFUN([ZFS_AC_KERNEL_SRC_MAKE_REQUEST_FN], [ ZFS_LINUX_TEST_SRC([make_request_fn_void], [ #include - void make_request(struct request_queue *q, + static void make_request(struct request_queue *q, struct bio *bio) { return; } ],[ blk_queue_make_request(NULL, &make_request); @@ -12,7 +12,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_MAKE_REQUEST_FN], [ ZFS_LINUX_TEST_SRC([make_request_fn_blk_qc_t], [ #include - blk_qc_t make_request(struct request_queue *q, + static blk_qc_t make_request(struct request_queue *q, struct bio *bio) { return (BLK_QC_T_NONE); } ],[ blk_queue_make_request(NULL, &make_request); @@ -20,7 +20,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_MAKE_REQUEST_FN], [ ZFS_LINUX_TEST_SRC([blk_alloc_queue_request_fn], [ #include - blk_qc_t make_request(struct request_queue *q, + static blk_qc_t make_request(struct request_queue *q, struct bio *bio) { return (BLK_QC_T_NONE); } ],[ struct request_queue *q __attribute__ ((unused)); @@ -29,7 +29,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_MAKE_REQUEST_FN], [ ZFS_LINUX_TEST_SRC([blk_alloc_queue_request_fn_rh], [ #include - blk_qc_t make_request(struct request_queue *q, + static blk_qc_t make_request(struct request_queue *q, struct bio *bio) { return (BLK_QC_T_NONE); } ],[ struct request_queue *q __attribute__ ((unused)); diff --git a/config/kernel-mkdir.m4 b/config/kernel-mkdir.m4 index 7407a791b8..367f100094 100644 --- a/config/kernel-mkdir.m4 +++ b/config/kernel-mkdir.m4 @@ -9,7 +9,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_MKDIR], [ ZFS_LINUX_TEST_SRC([mkdir_mnt_idmap], [ #include - int mkdir(struct mnt_idmap *idmap, + static int mkdir(struct mnt_idmap *idmap, struct inode *inode, struct dentry *dentry, umode_t umode) { return 0; } static const 
struct inode_operations @@ -26,7 +26,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_MKDIR], [ ZFS_LINUX_TEST_SRC([mkdir_user_namespace], [ #include - int mkdir(struct user_namespace *userns, + static int mkdir(struct user_namespace *userns, struct inode *inode, struct dentry *dentry, umode_t umode) { return 0; } @@ -47,7 +47,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_MKDIR], [ ZFS_LINUX_TEST_SRC([inode_operations_mkdir], [ #include - int mkdir(struct inode *inode, struct dentry *dentry, + static int mkdir(struct inode *inode, struct dentry *dentry, umode_t umode) { return 0; } static const struct inode_operations diff --git a/config/kernel-mknod.m4 b/config/kernel-mknod.m4 index 1494ec1ae4..6ad3453aaf 100644 --- a/config/kernel-mknod.m4 +++ b/config/kernel-mknod.m4 @@ -7,7 +7,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_MKNOD], [ #include #include - int tmp_mknod(struct mnt_idmap *idmap, + static int tmp_mknod(struct mnt_idmap *idmap, struct inode *inode ,struct dentry *dentry, umode_t u, dev_t d) { return 0; } @@ -25,7 +25,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_MKNOD], [ #include #include - int tmp_mknod(struct user_namespace *userns, + static int tmp_mknod(struct user_namespace *userns, struct inode *inode ,struct dentry *dentry, umode_t u, dev_t d) { return 0; } diff --git a/config/kernel-proc-operations.m4 b/config/kernel-proc-operations.m4 index df216222ec..3ae8ce2b6d 100644 --- a/config/kernel-proc-operations.m4 +++ b/config/kernel-proc-operations.m4 @@ -7,14 +7,14 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_PROC_OPERATIONS], [ ZFS_LINUX_TEST_SRC([proc_ops_struct], [ #include - int test_open(struct inode *ip, struct file *fp) { return 0; } - ssize_t test_read(struct file *fp, char __user *ptr, + static int test_open(struct inode *ip, struct file *fp) { return 0; } + static ssize_t test_read(struct file *fp, char __user *ptr, size_t size, loff_t *offp) { return 0; } - ssize_t test_write(struct file *fp, const char __user *ptr, + static ssize_t test_write(struct file *fp, const char __user *ptr, size_t size, loff_t 
*offp) { return 0; } - loff_t test_lseek(struct file *fp, loff_t off, int flag) + static loff_t test_lseek(struct file *fp, loff_t off, int flag) { return 0; } - int test_release(struct inode *ip, struct file *fp) + static int test_release(struct inode *ip, struct file *fp) { return 0; } const struct proc_ops test_ops __attribute__ ((unused)) = { diff --git a/config/kernel-put-link.m4 b/config/kernel-put-link.m4 index 4234861f33..8ab318cbff 100644 --- a/config/kernel-put-link.m4 +++ b/config/kernel-put-link.m4 @@ -4,7 +4,7 @@ dnl # AC_DEFUN([ZFS_AC_KERNEL_SRC_PUT_LINK], [ ZFS_LINUX_TEST_SRC([put_link_cookie], [ #include - void put_link(struct inode *ip, void *cookie) + static void put_link(struct inode *ip, void *cookie) { return; } static struct inode_operations iops __attribute__ ((unused)) = { @@ -14,7 +14,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_PUT_LINK], [ ZFS_LINUX_TEST_SRC([put_link_nameidata], [ #include - void put_link(struct dentry *de, struct + static void put_link(struct dentry *de, struct nameidata *nd, void *ptr) { return; } static struct inode_operations iops __attribute__ ((unused)) = { diff --git a/config/kernel-rename.m4 b/config/kernel-rename.m4 index 57c3eed789..ce881502d1 100644 --- a/config/kernel-rename.m4 +++ b/config/kernel-rename.m4 @@ -8,7 +8,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_RENAME], [ dnl # ZFS_LINUX_TEST_SRC([inode_operations_rename2], [ #include - int rename2_fn(struct inode *sip, struct dentry *sdp, + static int rename2_fn(struct inode *sip, struct dentry *sdp, struct inode *tip, struct dentry *tdp, unsigned int flags) { return 0; } @@ -26,7 +26,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_RENAME], [ dnl # ZFS_LINUX_TEST_SRC([inode_operations_rename_flags], [ #include - int rename_fn(struct inode *sip, struct dentry *sdp, + static int rename_fn(struct inode *sip, struct dentry *sdp, struct inode *tip, struct dentry *tdp, unsigned int flags) { return 0; } @@ -44,7 +44,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_RENAME], [ dnl # 
ZFS_LINUX_TEST_SRC([dir_inode_operations_wrapper_rename2], [ #include - int rename2_fn(struct inode *sip, struct dentry *sdp, + static int rename2_fn(struct inode *sip, struct dentry *sdp, struct inode *tip, struct dentry *tdp, unsigned int flags) { return 0; } @@ -62,7 +62,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_RENAME], [ dnl # ZFS_LINUX_TEST_SRC([inode_operations_rename_userns], [ #include - int rename_fn(struct user_namespace *user_ns, struct inode *sip, + static int rename_fn(struct user_namespace *user_ns, struct inode *sip, struct dentry *sdp, struct inode *tip, struct dentry *tdp, unsigned int flags) { return 0; } @@ -77,7 +77,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_RENAME], [ dnl # ZFS_LINUX_TEST_SRC([inode_operations_rename_mnt_idmap], [ #include - int rename_fn(struct mnt_idmap *idmap, struct inode *sip, + static int rename_fn(struct mnt_idmap *idmap, struct inode *sip, struct dentry *sdp, struct inode *tip, struct dentry *tdp, unsigned int flags) { return 0; } diff --git a/config/kernel-show-options.m4 b/config/kernel-show-options.m4 index 93bd5fbfbb..fd62f30086 100644 --- a/config/kernel-show-options.m4 +++ b/config/kernel-show-options.m4 @@ -5,7 +5,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_SHOW_OPTIONS], [ ZFS_LINUX_TEST_SRC([super_operations_show_options], [ #include - int show_options(struct seq_file * x, struct dentry * y) { + static int show_options(struct seq_file * x, struct dentry * y) { return 0; }; diff --git a/config/kernel-shrink.m4 b/config/kernel-shrink.m4 index 4a529c43b5..6580b08d5f 100644 --- a/config/kernel-shrink.m4 +++ b/config/kernel-shrink.m4 @@ -8,9 +8,6 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_SUPER_BLOCK_S_SHRINK], [ ZFS_LINUX_TEST_SRC([super_block_s_shrink], [ #include - int shrink(struct shrinker *s, struct shrink_control *sc) - { return 0; } - static const struct super_block sb __attribute__ ((unused)) = { .s_shrink.seeks = DEFAULT_SEEKS, @@ -26,7 +23,7 @@ dnl # AC_DEFUN([ZFS_AC_KERNEL_SRC_SUPER_BLOCK_S_SHRINK_PTR], [ 
ZFS_LINUX_TEST_SRC([super_block_s_shrink_ptr], [ #include - unsigned long shrinker_cb(struct shrinker *shrink, + static unsigned long shrinker_cb(struct shrinker *shrink, struct shrink_control *sc) { return 0; } static struct shrinker shrinker = { .count_objects = shrinker_cb, @@ -89,7 +86,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SHRINK_CONTROL_HAS_NID], [ AC_DEFUN([ZFS_AC_KERNEL_SRC_REGISTER_SHRINKER_VARARG], [ ZFS_LINUX_TEST_SRC([register_shrinker_vararg], [ #include - unsigned long shrinker_cb(struct shrinker *shrink, + static unsigned long shrinker_cb(struct shrinker *shrink, struct shrink_control *sc) { return 0; } ],[ struct shrinker cache_shrinker = { @@ -104,7 +101,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_REGISTER_SHRINKER_VARARG], [ AC_DEFUN([ZFS_AC_KERNEL_SRC_SHRINKER_CALLBACK], [ ZFS_LINUX_TEST_SRC([shrinker_cb_shrink_control], [ #include - int shrinker_cb(struct shrinker *shrink, + static int shrinker_cb(struct shrinker *shrink, struct shrink_control *sc) { return 0; } ],[ struct shrinker cache_shrinker = { @@ -116,7 +113,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_SHRINKER_CALLBACK], [ ZFS_LINUX_TEST_SRC([shrinker_cb_shrink_control_split], [ #include - unsigned long shrinker_cb(struct shrinker *shrink, + static unsigned long shrinker_cb(struct shrinker *shrink, struct shrink_control *sc) { return 0; } ],[ struct shrinker cache_shrinker = { @@ -135,7 +132,7 @@ dnl # AC_DEFUN([ZFS_AC_KERNEL_SRC_SHRINKER_REGISTER], [ ZFS_LINUX_TEST_SRC([shrinker_register], [ #include - unsigned long shrinker_cb(struct shrinker *shrink, + static unsigned long shrinker_cb(struct shrinker *shrink, struct shrink_control *sc) { return 0; } ],[ struct shrinker cache_shrinker = { diff --git a/config/kernel-symlink.m4 b/config/kernel-symlink.m4 index a0333ed66a..804fceab28 100644 --- a/config/kernel-symlink.m4 +++ b/config/kernel-symlink.m4 @@ -6,7 +6,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_SYMLINK], [ ZFS_LINUX_TEST_SRC([symlink_mnt_idmap], [ #include #include - int tmp_symlink(struct mnt_idmap *idmap, + static int 
tmp_symlink(struct mnt_idmap *idmap, struct inode *inode ,struct dentry *dentry, const char *path) { return 0; } @@ -23,7 +23,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_SYMLINK], [ #include #include - int tmp_symlink(struct user_namespace *userns, + static int tmp_symlink(struct user_namespace *userns, struct inode *inode ,struct dentry *dentry, const char *path) { return 0; } diff --git a/config/kernel-timer.m4 b/config/kernel-timer.m4 index 403cff3f41..c710e804be 100644 --- a/config/kernel-timer.m4 +++ b/config/kernel-timer.m4 @@ -18,7 +18,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_TIMER_SETUP], [ int data; }; - void task_expire(struct timer_list *tl) + static void task_expire(struct timer_list *tl) { struct my_task_timer *task_timer = from_timer(task_timer, tl, timer); @@ -31,7 +31,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_TIMER_SETUP], [ ZFS_LINUX_TEST_SRC([timer_list_function], [ #include - void task_expire(struct timer_list *tl) {} + static void task_expire(struct timer_list *tl) {} ],[ struct timer_list tl; tl.function = task_expire; diff --git a/config/kernel-tmpfile.m4 b/config/kernel-tmpfile.m4 index cc18b8f65a..7439514186 100644 --- a/config/kernel-tmpfile.m4 +++ b/config/kernel-tmpfile.m4 @@ -9,7 +9,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_TMPFILE], [ dnl # ZFS_LINUX_TEST_SRC([inode_operations_tmpfile_mnt_idmap], [ #include - int tmpfile(struct mnt_idmap *idmap, + static int tmpfile(struct mnt_idmap *idmap, struct inode *inode, struct file *file, umode_t mode) { return 0; } static struct inode_operations @@ -22,7 +22,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_TMPFILE], [ dnl # ZFS_LINUX_TEST_SRC([inode_operations_tmpfile], [ #include - int tmpfile(struct user_namespace *userns, + static int tmpfile(struct user_namespace *userns, struct inode *inode, struct file *file, umode_t mode) { return 0; } static struct inode_operations @@ -36,7 +36,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_TMPFILE], [ dnl # ZFS_LINUX_TEST_SRC([inode_operations_tmpfile_dentry_userns], [ #include - int tmpfile(struct user_namespace 
*userns, + static int tmpfile(struct user_namespace *userns, struct inode *inode, struct dentry *dentry, umode_t mode) { return 0; } static struct inode_operations @@ -46,7 +46,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_TMPFILE], [ ],[]) ZFS_LINUX_TEST_SRC([inode_operations_tmpfile_dentry], [ #include - int tmpfile(struct inode *inode, struct dentry *dentry, + static int tmpfile(struct inode *inode, struct dentry *dentry, umode_t mode) { return 0; } static struct inode_operations iops __attribute__ ((unused)) = { diff --git a/config/kernel-vfs-direct_IO.m4 b/config/kernel-vfs-direct_IO.m4 index 82583d52fc..7b7b91f979 100644 --- a/config/kernel-vfs-direct_IO.m4 +++ b/config/kernel-vfs-direct_IO.m4 @@ -5,7 +5,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_DIRECT_IO], [ ZFS_LINUX_TEST_SRC([direct_io_iter], [ #include - ssize_t test_direct_IO(struct kiocb *kiocb, + static ssize_t test_direct_IO(struct kiocb *kiocb, struct iov_iter *iter) { return 0; } static const struct address_space_operations @@ -17,7 +17,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_DIRECT_IO], [ ZFS_LINUX_TEST_SRC([direct_io_iter_offset], [ #include - ssize_t test_direct_IO(struct kiocb *kiocb, + static ssize_t test_direct_IO(struct kiocb *kiocb, struct iov_iter *iter, loff_t offset) { return 0; } static const struct address_space_operations @@ -29,7 +29,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_DIRECT_IO], [ ZFS_LINUX_TEST_SRC([direct_io_iter_rw_offset], [ #include - ssize_t test_direct_IO(int rw, struct kiocb *kiocb, + static ssize_t test_direct_IO(int rw, struct kiocb *kiocb, struct iov_iter *iter, loff_t offset) { return 0; } static const struct address_space_operations @@ -41,7 +41,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_DIRECT_IO], [ ZFS_LINUX_TEST_SRC([direct_io_iovec], [ #include - ssize_t test_direct_IO(int rw, struct kiocb *kiocb, + static ssize_t test_direct_IO(int rw, struct kiocb *kiocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs) { return 0; } diff --git a/config/kernel-vfs-iterate.m4 
b/config/kernel-vfs-iterate.m4 index 172118eac8..2e396daa1c 100644 --- a/config/kernel-vfs-iterate.m4 +++ b/config/kernel-vfs-iterate.m4 @@ -1,7 +1,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_ITERATE], [ ZFS_LINUX_TEST_SRC([file_operations_iterate_shared], [ #include - int iterate(struct file *filp, struct dir_context * context) + static int iterate(struct file *filp, struct dir_context * context) { return 0; } static const struct file_operations fops @@ -12,7 +12,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_ITERATE], [ ZFS_LINUX_TEST_SRC([file_operations_iterate], [ #include - int iterate(struct file *filp, + static int iterate(struct file *filp, struct dir_context *context) { return 0; } static const struct file_operations fops @@ -27,7 +27,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_ITERATE], [ ZFS_LINUX_TEST_SRC([file_operations_readdir], [ #include - int readdir(struct file *filp, void *entry, + static int readdir(struct file *filp, void *entry, filldir_t func) { return 0; } static const struct file_operations fops diff --git a/config/kernel-vfs-rw-iterate.m4 b/config/kernel-vfs-rw-iterate.m4 index 000353ec15..cb20ed0309 100644 --- a/config/kernel-vfs-rw-iterate.m4 +++ b/config/kernel-vfs-rw-iterate.m4 @@ -5,9 +5,9 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_RW_ITERATE], [ ZFS_LINUX_TEST_SRC([file_operations_rw], [ #include - ssize_t test_read(struct kiocb *kiocb, struct iov_iter *to) + static ssize_t test_read(struct kiocb *kiocb, struct iov_iter *to) { return 0; } - ssize_t test_write(struct kiocb *kiocb, struct iov_iter *from) + static ssize_t test_write(struct kiocb *kiocb, struct iov_iter *from) { return 0; } static const struct file_operations diff --git a/config/kernel-writepage_t.m4 b/config/kernel-writepage_t.m4 index 3a0cffd985..a82cf370c9 100644 --- a/config/kernel-writepage_t.m4 +++ b/config/kernel-writepage_t.m4 @@ -6,7 +6,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_WRITEPAGE_T], [ dnl # ZFS_LINUX_TEST_SRC([writepage_t_folio], [ #include - int putpage(struct folio *folio, + static int 
putpage(struct folio *folio, struct writeback_control *wbc, void *data) { return 0; } writepage_t func = putpage; diff --git a/config/kernel-xattr-handler.m4 b/config/kernel-xattr-handler.m4 index 6b8a08dbcc..32f58c70a5 100644 --- a/config/kernel-xattr-handler.m4 +++ b/config/kernel-xattr-handler.m4 @@ -68,7 +68,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR_HANDLER_GET], [ ZFS_LINUX_TEST_SRC([xattr_handler_get_dentry_inode], [ #include - int get(const struct xattr_handler *handler, + static int get(const struct xattr_handler *handler, struct dentry *dentry, struct inode *inode, const char *name, void *buffer, size_t size) { return 0; } static const struct xattr_handler @@ -80,7 +80,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR_HANDLER_GET], [ ZFS_LINUX_TEST_SRC([xattr_handler_get_xattr_handler], [ #include - int get(const struct xattr_handler *handler, + static int get(const struct xattr_handler *handler, struct dentry *dentry, const char *name, void *buffer, size_t size) { return 0; } static const struct xattr_handler @@ -92,7 +92,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR_HANDLER_GET], [ ZFS_LINUX_TEST_SRC([xattr_handler_get_dentry], [ #include - int get(struct dentry *dentry, const char *name, + static int get(struct dentry *dentry, const char *name, void *buffer, size_t size, int handler_flags) { return 0; } static const struct xattr_handler @@ -104,7 +104,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR_HANDLER_GET], [ ZFS_LINUX_TEST_SRC([xattr_handler_get_dentry_inode_flags], [ #include - int get(const struct xattr_handler *handler, + static int get(const struct xattr_handler *handler, struct dentry *dentry, struct inode *inode, const char *name, void *buffer, size_t size, int flags) { return 0; } @@ -182,7 +182,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR_HANDLER_SET], [ ZFS_LINUX_TEST_SRC([xattr_handler_set_mnt_idmap], [ #include - int set(const struct xattr_handler *handler, + static int set(const struct xattr_handler *handler, struct mnt_idmap *idmap, struct dentry *dentry, struct inode 
*inode, const char *name, const void *buffer, @@ -197,7 +197,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR_HANDLER_SET], [ ZFS_LINUX_TEST_SRC([xattr_handler_set_userns], [ #include - int set(const struct xattr_handler *handler, + static int set(const struct xattr_handler *handler, struct user_namespace *mnt_userns, struct dentry *dentry, struct inode *inode, const char *name, const void *buffer, @@ -212,7 +212,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR_HANDLER_SET], [ ZFS_LINUX_TEST_SRC([xattr_handler_set_dentry_inode], [ #include - int set(const struct xattr_handler *handler, + static int set(const struct xattr_handler *handler, struct dentry *dentry, struct inode *inode, const char *name, const void *buffer, size_t size, int flags) @@ -226,7 +226,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR_HANDLER_SET], [ ZFS_LINUX_TEST_SRC([xattr_handler_set_xattr_handler], [ #include - int set(const struct xattr_handler *handler, + static int set(const struct xattr_handler *handler, struct dentry *dentry, const char *name, const void *buffer, size_t size, int flags) { return 0; } @@ -239,7 +239,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR_HANDLER_SET], [ ZFS_LINUX_TEST_SRC([xattr_handler_set_dentry], [ #include - int set(struct dentry *dentry, const char *name, + static int set(struct dentry *dentry, const char *name, const void *buffer, size_t size, int flags, int handler_flags) { return 0; } static const struct xattr_handler @@ -325,7 +325,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR_HANDLER_LIST], [ ZFS_LINUX_TEST_SRC([xattr_handler_list_simple], [ #include - bool list(struct dentry *dentry) { return 0; } + static bool list(struct dentry *dentry) { return 0; } static const struct xattr_handler xops __attribute__ ((unused)) = { .list = list, @@ -335,7 +335,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR_HANDLER_LIST], [ ZFS_LINUX_TEST_SRC([xattr_handler_list_xattr_handler], [ #include - size_t list(const struct xattr_handler *handler, + static size_t list(const struct xattr_handler *handler, struct dentry 
*dentry, char *list, size_t list_size, const char *name, size_t name_len) { return 0; } static const struct xattr_handler @@ -347,7 +347,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR_HANDLER_LIST], [ ZFS_LINUX_TEST_SRC([xattr_handler_list_dentry], [ #include - size_t list(struct dentry *dentry, + static size_t list(struct dentry *dentry, char *list, size_t list_size, const char *name, size_t name_len, int handler_flags) { return 0; } From ce782d080432506a41b49df32af6f0013b5775db Mon Sep 17 00:00:00 2001 From: Rob Norris Date: Tue, 23 Jan 2024 15:42:57 +1100 Subject: [PATCH 38/45] Linux 6.8 compat: update for new bdev access functions blkdev_get_by_path() and blkdev_put() have been replaced by bdev_open_by_path() and bdev_release(), which return a "handle" object with the bdev object itself inside. This adds detection for the new functions, and macros to handle the old and new forms consistently. Reviewed-by: Brian Behlendorf Signed-off-by: Rob Norris Sponsored-by: https://despairlabs.com/sponsor/ Closes #15805 --- config/kernel-blkdev.m4 | 56 ++++++++++++- module/os/linux/zfs/vdev_disk.c | 137 ++++++++++++++++++-------------- 2 files changed, 133 insertions(+), 60 deletions(-) diff --git a/config/kernel-blkdev.m4 b/config/kernel-blkdev.m4 index e04a2bd2c3..8e9e638b12 100644 --- a/config/kernel-blkdev.m4 +++ b/config/kernel-blkdev.m4 @@ -35,6 +35,25 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_GET_BY_PATH_4ARG], [ ]) ]) +dnl # +dnl # 6.8.x API change +dnl # bdev_open_by_path() replaces blkdev_get_by_path() +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_OPEN_BY_PATH], [ + ZFS_LINUX_TEST_SRC([bdev_open_by_path], [ + #include + #include + ], [ + struct bdev_handle *bdh __attribute__ ((unused)) = NULL; + const char *path = "path"; + fmode_t mode = 0; + void *holder = NULL; + struct blk_holder_ops h; + + bdh = bdev_open_by_path(path, mode, holder, &h); + ]) +]) + AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_GET_BY_PATH], [ AC_MSG_CHECKING([whether blkdev_get_by_path() exists and takes 3 args]) 
ZFS_LINUX_TEST_RESULT([blkdev_get_by_path], [ @@ -47,7 +66,15 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_GET_BY_PATH], [ [blkdev_get_by_path() exists and takes 4 args]) AC_MSG_RESULT(yes) ], [ - ZFS_LINUX_TEST_ERROR([blkdev_get_by_path()]) + AC_MSG_RESULT(no) + AC_MSG_CHECKING([whether bdev_open_by_path() exists]) + ZFS_LINUX_TEST_RESULT([bdev_open_by_path], [ + AC_DEFINE(HAVE_BDEV_OPEN_BY_PATH, 1, + [bdev_open_by_path() exists]) + AC_MSG_RESULT(yes) + ], [ + ZFS_LINUX_TEST_ERROR([blkdev_get_by_path()]) + ]) ]) ]) ]) @@ -108,18 +135,41 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_PUT_HOLDER], [ ]) ]) +dnl # +dnl # 6.8.x API change +dnl # bdev_release() replaces blkdev_put() +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_RELEASE], [ + ZFS_LINUX_TEST_SRC([bdev_release], [ + #include + #include + ], [ + struct bdev_handle *bdh = NULL; + bdev_release(bdh); + ]) +]) + AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_PUT], [ AC_MSG_CHECKING([whether blkdev_put() exists]) ZFS_LINUX_TEST_RESULT([blkdev_put], [ AC_MSG_RESULT(yes) ], [ + AC_MSG_RESULT(no) AC_MSG_CHECKING([whether blkdev_put() accepts void* as arg 2]) ZFS_LINUX_TEST_RESULT([blkdev_put_holder], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_BLKDEV_PUT_HOLDER, 1, [blkdev_put() accepts void* as arg 2]) ], [ - ZFS_LINUX_TEST_ERROR([blkdev_put()]) + AC_MSG_RESULT(no) + AC_MSG_CHECKING([whether bdev_release() exists]) + ZFS_LINUX_TEST_RESULT([bdev_release], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_BDEV_RELEASE, 1, + [bdev_release() exists]) + ], [ + ZFS_LINUX_TEST_ERROR([blkdev_put()]) + ]) ]) ]) ]) @@ -570,8 +620,10 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_BLK_STS_RESV_CONFLICT], [ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV], [ ZFS_AC_KERNEL_SRC_BLKDEV_GET_BY_PATH ZFS_AC_KERNEL_SRC_BLKDEV_GET_BY_PATH_4ARG + ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_OPEN_BY_PATH ZFS_AC_KERNEL_SRC_BLKDEV_PUT ZFS_AC_KERNEL_SRC_BLKDEV_PUT_HOLDER + ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_RELEASE ZFS_AC_KERNEL_SRC_BLKDEV_REREAD_PART ZFS_AC_KERNEL_SRC_BLKDEV_INVALIDATE_BDEV ZFS_AC_KERNEL_SRC_BLKDEV_LOOKUP_BDEV diff 
--git a/module/os/linux/zfs/vdev_disk.c b/module/os/linux/zfs/vdev_disk.c index 8b5aa94fe4..e7f0aa5738 100644 --- a/module/os/linux/zfs/vdev_disk.c +++ b/module/os/linux/zfs/vdev_disk.c @@ -41,8 +41,28 @@ #include #endif +/* + * Linux 6.8.x uses a bdev_handle as an instance/refcount for an underlying + * block_device. Since it carries the block_device inside, it's convenient to + * just use the handle as a proxy. For pre-6.8, we just emulate this with + * a cast, since we don't need any of the other fields inside the handle. + */ +#ifdef HAVE_BDEV_OPEN_BY_PATH +typedef struct bdev_handle zfs_bdev_handle_t; +#define BDH_BDEV(bdh) ((bdh)->bdev) +#define BDH_IS_ERR(bdh) (IS_ERR(bdh)) +#define BDH_PTR_ERR(bdh) (PTR_ERR(bdh)) +#define BDH_ERR_PTR(err) (ERR_PTR(err)) +#else +typedef void zfs_bdev_handle_t; +#define BDH_BDEV(bdh) ((struct block_device *)bdh) +#define BDH_IS_ERR(bdh) (IS_ERR(BDH_BDEV(bdh))) +#define BDH_PTR_ERR(bdh) (PTR_ERR(BDH_BDEV(bdh))) +#define BDH_ERR_PTR(err) (ERR_PTR(err)) +#endif + typedef struct vdev_disk { - struct block_device *vd_bdev; + zfs_bdev_handle_t *vd_bdh; krwlock_t vd_lock; } vdev_disk_t; @@ -209,29 +229,23 @@ static void vdev_disk_kobj_evt_post(vdev_t *v) { vdev_disk_t *vd = v->vdev_tsd; - if (vd && vd->vd_bdev) { - spl_signal_kobj_evt(vd->vd_bdev); + if (vd && vd->vd_bdh) { + spl_signal_kobj_evt(BDH_BDEV(vd->vd_bdh)); } else { vdev_dbgmsg(v, "vdev_disk_t is NULL for VDEV:%s\n", v->vdev_path); } } -#if !defined(HAVE_BLKDEV_GET_BY_PATH_4ARG) -/* - * Define a dummy struct blk_holder_ops for kernel versions - * prior to 6.5.
- */ -struct blk_holder_ops {}; -#endif - -static struct block_device * -vdev_blkdev_get_by_path(const char *path, spa_mode_t mode, void *holder, - const struct blk_holder_ops *hops) +static zfs_bdev_handle_t * +vdev_blkdev_get_by_path(const char *path, spa_mode_t mode, void *holder) { -#ifdef HAVE_BLKDEV_GET_BY_PATH_4ARG +#if defined(HAVE_BDEV_OPEN_BY_PATH) + return (bdev_open_by_path(path, + vdev_bdev_mode(mode, B_TRUE), holder, NULL)); +#elif defined(HAVE_BLKDEV_GET_BY_PATH_4ARG) return (blkdev_get_by_path(path, - vdev_bdev_mode(mode, B_TRUE), holder, hops)); + vdev_bdev_mode(mode, B_TRUE), holder, NULL)); #else return (blkdev_get_by_path(path, vdev_bdev_mode(mode, B_TRUE), holder)); @@ -239,12 +253,15 @@ vdev_blkdev_get_by_path(const char *path, spa_mode_t mode, void *holder, } static void -vdev_blkdev_put(struct block_device *bdev, spa_mode_t mode, void *holder) +vdev_blkdev_put(zfs_bdev_handle_t *bdh, spa_mode_t mode, void *holder) { -#ifdef HAVE_BLKDEV_PUT_HOLDER - return (blkdev_put(bdev, holder)); +#if defined(HAVE_BDEV_RELEASE) + return (bdev_release(bdh)); +#elif defined(HAVE_BLKDEV_PUT_HOLDER) + return (blkdev_put(BDH_BDEV(bdh), holder)); #else - return (blkdev_put(bdev, vdev_bdev_mode(mode, B_TRUE))); + return (blkdev_put(BDH_BDEV(bdh), + vdev_bdev_mode(mode, B_TRUE))); #endif } @@ -252,7 +269,7 @@ static int vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize, uint64_t *logical_ashift, uint64_t *physical_ashift) { - struct block_device *bdev; + zfs_bdev_handle_t *bdh; #ifdef HAVE_BLK_MODE_T blk_mode_t mode = vdev_bdev_mode(spa_mode(v->vdev_spa), B_FALSE); #else @@ -282,10 +299,11 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize, boolean_t reread_part = B_FALSE; rw_enter(&vd->vd_lock, RW_WRITER); - bdev = vd->vd_bdev; - vd->vd_bdev = NULL; + bdh = vd->vd_bdh; + vd->vd_bdh = NULL; - if (bdev) { + if (bdh) { + struct block_device *bdev = BDH_BDEV(bdh); if (v->vdev_expanding && bdev != bdev_whole(bdev)) { 
vdev_bdevname(bdev_whole(bdev), disk_name + 5); /* @@ -307,15 +325,16 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize, reread_part = B_TRUE; } - vdev_blkdev_put(bdev, mode, zfs_vdev_holder); + vdev_blkdev_put(bdh, mode, zfs_vdev_holder); } if (reread_part) { - bdev = vdev_blkdev_get_by_path(disk_name, mode, - zfs_vdev_holder, NULL); - if (!IS_ERR(bdev)) { - int error = vdev_bdev_reread_part(bdev); - vdev_blkdev_put(bdev, mode, zfs_vdev_holder); + bdh = vdev_blkdev_get_by_path(disk_name, mode, + zfs_vdev_holder); + if (!BDH_IS_ERR(bdh)) { + int error = + vdev_bdev_reread_part(BDH_BDEV(bdh)); + vdev_blkdev_put(bdh, mode, zfs_vdev_holder); if (error == 0) { timeout = MSEC2NSEC( zfs_vdev_open_timeout_ms * 2); @@ -358,11 +377,11 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize, * subsequent attempts are expected to eventually succeed. */ hrtime_t start = gethrtime(); - bdev = ERR_PTR(-ENXIO); - while (IS_ERR(bdev) && ((gethrtime() - start) < timeout)) { - bdev = vdev_blkdev_get_by_path(v->vdev_path, mode, - zfs_vdev_holder, NULL); - if (unlikely(PTR_ERR(bdev) == -ENOENT)) { + bdh = BDH_ERR_PTR(-ENXIO); + while (BDH_IS_ERR(bdh) && ((gethrtime() - start) < timeout)) { + bdh = vdev_blkdev_get_by_path(v->vdev_path, mode, + zfs_vdev_holder); + if (unlikely(BDH_PTR_ERR(bdh) == -ENOENT)) { /* * There is no point of waiting since device is removed * explicitly @@ -371,52 +390,54 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize, break; schedule_timeout(MSEC_TO_TICK(10)); - } else if (unlikely(PTR_ERR(bdev) == -ERESTARTSYS)) { + } else if (unlikely(BDH_PTR_ERR(bdh) == -ERESTARTSYS)) { timeout = MSEC2NSEC(zfs_vdev_open_timeout_ms * 10); continue; - } else if (IS_ERR(bdev)) { + } else if (BDH_IS_ERR(bdh)) { break; } } - if (IS_ERR(bdev)) { - int error = -PTR_ERR(bdev); + if (BDH_IS_ERR(bdh)) { + int error = -BDH_PTR_ERR(bdh); vdev_dbgmsg(v, "open error=%d timeout=%llu/%llu", error, (u_longlong_t)(gethrtime() - start), 
(u_longlong_t)timeout); - vd->vd_bdev = NULL; + vd->vd_bdh = NULL; v->vdev_tsd = vd; rw_exit(&vd->vd_lock); return (SET_ERROR(error)); } else { - vd->vd_bdev = bdev; + vd->vd_bdh = bdh; v->vdev_tsd = vd; rw_exit(&vd->vd_lock); } + struct block_device *bdev = BDH_BDEV(vd->vd_bdh); + /* Determine the physical block size */ - int physical_block_size = bdev_physical_block_size(vd->vd_bdev); + int physical_block_size = bdev_physical_block_size(bdev); /* Determine the logical block size */ - int logical_block_size = bdev_logical_block_size(vd->vd_bdev); + int logical_block_size = bdev_logical_block_size(bdev); /* Clear the nowritecache bit, causes vdev_reopen() to try again. */ v->vdev_nowritecache = B_FALSE; /* Set when device reports it supports TRIM. */ - v->vdev_has_trim = bdev_discard_supported(vd->vd_bdev); + v->vdev_has_trim = bdev_discard_supported(bdev); /* Set when device reports it supports secure TRIM. */ - v->vdev_has_securetrim = bdev_secure_discard_supported(vd->vd_bdev); + v->vdev_has_securetrim = bdev_secure_discard_supported(bdev); /* Inform the ZIO pipeline that we are non-rotational */ - v->vdev_nonrot = blk_queue_nonrot(bdev_get_queue(vd->vd_bdev)); + v->vdev_nonrot = blk_queue_nonrot(bdev_get_queue(bdev)); /* Physical volume size in bytes for the partition */ - *psize = bdev_capacity(vd->vd_bdev); + *psize = bdev_capacity(bdev); /* Physical volume size in bytes including possible expansion space */ - *max_psize = bdev_max_capacity(vd->vd_bdev, v->vdev_wholedisk); + *max_psize = bdev_max_capacity(bdev, v->vdev_wholedisk); /* Based on the minimum sector size set the block size */ *physical_ashift = highbit64(MAX(physical_block_size, @@ -436,8 +457,8 @@ vdev_disk_close(vdev_t *v) if (v->vdev_reopening || vd == NULL) return; - if (vd->vd_bdev != NULL) { - vdev_blkdev_put(vd->vd_bdev, spa_mode(v->vdev_spa), + if (vd->vd_bdh != NULL) { + vdev_blkdev_put(vd->vd_bdh, spa_mode(v->vdev_spa), zfs_vdev_holder); } @@ -849,10 +870,10 @@ vdev_disk_io_trim(zio_t 
*zio) #if defined(HAVE_BLKDEV_ISSUE_SECURE_ERASE) if (zio->io_trim_flags & ZIO_TRIM_SECURE) { - return (-blkdev_issue_secure_erase(vd->vd_bdev, + return (-blkdev_issue_secure_erase(BDH_BDEV(vd->vd_bdh), zio->io_offset >> 9, zio->io_size >> 9, GFP_NOFS)); } else { - return (-blkdev_issue_discard(vd->vd_bdev, + return (-blkdev_issue_discard(BDH_BDEV(vd->vd_bdh), zio->io_offset >> 9, zio->io_size >> 9, GFP_NOFS)); } #elif defined(HAVE_BLKDEV_ISSUE_DISCARD) @@ -861,7 +882,7 @@ vdev_disk_io_trim(zio_t *zio) if (zio->io_trim_flags & ZIO_TRIM_SECURE) trim_flags |= BLKDEV_DISCARD_SECURE; #endif - return (-blkdev_issue_discard(vd->vd_bdev, + return (-blkdev_issue_discard(BDH_BDEV(vd->vd_bdh), zio->io_offset >> 9, zio->io_size >> 9, GFP_NOFS, trim_flags)); #else #error "Unsupported kernel" @@ -891,7 +912,7 @@ vdev_disk_io_start(zio_t *zio) * If the vdev is closed, it's likely due to a failed reopen and is * in the UNAVAIL state. Nothing to be done here but return failure. */ - if (vd->vd_bdev == NULL) { + if (vd->vd_bdh == NULL) { rw_exit(&vd->vd_lock); zio->io_error = ENXIO; zio_interrupt(zio); @@ -919,7 +940,7 @@ vdev_disk_io_start(zio_t *zio) break; } - error = vdev_disk_io_flush(vd->vd_bdev, zio); + error = vdev_disk_io_flush(BDH_BDEV(vd->vd_bdh), zio); if (error == 0) { rw_exit(&vd->vd_lock); return; @@ -958,7 +979,7 @@ vdev_disk_io_start(zio_t *zio) } zio->io_target_timestamp = zio_handle_io_delay(zio); - error = __vdev_disk_physio(vd->vd_bdev, zio, + error = __vdev_disk_physio(BDH_BDEV(vd->vd_bdh), zio, zio->io_size, zio->io_offset, rw, 0); rw_exit(&vd->vd_lock); @@ -981,8 +1002,8 @@ vdev_disk_io_done(zio_t *zio) vdev_t *v = zio->io_vd; vdev_disk_t *vd = v->vdev_tsd; - if (!zfs_check_disk_status(vd->vd_bdev)) { - invalidate_bdev(vd->vd_bdev); + if (!zfs_check_disk_status(BDH_BDEV(vd->vd_bdh))) { + invalidate_bdev(BDH_BDEV(vd->vd_bdh)); v->vdev_remove_wanted = B_TRUE; spa_async_request(zio->io_spa, SPA_ASYNC_REMOVE); } From 7466e09a492b644d39d85dd173e0f8051858a2a5 Mon 
Sep 17 00:00:00 2001 From: Rob Norris Date: Tue, 23 Jan 2024 16:34:49 +1100 Subject: [PATCH 39/45] Linux 6.8 compat: implement strlcpy fallback Linux has removed strlcpy in favour of strscpy. This adds a fallback implementation of strlcpy for this case. Reviewed-by: Brian Behlendorf Signed-off-by: Rob Norris Sponsored-by: https://despairlabs.com/sponsor/ Closes #15805 --- config/kernel-strlcpy.m4 | 47 ++++++++++++++++++++++++++ config/kernel.m4 | 4 +++ include/os/linux/spl/sys/string.h | 49 ++++++++++++++++++++++++++++ module/os/linux/spl/spl-kmem-cache.c | 1 + module/os/linux/spl/spl-kstat.c | 1 + module/os/linux/spl/spl-thread.c | 1 + module/os/linux/spl/spl-zone.c | 1 + 7 files changed, 104 insertions(+) create mode 100644 config/kernel-strlcpy.m4 diff --git a/config/kernel-strlcpy.m4 b/config/kernel-strlcpy.m4 new file mode 100644 index 0000000000..c31cf52d78 --- /dev/null +++ b/config/kernel-strlcpy.m4 @@ -0,0 +1,47 @@ +dnl # +dnl # 6.8.x replaced strlcpy with strscpy. Check for both so we can provide +dnl # appropriate fallbacks.
+dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_STRLCPY], [ + ZFS_LINUX_TEST_SRC([kernel_has_strlcpy], [ + #include + ], [ + const char *src = "goodbye"; + char dst[32]; + size_t len; + len = strlcpy(dst, src, sizeof (dst)); + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_SRC_STRSCPY], [ + ZFS_LINUX_TEST_SRC([kernel_has_strscpy], [ + #include + ], [ + const char *src = "goodbye"; + char dst[32]; + ssize_t len; + len = strscpy(dst, src, sizeof (dst)); + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_STRLCPY], [ + AC_MSG_CHECKING([whether strlcpy() exists]) + ZFS_LINUX_TEST_RESULT([kernel_has_strlcpy], [ + AC_MSG_RESULT([yes]) + AC_DEFINE(HAVE_KERNEL_STRLCPY, 1, + [strlcpy() exists]) + ], [ + AC_MSG_RESULT([no]) + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_STRSCPY], [ + AC_MSG_CHECKING([whether strscpy() exists]) + ZFS_LINUX_TEST_RESULT([kernel_has_strscpy], [ + AC_MSG_RESULT([yes]) + AC_DEFINE(HAVE_KERNEL_STRSCPY, 1, + [strscpy() exists]) + ], [ + AC_MSG_RESULT([no]) + ]) +]) diff --git a/config/kernel.m4 b/config/kernel.m4 index d25b65994f..30bdd65795 100644 --- a/config/kernel.m4 +++ b/config/kernel.m4 @@ -149,6 +149,8 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [ ZFS_AC_KERNEL_SRC_SYSFS ZFS_AC_KERNEL_SRC_SET_SPECIAL_STATE ZFS_AC_KERNEL_SRC_STANDALONE_LINUX_STDARG + ZFS_AC_KERNEL_SRC_STRLCPY + ZFS_AC_KERNEL_SRC_STRSCPY ZFS_AC_KERNEL_SRC_PAGEMAP_FOLIO_WAIT_BIT ZFS_AC_KERNEL_SRC_ADD_DISK ZFS_AC_KERNEL_SRC_KTHREAD @@ -294,6 +296,8 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [ ZFS_AC_KERNEL_SYSFS ZFS_AC_KERNEL_SET_SPECIAL_STATE ZFS_AC_KERNEL_STANDALONE_LINUX_STDARG + ZFS_AC_KERNEL_STRLCPY + ZFS_AC_KERNEL_STRSCPY ZFS_AC_KERNEL_PAGEMAP_FOLIO_WAIT_BIT ZFS_AC_KERNEL_ADD_DISK ZFS_AC_KERNEL_KTHREAD diff --git a/include/os/linux/spl/sys/string.h b/include/os/linux/spl/sys/string.h index 38134dcf4c..f44bf23eb3 100644 --- a/include/os/linux/spl/sys/string.h +++ b/include/os/linux/spl/sys/string.h @@ -1 +1,50 @@ +/* + * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. 
+ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). + * Written by Brian Behlendorf . + * UCRL-CODE-235197 + * + * This file is part of the SPL, Solaris Porting Layer. + * + * The SPL is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * The SPL is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with the SPL. If not, see . + */ + +#ifndef _SPL_STRING_H +#define _SPL_STRING_H + #include + +/* Fallbacks for kernel missing strlcpy */ +#ifndef HAVE_KERNEL_STRLCPY + +#if defined(HAVE_KERNEL_STRSCPY) +/* + * strscpy is strlcpy, but returns an error on truncation. strlcpy is defined + * to return strlen(src), so detect error and override it. 
+ */ +static inline size_t +strlcpy(char *dest, const char *src, size_t size) +{ + ssize_t ret = strscpy(dest, src, size); + if (likely(ret > 0)) + return ((size_t)ret); + return (strlen(src)); +} +#else +#error "no strlcpy fallback available" +#endif + +#endif /* HAVE_KERNEL_STRLCPY */ + +#endif /* _SPL_STRING_H */ diff --git a/module/os/linux/spl/spl-kmem-cache.c b/module/os/linux/spl/spl-kmem-cache.c index 4b15081715..42821ad602 100644 --- a/module/os/linux/spl/spl-kmem-cache.c +++ b/module/os/linux/spl/spl-kmem-cache.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include diff --git a/module/os/linux/spl/spl-kstat.c b/module/os/linux/spl/spl-kstat.c index 4308581147..ad553a73a6 100644 --- a/module/os/linux/spl/spl-kstat.c +++ b/module/os/linux/spl/spl-kstat.c @@ -32,6 +32,7 @@ #include #include #include +#include static kmutex_t kstat_module_lock; static struct list_head kstat_module_list; diff --git a/module/os/linux/spl/spl-thread.c b/module/os/linux/spl/spl-thread.c index b4ef86a5e4..ee3eb4690c 100644 --- a/module/os/linux/spl/spl-thread.c +++ b/module/os/linux/spl/spl-thread.c @@ -26,6 +26,7 @@ #include #include #include +#include /* * Thread interfaces diff --git a/module/os/linux/spl/spl-zone.c b/module/os/linux/spl/spl-zone.c index e821fbb4f3..d0d0cca154 100644 --- a/module/os/linux/spl/spl-zone.c +++ b/module/os/linux/spl/spl-zone.c @@ -30,6 +30,7 @@ #include #include #include +#include #if defined(CONFIG_USER_NS) #include From 09e6724e1ee545a6afefc258820870dfedb2a16f Mon Sep 17 00:00:00 2001 From: Rob Norris Date: Tue, 23 Jan 2024 16:41:05 +1100 Subject: [PATCH 40/45] Linux 6.8 compat: replace MAX_ORDER define MAX_ORDER has been renamed to MAX_PAGE_ORDER. Rather than just redefining it, instead define our own name and set it consistently from the start. 
Reviewed-by: Brian Behlendorf Signed-off-by: Rob Norris Sponsored-by: https://despairlabs.com/sponsor/ Closes #15805 --- module/os/linux/zfs/abd_os.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/module/os/linux/zfs/abd_os.c b/module/os/linux/zfs/abd_os.c index 13150adbe0..24390fbbf1 100644 --- a/module/os/linux/zfs/abd_os.c +++ b/module/os/linux/zfs/abd_os.c @@ -60,8 +60,16 @@ #ifdef _KERNEL #include #include +#endif + +#ifdef _KERNEL +#if defined(MAX_ORDER) +#define ABD_MAX_ORDER (MAX_ORDER) +#elif defined(MAX_PAGE_ORDER) +#define ABD_MAX_ORDER (MAX_PAGE_ORDER) +#endif #else -#define MAX_ORDER 1 +#define ABD_MAX_ORDER (1) #endif typedef struct abd_stats { @@ -71,7 +79,7 @@ typedef struct abd_stats { kstat_named_t abdstat_scatter_cnt; kstat_named_t abdstat_scatter_data_size; kstat_named_t abdstat_scatter_chunk_waste; - kstat_named_t abdstat_scatter_orders[MAX_ORDER]; + kstat_named_t abdstat_scatter_orders[ABD_MAX_ORDER]; kstat_named_t abdstat_scatter_page_multi_chunk; kstat_named_t abdstat_scatter_page_multi_zone; kstat_named_t abdstat_scatter_page_alloc_retry; @@ -139,7 +147,7 @@ static struct { wmsum_t abdstat_scatter_cnt; wmsum_t abdstat_scatter_data_size; wmsum_t abdstat_scatter_chunk_waste; - wmsum_t abdstat_scatter_orders[MAX_ORDER]; + wmsum_t abdstat_scatter_orders[ABD_MAX_ORDER]; wmsum_t abdstat_scatter_page_multi_chunk; wmsum_t abdstat_scatter_page_multi_zone; wmsum_t abdstat_scatter_page_alloc_retry; @@ -222,7 +230,7 @@ abd_free_struct_impl(abd_t *abd) } #ifdef _KERNEL -static unsigned zfs_abd_scatter_max_order = MAX_ORDER - 1; +static unsigned zfs_abd_scatter_max_order = ABD_MAX_ORDER - 1; /* * Mark zfs data pages so they can be excluded from kernel crash dumps @@ -272,7 +280,8 @@ abd_alloc_chunks(abd_t *abd, size_t size) struct page *page, *tmp_page = NULL; gfp_t gfp = __GFP_NOWARN | GFP_NOIO; gfp_t gfp_comp = (gfp | __GFP_NORETRY | __GFP_COMP) & ~__GFP_RECLAIM; - unsigned int max_order = 
MIN(zfs_abd_scatter_max_order, MAX_ORDER - 1); + unsigned int max_order = MIN(zfs_abd_scatter_max_order, + ABD_MAX_ORDER - 1); unsigned int nr_pages = abd_chunkcnt_for_bytes(size); unsigned int chunks = 0, zones = 0; size_t remaining_size; @@ -729,7 +738,7 @@ abd_kstats_update(kstat_t *ksp, int rw) wmsum_value(&abd_sums.abdstat_scatter_data_size); as->abdstat_scatter_chunk_waste.value.ui64 = wmsum_value(&abd_sums.abdstat_scatter_chunk_waste); - for (int i = 0; i < MAX_ORDER; i++) { + for (int i = 0; i < ABD_MAX_ORDER; i++) { as->abdstat_scatter_orders[i].value.ui64 = wmsum_value(&abd_sums.abdstat_scatter_orders[i]); } @@ -758,7 +767,7 @@ abd_init(void) wmsum_init(&abd_sums.abdstat_scatter_cnt, 0); wmsum_init(&abd_sums.abdstat_scatter_data_size, 0); wmsum_init(&abd_sums.abdstat_scatter_chunk_waste, 0); - for (i = 0; i < MAX_ORDER; i++) + for (i = 0; i < ABD_MAX_ORDER; i++) wmsum_init(&abd_sums.abdstat_scatter_orders[i], 0); wmsum_init(&abd_sums.abdstat_scatter_page_multi_chunk, 0); wmsum_init(&abd_sums.abdstat_scatter_page_multi_zone, 0); @@ -768,7 +777,7 @@ abd_init(void) abd_ksp = kstat_create("zfs", 0, "abdstats", "misc", KSTAT_TYPE_NAMED, sizeof (abd_stats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL); if (abd_ksp != NULL) { - for (i = 0; i < MAX_ORDER; i++) { + for (i = 0; i < ABD_MAX_ORDER; i++) { snprintf(abd_stats.abdstat_scatter_orders[i].name, KSTAT_STRLEN, "scatter_order_%d", i); abd_stats.abdstat_scatter_orders[i].data_type = @@ -798,7 +807,7 @@ abd_fini(void) wmsum_fini(&abd_sums.abdstat_scatter_cnt); wmsum_fini(&abd_sums.abdstat_scatter_data_size); wmsum_fini(&abd_sums.abdstat_scatter_chunk_waste); - for (int i = 0; i < MAX_ORDER; i++) + for (int i = 0; i < ABD_MAX_ORDER; i++) wmsum_fini(&abd_sums.abdstat_scatter_orders[i]); wmsum_fini(&abd_sums.abdstat_scatter_page_multi_chunk); wmsum_fini(&abd_sums.abdstat_scatter_page_multi_zone); From cbd51c5f2416fecd1e0c1b79c7dad385ad29f5ce Mon Sep 17 00:00:00 2001 From: Rob Norris Date: Tue, 23 Jan 2024 17:43:20 
+1100 Subject: [PATCH 41/45] Linux 6.8 compat: fix inode permission tests The name inode_permission is now defined in the kernel. Rename ours to test_permission, in line with most of our other tests. Reviewed-by: Brian Behlendorf Signed-off-by: Rob Norris Sponsored-by: https://despairlabs.com/sponsor/ Closes #15805 --- config/kernel-inode-permission.m4 | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/config/kernel-inode-permission.m4 b/config/kernel-inode-permission.m4 index aef4005c40..f7fc164390 100644 --- a/config/kernel-inode-permission.m4 +++ b/config/kernel-inode-permission.m4 @@ -8,12 +8,12 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_PERMISSION], [ #include #include - static int inode_permission(struct mnt_idmap *idmap, + static int test_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { return 0; } static const struct inode_operations iops __attribute__ ((unused)) = { - .permission = inode_permission, + .permission = test_permission, }; ],[]) @@ -25,12 +25,12 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_PERMISSION], [ #include #include - static int inode_permission(struct user_namespace *userns, + static int test_permission(struct user_namespace *userns, struct inode *inode, int mask) { return 0; } static const struct inode_operations iops __attribute__ ((unused)) = { - .permission = inode_permission, + .permission = test_permission, }; ],[]) ]) From e6ca28c970842c387852acca89eaabfb54267b90 Mon Sep 17 00:00:00 2001 From: Rob Norris Date: Tue, 23 Jan 2024 21:14:06 +1100 Subject: [PATCH 42/45] Linux 6.8 compat: handle mnt_idmap user_namespace change struct mnt_idmap no longer has a struct user_namespace within it. Work around this by creating a temporary with the copy of the map we need taken from the idmap. 
Reviewed-by: Brian Behlendorf Co-authored-by: Youzhong Yang Signed-off-by: Rob Norris Sponsored-by: https://despairlabs.com/sponsor/ Closes #15805 --- config/kernel-idmap_mnt_api.m4 | 25 +++++++++++++++ config/kernel.m4 | 2 ++ include/os/linux/spl/sys/cred.h | 55 +++++++++++++++++++++++++++++--- include/os/linux/spl/sys/types.h | 11 +++++++ 4 files changed, 88 insertions(+), 5 deletions(-) diff --git a/config/kernel-idmap_mnt_api.m4 b/config/kernel-idmap_mnt_api.m4 index 47ddc5702f..d1bdd05320 100644 --- a/config/kernel-idmap_mnt_api.m4 +++ b/config/kernel-idmap_mnt_api.m4 @@ -23,3 +23,28 @@ AC_DEFUN([ZFS_AC_KERNEL_IDMAP_MNT_API], [ ]) ]) +dnl # +dnl # 6.8 decouples mnt_idmap from user_namespace. This is all internal +dnl # to mnt_idmap so we can't detect it directly, but we detect a related +dnl # change and use that as a signal. +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_IDMAP_NO_USERNS], [ + ZFS_LINUX_TEST_SRC([idmap_no_userns], [ + #include + ], [ + struct uid_gid_map *map = NULL; + map_id_down(map, 0); + ]) +]) + + +AC_DEFUN([ZFS_AC_KERNEL_IDMAP_NO_USERNS], [ + AC_MSG_CHECKING([whether idmapped mounts have a user namespace]) + ZFS_LINUX_TEST_RESULT([idmap_no_userns], [ + AC_MSG_RESULT([yes]) + AC_DEFINE(HAVE_IDMAP_NO_USERNS, 1, + [mnt_idmap does not have user_namespace]) + ], [ + AC_MSG_RESULT([no]) + ]) +]) diff --git a/config/kernel.m4 b/config/kernel.m4 index 30bdd65795..e3f8645774 100644 --- a/config/kernel.m4 +++ b/config/kernel.m4 @@ -158,6 +158,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [ ZFS_AC_KERNEL_SRC___COPY_FROM_USER_INATOMIC ZFS_AC_KERNEL_SRC_USER_NS_COMMON_INUM ZFS_AC_KERNEL_SRC_IDMAP_MNT_API + ZFS_AC_KERNEL_SRC_IDMAP_NO_USERNS ZFS_AC_KERNEL_SRC_IATTR_VFSID ZFS_AC_KERNEL_SRC_FILEMAP ZFS_AC_KERNEL_SRC_WRITEPAGE_T @@ -305,6 +306,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [ ZFS_AC_KERNEL___COPY_FROM_USER_INATOMIC ZFS_AC_KERNEL_USER_NS_COMMON_INUM ZFS_AC_KERNEL_IDMAP_MNT_API + ZFS_AC_KERNEL_IDMAP_NO_USERNS ZFS_AC_KERNEL_IATTR_VFSID ZFS_AC_KERNEL_FILEMAP
ZFS_AC_KERNEL_WRITEPAGE_T diff --git a/include/os/linux/spl/sys/cred.h b/include/os/linux/spl/sys/cred.h index 7fd5f64486..c19c3c0719 100644 --- a/include/os/linux/spl/sys/cred.h +++ b/include/os/linux/spl/sys/cred.h @@ -73,13 +73,25 @@ static inline struct user_namespace *zfs_i_user_ns(struct inode *inode) static inline boolean_t zfs_no_idmapping(struct user_namespace *mnt_userns, struct user_namespace *fs_userns) { - return (zfs_is_init_userns(mnt_userns) || mnt_userns == fs_userns); + return (zfs_is_init_userns(mnt_userns) || + mnt_userns == fs_userns); } static inline uid_t zfs_uid_to_vfsuid(zidmap_t *mnt_userns, struct user_namespace *fs_userns, uid_t uid) { - struct user_namespace *owner = idmap_owner(mnt_userns); + struct user_namespace *owner; +#ifdef HAVE_IOPS_CREATE_IDMAP + if (mnt_userns == zfs_init_idmap) + return (uid); +#endif +#ifdef HAVE_IDMAP_NO_USERNS + struct user_namespace ns; + ns.uid_map = mnt_userns->uid_map; + owner = &ns; +#else + owner = idmap_owner(mnt_userns); +#endif if (zfs_no_idmapping(owner, fs_userns)) return (uid); if (!zfs_is_init_userns(fs_userns)) @@ -92,7 +104,18 @@ static inline uid_t zfs_uid_to_vfsuid(zidmap_t *mnt_userns, static inline gid_t zfs_gid_to_vfsgid(zidmap_t *mnt_userns, struct user_namespace *fs_userns, gid_t gid) { - struct user_namespace *owner = idmap_owner(mnt_userns); + struct user_namespace *owner; +#ifdef HAVE_IOPS_CREATE_IDMAP + if (mnt_userns == zfs_init_idmap) + return (gid); +#endif +#ifdef HAVE_IDMAP_NO_USERNS + struct user_namespace ns; + ns.gid_map = mnt_userns->gid_map; + owner = &ns; +#else + owner = idmap_owner(mnt_userns); +#endif if (zfs_no_idmapping(owner, fs_userns)) return (gid); if (!zfs_is_init_userns(fs_userns)) @@ -105,7 +128,18 @@ static inline gid_t zfs_gid_to_vfsgid(zidmap_t *mnt_userns, static inline uid_t zfs_vfsuid_to_uid(zidmap_t *mnt_userns, struct user_namespace *fs_userns, uid_t uid) { - struct user_namespace *owner = idmap_owner(mnt_userns); + struct user_namespace *owner; 
+#ifdef HAVE_IOPS_CREATE_IDMAP + if (mnt_userns == zfs_init_idmap) + return (uid); +#endif +#ifdef HAVE_IDMAP_NO_USERNS + struct user_namespace ns; + ns.uid_map = mnt_userns->uid_map; + owner = &ns; +#else + owner = idmap_owner(mnt_userns); +#endif if (zfs_no_idmapping(owner, fs_userns)) return (uid); uid = from_kuid(owner, KUIDT_INIT(uid)); @@ -119,7 +153,18 @@ static inline uid_t zfs_vfsuid_to_uid(zidmap_t *mnt_userns, static inline gid_t zfs_vfsgid_to_gid(zidmap_t *mnt_userns, struct user_namespace *fs_userns, gid_t gid) { - struct user_namespace *owner = idmap_owner(mnt_userns); + struct user_namespace *owner; +#ifdef HAVE_IOPS_CREATE_IDMAP + if (mnt_userns == zfs_init_idmap) + return (gid); +#endif +#ifdef HAVE_IDMAP_NO_USERNS + struct user_namespace ns; + ns.gid_map = mnt_userns->gid_map; + owner = &ns; +#else + owner = idmap_owner(mnt_userns); +#endif if (zfs_no_idmapping(owner, fs_userns)) return (gid); gid = from_kgid(owner, KGIDT_INIT(gid)); diff --git a/include/os/linux/spl/sys/types.h b/include/os/linux/spl/sys/types.h index d89a91c36f..20ba457f7e 100644 --- a/include/os/linux/spl/sys/types.h +++ b/include/os/linux/spl/sys/types.h @@ -57,12 +57,23 @@ typedef int minor_t; struct user_namespace; #ifdef HAVE_IOPS_CREATE_IDMAP #include +#ifdef HAVE_IDMAP_NO_USERNS +#include +struct mnt_idmap { + struct uid_gid_map uid_map; + struct uid_gid_map gid_map; + refcount_t count; +}; +typedef struct mnt_idmap zidmap_t; +#define idmap_owner(p) (NULL) +#else struct mnt_idmap { struct user_namespace *owner; refcount_t count; }; typedef struct mnt_idmap zidmap_t; #define idmap_owner(p) (((struct mnt_idmap *)p)->owner) +#endif #else typedef struct user_namespace zidmap_t; #define idmap_owner(p) ((struct user_namespace *)p) From 992d8871ebe172ab8da6e08ac7c31344267f6cdd Mon Sep 17 00:00:00 2001 From: Tony Hutter Date: Mon, 11 Dec 2023 09:59:59 -0800 Subject: [PATCH 43/45] ZTS: Add dirty dnode stress test Add a test for the dirty dnode SEEK_HOLE/SEEK_DATA bug described in 
https://github.com/openzfs/zfs/issues/15526 The bug was fixed in https://github.com/openzfs/zfs/pull/15571 and was backported to 2.2.2 and 2.1.14. This test case is just to make sure it does not come back. seekflood.c originally written by Rob Norris. Reviewed-by: Graham Perrin Reviewed-by: Brian Behlendorf Reviewed-by: Rob Norris Signed-off-by: Tony Hutter Closes #15608 --- tests/runfiles/common.run | 2 +- tests/zfs-tests/Makefile.am | 3 + tests/zfs-tests/tests/Makefile.am | 1 + .../tests/functional/cp_files/.gitignore | 1 + .../tests/functional/cp_files/cp_stress.ksh | 73 +++++++ .../tests/functional/cp_files/seekflood.c | 180 ++++++++++++++++++ 6 files changed, 259 insertions(+), 1 deletion(-) create mode 100644 tests/zfs-tests/tests/functional/cp_files/.gitignore create mode 100755 tests/zfs-tests/tests/functional/cp_files/cp_stress.ksh create mode 100644 tests/zfs-tests/tests/functional/cp_files/seekflood.c diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run index 85f29c8220..a3550d26ab 100644 --- a/tests/runfiles/common.run +++ b/tests/runfiles/common.run @@ -630,7 +630,7 @@ tests = ['compress_001_pos', 'compress_002_pos', 'compress_003_pos', tags = ['functional', 'compression'] [tests/functional/cp_files] -tests = ['cp_files_001_pos'] +tests = ['cp_files_001_pos', 'cp_stress'] tags = ['functional', 'cp_files'] [tests/functional/crtime] diff --git a/tests/zfs-tests/Makefile.am b/tests/zfs-tests/Makefile.am index f816635248..3dd1a64527 100644 --- a/tests/zfs-tests/Makefile.am +++ b/tests/zfs-tests/Makefile.am @@ -13,6 +13,9 @@ scripts_zfs_tests_functional_hkdf_PROGRAMS = %D%/tests/functional/hkdf/hkdf_test %C%_tests_functional_hkdf_hkdf_test_LDADD = \ libzpool.la +scripts_zfs_tests_functional_cp_filesdir = $(datadir)/$(PACKAGE)/zfs-tests/tests/functional/cp_files +scripts_zfs_tests_functional_cp_files_PROGRAMS = %D%/tests/functional/cp_files/seekflood + if BUILD_LINUX scripts_zfs_tests_functional_tmpfiledir = 
$(datadir)/$(PACKAGE)/zfs-tests/tests/functional/tmpfile scripts_zfs_tests_functional_tmpfile_PROGRAMS = \ diff --git a/tests/zfs-tests/tests/Makefile.am b/tests/zfs-tests/tests/Makefile.am index 19174c71fb..8bee07f480 100644 --- a/tests/zfs-tests/tests/Makefile.am +++ b/tests/zfs-tests/tests/Makefile.am @@ -1393,6 +1393,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/compression/setup.ksh \ functional/cp_files/cleanup.ksh \ functional/cp_files/cp_files_001_pos.ksh \ + functional/cp_files/cp_stress.ksh \ functional/cp_files/setup.ksh \ functional/crtime/cleanup.ksh \ functional/crtime/crtime_001_pos.ksh \ diff --git a/tests/zfs-tests/tests/functional/cp_files/.gitignore b/tests/zfs-tests/tests/functional/cp_files/.gitignore new file mode 100644 index 0000000000..d15225ac84 --- /dev/null +++ b/tests/zfs-tests/tests/functional/cp_files/.gitignore @@ -0,0 +1 @@ +seekflood diff --git a/tests/zfs-tests/tests/functional/cp_files/cp_stress.ksh b/tests/zfs-tests/tests/functional/cp_files/cp_stress.ksh new file mode 100755 index 0000000000..43bb8ab572 --- /dev/null +++ b/tests/zfs-tests/tests/functional/cp_files/cp_stress.ksh @@ -0,0 +1,73 @@ +#! /bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END + +# +# Copyright (c) 2023 by Lawrence Livermore National Security, LLC. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# +# https://github.com/openzfs/zfs/issues/15526 identified a dirty dnode +# SEEK_HOLE/SEEK_DATA bug. https://github.com/openzfs/zfs/pull/15571 +# fixed the bug, and was backported to 2.1.14 and 2.2.2. +# +# This test is to ensure that the bug, as understood, will not recur. +# +# STRATEGY: +# +# 1. Run the 'seekflood' binary, for creation of files with timing +# characteristics that can trigger #15526. +# 2. A single run is not always a trigger, so run repeatedly. + +verify_runnable "global" + +function cleanup +{ + rm -rf /$TESTPOOL/cp_stress +} + +log_assert "Run the 'seekflood' binary repeatedly to try to trigger #15526" + +log_onexit cleanup + +log_must mkdir /$TESTPOOL/cp_stress + +MYPWD="$PWD" +cd /$TESTPOOL/cp_stress +CPUS=$(get_num_cpus) + +if is_freebsd ; then + # 'seekflood' takes longer on FreeBSD and can timeout the test + RUNS=3 +else + RUNS=10 +fi + +for i in $(seq 1 $RUNS) ; do + # Each run takes around 12 seconds.
+ log_must $STF_SUITE/tests/functional/cp_files/seekflood 2000 $CPUS +done +cd "$MYPWD" + +log_pass "No corruption detected" diff --git a/tests/zfs-tests/tests/functional/cp_files/seekflood.c b/tests/zfs-tests/tests/functional/cp_files/seekflood.c new file mode 100644 index 0000000000..02c2c8e6ec --- /dev/null +++ b/tests/zfs-tests/tests/functional/cp_files/seekflood.c @@ -0,0 +1,180 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright (c) 2023, Rob Norris + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#include <fcntl.h> +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <sys/stat.h> +#include <sys/wait.h> + +#define DATASIZE (4096) +char data[DATASIZE]; + +static int +_open_file(int n, int wr) +{ + char buf[256]; + int fd; + + snprintf(buf, sizeof (buf), "testdata_%d_%d", getpid(), n); + + if ((fd = open(buf, wr ? (O_WRONLY | O_CREAT) : O_RDONLY, + wr ?
(S_IRUSR | S_IWUSR) : 0)) < 0) { + fprintf(stderr, "Error: open '%s' (%s): %s\n", + buf, wr ? "write" : "read", strerror(errno)); + exit(1); + } + + return (fd); +} + +static void +_write_file(int n, int fd) +{ + /* write a big ball of stuff */ + ssize_t nwr = write(fd, data, DATASIZE); + if (nwr < 0) { + fprintf(stderr, "Error: write '%d_%d': %s\n", + getpid(), n, strerror(errno)); + exit(1); + } else if (nwr < DATASIZE) { + fprintf(stderr, "Error: write '%d_%d': short write\n", getpid(), + n); + exit(1); + } +} + +static int +_seek_file(int n, int fd) +{ + struct stat st; + if (fstat(fd, &st) < 0) { + fprintf(stderr, "Error: fstat '%d_%d': %s\n", getpid(), n, + strerror(errno)); + exit(1); + } + + /* + * A zero-sized file correctly has no data, so seeking the file is + * pointless. + */ + if (st.st_size == 0) + return (0); + + /* size is real, and we only write, so SEEK_DATA must find something */ + if (lseek(fd, 0, SEEK_DATA) < 0) { + if (errno == ENXIO) + return (1); + fprintf(stderr, "Error: lseek '%d_%d': %s\n", + getpid(), n, strerror(errno)); + exit(2); + } + + return (0); +} + +int +main(int argc, char **argv) +{ + int nfiles = 0; + int nthreads = 0; + + if (argc < 3 || (nfiles = atoi(argv[1])) == 0 || + (nthreads = atoi(argv[2])) == 0) { + printf("usage: seekflood <nfiles> <threads>\n"); + exit(1); + } + + memset(data, 0x5a, DATASIZE); + + /* fork off some flood threads */ + for (int i = 0; i < nthreads; i++) { + if (!fork()) { + /* thread main */ + + /* create zero file */ + int fd = _open_file(0, 1); + _write_file(0, fd); + close(fd); + + int count = 0; + + int h = 0, i, j, rfd, wfd; + for (i = 0; i < nfiles; i += 2, h++) { + j = i+1; + + /* seek h, write i */ + rfd = _open_file(h, 0); + wfd = _open_file(i, 1); + count += _seek_file(h, rfd); + _write_file(i, wfd); + close(rfd); + close(wfd); + + /* seek i, write j */ + rfd = _open_file(i, 0); + wfd = _open_file(j, 1); + count += _seek_file(i, rfd); + _write_file(j, wfd); + close(rfd); + close(wfd); + } + + /* return
count of failed seeks to parent */ + exit(count < 256 ? count : 255); + } + } + + /* wait for threads, take their seek fail counts from exit code */ + int count = 0, crashed = 0; + for (int i = 0; i < nthreads; i++) { + int wstatus; + wait(&wstatus); + if (WIFEXITED(wstatus)) + count += WEXITSTATUS(wstatus); + else + crashed++; + } + + if (crashed) { + fprintf(stderr, "Error: child crashed; test failed\n"); + exit(1); + } + + if (count) { + fprintf(stderr, "Error: %d seek failures; test failed\n", + count); + exit(1); + } + + exit(0); +} From 59112ca27d94edd793dbfda6ed5d2fc7a97dddaa Mon Sep 17 00:00:00 2001 From: Tony Hutter Date: Tue, 7 Nov 2023 09:09:24 -0800 Subject: [PATCH 44/45] zed: misc vdev_enc_sysfs_path fixes There have been rare cases where the VDEV_ENC_SYSFS_PATH value that zed gets passed is stale. To mitigate this, dynamically check the sysfs path at the time of zed event processing, and use the dynamic value if possible. Note that there will be other times when we can not dynamically detect the sysfs path (like if a disk disappears) and have to rely on the old value for things like turning on the fault LED. That is to say, we can't just blindly use the dynamic path in every case. 
Also: - Add enclosure sysfs entry when running 'zpool add' - Fix 'slot' and 'enc' zpool.d scripts for nvme Reviewed-by: Don Brady Reviewed-by: Brian Behlendorf Signed-off-by: Tony Hutter Closes #15462 --- cmd/zed/agents/zfs_mod.c | 4 +++ cmd/zed/zed_event.c | 31 +++++++++++++++++++++++ cmd/zpool/zpool.d/ses | 12 +++++++-- cmd/zpool/zpool_vdev.c | 4 +++ include/libzutil.h | 2 ++ lib/libzfs/libzfs.abi | 7 +++++ lib/libzutil/os/freebsd/zutil_import_os.c | 9 +++++++ lib/libzutil/os/linux/zutil_import_os.c | 17 ++++++++----- 8 files changed, 78 insertions(+), 8 deletions(-) diff --git a/cmd/zed/agents/zfs_mod.c b/cmd/zed/agents/zfs_mod.c index 9636c99fc8..69163b80bd 100644 --- a/cmd/zed/agents/zfs_mod.c +++ b/cmd/zed/agents/zfs_mod.c @@ -233,8 +233,12 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled) } (void) nvlist_lookup_string(vdev, ZPOOL_CONFIG_PHYS_PATH, &physpath); + + update_vdev_config_dev_sysfs_path(vdev, path, + ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH); (void) nvlist_lookup_string(vdev, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH, &enc_sysfs_path); + (void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk); (void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_OFFLINE, &offline); (void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_FAULTED, &faulted); diff --git a/cmd/zed/zed_event.c b/cmd/zed/zed_event.c index c60d5a4bc2..7e58676922 100644 --- a/cmd/zed/zed_event.c +++ b/cmd/zed/zed_event.c @@ -35,6 +35,7 @@ #include "zed_strings.h" #include "agents/zfs_agents.h" +#include #define MAXBUF 4096 @@ -922,6 +923,25 @@ _zed_event_add_time_strings(uint64_t eid, zed_strings_t *zsp, int64_t etime[]) } } + +static void +_zed_event_update_enc_sysfs_path(nvlist_t *nvl) +{ + const char *vdev_path; + + if (nvlist_lookup_string(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_PATH, + &vdev_path) != 0) { + return; /* some other kind of event, ignore it */ + } + + if (vdev_path == NULL) { + return; + } + + update_vdev_config_dev_sysfs_path(nvl, vdev_path, + 
FM_EREPORT_PAYLOAD_ZFS_VDEV_ENC_SYSFS_PATH); +} + /* * Service the next zevent, blocking until one is available. */ @@ -969,6 +989,17 @@ zed_event_service(struct zed_conf *zcp) zed_log_msg(LOG_WARNING, "Failed to lookup zevent class (eid=%llu)", eid); } else { + /* + * Special case: If we can dynamically detect an enclosure sysfs + * path, then use that value rather than the one stored in the + * vd->vdev_enc_sysfs_path. There have been rare cases where + * vd->vdev_enc_sysfs_path becomes outdated. However, there + * will be other times when we can not dynamically detect the + * sysfs path (like if a disk disappears) and have to rely on + * the old value for things like turning on the fault LED. + */ + _zed_event_update_enc_sysfs_path(nvl); + /* let internal modules see this event first */ zfs_agent_post_event(class, NULL, nvl); diff --git a/cmd/zpool/zpool.d/ses b/cmd/zpool/zpool.d/ses index 638145c95d..19ef92ad67 100755 --- a/cmd/zpool/zpool.d/ses +++ b/cmd/zpool/zpool.d/ses @@ -33,10 +33,18 @@ for i in $scripts ; do val="" case $i in enc) - val=$(ls "$VDEV_ENC_SYSFS_PATH/../../" 2>/dev/null) + if echo "$VDEV_ENC_SYSFS_PATH" | grep -q '/sys/bus/pci/slots' ; then + val="$VDEV_ENC_SYSFS_PATH" + else + val="$(ls """$VDEV_ENC_SYSFS_PATH/../../""" 2>/dev/null)" + fi ;; slot) - val=$(cat "$VDEV_ENC_SYSFS_PATH/slot" 2>/dev/null) + if echo "$VDEV_ENC_SYSFS_PATH" | grep -q '/sys/bus/pci/slots' ; then + val="$(basename """$VDEV_ENC_SYSFS_PATH""")" + else + val="$(cat """$VDEV_ENC_SYSFS_PATH/slot""" 2>/dev/null)" + fi ;; encdev) val=$(ls "$VDEV_ENC_SYSFS_PATH/../device/scsi_generic" 2>/dev/null) diff --git a/cmd/zpool/zpool_vdev.c b/cmd/zpool/zpool_vdev.c index 3d0fc089c3..fbd4b81dfa 100644 --- a/cmd/zpool/zpool_vdev.c +++ b/cmd/zpool/zpool_vdev.c @@ -372,6 +372,10 @@ make_leaf_vdev(nvlist_t *props, const char *arg, boolean_t is_primary) verify(nvlist_add_string(vdev, ZPOOL_CONFIG_PATH, path) == 0); verify(nvlist_add_string(vdev, ZPOOL_CONFIG_TYPE, type) == 0); + /* Lookup 
and add the enclosure sysfs path (if exists) */ + update_vdev_config_dev_sysfs_path(vdev, path, + ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH); + if (strcmp(type, VDEV_TYPE_DISK) == 0) verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK, (uint64_t)wholedisk) == 0); diff --git a/include/libzutil.h b/include/libzutil.h index 053b1ed4b5..9842c225b6 100644 --- a/include/libzutil.h +++ b/include/libzutil.h @@ -208,6 +208,8 @@ int for_each_vdev_cb(void *zhp, nvlist_t *nv, pool_vdev_iter_f func, int for_each_vdev_in_nvlist(nvlist_t *nvroot, pool_vdev_iter_f func, void *data); void update_vdevs_config_dev_sysfs_path(nvlist_t *config); +_LIBZUTIL_H void update_vdev_config_dev_sysfs_path(nvlist_t *nv, + const char *path, const char *key); #ifdef __cplusplus } #endif diff --git a/lib/libzfs/libzfs.abi b/lib/libzfs/libzfs.abi index 2d612a16b2..3c975397ed 100644 --- a/lib/libzfs/libzfs.abi +++ b/lib/libzfs/libzfs.abi @@ -260,6 +260,7 @@ + @@ -8329,6 +8330,12 @@ + + + + + + diff --git a/lib/libzutil/os/freebsd/zutil_import_os.c b/lib/libzutil/os/freebsd/zutil_import_os.c index 19ba58e79a..a134c173bc 100644 --- a/lib/libzutil/os/freebsd/zutil_import_os.c +++ b/lib/libzutil/os/freebsd/zutil_import_os.c @@ -249,6 +249,15 @@ zfs_dev_flush(int fd) return (0); } +void +update_vdev_config_dev_sysfs_path(nvlist_t *nv, const char *path, + const char *key) +{ + (void) nv; + (void) path; + (void) key; +} + void update_vdevs_config_dev_sysfs_path(nvlist_t *config) { diff --git a/lib/libzutil/os/linux/zutil_import_os.c b/lib/libzutil/os/linux/zutil_import_os.c index 44ed697dd4..fbfae4f7e6 100644 --- a/lib/libzutil/os/linux/zutil_import_os.c +++ b/lib/libzutil/os/linux/zutil_import_os.c @@ -766,9 +766,12 @@ no_dev: * Rescan the enclosure sysfs path for turning on enclosure LEDs and store it * in the nvlist * (if applicable). 
Like: * vdev_enc_sysfs_path: '/sys/class/enclosure/11:0:1:0/SLOT 4' + * + * key: The nvlist_t name (like ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH) */ -static void -update_vdev_config_dev_sysfs_path(nvlist_t *nv, const char *path) +void +update_vdev_config_dev_sysfs_path(nvlist_t *nv, const char *path, + const char *key) { char *upath, *spath; @@ -777,9 +780,9 @@ update_vdev_config_dev_sysfs_path(nvlist_t *nv, const char *path) spath = zfs_get_enclosure_sysfs_path(upath); if (spath) { - nvlist_add_string(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH, spath); + (void) nvlist_add_string(nv, key, spath); } else { - nvlist_remove_all(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH); + (void) nvlist_remove_all(nv, key); } free(upath); @@ -799,7 +802,8 @@ sysfs_path_pool_vdev_iter_f(void *hdl_data, nvlist_t *nv, void *data) return (1); /* Rescan our enclosure sysfs path for this vdev */ - update_vdev_config_dev_sysfs_path(nv, path); + update_vdev_config_dev_sysfs_path(nv, path, + ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH); return (0); } @@ -888,7 +892,8 @@ update_vdev_config_dev_strs(nvlist_t *nv) (void) nvlist_add_string(nv, ZPOOL_CONFIG_PHYS_PATH, vds.vds_devphys); } - update_vdev_config_dev_sysfs_path(nv, path); + update_vdev_config_dev_sysfs_path(nv, path, + ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH); } else { /* Clear out any stale entries. */ (void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID); From 69142125d75b7405e0f1cf141dbe7913448daedf Mon Sep 17 00:00:00 2001 From: Tony Hutter Date: Thu, 21 Dec 2023 10:53:16 -0800 Subject: [PATCH 45/45] zpool: Add slot power control, print power status Add `zpool` flags to control the slot power to drives. This assumes your SAS or NVMe enclosure supports slot power control via sysfs. 
The new `--power` flag is added to `zpool offline|online|clear`: zpool offline --power Turn off device slot power zpool online --power Turn on device slot power zpool clear --power [device] Turn on device slot power If the ZPOOL_AUTO_POWER_ON_SLOT env var is set, then the '--power' option is automatically implied for `zpool online` and `zpool clear` and does not need to be passed. zpool status also gets a --power option to print the slot power status. Reviewed-by: Brian Behlendorf Reviewed-by: Mart Frauenlob Signed-off-by: Tony Hutter Closes #15662 --- cmd/zpool/os/freebsd/zpool_vdev_os.c | 14 ++ cmd/zpool/os/linux/zpool_vdev_os.c | 255 ++++++++++++++++++++++++ cmd/zpool/zpool_iter.c | 4 + cmd/zpool/zpool_main.c | 239 +++++++++++++++++++--- cmd/zpool/zpool_util.h | 3 + include/libzfs.h | 3 + include/libzutil.h | 57 ++++++ lib/libzfs/libzfs.abi | 97 +++++++-- lib/libzfs/libzfs_pool.c | 49 ++++- lib/libzutil/os/linux/zutil_import_os.c | 40 +++- lib/libzutil/zutil_import.c | 98 +++++++++ lib/libzutil/zutil_pool.c | 31 +++ man/man8/zpool-clear.8 | 11 + man/man8/zpool-offline.8 | 18 +- man/man8/zpool-status.8 | 2 + man/man8/zpool.8 | 19 +- 16 files changed, 875 insertions(+), 65 deletions(-) diff --git a/cmd/zpool/os/freebsd/zpool_vdev_os.c b/cmd/zpool/os/freebsd/zpool_vdev_os.c index 231ca97f1f..9dd733989e 100644 --- a/cmd/zpool/os/freebsd/zpool_vdev_os.c +++ b/cmd/zpool/os/freebsd/zpool_vdev_os.c @@ -124,3 +124,17 @@ check_file(const char *file, boolean_t force, boolean_t isspare) { return (check_file_generic(file, force, isspare)); } + +int +zpool_power_current_state(zpool_handle_t *zhp, char *vdev) +{ + /* Enclosure slot power not supported on FreeBSD yet */ + return (-1); +} + +int +zpool_power(zpool_handle_t *zhp, char *vdev, boolean_t turn_on) +{ + /* Enclosure slot power not supported on FreeBSD yet */ + return (ENOTSUP); +} diff --git a/cmd/zpool/os/linux/zpool_vdev_os.c b/cmd/zpool/os/linux/zpool_vdev_os.c index 7f4486e062..006a3a7d8e 100644 --- 
a/cmd/zpool/os/linux/zpool_vdev_os.c +++ b/cmd/zpool/os/linux/zpool_vdev_os.c @@ -416,3 +416,258 @@ check_file(const char *file, boolean_t force, boolean_t isspare) { return (check_file_generic(file, force, isspare)); } + +/* + * Read from a sysfs file and return an allocated string. Removes + * the newline from the end of the string if there is one. + * + * Returns a string on success (which must be freed), or NULL on error. + */ +static char *zpool_sysfs_gets(char *path) +{ + int fd; + struct stat statbuf; + char *buf = NULL; + ssize_t count = 0; + fd = open(path, O_RDONLY); + if (fd < 0) + return (NULL); + + if (fstat(fd, &statbuf) != 0) { + close(fd); + return (NULL); + } + + buf = calloc(sizeof (*buf), statbuf.st_size + 1); + if (buf == NULL) { + close(fd); + return (NULL); + } + + /* + * Note, we can read less bytes than st_size, and that's ok. Sysfs + * files will report their size is 4k even if they only return a small + * string. + */ + count = read(fd, buf, statbuf.st_size); + if (count < 0) { + /* Error doing read() or we overran the buffer */ + close(fd); + free(buf); + return (NULL); + } + + /* Remove trailing newline (an empty read leaves nothing to trim) */ + if (count > 0 && buf[count - 1] == '\n') + buf[count - 1] = 0; + + close(fd); + + return (buf); +} + +/* + * Write a string to a sysfs file. + * + * Returns 0 on success, non-zero otherwise. + */ +static int zpool_sysfs_puts(char *path, char *str) +{ + FILE *file; + + file = fopen(path, "w"); + if (!file) { + return (-1); + } + + if (fputs(str, file) < 0) { + fclose(file); + return (-2); + } + /* Buffered data is flushed here, so this is where a write fails */ + return (fclose(file) == 0 ? 0 : -2); +} + +/* Given a vdev nvlist_t, rescan its enclosure sysfs path */ +static void +rescan_vdev_config_dev_sysfs_path(nvlist_t *vdev_nv) +{ + update_vdev_config_dev_sysfs_path(vdev_nv, + fnvlist_lookup_string(vdev_nv, ZPOOL_CONFIG_PATH), + ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH); +} + +/* + * Given a power string: "on", "off", "1", or "0", return 0 if it's an + * off value, 1 if it's an on value, and -1 if the value is unrecognized.
+ */ +static int zpool_power_parse_value(char *str) +{ + if ((strcmp(str, "off") == 0) || (strcmp(str, "0") == 0)) + return (0); + + if ((strcmp(str, "on") == 0) || (strcmp(str, "1") == 0)) + return (1); + + return (-1); +} + +/* + * Given a vdev string return an allocated string containing the sysfs path to + * its power control file. Also do a check if the power control file really + * exists and has correct permissions. + * + * Example returned strings: + * + * /sys/class/enclosure/0:0:122:0/10/power_status + * /sys/bus/pci/slots/10/power + * + * Returns allocated string on success (which must be freed), NULL on failure. + */ +static char * +zpool_power_sysfs_path(zpool_handle_t *zhp, char *vdev) +{ + const char *enc_sysfs_dir = NULL; + char *path = NULL; + nvlist_t *vdev_nv = zpool_find_vdev(zhp, vdev, NULL, NULL, NULL); + + if (vdev_nv == NULL) { + return (NULL); + } + + /* Make sure we're getting the updated enclosure sysfs path */ + rescan_vdev_config_dev_sysfs_path(vdev_nv); + + if (nvlist_lookup_string(vdev_nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH, + &enc_sysfs_dir) != 0) { + return (NULL); + } + + if (asprintf(&path, "%s/power_status", enc_sysfs_dir) == -1) + return (NULL); + + if (access(path, W_OK) != 0) { + free(path); + path = NULL; + /* No HDD 'power_status' file, maybe it's NVMe? */ + if (asprintf(&path, "%s/power", enc_sysfs_dir) == -1) { + return (NULL); + } + + if (access(path, R_OK | W_OK) != 0) { + /* Not NVMe either */ + free(path); + return (NULL); + } + } + + return (path); +} + +/* + * Given a path to a sysfs power control file, return B_TRUE if you should use + * "on/off" words to control it, or B_FALSE otherwise ("0/1" to control). + */ +static boolean_t +zpool_power_use_word(char *sysfs_path) +{ + size_t len = strlen(sysfs_path), suffix_len = strlen("power_status"); + if (len >= suffix_len && strcmp(sysfs_path + len - suffix_len, + "power_status") == 0) + return (B_TRUE); + return (B_FALSE); +} + +/* + * Check the sysfs power control value for a vdev.
+ * + * Returns: + * 0 - Power is off + * 1 - Power is on + * -1 - Error or unsupported + */ +int +zpool_power_current_state(zpool_handle_t *zhp, char *vdev) +{ + char *val; + int rc; + + char *path = zpool_power_sysfs_path(zhp, vdev); + if (path == NULL) + return (-1); + + val = zpool_sysfs_gets(path); + if (val == NULL) { + free(path); + return (-1); + } + + rc = zpool_power_parse_value(val); + free(val); + free(path); + return (rc); +} + +/* + * Turn on or off the slot to a device + * + * Device path is the full path to the device (like /dev/sda or /dev/sda1). + * + * Return code: + * 0: Success + * ENOTSUP: Power control not supported for OS + * EBADSLT: Couldn't read current power state + * ENOENT: No sysfs path to power control + * EIO: Couldn't write sysfs power value + * EBADE: Sysfs power value didn't change + */ +int +zpool_power(zpool_handle_t *zhp, char *vdev, boolean_t turn_on) +{ + char *sysfs_path; + const char *val; + int rc; + int timeout_ms; + + rc = zpool_power_current_state(zhp, vdev); + if (rc == -1) { + return (EBADSLT); + } + + /* Already correct value? */ + if (rc == (int)turn_on) + return (0); + + sysfs_path = zpool_power_sysfs_path(zhp, vdev); + if (sysfs_path == NULL) + return (ENOENT); + + if (zpool_power_use_word(sysfs_path)) { + val = turn_on ? "on" : "off"; + } else { + val = turn_on ? "1" : "0"; + } + + rc = zpool_sysfs_puts(sysfs_path, (char *)val); + + free(sysfs_path); + if (rc != 0) { + return (EIO); + } + + /* + * Wait up to 30 seconds for sysfs power value to change after + * writing it. 
+ */ + timeout_ms = zpool_getenv_int("ZPOOL_POWER_ON_SLOT_TIMEOUT_MS", 30000); + for (int i = 0; i < MAX(1, timeout_ms / 200); i++) { + rc = zpool_power_current_state(zhp, vdev); + if (rc == (int)turn_on) + return (0); /* success */ + + fsleep(0.200); /* 200ms */ + } + + /* sysfs value never changed */ + return (EBADE); +} diff --git a/cmd/zpool/zpool_iter.c b/cmd/zpool/zpool_iter.c index 506b529dce..ae2e9da910 100644 --- a/cmd/zpool/zpool_iter.c +++ b/cmd/zpool/zpool_iter.c @@ -554,6 +554,10 @@ for_each_vdev_run_cb(void *zhp_data, nvlist_t *nv, void *cb_vcdl) if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0) return (1); + /* Make sure we're getting the updated enclosure sysfs path */ + update_vdev_config_dev_sysfs_path(nv, path, + ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH); + nvlist_lookup_string(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH, &vdev_enc_sysfs_path); diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index 5f96dc8d00..6687a44644 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -353,7 +353,7 @@ get_usage(zpool_help_t idx) return (gettext("\tattach [-fsw] [-o property=value] " " \n")); case HELP_CLEAR: - return (gettext("\tclear [-nF] [device]\n")); + return (gettext("\tclear [[--power]|[-nF]] [device]\n")); case HELP_CREATE: return (gettext("\tcreate [-fnd] [-o property=value] ... \n" "\t [-O file-system-property=value] ... \n" @@ -389,9 +389,11 @@ get_usage(zpool_help_t idx) "[-T d|u] [pool] ... 
\n" "\t [interval [count]]\n")); case HELP_OFFLINE: - return (gettext("\toffline [-f] [-t] ...\n")); + return (gettext("\toffline [--power]|[[-f][-t]] " + " ...\n")); case HELP_ONLINE: - return (gettext("\tonline [-e] ...\n")); + return (gettext("\tonline [--power][-e] " + "...\n")); case HELP_REPLACE: return (gettext("\treplace [-fsw] [-o property=value] " " [new-device]\n")); @@ -410,7 +412,7 @@ get_usage(zpool_help_t idx) return (gettext("\ttrim [-dw] [-r ] [-c | -s] " "[ ...]\n")); case HELP_STATUS: - return (gettext("\tstatus [-c [script1,script2,...]] " + return (gettext("\tstatus [--power] [-c [script1,script2,...]] " "[-igLpPstvxD] [-T d|u] [pool] ... \n" "\t [interval [count]]\n")); case HELP_UPGRADE: @@ -516,6 +518,77 @@ print_vdev_prop_cb(int prop, void *cb) return (ZPROP_CONT); } +/* + * Given a leaf vdev name like 'L5' return its VDEV_CONFIG_PATH like + * '/dev/disk/by-vdev/L5'. + */ +static const char * +vdev_name_to_path(zpool_handle_t *zhp, char *vdev) +{ + nvlist_t *vdev_nv = zpool_find_vdev(zhp, vdev, NULL, NULL, NULL); + if (vdev_nv == NULL) { + return (NULL); + } + return (fnvlist_lookup_string(vdev_nv, ZPOOL_CONFIG_PATH)); +} + +static int +zpool_power_on(zpool_handle_t *zhp, char *vdev) +{ + return (zpool_power(zhp, vdev, B_TRUE)); +} + +static int +zpool_power_on_and_disk_wait(zpool_handle_t *zhp, char *vdev) +{ + int rc; + + rc = zpool_power_on(zhp, vdev); + if (rc != 0) + return (rc); + + zpool_disk_wait(vdev_name_to_path(zhp, vdev)); + + return (0); +} + +static int +zpool_power_on_pool_and_wait_for_devices(zpool_handle_t *zhp) +{ + nvlist_t *nv; + const char *path = NULL; + int rc; + + /* Power up all the devices first */ + FOR_EACH_REAL_LEAF_VDEV(zhp, nv) { + path = fnvlist_lookup_string(nv, ZPOOL_CONFIG_PATH); + if (path != NULL) { + rc = zpool_power_on(zhp, (char *)path); + if (rc != 0) { + return (rc); + } + } + } + + /* + * Wait for their devices to show up. 
Since we powered them on + * at roughly the same time, they should all come online around + * the same time. + */ + FOR_EACH_REAL_LEAF_VDEV(zhp, nv) { + path = fnvlist_lookup_string(nv, ZPOOL_CONFIG_PATH); + zpool_disk_wait(path); + } + + return (0); +} + +static int +zpool_power_off(zpool_handle_t *zhp, char *vdev) +{ + return (zpool_power(zhp, vdev, B_FALSE)); +} + /* * Display usage message. If we're inside a command, display only the usage for * that command. Otherwise, iterate over the entire command table and display @@ -2093,6 +2166,7 @@ typedef struct status_cbdata { boolean_t cb_print_vdev_init; boolean_t cb_print_vdev_trim; vdev_cmd_data_list_t *vcdl; + boolean_t cb_print_power; } status_cbdata_t; /* Return 1 if string is NULL, empty, or whitespace; return 0 otherwise. */ @@ -2378,6 +2452,26 @@ print_status_config(zpool_handle_t *zhp, status_cbdata_t *cb, const char *name, else printf(" %5s", rbuf); } + if (cb->cb_print_power) { + if (children == 0) { + /* Only leaf vdevs have physical slots */ + switch (zpool_power_current_state(zhp, (char *) + fnvlist_lookup_string(nv, + ZPOOL_CONFIG_PATH))) { + case 0: + printf_color(ANSI_RED, " %5s", + gettext("off")); + break; + case 1: + printf(" %5s", gettext("on")); + break; + default: + printf(" %5s", "-"); + } + } else { + printf(" %5s", "-"); + } + } } if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, @@ -5428,19 +5522,6 @@ get_interval_count_filter_guids(int *argc, char **argv, float *interval, interval, count); } -/* - * Floating point sleep(). Allows you to pass in a floating point value for - * seconds. - */ -static void -fsleep(float sec) -{ - struct timespec req; - req.tv_sec = floor(sec); - req.tv_nsec = (sec - (float)req.tv_sec) * NANOSEC; - nanosleep(&req, NULL); -} - /* * Terminal height, in rows. Returns -1 if stdout is not connected to a TTY or * if we were unable to determine its size. 
@@ -6939,10 +7020,12 @@ zpool_do_split(int argc, char **argv) return (ret); } - +#define POWER_OPT 1024 /* - * zpool online ... + * zpool online [--power] ... + * + * --power: Power on the enclosure slot to the drive (if possible) */ int zpool_do_online(int argc, char **argv) @@ -6953,13 +7036,21 @@ zpool_do_online(int argc, char **argv) int ret = 0; vdev_state_t newstate; int flags = 0; + boolean_t is_power_on = B_FALSE; + struct option long_options[] = { + {"power", no_argument, NULL, POWER_OPT}, + {0, 0, 0, 0} + }; /* check options */ - while ((c = getopt(argc, argv, "e")) != -1) { + while ((c = getopt_long(argc, argv, "e", long_options, NULL)) != -1) { switch (c) { case 'e': flags |= ZFS_ONLINE_EXPAND; break; + case POWER_OPT: + is_power_on = B_TRUE; + break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); @@ -6967,6 +7058,9 @@ zpool_do_online(int argc, char **argv) } } + if (libzfs_envvar_is_set("ZPOOL_AUTO_POWER_ON_SLOT")) + is_power_on = B_TRUE; + argc -= optind; argv += optind; @@ -6988,6 +7082,18 @@ zpool_do_online(int argc, char **argv) for (i = 1; i < argc; i++) { vdev_state_t oldstate; boolean_t avail_spare, l2cache; + int rc; + + if (is_power_on) { + rc = zpool_power_on_and_disk_wait(zhp, argv[i]); + if (rc == ENOTSUP) { + (void) fprintf(stderr, + gettext("Power control not supported\n")); + } + if (rc != 0) + return (rc); + } + nvlist_t *tgt = zpool_find_vdev(zhp, argv[i], &avail_spare, &l2cache, NULL); if (tgt == NULL) { @@ -7033,12 +7139,15 @@ zpool_do_online(int argc, char **argv) } /* - * zpool offline [-ft] ... + * zpool offline [-ft]|[--power] ... + * * * -f Force the device into a faulted state. * * -t Only take the device off-line temporarily. The offline/faulted * state will not be persistent across reboots. 
+ * + * --power Power off the enclosure slot to the drive (if possible) */ int zpool_do_offline(int argc, char **argv) @@ -7049,9 +7158,15 @@ zpool_do_offline(int argc, char **argv) int ret = 0; boolean_t istmp = B_FALSE; boolean_t fault = B_FALSE; + boolean_t is_power_off = B_FALSE; + + struct option long_options[] = { + {"power", no_argument, NULL, POWER_OPT}, + {0, 0, 0, 0} + }; /* check options */ - while ((c = getopt(argc, argv, "ft")) != -1) { + while ((c = getopt_long(argc, argv, "ft", long_options, NULL)) != -1) { switch (c) { case 'f': fault = B_TRUE; @@ -7059,6 +7174,9 @@ zpool_do_offline(int argc, char **argv) case 't': istmp = B_TRUE; break; + case POWER_OPT: + is_power_off = B_TRUE; + break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); @@ -7066,6 +7184,20 @@ zpool_do_offline(int argc, char **argv) } } + if (is_power_off && fault) { + (void) fprintf(stderr, + gettext("--power and -f cannot be used together\n")); + usage(B_FALSE); + return (1); + } + + if (is_power_off && istmp) { + (void) fprintf(stderr, + gettext("--power and -t cannot be used together\n")); + usage(B_FALSE); + return (1); + } + argc -= optind; argv += optind; @@ -7085,8 +7217,22 @@ zpool_do_offline(int argc, char **argv) return (1); for (i = 1; i < argc; i++) { - if (fault) { - uint64_t guid = zpool_vdev_path_to_guid(zhp, argv[i]); + uint64_t guid = zpool_vdev_path_to_guid(zhp, argv[i]); + if (is_power_off) { + /* + * Note: we have to power off first, then set REMOVED, + * or else zpool_vdev_set_removed_state() returns + * EAGAIN. 
+ */ + ret = zpool_power_off(zhp, argv[i]); + if (ret != 0) { + (void) fprintf(stderr, "%s %s %d\n", + gettext("unable to power off slot for"), + argv[i], ret); + } + zpool_vdev_set_removed_state(zhp, guid, VDEV_AUX_NONE); + + } else if (fault) { vdev_aux_t aux; if (istmp == B_FALSE) { /* Force the fault to persist across imports */ @@ -7109,7 +7255,7 @@ zpool_do_offline(int argc, char **argv) } /* - * zpool clear [device] + * zpool clear [-nF]|[--power] [device] * * Clear all errors associated with a pool or a particular device. */ @@ -7121,13 +7267,20 @@ zpool_do_clear(int argc, char **argv) boolean_t dryrun = B_FALSE; boolean_t do_rewind = B_FALSE; boolean_t xtreme_rewind = B_FALSE; + boolean_t is_power_on = B_FALSE; uint32_t rewind_policy = ZPOOL_NO_REWIND; nvlist_t *policy = NULL; zpool_handle_t *zhp; char *pool, *device; + struct option long_options[] = { + {"power", no_argument, NULL, POWER_OPT}, + {0, 0, 0, 0} + }; + /* check options */ - while ((c = getopt(argc, argv, "FnX")) != -1) { + while ((c = getopt_long(argc, argv, "FnX", long_options, + NULL)) != -1) { switch (c) { case 'F': do_rewind = B_TRUE; @@ -7138,6 +7291,9 @@ zpool_do_clear(int argc, char **argv) case 'X': xtreme_rewind = B_TRUE; break; + case POWER_OPT: + is_power_on = B_TRUE; + break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); @@ -7145,6 +7301,9 @@ zpool_do_clear(int argc, char **argv) } } + if (libzfs_envvar_is_set("ZPOOL_AUTO_POWER_ON_SLOT")) + is_power_on = B_TRUE; + argc -= optind; argv += optind; @@ -7185,6 +7344,14 @@ zpool_do_clear(int argc, char **argv) return (1); } + if (is_power_on) { + if (device == NULL) { + zpool_power_on_pool_and_wait_for_devices(zhp); + } else { + zpool_power_on_and_disk_wait(zhp, device); + } + } + if (zpool_clear(zhp, device, policy) != 0) ret = 1; @@ -8801,6 +8968,10 @@ status_callback(zpool_handle_t *zhp, void *data) printf_color(ANSI_BOLD, " %5s", gettext("SLOW")); } + if (cbp->cb_print_power) { + 
printf_color(ANSI_BOLD, " %5s", gettext("POWER")); + } + if (cbp->vcdl != NULL) print_cmd_columns(cbp->vcdl, 0); @@ -8847,8 +9018,8 @@ status_callback(zpool_handle_t *zhp, void *data) } /* - * zpool status [-c [script1,script2,...]] [-igLpPstvx] [-T d|u] [pool] ... - * [interval [count]] + * zpool status [-c [script1,script2,...]] [-igLpPstvx] [--power] [-T d|u] ... + * [pool] [interval [count]] * * -c CMD For each vdev, run command CMD * -i Display vdev initialization status. @@ -8862,6 +9033,7 @@ status_callback(zpool_handle_t *zhp, void *data) * -D Display dedup status (undocumented) * -t Display vdev TRIM status. * -T Display a timestamp in date(1) or Unix format + * --power Display vdev enclosure slot power status * * Describes the health status of all pools or some subset. */ @@ -8875,8 +9047,14 @@ zpool_do_status(int argc, char **argv) status_cbdata_t cb = { 0 }; char *cmd = NULL; + struct option long_options[] = { + {"power", no_argument, NULL, POWER_OPT}, + {0, 0, 0, 0} + }; + /* check options */ - while ((c = getopt(argc, argv, "c:igLpPsvxDtT:")) != -1) { + while ((c = getopt_long(argc, argv, "c:igLpPsvxDtT:", long_options, + NULL)) != -1) { switch (c) { case 'c': if (cmd != NULL) { @@ -8935,6 +9113,9 @@ zpool_do_status(int argc, char **argv) case 'T': get_timestamp_arg(*optarg); break; + case POWER_OPT: + cb.cb_print_power = B_TRUE; + break; case '?': if (optopt == 'c') { print_zpool_script_list("status"); diff --git a/cmd/zpool/zpool_util.h b/cmd/zpool/zpool_util.h index db8e631dc6..7f5406f063 100644 --- a/cmd/zpool/zpool_util.h +++ b/cmd/zpool/zpool_util.h @@ -138,6 +138,9 @@ int check_file(const char *file, boolean_t force, boolean_t isspare); void after_zpool_upgrade(zpool_handle_t *zhp); int check_file_generic(const char *file, boolean_t force, boolean_t isspare); +int zpool_power(zpool_handle_t *zhp, char *vdev, boolean_t turn_on); +int zpool_power_current_state(zpool_handle_t *zhp, char *vdev); + #ifdef __cplusplus } #endif diff --git 
a/include/libzfs.h b/include/libzfs.h index 4adfa38e87..770c5e1f20 100644 --- a/include/libzfs.h +++ b/include/libzfs.h @@ -318,6 +318,9 @@ _LIBZFS_H int zpool_vdev_remove_wanted(zpool_handle_t *, const char *); _LIBZFS_H int zpool_vdev_fault(zpool_handle_t *, uint64_t, vdev_aux_t); _LIBZFS_H int zpool_vdev_degrade(zpool_handle_t *, uint64_t, vdev_aux_t); +_LIBZFS_H int zpool_vdev_set_removed_state(zpool_handle_t *, uint64_t, + vdev_aux_t); + _LIBZFS_H int zpool_vdev_clear(zpool_handle_t *, uint64_t); _LIBZFS_H nvlist_t *zpool_find_vdev(zpool_handle_t *, const char *, boolean_t *, diff --git a/include/libzutil.h b/include/libzutil.h index 9842c225b6..839486fb62 100644 --- a/include/libzutil.h +++ b/include/libzutil.h @@ -97,6 +97,7 @@ _LIBZUTIL_H int zpool_find_config(libpc_handle_t *, const char *, nvlist_t **, _LIBZUTIL_H const char * const * zpool_default_search_paths(size_t *count); _LIBZUTIL_H int zpool_read_label(int, nvlist_t **, int *); _LIBZUTIL_H int zpool_label_disk_wait(const char *, int); +_LIBZUTIL_H int zpool_disk_wait(const char *); struct udev_device; @@ -163,6 +164,8 @@ _LIBZUTIL_H void zfs_niceraw(uint64_t, char *, size_t); _LIBZUTIL_H void zpool_dump_ddt(const ddt_stat_t *, const ddt_histogram_t *); _LIBZUTIL_H int zpool_history_unpack(char *, uint64_t, uint64_t *, nvlist_t ***, uint_t *); +_LIBZUTIL_H void fsleep(float sec); +_LIBZUTIL_H int zpool_getenv_int(const char *env, int default_val); struct zfs_cmd; @@ -205,6 +208,60 @@ _LIBZUTIL_H void zfs_setproctitle(const char *fmt, ...); typedef int (*pool_vdev_iter_f)(void *, nvlist_t *, void *); int for_each_vdev_cb(void *zhp, nvlist_t *nv, pool_vdev_iter_f func, void *data); +int for_each_vdev_macro_helper_func(void *zhp_data, nvlist_t *nv, void *data); +int for_each_real_leaf_vdev_macro_helper_func(void *zhp_data, nvlist_t *nv, + void *data); +/* + * Often you'll want to iterate over all the vdevs in the pool, but don't want + * to use for_each_vdev() since it requires a callback function. 
+ * + * Instead you can use FOR_EACH_VDEV(): + * + * zpool_handle_t *zhp // Assume this is initialized + * nvlist_t *nv + * ... + * FOR_EACH_VDEV(zhp, nv) { + * const char *path = NULL; + * nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path); + * printf("Looking at vdev %s\n", path); + * } + * + * Note: FOR_EACH_VDEV runs in O(n^2) time where n = number of vdevs. However, + * there's an upper limit of 256 vdevs per dRAID top-level vdevs (TLDs), 255 for + * raidz2 TLDs, a real world limit of ~500 vdevs for mirrors, so this shouldn't + * really be an issue. + * + * Here are some micro-benchmarks of a complete FOR_EACH_VDEV loop on a RAID0 + * pool: + * + * 100 vdevs = 0.7ms + * 500 vdevs = 17ms + * 750 vdevs = 40ms + * 1000 vdevs = 82ms + * + * The '__nv += 0' at the end of the for() loop gets around a "comma or + * semicolon followed by non-blank" checkstyle error. Note on most compilers + * the '__nv += 0' can just be replaced with 'NULL', but gcc on CentOS 7 + * will give a 'warning: statement with no effect' error if you do that. + */ +#define __FOR_EACH_VDEV(__zhp, __nv, __func) { \ + __nv = zpool_get_config(__zhp, NULL); \ + VERIFY0(nvlist_lookup_nvlist(__nv, ZPOOL_CONFIG_VDEV_TREE, &__nv)); \ + } \ + for (nvlist_t *__root_nv = __nv, *__state = (nvlist_t *)0; \ + for_each_vdev_cb(&__state, __root_nv, __func, &__nv) == 1; \ + __nv += 0) + +#define FOR_EACH_VDEV(__zhp, __nv) \ + __FOR_EACH_VDEV(__zhp, __nv, for_each_vdev_macro_helper_func) + +/* + * "real leaf" vdevs are leaf vdevs that are real devices (disks or files). + * This excludes leaf vdevs like draid spares. 
+ */ +#define FOR_EACH_REAL_LEAF_VDEV(__zhp, __nv) \ + __FOR_EACH_VDEV(__zhp, __nv, for_each_real_leaf_vdev_macro_helper_func) + int for_each_vdev_in_nvlist(nvlist_t *nvroot, pool_vdev_iter_f func, void *data); void update_vdevs_config_dev_sysfs_path(nvlist_t *config); diff --git a/lib/libzfs/libzfs.abi b/lib/libzfs/libzfs.abi index 3c975397ed..9bb8f6a47d 100644 --- a/lib/libzfs/libzfs.abi +++ b/lib/libzfs/libzfs.abi @@ -179,6 +179,7 @@ + @@ -466,6 +467,7 @@ + @@ -497,6 +499,7 @@ + @@ -567,6 +570,7 @@ + @@ -1402,8 +1406,6 @@ - - @@ -6355,6 +6357,12 @@ + + + + + + @@ -7588,6 +7596,12 @@ + + + + + + @@ -7597,6 +7611,10 @@ + + + + @@ -7714,6 +7732,11 @@ + + + + + @@ -7881,6 +7904,37 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -8070,12 +8124,6 @@ - - - - - - @@ -8102,11 +8150,6 @@ - - - - - @@ -8191,10 +8234,6 @@ - - - - @@ -8330,6 +8369,10 @@ + + + + @@ -8355,6 +8398,9 @@ + + + @@ -8628,6 +8674,7 @@ + @@ -8665,11 +8712,27 @@ + + + + + + + + + + + + + + + + diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c index 4ebd112f45..2f9ccbc2ab 100644 --- a/lib/libzfs/libzfs_pool.c +++ b/lib/libzfs/libzfs_pool.c @@ -3036,6 +3036,9 @@ zpool_vdev_is_interior(const char *name) return (B_FALSE); } +/* + * Lookup the nvlist for a given vdev. + */ nvlist_t * zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare, boolean_t *l2cache, boolean_t *log) @@ -3043,6 +3046,7 @@ zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare, char *end; nvlist_t *nvroot, *search, *ret; uint64_t guid; + boolean_t __avail_spare, __l2cache, __log; search = fnvlist_alloc(); @@ -3058,6 +3062,18 @@ zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare, nvroot = fnvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE); + /* + * User can pass NULL for avail_spare, l2cache, and log, but + * we still need to provide variables to vdev_to_nvlist_iter(), so + * just point them to junk variables here. 
+ */ + if (!avail_spare) + avail_spare = &__avail_spare; + if (!l2cache) + l2cache = &__l2cache; + if (!log) + log = &__log; + *avail_spare = B_FALSE; *l2cache = B_FALSE; if (log != NULL) @@ -3313,21 +3329,23 @@ zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux) } /* - * Mark the given vdev degraded. + * Generic set vdev state function */ -int -zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux) +static int +zpool_vdev_set_state(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux, + vdev_state_t state) { zfs_cmd_t zc = {"\0"}; char errbuf[ERRBUFLEN]; libzfs_handle_t *hdl = zhp->zpool_hdl; (void) snprintf(errbuf, sizeof (errbuf), - dgettext(TEXT_DOMAIN, "cannot degrade %llu"), (u_longlong_t)guid); + dgettext(TEXT_DOMAIN, "cannot set %s %llu"), + zpool_state_to_name(state, aux), (u_longlong_t)guid); (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); zc.zc_guid = guid; - zc.zc_cookie = VDEV_STATE_DEGRADED; + zc.zc_cookie = state; zc.zc_obj = aux; if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0) @@ -3336,6 +3354,27 @@ zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux) return (zpool_standard_error(hdl, errno, errbuf)); } +/* + * Mark the given vdev degraded. + */ +int +zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux) +{ + return (zpool_vdev_set_state(zhp, guid, aux, VDEV_STATE_DEGRADED)); +} + +/* + * Mark the given vdev as in a removed state (as if the device does not exist). + * + * This is different than zpool_vdev_remove() which does a removal of a device + * from the pool (but the device does exist). + */ +int +zpool_vdev_set_removed_state(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux) +{ + return (zpool_vdev_set_state(zhp, guid, aux, VDEV_STATE_REMOVED)); +} + /* * Returns TRUE if the given nvlist is a vdev that was originally swapped in as * a hot spare. 
diff --git a/lib/libzutil/os/linux/zutil_import_os.c b/lib/libzutil/os/linux/zutil_import_os.c index fbfae4f7e6..bb91dec5ac 100644 --- a/lib/libzutil/os/linux/zutil_import_os.c +++ b/lib/libzutil/os/linux/zutil_import_os.c @@ -170,25 +170,17 @@ zpool_open_func(void *arg) if (rn->rn_labelpaths) { const char *path = NULL; const char *devid = NULL; - const char *env = NULL; rdsk_node_t *slice; avl_index_t where; - int timeout; int error; if (label_paths(rn->rn_hdl, rn->rn_config, &path, &devid)) return; - env = getenv("ZPOOL_IMPORT_UDEV_TIMEOUT_MS"); - if ((env == NULL) || sscanf(env, "%d", &timeout) != 1 || - timeout < 0) { - timeout = DISK_LABEL_WAIT; - } - /* * Allow devlinks to stabilize so all paths are available. */ - zpool_label_disk_wait(rn->rn_name, timeout); + zpool_disk_wait(rn->rn_name); if (path != NULL) { slice = zutil_alloc(hdl, sizeof (rdsk_node_t)); @@ -682,6 +674,20 @@ zpool_label_disk_wait(const char *path, int timeout_ms) #endif /* HAVE_LIBUDEV */ } +/* + * Simplified version of zpool_label_disk_wait() where we wait for a device + * to appear using the default timeouts. + */ +int +zpool_disk_wait(const char *path) +{ + int timeout; + timeout = zpool_getenv_int("ZPOOL_IMPORT_UDEV_TIMEOUT_MS", + DISK_LABEL_WAIT); + + return (zpool_label_disk_wait(path, timeout)); +} + /* * Encode the persistent devices strings * used for the vdev disk label @@ -767,6 +773,10 @@ no_dev: * in the nvlist * (if applicable). Like: * vdev_enc_sysfs_path: '/sys/class/enclosure/11:0:1:0/SLOT 4' * + * If an old path was in the nvlist, and the rescan can not find a new path, + * then keep the old path, since the disk may have been removed. 
+ * + * path: The vdev path (value from ZPOOL_CONFIG_PATH) * key: The nvlist_t name (like ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH) */ void @@ -774,6 +784,9 @@ update_vdev_config_dev_sysfs_path(nvlist_t *nv, const char *path, const char *key) { char *upath, *spath; + const char *oldpath = NULL; + + (void) nvlist_lookup_string(nv, key, &oldpath); /* Add enclosure sysfs path (if disk is in an enclosure). */ upath = zfs_get_underlying_path(path); @@ -782,7 +795,14 @@ update_vdev_config_dev_sysfs_path(nvlist_t *nv, const char *path, if (spath) { (void) nvlist_add_string(nv, key, spath); } else { - (void) nvlist_remove_all(nv, key); + /* + * We couldn't dynamically scan the disk's enclosure sysfs path. + * This could be because the disk went away. If there's an old + * enclosure sysfs path in the nvlist, then keep using it. + */ + if (!oldpath) { + (void) nvlist_remove_all(nv, key); + } } free(upath); diff --git a/lib/libzutil/zutil_import.c b/lib/libzutil/zutil_import.c index f7ef69a1d9..eb91311904 100644 --- a/lib/libzutil/zutil_import.c +++ b/lib/libzutil/zutil_import.c @@ -1922,6 +1922,104 @@ zpool_find_config(libpc_handle_t *hdl, const char *target, nvlist_t **configp, return (0); } +/* Return if a vdev is a leaf vdev. Note: draid spares are leaf vdevs. */ +static boolean_t +vdev_is_leaf(nvlist_t *nv) +{ + uint_t children = 0; + nvlist_t **child; + + (void) nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, + &child, &children); + + return (children == 0); +} + +/* Return if a vdev is a leaf vdev and a real device (disk or file) */ +static boolean_t +vdev_is_real_leaf(nvlist_t *nv) +{ + const char *type = NULL; + if (!vdev_is_leaf(nv)) + return (B_FALSE); + + if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0 && + ((strcmp(type, VDEV_TYPE_DISK) == 0) || + (strcmp(type, VDEV_TYPE_FILE) == 0))) { + return (B_TRUE); + } + + return (B_FALSE); +} + +/* + * This function is called by our FOR_EACH_VDEV() macros. 
+ * + * state: State machine status (stored inside of a (nvlist_t *)) + * nv: The current vdev nvlist_t we are iterating over. + * last_nv: The previous vdev nvlist_t we returned to the user in + * the last iteration of FOR_EACH_VDEV(). We use it + * to find the next vdev nvlist_t we should return. + * real_leaves_only: Only return real leaf vdevs (disks or files). + * + * Returns 1 if we found the next vdev nvlist_t for this iteration. 0 if + * we're still searching for it. + */ +static int +__for_each_vdev_macro_helper_func(void *state, nvlist_t *nv, void *last_nv, + boolean_t real_leaves_only) +{ + enum {FIRST_NV = 0, NEXT_IS_MATCH = 1, STOP_LOOKING = 2}; + + /* The very first entry in the NV list is a special case */ + if (*((nvlist_t **)state) == (nvlist_t *)FIRST_NV) { + if (real_leaves_only && !vdev_is_real_leaf(nv)) + return (0); + + *((nvlist_t **)last_nv) = nv; + *((nvlist_t **)state) = (nvlist_t *)STOP_LOOKING; + return (1); + } + + /* + * We came across our last_nv, meaning the next one is the one we + * want + */ + if (nv == *((nvlist_t **)last_nv)) { + /* Next iteration of this function will return the nvlist_t */ + *((nvlist_t **)state) = (nvlist_t *)NEXT_IS_MATCH; + return (0); + } + + /* + * We marked NEXT_IS_MATCH on the previous iteration, so this is the one + * we want. + */ + if (*(nvlist_t **)state == (nvlist_t *)NEXT_IS_MATCH) { + if (real_leaves_only && !vdev_is_real_leaf(nv)) + return (0); + + *((nvlist_t **)last_nv) = nv; + *((nvlist_t **)state) = (nvlist_t *)STOP_LOOKING; + return (1); + } + + return (0); +} + +int +for_each_vdev_macro_helper_func(void *state, nvlist_t *nv, void *last_nv) +{ + return (__for_each_vdev_macro_helper_func(state, nv, last_nv, B_FALSE)); +} + +int +for_each_real_leaf_vdev_macro_helper_func(void *state, nvlist_t *nv, + void *last_nv) +{ + return (__for_each_vdev_macro_helper_func(state, nv, last_nv, B_TRUE)); +} + /* * Internal function for iterating over the vdevs. 
* diff --git a/lib/libzutil/zutil_pool.c b/lib/libzutil/zutil_pool.c index 288a0033cd..86460de3fc 100644 --- a/lib/libzutil/zutil_pool.c +++ b/lib/libzutil/zutil_pool.c @@ -28,6 +28,7 @@ #include #include #include +#include #include @@ -144,3 +145,33 @@ zpool_history_unpack(char *buf, uint64_t bytes_read, uint64_t *leftover, *leftover = bytes_read; return (0); } + +/* + * Floating point sleep(). Allows you to pass in a floating point value for + * seconds. + */ +void +fsleep(float sec) +{ + struct timespec req; + req.tv_sec = floor(sec); + req.tv_nsec = (sec - (float)req.tv_sec) * NANOSEC; + nanosleep(&req, NULL); +} + +/* + * Get environment variable 'env' and return it as an integer. + * If 'env' is not set, unparsable, or negative, return 'default_val' instead. + */ +int +zpool_getenv_int(const char *env, int default_val) +{ + char *str; + int val; + str = getenv(env); + if ((str == NULL) || sscanf(str, "%d", &val) != 1 || + val < 0) { + val = default_val; + } + return (val); +} diff --git a/man/man8/zpool-clear.8 b/man/man8/zpool-clear.8 index 7b9d40c74e..c61ecae483 100644 --- a/man/man8/zpool-clear.8 +++ b/man/man8/zpool-clear.8 @@ -36,6 +36,7 @@ .Sh SYNOPSIS .Nm zpool .Cm clear +.Op Fl -power .Ar pool .Oo Ar device Oc Ns … . @@ -52,6 +53,16 @@ Pools with enabled which have been suspended cannot be resumed. While the pool was suspended, it may have been imported on another host, and resuming I/O could result in pool damage. +.Bl -tag -width Ds +.It Fl -power +Power on the device's slot in the storage enclosure and wait for the device +to show up before attempting to clear errors. +This is done on all the devices specified. +Alternatively, you can set the +.Sy ZPOOL_AUTO_POWER_ON_SLOT +environment variable to always enable this behavior. +Note: This flag currently works on Linux only. +.El . 
.Sh SEE ALSO .Xr zdb 8 , diff --git a/man/man8/zpool-offline.8 b/man/man8/zpool-offline.8 index edcf1d06ab..1b6095d637 100644 --- a/man/man8/zpool-offline.8 +++ b/man/man8/zpool-offline.8 @@ -36,12 +36,13 @@ .Sh SYNOPSIS .Nm zpool .Cm offline -.Op Fl ft +.Op Fl Sy -power Ns | Ns Op Fl Sy ft .Ar pool .Ar device Ns … .Nm zpool .Cm online -.Op Fl e +.Op Fl Sy -power +.Op Fl Sy e .Ar pool .Ar device Ns … . @@ -50,7 +51,7 @@ .It Xo .Nm zpool .Cm offline -.Op Fl ft +.Op Fl Sy -power Ns | Ns Op Fl Sy ft .Ar pool .Ar device Ns … .Xc @@ -60,6 +61,9 @@ While the is offline, no attempt is made to read or write to the device. This command is not applicable to spares. .Bl -tag -width Ds +.It Fl -power +Power off the device's slot in the storage enclosure. +This flag currently works on Linux only. .It Fl f Force fault. Instead of offlining the disk, put it into a faulted state. @@ -73,6 +77,7 @@ Upon reboot, the specified physical device reverts to its previous state. .It Xo .Nm zpool .Cm online +.Op Fl -power .Op Fl e .Ar pool .Ar device Ns … @@ -80,6 +85,13 @@ Upon reboot, the specified physical device reverts to its previous state. Brings the specified physical device online. This command is not applicable to spares. .Bl -tag -width Ds +.It Fl -power +Power on the device's slot in the storage enclosure and wait for the device +to show up before attempting to online it. +Alternatively, you can set the +.Sy ZPOOL_AUTO_POWER_ON_SLOT +environment variable to always enable this behavior. +This flag currently works on Linux only. .It Fl e Expand the device to use all available space. If the device is part of a mirror or raidz then all devices must be expanded diff --git a/man/man8/zpool-status.8 b/man/man8/zpool-status.8 index 10424b9f5b..56fa4aed05 100644 --- a/man/man8/zpool-status.8 +++ b/man/man8/zpool-status.8 @@ -57,6 +57,8 @@ and the estimated time to completion. 
Both of these are only approximate, because the amount of data in the pool and the other workloads on the system can change. .Bl -tag -width Ds +.It Fl -power +Display vdev enclosure slot power status (on or off). .It Fl c Op Ar SCRIPT1 Ns Oo , Ns Ar SCRIPT2 Oc Ns … Run a script (or scripts) on each vdev and include the output as a new column in the diff --git a/man/man8/zpool.8 b/man/man8/zpool.8 index 4c4020bdd8..fe44e15cab 100644 --- a/man/man8/zpool.8 +++ b/man/man8/zpool.8 @@ -444,7 +444,7 @@ rpool 14.6G 54.9G 4 55 250K 2.69M .Ed . .Sh ENVIRONMENT VARIABLES -.Bl -tag -compact -width "ZPOOL_IMPORT_UDEV_TIMEOUT_MS" +.Bl -tag -compact -width "ZPOOL_STATUS_NON_NATIVE_ASHIFT_IGNORE" .It Sy ZFS_ABORT Cause .Nm @@ -456,6 +456,23 @@ Use ANSI color in and .Nm zpool Cm iostat output. +.It Sy ZPOOL_AUTO_POWER_ON_SLOT +Automatically attempt to turn on the enclosure slot power to a drive when +running the +.Nm zpool Cm online +or +.Nm zpool Cm clear +commands. +This has the same effect as passing the +.Fl -power +option to those commands. +.It Sy ZPOOL_POWER_ON_SLOT_TIMEOUT_MS +The maximum time in milliseconds to wait for a slot power sysfs value +to return the correct value after writing it. +For example, after writing "on" to the sysfs enclosure slot power_control file, +it can take some time for the enclosure to power up the slot and return +"on" if you read back the 'power_control' value. +Defaults to 30 seconds (30000ms) if not set. .It Sy ZPOOL_IMPORT_PATH The search path for devices or files to use with the pool. This is a colon-separated list of directories in which