diff --git a/include/sys/zio.h b/include/sys/zio.h index 3a756949a4..f985b4cf4d 100644 --- a/include/sys/zio.h +++ b/include/sys/zio.h @@ -598,7 +598,7 @@ extern zio_t *zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg, extern int zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg, blkptr_t *new_bp, uint64_t size, boolean_t *slog); -extern void zio_flush(zio_t *zio, vdev_t *vd); +extern void zio_flush(zio_t *zio, vdev_t *vd, boolean_t propagate); extern void zio_shrink(zio_t *zio, uint64_t size); extern int zio_wait(zio_t *zio); diff --git a/module/os/freebsd/zfs/vdev_geom.c b/module/os/freebsd/zfs/vdev_geom.c index b7ff1063b0..7aaa42bfb1 100644 --- a/module/os/freebsd/zfs/vdev_geom.c +++ b/module/os/freebsd/zfs/vdev_geom.c @@ -1014,21 +1014,6 @@ vdev_geom_io_intr(struct bio *bp) zio->io_error = SET_ERROR(EIO); switch (zio->io_error) { - case ENOTSUP: - /* - * If we get ENOTSUP for BIO_FLUSH or BIO_DELETE we know - * that future attempts will never succeed. In this case - * we set a persistent flag so that we don't bother with - * requests in the future. - */ - switch (bp->bio_cmd) { - case BIO_FLUSH: - vd->vdev_nowritecache = B_TRUE; - break; - case BIO_DELETE: - break; - } - break; case ENXIO: if (!vd->vdev_remove_wanted) { /* diff --git a/module/os/linux/zfs/vdev_disk.c b/module/os/linux/zfs/vdev_disk.c index e69c5f3841..2c963bb05c 100644 --- a/module/os/linux/zfs/vdev_disk.c +++ b/module/os/linux/zfs/vdev_disk.c @@ -1232,9 +1232,6 @@ BIO_END_IO_PROTO(vdev_disk_io_flush_completion, bio, error) zio->io_error = -error; #endif - if (zio->io_error && (zio->io_error == EOPNOTSUPP)) - zio->io_vd->vdev_nowritecache = B_TRUE; - bio_put(bio); ASSERT3S(zio->io_error, >=, 0); if (zio->io_error) diff --git a/module/zfs/vdev_label.c b/module/zfs/vdev_label.c index 47346dd5ac..468390b9ac 100644 --- a/module/zfs/vdev_label.c +++ b/module/zfs/vdev_label.c @@ -1830,19 +1830,21 @@ vdev_uberblock_sync_list(vdev_t **svd, int svdcount, uberblock_t *ub, int flags) for (int v = 0; v < svdcount; v++) { if (vdev_writeable(svd[v])) { - zio_flush(zio, svd[v]); + zio_flush(zio, svd[v], B_FALSE); } } if (spa->spa_aux_sync_uber) { spa->spa_aux_sync_uber = B_FALSE; for (int v = 0; v < spa->spa_spares.sav_count; v++) { if (vdev_writeable(spa->spa_spares.sav_vdevs[v])) { - zio_flush(zio, spa->spa_spares.sav_vdevs[v]); + zio_flush(zio, spa->spa_spares.sav_vdevs[v], + B_FALSE); } } for (int v = 0; v < spa->spa_l2cache.sav_count; v++) { if (vdev_writeable(spa->spa_l2cache.sav_vdevs[v])) { - zio_flush(zio, spa->spa_l2cache.sav_vdevs[v]); + zio_flush(zio, spa->spa_l2cache.sav_vdevs[v], + B_FALSE); } } } @@ -2007,13 +2009,13 @@ vdev_label_sync_list(spa_t *spa, int l, uint64_t txg, int flags) zio = zio_root(spa, NULL, NULL, flags); for (vd = list_head(dl); vd != NULL; vd = list_next(dl, vd)) - zio_flush(zio, vd); + zio_flush(zio, vd, B_FALSE); for (int i = 0; i < 2; i++) { if (!sav[i]->sav_label_sync) continue; for (int v = 0; v < sav[i]->sav_count; v++) - zio_flush(zio, sav[i]->sav_vdevs[v]); + zio_flush(zio, sav[i]->sav_vdevs[v], B_FALSE); if (l == 1) sav[i]->sav_label_sync = B_FALSE; } @@ -2091,7 +2093,7 @@ retry: for (vdev_t *vd = txg_list_head(&spa->spa_vdev_txg_list, TXG_CLEAN(txg)); vd != NULL; vd = txg_list_next(&spa->spa_vdev_txg_list, vd, TXG_CLEAN(txg))) - zio_flush(zio, vd); + zio_flush(zio, vd, B_FALSE); (void) zio_wait(zio); diff --git a/module/zfs/vdev_raidz.c b/module/zfs/vdev_raidz.c index 15c8b8ca60..187d3908ff 100644 --- a/module/zfs/vdev_raidz.c +++ b/module/zfs/vdev_raidz.c @@ -4172,7 +4172,7 @@ 
io_error_exit: goto io_error_exit; } pio = zio_root(spa, NULL, NULL, 0); - zio_flush(pio, raidvd); + zio_flush(pio, raidvd, B_FALSE); zio_wait(pio); zfs_dbgmsg("reflow: wrote %llu bytes (logical) to scratch area", @@ -4231,7 +4231,7 @@ overwrite: goto io_error_exit; } pio = zio_root(spa, NULL, NULL, 0); - zio_flush(pio, raidvd); + zio_flush(pio, raidvd, B_FALSE); zio_wait(pio); zfs_dbgmsg("reflow: overwrote %llu bytes (logical) to real location", @@ -4339,7 +4339,7 @@ vdev_raidz_reflow_copy_scratch(spa_t *spa) } zio_wait(pio); pio = zio_root(spa, NULL, NULL, 0); - zio_flush(pio, raidvd); + zio_flush(pio, raidvd, B_FALSE); zio_wait(pio); zfs_dbgmsg("reflow recovery: overwrote %llu bytes (logical) " diff --git a/module/zfs/zil.c b/module/zfs/zil.c index 3983da6aa4..f451c170fc 100644 --- a/module/zfs/zil.c +++ b/module/zfs/zil.c @@ -23,6 +23,7 @@ * Copyright (c) 2011, 2018 by Delphix. All rights reserved. * Copyright (c) 2014 Integros [integros.com] * Copyright (c) 2018 Datto Inc. + * Copyright (c) 2024, Klara, Inc. */ /* Portions Copyright 2010 Robert Milkowski */ @@ -1495,12 +1496,6 @@ zil_lwb_flush_vdevs_done(zio_t *zio) * includes ZIO errors from either this LWB's write or * flush, as well as any errors from other dependent LWBs * (e.g. a root LWB ZIO that might be a child of this LWB). - * - * With that said, it's important to note that LWB flush - * errors are not propagated up to the LWB root ZIO. - * This is incorrect behavior, and results in VDEV flush - * errors not being handled correctly here. See the - * comment above the call to "zio_flush" for details. */ zcw->zcw_zio_error = zio->io_error; @@ -1650,17 +1645,8 @@ zil_lwb_write_done(zio_t *zio) while ((zv = avl_destroy_nodes(t, &cookie)) != NULL) { vdev_t *vd = vdev_lookup_top(spa, zv->zv_vdev); - if (vd != NULL) { - /* - * The "ZIO_FLAG_DONT_PROPAGATE" is currently - * always used within "zio_flush". This means, - * any errors when flushing the vdev(s), will - * (unfortunately) not be handled correctly, - * since these "zio_flush" errors will not be - * propagated up to "zil_lwb_flush_vdevs_done". - */ - zio_flush(lwb->lwb_root_zio, vd); - } + if (vd != NULL) + zio_flush(lwb->lwb_root_zio, vd, B_TRUE); kmem_free(zv, sizeof (*zv)); } } diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 53992931e0..37a33bb1e8 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -1640,10 +1640,10 @@ zio_vdev_delegated_io(vdev_t *vd, uint64_t offset, abd_t *data, uint64_t size, * the flushes complete. */ void -zio_flush(zio_t *pio, vdev_t *vd) +zio_flush(zio_t *pio, vdev_t *vd, boolean_t propagate) { - const zio_flag_t flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE | - ZIO_FLAG_DONT_RETRY; + const zio_flag_t flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_RETRY | + (propagate ? 0 : ZIO_FLAG_DONT_PROPAGATE); if (vd->vdev_nowritecache) return; @@ -1654,7 +1654,7 @@ zio_flush(zio_t *pio, vdev_t *vd) NULL, ZIO_STAGE_OPEN, ZIO_FLUSH_PIPELINE)); } else { for (uint64_t c = 0; c < vd->vdev_children; c++) - zio_flush(pio, vd->vdev_child[c]); + zio_flush(pio, vd->vdev_child[c], propagate); } } @@ -4553,11 +4553,14 @@ zio_vdev_io_assess(zio_t *zio) /* * If a cache flush returns ENOTSUP or ENOTTY, we know that no future * attempts will ever succeed. In this case we set a persistent - * boolean flag so that we don't bother with it in the future. + * boolean flag so that we don't bother with it in the future, and + * then we act like the flush succeeded. 
*/ if ((zio->io_error == ENOTSUP || zio->io_error == ENOTTY) && - zio->io_type == ZIO_TYPE_FLUSH && vd != NULL) + zio->io_type == ZIO_TYPE_FLUSH && vd != NULL) { vd->vdev_nowritecache = B_TRUE; + zio->io_error = 0; + } if (zio->io_error) zio->io_pipeline = ZIO_INTERLOCK_PIPELINE; diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index 5817e64900..4a3fcd2cbe 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -124,6 +124,10 @@ tests = ['auto_offline_001_pos', 'auto_online_001_pos', 'auto_online_002_pos', 'scrub_after_resilver', 'suspend_resume_single', 'zpool_status_-s'] tags = ['functional', 'fault'] +[tests/functional/flush:Linux] +tests = ['zil_flush_error'] +tags = ['functional', 'flush'] + [tests/functional/features/large_dnode:Linux] tests = ['large_dnode_002_pos', 'large_dnode_006_pos', 'large_dnode_008_pos'] tags = ['functional', 'features', 'large_dnode'] diff --git a/tests/test-runner/bin/zts-report.py.in b/tests/test-runner/bin/zts-report.py.in index 6db10b91de..39b0ccf594 100755 --- a/tests/test-runner/bin/zts-report.py.in +++ b/tests/test-runner/bin/zts-report.py.in @@ -379,6 +379,7 @@ if os.environ.get('CI') == 'true': 'fault/auto_spare_ashift': ['SKIP', ci_reason], 'fault/auto_spare_shared': ['SKIP', ci_reason], 'fault/suspend_resume_single': ['SKIP', ci_reason], + 'flush/zil_flush_error': ['SKIP', ci_reason], 'procfs/pool_state': ['SKIP', ci_reason], }) diff --git a/tests/zfs-tests/include/blkdev.shlib b/tests/zfs-tests/include/blkdev.shlib index 51eff3023e..bd8557c94b 100644 --- a/tests/zfs-tests/include/blkdev.shlib +++ b/tests/zfs-tests/include/blkdev.shlib @@ -462,13 +462,16 @@ function unload_scsi_debug # Get scsi_debug device name. # Returns basename of scsi_debug device (for example "sdb"). # -function get_debug_device +# $1 (optional): Return the names of the first $1 scsi_debug devices. +function get_debug_device #num { + typeset num=${1:-1} + for i in {1..10} ; do - val=$(lsscsi | awk '/scsi_debug/ {print $6; exit}' | cut -d/ -f3) + val=$(lsscsi | awk '/scsi_debug/ {print $6}' | cut -d/ -f3 | head -n$num) # lsscsi can take time to settle - if [ "$val" != "-" ] ; then + if [[ ! "$val" =~ "-" ]] ; then break fi sleep 1 diff --git a/tests/zfs-tests/tests/Makefile.am b/tests/zfs-tests/tests/Makefile.am index bbeabc6dfb..34a5a0f075 100644 --- a/tests/zfs-tests/tests/Makefile.am +++ b/tests/zfs-tests/tests/Makefile.am @@ -1516,6 +1516,9 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/features/large_dnode/large_dnode_008_pos.ksh \ functional/features/large_dnode/large_dnode_009_pos.ksh \ functional/features/large_dnode/setup.ksh \ + functional/flush/cleanup.ksh \ + functional/flush/setup.ksh \ + functional/flush/zil_flush_error.ksh \ functional/grow/grow_pool_001_pos.ksh \ functional/grow/grow_replicas_001_pos.ksh \ functional/history/cleanup.ksh \ diff --git a/tests/zfs-tests/tests/functional/flush/cleanup.ksh b/tests/zfs-tests/tests/functional/flush/cleanup.ksh new file mode 100755 index 0000000000..4eb59574e4 --- /dev/null +++ b/tests/zfs-tests/tests/functional/flush/cleanup.ksh @@ -0,0 +1,28 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END + +# +# Copyright (c) 2024, Klara, Inc. +# + +. $STF_SUITE/include/libtest.shlib + +default_cleanup diff --git a/tests/zfs-tests/tests/functional/flush/setup.ksh b/tests/zfs-tests/tests/functional/flush/setup.ksh new file mode 100755 index 0000000000..94a3936ce2 --- /dev/null +++ b/tests/zfs-tests/tests/functional/flush/setup.ksh @@ -0,0 +1,30 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END + +# +# Copyright (c) 2024, Klara, Inc. +# + +. $STF_SUITE/include/libtest.shlib + +verify_runnable "global" + +log_pass diff --git a/tests/zfs-tests/tests/functional/flush/zil_flush_error.ksh b/tests/zfs-tests/tests/functional/flush/zil_flush_error.ksh new file mode 100755 index 0000000000..e053c5d3ba --- /dev/null +++ b/tests/zfs-tests/tests/functional/flush/zil_flush_error.ksh @@ -0,0 +1,259 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2024, Klara, Inc. +# + +# +# This tests that if the ZIL write sequence fails, it correctly falls back and +# waits until the transaction has fully committed before returning. +# +# When this test was written, the ZIL had a flaw - it assumed that if its +# writes succeeded, then the data was definitely on disk and available for +# replay if the pool failed. It issued a flush immediately after the write, but +# did not check its result.
If a disk fails after the data has been accepted into +# the disk cache, but before it can be written to permanent storage, then +# fsync() will have returned success even though the data is not stored in the +# ZIL for replay. +# +# If the main pool then fails before the transaction can be written, then data +# is lost, and fsync() returning success was premature. +# +# To prove this, we create a pool with a separate log device. We inject two +# faults: +# +# - ZIL writes appear to succeed, but never make it to disk +# - ZIL flushes fail, and return error +# +# We then remove the main pool device, and do a write+fsync. This goes to the +# ZIL, and appears to succeed. When the txg closes, the write will fail, and +# the pool suspends. +# +# Then, we simulate a reboot by copying the content of the pool devices aside. +# We restore the pool devices, bring it back online, and export it - we don't +# need it anymore, but we have to clean up properly. Then we restore the copied +# content and import the pool, in whatever state it was in when it suspended. +# +# Finally, we check the content of the file we wrote to. If it matches what we +# wrote, then the fsync() was correct, and all is well. If it doesn't match, +# then the flaw is present, and the test fails. +# +# We run the test twice: once without the log device injections, once with. The +# first confirms the expected behaviour of the ZIL - when the pool is imported, +# the log is replayed. The second fails as above. When the flaw is corrected, +# both will succeed, and this overall test succeeds. +# + +. $STF_SUITE/include/libtest.shlib + +TMPDIR=${TMPDIR:-$TEST_BASE_DIR} + +BACKUP_MAIN="$TMPDIR/backup_main" +BACKUP_LOG="$TMPDIR/backup_log" + +LOOP_LOG="$TMPDIR/loop_log" + +DATA_FILE="$TMPDIR/data_file" + +verify_runnable "global" + +function cleanup +{ + zinject -c all + destroy_pool $TESTPOOL + unload_scsi_debug + rm -f $BACKUP_MAIN $BACKUP_LOG $DATA_FILE +} + +log_onexit cleanup + +log_assert "verify fsync() waits if the ZIL commit fails" + +# create 128K of random data, and take its checksum. we do this up front to +# ensure we don't get messed up by any latency from reading /dev/random or +# checksumming the file on the pool +log_must dd if=/dev/random of=$DATA_FILE bs=128K count=1 +typeset sum=$(sha256digest $DATA_FILE) + +# create a virtual scsi device with two device nodes. these are backed by the +# same memory. we do this because we need to be able to take the device offline +# properly in order to get the pool to suspend; fault injection on a loop +# device can't do it. once offline, we can use the second node to take a copy +# of its state. +load_scsi_debug 100 1 2 1 '512b' +set -A sd $(get_debug_device 2) + +# create a loop device for the log. +truncate -s 100M $LOOP_LOG +typeset ld=$(basename $(losetup -f)) +log_must losetup /dev/$ld $LOOP_LOG + +# this function runs the entire test sequence. the option decides if faults +# are injected on the slog device, mimicking the trigger situation that causes +# the fsync() bug to occur +function test_fsync +{ + typeset -i do_fault_log="$1" + + log_note "setting up test" + + # create the pool. the main data store is on the scsi device, with the + # log on a loopback. we bias the ZIL towards the log device to try + # to ensure that fsync() never involves the main device + log_must zpool create -f -O logbias=latency $TESTPOOL ${sd[0]} log $ld + + # create the file ahead of time.
the ZIL head structure is created on + # first use, and does a full txg wait, which we need to avoid + log_must dd if=/dev/zero of=/$TESTPOOL/data_file \ + bs=128k count=1 conv=fsync + log_must zpool sync + + # arrange for writes to the log device to appear to succeed, and + # flushes to fail. this simulates a loss of the device after it has + # accepted the write into its cache, but before it can be written + # out + if [[ $do_fault_log != 0 ]] ; then + log_note "injecting log device faults" + log_must zinject -d $ld -e noop -T write $TESTPOOL + log_must zinject -d $ld -e io -T flush $TESTPOOL + fi + + # take the main device offline. there is no IO in flight, so ZFS won't + # notice immediately + log_note "taking main pool offline" + log_must eval "echo offline > /sys/block/${sd[0]}/device/state" + + # write out some data, then call fsync(). there are three possible + # results: + # + # - if the bug is present, fsync() will return success, and dd will + # succeed "immediately"; before the pool suspends + # - if the bug is fixed, fsync() will block, the pool will suspend, and + # dd will return success after the pool returns to service + # - if something else goes wrong, dd will fail; this may happen before + # or after the pool suspends or returns. this shouldn't happen, and + # should abort the test + # + # we have to put dd in the background, otherwise if it blocks we will + # block with it. what we're interested in is whether or not it succeeds + # before the pool is suspended. if it does, then we expect that after + # the suspended pool is reimported, the data will have been written + log_note "running dd in background to write data and call fsync()" + dd if=$DATA_FILE of=/$TESTPOOL/data_file bs=128k count=1 conv=fsync & + fsync_pid=$! + + # wait for the pool to suspend. this should happen within ~5s, when the + # txg sync tries to write the change to the main device + log_note "waiting for pool to suspend" + typeset -i tries=10 + until [[ $(cat /proc/spl/kstat/zfs/$TESTPOOL/state) == "SUSPENDED" ]] ; do + if ((tries-- == 0)); then + log_fail "pool didn't suspend" + fi + sleep 1 + done + + # the pool is suspended. see if dd is still present; if it is, then + # it's blocked in fsync(), and we have no expectation that the write + # made it to disk. if dd has exited, then its return code will tell + # us whether fsync() returned success, or it failed for some other + # reason + typeset -i fsync_success=0 + if kill -0 $fsync_pid ; then + log_note "dd is blocked; fsync() has not returned" + else + log_note "dd has finished, ensuring it was successful" + log_must wait $fsync_pid + fsync_success=1 + fi + + # pool is suspended. if we online the main device right now, it will + # retry writing the transaction, which will succeed, and everything + # will continue as it's supposed to. that's the opposite of what we + # want; we want to do an import, as if after reboot, to force the pool + # to try to replay the ZIL, so we can compare the final result against + # what fsync() told us + # + # however, right now the pool is wedged. we need to get it back online + # so we can export it, so we can do the import. so we need to copy the + # entire pool state away. for the scsi device, we can do this through + # the second device node. for the loopback, we can copy it directly + log_note "taking copy of suspended pool" + log_must cp /dev/${sd[1]} $BACKUP_MAIN + log_must cp /dev/$ld $BACKUP_LOG + + # bring the entire pool back online, by clearing error injections and + # restoring the main device.
this will unblock anything still waiting + # on it, and tidy up all the internals so we can reset it + log_note "bringing pool back online" + if [[ $do_fault_log != 0 ]] ; then + log_must zinject -c all + fi + log_must eval "echo running > /sys/block/${sd[0]}/device/state" + log_must zpool clear $TESTPOOL + + # now the pool is back online. if dd was blocked, it should now + # complete successfully. make sure that's true + if [[ $fsync_success == 0 ]] ; then + log_note "ensuring blocked dd has now finished" + log_must wait $fsync_pid + fi + + log_note "exporting pool" + + # pool now clean, export it + log_must zpool export $TESTPOOL + + log_note "reverting pool to suspended state" + + # restore the pool to the suspended state, mimicking a reboot + log_must cp $BACKUP_MAIN /dev/${sd[0]} + log_must cp $BACKUP_LOG /dev/$ld + + # import the crashed pool + log_must zpool import $TESTPOOL + + # if fsync() succeeded before the pool suspended, then the ZIL should + # have replayed properly and the data is now available on the pool + # + # note that we don't check the alternative; fsync() blocking does not + # mean that data _didn't_ make it to disk, just that ZFS never claimed + # that it did. in that case we can't know what _should_ be on disk + # right now, so we can't check + if [[ $fsync_success == 1 ]] ; then + log_note "fsync() succeeded earlier; checking data was written correctly" + typeset newsum=$(sha256digest /$TESTPOOL/data_file) + log_must test "$sum" = "$newsum" + fi + + log_note "test finished, cleaning up" + log_must zpool destroy -f $TESTPOOL +} + +log_note "first run: ZIL succeeds, and repairs the pool at import" +test_fsync 0 + +log_note "second run: ZIL commit fails, and falls back to txg sync" +test_fsync 1 + +log_pass "fsync() waits if the ZIL commit fails"