From bb80b4649af50f1e45ce920b1165bed6880a4252 Mon Sep 17 00:00:00 2001 From: Arun KV <65647132+arun-kv@users.noreply.github.com> Date: Tue, 14 Sep 2021 01:32:39 +0530 Subject: [PATCH] Fixed data integrity issue when underlying disk returns error Errors in zil_lwb_write_done() are not propagated to zil_lwb_flush_vdevs_done() which can result in zil_commit_impl() not returning an error to applications even when zfs was not able to write data to the disk. Remove the ZIO_FLAG_DONT_PROPAGATE flag from zio_rewrite() to allow errors to propagate and consolidate the error handling for flush and write errors to a single location (rather than having error handling split between the "write done" and "flush done" handlers). Reviewed-by: George Wilson Reviewed-by: Prakash Surya Signed-off-by: Arun KV Closes #12391 Closes #12443 --- module/zfs/zil.c | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/module/zfs/zil.c b/module/zfs/zil.c index 2eeb4fa4fe..640e805d09 100644 --- a/module/zfs/zil.c +++ b/module/zfs/zil.c @@ -1178,6 +1178,20 @@ zil_lwb_flush_vdevs_done(zio_t *zio) ASSERT3P(zcw->zcw_lwb, ==, lwb); zcw->zcw_lwb = NULL; + /* + * We expect any ZIO errors from child ZIOs to have been + * propagated "up" to this specific LWB's root ZIO, in + * order for this error handling to work correctly. This + * includes ZIO errors from either this LWB's write or + * flush, as well as any errors from other dependent LWBs + * (e.g. a root LWB ZIO that might be a child of this LWB). + * + * With that said, it's important to note that LWB flush + * errors are not propagated up to the LWB root ZIO. + * This is incorrect behavior, and results in VDEV flush + * errors not being handled correctly here. See the + * comment above the call to "zio_flush" for details. + */ zcw->zcw_zio_error = zio->io_error; @@ -1251,6 +1265,12 @@ zil_lwb_write_done(zio_t *zio) * nodes. We avoid calling zio_flush() since there isn't any * good reason for doing so, after the lwb block failed to be * written out. + * + * Additionally, we don't perform any further error handling at + * this point (e.g. setting "zcw_zio_error" appropriately), as + * we expect that to occur in "zil_lwb_flush_vdevs_done" (thus, + * we expect any error seen here, to have been propagated to + * that function). */ if (zio->io_error != 0) { while ((zv = avl_destroy_nodes(t, &cookie)) != NULL) @@ -1281,8 +1301,17 @@ zil_lwb_write_done(zio_t *zio) while ((zv = avl_destroy_nodes(t, &cookie)) != NULL) { vdev_t *vd = vdev_lookup_top(spa, zv->zv_vdev); - if (vd != NULL) + if (vd != NULL) { + /* + * The "ZIO_FLAG_DONT_PROPAGATE" is currently + * always used within "zio_flush". This means, + * any errors when flushing the vdev(s), will + * (unfortunately) not be handled correctly, + * since these "zio_flush" errors will not be + * propagated up to "zil_lwb_flush_vdevs_done". + */ zio_flush(lwb->lwb_root_zio, vd); + } kmem_free(zv, sizeof (*zv)); } } @@ -1399,8 +1428,7 @@ zil_lwb_write_open(zilog_t *zilog, lwb_t *lwb) lwb->lwb_write_zio = zio_rewrite(lwb->lwb_root_zio, zilog->zl_spa, 0, &lwb->lwb_blk, lwb_abd, BP_GET_LSIZE(&lwb->lwb_blk), zil_lwb_write_done, lwb, - prio, ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE | - ZIO_FLAG_FASTWRITE, &zb); + prio, ZIO_FLAG_CANFAIL | ZIO_FLAG_FASTWRITE, &zb); ASSERT3P(lwb->lwb_write_zio, !=, NULL); lwb->lwb_state = LWB_STATE_OPENED;