Fixed data integrity issue when underlying disk returns error

Errors in zil_lwb_write_done() are not propagated to
zil_lwb_flush_vdevs_done() which can result in zil_commit_impl()
not returning an error to applications even when zfs was not able
to write data to the disk.

Remove the ZIO_FLAG_DONT_PROPAGATE flag from zio_rewrite() to
allow errors to propagate and consolidate the error handling for
flush and write errors to a single location (rather than having
error handling split between the "write done" and "flush done"
handlers).

Reviewed-by: George Wilson <gwilson@delphix.com>
Reviewed-by: Prakash Surya <prakash.surya@delphix.com>
Signed-off-by: Arun KV <arun.kv@datacore.com>
Closes #12391
Closes #12443
This commit is contained in:
Arun KV 2021-09-14 01:32:39 +05:30 committed by Tony Hutter
parent 7816a6b85b
commit bb80b4649a
1 changed files with 31 additions and 3 deletions

View File

@ -1178,6 +1178,20 @@ zil_lwb_flush_vdevs_done(zio_t *zio)
ASSERT3P(zcw->zcw_lwb, ==, lwb); ASSERT3P(zcw->zcw_lwb, ==, lwb);
zcw->zcw_lwb = NULL; zcw->zcw_lwb = NULL;
/*
* We expect any ZIO errors from child ZIOs to have been
* propagated "up" to this specific LWB's root ZIO, in
* order for this error handling to work correctly. This
* includes ZIO errors from either this LWB's write or
* flush, as well as any errors from other dependent LWBs
* (e.g. a root LWB ZIO that might be a child of this LWB).
*
* With that said, it's important to note that LWB flush
* errors are not propagated up to the LWB root ZIO.
* This is incorrect behavior, and results in VDEV flush
* errors not being handled correctly here. See the
* comment above the call to "zio_flush" for details.
*/
zcw->zcw_zio_error = zio->io_error; zcw->zcw_zio_error = zio->io_error;
@ -1251,6 +1265,12 @@ zil_lwb_write_done(zio_t *zio)
* nodes. We avoid calling zio_flush() since there isn't any * nodes. We avoid calling zio_flush() since there isn't any
* good reason for doing so, after the lwb block failed to be * good reason for doing so, after the lwb block failed to be
* written out. * written out.
*
* Additionally, we don't perform any further error handling at
* this point (e.g. setting "zcw_zio_error" appropriately), as
* we expect that to occur in "zil_lwb_flush_vdevs_done" (thus,
* we expect any error seen here, to have been propagated to
* that function).
*/ */
if (zio->io_error != 0) { if (zio->io_error != 0) {
while ((zv = avl_destroy_nodes(t, &cookie)) != NULL) while ((zv = avl_destroy_nodes(t, &cookie)) != NULL)
@ -1281,8 +1301,17 @@ zil_lwb_write_done(zio_t *zio)
while ((zv = avl_destroy_nodes(t, &cookie)) != NULL) { while ((zv = avl_destroy_nodes(t, &cookie)) != NULL) {
vdev_t *vd = vdev_lookup_top(spa, zv->zv_vdev); vdev_t *vd = vdev_lookup_top(spa, zv->zv_vdev);
if (vd != NULL) if (vd != NULL) {
/*
* The "ZIO_FLAG_DONT_PROPAGATE" is currently
* always used within "zio_flush". This means,
* any errors when flushing the vdev(s), will
* (unfortunately) not be handled correctly,
* since these "zio_flush" errors will not be
* propagated up to "zil_lwb_flush_vdevs_done".
*/
zio_flush(lwb->lwb_root_zio, vd); zio_flush(lwb->lwb_root_zio, vd);
}
kmem_free(zv, sizeof (*zv)); kmem_free(zv, sizeof (*zv));
} }
} }
@ -1399,8 +1428,7 @@ zil_lwb_write_open(zilog_t *zilog, lwb_t *lwb)
lwb->lwb_write_zio = zio_rewrite(lwb->lwb_root_zio, lwb->lwb_write_zio = zio_rewrite(lwb->lwb_root_zio,
zilog->zl_spa, 0, &lwb->lwb_blk, lwb_abd, zilog->zl_spa, 0, &lwb->lwb_blk, lwb_abd,
BP_GET_LSIZE(&lwb->lwb_blk), zil_lwb_write_done, lwb, BP_GET_LSIZE(&lwb->lwb_blk), zil_lwb_write_done, lwb,
prio, ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE | prio, ZIO_FLAG_CANFAIL | ZIO_FLAG_FASTWRITE, &zb);
ZIO_FLAG_FASTWRITE, &zb);
ASSERT3P(lwb->lwb_write_zio, !=, NULL); ASSERT3P(lwb->lwb_write_zio, !=, NULL);
lwb->lwb_state = LWB_STATE_OPENED; lwb->lwb_state = LWB_STATE_OPENED;