Fixed data integrity issue when underlying disk returns error
Errors in zil_lwb_write_done() are not propagated to zil_lwb_flush_vdevs_done() which can result in zil_commit_impl() not returning an error to applications even when zfs was not able to write data to the disk. Remove the ZIO_FLAG_DONT_PROPAGATE flag from zio_rewrite() to allow errors to propagate and consolidate the error handling for flush and write errors to a single location (rather than having error handling split between the "write done" and "flush done" handlers). Reviewed-by: George Wilson <gwilson@delphix.com> Reviewed-by: Prakash Surya <prakash.surya@delphix.com> Signed-off-by: Arun KV <arun.kv@datacore.com> Closes #12391 Closes #12443
This commit is contained in:
parent
7816a6b85b
commit
bb80b4649a
|
@ -1178,6 +1178,20 @@ zil_lwb_flush_vdevs_done(zio_t *zio)
|
||||||
|
|
||||||
ASSERT3P(zcw->zcw_lwb, ==, lwb);
|
ASSERT3P(zcw->zcw_lwb, ==, lwb);
|
||||||
zcw->zcw_lwb = NULL;
|
zcw->zcw_lwb = NULL;
|
||||||
|
/*
|
||||||
|
* We expect any ZIO errors from child ZIOs to have been
|
||||||
|
* propagated "up" to this specific LWB's root ZIO, in
|
||||||
|
* order for this error handling to work correctly. This
|
||||||
|
* includes ZIO errors from either this LWB's write or
|
||||||
|
* flush, as well as any errors from other dependent LWBs
|
||||||
|
* (e.g. a root LWB ZIO that might be a child of this LWB).
|
||||||
|
*
|
||||||
|
* With that said, it's important to note that LWB flush
|
||||||
|
* errors are not propagated up to the LWB root ZIO.
|
||||||
|
* This is incorrect behavior, and results in VDEV flush
|
||||||
|
* errors not being handled correctly here. See the
|
||||||
|
* comment above the call to "zio_flush" for details.
|
||||||
|
*/
|
||||||
|
|
||||||
zcw->zcw_zio_error = zio->io_error;
|
zcw->zcw_zio_error = zio->io_error;
|
||||||
|
|
||||||
|
@ -1251,6 +1265,12 @@ zil_lwb_write_done(zio_t *zio)
|
||||||
* nodes. We avoid calling zio_flush() since there isn't any
|
* nodes. We avoid calling zio_flush() since there isn't any
|
||||||
* good reason for doing so, after the lwb block failed to be
|
* good reason for doing so, after the lwb block failed to be
|
||||||
* written out.
|
* written out.
|
||||||
|
*
|
||||||
|
* Additionally, we don't perform any further error handling at
|
||||||
|
* this point (e.g. setting "zcw_zio_error" appropriately), as
|
||||||
|
* we expect that to occur in "zil_lwb_flush_vdevs_done" (thus,
|
||||||
|
* we expect any error seen here, to have been propagated to
|
||||||
|
* that function).
|
||||||
*/
|
*/
|
||||||
if (zio->io_error != 0) {
|
if (zio->io_error != 0) {
|
||||||
while ((zv = avl_destroy_nodes(t, &cookie)) != NULL)
|
while ((zv = avl_destroy_nodes(t, &cookie)) != NULL)
|
||||||
|
@ -1281,8 +1301,17 @@ zil_lwb_write_done(zio_t *zio)
|
||||||
|
|
||||||
while ((zv = avl_destroy_nodes(t, &cookie)) != NULL) {
|
while ((zv = avl_destroy_nodes(t, &cookie)) != NULL) {
|
||||||
vdev_t *vd = vdev_lookup_top(spa, zv->zv_vdev);
|
vdev_t *vd = vdev_lookup_top(spa, zv->zv_vdev);
|
||||||
if (vd != NULL)
|
if (vd != NULL) {
|
||||||
|
/*
|
||||||
|
* The "ZIO_FLAG_DONT_PROPAGATE" is currently
|
||||||
|
* always used within "zio_flush". This means,
|
||||||
|
* any errors when flushing the vdev(s), will
|
||||||
|
* (unfortunately) not be handled correctly,
|
||||||
|
* since these "zio_flush" errors will not be
|
||||||
|
* propagated up to "zil_lwb_flush_vdevs_done".
|
||||||
|
*/
|
||||||
zio_flush(lwb->lwb_root_zio, vd);
|
zio_flush(lwb->lwb_root_zio, vd);
|
||||||
|
}
|
||||||
kmem_free(zv, sizeof (*zv));
|
kmem_free(zv, sizeof (*zv));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1399,8 +1428,7 @@ zil_lwb_write_open(zilog_t *zilog, lwb_t *lwb)
|
||||||
lwb->lwb_write_zio = zio_rewrite(lwb->lwb_root_zio,
|
lwb->lwb_write_zio = zio_rewrite(lwb->lwb_root_zio,
|
||||||
zilog->zl_spa, 0, &lwb->lwb_blk, lwb_abd,
|
zilog->zl_spa, 0, &lwb->lwb_blk, lwb_abd,
|
||||||
BP_GET_LSIZE(&lwb->lwb_blk), zil_lwb_write_done, lwb,
|
BP_GET_LSIZE(&lwb->lwb_blk), zil_lwb_write_done, lwb,
|
||||||
prio, ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE |
|
prio, ZIO_FLAG_CANFAIL | ZIO_FLAG_FASTWRITE, &zb);
|
||||||
ZIO_FLAG_FASTWRITE, &zb);
|
|
||||||
ASSERT3P(lwb->lwb_write_zio, !=, NULL);
|
ASSERT3P(lwb->lwb_write_zio, !=, NULL);
|
||||||
|
|
||||||
lwb->lwb_state = LWB_STATE_OPENED;
|
lwb->lwb_state = LWB_STATE_OPENED;
|
||||||
|
|
Loading…
Reference in New Issue