From deb7a84231aff8d772bb4ce9fa486d1886f1a2b6 Mon Sep 17 00:00:00 2001 From: Robert Evans Date: Mon, 25 Mar 2024 17:56:49 -0400 Subject: [PATCH] Fix corruption caused by mmap flushing problems 1) Make mmap flushes synchronous. Linux may skip flushing dirty pages already in writeback unless data-integrity sync is requested. 2) Change zfs_putpage to use TXG_WAIT. Otherwise dirty pages may be skipped due to DMU pushing back on TX assign. 3) Add missing mmap flush when doing block cloning. 4) While here, pass errors from putpage to writepage/writepages. This change fixes corruption edge cases, but unfortunately adds synchronous ZIL flushes for dirty mmap pages to llseek and bclone operations. It may be possible to avoid these sync writes later but would need more tricky refactoring of the writeback code. Reviewed-by: Alexander Motin Reviewed-by: Brian Behlendorf Signed-off-by: Robert Evans Closes #15933 Closes #16019 --- module/os/linux/zfs/zfs_vnops_os.c | 5 +---- module/os/linux/zfs/zpl_file.c | 8 ++++---- module/zfs/zfs_vnops.c | 6 +++++- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/module/os/linux/zfs/zfs_vnops_os.c b/module/os/linux/zfs/zfs_vnops_os.c index c06a75662b..7c473bc7e9 100644 --- a/module/os/linux/zfs/zfs_vnops_os.c +++ b/module/os/linux/zfs/zfs_vnops_os.c @@ -3792,11 +3792,8 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc, dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); zfs_sa_upgrade_txholds(tx, zp); - err = dmu_tx_assign(tx, TXG_NOWAIT); + err = dmu_tx_assign(tx, TXG_WAIT); if (err != 0) { - if (err == ERESTART) - dmu_tx_wait(tx); - dmu_tx_abort(tx); #ifdef HAVE_VFS_FILEMAP_DIRTY_FOLIO filemap_dirty_folio(page_mapping(pp), page_folio(pp)); diff --git a/module/os/linux/zfs/zpl_file.c b/module/os/linux/zfs/zpl_file.c index 3caa0fc6c2..9dec52215c 100644 --- a/module/os/linux/zfs/zpl_file.c +++ b/module/os/linux/zfs/zpl_file.c @@ -720,23 +720,23 @@ zpl_putpage(struct page *pp, struct writeback_control *wbc, void *data) { boolean_t *for_sync = data; fstrans_cookie_t cookie; + int ret; ASSERT(PageLocked(pp)); ASSERT(!PageWriteback(pp)); cookie = spl_fstrans_mark(); - (void) zfs_putpage(pp->mapping->host, pp, wbc, *for_sync); + ret = zfs_putpage(pp->mapping->host, pp, wbc, *for_sync); spl_fstrans_unmark(cookie); - return (0); + return (ret); } #ifdef HAVE_WRITEPAGE_T_FOLIO static int zpl_putfolio(struct folio *pp, struct writeback_control *wbc, void *data) { - (void) zpl_putpage(&pp->page, wbc, data); - return (0); + return (zpl_putpage(&pp->page, wbc, data)); } #endif diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c index 2b37834d5c..7020f88ecf 100644 --- a/module/zfs/zfs_vnops.c +++ b/module/zfs/zfs_vnops.c @@ -130,7 +130,7 @@ zfs_holey_common(znode_t *zp, ulong_t cmd, loff_t *off) /* Flush any mmap()'d data to disk */ if (zn_has_cached_data(zp, 0, file_sz - 1)) - zn_flush_cached_data(zp, B_FALSE); + zn_flush_cached_data(zp, B_TRUE); lr = zfs_rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_READER); error = dmu_offset_next(ZTOZSB(zp)->z_os, zp->z_id, hole, &noff); @@ -1193,6 +1193,10 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp, } } + /* Flush any mmap()'d data to disk */ + if (zn_has_cached_data(inzp, inoff, inoff + len - 1)) + zn_flush_cached_data(inzp, B_TRUE); + /* * Maintain predictable lock order. */