Fix corruption caused by mmap flushing problems

1) Make mmap flushes synchronous. Linux may skip flushing dirty pages
   already in writeback unless data-integrity sync is requested (see the
   sketch after this list).

2) Change zfs_putpage to use TXG_WAIT. Otherwise dirty pages may be
   skipped due to DMU pushing back on TX assign.

3) Add missing mmap flush when doing block cloning.

4) While here, pass errors from putpage to writepage/writepages.
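
For reference, a minimal sketch of what "data-integrity sync" means on the
Linux side. This is illustrative kernel-style C, not the ZFS code path, and
the helper flush_mmap_dirty_pages() is a made-up name:

    /* Illustrative only: requesting data-integrity writeback. */
    #include <linux/fs.h>
    #include <linux/pagemap.h>

    /* Hypothetical helper, not part of ZFS. */
    static int
    flush_mmap_dirty_pages(struct inode *ip, loff_t start, loff_t end)
    {
            /*
             * filemap_flush() issues WB_SYNC_NONE writeback: it does not
             * wait, and pages already under writeback may be skipped.
             * filemap_write_and_wait_range() uses WB_SYNC_ALL and waits,
             * so every dirty page in [start, end] is on stable storage
             * when it returns.
             */
            return filemap_write_and_wait_range(ip->i_mapping, start, end);
    }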

This change fixes corruption edge cases, but unfortunately adds
synchronous ZIL flushes for dirty mmap pages to llseek and bclone
operations. It may be possible to avoid these sync writes later, but
that would require trickier refactoring of the writeback code.

Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Robert Evans <evansr@google.com>
Closes #15933 
Closes #16019
Robert Evans, 2024-03-25 17:56:49 -04:00, committed by Brian Behlendorf
parent eebf00bee9
commit deb7a84231
3 changed files with 10 additions and 9 deletions


@@ -3792,11 +3792,8 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc,
 	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
 	zfs_sa_upgrade_txholds(tx, zp);
 
-	err = dmu_tx_assign(tx, TXG_NOWAIT);
+	err = dmu_tx_assign(tx, TXG_WAIT);
 	if (err != 0) {
-		if (err == ERESTART)
-			dmu_tx_wait(tx);
-
 		dmu_tx_abort(tx);
 #ifdef HAVE_VFS_FILEMAP_DIRTY_FOLIO
 		filemap_dirty_folio(page_mapping(pp), page_folio(pp));
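
As context for the TXG_WAIT change above, a rough sketch of the two
dmu_tx_assign() modes (simplified; not the full zfs_putpage() error path):

    /*
     * Old pattern: TXG_NOWAIT returns ERESTART when the DMU pushes back
     * (txg or dirty-data limits); the caller must wait and retry itself.
     * zfs_putpage() did not retry, so the page could remain dirty and the
     * flush could be skipped.
     */
    err = dmu_tx_assign(tx, TXG_NOWAIT);
    if (err == ERESTART) {
            dmu_tx_wait(tx);        /* wait for the next open txg */
            dmu_tx_abort(tx);       /* then rebuild the tx and retry */
    }

    /*
     * New pattern: TXG_WAIT blocks inside dmu_tx_assign() until the
     * transaction can be assigned, so only hard errors (e.g. pool
     * suspension) come back and the dirty page is not silently skipped.
     */
    err = dmu_tx_assign(tx, TXG_WAIT);
    if (err != 0)
            dmu_tx_abort(tx);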


@@ -720,23 +720,23 @@ zpl_putpage(struct page *pp, struct writeback_control *wbc, void *data)
 {
 	boolean_t *for_sync = data;
 	fstrans_cookie_t cookie;
+	int ret;
 
 	ASSERT(PageLocked(pp));
 	ASSERT(!PageWriteback(pp));
 
 	cookie = spl_fstrans_mark();
-	(void) zfs_putpage(pp->mapping->host, pp, wbc, *for_sync);
+	ret = zfs_putpage(pp->mapping->host, pp, wbc, *for_sync);
 	spl_fstrans_unmark(cookie);
 
-	return (0);
+	return (ret);
 }
 
 #ifdef HAVE_WRITEPAGE_T_FOLIO
 static int
 zpl_putfolio(struct folio *pp, struct writeback_control *wbc, void *data)
 {
-	(void) zpl_putpage(&pp->page, wbc, data);
-
-	return (0);
+	return (zpl_putpage(&pp->page, wbc, data));
 }
 #endif


@@ -130,7 +130,7 @@ zfs_holey_common(znode_t *zp, ulong_t cmd, loff_t *off)
 	/* Flush any mmap()'d data to disk */
 	if (zn_has_cached_data(zp, 0, file_sz - 1))
-		zn_flush_cached_data(zp, B_FALSE);
+		zn_flush_cached_data(zp, B_TRUE);
 
 	lr = zfs_rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_READER);
 	error = dmu_offset_next(ZTOZSB(zp)->z_os, zp->z_id, hole, &noff);
@@ -1193,6 +1193,10 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp,
 		}
 	}
 
+	/* Flush any mmap()'d data to disk */
+	if (zn_has_cached_data(inzp, inoff, inoff + len - 1))
+		zn_flush_cached_data(inzp, B_TRUE);
+
 	/*
 	 * Maintain predictable lock order.
 	 */