Fix corruption caused by mmap flushing problems
1) Make mmap flushes synchronous. Linux may skip flushing dirty
   pages already in writeback unless data-integrity sync is requested.

2) Change zfs_putpage to use TXG_WAIT. Otherwise dirty pages may be
   skipped due to DMU pushing back on TX assign.

3) Add missing mmap flush when doing block cloning.

4) While here, pass errors from putpage to writepage/writepages.

This change fixes corruption edge cases, but unfortunately adds
synchronous ZIL flushes for dirty mmap pages to llseek and bclone
operations. It may be possible to avoid these sync writes later
but would need more tricky refactoring of the writeback code.

Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Robert Evans <evansr@google.com>
Closes #15933
Closes #16019
commit deb7a84231 (parent eebf00bee9)
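Background for point 1, as a minimal sketch: the flush mode decides whether
pages already under writeback are waited on. The helper below is hypothetical
and not part of this commit; write_inode_now() is the stock Linux API, and its
sync argument selects WB_SYNC_ALL versus WB_SYNC_NONE.

    #include <linux/fs.h>

    /*
     * Hypothetical helper illustrating the distinction this commit relies
     * on: sync=0 flushes with WB_SYNC_NONE, which may skip dirty pages
     * already in writeback; sync=1 flushes with WB_SYNC_ALL, the
     * data-integrity mode that also waits for in-flight writeback.
     */
    static int
    flush_inode_pages(struct inode *ip, int data_integrity)
    {
            return (write_inode_now(ip, data_integrity));
    }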
@@ -3792,11 +3792,8 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc,
 	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
 	zfs_sa_upgrade_txholds(tx, zp);
 
-	err = dmu_tx_assign(tx, TXG_NOWAIT);
+	err = dmu_tx_assign(tx, TXG_WAIT);
 	if (err != 0) {
-		if (err == ERESTART)
-			dmu_tx_wait(tx);
-
 		dmu_tx_abort(tx);
 #ifdef HAVE_VFS_FILEMAP_DIRTY_FOLIO
 		filemap_dirty_folio(page_mapping(pp), page_folio(pp));
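For context on point 2, a sketch of the conventional pattern, not code from
this change: callers that use TXG_NOWAIT normally retry after dmu_tx_wait().
zfs_putpage() had no such retry loop, so on ERESTART it waited once, aborted,
redirtied the page, and returned success, silently skipping the page for that
writeback pass. TXG_WAIT instead makes dmu_tx_assign() block until the
transaction can be assigned. The function below is a hypothetical example.

    /* Hypothetical example of the usual TXG_NOWAIT retry loop. */
    static int
    write_with_retry(objset_t *os, uint64_t obj, uint64_t off, int len)
    {
            dmu_tx_t *tx;
            int err;
    top:
            tx = dmu_tx_create(os);
            dmu_tx_hold_write(tx, obj, off, len);
            err = dmu_tx_assign(tx, TXG_NOWAIT);
            if (err != 0) {
                    if (err == ERESTART) {
                            dmu_tx_wait(tx);        /* wait for next open TXG */
                            dmu_tx_abort(tx);
                            goto top;               /* rebuild the TX, retry */
                    }
                    dmu_tx_abort(tx);
                    return (err);
            }
            /* ... perform the write ... */
            dmu_tx_commit(tx);
            return (0);
    }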
@@ -720,23 +720,23 @@ zpl_putpage(struct page *pp, struct writeback_control *wbc, void *data)
 {
 	boolean_t *for_sync = data;
 	fstrans_cookie_t cookie;
+	int ret;
 
 	ASSERT(PageLocked(pp));
 	ASSERT(!PageWriteback(pp));
 
 	cookie = spl_fstrans_mark();
-	(void) zfs_putpage(pp->mapping->host, pp, wbc, *for_sync);
+	ret = zfs_putpage(pp->mapping->host, pp, wbc, *for_sync);
 	spl_fstrans_unmark(cookie);
 
-	return (0);
+	return (ret);
 }
 
 #ifdef HAVE_WRITEPAGE_T_FOLIO
 static int
 zpl_putfolio(struct folio *pp, struct writeback_control *wbc, void *data)
 {
-	(void) zpl_putpage(&pp->page, wbc, data);
-	return (0);
+	return (zpl_putpage(&pp->page, wbc, data));
 }
 #endif
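For context on point 4, an abridged and assumed sketch of the caller shape
(the real zpl_writepages() also handles the WB_SYNC_ALL two-pass case and ZIL
commits): zpl_putpage() runs as a writepage_t callback under the kernel's
write_cache_pages(), which stops at and returns the first nonzero callback
result, so the error propagated above now reaches the VFS instead of being
reported as success.

    #include <linux/writeback.h>

    /* Assumed, simplified caller: drives zpl_putpage() over dirty pages. */
    static int
    zpl_writepages_sketch(struct address_space *mapping,
        struct writeback_control *wbc)
    {
            boolean_t for_sync = (wbc->sync_mode == WB_SYNC_ALL);

            /* write_cache_pages() returns the callback's first error. */
            return (write_cache_pages(mapping, wbc, zpl_putpage, &for_sync));
    }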
@@ -130,7 +130,7 @@ zfs_holey_common(znode_t *zp, ulong_t cmd, loff_t *off)
 
 	/* Flush any mmap()'d data to disk */
 	if (zn_has_cached_data(zp, 0, file_sz - 1))
-		zn_flush_cached_data(zp, B_FALSE);
+		zn_flush_cached_data(zp, B_TRUE);
 
 	lr = zfs_rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_READER);
 	error = dmu_offset_next(ZTOZSB(zp)->z_os, zp->z_id, hole, &noff);
@@ -1193,6 +1193,10 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp,
 		}
 	}
 
+	/* Flush any mmap()'d data to disk */
+	if (zn_has_cached_data(inzp, inoff, inoff + len - 1))
+		zn_flush_cached_data(inzp, B_TRUE);
+
 	/*
 	 * Maintain predictable lock order.
 	 */
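For reference, the Linux definitions of the zn_* wrappers used in the two
hunks above, as assumed from zfs_znode_impl.h at the time of this change:
passing B_TRUE turns the flush into a data-integrity (WB_SYNC_ALL) writeback
that also waits on pages already in flight, which is what point 1 requires.

    /* Assumed Linux mappings of the zn_* pagecache wrappers. */
    #define	zn_has_cached_data(zp, start, end) \
    	filemap_range_has_page(ZTOI(zp)->i_mapping, start, end)
    #define	zn_flush_cached_data(zp, sync)	write_inode_now(ZTOI(zp), sync)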