From c1801cbe59dfc99ba7aa34f09c0b6b8f35bb2d8f Mon Sep 17 00:00:00 2001
From: Alexander Motin <mav@FreeBSD.org>
Date: Fri, 11 Aug 2023 12:04:08 -0400
Subject: [PATCH] ZIL: Avoid dbuf_read() before dmu_sync().

In most cases dmu_sync() works with dirty records directly and does
not need the actual data. The only exception is dmu_sync_late_arrival().
To save some CPU time, use dmu_buf_hold_noread*() in z*_get_data()
and explicitly call dbuf_read() in dmu_sync_late_arrival(). There
is also a chance that by that time the TXG will already be synced and
we won't have to do the read at all.

Reviewed-by: Brian Atkinson <batkinson@lanl.gov>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by:	Alexander Motin <mav@FreeBSD.org>
Sponsored by:	iXsystems, Inc.
Closes #15153
---
 include/sys/dmu.h      | 4 ++++
 include/sys/dmu_impl.h | 2 --
 module/zfs/dmu.c       | 9 ++++++++-
 module/zfs/zfs_vnops.c | 4 ++--
 module/zfs/zvol.c      | 4 ++--
 5 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/include/sys/dmu.h b/include/sys/dmu.h
index 7e57d133c2..615ba8fe74 100644
--- a/include/sys/dmu.h
+++ b/include/sys/dmu.h
@@ -572,11 +572,15 @@ int dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset,
 int dmu_buf_hold_array(objset_t *os, uint64_t object, uint64_t offset,
     uint64_t length, int read, const void *tag, int *numbufsp,
     dmu_buf_t ***dbpp);
+int dmu_buf_hold_noread(objset_t *os, uint64_t object, uint64_t offset,
+    const void *tag, dmu_buf_t **dbp);
 int dmu_buf_hold_by_dnode(dnode_t *dn, uint64_t offset,
     const void *tag, dmu_buf_t **dbp, int flags);
 int dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset,
     uint64_t length, boolean_t read, const void *tag, int *numbufsp,
     dmu_buf_t ***dbpp, uint32_t flags);
+int dmu_buf_hold_noread_by_dnode(dnode_t *dn, uint64_t offset, const void *tag,
+    dmu_buf_t **dbp);
 /*
  * Add a reference to a dmu buffer that has already been held via
  * dmu_buf_hold() in the current context.
diff --git a/include/sys/dmu_impl.h b/include/sys/dmu_impl.h
index ce6ae3c665..83ae2b76ba 100644
--- a/include/sys/dmu_impl.h
+++ b/include/sys/dmu_impl.h
@@ -247,8 +247,6 @@ typedef struct dmu_sendstatus {
 
 void dmu_object_zapify(objset_t *, uint64_t, dmu_object_type_t, dmu_tx_t *);
 void dmu_object_free_zapified(objset_t *, uint64_t, dmu_tx_t *);
-int dmu_buf_hold_noread(objset_t *, uint64_t, uint64_t,
-    const void *, dmu_buf_t **);
 
 #ifdef	__cplusplus
 }
diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c
index 078811dbf4..ddb29020b0 100644
--- a/module/zfs/dmu.c
+++ b/module/zfs/dmu.c
@@ -165,7 +165,7 @@ dmu_object_byteswap_info_t dmu_ot_byteswap[DMU_BSWAP_NUMFUNCS] = {
 	{	zfs_acl_byteswap,	"acl"		}
 };
 
-static int
+int
 dmu_buf_hold_noread_by_dnode(dnode_t *dn, uint64_t offset,
     const void *tag, dmu_buf_t **dbp)
 {
@@ -185,6 +185,7 @@ dmu_buf_hold_noread_by_dnode(dnode_t *dn, uint64_t offset,
 	*dbp = &db->db;
 	return (0);
 }
+
 int
 dmu_buf_hold_noread(objset_t *os, uint64_t object, uint64_t offset,
     const void *tag, dmu_buf_t **dbp)
@@ -1653,6 +1654,12 @@ dmu_sync_late_arrival(zio_t *pio, objset_t *os, dmu_sync_cb_t *done, zgd_t *zgd,
 {
 	dmu_sync_arg_t *dsa;
 	dmu_tx_t *tx;
+	int error;
+
+	error = dbuf_read((dmu_buf_impl_t *)zgd->zgd_db, NULL,
+	    DB_RF_CANFAIL | DB_RF_NOPREFETCH);
+	if (error != 0)
+		return (error);
 
 	tx = dmu_tx_create(os);
 	dmu_tx_hold_space(tx, zgd->zgd_db->db_size);
diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c
index 54ea43363b..07c177f3bc 100644
--- a/module/zfs/zfs_vnops.c
+++ b/module/zfs/zfs_vnops.c
@@ -917,8 +917,8 @@ zfs_get_data(void *arg, uint64_t gen, lr_write_t *lr, char *buf,
 		}
 #endif
 		if (error == 0)
-			error = dmu_buf_hold(os, object, offset, zgd, &db,
-			    DMU_READ_NO_PREFETCH);
+			error = dmu_buf_hold_noread(os, object, offset, zgd,
+			    &db);
 
 		if (error == 0) {
 			blkptr_t *bp = &lr->lr_blkptr;
diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c
index cd4e6f0c75..f44d1b7b55 100644
--- a/module/zfs/zvol.c
+++ b/module/zfs/zvol.c
@@ -727,8 +727,8 @@ zvol_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf,
 		offset = P2ALIGN_TYPED(offset, size, uint64_t);
 		zgd->zgd_lr = zfs_rangelock_enter(&zv->zv_rangelock, offset,
 		    size, RL_READER);
-		error = dmu_buf_hold_by_dnode(zv->zv_dn, offset, zgd, &db,
-		    DMU_READ_NO_PREFETCH);
+		error = dmu_buf_hold_noread_by_dnode(zv->zv_dn, offset, zgd,
+		    &db);
 		if (error == 0) {
 			blkptr_t *bp = &lr->lr_blkptr;