diff --git a/include/os/freebsd/spl/sys/sdt.h b/include/os/freebsd/spl/sys/sdt.h index 2daa6de1af..6f45e036bc 100644 --- a/include/os/freebsd/spl/sys/sdt.h +++ b/include/os/freebsd/spl/sys/sdt.h @@ -37,7 +37,7 @@ SDT_PROBE_DECLARE(sdt, , , set__error); #define SET_ERROR(err) \ ((sdt_sdt___set__error->id ? \ (*sdt_probe_func)(sdt_sdt___set__error->id, \ - (uintptr_t)err, 0, 0, 0, 0) : 0), err) + (uintptr_t)err, 0, 0, 0, 0, 0) : 0), err) #else #define SET_ERROR(err) (err) #endif diff --git a/include/os/freebsd/spl/sys/systm.h b/include/os/freebsd/spl/sys/systm.h index 98ee955752..f17d820e7a 100644 --- a/include/os/freebsd/spl/sys/systm.h +++ b/include/os/freebsd/spl/sys/systm.h @@ -39,5 +39,6 @@ #define PAGEMASK (~PAGEOFFSET) #define delay(x) pause("soldelay", (x)) +#define delay_sig(x) (pause_sig("soldelay", (x)) != EAGAIN) #endif /* _OPENSOLARIS_SYS_SYSTM_H_ */ diff --git a/include/os/linux/spl/sys/timer.h b/include/os/linux/spl/sys/timer.h index 02c3c78934..abb9ef04fe 100644 --- a/include/os/linux/spl/sys/timer.h +++ b/include/os/linux/spl/sys/timer.h @@ -51,6 +51,7 @@ #define ddi_time_after_eq64(a, b) ddi_time_before_eq64(b, a) #define delay(ticks) schedule_timeout_uninterruptible(ticks) +#define delay_sig(ticks) (schedule_timeout_interruptible(ticks) > 0) #define SEC_TO_TICK(sec) ((sec) * HZ) #define MSEC_TO_TICK(ms) msecs_to_jiffies(ms) diff --git a/include/sys/vfs_ratelimit.h b/include/sys/vfs_ratelimit.h index c54821aa21..8b92476c83 100644 --- a/include/sys/vfs_ratelimit.h +++ b/include/sys/vfs_ratelimit.h @@ -55,10 +55,15 @@ void vfs_ratelimit_free(struct vfs_ratelimit *rl); struct vfs_ratelimit *vfs_ratelimit_set(struct vfs_ratelimit *rl, zfs_prop_t prop, uint64_t limit); -void vfs_ratelimit_data_read(objset_t *os, size_t blocksize, size_t bytes); -void vfs_ratelimit_data_write(objset_t *os, size_t blocksize, size_t bytes); -void vfs_ratelimit_metadata_read(objset_t *os); -void vfs_ratelimit_metadata_write(objset_t *os); +int vfs_ratelimit_data_read(objset_t *os, size_t blocksize, size_t bytes); +int vfs_ratelimit_data_write(objset_t *os, size_t blocksize, size_t bytes); +int vfs_ratelimit_data_copy(objset_t *srcos, objset_t *dstos, size_t blocksize, + size_t bytes); +int vfs_ratelimit_metadata_read(objset_t *os); +int vfs_ratelimit_metadata_write(objset_t *os); + +void vfs_ratelimit_data_read_spin(objset_t *os, size_t blocksize, size_t bytes); +void vfs_ratelimit_data_write_spin(objset_t *os, size_t blocksize, size_t bytes); #ifdef __cplusplus } diff --git a/man/man7/zfsprops.7 b/man/man7/zfsprops.7 index a97c69d121..185cbff3d6 100644 --- a/man/man7/zfsprops.7 +++ b/man/man7/zfsprops.7 @@ -1189,7 +1189,7 @@ This property may be changed with .It Sy limit_bw_total Ns = Ns Ar size Ns | Ns Sy none Limits the read, write, or combined bandwidth, respectively, that a dataset and its descendants can consume. -Limits are applied to both file systems and ZFS volumes. +Limits are applied to file systems, volumes and their snapshots. Bandwidth limits are in bytes per second. 
.Pp The configured limits are hierarchical, just like quotas; i.e., even if a diff --git a/module/os/freebsd/zfs/zfs_vnops_os.c b/module/os/freebsd/zfs/zfs_vnops_os.c index 62f6c87eca..d39ef04b48 100644 --- a/module/os/freebsd/zfs/zfs_vnops_os.c +++ b/module/os/freebsd/zfs/zfs_vnops_os.c @@ -1156,7 +1156,11 @@ zfs_create(znode_t *dzp, const char *name, vattr_t *vap, int excl, int mode, goto out; } - vfs_ratelimit_metadata_write(os); + error = vfs_ratelimit_metadata_write(os); + if (error != 0) { + zfs_acl_ids_free(&acl_ids); + goto out; + } getnewvnode_reserve_(); @@ -1291,7 +1295,10 @@ zfs_remove_(vnode_t *dvp, vnode_t *vp, const char *name, cred_t *cr) ASSERT0(error); } - vfs_ratelimit_metadata_write(zfsvfs->z_os); + error = vfs_ratelimit_metadata_write(zfsvfs->z_os); + if (error != 0) { + goto out; + } /* * We may delete the znode now, or we may put it in the unlinked set; @@ -1321,8 +1328,7 @@ error = dmu_tx_assign(tx, TXG_WAIT); if (error) { dmu_tx_abort(tx); - zfs_exit(zfsvfs, FTAG); - return (error); + goto out; } /* @@ -1520,7 +1526,12 @@ zfs_mkdir(znode_t *dzp, const char *dirname, vattr_t *vap, znode_t **zpp, return (SET_ERROR(EDQUOT)); } - vfs_ratelimit_metadata_write(zfsvfs->z_os); + error = vfs_ratelimit_metadata_write(zfsvfs->z_os); + if (error != 0) { + zfs_acl_ids_free(&acl_ids); + zfs_exit(zfsvfs, FTAG); + return (error); + } /* * Add a new entry to the directory. @@ -1643,6 +1654,10 @@ zfs_rmdir_(vnode_t *dvp, vnode_t *vp, const char *name, cred_t *cr) goto out; } + error = vfs_ratelimit_metadata_write(zfsvfs->z_os); + if (error != 0) { + goto out; + } + vnevent_rmdir(vp, dvp, name, ct); - vfs_ratelimit_metadata_write(zfsvfs->z_os); @@ -1657,8 +1673,7 @@ error = dmu_tx_assign(tx, TXG_WAIT); if (error) { dmu_tx_abort(tx); - zfs_exit(zfsvfs, FTAG); - return (error); + goto out; } error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, NULL); @@ -1783,6 +1798,21 @@ zfs_readdir(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, int *eofp, offset = zfs_uio_offset(uio); prefetch = zp->z_zn_prefetch; + /* + * Calling vfs_ratelimit_data_read() for each directory entry would be + * way too expensive, so we do the following instead: + * we charge here up front for a single block only. If there is a lot + * of traffic, we wait before any reading is issued. Once all the + * directory entries have been read, we charge the process for the + * rest, since only then do we know exactly how much data was + * read. + */ + error = vfs_ratelimit_data_read(os, zp->z_blksz, zp->z_blksz); + if (error != 0) { + zfs_exit(zfsvfs, FTAG); + return (error); + } + /* * Initialize the iterator cursor. */ @@ -1940,12 +1970,16 @@ zfs_readdir(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, int *eofp, *ncookies -= ncooks; /* - * This is post factum, but if we would do that inside the loop we - * wouldn't know the record length before reading it anyway plus we - * would be calling vfs_ratelimit_data_read() way too often and each - * call accounts for a single operation. + * Charge the process for the rest if more than a single block was + * read.
*/ - vfs_ratelimit_data_read(os, zp->z_blksz, outcount); + if (error == 0 && outcount > zp->z_blksz) { + error = vfs_ratelimit_data_read(os, zp->z_blksz, + outcount - zp->z_blksz); + if (error != 0) { + goto update; + } + } if (zfs_uio_segflg(uio) == UIO_SYSSPACE && zfs_uio_iovcnt(uio) == 1) { iovp->iov_base += outcount; @@ -2039,7 +2073,11 @@ zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr) } } - vfs_ratelimit_metadata_read(zfsvfs->z_os); + error = vfs_ratelimit_metadata_read(zfsvfs->z_os); + if (error != 0) { + zfs_exit(zfsvfs, FTAG); + return (error); + } /* * Return all attributes. It's cheaper to provide the answer @@ -2637,7 +2675,10 @@ zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr, zidmap_t *mnt_ns) } } - vfs_ratelimit_metadata_write(os); + err = vfs_ratelimit_metadata_write(os); + if (err != 0) { + goto out2; + } tx = dmu_tx_create(os); @@ -3375,6 +3416,11 @@ zfs_do_rename_impl(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp, } } + error = vfs_ratelimit_metadata_write(zfsvfs->z_os); + if (error != 0) { + goto out; + } + vn_seqc_write_begin(*svpp); vn_seqc_write_begin(sdvp); if (*tvpp != NULL) @@ -3586,14 +3632,18 @@ zfs_symlink(znode_t *dzp, const char *name, vattr_t *vap, return (error); } - if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, - 0 /* projid */)) { + if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, 0 /* projid */)) { zfs_acl_ids_free(&acl_ids); zfs_exit(zfsvfs, FTAG); return (SET_ERROR(EDQUOT)); } - vfs_ratelimit_metadata_write(zfsvfs->z_os); + error = vfs_ratelimit_metadata_write(zfsvfs->z_os); + if (error != 0) { + zfs_acl_ids_free(&acl_ids); + zfs_exit(zfsvfs, FTAG); + return (error); + } getnewvnode_reserve_(); tx = dmu_tx_create(zfsvfs->z_os); @@ -3692,7 +3742,11 @@ zfs_readlink(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, caller_context_t *ct) if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0) return (error); - vfs_ratelimit_metadata_read(zfsvfs->z_os); + error = vfs_ratelimit_metadata_read(zfsvfs->z_os); + if (error != 0) { + zfs_exit(zfsvfs, FTAG); + return (error); + } if (zp->z_is_sa) error = sa_lookup_uio(zp->z_sa_hdl, @@ -3822,7 +3876,11 @@ zfs_link(znode_t *tdzp, znode_t *szp, const char *name, cred_t *cr, return (error); } - vfs_ratelimit_metadata_write(zfsvfs->z_os); + error = vfs_ratelimit_metadata_write(zfsvfs->z_os); + if (error != 0) { + zfs_exit(zfsvfs, FTAG); + return (error); + } tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); @@ -3839,8 +3897,7 @@ zfs_link(znode_t *tdzp, znode_t *szp, const char *name, cred_t *cr, error = zfs_link_create(tdzp, name, szp, tx, 0); if (error == 0) { - uint64_t txtype = TX_LINK; - zfs_log_link(zilog, tx, txtype, tdzp, szp, name); + zfs_log_link(zilog, tx, TX_LINK, tdzp, szp, name); } dmu_tx_commit(tx); @@ -4153,7 +4210,7 @@ zfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind, pgsin_a = MIN(*rahead, pgsin_a); } - vfs_ratelimit_data_read(zfsvfs->z_os, zp->z_blksz, + error = vfs_ratelimit_data_read(zfsvfs->z_os, zp->z_blksz, MIN(end, obj_size) - start); /* @@ -4162,8 +4219,10 @@ zfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind, * ZFS will panic if we request DMU to read beyond the end of the last * allocated block. 
*/ - error = dmu_read_pages(zfsvfs->z_os, zp->z_id, ma, count, &pgsin_b, - &pgsin_a, MIN(end, obj_size) - (end - PAGE_SIZE)); + if (error == 0) { + error = dmu_read_pages(zfsvfs->z_os, zp->z_id, ma, count, + &pgsin_b, &pgsin_a, MIN(end, obj_size) - (end - PAGE_SIZE)); + } if (lr != NULL) zfs_rangelock_exit(lr); @@ -4292,7 +4351,9 @@ zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags, goto out; } - vfs_ratelimit_data_write(zfsvfs->z_os, zp->z_blksz, len); + if (vfs_ratelimit_data_write(zfsvfs->z_os, zp->z_blksz, len) != 0) { + goto out; + } tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_write(tx, zp->z_id, off, len); diff --git a/module/os/freebsd/zfs/zvol_os.c b/module/os/freebsd/zfs/zvol_os.c index 4f8278feb1..3df8848172 100644 --- a/module/os/freebsd/zfs/zvol_os.c +++ b/module/os/freebsd/zfs/zvol_os.c @@ -730,7 +730,9 @@ zvol_geom_bio_strategy(struct bio *bp) if (bp->bio_cmd == BIO_DELETE) { /* Should we account only for a single metadata write? */ - vfs_ratelimit_metadata_write(zv->zv_objset); + error = vfs_ratelimit_metadata_write(zv->zv_objset); + if (error != 0) + goto unlock; dmu_tx_t *tx = dmu_tx_create(zv->zv_objset); error = dmu_tx_assign(tx, TXG_WAIT); if (error != 0) { @@ -747,29 +749,29 @@ zvol_geom_bio_strategy(struct bio *bp) while (resid != 0 && off < volsize) { size_t size = MIN(resid, zvol_maxphys); if (doread) { - vfs_ratelimit_data_read(zv->zv_objset, + error = vfs_ratelimit_data_read(zv->zv_objset, zv->zv_volblocksize, size); + if (error != 0) + break; error = dmu_read(os, ZVOL_OBJ, off, size, addr, DMU_READ_PREFETCH); + if (error != 0) + break; } else { - vfs_ratelimit_data_write(zv->zv_objset, + error = vfs_ratelimit_data_write(zv->zv_objset, zv->zv_volblocksize, size); + if (error != 0) + break; dmu_tx_t *tx = dmu_tx_create(os); dmu_tx_hold_write_by_dnode(tx, zv->zv_dn, off, size); error = dmu_tx_assign(tx, TXG_WAIT); - if (error) { + if (error != 0) { dmu_tx_abort(tx); - } else { - dmu_write(os, ZVOL_OBJ, off, size, addr, tx); - zvol_log_write(zv, tx, off, size, commit); - dmu_tx_commit(tx); + break; } - } - if (error) { - /* Convert checksum errors into IO errors. */ - if (error == ECKSUM) - error = SET_ERROR(EIO); - break; + dmu_write(os, ZVOL_OBJ, off, size, addr, tx); + zvol_log_write(zv, tx, off, size, commit); + dmu_tx_commit(tx); } off += size; addr += size; @@ -779,7 +781,12 @@ unlock: zfs_rangelock_exit(lr); bp->bio_completed = bp->bio_length - resid; - if (bp->bio_completed < bp->bio_length && off > volsize) + if (error == EINTR && bp->bio_completed > 0) + error = 0; + /* Convert checksum errors into IO errors. 
*/ + else if (error == ECKSUM) + error = SET_ERROR(EIO); + if (error == 0 && bp->bio_completed < bp->bio_length && off > volsize) error = SET_ERROR(EINVAL); switch (bp->bio_cmd) { diff --git a/module/os/linux/zfs/zfs_vnops_os.c b/module/os/linux/zfs/zfs_vnops_os.c index 4f3d3eea1b..31acb89bc5 100644 --- a/module/os/linux/zfs/zfs_vnops_os.c +++ b/module/os/linux/zfs/zfs_vnops_os.c @@ -683,7 +683,11 @@ top: goto out; } - vfs_ratelimit_metadata_write(os); + error = vfs_ratelimit_metadata_write(os); + if (error != 0) { + zfs_acl_ids_free(&acl_ids); + goto out; + } tx = dmu_tx_create(os); @@ -879,7 +883,11 @@ top: goto out; } - vfs_ratelimit_metadata_write(os); + error = vfs_ratelimit_metadata_write(os); + if (error != 0) { + zfs_acl_ids_free(&acl_ids); + goto out; + } tx = dmu_tx_create(os); @@ -1012,6 +1020,11 @@ top: goto out; } + error = vfs_ratelimit_metadata_write(zfsvfs->z_os); + if (error != 0) { + goto out; + } + mutex_enter(&zp->z_lock); may_delete_now = atomic_read(&ZTOI(zp)->i_count) == 1 && !zn_has_cached_data(zp, 0, LLONG_MAX); @@ -1290,7 +1303,13 @@ top: return (SET_ERROR(EDQUOT)); } - vfs_ratelimit_metadata_write(zfsvfs->z_os); + error = vfs_ratelimit_metadata_write(zfsvfs->z_os); + if (error != 0) { + zfs_acl_ids_free(&acl_ids); + zfs_dirent_unlock(dl); + zfs_exit(zfsvfs, FTAG); + return (error); + } /* * Add a new entry to the directory. @@ -1434,7 +1453,10 @@ top: goto out; } - vfs_ratelimit_metadata_write(zfsvfs->z_os); + error = vfs_ratelimit_metadata_write(zfsvfs->z_os); + if (error != 0) { + goto out; + } /* * Grab a lock on the directory to make sure that no one is @@ -1535,6 +1557,7 @@ zfs_readdir(struct inode *ip, zpl_dir_context_t *ctx, cred_t *cr) int done = 0; uint64_t parent; uint64_t offset; /* must be unsigned; checks for < 1 */ + size_t nbytes; if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0) return (error); @@ -1553,6 +1576,21 @@ zfs_readdir(struct inode *ip, zpl_dir_context_t *ctx, cred_t *cr) os = zfsvfs->z_os; offset = ctx->pos; prefetch = zp->z_zn_prefetch; + nbytes = 0; + + /* + * Calling vfs_ratelimit_data_read() for each directory entry would be + * way too expensive, so we do the following instead: + * we charge here up front for a single block only. If there is a lot + * of traffic, we wait before any reading is issued. Once all the + * directory entries have been read, we charge the process for the + * rest, since only then do we know exactly how much data was + * read. + */ + error = vfs_ratelimit_data_read(os, zp->z_blksz, zp->z_blksz); + if (error != 0) { + goto out; + } /* * Initialize the iterator cursor. @@ -1645,18 +1683,21 @@ zfs_readdir(struct inode *ip, zpl_dir_context_t *ctx, cred_t *cr) offset += 1; } ctx->pos = offset; + /* + * TODO: We should also add the size of the dirent structure here. + */ + nbytes += strlen(zap.za_name); } zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */ -#ifdef TODO /* - * This is post factum, but if we would do that inside the loop we - * wouldn't know the record length before reading it anyway plus we - * would be calling vfs_ratelimit_data_read() way too often and each - * call accounts for a single operation. + * Charge the process for the rest if more than a single block was + * read. */ - vfs_ratelimit_data_read(os, zp->z_blksz, size /* ???
*/); -#endif + if (error == 0 && nbytes > zp->z_blksz) { + error = vfs_ratelimit_data_read(os, zp->z_blksz, + nbytes - zp->z_blksz); + } update: zap_cursor_fini(&zc); @@ -1697,7 +1738,11 @@ zfs_getattr_fast(zidmap_t *user_ns, struct inode *ip, struct kstat *sp) if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0) return (error); - vfs_ratelimit_metadata_read(zfsvfs->z_os); + error = vfs_ratelimit_metadata_read(zfsvfs->z_os); + if (error != 0) { + zfs_exit(zfsvfs, FTAG); + return (error); + } mutex_enter(&zp->z_lock); @@ -2298,7 +2343,10 @@ top: } } - vfs_ratelimit_metadata_write(os); + err = vfs_ratelimit_metadata_write(os); + if (err != 0) { + goto out2; + } tx = dmu_tx_create(os); @@ -3012,7 +3060,10 @@ top: } } - vfs_ratelimit_metadata_write(zfsvfs->z_os); + error = vfs_ratelimit_metadata_write(zfsvfs->z_os); + if (error != 0) { + goto out; + } tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); @@ -3328,7 +3379,13 @@ top: return (SET_ERROR(EDQUOT)); } - vfs_ratelimit_metadata_write(zfsvfs->z_os); + error = vfs_ratelimit_metadata_write(zfsvfs->z_os); + if (error != 0) { + zfs_acl_ids_free(&acl_ids); + zfs_dirent_unlock(dl); + zfs_exit(zfsvfs, FTAG); + return (error); + } tx = dmu_tx_create(zfsvfs->z_os); fuid_dirtied = zfsvfs->z_fuid_dirty; @@ -3438,7 +3495,11 @@ zfs_readlink(struct inode *ip, zfs_uio_t *uio, cred_t *cr) if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0) return (error); - vfs_ratelimit_metadata_read(zfsvfs->z_os); + error = vfs_ratelimit_metadata_read(zfsvfs->z_os); + if (error != 0) { + zfs_exit(zfsvfs, FTAG); + return (error); + } mutex_enter(&zp->z_lock); if (zp->z_is_sa) @@ -3577,7 +3638,11 @@ zfs_link(znode_t *tdzp, znode_t *szp, char *name, cred_t *cr, return (error); } - vfs_ratelimit_metadata_write(zfsvfs->z_os); + error = vfs_ratelimit_metadata_write(zfsvfs->z_os); + if (error != 0) { + zfs_exit(zfsvfs, FTAG); + return (error); + } top: /* @@ -3820,6 +3885,13 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc, return (0); } + if (vfs_ratelimit_data_write(zfsvfs->z_os, zp->z_blksz, pglen) != 0) { + unlock_page(pp); + zfs_rangelock_exit(lr); + zfs_exit(zfsvfs, FTAG); + return (0); + } + /* * Counterpart for redirty_page_for_writepage() above. This page * was in fact not skipped and should not be counted as if it were. @@ -3947,7 +4019,10 @@ zfs_dirty_inode(struct inode *ip, int flags) } #endif - vfs_ratelimit_metadata_write(zfsvfs->z_os); + error = vfs_ratelimit_metadata_write(zfsvfs->z_os); + if (error != 0) { + goto out; + } tx = dmu_tx_create(zfsvfs->z_os); @@ -3994,7 +4069,6 @@ zfs_inactive(struct inode *ip) znode_t *zp = ITOZ(ip); zfsvfs_t *zfsvfs = ITOZSB(ip); uint64_t atime[2]; - int error; int need_unlock = 0; /* Only read lock if we haven't already write locked, e.g. 
rollback */ @@ -4009,28 +4083,30 @@ zfs_inactive(struct inode *ip) } if (zp->z_atime_dirty && zp->z_unlinked == B_FALSE) { - vfs_ratelimit_metadata_write(zfsvfs->z_os); + if (vfs_ratelimit_metadata_write(zfsvfs->z_os) != 0) { + goto out; + } dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); zfs_sa_upgrade_txholds(tx, zp); - error = dmu_tx_assign(tx, TXG_WAIT); - if (error) { + if (dmu_tx_assign(tx, TXG_WAIT) != 0) { dmu_tx_abort(tx); - } else { - inode_timespec_t tmp_atime; - tmp_atime = zpl_inode_get_atime(ip); - ZFS_TIME_ENCODE(&tmp_atime, atime); - mutex_enter(&zp->z_lock); - (void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs), - (void *)&atime, sizeof (atime), tx); - zp->z_atime_dirty = B_FALSE; - mutex_exit(&zp->z_lock); - dmu_tx_commit(tx); + goto out; } - } + inode_timespec_t tmp_atime; + tmp_atime = zpl_inode_get_atime(ip); + ZFS_TIME_ENCODE(&tmp_atime, atime); + mutex_enter(&zp->z_lock); + (void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs), + (void *)&atime, sizeof (atime), tx); + zp->z_atime_dirty = B_FALSE; + mutex_exit(&zp->z_lock); + dmu_tx_commit(tx); + } +out: zfs_zinactive(zp); if (need_unlock) rw_exit(&zfsvfs->z_teardown_inactive_lock); @@ -4046,6 +4122,7 @@ zfs_fillpage(struct inode *ip, struct page *pp) loff_t i_size = i_size_read(ip); u_offset_t io_off = page_offset(pp); size_t io_len = PAGE_SIZE; + int error; ASSERT3U(io_off, <, i_size); @@ -4055,12 +4132,10 @@ zfs_fillpage(struct inode *ip, struct page *pp) - vfs_ratelimit_data_read(zfsvfs->z_os, PAGESIZE, io_len); - void *va = kmap(pp); - int error = dmu_read(zfsvfs->z_os, ITOZ(ip)->z_id, io_off, + error = dmu_read(zfsvfs->z_os, ITOZ(ip)->z_id, io_off, io_len, va, DMU_READ_PREFETCH); if (io_len != PAGE_SIZE) memset((char *)va + io_len, 0, PAGE_SIZE - io_len); kunmap(pp); if (error) { /* convert checksum errors into IO errors */ if (error == ECKSUM) error = SET_ERROR(EIO); @@ -4097,7 +4174,9 @@ zfs_getpage(struct inode *ip, struct page *pp) if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0) return (error); - error = zfs_fillpage(ip, pp); + error = vfs_ratelimit_data_read(zfsvfs->z_os, 0, PAGE_SIZE); + if (error == 0) + error = zfs_fillpage(ip, pp); if (error == 0) dataset_kstats_update_read_kstats(&zfsvfs->z_kstat, PAGE_SIZE); diff --git a/module/os/linux/zfs/zvol_os.c b/module/os/linux/zfs/zvol_os.c index 5a19f3e579..b65064e3cc 100644 --- a/module/os/linux/zfs/zvol_os.c +++ b/module/os/linux/zfs/zvol_os.c @@ -297,8 +297,14 @@ zvol_write(zv_request_t *zvr) if (bytes > volsize - off) /* don't write past the end */ bytes = volsize - off; - vfs_ratelimit_data_write(zv->zv_objset, zv->zv_volblocksize, - bytes); + error = vfs_ratelimit_data_write(zv->zv_objset, + zv->zv_volblocksize, bytes); + if (error != 0) { + /* XXX-PJD Is it safe to reset the error? */ + if (error == EINTR && uio.uio_resid < start_resid) + error = 0; + break; + } dmu_tx_t *tx = dmu_tx_create(zv->zv_objset); @@ -400,7 +406,11 @@ zvol_discard(zv_request_t *zvr) start, size, RL_WRITER); /* Should we account only for a single metadata write?
*/ - vfs_ratelimit_metadata_write(zv->zv_objset); + error = vfs_ratelimit_metadata_write(zv->zv_objset); + if (error != 0) { + zfs_rangelock_exit(lr); + goto unlock; + } tx = dmu_tx_create(zv->zv_objset); dmu_tx_mark_netfree(tx); @@ -483,8 +493,14 @@ zvol_read(zv_request_t *zvr) if (bytes > volsize - uio.uio_loffset) bytes = volsize - uio.uio_loffset; - vfs_ratelimit_data_read(zv->zv_objset, zv->zv_volblocksize, - bytes); + error = vfs_ratelimit_data_read(zv->zv_objset, + zv->zv_volblocksize, bytes); + if (error != 0) { + /* XXX-PJD Is it safe to reset the error? */ + if (error == EINTR && uio.uio_resid < start_resid) + error = 0; + break; + } error = dmu_read_uio_dnode(zv->zv_dn, &uio, bytes); if (error) { diff --git a/module/zfs/dmu_recv.c b/module/zfs/dmu_recv.c index 957d7c7c07..5a756b1049 100644 --- a/module/zfs/dmu_recv.c +++ b/module/zfs/dmu_recv.c @@ -2205,7 +2205,11 @@ flush_write_batch_impl(struct receive_writer_arg *rwa) ASSERT3U(drrw->drr_object, ==, rwa->last_object); - vfs_ratelimit_data_write(rwa->os, drrw->drr_logical_size, + /* + * vfs_ratelimit_data_write_spin() will sleep in short periods + * and return immediately when a signal is pending. + */ + vfs_ratelimit_data_write_spin(rwa->os, 0, drrw->drr_logical_size); if (drrw->drr_logical_size != dn->dn_datablksz) { diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c index 6a345b7dc6..92c40d25e7 100644 --- a/module/zfs/dmu_send.c +++ b/module/zfs/dmu_send.c @@ -1631,6 +1631,7 @@ issue_data_read(struct send_reader_thread_arg *srta, struct send_range *range) struct srd *srdp = &range->sru.data; blkptr_t *bp = &srdp->bp; objset_t *os = srta->smta->os; + int error; ASSERT3U(range->type, ==, DATA); ASSERT3U(range->start_blkid + 1, ==, range->end_blkid); @@ -1685,11 +1686,15 @@ issue_data_read(struct send_reader_thread_arg *srta, struct send_range *range) .zb_blkid = range->start_blkid, }; - vfs_ratelimit_data_read(os, BP_GET_LSIZE(bp), BP_GET_LSIZE(bp)); + /* + * vfs_ratelimit_data_read_spin() will sleep in short periods and return + * immediately when a signal is pending. + */ + vfs_ratelimit_data_read_spin(os, 0, BP_GET_LSIZE(bp)); arc_flags_t aflags = ARC_FLAG_CACHED_ONLY; - int arc_err = arc_read(NULL, os->os_spa, bp, + error = arc_read(NULL, os->os_spa, bp, arc_getbuf_func, &srdp->abuf, ZIO_PRIORITY_ASYNC_READ, zioflags, &aflags, &zb); /* @@ -1698,7 +1703,7 @@ issue_data_read(struct send_reader_thread_arg *srta, struct send_range *range) * entry to the ARC, and we also avoid polluting the ARC cache with * data that is not likely to be used in the future. 
*/ - if (arc_err != 0) { + if (error != 0) { srdp->abd = abd_alloc_linear(srdp->datasz, B_FALSE); srdp->io_outstanding = B_TRUE; zio_nowait(zio_read(NULL, os->os_spa, bp, srdp->abd, @@ -2555,8 +2560,9 @@ dmu_send_impl(struct dmu_send_params *dspp) while (err == 0 && !range->eos_marker) { err = do_dump(&dsc, range); range = get_next_range(&srt_arg->q, range); - if (issig()) + if (issig()) { err = SET_ERROR(EINTR); + } } /* diff --git a/module/zfs/dsl_dir.c b/module/zfs/dsl_dir.c index 4f7252e2c3..a5d66ac18c 100644 --- a/module/zfs/dsl_dir.c +++ b/module/zfs/dsl_dir.c @@ -371,6 +371,10 @@ dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj, dd->dd_snap_cmtime = t; } + if (dd->dd_myname[0] != '$') { + dsl_dir_ratelimit_read(dd); + } + dmu_buf_init_user(&dd->dd_dbu, NULL, dsl_dir_evict_async, &dd->dd_dbuf); winner = dmu_buf_set_user_ie(dbuf, &dd->dd_dbu); @@ -380,6 +384,7 @@ if (dsl_deadlist_is_open(&dd->dd_livelist)) dsl_dir_livelist_close(dd); dsl_prop_fini(dd); + vfs_ratelimit_free(dd->dd_ratelimit); cv_destroy(&dd->dd_activity_cv); mutex_destroy(&dd->dd_activity_lock); mutex_destroy(&dd->dd_lock); @@ -2036,7 +2041,6 @@ dsl_dir_ratelimit_recurse(dsl_dir_t *dd) ASSERT(child_dd->dd_ratelimit == NULL); child_dd->dd_ratelimit_root = dd->dd_ratelimit_root; - dsl_dir_ratelimit_recurse(child_dd); dsl_dir_rele(child_dd, FTAG); @@ -2320,7 +2324,7 @@ dsl_dir_ratelimit_rename(dsl_dir_t *dd, dsl_dir_t *newparent) if (dd->dd_ratelimit_root != dd) { ASSERT(dd->dd_ratelimit == NULL); - dd->dd_ratelimit_root = newparent; + dd->dd_ratelimit_root = newparent->dd_ratelimit_root; dsl_dir_ratelimit_recurse(dd); } diff --git a/module/zfs/vfs_ratelimit.c b/module/zfs/vfs_ratelimit.c index 4e6493b136..18f3b09d07 100644 --- a/module/zfs/vfs_ratelimit.c +++ b/module/zfs/vfs_ratelimit.c @@ -54,7 +54,7 @@ * - It would be hard to predict what limits should be configured as there are a * lot of factors that dictate how much disk bandwidth is really required * (due to RAIDZ inflation, compression, gang blocks, deduplication, - * NOP writes, I/O aggregation, metadata traffic, etc.). + * block cloning, NOP writes, I/O aggregation, metadata traffic, etc.). * By enforcing the limits at the VFS level for file system operations it should * be easy to find out what limits applications require and verify that the * limits are correctly enforced by monitoring system calls issued by the @@ -76,20 +76,20 @@ * We walk down the dataset tree and set dd_ratelimit_root field to point to * this dsl_dir until we find dsl_dir that also has the vfs_ratelimit structure * already attached to it (which means it has its own limits configured). - * During the accounting it allows us for quick access to the ratelimit + * During the accounting it allows us to quickly access the ratelimit * structure we need by just going to ds_dir->dd_ratelimit_root; - * If ratelimits are not configured on this dataset or any of its parents, + * If ratelimits are configured neither on this dataset nor on its ancestors, * the ds_dir->dd_ratelimit_root will be set to NULL, so we know we don't * have to do any accounting. * - * The limits are configured per second, but we divde the second and the limits - * into RATELIMIT_RESOLUTION slots (10 by default). This is to avoid a choking - * effect, when process is doing progress in 1s steps. For example if we have + * The limits are configured per second, but we divide the second and the limits + * into RATELIMIT_RESOLUTION slots (16 by default). This is to avoid a choking + * effect, when a process makes progress in 1s steps. For example if we have * read bandwidth limits configured to 100MB/s and the process is trying to * read 130MB, it will take 1.3 seconds, not 2 seconds. - * Not that very low limits may be rounded up - 7 ops/s limit will be rounded - * up to 10 ops/s, so each slot is assigned 1 op/s limit. This rounding up - * is done in the kernel and isn't shown in the properties value. + * Note that very low limits may be rounded up - a 7 ops/s limit will be rounded + * up to 16 ops/s, so each time slot is assigned a 1 op/s limit. This rounding up + * is done in the kernel and isn't shown in the properties. * * How does the accounting work? * @@ -99,34 +99,31 @@ * and two operations total. Not all of those limits have to be configured or * some might be configured on a dataset and others on a parent dataset(s). * - * We remember those values in the rtslot structures at every level we have - * limits configured on. The rtslot strucuture also remembers the time of - * the request. For each ratelimit type (read bandwidth, total, operation read, - * operation total) and for each dataset with the limits configured when we walk - * the dataset tree up we find the point in time until which we have to wait to - * satisfy configured limit. We select the furthest point in time and we do to - * sleep. If the request doesn't exceed any limits, we just do the accounting - * and allow for the request to be executed immediately. + * For each type we use two fields to track the wait times: rl_timeslot and + * rl_remainder. rl_timeslot holds the point in time up to which the last + * process is waiting. If rl_timeslot is lower than the current time, it + * means that no processes are waiting. rl_remainder is the amount of data + * modulo the limit. For example, if we have a read bandwidth limit of 64MB/s, + * that is 4MB per 1/16s slot. A process tries to read 11MB. This would + * give us rl_timeslot = now + 2 (we account for 2 full time slots of 1/16s) + * and rl_remainder = 3MB. This process has to sleep for 2/16s. If another + * process immediately tries to read 1MB, the 1MB is added to the current + * rl_remainder, giving 4MB - a full slot's worth. Now rl_timeslot will be + * set to now + 3 and rl_remainder to 0. This second process is going to + * sleep for 3/16s. */
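To make the slot arithmetic described above concrete, here is a minimal userland sketch of a single accounting step. RATELIMIT_RESOLUTION and the timeslot/remainder roles mirror the patch; the struct and function names are illustrative only, not part of the change:

#include <stdint.h>
#include <stdio.h>

/* Per-type accounting state, mirroring rl_timeslot/rl_remainder. */
struct slot_state {
	uint64_t timeslot;	/* slot the last waiter sleeps until */
	uint64_t remainder;	/* data already charged to that slot */
};

/* Charge 'count' against a per-slot limit; return the slot to wait for. */
static uint64_t
charge(struct slot_state *ss, uint64_t now, uint64_t limit_per_slot,
    uint64_t count)
{
	if (ss->timeslot < now) {
		ss->remainder = 0;
		ss->timeslot = now;
	} else {
		count += ss->remainder;
	}
	ss->timeslot += count / limit_per_slot;
	ss->remainder = count % limit_per_slot;
	return (ss->timeslot);
}

int
main(void)
{
	struct slot_state ss = { 0, 0 };
	uint64_t mb = 1024 * 1024, now = 100;

	/* 64MB/s limit is 4MB per 1/16s slot; 11MB waits until now + 2. */
	printf("%ju\n", (uintmax_t)charge(&ss, now, 4 * mb, 11 * mb));
	/* 1MB right after fills the 3MB remainder; waits until now + 3. */
	printf("%ju\n", (uintmax_t)charge(&ss, now, 4 * mb, 1 * mb));
	return (0);
}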
/* * Number of slots we divide one second into. More granularity is better for - * interactivity, but it takes more memory and more calculations. + * interactivity, but for small limits we may lose some precision. */ #define RATELIMIT_RESOLUTION 16 struct vfs_ratelimit { kmutex_t rl_lock; uint64_t rl_limits[ZFS_RATELIMIT_NTYPES]; - /* List of current waiters and past activity. */ - list_t rl_list; -}; - -struct rtslot { - list_node_t rts_node; - hrtime_t rts_timeslot; - int rts_types; - uint64_t rts_counts[ZFS_RATELIMIT_NTYPES]; + uint64_t rl_timeslot[ZFS_RATELIMIT_NTYPES]; + uint64_t rl_remainder[ZFS_RATELIMIT_NTYPES]; }; int
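The slot clock that drives this accounting, gettimeslot() below, expresses wall time in 1/RATELIMIT_RESOLUTION second units. A hypothetical userland equivalent of the same conversion, for reference only:

#include <stdint.h>
#include <time.h>

#define RATELIMIT_RESOLUTION	16
#define NANOSEC			1000000000LL

/* Wall-clock time in 1/16s slots, same arithmetic as gettimeslot(). */
static int64_t
slot_now(void)
{
	struct timespec ts;

	(void) clock_gettime(CLOCK_REALTIME, &ts);
	return ((int64_t)ts.tv_sec * RATELIMIT_RESOLUTION +
	    ts.tv_nsec / (NANOSEC / RATELIMIT_RESOLUTION));
}

int
main(void)
{
	return (slot_now() > 0 ? 0 : 1);
}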
@@ -197,13 +194,6 @@ vfs_ratelimit_alloc(const uint64_t *limits) rl = kmem_zalloc(sizeof (*rl), KM_SLEEP); mutex_init(&rl->rl_lock, NULL, MUTEX_DEFAULT, NULL); - list_create(&rl->rl_list, sizeof (struct rtslot), - offsetof(struct rtslot, rts_node)); - /* Create two slots for a good start. */ - for (i = 0; i < 2; i++) { - list_insert_tail(&rl->rl_list, - kmem_zalloc(sizeof (struct rtslot), KM_SLEEP)); - } if (limits != NULL) { for (i = ZFS_RATELIMIT_FIRST; i < ZFS_RATELIMIT_NTYPES; i++) { @@ -227,17 +217,11 @@ void vfs_ratelimit_free(struct vfs_ratelimit *rl) { - struct rtslot *slot; if (rl == NULL) { return; } - while ((slot = list_remove_head(&rl->rl_list)) != NULL) { - kmem_free(slot, sizeof (*slot)); - } - list_destroy(&rl->rl_list); - mutex_destroy(&rl->rl_lock); kmem_free(rl, sizeof (*rl)); @@ -278,28 +262,24 @@ static __inline hrtime_t gettimeslot(void) { inode_timespec_t ts; - hrtime_t nsec; gethrestime(&ts); - nsec = ((hrtime_t)ts.tv_sec * NANOSEC) + ts.tv_nsec; - return (nsec / (NANOSEC / RATELIMIT_RESOLUTION)); + + return (((hrtime_t)ts.tv_sec * RATELIMIT_RESOLUTION) + + ts.tv_nsec / (NANOSEC / RATELIMIT_RESOLUTION)); } /* - * Returns bit mask of the types configured for the given ratelimit structure. + * Returns a bit mask of the types that have a non-zero count in the given array. */ static int -ratelimit_types(const struct vfs_ratelimit *rl) +ratelimit_types(const uint64_t *counts) { int types, type; - if (rl == NULL) { - return (0); - } - types = 0; for (type = ZFS_RATELIMIT_FIRST; type <= ZFS_RATELIMIT_LAST; type++) { - if (rl->rl_limits[type] > 0) { + if (counts[type] > 0) { types |= (1 << type); } } @@ -318,7 +298,6 @@ static dsl_dir_t * ratelimit_first(objset_t *os, int types) { dsl_dir_t *dd; - int mytypes; ASSERT(RRM_READ_HELD(&os->os_spa->spa_ratelimit_lock)); @@ -327,13 +306,17 @@ ratelimit_first(objset_t *os, int types) if (dd == NULL) { return (NULL); } - mytypes = ratelimit_types(dd->dd_ratelimit); - if ((mytypes & types) != 0) { - /* - * This dataset has at last one limit we are - * interested in. - */ - return (dd); + if (dd->dd_ratelimit != NULL) { + int mytypes; + + mytypes = ratelimit_types(dd->dd_ratelimit->rl_limits); + if ((mytypes & types) != 0) { + /* + * This dataset has at least one limit we are + * interested in. + */ + return (dd); + } } if (dd->dd_parent == NULL) { return (NULL); } @@ -351,8 +334,6 @@ static dsl_dir_t * ratelimit_parent(dsl_dir_t *dd, int types) { - int mytypes; - ASSERT(RRM_READ_HELD(&dd->dd_pool->dp_spa->spa_ratelimit_lock)); for (;;) { @@ -363,154 +344,63 @@ ratelimit_parent(dsl_dir_t *dd, int types) if (dd == NULL) { return (NULL); } - mytypes = ratelimit_types(dd->dd_ratelimit); - if ((mytypes & types) != 0) { - /* - * This dataset has at last one limit we are - * interested in. - */ - return (dd); + if (dd->dd_ratelimit != NULL) { + int mytypes; + + mytypes = ratelimit_types(dd->dd_ratelimit->rl_limits); + if ((mytypes & types) != 0) { + /* + * This dataset has at least one limit we are + * interested in. + */ + return (dd); + } } } } -/* - * If we have any entries with 'timeslot > now' we also must have an entry with - * 'timeslot == now'. In other words if there is no entry with - * 'timeslot == now', it means that all the entires expired. - * - * We return either the most recent entry related to the given type or we return - * 'timeslot == now' entry not related to the given type and we will use it to - * store accouting information about this type as well.
- */ -static struct rtslot * -ratelimit_find(struct vfs_ratelimit *rl, int typebit, hrtime_t now) -{ - struct rtslot *slot; - - ASSERT(MUTEX_HELD(&rl->rl_lock)); - - for (slot = list_head(&rl->rl_list); slot != NULL; - slot = list_next(&rl->rl_list, slot)) { - if (slot->rts_timeslot < now) { - break; - } - if ((slot->rts_types & typebit) != 0 || - slot->rts_timeslot == now) { - return (slot); - } - } - /* All the entries expired. */ -#ifndef NDEBUG - for (slot = list_head(&rl->rl_list); slot != NULL; - slot = list_next(&rl->rl_list, slot)) { - ASSERT(slot->rts_timeslot < now); - } -#endif - - return (NULL); -} - /* * Account for our request across all the types configured in this ratelimit * structure. * Return a timeslot we should wait for or now if we can execute the request * without waiting (we are within limits). */ -static uint64_t -ratelimit_account(struct vfs_ratelimit *rl, int types, hrtime_t now, +static hrtime_t +ratelimit_account(struct vfs_ratelimit *rl, hrtime_t now, const uint64_t *counts) { - uint64_t timeslot; - int type, typebit; + hrtime_t timeslot; + int type; - timeslot = 0; + timeslot = now; mutex_enter(&rl->rl_lock); for (type = ZFS_RATELIMIT_FIRST; type <= ZFS_RATELIMIT_LAST; type++) { - struct rtslot *slot; - uint64_t count, nexttimeslot; + uint64_t count; - typebit = (1 << type); - - if ((types & typebit) == 0) { - /* Not interested in this type. */ - continue; - } if (rl->rl_limits[type] == 0) { /* This type has no limit configured on this dataset. */ continue; } count = counts[type]; - ASSERT(count > 0); - - slot = ratelimit_find(rl, typebit, now); - if (slot == NULL) { - slot = list_remove_tail(&rl->rl_list); - ASSERT(slot->rts_timeslot < now); - slot->rts_types = typebit; - slot->rts_timeslot = now; - memset(slot->rts_counts, 0, sizeof (slot->rts_counts)); - list_insert_head(&rl->rl_list, slot); - } else if (slot->rts_timeslot == now) { - /* The 'now' slot may not have our type yet. */ - slot->rts_types |= typebit; - } - ASSERT((slot->rts_types & typebit) != 0); - nexttimeslot = slot->rts_timeslot + 1; - - for (;;) { - if (slot->rts_counts[type] + count <= - rl->rl_limits[type]) { - slot->rts_counts[type] += count; - break; - } - - /* - * This request is too big to fit into a single slot, - * ie. a single request exceeds the limit or this and - * the previous requests exceed the limit. - */ - - /* - * Fit as much as we can into the current slot. - */ - count -= rl->rl_limits[type] - slot->rts_counts[type]; - slot->rts_counts[type] = rl->rl_limits[type]; - - /* - * Take the next slot (if already exists isn't aware of - * our type yet), take an expired slot from the tail of - * the list or allocate a new slot. - */ - slot = list_prev(&rl->rl_list, slot); - if (slot != NULL) { - ASSERT((slot->rts_types & typebit) == 0); - ASSERT(slot->rts_timeslot == nexttimeslot); - ASSERT0(slot->rts_counts[type]); - - slot->rts_types |= typebit; - } else { - slot = list_tail(&rl->rl_list); - if (slot->rts_timeslot < now) { - list_remove(&rl->rl_list, slot); - } else { - slot = kmem_alloc(sizeof (*slot), - KM_SLEEP); - } - slot->rts_types = typebit; - slot->rts_timeslot = nexttimeslot; - memset(slot->rts_counts, 0, - sizeof (slot->rts_counts)); - list_insert_head(&rl->rl_list, slot); - } - - nexttimeslot++; + if (count == 0) { + /* Not interested in this type. 
*/ + continue; + } + if (rl->rl_timeslot[type] < now) { + rl->rl_remainder[type] = 0; + rl->rl_timeslot[type] = now; + } else { + count += rl->rl_remainder[type]; + } + + rl->rl_timeslot[type] += count / rl->rl_limits[type]; + rl->rl_remainder[type] = count % rl->rl_limits[type]; + + if (timeslot < rl->rl_timeslot[type]) { + timeslot = rl->rl_timeslot[type]; } } @@ -519,106 +409,173 @@ ratelimit_account(struct vfs_ratelimit *rl, int types, hrtime_t now, return (timeslot); } -static void -vfs_ratelimit(objset_t *os, int types, const uint64_t *counts) +static hrtime_t +ratelimit_account_all(objset_t *os, const uint64_t *counts) { dsl_dir_t *dd; hrtime_t now, timeslot; + int types; + + ASSERT(RRM_READ_HELD(&os->os_spa->spa_ratelimit_lock)); + + types = ratelimit_types(counts); + now = timeslot = gettimeslot(); + + for (dd = ratelimit_first(os, types); dd != NULL; + dd = ratelimit_parent(dd, types)) { + hrtime_t ts; + + ts = ratelimit_account(dd->dd_ratelimit, now, counts); + if (ts > timeslot) { + timeslot = ts; + } + } + + return (timeslot); +} + +static int +ratelimit_sleep(hrtime_t timeslot) +{ + hrtime_t now; + int error = 0; now = gettimeslot(); - timeslot = 0; + + if (timeslot > now) { + /* + * Too much traffic, slow it down. + */ +#ifdef _KERNEL + if (delay_sig((hz / RATELIMIT_RESOLUTION) * (timeslot - now))) { + error = EINTR; + } +#else + delay((hz / RATELIMIT_RESOLUTION) * (timeslot - now)); +#endif + } + + return (error); +} + +static int +vfs_ratelimit_sleep(objset_t *os, const uint64_t *counts) +{ + hrtime_t timeslot; /* * Prevents configuration changes when we have requests in-flight. */ rrm_enter_read(&os->os_spa->spa_ratelimit_lock, FTAG); - for (dd = ratelimit_first(os, types); dd != NULL; - dd = ratelimit_parent(dd, types)) { - hrtime_t ts; - - ts = ratelimit_account(dd->dd_ratelimit, types, now, counts); - if (ts > timeslot) { - timeslot = ts; - } - } + timeslot = ratelimit_account_all(os, counts); rrm_exit(&os->os_spa->spa_ratelimit_lock, FTAG); - if (timeslot > now) { - /* - * Too much traffic, slow it down. - */ - delay((hz / RATELIMIT_RESOLUTION) * (timeslot - now)); - } + return (ratelimit_sleep(timeslot)); }
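ratelimit_sleep() above leans on the delay_sig() additions from the platform headers (pause_sig() on FreeBSD, schedule_timeout_interruptible() on Linux); in both implementations it returns nonzero when the sleep was cut short by a signal. A hypothetical userland stand-in with the same contract:

#include <errno.h>
#include <time.h>

/*
 * Sleep for the given interval and report whether a signal interrupted
 * the sleep, as the kernel delay_sig() macros do.
 */
static int
delay_sig_sketch(const struct timespec *ts)
{
	struct timespec rem;

	if (nanosleep(ts, &rem) == -1 && errno == EINTR)
		return (1);	/* interrupted; callers map this to EINTR */
	return (0);		/* the full interval elapsed */
}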
/* * For every data read we charge: * - bytes of read bandwidth * - bytes of total bandwidth - * - (bytes - 1) / blocksize + 1 of read operations - * - (bytes - 1) / blocksize + 1 of total operations + * - (bytes + blocksize - 1) / blocksize of read operations + * - (bytes + blocksize - 1) / blocksize of total operations */ -void +int vfs_ratelimit_data_read(objset_t *os, size_t blocksize, size_t bytes) { uint64_t counts[ZFS_RATELIMIT_NTYPES]; - unsigned int types; + size_t operations; if (bytes == 0) { - return; + return (0); } if (blocksize == 0) { blocksize = bytes; } - - types = (1 << ZFS_RATELIMIT_BW_READ); - types |= (1 << ZFS_RATELIMIT_BW_TOTAL); - types |= (1 << ZFS_RATELIMIT_OP_READ); - types |= (1 << ZFS_RATELIMIT_OP_TOTAL); + operations = (bytes + blocksize - 1) / blocksize; memset(counts, 0, sizeof (counts)); counts[ZFS_RATELIMIT_BW_READ] = bytes; counts[ZFS_RATELIMIT_BW_TOTAL] = bytes; - counts[ZFS_RATELIMIT_OP_READ] = (bytes - 1) / blocksize + 1; - counts[ZFS_RATELIMIT_OP_TOTAL] = (bytes - 1) / blocksize + 1; + counts[ZFS_RATELIMIT_OP_READ] = operations; + counts[ZFS_RATELIMIT_OP_TOTAL] = operations; - vfs_ratelimit(os, types, counts); + return (vfs_ratelimit_sleep(os, counts)); } /* * For every data write we charge: * - bytes of write bandwidth * - bytes of total bandwidth - * - (bytes - 1) / blocksize + 1 of write operations - * - (bytes - 1) / blocksize + 1 of total operations + * - (bytes + blocksize - 1) / blocksize of write operations + * - (bytes + blocksize - 1) / blocksize of total operations */ -void +int vfs_ratelimit_data_write(objset_t *os, size_t blocksize, size_t bytes) { uint64_t counts[ZFS_RATELIMIT_NTYPES]; - unsigned int types; + size_t operations; if (bytes == 0) { - return; + return (0); } if (blocksize == 0) { blocksize = bytes; } - - types = (1 << ZFS_RATELIMIT_BW_WRITE); - types |= (1 << ZFS_RATELIMIT_BW_TOTAL); - types |= (1 << ZFS_RATELIMIT_OP_WRITE); - types |= (1 << ZFS_RATELIMIT_OP_TOTAL); + operations = (bytes + blocksize - 1) / blocksize; memset(counts, 0, sizeof (counts)); counts[ZFS_RATELIMIT_BW_WRITE] = bytes; counts[ZFS_RATELIMIT_BW_TOTAL] = bytes; - counts[ZFS_RATELIMIT_OP_WRITE] = (bytes - 1) / blocksize + 1; - counts[ZFS_RATELIMIT_OP_TOTAL] = (bytes - 1) / blocksize + 1; + counts[ZFS_RATELIMIT_OP_WRITE] = operations; + counts[ZFS_RATELIMIT_OP_TOTAL] = operations; - vfs_ratelimit(os, types, counts); + return (vfs_ratelimit_sleep(os, counts)); +}
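With the helpers now returning an error, callers that have already made partial progress are expected to swallow EINTR and return a short read or write, as zfs_read(), zfs_write() and the zvol paths do later in this patch. A condensed, hypothetical sketch of that pattern (the function and its parameters are illustrative):

/*
 * Illustrative only: how a chunked I/O loop consumes the int return
 * value of the ratelimit functions (modeled on zfs_read() below).
 */
static int
chunked_read_sketch(objset_t *os, size_t blksz, uint64_t start_resid)
{
	uint64_t n = start_resid;
	int error = 0;

	while (n > 0) {
		size_t nbytes = MIN(n, blksz);

		error = vfs_ratelimit_data_read(os, blksz, nbytes);
		if (error != 0) {
			/* Partial progress turns EINTR into a short read. */
			if (error == EINTR && n < start_resid)
				error = 0;
			break;
		}
		/* ... issue the actual read of 'nbytes' here ... */
		n -= nbytes;
	}
	return (error);
}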
+ +int +vfs_ratelimit_data_copy(objset_t *srcos, objset_t *dstos, size_t blocksize, + size_t bytes) +{ + uint64_t counts[ZFS_RATELIMIT_NTYPES]; + size_t operations; + hrtime_t dstts, srcts; + spa_t *spa = srcos->os_spa; + + if (bytes == 0) { + return (0); + } + if (blocksize == 0) { + blocksize = bytes; + } + operations = (bytes + blocksize - 1) / blocksize; + + /* + * Prevents configuration changes when we have requests in-flight. + */ + rrm_enter_read(&spa->spa_ratelimit_lock, FTAG); + + memset(counts, 0, sizeof (counts)); + counts[ZFS_RATELIMIT_BW_READ] = bytes; + counts[ZFS_RATELIMIT_BW_TOTAL] = bytes; + counts[ZFS_RATELIMIT_OP_READ] = operations; + counts[ZFS_RATELIMIT_OP_TOTAL] = operations; + + srcts = ratelimit_account_all(srcos, counts); + + memset(counts, 0, sizeof (counts)); + counts[ZFS_RATELIMIT_BW_WRITE] = bytes; + counts[ZFS_RATELIMIT_BW_TOTAL] = bytes; + counts[ZFS_RATELIMIT_OP_WRITE] = operations; + counts[ZFS_RATELIMIT_OP_TOTAL] = operations; + + dstts = ratelimit_account_all(dstos, counts); + + rrm_exit(&spa->spa_ratelimit_lock, FTAG); + + return (ratelimit_sleep(dstts > srcts ? dstts : srcts)); } /* @@ -626,20 +583,16 @@ vfs_ratelimit_data_write(objset_t *os, size_t blocksize, size_t bytes) * - one read operation * - one total operation */ -void +int vfs_ratelimit_metadata_read(objset_t *os) { uint64_t counts[ZFS_RATELIMIT_NTYPES]; - unsigned int types; - - types = (1 << ZFS_RATELIMIT_OP_READ); - types |= (1 << ZFS_RATELIMIT_OP_TOTAL); memset(counts, 0, sizeof (counts)); counts[ZFS_RATELIMIT_OP_READ] = 1; counts[ZFS_RATELIMIT_OP_TOTAL] = 1; - vfs_ratelimit(os, types, counts); + return (vfs_ratelimit_sleep(os, counts)); } /* @@ -647,18 +600,89 @@ vfs_ratelimit_metadata_read(objset_t *os) - * - one read operation + * - one write operation * - one total operation */ -void +int vfs_ratelimit_metadata_write(objset_t *os) { uint64_t counts[ZFS_RATELIMIT_NTYPES]; - unsigned int types; - - types = (1 << ZFS_RATELIMIT_OP_WRITE); - types |= (1 << ZFS_RATELIMIT_OP_TOTAL); memset(counts, 0, sizeof (counts)); counts[ZFS_RATELIMIT_OP_WRITE] = 1; counts[ZFS_RATELIMIT_OP_TOTAL] = 1; - vfs_ratelimit(os, types, counts); + return (vfs_ratelimit_sleep(os, counts)); +}
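For the send/recv paths, a pending signal must break the wait even when it is delivered to a different thread, hence the polling in ratelimit_spin() below. A rough userland analogue of that strategy, short sleeps with an explicit signal check instead of one long interruptible sleep (all names here are illustrative):

#include <signal.h>
#include <time.h>

static volatile sig_atomic_t got_signal;

static void
on_sigint(int sig)
{
	(void) sig;
	got_signal = 1;
}

/* Poll in 1/16s steps instead of one long interruptible sleep. */
static void
spin_until(time_t deadline)
{
	struct timespec tick = { 0, 1000000000L / 16 };

	while (time(NULL) < deadline && !got_signal)
		(void) nanosleep(&tick, NULL);
}

int
main(void)
{
	(void) signal(SIGINT, on_sigint);
	spin_until(time(NULL) + 5);
	return (got_signal ? 1 : 0);
}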
+ +/* + * Spins until the timeout is reached or the process receives a signal. + * This function is different from ratelimit_sleep(), because pause_sig() + * might not be woken up by a signal if the process has multiple threads. + * We use the *_spin() functions for zfs send/recv, where the kernel starts + * additional kernel threads; interrupting the userland process with CTRL+C + * (SIGINT) doesn't interrupt a pause_sig() waiting in another kernel thread. + */ +static void +ratelimit_spin(objset_t *os, const uint64_t *counts) +{ + hrtime_t timeslot; + + /* + * Prevents configuration changes when we have requests in-flight. + */ + rrm_enter_read(&os->os_spa->spa_ratelimit_lock, FTAG); + + timeslot = ratelimit_account_all(os, counts); + + rrm_exit(&os->os_spa->spa_ratelimit_lock, FTAG); + + while (timeslot > gettimeslot() && !issig()) { + delay(hz / RATELIMIT_RESOLUTION); + } +} + +void +vfs_ratelimit_data_read_spin(objset_t *os, size_t blocksize, size_t bytes) +{ + uint64_t counts[ZFS_RATELIMIT_NTYPES]; + size_t operations; + + if (bytes == 0) { + return; + } + + if (blocksize == 0) { + blocksize = bytes; + } + operations = (bytes + blocksize - 1) / blocksize; + + memset(counts, 0, sizeof (counts)); + counts[ZFS_RATELIMIT_BW_READ] = bytes; + counts[ZFS_RATELIMIT_BW_TOTAL] = bytes; + counts[ZFS_RATELIMIT_OP_READ] = operations; + counts[ZFS_RATELIMIT_OP_TOTAL] = operations; + + ratelimit_spin(os, counts); +} + +void +vfs_ratelimit_data_write_spin(objset_t *os, size_t blocksize, size_t bytes) +{ + uint64_t counts[ZFS_RATELIMIT_NTYPES]; + size_t operations; + + if (bytes == 0) { + return; + } + + if (blocksize == 0) { + blocksize = bytes; + } + operations = (bytes + blocksize - 1) / blocksize; + + memset(counts, 0, sizeof (counts)); + counts[ZFS_RATELIMIT_BW_WRITE] = bytes; + counts[ZFS_RATELIMIT_BW_TOTAL] = bytes; + counts[ZFS_RATELIMIT_OP_WRITE] = operations; + counts[ZFS_RATELIMIT_OP_TOTAL] = operations; + + ratelimit_spin(os, counts); } diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c index 8a50fe40e7..56c3a43169 100644 --- a/module/zfs/zfs_vnops.c +++ b/module/zfs/zfs_vnops.c @@ -299,7 +299,14 @@ zfs_read(struct znode *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) ssize_t nbytes = MIN(n, zfs_vnops_read_chunk_size - P2PHASE(zfs_uio_offset(uio), zfs_vnops_read_chunk_size)); - vfs_ratelimit_data_read(zfsvfs->z_os, zp->z_blksz, nbytes); + error = vfs_ratelimit_data_read(zfsvfs->z_os, zp->z_blksz, + nbytes); + if (error != 0) { + if (error == EINTR && n < start_resid) { + error = 0; + } + break; + } #ifdef UIO_NOCOPY if (zfs_uio_segflg(uio) == UIO_NOCOPY) @@ -614,7 +621,15 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) } } - vfs_ratelimit_data_write(zfsvfs->z_os, blksz, nbytes); + error = vfs_ratelimit_data_write(zfsvfs->z_os, blksz, nbytes); + if (error != 0) { + if (error == EINTR && n < start_resid) { + error = 0; + } + if (abuf != NULL) + dmu_return_arcbuf(abuf); + break; + } /* * Start a transaction. @@ -1315,8 +1330,10 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp, break; } - vfs_ratelimit_data_read(inos, inblksz, size); - vfs_ratelimit_data_write(outos, inblksz, size); + error = vfs_ratelimit_data_copy(inos, outos, inblksz, size); + if (error != 0) { + break; + } nbps = maxblocks; last_synced_txg = spa_last_synced_txg(dmu_objset_spa(inos)); diff --git a/tests/zfs-tests/tests/functional/ratelimit/filesystem_op_single.ksh b/tests/zfs-tests/tests/functional/ratelimit/filesystem_op_single.ksh index cdaa03efd4..957f4c0e22 100755 --- a/tests/zfs-tests/tests/functional/ratelimit/filesystem_op_single.ksh +++ b/tests/zfs-tests/tests/functional/ratelimit/filesystem_op_single.ksh @@ -90,15 +90,7 @@ log_must ratelimit_filesystem_op_single unlink limit_op_write=none 1024 1 "$TESTDIR/file" # Operations total limits limit writing.
log_must ratelimit_filesystem_op_single chmod limit_op_total=128 512 4 "$TESTDIR/file" log_must ratelimit_filesystem_op_single chown limit_op_total=64 512 8 "$TESTDIR/file" -# Creating a file requires one metadata write and one metadata read operation. -# On successful open(2), zfs_freebsd_open() calls vnode_create_vobject() -# with size=0. If size=0, vnode_create_vobject() interprets this as not having -# the proper size and calls VOP_GETATTR(). -if is_freebsd; then - log_must ratelimit_filesystem_op_single create limit_op_total=128 512 8 "$TESTDIR/file" -else - log_must ratelimit_filesystem_op_single create limit_op_total=128 512 4 "$TESTDIR/file" -fi +log_must ratelimit_filesystem_op_single create limit_op_total=128 512 4 "$TESTDIR/file" log_must ratelimit_filesystem_op_single unlink limit_op_total=64 512 8 "$TESTDIR/file" log_must ratelimit_filesystem_op_single mkdir limit_op_total=128 512 4 "$TESTDIR/file" log_must ratelimit_filesystem_op_single rmdir limit_op_total=64 512 8 "$TESTDIR/file" @@ -122,11 +114,7 @@ log_must ratelimit_filesystem_op_single unlink limit_op_total=none 1024 1 "$TEST # Operations read limits don't affect writing. log_must ratelimit_filesystem_op_single chmod limit_op_read=32 1024 1 "$TESTDIR/file" log_must ratelimit_filesystem_op_single chown limit_op_read=64 1024 1 "$TESTDIR/file" -if is_freebsd; then - log_must ratelimit_filesystem_op_single create limit_op_read=128 1024 8 "$TESTDIR/file" -else - log_must ratelimit_filesystem_op_single create limit_op_read=128 1024 1 "$TESTDIR/file" -fi +log_must ratelimit_filesystem_op_single create limit_op_read=128 1024 1 "$TESTDIR/file" log_must ratelimit_filesystem_op_single unlink limit_op_read=256 1024 1 "$TESTDIR/file" log_must ratelimit_filesystem_op_single mkdir limit_op_read=32 1024 1 "$TESTDIR/file" log_must ratelimit_filesystem_op_single rmdir limit_op_read=64 1024 1 "$TESTDIR/file"