Introduce a tunable to exclude special class buffers from L2ARC

Special allocation class or dedup vdevs may have roughly the same
performance as L2ARC vdevs. Introduce a new tunable to exclude those
buffers from being cacheable on L2ARC.

Reviewed-by: Don Brady <don.brady@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: George Amanakis <gamanakis@gmail.com>
Closes #11761
Closes #12285
This commit is contained in:
George Amanakis 2021-11-11 21:52:16 +01:00 committed by Tony Hutter
parent c8f795ba53
commit 8bd3dca9bf
8 changed files with 119 additions and 21 deletions

View File

@ -85,6 +85,7 @@ typedef void arc_prune_func_t(int64_t bytes, void *priv);
/* Shared module parameters */ /* Shared module parameters */
extern int zfs_arc_average_blocksize; extern int zfs_arc_average_blocksize;
extern int l2arc_exclude_special;
/* generic arc_done_func_t's which you can use */ /* generic arc_done_func_t's which you can use */
arc_read_done_func_t arc_bcopy_func; arc_read_done_func_t arc_bcopy_func;

View File

@ -441,16 +441,7 @@ dbuf_find_dirty_eq(dmu_buf_impl_t *db, uint64_t txg)
(dbuf_is_metadata(_db) && \ (dbuf_is_metadata(_db) && \
((_db)->db_objset->os_primary_cache == ZFS_CACHE_METADATA))) ((_db)->db_objset->os_primary_cache == ZFS_CACHE_METADATA)))
#define DBUF_IS_L2CACHEABLE(_db) \ boolean_t dbuf_is_l2cacheable(dmu_buf_impl_t *db);
((_db)->db_objset->os_secondary_cache == ZFS_CACHE_ALL || \
(dbuf_is_metadata(_db) && \
((_db)->db_objset->os_secondary_cache == ZFS_CACHE_METADATA)))
#define DNODE_LEVEL_IS_L2CACHEABLE(_dn, _level) \
((_dn)->dn_objset->os_secondary_cache == ZFS_CACHE_ALL || \
(((_level) > 0 || \
DMU_OT_IS_METADATA((_dn)->dn_handle->dnh_dnode->dn_type)) && \
((_dn)->dn_objset->os_secondary_cache == ZFS_CACHE_METADATA)))
#ifdef ZFS_DEBUG #ifdef ZFS_DEBUG

View File

@ -200,10 +200,6 @@ struct objset {
#define DMU_GROUPUSED_DNODE(os) ((os)->os_groupused_dnode.dnh_dnode) #define DMU_GROUPUSED_DNODE(os) ((os)->os_groupused_dnode.dnh_dnode)
#define DMU_PROJECTUSED_DNODE(os) ((os)->os_projectused_dnode.dnh_dnode) #define DMU_PROJECTUSED_DNODE(os) ((os)->os_projectused_dnode.dnh_dnode)
#define DMU_OS_IS_L2CACHEABLE(os) \
((os)->os_secondary_cache == ZFS_CACHE_ALL || \
(os)->os_secondary_cache == ZFS_CACHE_METADATA)
/* called from zpl */ /* called from zpl */
int dmu_objset_hold(const char *name, void *tag, objset_t **osp); int dmu_objset_hold(const char *name, void *tag, objset_t **osp);
int dmu_objset_hold_flags(const char *name, boolean_t decrypt, void *tag, int dmu_objset_hold_flags(const char *name, boolean_t decrypt, void *tag,

View File

@ -109,6 +109,11 @@ A value of
.Sy 100 .Sy 100
disables this feature. disables this feature.
. .
.It Sy l2arc_exclude_special Ns = Ns Sy 0 Ns | Ns 1 Pq int
Controls whether buffers present on special vdevs are eligibile for caching
into L2ARC.
If set to 1, exclude dbufs on special vdevs from being cached to L2ARC.
.
.It Sy l2arc_mfuonly Ns = Ns Sy 0 Ns | Ns 1 Pq int .It Sy l2arc_mfuonly Ns = Ns Sy 0 Ns | Ns 1 Pq int
Controls whether only MFU metadata and data are cached from ARC into L2ARC. Controls whether only MFU metadata and data are cached from ARC into L2ARC.
This may be desired to avoid wasting space on L2ARC when reading/writing large This may be desired to avoid wasting space on L2ARC when reading/writing large

View File

@ -877,6 +877,14 @@ static void l2arc_hdr_arcstats_update(arc_buf_hdr_t *hdr, boolean_t incr,
#define l2arc_hdr_arcstats_decrement_state(hdr) \ #define l2arc_hdr_arcstats_decrement_state(hdr) \
l2arc_hdr_arcstats_update((hdr), B_FALSE, B_TRUE) l2arc_hdr_arcstats_update((hdr), B_FALSE, B_TRUE)
/*
* l2arc_exclude_special : A zfs module parameter that controls whether buffers
* present on special vdevs are eligibile for caching in L2ARC. If
* set to 1, exclude dbufs on special vdevs from being cached to
* L2ARC.
*/
int l2arc_exclude_special = 0;
/* /*
* l2arc_mfuonly : A ZFS module parameter that controls whether only MFU * l2arc_mfuonly : A ZFS module parameter that controls whether only MFU
* metadata and data are cached from ARC into L2ARC. * metadata and data are cached from ARC into L2ARC.
@ -11136,6 +11144,10 @@ ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, rebuild_blocks_min_l2size, ULONG, ZMOD_RW,
ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, mfuonly, INT, ZMOD_RW, ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, mfuonly, INT, ZMOD_RW,
"Cache only MFU data from ARC into L2ARC"); "Cache only MFU data from ARC into L2ARC");
ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, exclude_special, INT, ZMOD_RW,
"If set to 1 exclude dbufs on special vdevs from being cached to "
"L2ARC.");
ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, lotsfree_percent, param_set_arc_int, ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, lotsfree_percent, param_set_arc_int,
param_get_int, ZMOD_RW, "System free memory I/O throttle in bytes"); param_get_int, ZMOD_RW, "System free memory I/O throttle in bytes");

View File

@ -53,6 +53,7 @@
#include <cityhash.h> #include <cityhash.h>
#include <sys/spa_impl.h> #include <sys/spa_impl.h>
#include <sys/wmsum.h> #include <sys/wmsum.h>
#include <sys/vdev_impl.h>
kstat_t *dbuf_ksp; kstat_t *dbuf_ksp;
@ -594,6 +595,68 @@ dbuf_is_metadata(dmu_buf_impl_t *db)
} }
} }
/*
* We want to exclude buffers that are on a special allocation class from
* L2ARC.
*/
boolean_t
dbuf_is_l2cacheable(dmu_buf_impl_t *db)
{
vdev_t *vd = NULL;
zfs_cache_type_t cache = db->db_objset->os_secondary_cache;
blkptr_t *bp = db->db_blkptr;
if (bp != NULL && !BP_IS_HOLE(bp)) {
uint64_t vdev = DVA_GET_VDEV(bp->blk_dva);
vdev_t *rvd = db->db_objset->os_spa->spa_root_vdev;
if (vdev < rvd->vdev_children)
vd = rvd->vdev_child[vdev];
if (cache == ZFS_CACHE_ALL ||
(dbuf_is_metadata(db) && cache == ZFS_CACHE_METADATA)) {
if (vd == NULL)
return (B_TRUE);
if ((vd->vdev_alloc_bias != VDEV_BIAS_SPECIAL &&
vd->vdev_alloc_bias != VDEV_BIAS_DEDUP) ||
l2arc_exclude_special == 0)
return (B_TRUE);
}
}
return (B_FALSE);
}
static inline boolean_t
dnode_level_is_l2cacheable(blkptr_t *bp, dnode_t *dn, int64_t level)
{
vdev_t *vd = NULL;
zfs_cache_type_t cache = dn->dn_objset->os_secondary_cache;
if (bp != NULL && !BP_IS_HOLE(bp)) {
uint64_t vdev = DVA_GET_VDEV(bp->blk_dva);
vdev_t *rvd = dn->dn_objset->os_spa->spa_root_vdev;
if (vdev < rvd->vdev_children)
vd = rvd->vdev_child[vdev];
if (cache == ZFS_CACHE_ALL || ((level > 0 ||
DMU_OT_IS_METADATA(dn->dn_handle->dnh_dnode->dn_type)) &&
cache == ZFS_CACHE_METADATA)) {
if (vd == NULL)
return (B_TRUE);
if ((vd->vdev_alloc_bias != VDEV_BIAS_SPECIAL &&
vd->vdev_alloc_bias != VDEV_BIAS_DEDUP) ||
l2arc_exclude_special == 0)
return (B_TRUE);
}
}
return (B_FALSE);
}
/* /*
* This function *must* return indices evenly distributed between all * This function *must* return indices evenly distributed between all
@ -1523,7 +1586,7 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags,
DTRACE_SET_STATE(db, "read issued"); DTRACE_SET_STATE(db, "read issued");
mutex_exit(&db->db_mtx); mutex_exit(&db->db_mtx);
if (DBUF_IS_L2CACHEABLE(db)) if (dbuf_is_l2cacheable(db))
aflags |= ARC_FLAG_L2CACHE; aflags |= ARC_FLAG_L2CACHE;
dbuf_add_ref(db, NULL); dbuf_add_ref(db, NULL);
@ -3372,7 +3435,7 @@ dbuf_prefetch_impl(dnode_t *dn, int64_t level, uint64_t blkid,
dpa->dpa_arg = arg; dpa->dpa_arg = arg;
/* flag if L2ARC eligible, l2arc_noprefetch then decides */ /* flag if L2ARC eligible, l2arc_noprefetch then decides */
if (DNODE_LEVEL_IS_L2CACHEABLE(dn, level)) if (dnode_level_is_l2cacheable(&bp, dn, level))
dpa->dpa_aflags |= ARC_FLAG_L2CACHE; dpa->dpa_aflags |= ARC_FLAG_L2CACHE;
/* /*
@ -3390,7 +3453,7 @@ dbuf_prefetch_impl(dnode_t *dn, int64_t level, uint64_t blkid,
zbookmark_phys_t zb; zbookmark_phys_t zb;
/* flag if L2ARC eligible, l2arc_noprefetch then decides */ /* flag if L2ARC eligible, l2arc_noprefetch then decides */
if (DNODE_LEVEL_IS_L2CACHEABLE(dn, level)) if (dnode_level_is_l2cacheable(&bp, dn, level))
iter_aflags |= ARC_FLAG_L2CACHE; iter_aflags |= ARC_FLAG_L2CACHE;
SET_BOOKMARK(&zb, ds != NULL ? ds->ds_object : DMU_META_OBJSET, SET_BOOKMARK(&zb, ds != NULL ? ds->ds_object : DMU_META_OBJSET,
@ -4989,7 +5052,7 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
children_ready_cb = dbuf_write_children_ready; children_ready_cb = dbuf_write_children_ready;
dr->dr_zio = arc_write(pio, os->os_spa, txg, dr->dr_zio = arc_write(pio, os->os_spa, txg,
&dr->dr_bp_copy, data, DBUF_IS_L2CACHEABLE(db), &dr->dr_bp_copy, data, dbuf_is_l2cacheable(db),
&zp, dbuf_write_ready, &zp, dbuf_write_ready,
children_ready_cb, dbuf_write_physdone, children_ready_cb, dbuf_write_physdone,
dbuf_write_done, db, ZIO_PRIORITY_ASYNC_WRITE, dbuf_write_done, db, ZIO_PRIORITY_ASYNC_WRITE,

View File

@ -1846,7 +1846,7 @@ dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd)
dsa->dsa_tx = NULL; dsa->dsa_tx = NULL;
zio_nowait(arc_write(pio, os->os_spa, txg, zio_nowait(arc_write(pio, os->os_spa, txg,
zgd->zgd_bp, dr->dt.dl.dr_data, DBUF_IS_L2CACHEABLE(db), zgd->zgd_bp, dr->dt.dl.dr_data, dbuf_is_l2cacheable(db),
&zp, dmu_sync_ready, NULL, NULL, dmu_sync_done, dsa, &zp, dmu_sync_ready, NULL, NULL, dmu_sync_done, dsa,
ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, &zb)); ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, &zb));

View File

@ -63,6 +63,8 @@
#include <sys/dmu_recv.h> #include <sys/dmu_recv.h>
#include <sys/zfs_project.h> #include <sys/zfs_project.h>
#include "zfs_namecheck.h" #include "zfs_namecheck.h"
#include <sys/vdev_impl.h>
#include <sys/arc.h>
/* /*
* Needed to close a window in dnode_move() that allows the objset to be freed * Needed to close a window in dnode_move() that allows the objset to be freed
@ -411,6 +413,34 @@ dnode_multilist_index_func(multilist_t *ml, void *obj)
multilist_get_num_sublists(ml)); multilist_get_num_sublists(ml));
} }
static inline boolean_t
dmu_os_is_l2cacheable(objset_t *os)
{
vdev_t *vd = NULL;
zfs_cache_type_t cache = os->os_secondary_cache;
blkptr_t *bp = os->os_rootbp;
if (bp != NULL && !BP_IS_HOLE(bp)) {
uint64_t vdev = DVA_GET_VDEV(bp->blk_dva);
vdev_t *rvd = os->os_spa->spa_root_vdev;
if (vdev < rvd->vdev_children)
vd = rvd->vdev_child[vdev];
if (cache == ZFS_CACHE_ALL || cache == ZFS_CACHE_METADATA) {
if (vd == NULL)
return (B_TRUE);
if ((vd->vdev_alloc_bias != VDEV_BIAS_SPECIAL &&
vd->vdev_alloc_bias != VDEV_BIAS_DEDUP) ||
l2arc_exclude_special == 0)
return (B_TRUE);
}
}
return (B_FALSE);
}
/* /*
* Instantiates the objset_t in-memory structure corresponding to the * Instantiates the objset_t in-memory structure corresponding to the
* objset_phys_t that's pointed to by the specified blkptr_t. * objset_phys_t that's pointed to by the specified blkptr_t.
@ -453,7 +483,7 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET, SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET,
ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID); ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
if (DMU_OS_IS_L2CACHEABLE(os)) if (dmu_os_is_l2cacheable(os))
aflags |= ARC_FLAG_L2CACHE; aflags |= ARC_FLAG_L2CACHE;
if (ds != NULL && ds->ds_dir->dd_crypto_obj != 0) { if (ds != NULL && ds->ds_dir->dd_crypto_obj != 0) {
@ -1661,7 +1691,7 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
} }
zio = arc_write(pio, os->os_spa, tx->tx_txg, zio = arc_write(pio, os->os_spa, tx->tx_txg,
blkptr_copy, os->os_phys_buf, DMU_OS_IS_L2CACHEABLE(os), blkptr_copy, os->os_phys_buf, dmu_os_is_l2cacheable(os),
&zp, dmu_objset_write_ready, NULL, NULL, dmu_objset_write_done, &zp, dmu_objset_write_ready, NULL, NULL, dmu_objset_write_done,
os, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb); os, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);