dnode: allow storage class to be overridden by object type

spa_preferred_class() selects a storage class based on (among other
things) the DMU object type. This only works for old-style object types
that match only one specific kind of thing. For DMU_OTN_ types we need
another way to signal the storage class.

This commit allows the object type to be overridden in the IO policy for
the purposes of choosing a storage class. It then adds the ability to
set the storage type on a dnode hold, such that all writes generated
under that hold will get it.

This method has two shortcomings:

- it would be better if we could "name" a set of storage class
  preferences rather than it being implied by the object type.
- it would be better if this info were stored in the dnode on disk.

In the absence of those things, this seems like the smallest possible
change.

Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Rob Norris <rob.norris@klarasystems.com>
Sponsored-by: Klara, Inc.
Sponsored-by: iXsystems, Inc.
Closes #15894
This commit is contained in:
Rob Norris 2023-06-27 12:50:18 +10:00 committed by Brian Behlendorf
parent e26b3771ee
commit d54d0fff39
6 changed files with 35 additions and 2 deletions

View File

@ -380,6 +380,9 @@ struct dnode {
/* holds prefetch structure */ /* holds prefetch structure */
struct zfetch dn_zfetch; struct zfetch dn_zfetch;
/* Not in dn_phys, but should be. Set it after taking a hold. */
dmu_object_type_t dn_storage_type; /* type for storage class */
}; };
/* /*
@ -462,6 +465,8 @@ void dnode_evict_dbufs(dnode_t *dn);
void dnode_evict_bonus(dnode_t *dn); void dnode_evict_bonus(dnode_t *dn);
void dnode_free_interior_slots(dnode_t *dn); void dnode_free_interior_slots(dnode_t *dn);
void dnode_set_storage_type(dnode_t *dn, dmu_object_type_t type);
#define DNODE_IS_DIRTY(_dn) \ #define DNODE_IS_DIRTY(_dn) \
((_dn)->dn_dirty_txg >= spa_syncing_txg((_dn)->dn_objset->os_spa)) ((_dn)->dn_dirty_txg >= spa_syncing_txg((_dn)->dn_objset->os_spa))

View File

@ -356,6 +356,7 @@ typedef struct zio_prop {
uint8_t zp_iv[ZIO_DATA_IV_LEN]; uint8_t zp_iv[ZIO_DATA_IV_LEN];
uint8_t zp_mac[ZIO_DATA_MAC_LEN]; uint8_t zp_mac[ZIO_DATA_MAC_LEN];
uint32_t zp_zpl_smallblk; uint32_t zp_zpl_smallblk;
dmu_object_type_t zp_storage_type;
} zio_prop_t; } zio_prop_t;
typedef struct zio_cksum_report zio_cksum_report_t; typedef struct zio_cksum_report zio_cksum_report_t;

View File

@ -2362,6 +2362,7 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp)
memset(zp->zp_mac, 0, ZIO_DATA_MAC_LEN); memset(zp->zp_mac, 0, ZIO_DATA_MAC_LEN);
zp->zp_zpl_smallblk = DMU_OT_IS_FILE(zp->zp_type) ? zp->zp_zpl_smallblk = DMU_OT_IS_FILE(zp->zp_type) ?
os->os_zpl_special_smallblock : 0; os->os_zpl_special_smallblock : 0;
zp->zp_storage_type = dn ? dn->dn_storage_type : DMU_OT_NONE;
ASSERT3U(zp->zp_compress, !=, ZIO_COMPRESS_INHERIT); ASSERT3U(zp->zp_compress, !=, ZIO_COMPRESS_INHERIT);
} }

View File

@ -543,6 +543,17 @@ dnode_setbonus_type(dnode_t *dn, dmu_object_type_t newtype, dmu_tx_t *tx)
rw_exit(&dn->dn_struct_rwlock); rw_exit(&dn->dn_struct_rwlock);
} }
/*
* Record on the in-core dnode the object type that should be used when
* choosing a storage class for writes to this dnode.
*
* dn      - dnode to tag; the caller is expected to already have a hold.
* newtype - object type stored in dn_storage_type. DMU_OT_NONE is the value
*           the field is reset to when a dnode is created or destroyed.
*
* NOTE(review): no lock is taken around the store; presumably callers
* serialise this against concurrent writers - confirm.
*/
void
dnode_set_storage_type(dnode_t *dn, dmu_object_type_t newtype)
{
/*
* This is not in the dnode_phys, but it should be, and perhaps one day
* will. For now we require it be set after taking a hold.
*/
/* Checks that at least one hold exists on the dnode at this point. */
ASSERT3U(zfs_refcount_count(&dn->dn_holds), >=, 1);
dn->dn_storage_type = newtype;
}
void void
dnode_rm_spill(dnode_t *dn, dmu_tx_t *tx) dnode_rm_spill(dnode_t *dn, dmu_tx_t *tx)
{ {
@ -604,6 +615,8 @@ dnode_create(objset_t *os, dnode_phys_t *dnp, dmu_buf_impl_t *db,
dn->dn_have_spill = ((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) != 0); dn->dn_have_spill = ((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) != 0);
dn->dn_id_flags = 0; dn->dn_id_flags = 0;
dn->dn_storage_type = DMU_OT_NONE;
dmu_zfetch_init(&dn->dn_zfetch, dn); dmu_zfetch_init(&dn->dn_zfetch, dn);
ASSERT(DMU_OT_IS_VALID(dn->dn_phys->dn_type)); ASSERT(DMU_OT_IS_VALID(dn->dn_phys->dn_type));
@ -687,6 +700,8 @@ dnode_destroy(dnode_t *dn)
dn->dn_newprojid = ZFS_DEFAULT_PROJID; dn->dn_newprojid = ZFS_DEFAULT_PROJID;
dn->dn_id_flags = 0; dn->dn_id_flags = 0;
dn->dn_storage_type = DMU_OT_NONE;
dmu_zfetch_fini(&dn->dn_zfetch); dmu_zfetch_fini(&dn->dn_zfetch);
kmem_cache_free(dnode_cache, dn); kmem_cache_free(dnode_cache, dn);
arc_space_return(sizeof (dnode_t), ARC_SPACE_DNODE); arc_space_return(sizeof (dnode_t), ARC_SPACE_DNODE);
@ -946,6 +961,7 @@ dnode_move_impl(dnode_t *odn, dnode_t *ndn)
ndn->dn_newgid = odn->dn_newgid; ndn->dn_newgid = odn->dn_newgid;
ndn->dn_newprojid = odn->dn_newprojid; ndn->dn_newprojid = odn->dn_newprojid;
ndn->dn_id_flags = odn->dn_id_flags; ndn->dn_id_flags = odn->dn_id_flags;
ndn->dn_storage_type = odn->dn_storage_type;
dmu_zfetch_init(&ndn->dn_zfetch, ndn); dmu_zfetch_init(&ndn->dn_zfetch, ndn);
/* /*
@ -1004,6 +1020,7 @@ dnode_move_impl(dnode_t *odn, dnode_t *ndn)
odn->dn_newgid = 0; odn->dn_newgid = 0;
odn->dn_newprojid = ZFS_DEFAULT_PROJID; odn->dn_newprojid = ZFS_DEFAULT_PROJID;
odn->dn_id_flags = 0; odn->dn_id_flags = 0;
odn->dn_storage_type = DMU_OT_NONE;
/* /*
* Mark the dnode. * Mark the dnode.

View File

@ -2010,7 +2010,16 @@ metaslab_class_t *
spa_preferred_class(spa_t *spa, const zio_t *zio) spa_preferred_class(spa_t *spa, const zio_t *zio)
{ {
const zio_prop_t *zp = &zio->io_prop; const zio_prop_t *zp = &zio->io_prop;
dmu_object_type_t objtype = zp->zp_type;
/*
* Override object type for the purposes of selecting a storage class.
* Primarily for DMU_OTN_ types where we can't explicitly control their
* storage class; instead, choose a static type that most closely matches
* what we want.
*/
dmu_object_type_t objtype =
zp->zp_storage_type == DMU_OT_NONE ?
zp->zp_type : zp->zp_storage_type;
/* /*
* ZIL allocations determine their class in zio_alloc_zil(). * ZIL allocations determine their class in zio_alloc_zil().

View File

@ -3069,7 +3069,7 @@ zio_write_gang_block(zio_t *pio, metaslab_class_t *mc)
zp.zp_checksum = gio->io_prop.zp_checksum; zp.zp_checksum = gio->io_prop.zp_checksum;
zp.zp_compress = ZIO_COMPRESS_OFF; zp.zp_compress = ZIO_COMPRESS_OFF;
zp.zp_complevel = gio->io_prop.zp_complevel; zp.zp_complevel = gio->io_prop.zp_complevel;
zp.zp_type = DMU_OT_NONE; zp.zp_type = zp.zp_storage_type = DMU_OT_NONE;
zp.zp_level = 0; zp.zp_level = 0;
zp.zp_copies = gio->io_prop.zp_copies; zp.zp_copies = gio->io_prop.zp_copies;
zp.zp_dedup = B_FALSE; zp.zp_dedup = B_FALSE;