ddt: introduce lightweight entry

The idea here is that sometimes you need the contents of an entry with
no intent to modify it, and/or from a place where it's difficult to get
hold of its originating ddt_t to know how to interpret it.

A lightweight entry contains everything you might need to "read" an
entry - its key, type and phys contents - but none of the extras for
modifying it or using it in a larger context. It also has the full
complement of phys slots, so it can represent any kind of dedup entry
without having to know the specific configuration of the table it came
from.

Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Rob Norris <rob.norris@klarasystems.com>
Sponsored-by: Klara, Inc.
Sponsored-by: iXsystems, Inc.
Closes #15893
This commit is contained in:
Rob Norris 2023-07-03 22:16:04 +10:00 committed by Brian Behlendorf
parent d17ab631a9
commit 4d686c3da5
6 changed files with 58 additions and 34 deletions

View File

@ -1914,15 +1914,16 @@ dump_log_spacemaps(spa_t *spa)
} }
static void static void
dump_dde(const ddt_t *ddt, const ddt_entry_t *dde, uint64_t index) dump_ddt_entry(const ddt_t *ddt, const ddt_lightweight_entry_t *ddlwe,
uint64_t index)
{ {
const ddt_key_t *ddk = &dde->dde_key; const ddt_key_t *ddk = &ddlwe->ddlwe_key;
char blkbuf[BP_SPRINTF_LEN]; char blkbuf[BP_SPRINTF_LEN];
blkptr_t blk; blkptr_t blk;
int p; int p;
for (p = 0; p < DDT_NPHYS(ddt); p++) { for (p = 0; p < ddlwe->ddlwe_nphys; p++) {
const ddt_phys_t *ddp = &dde->dde_phys[p]; const ddt_phys_t *ddp = &ddlwe->ddlwe_phys[p];
if (ddp->ddp_phys_birth == 0) if (ddp->ddp_phys_birth == 0)
continue; continue;
ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk); ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk);
@ -1959,7 +1960,7 @@ static void
dump_ddt(ddt_t *ddt, ddt_type_t type, ddt_class_t class) dump_ddt(ddt_t *ddt, ddt_type_t type, ddt_class_t class)
{ {
char name[DDT_NAMELEN]; char name[DDT_NAMELEN];
ddt_entry_t dde; ddt_lightweight_entry_t ddlwe;
uint64_t walk = 0; uint64_t walk = 0;
dmu_object_info_t doi; dmu_object_info_t doi;
uint64_t count, dspace, mspace; uint64_t count, dspace, mspace;
@ -2000,8 +2001,8 @@ dump_ddt(ddt_t *ddt, ddt_type_t type, ddt_class_t class)
(void) printf("%s contents:\n\n", name); (void) printf("%s contents:\n\n", name);
while ((error = ddt_object_walk(ddt, type, class, &walk, &dde)) == 0) while ((error = ddt_object_walk(ddt, type, class, &walk, &ddlwe)) == 0)
dump_dde(ddt, &dde, walk); dump_ddt_entry(ddt, &ddlwe, walk);
ASSERT3U(error, ==, ENOENT); ASSERT3U(error, ==, ENOENT);

View File

@ -173,6 +173,18 @@ typedef struct {
avl_node_t dde_node; /* ddt_tree node */ avl_node_t dde_node; /* ddt_tree node */
} ddt_entry_t; } ddt_entry_t;
/*
 * A lightweight entry is for short-lived or transient uses, like iterating or
 * inspecting, when you don't care where it came from.
 *
 * It carries only what is needed to read an entry: its key, the type and
 * class it was found under, and its phys contents. The phys array is always
 * sized DDT_PHYS_MAX; ddlwe_nphys says how many of those slots are in use,
 * so consumers can loop over the slots without consulting the ddt_t.
 */
typedef struct {
ddt_key_t ddlwe_key; /* entry key */
ddt_type_t ddlwe_type; /* type of the object the entry came from */
ddt_class_t ddlwe_class; /* class of the object the entry came from */
uint8_t ddlwe_nphys; /* number of ddlwe_phys slots in use */
ddt_phys_t ddlwe_phys[DDT_PHYS_MAX]; /* phys slots, first ddlwe_nphys used */
} ddt_lightweight_entry_t;
/* /*
* In-core DDT object. This covers all entries and stats for a the whole pool * In-core DDT object. This covers all entries and stats for a the whole pool
* for a given checksum type. * for a given checksum type.
@ -241,7 +253,6 @@ extern uint64_t ddt_get_pool_dedup_ratio(spa_t *spa);
extern int ddt_get_pool_dedup_cached(spa_t *spa, uint64_t *psize); extern int ddt_get_pool_dedup_cached(spa_t *spa, uint64_t *psize);
extern ddt_t *ddt_select(spa_t *spa, const blkptr_t *bp); extern ddt_t *ddt_select(spa_t *spa, const blkptr_t *bp);
extern ddt_t *ddt_select_checksum(spa_t *spa, enum zio_checksum checksum);
extern void ddt_enter(ddt_t *ddt); extern void ddt_enter(ddt_t *ddt);
extern void ddt_exit(ddt_t *ddt); extern void ddt_exit(ddt_t *ddt);
extern void ddt_init(void); extern void ddt_init(void);
@ -263,7 +274,8 @@ extern void ddt_create(spa_t *spa);
extern int ddt_load(spa_t *spa); extern int ddt_load(spa_t *spa);
extern void ddt_unload(spa_t *spa); extern void ddt_unload(spa_t *spa);
extern void ddt_sync(spa_t *spa, uint64_t txg); extern void ddt_sync(spa_t *spa, uint64_t txg);
extern int ddt_walk(spa_t *spa, ddt_bookmark_t *ddb, ddt_entry_t *dde); extern int ddt_walk(spa_t *spa, ddt_bookmark_t *ddb,
ddt_lightweight_entry_t *ddlwe);
extern boolean_t ddt_addref(spa_t *spa, const blkptr_t *bp); extern boolean_t ddt_addref(spa_t *spa, const blkptr_t *bp);

View File

@ -41,6 +41,17 @@ extern "C" {
#define DDT_DIR_VERSION "version" #define DDT_DIR_VERSION "version"
#define DDT_DIR_FLAGS "flags" #define DDT_DIR_FLAGS "flags"
/*
 * Fill a lightweight entry from a live entry.
 *
 * Zeroes *ddlwe, copies the key, type and class from dde, records
 * DDT_NPHYS(ddt) as the number of phys slots in use and copies that many
 * slots across. Any remaining slots are left zeroed.
 *
 * Every parameter is parenthesized at each use (including inside sizeof) and
 * the loop index has a name unlikely to collide with a caller's variable, so
 * arbitrary pointer expressions such as "base + 1" or "&arr[p]" are safe to
 * pass. Arguments are still evaluated more than once, so they must be free
 * of side effects.
 */
#define DDT_ENTRY_TO_LIGHTWEIGHT(ddt, dde, ddlwe) do { \
	memset((ddlwe), 0, sizeof (*(ddlwe))); \
	(ddlwe)->ddlwe_key = (dde)->dde_key; \
	(ddlwe)->ddlwe_type = (dde)->dde_type; \
	(ddlwe)->ddlwe_class = (dde)->dde_class; \
	(ddlwe)->ddlwe_nphys = DDT_NPHYS(ddt); \
	for (int ddlwe_p_ = 0; ddlwe_p_ < (ddlwe)->ddlwe_nphys; \
	    ddlwe_p_++) \
		(ddlwe)->ddlwe_phys[ddlwe_p_] = \
		    (dde)->dde_phys[ddlwe_p_]; \
} while (0)
/* /*
* Ops vector to access a specific DDT object type. * Ops vector to access a specific DDT object type.
*/ */
@ -91,7 +102,7 @@ extern void ddt_stat_add(ddt_stat_t *dst, const ddt_stat_t *src, uint64_t neg);
extern void ddt_object_name(ddt_t *ddt, ddt_type_t type, ddt_class_t clazz, extern void ddt_object_name(ddt_t *ddt, ddt_type_t type, ddt_class_t clazz,
char *name); char *name);
extern int ddt_object_walk(ddt_t *ddt, ddt_type_t type, ddt_class_t clazz, extern int ddt_object_walk(ddt_t *ddt, ddt_type_t type, ddt_class_t clazz,
uint64_t *walk, ddt_entry_t *dde); uint64_t *walk, ddt_lightweight_entry_t *ddlwe);
extern int ddt_object_count(ddt_t *ddt, ddt_type_t type, ddt_class_t clazz, extern int ddt_object_count(ddt_t *ddt, ddt_type_t type, ddt_class_t clazz,
uint64_t *count); uint64_t *count);
extern int ddt_object_info(ddt_t *ddt, ddt_type_t type, ddt_class_t clazz, extern int ddt_object_info(ddt_t *ddt, ddt_type_t type, ddt_class_t clazz,

View File

@ -202,7 +202,7 @@ boolean_t dsl_scan_resilvering(struct dsl_pool *dp);
boolean_t dsl_scan_resilver_scheduled(struct dsl_pool *dp); boolean_t dsl_scan_resilver_scheduled(struct dsl_pool *dp);
boolean_t dsl_dataset_unstable(struct dsl_dataset *ds); boolean_t dsl_dataset_unstable(struct dsl_dataset *ds);
void dsl_scan_ddt_entry(dsl_scan_t *scn, enum zio_checksum checksum, void dsl_scan_ddt_entry(dsl_scan_t *scn, enum zio_checksum checksum,
ddt_entry_t *dde, dmu_tx_t *tx); ddt_lightweight_entry_t *ddlwe, dmu_tx_t *tx);
void dsl_scan_ds_destroyed(struct dsl_dataset *ds, struct dmu_tx *tx); void dsl_scan_ds_destroyed(struct dsl_dataset *ds, struct dmu_tx *tx);
void dsl_scan_ds_snapshotted(struct dsl_dataset *ds, struct dmu_tx *tx); void dsl_scan_ds_snapshotted(struct dsl_dataset *ds, struct dmu_tx *tx);
void dsl_scan_ds_clone_swapped(struct dsl_dataset *ds1, struct dsl_dataset *ds2, void dsl_scan_ds_clone_swapped(struct dsl_dataset *ds1, struct dsl_dataset *ds2,

View File

@ -401,13 +401,20 @@ ddt_object_remove(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
int int
ddt_object_walk(ddt_t *ddt, ddt_type_t type, ddt_class_t class, ddt_object_walk(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
uint64_t *walk, ddt_entry_t *dde) uint64_t *walk, ddt_lightweight_entry_t *ddlwe)
{ {
ASSERT(ddt_object_exists(ddt, type, class)); ASSERT(ddt_object_exists(ddt, type, class));
return (ddt_ops[type]->ddt_op_walk(ddt->ddt_os, int error = ddt_ops[type]->ddt_op_walk(ddt->ddt_os,
ddt->ddt_object[type][class], walk, &dde->dde_key, ddt->ddt_object[type][class], walk, &ddlwe->ddlwe_key,
dde->dde_phys, sizeof (dde->dde_phys))); ddlwe->ddlwe_phys, sizeof (ddlwe->ddlwe_phys));
if (error == 0) {
ddlwe->ddlwe_type = type;
ddlwe->ddlwe_class = class;
ddlwe->ddlwe_nphys = DDT_NPHYS(ddt);
return (0);
}
return (error);
} }
int int
@ -572,12 +579,6 @@ ddt_select(spa_t *spa, const blkptr_t *bp)
return (spa->spa_ddt[BP_GET_CHECKSUM(bp)]); return (spa->spa_ddt[BP_GET_CHECKSUM(bp)]);
} }
ddt_t *
ddt_select_checksum(spa_t *spa, enum zio_checksum checksum)
{
return (spa->spa_ddt[checksum]);
}
void void
ddt_enter(ddt_t *ddt) ddt_enter(ddt_t *ddt)
{ {
@ -1347,8 +1348,10 @@ ddt_sync_entry(ddt_t *ddt, ddt_entry_t *dde, dmu_tx_t *tx, uint64_t txg)
* traversing.) * traversing.)
*/ */
if (nclass < oclass) { if (nclass < oclass) {
ddt_lightweight_entry_t ddlwe;
DDT_ENTRY_TO_LIGHTWEIGHT(ddt, dde, &ddlwe);
dsl_scan_ddt_entry(dp->dp_scan, dsl_scan_ddt_entry(dp->dp_scan,
ddt->ddt_checksum, dde, tx); ddt->ddt_checksum, &ddlwe, tx);
} }
} }
} }
@ -1455,7 +1458,7 @@ ddt_sync(spa_t *spa, uint64_t txg)
} }
int int
ddt_walk(spa_t *spa, ddt_bookmark_t *ddb, ddt_entry_t *dde) ddt_walk(spa_t *spa, ddt_bookmark_t *ddb, ddt_lightweight_entry_t *ddlwe)
{ {
do { do {
do { do {
@ -1468,10 +1471,8 @@ ddt_walk(spa_t *spa, ddt_bookmark_t *ddb, ddt_entry_t *dde)
ddb->ddb_class)) { ddb->ddb_class)) {
error = ddt_object_walk(ddt, error = ddt_object_walk(ddt,
ddb->ddb_type, ddb->ddb_class, ddb->ddb_type, ddb->ddb_class,
&ddb->ddb_cursor, dde); &ddb->ddb_cursor, ddlwe);
} }
dde->dde_type = ddb->ddb_type;
dde->dde_class = ddb->ddb_class;
if (error == 0) if (error == 0)
return (0); return (0);
if (error != ENOENT) if (error != ENOENT)

View File

@ -2929,10 +2929,10 @@ enqueue_cb(dsl_pool_t *dp, dsl_dataset_t *hds, void *arg)
void void
dsl_scan_ddt_entry(dsl_scan_t *scn, enum zio_checksum checksum, dsl_scan_ddt_entry(dsl_scan_t *scn, enum zio_checksum checksum,
ddt_entry_t *dde, dmu_tx_t *tx) ddt_lightweight_entry_t *ddlwe, dmu_tx_t *tx)
{ {
(void) tx; (void) tx;
const ddt_key_t *ddk = &dde->dde_key; const ddt_key_t *ddk = &ddlwe->ddlwe_key;
blkptr_t bp; blkptr_t bp;
zbookmark_phys_t zb = { 0 }; zbookmark_phys_t zb = { 0 };
@ -2953,9 +2953,8 @@ dsl_scan_ddt_entry(dsl_scan_t *scn, enum zio_checksum checksum,
if (scn->scn_done_txg != 0) if (scn->scn_done_txg != 0)
return; return;
ddt_t *ddt = ddt_select_checksum(tx->tx_pool->dp_spa, checksum); for (int p = 0; p < ddlwe->ddlwe_nphys; p++) {
for (int p = 0; p < DDT_NPHYS(ddt); p++) { ddt_phys_t *ddp = &ddlwe->ddlwe_phys[p];
ddt_phys_t *ddp = &dde->dde_phys[p];
if (ddp->ddp_phys_birth == 0 || if (ddp->ddp_phys_birth == 0 ||
ddp->ddp_phys_birth > scn->scn_phys.scn_max_txg) ddp->ddp_phys_birth > scn->scn_phys.scn_max_txg)
@ -3004,11 +3003,11 @@ static void
dsl_scan_ddt(dsl_scan_t *scn, dmu_tx_t *tx) dsl_scan_ddt(dsl_scan_t *scn, dmu_tx_t *tx)
{ {
ddt_bookmark_t *ddb = &scn->scn_phys.scn_ddt_bookmark; ddt_bookmark_t *ddb = &scn->scn_phys.scn_ddt_bookmark;
ddt_entry_t dde = {{{{0}}}}; ddt_lightweight_entry_t ddlwe = {0};
int error; int error;
uint64_t n = 0; uint64_t n = 0;
while ((error = ddt_walk(scn->scn_dp->dp_spa, ddb, &dde)) == 0) { while ((error = ddt_walk(scn->scn_dp->dp_spa, ddb, &ddlwe)) == 0) {
ddt_t *ddt; ddt_t *ddt;
if (ddb->ddb_class > scn->scn_phys.scn_ddt_class_max) if (ddb->ddb_class > scn->scn_phys.scn_ddt_class_max)
@ -3023,7 +3022,7 @@ dsl_scan_ddt(dsl_scan_t *scn, dmu_tx_t *tx)
ddt = scn->scn_dp->dp_spa->spa_ddt[ddb->ddb_checksum]; ddt = scn->scn_dp->dp_spa->spa_ddt[ddb->ddb_checksum];
ASSERT(avl_first(&ddt->ddt_tree) == NULL); ASSERT(avl_first(&ddt->ddt_tree) == NULL);
dsl_scan_ddt_entry(scn, ddb->ddb_checksum, &dde, tx); dsl_scan_ddt_entry(scn, ddb->ddb_checksum, &ddlwe, tx);
n++; n++;
if (dsl_scan_check_suspend(scn, NULL)) if (dsl_scan_check_suspend(scn, NULL))