ddt: rework access to phys array slots

The "flat phys" feature will use only a single phys slot for all
entries, which means the old "single", "double", etc. naming no longer
makes sense and, more importantly, that choosing the right slot for a
given block pointer will depend on how many slots are in use for a given
DDT.

This removes the old names, and adds accessor macros to decouple
specific phys array indexes from any particular meaning.

(These macros look strange in isolation, mainly because they take the
ddt_t* as an arg but don't use it. They are introduced in a separate
commit mostly to present the concept to the reader before the
"flat phys" commit extends it.)

Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Rob Norris <rob.norris@klarasystems.com>
Sponsored-by: Klara, Inc.
Sponsored-by: iXsystems, Inc.
Closes #15893
This commit is contained in:
Rob Norris 2023-07-03 15:16:02 +10:00 committed by Brian Behlendorf
parent d63f5d7e50
commit d17ab631a9
8 changed files with 79 additions and 65 deletions

View File

@ -1916,21 +1916,20 @@ dump_log_spacemaps(spa_t *spa)
static void
dump_dde(const ddt_t *ddt, const ddt_entry_t *dde, uint64_t index)
{
const ddt_phys_t *ddp = dde->dde_phys;
const ddt_key_t *ddk = &dde->dde_key;
const char *types[4] = { "ditto", "single", "double", "triple" };
char blkbuf[BP_SPRINTF_LEN];
blkptr_t blk;
int p;
for (p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
for (p = 0; p < DDT_NPHYS(ddt); p++) {
const ddt_phys_t *ddp = &dde->dde_phys[p];
if (ddp->ddp_phys_birth == 0)
continue;
ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk);
snprintf_blkptr(blkbuf, sizeof (blkbuf), &blk);
(void) printf("index %llx refcnt %llu %s %s\n",
(void) printf("index %llx refcnt %llu phys %d %s\n",
(u_longlong_t)index, (u_longlong_t)ddp->ddp_refcnt,
types[p], blkbuf);
p, blkbuf);
}
}
@ -5724,7 +5723,7 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
VERIFY3P(dde, !=, NULL);
/* Get the phys for this variant */
ddt_phys_t *ddp = ddt_phys_select(dde, bp);
ddt_phys_t *ddp = ddt_phys_select(ddt, dde, bp);
VERIFY3P(ddp, !=, NULL);
/*
@ -5751,7 +5750,7 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
dde->dde_lead_zio[idx] = (zio_t *)(uintptr_t)B_TRUE;
/* Consume a reference for this block. */
VERIFY3U(ddt_phys_total_refcnt(dde), >, 0);
VERIFY3U(ddt_phys_total_refcnt(ddt, dde), >, 0);
ddt_phys_decref(ddp);
if (seen) {

View File

@ -137,19 +137,10 @@ typedef struct {
uint64_t ddp_phys_birth;
} ddt_phys_t;
/*
* Named indexes into the ddt_phys_t array in each entry.
*
* Note, we no longer generate new DDT_PHYS_DITTO-type blocks. However,
* we maintain the ability to free existing dedup-ditto blocks.
*/
enum ddt_phys_type {
DDT_PHYS_DITTO = 0,
DDT_PHYS_SINGLE = 1,
DDT_PHYS_DOUBLE = 2,
DDT_PHYS_TRIPLE = 3,
DDT_PHYS_TYPES
};
/*
 * Accessors for the ddt_phys_t slots held in each entry. Callers use these
 * instead of hard-coded array indexes so that the slot layout can later
 * depend on the DDT. For now every DDT uses all DDT_PHYS_MAX slots, so the
 * ddt argument does not yet select a layout.
 *
 * Note, we no longer generate new dedup-ditto blocks (slot 0). However,
 * we maintain the ability to free existing dedup-ditto blocks.
 */

/* Capacity of the per-entry dde_phys[] and dde_lead_zio[] arrays. */
#define DDT_PHYS_MAX (4)

/* Number of phys slots in use for this DDT; the bound for slot loops. */
#define DDT_NPHYS(ddt) ((ddt) ? DDT_PHYS_MAX : DDT_PHYS_MAX)

/*
 * True if slot index p is the legacy dedup-ditto slot (index 0). Evaluates
 * false when ddt is NULL. p is parenthesized so that an expression argument
 * (e.g. "a & b") is compared against 0 as a whole.
 */
#define DDT_PHYS_IS_DITTO(ddt, p) ((ddt) && (p) == 0)

/* Slot index used for a block with p copies; currently p itself. */
#define DDT_PHYS_FOR_COPIES(ddt, p) ((ddt) ? (p) : (p))
/*
* A "live" entry, holding changes to an entry made this txg, and other data to
@ -162,11 +153,11 @@ enum ddt_phys_type {
typedef struct {
/* key must be first for ddt_key_compare */
ddt_key_t dde_key; /* ddt_tree key */
ddt_phys_t dde_phys[DDT_PHYS_TYPES]; /* on-disk data */
ddt_key_t dde_key; /* ddt_tree key */
ddt_phys_t dde_phys[DDT_PHYS_MAX]; /* on-disk data */
/* in-flight update IOs */
zio_t *dde_lead_zio[DDT_PHYS_TYPES];
zio_t *dde_lead_zio[DDT_PHYS_MAX];
/* copy of data after a repair read, to be rewritten */
struct abd *dde_repair_abd;
@ -234,7 +225,8 @@ extern void ddt_phys_fill(ddt_phys_t *ddp, const blkptr_t *bp);
extern void ddt_phys_clear(ddt_phys_t *ddp);
extern void ddt_phys_addref(ddt_phys_t *ddp);
extern void ddt_phys_decref(ddt_phys_t *ddp);
extern ddt_phys_t *ddt_phys_select(const ddt_entry_t *dde, const blkptr_t *bp);
extern ddt_phys_t *ddt_phys_select(const ddt_t *ddt, const ddt_entry_t *dde,
const blkptr_t *bp);
extern void ddt_histogram_add(ddt_histogram_t *dst, const ddt_histogram_t *src);
extern void ddt_histogram_stat(ddt_stat_t *dds, const ddt_histogram_t *ddh);
@ -249,6 +241,7 @@ extern uint64_t ddt_get_pool_dedup_ratio(spa_t *spa);
extern int ddt_get_pool_dedup_cached(spa_t *spa, uint64_t *psize);
extern ddt_t *ddt_select(spa_t *spa, const blkptr_t *bp);
extern ddt_t *ddt_select_checksum(spa_t *spa, enum zio_checksum checksum);
extern void ddt_enter(ddt_t *ddt);
extern void ddt_exit(ddt_t *ddt);
extern void ddt_init(void);

View File

@ -82,7 +82,7 @@ extern void ddt_stat_update(ddt_t *ddt, ddt_entry_t *dde, uint64_t neg);
*/
#define DDT_NAMELEN 32
extern uint64_t ddt_phys_total_refcnt(const ddt_entry_t *dde);
extern uint64_t ddt_phys_total_refcnt(const ddt_t *ddt, const ddt_entry_t *dde);
extern void ddt_key_fill(ddt_key_t *ddk, const blkptr_t *bp);

View File

@ -540,11 +540,10 @@ ddt_phys_free(ddt_t *ddt, ddt_key_t *ddk, ddt_phys_t *ddp, uint64_t txg)
}
ddt_phys_t *
ddt_phys_select(const ddt_entry_t *dde, const blkptr_t *bp)
ddt_phys_select(const ddt_t *ddt, const ddt_entry_t *dde, const blkptr_t *bp)
{
ddt_phys_t *ddp = (ddt_phys_t *)dde->dde_phys;
for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
for (int p = 0; p < DDT_NPHYS(ddt); p++) {
ddt_phys_t *ddp = (ddt_phys_t *)&dde->dde_phys[p];
if (DVA_EQUAL(BP_IDENTITY(bp), &ddp->ddp_dva[0]) &&
BP_GET_BIRTH(bp) == ddp->ddp_phys_birth)
return (ddp);
@ -553,12 +552,15 @@ ddt_phys_select(const ddt_entry_t *dde, const blkptr_t *bp)
}
uint64_t
ddt_phys_total_refcnt(const ddt_entry_t *dde)
ddt_phys_total_refcnt(const ddt_t *ddt, const ddt_entry_t *dde)
{
uint64_t refcnt = 0;
for (int p = DDT_PHYS_SINGLE; p <= DDT_PHYS_TRIPLE; p++)
for (int p = 0; p < DDT_NPHYS(ddt); p++) {
if (DDT_PHYS_IS_DITTO(ddt, p))
continue;
refcnt += dde->dde_phys[p].ddp_refcnt;
}
return (refcnt);
}
@ -570,6 +572,12 @@ ddt_select(spa_t *spa, const blkptr_t *bp)
return (spa->spa_ddt[BP_GET_CHECKSUM(bp)]);
}
/*
 * Return the entry of spa's spa_ddt[] array indexed by the given checksum
 * value. The index is used as-is; no range check is performed here.
 */
ddt_t *
ddt_select_checksum(spa_t *spa, enum zio_checksum checksum)
{
	ddt_t *ddt = spa->spa_ddt[checksum];

	return (ddt);
}
void
ddt_enter(ddt_t *ddt)
{
@ -613,9 +621,9 @@ ddt_alloc(const ddt_key_t *ddk)
}
static void
ddt_free(ddt_entry_t *dde)
ddt_free(const ddt_t *ddt, ddt_entry_t *dde)
{
for (int p = 0; p < DDT_PHYS_TYPES; p++)
for (int p = 0; p < DDT_NPHYS(ddt); p++)
ASSERT3P(dde->dde_lead_zio[p], ==, NULL);
if (dde->dde_repair_abd != NULL)
@ -631,7 +639,7 @@ ddt_remove(ddt_t *ddt, ddt_entry_t *dde)
ASSERT(MUTEX_HELD(&ddt->ddt_lock));
avl_remove(&ddt->ddt_tree, dde);
ddt_free(dde);
ddt_free(ddt, dde);
}
static boolean_t
@ -759,7 +767,7 @@ ddt_lookup(ddt_t *ddt, const blkptr_t *bp)
if (dde->dde_flags & DDE_FLAG_OVERQUOTA) {
if (dde->dde_waiters == 0) {
avl_remove(&ddt->ddt_tree, dde);
ddt_free(dde);
ddt_free(ddt, dde);
}
return (NULL);
}
@ -805,7 +813,7 @@ ddt_lookup(ddt_t *ddt, const blkptr_t *bp)
/* Over quota. If no one is waiting, clean up right now. */
if (dde->dde_waiters == 0) {
avl_remove(&ddt->ddt_tree, dde);
ddt_free(dde);
ddt_free(ddt, dde);
return (NULL);
}
@ -1212,7 +1220,7 @@ ddt_repair_done(ddt_t *ddt, ddt_entry_t *dde)
avl_find(&ddt->ddt_repair_tree, dde, &where) == NULL)
avl_insert(&ddt->ddt_repair_tree, dde, where);
else
ddt_free(dde);
ddt_free(ddt, dde);
ddt_exit(ddt);
}
@ -1220,16 +1228,15 @@ ddt_repair_done(ddt_t *ddt, ddt_entry_t *dde)
static void
ddt_repair_entry_done(zio_t *zio)
{
ddt_t *ddt = ddt_select(zio->io_spa, zio->io_bp);
ddt_entry_t *rdde = zio->io_private;
ddt_free(rdde);
ddt_free(ddt, rdde);
}
static void
ddt_repair_entry(ddt_t *ddt, ddt_entry_t *dde, ddt_entry_t *rdde, zio_t *rio)
{
ddt_phys_t *ddp = dde->dde_phys;
ddt_phys_t *rddp = rdde->dde_phys;
ddt_key_t *ddk = &dde->dde_key;
ddt_key_t *rddk = &rdde->dde_key;
zio_t *zio;
@ -1238,7 +1245,9 @@ ddt_repair_entry(ddt_t *ddt, ddt_entry_t *dde, ddt_entry_t *rdde, zio_t *rio)
zio = zio_null(rio, rio->io_spa, NULL,
ddt_repair_entry_done, rdde, rio->io_flags);
for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++, rddp++) {
for (int p = 0; p < DDT_NPHYS(ddt); p++) {
ddt_phys_t *ddp = &dde->dde_phys[p];
ddt_phys_t *rddp = &rdde->dde_phys[p];
if (ddp->ddp_phys_birth == 0 ||
ddp->ddp_phys_birth != rddp->ddp_phys_birth ||
memcmp(ddp->ddp_dva, rddp->ddp_dva, sizeof (ddp->ddp_dva)))
@ -1281,7 +1290,6 @@ static void
ddt_sync_entry(ddt_t *ddt, ddt_entry_t *dde, dmu_tx_t *tx, uint64_t txg)
{
dsl_pool_t *dp = ddt->ddt_spa->spa_dsl_pool;
ddt_phys_t *ddp = dde->dde_phys;
ddt_key_t *ddk = &dde->dde_key;
ddt_type_t otype = dde->dde_type;
ddt_type_t ntype = DDT_TYPE_DEFAULT;
@ -1291,13 +1299,14 @@ ddt_sync_entry(ddt_t *ddt, ddt_entry_t *dde, dmu_tx_t *tx, uint64_t txg)
ASSERT(dde->dde_flags & DDE_FLAG_LOADED);
for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
for (int p = 0; p < DDT_NPHYS(ddt); p++) {
ASSERT3P(dde->dde_lead_zio[p], ==, NULL);
ddt_phys_t *ddp = &dde->dde_phys[p];
if (ddp->ddp_phys_birth == 0) {
ASSERT0(ddp->ddp_refcnt);
continue;
}
if (p == DDT_PHYS_DITTO) {
if (DDT_PHYS_IS_DITTO(ddt, p)) {
/*
* Note, we no longer create DDT-DITTO blocks, but we
* don't want to leak any written by older software.
@ -1310,8 +1319,6 @@ ddt_sync_entry(ddt_t *ddt, ddt_entry_t *dde, dmu_tx_t *tx, uint64_t txg)
total_refcnt += ddp->ddp_refcnt;
}
/* We do not create new DDT-DITTO blocks. */
ASSERT0(dde->dde_phys[DDT_PHYS_DITTO].ddp_phys_birth);
if (total_refcnt > 1)
nclass = DDT_CLASS_DUPLICATE;
else
@ -1369,7 +1376,7 @@ ddt_sync_table(ddt_t *ddt, dmu_tx_t *tx, uint64_t txg)
while ((dde = avl_destroy_nodes(&ddt->ddt_tree, &cookie)) != NULL) {
ddt_sync_entry(ddt, dde, tx, txg);
ddt_free(dde);
ddt_free(ddt, dde);
}
uint64_t count = 0;
@ -1512,7 +1519,8 @@ ddt_addref(spa_t *spa, const blkptr_t *bp)
ASSERT3S(dde->dde_class, <, DDT_CLASSES);
ddp = &dde->dde_phys[BP_GET_NDVAS(bp)];
int p = DDT_PHYS_FOR_COPIES(ddt, BP_GET_NDVAS(bp));
ddp = &dde->dde_phys[p];
/*
* This entry already existed (dde_type is real), so it must

View File

@ -36,14 +36,15 @@ static void
ddt_stat_generate(ddt_t *ddt, ddt_entry_t *dde, ddt_stat_t *dds)
{
spa_t *spa = ddt->ddt_spa;
ddt_phys_t *ddp = dde->dde_phys;
ddt_key_t *ddk = &dde->dde_key;
uint64_t lsize = DDK_GET_LSIZE(ddk);
uint64_t psize = DDK_GET_PSIZE(ddk);
memset(dds, 0, sizeof (*dds));
for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
for (int p = 0; p < DDT_NPHYS(ddt); p++) {
ddt_phys_t *ddp = &dde->dde_phys[p];
uint64_t dsize = 0;
uint64_t refcnt = ddp->ddp_refcnt;

View File

@ -22,6 +22,7 @@
/*
* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2018 by Delphix. All rights reserved.
* Copyright (c) 2023, Klara Inc.
*/
#include <sys/zfs_context.h>

View File

@ -2933,7 +2933,6 @@ dsl_scan_ddt_entry(dsl_scan_t *scn, enum zio_checksum checksum,
{
(void) tx;
const ddt_key_t *ddk = &dde->dde_key;
ddt_phys_t *ddp = dde->dde_phys;
blkptr_t bp;
zbookmark_phys_t zb = { 0 };
@ -2954,7 +2953,10 @@ dsl_scan_ddt_entry(dsl_scan_t *scn, enum zio_checksum checksum,
if (scn->scn_done_txg != 0)
return;
for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
ddt_t *ddt = ddt_select_checksum(tx->tx_pool->dp_spa, checksum);
for (int p = 0; p < DDT_NPHYS(ddt); p++) {
ddt_phys_t *ddp = &dde->dde_phys[p];
if (ddp->ddp_phys_birth == 0 ||
ddp->ddp_phys_birth > scn->scn_phys.scn_max_txg)
continue;

View File

@ -3254,12 +3254,14 @@ static void
zio_ddt_child_read_done(zio_t *zio)
{
blkptr_t *bp = zio->io_bp;
ddt_t *ddt;
ddt_entry_t *dde = zio->io_private;
ddt_phys_t *ddp;
zio_t *pio = zio_unique_parent(zio);
mutex_enter(&pio->io_lock);
ddp = ddt_phys_select(dde, bp);
ddt = ddt_select(zio->io_spa, bp);
ddp = ddt_phys_select(ddt, dde, bp);
if (zio->io_error == 0)
ddt_phys_clear(ddp); /* this ddp doesn't need repair */
@ -3282,8 +3284,7 @@ zio_ddt_read_start(zio_t *zio)
if (zio->io_child_error[ZIO_CHILD_DDT]) {
ddt_t *ddt = ddt_select(zio->io_spa, bp);
ddt_entry_t *dde = ddt_repair_start(ddt, bp);
ddt_phys_t *ddp = dde->dde_phys;
ddt_phys_t *ddp_self = ddt_phys_select(dde, bp);
ddt_phys_t *ddp_self = ddt_phys_select(ddt, dde, bp);
blkptr_t blk;
ASSERT(zio->io_vsd == NULL);
@ -3292,7 +3293,8 @@ zio_ddt_read_start(zio_t *zio)
if (ddp_self == NULL)
return (zio);
for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
for (int p = 0; p < DDT_NPHYS(ddt); p++) {
ddt_phys_t *ddp = &dde->dde_phys[p];
if (ddp->ddp_phys_birth == 0 || ddp == ddp_self)
continue;
ddt_bp_create(ddt->ddt_checksum, &dde->dde_key, ddp,
@ -3372,7 +3374,10 @@ zio_ddt_collision(zio_t *zio, ddt_t *ddt, ddt_entry_t *dde)
* loaded).
*/
for (int p = DDT_PHYS_SINGLE; p <= DDT_PHYS_TRIPLE; p++) {
for (int p = 0; p < DDT_NPHYS(ddt); p++) {
if (DDT_PHYS_IS_DITTO(ddt, p))
continue;
zio_t *lio = dde->dde_lead_zio[p];
if (lio != NULL && do_raw) {
@ -3384,7 +3389,10 @@ zio_ddt_collision(zio_t *zio, ddt_t *ddt, ddt_entry_t *dde)
}
}
for (int p = DDT_PHYS_SINGLE; p <= DDT_PHYS_TRIPLE; p++) {
for (int p = 0; p < DDT_NPHYS(ddt); p++) {
if (DDT_PHYS_IS_DITTO(ddt, p))
continue;
ddt_phys_t *ddp = &dde->dde_phys[p];
if (ddp->ddp_phys_birth != 0 && do_raw) {
@ -3452,15 +3460,16 @@ zio_ddt_collision(zio_t *zio, ddt_t *ddt, ddt_entry_t *dde)
static void
zio_ddt_child_write_ready(zio_t *zio)
{
int p = zio->io_prop.zp_copies;
ddt_t *ddt = ddt_select(zio->io_spa, zio->io_bp);
ddt_entry_t *dde = zio->io_private;
ddt_phys_t *ddp = &dde->dde_phys[p];
zio_t *pio;
if (zio->io_error)
return;
int p = DDT_PHYS_FOR_COPIES(ddt, zio->io_prop.zp_copies);
ddt_phys_t *ddp = &dde->dde_phys[p];
ddt_enter(ddt);
ASSERT(dde->dde_lead_zio[p] == zio);
@ -3477,9 +3486,10 @@ zio_ddt_child_write_ready(zio_t *zio)
static void
zio_ddt_child_write_done(zio_t *zio)
{
int p = zio->io_prop.zp_copies;
ddt_t *ddt = ddt_select(zio->io_spa, zio->io_bp);
ddt_entry_t *dde = zio->io_private;
int p = DDT_PHYS_FOR_COPIES(ddt, zio->io_prop.zp_copies);
ddt_phys_t *ddp = &dde->dde_phys[p];
ddt_enter(ddt);
@ -3506,11 +3516,9 @@ zio_ddt_write(zio_t *zio)
blkptr_t *bp = zio->io_bp;
uint64_t txg = zio->io_txg;
zio_prop_t *zp = &zio->io_prop;
int p = zp->zp_copies;
zio_t *cio = NULL;
ddt_t *ddt = ddt_select(spa, bp);
ddt_entry_t *dde;
ddt_phys_t *ddp;
ASSERT(BP_GET_DEDUP(bp));
ASSERT(BP_GET_CHECKSUM(bp) == zp->zp_checksum);
@ -3528,7 +3536,9 @@ zio_ddt_write(zio_t *zio)
ddt_exit(ddt);
return (zio);
}
ddp = &dde->dde_phys[p];
int p = DDT_PHYS_FOR_COPIES(ddt, zp->zp_copies);
ddt_phys_t *ddp = &dde->dde_phys[p];
if (zp->zp_dedup_verify && zio_ddt_collision(zio, ddt, dde)) {
/*
@ -3600,7 +3610,7 @@ zio_ddt_free(zio_t *zio)
ddt_enter(ddt);
freedde = dde = ddt_lookup(ddt, bp);
if (dde) {
ddp = ddt_phys_select(dde, bp);
ddp = ddt_phys_select(ddt, dde, bp);
if (ddp)
ddt_phys_decref(ddp);
}