ddt: slim down ddt_entry_t

This slims down the in-memory entry to as small as it can be. The
IO-related parts are made into a separate entry, since they're
relatively rarely needed.

The variable allocation for dde_phys is to support the upcoming flat
format.

Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Rob Norris <rob.norris@klarasystems.com>
Sponsored-by: Klara, Inc.
Sponsored-by: iXsystems, Inc.
Closes #15893
This commit is contained in:
Rob Norris 2023-07-03 19:54:40 +10:00 committed by Brian Behlendorf
parent 4d686c3da5
commit 0ba5f503c5
3 changed files with 63 additions and 31 deletions

View File

@ -151,16 +151,22 @@ typedef struct {
#define DDE_FLAG_LOADED (1 << 0) /* entry ready for use */
#define DDE_FLAG_OVERQUOTA (1 << 1) /* entry unusable, no space */
/*
* Additional data to support entry update or repair. This is fixed size
* because it's relatively rarely used.
*/
typedef struct {
/* key must be first for ddt_key_compare */
ddt_key_t dde_key; /* ddt_tree key */
ddt_phys_t dde_phys[DDT_PHYS_MAX]; /* on-disk data */
/* copy of data after a repair read, to be rewritten */
abd_t *dde_repair_abd;
/* in-flight update IOs */
zio_t *dde_lead_zio[DDT_PHYS_MAX];
} ddt_entry_io_t;
/* copy of data after a repair read, to be rewritten */
struct abd *dde_repair_abd;
typedef struct {
/* key must be first for ddt_key_compare */
ddt_key_t dde_key; /* ddt_tree key */
avl_node_t dde_node; /* ddt_tree_node */
/* storage type and class the entry was loaded from */
ddt_type_t dde_type;
@ -170,7 +176,9 @@ typedef struct {
kcondvar_t dde_cv; /* signaled when load completes */
uint64_t dde_waiters; /* count of waiters on dde_cv */
avl_node_t dde_node; /* ddt_tree node */
ddt_entry_io_t *dde_io; /* IO support, when required */
ddt_phys_t dde_phys[]; /* physical data */
} ddt_entry_t;
/*
@ -265,6 +273,8 @@ extern void ddt_prefetch_all(spa_t *spa);
extern boolean_t ddt_class_contains(spa_t *spa, ddt_class_t max_class,
const blkptr_t *bp);
extern void ddt_alloc_entry_io(ddt_entry_t *dde);
extern ddt_entry_t *ddt_repair_start(ddt_t *ddt, const blkptr_t *bp);
extern void ddt_repair_done(ddt_t *ddt, ddt_entry_t *dde);

View File

@ -164,6 +164,9 @@
static kmem_cache_t *ddt_cache;
static kmem_cache_t *ddt_entry_cache;
/*
 * Number of bytes needed for one in-memory entry: the fixed ddt_entry_t
 * header plus room for DDT_PHYS_MAX ddt_phys_t elements in its trailing
 * dde_phys[] flexible array. sizeof (ddt_entry_t) alone does not include
 * that array, so use this wherever a whole entry is sized or cleared.
 */
#define DDT_ENTRY_SIZE \
(sizeof (ddt_entry_t) + sizeof (ddt_phys_t) * DDT_PHYS_MAX)
/*
* Enable/disable prefetching of dedup-ed blocks which are going to be freed.
*/
@ -343,7 +346,7 @@ ddt_object_lookup(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
return (ddt_ops[type]->ddt_op_lookup(ddt->ddt_os,
ddt->ddt_object[type][class], &dde->dde_key,
dde->dde_phys, sizeof (dde->dde_phys)));
dde->dde_phys, sizeof (ddt_phys_t) * DDT_NPHYS(ddt)));
}
static int
@ -386,7 +389,7 @@ ddt_object_update(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
return (ddt_ops[type]->ddt_op_update(ddt->ddt_os,
ddt->ddt_object[type][class], &dde->dde_key, dde->dde_phys,
sizeof (dde->dde_phys), tx));
sizeof (ddt_phys_t) * DDT_NPHYS(ddt), tx));
}
static int
@ -597,7 +600,7 @@ ddt_init(void)
ddt_cache = kmem_cache_create("ddt_cache",
sizeof (ddt_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
ddt_entry_cache = kmem_cache_create("ddt_entry_cache",
sizeof (ddt_entry_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
DDT_ENTRY_SIZE, 0, NULL, NULL, NULL, NULL, NULL, 0);
}
void
@ -613,7 +616,7 @@ ddt_alloc(const ddt_key_t *ddk)
ddt_entry_t *dde;
dde = kmem_cache_alloc(ddt_entry_cache, KM_SLEEP);
memset(dde, 0, sizeof (ddt_entry_t));
memset(dde, 0, DDT_ENTRY_SIZE);
cv_init(&dde->dde_cv, NULL, CV_DEFAULT, NULL);
dde->dde_key = *ddk;
@ -621,14 +624,27 @@ ddt_alloc(const ddt_key_t *ddk)
return (dde);
}
/*
 * Attach the IO support structure to an entry on demand.
 *
 * If dde->dde_io is already set this does nothing, so it may be called
 * repeatedly on the same entry. Otherwise a zero-filled ddt_entry_io_t is
 * allocated with KM_SLEEP and stored in dde->dde_io.
 */
void
ddt_alloc_entry_io(ddt_entry_t *dde)
{
	if (dde->dde_io == NULL) {
		dde->dde_io =
		    kmem_zalloc(sizeof (ddt_entry_io_t), KM_SLEEP);
	}
}
static void
ddt_free(const ddt_t *ddt, ddt_entry_t *dde)
{
if (dde->dde_io != NULL) {
for (int p = 0; p < DDT_NPHYS(ddt); p++)
ASSERT3P(dde->dde_lead_zio[p], ==, NULL);
ASSERT3P(dde->dde_io->dde_lead_zio[p], ==, NULL);
if (dde->dde_repair_abd != NULL)
abd_free(dde->dde_repair_abd);
if (dde->dde_io->dde_repair_abd != NULL)
abd_free(dde->dde_io->dde_repair_abd);
kmem_free(dde->dde_io, sizeof (ddt_entry_io_t));
}
cv_destroy(&dde->dde_cv);
kmem_cache_free(ddt_entry_cache, dde);
@ -1191,6 +1207,7 @@ ddt_repair_start(ddt_t *ddt, const blkptr_t *bp)
ddt_key_fill(&ddk, bp);
dde = ddt_alloc(&ddk);
ddt_alloc_entry_io(dde);
for (ddt_type_t type = 0; type < DDT_TYPES; type++) {
for (ddt_class_t class = 0; class < DDT_CLASSES; class++) {
@ -1205,7 +1222,7 @@ ddt_repair_start(ddt_t *ddt, const blkptr_t *bp)
}
}
memset(dde->dde_phys, 0, sizeof (dde->dde_phys));
memset(dde->dde_phys, 0, sizeof (ddt_phys_t) * DDT_NPHYS(ddt));
return (dde);
}
@ -1217,7 +1234,8 @@ ddt_repair_done(ddt_t *ddt, ddt_entry_t *dde)
ddt_enter(ddt);
if (dde->dde_repair_abd != NULL && spa_writeable(ddt->ddt_spa) &&
if (dde->dde_io->dde_repair_abd != NULL &&
spa_writeable(ddt->ddt_spa) &&
avl_find(&ddt->ddt_repair_tree, dde, &where) == NULL)
avl_insert(&ddt->ddt_repair_tree, dde, where);
else
@ -1255,8 +1273,9 @@ ddt_repair_entry(ddt_t *ddt, ddt_entry_t *dde, ddt_entry_t *rdde, zio_t *rio)
continue;
ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk);
zio_nowait(zio_rewrite(zio, zio->io_spa, 0, &blk,
rdde->dde_repair_abd, DDK_GET_PSIZE(rddk), NULL, NULL,
ZIO_PRIORITY_SYNC_WRITE, ZIO_DDT_CHILD_FLAGS(zio), NULL));
rdde->dde_io->dde_repair_abd, DDK_GET_PSIZE(rddk),
NULL, NULL, ZIO_PRIORITY_SYNC_WRITE,
ZIO_DDT_CHILD_FLAGS(zio), NULL));
}
zio_nowait(zio);
@ -1301,7 +1320,8 @@ ddt_sync_entry(ddt_t *ddt, ddt_entry_t *dde, dmu_tx_t *tx, uint64_t txg)
ASSERT(dde->dde_flags & DDE_FLAG_LOADED);
for (int p = 0; p < DDT_NPHYS(ddt); p++) {
ASSERT3P(dde->dde_lead_zio[p], ==, NULL);
ASSERT(dde->dde_io == NULL ||
dde->dde_io->dde_lead_zio[p] == NULL);
ddt_phys_t *ddp = &dde->dde_phys[p];
if (ddp->ddp_phys_birth == 0) {
ASSERT0(ddp->ddp_refcnt);

View File

@ -3265,8 +3265,8 @@ zio_ddt_child_read_done(zio_t *zio)
if (zio->io_error == 0)
ddt_phys_clear(ddp); /* this ddp doesn't need repair */
if (zio->io_error == 0 && dde->dde_repair_abd == NULL)
dde->dde_repair_abd = zio->io_abd;
if (zio->io_error == 0 && dde->dde_io->dde_repair_abd == NULL)
dde->dde_io->dde_repair_abd = zio->io_abd;
else
abd_free(zio->io_abd);
mutex_exit(&pio->io_lock);
@ -3340,8 +3340,8 @@ zio_ddt_read_done(zio_t *zio)
zio_taskq_dispatch(zio, ZIO_TASKQ_ISSUE, B_FALSE);
return (NULL);
}
if (dde->dde_repair_abd != NULL) {
abd_copy(zio->io_abd, dde->dde_repair_abd,
if (dde->dde_io->dde_repair_abd != NULL) {
abd_copy(zio->io_abd, dde->dde_io->dde_repair_abd,
zio->io_size);
zio->io_child_error[ZIO_CHILD_DDT] = 0;
}
@ -3378,7 +3378,7 @@ zio_ddt_collision(zio_t *zio, ddt_t *ddt, ddt_entry_t *dde)
if (DDT_PHYS_IS_DITTO(ddt, p))
continue;
zio_t *lio = dde->dde_lead_zio[p];
zio_t *lio = dde->dde_io->dde_lead_zio[p];
if (lio != NULL && do_raw) {
return (lio->io_size != zio->io_size ||
@ -3472,7 +3472,7 @@ zio_ddt_child_write_ready(zio_t *zio)
ddt_enter(ddt);
ASSERT(dde->dde_lead_zio[p] == zio);
ASSERT(dde->dde_io->dde_lead_zio[p] == zio);
ddt_phys_fill(ddp, zio->io_bp);
@ -3495,8 +3495,8 @@ zio_ddt_child_write_done(zio_t *zio)
ddt_enter(ddt);
ASSERT(ddp->ddp_refcnt == 0);
ASSERT(dde->dde_lead_zio[p] == zio);
dde->dde_lead_zio[p] = NULL;
ASSERT(dde->dde_io->dde_lead_zio[p] == zio);
dde->dde_io->dde_lead_zio[p] = NULL;
if (zio->io_error == 0) {
zio_link_t *zl = NULL;
@ -3563,11 +3563,13 @@ zio_ddt_write(zio_t *zio)
return (zio);
}
if (ddp->ddp_phys_birth != 0 || dde->dde_lead_zio[p] != NULL) {
ddt_alloc_entry_io(dde);
if (ddp->ddp_phys_birth != 0 || dde->dde_io->dde_lead_zio[p] != NULL) {
if (ddp->ddp_phys_birth != 0)
ddt_bp_fill(ddp, bp, txg);
if (dde->dde_lead_zio[p] != NULL)
zio_add_child(zio, dde->dde_lead_zio[p]);
if (dde->dde_io->dde_lead_zio[p] != NULL)
zio_add_child(zio, dde->dde_io->dde_lead_zio[p]);
else
ddt_phys_addref(ddp);
} else if (zio->io_bp_override) {
@ -3583,7 +3585,7 @@ zio_ddt_write(zio_t *zio)
ZIO_DDT_CHILD_FLAGS(zio), &zio->io_bookmark);
zio_push_transform(cio, zio->io_abd, zio->io_size, 0, NULL);
dde->dde_lead_zio[p] = cio;
dde->dde_io->dde_lead_zio[p] = cio;
}
ddt_exit(ddt);