Remove fastwrite mechanism.
Fastwrite was introduced many years ago to improve ZIL writes spread between multiple top-level vdevs by tracking the number of allocated but not yet written blocks and choosing the vdev with the smaller count. It was supposed to reduce ZIL's knowledge about allocation, but it actually made ZIL report its allocations to the allocation code even more actively, complicating both the ZIL and metaslab code. On top of that, it seems the ZIO_FLAG_FASTWRITE setting in dmu_sync() was lost many years ago, and that was one of the declared benefits. Plus, the introduction of the embedded log metaslab class solved another problem, with the allocation rotor accounting for both normal and log allocations, since in most cases those are now in different metaslab classes. After all that, I'd prefer to simplify the already too complicated ZIL, ZIO and metaslab code if the benefit of the complexity is not obvious. Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: George Wilson <george.wilson@delphix.com> Signed-off-by: Alexander Motin <mav@FreeBSD.org> Sponsored by: iXsystems, Inc. Closes #15107
This commit is contained in:
parent
5bdfff5cfc
commit
b22bab2547
|
@ -80,7 +80,6 @@ uint64_t metaslab_largest_allocatable(metaslab_t *);
|
|||
#define METASLAB_ASYNC_ALLOC 0x8
|
||||
#define METASLAB_DONT_THROTTLE 0x10
|
||||
#define METASLAB_MUST_RESERVE 0x20
|
||||
#define METASLAB_FASTWRITE 0x40
|
||||
#define METASLAB_ZIL 0x80
|
||||
|
||||
int metaslab_alloc(spa_t *, metaslab_class_t *, uint64_t,
|
||||
|
@ -96,8 +95,6 @@ void metaslab_unalloc_dva(spa_t *, const dva_t *, uint64_t);
|
|||
int metaslab_claim(spa_t *, const blkptr_t *, uint64_t);
|
||||
int metaslab_claim_impl(vdev_t *, uint64_t, uint64_t, uint64_t);
|
||||
void metaslab_check_free(spa_t *, const blkptr_t *);
|
||||
void metaslab_fastwrite_mark(spa_t *, const blkptr_t *);
|
||||
void metaslab_fastwrite_unmark(spa_t *, const blkptr_t *);
|
||||
|
||||
void metaslab_stat_init(void);
|
||||
void metaslab_stat_fini(void);
|
||||
|
|
|
@ -266,7 +266,6 @@ struct vdev {
|
|||
metaslab_group_t *vdev_mg; /* metaslab group */
|
||||
metaslab_group_t *vdev_log_mg; /* embedded slog metaslab group */
|
||||
metaslab_t **vdev_ms; /* metaslab array */
|
||||
uint64_t vdev_pending_fastwrite; /* allocated fastwrites */
|
||||
txg_list_t vdev_ms_list; /* per-txg dirty metaslab lists */
|
||||
txg_list_t vdev_dtl_list; /* per-txg dirty DTL lists */
|
||||
txg_node_t vdev_txg_node; /* per-txg dirty vdev linkage */
|
||||
|
|
|
@ -91,7 +91,6 @@ typedef enum {
|
|||
typedef struct lwb {
|
||||
zilog_t *lwb_zilog; /* back pointer to log struct */
|
||||
blkptr_t lwb_blk; /* on disk address of this log blk */
|
||||
boolean_t lwb_fastwrite; /* is blk marked for fastwrite? */
|
||||
boolean_t lwb_slog; /* lwb_blk is on SLOG device */
|
||||
boolean_t lwb_indirect; /* do not postpone zil_lwb_commit() */
|
||||
int lwb_nused; /* # used bytes in buffer */
|
||||
|
|
|
@ -222,7 +222,6 @@ typedef uint64_t zio_flag_t;
|
|||
#define ZIO_FLAG_NOPWRITE (1ULL << 28)
|
||||
#define ZIO_FLAG_REEXECUTED (1ULL << 29)
|
||||
#define ZIO_FLAG_DELEGATED (1ULL << 30)
|
||||
#define ZIO_FLAG_FASTWRITE (1ULL << 31)
|
||||
|
||||
#define ZIO_FLAG_MUSTSUCCEED 0
|
||||
#define ZIO_FLAG_RAW (ZIO_FLAG_RAW_COMPRESS | ZIO_FLAG_RAW_ENCRYPT)
|
||||
|
|
|
@ -5101,7 +5101,7 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
|
|||
zio_alloc_list_t *zal, int allocator)
|
||||
{
|
||||
metaslab_class_allocator_t *mca = &mc->mc_allocator[allocator];
|
||||
metaslab_group_t *mg, *fast_mg, *rotor;
|
||||
metaslab_group_t *mg, *rotor;
|
||||
vdev_t *vd;
|
||||
boolean_t try_hard = B_FALSE;
|
||||
|
||||
|
@ -5164,15 +5164,6 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
|
|||
} else if (d != 0) {
|
||||
vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[d - 1]));
|
||||
mg = vd->vdev_mg->mg_next;
|
||||
} else if (flags & METASLAB_FASTWRITE) {
|
||||
mg = fast_mg = mca->mca_rotor;
|
||||
|
||||
do {
|
||||
if (fast_mg->mg_vd->vdev_pending_fastwrite <
|
||||
mg->mg_vd->vdev_pending_fastwrite)
|
||||
mg = fast_mg;
|
||||
} while ((fast_mg = fast_mg->mg_next) != mca->mca_rotor);
|
||||
|
||||
} else {
|
||||
ASSERT(mca->mca_rotor != NULL);
|
||||
mg = mca->mca_rotor;
|
||||
|
@ -5297,7 +5288,7 @@ top:
|
|||
mg->mg_bias = 0;
|
||||
}
|
||||
|
||||
if ((flags & METASLAB_FASTWRITE) ||
|
||||
if ((flags & METASLAB_ZIL) ||
|
||||
atomic_add_64_nv(&mca->mca_aliquot, asize) >=
|
||||
mg->mg_aliquot + mg->mg_bias) {
|
||||
mca->mca_rotor = mg->mg_next;
|
||||
|
@ -5310,11 +5301,6 @@ top:
|
|||
((flags & METASLAB_GANG_HEADER) ? 1 : 0));
|
||||
DVA_SET_ASIZE(&dva[d], asize);
|
||||
|
||||
if (flags & METASLAB_FASTWRITE) {
|
||||
atomic_add_64(&vd->vdev_pending_fastwrite,
|
||||
psize);
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
next:
|
||||
|
@ -5950,55 +5936,6 @@ metaslab_claim(spa_t *spa, const blkptr_t *bp, uint64_t txg)
|
|||
return (error);
|
||||
}
|
||||
|
||||
void
|
||||
metaslab_fastwrite_mark(spa_t *spa, const blkptr_t *bp)
|
||||
{
|
||||
const dva_t *dva = bp->blk_dva;
|
||||
int ndvas = BP_GET_NDVAS(bp);
|
||||
uint64_t psize = BP_GET_PSIZE(bp);
|
||||
int d;
|
||||
vdev_t *vd;
|
||||
|
||||
ASSERT(!BP_IS_HOLE(bp));
|
||||
ASSERT(!BP_IS_EMBEDDED(bp));
|
||||
ASSERT(psize > 0);
|
||||
|
||||
spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
|
||||
|
||||
for (d = 0; d < ndvas; d++) {
|
||||
if ((vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[d]))) == NULL)
|
||||
continue;
|
||||
atomic_add_64(&vd->vdev_pending_fastwrite, psize);
|
||||
}
|
||||
|
||||
spa_config_exit(spa, SCL_VDEV, FTAG);
|
||||
}
|
||||
|
||||
void
|
||||
metaslab_fastwrite_unmark(spa_t *spa, const blkptr_t *bp)
|
||||
{
|
||||
const dva_t *dva = bp->blk_dva;
|
||||
int ndvas = BP_GET_NDVAS(bp);
|
||||
uint64_t psize = BP_GET_PSIZE(bp);
|
||||
int d;
|
||||
vdev_t *vd;
|
||||
|
||||
ASSERT(!BP_IS_HOLE(bp));
|
||||
ASSERT(!BP_IS_EMBEDDED(bp));
|
||||
ASSERT(psize > 0);
|
||||
|
||||
spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
|
||||
|
||||
for (d = 0; d < ndvas; d++) {
|
||||
if ((vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[d]))) == NULL)
|
||||
continue;
|
||||
ASSERT3U(vd->vdev_pending_fastwrite, >=, psize);
|
||||
atomic_sub_64(&vd->vdev_pending_fastwrite, psize);
|
||||
}
|
||||
|
||||
spa_config_exit(spa, SCL_VDEV, FTAG);
|
||||
}
|
||||
|
||||
static void
|
||||
metaslab_check_free_impl_cb(uint64_t inner, vdev_t *vd, uint64_t offset,
|
||||
uint64_t size, void *arg)
|
||||
|
|
|
@ -1192,7 +1192,6 @@ vdev_top_transfer(vdev_t *svd, vdev_t *tvd)
|
|||
|
||||
ASSERT(tvd == tvd->vdev_top);
|
||||
|
||||
tvd->vdev_pending_fastwrite = svd->vdev_pending_fastwrite;
|
||||
tvd->vdev_ms_array = svd->vdev_ms_array;
|
||||
tvd->vdev_ms_shift = svd->vdev_ms_shift;
|
||||
tvd->vdev_ms_count = svd->vdev_ms_count;
|
||||
|
@ -1655,7 +1654,6 @@ vdev_metaslab_fini(vdev_t *vd)
|
|||
}
|
||||
}
|
||||
ASSERT0(vd->vdev_ms_count);
|
||||
ASSERT3U(vd->vdev_pending_fastwrite, ==, 0);
|
||||
}
|
||||
|
||||
typedef struct vdev_probe_stats {
|
||||
|
|
|
@ -761,15 +761,13 @@ zil_lwb_vdev_compare(const void *x1, const void *x2)
|
|||
}
|
||||
|
||||
static lwb_t *
|
||||
zil_alloc_lwb(zilog_t *zilog, blkptr_t *bp, boolean_t slog, uint64_t txg,
|
||||
boolean_t fastwrite)
|
||||
zil_alloc_lwb(zilog_t *zilog, blkptr_t *bp, boolean_t slog, uint64_t txg)
|
||||
{
|
||||
lwb_t *lwb;
|
||||
|
||||
lwb = kmem_cache_alloc(zil_lwb_cache, KM_SLEEP);
|
||||
lwb->lwb_zilog = zilog;
|
||||
lwb->lwb_blk = *bp;
|
||||
lwb->lwb_fastwrite = fastwrite;
|
||||
lwb->lwb_slog = slog;
|
||||
lwb->lwb_indirect = B_FALSE;
|
||||
if (BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_ZILOG2) {
|
||||
|
@ -916,7 +914,6 @@ zil_create(zilog_t *zilog)
|
|||
dmu_tx_t *tx = NULL;
|
||||
blkptr_t blk;
|
||||
int error = 0;
|
||||
boolean_t fastwrite = FALSE;
|
||||
boolean_t slog = FALSE;
|
||||
dsl_dataset_t *ds = dmu_objset_ds(zilog->zl_os);
|
||||
|
||||
|
@ -949,8 +946,6 @@ zil_create(zilog_t *zilog)
|
|||
|
||||
error = zio_alloc_zil(zilog->zl_spa, zilog->zl_os, txg, &blk,
|
||||
ZIL_MIN_BLKSZ, &slog);
|
||||
fastwrite = TRUE;
|
||||
|
||||
if (error == 0)
|
||||
zil_init_log_chain(zilog, &blk);
|
||||
}
|
||||
|
@ -959,7 +954,7 @@ zil_create(zilog_t *zilog)
|
|||
* Allocate a log write block (lwb) for the first log block.
|
||||
*/
|
||||
if (error == 0)
|
||||
lwb = zil_alloc_lwb(zilog, &blk, slog, txg, fastwrite);
|
||||
lwb = zil_alloc_lwb(zilog, &blk, slog, txg);
|
||||
|
||||
/*
|
||||
* If we just allocated the first log block, commit our transaction
|
||||
|
@ -1044,9 +1039,6 @@ zil_destroy(zilog_t *zilog, boolean_t keep_first)
|
|||
ASSERT(zh->zh_claim_txg == 0);
|
||||
VERIFY(!keep_first);
|
||||
while ((lwb = list_remove_head(&zilog->zl_lwb_list)) != NULL) {
|
||||
if (lwb->lwb_fastwrite)
|
||||
metaslab_fastwrite_unmark(zilog->zl_spa,
|
||||
&lwb->lwb_blk);
|
||||
if (lwb->lwb_buf != NULL)
|
||||
zio_buf_free(lwb->lwb_buf, lwb->lwb_sz);
|
||||
zio_free(zilog->zl_spa, txg, &lwb->lwb_blk);
|
||||
|
@ -1551,7 +1543,6 @@ zil_lwb_write_done(zio_t *zio)
|
|||
ASSERT3S(lwb->lwb_state, ==, LWB_STATE_ISSUED);
|
||||
lwb->lwb_state = LWB_STATE_WRITE_DONE;
|
||||
lwb->lwb_write_zio = NULL;
|
||||
lwb->lwb_fastwrite = FALSE;
|
||||
nlwb = list_next(&zilog->zl_lwb_list, lwb);
|
||||
mutex_exit(&zilog->zl_lock);
|
||||
|
||||
|
@ -1718,20 +1709,12 @@ zil_lwb_write_open(zilog_t *zilog, lwb_t *lwb)
|
|||
ZB_ZIL_OBJECT, ZB_ZIL_LEVEL,
|
||||
lwb->lwb_blk.blk_cksum.zc_word[ZIL_ZC_SEQ]);
|
||||
|
||||
/* Lock so zil_sync() doesn't fastwrite_unmark after zio is created */
|
||||
mutex_enter(&zilog->zl_lock);
|
||||
if (!lwb->lwb_fastwrite) {
|
||||
metaslab_fastwrite_mark(zilog->zl_spa, &lwb->lwb_blk);
|
||||
lwb->lwb_fastwrite = 1;
|
||||
}
|
||||
|
||||
lwb->lwb_write_zio = zio_rewrite(lwb->lwb_root_zio, zilog->zl_spa, 0,
|
||||
&lwb->lwb_blk, lwb_abd, BP_GET_LSIZE(&lwb->lwb_blk),
|
||||
zil_lwb_write_done, lwb, prio,
|
||||
ZIO_FLAG_CANFAIL | ZIO_FLAG_FASTWRITE, &zb);
|
||||
zil_lwb_write_done, lwb, prio, ZIO_FLAG_CANFAIL, &zb);
|
||||
|
||||
mutex_enter(&zilog->zl_lock);
|
||||
lwb->lwb_state = LWB_STATE_OPENED;
|
||||
|
||||
zil_lwb_set_zio_dependency(zilog, lwb);
|
||||
zilog->zl_last_lwb_opened = lwb;
|
||||
mutex_exit(&zilog->zl_lock);
|
||||
|
@ -1864,7 +1847,7 @@ zil_lwb_write_close(zilog_t *zilog, lwb_t *lwb, list_t *ilwbs)
|
|||
/*
|
||||
* Allocate a new log write block (lwb).
|
||||
*/
|
||||
nlwb = zil_alloc_lwb(zilog, bp, slog, txg, TRUE);
|
||||
nlwb = zil_alloc_lwb(zilog, bp, slog, txg);
|
||||
}
|
||||
|
||||
lwb->lwb_state = LWB_STATE_ISSUED;
|
||||
|
@ -3651,18 +3634,6 @@ zil_sync(zilog_t *zilog, dmu_tx_t *tx)
|
|||
BP_ZERO(&zh->zh_log);
|
||||
}
|
||||
|
||||
/*
|
||||
* Remove fastwrite on any blocks that have been pre-allocated for
|
||||
* the next commit. This prevents fastwrite counter pollution by
|
||||
* unused, long-lived LWBs.
|
||||
*/
|
||||
for (; lwb != NULL; lwb = list_next(&zilog->zl_lwb_list, lwb)) {
|
||||
if (lwb->lwb_fastwrite && !lwb->lwb_write_zio) {
|
||||
metaslab_fastwrite_unmark(zilog->zl_spa, &lwb->lwb_blk);
|
||||
lwb->lwb_fastwrite = 0;
|
||||
}
|
||||
}
|
||||
|
||||
mutex_exit(&zilog->zl_lock);
|
||||
}
|
||||
|
||||
|
@ -3895,9 +3866,6 @@ zil_close(zilog_t *zilog)
|
|||
ASSERT(list_is_empty(&zilog->zl_lwb_list));
|
||||
ASSERT3S(lwb->lwb_state, !=, LWB_STATE_ISSUED);
|
||||
|
||||
if (lwb->lwb_fastwrite)
|
||||
metaslab_fastwrite_unmark(zilog->zl_spa, &lwb->lwb_blk);
|
||||
|
||||
zio_buf_free(lwb->lwb_buf, lwb->lwb_sz);
|
||||
zil_free_lwb(zilog, lwb);
|
||||
}
|
||||
|
|
|
@ -3024,11 +3024,6 @@ zio_write_gang_block(zio_t *pio, metaslab_class_t *mc)
|
|||
*/
|
||||
pio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
|
||||
|
||||
/*
|
||||
* We didn't allocate this bp, so make sure it doesn't get unmarked.
|
||||
*/
|
||||
pio->io_flags &= ~ZIO_FLAG_FASTWRITE;
|
||||
|
||||
zio_nowait(zio);
|
||||
|
||||
return (pio);
|
||||
|
@ -3616,7 +3611,6 @@ zio_dva_allocate(zio_t *zio)
|
|||
ASSERT3U(zio->io_prop.zp_copies, <=, spa_max_replication(spa));
|
||||
ASSERT3U(zio->io_size, ==, BP_GET_PSIZE(bp));
|
||||
|
||||
flags |= (zio->io_flags & ZIO_FLAG_FASTWRITE) ? METASLAB_FASTWRITE : 0;
|
||||
if (zio->io_flags & ZIO_FLAG_NODATA)
|
||||
flags |= METASLAB_DONT_THROTTLE;
|
||||
if (zio->io_flags & ZIO_FLAG_GANG_CHILD)
|
||||
|
@ -3776,7 +3770,7 @@ zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg, blkptr_t *new_bp,
|
|||
* of, so we just hash the objset ID to pick the allocator to get
|
||||
* some parallelism.
|
||||
*/
|
||||
int flags = METASLAB_FASTWRITE | METASLAB_ZIL;
|
||||
int flags = METASLAB_ZIL;
|
||||
int allocator = (uint_t)cityhash4(0, 0, 0,
|
||||
os->os_dsl_dataset->ds_object) % spa->spa_alloc_count;
|
||||
error = metaslab_alloc(spa, spa_log_class(spa), size, new_bp, 1,
|
||||
|
@ -4931,12 +4925,6 @@ zio_done(zio_t *zio)
|
|||
zfs_ereport_free_checksum(zcr);
|
||||
}
|
||||
|
||||
if (zio->io_flags & ZIO_FLAG_FASTWRITE && zio->io_bp &&
|
||||
!BP_IS_HOLE(zio->io_bp) && !BP_IS_EMBEDDED(zio->io_bp) &&
|
||||
!(zio->io_flags & ZIO_FLAG_NOPWRITE)) {
|
||||
metaslab_fastwrite_unmark(zio->io_spa, zio->io_bp);
|
||||
}
|
||||
|
||||
/*
|
||||
* It is the responsibility of the done callback to ensure that this
|
||||
* particular zio is no longer discoverable for adoption, and as
|
||||
|
|
Loading…
Reference in New Issue