arc_read()/arc_access() refactoring and cleanup
The ARC code has been significantly modified many times over the years, which created a significant amount of tangled and potentially broken code. This should make the arc_access()/arc_read() code somewhat more readable. - Decouple prefetch status tracking from b_refcnt. It made sense originally, but became highly cryptic over the years. Move all the logic into arc_access(). While there, clean up and comment state transitions in arc_access(). Some transitions were weird IMO. - Unify arc_access() calls to arc_read() instead of sometimes calling it from arc_read_done(). To avoid extra state changes and checks, add one more b_refcnt for ARC_FLAG_IO_IN_PROGRESS. - Reimplement ARC_FLAG_WAIT in the case of ARC_FLAG_IO_IN_PROGRESS with the same callback mechanism so as not to falsely account them as hits. Count those as "iohits", an intermediate between "hits" and "misses". While there, call read callbacks in original request order, which should be good for fairness and random speculations/allocations/aggregations. - Introduce additional statistic counters for prefetch, accounting predictive vs prescient and hits vs iohits vs misses. - Remove the hash_lock argument from functions not needing it. - Remove ARC_FLAG_PREDICTIVE_PREFETCH, since it should be the opposite of ARC_FLAG_PRESCIENT_PREFETCH if ARC_FLAG_PREFETCH is set. We may wish to add ARC_FLAG_PRESCIENT_PREFETCH to a few more places. - Fix a few false positive tests found in the process. Reviewed-by: George Wilson <gwilson@delphix.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Richard Yao <richard.yao@alumni.stonybrook.edu> Reviewed-by: Ryan Moeller <ryan@iXsystems.com> Signed-off-by: Alexander Motin <mav@FreeBSD.org> Sponsored by: iXsystems, Inc. Closes #14123
This commit is contained in:
parent
dc8c2f6158
commit
c935fe2e92
|
@ -103,12 +103,12 @@ DEFINE_EVENT(zfs_arc_buf_hdr_class, name, \
|
|||
TP_PROTO(arc_buf_hdr_t *ab), \
|
||||
TP_ARGS(ab))
|
||||
DEFINE_ARC_BUF_HDR_EVENT(zfs_arc__hit);
|
||||
DEFINE_ARC_BUF_HDR_EVENT(zfs_arc__iohit);
|
||||
DEFINE_ARC_BUF_HDR_EVENT(zfs_arc__evict);
|
||||
DEFINE_ARC_BUF_HDR_EVENT(zfs_arc__delete);
|
||||
DEFINE_ARC_BUF_HDR_EVENT(zfs_new_state__mru);
|
||||
DEFINE_ARC_BUF_HDR_EVENT(zfs_new_state__mfu);
|
||||
DEFINE_ARC_BUF_HDR_EVENT(zfs_arc__async__upgrade__sync);
|
||||
DEFINE_ARC_BUF_HDR_EVENT(zfs_arc__demand__hit__predictive__prefetch);
|
||||
DEFINE_ARC_BUF_HDR_EVENT(zfs_l2arc__hit);
|
||||
DEFINE_ARC_BUF_HDR_EVENT(zfs_l2arc__miss);
|
||||
|
||||
|
@ -387,12 +387,12 @@ DEFINE_ARC_WAIT_FOR_EVICTION_EVENT(zfs_arc__wait__for__eviction);
|
|||
#else
|
||||
|
||||
DEFINE_DTRACE_PROBE1(arc__hit);
|
||||
DEFINE_DTRACE_PROBE1(arc__iohit);
|
||||
DEFINE_DTRACE_PROBE1(arc__evict);
|
||||
DEFINE_DTRACE_PROBE1(arc__delete);
|
||||
DEFINE_DTRACE_PROBE1(new_state__mru);
|
||||
DEFINE_DTRACE_PROBE1(new_state__mfu);
|
||||
DEFINE_DTRACE_PROBE1(arc__async__upgrade__sync);
|
||||
DEFINE_DTRACE_PROBE1(arc__demand__hit__predictive__prefetch);
|
||||
DEFINE_DTRACE_PROBE1(l2arc__hit);
|
||||
DEFINE_DTRACE_PROBE1(l2arc__miss);
|
||||
DEFINE_DTRACE_PROBE2(l2arc__read);
|
||||
|
|
|
@ -115,7 +115,6 @@ typedef enum arc_flags
|
|||
ARC_FLAG_PREFETCH = 1 << 2, /* I/O is a prefetch */
|
||||
ARC_FLAG_CACHED = 1 << 3, /* I/O was in cache */
|
||||
ARC_FLAG_L2CACHE = 1 << 4, /* cache in L2ARC */
|
||||
ARC_FLAG_PREDICTIVE_PREFETCH = 1 << 5, /* I/O from zfetch */
|
||||
ARC_FLAG_PRESCIENT_PREFETCH = 1 << 6, /* long min lifespan */
|
||||
|
||||
/*
|
||||
|
|
|
@ -101,9 +101,14 @@ struct arc_callback {
|
|||
boolean_t acb_compressed;
|
||||
boolean_t acb_noauth;
|
||||
boolean_t acb_nobuf;
|
||||
boolean_t acb_wait;
|
||||
int acb_wait_error;
|
||||
kmutex_t acb_wait_lock;
|
||||
kcondvar_t acb_wait_cv;
|
||||
zbookmark_phys_t acb_zb;
|
||||
zio_t *acb_zio_dummy;
|
||||
zio_t *acb_zio_head;
|
||||
arc_callback_t *acb_prev;
|
||||
arc_callback_t *acb_next;
|
||||
};
|
||||
|
||||
|
@ -511,15 +516,27 @@ struct arc_buf_hdr {
|
|||
};
|
||||
|
||||
typedef struct arc_stats {
|
||||
/* Number of requests that were satisfied without I/O. */
|
||||
kstat_named_t arcstat_hits;
|
||||
/* Number of requests for which I/O was already running. */
|
||||
kstat_named_t arcstat_iohits;
|
||||
/* Number of requests for which I/O has to be issued. */
|
||||
kstat_named_t arcstat_misses;
|
||||
/* Same three, but specifically for demand data. */
|
||||
kstat_named_t arcstat_demand_data_hits;
|
||||
kstat_named_t arcstat_demand_data_iohits;
|
||||
kstat_named_t arcstat_demand_data_misses;
|
||||
/* Same three, but specifically for demand metadata. */
|
||||
kstat_named_t arcstat_demand_metadata_hits;
|
||||
kstat_named_t arcstat_demand_metadata_iohits;
|
||||
kstat_named_t arcstat_demand_metadata_misses;
|
||||
/* Same three, but specifically for prefetch data. */
|
||||
kstat_named_t arcstat_prefetch_data_hits;
|
||||
kstat_named_t arcstat_prefetch_data_iohits;
|
||||
kstat_named_t arcstat_prefetch_data_misses;
|
||||
/* Same three, but specifically for prefetch metadata. */
|
||||
kstat_named_t arcstat_prefetch_metadata_hits;
|
||||
kstat_named_t arcstat_prefetch_metadata_iohits;
|
||||
kstat_named_t arcstat_prefetch_metadata_misses;
|
||||
kstat_named_t arcstat_mru_hits;
|
||||
kstat_named_t arcstat_mru_ghost_hits;
|
||||
|
@ -844,8 +861,18 @@ typedef struct arc_stats {
|
|||
kstat_named_t arcstat_meta_max;
|
||||
kstat_named_t arcstat_meta_min;
|
||||
kstat_named_t arcstat_async_upgrade_sync;
|
||||
/* Number of predictive prefetch requests. */
|
||||
kstat_named_t arcstat_predictive_prefetch;
|
||||
/* Number of requests for which predictive prefetch has completed. */
|
||||
kstat_named_t arcstat_demand_hit_predictive_prefetch;
|
||||
/* Number of requests for which predictive prefetch was running. */
|
||||
kstat_named_t arcstat_demand_iohit_predictive_prefetch;
|
||||
/* Number of prescient prefetch requests. */
|
||||
kstat_named_t arcstat_prescient_prefetch;
|
||||
/* Number of requests for which prescient prefetch has completed. */
|
||||
kstat_named_t arcstat_demand_hit_prescient_prefetch;
|
||||
/* Number of requests for which prescient prefetch was running. */
|
||||
kstat_named_t arcstat_demand_iohit_prescient_prefetch;
|
||||
kstat_named_t arcstat_need_free;
|
||||
kstat_named_t arcstat_sys_free;
|
||||
kstat_named_t arcstat_raw_size;
|
||||
|
@ -855,14 +882,19 @@ typedef struct arc_stats {
|
|||
|
||||
typedef struct arc_sums {
|
||||
wmsum_t arcstat_hits;
|
||||
wmsum_t arcstat_iohits;
|
||||
wmsum_t arcstat_misses;
|
||||
wmsum_t arcstat_demand_data_hits;
|
||||
wmsum_t arcstat_demand_data_iohits;
|
||||
wmsum_t arcstat_demand_data_misses;
|
||||
wmsum_t arcstat_demand_metadata_hits;
|
||||
wmsum_t arcstat_demand_metadata_iohits;
|
||||
wmsum_t arcstat_demand_metadata_misses;
|
||||
wmsum_t arcstat_prefetch_data_hits;
|
||||
wmsum_t arcstat_prefetch_data_iohits;
|
||||
wmsum_t arcstat_prefetch_data_misses;
|
||||
wmsum_t arcstat_prefetch_metadata_hits;
|
||||
wmsum_t arcstat_prefetch_metadata_iohits;
|
||||
wmsum_t arcstat_prefetch_metadata_misses;
|
||||
wmsum_t arcstat_mru_hits;
|
||||
wmsum_t arcstat_mru_ghost_hits;
|
||||
|
@ -936,8 +968,12 @@ typedef struct arc_sums {
|
|||
wmsum_t arcstat_prune;
|
||||
aggsum_t arcstat_meta_used;
|
||||
wmsum_t arcstat_async_upgrade_sync;
|
||||
wmsum_t arcstat_predictive_prefetch;
|
||||
wmsum_t arcstat_demand_hit_predictive_prefetch;
|
||||
wmsum_t arcstat_demand_iohit_predictive_prefetch;
|
||||
wmsum_t arcstat_prescient_prefetch;
|
||||
wmsum_t arcstat_demand_hit_prescient_prefetch;
|
||||
wmsum_t arcstat_demand_iohit_prescient_prefetch;
|
||||
wmsum_t arcstat_raw_size;
|
||||
wmsum_t arcstat_cached_only_in_progress;
|
||||
wmsum_t arcstat_abd_chunk_waste_size;
|
||||
|
|
550
module/zfs/arc.c
550
module/zfs/arc.c
File diff suppressed because it is too large
Load Diff
|
@ -185,7 +185,8 @@ static boolean_t
|
|||
traverse_prefetch_metadata(traverse_data_t *td,
|
||||
const blkptr_t *bp, const zbookmark_phys_t *zb)
|
||||
{
|
||||
arc_flags_t flags = ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH;
|
||||
arc_flags_t flags = ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH |
|
||||
ARC_FLAG_PRESCIENT_PREFETCH;
|
||||
int zio_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE;
|
||||
|
||||
if (!(td->td_flags & TRAVERSE_PREFETCH_METADATA))
|
||||
|
|
|
@ -517,13 +517,11 @@ dmu_zfetch_run(zstream_t *zs, boolean_t missed, boolean_t have_lock)
|
|||
issued = 0;
|
||||
for (int64_t blk = pf_start; blk < pf_end; blk++) {
|
||||
issued += dbuf_prefetch_impl(zf->zf_dnode, 0, blk,
|
||||
ZIO_PRIORITY_ASYNC_READ, ARC_FLAG_PREDICTIVE_PREFETCH,
|
||||
dmu_zfetch_done, zs);
|
||||
ZIO_PRIORITY_ASYNC_READ, 0, dmu_zfetch_done, zs);
|
||||
}
|
||||
for (int64_t iblk = ipf_start; iblk < ipf_end; iblk++) {
|
||||
issued += dbuf_prefetch_impl(zf->zf_dnode, 1, iblk,
|
||||
ZIO_PRIORITY_ASYNC_READ, ARC_FLAG_PREDICTIVE_PREFETCH,
|
||||
dmu_zfetch_done, zs);
|
||||
ZIO_PRIORITY_ASYNC_READ, 0, dmu_zfetch_done, zs);
|
||||
}
|
||||
|
||||
if (!have_lock)
|
||||
|
|
|
@ -163,6 +163,7 @@ before_clone=$(get_prop written $TESTPOOL/$TESTFS1)
|
|||
log_must zfs clone $TESTPOOL/$TESTFS1@snap1 $TESTPOOL/$TESTFS1/snap1.clone
|
||||
log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS1/snap1.clone/testfile bs=1M \
|
||||
count=40
|
||||
sync_pool
|
||||
after_clone=$(get_prop written $TESTPOOL/$TESTFS1)
|
||||
within_percent $before_clone $after_clone 99.5 || \
|
||||
log_fail "unexpected written for clone $before_clone $after_clone"
|
||||
|
|
|
@ -80,7 +80,7 @@ log_must fio $FIO_SCRIPTS/mkfiles.fio
|
|||
log_must fio $FIO_SCRIPTS/random_reads.fio
|
||||
|
||||
log_must zpool export $TESTPOOL
|
||||
log_must zpool import -d $VDIR $TESTPOOL
|
||||
log_must zpool import -N -d $VDIR $TESTPOOL
|
||||
|
||||
# Regardless of l2arc_noprefetch, some MFU buffers might be evicted
|
||||
# from ARC, accessed later on as prefetches and transition to MRU as
|
||||
|
|
|
@ -95,6 +95,7 @@ for type in "" "mirror" "raidz2" "draid"; do
|
|||
|
||||
# Fill the pool, verify the vdevs are no longer sparse.
|
||||
file_write -o create -f /$TESTPOOL/file -b 1048576 -c $fill_mb -d R
|
||||
sync_pool $TESTPOOL
|
||||
verify_vdevs "-ge" "$VDEV_MAX_MB" $VDEVS
|
||||
|
||||
# Remove the file, wait for trim, verify the vdevs are now sparse.
|
||||
|
|
|
@ -94,6 +94,7 @@ for type in "" "mirror" "raidz2" "draid"; do
|
|||
|
||||
# Fill the pool, verify the vdevs are no longer sparse.
|
||||
file_write -o create -f /$TESTPOOL/file -b 1048576 -c $fill_mb -d R
|
||||
sync_pool $TESTPOOL
|
||||
verify_vdevs "-ge" "$VDEV_MAX_MB" $VDEVS
|
||||
|
||||
# Remove the file, issue trim, verify the vdevs are now sparse.
|
||||
|
|
|
@ -83,6 +83,7 @@ function do_test {
|
|||
|
||||
# Write to zvol
|
||||
log_must dd if=$datafile1 of=$zvolpath conv=fsync
|
||||
sync_pool
|
||||
|
||||
# Record how much space we've used (should be 5MB, with 128k
|
||||
# of tolerance).
|
||||
|
|
Loading…
Reference in New Issue