From 95f71c019d7c3e3b728a9b05e2117ce6b09f1b87 Mon Sep 17 00:00:00 2001 From: ednadolski-ix <137826107+ednadolski-ix@users.noreply.github.com> Date: Fri, 1 Sep 2023 19:00:30 -0600 Subject: [PATCH] Selectable block allocators ZFS historically has had several space allocators that were dynamically selectable. While these have been retained in OpenZFS, only a single allocator has been statically compiled in. This patch compiles all allocators for OpenZFS and provides a module parameter to allow for manual selection between them. Reviewed-by: Brian Behlendorf Reviewed-by: Ameer Hamza Reviewed-by: Alexander Motin Signed-off-by: Edmund Nadolski Closes #15218 --- include/os/freebsd/spl/sys/mod_os.h | 3 + include/sys/metaslab.h | 1 + include/sys/spa.h | 3 + include/sys/spa_impl.h | 3 + module/os/freebsd/zfs/sysctl_os.c | 18 ++++++ module/os/linux/zfs/spa_misc_os.c | 12 ++++ module/zfs/metaslab.c | 98 +++++++++++++++++++++-------- module/zfs/spa.c | 14 +++-- module/zfs/spa_misc.c | 6 ++ 9 files changed, 127 insertions(+), 31 deletions(-) diff --git a/include/os/freebsd/spl/sys/mod_os.h b/include/os/freebsd/spl/sys/mod_os.h index 77ce75ca3f..08d983c51f 100644 --- a/include/os/freebsd/spl/sys/mod_os.h +++ b/include/os/freebsd/spl/sys/mod_os.h @@ -73,6 +73,9 @@ #define param_set_deadman_failmode_args(var) \ CTLTYPE_STRING, NULL, 0, param_set_deadman_failmode, "A" +#define param_set_active_allocator_args(var) \ + CTLTYPE_STRING, NULL, 0, param_set_active_allocator, "A" + #define param_set_deadman_synctime_args(var) \ CTLTYPE_U64, NULL, 0, param_set_deadman_synctime, "QU" diff --git a/include/sys/metaslab.h b/include/sys/metaslab.h index 0df6e5f81f..815b5d0c9c 100644 --- a/include/sys/metaslab.h +++ b/include/sys/metaslab.h @@ -39,6 +39,7 @@ extern "C" { typedef struct metaslab_ops { + const char *msop_name; uint64_t (*msop_alloc)(metaslab_t *, uint64_t); } metaslab_ops_t; diff --git a/include/sys/spa.h b/include/sys/spa.h index b908556874..18062d3f2a 100644 --- a/include/sys/spa.h +++ b/include/sys/spa.h @@ -1056,6 +1056,8 @@ extern uint64_t spa_deadman_synctime(spa_t *spa); extern uint64_t spa_deadman_ziotime(spa_t *spa); extern uint64_t spa_dirty_data(spa_t *spa); extern spa_autotrim_t spa_get_autotrim(spa_t *spa); +extern int spa_get_allocator(spa_t *spa); +extern void spa_set_allocator(spa_t *spa, const char *allocator); /* Miscellaneous support routines */ extern void spa_load_failed(spa_t *spa, const char *fmt, ...) @@ -1207,6 +1209,7 @@ int param_set_deadman_ziotime(ZFS_MODULE_PARAM_ARGS); int param_set_deadman_synctime(ZFS_MODULE_PARAM_ARGS); int param_set_slop_shift(ZFS_MODULE_PARAM_ARGS); int param_set_deadman_failmode(ZFS_MODULE_PARAM_ARGS); +int param_set_active_allocator(ZFS_MODULE_PARAM_ARGS); #ifdef ZFS_DEBUG #define dprintf_bp(bp, fmt, ...) do { \ diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h index 588c72f6e4..1a04bedc31 100644 --- a/include/sys/spa_impl.h +++ b/include/sys/spa_impl.h @@ -263,6 +263,7 @@ struct spa { */ spa_alloc_t *spa_allocs; int spa_alloc_count; + int spa_active_allocator; /* selectable allocator */ spa_aux_vdev_t spa_spares; /* hot spares */ spa_aux_vdev_t spa_l2cache; /* L2ARC cache devices */ @@ -467,6 +468,8 @@ extern int param_set_deadman_failmode_common(const char *val); extern void spa_set_deadman_synctime(hrtime_t ns); extern void spa_set_deadman_ziotime(hrtime_t ns); extern const char *spa_history_zone(void); +extern const char *zfs_active_allocator; +extern int param_set_active_allocator_common(const char *val); #ifdef __cplusplus } diff --git a/module/os/freebsd/zfs/sysctl_os.c b/module/os/freebsd/zfs/sysctl_os.c index 8ae2f23c3e..ba9a95e4a6 100644 --- a/module/os/freebsd/zfs/sysctl_os.c +++ b/module/os/freebsd/zfs/sysctl_os.c @@ -503,6 +503,24 @@ SYSCTL_UINT(_vfs_zfs_zfetch, OID_AUTO, max_idistance, /* metaslab.c */ +int +param_set_active_allocator(SYSCTL_HANDLER_ARGS) +{ + char buf[16]; + int rc; + + if (req->newptr == NULL) + strlcpy(buf, zfs_active_allocator, sizeof (buf)); + + rc = sysctl_handle_string(oidp, buf, sizeof (buf), req); + if (rc || req->newptr == NULL) + return (rc); + if (strcmp(buf, zfs_active_allocator) == 0) + return (0); + + return (param_set_active_allocator_common(buf)); +} + /* * In pools where the log space map feature is not enabled we touch * multiple metaslabs (and their respective space maps) with each diff --git a/module/os/linux/zfs/spa_misc_os.c b/module/os/linux/zfs/spa_misc_os.c index 3efc8b9644..c8cbedcd51 100644 --- a/module/os/linux/zfs/spa_misc_os.c +++ b/module/os/linux/zfs/spa_misc_os.c @@ -103,6 +103,18 @@ param_set_slop_shift(const char *buf, zfs_kernel_param_t *kp) return (0); } +int +param_set_active_allocator(const char *val, zfs_kernel_param_t *kp) +{ + int error; + + error = -param_set_active_allocator_common(val); + if (error == 0) + error = param_set_charp(val, kp); + + return (error); +} + const char * spa_history_zone(void) { diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c index 20dc934593..dd4ff77e6f 100644 --- a/module/zfs/metaslab.c +++ b/module/zfs/metaslab.c @@ -40,8 +40,6 @@ #include #include -#define WITH_DF_BLOCK_ALLOCATOR - #define GANG_ALLOCATION(flags) \ ((flags) & (METASLAB_GANG_CHILD | METASLAB_GANG_HEADER)) @@ -1622,9 +1620,6 @@ metaslab_block_find(zfs_btree_t *t, range_tree_t *rt, uint64_t start, return (rs); } -#if defined(WITH_DF_BLOCK_ALLOCATOR) || \ - defined(WITH_CF_BLOCK_ALLOCATOR) - /* * This is a helper function that can be used by the allocator to find a * suitable block to allocate. This will search the specified B-tree looking @@ -1659,9 +1654,74 @@ metaslab_block_picker(range_tree_t *rt, uint64_t *cursor, uint64_t size, *cursor = 0; return (-1ULL); } -#endif /* WITH_DF/CF_BLOCK_ALLOCATOR */ -#if defined(WITH_DF_BLOCK_ALLOCATOR) +static uint64_t metaslab_df_alloc(metaslab_t *msp, uint64_t size); +static uint64_t metaslab_cf_alloc(metaslab_t *msp, uint64_t size); +static uint64_t metaslab_ndf_alloc(metaslab_t *msp, uint64_t size); +metaslab_ops_t *metaslab_allocator(spa_t *spa); + +static metaslab_ops_t metaslab_allocators[] = { + { "dynamic", metaslab_df_alloc }, + { "cursor", metaslab_cf_alloc }, + { "new-dynamic", metaslab_ndf_alloc }, +}; + +static int +spa_find_allocator_byname(const char *val) +{ + int a = ARRAY_SIZE(metaslab_allocators) - 1; + if (strcmp("new-dynamic", val) == 0) + return (-1); /* remove when ndf is working */ + for (; a >= 0; a--) { + if (strcmp(val, metaslab_allocators[a].msop_name) == 0) + return (a); + } + return (-1); +} + +void +spa_set_allocator(spa_t *spa, const char *allocator) +{ + int a = spa_find_allocator_byname(allocator); + if (a < 0) a = 0; + spa->spa_active_allocator = a; + zfs_dbgmsg("spa allocator: %s\n", metaslab_allocators[a].msop_name); +} + +int +spa_get_allocator(spa_t *spa) +{ + return (spa->spa_active_allocator); +} + +#if defined(_KERNEL) +int +param_set_active_allocator_common(const char *val) +{ + char *p; + + if (val == NULL) + return (SET_ERROR(EINVAL)); + + if ((p = strchr(val, '\n')) != NULL) + *p = '\0'; + + int a = spa_find_allocator_byname(val); + if (a < 0) + return (SET_ERROR(EINVAL)); + + zfs_active_allocator = metaslab_allocators[a].msop_name; + return (0); +} +#endif + +metaslab_ops_t * +metaslab_allocator(spa_t *spa) +{ + int allocator = spa_get_allocator(spa); + return (&metaslab_allocators[allocator]); +} + /* * ========================================================================== * Dynamic Fit (df) block allocator @@ -1736,12 +1796,6 @@ metaslab_df_alloc(metaslab_t *msp, uint64_t size) return (offset); } -const metaslab_ops_t zfs_metaslab_ops = { - metaslab_df_alloc -}; -#endif /* WITH_DF_BLOCK_ALLOCATOR */ - -#if defined(WITH_CF_BLOCK_ALLOCATOR) /* * ========================================================================== * Cursor fit block allocator - @@ -1784,12 +1838,6 @@ metaslab_cf_alloc(metaslab_t *msp, uint64_t size) return (offset); } -const metaslab_ops_t zfs_metaslab_ops = { - metaslab_cf_alloc -}; -#endif /* WITH_CF_BLOCK_ALLOCATOR */ - -#if defined(WITH_NDF_BLOCK_ALLOCATOR) /* * ========================================================================== * New dynamic fit allocator - @@ -1846,12 +1894,6 @@ metaslab_ndf_alloc(metaslab_t *msp, uint64_t size) return (-1ULL); } -const metaslab_ops_t zfs_metaslab_ops = { - metaslab_ndf_alloc -}; -#endif /* WITH_NDF_BLOCK_ALLOCATOR */ - - /* * ========================================================================== * Metaslabs @@ -6232,3 +6274,9 @@ ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, try_hard_before_gang, INT, ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, find_max_tries, UINT, ZMOD_RW, "Normally only consider this many of the best metaslabs in each vdev"); + +/* BEGIN CSTYLED */ +ZFS_MODULE_PARAM_CALL(zfs, zfs_, active_allocator, + param_set_active_allocator, param_get_charp, ZMOD_RW, + "SPA active allocator"); +/* END CSTYLED */ diff --git a/module/zfs/spa.c b/module/zfs/spa.c index 88ee4ea9f4..cda62f939c 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -1295,24 +1295,26 @@ spa_thread(void *arg) } #endif +extern metaslab_ops_t *metaslab_allocator(spa_t *spa); + /* * Activate an uninitialized pool. */ static void spa_activate(spa_t *spa, spa_mode_t mode) { + metaslab_ops_t *msp = metaslab_allocator(spa); ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED); spa->spa_state = POOL_STATE_ACTIVE; spa->spa_mode = mode; spa->spa_read_spacemaps = spa_mode_readable_spacemaps; - spa->spa_normal_class = metaslab_class_create(spa, &zfs_metaslab_ops); - spa->spa_log_class = metaslab_class_create(spa, &zfs_metaslab_ops); - spa->spa_embedded_log_class = - metaslab_class_create(spa, &zfs_metaslab_ops); - spa->spa_special_class = metaslab_class_create(spa, &zfs_metaslab_ops); - spa->spa_dedup_class = metaslab_class_create(spa, &zfs_metaslab_ops); + spa->spa_normal_class = metaslab_class_create(spa, msp); + spa->spa_log_class = metaslab_class_create(spa, msp); + spa->spa_embedded_log_class = metaslab_class_create(spa, msp); + spa->spa_special_class = metaslab_class_create(spa, msp); + spa->spa_dedup_class = metaslab_class_create(spa, msp); /* Try to create a covering process */ mutex_enter(&spa->spa_proc_lock); diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index 3b355e0deb..413476196b 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -389,6 +389,11 @@ static const uint64_t spa_min_slop = 128ULL * 1024 * 1024; static const uint64_t spa_max_slop = 128ULL * 1024 * 1024 * 1024; static const int spa_allocators = 4; +/* + * Spa active allocator. + * Valid values are zfs_active_allocator=. + */ +const char *zfs_active_allocator = "dynamic"; void spa_load_failed(spa_t *spa, const char *fmt, ...) @@ -710,6 +715,7 @@ spa_add(const char *name, nvlist_t *config, const char *altroot) spa->spa_deadman_synctime = MSEC2NSEC(zfs_deadman_synctime_ms); spa->spa_deadman_ziotime = MSEC2NSEC(zfs_deadman_ziotime_ms); spa_set_deadman_failmode(spa, zfs_deadman_failmode); + spa_set_allocator(spa, zfs_active_allocator); zfs_refcount_create(&spa->spa_refcount); spa_config_lock_init(spa);