Implement shared log pool for ZFS

Signed-off-by: Paul Dagnelie <pcd@delphix.com>
This commit is contained in:
Paul Dagnelie 2022-12-02 12:54:48 -08:00
parent 62e7d3c89e
commit f00af48295
70 changed files with 2905 additions and 641 deletions

View File

@ -21,7 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2019 by Delphix. All rights reserved.
* Copyright (c) 2011, 2024 by Delphix. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
* Copyright 2016 Nexenta Systems, Inc.
* Copyright (c) 2017, 2018 Lawrence Livermore National Security, LLC.
@ -5790,6 +5790,7 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
if (BP_GET_DEDUP(bp)) {
ddt_t *ddt;
ddt_entry_t *dde;
ASSERT3P(zilog, ==, NULL);
ddt = ddt_select(zcb->zcb_spa, bp);
ddt_enter(ddt);
@ -5807,9 +5808,19 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
ddt_exit(ddt);
}
VERIFY3U(zio_wait(zio_claim(NULL, zcb->zcb_spa,
/*
* Theoretically, we could try to track leaks here, but it would
* require also importing the shared log pool and processing the
* chain map and space maps for it. The ZIL currently doesn't have
* much facility to support multiple pools at once, so we leave this
* for future work.
*/
if (zilog && zilog->zl_spa != zilog->zl_io_spa)
return;
VERIFY0(zio_wait(zio_claim(NULL, zcb->zcb_spa,
refcnt ? 0 : spa_min_claim_txg(zcb->zcb_spa),
bp, NULL, NULL, ZIO_FLAG_CANFAIL)), ==, 0);
bp, NULL, NULL, ZIO_FLAG_CANFAIL)));
}
static void
@ -6802,6 +6813,47 @@ zdb_brt_entry_compare(const void *zcn1, const void *zcn2)
return (cmp);
}
/*
 * zil_parse_raw() block callback: tally one chain-map block into the zdb
 * block-count accumulator passed through 'arg'.
 */
static int
chain_map_count_blk_cb(spa_t *spa, const blkptr_t *bp, void *arg)
{
	(void) spa;

	zdb_count_block((zdb_cb_t *)arg, NULL, bp, ZDB_OT_OTHER);
	return (0);
}
/*
 * zil_parse_raw() log-record callback: count the block referenced by a
 * TX_WRITE record, skipping holes and all other record types.
 */
static int
chain_map_count_lr_cb(spa_t *spa, const lr_t *lrc, void *arg)
{
	const lr_write_t *lr = (const lr_write_t *)lrc;
	const blkptr_t *bp = &lr->lr_blkptr;

	(void) spa;

	/* Only TX_WRITE records embed a block pointer worth counting. */
	if (lrc->lrc_txtype != TX_WRITE || BP_IS_HOLE(bp))
		return (0);

	zdb_count_block(arg, NULL, bp, ZDB_OT_OTHER);
	return (0);
}
/*
 * Count the blocks in the chain maps.
 *
 * Walks every client pool registered in the shared log pool's chain map,
 * and within each, every objset's chain head, feeding each chain through
 * zil_parse_raw() with the counting callbacks above.
 */
static void
chain_map_count_blocks(spa_t *spa, zdb_cb_t *zbc)
{
	avl_tree_t *pool_t = &spa->spa_chain_map;
	spa_chain_map_pool_t *pool_node = avl_first(pool_t);

	while (pool_node != NULL) {
		avl_tree_t *os_t = &pool_node->scmp_os_tree;
		spa_chain_map_os_t *os_node = avl_first(os_t);

		while (os_node != NULL) {
			(void) zil_parse_raw(spa, &os_node->scmo_chain_head,
			    chain_map_count_blk_cb, chain_map_count_lr_cb,
			    zbc);
			os_node = AVL_NEXT(os_t, os_node);
		}
		pool_node = AVL_NEXT(pool_t, pool_node);
	}
}
static int
dump_block_stats(spa_t *spa)
{
@ -6865,6 +6917,10 @@ dump_block_stats(spa_t *spa)
deleted_livelists_count_blocks(spa, zcb);
if (spa_is_shared_log(spa)) {
chain_map_count_blocks(spa, zcb);
}
if (dump_opt['c'] > 1)
flags |= TRAVERSE_PREFETCH_DATA;
@ -7219,7 +7275,7 @@ zdb_ddt_add_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
zdb_ddt_entry_t *zdde, zdde_search;
if (zb->zb_level == ZB_DNODE_LEVEL || BP_IS_HOLE(bp) ||
BP_IS_EMBEDDED(bp))
BP_IS_EMBEDDED(bp) || (zilog && zilog->zl_spa != zilog->zl_io_spa))
return (0);
if (dump_opt['S'] > 1 && zb->zb_level == ZB_ROOT_LEVEL) {
@ -7990,6 +8046,8 @@ dump_mos_leaks(spa_t *spa)
scip_next_mapping_object);
mos_obj_refd(spa->spa_condensing_indirect_phys.
scip_prev_obsolete_sm_object);
if (spa_is_shared_log(spa))
mos_obj_refd(spa->spa_dsl_pool->dp_chain_map_obj);
if (spa->spa_condensing_indirect_phys.scip_next_mapping_object != 0) {
vdev_indirect_mapping_t *vim =
vdev_indirect_mapping_open(mos,

View File

@ -111,6 +111,7 @@ static int zpool_do_split(int, char **);
static int zpool_do_initialize(int, char **);
static int zpool_do_scrub(int, char **);
static int zpool_do_resilver(int, char **);
static int zpool_do_recycle(int, char **);
static int zpool_do_trim(int, char **);
static int zpool_do_import(int, char **);
@ -182,6 +183,7 @@ typedef enum {
HELP_REMOVE,
HELP_INITIALIZE,
HELP_SCRUB,
HELP_RECYCLE,
HELP_RESILVER,
HELP_TRIM,
HELP_STATUS,
@ -326,6 +328,7 @@ static zpool_command_t command_table[] = {
{ "split", zpool_do_split, HELP_SPLIT },
{ NULL },
{ "initialize", zpool_do_initialize, HELP_INITIALIZE },
{ "recycle", zpool_do_recycle, HELP_RECYCLE },
{ "resilver", zpool_do_resilver, HELP_RESILVER },
{ "scrub", zpool_do_scrub, HELP_SCRUB },
{ "trim", zpool_do_trim, HELP_TRIM },
@ -368,7 +371,8 @@ get_usage(zpool_help_t idx)
case HELP_CLEAR:
return (gettext("\tclear [[--power]|[-nF]] <pool> [device]\n"));
case HELP_CREATE:
return (gettext("\tcreate [-fnd] [-o property=value] ... \n"
return (gettext("\tcreate [-fndL] [-l pool] ... \n"
"\t [-o property=value] ... \n"
"\t [-O file-system-property=value] ... \n"
"\t [-m mountpoint] [-R root] <pool> <vdev> ...\n"));
case HELP_CHECKPOINT:
@ -387,8 +391,8 @@ get_usage(zpool_help_t idx)
"\t [-d dir | -c cachefile] [-D] [-l] [-f] [-m] [-N] "
"[-R root] [-F [-n]] -a\n"
"\timport [-o mntopts] [-o property=value] ... \n"
"\t [-d dir | -c cachefile] [-D] [-l] [-f] [-m] [-N] "
"[-R root] [-F [-n]]\n"
"\t [-d dir | -c cachefile] [-D] [-l] [-f] [-m -L pool] "
"[-N] [-R root] [-F [-n]]\n"
"\t [--rewind-to-checkpoint] <pool | id> [newpool]\n"));
case HELP_IOSTAT:
return (gettext("\tiostat [[[-c [script1,script2,...]"
@ -422,6 +426,8 @@ get_usage(zpool_help_t idx)
"[<device> ...]\n"));
case HELP_SCRUB:
return (gettext("\tscrub [-s | -p] [-w] [-e] <pool> ...\n"));
case HELP_RECYCLE:
return (gettext("\trecycle [-nv] <pool> ...\n"));
case HELP_RESILVER:
return (gettext("\tresilver <pool> ...\n"));
case HELP_TRIM:
@ -1169,10 +1175,14 @@ zpool_do_add(int argc, char **argv)
&props, B_TRUE) == 0);
}
}
uint64_t shared_log;
boolean_t has_shared_log = nvlist_lookup_uint64(config,
ZPOOL_CONFIG_SHARED_LOG_POOL, &shared_log) == 0;
/* pass off to make_root_vdev for processing */
nvroot = make_root_vdev(zhp, props, !check_inuse,
check_replication, B_FALSE, dryrun, argc, argv);
check_replication, B_FALSE, dryrun, has_shared_log, argc,
argv);
if (nvroot == NULL) {
zpool_close(zhp);
return (1);
@ -1632,9 +1642,11 @@ zpool_do_create(int argc, char **argv)
nvlist_t *fsprops = NULL;
nvlist_t *props = NULL;
char *propval;
zpool_handle_t *shared_log_pool = NULL;
boolean_t is_shared_log = B_FALSE;
/* check options */
while ((c = getopt(argc, argv, ":fndR:m:o:O:t:")) != -1) {
while ((c = getopt(argc, argv, ":fndR:m:o:O:t:l:L")) != -1) {
switch (c) {
case 'f':
force = B_TRUE;
@ -1733,6 +1745,17 @@ zpool_do_create(int argc, char **argv)
goto errout;
tname = optarg;
break;
case 'l':
shared_log_pool = zpool_open(g_zfs, optarg);
if (shared_log_pool == NULL) {
(void) fprintf(stderr, gettext("could not open "
"shared log pool '%s'"), optarg);
goto errout;
}
break;
case 'L':
is_shared_log = B_TRUE;
break;
case ':':
(void) fprintf(stderr, gettext("missing argument for "
"'%c' option\n"), optopt);
@ -1773,9 +1796,17 @@ zpool_do_create(int argc, char **argv)
/* pass off to make_root_vdev for bulk processing */
nvroot = make_root_vdev(NULL, props, force, !force, B_FALSE, dryrun,
argc - 1, argv + 1);
shared_log_pool != NULL, argc - 1, argv + 1);
if (nvroot == NULL)
goto errout;
if (shared_log_pool) {
fnvlist_add_uint64(nvroot, ZPOOL_CONFIG_SHARED_LOG_POOL,
fnvlist_lookup_uint64(zpool_get_config(shared_log_pool,
NULL), ZPOOL_CONFIG_POOL_GUID));
}
if (is_shared_log)
fnvlist_add_boolean(nvroot, ZPOOL_CONFIG_IS_SHARED_LOG);
/* make_root_vdev() allows 0 toplevel children if there are spares */
if (!zfs_allocatable_devs(nvroot)) {
@ -2518,7 +2549,8 @@ vdev_health_check_cb(void *hdl_data, nvlist_t *nv, void *data)
*/
static void
print_status_config(zpool_handle_t *zhp, status_cbdata_t *cb, const char *name,
nvlist_t *nv, int depth, boolean_t isspare, vdev_rebuild_stat_t *vrs)
nvlist_t *nv, int depth, boolean_t isspare, boolean_t recurse,
vdev_rebuild_stat_t *vrs)
{
nvlist_t **child, *root;
uint_t c, i, vsc, children;
@ -2798,7 +2830,7 @@ print_status_config(zpool_handle_t *zhp, status_cbdata_t *cb, const char *name,
(void) printf("\n");
for (c = 0; c < children; c++) {
for (c = 0; c < children && recurse; c++) {
uint64_t islog = B_FALSE, ishole = B_FALSE;
/* Don't print logs or holes here */
@ -2822,7 +2854,7 @@ print_status_config(zpool_handle_t *zhp, status_cbdata_t *cb, const char *name,
vname = zpool_vdev_name(g_zfs, zhp, child[c],
cb->cb_name_flags | VDEV_NAME_TYPE_ID);
print_status_config(zhp, cb, vname, child[c], depth + 2,
isspare, vrs);
isspare, B_TRUE, vrs);
free(vname);
}
}
@ -2833,7 +2865,7 @@ print_status_config(zpool_handle_t *zhp, status_cbdata_t *cb, const char *name,
*/
static void
print_import_config(status_cbdata_t *cb, const char *name, nvlist_t *nv,
int depth)
int depth, boolean_t recurse)
{
nvlist_t **child;
uint_t c, children;
@ -2899,7 +2931,7 @@ print_import_config(status_cbdata_t *cb, const char *name, nvlist_t *nv,
}
(void) printf("\n");
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
if (!recurse || nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
&child, &children) != 0)
return;
@ -2915,7 +2947,7 @@ print_import_config(status_cbdata_t *cb, const char *name, nvlist_t *nv,
vname = zpool_vdev_name(g_zfs, NULL, child[c],
cb->cb_name_flags | VDEV_NAME_TYPE_ID);
print_import_config(cb, vname, child[c], depth + 2);
print_import_config(cb, vname, child[c], depth + 2, B_TRUE);
free(vname);
}
@ -2996,13 +3028,70 @@ print_class_vdevs(zpool_handle_t *zhp, status_cbdata_t *cb, nvlist_t *nv,
cb->cb_name_flags | VDEV_NAME_TYPE_ID);
if (cb->cb_print_status)
print_status_config(zhp, cb, name, child[c], 2,
B_FALSE, NULL);
B_FALSE, B_TRUE, NULL);
else
print_import_config(cb, name, child[c], 2);
print_import_config(cb, name, child[c], 2, B_TRUE);
free(name);
}
}
/*
 * Find a pool with a matching GUID.
 */
typedef struct find_cbdata {
	uint64_t cb_guid;	/* pool GUID to search for */
	zpool_handle_t *cb_zhp;	/* open handle of the match, set by callback */
} find_cbdata_t;
/*
 * zpool_iter() callback: stop the iteration (return 1) when this pool's
 * GUID matches the one being searched for, keeping the handle open in
 * cb_zhp.  Handles of non-matching pools are closed.
 */
static int
find_pool(zpool_handle_t *zhp, void *data)
{
	find_cbdata_t *cbp = data;
	uint64_t guid = zpool_get_prop_int(zhp, ZPOOL_PROP_GUID, NULL);

	if (guid != cbp->cb_guid) {
		zpool_close(zhp);
		return (0);
	}

	cbp->cb_zhp = zhp;
	return (1);
}
/*
 * Given a pool GUID, return an open handle to the matching imported pool,
 * or NULL if none matches.  The caller is responsible for zpool_close()ing
 * the returned handle.
 */
static zpool_handle_t *
find_by_guid(libzfs_handle_t *zhdl, uint64_t pool_guid)
{
	find_cbdata_t cb = { .cb_guid = pool_guid, .cb_zhp = NULL };

	return (zpool_iter(zhdl, find_pool, &cb) == 1 ? cb.cb_zhp : NULL);
}
/*
 * Print a "shared log" section header followed by the shared log pool's
 * root vdev line (non-recursively), for zpool status/import output.
 * The shared log pool is located by GUID and must be imported.
 */
static void
print_shared_log(zpool_handle_t *zhp, status_cbdata_t *cb,
    uint64_t shared_log_guid)
{
	zpool_handle_t *shared_log;
	nvlist_t *nvroot;
	const char *name;

	(void) printf(gettext("\tshared log\n"));

	shared_log = find_by_guid(g_zfs, shared_log_guid);
	VERIFY(shared_log);
	name = zpool_get_name(shared_log);
	VERIFY0(nvlist_lookup_nvlist(zpool_get_config(shared_log, NULL),
	    ZPOOL_CONFIG_VDEV_TREE, &nvroot));

	if (cb->cb_print_status) {
		print_status_config(zhp, cb, name, nvroot, 2, B_FALSE,
		    B_FALSE, NULL);
	} else {
		print_import_config(cb, name, nvroot, 2, B_FALSE);
	}
	zpool_close(shared_log);
}
/*
* Display the status for the given pool.
*/
@ -3376,7 +3465,7 @@ show_import(nvlist_t *config, boolean_t report_error)
if (cb.cb_namewidth < 10)
cb.cb_namewidth = 10;
print_import_config(&cb, name, nvroot, 0);
print_import_config(&cb, name, nvroot, 0, B_TRUE);
print_class_vdevs(NULL, &cb, nvroot, VDEV_ALLOC_BIAS_DEDUP);
print_class_vdevs(NULL, &cb, nvroot, VDEV_ALLOC_BIAS_SPECIAL);
@ -3694,6 +3783,11 @@ import_pools(nvlist_t *pools, nvlist_t *props, char *mntopts, int flags,
"no such pool available\n"), orig_name);
err = B_TRUE;
} else {
if (import->shared_log_guid) {
fnvlist_add_uint64(found_config,
ZPOOL_CONFIG_SHARED_LOG_POOL,
import->shared_log_guid);
}
err |= do_import(found_config, new_name,
mntopts, props, flags, mount_tp_nthr);
}
@ -3982,6 +4076,7 @@ zpool_do_import(int argc, char **argv)
char *cachefile = NULL;
importargs_t idata = { 0 };
char *endptr;
zpool_handle_t *shared_log_pool = NULL;
struct option long_options[] = {
{"rewind-to-checkpoint", no_argument, NULL, CHECKPOINT_OPT},
@ -3989,7 +4084,7 @@ zpool_do_import(int argc, char **argv)
};
/* check options */
while ((c = getopt_long(argc, argv, ":aCc:d:DEfFlmnNo:R:stT:VX",
while ((c = getopt_long(argc, argv, ":aCc:d:DEfFlL:mnNo:R:stT:VX",
long_options, NULL)) != -1) {
switch (c) {
case 'a':
@ -4015,6 +4110,14 @@ zpool_do_import(int argc, char **argv)
case 'l':
flags |= ZFS_IMPORT_LOAD_KEYS;
break;
case 'L':
shared_log_pool = zpool_open(g_zfs, optarg);
if (shared_log_pool == NULL) {
(void) fprintf(stderr, gettext("could not open "
"shared log pool '%s'"), optarg);
goto error;
}
break;
case 'm':
flags |= ZFS_IMPORT_MISSING_LOG;
break;
@ -4087,6 +4190,16 @@ zpool_do_import(int argc, char **argv)
argc -= optind;
argv += optind;
if (shared_log_pool != NULL && ! (flags & ZFS_IMPORT_MISSING_LOG)) {
(void) fprintf(stderr, gettext("-L requires -m\n"));
usage(B_FALSE);
}
if (shared_log_pool != NULL && do_all) {
(void) fprintf(stderr, gettext("-L is incompatible with -a\n"));
usage(B_FALSE);
}
if (cachefile && nsearch != 0) {
(void) fprintf(stderr, gettext("-c is incompatible with -d\n"));
usage(B_FALSE);
@ -4210,6 +4323,10 @@ zpool_do_import(int argc, char **argv)
idata.policy = policy;
idata.do_destroyed = do_destroyed;
idata.do_all = do_all;
if (shared_log_pool) {
idata.shared_log_guid = fnvlist_lookup_uint64(zpool_get_config(
shared_log_pool, NULL), ZPOOL_CONFIG_POOL_GUID);
}
libpc_handle_t lpch = {
.lpc_lib_handle = g_zfs,
@ -6569,7 +6686,7 @@ print_one_column(zpool_prop_t prop, uint64_t value, const char *str,
*/
static void
print_list_stats(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
list_cbdata_t *cb, int depth, boolean_t isspare)
list_cbdata_t *cb, int depth, boolean_t isspare, boolean_t recurse)
{
nvlist_t **child;
vdev_stat_t *vs;
@ -6577,7 +6694,7 @@ print_list_stats(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
char *vname;
boolean_t scripted = cb->cb_scripted;
uint64_t islog = B_FALSE;
const char *dashes = "%-*s - - - - "
const char *dashes = "%*s%-*s - - - - "
"- - - - -\n";
verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS,
@ -6647,7 +6764,7 @@ print_list_stats(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
(void) fputc('\n', stdout);
}
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
if (!recurse || nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
&child, &children) != 0)
return;
@ -6668,7 +6785,8 @@ print_list_stats(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
vname = zpool_vdev_name(g_zfs, zhp, child[c],
cb->cb_name_flags | VDEV_NAME_TYPE_ID);
print_list_stats(zhp, vname, child[c], cb, depth + 2, B_FALSE);
print_list_stats(zhp, vname, child[c], cb, depth + 2, B_FALSE,
B_TRUE);
free(vname);
}
@ -6696,27 +6814,44 @@ print_list_stats(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
if (!printed) {
/* LINTED E_SEC_PRINTF_VAR_FMT */
(void) printf(dashes, cb->cb_namewidth,
(void) printf(dashes, depth + 2, "", cb->cb_namewidth,
class_name[n]);
printed = B_TRUE;
}
vname = zpool_vdev_name(g_zfs, zhp, child[c],
cb->cb_name_flags | VDEV_NAME_TYPE_ID);
print_list_stats(zhp, vname, child[c], cb, depth + 2,
B_FALSE);
print_list_stats(zhp, vname, child[c], cb, depth + 4,
B_FALSE, B_TRUE);
free(vname);
}
}
uint64_t shared_log_guid;
if (name == NULL && nvlist_lookup_uint64(zpool_get_config(zhp, NULL),
ZPOOL_CONFIG_SHARED_LOG_POOL, &shared_log_guid) == 0) {
(void) printf(dashes, depth + 2, "", cb->cb_namewidth, "shared log");
zpool_handle_t *shared_log = find_by_guid(g_zfs,
shared_log_guid);
VERIFY(shared_log);
nvlist_t *shared_log_config = zpool_get_config(shared_log,
NULL);
nvlist_t *nvroot;
VERIFY0(nvlist_lookup_nvlist(shared_log_config,
ZPOOL_CONFIG_VDEV_TREE, &nvroot));
print_list_stats(shared_log, zpool_get_name(shared_log), nvroot,
cb, depth + 4, B_FALSE, B_FALSE);
zpool_close(shared_log);
}
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
&child, &children) == 0 && children > 0) {
/* LINTED E_SEC_PRINTF_VAR_FMT */
(void) printf(dashes, cb->cb_namewidth, "cache");
(void) printf(dashes, depth + 2, "", cb->cb_namewidth, "cache");
for (c = 0; c < children; c++) {
vname = zpool_vdev_name(g_zfs, zhp, child[c],
cb->cb_name_flags);
print_list_stats(zhp, vname, child[c], cb, depth + 2,
B_FALSE);
print_list_stats(zhp, vname, child[c], cb, depth + 4,
B_FALSE, B_TRUE);
free(vname);
}
}
@ -6724,12 +6859,12 @@ print_list_stats(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES, &child,
&children) == 0 && children > 0) {
/* LINTED E_SEC_PRINTF_VAR_FMT */
(void) printf(dashes, cb->cb_namewidth, "spare");
(void) printf(dashes, depth + 2, "", cb->cb_namewidth, "spare");
for (c = 0; c < children; c++) {
vname = zpool_vdev_name(g_zfs, zhp, child[c],
cb->cb_name_flags);
print_list_stats(zhp, vname, child[c], cb, depth + 2,
B_TRUE);
print_list_stats(zhp, vname, child[c], cb, depth + 4,
B_TRUE, B_TRUE);
free(vname);
}
}
@ -6751,7 +6886,7 @@ list_callback(zpool_handle_t *zhp, void *data)
config = zpool_get_config(zhp, NULL);
verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
&nvroot) == 0);
print_list_stats(zhp, NULL, nvroot, cbp, 0, B_FALSE);
print_list_stats(zhp, NULL, nvroot, cbp, 0, B_FALSE, B_TRUE);
}
return (0);
@ -6908,7 +7043,7 @@ zpool_do_attach_or_replace(int argc, char **argv, int replacing)
boolean_t rebuild = B_FALSE;
boolean_t wait = B_FALSE;
int c;
nvlist_t *nvroot;
nvlist_t *nvroot, *config;
char *poolname, *old_disk, *new_disk;
zpool_handle_t *zhp;
nvlist_t *props = NULL;
@ -6991,7 +7126,7 @@ zpool_do_attach_or_replace(int argc, char **argv, int replacing)
return (1);
}
if (zpool_get_config(zhp, NULL) == NULL) {
if ((config = zpool_get_config(zhp, NULL)) == NULL) {
(void) fprintf(stderr, gettext("pool '%s' is unavailable\n"),
poolname);
zpool_close(zhp);
@ -7013,8 +7148,12 @@ zpool_do_attach_or_replace(int argc, char **argv, int replacing)
}
}
uint64_t shared_log;
boolean_t has_shared_log = nvlist_lookup_uint64(config,
ZPOOL_CONFIG_SHARED_LOG_POOL, &shared_log) == 0;
nvroot = make_root_vdev(zhp, props, force, B_FALSE, replacing, B_FALSE,
argc, argv);
has_shared_log, argc, argv);
if (nvroot == NULL) {
zpool_close(zhp);
nvlist_free(props);
@ -7938,6 +8077,73 @@ zpool_do_resilver(int argc, char **argv)
B_FALSE, scrub_callback, &cb));
}
/* Options carried to the per-pool 'zpool recycle' callback. */
struct recycle_data {
	boolean_t dryrun;	/* -n: report what would be done, change nothing */
	boolean_t verbose;	/* -v: list the cleaned-up client pools */
};
/*
 * for_each_pool() callback for 'zpool recycle': ask the kernel to recycle
 * this shared log pool's chain maps and, in verbose mode, print the names
 * of the client pools that were (or, on a dry run, would be) cleaned up.
 */
static int
recycle_callback(zpool_handle_t *zhp, void *data)
{
	struct recycle_data *rd = data;
	nvlist_t *nvl;
	nvpair_t *elem;
	const char *sep = "";
	int err;

	err = lzc_recycle(zpool_get_name(zhp), rd->dryrun, &nvl);
	if (err != 0)
		return (err);

	if (rd->verbose) {
		printf("Cleaned up%s: [", rd->dryrun ? " (dry run)" : "");
		for (elem = nvlist_next_nvpair(nvl, NULL); elem != NULL;
		    elem = nvlist_next_nvpair(nvl, elem)) {
			printf("%s%s", sep, nvpair_name(elem));
			sep = ",\n\t";
		}
		printf("]\n");
	}
	nvlist_free(nvl);
	return (0);
}
/*
 * zpool recycle [-nv] <pool> ...
 *
 * Cleans up chain maps for non-attached client pools
 *
 *	-n	Dry run: report what would be cleaned up without modifying
 *		anything.  Implies -v, since a silent dry run is useless.
 *	-v	Verbose: list the client pools whose chain maps were cleaned.
 */
/*
 * Declared 'static int' in the forward declarations; the definition must
 * match so it keeps internal linkage like every other zpool_do_* command.
 */
static int
zpool_do_recycle(int argc, char **argv)
{
	int c;
	struct recycle_data rd = {0};

	/* check options */
	while ((c = getopt(argc, argv, "nv")) != -1) {
		switch (c) {
		case 'n':
			rd.dryrun = B_TRUE;
			/* -n implies -v: show what the dry run found */
			zfs_fallthrough;
		case 'v':
			rd.verbose = B_TRUE;
			break;
		case '?':
			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
			    optopt);
			usage(B_FALSE);
		}
	}

	argc -= optind;
	argv += optind;

	if (argc < 1) {
		(void) fprintf(stderr, gettext("missing pool name argument\n"));
		usage(B_FALSE);
	}

	return (for_each_pool(argc, argv, B_TRUE, NULL, ZFS_TYPE_POOL,
	    B_FALSE, recycle_callback, &rd));
}
/*
* zpool trim [-d] [-r <rate>] [-c | -s] <pool> [<device> ...]
*
@ -8838,7 +9044,8 @@ print_spares(zpool_handle_t *zhp, status_cbdata_t *cb, nvlist_t **spares,
for (i = 0; i < nspares; i++) {
name = zpool_vdev_name(g_zfs, zhp, spares[i],
cb->cb_name_flags);
print_status_config(zhp, cb, name, spares[i], 2, B_TRUE, NULL);
print_status_config(zhp, cb, name, spares[i], 2, B_TRUE, B_TRUE,
NULL);
free(name);
}
}
@ -8859,7 +9066,7 @@ print_l2cache(zpool_handle_t *zhp, status_cbdata_t *cb, nvlist_t **l2cache,
name = zpool_vdev_name(g_zfs, zhp, l2cache[i],
cb->cb_name_flags);
print_status_config(zhp, cb, name, l2cache[i], 2,
B_FALSE, NULL);
B_FALSE, B_TRUE, NULL);
free(name);
}
}
@ -9401,12 +9608,18 @@ status_callback(zpool_handle_t *zhp, void *data)
printf("\n");
print_status_config(zhp, cbp, zpool_get_name(zhp), nvroot, 0,
B_FALSE, NULL);
B_FALSE, B_TRUE, NULL);
print_class_vdevs(zhp, cbp, nvroot, VDEV_ALLOC_BIAS_DEDUP);
print_class_vdevs(zhp, cbp, nvroot, VDEV_ALLOC_BIAS_SPECIAL);
print_class_vdevs(zhp, cbp, nvroot, VDEV_ALLOC_CLASS_LOGS);
uint64_t shared_log_guid;
if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_SHARED_LOG_POOL,
&shared_log_guid) == 0) {
print_shared_log(zhp, cbp, shared_log_guid);
}
if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
&l2cache, &nl2cache) == 0)
print_l2cache(zhp, cbp, l2cache, nl2cache);

View File

@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2023 by Delphix. All rights reserved.
*/
#ifndef ZPOOL_UTIL_H
@ -57,8 +58,8 @@ char *zpool_get_cmd_search_path(void);
*/
nvlist_t *make_root_vdev(zpool_handle_t *zhp, nvlist_t *props, int force,
int check_rep, boolean_t replacing, boolean_t dryrun, int argc,
char **argv);
int check_rep, boolean_t replacing, boolean_t dryrun,
boolean_t have_shlog, int argc, char **argv);
nvlist_t *split_mirror_vdev(zpool_handle_t *zhp, char *newname,
nvlist_t *props, splitflags_t flags, int argc, char **argv);

View File

@ -21,7 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013, 2018 by Delphix. All rights reserved.
* Copyright (c) 2013, 2023 by Delphix. All rights reserved.
* Copyright (c) 2016, 2017 Intel Corporation.
* Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>.
*/
@ -1488,7 +1488,7 @@ draid_config_by_type(nvlist_t *nv, const char *type, uint64_t children)
* because the program is just going to exit anyway.
*/
static nvlist_t *
construct_spec(nvlist_t *props, int argc, char **argv)
construct_spec(nvlist_t *props, boolean_t have_shlog, int argc, char **argv)
{
nvlist_t *nvroot, *nv, **top, **spares, **l2cache;
int t, toplevels, mindev, maxdev, nspares, nlogs, nl2cache;
@ -1735,6 +1735,12 @@ construct_spec(nvlist_t *props, int argc, char **argv)
goto spec_out;
}
if (seen_logs && have_shlog) {
(void) fprintf(stderr, gettext("invalid vdev specification: "
"cannot mix shared log and log devices"));
goto spec_out;
}
if (seen_logs && nlogs == 0) {
(void) fprintf(stderr, gettext("invalid vdev specification: "
"log requires at least 1 device\n"));
@ -1779,7 +1785,8 @@ split_mirror_vdev(zpool_handle_t *zhp, char *newname, nvlist_t *props,
uint_t c, children;
if (argc > 0) {
if ((newroot = construct_spec(props, argc, argv)) == NULL) {
if ((newroot = construct_spec(props, B_FALSE, argc, argv)) ==
NULL) {
(void) fprintf(stderr, gettext("Unable to build a "
"pool from the specified devices\n"));
return (NULL);
@ -1853,7 +1860,8 @@ num_normal_vdevs(nvlist_t *nvroot)
*/
nvlist_t *
make_root_vdev(zpool_handle_t *zhp, nvlist_t *props, int force, int check_rep,
boolean_t replacing, boolean_t dryrun, int argc, char **argv)
boolean_t replacing, boolean_t dryrun, boolean_t have_shlog, int argc,
char **argv)
{
nvlist_t *newroot;
nvlist_t *poolconfig = NULL;
@ -1864,7 +1872,7 @@ make_root_vdev(zpool_handle_t *zhp, nvlist_t *props, int force, int check_rep,
* that we have a valid specification, and that all devices can be
* opened.
*/
if ((newroot = construct_spec(props, argc, argv)) == NULL)
if ((newroot = construct_spec(props, have_shlog, argc, argv)) == NULL)
return (NULL);
if (zhp && ((poolconfig = zpool_get_config(zhp, NULL)) == NULL)) {

View File

@ -159,6 +159,8 @@ typedef enum zfs_error {
EZFS_SHAREFAILED, /* filesystem share failed */
EZFS_RAIDZ_EXPAND_IN_PROGRESS, /* a raidz is currently expanding */
EZFS_ASHIFT_MISMATCH, /* can't add vdevs with different ashifts */
/* Operation cannot be performed on a shared log pool */
EZFS_SHAREDLOG,
EZFS_UNKNOWN
} zfs_error_t;

View File

@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2012, 2020 by Delphix. All rights reserved.
* Copyright (c) 2012, 2023 by Delphix. All rights reserved.
* Copyright 2017 RackTop Systems.
* Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
* Copyright (c) 2019 Datto Inc.
@ -156,6 +156,8 @@ _LIBZFS_CORE_H int lzc_wait_fs(const char *, zfs_wait_activity_t, boolean_t *);
_LIBZFS_CORE_H int lzc_set_bootenv(const char *, const nvlist_t *);
_LIBZFS_CORE_H int lzc_get_bootenv(const char *, nvlist_t **);
_LIBZFS_CORE_H int lzc_recycle(const char *, boolean_t, nvlist_t **);
_LIBZFS_CORE_H int lzc_get_vdev_prop(const char *, nvlist_t *, nvlist_t **);
_LIBZFS_CORE_H int lzc_set_vdev_prop(const char *, nvlist_t *, nvlist_t **);

View File

@ -81,6 +81,7 @@ typedef struct importargs {
nvlist_t *policy; /* load policy (max txg, rewind, etc.) */
boolean_t do_destroyed;
boolean_t do_all;
uint64_t shared_log_guid;
} importargs_t;
typedef struct libpc_handle {

View File

@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2020 by Delphix. All rights reserved.
* Copyright (c) 2011, 2023 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
* Copyright 2014 HybridCluster. All rights reserved.
@ -390,6 +390,7 @@ typedef struct dmu_buf {
#define DMU_POOL_ZPOOL_CHECKPOINT "com.delphix:zpool_checkpoint"
#define DMU_POOL_LOG_SPACEMAP_ZAP "com.delphix:log_spacemap_zap"
#define DMU_POOL_DELETED_CLONES "com.delphix:deleted_clones"
#define DMU_POOL_CHAIN_MAP_OBJ "com.delphix:chain_map_obj"
/*
* Allocate an object from this objset. The range of object numbers

View File

@ -23,7 +23,7 @@
* Use is subject to license terms.
*/
/*
* Copyright (c) 2012, 2016 by Delphix. All rights reserved.
* Copyright (c) 2012, 2013 by Delphix. All rights reserved.
*/
#ifndef _SYS_DMU_TX_H
@ -154,6 +154,7 @@ void dmu_tx_wait(dmu_tx_t *tx);
*/
extern dmu_tx_t *dmu_tx_create_assigned(struct dsl_pool *dp, uint64_t txg);
dmu_tx_t *dmu_tx_create_mos(struct dsl_pool *dp);
/*
* These routines are only called by the DMU.
*/

View File

@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013, 2018 by Delphix. All rights reserved.
* Copyright (c) 2013, 2023 by Delphix. All rights reserved.
* Copyright 2016 Nexenta Systems, Inc. All rights reserved.
*/
@ -106,6 +106,7 @@ typedef struct dsl_pool {
uint64_t dp_bptree_obj;
uint64_t dp_empty_bpobj;
bpobj_t dp_obsolete_bpobj;
uint64_t dp_chain_map_obj;
struct dsl_scan *dp_scan;

View File

@ -851,6 +851,8 @@ typedef struct zpool_load_policy {
#define ZPOOL_CONFIG_EXPANSION_TIME "expansion_time" /* not stored */
#define ZPOOL_CONFIG_REBUILD_STATS "org.openzfs:rebuild_stats"
#define ZPOOL_CONFIG_COMPATIBILITY "compatibility"
#define ZPOOL_CONFIG_SHARED_LOG_POOL "com.delphix:shared_log_pool"
#define ZPOOL_CONFIG_IS_SHARED_LOG "com.delphix:is_shared_log"
/*
* The persistent vdev state is stored as separate values rather than a single
@ -1518,7 +1520,8 @@ typedef enum zfs_ioc {
ZFS_IOC_VDEV_GET_PROPS, /* 0x5a55 */
ZFS_IOC_VDEV_SET_PROPS, /* 0x5a56 */
ZFS_IOC_POOL_SCRUB, /* 0x5a57 */
ZFS_IOC_POOL_PREFETCH, /* 0x5a58 */
ZFS_IOC_POOL_PREFETCH, /* 0x5a58 */
ZFS_IOC_POOL_RECYCLE, /* 0x5a59 */
/*
* Per-platform (Optional) - 8/128 numbers reserved.
@ -1748,6 +1751,11 @@ typedef enum {
*/
#define ZPOOL_PREFETCH_TYPE "prefetch_type"
/*
* The following name is used when invoking ZFS_IOC_POOL_RECYCLE.
*/
#define ZPOOL_RECYCLE_DRYRUN "dryrun"
/*
* Flags for ZFS_IOC_VDEV_SET_STATE
*/

View File

@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2018 by Delphix. All rights reserved.
* Copyright (c) 2011, 2023 by Delphix. All rights reserved.
* Copyright (c) 2017, Intel Corporation.
*/
@ -37,14 +37,20 @@
extern "C" {
#endif
typedef enum metaslab_type {
METASLAB_TYPE_NORMAL,
METASLAB_TYPE_VIRTUAL,
} metaslab_type_t;
typedef struct metaslab_ops {
const char *msop_name;
uint64_t (*msop_alloc)(metaslab_t *, uint64_t);
metaslab_type_t msop_type;
} metaslab_ops_t;
extern const metaslab_ops_t zfs_metaslab_ops;
extern const metaslab_ops_t zfs_virtual_ops;
int metaslab_init(metaslab_group_t *, uint64_t, uint64_t, uint64_t,
metaslab_t **);

View File

@ -24,7 +24,7 @@
*/
/*
* Copyright (c) 2011, 2019 by Delphix. All rights reserved.
* Copyright (c) 2011, 2023 by Delphix. All rights reserved.
*/
#ifndef _SYS_METASLAB_IMPL_H
@ -179,7 +179,15 @@ typedef struct metaslab_class_allocator {
struct metaslab_class {
kmutex_t mc_lock;
spa_t *mc_spa;
const metaslab_ops_t *mc_ops;
const metaslab_ops_t *mc_ops;
/*
* If this field is set, this is a "virtual" metaslab class. In
* actuality, the allocations will be done by the spa this is pointing
* to, using another pool for our storage. This enables the shared
* SLOG architecture. If this field is set, most of the other fields
* in this metaslab class are not used, and should be unset.
*/
struct spa *mc_virtual;
/*
* Track the number of metaslab groups that have been initialized

View File

@ -836,6 +836,8 @@ void spa_select_allocator(zio_t *zio);
extern kmutex_t spa_namespace_lock;
extern avl_tree_t spa_namespace_avl;
extern kcondvar_t spa_namespace_cv;
extern avl_tree_t spa_shared_log_avl;
extern kmutex_t spa_shared_log_lock;
/*
* SPA configuration functions in spa_config.c
@ -1028,7 +1030,8 @@ extern void spa_altroot(spa_t *, char *, size_t);
extern uint32_t spa_sync_pass(spa_t *spa);
extern char *spa_name(spa_t *spa);
extern uint64_t spa_guid(spa_t *spa);
extern uint64_t spa_load_guid(spa_t *spa);
extern uint64_t spa_const_guid(const spa_t *spa);
extern uint64_t spa_load_guid(const spa_t *spa);
extern uint64_t spa_last_synced_txg(spa_t *spa);
extern uint64_t spa_first_txg(spa_t *spa);
extern uint64_t spa_syncing_txg(spa_t *spa);
@ -1124,7 +1127,8 @@ extern boolean_t spa_multihost(spa_t *spa);
extern uint32_t spa_get_hostid(spa_t *spa);
extern void spa_activate_allocation_classes(spa_t *, dmu_tx_t *);
extern boolean_t spa_livelist_delete_check(spa_t *spa);
extern boolean_t spa_is_shared_log(const spa_t *spa);
extern boolean_t spa_uses_shared_log(const spa_t *spa);
extern boolean_t spa_mmp_remote_host_activity(spa_t *spa);
extern spa_mode_t spa_mode(spa_t *spa);
@ -1222,6 +1226,15 @@ extern void spa_export_os(spa_t *spa);
extern void spa_activate_os(spa_t *spa);
extern void spa_deactivate_os(spa_t *spa);
extern void spa_zil_map_insert(spa_t *spa, objset_t *os,
const blkptr_t *prev_bp, blkptr_t *bp);
extern void spa_zil_map_set_final(spa_t *spa, objset_t *os, blkptr_t *bp);
extern void spa_zil_delete(spa_t *spa, objset_t *os);
extern void spa_zil_header_convert(spa_t *spa, objset_t *os, blkptr_t *bp);
extern void spa_zil_header_mask(spa_t *spa, blkptr_t *bp);
extern spa_t *spa_get_shared_log_pool(spa_t *spa);
extern int spa_recycle(spa_t *spa, boolean_t dryrun, nvlist_t *outnvl);
/* module param call functions */
int param_set_deadman_ziotime(ZFS_MODULE_PARAM_ARGS);
int param_set_deadman_synctime(ZFS_MODULE_PARAM_ARGS);

View File

@ -217,6 +217,36 @@ typedef enum spa_config_source {
SPA_CONFIG_SRC_MOS /* MOS, but not always from right txg */
} spa_config_source_t;
typedef enum spa_pool_type {
SPA_TYPE_NORMAL = 0,
SPA_TYPE_SHARED_LOG,
} spa_pool_type_t;
typedef struct spa_zil_update_head {
avl_node_t szuh_avl;
list_t szuh_list;
uint64_t szuh_id;
blkptr_t szuh_chain_head;
boolean_t szuh_set;
} spa_zil_update_head_t;
typedef struct spa_zil_update {
list_node_t szu_list;
blkptr_t szu_chain_head;
} spa_zil_update_t;
typedef struct spa_chain_map_os {
avl_node_t scmo_avl;
uint64_t scmo_id;
blkptr_t scmo_chain_head;
} spa_chain_map_os_t;
typedef struct spa_chain_map_pool {
avl_node_t scmp_avl;
uint64_t scmp_guid;
avl_tree_t scmp_os_tree;
} spa_chain_map_pool_t;
struct spa {
/*
* Fields protected by spa_namespace_lock.
@ -224,6 +254,9 @@ struct spa {
char spa_name[ZFS_MAX_DATASET_NAME_LEN]; /* pool name */
char *spa_comment; /* comment */
avl_node_t spa_avl; /* node in spa_namespace_avl */
avl_node_t spa_log_avl; /* node in spa_shared_log_avl */
/* node in spa_registered_clients */
avl_node_t spa_client_avl;
nvlist_t *spa_config; /* last synced config */
nvlist_t *spa_config_syncing; /* currently syncing config */
nvlist_t *spa_config_splitting; /* config for splitting */
@ -244,6 +277,8 @@ struct spa {
boolean_t spa_is_initializing; /* true while opening pool */
boolean_t spa_is_exporting; /* true while exporting pool */
kthread_t *spa_export_thread; /* valid during pool export */
/* true if pool's log device is shared log */
boolean_t spa_uses_shared_log;
kthread_t *spa_load_thread; /* loading, no namespace lock */
metaslab_class_t *spa_normal_class; /* normal data class */
metaslab_class_t *spa_log_class; /* intent log data class */
@ -303,6 +338,7 @@ struct spa {
boolean_t spa_extreme_rewind; /* rewind past deferred frees */
kmutex_t spa_scrub_lock; /* resilver/scrub lock */
uint64_t spa_scrub_inflight; /* in-flight scrub bytes */
boolean_t spa_pool_type; /* normal or object-based */
/* in-flight verification bytes */
uint64_t spa_load_verify_bytes;
@ -477,6 +513,17 @@ struct spa {
*/
spa_config_lock_t spa_config_lock[SCL_LOCKS]; /* config changes */
zfs_refcount_t spa_refcount; /* number of opens */
/* Only used if type is shared log */
kmutex_t spa_chain_map_lock;
avl_tree_t spa_chain_map;
avl_tree_t spa_registered_clients;
/* Only used during syncing context if using shared log */
kmutex_t spa_zil_map_lock;
avl_tree_t spa_zil_map;
list_t spa_zil_deletes;
taskq_t *spa_chain_map_taskq;
};
extern char *spa_config_path;
@ -495,6 +542,7 @@ extern void spa_set_deadman_ziotime(hrtime_t ns);
extern const char *spa_history_zone(void);
extern const char *zfs_active_allocator;
extern int param_set_active_allocator_common(const char *val);
extern void spa_set_pool_type(spa_t *);
#ifdef __cplusplus
}

View File

@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2018 by Delphix. All rights reserved.
* Copyright (c) 2012, 2023 by Delphix. All rights reserved.
*/
/* Portions Copyright 2010 Robert Milkowski */
@ -563,10 +563,15 @@ typedef int zil_parse_lr_func_t(zilog_t *zilog, const lr_t *lr, void *arg,
typedef int zil_replay_func_t(void *arg1, void *arg2, boolean_t byteswap);
typedef int zil_get_data_t(void *arg, uint64_t arg2, lr_write_t *lr, char *dbuf,
struct lwb *lwb, zio_t *zio);
typedef int zil_parse_raw_blk_func_t(spa_t *spa, const blkptr_t *bp, void *arg);
typedef int zil_parse_raw_lr_func_t(spa_t *spa, const lr_t *lr, void *arg);
extern int zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg,
boolean_t decrypt);
extern int zil_parse_raw(spa_t *spa, const blkptr_t *bp,
zil_parse_raw_blk_func_t *parse_func,
zil_parse_raw_lr_func_t *parse_lr_func, void *arg);
extern void zil_init(void);
extern void zil_fini(void);
@ -620,6 +625,8 @@ extern void zil_sums_fini(zil_sums_t *zs);
extern void zil_kstat_values_update(zil_kstat_values_t *zs,
zil_sums_t *zil_sums);
extern boolean_t zil_shared_log(zilog_t *zl);
extern int zil_replay_disable;
#ifdef __cplusplus

View File

@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2018 by Delphix. All rights reserved.
* Copyright (c) 2012, 2023 by Delphix. All rights reserved.
*/
/* Portions Copyright 2010 Robert Milkowski */
@ -190,6 +190,7 @@ struct zilog {
kmutex_t zl_lock; /* protects most zilog_t fields */
struct dsl_pool *zl_dmu_pool; /* DSL pool */
spa_t *zl_spa; /* handle for read/write log */
spa_t *zl_io_spa; /* handle for read/write log */
const zil_header_t *zl_header; /* log header buffer */
objset_t *zl_os; /* object set we're logging */
zil_get_data_t *zl_get_data; /* callback to get object content */

View File

@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2011, 2018 by Delphix. All rights reserved.
* Copyright (c) 2011, 2023 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
* Copyright (c) 2017, Intel Corporation.
@ -82,6 +82,7 @@ typedef enum spa_feature {
SPA_FEATURE_AVZ_V2,
SPA_FEATURE_REDACTION_LIST_SPILL,
SPA_FEATURE_RAIDZ_EXPANSION,
SPA_FEATURE_SHARED_LOG,
SPA_FEATURES
} spa_feature_t;

View File

@ -22,7 +22,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2019 Joyent, Inc.
* Copyright (c) 2011, 2020 by Delphix. All rights reserved.
* Copyright (c) 2011, 2023 by Delphix. All rights reserved.
* Copyright (c) 2012 DEY Storage Systems, Inc. All rights reserved.
* Copyright (c) 2012 Pawel Jakub Dawidek <pawel@dawidek.net>.
* Copyright (c) 2013 Martin Matuska. All rights reserved.
@ -3869,6 +3869,10 @@ zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type,
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"invalid property value(s) specified"));
return (zfs_error(hdl, EZFS_BADPROP, errbuf));
case EINVAL:
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"cannot create filesystem in shared log pool"));
return (zfs_error(hdl, EZFS_SHAREDLOG, errbuf));
#ifdef _ILP32
case EOVERFLOW:
/*

View File

@ -184,6 +184,7 @@
<elf-symbol name='lzc_receive_with_cmdprops' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='lzc_receive_with_header' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='lzc_receive_with_heal' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='lzc_recycle' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='lzc_redact' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='lzc_release' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='lzc_rename' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>

View File

@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2012, 2020 by Delphix. All rights reserved.
* Copyright (c) 2012, 2023 by Delphix. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved.
* Copyright 2017 RackTop Systems.
* Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
@ -1927,3 +1927,13 @@ lzc_get_bootenv(const char *pool, nvlist_t **outnvl)
{
return (lzc_ioctl(ZFS_IOC_GET_BOOTENV, pool, NULL, outnvl));
}
/*
 * Reclaim shared-log pool space referenced by non-imported client pools.
 * When dryrun is B_TRUE, nothing is deleted and outnvl reports what would
 * have been affected.
 */
int
lzc_recycle(const char *pool, boolean_t dryrun, nvlist_t **outnvl)
{
	int error;
	nvlist_t *args;

	args = fnvlist_alloc();
	fnvlist_add_boolean_value(args, ZPOOL_RECYCLE_DRYRUN, dryrun);
	error = lzc_ioctl(ZFS_IOC_POOL_RECYCLE, pool, args, outnvl);
	fnvlist_free(args);

	return (error);
}

View File

@ -21,7 +21,7 @@
/*
* Copyright 2015 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2018 by Delphix. All rights reserved.
* Copyright (c) 2012, 2024 by Delphix. All rights reserved.
* Copyright 2015 RackTop Systems.
* Copyright (c) 2016, Intel Corporation.
* Copyright (c) 2021, Colm Buckley <colm@tuatha.org>
@ -635,6 +635,18 @@ get_configs(libpc_handle_t *hdl, pool_list_t *pl, boolean_t active_ok,
ZPOOL_CONFIG_HOSTNAME, hostname);
}
if (nvlist_lookup_uint64(tmp,
ZPOOL_CONFIG_SHARED_LOG_POOL, &guid) == 0) {
fnvlist_add_uint64(config,
ZPOOL_CONFIG_SHARED_LOG_POOL, guid);
}
if (fnvlist_lookup_boolean(tmp,
ZPOOL_CONFIG_IS_SHARED_LOG)) {
fnvlist_add_boolean(config,
ZPOOL_CONFIG_IS_SHARED_LOG);
}
config_seen = B_TRUE;
}
@ -1511,6 +1523,11 @@ zpool_find_import_impl(libpc_handle_t *hdl, importargs_t *iarg,
iarg->guid == this_guid;
}
if (matched) {
if (iarg->shared_log_guid) {
fnvlist_add_uint64(config,
ZPOOL_CONFIG_SHARED_LOG_POOL,
iarg->shared_log_guid);
}
/*
* Verify all remaining entries can be opened
* exclusively. This will prune all underlying

View File

@ -19,7 +19,7 @@
.\" CDDL HEADER END
.\"
.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved.
.\" Copyright (c) 2012, 2018 by Delphix. All rights reserved.
.\" Copyright (c) 2012, 2023 by Delphix. All rights reserved.
.\" Copyright (c) 2012 Cyril Plisko. All Rights Reserved.
.\" Copyright (c) 2017 Datto Inc.
.\" Copyright (c) 2018 George Melikov. All Rights Reserved.
@ -178,6 +178,13 @@ However, raidz vdev types are not supported for the intent log.
For more information, see the
.Sx Intent Log
section.
.It Sy shared log
A separate ZFS storage pool used as a shared intent log device.
Only one shared log can be specified at pool creation or import, and a normal log
device cannot also be specified.
For more information, see the
.Sx Intent Log
section.
.It Sy dedup
A device solely dedicated for deduplication tables.
The redundancy of this device should match the redundancy of the other normal
@ -395,6 +402,9 @@ In addition, log devices are imported and exported as part of the pool
that contains them.
Mirrored devices can be removed by specifying the top-level mirror vdev.
.
A shared log pool can be used as a log device; this pool can be used by
several different "client" pools to provide easier management of space
to be used for the intent log.
.Ss Cache Devices
Devices can be added to a storage pool as
.Qq cache devices .

View File

@ -19,7 +19,7 @@
.\" CDDL HEADER END
.\"
.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved.
.\" Copyright (c) 2012, 2018 by Delphix. All rights reserved.
.\" Copyright (c) 2012, 2023 by Delphix. All rights reserved.
.\" Copyright (c) 2012 Cyril Plisko. All Rights Reserved.
.\" Copyright (c) 2017 Datto Inc.
.\" Copyright (c) 2018 George Melikov. All Rights Reserved.
@ -37,8 +37,9 @@
.Sh SYNOPSIS
.Nm zpool
.Cm create
.Op Fl dfn
.Op Fl dfnL
.Op Fl m Ar mountpoint
.Op Fl l Ar pool
.Oo Fl o Ar property Ns = Ns Ar value Oc Ns
.Oo Fl o Sy feature@ Ns Ar feature Ns = Ns Ar value Oc
.Op Fl o Ar compatibility Ns = Ns Sy off Ns | Ns Sy legacy Ns | Ns Ar file Ns Oo , Ns Ar file Oc Ns
@ -167,6 +168,18 @@ Displays the configuration that would be used without actually creating the
pool.
The actual pool creation can still fail due to insufficient privileges or
device sharing.
.It Fl L
Create the pool as a shared log pool.
Shared log pools cannot have filesystems or ZVOLs created in them, but they
can be used as a virtual log device by several other pools, allowing more
efficient use of physical log devices.
Only one shared log pool can be imported on the system at a given
time.
.It Fl l Ar pool
Create the pool using the provided pool as the log device.
The provided pool must be a shared log pool (created using the
.Fl L
flag), and no other log devices can be specified as part of the vdev tree.
.It Fl o Ar property Ns = Ns Ar value
Sets the given pool properties.
See

View File

@ -19,7 +19,7 @@
.\" CDDL HEADER END
.\"
.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved.
.\" Copyright (c) 2012, 2018 by Delphix. All rights reserved.
.\" Copyright (c) 2012, 2023 by Delphix. All rights reserved.
.\" Copyright (c) 2012 Cyril Plisko. All Rights Reserved.
.\" Copyright (c) 2017 Datto Inc.
.\" Copyright (c) 2018 George Melikov. All Rights Reserved.
@ -50,8 +50,9 @@
.Op Fl R Ar root
.Nm zpool
.Cm import
.Op Fl Dflmt
.Op Fl Dflt
.Op Fl F Op Fl nTX
.Op Fl m Op Fl L Ar pool
.Op Fl -rewind-to-checkpoint
.Op Fl c Ar cachefile Ns | Ns Fl d Ar dir Ns | Ns Ar device
.Op Fl o Ar mntopts
@ -256,8 +257,9 @@ health of your pool and should only be used as a last resort.
.It Xo
.Nm zpool
.Cm import
.Op Fl Dflmt
.Op Fl Dflt
.Op Fl F Op Fl nTX
.Op Fl m Op Fl L Ar pool
.Op Fl c Ar cachefile Ns | Ns Fl d Ar dir Ns | Ns Ar device
.Op Fl o Ar mntopts
.Oo Fl o Ar property Ns = Ns Ar value Oc Ns
@ -330,6 +332,12 @@ encrypted datasets will be left unavailable until the keys are loaded.
.It Fl m
Allows a pool to import when there is a missing log device.
Recent transactions can be lost because the log device will be discarded.
.It Fl L
Causes the pool to switch to using the specified shared log pool when
imported.
Requires the
.Fl m
flag.
.It Fl n
Used with the
.Fl F

70
man/man8/zpool-recycle.8 Normal file
View File

@ -0,0 +1,70 @@
.\"
.\" CDDL HEADER START
.\"
.\" The contents of this file are subject to the terms of the
.\" Common Development and Distribution License (the "License").
.\" You may not use this file except in compliance with the License.
.\"
.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
.\" or https://opensource.org/licenses/CDDL-1.0.
.\" See the License for the specific language governing permissions
.\" and limitations under the License.
.\"
.\" When distributing Covered Code, include this CDDL HEADER in each
.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
.\" If applicable, add the following below this CDDL HEADER, with the
.\" fields enclosed by brackets "[]" replaced with your own identifying
.\" information: Portions Copyright [yyyy] [name of copyright owner]
.\"
.\" CDDL HEADER END
.\"
.\" Copyright (c) 2023 by Delphix. All rights reserved.
.\"
.Dd February 13, 2023
.Dt ZPOOL-RECYCLE 8
.Os
.
.Sh NAME
.Nm zpool-recycle
.Nd recycle space used by shared log ZIL chains
.Sh SYNOPSIS
.Nm zpool
.Cm recycle
.Op Fl n
.Op Ar pool
.
.Sh DESCRIPTION
Client pools (from
.Nm zpool
.Cm create
.Fl l
) store their ZIL logs on the shared log pool (from
.Nm zpool
.Cm create
.Fl L
).
When a client pool is deleted with
.Nm zpool
.Cm destroy
, any space used in the shared log pool is reclaimed.
However, if a client pool is exported and will not be imported again
(e.g. because the disks were damaged or removed), any space associated with
it in the shared log pool remains allocated.
.Nm zpool
.Cm recycle
will reclaim space in the shared log pool that is referenced by any
non-imported client pools.
These non-imported client pools will have their logs deleted; if they are
subsequently imported,
.Nm zpool
.Cm import
.Fl m
will need to be used to ignore the missing logs, discarding any recent
transactions.
.
.Sh OPTIONS
.Bl -tag -width "-n"
.It Fl n
Do not actually perform any deletions, just print out the list of pool
GUIDs that would be affected.
.El

View File

@ -21,7 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 by Delphix. All rights reserved.
* Copyright (c) 2011, 2023 by Delphix. All rights reserved.
* Copyright (c) 2013 Martin Matuska <mm@FreeBSD.org>. All rights reserved.
*/
@ -246,6 +246,8 @@ spa_import_rootpool(const char *name, bool checkpointrewind)
spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
error = spa_config_parse(spa, &rvd, nvtop, NULL, 0,
VDEV_ALLOC_ROOTPOOL);
if (error == 0)
spa_set_pool_type(spa);
spa_config_exit(spa, SCL_ALL, FTAG);
if (error) {
mutex_exit(&spa_namespace_lock);

View File

@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2011, 2018 by Delphix. All rights reserved.
* Copyright (c) 2011, 2023 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
* Copyright (c) 2014, Nexenta Systems, Inc. All rights reserved.
@ -754,6 +754,11 @@ zpool_feature_init(void)
"Support for raidz expansion",
ZFEATURE_FLAG_MOS, ZFEATURE_TYPE_BOOLEAN, NULL, sfeatures);
zfeature_register(SPA_FEATURE_SHARED_LOG,
"com.delphix:shared_log", "shared_log",
"Support for shared log pools.", 0, ZFEATURE_TYPE_BOOLEAN, NULL,
sfeatures);
zfs_mod_list_supported_free(sfeatures);
}

View File

@ -21,7 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2020 by Delphix. All rights reserved.
* Copyright (c) 2012, 2023 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
@ -658,6 +658,8 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
if (ds == NULL || !ds->ds_is_snapshot)
os->os_zil_header = os->os_phys->os_zil_header;
if (spa_uses_shared_log(spa))
spa_zil_header_convert(spa, os, &os->os_zil_header.zh_log);
os->os_zil = zil_alloc(os, &os->os_zil_header);
for (i = 0; i < TXG_SIZE; i++) {
@ -1192,6 +1194,9 @@ dmu_objset_create_check(void *arg, dmu_tx_t *tx)
const char *tail;
int error;
if (spa_is_shared_log(dp->dp_spa))
return (SET_ERROR(EINVAL));
if (strchr(doca->doca_name, '@') != NULL)
return (SET_ERROR(EINVAL));
@ -1720,6 +1725,10 @@ sync_meta_dnode_task(void *arg)
*/
zil_sync(os->os_zil, tx);
os->os_phys->os_zil_header = os->os_zil_header;
if (os->os_spa->spa_uses_shared_log) {
spa_zil_header_mask(os->os_spa,
&os->os_phys->os_zil_header.zh_log);
}
zio_nowait(soa->soa_zio);
mutex_destroy(&soa->soa_mutex);

View File

@ -21,7 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2012, 2017 by Delphix. All rights reserved.
* Copyright (c) 2012, 2023 by Delphix. All rights reserved.
*/
#include <sys/dmu.h>
@ -75,6 +75,14 @@ dmu_tx_create_dd(dsl_dir_t *dd)
return (tx);
}
/*
 * Create a transaction against the given pool's MOS (meta objset),
 * rather than against a dataset-owned objset.
 */
dmu_tx_t *
dmu_tx_create_mos(dsl_pool_t *dp)
{
	dmu_tx_t *tx;

	tx = dmu_tx_create_dd(dp->dp_mos_dir);
	tx->tx_objset = dp->dp_meta_objset;

	return (tx);
}
dmu_tx_t *
dmu_tx_create(objset_t *os)
{

View File

@ -726,7 +726,8 @@ kill_blkptr(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
* It's a block in the intent log. It has no
* accounting, so just free it.
*/
dsl_free(ka->tx->tx_pool, ka->tx->tx_txg, bp);
if (!zil_shared_log(zilog))
dsl_free(ka->tx->tx_pool, ka->tx->tx_txg, bp);
} else {
ASSERT(zilog == NULL);
ASSERT3U(BP_GET_LOGICAL_BIRTH(bp), >,

View File

@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2018 by Delphix. All rights reserved.
* Copyright (c) 2012, 2022 by Delphix. All rights reserved.
* Copyright (c) 2013 Martin Matuska. All rights reserved.
* Copyright (c) 2014 Joyent, Inc. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
@ -1303,7 +1303,7 @@ top_of_function:
* refreservation values. Also, if checkrefquota is set, test if
* allocating this space would exceed the dataset's refquota.
*/
if (first && tx->tx_objset) {
if (first && tx->tx_objset && tx->tx_objset->os_dsl_dataset) {
int error;
dsl_dataset_t *ds = tx->tx_objset->os_dsl_dataset;

View File

@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2020 by Delphix. All rights reserved.
* Copyright (c) 2011, 2023 by Delphix. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
* Copyright 2016 Nexenta Systems, Inc. All rights reserved.
@ -360,6 +360,14 @@ dsl_pool_open(dsl_pool_t *dp)
if (err)
goto out;
if (spa_is_shared_log(dp->dp_spa)) {
err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_CHAIN_MAP_OBJ, sizeof (uint64_t), 1,
&dp->dp_chain_map_obj);
if (err != 0)
goto out;
}
err = dsl_scan_init(dp, dp->dp_tx.tx_open_txg);
out:
@ -548,6 +556,16 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops __attribute__((unused)),
#endif
dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT, FTAG);
if (spa_is_shared_log(spa)) {
dp->dp_chain_map_obj = zap_create_flags(dp->dp_meta_objset, 0,
ZAP_FLAG_HASH64 | ZAP_FLAG_UINT64_KEY |
ZAP_FLAG_PRE_HASHED_KEY, DMU_OTN_ZAP_METADATA, 0, 0,
DMU_OT_NONE, 0, tx);
VERIFY0(zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_CHAIN_MAP_OBJ, sizeof (uint64_t), 1,
&dp->dp_chain_map_obj, tx));
}
dmu_tx_commit(tx);
rrw_exit(&dp->dp_config_rwlock, FTAG);

View File

@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2019 by Delphix. All rights reserved.
* Copyright (c) 2011, 2023 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
* Copyright (c) 2015, Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2017, Intel Corporation.
@ -1660,6 +1660,12 @@ static metaslab_ops_t metaslab_allocators[] = {
{ "new-dynamic", metaslab_ndf_alloc },
};
const metaslab_ops_t zfs_virtual_ops = {
"virtual",
NULL,
METASLAB_TYPE_VIRTUAL
};
static int
spa_find_allocator_byname(const char *val)
{
@ -2866,6 +2872,10 @@ metaslab_fini(metaslab_t *msp)
range_tree_destroy(msp->ms_unflushed_frees);
for (int t = 0; t < TXG_SIZE; t++) {
if (spa_is_shared_log(spa) &&
spa_load_state(spa) != SPA_LOAD_NONE) {
range_tree_vacate(msp->ms_allocating[t], NULL, NULL);
}
range_tree_destroy(msp->ms_allocating[t]);
}
for (int t = 0; t < TXG_DEFER_SIZE; t++) {
@ -5845,6 +5855,19 @@ metaslab_alloc(spa_t *spa, metaslab_class_t *mc, uint64_t psize, blkptr_t *bp,
int ndvas, uint64_t txg, blkptr_t *hintbp, int flags,
zio_alloc_list_t *zal, zio_t *zio, int allocator)
{
if (mc->mc_ops->msop_type == METASLAB_TYPE_VIRTUAL) {
ASSERT3P(mc->mc_virtual, !=, NULL);
spa_t *target_spa = mc->mc_virtual;
dmu_tx_t *tx = dmu_tx_create_mos(target_spa->spa_dsl_pool);
VERIFY0(dmu_tx_assign(tx, TXG_WAIT | TXG_NOTHROTTLE));
uint64_t target_txg = dmu_tx_get_txg(tx);
int ret = metaslab_alloc(target_spa,
spa_normal_class(target_spa), psize, bp, ndvas, target_txg,
hintbp, flags, zal, zio, allocator);
dmu_tx_commit(tx);
return (ret);
}
dva_t *dva = bp->blk_dva;
dva_t *hintdva = (hintbp != NULL) ? hintbp->blk_dva : NULL;
int error = 0;
@ -5861,7 +5884,7 @@ metaslab_alloc(spa_t *spa, metaslab_class_t *mc, uint64_t psize, blkptr_t *bp,
}
ASSERT(ndvas > 0 && ndvas <= spa_max_replication(spa));
ASSERT(BP_GET_NDVAS(bp) == 0);
ASSERT0(BP_GET_NDVAS(bp));
ASSERT(hintbp == NULL || ndvas <= BP_GET_NDVAS(hintbp));
ASSERT3P(zal, !=, NULL);
@ -5887,8 +5910,8 @@ metaslab_alloc(spa_t *spa, metaslab_class_t *mc, uint64_t psize, blkptr_t *bp,
DVA_GET_VDEV(&dva[d]), zio, flags, allocator);
}
}
ASSERT(error == 0);
ASSERT(BP_GET_NDVAS(bp) == ndvas);
ASSERT0(error);
ASSERT3U(BP_GET_NDVAS(bp), ==, ndvas);
spa_config_exit(spa, SCL_ALLOC, FTAG);

View File

@ -23,7 +23,7 @@
* Use is subject to license terms.
*/
/*
* Copyright (c) 2013, 2019 by Delphix. All rights reserved.
* Copyright (c) 2013, 2023 by Delphix. All rights reserved.
* Copyright (c) 2015, Nexenta Systems, Inc. All rights reserved.
*/
@ -618,7 +618,7 @@ range_tree_verify_not_present(range_tree_t *rt, uint64_t off, uint64_t size)
{
range_seg_t *rs = range_tree_find(rt, off, size);
if (rs != NULL)
panic("segment already in tree; rs=%p", (void *)rs);
panic("segment already in tree; rt=%px rs=%px", rt, (void *)rs);
}
boolean_t

File diff suppressed because it is too large Load Diff

View File

@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2017 by Delphix. All rights reserved.
* Copyright (c) 2017, 2023 by Delphix. All rights reserved.
*/
/*
@ -544,6 +544,10 @@ spa_checkpoint(const char *pool)
error = spa_open(pool, &spa, FTAG);
if (error != 0)
return (error);
if (spa_uses_shared_log(spa) || spa_is_shared_log(spa)) {
spa_close(spa, FTAG);
return (SET_ERROR(ENOTSUP));
}
mutex_enter(&spa->spa_vdev_top_lock);

View File

@ -22,7 +22,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2011, 2020 by Delphix. All rights reserved.
* Copyright (c) 2011, 2023 by Delphix. All rights reserved.
* Copyright 2017 Joyent, Inc.
* Copyright (c) 2021, Colm Buckley <colm@tuatha.org>
*/
@ -466,6 +466,11 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats)
if (spa->spa_compatibility != NULL)
fnvlist_add_string(config, ZPOOL_CONFIG_COMPATIBILITY,
spa->spa_compatibility);
if (spa->spa_uses_shared_log)
fnvlist_add_uint64(config, ZPOOL_CONFIG_SHARED_LOG_POOL,
spa_guid(spa_get_shared_log_pool(spa)));
if (spa_is_shared_log(spa))
fnvlist_add_boolean(config, ZPOOL_CONFIG_IS_SHARED_LOG);
hostid = spa_get_hostid(spa);
if (hostid != 0)

View File

@ -238,6 +238,8 @@
avl_tree_t spa_namespace_avl;
kmutex_t spa_namespace_lock;
avl_tree_t spa_shared_log_avl;
kmutex_t spa_shared_log_lock;
kcondvar_t spa_namespace_cv;
static const int spa_max_replication_override = SPA_DVAS_PER_BP;
@ -445,6 +447,41 @@ static int zfs_user_indirect_is_special = B_TRUE;
*/
static uint_t zfs_special_class_metadata_reserve_pct = 25;
/*
 * Determine from the pool config whether this spa is a shared log pool,
 * record the result in spa_pool_type, and register shared log pools in
 * the global spa_shared_log_avl tree.
 */
void
spa_set_pool_type(spa_t *spa)
{
	ASSERT3P(spa->spa_root_vdev, !=, NULL);

	/*
	 * Must hold all of spa_config locks.
	 */
	ASSERT3U(spa_config_held(spa, SCL_ALL, RW_WRITER), ==, SCL_ALL);

	if (!fnvlist_lookup_boolean(spa->spa_config,
	    ZPOOL_CONFIG_IS_SHARED_LOG)) {
		spa->spa_pool_type = SPA_TYPE_NORMAL;
		return;
	}

	spa->spa_pool_type = SPA_TYPE_SHARED_LOG;

	/* Insert into the shared log AVL tree unless it is already there. */
	avl_index_t idx;
	mutex_enter(&spa_shared_log_lock);
	if (avl_find(&spa_shared_log_avl, spa, &idx) == NULL)
		avl_insert(&spa_shared_log_avl, spa, idx);
	mutex_exit(&spa_shared_log_lock);
}
/*
 * Returns B_TRUE if this pool is itself a shared log pool (its type was
 * set to SPA_TYPE_SHARED_LOG by spa_set_pool_type()).
 */
boolean_t
spa_is_shared_log(const spa_t *spa)
{
	return (spa->spa_pool_type == SPA_TYPE_SHARED_LOG);
}
/*
 * Returns B_TRUE if this pool's log device is a shared log pool
 * (i.e. the spa_uses_shared_log flag is set on this spa).
 */
boolean_t
spa_uses_shared_log(const spa_t *spa)
{
	return (spa->spa_uses_shared_log);
}
/*
* ==========================================================================
* SPA config locking
@ -685,6 +722,15 @@ spa_log_sm_sort_by_txg(const void *va, const void *vb)
return (TREE_CMP(a->sls_txg, b->sls_txg));
}
/*
 * AVL comparator ordering spa_t nodes by pool guid (via spa_const_guid()).
 */
static int
spa_guid_compare(const void *a1, const void *a2)
{
	const spa_t *lhs = a1;
	const spa_t *rhs = a2;

	return (TREE_CMP(spa_const_guid(lhs), spa_const_guid(rhs)));
}
/*
* Create an uninitialized spa_t with the given name. Requires
* spa_namespace_lock. The caller must ensure that the spa_t doesn't already
@ -714,6 +760,8 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)
mutex_init(&spa->spa_feat_stats_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&spa->spa_flushed_ms_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&spa->spa_activities_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&spa->spa_chain_map_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&spa->spa_zil_map_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&spa->spa_async_cv, NULL, CV_DEFAULT, NULL);
cv_init(&spa->spa_evicting_os_cv, NULL, CV_DEFAULT, NULL);
@ -777,6 +825,8 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)
sizeof (metaslab_t), offsetof(metaslab_t, ms_spa_txg_node));
avl_create(&spa->spa_sm_logs_by_txg, spa_log_sm_sort_by_txg,
sizeof (spa_log_sm_t), offsetof(spa_log_sm_t, sls_node));
avl_create(&spa->spa_registered_clients, spa_guid_compare,
sizeof (spa_t), offsetof(spa_t, spa_client_avl));
list_create(&spa->spa_log_summary, sizeof (log_summary_entry_t),
offsetof(log_summary_entry_t, lse_node));
@ -852,6 +902,12 @@ spa_remove(spa_t *spa)
avl_remove(&spa_namespace_avl, spa);
if (spa_is_shared_log(spa)) {
mutex_enter(&spa_shared_log_lock);
avl_remove(&spa_shared_log_avl, spa);
mutex_exit(&spa_shared_log_lock);
}
if (spa->spa_root)
spa_strfree(spa->spa_root);
@ -875,6 +931,7 @@ spa_remove(spa_t *spa)
avl_destroy(&spa->spa_metaslabs_by_flushed);
avl_destroy(&spa->spa_sm_logs_by_txg);
avl_destroy(&spa->spa_registered_clients);
list_destroy(&spa->spa_log_summary);
list_destroy(&spa->spa_config_list);
list_destroy(&spa->spa_leaf_list);
@ -916,6 +973,7 @@ spa_remove(spa_t *spa)
mutex_destroy(&spa->spa_vdev_top_lock);
mutex_destroy(&spa->spa_feat_stats_lock);
mutex_destroy(&spa->spa_activities_lock);
mutex_destroy(&spa->spa_chain_map_lock);
kmem_free(spa, sizeof (spa_t));
}
@ -1747,6 +1805,20 @@ spa_name(spa_t *spa)
return (spa->spa_name);
}
/*
 * Return the pool guid from a const spa_t.
 *
 * If we fail to parse the config during spa_load(), we can go through
 * the error path (which posts an ereport) and end up here with no root
 * vdev. We stash the original pool guid in 'spa_config_guid' to handle
 * this case.
 */
uint64_t
spa_const_guid(const spa_t *spa)
{
	return (spa->spa_root_vdev == NULL ?
	    spa->spa_config_guid : spa->spa_root_vdev->vdev_guid);
}
uint64_t
spa_guid(spa_t *spa)
{
@ -1776,7 +1848,7 @@ spa_guid(spa_t *spa)
}
uint64_t
spa_load_guid(spa_t *spa)
spa_load_guid(const spa_t *spa)
{
/*
* This is a GUID that exists solely as a reference for the
@ -2523,6 +2595,7 @@ void
spa_init(spa_mode_t mode)
{
mutex_init(&spa_namespace_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&spa_shared_log_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&spa_spare_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&spa_l2cache_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&spa_namespace_cv, NULL, CV_DEFAULT, NULL);
@ -2536,6 +2609,9 @@ spa_init(spa_mode_t mode)
avl_create(&spa_l2cache_avl, spa_l2cache_compare, sizeof (spa_aux_t),
offsetof(spa_aux_t, aux_avl));
avl_create(&spa_shared_log_avl, spa_guid_compare, sizeof (spa_t),
offsetof(spa_t, spa_log_avl));
spa_mode_global = mode;
#ifndef _KERNEL
@ -2611,6 +2687,7 @@ spa_fini(void)
cv_destroy(&spa_namespace_cv);
mutex_destroy(&spa_namespace_lock);
mutex_destroy(&spa_shared_log_lock);
mutex_destroy(&spa_spare_lock);
mutex_destroy(&spa_l2cache_lock);
}
@ -2623,7 +2700,8 @@ spa_fini(void)
boolean_t
spa_has_slogs(spa_t *spa)
{
return (spa->spa_log_class->mc_groups != 0);
return (spa->spa_log_class->mc_groups != 0 ||
spa->spa_log_class->mc_virtual != NULL);
}
spa_log_state_t

View File

@ -7101,6 +7101,34 @@ error:
return (ret);
}
/* Input nvlist keys accepted by the ZFS_IOC_POOL_RECYCLE ioctl. */
static const zfs_ioc_key_t zfs_keys_pool_recycle[] = {
	{ZPOOL_RECYCLE_DRYRUN, DATA_TYPE_BOOLEAN_VALUE, 0},
};
/*
 * innvl: {
 *     ZPOOL_RECYCLE_DRYRUN -> (boolean_t) report-only, perform no deletions
 * }
 *
 * Recycles shared-log space held by non-imported client pools of 'pool'.
 */
static int
zfs_ioc_pool_recycle(const char *pool, nvlist_t *innvl, nvlist_t *outnvl)
{
	int err;
	boolean_t rc, dryrun = B_FALSE;
	spa_t *spa;

	if ((err = spa_open(pool, &spa, FTAG)) != 0)
		return (err);

	if (innvl) {
		err = nvlist_lookup_boolean_value(innvl, ZPOOL_RECYCLE_DRYRUN,
		    &rc);
		if (err == 0)
			dryrun = rc;
	}

	err = spa_recycle(spa, dryrun, outnvl);

	spa_close(spa, FTAG);

	/*
	 * Bug fix: previously this returned 0 unconditionally, discarding
	 * any failure from spa_recycle() and reporting success to userland.
	 */
	return (err);
}
static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST];
static void
@ -7402,6 +7430,11 @@ zfs_ioctl_init(void)
POOL_CHECK_NONE, B_FALSE, B_FALSE,
zfs_keys_get_props, ARRAY_SIZE(zfs_keys_get_props));
zfs_ioctl_register("zpool_recycle", ZFS_IOC_POOL_RECYCLE,
zfs_ioc_pool_recycle, zfs_secpolicy_config, POOL_NAME,
POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
zfs_keys_pool_recycle, ARRAY_SIZE(zfs_keys_pool_recycle));
/* IOCTLS that use the legacy function signature */
zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze,

View File

@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2018 by Delphix. All rights reserved.
* Copyright (c) 2011, 2023 by Delphix. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
* Copyright (c) 2018 Datto Inc.
*/
@ -238,23 +238,16 @@ zil_kstats_global_update(kstat_t *ksp, int rw)
return (0);
}
/*
* Read a log block and make sure it's valid.
*/
static int
zil_read_log_block(zilog_t *zilog, boolean_t decrypt, const blkptr_t *bp,
blkptr_t *nbp, char **begin, char **end, arc_buf_t **abuf)
zil_read_log_block(spa_t *spa, boolean_t decrypt, zio_flag_t zio_flags,
const blkptr_t *bp, blkptr_t *nbp, char **begin, char **end,
arc_buf_t **abuf)
{
zio_flag_t zio_flags = ZIO_FLAG_CANFAIL;
arc_flags_t aflags = ARC_FLAG_WAIT;
zbookmark_phys_t zb;
int error;
if (zilog->zl_header->zh_claim_txg == 0)
zio_flags |= ZIO_FLAG_SPECULATIVE | ZIO_FLAG_SCRUB;
if (!(zilog->zl_header->zh_flags & ZIL_CLAIM_LR_SEQ_VALID))
zio_flags |= ZIO_FLAG_SPECULATIVE;
zio_flags |= ZIO_FLAG_CANFAIL;
if (!decrypt)
zio_flags |= ZIO_FLAG_RAW;
@ -262,7 +255,7 @@ zil_read_log_block(zilog_t *zilog, boolean_t decrypt, const blkptr_t *bp,
SET_BOOKMARK(&zb, bp->blk_cksum.zc_word[ZIL_ZC_OBJSET],
ZB_ZIL_OBJECT, ZB_ZIL_LEVEL, bp->blk_cksum.zc_word[ZIL_ZC_SEQ]);
error = arc_read(NULL, zilog->zl_spa, bp, arc_getbuf_func,
error = arc_read(NULL, spa, bp, arc_getbuf_func,
abuf, ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb);
if (error == 0) {
@ -346,7 +339,7 @@ zil_read_log_data(zilog_t *zilog, const lr_write_t *lr, void *wbuf)
SET_BOOKMARK(&zb, dmu_objset_id(zilog->zl_os), lr->lr_foid,
ZB_ZIL_LEVEL, lr->lr_offset / BP_GET_LSIZE(bp));
error = arc_read(NULL, zilog->zl_spa, bp, arc_getbuf_func, &abuf,
error = arc_read(NULL, zilog->zl_io_spa, bp, arc_getbuf_func, &abuf,
ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb);
if (error == 0) {
@ -453,6 +446,133 @@ zil_kstat_values_update(zil_kstat_values_t *zs, zil_sums_t *zil_sums)
wmsum_value(&zil_sums->zil_itx_metaslab_slog_alloc);
}
/*
 * Parse a raw ZIL block chain, calling parse_blk_func for each valid block
 * within and parse_lr_func for each valid record within.  Traversal stops
 * at the first hole in the chain, at the first unreadable block, or when
 * either callback returns nonzero; that error is returned to the caller.
 */
static int
zil_parse_raw_impl(spa_t *spa, const blkptr_t *bp,
    zil_parse_raw_blk_func_t *parse_blk_func,
    zil_parse_raw_lr_func_t *parse_lr_func, void *arg, zio_flag_t zio_flags)
{
	blkptr_t next_blk = {{{{0}}}};
	int error = 0;

	for (blkptr_t blk = *bp; !BP_IS_HOLE(&blk); blk = next_blk) {
		char *lrp, *end;
		arc_buf_t *abuf = NULL;

		/*
		 * We do the read before the parse function so that if the
		 * parse function frees the block, we still have next_blk so
		 * we can continue the chain.
		 */
		int read_error = zil_read_log_block(spa, B_FALSE, zio_flags,
		    &blk, &next_blk, &lrp, &end, &abuf);
		error = parse_blk_func(spa, &blk, arg);
		if (error != 0) {
			if (abuf)
				arc_buf_destroy(abuf, &abuf);
			break;
		}
		if (read_error != 0) {
			if (abuf)
				arc_buf_destroy(abuf, &abuf);
			error = read_error;
			break;
		}

		int reclen;
		for (; lrp < end; lrp += reclen) {
			lr_t *lr = (lr_t *)lrp;
			reclen = lr->lrc_reclen;
			ASSERT3U(reclen, >=, sizeof (lr_t));
			ASSERT3U(reclen, <=, end - lrp);
			error = parse_lr_func(spa, lr, arg);
			if (error != 0)
				break;
		}
		arc_buf_destroy(abuf, &abuf);
		/*
		 * Stop walking the chain when a record callback fails or
		 * requests early termination (e.g. EINTR from the zil_parse
		 * wrappers); previously this error was dropped and traversal
		 * continued to the next block.
		 */
		if (error != 0)
			break;
	}

	return (error);
}
/*
 * Walk a raw ZIL chain with no special zio flags applied to the block reads.
 */
int
zil_parse_raw(spa_t *spa, const blkptr_t *bp,
    zil_parse_raw_blk_func_t *parse_blk_func,
    zil_parse_raw_lr_func_t *parse_lr_func, void *arg)
{
	const zio_flag_t flags = 0;

	return (zil_parse_raw_impl(spa, bp, parse_blk_func, parse_lr_func,
	    arg, flags));
}
/*
 * Context threaded through zil_parse_raw_impl() to adapt the raw (spa-based)
 * callbacks back to the zilog-based callbacks that zil_parse() callers use.
 */
struct parse_arg {
	zilog_t *zilog;		/* ZIL being parsed */
	zil_parse_blk_func_t *parse_blk_func;	/* caller's block callback */
	zil_parse_lr_func_t *parse_lr_func;	/* caller's record callback */
	void *arg;		/* caller's opaque callback argument */
	uint64_t txg;		/* txg forwarded to the callbacks */
	boolean_t decrypt;	/* decrypt flag from zil_parse() caller */
				/* NOTE(review): not read by the visible */
				/* wrappers -- confirm where it is consumed */
	uint64_t blk_seq;	/* seq of the block currently being visited */
	uint64_t claim_blk_seq;	/* highest claimed blk seq (or UINT64_MAX) */
	uint64_t claim_lr_seq;	/* highest claimed lr seq (or UINT64_MAX) */
	uint64_t max_blk_seq;	/* highest valid blk seq seen so far */
	uint64_t max_lr_seq;	/* highest valid lr seq seen so far */
	uint64_t blk_count;	/* number of valid blocks visited */
	uint64_t lr_count;	/* number of valid records visited */
	int error;		/* first error returned by a callback */
};
/*
 * Block-callback adapter for zil_parse(): enforces the claimed block
 * sequence limit, forwards to the zilog-based callback, and returns EINTR
 * to terminate the raw walk early.
 */
static int
parse_blk_wrapper(spa_t *spa, const blkptr_t *bp, void *arg)
{
	(void) spa;
	struct parse_arg *pa = arg;
	int err;

	pa->blk_seq = bp->blk_cksum.zc_word[ZIL_ZC_SEQ];

	/* Don't walk past the highest claimed block sequence number. */
	if (pa->blk_seq > pa->claim_blk_seq)
		return (EINTR);

	err = pa->parse_blk_func(pa->zilog, bp, pa->arg, pa->txg);
	if (err != 0) {
		/* Stash the real error; EINTR only signals "stop walking". */
		pa->error = err;
		return (EINTR);
	}

	/* Block sequence numbers must be strictly increasing. */
	ASSERT3U(pa->max_blk_seq, <, pa->blk_seq);
	pa->max_blk_seq = pa->blk_seq;
	pa->blk_count++;

	/* Once everything claimed has been visited, stop the walk. */
	if (pa->max_lr_seq == pa->claim_lr_seq &&
	    pa->max_blk_seq == pa->claim_blk_seq)
		return (EINTR);

	return (0);
}
/*
 * Record-callback adapter for zil_parse(): enforces the claimed record
 * sequence limit, forwards to the zilog-based callback, and returns EINTR
 * to terminate the raw walk early.
 */
static int
parse_lr_wrapper(spa_t *spa, const lr_t *lr, void *arg)
{
	(void) spa;
	struct parse_arg *pa = arg;

	/* Don't process records beyond the highest claimed sequence. */
	if (lr->lrc_seq > pa->claim_lr_seq)
		return (EINTR);

	int err = pa->parse_lr_func(pa->zilog, lr, pa->arg, pa->txg);
	if (err != 0) {
		/* Stash the real error; EINTR only signals "stop walking". */
		pa->error = err;
		return (EINTR);
	}

	/* Record sequence numbers must be strictly increasing. */
	ASSERT3U(pa->max_lr_seq, <, lr->lrc_seq);
	pa->max_lr_seq = lr->lrc_seq;
	pa->lr_count++;
	return (0);
}
/*
* Parse the intent log, and call parse_func for each valid record within.
*/
@ -463,95 +583,58 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
{
const zil_header_t *zh = zilog->zl_header;
boolean_t claimed = !!zh->zh_claim_txg;
uint64_t claim_blk_seq = claimed ? zh->zh_claim_blk_seq : UINT64_MAX;
uint64_t claim_lr_seq = claimed ? zh->zh_claim_lr_seq : UINT64_MAX;
uint64_t max_blk_seq = 0;
uint64_t max_lr_seq = 0;
uint64_t blk_count = 0;
uint64_t lr_count = 0;
blkptr_t blk, next_blk = {{{{0}}}};
int error = 0;
struct parse_arg arg2;
arg2.claim_blk_seq = claimed ? zh->zh_claim_blk_seq : UINT64_MAX;
arg2.claim_lr_seq = claimed ? zh->zh_claim_lr_seq : UINT64_MAX;
arg2.max_blk_seq = 0;
arg2.max_lr_seq = 0;
arg2.blk_count = 0;
arg2.lr_count = 0;
arg2.arg = arg;
arg2.parse_blk_func = parse_blk_func;
arg2.parse_lr_func = parse_lr_func;
arg2.txg = txg;
arg2.decrypt = decrypt;
arg2.zilog = zilog;
arg2.error = 0;
arg2.blk_seq = 0;
zio_flag_t zio_flags = 0;
if (!claimed)
zio_flags |= ZIO_FLAG_SPECULATIVE | ZIO_FLAG_SCRUB;
if (!(zh->zh_flags & ZIL_CLAIM_LR_SEQ_VALID))
zio_flags |= ZIO_FLAG_SPECULATIVE;
/*
* Old logs didn't record the maximum zh_claim_lr_seq.
*/
if (!(zh->zh_flags & ZIL_CLAIM_LR_SEQ_VALID))
claim_lr_seq = UINT64_MAX;
arg2.claim_lr_seq = UINT64_MAX;
/*
* Starting at the block pointed to by zh_log we read the log chain.
* For each block in the chain we strongly check that block to
* ensure its validity. We stop when an invalid block is found.
* For each block pointer in the chain we call parse_blk_func().
* For each record in each valid block we call parse_lr_func().
* If the log has been claimed, stop if we encounter a sequence
* number greater than the highest claimed sequence number.
*/
zil_bp_tree_init(zilog);
for (blk = zh->zh_log; !BP_IS_HOLE(&blk); blk = next_blk) {
uint64_t blk_seq = blk.blk_cksum.zc_word[ZIL_ZC_SEQ];
int reclen;
char *lrp, *end;
arc_buf_t *abuf = NULL;
int error = zil_parse_raw_impl(zilog->zl_io_spa, &zh->zh_log,
parse_blk_wrapper, parse_lr_wrapper, &arg2, zio_flags);
if (blk_seq > claim_blk_seq)
break;
/* If this happens, we got an error from zil_read_log_block */
if (error != 0 && error != EINTR && claimed) {
char name[ZFS_MAX_DATASET_NAME_LEN];
error = parse_blk_func(zilog, &blk, arg, txg);
if (error != 0)
break;
ASSERT3U(max_blk_seq, <, blk_seq);
max_blk_seq = blk_seq;
blk_count++;
dmu_objset_name(zilog->zl_os, name);
if (max_lr_seq == claim_lr_seq && max_blk_seq == claim_blk_seq)
break;
error = zil_read_log_block(zilog, decrypt, &blk, &next_blk,
&lrp, &end, &abuf);
if (error != 0) {
if (abuf)
arc_buf_destroy(abuf, &abuf);
if (claimed) {
char name[ZFS_MAX_DATASET_NAME_LEN];
dmu_objset_name(zilog->zl_os, name);
cmn_err(CE_WARN, "ZFS read log block error %d, "
"dataset %s, seq 0x%llx\n", error, name,
(u_longlong_t)blk_seq);
}
break;
}
for (; lrp < end; lrp += reclen) {
lr_t *lr = (lr_t *)lrp;
reclen = lr->lrc_reclen;
ASSERT3U(reclen, >=, sizeof (lr_t));
ASSERT3U(reclen, <=, end - lrp);
if (lr->lrc_seq > claim_lr_seq) {
arc_buf_destroy(abuf, &abuf);
goto done;
}
error = parse_lr_func(zilog, lr, arg, txg);
if (error != 0) {
arc_buf_destroy(abuf, &abuf);
goto done;
}
ASSERT3U(max_lr_seq, <, lr->lrc_seq);
max_lr_seq = lr->lrc_seq;
lr_count++;
}
arc_buf_destroy(abuf, &abuf);
cmn_err(CE_WARN, "ZFS read log block error %d, "
"dataset %s, seq 0x%llx\n", error, name,
(u_longlong_t)arg2.blk_seq);
}
done:
if (error == EINTR)
error = arg2.error;
zilog->zl_parse_error = error;
zilog->zl_parse_blk_seq = max_blk_seq;
zilog->zl_parse_lr_seq = max_lr_seq;
zilog->zl_parse_blk_count = blk_count;
zilog->zl_parse_lr_count = lr_count;
zilog->zl_parse_blk_seq = arg2.max_blk_seq;
zilog->zl_parse_lr_seq = arg2.max_lr_seq;
zilog->zl_parse_blk_count = arg2.blk_count;
zilog->zl_parse_lr_count = arg2.lr_count;
zil_bp_tree_fini(zilog);
@ -565,6 +648,8 @@ zil_clear_log_block(zilog_t *zilog, const blkptr_t *bp, void *tx,
(void) tx;
ASSERT(!BP_IS_HOLE(bp));
// We do not support checkpoints of shared log client pools.
ASSERT(!zilog->zl_spa->spa_uses_shared_log);
/*
* As we call this function from the context of a rewind to a
* checkpoint, each ZIL block whose txg is later than the txg
@ -577,7 +662,7 @@ zil_clear_log_block(zilog_t *zilog, const blkptr_t *bp, void *tx,
if (zil_bp_tree_add(zilog, bp) != 0)
return (0);
zio_free(zilog->zl_spa, first_txg, bp);
zio_free(zilog->zl_io_spa, first_txg, bp);
return (0);
}
@ -598,7 +683,8 @@ zil_claim_log_block(zilog_t *zilog, const blkptr_t *bp, void *tx,
* If tx == NULL, just verify that the block is claimable.
*/
if (BP_IS_HOLE(bp) || BP_GET_LOGICAL_BIRTH(bp) < first_txg ||
zil_bp_tree_add(zilog, bp) != 0)
zil_bp_tree_add(zilog, bp) != 0 ||
zilog->zl_spa != zilog->zl_io_spa)
return (0);
return (zio_wait(zio_claim(NULL, zilog->zl_spa,
@ -708,7 +794,8 @@ zil_free_log_block(zilog_t *zilog, const blkptr_t *bp, void *tx,
{
(void) claim_txg;
zio_free(zilog->zl_spa, dmu_tx_get_txg(tx), bp);
if (!zilog->zl_spa->spa_uses_shared_log)
zio_free(zilog->zl_spa, dmu_tx_get_txg(tx), bp);
return (0);
}
@ -725,7 +812,8 @@ zil_free_write(zilog_t *zilog, const lr_t *lrc, void *tx, uint64_t claim_txg)
* If we previously claimed it, we need to free it.
*/
if (BP_GET_LOGICAL_BIRTH(bp) >= claim_txg &&
zil_bp_tree_add(zilog, bp) == 0 && !BP_IS_HOLE(bp)) {
zil_bp_tree_add(zilog, bp) == 0 && !BP_IS_HOLE(bp) &&
!zilog->zl_spa->spa_uses_shared_log) {
zio_free(zilog->zl_spa, dmu_tx_get_txg(tx), bp);
}
@ -966,7 +1054,7 @@ zil_create(zilog_t *zilog)
int error = 0;
boolean_t slog = FALSE;
dsl_dataset_t *ds = dmu_objset_ds(zilog->zl_os);
spa_t *spa = zilog->zl_spa;
/*
* Wait for any previous destroy to complete.
@ -990,14 +1078,23 @@ zil_create(zilog_t *zilog)
txg = dmu_tx_get_txg(tx);
if (!BP_IS_HOLE(&blk)) {
zio_free(zilog->zl_spa, txg, &blk);
if (spa_uses_shared_log(spa)) {
spa_zil_delete(spa, zilog->zl_os);
} else {
zio_free(spa, txg, &blk);
}
BP_ZERO(&blk);
}
error = zio_alloc_zil(zilog->zl_spa, zilog->zl_os, txg, &blk,
error = zio_alloc_zil(spa, zilog->zl_os, txg, &blk,
ZIL_MIN_BLKSZ, &slog);
if (error == 0)
zil_init_log_chain(zilog, &blk);
spa_zil_map_insert(spa, zilog->zl_os, NULL, &blk);
if (spa_uses_shared_log(spa)) {
spa_t *shared_log = spa_get_shared_log_pool(spa);
txg_wait_synced(shared_log->spa_dsl_pool, 0);
}
}
/*
@ -1018,9 +1115,8 @@ zil_create(zilog_t *zilog)
* this until we write the first xattr log record because we
* need to wait for the feature activation to sync out.
*/
if (spa_feature_is_enabled(zilog->zl_spa,
SPA_FEATURE_ZILSAXATTR) && dmu_objset_type(zilog->zl_os) !=
DMU_OST_ZVOL) {
if (spa_feature_is_enabled(spa, SPA_FEATURE_ZILSAXATTR) &&
dmu_objset_type(zilog->zl_os) != DMU_OST_ZVOL) {
mutex_enter(&ds->ds_lock);
ds->ds_feature_activation[SPA_FEATURE_ZILSAXATTR] =
(void *)B_TRUE;
@ -1036,7 +1132,7 @@ zil_create(zilog_t *zilog)
*/
zil_commit_activate_saxattr_feature(zilog);
}
IMPLY(spa_feature_is_enabled(zilog->zl_spa, SPA_FEATURE_ZILSAXATTR) &&
IMPLY(spa_feature_is_enabled(spa, SPA_FEATURE_ZILSAXATTR) &&
dmu_objset_type(zilog->zl_os) != DMU_OST_ZVOL,
dsl_dataset_feature_is_active(ds, SPA_FEATURE_ZILSAXATTR));
@ -1088,11 +1184,14 @@ zil_destroy(zilog_t *zilog, boolean_t keep_first)
if (!list_is_empty(&zilog->zl_lwb_list)) {
ASSERT(zh->zh_claim_txg == 0);
VERIFY(!keep_first);
spa_zil_delete(zilog->zl_spa, zilog->zl_os);
while ((lwb = list_remove_head(&zilog->zl_lwb_list)) != NULL) {
if (lwb->lwb_buf != NULL)
zio_buf_free(lwb->lwb_buf, lwb->lwb_sz);
if (!BP_IS_HOLE(&lwb->lwb_blk))
if (!BP_IS_HOLE(&lwb->lwb_blk) &&
!spa_uses_shared_log(zilog->zl_spa)) {
zio_free(zilog->zl_spa, txg, &lwb->lwb_blk);
}
zil_free_lwb(zilog, lwb);
}
} else if (!keep_first) {
@ -1440,7 +1539,7 @@ zil_lwb_flush_vdevs_done(zio_t *zio)
zil_commit_waiter_t *zcw;
itx_t *itx;
spa_config_exit(zilog->zl_spa, SCL_STATE, lwb);
spa_config_exit(zilog->zl_io_spa, SCL_STATE, lwb);
hrtime_t t = gethrtime() - lwb->lwb_issued_timestamp;
@ -1878,6 +1977,7 @@ static void
zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb)
{
spa_t *spa = zilog->zl_spa;
spa_t *io_spa = zilog->zl_io_spa;
zil_chain_t *zilc;
boolean_t slog;
zbookmark_phys_t zb;
@ -1893,7 +1993,7 @@ zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb)
lwb->lwb_nused = lwb->lwb_nfilled;
ASSERT3U(lwb->lwb_nused, <=, lwb->lwb_nmax);
lwb->lwb_root_zio = zio_root(spa, zil_lwb_flush_vdevs_done, lwb,
lwb->lwb_root_zio = zio_root(io_spa, zil_lwb_flush_vdevs_done, lwb,
ZIO_FLAG_CANFAIL);
/*
@ -1926,7 +2026,7 @@ next_lwb:
SET_BOOKMARK(&zb, lwb->lwb_blk.blk_cksum.zc_word[ZIL_ZC_OBJSET],
ZB_ZIL_OBJECT, ZB_ZIL_LEVEL,
lwb->lwb_blk.blk_cksum.zc_word[ZIL_ZC_SEQ]);
lwb->lwb_write_zio = zio_rewrite(lwb->lwb_root_zio, spa, 0,
lwb->lwb_write_zio = zio_rewrite(lwb->lwb_root_zio, io_spa, 0,
&lwb->lwb_blk, lwb_abd, lwb->lwb_sz, zil_lwb_write_done,
lwb, prio, ZIO_FLAG_CANFAIL, &zb);
zil_lwb_add_block(lwb, &lwb->lwb_blk);
@ -1975,11 +2075,14 @@ next_lwb:
&slog);
}
if (error == 0) {
ASSERT3U(BP_GET_LOGICAL_BIRTH(bp), ==, txg);
IMPLY(spa == io_spa, BP_GET_LOGICAL_BIRTH(bp) == txg);
BP_SET_CHECKSUM(bp, nlwb->lwb_slim ? ZIO_CHECKSUM_ZILOG2 :
ZIO_CHECKSUM_ZILOG);
VERIFY(zfs_blkptr_verify(io_spa, bp, BLK_CONFIG_NEEDED,
BLK_VERIFY_HALT));
bp->blk_cksum = lwb->lwb_blk.blk_cksum;
bp->blk_cksum.zc_word[ZIL_ZC_SEQ]++;
spa_zil_map_insert(spa, zilog->zl_os, &lwb->lwb_blk, bp);
}
/*
@ -1993,7 +2096,7 @@ next_lwb:
mutex_exit(&zilog->zl_lwb_io_lock);
dmu_tx_commit(tx);
spa_config_enter(spa, SCL_STATE, lwb, RW_READER);
spa_config_enter(io_spa, SCL_STATE, lwb, RW_READER);
/*
* We've completed all potentially blocking operations. Update the
@ -3726,6 +3829,7 @@ zil_sync(zilog_t *zilog, dmu_tx_t *tx)
*/
zil_init_log_chain(zilog, &blk);
zh->zh_log = blk;
spa_zil_map_set_final(spa, zilog->zl_os, &blk);
} else {
/*
* A destroyed ZIL chain can't contain any TX_SETSAXATTR
@ -3736,7 +3840,11 @@ zil_sync(zilog_t *zilog, dmu_tx_t *tx)
SPA_FEATURE_ZILSAXATTR))
dsl_dataset_deactivate_feature(ds,
SPA_FEATURE_ZILSAXATTR, tx);
spa_zil_delete(spa, zilog->zl_os);
}
mutex_exit(&zilog->zl_lock);
return;
}
while ((lwb = list_head(&zilog->zl_lwb_list)) != NULL) {
@ -3745,7 +3853,7 @@ zil_sync(zilog_t *zilog, dmu_tx_t *tx)
lwb->lwb_alloc_txg > txg || lwb->lwb_max_txg > txg)
break;
list_remove(&zilog->zl_lwb_list, lwb);
if (!BP_IS_HOLE(&lwb->lwb_blk))
if (!BP_IS_HOLE(&lwb->lwb_blk) && !spa->spa_uses_shared_log)
zio_free(spa, txg, &lwb->lwb_blk);
zil_free_lwb(zilog, lwb);
@ -3757,6 +3865,7 @@ zil_sync(zilog_t *zilog, dmu_tx_t *tx)
*/
if (list_is_empty(&zilog->zl_lwb_list))
BP_ZERO(&zh->zh_log);
spa_zil_map_set_final(spa, zilog->zl_os, &zh->zh_log);
}
mutex_exit(&zilog->zl_lock);
@ -3845,6 +3954,13 @@ zil_alloc(objset_t *os, zil_header_t *zh_phys)
zilog->zl_header = zh_phys;
zilog->zl_os = os;
zilog->zl_spa = dmu_objset_spa(os);
zilog->zl_io_spa = spa_get_shared_log_pool(zilog->zl_spa);
if (zilog->zl_io_spa == NULL) {
zilog->zl_io_spa = zilog->zl_spa;
} else {
IMPLY(BP_IS_HOLE(&(zh_phys->zh_log)),
BP_GET_LOGICAL_BIRTH(&zh_phys->zh_log) == 0);
}
zilog->zl_dmu_pool = dmu_objset_pool(os);
zilog->zl_destroy_txg = TXG_INITIAL - 1;
zilog->zl_logbias = dmu_objset_logbias(os);
@ -3932,6 +4048,8 @@ zil_open(objset_t *os, zil_get_data_t *get_data, zil_sums_t *zil_sums)
ASSERT3P(zilog->zl_get_data, ==, NULL);
ASSERT3P(zilog->zl_last_lwb_opened, ==, NULL);
ASSERT(list_is_empty(&zilog->zl_lwb_list));
IMPLY(BP_IS_HOLE(&zilog->zl_header->zh_log),
BP_GET_LOGICAL_BIRTH(&zilog->zl_header->zh_log) == 0);
zilog->zl_get_data = get_data;
zilog->zl_sums = zil_sums;
@ -4352,6 +4470,12 @@ zil_reset(const char *osname, void *arg)
return (0);
}
/*
 * Return whether this ZIL writes its log to a shared log pool, i.e.
 * whether its I/O pool differs from its owning pool.
 */
boolean_t
zil_shared_log(zilog_t *zilog)
{
	if (zilog->zl_io_spa == zilog->zl_spa)
		return (B_FALSE);
	return (B_TRUE);
}
EXPORT_SYMBOL(zil_alloc);
EXPORT_SYMBOL(zil_free);
EXPORT_SYMBOL(zil_open);

View File

@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2022 by Delphix. All rights reserved.
* Copyright (c) 2011, 2023 by Delphix. All rights reserved.
* Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2017, Intel Corporation.
* Copyright (c) 2019, 2023, 2024, Klara Inc.
@ -3883,15 +3883,16 @@ zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg, blkptr_t *new_bp,
int flags = METASLAB_ZIL;
int allocator = (uint_t)cityhash4(0, 0, 0,
os->os_dsl_dataset->ds_object) % spa->spa_alloc_count;
boolean_t must_slog = spa_uses_shared_log(spa);
error = metaslab_alloc(spa, spa_log_class(spa), size, new_bp, 1,
txg, NULL, flags, &io_alloc_list, NULL, allocator);
*slog = (error == 0);
if (error != 0) {
*slog = (error == 0 || must_slog);
if (error != 0 && !must_slog) {
error = metaslab_alloc(spa, spa_embedded_log_class(spa), size,
new_bp, 1, txg, NULL, flags,
&io_alloc_list, NULL, allocator);
}
if (error != 0) {
if (error != 0 && !must_slog) {
error = metaslab_alloc(spa, spa_normal_class(spa), size,
new_bp, 1, txg, NULL, flags,
&io_alloc_list, NULL, allocator);

View File

@ -928,6 +928,11 @@ tests = ['scrub_mirror_001_pos', 'scrub_mirror_002_pos',
'scrub_mirror_003_pos', 'scrub_mirror_004_pos']
tags = ['functional', 'scrub_mirror']
[tests/functional/shared_log]
tests = ['shared_log_001_pos', 'shared_log_002_pos', 'shared_log_003_pos', 'shared_log_004_pos',
'shared_log_005_pos', 'shared_log_006_neg']
tags = ['functional', 'shared_log']
[tests/functional/slog]
tests = ['slog_001_pos', 'slog_002_pos', 'slog_003_pos', 'slog_004_pos',
'slog_005_pos', 'slog_006_pos', 'slog_007_pos', 'slog_008_neg',

View File

@ -29,5 +29,4 @@ tests = ['sequential_writes', 'sequential_reads', 'sequential_reads_arc_cached',
'sequential_reads_arc_cached_clone', 'sequential_reads_dbuf_cached',
'random_reads', 'random_writes', 'random_readwrite', 'random_writes_zil',
'random_readwrite_fixed']
post =
tags = ['perf', 'regression']

View File

@ -14,7 +14,7 @@
*/
/*
* Copyright (c) 2018 by Delphix. All rights reserved.
* Copyright (c) 2018, 2023 by Delphix. All rights reserved.
*/
#include <stdio.h>
@ -790,6 +790,18 @@ test_set_bootenv(const char *pool)
nvlist_free(required);
}
/* Exercise input validation for ZFS_IOC_POOL_RECYCLE (non-dryrun). */
static void
test_pool_recycle(const char *pool)
{
	nvlist_t *input = fnvlist_alloc();

	fnvlist_add_boolean_value(input, "dryrun", B_FALSE);
	IOC_INPUT_TEST_WILD(ZFS_IOC_POOL_RECYCLE, pool, input, NULL, 0);
	nvlist_free(input);
}
static void
zfs_ioc_input_tests(const char *pool)
{
@ -884,6 +896,8 @@ zfs_ioc_input_tests(const char *pool)
test_scrub(pool);
test_pool_recycle(pool);
/*
* cleanup
*/
@ -1039,6 +1053,7 @@ validate_ioc_values(void)
CHECK(ZFS_IOC_BASE + 83 == ZFS_IOC_WAIT);
CHECK(ZFS_IOC_BASE + 84 == ZFS_IOC_WAIT_FS);
CHECK(ZFS_IOC_BASE + 87 == ZFS_IOC_POOL_SCRUB);
CHECK(ZFS_IOC_BASE + 88 == ZFS_IOC_POOL_RECYCLE);
CHECK(ZFS_IOC_PLATFORM_BASE + 1 == ZFS_IOC_EVENTS_NEXT);
CHECK(ZFS_IOC_PLATFORM_BASE + 2 == ZFS_IOC_EVENTS_CLEAR);
CHECK(ZFS_IOC_PLATFORM_BASE + 3 == ZFS_IOC_EVENTS_SEEK);

View File

@ -21,7 +21,7 @@
#
# Copyright (c) 2009, Sun Microsystems Inc. All rights reserved.
# Copyright (c) 2012, 2020, Delphix. All rights reserved.
# Copyright (c) 2012, 2024, Delphix. All rights reserved.
# Copyright (c) 2017, Tim Chase. All rights reserved.
# Copyright (c) 2017, Nexenta Systems Inc. All rights reserved.
# Copyright (c) 2017, Lawrence Livermore National Security LLC.

View File

@ -29,6 +29,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS = \
perf/regression/sequential_reads.ksh \
perf/regression/sequential_writes.ksh \
perf/regression/setup.ksh \
perf/regression/cleanup.ksh \
\
perf/scripts/prefetch_io.sh
@ -363,6 +364,8 @@ nobase_dist_datadir_zfs_tests_tests_DATA += \
functional/rsend/rsend.kshlib \
functional/scrub_mirror/default.cfg \
functional/scrub_mirror/scrub_mirror_common.kshlib \
functional/shared_log/shared_log.cfg \
functional/shared_log/shared_log.kshlib \
functional/slog/slog.cfg \
functional/slog/slog.kshlib \
functional/snapshot/snapshot.cfg \
@ -1942,6 +1945,14 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/scrub_mirror/scrub_mirror_003_pos.ksh \
functional/scrub_mirror/scrub_mirror_004_pos.ksh \
functional/scrub_mirror/setup.ksh \
functional/shared_log/cleanup.ksh \
functional/shared_log/setup.ksh \
functional/shared_log/shared_log_001_pos.ksh \
functional/shared_log/shared_log_002_pos.ksh \
functional/shared_log/shared_log_003_pos.ksh \
functional/shared_log/shared_log_004_pos.ksh \
functional/shared_log/shared_log_005_pos.ksh \
functional/shared_log/shared_log_006_neg.ksh \
functional/slog/cleanup.ksh \
functional/slog/setup.ksh \
functional/slog/slog_001_pos.ksh \

View File

@ -109,5 +109,6 @@ if is_linux || is_freebsd; then
"feature@block_cloning"
"feature@vdev_zaps_v2"
"feature@raidz_expansion"
"feature@shared_log"
)
fi

View File

@ -0,0 +1,45 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2023 by Delphix. All rights reserved.
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/shared_log/shared_log.cfg

verify_runnable "global"

# Tear down every pool the shared_log tests may have left behind,
# clients first, then the shared log providers.
typeset pool
for pool in $TESTPOOL $TESTPOOL2 $LOGPOOL ${LOGPOOL}2; do
	if datasetexists $pool ; then
		log_must zpool destroy -f $pool
	fi
done

log_pass

View File

@ -0,0 +1,32 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2023 by Delphix. All rights reserved.
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/shared_log/shared_log.cfg

verify_runnable "global"

# No per-test setup is needed; each test creates its own pools.
log_pass

View File

@ -0,0 +1,31 @@
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2023 by Delphix. All rights reserved.
#
# Name of the shared log provider pool used by these tests.
export LOGPOOL="${TESTPOOL}_log"

# Split the space-separated $DISKS list; the tests use the first three.
# NOTE(review): 'read -r -a' is bash syntax (ksh93 uses 'read -A') --
# confirm which shell executes this file.
IFS=' ' read -r -a array <<< "$DISKS"
export DISK0="${array[0]}"
export DISK1="${array[1]}"
export DISK2="${array[2]}"
View File

@ -0,0 +1,43 @@
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2023 by Delphix. All rights reserved.
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/shared_log/shared_log.cfg
# Destroy the client pools and shared log provider pools, if present.
function cleanup
{
	typeset pool
	for pool in $TESTPOOL $TESTPOOL2 $LOGPOOL ${LOGPOOL}2; do
		poolexists $pool && destroy_pool $pool
	done
}
# Succeed iff 'zpool list -v' reports log_pool_name as the shared log
# provider of the given target pool.
function verify_shared_log
{
	typeset target="$1"
	typeset log_pool_name="$2"

	# The provider is printed on the line immediately following the
	# "shared log" heading of the verbose listing.
	zpool list -v $target | grep -A 1 "shared log" | \
	    tail -n 1 | grep -q "^ *$log_pool_name"
}

View File

@ -0,0 +1,48 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2023 by Delphix. All rights reserved.
#
. $STF_SUITE/tests/functional/shared_log/shared_log.kshlib

#
# DESCRIPTION:
# Creating a pool with a shared log succeeds.
#
# STRATEGY:
# 1. Create shared log pool
# 2. Create client pool with shared log
# 3. Display pool status
#

verify_runnable "global"

log_assert "Creating a pool with a shared log succeeds."
log_onexit cleanup

# -L creates $LOGPOOL as a shared log provider; -l attaches the client to it.
log_must create_pool $LOGPOOL -L "$DISK0"
log_must create_pool $TESTPOOL -l $LOGPOOL "$DISK1"
log_must verify_shared_log $TESTPOOL $LOGPOOL

log_pass "Creating a pool with a shared log succeeds."

View File

@ -0,0 +1,59 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2023 by Delphix. All rights reserved.
#
. $STF_SUITE/tests/functional/shared_log/shared_log.kshlib

#
# DESCRIPTION:
# Using a pool with a shared log device succeeds at basic operations.
#
# STRATEGY:
# 1. Create shared log pool & client
# 2. Create sync=always fs on client
# 3. Write data to fs
# 4. Export & import client
# 5. Write data to fs again
#

verify_runnable "global"

log_assert "Using a pool with a shared log device succeeds at basic operations."
log_onexit cleanup
typeset FS="$TESTPOOL/fs"

log_must create_pool $LOGPOOL -L "$DISK0"
log_must create_pool $TESTPOOL -l $LOGPOOL "$DISK1"
log_must verify_shared_log $TESTPOOL $LOGPOOL
# sync=always forces every write through the ZIL on the shared log pool.
log_must zfs create -o sync=always -o recordsize=8k $FS
mntpnt=$(get_prop mountpoint $FS)
log_must dd if=/dev/urandom of="$mntpnt/f1" bs=8k count=128
# Export/import exercises replay of the log chain from the provider pool.
log_must zpool export $TESTPOOL
log_must zpool import $TESTPOOL
log_must dd if=/dev/urandom of="$mntpnt/f1" bs=8k count=128

log_pass "Using a pool with a shared log device succeeds at basic operations."

View File

@ -0,0 +1,62 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2023 by Delphix. All rights reserved.
#
. $STF_SUITE/tests/functional/shared_log/shared_log.kshlib

#
# DESCRIPTION:
# Shared log pool can be exported and imported.
#
# STRATEGY:
# 1. Create shared log pool & client
# 2. Write some data to the client pool
# 3. Export client
# 4. Export & import provider
# 5. Import client
# 6. Write data to client
#

verify_runnable "global"

log_assert "Shared log pool can be exported and imported."
log_onexit cleanup
typeset FS="$TESTPOOL/fs"

log_must create_pool $LOGPOOL -L "$DISK0"
log_must create_pool $TESTPOOL -l $LOGPOOL "$DISK1"
log_must verify_shared_log $TESTPOOL $LOGPOOL
log_must zfs create -o sync=always -o recordsize=8k $FS
mntpnt=$(get_prop mountpoint $FS)
log_must dd if=/dev/urandom of="$mntpnt/f1" bs=8k count=128
# The client must be exported before its provider, and the provider must
# be imported again before the client can come back.
log_must zpool export $TESTPOOL
log_must zpool export $LOGPOOL
log_must zpool import $LOGPOOL
log_must zpool import $TESTPOOL
log_must dd if=/dev/urandom of="$mntpnt/f2" bs=8k count=128

log_pass "Shared log pool can be exported and imported."

View File

@ -0,0 +1,67 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2023 by Delphix. All rights reserved.
#
. $STF_SUITE/tests/functional/shared_log/shared_log.kshlib

#
# DESCRIPTION:
# Client pools can be reimported without provider, with flag.
#
# STRATEGY:
# 1. Create shared log pools & client
# 2. Write data to client
# 3. Export client and provider
# 4. Import client with -m
# 5. Export client
# 6. Import client with -m and new pool
#

verify_runnable "global"

log_assert "Client pools can be reimported without provider, with flag."
log_onexit cleanup
typeset FS="$TESTPOOL/fs"

log_must create_pool $LOGPOOL -L "$DISK0"
log_must create_pool ${LOGPOOL}2 -L "$DISK1"
log_must create_pool $TESTPOOL -l $LOGPOOL "$DISK2"
log_must verify_shared_log $TESTPOOL $LOGPOOL
log_must zfs create -o sync=always -o recordsize=8k $FS
mntpnt=$(get_prop mountpoint $FS)
log_must dd if=/dev/urandom of="$mntpnt/f1" bs=8k count=128
log_must zpool export $TESTPOOL
log_must zpool export $LOGPOOL
# -m allows importing the client while its provider is unavailable.
log_must zpool import -m $TESTPOOL
log_must dd if=/dev/urandom of="$mntpnt/f2" bs=8k count=128
log_must zpool export $TESTPOOL
log_must zpool import $LOGPOOL
# -L rehomes the client onto a different shared log provider at import.
log_must zpool import -m -L ${LOGPOOL}2 $TESTPOOL
log_must verify_shared_log $TESTPOOL ${LOGPOOL}2
log_must dd if=/dev/urandom of="$mntpnt/f3" bs=8k count=128

log_pass "Client pools can be reimported without provider, with flag."

View File

@ -0,0 +1,58 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2023 by Delphix. All rights reserved.
#
. $STF_SUITE/tests/functional/shared_log/shared_log.kshlib
#
# DESCRIPTION:
# Test scrub with client and provider pools.
#
# STRATEGY:
# 1. Create shared log pool & client
# 2. Write some data to the client pool
# 3. Scrub client and provider pools
#
verify_runnable "global"
log_assert "Test scrub with client and provider pools."
log_onexit cleanup
typeset FS="$TESTPOOL/fs"
# Create a shared-log provider pool (-L) and a client pool (-l) using it.
log_must create_pool $LOGPOOL -L "$DISK0"
log_must create_pool $TESTPOOL -l $LOGPOOL "$DISK1"
log_must verify_shared_log $TESTPOOL $LOGPOOL
# sync=always ensures data flows through the shared log before scrubbing.
log_must zfs create -o sync=always -o recordsize=8k $FS
mntpnt=$(get_prop mountpoint $FS)
log_must dd if=/dev/urandom of="$mntpnt/f1" bs=8k count=128
# Scrub both pools and wait for completion; both must succeed with data
# split between client and provider.
log_must zpool scrub $LOGPOOL
log_must zpool scrub $TESTPOOL
log_must zpool wait -t scrub $LOGPOOL
log_must zpool wait -t scrub $TESTPOOL
log_pass "Test scrub with client and provider pools."

View File

@ -0,0 +1,73 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2023 by Delphix. All rights reserved.
#
. $STF_SUITE/tests/functional/shared_log/shared_log.kshlib
#
# DESCRIPTION:
# Negative shared log testing.
#
# STRATEGY:
# 1. Attempt to create client pools with missing or invalid providers
# 2. Attempt invalid configuration combinations (slog + shared log,
#    shared_log feature disabled, datasets on the provider)
# 3. Attempt disallowed operations on an active client/provider pair
#    (export, destroy, reguid, checkpoint)
#
verify_runnable "global"
log_assert "Negative shared log testing."
log_onexit cleanup
# Client creation must fail when the named provider pool does not exist.
log_mustnot create_pool $TESTPOOL -l $LOGPOOL "$DISK0"
# Client creation must fail when the named pool is not a shared log pool.
log_must create_pool $TESTPOOL2 "$DISK2"
log_mustnot create_pool $TESTPOOL -l $TESTPOOL2 "$DISK0"
log_must zpool destroy $TESTPOOL2
log_must create_pool $LOGPOOL -L "$DISK0"
# Still fails for a nonexistent provider even when another provider exists.
log_mustnot create_pool $TESTPOOL -l "${LOGPOOL}2" "$DISK1"
# A client pool cannot combine a shared log provider with its own slog vdev.
log_mustnot create_pool $TESTPOOL -l $LOGPOOL "$DISK1" log "$DISK2"
# A second provider pool is allowed; clean it up again.
log_must create_pool ${LOGPOOL}2 -L "$DISK1"
log_must zpool destroy ${LOGPOOL}2
typeset FS="$LOGPOOL/fs"
# Datasets cannot be created directly on a shared log provider pool.
log_mustnot zfs create -o sync=always -o recordsize=8k $FS
# Neither side of the relationship may disable the shared_log feature.
log_mustnot create_pool $TESTPOOL -l $LOGPOOL -o feature@shared_log=disabled "$DISK1"
log_mustnot create_pool ${LOGPOOL}2 -L -o feature@shared_log=disabled "$DISK1"
log_must create_pool $TESTPOOL -l $LOGPOOL "$DISK1"
# With an active client attached, the provider cannot be exported,
# destroyed, or reguided; reguid and checkpoint are also disallowed on
# the client, and checkpoint on the provider.
log_mustnot zpool export $LOGPOOL
log_mustnot zpool destroy $LOGPOOL
log_mustnot zpool reguid $LOGPOOL
log_mustnot zpool reguid $TESTPOOL
log_mustnot zpool checkpoint $TESTPOOL
log_mustnot zpool checkpoint $LOGPOOL
log_pass "Negative shared log testing."

View File

@ -10,7 +10,7 @@
#
#
# Copyright (c) 2015, 2021 by Delphix. All rights reserved.
# Copyright (c) 2015, 2023 by Delphix. All rights reserved.
# Copyright (c) 2016, Intel Corporation.
#
@ -21,6 +21,7 @@ export PERF_RUNTIME=${PERF_RUNTIME:-'180'}
export PERF_RANDSEED=${PERF_RANDSEED:-'1234'}
export PERF_COMPPERCENT=${PERF_COMPPERCENT:-'66'}
export PERF_COMPCHUNK=${PERF_COMPCHUNK:-'4096'}
export PERF_LOG_TYPES=${PERF_LOG_TYPES:-"none slog shared"}
# Default to JSON for fio output
export PERF_FIO_FORMAT=${PERF_FIO_FORMAT:-'json'}
@ -44,12 +45,13 @@ function get_suffix
typeset threads=$1
typeset sync=$2
typeset iosize=$3
typeset log_type=$4
typeset sync_str=$(get_sync_str $sync)
typeset filesystems=$(get_nfilesystems)
typeset suffix="$sync_str.$iosize-ios"
suffix="$suffix.$threads-threads.$filesystems-filesystems"
suffix="$suffix.$threads-threads.$filesystems-filesystems.$log_type-log"
echo $suffix
}
@ -63,9 +65,10 @@ function do_fio_run_impl
typeset threads_per_fs=$5
typeset sync=$6
typeset iosize=$7
typeset log_type=$8
typeset sync_str=$(get_sync_str $sync)
log_note "Running with $threads $sync_str threads, $iosize ios"
log_note "Running with $threads $sync_str threads, $iosize ios with log $log_type"
if [[ -n $threads_per_fs && $threads_per_fs -ne 0 ]]; then
log_must test $do_recreate
@ -73,7 +76,7 @@ function do_fio_run_impl
fi
if $do_recreate; then
recreate_perf_pool
recreate_perf_pool $log_type
#
# A value of zero for "threads_per_fs" is "special", and
@ -127,7 +130,7 @@ function do_fio_run_impl
fi
# This will be part of the output filename.
typeset suffix=$(get_suffix $threads $sync $iosize)
typeset suffix=$(get_suffix $threads $sync $iosize $log_type)
# Start the data collection
do_collect_scripts $suffix
@ -168,20 +171,27 @@ function do_fio_run
typeset script=$1
typeset do_recreate=$2
typeset clear_cache=$3
typeset log_types=$4
typeset threads threads_per_fs sync iosize
for threads in $PERF_NTHREADS; do
for threads_per_fs in $PERF_NTHREADS_PER_FS; do
for sync in $PERF_SYNC_TYPES; do
for iosize in $PERF_IOSIZES; do
do_fio_run_impl \
$script \
$do_recreate \
$clear_cache \
$threads \
$threads_per_fs \
$sync \
$iosize
for logtype in $log_types; do
if [[ $sync == "0" && $logtype != "none" ]]; then
continue
fi
do_fio_run_impl \
$script \
$do_recreate \
$clear_cache \
$threads \
$threads_per_fs \
$sync \
$iosize \
$logtype
done
done
done
done
@ -286,8 +296,10 @@ function clear_zinject_delays
#
function recreate_perf_pool
{
typeset logtype=$1
[[ -n $PERFPOOL ]] || log_fail "The \$PERFPOOL variable isn't set."
log_note "recreating $PERFPOOL with $logtype"
#
# In case there's been some "leaked" zinject delays, or if the
# performance test injected some delays itself, we clear all
@ -297,6 +309,23 @@ function recreate_perf_pool
#
clear_zinject_delays
if [[ $logtype == "none" ]]; then
destroy_pool $PERFPOOL
destroy_pool "${PERFPOOL}_log"
create_pool $PERFPOOL $DISKS
else
typeset disks="${DISKS% *}"
typeset log_disk="${DISKS##* }"
if [[ $logtype == "slog" ]]; then
destroy_pool $PERFPOOL
destroy_pool "${PERFPOOL}_log"
create_pool $PERFPOOL $disks log $log_disk
else
destroy_pool $PERFPOOL
create_pool ${PERFPOOL}_log -L $log_disk
create_pool $PERFPOOL -l ${PERFPOOL}_log $disks
fi
fi
#
# This function handles the case where the pool already exists,
# and will destroy the previous pool and recreate a new pool.

View File

@ -0,0 +1,38 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2023 by Delphix. All rights reserved.
#
. $STF_SUITE/include/libtest.shlib
verify_runnable "global"
# Tear down the perf pool and, if present, its companion shared log
# provider pool (created as ${PERFPOOL}_log by the perf framework).
if datasetexists $PERFPOOL ; then
	log_must destroy_pool $PERFPOOL
fi
if datasetexists ${PERFPOOL}_log ; then
	log_must destroy_pool ${PERFPOOL}_log
fi
log_pass

View File

@ -12,7 +12,7 @@
#
#
# Copyright (c) 2015, 2021 by Delphix. All rights reserved.
# Copyright (c) 2015, 2023 by Delphix. All rights reserved.
#
#
@ -45,55 +45,56 @@ function cleanup
# kill fio and iostat
pkill fio
pkill iostat
recreate_perf_pool
}
trap "log_fail \"Measure IO stats during random read load\"" SIGTERM
log_onexit cleanup
recreate_perf_pool
populate_perf_filesystems
for logtype in $PERF_LOG_TYPES; do
recreate_perf_pool $logtype
populate_perf_filesystems
# Aim to fill the pool to 50% capacity while accounting for a 3x compressratio.
export TOTAL_SIZE=$(($(get_prop avail $PERFPOOL) * 3 / 2))
# Aim to fill the pool to 50% capacity while accounting for a 3x compressratio.
export TOTAL_SIZE=$(($(get_prop avail $PERFPOOL) * 3 / 2))
# Variables specific to this test for use by fio.
export PERF_NTHREADS=${PERF_NTHREADS:-'16 32'}
export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'}
export PERF_IOSIZES=${PERF_IOSIZES:-'8k'}
export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'1'}
# Variables specific to this test for use by fio.
export PERF_NTHREADS=${PERF_NTHREADS:-'16 32'}
export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'}
export PERF_IOSIZES=${PERF_IOSIZES:-'8k'}
export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'1'}
# Layout the files to be used by the read tests. Create as many files as the
# largest number of threads. An fio run with fewer threads will use a subset
# of the available files.
export NUMJOBS=$(get_max $PERF_NTHREADS)
export FILE_SIZE=$((TOTAL_SIZE / NUMJOBS))
export DIRECTORY=$(get_directory)
log_must fio $FIO_SCRIPTS/mkfiles.fio
# Layout the files to be used by the read tests. Create as many files as the
# largest number of threads. An fio run with fewer threads will use a subset
# of the available files.
export NUMJOBS=$(get_max $PERF_NTHREADS)
export FILE_SIZE=$((TOTAL_SIZE / NUMJOBS))
export DIRECTORY=$(get_directory)
log_must fio $FIO_SCRIPTS/mkfiles.fio
# Set up the scripts and output files that will log performance data.
lun_list=$(pool_to_lun_list $PERFPOOL)
log_note "Collecting backend IO stats with lun list $lun_list"
if is_linux; then
typeset perf_record_cmd="perf record -F 99 -a -g -q \
-o /dev/stdout -- sleep ${PERF_RUNTIME}"
# Set up the scripts and output files that will log performance data.
lun_list=$(pool_to_lun_list $PERFPOOL)
log_note "Collecting backend IO stats with lun list $lun_list"
if is_linux; then
typeset perf_record_cmd="perf record -F 99 -a -g -q \
-o /dev/stdout -- sleep ${PERF_RUNTIME}"
export collect_scripts=(
"zpool iostat -lpvyL $PERFPOOL 1" "zpool.iostat"
"vmstat -t 1" "vmstat"
"mpstat -P ALL 1" "mpstat"
"iostat -tdxyz 1" "iostat"
"$perf_record_cmd" "perf"
)
else
export collect_scripts=(
"$PERF_SCRIPTS/io.d $PERFPOOL $lun_list 1" "io"
"vmstat -T d 1" "vmstat"
"mpstat -T d 1" "mpstat"
"iostat -T d -xcnz 1" "iostat"
)
fi
export collect_scripts=(
"zpool iostat -lpvyL $PERFPOOL 1" "zpool.iostat"
"vmstat -t 1" "vmstat"
"mpstat -P ALL 1" "mpstat"
"iostat -tdxyz 1" "iostat"
"$perf_record_cmd" "perf"
)
else
export collect_scripts=(
"$PERF_SCRIPTS/io.d $PERFPOOL $lun_list 1" "io"
"vmstat -T d 1" "vmstat"
"mpstat -T d 1" "mpstat"
"iostat -T d -xcnz 1" "iostat"
)
fi
log_note "Random reads with settings: $(print_perf_settings)"
do_fio_run random_reads.fio false true
log_note "Random reads with settings: $(print_perf_settings)"
do_fio_run random_reads.fio false true $logtype
done
log_pass "Measure IO stats during random read load"

View File

@ -12,7 +12,7 @@
#
#
# Copyright (c) 2015, 2021 by Delphix. All rights reserved.
# Copyright (c) 2015, 2022 by Delphix. All rights reserved.
#
#
@ -45,55 +45,56 @@ function cleanup
# kill fio and iostat
pkill fio
pkill iostat
recreate_perf_pool
}
trap "log_fail \"Measure IO stats during random read load\"" SIGTERM
log_onexit cleanup
recreate_perf_pool
populate_perf_filesystems
for logtype in $PERF_LOG_TYPES; do
recreate_perf_pool $logtype
populate_perf_filesystems
# Aim to fill the pool to 50% capacity while accounting for a 3x compressratio.
export TOTAL_SIZE=$(($(get_prop avail $PERFPOOL) * 3 / 2))
# Aim to fill the pool to 50% capacity while accounting for a 3x compressratio.
export TOTAL_SIZE=$(($(get_prop avail $PERFPOOL) * 3 / 2))
# Variables specific to this test for use by fio.
export PERF_NTHREADS=${PERF_NTHREADS:-'32 64'}
export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'}
export PERF_IOSIZES='bssplit' # bssplit used instead of fixed sizes
export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'1'}
# Variables specific to this test for use by fio.
export PERF_NTHREADS=${PERF_NTHREADS:-'32 64'}
export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'}
export PERF_IOSIZES='bssplit' # bssplit used instead of fixed sizes
export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'1'}
# Layout the files to be used by the readwrite tests. Create as many files
# as the largest number of threads. An fio run with fewer threads will use
# a subset of the available files.
export NUMJOBS=$(get_max $PERF_NTHREADS)
export FILE_SIZE=$((TOTAL_SIZE / NUMJOBS))
export DIRECTORY=$(get_directory)
log_must fio $FIO_SCRIPTS/mkfiles.fio
# Layout the files to be used by the readwrite tests. Create as many files
# as the largest number of threads. An fio run with fewer threads will use
# a subset of the available files.
export NUMJOBS=$(get_max $PERF_NTHREADS)
export FILE_SIZE=$((TOTAL_SIZE / NUMJOBS))
export DIRECTORY=$(get_directory)
log_must fio $FIO_SCRIPTS/mkfiles.fio
# Set up the scripts and output files that will log performance data.
lun_list=$(pool_to_lun_list $PERFPOOL)
log_note "Collecting backend IO stats with lun list $lun_list"
if is_linux; then
typeset perf_record_cmd="perf record -F 99 -a -g -q \
-o /dev/stdout -- sleep ${PERF_RUNTIME}"
# Set up the scripts and output files that will log performance data.
lun_list=$(pool_to_lun_list $PERFPOOL)
log_note "Collecting backend IO stats with lun list $lun_list"
if is_linux; then
typeset perf_record_cmd="perf record -F 99 -a -g -q \
-o /dev/stdout -- sleep ${PERF_RUNTIME}"
export collect_scripts=(
"zpool iostat -lpvyL $PERFPOOL 1" "zpool.iostat"
"vmstat -t 1" "vmstat"
"mpstat -P ALL 1" "mpstat"
"iostat -tdxyz 1" "iostat"
"$perf_record_cmd" "perf"
)
else
export collect_scripts=(
"$PERF_SCRIPTS/io.d $PERFPOOL $lun_list 1" "io"
"vmstat -T d 1" "vmstat"
"mpstat -T d 1" "mpstat"
"iostat -T d -xcnz 1" "iostat"
)
fi
export collect_scripts=(
"zpool iostat -lpvyL $PERFPOOL 1" "zpool.iostat"
"vmstat -t 1" "vmstat"
"mpstat -P ALL 1" "mpstat"
"iostat -tdxyz 1" "iostat"
"$perf_record_cmd" "perf"
)
else
export collect_scripts=(
"$PERF_SCRIPTS/io.d $PERFPOOL $lun_list 1" "io"
"vmstat -T d 1" "vmstat"
"mpstat -T d 1" "mpstat"
"iostat -T d -xcnz 1" "iostat"
)
fi
log_note "Random reads and writes with settings: $(print_perf_settings)"
do_fio_run random_readwrite.fio false true
log_note "Random reads and writes with settings: $(print_perf_settings)"
do_fio_run random_readwrite.fio false true $logtype
done
log_pass "Measure IO stats during random read and write load"

View File

@ -1,4 +1,4 @@
#!/bin/ksh
#!/bin/ksh
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
@ -11,7 +11,7 @@
#
#
# Copyright (c) 2017, 2021 by Delphix. All rights reserved.
# Copyright (c) 2017, 2023 by Delphix. All rights reserved.
#
#
@ -35,57 +35,58 @@ function cleanup
# kill fio and iostat
pkill fio
pkill iostat
recreate_perf_pool
}
trap "log_fail \"Measure IO stats during random read write load\"" SIGTERM
log_onexit cleanup
recreate_perf_pool
populate_perf_filesystems
for logtype in $PERF_LOG_TYPES; do
recreate_perf_pool $logtype
populate_perf_filesystems
# Aim to fill the pool to 50% capacity while accounting for a 3x compressratio.
export TOTAL_SIZE=$(($(get_prop avail $PERFPOOL) * 3 / 2))
# Aim to fill the pool to 50% capacity while accounting for a 3x compressratio.
export TOTAL_SIZE=$(($(get_prop avail $PERFPOOL) * 3 / 2))
# Variables specific to this test for use by fio.
export PERF_NTHREADS=${PERF_NTHREADS:-'64 128'}
export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'}
export PERF_IOSIZES=${PERF_IOSIZES:-'8k'}
export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'0 1'}
# Variables specific to this test for use by fio.
export PERF_NTHREADS=${PERF_NTHREADS:-'64 128'}
export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'}
export PERF_IOSIZES=${PERF_IOSIZES:-'8k'}
export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'0 1'}
# Layout the files to be used by the readwrite tests. Create as many files
# as the largest number of threads. An fio run with fewer threads will use
# a subset of the available files.
export NUMJOBS=$(get_max $PERF_NTHREADS)
export FILE_SIZE=$((TOTAL_SIZE / NUMJOBS))
export DIRECTORY=$(get_directory)
log_must fio $FIO_SCRIPTS/mkfiles.fio
# Layout the files to be used by the readwrite tests. Create as many files
# as the largest number of threads. An fio run with fewer threads will use
# a subset of the available files.
export NUMJOBS=$(get_max $PERF_NTHREADS)
export FILE_SIZE=$((TOTAL_SIZE / NUMJOBS))
export DIRECTORY=$(get_directory)
log_must fio $FIO_SCRIPTS/mkfiles.fio
# Set up the scripts and output files that will log performance data.
lun_list=$(pool_to_lun_list $PERFPOOL)
log_note "Collecting backend IO stats with lun list $lun_list"
if is_linux; then
typeset perf_record_cmd="perf record -F 99 -a -g -q \
-o /dev/stdout -- sleep ${PERF_RUNTIME}"
# Set up the scripts and output files that will log performance data.
lun_list=$(pool_to_lun_list $PERFPOOL)
log_note "Collecting backend IO stats with lun list $lun_list"
if is_linux; then
typeset perf_record_cmd="perf record -F 99 -a -g -q \
-o /dev/stdout -- sleep ${PERF_RUNTIME}"
export collect_scripts=(
"zpool iostat -lpvyL $PERFPOOL 1" "zpool.iostat"
"vmstat -t 1" "vmstat"
"mpstat -P ALL 1" "mpstat"
"iostat -tdxyz 1" "iostat"
"$perf_record_cmd" "perf"
)
else
export collect_scripts=(
"kstat zfs:0 1" "kstat"
"vmstat -T d 1" "vmstat"
"mpstat -T d 1" "mpstat"
"iostat -T d -xcnz 1" "iostat"
"dtrace -Cs $PERF_SCRIPTS/io.d $PERFPOOL $lun_list 1" "io"
"dtrace -s $PERF_SCRIPTS/profile.d" "profile"
)
fi
export collect_scripts=(
"zpool iostat -lpvyL $PERFPOOL 1" "zpool.iostat"
"vmstat -t 1" "vmstat"
"mpstat -P ALL 1" "mpstat"
"iostat -tdxyz 1" "iostat"
"$perf_record_cmd" "perf"
)
else
export collect_scripts=(
"kstat zfs:0 1" "kstat"
"vmstat -T d 1" "vmstat"
"mpstat -T d 1" "mpstat"
"iostat -T d -xcnz 1" "iostat"
"dtrace -Cs $PERF_SCRIPTS/io.d $PERFPOOL $lun_list 1" "io"
"dtrace -s $PERF_SCRIPTS/profile.d" "profile"
)
fi
log_note "Random reads and writes with settings: $(print_perf_settings)"
do_fio_run random_readwrite_fixed.fio false true
log_note "Random reads and writes with settings: $(print_perf_settings)"
do_fio_run random_readwrite_fixed.fio false true $logtype
done
log_pass "Measure IO stats during random read and write load"

View File

@ -12,7 +12,7 @@
#
#
# Copyright (c) 2015, 2021 by Delphix. All rights reserved.
# Copyright (c) 2015, 2023 by Delphix. All rights reserved.
#
#
@ -44,14 +44,12 @@ function cleanup
# kill fio and iostat
pkill fio
pkill iostat
recreate_perf_pool
}
trap "log_fail \"Measure IO stats during random read load\"" SIGTERM
log_onexit cleanup
recreate_perf_pool
populate_perf_filesystems
recreate_perf_pool none
# Aim to fill the pool to 50% capacity while accounting for a 3x compressratio.
export TOTAL_SIZE=$(($(get_prop avail $PERFPOOL) * 3 / 2))
@ -86,5 +84,5 @@ else
fi
log_note "Random writes with settings: $(print_perf_settings)"
do_fio_run random_writes.fio true false
do_fio_run random_writes.fio true false "$PERF_LOG_TYPES"
log_pass "Measure IO stats during random write load"

View File

@ -12,7 +12,7 @@
#
#
# Copyright (c) 2015, 2021 by Delphix. All rights reserved.
# Copyright (c) 2015, 2023 by Delphix. All rights reserved.
#
. $STF_SUITE/include/libtest.shlib
@ -26,21 +26,12 @@ function cleanup
pkill fio
pkill iostat
#
# We're using many filesystems depending on the number of
# threads for each test, and there's no good way to get a list
# of all the filesystems that should be destroyed on cleanup
# (i.e. the list of filesystems used for the last test ran).
# Thus, we simply recreate the pool as a way to destroy all
# filesystems and leave a fresh pool behind.
#
recreate_perf_pool
}
trap "log_fail \"Measure IO stats during random write load\"" SIGTERM
log_onexit cleanup
recreate_perf_pool
recreate_perf_pool none
# Aim to fill the pool to 50% capacity while accounting for a 3x compressratio.
export TOTAL_SIZE=$(($(get_prop avail $PERFPOOL) * 3 / 2))
@ -82,5 +73,5 @@ else
fi
log_note \
"ZIL specific random write workload with settings: $(print_perf_settings)"
do_fio_run random_writes.fio true false
do_fio_run random_writes.fio true false "$PERF_LOG_TYPES"
log_pass "Measure IO stats during ZIL specific random write workload"

View File

@ -12,7 +12,7 @@
#
#
# Copyright (c) 2015, 2021 by Delphix. All rights reserved.
# Copyright (c) 2015, 2023 by Delphix. All rights reserved.
#
#
@ -43,57 +43,59 @@ function cleanup
# kill fio and iostat
pkill fio
pkill iostat
recreate_perf_pool
}
trap "log_fail \"Measure IO stats during random read load\"" SIGTERM
log_onexit cleanup
recreate_perf_pool
populate_perf_filesystems
for logtype in $PERF_LOG_TYPES; do
recreate_perf_pool $logtype
populate_perf_filesystems
# Aim to fill the pool to 50% capacity while accounting for a 3x compressratio.
export TOTAL_SIZE=$(($(get_prop avail $PERFPOOL) * 3 / 2))
# Aim to fill the pool to 50% capacity while accounting for a 3x compressratio.
export TOTAL_SIZE=$(($(get_prop avail $PERFPOOL) * 3 / 2))
# Variables specific to this test for use by fio.
export PERF_NTHREADS=${PERF_NTHREADS:-'8 16'}
export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'}
export PERF_IOSIZES=${PERF_IOSIZES:-'128k 1m'}
export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'1'}
# Variables specific to this test for use by fio.
export PERF_NTHREADS=${PERF_NTHREADS:-'8 16'}
export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'}
export PERF_IOSIZES=${PERF_IOSIZES:-'128k 1m'}
export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'1'}
# Layout the files to be used by the read tests. Create as many files as the
# largest number of threads. An fio run with fewer threads will use a subset
# of the available files.
export NUMJOBS=$(get_max $PERF_NTHREADS)
export FILE_SIZE=$((TOTAL_SIZE / NUMJOBS))
export DIRECTORY=$(get_directory)
log_must fio $FIO_SCRIPTS/mkfiles.fio
# Layout the files to be used by the read tests. Create as many files as the
# largest number of threads. An fio run with fewer threads will use a subset
# of the available files.
export NUMJOBS=$(get_max $PERF_NTHREADS)
export FILE_SIZE=$((TOTAL_SIZE / NUMJOBS))
export DIRECTORY=$(get_directory)
log_must fio $FIO_SCRIPTS/mkfiles.fio
# Set up the scripts and output files that will log performance data.
lun_list=$(pool_to_lun_list $PERFPOOL)
log_note "Collecting backend IO stats with lun list $lun_list"
if is_linux; then
typeset perf_record_cmd="perf record -F 99 -a -g -q \
-o /dev/stdout -- sleep ${PERF_RUNTIME}"
export collect_scripts=(
"zpool iostat -lpvyL $PERFPOOL 1" "zpool.iostat"
"$PERF_SCRIPTS/prefetch_io.sh $PERFPOOL 1" "prefetch"
"vmstat -t 1" "vmstat"
"mpstat -P ALL 1" "mpstat"
"iostat -tdxyz 1" "iostat"
"$perf_record_cmd" "perf"
)
else
export collect_scripts=(
"$PERF_SCRIPTS/io.d $PERFPOOL $lun_list 1" "io"
"$PERF_SCRIPTS/prefetch_io.d $PERFPOOL 1" "prefetch"
"vmstat -T d 1" "vmstat"
"mpstat -T d 1" "mpstat"
"iostat -T d -xcnz 1" "iostat"
)
fi
# Set up the scripts and output files that will log performance data.
lun_list=$(pool_to_lun_list $PERFPOOL)
log_note "Collecting backend IO stats with lun list $lun_list"
if is_linux; then
typeset perf_record_cmd="perf record -F 99 -a -g -q \
-o /dev/stdout -- sleep ${PERF_RUNTIME}"
log_note "Sequential reads with settings: $(print_perf_settings)"
do_fio_run sequential_reads.fio false true
export collect_scripts=(
"zpool iostat -lpvyL $PERFPOOL 1" "zpool.iostat"
"$PERF_SCRIPTS/prefetch_io.sh $PERFPOOL 1" "prefetch"
"vmstat -t 1" "vmstat"
"mpstat -P ALL 1" "mpstat"
"iostat -tdxyz 1" "iostat"
"$perf_record_cmd" "perf"
)
else
export collect_scripts=(
"$PERF_SCRIPTS/io.d $PERFPOOL $lun_list 1" "io"
"$PERF_SCRIPTS/prefetch_io.d $PERFPOOL 1" "prefetch"
"vmstat -T d 1" "vmstat"
"mpstat -T d 1" "mpstat"
"iostat -T d -xcnz 1" "iostat"
)
fi
log_note "Sequential reads with settings: $(print_perf_settings)"
do_fio_run sequential_reads.fio false true $logtype
done
log_pass "Measure IO stats during sequential read load"

View File

@ -12,7 +12,7 @@
#
#
# Copyright (c) 2015, 2021 by Delphix. All rights reserved.
# Copyright (c) 2015, 2023 by Delphix. All rights reserved.
#
#
@ -35,57 +35,58 @@ function cleanup
# kill fio and iostat
pkill fio
pkill iostat
recreate_perf_pool
}
trap "log_fail \"Measure IO stats during random read load\"" SIGTERM
log_onexit cleanup
recreate_perf_pool
populate_perf_filesystems
for logtype in $PERF_LOG_TYPES; do
recreate_perf_pool $logtype
populate_perf_filesystems
# Make sure the working set can be cached in the arc. Aim for 1/2 of arc.
export TOTAL_SIZE=$(($(get_max_arc_size) / 2))
# Make sure the working set can be cached in the arc. Aim for 1/2 of arc.
export TOTAL_SIZE=$(($(get_max_arc_size) / 2))
# Variables specific to this test for use by fio.
export PERF_NTHREADS=${PERF_NTHREADS:-'64 128'}
export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'}
export PERF_IOSIZES=${PERF_IOSIZES:-'128k'}
export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'1'}
# Variables specific to this test for use by fio.
export PERF_NTHREADS=${PERF_NTHREADS:-'64 128'}
export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'}
export PERF_IOSIZES=${PERF_IOSIZES:-'128k'}
export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'1'}
# Layout the files to be used by the read tests. Create as many files as the
# largest number of threads. An fio run with fewer threads will use a subset
# of the available files.
export NUMJOBS=$(get_max $PERF_NTHREADS)
export FILE_SIZE=$((TOTAL_SIZE / NUMJOBS))
export DIRECTORY=$(get_directory)
log_must fio $FIO_SCRIPTS/mkfiles.fio
# Layout the files to be used by the read tests. Create as many files as the
# largest number of threads. An fio run with fewer threads will use a subset
# of the available files.
export NUMJOBS=$(get_max $PERF_NTHREADS)
export FILE_SIZE=$((TOTAL_SIZE / NUMJOBS))
export DIRECTORY=$(get_directory)
log_must fio $FIO_SCRIPTS/mkfiles.fio
# Set up the scripts and output files that will log performance data.
lun_list=$(pool_to_lun_list $PERFPOOL)
log_note "Collecting backend IO stats with lun list $lun_list"
if is_linux; then
typeset perf_record_cmd="perf record -F 99 -a -g -q \
-o /dev/stdout -- sleep ${PERF_RUNTIME}"
# Set up the scripts and output files that will log performance data.
lun_list=$(pool_to_lun_list $PERFPOOL)
log_note "Collecting backend IO stats with lun list $lun_list"
if is_linux; then
typeset perf_record_cmd="perf record -F 99 -a -g -q \
-o /dev/stdout -- sleep ${PERF_RUNTIME}"
export collect_scripts=(
"zpool iostat -lpvyL $PERFPOOL 1" "zpool.iostat"
"$PERF_SCRIPTS/prefetch_io.sh $PERFPOOL 1" "prefetch"
"vmstat -t 1" "vmstat"
"mpstat -P ALL 1" "mpstat"
"iostat -tdxyz 1" "iostat"
"$perf_record_cmd" "perf"
)
else
export collect_scripts=(
"$PERF_SCRIPTS/io.d $PERFPOOL $lun_list 1" "io"
"$PERF_SCRIPTS/prefetch_io.d $PERFPOOL 1" "prefetch"
"vmstat -T d 1" "vmstat"
"mpstat -T d 1" "mpstat"
"iostat -T d -xcnz 1" "iostat"
)
fi
export collect_scripts=(
"zpool iostat -lpvyL $PERFPOOL 1" "zpool.iostat"
"$PERF_SCRIPTS/prefetch_io.sh $PERFPOOL 1" "prefetch"
"vmstat -t 1" "vmstat"
"mpstat -P ALL 1" "mpstat"
"iostat -tdxyz 1" "iostat"
"$perf_record_cmd" "perf"
)
else
export collect_scripts=(
"$PERF_SCRIPTS/io.d $PERFPOOL $lun_list 1" "io"
"$PERF_SCRIPTS/prefetch_io.d $PERFPOOL 1" "prefetch"
"vmstat -T d 1" "vmstat"
"mpstat -T d 1" "mpstat"
"iostat -T d -xcnz 1" "iostat"
)
fi
log_note "Sequential cached reads with settings: $(print_perf_settings)"
do_fio_run sequential_reads.fio false false
log_note "Sequential cached reads with settings: $(print_perf_settings)"
do_fio_run sequential_reads.fio false false $logtype
done
log_pass "Measure IO stats during sequential cached read load"

View File

@ -12,7 +12,7 @@
#
#
# Copyright (c) 2015, 2021 by Delphix. All rights reserved.
# Copyright (c) 2015, 2023 by Delphix. All rights reserved.
#
#
@ -41,78 +41,79 @@ function cleanup
# kill fio and iostat
pkill fio
pkill iostat
recreate_perf_pool
}
trap "log_fail \"Measure IO stats during random read load\"" SIGTERM
log_onexit cleanup
recreate_perf_pool
populate_perf_filesystems
for logtype in $PERF_LOG_TYPES; do
recreate_perf_pool $logtype
populate_perf_filesystems
# Make sure the working set can be cached in the arc. Aim for 1/2 of arc.
export TOTAL_SIZE=$(($(get_max_arc_size) / 2))
# Make sure the working set can be cached in the arc. Aim for 1/2 of arc.
export TOTAL_SIZE=$(($(get_max_arc_size) / 2))
# Variables specific to this test for use by fio.
export PERF_NTHREADS=${PERF_NTHREADS:-'64 128'}
export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'}
export PERF_IOSIZES=${PERF_IOSIZES:-'128k'}
export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'1'}
# Variables specific to this test for use by fio.
export PERF_NTHREADS=${PERF_NTHREADS:-'64 128'}
export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'}
export PERF_IOSIZES=${PERF_IOSIZES:-'128k'}
export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'1'}
# Layout the files to be used by the read tests. Create as many files as the
# largest number of threads. An fio run with fewer threads will use a subset
# of the available files.
export NUMJOBS=$(get_max $PERF_NTHREADS)
export FILE_SIZE=$((TOTAL_SIZE / NUMJOBS))
export DIRECTORY=$(get_directory)
log_must fio $FIO_SCRIPTS/mkfiles.fio
# Layout the files to be used by the read tests. Create as many files as the
# largest number of threads. An fio run with fewer threads will use a subset
# of the available files.
export NUMJOBS=$(get_max $PERF_NTHREADS)
export FILE_SIZE=$((TOTAL_SIZE / NUMJOBS))
export DIRECTORY=$(get_directory)
log_must fio $FIO_SCRIPTS/mkfiles.fio
#
# Only a single filesystem is used by this test. To be defensive, we
# double check that TESTFS only contains a single filesystem. We
# wouldn't want to assume this was the case, and have it actually
# contain multiple filesystem (causing cascading failures later).
#
log_must test $(get_nfilesystems) -eq 1
#
# Only a single filesystem is used by this test. To be defensive, we
# double check that TESTFS only contains a single filesystem. We
# wouldn't want to assume this was the case, and have it actually
# contain multiple filesystem (causing cascading failures later).
#
log_must test $(get_nfilesystems) -eq 1
log_note "Creating snapshot, $TESTSNAP, of $TESTFS"
create_snapshot $TESTFS $TESTSNAP
log_note "Creating clone, $PERFPOOL/$TESTCLONE, from $TESTFS@$TESTSNAP"
create_clone $TESTFS@$TESTSNAP $PERFPOOL/$TESTCLONE
log_note "Creating snapshot, $TESTSNAP, of $TESTFS"
create_snapshot $TESTFS $TESTSNAP
log_note "Creating clone, $PERFPOOL/$TESTCLONE, from $TESTFS@$TESTSNAP"
create_clone $TESTFS@$TESTSNAP $PERFPOOL/$TESTCLONE
#
# We want to run FIO against the clone we created above, and not the
# clone's originating filesystem. Thus, we override the default behavior
# and explicitly set TESTFS to the clone.
#
export TESTFS=$PERFPOOL/$TESTCLONE
#
# We want to run FIO against the clone we created above, and not the
# clone's originating filesystem. Thus, we override the default behavior
# and explicitly set TESTFS to the clone.
#
export TESTFS=$PERFPOOL/$TESTCLONE
# Set up the scripts and output files that will log performance data.
lun_list=$(pool_to_lun_list $PERFPOOL)
log_note "Collecting backend IO stats with lun list $lun_list"
if is_linux; then
typeset perf_record_cmd="perf record -F 99 -a -g -q \
-o /dev/stdout -- sleep ${PERF_RUNTIME}"
# Set up the scripts and output files that will log performance data.
lun_list=$(pool_to_lun_list $PERFPOOL)
log_note "Collecting backend IO stats with lun list $lun_list"
if is_linux; then
typeset perf_record_cmd="perf record -F 99 -a -g -q \
-o /dev/stdout -- sleep ${PERF_RUNTIME}"
export collect_scripts=(
"zpool iostat -lpvyL $PERFPOOL 1" "zpool.iostat"
"$PERF_SCRIPTS/prefetch_io.sh $PERFPOOL 1" "prefetch"
"vmstat -t 1" "vmstat"
"mpstat -P ALL 1" "mpstat"
"iostat -tdxyz 1" "iostat"
"$perf_record_cmd" "perf"
)
else
export collect_scripts=(
"$PERF_SCRIPTS/io.d $PERFPOOL $lun_list 1" "io"
"$PERF_SCRIPTS/prefetch_io.d $PERFPOOL 1" "prefetch"
"vmstat -T d 1" "vmstat"
"mpstat -T d 1" "mpstat"
"iostat -T d -xcnz 1" "iostat"
)
fi
export collect_scripts=(
"zpool iostat -lpvyL $PERFPOOL 1" "zpool.iostat"
"$PERF_SCRIPTS/prefetch_io.sh $PERFPOOL 1" "prefetch"
"vmstat -t 1" "vmstat"
"mpstat -P ALL 1" "mpstat"
"iostat -tdxyz 1" "iostat"
"$perf_record_cmd" "perf"
)
else
export collect_scripts=(
"$PERF_SCRIPTS/io.d $PERFPOOL $lun_list 1" "io"
"$PERF_SCRIPTS/prefetch_io.d $PERFPOOL 1" "prefetch"
"vmstat -T d 1" "vmstat"
"mpstat -T d 1" "mpstat"
"iostat -T d -xcnz 1" "iostat"
)
fi
log_note "Sequential cached reads from $DIRECTORY with " \
"ettings: $(print_perf_settings)"
do_fio_run sequential_reads.fio false false
log_note "Sequential cached reads from $DIRECTORY with " \
"settings: $(print_perf_settings)"
do_fio_run sequential_reads.fio false false $logtype
done
log_pass "Measure IO stats during sequential cached read load"

View File

@ -12,7 +12,7 @@
#
#
# Copyright (c) 2016, 2021 by Delphix. All rights reserved.
# Copyright (c) 2016, 2023 by Delphix. All rights reserved.
#
#
@ -39,59 +39,60 @@ function cleanup
# kill fio and iostat
pkill fio
pkill iostat
recreate_perf_pool
}
trap "log_fail \"Measure IO stats during sequential read load\"" SIGTERM
log_onexit cleanup
recreate_perf_pool
populate_perf_filesystems
for logtype in $PERF_LOG_TYPES; do
recreate_perf_pool $logtype
populate_perf_filesystems
# Ensure the working set can be cached in the dbuf cache.
export TOTAL_SIZE=$(($(get_dbuf_cache_size) * 3 / 4))
# Ensure the working set can be cached in the dbuf cache.
export TOTAL_SIZE=$(($(get_dbuf_cache_size) * 3 / 4))
# Variables specific to this test for use by fio.
export PERF_NTHREADS=${PERF_NTHREADS:-'64'}
export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'}
export PERF_IOSIZES=${PERF_IOSIZES:-'64k'}
export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'1'}
# Variables specific to this test for use by fio.
export PERF_NTHREADS=${PERF_NTHREADS:-'64'}
export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'}
export PERF_IOSIZES=${PERF_IOSIZES:-'64k'}
export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'1'}
# Layout the files to be used by the read tests. Create as many files as the
# largest number of threads. An fio run with fewer threads will use a subset
# of the available files.
export NUMJOBS=$(get_max $PERF_NTHREADS)
export FILE_SIZE=$((TOTAL_SIZE / NUMJOBS))
export DIRECTORY=$(get_directory)
log_must fio $FIO_SCRIPTS/mkfiles.fio
# Layout the files to be used by the read tests. Create as many files as the
# largest number of threads. An fio run with fewer threads will use a subset
# of the available files.
export NUMJOBS=$(get_max $PERF_NTHREADS)
export FILE_SIZE=$((TOTAL_SIZE / NUMJOBS))
export DIRECTORY=$(get_directory)
log_must fio $FIO_SCRIPTS/mkfiles.fio
# Set up the scripts and output files that will log performance data.
lun_list=$(pool_to_lun_list $PERFPOOL)
log_note "Collecting backend IO stats with lun list $lun_list"
if is_linux; then
typeset perf_record_cmd="perf record -F 99 -a -g -q \
-o /dev/stdout -- sleep ${PERF_RUNTIME}"
# Set up the scripts and output files that will log performance data.
lun_list=$(pool_to_lun_list $PERFPOOL)
log_note "Collecting backend IO stats with lun list $lun_list"
if is_linux; then
typeset perf_record_cmd="perf record -F 99 -a -g -q \
-o /dev/stdout -- sleep ${PERF_RUNTIME}"
export collect_scripts=(
"zpool iostat -lpvyL $PERFPOOL 1" "zpool.iostat"
"$PERF_SCRIPTS/prefetch_io.sh $PERFPOOL 1" "prefetch"
"vmstat -t 1" "vmstat"
"mpstat -P ALL 1" "mpstat"
"iostat -tdxyz 1" "iostat"
"$perf_record_cmd" "perf"
)
else
export collect_scripts=(
"kstat zfs:0 1" "kstat"
"vmstat -T d 1" "vmstat"
"mpstat -T d 1" "mpstat"
"iostat -T d -xcnz 1" "iostat"
"dtrace -Cs $PERF_SCRIPTS/io.d $PERFPOOL $lun_list 1" "io"
"dtrace -Cs $PERF_SCRIPTS/prefetch_io.d $PERFPOOL 1" "prefetch"
"dtrace -s $PERF_SCRIPTS/profile.d" "profile"
)
fi
export collect_scripts=(
"zpool iostat -lpvyL $PERFPOOL 1" "zpool.iostat"
"$PERF_SCRIPTS/prefetch_io.sh $PERFPOOL 1" "prefetch"
"vmstat -t 1" "vmstat"
"mpstat -P ALL 1" "mpstat"
"iostat -tdxyz 1" "iostat"
"$perf_record_cmd" "perf"
)
else
export collect_scripts=(
"kstat zfs:0 1" "kstat"
"vmstat -T d 1" "vmstat"
"mpstat -T d 1" "mpstat"
"iostat -T d -xcnz 1" "iostat"
"dtrace -Cs $PERF_SCRIPTS/io.d $PERFPOOL $lun_list 1" "io"
"dtrace -Cs $PERF_SCRIPTS/prefetch_io.d $PERFPOOL 1" "prefetch"
"dtrace -s $PERF_SCRIPTS/profile.d" "profile"
)
fi
log_note "Sequential cached reads with settings: $(print_perf_settings)"
do_fio_run sequential_reads.fio false false
log_note "Sequential cached reads with settings: $(print_perf_settings)"
do_fio_run sequential_reads.fio false false $logtype
done
log_pass "Measure IO stats during sequential cached read load"

View File

@ -12,7 +12,7 @@
#
#
# Copyright (c) 2015, 2021 by Delphix. All rights reserved.
# Copyright (c) 2015, 2023 by Delphix. All rights reserved.
#
#
@ -44,47 +44,48 @@ function cleanup
# kill fio and iostat
pkill fio
pkill iostat
recreate_perf_pool
}
trap "log_fail \"Measure IO stats during random read load\"" SIGTERM
log_onexit cleanup
recreate_perf_pool
populate_perf_filesystems
for logtype in $PERF_LOG_TYPES; do
recreate_perf_pool $logtype
populate_perf_filesystems
# Aim to fill the pool to 50% capacity while accounting for a 3x compressratio.
export TOTAL_SIZE=$(($(get_prop avail $PERFPOOL) * 3 / 2))
# Aim to fill the pool to 50% capacity while accounting for a 3x compressratio.
export TOTAL_SIZE=$(($(get_prop avail $PERFPOOL) * 3 / 2))
# Variables specific to this test for use by fio.
export PERF_NTHREADS=${PERF_NTHREADS:-'16 32'}
export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'}
export PERF_IOSIZES=${PERF_IOSIZES:-'8k 1m'}
export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'0 1'}
# Variables specific to this test for use by fio.
export PERF_NTHREADS=${PERF_NTHREADS:-'16 32'}
export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'}
export PERF_IOSIZES=${PERF_IOSIZES:-'8k 1m'}
export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'0 1'}
# Set up the scripts and output files that will log performance data.
lun_list=$(pool_to_lun_list $PERFPOOL)
log_note "Collecting backend IO stats with lun list $lun_list"
if is_linux; then
typeset perf_record_cmd="perf record -F 99 -a -g -q \
-o /dev/stdout -- sleep ${PERF_RUNTIME}"
# Set up the scripts and output files that will log performance data.
lun_list=$(pool_to_lun_list $PERFPOOL)
log_note "Collecting backend IO stats with lun list $lun_list"
if is_linux; then
typeset perf_record_cmd="perf record -F 99 -a -g -q \
-o /dev/stdout -- sleep ${PERF_RUNTIME}"
export collect_scripts=(
"zpool iostat -lpvyL $PERFPOOL 1" "zpool.iostat"
"vmstat -t 1" "vmstat"
"mpstat -P ALL 1" "mpstat"
"iostat -tdxyz 1" "iostat"
"$perf_record_cmd" "perf"
)
else
export collect_scripts=(
"$PERF_SCRIPTS/io.d $PERFPOOL $lun_list 1" "io"
"vmstat -T d 1" "vmstat"
"mpstat -T d 1" "mpstat"
"iostat -T d -xcnz 1" "iostat"
)
fi
export collect_scripts=(
"zpool iostat -lpvyL $PERFPOOL 1" "zpool.iostat"
"vmstat -t 1" "vmstat"
"mpstat -P ALL 1" "mpstat"
"iostat -tdxyz 1" "iostat"
"$perf_record_cmd" "perf"
)
else
export collect_scripts=(
"$PERF_SCRIPTS/io.d $PERFPOOL $lun_list 1" "io"
"vmstat -T d 1" "vmstat"
"mpstat -T d 1" "mpstat"
"iostat -T d -xcnz 1" "iostat"
)
fi
log_note "Sequential writes with settings: $(print_perf_settings)"
do_fio_run sequential_writes.fio true false
log_note "Sequential writes with settings: $(print_perf_settings)"
do_fio_run sequential_writes.fio true false $logtype
done
log_pass "Measure IO stats during sequential write load"