Merge branch 'master' into NAS-130821-2

Signed-off-by: Umer Saleem <usaleem@ixsystems.com>
This commit is contained in:
Umer Saleem 2024-09-10 10:26:37 +05:00
commit 80110e704d
37 changed files with 1419 additions and 160 deletions

View File

@ -2045,7 +2045,7 @@ dump_all_ddts(spa_t *spa)
for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
ddt_t *ddt = spa->spa_ddt[c];
if (!ddt)
if (!ddt || ddt->ddt_version == DDT_VERSION_UNCONFIGURED)
continue;
for (ddt_type_t type = 0; type < DDT_TYPES; type++) {
for (ddt_class_t class = 0; class < DDT_CLASSES;
@ -2072,6 +2072,32 @@ dump_all_ddts(spa_t *spa)
}
dump_dedup_ratio(&dds_total);
/*
* Dump a histogram of unique class entry age
*/
if (dump_opt['D'] == 3 && getenv("ZDB_DDT_UNIQUE_AGE_HIST") != NULL) {
ddt_age_histo_t histogram;
(void) printf("DDT walk unique, building age histogram...\n");
ddt_prune_walk(spa, 0, &histogram);
/*
* print out histogram for unique entry class birth
*/
if (histogram.dah_entries > 0) {
(void) printf("%5s %9s %4s\n",
"age", "blocks", "amnt");
(void) printf("%5s %9s %4s\n",
"-----", "---------", "----");
for (int i = 0; i < HIST_BINS; i++) {
(void) printf("%5d %9d %4d%%\n", 1 << i,
(int)histogram.dah_age_histo[i],
(int)((histogram.dah_age_histo[i] * 100) /
histogram.dah_entries));
}
}
}
}
static void
@ -5749,12 +5775,17 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
ddt_entry_t *dde = ddt_lookup(ddt, bp);
/*
* ddt_lookup() can only return NULL if this block didn't exist
* ddt_lookup() can return NULL if this block didn't exist
* in the DDT and creating it would take the DDT over its
* quota. Since we got the block from disk, it must exist in
* the DDT, so this can't happen.
* the DDT, so this can't happen. However, when unique entries
* are pruned, the dedup bit can be set with no corresponding
* entry in the DDT.
*/
VERIFY3P(dde, !=, NULL);
if (dde == NULL) {
ddt_exit(ddt);
goto skipped;
}
/* Get the phys for this variant */
ddt_phys_variant_t v = ddt_phys_select(ddt, dde, bp);
@ -5774,8 +5805,8 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
(void *)(((uintptr_t)dde->dde_io) | (1 << v));
/* Consume a reference for this block. */
VERIFY3U(ddt_phys_total_refcnt(ddt, dde->dde_phys), >, 0);
ddt_phys_decref(dde->dde_phys, v);
if (ddt_phys_total_refcnt(ddt, dde->dde_phys) > 0)
ddt_phys_decref(dde->dde_phys, v);
/*
* If this entry has a single flat phys, it may have been
@ -5864,6 +5895,7 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
}
}
skipped:
for (i = 0; i < 4; i++) {
int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL;
int t = (i & 1) ? type : ZDB_OT_TOTAL;
@ -8138,7 +8170,7 @@ dump_mos_leaks(spa_t *spa)
for (uint64_t c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
ddt_t *ddt = spa->spa_ddt[c];
if (!ddt)
if (!ddt || ddt->ddt_version == DDT_VERSION_UNCONFIGURED)
continue;
/* DDT store objects */
@ -8150,11 +8182,14 @@ dump_mos_leaks(spa_t *spa)
}
/* FDT container */
mos_obj_refd(ddt->ddt_dir_object);
if (ddt->ddt_version == DDT_VERSION_FDT)
mos_obj_refd(ddt->ddt_dir_object);
/* FDT log objects */
mos_obj_refd(ddt->ddt_log[0].ddl_object);
mos_obj_refd(ddt->ddt_log[1].ddl_object);
if (ddt->ddt_flags & DDT_FLAG_LOG) {
mos_obj_refd(ddt->ddt_log[0].ddl_object);
mos_obj_refd(ddt->ddt_log[1].ddl_object);
}
}
if (spa->spa_brt != NULL) {

View File

@ -75,6 +75,7 @@
#include "zpool_util.h"
#include "zfs_comutil.h"
#include "zfeature_common.h"
#include "zfs_valstr.h"
#include "statcommon.h"
@ -130,6 +131,8 @@ static int zpool_do_version(int, char **);
static int zpool_do_wait(int, char **);
static int zpool_do_ddt_prune(int, char **);
static int zpool_do_help(int argc, char **argv);
static zpool_compat_status_t zpool_do_load_compat(
@ -170,6 +173,7 @@ typedef enum {
HELP_CLEAR,
HELP_CREATE,
HELP_CHECKPOINT,
HELP_DDT_PRUNE,
HELP_DESTROY,
HELP_DETACH,
HELP_EXPORT,
@ -426,6 +430,8 @@ static zpool_command_t command_table[] = {
{ "sync", zpool_do_sync, HELP_SYNC },
{ NULL },
{ "wait", zpool_do_wait, HELP_WAIT },
{ NULL },
{ "ddtprune", zpool_do_ddt_prune, HELP_DDT_PRUNE },
};
#define NCOMMAND (ARRAY_SIZE(command_table))
@ -545,6 +551,8 @@ get_usage(zpool_help_t idx)
case HELP_WAIT:
return (gettext("\twait [-Hp] [-T d|u] [-t <activity>[,...]] "
"<pool> [interval]\n"));
case HELP_DDT_PRUNE:
return (gettext("\tddtprune -d|-p <amount> <pool>\n"));
default:
__builtin_unreachable();
}
@ -11929,6 +11937,7 @@ static void
zpool_do_events_nvprint(nvlist_t *nvl, int depth)
{
nvpair_t *nvp;
static char flagstr[256];
for (nvp = nvlist_next_nvpair(nvl, NULL);
nvp != NULL; nvp = nvlist_next_nvpair(nvl, nvp)) {
@ -11988,7 +11997,21 @@ zpool_do_events_nvprint(nvlist_t *nvl, int depth)
case DATA_TYPE_UINT32:
(void) nvpair_value_uint32(nvp, &i32);
printf(gettext("0x%x"), i32);
if (strcmp(name,
FM_EREPORT_PAYLOAD_ZFS_ZIO_STAGE) == 0 ||
strcmp(name,
FM_EREPORT_PAYLOAD_ZFS_ZIO_PIPELINE) == 0) {
zfs_valstr_zio_stage(i32, flagstr,
sizeof (flagstr));
printf(gettext("0x%x [%s]"), i32, flagstr);
} else if (strcmp(name,
FM_EREPORT_PAYLOAD_ZFS_ZIO_PRIORITY) == 0) {
zfs_valstr_zio_priority(i32, flagstr,
sizeof (flagstr));
printf(gettext("0x%x [%s]"), i32, flagstr);
} else {
printf(gettext("0x%x"), i32);
}
break;
case DATA_TYPE_INT64:
@ -12009,6 +12032,12 @@ zpool_do_events_nvprint(nvlist_t *nvl, int depth)
printf(gettext("\"%s\" (0x%llx)"),
zpool_state_to_name(i64, VDEV_AUX_NONE),
(u_longlong_t)i64);
} else if (strcmp(name,
FM_EREPORT_PAYLOAD_ZFS_ZIO_FLAGS) == 0) {
zfs_valstr_zio_flag(i64, flagstr,
sizeof (flagstr));
printf(gettext("0x%llx [%s]"),
(u_longlong_t)i64, flagstr);
} else {
printf(gettext("0x%llx"), (u_longlong_t)i64);
}
@ -13342,6 +13371,88 @@ found:;
return (error);
}
/*
 * zpool ddtprune -d|-p <amount> <pool>
 *
 *       -d <days>	Prune entries <days> old and older
 *       -p <percent>	Prune <percent> amount of entries
 *
 * Prune single reference entries from DDT to satisfy the amount specified.
 *
 * -d and -p are mutually exclusive and one of them is required, followed
 * by exactly one pool name.  A -d value is converted from days to seconds
 * before it is handed to zpool_ddt_prune(); day counts that cannot be
 * converted without wrapping a uint64_t are rejected as invalid.
 *
 * Returns -1 if the pool cannot be opened, otherwise the result of
 * zpool_ddt_prune().
 *
 * NOTE(review): the error paths are written as if usage() does not
 * return -- confirm.
 */
int
zpool_do_ddt_prune(int argc, char **argv)
{
	/* Largest day count whose conversion to seconds fits a uint64_t. */
	const uint64_t secs_per_day = 86400;
	const uint64_t max_days = (~(uint64_t)0) / secs_per_day;

	zpool_ddt_prune_unit_t unit = ZPOOL_DDT_PRUNE_NONE;
	uint64_t amount = 0;
	zpool_handle_t *zhp;
	char *endptr;
	int c;

	while ((c = getopt(argc, argv, "d:p:")) != -1) {
		switch (c) {
		case 'd':
			if (unit == ZPOOL_DDT_PRUNE_PERCENTAGE) {
				(void) fprintf(stderr, gettext("-d cannot be "
				    "combined with -p option\n"));
				usage(B_FALSE);
			}
			errno = 0;
			amount = strtoull(optarg, &endptr, 0);
			/*
			 * The max_days bound keeps the multiplication below
			 * from wrapping.  It also catches input such as "-1",
			 * which strtoull() turns into a huge positive value.
			 */
			if (errno != 0 || *endptr != '\0' || amount == 0 ||
			    amount > max_days) {
				(void) fprintf(stderr,
				    gettext("invalid days value\n"));
				usage(B_FALSE);
			}
			amount *= secs_per_day;	/* convert days to seconds */
			unit = ZPOOL_DDT_PRUNE_AGE;
			break;
		case 'p':
			if (unit == ZPOOL_DDT_PRUNE_AGE) {
				(void) fprintf(stderr, gettext("-p cannot be "
				    "combined with -d option\n"));
				usage(B_FALSE);
			}
			errno = 0;
			amount = strtoull(optarg, &endptr, 0);
			if (errno != 0 || *endptr != '\0' ||
			    amount == 0 || amount > 100) {
				(void) fprintf(stderr,
				    gettext("invalid percentage value\n"));
				usage(B_FALSE);
			}
			unit = ZPOOL_DDT_PRUNE_PERCENTAGE;
			break;
		case '?':
			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
			    optopt);
			usage(B_FALSE);
		}
	}
	argc -= optind;
	argv += optind;

	/* After option parsing exactly one operand, the pool, must remain. */
	if (unit == ZPOOL_DDT_PRUNE_NONE) {
		(void) fprintf(stderr,
		    gettext("missing amount option (-d|-p <value>)\n"));
		usage(B_FALSE);
	} else if (argc < 1) {
		(void) fprintf(stderr, gettext("missing pool argument\n"));
		usage(B_FALSE);
	} else if (argc > 1) {
		(void) fprintf(stderr, gettext("too many arguments\n"));
		usage(B_FALSE);
	}

	zhp = zpool_open(g_zfs, argv[0]);
	if (zhp == NULL)
		return (-1);

	int error = zpool_ddt_prune(zhp, unit, amount);

	zpool_close(zhp);

	return (error);
}
static int
find_command_idx(const char *command, int *idx)
{

View File

@ -276,6 +276,8 @@ extern unsigned long zio_decompress_fail_fraction;
extern unsigned long zfs_reconstruct_indirect_damage_fraction;
extern uint64_t raidz_expand_max_reflow_bytes;
extern uint_t raidz_expand_pause_point;
extern boolean_t ddt_prune_artificial_age;
extern boolean_t ddt_dump_prune_histogram;
static ztest_shared_opts_t *ztest_shared_opts;
@ -446,6 +448,7 @@ ztest_func_t ztest_fletcher;
ztest_func_t ztest_fletcher_incr;
ztest_func_t ztest_verify_dnode_bt;
ztest_func_t ztest_pool_prefetch_ddt;
ztest_func_t ztest_ddt_prune;
static uint64_t zopt_always = 0ULL * NANOSEC; /* all the time */
static uint64_t zopt_incessant = 1ULL * NANOSEC / 10; /* every 1/10 second */
@ -502,6 +505,7 @@ static ztest_info_t ztest_info[] = {
ZTI_INIT(ztest_fletcher_incr, 1, &zopt_rarely),
ZTI_INIT(ztest_verify_dnode_bt, 1, &zopt_sometimes),
ZTI_INIT(ztest_pool_prefetch_ddt, 1, &zopt_rarely),
ZTI_INIT(ztest_ddt_prune, 1, &zopt_rarely),
};
#define ZTEST_FUNCS (sizeof (ztest_info) / sizeof (ztest_info_t))
@ -6211,13 +6215,14 @@ void
ztest_spa_prop_get_set(ztest_ds_t *zd, uint64_t id)
{
(void) zd, (void) id;
nvlist_t *props = NULL;
(void) pthread_rwlock_rdlock(&ztest_name_lock);
(void) ztest_spa_prop_set_uint64(ZPOOL_PROP_AUTOTRIM, ztest_random(2));
VERIFY0(spa_prop_get(ztest_spa, &props));
nvlist_t *props = fnvlist_alloc();
VERIFY0(spa_prop_get(ztest_spa, props));
if (ztest_opts.zo_verbose >= 6)
dump_nvlist(props, 4);
@ -7288,6 +7293,17 @@ ztest_trim(ztest_ds_t *zd, uint64_t id)
mutex_exit(&ztest_vdev_lock);
}
/*
 * ztest operation: request a DDT prune of unique entries on the test pool,
 * expressed as a percentage of ztest_random(15) + 1.  Neither argument is
 * used, and the result of the prune call is deliberately discarded.
 */
void
ztest_ddt_prune(ztest_ds_t *zd, uint64_t id)
{
	(void) zd;
	(void) id;

	spa_t *pool = ztest_spa;
	uint64_t percent = 1 + ztest_random(15);

	(void) ddt_prune_unique_entries(pool, ZPOOL_DDT_PRUNE_PERCENTAGE,
	    percent);
}
/*
* Verify pool integrity by running zdb.
*/
@ -7469,6 +7485,13 @@ ztest_resume_thread(void *arg)
{
spa_t *spa = arg;
/*
* Synthesize aged DDT entries for ddt prune testing
*/
ddt_prune_artificial_age = B_TRUE;
if (ztest_opts.zo_verbose >= 3)
ddt_dump_prune_histogram = B_TRUE;
while (!ztest_exiting) {
if (spa_suspended(spa))
ztest_resume(spa);
@ -8587,6 +8610,12 @@ ztest_init(ztest_shared_t *zs)
if (i == SPA_FEATURE_LOG_SPACEMAP && ztest_random(4) == 0)
continue;
/*
* split 50/50 between legacy and fast dedup
*/
if (i == SPA_FEATURE_FAST_DEDUP && ztest_random(2) != 0)
continue;
VERIFY3S(-1, !=, asprintf(&buf, "feature@%s",
spa_feature_table[i].fi_uname));
fnvlist_add_uint64(props, buf, 0);

View File

@ -100,6 +100,7 @@ usr/share/man/man8/zpool-clear.8
usr/share/man/man8/zpool-create.8
usr/share/man/man8/zpool-destroy.8
usr/share/man/man8/zpool-detach.8
usr/share/man/man8/zpool-ddtprune.8
usr/share/man/man8/zpool-events.8
usr/share/man/man8/zpool-export.8
usr/share/man/man8/zpool-get.8

View File

@ -14,6 +14,7 @@ COMMON_H = \
zfs_fletcher.h \
zfs_namecheck.h \
zfs_prop.h \
zfs_valstr.h \
\
sys/abd.h \
sys/abd_impl.h \

View File

@ -305,6 +305,9 @@ _LIBZFS_H int zpool_reopen_one(zpool_handle_t *, void *);
_LIBZFS_H int zpool_sync_one(zpool_handle_t *, void *);
_LIBZFS_H int zpool_ddt_prune(zpool_handle_t *, zpool_ddt_prune_unit_t,
uint64_t);
_LIBZFS_H int zpool_vdev_online(zpool_handle_t *, const char *, int,
vdev_state_t *);
_LIBZFS_H int zpool_vdev_offline(zpool_handle_t *, const char *, boolean_t);

View File

@ -161,6 +161,9 @@ _LIBZFS_CORE_H int lzc_set_vdev_prop(const char *, nvlist_t *, nvlist_t **);
_LIBZFS_CORE_H int lzc_scrub(zfs_ioc_t, const char *, nvlist_t *, nvlist_t **);
_LIBZFS_CORE_H int lzc_ddt_prune(const char *, zpool_ddt_prune_unit_t,
uint64_t);
#ifdef __cplusplus
}
#endif

View File

@ -405,6 +405,9 @@ extern int ddt_walk(spa_t *spa, ddt_bookmark_t *ddb,
extern boolean_t ddt_addref(spa_t *spa, const blkptr_t *bp);
extern int ddt_prune_unique_entries(spa_t *spa, zpool_ddt_prune_unit_t unit,
uint64_t amount);
#ifdef __cplusplus
}
#endif

View File

@ -35,8 +35,11 @@ extern "C" {
#endif
/* DDT version numbers */
#define DDT_VERSION_LEGACY (0)
#define DDT_VERSION_FDT (1)
#define DDT_VERSION_LEGACY (0)
#define DDT_VERSION_FDT (1)
/* Dummy version to signal that configure is still necessary */
#define DDT_VERSION_UNCONFIGURED (UINT64_MAX)
/* Names of interesting objects in the DDT root dir */
#define DDT_DIR_VERSION "version"
@ -187,8 +190,11 @@ extern void ddt_log_commit(ddt_t *ddt, ddt_log_update_t *dlu);
extern boolean_t ddt_log_take_first(ddt_t *ddt, ddt_log_t *ddl,
ddt_lightweight_entry_t *ddlwe);
extern boolean_t ddt_log_take_key(ddt_t *ddt, ddt_log_t *ddl,
const ddt_key_t *ddk, ddt_lightweight_entry_t *ddlwe);
extern boolean_t ddt_log_find_key(ddt_t *ddt, const ddt_key_t *ddk,
ddt_lightweight_entry_t *ddlwe);
extern boolean_t ddt_log_remove_key(ddt_t *ddt, ddt_log_t *ddl,
const ddt_key_t *ddk);
extern void ddt_log_checkpoint(ddt_t *ddt, ddt_lightweight_entry_t *ddlwe,
dmu_tx_t *tx);
@ -211,6 +217,44 @@ extern void ddt_log_fini(void);
* them up.
*/
/*
* We use a histogram to convert a percentage request into a
* cutoff value where entries older than the cutoff get pruned.
*
* The histogram bins represent hours in power-of-two increments.
* 16 bins covers up to four years.
*/
/* Number of age bins kept in a ddt_age_histo_t. */
#define HIST_BINS 16
/*
 * Age histogram handed to ddt_prune_walk() and printed by the histogram
 * dump helper.
 */
typedef struct ddt_age_histo {
/*
 * Entry count; the dump code treats zero as "nothing to report" and
 * otherwise uses it as the denominator for each bin's percentage.
 */
uint64_t dah_entries;
/* Per-bin entry counts; bin i is reported under the age label (1 << i). */
uint64_t dah_age_histo[HIST_BINS];
} ddt_age_histo_t;
void ddt_prune_walk(spa_t *spa, uint64_t cutoff, ddt_age_histo_t *histogram);
#if !defined(_KERNEL) && defined(ZFS_DEBUG)
/*
 * Print the unique-class age histogram to stdout (userland debug builds
 * only).  Nothing is printed when the histogram holds no entries.
 *
 *	histogram	histogram to print
 *	cutoff		subtracted from the current time (gethrestime_sec())
 *			and reported in hours in the heading line
 *
 * Each row shows the bin's age label (1 << bin), its entry count, and that
 * count as a whole percentage of dah_entries.
 */
static inline void
ddt_dump_age_histogram(ddt_age_histo_t *histogram, uint64_t cutoff)
{
	const uint64_t total = histogram->dah_entries;

	/* An empty histogram would also mean dividing by zero below. */
	if (total == 0)
		return;

	u_longlong_t cutoff_hours =
	    (u_longlong_t)(gethrestime_sec() - cutoff) / 3600;

	(void) printf("DDT prune unique class age, %llu hour cutoff\n",
	    cutoff_hours);
	(void) printf("%5s %9s %4s\n", "age", "blocks", "amnt");
	(void) printf("%5s %9s %4s\n", "-----", "---------", "----");

	for (int bin = 0; bin < HIST_BINS; bin++) {
		uint64_t count = histogram->dah_age_histo[bin];
		int percent = (int)((count * 100) / total);

		(void) printf("%5d %9llu %4d%%\n", 1 << bin,
		    (u_longlong_t)count, percent);
	}
}
#else
/* Kernel and non-debug builds: the dump compiles away to nothing. */
#define ddt_dump_age_histogram(histo, cutoff) ((void)0)
#endif
/*
* Enough room to expand DMU_POOL_DDT format for all possible DDT
* checksum/class/type combinations.

View File

@ -1422,7 +1422,7 @@ typedef enum {
*/
typedef enum zfs_ioc {
/*
* Core features - 88/128 numbers reserved.
* Core features - 89/128 numbers reserved.
*/
#ifdef __FreeBSD__
ZFS_IOC_FIRST = 0,
@ -1519,6 +1519,7 @@ typedef enum zfs_ioc {
ZFS_IOC_VDEV_SET_PROPS, /* 0x5a56 */
ZFS_IOC_POOL_SCRUB, /* 0x5a57 */
ZFS_IOC_POOL_PREFETCH, /* 0x5a58 */
ZFS_IOC_DDT_PRUNE, /* 0x5a59 */
/*
* Per-platform (Optional) - 8/128 numbers reserved.
@ -1655,6 +1656,12 @@ typedef enum {
ZPOOL_PREFETCH_DDT
} zpool_prefetch_type_t;
/*
 * Unit in which the amount of a DDT prune request is expressed.
 * The numeric values are spelled out because they are part of the
 * library ABI.
 */
typedef enum {
	ZPOOL_DDT_PRUNE_NONE = 0,	/* no unit chosen yet */
	ZPOOL_DDT_PRUNE_AGE = 1,	/* in seconds */
	ZPOOL_DDT_PRUNE_PERCENTAGE = 2,	/* 1 - 100 */
} zpool_ddt_prune_unit_t;
/*
* Bookmark name values.
*/
@ -1753,6 +1760,12 @@ typedef enum {
*/
#define ZPOOL_PREFETCH_TYPE "prefetch_type"
/*
* The following are names used when invoking ZFS_IOC_DDT_PRUNE.
*/
#define DDT_PRUNE_UNIT "ddt_prune_unit"
#define DDT_PRUNE_AMOUNT "ddt_prune_amount"
/*
* Flags for ZFS_IOC_VDEV_SET_STATE
*/

View File

@ -1201,9 +1201,9 @@ extern void spa_boot_init(void);
/* properties */
extern int spa_prop_set(spa_t *spa, nvlist_t *nvp);
extern int spa_prop_get(spa_t *spa, nvlist_t **nvp);
extern int spa_prop_get(spa_t *spa, nvlist_t *nvp);
extern int spa_prop_get_nvlist(spa_t *spa, char **props,
unsigned int n_props, nvlist_t **outnvl);
unsigned int n_props, nvlist_t *outnvl);
extern void spa_prop_clear_bootfs(spa_t *spa, uint64_t obj, dmu_tx_t *tx);
extern void spa_configfile_set(spa_t *, nvlist_t *, boolean_t);

View File

@ -412,6 +412,7 @@ struct spa {
uint64_t spa_dedup_dspace; /* Cache get_dedup_dspace() */
uint64_t spa_dedup_checksum; /* default dedup checksum */
uint64_t spa_dspace; /* dspace in normal class */
boolean_t spa_active_ddt_prune; /* ddt prune process active */
struct brt *spa_brt; /* in-core BRT */
kmutex_t spa_vdev_top_lock; /* dueling offline/remove */
kmutex_t spa_proc_lock; /* protects spa_proc* */

View File

@ -167,6 +167,9 @@ typedef enum zio_suspend_reason {
* This was originally an enum type. However, those are 32-bit and there is no
* way to make a 64-bit enum type. Since we ran out of bits for flags, we were
* forced to upgrade it to a uint64_t.
*
* NOTE: PLEASE UPDATE THE BITFIELD STRINGS IN zfs_valstr.c IF YOU ADD ANOTHER
* FLAG.
*/
typedef uint64_t zio_flag_t;
/*

View File

@ -120,6 +120,9 @@ extern "C" {
/*
* zio pipeline stage definitions
*
* NOTE: PLEASE UPDATE THE BITFIELD STRINGS IN zfs_valstr.c IF YOU ADD ANOTHER
* FLAG.
*/
enum zio_stage {
ZIO_STAGE_OPEN = 1 << 0, /* RWFCXT */

View File

@ -22,6 +22,10 @@
extern "C" {
#endif
/*
* NOTE: PLEASE UPDATE THE ENUM STRINGS IN zfs_valstr.c IF YOU ADD ANOTHER
* VALUE.
*/
typedef enum zio_priority {
ZIO_PRIORITY_SYNC_READ,
ZIO_PRIORITY_SYNC_WRITE, /* ZIL */

84
include/zfs_valstr.h Normal file
View File

@ -0,0 +1,84 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2024, Klara Inc.
*/
#ifndef _ZFS_VALSTR_H
#define _ZFS_VALSTR_H extern __attribute__((visibility("default")))
#include <sys/fs/zfs.h>
#include <sys/types.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
* These macros create function prototypes for pretty-printing or stringifying
* certain kinds of numeric types.
*
* _ZFS_VALSTR_DECLARE_BITFIELD(name) creates:
*
* size_t zfs_valstr_<name>_bits(uint64_t bits, char *out, size_t outlen);
* expands single char for each set bit, and space for each clear bit
*
* size_t zfs_valstr_<name>_pairs(uint64_t bits, char *out, size_t outlen);
* expands two-char mnemonic for each bit set in `bits`, separated by `|`
*
* size_t zfs_valstr_<name>(uint64_t bits, char *out, size_t outlen);
* expands full name of each bit set in `bits`, separated by spaces
*
* _ZFS_VALSTR_DECLARE_ENUM(name) creates:
*
* size_t zfs_valstr_<name>(int v, char *out, size_t outlen);
* expands full name of enum value
*
* Each _ZFS_VALSTR_DECLARE_xxx needs a corresponding _VALSTR_xxx_IMPL string
* table in zfs_valstr.c.
*/
#define _ZFS_VALSTR_DECLARE_BITFIELD(name) \
_ZFS_VALSTR_H size_t zfs_valstr_ ## name ## _bits( \
uint64_t bits, char *out, size_t outlen); \
_ZFS_VALSTR_H size_t zfs_valstr_ ## name ## _pairs( \
uint64_t bits, char *out, size_t outlen); \
_ZFS_VALSTR_H size_t zfs_valstr_ ## name( \
uint64_t bits, char *out, size_t outlen); \
#define _ZFS_VALSTR_DECLARE_ENUM(name) \
_ZFS_VALSTR_H size_t zfs_valstr_ ## name( \
int v, char *out, size_t outlen); \
_ZFS_VALSTR_DECLARE_BITFIELD(zio_flag)
_ZFS_VALSTR_DECLARE_BITFIELD(zio_stage)
_ZFS_VALSTR_DECLARE_ENUM(zio_priority)
#undef _ZFS_VALSTR_DECLARE_BITFIELD
#undef _ZFS_VALSTR_DECLARE_ENUM
#ifdef __cplusplus
}
#endif
#endif /* _ZFS_VALSTR_H */

View File

@ -65,11 +65,6 @@ typedef int32_t ssize32_t;
typedef int32_t time32_t;
typedef int32_t clock32_t;
struct timeval32 {
time32_t tv_sec; /* seconds */
int32_t tv_usec; /* and microseconds */
};
typedef struct timespec32 {
time32_t tv_sec; /* seconds */
int32_t tv_nsec; /* and nanoseconds */

View File

@ -47,6 +47,7 @@ nodist_libzfs_la_SOURCES = \
module/zcommon/zfs_fletcher_superscalar4.c \
module/zcommon/zfs_namecheck.c \
module/zcommon/zfs_prop.c \
module/zcommon/zfs_valstr.c \
module/zcommon/zpool_prop.c \
module/zcommon/zprop_common.c

View File

@ -183,8 +183,8 @@
<elf-symbol name='fsleep' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='get_dataset_depth' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='get_system_hostid' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='getexecname' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='get_timestamp' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='getexecname' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='getextmntent' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='getmntany' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='getprop_uint64' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
@ -454,6 +454,13 @@
<elf-symbol name='zfs_userns' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_userspace' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_valid_proplist' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_valstr_zio_flag' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_valstr_zio_flag_bits' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_valstr_zio_flag_pairs' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_valstr_zio_priority' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_valstr_zio_stage' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_valstr_zio_stage_bits' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_valstr_zio_stage_pairs' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_version_kernel' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_version_nvlist' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_version_print' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
@ -466,7 +473,9 @@
<elf-symbol name='zpool_clear' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_clear_label' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_close' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_collect_unsup_feat' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_create' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_ddt_prune' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_default_search_paths' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_destroy' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_disable_datasets' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
@ -485,8 +494,8 @@
<elf-symbol name='zpool_export_force' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_feature_init' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_find_config' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_find_vdev' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_find_parent_vdev' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_find_vdev' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_find_vdev_by_physpath' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_free_handles' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_get_all_vdev_props' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
@ -529,7 +538,6 @@
<elf-symbol name='zpool_prefetch' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_prepare_and_label_disk' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_prepare_disk' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_collect_unsup_feat' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_prop_align_right' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_prop_column_name' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_prop_default_numeric' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
@ -5929,6 +5937,7 @@
<enumerator name='ZFS_IOC_VDEV_SET_PROPS' value='23126'/>
<enumerator name='ZFS_IOC_POOL_SCRUB' value='23127'/>
<enumerator name='ZFS_IOC_POOL_PREFETCH' value='23128'/>
<enumerator name='ZFS_IOC_DDT_PRUNE' value='23129'/>
<enumerator name='ZFS_IOC_PLATFORM' value='23168'/>
<enumerator name='ZFS_IOC_EVENTS_NEXT' value='23169'/>
<enumerator name='ZFS_IOC_EVENTS_CLEAR' value='23170'/>
@ -5963,6 +5972,13 @@
<enumerator name='ZPOOL_PREFETCH_DDT' value='1'/>
</enum-decl>
<typedef-decl name='zpool_prefetch_type_t' type-id='0299ab50' id='e55ff6bc'/>
<enum-decl name='zpool_ddt_prune_unit_t' naming-typedef-id='02e25ab0' id='509ae11c'>
<underlying-type type-id='9cac1fee'/>
<enumerator name='ZPOOL_DDT_PRUNE_NONE' value='0'/>
<enumerator name='ZPOOL_DDT_PRUNE_AGE' value='1'/>
<enumerator name='ZPOOL_DDT_PRUNE_PERCENTAGE' value='2'/>
</enum-decl>
<typedef-decl name='zpool_ddt_prune_unit_t' type-id='509ae11c' id='02e25ab0'/>
<enum-decl name='spa_feature' id='33ecb627'>
<underlying-type type-id='9cac1fee'/>
<enumerator name='SPA_FEATURE_NONE' value='-1'/>
@ -6139,6 +6155,12 @@
<parameter type-id='857bb57e'/>
<return type-id='95e97e5e'/>
</function-decl>
<function-decl name='lzc_ddt_prune' visibility='default' binding='global' size-in-bits='64'>
<parameter type-id='80f4b756'/>
<parameter type-id='02e25ab0'/>
<parameter type-id='9c313c2d'/>
<return type-id='95e97e5e'/>
</function-decl>
<function-decl name='zfs_resolve_shortname' mangled-name='zfs_resolve_shortname' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_resolve_shortname'>
<parameter type-id='80f4b756'/>
<parameter type-id='26a90f95'/>
@ -6798,6 +6820,12 @@
<parameter type-id='80f4b756' name='propval'/>
<return type-id='95e97e5e'/>
</function-decl>
<function-decl name='zpool_ddt_prune' mangled-name='zpool_ddt_prune' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_ddt_prune'>
<parameter type-id='4c81de99' name='zhp'/>
<parameter type-id='02e25ab0' name='unit'/>
<parameter type-id='9c313c2d' name='amount'/>
<return type-id='95e97e5e'/>
</function-decl>
</abi-instr>
<abi-instr address-size='64' path='lib/libzfs/libzfs_sendrecv.c' language='LANG_C99'>
<array-type-def dimensions='1' type-id='8901473c' size-in-bits='576' id='f5da478b'>
@ -7837,7 +7865,7 @@
</data-member>
</class-decl>
<typedef-decl name='vdev_cbdata_t' type-id='b8006be8' id='a9679c94'/>
<class-decl name='zprop_get_cbdata' size-in-bits='832' is-struct='yes' visibility='default' id='f3d3c319'>
<class-decl name='zprop_get_cbdata' size-in-bits='960' is-struct='yes' visibility='default' id='f3d3c319'>
<data-member access='public' layout-offset-in-bits='0'>
<var-decl name='cb_sources' type-id='95e97e5e' visibility='default'/>
</data-member>
@ -7856,6 +7884,9 @@
<data-member access='public' layout-offset-in-bits='448'>
<var-decl name='cb_first' type-id='c19b74c3' visibility='default'/>
</data-member>
<data-member access='public' layout-offset-in-bits='480'>
<var-decl name='cb_json' type-id='c19b74c3' visibility='default'/>
</data-member>
<data-member access='public' layout-offset-in-bits='512'>
<var-decl name='cb_proplist' type-id='3a9b2288' visibility='default'/>
</data-member>
@ -7865,6 +7896,15 @@
<data-member access='public' layout-offset-in-bits='640'>
<var-decl name='cb_vdevs' type-id='a9679c94' visibility='default'/>
</data-member>
<data-member access='public' layout-offset-in-bits='832'>
<var-decl name='cb_jsobj' type-id='5ce45b60' visibility='default'/>
</data-member>
<data-member access='public' layout-offset-in-bits='896'>
<var-decl name='cb_json_as_int' type-id='c19b74c3' visibility='default'/>
</data-member>
<data-member access='public' layout-offset-in-bits='928'>
<var-decl name='cb_json_pool_key_guid' type-id='c19b74c3' visibility='default'/>
</data-member>
</class-decl>
<typedef-decl name='zprop_get_cbdata_t' type-id='f3d3c319' id='f3d87113'/>
<typedef-decl name='zprop_func' type-id='2e711a2a' id='1ec3747a'/>
@ -7968,6 +8008,11 @@
<qualified-type-def type-id='d33f11cb' restrict='yes' id='5c53ba29'/>
<pointer-type-def type-id='ffa52b96' size-in-bits='64' id='76c8174b'/>
<pointer-type-def type-id='f3d87113' size-in-bits='64' id='0d2a0670'/>
<function-decl name='nvlist_print_json' visibility='default' binding='global' size-in-bits='64'>
<parameter type-id='822cd80b'/>
<parameter type-id='5ce45b60'/>
<return type-id='95e97e5e'/>
</function-decl>
<function-decl name='zpool_label_disk' mangled-name='zpool_label_disk' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_label_disk'>
<parameter type-id='b0382bb3'/>
<parameter type-id='4c81de99'/>
@ -8075,6 +8120,11 @@
<parameter type-id='d33f11cb'/>
<return type-id='48b5725f'/>
</function-decl>
<function-decl name='putc' visibility='default' binding='global' size-in-bits='64'>
<parameter type-id='95e97e5e'/>
<parameter type-id='822cd80b'/>
<return type-id='95e97e5e'/>
</function-decl>
<function-decl name='puts' visibility='default' binding='global' size-in-bits='64'>
<parameter type-id='80f4b756'/>
<return type-id='95e97e5e'/>
@ -8093,6 +8143,11 @@
<parameter type-id='95e97e5e'/>
<return type-id='48b5725f'/>
</function-decl>
<function-decl name='strspn' visibility='default' binding='global' size-in-bits='64'>
<parameter type-id='80f4b756'/>
<parameter type-id='80f4b756'/>
<return type-id='b59d7dce'/>
</function-decl>
<function-decl name='strnlen' visibility='default' binding='global' size-in-bits='64'>
<parameter type-id='80f4b756'/>
<parameter type-id='b59d7dce'/>
@ -8292,12 +8347,12 @@
<function-decl name='zfs_version_print' mangled-name='zfs_version_print' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_version_print'>
<return type-id='95e97e5e'/>
</function-decl>
<function-decl name='use_color' mangled-name='use_color' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='use_color'>
<return type-id='95e97e5e'/>
</function-decl>
<function-decl name='zfs_version_nvlist' mangled-name='zfs_version_nvlist' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_version_nvlist'>
<return type-id='5ce45b60'/>
</function-decl>
<function-decl name='use_color' mangled-name='use_color' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='use_color'>
<return type-id='95e97e5e'/>
</function-decl>
<function-decl name='printf_color' mangled-name='printf_color' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='printf_color'>
<parameter type-id='80f4b756' name='color'/>
<parameter type-id='80f4b756' name='format'/>
@ -8802,11 +8857,6 @@
<parameter type-id='78c01427'/>
<return type-id='13956559'/>
</function-decl>
<function-decl name='strspn' visibility='default' binding='global' size-in-bits='64'>
<parameter type-id='80f4b756'/>
<parameter type-id='80f4b756'/>
<return type-id='b59d7dce'/>
</function-decl>
<function-decl name='zfs_dirnamelen' mangled-name='zfs_dirnamelen' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_dirnamelen'>
<parameter type-id='80f4b756' name='path'/>
<return type-id='79a0948f'/>
@ -9788,6 +9838,50 @@
<return type-id='c19b74c3'/>
</function-decl>
</abi-instr>
<abi-instr address-size='64' path='module/zcommon/zfs_valstr.c' language='LANG_C99'>
<function-decl name='zfs_valstr_zio_flag' mangled-name='zfs_valstr_zio_flag' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_valstr_zio_flag'>
<parameter type-id='9c313c2d' name='bits'/>
<parameter type-id='26a90f95' name='out'/>
<parameter type-id='b59d7dce' name='outlen'/>
<return type-id='b59d7dce'/>
</function-decl>
<function-decl name='zfs_valstr_zio_flag_bits' mangled-name='zfs_valstr_zio_flag_bits' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_valstr_zio_flag_bits'>
<parameter type-id='9c313c2d' name='bits'/>
<parameter type-id='26a90f95' name='out'/>
<parameter type-id='b59d7dce' name='outlen'/>
<return type-id='b59d7dce'/>
</function-decl>
<function-decl name='zfs_valstr_zio_flag_pairs' mangled-name='zfs_valstr_zio_flag_pairs' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_valstr_zio_flag_pairs'>
<parameter type-id='9c313c2d' name='bits'/>
<parameter type-id='26a90f95' name='out'/>
<parameter type-id='b59d7dce' name='outlen'/>
<return type-id='b59d7dce'/>
</function-decl>
<function-decl name='zfs_valstr_zio_stage' mangled-name='zfs_valstr_zio_stage' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_valstr_zio_stage'>
<parameter type-id='9c313c2d' name='bits'/>
<parameter type-id='26a90f95' name='out'/>
<parameter type-id='b59d7dce' name='outlen'/>
<return type-id='b59d7dce'/>
</function-decl>
<function-decl name='zfs_valstr_zio_stage_bits' mangled-name='zfs_valstr_zio_stage_bits' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_valstr_zio_stage_bits'>
<parameter type-id='9c313c2d' name='bits'/>
<parameter type-id='26a90f95' name='out'/>
<parameter type-id='b59d7dce' name='outlen'/>
<return type-id='b59d7dce'/>
</function-decl>
<function-decl name='zfs_valstr_zio_stage_pairs' mangled-name='zfs_valstr_zio_stage_pairs' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_valstr_zio_stage_pairs'>
<parameter type-id='9c313c2d' name='bits'/>
<parameter type-id='26a90f95' name='out'/>
<parameter type-id='b59d7dce' name='outlen'/>
<return type-id='b59d7dce'/>
</function-decl>
<function-decl name='zfs_valstr_zio_priority' mangled-name='zfs_valstr_zio_priority' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_valstr_zio_priority'>
<parameter type-id='95e97e5e' name='v'/>
<parameter type-id='26a90f95' name='out'/>
<parameter type-id='b59d7dce' name='outlen'/>
<return type-id='b59d7dce'/>
</function-decl>
</abi-instr>
<abi-instr address-size='64' path='module/zcommon/zpool_prop.c' language='LANG_C99'>
<function-decl name='zpool_prop_string_to_index' mangled-name='zpool_prop_string_to_index' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_prop_string_to_index'>
<parameter type-id='5d0c23fb' name='prop'/>

View File

@ -5652,3 +5652,31 @@ zpool_set_vdev_prop(zpool_handle_t *zhp, const char *vdevname,
return (ret);
}
/*
 * Prune older entries from the DDT to reclaim space under the quota.
 *
 * `unit` and `amount` are handed unchanged to lzc_ddt_prune().
 *
 * Returns 0 on success. On failure the error is reported through
 * zfs_error() / zpool_standard_error() on the pool's libzfs handle and
 * -1 is returned.
 */
int
zpool_ddt_prune(zpool_handle_t *zhp, zpool_ddt_prune_unit_t unit,
    uint64_t amount)
{
	int error = lzc_ddt_prune(zhp->zpool_name, unit, amount);

	if (error != 0) {
		libzfs_handle_t *hdl = zhp->zpool_hdl;
		char errbuf[ERRBUFLEN];

		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
		    "cannot prune dedup table on '%s'"), zhp->zpool_name);

		if (error == EALREADY) {
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "a prune operation is already in progress"));
			(void) zfs_error(hdl, EZFS_BUSY, errbuf);
		} else {
			/*
			 * Report the code lzc_ddt_prune() returned, not the
			 * global errno: errno may have been overwritten by
			 * the calls made since the ioctl failed.
			 */
			(void) zpool_standard_error(hdl, error, errbuf);
		}
		return (-1);
	}

	return (0);
}

View File

@ -162,6 +162,7 @@
<elf-symbol name='lzc_channel_program_nosync' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='lzc_clone' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='lzc_create' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='lzc_ddt_prune' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='lzc_destroy' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='lzc_destroy_bookmarks' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='lzc_destroy_snaps' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
@ -1444,6 +1445,7 @@
<enumerator name='ZFS_IOC_VDEV_SET_PROPS' value='23126'/>
<enumerator name='ZFS_IOC_POOL_SCRUB' value='23127'/>
<enumerator name='ZFS_IOC_POOL_PREFETCH' value='23128'/>
<enumerator name='ZFS_IOC_DDT_PRUNE' value='23129'/>
<enumerator name='ZFS_IOC_PLATFORM' value='23168'/>
<enumerator name='ZFS_IOC_EVENTS_NEXT' value='23169'/>
<enumerator name='ZFS_IOC_EVENTS_CLEAR' value='23170'/>
@ -1484,6 +1486,13 @@
<enumerator name='ZPOOL_PREFETCH_DDT' value='1'/>
</enum-decl>
<typedef-decl name='zpool_prefetch_type_t' type-id='0299ab50' id='e55ff6bc'/>
<enum-decl name='zpool_ddt_prune_unit_t' naming-typedef-id='02e25ab0' id='509ae11c'>
<underlying-type type-id='9cac1fee'/>
<enumerator name='ZPOOL_DDT_PRUNE_NONE' value='0'/>
<enumerator name='ZPOOL_DDT_PRUNE_AGE' value='1'/>
<enumerator name='ZPOOL_DDT_PRUNE_PERCENTAGE' value='2'/>
</enum-decl>
<typedef-decl name='zpool_ddt_prune_unit_t' type-id='509ae11c' id='02e25ab0'/>
<enum-decl name='data_type_t' naming-typedef-id='8d0687d2' id='aeeae136'>
<underlying-type type-id='9cac1fee'/>
<enumerator name='DATA_TYPE_DONTCARE' value='-1'/>
@ -3015,6 +3024,12 @@
<parameter type-id='857bb57e' name='outnvl'/>
<return type-id='95e97e5e'/>
</function-decl>
<function-decl name='lzc_ddt_prune' mangled-name='lzc_ddt_prune' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzc_ddt_prune'>
<parameter type-id='80f4b756' name='pool'/>
<parameter type-id='02e25ab0' name='unit'/>
<parameter type-id='9c313c2d' name='amount'/>
<return type-id='95e97e5e'/>
</function-decl>
<function-type size-in-bits='64' id='c70fa2e8'>
<parameter type-id='95e97e5e'/>
<parameter type-id='eaa32e2f'/>

View File

@ -1927,3 +1927,25 @@ lzc_get_bootenv(const char *pool, nvlist_t **outnvl)
{
return (lzc_ioctl(ZFS_IOC_GET_BOOTENV, pool, NULL, outnvl));
}
/*
 * Ask the kernel to prune the dedup table of the named pool. `amount` is
 * interpreted according to `unit`; both are packed into the argument nvlist
 * of the ZFS_IOC_DDT_PRUNE ioctl. Returns the result of lzc_ioctl().
 */
int
lzc_ddt_prune(const char *pool, zpool_ddt_prune_unit_t unit, uint64_t amount)
{
	nvlist_t *innvl = fnvlist_alloc();
	nvlist_t *outnvl = NULL;

	fnvlist_add_int32(innvl, DDT_PRUNE_UNIT, unit);
	fnvlist_add_uint64(innvl, DDT_PRUNE_AMOUNT, amount);

	int rc = lzc_ioctl(ZFS_IOC_DDT_PRUNE, pool, innvl, &outnvl);

	/* Neither list is looked at once the ioctl has returned. */
	fnvlist_free(innvl);
	fnvlist_free(outnvl);

	return (rc);
}

View File

@ -64,6 +64,7 @@ nodist_libzpool_la_SOURCES = \
module/zcommon/zfs_fletcher_superscalar4.c \
module/zcommon/zfs_namecheck.c \
module/zcommon/zfs_prop.c \
module/zcommon/zfs_valstr.c \
module/zcommon/zpool_prop.c \
module/zcommon/zprop_common.c \
\

View File

@ -1,4 +1,6 @@
libzstd_la_CFLAGS = $(AM_CFLAGS) $(LIBRARY_CFLAGS)
libzstd_la_CPPFLAGS = $(AM_CPPFLAGS) $(LIBZPOOL_CPPFLAGS)
# -fno-tree-vectorize is set for gcc in zstd/common/compiler.h
# Set it for other compilers, too.
libzstd_la_CFLAGS += -fno-tree-vectorize

View File

@ -72,6 +72,7 @@ dist_man_MANS = \
%D%/man8/zpool-create.8 \
%D%/man8/zpool-destroy.8 \
%D%/man8/zpool-detach.8 \
%D%/man8/zpool-ddtprune.8 \
%D%/man8/zpool-events.8 \
%D%/man8/zpool-export.8 \
%D%/man8/zpool-get.8 \

48
man/man8/zpool-ddtprune.8 Normal file
View File

@ -0,0 +1,48 @@
.\"
.\" CDDL HEADER START
.\"
.\" The contents of this file are subject to the terms of the
.\" Common Development and Distribution License (the "License").
.\" You may not use this file except in compliance with the License.
.\"
.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
.\" or http://www.opensolaris.org/os/licensing.
.\" See the License for the specific language governing permissions
.\" and limitations under the License.
.\"
.\" When distributing Covered Code, include this CDDL HEADER in each
.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
.\" If applicable, add the following below this CDDL HEADER, with the
.\" fields enclosed by brackets "[]" replaced with your own identifying
.\" information: Portions Copyright [yyyy] [name of copyright owner]
.\"
.\" CDDL HEADER END
.\"
.\"
.\" Copyright (c) 2024, Klara Inc.
.\"
.Dd June 17, 2024
.Dt ZPOOL-DDTPRUNE 8
.Os
.
.Sh NAME
.Nm zpool-ddtprune
.Nd prune the oldest entries from the single-reference dedup table(s)
.Sh SYNOPSIS
.Nm zpool
.Cm ddtprune
.Fl d Ar days | Fl p Ar percentage
.Ar pool
.Sh DESCRIPTION
This command prunes older unique entries from the dedup table.
As a complement to the dedup quota feature,
.Sy ddtprune
allows removal of older non-duplicate entries to make room for
newer duplicate entries.
.Pp
The amount to prune can be based on a target percentage of the unique entries
or based on the age (i.e., every unique entry older than N days).
.
.Sh SEE ALSO
.Xr zdb 8 ,
.Xr zpool-status 8

View File

@ -592,6 +592,7 @@ don't wait.
.Xr zpool-checkpoint 8 ,
.Xr zpool-clear 8 ,
.Xr zpool-create 8 ,
.Xr zpool-ddtprune 8 ,
.Xr zpool-destroy 8 ,
.Xr zpool-detach 8 ,
.Xr zpool-events 8 ,

View File

@ -241,6 +241,7 @@ ZCOMMON_OBJS := \
zfs_fletcher_superscalar4.o \
zfs_namecheck.o \
zfs_prop.o \
zfs_valstr.o \
zpool_prop.o \
zprop_common.o

View File

@ -233,6 +233,7 @@ SRCS+= cityhash.c \
zfs_fletcher_superscalar.c \
zfs_namecheck.c \
zfs_prop.c \
zfs_valstr.c \
zpool_prop.c \
zprop_common.c

View File

@ -124,7 +124,6 @@ SYSCTL_NODE(_vfs_zfs, OID_AUTO, zio, CTLFLAG_RW, 0, "ZFS ZIO");
SYSCTL_NODE(_vfs_zfs_livelist, OID_AUTO, condense, CTLFLAG_RW, 0,
"ZFS livelist condense");
SYSCTL_NODE(_vfs_zfs_vdev, OID_AUTO, cache, CTLFLAG_RW, 0, "ZFS VDEV Cache");
SYSCTL_NODE(_vfs_zfs_vdev, OID_AUTO, file, CTLFLAG_RW, 0, "ZFS VDEV file");
SYSCTL_NODE(_vfs_zfs_vdev, OID_AUTO, mirror, CTLFLAG_RD, 0,
"ZFS VDEV mirror");

277
module/zcommon/zfs_valstr.c Normal file
View File

@ -0,0 +1,277 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2024, Klara Inc.
*/
#include <sys/fs/zfs.h>
#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/string.h>
#include <sys/debug.h>
#include "zfs_valstr.h"
/*
* Each bit in a bitfield has three possible string representations:
* - single char
* - two-char pair
* - full name
*/
/* One table entry; the formatters below use entry N to describe bit N. */
typedef struct {
/* single-char form, emitted by valstr_bitfield_bits() */
const char vb_bit;
/* two-char form, emitted by valstr_bitfield_pairs(); no NUL terminator */
const char vb_pair[2];
/* full name, emitted by valstr_bitfield_str() */
const char *vb_name;
} valstr_bit_t;
/*
 * Writes one character per table entry: the entry's vb_bit when the matching
 * bit of `bits` is set, a space otherwise. Every value therefore renders at
 * the same width, which makes visual comparison easy.
 *
 * At most `outlen` bytes are stored in `out`. A terminating NUL is appended
 * only if a byte is left for it, so a truncated result is not terminated.
 * Returns the number of bytes stored, counting the NUL when it was written.
 */
static size_t
valstr_bitfield_bits(const valstr_bit_t *table, const size_t nelems,
    uint64_t bits, char *out, size_t outlen)
{
	ASSERT(out);

	size_t used = 0;

	for (int bit = 0; bit < nelems && used < outlen; bit++) {
		char c = ' ';

		if (bits & (1ULL << bit))
			c = table[bit].vb_bit;
		out[used++] = c;
	}

	if (used < outlen)
		out[used++] = '\0';

	return (used);
}
/*
 * Emits a two-char pair for each bit set in `bits`, taken from vb_pair, and
 * separated by a `|` character. This gives a concise representation of the
 * whole value.
 *
 * At most `outlen` bytes are stored in `out`. A pair (with its separator) is
 * emitted only if it fits completely; output stops at the first one that
 * does not. A terminating NUL is appended only if a byte is left for it.
 * Returns the number of bytes stored, counting the NUL when it was written.
 */
static size_t
valstr_bitfield_pairs(const valstr_bit_t *table, const size_t nelems,
    uint64_t bits, char *out, size_t outlen)
{
	ASSERT(out);
	size_t n = 0;
	for (int b = 0; b < nelems; b++) {
		ASSERT3U(n, <=, outlen);
		if (n == outlen)
			break;
		uint64_t mask = (1ULL << b);
		if (bits & mask) {
			/* Two chars, plus a separator after the first pair. */
			size_t len = (n > 0) ? 3 : 2;
			/*
			 * Compare against the space remaining. n <= outlen
			 * holds here, so outlen - n cannot wrap, whereas
			 * outlen - len would wrap for outlen < len and let
			 * the stores below run past the end of `out`.
			 */
			if (len > outlen - n)
				break;
			if (n > 0)
				out[n++] = '|';
			out[n++] = table[b].vb_pair[0];
			out[n++] = table[b].vb_pair[1];
		}
	}
	if (n < outlen)
		out[n++] = '\0';
	return (n);
}
/*
 * Emits the full name for each bit set in `bits`, taken from vb_name, and
 * separated by a space. This unambiguously shows the entire set of bits, but
 * can get very long.
 *
 * At most `outlen` bytes are stored in `out`. A name (with its separator) is
 * emitted only if it fits completely; output stops at the first one that
 * does not. A terminating NUL is appended only if a byte is left for it.
 * Returns the number of bytes stored, counting the NUL when it was written.
 */
static size_t
valstr_bitfield_str(const valstr_bit_t *table, const size_t nelems,
    uint64_t bits, char *out, size_t outlen)
{
	ASSERT(out);
	size_t n = 0;
	for (int b = 0; b < nelems; b++) {
		ASSERT3U(n, <=, outlen);
		if (n == outlen)
			break;
		uint64_t mask = (1ULL << b);
		if (bits & mask) {
			size_t len = strlen(table[b].vb_name);
			/* Every name after the first needs a separator. */
			if (n > 0)
				len++;
			/*
			 * Compare against the space remaining. n <= outlen
			 * holds here, so outlen - n cannot wrap, whereas
			 * outlen - len would wrap whenever the name is
			 * longer than the whole buffer and let the memcpy()
			 * below run past the end of `out`.
			 */
			if (len > outlen - n)
				break;
			if (n > 0) {
				out[n++] = ' ';
				len--;
			}
			memcpy(&out[n], table[b].vb_name, len);
			n += len;
		}
	}
	if (n < outlen)
		out[n++] = '\0';
	return (n);
}
/*
 * Emits the name of the given enum value in the table.
 *
 * `v` is used directly as the table index. An out-of-range `v` trips the
 * assertion on debug builds and otherwise returns 0 without touching `out`.
 * On success the name is copied with strlcpy() and the return value is
 * strlcpy()'s result capped at `outlen`.
 */
static size_t
valstr_enum_str(const char **table, const size_t nelems,
    int v, char *out, size_t outlen)
{
	ASSERT(out);
	ASSERT3U(v, <, nelems);
	if (v >= nelems)
		return (0);
	/*
	 * Copy first, then clamp: MIN() is a macro that may evaluate its
	 * arguments more than once, which would repeat the copy.
	 */
	size_t len = strlcpy(out, table[v], outlen);
	return (MIN(len, outlen));
}
/*
* These macros create the string tables for the given name, and implement
* the public functions described in zfs_valstr.h.
*/
/*
 * Defines the bit table for `name` from the { bit, pair, name } initialisers
 * given as the remaining arguments (entry N describes bit N), together with
 * zfs_valstr_<name>_bits(), zfs_valstr_<name>_pairs() and zfs_valstr_<name>().
 * The final line carries no continuation backslash, so the macro does not
 * depend on what follows it.
 */
#define	_VALSTR_BITFIELD_IMPL(name, ...)				\
static const valstr_bit_t valstr_ ## name ## _table[] = { __VA_ARGS__ };\
size_t									\
zfs_valstr_ ## name ## _bits(uint64_t bits, char *out, size_t outlen)	\
{									\
	return (valstr_bitfield_bits(valstr_ ## name ## _table,		\
	    ARRAY_SIZE(valstr_ ## name ## _table), bits, out, outlen));	\
}									\
									\
size_t									\
zfs_valstr_ ## name ## _pairs(uint64_t bits, char *out, size_t outlen)	\
{									\
	return (valstr_bitfield_pairs(valstr_ ## name ## _table,	\
	    ARRAY_SIZE(valstr_ ## name ## _table), bits, out, outlen));	\
}									\
									\
size_t									\
zfs_valstr_ ## name(uint64_t bits, char *out, size_t outlen)		\
{									\
	return (valstr_bitfield_str(valstr_ ## name ## _table,		\
	    ARRAY_SIZE(valstr_ ## name ## _table), bits, out, outlen));	\
}
/*
 * Defines the name table for enum `name` from the string initialisers given
 * as the remaining arguments (entry N names value N), together with
 * zfs_valstr_<name>(). The final line carries no continuation backslash, so
 * the macro does not depend on what follows it.
 */
#define	_VALSTR_ENUM_IMPL(name, ...)					\
static const char *valstr_ ## name ## _table[] = { __VA_ARGS__ };	\
size_t									\
zfs_valstr_ ## name(int v, char *out, size_t outlen)			\
{									\
	return (valstr_enum_str(valstr_ ## name ## _table,		\
	    ARRAY_SIZE(valstr_ ## name ## _table), v, out, outlen));	\
}
/* String tables */
/*
 * ZIO flags: zio_flag_t, typically zio->io_flags
 *
 * The table is positional: the valstr_bitfield_*() helpers read entry N for
 * bit N of the value. Every real flag uses '.' as its single-char form; the
 * "[UNUSED 11]" placeholder sits at index 11, presumably so the entries after
 * it stay at their bit index.
 * NOTE(review): the order is assumed to follow the zio_flag_t bit
 * assignments, which are declared outside this file -- confirm when adding
 * or reordering flags.
 */
/* BEGIN CSTYLED */
_VALSTR_BITFIELD_IMPL(zio_flag,
{ '.', "DA", "DONT_AGGREGATE" },
{ '.', "RP", "IO_REPAIR" },
{ '.', "SH", "SELF_HEAL" },
{ '.', "RS", "RESILVER" },
{ '.', "SC", "SCRUB" },
{ '.', "ST", "SCAN_THREAD" },
{ '.', "PH", "PHYSICAL" },
{ '.', "CF", "CANFAIL" },
{ '.', "SP", "SPECULATIVE" },
{ '.', "CW", "CONFIG_WRITER" },
{ '.', "DR", "DONT_RETRY" },
{ '?', "??", "[UNUSED 11]" },
{ '.', "ND", "NODATA" },
{ '.', "ID", "INDUCE_DAMAGE" },
{ '.', "AL", "IO_ALLOCATING" },
{ '.', "RE", "IO_RETRY" },
{ '.', "PR", "PROBE" },
{ '.', "TH", "TRYHARD" },
{ '.', "OP", "OPTIONAL" },
{ '.', "DQ", "DONT_QUEUE" },
{ '.', "DP", "DONT_PROPAGATE" },
{ '.', "BY", "IO_BYPASS" },
{ '.', "RW", "IO_REWRITE" },
{ '.', "CM", "RAW_COMPRESS" },
{ '.', "EN", "RAW_ENCRYPT" },
{ '.', "GG", "GANG_CHILD" },
{ '.', "DD", "DDT_CHILD" },
{ '.', "GF", "GODFATHER" },
{ '.', "NP", "NOPWRITE" },
{ '.', "EX", "REEXECUTED" },
{ '.', "DG", "DELEGATED" },
)
/* END CSTYLED */
/*
 * ZIO pipeline stage(s): enum zio_stage, typically zio->io_stage or
 * zio->io_pipeline.
 *
 * The table is positional: the valstr_bitfield_*() helpers read entry N for
 * bit N of the value. Several stages share a single-char form ('I' for the
 * three *_BP_INIT stages, 'd' for the DDT stages, and so on), so in the
 * fixed-width _bits output a stage is identified by its column as well as
 * its character. One-letter pairs are padded with a space ("O ", "R ", "X ")
 * because vb_pair always holds exactly two characters.
 * NOTE(review): the order is assumed to follow the enum zio_stage bit
 * assignments, which are declared outside this file -- confirm when adding
 * or reordering stages.
 */
/* BEGIN CSTYLED */
_VALSTR_BITFIELD_IMPL(zio_stage,
{ 'O', "O ", "OPEN" },
{ 'I', "RI", "READ_BP_INIT" },
{ 'I', "WI", "WRITE_BP_INIT" },
{ 'I', "FI", "FREE_BP_INIT" },
{ 'A', "IA", "ISSUE_ASYNC" },
{ 'W', "WC", "WRITE_COMPRESS" },
{ 'E', "EN", "ENCRYPT" },
{ 'C', "CG", "CHECKSUM_GENERATE" },
{ 'N', "NW", "NOP_WRITE" },
{ 'B', "BF", "BRT_FREE" },
{ 'd', "dS", "DDT_READ_START" },
{ 'd', "dD", "DDT_READ_DONE" },
{ 'd', "dW", "DDT_WRITE" },
{ 'd', "dF", "DDT_FREE" },
{ 'G', "GA", "GANG_ASSEMBLE" },
{ 'G', "GI", "GANG_ISSUE" },
{ 'D', "DT", "DVA_THROTTLE" },
{ 'D', "DA", "DVA_ALLOCATE" },
{ 'D', "DF", "DVA_FREE" },
{ 'D', "DC", "DVA_CLAIM" },
{ 'R', "R ", "READY" },
{ 'V', "VS", "VDEV_IO_START" },
{ 'V', "VD", "VDEV_IO_DONE" },
{ 'V', "VA", "VDEV_IO_ASSESS" },
{ 'C', "CV", "CHECKSUM_VERIFY" },
{ 'X', "X ", "DONE" },
)
/* END CSTYLED */
/*
 * ZIO priority: zio_priority_t, typically zio->io_priority
 *
 * valstr_enum_str() indexes this table directly with the enum value, so the
 * order of the strings is significant. "[NUM_QUEUEABLE]" occupies index 9.
 * NOTE(review): that slot presumably corresponds to a counting marker in
 * zio_priority_t rather than a real priority; the enum is declared outside
 * this file -- confirm when adding or reordering priorities.
 */
/* BEGIN CSTYLED */
_VALSTR_ENUM_IMPL(zio_priority,
"SYNC_READ",
"SYNC_WRITE",
"ASYNC_READ",
"ASYNC_WRITE",
"SCRUB",
"REMOVAL",
"INITIALIZING",
"TRIM",
"REBUILD",
"[NUM_QUEUEABLE]",
"NOW",
)
/* END CSTYLED */
#undef _VALSTR_BITFIELD_IMPL
#undef _VALSTR_ENUM_IMPL

View File

@ -125,6 +125,13 @@
* without which, no space would be recovered and the DDT would continue to be
* considered "over quota". See zap_shrink_enabled.
*
* ## Dedup table pruning
*
* As a complement to the dedup quota feature, ddtprune allows removal of older
* non-duplicate entries to make room for newer duplicate entries. The amount
* to prune can be based on a target percentage of the unique entries or based
* on the age (i.e., prune unique entries older than N days).
*
* ## Dedup log
*
* Historically, all entries modified on a txg were written back to dedup
@ -228,6 +235,19 @@ int zfs_dedup_prefetch = 0;
*/
uint_t dedup_class_wait_txgs = 5;
/*
* How many DDT prune entries to add to the DDT sync AVL tree.
* Note these additional entries have a memory footprint of a
* ddt_entry_t (216 bytes).
*/
static uint32_t zfs_ddt_prunes_per_txg = 50000;
/*
* For testing, synthesize aged DDT entries
* (in global scope for ztest)
*/
boolean_t ddt_prune_artificial_age = B_FALSE;
boolean_t ddt_dump_prune_histogram = B_FALSE;
/*
* Don't do more than this many incremental flush passes per txg.
@ -268,10 +288,6 @@ static const uint64_t ddt_version_flags[] = {
[DDT_VERSION_FDT] = DDT_FLAG_FLAT | DDT_FLAG_LOG,
};
/* Dummy version to signal that configure is still necessary */
#define DDT_VERSION_UNCONFIGURED (UINT64_MAX)
#ifdef _KERNEL
/* per-DDT kstats */
typedef struct {
/* total lookups and whether they returned new or existing entries */
@ -324,6 +340,7 @@ static const ddt_kstats_t ddt_kstats_template = {
{ "log_flush_time_rate", KSTAT_DATA_UINT32 },
};
#ifdef _KERNEL
#define _DDT_KSTAT_STAT(ddt, stat) \
&((ddt_kstats_t *)(ddt)->ddt_ksp->ks_data)->stat.value.ui64
#define DDT_KSTAT_BUMP(ddt, stat) \
@ -343,6 +360,7 @@ static const ddt_kstats_t ddt_kstats_template = {
#define DDT_KSTAT_ZERO(ddt, stat) do {} while (0)
#endif /* _KERNEL */
static void
ddt_object_create(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
dmu_tx_t *tx)
@ -715,6 +733,30 @@ ddt_phys_clear(ddt_univ_phys_t *ddp, ddt_phys_variant_t v)
memset(&ddp->ddp_trad[v], 0, DDT_TRAD_PHYS_SIZE / DDT_PHYS_MAX);
}
/*
 * Returns the timestamp to record as an entry's class start: the current
 * time in seconds, or, when ddt_prune_artificial_age is set, the current
 * time moved back by a random amount.
 */
static uint64_t
ddt_class_start(void)
{
	uint64_t now = gethrestime_sec();

	if (!ddt_prune_artificial_age)
		return (now);

	/*
	 * Debug aid -- simulate a wider distribution of ages so that
	 * prune can be tested without waiting for a DDT to age.
	 */
	const int full_span = 1 << 21;
	int pct = random_in_range(100);
	int span;

	if (pct < 50)
		span = full_span >> 4;
	else if (pct > 75)
		span = full_span / 2;
	else
		span = full_span;

	return (now - random_in_range(span));
}
void
ddt_phys_addref(ddt_univ_phys_t *ddp, ddt_phys_variant_t v)
{
@ -789,6 +831,9 @@ ddt_phys_dva_count(const ddt_univ_phys_t *ddp, ddt_phys_variant_t v,
ddt_phys_variant_t
ddt_phys_select(const ddt_t *ddt, const ddt_entry_t *dde, const blkptr_t *bp)
{
if (dde == NULL)
return (DDT_PHYS_NONE);
const ddt_univ_phys_t *ddp = dde->dde_phys;
if (ddt->ddt_flags & DDT_FLAG_FLAT) {
@ -1019,6 +1064,47 @@ ddt_prefetch_all(spa_t *spa)
static int ddt_configure(ddt_t *ddt, boolean_t new);
/*
* If the BP passed to ddt_lookup has valid DVAs, then we need to compare them
* to the ones in the entry. If they're different, then the passed-in BP is
* from a previous generation of this entry (ie was previously pruned) and we
* have to act like the entry doesn't exist at all.
*
* This should only happen during a lookup to free the block (zio_ddt_free()).
*
* XXX this is similar in spirit to ddt_phys_select(), maybe can combine
* -- robn, 2024-02-09
*/
static boolean_t
ddt_entry_lookup_is_valid(ddt_t *ddt, const blkptr_t *bp, ddt_entry_t *dde)
{
	const uint_t ndvas = BP_GET_NDVAS(bp);

	/* A BP that carries no DVAs cannot contradict the entry. */
	if (ndvas == 0)
		return (B_TRUE);

	/*
	 * Pick the one phys to compare against: a flat table has a single
	 * phys, a traditional table has one per copies count and the
	 * relevant one is selected by the BP's DVA count.
	 */
	const dva_t *dvas;
	if (ddt->ddt_flags & DDT_FLAG_FLAT)
		dvas = dde->dde_phys->ddp_flat.ddp_dva;
	else
		dvas = dde->dde_phys->ddp_trad[ndvas].ddp_dva;

	/*
	 * Every DVA of the BP has to match the entry. A partial match is
	 * treated like no match at all; if it ever occurs it is the result
	 * of an error made elsewhere, possibly long ago, and nothing can be
	 * done about it here.
	 */
	uint_t d = 0;
	while (d < ndvas) {
		if (!DVA_EQUAL(&dvas[d], &bp->blk_dva[d]))
			return (B_FALSE);
		d++;
	}
	ASSERT3U(d, ==, ndvas);

	return (B_TRUE);
}
ddt_entry_t *
ddt_lookup(ddt_t *ddt, const blkptr_t *bp)
{
@ -1054,8 +1140,11 @@ ddt_lookup(ddt_t *ddt, const blkptr_t *bp)
/* If it's already loaded, we can just return it. */
DDT_KSTAT_BUMP(ddt, dds_lookup_live_hit);
if (dde->dde_flags & DDE_FLAG_LOADED)
return (dde);
if (dde->dde_flags & DDE_FLAG_LOADED) {
if (ddt_entry_lookup_is_valid(ddt, bp, dde))
return (dde);
return (NULL);
}
/* Someone else is loading it, wait for it. */
dde->dde_waiters++;
@ -1074,7 +1163,11 @@ ddt_lookup(ddt_t *ddt, const blkptr_t *bp)
}
DDT_KSTAT_BUMP(ddt, dds_lookup_existing);
return (dde);
/* Make sure the loaded entry matches the BP */
if (ddt_entry_lookup_is_valid(ddt, bp, dde))
return (dde);
return (NULL);
} else
DDT_KSTAT_BUMP(ddt, dds_lookup_live_miss);
@ -1083,32 +1176,42 @@ ddt_lookup(ddt_t *ddt, const blkptr_t *bp)
/* Record the time this class was created (used by ddt prune) */
if (ddt->ddt_flags & DDT_FLAG_FLAT)
dde->dde_phys->ddp_flat.ddp_class_start = gethrestime_sec();
dde->dde_phys->ddp_flat.ddp_class_start = ddt_class_start();
avl_insert(&ddt->ddt_tree, dde, where);
/* If its in the log tree, we can "load" it from there */
if (ddt->ddt_flags & DDT_FLAG_LOG) {
ddt_lightweight_entry_t ddlwe;
boolean_t found = B_FALSE;
if (ddt_log_take_key(ddt, ddt->ddt_log_active,
&search, &ddlwe)) {
DDT_KSTAT_BUMP(ddt, dds_lookup_log_active_hit);
found = B_TRUE;
} else if (ddt_log_take_key(ddt, ddt->ddt_log_flushing,
&search, &ddlwe)) {
DDT_KSTAT_BUMP(ddt, dds_lookup_log_flushing_hit);
found = B_TRUE;
}
if (found) {
dde->dde_flags = DDE_FLAG_LOADED | DDE_FLAG_LOGGED;
if (ddt_log_find_key(ddt, &search, &ddlwe)) {
/*
* See if we have the key first, and if so, set up
* the entry.
*/
dde->dde_type = ddlwe.ddlwe_type;
dde->dde_class = ddlwe.ddlwe_class;
memcpy(dde->dde_phys, &ddlwe.ddlwe_phys,
DDT_PHYS_SIZE(ddt));
/* Whatever we found isn't valid for this BP, eject */
if (!ddt_entry_lookup_is_valid(ddt, bp, dde)) {
avl_remove(&ddt->ddt_tree, dde);
ddt_free(ddt, dde);
return (NULL);
}
/* Remove it and count it */
if (ddt_log_remove_key(ddt,
ddt->ddt_log_active, &search)) {
DDT_KSTAT_BUMP(ddt, dds_lookup_log_active_hit);
} else {
VERIFY(ddt_log_remove_key(ddt,
ddt->ddt_log_flushing, &search));
DDT_KSTAT_BUMP(ddt,
dds_lookup_log_flushing_hit);
}
dde->dde_flags = DDE_FLAG_LOADED | DDE_FLAG_LOGGED;
DDT_KSTAT_BUMP(ddt, dds_lookup_log_hit);
DDT_KSTAT_BUMP(ddt, dds_lookup_existing);
@ -1147,6 +1250,8 @@ ddt_lookup(ddt_t *ddt, const blkptr_t *bp)
dde->dde_type = type; /* will be DDT_TYPES if no entry found */
dde->dde_class = class; /* will be DDT_CLASSES if no entry found */
boolean_t valid = B_TRUE;
if (dde->dde_type == DDT_TYPES &&
dde->dde_class == DDT_CLASSES &&
ddt_over_quota(spa)) {
@ -1160,6 +1265,24 @@ ddt_lookup(ddt_t *ddt, const blkptr_t *bp)
/* Flag cleanup required */
dde->dde_flags |= DDE_FLAG_OVERQUOTA;
} else if (error == 0) {
/*
* If what we loaded is no good for this BP and there's no one
* waiting for it, we can just remove it and get out. If its no
* good but there are waiters, we have to leave it, because we
* don't know what they want. If its not needed we'll end up
* taking an entry log/sync, but it can only happen if more
* than one previous version of this block is being deleted at
* the same time. This is extremely unlikely to happen and not
* worth the effort to deal with without taking an entry
* update.
*/
valid = ddt_entry_lookup_is_valid(ddt, bp, dde);
if (!valid && dde->dde_waiters == 0) {
avl_remove(&ddt->ddt_tree, dde);
ddt_free(ddt, dde);
return (NULL);
}
DDT_KSTAT_BUMP(ddt, dds_lookup_stored_hit);
DDT_KSTAT_BUMP(ddt, dds_lookup_existing);
@ -1188,7 +1311,10 @@ ddt_lookup(ddt_t *ddt, const blkptr_t *bp)
dde->dde_flags |= DDE_FLAG_LOADED;
cv_broadcast(&dde->dde_cv);
return (dde->dde_flags & DDE_FLAG_OVERQUOTA ? NULL : dde);
if ((dde->dde_flags & DDE_FLAG_OVERQUOTA) || !valid)
return (NULL);
return (dde);
}
void
@ -1417,7 +1543,6 @@ not_found:
static void
ddt_table_alloc_kstats(ddt_t *ddt)
{
#ifdef _KERNEL
char *mod = kmem_asprintf("zfs/%s", spa_name(ddt->ddt_spa));
char *name = kmem_asprintf("ddt_stats_%s",
zio_checksum_table[ddt->ddt_checksum].ci_name);
@ -1433,9 +1558,6 @@ ddt_table_alloc_kstats(ddt_t *ddt)
kmem_strfree(name);
kmem_strfree(mod);
#else
(void) ddt;
#endif /* _KERNEL */
}
static ddt_t *
@ -1465,13 +1587,11 @@ ddt_table_alloc(spa_t *spa, enum zio_checksum c)
static void
ddt_table_free(ddt_t *ddt)
{
#ifdef _KERNEL
if (ddt->ddt_ksp != NULL) {
kmem_free(ddt->ddt_ksp->ks_data, sizeof (ddt_kstats_t));
ddt->ddt_ksp->ks_data = NULL;
kstat_delete(ddt->ddt_ksp);
}
#endif /* _KERNEL */
ddt_log_free(ddt);
ASSERT0(avl_numnodes(&ddt->ddt_tree));
@ -1811,7 +1931,7 @@ ddt_sync_flush_entry(ddt_t *ddt, ddt_lightweight_entry_t *ddlwe,
uint64_t phys_refcnt = ddt_phys_refcnt(ddp, v);
if (ddt_phys_birth(ddp, v) == 0) {
ASSERT3U(phys_refcnt, ==, 0);
ASSERT0(phys_refcnt);
continue;
}
if (DDT_PHYS_IS_DITTO(ddt, p)) {
@ -2285,8 +2405,9 @@ ddt_walk_ready(spa_t *spa)
return (B_TRUE);
}
int
ddt_walk(spa_t *spa, ddt_bookmark_t *ddb, ddt_lightweight_entry_t *ddlwe)
static int
ddt_walk_impl(spa_t *spa, ddt_bookmark_t *ddb, ddt_lightweight_entry_t *ddlwe,
uint64_t flags, boolean_t wait)
{
do {
do {
@ -2295,7 +2416,11 @@ ddt_walk(spa_t *spa, ddt_bookmark_t *ddb, ddt_lightweight_entry_t *ddlwe)
if (ddt == NULL)
continue;
if (ddt->ddt_flush_force_txg > 0)
if (flags != 0 &&
(ddt->ddt_flags & flags) != flags)
continue;
if (wait && ddt->ddt_flush_force_txg > 0)
return (EAGAIN);
int error = ENOENT;
@ -2319,13 +2444,19 @@ ddt_walk(spa_t *spa, ddt_bookmark_t *ddb, ddt_lightweight_entry_t *ddlwe)
return (SET_ERROR(ENOENT));
}
/*
 * Public walk entry point. Forwards to ddt_walk_impl() with flags == 0, so
 * no table is skipped on account of its flags, and wait == B_TRUE, so the
 * walk returns EAGAIN for a table whose ddt_flush_force_txg is set.
 */
int
ddt_walk(spa_t *spa, ddt_bookmark_t *ddb, ddt_lightweight_entry_t *ddlwe)
{
return (ddt_walk_impl(spa, ddb, ddlwe, 0, B_TRUE));
}
/*
* This function is used by Block Cloning (brt.c) to increase reference
* counter for the DDT entry if the block is already in DDT.
*
* Return false if the block, despite having the D bit set, is not present
* in the DDT. Currently this is not possible but might be in the future.
* See the comment below.
* in the DDT. This is possible when the DDT has been pruned by an admin
* or by the DDT quota mechanism.
*/
boolean_t
ddt_addref(spa_t *spa, const blkptr_t *bp)
@ -2356,28 +2487,13 @@ ddt_addref(spa_t *spa, const blkptr_t *bp)
int p = DDT_PHYS_FOR_COPIES(ddt, BP_GET_NDVAS(bp));
ddt_phys_variant_t v = DDT_PHYS_VARIANT(ddt, p);
/*
* This entry already existed (dde_type is real), so it must
* have refcnt >0 at the start of this txg. We are called from
* brt_pending_apply(), before frees are issued, so the refcnt
* can't be lowered yet. Therefore, it must be >0. We assert
* this because if the order of BRT and DDT interactions were
* ever to change and the refcnt was ever zero here, then
* likely further action is required to fill out the DDT entry,
* and this is a place that is likely to be missed in testing.
*/
ASSERT3U(ddt_phys_refcnt(dde->dde_phys, v), >, 0);
ddt_phys_addref(dde->dde_phys, v);
result = B_TRUE;
} else {
/*
* At the time of implementating this if the block has the
* DEDUP flag set it must exist in the DEDUP table, but
* there are many advocates that want ability to remove
* entries from DDT with refcnt=1. If this will happen,
* we may have a block with the DEDUP set, but which doesn't
* have a corresponding entry in the DDT. Be ready.
* If the block has the DEDUP flag set it still might not
* exist in the DEDUP table due to DDT pruning of entries
* where refcnt=1.
*/
ddt_remove(ddt, dde);
result = B_FALSE;
@ -2389,6 +2505,261 @@ ddt_addref(spa_t *spa, const blkptr_t *bp)
return (result);
}
/*
 * One prune candidate, queued on ddt_prune_info_t.dpi_candidates by
 * ddt_prune_entry() and consumed by prune_candidates_sync(). It is allocated
 * with DDT_FLAT_PHYS_SIZE extra bytes for the trailing dpe_phys[] array.
 */
typedef struct ddt_prune_entry {
/* table the candidate was found in */
ddt_t *dpe_ddt;
/* copy of the candidate's key */
ddt_key_t dpe_key;
/* linkage on the dpi_candidates list */
list_node_t dpe_node;
/* copy of the candidate's flat phys (flexible array member) */
ddt_univ_phys_t dpe_phys[];
} ddt_prune_entry_t;
/*
 * State shared between ddt_prune_walk(), which collects candidates, and
 * prune_candidates_sync(), which processes them.
 */
typedef struct ddt_prune_info {
/* pool being pruned */
spa_t *dpi_spa;
/* number of times prune_candidates_sync() has run */
uint64_t dpi_txg_syncs;
/* number of entries whose phys was cleared */
uint64_t dpi_pruned;
/* pending ddt_prune_entry_t candidates */
list_t dpi_candidates;
} ddt_prune_info_t;
/*
 * Add prune candidates for ddt_sync during spa_sync
 *
 * Sync task callback; `arg` is the ddt_prune_info_t built by
 * ddt_prune_walk(). Drains dpi_candidates, clearing the phys of every
 * candidate that is still eligible and freeing each candidate record, then
 * bumps dpi_txg_syncs.
 */
static void
prune_candidates_sync(void *arg, dmu_tx_t *tx)
{
	(void) tx;
	ddt_prune_info_t *dpi = arg;
	ddt_prune_entry_t *dpe;
	/*
	 * Candidates are allocated in ddt_prune_entry() with room for the
	 * trailing flat phys; sizeof (*dpe) alone leaves the flexible array
	 * member out and would not match the allocation when freeing.
	 */
	const size_t dpe_size =
	    sizeof (ddt_prune_entry_t) + DDT_FLAT_PHYS_SIZE;

	spa_config_enter(dpi->dpi_spa, SCL_ZIO, FTAG, RW_READER);

	/* Process the prune candidates collected so far */
	while ((dpe = list_remove_head(&dpi->dpi_candidates)) != NULL) {
		blkptr_t blk;
		ddt_t *ddt = dpe->dpe_ddt;

		ddt_enter(ddt);

		/*
		 * If it's on the live list, then it was loaded for update
		 * this txg and is no longer stale; skip it.
		 */
		if (avl_find(&ddt->ddt_tree, &dpe->dpe_key, NULL)) {
			ddt_exit(ddt);
			kmem_free(dpe, dpe_size);
			continue;
		}

		/* Rebuild a BP from the saved key and phys for the lookup. */
		ddt_bp_create(ddt->ddt_checksum, &dpe->dpe_key,
		    dpe->dpe_phys, DDT_PHYS_FLAT, &blk);

		ddt_entry_t *dde = ddt_lookup(ddt, &blk);
		if (dde != NULL && !(dde->dde_flags & DDE_FLAG_LOGGED)) {
			ASSERT(dde->dde_flags & DDE_FLAG_LOADED);
			/*
			 * Zero the physical, so we don't try to free DVAs
			 * at flush nor try to reuse this entry.
			 */
			ddt_phys_clear(dde->dde_phys, DDT_PHYS_FLAT);
			dpi->dpi_pruned++;
		}

		ddt_exit(ddt);
		kmem_free(dpe, dpe_size);
	}

	spa_config_exit(dpi->dpi_spa, SCL_ZIO, FTAG);
	dpi->dpi_txg_syncs++;
}
/*
 * Queue one prune candidate on `list`.
 *
 * Candidates are gathered in open context and handled later in sync
 * context as part of ddt_sync_table(), so the key and the flat phys are
 * copied into a freshly allocated record rather than referenced.
 */
static void
ddt_prune_entry(list_t *list, ddt_t *ddt, const ddt_key_t *ddk,
    const ddt_univ_phys_t *ddp)
{
	ASSERT(ddt->ddt_flags & DDT_FLAG_FLAT);

	/* Header plus room for the trailing flat phys. */
	ddt_prune_entry_t *cand = kmem_alloc(
	    sizeof (ddt_prune_entry_t) + DDT_FLAT_PHYS_SIZE, KM_SLEEP);

	memcpy(cand->dpe_phys, ddp, DDT_FLAT_PHYS_SIZE);
	cand->dpe_key = *ddk;
	cand->dpe_ddt = ddt;

	list_insert_head(list, cand);
}
/*
 * Iterate over all the entries in the DDT unique class.
 * The walk will perform one of the following operations:
 *  (a) build a histogram that can be used when pruning
 *  (b) prune entries older than the cutoff
 *
 * Also called by zdb(8) to dump the age histogram
 *
 * A `cutoff` of 0 disables pruning; otherwise entries whose class start
 * time is earlier than `cutoff` are queued and handed to
 * prune_candidates_sync() in batches of zfs_ddt_prunes_per_txg. A NULL
 * `histogram` disables histogram collection; otherwise it is zeroed and
 * then filled with one count per entry, binned by the log2 of its age in
 * hours.
 */
void
ddt_prune_walk(spa_t *spa, uint64_t cutoff, ddt_age_histo_t *histogram)
{
	ddt_bookmark_t ddb = {
		.ddb_class = DDT_CLASS_UNIQUE,
		.ddb_type = 0,
		.ddb_checksum = 0,
		.ddb_cursor = 0
	};
	ddt_lightweight_entry_t ddlwe = {0};
	int valid = 0;
	int candidates = 0;
	uint64_t now = gethrestime_sec();
	ddt_prune_info_t dpi;
	boolean_t pruning = (cutoff != 0);

	if (pruning) {
		dpi.dpi_txg_syncs = 0;
		dpi.dpi_pruned = 0;
		dpi.dpi_spa = spa;
		list_create(&dpi.dpi_candidates, sizeof (ddt_prune_entry_t),
		    offsetof(ddt_prune_entry_t, dpe_node));
	}

	if (histogram != NULL)
		memset(histogram, 0, sizeof (ddt_age_histo_t));

	/* Only flat tables are walked; the walk ends on the first error. */
	while (ddt_walk_impl(spa, &ddb, &ddlwe, DDT_FLAG_FLAT, B_FALSE) == 0) {
		ddt_t *ddt = spa->spa_ddt[ddb.ddb_checksum];
		VERIFY(ddt);

		if (spa_shutting_down(spa) || issig())
			break;

		ASSERT(ddt->ddt_flags & DDT_FLAG_FLAT);
		ASSERT3U(ddlwe.ddlwe_phys.ddp_flat.ddp_refcnt, <=, 1);

		uint64_t class_start =
		    ddlwe.ddlwe_phys.ddp_flat.ddp_class_start;

		/*
		 * If this entry is on the log, then the stored entry is stale
		 * and we should skip it.
		 */
		if (ddt_log_find_key(ddt, &ddlwe.ddlwe_key, NULL))
			continue;

		/* prune older entries */
		if (pruning && class_start < cutoff) {
			if (candidates++ >= zfs_ddt_prunes_per_txg) {
				/* sync prune candidates in batches */
				VERIFY0(dsl_sync_task(spa_name(spa),
				    NULL, prune_candidates_sync,
				    &dpi, 0, ZFS_SPACE_CHECK_NONE));
				candidates = 1;
			}
			ddt_prune_entry(&dpi.dpi_candidates, ddt,
			    &ddlwe.ddlwe_key, &ddlwe.ddlwe_phys);
		}

		/* build a histogram */
		if (histogram != NULL) {
			/*
			 * A class start that lies in the future is counted
			 * as the youngest age, matching the cutoff test
			 * above, instead of letting the unsigned
			 * subtraction wrap and land in the oldest bin.
			 */
			uint64_t secs = (now > class_start) ?
			    (now - class_start) : 0;
			uint64_t age = MAX(1, secs / 3600);
			int bin = MIN(highbit64(age) - 1, HIST_BINS - 1);
			histogram->dah_entries++;
			histogram->dah_age_histo[bin]++;
		}

		valid++;
	}

	if (pruning) {
		if (!list_is_empty(&dpi.dpi_candidates)) {
			/* sync out final batch of prune candidates */
			VERIFY0(dsl_sync_task(spa_name(spa), NULL,
			    prune_candidates_sync, &dpi, 0,
			    ZFS_SPACE_CHECK_NONE));
		}
		/* Always pair list_create() above with list_destroy(). */
		list_destroy(&dpi.dpi_candidates);

		/* valid is the divisor below, so only report when nonzero. */
		if (valid > 0) {
			zfs_dbgmsg("pruned %llu entries (%d%%) across %llu "
			    "txg syncs",
			    (u_longlong_t)dpi.dpi_pruned,
			    (int)((dpi.dpi_pruned * 100) / valid),
			    (u_longlong_t)dpi.dpi_txg_syncs);
		}
	}
}
/*
 * Return the ddo_count field of the statistics that
 * ddt_get_dedup_object_stats() reports for this pool. The caller below
 * treats a result of zero as "nothing to prune".
 */
static uint64_t
ddt_total_entries(spa_t *spa)
{
	ddt_object_t stats;
	uint64_t count;

	ddt_get_dedup_object_stats(spa, &stats);
	count = stats.ddo_count;

	return (count);
}
/*
 * Prune entries from the DDT unique class.
 *
 * unit/amount select what to prune:
 *   ZPOOL_DDT_PRUNE_PERCENTAGE  'amount' is a percentage of the unique
 *                               entries; a histogram pass picks the age
 *                               cutoff, working down from the oldest bin
 *   ZPOOL_DDT_PRUNE_AGE         'amount' is an age in seconds; entries at
 *                               least that old are pruned
 *
 * Returns EALREADY if a prune is already active on this pool, EINVAL for
 * an unknown unit, and 0 otherwise (including when the DDT is empty or the
 * prune was cut short by pool shutdown or a pending signal).
 */
int
ddt_prune_unique_entries(spa_t *spa, zpool_ddt_prune_unit_t unit,
    uint64_t amount)
{
	uint64_t cutoff;
	uint64_t start_time = gethrtime();

	if (spa->spa_active_ddt_prune)
		return (SET_ERROR(EALREADY));
	if (ddt_total_entries(spa) == 0)
		return (0);

	/*
	 * Reject an unknown unit before marking the prune active; bailing
	 * out afterwards would leave the flag set and make every later
	 * request fail with EALREADY.
	 */
	if (unit != ZPOOL_DDT_PRUNE_PERCENTAGE && unit != ZPOOL_DDT_PRUNE_AGE)
		return (SET_ERROR(EINVAL));

	spa->spa_active_ddt_prune = B_TRUE;

	zfs_dbgmsg("prune %llu %s", (u_longlong_t)amount,
	    unit == ZPOOL_DDT_PRUNE_PERCENTAGE ? "%" : "seconds old or older");

	if (unit == ZPOOL_DDT_PRUNE_PERCENTAGE) {
		ddt_age_histo_t histogram;
		uint64_t oldest = 0;

		/* Make a pass over DDT to build a histogram */
		ddt_prune_walk(spa, 0, &histogram);

		/* 64-bit so a large entry count is not truncated */
		uint64_t target = (histogram.dah_entries * amount) / 100;

		/*
		 * Figure out our cutoff date
		 * (i.e., which bins to prune from)
		 */
		for (int i = HIST_BINS - 1; i >= 0 && target > 0; i--) {
			if (histogram.dah_age_histo[i] != 0) {
				/* less than this bucket remaining */
				if (target < histogram.dah_age_histo[i]) {
					/*
					 * Bin i starts at 2^i hours; shift in
					 * 64 bits so high bins don't overflow.
					 */
					oldest = MAX(1, (1ULL << i) * 3600);
					target = 0;
				} else {
					target -= histogram.dah_age_histo[i];
				}
			}
		}

		/*
		 * An age beyond the current time matches nothing; use a
		 * cutoff of 0 (no pruning) rather than letting the
		 * subtraction wrap.
		 */
		uint64_t now = gethrestime_sec();
		cutoff = (oldest < now) ? now - oldest : 0;

		if (ddt_dump_prune_histogram)
			ddt_dump_age_histogram(&histogram, cutoff);
	} else {
		/* ZPOOL_DDT_PRUNE_AGE; same wrap-around guard as above */
		uint64_t now = gethrestime_sec();
		cutoff = (amount < now) ? now - amount : 0;
	}

	if (cutoff > 0 && !spa_shutting_down(spa) && !issig()) {
		/* Traverse DDT to prune entries older than our cutoff */
		ddt_prune_walk(spa, cutoff, NULL);
	}

	zfs_dbgmsg("%s: prune completed in %llu ms",
	    spa_name(spa), (u_longlong_t)NSEC2MSEC(gethrtime() - start_time));

	spa->spa_active_ddt_prune = B_FALSE;
	return (0);
}
/*
 * Register the dedup "prefetch" tunable; the string argument is the
 * description handed to the macro.
 * NOTE(review): presumably INT/ZMOD_RW make this a read-write integer
 * parameter -- confirm against the ZFS_MODULE_PARAM definition.
 */
ZFS_MODULE_PARAM(zfs_dedup, zfs_dedup_, prefetch, INT, ZMOD_RW,
"Enable prefetching dedup-ed blks");

View File

@ -353,16 +353,15 @@ ddt_log_take_first(ddt_t *ddt, ddt_log_t *ddl, ddt_lightweight_entry_t *ddlwe)
}
boolean_t
ddt_log_take_key(ddt_t *ddt, ddt_log_t *ddl, const ddt_key_t *ddk,
ddt_lightweight_entry_t *ddlwe)
ddt_log_remove_key(ddt_t *ddt, ddt_log_t *ddl, const ddt_key_t *ddk)
{
ddt_log_entry_t *ddle = avl_find(&ddl->ddl_tree, ddk, NULL);
if (ddle == NULL)
return (B_FALSE);
DDT_LOG_ENTRY_TO_LIGHTWEIGHT(ddt, ddle, ddlwe);
ddt_histogram_sub_entry(ddt, &ddt->ddt_log_histogram, ddlwe);
ddt_lightweight_entry_t ddlwe;
DDT_LOG_ENTRY_TO_LIGHTWEIGHT(ddt, ddle, &ddlwe);
ddt_histogram_sub_entry(ddt, &ddt->ddt_log_histogram, &ddlwe);
avl_remove(&ddl->ddl_tree, ddle);
kmem_cache_free(ddt->ddt_flags & DDT_FLAG_FLAT ?
@ -371,6 +370,21 @@ ddt_log_take_key(ddt_t *ddt, ddt_log_t *ddl, const ddt_key_t *ddk,
return (B_TRUE);
}
/*
 * Look for 'ddk' in this table's log trees: the active log first, then the
 * flushing log. Returns B_TRUE if either holds the key, B_FALSE otherwise.
 * When the key is found and 'ddlwe' is non-NULL, the log entry is also
 * converted into *ddlwe; nothing is removed from either tree.
 */
boolean_t
ddt_log_find_key(ddt_t *ddt, const ddt_key_t *ddk,
    ddt_lightweight_entry_t *ddlwe)
{
	ddt_log_entry_t *ddle;

	ddle = avl_find(&ddt->ddt_log_active->ddl_tree, ddk, NULL);
	if (ddle == NULL) {
		/* Not on the active log; fall back to the flushing log. */
		ddle = avl_find(&ddt->ddt_log_flushing->ddl_tree, ddk, NULL);
		if (ddle == NULL)
			return (B_FALSE);
	}

	/* The caller may only want the yes/no answer. */
	if (ddlwe != NULL) {
		DDT_LOG_ENTRY_TO_LIGHTWEIGHT(ddt, ddle, ddlwe);
	}

	return (B_TRUE);
}
void
ddt_log_checkpoint(ddt_t *ddt, ddt_lightweight_entry_t *ddlwe, dmu_tx_t *tx)
{

View File

@ -366,21 +366,15 @@ spa_prop_add(spa_t *spa, const char *propname, nvlist_t *outnvl)
int
spa_prop_get_nvlist(spa_t *spa, char **props, unsigned int n_props,
nvlist_t **outnvl)
nvlist_t *outnvl)
{
int err = 0;
if (props == NULL)
return (0);
if (*outnvl == NULL) {
err = nvlist_alloc(outnvl, NV_UNIQUE_NAME, KM_SLEEP);
if (err)
return (err);
}
for (unsigned int i = 0; i < n_props && err == 0; i++) {
err = spa_prop_add(spa, props[i], *outnvl);
err = spa_prop_add(spa, props[i], outnvl);
}
return (err);
@ -406,7 +400,7 @@ spa_prop_add_user(nvlist_t *nvl, const char *propname, char *strval,
* Get property values from the spa configuration.
*/
static void
spa_prop_get_config(spa_t *spa, nvlist_t **nvp)
spa_prop_get_config(spa_t *spa, nvlist_t *nv)
{
vdev_t *rvd = spa->spa_root_vdev;
dsl_pool_t *pool = spa->spa_dsl_pool;
@ -428,48 +422,48 @@ spa_prop_get_config(spa_t *spa, nvlist_t **nvp)
size += metaslab_class_get_space(spa_dedup_class(spa));
size += metaslab_class_get_space(spa_embedded_log_class(spa));
spa_prop_add_list(*nvp, ZPOOL_PROP_NAME, spa_name(spa), 0, src);
spa_prop_add_list(*nvp, ZPOOL_PROP_SIZE, NULL, size, src);
spa_prop_add_list(*nvp, ZPOOL_PROP_ALLOCATED, NULL, alloc, src);
spa_prop_add_list(*nvp, ZPOOL_PROP_FREE, NULL,
spa_prop_add_list(nv, ZPOOL_PROP_NAME, spa_name(spa), 0, src);
spa_prop_add_list(nv, ZPOOL_PROP_SIZE, NULL, size, src);
spa_prop_add_list(nv, ZPOOL_PROP_ALLOCATED, NULL, alloc, src);
spa_prop_add_list(nv, ZPOOL_PROP_FREE, NULL,
size - alloc, src);
spa_prop_add_list(*nvp, ZPOOL_PROP_CHECKPOINT, NULL,
spa_prop_add_list(nv, ZPOOL_PROP_CHECKPOINT, NULL,
spa->spa_checkpoint_info.sci_dspace, src);
spa_prop_add_list(*nvp, ZPOOL_PROP_FRAGMENTATION, NULL,
spa_prop_add_list(nv, ZPOOL_PROP_FRAGMENTATION, NULL,
metaslab_class_fragmentation(mc), src);
spa_prop_add_list(*nvp, ZPOOL_PROP_EXPANDSZ, NULL,
spa_prop_add_list(nv, ZPOOL_PROP_EXPANDSZ, NULL,
metaslab_class_expandable_space(mc), src);
spa_prop_add_list(*nvp, ZPOOL_PROP_READONLY, NULL,
spa_prop_add_list(nv, ZPOOL_PROP_READONLY, NULL,
(spa_mode(spa) == SPA_MODE_READ), src);
cap = (size == 0) ? 0 : (alloc * 100 / size);
spa_prop_add_list(*nvp, ZPOOL_PROP_CAPACITY, NULL, cap, src);
spa_prop_add_list(nv, ZPOOL_PROP_CAPACITY, NULL, cap, src);
spa_prop_add_list(*nvp, ZPOOL_PROP_DEDUPRATIO, NULL,
spa_prop_add_list(nv, ZPOOL_PROP_DEDUPRATIO, NULL,
ddt_get_pool_dedup_ratio(spa), src);
spa_prop_add_list(*nvp, ZPOOL_PROP_BCLONEUSED, NULL,
spa_prop_add_list(nv, ZPOOL_PROP_BCLONEUSED, NULL,
brt_get_used(spa), src);
spa_prop_add_list(*nvp, ZPOOL_PROP_BCLONESAVED, NULL,
spa_prop_add_list(nv, ZPOOL_PROP_BCLONESAVED, NULL,
brt_get_saved(spa), src);
spa_prop_add_list(*nvp, ZPOOL_PROP_BCLONERATIO, NULL,
spa_prop_add_list(nv, ZPOOL_PROP_BCLONERATIO, NULL,
brt_get_ratio(spa), src);
spa_prop_add_list(*nvp, ZPOOL_PROP_DEDUP_TABLE_SIZE, NULL,
spa_prop_add_list(nv, ZPOOL_PROP_DEDUP_TABLE_SIZE, NULL,
ddt_get_ddt_dsize(spa), src);
spa_prop_add_list(*nvp, ZPOOL_PROP_HEALTH, NULL,
spa_prop_add_list(nv, ZPOOL_PROP_HEALTH, NULL,
rvd->vdev_state, src);
version = spa_version(spa);
if (version == zpool_prop_default_numeric(ZPOOL_PROP_VERSION)) {
spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL,
spa_prop_add_list(nv, ZPOOL_PROP_VERSION, NULL,
version, ZPROP_SRC_DEFAULT);
} else {
spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL,
spa_prop_add_list(nv, ZPOOL_PROP_VERSION, NULL,
version, ZPROP_SRC_LOCAL);
}
spa_prop_add_list(*nvp, ZPOOL_PROP_LOAD_GUID,
spa_prop_add_list(nv, ZPOOL_PROP_LOAD_GUID,
NULL, spa_load_guid(spa), src);
}
@ -479,62 +473,62 @@ spa_prop_get_config(spa_t *spa, nvlist_t **nvp)
* when opening pools before this version freedir will be NULL.
*/
if (pool->dp_free_dir != NULL) {
spa_prop_add_list(*nvp, ZPOOL_PROP_FREEING, NULL,
spa_prop_add_list(nv, ZPOOL_PROP_FREEING, NULL,
dsl_dir_phys(pool->dp_free_dir)->dd_used_bytes,
src);
} else {
spa_prop_add_list(*nvp, ZPOOL_PROP_FREEING,
spa_prop_add_list(nv, ZPOOL_PROP_FREEING,
NULL, 0, src);
}
if (pool->dp_leak_dir != NULL) {
spa_prop_add_list(*nvp, ZPOOL_PROP_LEAKED, NULL,
spa_prop_add_list(nv, ZPOOL_PROP_LEAKED, NULL,
dsl_dir_phys(pool->dp_leak_dir)->dd_used_bytes,
src);
} else {
spa_prop_add_list(*nvp, ZPOOL_PROP_LEAKED,
spa_prop_add_list(nv, ZPOOL_PROP_LEAKED,
NULL, 0, src);
}
}
spa_prop_add_list(*nvp, ZPOOL_PROP_GUID, NULL, spa_guid(spa), src);
spa_prop_add_list(nv, ZPOOL_PROP_GUID, NULL, spa_guid(spa), src);
if (spa->spa_comment != NULL) {
spa_prop_add_list(*nvp, ZPOOL_PROP_COMMENT, spa->spa_comment,
spa_prop_add_list(nv, ZPOOL_PROP_COMMENT, spa->spa_comment,
0, ZPROP_SRC_LOCAL);
}
if (spa->spa_compatibility != NULL) {
spa_prop_add_list(*nvp, ZPOOL_PROP_COMPATIBILITY,
spa_prop_add_list(nv, ZPOOL_PROP_COMPATIBILITY,
spa->spa_compatibility, 0, ZPROP_SRC_LOCAL);
}
if (spa->spa_root != NULL)
spa_prop_add_list(*nvp, ZPOOL_PROP_ALTROOT, spa->spa_root,
spa_prop_add_list(nv, ZPOOL_PROP_ALTROOT, spa->spa_root,
0, ZPROP_SRC_LOCAL);
if (spa_feature_is_enabled(spa, SPA_FEATURE_LARGE_BLOCKS)) {
spa_prop_add_list(*nvp, ZPOOL_PROP_MAXBLOCKSIZE, NULL,
spa_prop_add_list(nv, ZPOOL_PROP_MAXBLOCKSIZE, NULL,
MIN(zfs_max_recordsize, SPA_MAXBLOCKSIZE), ZPROP_SRC_NONE);
} else {
spa_prop_add_list(*nvp, ZPOOL_PROP_MAXBLOCKSIZE, NULL,
spa_prop_add_list(nv, ZPOOL_PROP_MAXBLOCKSIZE, NULL,
SPA_OLD_MAXBLOCKSIZE, ZPROP_SRC_NONE);
}
if (spa_feature_is_enabled(spa, SPA_FEATURE_LARGE_DNODE)) {
spa_prop_add_list(*nvp, ZPOOL_PROP_MAXDNODESIZE, NULL,
spa_prop_add_list(nv, ZPOOL_PROP_MAXDNODESIZE, NULL,
DNODE_MAX_SIZE, ZPROP_SRC_NONE);
} else {
spa_prop_add_list(*nvp, ZPOOL_PROP_MAXDNODESIZE, NULL,
spa_prop_add_list(nv, ZPOOL_PROP_MAXDNODESIZE, NULL,
DNODE_MIN_SIZE, ZPROP_SRC_NONE);
}
if ((dp = list_head(&spa->spa_config_list)) != NULL) {
if (dp->scd_path == NULL) {
spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE,
spa_prop_add_list(nv, ZPOOL_PROP_CACHEFILE,
"none", 0, ZPROP_SRC_LOCAL);
} else if (strcmp(dp->scd_path, spa_config_path) != 0) {
spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE,
spa_prop_add_list(nv, ZPOOL_PROP_CACHEFILE,
dp->scd_path, 0, ZPROP_SRC_LOCAL);
}
}
@ -544,19 +538,13 @@ spa_prop_get_config(spa_t *spa, nvlist_t **nvp)
* Get zpool property values.
*/
int
spa_prop_get(spa_t *spa, nvlist_t **nvp)
spa_prop_get(spa_t *spa, nvlist_t *nv)
{
objset_t *mos = spa->spa_meta_objset;
zap_cursor_t zc;
zap_attribute_t za;
dsl_pool_t *dp;
int err;
if (*nvp == NULL) {
err = nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP);
if (err)
return (err);
}
int err = 0;
dp = spa_get_dsl(spa);
dsl_pool_config_enter(dp, FTAG);
@ -565,7 +553,7 @@ spa_prop_get(spa_t *spa, nvlist_t **nvp)
/*
* Get properties from the spa config.
*/
spa_prop_get_config(spa, nvp);
spa_prop_get_config(spa, nv);
/* If no pool property object, no more prop to get. */
if (mos == NULL || spa->spa_pool_props_object == 0)
@ -610,7 +598,7 @@ spa_prop_get(spa_t *spa, nvlist_t **nvp)
intval = za.za_first_integer;
}
spa_prop_add_list(*nvp, prop, strval, intval, src);
spa_prop_add_list(nv, prop, strval, intval, src);
if (strval != NULL)
kmem_free(strval, ZFS_MAX_DATASET_NAME_LEN);
@ -627,10 +615,10 @@ spa_prop_get(spa_t *spa, nvlist_t **nvp)
break;
}
if (prop != ZPOOL_PROP_INVAL) {
spa_prop_add_list(*nvp, prop, strval, 0, src);
spa_prop_add_list(nv, prop, strval, 0, src);
} else {
src = ZPROP_SRC_LOCAL;
spa_prop_add_user(*nvp, za.za_name, strval,
spa_prop_add_user(nv, za.za_name, strval,
src);
}
kmem_free(strval, za.za_num_integers);
@ -644,11 +632,9 @@ spa_prop_get(spa_t *spa, nvlist_t **nvp)
out:
mutex_exit(&spa->spa_props_lock);
dsl_pool_config_exit(dp, FTAG);
if (err && err != ENOENT) {
nvlist_free(*nvp);
*nvp = NULL;
if (err && err != ENOENT)
return (err);
}
return (0);
}

View File

@ -3050,7 +3050,6 @@ static const zfs_ioc_key_t zfs_keys_get_props[] = {
static int
zfs_ioc_pool_get_props(const char *pool, nvlist_t *innvl, nvlist_t *outnvl)
{
nvlist_t *nvp = outnvl;
spa_t *spa;
char **props = NULL;
unsigned int n_props = 0;
@ -3069,16 +3068,17 @@ zfs_ioc_pool_get_props(const char *pool, nvlist_t *innvl, nvlist_t *outnvl)
*/
mutex_enter(&spa_namespace_lock);
if ((spa = spa_lookup(pool)) != NULL) {
error = spa_prop_get(spa, &nvp);
error = spa_prop_get(spa, outnvl);
if (error == 0 && props != NULL)
error = spa_prop_get_nvlist(spa, props, n_props,
&nvp);
outnvl);
}
mutex_exit(&spa_namespace_lock);
} else {
error = spa_prop_get(spa, &nvp);
error = spa_prop_get(spa, outnvl);
if (error == 0 && props != NULL)
error = spa_prop_get_nvlist(spa, props, n_props, &nvp);
error = spa_prop_get_nvlist(spa, props, n_props,
outnvl);
spa_close(spa, FTAG);
}
@ -4342,6 +4342,51 @@ zfs_ioc_pool_trim(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
return (total_errors > 0 ? SET_ERROR(EINVAL) : 0);
}
#define DDT_PRUNE_UNIT "ddt_prune_unit"
#define DDT_PRUNE_AMOUNT "ddt_prune_amount"
/*
 * innvl: {
 *	"ddt_prune_unit" -> int32_t
 *	"ddt_prune_amount" -> uint64_t
 * }
 *
 * outnvl: unused
 */
/*
 * Input nvlist keys accepted by the DDT prune ioctl: the prune unit as an
 * int32 and the prune amount as a uint64.
 * NOTE(review): the trailing 0 in each row is presumably the key flags
 * field, i.e. both keys are required -- confirm against zfs_ioc_key_t.
 */
static const zfs_ioc_key_t zfs_keys_ddt_prune[] = {
{DDT_PRUNE_UNIT, DATA_TYPE_INT32, 0},
{DDT_PRUNE_AMOUNT, DATA_TYPE_UINT64, 0},
};
/*
 * ioctl handler: prune unique entries from a pool's dedup table.
 *
 * Reads the prune unit (int32) and amount (uint64) from innvl, opens the
 * named pool, and passes the request to ddt_prune_unique_entries().
 * outnvl is not written.
 *
 * Returns EINVAL if either input key is missing, the spa_open() error if
 * the pool cannot be opened, ENOTSUP if SPA_FEATURE_FAST_DEDUP is not
 * enabled on the pool, and otherwise whatever ddt_prune_unique_entries()
 * returns.
 */
static int
zfs_ioc_ddt_prune(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
{
	int32_t unit;
	uint64_t amount;

	if (nvlist_lookup_int32(innvl, DDT_PRUNE_UNIT, &unit) != 0 ||
	    nvlist_lookup_uint64(innvl, DDT_PRUNE_AMOUNT, &amount) != 0) {
		/* SET_ERROR, matching the ENOTSUP path below */
		return (SET_ERROR(EINVAL));
	}

	spa_t *spa;
	int error = spa_open(poolname, &spa, FTAG);
	if (error != 0)
		return (error);

	if (!spa_feature_is_enabled(spa, SPA_FEATURE_FAST_DEDUP)) {
		spa_close(spa, FTAG);
		return (SET_ERROR(ENOTSUP));
	}

	/* The unit value itself is validated by the callee. */
	error = ddt_prune_unique_entries(spa, (zpool_ddt_prune_unit_t)unit,
	    amount);

	spa_close(spa, FTAG);

	return (error);
}
/*
* This ioctl waits for activity of a particular type to complete. If there is
* no activity of that type in progress, it returns immediately, and the
@ -7430,6 +7475,11 @@ zfs_ioctl_init(void)
POOL_CHECK_NONE, B_FALSE, B_FALSE,
zfs_keys_get_props, ARRAY_SIZE(zfs_keys_get_props));
zfs_ioctl_register("zpool_ddt_prune", ZFS_IOC_DDT_PRUNE,
zfs_ioc_ddt_prune, zfs_secpolicy_config, POOL_NAME,
POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
zfs_keys_ddt_prune, ARRAY_SIZE(zfs_keys_ddt_prune));
/* IOCTLS that use the legacy function signature */
zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze,

View File

@ -2553,7 +2553,7 @@ zio_suspend(spa_t *spa, zio_t *zio, zio_suspend_reason_t reason)
if (reason != ZIO_SUSPEND_MMP) {
cmn_err(CE_WARN, "Pool '%s' has encountered an uncorrectable "
"I/O failure and has been suspended.\n", spa_name(spa));
"I/O failure and has been suspended.", spa_name(spa));
}
(void) zfs_ereport_post(FM_EREPORT_ZFS_IO_FAILURE, spa, NULL,
@ -2589,6 +2589,10 @@ zio_resume(spa_t *spa)
* Reexecute all previously suspended i/o.
*/
mutex_enter(&spa->spa_suspend_lock);
if (spa->spa_suspended != ZIO_SUSPEND_NONE)
cmn_err(CE_WARN, "Pool '%s' was suspended and is being "
"resumed. Failed I/O will be retried.",
spa_name(spa));
spa->spa_suspended = ZIO_SUSPEND_NONE;
cv_broadcast(&spa->spa_suspend_cv);
pio = spa->spa_suspend_zio_root;
@ -3859,6 +3863,16 @@ zio_ddt_free(zio_t *zio)
}
ddt_exit(ddt);
/*
* When no entry was found, it must have been pruned,
* so we can free it now instead of decrementing the
* refcount in the DDT.
*/
if (!dde) {
BP_SET_DEDUP(bp, 0);
zio->io_pipeline |= ZIO_STAGE_DVA_FREE;
}
return (zio);
}

View File

@ -389,7 +389,7 @@ if os.environ.get('CI') == 'true':
def process_results(pathname):
try:
f = open(pathname)
f = open(pathname, errors='replace')
except IOError as e:
print('Error opening file:', e)
sys.exit(1)