Merge 'openzfs/master' into truenas/zfs-2.3-release

Signed-off-by: Ameer Hamza <ahamza@ixsystems.com>
This commit is contained in:
Ameer Hamza 2024-04-02 02:32:41 +05:00
commit 7995c961aa
86 changed files with 1792 additions and 838 deletions

View File

@ -199,7 +199,8 @@ sublivelist_verify_blkptr(void *arg, const blkptr_t *bp, boolean_t free,
break; break;
sublivelist_verify_block_t svb = { sublivelist_verify_block_t svb = {
.svb_dva = bp->blk_dva[i], .svb_dva = bp->blk_dva[i],
.svb_allocated_txg = bp->blk_birth .svb_allocated_txg =
BP_GET_LOGICAL_BIRTH(bp)
}; };
if (zfs_btree_find(&sv->sv_leftover, &svb, if (zfs_btree_find(&sv->sv_leftover, &svb,
@ -2340,7 +2341,7 @@ snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp,
(int)BPE_GET_ETYPE(bp), (int)BPE_GET_ETYPE(bp),
(u_longlong_t)BPE_GET_LSIZE(bp), (u_longlong_t)BPE_GET_LSIZE(bp),
(u_longlong_t)BPE_GET_PSIZE(bp), (u_longlong_t)BPE_GET_PSIZE(bp),
(u_longlong_t)bp->blk_birth); (u_longlong_t)BP_GET_LOGICAL_BIRTH(bp));
return; return;
} }
@ -2358,7 +2359,7 @@ snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp,
buflen - strlen(blkbuf), buflen - strlen(blkbuf),
"%llxL B=%llu", "%llxL B=%llu",
(u_longlong_t)BP_GET_LSIZE(bp), (u_longlong_t)BP_GET_LSIZE(bp),
(u_longlong_t)bp->blk_birth); (u_longlong_t)BP_GET_LOGICAL_BIRTH(bp));
} else { } else {
(void) snprintf(blkbuf + strlen(blkbuf), (void) snprintf(blkbuf + strlen(blkbuf),
buflen - strlen(blkbuf), buflen - strlen(blkbuf),
@ -2366,8 +2367,8 @@ snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp,
(u_longlong_t)BP_GET_LSIZE(bp), (u_longlong_t)BP_GET_LSIZE(bp),
(u_longlong_t)BP_GET_PSIZE(bp), (u_longlong_t)BP_GET_PSIZE(bp),
(u_longlong_t)BP_GET_FILL(bp), (u_longlong_t)BP_GET_FILL(bp),
(u_longlong_t)bp->blk_birth, (u_longlong_t)BP_GET_LOGICAL_BIRTH(bp),
(u_longlong_t)BP_PHYSICAL_BIRTH(bp)); (u_longlong_t)BP_GET_BIRTH(bp));
if (bp_freed) if (bp_freed)
(void) snprintf(blkbuf + strlen(blkbuf), (void) snprintf(blkbuf + strlen(blkbuf),
buflen - strlen(blkbuf), " %s", "FREE"); buflen - strlen(blkbuf), " %s", "FREE");
@ -2417,7 +2418,7 @@ visit_indirect(spa_t *spa, const dnode_phys_t *dnp,
{ {
int err = 0; int err = 0;
if (bp->blk_birth == 0) if (BP_GET_LOGICAL_BIRTH(bp) == 0)
return (0); return (0);
print_indirect(spa, bp, zb, dnp); print_indirect(spa, bp, zb, dnp);
@ -2605,7 +2606,7 @@ dump_bptree_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
(void) arg, (void) tx; (void) arg, (void) tx;
char blkbuf[BP_SPRINTF_LEN]; char blkbuf[BP_SPRINTF_LEN];
if (bp->blk_birth != 0) { if (BP_GET_LOGICAL_BIRTH(bp) != 0) {
snprintf_blkptr(blkbuf, sizeof (blkbuf), bp); snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
(void) printf("\t%s\n", blkbuf); (void) printf("\t%s\n", blkbuf);
} }
@ -2646,7 +2647,7 @@ dump_bpobj_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed, dmu_tx_t *tx)
(void) arg, (void) tx; (void) arg, (void) tx;
char blkbuf[BP_SPRINTF_LEN]; char blkbuf[BP_SPRINTF_LEN];
ASSERT(bp->blk_birth != 0); ASSERT(BP_GET_LOGICAL_BIRTH(bp) != 0);
snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp, bp_freed); snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp, bp_freed);
(void) printf("\t%s\n", blkbuf); (void) printf("\t%s\n", blkbuf);
return (0); return (0);
@ -5788,7 +5789,7 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
if (zb->zb_level == ZB_DNODE_LEVEL) if (zb->zb_level == ZB_DNODE_LEVEL)
return (0); return (0);
if (dump_opt['b'] >= 5 && bp->blk_birth > 0) { if (dump_opt['b'] >= 5 && BP_GET_LOGICAL_BIRTH(bp) > 0) {
char blkbuf[BP_SPRINTF_LEN]; char blkbuf[BP_SPRINTF_LEN];
snprintf_blkptr(blkbuf, sizeof (blkbuf), bp); snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
(void) printf("objset %llu object %llu " (void) printf("objset %llu object %llu "

View File

@ -173,8 +173,8 @@ zil_prt_rec_write(zilog_t *zilog, int txtype, const void *arg)
if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) { if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) {
(void) printf("%shas blkptr, %s\n", tab_prefix, (void) printf("%shas blkptr, %s\n", tab_prefix,
!BP_IS_HOLE(bp) && !BP_IS_HOLE(bp) && BP_GET_LOGICAL_BIRTH(bp) >=
bp->blk_birth >= spa_min_claim_txg(zilog->zl_spa) ? spa_min_claim_txg(zilog->zl_spa) ?
"will claim" : "won't claim"); "will claim" : "won't claim");
print_log_bp(bp, tab_prefix); print_log_bp(bp, tab_prefix);
@ -186,7 +186,7 @@ zil_prt_rec_write(zilog_t *zilog, int txtype, const void *arg)
(void) printf("%s<hole>\n", tab_prefix); (void) printf("%s<hole>\n", tab_prefix);
return; return;
} }
if (bp->blk_birth < zilog->zl_header->zh_claim_txg) { if (BP_GET_LOGICAL_BIRTH(bp) < zilog->zl_header->zh_claim_txg) {
(void) printf("%s<block already committed>\n", (void) printf("%s<block already committed>\n",
tab_prefix); tab_prefix);
return; return;
@ -237,8 +237,8 @@ zil_prt_rec_write_enc(zilog_t *zilog, int txtype, const void *arg)
if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) { if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) {
(void) printf("%shas blkptr, %s\n", tab_prefix, (void) printf("%shas blkptr, %s\n", tab_prefix,
!BP_IS_HOLE(bp) && !BP_IS_HOLE(bp) && BP_GET_LOGICAL_BIRTH(bp) >=
bp->blk_birth >= spa_min_claim_txg(zilog->zl_spa) ? spa_min_claim_txg(zilog->zl_spa) ?
"will claim" : "won't claim"); "will claim" : "won't claim");
print_log_bp(bp, tab_prefix); print_log_bp(bp, tab_prefix);
} }
@ -473,7 +473,7 @@ print_log_block(zilog_t *zilog, const blkptr_t *bp, void *arg,
if (claim_txg != 0) if (claim_txg != 0)
claim = "already claimed"; claim = "already claimed";
else if (bp->blk_birth >= spa_min_claim_txg(zilog->zl_spa)) else if (BP_GET_LOGICAL_BIRTH(bp) >= spa_min_claim_txg(zilog->zl_spa))
claim = "will claim"; claim = "will claim";
else else
claim = "won't claim"; claim = "won't claim";

View File

@ -612,8 +612,8 @@ zhack_repair_undetach(uberblock_t *ub, nvlist_t *cfg, const int l)
* Uberblock root block pointer has valid birth TXG. * Uberblock root block pointer has valid birth TXG.
* Copying it to the label NVlist * Copying it to the label NVlist
*/ */
if (ub->ub_rootbp.blk_birth != 0) { if (BP_GET_LOGICAL_BIRTH(&ub->ub_rootbp) != 0) {
const uint64_t txg = ub->ub_rootbp.blk_birth; const uint64_t txg = BP_GET_LOGICAL_BIRTH(&ub->ub_rootbp);
ub->ub_txg = txg; ub->ub_txg = txg;
if (nvlist_remove_all(cfg, ZPOOL_CONFIG_CREATE_TXG) != 0) { if (nvlist_remove_all(cfg, ZPOOL_CONFIG_CREATE_TXG) != 0) {

View File

@ -458,7 +458,7 @@ static char *zpool_sysfs_gets(char *path)
} }
/* Remove trailing newline */ /* Remove trailing newline */
if (buf[count - 1] == '\n') if (count > 0 && buf[count - 1] == '\n')
buf[count - 1] = 0; buf[count - 1] = 0;
close(fd); close(fd);

View File

@ -22,7 +22,7 @@
/* /*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2011, 2020 by Delphix. All rights reserved. * Copyright (c) 2011, 2024 by Delphix. All rights reserved.
* Copyright (c) 2012 by Frederik Wessels. All rights reserved. * Copyright (c) 2012 by Frederik Wessels. All rights reserved.
* Copyright (c) 2012 by Cyril Plisko. All rights reserved. * Copyright (c) 2012 by Cyril Plisko. All rights reserved.
* Copyright (c) 2013 by Prasad Joshi (sTec). All rights reserved. * Copyright (c) 2013 by Prasad Joshi (sTec). All rights reserved.
@ -131,6 +131,13 @@ static int zpool_do_help(int argc, char **argv);
static zpool_compat_status_t zpool_do_load_compat( static zpool_compat_status_t zpool_do_load_compat(
const char *, boolean_t *); const char *, boolean_t *);
/*
 * Identifiers for long-only command line options.  Each value is used as
 * the `val' member of a `struct option' entry and is what getopt_long()
 * hands back to the option-parsing switch when that long option is seen.
 * Numbering starts at 1024 and increases by one per enumerator.
 */
enum zpool_options {
	/* --power */
	ZPOOL_OPTION_POWER = 1024,
	/* --allow-in-use */
	ZPOOL_OPTION_ALLOW_INUSE,
	/* --allow-replication-mismatch */
	ZPOOL_OPTION_ALLOW_REPLICATION_MISMATCH,
	/* --allow-ashift-mismatch */
	ZPOOL_OPTION_ALLOW_ASHIFT_MISMATCH,
};
/* /*
* These libumem hooks provide a reasonable set of defaults for the allocator's * These libumem hooks provide a reasonable set of defaults for the allocator's
* debugging facilities. * debugging facilities.
@ -347,7 +354,7 @@ get_usage(zpool_help_t idx)
{ {
switch (idx) { switch (idx) {
case HELP_ADD: case HELP_ADD:
return (gettext("\tadd [-fgLnP] [-o property=value] " return (gettext("\tadd [-afgLnP] [-o property=value] "
"<pool> <vdev> ...\n")); "<pool> <vdev> ...\n"));
case HELP_ATTACH: case HELP_ATTACH:
return (gettext("\tattach [-fsw] [-o property=value] " return (gettext("\tattach [-fsw] [-o property=value] "
@ -413,7 +420,7 @@ get_usage(zpool_help_t idx)
"[<device> ...]\n")); "[<device> ...]\n"));
case HELP_STATUS: case HELP_STATUS:
return (gettext("\tstatus [--power] [-c [script1,script2,...]] " return (gettext("\tstatus [--power] [-c [script1,script2,...]] "
"[-igLpPstvxD] [-T d|u] [pool] ... \n" "[-DegiLpPstvx] [-T d|u] [pool] ...\n"
"\t [interval [count]]\n")); "\t [interval [count]]\n"));
case HELP_UPGRADE: case HELP_UPGRADE:
return (gettext("\tupgrade\n" return (gettext("\tupgrade\n"
@ -1009,8 +1016,9 @@ add_prop_list_default(const char *propname, const char *propval,
} }
/* /*
* zpool add [-fgLnP] [-o property=value] <pool> <vdev> ... * zpool add [-afgLnP] [-o property=value] <pool> <vdev> ...
* *
* -a Disable the ashift validation checks
* -f Force addition of devices, even if they appear in use * -f Force addition of devices, even if they appear in use
* -g Display guid for individual vdev name. * -g Display guid for individual vdev name.
* -L Follow links when resolving vdev path name. * -L Follow links when resolving vdev path name.
@ -1026,8 +1034,11 @@ add_prop_list_default(const char *propname, const char *propval,
int int
zpool_do_add(int argc, char **argv) zpool_do_add(int argc, char **argv)
{ {
boolean_t force = B_FALSE; boolean_t check_replication = B_TRUE;
boolean_t check_inuse = B_TRUE;
boolean_t dryrun = B_FALSE; boolean_t dryrun = B_FALSE;
boolean_t check_ashift = B_TRUE;
boolean_t force = B_FALSE;
int name_flags = 0; int name_flags = 0;
int c; int c;
nvlist_t *nvroot; nvlist_t *nvroot;
@ -1038,8 +1049,18 @@ zpool_do_add(int argc, char **argv)
nvlist_t *props = NULL; nvlist_t *props = NULL;
char *propval; char *propval;
struct option long_options[] = {
{"allow-in-use", no_argument, NULL, ZPOOL_OPTION_ALLOW_INUSE},
{"allow-replication-mismatch", no_argument, NULL,
ZPOOL_OPTION_ALLOW_REPLICATION_MISMATCH},
{"allow-ashift-mismatch", no_argument, NULL,
ZPOOL_OPTION_ALLOW_ASHIFT_MISMATCH},
{0, 0, 0, 0}
};
/* check options */ /* check options */
while ((c = getopt(argc, argv, "fgLno:P")) != -1) { while ((c = getopt_long(argc, argv, "fgLno:P", long_options, NULL))
!= -1) {
switch (c) { switch (c) {
case 'f': case 'f':
force = B_TRUE; force = B_TRUE;
@ -1069,6 +1090,15 @@ zpool_do_add(int argc, char **argv)
case 'P': case 'P':
name_flags |= VDEV_NAME_PATH; name_flags |= VDEV_NAME_PATH;
break; break;
case ZPOOL_OPTION_ALLOW_INUSE:
check_inuse = B_FALSE;
break;
case ZPOOL_OPTION_ALLOW_REPLICATION_MISMATCH:
check_replication = B_FALSE;
break;
case ZPOOL_OPTION_ALLOW_ASHIFT_MISMATCH:
check_ashift = B_FALSE;
break;
case '?': case '?':
(void) fprintf(stderr, gettext("invalid option '%c'\n"), (void) fprintf(stderr, gettext("invalid option '%c'\n"),
optopt); optopt);
@ -1089,6 +1119,19 @@ zpool_do_add(int argc, char **argv)
usage(B_FALSE); usage(B_FALSE);
} }
if (force) {
if (!check_inuse || !check_replication || !check_ashift) {
(void) fprintf(stderr, gettext("'-f' option is not "
"allowed with '--allow-replication-mismatch', "
"'--allow-ashift-mismatch', or "
"'--allow-in-use'\n"));
usage(B_FALSE);
}
check_inuse = B_FALSE;
check_replication = B_FALSE;
check_ashift = B_FALSE;
}
poolname = argv[0]; poolname = argv[0];
argc--; argc--;
@ -1119,8 +1162,8 @@ zpool_do_add(int argc, char **argv)
} }
/* pass off to make_root_vdev for processing */ /* pass off to make_root_vdev for processing */
nvroot = make_root_vdev(zhp, props, force, !force, B_FALSE, dryrun, nvroot = make_root_vdev(zhp, props, !check_inuse,
argc, argv); check_replication, B_FALSE, dryrun, argc, argv);
if (nvroot == NULL) { if (nvroot == NULL) {
zpool_close(zhp); zpool_close(zhp);
return (1); return (1);
@ -1224,7 +1267,7 @@ zpool_do_add(int argc, char **argv)
ret = 0; ret = 0;
} else { } else {
ret = (zpool_add(zhp, nvroot) != 0); ret = (zpool_add(zhp, nvroot, check_ashift) != 0);
} }
nvlist_free(props); nvlist_free(props);
@ -7081,7 +7124,6 @@ zpool_do_split(int argc, char **argv)
return (ret); return (ret);
} }
#define POWER_OPT 1024
/* /*
* zpool online [--power] <pool> <device> ... * zpool online [--power] <pool> <device> ...
@ -7099,7 +7141,7 @@ zpool_do_online(int argc, char **argv)
int flags = 0; int flags = 0;
boolean_t is_power_on = B_FALSE; boolean_t is_power_on = B_FALSE;
struct option long_options[] = { struct option long_options[] = {
{"power", no_argument, NULL, POWER_OPT}, {"power", no_argument, NULL, ZPOOL_OPTION_POWER},
{0, 0, 0, 0} {0, 0, 0, 0}
}; };
@ -7109,7 +7151,7 @@ zpool_do_online(int argc, char **argv)
case 'e': case 'e':
flags |= ZFS_ONLINE_EXPAND; flags |= ZFS_ONLINE_EXPAND;
break; break;
case POWER_OPT: case ZPOOL_OPTION_POWER:
is_power_on = B_TRUE; is_power_on = B_TRUE;
break; break;
case '?': case '?':
@ -7222,7 +7264,7 @@ zpool_do_offline(int argc, char **argv)
boolean_t is_power_off = B_FALSE; boolean_t is_power_off = B_FALSE;
struct option long_options[] = { struct option long_options[] = {
{"power", no_argument, NULL, POWER_OPT}, {"power", no_argument, NULL, ZPOOL_OPTION_POWER},
{0, 0, 0, 0} {0, 0, 0, 0}
}; };
@ -7235,7 +7277,7 @@ zpool_do_offline(int argc, char **argv)
case 't': case 't':
istmp = B_TRUE; istmp = B_TRUE;
break; break;
case POWER_OPT: case ZPOOL_OPTION_POWER:
is_power_off = B_TRUE; is_power_off = B_TRUE;
break; break;
case '?': case '?':
@ -7335,7 +7377,7 @@ zpool_do_clear(int argc, char **argv)
char *pool, *device; char *pool, *device;
struct option long_options[] = { struct option long_options[] = {
{"power", no_argument, NULL, POWER_OPT}, {"power", no_argument, NULL, ZPOOL_OPTION_POWER},
{0, 0, 0, 0} {0, 0, 0, 0}
}; };
@ -7352,7 +7394,7 @@ zpool_do_clear(int argc, char **argv)
case 'X': case 'X':
xtreme_rewind = B_TRUE; xtreme_rewind = B_TRUE;
break; break;
case POWER_OPT: case ZPOOL_OPTION_POWER:
is_power_on = B_TRUE; is_power_on = B_TRUE;
break; break;
case '?': case '?':
@ -9177,22 +9219,22 @@ status_callback(zpool_handle_t *zhp, void *data)
} }
/* /*
* zpool status [-c [script1,script2,...]] [-igLpPstvx] [--power] [-T d|u] ... * zpool status [-c [script1,script2,...]] [-DegiLpPstvx] [--power] [-T d|u] ...
* [pool] [interval [count]] * [pool] [interval [count]]
* *
* -c CMD For each vdev, run command CMD * -c CMD For each vdev, run command CMD
* -D Display dedup status (undocumented)
* -e Display only unhealthy vdevs * -e Display only unhealthy vdevs
* -i Display vdev initialization status.
* -g Display guid for individual vdev name. * -g Display guid for individual vdev name.
* -i Display vdev initialization status.
* -L Follow links when resolving vdev path name. * -L Follow links when resolving vdev path name.
* -p Display values in parsable (exact) format. * -p Display values in parsable (exact) format.
* -P Display full path for vdev name. * -P Display full path for vdev name.
* -s Display slow IOs column. * -s Display slow IOs column.
* -v Display complete error logs
* -x Display only pools with potential problems
* -D Display dedup status (undocumented)
* -t Display vdev TRIM status. * -t Display vdev TRIM status.
* -T Display a timestamp in date(1) or Unix format * -T Display a timestamp in date(1) or Unix format
* -v Display complete error logs
* -x Display only pools with potential problems
* --power Display vdev enclosure slot power status * --power Display vdev enclosure slot power status
* *
* Describes the health status of all pools or some subset. * Describes the health status of all pools or some subset.
@ -9208,12 +9250,12 @@ zpool_do_status(int argc, char **argv)
char *cmd = NULL; char *cmd = NULL;
struct option long_options[] = { struct option long_options[] = {
{"power", no_argument, NULL, POWER_OPT}, {"power", no_argument, NULL, ZPOOL_OPTION_POWER},
{0, 0, 0, 0} {0, 0, 0, 0}
}; };
/* check options */ /* check options */
while ((c = getopt_long(argc, argv, "c:eigLpPsvxDtT:", long_options, while ((c = getopt_long(argc, argv, "c:DegiLpPstT:vx", long_options,
NULL)) != -1) { NULL)) != -1) {
switch (c) { switch (c) {
case 'c': case 'c':
@ -9240,15 +9282,18 @@ zpool_do_status(int argc, char **argv)
} }
cmd = optarg; cmd = optarg;
break; break;
case 'D':
cb.cb_dedup_stats = B_TRUE;
break;
case 'e': case 'e':
cb.cb_print_unhealthy = B_TRUE; cb.cb_print_unhealthy = B_TRUE;
break; break;
case 'i':
cb.cb_print_vdev_init = B_TRUE;
break;
case 'g': case 'g':
cb.cb_name_flags |= VDEV_NAME_GUID; cb.cb_name_flags |= VDEV_NAME_GUID;
break; break;
case 'i':
cb.cb_print_vdev_init = B_TRUE;
break;
case 'L': case 'L':
cb.cb_name_flags |= VDEV_NAME_FOLLOW_LINKS; cb.cb_name_flags |= VDEV_NAME_FOLLOW_LINKS;
break; break;
@ -9261,22 +9306,19 @@ zpool_do_status(int argc, char **argv)
case 's': case 's':
cb.cb_print_slow_ios = B_TRUE; cb.cb_print_slow_ios = B_TRUE;
break; break;
case 'v':
cb.cb_verbose = B_TRUE;
break;
case 'x':
cb.cb_explain = B_TRUE;
break;
case 'D':
cb.cb_dedup_stats = B_TRUE;
break;
case 't': case 't':
cb.cb_print_vdev_trim = B_TRUE; cb.cb_print_vdev_trim = B_TRUE;
break; break;
case 'T': case 'T':
get_timestamp_arg(*optarg); get_timestamp_arg(*optarg);
break; break;
case POWER_OPT: case 'v':
cb.cb_verbose = B_TRUE;
break;
case 'x':
cb.cb_explain = B_TRUE;
break;
case ZPOOL_OPTION_POWER:
cb.cb_print_power = B_TRUE; cb.cb_print_power = B_TRUE;
break; break;
case '?': case '?':
@ -9315,7 +9357,6 @@ zpool_do_status(int argc, char **argv)
if (cb.vcdl != NULL) if (cb.vcdl != NULL)
free_vdev_cmd_data_list(cb.vcdl); free_vdev_cmd_data_list(cb.vcdl);
if (argc == 0 && cb.cb_count == 0) if (argc == 0 && cb.cb_count == 0)
(void) fprintf(stderr, gettext("no pools available\n")); (void) fprintf(stderr, gettext("no pools available\n"));
else if (cb.cb_explain && cb.cb_first && cb.cb_allpools) else if (cb.cb_explain && cb.cb_first && cb.cb_allpools)

View File

@ -20,7 +20,7 @@
*/ */
/* /*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2018 by Delphix. All rights reserved. * Copyright (c) 2011, 2024 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved. * Copyright (c) 2013 Steven Hartland. All rights reserved.
* Copyright (c) 2014 Integros [integros.com] * Copyright (c) 2014 Integros [integros.com]
@ -3375,7 +3375,7 @@ ztest_vdev_add_remove(ztest_ds_t *zd, uint64_t id)
"log" : NULL, raidz_children, zs->zs_mirrors, "log" : NULL, raidz_children, zs->zs_mirrors,
1); 1);
error = spa_vdev_add(spa, nvroot); error = spa_vdev_add(spa, nvroot, B_FALSE);
fnvlist_free(nvroot); fnvlist_free(nvroot);
switch (error) { switch (error) {
@ -3438,7 +3438,7 @@ ztest_vdev_class_add(ztest_ds_t *zd, uint64_t id)
nvroot = make_vdev_root(NULL, NULL, NULL, ztest_opts.zo_vdev_size, 0, nvroot = make_vdev_root(NULL, NULL, NULL, ztest_opts.zo_vdev_size, 0,
class, raidz_children, zs->zs_mirrors, 1); class, raidz_children, zs->zs_mirrors, 1);
error = spa_vdev_add(spa, nvroot); error = spa_vdev_add(spa, nvroot, B_FALSE);
fnvlist_free(nvroot); fnvlist_free(nvroot);
if (error == ENOSPC) if (error == ENOSPC)
@ -3545,7 +3545,7 @@ ztest_vdev_aux_add_remove(ztest_ds_t *zd, uint64_t id)
*/ */
nvlist_t *nvroot = make_vdev_root(NULL, aux, NULL, nvlist_t *nvroot = make_vdev_root(NULL, aux, NULL,
(ztest_opts.zo_vdev_size * 5) / 4, 0, NULL, 0, 0, 1); (ztest_opts.zo_vdev_size * 5) / 4, 0, NULL, 0, 0, 1);
error = spa_vdev_add(spa, nvroot); error = spa_vdev_add(spa, nvroot, B_FALSE);
switch (error) { switch (error) {
case 0: case 0:

View File

@ -4,6 +4,7 @@ dnl #
AC_DEFUN([ZFS_AC_KERNEL_SRC_FILEMAP], [ AC_DEFUN([ZFS_AC_KERNEL_SRC_FILEMAP], [
ZFS_LINUX_TEST_SRC([filemap_range_has_page], [ ZFS_LINUX_TEST_SRC([filemap_range_has_page], [
#include <linux/fs.h> #include <linux/fs.h>
#include <linux/pagemap.h>
],[ ],[
struct address_space *mapping = NULL; struct address_space *mapping = NULL;
loff_t lstart = 0; loff_t lstart = 0;

View File

@ -0,0 +1,17 @@
dnl #
dnl # Registers the "page_size" kernel compile test: with <linux/mm.h>
dnl # included, the test body assigns the result of page_size(NULL) to an
dnl # unsigned long.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_SRC_MM_PAGE_SIZE], [
ZFS_LINUX_TEST_SRC([page_size], [
#include <linux/mm.h>
],[
unsigned long s;
s = page_size(NULL);
])
])
dnl #
dnl # Reports the outcome of the "page_size" compile test and, when it
dnl # succeeded, defines HAVE_MM_PAGE_SIZE.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_MM_PAGE_SIZE], [
AC_MSG_CHECKING([whether page_size() is available])
ZFS_LINUX_TEST_RESULT([page_size], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_MM_PAGE_SIZE, 1, [page_size() is available])
],[
AC_MSG_RESULT(no)
])
])

View File

@ -16,6 +16,9 @@ dnl #
dnl # 5.3: VFS copy_file_range() expected to do its own fallback, dnl # 5.3: VFS copy_file_range() expected to do its own fallback,
dnl # generic_copy_file_range() added to support it dnl # generic_copy_file_range() added to support it
dnl # dnl #
dnl # 6.8: generic_copy_file_range() removed, replaced by
dnl # splice_copy_file_range()
dnl #
AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_COPY_FILE_RANGE], [ AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_COPY_FILE_RANGE], [
ZFS_LINUX_TEST_SRC([vfs_copy_file_range], [ ZFS_LINUX_TEST_SRC([vfs_copy_file_range], [
#include <linux/fs.h> #include <linux/fs.h>
@ -72,6 +75,30 @@ AC_DEFUN([ZFS_AC_KERNEL_VFS_GENERIC_COPY_FILE_RANGE], [
]) ])
]) ])
dnl #
dnl # Registers the "splice_copy_file_range" kernel compile test: with
dnl # <linux/splice.h> included, the test body calls
dnl # splice_copy_file_range(src_file, src_off, dst_file, dst_off, len).
dnl # Every local is marked unused so the test does not trip unused-variable
dnl # diagnostics.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_SPLICE_COPY_FILE_RANGE], [
ZFS_LINUX_TEST_SRC([splice_copy_file_range], [
#include <linux/splice.h>
], [
struct file *src_file __attribute__ ((unused)) = NULL;
loff_t src_off __attribute__ ((unused)) = 0;
struct file *dst_file __attribute__ ((unused)) = NULL;
loff_t dst_off __attribute__ ((unused)) = 0;
size_t len __attribute__ ((unused)) = 0;
splice_copy_file_range(src_file, src_off, dst_file, dst_off,
len);
])
])
dnl #
dnl # Reports the outcome of the "splice_copy_file_range" compile test and,
dnl # when it succeeded, defines HAVE_VFS_SPLICE_COPY_FILE_RANGE.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_VFS_SPLICE_COPY_FILE_RANGE], [
AC_MSG_CHECKING([whether splice_copy_file_range() is available])
ZFS_LINUX_TEST_RESULT([splice_copy_file_range], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_VFS_SPLICE_COPY_FILE_RANGE, 1,
[splice_copy_file_range() is available])
],[
AC_MSG_RESULT(no)
])
])
AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_CLONE_FILE_RANGE], [ AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_CLONE_FILE_RANGE], [
ZFS_LINUX_TEST_SRC([vfs_clone_file_range], [ ZFS_LINUX_TEST_SRC([vfs_clone_file_range], [
#include <linux/fs.h> #include <linux/fs.h>

View File

@ -118,6 +118,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
ZFS_AC_KERNEL_SRC_VFS_IOV_ITER ZFS_AC_KERNEL_SRC_VFS_IOV_ITER
ZFS_AC_KERNEL_SRC_VFS_COPY_FILE_RANGE ZFS_AC_KERNEL_SRC_VFS_COPY_FILE_RANGE
ZFS_AC_KERNEL_SRC_VFS_GENERIC_COPY_FILE_RANGE ZFS_AC_KERNEL_SRC_VFS_GENERIC_COPY_FILE_RANGE
ZFS_AC_KERNEL_SRC_VFS_SPLICE_COPY_FILE_RANGE
ZFS_AC_KERNEL_SRC_VFS_REMAP_FILE_RANGE ZFS_AC_KERNEL_SRC_VFS_REMAP_FILE_RANGE
ZFS_AC_KERNEL_SRC_VFS_CLONE_FILE_RANGE ZFS_AC_KERNEL_SRC_VFS_CLONE_FILE_RANGE
ZFS_AC_KERNEL_SRC_VFS_DEDUPE_FILE_RANGE ZFS_AC_KERNEL_SRC_VFS_DEDUPE_FILE_RANGE
@ -166,6 +167,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
ZFS_AC_KERNEL_SRC_REGISTER_SYSCTL_TABLE ZFS_AC_KERNEL_SRC_REGISTER_SYSCTL_TABLE
ZFS_AC_KERNEL_SRC_COPY_SPLICE_READ ZFS_AC_KERNEL_SRC_COPY_SPLICE_READ
ZFS_AC_KERNEL_SRC_SYNC_BDEV ZFS_AC_KERNEL_SRC_SYNC_BDEV
ZFS_AC_KERNEL_SRC_MM_PAGE_SIZE
case "$host_cpu" in case "$host_cpu" in
powerpc*) powerpc*)
ZFS_AC_KERNEL_SRC_CPU_HAS_FEATURE ZFS_AC_KERNEL_SRC_CPU_HAS_FEATURE
@ -266,6 +268,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
ZFS_AC_KERNEL_VFS_IOV_ITER ZFS_AC_KERNEL_VFS_IOV_ITER
ZFS_AC_KERNEL_VFS_COPY_FILE_RANGE ZFS_AC_KERNEL_VFS_COPY_FILE_RANGE
ZFS_AC_KERNEL_VFS_GENERIC_COPY_FILE_RANGE ZFS_AC_KERNEL_VFS_GENERIC_COPY_FILE_RANGE
ZFS_AC_KERNEL_VFS_SPLICE_COPY_FILE_RANGE
ZFS_AC_KERNEL_VFS_REMAP_FILE_RANGE ZFS_AC_KERNEL_VFS_REMAP_FILE_RANGE
ZFS_AC_KERNEL_VFS_CLONE_FILE_RANGE ZFS_AC_KERNEL_VFS_CLONE_FILE_RANGE
ZFS_AC_KERNEL_VFS_DEDUPE_FILE_RANGE ZFS_AC_KERNEL_VFS_DEDUPE_FILE_RANGE
@ -314,6 +317,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
ZFS_AC_KERNEL_REGISTER_SYSCTL_TABLE ZFS_AC_KERNEL_REGISTER_SYSCTL_TABLE
ZFS_AC_KERNEL_COPY_SPLICE_READ ZFS_AC_KERNEL_COPY_SPLICE_READ
ZFS_AC_KERNEL_SYNC_BDEV ZFS_AC_KERNEL_SYNC_BDEV
ZFS_AC_KERNEL_MM_PAGE_SIZE
case "$host_cpu" in case "$host_cpu" in
powerpc*) powerpc*)
ZFS_AC_KERNEL_CPU_HAS_FEATURE ZFS_AC_KERNEL_CPU_HAS_FEATURE

View File

@ -21,7 +21,7 @@
/* /*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2022 by Delphix. All rights reserved. * Copyright (c) 2011, 2024 by Delphix. All rights reserved.
* Copyright Joyent, Inc. * Copyright Joyent, Inc.
* Copyright (c) 2013 Steven Hartland. All rights reserved. * Copyright (c) 2013 Steven Hartland. All rights reserved.
* Copyright (c) 2016, Intel Corporation. * Copyright (c) 2016, Intel Corporation.
@ -158,6 +158,7 @@ typedef enum zfs_error {
EZFS_RESUME_EXISTS, /* Resume on existing dataset without force */ EZFS_RESUME_EXISTS, /* Resume on existing dataset without force */
EZFS_SHAREFAILED, /* filesystem share failed */ EZFS_SHAREFAILED, /* filesystem share failed */
EZFS_RAIDZ_EXPAND_IN_PROGRESS, /* a raidz is currently expanding */ EZFS_RAIDZ_EXPAND_IN_PROGRESS, /* a raidz is currently expanding */
EZFS_ASHIFT_MISMATCH, /* can't add vdevs with different ashifts */
EZFS_UNKNOWN EZFS_UNKNOWN
} zfs_error_t; } zfs_error_t;
@ -261,7 +262,7 @@ _LIBZFS_H boolean_t zpool_skip_pool(const char *);
_LIBZFS_H int zpool_create(libzfs_handle_t *, const char *, nvlist_t *, _LIBZFS_H int zpool_create(libzfs_handle_t *, const char *, nvlist_t *,
nvlist_t *, nvlist_t *); nvlist_t *, nvlist_t *);
_LIBZFS_H int zpool_destroy(zpool_handle_t *, const char *); _LIBZFS_H int zpool_destroy(zpool_handle_t *, const char *);
_LIBZFS_H int zpool_add(zpool_handle_t *, nvlist_t *); _LIBZFS_H int zpool_add(zpool_handle_t *, nvlist_t *, boolean_t check_ashift);
typedef struct splitflags { typedef struct splitflags {
/* do not split, but return the config that would be split off */ /* do not split, but return the config that would be split off */

View File

@ -80,7 +80,9 @@ noinst_HEADERS = \
%D%/spl/sys/zmod.h \ %D%/spl/sys/zmod.h \
%D%/spl/sys/zone.h \ %D%/spl/sys/zone.h \
\ \
%D%/zfs/sys/arc_os.h \
%D%/zfs/sys/freebsd_crypto.h \ %D%/zfs/sys/freebsd_crypto.h \
%D%/zfs/sys/freebsd_event.h \
%D%/zfs/sys/vdev_os.h \ %D%/zfs/sys/vdev_os.h \
%D%/zfs/sys/zfs_bootenv_os.h \ %D%/zfs/sys/zfs_bootenv_os.h \
%D%/zfs/sys/zfs_context_os.h \ %D%/zfs/sys/zfs_context_os.h \

View File

@ -5,6 +5,7 @@ kernel_linux_HEADERS = \
%D%/kernel/linux/compiler_compat.h \ %D%/kernel/linux/compiler_compat.h \
%D%/kernel/linux/dcache_compat.h \ %D%/kernel/linux/dcache_compat.h \
%D%/kernel/linux/kmap_compat.h \ %D%/kernel/linux/kmap_compat.h \
%D%/kernel/linux/mm_compat.h \
%D%/kernel/linux/mod_compat.h \ %D%/kernel/linux/mod_compat.h \
%D%/kernel/linux/page_compat.h \ %D%/kernel/linux/page_compat.h \
%D%/kernel/linux/percpu_compat.h \ %D%/kernel/linux/percpu_compat.h \

View File

@ -0,0 +1,36 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2023, 2024, Klara Inc.
*/
#ifndef _ZFS_MM_COMPAT_H
#define _ZFS_MM_COMPAT_H
#include <linux/mm.h>
/*
 * 5.4 introduced page_size(). Older kernels can use a trivial macro instead.
 *
 * When HAVE_MM_PAGE_SIZE is not defined, page_size(p) is supplied here as
 * PAGE_SIZE shifted left by compound_order(p), cast to unsigned long.
 */
#ifndef HAVE_MM_PAGE_SIZE
#define page_size(p) ((unsigned long)(PAGE_SIZE << compound_order(p)))
#endif
#endif /* _ZFS_MM_COMPAT_H */

View File

@ -68,6 +68,7 @@ enum scope_prefix_types {
zfs_trim, zfs_trim,
zfs_txg, zfs_txg,
zfs_vdev, zfs_vdev,
zfs_vdev_disk,
zfs_vdev_file, zfs_vdev_file,
zfs_vdev_mirror, zfs_vdev_mirror,
zfs_vnops, zfs_vnops,

View File

@ -79,6 +79,9 @@ typedef struct abd {
typedef int abd_iter_func_t(void *buf, size_t len, void *priv); typedef int abd_iter_func_t(void *buf, size_t len, void *priv);
typedef int abd_iter_func2_t(void *bufa, void *bufb, size_t len, void *priv); typedef int abd_iter_func2_t(void *bufa, void *bufb, size_t len, void *priv);
#if defined(__linux__) && defined(_KERNEL)
/*
 * Callback type accepted by abd_iterate_page_func().  Only declared for
 * Linux kernel builds.  The callback receives a struct page pointer, two
 * size_t values and an opaque pointer, and returns an int.
 * NOTE(review): the two size_t arguments are presumably the data offset
 * within the page and the data length -- confirm against the implementation.
 */
typedef int abd_iter_page_func_t(struct page *, size_t, size_t, void *);
#endif
extern int zfs_abd_scatter_enabled; extern int zfs_abd_scatter_enabled;
@ -125,6 +128,10 @@ void abd_release_ownership_of_buf(abd_t *);
int abd_iterate_func(abd_t *, size_t, size_t, abd_iter_func_t *, void *); int abd_iterate_func(abd_t *, size_t, size_t, abd_iter_func_t *, void *);
int abd_iterate_func2(abd_t *, abd_t *, size_t, size_t, size_t, int abd_iterate_func2(abd_t *, abd_t *, size_t, size_t, size_t,
abd_iter_func2_t *, void *); abd_iter_func2_t *, void *);
#if defined(__linux__) && defined(_KERNEL)
/*
 * Takes the same leading arguments as abd_iterate_func() (an ABD and two
 * size_t values) but an abd_iter_page_func_t callback, plus an opaque
 * pointer.  Only declared for Linux kernel builds.
 */
int abd_iterate_page_func(abd_t *, size_t, size_t, abd_iter_page_func_t *,
void *);
#endif
void abd_copy_off(abd_t *, abd_t *, size_t, size_t, size_t); void abd_copy_off(abd_t *, abd_t *, size_t, size_t, size_t);
void abd_copy_from_buf_off(abd_t *, const void *, size_t, size_t); void abd_copy_from_buf_off(abd_t *, const void *, size_t, size_t);
void abd_copy_to_buf_off(void *, abd_t *, size_t, size_t); void abd_copy_to_buf_off(void *, abd_t *, size_t, size_t);
@ -213,6 +220,8 @@ void abd_fini(void);
/* /*
* Linux ABD bio functions * Linux ABD bio functions
* Note: these are only needed to support vdev_classic. See comment in
* vdev_disk.c.
*/ */
#if defined(__linux__) && defined(_KERNEL) #if defined(__linux__) && defined(_KERNEL)
unsigned int abd_bio_map_off(struct bio *, abd_t *, unsigned int, size_t); unsigned int abd_bio_map_off(struct bio *, abd_t *, unsigned int, size_t);

View File

@ -21,6 +21,7 @@
/* /*
* Copyright (c) 2014 by Chunwei Chen. All rights reserved. * Copyright (c) 2014 by Chunwei Chen. All rights reserved.
* Copyright (c) 2016, 2019 by Delphix. All rights reserved. * Copyright (c) 2016, 2019 by Delphix. All rights reserved.
* Copyright (c) 2023, 2024, Klara Inc.
*/ */
#ifndef _ABD_IMPL_H #ifndef _ABD_IMPL_H
@ -38,12 +39,30 @@ typedef enum abd_stats_op {
ABDSTAT_DECR /* Decrease abdstat values */ ABDSTAT_DECR /* Decrease abdstat values */
} abd_stats_op_t; } abd_stats_op_t;
struct scatterlist; /* forward declaration */ /* forward declarations */
struct scatterlist;
struct page;
struct abd_iter { struct abd_iter {
/* public interface */ /* public interface */
void *iter_mapaddr; /* addr corresponding to iter_pos */ union {
size_t iter_mapsize; /* length of data valid at mapaddr */ /* for abd_iter_map()/abd_iter_unmap() */
struct {
/* addr corresponding to iter_pos */
void *iter_mapaddr;
/* length of data valid at mapaddr */
size_t iter_mapsize;
};
/* for abd_iter_page() */
struct {
/* current page */
struct page *iter_page;
/* offset of data in page */
size_t iter_page_doff;
/* size of data in page */
size_t iter_page_dsize;
};
};
/* private */ /* private */
abd_t *iter_abd; /* ABD being iterated through */ abd_t *iter_abd; /* ABD being iterated through */
@ -78,6 +97,7 @@ boolean_t abd_iter_at_end(struct abd_iter *);
void abd_iter_advance(struct abd_iter *, size_t); void abd_iter_advance(struct abd_iter *, size_t);
void abd_iter_map(struct abd_iter *); void abd_iter_map(struct abd_iter *);
void abd_iter_unmap(struct abd_iter *); void abd_iter_unmap(struct abd_iter *);
void abd_iter_page(struct abd_iter *);
/* /*
* Helper macros * Helper macros

View File

@ -752,8 +752,6 @@ void dmu_buf_sub_user_size(dmu_buf_t *db, uint64_t nsub);
void *dmu_buf_get_user(dmu_buf_t *db); void *dmu_buf_get_user(dmu_buf_t *db);
objset_t *dmu_buf_get_objset(dmu_buf_t *db); objset_t *dmu_buf_get_objset(dmu_buf_t *db);
dnode_t *dmu_buf_dnode_enter(dmu_buf_t *db);
void dmu_buf_dnode_exit(dmu_buf_t *db);
/* Block until any in-progress dmu buf user evictions complete. */ /* Block until any in-progress dmu buf user evictions complete. */
void dmu_buf_user_evict_wait(void); void dmu_buf_user_evict_wait(void);
@ -902,6 +900,8 @@ extern uint_t zfs_max_recordsize;
*/ */
void dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset, void dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset,
uint64_t len, enum zio_priority pri); uint64_t len, enum zio_priority pri);
void dmu_prefetch_by_dnode(dnode_t *dn, int64_t level, uint64_t offset,
uint64_t len, enum zio_priority pri);
void dmu_prefetch_dnode(objset_t *os, uint64_t object, enum zio_priority pri); void dmu_prefetch_dnode(objset_t *os, uint64_t object, enum zio_priority pri);
typedef struct dmu_object_info { typedef struct dmu_object_info {

View File

@ -21,7 +21,7 @@
/* /*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2020 by Delphix. All rights reserved. * Copyright (c) 2011, 2024 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2013, 2017 Joyent, Inc. All rights reserved. * Copyright (c) 2013, 2017 Joyent, Inc. All rights reserved.
* Copyright (c) 2014 Integros [integros.com] * Copyright (c) 2014 Integros [integros.com]
@ -1603,6 +1603,7 @@ typedef enum {
ZFS_ERR_RESUME_EXISTS, ZFS_ERR_RESUME_EXISTS,
ZFS_ERR_CRYPTO_NOTSUP, ZFS_ERR_CRYPTO_NOTSUP,
ZFS_ERR_RAIDZ_EXPAND_IN_PROGRESS, ZFS_ERR_RAIDZ_EXPAND_IN_PROGRESS,
ZFS_ERR_ASHIFT_MISMATCH,
} zfs_errno_t; } zfs_errno_t;
/* /*

View File

@ -20,7 +20,7 @@
*/ */
/* /*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2021 by Delphix. All rights reserved. * Copyright (c) 2011, 2024 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
* Copyright 2013 Saso Kiselkov. All rights reserved. * Copyright 2013 Saso Kiselkov. All rights reserved.
@ -125,15 +125,15 @@ typedef struct zio_cksum_salt {
* *
* 64 56 48 40 32 24 16 8 0 * 64 56 48 40 32 24 16 8 0
* +-------+-------+-------+-------+-------+-------+-------+-------+ * +-------+-------+-------+-------+-------+-------+-------+-------+
* 0 | pad | vdev1 | GRID | ASIZE | * 0 | pad | vdev1 | pad | ASIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+ * +-------+-------+-------+-------+-------+-------+-------+-------+
* 1 |G| offset1 | * 1 |G| offset1 |
* +-------+-------+-------+-------+-------+-------+-------+-------+ * +-------+-------+-------+-------+-------+-------+-------+-------+
* 2 | pad | vdev2 | GRID | ASIZE | * 2 | pad | vdev2 | pad | ASIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+ * +-------+-------+-------+-------+-------+-------+-------+-------+
* 3 |G| offset2 | * 3 |G| offset2 |
* +-------+-------+-------+-------+-------+-------+-------+-------+ * +-------+-------+-------+-------+-------+-------+-------+-------+
* 4 | pad | vdev3 | GRID | ASIZE | * 4 | pad | vdev3 | pad | ASIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+ * +-------+-------+-------+-------+-------+-------+-------+-------+
* 5 |G| offset3 | * 5 |G| offset3 |
* +-------+-------+-------+-------+-------+-------+-------+-------+ * +-------+-------+-------+-------+-------+-------+-------+-------+
@ -165,7 +165,6 @@ typedef struct zio_cksum_salt {
* LSIZE logical size * LSIZE logical size
* PSIZE physical size (after compression) * PSIZE physical size (after compression)
* ASIZE allocated size (including RAID-Z parity and gang block headers) * ASIZE allocated size (including RAID-Z parity and gang block headers)
* GRID RAID-Z layout information (reserved for future use)
* cksum checksum function * cksum checksum function
* comp compression function * comp compression function
* G gang block indicator * G gang block indicator
@ -190,11 +189,11 @@ typedef struct zio_cksum_salt {
* *
* 64 56 48 40 32 24 16 8 0 * 64 56 48 40 32 24 16 8 0
* +-------+-------+-------+-------+-------+-------+-------+-------+ * +-------+-------+-------+-------+-------+-------+-------+-------+
* 0 | vdev1 | GRID | ASIZE | * 0 | vdev1 | pad | ASIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+ * +-------+-------+-------+-------+-------+-------+-------+-------+
* 1 |G| offset1 | * 1 |G| offset1 |
* +-------+-------+-------+-------+-------+-------+-------+-------+ * +-------+-------+-------+-------+-------+-------+-------+-------+
* 2 | vdev2 | GRID | ASIZE | * 2 | vdev2 | pad | ASIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+ * +-------+-------+-------+-------+-------+-------+-------+-------+
* 3 |G| offset2 | * 3 |G| offset2 |
* +-------+-------+-------+-------+-------+-------+-------+-------+ * +-------+-------+-------+-------+-------+-------+-------+-------+
@ -355,7 +354,7 @@ typedef enum bp_embedded_type {
#define BPE_NUM_WORDS 14 #define BPE_NUM_WORDS 14
#define BPE_PAYLOAD_SIZE (BPE_NUM_WORDS * sizeof (uint64_t)) #define BPE_PAYLOAD_SIZE (BPE_NUM_WORDS * sizeof (uint64_t))
#define BPE_IS_PAYLOADWORD(bp, wp) \ #define BPE_IS_PAYLOADWORD(bp, wp) \
((wp) != &(bp)->blk_prop && (wp) != &(bp)->blk_birth) ((wp) != &(bp)->blk_prop && (wp) != (&(bp)->blk_birth_word[1]))
#define SPA_BLKPTRSHIFT 7 /* blkptr_t is 128 bytes */ #define SPA_BLKPTRSHIFT 7 /* blkptr_t is 128 bytes */
#define SPA_DVAS_PER_BP 3 /* Number of DVAs in a bp */ #define SPA_DVAS_PER_BP 3 /* Number of DVAs in a bp */
@ -374,8 +373,7 @@ typedef struct blkptr {
dva_t blk_dva[SPA_DVAS_PER_BP]; /* Data Virtual Addresses */ dva_t blk_dva[SPA_DVAS_PER_BP]; /* Data Virtual Addresses */
uint64_t blk_prop; /* size, compression, type, etc */ uint64_t blk_prop; /* size, compression, type, etc */
uint64_t blk_pad[2]; /* Extra space for the future */ uint64_t blk_pad[2]; /* Extra space for the future */
uint64_t blk_phys_birth; /* txg when block was allocated */ uint64_t blk_birth_word[2];
uint64_t blk_birth; /* transaction group at birth */
uint64_t blk_fill; /* fill count */ uint64_t blk_fill; /* fill count */
zio_cksum_t blk_cksum; /* 256-bit checksum */ zio_cksum_t blk_cksum; /* 256-bit checksum */
} blkptr_t; } blkptr_t;
@ -395,9 +393,6 @@ typedef struct blkptr {
BF64_SET_SB((dva)->dva_word[0], 0, SPA_ASIZEBITS, \ BF64_SET_SB((dva)->dva_word[0], 0, SPA_ASIZEBITS, \
SPA_MINBLOCKSHIFT, 0, x) SPA_MINBLOCKSHIFT, 0, x)
#define DVA_GET_GRID(dva) BF64_GET((dva)->dva_word[0], 24, 8)
#define DVA_SET_GRID(dva, x) BF64_SET((dva)->dva_word[0], 24, 8, x)
#define DVA_GET_VDEV(dva) BF64_GET((dva)->dva_word[0], 32, SPA_VDEVBITS) #define DVA_GET_VDEV(dva) BF64_GET((dva)->dva_word[0], 32, SPA_VDEVBITS)
#define DVA_SET_VDEV(dva, x) \ #define DVA_SET_VDEV(dva, x) \
BF64_SET((dva)->dva_word[0], 32, SPA_VDEVBITS, x) BF64_SET((dva)->dva_word[0], 32, SPA_VDEVBITS, x)
@ -480,15 +475,23 @@ typedef struct blkptr {
#define BP_GET_FREE(bp) BF64_GET((bp)->blk_fill, 0, 1) #define BP_GET_FREE(bp) BF64_GET((bp)->blk_fill, 0, 1)
#define BP_SET_FREE(bp, x) BF64_SET((bp)->blk_fill, 0, 1, x) #define BP_SET_FREE(bp, x) BF64_SET((bp)->blk_fill, 0, 1, x)
#define BP_PHYSICAL_BIRTH(bp) \ #define BP_GET_LOGICAL_BIRTH(bp) (bp)->blk_birth_word[1]
#define BP_SET_LOGICAL_BIRTH(bp, x) ((bp)->blk_birth_word[1] = (x))
#define BP_GET_PHYSICAL_BIRTH(bp) (bp)->blk_birth_word[0]
#define BP_SET_PHYSICAL_BIRTH(bp, x) ((bp)->blk_birth_word[0] = (x))
#define BP_GET_BIRTH(bp) \
(BP_IS_EMBEDDED(bp) ? 0 : \ (BP_IS_EMBEDDED(bp) ? 0 : \
(bp)->blk_phys_birth ? (bp)->blk_phys_birth : (bp)->blk_birth) BP_GET_PHYSICAL_BIRTH(bp) ? BP_GET_PHYSICAL_BIRTH(bp) : \
BP_GET_LOGICAL_BIRTH(bp))
#define BP_SET_BIRTH(bp, logical, physical) \ #define BP_SET_BIRTH(bp, logical, physical) \
{ \ { \
ASSERT(!BP_IS_EMBEDDED(bp)); \ ASSERT(!BP_IS_EMBEDDED(bp)); \
(bp)->blk_birth = (logical); \ BP_SET_LOGICAL_BIRTH(bp, logical); \
(bp)->blk_phys_birth = ((logical) == (physical) ? 0 : (physical)); \ BP_SET_PHYSICAL_BIRTH(bp, \
((logical) == (physical) ? 0 : (physical))); \
} }
#define BP_GET_FILL(bp) \ #define BP_GET_FILL(bp) \
@ -541,8 +544,8 @@ typedef struct blkptr {
(dva1)->dva_word[0] == (dva2)->dva_word[0]) (dva1)->dva_word[0] == (dva2)->dva_word[0])
#define BP_EQUAL(bp1, bp2) \ #define BP_EQUAL(bp1, bp2) \
(BP_PHYSICAL_BIRTH(bp1) == BP_PHYSICAL_BIRTH(bp2) && \ (BP_GET_BIRTH(bp1) == BP_GET_BIRTH(bp2) && \
(bp1)->blk_birth == (bp2)->blk_birth && \ BP_GET_LOGICAL_BIRTH(bp1) == BP_GET_LOGICAL_BIRTH(bp2) && \
DVA_EQUAL(&(bp1)->blk_dva[0], &(bp2)->blk_dva[0]) && \ DVA_EQUAL(&(bp1)->blk_dva[0], &(bp2)->blk_dva[0]) && \
DVA_EQUAL(&(bp1)->blk_dva[1], &(bp2)->blk_dva[1]) && \ DVA_EQUAL(&(bp1)->blk_dva[1], &(bp2)->blk_dva[1]) && \
DVA_EQUAL(&(bp1)->blk_dva[2], &(bp2)->blk_dva[2])) DVA_EQUAL(&(bp1)->blk_dva[2], &(bp2)->blk_dva[2]))
@ -581,8 +584,8 @@ typedef struct blkptr {
(bp)->blk_prop = 0; \ (bp)->blk_prop = 0; \
(bp)->blk_pad[0] = 0; \ (bp)->blk_pad[0] = 0; \
(bp)->blk_pad[1] = 0; \ (bp)->blk_pad[1] = 0; \
(bp)->blk_phys_birth = 0; \ (bp)->blk_birth_word[0] = 0; \
(bp)->blk_birth = 0; \ (bp)->blk_birth_word[1] = 0; \
(bp)->blk_fill = 0; \ (bp)->blk_fill = 0; \
ZIO_SET_CHECKSUM(&(bp)->blk_cksum, 0, 0, 0, 0); \ ZIO_SET_CHECKSUM(&(bp)->blk_cksum, 0, 0, 0, 0); \
} }
@ -631,7 +634,7 @@ typedef struct blkptr {
(u_longlong_t)BP_GET_LEVEL(bp), \ (u_longlong_t)BP_GET_LEVEL(bp), \
type, \ type, \
(u_longlong_t)BP_GET_LSIZE(bp), \ (u_longlong_t)BP_GET_LSIZE(bp), \
(u_longlong_t)bp->blk_birth); \ (u_longlong_t)BP_GET_LOGICAL_BIRTH(bp)); \
} else if (BP_IS_EMBEDDED(bp)) { \ } else if (BP_IS_EMBEDDED(bp)) { \
len = func(buf + len, size - len, \ len = func(buf + len, size - len, \
"EMBEDDED [L%llu %s] et=%u %s " \ "EMBEDDED [L%llu %s] et=%u %s " \
@ -642,14 +645,14 @@ typedef struct blkptr {
compress, \ compress, \
(u_longlong_t)BPE_GET_LSIZE(bp), \ (u_longlong_t)BPE_GET_LSIZE(bp), \
(u_longlong_t)BPE_GET_PSIZE(bp), \ (u_longlong_t)BPE_GET_PSIZE(bp), \
(u_longlong_t)bp->blk_birth); \ (u_longlong_t)BP_GET_LOGICAL_BIRTH(bp)); \
} else if (BP_IS_REDACTED(bp)) { \ } else if (BP_IS_REDACTED(bp)) { \
len += func(buf + len, size - len, \ len += func(buf + len, size - len, \
"REDACTED [L%llu %s] size=%llxL birth=%lluL", \ "REDACTED [L%llu %s] size=%llxL birth=%lluL", \
(u_longlong_t)BP_GET_LEVEL(bp), \ (u_longlong_t)BP_GET_LEVEL(bp), \
type, \ type, \
(u_longlong_t)BP_GET_LSIZE(bp), \ (u_longlong_t)BP_GET_LSIZE(bp), \
(u_longlong_t)bp->blk_birth); \ (u_longlong_t)BP_GET_LOGICAL_BIRTH(bp)); \
} else { \ } else { \
for (int d = 0; d < BP_GET_NDVAS(bp); d++) { \ for (int d = 0; d < BP_GET_NDVAS(bp); d++) { \
const dva_t *dva = &bp->blk_dva[d]; \ const dva_t *dva = &bp->blk_dva[d]; \
@ -691,8 +694,8 @@ typedef struct blkptr {
ws, \ ws, \
(u_longlong_t)BP_GET_LSIZE(bp), \ (u_longlong_t)BP_GET_LSIZE(bp), \
(u_longlong_t)BP_GET_PSIZE(bp), \ (u_longlong_t)BP_GET_PSIZE(bp), \
(u_longlong_t)bp->blk_birth, \ (u_longlong_t)BP_GET_LOGICAL_BIRTH(bp), \
(u_longlong_t)BP_PHYSICAL_BIRTH(bp), \ (u_longlong_t)BP_GET_BIRTH(bp), \
(u_longlong_t)BP_GET_FILL(bp), \ (u_longlong_t)BP_GET_FILL(bp), \
ws, \ ws, \
(u_longlong_t)bp->blk_cksum.zc_word[0], \ (u_longlong_t)bp->blk_cksum.zc_word[0], \
@ -782,7 +785,7 @@ extern int bpobj_enqueue_free_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx);
#define SPA_ASYNC_DETACH_SPARE 0x4000 #define SPA_ASYNC_DETACH_SPARE 0x4000
/* device manipulation */ /* device manipulation */
extern int spa_vdev_add(spa_t *spa, nvlist_t *nvroot); extern int spa_vdev_add(spa_t *spa, nvlist_t *nvroot, boolean_t ashift_check);
extern int spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, extern int spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot,
int replacing, int rebuild); int replacing, int rebuild);
extern int spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, extern int spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid,
@ -1142,9 +1145,9 @@ extern const char *spa_state_to_name(spa_t *spa);
/* error handling */ /* error handling */
struct zbookmark_phys; struct zbookmark_phys;
extern void spa_log_error(spa_t *spa, const zbookmark_phys_t *zb, extern void spa_log_error(spa_t *spa, const zbookmark_phys_t *zb,
const uint64_t *birth); const uint64_t birth);
extern void spa_remove_error(spa_t *spa, zbookmark_phys_t *zb, extern void spa_remove_error(spa_t *spa, zbookmark_phys_t *zb,
const uint64_t *birth); uint64_t birth);
extern int zfs_ereport_post(const char *clazz, spa_t *spa, vdev_t *vd, extern int zfs_ereport_post(const char *clazz, spa_t *spa, vdev_t *vd,
const zbookmark_phys_t *zb, zio_t *zio, uint64_t state); const zbookmark_phys_t *zb, zio_t *zio, uint64_t state);
extern boolean_t zfs_ereport_is_valid(const char *clazz, spa_t *spa, vdev_t *vd, extern boolean_t zfs_ereport_is_valid(const char *clazz, spa_t *spa, vdev_t *vd,

View File

@ -165,7 +165,7 @@ struct uberblock {
* pool from a checkpointed uberblock [see spa_ld_select_uberblock()], * pool from a checkpointed uberblock [see spa_ld_select_uberblock()],
* the value of the field is used to determine which ZIL blocks have * the value of the field is used to determine which ZIL blocks have
* been allocated according to the ms_sm when we are rewinding to a * been allocated according to the ms_sm when we are rewinding to a
* checkpoint. Specifically, if blk_birth > ub_checkpoint_txg, then * checkpoint. Specifically, if logical birth > ub_checkpoint_txg, then
* the ZIL block is not allocated [see uses of spa_min_claim_txg()]. * the ZIL block is not allocated [see uses of spa_min_claim_txg()].
*/ */
uint64_t ub_checkpoint_txg; uint64_t ub_checkpoint_txg;

View File

@ -253,6 +253,9 @@ int zap_add_by_dnode(dnode_t *dn, const char *key,
int zap_add_uint64(objset_t *ds, uint64_t zapobj, const uint64_t *key, int zap_add_uint64(objset_t *ds, uint64_t zapobj, const uint64_t *key,
int key_numints, int integer_size, uint64_t num_integers, int key_numints, int integer_size, uint64_t num_integers,
const void *val, dmu_tx_t *tx); const void *val, dmu_tx_t *tx);
int zap_add_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
int key_numints, int integer_size, uint64_t num_integers,
const void *val, dmu_tx_t *tx);
/* /*
* Set the attribute with the given name to the given value. If an * Set the attribute with the given name to the given value. If an
@ -267,6 +270,9 @@ int zap_update(objset_t *ds, uint64_t zapobj, const char *name,
int zap_update_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, int zap_update_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
int key_numints, int key_numints,
int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx); int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx);
int zap_update_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
int key_numints,
int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx);
/* /*
* Get the length (in integers) and the integer size of the specified * Get the length (in integers) and the integer size of the specified
@ -292,6 +298,8 @@ int zap_remove_norm(objset_t *ds, uint64_t zapobj, const char *name,
int zap_remove_by_dnode(dnode_t *dn, const char *name, dmu_tx_t *tx); int zap_remove_by_dnode(dnode_t *dn, const char *name, dmu_tx_t *tx);
int zap_remove_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, int zap_remove_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
int key_numints, dmu_tx_t *tx); int key_numints, dmu_tx_t *tx);
int zap_remove_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
int key_numints, dmu_tx_t *tx);
/* /*
* Returns (in *count) the number of attributes in the specified zap * Returns (in *count) the number of attributes in the specified zap

View File

@ -145,6 +145,7 @@ typedef struct zap {
dmu_buf_user_t zap_dbu; dmu_buf_user_t zap_dbu;
objset_t *zap_objset; objset_t *zap_objset;
uint64_t zap_object; uint64_t zap_object;
dnode_t *zap_dnode;
struct dmu_buf *zap_dbuf; struct dmu_buf *zap_dbuf;
krwlock_t zap_rwlock; krwlock_t zap_rwlock;
boolean_t zap_ismicro; boolean_t zap_ismicro;

View File

@ -47,7 +47,7 @@ struct zap_stats;
* entries - header space (2*chunksize) * entries - header space (2*chunksize)
*/ */
#define ZAP_LEAF_NUMCHUNKS_BS(bs) \ #define ZAP_LEAF_NUMCHUNKS_BS(bs) \
(((1<<(bs)) - 2*ZAP_LEAF_HASH_NUMENTRIES_BS(bs)) / \ (((1U << (bs)) - 2 * ZAP_LEAF_HASH_NUMENTRIES_BS(bs)) / \
ZAP_LEAF_CHUNKSIZE - 2) ZAP_LEAF_CHUNKSIZE - 2)
#define ZAP_LEAF_NUMCHUNKS(l) (ZAP_LEAF_NUMCHUNKS_BS(((l)->l_bs))) #define ZAP_LEAF_NUMCHUNKS(l) (ZAP_LEAF_NUMCHUNKS_BS(((l)->l_bs)))
@ -80,7 +80,7 @@ struct zap_stats;
* chunks per entry (3). * chunks per entry (3).
*/ */
#define ZAP_LEAF_HASH_SHIFT_BS(bs) ((bs) - 5) #define ZAP_LEAF_HASH_SHIFT_BS(bs) ((bs) - 5)
#define ZAP_LEAF_HASH_NUMENTRIES_BS(bs) (1 << ZAP_LEAF_HASH_SHIFT_BS(bs)) #define ZAP_LEAF_HASH_NUMENTRIES_BS(bs) (1U << ZAP_LEAF_HASH_SHIFT_BS(bs))
#define ZAP_LEAF_HASH_SHIFT(l) (ZAP_LEAF_HASH_SHIFT_BS(((l)->l_bs))) #define ZAP_LEAF_HASH_SHIFT(l) (ZAP_LEAF_HASH_SHIFT_BS(((l)->l_bs)))
#define ZAP_LEAF_HASH_NUMENTRIES(l) (ZAP_LEAF_HASH_NUMENTRIES_BS(((l)->l_bs))) #define ZAP_LEAF_HASH_NUMENTRIES(l) (ZAP_LEAF_HASH_NUMENTRIES_BS(((l)->l_bs)))
@ -163,7 +163,7 @@ typedef struct zap_leaf {
dmu_buf_user_t l_dbu; dmu_buf_user_t l_dbu;
krwlock_t l_rwlock; krwlock_t l_rwlock;
uint64_t l_blkid; /* 1<<ZAP_BLOCK_SHIFT byte block off */ uint64_t l_blkid; /* 1<<ZAP_BLOCK_SHIFT byte block off */
int l_bs; /* block size shift */ uint_t l_bs; /* block size shift */
dmu_buf_t *l_dbuf; dmu_buf_t *l_dbuf;
} zap_leaf_t; } zap_leaf_t;
@ -243,7 +243,7 @@ extern boolean_t zap_entry_normalization_conflict(zap_entry_handle_t *zeh,
*/ */
extern void zap_leaf_init(zap_leaf_t *l, boolean_t sort); extern void zap_leaf_init(zap_leaf_t *l, boolean_t sort);
extern void zap_leaf_byteswap(zap_leaf_phys_t *buf, int len); extern void zap_leaf_byteswap(zap_leaf_phys_t *buf, size_t len);
extern void zap_leaf_split(zap_leaf_t *l, zap_leaf_t *nl, boolean_t sort); extern void zap_leaf_split(zap_leaf_t *l, zap_leaf_t *nl, boolean_t sort);
extern void zap_leaf_stats(struct zap *zap, zap_leaf_t *l, extern void zap_leaf_stats(struct zap *zap, zap_leaf_t *l,
struct zap_stats *zs); struct zap_stats *zs);

View File

@ -25,6 +25,7 @@
/* /*
* Copyright (c) 2012, 2015 by Delphix. All rights reserved. * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
* Copyright (c) 2024, Klara Inc.
*/ */
#ifndef _ZIO_IMPL_H #ifndef _ZIO_IMPL_H
@ -39,7 +40,7 @@ extern "C" {
* *
* The ZFS I/O pipeline is comprised of various stages which are defined * The ZFS I/O pipeline is comprised of various stages which are defined
* in the zio_stage enum below. The individual stages are used to construct * in the zio_stage enum below. The individual stages are used to construct
* these basic I/O operations: Read, Write, Free, Claim, and Ioctl. * these basic I/O operations: Read, Write, Free, Claim, Ioctl and Trim.
* *
* I/O operations: (XXX - provide detail for each of the operations) * I/O operations: (XXX - provide detail for each of the operations)
* *
@ -48,6 +49,7 @@ extern "C" {
* Free: * Free:
* Claim: * Claim:
* Ioctl: * Ioctl:
* Trim:
* *
* Although the most common pipelines are used by the basic I/O operations * Although the most common pipelines are used by the basic I/O operations
* above, there are some helper pipelines (one could consider them * above, there are some helper pipelines (one could consider them
@ -120,43 +122,43 @@ extern "C" {
* zio pipeline stage definitions * zio pipeline stage definitions
*/ */
enum zio_stage { enum zio_stage {
ZIO_STAGE_OPEN = 1 << 0, /* RWFCI */ ZIO_STAGE_OPEN = 1 << 0, /* RWFCIT */
ZIO_STAGE_READ_BP_INIT = 1 << 1, /* R---- */ ZIO_STAGE_READ_BP_INIT = 1 << 1, /* R----- */
ZIO_STAGE_WRITE_BP_INIT = 1 << 2, /* -W--- */ ZIO_STAGE_WRITE_BP_INIT = 1 << 2, /* -W---- */
ZIO_STAGE_FREE_BP_INIT = 1 << 3, /* --F-- */ ZIO_STAGE_FREE_BP_INIT = 1 << 3, /* --F--- */
ZIO_STAGE_ISSUE_ASYNC = 1 << 4, /* RWF-- */ ZIO_STAGE_ISSUE_ASYNC = 1 << 4, /* -WF--T */
ZIO_STAGE_WRITE_COMPRESS = 1 << 5, /* -W--- */ ZIO_STAGE_WRITE_COMPRESS = 1 << 5, /* -W---- */
ZIO_STAGE_ENCRYPT = 1 << 6, /* -W--- */ ZIO_STAGE_ENCRYPT = 1 << 6, /* -W---- */
ZIO_STAGE_CHECKSUM_GENERATE = 1 << 7, /* -W--- */ ZIO_STAGE_CHECKSUM_GENERATE = 1 << 7, /* -W---- */
ZIO_STAGE_NOP_WRITE = 1 << 8, /* -W--- */ ZIO_STAGE_NOP_WRITE = 1 << 8, /* -W---- */
ZIO_STAGE_BRT_FREE = 1 << 9, /* --F-- */ ZIO_STAGE_BRT_FREE = 1 << 9, /* --F--- */
ZIO_STAGE_DDT_READ_START = 1 << 10, /* R---- */ ZIO_STAGE_DDT_READ_START = 1 << 10, /* R----- */
ZIO_STAGE_DDT_READ_DONE = 1 << 11, /* R---- */ ZIO_STAGE_DDT_READ_DONE = 1 << 11, /* R----- */
ZIO_STAGE_DDT_WRITE = 1 << 12, /* -W--- */ ZIO_STAGE_DDT_WRITE = 1 << 12, /* -W---- */
ZIO_STAGE_DDT_FREE = 1 << 13, /* --F-- */ ZIO_STAGE_DDT_FREE = 1 << 13, /* --F--- */
ZIO_STAGE_GANG_ASSEMBLE = 1 << 14, /* RWFC- */ ZIO_STAGE_GANG_ASSEMBLE = 1 << 14, /* RWFC-- */
ZIO_STAGE_GANG_ISSUE = 1 << 15, /* RWFC- */ ZIO_STAGE_GANG_ISSUE = 1 << 15, /* RWFC-- */
ZIO_STAGE_DVA_THROTTLE = 1 << 16, /* -W--- */ ZIO_STAGE_DVA_THROTTLE = 1 << 16, /* -W---- */
ZIO_STAGE_DVA_ALLOCATE = 1 << 17, /* -W--- */ ZIO_STAGE_DVA_ALLOCATE = 1 << 17, /* -W---- */
ZIO_STAGE_DVA_FREE = 1 << 18, /* --F-- */ ZIO_STAGE_DVA_FREE = 1 << 18, /* --F--- */
ZIO_STAGE_DVA_CLAIM = 1 << 19, /* ---C- */ ZIO_STAGE_DVA_CLAIM = 1 << 19, /* ---C-- */
ZIO_STAGE_READY = 1 << 20, /* RWFCI */ ZIO_STAGE_READY = 1 << 20, /* RWFCIT */
ZIO_STAGE_VDEV_IO_START = 1 << 21, /* RW--I */ ZIO_STAGE_VDEV_IO_START = 1 << 21, /* RW--IT */
ZIO_STAGE_VDEV_IO_DONE = 1 << 22, /* RW--I */ ZIO_STAGE_VDEV_IO_DONE = 1 << 22, /* RW---T */
ZIO_STAGE_VDEV_IO_ASSESS = 1 << 23, /* RW--I */ ZIO_STAGE_VDEV_IO_ASSESS = 1 << 23, /* RW--IT */
ZIO_STAGE_CHECKSUM_VERIFY = 1 << 24, /* R---- */ ZIO_STAGE_CHECKSUM_VERIFY = 1 << 24, /* R----- */
ZIO_STAGE_DONE = 1 << 25 /* RWFCI */ ZIO_STAGE_DONE = 1 << 25 /* RWFCIT */
}; };
#define ZIO_ROOT_PIPELINE \ #define ZIO_ROOT_PIPELINE \

View File

@ -93,9 +93,9 @@ livelist_compare(const void *larg, const void *rarg)
* Since we're storing blkptrs without cancelling FREE/ALLOC pairs, * Since we're storing blkptrs without cancelling FREE/ALLOC pairs,
* it's possible the offsets are equal. In that case, sort by txg * it's possible the offsets are equal. In that case, sort by txg
*/ */
if (l->blk_birth < r->blk_birth) { if (BP_GET_LOGICAL_BIRTH(l) < BP_GET_LOGICAL_BIRTH(r)) {
return (-1); return (-1);
} else if (l->blk_birth > r->blk_birth) { } else if (BP_GET_LOGICAL_BIRTH(l) > BP_GET_LOGICAL_BIRTH(r)) {
return (+1); return (+1);
} }
return (0); return (0);

View File

@ -1112,14 +1112,11 @@
<var-decl name='prev' type-id='b03eadb4' visibility='default'/> <var-decl name='prev' type-id='b03eadb4' visibility='default'/>
</data-member> </data-member>
</class-decl> </class-decl>
<class-decl name='list' size-in-bits='256' is-struct='yes' visibility='default' id='e824dae9'> <class-decl name='list' size-in-bits='192' is-struct='yes' visibility='default' id='e824dae9'>
<data-member access='public' layout-offset-in-bits='0'> <data-member access='public' layout-offset-in-bits='0'>
<var-decl name='list_size' type-id='b59d7dce' visibility='default'/>
</data-member>
<data-member access='public' layout-offset-in-bits='64'>
<var-decl name='list_offset' type-id='b59d7dce' visibility='default'/> <var-decl name='list_offset' type-id='b59d7dce' visibility='default'/>
</data-member> </data-member>
<data-member access='public' layout-offset-in-bits='128'> <data-member access='public' layout-offset-in-bits='64'>
<var-decl name='list_head' type-id='b0b5e45e' visibility='default'/> <var-decl name='list_head' type-id='b0b5e45e' visibility='default'/>
</data-member> </data-member>
</class-decl> </class-decl>
@ -2832,6 +2829,9 @@
</function-type> </function-type>
</abi-instr> </abi-instr>
<abi-instr address-size='64' path='lib/libzfs/libzfs_crypto.c' language='LANG_C99'> <abi-instr address-size='64' path='lib/libzfs/libzfs_crypto.c' language='LANG_C99'>
<array-type-def dimensions='1' type-id='38b51b3c' size-in-bits='832' id='02b72c00'>
<subrange length='13' type-id='7359adad' id='487fded1'/>
</array-type-def>
<array-type-def dimensions='1' type-id='fb7c6451' size-in-bits='256' id='64177143'> <array-type-def dimensions='1' type-id='fb7c6451' size-in-bits='256' id='64177143'>
<subrange length='32' type-id='7359adad' id='ae5bde82'/> <subrange length='32' type-id='7359adad' id='ae5bde82'/>
</array-type-def> </array-type-def>
@ -2844,6 +2844,10 @@
<class-decl name='_IO_codecvt' is-struct='yes' visibility='default' is-declaration-only='yes' id='a4036571'/> <class-decl name='_IO_codecvt' is-struct='yes' visibility='default' is-declaration-only='yes' id='a4036571'/>
<class-decl name='_IO_marker' is-struct='yes' visibility='default' is-declaration-only='yes' id='010ae0b9'/> <class-decl name='_IO_marker' is-struct='yes' visibility='default' is-declaration-only='yes' id='010ae0b9'/>
<class-decl name='_IO_wide_data' is-struct='yes' visibility='default' is-declaration-only='yes' id='79bd3751'/> <class-decl name='_IO_wide_data' is-struct='yes' visibility='default' is-declaration-only='yes' id='79bd3751'/>
<class-decl name='__locale_data' is-struct='yes' visibility='default' is-declaration-only='yes' id='23de8b96'/>
<array-type-def dimensions='1' type-id='80f4b756' size-in-bits='832' id='39e6f84a'>
<subrange length='13' type-id='7359adad' id='487fded1'/>
</array-type-def>
<array-type-def dimensions='1' type-id='95e97e5e' size-in-bits='896' id='47394ee0'> <array-type-def dimensions='1' type-id='95e97e5e' size-in-bits='896' id='47394ee0'>
<subrange length='28' type-id='7359adad' id='3db583d7'/> <subrange length='28' type-id='7359adad' id='3db583d7'/>
</array-type-def> </array-type-def>
@ -2964,6 +2968,24 @@
<typedef-decl name='__clock_t' type-id='bd54fe1a' id='4d66c6d7'/> <typedef-decl name='__clock_t' type-id='bd54fe1a' id='4d66c6d7'/>
<typedef-decl name='__ssize_t' type-id='bd54fe1a' id='41060289'/> <typedef-decl name='__ssize_t' type-id='bd54fe1a' id='41060289'/>
<typedef-decl name='FILE' type-id='ec1ed955' id='aa12d1ba'/> <typedef-decl name='FILE' type-id='ec1ed955' id='aa12d1ba'/>
<class-decl name='__locale_struct' size-in-bits='1856' is-struct='yes' visibility='default' id='90cc1ce3'>
<data-member access='public' layout-offset-in-bits='0'>
<var-decl name='__locales' type-id='02b72c00' visibility='default'/>
</data-member>
<data-member access='public' layout-offset-in-bits='832'>
<var-decl name='__ctype_b' type-id='31347b7a' visibility='default'/>
</data-member>
<data-member access='public' layout-offset-in-bits='896'>
<var-decl name='__ctype_tolower' type-id='6d60f45d' visibility='default'/>
</data-member>
<data-member access='public' layout-offset-in-bits='960'>
<var-decl name='__ctype_toupper' type-id='6d60f45d' visibility='default'/>
</data-member>
<data-member access='public' layout-offset-in-bits='1024'>
<var-decl name='__names' type-id='39e6f84a' visibility='default'/>
</data-member>
</class-decl>
<typedef-decl name='__locale_t' type-id='f01e1813' id='b7ac9b5f'/>
<class-decl name='__sigset_t' size-in-bits='1024' is-struct='yes' naming-typedef-id='b9c97942' visibility='default' id='2616147f'> <class-decl name='__sigset_t' size-in-bits='1024' is-struct='yes' naming-typedef-id='b9c97942' visibility='default' id='2616147f'>
<data-member access='public' layout-offset-in-bits='0'> <data-member access='public' layout-offset-in-bits='0'>
<var-decl name='__val' type-id='d2baa450' visibility='default'/> <var-decl name='__val' type-id='d2baa450' visibility='default'/>
@ -2979,6 +3001,7 @@
</data-member> </data-member>
</union-decl> </union-decl>
<typedef-decl name='__sigval_t' type-id='a094b870' id='eabacd01'/> <typedef-decl name='__sigval_t' type-id='a094b870' id='eabacd01'/>
<typedef-decl name='locale_t' type-id='b7ac9b5f' id='973a4f8d'/>
<class-decl name='siginfo_t' size-in-bits='1024' is-struct='yes' naming-typedef-id='cb681f62' visibility='default' id='d8149419'> <class-decl name='siginfo_t' size-in-bits='1024' is-struct='yes' naming-typedef-id='cb681f62' visibility='default' id='d8149419'>
<data-member access='public' layout-offset-in-bits='0'> <data-member access='public' layout-offset-in-bits='0'>
<var-decl name='si_signo' type-id='95e97e5e' visibility='default'/> <var-decl name='si_signo' type-id='95e97e5e' visibility='default'/>
@ -3214,9 +3237,13 @@
<pointer-type-def type-id='bb4788fa' size-in-bits='64' id='cecf4ea7'/> <pointer-type-def type-id='bb4788fa' size-in-bits='64' id='cecf4ea7'/>
<pointer-type-def type-id='010ae0b9' size-in-bits='64' id='e4c6fa61'/> <pointer-type-def type-id='010ae0b9' size-in-bits='64' id='e4c6fa61'/>
<pointer-type-def type-id='79bd3751' size-in-bits='64' id='c65a1f29'/> <pointer-type-def type-id='79bd3751' size-in-bits='64' id='c65a1f29'/>
<pointer-type-def type-id='23de8b96' size-in-bits='64' id='38b51b3c'/>
<pointer-type-def type-id='90cc1ce3' size-in-bits='64' id='f01e1813'/>
<qualified-type-def type-id='9b23c9ad' restrict='yes' id='8c85230f'/> <qualified-type-def type-id='9b23c9ad' restrict='yes' id='8c85230f'/>
<qualified-type-def type-id='80f4b756' restrict='yes' id='9d26089a'/> <qualified-type-def type-id='80f4b756' restrict='yes' id='9d26089a'/>
<pointer-type-def type-id='80f4b756' size-in-bits='64' id='7d3cd834'/> <pointer-type-def type-id='80f4b756' size-in-bits='64' id='7d3cd834'/>
<qualified-type-def type-id='95e97e5e' const='yes' id='2448a865'/>
<pointer-type-def type-id='2448a865' size-in-bits='64' id='6d60f45d'/>
<qualified-type-def type-id='aca3bac8' const='yes' id='2498fd78'/> <qualified-type-def type-id='aca3bac8' const='yes' id='2498fd78'/>
<pointer-type-def type-id='2498fd78' size-in-bits='64' id='eed6c816'/> <pointer-type-def type-id='2498fd78' size-in-bits='64' id='eed6c816'/>
<qualified-type-def type-id='eed6c816' restrict='yes' id='a431a9da'/> <qualified-type-def type-id='eed6c816' restrict='yes' id='a431a9da'/>
@ -3249,6 +3276,7 @@
<class-decl name='_IO_codecvt' is-struct='yes' visibility='default' is-declaration-only='yes' id='a4036571'/> <class-decl name='_IO_codecvt' is-struct='yes' visibility='default' is-declaration-only='yes' id='a4036571'/>
<class-decl name='_IO_marker' is-struct='yes' visibility='default' is-declaration-only='yes' id='010ae0b9'/> <class-decl name='_IO_marker' is-struct='yes' visibility='default' is-declaration-only='yes' id='010ae0b9'/>
<class-decl name='_IO_wide_data' is-struct='yes' visibility='default' is-declaration-only='yes' id='79bd3751'/> <class-decl name='_IO_wide_data' is-struct='yes' visibility='default' is-declaration-only='yes' id='79bd3751'/>
<class-decl name='__locale_data' is-struct='yes' visibility='default' is-declaration-only='yes' id='23de8b96'/>
<function-decl name='zpool_get_prop_int' mangled-name='zpool_get_prop_int' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_get_prop_int'> <function-decl name='zpool_get_prop_int' mangled-name='zpool_get_prop_int' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_get_prop_int'>
<parameter type-id='4c81de99'/> <parameter type-id='4c81de99'/>
<parameter type-id='5d0c23fb'/> <parameter type-id='5d0c23fb'/>
@ -3353,6 +3381,10 @@
<function-decl name='dlerror' visibility='default' binding='global' size-in-bits='64'> <function-decl name='dlerror' visibility='default' binding='global' size-in-bits='64'>
<return type-id='26a90f95'/> <return type-id='26a90f95'/>
</function-decl> </function-decl>
<function-decl name='uselocale' visibility='default' binding='global' size-in-bits='64'>
<parameter type-id='973a4f8d'/>
<return type-id='973a4f8d'/>
</function-decl>
<function-decl name='PKCS5_PBKDF2_HMAC_SHA1' visibility='default' binding='global' size-in-bits='64'> <function-decl name='PKCS5_PBKDF2_HMAC_SHA1' visibility='default' binding='global' size-in-bits='64'>
<parameter type-id='80f4b756'/> <parameter type-id='80f4b756'/>
<parameter type-id='95e97e5e'/> <parameter type-id='95e97e5e'/>
@ -3436,8 +3468,9 @@
<parameter type-id='80f4b756'/> <parameter type-id='80f4b756'/>
<return type-id='26a90f95'/> <return type-id='26a90f95'/>
</function-decl> </function-decl>
<function-decl name='strerror' visibility='default' binding='global' size-in-bits='64'> <function-decl name='strerror_l' visibility='default' binding='global' size-in-bits='64'>
<parameter type-id='95e97e5e'/> <parameter type-id='95e97e5e'/>
<parameter type-id='973a4f8d'/>
<return type-id='26a90f95'/> <return type-id='26a90f95'/>
</function-decl> </function-decl>
<function-decl name='tcgetattr' visibility='default' binding='global' size-in-bits='64'> <function-decl name='tcgetattr' visibility='default' binding='global' size-in-bits='64'>
@ -3794,12 +3827,18 @@
<qualified-type-def type-id='9c313c2d' const='yes' id='c3b7ba7d'/> <qualified-type-def type-id='9c313c2d' const='yes' id='c3b7ba7d'/>
<pointer-type-def type-id='c3b7ba7d' size-in-bits='64' id='713a56f5'/> <pointer-type-def type-id='c3b7ba7d' size-in-bits='64' id='713a56f5'/>
<pointer-type-def type-id='01a1b934' size-in-bits='64' id='566b3f52'/> <pointer-type-def type-id='01a1b934' size-in-bits='64' id='566b3f52'/>
<qualified-type-def type-id='566b3f52' restrict='yes' id='c878edd6'/>
<pointer-type-def type-id='566b3f52' size-in-bits='64' id='82d4e9e8'/>
<qualified-type-def type-id='82d4e9e8' restrict='yes' id='aa19c230'/>
<pointer-type-def type-id='7e291ce6' size-in-bits='64' id='ca64ff60'/> <pointer-type-def type-id='7e291ce6' size-in-bits='64' id='ca64ff60'/>
<pointer-type-def type-id='9da381c4' size-in-bits='64' id='cb785ebf'/> <pointer-type-def type-id='9da381c4' size-in-bits='64' id='cb785ebf'/>
<pointer-type-def type-id='1b055409' size-in-bits='64' id='9d424d31'/> <pointer-type-def type-id='1b055409' size-in-bits='64' id='9d424d31'/>
<pointer-type-def type-id='8e0af06e' size-in-bits='64' id='053457bd'/> <pointer-type-def type-id='8e0af06e' size-in-bits='64' id='053457bd'/>
<pointer-type-def type-id='857bb57e' size-in-bits='64' id='75be733c'/> <pointer-type-def type-id='857bb57e' size-in-bits='64' id='75be733c'/>
<pointer-type-def type-id='a63d15a3' size-in-bits='64' id='a195f4a3'/> <pointer-type-def type-id='a63d15a3' size-in-bits='64' id='a195f4a3'/>
<qualified-type-def type-id='a195f4a3' restrict='yes' id='33518961'/>
<pointer-type-def type-id='a195f4a3' size-in-bits='64' id='e80ff3ab'/>
<qualified-type-def type-id='e80ff3ab' restrict='yes' id='8f2c7109'/>
<pointer-type-def type-id='eae6431d' size-in-bits='64' id='0d41d328'/> <pointer-type-def type-id='eae6431d' size-in-bits='64' id='0d41d328'/>
<pointer-type-def type-id='7a6844eb' size-in-bits='64' id='18c91f9e'/> <pointer-type-def type-id='7a6844eb' size-in-bits='64' id='18c91f9e'/>
<pointer-type-def type-id='dddf6ca2' size-in-bits='64' id='d915a820'/> <pointer-type-def type-id='dddf6ca2' size-in-bits='64' id='d915a820'/>
@ -4232,9 +4271,13 @@
<parameter type-id='9d424d31'/> <parameter type-id='9d424d31'/>
<return type-id='95e97e5e'/> <return type-id='95e97e5e'/>
</function-decl> </function-decl>
<function-decl name='getgrnam' visibility='default' binding='global' size-in-bits='64'> <function-decl name='getgrnam_r' visibility='default' binding='global' size-in-bits='64'>
<parameter type-id='80f4b756'/> <parameter type-id='9d26089a'/>
<return type-id='566b3f52'/> <parameter type-id='c878edd6'/>
<parameter type-id='266fe297'/>
<parameter type-id='b59d7dce'/>
<parameter type-id='aa19c230'/>
<return type-id='95e97e5e'/>
</function-decl> </function-decl>
<function-decl name='hasmntopt' visibility='default' binding='global' size-in-bits='64'> <function-decl name='hasmntopt' visibility='default' binding='global' size-in-bits='64'>
<parameter type-id='48bea5ec'/> <parameter type-id='48bea5ec'/>
@ -4258,9 +4301,13 @@
<parameter type-id='18c91f9e'/> <parameter type-id='18c91f9e'/>
<return type-id='95e97e5e'/> <return type-id='95e97e5e'/>
</function-decl> </function-decl>
<function-decl name='getpwnam' visibility='default' binding='global' size-in-bits='64'> <function-decl name='getpwnam_r' visibility='default' binding='global' size-in-bits='64'>
<parameter type-id='80f4b756'/> <parameter type-id='9d26089a'/>
<return type-id='a195f4a3'/> <parameter type-id='33518961'/>
<parameter type-id='266fe297'/>
<parameter type-id='b59d7dce'/>
<parameter type-id='8f2c7109'/>
<return type-id='95e97e5e'/>
</function-decl> </function-decl>
<function-decl name='strtol' visibility='default' binding='global' size-in-bits='64'> <function-decl name='strtol' visibility='default' binding='global' size-in-bits='64'>
<parameter type-id='9d26089a'/> <parameter type-id='9d26089a'/>
@ -6315,6 +6362,7 @@
<function-decl name='zpool_add' mangled-name='zpool_add' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_add'> <function-decl name='zpool_add' mangled-name='zpool_add' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_add'>
<parameter type-id='4c81de99' name='zhp'/> <parameter type-id='4c81de99' name='zhp'/>
<parameter type-id='5ce45b60' name='nvroot'/> <parameter type-id='5ce45b60' name='nvroot'/>
<parameter type-id='c19b74c3' name='ashift_check'/>
<return type-id='95e97e5e'/> <return type-id='95e97e5e'/>
</function-decl> </function-decl>
<function-decl name='zpool_export' mangled-name='zpool_export' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_export'> <function-decl name='zpool_export' mangled-name='zpool_export' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_export'>
@ -6778,7 +6826,7 @@
<enumerator name='LZC_SEND_FLAG_RAW' value='8'/> <enumerator name='LZC_SEND_FLAG_RAW' value='8'/>
<enumerator name='LZC_SEND_FLAG_SAVED' value='16'/> <enumerator name='LZC_SEND_FLAG_SAVED' value='16'/>
</enum-decl> </enum-decl>
<class-decl name='ddt_key' size-in-bits='320' is-struct='yes' visibility='default' id='e0a4a1cb'> <class-decl name='ddt_key_t' size-in-bits='320' is-struct='yes' naming-typedef-id='67f6d2cf' visibility='default' id='5fae1718'>
<data-member access='public' layout-offset-in-bits='0'> <data-member access='public' layout-offset-in-bits='0'>
<var-decl name='ddk_cksum' type-id='39730d0b' visibility='default'/> <var-decl name='ddk_cksum' type-id='39730d0b' visibility='default'/>
</data-member> </data-member>
@ -6786,7 +6834,7 @@
<var-decl name='ddk_prop' type-id='9c313c2d' visibility='default'/> <var-decl name='ddk_prop' type-id='9c313c2d' visibility='default'/>
</data-member> </data-member>
</class-decl> </class-decl>
<typedef-decl name='ddt_key_t' type-id='e0a4a1cb' id='67f6d2cf'/> <typedef-decl name='ddt_key_t' type-id='5fae1718' id='67f6d2cf'/>
<enum-decl name='dmu_object_type' id='04b3b0b9'> <enum-decl name='dmu_object_type' id='04b3b0b9'>
<underlying-type type-id='9cac1fee'/> <underlying-type type-id='9cac1fee'/>
<enumerator name='DMU_OT_NONE' value='0'/> <enumerator name='DMU_OT_NONE' value='0'/>

View File

@ -22,7 +22,7 @@
/* /*
* Copyright 2015 Nexenta Systems, Inc. All rights reserved. * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2020 by Delphix. All rights reserved. * Copyright (c) 2011, 2024 by Delphix. All rights reserved.
* Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com> * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
* Copyright (c) 2018 Datto Inc. * Copyright (c) 2018 Datto Inc.
* Copyright (c) 2017 Open-E, Inc. All Rights Reserved. * Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
@ -1724,7 +1724,7 @@ zpool_discard_checkpoint(zpool_handle_t *zhp)
* necessary verification to ensure that the vdev specification is well-formed. * necessary verification to ensure that the vdev specification is well-formed.
*/ */
int int
zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot) zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot, boolean_t check_ashift)
{ {
zfs_cmd_t zc = {"\0"}; zfs_cmd_t zc = {"\0"};
int ret; int ret;
@ -1756,6 +1756,7 @@ zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
zcmd_write_conf_nvlist(hdl, &zc, nvroot); zcmd_write_conf_nvlist(hdl, &zc, nvroot);
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
zc.zc_flags = check_ashift;
if (zfs_ioctl(hdl, ZFS_IOC_VDEV_ADD, &zc) != 0) { if (zfs_ioctl(hdl, ZFS_IOC_VDEV_ADD, &zc) != 0) {
switch (errno) { switch (errno) {

View File

@ -22,7 +22,7 @@
/* /*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2020 Joyent, Inc. All rights reserved. * Copyright 2020 Joyent, Inc. All rights reserved.
* Copyright (c) 2011, 2020 by Delphix. All rights reserved. * Copyright (c) 2011, 2024 by Delphix. All rights reserved.
* Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com> * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
* Copyright (c) 2017 Datto Inc. * Copyright (c) 2017 Datto Inc.
* Copyright (c) 2020 The FreeBSD Foundation * Copyright (c) 2020 The FreeBSD Foundation
@ -319,6 +319,9 @@ libzfs_error_description(libzfs_handle_t *hdl)
"dataset without force")); "dataset without force"));
case EZFS_RAIDZ_EXPAND_IN_PROGRESS: case EZFS_RAIDZ_EXPAND_IN_PROGRESS:
return (dgettext(TEXT_DOMAIN, "raidz expansion in progress")); return (dgettext(TEXT_DOMAIN, "raidz expansion in progress"));
case EZFS_ASHIFT_MISMATCH:
return (dgettext(TEXT_DOMAIN, "adding devices with "
"different physical sector sizes is not allowed"));
case EZFS_UNKNOWN: case EZFS_UNKNOWN:
return (dgettext(TEXT_DOMAIN, "unknown error")); return (dgettext(TEXT_DOMAIN, "unknown error"));
default: default:
@ -768,6 +771,9 @@ zpool_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...)
case ZFS_ERR_RAIDZ_EXPAND_IN_PROGRESS: case ZFS_ERR_RAIDZ_EXPAND_IN_PROGRESS:
zfs_verror(hdl, EZFS_RAIDZ_EXPAND_IN_PROGRESS, fmt, ap); zfs_verror(hdl, EZFS_RAIDZ_EXPAND_IN_PROGRESS, fmt, ap);
break; break;
case ZFS_ERR_ASHIFT_MISMATCH:
zfs_verror(hdl, EZFS_ASHIFT_MISMATCH, fmt, ap);
break;
default: default:
zfs_error_aux(hdl, "%s", zfs_strerror(error)); zfs_error_aux(hdl, "%s", zfs_strerror(error));
zfs_verror(hdl, EZFS_UNKNOWN, fmt, ap); zfs_verror(hdl, EZFS_UNKNOWN, fmt, ap);

View File

@ -2,6 +2,7 @@
.\" Copyright (c) 2013 by Turbo Fredriksson <turbo@bayour.com>. All rights reserved. .\" Copyright (c) 2013 by Turbo Fredriksson <turbo@bayour.com>. All rights reserved.
.\" Copyright (c) 2019, 2021 by Delphix. All rights reserved. .\" Copyright (c) 2019, 2021 by Delphix. All rights reserved.
.\" Copyright (c) 2019 Datto Inc. .\" Copyright (c) 2019 Datto Inc.
.\" Copyright (c) 2023, 2024 Klara, Inc.
.\" The contents of this file are subject to the terms of the Common Development .\" The contents of this file are subject to the terms of the Common Development
.\" and Distribution License (the "License"). You may not use this file except .\" and Distribution License (the "License"). You may not use this file except
.\" in compliance with the License. You can obtain a copy of the license at .\" in compliance with the License. You can obtain a copy of the license at
@ -15,7 +16,7 @@
.\" own identifying information: .\" own identifying information:
.\" Portions Copyright [yyyy] [name of copyright owner] .\" Portions Copyright [yyyy] [name of copyright owner]
.\" .\"
.Dd July 21, 2023 .Dd January 9, 2024
.Dt ZFS 4 .Dt ZFS 4
.Os .Os
. .
@ -244,12 +245,25 @@ For blocks that could be forced to be a gang block (due to
.Sy metaslab_force_ganging ) , .Sy metaslab_force_ganging ) ,
force this many of them to be gang blocks. force this many of them to be gang blocks.
. .
.It Sy zfs_ddt_zap_default_bs Ns = Ns Sy 15 Po 32 KiB Pc Pq int .It Sy brt_zap_prefetch Ns = Ns Sy 1 Ns | Ns 0 Pq int
Controls prefetching BRT records for blocks which are going to be cloned.
.
.It Sy brt_zap_default_bs Ns = Ns Sy 12 Po 4 KiB Pc Pq int
Default BRT ZAP data block size as a power of 2. Note that changing this after
creating a BRT on the pool will not affect existing BRTs, only newly created
ones.
.
.It Sy brt_zap_default_ibs Ns = Ns Sy 12 Po 4 KiB Pc Pq int
Default BRT ZAP indirect block size as a power of 2. Note that changing this
after creating a BRT on the pool will not affect existing BRTs, only newly
created ones.
.
.It Sy ddt_zap_default_bs Ns = Ns Sy 15 Po 32 KiB Pc Pq int
Default DDT ZAP data block size as a power of 2. Note that changing this after Default DDT ZAP data block size as a power of 2. Note that changing this after
creating a DDT on the pool will not affect existing DDTs, only newly created creating a DDT on the pool will not affect existing DDTs, only newly created
ones. ones.
. .
.It Sy zfs_ddt_zap_default_ibs Ns = Ns Sy 15 Po 32 KiB Pc Pq int .It Sy ddt_zap_default_ibs Ns = Ns Sy 15 Po 32 KiB Pc Pq int
Default DDT ZAP indirect block size as a power of 2. Note that changing this Default DDT ZAP indirect block size as a power of 2. Note that changing this
after creating a DDT on the pool will not affect existing DDTs, only newly after creating a DDT on the pool will not affect existing DDTs, only newly
created ones. created ones.
@ -1362,6 +1376,29 @@ _
4 Driver No driver retries on driver errors. 4 Driver No driver retries on driver errors.
.TE .TE
. .
.It Sy zfs_vdev_disk_max_segs Ns = Ns Sy 0 Pq uint
Maximum number of segments to add to a BIO (min 4).
If this is higher than the maximum allowed by the device queue or the kernel
itself, it will be clamped.
Setting it to zero will cause the kernel's ideal size to be used.
This parameter only applies on Linux.
This parameter is ignored if
.Sy zfs_vdev_disk_classic Ns = Ns Sy 1 .
.
.It Sy zfs_vdev_disk_classic Ns = Ns Sy 0 Ns | Ns 1 Pq uint
If set to 1, OpenZFS will submit IO to Linux using the method it used in 2.2
and earlier.
This "classic" method has known issues with highly fragmented IO requests and
is slower on many workloads, but it has been in use for many years and is known
to be very stable.
If you set this parameter, please also open a bug report explaining why you did so,
including the workload involved and any error messages.
.Pp
This parameter and the classic submission method will be removed once we have
total confidence in the new method.
.Pp
This parameter only applies on Linux, and can only be set at module load time.
.
.It Sy zfs_expire_snapshot Ns = Ns Sy 300 Ns s Pq int .It Sy zfs_expire_snapshot Ns = Ns Sy 300 Ns s Pq int
Time before expiring Time before expiring
.Pa .zfs/snapshot . .Pa .zfs/snapshot .

View File

@ -24,8 +24,9 @@
.\" Copyright (c) 2018 George Melikov. All Rights Reserved. .\" Copyright (c) 2018 George Melikov. All Rights Reserved.
.\" Copyright 2017 Nexenta Systems, Inc. .\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\" Copyright (c) 2024 by Delphix. All Rights Reserved.
.\" .\"
.Dd March 16, 2022 .Dd March 8, 2024
.Dt ZPOOL-ADD 8 .Dt ZPOOL-ADD 8
.Os .Os
. .
@ -36,6 +37,7 @@
.Nm zpool .Nm zpool
.Cm add .Cm add
.Op Fl fgLnP .Op Fl fgLnP
.Op Fl -allow-in-use -allow-replication-mismatch -allow-ashift-mismatch
.Oo Fl o Ar property Ns = Ns Ar value Oc .Oo Fl o Ar property Ns = Ns Ar value Oc
.Ar pool vdev Ns .Ar pool vdev Ns
. .
@ -56,7 +58,8 @@ subcommand.
.It Fl f .It Fl f
Forces use of Forces use of
.Ar vdev Ns s , .Ar vdev Ns s ,
even if they appear in use or specify a conflicting replication level. even if they appear in use, have conflicting ashift values, or specify
a conflicting replication level.
Not all devices can be overridden in this manner. Not all devices can be overridden in this manner.
.It Fl g .It Fl g
Display Display
@ -91,6 +94,17 @@ See the
manual page for a list of valid properties that can be set. manual page for a list of valid properties that can be set.
The only property supported at the moment is The only property supported at the moment is
.Sy ashift . .Sy ashift .
.It Fl -allow-ashift-mismatch
Disable the ashift validation, allowing mismatched ashift values in the
pool.
Adding top-level
.Ar vdev Ns s
with different sector sizes will prohibit future device removal operations, see
.Xr zpool-remove 8 .
.It Fl -allow-in-use
Allow vdevs to be added even if they might be in use in another pool.
.It Fl -allow-replication-mismatch
Allow vdevs with conflicting replication levels to be added to the pool.
.El .El
. .
.Sh EXAMPLES .Sh EXAMPLES

View File

@ -25,8 +25,9 @@
.\" Copyright (c) 2018 George Melikov. All Rights Reserved. .\" Copyright (c) 2018 George Melikov. All Rights Reserved.
.\" Copyright 2017 Nexenta Systems, Inc. .\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\" Copyright (c) 2024, Klara Inc.
.\" .\"
.Dd July 11, 2023 .Dd February 28, 2024
.Dt ZPOOL-EVENTS 8 .Dt ZPOOL-EVENTS 8
.Os .Os
. .
@ -363,7 +364,7 @@ that is, the bits set in the good data which are cleared in the bad data.
.Sh I/O STAGES .Sh I/O STAGES
The ZFS I/O pipeline is comprised of various stages which are defined below. The ZFS I/O pipeline is comprised of various stages which are defined below.
The individual stages are used to construct these basic I/O The individual stages are used to construct these basic I/O
operations: Read, Write, Free, Claim, and Ioctl. operations: Read, Write, Free, Claim, Ioctl and Trim.
These stages may be These stages may be
set on an event to describe the life cycle of a given I/O request. set on an event to describe the life cycle of a given I/O request.
.Pp .Pp
@ -372,43 +373,43 @@ tab(:);
l l l . l l l .
Stage:Bit Mask:Operations Stage:Bit Mask:Operations
_:_:_ _:_:_
ZIO_STAGE_OPEN:0x00000001:RWFCI ZIO_STAGE_OPEN:0x00000001:RWFCIT
ZIO_STAGE_READ_BP_INIT:0x00000002:R---- ZIO_STAGE_READ_BP_INIT:0x00000002:R-----
ZIO_STAGE_WRITE_BP_INIT:0x00000004:-W--- ZIO_STAGE_WRITE_BP_INIT:0x00000004:-W----
ZIO_STAGE_FREE_BP_INIT:0x00000008:--F-- ZIO_STAGE_FREE_BP_INIT:0x00000008:--F---
ZIO_STAGE_ISSUE_ASYNC:0x00000010:RWF-- ZIO_STAGE_ISSUE_ASYNC:0x00000010:-WF--T
ZIO_STAGE_WRITE_COMPRESS:0x00000020:-W--- ZIO_STAGE_WRITE_COMPRESS:0x00000020:-W----
ZIO_STAGE_ENCRYPT:0x00000040:-W--- ZIO_STAGE_ENCRYPT:0x00000040:-W----
ZIO_STAGE_CHECKSUM_GENERATE:0x00000080:-W--- ZIO_STAGE_CHECKSUM_GENERATE:0x00000080:-W----
ZIO_STAGE_NOP_WRITE:0x00000100:-W--- ZIO_STAGE_NOP_WRITE:0x00000100:-W----
ZIO_STAGE_BRT_FREE:0x00000200:--F-- ZIO_STAGE_BRT_FREE:0x00000200:--F---
ZIO_STAGE_DDT_READ_START:0x00000400:R---- ZIO_STAGE_DDT_READ_START:0x00000400:R-----
ZIO_STAGE_DDT_READ_DONE:0x00000800:R---- ZIO_STAGE_DDT_READ_DONE:0x00000800:R-----
ZIO_STAGE_DDT_WRITE:0x00001000:-W--- ZIO_STAGE_DDT_WRITE:0x00001000:-W----
ZIO_STAGE_DDT_FREE:0x00002000:--F-- ZIO_STAGE_DDT_FREE:0x00002000:--F---
ZIO_STAGE_GANG_ASSEMBLE:0x00004000:RWFC- ZIO_STAGE_GANG_ASSEMBLE:0x00004000:RWFC--
ZIO_STAGE_GANG_ISSUE:0x00008000:RWFC- ZIO_STAGE_GANG_ISSUE:0x00008000:RWFC--
ZIO_STAGE_DVA_THROTTLE:0x00010000:-W--- ZIO_STAGE_DVA_THROTTLE:0x00010000:-W----
ZIO_STAGE_DVA_ALLOCATE:0x00020000:-W--- ZIO_STAGE_DVA_ALLOCATE:0x00020000:-W----
ZIO_STAGE_DVA_FREE:0x00040000:--F-- ZIO_STAGE_DVA_FREE:0x00040000:--F---
ZIO_STAGE_DVA_CLAIM:0x00080000:---C- ZIO_STAGE_DVA_CLAIM:0x00080000:---C--
ZIO_STAGE_READY:0x00100000:RWFCI ZIO_STAGE_READY:0x00100000:RWFCIT
ZIO_STAGE_VDEV_IO_START:0x00200000:RW--I ZIO_STAGE_VDEV_IO_START:0x00200000:RW--IT
ZIO_STAGE_VDEV_IO_DONE:0x00400000:RW--I ZIO_STAGE_VDEV_IO_DONE:0x00400000:RW---T
ZIO_STAGE_VDEV_IO_ASSESS:0x00800000:RW--I ZIO_STAGE_VDEV_IO_ASSESS:0x00800000:RW--IT
ZIO_STAGE_CHECKSUM_VERIFY:0x01000000:R---- ZIO_STAGE_CHECKSUM_VERIFY:0x01000000:R-----
ZIO_STAGE_DONE:0x02000000:RWFCI ZIO_STAGE_DONE:0x02000000:RWFCIT
.TE .TE
. .
.Sh I/O FLAGS .Sh I/O FLAGS

View File

@ -36,7 +36,7 @@
.Sh SYNOPSIS .Sh SYNOPSIS
.Nm zpool .Nm zpool
.Cm status .Cm status
.Op Fl DeigLpPstvx .Op Fl DegiLpPstvx
.Op Fl T Sy u Ns | Ns Sy d .Op Fl T Sy u Ns | Ns Sy d
.Op Fl c Op Ar SCRIPT1 Ns Oo , Ns Ar SCRIPT2 Oc Ns .Op Fl c Op Ar SCRIPT1 Ns Oo , Ns Ar SCRIPT2 Oc Ns
.Oo Ar pool Oc Ns .Oo Ar pool Oc Ns
@ -69,14 +69,20 @@ See the
option of option of
.Nm zpool Cm iostat .Nm zpool Cm iostat
for complete details. for complete details.
.It Fl D
Display a histogram of deduplication statistics, showing the allocated
.Pq physically present on disk
and referenced
.Pq logically referenced in the pool
block counts and sizes by reference count.
.It Fl e .It Fl e
Only show unhealthy vdevs (not-ONLINE or with errors). Only show unhealthy vdevs (not-ONLINE or with errors).
.It Fl i
Display vdev initialization status.
.It Fl g .It Fl g
Display vdev GUIDs instead of the normal device names Display vdev GUIDs instead of the normal device names
These GUIDs can be used in place of device names for the zpool These GUIDs can be used in place of device names for the zpool
detach/offline/remove/replace commands. detach/offline/remove/replace commands.
.It Fl i
Display vdev initialization status.
.It Fl L .It Fl L
Display real paths for vdevs resolving all symbolic links. Display real paths for vdevs resolving all symbolic links.
This can be used to look up the current block device name regardless of the This can be used to look up the current block device name regardless of the
@ -90,12 +96,6 @@ the path.
This can be used in conjunction with the This can be used in conjunction with the
.Fl L .Fl L
flag. flag.
.It Fl D
Display a histogram of deduplication statistics, showing the allocated
.Pq physically present on disk
and referenced
.Pq logically referenced in the pool
block counts and sizes by reference count.
.It Fl s .It Fl s
Display the number of leaf vdev slow I/O operations. Display the number of leaf vdev slow I/O operations.
This is the number of I/O operations that didn't complete in This is the number of I/O operations that didn't complete in

View File

@ -417,10 +417,8 @@ abd_iter_init(struct abd_iter *aiter, abd_t *abd)
{ {
ASSERT(!abd_is_gang(abd)); ASSERT(!abd_is_gang(abd));
abd_verify(abd); abd_verify(abd);
memset(aiter, 0, sizeof (struct abd_iter));
aiter->iter_abd = abd; aiter->iter_abd = abd;
aiter->iter_pos = 0;
aiter->iter_mapaddr = NULL;
aiter->iter_mapsize = 0;
} }
/* /*

View File

@ -21,6 +21,7 @@
/* /*
* Copyright (c) 2014 by Chunwei Chen. All rights reserved. * Copyright (c) 2014 by Chunwei Chen. All rights reserved.
* Copyright (c) 2019 by Delphix. All rights reserved. * Copyright (c) 2019 by Delphix. All rights reserved.
* Copyright (c) 2023, 2024, Klara Inc.
*/ */
/* /*
@ -59,7 +60,9 @@
#include <sys/zfs_znode.h> #include <sys/zfs_znode.h>
#ifdef _KERNEL #ifdef _KERNEL
#include <linux/kmap_compat.h> #include <linux/kmap_compat.h>
#include <linux/mm_compat.h>
#include <linux/scatterlist.h> #include <linux/scatterlist.h>
#include <linux/version.h>
#endif #endif
#ifdef _KERNEL #ifdef _KERNEL
@ -895,14 +898,9 @@ abd_iter_init(struct abd_iter *aiter, abd_t *abd)
{ {
ASSERT(!abd_is_gang(abd)); ASSERT(!abd_is_gang(abd));
abd_verify(abd); abd_verify(abd);
memset(aiter, 0, sizeof (struct abd_iter));
aiter->iter_abd = abd; aiter->iter_abd = abd;
aiter->iter_mapaddr = NULL; if (!abd_is_linear(abd)) {
aiter->iter_mapsize = 0;
aiter->iter_pos = 0;
if (abd_is_linear(abd)) {
aiter->iter_offset = 0;
aiter->iter_sg = NULL;
} else {
aiter->iter_offset = ABD_SCATTER(abd).abd_offset; aiter->iter_offset = ABD_SCATTER(abd).abd_offset;
aiter->iter_sg = ABD_SCATTER(abd).abd_sgl; aiter->iter_sg = ABD_SCATTER(abd).abd_sgl;
} }
@ -915,6 +913,7 @@ abd_iter_init(struct abd_iter *aiter, abd_t *abd)
boolean_t boolean_t
abd_iter_at_end(struct abd_iter *aiter) abd_iter_at_end(struct abd_iter *aiter)
{ {
ASSERT3U(aiter->iter_pos, <=, aiter->iter_abd->abd_size);
return (aiter->iter_pos == aiter->iter_abd->abd_size); return (aiter->iter_pos == aiter->iter_abd->abd_size);
} }
@ -926,8 +925,15 @@ abd_iter_at_end(struct abd_iter *aiter)
void void
abd_iter_advance(struct abd_iter *aiter, size_t amount) abd_iter_advance(struct abd_iter *aiter, size_t amount)
{ {
/*
* Ensure that last chunk is not in use. abd_iterate_*() must clear
* this state (directly or via abd_iter_unmap()) before advancing.
*/
ASSERT3P(aiter->iter_mapaddr, ==, NULL); ASSERT3P(aiter->iter_mapaddr, ==, NULL);
ASSERT0(aiter->iter_mapsize); ASSERT0(aiter->iter_mapsize);
ASSERT3P(aiter->iter_page, ==, NULL);
ASSERT0(aiter->iter_page_doff);
ASSERT0(aiter->iter_page_dsize);
/* There's nothing left to advance to, so do nothing */ /* There's nothing left to advance to, so do nothing */
if (abd_iter_at_end(aiter)) if (abd_iter_at_end(aiter))
@ -1009,6 +1015,106 @@ abd_cache_reap_now(void)
} }
#if defined(_KERNEL) #if defined(_KERNEL)
/*
 * Find the struct page backing the iterator's current position and publish
 * it in iter_page, along with the data offset (iter_page_doff) and data
 * length (iter_page_dsize) within that page. The page is not mapped into the
 * address space. When the iterator is already at the end of the ABD, all
 * three fields are cleared instead.
 */
void
abd_iter_page(struct abd_iter *aiter)
{
	struct page *pg;
	size_t off, remain;

	/* Nothing left to hand out; report "no page". */
	if (abd_iter_at_end(aiter)) {
		aiter->iter_page = NULL;
		aiter->iter_page_doff = 0;
		aiter->iter_page_dsize = 0;
		return;
	}

	if (!abd_is_linear(aiter->iter_abd)) {
		ASSERT(!abd_is_gang(aiter->iter_abd));

		/* page of the current scatterlist entry */
		pg = sg_page(aiter->iter_sg);

		/* position within that entry */
		off = aiter->iter_offset;

		/*
		 * data left in this scatterlist entry, bounded by what is
		 * left in the ABD as a whole
		 */
		remain = MIN(aiter->iter_sg->length - aiter->iter_offset,
		    aiter->iter_abd->abd_size - aiter->iter_pos);
	} else {
		ASSERT3U(aiter->iter_pos, ==, aiter->iter_offset);

		/* address of the byte at iter_pos in the linear buffer */
		void *addr = ABD_LINEAR_BUF(aiter->iter_abd) + aiter->iter_pos;

		/* look up the struct page behind that address */
		if (is_vmalloc_addr(addr))
			pg = vmalloc_to_page(addr);
		else
			pg = virt_to_page(addr);

		/* where the address falls within its page */
		off = offset_in_page(addr);

		/* everything from this position to the end of the ABD */
		remain = aiter->iter_abd->abd_size - aiter->iter_offset;
	}

	ASSERT(pg);

#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 5, 0)
	if (PageTail(pg)) {
		/*
		 * This page belongs to a "compound page": a group of pages
		 * that can be referenced through a single struct page *.
		 * It's organised as a "head" page followed by a series of
		 * "tail" pages.
		 *
		 * In OpenZFS, compound pages are allocated using the
		 * __GFP_COMP flag, which we get from scatter ABDs and SPL
		 * vmalloc slabs (ie >16K allocations). So a great many of the
		 * IO buffers we get are going to be of this type.
		 *
		 * Tail pages are ordinary PAGE_SIZE pages and are safe to use
		 * as-is. The head page, however, has a length covering itself
		 * and all of its tail pages, so if this ABD chunk spans
		 * several pages we can hand out the head page with a
		 * >PAGE_SIZE length, which is far more efficient.
		 *
		 * That requires the offset to be counted from the head page.
		 * The struct pages of a compound page are stored
		 * contiguously, so a simple pointer difference gives the
		 * adjustment.
		 *
		 * Before kernel 4.5, compound page heads were refcounted
		 * separately, so moving back to the head page would mean
		 * taking a reference on it and releasing it once we are
		 * completely finished with it. In practice that is when our
		 * caller is done with the ABD, which we have no insight into
		 * from here. Rather than contort this API to track head page
		 * references on such ancient kernels, this block is compiled
		 * out there and the tail pages are used directly. That is
		 * slightly less efficient, but makes everything far simpler.
		 */
		struct page *tail = pg;

		pg = compound_head(tail);
		off += ((tail - pg) * PAGESIZE);
	}
#endif

	/* final page, and the data position inside it */
	aiter->iter_page = pg;
	aiter->iter_page_doff = off;

	/* chunk length, never running past the end of the page */
	aiter->iter_page_dsize = MIN(remain, page_size(pg) - off);
}
/*
* Note: ABD BIO functions are only needed to support vdev_classic. See
* comments in vdev_disk.c.
*/
/* /*
* bio_nr_pages for ABD. * bio_nr_pages for ABD.
* @off is the offset in @abd * @off is the offset in @abd
@ -1163,4 +1269,5 @@ MODULE_PARM_DESC(zfs_abd_scatter_min_size,
module_param(zfs_abd_scatter_max_order, uint, 0644); module_param(zfs_abd_scatter_max_order, uint, 0644);
MODULE_PARM_DESC(zfs_abd_scatter_max_order, MODULE_PARM_DESC(zfs_abd_scatter_max_order,
"Maximum order allocation used for a scatter ABD."); "Maximum order allocation used for a scatter ABD.");
#endif
#endif /* _KERNEL */

View File

@ -24,6 +24,7 @@
* Rewritten for Linux by Brian Behlendorf <behlendorf1@llnl.gov>. * Rewritten for Linux by Brian Behlendorf <behlendorf1@llnl.gov>.
* LLNL-CODE-403049. * LLNL-CODE-403049.
* Copyright (c) 2012, 2019 by Delphix. All rights reserved. * Copyright (c) 2012, 2019 by Delphix. All rights reserved.
* Copyright (c) 2023, 2024, Klara Inc.
*/ */
#include <sys/zfs_context.h> #include <sys/zfs_context.h>
@ -66,6 +67,13 @@ typedef struct vdev_disk {
krwlock_t vd_lock; krwlock_t vd_lock;
} vdev_disk_t; } vdev_disk_t;
/*
* Maximum number of segments to add to a bio (min 4). If this is higher than
* the maximum allowed by the device queue or the kernel itself, it will be
* clamped. Setting it to zero will cause the kernel's ideal size to be used.
*/
uint_t zfs_vdev_disk_max_segs = 0;
/* /*
* Unique identifier for the exclusive vdev holder. * Unique identifier for the exclusive vdev holder.
*/ */
@ -83,55 +91,47 @@ static uint_t zfs_vdev_open_timeout_ms = 1000;
*/ */
#define EFI_MIN_RESV_SIZE (16 * 1024) #define EFI_MIN_RESV_SIZE (16 * 1024)
/*
* Virtual device vector for disks.
*/
typedef struct dio_request {
zio_t *dr_zio; /* Parent ZIO */
atomic_t dr_ref; /* References */
int dr_error; /* Bio error */
int dr_bio_count; /* Count of bio's */
struct bio *dr_bio[]; /* Attached bio's */
} dio_request_t;
/* /*
* BIO request failfast mask. * BIO request failfast mask.
*/ */
static unsigned int zfs_vdev_failfast_mask = 1; static unsigned int zfs_vdev_failfast_mask = 1;
/*
* Convert SPA mode flags into bdev open mode flags.
*/
#ifdef HAVE_BLK_MODE_T #ifdef HAVE_BLK_MODE_T
static blk_mode_t typedef blk_mode_t vdev_bdev_mode_t;
#define VDEV_BDEV_MODE_READ BLK_OPEN_READ
#define VDEV_BDEV_MODE_WRITE BLK_OPEN_WRITE
#define VDEV_BDEV_MODE_EXCL BLK_OPEN_EXCL
#define VDEV_BDEV_MODE_MASK (BLK_OPEN_READ|BLK_OPEN_WRITE|BLK_OPEN_EXCL)
#else #else
static fmode_t typedef fmode_t vdev_bdev_mode_t;
#define VDEV_BDEV_MODE_READ FMODE_READ
#define VDEV_BDEV_MODE_WRITE FMODE_WRITE
#define VDEV_BDEV_MODE_EXCL FMODE_EXCL
#define VDEV_BDEV_MODE_MASK (FMODE_READ|FMODE_WRITE|FMODE_EXCL)
#endif #endif
vdev_bdev_mode(spa_mode_t spa_mode, boolean_t exclusive)
static vdev_bdev_mode_t
vdev_bdev_mode(spa_mode_t smode)
{ {
#ifdef HAVE_BLK_MODE_T ASSERT3U(smode, !=, SPA_MODE_UNINIT);
blk_mode_t mode = 0; ASSERT0(smode & ~(SPA_MODE_READ|SPA_MODE_WRITE));
if (spa_mode & SPA_MODE_READ) vdev_bdev_mode_t bmode = VDEV_BDEV_MODE_EXCL;
mode |= BLK_OPEN_READ;
if (spa_mode & SPA_MODE_WRITE) if (smode & SPA_MODE_READ)
mode |= BLK_OPEN_WRITE; bmode |= VDEV_BDEV_MODE_READ;
if (exclusive) if (smode & SPA_MODE_WRITE)
mode |= BLK_OPEN_EXCL; bmode |= VDEV_BDEV_MODE_WRITE;
#else
fmode_t mode = 0;
if (spa_mode & SPA_MODE_READ) ASSERT(bmode & VDEV_BDEV_MODE_MASK);
mode |= FMODE_READ; ASSERT0(bmode & ~VDEV_BDEV_MODE_MASK);
if (spa_mode & SPA_MODE_WRITE) return (bmode);
mode |= FMODE_WRITE;
if (exclusive)
mode |= FMODE_EXCL;
#endif
return (mode);
} }
/* /*
@ -238,30 +238,28 @@ vdev_disk_kobj_evt_post(vdev_t *v)
} }
static zfs_bdev_handle_t * static zfs_bdev_handle_t *
vdev_blkdev_get_by_path(const char *path, spa_mode_t mode, void *holder) vdev_blkdev_get_by_path(const char *path, spa_mode_t smode, void *holder)
{ {
vdev_bdev_mode_t bmode = vdev_bdev_mode(smode);
#if defined(HAVE_BDEV_OPEN_BY_PATH) #if defined(HAVE_BDEV_OPEN_BY_PATH)
return (bdev_open_by_path(path, return (bdev_open_by_path(path, bmode, holder, NULL));
vdev_bdev_mode(mode, B_TRUE), holder, NULL));
#elif defined(HAVE_BLKDEV_GET_BY_PATH_4ARG) #elif defined(HAVE_BLKDEV_GET_BY_PATH_4ARG)
return (blkdev_get_by_path(path, return (blkdev_get_by_path(path, bmode, holder, NULL));
vdev_bdev_mode(mode, B_TRUE), holder, NULL));
#else #else
return (blkdev_get_by_path(path, return (blkdev_get_by_path(path, bmode, holder));
vdev_bdev_mode(mode, B_TRUE), holder));
#endif #endif
} }
static void static void
vdev_blkdev_put(zfs_bdev_handle_t *bdh, spa_mode_t mode, void *holder) vdev_blkdev_put(zfs_bdev_handle_t *bdh, spa_mode_t smode, void *holder)
{ {
#if defined(HAVE_BDEV_RELEASE) #if defined(HAVE_BDEV_RELEASE)
return (bdev_release(bdh)); return (bdev_release(bdh));
#elif defined(HAVE_BLKDEV_PUT_HOLDER) #elif defined(HAVE_BLKDEV_PUT_HOLDER)
return (blkdev_put(BDH_BDEV(bdh), holder)); return (blkdev_put(BDH_BDEV(bdh), holder));
#else #else
return (blkdev_put(BDH_BDEV(bdh), return (blkdev_put(BDH_BDEV(bdh), vdev_bdev_mode(smode)));
vdev_bdev_mode(mode, B_TRUE)));
#endif #endif
} }
@ -270,11 +268,7 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
uint64_t *logical_ashift, uint64_t *physical_ashift) uint64_t *logical_ashift, uint64_t *physical_ashift)
{ {
zfs_bdev_handle_t *bdh; zfs_bdev_handle_t *bdh;
#ifdef HAVE_BLK_MODE_T spa_mode_t smode = spa_mode(v->vdev_spa);
blk_mode_t mode = vdev_bdev_mode(spa_mode(v->vdev_spa), B_FALSE);
#else
fmode_t mode = vdev_bdev_mode(spa_mode(v->vdev_spa), B_FALSE);
#endif
hrtime_t timeout = MSEC2NSEC(zfs_vdev_open_timeout_ms); hrtime_t timeout = MSEC2NSEC(zfs_vdev_open_timeout_ms);
vdev_disk_t *vd; vdev_disk_t *vd;
@ -325,16 +319,16 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
reread_part = B_TRUE; reread_part = B_TRUE;
} }
vdev_blkdev_put(bdh, mode, zfs_vdev_holder); vdev_blkdev_put(bdh, smode, zfs_vdev_holder);
} }
if (reread_part) { if (reread_part) {
bdh = vdev_blkdev_get_by_path(disk_name, mode, bdh = vdev_blkdev_get_by_path(disk_name, smode,
zfs_vdev_holder); zfs_vdev_holder);
if (!BDH_IS_ERR(bdh)) { if (!BDH_IS_ERR(bdh)) {
int error = int error =
vdev_bdev_reread_part(BDH_BDEV(bdh)); vdev_bdev_reread_part(BDH_BDEV(bdh));
vdev_blkdev_put(bdh, mode, zfs_vdev_holder); vdev_blkdev_put(bdh, smode, zfs_vdev_holder);
if (error == 0) { if (error == 0) {
timeout = MSEC2NSEC( timeout = MSEC2NSEC(
zfs_vdev_open_timeout_ms * 2); zfs_vdev_open_timeout_ms * 2);
@ -379,7 +373,7 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
hrtime_t start = gethrtime(); hrtime_t start = gethrtime();
bdh = BDH_ERR_PTR(-ENXIO); bdh = BDH_ERR_PTR(-ENXIO);
while (BDH_IS_ERR(bdh) && ((gethrtime() - start) < timeout)) { while (BDH_IS_ERR(bdh) && ((gethrtime() - start) < timeout)) {
bdh = vdev_blkdev_get_by_path(v->vdev_path, mode, bdh = vdev_blkdev_get_by_path(v->vdev_path, smode,
zfs_vdev_holder); zfs_vdev_holder);
if (unlikely(BDH_PTR_ERR(bdh) == -ENOENT)) { if (unlikely(BDH_PTR_ERR(bdh) == -ENOENT)) {
/* /*
@ -457,95 +451,15 @@ vdev_disk_close(vdev_t *v)
if (v->vdev_reopening || vd == NULL) if (v->vdev_reopening || vd == NULL)
return; return;
if (vd->vd_bdh != NULL) { if (vd->vd_bdh != NULL)
vdev_blkdev_put(vd->vd_bdh, spa_mode(v->vdev_spa), vdev_blkdev_put(vd->vd_bdh, spa_mode(v->vdev_spa),
zfs_vdev_holder); zfs_vdev_holder);
}
rw_destroy(&vd->vd_lock); rw_destroy(&vd->vd_lock);
kmem_free(vd, sizeof (vdev_disk_t)); kmem_free(vd, sizeof (vdev_disk_t));
v->vdev_tsd = NULL; v->vdev_tsd = NULL;
} }
static dio_request_t *
vdev_disk_dio_alloc(int bio_count)
{
dio_request_t *dr = kmem_zalloc(sizeof (dio_request_t) +
sizeof (struct bio *) * bio_count, KM_SLEEP);
atomic_set(&dr->dr_ref, 0);
dr->dr_bio_count = bio_count;
dr->dr_error = 0;
for (int i = 0; i < dr->dr_bio_count; i++)
dr->dr_bio[i] = NULL;
return (dr);
}
static void
vdev_disk_dio_free(dio_request_t *dr)
{
int i;
for (i = 0; i < dr->dr_bio_count; i++)
if (dr->dr_bio[i])
bio_put(dr->dr_bio[i]);
kmem_free(dr, sizeof (dio_request_t) +
sizeof (struct bio *) * dr->dr_bio_count);
}
static void
vdev_disk_dio_get(dio_request_t *dr)
{
atomic_inc(&dr->dr_ref);
}
static void
vdev_disk_dio_put(dio_request_t *dr)
{
int rc = atomic_dec_return(&dr->dr_ref);
/*
* Free the dio_request when the last reference is dropped and
* ensure zio_interpret is called only once with the correct zio
*/
if (rc == 0) {
zio_t *zio = dr->dr_zio;
int error = dr->dr_error;
vdev_disk_dio_free(dr);
if (zio) {
zio->io_error = error;
ASSERT3S(zio->io_error, >=, 0);
if (zio->io_error)
vdev_disk_error(zio);
zio_delay_interrupt(zio);
}
}
}
BIO_END_IO_PROTO(vdev_disk_physio_completion, bio, error)
{
dio_request_t *dr = bio->bi_private;
if (dr->dr_error == 0) {
#ifdef HAVE_1ARG_BIO_END_IO_T
dr->dr_error = BIO_END_IO_ERROR(bio);
#else
if (error)
dr->dr_error = -(error);
else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
dr->dr_error = EIO;
#endif
}
/* Drop reference acquired by __vdev_disk_physio */
vdev_disk_dio_put(dr);
}
static inline void static inline void
vdev_submit_bio_impl(struct bio *bio) vdev_submit_bio_impl(struct bio *bio)
{ {
@ -697,8 +611,460 @@ vdev_bio_alloc(struct block_device *bdev, gfp_t gfp_mask,
return (bio); return (bio);
} }
/*
 * Work out how many segments a single BIO may carry on this device.
 *
 * The device queue's own segment limit is always an upper bound. When the
 * zfs_vdev_disk_max_segs tuneable is non-zero it may lower that bound, but
 * the tuneable is never treated as less than 4, so there's room to finish
 * split pages if they come up. The result is finally clamped by
 * bio_max_segs() where the kernel provides it, or to BIO_MAX_PAGES otherwise.
 */
static inline uint_t
vdev_bio_max_segs(struct block_device *bdev)
{
	const uint_t queue_limit = queue_max_segments(bdev_get_queue(bdev));
	uint_t segs = queue_limit;

	if (zfs_vdev_disk_max_segs > 0) {
		const uint_t tuned = MAX(4, zfs_vdev_disk_max_segs);

		if (tuned < segs)
			segs = tuned;
	}

#ifdef HAVE_BIO_MAX_SEGS
	return (bio_max_segs(segs));
#else
	return (MIN(segs, BIO_MAX_PAGES));
#endif
}
static inline uint_t
vdev_bio_max_bytes(struct block_device *bdev)
{
return (queue_max_sectors(bdev_get_queue(bdev)) << 9);
}
/*
* Virtual block IO object (VBIO)
*
* Linux block IO (BIO) objects have a limit on how many data segments (pages)
* they can hold. Depending on how they're allocated and structured, a large
* ZIO can require more than one BIO to be submitted to the kernel, which then
* all have to complete before we can return the completed ZIO back to ZFS.
*
* A VBIO is a wrapper around multiple BIOs, carrying everything needed to
* translate a ZIO down into the kernel block layer and back again.
*
* Note that these are only used for data ZIOs (read/write). Meta-operations
* (flush/trim) don't need multiple BIOs and so can just make the call
* directly.
*
* A vbio is allocated by vbio_alloc() and freed by vbio_completion().
*/
typedef struct {
zio_t *vbio_zio; /* parent zio */
struct block_device *vbio_bdev; /* blockdev to submit bios to */
/* wrapper around a borrowed linear buf; NULL if the zio's ABD is used as-is */
abd_t *vbio_abd; /* abd carrying borrowed linear buf */
uint_t vbio_max_segs; /* max segs per bio */
uint_t vbio_max_bytes; /* max bytes per bio */
/* ~(logical block size - 1); rounds a byte count down to a whole block */
uint_t vbio_lbs_mask; /* logical block size mask */
/* byte offset on the device; shifted right by 9 to get the bio sector */
uint64_t vbio_offset; /* start offset of next bio */
struct bio *vbio_bio; /* pointer to the current bio */
/* passed to bio_set_op_attrs() for every bio created for this vbio */
int vbio_flags; /* bio flags */
} vbio_t;
/*
 * Allocate a vbio for the given zio and capture the per-device limits
 * (segments and bytes per BIO, logical block size mask) needed while
 * filling it. The first BIO will start at the zio's offset. No BIO is
 * allocated here; that happens when pages are added.
 */
static vbio_t *
vbio_alloc(zio_t *zio, struct block_device *bdev, int flags)
{
	vbio_t *v = kmem_zalloc(sizeof (vbio_t), KM_SLEEP);

	/* What we're doing and where it's going. */
	v->vbio_zio = zio;
	v->vbio_bdev = bdev;
	v->vbio_flags = flags;
	v->vbio_offset = zio->io_offset;

	/* Per-BIO limits imposed by the device. */
	v->vbio_max_segs = vdev_bio_max_segs(bdev);
	v->vbio_max_bytes = vdev_bio_max_bytes(bdev);
	v->vbio_lbs_mask = ~(bdev_logical_block_size(bdev)-1);

	/* Nothing borrowed and no BIO under construction yet. */
	v->vbio_abd = NULL;
	v->vbio_bio = NULL;

	return (v);
}
BIO_END_IO_PROTO(vbio_completion, bio, error);
/*
* Add `size` bytes of `page`, starting at byte `offset` within it, to the
* vbio.
*
* Data is loaded into the vbio's current BIO. When that BIO cannot take any
* more, a new one is allocated starting at the device offset just past the
* data already loaded; the previous BIO is chained to the new one and
* submitted, and the new BIO becomes the vbio's current BIO. The last BIO is
* left unsubmitted in vbio->vbio_bio for the caller to finish.
*
* Always returns 0.
*/
static int
vbio_add_page(vbio_t *vbio, struct page *page, uint_t size, uint_t offset)
{
struct bio *bio = vbio->vbio_bio;
uint_t ssize;
while (size > 0) {
if (bio == NULL) {
/* New BIO, allocate and set up */
bio = vdev_bio_alloc(vbio->vbio_bdev, GFP_NOIO,
vbio->vbio_max_segs);
VERIFY(bio);
/* vbio_offset is in bytes; the BIO wants 512-byte sectors */
BIO_BI_SECTOR(bio) = vbio->vbio_offset >> 9;
bio_set_op_attrs(bio,
vbio->vbio_zio->io_type == ZIO_TYPE_WRITE ?
WRITE : READ, vbio->vbio_flags);
/*
* If there is a previous (full) BIO, chain it to the new one
* and send it on its way before switching over.
*/
if (vbio->vbio_bio) {
bio_chain(vbio->vbio_bio, bio);
vdev_submit_bio(vbio->vbio_bio);
}
vbio->vbio_bio = bio;
}
/*
* Only load as much of the current page data as will fit in
* the space left in the BIO, respecting lbs alignment. Older
* kernels will error if we try to overfill the BIO, while
* newer ones will accept it and split the BIO. This ensures
* everything works on older kernels, and avoids an additional
* overhead on the new.
*
* NOTE(review): if a freshly allocated BIO also yields
* ssize == 0 (or rejects the page), this loop would not make
* progress - confirm vbio_max_bytes is always at least one
* logical block.
*/
ssize = MIN(size, (vbio->vbio_max_bytes - BIO_BI_SIZE(bio)) &
vbio->vbio_lbs_mask);
if (ssize > 0 &&
bio_add_page(bio, page, ssize, offset) == ssize) {
/* Accepted, adjust and load any remaining. */
size -= ssize;
offset += ssize;
continue;
}
/* No room, set up for a new BIO and loop */
vbio->vbio_offset += BIO_BI_SIZE(bio);
/* Signal new BIO allocation wanted */
bio = NULL;
}
return (0);
}
/*
 * abd_iterate_page_func() callback: hand one page span from the ABD to the
 * vbio passed as the private argument.
 */
static int
vbio_fill_cb(struct page *page, size_t off, size_t len, void *priv)
{
	return (vbio_add_page((vbio_t *)priv, page, len, off));
}
/*
 * Create some BIOs, fill them with data and submit them.
 *
 * Walks the first `size` bytes of `abd` page by page, loading them into one
 * or more BIOs via vbio_fill_cb(). Earlier BIOs are submitted as they fill;
 * the final BIO gets vbio_completion() as its end_io handler, with the vbio
 * as its private data, and is submitted here.
 *
 * Ownership of the vbio passes to the final BIO on submission; the caller
 * must not touch the vbio after this function returns.
 */
static void
vbio_submit(vbio_t *vbio, abd_t *abd, uint64_t size)
{
	ASSERT(vbio->vbio_bdev);

	/*
	 * We plug so we can submit the BIOs as we go and only unplug them when
	 * they are fully created and submitted. This is important; if we don't
	 * plug, then the kernel may start executing earlier BIOs while we're
	 * still creating and executing later ones, and if the device goes
	 * away while that's happening, older kernels can get confused and
	 * trample memory.
	 */
	struct blk_plug plug;
	blk_start_plug(&plug);

	(void) abd_iterate_page_func(abd, 0, size, vbio_fill_cb, vbio);
	ASSERT(vbio->vbio_bio);

	vbio->vbio_bio->bi_end_io = vbio_completion;
	vbio->vbio_bio->bi_private = vbio;

	/*
	 * Once submitted, the BIO owns the vbio (through bi_private) and we
	 * must not touch it again. The BIO may complete, and
	 * vbio_completion() may free the vbio, before this task runs again,
	 * so the vbio has to be considered invalid from this point on.
	 */
	vdev_submit_bio(vbio->vbio_bio);

	blk_finish_plug(&plug);
}
/*
 * IO completion callback for the final BIO of a vbio.
 *
 * Records the BIO result in the zio, releases the BIO, hands back any
 * borrowed linear buffer to the zio's ABD, frees the vbio and finally passes
 * the zio on via zio_delay_interrupt().
 */
BIO_END_IO_PROTO(vbio_completion, bio, error)
{
	vbio_t *vbio = bio->bi_private;
	zio_t *zio = vbio->vbio_zio;

	ASSERT(zio);

	/* Capture and log any errors */
#ifdef HAVE_1ARG_BIO_END_IO_T
	zio->io_error = BIO_END_IO_ERROR(bio);
#else
	zio->io_error = 0;
	if (error)
		zio->io_error = -(error);
	else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
		zio->io_error = EIO;
#endif
	/*
	 * io_error is a signed int, so this must be a signed comparison; an
	 * unsigned one is always true and would never catch a negative errno.
	 */
	ASSERT3S(zio->io_error, >=, 0);

	if (zio->io_error)
		vdev_disk_error(zio);

	/* Return the BIO to the kernel */
	bio_put(bio);

	/*
	 * If we copied the ABD before issuing it, clean up and return the copy
	 * to the ABD, with changes if appropriate.
	 */
	if (vbio->vbio_abd != NULL) {
		/* Grab the buffer first; the wrapper ABD is about to go away */
		void *buf = abd_to_buf(vbio->vbio_abd);
		abd_free(vbio->vbio_abd);
		vbio->vbio_abd = NULL;

		/* Reads copy the data back; writes just return the buffer */
		if (zio->io_type == ZIO_TYPE_READ)
			abd_return_buf_copy(zio->io_abd, buf, zio->io_size);
		else
			abd_return_buf(zio->io_abd, buf, zio->io_size);
	}

	/* Final cleanup */
	kmem_free(vbio, sizeof (vbio_t));

	/* All done, submit for processing */
	zio_delay_interrupt(zio);
}
/*
* Iterator callback to count ABD pages and check their size & alignment.
*
* On Linux, each BIO segment can take a page pointer, and an offset+length of
* the data within that page. A page can be arbitrarily large ("compound"
* pages) but we still have to ensure the data portion is correctly sized and
* aligned to the logical block size, to ensure that if the kernel wants to
* split the BIO, the two halves will still be properly aligned.
*
* The struct below is the state carried between callback invocations.
*/
typedef struct {
uint_t bmask; /* logical block size - 1 */
uint_t npages; /* page spans accepted so far */
uint_t end; /* previous span's length modulo block size */
} vdev_disk_check_pages_t;
/*
 * abd_iterate_page_func() callback for vdev_disk_check_pages(). Returns 0 to
 * accept the span and keep iterating, or 1 to abort because the ABD cannot
 * be submitted as-is.
 */
static int
vdev_disk_check_pages_cb(struct page *page, size_t off, size_t len, void *priv)
{
	vdev_disk_check_pages_t *state = priv;
	const boolean_t first_span = (state->npages == 0);

	/*
	 * The previous span stopped short of a block boundary, so using this
	 * ABD directly would leave a gap. Give up.
	 */
	if (state->end != 0)
		return (1);

	/*
	 * Remember whether this span is a whole number of blocks; the check
	 * above acts on it when the next span arrives.
	 */
	state->end = len & state->bmask;

	/* Only the very first span may start off a block boundary. */
	if (!first_span && (off & state->bmask) != 0)
		return (1);

	state->npages++;
	return (0);
}
/*
 * Check if we can submit the pages in the first `size` bytes of this ABD to
 * the kernel as-is. Returns B_TRUE if every page span passes the size and
 * alignment checks against the device's logical block size, or B_FALSE if
 * the ABD can't be submitted like this.
 */
static boolean_t
vdev_disk_check_pages(abd_t *abd, uint64_t size, struct block_device *bdev)
{
	vdev_disk_check_pages_t s = {
	    .bmask = bdev_logical_block_size(bdev)-1,
	    .npages = 0,
	    .end = 0,
	};

	/* A non-zero return means the callback aborted the walk. */
	if (abd_iterate_page_func(abd, 0, size, vdev_disk_check_pages_cb, &s))
		return (B_FALSE);

	return (B_TRUE);
}
/*
 * Submit a read or write zio to the block device using the vbio path.
 *
 * Returns 0 once the IO has been handed to the kernel (completion is then
 * reported through vbio_completion()), or EIO if the request extends past
 * the end of the device.
 */
static int
vdev_disk_io_rw(zio_t *zio)
{
	vdev_t *v = zio->io_vd;
	vdev_disk_t *vd = v->vdev_tsd;
	struct block_device *bdev = BDH_BDEV(vd->vd_bdh);
	int flags = 0;

	/*
	 * Accessing outside the block device is never allowed. Use
	 * i_size_read() for the check as well as the message, rather than
	 * reading i_size directly.
	 */
	if (zio->io_offset + zio->io_size > i_size_read(bdev->bd_inode)) {
		vdev_dbgmsg(zio->io_vd,
		    "Illegal access %llu size %llu, device size %llu",
		    (u_longlong_t)zio->io_offset,
		    (u_longlong_t)zio->io_size,
		    (u_longlong_t)i_size_read(bdev->bd_inode));
		return (SET_ERROR(EIO));
	}

	/* Failfast is only applied to IO that is not a retry or try-hard. */
	if (!(zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)) &&
	    v->vdev_failfast == B_TRUE) {
		bio_set_flags_failfast(bdev, &flags, zfs_vdev_failfast_mask & 1,
		    zfs_vdev_failfast_mask & 2, zfs_vdev_failfast_mask & 4);
	}

	/*
	 * Check alignment of the incoming ABD. If any part of it would require
	 * submitting a page that is not aligned to the logical block size,
	 * then we take a copy into a linear buffer and submit that instead.
	 * This should be impossible on a 512b LBS, and fairly rare on 4K,
	 * usually requiring abnormally-small data blocks (eg gang blocks)
	 * mixed into the same ABD as larger ones (eg aggregated).
	 */
	abd_t *abd = zio->io_abd;
	if (!vdev_disk_check_pages(abd, zio->io_size, bdev)) {
		void *buf;

		/* Reads only need the space; writes need the data too. */
		if (zio->io_type == ZIO_TYPE_READ)
			buf = abd_borrow_buf(zio->io_abd, zio->io_size);
		else
			buf = abd_borrow_buf_copy(zio->io_abd, zio->io_size);

		/*
		 * Wrap the copy in an abd_t, so we can use the same iterators
		 * to count and fill the vbio later.
		 */
		abd = abd_get_from_buf(buf, zio->io_size);

		/*
		 * False here would mean the borrowed copy has an invalid
		 * alignment too, which would mean we've somehow been passed a
		 * linear ABD with an interior page that has a non-zero offset
		 * or a size not a multiple of PAGE_SIZE. This is not possible.
		 * It would mean either zio_buf_alloc() or its underlying
		 * allocators have done something extremely strange, or our
		 * math in vdev_disk_check_pages() is wrong. In either case,
		 * something is seriously wrong and it's not safe to continue.
		 */
		VERIFY(vdev_disk_check_pages(abd, zio->io_size, bdev));
	}

	/* Allocate vbio, with a pointer to the borrowed ABD if necessary */
	vbio_t *vbio = vbio_alloc(zio, bdev, flags);
	if (abd != zio->io_abd)
		vbio->vbio_abd = abd;

	/* Fill it with data pages and submit it to the kernel */
	vbio_submit(vbio, abd, zio->io_size);
	return (0);
}
/* ========== */
/*
* This is the classic, battle-tested BIO submission code. Until we're totally
* sure that the new code is safe and correct in all cases, this will remain
* available and can be enabled by setting zfs_vdev_disk_classic=1 at module
* load time.
*
* These functions have been renamed to vdev_classic_* to make it clear what
* they belong to, but their implementations are unchanged.
*/
/*
* Request state for the classic submission path: one parent zio plus the
* set of BIOs issued on its behalf. Reference counted through dr_ref; the
* request is completed and freed when the count drops to zero.
*/
typedef struct dio_request {
zio_t *dr_zio; /* Parent ZIO */
atomic_t dr_ref; /* References */
int dr_error; /* Bio error */
int dr_bio_count; /* Count of bio's */
struct bio *dr_bio[]; /* Attached bio's */
} dio_request_t;
/*
 * Allocate a dio_request with room for bio_count BIO pointers. The request
 * starts with no references, no error and every BIO slot empty.
 */
static dio_request_t *
vdev_classic_dio_alloc(int bio_count)
{
	dio_request_t *dr;

	dr = kmem_zalloc(sizeof (dio_request_t) +
	    sizeof (struct bio *) * bio_count, KM_SLEEP);

	dr->dr_error = 0;
	dr->dr_bio_count = bio_count;
	atomic_set(&dr->dr_ref, 0);

	int slot = 0;
	while (slot < dr->dr_bio_count)
		dr->dr_bio[slot++] = NULL;

	return (dr);
}
/*
 * Release every BIO still attached to the request, then free the request
 * itself.
 */
static void
vdev_classic_dio_free(dio_request_t *dr)
{
	for (int slot = 0; slot < dr->dr_bio_count; slot++) {
		struct bio *attached = dr->dr_bio[slot];

		if (attached != NULL)
			bio_put(attached);
	}

	/* Size must match what vdev_classic_dio_alloc() asked for */
	kmem_free(dr, sizeof (dio_request_t) +
	    sizeof (struct bio *) * dr->dr_bio_count);
}
/* Take a reference on the request; dropped by vdev_classic_dio_put(). */
static void
vdev_classic_dio_get(dio_request_t *dr)
{
atomic_inc(&dr->dr_ref);
}
/*
 * Drop a reference on the request. Whoever drops the last one frees the
 * request and, if it has a parent zio, records the error on the zio and
 * passes it to zio_delay_interrupt(), so that happens exactly once per
 * request.
 */
static void
vdev_classic_dio_put(dio_request_t *dr)
{
	if (atomic_dec_return(&dr->dr_ref) != 0)
		return;

	/* Take what we need out of the request before it is freed */
	zio_t *zio = dr->dr_zio;
	int error = dr->dr_error;

	vdev_classic_dio_free(dr);

	if (zio == NULL)
		return;

	zio->io_error = error;
	ASSERT3S(zio->io_error, >=, 0);
	if (zio->io_error)
		vdev_disk_error(zio);

	zio_delay_interrupt(zio);
}
/*
* Completion handler for each BIO issued by the classic path. Records the
* BIO's error in the request, unless an earlier BIO already recorded one,
* and drops the reference taken for this BIO.
*/
BIO_END_IO_PROTO(vdev_classic_physio_completion, bio, error)
{
dio_request_t *dr = bio->bi_private;
/* Keep the first error seen; later BIOs don't overwrite it */
if (dr->dr_error == 0) {
#ifdef HAVE_1ARG_BIO_END_IO_T
dr->dr_error = BIO_END_IO_ERROR(bio);
#else
if (error)
dr->dr_error = -(error);
else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
dr->dr_error = EIO;
#endif
}
/* Drop reference acquired by vdev_classic_physio */
vdev_classic_dio_put(dr);
}
static inline unsigned int static inline unsigned int
vdev_bio_max_segs(zio_t *zio, int bio_size, uint64_t abd_offset) vdev_classic_bio_max_segs(zio_t *zio, int bio_size, uint64_t abd_offset)
{ {
unsigned long nr_segs = abd_nr_pages_off(zio->io_abd, unsigned long nr_segs = abd_nr_pages_off(zio->io_abd,
bio_size, abd_offset); bio_size, abd_offset);
@ -711,9 +1077,16 @@ vdev_bio_max_segs(zio_t *zio, int bio_size, uint64_t abd_offset)
} }
static int static int
__vdev_disk_physio(struct block_device *bdev, zio_t *zio, vdev_classic_physio(zio_t *zio)
size_t io_size, uint64_t io_offset, int rw, int flags)
{ {
vdev_t *v = zio->io_vd;
vdev_disk_t *vd = v->vdev_tsd;
struct block_device *bdev = BDH_BDEV(vd->vd_bdh);
size_t io_size = zio->io_size;
uint64_t io_offset = zio->io_offset;
int rw = zio->io_type == ZIO_TYPE_READ ? READ : WRITE;
int flags = 0;
dio_request_t *dr; dio_request_t *dr;
uint64_t abd_offset; uint64_t abd_offset;
uint64_t bio_offset; uint64_t bio_offset;
@ -736,7 +1109,7 @@ __vdev_disk_physio(struct block_device *bdev, zio_t *zio,
} }
retry: retry:
dr = vdev_disk_dio_alloc(bio_count); dr = vdev_classic_dio_alloc(bio_count);
if (!(zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)) && if (!(zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)) &&
zio->io_vd->vdev_failfast == B_TRUE) { zio->io_vd->vdev_failfast == B_TRUE) {
@ -771,23 +1144,23 @@ retry:
* this should be rare - see the comment above. * this should be rare - see the comment above.
*/ */
if (dr->dr_bio_count == i) { if (dr->dr_bio_count == i) {
vdev_disk_dio_free(dr); vdev_classic_dio_free(dr);
bio_count *= 2; bio_count *= 2;
goto retry; goto retry;
} }
nr_vecs = vdev_bio_max_segs(zio, bio_size, abd_offset); nr_vecs = vdev_classic_bio_max_segs(zio, bio_size, abd_offset);
dr->dr_bio[i] = vdev_bio_alloc(bdev, GFP_NOIO, nr_vecs); dr->dr_bio[i] = vdev_bio_alloc(bdev, GFP_NOIO, nr_vecs);
if (unlikely(dr->dr_bio[i] == NULL)) { if (unlikely(dr->dr_bio[i] == NULL)) {
vdev_disk_dio_free(dr); vdev_classic_dio_free(dr);
return (SET_ERROR(ENOMEM)); return (SET_ERROR(ENOMEM));
} }
/* Matching put called by vdev_disk_physio_completion */ /* Matching put called by vdev_classic_physio_completion */
vdev_disk_dio_get(dr); vdev_classic_dio_get(dr);
BIO_BI_SECTOR(dr->dr_bio[i]) = bio_offset >> 9; BIO_BI_SECTOR(dr->dr_bio[i]) = bio_offset >> 9;
dr->dr_bio[i]->bi_end_io = vdev_disk_physio_completion; dr->dr_bio[i]->bi_end_io = vdev_classic_physio_completion;
dr->dr_bio[i]->bi_private = dr; dr->dr_bio[i]->bi_private = dr;
bio_set_op_attrs(dr->dr_bio[i], rw, flags); bio_set_op_attrs(dr->dr_bio[i], rw, flags);
@ -801,7 +1174,7 @@ retry:
} }
/* Extra reference to protect dio_request during vdev_submit_bio */ /* Extra reference to protect dio_request during vdev_submit_bio */
vdev_disk_dio_get(dr); vdev_classic_dio_get(dr);
if (dr->dr_bio_count > 1) if (dr->dr_bio_count > 1)
blk_start_plug(&plug); blk_start_plug(&plug);
@ -815,11 +1188,13 @@ retry:
if (dr->dr_bio_count > 1) if (dr->dr_bio_count > 1)
blk_finish_plug(&plug); blk_finish_plug(&plug);
vdev_disk_dio_put(dr); vdev_classic_dio_put(dr);
return (error); return (error);
} }
/* ========== */
BIO_END_IO_PROTO(vdev_disk_io_flush_completion, bio, error) BIO_END_IO_PROTO(vdev_disk_io_flush_completion, bio, error)
{ {
zio_t *zio = bio->bi_private; zio_t *zio = bio->bi_private;
@ -928,12 +1303,14 @@ vdev_disk_io_trim(zio_t *zio)
#endif #endif
} }
int (*vdev_disk_io_rw_fn)(zio_t *zio) = NULL;
static void static void
vdev_disk_io_start(zio_t *zio) vdev_disk_io_start(zio_t *zio)
{ {
vdev_t *v = zio->io_vd; vdev_t *v = zio->io_vd;
vdev_disk_t *vd = v->vdev_tsd; vdev_disk_t *vd = v->vdev_tsd;
int rw, error; int error;
/* /*
* If the vdev is closed, it's likely in the REMOVED or FAULTED state. * If the vdev is closed, it's likely in the REMOVED or FAULTED state.
@ -996,13 +1373,6 @@ vdev_disk_io_start(zio_t *zio)
rw_exit(&vd->vd_lock); rw_exit(&vd->vd_lock);
zio_execute(zio); zio_execute(zio);
return; return;
case ZIO_TYPE_WRITE:
rw = WRITE;
break;
case ZIO_TYPE_READ:
rw = READ;
break;
case ZIO_TYPE_TRIM: case ZIO_TYPE_TRIM:
zio->io_error = vdev_disk_io_trim(zio); zio->io_error = vdev_disk_io_trim(zio);
@ -1015,23 +1385,34 @@ vdev_disk_io_start(zio_t *zio)
#endif #endif
return; return;
case ZIO_TYPE_READ:
case ZIO_TYPE_WRITE:
zio->io_target_timestamp = zio_handle_io_delay(zio);
error = vdev_disk_io_rw_fn(zio);
rw_exit(&vd->vd_lock);
if (error) {
zio->io_error = error;
zio_interrupt(zio);
}
return;
default: default:
/*
* Getting here means our parent vdev has made a very strange
* request of us, and shouldn't happen. Assert here to force a
* crash in dev builds, but in production return the IO
* unhandled. The pool will likely suspend anyway but that's
* nicer than crashing the kernel.
*/
ASSERT3S(zio->io_type, ==, -1);
rw_exit(&vd->vd_lock); rw_exit(&vd->vd_lock);
zio->io_error = SET_ERROR(ENOTSUP); zio->io_error = SET_ERROR(ENOTSUP);
zio_interrupt(zio); zio_interrupt(zio);
return; return;
} }
zio->io_target_timestamp = zio_handle_io_delay(zio); __builtin_unreachable();
error = __vdev_disk_physio(BDH_BDEV(vd->vd_bdh), zio,
zio->io_size, zio->io_offset, rw, 0);
rw_exit(&vd->vd_lock);
if (error) {
zio->io_error = error;
zio_interrupt(zio);
return;
}
} }
static void static void
@ -1080,8 +1461,49 @@ vdev_disk_rele(vdev_t *vd)
/* XXX: Implement me as a vnode rele for the device */ /* XXX: Implement me as a vnode rele for the device */
} }
/*
* BIO submission method. See comment above about vdev_classic.
* Set zfs_vdev_disk_classic=0 for new, =1 for classic
*/
static uint_t zfs_vdev_disk_classic = 0; /* default new */
/*
 * Module parameter setter for zfs_vdev_disk_classic. Stores the new value
 * with param_set_uint(), then points vdev_disk_io_rw_fn at the matching
 * submission function and logs which one was chosen.
 */
static int
vdev_disk_param_set_classic(const char *buf, zfs_kernel_param_t *kp)
{
	const int rc = param_set_uint(buf, kp);

	if (rc < 0)
		return (SET_ERROR(rc));

	if (zfs_vdev_disk_classic) {
		vdev_disk_io_rw_fn = vdev_classic_physio;
		printk(KERN_INFO "ZFS: forcing %s BIO submission\n",
		    "classic");
	} else {
		vdev_disk_io_rw_fn = vdev_disk_io_rw;
		printk(KERN_INFO "ZFS: forcing %s BIO submission\n",
		    "new");
	}

	return (0);
}
/*
 * vdev init hook. On first vdev use, pick the submission function from the
 * current value of zfs_vdev_disk_classic if nothing has set it already.
 * The arguments are unused. Always returns 0.
 */
static int
vdev_disk_init(spa_t *spa, nvlist_t *nv, void **tsd)
{
	(void) spa, (void) nv, (void) tsd;

	/* Already chosen, either by an earlier init or the param setter */
	if (vdev_disk_io_rw_fn != NULL)
		return (0);

	if (zfs_vdev_disk_classic)
		vdev_disk_io_rw_fn = vdev_classic_physio;
	else
		vdev_disk_io_rw_fn = vdev_disk_io_rw;

	return (0);
}
vdev_ops_t vdev_disk_ops = { vdev_ops_t vdev_disk_ops = {
.vdev_op_init = NULL, .vdev_op_init = vdev_disk_init,
.vdev_op_fini = NULL, .vdev_op_fini = NULL,
.vdev_op_open = vdev_disk_open, .vdev_op_open = vdev_disk_open,
.vdev_op_close = vdev_disk_close, .vdev_op_close = vdev_disk_close,
@ -1174,3 +1596,10 @@ ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, open_timeout_ms, UINT, ZMOD_RW,
ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, failfast_mask, UINT, ZMOD_RW, ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, failfast_mask, UINT, ZMOD_RW,
"Defines failfast mask: 1 - device, 2 - transport, 4 - driver"); "Defines failfast mask: 1 - device, 2 - transport, 4 - driver");
ZFS_MODULE_PARAM(zfs_vdev_disk, zfs_vdev_disk_, max_segs, UINT, ZMOD_RW,
"Maximum number of data segments to add to an IO request (min 4)");
ZFS_MODULE_PARAM_CALL(zfs_vdev_disk, zfs_vdev_disk_, classic,
vdev_disk_param_set_classic, param_get_uint, ZMOD_RD,
"Use classic BIO submission method");

View File

@ -3824,11 +3824,8 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc,
dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
zfs_sa_upgrade_txholds(tx, zp); zfs_sa_upgrade_txholds(tx, zp);
err = dmu_tx_assign(tx, TXG_NOWAIT); err = dmu_tx_assign(tx, TXG_WAIT);
if (err != 0) { if (err != 0) {
if (err == ERESTART)
dmu_tx_wait(tx);
dmu_tx_abort(tx); dmu_tx_abort(tx);
#ifdef HAVE_VFS_FILEMAP_DIRTY_FOLIO #ifdef HAVE_VFS_FILEMAP_DIRTY_FOLIO
filemap_dirty_folio(page_mapping(pp), page_folio(pp)); filemap_dirty_folio(page_mapping(pp), page_folio(pp));

View File

@ -720,23 +720,23 @@ zpl_putpage(struct page *pp, struct writeback_control *wbc, void *data)
{ {
boolean_t *for_sync = data; boolean_t *for_sync = data;
fstrans_cookie_t cookie; fstrans_cookie_t cookie;
int ret;
ASSERT(PageLocked(pp)); ASSERT(PageLocked(pp));
ASSERT(!PageWriteback(pp)); ASSERT(!PageWriteback(pp));
cookie = spl_fstrans_mark(); cookie = spl_fstrans_mark();
(void) zfs_putpage(pp->mapping->host, pp, wbc, *for_sync); ret = zfs_putpage(pp->mapping->host, pp, wbc, *for_sync);
spl_fstrans_unmark(cookie); spl_fstrans_unmark(cookie);
return (0); return (ret);
} }
#ifdef HAVE_WRITEPAGE_T_FOLIO #ifdef HAVE_WRITEPAGE_T_FOLIO
static int static int
zpl_putfolio(struct folio *pp, struct writeback_control *wbc, void *data) zpl_putfolio(struct folio *pp, struct writeback_control *wbc, void *data)
{ {
(void) zpl_putpage(&pp->page, wbc, data); return (zpl_putpage(&pp->page, wbc, data));
return (0);
} }
#endif #endif

View File

@ -26,6 +26,9 @@
#include <linux/compat.h> #include <linux/compat.h>
#endif #endif
#include <linux/fs.h> #include <linux/fs.h>
#ifdef HAVE_VFS_SPLICE_COPY_FILE_RANGE
#include <linux/splice.h>
#endif
#include <sys/file.h> #include <sys/file.h>
#include <sys/zfs_znode.h> #include <sys/zfs_znode.h>
#include <sys/zfs_vnops.h> #include <sys/zfs_vnops.h>
@ -102,7 +105,7 @@ zpl_copy_file_range(struct file *src_file, loff_t src_off,
ret = zpl_clone_file_range_impl(src_file, src_off, ret = zpl_clone_file_range_impl(src_file, src_off,
dst_file, dst_off, len); dst_file, dst_off, len);
#ifdef HAVE_VFS_GENERIC_COPY_FILE_RANGE #if defined(HAVE_VFS_GENERIC_COPY_FILE_RANGE)
/* /*
* Since Linux 5.3 the filesystem driver is responsible for executing * Since Linux 5.3 the filesystem driver is responsible for executing
* an appropriate fallback, and a generic fallback function is provided. * an appropriate fallback, and a generic fallback function is provided.
@ -111,6 +114,15 @@ zpl_copy_file_range(struct file *src_file, loff_t src_off,
ret == -EAGAIN) ret == -EAGAIN)
ret = generic_copy_file_range(src_file, src_off, dst_file, ret = generic_copy_file_range(src_file, src_off, dst_file,
dst_off, len, flags); dst_off, len, flags);
#elif defined(HAVE_VFS_SPLICE_COPY_FILE_RANGE)
/*
* Since 6.8 the fallback function is called splice_copy_file_range
* and has a slightly different signature.
*/
if (ret == -EOPNOTSUPP || ret == -EINVAL || ret == -EXDEV ||
ret == -EAGAIN)
ret = splice_copy_file_range(src_file, src_off, dst_file,
dst_off, len);
#else #else
/* /*
* Before Linux 5.3 the filesystem has to return -EOPNOTSUPP to signal * Before Linux 5.3 the filesystem has to return -EOPNOTSUPP to signal
@ -118,7 +130,7 @@ zpl_copy_file_range(struct file *src_file, loff_t src_off,
*/ */
if (ret == -EINVAL || ret == -EXDEV || ret == -EAGAIN) if (ret == -EINVAL || ret == -EXDEV || ret == -EAGAIN)
ret = -EOPNOTSUPP; ret = -EOPNOTSUPP;
#endif /* HAVE_VFS_GENERIC_COPY_FILE_RANGE */ #endif /* HAVE_VFS_GENERIC_COPY_FILE_RANGE || HAVE_VFS_SPLICE_COPY_FILE_RANGE */
return (ret); return (ret);
} }

View File

@ -1314,6 +1314,13 @@ zvol_os_create_minor(const char *name)
if (idx < 0) if (idx < 0)
return (SET_ERROR(-idx)); return (SET_ERROR(-idx));
minor = idx << ZVOL_MINOR_BITS; minor = idx << ZVOL_MINOR_BITS;
if (MINOR(minor) != minor) {
/* too many partitions can cause an overflow */
zfs_dbgmsg("zvol: create minor overflow: %s, minor %u/%u",
name, minor, MINOR(minor));
ida_simple_remove(&zvol_ida, idx);
return (SET_ERROR(EINVAL));
}
zv = zvol_find_by_name_hash(name, hash, RW_NONE); zv = zvol_find_by_name_hash(name, hash, RW_NONE);
if (zv) { if (zv) {

View File

@ -826,6 +826,48 @@ abd_iterate_func(abd_t *abd, size_t off, size_t size,
return (ret); return (ret);
} }
#if defined(__linux__) && defined(_KERNEL)
/*
* Walk `size` bytes of `abd`, starting at byte `off`, one page span at a
* time. For each span, `func` is called with the page, the offset of the
* data within it, the length of the data (never more than the bytes still
* to be walked) and `private`.
*
* Iteration stops at the first non-zero return from `func`, and that value
* is returned. Returns 0 if every call returned 0, or if `size` is 0 (in
* which case `func` is never called).
*/
int
abd_iterate_page_func(abd_t *abd, size_t off, size_t size,
abd_iter_page_func_t *func, void *private)
{
struct abd_iter aiter;
int ret = 0;
if (size == 0)
return (0);
abd_verify(abd);
ASSERT3U(off + size, <=, abd->abd_size);
abd_t *c_abd = abd_init_abd_iter(abd, &aiter, off);
while (size > 0) {
IMPLY(abd_is_gang(abd), c_abd != NULL);
/* Load the page at the iterator's current position */
abd_iter_page(&aiter);
/* Don't hand out more than the caller asked to walk */
size_t len = MIN(aiter.iter_page_dsize, size);
ASSERT3U(len, >, 0);
ret = func(aiter.iter_page, aiter.iter_page_doff,
len, private);
/* Clear the page state so it isn't carried into the next step */
aiter.iter_page = NULL;
aiter.iter_page_doff = 0;
aiter.iter_page_dsize = 0;
if (ret != 0)
break;
size -= len;
c_abd = abd_advance_abd_iter(abd, c_abd, &aiter, len);
}
return (ret);
}
#endif
struct buf_arg { struct buf_arg {
void *arg_buf; void *arg_buf;
}; };

View File

@ -1014,7 +1014,7 @@ static arc_buf_hdr_t *
buf_hash_find(uint64_t spa, const blkptr_t *bp, kmutex_t **lockp) buf_hash_find(uint64_t spa, const blkptr_t *bp, kmutex_t **lockp)
{ {
const dva_t *dva = BP_IDENTITY(bp); const dva_t *dva = BP_IDENTITY(bp);
uint64_t birth = BP_PHYSICAL_BIRTH(bp); uint64_t birth = BP_GET_BIRTH(bp);
uint64_t idx = BUF_HASH_INDEX(spa, dva, birth); uint64_t idx = BUF_HASH_INDEX(spa, dva, birth);
kmutex_t *hash_lock = BUF_HASH_LOCK(idx); kmutex_t *hash_lock = BUF_HASH_LOCK(idx);
arc_buf_hdr_t *hdr; arc_buf_hdr_t *hdr;
@ -2183,7 +2183,7 @@ arc_untransform(arc_buf_t *buf, spa_t *spa, const zbookmark_phys_t *zb,
* (and generate an ereport) before leaving the ARC. * (and generate an ereport) before leaving the ARC.
*/ */
ret = SET_ERROR(EIO); ret = SET_ERROR(EIO);
spa_log_error(spa, zb, &buf->b_hdr->b_birth); spa_log_error(spa, zb, buf->b_hdr->b_birth);
(void) zfs_ereport_post(FM_EREPORT_ZFS_AUTHENTICATION, (void) zfs_ereport_post(FM_EREPORT_ZFS_AUTHENTICATION,
spa, NULL, zb, NULL, 0); spa, NULL, zb, NULL, 0);
} }
@ -5251,7 +5251,7 @@ arc_read_done(zio_t *zio)
if (HDR_IN_HASH_TABLE(hdr)) { if (HDR_IN_HASH_TABLE(hdr)) {
arc_buf_hdr_t *found; arc_buf_hdr_t *found;
ASSERT3U(hdr->b_birth, ==, BP_PHYSICAL_BIRTH(zio->io_bp)); ASSERT3U(hdr->b_birth, ==, BP_GET_BIRTH(zio->io_bp));
ASSERT3U(hdr->b_dva.dva_word[0], ==, ASSERT3U(hdr->b_dva.dva_word[0], ==,
BP_IDENTITY(zio->io_bp)->dva_word[0]); BP_IDENTITY(zio->io_bp)->dva_word[0]);
ASSERT3U(hdr->b_dva.dva_word[1], ==, ASSERT3U(hdr->b_dva.dva_word[1], ==,
@ -5354,7 +5354,7 @@ arc_read_done(zio_t *zio)
error = SET_ERROR(EIO); error = SET_ERROR(EIO);
if ((zio->io_flags & ZIO_FLAG_SPECULATIVE) == 0) { if ((zio->io_flags & ZIO_FLAG_SPECULATIVE) == 0) {
spa_log_error(zio->io_spa, &acb->acb_zb, spa_log_error(zio->io_spa, &acb->acb_zb,
&zio->io_bp->blk_birth); BP_GET_LOGICAL_BIRTH(zio->io_bp));
(void) zfs_ereport_post( (void) zfs_ereport_post(
FM_EREPORT_ZFS_AUTHENTICATION, FM_EREPORT_ZFS_AUTHENTICATION,
zio->io_spa, NULL, &acb->acb_zb, zio, 0); zio->io_spa, NULL, &acb->acb_zb, zio, 0);
@ -5639,7 +5639,7 @@ top:
*/ */
rc = SET_ERROR(EIO); rc = SET_ERROR(EIO);
if ((zio_flags & ZIO_FLAG_SPECULATIVE) == 0) { if ((zio_flags & ZIO_FLAG_SPECULATIVE) == 0) {
spa_log_error(spa, zb, &hdr->b_birth); spa_log_error(spa, zb, hdr->b_birth);
(void) zfs_ereport_post( (void) zfs_ereport_post(
FM_EREPORT_ZFS_AUTHENTICATION, FM_EREPORT_ZFS_AUTHENTICATION,
spa, NULL, zb, NULL, 0); spa, NULL, zb, NULL, 0);
@ -5686,12 +5686,12 @@ top:
* embedded data. * embedded data.
*/ */
arc_buf_hdr_t *exists = NULL; arc_buf_hdr_t *exists = NULL;
hdr = arc_hdr_alloc(spa_load_guid(spa), psize, lsize, hdr = arc_hdr_alloc(guid, psize, lsize,
BP_IS_PROTECTED(bp), BP_GET_COMPRESS(bp), 0, type); BP_IS_PROTECTED(bp), BP_GET_COMPRESS(bp), 0, type);
if (!embedded_bp) { if (!embedded_bp) {
hdr->b_dva = *BP_IDENTITY(bp); hdr->b_dva = *BP_IDENTITY(bp);
hdr->b_birth = BP_PHYSICAL_BIRTH(bp); hdr->b_birth = BP_GET_BIRTH(bp);
exists = buf_hash_insert(hdr, &hash_lock); exists = buf_hash_insert(hdr, &hash_lock);
} }
if (exists != NULL) { if (exists != NULL) {
@ -6557,7 +6557,7 @@ arc_write_done(zio_t *zio)
buf_discard_identity(hdr); buf_discard_identity(hdr);
} else { } else {
hdr->b_dva = *BP_IDENTITY(zio->io_bp); hdr->b_dva = *BP_IDENTITY(zio->io_bp);
hdr->b_birth = BP_PHYSICAL_BIRTH(zio->io_bp); hdr->b_birth = BP_GET_BIRTH(zio->io_bp);
} }
} else { } else {
ASSERT(HDR_EMPTY(hdr)); ASSERT(HDR_EMPTY(hdr));

View File

@ -893,7 +893,7 @@ bpobj_enqueue(bpobj_t *bpo, const blkptr_t *bp, boolean_t bp_freed,
*/ */
memset(&stored_bp, 0, sizeof (stored_bp)); memset(&stored_bp, 0, sizeof (stored_bp));
stored_bp.blk_prop = bp->blk_prop; stored_bp.blk_prop = bp->blk_prop;
stored_bp.blk_birth = bp->blk_birth; BP_SET_LOGICAL_BIRTH(&stored_bp, BP_GET_LOGICAL_BIRTH(bp));
} else if (!BP_GET_DEDUP(bp)) { } else if (!BP_GET_DEDUP(bp)) {
/* The bpobj will compress better without the checksum */ /* The bpobj will compress better without the checksum */
memset(&stored_bp.blk_cksum, 0, sizeof (stored_bp.blk_cksum)); memset(&stored_bp.blk_cksum, 0, sizeof (stored_bp.blk_cksum));
@ -953,7 +953,8 @@ space_range_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed, dmu_tx_t *tx)
(void) bp_freed, (void) tx; (void) bp_freed, (void) tx;
struct space_range_arg *sra = arg; struct space_range_arg *sra = arg;
if (bp->blk_birth > sra->mintxg && bp->blk_birth <= sra->maxtxg) { if (BP_GET_LOGICAL_BIRTH(bp) > sra->mintxg &&
BP_GET_LOGICAL_BIRTH(bp) <= sra->maxtxg) {
if (dsl_pool_sync_context(spa_get_dsl(sra->spa))) if (dsl_pool_sync_context(spa_get_dsl(sra->spa)))
sra->used += bp_get_dsize_sync(sra->spa, bp); sra->used += bp_get_dsize_sync(sra->spa, bp);
else else
@ -985,7 +986,7 @@ bpobj_space(bpobj_t *bpo, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
/* /*
* Return the amount of space in the bpobj which is: * Return the amount of space in the bpobj which is:
* mintxg < blk_birth <= maxtxg * mintxg < logical birth <= maxtxg
*/ */
int int
bpobj_space_range(bpobj_t *bpo, uint64_t mintxg, uint64_t maxtxg, bpobj_space_range(bpobj_t *bpo, uint64_t mintxg, uint64_t maxtxg,

View File

@ -248,7 +248,7 @@ static kmem_cache_t *brt_pending_entry_cache;
/* /*
* Enable/disable prefetching of BRT entries that we are going to modify. * Enable/disable prefetching of BRT entries that we are going to modify.
*/ */
int zfs_brt_prefetch = 1; static int brt_zap_prefetch = 1;
#ifdef ZFS_DEBUG #ifdef ZFS_DEBUG
#define BRT_DEBUG(...) do { \ #define BRT_DEBUG(...) do { \
@ -260,8 +260,8 @@ int zfs_brt_prefetch = 1;
#define BRT_DEBUG(...) do { } while (0) #define BRT_DEBUG(...) do { } while (0)
#endif #endif
int brt_zap_leaf_blockshift = 12; static int brt_zap_default_bs = 12;
int brt_zap_indirect_blockshift = 12; static int brt_zap_default_ibs = 12;
static kstat_t *brt_ksp; static kstat_t *brt_ksp;
@ -458,8 +458,7 @@ brt_vdev_create(brt_t *brt, brt_vdev_t *brtvd, dmu_tx_t *tx)
brtvd->bv_mos_entries = zap_create_flags(brt->brt_mos, 0, brtvd->bv_mos_entries = zap_create_flags(brt->brt_mos, 0,
ZAP_FLAG_HASH64 | ZAP_FLAG_UINT64_KEY, DMU_OTN_ZAP_METADATA, ZAP_FLAG_HASH64 | ZAP_FLAG_UINT64_KEY, DMU_OTN_ZAP_METADATA,
brt_zap_leaf_blockshift, brt_zap_indirect_blockshift, DMU_OT_NONE, brt_zap_default_bs, brt_zap_default_ibs, DMU_OT_NONE, 0, tx);
0, tx);
VERIFY(brtvd->bv_mos_entries != 0); VERIFY(brtvd->bv_mos_entries != 0);
BRT_DEBUG("MOS entries created, object=%llu", BRT_DEBUG("MOS entries created, object=%llu",
(u_longlong_t)brtvd->bv_mos_entries); (u_longlong_t)brtvd->bv_mos_entries);
@ -901,7 +900,6 @@ static int
brt_entry_lookup(brt_t *brt, brt_vdev_t *brtvd, brt_entry_t *bre) brt_entry_lookup(brt_t *brt, brt_vdev_t *brtvd, brt_entry_t *bre)
{ {
uint64_t mos_entries; uint64_t mos_entries;
uint64_t one, physsize;
int error; int error;
ASSERT(RW_LOCK_HELD(&brt->brt_lock)); ASSERT(RW_LOCK_HELD(&brt->brt_lock));
@ -919,21 +917,8 @@ brt_entry_lookup(brt_t *brt, brt_vdev_t *brtvd, brt_entry_t *bre)
brt_unlock(brt); brt_unlock(brt);
error = zap_length_uint64(brt->brt_mos, mos_entries, &bre->bre_offset, error = zap_lookup_uint64(brt->brt_mos, mos_entries, &bre->bre_offset,
BRT_KEY_WORDS, &one, &physsize); BRT_KEY_WORDS, 1, sizeof (bre->bre_refcount), &bre->bre_refcount);
if (error == 0) {
ASSERT3U(one, ==, 1);
ASSERT3U(physsize, ==, sizeof (bre->bre_refcount));
error = zap_lookup_uint64(brt->brt_mos, mos_entries,
&bre->bre_offset, BRT_KEY_WORDS, 1,
sizeof (bre->bre_refcount), &bre->bre_refcount);
BRT_DEBUG("ZAP lookup: object=%llu vdev=%llu offset=%llu "
"count=%llu error=%d", (u_longlong_t)mos_entries,
(u_longlong_t)brtvd->bv_vdevid,
(u_longlong_t)bre->bre_offset,
error == 0 ? (u_longlong_t)bre->bre_refcount : 0, error);
}
brt_wlock(brt); brt_wlock(brt);
@ -955,52 +940,10 @@ brt_entry_prefetch(brt_t *brt, uint64_t vdevid, brt_entry_t *bre)
if (mos_entries == 0) if (mos_entries == 0)
return; return;
BRT_DEBUG("ZAP prefetch: object=%llu vdev=%llu offset=%llu",
(u_longlong_t)mos_entries, (u_longlong_t)vdevid,
(u_longlong_t)bre->bre_offset);
(void) zap_prefetch_uint64(brt->brt_mos, mos_entries, (void) zap_prefetch_uint64(brt->brt_mos, mos_entries,
(uint64_t *)&bre->bre_offset, BRT_KEY_WORDS); (uint64_t *)&bre->bre_offset, BRT_KEY_WORDS);
} }
static int
brt_entry_update(brt_t *brt, brt_vdev_t *brtvd, brt_entry_t *bre, dmu_tx_t *tx)
{
int error;
ASSERT(RW_LOCK_HELD(&brt->brt_lock));
ASSERT(brtvd->bv_mos_entries != 0);
ASSERT(bre->bre_refcount > 0);
error = zap_update_uint64(brt->brt_mos, brtvd->bv_mos_entries,
(uint64_t *)&bre->bre_offset, BRT_KEY_WORDS, 1,
sizeof (bre->bre_refcount), &bre->bre_refcount, tx);
BRT_DEBUG("ZAP update: object=%llu vdev=%llu offset=%llu count=%llu "
"error=%d", (u_longlong_t)brtvd->bv_mos_entries,
(u_longlong_t)brtvd->bv_vdevid, (u_longlong_t)bre->bre_offset,
(u_longlong_t)bre->bre_refcount, error);
return (error);
}
static int
brt_entry_remove(brt_t *brt, brt_vdev_t *brtvd, brt_entry_t *bre, dmu_tx_t *tx)
{
int error;
ASSERT(RW_LOCK_HELD(&brt->brt_lock));
ASSERT(brtvd->bv_mos_entries != 0);
ASSERT0(bre->bre_refcount);
error = zap_remove_uint64(brt->brt_mos, brtvd->bv_mos_entries,
(uint64_t *)&bre->bre_offset, BRT_KEY_WORDS, tx);
BRT_DEBUG("ZAP remove: object=%llu vdev=%llu offset=%llu count=%llu "
"error=%d", (u_longlong_t)brtvd->bv_mos_entries,
(u_longlong_t)brtvd->bv_vdevid, (u_longlong_t)bre->bre_offset,
(u_longlong_t)bre->bre_refcount, error);
return (error);
}
/* /*
* Return TRUE if we _can_ have BRT entry for this bp. It might be false * Return TRUE if we _can_ have BRT entry for this bp. It might be false
* positive, but gives us quick answer if we should look into BRT, which * positive, but gives us quick answer if we should look into BRT, which
@ -1405,7 +1348,7 @@ brt_prefetch(brt_t *brt, const blkptr_t *bp)
ASSERT(bp != NULL); ASSERT(bp != NULL);
if (!zfs_brt_prefetch) if (!brt_zap_prefetch)
return; return;
brt_entry_fill(bp, &bre, &vdevid); brt_entry_fill(bp, &bre, &vdevid);
@ -1420,13 +1363,13 @@ brt_pending_entry_compare(const void *x1, const void *x2)
const blkptr_t *bp1 = &bpe1->bpe_bp, *bp2 = &bpe2->bpe_bp; const blkptr_t *bp1 = &bpe1->bpe_bp, *bp2 = &bpe2->bpe_bp;
int cmp; int cmp;
cmp = TREE_CMP(BP_PHYSICAL_BIRTH(bp1), BP_PHYSICAL_BIRTH(bp2));
if (cmp == 0) {
cmp = TREE_CMP(DVA_GET_VDEV(&bp1->blk_dva[0]), cmp = TREE_CMP(DVA_GET_VDEV(&bp1->blk_dva[0]),
DVA_GET_VDEV(&bp2->blk_dva[0])); DVA_GET_VDEV(&bp2->blk_dva[0]));
if (cmp == 0) { if (cmp == 0) {
cmp = TREE_CMP(DVA_GET_OFFSET(&bp1->blk_dva[0]), cmp = TREE_CMP(DVA_GET_OFFSET(&bp1->blk_dva[0]),
DVA_GET_OFFSET(&bp2->blk_dva[0])); DVA_GET_OFFSET(&bp2->blk_dva[0]));
if (unlikely(cmp == 0)) {
cmp = TREE_CMP(BP_GET_BIRTH(bp1), BP_GET_BIRTH(bp2));
} }
} }
@ -1471,10 +1414,10 @@ brt_pending_add(spa_t *spa, const blkptr_t *bp, dmu_tx_t *tx)
kmem_cache_free(brt_pending_entry_cache, newbpe); kmem_cache_free(brt_pending_entry_cache, newbpe);
} else { } else {
ASSERT(bpe == NULL); ASSERT(bpe == NULL);
}
/* Prefetch BRT entry, as we will need it in the syncing context. */ /* Prefetch BRT entry for the syncing context. */
brt_prefetch(brt, bp); brt_prefetch(brt, bp);
}
} }
void void
@ -1514,26 +1457,23 @@ brt_pending_remove(spa_t *spa, const blkptr_t *bp, dmu_tx_t *tx)
void void
brt_pending_apply(spa_t *spa, uint64_t txg) brt_pending_apply(spa_t *spa, uint64_t txg)
{ {
brt_t *brt; brt_t *brt = spa->spa_brt;
brt_pending_entry_t *bpe; brt_pending_entry_t *bpe;
avl_tree_t *pending_tree; avl_tree_t *pending_tree;
kmutex_t *pending_lock;
void *c; void *c;
ASSERT3U(txg, !=, 0); ASSERT3U(txg, !=, 0);
brt = spa->spa_brt; /*
* We are in syncing context, so no other brt_pending_tree accesses
* are possible for the TXG. Don't need to acquire brt_pending_lock.
*/
pending_tree = &brt->brt_pending_tree[txg & TXG_MASK]; pending_tree = &brt->brt_pending_tree[txg & TXG_MASK];
pending_lock = &brt->brt_pending_lock[txg & TXG_MASK];
mutex_enter(pending_lock);
c = NULL; c = NULL;
while ((bpe = avl_destroy_nodes(pending_tree, &c)) != NULL) { while ((bpe = avl_destroy_nodes(pending_tree, &c)) != NULL) {
boolean_t added_to_ddt; boolean_t added_to_ddt;
mutex_exit(pending_lock);
for (int i = 0; i < bpe->bpe_count; i++) { for (int i = 0; i < bpe->bpe_count; i++) {
/* /*
* If the block has DEDUP bit set, it means that it * If the block has DEDUP bit set, it means that it
@ -1551,31 +1491,20 @@ brt_pending_apply(spa_t *spa, uint64_t txg)
} }
kmem_cache_free(brt_pending_entry_cache, bpe); kmem_cache_free(brt_pending_entry_cache, bpe);
mutex_enter(pending_lock);
} }
mutex_exit(pending_lock);
} }
static void static void
brt_sync_entry(brt_t *brt, brt_vdev_t *brtvd, brt_entry_t *bre, dmu_tx_t *tx) brt_sync_entry(dnode_t *dn, brt_entry_t *bre, dmu_tx_t *tx)
{ {
ASSERT(RW_WRITE_HELD(&brt->brt_lock));
ASSERT(brtvd->bv_mos_entries != 0);
if (bre->bre_refcount == 0) { if (bre->bre_refcount == 0) {
int error; int error = zap_remove_uint64_by_dnode(dn, &bre->bre_offset,
BRT_KEY_WORDS, tx);
error = brt_entry_remove(brt, brtvd, bre, tx); VERIFY(error == 0 || error == ENOENT);
ASSERT(error == 0 || error == ENOENT);
/*
* If error == ENOENT then zfs_clone_range() was done from a
* removed (but opened) file (open(), unlink()).
*/
ASSERT(brt_entry_lookup(brt, brtvd, bre) == ENOENT);
} else { } else {
VERIFY0(brt_entry_update(brt, brtvd, bre, tx)); VERIFY0(zap_update_uint64_by_dnode(dn, &bre->bre_offset,
BRT_KEY_WORDS, 1, sizeof (bre->bre_refcount),
&bre->bre_refcount, tx));
} }
} }
@ -1584,6 +1513,7 @@ brt_sync_table(brt_t *brt, dmu_tx_t *tx)
{ {
brt_vdev_t *brtvd; brt_vdev_t *brtvd;
brt_entry_t *bre; brt_entry_t *bre;
dnode_t *dn;
uint64_t vdevid; uint64_t vdevid;
void *c; void *c;
@ -1607,14 +1537,19 @@ brt_sync_table(brt_t *brt, dmu_tx_t *tx)
if (brtvd->bv_mos_brtvdev == 0) if (brtvd->bv_mos_brtvdev == 0)
brt_vdev_create(brt, brtvd, tx); brt_vdev_create(brt, brtvd, tx);
VERIFY0(dnode_hold(brt->brt_mos, brtvd->bv_mos_entries,
FTAG, &dn));
c = NULL; c = NULL;
while ((bre = avl_destroy_nodes(&brtvd->bv_tree, &c)) != NULL) { while ((bre = avl_destroy_nodes(&brtvd->bv_tree, &c)) != NULL) {
brt_sync_entry(brt, brtvd, bre, tx); brt_sync_entry(dn, bre, tx);
brt_entry_free(bre); brt_entry_free(bre);
ASSERT(brt->brt_nentries > 0); ASSERT(brt->brt_nentries > 0);
brt->brt_nentries--; brt->brt_nentries--;
} }
dnode_rele(dn, FTAG);
brt_vdev_sync(brt, brtvd, tx); brt_vdev_sync(brt, brtvd, tx);
if (brtvd->bv_totalcount == 0) if (brtvd->bv_totalcount == 0)
@ -1729,9 +1664,10 @@ brt_unload(spa_t *spa)
} }
/* BEGIN CSTYLED */ /* BEGIN CSTYLED */
ZFS_MODULE_PARAM(zfs_brt, zfs_brt_, prefetch, INT, ZMOD_RW, ZFS_MODULE_PARAM(zfs_brt, , brt_zap_prefetch, INT, ZMOD_RW,
"Enable prefetching of BRT entries"); "Enable prefetching of BRT ZAP entries");
#ifdef ZFS_BRT_DEBUG ZFS_MODULE_PARAM(zfs_brt, , brt_zap_default_bs, UINT, ZMOD_RW,
ZFS_MODULE_PARAM(zfs_brt, zfs_brt_, debug, INT, ZMOD_RW, "BRT debug"); "BRT ZAP leaf blockshift");
#endif ZFS_MODULE_PARAM(zfs_brt, , brt_zap_default_ibs, UINT, ZMOD_RW,
"BRT ZAP indirect blockshift");
/* END CSTYLED */ /* END CSTYLED */

View File

@ -1217,7 +1217,7 @@ dbuf_verify(dmu_buf_impl_t *db)
ASSERT0(bp->blk_pad[1]); ASSERT0(bp->blk_pad[1]);
ASSERT(!BP_IS_EMBEDDED(bp)); ASSERT(!BP_IS_EMBEDDED(bp));
ASSERT(BP_IS_HOLE(bp)); ASSERT(BP_IS_HOLE(bp));
ASSERT0(bp->blk_phys_birth); ASSERT0(BP_GET_PHYSICAL_BIRTH(bp));
} }
} }
} }
@ -1457,7 +1457,7 @@ dbuf_handle_indirect_hole(dmu_buf_impl_t *db, dnode_t *dn, blkptr_t *dbbp)
dn->dn_datablksz : BP_GET_LSIZE(dbbp)); dn->dn_datablksz : BP_GET_LSIZE(dbbp));
BP_SET_TYPE(bp, BP_GET_TYPE(dbbp)); BP_SET_TYPE(bp, BP_GET_TYPE(dbbp));
BP_SET_LEVEL(bp, BP_GET_LEVEL(dbbp) - 1); BP_SET_LEVEL(bp, BP_GET_LEVEL(dbbp) - 1);
BP_SET_BIRTH(bp, dbbp->blk_birth, 0); BP_SET_BIRTH(bp, BP_GET_LOGICAL_BIRTH(dbbp), 0);
} }
} }
@ -1486,7 +1486,7 @@ dbuf_read_hole(dmu_buf_impl_t *db, dnode_t *dn, blkptr_t *bp)
memset(db->db.db_data, 0, db->db.db_size); memset(db->db.db_data, 0, db->db.db_size);
if (bp != NULL && db->db_level > 0 && BP_IS_HOLE(bp) && if (bp != NULL && db->db_level > 0 && BP_IS_HOLE(bp) &&
bp->blk_birth != 0) { BP_GET_LOGICAL_BIRTH(bp) != 0) {
dbuf_handle_indirect_hole(db, dn, bp); dbuf_handle_indirect_hole(db, dn, bp);
} }
db->db_state = DB_CACHED; db->db_state = DB_CACHED;
@ -1633,7 +1633,8 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags,
* If this is not true it indicates tampering and we report an error. * If this is not true it indicates tampering and we report an error.
*/ */
if (db->db_objset->os_encrypted && !BP_USES_CRYPT(bpp)) { if (db->db_objset->os_encrypted && !BP_USES_CRYPT(bpp)) {
spa_log_error(db->db_objset->os_spa, &zb, &bpp->blk_birth); spa_log_error(db->db_objset->os_spa, &zb,
BP_GET_LOGICAL_BIRTH(bpp));
err = SET_ERROR(EIO); err = SET_ERROR(EIO);
goto early_unlock; goto early_unlock;
} }
@ -2832,7 +2833,7 @@ dbuf_override_impl(dmu_buf_impl_t *db, const blkptr_t *bp, dmu_tx_t *tx)
dl = &dr->dt.dl; dl = &dr->dt.dl;
dl->dr_overridden_by = *bp; dl->dr_overridden_by = *bp;
dl->dr_override_state = DR_OVERRIDDEN; dl->dr_override_state = DR_OVERRIDDEN;
dl->dr_overridden_by.blk_birth = dr->dr_txg; BP_SET_LOGICAL_BIRTH(&dl->dr_overridden_by, dr->dr_txg);
} }
boolean_t boolean_t
@ -2909,7 +2910,7 @@ dmu_buf_write_embedded(dmu_buf_t *dbuf, void *data,
BP_SET_BYTEORDER(&dl->dr_overridden_by, byteorder); BP_SET_BYTEORDER(&dl->dr_overridden_by, byteorder);
dl->dr_override_state = DR_OVERRIDDEN; dl->dr_override_state = DR_OVERRIDDEN;
dl->dr_overridden_by.blk_birth = dr->dr_txg; BP_SET_LOGICAL_BIRTH(&dl->dr_overridden_by, dr->dr_txg);
} }
void void
@ -4174,21 +4175,6 @@ dmu_buf_get_objset(dmu_buf_t *db)
return (dbi->db_objset); return (dbi->db_objset);
} }
dnode_t *
dmu_buf_dnode_enter(dmu_buf_t *db)
{
dmu_buf_impl_t *dbi = (dmu_buf_impl_t *)db;
DB_DNODE_ENTER(dbi);
return (DB_DNODE(dbi));
}
void
dmu_buf_dnode_exit(dmu_buf_t *db)
{
dmu_buf_impl_t *dbi = (dmu_buf_impl_t *)db;
DB_DNODE_EXIT(dbi);
}
static void static void
dbuf_check_blkptr(dnode_t *dn, dmu_buf_impl_t *db) dbuf_check_blkptr(dnode_t *dn, dmu_buf_impl_t *db)
{ {
@ -4727,7 +4713,7 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
dnode_diduse_space(dn, delta - zio->io_prev_space_delta); dnode_diduse_space(dn, delta - zio->io_prev_space_delta);
zio->io_prev_space_delta = delta; zio->io_prev_space_delta = delta;
if (bp->blk_birth != 0) { if (BP_GET_LOGICAL_BIRTH(bp) != 0) {
ASSERT((db->db_blkid != DMU_SPILL_BLKID && ASSERT((db->db_blkid != DMU_SPILL_BLKID &&
BP_GET_TYPE(bp) == dn->dn_type) || BP_GET_TYPE(bp) == dn->dn_type) ||
(db->db_blkid == DMU_SPILL_BLKID && (db->db_blkid == DMU_SPILL_BLKID &&
@ -5014,7 +5000,7 @@ dbuf_remap_impl(dnode_t *dn, blkptr_t *bp, krwlock_t *rw, dmu_tx_t *tx)
ASSERT(dsl_pool_sync_context(spa_get_dsl(spa))); ASSERT(dsl_pool_sync_context(spa_get_dsl(spa)));
drica.drica_os = dn->dn_objset; drica.drica_os = dn->dn_objset;
drica.drica_blk_birth = bp->blk_birth; drica.drica_blk_birth = BP_GET_LOGICAL_BIRTH(bp);
drica.drica_tx = tx; drica.drica_tx = tx;
if (spa_remap_blkptr(spa, &bp_copy, dbuf_remap_impl_callback, if (spa_remap_blkptr(spa, &bp_copy, dbuf_remap_impl_callback,
&drica)) { &drica)) {
@ -5029,7 +5015,8 @@ dbuf_remap_impl(dnode_t *dn, blkptr_t *bp, krwlock_t *rw, dmu_tx_t *tx)
if (dn->dn_objset != spa_meta_objset(spa)) { if (dn->dn_objset != spa_meta_objset(spa)) {
dsl_dataset_t *ds = dmu_objset_ds(dn->dn_objset); dsl_dataset_t *ds = dmu_objset_ds(dn->dn_objset);
if (dsl_deadlist_is_open(&ds->ds_dir->dd_livelist) && if (dsl_deadlist_is_open(&ds->ds_dir->dd_livelist) &&
bp->blk_birth > ds->ds_dir->dd_origin_txg) { BP_GET_LOGICAL_BIRTH(bp) >
ds->ds_dir->dd_origin_txg) {
ASSERT(!BP_IS_EMBEDDED(bp)); ASSERT(!BP_IS_EMBEDDED(bp));
ASSERT(dsl_dir_is_clone(ds->ds_dir)); ASSERT(dsl_dir_is_clone(ds->ds_dir));
ASSERT(spa_feature_is_enabled(spa, ASSERT(spa_feature_is_enabled(spa,
@ -5151,7 +5138,7 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
} }
ASSERT(db->db_level == 0 || data == db->db_buf); ASSERT(db->db_level == 0 || data == db->db_buf);
ASSERT3U(db->db_blkptr->blk_birth, <=, txg); ASSERT3U(BP_GET_LOGICAL_BIRTH(db->db_blkptr), <=, txg);
ASSERT(pio); ASSERT(pio);
SET_BOOKMARK(&zb, os->os_dsl_dataset ? SET_BOOKMARK(&zb, os->os_dsl_dataset ?

View File

@ -437,7 +437,7 @@ ddt_phys_fill(ddt_phys_t *ddp, const blkptr_t *bp)
for (int d = 0; d < SPA_DVAS_PER_BP; d++) for (int d = 0; d < SPA_DVAS_PER_BP; d++)
ddp->ddp_dva[d] = bp->blk_dva[d]; ddp->ddp_dva[d] = bp->blk_dva[d];
ddp->ddp_phys_birth = BP_PHYSICAL_BIRTH(bp); ddp->ddp_phys_birth = BP_GET_BIRTH(bp);
} }
void void
@ -485,7 +485,7 @@ ddt_phys_select(const ddt_entry_t *dde, const blkptr_t *bp)
for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) { for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
if (DVA_EQUAL(BP_IDENTITY(bp), &ddp->ddp_dva[0]) && if (DVA_EQUAL(BP_IDENTITY(bp), &ddp->ddp_dva[0]) &&
BP_PHYSICAL_BIRTH(bp) == ddp->ddp_phys_birth) BP_GET_BIRTH(bp) == ddp->ddp_phys_birth)
return (ddp); return (ddp);
} }
return (NULL); return (NULL);

View File

@ -712,8 +712,6 @@ dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset,
uint64_t len, zio_priority_t pri) uint64_t len, zio_priority_t pri)
{ {
dnode_t *dn; dnode_t *dn;
int64_t level2 = level;
uint64_t start, end, start2, end2;
if (dmu_prefetch_max == 0 || len == 0) { if (dmu_prefetch_max == 0 || len == 0) {
dmu_prefetch_dnode(os, object, pri); dmu_prefetch_dnode(os, object, pri);
@ -723,6 +721,18 @@ dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset,
if (dnode_hold(os, object, FTAG, &dn) != 0) if (dnode_hold(os, object, FTAG, &dn) != 0)
return; return;
dmu_prefetch_by_dnode(dn, level, offset, len, pri);
dnode_rele(dn, FTAG);
}
void
dmu_prefetch_by_dnode(dnode_t *dn, int64_t level, uint64_t offset,
uint64_t len, zio_priority_t pri)
{
int64_t level2 = level;
uint64_t start, end, start2, end2;
/* /*
* Depending on len we may do two prefetches: blocks [start, end) at * Depending on len we may do two prefetches: blocks [start, end) at
* level, and following blocks [start2, end2) at higher level2. * level, and following blocks [start2, end2) at higher level2.
@ -762,8 +772,6 @@ dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset,
for (uint64_t i = start2; i < end2; i++) for (uint64_t i = start2; i < end2; i++)
dbuf_prefetch(dn, level2, i, pri, 0); dbuf_prefetch(dn, level2, i, pri, 0);
rw_exit(&dn->dn_struct_rwlock); rw_exit(&dn->dn_struct_rwlock);
dnode_rele(dn, FTAG);
} }
/* /*
@ -1619,7 +1627,7 @@ dmu_sync_done(zio_t *zio, arc_buf_t *buf, void *varg)
* it's an old style hole. * it's an old style hole.
*/ */
if (BP_IS_HOLE(&dr->dt.dl.dr_overridden_by) && if (BP_IS_HOLE(&dr->dt.dl.dr_overridden_by) &&
dr->dt.dl.dr_overridden_by.blk_birth == 0) BP_GET_LOGICAL_BIRTH(&dr->dt.dl.dr_overridden_by) == 0)
BP_ZERO(&dr->dt.dl.dr_overridden_by); BP_ZERO(&dr->dt.dl.dr_overridden_by);
} else { } else {
dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN; dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN;
@ -1650,7 +1658,7 @@ dmu_sync_late_arrival_done(zio_t *zio)
blkptr_t *bp_orig __maybe_unused = &zio->io_bp_orig; blkptr_t *bp_orig __maybe_unused = &zio->io_bp_orig;
ASSERT(!(zio->io_flags & ZIO_FLAG_NOPWRITE)); ASSERT(!(zio->io_flags & ZIO_FLAG_NOPWRITE));
ASSERT(BP_IS_HOLE(bp_orig) || !BP_EQUAL(bp, bp_orig)); ASSERT(BP_IS_HOLE(bp_orig) || !BP_EQUAL(bp, bp_orig));
ASSERT(zio->io_bp->blk_birth == zio->io_txg); ASSERT(BP_GET_LOGICAL_BIRTH(zio->io_bp) == zio->io_txg);
ASSERT(zio->io_txg > spa_syncing_txg(zio->io_spa)); ASSERT(zio->io_txg > spa_syncing_txg(zio->io_spa));
zio_free(zio->io_spa, zio->io_txg, zio->io_bp); zio_free(zio->io_spa, zio->io_txg, zio->io_bp);
} }
@ -2257,11 +2265,13 @@ dmu_read_l0_bps(objset_t *os, uint64_t object, uint64_t offset, uint64_t length,
if (bp == NULL) { if (bp == NULL) {
/* /*
* The block was created in this transaction group, * The file size was increased, but the block was never
* so it has no BP yet. * written, otherwise we would either have the block
* pointer or the dirty record and would not get here.
* It is effectively a hole, so report it as such.
*/ */
error = SET_ERROR(EAGAIN); BP_ZERO(&bps[i]);
goto out; continue;
} }
/* /*
* Make sure we clone only data blocks. * Make sure we clone only data blocks.
@ -2277,11 +2287,11 @@ dmu_read_l0_bps(objset_t *os, uint64_t object, uint64_t offset, uint64_t length,
* operation into ZIL, or it may be impossible to replay, since * operation into ZIL, or it may be impossible to replay, since
* the block may appear not yet allocated at that point. * the block may appear not yet allocated at that point.
*/ */
if (BP_PHYSICAL_BIRTH(bp) > spa_freeze_txg(os->os_spa)) { if (BP_GET_BIRTH(bp) > spa_freeze_txg(os->os_spa)) {
error = SET_ERROR(EINVAL); error = SET_ERROR(EINVAL);
goto out; goto out;
} }
if (BP_PHYSICAL_BIRTH(bp) > spa_last_synced_txg(os->os_spa)) { if (BP_GET_BIRTH(bp) > spa_last_synced_txg(os->os_spa)) {
error = SET_ERROR(EAGAIN); error = SET_ERROR(EAGAIN);
goto out; goto out;
} }
@ -2353,18 +2363,17 @@ dmu_brt_clone(objset_t *os, uint64_t object, uint64_t offset, uint64_t length,
ASSERT3U(dr->dr_txg, ==, tx->tx_txg); ASSERT3U(dr->dr_txg, ==, tx->tx_txg);
dl = &dr->dt.dl; dl = &dr->dt.dl;
dl->dr_overridden_by = *bp; dl->dr_overridden_by = *bp;
if (!BP_IS_HOLE(bp) || BP_GET_LOGICAL_BIRTH(bp) != 0) {
if (!BP_IS_EMBEDDED(bp)) {
BP_SET_BIRTH(&dl->dr_overridden_by, dr->dr_txg,
BP_GET_BIRTH(bp));
} else {
BP_SET_LOGICAL_BIRTH(&dl->dr_overridden_by,
dr->dr_txg);
}
}
dl->dr_brtwrite = B_TRUE; dl->dr_brtwrite = B_TRUE;
dl->dr_override_state = DR_OVERRIDDEN; dl->dr_override_state = DR_OVERRIDDEN;
if (BP_IS_HOLE(bp)) {
dl->dr_overridden_by.blk_birth = 0;
dl->dr_overridden_by.blk_phys_birth = 0;
} else {
dl->dr_overridden_by.blk_birth = dr->dr_txg;
if (!BP_IS_EMBEDDED(bp)) {
dl->dr_overridden_by.blk_phys_birth =
BP_PHYSICAL_BIRTH(bp);
}
}
mutex_exit(&db->db_mtx); mutex_exit(&db->db_mtx);
@ -2563,6 +2572,8 @@ EXPORT_SYMBOL(dmu_bonus_hold_by_dnode);
EXPORT_SYMBOL(dmu_buf_hold_array_by_bonus); EXPORT_SYMBOL(dmu_buf_hold_array_by_bonus);
EXPORT_SYMBOL(dmu_buf_rele_array); EXPORT_SYMBOL(dmu_buf_rele_array);
EXPORT_SYMBOL(dmu_prefetch); EXPORT_SYMBOL(dmu_prefetch);
EXPORT_SYMBOL(dmu_prefetch_by_dnode);
EXPORT_SYMBOL(dmu_prefetch_dnode);
EXPORT_SYMBOL(dmu_free_range); EXPORT_SYMBOL(dmu_free_range);
EXPORT_SYMBOL(dmu_free_long_range); EXPORT_SYMBOL(dmu_free_long_range);
EXPORT_SYMBOL(dmu_free_long_object); EXPORT_SYMBOL(dmu_free_long_object);

View File

@ -1352,8 +1352,10 @@ corrective_read_done(zio_t *zio)
{ {
cr_cb_data_t *data = zio->io_private; cr_cb_data_t *data = zio->io_private;
/* Corruption corrected; update error log if needed */ /* Corruption corrected; update error log if needed */
if (zio->io_error == 0) if (zio->io_error == 0) {
spa_remove_error(data->spa, &data->zb, &zio->io_bp->blk_birth); spa_remove_error(data->spa, &data->zb,
BP_GET_LOGICAL_BIRTH(zio->io_bp));
}
kmem_free(data, sizeof (cr_cb_data_t)); kmem_free(data, sizeof (cr_cb_data_t));
abd_free(zio->io_abd); abd_free(zio->io_abd);
} }
@ -1480,8 +1482,9 @@ do_corrective_recv(struct receive_writer_arg *rwa, struct drr_write *drrw,
} }
rrd->abd = abd; rrd->abd = abd;
io = zio_rewrite(NULL, rwa->os->os_spa, bp->blk_birth, bp, abd, io = zio_rewrite(NULL, rwa->os->os_spa, BP_GET_LOGICAL_BIRTH(bp), bp,
BP_GET_PSIZE(bp), NULL, NULL, ZIO_PRIORITY_SYNC_WRITE, flags, &zb); abd, BP_GET_PSIZE(bp), NULL, NULL, ZIO_PRIORITY_SYNC_WRITE, flags,
&zb);
ASSERT(abd_get_size(abd) == BP_GET_LSIZE(bp) || ASSERT(abd_get_size(abd) == BP_GET_LSIZE(bp) ||
abd_get_size(abd) == BP_GET_PSIZE(bp)); abd_get_size(abd) == BP_GET_PSIZE(bp));
@ -2110,6 +2113,16 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
dmu_buf_rele(db, FTAG); dmu_buf_rele(db, FTAG);
dnode_rele(dn, FTAG); dnode_rele(dn, FTAG);
} }
/*
* If the receive fails, we want the resume stream to start with the
* same record that we last successfully received. There is no way to
* request resume from the object record, but we can benefit from the
* fact that sender always sends object record before anything else,
* after which it will "resend" data at offset 0 and resume normally.
*/
save_resume_state(rwa, drro->drr_object, 0, tx);
dmu_tx_commit(tx); dmu_tx_commit(tx);
return (0); return (0);
@ -2343,7 +2356,6 @@ receive_process_write_record(struct receive_writer_arg *rwa,
if (rwa->heal) { if (rwa->heal) {
blkptr_t *bp; blkptr_t *bp;
dmu_buf_t *dbp; dmu_buf_t *dbp;
dnode_t *dn;
int flags = DB_RF_CANFAIL; int flags = DB_RF_CANFAIL;
if (rwa->raw) if (rwa->raw)
@ -2375,19 +2387,15 @@ receive_process_write_record(struct receive_writer_arg *rwa,
dmu_buf_rele(dbp, FTAG); dmu_buf_rele(dbp, FTAG);
return (err); return (err);
} }
dn = dmu_buf_dnode_enter(dbp);
/* Make sure the on-disk block and recv record sizes match */ /* Make sure the on-disk block and recv record sizes match */
if (drrw->drr_logical_size != if (drrw->drr_logical_size != dbp->db_size) {
dn->dn_datablkszsec << SPA_MINBLOCKSHIFT) {
err = ENOTSUP; err = ENOTSUP;
dmu_buf_dnode_exit(dbp);
dmu_buf_rele(dbp, FTAG); dmu_buf_rele(dbp, FTAG);
return (err); return (err);
} }
/* Get the block pointer for the corrupted block */ /* Get the block pointer for the corrupted block */
bp = dmu_buf_get_blkptr(dbp); bp = dmu_buf_get_blkptr(dbp);
err = do_corrective_recv(rwa, drrw, rrd, bp); err = do_corrective_recv(rwa, drrw, rrd, bp);
dmu_buf_dnode_exit(dbp);
dmu_buf_rele(dbp, FTAG); dmu_buf_rele(dbp, FTAG);
return (err); return (err);
} }

View File

@ -619,7 +619,7 @@ dump_spill(dmu_send_cookie_t *dscp, const blkptr_t *bp, uint64_t object,
/* See comment in dump_dnode() for full details */ /* See comment in dump_dnode() for full details */
if (zfs_send_unmodified_spill_blocks && if (zfs_send_unmodified_spill_blocks &&
(bp->blk_birth <= dscp->dsc_fromtxg)) { (BP_GET_LOGICAL_BIRTH(bp) <= dscp->dsc_fromtxg)) {
drrs->drr_flags |= DRR_SPILL_UNMODIFIED; drrs->drr_flags |= DRR_SPILL_UNMODIFIED;
} }
@ -804,7 +804,7 @@ dump_dnode(dmu_send_cookie_t *dscp, const blkptr_t *bp, uint64_t object,
*/ */
if (zfs_send_unmodified_spill_blocks && if (zfs_send_unmodified_spill_blocks &&
(dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) && (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) &&
(DN_SPILL_BLKPTR(dnp)->blk_birth <= dscp->dsc_fromtxg)) { (BP_GET_LOGICAL_BIRTH(DN_SPILL_BLKPTR(dnp)) <= dscp->dsc_fromtxg)) {
struct send_range record; struct send_range record;
blkptr_t *bp = DN_SPILL_BLKPTR(dnp); blkptr_t *bp = DN_SPILL_BLKPTR(dnp);
@ -1123,7 +1123,7 @@ send_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
*/ */
if (sta->os->os_encrypted && if (sta->os->os_encrypted &&
!BP_IS_HOLE(bp) && !BP_USES_CRYPT(bp)) { !BP_IS_HOLE(bp) && !BP_USES_CRYPT(bp)) {
spa_log_error(spa, zb, &bp->blk_birth); spa_log_error(spa, zb, BP_GET_LOGICAL_BIRTH(bp));
return (SET_ERROR(EIO)); return (SET_ERROR(EIO));
} }

View File

@ -83,7 +83,8 @@ traverse_zil_block(zilog_t *zilog, const blkptr_t *bp, void *arg,
if (BP_IS_HOLE(bp)) if (BP_IS_HOLE(bp))
return (0); return (0);
if (claim_txg == 0 && bp->blk_birth >= spa_min_claim_txg(td->td_spa)) if (claim_txg == 0 &&
BP_GET_LOGICAL_BIRTH(bp) >= spa_min_claim_txg(td->td_spa))
return (-1); return (-1);
SET_BOOKMARK(&zb, td->td_objset, ZB_ZIL_OBJECT, ZB_ZIL_LEVEL, SET_BOOKMARK(&zb, td->td_objset, ZB_ZIL_OBJECT, ZB_ZIL_LEVEL,
@ -108,7 +109,7 @@ traverse_zil_record(zilog_t *zilog, const lr_t *lrc, void *arg,
if (BP_IS_HOLE(bp)) if (BP_IS_HOLE(bp))
return (0); return (0);
if (claim_txg == 0 || bp->blk_birth < claim_txg) if (claim_txg == 0 || BP_GET_LOGICAL_BIRTH(bp) < claim_txg)
return (0); return (0);
ASSERT3U(BP_GET_LSIZE(bp), !=, 0); ASSERT3U(BP_GET_LSIZE(bp), !=, 0);
@ -192,7 +193,7 @@ traverse_prefetch_metadata(traverse_data_t *td, const dnode_phys_t *dnp,
*/ */
if (resume_skip_check(td, dnp, zb) != RESUME_SKIP_NONE) if (resume_skip_check(td, dnp, zb) != RESUME_SKIP_NONE)
return (B_FALSE); return (B_FALSE);
if (BP_IS_HOLE(bp) || bp->blk_birth <= td->td_min_txg) if (BP_IS_HOLE(bp) || BP_GET_LOGICAL_BIRTH(bp) <= td->td_min_txg)
return (B_FALSE); return (B_FALSE);
if (BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_DNODE) if (BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_DNODE)
return (B_FALSE); return (B_FALSE);
@ -235,7 +236,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
ASSERT(0); ASSERT(0);
} }
if (bp->blk_birth == 0) { if (BP_GET_LOGICAL_BIRTH(bp) == 0) {
/* /*
* Since this block has a birth time of 0 it must be one of * Since this block has a birth time of 0 it must be one of
* two things: a hole created before the * two things: a hole created before the
@ -263,7 +264,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
zb->zb_object == DMU_META_DNODE_OBJECT) && zb->zb_object == DMU_META_DNODE_OBJECT) &&
td->td_hole_birth_enabled_txg <= td->td_min_txg) td->td_hole_birth_enabled_txg <= td->td_min_txg)
return (0); return (0);
} else if (bp->blk_birth <= td->td_min_txg) { } else if (BP_GET_LOGICAL_BIRTH(bp) <= td->td_min_txg) {
return (0); return (0);
} }

View File

@ -2557,7 +2557,7 @@ dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset,
} }
if (db != NULL && txg != 0 && (db->db_blkptr == NULL || if (db != NULL && txg != 0 && (db->db_blkptr == NULL ||
db->db_blkptr->blk_birth <= txg || BP_GET_LOGICAL_BIRTH(db->db_blkptr) <= txg ||
BP_IS_HOLE(db->db_blkptr))) { BP_IS_HOLE(db->db_blkptr))) {
/* /*
* This can only happen when we are searching up the tree * This can only happen when we are searching up the tree
@ -2605,7 +2605,7 @@ dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset,
i >= 0 && i < epb; i += inc) { i >= 0 && i < epb; i += inc) {
if (BP_GET_FILL(&bp[i]) >= minfill && if (BP_GET_FILL(&bp[i]) >= minfill &&
BP_GET_FILL(&bp[i]) <= maxfill && BP_GET_FILL(&bp[i]) <= maxfill &&
(hole || bp[i].blk_birth > txg)) (hole || BP_GET_LOGICAL_BIRTH(&bp[i]) > txg))
break; break;
if (inc > 0 || *offset > 0) if (inc > 0 || *offset > 0)
*offset += inc; *offset += inc;

View File

@ -1520,7 +1520,8 @@ dsl_bookmark_block_killed(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
* If the block was live (referenced) at the time of this * If the block was live (referenced) at the time of this
* bookmark, add its space to the bookmark's FBN. * bookmark, add its space to the bookmark's FBN.
*/ */
if (bp->blk_birth <= dbn->dbn_phys.zbm_creation_txg && if (BP_GET_LOGICAL_BIRTH(bp) <=
dbn->dbn_phys.zbm_creation_txg &&
(dbn->dbn_phys.zbm_flags & ZBM_FLAG_HAS_FBN)) { (dbn->dbn_phys.zbm_flags & ZBM_FLAG_HAS_FBN)) {
mutex_enter(&dbn->dbn_lock); mutex_enter(&dbn->dbn_lock);
dbn->dbn_phys.zbm_referenced_freed_before_next_snap += dbn->dbn_phys.zbm_referenced_freed_before_next_snap +=

View File

@ -156,7 +156,8 @@ dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
return; return;
} }
ASSERT3U(bp->blk_birth, >, dsl_dataset_phys(ds)->ds_prev_snap_txg); ASSERT3U(BP_GET_LOGICAL_BIRTH(bp), >,
dsl_dataset_phys(ds)->ds_prev_snap_txg);
dmu_buf_will_dirty(ds->ds_dbuf, tx); dmu_buf_will_dirty(ds->ds_dbuf, tx);
mutex_enter(&ds->ds_lock); mutex_enter(&ds->ds_lock);
delta = parent_delta(ds, used); delta = parent_delta(ds, used);
@ -190,7 +191,7 @@ dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
* they do not need to be freed. * they do not need to be freed.
*/ */
if (dsl_deadlist_is_open(&ds->ds_dir->dd_livelist) && if (dsl_deadlist_is_open(&ds->ds_dir->dd_livelist) &&
bp->blk_birth > ds->ds_dir->dd_origin_txg && BP_GET_LOGICAL_BIRTH(bp) > ds->ds_dir->dd_origin_txg &&
!(BP_IS_EMBEDDED(bp))) { !(BP_IS_EMBEDDED(bp))) {
ASSERT(dsl_dir_is_clone(ds->ds_dir)); ASSERT(dsl_dir_is_clone(ds->ds_dir));
ASSERT(spa_feature_is_enabled(spa, ASSERT(spa_feature_is_enabled(spa,
@ -236,7 +237,7 @@ dsl_dataset_block_remapped(dsl_dataset_t *ds, uint64_t vdev, uint64_t offset,
mutex_exit(&ds->ds_remap_deadlist_lock); mutex_exit(&ds->ds_remap_deadlist_lock);
BP_ZERO(&fakebp); BP_ZERO(&fakebp);
fakebp.blk_birth = birth; BP_SET_LOGICAL_BIRTH(&fakebp, birth);
DVA_SET_VDEV(dva, vdev); DVA_SET_VDEV(dva, vdev);
DVA_SET_OFFSET(dva, offset); DVA_SET_OFFSET(dva, offset);
DVA_SET_ASIZE(dva, size); DVA_SET_ASIZE(dva, size);
@ -259,7 +260,7 @@ dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx,
return (0); return (0);
ASSERT(dmu_tx_is_syncing(tx)); ASSERT(dmu_tx_is_syncing(tx));
ASSERT(bp->blk_birth <= tx->tx_txg); ASSERT(BP_GET_LOGICAL_BIRTH(bp) <= tx->tx_txg);
if (ds == NULL) { if (ds == NULL) {
dsl_free(tx->tx_pool, tx->tx_txg, bp); dsl_free(tx->tx_pool, tx->tx_txg, bp);
@ -277,7 +278,7 @@ dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx,
* they do not need to be freed. * they do not need to be freed.
*/ */
if (dsl_deadlist_is_open(&ds->ds_dir->dd_livelist) && if (dsl_deadlist_is_open(&ds->ds_dir->dd_livelist) &&
bp->blk_birth > ds->ds_dir->dd_origin_txg && BP_GET_LOGICAL_BIRTH(bp) > ds->ds_dir->dd_origin_txg &&
!(BP_IS_EMBEDDED(bp))) { !(BP_IS_EMBEDDED(bp))) {
ASSERT(dsl_dir_is_clone(ds->ds_dir)); ASSERT(dsl_dir_is_clone(ds->ds_dir));
ASSERT(spa_feature_is_enabled(spa, ASSERT(spa_feature_is_enabled(spa,
@ -285,7 +286,7 @@ dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx,
bplist_append(&ds->ds_dir->dd_pending_frees, bp); bplist_append(&ds->ds_dir->dd_pending_frees, bp);
} }
if (bp->blk_birth > dsl_dataset_phys(ds)->ds_prev_snap_txg) { if (BP_GET_LOGICAL_BIRTH(bp) > dsl_dataset_phys(ds)->ds_prev_snap_txg) {
int64_t delta; int64_t delta;
dprintf_bp(bp, "freeing ds=%llu", (u_longlong_t)ds->ds_object); dprintf_bp(bp, "freeing ds=%llu", (u_longlong_t)ds->ds_object);
@ -317,16 +318,16 @@ dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx,
ASSERT3U(ds->ds_prev->ds_object, ==, ASSERT3U(ds->ds_prev->ds_object, ==,
dsl_dataset_phys(ds)->ds_prev_snap_obj); dsl_dataset_phys(ds)->ds_prev_snap_obj);
ASSERT(dsl_dataset_phys(ds->ds_prev)->ds_num_children > 0); ASSERT(dsl_dataset_phys(ds->ds_prev)->ds_num_children > 0);
/* if (bp->blk_birth > prev prev snap txg) prev unique += bs */ /* if (logical birth > prev prev snap txg) prev unique += bs */
if (dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj == if (dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj ==
ds->ds_object && bp->blk_birth > ds->ds_object && BP_GET_LOGICAL_BIRTH(bp) >
dsl_dataset_phys(ds->ds_prev)->ds_prev_snap_txg) { dsl_dataset_phys(ds->ds_prev)->ds_prev_snap_txg) {
dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
mutex_enter(&ds->ds_prev->ds_lock); mutex_enter(&ds->ds_prev->ds_lock);
dsl_dataset_phys(ds->ds_prev)->ds_unique_bytes += used; dsl_dataset_phys(ds->ds_prev)->ds_unique_bytes += used;
mutex_exit(&ds->ds_prev->ds_lock); mutex_exit(&ds->ds_prev->ds_lock);
} }
if (bp->blk_birth > ds->ds_dir->dd_origin_txg) { if (BP_GET_LOGICAL_BIRTH(bp) > ds->ds_dir->dd_origin_txg) {
dsl_dir_transfer_space(ds->ds_dir, used, dsl_dir_transfer_space(ds->ds_dir, used,
DD_USED_HEAD, DD_USED_SNAP, tx); DD_USED_HEAD, DD_USED_SNAP, tx);
} }
@ -2895,7 +2896,7 @@ dsl_dataset_modified_since_snap(dsl_dataset_t *ds, dsl_dataset_t *snap)
if (snap == NULL) if (snap == NULL)
return (B_FALSE); return (B_FALSE);
rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
birth = dsl_dataset_get_blkptr(ds)->blk_birth; birth = BP_GET_LOGICAL_BIRTH(dsl_dataset_get_blkptr(ds));
rrw_exit(&ds->ds_bp_rwlock, FTAG); rrw_exit(&ds->ds_bp_rwlock, FTAG);
if (birth > dsl_dataset_phys(snap)->ds_creation_txg) { if (birth > dsl_dataset_phys(snap)->ds_creation_txg) {
objset_t *os, *os_snap; objset_t *os, *os_snap;

View File

@ -474,7 +474,7 @@ dsl_deadlist_insert(dsl_deadlist_t *dl, const blkptr_t *bp, boolean_t bp_freed,
dl->dl_phys->dl_comp += sign * BP_GET_PSIZE(bp); dl->dl_phys->dl_comp += sign * BP_GET_PSIZE(bp);
dl->dl_phys->dl_uncomp += sign * BP_GET_UCSIZE(bp); dl->dl_phys->dl_uncomp += sign * BP_GET_UCSIZE(bp);
dle_tofind.dle_mintxg = bp->blk_birth; dle_tofind.dle_mintxg = BP_GET_LOGICAL_BIRTH(bp);
dle = avl_find(&dl->dl_tree, &dle_tofind, &where); dle = avl_find(&dl->dl_tree, &dle_tofind, &where);
if (dle == NULL) if (dle == NULL)
dle = avl_nearest(&dl->dl_tree, where, AVL_BEFORE); dle = avl_nearest(&dl->dl_tree, where, AVL_BEFORE);
@ -483,7 +483,7 @@ dsl_deadlist_insert(dsl_deadlist_t *dl, const blkptr_t *bp, boolean_t bp_freed,
if (dle == NULL) { if (dle == NULL) {
zfs_panic_recover("blkptr at %p has invalid BLK_BIRTH %llu", zfs_panic_recover("blkptr at %p has invalid BLK_BIRTH %llu",
bp, (longlong_t)bp->blk_birth); bp, (longlong_t)BP_GET_LOGICAL_BIRTH(bp));
dle = avl_first(&dl->dl_tree); dle = avl_first(&dl->dl_tree);
} }
@ -1039,8 +1039,7 @@ dsl_livelist_iterate(void *arg, const blkptr_t *bp, boolean_t bp_freed,
ASSERT3U(BP_GET_PSIZE(bp), ==, BP_GET_PSIZE(&found->le_bp)); ASSERT3U(BP_GET_PSIZE(bp), ==, BP_GET_PSIZE(&found->le_bp));
ASSERT3U(BP_GET_CHECKSUM(bp), ==, ASSERT3U(BP_GET_CHECKSUM(bp), ==,
BP_GET_CHECKSUM(&found->le_bp)); BP_GET_CHECKSUM(&found->le_bp));
ASSERT3U(BP_PHYSICAL_BIRTH(bp), ==, ASSERT3U(BP_GET_BIRTH(bp), ==, BP_GET_BIRTH(&found->le_bp));
BP_PHYSICAL_BIRTH(&found->le_bp));
} }
if (bp_freed) { if (bp_freed) {
if (found == NULL) { if (found == NULL) {

View File

@ -132,10 +132,11 @@ process_old_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed, dmu_tx_t *tx)
ASSERT(!BP_IS_HOLE(bp)); ASSERT(!BP_IS_HOLE(bp));
if (bp->blk_birth <= dsl_dataset_phys(poa->ds)->ds_prev_snap_txg) { if (BP_GET_LOGICAL_BIRTH(bp) <=
dsl_dataset_phys(poa->ds)->ds_prev_snap_txg) {
dsl_deadlist_insert(&poa->ds->ds_deadlist, bp, bp_freed, tx); dsl_deadlist_insert(&poa->ds->ds_deadlist, bp, bp_freed, tx);
if (poa->ds_prev && !poa->after_branch_point && if (poa->ds_prev && !poa->after_branch_point &&
bp->blk_birth > BP_GET_LOGICAL_BIRTH(bp) >
dsl_dataset_phys(poa->ds_prev)->ds_prev_snap_txg) { dsl_dataset_phys(poa->ds_prev)->ds_prev_snap_txg) {
dsl_dataset_phys(poa->ds_prev)->ds_unique_bytes += dsl_dataset_phys(poa->ds_prev)->ds_unique_bytes +=
bp_get_dsize_sync(dp->dp_spa, bp); bp_get_dsize_sync(dp->dp_spa, bp);
@ -313,7 +314,8 @@ dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx)
ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock)); ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
ASSERT3U(dsl_dataset_phys(ds)->ds_bp.blk_birth, <=, tx->tx_txg); ASSERT3U(BP_GET_LOGICAL_BIRTH(&dsl_dataset_phys(ds)->ds_bp), <=,
tx->tx_txg);
rrw_exit(&ds->ds_bp_rwlock, FTAG); rrw_exit(&ds->ds_bp_rwlock, FTAG);
ASSERT(zfs_refcount_is_zero(&ds->ds_longholds)); ASSERT(zfs_refcount_is_zero(&ds->ds_longholds));
@ -727,7 +729,7 @@ kill_blkptr(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
dsl_free(ka->tx->tx_pool, ka->tx->tx_txg, bp); dsl_free(ka->tx->tx_pool, ka->tx->tx_txg, bp);
} else { } else {
ASSERT(zilog == NULL); ASSERT(zilog == NULL);
ASSERT3U(bp->blk_birth, >, ASSERT3U(BP_GET_LOGICAL_BIRTH(bp), >,
dsl_dataset_phys(ka->ds)->ds_prev_snap_txg); dsl_dataset_phys(ka->ds)->ds_prev_snap_txg);
(void) dsl_dataset_block_kill(ka->ds, bp, tx, B_FALSE); (void) dsl_dataset_block_kill(ka->ds, bp, tx, B_FALSE);
} }
@ -1017,7 +1019,8 @@ dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx)
ASSERT(ds->ds_prev == NULL || ASSERT(ds->ds_prev == NULL ||
dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj != ds->ds_object); dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj != ds->ds_object);
rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
ASSERT3U(dsl_dataset_phys(ds)->ds_bp.blk_birth, <=, tx->tx_txg); ASSERT3U(BP_GET_LOGICAL_BIRTH(&dsl_dataset_phys(ds)->ds_bp), <=,
tx->tx_txg);
rrw_exit(&ds->ds_bp_rwlock, FTAG); rrw_exit(&ds->ds_bp_rwlock, FTAG);
ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock)); ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));

View File

@ -1047,7 +1047,7 @@ upgrade_clones_cb(dsl_pool_t *dp, dsl_dataset_t *hds, void *arg)
* will be wrong. * will be wrong.
*/ */
rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
ASSERT0(dsl_dataset_phys(prev)->ds_bp.blk_birth); ASSERT0(BP_GET_LOGICAL_BIRTH(&dsl_dataset_phys(prev)->ds_bp));
rrw_exit(&ds->ds_bp_rwlock, FTAG); rrw_exit(&ds->ds_bp_rwlock, FTAG);
/* The origin doesn't get attached to itself */ /* The origin doesn't get attached to itself */

View File

@ -429,8 +429,8 @@ sio2bp(const scan_io_t *sio, blkptr_t *bp)
{ {
memset(bp, 0, sizeof (*bp)); memset(bp, 0, sizeof (*bp));
bp->blk_prop = sio->sio_blk_prop; bp->blk_prop = sio->sio_blk_prop;
bp->blk_phys_birth = sio->sio_phys_birth; BP_SET_PHYSICAL_BIRTH(bp, sio->sio_phys_birth);
bp->blk_birth = sio->sio_birth; BP_SET_LOGICAL_BIRTH(bp, sio->sio_birth);
bp->blk_fill = 1; /* we always only work with data pointers */ bp->blk_fill = 1; /* we always only work with data pointers */
bp->blk_cksum = sio->sio_cksum; bp->blk_cksum = sio->sio_cksum;
@ -444,8 +444,8 @@ static inline void
bp2sio(const blkptr_t *bp, scan_io_t *sio, int dva_i) bp2sio(const blkptr_t *bp, scan_io_t *sio, int dva_i)
{ {
sio->sio_blk_prop = bp->blk_prop; sio->sio_blk_prop = bp->blk_prop;
sio->sio_phys_birth = bp->blk_phys_birth; sio->sio_phys_birth = BP_GET_PHYSICAL_BIRTH(bp);
sio->sio_birth = bp->blk_birth; sio->sio_birth = BP_GET_LOGICAL_BIRTH(bp);
sio->sio_cksum = bp->blk_cksum; sio->sio_cksum = bp->blk_cksum;
sio->sio_nr_dvas = BP_GET_NDVAS(bp); sio->sio_nr_dvas = BP_GET_NDVAS(bp);
@ -1721,7 +1721,8 @@ dsl_scan_zil_block(zilog_t *zilog, const blkptr_t *bp, void *arg,
zbookmark_phys_t zb; zbookmark_phys_t zb;
ASSERT(!BP_IS_REDACTED(bp)); ASSERT(!BP_IS_REDACTED(bp));
if (BP_IS_HOLE(bp) || bp->blk_birth <= scn->scn_phys.scn_cur_min_txg) if (BP_IS_HOLE(bp) ||
BP_GET_LOGICAL_BIRTH(bp) <= scn->scn_phys.scn_cur_min_txg)
return (0); return (0);
/* /*
@ -1730,7 +1731,8 @@ dsl_scan_zil_block(zilog_t *zilog, const blkptr_t *bp, void *arg,
* (on-disk) even if it hasn't been claimed (even though for * (on-disk) even if it hasn't been claimed (even though for
* scrub there's nothing to do to it). * scrub there's nothing to do to it).
*/ */
if (claim_txg == 0 && bp->blk_birth >= spa_min_claim_txg(dp->dp_spa)) if (claim_txg == 0 &&
BP_GET_LOGICAL_BIRTH(bp) >= spa_min_claim_txg(dp->dp_spa))
return (0); return (0);
SET_BOOKMARK(&zb, zh->zh_log.blk_cksum.zc_word[ZIL_ZC_OBJSET], SET_BOOKMARK(&zb, zh->zh_log.blk_cksum.zc_word[ZIL_ZC_OBJSET],
@ -1756,7 +1758,7 @@ dsl_scan_zil_record(zilog_t *zilog, const lr_t *lrc, void *arg,
ASSERT(!BP_IS_REDACTED(bp)); ASSERT(!BP_IS_REDACTED(bp));
if (BP_IS_HOLE(bp) || if (BP_IS_HOLE(bp) ||
bp->blk_birth <= scn->scn_phys.scn_cur_min_txg) BP_GET_LOGICAL_BIRTH(bp) <= scn->scn_phys.scn_cur_min_txg)
return (0); return (0);
/* /*
@ -1764,7 +1766,7 @@ dsl_scan_zil_record(zilog_t *zilog, const lr_t *lrc, void *arg,
* already txg sync'ed (but this log block contains * already txg sync'ed (but this log block contains
* other records that are not synced) * other records that are not synced)
*/ */
if (claim_txg == 0 || bp->blk_birth < claim_txg) if (claim_txg == 0 || BP_GET_LOGICAL_BIRTH(bp) < claim_txg)
return (0); return (0);
ASSERT3U(BP_GET_LSIZE(bp), !=, 0); ASSERT3U(BP_GET_LSIZE(bp), !=, 0);
@ -1903,7 +1905,8 @@ dsl_scan_prefetch(scan_prefetch_ctx_t *spc, blkptr_t *bp, zbookmark_phys_t *zb)
if (zfs_no_scrub_prefetch || BP_IS_REDACTED(bp)) if (zfs_no_scrub_prefetch || BP_IS_REDACTED(bp))
return; return;
if (BP_IS_HOLE(bp) || bp->blk_birth <= scn->scn_phys.scn_cur_min_txg || if (BP_IS_HOLE(bp) ||
BP_GET_LOGICAL_BIRTH(bp) <= scn->scn_phys.scn_cur_min_txg ||
(BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_DNODE && (BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_DNODE &&
BP_GET_TYPE(bp) != DMU_OT_OBJSET)) BP_GET_TYPE(bp) != DMU_OT_OBJSET))
return; return;
@ -2174,7 +2177,7 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype,
if (dnp != NULL && if (dnp != NULL &&
dnp->dn_bonuslen > DN_MAX_BONUS_LEN(dnp)) { dnp->dn_bonuslen > DN_MAX_BONUS_LEN(dnp)) {
scn->scn_phys.scn_errors++; scn->scn_phys.scn_errors++;
spa_log_error(spa, zb, &bp->blk_birth); spa_log_error(spa, zb, BP_GET_LOGICAL_BIRTH(bp));
return (SET_ERROR(EINVAL)); return (SET_ERROR(EINVAL));
} }
@ -2270,7 +2273,7 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype,
* by arc_read() for the cases above. * by arc_read() for the cases above.
*/ */
scn->scn_phys.scn_errors++; scn->scn_phys.scn_errors++;
spa_log_error(spa, zb, &bp->blk_birth); spa_log_error(spa, zb, BP_GET_LOGICAL_BIRTH(bp));
return (SET_ERROR(EINVAL)); return (SET_ERROR(EINVAL));
} }
@ -2347,7 +2350,7 @@ dsl_scan_visitbp(const blkptr_t *bp, const zbookmark_phys_t *zb,
if (f != SPA_FEATURE_NONE) if (f != SPA_FEATURE_NONE)
ASSERT(dsl_dataset_feature_is_active(ds, f)); ASSERT(dsl_dataset_feature_is_active(ds, f));
if (bp->blk_birth <= scn->scn_phys.scn_cur_min_txg) { if (BP_GET_LOGICAL_BIRTH(bp) <= scn->scn_phys.scn_cur_min_txg) {
scn->scn_lt_min_this_txg++; scn->scn_lt_min_this_txg++;
return; return;
} }
@ -2373,7 +2376,7 @@ dsl_scan_visitbp(const blkptr_t *bp, const zbookmark_phys_t *zb,
* Don't scan it now unless we need to because something * Don't scan it now unless we need to because something
* under it was modified. * under it was modified.
*/ */
if (BP_PHYSICAL_BIRTH(bp) > scn->scn_phys.scn_cur_max_txg) { if (BP_GET_BIRTH(bp) > scn->scn_phys.scn_cur_max_txg) {
scn->scn_gt_max_this_txg++; scn->scn_gt_max_this_txg++;
return; return;
} }
@ -4714,7 +4717,7 @@ dsl_scan_scrub_cb(dsl_pool_t *dp,
{ {
dsl_scan_t *scn = dp->dp_scan; dsl_scan_t *scn = dp->dp_scan;
spa_t *spa = dp->dp_spa; spa_t *spa = dp->dp_spa;
uint64_t phys_birth = BP_PHYSICAL_BIRTH(bp); uint64_t phys_birth = BP_GET_BIRTH(bp);
size_t psize = BP_GET_PSIZE(bp); size_t psize = BP_GET_PSIZE(bp);
boolean_t needs_io = B_FALSE; boolean_t needs_io = B_FALSE;
int zio_flags = ZIO_FLAG_SCAN_THREAD | ZIO_FLAG_RAW | ZIO_FLAG_CANFAIL; int zio_flags = ZIO_FLAG_SCAN_THREAD | ZIO_FLAG_RAW | ZIO_FLAG_CANFAIL;

View File

@ -5495,8 +5495,9 @@ remap_blkptr_cb(uint64_t inner_offset, vdev_t *vd, uint64_t offset,
vdev_t *oldvd = vdev_lookup_top(vd->vdev_spa, vdev_t *oldvd = vdev_lookup_top(vd->vdev_spa,
DVA_GET_VDEV(&bp->blk_dva[0])); DVA_GET_VDEV(&bp->blk_dva[0]));
vdev_indirect_births_t *vib = oldvd->vdev_indirect_births; vdev_indirect_births_t *vib = oldvd->vdev_indirect_births;
bp->blk_phys_birth = vdev_indirect_births_physbirth(vib, uint64_t physical_birth = vdev_indirect_births_physbirth(vib,
DVA_GET_OFFSET(&bp->blk_dva[0]), DVA_GET_ASIZE(&bp->blk_dva[0])); DVA_GET_OFFSET(&bp->blk_dva[0]), DVA_GET_ASIZE(&bp->blk_dva[0]));
BP_SET_PHYSICAL_BIRTH(bp, physical_birth);
DVA_SET_VDEV(&bp->blk_dva[0], vd->vdev_id); DVA_SET_VDEV(&bp->blk_dva[0], vd->vdev_id);
DVA_SET_OFFSET(&bp->blk_dva[0], offset); DVA_SET_OFFSET(&bp->blk_dva[0], offset);
@ -5845,8 +5846,8 @@ metaslab_alloc(spa_t *spa, metaslab_class_t *mc, uint64_t psize, blkptr_t *bp,
dva_t *hintdva = (hintbp != NULL) ? hintbp->blk_dva : NULL; dva_t *hintdva = (hintbp != NULL) ? hintbp->blk_dva : NULL;
int error = 0; int error = 0;
ASSERT(bp->blk_birth == 0); ASSERT0(BP_GET_LOGICAL_BIRTH(bp));
ASSERT(BP_PHYSICAL_BIRTH(bp) == 0); ASSERT0(BP_GET_PHYSICAL_BIRTH(bp));
spa_config_enter(spa, SCL_ALLOC, FTAG, RW_READER); spa_config_enter(spa, SCL_ALLOC, FTAG, RW_READER);
@ -5900,7 +5901,7 @@ metaslab_free(spa_t *spa, const blkptr_t *bp, uint64_t txg, boolean_t now)
int ndvas = BP_GET_NDVAS(bp); int ndvas = BP_GET_NDVAS(bp);
ASSERT(!BP_IS_HOLE(bp)); ASSERT(!BP_IS_HOLE(bp));
ASSERT(!now || bp->blk_birth >= spa_syncing_txg(spa)); ASSERT(!now || BP_GET_LOGICAL_BIRTH(bp) >= spa_syncing_txg(spa));
/* /*
* If we have a checkpoint for the pool we need to make sure that * If we have a checkpoint for the pool we need to make sure that
@ -5918,7 +5919,7 @@ metaslab_free(spa_t *spa, const blkptr_t *bp, uint64_t txg, boolean_t now)
* normally as they will be referenced by the checkpointed uberblock. * normally as they will be referenced by the checkpointed uberblock.
*/ */
boolean_t checkpoint = B_FALSE; boolean_t checkpoint = B_FALSE;
if (bp->blk_birth <= spa->spa_checkpoint_txg && if (BP_GET_LOGICAL_BIRTH(bp) <= spa->spa_checkpoint_txg &&
spa_syncing_txg(spa) > spa->spa_checkpoint_txg) { spa_syncing_txg(spa) > spa->spa_checkpoint_txg) {
/* /*
* At this point, if the block is part of the checkpoint * At this point, if the block is part of the checkpoint

View File

@ -21,7 +21,7 @@
/* /*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2020 by Delphix. All rights reserved. * Copyright (c) 2011, 2024 by Delphix. All rights reserved.
* Copyright (c) 2018, Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2018, Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
* Copyright 2013 Saso Kiselkov. All rights reserved. * Copyright 2013 Saso Kiselkov. All rights reserved.
@ -2655,8 +2655,8 @@ spa_claim_notify(zio_t *zio)
return; return;
mutex_enter(&spa->spa_props_lock); /* any mutex will do */ mutex_enter(&spa->spa_props_lock); /* any mutex will do */
if (spa->spa_claim_max_txg < zio->io_bp->blk_birth) if (spa->spa_claim_max_txg < BP_GET_LOGICAL_BIRTH(zio->io_bp))
spa->spa_claim_max_txg = zio->io_bp->blk_birth; spa->spa_claim_max_txg = BP_GET_LOGICAL_BIRTH(zio->io_bp);
mutex_exit(&spa->spa_props_lock); mutex_exit(&spa->spa_props_lock);
} }
@ -6266,7 +6266,8 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
nvlist_t *nvl; nvlist_t *nvl;
if (props == NULL || if (props == NULL ||
nvlist_lookup_string(props, "tname", &poolname) != 0) nvlist_lookup_string(props,
zpool_prop_to_name(ZPOOL_PROP_TNAME), &poolname) != 0)
poolname = (char *)pool; poolname = (char *)pool;
/* /*
@ -7082,7 +7083,7 @@ spa_draid_feature_incr(void *arg, dmu_tx_t *tx)
* Add a device to a storage pool. * Add a device to a storage pool.
*/ */
int int
spa_vdev_add(spa_t *spa, nvlist_t *nvroot) spa_vdev_add(spa_t *spa, nvlist_t *nvroot, boolean_t check_ashift)
{ {
uint64_t txg, ndraid = 0; uint64_t txg, ndraid = 0;
int error; int error;
@ -7173,6 +7174,16 @@ spa_vdev_add(spa_t *spa, nvlist_t *nvroot)
} }
} }
if (check_ashift && spa->spa_max_ashift == spa->spa_min_ashift) {
for (int c = 0; c < vd->vdev_children; c++) {
tvd = vd->vdev_child[c];
if (tvd->vdev_ashift != spa->spa_max_ashift) {
return (spa_vdev_exit(spa, vd, txg,
ZFS_ERR_ASHIFT_MISMATCH));
}
}
}
for (int c = 0; c < vd->vdev_children; c++) { for (int c = 0; c < vd->vdev_children; c++) {
tvd = vd->vdev_child[c]; tvd = vd->vdev_child[c];
vdev_remove_child(vd, tvd); vdev_remove_child(vd, tvd);
@ -9801,7 +9812,7 @@ spa_sync_iterate_to_convergence(spa_t *spa, dmu_tx_t *tx)
* don't want to rely on that here). * don't want to rely on that here).
*/ */
if (pass == 1 && if (pass == 1 &&
spa->spa_uberblock.ub_rootbp.blk_birth < txg && BP_GET_LOGICAL_BIRTH(&spa->spa_uberblock.ub_rootbp) < txg &&
!dmu_objset_is_dirty(mos, txg)) { !dmu_objset_is_dirty(mos, txg)) {
/* /*
* Nothing changed on the first pass, therefore this * Nothing changed on the first pass, therefore this

View File

@ -180,7 +180,7 @@ static int get_head_ds(spa_t *spa, uint64_t dsobj, uint64_t *head_ds)
* during spa_errlog_sync(). * during spa_errlog_sync().
*/ */
void void
spa_log_error(spa_t *spa, const zbookmark_phys_t *zb, const uint64_t *birth) spa_log_error(spa_t *spa, const zbookmark_phys_t *zb, const uint64_t birth)
{ {
spa_error_entry_t search; spa_error_entry_t search;
spa_error_entry_t *new; spa_error_entry_t *new;
@ -223,13 +223,7 @@ spa_log_error(spa_t *spa, const zbookmark_phys_t *zb, const uint64_t *birth)
new->se_zep.zb_object = zb->zb_object; new->se_zep.zb_object = zb->zb_object;
new->se_zep.zb_level = zb->zb_level; new->se_zep.zb_level = zb->zb_level;
new->se_zep.zb_blkid = zb->zb_blkid; new->se_zep.zb_blkid = zb->zb_blkid;
new->se_zep.zb_birth = birth;
/*
* birth may end up being NULL, e.g. in zio_done(). We
* will handle this in process_error_block().
*/
if (birth != NULL)
new->se_zep.zb_birth = *birth;
} }
avl_insert(tree, new, where); avl_insert(tree, new, where);
@ -258,7 +252,7 @@ find_birth_txg(dsl_dataset_t *ds, zbookmark_err_phys_t *zep,
if (error == 0 && BP_IS_HOLE(&bp)) if (error == 0 && BP_IS_HOLE(&bp))
error = SET_ERROR(ENOENT); error = SET_ERROR(ENOENT);
*birth_txg = bp.blk_birth; *birth_txg = BP_GET_LOGICAL_BIRTH(&bp);
rw_exit(&dn->dn_struct_rwlock); rw_exit(&dn->dn_struct_rwlock);
dnode_rele(dn, FTAG); dnode_rele(dn, FTAG);
return (error); return (error);
@ -535,7 +529,7 @@ process_error_block(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep,
*/ */
zbookmark_phys_t zb; zbookmark_phys_t zb;
zep_to_zb(head_ds, zep, &zb); zep_to_zb(head_ds, zep, &zb);
spa_remove_error(spa, &zb, &zep->zb_birth); spa_remove_error(spa, &zb, zep->zb_birth);
} }
return (error); return (error);
@ -563,7 +557,7 @@ spa_get_last_errlog_size(spa_t *spa)
*/ */
static void static void
spa_add_healed_error(spa_t *spa, uint64_t obj, zbookmark_phys_t *healed_zb, spa_add_healed_error(spa_t *spa, uint64_t obj, zbookmark_phys_t *healed_zb,
const uint64_t *birth) const uint64_t birth)
{ {
char name[NAME_MAX_LEN]; char name[NAME_MAX_LEN];
@ -618,11 +612,7 @@ spa_add_healed_error(spa_t *spa, uint64_t obj, zbookmark_phys_t *healed_zb,
healed_zep.zb_object = healed_zb->zb_object; healed_zep.zb_object = healed_zb->zb_object;
healed_zep.zb_level = healed_zb->zb_level; healed_zep.zb_level = healed_zb->zb_level;
healed_zep.zb_blkid = healed_zb->zb_blkid; healed_zep.zb_blkid = healed_zb->zb_blkid;
healed_zep.zb_birth = birth;
if (birth != NULL)
healed_zep.zb_birth = *birth;
else
healed_zep.zb_birth = 0;
errphys_to_name(&healed_zep, name, sizeof (name)); errphys_to_name(&healed_zep, name, sizeof (name));
@ -742,7 +732,7 @@ spa_remove_healed_errors(spa_t *spa, avl_tree_t *s, avl_tree_t *l, dmu_tx_t *tx)
* later in spa_remove_healed_errors(). * later in spa_remove_healed_errors().
*/ */
void void
spa_remove_error(spa_t *spa, zbookmark_phys_t *zb, const uint64_t *birth) spa_remove_error(spa_t *spa, zbookmark_phys_t *zb, uint64_t birth)
{ {
spa_add_healed_error(spa, spa->spa_errlog_last, zb, birth); spa_add_healed_error(spa, spa->spa_errlog_last, zb, birth);
spa_add_healed_error(spa, spa->spa_errlog_scrub, zb, birth); spa_add_healed_error(spa, spa->spa_errlog_scrub, zb, birth);
@ -890,7 +880,7 @@ sync_upgrade_errlog(spa_t *spa, uint64_t spa_err_obj, uint64_t *newobj,
if (error == EACCES) if (error == EACCES)
error = 0; error = 0;
else if (!error) else if (!error)
zep.zb_birth = bp.blk_birth; zep.zb_birth = BP_GET_LOGICAL_BIRTH(&bp);
rw_exit(&dn->dn_struct_rwlock); rw_exit(&dn->dn_struct_rwlock);
dnode_rele(dn, FTAG); dnode_rele(dn, FTAG);

View File

@ -783,7 +783,7 @@ spa_flush_metaslabs(spa_t *spa, dmu_tx_t *tx)
* request of flushing everything before we attempt to return * request of flushing everything before we attempt to return
* immediately. * immediately.
*/ */
if (spa->spa_uberblock.ub_rootbp.blk_birth < txg && if (BP_GET_LOGICAL_BIRTH(&spa->spa_uberblock.ub_rootbp) < txg &&
!dmu_objset_is_dirty(spa_meta_objset(spa), txg) && !dmu_objset_is_dirty(spa_meta_objset(spa), txg) &&
!spa_flush_all_logs_requested(spa)) !spa_flush_all_logs_requested(spa))
return; return;

View File

@ -70,5 +70,5 @@ uberblock_update(uberblock_t *ub, vdev_t *rvd, uint64_t txg, uint64_t mmp_delay)
} }
ub->ub_checkpoint_txg = 0; ub->ub_checkpoint_txg = 0;
return (ub->ub_rootbp.blk_birth == txg); return (BP_GET_LOGICAL_BIRTH(&ub->ub_rootbp) == txg);
} }

View File

@ -531,7 +531,7 @@ vdev_mirror_child_select(zio_t *zio)
uint64_t txg = zio->io_txg; uint64_t txg = zio->io_txg;
int c, lowest_load; int c, lowest_load;
ASSERT(zio->io_bp == NULL || BP_PHYSICAL_BIRTH(zio->io_bp) == txg); ASSERT(zio->io_bp == NULL || BP_GET_BIRTH(zio->io_bp) == txg);
lowest_load = INT_MAX; lowest_load = INT_MAX;
mm->mm_preferred_cnt = 0; mm->mm_preferred_cnt = 0;

View File

@ -2190,12 +2190,11 @@ vdev_raidz_close(vdev_t *vd)
/* /*
* Return the logical width to use, given the txg in which the allocation * Return the logical width to use, given the txg in which the allocation
* happened. Note that BP_PHYSICAL_BIRTH() is usually the txg in which the * happened. Note that BP_GET_BIRTH() is usually the txg in which the
* BP was allocated. Remapped BP's (that were relocated due to device * BP was allocated. Remapped BP's (that were relocated due to device
* removal, see remap_blkptr_cb()), will have a more recent * removal, see remap_blkptr_cb()), will have a more recent physical birth
* BP_PHYSICAL_BIRTH() which reflects when the BP was relocated, but we can * which reflects when the BP was relocated, but we can ignore these because
* ignore these because they can't be on RAIDZ (device removal doesn't * they can't be on RAIDZ (device removal doesn't support RAIDZ).
* support RAIDZ).
*/ */
static uint64_t static uint64_t
vdev_raidz_get_logical_width(vdev_raidz_t *vdrz, uint64_t txg) vdev_raidz_get_logical_width(vdev_raidz_t *vdrz, uint64_t txg)
@ -2295,7 +2294,7 @@ vdev_raidz_io_verify(zio_t *zio, raidz_map_t *rm, raidz_row_t *rr, int col)
logical_rs.rs_start = rr->rr_offset; logical_rs.rs_start = rr->rr_offset;
logical_rs.rs_end = logical_rs.rs_start + logical_rs.rs_end = logical_rs.rs_start +
vdev_raidz_asize(zio->io_vd, rr->rr_size, vdev_raidz_asize(zio->io_vd, rr->rr_size,
BP_PHYSICAL_BIRTH(zio->io_bp)); BP_GET_BIRTH(zio->io_bp));
raidz_col_t *rc = &rr->rr_col[col]; raidz_col_t *rc = &rr->rr_col[col];
vdev_t *cvd = zio->io_vd->vdev_child[rc->rc_devidx]; vdev_t *cvd = zio->io_vd->vdev_child[rc->rc_devidx];
@ -2518,7 +2517,7 @@ vdev_raidz_io_start(zio_t *zio)
raidz_map_t *rm; raidz_map_t *rm;
uint64_t logical_width = vdev_raidz_get_logical_width(vdrz, uint64_t logical_width = vdev_raidz_get_logical_width(vdrz,
BP_PHYSICAL_BIRTH(zio->io_bp)); BP_GET_BIRTH(zio->io_bp));
if (logical_width != vdrz->vd_physical_width) { if (logical_width != vdrz->vd_physical_width) {
zfs_locked_range_t *lr = NULL; zfs_locked_range_t *lr = NULL;
uint64_t synced_offset = UINT64_MAX; uint64_t synced_offset = UINT64_MAX;

View File

@ -133,7 +133,7 @@ fzap_upgrade(zap_t *zap, dmu_tx_t *tx, zap_flags_t flags)
* set up block 1 - the first leaf * set up block 1 - the first leaf
*/ */
dmu_buf_t *db; dmu_buf_t *db;
VERIFY0(dmu_buf_hold(zap->zap_objset, zap->zap_object, VERIFY0(dmu_buf_hold_by_dnode(zap->zap_dnode,
1<<FZAP_BLOCK_SHIFT(zap), FTAG, &db, DMU_READ_NO_PREFETCH)); 1<<FZAP_BLOCK_SHIFT(zap), FTAG, &db, DMU_READ_NO_PREFETCH));
dmu_buf_will_dirty(db, tx); dmu_buf_will_dirty(db, tx);
@ -182,7 +182,7 @@ zap_table_grow(zap_t *zap, zap_table_phys_t *tbl,
newblk = zap_allocate_blocks(zap, tbl->zt_numblks * 2); newblk = zap_allocate_blocks(zap, tbl->zt_numblks * 2);
tbl->zt_nextblk = newblk; tbl->zt_nextblk = newblk;
ASSERT0(tbl->zt_blks_copied); ASSERT0(tbl->zt_blks_copied);
dmu_prefetch(zap->zap_objset, zap->zap_object, 0, dmu_prefetch_by_dnode(zap->zap_dnode, 0,
tbl->zt_blk << bs, tbl->zt_numblks << bs, tbl->zt_blk << bs, tbl->zt_numblks << bs,
ZIO_PRIORITY_SYNC_READ); ZIO_PRIORITY_SYNC_READ);
} }
@ -193,21 +193,21 @@ zap_table_grow(zap_t *zap, zap_table_phys_t *tbl,
uint64_t b = tbl->zt_blks_copied; uint64_t b = tbl->zt_blks_copied;
dmu_buf_t *db_old; dmu_buf_t *db_old;
int err = dmu_buf_hold(zap->zap_objset, zap->zap_object, int err = dmu_buf_hold_by_dnode(zap->zap_dnode,
(tbl->zt_blk + b) << bs, FTAG, &db_old, DMU_READ_NO_PREFETCH); (tbl->zt_blk + b) << bs, FTAG, &db_old, DMU_READ_NO_PREFETCH);
if (err != 0) if (err != 0)
return (err); return (err);
/* first half of entries in old[b] go to new[2*b+0] */ /* first half of entries in old[b] go to new[2*b+0] */
dmu_buf_t *db_new; dmu_buf_t *db_new;
VERIFY0(dmu_buf_hold(zap->zap_objset, zap->zap_object, VERIFY0(dmu_buf_hold_by_dnode(zap->zap_dnode,
(newblk + 2*b+0) << bs, FTAG, &db_new, DMU_READ_NO_PREFETCH)); (newblk + 2*b+0) << bs, FTAG, &db_new, DMU_READ_NO_PREFETCH));
dmu_buf_will_dirty(db_new, tx); dmu_buf_will_dirty(db_new, tx);
transfer_func(db_old->db_data, db_new->db_data, hepb); transfer_func(db_old->db_data, db_new->db_data, hepb);
dmu_buf_rele(db_new, FTAG); dmu_buf_rele(db_new, FTAG);
/* second half of entries in old[b] go to new[2*b+1] */ /* second half of entries in old[b] go to new[2*b+1] */
VERIFY0(dmu_buf_hold(zap->zap_objset, zap->zap_object, VERIFY0(dmu_buf_hold_by_dnode(zap->zap_dnode,
(newblk + 2*b+1) << bs, FTAG, &db_new, DMU_READ_NO_PREFETCH)); (newblk + 2*b+1) << bs, FTAG, &db_new, DMU_READ_NO_PREFETCH));
dmu_buf_will_dirty(db_new, tx); dmu_buf_will_dirty(db_new, tx);
transfer_func((uint64_t *)db_old->db_data + hepb, transfer_func((uint64_t *)db_old->db_data + hepb,
@ -255,7 +255,7 @@ zap_table_store(zap_t *zap, zap_table_phys_t *tbl, uint64_t idx, uint64_t val,
uint64_t off = idx & ((1<<(bs-3))-1); uint64_t off = idx & ((1<<(bs-3))-1);
dmu_buf_t *db; dmu_buf_t *db;
int err = dmu_buf_hold(zap->zap_objset, zap->zap_object, int err = dmu_buf_hold_by_dnode(zap->zap_dnode,
(tbl->zt_blk + blk) << bs, FTAG, &db, DMU_READ_NO_PREFETCH); (tbl->zt_blk + blk) << bs, FTAG, &db, DMU_READ_NO_PREFETCH);
if (err != 0) if (err != 0)
return (err); return (err);
@ -267,7 +267,7 @@ zap_table_store(zap_t *zap, zap_table_phys_t *tbl, uint64_t idx, uint64_t val,
uint64_t off2 = idx2 & ((1<<(bs-3))-1); uint64_t off2 = idx2 & ((1<<(bs-3))-1);
dmu_buf_t *db2; dmu_buf_t *db2;
err = dmu_buf_hold(zap->zap_objset, zap->zap_object, err = dmu_buf_hold_by_dnode(zap->zap_dnode,
(tbl->zt_nextblk + blk2) << bs, FTAG, &db2, (tbl->zt_nextblk + blk2) << bs, FTAG, &db2,
DMU_READ_NO_PREFETCH); DMU_READ_NO_PREFETCH);
if (err != 0) { if (err != 0) {
@ -296,16 +296,9 @@ zap_table_load(zap_t *zap, zap_table_phys_t *tbl, uint64_t idx, uint64_t *valp)
uint64_t blk = idx >> (bs-3); uint64_t blk = idx >> (bs-3);
uint64_t off = idx & ((1<<(bs-3))-1); uint64_t off = idx & ((1<<(bs-3))-1);
/*
* Note: this is equivalent to dmu_buf_hold(), but we use
* _dnode_enter / _by_dnode because it's faster because we don't
* have to hold the dnode.
*/
dnode_t *dn = dmu_buf_dnode_enter(zap->zap_dbuf);
dmu_buf_t *db; dmu_buf_t *db;
int err = dmu_buf_hold_by_dnode(dn, int err = dmu_buf_hold_by_dnode(zap->zap_dnode,
(tbl->zt_blk + blk) << bs, FTAG, &db, DMU_READ_NO_PREFETCH); (tbl->zt_blk + blk) << bs, FTAG, &db, DMU_READ_NO_PREFETCH);
dmu_buf_dnode_exit(zap->zap_dbuf);
if (err != 0) if (err != 0)
return (err); return (err);
*valp = ((uint64_t *)db->db_data)[off]; *valp = ((uint64_t *)db->db_data)[off];
@ -319,11 +312,9 @@ zap_table_load(zap_t *zap, zap_table_phys_t *tbl, uint64_t idx, uint64_t *valp)
*/ */
blk = (idx*2) >> (bs-3); blk = (idx*2) >> (bs-3);
dn = dmu_buf_dnode_enter(zap->zap_dbuf); err = dmu_buf_hold_by_dnode(zap->zap_dnode,
err = dmu_buf_hold_by_dnode(dn,
(tbl->zt_nextblk + blk) << bs, FTAG, &db, (tbl->zt_nextblk + blk) << bs, FTAG, &db,
DMU_READ_NO_PREFETCH); DMU_READ_NO_PREFETCH);
dmu_buf_dnode_exit(zap->zap_dbuf);
if (err == 0) if (err == 0)
dmu_buf_rele(db, FTAG); dmu_buf_rele(db, FTAG);
} }
@ -368,7 +359,7 @@ zap_grow_ptrtbl(zap_t *zap, dmu_tx_t *tx)
uint64_t newblk = zap_allocate_blocks(zap, 1); uint64_t newblk = zap_allocate_blocks(zap, 1);
dmu_buf_t *db_new; dmu_buf_t *db_new;
int err = dmu_buf_hold(zap->zap_objset, zap->zap_object, int err = dmu_buf_hold_by_dnode(zap->zap_dnode,
newblk << FZAP_BLOCK_SHIFT(zap), FTAG, &db_new, newblk << FZAP_BLOCK_SHIFT(zap), FTAG, &db_new,
DMU_READ_NO_PREFETCH); DMU_READ_NO_PREFETCH);
if (err != 0) if (err != 0)
@ -433,7 +424,7 @@ zap_create_leaf(zap_t *zap, dmu_tx_t *tx)
l->l_blkid = zap_allocate_blocks(zap, 1); l->l_blkid = zap_allocate_blocks(zap, 1);
l->l_dbuf = NULL; l->l_dbuf = NULL;
VERIFY0(dmu_buf_hold(zap->zap_objset, zap->zap_object, VERIFY0(dmu_buf_hold_by_dnode(zap->zap_dnode,
l->l_blkid << FZAP_BLOCK_SHIFT(zap), NULL, &l->l_dbuf, l->l_blkid << FZAP_BLOCK_SHIFT(zap), NULL, &l->l_dbuf,
DMU_READ_NO_PREFETCH)); DMU_READ_NO_PREFETCH));
dmu_buf_init_user(&l->l_dbu, zap_leaf_evict_sync, NULL, &l->l_dbuf); dmu_buf_init_user(&l->l_dbu, zap_leaf_evict_sync, NULL, &l->l_dbuf);
@ -533,10 +524,8 @@ zap_get_leaf_byblk(zap_t *zap, uint64_t blkid, dmu_tx_t *tx, krw_t lt,
return (SET_ERROR(ENOENT)); return (SET_ERROR(ENOENT));
int bs = FZAP_BLOCK_SHIFT(zap); int bs = FZAP_BLOCK_SHIFT(zap);
dnode_t *dn = dmu_buf_dnode_enter(zap->zap_dbuf); int err = dmu_buf_hold_by_dnode(zap->zap_dnode,
int err = dmu_buf_hold_by_dnode(dn,
blkid << bs, NULL, &db, DMU_READ_NO_PREFETCH); blkid << bs, NULL, &db, DMU_READ_NO_PREFETCH);
dmu_buf_dnode_exit(zap->zap_dbuf);
if (err != 0) if (err != 0)
return (err); return (err);
@ -985,7 +974,7 @@ fzap_prefetch(zap_name_t *zn)
if (zap_idx_to_blk(zap, idx, &blk) != 0) if (zap_idx_to_blk(zap, idx, &blk) != 0)
return; return;
int bs = FZAP_BLOCK_SHIFT(zap); int bs = FZAP_BLOCK_SHIFT(zap);
dmu_prefetch(zap->zap_objset, zap->zap_object, 0, blk << bs, 1 << bs, dmu_prefetch_by_dnode(zap->zap_dnode, 0, blk << bs, 1 << bs,
ZIO_PRIORITY_SYNC_READ); ZIO_PRIORITY_SYNC_READ);
} }
@ -1228,7 +1217,7 @@ fzap_cursor_retrieve(zap_t *zap, zap_cursor_t *zc, zap_attribute_t *za)
*/ */
if (zc->zc_hash == 0 && zap_iterate_prefetch && if (zc->zc_hash == 0 && zap_iterate_prefetch &&
zc->zc_prefetch && zap_f_phys(zap)->zap_freeblk > 2) { zc->zc_prefetch && zap_f_phys(zap)->zap_freeblk > 2) {
dmu_prefetch(zc->zc_objset, zc->zc_zapobj, 0, 0, dmu_prefetch_by_dnode(zap->zap_dnode, 0, 0,
zap_f_phys(zap)->zap_freeblk << FZAP_BLOCK_SHIFT(zap), zap_f_phys(zap)->zap_freeblk << FZAP_BLOCK_SHIFT(zap),
ZIO_PRIORITY_ASYNC_READ); ZIO_PRIORITY_ASYNC_READ);
} }
@ -1356,7 +1345,7 @@ fzap_get_stats(zap_t *zap, zap_stats_t *zs)
zap_stats_ptrtbl(zap, &ZAP_EMBEDDED_PTRTBL_ENT(zap, 0), zap_stats_ptrtbl(zap, &ZAP_EMBEDDED_PTRTBL_ENT(zap, 0),
1 << ZAP_EMBEDDED_PTRTBL_SHIFT(zap), zs); 1 << ZAP_EMBEDDED_PTRTBL_SHIFT(zap), zs);
} else { } else {
dmu_prefetch(zap->zap_objset, zap->zap_object, 0, dmu_prefetch_by_dnode(zap->zap_dnode, 0,
zap_f_phys(zap)->zap_ptrtbl.zt_blk << bs, zap_f_phys(zap)->zap_ptrtbl.zt_blk << bs,
zap_f_phys(zap)->zap_ptrtbl.zt_numblks << bs, zap_f_phys(zap)->zap_ptrtbl.zt_numblks << bs,
ZIO_PRIORITY_SYNC_READ); ZIO_PRIORITY_SYNC_READ);
@ -1366,7 +1355,7 @@ fzap_get_stats(zap_t *zap, zap_stats_t *zs)
dmu_buf_t *db; dmu_buf_t *db;
int err; int err;
err = dmu_buf_hold(zap->zap_objset, zap->zap_object, err = dmu_buf_hold_by_dnode(zap->zap_dnode,
(zap_f_phys(zap)->zap_ptrtbl.zt_blk + b) << bs, (zap_f_phys(zap)->zap_ptrtbl.zt_blk + b) << bs,
FTAG, &db, DMU_READ_NO_PREFETCH); FTAG, &db, DMU_READ_NO_PREFETCH);
if (err == 0) { if (err == 0) {

View File

@ -41,7 +41,8 @@
#include <sys/zap_leaf.h> #include <sys/zap_leaf.h>
#include <sys/arc.h> #include <sys/arc.h>
static uint16_t *zap_leaf_rehash_entry(zap_leaf_t *l, uint16_t entry); static uint16_t *zap_leaf_rehash_entry(zap_leaf_t *l, struct zap_leaf_entry *le,
uint16_t entry);
#define CHAIN_END 0xffff /* end of the chunk chain */ #define CHAIN_END 0xffff /* end of the chunk chain */
@ -52,16 +53,6 @@ static uint16_t *zap_leaf_rehash_entry(zap_leaf_t *l, uint16_t entry);
#define LEAF_HASH_ENTPTR(l, h) (&zap_leaf_phys(l)->l_hash[LEAF_HASH(l, h)]) #define LEAF_HASH_ENTPTR(l, h) (&zap_leaf_phys(l)->l_hash[LEAF_HASH(l, h)])
static void
zap_memset(void *a, int c, size_t n)
{
char *cp = a;
char *cpend = cp + n;
while (cp < cpend)
*cp++ = c;
}
static void static void
stv(int len, void *addr, uint64_t value) stv(int len, void *addr, uint64_t value)
{ {
@ -79,7 +70,7 @@ stv(int len, void *addr, uint64_t value)
*(uint64_t *)addr = value; *(uint64_t *)addr = value;
return; return;
default: default:
cmn_err(CE_PANIC, "bad int len %d", len); PANIC("bad int len %d", len);
} }
} }
@ -96,13 +87,13 @@ ldv(int len, const void *addr)
case 8: case 8:
return (*(uint64_t *)addr); return (*(uint64_t *)addr);
default: default:
cmn_err(CE_PANIC, "bad int len %d", len); PANIC("bad int len %d", len);
} }
return (0xFEEDFACEDEADBEEFULL); return (0xFEEDFACEDEADBEEFULL);
} }
void void
zap_leaf_byteswap(zap_leaf_phys_t *buf, int size) zap_leaf_byteswap(zap_leaf_phys_t *buf, size_t size)
{ {
zap_leaf_t l; zap_leaf_t l;
dmu_buf_t l_dbuf; dmu_buf_t l_dbuf;
@ -119,10 +110,10 @@ zap_leaf_byteswap(zap_leaf_phys_t *buf, int size)
buf->l_hdr.lh_prefix_len = BSWAP_16(buf->l_hdr.lh_prefix_len); buf->l_hdr.lh_prefix_len = BSWAP_16(buf->l_hdr.lh_prefix_len);
buf->l_hdr.lh_freelist = BSWAP_16(buf->l_hdr.lh_freelist); buf->l_hdr.lh_freelist = BSWAP_16(buf->l_hdr.lh_freelist);
for (int i = 0; i < ZAP_LEAF_HASH_NUMENTRIES(&l); i++) for (uint_t i = 0; i < ZAP_LEAF_HASH_NUMENTRIES(&l); i++)
buf->l_hash[i] = BSWAP_16(buf->l_hash[i]); buf->l_hash[i] = BSWAP_16(buf->l_hash[i]);
for (int i = 0; i < ZAP_LEAF_NUMCHUNKS(&l); i++) { for (uint_t i = 0; i < ZAP_LEAF_NUMCHUNKS(&l); i++) {
zap_leaf_chunk_t *lc = &ZAP_LEAF_CHUNK(&l, i); zap_leaf_chunk_t *lc = &ZAP_LEAF_CHUNK(&l, i);
struct zap_leaf_entry *le; struct zap_leaf_entry *le;
@ -160,11 +151,11 @@ void
zap_leaf_init(zap_leaf_t *l, boolean_t sort) zap_leaf_init(zap_leaf_t *l, boolean_t sort)
{ {
l->l_bs = highbit64(l->l_dbuf->db_size) - 1; l->l_bs = highbit64(l->l_dbuf->db_size) - 1;
zap_memset(&zap_leaf_phys(l)->l_hdr, 0, memset(&zap_leaf_phys(l)->l_hdr, 0,
sizeof (struct zap_leaf_header)); sizeof (struct zap_leaf_header));
zap_memset(zap_leaf_phys(l)->l_hash, CHAIN_END, memset(zap_leaf_phys(l)->l_hash, CHAIN_END,
2*ZAP_LEAF_HASH_NUMENTRIES(l)); 2*ZAP_LEAF_HASH_NUMENTRIES(l));
for (int i = 0; i < ZAP_LEAF_NUMCHUNKS(l); i++) { for (uint_t i = 0; i < ZAP_LEAF_NUMCHUNKS(l); i++) {
ZAP_LEAF_CHUNK(l, i).l_free.lf_type = ZAP_CHUNK_FREE; ZAP_LEAF_CHUNK(l, i).l_free.lf_type = ZAP_CHUNK_FREE;
ZAP_LEAF_CHUNK(l, i).l_free.lf_next = i+1; ZAP_LEAF_CHUNK(l, i).l_free.lf_next = i+1;
} }
@ -185,7 +176,7 @@ zap_leaf_chunk_alloc(zap_leaf_t *l)
{ {
ASSERT(zap_leaf_phys(l)->l_hdr.lh_nfree > 0); ASSERT(zap_leaf_phys(l)->l_hdr.lh_nfree > 0);
int chunk = zap_leaf_phys(l)->l_hdr.lh_freelist; uint_t chunk = zap_leaf_phys(l)->l_hdr.lh_freelist;
ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l)); ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
ASSERT3U(ZAP_LEAF_CHUNK(l, chunk).l_free.lf_type, ==, ZAP_CHUNK_FREE); ASSERT3U(ZAP_LEAF_CHUNK(l, chunk).l_free.lf_type, ==, ZAP_CHUNK_FREE);
@ -223,28 +214,29 @@ zap_leaf_array_create(zap_leaf_t *l, const char *buf,
{ {
uint16_t chunk_head; uint16_t chunk_head;
uint16_t *chunkp = &chunk_head; uint16_t *chunkp = &chunk_head;
int byten = 0; int byten = integer_size;
uint64_t value = 0; uint64_t value = 0;
int shift = (integer_size - 1) * 8; int shift = (integer_size - 1) * 8;
int len = num_integers; int len = num_integers;
ASSERT3U(num_integers * integer_size, <=, ZAP_MAXVALUELEN); ASSERT3U(num_integers * integer_size, <=, ZAP_MAXVALUELEN);
if (len > 0)
value = ldv(integer_size, buf);
while (len > 0) { while (len > 0) {
uint16_t chunk = zap_leaf_chunk_alloc(l); uint16_t chunk = zap_leaf_chunk_alloc(l);
struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array; struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array;
la->la_type = ZAP_CHUNK_ARRAY; la->la_type = ZAP_CHUNK_ARRAY;
for (int i = 0; i < ZAP_LEAF_ARRAY_BYTES; i++) { for (int i = 0; i < ZAP_LEAF_ARRAY_BYTES; i++) {
if (byten == 0)
value = ldv(integer_size, buf);
la->la_array[i] = value >> shift; la->la_array[i] = value >> shift;
value <<= 8; value <<= 8;
if (++byten == integer_size) { if (--byten == 0) {
byten = 0;
buf += integer_size;
if (--len == 0) if (--len == 0)
break; break;
byten = integer_size;
buf += integer_size;
value = ldv(integer_size, buf);
} }
} }
@ -264,7 +256,7 @@ zap_leaf_array_free(zap_leaf_t *l, uint16_t *chunkp)
*chunkp = CHAIN_END; *chunkp = CHAIN_END;
while (chunk != CHAIN_END) { while (chunk != CHAIN_END) {
int nextchunk = ZAP_LEAF_CHUNK(l, chunk).l_array.la_next; uint_t nextchunk = ZAP_LEAF_CHUNK(l, chunk).l_array.la_next;
ASSERT3U(ZAP_LEAF_CHUNK(l, chunk).l_array.la_type, ==, ASSERT3U(ZAP_LEAF_CHUNK(l, chunk).l_array.la_type, ==,
ZAP_CHUNK_ARRAY); ZAP_CHUNK_ARRAY);
zap_leaf_chunk_free(l, chunk); zap_leaf_chunk_free(l, chunk);
@ -333,7 +325,7 @@ zap_leaf_array_read(zap_leaf_t *l, uint16_t chunk,
static boolean_t static boolean_t
zap_leaf_array_match(zap_leaf_t *l, zap_name_t *zn, zap_leaf_array_match(zap_leaf_t *l, zap_name_t *zn,
int chunk, int array_numints) uint_t chunk, int array_numints)
{ {
int bseen = 0; int bseen = 0;
@ -562,7 +554,7 @@ zap_entry_create(zap_leaf_t *l, zap_name_t *zn, uint32_t cd,
uint64_t valuelen = integer_size * num_integers; uint64_t valuelen = integer_size * num_integers;
int numchunks = 1 + ZAP_LEAF_ARRAY_NCHUNKS(zn->zn_key_orig_numints * uint_t numchunks = 1 + ZAP_LEAF_ARRAY_NCHUNKS(zn->zn_key_orig_numints *
zn->zn_key_intlen) + ZAP_LEAF_ARRAY_NCHUNKS(valuelen); zn->zn_key_intlen) + ZAP_LEAF_ARRAY_NCHUNKS(valuelen);
if (numchunks > ZAP_LEAF_NUMCHUNKS(l)) if (numchunks > ZAP_LEAF_NUMCHUNKS(l))
return (SET_ERROR(E2BIG)); return (SET_ERROR(E2BIG));
@ -624,7 +616,7 @@ zap_entry_create(zap_leaf_t *l, zap_name_t *zn, uint32_t cd,
/* link it into the hash chain */ /* link it into the hash chain */
/* XXX if we did the search above, we could just use that */ /* XXX if we did the search above, we could just use that */
uint16_t *chunkp = zap_leaf_rehash_entry(l, chunk); uint16_t *chunkp = zap_leaf_rehash_entry(l, le, chunk);
zap_leaf_phys(l)->l_hdr.lh_nentries++; zap_leaf_phys(l)->l_hdr.lh_nentries++;
@ -687,9 +679,8 @@ zap_entry_normalization_conflict(zap_entry_handle_t *zeh, zap_name_t *zn,
*/ */
static uint16_t * static uint16_t *
zap_leaf_rehash_entry(zap_leaf_t *l, uint16_t entry) zap_leaf_rehash_entry(zap_leaf_t *l, struct zap_leaf_entry *le, uint16_t entry)
{ {
struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, entry);
struct zap_leaf_entry *le2; struct zap_leaf_entry *le2;
uint16_t *chunkp; uint16_t *chunkp;
@ -722,7 +713,7 @@ zap_leaf_transfer_array(zap_leaf_t *l, uint16_t chunk, zap_leaf_t *nl)
&ZAP_LEAF_CHUNK(nl, nchunk).l_array; &ZAP_LEAF_CHUNK(nl, nchunk).l_array;
struct zap_leaf_array *la = struct zap_leaf_array *la =
&ZAP_LEAF_CHUNK(l, chunk).l_array; &ZAP_LEAF_CHUNK(l, chunk).l_array;
int nextchunk = la->la_next; uint_t nextchunk = la->la_next;
ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l)); ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
ASSERT3U(nchunk, <, ZAP_LEAF_NUMCHUNKS(l)); ASSERT3U(nchunk, <, ZAP_LEAF_NUMCHUNKS(l));
@ -739,7 +730,7 @@ zap_leaf_transfer_array(zap_leaf_t *l, uint16_t chunk, zap_leaf_t *nl)
} }
static void static void
zap_leaf_transfer_entry(zap_leaf_t *l, int entry, zap_leaf_t *nl) zap_leaf_transfer_entry(zap_leaf_t *l, uint_t entry, zap_leaf_t *nl)
{ {
struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, entry); struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, entry);
ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY); ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);
@ -748,7 +739,7 @@ zap_leaf_transfer_entry(zap_leaf_t *l, int entry, zap_leaf_t *nl)
struct zap_leaf_entry *nle = ZAP_LEAF_ENTRY(nl, chunk); struct zap_leaf_entry *nle = ZAP_LEAF_ENTRY(nl, chunk);
*nle = *le; /* structure assignment */ *nle = *le; /* structure assignment */
(void) zap_leaf_rehash_entry(nl, chunk); (void) zap_leaf_rehash_entry(nl, nle, chunk);
nle->le_name_chunk = zap_leaf_transfer_array(l, le->le_name_chunk, nl); nle->le_name_chunk = zap_leaf_transfer_array(l, le->le_name_chunk, nl);
nle->le_value_chunk = nle->le_value_chunk =
@ -766,7 +757,7 @@ zap_leaf_transfer_entry(zap_leaf_t *l, int entry, zap_leaf_t *nl)
void void
zap_leaf_split(zap_leaf_t *l, zap_leaf_t *nl, boolean_t sort) zap_leaf_split(zap_leaf_t *l, zap_leaf_t *nl, boolean_t sort)
{ {
int bit = 64 - 1 - zap_leaf_phys(l)->l_hdr.lh_prefix_len; uint_t bit = 64 - 1 - zap_leaf_phys(l)->l_hdr.lh_prefix_len;
/* set new prefix and prefix_len */ /* set new prefix and prefix_len */
zap_leaf_phys(l)->l_hdr.lh_prefix <<= 1; zap_leaf_phys(l)->l_hdr.lh_prefix <<= 1;
@ -777,7 +768,7 @@ zap_leaf_split(zap_leaf_t *l, zap_leaf_t *nl, boolean_t sort)
zap_leaf_phys(l)->l_hdr.lh_prefix_len; zap_leaf_phys(l)->l_hdr.lh_prefix_len;
/* break existing hash chains */ /* break existing hash chains */
zap_memset(zap_leaf_phys(l)->l_hash, CHAIN_END, memset(zap_leaf_phys(l)->l_hash, CHAIN_END,
2*ZAP_LEAF_HASH_NUMENTRIES(l)); 2*ZAP_LEAF_HASH_NUMENTRIES(l));
if (sort) if (sort)
@ -792,7 +783,7 @@ zap_leaf_split(zap_leaf_t *l, zap_leaf_t *nl, boolean_t sort)
* but this accesses memory more sequentially, and when we're * but this accesses memory more sequentially, and when we're
* called, the block is usually pretty full. * called, the block is usually pretty full.
*/ */
for (int i = 0; i < ZAP_LEAF_NUMCHUNKS(l); i++) { for (uint_t i = 0; i < ZAP_LEAF_NUMCHUNKS(l); i++) {
struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, i); struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, i);
if (le->le_type != ZAP_CHUNK_ENTRY) if (le->le_type != ZAP_CHUNK_ENTRY)
continue; continue;
@ -800,14 +791,14 @@ zap_leaf_split(zap_leaf_t *l, zap_leaf_t *nl, boolean_t sort)
if (le->le_hash & (1ULL << bit)) if (le->le_hash & (1ULL << bit))
zap_leaf_transfer_entry(l, i, nl); zap_leaf_transfer_entry(l, i, nl);
else else
(void) zap_leaf_rehash_entry(l, i); (void) zap_leaf_rehash_entry(l, le, i);
} }
} }
void void
zap_leaf_stats(zap_t *zap, zap_leaf_t *l, zap_stats_t *zs) zap_leaf_stats(zap_t *zap, zap_leaf_t *l, zap_stats_t *zs)
{ {
int n = zap_f_phys(zap)->zap_ptrtbl.zt_shift - uint_t n = zap_f_phys(zap)->zap_ptrtbl.zt_shift -
zap_leaf_phys(l)->l_hdr.lh_prefix_len; zap_leaf_phys(l)->l_hdr.lh_prefix_len;
n = MIN(n, ZAP_HISTOGRAM_SIZE-1); n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
zs->zs_leafs_with_2n_pointers[n]++; zs->zs_leafs_with_2n_pointers[n]++;
@ -823,9 +814,9 @@ zap_leaf_stats(zap_t *zap, zap_leaf_t *l, zap_stats_t *zs)
n = MIN(n, ZAP_HISTOGRAM_SIZE-1); n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
zs->zs_blocks_n_tenths_full[n]++; zs->zs_blocks_n_tenths_full[n]++;
for (int i = 0; i < ZAP_LEAF_HASH_NUMENTRIES(l); i++) { for (uint_t i = 0; i < ZAP_LEAF_HASH_NUMENTRIES(l); i++) {
int nentries = 0; uint_t nentries = 0;
int chunk = zap_leaf_phys(l)->l_hash[i]; uint_t chunk = zap_leaf_phys(l)->l_hash[i];
while (chunk != CHAIN_END) { while (chunk != CHAIN_END) {
struct zap_leaf_entry *le = struct zap_leaf_entry *le =

View File

@ -415,7 +415,7 @@ mze_destroy(zap_t *zap)
} }
static zap_t * static zap_t *
mzap_open(objset_t *os, uint64_t obj, dmu_buf_t *db) mzap_open(dmu_buf_t *db)
{ {
zap_t *winner; zap_t *winner;
uint64_t *zap_hdr = (uint64_t *)db->db_data; uint64_t *zap_hdr = (uint64_t *)db->db_data;
@ -427,8 +427,8 @@ mzap_open(objset_t *os, uint64_t obj, dmu_buf_t *db)
zap_t *zap = kmem_zalloc(sizeof (zap_t), KM_SLEEP); zap_t *zap = kmem_zalloc(sizeof (zap_t), KM_SLEEP);
rw_init(&zap->zap_rwlock, NULL, RW_DEFAULT, NULL); rw_init(&zap->zap_rwlock, NULL, RW_DEFAULT, NULL);
rw_enter(&zap->zap_rwlock, RW_WRITER); rw_enter(&zap->zap_rwlock, RW_WRITER);
zap->zap_objset = os; zap->zap_objset = dmu_buf_get_objset(db);
zap->zap_object = obj; zap->zap_object = db->db_object;
zap->zap_dbuf = db; zap->zap_dbuf = db;
if (zap_block_type != ZBT_MICRO) { if (zap_block_type != ZBT_MICRO) {
@ -518,7 +518,7 @@ handle_winner:
* have the specified tag. * have the specified tag.
*/ */
static int static int
zap_lockdir_impl(dmu_buf_t *db, const void *tag, dmu_tx_t *tx, zap_lockdir_impl(dnode_t *dn, dmu_buf_t *db, const void *tag, dmu_tx_t *tx,
krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp) krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp)
{ {
ASSERT0(db->db_offset); ASSERT0(db->db_offset);
@ -528,13 +528,13 @@ zap_lockdir_impl(dmu_buf_t *db, const void *tag, dmu_tx_t *tx,
*zapp = NULL; *zapp = NULL;
dmu_object_info_from_db(db, &doi); dmu_object_info_from_dnode(dn, &doi);
if (DMU_OT_BYTESWAP(doi.doi_type) != DMU_BSWAP_ZAP) if (DMU_OT_BYTESWAP(doi.doi_type) != DMU_BSWAP_ZAP)
return (SET_ERROR(EINVAL)); return (SET_ERROR(EINVAL));
zap_t *zap = dmu_buf_get_user(db); zap_t *zap = dmu_buf_get_user(db);
if (zap == NULL) { if (zap == NULL) {
zap = mzap_open(os, obj, db); zap = mzap_open(db);
if (zap == NULL) { if (zap == NULL) {
/* /*
* mzap_open() didn't like what it saw on-disk. * mzap_open() didn't like what it saw on-disk.
@ -563,6 +563,7 @@ zap_lockdir_impl(dmu_buf_t *db, const void *tag, dmu_tx_t *tx,
} }
zap->zap_objset = os; zap->zap_objset = os;
zap->zap_dnode = dn;
if (lt == RW_WRITER) if (lt == RW_WRITER)
dmu_buf_will_dirty(db, tx); dmu_buf_will_dirty(db, tx);
@ -598,23 +599,16 @@ zap_lockdir_by_dnode(dnode_t *dn, dmu_tx_t *tx,
zap_t **zapp) zap_t **zapp)
{ {
dmu_buf_t *db; dmu_buf_t *db;
int err;
int err = dmu_buf_hold_by_dnode(dn, 0, tag, &db, DMU_READ_NO_PREFETCH); err = dmu_buf_hold_by_dnode(dn, 0, tag, &db, DMU_READ_NO_PREFETCH);
if (err != 0) { if (err != 0)
return (err); return (err);
} err = zap_lockdir_impl(dn, db, tag, tx, lti, fatreader, adding, zapp);
#ifdef ZFS_DEBUG if (err != 0)
{
dmu_object_info_t doi;
dmu_object_info_from_db(db, &doi);
ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP);
}
#endif
err = zap_lockdir_impl(db, tag, tx, lti, fatreader, adding, zapp);
if (err != 0) {
dmu_buf_rele(db, tag); dmu_buf_rele(db, tag);
} else
VERIFY(dnode_add_ref(dn, tag));
return (err); return (err);
} }
@ -623,21 +617,23 @@ zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
krw_t lti, boolean_t fatreader, boolean_t adding, const void *tag, krw_t lti, boolean_t fatreader, boolean_t adding, const void *tag,
zap_t **zapp) zap_t **zapp)
{ {
dnode_t *dn;
dmu_buf_t *db; dmu_buf_t *db;
int err;
int err = dmu_buf_hold(os, obj, 0, tag, &db, DMU_READ_NO_PREFETCH); err = dnode_hold(os, obj, tag, &dn);
if (err != 0) if (err != 0)
return (err); return (err);
#ifdef ZFS_DEBUG err = dmu_buf_hold_by_dnode(dn, 0, tag, &db, DMU_READ_NO_PREFETCH);
{ if (err != 0) {
dmu_object_info_t doi; dnode_rele(dn, tag);
dmu_object_info_from_db(db, &doi); return (err);
ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP);
} }
#endif err = zap_lockdir_impl(dn, db, tag, tx, lti, fatreader, adding, zapp);
err = zap_lockdir_impl(db, tag, tx, lti, fatreader, adding, zapp); if (err != 0) {
if (err != 0)
dmu_buf_rele(db, tag); dmu_buf_rele(db, tag);
dnode_rele(dn, tag);
}
return (err); return (err);
} }
@ -645,6 +641,7 @@ void
zap_unlockdir(zap_t *zap, const void *tag) zap_unlockdir(zap_t *zap, const void *tag)
{ {
rw_exit(&zap->zap_rwlock); rw_exit(&zap->zap_rwlock);
dnode_rele(zap->zap_dnode, tag);
dmu_buf_rele(zap->zap_dbuf, tag); dmu_buf_rele(zap->zap_dbuf, tag);
} }
@ -730,7 +727,8 @@ mzap_create_impl(dnode_t *dn, int normflags, zap_flags_t flags, dmu_tx_t *tx)
if (flags != 0) { if (flags != 0) {
zap_t *zap; zap_t *zap;
/* Only fat zap supports flags; upgrade immediately. */ /* Only fat zap supports flags; upgrade immediately. */
VERIFY0(zap_lockdir_impl(db, FTAG, tx, RW_WRITER, VERIFY(dnode_add_ref(dn, FTAG));
VERIFY0(zap_lockdir_impl(dn, db, FTAG, tx, RW_WRITER,
B_FALSE, B_FALSE, &zap)); B_FALSE, B_FALSE, &zap));
VERIFY0(mzap_upgrade(&zap, FTAG, tx, flags)); VERIFY0(mzap_upgrade(&zap, FTAG, tx, flags));
zap_unlockdir(zap, FTAG); zap_unlockdir(zap, FTAG);
@ -1325,6 +1323,26 @@ zap_add_by_dnode(dnode_t *dn, const char *key,
return (err); return (err);
} }
/*
 * Shared worker for the uint64-keyed "add" entry points.
 *
 * Builds a zap_name_t for the given uint64 key and hands it to fzap_add().
 * The visible callers lock the zap with a tag and pass that same tag here;
 * this function is the one that calls zap_unlockdir(), so callers must not
 * unlock again after it returns.
 *
 * Returns ENOTSUP if no name could be allocated for the key, otherwise
 * whatever fzap_add() returned.
 */
static int
zap_add_uint64_impl(zap_t *zap, const uint64_t *key,
    int key_numints, int integer_size, uint64_t num_integers,
    const void *val, dmu_tx_t *tx, const void *tag)
{
	zap_name_t *name = zap_name_alloc_uint64(zap, key, key_numints);

	if (name != NULL) {
		int rc = fzap_add(name, integer_size, num_integers, val,
		    tag, tx);
		/* fzap_add() may change the zap; pick up the current one. */
		zap_t *cur = name->zn_zap;

		zap_name_free(name);
		/* cur may be NULL if fzap_add() failed. */
		if (cur != NULL)
			zap_unlockdir(cur, tag);
		return (rc);
	}

	/* No usable name for this key: release the zap and bail out. */
	zap_unlockdir(zap, tag);
	return (SET_ERROR(ENOTSUP));
}
int int
zap_add_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, zap_add_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
int key_numints, int integer_size, uint64_t num_integers, int key_numints, int integer_size, uint64_t num_integers,
@ -1336,16 +1354,26 @@ zap_add_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap); zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
if (err != 0) if (err != 0)
return (err); return (err);
zap_name_t *zn = zap_name_alloc_uint64(zap, key, key_numints); err = zap_add_uint64_impl(zap, key, key_numints,
if (zn == NULL) { integer_size, num_integers, val, tx, FTAG);
zap_unlockdir(zap, FTAG); /* zap_add_uint64_impl() calls zap_unlockdir() */
return (SET_ERROR(ENOTSUP)); return (err);
} }
err = fzap_add(zn, integer_size, num_integers, val, FTAG, tx);
zap = zn->zn_zap; /* fzap_add() may change zap */ int
zap_name_free(zn); zap_add_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
if (zap != NULL) /* may be NULL if fzap_add() failed */ int key_numints, int integer_size, uint64_t num_integers,
zap_unlockdir(zap, FTAG); const void *val, dmu_tx_t *tx)
{
zap_t *zap;
int err =
zap_lockdir_by_dnode(dn, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
if (err != 0)
return (err);
err = zap_add_uint64_impl(zap, key, key_numints,
integer_size, num_integers, val, tx, FTAG);
/* zap_add_uint64_impl() calls zap_unlockdir() */
return (err); return (err);
} }
@ -1396,10 +1424,30 @@ zap_update(objset_t *os, uint64_t zapobj, const char *name,
return (err); return (err);
} }
/*
 * Shared worker for the uint64-keyed "update" entry points.
 *
 * Builds a zap_name_t for the given uint64 key and hands it to
 * fzap_update().  The visible callers lock the zap with a tag and pass
 * that same tag here; this function is the one that calls
 * zap_unlockdir(), so callers must not unlock again after it returns.
 *
 * Returns ENOTSUP if no name could be allocated for the key, otherwise
 * whatever fzap_update() returned.
 */
static int
zap_update_uint64_impl(zap_t *zap, const uint64_t *key, int key_numints,
    int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx,
    const void *tag)
{
	zap_name_t *name = zap_name_alloc_uint64(zap, key, key_numints);

	if (name != NULL) {
		int rc = fzap_update(name, integer_size, num_integers, val,
		    tag, tx);
		/* fzap_update() may change the zap; pick up the current one. */
		zap_t *cur = name->zn_zap;

		zap_name_free(name);
		/* cur may be NULL after a failure; only unlock a zap we have. */
		if (cur != NULL)
			zap_unlockdir(cur, tag);
		return (rc);
	}

	/* No usable name for this key: release the zap and bail out. */
	zap_unlockdir(zap, tag);
	return (SET_ERROR(ENOTSUP));
}
int int
zap_update_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, zap_update_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
int key_numints, int key_numints, int integer_size, uint64_t num_integers, const void *val,
int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx) dmu_tx_t *tx)
{ {
zap_t *zap; zap_t *zap;
@ -1407,16 +1455,25 @@ zap_update_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap); zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
if (err != 0) if (err != 0)
return (err); return (err);
zap_name_t *zn = zap_name_alloc_uint64(zap, key, key_numints); err = zap_update_uint64_impl(zap, key, key_numints,
if (zn == NULL) { integer_size, num_integers, val, tx, FTAG);
zap_unlockdir(zap, FTAG); /* zap_update_uint64_impl() calls zap_unlockdir() */
return (SET_ERROR(ENOTSUP)); return (err);
} }
err = fzap_update(zn, integer_size, num_integers, val, FTAG, tx);
zap = zn->zn_zap; /* fzap_update() may change zap */ int
zap_name_free(zn); zap_update_uint64_by_dnode(dnode_t *dn, const uint64_t *key, int key_numints,
if (zap != NULL) /* may be NULL if fzap_upgrade() failed */ int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx)
zap_unlockdir(zap, FTAG); {
zap_t *zap;
int err =
zap_lockdir_by_dnode(dn, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
if (err != 0)
return (err);
err = zap_update_uint64_impl(zap, key, key_numints,
integer_size, num_integers, val, tx, FTAG);
/* zap_update_uint64_impl() calls zap_unlockdir() */
return (err); return (err);
} }
@ -1481,6 +1538,23 @@ zap_remove_by_dnode(dnode_t *dn, const char *name, dmu_tx_t *tx)
return (err); return (err);
} }
/*
 * Shared worker for the uint64-keyed "remove" entry points.
 *
 * Builds a zap_name_t for the given uint64 key and hands it to
 * fzap_remove().  The zap passed in is released with zap_unlockdir()
 * on every path, so callers must not unlock again after it returns.
 *
 * Returns ENOTSUP if no name could be allocated for the key, otherwise
 * whatever fzap_remove() returned.
 */
static int
zap_remove_uint64_impl(zap_t *zap, const uint64_t *key, int key_numints,
    dmu_tx_t *tx, const void *tag)
{
	int rc;
	zap_name_t *name = zap_name_alloc_uint64(zap, key, key_numints);

	if (name == NULL) {
		/* Unlock first, then record the error, as on the success path. */
		zap_unlockdir(zap, tag);
		rc = SET_ERROR(ENOTSUP);
	} else {
		rc = fzap_remove(name, tx);
		zap_name_free(name);
		zap_unlockdir(zap, tag);
	}
	return (rc);
}
int int
zap_remove_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, zap_remove_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
int key_numints, dmu_tx_t *tx) int key_numints, dmu_tx_t *tx)
@ -1491,14 +1565,23 @@ zap_remove_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, FTAG, &zap); zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, FTAG, &zap);
if (err != 0) if (err != 0)
return (err); return (err);
zap_name_t *zn = zap_name_alloc_uint64(zap, key, key_numints); err = zap_remove_uint64_impl(zap, key, key_numints, tx, FTAG);
if (zn == NULL) { /* zap_remove_uint64_impl() calls zap_unlockdir() */
zap_unlockdir(zap, FTAG); return (err);
return (SET_ERROR(ENOTSUP)); }
}
err = fzap_remove(zn, tx); int
zap_name_free(zn); zap_remove_uint64_by_dnode(dnode_t *dn, const uint64_t *key, int key_numints,
zap_unlockdir(zap, FTAG); dmu_tx_t *tx)
{
zap_t *zap;
int err =
zap_lockdir_by_dnode(dn, tx, RW_WRITER, TRUE, FALSE, FTAG, &zap);
if (err != 0)
return (err);
err = zap_remove_uint64_impl(zap, key, key_numints, tx, FTAG);
/* zap_remove_uint64_impl() calls zap_unlockdir() */
return (err); return (err);
} }
@ -1704,14 +1787,17 @@ EXPORT_SYMBOL(zap_prefetch_uint64);
EXPORT_SYMBOL(zap_add); EXPORT_SYMBOL(zap_add);
EXPORT_SYMBOL(zap_add_by_dnode); EXPORT_SYMBOL(zap_add_by_dnode);
EXPORT_SYMBOL(zap_add_uint64); EXPORT_SYMBOL(zap_add_uint64);
EXPORT_SYMBOL(zap_add_uint64_by_dnode);
EXPORT_SYMBOL(zap_update); EXPORT_SYMBOL(zap_update);
EXPORT_SYMBOL(zap_update_uint64); EXPORT_SYMBOL(zap_update_uint64);
EXPORT_SYMBOL(zap_update_uint64_by_dnode);
EXPORT_SYMBOL(zap_length); EXPORT_SYMBOL(zap_length);
EXPORT_SYMBOL(zap_length_uint64); EXPORT_SYMBOL(zap_length_uint64);
EXPORT_SYMBOL(zap_remove); EXPORT_SYMBOL(zap_remove);
EXPORT_SYMBOL(zap_remove_by_dnode); EXPORT_SYMBOL(zap_remove_by_dnode);
EXPORT_SYMBOL(zap_remove_norm); EXPORT_SYMBOL(zap_remove_norm);
EXPORT_SYMBOL(zap_remove_uint64); EXPORT_SYMBOL(zap_remove_uint64);
EXPORT_SYMBOL(zap_remove_uint64_by_dnode);
EXPORT_SYMBOL(zap_count); EXPORT_SYMBOL(zap_count);
EXPORT_SYMBOL(zap_value_search); EXPORT_SYMBOL(zap_value_search);
EXPORT_SYMBOL(zap_join); EXPORT_SYMBOL(zap_join);

View File

@ -27,7 +27,7 @@
* Copyright (c) 2014, 2016 Joyent, Inc. All rights reserved. * Copyright (c) 2014, 2016 Joyent, Inc. All rights reserved.
* Copyright 2016 Nexenta Systems, Inc. All rights reserved. * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2014, Joyent, Inc. All rights reserved. * Copyright (c) 2014, Joyent, Inc. All rights reserved.
* Copyright (c) 2011, 2020 by Delphix. All rights reserved. * Copyright (c) 2011, 2024 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved. * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved. * Copyright (c) 2013 Steven Hartland. All rights reserved.
* Copyright (c) 2014 Integros [integros.com] * Copyright (c) 2014 Integros [integros.com]
@ -1886,7 +1886,7 @@ zfs_ioc_vdev_add(zfs_cmd_t *zc)
error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size, error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
zc->zc_iflags, &config); zc->zc_iflags, &config);
if (error == 0) { if (error == 0) {
error = spa_vdev_add(spa, config); error = spa_vdev_add(spa, config, zc->zc_flags);
nvlist_free(config); nvlist_free(config);
} }
spa_close(spa, FTAG); spa_close(spa, FTAG);

View File

@ -123,7 +123,7 @@ zfs_holey_common(znode_t *zp, ulong_t cmd, loff_t *off)
/* Flush any mmap()'d data to disk */ /* Flush any mmap()'d data to disk */
if (zn_has_cached_data(zp, 0, file_sz - 1)) if (zn_has_cached_data(zp, 0, file_sz - 1))
zn_flush_cached_data(zp, B_FALSE); zn_flush_cached_data(zp, B_TRUE);
lr = zfs_rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_READER); lr = zfs_rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_READER);
error = dmu_offset_next(ZTOZSB(zp)->z_os, zp->z_id, hole, &noff); error = dmu_offset_next(ZTOZSB(zp)->z_os, zp->z_id, hole, &noff);
@ -1187,6 +1187,10 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp,
} }
} }
/* Flush any mmap()'d data to disk */
if (zn_has_cached_data(inzp, inoff, inoff + len - 1))
zn_flush_cached_data(inzp, B_TRUE);
/* /*
* Maintain predictable lock order. * Maintain predictable lock order.
*/ */

View File

@ -557,7 +557,7 @@ zil_clear_log_block(zilog_t *zilog, const blkptr_t *bp, void *tx,
* that we rewind to is invalid. Thus, we return -1 so * that we rewind to is invalid. Thus, we return -1 so
* zil_parse() doesn't attempt to read it. * zil_parse() doesn't attempt to read it.
*/ */
if (bp->blk_birth >= first_txg) if (BP_GET_LOGICAL_BIRTH(bp) >= first_txg)
return (-1); return (-1);
if (zil_bp_tree_add(zilog, bp) != 0) if (zil_bp_tree_add(zilog, bp) != 0)
@ -583,7 +583,7 @@ zil_claim_log_block(zilog_t *zilog, const blkptr_t *bp, void *tx,
* Claim log block if not already committed and not already claimed. * Claim log block if not already committed and not already claimed.
* If tx == NULL, just verify that the block is claimable. * If tx == NULL, just verify that the block is claimable.
*/ */
if (BP_IS_HOLE(bp) || bp->blk_birth < first_txg || if (BP_IS_HOLE(bp) || BP_GET_LOGICAL_BIRTH(bp) < first_txg ||
zil_bp_tree_add(zilog, bp) != 0) zil_bp_tree_add(zilog, bp) != 0)
return (0); return (0);
@ -608,7 +608,7 @@ zil_claim_write(zilog_t *zilog, const lr_t *lrc, void *tx, uint64_t first_txg)
* waited for all writes to be stable first), so it is semantically * waited for all writes to be stable first), so it is semantically
* correct to declare this the end of the log. * correct to declare this the end of the log.
*/ */
if (lr->lr_blkptr.blk_birth >= first_txg) { if (BP_GET_LOGICAL_BIRTH(&lr->lr_blkptr) >= first_txg) {
error = zil_read_log_data(zilog, lr, NULL); error = zil_read_log_data(zilog, lr, NULL);
if (error != 0) if (error != 0)
return (error); return (error);
@ -655,7 +655,7 @@ zil_claim_clone_range(zilog_t *zilog, const lr_t *lrc, void *tx,
* just in case lets be safe and just stop here now instead of * just in case lets be safe and just stop here now instead of
* corrupting the pool. * corrupting the pool.
*/ */
if (BP_PHYSICAL_BIRTH(bp) >= first_txg) if (BP_GET_BIRTH(bp) >= first_txg)
return (SET_ERROR(ENOENT)); return (SET_ERROR(ENOENT));
/* /*
@ -710,8 +710,8 @@ zil_free_write(zilog_t *zilog, const lr_t *lrc, void *tx, uint64_t claim_txg)
/* /*
* If we previously claimed it, we need to free it. * If we previously claimed it, we need to free it.
*/ */
if (bp->blk_birth >= claim_txg && zil_bp_tree_add(zilog, bp) == 0 && if (BP_GET_LOGICAL_BIRTH(bp) >= claim_txg &&
!BP_IS_HOLE(bp)) { zil_bp_tree_add(zilog, bp) == 0 && !BP_IS_HOLE(bp)) {
zio_free(zilog->zl_spa, dmu_tx_get_txg(tx), bp); zio_free(zilog->zl_spa, dmu_tx_get_txg(tx), bp);
} }
@ -1965,7 +1965,7 @@ next_lwb:
&slog); &slog);
} }
if (error == 0) { if (error == 0) {
ASSERT3U(bp->blk_birth, ==, txg); ASSERT3U(BP_GET_LOGICAL_BIRTH(bp), ==, txg);
BP_SET_CHECKSUM(bp, nlwb->lwb_slim ? ZIO_CHECKSUM_ZILOG2 : BP_SET_CHECKSUM(bp, nlwb->lwb_slim ? ZIO_CHECKSUM_ZILOG2 :
ZIO_CHECKSUM_ZILOG); ZIO_CHECKSUM_ZILOG);
bp->blk_cksum = lwb->lwb_blk.blk_cksum; bp->blk_cksum = lwb->lwb_blk.blk_cksum;

View File

@ -613,7 +613,7 @@ error:
zio->io_error = SET_ERROR(EIO); zio->io_error = SET_ERROR(EIO);
if ((zio->io_flags & ZIO_FLAG_SPECULATIVE) == 0) { if ((zio->io_flags & ZIO_FLAG_SPECULATIVE) == 0) {
spa_log_error(spa, &zio->io_bookmark, spa_log_error(spa, &zio->io_bookmark,
&zio->io_bp->blk_birth); BP_GET_LOGICAL_BIRTH(zio->io_bp));
(void) zfs_ereport_post(FM_EREPORT_ZFS_AUTHENTICATION, (void) zfs_ereport_post(FM_EREPORT_ZFS_AUTHENTICATION,
spa, NULL, &zio->io_bookmark, zio, 0); spa, NULL, &zio->io_bookmark, zio, 0);
} }
@ -1052,8 +1052,8 @@ zfs_blkptr_verify_log(spa_t *spa, const blkptr_t *bp,
(long long)bp->blk_prop, (long long)bp->blk_prop,
(long long)bp->blk_pad[0], (long long)bp->blk_pad[0],
(long long)bp->blk_pad[1], (long long)bp->blk_pad[1],
(long long)bp->blk_phys_birth, (long long)BP_GET_PHYSICAL_BIRTH(bp),
(long long)bp->blk_birth, (long long)BP_GET_LOGICAL_BIRTH(bp),
(long long)bp->blk_fill, (long long)bp->blk_fill,
(long long)bp->blk_cksum.zc_word[0], (long long)bp->blk_cksum.zc_word[0],
(long long)bp->blk_cksum.zc_word[1], (long long)bp->blk_cksum.zc_word[1],
@ -1156,10 +1156,11 @@ zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp,
/* /*
* Pool-specific checks. * Pool-specific checks.
* *
* Note: it would be nice to verify that the blk_birth and * Note: it would be nice to verify that the logical birth
* BP_PHYSICAL_BIRTH() are not too large. However, spa_freeze() * and physical birth are not too large. However,
* allows the birth time of log blocks (and dmu_sync()-ed blocks * spa_freeze() allows the birth time of log blocks (and
* that are in the log) to be arbitrarily large. * dmu_sync()-ed blocks that are in the log) to be arbitrarily
* large.
*/ */
for (int i = 0; i < BP_GET_NDVAS(bp); i++) { for (int i = 0; i < BP_GET_NDVAS(bp); i++) {
const dva_t *dva = &bp->blk_dva[i]; const dva_t *dva = &bp->blk_dva[i];
@ -1246,7 +1247,7 @@ zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
{ {
zio_t *zio; zio_t *zio;
zio = zio_create(pio, spa, BP_PHYSICAL_BIRTH(bp), bp, zio = zio_create(pio, spa, BP_GET_BIRTH(bp), bp,
data, size, size, done, private, data, size, size, done, private,
ZIO_TYPE_READ, priority, flags, NULL, 0, zb, ZIO_TYPE_READ, priority, flags, NULL, 0, zb,
ZIO_STAGE_OPEN, (flags & ZIO_FLAG_DDT_CHILD) ? ZIO_STAGE_OPEN, (flags & ZIO_FLAG_DDT_CHILD) ?
@ -1435,7 +1436,7 @@ zio_claim(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
* starts allocating blocks -- so that nothing is allocated twice. * starts allocating blocks -- so that nothing is allocated twice.
* If txg == 0 we just verify that the block is claimable. * If txg == 0 we just verify that the block is claimable.
*/ */
ASSERT3U(spa->spa_uberblock.ub_rootbp.blk_birth, <, ASSERT3U(BP_GET_LOGICAL_BIRTH(&spa->spa_uberblock.ub_rootbp), <,
spa_min_claim_txg(spa)); spa_min_claim_txg(spa));
ASSERT(txg == spa_min_claim_txg(spa) || txg == 0); ASSERT(txg == spa_min_claim_txg(spa) || txg == 0);
ASSERT(!BP_GET_DEDUP(bp) || !spa_writeable(spa)); /* zdb(8) */ ASSERT(!BP_GET_DEDUP(bp) || !spa_writeable(spa)); /* zdb(8) */
@ -1731,7 +1732,7 @@ zio_write_bp_init(zio_t *zio)
blkptr_t *bp = zio->io_bp; blkptr_t *bp = zio->io_bp;
zio_prop_t *zp = &zio->io_prop; zio_prop_t *zp = &zio->io_prop;
ASSERT(bp->blk_birth != zio->io_txg); ASSERT(BP_GET_LOGICAL_BIRTH(bp) != zio->io_txg);
*bp = *zio->io_bp_override; *bp = *zio->io_bp_override;
zio->io_pipeline = ZIO_INTERLOCK_PIPELINE; zio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
@ -1819,7 +1820,7 @@ zio_write_compress(zio_t *zio)
ASSERT(zio->io_child_type != ZIO_CHILD_DDT); ASSERT(zio->io_child_type != ZIO_CHILD_DDT);
ASSERT(zio->io_bp_override == NULL); ASSERT(zio->io_bp_override == NULL);
if (!BP_IS_HOLE(bp) && bp->blk_birth == zio->io_txg) { if (!BP_IS_HOLE(bp) && BP_GET_LOGICAL_BIRTH(bp) == zio->io_txg) {
/* /*
* We're rewriting an existing block, which means we're * We're rewriting an existing block, which means we're
* working on behalf of spa_sync(). For spa_sync() to * working on behalf of spa_sync(). For spa_sync() to
@ -1866,7 +1867,7 @@ zio_write_compress(zio_t *zio)
BP_SET_TYPE(bp, zio->io_prop.zp_type); BP_SET_TYPE(bp, zio->io_prop.zp_type);
BP_SET_LEVEL(bp, zio->io_prop.zp_level); BP_SET_LEVEL(bp, zio->io_prop.zp_level);
zio_buf_free(cbuf, lsize); zio_buf_free(cbuf, lsize);
bp->blk_birth = zio->io_txg; BP_SET_LOGICAL_BIRTH(bp, zio->io_txg);
zio->io_pipeline = ZIO_INTERLOCK_PIPELINE; zio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
ASSERT(spa_feature_is_active(spa, ASSERT(spa_feature_is_active(spa,
SPA_FEATURE_EMBEDDED_DATA)); SPA_FEATURE_EMBEDDED_DATA));
@ -1947,7 +1948,7 @@ zio_write_compress(zio_t *zio)
* spa_sync() to allocate new blocks, but force rewrites after that. * spa_sync() to allocate new blocks, but force rewrites after that.
* There should only be a handful of blocks after pass 1 in any case. * There should only be a handful of blocks after pass 1 in any case.
*/ */
if (!BP_IS_HOLE(bp) && bp->blk_birth == zio->io_txg && if (!BP_IS_HOLE(bp) && BP_GET_LOGICAL_BIRTH(bp) == zio->io_txg &&
BP_GET_PSIZE(bp) == psize && BP_GET_PSIZE(bp) == psize &&
pass >= zfs_sync_pass_rewrite) { pass >= zfs_sync_pass_rewrite) {
VERIFY3U(psize, !=, 0); VERIFY3U(psize, !=, 0);
@ -1961,7 +1962,7 @@ zio_write_compress(zio_t *zio)
} }
if (psize == 0) { if (psize == 0) {
if (zio->io_bp_orig.blk_birth != 0 && if (BP_GET_LOGICAL_BIRTH(&zio->io_bp_orig) != 0 &&
spa_feature_is_active(spa, SPA_FEATURE_HOLE_BIRTH)) { spa_feature_is_active(spa, SPA_FEATURE_HOLE_BIRTH)) {
BP_SET_LSIZE(bp, lsize); BP_SET_LSIZE(bp, lsize);
BP_SET_TYPE(bp, zp->zp_type); BP_SET_TYPE(bp, zp->zp_type);
@ -3539,7 +3540,7 @@ zio_ddt_write(zio_t *zio)
else else
ddt_phys_addref(ddp); ddt_phys_addref(ddp);
} else if (zio->io_bp_override) { } else if (zio->io_bp_override) {
ASSERT(bp->blk_birth == txg); ASSERT(BP_GET_LOGICAL_BIRTH(bp) == txg);
ASSERT(BP_EQUAL(bp, zio->io_bp_override)); ASSERT(BP_EQUAL(bp, zio->io_bp_override));
ddt_phys_fill(ddp, bp); ddt_phys_fill(ddp, bp);
ddt_phys_addref(ddp); ddt_phys_addref(ddp);
@ -3810,11 +3811,13 @@ zio_dva_claim(zio_t *zio)
static void static void
zio_dva_unallocate(zio_t *zio, zio_gang_node_t *gn, blkptr_t *bp) zio_dva_unallocate(zio_t *zio, zio_gang_node_t *gn, blkptr_t *bp)
{ {
ASSERT(bp->blk_birth == zio->io_txg || BP_IS_HOLE(bp)); ASSERT(BP_GET_LOGICAL_BIRTH(bp) == zio->io_txg || BP_IS_HOLE(bp));
ASSERT(zio->io_bp_override == NULL); ASSERT(zio->io_bp_override == NULL);
if (!BP_IS_HOLE(bp)) if (!BP_IS_HOLE(bp)) {
metaslab_free(zio->io_spa, bp, bp->blk_birth, B_TRUE); metaslab_free(zio->io_spa, bp, BP_GET_LOGICAL_BIRTH(bp),
B_TRUE);
}
if (gn != NULL) { if (gn != NULL) {
for (int g = 0; g < SPA_GBH_NBLKPTRS; g++) { for (int g = 0; g < SPA_GBH_NBLKPTRS; g++) {
@ -4555,8 +4558,8 @@ zio_ready(zio_t *zio)
if (zio->io_ready) { if (zio->io_ready) {
ASSERT(IO_IS_ALLOCATING(zio)); ASSERT(IO_IS_ALLOCATING(zio));
ASSERT(bp->blk_birth == zio->io_txg || BP_IS_HOLE(bp) || ASSERT(BP_GET_LOGICAL_BIRTH(bp) == zio->io_txg ||
(zio->io_flags & ZIO_FLAG_NOPWRITE)); BP_IS_HOLE(bp) || (zio->io_flags & ZIO_FLAG_NOPWRITE));
ASSERT(zio->io_children[ZIO_CHILD_GANG][ZIO_WAIT_READY] == 0); ASSERT(zio->io_children[ZIO_CHILD_GANG][ZIO_WAIT_READY] == 0);
zio->io_ready(zio); zio->io_ready(zio);
@ -4852,7 +4855,7 @@ zio_done(zio_t *zio)
* error and generate a logical data ereport. * error and generate a logical data ereport.
*/ */
spa_log_error(zio->io_spa, &zio->io_bookmark, spa_log_error(zio->io_spa, &zio->io_bookmark,
&zio->io_bp->blk_birth); BP_GET_LOGICAL_BIRTH(zio->io_bp));
(void) zfs_ereport_post(FM_EREPORT_ZFS_DATA, (void) zfs_ereport_post(FM_EREPORT_ZFS_DATA,
zio->io_spa, NULL, &zio->io_bookmark, zio, 0); zio->io_spa, NULL, &zio->io_bookmark, zio, 0);
} }

View File

@ -272,7 +272,7 @@ static void
zio_checksum_gang_verifier(zio_cksum_t *zcp, const blkptr_t *bp) zio_checksum_gang_verifier(zio_cksum_t *zcp, const blkptr_t *bp)
{ {
const dva_t *dva = BP_IDENTITY(bp); const dva_t *dva = BP_IDENTITY(bp);
uint64_t txg = BP_PHYSICAL_BIRTH(bp); uint64_t txg = BP_GET_BIRTH(bp);
ASSERT(BP_IS_GANG(bp)); ASSERT(BP_IS_GANG(bp));

View File

@ -372,7 +372,8 @@ tags = ['functional', 'cli_root', 'zpool']
tests = ['zpool_add_001_pos', 'zpool_add_002_pos', 'zpool_add_003_pos', tests = ['zpool_add_001_pos', 'zpool_add_002_pos', 'zpool_add_003_pos',
'zpool_add_004_pos', 'zpool_add_006_pos', 'zpool_add_007_neg', 'zpool_add_004_pos', 'zpool_add_006_pos', 'zpool_add_007_neg',
'zpool_add_008_neg', 'zpool_add_009_neg', 'zpool_add_010_pos', 'zpool_add_008_neg', 'zpool_add_009_neg', 'zpool_add_010_pos',
'add-o_ashift', 'add_prop_ashift', 'zpool_add_dryrun_output'] 'add-o_ashift', 'add_prop_ashift', 'zpool_add_dryrun_output',
'zpool_add--allow-ashift-mismatch']
tags = ['functional', 'cli_root', 'zpool_add'] tags = ['functional', 'cli_root', 'zpool_add']
[tests/functional/cli_root/zpool_attach] [tests/functional/cli_root/zpool_attach]

View File

@ -988,6 +988,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/cli_root/zpool_add/add_prop_ashift.ksh \ functional/cli_root/zpool_add/add_prop_ashift.ksh \
functional/cli_root/zpool_add/cleanup.ksh \ functional/cli_root/zpool_add/cleanup.ksh \
functional/cli_root/zpool_add/setup.ksh \ functional/cli_root/zpool_add/setup.ksh \
functional/cli_root/zpool_add/zpool_add--allow-ashift-mismatch.ksh \
functional/cli_root/zpool_add/zpool_add_001_pos.ksh \ functional/cli_root/zpool_add/zpool_add_001_pos.ksh \
functional/cli_root/zpool_add/zpool_add_002_pos.ksh \ functional/cli_root/zpool_add/zpool_add_002_pos.ksh \
functional/cli_root/zpool_add/zpool_add_003_pos.ksh \ functional/cli_root/zpool_add/zpool_add_003_pos.ksh \

View File

@ -97,20 +97,19 @@ function verify_pool_prop_eq
function verify_pool_props function verify_pool_props
{ {
typeset -r dsize=$1 typeset -r oused=$1
typeset -r ratio=$2 typeset -r osaved=$2
typeset dsize=$3
typeset ratio=$4
if [[ $dsize -eq 0 ]]; then if [[ $dsize -eq 0 ]]; then
verify_pool_prop_eq bcloneused 0 ratio=1
verify_pool_prop_eq bclonesaved 0 elif [[ $ratio -eq 1 ]]; then
verify_pool_prop_eq bcloneratio 1.00 dsize=0
else
if [[ $ratio -eq 1 ]]; then
verify_pool_prop_eq bcloneused 0
else
verify_pool_prop_eq bcloneused $dsize
fi fi
verify_pool_prop_eq bclonesaved $((dsize*(ratio-1))) verify_pool_prop_eq bcloneused $(($oused+$dsize))
verify_pool_prop_eq bclonesaved $(($osaved+dsize*(ratio-1)))
if [[ $oused -eq 0 ]]; then
verify_pool_prop_eq bcloneratio "${ratio}.00" verify_pool_prop_eq bcloneratio "${ratio}.00"
fi fi
} }
@ -124,16 +123,22 @@ function bclone_test
typeset -r srcdir=$4 typeset -r srcdir=$4
typeset -r dstdir=$5 typeset -r dstdir=$5
typeset dsize typeset dsize
typeset oused
typeset osaved
typeset -r original="${srcdir}/original" typeset -r original="${srcdir}/original"
typeset -r clone="${dstdir}/clone" typeset -r clone="${dstdir}/clone"
log_note "Testing file copy with datatype $datatype, file size $filesize, embedded $embedded" log_note "Testing file copy with datatype $datatype, file size $filesize, embedded $embedded"
# Save current block cloning stats for later use.
sync_pool $TESTPOOL
oused=$(get_pool_prop bcloneused $TESTPOOL)
osaved=$(get_pool_prop bclonesaved $TESTPOOL)
# Create a test file with known content. # Create a test file with known content.
case $datatype in case $datatype in
random|text) random|text)
sync_pool $TESTPOOL
if [[ $datatype = "random" ]]; then if [[ $datatype = "random" ]]; then
dd if=/dev/urandom of=$original bs=$filesize count=1 2>/dev/null dd if=/dev/urandom of=$original bs=$filesize count=1 2>/dev/null
else else
@ -146,13 +151,13 @@ function bclone_test
sync_pool $TESTPOOL sync_pool $TESTPOOL
# It is hard to predict block sizes that will be used, # It is hard to predict block sizes that will be used,
# so just do one clone and take it from bcloneused. # so just do one clone and take it from bcloneused.
filesize=$(zpool get -Hp -o value bcloneused $TESTPOOL) dsize=$(get_pool_prop bcloneused $TESTPOOL)
dsize=$(($dsize-$oused))
if [[ $embedded = "false" ]]; then if [[ $embedded = "false" ]]; then
log_must test $filesize -gt 0 log_must test $dsize -gt 0
fi fi
rm -f "${clone}-tmp" rm -f "${clone}-tmp"
sync_pool $TESTPOOL sync_pool $TESTPOOL
dsize=$filesize
;; ;;
hole) hole)
log_must truncate_test -s $filesize -f $original log_must truncate_test -s $filesize -f $original
@ -217,7 +222,7 @@ function bclone_test
test_file_integrity $original_checksum "${clone}4" $filesize test_file_integrity $original_checksum "${clone}4" $filesize
test_file_integrity $original_checksum "${clone}5" $filesize test_file_integrity $original_checksum "${clone}5" $filesize
verify_pool_props $dsize 7 verify_pool_props $oused $osaved $dsize 7
# Clear cache and test after fresh import. # Clear cache and test after fresh import.
log_must zpool export $TESTPOOL log_must zpool export $TESTPOOL
@ -240,7 +245,7 @@ function bclone_test
sync_pool $TESTPOOL sync_pool $TESTPOOL
verify_pool_props $dsize 11 verify_pool_props $oused $osaved $dsize 11
log_must zpool export $TESTPOOL log_must zpool export $TESTPOOL
log_must zpool import $TESTPOOL log_must zpool import $TESTPOOL
@ -268,7 +273,7 @@ function bclone_test
test_file_integrity $original_checksum "${clone}8" $filesize test_file_integrity $original_checksum "${clone}8" $filesize
test_file_integrity $original_checksum "${clone}9" $filesize test_file_integrity $original_checksum "${clone}9" $filesize
verify_pool_props $dsize 6 verify_pool_props $oused $osaved $dsize 6
rm -f "${clone}0" "${clone}2" "${clone}4" "${clone}8" "${clone}9" rm -f "${clone}0" "${clone}2" "${clone}4" "${clone}8" "${clone}9"
@ -276,11 +281,11 @@ function bclone_test
test_file_integrity $original_checksum "${clone}6" $filesize test_file_integrity $original_checksum "${clone}6" $filesize
verify_pool_props $dsize 1 verify_pool_props $oused $osaved $dsize 1
rm -f "${clone}6" rm -f "${clone}6"
sync_pool $TESTPOOL sync_pool $TESTPOOL
verify_pool_props $dsize 1 verify_pool_props $oused $osaved $dsize 1
} }

View File

@ -66,7 +66,7 @@ function bclone_corner_cases_init
export SECOND_HALF_ORIG0_CHECKSUM=$(second_half_checksum $ORIG0) export SECOND_HALF_ORIG0_CHECKSUM=$(second_half_checksum $ORIG0)
export SECOND_HALF_ORIG1_CHECKSUM=$(second_half_checksum $ORIG1) export SECOND_HALF_ORIG1_CHECKSUM=$(second_half_checksum $ORIG1)
export SECOND_HALF_ORIG2_CHECKSUM=$(second_half_checksum $ORIG2) export SECOND_HALF_ORIG2_CHECKSUM=$(second_half_checksum $ORIG2)
export ZEROS_CHECKSUM=$(dd if=/dev/zero bs=$HALFRECORDSIZE count=1 | sha256digest) export ZEROS_CHECKSUM=$(dd if=/dev/zero bs=$HALFRECORDSIZE count=1 2>/dev/null | sha256digest)
export FIRST_HALF_CHECKSUM="" export FIRST_HALF_CHECKSUM=""
export SECOND_HALF_CHECKSUM="" export SECOND_HALF_CHECKSUM=""
} }
@ -210,6 +210,8 @@ function bclone_corner_cases_test
typeset -r dstdir=$2 typeset -r dstdir=$2
typeset limit=$3 typeset limit=$3
typeset -i count=0 typeset -i count=0
typeset oused
typeset osaved
if [[ $srcdir != "count" ]]; then if [[ $srcdir != "count" ]]; then
if [[ -n "$limit" ]]; then if [[ -n "$limit" ]]; then
@ -217,6 +219,11 @@ function bclone_corner_cases_test
limit=$(random_int_between 1 $total_count $((limit*2)) | sort -nu | head -n $limit | xargs) limit=$(random_int_between 1 $total_count $((limit*2)) | sort -nu | head -n $limit | xargs)
fi fi
bclone_corner_cases_init $srcdir $dstdir bclone_corner_cases_init $srcdir $dstdir
# Save current block cloning stats for later use.
sync_pool $TESTPOOL
oused=$(get_pool_prop bcloneused $TESTPOOL)
osaved=$(get_pool_prop bclonesaved $TESTPOOL)
fi fi
# #
@ -285,21 +292,24 @@ function bclone_corner_cases_test
overwrite_clone "$second_overwrite" overwrite_clone "$second_overwrite"
if checksum_compare $read_after; then if checksum_compare $read_after; then
log_note "existing: $existing / cached: $cached / first_clone: $first_clone / first_overwrite: $first_overwrite / read_before: $read_before / second_clone: $second_clone / read_after: $read_after" log_note "existing: $existing / cached: $cached / first_clone: $first_clone / first_overwrite: $first_overwrite / read_before: $read_before / second_clone: $second_clone / second_overwrite: $second_overwrite / read_after: $read_after"
else else
log_fail "FAIL: existing: $existing / cached: $cached / first_clone: $first_clone / first_overwrite: $first_overwrite / read_before: $read_before / second_clone: $second_clone / read_after: $read_after" log_fail "FAIL: existing: $existing / cached: $cached / first_clone: $first_clone / first_overwrite: $first_overwrite / read_before: $read_before / second_clone: $second_clone / second_overwrite: $second_overwrite / read_after: $read_after"
fi fi
log_must zpool export $TESTPOOL log_must zpool export $TESTPOOL
log_must zpool import $TESTPOOL log_must zpool import $TESTPOOL
if checksum_compare "yes"; then if checksum_compare "yes"; then
log_note "existing: $existing / cached: $cached / first_clone: $first_clone / first_overwrite: $first_overwrite / read_before: $read_before / second_clone: $second_clone / read_after: $read_after / read_next_txg" log_note "existing: $existing / cached: $cached / first_clone: $first_clone / first_overwrite: $first_overwrite / read_before: $read_before / second_clone: $second_clone / second_overwrite: $second_overwrite / read_after: $read_after / read_next_txg"
else else
log_fail "FAIL: existing: $existing / cached: $cached / first_clone: $first_clone / first_overwrite: $first_overwrite / read_before: $read_before / second_clone: $second_clone / read_after: $read_after / read_next_txg" log_fail "FAIL: existing: $existing / cached: $cached / first_clone: $first_clone / first_overwrite: $first_overwrite / read_before: $read_before / second_clone: $second_clone / second_overwrite: $second_overwrite / read_after: $read_after / read_next_txg"
fi fi
rm -f "$CLONE" rm -f "$CLONE"
sync_pool $TESTPOOL
verify_pool_prop_eq bcloneused $oused
verify_pool_prop_eq bclonesaved $osaved
done done
done done
done done

View File

@ -22,7 +22,7 @@
# #
# Copyright 2017, loli10K. All rights reserved. # Copyright 2017, loli10K. All rights reserved.
# Copyright (c) 2020 by Delphix. All rights reserved. # Copyright (c) 2020, 2024 by Delphix. All rights reserved.
# #
. $STF_SUITE/include/libtest.shlib . $STF_SUITE/include/libtest.shlib
@ -60,12 +60,23 @@ log_must mkfile $SIZE $disk2
logical_ashift=$(get_tunable VDEV_FILE_LOGICAL_ASHIFT) logical_ashift=$(get_tunable VDEV_FILE_LOGICAL_ASHIFT)
orig_ashift=$(get_tunable VDEV_FILE_PHYSICAL_ASHIFT) orig_ashift=$(get_tunable VDEV_FILE_PHYSICAL_ASHIFT)
max_auto_ashift=$(get_tunable VDEV_MAX_AUTO_ASHIFT) max_auto_ashift=$(get_tunable VDEV_MAX_AUTO_ASHIFT)
opt=""
typeset ashifts=("9" "10" "11" "12" "13" "14" "15" "16") typeset ashifts=("9" "10" "11" "12" "13" "14" "15" "16")
for ashift in ${ashifts[@]} for ashift in ${ashifts[@]}
do do
#
# Need to add the --allow-ashift-mismatch option to disable the
# ashift mismatch checks in zpool add.
#
if [[ $ashift -eq $orig_ashift ]]; then
opt=""
else
opt="--allow-ashift-mismatch"
fi
log_must zpool create $TESTPOOL $disk1 log_must zpool create $TESTPOOL $disk1
log_must zpool add -o ashift=$ashift $TESTPOOL $disk2 log_must zpool add $opt -o ashift=$ashift $TESTPOOL $disk2
log_must verify_ashift $disk2 $ashift log_must verify_ashift $disk2 $ashift
# clean things for the next run # clean things for the next run
@ -78,7 +89,7 @@ do
# #
log_must zpool create $TESTPOOL $disk1 log_must zpool create $TESTPOOL $disk1
log_must set_tunable32 VDEV_FILE_PHYSICAL_ASHIFT $ashift log_must set_tunable32 VDEV_FILE_PHYSICAL_ASHIFT $ashift
log_must zpool add $TESTPOOL $disk2 log_must zpool add $opt $TESTPOOL $disk2
exp=$(( (ashift <= max_auto_ashift) ? ashift : logical_ashift )) exp=$(( (ashift <= max_auto_ashift) ? ashift : logical_ashift ))
log_must verify_ashift $disk2 $exp log_must verify_ashift $disk2 $exp

View File

@ -22,7 +22,7 @@
# #
# Copyright 2017, loli10K. All rights reserved. # Copyright 2017, loli10K. All rights reserved.
# Copyright (c) 2020 by Delphix. All rights reserved. # Copyright (c) 2020, 2024 by Delphix. All rights reserved.
# #
. $STF_SUITE/include/libtest.shlib . $STF_SUITE/include/libtest.shlib
@ -68,8 +68,13 @@ log_must set_tunable32 VDEV_FILE_PHYSICAL_ASHIFT 16
typeset ashifts=("9" "10" "11" "12" "13" "14" "15" "16") typeset ashifts=("9" "10" "11" "12" "13" "14" "15" "16")
for ashift in ${ashifts[@]} for ashift in ${ashifts[@]}
do do
if [ $ashift -eq $orig_ashift ];then
opt=""
else
opt="--allow-ashift-mismatch"
fi
log_must zpool create -o ashift=$ashift $TESTPOOL $disk1 log_must zpool create -o ashift=$ashift $TESTPOOL $disk1
log_must zpool add $TESTPOOL $disk2 log_must zpool add $opt $TESTPOOL $disk2
log_must verify_ashift $disk2 $ashift log_must verify_ashift $disk2 $ashift
# clean things for the next run # clean things for the next run
@ -82,8 +87,13 @@ for ashift in ${ashifts[@]}
do do
for cmdval in ${ashifts[@]} for cmdval in ${ashifts[@]}
do do
if [ $ashift -eq $cmdval ];then
opt=""
else
opt="--allow-ashift-mismatch"
fi
log_must zpool create -o ashift=$ashift $TESTPOOL $disk1 log_must zpool create -o ashift=$ashift $TESTPOOL $disk1
log_must zpool add -o ashift=$cmdval $TESTPOOL $disk2 log_must zpool add $opt -o ashift=$cmdval $TESTPOOL $disk2
log_must verify_ashift $disk2 $cmdval log_must verify_ashift $disk2 $cmdval
# clean things for the next run # clean things for the next run

View File

@ -65,4 +65,15 @@ log_mustnot vdevs_in_pool $TESTPOOL $DISK2
log_must zpool add -f $TESTPOOL $DISK2 log_must zpool add -f $TESTPOOL $DISK2
log_must vdevs_in_pool $TESTPOOL $DISK2 log_must vdevs_in_pool $TESTPOOL $DISK2
log_must zpool destroy $TESTPOOL
create_pool $TESTPOOL mirror $DISK0 $DISK1
log_must poolexists $TESTPOOL
log_mustnot zpool add $TESTPOOL $DISK2
log_mustnot vdevs_in_pool $TESTPOOL $DISK2
log_must zpool add --allow-replication-mismatch $TESTPOOL $DISK2
log_must vdevs_in_pool $TESTPOOL $DISK2
log_pass "'zpool add -f <pool> <vdev> ...' executes successfully." log_pass "'zpool add -f <pool> <vdev> ...' executes successfully."

View File

@ -70,7 +70,7 @@ if is_freebsd; then
recursive=$(get_tunable VOL_RECURSIVE) recursive=$(get_tunable VOL_RECURSIVE)
log_must set_tunable64 VOL_RECURSIVE 1 log_must set_tunable64 VOL_RECURSIVE 1
fi fi
log_must zpool add $TESTPOOL $ZVOL_DEVDIR/$TESTPOOL1/$TESTVOL log_must zpool add --allow-ashift-mismatch $TESTPOOL $ZVOL_DEVDIR/$TESTPOOL1/$TESTVOL
log_must vdevs_in_pool "$TESTPOOL" "$ZVOL_DEVDIR/$TESTPOOL1/$TESTVOL" log_must vdevs_in_pool "$TESTPOOL" "$ZVOL_DEVDIR/$TESTPOOL1/$TESTVOL"

View File

@ -75,7 +75,9 @@ log_must poolexists $TESTPOOL1
unset NOINUSE_CHECK unset NOINUSE_CHECK
log_mustnot zpool add -f $TESTPOOL $DISK1 log_mustnot zpool add -f $TESTPOOL $DISK1
log_mustnot zpool add --allow-in-use $TESTPOOL $DISK1
log_mustnot zpool add -f $TESTPOOL $mnttab_dev log_mustnot zpool add -f $TESTPOOL $mnttab_dev
log_mustnot zpool add --allow-in-use $TESTPOOL $mnttab_dev
if is_linux; then if is_linux; then
log_mustnot zpool add $TESTPOOL $vfstab_dev log_mustnot zpool add $TESTPOOL $vfstab_dev
else else

View File

@ -64,7 +64,9 @@ log_mustnot zpool add -f $TESTPOOL $DISK0
for type in "" "mirror" "raidz" "draid" "spare" "log" "dedup" "special" "cache" for type in "" "mirror" "raidz" "draid" "spare" "log" "dedup" "special" "cache"
do do
log_mustnot zpool add -f $TESTPOOL $type $DISK0 $DISK1 log_mustnot zpool add -f $TESTPOOL $type $DISK0 $DISK1
log_mustnot zpool add --allow-in-use $TESTPOOL $type $DISK0 $DISK1
log_mustnot zpool add -f $TESTPOOL $type $DISK1 $DISK1 log_mustnot zpool add -f $TESTPOOL $type $DISK1 $DISK1
log_mustnot zpool add --allow-in-use $TESTPOOL $type $DISK1 $DISK1
done done
log_pass "'zpool add' get fail as expected if vdevs are the same or vdev is " \ log_pass "'zpool add' get fail as expected if vdevs are the same or vdev is " \

View File

@ -138,7 +138,7 @@ function zpool_create_forced_add
while ((j < ${#add_args[@]})); do while ((j < ${#add_args[@]})); do
log_must zpool create $TESTPOOL1 ${create_args[$i]} log_must zpool create $TESTPOOL1 ${create_args[$i]}
log_mustnot zpool add $TESTPOOL1 ${add_args[$j]} log_mustnot zpool add $TESTPOOL1 ${add_args[$j]}
log_must zpool add -f $TESTPOOL1 ${add_args[$j]} log_must zpool add --allow-replication-mismatch $TESTPOOL1 ${add_args[$j]}
log_must zpool destroy -f $TESTPOOL1 log_must zpool destroy -f $TESTPOOL1
((j += 1)) ((j += 1))

View File

@ -76,7 +76,7 @@ log_onexit cleanup
SRC_FILE=src.data SRC_FILE=src.data
DST_FILE=dst.data DST_FILE=dst.data
SRC_SIZE=$(($RANDOM % 2048)) SRC_SIZE=$((1024 + $RANDOM % 1024))
# A smaller recordsize is used merely to speed up the test. # A smaller recordsize is used merely to speed up the test.
RECORDSIZE=4096 RECORDSIZE=4096
@ -120,7 +120,7 @@ for mode in "never" "auto" "always"; do
# Overwrite a random range of an existing file and immediately copy it. # Overwrite a random range of an existing file and immediately copy it.
sync_pool $TESTPOOL sync_pool $TESTPOOL
log_must dd if=/dev/urandom of=$SRC_FILE bs=$((RECORDSIZE / 2)) \ log_must dd if=/dev/urandom of=$SRC_FILE bs=$((RECORDSIZE / 2)) \
seek=$(($RANDOM % $SRC_SIZE)) count=$(($RANDOM % 16)) conv=notrunc seek=$(($RANDOM % $SRC_SIZE)) count=$((1 + $RANDOM % 16)) conv=notrunc
if [[ "$mode" == "always" ]]; then if [[ "$mode" == "always" ]]; then
log_mustnot cp --reflink=$mode $SRC_FILE $DST_FILE log_mustnot cp --reflink=$mode $SRC_FILE $DST_FILE
log_must ls -l $CP_TESTDIR log_must ls -l $CP_TESTDIR
@ -152,7 +152,7 @@ for mode in "never" "auto" "always"; do
# Overwrite a random range of an existing file and immediately copy it. # Overwrite a random range of an existing file and immediately copy it.
log_must dd if=/dev/urandom of=$SRC_FILE bs=$((RECORDSIZE / 2)) \ log_must dd if=/dev/urandom of=$SRC_FILE bs=$((RECORDSIZE / 2)) \
seek=$(($RANDOM % $SRC_SIZE)) count=$(($RANDOM % 16)) conv=notrunc seek=$(($RANDOM % $SRC_SIZE)) count=$((1 + $RANDOM % 16)) conv=notrunc
log_must cp --reflink=$mode $SRC_FILE $DST_FILE log_must cp --reflink=$mode $SRC_FILE $DST_FILE
verify_copy $SRC_FILE $DST_FILE verify_copy $SRC_FILE $DST_FILE
log_must rm -f $SRC_FILE $DST_FILE log_must rm -f $SRC_FILE $DST_FILE

View File

@ -51,7 +51,7 @@ const char *__asan_default_options(void) {
int int
main(int argc, const char *const *argv) main(int argc, const char *const *argv)
{ {
if (argc != 2) { if (argc != 2 || strncmp(argv[1], "/dev/zd", 7) != 0) {
fprintf(stderr, "usage: %s /dev/zdX\n", argv[0]); fprintf(stderr, "usage: %s /dev/zdX\n", argv[0]);
return (1); return (1);
} }
@ -72,9 +72,10 @@ main(int argc, const char *const *argv)
return (1); return (1);
} }
unsigned int dev_part = minor(sb.st_rdev) % ZVOL_MINORS; const char *dev_part = strrchr(dev_name, 'p');
if (dev_part != 0) if (dev_part != NULL) {
sprintf(zvol_name + strlen(zvol_name), "-part%u", dev_part); sprintf(zvol_name + strlen(zvol_name), "-part%s", dev_part + 1);
}
for (size_t i = 0; i < strlen(zvol_name); ++i) for (size_t i = 0; i < strlen(zvol_name); ++i)
if (isblank(zvol_name[i])) if (isblank(zvol_name[i]))