Merge pull request #158 from truenas/zfs-2.2-release-cobia-rc1
Sync with upstream zfs-2.2-release branch
This commit is contained in:
commit
f8c61e8326
|
@ -79,6 +79,7 @@
|
||||||
#include <sys/dsl_crypt.h>
|
#include <sys/dsl_crypt.h>
|
||||||
#include <sys/dsl_scan.h>
|
#include <sys/dsl_scan.h>
|
||||||
#include <sys/btree.h>
|
#include <sys/btree.h>
|
||||||
|
#include <sys/brt.h>
|
||||||
#include <zfs_comutil.h>
|
#include <zfs_comutil.h>
|
||||||
#include <sys/zstd/zstd.h>
|
#include <sys/zstd/zstd.h>
|
||||||
|
|
||||||
|
@ -5342,12 +5343,20 @@ static const char *zdb_ot_extname[] = {
|
||||||
#define ZB_TOTAL DN_MAX_LEVELS
|
#define ZB_TOTAL DN_MAX_LEVELS
|
||||||
#define SPA_MAX_FOR_16M (SPA_MAXBLOCKSHIFT+1)
|
#define SPA_MAX_FOR_16M (SPA_MAXBLOCKSHIFT+1)
|
||||||
|
|
||||||
|
typedef struct zdb_brt_entry {
|
||||||
|
dva_t zbre_dva;
|
||||||
|
uint64_t zbre_refcount;
|
||||||
|
avl_node_t zbre_node;
|
||||||
|
} zdb_brt_entry_t;
|
||||||
|
|
||||||
typedef struct zdb_cb {
|
typedef struct zdb_cb {
|
||||||
zdb_blkstats_t zcb_type[ZB_TOTAL + 1][ZDB_OT_TOTAL + 1];
|
zdb_blkstats_t zcb_type[ZB_TOTAL + 1][ZDB_OT_TOTAL + 1];
|
||||||
uint64_t zcb_removing_size;
|
uint64_t zcb_removing_size;
|
||||||
uint64_t zcb_checkpoint_size;
|
uint64_t zcb_checkpoint_size;
|
||||||
uint64_t zcb_dedup_asize;
|
uint64_t zcb_dedup_asize;
|
||||||
uint64_t zcb_dedup_blocks;
|
uint64_t zcb_dedup_blocks;
|
||||||
|
uint64_t zcb_clone_asize;
|
||||||
|
uint64_t zcb_clone_blocks;
|
||||||
uint64_t zcb_psize_count[SPA_MAX_FOR_16M];
|
uint64_t zcb_psize_count[SPA_MAX_FOR_16M];
|
||||||
uint64_t zcb_lsize_count[SPA_MAX_FOR_16M];
|
uint64_t zcb_lsize_count[SPA_MAX_FOR_16M];
|
||||||
uint64_t zcb_asize_count[SPA_MAX_FOR_16M];
|
uint64_t zcb_asize_count[SPA_MAX_FOR_16M];
|
||||||
|
@ -5368,6 +5377,8 @@ typedef struct zdb_cb {
|
||||||
int zcb_haderrors;
|
int zcb_haderrors;
|
||||||
spa_t *zcb_spa;
|
spa_t *zcb_spa;
|
||||||
uint32_t **zcb_vd_obsolete_counts;
|
uint32_t **zcb_vd_obsolete_counts;
|
||||||
|
avl_tree_t zcb_brt;
|
||||||
|
boolean_t zcb_brt_is_active;
|
||||||
} zdb_cb_t;
|
} zdb_cb_t;
|
||||||
|
|
||||||
/* test if two DVA offsets from same vdev are within the same metaslab */
|
/* test if two DVA offsets from same vdev are within the same metaslab */
|
||||||
|
@ -5662,6 +5673,45 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
|
||||||
zcb->zcb_asize_len[bin] += BP_GET_ASIZE(bp);
|
zcb->zcb_asize_len[bin] += BP_GET_ASIZE(bp);
|
||||||
zcb->zcb_asize_total += BP_GET_ASIZE(bp);
|
zcb->zcb_asize_total += BP_GET_ASIZE(bp);
|
||||||
|
|
||||||
|
if (zcb->zcb_brt_is_active && brt_maybe_exists(zcb->zcb_spa, bp)) {
|
||||||
|
/*
|
||||||
|
* Cloned blocks are special. We need to count them, so we can
|
||||||
|
* later uncount them when reporting leaked space, and we must
|
||||||
|
* only claim them once.
|
||||||
|
*
|
||||||
|
* To do this, we keep our own in-memory BRT. For each block
|
||||||
|
* we haven't seen before, we look it up in the real BRT and
|
||||||
|
* if it's there, we note it and its refcount, then proceed as
|
||||||
|
* normal. If we see the block again, we count it as a clone
|
||||||
|
* and then give it no further consideration.
|
||||||
|
*/
|
||||||
|
zdb_brt_entry_t zbre_search, *zbre;
|
||||||
|
avl_index_t where;
|
||||||
|
|
||||||
|
zbre_search.zbre_dva = bp->blk_dva[0];
|
||||||
|
zbre = avl_find(&zcb->zcb_brt, &zbre_search, &where);
|
||||||
|
if (zbre != NULL) {
|
||||||
|
zcb->zcb_clone_asize += BP_GET_ASIZE(bp);
|
||||||
|
zcb->zcb_clone_blocks++;
|
||||||
|
|
||||||
|
zbre->zbre_refcount--;
|
||||||
|
if (zbre->zbre_refcount == 0) {
|
||||||
|
avl_remove(&zcb->zcb_brt, zbre);
|
||||||
|
umem_free(zbre, sizeof (zdb_brt_entry_t));
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint64_t crefcnt = brt_entry_get_refcount(zcb->zcb_spa, bp);
|
||||||
|
if (crefcnt > 0) {
|
||||||
|
zbre = umem_zalloc(sizeof (zdb_brt_entry_t),
|
||||||
|
UMEM_NOFAIL);
|
||||||
|
zbre->zbre_dva = bp->blk_dva[0];
|
||||||
|
zbre->zbre_refcount = crefcnt;
|
||||||
|
avl_insert(&zcb->zcb_brt, zbre, where);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (dump_opt['L'])
|
if (dump_opt['L'])
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
@ -6664,6 +6714,20 @@ deleted_livelists_dump_mos(spa_t *spa)
|
||||||
iterate_deleted_livelists(spa, dump_livelist_cb, NULL);
|
iterate_deleted_livelists(spa, dump_livelist_cb, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
zdb_brt_entry_compare(const void *zcn1, const void *zcn2)
|
||||||
|
{
|
||||||
|
const dva_t *dva1 = &((const zdb_brt_entry_t *)zcn1)->zbre_dva;
|
||||||
|
const dva_t *dva2 = &((const zdb_brt_entry_t *)zcn2)->zbre_dva;
|
||||||
|
int cmp;
|
||||||
|
|
||||||
|
cmp = TREE_CMP(DVA_GET_VDEV(dva1), DVA_GET_VDEV(dva2));
|
||||||
|
if (cmp == 0)
|
||||||
|
cmp = TREE_CMP(DVA_GET_OFFSET(dva1), DVA_GET_OFFSET(dva2));
|
||||||
|
|
||||||
|
return (cmp);
|
||||||
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
dump_block_stats(spa_t *spa)
|
dump_block_stats(spa_t *spa)
|
||||||
{
|
{
|
||||||
|
@ -6678,6 +6742,13 @@ dump_block_stats(spa_t *spa)
|
||||||
|
|
||||||
zcb = umem_zalloc(sizeof (zdb_cb_t), UMEM_NOFAIL);
|
zcb = umem_zalloc(sizeof (zdb_cb_t), UMEM_NOFAIL);
|
||||||
|
|
||||||
|
if (spa_feature_is_active(spa, SPA_FEATURE_BLOCK_CLONING)) {
|
||||||
|
avl_create(&zcb->zcb_brt, zdb_brt_entry_compare,
|
||||||
|
sizeof (zdb_brt_entry_t),
|
||||||
|
offsetof(zdb_brt_entry_t, zbre_node));
|
||||||
|
zcb->zcb_brt_is_active = B_TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
(void) printf("\nTraversing all blocks %s%s%s%s%s...\n\n",
|
(void) printf("\nTraversing all blocks %s%s%s%s%s...\n\n",
|
||||||
(dump_opt['c'] || !dump_opt['L']) ? "to verify " : "",
|
(dump_opt['c'] || !dump_opt['L']) ? "to verify " : "",
|
||||||
(dump_opt['c'] == 1) ? "metadata " : "",
|
(dump_opt['c'] == 1) ? "metadata " : "",
|
||||||
|
@ -6779,7 +6850,8 @@ dump_block_stats(spa_t *spa)
|
||||||
metaslab_class_get_alloc(spa_special_class(spa)) +
|
metaslab_class_get_alloc(spa_special_class(spa)) +
|
||||||
metaslab_class_get_alloc(spa_dedup_class(spa)) +
|
metaslab_class_get_alloc(spa_dedup_class(spa)) +
|
||||||
get_unflushed_alloc_space(spa);
|
get_unflushed_alloc_space(spa);
|
||||||
total_found = tzb->zb_asize - zcb->zcb_dedup_asize +
|
total_found =
|
||||||
|
tzb->zb_asize - zcb->zcb_dedup_asize - zcb->zcb_clone_asize +
|
||||||
zcb->zcb_removing_size + zcb->zcb_checkpoint_size;
|
zcb->zcb_removing_size + zcb->zcb_checkpoint_size;
|
||||||
|
|
||||||
if (total_found == total_alloc && !dump_opt['L']) {
|
if (total_found == total_alloc && !dump_opt['L']) {
|
||||||
|
@ -6820,6 +6892,9 @@ dump_block_stats(spa_t *spa)
|
||||||
"bp deduped:", (u_longlong_t)zcb->zcb_dedup_asize,
|
"bp deduped:", (u_longlong_t)zcb->zcb_dedup_asize,
|
||||||
(u_longlong_t)zcb->zcb_dedup_blocks,
|
(u_longlong_t)zcb->zcb_dedup_blocks,
|
||||||
(double)zcb->zcb_dedup_asize / tzb->zb_asize + 1.0);
|
(double)zcb->zcb_dedup_asize / tzb->zb_asize + 1.0);
|
||||||
|
(void) printf("\t%-16s %14llu count: %6llu\n",
|
||||||
|
"bp cloned:", (u_longlong_t)zcb->zcb_clone_asize,
|
||||||
|
(u_longlong_t)zcb->zcb_clone_blocks);
|
||||||
(void) printf("\t%-16s %14llu used: %5.2f%%\n", "Normal class:",
|
(void) printf("\t%-16s %14llu used: %5.2f%%\n", "Normal class:",
|
||||||
(u_longlong_t)norm_alloc, 100.0 * norm_alloc / norm_space);
|
(u_longlong_t)norm_alloc, 100.0 * norm_alloc / norm_space);
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,61 @@
|
||||||
|
#!/bin/sh
|
||||||
|
#
|
||||||
|
# Turn off disk's enclosure slot if it becomes FAULTED.
|
||||||
|
#
|
||||||
|
# Bad SCSI disks can often "disappear and reappear" causing all sorts of chaos
|
||||||
|
# as they flip between FAULTED and ONLINE. If
|
||||||
|
# ZED_POWER_OFF_ENCLOUSRE_SLOT_ON_FAULT is set in zed.rc, and the disk gets
|
||||||
|
# FAULTED, then power down the slot via sysfs:
|
||||||
|
#
|
||||||
|
# /sys/class/enclosure/<enclosure>/<slot>/power_status
|
||||||
|
#
|
||||||
|
# We assume the user will be responsible for turning the slot back on again.
|
||||||
|
#
|
||||||
|
# Note that this script requires that your enclosure be supported by the
|
||||||
|
# Linux SCSI Enclosure services (SES) driver. The script will do nothing
|
||||||
|
# if you have no enclosure, or if your enclosure isn't supported.
|
||||||
|
#
|
||||||
|
# Exit codes:
|
||||||
|
# 0: slot successfully powered off
|
||||||
|
# 1: enclosure not available
|
||||||
|
# 2: ZED_POWER_OFF_ENCLOUSRE_SLOT_ON_FAULT disabled
|
||||||
|
# 3: vdev was not FAULTED
|
||||||
|
# 4: The enclosure sysfs path passed from ZFS does not exist
|
||||||
|
# 5: Enclosure slot didn't actually turn off after we told it to
|
||||||
|
|
||||||
|
[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc"
|
||||||
|
. "${ZED_ZEDLET_DIR}/zed-functions.sh"
|
||||||
|
|
||||||
|
if [ ! -d /sys/class/enclosure ] ; then
|
||||||
|
# No JBOD enclosure or NVMe slots
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ "${ZED_POWER_OFF_ENCLOUSRE_SLOT_ON_FAULT}" != "1" ] ; then
|
||||||
|
exit 2
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ "$ZEVENT_VDEV_STATE_STR" != "FAULTED" ] ; then
|
||||||
|
exit 3
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ ! -f "$ZEVENT_VDEV_ENC_SYSFS_PATH/power_status" ] ; then
|
||||||
|
exit 4
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "off" | tee "$ZEVENT_VDEV_ENC_SYSFS_PATH/power_status"
|
||||||
|
|
||||||
|
# Wait for sysfs to report that the slot is off. It can take ~400ms on some
|
||||||
|
# enclosures.
|
||||||
|
for i in $(seq 1 20) ; do
|
||||||
|
if [ "$(cat $ZEVENT_VDEV_ENC_SYSFS_PATH/power_status)" == "off" ] ; then
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
sleep 0.1
|
||||||
|
done
|
||||||
|
|
||||||
|
if [ "$(cat $ZEVENT_VDEV_ENC_SYSFS_PATH/power_status)" != "off" ] ; then
|
||||||
|
exit 5
|
||||||
|
fi
|
||||||
|
|
||||||
|
zed_log_msg "powered down slot $ZEVENT_VDEV_ENC_SYSFS_PATH for $ZEVENT_VDEV_PATH"
|
|
@ -142,3 +142,8 @@ ZED_SYSLOG_SUBCLASS_EXCLUDE="history_event"
|
||||||
# Disabled by default, 1 to enable and 0 to disable.
|
# Disabled by default, 1 to enable and 0 to disable.
|
||||||
#ZED_SYSLOG_DISPLAY_GUIDS=1
|
#ZED_SYSLOG_DISPLAY_GUIDS=1
|
||||||
|
|
||||||
|
##
|
||||||
|
# Power off the drive's slot in the enclosure if it becomes FAULTED. This can
|
||||||
|
# help silence misbehaving drives. This assumes your drive enclosure fully
|
||||||
|
# supports slot power control via sysfs.
|
||||||
|
#ZED_POWER_OFF_ENCLOUSRE_SLOT_ON_FAULT=1
|
||||||
|
|
|
@ -2412,7 +2412,6 @@ ztest_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf,
|
||||||
int error;
|
int error;
|
||||||
|
|
||||||
ASSERT3P(lwb, !=, NULL);
|
ASSERT3P(lwb, !=, NULL);
|
||||||
ASSERT3P(zio, !=, NULL);
|
|
||||||
ASSERT3U(size, !=, 0);
|
ASSERT3U(size, !=, 0);
|
||||||
|
|
||||||
ztest_object_lock(zd, object, RL_READER);
|
ztest_object_lock(zd, object, RL_READER);
|
||||||
|
@ -2446,6 +2445,7 @@ ztest_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf,
|
||||||
DMU_READ_NO_PREFETCH);
|
DMU_READ_NO_PREFETCH);
|
||||||
ASSERT0(error);
|
ASSERT0(error);
|
||||||
} else {
|
} else {
|
||||||
|
ASSERT3P(zio, !=, NULL);
|
||||||
size = doi.doi_data_block_size;
|
size = doi.doi_data_block_size;
|
||||||
if (ISP2(size)) {
|
if (ISP2(size)) {
|
||||||
offset = P2ALIGN(offset, size);
|
offset = P2ALIGN(offset, size);
|
||||||
|
|
|
@ -12,6 +12,7 @@ ExecStart=/bin/sh -c '
|
||||||
decode_root_args || exit 0; \
|
decode_root_args || exit 0; \
|
||||||
[ "$root" = "zfs:AUTO" ] && root="$(@sbindir@/zpool list -H -o bootfs | grep -m1 -vFx -)"; \
|
[ "$root" = "zfs:AUTO" ] && root="$(@sbindir@/zpool list -H -o bootfs | grep -m1 -vFx -)"; \
|
||||||
rootflags="$(getarg rootflags=)"; \
|
rootflags="$(getarg rootflags=)"; \
|
||||||
|
[ "$(@sbindir@/zfs get -H -o value mountpoint "$root")" = legacy ] || \
|
||||||
case ",$rootflags," in \
|
case ",$rootflags," in \
|
||||||
*,zfsutil,*) ;; \
|
*,zfsutil,*) ;; \
|
||||||
,,) rootflags=zfsutil ;; \
|
,,) rootflags=zfsutil ;; \
|
||||||
|
|
|
@ -198,6 +198,14 @@ extern uint64_t spl_kmem_cache_entry_size(kmem_cache_t *cache);
|
||||||
spl_kmem_cache_create(name, size, align, ctor, dtor, rclm, priv, vmp, fl)
|
spl_kmem_cache_create(name, size, align, ctor, dtor, rclm, priv, vmp, fl)
|
||||||
#define kmem_cache_set_move(skc, move) spl_kmem_cache_set_move(skc, move)
|
#define kmem_cache_set_move(skc, move) spl_kmem_cache_set_move(skc, move)
|
||||||
#define kmem_cache_destroy(skc) spl_kmem_cache_destroy(skc)
|
#define kmem_cache_destroy(skc) spl_kmem_cache_destroy(skc)
|
||||||
|
/*
|
||||||
|
* This is necessary to be compatible with other kernel modules
|
||||||
|
* or in-tree filesystems that may define kmem_cache_alloc,
|
||||||
|
* as bcachefs now does.
|
||||||
|
*/
|
||||||
|
#ifdef kmem_cache_alloc
|
||||||
|
#undef kmem_cache_alloc
|
||||||
|
#endif
|
||||||
#define kmem_cache_alloc(skc, flags) spl_kmem_cache_alloc(skc, flags)
|
#define kmem_cache_alloc(skc, flags) spl_kmem_cache_alloc(skc, flags)
|
||||||
#define kmem_cache_free(skc, obj) spl_kmem_cache_free(skc, obj)
|
#define kmem_cache_free(skc, obj) spl_kmem_cache_free(skc, obj)
|
||||||
#define kmem_cache_reap_now(skc) spl_kmem_cache_reap_now(skc)
|
#define kmem_cache_reap_now(skc) spl_kmem_cache_reap_now(skc)
|
||||||
|
|
|
@ -36,6 +36,7 @@ extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
extern boolean_t brt_entry_decref(spa_t *spa, const blkptr_t *bp);
|
extern boolean_t brt_entry_decref(spa_t *spa, const blkptr_t *bp);
|
||||||
|
extern uint64_t brt_entry_get_refcount(spa_t *spa, const blkptr_t *bp);
|
||||||
|
|
||||||
extern uint64_t brt_get_dspace(spa_t *spa);
|
extern uint64_t brt_get_dspace(spa_t *spa);
|
||||||
extern uint64_t brt_get_used(spa_t *spa);
|
extern uint64_t brt_get_used(spa_t *spa);
|
||||||
|
|
|
@ -572,11 +572,15 @@ int dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset,
|
||||||
int dmu_buf_hold_array(objset_t *os, uint64_t object, uint64_t offset,
|
int dmu_buf_hold_array(objset_t *os, uint64_t object, uint64_t offset,
|
||||||
uint64_t length, int read, const void *tag, int *numbufsp,
|
uint64_t length, int read, const void *tag, int *numbufsp,
|
||||||
dmu_buf_t ***dbpp);
|
dmu_buf_t ***dbpp);
|
||||||
|
int dmu_buf_hold_noread(objset_t *os, uint64_t object, uint64_t offset,
|
||||||
|
const void *tag, dmu_buf_t **dbp);
|
||||||
int dmu_buf_hold_by_dnode(dnode_t *dn, uint64_t offset,
|
int dmu_buf_hold_by_dnode(dnode_t *dn, uint64_t offset,
|
||||||
const void *tag, dmu_buf_t **dbp, int flags);
|
const void *tag, dmu_buf_t **dbp, int flags);
|
||||||
int dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset,
|
int dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset,
|
||||||
uint64_t length, boolean_t read, const void *tag, int *numbufsp,
|
uint64_t length, boolean_t read, const void *tag, int *numbufsp,
|
||||||
dmu_buf_t ***dbpp, uint32_t flags);
|
dmu_buf_t ***dbpp, uint32_t flags);
|
||||||
|
int dmu_buf_hold_noread_by_dnode(dnode_t *dn, uint64_t offset, const void *tag,
|
||||||
|
dmu_buf_t **dbp);
|
||||||
/*
|
/*
|
||||||
* Add a reference to a dmu buffer that has already been held via
|
* Add a reference to a dmu buffer that has already been held via
|
||||||
* dmu_buf_hold() in the current context.
|
* dmu_buf_hold() in the current context.
|
||||||
|
|
|
@ -247,8 +247,6 @@ typedef struct dmu_sendstatus {
|
||||||
|
|
||||||
void dmu_object_zapify(objset_t *, uint64_t, dmu_object_type_t, dmu_tx_t *);
|
void dmu_object_zapify(objset_t *, uint64_t, dmu_object_type_t, dmu_tx_t *);
|
||||||
void dmu_object_free_zapified(objset_t *, uint64_t, dmu_tx_t *);
|
void dmu_object_free_zapified(objset_t *, uint64_t, dmu_tx_t *);
|
||||||
int dmu_buf_hold_noread(objset_t *, uint64_t, uint64_t,
|
|
||||||
const void *, dmu_buf_t **);
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
|
|
|
@ -80,7 +80,6 @@ uint64_t metaslab_largest_allocatable(metaslab_t *);
|
||||||
#define METASLAB_ASYNC_ALLOC 0x8
|
#define METASLAB_ASYNC_ALLOC 0x8
|
||||||
#define METASLAB_DONT_THROTTLE 0x10
|
#define METASLAB_DONT_THROTTLE 0x10
|
||||||
#define METASLAB_MUST_RESERVE 0x20
|
#define METASLAB_MUST_RESERVE 0x20
|
||||||
#define METASLAB_FASTWRITE 0x40
|
|
||||||
#define METASLAB_ZIL 0x80
|
#define METASLAB_ZIL 0x80
|
||||||
|
|
||||||
int metaslab_alloc(spa_t *, metaslab_class_t *, uint64_t,
|
int metaslab_alloc(spa_t *, metaslab_class_t *, uint64_t,
|
||||||
|
@ -96,8 +95,6 @@ void metaslab_unalloc_dva(spa_t *, const dva_t *, uint64_t);
|
||||||
int metaslab_claim(spa_t *, const blkptr_t *, uint64_t);
|
int metaslab_claim(spa_t *, const blkptr_t *, uint64_t);
|
||||||
int metaslab_claim_impl(vdev_t *, uint64_t, uint64_t, uint64_t);
|
int metaslab_claim_impl(vdev_t *, uint64_t, uint64_t, uint64_t);
|
||||||
void metaslab_check_free(spa_t *, const blkptr_t *);
|
void metaslab_check_free(spa_t *, const blkptr_t *);
|
||||||
void metaslab_fastwrite_mark(spa_t *, const blkptr_t *);
|
|
||||||
void metaslab_fastwrite_unmark(spa_t *, const blkptr_t *);
|
|
||||||
|
|
||||||
void metaslab_stat_init(void);
|
void metaslab_stat_init(void);
|
||||||
void metaslab_stat_fini(void);
|
void metaslab_stat_fini(void);
|
||||||
|
|
|
@ -313,7 +313,7 @@ struct metaslab_group {
|
||||||
* Each metaslab maintains a set of in-core trees to track metaslab
|
* Each metaslab maintains a set of in-core trees to track metaslab
|
||||||
* operations. The in-core free tree (ms_allocatable) contains the list of
|
* operations. The in-core free tree (ms_allocatable) contains the list of
|
||||||
* free segments which are eligible for allocation. As blocks are
|
* free segments which are eligible for allocation. As blocks are
|
||||||
* allocated, the allocated segment are removed from the ms_allocatable and
|
* allocated, the allocated segments are removed from the ms_allocatable and
|
||||||
* added to a per txg allocation tree (ms_allocating). As blocks are
|
* added to a per txg allocation tree (ms_allocating). As blocks are
|
||||||
* freed, they are added to the free tree (ms_freeing). These trees
|
* freed, they are added to the free tree (ms_freeing). These trees
|
||||||
* allow us to process all allocations and frees in syncing context
|
* allow us to process all allocations and frees in syncing context
|
||||||
|
@ -366,9 +366,9 @@ struct metaslab_group {
|
||||||
struct metaslab {
|
struct metaslab {
|
||||||
/*
|
/*
|
||||||
* This is the main lock of the metaslab and its purpose is to
|
* This is the main lock of the metaslab and its purpose is to
|
||||||
* coordinate our allocations and frees [e.g metaslab_block_alloc(),
|
* coordinate our allocations and frees [e.g., metaslab_block_alloc(),
|
||||||
* metaslab_free_concrete(), ..etc] with our various syncing
|
* metaslab_free_concrete(), ..etc] with our various syncing
|
||||||
* procedures [e.g. metaslab_sync(), metaslab_sync_done(), ..etc].
|
* procedures [e.g., metaslab_sync(), metaslab_sync_done(), ..etc].
|
||||||
*
|
*
|
||||||
* The lock is also used during some miscellaneous operations like
|
* The lock is also used during some miscellaneous operations like
|
||||||
* using the metaslab's histogram for the metaslab group's histogram
|
* using the metaslab's histogram for the metaslab group's histogram
|
||||||
|
|
|
@ -266,7 +266,6 @@ struct vdev {
|
||||||
metaslab_group_t *vdev_mg; /* metaslab group */
|
metaslab_group_t *vdev_mg; /* metaslab group */
|
||||||
metaslab_group_t *vdev_log_mg; /* embedded slog metaslab group */
|
metaslab_group_t *vdev_log_mg; /* embedded slog metaslab group */
|
||||||
metaslab_t **vdev_ms; /* metaslab array */
|
metaslab_t **vdev_ms; /* metaslab array */
|
||||||
uint64_t vdev_pending_fastwrite; /* allocated fastwrites */
|
|
||||||
txg_list_t vdev_ms_list; /* per-txg dirty metaslab lists */
|
txg_list_t vdev_ms_list; /* per-txg dirty metaslab lists */
|
||||||
txg_list_t vdev_dtl_list; /* per-txg dirty DTL lists */
|
txg_list_t vdev_dtl_list; /* per-txg dirty DTL lists */
|
||||||
txg_node_t vdev_txg_node; /* per-txg dirty vdev linkage */
|
txg_node_t vdev_txg_node; /* per-txg dirty vdev linkage */
|
||||||
|
|
|
@ -38,14 +38,22 @@ extern "C" {
|
||||||
/*
|
/*
|
||||||
* Possible states for a given lwb structure.
|
* Possible states for a given lwb structure.
|
||||||
*
|
*
|
||||||
* An lwb will start out in the "closed" state, and then transition to
|
* An lwb will start out in the "new" state, and transition to the "opened"
|
||||||
* the "opened" state via a call to zil_lwb_write_open(). When
|
* state via a call to zil_lwb_write_open() on first itx assignment. When
|
||||||
* transitioning from "closed" to "opened" the zilog's "zl_issuer_lock"
|
* transitioning from "new" to "opened" the zilog's "zl_issuer_lock" must be
|
||||||
* must be held.
|
* held.
|
||||||
*
|
*
|
||||||
* After the lwb is "opened", it can transition into the "issued" state
|
* After the lwb is "opened", it can be assigned several itxs and transition
|
||||||
* via zil_lwb_write_close(). Again, the zilog's "zl_issuer_lock" must
|
* into the "closed" state via zil_lwb_write_close() when full or on timeout.
|
||||||
* be held when making this transition.
|
* When transitioning from "opened" to "closed" the zilog's "zl_issuer_lock"
|
||||||
|
* must be held. New lwb allocation also takes "zl_lock" to protect the list.
|
||||||
|
*
|
||||||
|
* After the lwb is "closed", it can transition into the "ready" state via
|
||||||
|
* zil_lwb_write_issue(). "zl_lock" must be held when making this transition.
|
||||||
|
* Since it is done by the same thread, "zl_issuer_lock" is not needed.
|
||||||
|
*
|
||||||
|
* When lwb in "ready" state receives its block pointer, it can transition to
|
||||||
|
* "issued". "zl_lock" must be held when making this transition.
|
||||||
*
|
*
|
||||||
* After the lwb's write zio completes, it transitions into the "write
|
* After the lwb's write zio completes, it transitions into the "write
|
||||||
* done" state via zil_lwb_write_done(); and then into the "flush done"
|
* done" state via zil_lwb_write_done(); and then into the "flush done"
|
||||||
|
@ -62,17 +70,20 @@ extern "C" {
|
||||||
*
|
*
|
||||||
* Additionally, correctness when reading an lwb's state is often
|
* Additionally, correctness when reading an lwb's state is often
|
||||||
* achieved by exploiting the fact that these state transitions occur in
|
* achieved by exploiting the fact that these state transitions occur in
|
||||||
* this specific order; i.e. "closed" to "opened" to "issued" to "done".
|
* this specific order; i.e. "new" to "opened" to "closed" to "ready" to
|
||||||
|
* "issued" to "write_done" and finally "flush_done".
|
||||||
*
|
*
|
||||||
* Thus, if an lwb is in the "closed" or "opened" state, holding the
|
* Thus, if an lwb is in the "new" or "opened" state, holding the
|
||||||
* "zl_issuer_lock" will prevent a concurrent thread from transitioning
|
* "zl_issuer_lock" will prevent a concurrent thread from transitioning
|
||||||
* that lwb to the "issued" state. Likewise, if an lwb is already in the
|
* that lwb to the "closed" state. Likewise, if an lwb is already in the
|
||||||
* "issued" state, holding the "zl_lock" will prevent a concurrent
|
* "ready" state, holding the "zl_lock" will prevent a concurrent thread
|
||||||
* thread from transitioning that lwb to the "write done" state.
|
* from transitioning that lwb to the "issued" state.
|
||||||
*/
|
*/
|
||||||
typedef enum {
|
typedef enum {
|
||||||
LWB_STATE_CLOSED,
|
LWB_STATE_NEW,
|
||||||
LWB_STATE_OPENED,
|
LWB_STATE_OPENED,
|
||||||
|
LWB_STATE_CLOSED,
|
||||||
|
LWB_STATE_READY,
|
||||||
LWB_STATE_ISSUED,
|
LWB_STATE_ISSUED,
|
||||||
LWB_STATE_WRITE_DONE,
|
LWB_STATE_WRITE_DONE,
|
||||||
LWB_STATE_FLUSH_DONE,
|
LWB_STATE_FLUSH_DONE,
|
||||||
|
@ -91,18 +102,21 @@ typedef enum {
|
||||||
typedef struct lwb {
|
typedef struct lwb {
|
||||||
zilog_t *lwb_zilog; /* back pointer to log struct */
|
zilog_t *lwb_zilog; /* back pointer to log struct */
|
||||||
blkptr_t lwb_blk; /* on disk address of this log blk */
|
blkptr_t lwb_blk; /* on disk address of this log blk */
|
||||||
boolean_t lwb_fastwrite; /* is blk marked for fastwrite? */
|
boolean_t lwb_slim; /* log block has slim format */
|
||||||
boolean_t lwb_slog; /* lwb_blk is on SLOG device */
|
boolean_t lwb_slog; /* lwb_blk is on SLOG device */
|
||||||
boolean_t lwb_indirect; /* do not postpone zil_lwb_commit() */
|
int lwb_error; /* log block allocation error */
|
||||||
|
int lwb_nmax; /* max bytes in the buffer */
|
||||||
int lwb_nused; /* # used bytes in buffer */
|
int lwb_nused; /* # used bytes in buffer */
|
||||||
int lwb_nfilled; /* # filled bytes in buffer */
|
int lwb_nfilled; /* # filled bytes in buffer */
|
||||||
int lwb_sz; /* size of block and buffer */
|
int lwb_sz; /* size of block and buffer */
|
||||||
lwb_state_t lwb_state; /* the state of this lwb */
|
lwb_state_t lwb_state; /* the state of this lwb */
|
||||||
char *lwb_buf; /* log write buffer */
|
char *lwb_buf; /* log write buffer */
|
||||||
|
zio_t *lwb_child_zio; /* parent zio for children */
|
||||||
zio_t *lwb_write_zio; /* zio for the lwb buffer */
|
zio_t *lwb_write_zio; /* zio for the lwb buffer */
|
||||||
zio_t *lwb_root_zio; /* root zio for lwb write and flushes */
|
zio_t *lwb_root_zio; /* root zio for lwb write and flushes */
|
||||||
hrtime_t lwb_issued_timestamp; /* when was the lwb issued? */
|
hrtime_t lwb_issued_timestamp; /* when was the lwb issued? */
|
||||||
uint64_t lwb_issued_txg; /* the txg when the write is issued */
|
uint64_t lwb_issued_txg; /* the txg when the write is issued */
|
||||||
|
uint64_t lwb_alloc_txg; /* the txg when lwb_blk is allocated */
|
||||||
uint64_t lwb_max_txg; /* highest txg in this lwb */
|
uint64_t lwb_max_txg; /* highest txg in this lwb */
|
||||||
list_node_t lwb_node; /* zilog->zl_lwb_list linkage */
|
list_node_t lwb_node; /* zilog->zl_lwb_list linkage */
|
||||||
list_node_t lwb_issue_node; /* linkage of lwbs ready for issue */
|
list_node_t lwb_issue_node; /* linkage of lwbs ready for issue */
|
||||||
|
|
|
@ -222,7 +222,6 @@ typedef uint64_t zio_flag_t;
|
||||||
#define ZIO_FLAG_NOPWRITE (1ULL << 28)
|
#define ZIO_FLAG_NOPWRITE (1ULL << 28)
|
||||||
#define ZIO_FLAG_REEXECUTED (1ULL << 29)
|
#define ZIO_FLAG_REEXECUTED (1ULL << 29)
|
||||||
#define ZIO_FLAG_DELEGATED (1ULL << 30)
|
#define ZIO_FLAG_DELEGATED (1ULL << 30)
|
||||||
#define ZIO_FLAG_FASTWRITE (1ULL << 31)
|
|
||||||
|
|
||||||
#define ZIO_FLAG_MUSTSUCCEED 0
|
#define ZIO_FLAG_MUSTSUCCEED 0
|
||||||
#define ZIO_FLAG_RAW (ZIO_FLAG_RAW_COMPRESS | ZIO_FLAG_RAW_ENCRYPT)
|
#define ZIO_FLAG_RAW (ZIO_FLAG_RAW_COMPRESS | ZIO_FLAG_RAW_ENCRYPT)
|
||||||
|
|
|
@ -57,7 +57,7 @@ libzfs_la_LIBADD = \
|
||||||
libzutil.la \
|
libzutil.la \
|
||||||
libuutil.la
|
libuutil.la
|
||||||
|
|
||||||
libzfs_la_LIBADD += -lm $(LIBCRYPTO_LIBS) $(ZLIB_LIBS) $(LIBFETCH_LIBS) $(LTLIBINTL)
|
libzfs_la_LIBADD += -lrt -lm $(LIBCRYPTO_LIBS) $(ZLIB_LIBS) $(LIBFETCH_LIBS) $(LTLIBINTL)
|
||||||
|
|
||||||
libzfs_la_LDFLAGS = -pthread
|
libzfs_la_LDFLAGS = -pthread
|
||||||
|
|
||||||
|
|
|
@ -3928,6 +3928,12 @@ zpool_vdev_remove(zpool_handle_t *zhp, const char *path)
|
||||||
|
|
||||||
switch (errno) {
|
switch (errno) {
|
||||||
|
|
||||||
|
case EALREADY:
|
||||||
|
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||||
|
"removal for this vdev is already in progress."));
|
||||||
|
(void) zfs_error(hdl, EZFS_BUSY, errbuf);
|
||||||
|
break;
|
||||||
|
|
||||||
case EINVAL:
|
case EINVAL:
|
||||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||||
"invalid config; all top-level vdevs must "
|
"invalid config; all top-level vdevs must "
|
||||||
|
|
|
@ -928,6 +928,39 @@ zfs_send_progress(zfs_handle_t *zhp, int fd, uint64_t *bytes_written,
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static volatile boolean_t send_progress_thread_signal_duetotimer;
|
||||||
|
static void
|
||||||
|
send_progress_thread_act(int sig, siginfo_t *info, void *ucontext)
|
||||||
|
{
|
||||||
|
(void) sig, (void) ucontext;
|
||||||
|
send_progress_thread_signal_duetotimer = info->si_code == SI_TIMER;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct timer_desirability {
|
||||||
|
timer_t timer;
|
||||||
|
boolean_t desired;
|
||||||
|
};
|
||||||
|
static void
|
||||||
|
timer_delete_cleanup(void *timer)
|
||||||
|
{
|
||||||
|
struct timer_desirability *td = timer;
|
||||||
|
if (td->desired)
|
||||||
|
timer_delete(td->timer);
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef SIGINFO
|
||||||
|
#define SEND_PROGRESS_THREAD_PARENT_BLOCK_SIGINFO sigaddset(&new, SIGINFO)
|
||||||
|
#else
|
||||||
|
#define SEND_PROGRESS_THREAD_PARENT_BLOCK_SIGINFO
|
||||||
|
#endif
|
||||||
|
#define SEND_PROGRESS_THREAD_PARENT_BLOCK(old) { \
|
||||||
|
sigset_t new; \
|
||||||
|
sigemptyset(&new); \
|
||||||
|
sigaddset(&new, SIGUSR1); \
|
||||||
|
SEND_PROGRESS_THREAD_PARENT_BLOCK_SIGINFO; \
|
||||||
|
pthread_sigmask(SIG_BLOCK, &new, old); \
|
||||||
|
}
|
||||||
|
|
||||||
static void *
|
static void *
|
||||||
send_progress_thread(void *arg)
|
send_progress_thread(void *arg)
|
||||||
{
|
{
|
||||||
|
@ -941,6 +974,26 @@ send_progress_thread(void *arg)
|
||||||
struct tm tm;
|
struct tm tm;
|
||||||
int err;
|
int err;
|
||||||
|
|
||||||
|
const struct sigaction signal_action =
|
||||||
|
{.sa_sigaction = send_progress_thread_act, .sa_flags = SA_SIGINFO};
|
||||||
|
struct sigevent timer_cfg =
|
||||||
|
{.sigev_notify = SIGEV_SIGNAL, .sigev_signo = SIGUSR1};
|
||||||
|
const struct itimerspec timer_time =
|
||||||
|
{.it_value = {.tv_sec = 1}, .it_interval = {.tv_sec = 1}};
|
||||||
|
struct timer_desirability timer = {};
|
||||||
|
|
||||||
|
sigaction(SIGUSR1, &signal_action, NULL);
|
||||||
|
#ifdef SIGINFO
|
||||||
|
sigaction(SIGINFO, &signal_action, NULL);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if ((timer.desired = pa->pa_progress || pa->pa_astitle)) {
|
||||||
|
if (timer_create(CLOCK_MONOTONIC, &timer_cfg, &timer.timer))
|
||||||
|
return ((void *)(uintptr_t)errno);
|
||||||
|
(void) timer_settime(timer.timer, 0, &timer_time, NULL);
|
||||||
|
}
|
||||||
|
pthread_cleanup_push(timer_delete_cleanup, &timer);
|
||||||
|
|
||||||
if (!pa->pa_parsable && pa->pa_progress) {
|
if (!pa->pa_parsable && pa->pa_progress) {
|
||||||
(void) fprintf(stderr,
|
(void) fprintf(stderr,
|
||||||
"TIME %s %sSNAPSHOT %s\n",
|
"TIME %s %sSNAPSHOT %s\n",
|
||||||
|
@ -953,12 +1006,12 @@ send_progress_thread(void *arg)
|
||||||
* Print the progress from ZFS_IOC_SEND_PROGRESS every second.
|
* Print the progress from ZFS_IOC_SEND_PROGRESS every second.
|
||||||
*/
|
*/
|
||||||
for (;;) {
|
for (;;) {
|
||||||
(void) sleep(1);
|
pause();
|
||||||
if ((err = zfs_send_progress(zhp, pa->pa_fd, &bytes,
|
if ((err = zfs_send_progress(zhp, pa->pa_fd, &bytes,
|
||||||
&blocks)) != 0) {
|
&blocks)) != 0) {
|
||||||
if (err == EINTR || err == ENOENT)
|
if (err == EINTR || err == ENOENT)
|
||||||
return ((void *)0);
|
err = 0;
|
||||||
return ((void *)(uintptr_t)err);
|
pthread_exit(((void *)(uintptr_t)err));
|
||||||
}
|
}
|
||||||
|
|
||||||
(void) time(&t);
|
(void) time(&t);
|
||||||
|
@ -991,21 +1044,25 @@ send_progress_thread(void *arg)
|
||||||
(void) fprintf(stderr, "%02d:%02d:%02d\t%llu\t%s\n",
|
(void) fprintf(stderr, "%02d:%02d:%02d\t%llu\t%s\n",
|
||||||
tm.tm_hour, tm.tm_min, tm.tm_sec,
|
tm.tm_hour, tm.tm_min, tm.tm_sec,
|
||||||
(u_longlong_t)bytes, zhp->zfs_name);
|
(u_longlong_t)bytes, zhp->zfs_name);
|
||||||
} else if (pa->pa_progress) {
|
} else if (pa->pa_progress ||
|
||||||
|
!send_progress_thread_signal_duetotimer) {
|
||||||
zfs_nicebytes(bytes, buf, sizeof (buf));
|
zfs_nicebytes(bytes, buf, sizeof (buf));
|
||||||
(void) fprintf(stderr, "%02d:%02d:%02d %5s %s\n",
|
(void) fprintf(stderr, "%02d:%02d:%02d %5s %s\n",
|
||||||
tm.tm_hour, tm.tm_min, tm.tm_sec,
|
tm.tm_hour, tm.tm_min, tm.tm_sec,
|
||||||
buf, zhp->zfs_name);
|
buf, zhp->zfs_name);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
pthread_cleanup_pop(B_TRUE);
|
||||||
}
|
}
|
||||||
|
|
||||||
static boolean_t
|
static boolean_t
|
||||||
send_progress_thread_exit(libzfs_handle_t *hdl, pthread_t ptid)
|
send_progress_thread_exit(
|
||||||
|
libzfs_handle_t *hdl, pthread_t ptid, sigset_t *oldmask)
|
||||||
{
|
{
|
||||||
void *status = NULL;
|
void *status = NULL;
|
||||||
(void) pthread_cancel(ptid);
|
(void) pthread_cancel(ptid);
|
||||||
(void) pthread_join(ptid, &status);
|
(void) pthread_join(ptid, &status);
|
||||||
|
pthread_sigmask(SIG_SETMASK, oldmask, NULL);
|
||||||
int error = (int)(uintptr_t)status;
|
int error = (int)(uintptr_t)status;
|
||||||
if (error != 0 && status != PTHREAD_CANCELED)
|
if (error != 0 && status != PTHREAD_CANCELED)
|
||||||
return (zfs_standard_error(hdl, error,
|
return (zfs_standard_error(hdl, error,
|
||||||
|
@ -1199,7 +1256,8 @@ dump_snapshot(zfs_handle_t *zhp, void *arg)
|
||||||
* If progress reporting is requested, spawn a new thread to
|
* If progress reporting is requested, spawn a new thread to
|
||||||
* poll ZFS_IOC_SEND_PROGRESS at a regular interval.
|
* poll ZFS_IOC_SEND_PROGRESS at a regular interval.
|
||||||
*/
|
*/
|
||||||
if (sdd->progress || sdd->progressastitle) {
|
sigset_t oldmask;
|
||||||
|
{
|
||||||
pa.pa_zhp = zhp;
|
pa.pa_zhp = zhp;
|
||||||
pa.pa_fd = sdd->outfd;
|
pa.pa_fd = sdd->outfd;
|
||||||
pa.pa_parsable = sdd->parsable;
|
pa.pa_parsable = sdd->parsable;
|
||||||
|
@ -1214,13 +1272,13 @@ dump_snapshot(zfs_handle_t *zhp, void *arg)
|
||||||
zfs_close(zhp);
|
zfs_close(zhp);
|
||||||
return (err);
|
return (err);
|
||||||
}
|
}
|
||||||
|
SEND_PROGRESS_THREAD_PARENT_BLOCK(&oldmask);
|
||||||
}
|
}
|
||||||
|
|
||||||
err = dump_ioctl(zhp, sdd->prevsnap, sdd->prevsnap_obj,
|
err = dump_ioctl(zhp, sdd->prevsnap, sdd->prevsnap_obj,
|
||||||
fromorigin, sdd->outfd, flags, sdd->debugnv);
|
fromorigin, sdd->outfd, flags, sdd->debugnv);
|
||||||
|
|
||||||
if ((sdd->progress || sdd->progressastitle) &&
|
if (send_progress_thread_exit(zhp->zfs_hdl, tid, &oldmask))
|
||||||
send_progress_thread_exit(zhp->zfs_hdl, tid))
|
|
||||||
return (-1);
|
return (-1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1562,8 +1620,9 @@ estimate_size(zfs_handle_t *zhp, const char *from, int fd, sendflags_t *flags,
|
||||||
progress_arg_t pa = { 0 };
|
progress_arg_t pa = { 0 };
|
||||||
int err = 0;
|
int err = 0;
|
||||||
pthread_t ptid;
|
pthread_t ptid;
|
||||||
|
sigset_t oldmask;
|
||||||
|
|
||||||
if (flags->progress || flags->progressastitle) {
|
{
|
||||||
pa.pa_zhp = zhp;
|
pa.pa_zhp = zhp;
|
||||||
pa.pa_fd = fd;
|
pa.pa_fd = fd;
|
||||||
pa.pa_parsable = flags->parsable;
|
pa.pa_parsable = flags->parsable;
|
||||||
|
@ -1577,6 +1636,7 @@ estimate_size(zfs_handle_t *zhp, const char *from, int fd, sendflags_t *flags,
|
||||||
return (zfs_error(zhp->zfs_hdl,
|
return (zfs_error(zhp->zfs_hdl,
|
||||||
EZFS_THREADCREATEFAILED, errbuf));
|
EZFS_THREADCREATEFAILED, errbuf));
|
||||||
}
|
}
|
||||||
|
SEND_PROGRESS_THREAD_PARENT_BLOCK(&oldmask);
|
||||||
}
|
}
|
||||||
|
|
||||||
err = lzc_send_space_resume_redacted(zhp->zfs_name, from,
|
err = lzc_send_space_resume_redacted(zhp->zfs_name, from,
|
||||||
|
@ -1584,8 +1644,7 @@ estimate_size(zfs_handle_t *zhp, const char *from, int fd, sendflags_t *flags,
|
||||||
redactbook, fd, &size);
|
redactbook, fd, &size);
|
||||||
*sizep = size;
|
*sizep = size;
|
||||||
|
|
||||||
if ((flags->progress || flags->progressastitle) &&
|
if (send_progress_thread_exit(zhp->zfs_hdl, ptid, &oldmask))
|
||||||
send_progress_thread_exit(zhp->zfs_hdl, ptid))
|
|
||||||
return (-1);
|
return (-1);
|
||||||
|
|
||||||
if (!flags->progress && !flags->parsable)
|
if (!flags->progress && !flags->parsable)
|
||||||
|
@ -1876,11 +1935,12 @@ zfs_send_resume_impl_cb_impl(libzfs_handle_t *hdl, sendflags_t *flags,
|
||||||
if (!flags->dryrun) {
|
if (!flags->dryrun) {
|
||||||
progress_arg_t pa = { 0 };
|
progress_arg_t pa = { 0 };
|
||||||
pthread_t tid;
|
pthread_t tid;
|
||||||
|
sigset_t oldmask;
|
||||||
/*
|
/*
|
||||||
* If progress reporting is requested, spawn a new thread to
|
* If progress reporting is requested, spawn a new thread to
|
||||||
* poll ZFS_IOC_SEND_PROGRESS at a regular interval.
|
* poll ZFS_IOC_SEND_PROGRESS at a regular interval.
|
||||||
*/
|
*/
|
||||||
if (flags->progress || flags->progressastitle) {
|
{
|
||||||
pa.pa_zhp = zhp;
|
pa.pa_zhp = zhp;
|
||||||
pa.pa_fd = outfd;
|
pa.pa_fd = outfd;
|
||||||
pa.pa_parsable = flags->parsable;
|
pa.pa_parsable = flags->parsable;
|
||||||
|
@ -1898,6 +1958,7 @@ zfs_send_resume_impl_cb_impl(libzfs_handle_t *hdl, sendflags_t *flags,
|
||||||
zfs_close(zhp);
|
zfs_close(zhp);
|
||||||
return (error);
|
return (error);
|
||||||
}
|
}
|
||||||
|
SEND_PROGRESS_THREAD_PARENT_BLOCK(&oldmask);
|
||||||
}
|
}
|
||||||
|
|
||||||
error = lzc_send_resume_redacted(zhp->zfs_name, fromname, outfd,
|
error = lzc_send_resume_redacted(zhp->zfs_name, fromname, outfd,
|
||||||
|
@ -1905,8 +1966,7 @@ zfs_send_resume_impl_cb_impl(libzfs_handle_t *hdl, sendflags_t *flags,
|
||||||
if (redact_book != NULL)
|
if (redact_book != NULL)
|
||||||
free(redact_book);
|
free(redact_book);
|
||||||
|
|
||||||
if ((flags->progressastitle || flags->progress) &&
|
if (send_progress_thread_exit(hdl, tid, &oldmask)) {
|
||||||
send_progress_thread_exit(hdl, tid)) {
|
|
||||||
zfs_close(zhp);
|
zfs_close(zhp);
|
||||||
return (-1);
|
return (-1);
|
||||||
}
|
}
|
||||||
|
@ -2691,7 +2751,8 @@ zfs_send_one_cb_impl(zfs_handle_t *zhp, const char *from, int fd,
|
||||||
* If progress reporting is requested, spawn a new thread to poll
|
* If progress reporting is requested, spawn a new thread to poll
|
||||||
* ZFS_IOC_SEND_PROGRESS at a regular interval.
|
* ZFS_IOC_SEND_PROGRESS at a regular interval.
|
||||||
*/
|
*/
|
||||||
if (flags->progress || flags->progressastitle) {
|
sigset_t oldmask;
|
||||||
|
{
|
||||||
pa.pa_zhp = zhp;
|
pa.pa_zhp = zhp;
|
||||||
pa.pa_fd = fd;
|
pa.pa_fd = fd;
|
||||||
pa.pa_parsable = flags->parsable;
|
pa.pa_parsable = flags->parsable;
|
||||||
|
@ -2708,13 +2769,13 @@ zfs_send_one_cb_impl(zfs_handle_t *zhp, const char *from, int fd,
|
||||||
return (zfs_error(zhp->zfs_hdl,
|
return (zfs_error(zhp->zfs_hdl,
|
||||||
EZFS_THREADCREATEFAILED, errbuf));
|
EZFS_THREADCREATEFAILED, errbuf));
|
||||||
}
|
}
|
||||||
|
SEND_PROGRESS_THREAD_PARENT_BLOCK(&oldmask);
|
||||||
}
|
}
|
||||||
|
|
||||||
err = lzc_send_redacted(name, from, fd,
|
err = lzc_send_redacted(name, from, fd,
|
||||||
lzc_flags_from_sendflags(flags), redactbook);
|
lzc_flags_from_sendflags(flags), redactbook);
|
||||||
|
|
||||||
if ((flags->progress || flags->progressastitle) &&
|
if (send_progress_thread_exit(hdl, ptid, &oldmask))
|
||||||
send_progress_thread_exit(hdl, ptid))
|
|
||||||
return (-1);
|
return (-1);
|
||||||
|
|
||||||
if (err == 0 && (flags->props || flags->holds || flags->backup)) {
|
if (err == 0 && (flags->props || flags->holds || flags->backup)) {
|
||||||
|
|
|
@ -29,7 +29,7 @@
|
||||||
.\" Copyright 2018 Nexenta Systems, Inc.
|
.\" Copyright 2018 Nexenta Systems, Inc.
|
||||||
.\" Copyright 2019 Joyent, Inc.
|
.\" Copyright 2019 Joyent, Inc.
|
||||||
.\"
|
.\"
|
||||||
.Dd January 12, 2023
|
.Dd July 27, 2023
|
||||||
.Dt ZFS-SEND 8
|
.Dt ZFS-SEND 8
|
||||||
.Os
|
.Os
|
||||||
.
|
.
|
||||||
|
@ -297,6 +297,12 @@ This flag can only be used in conjunction with
|
||||||
.It Fl v , -verbose
|
.It Fl v , -verbose
|
||||||
Print verbose information about the stream package generated.
|
Print verbose information about the stream package generated.
|
||||||
This information includes a per-second report of how much data has been sent.
|
This information includes a per-second report of how much data has been sent.
|
||||||
|
The same report can be requested by sending
|
||||||
|
.Dv SIGINFO
|
||||||
|
or
|
||||||
|
.Dv SIGUSR1 ,
|
||||||
|
regardless of
|
||||||
|
.Fl v .
|
||||||
.Pp
|
.Pp
|
||||||
The format of the stream is committed.
|
The format of the stream is committed.
|
||||||
You will be able to receive your streams on future versions of ZFS.
|
You will be able to receive your streams on future versions of ZFS.
|
||||||
|
@ -433,6 +439,12 @@ and the verbose output goes to standard error
|
||||||
.It Fl v , -verbose
|
.It Fl v , -verbose
|
||||||
Print verbose information about the stream package generated.
|
Print verbose information about the stream package generated.
|
||||||
This information includes a per-second report of how much data has been sent.
|
This information includes a per-second report of how much data has been sent.
|
||||||
|
The same report can be requested by sending
|
||||||
|
.Dv SIGINFO
|
||||||
|
or
|
||||||
|
.Dv SIGUSR1 ,
|
||||||
|
regardless of
|
||||||
|
.Fl v .
|
||||||
.El
|
.El
|
||||||
.It Xo
|
.It Xo
|
||||||
.Nm zfs
|
.Nm zfs
|
||||||
|
@ -669,6 +681,10 @@ ones on the source, and are ready to be used, while the parent snapshot on the
|
||||||
target contains none of the username and password data present on the source,
|
target contains none of the username and password data present on the source,
|
||||||
because it was removed by the redacted send operation.
|
because it was removed by the redacted send operation.
|
||||||
.
|
.
|
||||||
|
.Sh SIGNALS
|
||||||
|
See
|
||||||
|
.Fl v .
|
||||||
|
.
|
||||||
.Sh EXAMPLES
|
.Sh EXAMPLES
|
||||||
.\" These are, respectively, examples 12, 13 from zfs.8
|
.\" These are, respectively, examples 12, 13 from zfs.8
|
||||||
.\" Make sure to update them bidirectionally
|
.\" Make sure to update them bidirectionally
|
||||||
|
|
|
@ -6290,7 +6290,8 @@ zfs_freebsd_copy_file_range(struct vop_copy_file_range_args *ap)
|
||||||
|
|
||||||
error = zfs_clone_range(VTOZ(invp), ap->a_inoffp, VTOZ(outvp),
|
error = zfs_clone_range(VTOZ(invp), ap->a_inoffp, VTOZ(outvp),
|
||||||
ap->a_outoffp, &len, ap->a_outcred);
|
ap->a_outoffp, &len, ap->a_outcred);
|
||||||
if (error == EXDEV || error == EOPNOTSUPP)
|
if (error == EXDEV || error == EAGAIN || error == EINVAL ||
|
||||||
|
error == EOPNOTSUPP)
|
||||||
goto bad_locked_fallback;
|
goto bad_locked_fallback;
|
||||||
*ap->a_lenp = (size_t)len;
|
*ap->a_lenp = (size_t)len;
|
||||||
out_locked:
|
out_locked:
|
||||||
|
|
|
@ -478,16 +478,18 @@ zfsctl_is_snapdir(struct inode *ip)
|
||||||
*/
|
*/
|
||||||
static struct inode *
|
static struct inode *
|
||||||
zfsctl_inode_alloc(zfsvfs_t *zfsvfs, uint64_t id,
|
zfsctl_inode_alloc(zfsvfs_t *zfsvfs, uint64_t id,
|
||||||
const struct file_operations *fops, const struct inode_operations *ops)
|
const struct file_operations *fops, const struct inode_operations *ops,
|
||||||
|
uint64_t creation)
|
||||||
{
|
{
|
||||||
inode_timespec_t now;
|
|
||||||
struct inode *ip;
|
struct inode *ip;
|
||||||
znode_t *zp;
|
znode_t *zp;
|
||||||
|
inode_timespec_t now = {.tv_sec = creation};
|
||||||
|
|
||||||
ip = new_inode(zfsvfs->z_sb);
|
ip = new_inode(zfsvfs->z_sb);
|
||||||
if (ip == NULL)
|
if (ip == NULL)
|
||||||
return (NULL);
|
return (NULL);
|
||||||
|
|
||||||
|
if (!creation)
|
||||||
now = current_time(ip);
|
now = current_time(ip);
|
||||||
zp = ITOZ(ip);
|
zp = ITOZ(ip);
|
||||||
ASSERT3P(zp->z_dirlocks, ==, NULL);
|
ASSERT3P(zp->z_dirlocks, ==, NULL);
|
||||||
|
@ -552,14 +554,28 @@ zfsctl_inode_lookup(zfsvfs_t *zfsvfs, uint64_t id,
|
||||||
const struct file_operations *fops, const struct inode_operations *ops)
|
const struct file_operations *fops, const struct inode_operations *ops)
|
||||||
{
|
{
|
||||||
struct inode *ip = NULL;
|
struct inode *ip = NULL;
|
||||||
|
uint64_t creation = 0;
|
||||||
|
dsl_dataset_t *snap_ds;
|
||||||
|
dsl_pool_t *pool;
|
||||||
|
|
||||||
while (ip == NULL) {
|
while (ip == NULL) {
|
||||||
ip = ilookup(zfsvfs->z_sb, (unsigned long)id);
|
ip = ilookup(zfsvfs->z_sb, (unsigned long)id);
|
||||||
if (ip)
|
if (ip)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
if (id <= ZFSCTL_INO_SNAPDIRS && !creation) {
|
||||||
|
pool = dmu_objset_pool(zfsvfs->z_os);
|
||||||
|
dsl_pool_config_enter(pool, FTAG);
|
||||||
|
if (!dsl_dataset_hold_obj(pool,
|
||||||
|
ZFSCTL_INO_SNAPDIRS - id, FTAG, &snap_ds)) {
|
||||||
|
creation = dsl_get_creation(snap_ds);
|
||||||
|
dsl_dataset_rele(snap_ds, FTAG);
|
||||||
|
}
|
||||||
|
dsl_pool_config_exit(pool, FTAG);
|
||||||
|
}
|
||||||
|
|
||||||
/* May fail due to concurrent zfsctl_inode_alloc() */
|
/* May fail due to concurrent zfsctl_inode_alloc() */
|
||||||
ip = zfsctl_inode_alloc(zfsvfs, id, fops, ops);
|
ip = zfsctl_inode_alloc(zfsvfs, id, fops, ops, creation);
|
||||||
}
|
}
|
||||||
|
|
||||||
return (ip);
|
return (ip);
|
||||||
|
@ -581,7 +597,7 @@ zfsctl_create(zfsvfs_t *zfsvfs)
|
||||||
ASSERT(zfsvfs->z_ctldir == NULL);
|
ASSERT(zfsvfs->z_ctldir == NULL);
|
||||||
|
|
||||||
zfsvfs->z_ctldir = zfsctl_inode_alloc(zfsvfs, ZFSCTL_INO_ROOT,
|
zfsvfs->z_ctldir = zfsctl_inode_alloc(zfsvfs, ZFSCTL_INO_ROOT,
|
||||||
&zpl_fops_root, &zpl_ops_root);
|
&zpl_fops_root, &zpl_ops_root, 0);
|
||||||
if (zfsvfs->z_ctldir == NULL)
|
if (zfsvfs->z_ctldir == NULL)
|
||||||
return (SET_ERROR(ENOENT));
|
return (SET_ERROR(ENOENT));
|
||||||
|
|
||||||
|
|
|
@ -103,9 +103,17 @@ zpl_copy_file_range(struct file *src_file, loff_t src_off,
|
||||||
* Since Linux 5.3 the filesystem driver is responsible for executing
|
* Since Linux 5.3 the filesystem driver is responsible for executing
|
||||||
* an appropriate fallback, and a generic fallback function is provided.
|
* an appropriate fallback, and a generic fallback function is provided.
|
||||||
*/
|
*/
|
||||||
if (ret == -EOPNOTSUPP || ret == -EXDEV)
|
if (ret == -EOPNOTSUPP || ret == -EINVAL || ret == -EXDEV ||
|
||||||
|
ret == -EAGAIN)
|
||||||
ret = generic_copy_file_range(src_file, src_off, dst_file,
|
ret = generic_copy_file_range(src_file, src_off, dst_file,
|
||||||
dst_off, len, flags);
|
dst_off, len, flags);
|
||||||
|
#else
|
||||||
|
/*
|
||||||
|
* Before Linux 5.3 the filesystem has to return -EOPNOTSUPP to signal
|
||||||
|
* to the kernel that it should fallback to a content copy.
|
||||||
|
*/
|
||||||
|
if (ret == -EINVAL || ret == -EXDEV || ret == -EAGAIN)
|
||||||
|
ret = -EOPNOTSUPP;
|
||||||
#endif /* HAVE_VFS_GENERIC_COPY_FILE_RANGE */
|
#endif /* HAVE_VFS_GENERIC_COPY_FILE_RANGE */
|
||||||
|
|
||||||
return (ret);
|
return (ret);
|
||||||
|
|
|
@ -174,7 +174,7 @@
|
||||||
* size_t len, unsigned int flags);
|
* size_t len, unsigned int flags);
|
||||||
*
|
*
|
||||||
* Even though offsets and length represent bytes, they have to be
|
* Even though offsets and length represent bytes, they have to be
|
||||||
* block-aligned or we will return the EXDEV error so the upper layer can
|
* block-aligned or we will return an error so the upper layer can
|
||||||
* fallback to the generic mechanism that will just copy the data.
|
* fallback to the generic mechanism that will just copy the data.
|
||||||
* Using copy_file_range(2) will call OS-independent zfs_clone_range() function.
|
* Using copy_file_range(2) will call OS-independent zfs_clone_range() function.
|
||||||
* This function was implemented based on zfs_write(), but instead of writing
|
* This function was implemented based on zfs_write(), but instead of writing
|
||||||
|
@ -192,9 +192,9 @@
|
||||||
* Some special cases to consider and how we address them:
|
* Some special cases to consider and how we address them:
|
||||||
* - The block we want to clone may have been created within the same
|
* - The block we want to clone may have been created within the same
|
||||||
* transaction group that we are trying to clone. Such block has no BP
|
* transaction group that we are trying to clone. Such block has no BP
|
||||||
* allocated yet, so cannot be immediately cloned. We return EXDEV.
|
* allocated yet, so cannot be immediately cloned. We return EAGAIN.
|
||||||
* - The block we want to clone may have been modified within the same
|
* - The block we want to clone may have been modified within the same
|
||||||
* transaction group. We return EXDEV.
|
* transaction group. We return EAGAIN.
|
||||||
* - A block may be cloned multiple times during one transaction group (that's
|
* - A block may be cloned multiple times during one transaction group (that's
|
||||||
* why pending list is actually a tree and not an append-only list - this
|
* why pending list is actually a tree and not an append-only list - this
|
||||||
* way we can figure out faster if this block is cloned for the first time
|
* way we can figure out faster if this block is cloned for the first time
|
||||||
|
@ -1544,6 +1544,37 @@ out:
|
||||||
return (B_FALSE);
|
return (B_FALSE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
uint64_t
|
||||||
|
brt_entry_get_refcount(spa_t *spa, const blkptr_t *bp)
|
||||||
|
{
|
||||||
|
brt_t *brt = spa->spa_brt;
|
||||||
|
brt_vdev_t *brtvd;
|
||||||
|
brt_entry_t bre_search, *bre;
|
||||||
|
uint64_t vdevid, refcnt;
|
||||||
|
int error;
|
||||||
|
|
||||||
|
brt_entry_fill(bp, &bre_search, &vdevid);
|
||||||
|
|
||||||
|
brt_rlock(brt);
|
||||||
|
|
||||||
|
brtvd = brt_vdev(brt, vdevid);
|
||||||
|
ASSERT(brtvd != NULL);
|
||||||
|
|
||||||
|
bre = avl_find(&brtvd->bv_tree, &bre_search, NULL);
|
||||||
|
if (bre == NULL) {
|
||||||
|
error = brt_entry_lookup(brt, brtvd, &bre_search);
|
||||||
|
ASSERT(error == 0 || error == ENOENT);
|
||||||
|
if (error == ENOENT)
|
||||||
|
refcnt = 0;
|
||||||
|
else
|
||||||
|
refcnt = bre_search.bre_refcount;
|
||||||
|
} else
|
||||||
|
refcnt = bre->bre_refcount;
|
||||||
|
|
||||||
|
brt_unlock(brt);
|
||||||
|
return (refcnt);
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
brt_prefetch(brt_t *brt, const blkptr_t *bp)
|
brt_prefetch(brt_t *brt, const blkptr_t *bp)
|
||||||
{
|
{
|
||||||
|
|
|
@ -165,7 +165,7 @@ dmu_object_byteswap_info_t dmu_ot_byteswap[DMU_BSWAP_NUMFUNCS] = {
|
||||||
{ zfs_acl_byteswap, "acl" }
|
{ zfs_acl_byteswap, "acl" }
|
||||||
};
|
};
|
||||||
|
|
||||||
static int
|
int
|
||||||
dmu_buf_hold_noread_by_dnode(dnode_t *dn, uint64_t offset,
|
dmu_buf_hold_noread_by_dnode(dnode_t *dn, uint64_t offset,
|
||||||
const void *tag, dmu_buf_t **dbp)
|
const void *tag, dmu_buf_t **dbp)
|
||||||
{
|
{
|
||||||
|
@ -185,6 +185,7 @@ dmu_buf_hold_noread_by_dnode(dnode_t *dn, uint64_t offset,
|
||||||
*dbp = &db->db;
|
*dbp = &db->db;
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
dmu_buf_hold_noread(objset_t *os, uint64_t object, uint64_t offset,
|
dmu_buf_hold_noread(objset_t *os, uint64_t object, uint64_t offset,
|
||||||
const void *tag, dmu_buf_t **dbp)
|
const void *tag, dmu_buf_t **dbp)
|
||||||
|
@ -1653,10 +1654,22 @@ dmu_sync_late_arrival(zio_t *pio, objset_t *os, dmu_sync_cb_t *done, zgd_t *zgd,
|
||||||
{
|
{
|
||||||
dmu_sync_arg_t *dsa;
|
dmu_sync_arg_t *dsa;
|
||||||
dmu_tx_t *tx;
|
dmu_tx_t *tx;
|
||||||
|
int error;
|
||||||
|
|
||||||
|
error = dbuf_read((dmu_buf_impl_t *)zgd->zgd_db, NULL,
|
||||||
|
DB_RF_CANFAIL | DB_RF_NOPREFETCH);
|
||||||
|
if (error != 0)
|
||||||
|
return (error);
|
||||||
|
|
||||||
tx = dmu_tx_create(os);
|
tx = dmu_tx_create(os);
|
||||||
dmu_tx_hold_space(tx, zgd->zgd_db->db_size);
|
dmu_tx_hold_space(tx, zgd->zgd_db->db_size);
|
||||||
if (dmu_tx_assign(tx, TXG_WAIT) != 0) {
|
/*
|
||||||
|
* This transaction does not produce any dirty data or log blocks, so
|
||||||
|
* it should not be throttled. All other cases wait for TXG sync, by
|
||||||
|
* which time the log block we are writing will be obsolete, so we can
|
||||||
|
* skip waiting and just return error here instead.
|
||||||
|
*/
|
||||||
|
if (dmu_tx_assign(tx, TXG_NOWAIT | TXG_NOTHROTTLE) != 0) {
|
||||||
dmu_tx_abort(tx);
|
dmu_tx_abort(tx);
|
||||||
/* Make zl_get_data do txg_waited_synced() */
|
/* Make zl_get_data do txg_waited_synced() */
|
||||||
return (SET_ERROR(EIO));
|
return (SET_ERROR(EIO));
|
||||||
|
|
|
@ -1292,7 +1292,7 @@ metaslab_group_allocatable(metaslab_group_t *mg, metaslab_group_t *rotor,
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If this metaslab group is below its qmax or it's
|
* If this metaslab group is below its qmax or it's
|
||||||
* the only allocatable metasable group, then attempt
|
* the only allocatable metaslab group, then attempt
|
||||||
* to allocate from it.
|
* to allocate from it.
|
||||||
*/
|
*/
|
||||||
if (qdepth < qmax || mc->mc_alloc_groups == 1)
|
if (qdepth < qmax || mc->mc_alloc_groups == 1)
|
||||||
|
@ -5101,7 +5101,7 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
|
||||||
zio_alloc_list_t *zal, int allocator)
|
zio_alloc_list_t *zal, int allocator)
|
||||||
{
|
{
|
||||||
metaslab_class_allocator_t *mca = &mc->mc_allocator[allocator];
|
metaslab_class_allocator_t *mca = &mc->mc_allocator[allocator];
|
||||||
metaslab_group_t *mg, *fast_mg, *rotor;
|
metaslab_group_t *mg, *rotor;
|
||||||
vdev_t *vd;
|
vdev_t *vd;
|
||||||
boolean_t try_hard = B_FALSE;
|
boolean_t try_hard = B_FALSE;
|
||||||
|
|
||||||
|
@ -5164,15 +5164,6 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
|
||||||
} else if (d != 0) {
|
} else if (d != 0) {
|
||||||
vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[d - 1]));
|
vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[d - 1]));
|
||||||
mg = vd->vdev_mg->mg_next;
|
mg = vd->vdev_mg->mg_next;
|
||||||
} else if (flags & METASLAB_FASTWRITE) {
|
|
||||||
mg = fast_mg = mca->mca_rotor;
|
|
||||||
|
|
||||||
do {
|
|
||||||
if (fast_mg->mg_vd->vdev_pending_fastwrite <
|
|
||||||
mg->mg_vd->vdev_pending_fastwrite)
|
|
||||||
mg = fast_mg;
|
|
||||||
} while ((fast_mg = fast_mg->mg_next) != mca->mca_rotor);
|
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
ASSERT(mca->mca_rotor != NULL);
|
ASSERT(mca->mca_rotor != NULL);
|
||||||
mg = mca->mca_rotor;
|
mg = mca->mca_rotor;
|
||||||
|
@ -5297,7 +5288,7 @@ top:
|
||||||
mg->mg_bias = 0;
|
mg->mg_bias = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((flags & METASLAB_FASTWRITE) ||
|
if ((flags & METASLAB_ZIL) ||
|
||||||
atomic_add_64_nv(&mca->mca_aliquot, asize) >=
|
atomic_add_64_nv(&mca->mca_aliquot, asize) >=
|
||||||
mg->mg_aliquot + mg->mg_bias) {
|
mg->mg_aliquot + mg->mg_bias) {
|
||||||
mca->mca_rotor = mg->mg_next;
|
mca->mca_rotor = mg->mg_next;
|
||||||
|
@ -5310,11 +5301,6 @@ top:
|
||||||
((flags & METASLAB_GANG_HEADER) ? 1 : 0));
|
((flags & METASLAB_GANG_HEADER) ? 1 : 0));
|
||||||
DVA_SET_ASIZE(&dva[d], asize);
|
DVA_SET_ASIZE(&dva[d], asize);
|
||||||
|
|
||||||
if (flags & METASLAB_FASTWRITE) {
|
|
||||||
atomic_add_64(&vd->vdev_pending_fastwrite,
|
|
||||||
psize);
|
|
||||||
}
|
|
||||||
|
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
next:
|
next:
|
||||||
|
@ -5950,55 +5936,6 @@ metaslab_claim(spa_t *spa, const blkptr_t *bp, uint64_t txg)
|
||||||
return (error);
|
return (error);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
|
||||||
metaslab_fastwrite_mark(spa_t *spa, const blkptr_t *bp)
|
|
||||||
{
|
|
||||||
const dva_t *dva = bp->blk_dva;
|
|
||||||
int ndvas = BP_GET_NDVAS(bp);
|
|
||||||
uint64_t psize = BP_GET_PSIZE(bp);
|
|
||||||
int d;
|
|
||||||
vdev_t *vd;
|
|
||||||
|
|
||||||
ASSERT(!BP_IS_HOLE(bp));
|
|
||||||
ASSERT(!BP_IS_EMBEDDED(bp));
|
|
||||||
ASSERT(psize > 0);
|
|
||||||
|
|
||||||
spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
|
|
||||||
|
|
||||||
for (d = 0; d < ndvas; d++) {
|
|
||||||
if ((vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[d]))) == NULL)
|
|
||||||
continue;
|
|
||||||
atomic_add_64(&vd->vdev_pending_fastwrite, psize);
|
|
||||||
}
|
|
||||||
|
|
||||||
spa_config_exit(spa, SCL_VDEV, FTAG);
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
|
||||||
metaslab_fastwrite_unmark(spa_t *spa, const blkptr_t *bp)
|
|
||||||
{
|
|
||||||
const dva_t *dva = bp->blk_dva;
|
|
||||||
int ndvas = BP_GET_NDVAS(bp);
|
|
||||||
uint64_t psize = BP_GET_PSIZE(bp);
|
|
||||||
int d;
|
|
||||||
vdev_t *vd;
|
|
||||||
|
|
||||||
ASSERT(!BP_IS_HOLE(bp));
|
|
||||||
ASSERT(!BP_IS_EMBEDDED(bp));
|
|
||||||
ASSERT(psize > 0);
|
|
||||||
|
|
||||||
spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
|
|
||||||
|
|
||||||
for (d = 0; d < ndvas; d++) {
|
|
||||||
if ((vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[d]))) == NULL)
|
|
||||||
continue;
|
|
||||||
ASSERT3U(vd->vdev_pending_fastwrite, >=, psize);
|
|
||||||
atomic_sub_64(&vd->vdev_pending_fastwrite, psize);
|
|
||||||
}
|
|
||||||
|
|
||||||
spa_config_exit(spa, SCL_VDEV, FTAG);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
metaslab_check_free_impl_cb(uint64_t inner, vdev_t *vd, uint64_t offset,
|
metaslab_check_free_impl_cb(uint64_t inner, vdev_t *vd, uint64_t offset,
|
||||||
uint64_t size, void *arg)
|
uint64_t size, void *arg)
|
||||||
|
|
|
@ -1192,7 +1192,6 @@ vdev_top_transfer(vdev_t *svd, vdev_t *tvd)
|
||||||
|
|
||||||
ASSERT(tvd == tvd->vdev_top);
|
ASSERT(tvd == tvd->vdev_top);
|
||||||
|
|
||||||
tvd->vdev_pending_fastwrite = svd->vdev_pending_fastwrite;
|
|
||||||
tvd->vdev_ms_array = svd->vdev_ms_array;
|
tvd->vdev_ms_array = svd->vdev_ms_array;
|
||||||
tvd->vdev_ms_shift = svd->vdev_ms_shift;
|
tvd->vdev_ms_shift = svd->vdev_ms_shift;
|
||||||
tvd->vdev_ms_count = svd->vdev_ms_count;
|
tvd->vdev_ms_count = svd->vdev_ms_count;
|
||||||
|
@ -1655,7 +1654,6 @@ vdev_metaslab_fini(vdev_t *vd)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ASSERT0(vd->vdev_ms_count);
|
ASSERT0(vd->vdev_ms_count);
|
||||||
ASSERT3U(vd->vdev_pending_fastwrite, ==, 0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef struct vdev_probe_stats {
|
typedef struct vdev_probe_stats {
|
||||||
|
|
|
@ -839,7 +839,6 @@ zfs_get_data(void *arg, uint64_t gen, lr_write_t *lr, char *buf,
|
||||||
uint64_t zp_gen;
|
uint64_t zp_gen;
|
||||||
|
|
||||||
ASSERT3P(lwb, !=, NULL);
|
ASSERT3P(lwb, !=, NULL);
|
||||||
ASSERT3P(zio, !=, NULL);
|
|
||||||
ASSERT3U(size, !=, 0);
|
ASSERT3U(size, !=, 0);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -889,6 +888,7 @@ zfs_get_data(void *arg, uint64_t gen, lr_write_t *lr, char *buf,
|
||||||
}
|
}
|
||||||
ASSERT(error == 0 || error == ENOENT);
|
ASSERT(error == 0 || error == ENOENT);
|
||||||
} else { /* indirect write */
|
} else { /* indirect write */
|
||||||
|
ASSERT3P(zio, !=, NULL);
|
||||||
/*
|
/*
|
||||||
* Have to lock the whole block to ensure when it's
|
* Have to lock the whole block to ensure when it's
|
||||||
* written out and its checksum is being calculated
|
* written out and its checksum is being calculated
|
||||||
|
@ -917,8 +917,8 @@ zfs_get_data(void *arg, uint64_t gen, lr_write_t *lr, char *buf,
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
if (error == 0)
|
if (error == 0)
|
||||||
error = dmu_buf_hold(os, object, offset, zgd, &db,
|
error = dmu_buf_hold_noread(os, object, offset, zgd,
|
||||||
DMU_READ_NO_PREFETCH);
|
&db);
|
||||||
|
|
||||||
if (error == 0) {
|
if (error == 0) {
|
||||||
blkptr_t *bp = &lr->lr_blkptr;
|
blkptr_t *bp = &lr->lr_blkptr;
|
||||||
|
@ -1028,6 +1028,10 @@ zfs_exit_two(zfsvfs_t *zfsvfs1, zfsvfs_t *zfsvfs2, const char *tag)
|
||||||
*
|
*
|
||||||
* On success, the function return the number of bytes copied in *lenp.
|
* On success, the function return the number of bytes copied in *lenp.
|
||||||
* Note, it doesn't return how much bytes are left to be copied.
|
* Note, it doesn't return how much bytes are left to be copied.
|
||||||
|
* On errors which are caused by any file system limitations or
|
||||||
|
* brt limitations `EINVAL` is returned. In the most cases a user
|
||||||
|
* requested bad parameters, it could be possible to clone the file but
|
||||||
|
* some parameters don't match the requirements.
|
||||||
*/
|
*/
|
||||||
int
|
int
|
||||||
zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp,
|
zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp,
|
||||||
|
@ -1171,7 +1175,7 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp,
|
||||||
* We cannot clone into files with different block size.
|
* We cannot clone into files with different block size.
|
||||||
*/
|
*/
|
||||||
if (inblksz != outzp->z_blksz && outzp->z_size > inblksz) {
|
if (inblksz != outzp->z_blksz && outzp->z_size > inblksz) {
|
||||||
error = SET_ERROR(EXDEV);
|
error = SET_ERROR(EINVAL);
|
||||||
goto unlock;
|
goto unlock;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1179,7 +1183,7 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp,
|
||||||
* Offsets and len must be at block boundries.
|
* Offsets and len must be at block boundries.
|
||||||
*/
|
*/
|
||||||
if ((inoff % inblksz) != 0 || (outoff % inblksz) != 0) {
|
if ((inoff % inblksz) != 0 || (outoff % inblksz) != 0) {
|
||||||
error = SET_ERROR(EXDEV);
|
error = SET_ERROR(EINVAL);
|
||||||
goto unlock;
|
goto unlock;
|
||||||
}
|
}
|
||||||
/*
|
/*
|
||||||
|
@ -1187,7 +1191,7 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp,
|
||||||
*/
|
*/
|
||||||
if ((len % inblksz) != 0 &&
|
if ((len % inblksz) != 0 &&
|
||||||
(len < inzp->z_size - inoff || len < outzp->z_size - outoff)) {
|
(len < inzp->z_size - inoff || len < outzp->z_size - outoff)) {
|
||||||
error = SET_ERROR(EXDEV);
|
error = SET_ERROR(EINVAL);
|
||||||
goto unlock;
|
goto unlock;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1242,13 +1246,11 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp,
|
||||||
&nbps);
|
&nbps);
|
||||||
if (error != 0) {
|
if (error != 0) {
|
||||||
/*
|
/*
|
||||||
* If we are tyring to clone a block that was created
|
* If we are trying to clone a block that was created
|
||||||
* in the current transaction group. Return an error,
|
* in the current transaction group, error will be
|
||||||
* so the caller can fallback to just copying the data.
|
* EAGAIN here, which we can just return to the caller
|
||||||
|
* so it can fallback if it likes.
|
||||||
*/
|
*/
|
||||||
if (error == EAGAIN) {
|
|
||||||
error = SET_ERROR(EXDEV);
|
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
/*
|
/*
|
||||||
|
|
693
module/zfs/zil.c
693
module/zfs/zil.c
File diff suppressed because it is too large
Load Diff
|
@ -3024,11 +3024,6 @@ zio_write_gang_block(zio_t *pio, metaslab_class_t *mc)
|
||||||
*/
|
*/
|
||||||
pio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
|
pio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
|
||||||
|
|
||||||
/*
|
|
||||||
* We didn't allocate this bp, so make sure it doesn't get unmarked.
|
|
||||||
*/
|
|
||||||
pio->io_flags &= ~ZIO_FLAG_FASTWRITE;
|
|
||||||
|
|
||||||
zio_nowait(zio);
|
zio_nowait(zio);
|
||||||
|
|
||||||
return (pio);
|
return (pio);
|
||||||
|
@ -3616,7 +3611,6 @@ zio_dva_allocate(zio_t *zio)
|
||||||
ASSERT3U(zio->io_prop.zp_copies, <=, spa_max_replication(spa));
|
ASSERT3U(zio->io_prop.zp_copies, <=, spa_max_replication(spa));
|
||||||
ASSERT3U(zio->io_size, ==, BP_GET_PSIZE(bp));
|
ASSERT3U(zio->io_size, ==, BP_GET_PSIZE(bp));
|
||||||
|
|
||||||
flags |= (zio->io_flags & ZIO_FLAG_FASTWRITE) ? METASLAB_FASTWRITE : 0;
|
|
||||||
if (zio->io_flags & ZIO_FLAG_NODATA)
|
if (zio->io_flags & ZIO_FLAG_NODATA)
|
||||||
flags |= METASLAB_DONT_THROTTLE;
|
flags |= METASLAB_DONT_THROTTLE;
|
||||||
if (zio->io_flags & ZIO_FLAG_GANG_CHILD)
|
if (zio->io_flags & ZIO_FLAG_GANG_CHILD)
|
||||||
|
@ -3776,7 +3770,7 @@ zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg, blkptr_t *new_bp,
|
||||||
* of, so we just hash the objset ID to pick the allocator to get
|
* of, so we just hash the objset ID to pick the allocator to get
|
||||||
* some parallelism.
|
* some parallelism.
|
||||||
*/
|
*/
|
||||||
int flags = METASLAB_FASTWRITE | METASLAB_ZIL;
|
int flags = METASLAB_ZIL;
|
||||||
int allocator = (uint_t)cityhash4(0, 0, 0,
|
int allocator = (uint_t)cityhash4(0, 0, 0,
|
||||||
os->os_dsl_dataset->ds_object) % spa->spa_alloc_count;
|
os->os_dsl_dataset->ds_object) % spa->spa_alloc_count;
|
||||||
error = metaslab_alloc(spa, spa_log_class(spa), size, new_bp, 1,
|
error = metaslab_alloc(spa, spa_log_class(spa), size, new_bp, 1,
|
||||||
|
@ -4472,8 +4466,8 @@ zio_ready(zio_t *zio)
|
||||||
zio_t *pio, *pio_next;
|
zio_t *pio, *pio_next;
|
||||||
zio_link_t *zl = NULL;
|
zio_link_t *zl = NULL;
|
||||||
|
|
||||||
if (zio_wait_for_children(zio, ZIO_CHILD_GANG_BIT | ZIO_CHILD_DDT_BIT,
|
if (zio_wait_for_children(zio, ZIO_CHILD_LOGICAL_BIT |
|
||||||
ZIO_WAIT_READY)) {
|
ZIO_CHILD_GANG_BIT | ZIO_CHILD_DDT_BIT, ZIO_WAIT_READY)) {
|
||||||
return (NULL);
|
return (NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4931,12 +4925,6 @@ zio_done(zio_t *zio)
|
||||||
zfs_ereport_free_checksum(zcr);
|
zfs_ereport_free_checksum(zcr);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (zio->io_flags & ZIO_FLAG_FASTWRITE && zio->io_bp &&
|
|
||||||
!BP_IS_HOLE(zio->io_bp) && !BP_IS_EMBEDDED(zio->io_bp) &&
|
|
||||||
!(zio->io_flags & ZIO_FLAG_NOPWRITE)) {
|
|
||||||
metaslab_fastwrite_unmark(zio->io_spa, zio->io_bp);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* It is the responsibility of the done callback to ensure that this
|
* It is the responsibility of the done callback to ensure that this
|
||||||
* particular zio is no longer discoverable for adoption, and as
|
* particular zio is no longer discoverable for adoption, and as
|
||||||
|
|
|
@ -698,7 +698,6 @@ zvol_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf,
|
||||||
int error;
|
int error;
|
||||||
|
|
||||||
ASSERT3P(lwb, !=, NULL);
|
ASSERT3P(lwb, !=, NULL);
|
||||||
ASSERT3P(zio, !=, NULL);
|
|
||||||
ASSERT3U(size, !=, 0);
|
ASSERT3U(size, !=, 0);
|
||||||
|
|
||||||
zgd = kmem_zalloc(sizeof (zgd_t), KM_SLEEP);
|
zgd = kmem_zalloc(sizeof (zgd_t), KM_SLEEP);
|
||||||
|
@ -717,6 +716,7 @@ zvol_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf,
|
||||||
error = dmu_read_by_dnode(zv->zv_dn, offset, size, buf,
|
error = dmu_read_by_dnode(zv->zv_dn, offset, size, buf,
|
||||||
DMU_READ_NO_PREFETCH);
|
DMU_READ_NO_PREFETCH);
|
||||||
} else { /* indirect write */
|
} else { /* indirect write */
|
||||||
|
ASSERT3P(zio, !=, NULL);
|
||||||
/*
|
/*
|
||||||
* Have to lock the whole block to ensure when it's written out
|
* Have to lock the whole block to ensure when it's written out
|
||||||
* and its checksum is being calculated that no one can change
|
* and its checksum is being calculated that no one can change
|
||||||
|
@ -727,8 +727,8 @@ zvol_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf,
|
||||||
offset = P2ALIGN_TYPED(offset, size, uint64_t);
|
offset = P2ALIGN_TYPED(offset, size, uint64_t);
|
||||||
zgd->zgd_lr = zfs_rangelock_enter(&zv->zv_rangelock, offset,
|
zgd->zgd_lr = zfs_rangelock_enter(&zv->zv_rangelock, offset,
|
||||||
size, RL_READER);
|
size, RL_READER);
|
||||||
error = dmu_buf_hold_by_dnode(zv->zv_dn, offset, zgd, &db,
|
error = dmu_buf_hold_noread_by_dnode(zv->zv_dn, offset, zgd,
|
||||||
DMU_READ_NO_PREFETCH);
|
&db);
|
||||||
if (error == 0) {
|
if (error == 0) {
|
||||||
blkptr_t *bp = &lr->lr_blkptr;
|
blkptr_t *bp = &lr->lr_blkptr;
|
||||||
|
|
||||||
|
|
|
@ -36,11 +36,13 @@ tags = ['functional', 'atime']
|
||||||
|
|
||||||
[tests/functional/block_cloning:Linux]
|
[tests/functional/block_cloning:Linux]
|
||||||
tests = ['block_cloning_copyfilerange', 'block_cloning_copyfilerange_partial',
|
tests = ['block_cloning_copyfilerange', 'block_cloning_copyfilerange_partial',
|
||||||
|
'block_cloning_copyfilerange_fallback',
|
||||||
'block_cloning_ficlone', 'block_cloning_ficlonerange',
|
'block_cloning_ficlone', 'block_cloning_ficlonerange',
|
||||||
'block_cloning_ficlonerange_partial',
|
'block_cloning_ficlonerange_partial',
|
||||||
'block_cloning_disabled_copyfilerange', 'block_cloning_disabled_ficlone',
|
'block_cloning_disabled_copyfilerange', 'block_cloning_disabled_ficlone',
|
||||||
'block_cloning_disabled_ficlonerange',
|
'block_cloning_disabled_ficlonerange',
|
||||||
'block_cloning_copyfilerange_cross_dataset']
|
'block_cloning_copyfilerange_cross_dataset',
|
||||||
|
'block_cloning_copyfilerange_fallback_same_txg']
|
||||||
tags = ['functional', 'block_cloning']
|
tags = ['functional', 'block_cloning']
|
||||||
|
|
||||||
[tests/functional/chattr:Linux]
|
[tests/functional/chattr:Linux]
|
||||||
|
|
|
@ -300,8 +300,12 @@ elif sys.platform.startswith('linux'):
|
||||||
['SKIP', cfr_reason],
|
['SKIP', cfr_reason],
|
||||||
'block_cloning/block_cloning_copyfilerange_partial':
|
'block_cloning/block_cloning_copyfilerange_partial':
|
||||||
['SKIP', cfr_reason],
|
['SKIP', cfr_reason],
|
||||||
|
'block_cloning/block_cloning_copyfilerange_fallback':
|
||||||
|
['SKIP', cfr_reason],
|
||||||
'block_cloning/block_cloning_copyfilerange_cross_dataset':
|
'block_cloning/block_cloning_copyfilerange_cross_dataset':
|
||||||
['SKIP', cfr_cross_reason],
|
['SKIP', cfr_cross_reason],
|
||||||
|
'block_cloning/block_cloning_copyfilerange_fallback_same_txg':
|
||||||
|
['SKIP', cfr_cross_reason],
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -212,7 +212,7 @@ main(int argc, char **argv)
|
||||||
|
|
||||||
int dfd = open(argv[optind+1], O_WRONLY|O_CREAT,
|
int dfd = open(argv[optind+1], O_WRONLY|O_CREAT,
|
||||||
S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH);
|
S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH);
|
||||||
if (sfd < 0) {
|
if (dfd < 0) {
|
||||||
fprintf(stderr, "open: %s: %s\n",
|
fprintf(stderr, "open: %s: %s\n",
|
||||||
argv[optind+1], strerror(errno));
|
argv[optind+1], strerror(errno));
|
||||||
close(sfd);
|
close(sfd);
|
||||||
|
|
|
@ -44,6 +44,7 @@
|
||||||
#include <fcntl.h>
|
#include <fcntl.h>
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
#include <sys/mman.h>
|
#include <sys/mman.h>
|
||||||
|
#include <sys/types.h>
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
|
|
||||||
int
|
int
|
||||||
|
|
|
@ -441,6 +441,8 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
|
||||||
functional/block_cloning/cleanup.ksh \
|
functional/block_cloning/cleanup.ksh \
|
||||||
functional/block_cloning/setup.ksh \
|
functional/block_cloning/setup.ksh \
|
||||||
functional/block_cloning/block_cloning_copyfilerange_cross_dataset.ksh \
|
functional/block_cloning/block_cloning_copyfilerange_cross_dataset.ksh \
|
||||||
|
functional/block_cloning/block_cloning_copyfilerange_fallback.ksh \
|
||||||
|
functional/block_cloning/block_cloning_copyfilerange_fallback_same_txg.ksh \
|
||||||
functional/block_cloning/block_cloning_copyfilerange.ksh \
|
functional/block_cloning/block_cloning_copyfilerange.ksh \
|
||||||
functional/block_cloning/block_cloning_copyfilerange_partial.ksh \
|
functional/block_cloning/block_cloning_copyfilerange_partial.ksh \
|
||||||
functional/block_cloning/block_cloning_disabled_copyfilerange.ksh \
|
functional/block_cloning/block_cloning_disabled_copyfilerange.ksh \
|
||||||
|
|
|
@ -0,0 +1,86 @@
|
||||||
|
#!/bin/ksh -p
|
||||||
|
#
|
||||||
|
# CDDL HEADER START
|
||||||
|
#
|
||||||
|
# The contents of this file are subject to the terms of the
|
||||||
|
# Common Development and Distribution License (the "License").
|
||||||
|
# You may not use this file except in compliance with the License.
|
||||||
|
#
|
||||||
|
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||||
|
# or https://opensource.org/licenses/CDDL-1.0.
|
||||||
|
# See the License for the specific language governing permissions
|
||||||
|
# and limitations under the License.
|
||||||
|
#
|
||||||
|
# When distributing Covered Code, include this CDDL HEADER in each
|
||||||
|
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||||
|
# If applicable, add the following below this CDDL HEADER, with the
|
||||||
|
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||||
|
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||||
|
#
|
||||||
|
# CDDL HEADER END
|
||||||
|
#
|
||||||
|
|
||||||
|
#
|
||||||
|
# Copyright (c) 2023, Klara Inc.
|
||||||
|
# Copyright (c) 2023, Rob Norris <robn@despairlabs.com>
|
||||||
|
#
|
||||||
|
|
||||||
|
. $STF_SUITE/include/libtest.shlib
|
||||||
|
. $STF_SUITE/tests/functional/block_cloning/block_cloning.kshlib
|
||||||
|
|
||||||
|
verify_runnable "global"
|
||||||
|
|
||||||
|
if [[ $(linux_version) -lt $(linux_version "4.5") ]]; then
|
||||||
|
log_unsupported "copy_file_range not available before Linux 4.5"
|
||||||
|
fi
|
||||||
|
|
||||||
|
claim="copy_file_range will fall back to copy when cloning not possible."
|
||||||
|
|
||||||
|
log_assert $claim
|
||||||
|
|
||||||
|
function cleanup
|
||||||
|
{
|
||||||
|
datasetexists $TESTPOOL && destroy_pool $TESTPOOL
|
||||||
|
}
|
||||||
|
|
||||||
|
log_onexit cleanup
|
||||||
|
|
||||||
|
log_must zpool create -o feature@block_cloning=enabled $TESTPOOL $DISKS
|
||||||
|
|
||||||
|
log_must dd if=/dev/urandom of=/$TESTPOOL/file bs=128K count=4
|
||||||
|
log_must sync_pool $TESTPOOL
|
||||||
|
|
||||||
|
|
||||||
|
log_note "Copying entire file with copy_file_range"
|
||||||
|
|
||||||
|
log_must clonefile -f /$TESTPOOL/file /$TESTPOOL/clone 0 0 524288
|
||||||
|
log_must sync_pool $TESTPOOL
|
||||||
|
|
||||||
|
log_must have_same_content /$TESTPOOL/file /$TESTPOOL/clone
|
||||||
|
|
||||||
|
typeset blocks=$(unique_blocks $TESTPOOL file $TESTPOOL clone)
|
||||||
|
log_must [ "$blocks" = "1 2 3 4" ]
|
||||||
|
|
||||||
|
|
||||||
|
log_note "Copying within a block with copy_file_range"
|
||||||
|
|
||||||
|
log_must clonefile -f /$TESTPOOL/file /$TESTPOOL/clone 32768 32768 65536
|
||||||
|
log_must sync_pool $TESTPOOL
|
||||||
|
|
||||||
|
log_must have_same_content /$TESTPOOL/file /$TESTPOOL/clone
|
||||||
|
|
||||||
|
typeset blocks=$(unique_blocks $TESTPOOL file $TESTPOOL clone)
|
||||||
|
log_must [ "$blocks" = "2 3 4" ]
|
||||||
|
|
||||||
|
|
||||||
|
log_note "Copying across a block with copy_file_range"
|
||||||
|
|
||||||
|
log_must clonefile -f /$TESTPOOL/file /$TESTPOOL/clone 327680 327680 131072
|
||||||
|
log_must sync_pool $TESTPOOL
|
||||||
|
|
||||||
|
log_must have_same_content /$TESTPOOL/file /$TESTPOOL/clone
|
||||||
|
|
||||||
|
typeset blocks=$(unique_blocks $TESTPOOL file $TESTPOOL clone)
|
||||||
|
log_must [ "$blocks" = "2" ]
|
||||||
|
|
||||||
|
log_pass $claim
|
|
@ -0,0 +1,66 @@
|
||||||
|
#!/bin/ksh -p
|
||||||
|
#
|
||||||
|
# CDDL HEADER START
|
||||||
|
#
|
||||||
|
# The contents of this file are subject to the terms of the
|
||||||
|
# Common Development and Distribution License (the "License").
|
||||||
|
# You may not use this file except in compliance with the License.
|
||||||
|
#
|
||||||
|
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||||
|
# or https://opensource.org/licenses/CDDL-1.0.
|
||||||
|
# See the License for the specific language governing permissions
|
||||||
|
# and limitations under the License.
|
||||||
|
#
|
||||||
|
# When distributing Covered Code, include this CDDL HEADER in each
|
||||||
|
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||||
|
# If applicable, add the following below this CDDL HEADER, with the
|
||||||
|
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||||
|
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||||
|
#
|
||||||
|
# CDDL HEADER END
|
||||||
|
#
|
||||||
|
|
||||||
|
#
|
||||||
|
# Copyright (c) 2023, Klara Inc.
|
||||||
|
# Copyright (c) 2023, Rob Norris <robn@despairlabs.com>
|
||||||
|
#
|
||||||
|
|
||||||
|
. $STF_SUITE/include/libtest.shlib
|
||||||
|
. $STF_SUITE/tests/functional/block_cloning/block_cloning.kshlib
|
||||||
|
|
||||||
|
verify_runnable "global"
|
||||||
|
|
||||||
|
if [[ $(linux_version) -lt $(linux_version "4.5") ]]; then
|
||||||
|
log_unsupported "copy_file_range not available before Linux 4.5"
|
||||||
|
fi
|
||||||
|
|
||||||
|
claim="copy_file_range will fall back to copy when cloning on same txg"
|
||||||
|
|
||||||
|
log_assert $claim
|
||||||
|
|
||||||
|
typeset timeout=$(get_tunable TXG_TIMEOUT)
|
||||||
|
|
||||||
|
function cleanup
|
||||||
|
{
|
||||||
|
datasetexists $TESTPOOL && destroy_pool $TESTPOOL
|
||||||
|
set_tunable64 TXG_TIMEOUT $timeout
|
||||||
|
}
|
||||||
|
|
||||||
|
log_onexit cleanup
|
||||||
|
|
||||||
|
log_must zpool create -o feature@block_cloning=enabled $TESTPOOL $DISKS
|
||||||
|
|
||||||
|
log_must set_tunable64 TXG_TIMEOUT 5000
|
||||||
|
|
||||||
|
log_must dd if=/dev/urandom of=/$TESTPOOL/file bs=128K count=4
|
||||||
|
log_must clonefile -f /$TESTPOOL/file /$TESTPOOL/clone 0 0 524288
|
||||||
|
|
||||||
|
log_must sync_pool $TESTPOOL
|
||||||
|
|
||||||
|
log_must have_same_content /$TESTPOOL/file /$TESTPOOL/clone
|
||||||
|
|
||||||
|
typeset blocks=$(unique_blocks $TESTPOOL file $TESTPOOL clone)
|
||||||
|
log_must [ "$blocks" = "" ]
|
||||||
|
|
||||||
|
log_pass $claim
|
||||||
|
|
Loading…
Reference in New Issue