Compare commits
24 Commits
master
...
zfs-2.2.0-
Author | SHA1 | Date |
---|---|---|
Brian Behlendorf | 70232483b4 | |
Rob N | c5273e0c31 | |
Rob N | 685ae4429f | |
Alexander Motin | 81be809a25 | |
Alexander Motin | 8a6fde8213 | |
Alan Somers | b6f618f8ff | |
Alan Somers | 51a2b59767 | |
Tony Hutter | 8c81c0b05d | |
Chunwei Chen | b221f43943 | |
Ameer Hamza | e037327bfe | |
Yuri Pankov | 1a2e486d25 | |
Ameer Hamza | d8011707cc | |
Wojciech Małota-Wójcik | f5f5a2db95 | |
Alexander Motin | 83b0967c1f | |
Coleman Kane | 73ba5df31a | |
Coleman Kane | 1bc244ae93 | |
Coleman Kane | 931dc70550 | |
Yuri Pankov | 5299f4f289 | |
Alan Somers | f917cf1c03 | |
Alexander Motin | 56ed389a57 | |
Alexander Motin | e613e4bbe3 | |
Alexander Motin | b4e630b00c | |
Mateusz Guzik | bf6cd30796 | |
Alexander Motin | 1266cebf87 |
2
META
2
META
|
@ -2,7 +2,7 @@ Meta: 1
|
|||
Name: zfs
|
||||
Branch: 1.0
|
||||
Version: 2.2.0
|
||||
Release: rc1
|
||||
Release: rc2
|
||||
Release-Tags: relext
|
||||
License: CDDL
|
||||
Author: OpenZFS
|
||||
|
|
|
@ -416,6 +416,11 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
|
|||
FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, &vdev_guid) != 0)
|
||||
return;
|
||||
|
||||
if (vdev_guid == 0) {
|
||||
fmd_hdl_debug(hdl, "Got a zero GUID");
|
||||
return;
|
||||
}
|
||||
|
||||
if (spare) {
|
||||
int nspares = find_and_remove_spares(zhdl, vdev_guid);
|
||||
fmd_hdl_debug(hdl, "%d spares removed", nspares);
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
# Not following: a was not specified as input (see shellcheck -x). [SC1091]
|
||||
# Prefer putting braces around variable references even when not strictly required. [SC2250]
|
||||
# Consider invoking this command separately to avoid masking its return value (or use '|| true' to ignore). [SC2312]
|
||||
# Command appears to be unreachable. Check usage (or ignore if invoked indirectly). [SC2317]
|
||||
# In POSIX sh, 'local' is undefined. [SC2039] # older ShellCheck versions
|
||||
# In POSIX sh, 'local' is undefined. [SC3043] # newer ShellCheck versions
|
||||
|
||||
|
@ -18,7 +19,7 @@ PHONY += shellcheck
|
|||
_STGT = $(subst ^,/,$(subst shellcheck-here-,,$@))
|
||||
shellcheck-here-%:
|
||||
if HAVE_SHELLCHECK
|
||||
shellcheck --format=gcc --enable=all --exclude=SC1090,SC1091,SC2039,SC2250,SC2312,SC3043 $$([ -n "$(SHELLCHECK_SHELL)" ] && echo "--shell=$(SHELLCHECK_SHELL)") "$$([ -e "$(_STGT)" ] || echo "$(srcdir)/")$(_STGT)"
|
||||
shellcheck --format=gcc --enable=all --exclude=SC1090,SC1091,SC2039,SC2250,SC2312,SC2317,SC3043 $$([ -n "$(SHELLCHECK_SHELL)" ] && echo "--shell=$(SHELLCHECK_SHELL)") "$$([ -e "$(_STGT)" ] || echo "$(srcdir)/")$(_STGT)"
|
||||
else
|
||||
@echo "skipping shellcheck of" $(_STGT) "because shellcheck is not installed"
|
||||
endif
|
||||
|
|
|
@ -103,6 +103,33 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_CHECK_DISK_CHANGE], [
|
|||
])
|
||||
])
|
||||
|
||||
dnl #
|
||||
dnl # 6.5.x API change
|
||||
dnl # disk_check_media_change() was added
|
||||
dnl #
|
||||
dnl # Registers the compile test named disk_check_media_change; its snippet
dnl # calls disk_check_media_change() on a block device's bd_disk.
AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_DISK_CHECK_MEDIA_CHANGE], [
|
||||
ZFS_LINUX_TEST_SRC([disk_check_media_change], [
|
||||
#include <linux/fs.h>
|
||||
#include <linux/blkdev.h>
|
||||
], [
|
||||
struct block_device *bdev = NULL;
|
||||
bool error;
|
||||
|
||||
error = disk_check_media_change(bdev->bd_disk);
|
||||
])
|
||||
])
|
||||
|
||||
dnl # Reports the outcome of the disk_check_media_change compile test and
dnl # defines HAVE_DISK_CHECK_MEDIA_CHANGE when that test succeeded.
AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_DISK_CHECK_MEDIA_CHANGE], [
|
||||
AC_MSG_CHECKING([whether disk_check_media_change() exists])
|
||||
ZFS_LINUX_TEST_RESULT([disk_check_media_change], [
|
||||
AC_MSG_RESULT(yes)
|
||||
AC_DEFINE(HAVE_DISK_CHECK_MEDIA_CHANGE, 1,
|
||||
[disk_check_media_change() exists])
|
||||
], [
|
||||
AC_MSG_RESULT(no)
|
||||
])
|
||||
])
|
||||
|
||||
dnl #
|
||||
dnl # bdev_kobj() is introduced from 5.12
|
||||
dnl #
|
||||
|
@ -443,6 +470,29 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_GET_ERESTARTSYS], [
|
|||
])
|
||||
])
|
||||
|
||||
dnl #
|
||||
dnl # 6.5.x API change
|
||||
dnl # BLK_STS_NEXUS replaced with BLK_STS_RESV_CONFLICT
|
||||
dnl #
|
||||
dnl # Registers the compile test named blk_sts_resv_conflict; its snippet
dnl # assigns BLK_STS_RESV_CONFLICT to a blk_status_t variable.
AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_BLK_STS_RESV_CONFLICT], [
|
||||
ZFS_LINUX_TEST_SRC([blk_sts_resv_conflict], [
|
||||
#include <linux/blkdev.h>
|
||||
],[
|
||||
blk_status_t s __attribute__ ((unused)) = BLK_STS_RESV_CONFLICT;
|
||||
])
|
||||
])
|
||||
|
||||
dnl # Reports the outcome of the blk_sts_resv_conflict compile test and
dnl # defines HAVE_BLK_STS_RESV_CONFLICT when that test succeeded.
dnl # The brackets are balanced: one closer for ZFS_LINUX_TEST_RESULT and
dnl # one for AC_DEFUN, with no trailing stray closer.
AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_BLK_STS_RESV_CONFLICT], [
	AC_MSG_CHECKING([whether BLK_STS_RESV_CONFLICT is defined])
	ZFS_LINUX_TEST_RESULT([blk_sts_resv_conflict], [
		AC_DEFINE(HAVE_BLK_STS_RESV_CONFLICT, 1, [BLK_STS_RESV_CONFLICT is defined])
		AC_MSG_RESULT(yes)
	], [
		AC_MSG_RESULT(no)
	])
])
|
||||
|
||||
AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV], [
|
||||
ZFS_AC_KERNEL_SRC_BLKDEV_GET_BY_PATH
|
||||
ZFS_AC_KERNEL_SRC_BLKDEV_PUT
|
||||
|
@ -458,6 +508,8 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV], [
|
|||
ZFS_AC_KERNEL_SRC_BLKDEV_ISSUE_SECURE_ERASE
|
||||
ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_KOBJ
|
||||
ZFS_AC_KERNEL_SRC_BLKDEV_PART_TO_DEV
|
||||
ZFS_AC_KERNEL_SRC_BLKDEV_DISK_CHECK_MEDIA_CHANGE
|
||||
ZFS_AC_KERNEL_SRC_BLKDEV_BLK_STS_RESV_CONFLICT
|
||||
])
|
||||
|
||||
AC_DEFUN([ZFS_AC_KERNEL_BLKDEV], [
|
||||
|
@ -476,4 +528,6 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV], [
|
|||
ZFS_AC_KERNEL_BLKDEV_ISSUE_SECURE_ERASE
|
||||
ZFS_AC_KERNEL_BLKDEV_BDEV_KOBJ
|
||||
ZFS_AC_KERNEL_BLKDEV_PART_TO_DEV
|
||||
ZFS_AC_KERNEL_BLKDEV_DISK_CHECK_MEDIA_CHANGE
|
||||
ZFS_AC_KERNEL_BLKDEV_BLK_STS_RESV_CONFLICT
|
||||
])
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
Description=Rollback bootfs just before it is mounted
|
||||
Requisite=zfs-import.target
|
||||
After=zfs-import.target dracut-pre-mount.service zfs-snapshot-bootfs.service
|
||||
Before=dracut-mount.service
|
||||
Before=dracut-mount.service sysroot.mount
|
||||
DefaultDependencies=no
|
||||
ConditionKernelCommandLine=bootfs.rollback
|
||||
ConditionEnvironment=BOOTFS
|
||||
|
|
|
@ -36,7 +36,11 @@ struct xucred;
|
|||
typedef struct flock flock64_t;
|
||||
typedef struct vnode vnode_t;
|
||||
typedef struct vattr vattr_t;
|
||||
#if __FreeBSD_version < 1400093
|
||||
typedef enum vtype vtype_t;
|
||||
#else
|
||||
#define vtype_t __enum_uint8(vtype)
|
||||
#endif
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/queue.h>
|
||||
|
|
|
@ -181,7 +181,11 @@ bi_status_to_errno(blk_status_t status)
|
|||
return (ENOLINK);
|
||||
case BLK_STS_TARGET:
|
||||
return (EREMOTEIO);
|
||||
#ifdef HAVE_BLK_STS_RESV_CONFLICT
|
||||
case BLK_STS_RESV_CONFLICT:
|
||||
#else
|
||||
case BLK_STS_NEXUS:
|
||||
#endif
|
||||
return (EBADE);
|
||||
case BLK_STS_MEDIUM:
|
||||
return (ENODATA);
|
||||
|
@ -215,7 +219,11 @@ errno_to_bi_status(int error)
|
|||
case EREMOTEIO:
|
||||
return (BLK_STS_TARGET);
|
||||
case EBADE:
|
||||
#ifdef HAVE_BLK_STS_RESV_CONFLICT
|
||||
return (BLK_STS_RESV_CONFLICT);
|
||||
#else
|
||||
return (BLK_STS_NEXUS);
|
||||
#endif
|
||||
case ENODATA:
|
||||
return (BLK_STS_MEDIUM);
|
||||
case EILSEQ:
|
||||
|
@ -337,6 +345,8 @@ zfs_check_media_change(struct block_device *bdev)
|
|||
return (0);
|
||||
}
|
||||
#define vdev_bdev_reread_part(bdev) zfs_check_media_change(bdev)
|
||||
#elif defined(HAVE_DISK_CHECK_MEDIA_CHANGE)
|
||||
#define vdev_bdev_reread_part(bdev) disk_check_media_change(bdev->bd_disk)
|
||||
#else
|
||||
/*
|
||||
* This is encountered if check_disk_change() and bdev_check_media_change()
|
||||
|
|
|
@ -38,7 +38,7 @@ typedef unsigned long ulong_t;
|
|||
typedef unsigned long long u_longlong_t;
|
||||
typedef long long longlong_t;
|
||||
|
||||
typedef unsigned long intptr_t;
|
||||
typedef long intptr_t;
|
||||
typedef unsigned long long rlim64_t;
|
||||
|
||||
typedef struct task_struct kthread_t;
|
||||
|
|
|
@ -60,7 +60,7 @@ typedef struct bpobj {
|
|||
kmutex_t bpo_lock;
|
||||
objset_t *bpo_os;
|
||||
uint64_t bpo_object;
|
||||
int bpo_epb;
|
||||
uint32_t bpo_epb;
|
||||
uint8_t bpo_havecomp;
|
||||
uint8_t bpo_havesubobj;
|
||||
uint8_t bpo_havefreed;
|
||||
|
|
|
@ -36,8 +36,6 @@
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
extern uint64_t zfetch_array_rd_sz;
|
||||
|
||||
struct dnode; /* so we can reference dnode */
|
||||
|
||||
typedef struct zfetch {
|
||||
|
|
|
@ -102,8 +102,6 @@ extern "C" {
|
|||
#define FM_EREPORT_PAYLOAD_ZFS_ZIO_TIMESTAMP "zio_timestamp"
|
||||
#define FM_EREPORT_PAYLOAD_ZFS_ZIO_DELTA "zio_delta"
|
||||
#define FM_EREPORT_PAYLOAD_ZFS_PREV_STATE "prev_state"
|
||||
#define FM_EREPORT_PAYLOAD_ZFS_CKSUM_EXPECTED "cksum_expected"
|
||||
#define FM_EREPORT_PAYLOAD_ZFS_CKSUM_ACTUAL "cksum_actual"
|
||||
#define FM_EREPORT_PAYLOAD_ZFS_CKSUM_ALGO "cksum_algorithm"
|
||||
#define FM_EREPORT_PAYLOAD_ZFS_CKSUM_BYTESWAP "cksum_byteswap"
|
||||
#define FM_EREPORT_PAYLOAD_ZFS_BAD_OFFSET_RANGES "bad_ranges"
|
||||
|
@ -112,8 +110,6 @@ extern "C" {
|
|||
#define FM_EREPORT_PAYLOAD_ZFS_BAD_RANGE_CLEARS "bad_range_clears"
|
||||
#define FM_EREPORT_PAYLOAD_ZFS_BAD_SET_BITS "bad_set_bits"
|
||||
#define FM_EREPORT_PAYLOAD_ZFS_BAD_CLEARED_BITS "bad_cleared_bits"
|
||||
#define FM_EREPORT_PAYLOAD_ZFS_BAD_SET_HISTOGRAM "bad_set_histogram"
|
||||
#define FM_EREPORT_PAYLOAD_ZFS_BAD_CLEARED_HISTOGRAM "bad_cleared_histogram"
|
||||
#define FM_EREPORT_PAYLOAD_ZFS_SNAPSHOT_NAME "snapshot_name"
|
||||
#define FM_EREPORT_PAYLOAD_ZFS_DEVICE_NAME "device_name"
|
||||
#define FM_EREPORT_PAYLOAD_ZFS_RAW_DEVICE_NAME "raw_name"
|
||||
|
|
|
@ -723,16 +723,10 @@ typedef enum spa_mode {
|
|||
* Send TRIM commands in-line during normal pool operation while deleting.
|
||||
* OFF: no
|
||||
* ON: yes
|
||||
* NB: IN_FREEBSD_BASE is defined within the FreeBSD sources.
|
||||
*/
|
||||
typedef enum {
|
||||
SPA_AUTOTRIM_OFF = 0, /* default */
|
||||
SPA_AUTOTRIM_ON,
|
||||
#ifdef IN_FREEBSD_BASE
|
||||
SPA_AUTOTRIM_DEFAULT = SPA_AUTOTRIM_ON,
|
||||
#else
|
||||
SPA_AUTOTRIM_DEFAULT = SPA_AUTOTRIM_OFF,
|
||||
#endif
|
||||
} spa_autotrim_t;
|
||||
|
||||
/*
|
||||
|
|
|
@ -250,6 +250,7 @@ struct spa {
|
|||
uint64_t spa_min_ashift; /* of vdevs in normal class */
|
||||
uint64_t spa_max_ashift; /* of vdevs in normal class */
|
||||
uint64_t spa_min_alloc; /* of vdevs in normal class */
|
||||
uint64_t spa_gcd_alloc; /* of vdevs in normal class */
|
||||
uint64_t spa_config_guid; /* config pool guid */
|
||||
uint64_t spa_load_guid; /* spa_load initialized guid */
|
||||
uint64_t spa_last_synced_guid; /* last synced guid */
|
||||
|
|
|
@ -420,6 +420,7 @@ struct vdev {
|
|||
boolean_t vdev_copy_uberblocks; /* post expand copy uberblocks */
|
||||
boolean_t vdev_resilver_deferred; /* resilver deferred */
|
||||
boolean_t vdev_kobj_flag; /* kobj event record */
|
||||
boolean_t vdev_attaching; /* vdev attach ashift handling */
|
||||
vdev_queue_t vdev_queue; /* I/O deadline schedule queue */
|
||||
spa_aux_vdev_t *vdev_aux; /* for l2cache and spares vdevs */
|
||||
zio_t *vdev_probe_zio; /* root of current probe */
|
||||
|
|
|
@ -94,8 +94,6 @@ typedef const struct zio_checksum_info {
|
|||
} zio_checksum_info_t;
|
||||
|
||||
typedef struct zio_bad_cksum {
|
||||
zio_cksum_t zbc_expected;
|
||||
zio_cksum_t zbc_actual;
|
||||
const char *zbc_checksum_name;
|
||||
uint8_t zbc_byteswapped;
|
||||
uint8_t zbc_injected;
|
||||
|
|
|
@ -15,7 +15,7 @@
|
|||
.\" own identifying information:
|
||||
.\" Portions Copyright [yyyy] [name of copyright owner]
|
||||
.\"
|
||||
.Dd January 10, 2023
|
||||
.Dd July 21, 2023
|
||||
.Dt ZFS 4
|
||||
.Os
|
||||
.
|
||||
|
@ -239,6 +239,11 @@ relative to the pool.
|
|||
Make some blocks above a certain size be gang blocks.
|
||||
This option is used by the test suite to facilitate testing.
|
||||
.
|
||||
.It Sy metaslab_force_ganging_pct Ns = Ns Sy 3 Ns % Pq uint
|
||||
For blocks that could be forced to be a gang block (due to
|
||||
.Sy metaslab_force_ganging ) ,
|
||||
force this percentage of them to be gang blocks.
|
||||
.
|
||||
.It Sy zfs_ddt_zap_default_bs Ns = Ns Sy 15 Po 32 KiB Pc Pq int
|
||||
Default DDT ZAP data block size as a power of 2. Note that changing this after
|
||||
creating a DDT on the pool will not affect existing DDTs, only newly created
|
||||
|
@ -519,9 +524,6 @@ However, this is limited by
|
|||
Maximum micro ZAP size.
|
||||
A micro ZAP is upgraded to a fat ZAP, once it grows beyond the specified size.
|
||||
.
|
||||
.It Sy zfetch_array_rd_sz Ns = Ns Sy 1048576 Ns B Po 1 MiB Pc Pq u64
|
||||
If prefetching is enabled, disable prefetching for reads larger than this size.
|
||||
.
|
||||
.It Sy zfetch_min_distance Ns = Ns Sy 4194304 Ns B Po 4 MiB Pc Pq uint
|
||||
Min bytes to prefetch per stream.
|
||||
Prefetch distance starts from the demand access size and quickly grows to
|
||||
|
|
|
@ -26,7 +26,7 @@
|
|||
.\" Copyright 2017 Nexenta Systems, Inc.
|
||||
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
|
||||
.\"
|
||||
.Dd May 27, 2021
|
||||
.Dd July 11, 2023
|
||||
.Dt ZPOOL-EVENTS 8
|
||||
.Os
|
||||
.
|
||||
|
@ -305,10 +305,6 @@ The time when a given I/O request was submitted.
|
|||
The time required to service a given I/O request.
|
||||
.It Sy prev_state
|
||||
The previous state of the vdev.
|
||||
.It Sy cksum_expected
|
||||
The expected checksum value for the block.
|
||||
.It Sy cksum_actual
|
||||
The actual checksum value for an errant block.
|
||||
.It Sy cksum_algorithm
|
||||
Checksum algorithm used.
|
||||
See
|
||||
|
@ -362,23 +358,6 @@ Like
|
|||
but contains
|
||||
.Pq Ar good data No & ~( Ns Ar bad data ) ;
|
||||
that is, the bits set in the good data which are cleared in the bad data.
|
||||
.It Sy bad_set_histogram
|
||||
If this field exists, it is an array of counters.
|
||||
Each entry counts bits set in a particular bit of a big-endian uint64 type.
|
||||
The first entry counts bits
|
||||
set in the high-order bit of the first byte, the 9th byte, etc, and the last
|
||||
entry counts bits set of the low-order bit of the 8th byte, the 16th byte, etc.
|
||||
This information is useful for observing a stuck bit in a parallel data path,
|
||||
such as IDE or parallel SCSI.
|
||||
.It Sy bad_cleared_histogram
|
||||
If this field exists, it is an array of counters.
|
||||
Each entry counts bit clears in a particular bit of a big-endian uint64 type.
|
||||
The first entry counts bits
|
||||
clears of the high-order bit of the first byte, the 9th byte, etc, and the
|
||||
last entry counts clears of the low-order bit of the 8th byte, the 16th byte,
|
||||
etc.
|
||||
This information is useful for observing a stuck bit in a parallel data
|
||||
path, such as IDE or parallel SCSI.
|
||||
.El
|
||||
.
|
||||
.Sh I/O STAGES
|
||||
|
|
|
@ -6263,7 +6263,8 @@ zfs_freebsd_copy_file_range(struct vop_copy_file_range_args *ap)
|
|||
goto bad_write_fallback;
|
||||
}
|
||||
} else {
|
||||
#if __FreeBSD_version >= 1400086
|
||||
#if (__FreeBSD_version >= 1302506 && __FreeBSD_version < 1400000) || \
|
||||
__FreeBSD_version >= 1400086
|
||||
vn_lock_pair(invp, false, LK_EXCLUSIVE, outvp, false,
|
||||
LK_EXCLUSIVE);
|
||||
#else
|
||||
|
|
|
@ -1662,6 +1662,7 @@ zfs_umount(struct super_block *sb)
|
|||
}
|
||||
|
||||
zfsvfs_free(zfsvfs);
|
||||
sb->s_fs_info = NULL;
|
||||
return (0);
|
||||
}
|
||||
|
||||
|
|
|
@ -277,8 +277,6 @@ zpl_test_super(struct super_block *s, void *data)
|
|||
{
|
||||
zfsvfs_t *zfsvfs = s->s_fs_info;
|
||||
objset_t *os = data;
|
||||
int match;
|
||||
|
||||
/*
|
||||
* If the os doesn't match the z_os in the super_block, assume it is
|
||||
* not a match. Matching would imply a multimount of a dataset. It is
|
||||
|
@ -286,19 +284,7 @@ zpl_test_super(struct super_block *s, void *data)
|
|||
* that changes the z_os, e.g., rollback, where the match will be
|
||||
* missed, but in that case the user will get an EBUSY.
|
||||
*/
|
||||
if (zfsvfs == NULL || os != zfsvfs->z_os)
|
||||
return (0);
|
||||
|
||||
/*
|
||||
* If they do match, recheck with the lock held to prevent mounting the
|
||||
* wrong dataset since z_os can be stale when the teardown lock is held.
|
||||
*/
|
||||
if (zpl_enter(zfsvfs, FTAG) != 0)
|
||||
return (0);
|
||||
match = (os == zfsvfs->z_os);
|
||||
zpl_exit(zfsvfs, FTAG);
|
||||
|
||||
return (match);
|
||||
return (zfsvfs != NULL && os == zfsvfs->z_os);
|
||||
}
|
||||
|
||||
static struct super_block *
|
||||
|
@ -324,12 +310,35 @@ zpl_mount_impl(struct file_system_type *fs_type, int flags, zfs_mnt_t *zm)
|
|||
|
||||
s = sget(fs_type, zpl_test_super, set_anon_super, flags, os);
|
||||
|
||||
/*
|
||||
* Recheck with the lock held to prevent mounting the wrong dataset
|
||||
* since z_os can be stale when the teardown lock is held.
|
||||
*
|
||||
* We can't do this in zpl_test_super since it's under spinlock and
|
||||
* also s_umount lock is not held there so it would race with
|
||||
* zfs_umount and zfsvfs can be freed.
|
||||
*/
|
||||
if (!IS_ERR(s) && s->s_fs_info != NULL) {
|
||||
zfsvfs_t *zfsvfs = s->s_fs_info;
|
||||
if (zpl_enter(zfsvfs, FTAG) == 0) {
|
||||
if (os != zfsvfs->z_os)
|
||||
err = -SET_ERROR(EBUSY);
|
||||
zpl_exit(zfsvfs, FTAG);
|
||||
} else {
|
||||
err = -SET_ERROR(EBUSY);
|
||||
}
|
||||
}
|
||||
dsl_dataset_long_rele(dmu_objset_ds(os), FTAG);
|
||||
dsl_dataset_rele(dmu_objset_ds(os), FTAG);
|
||||
|
||||
if (IS_ERR(s))
|
||||
return (ERR_CAST(s));
|
||||
|
||||
if (err) {
|
||||
deactivate_locked_super(s);
|
||||
return (ERR_PTR(err));
|
||||
}
|
||||
|
||||
if (s->s_root == NULL) {
|
||||
err = zpl_fill_super(s, zm, flags & SB_SILENT ? 1 : 0);
|
||||
if (err) {
|
||||
|
|
|
@ -160,7 +160,7 @@ zpool_prop_init(void)
|
|||
"wait | continue | panic", "FAILMODE", failuremode_table,
|
||||
sfeatures);
|
||||
zprop_register_index(ZPOOL_PROP_AUTOTRIM, "autotrim",
|
||||
SPA_AUTOTRIM_DEFAULT, PROP_DEFAULT, ZFS_TYPE_POOL,
|
||||
SPA_AUTOTRIM_OFF, PROP_DEFAULT, ZFS_TYPE_POOL,
|
||||
"on | off", "AUTOTRIM", boolean_table, sfeatures);
|
||||
|
||||
/* hidden properties */
|
||||
|
|
|
@ -284,7 +284,17 @@ bpobj_iterate_blkptrs(bpobj_info_t *bpi, bpobj_itor_t func, void *arg,
|
|||
dmu_buf_t *dbuf = NULL;
|
||||
bpobj_t *bpo = bpi->bpi_bpo;
|
||||
|
||||
for (int64_t i = bpo->bpo_phys->bpo_num_blkptrs - 1; i >= start; i--) {
|
||||
int64_t i = bpo->bpo_phys->bpo_num_blkptrs - 1;
|
||||
uint64_t pe = P2ALIGN_TYPED(i, bpo->bpo_epb, uint64_t) *
|
||||
sizeof (blkptr_t);
|
||||
uint64_t ps = start * sizeof (blkptr_t);
|
||||
uint64_t pb = MAX((pe > dmu_prefetch_max) ? pe - dmu_prefetch_max : 0,
|
||||
ps);
|
||||
if (pe > pb) {
|
||||
dmu_prefetch(bpo->bpo_os, bpo->bpo_object, 0, pb, pe - pb,
|
||||
ZIO_PRIORITY_ASYNC_READ);
|
||||
}
|
||||
for (; i >= start; i--) {
|
||||
uint64_t offset = i * sizeof (blkptr_t);
|
||||
uint64_t blkoff = P2PHASE(i, bpo->bpo_epb);
|
||||
|
||||
|
@ -292,9 +302,16 @@ bpobj_iterate_blkptrs(bpobj_info_t *bpi, bpobj_itor_t func, void *arg,
|
|||
if (dbuf)
|
||||
dmu_buf_rele(dbuf, FTAG);
|
||||
err = dmu_buf_hold(bpo->bpo_os, bpo->bpo_object,
|
||||
offset, FTAG, &dbuf, 0);
|
||||
offset, FTAG, &dbuf, DMU_READ_NO_PREFETCH);
|
||||
if (err)
|
||||
break;
|
||||
pe = pb;
|
||||
pb = MAX((dbuf->db_offset > dmu_prefetch_max) ?
|
||||
dbuf->db_offset - dmu_prefetch_max : 0, ps);
|
||||
if (pe > pb) {
|
||||
dmu_prefetch(bpo->bpo_os, bpo->bpo_object, 0,
|
||||
pb, pe - pb, ZIO_PRIORITY_ASYNC_READ);
|
||||
}
|
||||
}
|
||||
|
||||
ASSERT3U(offset, >=, dbuf->db_offset);
|
||||
|
@ -466,22 +483,30 @@ bpobj_iterate_impl(bpobj_t *initial_bpo, bpobj_itor_t func, void *arg,
|
|||
int64_t i = bpi->bpi_unprocessed_subobjs - 1;
|
||||
uint64_t offset = i * sizeof (uint64_t);
|
||||
|
||||
uint64_t obj_from_sublist;
|
||||
uint64_t subobj;
|
||||
err = dmu_read(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs,
|
||||
offset, sizeof (uint64_t), &obj_from_sublist,
|
||||
DMU_READ_PREFETCH);
|
||||
offset, sizeof (uint64_t), &subobj,
|
||||
DMU_READ_NO_PREFETCH);
|
||||
if (err)
|
||||
break;
|
||||
bpobj_t *sublist = kmem_alloc(sizeof (bpobj_t),
|
||||
|
||||
bpobj_t *subbpo = kmem_alloc(sizeof (bpobj_t),
|
||||
KM_SLEEP);
|
||||
|
||||
err = bpobj_open(sublist, bpo->bpo_os,
|
||||
obj_from_sublist);
|
||||
if (err)
|
||||
err = bpobj_open(subbpo, bpo->bpo_os, subobj);
|
||||
if (err) {
|
||||
kmem_free(subbpo, sizeof (bpobj_t));
|
||||
break;
|
||||
}
|
||||
|
||||
list_insert_head(&stack, bpi_alloc(sublist, bpi, i));
|
||||
mutex_enter(&sublist->bpo_lock);
|
||||
if (subbpo->bpo_havesubobj &&
|
||||
subbpo->bpo_phys->bpo_subobjs != 0) {
|
||||
dmu_prefetch(subbpo->bpo_os,
|
||||
subbpo->bpo_phys->bpo_subobjs, 0, 0, 0,
|
||||
ZIO_PRIORITY_ASYNC_READ);
|
||||
}
|
||||
|
||||
list_insert_head(&stack, bpi_alloc(subbpo, bpi, i));
|
||||
mutex_enter(&subbpo->bpo_lock);
|
||||
bpi->bpi_unprocessed_subobjs--;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -89,7 +89,11 @@ static int zfs_dmu_offset_next_sync = 1;
|
|||
* helps to limit the amount of memory that can be used by prefetching.
|
||||
* Larger objects should be prefetched a bit at a time.
|
||||
*/
|
||||
#ifdef _ILP32
|
||||
uint_t dmu_prefetch_max = 8 * 1024 * 1024;
|
||||
#else
|
||||
uint_t dmu_prefetch_max = 8 * SPA_MAXBLOCKSIZE;
|
||||
#endif
|
||||
|
||||
const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = {
|
||||
{DMU_BSWAP_UINT8, TRUE, FALSE, FALSE, "unallocated" },
|
||||
|
@ -552,8 +556,7 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
|
|||
zio = zio_root(dn->dn_objset->os_spa, NULL, NULL,
|
||||
ZIO_FLAG_CANFAIL);
|
||||
blkid = dbuf_whichblock(dn, 0, offset);
|
||||
if ((flags & DMU_READ_NO_PREFETCH) == 0 &&
|
||||
length <= zfetch_array_rd_sz) {
|
||||
if ((flags & DMU_READ_NO_PREFETCH) == 0) {
|
||||
/*
|
||||
* Prepare the zfetch before initiating the demand reads, so
|
||||
* that if multiple threads block on same indirect block, we
|
||||
|
|
|
@ -1795,17 +1795,19 @@ receive_handle_existing_object(const struct receive_writer_arg *rwa,
|
|||
}
|
||||
|
||||
/*
|
||||
* The dmu does not currently support decreasing nlevels
|
||||
* or changing the number of dnode slots on an object. For
|
||||
* non-raw sends, this does not matter and the new object
|
||||
* can just use the previous one's nlevels. For raw sends,
|
||||
* however, the structure of the received dnode (including
|
||||
* nlevels and dnode slots) must match that of the send
|
||||
* side. Therefore, instead of using dmu_object_reclaim(),
|
||||
* we must free the object completely and call
|
||||
* dmu_object_claim_dnsize() instead.
|
||||
* The dmu does not currently support decreasing nlevels or changing
|
||||
* indirect block size if there is already one, same as changing the
|
||||
* number of dnode slots on an object. For non-raw sends this
|
||||
* does not matter and the new object can just use the previous one's
|
||||
* parameters. For raw sends, however, the structure of the received
|
||||
* dnode (including indirects and dnode slots) must match that of the
|
||||
* send side. Therefore, instead of using dmu_object_reclaim(), we
|
||||
* must free the object completely and call dmu_object_claim_dnsize()
|
||||
* instead.
|
||||
*/
|
||||
if ((rwa->raw && drro->drr_nlevels < doi->doi_indirection) ||
|
||||
if ((rwa->raw && ((doi->doi_indirection > 1 &&
|
||||
indblksz != doi->doi_metadata_block_size) ||
|
||||
drro->drr_nlevels < doi->doi_indirection)) ||
|
||||
dn_slots != doi->doi_dnodesize >> DNODE_SHIFT) {
|
||||
err = dmu_free_long_object(rwa->os, drro->drr_object);
|
||||
if (err != 0)
|
||||
|
|
|
@ -52,14 +52,19 @@ static unsigned int zfetch_max_streams = 8;
|
|||
static unsigned int zfetch_min_sec_reap = 1;
|
||||
/* max time before stream delete */
|
||||
static unsigned int zfetch_max_sec_reap = 2;
|
||||
#ifdef _ILP32
|
||||
/* min bytes to prefetch per stream (default 2MB) */
|
||||
static unsigned int zfetch_min_distance = 2 * 1024 * 1024;
|
||||
/* max bytes to prefetch per stream (default 8MB) */
|
||||
unsigned int zfetch_max_distance = 8 * 1024 * 1024;
|
||||
#else
|
||||
/* min bytes to prefetch per stream (default 4MB) */
|
||||
static unsigned int zfetch_min_distance = 4 * 1024 * 1024;
|
||||
/* max bytes to prefetch per stream (default 64MB) */
|
||||
unsigned int zfetch_max_distance = 64 * 1024 * 1024;
|
||||
#endif
|
||||
/* max bytes to prefetch indirects for per stream (default 64MB) */
|
||||
unsigned int zfetch_max_idistance = 64 * 1024 * 1024;
|
||||
/* max number of bytes in an array_read in which we allow prefetching (1MB) */
|
||||
uint64_t zfetch_array_rd_sz = 1024 * 1024;
|
||||
|
||||
typedef struct zfetch_stats {
|
||||
kstat_named_t zfetchstat_hits;
|
||||
|
@ -580,6 +585,3 @@ ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, max_distance, UINT, ZMOD_RW,
|
|||
|
||||
ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, max_idistance, UINT, ZMOD_RW,
|
||||
"Max bytes to prefetch indirects for per stream");
|
||||
|
||||
ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, array_rd_sz, U64, ZMOD_RW,
|
||||
"Number of bytes in a array_read");
|
||||
|
|
|
@ -1882,7 +1882,7 @@ dnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx)
|
|||
if (ibs == dn->dn_indblkshift)
|
||||
ibs = 0;
|
||||
|
||||
if (size >> SPA_MINBLOCKSHIFT == dn->dn_datablkszsec && ibs == 0)
|
||||
if (size == dn->dn_datablksz && ibs == 0)
|
||||
return (0);
|
||||
|
||||
rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
|
||||
|
@ -1905,6 +1905,8 @@ dnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx)
|
|||
if (ibs && dn->dn_nlevels != 1)
|
||||
goto fail;
|
||||
|
||||
dnode_setdirty(dn, tx);
|
||||
if (size != dn->dn_datablksz) {
|
||||
/* resize the old block */
|
||||
err = dbuf_hold_impl(dn, 0, 0, TRUE, FALSE, FTAG, &db);
|
||||
if (err == 0) {
|
||||
|
@ -1914,15 +1916,14 @@ dnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx)
|
|||
}
|
||||
|
||||
dnode_setdblksz(dn, size);
|
||||
dnode_setdirty(dn, tx);
|
||||
dn->dn_next_blksz[tx->tx_txg & TXG_MASK] = size;
|
||||
if (db)
|
||||
dbuf_rele(db, FTAG);
|
||||
}
|
||||
if (ibs) {
|
||||
dn->dn_indblkshift = ibs;
|
||||
dn->dn_next_indblkshift[tx->tx_txg & TXG_MASK] = ibs;
|
||||
}
|
||||
/* release after we have fixed the blocksize in the dnode */
|
||||
if (db)
|
||||
dbuf_rele(db, FTAG);
|
||||
|
||||
rw_exit(&dn->dn_struct_rwlock);
|
||||
return (0);
|
||||
|
|
|
@ -892,9 +892,9 @@ dsl_deadlist_merge(dsl_deadlist_t *dl, uint64_t obj, dmu_tx_t *tx)
|
|||
for (zap_cursor_init(&zc, dl->dl_os, obj);
|
||||
(error = zap_cursor_retrieve(&zc, za)) == 0;
|
||||
zap_cursor_advance(&zc)) {
|
||||
uint64_t mintxg = zfs_strtonum(za->za_name, NULL);
|
||||
dsl_deadlist_insert_bpobj(dl, za->za_first_integer, mintxg, tx);
|
||||
VERIFY0(zap_remove_int(dl->dl_os, obj, mintxg, tx));
|
||||
dsl_deadlist_insert_bpobj(dl, za->za_first_integer,
|
||||
zfs_strtonum(za->za_name, NULL), tx);
|
||||
VERIFY0(zap_remove(dl->dl_os, obj, za->za_name, tx));
|
||||
if (perror == 0) {
|
||||
dsl_deadlist_prefetch_bpobj(dl, pza->za_first_integer,
|
||||
zfs_strtonum(pza->za_name, NULL));
|
||||
|
|
|
@ -2015,6 +2015,11 @@ dsl_scan_prefetch_cb(zio_t *zio, const zbookmark_phys_t *zb, const blkptr_t *bp,
|
|||
zb->zb_objset, DMU_META_DNODE_OBJECT);
|
||||
|
||||
if (OBJSET_BUF_HAS_USERUSED(buf)) {
|
||||
if (OBJSET_BUF_HAS_PROJECTUSED(buf)) {
|
||||
dsl_scan_prefetch_dnode(scn,
|
||||
&osp->os_projectused_dnode, zb->zb_objset,
|
||||
DMU_PROJECTUSED_OBJECT);
|
||||
}
|
||||
dsl_scan_prefetch_dnode(scn,
|
||||
&osp->os_groupused_dnode, zb->zb_objset,
|
||||
DMU_GROUPUSED_OBJECT);
|
||||
|
@ -2075,10 +2080,16 @@ dsl_scan_prefetch_thread(void *arg)
|
|||
zio_flags |= ZIO_FLAG_RAW;
|
||||
}
|
||||
|
||||
/* We don't need data L1 buffer since we do not prefetch L0. */
|
||||
blkptr_t *bp = &spic->spic_bp;
|
||||
if (BP_GET_LEVEL(bp) == 1 && BP_GET_TYPE(bp) != DMU_OT_DNODE &&
|
||||
BP_GET_TYPE(bp) != DMU_OT_OBJSET)
|
||||
flags |= ARC_FLAG_NO_BUF;
|
||||
|
||||
/* issue the prefetch asynchronously */
|
||||
(void) arc_read(scn->scn_zio_root, scn->scn_dp->dp_spa,
|
||||
&spic->spic_bp, dsl_scan_prefetch_cb, spic->spic_spc,
|
||||
ZIO_PRIORITY_SCRUB, zio_flags, &flags, &spic->spic_zb);
|
||||
(void) arc_read(scn->scn_zio_root, spa, bp,
|
||||
dsl_scan_prefetch_cb, spic->spic_spc, ZIO_PRIORITY_SCRUB,
|
||||
zio_flags, &flags, &spic->spic_zb);
|
||||
|
||||
kmem_free(spic, sizeof (scan_prefetch_issue_ctx_t));
|
||||
}
|
||||
|
|
|
@ -58,6 +58,11 @@ static uint64_t metaslab_aliquot = 1024 * 1024;
|
|||
*/
|
||||
uint64_t metaslab_force_ganging = SPA_MAXBLOCKSIZE + 1;
|
||||
|
||||
/*
|
||||
* Percentage of blocks of size >= metaslab_force_ganging that are actually forced to be gang blocks.
|
||||
*/
|
||||
uint_t metaslab_force_ganging_pct = 3;
|
||||
|
||||
/*
|
||||
* In pools where the log space map feature is not enabled we touch
|
||||
* multiple metaslabs (and their respective space maps) with each
|
||||
|
@ -5109,7 +5114,9 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
|
|||
* damage can result in extremely long reconstruction times. This
|
||||
* will also test spilling from special to normal.
|
||||
*/
|
||||
if (psize >= metaslab_force_ganging && (random_in_range(100) < 3)) {
|
||||
if (psize >= metaslab_force_ganging &&
|
||||
metaslab_force_ganging_pct > 0 &&
|
||||
(random_in_range(100) < MIN(metaslab_force_ganging_pct, 100))) {
|
||||
metaslab_trace_add(zal, NULL, NULL, psize, d, TRACE_FORCE_GANG,
|
||||
allocator);
|
||||
return (SET_ERROR(ENOSPC));
|
||||
|
@ -6266,7 +6273,10 @@ ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, switch_threshold, INT, ZMOD_RW,
|
|||
"Segment-based metaslab selection maximum buckets before switching");
|
||||
|
||||
ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, force_ganging, U64, ZMOD_RW,
|
||||
"Blocks larger than this size are forced to be gang blocks");
|
||||
"Blocks larger than this size are sometimes forced to be gang blocks");
|
||||
|
||||
ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, force_ganging_pct, UINT, ZMOD_RW,
|
||||
"Percentage of large blocks that will be forced to be gang blocks");
|
||||
|
||||
ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, df_max_search, UINT, ZMOD_RW,
|
||||
"Max distance (bytes) to search forward before using size tree");
|
||||
|
|
|
@ -772,6 +772,7 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)
|
|||
spa->spa_min_ashift = INT_MAX;
|
||||
spa->spa_max_ashift = 0;
|
||||
spa->spa_min_alloc = INT_MAX;
|
||||
spa->spa_gcd_alloc = INT_MAX;
|
||||
|
||||
/* Reset cached value */
|
||||
spa->spa_dedup_dspace = ~0ULL;
|
||||
|
|
|
@ -889,9 +889,15 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
|
|||
&vd->vdev_not_present);
|
||||
|
||||
/*
|
||||
* Get the alignment requirement.
|
||||
* Get the alignment requirement. Ignore pool ashift for vdev
|
||||
* attach case.
|
||||
*/
|
||||
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ASHIFT, &vd->vdev_ashift);
|
||||
if (alloctype != VDEV_ALLOC_ATTACH) {
|
||||
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ASHIFT,
|
||||
&vd->vdev_ashift);
|
||||
} else {
|
||||
vd->vdev_attaching = B_TRUE;
|
||||
}
|
||||
|
||||
/*
|
||||
* Retrieve the vdev creation time.
|
||||
|
@ -1393,6 +1399,36 @@ vdev_remove_parent(vdev_t *cvd)
|
|||
vdev_free(mvd);
|
||||
}
|
||||
|
||||
/*
 * Return the greatest common divisor of a and b, computed with Euclid's
 * algorithm.  If one argument is zero the other is returned, so
 * vdev_gcd(0, 0) is 0.  Used to derive spa_gcd_alloc.
 */
static uint64_t
vdev_gcd(uint64_t a, uint64_t b)
{
	/* Replace (a, b) with (b, a mod b) until the remainder reaches 0. */
	while (b != 0) {
		uint64_t rem = a % b;

		a = b;
		b = rem;
	}
	return (a);
}
|
||||
|
||||
/*
|
||||
* Set spa_min_alloc and spa_gcd_alloc.
|
||||
*/
|
||||
static void
|
||||
vdev_spa_set_alloc(spa_t *spa, uint64_t min_alloc)
|
||||
{
|
||||
if (min_alloc < spa->spa_min_alloc)
|
||||
spa->spa_min_alloc = min_alloc;
|
||||
if (spa->spa_gcd_alloc == INT_MAX) {
|
||||
spa->spa_gcd_alloc = min_alloc;
|
||||
} else {
|
||||
spa->spa_gcd_alloc = vdev_gcd(min_alloc,
|
||||
spa->spa_gcd_alloc);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
vdev_metaslab_group_create(vdev_t *vd)
|
||||
{
|
||||
|
@ -1445,8 +1481,7 @@ vdev_metaslab_group_create(vdev_t *vd)
|
|||
spa->spa_min_ashift = vd->vdev_ashift;
|
||||
|
||||
uint64_t min_alloc = vdev_get_min_alloc(vd);
|
||||
if (min_alloc < spa->spa_min_alloc)
|
||||
spa->spa_min_alloc = min_alloc;
|
||||
vdev_spa_set_alloc(spa, min_alloc);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -2144,9 +2179,9 @@ vdev_open(vdev_t *vd)
|
|||
return (SET_ERROR(EDOM));
|
||||
}
|
||||
|
||||
if (vd->vdev_top == vd) {
|
||||
if (vd->vdev_top == vd && vd->vdev_attaching == B_FALSE)
|
||||
vdev_ashift_optimize(vd);
|
||||
}
|
||||
vd->vdev_attaching = B_FALSE;
|
||||
}
|
||||
if (vd->vdev_ashift != 0 && (vd->vdev_ashift < ASHIFT_MIN ||
|
||||
vd->vdev_ashift > ASHIFT_MAX)) {
|
||||
|
@ -2207,8 +2242,7 @@ vdev_open(vdev_t *vd)
|
|||
if (vd->vdev_top == vd && vd->vdev_ashift != 0 &&
|
||||
vd->vdev_islog == 0 && vd->vdev_aux == NULL) {
|
||||
uint64_t min_alloc = vdev_get_min_alloc(vd);
|
||||
if (min_alloc < spa->spa_min_alloc)
|
||||
spa->spa_min_alloc = min_alloc;
|
||||
vdev_spa_set_alloc(spa, min_alloc);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -5688,6 +5722,7 @@ vdev_props_set_sync(void *arg, dmu_tx_t *tx)
|
|||
objset_t *mos = spa->spa_meta_objset;
|
||||
nvpair_t *elem = NULL;
|
||||
uint64_t vdev_guid;
|
||||
uint64_t objid;
|
||||
nvlist_t *nvprops;
|
||||
|
||||
vdev_guid = fnvlist_lookup_uint64(nvp, ZPOOL_VDEV_PROPS_SET_VDEV);
|
||||
|
@ -5698,15 +5733,6 @@ vdev_props_set_sync(void *arg, dmu_tx_t *tx)
|
|||
if (vd == NULL)
|
||||
return;
|
||||
|
||||
mutex_enter(&spa->spa_props_lock);
|
||||
|
||||
while ((elem = nvlist_next_nvpair(nvprops, elem)) != NULL) {
|
||||
uint64_t intval, objid = 0;
|
||||
const char *strval;
|
||||
vdev_prop_t prop;
|
||||
const char *propname = nvpair_name(elem);
|
||||
zprop_type_t proptype;
|
||||
|
||||
/*
|
||||
* Set vdev property values in the vdev props mos object.
|
||||
*/
|
||||
|
@ -5717,12 +5743,18 @@ vdev_props_set_sync(void *arg, dmu_tx_t *tx)
|
|||
} else if (vd->vdev_leaf_zap != 0) {
|
||||
objid = vd->vdev_leaf_zap;
|
||||
} else {
|
||||
/*
|
||||
* XXX: implement vdev_props_set_check()
|
||||
*/
|
||||
panic("vdev not root/top/leaf");
|
||||
panic("unexpected vdev type");
|
||||
}
|
||||
|
||||
mutex_enter(&spa->spa_props_lock);
|
||||
|
||||
while ((elem = nvlist_next_nvpair(nvprops, elem)) != NULL) {
|
||||
uint64_t intval;
|
||||
const char *strval;
|
||||
vdev_prop_t prop;
|
||||
const char *propname = nvpair_name(elem);
|
||||
zprop_type_t proptype;
|
||||
|
||||
switch (prop = vdev_name_to_prop(propname)) {
|
||||
case VDEV_PROP_USERPROP:
|
||||
if (vdev_prop_user(propname)) {
|
||||
|
@ -5791,6 +5823,12 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
|
|||
|
||||
ASSERT(vd != NULL);
|
||||
|
||||
/* Check that vdev has a zap we can use */
|
||||
if (vd->vdev_root_zap == 0 &&
|
||||
vd->vdev_top_zap == 0 &&
|
||||
vd->vdev_leaf_zap == 0)
|
||||
return (SET_ERROR(EINVAL));
|
||||
|
||||
if (nvlist_lookup_uint64(innvl, ZPOOL_VDEV_PROPS_SET_VDEV,
|
||||
&vdev_guid) != 0)
|
||||
return (SET_ERROR(EINVAL));
|
||||
|
|
|
@ -1398,7 +1398,7 @@ vdev_indirect_checksum_error(zio_t *zio,
|
|||
vd->vdev_stat.vs_checksum_errors++;
|
||||
mutex_exit(&vd->vdev_stat_lock);
|
||||
|
||||
zio_bad_cksum_t zbc = {{{ 0 }}};
|
||||
zio_bad_cksum_t zbc = { 0 };
|
||||
abd_t *bad_abd = ic->ic_data;
|
||||
abd_t *good_abd = is->is_good_child->ic_data;
|
||||
(void) zfs_ereport_post_checksum(zio->io_spa, vd, NULL, zio,
|
||||
|
|
|
@ -1785,7 +1785,7 @@ vdev_raidz_checksum_error(zio_t *zio, raidz_col_t *rc, abd_t *bad_data)
|
|||
static int
|
||||
raidz_checksum_verify(zio_t *zio)
|
||||
{
|
||||
zio_bad_cksum_t zbc = {{{0}}};
|
||||
zio_bad_cksum_t zbc = {0};
|
||||
raidz_map_t *rm = zio->io_vsd;
|
||||
|
||||
int ret = zio_checksum_error(zio, &zbc);
|
||||
|
|
|
@ -754,10 +754,6 @@ zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out,
|
|||
#define MAX_RANGES 16
|
||||
|
||||
typedef struct zfs_ecksum_info {
|
||||
/* histograms of set and cleared bits by bit number in a 64-bit word */
|
||||
uint8_t zei_histogram_set[sizeof (uint64_t) * NBBY];
|
||||
uint8_t zei_histogram_cleared[sizeof (uint64_t) * NBBY];
|
||||
|
||||
/* inline arrays of bits set and cleared. */
|
||||
uint64_t zei_bits_set[ZFM_MAX_INLINE];
|
||||
uint64_t zei_bits_cleared[ZFM_MAX_INLINE];
|
||||
|
@ -781,7 +777,7 @@ typedef struct zfs_ecksum_info {
|
|||
} zfs_ecksum_info_t;
|
||||
|
||||
static void
|
||||
update_histogram(uint64_t value_arg, uint8_t *hist, uint32_t *count)
|
||||
update_bad_bits(uint64_t value_arg, uint32_t *count)
|
||||
{
|
||||
size_t i;
|
||||
size_t bits = 0;
|
||||
|
@ -789,11 +785,9 @@ update_histogram(uint64_t value_arg, uint8_t *hist, uint32_t *count)
|
|||
|
||||
/* We store the bits in big-endian (largest-first) order */
|
||||
for (i = 0; i < 64; i++) {
|
||||
if (value & (1ull << i)) {
|
||||
hist[63 - i]++;
|
||||
if (value & (1ull << i))
|
||||
++bits;
|
||||
}
|
||||
}
|
||||
/* update the count of bits changed */
|
||||
*count += bits;
|
||||
}
|
||||
|
@ -920,14 +914,6 @@ annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info,
|
|||
|
||||
if (info != NULL && info->zbc_has_cksum) {
|
||||
fm_payload_set(ereport,
|
||||
FM_EREPORT_PAYLOAD_ZFS_CKSUM_EXPECTED,
|
||||
DATA_TYPE_UINT64_ARRAY,
|
||||
sizeof (info->zbc_expected) / sizeof (uint64_t),
|
||||
(uint64_t *)&info->zbc_expected,
|
||||
FM_EREPORT_PAYLOAD_ZFS_CKSUM_ACTUAL,
|
||||
DATA_TYPE_UINT64_ARRAY,
|
||||
sizeof (info->zbc_actual) / sizeof (uint64_t),
|
||||
(uint64_t *)&info->zbc_actual,
|
||||
FM_EREPORT_PAYLOAD_ZFS_CKSUM_ALGO,
|
||||
DATA_TYPE_STRING,
|
||||
info->zbc_checksum_name,
|
||||
|
@ -1010,10 +996,8 @@ annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info,
|
|||
offset++;
|
||||
}
|
||||
|
||||
update_histogram(set, eip->zei_histogram_set,
|
||||
&eip->zei_range_sets[range]);
|
||||
update_histogram(cleared, eip->zei_histogram_cleared,
|
||||
&eip->zei_range_clears[range]);
|
||||
update_bad_bits(set, &eip->zei_range_sets[range]);
|
||||
update_bad_bits(cleared, &eip->zei_range_clears[range]);
|
||||
}
|
||||
|
||||
/* convert to byte offsets */
|
||||
|
@ -1049,15 +1033,6 @@ annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info,
|
|||
DATA_TYPE_UINT8_ARRAY,
|
||||
inline_size, (uint8_t *)eip->zei_bits_cleared,
|
||||
NULL);
|
||||
} else {
|
||||
fm_payload_set(ereport,
|
||||
FM_EREPORT_PAYLOAD_ZFS_BAD_SET_HISTOGRAM,
|
||||
DATA_TYPE_UINT8_ARRAY,
|
||||
NBBY * sizeof (uint64_t), eip->zei_histogram_set,
|
||||
FM_EREPORT_PAYLOAD_ZFS_BAD_CLEARED_HISTOGRAM,
|
||||
DATA_TYPE_UINT8_ARRAY,
|
||||
NBBY * sizeof (uint64_t), eip->zei_histogram_cleared,
|
||||
NULL);
|
||||
}
|
||||
return (eip);
|
||||
}
|
||||
|
|
|
@ -1596,6 +1596,19 @@ zio_shrink(zio_t *zio, uint64_t size)
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Round provided allocation size up to a value that can be allocated
|
||||
* by at least some vdev(s) in the pool with minimum or no additional
|
||||
* padding and without extra space usage on others
|
||||
*/
|
||||
static uint64_t
|
||||
zio_roundup_alloc_size(spa_t *spa, uint64_t size)
|
||||
{
|
||||
if (size > spa->spa_min_alloc)
|
||||
return (roundup(size, spa->spa_gcd_alloc));
|
||||
return (spa->spa_min_alloc);
|
||||
}
|
||||
|
||||
/*
|
||||
* ==========================================================================
|
||||
* Prepare to read and write logical blocks
|
||||
|
@ -1802,9 +1815,8 @@ zio_write_compress(zio_t *zio)
|
|||
* in that we charge for the padding used to fill out
|
||||
* the last sector.
|
||||
*/
|
||||
ASSERT3U(spa->spa_min_alloc, >=, SPA_MINBLOCKSHIFT);
|
||||
size_t rounded = (size_t)roundup(psize,
|
||||
spa->spa_min_alloc);
|
||||
size_t rounded = (size_t)zio_roundup_alloc_size(spa,
|
||||
psize);
|
||||
if (rounded >= lsize) {
|
||||
compress = ZIO_COMPRESS_OFF;
|
||||
zio_buf_free(cbuf, lsize);
|
||||
|
@ -1847,8 +1859,8 @@ zio_write_compress(zio_t *zio)
|
|||
* take this codepath because it will change the on-disk block
|
||||
* and decryption will fail.
|
||||
*/
|
||||
size_t rounded = MIN((size_t)roundup(psize,
|
||||
spa->spa_min_alloc), lsize);
|
||||
size_t rounded = MIN((size_t)zio_roundup_alloc_size(spa, psize),
|
||||
lsize);
|
||||
|
||||
if (rounded != psize) {
|
||||
abd_t *cdata = abd_alloc_linear(rounded, B_TRUE);
|
||||
|
|
|
@ -515,8 +515,6 @@ zio_checksum_error_impl(spa_t *spa, const blkptr_t *bp,
|
|||
}
|
||||
|
||||
if (info != NULL) {
|
||||
info->zbc_expected = expected_cksum;
|
||||
info->zbc_actual = actual_cksum;
|
||||
info->zbc_checksum_name = ci->ci_name;
|
||||
info->zbc_byteswapped = byteswap;
|
||||
info->zbc_injected = 0;
|
||||
|
|
|
@ -35,7 +35,7 @@
|
|||
#
|
||||
# STRATEGY:
|
||||
# 1. Create various pools with different ashift values.
|
||||
# 2. Verify 'attach -o ashift=<n>' works only with allowed values.
|
||||
# 2. Verify 'attach' works.
|
||||
#
|
||||
|
||||
verify_runnable "global"
|
||||
|
@ -65,28 +65,16 @@ log_must set_tunable32 VDEV_FILE_PHYSICAL_ASHIFT 16
|
|||
|
||||
typeset ashifts=("9" "10" "11" "12" "13" "14" "15" "16")
|
||||
for ashift in ${ashifts[@]}
|
||||
do
|
||||
for cmdval in ${ashifts[@]}
|
||||
do
|
||||
log_must zpool create -o ashift=$ashift $TESTPOOL1 $disk1
|
||||
log_must verify_ashift $disk1 $ashift
|
||||
|
||||
# ashift_of(attached_disk) <= ashift_of(existing_vdev)
|
||||
if [[ $cmdval -le $ashift ]]
|
||||
then
|
||||
log_must zpool attach -o ashift=$cmdval $TESTPOOL1 \
|
||||
$disk1 $disk2
|
||||
log_must zpool attach $TESTPOOL1 $disk1 $disk2
|
||||
log_must verify_ashift $disk2 $ashift
|
||||
else
|
||||
log_mustnot zpool attach -o ashift=$cmdval $TESTPOOL1 \
|
||||
$disk1 $disk2
|
||||
fi
|
||||
# clean things for the next run
|
||||
log_must zpool destroy $TESTPOOL1
|
||||
log_must zpool labelclear $disk1
|
||||
log_must zpool labelclear $disk2
|
||||
done
|
||||
done
|
||||
|
||||
typeset badvals=("off" "on" "1" "8" "17" "1b" "ff" "-")
|
||||
for badval in ${badvals[@]}
|
||||
|
|
|
@ -35,7 +35,7 @@
|
|||
#
|
||||
# STRATEGY:
|
||||
# 1. Create various pools with different ashift values.
|
||||
# 2. Verify 'replace -o ashift=<n>' works only with allowed values.
|
||||
# 2. Verify 'replace' works.
|
||||
#
|
||||
|
||||
verify_runnable "global"
|
||||
|
@ -65,28 +65,18 @@ log_must set_tunable32 VDEV_FILE_PHYSICAL_ASHIFT 16
|
|||
|
||||
typeset ashifts=("9" "10" "11" "12" "13" "14" "15" "16")
|
||||
for ashift in ${ashifts[@]}
|
||||
do
|
||||
for cmdval in ${ashifts[@]}
|
||||
do
|
||||
log_must zpool create -o ashift=$ashift $TESTPOOL1 $disk1
|
||||
log_must verify_ashift $disk1 $ashift
|
||||
# ashift_of(replacing_disk) <= ashift_of(existing_vdev)
|
||||
if [[ $cmdval -le $ashift ]]
|
||||
then
|
||||
log_must zpool replace -o ashift=$cmdval $TESTPOOL1 \
|
||||
$disk1 $disk2
|
||||
log_must zpool replace $TESTPOOL1 $disk1 $disk2
|
||||
log_must verify_ashift $disk2 $ashift
|
||||
wait_replacing $TESTPOOL1
|
||||
else
|
||||
log_mustnot zpool replace -o ashift=$cmdval $TESTPOOL1 \
|
||||
$disk1 $disk2
|
||||
fi
|
||||
# clean things for the next run
|
||||
log_must zpool destroy $TESTPOOL1
|
||||
log_must zpool labelclear $disk1
|
||||
log_must zpool labelclear $disk2
|
||||
done
|
||||
done
|
||||
|
||||
typeset badvals=("off" "on" "1" "8" "17" "1b" "ff" "-")
|
||||
for badval in ${badvals[@]}
|
||||
|
|
|
@ -34,10 +34,8 @@
|
|||
#
|
||||
# STRATEGY:
|
||||
# 1. Create a pool with default values.
|
||||
# 2. Verify 'zpool replace' uses the ashift pool property value when
|
||||
# replacing an existing device.
|
||||
# 3. Verify the default ashift value can still be overridden by manually
|
||||
# specifying '-o ashift=<n>' from the command line.
|
||||
# 2. Override the pool ashift property.
|
||||
# 3. Verify 'zpool replace' works.
|
||||
#
|
||||
|
||||
verify_runnable "global"
|
||||
|
@ -72,21 +70,9 @@ do
|
|||
do
|
||||
log_must zpool create -o ashift=$ashift $TESTPOOL1 $disk1
|
||||
log_must zpool set ashift=$pprop $TESTPOOL1
|
||||
# ashift_of(replacing_disk) <= ashift_of(existing_vdev)
|
||||
if [[ $pprop -le $ashift ]]
|
||||
then
|
||||
log_must zpool replace $TESTPOOL1 $disk1 $disk2
|
||||
wait_replacing $TESTPOOL1
|
||||
log_must verify_ashift $disk2 $ashift
|
||||
else
|
||||
# cannot replace if pool prop ashift > vdev ashift
|
||||
log_mustnot zpool replace $TESTPOOL1 $disk1 $disk2
|
||||
# verify we can override the pool prop value manually
|
||||
log_must zpool replace -o ashift=$ashift $TESTPOOL1 \
|
||||
$disk1 $disk2
|
||||
wait_replacing $TESTPOOL1
|
||||
log_must verify_ashift $disk2 $ashift
|
||||
fi
|
||||
# clean things for the next run
|
||||
log_must zpool destroy $TESTPOOL1
|
||||
log_must zpool labelclear $disk1
|
||||
|
|
Loading…
Reference in New Issue