Merge pull request #149 from truenas/truenas/zfs-2.2-release-rc2

[2.2] Various backports for the 2.2 release
This commit is contained in:
Ameer Hamza 2023-07-25 18:47:15 +05:00 committed by GitHub
commit 190fb1a5fa
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
39 changed files with 334 additions and 238 deletions

2
META
View File

@ -2,7 +2,7 @@ Meta: 1
Name: zfs Name: zfs
Branch: 1.0 Branch: 1.0
Version: 2.2.0 Version: 2.2.0
Release: rc1 Release: rc2
Release-Tags: relext Release-Tags: relext
License: CDDL License: CDDL
Author: OpenZFS Author: OpenZFS

View File

@ -416,6 +416,11 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, &vdev_guid) != 0) FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, &vdev_guid) != 0)
return; return;
if (vdev_guid == 0) {
fmd_hdl_debug(hdl, "Got a zero GUID");
return;
}
if (spare) { if (spare) {
int nspares = find_and_remove_spares(zhdl, vdev_guid); int nspares = find_and_remove_spares(zhdl, vdev_guid);
fmd_hdl_debug(hdl, "%d spares removed", nspares); fmd_hdl_debug(hdl, "%d spares removed", nspares);

View File

@ -4,6 +4,7 @@
# Not following: a was not specified as input (see shellcheck -x). [SC1091] # Not following: a was not specified as input (see shellcheck -x). [SC1091]
# Prefer putting braces around variable references even when not strictly required. [SC2250] # Prefer putting braces around variable references even when not strictly required. [SC2250]
# Consider invoking this command separately to avoid masking its return value (or use '|| true' to ignore). [SC2312] # Consider invoking this command separately to avoid masking its return value (or use '|| true' to ignore). [SC2312]
# Command appears to be unreachable. Check usage (or ignore if invoked indirectly). [SC2317]
# In POSIX sh, 'local' is undefined. [SC2039] # older ShellCheck versions # In POSIX sh, 'local' is undefined. [SC2039] # older ShellCheck versions
# In POSIX sh, 'local' is undefined. [SC3043] # newer ShellCheck versions # In POSIX sh, 'local' is undefined. [SC3043] # newer ShellCheck versions
@ -18,7 +19,7 @@ PHONY += shellcheck
_STGT = $(subst ^,/,$(subst shellcheck-here-,,$@)) _STGT = $(subst ^,/,$(subst shellcheck-here-,,$@))
shellcheck-here-%: shellcheck-here-%:
if HAVE_SHELLCHECK if HAVE_SHELLCHECK
shellcheck --format=gcc --enable=all --exclude=SC1090,SC1091,SC2039,SC2250,SC2312,SC3043 $$([ -n "$(SHELLCHECK_SHELL)" ] && echo "--shell=$(SHELLCHECK_SHELL)") "$$([ -e "$(_STGT)" ] || echo "$(srcdir)/")$(_STGT)" shellcheck --format=gcc --enable=all --exclude=SC1090,SC1091,SC2039,SC2250,SC2312,SC2317,SC3043 $$([ -n "$(SHELLCHECK_SHELL)" ] && echo "--shell=$(SHELLCHECK_SHELL)") "$$([ -e "$(_STGT)" ] || echo "$(srcdir)/")$(_STGT)"
else else
@echo "skipping shellcheck of" $(_STGT) "because shellcheck is not installed" @echo "skipping shellcheck of" $(_STGT) "because shellcheck is not installed"
endif endif

View File

@ -103,6 +103,33 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_CHECK_DISK_CHANGE], [
]) ])
]) ])
dnl #
dnl # 6.5.x API change
dnl # disk_check_media_change() was added
dnl #
AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_DISK_CHECK_MEDIA_CHANGE], [
ZFS_LINUX_TEST_SRC([disk_check_media_change], [
#include <linux/fs.h>
#include <linux/blkdev.h>
], [
struct block_device *bdev = NULL;
bool error;
error = disk_check_media_change(bdev->bd_disk);
])
])
AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_DISK_CHECK_MEDIA_CHANGE], [
AC_MSG_CHECKING([whether disk_check_media_change() exists])
ZFS_LINUX_TEST_RESULT([disk_check_media_change], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_DISK_CHECK_MEDIA_CHANGE, 1,
[disk_check_media_change() exists])
], [
AC_MSG_RESULT(no)
])
])
dnl # dnl #
dnl # bdev_kobj() is introduced from 5.12 dnl # bdev_kobj() is introduced from 5.12
dnl # dnl #
@ -443,6 +470,29 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_GET_ERESTARTSYS], [
]) ])
]) ])
dnl #
dnl # 6.5.x API change
dnl # BLK_STS_NEXUS replaced with BLK_STS_RESV_CONFLICT
dnl #
AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_BLK_STS_RESV_CONFLICT], [
ZFS_LINUX_TEST_SRC([blk_sts_resv_conflict], [
#include <linux/blkdev.h>
],[
blk_status_t s __attribute__ ((unused)) = BLK_STS_RESV_CONFLICT;
])
])
AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_BLK_STS_RESV_CONFLICT], [
AC_MSG_CHECKING([whether BLK_STS_RESV_CONFLICT is defined])
ZFS_LINUX_TEST_RESULT([blk_sts_resv_conflict], [
AC_DEFINE(HAVE_BLK_STS_RESV_CONFLICT, 1, [BLK_STS_RESV_CONFLICT is defined])
AC_MSG_RESULT(yes)
], [
AC_MSG_RESULT(no)
])
])
])
AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV], [ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV], [
ZFS_AC_KERNEL_SRC_BLKDEV_GET_BY_PATH ZFS_AC_KERNEL_SRC_BLKDEV_GET_BY_PATH
ZFS_AC_KERNEL_SRC_BLKDEV_PUT ZFS_AC_KERNEL_SRC_BLKDEV_PUT
@ -458,6 +508,8 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV], [
ZFS_AC_KERNEL_SRC_BLKDEV_ISSUE_SECURE_ERASE ZFS_AC_KERNEL_SRC_BLKDEV_ISSUE_SECURE_ERASE
ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_KOBJ ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_KOBJ
ZFS_AC_KERNEL_SRC_BLKDEV_PART_TO_DEV ZFS_AC_KERNEL_SRC_BLKDEV_PART_TO_DEV
ZFS_AC_KERNEL_SRC_BLKDEV_DISK_CHECK_MEDIA_CHANGE
ZFS_AC_KERNEL_SRC_BLKDEV_BLK_STS_RESV_CONFLICT
]) ])
AC_DEFUN([ZFS_AC_KERNEL_BLKDEV], [ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV], [
@ -476,4 +528,6 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV], [
ZFS_AC_KERNEL_BLKDEV_ISSUE_SECURE_ERASE ZFS_AC_KERNEL_BLKDEV_ISSUE_SECURE_ERASE
ZFS_AC_KERNEL_BLKDEV_BDEV_KOBJ ZFS_AC_KERNEL_BLKDEV_BDEV_KOBJ
ZFS_AC_KERNEL_BLKDEV_PART_TO_DEV ZFS_AC_KERNEL_BLKDEV_PART_TO_DEV
ZFS_AC_KERNEL_BLKDEV_DISK_CHECK_MEDIA_CHANGE
ZFS_AC_KERNEL_BLKDEV_BLK_STS_RESV_CONFLICT
]) ])

View File

@ -2,7 +2,7 @@
Description=Rollback bootfs just before it is mounted Description=Rollback bootfs just before it is mounted
Requisite=zfs-import.target Requisite=zfs-import.target
After=zfs-import.target dracut-pre-mount.service zfs-snapshot-bootfs.service After=zfs-import.target dracut-pre-mount.service zfs-snapshot-bootfs.service
Before=dracut-mount.service Before=dracut-mount.service sysroot.mount
DefaultDependencies=no DefaultDependencies=no
ConditionKernelCommandLine=bootfs.rollback ConditionKernelCommandLine=bootfs.rollback
ConditionEnvironment=BOOTFS ConditionEnvironment=BOOTFS

View File

@ -36,7 +36,11 @@ struct xucred;
typedef struct flock flock64_t; typedef struct flock flock64_t;
typedef struct vnode vnode_t; typedef struct vnode vnode_t;
typedef struct vattr vattr_t; typedef struct vattr vattr_t;
#if __FreeBSD_version < 1400093
typedef enum vtype vtype_t; typedef enum vtype vtype_t;
#else
#define vtype_t __enum_uint8(vtype)
#endif
#include <sys/types.h> #include <sys/types.h>
#include <sys/queue.h> #include <sys/queue.h>

View File

@ -181,7 +181,11 @@ bi_status_to_errno(blk_status_t status)
return (ENOLINK); return (ENOLINK);
case BLK_STS_TARGET: case BLK_STS_TARGET:
return (EREMOTEIO); return (EREMOTEIO);
#ifdef HAVE_BLK_STS_RESV_CONFLICT
case BLK_STS_RESV_CONFLICT:
#else
case BLK_STS_NEXUS: case BLK_STS_NEXUS:
#endif
return (EBADE); return (EBADE);
case BLK_STS_MEDIUM: case BLK_STS_MEDIUM:
return (ENODATA); return (ENODATA);
@ -215,7 +219,11 @@ errno_to_bi_status(int error)
case EREMOTEIO: case EREMOTEIO:
return (BLK_STS_TARGET); return (BLK_STS_TARGET);
case EBADE: case EBADE:
#ifdef HAVE_BLK_STS_RESV_CONFLICT
return (BLK_STS_RESV_CONFLICT);
#else
return (BLK_STS_NEXUS); return (BLK_STS_NEXUS);
#endif
case ENODATA: case ENODATA:
return (BLK_STS_MEDIUM); return (BLK_STS_MEDIUM);
case EILSEQ: case EILSEQ:
@ -337,6 +345,8 @@ zfs_check_media_change(struct block_device *bdev)
return (0); return (0);
} }
#define vdev_bdev_reread_part(bdev) zfs_check_media_change(bdev) #define vdev_bdev_reread_part(bdev) zfs_check_media_change(bdev)
#elif defined(HAVE_DISK_CHECK_MEDIA_CHANGE)
#define vdev_bdev_reread_part(bdev) disk_check_media_change(bdev->bd_disk)
#else #else
/* /*
* This is encountered if check_disk_change() and bdev_check_media_change() * This is encountered if check_disk_change() and bdev_check_media_change()

View File

@ -38,7 +38,7 @@ typedef unsigned long ulong_t;
typedef unsigned long long u_longlong_t; typedef unsigned long long u_longlong_t;
typedef long long longlong_t; typedef long long longlong_t;
typedef unsigned long intptr_t; typedef long intptr_t;
typedef unsigned long long rlim64_t; typedef unsigned long long rlim64_t;
typedef struct task_struct kthread_t; typedef struct task_struct kthread_t;

View File

@ -60,7 +60,7 @@ typedef struct bpobj {
kmutex_t bpo_lock; kmutex_t bpo_lock;
objset_t *bpo_os; objset_t *bpo_os;
uint64_t bpo_object; uint64_t bpo_object;
int bpo_epb; uint32_t bpo_epb;
uint8_t bpo_havecomp; uint8_t bpo_havecomp;
uint8_t bpo_havesubobj; uint8_t bpo_havesubobj;
uint8_t bpo_havefreed; uint8_t bpo_havefreed;

View File

@ -36,8 +36,6 @@
extern "C" { extern "C" {
#endif #endif
extern uint64_t zfetch_array_rd_sz;
struct dnode; /* so we can reference dnode */ struct dnode; /* so we can reference dnode */
typedef struct zfetch { typedef struct zfetch {

View File

@ -102,8 +102,6 @@ extern "C" {
#define FM_EREPORT_PAYLOAD_ZFS_ZIO_TIMESTAMP "zio_timestamp" #define FM_EREPORT_PAYLOAD_ZFS_ZIO_TIMESTAMP "zio_timestamp"
#define FM_EREPORT_PAYLOAD_ZFS_ZIO_DELTA "zio_delta" #define FM_EREPORT_PAYLOAD_ZFS_ZIO_DELTA "zio_delta"
#define FM_EREPORT_PAYLOAD_ZFS_PREV_STATE "prev_state" #define FM_EREPORT_PAYLOAD_ZFS_PREV_STATE "prev_state"
#define FM_EREPORT_PAYLOAD_ZFS_CKSUM_EXPECTED "cksum_expected"
#define FM_EREPORT_PAYLOAD_ZFS_CKSUM_ACTUAL "cksum_actual"
#define FM_EREPORT_PAYLOAD_ZFS_CKSUM_ALGO "cksum_algorithm" #define FM_EREPORT_PAYLOAD_ZFS_CKSUM_ALGO "cksum_algorithm"
#define FM_EREPORT_PAYLOAD_ZFS_CKSUM_BYTESWAP "cksum_byteswap" #define FM_EREPORT_PAYLOAD_ZFS_CKSUM_BYTESWAP "cksum_byteswap"
#define FM_EREPORT_PAYLOAD_ZFS_BAD_OFFSET_RANGES "bad_ranges" #define FM_EREPORT_PAYLOAD_ZFS_BAD_OFFSET_RANGES "bad_ranges"
@ -112,8 +110,6 @@ extern "C" {
#define FM_EREPORT_PAYLOAD_ZFS_BAD_RANGE_CLEARS "bad_range_clears" #define FM_EREPORT_PAYLOAD_ZFS_BAD_RANGE_CLEARS "bad_range_clears"
#define FM_EREPORT_PAYLOAD_ZFS_BAD_SET_BITS "bad_set_bits" #define FM_EREPORT_PAYLOAD_ZFS_BAD_SET_BITS "bad_set_bits"
#define FM_EREPORT_PAYLOAD_ZFS_BAD_CLEARED_BITS "bad_cleared_bits" #define FM_EREPORT_PAYLOAD_ZFS_BAD_CLEARED_BITS "bad_cleared_bits"
#define FM_EREPORT_PAYLOAD_ZFS_BAD_SET_HISTOGRAM "bad_set_histogram"
#define FM_EREPORT_PAYLOAD_ZFS_BAD_CLEARED_HISTOGRAM "bad_cleared_histogram"
#define FM_EREPORT_PAYLOAD_ZFS_SNAPSHOT_NAME "snapshot_name" #define FM_EREPORT_PAYLOAD_ZFS_SNAPSHOT_NAME "snapshot_name"
#define FM_EREPORT_PAYLOAD_ZFS_DEVICE_NAME "device_name" #define FM_EREPORT_PAYLOAD_ZFS_DEVICE_NAME "device_name"
#define FM_EREPORT_PAYLOAD_ZFS_RAW_DEVICE_NAME "raw_name" #define FM_EREPORT_PAYLOAD_ZFS_RAW_DEVICE_NAME "raw_name"

View File

@ -723,16 +723,10 @@ typedef enum spa_mode {
* Send TRIM commands in-line during normal pool operation while deleting. * Send TRIM commands in-line during normal pool operation while deleting.
* OFF: no * OFF: no
* ON: yes * ON: yes
* NB: IN_FREEBSD_BASE is defined within the FreeBSD sources.
*/ */
typedef enum { typedef enum {
SPA_AUTOTRIM_OFF = 0, /* default */ SPA_AUTOTRIM_OFF = 0, /* default */
SPA_AUTOTRIM_ON, SPA_AUTOTRIM_ON,
#ifdef IN_FREEBSD_BASE
SPA_AUTOTRIM_DEFAULT = SPA_AUTOTRIM_ON,
#else
SPA_AUTOTRIM_DEFAULT = SPA_AUTOTRIM_OFF,
#endif
} spa_autotrim_t; } spa_autotrim_t;
/* /*

View File

@ -250,6 +250,7 @@ struct spa {
uint64_t spa_min_ashift; /* of vdevs in normal class */ uint64_t spa_min_ashift; /* of vdevs in normal class */
uint64_t spa_max_ashift; /* of vdevs in normal class */ uint64_t spa_max_ashift; /* of vdevs in normal class */
uint64_t spa_min_alloc; /* of vdevs in normal class */ uint64_t spa_min_alloc; /* of vdevs in normal class */
uint64_t spa_gcd_alloc; /* of vdevs in normal class */
uint64_t spa_config_guid; /* config pool guid */ uint64_t spa_config_guid; /* config pool guid */
uint64_t spa_load_guid; /* spa_load initialized guid */ uint64_t spa_load_guid; /* spa_load initialized guid */
uint64_t spa_last_synced_guid; /* last synced guid */ uint64_t spa_last_synced_guid; /* last synced guid */

View File

@ -420,6 +420,7 @@ struct vdev {
boolean_t vdev_copy_uberblocks; /* post expand copy uberblocks */ boolean_t vdev_copy_uberblocks; /* post expand copy uberblocks */
boolean_t vdev_resilver_deferred; /* resilver deferred */ boolean_t vdev_resilver_deferred; /* resilver deferred */
boolean_t vdev_kobj_flag; /* kobj event record */ boolean_t vdev_kobj_flag; /* kobj event record */
boolean_t vdev_attaching; /* vdev attach ashift handling */
vdev_queue_t vdev_queue; /* I/O deadline schedule queue */ vdev_queue_t vdev_queue; /* I/O deadline schedule queue */
spa_aux_vdev_t *vdev_aux; /* for l2cache and spares vdevs */ spa_aux_vdev_t *vdev_aux; /* for l2cache and spares vdevs */
zio_t *vdev_probe_zio; /* root of current probe */ zio_t *vdev_probe_zio; /* root of current probe */

View File

@ -94,8 +94,6 @@ typedef const struct zio_checksum_info {
} zio_checksum_info_t; } zio_checksum_info_t;
typedef struct zio_bad_cksum { typedef struct zio_bad_cksum {
zio_cksum_t zbc_expected;
zio_cksum_t zbc_actual;
const char *zbc_checksum_name; const char *zbc_checksum_name;
uint8_t zbc_byteswapped; uint8_t zbc_byteswapped;
uint8_t zbc_injected; uint8_t zbc_injected;

View File

@ -15,7 +15,7 @@
.\" own identifying information: .\" own identifying information:
.\" Portions Copyright [yyyy] [name of copyright owner] .\" Portions Copyright [yyyy] [name of copyright owner]
.\" .\"
.Dd January 10, 2023 .Dd July 21, 2023
.Dt ZFS 4 .Dt ZFS 4
.Os .Os
. .
@ -239,6 +239,11 @@ relative to the pool.
Make some blocks above a certain size be gang blocks. Make some blocks above a certain size be gang blocks.
This option is used by the test suite to facilitate testing. This option is used by the test suite to facilitate testing.
. .
.It Sy metaslab_force_ganging_pct Ns = Ns Sy 3 Ns % Pq uint
For blocks that could be forced to be a gang block (due to
.Sy metaslab_force_ganging ) ,
force this many of them to be gang blocks.
.
.It Sy zfs_ddt_zap_default_bs Ns = Ns Sy 15 Po 32 KiB Pc Pq int .It Sy zfs_ddt_zap_default_bs Ns = Ns Sy 15 Po 32 KiB Pc Pq int
Default DDT ZAP data block size as a power of 2. Note that changing this after Default DDT ZAP data block size as a power of 2. Note that changing this after
creating a DDT on the pool will not affect existing DDTs, only newly created creating a DDT on the pool will not affect existing DDTs, only newly created
@ -519,9 +524,6 @@ However, this is limited by
Maximum micro ZAP size. Maximum micro ZAP size.
A micro ZAP is upgraded to a fat ZAP, once it grows beyond the specified size. A micro ZAP is upgraded to a fat ZAP, once it grows beyond the specified size.
. .
.It Sy zfetch_array_rd_sz Ns = Ns Sy 1048576 Ns B Po 1 MiB Pc Pq u64
If prefetching is enabled, disable prefetching for reads larger than this size.
.
.It Sy zfetch_min_distance Ns = Ns Sy 4194304 Ns B Po 4 MiB Pc Pq uint .It Sy zfetch_min_distance Ns = Ns Sy 4194304 Ns B Po 4 MiB Pc Pq uint
Min bytes to prefetch per stream. Min bytes to prefetch per stream.
Prefetch distance starts from the demand access size and quickly grows to Prefetch distance starts from the demand access size and quickly grows to

View File

@ -26,7 +26,7 @@
.\" Copyright 2017 Nexenta Systems, Inc. .\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\" .\"
.Dd May 27, 2021 .Dd July 11, 2023
.Dt ZPOOL-EVENTS 8 .Dt ZPOOL-EVENTS 8
.Os .Os
. .
@ -305,10 +305,6 @@ The time when a given I/O request was submitted.
The time required to service a given I/O request. The time required to service a given I/O request.
.It Sy prev_state .It Sy prev_state
The previous state of the vdev. The previous state of the vdev.
.It Sy cksum_expected
The expected checksum value for the block.
.It Sy cksum_actual
The actual checksum value for an errant block.
.It Sy cksum_algorithm .It Sy cksum_algorithm
Checksum algorithm used. Checksum algorithm used.
See See
@ -362,23 +358,6 @@ Like
but contains but contains
.Pq Ar good data No & ~( Ns Ar bad data ) ; .Pq Ar good data No & ~( Ns Ar bad data ) ;
that is, the bits set in the good data which are cleared in the bad data. that is, the bits set in the good data which are cleared in the bad data.
.It Sy bad_set_histogram
If this field exists, it is an array of counters.
Each entry counts bits set in a particular bit of a big-endian uint64 type.
The first entry counts bits
set in the high-order bit of the first byte, the 9th byte, etc, and the last
entry counts bits set of the low-order bit of the 8th byte, the 16th byte, etc.
This information is useful for observing a stuck bit in a parallel data path,
such as IDE or parallel SCSI.
.It Sy bad_cleared_histogram
If this field exists, it is an array of counters.
Each entry counts bit clears in a particular bit of a big-endian uint64 type.
The first entry counts bits
clears of the high-order bit of the first byte, the 9th byte, etc, and the
last entry counts clears of the low-order bit of the 8th byte, the 16th byte,
etc.
This information is useful for observing a stuck bit in a parallel data
path, such as IDE or parallel SCSI.
.El .El
. .
.Sh I/O STAGES .Sh I/O STAGES

View File

@ -6263,7 +6263,8 @@ zfs_freebsd_copy_file_range(struct vop_copy_file_range_args *ap)
goto bad_write_fallback; goto bad_write_fallback;
} }
} else { } else {
#if __FreeBSD_version >= 1400086 #if (__FreeBSD_version >= 1302506 && __FreeBSD_version < 1400000) || \
__FreeBSD_version >= 1400086
vn_lock_pair(invp, false, LK_EXCLUSIVE, outvp, false, vn_lock_pair(invp, false, LK_EXCLUSIVE, outvp, false,
LK_EXCLUSIVE); LK_EXCLUSIVE);
#else #else

View File

@ -1680,6 +1680,7 @@ zfs_umount(struct super_block *sb)
} }
zfsvfs_free(zfsvfs); zfsvfs_free(zfsvfs);
sb->s_fs_info = NULL;
return (0); return (0);
} }

View File

@ -280,8 +280,6 @@ zpl_test_super(struct super_block *s, void *data)
{ {
zfsvfs_t *zfsvfs = s->s_fs_info; zfsvfs_t *zfsvfs = s->s_fs_info;
objset_t *os = data; objset_t *os = data;
int match;
/* /*
* If the os doesn't match the z_os in the super_block, assume it is * If the os doesn't match the z_os in the super_block, assume it is
* not a match. Matching would imply a multimount of a dataset. It is * not a match. Matching would imply a multimount of a dataset. It is
@ -289,19 +287,7 @@ zpl_test_super(struct super_block *s, void *data)
* that changes the z_os, e.g., rollback, where the match will be * that changes the z_os, e.g., rollback, where the match will be
* missed, but in that case the user will get an EBUSY. * missed, but in that case the user will get an EBUSY.
*/ */
if (zfsvfs == NULL || os != zfsvfs->z_os) return (zfsvfs != NULL && os == zfsvfs->z_os);
return (0);
/*
* If they do match, recheck with the lock held to prevent mounting the
* wrong dataset since z_os can be stale when the teardown lock is held.
*/
if (zpl_enter(zfsvfs, FTAG) != 0)
return (0);
match = (os == zfsvfs->z_os);
zpl_exit(zfsvfs, FTAG);
return (match);
} }
static struct super_block * static struct super_block *
@ -327,12 +313,35 @@ zpl_mount_impl(struct file_system_type *fs_type, int flags, zfs_mnt_t *zm)
s = sget(fs_type, zpl_test_super, set_anon_super, flags, os); s = sget(fs_type, zpl_test_super, set_anon_super, flags, os);
/*
* Recheck with the lock held to prevent mounting the wrong dataset
* since z_os can be stale when the teardown lock is held.
*
* We can't do this in zpl_test_super since it's under spinlock and
* also s_umount lock is not held there so it would race with
* zfs_umount and zfsvfs can be freed.
*/
if (!IS_ERR(s) && s->s_fs_info != NULL) {
zfsvfs_t *zfsvfs = s->s_fs_info;
if (zpl_enter(zfsvfs, FTAG) == 0) {
if (os != zfsvfs->z_os)
err = -SET_ERROR(EBUSY);
zpl_exit(zfsvfs, FTAG);
} else {
err = -SET_ERROR(EBUSY);
}
}
dsl_dataset_long_rele(dmu_objset_ds(os), FTAG); dsl_dataset_long_rele(dmu_objset_ds(os), FTAG);
dsl_dataset_rele(dmu_objset_ds(os), FTAG); dsl_dataset_rele(dmu_objset_ds(os), FTAG);
if (IS_ERR(s)) if (IS_ERR(s))
return (ERR_CAST(s)); return (ERR_CAST(s));
if (err) {
deactivate_locked_super(s);
return (ERR_PTR(err));
}
if (s->s_root == NULL) { if (s->s_root == NULL) {
err = zpl_fill_super(s, zm, flags & SB_SILENT ? 1 : 0); err = zpl_fill_super(s, zm, flags & SB_SILENT ? 1 : 0);
if (err) { if (err) {

View File

@ -160,7 +160,7 @@ zpool_prop_init(void)
"wait | continue | panic", "FAILMODE", failuremode_table, "wait | continue | panic", "FAILMODE", failuremode_table,
sfeatures); sfeatures);
zprop_register_index(ZPOOL_PROP_AUTOTRIM, "autotrim", zprop_register_index(ZPOOL_PROP_AUTOTRIM, "autotrim",
SPA_AUTOTRIM_DEFAULT, PROP_DEFAULT, ZFS_TYPE_POOL, SPA_AUTOTRIM_OFF, PROP_DEFAULT, ZFS_TYPE_POOL,
"on | off", "AUTOTRIM", boolean_table, sfeatures); "on | off", "AUTOTRIM", boolean_table, sfeatures);
/* hidden properties */ /* hidden properties */

View File

@ -284,7 +284,17 @@ bpobj_iterate_blkptrs(bpobj_info_t *bpi, bpobj_itor_t func, void *arg,
dmu_buf_t *dbuf = NULL; dmu_buf_t *dbuf = NULL;
bpobj_t *bpo = bpi->bpi_bpo; bpobj_t *bpo = bpi->bpi_bpo;
for (int64_t i = bpo->bpo_phys->bpo_num_blkptrs - 1; i >= start; i--) { int64_t i = bpo->bpo_phys->bpo_num_blkptrs - 1;
uint64_t pe = P2ALIGN_TYPED(i, bpo->bpo_epb, uint64_t) *
sizeof (blkptr_t);
uint64_t ps = start * sizeof (blkptr_t);
uint64_t pb = MAX((pe > dmu_prefetch_max) ? pe - dmu_prefetch_max : 0,
ps);
if (pe > pb) {
dmu_prefetch(bpo->bpo_os, bpo->bpo_object, 0, pb, pe - pb,
ZIO_PRIORITY_ASYNC_READ);
}
for (; i >= start; i--) {
uint64_t offset = i * sizeof (blkptr_t); uint64_t offset = i * sizeof (blkptr_t);
uint64_t blkoff = P2PHASE(i, bpo->bpo_epb); uint64_t blkoff = P2PHASE(i, bpo->bpo_epb);
@ -292,9 +302,16 @@ bpobj_iterate_blkptrs(bpobj_info_t *bpi, bpobj_itor_t func, void *arg,
if (dbuf) if (dbuf)
dmu_buf_rele(dbuf, FTAG); dmu_buf_rele(dbuf, FTAG);
err = dmu_buf_hold(bpo->bpo_os, bpo->bpo_object, err = dmu_buf_hold(bpo->bpo_os, bpo->bpo_object,
offset, FTAG, &dbuf, 0); offset, FTAG, &dbuf, DMU_READ_NO_PREFETCH);
if (err) if (err)
break; break;
pe = pb;
pb = MAX((dbuf->db_offset > dmu_prefetch_max) ?
dbuf->db_offset - dmu_prefetch_max : 0, ps);
if (pe > pb) {
dmu_prefetch(bpo->bpo_os, bpo->bpo_object, 0,
pb, pe - pb, ZIO_PRIORITY_ASYNC_READ);
}
} }
ASSERT3U(offset, >=, dbuf->db_offset); ASSERT3U(offset, >=, dbuf->db_offset);
@ -466,22 +483,30 @@ bpobj_iterate_impl(bpobj_t *initial_bpo, bpobj_itor_t func, void *arg,
int64_t i = bpi->bpi_unprocessed_subobjs - 1; int64_t i = bpi->bpi_unprocessed_subobjs - 1;
uint64_t offset = i * sizeof (uint64_t); uint64_t offset = i * sizeof (uint64_t);
uint64_t obj_from_sublist; uint64_t subobj;
err = dmu_read(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, err = dmu_read(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs,
offset, sizeof (uint64_t), &obj_from_sublist, offset, sizeof (uint64_t), &subobj,
DMU_READ_PREFETCH); DMU_READ_NO_PREFETCH);
if (err) if (err)
break; break;
bpobj_t *sublist = kmem_alloc(sizeof (bpobj_t),
bpobj_t *subbpo = kmem_alloc(sizeof (bpobj_t),
KM_SLEEP); KM_SLEEP);
err = bpobj_open(subbpo, bpo->bpo_os, subobj);
err = bpobj_open(sublist, bpo->bpo_os, if (err) {
obj_from_sublist); kmem_free(subbpo, sizeof (bpobj_t));
if (err)
break; break;
}
list_insert_head(&stack, bpi_alloc(sublist, bpi, i)); if (subbpo->bpo_havesubobj &&
mutex_enter(&sublist->bpo_lock); subbpo->bpo_phys->bpo_subobjs != 0) {
dmu_prefetch(subbpo->bpo_os,
subbpo->bpo_phys->bpo_subobjs, 0, 0, 0,
ZIO_PRIORITY_ASYNC_READ);
}
list_insert_head(&stack, bpi_alloc(subbpo, bpi, i));
mutex_enter(&subbpo->bpo_lock);
bpi->bpi_unprocessed_subobjs--; bpi->bpi_unprocessed_subobjs--;
} }
} }

View File

@ -89,7 +89,11 @@ static int zfs_dmu_offset_next_sync = 1;
* helps to limit the amount of memory that can be used by prefetching. * helps to limit the amount of memory that can be used by prefetching.
* Larger objects should be prefetched a bit at a time. * Larger objects should be prefetched a bit at a time.
*/ */
#ifdef _ILP32
uint_t dmu_prefetch_max = 8 * 1024 * 1024;
#else
uint_t dmu_prefetch_max = 8 * SPA_MAXBLOCKSIZE; uint_t dmu_prefetch_max = 8 * SPA_MAXBLOCKSIZE;
#endif
const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = { const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = {
{DMU_BSWAP_UINT8, TRUE, FALSE, FALSE, "unallocated" }, {DMU_BSWAP_UINT8, TRUE, FALSE, FALSE, "unallocated" },
@ -552,8 +556,7 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
zio = zio_root(dn->dn_objset->os_spa, NULL, NULL, zio = zio_root(dn->dn_objset->os_spa, NULL, NULL,
ZIO_FLAG_CANFAIL); ZIO_FLAG_CANFAIL);
blkid = dbuf_whichblock(dn, 0, offset); blkid = dbuf_whichblock(dn, 0, offset);
if ((flags & DMU_READ_NO_PREFETCH) == 0 && if ((flags & DMU_READ_NO_PREFETCH) == 0) {
length <= zfetch_array_rd_sz) {
/* /*
* Prepare the zfetch before initiating the demand reads, so * Prepare the zfetch before initiating the demand reads, so
* that if multiple threads block on same indirect block, we * that if multiple threads block on same indirect block, we

View File

@ -1795,17 +1795,19 @@ receive_handle_existing_object(const struct receive_writer_arg *rwa,
} }
/* /*
* The dmu does not currently support decreasing nlevels * The dmu does not currently support decreasing nlevels or changing
* or changing the number of dnode slots on an object. For * indirect block size if there is already one, same as changing the
non-raw sends, this does not matter and the new object * number of dnode slots on an object. For non-raw sends this
* can just use the previous one's nlevels. For raw sends, * does not matter and the new object can just use the previous one's
* however, the structure of the received dnode (including * parameters. For raw sends, however, the structure of the received
* nlevels and dnode slots) must match that of the send * dnode (including indirects and dnode slots) must match that of the
* side. Therefore, instead of using dmu_object_reclaim(), * send side. Therefore, instead of using dmu_object_reclaim(), we
* we must free the object completely and call * must free the object completely and call dmu_object_claim_dnsize()
* dmu_object_claim_dnsize() instead. * instead.
*/ */
if ((rwa->raw && drro->drr_nlevels < doi->doi_indirection) || if ((rwa->raw && ((doi->doi_indirection > 1 &&
indblksz != doi->doi_metadata_block_size) ||
drro->drr_nlevels < doi->doi_indirection)) ||
dn_slots != doi->doi_dnodesize >> DNODE_SHIFT) { dn_slots != doi->doi_dnodesize >> DNODE_SHIFT) {
err = dmu_free_long_object(rwa->os, drro->drr_object); err = dmu_free_long_object(rwa->os, drro->drr_object);
if (err != 0) if (err != 0)

View File

@ -52,14 +52,19 @@ static unsigned int zfetch_max_streams = 8;
static unsigned int zfetch_min_sec_reap = 1; static unsigned int zfetch_min_sec_reap = 1;
/* max time before stream delete */ /* max time before stream delete */
static unsigned int zfetch_max_sec_reap = 2; static unsigned int zfetch_max_sec_reap = 2;
#ifdef _ILP32
/* min bytes to prefetch per stream (default 2MB) */
static unsigned int zfetch_min_distance = 2 * 1024 * 1024;
/* max bytes to prefetch per stream (default 8MB) */
unsigned int zfetch_max_distance = 8 * 1024 * 1024;
#else
/* min bytes to prefetch per stream (default 4MB) */ /* min bytes to prefetch per stream (default 4MB) */
static unsigned int zfetch_min_distance = 4 * 1024 * 1024; static unsigned int zfetch_min_distance = 4 * 1024 * 1024;
/* max bytes to prefetch per stream (default 64MB) */ /* max bytes to prefetch per stream (default 64MB) */
unsigned int zfetch_max_distance = 64 * 1024 * 1024; unsigned int zfetch_max_distance = 64 * 1024 * 1024;
#endif
/* max bytes to prefetch indirects for per stream (default 64MB) */ /* max bytes to prefetch indirects for per stream (default 64MB) */
unsigned int zfetch_max_idistance = 64 * 1024 * 1024; unsigned int zfetch_max_idistance = 64 * 1024 * 1024;
/* max number of bytes in an array_read in which we allow prefetching (1MB) */
uint64_t zfetch_array_rd_sz = 1024 * 1024;
typedef struct zfetch_stats { typedef struct zfetch_stats {
kstat_named_t zfetchstat_hits; kstat_named_t zfetchstat_hits;
@ -580,6 +585,3 @@ ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, max_distance, UINT, ZMOD_RW,
ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, max_idistance, UINT, ZMOD_RW, ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, max_idistance, UINT, ZMOD_RW,
"Max bytes to prefetch indirects for per stream"); "Max bytes to prefetch indirects for per stream");
ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, array_rd_sz, U64, ZMOD_RW,
"Number of bytes in a array_read");

View File

@ -1882,7 +1882,7 @@ dnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx)
if (ibs == dn->dn_indblkshift) if (ibs == dn->dn_indblkshift)
ibs = 0; ibs = 0;
if (size >> SPA_MINBLOCKSHIFT == dn->dn_datablkszsec && ibs == 0) if (size == dn->dn_datablksz && ibs == 0)
return (0); return (0);
rw_enter(&dn->dn_struct_rwlock, RW_WRITER); rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
@ -1905,24 +1905,25 @@ dnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx)
if (ibs && dn->dn_nlevels != 1) if (ibs && dn->dn_nlevels != 1)
goto fail; goto fail;
/* resize the old block */
err = dbuf_hold_impl(dn, 0, 0, TRUE, FALSE, FTAG, &db);
if (err == 0) {
dbuf_new_size(db, size, tx);
} else if (err != ENOENT) {
goto fail;
}
dnode_setdblksz(dn, size);
dnode_setdirty(dn, tx); dnode_setdirty(dn, tx);
dn->dn_next_blksz[tx->tx_txg&TXG_MASK] = size; if (size != dn->dn_datablksz) {
/* resize the old block */
err = dbuf_hold_impl(dn, 0, 0, TRUE, FALSE, FTAG, &db);
if (err == 0) {
dbuf_new_size(db, size, tx);
} else if (err != ENOENT) {
goto fail;
}
dnode_setdblksz(dn, size);
dn->dn_next_blksz[tx->tx_txg & TXG_MASK] = size;
if (db)
dbuf_rele(db, FTAG);
}
if (ibs) { if (ibs) {
dn->dn_indblkshift = ibs; dn->dn_indblkshift = ibs;
dn->dn_next_indblkshift[tx->tx_txg&TXG_MASK] = ibs; dn->dn_next_indblkshift[tx->tx_txg & TXG_MASK] = ibs;
} }
/* release after we have fixed the blocksize in the dnode */
if (db)
dbuf_rele(db, FTAG);
rw_exit(&dn->dn_struct_rwlock); rw_exit(&dn->dn_struct_rwlock);
return (0); return (0);

View File

@ -892,9 +892,9 @@ dsl_deadlist_merge(dsl_deadlist_t *dl, uint64_t obj, dmu_tx_t *tx)
for (zap_cursor_init(&zc, dl->dl_os, obj); for (zap_cursor_init(&zc, dl->dl_os, obj);
(error = zap_cursor_retrieve(&zc, za)) == 0; (error = zap_cursor_retrieve(&zc, za)) == 0;
zap_cursor_advance(&zc)) { zap_cursor_advance(&zc)) {
uint64_t mintxg = zfs_strtonum(za->za_name, NULL); dsl_deadlist_insert_bpobj(dl, za->za_first_integer,
dsl_deadlist_insert_bpobj(dl, za->za_first_integer, mintxg, tx); zfs_strtonum(za->za_name, NULL), tx);
VERIFY0(zap_remove_int(dl->dl_os, obj, mintxg, tx)); VERIFY0(zap_remove(dl->dl_os, obj, za->za_name, tx));
if (perror == 0) { if (perror == 0) {
dsl_deadlist_prefetch_bpobj(dl, pza->za_first_integer, dsl_deadlist_prefetch_bpobj(dl, pza->za_first_integer,
zfs_strtonum(pza->za_name, NULL)); zfs_strtonum(pza->za_name, NULL));

View File

@ -2015,6 +2015,11 @@ dsl_scan_prefetch_cb(zio_t *zio, const zbookmark_phys_t *zb, const blkptr_t *bp,
zb->zb_objset, DMU_META_DNODE_OBJECT); zb->zb_objset, DMU_META_DNODE_OBJECT);
if (OBJSET_BUF_HAS_USERUSED(buf)) { if (OBJSET_BUF_HAS_USERUSED(buf)) {
if (OBJSET_BUF_HAS_PROJECTUSED(buf)) {
dsl_scan_prefetch_dnode(scn,
&osp->os_projectused_dnode, zb->zb_objset,
DMU_PROJECTUSED_OBJECT);
}
dsl_scan_prefetch_dnode(scn, dsl_scan_prefetch_dnode(scn,
&osp->os_groupused_dnode, zb->zb_objset, &osp->os_groupused_dnode, zb->zb_objset,
DMU_GROUPUSED_OBJECT); DMU_GROUPUSED_OBJECT);
@ -2075,10 +2080,16 @@ dsl_scan_prefetch_thread(void *arg)
zio_flags |= ZIO_FLAG_RAW; zio_flags |= ZIO_FLAG_RAW;
} }
/* We don't need data L1 buffer since we do not prefetch L0. */
blkptr_t *bp = &spic->spic_bp;
if (BP_GET_LEVEL(bp) == 1 && BP_GET_TYPE(bp) != DMU_OT_DNODE &&
BP_GET_TYPE(bp) != DMU_OT_OBJSET)
flags |= ARC_FLAG_NO_BUF;
/* issue the prefetch asynchronously */ /* issue the prefetch asynchronously */
(void) arc_read(scn->scn_zio_root, scn->scn_dp->dp_spa, (void) arc_read(scn->scn_zio_root, spa, bp,
&spic->spic_bp, dsl_scan_prefetch_cb, spic->spic_spc, dsl_scan_prefetch_cb, spic->spic_spc, ZIO_PRIORITY_SCRUB,
ZIO_PRIORITY_SCRUB, zio_flags, &flags, &spic->spic_zb); zio_flags, &flags, &spic->spic_zb);
kmem_free(spic, sizeof (scan_prefetch_issue_ctx_t)); kmem_free(spic, sizeof (scan_prefetch_issue_ctx_t));
} }

View File

@ -58,6 +58,11 @@ static uint64_t metaslab_aliquot = 1024 * 1024;
*/ */
uint64_t metaslab_force_ganging = SPA_MAXBLOCKSIZE + 1; uint64_t metaslab_force_ganging = SPA_MAXBLOCKSIZE + 1;
/*
* Of blocks of size >= metaslab_force_ganging, actually gang them this often.
*/
uint_t metaslab_force_ganging_pct = 3;
/* /*
* In pools where the log space map feature is not enabled we touch * In pools where the log space map feature is not enabled we touch
* multiple metaslabs (and their respective space maps) with each * multiple metaslabs (and their respective space maps) with each
@ -5109,7 +5114,9 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
* damage can result in extremely long reconstruction times. This * damage can result in extremely long reconstruction times. This
* will also test spilling from special to normal. * will also test spilling from special to normal.
*/ */
if (psize >= metaslab_force_ganging && (random_in_range(100) < 3)) { if (psize >= metaslab_force_ganging &&
metaslab_force_ganging_pct > 0 &&
(random_in_range(100) < MIN(metaslab_force_ganging_pct, 100))) {
metaslab_trace_add(zal, NULL, NULL, psize, d, TRACE_FORCE_GANG, metaslab_trace_add(zal, NULL, NULL, psize, d, TRACE_FORCE_GANG,
allocator); allocator);
return (SET_ERROR(ENOSPC)); return (SET_ERROR(ENOSPC));
@ -6266,7 +6273,10 @@ ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, switch_threshold, INT, ZMOD_RW,
"Segment-based metaslab selection maximum buckets before switching"); "Segment-based metaslab selection maximum buckets before switching");
ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, force_ganging, U64, ZMOD_RW, ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, force_ganging, U64, ZMOD_RW,
"Blocks larger than this size are forced to be gang blocks"); "Blocks larger than this size are sometimes forced to be gang blocks");
ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, force_ganging_pct, UINT, ZMOD_RW,
"Percentage of large blocks that will be forced to be gang blocks");
ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, df_max_search, UINT, ZMOD_RW, ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, df_max_search, UINT, ZMOD_RW,
"Max distance (bytes) to search forward before using size tree"); "Max distance (bytes) to search forward before using size tree");

View File

@ -772,6 +772,7 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)
spa->spa_min_ashift = INT_MAX; spa->spa_min_ashift = INT_MAX;
spa->spa_max_ashift = 0; spa->spa_max_ashift = 0;
spa->spa_min_alloc = INT_MAX; spa->spa_min_alloc = INT_MAX;
spa->spa_gcd_alloc = INT_MAX;
/* Reset cached value */ /* Reset cached value */
spa->spa_dedup_dspace = ~0ULL; spa->spa_dedup_dspace = ~0ULL;

View File

@ -889,9 +889,15 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
&vd->vdev_not_present); &vd->vdev_not_present);
/* /*
* Get the alignment requirement. * Get the alignment requirement. Ignore pool ashift for vdev
* attach case.
*/ */
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ASHIFT, &vd->vdev_ashift); if (alloctype != VDEV_ALLOC_ATTACH) {
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ASHIFT,
&vd->vdev_ashift);
} else {
vd->vdev_attaching = B_TRUE;
}
/* /*
* Retrieve the vdev creation time. * Retrieve the vdev creation time.
@ -1393,6 +1399,36 @@ vdev_remove_parent(vdev_t *cvd)
vdev_free(mvd); vdev_free(mvd);
} }
/*
* Choose GCD for spa_gcd_alloc.
*/
static uint64_t
vdev_gcd(uint64_t a, uint64_t b)
{
while (b != 0) {
uint64_t t = b;
b = a % b;
a = t;
}
return (a);
}
/*
* Set spa_min_alloc and spa_gcd_alloc.
*/
static void
vdev_spa_set_alloc(spa_t *spa, uint64_t min_alloc)
{
if (min_alloc < spa->spa_min_alloc)
spa->spa_min_alloc = min_alloc;
if (spa->spa_gcd_alloc == INT_MAX) {
spa->spa_gcd_alloc = min_alloc;
} else {
spa->spa_gcd_alloc = vdev_gcd(min_alloc,
spa->spa_gcd_alloc);
}
}
void void
vdev_metaslab_group_create(vdev_t *vd) vdev_metaslab_group_create(vdev_t *vd)
{ {
@ -1445,8 +1481,7 @@ vdev_metaslab_group_create(vdev_t *vd)
spa->spa_min_ashift = vd->vdev_ashift; spa->spa_min_ashift = vd->vdev_ashift;
uint64_t min_alloc = vdev_get_min_alloc(vd); uint64_t min_alloc = vdev_get_min_alloc(vd);
if (min_alloc < spa->spa_min_alloc) vdev_spa_set_alloc(spa, min_alloc);
spa->spa_min_alloc = min_alloc;
} }
} }
} }
@ -2144,9 +2179,9 @@ vdev_open(vdev_t *vd)
return (SET_ERROR(EDOM)); return (SET_ERROR(EDOM));
} }
if (vd->vdev_top == vd) { if (vd->vdev_top == vd && vd->vdev_attaching == B_FALSE)
vdev_ashift_optimize(vd); vdev_ashift_optimize(vd);
} vd->vdev_attaching = B_FALSE;
} }
if (vd->vdev_ashift != 0 && (vd->vdev_ashift < ASHIFT_MIN || if (vd->vdev_ashift != 0 && (vd->vdev_ashift < ASHIFT_MIN ||
vd->vdev_ashift > ASHIFT_MAX)) { vd->vdev_ashift > ASHIFT_MAX)) {
@ -2207,8 +2242,7 @@ vdev_open(vdev_t *vd)
if (vd->vdev_top == vd && vd->vdev_ashift != 0 && if (vd->vdev_top == vd && vd->vdev_ashift != 0 &&
vd->vdev_islog == 0 && vd->vdev_aux == NULL) { vd->vdev_islog == 0 && vd->vdev_aux == NULL) {
uint64_t min_alloc = vdev_get_min_alloc(vd); uint64_t min_alloc = vdev_get_min_alloc(vd);
if (min_alloc < spa->spa_min_alloc) vdev_spa_set_alloc(spa, min_alloc);
spa->spa_min_alloc = min_alloc;
} }
/* /*
@ -5688,6 +5722,7 @@ vdev_props_set_sync(void *arg, dmu_tx_t *tx)
objset_t *mos = spa->spa_meta_objset; objset_t *mos = spa->spa_meta_objset;
nvpair_t *elem = NULL; nvpair_t *elem = NULL;
uint64_t vdev_guid; uint64_t vdev_guid;
uint64_t objid;
nvlist_t *nvprops; nvlist_t *nvprops;
vdev_guid = fnvlist_lookup_uint64(nvp, ZPOOL_VDEV_PROPS_SET_VDEV); vdev_guid = fnvlist_lookup_uint64(nvp, ZPOOL_VDEV_PROPS_SET_VDEV);
@ -5698,31 +5733,28 @@ vdev_props_set_sync(void *arg, dmu_tx_t *tx)
if (vd == NULL) if (vd == NULL)
return; return;
/*
* Set vdev property values in the vdev props mos object.
*/
if (vd->vdev_root_zap != 0) {
objid = vd->vdev_root_zap;
} else if (vd->vdev_top_zap != 0) {
objid = vd->vdev_top_zap;
} else if (vd->vdev_leaf_zap != 0) {
objid = vd->vdev_leaf_zap;
} else {
panic("unexpected vdev type");
}
mutex_enter(&spa->spa_props_lock); mutex_enter(&spa->spa_props_lock);
while ((elem = nvlist_next_nvpair(nvprops, elem)) != NULL) { while ((elem = nvlist_next_nvpair(nvprops, elem)) != NULL) {
uint64_t intval, objid = 0; uint64_t intval;
const char *strval; const char *strval;
vdev_prop_t prop; vdev_prop_t prop;
const char *propname = nvpair_name(elem); const char *propname = nvpair_name(elem);
zprop_type_t proptype; zprop_type_t proptype;
/*
* Set vdev property values in the vdev props mos object.
*/
if (vd->vdev_root_zap != 0) {
objid = vd->vdev_root_zap;
} else if (vd->vdev_top_zap != 0) {
objid = vd->vdev_top_zap;
} else if (vd->vdev_leaf_zap != 0) {
objid = vd->vdev_leaf_zap;
} else {
/*
* XXX: implement vdev_props_set_check()
*/
panic("vdev not root/top/leaf");
}
switch (prop = vdev_name_to_prop(propname)) { switch (prop = vdev_name_to_prop(propname)) {
case VDEV_PROP_USERPROP: case VDEV_PROP_USERPROP:
if (vdev_prop_user(propname)) { if (vdev_prop_user(propname)) {
@ -5791,6 +5823,12 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
ASSERT(vd != NULL); ASSERT(vd != NULL);
/* Check that vdev has a zap we can use */
if (vd->vdev_root_zap == 0 &&
vd->vdev_top_zap == 0 &&
vd->vdev_leaf_zap == 0)
return (SET_ERROR(EINVAL));
if (nvlist_lookup_uint64(innvl, ZPOOL_VDEV_PROPS_SET_VDEV, if (nvlist_lookup_uint64(innvl, ZPOOL_VDEV_PROPS_SET_VDEV,
&vdev_guid) != 0) &vdev_guid) != 0)
return (SET_ERROR(EINVAL)); return (SET_ERROR(EINVAL));

View File

@ -1398,7 +1398,7 @@ vdev_indirect_checksum_error(zio_t *zio,
vd->vdev_stat.vs_checksum_errors++; vd->vdev_stat.vs_checksum_errors++;
mutex_exit(&vd->vdev_stat_lock); mutex_exit(&vd->vdev_stat_lock);
zio_bad_cksum_t zbc = {{{ 0 }}}; zio_bad_cksum_t zbc = { 0 };
abd_t *bad_abd = ic->ic_data; abd_t *bad_abd = ic->ic_data;
abd_t *good_abd = is->is_good_child->ic_data; abd_t *good_abd = is->is_good_child->ic_data;
(void) zfs_ereport_post_checksum(zio->io_spa, vd, NULL, zio, (void) zfs_ereport_post_checksum(zio->io_spa, vd, NULL, zio,

View File

@ -1785,7 +1785,7 @@ vdev_raidz_checksum_error(zio_t *zio, raidz_col_t *rc, abd_t *bad_data)
static int static int
raidz_checksum_verify(zio_t *zio) raidz_checksum_verify(zio_t *zio)
{ {
zio_bad_cksum_t zbc = {{{0}}}; zio_bad_cksum_t zbc = {0};
raidz_map_t *rm = zio->io_vsd; raidz_map_t *rm = zio->io_vsd;
int ret = zio_checksum_error(zio, &zbc); int ret = zio_checksum_error(zio, &zbc);

View File

@ -754,10 +754,6 @@ zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out,
#define MAX_RANGES 16 #define MAX_RANGES 16
typedef struct zfs_ecksum_info { typedef struct zfs_ecksum_info {
/* histograms of set and cleared bits by bit number in a 64-bit word */
uint8_t zei_histogram_set[sizeof (uint64_t) * NBBY];
uint8_t zei_histogram_cleared[sizeof (uint64_t) * NBBY];
/* inline arrays of bits set and cleared. */ /* inline arrays of bits set and cleared. */
uint64_t zei_bits_set[ZFM_MAX_INLINE]; uint64_t zei_bits_set[ZFM_MAX_INLINE];
uint64_t zei_bits_cleared[ZFM_MAX_INLINE]; uint64_t zei_bits_cleared[ZFM_MAX_INLINE];
@ -781,7 +777,7 @@ typedef struct zfs_ecksum_info {
} zfs_ecksum_info_t; } zfs_ecksum_info_t;
static void static void
update_histogram(uint64_t value_arg, uint8_t *hist, uint32_t *count) update_bad_bits(uint64_t value_arg, uint32_t *count)
{ {
size_t i; size_t i;
size_t bits = 0; size_t bits = 0;
@ -789,10 +785,8 @@ update_histogram(uint64_t value_arg, uint8_t *hist, uint32_t *count)
/* We store the bits in big-endian (largest-first) order */ /* We store the bits in big-endian (largest-first) order */
for (i = 0; i < 64; i++) { for (i = 0; i < 64; i++) {
if (value & (1ull << i)) { if (value & (1ull << i))
hist[63 - i]++;
++bits; ++bits;
}
} }
/* update the count of bits changed */ /* update the count of bits changed */
*count += bits; *count += bits;
@ -920,14 +914,6 @@ annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info,
if (info != NULL && info->zbc_has_cksum) { if (info != NULL && info->zbc_has_cksum) {
fm_payload_set(ereport, fm_payload_set(ereport,
FM_EREPORT_PAYLOAD_ZFS_CKSUM_EXPECTED,
DATA_TYPE_UINT64_ARRAY,
sizeof (info->zbc_expected) / sizeof (uint64_t),
(uint64_t *)&info->zbc_expected,
FM_EREPORT_PAYLOAD_ZFS_CKSUM_ACTUAL,
DATA_TYPE_UINT64_ARRAY,
sizeof (info->zbc_actual) / sizeof (uint64_t),
(uint64_t *)&info->zbc_actual,
FM_EREPORT_PAYLOAD_ZFS_CKSUM_ALGO, FM_EREPORT_PAYLOAD_ZFS_CKSUM_ALGO,
DATA_TYPE_STRING, DATA_TYPE_STRING,
info->zbc_checksum_name, info->zbc_checksum_name,
@ -1010,10 +996,8 @@ annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info,
offset++; offset++;
} }
update_histogram(set, eip->zei_histogram_set, update_bad_bits(set, &eip->zei_range_sets[range]);
&eip->zei_range_sets[range]); update_bad_bits(cleared, &eip->zei_range_clears[range]);
update_histogram(cleared, eip->zei_histogram_cleared,
&eip->zei_range_clears[range]);
} }
/* convert to byte offsets */ /* convert to byte offsets */
@ -1049,15 +1033,6 @@ annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info,
DATA_TYPE_UINT8_ARRAY, DATA_TYPE_UINT8_ARRAY,
inline_size, (uint8_t *)eip->zei_bits_cleared, inline_size, (uint8_t *)eip->zei_bits_cleared,
NULL); NULL);
} else {
fm_payload_set(ereport,
FM_EREPORT_PAYLOAD_ZFS_BAD_SET_HISTOGRAM,
DATA_TYPE_UINT8_ARRAY,
NBBY * sizeof (uint64_t), eip->zei_histogram_set,
FM_EREPORT_PAYLOAD_ZFS_BAD_CLEARED_HISTOGRAM,
DATA_TYPE_UINT8_ARRAY,
NBBY * sizeof (uint64_t), eip->zei_histogram_cleared,
NULL);
} }
return (eip); return (eip);
} }

View File

@ -1596,6 +1596,19 @@ zio_shrink(zio_t *zio, uint64_t size)
} }
} }
/*
* Round provided allocation size up to a value that can be allocated
* by at least some vdev(s) in the pool with minimum or no additional
* padding and without extra space usage on others
*/
static uint64_t
zio_roundup_alloc_size(spa_t *spa, uint64_t size)
{
if (size > spa->spa_min_alloc)
return (roundup(size, spa->spa_gcd_alloc));
return (spa->spa_min_alloc);
}
/* /*
* ========================================================================== * ==========================================================================
* Prepare to read and write logical blocks * Prepare to read and write logical blocks
@ -1802,9 +1815,8 @@ zio_write_compress(zio_t *zio)
* in that we charge for the padding used to fill out * in that we charge for the padding used to fill out
* the last sector. * the last sector.
*/ */
ASSERT3U(spa->spa_min_alloc, >=, SPA_MINBLOCKSHIFT); size_t rounded = (size_t)zio_roundup_alloc_size(spa,
size_t rounded = (size_t)roundup(psize, psize);
spa->spa_min_alloc);
if (rounded >= lsize) { if (rounded >= lsize) {
compress = ZIO_COMPRESS_OFF; compress = ZIO_COMPRESS_OFF;
zio_buf_free(cbuf, lsize); zio_buf_free(cbuf, lsize);
@ -1847,8 +1859,8 @@ zio_write_compress(zio_t *zio)
* take this codepath because it will change the on-disk block * take this codepath because it will change the on-disk block
* and decryption will fail. * and decryption will fail.
*/ */
size_t rounded = MIN((size_t)roundup(psize, size_t rounded = MIN((size_t)zio_roundup_alloc_size(spa, psize),
spa->spa_min_alloc), lsize); lsize);
if (rounded != psize) { if (rounded != psize) {
abd_t *cdata = abd_alloc_linear(rounded, B_TRUE); abd_t *cdata = abd_alloc_linear(rounded, B_TRUE);

View File

@ -515,8 +515,6 @@ zio_checksum_error_impl(spa_t *spa, const blkptr_t *bp,
} }
if (info != NULL) { if (info != NULL) {
info->zbc_expected = expected_cksum;
info->zbc_actual = actual_cksum;
info->zbc_checksum_name = ci->ci_name; info->zbc_checksum_name = ci->ci_name;
info->zbc_byteswapped = byteswap; info->zbc_byteswapped = byteswap;
info->zbc_injected = 0; info->zbc_injected = 0;

View File

@ -35,7 +35,7 @@
# #
# STRATEGY: # STRATEGY:
# 1. Create various pools with different ashift values. # 1. Create various pools with different ashift values.
# 2. Verify 'attach -o ashift=<n>' works only with allowed values. # 2. Verify 'attach' works.
# #
verify_runnable "global" verify_runnable "global"
@ -66,26 +66,14 @@ log_must set_tunable32 VDEV_FILE_PHYSICAL_ASHIFT 16
typeset ashifts=("9" "10" "11" "12" "13" "14" "15" "16") typeset ashifts=("9" "10" "11" "12" "13" "14" "15" "16")
for ashift in ${ashifts[@]} for ashift in ${ashifts[@]}
do do
for cmdval in ${ashifts[@]} log_must zpool create -o ashift=$ashift $TESTPOOL1 $disk1
do log_must verify_ashift $disk1 $ashift
log_must zpool create -o ashift=$ashift $TESTPOOL1 $disk1 log_must zpool attach $TESTPOOL1 $disk1 $disk2
log_must verify_ashift $disk1 $ashift log_must verify_ashift $disk2 $ashift
# clean things for the next run
# ashift_of(attached_disk) <= ashift_of(existing_vdev) log_must zpool destroy $TESTPOOL1
if [[ $cmdval -le $ashift ]] log_must zpool labelclear $disk1
then log_must zpool labelclear $disk2
log_must zpool attach -o ashift=$cmdval $TESTPOOL1 \
$disk1 $disk2
log_must verify_ashift $disk2 $ashift
else
log_mustnot zpool attach -o ashift=$cmdval $TESTPOOL1 \
$disk1 $disk2
fi
# clean things for the next run
log_must zpool destroy $TESTPOOL1
log_must zpool labelclear $disk1
log_must zpool labelclear $disk2
done
done done
typeset badvals=("off" "on" "1" "8" "17" "1b" "ff" "-") typeset badvals=("off" "on" "1" "8" "17" "1b" "ff" "-")

View File

@ -35,7 +35,7 @@
# #
# STRATEGY: # STRATEGY:
# 1. Create various pools with different ashift values. # 1. Create various pools with different ashift values.
# 2. Verify 'replace -o ashift=<n>' works only with allowed values. # 2. Verify 'replace' works.
# #
verify_runnable "global" verify_runnable "global"
@ -66,26 +66,16 @@ log_must set_tunable32 VDEV_FILE_PHYSICAL_ASHIFT 16
typeset ashifts=("9" "10" "11" "12" "13" "14" "15" "16") typeset ashifts=("9" "10" "11" "12" "13" "14" "15" "16")
for ashift in ${ashifts[@]} for ashift in ${ashifts[@]}
do do
for cmdval in ${ashifts[@]} log_must zpool create -o ashift=$ashift $TESTPOOL1 $disk1
do log_must verify_ashift $disk1 $ashift
log_must zpool create -o ashift=$ashift $TESTPOOL1 $disk1 # ashift_of(replacing_disk) <= ashift_of(existing_vdev)
log_must verify_ashift $disk1 $ashift log_must zpool replace $TESTPOOL1 $disk1 $disk2
# ashift_of(replacing_disk) <= ashift_of(existing_vdev) log_must verify_ashift $disk2 $ashift
if [[ $cmdval -le $ashift ]] wait_replacing $TESTPOOL1
then # clean things for the next run
log_must zpool replace -o ashift=$cmdval $TESTPOOL1 \ log_must zpool destroy $TESTPOOL1
$disk1 $disk2 log_must zpool labelclear $disk1
log_must verify_ashift $disk2 $ashift log_must zpool labelclear $disk2
wait_replacing $TESTPOOL1
else
log_mustnot zpool replace -o ashift=$cmdval $TESTPOOL1 \
$disk1 $disk2
fi
# clean things for the next run
log_must zpool destroy $TESTPOOL1
log_must zpool labelclear $disk1
log_must zpool labelclear $disk2
done
done done
typeset badvals=("off" "on" "1" "8" "17" "1b" "ff" "-") typeset badvals=("off" "on" "1" "8" "17" "1b" "ff" "-")

View File

@ -34,10 +34,8 @@
# #
# STRATEGY: # STRATEGY:
# 1. Create a pool with default values. # 1. Create a pool with default values.
# 2. Verify 'zpool replace' uses the ashift pool property value when # 2. Override the pool ashift property.
# replacing an existing device. # 3. Verify 'zpool replace' works.
# 3. Verify the default ashift value can still be overridden by manually
# specifying '-o ashift=<n>' from the command line.
# #
verify_runnable "global" verify_runnable "global"
@ -72,21 +70,9 @@ do
do do
log_must zpool create -o ashift=$ashift $TESTPOOL1 $disk1 log_must zpool create -o ashift=$ashift $TESTPOOL1 $disk1
log_must zpool set ashift=$pprop $TESTPOOL1 log_must zpool set ashift=$pprop $TESTPOOL1
# ashift_of(replacing_disk) <= ashift_of(existing_vdev) log_must zpool replace $TESTPOOL1 $disk1 $disk2
if [[ $pprop -le $ashift ]] wait_replacing $TESTPOOL1
then log_must verify_ashift $disk2 $ashift
log_must zpool replace $TESTPOOL1 $disk1 $disk2
wait_replacing $TESTPOOL1
log_must verify_ashift $disk2 $ashift
else
# cannot replace if pool prop ashift > vdev ashift
log_mustnot zpool replace $TESTPOOL1 $disk1 $disk2
# verify we can override the pool prop value manually
log_must zpool replace -o ashift=$ashift $TESTPOOL1 \
$disk1 $disk2
wait_replacing $TESTPOOL1
log_must verify_ashift $disk2 $ashift
fi
# clean things for the next run # clean things for the next run
log_must zpool destroy $TESTPOOL1 log_must zpool destroy $TESTPOOL1
log_must zpool labelclear $disk1 log_must zpool labelclear $disk1