Compare commits
24 Commits
master ... zfs-2.2.0-
Author | SHA1 | Date |
---|---|---|
Brian Behlendorf | 70232483b4 | |
Rob N | c5273e0c31 | |
Rob N | 685ae4429f | |
Alexander Motin | 81be809a25 | |
Alexander Motin | 8a6fde8213 | |
Alan Somers | b6f618f8ff | |
Alan Somers | 51a2b59767 | |
Tony Hutter | 8c81c0b05d | |
Chunwei Chen | b221f43943 | |
Ameer Hamza | e037327bfe | |
Yuri Pankov | 1a2e486d25 | |
Ameer Hamza | d8011707cc | |
Wojciech Małota-Wójcik | f5f5a2db95 | |
Alexander Motin | 83b0967c1f | |
Coleman Kane | 73ba5df31a | |
Coleman Kane | 1bc244ae93 | |
Coleman Kane | 931dc70550 | |
Yuri Pankov | 5299f4f289 | |
Alan Somers | f917cf1c03 | |
Alexander Motin | 56ed389a57 | |
Alexander Motin | e613e4bbe3 | |
Alexander Motin | b4e630b00c | |
Mateusz Guzik | bf6cd30796 | |
Alexander Motin | 1266cebf87 |
META

@@ -2,7 +2,7 @@ Meta: 1
 Name: zfs
 Branch: 1.0
 Version: 2.2.0
-Release: rc1
+Release: rc2
 Release-Tags: relext
 License: CDDL
 Author: OpenZFS
@@ -416,6 +416,11 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
     FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, &vdev_guid) != 0)
         return;

+    if (vdev_guid == 0) {
+        fmd_hdl_debug(hdl, "Got a zero GUID");
+        return;
+    }
+
     if (spare) {
         int nspares = find_and_remove_spares(zhdl, vdev_guid);
         fmd_hdl_debug(hdl, "%d spares removed", nspares);
@@ -4,6 +4,7 @@
 # Not following: a was not specified as input (see shellcheck -x). [SC1091]
 # Prefer putting braces around variable references even when not strictly required. [SC2250]
 # Consider invoking this command separately to avoid masking its return value (or use '|| true' to ignore). [SC2312]
+# Command appears to be unreachable. Check usage (or ignore if invoked indirectly). [SC2317]
 # In POSIX sh, 'local' is undefined. [SC2039] # older ShellCheck versions
 # In POSIX sh, 'local' is undefined. [SC3043] # newer ShellCheck versions

@@ -18,7 +19,7 @@ PHONY += shellcheck
 _STGT = $(subst ^,/,$(subst shellcheck-here-,,$@))
 shellcheck-here-%:
 if HAVE_SHELLCHECK
-	shellcheck --format=gcc --enable=all --exclude=SC1090,SC1091,SC2039,SC2250,SC2312,SC3043 $$([ -n "$(SHELLCHECK_SHELL)" ] && echo "--shell=$(SHELLCHECK_SHELL)") "$$([ -e "$(_STGT)" ] || echo "$(srcdir)/")$(_STGT)"
+	shellcheck --format=gcc --enable=all --exclude=SC1090,SC1091,SC2039,SC2250,SC2312,SC2317,SC3043 $$([ -n "$(SHELLCHECK_SHELL)" ] && echo "--shell=$(SHELLCHECK_SHELL)") "$$([ -e "$(_STGT)" ] || echo "$(srcdir)/")$(_STGT)"
 else
 	@echo "skipping shellcheck of" $(_STGT) "because shellcheck is not installed"
 endif
@@ -103,6 +103,33 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_CHECK_DISK_CHANGE], [
     ])
 ])

+dnl #
+dnl # 6.5.x API change
+dnl # disk_check_media_change() was added
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_DISK_CHECK_MEDIA_CHANGE], [
+    ZFS_LINUX_TEST_SRC([disk_check_media_change], [
+        #include <linux/fs.h>
+        #include <linux/blkdev.h>
+    ], [
+        struct block_device *bdev = NULL;
+        bool error;
+
+        error = disk_check_media_change(bdev->bd_disk);
+    ])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_DISK_CHECK_MEDIA_CHANGE], [
+    AC_MSG_CHECKING([whether disk_check_media_change() exists])
+    ZFS_LINUX_TEST_RESULT([disk_check_media_change], [
+        AC_MSG_RESULT(yes)
+        AC_DEFINE(HAVE_DISK_CHECK_MEDIA_CHANGE, 1,
+            [disk_check_media_change() exists])
+    ], [
+        AC_MSG_RESULT(no)
+    ])
+])
+
 dnl #
 dnl # bdev_kobj() is introduced from 5.12
 dnl #
@@ -443,6 +470,29 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_GET_ERESTARTSYS], [
     ])
 ])

+dnl #
+dnl # 6.5.x API change
+dnl # BLK_STS_NEXUS replaced with BLK_STS_RESV_CONFLICT
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_BLK_STS_RESV_CONFLICT], [
+    ZFS_LINUX_TEST_SRC([blk_sts_resv_conflict], [
+        #include <linux/blkdev.h>
+    ],[
+        blk_status_t s __attribute__ ((unused)) = BLK_STS_RESV_CONFLICT;
+    ])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_BLK_STS_RESV_CONFLICT], [
+    AC_MSG_CHECKING([whether BLK_STS_RESV_CONFLICT is defined])
+    ZFS_LINUX_TEST_RESULT([blk_sts_resv_conflict], [
+        AC_DEFINE(HAVE_BLK_STS_RESV_CONFLICT, 1, [BLK_STS_RESV_CONFLICT is defined])
+        AC_MSG_RESULT(yes)
+    ], [
+        AC_MSG_RESULT(no)
+    ])
+])
+])
+
 AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV], [
     ZFS_AC_KERNEL_SRC_BLKDEV_GET_BY_PATH
     ZFS_AC_KERNEL_SRC_BLKDEV_PUT
@@ -458,6 +508,8 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV], [
     ZFS_AC_KERNEL_SRC_BLKDEV_ISSUE_SECURE_ERASE
     ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_KOBJ
     ZFS_AC_KERNEL_SRC_BLKDEV_PART_TO_DEV
+    ZFS_AC_KERNEL_SRC_BLKDEV_DISK_CHECK_MEDIA_CHANGE
+    ZFS_AC_KERNEL_SRC_BLKDEV_BLK_STS_RESV_CONFLICT
 ])

 AC_DEFUN([ZFS_AC_KERNEL_BLKDEV], [
@@ -476,4 +528,6 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV], [
     ZFS_AC_KERNEL_BLKDEV_ISSUE_SECURE_ERASE
     ZFS_AC_KERNEL_BLKDEV_BDEV_KOBJ
     ZFS_AC_KERNEL_BLKDEV_PART_TO_DEV
+    ZFS_AC_KERNEL_BLKDEV_DISK_CHECK_MEDIA_CHANGE
+    ZFS_AC_KERNEL_BLKDEV_BLK_STS_RESV_CONFLICT
 ])
@@ -2,7 +2,7 @@
 Description=Rollback bootfs just before it is mounted
 Requisite=zfs-import.target
 After=zfs-import.target dracut-pre-mount.service zfs-snapshot-bootfs.service
-Before=dracut-mount.service
+Before=dracut-mount.service sysroot.mount
 DefaultDependencies=no
 ConditionKernelCommandLine=bootfs.rollback
 ConditionEnvironment=BOOTFS
@@ -36,7 +36,11 @@ struct xucred;
 typedef struct flock flock64_t;
 typedef struct vnode vnode_t;
 typedef struct vattr vattr_t;
+#if __FreeBSD_version < 1400093
 typedef enum vtype vtype_t;
+#else
+#define vtype_t __enum_uint8(vtype)
+#endif

 #include <sys/types.h>
 #include <sys/queue.h>
@@ -181,7 +181,11 @@ bi_status_to_errno(blk_status_t status)
         return (ENOLINK);
     case BLK_STS_TARGET:
         return (EREMOTEIO);
+#ifdef HAVE_BLK_STS_RESV_CONFLICT
+    case BLK_STS_RESV_CONFLICT:
+#else
     case BLK_STS_NEXUS:
+#endif
         return (EBADE);
     case BLK_STS_MEDIUM:
         return (ENODATA);
@@ -215,7 +219,11 @@ errno_to_bi_status(int error)
     case EREMOTEIO:
         return (BLK_STS_TARGET);
     case EBADE:
+#ifdef HAVE_BLK_STS_RESV_CONFLICT
+        return (BLK_STS_RESV_CONFLICT);
+#else
         return (BLK_STS_NEXUS);
+#endif
     case ENODATA:
         return (BLK_STS_MEDIUM);
     case EILSEQ:
@@ -337,6 +345,8 @@ zfs_check_media_change(struct block_device *bdev)
     return (0);
 }
 #define vdev_bdev_reread_part(bdev) zfs_check_media_change(bdev)
+#elif defined(HAVE_DISK_CHECK_MEDIA_CHANGE)
+#define vdev_bdev_reread_part(bdev) disk_check_media_change(bdev->bd_disk)
 #else
 /*
  * This is encountered if check_disk_change() and bdev_check_media_change()
@@ -38,7 +38,7 @@ typedef unsigned long ulong_t;
 typedef unsigned long long u_longlong_t;
 typedef long long longlong_t;

-typedef unsigned long intptr_t;
+typedef long intptr_t;
 typedef unsigned long long rlim64_t;

 typedef struct task_struct kthread_t;
@@ -60,7 +60,7 @@ typedef struct bpobj {
     kmutex_t bpo_lock;
     objset_t *bpo_os;
     uint64_t bpo_object;
-    int bpo_epb;
+    uint32_t bpo_epb;
     uint8_t bpo_havecomp;
     uint8_t bpo_havesubobj;
     uint8_t bpo_havefreed;
@@ -36,8 +36,6 @@
 extern "C" {
 #endif

-extern uint64_t zfetch_array_rd_sz;
-
 struct dnode;	/* so we can reference dnode */

 typedef struct zfetch {
@@ -102,8 +102,6 @@ extern "C" {
 #define FM_EREPORT_PAYLOAD_ZFS_ZIO_TIMESTAMP "zio_timestamp"
 #define FM_EREPORT_PAYLOAD_ZFS_ZIO_DELTA "zio_delta"
 #define FM_EREPORT_PAYLOAD_ZFS_PREV_STATE "prev_state"
-#define FM_EREPORT_PAYLOAD_ZFS_CKSUM_EXPECTED "cksum_expected"
-#define FM_EREPORT_PAYLOAD_ZFS_CKSUM_ACTUAL "cksum_actual"
 #define FM_EREPORT_PAYLOAD_ZFS_CKSUM_ALGO "cksum_algorithm"
 #define FM_EREPORT_PAYLOAD_ZFS_CKSUM_BYTESWAP "cksum_byteswap"
 #define FM_EREPORT_PAYLOAD_ZFS_BAD_OFFSET_RANGES "bad_ranges"
@@ -112,8 +110,6 @@ extern "C" {
 #define FM_EREPORT_PAYLOAD_ZFS_BAD_RANGE_CLEARS "bad_range_clears"
 #define FM_EREPORT_PAYLOAD_ZFS_BAD_SET_BITS "bad_set_bits"
 #define FM_EREPORT_PAYLOAD_ZFS_BAD_CLEARED_BITS "bad_cleared_bits"
-#define FM_EREPORT_PAYLOAD_ZFS_BAD_SET_HISTOGRAM "bad_set_histogram"
-#define FM_EREPORT_PAYLOAD_ZFS_BAD_CLEARED_HISTOGRAM "bad_cleared_histogram"
 #define FM_EREPORT_PAYLOAD_ZFS_SNAPSHOT_NAME "snapshot_name"
 #define FM_EREPORT_PAYLOAD_ZFS_DEVICE_NAME "device_name"
 #define FM_EREPORT_PAYLOAD_ZFS_RAW_DEVICE_NAME "raw_name"
@@ -723,16 +723,10 @@ typedef enum spa_mode {
  * Send TRIM commands in-line during normal pool operation while deleting.
  * OFF: no
  * ON: yes
- * NB: IN_FREEBSD_BASE is defined within the FreeBSD sources.
  */
 typedef enum {
     SPA_AUTOTRIM_OFF = 0,	/* default */
     SPA_AUTOTRIM_ON,
-#ifdef IN_FREEBSD_BASE
-    SPA_AUTOTRIM_DEFAULT = SPA_AUTOTRIM_ON,
-#else
-    SPA_AUTOTRIM_DEFAULT = SPA_AUTOTRIM_OFF,
-#endif
 } spa_autotrim_t;

 /*
@@ -250,6 +250,7 @@ struct spa {
     uint64_t spa_min_ashift;	/* of vdevs in normal class */
     uint64_t spa_max_ashift;	/* of vdevs in normal class */
     uint64_t spa_min_alloc;	/* of vdevs in normal class */
+    uint64_t spa_gcd_alloc;	/* of vdevs in normal class */
     uint64_t spa_config_guid;	/* config pool guid */
     uint64_t spa_load_guid;	/* spa_load initialized guid */
     uint64_t spa_last_synced_guid;	/* last synced guid */
@@ -420,6 +420,7 @@ struct vdev {
     boolean_t vdev_copy_uberblocks;	/* post expand copy uberblocks */
     boolean_t vdev_resilver_deferred;	/* resilver deferred */
     boolean_t vdev_kobj_flag;		/* kobj event record */
+    boolean_t vdev_attaching;		/* vdev attach ashift handling */
     vdev_queue_t vdev_queue;		/* I/O deadline schedule queue */
     spa_aux_vdev_t *vdev_aux;		/* for l2cache and spares vdevs */
     zio_t *vdev_probe_zio;		/* root of current probe */
@@ -94,8 +94,6 @@ typedef const struct zio_checksum_info {
 } zio_checksum_info_t;

 typedef struct zio_bad_cksum {
-    zio_cksum_t zbc_expected;
-    zio_cksum_t zbc_actual;
     const char *zbc_checksum_name;
     uint8_t zbc_byteswapped;
     uint8_t zbc_injected;
@@ -15,7 +15,7 @@
 .\" own identifying information:
 .\" Portions Copyright [yyyy] [name of copyright owner]
 .\"
-.Dd January 10, 2023
+.Dd July 21, 2023
 .Dt ZFS 4
 .Os
 .
@@ -239,6 +239,11 @@ relative to the pool.
 Make some blocks above a certain size be gang blocks.
 This option is used by the test suite to facilitate testing.
 .
+.It Sy metaslab_force_ganging_pct Ns = Ns Sy 3 Ns % Pq uint
+For blocks that could be forced to be a gang block (due to
+.Sy metaslab_force_ganging ) ,
+force this many of them to be gang blocks.
+.
 .It Sy zfs_ddt_zap_default_bs Ns = Ns Sy 15 Po 32 KiB Pc Pq int
 Default DDT ZAP data block size as a power of 2. Note that changing this after
 creating a DDT on the pool will not affect existing DDTs, only newly created
@@ -519,9 +524,6 @@ However, this is limited by
 Maximum micro ZAP size.
 A micro ZAP is upgraded to a fat ZAP, once it grows beyond the specified size.
 .
-.It Sy zfetch_array_rd_sz Ns = Ns Sy 1048576 Ns B Po 1 MiB Pc Pq u64
-If prefetching is enabled, disable prefetching for reads larger than this size.
-.
 .It Sy zfetch_min_distance Ns = Ns Sy 4194304 Ns B Po 4 MiB Pc Pq uint
 Min bytes to prefetch per stream.
 Prefetch distance starts from the demand access size and quickly grows to
@@ -26,7 +26,7 @@
 .\" Copyright 2017 Nexenta Systems, Inc.
 .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
 .\"
-.Dd May 27, 2021
+.Dd July 11, 2023
 .Dt ZPOOL-EVENTS 8
 .Os
 .
@@ -305,10 +305,6 @@ The time when a given I/O request was submitted.
 The time required to service a given I/O request.
 .It Sy prev_state
 The previous state of the vdev.
-.It Sy cksum_expected
-The expected checksum value for the block.
-.It Sy cksum_actual
-The actual checksum value for an errant block.
 .It Sy cksum_algorithm
 Checksum algorithm used.
 See
@@ -362,23 +358,6 @@ Like
 but contains
 .Pq Ar good data No & ~( Ns Ar bad data ) ;
 that is, the bits set in the good data which are cleared in the bad data.
-.It Sy bad_set_histogram
-If this field exists, it is an array of counters.
-Each entry counts bits set in a particular bit of a big-endian uint64 type.
-The first entry counts bits
-set in the high-order bit of the first byte, the 9th byte, etc, and the last
-entry counts bits set of the low-order bit of the 8th byte, the 16th byte, etc.
-This information is useful for observing a stuck bit in a parallel data path,
-such as IDE or parallel SCSI.
-.It Sy bad_cleared_histogram
-If this field exists, it is an array of counters.
-Each entry counts bit clears in a particular bit of a big-endian uint64 type.
-The first entry counts bits
-clears of the high-order bit of the first byte, the 9th byte, etc, and the
-last entry counts clears of the low-order bit of the 8th byte, the 16th byte,
-etc.
-This information is useful for observing a stuck bit in a parallel data
-path, such as IDE or parallel SCSI.
 .El
 .
 .Sh I/O STAGES
@@ -6263,7 +6263,8 @@ zfs_freebsd_copy_file_range(struct vop_copy_file_range_args *ap)
             goto bad_write_fallback;
         }
     } else {
-#if __FreeBSD_version >= 1400086
+#if (__FreeBSD_version >= 1302506 && __FreeBSD_version < 1400000) || \
+    __FreeBSD_version >= 1400086
         vn_lock_pair(invp, false, LK_EXCLUSIVE, outvp, false,
             LK_EXCLUSIVE);
 #else
@@ -1662,6 +1662,7 @@ zfs_umount(struct super_block *sb)
     }

     zfsvfs_free(zfsvfs);
+    sb->s_fs_info = NULL;
     return (0);
 }

@@ -277,8 +277,6 @@ zpl_test_super(struct super_block *s, void *data)
 {
     zfsvfs_t *zfsvfs = s->s_fs_info;
     objset_t *os = data;
-    int match;
-
     /*
      * If the os doesn't match the z_os in the super_block, assume it is
      * not a match. Matching would imply a multimount of a dataset. It is
@@ -286,19 +284,7 @@ zpl_test_super(struct super_block *s, void *data)
      * that changes the z_os, e.g., rollback, where the match will be
      * missed, but in that case the user will get an EBUSY.
      */
-    if (zfsvfs == NULL || os != zfsvfs->z_os)
-        return (0);
-
-    /*
-     * If they do match, recheck with the lock held to prevent mounting the
-     * wrong dataset since z_os can be stale when the teardown lock is held.
-     */
-    if (zpl_enter(zfsvfs, FTAG) != 0)
-        return (0);
-    match = (os == zfsvfs->z_os);
-    zpl_exit(zfsvfs, FTAG);
-
-    return (match);
+    return (zfsvfs != NULL && os == zfsvfs->z_os);
 }

 static struct super_block *
@@ -324,12 +310,35 @@ zpl_mount_impl(struct file_system_type *fs_type, int flags, zfs_mnt_t *zm)

     s = sget(fs_type, zpl_test_super, set_anon_super, flags, os);

+    /*
+     * Recheck with the lock held to prevent mounting the wrong dataset
+     * since z_os can be stale when the teardown lock is held.
+     *
+     * We can't do this in zpl_test_super in since it's under spinlock and
+     * also s_umount lock is not held there so it would race with
+     * zfs_umount and zfsvfs can be freed.
+     */
+    if (!IS_ERR(s) && s->s_fs_info != NULL) {
+        zfsvfs_t *zfsvfs = s->s_fs_info;
+        if (zpl_enter(zfsvfs, FTAG) == 0) {
+            if (os != zfsvfs->z_os)
+                err = -SET_ERROR(EBUSY);
+            zpl_exit(zfsvfs, FTAG);
+        } else {
+            err = -SET_ERROR(EBUSY);
+        }
+    }
     dsl_dataset_long_rele(dmu_objset_ds(os), FTAG);
     dsl_dataset_rele(dmu_objset_ds(os), FTAG);

     if (IS_ERR(s))
         return (ERR_CAST(s));

+    if (err) {
+        deactivate_locked_super(s);
+        return (ERR_PTR(err));
+    }
+
     if (s->s_root == NULL) {
         err = zpl_fill_super(s, zm, flags & SB_SILENT ? 1 : 0);
         if (err) {
@@ -160,7 +160,7 @@ zpool_prop_init(void)
         "wait | continue | panic", "FAILMODE", failuremode_table,
         sfeatures);
     zprop_register_index(ZPOOL_PROP_AUTOTRIM, "autotrim",
-        SPA_AUTOTRIM_DEFAULT, PROP_DEFAULT, ZFS_TYPE_POOL,
+        SPA_AUTOTRIM_OFF, PROP_DEFAULT, ZFS_TYPE_POOL,
         "on | off", "AUTOTRIM", boolean_table, sfeatures);

     /* hidden properties */
@@ -284,7 +284,17 @@ bpobj_iterate_blkptrs(bpobj_info_t *bpi, bpobj_itor_t func, void *arg,
     dmu_buf_t *dbuf = NULL;
     bpobj_t *bpo = bpi->bpi_bpo;

-    for (int64_t i = bpo->bpo_phys->bpo_num_blkptrs - 1; i >= start; i--) {
+    int64_t i = bpo->bpo_phys->bpo_num_blkptrs - 1;
+    uint64_t pe = P2ALIGN_TYPED(i, bpo->bpo_epb, uint64_t) *
+        sizeof (blkptr_t);
+    uint64_t ps = start * sizeof (blkptr_t);
+    uint64_t pb = MAX((pe > dmu_prefetch_max) ? pe - dmu_prefetch_max : 0,
+        ps);
+    if (pe > pb) {
+        dmu_prefetch(bpo->bpo_os, bpo->bpo_object, 0, pb, pe - pb,
+            ZIO_PRIORITY_ASYNC_READ);
+    }
+    for (; i >= start; i--) {
         uint64_t offset = i * sizeof (blkptr_t);
         uint64_t blkoff = P2PHASE(i, bpo->bpo_epb);

@@ -292,9 +302,16 @@ bpobj_iterate_blkptrs(bpobj_info_t *bpi, bpobj_itor_t func, void *arg,
             if (dbuf)
                 dmu_buf_rele(dbuf, FTAG);
             err = dmu_buf_hold(bpo->bpo_os, bpo->bpo_object,
-                offset, FTAG, &dbuf, 0);
+                offset, FTAG, &dbuf, DMU_READ_NO_PREFETCH);
             if (err)
                 break;
+            pe = pb;
+            pb = MAX((dbuf->db_offset > dmu_prefetch_max) ?
+                dbuf->db_offset - dmu_prefetch_max : 0, ps);
+            if (pe > pb) {
+                dmu_prefetch(bpo->bpo_os, bpo->bpo_object, 0,
+                    pb, pe - pb, ZIO_PRIORITY_ASYNC_READ);
+            }
         }

         ASSERT3U(offset, >=, dbuf->db_offset);
@@ -466,22 +483,30 @@ bpobj_iterate_impl(bpobj_t *initial_bpo, bpobj_itor_t func, void *arg,
             int64_t i = bpi->bpi_unprocessed_subobjs - 1;
             uint64_t offset = i * sizeof (uint64_t);

-            uint64_t obj_from_sublist;
+            uint64_t subobj;
             err = dmu_read(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs,
-                offset, sizeof (uint64_t), &obj_from_sublist,
-                DMU_READ_PREFETCH);
+                offset, sizeof (uint64_t), &subobj,
+                DMU_READ_NO_PREFETCH);
             if (err)
                 break;
-            bpobj_t *sublist = kmem_alloc(sizeof (bpobj_t),
+
+            bpobj_t *subbpo = kmem_alloc(sizeof (bpobj_t),
                 KM_SLEEP);
-
-            err = bpobj_open(sublist, bpo->bpo_os,
-                obj_from_sublist);
-            if (err)
+            err = bpobj_open(subbpo, bpo->bpo_os, subobj);
+            if (err) {
+                kmem_free(subbpo, sizeof (bpobj_t));
                 break;
+            }
+
+            if (subbpo->bpo_havesubobj &&
+                subbpo->bpo_phys->bpo_subobjs != 0) {
+                dmu_prefetch(subbpo->bpo_os,
+                    subbpo->bpo_phys->bpo_subobjs, 0, 0, 0,
+                    ZIO_PRIORITY_ASYNC_READ);
+            }

-            list_insert_head(&stack, bpi_alloc(sublist, bpi, i));
-            mutex_enter(&sublist->bpo_lock);
+            list_insert_head(&stack, bpi_alloc(subbpo, bpi, i));
+            mutex_enter(&subbpo->bpo_lock);
             bpi->bpi_unprocessed_subobjs--;
         }
     }
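The bpobj hunks above replace implicit per-block prefetch with an explicit window that is issued ahead of the backward iteration and clamped by dmu_prefetch_max. The following standalone sketch is not taken from the ZFS tree; the block-pointer size, the prefetch limit, and the contiguous sliding of the window are simplified assumptions used only to illustrate the same [pb, pe) arithmetic.

```c
/* Illustrative sketch of a clamped backward prefetch window. */
#include <stdio.h>
#include <stdint.h>

#define BLKPTR_SIZE    128                     /* assumed bytes per entry */
#define PREFETCH_MAX   (8ULL * 1024 * 1024)    /* stand-in for dmu_prefetch_max */

static uint64_t
max_u64(uint64_t a, uint64_t b)
{
	return (a > b ? a : b);
}

int
main(void)
{
	uint64_t nblkptrs = 200000;	/* entries in the array object */
	uint64_t start = 0;		/* iteration stops at this index */

	/* Initial window: from the end of the array back toward 'start'. */
	uint64_t pe = nblkptrs * BLKPTR_SIZE;
	uint64_t ps = start * BLKPTR_SIZE;
	uint64_t pb = max_u64(pe > PREFETCH_MAX ? pe - PREFETCH_MAX : 0, ps);

	while (pe > pb) {
		printf("prefetch [%llu, %llu) (%llu bytes)\n",
		    (unsigned long long)pb, (unsigned long long)pe,
		    (unsigned long long)(pe - pb));
		/* Slide the window further back, again clamped to the max. */
		pe = pb;
		pb = max_u64(pe > PREFETCH_MAX ? pe - PREFETCH_MAX : 0, ps);
	}
	return (0);
}
```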
@@ -89,7 +89,11 @@ static int zfs_dmu_offset_next_sync = 1;
 * helps to limit the amount of memory that can be used by prefetching.
 * Larger objects should be prefetched a bit at a time.
 */
+#ifdef _ILP32
+uint_t dmu_prefetch_max = 8 * 1024 * 1024;
+#else
 uint_t dmu_prefetch_max = 8 * SPA_MAXBLOCKSIZE;
+#endif

 const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = {
     {DMU_BSWAP_UINT8, TRUE, FALSE, FALSE, "unallocated" },
@@ -552,8 +556,7 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
     zio = zio_root(dn->dn_objset->os_spa, NULL, NULL,
         ZIO_FLAG_CANFAIL);
     blkid = dbuf_whichblock(dn, 0, offset);
-    if ((flags & DMU_READ_NO_PREFETCH) == 0 &&
-        length <= zfetch_array_rd_sz) {
+    if ((flags & DMU_READ_NO_PREFETCH) == 0) {
         /*
          * Prepare the zfetch before initiating the demand reads, so
          * that if multiple threads block on same indirect block, we
@@ -1795,17 +1795,19 @@ receive_handle_existing_object(const struct receive_writer_arg *rwa,
     }

     /*
-     * The dmu does not currently support decreasing nlevels
-     * or changing the number of dnode slots on an object. For
-     * non-raw sends, this does not matter and the new object
-     * can just use the previous one's nlevels. For raw sends,
-     * however, the structure of the received dnode (including
-     * nlevels and dnode slots) must match that of the send
-     * side. Therefore, instead of using dmu_object_reclaim(),
-     * we must free the object completely and call
-     * dmu_object_claim_dnsize() instead.
+     * The dmu does not currently support decreasing nlevels or changing
+     * indirect block size if there is already one, same as changing the
+     * number of of dnode slots on an object. For non-raw sends this
+     * does not matter and the new object can just use the previous one's
+     * parameters. For raw sends, however, the structure of the received
+     * dnode (including indirects and dnode slots) must match that of the
+     * send side. Therefore, instead of using dmu_object_reclaim(), we
+     * must free the object completely and call dmu_object_claim_dnsize()
+     * instead.
      */
-    if ((rwa->raw && drro->drr_nlevels < doi->doi_indirection) ||
+    if ((rwa->raw && ((doi->doi_indirection > 1 &&
+        indblksz != doi->doi_metadata_block_size) ||
+        drro->drr_nlevels < doi->doi_indirection)) ||
         dn_slots != doi->doi_dnodesize >> DNODE_SHIFT) {
         err = dmu_free_long_object(rwa->os, drro->drr_object);
         if (err != 0)
@@ -52,14 +52,19 @@ static unsigned int zfetch_max_streams = 8;
 static unsigned int zfetch_min_sec_reap = 1;
 /* max time before stream delete */
 static unsigned int zfetch_max_sec_reap = 2;
+#ifdef _ILP32
+/* min bytes to prefetch per stream (default 2MB) */
+static unsigned int zfetch_min_distance = 2 * 1024 * 1024;
+/* max bytes to prefetch per stream (default 8MB) */
+unsigned int zfetch_max_distance = 8 * 1024 * 1024;
+#else
 /* min bytes to prefetch per stream (default 4MB) */
 static unsigned int zfetch_min_distance = 4 * 1024 * 1024;
 /* max bytes to prefetch per stream (default 64MB) */
 unsigned int zfetch_max_distance = 64 * 1024 * 1024;
+#endif
 /* max bytes to prefetch indirects for per stream (default 64MB) */
 unsigned int zfetch_max_idistance = 64 * 1024 * 1024;
-/* max number of bytes in an array_read in which we allow prefetching (1MB) */
-uint64_t zfetch_array_rd_sz = 1024 * 1024;

 typedef struct zfetch_stats {
     kstat_named_t zfetchstat_hits;
@@ -580,6 +585,3 @@ ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, max_distance, UINT, ZMOD_RW,

 ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, max_idistance, UINT, ZMOD_RW,
     "Max bytes to prefetch indirects for per stream");
-
-ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, array_rd_sz, U64, ZMOD_RW,
-    "Number of bytes in a array_read");
@@ -1882,7 +1882,7 @@ dnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx)
     if (ibs == dn->dn_indblkshift)
         ibs = 0;

-    if (size >> SPA_MINBLOCKSHIFT == dn->dn_datablkszsec && ibs == 0)
+    if (size == dn->dn_datablksz && ibs == 0)
         return (0);

     rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
@@ -1905,24 +1905,25 @@ dnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx)
     if (ibs && dn->dn_nlevels != 1)
         goto fail;

-    /* resize the old block */
-    err = dbuf_hold_impl(dn, 0, 0, TRUE, FALSE, FTAG, &db);
-    if (err == 0) {
-        dbuf_new_size(db, size, tx);
-    } else if (err != ENOENT) {
-        goto fail;
-    }
-
-    dnode_setdblksz(dn, size);
     dnode_setdirty(dn, tx);
-    dn->dn_next_blksz[tx->tx_txg&TXG_MASK] = size;
+    if (size != dn->dn_datablksz) {
+        /* resize the old block */
+        err = dbuf_hold_impl(dn, 0, 0, TRUE, FALSE, FTAG, &db);
+        if (err == 0) {
+            dbuf_new_size(db, size, tx);
+        } else if (err != ENOENT) {
+            goto fail;
+        }
+
+        dnode_setdblksz(dn, size);
+        dn->dn_next_blksz[tx->tx_txg & TXG_MASK] = size;
+        if (db)
+            dbuf_rele(db, FTAG);
+    }
     if (ibs) {
         dn->dn_indblkshift = ibs;
-        dn->dn_next_indblkshift[tx->tx_txg&TXG_MASK] = ibs;
+        dn->dn_next_indblkshift[tx->tx_txg & TXG_MASK] = ibs;
     }
-    /* release after we have fixed the blocksize in the dnode */
-    if (db)
-        dbuf_rele(db, FTAG);

     rw_exit(&dn->dn_struct_rwlock);
     return (0);
@@ -892,9 +892,9 @@ dsl_deadlist_merge(dsl_deadlist_t *dl, uint64_t obj, dmu_tx_t *tx)
     for (zap_cursor_init(&zc, dl->dl_os, obj);
         (error = zap_cursor_retrieve(&zc, za)) == 0;
         zap_cursor_advance(&zc)) {
-        uint64_t mintxg = zfs_strtonum(za->za_name, NULL);
-        dsl_deadlist_insert_bpobj(dl, za->za_first_integer, mintxg, tx);
-        VERIFY0(zap_remove_int(dl->dl_os, obj, mintxg, tx));
+        dsl_deadlist_insert_bpobj(dl, za->za_first_integer,
+            zfs_strtonum(za->za_name, NULL), tx);
+        VERIFY0(zap_remove(dl->dl_os, obj, za->za_name, tx));
         if (perror == 0) {
             dsl_deadlist_prefetch_bpobj(dl, pza->za_first_integer,
                 zfs_strtonum(pza->za_name, NULL));
@@ -2015,6 +2015,11 @@ dsl_scan_prefetch_cb(zio_t *zio, const zbookmark_phys_t *zb, const blkptr_t *bp,
             zb->zb_objset, DMU_META_DNODE_OBJECT);

         if (OBJSET_BUF_HAS_USERUSED(buf)) {
+            if (OBJSET_BUF_HAS_PROJECTUSED(buf)) {
+                dsl_scan_prefetch_dnode(scn,
+                    &osp->os_projectused_dnode, zb->zb_objset,
+                    DMU_PROJECTUSED_OBJECT);
+            }
             dsl_scan_prefetch_dnode(scn,
                 &osp->os_groupused_dnode, zb->zb_objset,
                 DMU_GROUPUSED_OBJECT);
@@ -2075,10 +2080,16 @@ dsl_scan_prefetch_thread(void *arg)
             zio_flags |= ZIO_FLAG_RAW;
         }

+        /* We don't need data L1 buffer since we do not prefetch L0. */
+        blkptr_t *bp = &spic->spic_bp;
+        if (BP_GET_LEVEL(bp) == 1 && BP_GET_TYPE(bp) != DMU_OT_DNODE &&
+            BP_GET_TYPE(bp) != DMU_OT_OBJSET)
+            flags |= ARC_FLAG_NO_BUF;
+
         /* issue the prefetch asynchronously */
-        (void) arc_read(scn->scn_zio_root, scn->scn_dp->dp_spa,
-            &spic->spic_bp, dsl_scan_prefetch_cb, spic->spic_spc,
-            ZIO_PRIORITY_SCRUB, zio_flags, &flags, &spic->spic_zb);
+        (void) arc_read(scn->scn_zio_root, spa, bp,
+            dsl_scan_prefetch_cb, spic->spic_spc, ZIO_PRIORITY_SCRUB,
+            zio_flags, &flags, &spic->spic_zb);

         kmem_free(spic, sizeof (scan_prefetch_issue_ctx_t));
     }
@@ -58,6 +58,11 @@ static uint64_t metaslab_aliquot = 1024 * 1024;
 */
 uint64_t metaslab_force_ganging = SPA_MAXBLOCKSIZE + 1;

+/*
+ * Of blocks of size >= metaslab_force_ganging, actually gang them this often.
+ */
+uint_t metaslab_force_ganging_pct = 3;
+
 /*
 * In pools where the log space map feature is not enabled we touch
 * multiple metaslabs (and their respective space maps) with each
@@ -5109,7 +5114,9 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
     * damage can result in extremely long reconstruction times. This
     * will also test spilling from special to normal.
     */
-    if (psize >= metaslab_force_ganging && (random_in_range(100) < 3)) {
+    if (psize >= metaslab_force_ganging &&
+        metaslab_force_ganging_pct > 0 &&
+        (random_in_range(100) < MIN(metaslab_force_ganging_pct, 100))) {
         metaslab_trace_add(zal, NULL, NULL, psize, d, TRACE_FORCE_GANG,
             allocator);
         return (SET_ERROR(ENOSPC));
@@ -6266,7 +6273,10 @@ ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, switch_threshold, INT, ZMOD_RW,
     "Segment-based metaslab selection maximum buckets before switching");

 ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, force_ganging, U64, ZMOD_RW,
-    "Blocks larger than this size are forced to be gang blocks");
+    "Blocks larger than this size are sometimes forced to be gang blocks");
+
+ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, force_ganging_pct, UINT, ZMOD_RW,
+    "Percentage of large blocks that will be forced to be gang blocks");

 ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, df_max_search, UINT, ZMOD_RW,
     "Max distance (bytes) to search forward before using size tree");
@@ -772,6 +772,7 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)
     spa->spa_min_ashift = INT_MAX;
     spa->spa_max_ashift = 0;
     spa->spa_min_alloc = INT_MAX;
+    spa->spa_gcd_alloc = INT_MAX;

     /* Reset cached value */
     spa->spa_dedup_dspace = ~0ULL;
@@ -889,9 +889,15 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
         &vd->vdev_not_present);

     /*
-     * Get the alignment requirement.
+     * Get the alignment requirement. Ignore pool ashift for vdev
+     * attach case.
      */
-    (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ASHIFT, &vd->vdev_ashift);
+    if (alloctype != VDEV_ALLOC_ATTACH) {
+        (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ASHIFT,
+            &vd->vdev_ashift);
+    } else {
+        vd->vdev_attaching = B_TRUE;
+    }

     /*
      * Retrieve the vdev creation time.
@@ -1393,6 +1399,36 @@ vdev_remove_parent(vdev_t *cvd)
     vdev_free(mvd);
 }

+/*
+ * Choose GCD for spa_gcd_alloc.
+ */
+static uint64_t
+vdev_gcd(uint64_t a, uint64_t b)
+{
+    while (b != 0) {
+        uint64_t t = b;
+        b = a % b;
+        a = t;
+    }
+    return (a);
+}
+
+/*
+ * Set spa_min_alloc and spa_gcd_alloc.
+ */
+static void
+vdev_spa_set_alloc(spa_t *spa, uint64_t min_alloc)
+{
+    if (min_alloc < spa->spa_min_alloc)
+        spa->spa_min_alloc = min_alloc;
+    if (spa->spa_gcd_alloc == INT_MAX) {
+        spa->spa_gcd_alloc = min_alloc;
+    } else {
+        spa->spa_gcd_alloc = vdev_gcd(min_alloc,
+            spa->spa_gcd_alloc);
+    }
+}
+
 void
 vdev_metaslab_group_create(vdev_t *vd)
 {
@@ -1445,8 +1481,7 @@ vdev_metaslab_group_create(vdev_t *vd)
                 spa->spa_min_ashift = vd->vdev_ashift;

             uint64_t min_alloc = vdev_get_min_alloc(vd);
-            if (min_alloc < spa->spa_min_alloc)
-                spa->spa_min_alloc = min_alloc;
+            vdev_spa_set_alloc(spa, min_alloc);
         }
     }
 }
@@ -2144,9 +2179,9 @@ vdev_open(vdev_t *vd)
             return (SET_ERROR(EDOM));
         }

-        if (vd->vdev_top == vd) {
+        if (vd->vdev_top == vd && vd->vdev_attaching == B_FALSE)
             vdev_ashift_optimize(vd);
-        }
+        vd->vdev_attaching = B_FALSE;
     }
     if (vd->vdev_ashift != 0 && (vd->vdev_ashift < ASHIFT_MIN ||
         vd->vdev_ashift > ASHIFT_MAX)) {
@@ -2207,8 +2242,7 @@ vdev_open(vdev_t *vd)
     if (vd->vdev_top == vd && vd->vdev_ashift != 0 &&
         vd->vdev_islog == 0 && vd->vdev_aux == NULL) {
         uint64_t min_alloc = vdev_get_min_alloc(vd);
-        if (min_alloc < spa->spa_min_alloc)
-            spa->spa_min_alloc = min_alloc;
+        vdev_spa_set_alloc(spa, min_alloc);
     }

     /*
@@ -5688,6 +5722,7 @@ vdev_props_set_sync(void *arg, dmu_tx_t *tx)
     objset_t *mos = spa->spa_meta_objset;
     nvpair_t *elem = NULL;
     uint64_t vdev_guid;
+    uint64_t objid;
     nvlist_t *nvprops;

     vdev_guid = fnvlist_lookup_uint64(nvp, ZPOOL_VDEV_PROPS_SET_VDEV);
@@ -5698,31 +5733,28 @@ vdev_props_set_sync(void *arg, dmu_tx_t *tx)
     if (vd == NULL)
         return;

+    /*
+     * Set vdev property values in the vdev props mos object.
+     */
+    if (vd->vdev_root_zap != 0) {
+        objid = vd->vdev_root_zap;
+    } else if (vd->vdev_top_zap != 0) {
+        objid = vd->vdev_top_zap;
+    } else if (vd->vdev_leaf_zap != 0) {
+        objid = vd->vdev_leaf_zap;
+    } else {
+        panic("unexpected vdev type");
+    }
+
     mutex_enter(&spa->spa_props_lock);

     while ((elem = nvlist_next_nvpair(nvprops, elem)) != NULL) {
-        uint64_t intval, objid = 0;
+        uint64_t intval;
         const char *strval;
         vdev_prop_t prop;
         const char *propname = nvpair_name(elem);
         zprop_type_t proptype;

-        /*
-         * Set vdev property values in the vdev props mos object.
-         */
-        if (vd->vdev_root_zap != 0) {
-            objid = vd->vdev_root_zap;
-        } else if (vd->vdev_top_zap != 0) {
-            objid = vd->vdev_top_zap;
-        } else if (vd->vdev_leaf_zap != 0) {
-            objid = vd->vdev_leaf_zap;
-        } else {
-            /*
-             * XXX: implement vdev_props_set_check()
-             */
-            panic("vdev not root/top/leaf");
-        }
-
         switch (prop = vdev_name_to_prop(propname)) {
         case VDEV_PROP_USERPROP:
             if (vdev_prop_user(propname)) {
@@ -5791,6 +5823,12 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)

     ASSERT(vd != NULL);

+    /* Check that vdev has a zap we can use */
+    if (vd->vdev_root_zap == 0 &&
+        vd->vdev_top_zap == 0 &&
+        vd->vdev_leaf_zap == 0)
+        return (SET_ERROR(EINVAL));
+
     if (nvlist_lookup_uint64(innvl, ZPOOL_VDEV_PROPS_SET_VDEV,
         &vdev_guid) != 0)
         return (SET_ERROR(EINVAL));
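The new vdev_gcd()/vdev_spa_set_alloc() pair above keeps, alongside the smallest minimum allocation size of any normal-class vdev (spa_min_alloc), the greatest common divisor of all of them (spa_gcd_alloc). A minimal standalone sketch of that folding step follows; the per-vdev sizes are made-up example values, not output from a real pool.

```c
/* Self-contained illustration of folding per-vdev minimum allocation
 * sizes into (min_alloc, gcd_alloc), mirroring vdev_spa_set_alloc(). */
#include <stdio.h>
#include <stdint.h>

static uint64_t
gcd(uint64_t a, uint64_t b)
{
	while (b != 0) {
		uint64_t t = b;
		b = a % b;
		a = t;
	}
	return (a);
}

int
main(void)
{
	/* Assumed example: vdevs whose smallest allocations differ. */
	uint64_t vdev_min_alloc[] = { 4096, 6144, 4096 };
	uint64_t min_alloc = UINT64_MAX, gcd_alloc = 0;

	for (size_t i = 0; i < sizeof (vdev_min_alloc) / sizeof (uint64_t); i++) {
		uint64_t m = vdev_min_alloc[i];
		if (m < min_alloc)
			min_alloc = m;
		gcd_alloc = (gcd_alloc == 0) ? m : gcd(m, gcd_alloc);
	}
	/* Prints: min_alloc=4096 gcd_alloc=2048 */
	printf("min_alloc=%llu gcd_alloc=%llu\n",
	    (unsigned long long)min_alloc, (unsigned long long)gcd_alloc);
	return (0);
}
```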
@@ -1398,7 +1398,7 @@ vdev_indirect_checksum_error(zio_t *zio,
     vd->vdev_stat.vs_checksum_errors++;
     mutex_exit(&vd->vdev_stat_lock);

-    zio_bad_cksum_t zbc = {{{ 0 }}};
+    zio_bad_cksum_t zbc = { 0 };
     abd_t *bad_abd = ic->ic_data;
     abd_t *good_abd = is->is_good_child->ic_data;
     (void) zfs_ereport_post_checksum(zio->io_spa, vd, NULL, zio,
@@ -1785,7 +1785,7 @@ vdev_raidz_checksum_error(zio_t *zio, raidz_col_t *rc, abd_t *bad_data)
 static int
 raidz_checksum_verify(zio_t *zio)
 {
-    zio_bad_cksum_t zbc = {{{0}}};
+    zio_bad_cksum_t zbc = {0};
     raidz_map_t *rm = zio->io_vsd;

     int ret = zio_checksum_error(zio, &zbc);
@@ -754,10 +754,6 @@ zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out,
 #define MAX_RANGES 16

 typedef struct zfs_ecksum_info {
-    /* histograms of set and cleared bits by bit number in a 64-bit word */
-    uint8_t zei_histogram_set[sizeof (uint64_t) * NBBY];
-    uint8_t zei_histogram_cleared[sizeof (uint64_t) * NBBY];
-
     /* inline arrays of bits set and cleared. */
     uint64_t zei_bits_set[ZFM_MAX_INLINE];
     uint64_t zei_bits_cleared[ZFM_MAX_INLINE];
@@ -781,7 +777,7 @@ typedef struct zfs_ecksum_info {
 } zfs_ecksum_info_t;

 static void
-update_histogram(uint64_t value_arg, uint8_t *hist, uint32_t *count)
+update_bad_bits(uint64_t value_arg, uint32_t *count)
 {
     size_t i;
     size_t bits = 0;
@@ -789,10 +785,8 @@ update_histogram(uint64_t value_arg, uint8_t *hist, uint32_t *count)

     /* We store the bits in big-endian (largest-first) order */
     for (i = 0; i < 64; i++) {
-        if (value & (1ull << i)) {
-            hist[63 - i]++;
+        if (value & (1ull << i))
             ++bits;
-        }
     }
     /* update the count of bits changed */
     *count += bits;
@@ -920,14 +914,6 @@ annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info,

     if (info != NULL && info->zbc_has_cksum) {
         fm_payload_set(ereport,
-            FM_EREPORT_PAYLOAD_ZFS_CKSUM_EXPECTED,
-            DATA_TYPE_UINT64_ARRAY,
-            sizeof (info->zbc_expected) / sizeof (uint64_t),
-            (uint64_t *)&info->zbc_expected,
-            FM_EREPORT_PAYLOAD_ZFS_CKSUM_ACTUAL,
-            DATA_TYPE_UINT64_ARRAY,
-            sizeof (info->zbc_actual) / sizeof (uint64_t),
-            (uint64_t *)&info->zbc_actual,
             FM_EREPORT_PAYLOAD_ZFS_CKSUM_ALGO,
             DATA_TYPE_STRING,
             info->zbc_checksum_name,
@@ -1010,10 +996,8 @@ annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info,
             offset++;
         }

-        update_histogram(set, eip->zei_histogram_set,
-            &eip->zei_range_sets[range]);
-        update_histogram(cleared, eip->zei_histogram_cleared,
-            &eip->zei_range_clears[range]);
+        update_bad_bits(set, &eip->zei_range_sets[range]);
+        update_bad_bits(cleared, &eip->zei_range_clears[range]);
     }

     /* convert to byte offsets */
@@ -1049,15 +1033,6 @@ annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info,
             DATA_TYPE_UINT8_ARRAY,
             inline_size, (uint8_t *)eip->zei_bits_cleared,
             NULL);
-    } else {
-        fm_payload_set(ereport,
-            FM_EREPORT_PAYLOAD_ZFS_BAD_SET_HISTOGRAM,
-            DATA_TYPE_UINT8_ARRAY,
-            NBBY * sizeof (uint64_t), eip->zei_histogram_set,
-            FM_EREPORT_PAYLOAD_ZFS_BAD_CLEARED_HISTOGRAM,
-            DATA_TYPE_UINT8_ARRAY,
-            NBBY * sizeof (uint64_t), eip->zei_histogram_cleared,
-            NULL);
     }
     return (eip);
 }
@@ -1596,6 +1596,19 @@ zio_shrink(zio_t *zio, uint64_t size)
     }
 }

+/*
+ * Round provided allocation size up to a value that can be allocated
+ * by at least some vdev(s) in the pool with minimum or no additional
+ * padding and without extra space usage on others
+ */
+static uint64_t
+zio_roundup_alloc_size(spa_t *spa, uint64_t size)
+{
+    if (size > spa->spa_min_alloc)
+        return (roundup(size, spa->spa_gcd_alloc));
+    return (spa->spa_min_alloc);
+}
+
 /*
  * ==========================================================================
  * Prepare to read and write logical blocks
@@ -1802,9 +1815,8 @@ zio_write_compress(zio_t *zio)
         * in that we charge for the padding used to fill out
         * the last sector.
         */
-        ASSERT3U(spa->spa_min_alloc, >=, SPA_MINBLOCKSHIFT);
-        size_t rounded = (size_t)roundup(psize,
-            spa->spa_min_alloc);
+        size_t rounded = (size_t)zio_roundup_alloc_size(spa,
+            psize);
         if (rounded >= lsize) {
             compress = ZIO_COMPRESS_OFF;
             zio_buf_free(cbuf, lsize);
@@ -1847,8 +1859,8 @@ zio_write_compress(zio_t *zio)
         * take this codepath because it will change the on-disk block
         * and decryption will fail.
         */
-        size_t rounded = MIN((size_t)roundup(psize,
-            spa->spa_min_alloc), lsize);
+        size_t rounded = MIN((size_t)zio_roundup_alloc_size(spa, psize),
+            lsize);

         if (rounded != psize) {
             abd_t *cdata = abd_alloc_linear(rounded, B_TRUE);
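zio_write_compress() above now rounds compressed sizes with zio_roundup_alloc_size() instead of always rounding up to spa_min_alloc. The standalone comparison below illustrates the difference between the two policies; the spa_min_alloc and spa_gcd_alloc numbers are assumed example values consistent with the GCD sketch earlier, not measurements from a real pool.

```c
/* Compare the old and new compressed-size rounding policies. */
#include <stdio.h>
#include <stdint.h>

#define ROUNDUP(x, y)	((((x) + (y) - 1) / (y)) * (y))

static uint64_t
roundup_alloc_size(uint64_t size, uint64_t min_alloc, uint64_t gcd_alloc)
{
	/* Mirrors the logic of zio_roundup_alloc_size() in the hunk above. */
	if (size > min_alloc)
		return (ROUNDUP(size, gcd_alloc));
	return (min_alloc);
}

int
main(void)
{
	uint64_t min_alloc = 4096, gcd_alloc = 2048;	/* assumed values */
	uint64_t sizes[] = { 1500, 5000, 9000 };

	for (size_t i = 0; i < sizeof (sizes) / sizeof (sizes[0]); i++) {
		uint64_t psize = sizes[i];
		/* e.g. psize=5000: old policy pads to 8192, new to 6144. */
		printf("psize=%5llu old=%5llu new=%5llu\n",
		    (unsigned long long)psize,
		    (unsigned long long)ROUNDUP(psize, min_alloc),
		    (unsigned long long)roundup_alloc_size(psize,
		    min_alloc, gcd_alloc));
	}
	return (0);
}
```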
@@ -515,8 +515,6 @@ zio_checksum_error_impl(spa_t *spa, const blkptr_t *bp,
     }

     if (info != NULL) {
-        info->zbc_expected = expected_cksum;
-        info->zbc_actual = actual_cksum;
         info->zbc_checksum_name = ci->ci_name;
         info->zbc_byteswapped = byteswap;
         info->zbc_injected = 0;
@@ -35,7 +35,7 @@
 #
 # STRATEGY:
 # 1. Create various pools with different ashift values.
-# 2. Verify 'attach -o ashift=<n>' works only with allowed values.
+# 2. Verify 'attach' works.
 #

 verify_runnable "global"
@@ -66,26 +66,14 @@ log_must set_tunable32 VDEV_FILE_PHYSICAL_ASHIFT 16
 typeset ashifts=("9" "10" "11" "12" "13" "14" "15" "16")
 for ashift in ${ashifts[@]}
 do
-	for cmdval in ${ashifts[@]}
-	do
-		log_must zpool create -o ashift=$ashift $TESTPOOL1 $disk1
-		log_must verify_ashift $disk1 $ashift
-
-		# ashift_of(attached_disk) <= ashift_of(existing_vdev)
-		if [[ $cmdval -le $ashift ]]
-		then
-			log_must zpool attach -o ashift=$cmdval $TESTPOOL1 \
-			    $disk1 $disk2
-			log_must verify_ashift $disk2 $ashift
-		else
-			log_mustnot zpool attach -o ashift=$cmdval $TESTPOOL1 \
-			    $disk1 $disk2
-		fi
-		# clean things for the next run
-		log_must zpool destroy $TESTPOOL1
-		log_must zpool labelclear $disk1
-		log_must zpool labelclear $disk2
-	done
+	log_must zpool create -o ashift=$ashift $TESTPOOL1 $disk1
+	log_must verify_ashift $disk1 $ashift
+	log_must zpool attach $TESTPOOL1 $disk1 $disk2
+	log_must verify_ashift $disk2 $ashift
+	# clean things for the next run
+	log_must zpool destroy $TESTPOOL1
+	log_must zpool labelclear $disk1
+	log_must zpool labelclear $disk2
 done

 typeset badvals=("off" "on" "1" "8" "17" "1b" "ff" "-")
@@ -35,7 +35,7 @@
 #
 # STRATEGY:
 # 1. Create various pools with different ashift values.
-# 2. Verify 'replace -o ashift=<n>' works only with allowed values.
+# 2. Verify 'replace' works.
 #

 verify_runnable "global"
@@ -66,26 +66,16 @@ log_must set_tunable32 VDEV_FILE_PHYSICAL_ASHIFT 16
 typeset ashifts=("9" "10" "11" "12" "13" "14" "15" "16")
 for ashift in ${ashifts[@]}
 do
-	for cmdval in ${ashifts[@]}
-	do
-		log_must zpool create -o ashift=$ashift $TESTPOOL1 $disk1
-		log_must verify_ashift $disk1 $ashift
-		# ashift_of(replacing_disk) <= ashift_of(existing_vdev)
-		if [[ $cmdval -le $ashift ]]
-		then
-			log_must zpool replace -o ashift=$cmdval $TESTPOOL1 \
-			    $disk1 $disk2
-			log_must verify_ashift $disk2 $ashift
-			wait_replacing $TESTPOOL1
-		else
-			log_mustnot zpool replace -o ashift=$cmdval $TESTPOOL1 \
-			    $disk1 $disk2
-		fi
-		# clean things for the next run
-		log_must zpool destroy $TESTPOOL1
-		log_must zpool labelclear $disk1
-		log_must zpool labelclear $disk2
-	done
+	log_must zpool create -o ashift=$ashift $TESTPOOL1 $disk1
+	log_must verify_ashift $disk1 $ashift
+	# ashift_of(replacing_disk) <= ashift_of(existing_vdev)
+	log_must zpool replace $TESTPOOL1 $disk1 $disk2
+	log_must verify_ashift $disk2 $ashift
+	wait_replacing $TESTPOOL1
+	# clean things for the next run
+	log_must zpool destroy $TESTPOOL1
+	log_must zpool labelclear $disk1
+	log_must zpool labelclear $disk2
 done

 typeset badvals=("off" "on" "1" "8" "17" "1b" "ff" "-")
|
@ -34,10 +34,8 @@
|
||||||
#
|
#
|
||||||
# STRATEGY:
|
# STRATEGY:
|
||||||
# 1. Create a pool with default values.
|
# 1. Create a pool with default values.
|
||||||
# 2. Verify 'zpool replace' uses the ashift pool property value when
|
# 2. Override the pool ashift property.
|
||||||
# replacing an existing device.
|
# 3. Verify 'zpool replace' works.
|
||||||
# 3. Verify the default ashift value can still be overridden by manually
|
|
||||||
# specifying '-o ashift=<n>' from the command line.
|
|
||||||
#
|
#
|
||||||
|
|
||||||
verify_runnable "global"
|
verify_runnable "global"
|
||||||
|
@ -72,21 +70,9 @@ do
|
||||||
do
|
do
|
||||||
log_must zpool create -o ashift=$ashift $TESTPOOL1 $disk1
|
log_must zpool create -o ashift=$ashift $TESTPOOL1 $disk1
|
||||||
log_must zpool set ashift=$pprop $TESTPOOL1
|
log_must zpool set ashift=$pprop $TESTPOOL1
|
||||||
# ashift_of(replacing_disk) <= ashift_of(existing_vdev)
|
log_must zpool replace $TESTPOOL1 $disk1 $disk2
|
||||||
if [[ $pprop -le $ashift ]]
|
wait_replacing $TESTPOOL1
|
||||||
then
|
log_must verify_ashift $disk2 $ashift
|
||||||
log_must zpool replace $TESTPOOL1 $disk1 $disk2
|
|
||||||
wait_replacing $TESTPOOL1
|
|
||||||
log_must verify_ashift $disk2 $ashift
|
|
||||||
else
|
|
||||||
# cannot replace if pool prop ashift > vdev ashift
|
|
||||||
log_mustnot zpool replace $TESTPOOL1 $disk1 $disk2
|
|
||||||
# verify we can override the pool prop value manually
|
|
||||||
log_must zpool replace -o ashift=$ashift $TESTPOOL1 \
|
|
||||||
$disk1 $disk2
|
|
||||||
wait_replacing $TESTPOOL1
|
|
||||||
log_must verify_ashift $disk2 $ashift
|
|
||||||
fi
|
|
||||||
# clean things for the next run
|
# clean things for the next run
|
||||||
log_must zpool destroy $TESTPOOL1
|
log_must zpool destroy $TESTPOOL1
|
||||||
log_must zpool labelclear $disk1
|
log_must zpool labelclear $disk1
|
||||||
|
|