Merge 'ozfs/master' into truenas/zfs-2.3-release

Signed-off-by: Ameer Hamza <ahamza@ixsystems.com>

commit cd0d13cb96

@ -793,18 +793,27 @@ def section_dmu(kstats_dict):

    zfetch_stats = isolate_section('zfetchstats', kstats_dict)

    zfetch_access_total = int(zfetch_stats['hits'])+int(zfetch_stats['misses'])
    zfetch_access_total = int(zfetch_stats['hits']) +\
        int(zfetch_stats['future']) + int(zfetch_stats['stride']) +\
        int(zfetch_stats['past']) + int(zfetch_stats['misses'])

    prt_1('DMU predictive prefetcher calls:', f_hits(zfetch_access_total))
    prt_i2('Stream hits:',
           f_perc(zfetch_stats['hits'], zfetch_access_total),
           f_hits(zfetch_stats['hits']))
    future = int(zfetch_stats['future']) + int(zfetch_stats['stride'])
    prt_i2('Hits ahead of stream:', f_perc(future, zfetch_access_total),
           f_hits(future))
    prt_i2('Hits behind stream:',
           f_perc(zfetch_stats['past'], zfetch_access_total),
           f_hits(zfetch_stats['past']))
    prt_i2('Stream misses:',
           f_perc(zfetch_stats['misses'], zfetch_access_total),
           f_hits(zfetch_stats['misses']))
    prt_i2('Streams limit reached:',
           f_perc(zfetch_stats['max_streams'], zfetch_stats['misses']),
           f_hits(zfetch_stats['max_streams']))
    prt_i1('Stream strides:', f_hits(zfetch_stats['stride']))
    prt_i1('Prefetches issued', f_hits(zfetch_stats['io_issued']))
    print()
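For orientation: the rewritten total simply puts every zfetch outcome (hits, future, stride, past, misses) under one denominator so the percentages printed by the lines above all agree. A minimal standalone sketch of that arithmetic, written in C for consistency with the rest of this commit and using made-up counter values rather than real zfetchstats output:

#include <stdio.h>

int
main(void)
{
	/* Made-up zfetchstats counters, for illustration only. */
	long hits = 900, future = 40, stride = 10, past = 20, misses = 30;
	long total = hits + future + stride + past + misses;

	printf("DMU predictive prefetcher calls: %ld\n", total);
	printf("Stream hits:          %5.1f%%  %ld\n",
	    100.0 * hits / total, hits);
	printf("Hits ahead of stream: %5.1f%%  %ld\n",
	    100.0 * (future + stride) / total, future + stride);
	printf("Hits behind stream:   %5.1f%%  %ld\n",
	    100.0 * past / total, past);
	printf("Stream misses:        %5.1f%%  %ld\n",
	    100.0 * misses / total, misses);
	return (0);
}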
@ -22,6 +22,7 @@
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
 * Copyright (c) 2017, Intel Corporation.
 * Copyright (c) 2024, Klara Inc.
 */

/*

@ -208,6 +209,37 @@ type_to_name(uint64_t type)
}
}

struct errstr {
	int err;
	const char *str;
};
static const struct errstr errstrtable[] = {
	{ EIO,		"io" },
	{ ECKSUM,	"checksum" },
	{ EINVAL,	"decompress" },
	{ EACCES,	"decrypt" },
	{ ENXIO,	"nxio" },
	{ ECHILD,	"dtl" },
	{ EILSEQ,	"corrupt" },
	{ 0, NULL },
};

static int
str_to_err(const char *str)
{
	for (int i = 0; errstrtable[i].str != NULL; i++)
		if (strcasecmp(errstrtable[i].str, str) == 0)
			return (errstrtable[i].err);
	return (-1);
}
static const char *
err_to_str(int err)
{
	for (int i = 0; errstrtable[i].str != NULL; i++)
		if (errstrtable[i].err == err)
			return (errstrtable[i].str);
	return ("[unknown]");
}
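For reference, a minimal self-contained sketch of how this lookup table behaves. It is trimmed to portable errno values (ECKSUM and ECHILD above are ZFS-specific uses), and the main() driver is purely illustrative, not part of zinject:

#include <errno.h>
#include <stdio.h>
#include <strings.h>

struct errstr { int err; const char *str; };

/* Same shape as errstrtable above, reduced to portable errno values. */
static const struct errstr table[] = {
	{ EIO,		"io" },
	{ EINVAL,	"decompress" },
	{ EACCES,	"decrypt" },
	{ ENXIO,	"nxio" },
	{ 0,		NULL },
};

static int
str_to_err(const char *str)
{
	for (int i = 0; table[i].str != NULL; i++)
		if (strcasecmp(table[i].str, str) == 0)
			return (table[i].err);
	return (-1);	/* unknown name: caller falls back to the usage text */
}

int
main(void)
{
	printf("decrypt -> %d\n", str_to_err("DECRYPT"));	/* case-insensitive */
	printf("bogus   -> %d\n", str_to_err("bogus"));		/* -1 */
	return (0);
}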
|
||||
|
||||
/*
|
||||
* Print usage message.
|
||||
|
@ -233,7 +265,7 @@ usage(void)
|
|||
"\t\tspa_vdev_exit() will trigger a panic.\n"
|
||||
"\n"
|
||||
"\tzinject -d device [-e errno] [-L <nvlist|uber|pad1|pad2>] [-F]\n"
|
||||
"\t\t[-T <read|write|free|claim|all>] [-f frequency] pool\n\n"
|
||||
"\t\t[-T <read|write|free|claim|ioctl|all>] [-f frequency] pool\n\n"
|
||||
"\t\tInject a fault into a particular device or the device's\n"
|
||||
"\t\tlabel. Label injection can either be 'nvlist', 'uber',\n "
|
||||
"\t\t'pad1', or 'pad2'.\n"
|
||||
|
@ -392,6 +424,10 @@ static int
|
|||
print_device_handler(int id, const char *pool, zinject_record_t *record,
|
||||
void *data)
|
||||
{
|
||||
static const char *iotypestr[] = {
|
||||
"null", "read", "write", "free", "claim", "ioctl", "trim", "all",
|
||||
};
|
||||
|
||||
int *count = data;
|
||||
|
||||
if (record->zi_guid == 0 || record->zi_func[0] != '\0')
|
||||
|
@ -401,14 +437,21 @@ print_device_handler(int id, const char *pool, zinject_record_t *record,
|
|||
return (0);
|
||||
|
||||
if (*count == 0) {
|
||||
(void) printf("%3s %-15s %s\n", "ID", "POOL", "GUID");
|
||||
(void) printf("--- --------------- ----------------\n");
|
||||
(void) printf("%3s %-15s %-16s %-5s %-10s %-9s\n",
|
||||
"ID", "POOL", "GUID", "TYPE", "ERROR", "FREQ");
|
||||
(void) printf(
|
||||
"--- --------------- ---------------- "
|
||||
"----- ---------- ---------\n");
|
||||
}
|
||||
|
||||
*count += 1;
|
||||
|
||||
(void) printf("%3d %-15s %llx\n", id, pool,
|
||||
(u_longlong_t)record->zi_guid);
|
||||
double freq = record->zi_freq == 0 ? 100.0f :
|
||||
(((double)record->zi_freq) / ZI_PERCENTAGE_MAX) * 100.0f;
|
||||
|
||||
(void) printf("%3d %-15s %llx %-5s %-10s %8.4f%%\n", id, pool,
|
||||
(u_longlong_t)record->zi_guid, iotypestr[record->zi_iotype],
|
||||
err_to_str(record->zi_error), freq);
|
||||
|
||||
return (0);
|
||||
}
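The new FREQ column converts the stored zi_freq value into a percentage, with zi_freq == 0 meaning "always" and printing as 100%. A quick standalone check of that conversion; ZI_PERCENTAGE_MAX is defined elsewhere in the ZFS headers and is stubbed here with an arbitrary scale just to exercise the expression:

#include <stdio.h>
#include <stdint.h>

/* Stand-in scale; the real constant lives in the ZFS headers. */
#define	ZI_PERCENTAGE_MAX	1000000

int
main(void)
{
	uint32_t samples[] = { 0, ZI_PERCENTAGE_MAX / 4, ZI_PERCENTAGE_MAX };

	for (int i = 0; i < 3; i++) {
		uint32_t zi_freq = samples[i];
		/* Same expression as print_device_handler() above. */
		double freq = zi_freq == 0 ? 100.0f :
		    (((double)zi_freq) / ZI_PERCENTAGE_MAX) * 100.0f;
		printf("zi_freq=%10u -> %8.4f%%\n", (unsigned)zi_freq, freq);
	}
	return (0);
}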
|
||||
|
@ -842,24 +885,12 @@ main(int argc, char **argv)
|
|||
}
|
||||
break;
|
||||
case 'e':
|
||||
if (strcasecmp(optarg, "io") == 0) {
|
||||
error = EIO;
|
||||
} else if (strcasecmp(optarg, "checksum") == 0) {
|
||||
error = ECKSUM;
|
||||
} else if (strcasecmp(optarg, "decompress") == 0) {
|
||||
error = EINVAL;
|
||||
} else if (strcasecmp(optarg, "decrypt") == 0) {
|
||||
error = EACCES;
|
||||
} else if (strcasecmp(optarg, "nxio") == 0) {
|
||||
error = ENXIO;
|
||||
} else if (strcasecmp(optarg, "dtl") == 0) {
|
||||
error = ECHILD;
|
||||
} else if (strcasecmp(optarg, "corrupt") == 0) {
|
||||
error = EILSEQ;
|
||||
} else {
|
||||
error = str_to_err(optarg);
|
||||
if (error < 0) {
|
||||
(void) fprintf(stderr, "invalid error type "
|
||||
"'%s': must be 'io', 'checksum' or "
|
||||
"'nxio'\n", optarg);
|
||||
"'%s': must be one of: io decompress "
|
||||
"decrypt nxio dtl corrupt\n",
|
||||
optarg);
|
||||
usage();
|
||||
libzfs_fini(g_zfs);
|
||||
return (1);
|
||||
|
@ -947,12 +978,14 @@ main(int argc, char **argv)
|
|||
io_type = ZIO_TYPE_FREE;
|
||||
} else if (strcasecmp(optarg, "claim") == 0) {
|
||||
io_type = ZIO_TYPE_CLAIM;
|
||||
} else if (strcasecmp(optarg, "ioctl") == 0) {
|
||||
io_type = ZIO_TYPE_IOCTL;
|
||||
} else if (strcasecmp(optarg, "all") == 0) {
|
||||
io_type = ZIO_TYPES;
|
||||
} else {
|
||||
(void) fprintf(stderr, "invalid I/O type "
|
||||
"'%s': must be 'read', 'write', 'free', "
|
||||
"'claim' or 'all'\n", optarg);
|
||||
"'claim', 'ioctl' or 'all'\n", optarg);
|
||||
usage();
|
||||
libzfs_fini(g_zfs);
|
||||
return (1);
@ -2289,7 +2289,6 @@ print_status_initialize(vdev_stat_t *vs, boolean_t verbose)
|
|||
!vs->vs_scan_removing) {
|
||||
char zbuf[1024];
|
||||
char tbuf[256];
|
||||
struct tm zaction_ts;
|
||||
|
||||
time_t t = vs->vs_initialize_action_time;
|
||||
int initialize_pct = 100;
|
||||
|
@ -2299,8 +2298,8 @@ print_status_initialize(vdev_stat_t *vs, boolean_t verbose)
|
|||
100 / (vs->vs_initialize_bytes_est + 1));
|
||||
}
|
||||
|
||||
(void) localtime_r(&t, &zaction_ts);
|
||||
(void) strftime(tbuf, sizeof (tbuf), "%c", &zaction_ts);
|
||||
(void) ctime_r(&t, tbuf);
|
||||
tbuf[24] = 0;
|
||||
|
||||
switch (vs->vs_initialize_state) {
|
||||
case VDEV_INITIALIZE_SUSPENDED:
|
||||
|
@ -2340,7 +2339,6 @@ print_status_trim(vdev_stat_t *vs, boolean_t verbose)
|
|||
!vs->vs_scan_removing) {
|
||||
char zbuf[1024];
|
||||
char tbuf[256];
|
||||
struct tm zaction_ts;
|
||||
|
||||
time_t t = vs->vs_trim_action_time;
|
||||
int trim_pct = 100;
|
||||
|
@ -2349,8 +2347,8 @@ print_status_trim(vdev_stat_t *vs, boolean_t verbose)
|
|||
100 / (vs->vs_trim_bytes_est + 1));
|
||||
}
|
||||
|
||||
(void) localtime_r(&t, &zaction_ts);
|
||||
(void) strftime(tbuf, sizeof (tbuf), "%c", &zaction_ts);
|
||||
(void) ctime_r(&t, tbuf);
|
||||
tbuf[24] = 0;
|
||||
|
||||
switch (vs->vs_trim_state) {
|
||||
case VDEV_TRIM_SUSPENDED:
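Both hunks above (and the matching libzfs changes further down) swap the locale-dependent strftime("%c") call for ctime_r() followed by chopping the trailing newline. A standalone sketch of that idiom with nothing ZFS-specific in it; ctime_r() always writes a fixed-width string of the form "Wed Jun 30 21:49:08 1993\n", so index 24 is the newline:

#include <stdio.h>
#include <time.h>

int
main(void)
{
	char tbuf[256];
	time_t t = time(NULL);

	(void) ctime_r(&t, tbuf);	/* e.g. "Wed Jun 30 21:49:08 1993\n" */
	tbuf[24] = 0;			/* drop the trailing newline */
	printf("timestamp: [%s]\n", tbuf);
	return (0);
}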
|
||||
|
@ -10793,11 +10791,10 @@ found:
|
|||
}
|
||||
} else {
|
||||
/*
|
||||
* The first arg isn't a pool name,
|
||||
* The first arg isn't the name of a valid pool.
|
||||
*/
|
||||
fprintf(stderr, gettext("missing pool name.\n"));
|
||||
fprintf(stderr, "\n");
|
||||
usage(B_FALSE);
|
||||
fprintf(stderr, gettext("Cannot get properties of %s: "
|
||||
"no such pool available.\n"), argv[0]);
|
||||
return (1);
|
||||
}
@ -18,6 +18,7 @@ subst_sed_cmd = \
|
|||
-e 's|@ASAN_ENABLED[@]|$(ASAN_ENABLED)|g' \
|
||||
-e 's|@DEFAULT_INIT_NFS_SERVER[@]|$(DEFAULT_INIT_NFS_SERVER)|g' \
|
||||
-e 's|@DEFAULT_INIT_SHELL[@]|$(DEFAULT_INIT_SHELL)|g' \
|
||||
-e 's|@IS_SYSV_RC[@]|$(IS_SYSV_RC)|g' \
|
||||
-e 's|@LIBFETCH_DYNAMIC[@]|$(LIBFETCH_DYNAMIC)|g' \
|
||||
-e 's|@LIBFETCH_SONAME[@]|$(LIBFETCH_SONAME)|g' \
|
||||
-e 's|@PYTHON[@]|$(PYTHON)|g' \
@ -377,6 +377,14 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_MQ], [
|
|||
(void) blk_mq_alloc_tag_set(&tag_set);
|
||||
return BLK_STS_OK;
|
||||
], [])
|
||||
ZFS_LINUX_TEST_SRC([blk_mq_rq_hctx], [
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/blkdev.h>
|
||||
], [
|
||||
struct request rq = {0};
|
||||
struct blk_mq_hw_ctx *hctx = NULL;
|
||||
rq.mq_hctx = hctx;
|
||||
], [])
|
||||
])
|
||||
|
||||
AC_DEFUN([ZFS_AC_KERNEL_BLK_MQ], [
|
||||
|
@ -384,6 +392,13 @@ AC_DEFUN([ZFS_AC_KERNEL_BLK_MQ], [
|
|||
ZFS_LINUX_TEST_RESULT([blk_mq], [
|
||||
AC_MSG_RESULT(yes)
|
||||
AC_DEFINE(HAVE_BLK_MQ, 1, [block multiqueue is available])
|
||||
AC_MSG_CHECKING([whether block multiqueue hardware context is cached in struct request])
|
||||
ZFS_LINUX_TEST_RESULT([blk_mq_rq_hctx], [
|
||||
AC_MSG_RESULT(yes)
|
||||
AC_DEFINE(HAVE_BLK_MQ_RQ_HCTX, 1, [block multiqueue hardware context is cached in struct request])
|
||||
], [
|
||||
AC_MSG_RESULT(no)
|
||||
])
|
||||
], [
|
||||
AC_MSG_RESULT(no)
|
||||
])
@ -54,6 +54,26 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_OPEN_BY_PATH], [
|
|||
])
|
||||
])
|
||||
|
||||
dnl #
|
||||
dnl # 6.9.x API change
|
||||
dnl # bdev_file_open_by_path() replaced bdev_open_by_path(),
|
||||
dnl # and returns struct file*
|
||||
dnl #
|
||||
AC_DEFUN([ZFS_AC_KERNEL_SRC_BDEV_FILE_OPEN_BY_PATH], [
|
||||
ZFS_LINUX_TEST_SRC([bdev_file_open_by_path], [
|
||||
#include <linux/fs.h>
|
||||
#include <linux/blkdev.h>
|
||||
], [
|
||||
struct file *file __attribute__ ((unused)) = NULL;
|
||||
const char *path = "path";
|
||||
fmode_t mode = 0;
|
||||
void *holder = NULL;
|
||||
struct blk_holder_ops h;
|
||||
|
||||
file = bdev_file_open_by_path(path, mode, holder, &h);
|
||||
])
|
||||
])
|
||||
|
||||
AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_GET_BY_PATH], [
|
||||
AC_MSG_CHECKING([whether blkdev_get_by_path() exists and takes 3 args])
|
||||
ZFS_LINUX_TEST_RESULT([blkdev_get_by_path], [
|
||||
|
@ -73,7 +93,16 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_GET_BY_PATH], [
|
|||
[bdev_open_by_path() exists])
|
||||
AC_MSG_RESULT(yes)
|
||||
], [
|
||||
ZFS_LINUX_TEST_ERROR([blkdev_get_by_path()])
|
||||
AC_MSG_RESULT(no)
|
||||
AC_MSG_CHECKING([whether bdev_file_open_by_path() exists])
|
||||
ZFS_LINUX_TEST_RESULT([bdev_file_open_by_path], [
|
||||
AC_DEFINE(HAVE_BDEV_FILE_OPEN_BY_PATH, 1,
|
||||
[bdev_file_open_by_path() exists])
|
||||
AC_MSG_RESULT(yes)
|
||||
], [
|
||||
AC_MSG_RESULT(no)
|
||||
ZFS_LINUX_TEST_ERROR([blkdev_get_by_path()])
|
||||
])
|
||||
])
|
||||
])
|
||||
])
|
||||
|
@ -149,10 +178,19 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_RELEASE], [
|
|||
])
|
||||
])
|
||||
|
||||
dnl #
|
||||
dnl # 6.9.x API change
|
||||
dnl #
|
||||
dnl # bdev_release() now private, but because bdev_file_open_by_path() returns
|
||||
dnl # struct file*, we can just use fput(). So the blkdev_put test no longer
|
||||
dnl # fails if not found.
|
||||
dnl #
|
||||
|
||||
AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_PUT], [
|
||||
AC_MSG_CHECKING([whether blkdev_put() exists])
|
||||
ZFS_LINUX_TEST_RESULT([blkdev_put], [
|
||||
AC_MSG_RESULT(yes)
|
||||
AC_DEFINE(HAVE_BLKDEV_PUT, 1, [blkdev_put() exists])
|
||||
], [
|
||||
AC_MSG_RESULT(no)
|
||||
AC_MSG_CHECKING([whether blkdev_put() accepts void* as arg 2])
|
||||
|
@ -168,7 +206,7 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_PUT], [
|
|||
AC_DEFINE(HAVE_BDEV_RELEASE, 1,
|
||||
[bdev_release() exists])
|
||||
], [
|
||||
ZFS_LINUX_TEST_ERROR([blkdev_put()])
|
||||
AC_MSG_RESULT(no)
|
||||
])
|
||||
])
|
||||
])
|
||||
|
@ -523,12 +561,29 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_BDEVNAME], [
|
|||
])
|
||||
|
||||
dnl #
|
||||
dnl # 5.19 API: blkdev_issue_secure_erase()
|
||||
dnl # 4.7 API: __blkdev_issue_discard(..., BLKDEV_DISCARD_SECURE)
|
||||
dnl # 3.10 API: blkdev_issue_discard(..., BLKDEV_DISCARD_SECURE)
|
||||
dnl # TRIM support: discard and secure erase. We make use of asynchronous
|
||||
dnl # functions when available.
|
||||
dnl #
|
||||
AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_ISSUE_SECURE_ERASE], [
|
||||
ZFS_LINUX_TEST_SRC([blkdev_issue_secure_erase], [
|
||||
dnl # 3.10:
|
||||
dnl # sync discard: blkdev_issue_discard(..., 0)
|
||||
dnl # sync erase: blkdev_issue_discard(..., BLKDEV_DISCARD_SECURE)
|
||||
dnl # async discard: [not available]
|
||||
dnl # async erase: [not available]
|
||||
dnl #
|
||||
dnl # 4.7:
|
||||
dnl # sync discard: blkdev_issue_discard(..., 0)
|
||||
dnl # sync erase: blkdev_issue_discard(..., BLKDEV_DISCARD_SECURE)
|
||||
dnl # async discard: __blkdev_issue_discard(..., 0)
|
||||
dnl # async erase: __blkdev_issue_discard(..., BLKDEV_DISCARD_SECURE)
|
||||
dnl #
|
||||
dnl # 5.19:
|
||||
dnl # sync discard: blkdev_issue_discard(...)
|
||||
dnl # sync erase: blkdev_issue_secure_erase(...)
|
||||
dnl # async discard: __blkdev_issue_discard(...)
|
||||
dnl # async erase: [not available]
|
||||
dnl #
|
||||
AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_ISSUE_DISCARD], [
|
||||
ZFS_LINUX_TEST_SRC([blkdev_issue_discard_noflags], [
|
||||
#include <linux/blkdev.h>
|
||||
],[
|
||||
struct block_device *bdev = NULL;
|
||||
|
@ -536,10 +591,33 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_ISSUE_SECURE_ERASE], [
|
|||
sector_t nr_sects = 0;
|
||||
int error __attribute__ ((unused));
|
||||
|
||||
error = blkdev_issue_secure_erase(bdev,
|
||||
error = blkdev_issue_discard(bdev,
|
||||
sector, nr_sects, GFP_KERNEL);
|
||||
])
|
||||
ZFS_LINUX_TEST_SRC([blkdev_issue_discard_flags], [
|
||||
#include <linux/blkdev.h>
|
||||
],[
|
||||
struct block_device *bdev = NULL;
|
||||
sector_t sector = 0;
|
||||
sector_t nr_sects = 0;
|
||||
unsigned long flags = 0;
|
||||
int error __attribute__ ((unused));
|
||||
|
||||
error = blkdev_issue_discard(bdev,
|
||||
sector, nr_sects, GFP_KERNEL, flags);
|
||||
])
|
||||
ZFS_LINUX_TEST_SRC([blkdev_issue_discard_async_noflags], [
|
||||
#include <linux/blkdev.h>
|
||||
],[
|
||||
struct block_device *bdev = NULL;
|
||||
sector_t sector = 0;
|
||||
sector_t nr_sects = 0;
|
||||
struct bio *biop = NULL;
|
||||
int error __attribute__ ((unused));
|
||||
|
||||
error = __blkdev_issue_discard(bdev,
|
||||
sector, nr_sects, GFP_KERNEL, &biop);
|
||||
])
|
||||
ZFS_LINUX_TEST_SRC([blkdev_issue_discard_async_flags], [
|
||||
#include <linux/blkdev.h>
|
||||
],[
|
||||
|
@ -553,22 +631,52 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_ISSUE_SECURE_ERASE], [
|
|||
error = __blkdev_issue_discard(bdev,
|
||||
sector, nr_sects, GFP_KERNEL, flags, &biop);
|
||||
])
|
||||
|
||||
ZFS_LINUX_TEST_SRC([blkdev_issue_discard_flags], [
|
||||
ZFS_LINUX_TEST_SRC([blkdev_issue_secure_erase], [
|
||||
#include <linux/blkdev.h>
|
||||
],[
|
||||
struct block_device *bdev = NULL;
|
||||
sector_t sector = 0;
|
||||
sector_t nr_sects = 0;
|
||||
unsigned long flags = 0;
|
||||
int error __attribute__ ((unused));
|
||||
|
||||
error = blkdev_issue_discard(bdev,
|
||||
sector, nr_sects, GFP_KERNEL, flags);
|
||||
error = blkdev_issue_secure_erase(bdev,
|
||||
sector, nr_sects, GFP_KERNEL);
|
||||
])
|
||||
])
|
||||
|
||||
AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_ISSUE_SECURE_ERASE], [
|
||||
AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_ISSUE_DISCARD], [
|
||||
AC_MSG_CHECKING([whether blkdev_issue_discard() is available])
|
||||
ZFS_LINUX_TEST_RESULT([blkdev_issue_discard_noflags], [
|
||||
AC_MSG_RESULT(yes)
|
||||
AC_DEFINE(HAVE_BLKDEV_ISSUE_DISCARD_NOFLAGS, 1,
|
||||
[blkdev_issue_discard() is available])
|
||||
],[
|
||||
AC_MSG_RESULT(no)
|
||||
])
|
||||
AC_MSG_CHECKING([whether blkdev_issue_discard(flags) is available])
|
||||
ZFS_LINUX_TEST_RESULT([blkdev_issue_discard_flags], [
|
||||
AC_MSG_RESULT(yes)
|
||||
AC_DEFINE(HAVE_BLKDEV_ISSUE_DISCARD_FLAGS, 1,
|
||||
[blkdev_issue_discard(flags) is available])
|
||||
],[
|
||||
AC_MSG_RESULT(no)
|
||||
])
|
||||
AC_MSG_CHECKING([whether __blkdev_issue_discard() is available])
|
||||
ZFS_LINUX_TEST_RESULT([blkdev_issue_discard_async_noflags], [
|
||||
AC_MSG_RESULT(yes)
|
||||
AC_DEFINE(HAVE_BLKDEV_ISSUE_DISCARD_ASYNC_NOFLAGS, 1,
|
||||
[__blkdev_issue_discard() is available])
|
||||
],[
|
||||
AC_MSG_RESULT(no)
|
||||
])
|
||||
AC_MSG_CHECKING([whether __blkdev_issue_discard(flags) is available])
|
||||
ZFS_LINUX_TEST_RESULT([blkdev_issue_discard_async_flags], [
|
||||
AC_MSG_RESULT(yes)
|
||||
AC_DEFINE(HAVE_BLKDEV_ISSUE_DISCARD_ASYNC_FLAGS, 1,
|
||||
[__blkdev_issue_discard(flags) is available])
|
||||
],[
|
||||
AC_MSG_RESULT(no)
|
||||
])
|
||||
AC_MSG_CHECKING([whether blkdev_issue_secure_erase() is available])
|
||||
ZFS_LINUX_TEST_RESULT([blkdev_issue_secure_erase], [
|
||||
AC_MSG_RESULT(yes)
|
||||
|
@ -576,24 +684,6 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_ISSUE_SECURE_ERASE], [
|
|||
[blkdev_issue_secure_erase() is available])
|
||||
],[
|
||||
AC_MSG_RESULT(no)
|
||||
|
||||
AC_MSG_CHECKING([whether __blkdev_issue_discard() is available])
|
||||
ZFS_LINUX_TEST_RESULT([blkdev_issue_discard_async_flags], [
|
||||
AC_MSG_RESULT(yes)
|
||||
AC_DEFINE(HAVE_BLKDEV_ISSUE_DISCARD_ASYNC, 1,
|
||||
[__blkdev_issue_discard() is available])
|
||||
],[
|
||||
AC_MSG_RESULT(no)
|
||||
|
||||
AC_MSG_CHECKING([whether blkdev_issue_discard() is available])
|
||||
ZFS_LINUX_TEST_RESULT([blkdev_issue_discard_flags], [
|
||||
AC_MSG_RESULT(yes)
|
||||
AC_DEFINE(HAVE_BLKDEV_ISSUE_DISCARD, 1,
|
||||
[blkdev_issue_discard() is available])
|
||||
],[
|
||||
ZFS_LINUX_TEST_ERROR([blkdev_issue_discard()])
|
||||
])
|
||||
])
|
||||
])
|
||||
])
|
||||
|
||||
|
@ -645,6 +735,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV], [
|
|||
ZFS_AC_KERNEL_SRC_BLKDEV_GET_BY_PATH
|
||||
ZFS_AC_KERNEL_SRC_BLKDEV_GET_BY_PATH_4ARG
|
||||
ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_OPEN_BY_PATH
|
||||
ZFS_AC_KERNEL_SRC_BDEV_FILE_OPEN_BY_PATH
|
||||
ZFS_AC_KERNEL_SRC_BLKDEV_PUT
|
||||
ZFS_AC_KERNEL_SRC_BLKDEV_PUT_HOLDER
|
||||
ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_RELEASE
|
||||
|
@ -657,7 +748,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV], [
|
|||
ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_CHECK_MEDIA_CHANGE
|
||||
ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_WHOLE
|
||||
ZFS_AC_KERNEL_SRC_BLKDEV_BDEVNAME
|
||||
ZFS_AC_KERNEL_SRC_BLKDEV_ISSUE_SECURE_ERASE
|
||||
ZFS_AC_KERNEL_SRC_BLKDEV_ISSUE_DISCARD
|
||||
ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_KOBJ
|
||||
ZFS_AC_KERNEL_SRC_BLKDEV_PART_TO_DEV
|
||||
ZFS_AC_KERNEL_SRC_BLKDEV_DISK_CHECK_MEDIA_CHANGE
|
||||
|
@ -678,7 +769,7 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV], [
|
|||
ZFS_AC_KERNEL_BLKDEV_BDEV_WHOLE
|
||||
ZFS_AC_KERNEL_BLKDEV_BDEVNAME
|
||||
ZFS_AC_KERNEL_BLKDEV_GET_ERESTARTSYS
|
||||
ZFS_AC_KERNEL_BLKDEV_ISSUE_SECURE_ERASE
|
||||
ZFS_AC_KERNEL_BLKDEV_ISSUE_DISCARD
|
||||
ZFS_AC_KERNEL_BLKDEV_BDEV_KOBJ
|
||||
ZFS_AC_KERNEL_BLKDEV_PART_TO_DEV
|
||||
ZFS_AC_KERNEL_BLKDEV_DISK_CHECK_MEDIA_CHANGE
@ -50,6 +50,14 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_MAKE_REQUEST_FN], [
|
|||
disk = blk_alloc_disk(NUMA_NO_NODE);
|
||||
])
|
||||
|
||||
ZFS_LINUX_TEST_SRC([blk_alloc_disk_2arg], [
|
||||
#include <linux/blkdev.h>
|
||||
],[
|
||||
struct queue_limits *lim = NULL;
|
||||
struct gendisk *disk __attribute__ ((unused));
|
||||
disk = blk_alloc_disk(lim, NUMA_NO_NODE);
|
||||
])
|
||||
|
||||
ZFS_LINUX_TEST_SRC([blk_cleanup_disk], [
|
||||
#include <linux/blkdev.h>
|
||||
],[
|
||||
|
@ -96,6 +104,31 @@ AC_DEFUN([ZFS_AC_KERNEL_MAKE_REQUEST_FN], [
|
|||
], [
|
||||
AC_MSG_RESULT(no)
|
||||
])
|
||||
|
||||
dnl #
|
||||
dnl # Linux 6.9 API Change:
|
||||
dnl # blk_alloc_queue() takes a nullable queue_limits arg.
|
||||
dnl #
|
||||
AC_MSG_CHECKING([whether blk_alloc_disk() exists and takes 2 args])
|
||||
ZFS_LINUX_TEST_RESULT([blk_alloc_disk_2arg], [
|
||||
AC_MSG_RESULT(yes)
|
||||
AC_DEFINE([HAVE_BLK_ALLOC_DISK_2ARG], 1, [blk_alloc_disk() exists and takes 2 args])
|
||||
|
||||
dnl #
|
||||
dnl # 5.20 API change,
|
||||
dnl # Removed blk_cleanup_disk(), put_disk() should be used.
|
||||
dnl #
|
||||
AC_MSG_CHECKING([whether blk_cleanup_disk() exists])
|
||||
ZFS_LINUX_TEST_RESULT([blk_cleanup_disk], [
|
||||
AC_MSG_RESULT(yes)
|
||||
AC_DEFINE([HAVE_BLK_CLEANUP_DISK], 1,
|
||||
[blk_cleanup_disk() exists])
|
||||
], [
|
||||
AC_MSG_RESULT(no)
|
||||
])
|
||||
], [
|
||||
AC_MSG_RESULT(no)
|
||||
])
|
||||
],[
|
||||
AC_MSG_RESULT(no)
|
||||
@ -578,13 +578,15 @@ AC_DEFUN([ZFS_AC_DEFAULT_PACKAGE], [
|
|||
|
||||
AC_MSG_CHECKING([default shell])
|
||||
case "$VENDOR" in
|
||||
gentoo) DEFAULT_INIT_SHELL="/sbin/openrc-run";;
|
||||
alpine) DEFAULT_INIT_SHELL="/sbin/openrc-run";;
|
||||
*) DEFAULT_INIT_SHELL="/bin/sh" ;;
|
||||
gentoo|alpine) DEFAULT_INIT_SHELL=/sbin/openrc-run
|
||||
IS_SYSV_RC=false ;;
|
||||
*) DEFAULT_INIT_SHELL=/bin/sh
|
||||
IS_SYSV_RC=true ;;
|
||||
esac
|
||||
|
||||
AC_MSG_RESULT([$DEFAULT_INIT_SHELL])
|
||||
AC_SUBST(DEFAULT_INIT_SHELL)
|
||||
AC_SUBST(IS_SYSV_RC)
|
||||
|
||||
AC_MSG_CHECKING([default nfs server init script])
|
||||
AS_IF([test "$VENDOR" = "debian"],
@ -7,11 +7,7 @@ DESCRIPTION
|
|||
|
||||
They have been tested successfully on:
|
||||
|
||||
* Debian GNU/Linux Wheezy
|
||||
* Debian GNU/Linux Jessie
|
||||
* Ubuntu Trusty
|
||||
* CentOS 6.0
|
||||
* CentOS 6.6
|
||||
* Debian GNU/Linux Bookworm
|
||||
* Gentoo
|
||||
|
||||
SUPPORT
@ -307,7 +307,7 @@ do_start()
|
|||
|
||||
# ----------------------------------------------------
|
||||
|
||||
if [ ! -e /sbin/openrc-run ]
|
||||
if @IS_SYSV_RC@
|
||||
then
|
||||
case "$1" in
|
||||
start)
@ -104,7 +104,7 @@ do_stop()
|
|||
|
||||
# ----------------------------------------------------
|
||||
|
||||
if [ ! -e /sbin/openrc-run ]
|
||||
if @IS_SYSV_RC@
|
||||
then
|
||||
case "$1" in
|
||||
start)
@ -114,7 +114,7 @@ do_stop()
|
|||
|
||||
# ----------------------------------------------------
|
||||
|
||||
if [ ! -e /sbin/openrc-run ]
|
||||
if @IS_SYSV_RC@
|
||||
then
|
||||
case "$1" in
|
||||
start)
@ -57,7 +57,8 @@ do_stop()
|
|||
|
||||
# ----------------------------------------------------
|
||||
|
||||
if [ ! -e /sbin/openrc-run ]; then
|
||||
if @IS_SYSV_RC@
|
||||
then
|
||||
case "$1" in
|
||||
start)
|
||||
do_start
@ -93,7 +93,8 @@ do_reload()
|
|||
|
||||
# ----------------------------------------------------
|
||||
|
||||
if [ ! -e /sbin/openrc-run ]; then
|
||||
if @IS_SYSV_RC@
|
||||
then
|
||||
case "$1" in
|
||||
start)
|
||||
do_start
@ -4,8 +4,6 @@ noinst_HEADERS = \
|
|||
\
|
||||
%D%/spl/acl/acl_common.h \
|
||||
\
|
||||
%D%/spl/rpc/xdr.h \
|
||||
\
|
||||
%D%/spl/sys/ia32/asm_linkage.h \
|
||||
\
|
||||
%D%/spl/sys/acl.h \
@ -1,71 +0,0 @@
|
|||
/*
|
||||
* Sun RPC is a product of Sun Microsystems, Inc. and is provided for
|
||||
* unrestricted use provided that this legend is included on all tape
|
||||
* media and as a part of the software program in whole or part. Users
|
||||
* may copy or modify Sun RPC without charge, but are not authorized
|
||||
* to license or distribute it to anyone else except as part of a product or
|
||||
* program developed by the user.
|
||||
*
|
||||
* SUN RPC IS PROVIDED AS IS WITH NO WARRANTIES OF ANY KIND INCLUDING THE
|
||||
* WARRANTIES OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE, OR ARISING FROM A COURSE OF DEALING, USAGE OR TRADE PRACTICE.
|
||||
*
|
||||
* Sun RPC is provided with no support and without any obligation on the
|
||||
* part of Sun Microsystems, Inc. to assist in its use, correction,
|
||||
* modification or enhancement.
|
||||
*
|
||||
* SUN MICROSYSTEMS, INC. SHALL HAVE NO LIABILITY WITH RESPECT TO THE
|
||||
* INFRINGEMENT OF COPYRIGHTS, TRADE SECRETS OR ANY PATENTS BY SUN RPC
|
||||
* OR ANY PART THEREOF.
|
||||
*
|
||||
* In no event will Sun Microsystems, Inc. be liable for any lost revenue
|
||||
* or profits or other special, indirect and consequential damages, even if
|
||||
* Sun has been advised of the possibility of such damages.
|
||||
*
|
||||
* Sun Microsystems, Inc.
|
||||
* 2550 Garcia Avenue
|
||||
* Mountain View, California 94043
|
||||
*/
|
||||
|
||||
#ifndef _OPENSOLARIS_RPC_XDR_H_
|
||||
#define _OPENSOLARIS_RPC_XDR_H_
|
||||
|
||||
#include <rpc/types.h>
|
||||
#include_next <rpc/xdr.h>
|
||||
|
||||
#if !defined(_KERNEL) && !defined(_STANDALONE)
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
/*
|
||||
* Taken from sys/xdr/xdr_mem.c.
|
||||
*
|
||||
* FreeBSD's userland XDR doesn't implement control method (only the kernel),
|
||||
* but OpenSolaris nvpair still depend on it, so we have to implement it here.
|
||||
*/
|
||||
static __inline bool_t
|
||||
xdrmem_control(XDR *xdrs, int request, void *info)
|
||||
{
|
||||
xdr_bytesrec *xptr;
|
||||
|
||||
switch (request) {
|
||||
case XDR_GET_BYTES_AVAIL:
|
||||
xptr = (xdr_bytesrec *)info;
|
||||
xptr->xc_is_last_record = TRUE;
|
||||
xptr->xc_num_avail = xdrs->x_handy;
|
||||
return (TRUE);
|
||||
default:
|
||||
assert(!"unexpected request");
|
||||
}
|
||||
return (FALSE);
|
||||
}
|
||||
|
||||
#undef XDR_CONTROL
|
||||
#define XDR_CONTROL(xdrs, req, op) \
|
||||
(((xdrs)->x_ops->x_control == NULL) ? \
|
||||
xdrmem_control((xdrs), (req), (op)) : \
|
||||
(*(xdrs)->x_ops->x_control)(xdrs, req, op))
|
||||
|
||||
#endif /* !_KERNEL && !_STANDALONE */
|
||||
|
||||
#endif /* !_OPENSOLARIS_RPC_XDR_H_ */
@ -47,6 +47,7 @@ kernel_sys_HEADERS = \
|
|||
|
||||
kernel_spl_rpcdir = $(kerneldir)/spl/rpc
|
||||
kernel_spl_rpc_HEADERS = \
|
||||
%D%/spl/rpc/types.h \
|
||||
%D%/spl/rpc/xdr.h
|
||||
|
||||
kernel_spl_sysdir = $(kerneldir)/spl/sys
@ -0,0 +1,30 @@
|
|||
/*
|
||||
* Copyright (c) 2008 Sun Microsystems, Inc.
|
||||
* Written by Ricardo Correia <Ricardo.M.Correia@Sun.COM>
|
||||
*
|
||||
* This file is part of the SPL, Solaris Porting Layer.
|
||||
*
|
||||
* The SPL is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the
|
||||
* Free Software Foundation; either version 2 of the License, or (at your
|
||||
* option) any later version.
|
||||
*
|
||||
* The SPL is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef _SPL_RPC_TYPES_H
|
||||
#define _SPL_RPC_TYPES_H
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
/* Just enough to support rpc/xdr.h */
|
||||
|
||||
typedef int bool_t;
|
||||
|
||||
#endif /* SPL_RPC_TYPES_H */
@ -24,8 +24,6 @@
|
|||
#include <sys/types.h>
|
||||
#include <sys/sysmacros.h>
|
||||
|
||||
typedef int bool_t;
|
||||
|
||||
/*
|
||||
* XDR enums and types.
|
||||
*/
@ -45,18 +45,24 @@ typedef struct zfetch {
|
|||
int zf_numstreams; /* number of zstream_t's */
|
||||
} zfetch_t;
|
||||
|
||||
typedef struct zsrange {
|
||||
uint16_t start;
|
||||
uint16_t end;
|
||||
} zsrange_t;
|
||||
|
||||
#define ZFETCH_RANGES 9 /* Fits zstream_t into 128 bytes */
|
||||
|
||||
typedef struct zstream {
|
||||
list_node_t zs_node; /* link for zf_stream */
|
||||
uint64_t zs_blkid; /* expect next access at this blkid */
|
||||
uint_t zs_atime; /* time last prefetch issued */
|
||||
zsrange_t zs_ranges[ZFETCH_RANGES]; /* ranges from future */
|
||||
unsigned int zs_pf_dist; /* data prefetch distance in bytes */
|
||||
unsigned int zs_ipf_dist; /* L1 prefetch distance in bytes */
|
||||
uint64_t zs_pf_start; /* first data block to prefetch */
|
||||
uint64_t zs_pf_end; /* data block to prefetch up to */
|
||||
uint64_t zs_ipf_start; /* first data block to prefetch L1 */
|
||||
uint64_t zs_ipf_end; /* data block to prefetch L1 up to */
|
||||
|
||||
list_node_t zs_node; /* link for zf_stream */
|
||||
hrtime_t zs_atime; /* time last prefetch issued */
|
||||
zfetch_t *zs_fetch; /* parent fetch */
|
||||
boolean_t zs_missed; /* stream saw cache misses */
|
||||
boolean_t zs_more; /* need more distant prefetch */
|
||||
zfs_refcount_t zs_callers; /* number of pending callers */
|
||||
|
@ -74,7 +80,7 @@ void dmu_zfetch_init(zfetch_t *, struct dnode *);
|
|||
void dmu_zfetch_fini(zfetch_t *);
|
||||
zstream_t *dmu_zfetch_prepare(zfetch_t *, uint64_t, uint64_t, boolean_t,
|
||||
boolean_t);
|
||||
void dmu_zfetch_run(zstream_t *, boolean_t, boolean_t);
|
||||
void dmu_zfetch_run(zfetch_t *, zstream_t *, boolean_t, boolean_t);
|
||||
void dmu_zfetch(zfetch_t *, uint64_t, uint64_t, boolean_t, boolean_t,
|
||||
boolean_t);
@ -455,7 +455,7 @@ struct vdev {
|
|||
zfs_ratelimit_t vdev_checksum_rl;
|
||||
|
||||
/*
|
||||
* Vdev properties for tuning ZED
|
||||
* Vdev properties for tuning ZED or zfsd
|
||||
*/
|
||||
uint64_t vdev_checksum_n;
|
||||
uint64_t vdev_checksum_t;
@ -132,7 +132,7 @@ typedef struct zap_leaf_phys {
|
|||
* with the ZAP_LEAF_CHUNK() macro.
|
||||
*/
|
||||
|
||||
uint16_t l_hash[1];
|
||||
uint16_t l_hash[];
|
||||
} zap_leaf_phys_t;
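The l_hash change above replaces the old one-element-array trick with a C99 flexible array member. A small standalone illustration of the difference, unrelated to the ZAP structures themselves: sizeof no longer counts a phantom element, and space for the trailing array is added explicitly at allocation time.

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

struct leaf_old { uint32_t hdr; uint16_t hash[1]; };	/* pre-C99 idiom */
struct leaf_new { uint32_t hdr; uint16_t hash[]; };	/* flexible array member */

int
main(void)
{
	/* The flexible array contributes nothing to sizeof. */
	printf("old: %zu bytes, new: %zu bytes\n",
	    sizeof (struct leaf_old), sizeof (struct leaf_new));

	/* Space for the trailing array is sized explicitly at allocation. */
	size_t nhash = 8;
	struct leaf_new *l = malloc(sizeof (*l) + nhash * sizeof (l->hash[0]));
	if (l == NULL)
		return (1);
	l->hash[nhash - 1] = 0xffff;
	printf("last hash entry: 0x%x\n", l->hash[nhash - 1]);
	free(l);
	return (0);
}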
|
||||
|
||||
typedef union zap_leaf_chunk {
@ -153,7 +153,7 @@ enum zio_stage {
|
|||
ZIO_STAGE_READY = 1 << 20, /* RWFCIT */
|
||||
|
||||
ZIO_STAGE_VDEV_IO_START = 1 << 21, /* RW--IT */
|
||||
ZIO_STAGE_VDEV_IO_DONE = 1 << 22, /* RW---T */
|
||||
ZIO_STAGE_VDEV_IO_DONE = 1 << 22, /* RW--IT */
|
||||
ZIO_STAGE_VDEV_IO_ASSESS = 1 << 23, /* RW--IT */
|
||||
|
||||
ZIO_STAGE_CHECKSUM_VERIFY = 1 << 24, /* R----- */
|
||||
|
@ -261,8 +261,7 @@ enum zio_stage {
|
|||
|
||||
#define ZIO_IOCTL_PIPELINE \
|
||||
(ZIO_INTERLOCK_STAGES | \
|
||||
ZIO_STAGE_VDEV_IO_START | \
|
||||
ZIO_STAGE_VDEV_IO_ASSESS)
|
||||
ZIO_VDEV_IO_STAGES)
|
||||
|
||||
#define ZIO_TRIM_PIPELINE \
|
||||
(ZIO_INTERLOCK_STAGES | \
@ -1900,7 +1900,8 @@ zpool_rewind_exclaim(libzfs_handle_t *hdl, const char *name, boolean_t dryrun,
|
|||
(void) nvlist_lookup_int64(nv, ZPOOL_CONFIG_REWIND_TIME, &loss);
|
||||
|
||||
if (localtime_r((time_t *)&rewindto, &t) != NULL &&
|
||||
strftime(timestr, 128, "%c", &t) != 0) {
|
||||
ctime_r((time_t *)&rewindto, timestr) != NULL) {
|
||||
timestr[24] = 0;
|
||||
if (dryrun) {
|
||||
(void) printf(dgettext(TEXT_DOMAIN,
|
||||
"Would be able to return %s "
|
||||
|
@ -1962,7 +1963,8 @@ zpool_explain_recover(libzfs_handle_t *hdl, const char *name, int reason,
|
|||
"Recovery is possible, but will result in some data loss.\n"));
|
||||
|
||||
if (localtime_r((time_t *)&rewindto, &t) != NULL &&
|
||||
strftime(timestr, 128, "%c", &t) != 0) {
|
||||
ctime_r((time_t *)&rewindto, timestr) != NULL) {
|
||||
timestr[24] = 0;
|
||||
(void) printf(dgettext(TEXT_DOMAIN,
|
||||
"\tReturning the pool to its state as of %s\n"
|
||||
"\tshould correct the problem. "),
@ -62,7 +62,6 @@ dist_man_MANS = \
|
|||
%D%/man8/zfs-userspace.8 \
|
||||
%D%/man8/zfs-wait.8 \
|
||||
%D%/man8/zfs_ids_to_path.8 \
|
||||
%D%/man8/zfs_prepare_disk.8 \
|
||||
%D%/man8/zgenhostid.8 \
|
||||
%D%/man8/zinject.8 \
|
||||
%D%/man8/zpool.8 \
|
||||
|
@ -115,7 +114,8 @@ endif
|
|||
|
||||
nodist_man_MANS = \
|
||||
%D%/man8/zed.8 \
|
||||
%D%/man8/zfs-mount-generator.8
|
||||
%D%/man8/zfs-mount-generator.8 \
|
||||
%D%/man8/zfs_prepare_disk.8
|
||||
|
||||
dist_noinst_DATA += $(dist_noinst_man_MANS) $(dist_man_MANS)
@ -564,6 +564,10 @@ However, this is limited by
|
|||
Maximum micro ZAP size.
|
||||
A micro ZAP is upgraded to a fat ZAP, once it grows beyond the specified size.
|
||||
.
|
||||
.It Sy zfetch_hole_shift Ns = Ns Sy 2 Pq uint
|
||||
Log2 fraction of holes in speculative prefetch stream allowed for it to
|
||||
proceed.
|
||||
.
|
||||
.It Sy zfetch_min_distance Ns = Ns Sy 4194304 Ns B Po 4 MiB Pc Pq uint
|
||||
Min bytes to prefetch per stream.
|
||||
Prefetch distance starts from the demand access size and quickly grows to
|
||||
|
@ -578,6 +582,13 @@ Max bytes to prefetch per stream.
|
|||
.It Sy zfetch_max_idistance Ns = Ns Sy 67108864 Ns B Po 64 MiB Pc Pq uint
|
||||
Max bytes to prefetch indirects for per stream.
|
||||
.
|
||||
.It Sy zfetch_max_reorder Ns = Ns Sy 16777216 Ns B Po 16 MiB Pc Pq uint
|
||||
Requests within this byte distance from the current prefetch stream position
|
||||
are considered parts of the stream, reordered due to parallel processing.
|
||||
Such requests do not advance the stream position immediately unless
|
||||
.Sy zfetch_hole_shift
|
||||
fill threshold is reached, but saved to fill holes in the stream later.
|
||||
.
|
||||
.It Sy zfetch_max_streams Ns = Ns Sy 8 Pq uint
|
||||
Max number of streams per zfetch (prefetch streams per file).
|
||||
.
|
||||
|
@ -2387,6 +2398,13 @@ The number of requests which can be handled concurrently is controlled by
|
|||
is ignored when running on a kernel that supports block multiqueue
|
||||
.Pq Li blk-mq .
|
||||
.
|
||||
.It Sy zvol_num_taskqs Ns = Ns Sy 0 Pq uint
|
||||
Number of zvol taskqs.
|
||||
If
|
||||
.Sy 0
|
||||
(the default) then scaling is done internally to prefer 6 threads per taskq.
|
||||
This only applies on Linux.
|
||||
.
|
||||
.It Sy zvol_threads Ns = Ns Sy 0 Pq uint
|
||||
The number of system wide threads to use for processing zvol block IOs.
|
||||
If
|
@ -127,7 +127,13 @@ If the property is only set on the top-level vdev, this value will be used.
|
|||
The value of these properties do not persist across vdev replacement.
|
||||
For this reason, it is advisable to set the property on the top-level vdev -
|
||||
not on the leaf vdev itself.
|
||||
The default values are 10 errors in 600 seconds.
|
||||
The default values for
|
||||
.Sy OpenZFS on Linux
|
||||
are 10 errors in 600 seconds.
|
||||
For
|
||||
.Sy OpenZFS on FreeBSD
|
||||
defaults see
|
||||
.Xr zfsd 8 .
|
||||
.It Sy comment
|
||||
A text comment up to 8192 characters long
|
||||
.It Sy bootsize
|
||||
|
|||
.\" CDDL HEADER END
|
||||
.\"
|
||||
.\" Copyright 2013 Darik Horn <dajhorn@vanadac.com>. All rights reserved.
|
||||
.\" Copyright (c) 2024, Klara Inc.
|
||||
.\"
|
||||
.\" lint-ok: WARNING: sections out of conventional order: Sh SYNOPSIS
|
||||
.\"
|
||||
.Dd May 26, 2021
|
||||
.Dd April 4, 2024
|
||||
.Dt ZINJECT 8
|
||||
.Os
|
||||
.
|
||||
|
@ -257,6 +258,7 @@ Run for this many seconds before reporting failure.
|
|||
.It Fl T Ar failure
|
||||
Set the failure type to one of
|
||||
.Sy all ,
|
||||
.Sy ioctl ,
|
||||
.Sy claim ,
|
||||
.Sy free ,
|
||||
.Sy read ,
@ -404,7 +404,7 @@ ZIO_STAGE_DVA_CLAIM:0x00080000:---C--
|
|||
ZIO_STAGE_READY:0x00100000:RWFCIT
|
||||
|
||||
ZIO_STAGE_VDEV_IO_START:0x00200000:RW--IT
|
||||
ZIO_STAGE_VDEV_IO_DONE:0x00400000:RW---T
|
||||
ZIO_STAGE_VDEV_IO_DONE:0x00400000:RW--IT
|
||||
ZIO_STAGE_VDEV_IO_ASSESS:0x00800000:RW--IT
|
||||
|
||||
ZIO_STAGE_CHECKSUM_VERIFY:0x01000000:R-----
@ -82,12 +82,9 @@ CFLAGS+= -DBITS_PER_LONG=64
|
|||
|
||||
SRCS= vnode_if.h device_if.h bus_if.h
|
||||
|
||||
# avl
|
||||
#avl
|
||||
SRCS+= avl.c
|
||||
|
||||
# icp
|
||||
SRCS+= edonr.c
|
||||
|
||||
#icp/algs/blake3
|
||||
SRCS+= blake3.c \
|
||||
blake3_generic.c \
|
||||
|
@ -107,9 +104,12 @@ SRCS+= blake3_avx2.S \
|
|||
blake3_sse2.S \
|
||||
blake3_sse41.S
|
||||
|
||||
#icp/algs/edonr
|
||||
SRCS+= edonr.c
|
||||
|
||||
#icp/algs/sha2
|
||||
SRCS+= sha2_generic.c \
|
||||
sha256_impl.c \
|
||||
SRCS+= sha256_impl.c \
|
||||
sha2_generic.c \
|
||||
sha512_impl.c
|
||||
|
||||
#icp/asm-arm/sha2
|
||||
|
@ -122,8 +122,8 @@ SRCS+= sha256-armv8.S \
|
|||
|
||||
#icp/asm-ppc64/sha2
|
||||
SRCS+= sha256-p8.S \
|
||||
sha512-p8.S \
|
||||
sha256-ppc.S \
|
||||
sha512-p8.S \
|
||||
sha512-ppc.S
|
||||
|
||||
#icp/asm-x86_64/sha2
|
||||
|
@ -157,10 +157,10 @@ SRCS+= lapi.c \
|
|||
lzio.c
|
||||
|
||||
#nvpair
|
||||
SRCS+= nvpair.c \
|
||||
fnvpair.c \
|
||||
nvpair_alloc_spl.c \
|
||||
nvpair_alloc_fixed.c
|
||||
SRCS+= fnvpair.c \
|
||||
nvpair.c \
|
||||
nvpair_alloc_fixed.c \
|
||||
nvpair_alloc_spl.c
|
||||
|
||||
#os/freebsd/spl
|
||||
SRCS+= acl_common.c \
|
||||
|
@ -184,7 +184,6 @@ SRCS+= acl_common.c \
|
|||
spl_zlib.c \
|
||||
spl_zone.c
|
||||
|
||||
|
||||
.if ${MACHINE_ARCH} == "i386" || ${MACHINE_ARCH} == "powerpc" || \
|
||||
${MACHINE_ARCH} == "powerpcspe" || ${MACHINE_ARCH} == "arm"
|
||||
SRCS+= spl_atomic.c
|
||||
|
@ -207,6 +206,7 @@ SRCS+= abd_os.c \
|
|||
zfs_ctldir.c \
|
||||
zfs_debug.c \
|
||||
zfs_dir.c \
|
||||
zfs_file_os.c \
|
||||
zfs_ioctl_compat.c \
|
||||
zfs_ioctl_os.c \
|
||||
zfs_racct.c \
|
||||
|
@ -217,19 +217,20 @@ SRCS+= abd_os.c \
|
|||
zvol_os.c
|
||||
|
||||
#unicode
|
||||
SRCS+= uconv.c \
|
||||
u8_textprep.c
|
||||
SRCS+= u8_textprep.c \
|
||||
uconv.c
|
||||
|
||||
#zcommon
|
||||
SRCS+= zfeature_common.c \
|
||||
SRCS+= cityhash.c \
|
||||
zfeature_common.c \
|
||||
zfs_comutil.c \
|
||||
zfs_deleg.c \
|
||||
zfs_fletcher.c \
|
||||
zfs_fletcher_avx512.c \
|
||||
zfs_fletcher.c \
|
||||
zfs_fletcher_intel.c \
|
||||
zfs_fletcher_sse.c \
|
||||
zfs_fletcher_superscalar.c \
|
||||
zfs_fletcher_superscalar4.c \
|
||||
zfs_fletcher_superscalar.c \
|
||||
zfs_namecheck.c \
|
||||
zfs_prop.c \
|
||||
zpool_prop.c \
|
||||
|
@ -243,14 +244,13 @@ SRCS+= abd.c \
|
|||
blkptr.c \
|
||||
bplist.c \
|
||||
bpobj.c \
|
||||
brt.c \
|
||||
btree.c \
|
||||
cityhash.c \
|
||||
dbuf.c \
|
||||
dbuf_stats.c \
|
||||
bptree.c \
|
||||
bqueue.c \
|
||||
brt.c \
|
||||
btree.c \
|
||||
dataset_kstats.c \
|
||||
dbuf.c \
|
||||
dbuf_stats.c \
|
||||
ddt.c \
|
||||
ddt_stats.c \
|
||||
ddt_zap.c \
|
||||
|
@ -266,13 +266,13 @@ SRCS+= abd.c \
|
|||
dmu_zfetch.c \
|
||||
dnode.c \
|
||||
dnode_sync.c \
|
||||
dsl_bookmark.c \
|
||||
dsl_crypt.c \
|
||||
dsl_dataset.c \
|
||||
dsl_deadlist.c \
|
||||
dsl_deleg.c \
|
||||
dsl_bookmark.c \
|
||||
dsl_dir.c \
|
||||
dsl_crypt.c \
|
||||
dsl_destroy.c \
|
||||
dsl_dir.c \
|
||||
dsl_pool.c \
|
||||
dsl_prop.c \
|
||||
dsl_scan.c \
|
||||
|
@ -281,9 +281,9 @@ SRCS+= abd.c \
|
|||
edonr_zfs.c \
|
||||
fm.c \
|
||||
gzip.c \
|
||||
lzjb.c \
|
||||
lz4.c \
|
||||
lz4_zfs.c \
|
||||
lzjb.c \
|
||||
metaslab.c \
|
||||
mmp.c \
|
||||
multilist.c \
|
||||
|
@ -296,6 +296,8 @@ SRCS+= abd.c \
|
|||
sha2_zfs.c \
|
||||
skein_zfs.c \
|
||||
spa.c \
|
||||
space_map.c \
|
||||
space_reftree.c \
|
||||
spa_checkpoint.c \
|
||||
spa_config.c \
|
||||
spa_errlog.c \
|
||||
|
@ -303,16 +305,14 @@ SRCS+= abd.c \
|
|||
spa_log_spacemap.c \
|
||||
spa_misc.c \
|
||||
spa_stats.c \
|
||||
space_map.c \
|
||||
space_reftree.c \
|
||||
txg.c \
|
||||
uberblock.c \
|
||||
unique.c \
|
||||
vdev.c \
|
||||
vdev_draid.c \
|
||||
vdev_draid_rand.c \
|
||||
vdev_indirect.c \
|
||||
vdev_indirect_births.c \
|
||||
vdev_indirect.c \
|
||||
vdev_indirect_mapping.c \
|
||||
vdev_initialize.c \
|
||||
vdev_label.c \
|
||||
|
@ -320,11 +320,11 @@ SRCS+= abd.c \
|
|||
vdev_missing.c \
|
||||
vdev_queue.c \
|
||||
vdev_raidz.c \
|
||||
vdev_raidz_math.c \
|
||||
vdev_raidz_math_scalar.c \
|
||||
vdev_raidz_math_avx2.c \
|
||||
vdev_raidz_math_avx512bw.c \
|
||||
vdev_raidz_math_avx512f.c \
|
||||
vdev_raidz_math.c \
|
||||
vdev_raidz_math_scalar.c \
|
||||
vdev_raidz_math_sse2.c \
|
||||
vdev_raidz_math_ssse3.c \
|
||||
vdev_rebuild.c \
|
||||
|
@ -343,7 +343,6 @@ SRCS+= abd.c \
|
|||
zfeature.c \
|
||||
zfs_byteswap.c \
|
||||
zfs_chksum.c \
|
||||
zfs_file_os.c \
|
||||
zfs_fm.c \
|
||||
zfs_fuid.c \
|
||||
zfs_impl.c \
|
||||
|
@ -367,30 +366,36 @@ SRCS+= abd.c \
|
|||
zvol.c
|
||||
|
||||
#zstd
|
||||
SRCS+= zfs_zstd.c \
|
||||
entropy_common.c \
|
||||
SRCS+= zfs_zstd.c
|
||||
|
||||
#zstd/common
|
||||
SRCS+= entropy_common.c \
|
||||
error_private.c \
|
||||
fse_compress.c \
|
||||
fse_decompress.c \
|
||||
hist.c \
|
||||
huf_compress.c \
|
||||
huf_decompress.c \
|
||||
pool.c \
|
||||
xxhash.c \
|
||||
zstd_common.c \
|
||||
|
||||
#zstd/compress
|
||||
SRCS+= fse_compress.c \
|
||||
hist.c \
|
||||
huf_compress.c \
|
||||
zstd_compress.c \
|
||||
zstd_compress_literals.c \
|
||||
zstd_compress_sequences.c \
|
||||
zstd_compress_superblock.c \
|
||||
zstd_ddict.c \
|
||||
zstd_decompress.c \
|
||||
zstd_decompress_block.c \
|
||||
zstd_double_fast.c \
|
||||
zstd_fast.c \
|
||||
zstd_lazy.c \
|
||||
zstd_ldm.c \
|
||||
zstd_opt.c
|
||||
|
||||
#zstd/decompress
|
||||
SRCS+= huf_decompress.c \
|
||||
zstd_ddict.c \
|
||||
zstd_decompress_block.c \
|
||||
zstd_decompress.c
|
||||
|
||||
beforeinstall:
|
||||
.if ${MK_DEBUG_FILES} != "no"
|
||||
mtree -eu \
@ -41,6 +41,7 @@
|
|||
#include <sys/types.h>
|
||||
#include <sys/param.h>
|
||||
#include <sys/string.h>
|
||||
#include <rpc/types.h>
|
||||
#include <rpc/xdr.h>
|
||||
#include <sys/mod.h>
@ -25,6 +25,7 @@
|
|||
#include <sys/debug.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/sysmacros.h>
|
||||
#include <rpc/types.h>
|
||||
#include <rpc/xdr.h>
|
||||
|
||||
/*
@ -45,15 +45,25 @@
|
|||
/*
|
||||
* Linux 6.8.x uses a bdev_handle as an instance/refcount for an underlying
|
||||
* block_device. Since it carries the block_device inside, its convenient to
|
||||
* just use the handle as a proxy. For pre-6.8, we just emulate this with
|
||||
* a cast, since we don't need any of the other fields inside the handle.
|
||||
* just use the handle as a proxy.
|
||||
*
|
||||
* Linux 6.9.x uses a file for the same purpose.
|
||||
*
|
||||
* For pre-6.8, we just emulate this with a cast, since we don't need any of
|
||||
* the other fields inside the handle.
|
||||
*/
|
||||
#ifdef HAVE_BDEV_OPEN_BY_PATH
|
||||
#if defined(HAVE_BDEV_OPEN_BY_PATH)
|
||||
typedef struct bdev_handle zfs_bdev_handle_t;
|
||||
#define BDH_BDEV(bdh) ((bdh)->bdev)
|
||||
#define BDH_IS_ERR(bdh) (IS_ERR(bdh))
|
||||
#define BDH_PTR_ERR(bdh) (PTR_ERR(bdh))
|
||||
#define BDH_ERR_PTR(err) (ERR_PTR(err))
|
||||
#elif defined(HAVE_BDEV_FILE_OPEN_BY_PATH)
|
||||
typedef struct file zfs_bdev_handle_t;
|
||||
#define BDH_BDEV(bdh) (file_bdev(bdh))
|
||||
#define BDH_IS_ERR(bdh) (IS_ERR(bdh))
|
||||
#define BDH_PTR_ERR(bdh) (PTR_ERR(bdh))
|
||||
#define BDH_ERR_PTR(err) (ERR_PTR(err))
|
||||
#else
|
||||
typedef void zfs_bdev_handle_t;
|
||||
#define BDH_BDEV(bdh) ((struct block_device *)bdh)
|
||||
|
@ -242,7 +252,9 @@ vdev_blkdev_get_by_path(const char *path, spa_mode_t smode, void *holder)
|
|||
{
|
||||
vdev_bdev_mode_t bmode = vdev_bdev_mode(smode);
|
||||
|
||||
#if defined(HAVE_BDEV_OPEN_BY_PATH)
|
||||
#if defined(HAVE_BDEV_FILE_OPEN_BY_PATH)
|
||||
return (bdev_file_open_by_path(path, bmode, holder, NULL));
|
||||
#elif defined(HAVE_BDEV_OPEN_BY_PATH)
|
||||
return (bdev_open_by_path(path, bmode, holder, NULL));
|
||||
#elif defined(HAVE_BLKDEV_GET_BY_PATH_4ARG)
|
||||
return (blkdev_get_by_path(path, bmode, holder, NULL));
|
||||
|
@ -258,8 +270,10 @@ vdev_blkdev_put(zfs_bdev_handle_t *bdh, spa_mode_t smode, void *holder)
|
|||
return (bdev_release(bdh));
|
||||
#elif defined(HAVE_BLKDEV_PUT_HOLDER)
|
||||
return (blkdev_put(BDH_BDEV(bdh), holder));
|
||||
#else
|
||||
#elif defined(HAVE_BLKDEV_PUT)
|
||||
return (blkdev_put(BDH_BDEV(bdh), vdev_bdev_mode(smode)));
|
||||
#else
|
||||
fput(bdh);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -1238,8 +1252,6 @@ vdev_disk_io_flush(struct block_device *bdev, zio_t *zio)
|
|||
return (0);
|
||||
}
|
||||
|
||||
#if defined(HAVE_BLKDEV_ISSUE_SECURE_ERASE) || \
|
||||
defined(HAVE_BLKDEV_ISSUE_DISCARD_ASYNC)
|
||||
BIO_END_IO_PROTO(vdev_disk_discard_end_io, bio, error)
|
||||
{
|
||||
zio_t *zio = bio->bi_private;
|
||||
|
@ -1254,54 +1266,99 @@ BIO_END_IO_PROTO(vdev_disk_discard_end_io, bio, error)
|
|||
zio_interrupt(zio);
|
||||
}
|
||||
|
||||
/*
|
||||
* Wrappers for the different secure erase and discard APIs. We use async
|
||||
* when available; in this case, *biop is set to the last bio in the chain.
|
||||
*/
|
||||
static int
|
||||
vdev_issue_discard_trim(zio_t *zio, unsigned long flags)
|
||||
vdev_bdev_issue_secure_erase(zfs_bdev_handle_t *bdh, sector_t sector,
|
||||
sector_t nsect, struct bio **biop)
|
||||
{
|
||||
int ret;
|
||||
struct bio *bio = NULL;
|
||||
*biop = NULL;
|
||||
int error;
|
||||
|
||||
#if defined(BLKDEV_DISCARD_SECURE)
|
||||
ret = - __blkdev_issue_discard(
|
||||
BDH_BDEV(((vdev_disk_t *)zio->io_vd->vdev_tsd)->vd_bdh),
|
||||
zio->io_offset >> 9, zio->io_size >> 9, GFP_NOFS, flags, &bio);
|
||||
#if defined(HAVE_BLKDEV_ISSUE_SECURE_ERASE)
|
||||
error = blkdev_issue_secure_erase(BDH_BDEV(bdh),
|
||||
sector, nsect, GFP_NOFS);
|
||||
#elif defined(HAVE_BLKDEV_ISSUE_DISCARD_ASYNC_FLAGS)
|
||||
error = __blkdev_issue_discard(BDH_BDEV(bdh),
|
||||
sector, nsect, GFP_NOFS, BLKDEV_DISCARD_SECURE, biop);
|
||||
#elif defined(HAVE_BLKDEV_ISSUE_DISCARD_FLAGS)
|
||||
error = blkdev_issue_discard(BDH_BDEV(bdh),
|
||||
sector, nsect, GFP_NOFS, BLKDEV_DISCARD_SECURE);
|
||||
#else
|
||||
(void) flags;
|
||||
ret = - __blkdev_issue_discard(
|
||||
BDH_BDEV(((vdev_disk_t *)zio->io_vd->vdev_tsd)->vd_bdh),
|
||||
zio->io_offset >> 9, zio->io_size >> 9, GFP_NOFS, &bio);
|
||||
#error "unsupported kernel"
|
||||
#endif
|
||||
if (!ret && bio) {
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
static int
|
||||
vdev_bdev_issue_discard(zfs_bdev_handle_t *bdh, sector_t sector,
|
||||
sector_t nsect, struct bio **biop)
|
||||
{
|
||||
*biop = NULL;
|
||||
int error;
|
||||
|
||||
#if defined(HAVE_BLKDEV_ISSUE_DISCARD_ASYNC_FLAGS)
|
||||
error = __blkdev_issue_discard(BDH_BDEV(bdh),
|
||||
sector, nsect, GFP_NOFS, 0, biop);
|
||||
#elif defined(HAVE_BLKDEV_ISSUE_DISCARD_ASYNC_NOFLAGS)
|
||||
error = __blkdev_issue_discard(BDH_BDEV(bdh),
|
||||
sector, nsect, GFP_NOFS, biop);
|
||||
#elif defined(HAVE_BLKDEV_ISSUE_DISCARD_FLAGS)
|
||||
error = blkdev_issue_discard(BDH_BDEV(bdh),
|
||||
sector, nsect, GFP_NOFS, 0);
|
||||
#elif defined(HAVE_BLKDEV_ISSUE_DISCARD_NOFLAGS)
|
||||
error = blkdev_issue_discard(BDH_BDEV(bdh),
|
||||
sector, nsect, GFP_NOFS);
|
||||
#else
|
||||
#error "unsupported kernel"
|
||||
#endif
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* Entry point for TRIM ops. This calls the right wrapper for secure erase or
|
||||
* discard, and then does the appropriate finishing work for error vs success
|
||||
* and async vs sync.
|
||||
*/
|
||||
static int
|
||||
vdev_disk_io_trim(zio_t *zio)
|
||||
{
|
||||
int error;
|
||||
struct bio *bio;
|
||||
|
||||
zfs_bdev_handle_t *bdh = ((vdev_disk_t *)zio->io_vd->vdev_tsd)->vd_bdh;
|
||||
sector_t sector = zio->io_offset >> 9;
|
||||
sector_t nsects = zio->io_size >> 9;
|
||||
|
||||
if (zio->io_trim_flags & ZIO_TRIM_SECURE)
|
||||
error = vdev_bdev_issue_secure_erase(bdh, sector, nsects, &bio);
|
||||
else
|
||||
error = vdev_bdev_issue_discard(bdh, sector, nsects, &bio);
|
||||
|
||||
if (error != 0)
|
||||
return (SET_ERROR(-error));
|
||||
|
||||
if (bio == NULL) {
|
||||
/*
|
||||
* This was a synchronous op that completed successfully, so
|
||||
* return it to ZFS immediately.
|
||||
*/
|
||||
zio_interrupt(zio);
|
||||
} else {
|
||||
/*
|
||||
* This was an asynchronous op; set up completion callback and
|
||||
* issue it.
|
||||
*/
|
||||
bio->bi_private = zio;
|
||||
bio->bi_end_io = vdev_disk_discard_end_io;
|
||||
vdev_submit_bio(bio);
|
||||
}
|
||||
return (ret);
|
||||
}
|
||||
#endif
|
||||
|
||||
static int
|
||||
vdev_disk_io_trim(zio_t *zio)
|
||||
{
|
||||
unsigned long trim_flags = 0;
|
||||
if (zio->io_trim_flags & ZIO_TRIM_SECURE) {
|
||||
#if defined(HAVE_BLKDEV_ISSUE_SECURE_ERASE)
|
||||
return (-blkdev_issue_secure_erase(
|
||||
BDH_BDEV(((vdev_disk_t *)zio->io_vd->vdev_tsd)->vd_bdh),
|
||||
zio->io_offset >> 9, zio->io_size >> 9, GFP_NOFS));
|
||||
#elif defined(BLKDEV_DISCARD_SECURE)
|
||||
trim_flags |= BLKDEV_DISCARD_SECURE;
|
||||
#endif
|
||||
}
|
||||
#if defined(HAVE_BLKDEV_ISSUE_SECURE_ERASE) || \
|
||||
defined(HAVE_BLKDEV_ISSUE_DISCARD_ASYNC)
|
||||
return (vdev_issue_discard_trim(zio, trim_flags));
|
||||
#elif defined(HAVE_BLKDEV_ISSUE_DISCARD)
|
||||
return (-blkdev_issue_discard(
|
||||
BDH_BDEV(((vdev_disk_t *)zio->io_vd->vdev_tsd)->vd_bdh),
|
||||
zio->io_offset >> 9, zio->io_size >> 9, GFP_NOFS, trim_flags));
|
||||
#else
|
||||
#error "Unsupported kernel"
|
||||
#endif
|
||||
return (0);
|
||||
}
|
||||
|
||||
int (*vdev_disk_io_rw_fn)(zio_t *zio) = NULL;
|
||||
|
@ -1376,14 +1433,12 @@ vdev_disk_io_start(zio_t *zio)
|
|||
return;
|
||||
|
||||
case ZIO_TYPE_TRIM:
|
||||
zio->io_error = vdev_disk_io_trim(zio);
|
||||
error = vdev_disk_io_trim(zio);
|
||||
rw_exit(&vd->vd_lock);
|
||||
#if defined(HAVE_BLKDEV_ISSUE_SECURE_ERASE)
|
||||
if (zio->io_trim_flags & ZIO_TRIM_SECURE)
|
||||
zio_interrupt(zio);
|
||||
#elif defined(HAVE_BLKDEV_ISSUE_DISCARD)
|
||||
zio_interrupt(zio);
|
||||
#endif
|
||||
if (error) {
|
||||
zio->io_error = error;
|
||||
zio_execute(zio);
|
||||
}
|
||||
return;
|
||||
|
||||
case ZIO_TYPE_READ:
|
||||
|
|
|
@ -37,6 +37,7 @@
|
|||
#include <sys/spa_impl.h>
|
||||
#include <sys/zvol.h>
|
||||
#include <sys/zvol_impl.h>
|
||||
#include <cityhash.h>
|
||||
|
||||
#include <linux/blkdev_compat.h>
|
||||
#include <linux/task_io_accounting_ops.h>
|
||||
|
@ -53,6 +54,12 @@ static unsigned int zvol_request_sync = 0;
|
|||
static unsigned int zvol_prefetch_bytes = (128 * 1024);
|
||||
static unsigned long zvol_max_discard_blocks = 16384;
|
||||
|
||||
/*
|
||||
* Switch taskq at multiple of 512 MB offset. This can be set to a lower value
|
||||
* to utilize more threads for small files but may affect prefetch hits.
|
||||
*/
|
||||
#define ZVOL_TASKQ_OFFSET_SHIFT 29
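Taken together with the cityhash4() call added further down in this file, the effect is that requests to the same zvol land on the same taskq for each 512 MiB (2^29-byte) window of the volume. A rough userspace sketch of that bucketing; the hash here is a trivial stand-in (the real code uses ZFS's cityhash4), and the zvol identity and taskq count are faked values:

#include <stdio.h>
#include <stdint.h>

#define	ZVOL_TASKQ_OFFSET_SHIFT	29	/* switch taskq every 512 MiB */

/* Stand-in mixer; the real code hashes with cityhash4(). */
static uint64_t
mix(uint64_t a, uint64_t b, uint64_t c)
{
	uint64_t h = a * 0x9e3779b97f4a7c15ULL;
	h ^= b + 0x9e3779b97f4a7c15ULL + (h << 6) + (h >> 2);
	h ^= c + 0x9e3779b97f4a7c15ULL + (h << 6) + (h >> 2);
	return (h);
}

int
main(void)
{
	uint64_t zv_id = 42;		/* fake zvol identity */
	uint64_t tqs_cnt = 4;		/* pretend four taskqs were created */
	uint64_t offsets[] = { 0, 1 << 20, 600ULL << 20, 2048ULL << 20 };

	for (int i = 0; i < 4; i++) {
		uint64_t hash = mix(zv_id,
		    offsets[i] >> ZVOL_TASKQ_OFFSET_SHIFT, 0);
		printf("offset %10llu MiB -> taskq %llu\n",
		    (unsigned long long)(offsets[i] >> 20),
		    (unsigned long long)(hash % tqs_cnt));
	}
	return (0);
}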
|
||||
|
||||
#ifndef HAVE_BLKDEV_GET_ERESTARTSYS
|
||||
static unsigned int zvol_open_timeout_ms = 1000;
|
||||
#endif
|
||||
|
@ -76,6 +83,8 @@ static boolean_t zvol_use_blk_mq = B_FALSE;
|
|||
static unsigned int zvol_blk_mq_blocks_per_thread = 8;
|
||||
#endif
|
||||
|
||||
static unsigned int zvol_num_taskqs = 0;
|
||||
|
||||
#ifndef BLKDEV_DEFAULT_RQ
|
||||
/* BLKDEV_MAX_RQ was renamed to BLKDEV_DEFAULT_RQ in the 5.16 kernel */
|
||||
#define BLKDEV_DEFAULT_RQ BLKDEV_MAX_RQ
|
||||
|
@ -114,7 +123,11 @@ struct zvol_state_os {
|
|||
boolean_t use_blk_mq;
|
||||
};
|
||||
|
||||
static taskq_t *zvol_taskq;
|
||||
typedef struct zv_taskq {
|
||||
uint_t tqs_cnt;
|
||||
taskq_t **tqs_taskq;
|
||||
} zv_taskq_t;
|
||||
static zv_taskq_t zvol_taskqs;
|
||||
static struct ida zvol_ida;
|
||||
|
||||
typedef struct zv_request_stack {
|
||||
|
@ -532,6 +545,22 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq,
|
|||
}
|
||||
|
||||
zv_request_task_t *task;
|
||||
zv_taskq_t *ztqs = &zvol_taskqs;
|
||||
uint_t blk_mq_hw_queue = 0;
|
||||
uint_t tq_idx;
|
||||
uint_t taskq_hash;
|
||||
#ifdef HAVE_BLK_MQ
|
||||
if (rq)
|
||||
#ifdef HAVE_BLK_MQ_RQ_HCTX
|
||||
blk_mq_hw_queue = rq->mq_hctx->queue_num;
|
||||
#else
|
||||
blk_mq_hw_queue =
|
||||
rq->q->queue_hw_ctx[rq->q->mq_map[rq->cpu]]->queue_num;
|
||||
#endif
|
||||
#endif
|
||||
taskq_hash = cityhash4((uintptr_t)zv, offset >> ZVOL_TASKQ_OFFSET_SHIFT,
|
||||
blk_mq_hw_queue, 0);
|
||||
tq_idx = taskq_hash % ztqs->tqs_cnt;
|
||||
|
||||
if (rw == WRITE) {
|
||||
if (unlikely(zv->zv_flags & ZVOL_RDONLY)) {
|
||||
|
@ -601,7 +630,7 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq,
|
|||
zvol_discard(&zvr);
|
||||
} else {
|
||||
task = zv_request_task_create(zvr);
|
||||
taskq_dispatch_ent(zvol_taskq,
|
||||
taskq_dispatch_ent(ztqs->tqs_taskq[tq_idx],
|
||||
zvol_discard_task, task, 0, &task->ent);
|
||||
}
|
||||
} else {
|
||||
|
@ -609,7 +638,7 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq,
|
|||
zvol_write(&zvr);
|
||||
} else {
|
||||
task = zv_request_task_create(zvr);
|
||||
taskq_dispatch_ent(zvol_taskq,
|
||||
taskq_dispatch_ent(ztqs->tqs_taskq[tq_idx],
|
||||
zvol_write_task, task, 0, &task->ent);
|
||||
}
|
||||
}
|
||||
|
@ -631,7 +660,7 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq,
|
|||
zvol_read(&zvr);
|
||||
} else {
|
||||
task = zv_request_task_create(zvr);
|
||||
taskq_dispatch_ent(zvol_taskq,
|
||||
taskq_dispatch_ent(ztqs->tqs_taskq[tq_idx],
|
||||
zvol_read_task, task, 0, &task->ent);
|
||||
}
|
||||
}
|
||||
|
@ -1053,6 +1082,16 @@ zvol_alloc_non_blk_mq(struct zvol_state_os *zso)
|
|||
if (zso->zvo_disk == NULL)
|
||||
return (1);
|
||||
|
||||
zso->zvo_disk->minors = ZVOL_MINORS;
|
||||
zso->zvo_queue = zso->zvo_disk->queue;
|
||||
#elif defined(HAVE_BLK_ALLOC_DISK_2ARG)
|
||||
struct gendisk *disk = blk_alloc_disk(NULL, NUMA_NO_NODE);
|
||||
if (IS_ERR(disk)) {
|
||||
zso->zvo_disk = NULL;
|
||||
return (1);
|
||||
}
|
||||
|
||||
zso->zvo_disk = disk;
|
||||
zso->zvo_disk->minors = ZVOL_MINORS;
|
||||
zso->zvo_queue = zso->zvo_disk->queue;
|
||||
#else
|
||||
|
@ -1103,6 +1142,17 @@ zvol_alloc_blk_mq(zvol_state_t *zv)
|
|||
}
|
||||
zso->zvo_queue = zso->zvo_disk->queue;
|
||||
zso->zvo_disk->minors = ZVOL_MINORS;
|
||||
#elif defined(HAVE_BLK_ALLOC_DISK_2ARG)
|
||||
struct gendisk *disk = blk_mq_alloc_disk(&zso->tag_set, NULL, zv);
|
||||
if (IS_ERR(disk)) {
|
||||
zso->zvo_disk = NULL;
|
||||
blk_mq_free_tag_set(&zso->tag_set);
|
||||
return (1);
|
||||
}
|
||||
|
||||
zso->zvo_disk = disk;
|
||||
zso->zvo_queue = zso->zvo_disk->queue;
|
||||
zso->zvo_disk->minors = ZVOL_MINORS;
|
||||
#else
|
||||
zso->zvo_disk = alloc_disk(ZVOL_MINORS);
|
||||
if (zso->zvo_disk == NULL) {
|
||||
|
@ -1256,7 +1306,7 @@ zvol_os_free(zvol_state_t *zv)
|
|||
|
||||
del_gendisk(zv->zv_zso->zvo_disk);
|
||||
#if defined(HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS) && \
|
||||
defined(HAVE_BLK_ALLOC_DISK)
|
||||
(defined(HAVE_BLK_ALLOC_DISK) || defined(HAVE_BLK_ALLOC_DISK_2ARG))
|
||||
#if defined(HAVE_BLK_CLEANUP_DISK)
|
||||
blk_cleanup_disk(zv->zv_zso->zvo_disk);
|
||||
#else
|
||||
|
@ -1577,8 +1627,40 @@ zvol_init(void)
|
|||
zvol_actual_threads = MIN(MAX(zvol_threads, 1), 1024);
|
||||
}
|
||||
|
||||
/*
|
||||
* Use atleast 32 zvol_threads but for many core system,
|
||||
* prefer 6 threads per taskq, but no more taskqs
|
||||
* than threads in them on large systems.
|
||||
*
|
||||
* taskq total
|
||||
* cpus taskqs threads threads
|
||||
* ------- ------- ------- -------
|
||||
* 1 1 32 32
|
||||
* 2 1 32 32
|
||||
* 4 1 32 32
|
||||
* 8 2 16 32
|
||||
* 16 3 11 33
|
||||
* 32 5 7 35
|
||||
* 64 8 8 64
|
||||
* 128 11 12 132
|
||||
* 256 16 16 256
|
||||
*/
|
||||
zv_taskq_t *ztqs = &zvol_taskqs;
|
||||
uint_t num_tqs = MIN(num_online_cpus(), zvol_num_taskqs);
|
||||
if (num_tqs == 0) {
|
||||
num_tqs = 1 + num_online_cpus() / 6;
|
||||
while (num_tqs * num_tqs > zvol_actual_threads)
|
||||
num_tqs--;
|
||||
}
|
||||
uint_t per_tq_thread = zvol_actual_threads / num_tqs;
|
||||
if (per_tq_thread * num_tqs < zvol_actual_threads)
|
||||
per_tq_thread++;
|
||||
ztqs->tqs_cnt = num_tqs;
|
||||
ztqs->tqs_taskq = kmem_alloc(num_tqs * sizeof (taskq_t *), KM_SLEEP);
|
||||
error = register_blkdev(zvol_major, ZVOL_DRIVER);
|
||||
if (error) {
|
||||
kmem_free(ztqs->tqs_taskq, ztqs->tqs_cnt * sizeof (taskq_t *));
|
||||
ztqs->tqs_taskq = NULL;
|
||||
printk(KERN_INFO "ZFS: register_blkdev() failed %d\n", error);
|
||||
return (error);
|
||||
}
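The sizing table in the comment above falls out of a short heuristic visible in this hunk: the taskq count starts at 1 + cpus/6 and is reduced until taskqs squared no longer exceeds the worker count, and the workers are then divided evenly with the remainder rounded up. A standalone sketch that reproduces the table; the default worker count is assumed here to be max(32, number of CPUs), which is what the table implies but is not shown in this hunk:

#include <stdio.h>

int
main(void)
{
	int cpus_tbl[] = { 1, 2, 4, 8, 16, 32, 64, 128, 256 };

	printf("%7s %7s %7s %7s\n", "cpus", "taskqs", "threads", "total");
	for (int i = 0; i < 9; i++) {
		int cpus = cpus_tbl[i];
		/* Assumed default: at least 32 workers, or one per CPU. */
		int threads = cpus > 32 ? cpus : 32;
		int num_tqs = 1 + cpus / 6;
		while (num_tqs * num_tqs > threads)
			num_tqs--;
		int per_tq = threads / num_tqs;
		if (per_tq * num_tqs < threads)
			per_tq++;	/* round up so every worker is used */
		printf("%7d %7d %7d %7d\n", cpus, num_tqs, per_tq,
		    per_tq * num_tqs);
	}
	return (0);
}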
|
||||
|
@ -1598,11 +1680,22 @@ zvol_init(void)
|
|||
1024);
|
||||
}
|
||||
#endif
|
||||
zvol_taskq = taskq_create(ZVOL_DRIVER, zvol_actual_threads, maxclsyspri,
|
||||
zvol_actual_threads, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
|
||||
if (zvol_taskq == NULL) {
|
||||
unregister_blkdev(zvol_major, ZVOL_DRIVER);
|
||||
return (-ENOMEM);
|
||||
for (uint_t i = 0; i < num_tqs; i++) {
|
||||
char name[32];
|
||||
(void) snprintf(name, sizeof (name), "%s_tq-%u",
|
||||
ZVOL_DRIVER, i);
|
||||
ztqs->tqs_taskq[i] = taskq_create(name, per_tq_thread,
|
||||
maxclsyspri, per_tq_thread, INT_MAX,
|
||||
TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
|
||||
if (ztqs->tqs_taskq[i] == NULL) {
|
||||
for (int j = i - 1; j >= 0; j--)
|
||||
taskq_destroy(ztqs->tqs_taskq[j]);
|
||||
unregister_blkdev(zvol_major, ZVOL_DRIVER);
|
||||
kmem_free(ztqs->tqs_taskq, ztqs->tqs_cnt *
|
||||
sizeof (taskq_t *));
|
||||
ztqs->tqs_taskq = NULL;
|
||||
return (-ENOMEM);
|
||||
}
|
||||
}
|
||||
|
||||
zvol_init_impl();
|
||||
|
@ -1613,9 +1706,22 @@ zvol_init(void)
|
|||
void
|
||||
zvol_fini(void)
|
||||
{
|
||||
zv_taskq_t *ztqs = &zvol_taskqs;
|
||||
zvol_fini_impl();
|
||||
unregister_blkdev(zvol_major, ZVOL_DRIVER);
|
||||
taskq_destroy(zvol_taskq);
|
||||
|
||||
if (ztqs->tqs_taskq == NULL) {
|
||||
ASSERT3U(ztqs->tqs_cnt, ==, 0);
|
||||
} else {
|
||||
for (uint_t i = 0; i < ztqs->tqs_cnt; i++) {
|
||||
ASSERT3P(ztqs->tqs_taskq[i], !=, NULL);
|
||||
taskq_destroy(ztqs->tqs_taskq[i]);
|
||||
}
|
||||
kmem_free(ztqs->tqs_taskq, ztqs->tqs_cnt *
|
||||
sizeof (taskq_t *));
|
||||
ztqs->tqs_taskq = NULL;
|
||||
}
|
||||
|
||||
ida_destroy(&zvol_ida);
|
||||
}
|
||||
|
||||
|
@ -1636,6 +1742,9 @@ MODULE_PARM_DESC(zvol_request_sync, "Synchronously handle bio requests");
|
|||
module_param(zvol_max_discard_blocks, ulong, 0444);
|
||||
MODULE_PARM_DESC(zvol_max_discard_blocks, "Max number of blocks to discard");
|
||||
|
||||
module_param(zvol_num_taskqs, uint, 0444);
|
||||
MODULE_PARM_DESC(zvol_num_taskqs, "Number of zvol taskqs");
|
||||
|
||||
module_param(zvol_prefetch_bytes, uint, 0644);
|
||||
MODULE_PARM_DESC(zvol_prefetch_bytes, "Prefetch N bytes at zvol start+end");
|
||||
|
||||
|
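The sizing rule in the comment above is easy to sanity-check outside the kernel. A minimal user-space sketch follows; it assumes the default zvol_threads=0 path ends up with MAX(num_online_cpus(), 32) worker threads (that branch is not shown in this hunk), and it reproduces the cpus/taskqs/threads table:

/*
 * Rough user-space model of the zvol taskq sizing above. Assumption:
 * with zvol_threads=0 the thread budget is MAX(num_online_cpus(), 32);
 * only the table arithmetic is modeled here.
 */
#include <stdio.h>

int
main(void)
{
	for (unsigned int cpus = 1; cpus <= 256; cpus *= 2) {
		unsigned int threads = cpus > 32 ? cpus : 32;
		unsigned int num_tqs = 1 + cpus / 6;	/* ~6 threads per taskq */
		while (num_tqs * num_tqs > threads)	/* no more taskqs than */
			num_tqs--;			/* threads in each one */
		unsigned int per_tq = threads / num_tqs;
		if (per_tq * num_tqs < threads)		/* round up */
			per_tq++;
		printf("%3u cpus: %2u taskqs x %2u threads = %3u total\n",
		    cpus, num_tqs, per_tq, num_tqs * per_tq);
	}
	return (0);
}

Capping the taskq count at roughly the square root of the thread budget is what keeps a taskq from ever having fewer threads than there are taskqs.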
@@ -1557,17 +1557,14 @@ dbuf_read_verify_dnode_crypt(dmu_buf_impl_t *db, uint32_t flags)
 * returning.
 */
static int
dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags,
dbuf_read_impl(dmu_buf_impl_t *db, dnode_t *dn, zio_t *zio, uint32_t flags,
    db_lock_type_t dblt, const void *tag)
{
	dnode_t *dn;
	zbookmark_phys_t zb;
	uint32_t aflags = ARC_FLAG_NOWAIT;
	int err, zio_flags;
	blkptr_t bp, *bpp;
	blkptr_t bp, *bpp = NULL;

	DB_DNODE_ENTER(db);
	dn = DB_DNODE(db);
	ASSERT(!zfs_refcount_is_zero(&db->db_holds));
	ASSERT(MUTEX_HELD(&db->db_mtx));
	ASSERT(db->db_state == DB_UNCACHED || db->db_state == DB_NOFILL);

@@ -1580,29 +1577,28 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags,
		goto early_unlock;
	}

	if (db->db_state == DB_UNCACHED) {
		if (db->db_blkptr == NULL) {
			bpp = NULL;
		} else {
			bp = *db->db_blkptr;
	/*
	 * If we have a pending block clone, we don't want to read the
	 * underlying block, but the content of the block being cloned,
	 * pointed by the dirty record, so we have the most recent data.
	 * If there is no dirty record, then we hit a race in a sync
	 * process when the dirty record is already removed, while the
	 * dbuf is not yet destroyed. Such case is equivalent to uncached.
	 */
	if (db->db_state == DB_NOFILL) {
		dbuf_dirty_record_t *dr = list_head(&db->db_dirty_records);
		if (dr != NULL) {
			if (!dr->dt.dl.dr_brtwrite) {
				err = EIO;
				goto early_unlock;
			}
			bp = dr->dt.dl.dr_overridden_by;
			bpp = &bp;
		}
	} else {
		dbuf_dirty_record_t *dr;
	}

		ASSERT3S(db->db_state, ==, DB_NOFILL);

		/*
		 * Block cloning: If we have a pending block clone,
		 * we don't want to read the underlying block, but the content
		 * of the block being cloned, so we have the most recent data.
		 */
		dr = list_head(&db->db_dirty_records);
		if (dr == NULL || !dr->dt.dl.dr_brtwrite) {
			err = EIO;
			goto early_unlock;
		}
		bp = dr->dt.dl.dr_overridden_by;
	if (bpp == NULL && db->db_blkptr != NULL) {
		bp = *db->db_blkptr;
		bpp = &bp;
	}

@@ -1643,8 +1639,6 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags,
	if (err != 0)
		goto early_unlock;

	DB_DNODE_EXIT(db);

	db->db_state = DB_READ;
	DTRACE_SET_STATE(db, "read issued");
	mutex_exit(&db->db_mtx);

@@ -1669,12 +1663,11 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags,
	 * parent's rwlock, which would be a lock ordering violation.
	 */
	dmu_buf_unlock_parent(db, dblt, tag);
	(void) arc_read(zio, db->db_objset->os_spa, bpp,
	return (arc_read(zio, db->db_objset->os_spa, bpp,
	    dbuf_read_done, db, ZIO_PRIORITY_SYNC_READ, zio_flags,
	    &aflags, &zb);
	return (err);
	    &aflags, &zb));

early_unlock:
	DB_DNODE_EXIT(db);
	mutex_exit(&db->db_mtx);
	dmu_buf_unlock_parent(db, dblt, tag);
	return (err);

@@ -1759,7 +1752,7 @@ dbuf_fix_old_data(dmu_buf_impl_t *db, uint64_t txg)
}

int
dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
dbuf_read(dmu_buf_impl_t *db, zio_t *pio, uint32_t flags)
{
	int err = 0;
	boolean_t prefetch;

@@ -1775,7 +1768,7 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
	dn = DB_DNODE(db);

	prefetch = db->db_level == 0 && db->db_blkid != DMU_BONUS_BLKID &&
	    (flags & DB_RF_NOPREFETCH) == 0 && dn != NULL;
	    (flags & DB_RF_NOPREFETCH) == 0;

	mutex_enter(&db->db_mtx);
	if (flags & DB_RF_PARTIAL_FIRST)

@@ -1822,13 +1815,13 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)

	db_lock_type_t dblt = dmu_buf_lock_parent(db, RW_READER, FTAG);

	if (zio == NULL && (db->db_state == DB_NOFILL ||
	if (pio == NULL && (db->db_state == DB_NOFILL ||
	    (db->db_blkptr != NULL && !BP_IS_HOLE(db->db_blkptr)))) {
		spa_t *spa = dn->dn_objset->os_spa;
		zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL);
		pio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL);
		need_wait = B_TRUE;
	}
	err = dbuf_read_impl(db, zio, flags, dblt, FTAG);
	err = dbuf_read_impl(db, dn, pio, flags, dblt, FTAG);
	/*
	 * dbuf_read_impl has dropped db_mtx and our parent's rwlock
	 * for us

@@ -1849,9 +1842,10 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
	 */
	if (need_wait) {
		if (err == 0)
			err = zio_wait(zio);
			err = zio_wait(pio);
		else
			VERIFY0(zio_wait(zio));
			(void) zio_wait(pio);
		pio = NULL;
	}
	} else {
		/*

@@ -1878,7 +1872,7 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
			ASSERT(db->db_state == DB_READ ||
			    (flags & DB_RF_HAVESTRUCT) == 0);
			DTRACE_PROBE2(blocked__read, dmu_buf_impl_t *,
			    db, zio_t *, zio);
			    db, zio_t *, pio);
			cv_wait(&db->db_changed, &db->db_mtx);
		}
		if (db->db_state == DB_UNCACHED)

@@ -1887,6 +1881,13 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
		}
	}

	if (pio && err != 0) {
		zio_t *zio = zio_null(pio, pio->io_spa, NULL, NULL, NULL,
		    ZIO_FLAG_CANFAIL);
		zio->io_error = err;
		zio_nowait(zio);
	}

	return (err);
}

@@ -2631,26 +2632,24 @@ dmu_buf_will_dirty_impl(dmu_buf_t *db_fake, int flags, dmu_tx_t *tx)
	ASSERT(!zfs_refcount_is_zero(&db->db_holds));

	/*
	 * Quick check for dirtiness. For already dirty blocks, this
	 * reduces runtime of this function by >90%, and overall performance
	 * by 50% for some workloads (e.g. file deletion with indirect blocks
	 * cached).
	 * Quick check for dirtiness to improve performance for some workloads
	 * (e.g. file deletion with indirect blocks cached).
	 */
	mutex_enter(&db->db_mtx);

	if (db->db_state == DB_CACHED || db->db_state == DB_NOFILL) {
		dbuf_dirty_record_t *dr = dbuf_find_dirty_eq(db, tx->tx_txg);
		/*
		 * It's possible that it is already dirty but not cached,
		 * It's possible that the dbuf is already dirty but not cached,
		 * because there are some calls to dbuf_dirty() that don't
		 * go through dmu_buf_will_dirty().
		 */
		dbuf_dirty_record_t *dr = dbuf_find_dirty_eq(db, tx->tx_txg);
		if (dr != NULL) {
			if (dr->dt.dl.dr_brtwrite) {
			if (db->db_level == 0 &&
			    dr->dt.dl.dr_brtwrite) {
				/*
				 * Block cloning: If we are dirtying a cloned
				 * block, we cannot simply redirty it, because
				 * this dr has no data associated with it.
				 * level 0 block, we cannot simply redirty it,
				 * because this dr has no associated data.
				 * We will go through a full undirtying below,
				 * before dirtying it again.
				 */

@@ -4597,11 +4596,10 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
	if (os->os_encrypted && dn->dn_object == DMU_META_DNODE_OBJECT)
		dbuf_prepare_encrypted_dnode_leaf(dr);

	if (db->db_state != DB_NOFILL &&
	if (*datap != NULL && *datap == db->db_buf &&
	    dn->dn_object != DMU_META_DNODE_OBJECT &&
	    zfs_refcount_count(&db->db_holds) > 1 &&
	    dr->dt.dl.dr_override_state != DR_OVERRIDDEN &&
	    *datap == db->db_buf) {
	    dr->dt.dl.dr_override_state != DR_OVERRIDDEN) {
		/*
		 * If this buffer is currently "in use" (i.e., there
		 * are active holds and db_data still references it),

@@ -4890,11 +4888,9 @@ dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb)
	if (db->db_level == 0) {
		ASSERT(db->db_blkid != DMU_BONUS_BLKID);
		ASSERT(dr->dt.dl.dr_override_state == DR_NOT_OVERRIDDEN);
		if (db->db_state != DB_NOFILL) {
			if (dr->dt.dl.dr_data != NULL &&
			    dr->dt.dl.dr_data != db->db_buf) {
				arc_buf_destroy(dr->dt.dl.dr_data, db);
			}
		if (dr->dt.dl.dr_data != NULL &&
		    dr->dt.dl.dr_data != db->db_buf) {
			arc_buf_destroy(dr->dt.dl.dr_data, db);
		}
	} else {
		ASSERT(list_head(&dr->dt.di.dr_children) == NULL);

@@ -5097,21 +5093,18 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)

	os = dn->dn_objset;

	if (db->db_state != DB_NOFILL) {
		if (db->db_level > 0 || dn->dn_type == DMU_OT_DNODE) {
			/*
			 * Private object buffers are released here rather
			 * than in dbuf_dirty() since they are only modified
			 * in the syncing context and we don't want the
			 * overhead of making multiple copies of the data.
			 */
			if (BP_IS_HOLE(db->db_blkptr)) {
				arc_buf_thaw(data);
			} else {
				dbuf_release_bp(db);
			}
			dbuf_remap(dn, db, tx);
		}
	if (db->db_level > 0 || dn->dn_type == DMU_OT_DNODE) {
		/*
		 * Private object buffers are released here rather than in
		 * dbuf_dirty() since they are only modified in the syncing
		 * context and we don't want the overhead of making multiple
		 * copies of the data.
		 */
		if (BP_IS_HOLE(db->db_blkptr))
			arc_buf_thaw(data);
		else
			dbuf_release_bp(db);
		dbuf_remap(dn, db, tx);
	}

	if (parent != dn->dn_dbuf) {

@@ -5147,7 +5140,7 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)

	if (db->db_blkid == DMU_SPILL_BLKID)
		wp_flag = WP_SPILL;
	wp_flag |= (db->db_state == DB_NOFILL) ? WP_NOFILL : 0;
	wp_flag |= (data == NULL) ? WP_NOFILL : 0;

	dmu_write_policy(os, dn, db->db_level, wp_flag, &zp);

@@ -5179,7 +5172,7 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
		    dr->dt.dl.dr_copies, dr->dt.dl.dr_nopwrite,
		    dr->dt.dl.dr_brtwrite);
		mutex_exit(&db->db_mtx);
	} else if (db->db_state == DB_NOFILL) {
	} else if (data == NULL) {
		ASSERT(zp.zp_checksum == ZIO_CHECKSUM_OFF ||
		    zp.zp_checksum == ZIO_CHECKSUM_NOPARITY);
		dr->dr_zio = zio_write(pio, os->os_spa, txg,
@@ -569,8 +569,10 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
	for (i = 0; i < nblks; i++) {
		dmu_buf_impl_t *db = dbuf_hold(dn, blkid + i, tag);
		if (db == NULL) {
			if (zs)
				dmu_zfetch_run(zs, missed, B_TRUE);
			if (zs) {
				dmu_zfetch_run(&dn->dn_zfetch, zs, missed,
				    B_TRUE);
			}
			rw_exit(&dn->dn_struct_rwlock);
			dmu_buf_rele_array(dbp, nblks, tag);
			if (read)

@@ -606,7 +608,7 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
		zfs_racct_write(length, nblks);

	if (zs)
		dmu_zfetch_run(zs, missed, B_TRUE);
		dmu_zfetch_run(&dn->dn_zfetch, zs, missed, B_TRUE);
	rw_exit(&dn->dn_struct_rwlock);

	if (read) {
@@ -65,9 +65,16 @@ unsigned int zfetch_max_distance = 64 * 1024 * 1024;
#endif
/* max bytes to prefetch indirects for per stream (default 64MB) */
unsigned int zfetch_max_idistance = 64 * 1024 * 1024;
/* max request reorder distance within a stream (default 16MB) */
unsigned int zfetch_max_reorder = 16 * 1024 * 1024;
/* Max log2 fraction of holes in a stream */
unsigned int zfetch_hole_shift = 2;

typedef struct zfetch_stats {
	kstat_named_t zfetchstat_hits;
	kstat_named_t zfetchstat_future;
	kstat_named_t zfetchstat_stride;
	kstat_named_t zfetchstat_past;
	kstat_named_t zfetchstat_misses;
	kstat_named_t zfetchstat_max_streams;
	kstat_named_t zfetchstat_io_issued;

@@ -76,6 +83,9 @@ typedef struct zfetch_stats {

static zfetch_stats_t zfetch_stats = {
	{ "hits", KSTAT_DATA_UINT64 },
	{ "future", KSTAT_DATA_UINT64 },
	{ "stride", KSTAT_DATA_UINT64 },
	{ "past", KSTAT_DATA_UINT64 },
	{ "misses", KSTAT_DATA_UINT64 },
	{ "max_streams", KSTAT_DATA_UINT64 },
	{ "io_issued", KSTAT_DATA_UINT64 },

@@ -84,6 +94,9 @@ static zfetch_stats_t zfetch_stats = {

struct {
	wmsum_t zfetchstat_hits;
	wmsum_t zfetchstat_future;
	wmsum_t zfetchstat_stride;
	wmsum_t zfetchstat_past;
	wmsum_t zfetchstat_misses;
	wmsum_t zfetchstat_max_streams;
	wmsum_t zfetchstat_io_issued;

@@ -107,6 +120,12 @@ zfetch_kstats_update(kstat_t *ksp, int rw)
		return (EACCES);
	zs->zfetchstat_hits.value.ui64 =
	    wmsum_value(&zfetch_sums.zfetchstat_hits);
	zs->zfetchstat_future.value.ui64 =
	    wmsum_value(&zfetch_sums.zfetchstat_future);
	zs->zfetchstat_stride.value.ui64 =
	    wmsum_value(&zfetch_sums.zfetchstat_stride);
	zs->zfetchstat_past.value.ui64 =
	    wmsum_value(&zfetch_sums.zfetchstat_past);
	zs->zfetchstat_misses.value.ui64 =
	    wmsum_value(&zfetch_sums.zfetchstat_misses);
	zs->zfetchstat_max_streams.value.ui64 =

@@ -122,6 +141,9 @@ void
zfetch_init(void)
{
	wmsum_init(&zfetch_sums.zfetchstat_hits, 0);
	wmsum_init(&zfetch_sums.zfetchstat_future, 0);
	wmsum_init(&zfetch_sums.zfetchstat_stride, 0);
	wmsum_init(&zfetch_sums.zfetchstat_past, 0);
	wmsum_init(&zfetch_sums.zfetchstat_misses, 0);
	wmsum_init(&zfetch_sums.zfetchstat_max_streams, 0);
	wmsum_init(&zfetch_sums.zfetchstat_io_issued, 0);

@@ -147,6 +169,9 @@ zfetch_fini(void)
}

	wmsum_fini(&zfetch_sums.zfetchstat_hits);
	wmsum_fini(&zfetch_sums.zfetchstat_future);
	wmsum_fini(&zfetch_sums.zfetchstat_stride);
	wmsum_fini(&zfetch_sums.zfetchstat_past);
	wmsum_fini(&zfetch_sums.zfetchstat_misses);
	wmsum_fini(&zfetch_sums.zfetchstat_max_streams);
	wmsum_fini(&zfetch_sums.zfetchstat_io_issued);

@@ -222,22 +247,22 @@ static void
dmu_zfetch_stream_create(zfetch_t *zf, uint64_t blkid)
{
	zstream_t *zs, *zs_next, *zs_old = NULL;
	hrtime_t now = gethrtime(), t;
	uint_t now = gethrestime_sec(), t;

	ASSERT(MUTEX_HELD(&zf->zf_lock));

	/*
	 * Delete too old streams, reusing the first found one.
	 */
	t = now - SEC2NSEC(zfetch_max_sec_reap);
	t = now - zfetch_max_sec_reap;
	for (zs = list_head(&zf->zf_stream); zs != NULL; zs = zs_next) {
		zs_next = list_next(&zf->zf_stream, zs);
		/*
		 * Skip if still active. 1 -- zf_stream reference.
		 */
		if (zfs_refcount_count(&zs->zs_refs) != 1)
		if ((int)(zs->zs_atime - t) >= 0)
			continue;
		if (zs->zs_atime > t)
		if (zfs_refcount_count(&zs->zs_refs) != 1)
			continue;
		if (zs_old)
			dmu_zfetch_stream_remove(zf, zs);

@@ -246,6 +271,7 @@ dmu_zfetch_stream_create(zfetch_t *zf, uint64_t blkid)
	}
	if (zs_old) {
		zs = zs_old;
		list_remove(&zf->zf_stream, zs);
		goto reuse;
	}

@@ -255,21 +281,23 @@ dmu_zfetch_stream_create(zfetch_t *zf, uint64_t blkid)
	 * for all the streams to be non-overlapping.
	 */
	uint32_t max_streams = MAX(1, MIN(zfetch_max_streams,
	    zf->zf_dnode->dn_maxblkid * zf->zf_dnode->dn_datablksz /
	    (zf->zf_dnode->dn_maxblkid << zf->zf_dnode->dn_datablkshift) /
	    zfetch_max_distance));
	if (zf->zf_numstreams >= max_streams) {
		t = now - SEC2NSEC(zfetch_min_sec_reap);
		t = now - zfetch_min_sec_reap;
		for (zs = list_head(&zf->zf_stream); zs != NULL;
		    zs = list_next(&zf->zf_stream, zs)) {
			if ((int)(zs->zs_atime - t) >= 0)
				continue;
			if (zfs_refcount_count(&zs->zs_refs) != 1)
				continue;
			if (zs->zs_atime > t)
				continue;
			if (zs_old == NULL || zs->zs_atime < zs_old->zs_atime)
			if (zs_old == NULL ||
			    (int)(zs_old->zs_atime - zs->zs_atime) >= 0)
				zs_old = zs;
		}
		if (zs_old) {
			zs = zs_old;
			list_remove(&zf->zf_stream, zs);
			goto reuse;
		}
		ZFETCHSTAT_BUMP(zfetchstat_max_streams);

@@ -277,24 +305,24 @@ dmu_zfetch_stream_create(zfetch_t *zf, uint64_t blkid)
	}

	zs = kmem_zalloc(sizeof (*zs), KM_SLEEP);
	zs->zs_fetch = zf;
	zfs_refcount_create(&zs->zs_callers);
	zfs_refcount_create(&zs->zs_refs);
	/* One reference for zf_stream. */
	zfs_refcount_add(&zs->zs_refs, NULL);
	zf->zf_numstreams++;
	list_insert_head(&zf->zf_stream, zs);

reuse:
	list_insert_head(&zf->zf_stream, zs);
	zs->zs_blkid = blkid;
	/* Allow immediate stream reuse until first hit. */
	zs->zs_atime = now - zfetch_min_sec_reap;
	memset(zs->zs_ranges, 0, sizeof (zs->zs_ranges));
	zs->zs_pf_dist = 0;
	zs->zs_ipf_dist = 0;
	zs->zs_pf_start = blkid;
	zs->zs_pf_end = blkid;
	zs->zs_ipf_dist = 0;
	zs->zs_ipf_start = blkid;
	zs->zs_ipf_end = blkid;
	/* Allow immediate stream reuse until first hit. */
	zs->zs_atime = now - SEC2NSEC(zfetch_min_sec_reap);
	zs->zs_missed = B_FALSE;
	zs->zs_more = B_FALSE;
}
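One detail worth noting in the reworked reaping logic above: zs_atime is now kept in 32-bit seconds (gethrestime_sec()) rather than hrtime_t, and the age tests are written as (int)(zs->zs_atime - t) >= 0 so the comparison stays correct across counter wraparound. A tiny stand-alone illustration, with arbitrary values and assuming a 32-bit int as the kernel code does:

#include <stdio.h>

int
main(void)
{
	unsigned int now = 5;		/* counter has just wrapped past 2^32 */
	unsigned int atime = now - 2;	/* last access 2 "seconds" ago */
	unsigned int t = now - 10;	/* reap anything older than 10s */

	/* A direct comparison is fooled by the wrap... */
	printf("naive (atime > t): %d\n", atime > t);
	/* ...while the difference-based form still reports "recent". */
	printf("wrap-safe ((int)(atime - t) >= 0): %d\n",
	    (int)(atime - t) >= 0);
	return (0);
}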
@@ -311,6 +339,120 @@ dmu_zfetch_done(void *arg, uint64_t level, uint64_t blkid, boolean_t io_issued)
		aggsum_add(&zfetch_sums.zfetchstat_io_active, -1);
}

/*
 * Process stream hit access for nblks blocks starting at zs_blkid. Return
 * number of blocks to proceed for after aggregation with future ranges.
 */
static uint64_t
dmu_zfetch_hit(zstream_t *zs, uint64_t nblks)
{
	uint_t i, j;

	/* Optimize sequential accesses (no future ranges). */
	if (zs->zs_ranges[0].start == 0)
		goto done;

	/* Look for intersections with further ranges. */
	for (i = 0; i < ZFETCH_RANGES; i++) {
		zsrange_t *r = &zs->zs_ranges[i];
		if (r->start == 0 || r->start > nblks)
			break;
		if (r->end >= nblks) {
			nblks = r->end;
			i++;
			break;
		}
	}

	/* Delete all found intersecting ranges, update the remaining. */
	for (j = 0; i < ZFETCH_RANGES; i++, j++) {
		if (zs->zs_ranges[i].start == 0)
			break;
		ASSERT3U(zs->zs_ranges[i].start, >, nblks);
		ASSERT3U(zs->zs_ranges[i].end, >, nblks);
		zs->zs_ranges[j].start = zs->zs_ranges[i].start - nblks;
		zs->zs_ranges[j].end = zs->zs_ranges[i].end - nblks;
	}
	if (j < ZFETCH_RANGES) {
		zs->zs_ranges[j].start = 0;
		zs->zs_ranges[j].end = 0;
	}

done:
	zs->zs_blkid += nblks;
	return (nblks);
}

/*
 * Process future stream access for nblks blocks starting at blkid. Return
 * number of blocks to proceed for if future ranges reach fill threshold.
 */
static uint64_t
dmu_zfetch_future(zstream_t *zs, uint64_t blkid, uint64_t nblks)
{
	ASSERT3U(blkid, >, zs->zs_blkid);
	blkid -= zs->zs_blkid;
	ASSERT3U(blkid + nblks, <=, UINT16_MAX);

	/* Search for first and last intersection or insert point. */
	uint_t f = ZFETCH_RANGES, l = 0, i;
	for (i = 0; i < ZFETCH_RANGES; i++) {
		zsrange_t *r = &zs->zs_ranges[i];
		if (r->start == 0 || r->start > blkid + nblks)
			break;
		if (r->end < blkid)
			continue;
		if (f > i)
			f = i;
		if (l < i)
			l = i;
	}
	if (f <= l) {
		/* Got some intersecting range, expand it if needed. */
		if (zs->zs_ranges[f].start > blkid)
			zs->zs_ranges[f].start = blkid;
		zs->zs_ranges[f].end = MAX(zs->zs_ranges[l].end, blkid + nblks);
		if (f < l) {
			/* Got more than one intersection, remove others. */
			for (f++, l++; l < ZFETCH_RANGES; f++, l++) {
				zs->zs_ranges[f].start = zs->zs_ranges[l].start;
				zs->zs_ranges[f].end = zs->zs_ranges[l].end;
			}
			zs->zs_ranges[ZFETCH_RANGES - 1].start = 0;
			zs->zs_ranges[ZFETCH_RANGES - 1].end = 0;
		}
	} else if (i < ZFETCH_RANGES) {
		/* Got no intersecting ranges, insert new one. */
		for (l = ZFETCH_RANGES - 1; l > i; l--) {
			zs->zs_ranges[l].start = zs->zs_ranges[l - 1].start;
			zs->zs_ranges[l].end = zs->zs_ranges[l - 1].end;
		}
		zs->zs_ranges[i].start = blkid;
		zs->zs_ranges[i].end = blkid + nblks;
	} else {
		/* No space left to insert. Drop the range. */
		return (0);
	}

	/* Check if with the new access addition we reached fill threshold. */
	if (zfetch_hole_shift >= 16)
		return (0);
	uint_t hole = 0;
	for (i = f = l = 0; i < ZFETCH_RANGES; i++) {
		zsrange_t *r = &zs->zs_ranges[i];
		if (r->start == 0)
			break;
		hole += r->start - f;
		f = r->end;
		if (hole <= r->end >> zfetch_hole_shift)
			l = r->end;
	}
	if (l > 0)
		return (dmu_zfetch_hit(zs, l));

	return (0);
}
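The two helpers above maintain a small, position-relative table of "future" ranges per stream. The merge-on-insert step is the subtle part, so here is a stripped-down user-space model of just that step (table shrunk to four slots; the names, the zeroing of the whole tail, and the test values are mine, not the kernel code):

#include <stdio.h>

#define	NRANGES	4

/* start == 0 marks an empty slot; ranges are half-open [start, end). */
struct range { unsigned int start, end; };

static void
insert_range(struct range *r, unsigned int start, unsigned int end)
{
	unsigned int f = NRANGES, l = 0, i;

	/* Find the first (f) and last (l) entries the new range touches. */
	for (i = 0; i < NRANGES; i++) {
		if (r[i].start == 0 || r[i].start > end)
			break;
		if (r[i].end < start)
			continue;
		if (f > i)
			f = i;
		if (l < i)
			l = i;
	}
	if (f <= l) {
		/* Overlap: widen entry f to cover everything, drop f+1..l. */
		if (r[f].start > start)
			r[f].start = start;
		if (r[f].end < end)
			r[f].end = end;
		if (r[l].end > r[f].end)
			r[f].end = r[l].end;
		for (f++, l++; l < NRANGES; f++, l++)
			r[f] = r[l];
		for (; f < NRANGES; f++)
			r[f].start = r[f].end = 0;
	} else if (i < NRANGES) {
		/* No overlap: shift the tail right and insert at slot i. */
		for (l = NRANGES - 1; l > i; l--)
			r[l] = r[l - 1];
		r[i].start = start;
		r[i].end = end;
	}
	/* else: table full, the range is simply dropped. */
}

int
main(void)
{
	struct range r[NRANGES] = {{ 0, 0 }};

	insert_range(r, 10, 12);
	insert_range(r, 30, 35);
	insert_range(r, 12, 31);	/* bridges the two ranges above */
	for (int i = 0; i < NRANGES && r[i].start != 0; i++)
		printf("[%u, %u)\n", r[i].start, r[i].end);	/* [10, 35) */
	return (0);
}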
/*
 * This is the predictive prefetch entry point. dmu_zfetch_prepare()
 * associates dnode access specified with blkid and nblks arguments with

@@ -370,53 +512,92 @@ dmu_zfetch_prepare(zfetch_t *zf, uint64_t blkid, uint64_t nblks,
	mutex_enter(&zf->zf_lock);

	/*
	 * Find matching prefetch stream. Depending on whether the accesses
	 * Find perfect prefetch stream. Depending on whether the accesses
	 * are block-aligned, first block of the new access may either follow
	 * the last block of the previous access, or be equal to it.
	 */
	unsigned int dbs = zf->zf_dnode->dn_datablkshift;
	uint64_t end_blkid = blkid + nblks;
	for (zs = list_head(&zf->zf_stream); zs != NULL;
	    zs = list_next(&zf->zf_stream, zs)) {
		if (blkid == zs->zs_blkid) {
			break;
			goto hit;
		} else if (blkid + 1 == zs->zs_blkid) {
			blkid++;
			nblks--;
			break;
			goto hit;
		}
	}

	/*
	 * If the file is ending, remove the matching stream if found.
	 * If not found then it is too late to create a new one now.
	 * Find close enough prefetch stream. Access crossing stream position
	 * is a hit in its new part. Access ahead of stream position considered
	 * a hit for metadata prefetch, since we do not care about fill percent,
	 * or stored for future otherwise. Access behind stream position is
	 * silently ignored, since we already skipped it reaching fill percent.
	 */
	uint64_t end_of_access_blkid = blkid + nblks;
	if (end_of_access_blkid >= maxblkid) {
		if (zs != NULL)
			dmu_zfetch_stream_remove(zf, zs);
		mutex_exit(&zf->zf_lock);
		if (!have_lock)
			rw_exit(&zf->zf_dnode->dn_struct_rwlock);
		return (NULL);
	uint_t max_reorder = MIN((zfetch_max_reorder >> dbs) + 1, UINT16_MAX);
	uint_t t = gethrestime_sec() - zfetch_max_sec_reap;
	for (zs = list_head(&zf->zf_stream); zs != NULL;
	    zs = list_next(&zf->zf_stream, zs)) {
		if (blkid > zs->zs_blkid) {
			if (end_blkid <= zs->zs_blkid + max_reorder) {
				if (!fetch_data) {
					nblks = dmu_zfetch_hit(zs,
					    end_blkid - zs->zs_blkid);
					ZFETCHSTAT_BUMP(zfetchstat_stride);
					goto future;
				}
				nblks = dmu_zfetch_future(zs, blkid, nblks);
				if (nblks > 0)
					ZFETCHSTAT_BUMP(zfetchstat_stride);
				else
					ZFETCHSTAT_BUMP(zfetchstat_future);
				goto future;
			}
		} else if (end_blkid >= zs->zs_blkid) {
			nblks -= zs->zs_blkid - blkid;
			blkid += zs->zs_blkid - blkid;
			goto hit;
		} else if (end_blkid + max_reorder > zs->zs_blkid &&
		    (int)(zs->zs_atime - t) >= 0) {
			ZFETCHSTAT_BUMP(zfetchstat_past);
			zs->zs_atime = gethrestime_sec();
			goto out;
		}
	}

	/* Exit if we already prefetched this block before. */
	if (nblks == 0) {
		mutex_exit(&zf->zf_lock);
		if (!have_lock)
			rw_exit(&zf->zf_dnode->dn_struct_rwlock);
		return (NULL);
	}
	/*
	 * This access is not part of any existing stream. Create a new
	 * stream for it unless we are at the end of file.
	 */
	if (end_blkid < maxblkid)
		dmu_zfetch_stream_create(zf, end_blkid);
	mutex_exit(&zf->zf_lock);
	if (!have_lock)
		rw_exit(&zf->zf_dnode->dn_struct_rwlock);
	ZFETCHSTAT_BUMP(zfetchstat_misses);
	return (NULL);

	if (zs == NULL) {
		/*
		 * This access is not part of any existing stream. Create
		 * a new stream for it.
		 */
		dmu_zfetch_stream_create(zf, end_of_access_blkid);
hit:
	nblks = dmu_zfetch_hit(zs, nblks);
	ZFETCHSTAT_BUMP(zfetchstat_hits);

future:
	zs->zs_atime = gethrestime_sec();

	/* Exit if we already prefetched for this position before. */
	if (nblks == 0)
		goto out;

	/* If the file is ending, remove the stream. */
	end_blkid = zs->zs_blkid;
	if (end_blkid >= maxblkid) {
		dmu_zfetch_stream_remove(zf, zs);
out:
		mutex_exit(&zf->zf_lock);
		if (!have_lock)
			rw_exit(&zf->zf_dnode->dn_struct_rwlock);
		ZFETCHSTAT_BUMP(zfetchstat_misses);
		return (NULL);
	}

@@ -432,7 +613,6 @@ dmu_zfetch_prepare(zfetch_t *zf, uint64_t blkid, uint64_t nblks,
	 * than ~6% of ARC held by active prefetches. It should help with
	 * getting out of RAM on some badly mispredicted read patterns.
	 */
	unsigned int dbs = zf->zf_dnode->dn_datablkshift;
	unsigned int nbytes = nblks << dbs;
	unsigned int pf_nblks;
	if (fetch_data) {

@@ -452,10 +632,10 @@ dmu_zfetch_prepare(zfetch_t *zf, uint64_t blkid, uint64_t nblks,
	} else {
		pf_nblks = 0;
	}
	if (zs->zs_pf_start < end_of_access_blkid)
		zs->zs_pf_start = end_of_access_blkid;
	if (zs->zs_pf_end < end_of_access_blkid + pf_nblks)
		zs->zs_pf_end = end_of_access_blkid + pf_nblks;
	if (zs->zs_pf_start < end_blkid)
		zs->zs_pf_start = end_blkid;
	if (zs->zs_pf_end < end_blkid + pf_nblks)
		zs->zs_pf_end = end_blkid + pf_nblks;

	/*
	 * Do the same for indirects, starting where we will stop reading

@@ -473,9 +653,6 @@ dmu_zfetch_prepare(zfetch_t *zf, uint64_t blkid, uint64_t nblks,
	if (zs->zs_ipf_end < zs->zs_pf_end + pf_nblks)
		zs->zs_ipf_end = zs->zs_pf_end + pf_nblks;

	zs->zs_blkid = end_of_access_blkid;
	/* Protect the stream from reclamation. */
	zs->zs_atime = gethrtime();
	zfs_refcount_add(&zs->zs_refs, NULL);
	/* Count concurrent callers. */
	zfs_refcount_add(&zs->zs_callers, NULL);

@@ -483,15 +660,13 @@ dmu_zfetch_prepare(zfetch_t *zf, uint64_t blkid, uint64_t nblks,

	if (!have_lock)
		rw_exit(&zf->zf_dnode->dn_struct_rwlock);

	ZFETCHSTAT_BUMP(zfetchstat_hits);
	return (zs);
}

void
dmu_zfetch_run(zstream_t *zs, boolean_t missed, boolean_t have_lock)
dmu_zfetch_run(zfetch_t *zf, zstream_t *zs, boolean_t missed,
    boolean_t have_lock)
{
	zfetch_t *zf = zs->zs_fetch;
	int64_t pf_start, pf_end, ipf_start, ipf_end;
	int epbs, issued;

@@ -567,7 +742,7 @@ dmu_zfetch(zfetch_t *zf, uint64_t blkid, uint64_t nblks, boolean_t fetch_data,

	zs = dmu_zfetch_prepare(zf, blkid, nblks, fetch_data, have_lock);
	if (zs)
		dmu_zfetch_run(zs, missed, have_lock);
		dmu_zfetch_run(zf, zs, missed, have_lock);
}

ZFS_MODULE_PARAM(zfs_prefetch, zfs_prefetch_, disable, INT, ZMOD_RW,

@@ -590,3 +765,9 @@ ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, max_distance, UINT, ZMOD_RW,

ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, max_idistance, UINT, ZMOD_RW,
	"Max bytes to prefetch indirects for per stream");

ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, max_reorder, UINT, ZMOD_RW,
	"Max request reorder distance within a stream");

ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, hole_shift, UINT, ZMOD_RW,
	"Max log2 fraction of holes in a stream");
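For a sense of scale of the two tunables registered above: with the default 16 MiB zfetch_max_reorder and 128 KiB records the reorder window works out to 129 blocks, and with zfetch_hole_shift=2 a stream keeps advancing only while holes cover at most a quarter of the span considered. A rough arithmetic sketch (the block shift and the sample hole/covered counts are made-up values, not taken from the change):

#include <stdio.h>

int
main(void)
{
	unsigned int max_reorder_bytes = 16 * 1024 * 1024;	/* default */
	unsigned int hole_shift = 2;				/* default */
	unsigned int dbs = 17;					/* 128 KiB blocks */

	/* Out-of-order accesses are matched within this many blocks. */
	unsigned int max_reorder = (max_reorder_bytes >> dbs) + 1;
	printf("reorder window: %u blocks\n", max_reorder);	/* 129 */

	/* Hole check: the total gap must stay within span >> hole_shift. */
	unsigned int covered = 96, hole = 20, span = covered + hole;
	printf("holes %s threshold (%u <= %u)\n",
	    hole <= (span >> hole_shift) ? "within" : "over",
	    hole, span >> hole_shift);
	return (0);
}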
@@ -4086,14 +4086,17 @@ zio_vdev_io_done(zio_t *zio)
	}

	ASSERT(zio->io_type == ZIO_TYPE_READ ||
	    zio->io_type == ZIO_TYPE_WRITE || zio->io_type == ZIO_TYPE_TRIM);
	    zio->io_type == ZIO_TYPE_WRITE ||
	    zio->io_type == ZIO_TYPE_IOCTL ||
	    zio->io_type == ZIO_TYPE_TRIM);

	if (zio->io_delay)
		zio->io_delay = gethrtime() - zio->io_delay;

	if (vd != NULL && vd->vdev_ops->vdev_op_leaf &&
	    vd->vdev_ops != &vdev_draid_spare_ops) {
		vdev_queue_io_done(zio);
		if (zio->io_type != ZIO_TYPE_IOCTL)
			vdev_queue_io_done(zio);

		if (zio_injection_enabled && zio->io_error == 0)
			zio->io_error = zio_handle_device_injections(vd, zio,

@@ -364,10 +364,10 @@ zio_handle_device_injection_impl(vdev_t *vd, zio_t *zio, int err1, int err2)
	int ret = 0;

	/*
	 * We skip over faults in the labels unless it's during
	 * device open (i.e. zio == NULL).
	 * We skip over faults in the labels unless it's during device open
	 * (i.e. zio == NULL) or a device flush (offset is meaningless)
	 */
	if (zio != NULL) {
	if (zio != NULL && zio->io_type != ZIO_TYPE_IOCTL) {
		uint64_t offset = zio->io_offset;

		if (offset < VDEV_LABEL_START_SIZE ||
@@ -153,6 +153,12 @@ tests = [ 'clean_mirror_001_pos', 'clean_mirror_002_pos',
    'clean_mirror_003_pos', 'clean_mirror_004_pos']
tags = ['functional', 'clean_mirror']

[tests/functional/cli_root/zinject]
tests = ['zinject_args']
pre =
post =
tags = ['functional', 'cli_root', 'zinject']

[tests/functional/cli_root/zdb]
tests = ['zdb_002_pos', 'zdb_003_pos', 'zdb_004_pos', 'zdb_005_pos',
    'zdb_006_pos', 'zdb_args_neg', 'zdb_args_pos',

@@ -606,6 +606,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
	functional/clean_mirror/clean_mirror_004_pos.ksh \
	functional/clean_mirror/cleanup.ksh \
	functional/clean_mirror/setup.ksh \
	functional/cli_root/zinject/zinject_args.ksh \
	functional/cli_root/zdb/zdb_002_pos.ksh \
	functional/cli_root/zdb/zdb_003_pos.ksh \
	functional/cli_root/zdb/zdb_004_pos.ksh \
@@ -0,0 +1,62 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#

#
# Copyright (c) 2024, Klara Inc.
#

#
# TODO: this only checks the set of valid device fault types. It should
# check all the other options, and that they work, and everything really.
#

. $STF_SUITE/include/libtest.shlib

verify_runnable "global"

log_assert "Check zinject parameters."

log_onexit cleanup

DISK1=${DISKS%% *}

function cleanup
{
	zinject -c all
	default_cleanup_noexit
}

function test_device_fault
{
	typeset -a errno=("io" "decompress" "decrypt" "nxio" "dtl" "corrupt")
	for e in ${errno[@]}; do
		log_must eval \
		    "zinject -d $DISK1 -e $e -T read -f 0.001 $TESTPOOL"
	done
	zinject -c all
}

default_mirror_setup_noexit $DISKS

test_device_fault

log_pass "zinject parameters work as expected."