Merge pull request #222 from truenas/truenas/zfs-2.3-master-sync
[truenas/zfs-2.3-release] sync with upstream master
This commit is contained in:
commit
d1ebe30abe
|
@ -793,18 +793,27 @@ def section_dmu(kstats_dict):
|
||||||
|
|
||||||
zfetch_stats = isolate_section('zfetchstats', kstats_dict)
|
zfetch_stats = isolate_section('zfetchstats', kstats_dict)
|
||||||
|
|
||||||
zfetch_access_total = int(zfetch_stats['hits'])+int(zfetch_stats['misses'])
|
zfetch_access_total = int(zfetch_stats['hits']) +\
|
||||||
|
int(zfetch_stats['future']) + int(zfetch_stats['stride']) +\
|
||||||
|
int(zfetch_stats['past']) + int(zfetch_stats['misses'])
|
||||||
|
|
||||||
prt_1('DMU predictive prefetcher calls:', f_hits(zfetch_access_total))
|
prt_1('DMU predictive prefetcher calls:', f_hits(zfetch_access_total))
|
||||||
prt_i2('Stream hits:',
|
prt_i2('Stream hits:',
|
||||||
f_perc(zfetch_stats['hits'], zfetch_access_total),
|
f_perc(zfetch_stats['hits'], zfetch_access_total),
|
||||||
f_hits(zfetch_stats['hits']))
|
f_hits(zfetch_stats['hits']))
|
||||||
|
future = int(zfetch_stats['future']) + int(zfetch_stats['stride'])
|
||||||
|
prt_i2('Hits ahead of stream:', f_perc(future, zfetch_access_total),
|
||||||
|
f_hits(future))
|
||||||
|
prt_i2('Hits behind stream:',
|
||||||
|
f_perc(zfetch_stats['past'], zfetch_access_total),
|
||||||
|
f_hits(zfetch_stats['past']))
|
||||||
prt_i2('Stream misses:',
|
prt_i2('Stream misses:',
|
||||||
f_perc(zfetch_stats['misses'], zfetch_access_total),
|
f_perc(zfetch_stats['misses'], zfetch_access_total),
|
||||||
f_hits(zfetch_stats['misses']))
|
f_hits(zfetch_stats['misses']))
|
||||||
prt_i2('Streams limit reached:',
|
prt_i2('Streams limit reached:',
|
||||||
f_perc(zfetch_stats['max_streams'], zfetch_stats['misses']),
|
f_perc(zfetch_stats['max_streams'], zfetch_stats['misses']),
|
||||||
f_hits(zfetch_stats['max_streams']))
|
f_hits(zfetch_stats['max_streams']))
|
||||||
|
prt_i1('Stream strides:', f_hits(zfetch_stats['stride']))
|
||||||
prt_i1('Prefetches issued', f_hits(zfetch_stats['io_issued']))
|
prt_i1('Prefetches issued', f_hits(zfetch_stats['io_issued']))
|
||||||
print()
|
print()
|
||||||
|
|
||||||
|
|
|
@ -22,6 +22,7 @@
|
||||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||||
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
|
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
|
||||||
* Copyright (c) 2017, Intel Corporation.
|
* Copyright (c) 2017, Intel Corporation.
|
||||||
|
* Copyright (c) 2024, Klara Inc.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -208,6 +209,37 @@ type_to_name(uint64_t type)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct errstr {
|
||||||
|
int err;
|
||||||
|
const char *str;
|
||||||
|
};
|
||||||
|
static const struct errstr errstrtable[] = {
|
||||||
|
{ EIO, "io" },
|
||||||
|
{ ECKSUM, "checksum" },
|
||||||
|
{ EINVAL, "decompress" },
|
||||||
|
{ EACCES, "decrypt" },
|
||||||
|
{ ENXIO, "nxio" },
|
||||||
|
{ ECHILD, "dtl" },
|
||||||
|
{ EILSEQ, "corrupt" },
|
||||||
|
{ 0, NULL },
|
||||||
|
};
|
||||||
|
|
||||||
|
static int
|
||||||
|
str_to_err(const char *str)
|
||||||
|
{
|
||||||
|
for (int i = 0; errstrtable[i].str != NULL; i++)
|
||||||
|
if (strcasecmp(errstrtable[i].str, str) == 0)
|
||||||
|
return (errstrtable[i].err);
|
||||||
|
return (-1);
|
||||||
|
}
|
||||||
|
static const char *
|
||||||
|
err_to_str(int err)
|
||||||
|
{
|
||||||
|
for (int i = 0; errstrtable[i].str != NULL; i++)
|
||||||
|
if (errstrtable[i].err == err)
|
||||||
|
return (errstrtable[i].str);
|
||||||
|
return ("[unknown]");
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Print usage message.
|
* Print usage message.
|
||||||
|
@ -233,7 +265,7 @@ usage(void)
|
||||||
"\t\tspa_vdev_exit() will trigger a panic.\n"
|
"\t\tspa_vdev_exit() will trigger a panic.\n"
|
||||||
"\n"
|
"\n"
|
||||||
"\tzinject -d device [-e errno] [-L <nvlist|uber|pad1|pad2>] [-F]\n"
|
"\tzinject -d device [-e errno] [-L <nvlist|uber|pad1|pad2>] [-F]\n"
|
||||||
"\t\t[-T <read|write|free|claim|all>] [-f frequency] pool\n\n"
|
"\t\t[-T <read|write|free|claim|ioctl|all>] [-f frequency] pool\n\n"
|
||||||
"\t\tInject a fault into a particular device or the device's\n"
|
"\t\tInject a fault into a particular device or the device's\n"
|
||||||
"\t\tlabel. Label injection can either be 'nvlist', 'uber',\n "
|
"\t\tlabel. Label injection can either be 'nvlist', 'uber',\n "
|
||||||
"\t\t'pad1', or 'pad2'.\n"
|
"\t\t'pad1', or 'pad2'.\n"
|
||||||
|
@ -392,6 +424,10 @@ static int
|
||||||
print_device_handler(int id, const char *pool, zinject_record_t *record,
|
print_device_handler(int id, const char *pool, zinject_record_t *record,
|
||||||
void *data)
|
void *data)
|
||||||
{
|
{
|
||||||
|
static const char *iotypestr[] = {
|
||||||
|
"null", "read", "write", "free", "claim", "ioctl", "trim", "all",
|
||||||
|
};
|
||||||
|
|
||||||
int *count = data;
|
int *count = data;
|
||||||
|
|
||||||
if (record->zi_guid == 0 || record->zi_func[0] != '\0')
|
if (record->zi_guid == 0 || record->zi_func[0] != '\0')
|
||||||
|
@ -401,14 +437,21 @@ print_device_handler(int id, const char *pool, zinject_record_t *record,
|
||||||
return (0);
|
return (0);
|
||||||
|
|
||||||
if (*count == 0) {
|
if (*count == 0) {
|
||||||
(void) printf("%3s %-15s %s\n", "ID", "POOL", "GUID");
|
(void) printf("%3s %-15s %-16s %-5s %-10s %-9s\n",
|
||||||
(void) printf("--- --------------- ----------------\n");
|
"ID", "POOL", "GUID", "TYPE", "ERROR", "FREQ");
|
||||||
|
(void) printf(
|
||||||
|
"--- --------------- ---------------- "
|
||||||
|
"----- ---------- ---------\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
*count += 1;
|
*count += 1;
|
||||||
|
|
||||||
(void) printf("%3d %-15s %llx\n", id, pool,
|
double freq = record->zi_freq == 0 ? 100.0f :
|
||||||
(u_longlong_t)record->zi_guid);
|
(((double)record->zi_freq) / ZI_PERCENTAGE_MAX) * 100.0f;
|
||||||
|
|
||||||
|
(void) printf("%3d %-15s %llx %-5s %-10s %8.4f%%\n", id, pool,
|
||||||
|
(u_longlong_t)record->zi_guid, iotypestr[record->zi_iotype],
|
||||||
|
err_to_str(record->zi_error), freq);
|
||||||
|
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
@ -842,24 +885,12 @@ main(int argc, char **argv)
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case 'e':
|
case 'e':
|
||||||
if (strcasecmp(optarg, "io") == 0) {
|
error = str_to_err(optarg);
|
||||||
error = EIO;
|
if (error < 0) {
|
||||||
} else if (strcasecmp(optarg, "checksum") == 0) {
|
|
||||||
error = ECKSUM;
|
|
||||||
} else if (strcasecmp(optarg, "decompress") == 0) {
|
|
||||||
error = EINVAL;
|
|
||||||
} else if (strcasecmp(optarg, "decrypt") == 0) {
|
|
||||||
error = EACCES;
|
|
||||||
} else if (strcasecmp(optarg, "nxio") == 0) {
|
|
||||||
error = ENXIO;
|
|
||||||
} else if (strcasecmp(optarg, "dtl") == 0) {
|
|
||||||
error = ECHILD;
|
|
||||||
} else if (strcasecmp(optarg, "corrupt") == 0) {
|
|
||||||
error = EILSEQ;
|
|
||||||
} else {
|
|
||||||
(void) fprintf(stderr, "invalid error type "
|
(void) fprintf(stderr, "invalid error type "
|
||||||
"'%s': must be 'io', 'checksum' or "
|
"'%s': must be one of: io decompress "
|
||||||
"'nxio'\n", optarg);
|
"decrypt nxio dtl corrupt\n",
|
||||||
|
optarg);
|
||||||
usage();
|
usage();
|
||||||
libzfs_fini(g_zfs);
|
libzfs_fini(g_zfs);
|
||||||
return (1);
|
return (1);
|
||||||
|
@ -947,12 +978,14 @@ main(int argc, char **argv)
|
||||||
io_type = ZIO_TYPE_FREE;
|
io_type = ZIO_TYPE_FREE;
|
||||||
} else if (strcasecmp(optarg, "claim") == 0) {
|
} else if (strcasecmp(optarg, "claim") == 0) {
|
||||||
io_type = ZIO_TYPE_CLAIM;
|
io_type = ZIO_TYPE_CLAIM;
|
||||||
|
} else if (strcasecmp(optarg, "ioctl") == 0) {
|
||||||
|
io_type = ZIO_TYPE_IOCTL;
|
||||||
} else if (strcasecmp(optarg, "all") == 0) {
|
} else if (strcasecmp(optarg, "all") == 0) {
|
||||||
io_type = ZIO_TYPES;
|
io_type = ZIO_TYPES;
|
||||||
} else {
|
} else {
|
||||||
(void) fprintf(stderr, "invalid I/O type "
|
(void) fprintf(stderr, "invalid I/O type "
|
||||||
"'%s': must be 'read', 'write', 'free', "
|
"'%s': must be 'read', 'write', 'free', "
|
||||||
"'claim' or 'all'\n", optarg);
|
"'claim', 'ioctl' or 'all'\n", optarg);
|
||||||
usage();
|
usage();
|
||||||
libzfs_fini(g_zfs);
|
libzfs_fini(g_zfs);
|
||||||
return (1);
|
return (1);
|
||||||
|
|
|
@ -2289,7 +2289,6 @@ print_status_initialize(vdev_stat_t *vs, boolean_t verbose)
|
||||||
!vs->vs_scan_removing) {
|
!vs->vs_scan_removing) {
|
||||||
char zbuf[1024];
|
char zbuf[1024];
|
||||||
char tbuf[256];
|
char tbuf[256];
|
||||||
struct tm zaction_ts;
|
|
||||||
|
|
||||||
time_t t = vs->vs_initialize_action_time;
|
time_t t = vs->vs_initialize_action_time;
|
||||||
int initialize_pct = 100;
|
int initialize_pct = 100;
|
||||||
|
@ -2299,8 +2298,8 @@ print_status_initialize(vdev_stat_t *vs, boolean_t verbose)
|
||||||
100 / (vs->vs_initialize_bytes_est + 1));
|
100 / (vs->vs_initialize_bytes_est + 1));
|
||||||
}
|
}
|
||||||
|
|
||||||
(void) localtime_r(&t, &zaction_ts);
|
(void) ctime_r(&t, tbuf);
|
||||||
(void) strftime(tbuf, sizeof (tbuf), "%c", &zaction_ts);
|
tbuf[24] = 0;
|
||||||
|
|
||||||
switch (vs->vs_initialize_state) {
|
switch (vs->vs_initialize_state) {
|
||||||
case VDEV_INITIALIZE_SUSPENDED:
|
case VDEV_INITIALIZE_SUSPENDED:
|
||||||
|
@ -2340,7 +2339,6 @@ print_status_trim(vdev_stat_t *vs, boolean_t verbose)
|
||||||
!vs->vs_scan_removing) {
|
!vs->vs_scan_removing) {
|
||||||
char zbuf[1024];
|
char zbuf[1024];
|
||||||
char tbuf[256];
|
char tbuf[256];
|
||||||
struct tm zaction_ts;
|
|
||||||
|
|
||||||
time_t t = vs->vs_trim_action_time;
|
time_t t = vs->vs_trim_action_time;
|
||||||
int trim_pct = 100;
|
int trim_pct = 100;
|
||||||
|
@ -2349,8 +2347,8 @@ print_status_trim(vdev_stat_t *vs, boolean_t verbose)
|
||||||
100 / (vs->vs_trim_bytes_est + 1));
|
100 / (vs->vs_trim_bytes_est + 1));
|
||||||
}
|
}
|
||||||
|
|
||||||
(void) localtime_r(&t, &zaction_ts);
|
(void) ctime_r(&t, tbuf);
|
||||||
(void) strftime(tbuf, sizeof (tbuf), "%c", &zaction_ts);
|
tbuf[24] = 0;
|
||||||
|
|
||||||
switch (vs->vs_trim_state) {
|
switch (vs->vs_trim_state) {
|
||||||
case VDEV_TRIM_SUSPENDED:
|
case VDEV_TRIM_SUSPENDED:
|
||||||
|
@ -10793,11 +10791,10 @@ found:
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
/*
|
/*
|
||||||
* The first arg isn't a pool name,
|
* The first arg isn't the name of a valid pool.
|
||||||
*/
|
*/
|
||||||
fprintf(stderr, gettext("missing pool name.\n"));
|
fprintf(stderr, gettext("Cannot get properties of %s: "
|
||||||
fprintf(stderr, "\n");
|
"no such pool available.\n"), argv[0]);
|
||||||
usage(B_FALSE);
|
|
||||||
return (1);
|
return (1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -18,6 +18,7 @@ subst_sed_cmd = \
|
||||||
-e 's|@ASAN_ENABLED[@]|$(ASAN_ENABLED)|g' \
|
-e 's|@ASAN_ENABLED[@]|$(ASAN_ENABLED)|g' \
|
||||||
-e 's|@DEFAULT_INIT_NFS_SERVER[@]|$(DEFAULT_INIT_NFS_SERVER)|g' \
|
-e 's|@DEFAULT_INIT_NFS_SERVER[@]|$(DEFAULT_INIT_NFS_SERVER)|g' \
|
||||||
-e 's|@DEFAULT_INIT_SHELL[@]|$(DEFAULT_INIT_SHELL)|g' \
|
-e 's|@DEFAULT_INIT_SHELL[@]|$(DEFAULT_INIT_SHELL)|g' \
|
||||||
|
-e 's|@IS_SYSV_RC[@]|$(IS_SYSV_RC)|g' \
|
||||||
-e 's|@LIBFETCH_DYNAMIC[@]|$(LIBFETCH_DYNAMIC)|g' \
|
-e 's|@LIBFETCH_DYNAMIC[@]|$(LIBFETCH_DYNAMIC)|g' \
|
||||||
-e 's|@LIBFETCH_SONAME[@]|$(LIBFETCH_SONAME)|g' \
|
-e 's|@LIBFETCH_SONAME[@]|$(LIBFETCH_SONAME)|g' \
|
||||||
-e 's|@PYTHON[@]|$(PYTHON)|g' \
|
-e 's|@PYTHON[@]|$(PYTHON)|g' \
|
||||||
|
|
|
@ -377,6 +377,14 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_MQ], [
|
||||||
(void) blk_mq_alloc_tag_set(&tag_set);
|
(void) blk_mq_alloc_tag_set(&tag_set);
|
||||||
return BLK_STS_OK;
|
return BLK_STS_OK;
|
||||||
], [])
|
], [])
|
||||||
|
ZFS_LINUX_TEST_SRC([blk_mq_rq_hctx], [
|
||||||
|
#include <linux/blk-mq.h>
|
||||||
|
#include <linux/blkdev.h>
|
||||||
|
], [
|
||||||
|
struct request rq = {0};
|
||||||
|
struct blk_mq_hw_ctx *hctx = NULL;
|
||||||
|
rq.mq_hctx = hctx;
|
||||||
|
], [])
|
||||||
])
|
])
|
||||||
|
|
||||||
AC_DEFUN([ZFS_AC_KERNEL_BLK_MQ], [
|
AC_DEFUN([ZFS_AC_KERNEL_BLK_MQ], [
|
||||||
|
@ -384,6 +392,13 @@ AC_DEFUN([ZFS_AC_KERNEL_BLK_MQ], [
|
||||||
ZFS_LINUX_TEST_RESULT([blk_mq], [
|
ZFS_LINUX_TEST_RESULT([blk_mq], [
|
||||||
AC_MSG_RESULT(yes)
|
AC_MSG_RESULT(yes)
|
||||||
AC_DEFINE(HAVE_BLK_MQ, 1, [block multiqueue is available])
|
AC_DEFINE(HAVE_BLK_MQ, 1, [block multiqueue is available])
|
||||||
|
AC_MSG_CHECKING([whether block multiqueue hardware context is cached in struct request])
|
||||||
|
ZFS_LINUX_TEST_RESULT([blk_mq_rq_hctx], [
|
||||||
|
AC_MSG_RESULT(yes)
|
||||||
|
AC_DEFINE(HAVE_BLK_MQ_RQ_HCTX, 1, [block multiqueue hardware context is cached in struct request])
|
||||||
|
], [
|
||||||
|
AC_MSG_RESULT(no)
|
||||||
|
])
|
||||||
], [
|
], [
|
||||||
AC_MSG_RESULT(no)
|
AC_MSG_RESULT(no)
|
||||||
])
|
])
|
||||||
|
|
|
@ -54,6 +54,26 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_OPEN_BY_PATH], [
|
||||||
])
|
])
|
||||||
])
|
])
|
||||||
|
|
||||||
|
dnl #
|
||||||
|
dnl # 6.9.x API change
|
||||||
|
dnl # bdev_file_open_by_path() replaced bdev_open_by_path(),
|
||||||
|
dnl # and returns struct file*
|
||||||
|
dnl #
|
||||||
|
AC_DEFUN([ZFS_AC_KERNEL_SRC_BDEV_FILE_OPEN_BY_PATH], [
|
||||||
|
ZFS_LINUX_TEST_SRC([bdev_file_open_by_path], [
|
||||||
|
#include <linux/fs.h>
|
||||||
|
#include <linux/blkdev.h>
|
||||||
|
], [
|
||||||
|
struct file *file __attribute__ ((unused)) = NULL;
|
||||||
|
const char *path = "path";
|
||||||
|
fmode_t mode = 0;
|
||||||
|
void *holder = NULL;
|
||||||
|
struct blk_holder_ops h;
|
||||||
|
|
||||||
|
file = bdev_file_open_by_path(path, mode, holder, &h);
|
||||||
|
])
|
||||||
|
])
|
||||||
|
|
||||||
AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_GET_BY_PATH], [
|
AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_GET_BY_PATH], [
|
||||||
AC_MSG_CHECKING([whether blkdev_get_by_path() exists and takes 3 args])
|
AC_MSG_CHECKING([whether blkdev_get_by_path() exists and takes 3 args])
|
||||||
ZFS_LINUX_TEST_RESULT([blkdev_get_by_path], [
|
ZFS_LINUX_TEST_RESULT([blkdev_get_by_path], [
|
||||||
|
@ -73,11 +93,20 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_GET_BY_PATH], [
|
||||||
[bdev_open_by_path() exists])
|
[bdev_open_by_path() exists])
|
||||||
AC_MSG_RESULT(yes)
|
AC_MSG_RESULT(yes)
|
||||||
], [
|
], [
|
||||||
|
AC_MSG_RESULT(no)
|
||||||
|
AC_MSG_CHECKING([whether bdev_file_open_by_path() exists])
|
||||||
|
ZFS_LINUX_TEST_RESULT([bdev_file_open_by_path], [
|
||||||
|
AC_DEFINE(HAVE_BDEV_FILE_OPEN_BY_PATH, 1,
|
||||||
|
[bdev_file_open_by_path() exists])
|
||||||
|
AC_MSG_RESULT(yes)
|
||||||
|
], [
|
||||||
|
AC_MSG_RESULT(no)
|
||||||
ZFS_LINUX_TEST_ERROR([blkdev_get_by_path()])
|
ZFS_LINUX_TEST_ERROR([blkdev_get_by_path()])
|
||||||
])
|
])
|
||||||
])
|
])
|
||||||
])
|
])
|
||||||
])
|
])
|
||||||
|
])
|
||||||
|
|
||||||
dnl #
|
dnl #
|
||||||
dnl # 6.5.x API change
|
dnl # 6.5.x API change
|
||||||
|
@ -149,10 +178,19 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_RELEASE], [
|
||||||
])
|
])
|
||||||
])
|
])
|
||||||
|
|
||||||
|
dnl #
|
||||||
|
dnl # 6.9.x API change
|
||||||
|
dnl #
|
||||||
|
dnl # bdev_release() now private, but because bdev_file_open_by_path() returns
|
||||||
|
dnl # struct file*, we can just use fput(). So the blkdev_put test no longer
|
||||||
|
dnl # fails if not found.
|
||||||
|
dnl #
|
||||||
|
|
||||||
AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_PUT], [
|
AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_PUT], [
|
||||||
AC_MSG_CHECKING([whether blkdev_put() exists])
|
AC_MSG_CHECKING([whether blkdev_put() exists])
|
||||||
ZFS_LINUX_TEST_RESULT([blkdev_put], [
|
ZFS_LINUX_TEST_RESULT([blkdev_put], [
|
||||||
AC_MSG_RESULT(yes)
|
AC_MSG_RESULT(yes)
|
||||||
|
AC_DEFINE(HAVE_BLKDEV_PUT, 1, [blkdev_put() exists])
|
||||||
], [
|
], [
|
||||||
AC_MSG_RESULT(no)
|
AC_MSG_RESULT(no)
|
||||||
AC_MSG_CHECKING([whether blkdev_put() accepts void* as arg 2])
|
AC_MSG_CHECKING([whether blkdev_put() accepts void* as arg 2])
|
||||||
|
@ -168,7 +206,7 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_PUT], [
|
||||||
AC_DEFINE(HAVE_BDEV_RELEASE, 1,
|
AC_DEFINE(HAVE_BDEV_RELEASE, 1,
|
||||||
[bdev_release() exists])
|
[bdev_release() exists])
|
||||||
], [
|
], [
|
||||||
ZFS_LINUX_TEST_ERROR([blkdev_put()])
|
AC_MSG_RESULT(no)
|
||||||
])
|
])
|
||||||
])
|
])
|
||||||
])
|
])
|
||||||
|
@ -523,12 +561,29 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_BDEVNAME], [
|
||||||
])
|
])
|
||||||
|
|
||||||
dnl #
|
dnl #
|
||||||
dnl # 5.19 API: blkdev_issue_secure_erase()
|
dnl # TRIM support: discard and secure erase. We make use of asynchronous
|
||||||
dnl # 4.7 API: __blkdev_issue_discard(..., BLKDEV_DISCARD_SECURE)
|
dnl # functions when available.
|
||||||
dnl # 3.10 API: blkdev_issue_discard(..., BLKDEV_DISCARD_SECURE)
|
|
||||||
dnl #
|
dnl #
|
||||||
AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_ISSUE_SECURE_ERASE], [
|
dnl # 3.10:
|
||||||
ZFS_LINUX_TEST_SRC([blkdev_issue_secure_erase], [
|
dnl # sync discard: blkdev_issue_discard(..., 0)
|
||||||
|
dnl # sync erase: blkdev_issue_discard(..., BLKDEV_DISCARD_SECURE)
|
||||||
|
dnl # async discard: [not available]
|
||||||
|
dnl # async erase: [not available]
|
||||||
|
dnl #
|
||||||
|
dnl # 4.7:
|
||||||
|
dnl # sync discard: blkdev_issue_discard(..., 0)
|
||||||
|
dnl # sync erase: blkdev_issue_discard(..., BLKDEV_DISCARD_SECURE)
|
||||||
|
dnl # async discard: __blkdev_issue_discard(..., 0)
|
||||||
|
dnl # async erase: __blkdev_issue_discard(..., BLKDEV_DISCARD_SECURE)
|
||||||
|
dnl #
|
||||||
|
dnl # 5.19:
|
||||||
|
dnl # sync discard: blkdev_issue_discard(...)
|
||||||
|
dnl # sync erase: blkdev_issue_secure_erase(...)
|
||||||
|
dnl # async discard: __blkdev_issue_discard(...)
|
||||||
|
dnl # async erase: [not available]
|
||||||
|
dnl #
|
||||||
|
AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_ISSUE_DISCARD], [
|
||||||
|
ZFS_LINUX_TEST_SRC([blkdev_issue_discard_noflags], [
|
||||||
#include <linux/blkdev.h>
|
#include <linux/blkdev.h>
|
||||||
],[
|
],[
|
||||||
struct block_device *bdev = NULL;
|
struct block_device *bdev = NULL;
|
||||||
|
@ -536,10 +591,33 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_ISSUE_SECURE_ERASE], [
|
||||||
sector_t nr_sects = 0;
|
sector_t nr_sects = 0;
|
||||||
int error __attribute__ ((unused));
|
int error __attribute__ ((unused));
|
||||||
|
|
||||||
error = blkdev_issue_secure_erase(bdev,
|
error = blkdev_issue_discard(bdev,
|
||||||
sector, nr_sects, GFP_KERNEL);
|
sector, nr_sects, GFP_KERNEL);
|
||||||
])
|
])
|
||||||
|
ZFS_LINUX_TEST_SRC([blkdev_issue_discard_flags], [
|
||||||
|
#include <linux/blkdev.h>
|
||||||
|
],[
|
||||||
|
struct block_device *bdev = NULL;
|
||||||
|
sector_t sector = 0;
|
||||||
|
sector_t nr_sects = 0;
|
||||||
|
unsigned long flags = 0;
|
||||||
|
int error __attribute__ ((unused));
|
||||||
|
|
||||||
|
error = blkdev_issue_discard(bdev,
|
||||||
|
sector, nr_sects, GFP_KERNEL, flags);
|
||||||
|
])
|
||||||
|
ZFS_LINUX_TEST_SRC([blkdev_issue_discard_async_noflags], [
|
||||||
|
#include <linux/blkdev.h>
|
||||||
|
],[
|
||||||
|
struct block_device *bdev = NULL;
|
||||||
|
sector_t sector = 0;
|
||||||
|
sector_t nr_sects = 0;
|
||||||
|
struct bio *biop = NULL;
|
||||||
|
int error __attribute__ ((unused));
|
||||||
|
|
||||||
|
error = __blkdev_issue_discard(bdev,
|
||||||
|
sector, nr_sects, GFP_KERNEL, &biop);
|
||||||
|
])
|
||||||
ZFS_LINUX_TEST_SRC([blkdev_issue_discard_async_flags], [
|
ZFS_LINUX_TEST_SRC([blkdev_issue_discard_async_flags], [
|
||||||
#include <linux/blkdev.h>
|
#include <linux/blkdev.h>
|
||||||
],[
|
],[
|
||||||
|
@ -553,22 +631,52 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_ISSUE_SECURE_ERASE], [
|
||||||
error = __blkdev_issue_discard(bdev,
|
error = __blkdev_issue_discard(bdev,
|
||||||
sector, nr_sects, GFP_KERNEL, flags, &biop);
|
sector, nr_sects, GFP_KERNEL, flags, &biop);
|
||||||
])
|
])
|
||||||
|
ZFS_LINUX_TEST_SRC([blkdev_issue_secure_erase], [
|
||||||
ZFS_LINUX_TEST_SRC([blkdev_issue_discard_flags], [
|
|
||||||
#include <linux/blkdev.h>
|
#include <linux/blkdev.h>
|
||||||
],[
|
],[
|
||||||
struct block_device *bdev = NULL;
|
struct block_device *bdev = NULL;
|
||||||
sector_t sector = 0;
|
sector_t sector = 0;
|
||||||
sector_t nr_sects = 0;
|
sector_t nr_sects = 0;
|
||||||
unsigned long flags = 0;
|
|
||||||
int error __attribute__ ((unused));
|
int error __attribute__ ((unused));
|
||||||
|
|
||||||
error = blkdev_issue_discard(bdev,
|
error = blkdev_issue_secure_erase(bdev,
|
||||||
sector, nr_sects, GFP_KERNEL, flags);
|
sector, nr_sects, GFP_KERNEL);
|
||||||
])
|
])
|
||||||
])
|
])
|
||||||
|
|
||||||
AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_ISSUE_SECURE_ERASE], [
|
AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_ISSUE_DISCARD], [
|
||||||
|
AC_MSG_CHECKING([whether blkdev_issue_discard() is available])
|
||||||
|
ZFS_LINUX_TEST_RESULT([blkdev_issue_discard_noflags], [
|
||||||
|
AC_MSG_RESULT(yes)
|
||||||
|
AC_DEFINE(HAVE_BLKDEV_ISSUE_DISCARD_NOFLAGS, 1,
|
||||||
|
[blkdev_issue_discard() is available])
|
||||||
|
],[
|
||||||
|
AC_MSG_RESULT(no)
|
||||||
|
])
|
||||||
|
AC_MSG_CHECKING([whether blkdev_issue_discard(flags) is available])
|
||||||
|
ZFS_LINUX_TEST_RESULT([blkdev_issue_discard_flags], [
|
||||||
|
AC_MSG_RESULT(yes)
|
||||||
|
AC_DEFINE(HAVE_BLKDEV_ISSUE_DISCARD_FLAGS, 1,
|
||||||
|
[blkdev_issue_discard(flags) is available])
|
||||||
|
],[
|
||||||
|
AC_MSG_RESULT(no)
|
||||||
|
])
|
||||||
|
AC_MSG_CHECKING([whether __blkdev_issue_discard() is available])
|
||||||
|
ZFS_LINUX_TEST_RESULT([blkdev_issue_discard_async_noflags], [
|
||||||
|
AC_MSG_RESULT(yes)
|
||||||
|
AC_DEFINE(HAVE_BLKDEV_ISSUE_DISCARD_ASYNC_NOFLAGS, 1,
|
||||||
|
[__blkdev_issue_discard() is available])
|
||||||
|
],[
|
||||||
|
AC_MSG_RESULT(no)
|
||||||
|
])
|
||||||
|
AC_MSG_CHECKING([whether __blkdev_issue_discard(flags) is available])
|
||||||
|
ZFS_LINUX_TEST_RESULT([blkdev_issue_discard_async_flags], [
|
||||||
|
AC_MSG_RESULT(yes)
|
||||||
|
AC_DEFINE(HAVE_BLKDEV_ISSUE_DISCARD_ASYNC_FLAGS, 1,
|
||||||
|
[__blkdev_issue_discard(flags) is available])
|
||||||
|
],[
|
||||||
|
AC_MSG_RESULT(no)
|
||||||
|
])
|
||||||
AC_MSG_CHECKING([whether blkdev_issue_secure_erase() is available])
|
AC_MSG_CHECKING([whether blkdev_issue_secure_erase() is available])
|
||||||
ZFS_LINUX_TEST_RESULT([blkdev_issue_secure_erase], [
|
ZFS_LINUX_TEST_RESULT([blkdev_issue_secure_erase], [
|
||||||
AC_MSG_RESULT(yes)
|
AC_MSG_RESULT(yes)
|
||||||
|
@ -576,24 +684,6 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_ISSUE_SECURE_ERASE], [
|
||||||
[blkdev_issue_secure_erase() is available])
|
[blkdev_issue_secure_erase() is available])
|
||||||
],[
|
],[
|
||||||
AC_MSG_RESULT(no)
|
AC_MSG_RESULT(no)
|
||||||
|
|
||||||
AC_MSG_CHECKING([whether __blkdev_issue_discard() is available])
|
|
||||||
ZFS_LINUX_TEST_RESULT([blkdev_issue_discard_async_flags], [
|
|
||||||
AC_MSG_RESULT(yes)
|
|
||||||
AC_DEFINE(HAVE_BLKDEV_ISSUE_DISCARD_ASYNC, 1,
|
|
||||||
[__blkdev_issue_discard() is available])
|
|
||||||
],[
|
|
||||||
AC_MSG_RESULT(no)
|
|
||||||
|
|
||||||
AC_MSG_CHECKING([whether blkdev_issue_discard() is available])
|
|
||||||
ZFS_LINUX_TEST_RESULT([blkdev_issue_discard_flags], [
|
|
||||||
AC_MSG_RESULT(yes)
|
|
||||||
AC_DEFINE(HAVE_BLKDEV_ISSUE_DISCARD, 1,
|
|
||||||
[blkdev_issue_discard() is available])
|
|
||||||
],[
|
|
||||||
ZFS_LINUX_TEST_ERROR([blkdev_issue_discard()])
|
|
||||||
])
|
|
||||||
])
|
|
||||||
])
|
])
|
||||||
])
|
])
|
||||||
|
|
||||||
|
@ -645,6 +735,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV], [
|
||||||
ZFS_AC_KERNEL_SRC_BLKDEV_GET_BY_PATH
|
ZFS_AC_KERNEL_SRC_BLKDEV_GET_BY_PATH
|
||||||
ZFS_AC_KERNEL_SRC_BLKDEV_GET_BY_PATH_4ARG
|
ZFS_AC_KERNEL_SRC_BLKDEV_GET_BY_PATH_4ARG
|
||||||
ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_OPEN_BY_PATH
|
ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_OPEN_BY_PATH
|
||||||
|
ZFS_AC_KERNEL_SRC_BDEV_FILE_OPEN_BY_PATH
|
||||||
ZFS_AC_KERNEL_SRC_BLKDEV_PUT
|
ZFS_AC_KERNEL_SRC_BLKDEV_PUT
|
||||||
ZFS_AC_KERNEL_SRC_BLKDEV_PUT_HOLDER
|
ZFS_AC_KERNEL_SRC_BLKDEV_PUT_HOLDER
|
||||||
ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_RELEASE
|
ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_RELEASE
|
||||||
|
@ -657,7 +748,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV], [
|
||||||
ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_CHECK_MEDIA_CHANGE
|
ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_CHECK_MEDIA_CHANGE
|
||||||
ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_WHOLE
|
ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_WHOLE
|
||||||
ZFS_AC_KERNEL_SRC_BLKDEV_BDEVNAME
|
ZFS_AC_KERNEL_SRC_BLKDEV_BDEVNAME
|
||||||
ZFS_AC_KERNEL_SRC_BLKDEV_ISSUE_SECURE_ERASE
|
ZFS_AC_KERNEL_SRC_BLKDEV_ISSUE_DISCARD
|
||||||
ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_KOBJ
|
ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_KOBJ
|
||||||
ZFS_AC_KERNEL_SRC_BLKDEV_PART_TO_DEV
|
ZFS_AC_KERNEL_SRC_BLKDEV_PART_TO_DEV
|
||||||
ZFS_AC_KERNEL_SRC_BLKDEV_DISK_CHECK_MEDIA_CHANGE
|
ZFS_AC_KERNEL_SRC_BLKDEV_DISK_CHECK_MEDIA_CHANGE
|
||||||
|
@ -678,7 +769,7 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV], [
|
||||||
ZFS_AC_KERNEL_BLKDEV_BDEV_WHOLE
|
ZFS_AC_KERNEL_BLKDEV_BDEV_WHOLE
|
||||||
ZFS_AC_KERNEL_BLKDEV_BDEVNAME
|
ZFS_AC_KERNEL_BLKDEV_BDEVNAME
|
||||||
ZFS_AC_KERNEL_BLKDEV_GET_ERESTARTSYS
|
ZFS_AC_KERNEL_BLKDEV_GET_ERESTARTSYS
|
||||||
ZFS_AC_KERNEL_BLKDEV_ISSUE_SECURE_ERASE
|
ZFS_AC_KERNEL_BLKDEV_ISSUE_DISCARD
|
||||||
ZFS_AC_KERNEL_BLKDEV_BDEV_KOBJ
|
ZFS_AC_KERNEL_BLKDEV_BDEV_KOBJ
|
||||||
ZFS_AC_KERNEL_BLKDEV_PART_TO_DEV
|
ZFS_AC_KERNEL_BLKDEV_PART_TO_DEV
|
||||||
ZFS_AC_KERNEL_BLKDEV_DISK_CHECK_MEDIA_CHANGE
|
ZFS_AC_KERNEL_BLKDEV_DISK_CHECK_MEDIA_CHANGE
|
||||||
|
|
|
@ -50,6 +50,14 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_MAKE_REQUEST_FN], [
|
||||||
disk = blk_alloc_disk(NUMA_NO_NODE);
|
disk = blk_alloc_disk(NUMA_NO_NODE);
|
||||||
])
|
])
|
||||||
|
|
||||||
|
ZFS_LINUX_TEST_SRC([blk_alloc_disk_2arg], [
|
||||||
|
#include <linux/blkdev.h>
|
||||||
|
],[
|
||||||
|
struct queue_limits *lim = NULL;
|
||||||
|
struct gendisk *disk __attribute__ ((unused));
|
||||||
|
disk = blk_alloc_disk(lim, NUMA_NO_NODE);
|
||||||
|
])
|
||||||
|
|
||||||
ZFS_LINUX_TEST_SRC([blk_cleanup_disk], [
|
ZFS_LINUX_TEST_SRC([blk_cleanup_disk], [
|
||||||
#include <linux/blkdev.h>
|
#include <linux/blkdev.h>
|
||||||
],[
|
],[
|
||||||
|
@ -96,6 +104,31 @@ AC_DEFUN([ZFS_AC_KERNEL_MAKE_REQUEST_FN], [
|
||||||
], [
|
], [
|
||||||
AC_MSG_RESULT(no)
|
AC_MSG_RESULT(no)
|
||||||
])
|
])
|
||||||
|
|
||||||
|
dnl #
|
||||||
|
dnl # Linux 6.9 API Change:
|
||||||
|
dnl # blk_alloc_disk() takes a nullable queue_limits arg.
|
||||||
|
dnl #
|
||||||
|
AC_MSG_CHECKING([whether blk_alloc_disk() exists and takes 2 args])
|
||||||
|
ZFS_LINUX_TEST_RESULT([blk_alloc_disk_2arg], [
|
||||||
|
AC_MSG_RESULT(yes)
|
||||||
|
AC_DEFINE([HAVE_BLK_ALLOC_DISK_2ARG], 1, [blk_alloc_disk() exists and takes 2 args])
|
||||||
|
|
||||||
|
dnl #
|
||||||
|
dnl # 5.20 API change,
|
||||||
|
dnl # Removed blk_cleanup_disk(), put_disk() should be used.
|
||||||
|
dnl #
|
||||||
|
AC_MSG_CHECKING([whether blk_cleanup_disk() exists])
|
||||||
|
ZFS_LINUX_TEST_RESULT([blk_cleanup_disk], [
|
||||||
|
AC_MSG_RESULT(yes)
|
||||||
|
AC_DEFINE([HAVE_BLK_CLEANUP_DISK], 1,
|
||||||
|
[blk_cleanup_disk() exists])
|
||||||
|
], [
|
||||||
|
AC_MSG_RESULT(no)
|
||||||
|
])
|
||||||
|
], [
|
||||||
|
AC_MSG_RESULT(no)
|
||||||
|
])
|
||||||
],[
|
],[
|
||||||
AC_MSG_RESULT(no)
|
AC_MSG_RESULT(no)
|
||||||
|
|
||||||
|
|
|
@ -578,13 +578,15 @@ AC_DEFUN([ZFS_AC_DEFAULT_PACKAGE], [
|
||||||
|
|
||||||
AC_MSG_CHECKING([default shell])
|
AC_MSG_CHECKING([default shell])
|
||||||
case "$VENDOR" in
|
case "$VENDOR" in
|
||||||
gentoo) DEFAULT_INIT_SHELL="/sbin/openrc-run";;
|
gentoo|alpine) DEFAULT_INIT_SHELL=/sbin/openrc-run
|
||||||
alpine) DEFAULT_INIT_SHELL="/sbin/openrc-run";;
|
IS_SYSV_RC=false ;;
|
||||||
*) DEFAULT_INIT_SHELL="/bin/sh" ;;
|
*) DEFAULT_INIT_SHELL=/bin/sh
|
||||||
|
IS_SYSV_RC=true ;;
|
||||||
esac
|
esac
|
||||||
|
|
||||||
AC_MSG_RESULT([$DEFAULT_INIT_SHELL])
|
AC_MSG_RESULT([$DEFAULT_INIT_SHELL])
|
||||||
AC_SUBST(DEFAULT_INIT_SHELL)
|
AC_SUBST(DEFAULT_INIT_SHELL)
|
||||||
|
AC_SUBST(IS_SYSV_RC)
|
||||||
|
|
||||||
AC_MSG_CHECKING([default nfs server init script])
|
AC_MSG_CHECKING([default nfs server init script])
|
||||||
AS_IF([test "$VENDOR" = "debian"],
|
AS_IF([test "$VENDOR" = "debian"],
|
||||||
|
|
|
@ -7,11 +7,7 @@ DESCRIPTION
|
||||||
|
|
||||||
They have been tested successfully on:
|
They have been tested successfully on:
|
||||||
|
|
||||||
* Debian GNU/Linux Wheezy
|
* Debian GNU/Linux Bookworm
|
||||||
* Debian GNU/Linux Jessie
|
|
||||||
* Ubuntu Trusty
|
|
||||||
* CentOS 6.0
|
|
||||||
* CentOS 6.6
|
|
||||||
* Gentoo
|
* Gentoo
|
||||||
|
|
||||||
SUPPORT
|
SUPPORT
|
||||||
|
|
|
@ -307,7 +307,7 @@ do_start()
|
||||||
|
|
||||||
# ----------------------------------------------------
|
# ----------------------------------------------------
|
||||||
|
|
||||||
if [ ! -e /sbin/openrc-run ]
|
if @IS_SYSV_RC@
|
||||||
then
|
then
|
||||||
case "$1" in
|
case "$1" in
|
||||||
start)
|
start)
|
||||||
|
|
|
@ -104,7 +104,7 @@ do_stop()
|
||||||
|
|
||||||
# ----------------------------------------------------
|
# ----------------------------------------------------
|
||||||
|
|
||||||
if [ ! -e /sbin/openrc-run ]
|
if @IS_SYSV_RC@
|
||||||
then
|
then
|
||||||
case "$1" in
|
case "$1" in
|
||||||
start)
|
start)
|
||||||
|
|
|
@ -114,7 +114,7 @@ do_stop()
|
||||||
|
|
||||||
# ----------------------------------------------------
|
# ----------------------------------------------------
|
||||||
|
|
||||||
if [ ! -e /sbin/openrc-run ]
|
if @IS_SYSV_RC@
|
||||||
then
|
then
|
||||||
case "$1" in
|
case "$1" in
|
||||||
start)
|
start)
|
||||||
|
|
|
@ -57,7 +57,8 @@ do_stop()
|
||||||
|
|
||||||
# ----------------------------------------------------
|
# ----------------------------------------------------
|
||||||
|
|
||||||
if [ ! -e /sbin/openrc-run ]; then
|
if @IS_SYSV_RC@
|
||||||
|
then
|
||||||
case "$1" in
|
case "$1" in
|
||||||
start)
|
start)
|
||||||
do_start
|
do_start
|
||||||
|
|
|
@ -93,7 +93,8 @@ do_reload()
|
||||||
|
|
||||||
# ----------------------------------------------------
|
# ----------------------------------------------------
|
||||||
|
|
||||||
if [ ! -e /sbin/openrc-run ]; then
|
if @IS_SYSV_RC@
|
||||||
|
then
|
||||||
case "$1" in
|
case "$1" in
|
||||||
start)
|
start)
|
||||||
do_start
|
do_start
|
||||||
|
|
|
@ -4,8 +4,6 @@ noinst_HEADERS = \
|
||||||
\
|
\
|
||||||
%D%/spl/acl/acl_common.h \
|
%D%/spl/acl/acl_common.h \
|
||||||
\
|
\
|
||||||
%D%/spl/rpc/xdr.h \
|
|
||||||
\
|
|
||||||
%D%/spl/sys/ia32/asm_linkage.h \
|
%D%/spl/sys/ia32/asm_linkage.h \
|
||||||
\
|
\
|
||||||
%D%/spl/sys/acl.h \
|
%D%/spl/sys/acl.h \
|
||||||
|
|
|
@ -1,71 +0,0 @@
|
||||||
/*
|
|
||||||
* Sun RPC is a product of Sun Microsystems, Inc. and is provided for
|
|
||||||
* unrestricted use provided that this legend is included on all tape
|
|
||||||
* media and as a part of the software program in whole or part. Users
|
|
||||||
* may copy or modify Sun RPC without charge, but are not authorized
|
|
||||||
* to license or distribute it to anyone else except as part of a product or
|
|
||||||
* program developed by the user.
|
|
||||||
*
|
|
||||||
* SUN RPC IS PROVIDED AS IS WITH NO WARRANTIES OF ANY KIND INCLUDING THE
|
|
||||||
* WARRANTIES OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
* PURPOSE, OR ARISING FROM A COURSE OF DEALING, USAGE OR TRADE PRACTICE.
|
|
||||||
*
|
|
||||||
* Sun RPC is provided with no support and without any obligation on the
|
|
||||||
* part of Sun Microsystems, Inc. to assist in its use, correction,
|
|
||||||
* modification or enhancement.
|
|
||||||
*
|
|
||||||
* SUN MICROSYSTEMS, INC. SHALL HAVE NO LIABILITY WITH RESPECT TO THE
|
|
||||||
* INFRINGEMENT OF COPYRIGHTS, TRADE SECRETS OR ANY PATENTS BY SUN RPC
|
|
||||||
* OR ANY PART THEREOF.
|
|
||||||
*
|
|
||||||
* In no event will Sun Microsystems, Inc. be liable for any lost revenue
|
|
||||||
* or profits or other special, indirect and consequential damages, even if
|
|
||||||
* Sun has been advised of the possibility of such damages.
|
|
||||||
*
|
|
||||||
* Sun Microsystems, Inc.
|
|
||||||
* 2550 Garcia Avenue
|
|
||||||
* Mountain View, California 94043
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef _OPENSOLARIS_RPC_XDR_H_
|
|
||||||
#define _OPENSOLARIS_RPC_XDR_H_
|
|
||||||
|
|
||||||
#include <rpc/types.h>
|
|
||||||
#include_next <rpc/xdr.h>
|
|
||||||
|
|
||||||
#if !defined(_KERNEL) && !defined(_STANDALONE)
|
|
||||||
|
|
||||||
#include <assert.h>
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Taken from sys/xdr/xdr_mem.c.
|
|
||||||
*
|
|
||||||
* FreeBSD's userland XDR doesn't implement control method (only the kernel),
|
|
||||||
* but OpenSolaris nvpair still depends on it, so we have to implement it here.
|
|
||||||
*/
|
|
||||||
static __inline bool_t
|
|
||||||
xdrmem_control(XDR *xdrs, int request, void *info)
|
|
||||||
{
|
|
||||||
xdr_bytesrec *xptr;
|
|
||||||
|
|
||||||
switch (request) {
|
|
||||||
case XDR_GET_BYTES_AVAIL:
|
|
||||||
xptr = (xdr_bytesrec *)info;
|
|
||||||
xptr->xc_is_last_record = TRUE;
|
|
||||||
xptr->xc_num_avail = xdrs->x_handy;
|
|
||||||
return (TRUE);
|
|
||||||
default:
|
|
||||||
assert(!"unexpected request");
|
|
||||||
}
|
|
||||||
return (FALSE);
|
|
||||||
}
|
|
||||||
|
|
||||||
#undef XDR_CONTROL
|
|
||||||
#define XDR_CONTROL(xdrs, req, op) \
|
|
||||||
(((xdrs)->x_ops->x_control == NULL) ? \
|
|
||||||
xdrmem_control((xdrs), (req), (op)) : \
|
|
||||||
(*(xdrs)->x_ops->x_control)(xdrs, req, op))
|
|
||||||
|
|
||||||
#endif /* !_KERNEL && !_STANDALONE */
|
|
||||||
|
|
||||||
#endif /* !_OPENSOLARIS_RPC_XDR_H_ */
|
|
|
@ -47,6 +47,7 @@ kernel_sys_HEADERS = \
|
||||||
|
|
||||||
kernel_spl_rpcdir = $(kerneldir)/spl/rpc
|
kernel_spl_rpcdir = $(kerneldir)/spl/rpc
|
||||||
kernel_spl_rpc_HEADERS = \
|
kernel_spl_rpc_HEADERS = \
|
||||||
|
%D%/spl/rpc/types.h \
|
||||||
%D%/spl/rpc/xdr.h
|
%D%/spl/rpc/xdr.h
|
||||||
|
|
||||||
kernel_spl_sysdir = $(kerneldir)/spl/sys
|
kernel_spl_sysdir = $(kerneldir)/spl/sys
|
||||||
|
|
|
@ -0,0 +1,30 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2008 Sun Microsystems, Inc.
|
||||||
|
* Written by Ricardo Correia <Ricardo.M.Correia@Sun.COM>
|
||||||
|
*
|
||||||
|
* This file is part of the SPL, Solaris Porting Layer.
|
||||||
|
*
|
||||||
|
* The SPL is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License as published by the
|
||||||
|
* Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
* option) any later version.
|
||||||
|
*
|
||||||
|
* The SPL is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
* for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License along
|
||||||
|
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef _SPL_RPC_TYPES_H
|
||||||
|
#define _SPL_RPC_TYPES_H
|
||||||
|
|
||||||
|
#include <sys/types.h>
|
||||||
|
|
||||||
|
/* Just enough to support rpc/xdr.h */
|
||||||
|
|
||||||
|
typedef int bool_t;
|
||||||
|
|
||||||
|
#endif /* _SPL_RPC_TYPES_H */
|
|
@ -24,8 +24,6 @@
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include <sys/sysmacros.h>
|
#include <sys/sysmacros.h>
|
||||||
|
|
||||||
typedef int bool_t;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* XDR enums and types.
|
* XDR enums and types.
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -45,18 +45,24 @@ typedef struct zfetch {
|
||||||
int zf_numstreams; /* number of zstream_t's */
|
int zf_numstreams; /* number of zstream_t's */
|
||||||
} zfetch_t;
|
} zfetch_t;
|
||||||
|
|
||||||
|
typedef struct zsrange {
|
||||||
|
uint16_t start;
|
||||||
|
uint16_t end;
|
||||||
|
} zsrange_t;
|
||||||
|
|
||||||
|
#define ZFETCH_RANGES 9 /* Fits zstream_t into 128 bytes */
|
||||||
|
|
||||||
typedef struct zstream {
|
typedef struct zstream {
|
||||||
|
list_node_t zs_node; /* link for zf_stream */
|
||||||
uint64_t zs_blkid; /* expect next access at this blkid */
|
uint64_t zs_blkid; /* expect next access at this blkid */
|
||||||
|
uint_t zs_atime; /* time last prefetch issued */
|
||||||
|
zsrange_t zs_ranges[ZFETCH_RANGES]; /* ranges from future */
|
||||||
unsigned int zs_pf_dist; /* data prefetch distance in bytes */
|
unsigned int zs_pf_dist; /* data prefetch distance in bytes */
|
||||||
unsigned int zs_ipf_dist; /* L1 prefetch distance in bytes */
|
unsigned int zs_ipf_dist; /* L1 prefetch distance in bytes */
|
||||||
uint64_t zs_pf_start; /* first data block to prefetch */
|
uint64_t zs_pf_start; /* first data block to prefetch */
|
||||||
uint64_t zs_pf_end; /* data block to prefetch up to */
|
uint64_t zs_pf_end; /* data block to prefetch up to */
|
||||||
uint64_t zs_ipf_start; /* first data block to prefetch L1 */
|
uint64_t zs_ipf_start; /* first data block to prefetch L1 */
|
||||||
uint64_t zs_ipf_end; /* data block to prefetch L1 up to */
|
uint64_t zs_ipf_end; /* data block to prefetch L1 up to */
|
||||||
|
|
||||||
list_node_t zs_node; /* link for zf_stream */
|
|
||||||
hrtime_t zs_atime; /* time last prefetch issued */
|
|
||||||
zfetch_t *zs_fetch; /* parent fetch */
|
|
||||||
boolean_t zs_missed; /* stream saw cache misses */
|
boolean_t zs_missed; /* stream saw cache misses */
|
||||||
boolean_t zs_more; /* need more distant prefetch */
|
boolean_t zs_more; /* need more distant prefetch */
|
||||||
zfs_refcount_t zs_callers; /* number of pending callers */
|
zfs_refcount_t zs_callers; /* number of pending callers */
|
||||||
|
@ -74,7 +80,7 @@ void dmu_zfetch_init(zfetch_t *, struct dnode *);
|
||||||
void dmu_zfetch_fini(zfetch_t *);
|
void dmu_zfetch_fini(zfetch_t *);
|
||||||
zstream_t *dmu_zfetch_prepare(zfetch_t *, uint64_t, uint64_t, boolean_t,
|
zstream_t *dmu_zfetch_prepare(zfetch_t *, uint64_t, uint64_t, boolean_t,
|
||||||
boolean_t);
|
boolean_t);
|
||||||
void dmu_zfetch_run(zstream_t *, boolean_t, boolean_t);
|
void dmu_zfetch_run(zfetch_t *, zstream_t *, boolean_t, boolean_t);
|
||||||
void dmu_zfetch(zfetch_t *, uint64_t, uint64_t, boolean_t, boolean_t,
|
void dmu_zfetch(zfetch_t *, uint64_t, uint64_t, boolean_t, boolean_t,
|
||||||
boolean_t);
|
boolean_t);
|
||||||
|
|
||||||
|
|
|
@ -455,7 +455,7 @@ struct vdev {
|
||||||
zfs_ratelimit_t vdev_checksum_rl;
|
zfs_ratelimit_t vdev_checksum_rl;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Vdev properties for tuning ZED
|
* Vdev properties for tuning ZED or zfsd
|
||||||
*/
|
*/
|
||||||
uint64_t vdev_checksum_n;
|
uint64_t vdev_checksum_n;
|
||||||
uint64_t vdev_checksum_t;
|
uint64_t vdev_checksum_t;
|
||||||
|
|
|
@ -132,7 +132,7 @@ typedef struct zap_leaf_phys {
|
||||||
* with the ZAP_LEAF_CHUNK() macro.
|
* with the ZAP_LEAF_CHUNK() macro.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
uint16_t l_hash[1];
|
uint16_t l_hash[];
|
||||||
} zap_leaf_phys_t;
|
} zap_leaf_phys_t;
|
||||||
|
|
||||||
typedef union zap_leaf_chunk {
|
typedef union zap_leaf_chunk {
|
||||||
|
|
|
@ -153,7 +153,7 @@ enum zio_stage {
|
||||||
ZIO_STAGE_READY = 1 << 20, /* RWFCIT */
|
ZIO_STAGE_READY = 1 << 20, /* RWFCIT */
|
||||||
|
|
||||||
ZIO_STAGE_VDEV_IO_START = 1 << 21, /* RW--IT */
|
ZIO_STAGE_VDEV_IO_START = 1 << 21, /* RW--IT */
|
||||||
ZIO_STAGE_VDEV_IO_DONE = 1 << 22, /* RW---T */
|
ZIO_STAGE_VDEV_IO_DONE = 1 << 22, /* RW--IT */
|
||||||
ZIO_STAGE_VDEV_IO_ASSESS = 1 << 23, /* RW--IT */
|
ZIO_STAGE_VDEV_IO_ASSESS = 1 << 23, /* RW--IT */
|
||||||
|
|
||||||
ZIO_STAGE_CHECKSUM_VERIFY = 1 << 24, /* R----- */
|
ZIO_STAGE_CHECKSUM_VERIFY = 1 << 24, /* R----- */
|
||||||
|
@ -261,8 +261,7 @@ enum zio_stage {
|
||||||
|
|
||||||
#define ZIO_IOCTL_PIPELINE \
|
#define ZIO_IOCTL_PIPELINE \
|
||||||
(ZIO_INTERLOCK_STAGES | \
|
(ZIO_INTERLOCK_STAGES | \
|
||||||
ZIO_STAGE_VDEV_IO_START | \
|
ZIO_VDEV_IO_STAGES)
|
||||||
ZIO_STAGE_VDEV_IO_ASSESS)
|
|
||||||
|
|
||||||
#define ZIO_TRIM_PIPELINE \
|
#define ZIO_TRIM_PIPELINE \
|
||||||
(ZIO_INTERLOCK_STAGES | \
|
(ZIO_INTERLOCK_STAGES | \
|
||||||
|
|
|
@ -1900,7 +1900,8 @@ zpool_rewind_exclaim(libzfs_handle_t *hdl, const char *name, boolean_t dryrun,
|
||||||
(void) nvlist_lookup_int64(nv, ZPOOL_CONFIG_REWIND_TIME, &loss);
|
(void) nvlist_lookup_int64(nv, ZPOOL_CONFIG_REWIND_TIME, &loss);
|
||||||
|
|
||||||
if (localtime_r((time_t *)&rewindto, &t) != NULL &&
|
if (localtime_r((time_t *)&rewindto, &t) != NULL &&
|
||||||
strftime(timestr, 128, "%c", &t) != 0) {
|
ctime_r((time_t *)&rewindto, timestr) != NULL) {
|
||||||
|
timestr[24] = 0;
|
||||||
if (dryrun) {
|
if (dryrun) {
|
||||||
(void) printf(dgettext(TEXT_DOMAIN,
|
(void) printf(dgettext(TEXT_DOMAIN,
|
||||||
"Would be able to return %s "
|
"Would be able to return %s "
|
||||||
|
@ -1962,7 +1963,8 @@ zpool_explain_recover(libzfs_handle_t *hdl, const char *name, int reason,
|
||||||
"Recovery is possible, but will result in some data loss.\n"));
|
"Recovery is possible, but will result in some data loss.\n"));
|
||||||
|
|
||||||
if (localtime_r((time_t *)&rewindto, &t) != NULL &&
|
if (localtime_r((time_t *)&rewindto, &t) != NULL &&
|
||||||
strftime(timestr, 128, "%c", &t) != 0) {
|
ctime_r((time_t *)&rewindto, timestr) != NULL) {
|
||||||
|
timestr[24] = 0;
|
||||||
(void) printf(dgettext(TEXT_DOMAIN,
|
(void) printf(dgettext(TEXT_DOMAIN,
|
||||||
"\tReturning the pool to its state as of %s\n"
|
"\tReturning the pool to its state as of %s\n"
|
||||||
"\tshould correct the problem. "),
|
"\tshould correct the problem. "),
|
||||||
|
|
|
@ -62,7 +62,6 @@ dist_man_MANS = \
|
||||||
%D%/man8/zfs-userspace.8 \
|
%D%/man8/zfs-userspace.8 \
|
||||||
%D%/man8/zfs-wait.8 \
|
%D%/man8/zfs-wait.8 \
|
||||||
%D%/man8/zfs_ids_to_path.8 \
|
%D%/man8/zfs_ids_to_path.8 \
|
||||||
%D%/man8/zfs_prepare_disk.8 \
|
|
||||||
%D%/man8/zgenhostid.8 \
|
%D%/man8/zgenhostid.8 \
|
||||||
%D%/man8/zinject.8 \
|
%D%/man8/zinject.8 \
|
||||||
%D%/man8/zpool.8 \
|
%D%/man8/zpool.8 \
|
||||||
|
@ -115,7 +114,8 @@ endif
|
||||||
|
|
||||||
nodist_man_MANS = \
|
nodist_man_MANS = \
|
||||||
%D%/man8/zed.8 \
|
%D%/man8/zed.8 \
|
||||||
%D%/man8/zfs-mount-generator.8
|
%D%/man8/zfs-mount-generator.8 \
|
||||||
|
%D%/man8/zfs_prepare_disk.8
|
||||||
|
|
||||||
dist_noinst_DATA += $(dist_noinst_man_MANS) $(dist_man_MANS)
|
dist_noinst_DATA += $(dist_noinst_man_MANS) $(dist_man_MANS)
|
||||||
|
|
||||||
|
|
|
@ -564,6 +564,10 @@ However, this is limited by
|
||||||
Maximum micro ZAP size.
|
Maximum micro ZAP size.
|
||||||
A micro ZAP is upgraded to a fat ZAP, once it grows beyond the specified size.
|
A micro ZAP is upgraded to a fat ZAP, once it grows beyond the specified size.
|
||||||
.
|
.
|
||||||
|
.It Sy zfetch_hole_shift Ns = Ns Sy 2 Pq uint
|
||||||
|
Log2 fraction of holes in speculative prefetch stream allowed for it to
|
||||||
|
proceed.
|
||||||
|
.
|
||||||
.It Sy zfetch_min_distance Ns = Ns Sy 4194304 Ns B Po 4 MiB Pc Pq uint
|
.It Sy zfetch_min_distance Ns = Ns Sy 4194304 Ns B Po 4 MiB Pc Pq uint
|
||||||
Min bytes to prefetch per stream.
|
Min bytes to prefetch per stream.
|
||||||
Prefetch distance starts from the demand access size and quickly grows to
|
Prefetch distance starts from the demand access size and quickly grows to
|
||||||
|
@ -578,6 +582,13 @@ Max bytes to prefetch per stream.
|
||||||
.It Sy zfetch_max_idistance Ns = Ns Sy 67108864 Ns B Po 64 MiB Pc Pq uint
|
.It Sy zfetch_max_idistance Ns = Ns Sy 67108864 Ns B Po 64 MiB Pc Pq uint
|
||||||
Max bytes to prefetch indirects for per stream.
|
Max bytes to prefetch indirects for per stream.
|
||||||
.
|
.
|
||||||
|
.It Sy zfetch_max_reorder Ns = Ns Sy 16777216 Ns B Po 16 MiB Pc Pq uint
|
||||||
|
Requests within this byte distance from the current prefetch stream position
|
||||||
|
are considered parts of the stream, reordered due to parallel processing.
|
||||||
|
Such requests do not advance the stream position immediately unless
|
||||||
|
.Sy zfetch_hole_shift
|
||||||
|
fill threshold is reached, but are saved to fill holes in the stream later.
|
||||||
|
.
|
||||||
.It Sy zfetch_max_streams Ns = Ns Sy 8 Pq uint
|
.It Sy zfetch_max_streams Ns = Ns Sy 8 Pq uint
|
||||||
Max number of streams per zfetch (prefetch streams per file).
|
Max number of streams per zfetch (prefetch streams per file).
|
||||||
.
|
.
|
||||||
|
@ -2387,6 +2398,13 @@ The number of requests which can be handled concurrently is controlled by
|
||||||
is ignored when running on a kernel that supports block multiqueue
|
is ignored when running on a kernel that supports block multiqueue
|
||||||
.Pq Li blk-mq .
|
.Pq Li blk-mq .
|
||||||
.
|
.
|
||||||
|
.It Sy zvol_num_taskqs Ns = Ns Sy 0 Pq uint
|
||||||
|
Number of zvol taskqs.
|
||||||
|
If
|
||||||
|
.Sy 0
|
||||||
|
(the default) then scaling is done internally to prefer 6 threads per taskq.
|
||||||
|
This only applies on Linux.
|
||||||
|
.
|
||||||
.It Sy zvol_threads Ns = Ns Sy 0 Pq uint
|
.It Sy zvol_threads Ns = Ns Sy 0 Pq uint
|
||||||
The number of system wide threads to use for processing zvol block IOs.
|
The number of system wide threads to use for processing zvol block IOs.
|
||||||
If
|
If
|
||||||
|
|
|
@ -127,7 +127,13 @@ If the property is only set on the top-level vdev, this value will be used.
|
||||||
The values of these properties do not persist across vdev replacement.
|
The values of these properties do not persist across vdev replacement.
|
||||||
For this reason, it is advisable to set the property on the top-level vdev -
|
For this reason, it is advisable to set the property on the top-level vdev -
|
||||||
not on the leaf vdev itself.
|
not on the leaf vdev itself.
|
||||||
The default values are 10 errors in 600 seconds.
|
The default values for
|
||||||
|
.Sy OpenZFS on Linux
|
||||||
|
are 10 errors in 600 seconds.
|
||||||
|
For
|
||||||
|
.Sy OpenZFS on FreeBSD
|
||||||
|
defaults see
|
||||||
|
.Xr zfsd 8 .
|
||||||
.It Sy comment
|
.It Sy comment
|
||||||
A text comment up to 8192 characters long
|
A text comment up to 8192 characters long
|
||||||
.It Sy bootsize
|
.It Sy bootsize
|
||||||
|
|
|
@ -19,10 +19,11 @@
|
||||||
.\" CDDL HEADER END
|
.\" CDDL HEADER END
|
||||||
.\"
|
.\"
|
||||||
.\" Copyright 2013 Darik Horn <dajhorn@vanadac.com>. All rights reserved.
|
.\" Copyright 2013 Darik Horn <dajhorn@vanadac.com>. All rights reserved.
|
||||||
|
.\" Copyright (c) 2024, Klara Inc.
|
||||||
.\"
|
.\"
|
||||||
.\" lint-ok: WARNING: sections out of conventional order: Sh SYNOPSIS
|
.\" lint-ok: WARNING: sections out of conventional order: Sh SYNOPSIS
|
||||||
.\"
|
.\"
|
||||||
.Dd May 26, 2021
|
.Dd April 4, 2024
|
||||||
.Dt ZINJECT 8
|
.Dt ZINJECT 8
|
||||||
.Os
|
.Os
|
||||||
.
|
.
|
||||||
|
@ -257,6 +258,7 @@ Run for this many seconds before reporting failure.
|
||||||
.It Fl T Ar failure
|
.It Fl T Ar failure
|
||||||
Set the failure type to one of
|
Set the failure type to one of
|
||||||
.Sy all ,
|
.Sy all ,
|
||||||
|
.Sy ioctl ,
|
||||||
.Sy claim ,
|
.Sy claim ,
|
||||||
.Sy free ,
|
.Sy free ,
|
||||||
.Sy read ,
|
.Sy read ,
|
||||||
|
|
|
@ -404,7 +404,7 @@ ZIO_STAGE_DVA_CLAIM:0x00080000:---C--
|
||||||
ZIO_STAGE_READY:0x00100000:RWFCIT
|
ZIO_STAGE_READY:0x00100000:RWFCIT
|
||||||
|
|
||||||
ZIO_STAGE_VDEV_IO_START:0x00200000:RW--IT
|
ZIO_STAGE_VDEV_IO_START:0x00200000:RW--IT
|
||||||
ZIO_STAGE_VDEV_IO_DONE:0x00400000:RW---T
|
ZIO_STAGE_VDEV_IO_DONE:0x00400000:RW--IT
|
||||||
ZIO_STAGE_VDEV_IO_ASSESS:0x00800000:RW--IT
|
ZIO_STAGE_VDEV_IO_ASSESS:0x00800000:RW--IT
|
||||||
|
|
||||||
ZIO_STAGE_CHECKSUM_VERIFY:0x01000000:R-----
|
ZIO_STAGE_CHECKSUM_VERIFY:0x01000000:R-----
|
||||||
|
|
|
@ -85,9 +85,6 @@ SRCS= vnode_if.h device_if.h bus_if.h
|
||||||
#avl
|
#avl
|
||||||
SRCS+= avl.c
|
SRCS+= avl.c
|
||||||
|
|
||||||
# icp
|
|
||||||
SRCS+= edonr.c
|
|
||||||
|
|
||||||
#icp/algs/blake3
|
#icp/algs/blake3
|
||||||
SRCS+= blake3.c \
|
SRCS+= blake3.c \
|
||||||
blake3_generic.c \
|
blake3_generic.c \
|
||||||
|
@ -107,9 +104,12 @@ SRCS+= blake3_avx2.S \
|
||||||
blake3_sse2.S \
|
blake3_sse2.S \
|
||||||
blake3_sse41.S
|
blake3_sse41.S
|
||||||
|
|
||||||
|
#icp/algs/edonr
|
||||||
|
SRCS+= edonr.c
|
||||||
|
|
||||||
#icp/algs/sha2
|
#icp/algs/sha2
|
||||||
SRCS+= sha2_generic.c \
|
SRCS+= sha256_impl.c \
|
||||||
sha256_impl.c \
|
sha2_generic.c \
|
||||||
sha512_impl.c
|
sha512_impl.c
|
||||||
|
|
||||||
#icp/asm-arm/sha2
|
#icp/asm-arm/sha2
|
||||||
|
@ -122,8 +122,8 @@ SRCS+= sha256-armv8.S \
|
||||||
|
|
||||||
#icp/asm-ppc64/sha2
|
#icp/asm-ppc64/sha2
|
||||||
SRCS+= sha256-p8.S \
|
SRCS+= sha256-p8.S \
|
||||||
sha512-p8.S \
|
|
||||||
sha256-ppc.S \
|
sha256-ppc.S \
|
||||||
|
sha512-p8.S \
|
||||||
sha512-ppc.S
|
sha512-ppc.S
|
||||||
|
|
||||||
#icp/asm-x86_64/sha2
|
#icp/asm-x86_64/sha2
|
||||||
|
@ -157,10 +157,10 @@ SRCS+= lapi.c \
|
||||||
lzio.c
|
lzio.c
|
||||||
|
|
||||||
#nvpair
|
#nvpair
|
||||||
SRCS+= nvpair.c \
|
SRCS+= fnvpair.c \
|
||||||
fnvpair.c \
|
nvpair.c \
|
||||||
nvpair_alloc_spl.c \
|
nvpair_alloc_fixed.c \
|
||||||
nvpair_alloc_fixed.c
|
nvpair_alloc_spl.c
|
||||||
|
|
||||||
#os/freebsd/spl
|
#os/freebsd/spl
|
||||||
SRCS+= acl_common.c \
|
SRCS+= acl_common.c \
|
||||||
|
@ -184,7 +184,6 @@ SRCS+= acl_common.c \
|
||||||
spl_zlib.c \
|
spl_zlib.c \
|
||||||
spl_zone.c
|
spl_zone.c
|
||||||
|
|
||||||
|
|
||||||
.if ${MACHINE_ARCH} == "i386" || ${MACHINE_ARCH} == "powerpc" || \
|
.if ${MACHINE_ARCH} == "i386" || ${MACHINE_ARCH} == "powerpc" || \
|
||||||
${MACHINE_ARCH} == "powerpcspe" || ${MACHINE_ARCH} == "arm"
|
${MACHINE_ARCH} == "powerpcspe" || ${MACHINE_ARCH} == "arm"
|
||||||
SRCS+= spl_atomic.c
|
SRCS+= spl_atomic.c
|
||||||
|
@ -207,6 +206,7 @@ SRCS+= abd_os.c \
|
||||||
zfs_ctldir.c \
|
zfs_ctldir.c \
|
||||||
zfs_debug.c \
|
zfs_debug.c \
|
||||||
zfs_dir.c \
|
zfs_dir.c \
|
||||||
|
zfs_file_os.c \
|
||||||
zfs_ioctl_compat.c \
|
zfs_ioctl_compat.c \
|
||||||
zfs_ioctl_os.c \
|
zfs_ioctl_os.c \
|
||||||
zfs_racct.c \
|
zfs_racct.c \
|
||||||
|
@ -217,19 +217,20 @@ SRCS+= abd_os.c \
|
||||||
zvol_os.c
|
zvol_os.c
|
||||||
|
|
||||||
#unicode
|
#unicode
|
||||||
SRCS+= uconv.c \
|
SRCS+= u8_textprep.c \
|
||||||
u8_textprep.c
|
uconv.c
|
||||||
|
|
||||||
#zcommon
|
#zcommon
|
||||||
SRCS+= zfeature_common.c \
|
SRCS+= cityhash.c \
|
||||||
|
zfeature_common.c \
|
||||||
zfs_comutil.c \
|
zfs_comutil.c \
|
||||||
zfs_deleg.c \
|
zfs_deleg.c \
|
||||||
zfs_fletcher.c \
|
|
||||||
zfs_fletcher_avx512.c \
|
zfs_fletcher_avx512.c \
|
||||||
|
zfs_fletcher.c \
|
||||||
zfs_fletcher_intel.c \
|
zfs_fletcher_intel.c \
|
||||||
zfs_fletcher_sse.c \
|
zfs_fletcher_sse.c \
|
||||||
zfs_fletcher_superscalar.c \
|
|
||||||
zfs_fletcher_superscalar4.c \
|
zfs_fletcher_superscalar4.c \
|
||||||
|
zfs_fletcher_superscalar.c \
|
||||||
zfs_namecheck.c \
|
zfs_namecheck.c \
|
||||||
zfs_prop.c \
|
zfs_prop.c \
|
||||||
zpool_prop.c \
|
zpool_prop.c \
|
||||||
|
@ -243,14 +244,13 @@ SRCS+= abd.c \
|
||||||
blkptr.c \
|
blkptr.c \
|
||||||
bplist.c \
|
bplist.c \
|
||||||
bpobj.c \
|
bpobj.c \
|
||||||
brt.c \
|
|
||||||
btree.c \
|
|
||||||
cityhash.c \
|
|
||||||
dbuf.c \
|
|
||||||
dbuf_stats.c \
|
|
||||||
bptree.c \
|
bptree.c \
|
||||||
bqueue.c \
|
bqueue.c \
|
||||||
|
brt.c \
|
||||||
|
btree.c \
|
||||||
dataset_kstats.c \
|
dataset_kstats.c \
|
||||||
|
dbuf.c \
|
||||||
|
dbuf_stats.c \
|
||||||
ddt.c \
|
ddt.c \
|
||||||
ddt_stats.c \
|
ddt_stats.c \
|
||||||
ddt_zap.c \
|
ddt_zap.c \
|
||||||
|
@ -266,13 +266,13 @@ SRCS+= abd.c \
|
||||||
dmu_zfetch.c \
|
dmu_zfetch.c \
|
||||||
dnode.c \
|
dnode.c \
|
||||||
dnode_sync.c \
|
dnode_sync.c \
|
||||||
|
dsl_bookmark.c \
|
||||||
|
dsl_crypt.c \
|
||||||
dsl_dataset.c \
|
dsl_dataset.c \
|
||||||
dsl_deadlist.c \
|
dsl_deadlist.c \
|
||||||
dsl_deleg.c \
|
dsl_deleg.c \
|
||||||
dsl_bookmark.c \
|
|
||||||
dsl_dir.c \
|
|
||||||
dsl_crypt.c \
|
|
||||||
dsl_destroy.c \
|
dsl_destroy.c \
|
||||||
|
dsl_dir.c \
|
||||||
dsl_pool.c \
|
dsl_pool.c \
|
||||||
dsl_prop.c \
|
dsl_prop.c \
|
||||||
dsl_scan.c \
|
dsl_scan.c \
|
||||||
|
@ -281,9 +281,9 @@ SRCS+= abd.c \
|
||||||
edonr_zfs.c \
|
edonr_zfs.c \
|
||||||
fm.c \
|
fm.c \
|
||||||
gzip.c \
|
gzip.c \
|
||||||
lzjb.c \
|
|
||||||
lz4.c \
|
lz4.c \
|
||||||
lz4_zfs.c \
|
lz4_zfs.c \
|
||||||
|
lzjb.c \
|
||||||
metaslab.c \
|
metaslab.c \
|
||||||
mmp.c \
|
mmp.c \
|
||||||
multilist.c \
|
multilist.c \
|
||||||
|
@ -296,6 +296,8 @@ SRCS+= abd.c \
|
||||||
sha2_zfs.c \
|
sha2_zfs.c \
|
||||||
skein_zfs.c \
|
skein_zfs.c \
|
||||||
spa.c \
|
spa.c \
|
||||||
|
space_map.c \
|
||||||
|
space_reftree.c \
|
||||||
spa_checkpoint.c \
|
spa_checkpoint.c \
|
||||||
spa_config.c \
|
spa_config.c \
|
||||||
spa_errlog.c \
|
spa_errlog.c \
|
||||||
|
@ -303,16 +305,14 @@ SRCS+= abd.c \
|
||||||
spa_log_spacemap.c \
|
spa_log_spacemap.c \
|
||||||
spa_misc.c \
|
spa_misc.c \
|
||||||
spa_stats.c \
|
spa_stats.c \
|
||||||
space_map.c \
|
|
||||||
space_reftree.c \
|
|
||||||
txg.c \
|
txg.c \
|
||||||
uberblock.c \
|
uberblock.c \
|
||||||
unique.c \
|
unique.c \
|
||||||
vdev.c \
|
vdev.c \
|
||||||
vdev_draid.c \
|
vdev_draid.c \
|
||||||
vdev_draid_rand.c \
|
vdev_draid_rand.c \
|
||||||
vdev_indirect.c \
|
|
||||||
vdev_indirect_births.c \
|
vdev_indirect_births.c \
|
||||||
|
vdev_indirect.c \
|
||||||
vdev_indirect_mapping.c \
|
vdev_indirect_mapping.c \
|
||||||
vdev_initialize.c \
|
vdev_initialize.c \
|
||||||
vdev_label.c \
|
vdev_label.c \
|
||||||
|
@ -320,11 +320,11 @@ SRCS+= abd.c \
|
||||||
vdev_missing.c \
|
vdev_missing.c \
|
||||||
vdev_queue.c \
|
vdev_queue.c \
|
||||||
vdev_raidz.c \
|
vdev_raidz.c \
|
||||||
vdev_raidz_math.c \
|
|
||||||
vdev_raidz_math_scalar.c \
|
|
||||||
vdev_raidz_math_avx2.c \
|
vdev_raidz_math_avx2.c \
|
||||||
vdev_raidz_math_avx512bw.c \
|
vdev_raidz_math_avx512bw.c \
|
||||||
vdev_raidz_math_avx512f.c \
|
vdev_raidz_math_avx512f.c \
|
||||||
|
vdev_raidz_math.c \
|
||||||
|
vdev_raidz_math_scalar.c \
|
||||||
vdev_raidz_math_sse2.c \
|
vdev_raidz_math_sse2.c \
|
||||||
vdev_raidz_math_ssse3.c \
|
vdev_raidz_math_ssse3.c \
|
||||||
vdev_rebuild.c \
|
vdev_rebuild.c \
|
||||||
|
@ -343,7 +343,6 @@ SRCS+= abd.c \
|
||||||
zfeature.c \
|
zfeature.c \
|
||||||
zfs_byteswap.c \
|
zfs_byteswap.c \
|
||||||
zfs_chksum.c \
|
zfs_chksum.c \
|
||||||
zfs_file_os.c \
|
|
||||||
zfs_fm.c \
|
zfs_fm.c \
|
||||||
zfs_fuid.c \
|
zfs_fuid.c \
|
||||||
zfs_impl.c \
|
zfs_impl.c \
|
||||||
|
@ -367,30 +366,36 @@ SRCS+= abd.c \
|
||||||
zvol.c
|
zvol.c
|
||||||
|
|
||||||
#zstd
|
#zstd
|
||||||
SRCS+= zfs_zstd.c \
|
SRCS+= zfs_zstd.c
|
||||||
entropy_common.c \
|
|
||||||
|
#zstd/common
|
||||||
|
SRCS+= entropy_common.c \
|
||||||
error_private.c \
|
error_private.c \
|
||||||
fse_compress.c \
|
|
||||||
fse_decompress.c \
|
fse_decompress.c \
|
||||||
hist.c \
|
|
||||||
huf_compress.c \
|
|
||||||
huf_decompress.c \
|
|
||||||
pool.c \
|
pool.c \
|
||||||
xxhash.c \
|
xxhash.c \
|
||||||
zstd_common.c \
|
zstd_common.c \
|
||||||
|
|
||||||
|
#zstd/compress
|
||||||
|
SRCS+= fse_compress.c \
|
||||||
|
hist.c \
|
||||||
|
huf_compress.c \
|
||||||
zstd_compress.c \
|
zstd_compress.c \
|
||||||
zstd_compress_literals.c \
|
zstd_compress_literals.c \
|
||||||
zstd_compress_sequences.c \
|
zstd_compress_sequences.c \
|
||||||
zstd_compress_superblock.c \
|
zstd_compress_superblock.c \
|
||||||
zstd_ddict.c \
|
|
||||||
zstd_decompress.c \
|
|
||||||
zstd_decompress_block.c \
|
|
||||||
zstd_double_fast.c \
|
zstd_double_fast.c \
|
||||||
zstd_fast.c \
|
zstd_fast.c \
|
||||||
zstd_lazy.c \
|
zstd_lazy.c \
|
||||||
zstd_ldm.c \
|
zstd_ldm.c \
|
||||||
zstd_opt.c
|
zstd_opt.c
|
||||||
|
|
||||||
|
#zstd/decompress
|
||||||
|
SRCS+= huf_decompress.c \
|
||||||
|
zstd_ddict.c \
|
||||||
|
zstd_decompress_block.c \
|
||||||
|
zstd_decompress.c
|
||||||
|
|
||||||
beforeinstall:
|
beforeinstall:
|
||||||
.if ${MK_DEBUG_FILES} != "no"
|
.if ${MK_DEBUG_FILES} != "no"
|
||||||
mtree -eu \
|
mtree -eu \
|
||||||
|
|
|
@ -41,6 +41,7 @@
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include <sys/param.h>
|
#include <sys/param.h>
|
||||||
#include <sys/string.h>
|
#include <sys/string.h>
|
||||||
|
#include <rpc/types.h>
|
||||||
#include <rpc/xdr.h>
|
#include <rpc/xdr.h>
|
||||||
#include <sys/mod.h>
|
#include <sys/mod.h>
|
||||||
|
|
||||||
|
|
|
@ -25,6 +25,7 @@
|
||||||
#include <sys/debug.h>
|
#include <sys/debug.h>
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include <sys/sysmacros.h>
|
#include <sys/sysmacros.h>
|
||||||
|
#include <rpc/types.h>
|
||||||
#include <rpc/xdr.h>
|
#include <rpc/xdr.h>
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -45,15 +45,25 @@
|
||||||
/*
|
/*
|
||||||
* Linux 6.8.x uses a bdev_handle as an instance/refcount for an underlying
|
* Linux 6.8.x uses a bdev_handle as an instance/refcount for an underlying
|
||||||
* block_device. Since it carries the block_device inside, it's convenient to
|
* block_device. Since it carries the block_device inside, it's convenient to
|
||||||
* just use the handle as a proxy. For pre-6.8, we just emulate this with
|
* just use the handle as a proxy.
|
||||||
* a cast, since we don't need any of the other fields inside the handle.
|
*
|
||||||
|
* Linux 6.9.x uses a file for the same purpose.
|
||||||
|
*
|
||||||
|
* For pre-6.8, we just emulate this with a cast, since we don't need any of
|
||||||
|
* the other fields inside the handle.
|
||||||
*/
|
*/
|
||||||
#ifdef HAVE_BDEV_OPEN_BY_PATH
|
#if defined(HAVE_BDEV_OPEN_BY_PATH)
|
||||||
typedef struct bdev_handle zfs_bdev_handle_t;
|
typedef struct bdev_handle zfs_bdev_handle_t;
|
||||||
#define BDH_BDEV(bdh) ((bdh)->bdev)
|
#define BDH_BDEV(bdh) ((bdh)->bdev)
|
||||||
#define BDH_IS_ERR(bdh) (IS_ERR(bdh))
|
#define BDH_IS_ERR(bdh) (IS_ERR(bdh))
|
||||||
#define BDH_PTR_ERR(bdh) (PTR_ERR(bdh))
|
#define BDH_PTR_ERR(bdh) (PTR_ERR(bdh))
|
||||||
#define BDH_ERR_PTR(err) (ERR_PTR(err))
|
#define BDH_ERR_PTR(err) (ERR_PTR(err))
|
||||||
|
#elif defined(HAVE_BDEV_FILE_OPEN_BY_PATH)
|
||||||
|
typedef struct file zfs_bdev_handle_t;
|
||||||
|
#define BDH_BDEV(bdh) (file_bdev(bdh))
|
||||||
|
#define BDH_IS_ERR(bdh) (IS_ERR(bdh))
|
||||||
|
#define BDH_PTR_ERR(bdh) (PTR_ERR(bdh))
|
||||||
|
#define BDH_ERR_PTR(err) (ERR_PTR(err))
|
||||||
#else
|
#else
|
||||||
typedef void zfs_bdev_handle_t;
|
typedef void zfs_bdev_handle_t;
|
||||||
#define BDH_BDEV(bdh) ((struct block_device *)bdh)
|
#define BDH_BDEV(bdh) ((struct block_device *)bdh)
|
||||||
|
@ -242,7 +252,9 @@ vdev_blkdev_get_by_path(const char *path, spa_mode_t smode, void *holder)
|
||||||
{
|
{
|
||||||
vdev_bdev_mode_t bmode = vdev_bdev_mode(smode);
|
vdev_bdev_mode_t bmode = vdev_bdev_mode(smode);
|
||||||
|
|
||||||
#if defined(HAVE_BDEV_OPEN_BY_PATH)
|
#if defined(HAVE_BDEV_FILE_OPEN_BY_PATH)
|
||||||
|
return (bdev_file_open_by_path(path, bmode, holder, NULL));
|
||||||
|
#elif defined(HAVE_BDEV_OPEN_BY_PATH)
|
||||||
return (bdev_open_by_path(path, bmode, holder, NULL));
|
return (bdev_open_by_path(path, bmode, holder, NULL));
|
||||||
#elif defined(HAVE_BLKDEV_GET_BY_PATH_4ARG)
|
#elif defined(HAVE_BLKDEV_GET_BY_PATH_4ARG)
|
||||||
return (blkdev_get_by_path(path, bmode, holder, NULL));
|
return (blkdev_get_by_path(path, bmode, holder, NULL));
|
||||||
|
@ -258,8 +270,10 @@ vdev_blkdev_put(zfs_bdev_handle_t *bdh, spa_mode_t smode, void *holder)
|
||||||
return (bdev_release(bdh));
|
return (bdev_release(bdh));
|
||||||
#elif defined(HAVE_BLKDEV_PUT_HOLDER)
|
#elif defined(HAVE_BLKDEV_PUT_HOLDER)
|
||||||
return (blkdev_put(BDH_BDEV(bdh), holder));
|
return (blkdev_put(BDH_BDEV(bdh), holder));
|
||||||
#else
|
#elif defined(HAVE_BLKDEV_PUT)
|
||||||
return (blkdev_put(BDH_BDEV(bdh), vdev_bdev_mode(smode)));
|
return (blkdev_put(BDH_BDEV(bdh), vdev_bdev_mode(smode)));
|
||||||
|
#else
|
||||||
|
fput(bdh);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1238,8 +1252,6 @@ vdev_disk_io_flush(struct block_device *bdev, zio_t *zio)
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(HAVE_BLKDEV_ISSUE_SECURE_ERASE) || \
|
|
||||||
defined(HAVE_BLKDEV_ISSUE_DISCARD_ASYNC)
|
|
||||||
BIO_END_IO_PROTO(vdev_disk_discard_end_io, bio, error)
|
BIO_END_IO_PROTO(vdev_disk_discard_end_io, bio, error)
|
||||||
{
|
{
|
||||||
zio_t *zio = bio->bi_private;
|
zio_t *zio = bio->bi_private;
|
||||||
|
@ -1254,54 +1266,99 @@ BIO_END_IO_PROTO(vdev_disk_discard_end_io, bio, error)
|
||||||
zio_interrupt(zio);
|
zio_interrupt(zio);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Wrappers for the different secure erase and discard APIs. We use async
|
||||||
|
* when available; in this case, *biop is set to the last bio in the chain.
|
||||||
|
*/
|
||||||
static int
|
static int
|
||||||
vdev_issue_discard_trim(zio_t *zio, unsigned long flags)
|
vdev_bdev_issue_secure_erase(zfs_bdev_handle_t *bdh, sector_t sector,
|
||||||
|
sector_t nsect, struct bio **biop)
|
||||||
{
|
{
|
||||||
int ret;
|
*biop = NULL;
|
||||||
struct bio *bio = NULL;
|
int error;
|
||||||
|
|
||||||
#if defined(BLKDEV_DISCARD_SECURE)
|
#if defined(HAVE_BLKDEV_ISSUE_SECURE_ERASE)
|
||||||
ret = - __blkdev_issue_discard(
|
error = blkdev_issue_secure_erase(BDH_BDEV(bdh),
|
||||||
BDH_BDEV(((vdev_disk_t *)zio->io_vd->vdev_tsd)->vd_bdh),
|
sector, nsect, GFP_NOFS);
|
||||||
zio->io_offset >> 9, zio->io_size >> 9, GFP_NOFS, flags, &bio);
|
#elif defined(HAVE_BLKDEV_ISSUE_DISCARD_ASYNC_FLAGS)
|
||||||
|
error = __blkdev_issue_discard(BDH_BDEV(bdh),
|
||||||
|
sector, nsect, GFP_NOFS, BLKDEV_DISCARD_SECURE, biop);
|
||||||
|
#elif defined(HAVE_BLKDEV_ISSUE_DISCARD_FLAGS)
|
||||||
|
error = blkdev_issue_discard(BDH_BDEV(bdh),
|
||||||
|
sector, nsect, GFP_NOFS, BLKDEV_DISCARD_SECURE);
|
||||||
#else
|
#else
|
||||||
(void) flags;
|
#error "unsupported kernel"
|
||||||
ret = - __blkdev_issue_discard(
|
|
||||||
BDH_BDEV(((vdev_disk_t *)zio->io_vd->vdev_tsd)->vd_bdh),
|
|
||||||
zio->io_offset >> 9, zio->io_size >> 9, GFP_NOFS, &bio);
|
|
||||||
#endif
|
#endif
|
||||||
if (!ret && bio) {
|
|
||||||
|
return (error);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
vdev_bdev_issue_discard(zfs_bdev_handle_t *bdh, sector_t sector,
|
||||||
|
sector_t nsect, struct bio **biop)
|
||||||
|
{
|
||||||
|
*biop = NULL;
|
||||||
|
int error;
|
||||||
|
|
||||||
|
#if defined(HAVE_BLKDEV_ISSUE_DISCARD_ASYNC_FLAGS)
|
||||||
|
error = __blkdev_issue_discard(BDH_BDEV(bdh),
|
||||||
|
sector, nsect, GFP_NOFS, 0, biop);
|
||||||
|
#elif defined(HAVE_BLKDEV_ISSUE_DISCARD_ASYNC_NOFLAGS)
|
||||||
|
error = __blkdev_issue_discard(BDH_BDEV(bdh),
|
||||||
|
sector, nsect, GFP_NOFS, biop);
|
||||||
|
#elif defined(HAVE_BLKDEV_ISSUE_DISCARD_FLAGS)
|
||||||
|
error = blkdev_issue_discard(BDH_BDEV(bdh),
|
||||||
|
sector, nsect, GFP_NOFS, 0);
|
||||||
|
#elif defined(HAVE_BLKDEV_ISSUE_DISCARD_NOFLAGS)
|
||||||
|
error = blkdev_issue_discard(BDH_BDEV(bdh),
|
||||||
|
sector, nsect, GFP_NOFS);
|
||||||
|
#else
|
||||||
|
#error "unsupported kernel"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return (error);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Entry point for TRIM ops. This calls the right wrapper for secure erase or
|
||||||
|
* discard, and then does the appropriate finishing work for error vs success
|
||||||
|
* and async vs sync.
|
||||||
|
*/
|
||||||
|
static int
|
||||||
|
vdev_disk_io_trim(zio_t *zio)
|
||||||
|
{
|
||||||
|
int error;
|
||||||
|
struct bio *bio;
|
||||||
|
|
||||||
|
zfs_bdev_handle_t *bdh = ((vdev_disk_t *)zio->io_vd->vdev_tsd)->vd_bdh;
|
||||||
|
sector_t sector = zio->io_offset >> 9;
|
||||||
|
sector_t nsects = zio->io_size >> 9;
|
||||||
|
|
||||||
|
if (zio->io_trim_flags & ZIO_TRIM_SECURE)
|
||||||
|
error = vdev_bdev_issue_secure_erase(bdh, sector, nsects, &bio);
|
||||||
|
else
|
||||||
|
error = vdev_bdev_issue_discard(bdh, sector, nsects, &bio);
|
||||||
|
|
||||||
|
if (error != 0)
|
||||||
|
return (SET_ERROR(-error));
|
||||||
|
|
||||||
|
if (bio == NULL) {
|
||||||
|
/*
|
||||||
|
* This was a synchronous op that completed successfully, so
|
||||||
|
* return it to ZFS immediately.
|
||||||
|
*/
|
||||||
|
zio_interrupt(zio);
|
||||||
|
} else {
|
||||||
|
/*
|
||||||
|
* This was an asynchronous op; set up completion callback and
|
||||||
|
* issue it.
|
||||||
|
*/
|
||||||
bio->bi_private = zio;
|
bio->bi_private = zio;
|
||||||
bio->bi_end_io = vdev_disk_discard_end_io;
|
bio->bi_end_io = vdev_disk_discard_end_io;
|
||||||
vdev_submit_bio(bio);
|
vdev_submit_bio(bio);
|
||||||
}
|
}
|
||||||
return (ret);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
static int
|
return (0);
|
||||||
vdev_disk_io_trim(zio_t *zio)
|
|
||||||
{
|
|
||||||
unsigned long trim_flags = 0;
|
|
||||||
if (zio->io_trim_flags & ZIO_TRIM_SECURE) {
|
|
||||||
#if defined(HAVE_BLKDEV_ISSUE_SECURE_ERASE)
|
|
||||||
return (-blkdev_issue_secure_erase(
|
|
||||||
BDH_BDEV(((vdev_disk_t *)zio->io_vd->vdev_tsd)->vd_bdh),
|
|
||||||
zio->io_offset >> 9, zio->io_size >> 9, GFP_NOFS));
|
|
||||||
#elif defined(BLKDEV_DISCARD_SECURE)
|
|
||||||
trim_flags |= BLKDEV_DISCARD_SECURE;
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
#if defined(HAVE_BLKDEV_ISSUE_SECURE_ERASE) || \
|
|
||||||
defined(HAVE_BLKDEV_ISSUE_DISCARD_ASYNC)
|
|
||||||
return (vdev_issue_discard_trim(zio, trim_flags));
|
|
||||||
#elif defined(HAVE_BLKDEV_ISSUE_DISCARD)
|
|
||||||
return (-blkdev_issue_discard(
|
|
||||||
BDH_BDEV(((vdev_disk_t *)zio->io_vd->vdev_tsd)->vd_bdh),
|
|
||||||
zio->io_offset >> 9, zio->io_size >> 9, GFP_NOFS, trim_flags));
|
|
||||||
#else
|
|
||||||
#error "Unsupported kernel"
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int (*vdev_disk_io_rw_fn)(zio_t *zio) = NULL;
|
int (*vdev_disk_io_rw_fn)(zio_t *zio) = NULL;
|
||||||
|
@ -1376,14 +1433,12 @@ vdev_disk_io_start(zio_t *zio)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
case ZIO_TYPE_TRIM:
|
case ZIO_TYPE_TRIM:
|
||||||
zio->io_error = vdev_disk_io_trim(zio);
|
error = vdev_disk_io_trim(zio);
|
||||||
rw_exit(&vd->vd_lock);
|
rw_exit(&vd->vd_lock);
|
||||||
#if defined(HAVE_BLKDEV_ISSUE_SECURE_ERASE)
|
if (error) {
|
||||||
if (zio->io_trim_flags & ZIO_TRIM_SECURE)
|
zio->io_error = error;
|
||||||
zio_interrupt(zio);
|
zio_execute(zio);
|
||||||
#elif defined(HAVE_BLKDEV_ISSUE_DISCARD)
|
}
|
||||||
zio_interrupt(zio);
|
|
||||||
#endif
|
|
||||||
return;
|
return;
|
||||||
|
|
||||||
case ZIO_TYPE_READ:
|
case ZIO_TYPE_READ:
|
||||||
|
|
|
@ -37,6 +37,7 @@
|
||||||
#include <sys/spa_impl.h>
|
#include <sys/spa_impl.h>
|
||||||
#include <sys/zvol.h>
|
#include <sys/zvol.h>
|
||||||
#include <sys/zvol_impl.h>
|
#include <sys/zvol_impl.h>
|
||||||
|
#include <cityhash.h>
|
||||||
|
|
||||||
#include <linux/blkdev_compat.h>
|
#include <linux/blkdev_compat.h>
|
||||||
#include <linux/task_io_accounting_ops.h>
|
#include <linux/task_io_accounting_ops.h>
|
||||||
|
@ -53,6 +54,12 @@ static unsigned int zvol_request_sync = 0;
|
||||||
static unsigned int zvol_prefetch_bytes = (128 * 1024);
|
static unsigned int zvol_prefetch_bytes = (128 * 1024);
|
||||||
static unsigned long zvol_max_discard_blocks = 16384;
|
static unsigned long zvol_max_discard_blocks = 16384;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Switch taskq at multiple of 512 MB offset. This can be set to a lower value
|
||||||
|
* to utilize more threads for small files but may affect prefetch hits.
|
||||||
|
*/
|
||||||
|
#define ZVOL_TASKQ_OFFSET_SHIFT 29
|
||||||
|
|
||||||
#ifndef HAVE_BLKDEV_GET_ERESTARTSYS
|
#ifndef HAVE_BLKDEV_GET_ERESTARTSYS
|
||||||
static unsigned int zvol_open_timeout_ms = 1000;
|
static unsigned int zvol_open_timeout_ms = 1000;
|
||||||
#endif
|
#endif
|
||||||
|
@ -76,6 +83,8 @@ static boolean_t zvol_use_blk_mq = B_FALSE;
|
||||||
static unsigned int zvol_blk_mq_blocks_per_thread = 8;
|
static unsigned int zvol_blk_mq_blocks_per_thread = 8;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
static unsigned int zvol_num_taskqs = 0;
|
||||||
|
|
||||||
#ifndef BLKDEV_DEFAULT_RQ
|
#ifndef BLKDEV_DEFAULT_RQ
|
||||||
/* BLKDEV_MAX_RQ was renamed to BLKDEV_DEFAULT_RQ in the 5.16 kernel */
|
/* BLKDEV_MAX_RQ was renamed to BLKDEV_DEFAULT_RQ in the 5.16 kernel */
|
||||||
#define BLKDEV_DEFAULT_RQ BLKDEV_MAX_RQ
|
#define BLKDEV_DEFAULT_RQ BLKDEV_MAX_RQ
|
||||||
|
@ -114,7 +123,11 @@ struct zvol_state_os {
|
||||||
boolean_t use_blk_mq;
|
boolean_t use_blk_mq;
|
||||||
};
|
};
|
||||||
|
|
||||||
static taskq_t *zvol_taskq;
|
typedef struct zv_taskq {
|
||||||
|
uint_t tqs_cnt;
|
||||||
|
taskq_t **tqs_taskq;
|
||||||
|
} zv_taskq_t;
|
||||||
|
static zv_taskq_t zvol_taskqs;
|
||||||
static struct ida zvol_ida;
|
static struct ida zvol_ida;
|
||||||
|
|
||||||
typedef struct zv_request_stack {
|
typedef struct zv_request_stack {
|
||||||
|
@ -532,6 +545,22 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq,
|
||||||
}
|
}
|
||||||
|
|
||||||
zv_request_task_t *task;
|
zv_request_task_t *task;
|
||||||
|
zv_taskq_t *ztqs = &zvol_taskqs;
|
||||||
|
uint_t blk_mq_hw_queue = 0;
|
||||||
|
uint_t tq_idx;
|
||||||
|
uint_t taskq_hash;
|
||||||
|
#ifdef HAVE_BLK_MQ
|
||||||
|
if (rq)
|
||||||
|
#ifdef HAVE_BLK_MQ_RQ_HCTX
|
||||||
|
blk_mq_hw_queue = rq->mq_hctx->queue_num;
|
||||||
|
#else
|
||||||
|
blk_mq_hw_queue =
|
||||||
|
rq->q->queue_hw_ctx[rq->q->mq_map[rq->cpu]]->queue_num;
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
taskq_hash = cityhash4((uintptr_t)zv, offset >> ZVOL_TASKQ_OFFSET_SHIFT,
|
||||||
|
blk_mq_hw_queue, 0);
|
||||||
|
tq_idx = taskq_hash % ztqs->tqs_cnt;
|
||||||
|
|
||||||
if (rw == WRITE) {
|
if (rw == WRITE) {
|
||||||
if (unlikely(zv->zv_flags & ZVOL_RDONLY)) {
|
if (unlikely(zv->zv_flags & ZVOL_RDONLY)) {
|
||||||
|
@ -601,7 +630,7 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq,
|
||||||
zvol_discard(&zvr);
|
zvol_discard(&zvr);
|
||||||
} else {
|
} else {
|
||||||
task = zv_request_task_create(zvr);
|
task = zv_request_task_create(zvr);
|
||||||
taskq_dispatch_ent(zvol_taskq,
|
taskq_dispatch_ent(ztqs->tqs_taskq[tq_idx],
|
||||||
zvol_discard_task, task, 0, &task->ent);
|
zvol_discard_task, task, 0, &task->ent);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -609,7 +638,7 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq,
|
||||||
zvol_write(&zvr);
|
zvol_write(&zvr);
|
||||||
} else {
|
} else {
|
||||||
task = zv_request_task_create(zvr);
|
task = zv_request_task_create(zvr);
|
||||||
taskq_dispatch_ent(zvol_taskq,
|
taskq_dispatch_ent(ztqs->tqs_taskq[tq_idx],
|
||||||
zvol_write_task, task, 0, &task->ent);
|
zvol_write_task, task, 0, &task->ent);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -631,7 +660,7 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq,
|
||||||
zvol_read(&zvr);
|
zvol_read(&zvr);
|
||||||
} else {
|
} else {
|
||||||
task = zv_request_task_create(zvr);
|
task = zv_request_task_create(zvr);
|
||||||
taskq_dispatch_ent(zvol_taskq,
|
taskq_dispatch_ent(ztqs->tqs_taskq[tq_idx],
|
||||||
zvol_read_task, task, 0, &task->ent);
|
zvol_read_task, task, 0, &task->ent);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1053,6 +1082,16 @@ zvol_alloc_non_blk_mq(struct zvol_state_os *zso)
|
||||||
if (zso->zvo_disk == NULL)
|
if (zso->zvo_disk == NULL)
|
||||||
return (1);
|
return (1);
|
||||||
|
|
||||||
|
zso->zvo_disk->minors = ZVOL_MINORS;
|
||||||
|
zso->zvo_queue = zso->zvo_disk->queue;
|
||||||
|
#elif defined(HAVE_BLK_ALLOC_DISK_2ARG)
|
||||||
|
struct gendisk *disk = blk_alloc_disk(NULL, NUMA_NO_NODE);
|
||||||
|
if (IS_ERR(disk)) {
|
||||||
|
zso->zvo_disk = NULL;
|
||||||
|
return (1);
|
||||||
|
}
|
||||||
|
|
||||||
|
zso->zvo_disk = disk;
|
||||||
zso->zvo_disk->minors = ZVOL_MINORS;
|
zso->zvo_disk->minors = ZVOL_MINORS;
|
||||||
zso->zvo_queue = zso->zvo_disk->queue;
|
zso->zvo_queue = zso->zvo_disk->queue;
|
||||||
#else
|
#else
|
||||||
|
@ -1103,6 +1142,17 @@ zvol_alloc_blk_mq(zvol_state_t *zv)
|
||||||
}
|
}
|
||||||
zso->zvo_queue = zso->zvo_disk->queue;
|
zso->zvo_queue = zso->zvo_disk->queue;
|
||||||
zso->zvo_disk->minors = ZVOL_MINORS;
|
zso->zvo_disk->minors = ZVOL_MINORS;
|
||||||
|
#elif defined(HAVE_BLK_ALLOC_DISK_2ARG)
|
||||||
|
struct gendisk *disk = blk_mq_alloc_disk(&zso->tag_set, NULL, zv);
|
||||||
|
if (IS_ERR(disk)) {
|
||||||
|
zso->zvo_disk = NULL;
|
||||||
|
blk_mq_free_tag_set(&zso->tag_set);
|
||||||
|
return (1);
|
||||||
|
}
|
||||||
|
|
||||||
|
zso->zvo_disk = disk;
|
||||||
|
zso->zvo_queue = zso->zvo_disk->queue;
|
||||||
|
zso->zvo_disk->minors = ZVOL_MINORS;
|
||||||
#else
|
#else
|
||||||
zso->zvo_disk = alloc_disk(ZVOL_MINORS);
|
zso->zvo_disk = alloc_disk(ZVOL_MINORS);
|
||||||
if (zso->zvo_disk == NULL) {
|
if (zso->zvo_disk == NULL) {
|
||||||
|
@ -1256,7 +1306,7 @@ zvol_os_free(zvol_state_t *zv)
|
||||||
|
|
||||||
del_gendisk(zv->zv_zso->zvo_disk);
|
del_gendisk(zv->zv_zso->zvo_disk);
|
||||||
#if defined(HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS) && \
|
#if defined(HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS) && \
|
||||||
defined(HAVE_BLK_ALLOC_DISK)
|
(defined(HAVE_BLK_ALLOC_DISK) || defined(HAVE_BLK_ALLOC_DISK_2ARG))
|
||||||
#if defined(HAVE_BLK_CLEANUP_DISK)
|
#if defined(HAVE_BLK_CLEANUP_DISK)
|
||||||
blk_cleanup_disk(zv->zv_zso->zvo_disk);
|
blk_cleanup_disk(zv->zv_zso->zvo_disk);
|
||||||
#else
|
#else
|
||||||
|
@ -1577,8 +1627,40 @@ zvol_init(void)
|
||||||
zvol_actual_threads = MIN(MAX(zvol_threads, 1), 1024);
|
zvol_actual_threads = MIN(MAX(zvol_threads, 1), 1024);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Use at least 32 zvol_threads, but for many-core systems,
|
||||||
|
* prefer 6 threads per taskq, but no more taskqs
|
||||||
|
* than threads in them on large systems.
|
||||||
|
*
|
||||||
|
* taskq total
|
||||||
|
* cpus taskqs threads threads
|
||||||
|
* ------- ------- ------- -------
|
||||||
|
* 1 1 32 32
|
||||||
|
* 2 1 32 32
|
||||||
|
* 4 1 32 32
|
||||||
|
* 8 2 16 32
|
||||||
|
* 16 3 11 33
|
||||||
|
* 32 5 7 35
|
||||||
|
* 64 8 8 64
|
||||||
|
* 128 11 12 132
|
||||||
|
* 256 16 16 256
|
||||||
|
*/
|
||||||
|
zv_taskq_t *ztqs = &zvol_taskqs;
|
||||||
|
uint_t num_tqs = MIN(num_online_cpus(), zvol_num_taskqs);
|
||||||
|
if (num_tqs == 0) {
|
||||||
|
num_tqs = 1 + num_online_cpus() / 6;
|
||||||
|
while (num_tqs * num_tqs > zvol_actual_threads)
|
||||||
|
num_tqs--;
|
||||||
|
}
|
||||||
|
uint_t per_tq_thread = zvol_actual_threads / num_tqs;
|
||||||
|
if (per_tq_thread * num_tqs < zvol_actual_threads)
|
||||||
|
per_tq_thread++;
|
||||||
|
ztqs->tqs_cnt = num_tqs;
|
||||||
|
ztqs->tqs_taskq = kmem_alloc(num_tqs * sizeof (taskq_t *), KM_SLEEP);
|
||||||
error = register_blkdev(zvol_major, ZVOL_DRIVER);
|
error = register_blkdev(zvol_major, ZVOL_DRIVER);
|
||||||
if (error) {
|
if (error) {
|
||||||
|
kmem_free(ztqs->tqs_taskq, ztqs->tqs_cnt * sizeof (taskq_t *));
|
||||||
|
ztqs->tqs_taskq = NULL;
|
||||||
printk(KERN_INFO "ZFS: register_blkdev() failed %d\n", error);
|
printk(KERN_INFO "ZFS: register_blkdev() failed %d\n", error);
|
||||||
return (error);
|
return (error);
|
||||||
}
|
}
|
||||||
|
@ -1598,12 +1680,23 @@ zvol_init(void)
|
||||||
1024);
|
1024);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
zvol_taskq = taskq_create(ZVOL_DRIVER, zvol_actual_threads, maxclsyspri,
|
for (uint_t i = 0; i < num_tqs; i++) {
|
||||||
zvol_actual_threads, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
|
char name[32];
|
||||||
if (zvol_taskq == NULL) {
|
(void) snprintf(name, sizeof (name), "%s_tq-%u",
|
||||||
|
ZVOL_DRIVER, i);
|
||||||
|
ztqs->tqs_taskq[i] = taskq_create(name, per_tq_thread,
|
||||||
|
maxclsyspri, per_tq_thread, INT_MAX,
|
||||||
|
TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
|
||||||
|
if (ztqs->tqs_taskq[i] == NULL) {
|
||||||
|
for (int j = i - 1; j >= 0; j--)
|
||||||
|
taskq_destroy(ztqs->tqs_taskq[j]);
|
||||||
unregister_blkdev(zvol_major, ZVOL_DRIVER);
|
unregister_blkdev(zvol_major, ZVOL_DRIVER);
|
||||||
|
kmem_free(ztqs->tqs_taskq, ztqs->tqs_cnt *
|
||||||
|
sizeof (taskq_t *));
|
||||||
|
ztqs->tqs_taskq = NULL;
|
||||||
return (-ENOMEM);
|
return (-ENOMEM);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
zvol_init_impl();
|
zvol_init_impl();
|
||||||
ida_init(&zvol_ida);
|
ida_init(&zvol_ida);
|
||||||
|
@ -1613,9 +1706,22 @@ zvol_init(void)
|
||||||
void
|
void
|
||||||
zvol_fini(void)
|
zvol_fini(void)
|
||||||
{
|
{
|
||||||
|
zv_taskq_t *ztqs = &zvol_taskqs;
|
||||||
zvol_fini_impl();
|
zvol_fini_impl();
|
||||||
unregister_blkdev(zvol_major, ZVOL_DRIVER);
|
unregister_blkdev(zvol_major, ZVOL_DRIVER);
|
||||||
taskq_destroy(zvol_taskq);
|
|
||||||
|
if (ztqs->tqs_taskq == NULL) {
|
||||||
|
ASSERT3U(ztqs->tqs_cnt, ==, 0);
|
||||||
|
} else {
|
||||||
|
for (uint_t i = 0; i < ztqs->tqs_cnt; i++) {
|
||||||
|
ASSERT3P(ztqs->tqs_taskq[i], !=, NULL);
|
||||||
|
taskq_destroy(ztqs->tqs_taskq[i]);
|
||||||
|
}
|
||||||
|
kmem_free(ztqs->tqs_taskq, ztqs->tqs_cnt *
|
||||||
|
sizeof (taskq_t *));
|
||||||
|
ztqs->tqs_taskq = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
ida_destroy(&zvol_ida);
|
ida_destroy(&zvol_ida);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1636,6 +1742,9 @@ MODULE_PARM_DESC(zvol_request_sync, "Synchronously handle bio requests");
|
||||||
module_param(zvol_max_discard_blocks, ulong, 0444);
|
module_param(zvol_max_discard_blocks, ulong, 0444);
|
||||||
MODULE_PARM_DESC(zvol_max_discard_blocks, "Max number of blocks to discard");
|
MODULE_PARM_DESC(zvol_max_discard_blocks, "Max number of blocks to discard");
|
||||||
|
|
||||||
|
module_param(zvol_num_taskqs, uint, 0444);
|
||||||
|
MODULE_PARM_DESC(zvol_num_taskqs, "Number of zvol taskqs");
|
||||||
|
|
||||||
module_param(zvol_prefetch_bytes, uint, 0644);
|
module_param(zvol_prefetch_bytes, uint, 0644);
|
||||||
MODULE_PARM_DESC(zvol_prefetch_bytes, "Prefetch N bytes at zvol start+end");
|
MODULE_PARM_DESC(zvol_prefetch_bytes, "Prefetch N bytes at zvol start+end");
|
||||||
|
|
||||||
|
|
|
@ -1557,17 +1557,14 @@ dbuf_read_verify_dnode_crypt(dmu_buf_impl_t *db, uint32_t flags)
|
||||||
* returning.
|
* returning.
|
||||||
*/
|
*/
|
||||||
static int
|
static int
|
||||||
dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags,
|
dbuf_read_impl(dmu_buf_impl_t *db, dnode_t *dn, zio_t *zio, uint32_t flags,
|
||||||
db_lock_type_t dblt, const void *tag)
|
db_lock_type_t dblt, const void *tag)
|
||||||
{
|
{
|
||||||
dnode_t *dn;
|
|
||||||
zbookmark_phys_t zb;
|
zbookmark_phys_t zb;
|
||||||
uint32_t aflags = ARC_FLAG_NOWAIT;
|
uint32_t aflags = ARC_FLAG_NOWAIT;
|
||||||
int err, zio_flags;
|
int err, zio_flags;
|
||||||
blkptr_t bp, *bpp;
|
blkptr_t bp, *bpp = NULL;
|
||||||
|
|
||||||
DB_DNODE_ENTER(db);
|
|
||||||
dn = DB_DNODE(db);
|
|
||||||
ASSERT(!zfs_refcount_is_zero(&db->db_holds));
|
ASSERT(!zfs_refcount_is_zero(&db->db_holds));
|
||||||
ASSERT(MUTEX_HELD(&db->db_mtx));
|
ASSERT(MUTEX_HELD(&db->db_mtx));
|
||||||
ASSERT(db->db_state == DB_UNCACHED || db->db_state == DB_NOFILL);
|
ASSERT(db->db_state == DB_UNCACHED || db->db_state == DB_NOFILL);
|
||||||
|
@ -1580,31 +1577,30 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags,
|
||||||
goto early_unlock;
|
goto early_unlock;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (db->db_state == DB_UNCACHED) {
|
|
||||||
if (db->db_blkptr == NULL) {
|
|
||||||
bpp = NULL;
|
|
||||||
} else {
|
|
||||||
bp = *db->db_blkptr;
|
|
||||||
bpp = &bp;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
dbuf_dirty_record_t *dr;
|
|
||||||
|
|
||||||
ASSERT3S(db->db_state, ==, DB_NOFILL);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Block cloning: If we have a pending block clone,
|
* If we have a pending block clone, we don't want to read the
|
||||||
* we don't want to read the underlying block, but the content
|
* underlying block, but the content of the block being cloned,
|
||||||
* of the block being cloned, so we have the most recent data.
|
* pointed to by the dirty record, so we have the most recent data.
|
||||||
|
* If there is no dirty record, then we hit a race in a sync
|
||||||
|
* process when the dirty record is already removed, while the
|
||||||
|
* dbuf is not yet destroyed. Such a case is equivalent to uncached.
|
||||||
*/
|
*/
|
||||||
dr = list_head(&db->db_dirty_records);
|
if (db->db_state == DB_NOFILL) {
|
||||||
if (dr == NULL || !dr->dt.dl.dr_brtwrite) {
|
dbuf_dirty_record_t *dr = list_head(&db->db_dirty_records);
|
||||||
|
if (dr != NULL) {
|
||||||
|
if (!dr->dt.dl.dr_brtwrite) {
|
||||||
err = EIO;
|
err = EIO;
|
||||||
goto early_unlock;
|
goto early_unlock;
|
||||||
}
|
}
|
||||||
bp = dr->dt.dl.dr_overridden_by;
|
bp = dr->dt.dl.dr_overridden_by;
|
||||||
bpp = &bp;
|
bpp = &bp;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (bpp == NULL && db->db_blkptr != NULL) {
|
||||||
|
bp = *db->db_blkptr;
|
||||||
|
bpp = &bp;
|
||||||
|
}
|
||||||
|
|
||||||
err = dbuf_read_hole(db, dn, bpp);
|
err = dbuf_read_hole(db, dn, bpp);
|
||||||
if (err == 0)
|
if (err == 0)
|
||||||
|
@ -1643,8 +1639,6 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags,
|
||||||
if (err != 0)
|
if (err != 0)
|
||||||
goto early_unlock;
|
goto early_unlock;
|
||||||
|
|
||||||
DB_DNODE_EXIT(db);
|
|
||||||
|
|
||||||
db->db_state = DB_READ;
|
db->db_state = DB_READ;
|
||||||
DTRACE_SET_STATE(db, "read issued");
|
DTRACE_SET_STATE(db, "read issued");
|
||||||
mutex_exit(&db->db_mtx);
|
mutex_exit(&db->db_mtx);
|
||||||
|
@ -1669,12 +1663,11 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags,
|
||||||
* parent's rwlock, which would be a lock ordering violation.
|
* parent's rwlock, which would be a lock ordering violation.
|
||||||
*/
|
*/
|
||||||
dmu_buf_unlock_parent(db, dblt, tag);
|
dmu_buf_unlock_parent(db, dblt, tag);
|
||||||
(void) arc_read(zio, db->db_objset->os_spa, bpp,
|
return (arc_read(zio, db->db_objset->os_spa, bpp,
|
||||||
dbuf_read_done, db, ZIO_PRIORITY_SYNC_READ, zio_flags,
|
dbuf_read_done, db, ZIO_PRIORITY_SYNC_READ, zio_flags,
|
||||||
&aflags, &zb);
|
&aflags, &zb));
|
||||||
return (err);
|
|
||||||
early_unlock:
|
early_unlock:
|
||||||
DB_DNODE_EXIT(db);
|
|
||||||
mutex_exit(&db->db_mtx);
|
mutex_exit(&db->db_mtx);
|
||||||
dmu_buf_unlock_parent(db, dblt, tag);
|
dmu_buf_unlock_parent(db, dblt, tag);
|
||||||
return (err);
|
return (err);
|
||||||
|
@ -1759,7 +1752,7 @@ dbuf_fix_old_data(dmu_buf_impl_t *db, uint64_t txg)
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
|
dbuf_read(dmu_buf_impl_t *db, zio_t *pio, uint32_t flags)
|
||||||
{
|
{
|
||||||
int err = 0;
|
int err = 0;
|
||||||
boolean_t prefetch;
|
boolean_t prefetch;
|
||||||
|
@ -1775,7 +1768,7 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
|
||||||
dn = DB_DNODE(db);
|
dn = DB_DNODE(db);
|
||||||
|
|
||||||
prefetch = db->db_level == 0 && db->db_blkid != DMU_BONUS_BLKID &&
|
prefetch = db->db_level == 0 && db->db_blkid != DMU_BONUS_BLKID &&
|
||||||
(flags & DB_RF_NOPREFETCH) == 0 && dn != NULL;
|
(flags & DB_RF_NOPREFETCH) == 0;
|
||||||
|
|
||||||
mutex_enter(&db->db_mtx);
|
mutex_enter(&db->db_mtx);
|
||||||
if (flags & DB_RF_PARTIAL_FIRST)
|
if (flags & DB_RF_PARTIAL_FIRST)
|
||||||
|
@ -1822,13 +1815,13 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
|
||||||
|
|
||||||
db_lock_type_t dblt = dmu_buf_lock_parent(db, RW_READER, FTAG);
|
db_lock_type_t dblt = dmu_buf_lock_parent(db, RW_READER, FTAG);
|
||||||
|
|
||||||
if (zio == NULL && (db->db_state == DB_NOFILL ||
|
if (pio == NULL && (db->db_state == DB_NOFILL ||
|
||||||
(db->db_blkptr != NULL && !BP_IS_HOLE(db->db_blkptr)))) {
|
(db->db_blkptr != NULL && !BP_IS_HOLE(db->db_blkptr)))) {
|
||||||
spa_t *spa = dn->dn_objset->os_spa;
|
spa_t *spa = dn->dn_objset->os_spa;
|
||||||
zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL);
|
pio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL);
|
||||||
need_wait = B_TRUE;
|
need_wait = B_TRUE;
|
||||||
}
|
}
|
||||||
err = dbuf_read_impl(db, zio, flags, dblt, FTAG);
|
err = dbuf_read_impl(db, dn, pio, flags, dblt, FTAG);
|
||||||
/*
|
/*
|
||||||
* dbuf_read_impl has dropped db_mtx and our parent's rwlock
|
* dbuf_read_impl has dropped db_mtx and our parent's rwlock
|
||||||
* for us
|
* for us
|
||||||
|
@ -1849,9 +1842,10 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
|
||||||
*/
|
*/
|
||||||
if (need_wait) {
|
if (need_wait) {
|
||||||
if (err == 0)
|
if (err == 0)
|
||||||
err = zio_wait(zio);
|
err = zio_wait(pio);
|
||||||
else
|
else
|
||||||
VERIFY0(zio_wait(zio));
|
(void) zio_wait(pio);
|
||||||
|
pio = NULL;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
/*
|
/*
|
||||||
|
@ -1878,7 +1872,7 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
|
||||||
ASSERT(db->db_state == DB_READ ||
|
ASSERT(db->db_state == DB_READ ||
|
||||||
(flags & DB_RF_HAVESTRUCT) == 0);
|
(flags & DB_RF_HAVESTRUCT) == 0);
|
||||||
DTRACE_PROBE2(blocked__read, dmu_buf_impl_t *,
|
DTRACE_PROBE2(blocked__read, dmu_buf_impl_t *,
|
||||||
db, zio_t *, zio);
|
db, zio_t *, pio);
|
||||||
cv_wait(&db->db_changed, &db->db_mtx);
|
cv_wait(&db->db_changed, &db->db_mtx);
|
||||||
}
|
}
|
||||||
if (db->db_state == DB_UNCACHED)
|
if (db->db_state == DB_UNCACHED)
|
||||||
|
@ -1887,6 +1881,13 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (pio && err != 0) {
|
||||||
|
zio_t *zio = zio_null(pio, pio->io_spa, NULL, NULL, NULL,
|
||||||
|
ZIO_FLAG_CANFAIL);
|
||||||
|
zio->io_error = err;
|
||||||
|
zio_nowait(zio);
|
||||||
|
}
|
||||||
|
|
||||||
return (err);
|
return (err);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2631,26 +2632,24 @@ dmu_buf_will_dirty_impl(dmu_buf_t *db_fake, int flags, dmu_tx_t *tx)
|
||||||
ASSERT(!zfs_refcount_is_zero(&db->db_holds));
|
ASSERT(!zfs_refcount_is_zero(&db->db_holds));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Quick check for dirtiness. For already dirty blocks, this
|
* Quick check for dirtiness to improve performance for some workloads
|
||||||
* reduces runtime of this function by >90%, and overall performance
|
* (e.g. file deletion with indirect blocks cached).
|
||||||
* by 50% for some workloads (e.g. file deletion with indirect blocks
|
|
||||||
* cached).
|
|
||||||
*/
|
*/
|
||||||
mutex_enter(&db->db_mtx);
|
mutex_enter(&db->db_mtx);
|
||||||
|
|
||||||
if (db->db_state == DB_CACHED || db->db_state == DB_NOFILL) {
|
if (db->db_state == DB_CACHED || db->db_state == DB_NOFILL) {
|
||||||
dbuf_dirty_record_t *dr = dbuf_find_dirty_eq(db, tx->tx_txg);
|
|
||||||
/*
|
/*
|
||||||
* It's possible that it is already dirty but not cached,
|
* It's possible that the dbuf is already dirty but not cached,
|
||||||
* because there are some calls to dbuf_dirty() that don't
|
* because there are some calls to dbuf_dirty() that don't
|
||||||
* go through dmu_buf_will_dirty().
|
* go through dmu_buf_will_dirty().
|
||||||
*/
|
*/
|
||||||
|
dbuf_dirty_record_t *dr = dbuf_find_dirty_eq(db, tx->tx_txg);
|
||||||
if (dr != NULL) {
|
if (dr != NULL) {
|
||||||
if (dr->dt.dl.dr_brtwrite) {
|
if (db->db_level == 0 &&
|
||||||
|
dr->dt.dl.dr_brtwrite) {
|
||||||
/*
|
/*
|
||||||
* Block cloning: If we are dirtying a cloned
|
* Block cloning: If we are dirtying a cloned
|
||||||
* block, we cannot simply redirty it, because
|
* level 0 block, we cannot simply redirty it,
|
||||||
* this dr has no data associated with it.
|
* because this dr has no associated data.
|
||||||
* We will go through a full undirtying below,
|
* We will go through a full undirtying below,
|
||||||
* before dirtying it again.
|
* before dirtying it again.
|
||||||
*/
|
*/
|
||||||
|
@ -4597,11 +4596,10 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
|
||||||
if (os->os_encrypted && dn->dn_object == DMU_META_DNODE_OBJECT)
|
if (os->os_encrypted && dn->dn_object == DMU_META_DNODE_OBJECT)
|
||||||
dbuf_prepare_encrypted_dnode_leaf(dr);
|
dbuf_prepare_encrypted_dnode_leaf(dr);
|
||||||
|
|
||||||
if (db->db_state != DB_NOFILL &&
|
if (*datap != NULL && *datap == db->db_buf &&
|
||||||
dn->dn_object != DMU_META_DNODE_OBJECT &&
|
dn->dn_object != DMU_META_DNODE_OBJECT &&
|
||||||
zfs_refcount_count(&db->db_holds) > 1 &&
|
zfs_refcount_count(&db->db_holds) > 1 &&
|
||||||
dr->dt.dl.dr_override_state != DR_OVERRIDDEN &&
|
dr->dt.dl.dr_override_state != DR_OVERRIDDEN) {
|
||||||
*datap == db->db_buf) {
|
|
||||||
/*
|
/*
|
||||||
* If this buffer is currently "in use" (i.e., there
|
* If this buffer is currently "in use" (i.e., there
|
||||||
* are active holds and db_data still references it),
|
* are active holds and db_data still references it),
|
||||||
|
@ -4890,12 +4888,10 @@ dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb)
|
||||||
if (db->db_level == 0) {
|
if (db->db_level == 0) {
|
||||||
ASSERT(db->db_blkid != DMU_BONUS_BLKID);
|
ASSERT(db->db_blkid != DMU_BONUS_BLKID);
|
||||||
ASSERT(dr->dt.dl.dr_override_state == DR_NOT_OVERRIDDEN);
|
ASSERT(dr->dt.dl.dr_override_state == DR_NOT_OVERRIDDEN);
|
||||||
if (db->db_state != DB_NOFILL) {
|
|
||||||
if (dr->dt.dl.dr_data != NULL &&
|
if (dr->dt.dl.dr_data != NULL &&
|
||||||
dr->dt.dl.dr_data != db->db_buf) {
|
dr->dt.dl.dr_data != db->db_buf) {
|
||||||
arc_buf_destroy(dr->dt.dl.dr_data, db);
|
arc_buf_destroy(dr->dt.dl.dr_data, db);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
ASSERT(list_head(&dr->dt.di.dr_children) == NULL);
|
ASSERT(list_head(&dr->dt.di.dr_children) == NULL);
|
||||||
ASSERT3U(db->db.db_size, ==, 1 << dn->dn_phys->dn_indblkshift);
|
ASSERT3U(db->db.db_size, ==, 1 << dn->dn_phys->dn_indblkshift);
|
||||||
|
@ -5097,22 +5093,19 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
|
||||||
|
|
||||||
os = dn->dn_objset;
|
os = dn->dn_objset;
|
||||||
|
|
||||||
if (db->db_state != DB_NOFILL) {
|
|
||||||
if (db->db_level > 0 || dn->dn_type == DMU_OT_DNODE) {
|
if (db->db_level > 0 || dn->dn_type == DMU_OT_DNODE) {
|
||||||
/*
|
/*
|
||||||
* Private object buffers are released here rather
|
* Private object buffers are released here rather than in
|
||||||
* than in dbuf_dirty() since they are only modified
|
* dbuf_dirty() since they are only modified in the syncing
|
||||||
* in the syncing context and we don't want the
|
* context and we don't want the overhead of making multiple
|
||||||
* overhead of making multiple copies of the data.
|
* copies of the data.
|
||||||
*/
|
*/
|
||||||
if (BP_IS_HOLE(db->db_blkptr)) {
|
if (BP_IS_HOLE(db->db_blkptr))
|
||||||
arc_buf_thaw(data);
|
arc_buf_thaw(data);
|
||||||
} else {
|
else
|
||||||
dbuf_release_bp(db);
|
dbuf_release_bp(db);
|
||||||
}
|
|
||||||
dbuf_remap(dn, db, tx);
|
dbuf_remap(dn, db, tx);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if (parent != dn->dn_dbuf) {
|
if (parent != dn->dn_dbuf) {
|
||||||
/* Our parent is an indirect block. */
|
/* Our parent is an indirect block. */
|
||||||
|
@ -5147,7 +5140,7 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
|
||||||
|
|
||||||
if (db->db_blkid == DMU_SPILL_BLKID)
|
if (db->db_blkid == DMU_SPILL_BLKID)
|
||||||
wp_flag = WP_SPILL;
|
wp_flag = WP_SPILL;
|
||||||
wp_flag |= (db->db_state == DB_NOFILL) ? WP_NOFILL : 0;
|
wp_flag |= (data == NULL) ? WP_NOFILL : 0;
|
||||||
|
|
||||||
dmu_write_policy(os, dn, db->db_level, wp_flag, &zp);
|
dmu_write_policy(os, dn, db->db_level, wp_flag, &zp);
|
||||||
|
|
||||||
|
@ -5179,7 +5172,7 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
|
||||||
dr->dt.dl.dr_copies, dr->dt.dl.dr_nopwrite,
|
dr->dt.dl.dr_copies, dr->dt.dl.dr_nopwrite,
|
||||||
dr->dt.dl.dr_brtwrite);
|
dr->dt.dl.dr_brtwrite);
|
||||||
mutex_exit(&db->db_mtx);
|
mutex_exit(&db->db_mtx);
|
||||||
} else if (db->db_state == DB_NOFILL) {
|
} else if (data == NULL) {
|
||||||
ASSERT(zp.zp_checksum == ZIO_CHECKSUM_OFF ||
|
ASSERT(zp.zp_checksum == ZIO_CHECKSUM_OFF ||
|
||||||
zp.zp_checksum == ZIO_CHECKSUM_NOPARITY);
|
zp.zp_checksum == ZIO_CHECKSUM_NOPARITY);
|
||||||
dr->dr_zio = zio_write(pio, os->os_spa, txg,
|
dr->dr_zio = zio_write(pio, os->os_spa, txg,
|
||||||
|
|
|
@ -569,8 +569,10 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
|
||||||
for (i = 0; i < nblks; i++) {
|
for (i = 0; i < nblks; i++) {
|
||||||
dmu_buf_impl_t *db = dbuf_hold(dn, blkid + i, tag);
|
dmu_buf_impl_t *db = dbuf_hold(dn, blkid + i, tag);
|
||||||
if (db == NULL) {
|
if (db == NULL) {
|
||||||
if (zs)
|
if (zs) {
|
||||||
dmu_zfetch_run(zs, missed, B_TRUE);
|
dmu_zfetch_run(&dn->dn_zfetch, zs, missed,
|
||||||
|
B_TRUE);
|
||||||
|
}
|
||||||
rw_exit(&dn->dn_struct_rwlock);
|
rw_exit(&dn->dn_struct_rwlock);
|
||||||
dmu_buf_rele_array(dbp, nblks, tag);
|
dmu_buf_rele_array(dbp, nblks, tag);
|
||||||
if (read)
|
if (read)
|
||||||
|
@ -606,7 +608,7 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
|
||||||
zfs_racct_write(length, nblks);
|
zfs_racct_write(length, nblks);
|
||||||
|
|
||||||
if (zs)
|
if (zs)
|
||||||
dmu_zfetch_run(zs, missed, B_TRUE);
|
dmu_zfetch_run(&dn->dn_zfetch, zs, missed, B_TRUE);
|
||||||
rw_exit(&dn->dn_struct_rwlock);
|
rw_exit(&dn->dn_struct_rwlock);
|
||||||
|
|
||||||
if (read) {
|
if (read) {
|
||||||
|
|
|
@ -65,9 +65,16 @@ unsigned int zfetch_max_distance = 64 * 1024 * 1024;
|
||||||
#endif
|
#endif
|
||||||
/* max bytes to prefetch indirects for per stream (default 64MB) */
|
/* max bytes to prefetch indirects for per stream (default 64MB) */
|
||||||
unsigned int zfetch_max_idistance = 64 * 1024 * 1024;
|
unsigned int zfetch_max_idistance = 64 * 1024 * 1024;
|
||||||
|
/* max request reorder distance within a stream (default 16MB) */
|
||||||
|
unsigned int zfetch_max_reorder = 16 * 1024 * 1024;
|
||||||
|
/* Max log2 fraction of holes in a stream */
|
||||||
|
unsigned int zfetch_hole_shift = 2;
|
||||||
|
|
||||||
typedef struct zfetch_stats {
|
typedef struct zfetch_stats {
|
||||||
kstat_named_t zfetchstat_hits;
|
kstat_named_t zfetchstat_hits;
|
||||||
|
kstat_named_t zfetchstat_future;
|
||||||
|
kstat_named_t zfetchstat_stride;
|
||||||
|
kstat_named_t zfetchstat_past;
|
||||||
kstat_named_t zfetchstat_misses;
|
kstat_named_t zfetchstat_misses;
|
||||||
kstat_named_t zfetchstat_max_streams;
|
kstat_named_t zfetchstat_max_streams;
|
||||||
kstat_named_t zfetchstat_io_issued;
|
kstat_named_t zfetchstat_io_issued;
|
||||||
|
@ -76,6 +83,9 @@ typedef struct zfetch_stats {
|
||||||
|
|
||||||
static zfetch_stats_t zfetch_stats = {
|
static zfetch_stats_t zfetch_stats = {
|
||||||
{ "hits", KSTAT_DATA_UINT64 },
|
{ "hits", KSTAT_DATA_UINT64 },
|
||||||
|
{ "future", KSTAT_DATA_UINT64 },
|
||||||
|
{ "stride", KSTAT_DATA_UINT64 },
|
||||||
|
{ "past", KSTAT_DATA_UINT64 },
|
||||||
{ "misses", KSTAT_DATA_UINT64 },
|
{ "misses", KSTAT_DATA_UINT64 },
|
||||||
{ "max_streams", KSTAT_DATA_UINT64 },
|
{ "max_streams", KSTAT_DATA_UINT64 },
|
||||||
{ "io_issued", KSTAT_DATA_UINT64 },
|
{ "io_issued", KSTAT_DATA_UINT64 },
|
||||||
|
@ -84,6 +94,9 @@ static zfetch_stats_t zfetch_stats = {
|
||||||
|
|
||||||
struct {
|
struct {
|
||||||
wmsum_t zfetchstat_hits;
|
wmsum_t zfetchstat_hits;
|
||||||
|
wmsum_t zfetchstat_future;
|
||||||
|
wmsum_t zfetchstat_stride;
|
||||||
|
wmsum_t zfetchstat_past;
|
||||||
wmsum_t zfetchstat_misses;
|
wmsum_t zfetchstat_misses;
|
||||||
wmsum_t zfetchstat_max_streams;
|
wmsum_t zfetchstat_max_streams;
|
||||||
wmsum_t zfetchstat_io_issued;
|
wmsum_t zfetchstat_io_issued;
|
||||||
|
@ -107,6 +120,12 @@ zfetch_kstats_update(kstat_t *ksp, int rw)
|
||||||
return (EACCES);
|
return (EACCES);
|
||||||
zs->zfetchstat_hits.value.ui64 =
|
zs->zfetchstat_hits.value.ui64 =
|
||||||
wmsum_value(&zfetch_sums.zfetchstat_hits);
|
wmsum_value(&zfetch_sums.zfetchstat_hits);
|
||||||
|
zs->zfetchstat_future.value.ui64 =
|
||||||
|
wmsum_value(&zfetch_sums.zfetchstat_future);
|
||||||
|
zs->zfetchstat_stride.value.ui64 =
|
||||||
|
wmsum_value(&zfetch_sums.zfetchstat_stride);
|
||||||
|
zs->zfetchstat_past.value.ui64 =
|
||||||
|
wmsum_value(&zfetch_sums.zfetchstat_past);
|
||||||
zs->zfetchstat_misses.value.ui64 =
|
zs->zfetchstat_misses.value.ui64 =
|
||||||
wmsum_value(&zfetch_sums.zfetchstat_misses);
|
wmsum_value(&zfetch_sums.zfetchstat_misses);
|
||||||
zs->zfetchstat_max_streams.value.ui64 =
|
zs->zfetchstat_max_streams.value.ui64 =
|
||||||
|
@ -122,6 +141,9 @@ void
|
||||||
zfetch_init(void)
|
zfetch_init(void)
|
||||||
{
|
{
|
||||||
wmsum_init(&zfetch_sums.zfetchstat_hits, 0);
|
wmsum_init(&zfetch_sums.zfetchstat_hits, 0);
|
||||||
|
wmsum_init(&zfetch_sums.zfetchstat_future, 0);
|
||||||
|
wmsum_init(&zfetch_sums.zfetchstat_stride, 0);
|
||||||
|
wmsum_init(&zfetch_sums.zfetchstat_past, 0);
|
||||||
wmsum_init(&zfetch_sums.zfetchstat_misses, 0);
|
wmsum_init(&zfetch_sums.zfetchstat_misses, 0);
|
||||||
wmsum_init(&zfetch_sums.zfetchstat_max_streams, 0);
|
wmsum_init(&zfetch_sums.zfetchstat_max_streams, 0);
|
||||||
wmsum_init(&zfetch_sums.zfetchstat_io_issued, 0);
|
wmsum_init(&zfetch_sums.zfetchstat_io_issued, 0);
|
||||||
|
@ -147,6 +169,9 @@ zfetch_fini(void)
|
||||||
}
|
}
|
||||||
|
|
||||||
wmsum_fini(&zfetch_sums.zfetchstat_hits);
|
wmsum_fini(&zfetch_sums.zfetchstat_hits);
|
||||||
|
wmsum_fini(&zfetch_sums.zfetchstat_future);
|
||||||
|
wmsum_fini(&zfetch_sums.zfetchstat_stride);
|
||||||
|
wmsum_fini(&zfetch_sums.zfetchstat_past);
|
||||||
wmsum_fini(&zfetch_sums.zfetchstat_misses);
|
wmsum_fini(&zfetch_sums.zfetchstat_misses);
|
||||||
wmsum_fini(&zfetch_sums.zfetchstat_max_streams);
|
wmsum_fini(&zfetch_sums.zfetchstat_max_streams);
|
||||||
wmsum_fini(&zfetch_sums.zfetchstat_io_issued);
|
wmsum_fini(&zfetch_sums.zfetchstat_io_issued);
|
||||||
|
@ -222,22 +247,22 @@ static void
|
||||||
dmu_zfetch_stream_create(zfetch_t *zf, uint64_t blkid)
|
dmu_zfetch_stream_create(zfetch_t *zf, uint64_t blkid)
|
||||||
{
|
{
|
||||||
zstream_t *zs, *zs_next, *zs_old = NULL;
|
zstream_t *zs, *zs_next, *zs_old = NULL;
|
||||||
hrtime_t now = gethrtime(), t;
|
uint_t now = gethrestime_sec(), t;
|
||||||
|
|
||||||
ASSERT(MUTEX_HELD(&zf->zf_lock));
|
ASSERT(MUTEX_HELD(&zf->zf_lock));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Delete too old streams, reusing the first found one.
|
* Delete too old streams, reusing the first found one.
|
||||||
*/
|
*/
|
||||||
t = now - SEC2NSEC(zfetch_max_sec_reap);
|
t = now - zfetch_max_sec_reap;
|
||||||
for (zs = list_head(&zf->zf_stream); zs != NULL; zs = zs_next) {
|
for (zs = list_head(&zf->zf_stream); zs != NULL; zs = zs_next) {
|
||||||
zs_next = list_next(&zf->zf_stream, zs);
|
zs_next = list_next(&zf->zf_stream, zs);
|
||||||
/*
|
/*
|
||||||
* Skip if still active. 1 -- zf_stream reference.
|
* Skip if still active. 1 -- zf_stream reference.
|
||||||
*/
|
*/
|
||||||
if (zfs_refcount_count(&zs->zs_refs) != 1)
|
if ((int)(zs->zs_atime - t) >= 0)
|
||||||
continue;
|
continue;
|
||||||
if (zs->zs_atime > t)
|
if (zfs_refcount_count(&zs->zs_refs) != 1)
|
||||||
continue;
|
continue;
|
||||||
if (zs_old)
|
if (zs_old)
|
||||||
dmu_zfetch_stream_remove(zf, zs);
|
dmu_zfetch_stream_remove(zf, zs);
|
||||||
|
@ -246,6 +271,7 @@ dmu_zfetch_stream_create(zfetch_t *zf, uint64_t blkid)
|
||||||
}
|
}
|
||||||
if (zs_old) {
|
if (zs_old) {
|
||||||
zs = zs_old;
|
zs = zs_old;
|
||||||
|
list_remove(&zf->zf_stream, zs);
|
||||||
goto reuse;
|
goto reuse;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -255,21 +281,23 @@ dmu_zfetch_stream_create(zfetch_t *zf, uint64_t blkid)
|
||||||
* for all the streams to be non-overlapping.
|
* for all the streams to be non-overlapping.
|
||||||
*/
|
*/
|
||||||
uint32_t max_streams = MAX(1, MIN(zfetch_max_streams,
|
uint32_t max_streams = MAX(1, MIN(zfetch_max_streams,
|
||||||
zf->zf_dnode->dn_maxblkid * zf->zf_dnode->dn_datablksz /
|
(zf->zf_dnode->dn_maxblkid << zf->zf_dnode->dn_datablkshift) /
|
||||||
zfetch_max_distance));
|
zfetch_max_distance));
|
||||||
if (zf->zf_numstreams >= max_streams) {
|
if (zf->zf_numstreams >= max_streams) {
|
||||||
t = now - SEC2NSEC(zfetch_min_sec_reap);
|
t = now - zfetch_min_sec_reap;
|
||||||
for (zs = list_head(&zf->zf_stream); zs != NULL;
|
for (zs = list_head(&zf->zf_stream); zs != NULL;
|
||||||
zs = list_next(&zf->zf_stream, zs)) {
|
zs = list_next(&zf->zf_stream, zs)) {
|
||||||
|
if ((int)(zs->zs_atime - t) >= 0)
|
||||||
|
continue;
|
||||||
if (zfs_refcount_count(&zs->zs_refs) != 1)
|
if (zfs_refcount_count(&zs->zs_refs) != 1)
|
||||||
continue;
|
continue;
|
||||||
if (zs->zs_atime > t)
|
if (zs_old == NULL ||
|
||||||
continue;
|
(int)(zs_old->zs_atime - zs->zs_atime) >= 0)
|
||||||
if (zs_old == NULL || zs->zs_atime < zs_old->zs_atime)
|
|
||||||
zs_old = zs;
|
zs_old = zs;
|
||||||
}
|
}
|
||||||
if (zs_old) {
|
if (zs_old) {
|
||||||
zs = zs_old;
|
zs = zs_old;
|
||||||
|
list_remove(&zf->zf_stream, zs);
|
||||||
goto reuse;
|
goto reuse;
|
||||||
}
|
}
|
||||||
ZFETCHSTAT_BUMP(zfetchstat_max_streams);
|
ZFETCHSTAT_BUMP(zfetchstat_max_streams);
|
||||||
|
@ -277,24 +305,24 @@ dmu_zfetch_stream_create(zfetch_t *zf, uint64_t blkid)
|
||||||
}
|
}
|
||||||
|
|
||||||
zs = kmem_zalloc(sizeof (*zs), KM_SLEEP);
|
zs = kmem_zalloc(sizeof (*zs), KM_SLEEP);
|
||||||
zs->zs_fetch = zf;
|
|
||||||
zfs_refcount_create(&zs->zs_callers);
|
zfs_refcount_create(&zs->zs_callers);
|
||||||
zfs_refcount_create(&zs->zs_refs);
|
zfs_refcount_create(&zs->zs_refs);
|
||||||
/* One reference for zf_stream. */
|
/* One reference for zf_stream. */
|
||||||
zfs_refcount_add(&zs->zs_refs, NULL);
|
zfs_refcount_add(&zs->zs_refs, NULL);
|
||||||
zf->zf_numstreams++;
|
zf->zf_numstreams++;
|
||||||
list_insert_head(&zf->zf_stream, zs);
|
|
||||||
|
|
||||||
reuse:
|
reuse:
|
||||||
|
list_insert_head(&zf->zf_stream, zs);
|
||||||
zs->zs_blkid = blkid;
|
zs->zs_blkid = blkid;
|
||||||
|
/* Allow immediate stream reuse until first hit. */
|
||||||
|
zs->zs_atime = now - zfetch_min_sec_reap;
|
||||||
|
memset(zs->zs_ranges, 0, sizeof (zs->zs_ranges));
|
||||||
zs->zs_pf_dist = 0;
|
zs->zs_pf_dist = 0;
|
||||||
|
zs->zs_ipf_dist = 0;
|
||||||
zs->zs_pf_start = blkid;
|
zs->zs_pf_start = blkid;
|
||||||
zs->zs_pf_end = blkid;
|
zs->zs_pf_end = blkid;
|
||||||
zs->zs_ipf_dist = 0;
|
|
||||||
zs->zs_ipf_start = blkid;
|
zs->zs_ipf_start = blkid;
|
||||||
zs->zs_ipf_end = blkid;
|
zs->zs_ipf_end = blkid;
|
||||||
/* Allow immediate stream reuse until first hit. */
|
|
||||||
zs->zs_atime = now - SEC2NSEC(zfetch_min_sec_reap);
|
|
||||||
zs->zs_missed = B_FALSE;
|
zs->zs_missed = B_FALSE;
|
||||||
zs->zs_more = B_FALSE;
|
zs->zs_more = B_FALSE;
|
||||||
}
|
}
|
||||||
|
@ -311,6 +339,120 @@ dmu_zfetch_done(void *arg, uint64_t level, uint64_t blkid, boolean_t io_issued)
|
||||||
aggsum_add(&zfetch_sums.zfetchstat_io_active, -1);
|
aggsum_add(&zfetch_sums.zfetchstat_io_active, -1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Process stream hit access for nblks blocks starting at zs_blkid. Return
|
||||||
|
* number of blocks to proceed for after aggregation with future ranges.
|
||||||
|
*/
|
||||||
|
static uint64_t
|
||||||
|
dmu_zfetch_hit(zstream_t *zs, uint64_t nblks)
|
||||||
|
{
|
||||||
|
uint_t i, j;
|
||||||
|
|
||||||
|
/* Optimize sequential accesses (no future ranges). */
|
||||||
|
if (zs->zs_ranges[0].start == 0)
|
||||||
|
goto done;
|
||||||
|
|
||||||
|
/* Look for intersections with further ranges. */
|
||||||
|
for (i = 0; i < ZFETCH_RANGES; i++) {
|
||||||
|
zsrange_t *r = &zs->zs_ranges[i];
|
||||||
|
if (r->start == 0 || r->start > nblks)
|
||||||
|
break;
|
||||||
|
if (r->end >= nblks) {
|
||||||
|
nblks = r->end;
|
||||||
|
i++;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Delete all found intersecting ranges, updates remaining. */
|
||||||
|
for (j = 0; i < ZFETCH_RANGES; i++, j++) {
|
||||||
|
if (zs->zs_ranges[i].start == 0)
|
||||||
|
break;
|
||||||
|
ASSERT3U(zs->zs_ranges[i].start, >, nblks);
|
||||||
|
ASSERT3U(zs->zs_ranges[i].end, >, nblks);
|
||||||
|
zs->zs_ranges[j].start = zs->zs_ranges[i].start - nblks;
|
||||||
|
zs->zs_ranges[j].end = zs->zs_ranges[i].end - nblks;
|
||||||
|
}
|
||||||
|
if (j < ZFETCH_RANGES) {
|
||||||
|
zs->zs_ranges[j].start = 0;
|
||||||
|
zs->zs_ranges[j].end = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
done:
|
||||||
|
zs->zs_blkid += nblks;
|
||||||
|
return (nblks);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Process future stream access for nblks blocks starting at blkid. Return
|
||||||
|
* number of blocks to proceed for if future ranges reach fill threshold.
|
||||||
|
*/
|
||||||
|
static uint64_t
|
||||||
|
dmu_zfetch_future(zstream_t *zs, uint64_t blkid, uint64_t nblks)
|
||||||
|
{
|
||||||
|
ASSERT3U(blkid, >, zs->zs_blkid);
|
||||||
|
blkid -= zs->zs_blkid;
|
||||||
|
ASSERT3U(blkid + nblks, <=, UINT16_MAX);
|
||||||
|
|
||||||
|
/* Search for first and last intersection or insert point. */
|
||||||
|
uint_t f = ZFETCH_RANGES, l = 0, i;
|
||||||
|
for (i = 0; i < ZFETCH_RANGES; i++) {
|
||||||
|
zsrange_t *r = &zs->zs_ranges[i];
|
||||||
|
if (r->start == 0 || r->start > blkid + nblks)
|
||||||
|
break;
|
||||||
|
if (r->end < blkid)
|
||||||
|
continue;
|
||||||
|
if (f > i)
|
||||||
|
f = i;
|
||||||
|
if (l < i)
|
||||||
|
l = i;
|
||||||
|
}
|
||||||
|
if (f <= l) {
|
||||||
|
/* Got some intersecting range, expand it if needed. */
|
||||||
|
if (zs->zs_ranges[f].start > blkid)
|
||||||
|
zs->zs_ranges[f].start = blkid;
|
||||||
|
zs->zs_ranges[f].end = MAX(zs->zs_ranges[l].end, blkid + nblks);
|
||||||
|
if (f < l) {
|
||||||
|
/* Got more than one intersection, remove others. */
|
||||||
|
for (f++, l++; l < ZFETCH_RANGES; f++, l++) {
|
||||||
|
zs->zs_ranges[f].start = zs->zs_ranges[l].start;
|
||||||
|
zs->zs_ranges[f].end = zs->zs_ranges[l].end;
|
||||||
|
}
|
||||||
|
zs->zs_ranges[ZFETCH_RANGES - 1].start = 0;
|
||||||
|
zs->zs_ranges[ZFETCH_RANGES - 1].end = 0;
|
||||||
|
}
|
||||||
|
} else if (i < ZFETCH_RANGES) {
|
||||||
|
/* Got no intersecting ranges, insert new one. */
|
||||||
|
for (l = ZFETCH_RANGES - 1; l > i; l--) {
|
||||||
|
zs->zs_ranges[l].start = zs->zs_ranges[l - 1].start;
|
||||||
|
zs->zs_ranges[l].end = zs->zs_ranges[l - 1].end;
|
||||||
|
}
|
||||||
|
zs->zs_ranges[i].start = blkid;
|
||||||
|
zs->zs_ranges[i].end = blkid + nblks;
|
||||||
|
} else {
|
||||||
|
/* No space left to insert. Drop the range. */
|
||||||
|
return (0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Check if with the new access addition we reached fill threshold. */
|
||||||
|
if (zfetch_hole_shift >= 16)
|
||||||
|
return (0);
|
||||||
|
uint_t hole = 0;
|
||||||
|
for (i = f = l = 0; i < ZFETCH_RANGES; i++) {
|
||||||
|
zsrange_t *r = &zs->zs_ranges[i];
|
||||||
|
if (r->start == 0)
|
||||||
|
break;
|
||||||
|
hole += r->start - f;
|
||||||
|
f = r->end;
|
||||||
|
if (hole <= r->end >> zfetch_hole_shift)
|
||||||
|
l = r->end;
|
||||||
|
}
|
||||||
|
if (l > 0)
|
||||||
|
return (dmu_zfetch_hit(zs, l));
|
||||||
|
|
||||||
|
return (0);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This is the predictive prefetch entry point. dmu_zfetch_prepare()
|
* This is the predictive prefetch entry point. dmu_zfetch_prepare()
|
||||||
* associates dnode access specified with blkid and nblks arguments with
|
* associates dnode access specified with blkid and nblks arguments with
|
||||||
|
@ -370,54 +512,93 @@ dmu_zfetch_prepare(zfetch_t *zf, uint64_t blkid, uint64_t nblks,
|
||||||
mutex_enter(&zf->zf_lock);
|
mutex_enter(&zf->zf_lock);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Find matching prefetch stream. Depending on whether the accesses
|
* Find perfect prefetch stream. Depending on whether the accesses
|
||||||
* are block-aligned, first block of the new access may either follow
|
* are block-aligned, first block of the new access may either follow
|
||||||
* the last block of the previous access, or be equal to it.
|
* the last block of the previous access, or be equal to it.
|
||||||
*/
|
*/
|
||||||
|
unsigned int dbs = zf->zf_dnode->dn_datablkshift;
|
||||||
|
uint64_t end_blkid = blkid + nblks;
|
||||||
for (zs = list_head(&zf->zf_stream); zs != NULL;
|
for (zs = list_head(&zf->zf_stream); zs != NULL;
|
||||||
zs = list_next(&zf->zf_stream, zs)) {
|
zs = list_next(&zf->zf_stream, zs)) {
|
||||||
if (blkid == zs->zs_blkid) {
|
if (blkid == zs->zs_blkid) {
|
||||||
break;
|
goto hit;
|
||||||
} else if (blkid + 1 == zs->zs_blkid) {
|
} else if (blkid + 1 == zs->zs_blkid) {
|
||||||
blkid++;
|
blkid++;
|
||||||
nblks--;
|
nblks--;
|
||||||
break;
|
goto hit;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If the file is ending, remove the matching stream if found.
|
* Find close enough prefetch stream. Access crossing stream position
|
||||||
* If not found then it is too late to create a new one now.
|
* is a hit in its new part. Access ahead of stream position considered
|
||||||
|
* a hit for metadata prefetch, since we do not care about fill percent,
|
||||||
|
* or stored for future otherwise. Access behind stream position is
|
||||||
|
* silently ignored, since we already skipped it reaching fill percent.
|
||||||
*/
|
*/
|
||||||
uint64_t end_of_access_blkid = blkid + nblks;
|
uint_t max_reorder = MIN((zfetch_max_reorder >> dbs) + 1, UINT16_MAX);
|
||||||
if (end_of_access_blkid >= maxblkid) {
|
uint_t t = gethrestime_sec() - zfetch_max_sec_reap;
|
||||||
if (zs != NULL)
|
for (zs = list_head(&zf->zf_stream); zs != NULL;
|
||||||
dmu_zfetch_stream_remove(zf, zs);
|
zs = list_next(&zf->zf_stream, zs)) {
|
||||||
mutex_exit(&zf->zf_lock);
|
if (blkid > zs->zs_blkid) {
|
||||||
if (!have_lock)
|
if (end_blkid <= zs->zs_blkid + max_reorder) {
|
||||||
rw_exit(&zf->zf_dnode->dn_struct_rwlock);
|
if (!fetch_data) {
|
||||||
return (NULL);
|
nblks = dmu_zfetch_hit(zs,
|
||||||
|
end_blkid - zs->zs_blkid);
|
||||||
|
ZFETCHSTAT_BUMP(zfetchstat_stride);
|
||||||
|
goto future;
|
||||||
|
}
|
||||||
|
nblks = dmu_zfetch_future(zs, blkid, nblks);
|
||||||
|
if (nblks > 0)
|
||||||
|
ZFETCHSTAT_BUMP(zfetchstat_stride);
|
||||||
|
else
|
||||||
|
ZFETCHSTAT_BUMP(zfetchstat_future);
|
||||||
|
goto future;
|
||||||
|
}
|
||||||
|
} else if (end_blkid >= zs->zs_blkid) {
|
||||||
|
nblks -= zs->zs_blkid - blkid;
|
||||||
|
blkid += zs->zs_blkid - blkid;
|
||||||
|
goto hit;
|
||||||
|
} else if (end_blkid + max_reorder > zs->zs_blkid &&
|
||||||
|
(int)(zs->zs_atime - t) >= 0) {
|
||||||
|
ZFETCHSTAT_BUMP(zfetchstat_past);
|
||||||
|
zs->zs_atime = gethrestime_sec();
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Exit if we already prefetched this block before. */
|
|
||||||
if (nblks == 0) {
|
|
||||||
mutex_exit(&zf->zf_lock);
|
|
||||||
if (!have_lock)
|
|
||||||
rw_exit(&zf->zf_dnode->dn_struct_rwlock);
|
|
||||||
return (NULL);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (zs == NULL) {
|
|
||||||
/*
|
/*
|
||||||
* This access is not part of any existing stream. Create
|
* This access is not part of any existing stream. Create a new
|
||||||
* a new stream for it.
|
* stream for it unless we are at the end of file.
|
||||||
*/
|
*/
|
||||||
dmu_zfetch_stream_create(zf, end_of_access_blkid);
|
if (end_blkid < maxblkid)
|
||||||
|
dmu_zfetch_stream_create(zf, end_blkid);
|
||||||
mutex_exit(&zf->zf_lock);
|
mutex_exit(&zf->zf_lock);
|
||||||
if (!have_lock)
|
if (!have_lock)
|
||||||
rw_exit(&zf->zf_dnode->dn_struct_rwlock);
|
rw_exit(&zf->zf_dnode->dn_struct_rwlock);
|
||||||
ZFETCHSTAT_BUMP(zfetchstat_misses);
|
ZFETCHSTAT_BUMP(zfetchstat_misses);
|
||||||
return (NULL);
|
return (NULL);
|
||||||
|
|
||||||
|
hit:
|
||||||
|
nblks = dmu_zfetch_hit(zs, nblks);
|
||||||
|
ZFETCHSTAT_BUMP(zfetchstat_hits);
|
||||||
|
|
||||||
|
future:
|
||||||
|
zs->zs_atime = gethrestime_sec();
|
||||||
|
|
||||||
|
/* Exit if we already prefetched for this position before. */
|
||||||
|
if (nblks == 0)
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
/* If the file is ending, remove the stream. */
|
||||||
|
end_blkid = zs->zs_blkid;
|
||||||
|
if (end_blkid >= maxblkid) {
|
||||||
|
dmu_zfetch_stream_remove(zf, zs);
|
||||||
|
out:
|
||||||
|
mutex_exit(&zf->zf_lock);
|
||||||
|
if (!have_lock)
|
||||||
|
rw_exit(&zf->zf_dnode->dn_struct_rwlock);
|
||||||
|
return (NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -432,7 +613,6 @@ dmu_zfetch_prepare(zfetch_t *zf, uint64_t blkid, uint64_t nblks,
|
||||||
* than ~6% of ARC held by active prefetches. It should help with
|
* than ~6% of ARC held by active prefetches. It should help with
|
||||||
* getting out of RAM on some badly mispredicted read patterns.
|
* getting out of RAM on some badly mispredicted read patterns.
|
||||||
*/
|
*/
|
||||||
unsigned int dbs = zf->zf_dnode->dn_datablkshift;
|
|
||||||
unsigned int nbytes = nblks << dbs;
|
unsigned int nbytes = nblks << dbs;
|
||||||
unsigned int pf_nblks;
|
unsigned int pf_nblks;
|
||||||
if (fetch_data) {
|
if (fetch_data) {
|
||||||
|
@ -452,10 +632,10 @@ dmu_zfetch_prepare(zfetch_t *zf, uint64_t blkid, uint64_t nblks,
|
||||||
} else {
|
} else {
|
||||||
pf_nblks = 0;
|
pf_nblks = 0;
|
||||||
}
|
}
|
||||||
if (zs->zs_pf_start < end_of_access_blkid)
|
if (zs->zs_pf_start < end_blkid)
|
||||||
zs->zs_pf_start = end_of_access_blkid;
|
zs->zs_pf_start = end_blkid;
|
||||||
if (zs->zs_pf_end < end_of_access_blkid + pf_nblks)
|
if (zs->zs_pf_end < end_blkid + pf_nblks)
|
||||||
zs->zs_pf_end = end_of_access_blkid + pf_nblks;
|
zs->zs_pf_end = end_blkid + pf_nblks;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Do the same for indirects, starting where we will stop reading
|
* Do the same for indirects, starting where we will stop reading
|
||||||
|
@ -473,9 +653,6 @@ dmu_zfetch_prepare(zfetch_t *zf, uint64_t blkid, uint64_t nblks,
|
||||||
if (zs->zs_ipf_end < zs->zs_pf_end + pf_nblks)
|
if (zs->zs_ipf_end < zs->zs_pf_end + pf_nblks)
|
||||||
zs->zs_ipf_end = zs->zs_pf_end + pf_nblks;
|
zs->zs_ipf_end = zs->zs_pf_end + pf_nblks;
|
||||||
|
|
||||||
zs->zs_blkid = end_of_access_blkid;
|
|
||||||
/* Protect the stream from reclamation. */
|
|
||||||
zs->zs_atime = gethrtime();
|
|
||||||
zfs_refcount_add(&zs->zs_refs, NULL);
|
zfs_refcount_add(&zs->zs_refs, NULL);
|
||||||
/* Count concurrent callers. */
|
/* Count concurrent callers. */
|
||||||
zfs_refcount_add(&zs->zs_callers, NULL);
|
zfs_refcount_add(&zs->zs_callers, NULL);
|
||||||
|
@ -483,15 +660,13 @@ dmu_zfetch_prepare(zfetch_t *zf, uint64_t blkid, uint64_t nblks,
|
||||||
|
|
||||||
if (!have_lock)
|
if (!have_lock)
|
||||||
rw_exit(&zf->zf_dnode->dn_struct_rwlock);
|
rw_exit(&zf->zf_dnode->dn_struct_rwlock);
|
||||||
|
|
||||||
ZFETCHSTAT_BUMP(zfetchstat_hits);
|
|
||||||
return (zs);
|
return (zs);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
dmu_zfetch_run(zstream_t *zs, boolean_t missed, boolean_t have_lock)
|
dmu_zfetch_run(zfetch_t *zf, zstream_t *zs, boolean_t missed,
|
||||||
|
boolean_t have_lock)
|
||||||
{
|
{
|
||||||
zfetch_t *zf = zs->zs_fetch;
|
|
||||||
int64_t pf_start, pf_end, ipf_start, ipf_end;
|
int64_t pf_start, pf_end, ipf_start, ipf_end;
|
||||||
int epbs, issued;
|
int epbs, issued;
|
||||||
|
|
||||||
|
@ -567,7 +742,7 @@ dmu_zfetch(zfetch_t *zf, uint64_t blkid, uint64_t nblks, boolean_t fetch_data,
|
||||||
|
|
||||||
zs = dmu_zfetch_prepare(zf, blkid, nblks, fetch_data, have_lock);
|
zs = dmu_zfetch_prepare(zf, blkid, nblks, fetch_data, have_lock);
|
||||||
if (zs)
|
if (zs)
|
||||||
dmu_zfetch_run(zs, missed, have_lock);
|
dmu_zfetch_run(zf, zs, missed, have_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
ZFS_MODULE_PARAM(zfs_prefetch, zfs_prefetch_, disable, INT, ZMOD_RW,
|
ZFS_MODULE_PARAM(zfs_prefetch, zfs_prefetch_, disable, INT, ZMOD_RW,
|
||||||
|
@ -590,3 +765,9 @@ ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, max_distance, UINT, ZMOD_RW,
|
||||||
|
|
||||||
ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, max_idistance, UINT, ZMOD_RW,
|
ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, max_idistance, UINT, ZMOD_RW,
|
||||||
"Max bytes to prefetch indirects for per stream");
|
"Max bytes to prefetch indirects for per stream");
|
||||||
|
|
||||||
|
ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, max_reorder, UINT, ZMOD_RW,
|
||||||
|
"Max request reorder distance within a stream");
|
||||||
|
|
||||||
|
ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, hole_shift, UINT, ZMOD_RW,
|
||||||
|
"Max log2 fraction of holes in a stream");
|
||||||
|
|
|
@ -4086,13 +4086,16 @@ zio_vdev_io_done(zio_t *zio)
|
||||||
}
|
}
|
||||||
|
|
||||||
ASSERT(zio->io_type == ZIO_TYPE_READ ||
|
ASSERT(zio->io_type == ZIO_TYPE_READ ||
|
||||||
zio->io_type == ZIO_TYPE_WRITE || zio->io_type == ZIO_TYPE_TRIM);
|
zio->io_type == ZIO_TYPE_WRITE ||
|
||||||
|
zio->io_type == ZIO_TYPE_IOCTL ||
|
||||||
|
zio->io_type == ZIO_TYPE_TRIM);
|
||||||
|
|
||||||
if (zio->io_delay)
|
if (zio->io_delay)
|
||||||
zio->io_delay = gethrtime() - zio->io_delay;
|
zio->io_delay = gethrtime() - zio->io_delay;
|
||||||
|
|
||||||
if (vd != NULL && vd->vdev_ops->vdev_op_leaf &&
|
if (vd != NULL && vd->vdev_ops->vdev_op_leaf &&
|
||||||
vd->vdev_ops != &vdev_draid_spare_ops) {
|
vd->vdev_ops != &vdev_draid_spare_ops) {
|
||||||
|
if (zio->io_type != ZIO_TYPE_IOCTL)
|
||||||
vdev_queue_io_done(zio);
|
vdev_queue_io_done(zio);
|
||||||
|
|
||||||
if (zio_injection_enabled && zio->io_error == 0)
|
if (zio_injection_enabled && zio->io_error == 0)
|
||||||
|
|
|
@ -364,10 +364,10 @@ zio_handle_device_injection_impl(vdev_t *vd, zio_t *zio, int err1, int err2)
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We skip over faults in the labels unless it's during
|
* We skip over faults in the labels unless it's during device open
|
||||||
* device open (i.e. zio == NULL).
|
* (i.e. zio == NULL) or a device flush (offset is meaningless)
|
||||||
*/
|
*/
|
||||||
if (zio != NULL) {
|
if (zio != NULL && zio->io_type != ZIO_TYPE_IOCTL) {
|
||||||
uint64_t offset = zio->io_offset;
|
uint64_t offset = zio->io_offset;
|
||||||
|
|
||||||
if (offset < VDEV_LABEL_START_SIZE ||
|
if (offset < VDEV_LABEL_START_SIZE ||
|
||||||
|
|
|
@ -153,6 +153,12 @@ tests = [ 'clean_mirror_001_pos', 'clean_mirror_002_pos',
|
||||||
'clean_mirror_003_pos', 'clean_mirror_004_pos']
|
'clean_mirror_003_pos', 'clean_mirror_004_pos']
|
||||||
tags = ['functional', 'clean_mirror']
|
tags = ['functional', 'clean_mirror']
|
||||||
|
|
||||||
|
[tests/functional/cli_root/zinject]
|
||||||
|
tests = ['zinject_args']
|
||||||
|
pre =
|
||||||
|
post =
|
||||||
|
tags = ['functional', 'cli_root', 'zinject']
|
||||||
|
|
||||||
[tests/functional/cli_root/zdb]
|
[tests/functional/cli_root/zdb]
|
||||||
tests = ['zdb_002_pos', 'zdb_003_pos', 'zdb_004_pos', 'zdb_005_pos',
|
tests = ['zdb_002_pos', 'zdb_003_pos', 'zdb_004_pos', 'zdb_005_pos',
|
||||||
'zdb_006_pos', 'zdb_args_neg', 'zdb_args_pos',
|
'zdb_006_pos', 'zdb_args_neg', 'zdb_args_pos',
|
||||||
|
|
|
@ -606,6 +606,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
|
||||||
functional/clean_mirror/clean_mirror_004_pos.ksh \
|
functional/clean_mirror/clean_mirror_004_pos.ksh \
|
||||||
functional/clean_mirror/cleanup.ksh \
|
functional/clean_mirror/cleanup.ksh \
|
||||||
functional/clean_mirror/setup.ksh \
|
functional/clean_mirror/setup.ksh \
|
||||||
|
functional/cli_root/zinject/zinject_args.ksh \
|
||||||
functional/cli_root/zdb/zdb_002_pos.ksh \
|
functional/cli_root/zdb/zdb_002_pos.ksh \
|
||||||
functional/cli_root/zdb/zdb_003_pos.ksh \
|
functional/cli_root/zdb/zdb_003_pos.ksh \
|
||||||
functional/cli_root/zdb/zdb_004_pos.ksh \
|
functional/cli_root/zdb/zdb_004_pos.ksh \
|
||||||
|
|
|
@ -0,0 +1,62 @@
|
||||||
|
#!/bin/ksh -p
|
||||||
|
#
|
||||||
|
# CDDL HEADER START
|
||||||
|
#
|
||||||
|
# The contents of this file are subject to the terms of the
|
||||||
|
# Common Development and Distribution License (the "License").
|
||||||
|
# You may not use this file except in compliance with the License.
|
||||||
|
#
|
||||||
|
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||||
|
# or https://opensource.org/licenses/CDDL-1.0.
|
||||||
|
# See the License for the specific language governing permissions
|
||||||
|
# and limitations under the License.
|
||||||
|
#
|
||||||
|
# When distributing Covered Code, include this CDDL HEADER in each
|
||||||
|
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||||
|
# If applicable, add the following below this CDDL HEADER, with the
|
||||||
|
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||||
|
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||||
|
#
|
||||||
|
# CDDL HEADER END
|
||||||
|
#
|
||||||
|
|
||||||
|
#
|
||||||
|
# Copyright (c) 2024, Klara Inc.
|
||||||
|
#
|
||||||
|
|
||||||
|
#
|
||||||
|
# TODO: this only checks that the set of valid device fault types. It should
|
||||||
|
# check all the other options, and that they work, and everything really.
|
||||||
|
#
|
||||||
|
|
||||||
|
. $STF_SUITE/include/libtest.shlib
|
||||||
|
|
||||||
|
verify_runnable "global"
|
||||||
|
|
||||||
|
log_assert "Check zinject parameters."
|
||||||
|
|
||||||
|
log_onexit cleanup
|
||||||
|
|
||||||
|
DISK1=${DISKS%% *}
|
||||||
|
|
||||||
|
function cleanup
|
||||||
|
{
|
||||||
|
zinject -c all
|
||||||
|
default_cleanup_noexit
|
||||||
|
}
|
||||||
|
|
||||||
|
function test_device_fault
|
||||||
|
{
|
||||||
|
typeset -a errno=("io" "decompress" "decrypt" "nxio" "dtl" "corrupt")
|
||||||
|
for e in ${errno[@]}; do
|
||||||
|
log_must eval \
|
||||||
|
"zinject -d $DISK1 -e $e -T read -f 0.001 $TESTPOOL"
|
||||||
|
done
|
||||||
|
zinject -c all
|
||||||
|
}
|
||||||
|
|
||||||
|
default_mirror_setup_noexit $DISKS
|
||||||
|
|
||||||
|
test_device_fault
|
||||||
|
|
||||||
|
log_pass "zinject parameters work as expected."
|
Loading…
Reference in New Issue