Merge branch 'zfs-2.2.3-staging' into truenas/zfs-2.2-release

Signed-off-by: Ameer Hamza <ahamza@ixsystems.com>

commit 628e26fc0e
@@ -168,7 +168,7 @@ zil_prt_rec_write(zilog_t *zilog, int txtype, const void *arg)
 	    (u_longlong_t)lr->lr_foid, (u_longlong_t)lr->lr_offset,
 	    (u_longlong_t)lr->lr_length);
 
-	if (txtype == TX_WRITE2 || verbose < 5)
+	if (txtype == TX_WRITE2 || verbose < 4)
 		return;
 
 	if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) {

@@ -178,6 +178,8 @@ zil_prt_rec_write(zilog_t *zilog, int txtype, const void *arg)
 		    "will claim" : "won't claim");
 		print_log_bp(bp, tab_prefix);
 
+		if (verbose < 5)
+			return;
 		if (BP_IS_HOLE(bp)) {
 			(void) printf("\t\t\tLSIZE 0x%llx\n",
 			    (u_longlong_t)BP_GET_LSIZE(bp));

@@ -202,6 +204,9 @@ zil_prt_rec_write(zilog_t *zilog, int txtype, const void *arg)
 		if (error)
 			goto out;
 	} else {
+		if (verbose < 5)
+			return;
+
 		/* data is stored after the end of the lr_write record */
 		data = abd_alloc(lr->lr_length, B_FALSE);
 		abd_copy_from_buf(data, lr + 1, lr->lr_length);

@@ -217,6 +222,28 @@ out:
 	abd_free(data);
 }
 
+static void
+zil_prt_rec_write_enc(zilog_t *zilog, int txtype, const void *arg)
+{
+	(void) txtype;
+	const lr_write_t *lr = arg;
+	const blkptr_t *bp = &lr->lr_blkptr;
+	int verbose = MAX(dump_opt['d'], dump_opt['i']);
+
+	(void) printf("%s(encrypted)\n", tab_prefix);
+
+	if (verbose < 4)
+		return;
+
+	if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) {
+		(void) printf("%shas blkptr, %s\n", tab_prefix,
+		    !BP_IS_HOLE(bp) &&
+		    bp->blk_birth >= spa_min_claim_txg(zilog->zl_spa) ?
+		    "will claim" : "won't claim");
+		print_log_bp(bp, tab_prefix);
+	}
+}
+
 static void
 zil_prt_rec_truncate(zilog_t *zilog, int txtype, const void *arg)
 {

@@ -312,11 +339,34 @@ zil_prt_rec_clone_range(zilog_t *zilog, int txtype, const void *arg)
 {
 	(void) zilog, (void) txtype;
 	const lr_clone_range_t *lr = arg;
 	int verbose = MAX(dump_opt['d'], dump_opt['i']);
 
 	(void) printf("%sfoid %llu, offset %llx, length %llx, blksize %llx\n",
 	    tab_prefix, (u_longlong_t)lr->lr_foid, (u_longlong_t)lr->lr_offset,
 	    (u_longlong_t)lr->lr_length, (u_longlong_t)lr->lr_blksz);
 
+	if (verbose < 4)
+		return;
+
 	for (unsigned int i = 0; i < lr->lr_nbps; i++) {
 		(void) printf("%s[%u/%llu] ", tab_prefix, i + 1,
 		    (u_longlong_t)lr->lr_nbps);
 		print_log_bp(&lr->lr_bps[i], "");
 	}
 }
+
+static void
+zil_prt_rec_clone_range_enc(zilog_t *zilog, int txtype, const void *arg)
+{
+	(void) zilog, (void) txtype;
+	const lr_clone_range_t *lr = arg;
+	int verbose = MAX(dump_opt['d'], dump_opt['i']);
+
+	(void) printf("%s(encrypted)\n", tab_prefix);
+
+	if (verbose < 4)
+		return;
+
+	for (unsigned int i = 0; i < lr->lr_nbps; i++) {
+		(void) printf("%s[%u/%llu] ", tab_prefix, i + 1,
+		    (u_longlong_t)lr->lr_nbps);

@@ -327,6 +377,7 @@ zil_prt_rec_clone_range(zilog_t *zilog, int txtype, const void *arg)
 typedef void (*zil_prt_rec_func_t)(zilog_t *, int, const void *);
 typedef struct zil_rec_info {
 	zil_prt_rec_func_t zri_print;
+	zil_prt_rec_func_t zri_print_enc;
 	const char *zri_name;
 	uint64_t zri_count;
 } zil_rec_info_t;

@@ -341,7 +392,9 @@ static zil_rec_info_t zil_rec_info[TX_MAX_TYPE] = {
 	{.zri_print = zil_prt_rec_remove,   .zri_name = "TX_RMDIR "},
 	{.zri_print = zil_prt_rec_link,     .zri_name = "TX_LINK "},
 	{.zri_print = zil_prt_rec_rename,   .zri_name = "TX_RENAME "},
-	{.zri_print = zil_prt_rec_write,    .zri_name = "TX_WRITE "},
+	{.zri_print = zil_prt_rec_write,
+	    .zri_print_enc = zil_prt_rec_write_enc,
+	    .zri_name = "TX_WRITE "},
 	{.zri_print = zil_prt_rec_truncate, .zri_name = "TX_TRUNCATE "},
 	{.zri_print = zil_prt_rec_setattr,  .zri_name = "TX_SETATTR "},
 	{.zri_print = zil_prt_rec_acl,      .zri_name = "TX_ACL_V0 "},

@@ -358,6 +411,7 @@ static zil_rec_info_t zil_rec_info[TX_MAX_TYPE] = {
 	{.zri_print = zil_prt_rec_rename,  .zri_name = "TX_RENAME_EXCHANGE "},
 	{.zri_print = zil_prt_rec_rename,  .zri_name = "TX_RENAME_WHITEOUT "},
 	{.zri_print = zil_prt_rec_clone_range,
+	    .zri_print_enc = zil_prt_rec_clone_range_enc,
 	    .zri_name = "TX_CLONE_RANGE "},
 };

@@ -384,6 +438,8 @@ print_log_record(zilog_t *zilog, const lr_t *lr, void *arg, uint64_t claim_txg)
 	if (txtype && verbose >= 3) {
 		if (!zilog->zl_os->os_encrypted) {
 			zil_rec_info[txtype].zri_print(zilog, txtype, lr);
+		} else if (zil_rec_info[txtype].zri_print_enc) {
+			zil_rec_info[txtype].zri_print_enc(zilog, txtype, lr);
 		} else {
 			(void) printf("%s(encrypted)\n", tab_prefix);
 		}
@@ -2,12 +2,15 @@ dnl #
 dnl # 4.9, current_time() added
 dnl # 4.18, return type changed from timespec to timespec64
 dnl #
+dnl # Note that we don't care about the return type in this check. If we have
+dnl # to implement a fallback, we'll know we're <4.9, which was timespec.
+dnl #
 AC_DEFUN([ZFS_AC_KERNEL_SRC_CURRENT_TIME], [
 	ZFS_LINUX_TEST_SRC([current_time], [
 		#include <linux/fs.h>
 	], [
 		struct inode ip __attribute__ ((unused));
-		ip.i_atime = current_time(&ip);
+		(void) current_time(&ip);
 	])
 ])
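The dnl note above anticipates a fallback for pre-4.9 kernels. A minimal sketch of what such a shim could look like (hypothetical, not part of this commit; current_kernel_time() and timespec_trunc() are the pre-4.18 kernel interfaces it would rely on):

	#ifndef HAVE_CURRENT_TIME
	/* Pre-4.9 kernels: no current_time(); timestamps are struct timespec. */
	static inline struct timespec
	current_time(struct inode *ip)
	{
		return (timespec_trunc(current_kernel_time(),
		    ip->i_sb->s_time_gran));
	}
	#endif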
@@ -1,7 +1,8 @@
 dnl #
 dnl # Starting from Linux 5.13, flush_dcache_page() becomes an inline
-dnl # function and may indirectly referencing GPL-only cpu_feature_keys on
-dnl # powerpc
+dnl # function and may indirectly referencing GPL-only symbols:
+dnl # on powerpc: cpu_feature_keys
+dnl # on riscv: PageHuge (added from 6.2)
 dnl #
@@ -79,6 +79,12 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_FPU], [
 		__kernel_fpu_end();
 	], [], [])
 
+	ZFS_LINUX_TEST_SRC([kernel_neon], [
+		#include <asm/neon.h>
+	], [
+		kernel_neon_begin();
+		kernel_neon_end();
+	], [], [ZFS_META_LICENSE])
 ])
 
 AC_DEFUN([ZFS_AC_KERNEL_FPU], [

@@ -105,9 +111,20 @@ AC_DEFUN([ZFS_AC_KERNEL_FPU], [
 			AC_DEFINE(KERNEL_EXPORTS_X86_FPU, 1,
 			    [kernel exports FPU functions])
 		],[
-			AC_MSG_RESULT(internal)
-			AC_DEFINE(HAVE_KERNEL_FPU_INTERNAL, 1,
-			    [kernel fpu internal])
+			dnl #
+			dnl # ARM neon symbols (only on arm and arm64)
+			dnl # could be GPL-only on arm64 after Linux 6.2
+			dnl #
+			ZFS_LINUX_TEST_RESULT([kernel_neon_license],[
+				AC_MSG_RESULT(kernel_neon_*)
+				AC_DEFINE(HAVE_KERNEL_NEON, 1,
+				    [kernel has kernel_neon_* functions])
+			],[
+				# catch-all
+				AC_MSG_RESULT(internal)
+				AC_DEFINE(HAVE_KERNEL_FPU_INTERNAL, 1,
+				    [kernel fpu internal])
+			])
 		])
 	])
 ])
@@ -52,6 +52,48 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_TIMES], [
 		memset(&ip, 0, sizeof(ip));
 		inode_set_ctime_to_ts(&ip, ts);
 	])
+
+	dnl #
+	dnl # 6.7 API change
+	dnl # i_atime/i_mtime no longer directly accessible, must use
+	dnl # inode_get_mtime(ip), inode_set_mtime*(ip) to
+	dnl # read/write.
+	dnl #
+	ZFS_LINUX_TEST_SRC([inode_get_atime], [
+		#include <linux/fs.h>
+	],[
+		struct inode ip;
+
+		memset(&ip, 0, sizeof(ip));
+		inode_get_atime(&ip);
+	])
+	ZFS_LINUX_TEST_SRC([inode_get_mtime], [
+		#include <linux/fs.h>
+	],[
+		struct inode ip;
+
+		memset(&ip, 0, sizeof(ip));
+		inode_get_mtime(&ip);
+	])
+
+	ZFS_LINUX_TEST_SRC([inode_set_atime_to_ts], [
+		#include <linux/fs.h>
+	],[
+		struct inode ip;
+		struct timespec64 ts = {0};
+
+		memset(&ip, 0, sizeof(ip));
+		inode_set_atime_to_ts(&ip, ts);
+	])
+	ZFS_LINUX_TEST_SRC([inode_set_mtime_to_ts], [
+		#include <linux/fs.h>
+	],[
+		struct inode ip;
+		struct timespec64 ts = {0};
+
+		memset(&ip, 0, sizeof(ip));
+		inode_set_mtime_to_ts(&ip, ts);
+	])
 ])
 
 AC_DEFUN([ZFS_AC_KERNEL_INODE_TIMES], [

@@ -90,4 +132,40 @@ AC_DEFUN([ZFS_AC_KERNEL_INODE_TIMES], [
 	],[
 		AC_MSG_RESULT(no)
 	])
+
+	AC_MSG_CHECKING([whether inode_get_atime() exists])
+	ZFS_LINUX_TEST_RESULT([inode_get_atime], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_INODE_GET_ATIME, 1,
+		    [inode_get_atime() exists in linux/fs.h])
+	],[
+		AC_MSG_RESULT(no)
+	])
+
+	AC_MSG_CHECKING([whether inode_set_atime_to_ts() exists])
+	ZFS_LINUX_TEST_RESULT([inode_set_atime_to_ts], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_INODE_SET_ATIME_TO_TS, 1,
+		    [inode_set_atime_to_ts() exists in linux/fs.h])
+	],[
+		AC_MSG_RESULT(no)
+	])
+
+	AC_MSG_CHECKING([whether inode_get_mtime() exists])
+	ZFS_LINUX_TEST_RESULT([inode_get_mtime], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_INODE_GET_MTIME, 1,
+		    [inode_get_mtime() exists in linux/fs.h])
+	],[
+		AC_MSG_RESULT(no)
+	])
+
+	AC_MSG_CHECKING([whether inode_set_mtime_to_ts() exists])
+	ZFS_LINUX_TEST_RESULT([inode_set_mtime_to_ts], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_INODE_SET_MTIME_TO_TS, 1,
+		    [inode_set_mtime_to_ts() exists in linux/fs.h])
+	],[
+		AC_MSG_RESULT(no)
+	])
 ])
@@ -19,12 +19,44 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_SUPER_BLOCK_S_SHRINK], [
 	],[])
 ])
 
+dnl #
+dnl # 6.7 API change
+dnl # s_shrink is now a pointer.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_SUPER_BLOCK_S_SHRINK_PTR], [
+	ZFS_LINUX_TEST_SRC([super_block_s_shrink_ptr], [
+		#include <linux/fs.h>
+		unsigned long shrinker_cb(struct shrinker *shrink,
+		    struct shrink_control *sc) { return 0; }
+		static struct shrinker shrinker = {
+			.count_objects = shrinker_cb,
+			.scan_objects = shrinker_cb,
+			.seeks = DEFAULT_SEEKS,
+		};
+		static const struct super_block
+		    sb __attribute__ ((unused)) = {
+			.s_shrink = &shrinker,
+		};
+	],[])
+])
+
 AC_DEFUN([ZFS_AC_KERNEL_SUPER_BLOCK_S_SHRINK], [
 	AC_MSG_CHECKING([whether super_block has s_shrink])
 	ZFS_LINUX_TEST_RESULT([super_block_s_shrink], [
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_SUPER_BLOCK_S_SHRINK, 1,
 		    [have super_block s_shrink])
 	],[
-		ZFS_LINUX_TEST_ERROR([sb->s_shrink()])
+		AC_MSG_RESULT(no)
+		AC_MSG_CHECKING([whether super_block has s_shrink pointer])
+		ZFS_LINUX_TEST_RESULT([super_block_s_shrink_ptr], [
+			AC_MSG_RESULT(yes)
+			AC_DEFINE(HAVE_SUPER_BLOCK_S_SHRINK_PTR, 1,
+			    [have super_block s_shrink pointer])
+		],[
+			AC_MSG_RESULT(no)
+			ZFS_LINUX_TEST_ERROR([sb->s_shrink()])
+		])
 	])
 ])

@@ -96,6 +128,25 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_SHRINKER_CALLBACK], [
 	])
 ])
 
+dnl #
+dnl # 6.7 API change
+dnl # register_shrinker has been replaced by shrinker_register.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_SHRINKER_REGISTER], [
+	ZFS_LINUX_TEST_SRC([shrinker_register], [
+		#include <linux/shrinker.h>
+		unsigned long shrinker_cb(struct shrinker *shrink,
+		    struct shrink_control *sc) { return 0; }
+	],[
+		struct shrinker cache_shrinker = {
+			.count_objects = shrinker_cb,
+			.scan_objects = shrinker_cb,
+			.seeks = DEFAULT_SEEKS,
+		};
+		shrinker_register(&cache_shrinker);
+	])
+])
+
 AC_DEFUN([ZFS_AC_KERNEL_SHRINKER_CALLBACK],[
 	dnl #
 	dnl # 6.0 API change

@@ -133,14 +184,36 @@ AC_DEFUN([ZFS_AC_KERNEL_SHRINKER_CALLBACK],[
 		dnl # cs->shrink() is logically split in to
 		dnl # cs->count_objects() and cs->scan_objects()
 		dnl #
-		AC_MSG_CHECKING([if cs->count_objects callback exists])
+		AC_MSG_CHECKING(
+		    [whether cs->count_objects callback exists])
 		ZFS_LINUX_TEST_RESULT(
 		    [shrinker_cb_shrink_control_split],[
 			AC_MSG_RESULT(yes)
 			AC_DEFINE(HAVE_SPLIT_SHRINKER_CALLBACK, 1,
 			    [cs->count_objects exists])
 		],[
-			ZFS_LINUX_TEST_ERROR([shrinker])
+			AC_MSG_RESULT(no)
+
+			AC_MSG_CHECKING(
+			    [whether shrinker_register exists])
+			ZFS_LINUX_TEST_RESULT([shrinker_register], [
+				AC_MSG_RESULT(yes)
+				AC_DEFINE(HAVE_SHRINKER_REGISTER, 1,
+				    [shrinker_register exists])
+
+				dnl # We assume that the split shrinker
+				dnl # callback exists if
+				dnl # shrinker_register() exists,
+				dnl # because the latter is a much more
+				dnl # recent addition, and the macro
+				dnl # test for shrinker_register() only
+				dnl # works if the callback is split
+				AC_DEFINE(HAVE_SPLIT_SHRINKER_CALLBACK,
+				    1, [cs->count_objects exists])
+			],[
+				AC_MSG_RESULT(no)
+				ZFS_LINUX_TEST_ERROR([shrinker])
+			])
 		])
 	])
 ])

@@ -174,10 +247,12 @@ AC_DEFUN([ZFS_AC_KERNEL_SHRINK_CONTROL_STRUCT], [
 
 AC_DEFUN([ZFS_AC_KERNEL_SRC_SHRINKER], [
 	ZFS_AC_KERNEL_SRC_SUPER_BLOCK_S_SHRINK
+	ZFS_AC_KERNEL_SRC_SUPER_BLOCK_S_SHRINK_PTR
 	ZFS_AC_KERNEL_SRC_SHRINK_CONTROL_HAS_NID
 	ZFS_AC_KERNEL_SRC_SHRINKER_CALLBACK
 	ZFS_AC_KERNEL_SRC_SHRINK_CONTROL_STRUCT
 	ZFS_AC_KERNEL_SRC_REGISTER_SHRINKER_VARARG
+	ZFS_AC_KERNEL_SRC_SHRINKER_REGISTER
 ])
 
 AC_DEFUN([ZFS_AC_KERNEL_SHRINKER], [
@@ -168,6 +168,9 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
 			ZFS_AC_KERNEL_SRC_CPU_HAS_FEATURE
 			ZFS_AC_KERNEL_SRC_FLUSH_DCACHE_PAGE
 			;;
+		riscv*)
+			ZFS_AC_KERNEL_SRC_FLUSH_DCACHE_PAGE
+			;;
 	esac
 
 	AC_MSG_CHECKING([for available kernel interfaces])

@@ -310,6 +313,9 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
 			ZFS_AC_KERNEL_CPU_HAS_FEATURE
 			ZFS_AC_KERNEL_FLUSH_DCACHE_PAGE
 			;;
+		riscv*)
+			ZFS_AC_KERNEL_FLUSH_DCACHE_PAGE
+			;;
 	esac
 ])
@@ -91,6 +91,12 @@
 #define	param_set_max_auto_ashift_args(var) \
     CTLTYPE_UINT, NULL, 0, param_set_max_auto_ashift, "IU"
 
+#define	spa_taskq_read_param_set_args(var) \
+    CTLTYPE_STRING, NULL, 0, spa_taskq_read_param, "A"
+
+#define	spa_taskq_write_param_set_args(var) \
+    CTLTYPE_STRING, NULL, 0, spa_taskq_write_param, "A"
+
 #define	fletcher_4_param_set_args(var) \
     CTLTYPE_STRING, NULL, 0, fletcher_4_param, "A"
@@ -42,8 +42,8 @@
 /*
  * Starting from Linux 5.13, flush_dcache_page() becomes an inline function
  * and under some configurations, may indirectly referencing GPL-only
- * cpu_feature_keys on powerpc. Override this function when it is detected
- * being GPL-only.
+ * symbols, e.g., cpu_feature_keys on powerpc and PageHuge on riscv.
+ * Override this function when it is detected being GPL-only.
 */
 #if defined __powerpc__ && defined HAVE_FLUSH_DCACHE_PAGE_GPL_ONLY
 #include <linux/simd_powerpc.h>

@@ -53,6 +53,17 @@
 	clear_bit(PG_dcache_clean, &(page)->flags);	\
 	} while (0)
 #endif
+/*
+ * For riscv implementation, the use of PageHuge can be safely removed.
+ * Because it handles pages allocated by HugeTLB, while flush_dcache_page
+ * in zfs module is only called on kernel pages.
+ */
+#if defined __riscv && defined HAVE_FLUSH_DCACHE_PAGE_GPL_ONLY
+#define	flush_dcache_page(page)	do {			\
+	if (test_bit(PG_dcache_clean, &(page)->flags))	\
+		clear_bit(PG_dcache_clean, &(page)->flags);	\
+	} while (0)
+#endif
 
 /*
  * 2.6.30 API change,
@@ -71,9 +71,15 @@
 #define	ID_AA64PFR0_EL1		sys_reg(3, 0, 0, 1, 0)
 #define	ID_AA64ISAR0_EL1	sys_reg(3, 0, 0, 6, 0)
 
+#if (defined(HAVE_KERNEL_NEON) && defined(CONFIG_KERNEL_MODE_NEON))
 #define	kfpu_allowed()		1
 #define	kfpu_begin()		kernel_neon_begin()
 #define	kfpu_end()		kernel_neon_end()
+#else
+#define	kfpu_allowed()		0
+#define	kfpu_begin()		do {} while (0)
+#define	kfpu_end()		do {} while (0)
+#endif
 #define	kfpu_init()		(0)
 #define	kfpu_fini()		do {} while (0)
@@ -53,9 +53,15 @@
 #include <asm/elf.h>
 #include <asm/hwcap.h>
 
+#if (defined(HAVE_KERNEL_NEON) && defined(CONFIG_KERNEL_MODE_NEON))
 #define	kfpu_allowed()		1
 #define	kfpu_begin()		kernel_neon_begin()
 #define	kfpu_end()		kernel_neon_end()
+#else
+#define	kfpu_allowed()		0
+#define	kfpu_begin()		do {} while (0)
+#define	kfpu_end()		do {} while (0)
+#endif
 #define	kfpu_init()		(0)
 #define	kfpu_fini()		do {} while (0)
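Together with the kernel_neon license probe above, these kfpu_* wrappers give callers one pattern for SIMD sections whether or not kernel_neon_* is usable. A minimal usage sketch (the accelerated loop is a placeholder, not code from this commit):

	static void
	checksum_block(const uint8_t *buf, size_t len)
	{
		if (kfpu_allowed()) {
			kfpu_begin();
			/* NEON-accelerated implementation would go here. */
			kfpu_end();
		} else {
			/* Scalar fallback when kernel FPU use is not allowed. */
		}
	}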
@@ -70,8 +70,6 @@ typedef enum kmem_cbrc {
 #define	KMC_REAP_CHUNK			INT_MAX
 #define	KMC_DEFAULT_SEEKS		1
 
-#define	KMC_RECLAIM_ONCE	0x1	/* Force a single shrinker pass */
-
 extern struct list_head spl_kmem_cache_list;
 extern struct rw_semaphore spl_kmem_cache_sem;
@@ -29,12 +29,13 @@
 
 /*
  * Due to frequent changes in the shrinker API the following
- * compatibility wrappers should be used. They are as follows:
+ * compatibility wrapper should be used.
  *
- * SPL_SHRINKER_DECLARE(varname, countfunc, scanfunc, seek_cost);
+ * shrinker = spl_register_shrinker(name, countfunc, scanfunc, seek_cost);
+ * spl_unregister_shrinker(shrinker);
  *
- * SPL_SHRINKER_DECLARE is used to declare a shrinker with the name varname,
- * which is passed to spl_register_shrinker()/spl_unregister_shrinker().
+ * spl_register_shrinker is used to create and register a shrinker with the
+ * given name.
  * The countfunc returns the number of free-able objects.
  * The scanfunc returns the number of objects that were freed.
  * The callbacks can return SHRINK_STOP if further calls can't make any more

@@ -57,57 +58,28 @@
  *	...scan objects in the cache and reclaim them...
  * }
  *
- * SPL_SHRINKER_DECLARE(my_shrinker, my_count, my_scan, DEFAULT_SEEKS);
+ * static struct shrinker *my_shrinker;
  *
  * void my_init_func(void) {
- *	spl_register_shrinker(&my_shrinker);
+ *	my_shrinker = spl_register_shrinker("my-shrinker",
+ *	    my_count, my_scan, DEFAULT_SEEKS);
 * }
 *
 * void my_fini_func(void) {
 *	spl_unregister_shrinker(my_shrinker);
 * }
 */
 
-#ifdef HAVE_REGISTER_SHRINKER_VARARG
-#define	spl_register_shrinker(x)	register_shrinker(x, "zfs-arc-shrinker")
-#else
-#define	spl_register_shrinker(x)	register_shrinker(x)
-#endif
-#define	spl_unregister_shrinker(x)	unregister_shrinker(x)
+typedef unsigned long (*spl_shrinker_cb)
+	(struct shrinker *, struct shrink_control *);
 
-/*
- * Linux 3.0 to 3.11 Shrinker API Compatibility.
- */
-#if defined(HAVE_SINGLE_SHRINKER_CALLBACK)
-#define	SPL_SHRINKER_DECLARE(varname, countfunc, scanfunc, seek_cost)	\
-static int								\
-__ ## varname ## _wrapper(struct shrinker *shrink, struct shrink_control *sc)\
-{									\
-	if (sc->nr_to_scan != 0) {					\
-		(void) scanfunc(shrink, sc);				\
-	}								\
-	return (countfunc(shrink, sc));					\
-}									\
-									\
-static struct shrinker varname = {					\
-	.shrink = __ ## varname ## _wrapper,				\
-	.seeks = seek_cost,						\
-}
+struct shrinker *spl_register_shrinker(const char *name,
+    spl_shrinker_cb countfunc, spl_shrinker_cb scanfunc, int seek_cost);
+void spl_unregister_shrinker(struct shrinker *);
 
 #ifndef SHRINK_STOP
 /* 3.0-3.11 compatibility */
 #define	SHRINK_STOP (-1)
-
-/*
- * Linux 3.12 and later Shrinker API Compatibility.
- */
-#elif defined(HAVE_SPLIT_SHRINKER_CALLBACK)
-#define	SPL_SHRINKER_DECLARE(varname, countfunc, scanfunc, seek_cost)	\
-static struct shrinker varname = {					\
-	.count_objects = countfunc,					\
-	.scan_objects = scanfunc,					\
-	.seeks = seek_cost,						\
-}
-
-#else
-/*
- * Linux 2.x to 2.6.22, or a newer shrinker API has been introduced.
- */
-#error "Unknown shrinker callback"
 #endif
 
 #endif /* SPL_SHRINKER_H */
@@ -282,5 +282,25 @@ extern long zpl_ioctl_fideduperange(struct file *filp, void *arg);
 #else
 #define	zpl_inode_set_ctime_to_ts(ip, ts)	(ip->i_ctime = ts)
 #endif
+#ifdef HAVE_INODE_GET_ATIME
+#define	zpl_inode_get_atime(ip)	inode_get_atime(ip)
+#else
+#define	zpl_inode_get_atime(ip)	(ip->i_atime)
+#endif
+#ifdef HAVE_INODE_SET_ATIME_TO_TS
+#define	zpl_inode_set_atime_to_ts(ip, ts)	inode_set_atime_to_ts(ip, ts)
+#else
+#define	zpl_inode_set_atime_to_ts(ip, ts)	(ip->i_atime = ts)
+#endif
+#ifdef HAVE_INODE_GET_MTIME
+#define	zpl_inode_get_mtime(ip)	inode_get_mtime(ip)
+#else
+#define	zpl_inode_get_mtime(ip)	(ip->i_mtime)
+#endif
+#ifdef HAVE_INODE_SET_MTIME_TO_TS
+#define	zpl_inode_set_mtime_to_ts(ip, ts)	inode_set_mtime_to_ts(ip, ts)
+#else
+#define	zpl_inode_set_mtime_to_ts(ip, ts)	(ip->i_mtime = ts)
+#endif
 
 #endif	/* _SYS_ZPL_H */
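These wrappers compile to direct field access on pre-6.7 kernels and to the inode_get_*/inode_set_*_to_ts() accessors on 6.7+. A usage sketch, mirroring how the callers changed later in this commit use them:

	inode_timespec_t ts;

	ts = zpl_inode_get_mtime(ip);		/* read, works on any kernel */
	zpl_inode_set_atime_to_ts(ip, ts);	/* write, works on any kernel */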
@@ -71,6 +71,7 @@ typedef struct dataset_kstats {
 
 int dataset_kstats_create(dataset_kstats_t *, objset_t *);
 void dataset_kstats_destroy(dataset_kstats_t *);
+void dataset_kstats_rename(dataset_kstats_t *dk, const char *);
 
 void dataset_kstats_update_write_kstats(dataset_kstats_t *, int64_t);
 void dataset_kstats_update_read_kstats(dataset_kstats_t *, int64_t);
@@ -206,6 +206,7 @@ void dsl_dataset_promote_crypt_sync(dsl_dir_t *target, dsl_dir_t *origin,
     dmu_tx_t *tx);
 int dmu_objset_create_crypt_check(dsl_dir_t *parentdd,
     dsl_crypto_params_t *dcp, boolean_t *will_encrypt);
+boolean_t dmu_objset_crypto_key_equal(objset_t *osa, objset_t *osb);
 void dsl_dataset_create_crypt_sync(uint64_t dsobj, dsl_dir_t *dd,
     struct dsl_dataset *origin, dsl_crypto_params_t *dcp, dmu_tx_t *tx);
 uint64_t dsl_crypto_key_create_sync(uint64_t crypt, dsl_wrapping_key_t *wkey,
@@ -64,6 +64,9 @@ libspl_assert(const char *buf, const char *file, const char *func, int line)
 #undef verify
 #endif
 
+#define	PANIC(fmt, a...)						\
+	libspl_assertf(__FILE__, __FUNCTION__, __LINE__, fmt, ## a)
+
 #define	VERIFY(cond)							\
 	(void) ((!(cond)) &&						\
 	    libspl_assert(#cond, __FILE__, __FUNCTION__, __LINE__))
@@ -31,14 +31,6 @@ for use by the kmem caches.
 For the majority of systems and workloads only a small number of threads are
 required.
 .
-.It Sy spl_kmem_cache_reclaim Ns = Ns Sy 0 Pq uint
-When this is set it prevents Linux from being able to rapidly reclaim all the
-memory held by the kmem caches.
-This may be useful in circumstances where it's preferable that Linux
-reclaim memory from some other subsystem first.
-Setting this will increase the likelihood out of memory events on a memory
-constrained system.
-.
 .It Sy spl_kmem_cache_obj_per_slab Ns = Ns Sy 8 Pq uint
 The preferred number of objects per slab in the cache.
 In general, a larger value will increase the caches memory footprint
@@ -2280,6 +2280,16 @@ If
 .Sy 0 ,
 generate a system-dependent value close to 6 threads per taskq.
 .
+.It Sy zio_taskq_read Ns = Ns Sy fixed,1,8 null scale null Pq charp
+Set the queue and thread configuration for the IO read queues.
+This is an advanced debugging parameter.
+Don't change this unless you understand what it does.
+.
+.It Sy zio_taskq_write Ns = Ns Sy batch fixed,1,5 scale fixed,1,5 Pq charp
+Set the queue and thread configuration for the IO write queues.
+This is an advanced debugging parameter.
+Don't change this unless you understand what it does.
+.
 .It Sy zvol_inhibit_dev Ns = Ns Sy 0 Ns | Ns 1 Pq uint
 Do not create zvol device nodes.
 This may slightly improve startup time on
@@ -364,9 +364,12 @@ When this feature is enabled ZFS will use block cloning for operations like
 Block cloning allows to create multiple references to a single block.
 It is much faster than copying the data (as the actual data is neither read nor
 written) and takes no additional space.
-Blocks can be cloned across datasets under some conditions (like disabled
-encryption and equal
-.Nm recordsize ) .
+Blocks can be cloned across datasets under some conditions (like equal
+.Nm recordsize ,
+the same master encryption key, etc.).
+ZFS tries its best to clone across datasets including encrypted ones.
+This is limited for various (nontrivial) reasons depending on the OS
+and/or ZFS internals.
 .Pp
 This feature becomes
 .Sy active
@@ -80,6 +80,7 @@ SPL_OBJS := \
	spl-kstat.o \
	spl-proc.o \
	spl-procfs-list.o \
+	spl-shrinker.o \
	spl-taskq.o \
	spl-thread.o \
	spl-trace.o \
@@ -187,19 +187,18 @@ kstat_sysctl_dataset_string(SYSCTL_HANDLER_ARGS)
 static int
 kstat_sysctl_io(SYSCTL_HANDLER_ARGS)
 {
-	struct sbuf *sb;
+	struct sbuf sb;
 	kstat_t *ksp = arg1;
 	kstat_io_t *kip = ksp->ks_data;
 	int rc;
 
-	sb = sbuf_new_auto();
-	if (sb == NULL)
-		return (ENOMEM);
+	sbuf_new_for_sysctl(&sb, NULL, 0, req);
 
 	/* Update the aggsums before reading */
 	(void) ksp->ks_update(ksp, KSTAT_READ);
 
 	/* though wlentime & friends are signed, they will never be negative */
-	sbuf_printf(sb,
+	sbuf_printf(&sb,
 	    "%-8llu %-8llu %-8u %-8u %-8llu %-8llu "
 	    "%-8llu %-8llu %-8llu %-8llu %-8u %-8u\n",
 	    kip->nread, kip->nwritten,

@@ -207,25 +206,21 @@ kstat_sysctl_io(SYSCTL_HANDLER_ARGS)
 	    kip->wtime, kip->wlentime, kip->wlastupdate,
 	    kip->rtime, kip->rlentime, kip->rlastupdate,
 	    kip->wcnt, kip->rcnt);
-	rc = sbuf_finish(sb);
-	if (rc == 0)
-		rc = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb));
-	sbuf_delete(sb);
+	rc = sbuf_finish(&sb);
+	sbuf_delete(&sb);
 	return (rc);
 }
 
 static int
 kstat_sysctl_raw(SYSCTL_HANDLER_ARGS)
 {
-	struct sbuf *sb;
+	struct sbuf sb;
 	void *data;
 	kstat_t *ksp = arg1;
 	void *(*addr_op)(kstat_t *ksp, loff_t index);
 	int n, has_header, rc = 0;
 
-	sb = sbuf_new_auto();
-	if (sb == NULL)
-		return (ENOMEM);
+	sbuf_new_for_sysctl(&sb, NULL, PAGE_SIZE, req);
 
 	if (ksp->ks_raw_ops.addr)
 		addr_op = ksp->ks_raw_ops.addr;

@@ -258,8 +253,10 @@ restart_headers:
 		if (has_header) {
 			if (rc == ENOMEM && !kstat_resize_raw(ksp))
 				goto restart_headers;
-			if (rc == 0)
-				sbuf_printf(sb, "\n%s", ksp->ks_raw_buf);
+			if (rc == 0) {
+				sbuf_cat(&sb, "\n");
+				sbuf_cat(&sb, ksp->ks_raw_buf);
+			}
 		}
 
 		while ((data = addr_op(ksp, n)) != NULL) {

@@ -270,22 +267,19 @@ restart:
 			if (rc == ENOMEM && !kstat_resize_raw(ksp))
 				goto restart;
-			if (rc == 0)
-				sbuf_printf(sb, "%s", ksp->ks_raw_buf);
+			sbuf_cat(&sb, ksp->ks_raw_buf);
 
 		} else {
 			ASSERT3U(ksp->ks_ndata, ==, 1);
-			sbuf_hexdump(sb, ksp->ks_data,
+			sbuf_hexdump(&sb, ksp->ks_data,
 			    ksp->ks_data_size, NULL, 0);
 		}
 		n++;
 	}
 	free(ksp->ks_raw_buf, M_TEMP);
 	mutex_exit(ksp->ks_lock);
-	sbuf_trim(sb);
-	rc = sbuf_finish(sb);
-	if (rc == 0)
-		rc = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb));
-	sbuf_delete(sb);
+	rc = sbuf_finish(&sb);
+	sbuf_delete(&sb);
 	return (rc);
 }
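The sbuf change above replaces the allocate/SYSCTL_OUT/free dance with sbuf_new_for_sysctl(), which wires a stack sbuf to drain directly into the sysctl request. The resulting pattern, as a sketch:

	struct sbuf sb;
	int rc;

	sbuf_new_for_sysctl(&sb, NULL, 0, req);	/* drains to req as it fills */
	sbuf_printf(&sb, "...");
	rc = sbuf_finish(&sb);			/* flush and collect errors */
	sbuf_delete(&sb);
	return (rc);

Because the sbuf drains incrementally, the handler no longer has to buffer the whole output in memory or handle ENOMEM from sbuf_new_auto().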
@@ -1251,7 +1251,7 @@ zio_crypt_init_uios_zil(boolean_t encrypt, uint8_t *plainbuf,
 	iovec_t *dst_iovecs;
 	zil_chain_t *zilc;
 	lr_t *lr;
-	uint64_t txtype, lr_len;
+	uint64_t txtype, lr_len, nused;
 	uint_t crypt_len, nr_iovecs, vec;
 	uint_t aad_len = 0, total_len = 0;

@@ -1268,7 +1268,10 @@ zio_crypt_init_uios_zil(boolean_t encrypt, uint8_t *plainbuf,
 	zilc = (zil_chain_t *)src;
 	slrp = src + sizeof (zil_chain_t);
 	aadp = aadbuf;
-	blkend = src + ((byteswap) ? BSWAP_64(zilc->zc_nused) : zilc->zc_nused);
+	nused = ((byteswap) ? BSWAP_64(zilc->zc_nused) : zilc->zc_nused);
+	ASSERT3U(nused, >=, sizeof (zil_chain_t));
+	ASSERT3U(nused, <=, datalen);
+	blkend = src + nused;
 
 	/*
 	 * Calculate the number of encrypted iovecs we will need.

@@ -1287,6 +1290,8 @@ zio_crypt_init_uios_zil(boolean_t encrypt, uint8_t *plainbuf,
 		txtype = lr->lrc_txtype;
 		lr_len = lr->lrc_reclen;
 	}
+	ASSERT3U(lr_len, >=, sizeof (lr_t));
+	ASSERT3U(lr_len, <=, blkend - slrp);
 
 	nr_iovecs++;
 	if (txtype == TX_WRITE && lr_len != sizeof (lr_write_t))
@@ -1333,6 +1333,7 @@ zvol_os_rename_minor(zvol_state_t *zv, const char *newname)
 		}
 	}
 	strlcpy(zv->zv_name, newname, sizeof (zv->zv_name));
+	dataset_kstats_rename(&zv->zv_kstat, newname);
 }
 
 /*
@@ -76,17 +76,6 @@ module_param(spl_kmem_cache_magazine_size, uint, 0444);
 MODULE_PARM_DESC(spl_kmem_cache_magazine_size,
	"Default magazine size (2-256), set automatically (0)");
 
-/*
- * The default behavior is to report the number of objects remaining in the
- * cache. This allows the Linux VM to repeatedly reclaim objects from the
- * cache when memory is low satisfy other memory allocations. Alternately,
- * setting this value to KMC_RECLAIM_ONCE limits how aggressively the cache
- * is reclaimed. This may increase the likelihood of out of memory events.
- */
-static unsigned int spl_kmem_cache_reclaim = 0 /* KMC_RECLAIM_ONCE */;
-module_param(spl_kmem_cache_reclaim, uint, 0644);
-MODULE_PARM_DESC(spl_kmem_cache_reclaim, "Single reclaim pass (0x1)");
-
 static unsigned int spl_kmem_cache_obj_per_slab = SPL_KMEM_CACHE_OBJ_PER_SLAB;
 module_param(spl_kmem_cache_obj_per_slab, uint, 0644);
 MODULE_PARM_DESC(spl_kmem_cache_obj_per_slab, "Number of objects per slab");
@@ -0,0 +1,115 @@
+/*
+ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ * Copyright (C) 2007 The Regents of the University of California.
+ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ * Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ * UCRL-CODE-235197
+ *
+ * This file is part of the SPL, Solaris Porting Layer.
+ *
+ * The SPL is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * The SPL is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Solaris Porting Layer (SPL) Shrinker Implementation.
+ */
+
+#include <sys/kmem.h>
+#include <sys/shrinker.h>
+
+#ifdef HAVE_SINGLE_SHRINKER_CALLBACK
+/* 3.0-3.11: single shrink() callback, which we wrap to carry both functions */
+struct spl_shrinker_wrap {
+	struct shrinker shrinker;
+	spl_shrinker_cb countfunc;
+	spl_shrinker_cb scanfunc;
+};
+
+static int
+spl_shrinker_single_cb(struct shrinker *shrinker, struct shrink_control *sc)
+{
+	struct spl_shrinker_wrap *sw = (struct spl_shrinker_wrap *)shrinker;
+
+	if (sc->nr_to_scan != 0)
+		(void) sw->scanfunc(&sw->shrinker, sc);
+	return (sw->countfunc(&sw->shrinker, sc));
+}
+#endif
+
+struct shrinker *
+spl_register_shrinker(const char *name, spl_shrinker_cb countfunc,
+    spl_shrinker_cb scanfunc, int seek_cost)
+{
+	struct shrinker *shrinker;
+
+	/* allocate shrinker */
+#if defined(HAVE_SHRINKER_REGISTER)
+	/* 6.7: kernel will allocate the shrinker for us */
+	shrinker = shrinker_alloc(0, name);
+#elif defined(HAVE_SPLIT_SHRINKER_CALLBACK)
+	/* 3.12-6.6: we allocate the shrinker  */
+	shrinker = kmem_zalloc(sizeof (struct shrinker), KM_SLEEP);
+#elif defined(HAVE_SINGLE_SHRINKER_CALLBACK)
+	/* 3.0-3.11: allocate a wrapper */
+	struct spl_shrinker_wrap *sw =
+	    kmem_zalloc(sizeof (struct spl_shrinker_wrap), KM_SLEEP);
+	shrinker = &sw->shrinker;
+#else
+	/* 2.x-2.6.22, or a newer shrinker API has been introduced. */
+#error "Unknown shrinker API"
+#endif
+
+	if (shrinker == NULL)
+		return (NULL);
+
+	/* set callbacks */
+#ifdef HAVE_SINGLE_SHRINKER_CALLBACK
+	sw->countfunc = countfunc;
+	sw->scanfunc = scanfunc;
+	shrinker->shrink = spl_shrinker_single_cb;
+#else
+	shrinker->count_objects = countfunc;
+	shrinker->scan_objects = scanfunc;
+#endif
+
+	/* set params */
+	shrinker->seeks = seek_cost;
+
+	/* register with kernel */
+#if defined(HAVE_SHRINKER_REGISTER)
+	shrinker_register(shrinker);
+#elif defined(HAVE_REGISTER_SHRINKER_VARARG)
+	register_shrinker(shrinker, name);
+#else
+	register_shrinker(shrinker);
+#endif
+
+	return (shrinker);
+}
+EXPORT_SYMBOL(spl_register_shrinker);
+
+void
+spl_unregister_shrinker(struct shrinker *shrinker)
+{
+#if defined(HAVE_SHRINKER_REGISTER)
+	shrinker_free(shrinker);
+#elif defined(HAVE_SPLIT_SHRINKER_CALLBACK)
+	unregister_shrinker(shrinker);
+	kmem_free(shrinker, sizeof (struct shrinker));
+#elif defined(HAVE_SINGLE_SHRINKER_CALLBACK)
+	unregister_shrinker(shrinker);
+	kmem_free(shrinker, sizeof (struct spl_shrinker_wrap));
+#else
+#error "Unknown shrinker API"
+#endif
+}
+EXPORT_SYMBOL(spl_unregister_shrinker);
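The new file funnels three generations of the Linux shrinker API through one pair of entry points. A caller's lifecycle, sketched from the header comment and the arc_os.c usage below (my_count/my_scan stand in for real callbacks with the spl_shrinker_cb signature):

	static struct shrinker *my_shrinker;

	/* init: returns NULL on allocation failure */
	my_shrinker = spl_register_shrinker("my-cache",
	    my_count, my_scan, DEFAULT_SEEKS);

	/* fini */
	spl_unregister_shrinker(my_shrinker);

The design choice here is to allocate the struct shrinker inside the SPL (or let shrinker_alloc() do it on 6.7+) rather than let callers declare it statically, since 6.7 made the kernel own the allocation.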
@@ -247,8 +247,7 @@ arc_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
 	return (sc->nr_to_scan);
 }
 
-SPL_SHRINKER_DECLARE(arc_shrinker,
-    arc_shrinker_count, arc_shrinker_scan, DEFAULT_SEEKS);
+static struct shrinker *arc_shrinker = NULL;
 
 int
 arc_memory_throttle(spa_t *spa, uint64_t reserve, uint64_t txg)

@@ -351,14 +350,18 @@ arc_lowmem_init(void)
 	 * reclaim from the arc.  This is done to prevent kswapd from
 	 * swapping out pages when it is preferable to shrink the arc.
 	 */
-	spl_register_shrinker(&arc_shrinker);
+	arc_shrinker = spl_register_shrinker("zfs-arc-shrinker",
+	    arc_shrinker_count, arc_shrinker_scan, DEFAULT_SEEKS);
+	VERIFY(arc_shrinker);
 
 	arc_set_sys_free(allmem);
 }
 
 void
 arc_lowmem_fini(void)
 {
-	spl_unregister_shrinker(&arc_shrinker);
+	spl_unregister_shrinker(arc_shrinker);
+	arc_shrinker = NULL;
 }
 
 int
@@ -85,7 +85,7 @@ static blk_mode_t
 #else
 static fmode_t
 #endif
-vdev_bdev_mode(spa_mode_t spa_mode)
+vdev_bdev_mode(spa_mode_t spa_mode, boolean_t exclusive)
 {
 #ifdef HAVE_BLK_MODE_T
 	blk_mode_t mode = 0;

@@ -95,6 +95,9 @@ vdev_bdev_mode(spa_mode_t spa_mode)
 	if (spa_mode & SPA_MODE_WRITE)
 		mode |= BLK_OPEN_WRITE;
 
+	if (exclusive)
+		mode |= BLK_OPEN_EXCL;
+
 #else
 	fmode_t mode = 0;

@@ -103,6 +106,9 @@ vdev_bdev_mode(spa_mode_t spa_mode)
 	if (spa_mode & SPA_MODE_WRITE)
 		mode |= FMODE_WRITE;
 
+	if (exclusive)
+		mode |= FMODE_EXCL;
+
 #endif
 
 	return (mode);

@@ -225,10 +231,10 @@ vdev_blkdev_get_by_path(const char *path, spa_mode_t mode, void *holder,
 {
 #ifdef HAVE_BLKDEV_GET_BY_PATH_4ARG
 	return (blkdev_get_by_path(path,
-	    vdev_bdev_mode(mode) | BLK_OPEN_EXCL, holder, hops));
+	    vdev_bdev_mode(mode, B_TRUE), holder, hops));
 #else
 	return (blkdev_get_by_path(path,
-	    vdev_bdev_mode(mode) | FMODE_EXCL, holder));
+	    vdev_bdev_mode(mode, B_TRUE), holder));
 #endif
 }

@@ -238,7 +244,7 @@ vdev_blkdev_put(struct block_device *bdev, spa_mode_t mode, void *holder)
 #ifdef HAVE_BLKDEV_PUT_HOLDER
 	return (blkdev_put(bdev, holder));
 #else
-	return (blkdev_put(bdev, vdev_bdev_mode(mode) | FMODE_EXCL));
+	return (blkdev_put(bdev, vdev_bdev_mode(mode, B_TRUE)));
 #endif
 }

@@ -248,9 +254,9 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
 {
 	struct block_device *bdev;
 #ifdef HAVE_BLK_MODE_T
-	blk_mode_t mode = vdev_bdev_mode(spa_mode(v->vdev_spa));
+	blk_mode_t mode = vdev_bdev_mode(spa_mode(v->vdev_spa), B_FALSE);
 #else
-	fmode_t mode = vdev_bdev_mode(spa_mode(v->vdev_spa));
+	fmode_t mode = vdev_bdev_mode(spa_mode(v->vdev_spa), B_FALSE);
 #endif
 	hrtime_t timeout = MSEC2NSEC(zfs_vdev_open_timeout_ms);
 	vdev_disk_t *vd;
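A quick sketch of the calling convention after this change (the mode names come from the kernel headers referenced above):

	/* probing/opening for I/O: no exclusive claim */
	blk_mode_t mode = vdev_bdev_mode(spa_mode(v->vdev_spa), B_FALSE);

	/* paths that claim the device: exclusive open */
	bdev = blkdev_get_by_path(path,
	    vdev_bdev_mode(spa_mode(v->vdev_spa), B_TRUE), holder, hops);

The point of the second parameter is that BLK_OPEN_EXCL/FMODE_EXCL is now requested inside vdev_bdev_mode() itself instead of being OR-ed in by every caller, keeping the blk_mode_t and fmode_t branches symmetric.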
@@ -520,8 +520,8 @@ zfsctl_inode_alloc(zfsvfs_t *zfsvfs, uint64_t id,
 	ip->i_uid = SUID_TO_KUID(0);
 	ip->i_gid = SGID_TO_KGID(0);
 	ip->i_blkbits = SPA_MINBLOCKSHIFT;
-	ip->i_atime = now;
-	ip->i_mtime = now;
+	zpl_inode_set_atime_to_ts(ip, now);
+	zpl_inode_set_mtime_to_ts(ip, now);
 	zpl_inode_set_ctime_to_ts(ip, now);
 	ip->i_fop = fops;
 	ip->i_op = ops;
@@ -1258,12 +1258,18 @@ zfs_prune_aliases(zfsvfs_t *zfsvfs, unsigned long nr_to_scan)
 * and inode caches.  This can occur when the ARC needs to free meta data
 * blocks but can't because they are all pinned by entries in these caches.
 */
+#if defined(HAVE_SUPER_BLOCK_S_SHRINK)
+#define	S_SHRINK(sb)	(&(sb)->s_shrink)
+#elif defined(HAVE_SUPER_BLOCK_S_SHRINK_PTR)
+#define	S_SHRINK(sb)	((sb)->s_shrink)
+#endif
+
 int
 zfs_prune(struct super_block *sb, unsigned long nr_to_scan, int *objects)
 {
 	zfsvfs_t *zfsvfs = sb->s_fs_info;
 	int error = 0;
-	struct shrinker *shrinker = &sb->s_shrink;
+	struct shrinker *shrinker = S_SHRINK(sb);
 	struct shrink_control sc = {
 		.nr_to_scan = nr_to_scan,
 		.gfp_mask = GFP_KERNEL,

@@ -1275,7 +1281,7 @@ zfs_prune(struct super_block *sb, unsigned long nr_to_scan, int *objects)
 #if defined(HAVE_SPLIT_SHRINKER_CALLBACK) && \
	defined(SHRINK_CONTROL_HAS_NID) && \
	defined(SHRINKER_NUMA_AWARE)
-	if (sb->s_shrink.flags & SHRINKER_NUMA_AWARE) {
+	if (shrinker->flags & SHRINKER_NUMA_AWARE) {
 		*objects = 0;
 		for_each_online_node(sc.nid) {
 			*objects += (*shrinker->scan_objects)(shrinker, &sc);
@@ -2464,15 +2464,17 @@ top:
 
 	if ((mask & ATTR_ATIME) || zp->z_atime_dirty) {
 		zp->z_atime_dirty = B_FALSE;
-		ZFS_TIME_ENCODE(&ip->i_atime, atime);
+		inode_timespec_t tmp_atime;
+		ZFS_TIME_ENCODE(&tmp_atime, atime);
+		zpl_inode_set_atime_to_ts(ZTOI(zp), tmp_atime);
 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
 		    &atime, sizeof (atime));
 	}
 
 	if (mask & (ATTR_MTIME | ATTR_SIZE)) {
 		ZFS_TIME_ENCODE(&vap->va_mtime, mtime);
-		ZTOI(zp)->i_mtime = zpl_inode_timestamp_truncate(
-		    vap->va_mtime, ZTOI(zp));
+		zpl_inode_set_mtime_to_ts(ZTOI(zp),
+		    zpl_inode_timestamp_truncate(vap->va_mtime, ZTOI(zp)));
 
 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
 		    mtime, sizeof (mtime));

@@ -3686,7 +3688,7 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc,
 	caddr_t va;
 	int err = 0;
 	uint64_t mtime[2], ctime[2];
-	inode_timespec_t tmp_ctime;
+	inode_timespec_t tmp_ts;
 	sa_bulk_attr_t bulk[3];
 	int cnt = 0;
 	struct address_space *mapping;

@@ -3850,9 +3852,10 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc,
 	    &zp->z_pflags, 8);
 
 	/* Preserve the mtime and ctime provided by the inode */
-	ZFS_TIME_ENCODE(&ip->i_mtime, mtime);
-	tmp_ctime = zpl_inode_get_ctime(ip);
-	ZFS_TIME_ENCODE(&tmp_ctime, ctime);
+	tmp_ts = zpl_inode_get_mtime(ip);
+	ZFS_TIME_ENCODE(&tmp_ts, mtime);
+	tmp_ts = zpl_inode_get_ctime(ip);
+	ZFS_TIME_ENCODE(&tmp_ts, ctime);
 	zp->z_atime_dirty = B_FALSE;
 	zp->z_seq++;

@@ -3902,7 +3905,7 @@ zfs_dirty_inode(struct inode *ip, int flags)
 	zfsvfs_t *zfsvfs = ITOZSB(ip);
 	dmu_tx_t *tx;
 	uint64_t mode, atime[2], mtime[2], ctime[2];
-	inode_timespec_t tmp_ctime;
+	inode_timespec_t tmp_ts;
 	sa_bulk_attr_t bulk[4];
 	int error = 0;
 	int cnt = 0;

@@ -3947,10 +3950,12 @@ zfs_dirty_inode(struct inode *ip, int flags)
 	SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
 
 	/* Preserve the mode, mtime and ctime provided by the inode */
-	ZFS_TIME_ENCODE(&ip->i_atime, atime);
-	ZFS_TIME_ENCODE(&ip->i_mtime, mtime);
-	tmp_ctime = zpl_inode_get_ctime(ip);
-	ZFS_TIME_ENCODE(&tmp_ctime, ctime);
+	tmp_ts = zpl_inode_get_atime(ip);
+	ZFS_TIME_ENCODE(&tmp_ts, atime);
+	tmp_ts = zpl_inode_get_mtime(ip);
+	ZFS_TIME_ENCODE(&tmp_ts, mtime);
+	tmp_ts = zpl_inode_get_ctime(ip);
+	ZFS_TIME_ENCODE(&tmp_ts, ctime);
 	mode = ip->i_mode;
 
 	zp->z_mode = mode;

@@ -3993,7 +3998,9 @@ zfs_inactive(struct inode *ip)
 		if (error) {
 			dmu_tx_abort(tx);
 		} else {
-			ZFS_TIME_ENCODE(&ip->i_atime, atime);
+			inode_timespec_t tmp_atime;
+			tmp_atime = zpl_inode_get_atime(ip);
+			ZFS_TIME_ENCODE(&tmp_atime, atime);
 			mutex_enter(&zp->z_lock);
 			(void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs),
 			    (void *)&atime, sizeof (atime), tx);
@@ -542,7 +542,7 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
 	uint64_t links;
 	uint64_t z_uid, z_gid;
 	uint64_t atime[2], mtime[2], ctime[2], btime[2];
-	inode_timespec_t tmp_ctime;
+	inode_timespec_t tmp_ts;
 	uint64_t projid = ZFS_DEFAULT_PROJID;
 	sa_bulk_attr_t bulk[12];
 	int count = 0;

@@ -614,10 +614,12 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
 	if (zp->z_pflags & ZFS_XATTR)
 		zp->z_xattr_parent = parent;
 
-	ZFS_TIME_DECODE(&ip->i_atime, atime);
-	ZFS_TIME_DECODE(&ip->i_mtime, mtime);
-	ZFS_TIME_DECODE(&tmp_ctime, ctime);
-	zpl_inode_set_ctime_to_ts(ip, tmp_ctime);
+	ZFS_TIME_DECODE(&tmp_ts, atime);
+	zpl_inode_set_atime_to_ts(ip, tmp_ts);
+	ZFS_TIME_DECODE(&tmp_ts, mtime);
+	zpl_inode_set_mtime_to_ts(ip, tmp_ts);
+	ZFS_TIME_DECODE(&tmp_ts, ctime);
+	zpl_inode_set_ctime_to_ts(ip, tmp_ts);
 	ZFS_TIME_DECODE(&zp->z_btime, btime);
 
 	ip->i_ino = zp->z_id;

@@ -1197,7 +1199,7 @@ zfs_rezget(znode_t *zp)
 	uint64_t gen;
 	uint64_t z_uid, z_gid;
 	uint64_t atime[2], mtime[2], ctime[2], btime[2];
-	inode_timespec_t tmp_ctime;
+	inode_timespec_t tmp_ts;
 	uint64_t projid = ZFS_DEFAULT_PROJID;
 	znode_hold_t *zh;

@@ -1290,10 +1292,12 @@ zfs_rezget(znode_t *zp)
 	zfs_uid_write(ZTOI(zp), z_uid);
 	zfs_gid_write(ZTOI(zp), z_gid);
 
-	ZFS_TIME_DECODE(&ZTOI(zp)->i_atime, atime);
-	ZFS_TIME_DECODE(&ZTOI(zp)->i_mtime, mtime);
-	ZFS_TIME_DECODE(&tmp_ctime, ctime);
-	zpl_inode_set_ctime_to_ts(ZTOI(zp), tmp_ctime);
+	ZFS_TIME_DECODE(&tmp_ts, atime);
+	zpl_inode_set_atime_to_ts(ZTOI(zp), tmp_ts);
+	ZFS_TIME_DECODE(&tmp_ts, mtime);
+	zpl_inode_set_mtime_to_ts(ZTOI(zp), tmp_ts);
+	ZFS_TIME_DECODE(&tmp_ts, ctime);
+	zpl_inode_set_ctime_to_ts(ZTOI(zp), tmp_ts);
 	ZFS_TIME_DECODE(&zp->z_btime, btime);
 
 	if ((uint32_t)gen != ZTOI(zp)->i_generation) {

@@ -1401,22 +1405,24 @@ zfs_zinactive(znode_t *zp)
 boolean_t
 zfs_relatime_need_update(const struct inode *ip)
 {
-	inode_timespec_t now, tmp_ctime;
+	inode_timespec_t now, tmp_atime, tmp_ts;
 
 	gethrestime(&now);
+	tmp_atime = zpl_inode_get_atime(ip);
 	/*
	 * In relatime mode, only update the atime if the previous atime
	 * is earlier than either the ctime or mtime or if at least a day
	 * has passed since the last update of atime.
	 */
-	if (zfs_compare_timespec(&ip->i_mtime, &ip->i_atime) >= 0)
+	tmp_ts = zpl_inode_get_mtime(ip);
+	if (zfs_compare_timespec(&tmp_ts, &tmp_atime) >= 0)
 		return (B_TRUE);
 
-	tmp_ctime = zpl_inode_get_ctime(ip);
-	if (zfs_compare_timespec(&tmp_ctime, &ip->i_atime) >= 0)
+	tmp_ts = zpl_inode_get_ctime(ip);
+	if (zfs_compare_timespec(&tmp_ts, &tmp_atime) >= 0)
 		return (B_TRUE);
 
-	if ((hrtime_t)now.tv_sec - (hrtime_t)ip->i_atime.tv_sec >= 24*60*60)
+	if ((hrtime_t)now.tv_sec - (hrtime_t)tmp_atime.tv_sec >= 24*60*60)
 		return (B_TRUE);
 
 	return (B_FALSE);

@@ -1439,7 +1445,7 @@ void
 zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2],
     uint64_t ctime[2])
 {
-	inode_timespec_t now, tmp_ctime;
+	inode_timespec_t now, tmp_ts;
 
 	gethrestime(&now);

@@ -1447,7 +1453,8 @@ zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2],
 
 	if (flag & ATTR_MTIME) {
 		ZFS_TIME_ENCODE(&now, mtime);
-		ZFS_TIME_DECODE(&(ZTOI(zp)->i_mtime), mtime);
+		ZFS_TIME_DECODE(&tmp_ts, mtime);
+		zpl_inode_set_mtime_to_ts(ZTOI(zp), tmp_ts);
 		if (ZTOZSB(zp)->z_use_fuids) {
 			zp->z_pflags |= (ZFS_ARCHIVE |
 			    ZFS_AV_MODIFIED);

@@ -1456,8 +1463,8 @@ zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2],
 
 	if (flag & ATTR_CTIME) {
 		ZFS_TIME_ENCODE(&now, ctime);
-		ZFS_TIME_DECODE(&tmp_ctime, ctime);
-		zpl_inode_set_ctime_to_ts(ZTOI(zp), tmp_ctime);
+		ZFS_TIME_DECODE(&tmp_ts, ctime);
+		zpl_inode_set_ctime_to_ts(ZTOI(zp), tmp_ts);
 		if (ZTOZSB(zp)->z_use_fuids)
 			zp->z_pflags |= ZFS_ARCHIVE;
 	}
@@ -1405,7 +1405,7 @@ zio_crypt_init_uios_zil(boolean_t encrypt, uint8_t *plainbuf,
     boolean_t *no_crypt)
 {
 	int ret;
-	uint64_t txtype, lr_len;
+	uint64_t txtype, lr_len, nused;
 	uint_t nr_src, nr_dst, crypt_len;
 	uint_t aad_len = 0, nr_iovecs = 0, total_len = 0;
 	iovec_t *src_iovecs = NULL, *dst_iovecs = NULL;

@@ -1432,7 +1432,10 @@ zio_crypt_init_uios_zil(boolean_t encrypt, uint8_t *plainbuf,
 	zilc = (zil_chain_t *)src;
 	slrp = src + sizeof (zil_chain_t);
 	aadp = aadbuf;
-	blkend = src + ((byteswap) ? BSWAP_64(zilc->zc_nused) : zilc->zc_nused);
+	nused = ((byteswap) ? BSWAP_64(zilc->zc_nused) : zilc->zc_nused);
+	ASSERT3U(nused, >=, sizeof (zil_chain_t));
+	ASSERT3U(nused, <=, datalen);
+	blkend = src + nused;
 
 	/* calculate the number of encrypted iovecs we will need */
 	for (; slrp < blkend; slrp += lr_len) {

@@ -1445,6 +1448,8 @@ zio_crypt_init_uios_zil(boolean_t encrypt, uint8_t *plainbuf,
 		txtype = BSWAP_64(lr->lrc_txtype);
 		lr_len = BSWAP_64(lr->lrc_reclen);
 	}
+	ASSERT3U(lr_len, >=, sizeof (lr_t));
+	ASSERT3U(lr_len, <=, blkend - slrp);
 
 	nr_iovecs++;
 	if (txtype == TX_WRITE && lr_len != sizeof (lr_write_t))
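Both the FreeBSD and Linux copies of zio_crypt_init_uios_zil() gain the same bounds checks on zc_nused and lrc_reclen, values read from the (untrusted) on-disk ZIL block. As a standalone illustration of the walk being guarded (hypothetical helper, not part of the commit):

	/* Returns 0 iff every log record lies inside src[0..datalen). */
	static int
	zil_records_in_bounds(const uint8_t *src, uint64_t nused,
	    uint64_t datalen)
	{
		if (nused < sizeof (zil_chain_t) || nused > datalen)
			return (EINVAL);
		const uint8_t *slrp = src + sizeof (zil_chain_t);
		const uint8_t *blkend = src + nused;
		while (slrp < blkend) {
			uint64_t lr_len = ((const lr_t *)slrp)->lrc_reclen;
			if (lr_len < sizeof (lr_t) ||
			    lr_len > (uint64_t)(blkend - slrp))
				return (EINVAL);
			slrp += lr_len;
		}
		return (0);
	}

Without the reclen check, a corrupt record could advance slrp past blkend or spin forever on lr_len == 0.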
@@ -526,7 +526,8 @@ zpl_setattr(struct dentry *dentry, struct iattr *ia)
 		vap->va_ctime = ia->ia_ctime;
 
 	if (vap->va_mask & ATTR_ATIME)
-		ip->i_atime = zpl_inode_timestamp_truncate(ia->ia_atime, ip);
+		zpl_inode_set_atime_to_ts(ip,
+		    zpl_inode_timestamp_truncate(ia->ia_atime, ip));
 
 	cookie = spl_fstrans_mark();
 #ifdef HAVE_USERNS_IOPS_SETATTR
@@ -1528,6 +1528,8 @@ zvol_os_rename_minor(zvol_state_t *zv, const char *newname)
 	 */
 	set_disk_ro(zv->zv_zso->zvo_disk, !readonly);
 	set_disk_ro(zv->zv_zso->zvo_disk, readonly);
+
+	dataset_kstats_rename(&zv->zv_kstat, newname);
 }
 
 void
module/zfs/abd.c
@@ -802,13 +802,10 @@ abd_iterate_func(abd_t *abd, size_t off, size_t size,
 	abd_verify(abd);
 	ASSERT3U(off + size, <=, abd->abd_size);
 
-	boolean_t gang = abd_is_gang(abd);
 	abd_t *c_abd = abd_init_abd_iter(abd, &aiter, off);
 
 	while (size > 0) {
-		/* If we are at the end of the gang ABD we are done */
-		if (gang && !c_abd)
-			break;
+		IMPLY(abd_is_gang(abd), c_abd != NULL);
 
 		abd_iter_map(&aiter);

@@ -930,7 +927,6 @@ abd_iterate_func2(abd_t *dabd, abd_t *sabd, size_t doff, size_t soff,
 {
 	int ret = 0;
 	struct abd_iter daiter, saiter;
-	boolean_t dabd_is_gang_abd, sabd_is_gang_abd;
 	abd_t *c_dabd, *c_sabd;
 
 	if (size == 0)

@@ -942,16 +938,12 @@ abd_iterate_func2(abd_t *dabd, abd_t *sabd, size_t doff, size_t soff,
 	ASSERT3U(doff + size, <=, dabd->abd_size);
 	ASSERT3U(soff + size, <=, sabd->abd_size);
 
-	dabd_is_gang_abd = abd_is_gang(dabd);
-	sabd_is_gang_abd = abd_is_gang(sabd);
 	c_dabd = abd_init_abd_iter(dabd, &daiter, doff);
 	c_sabd = abd_init_abd_iter(sabd, &saiter, soff);
 
 	while (size > 0) {
-		/* if we are at the end of the gang ABD we are done */
-		if ((dabd_is_gang_abd && !c_dabd) ||
-		    (sabd_is_gang_abd && !c_sabd))
-			break;
+		IMPLY(abd_is_gang(dabd), c_dabd != NULL);
+		IMPLY(abd_is_gang(sabd), c_sabd != NULL);
 
 		abd_iter_map(&daiter);
 		abd_iter_map(&saiter);

@@ -1032,66 +1024,40 @@ abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd,
 	int i;
 	ssize_t len, dlen;
 	struct abd_iter caiters[3];
-	struct abd_iter daiter = {0};
+	struct abd_iter daiter;
 	void *caddrs[3];
 	unsigned long flags __maybe_unused = 0;
 	abd_t *c_cabds[3];
 	abd_t *c_dabd = NULL;
-	boolean_t cabds_is_gang_abd[3];
-	boolean_t dabd_is_gang_abd = B_FALSE;
 
 	ASSERT3U(parity, <=, 3);
 
 	for (i = 0; i < parity; i++) {
-		cabds_is_gang_abd[i] = abd_is_gang(cabds[i]);
+		abd_verify(cabds[i]);
+		ASSERT3U(csize, <=, cabds[i]->abd_size);
 		c_cabds[i] = abd_init_abd_iter(cabds[i], &caiters[i], 0);
 	}
 
-	if (dabd) {
-		dabd_is_gang_abd = abd_is_gang(dabd);
+	ASSERT3S(dsize, >=, 0);
+	if (dsize > 0) {
+		ASSERT(dabd);
+		abd_verify(dabd);
+		ASSERT3U(dsize, <=, dabd->abd_size);
 		c_dabd = abd_init_abd_iter(dabd, &daiter, 0);
 	}
 
-	ASSERT3S(dsize, >=, 0);
-
 	abd_enter_critical(flags);
 	while (csize > 0) {
-		/* if we are at the end of the gang ABD we are done */
-		if (dabd_is_gang_abd && !c_dabd)
-			break;
-
+		len = csize;
 		for (i = 0; i < parity; i++) {
-			/*
-			 * If we are at the end of the gang ABD we are
-			 * done.
-			 */
-			if (cabds_is_gang_abd[i] && !c_cabds[i])
-				break;
+			IMPLY(abd_is_gang(cabds[i]), c_cabds[i] != NULL);
 			abd_iter_map(&caiters[i]);
 			caddrs[i] = caiters[i].iter_mapaddr;
+			len = MIN(caiters[i].iter_mapsize, len);
 		}
 
-		len = csize;
-
-		if (dabd && dsize > 0)
-			abd_iter_map(&daiter);
-
-		switch (parity) {
-		case 3:
-			len = MIN(caiters[2].iter_mapsize, len);
-			zfs_fallthrough;
-		case 2:
-			len = MIN(caiters[1].iter_mapsize, len);
-			zfs_fallthrough;
-		case 1:
-			len = MIN(caiters[0].iter_mapsize, len);
-		}
-
 		/* must be progressive */
 		ASSERT3S(len, >, 0);
 
-		if (dabd && dsize > 0) {
+		if (dsize > 0) {
+			IMPLY(abd_is_gang(dabd), c_dabd != NULL);
+			abd_iter_map(&daiter);
 			/* this needs precise iter.length */
 			len = MIN(daiter.iter_mapsize, len);
 			dlen = len;
 		} else

@@ -1114,7 +1080,7 @@ abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd,
 			    &caiters[i], len);
 		}
 
-		if (dabd && dsize > 0) {
+		if (dsize > 0) {
 			abd_iter_unmap(&daiter);
 			c_dabd =
 			    abd_advance_abd_iter(dabd, c_dabd, &daiter,

@@ -1153,16 +1119,16 @@ abd_raidz_rec_iterate(abd_t **cabds, abd_t **tabds,
 	struct abd_iter xiters[3];
 	void *caddrs[3], *xaddrs[3];
 	unsigned long flags __maybe_unused = 0;
-	boolean_t cabds_is_gang_abd[3];
-	boolean_t tabds_is_gang_abd[3];
 	abd_t *c_cabds[3];
 	abd_t *c_tabds[3];
 
 	ASSERT3U(parity, <=, 3);
 
 	for (i = 0; i < parity; i++) {
-		cabds_is_gang_abd[i] = abd_is_gang(cabds[i]);
-		tabds_is_gang_abd[i] = abd_is_gang(tabds[i]);
+		abd_verify(cabds[i]);
+		abd_verify(tabds[i]);
+		ASSERT3U(tsize, <=, cabds[i]->abd_size);
+		ASSERT3U(tsize, <=, tabds[i]->abd_size);
 		c_cabds[i] =
 		    abd_init_abd_iter(cabds[i], &citers[i], 0);
 		c_tabds[i] =

@@ -1171,36 +1137,18 @@ abd_raidz_rec_iterate(abd_t **cabds, abd_t **tabds,
 
 	abd_enter_critical(flags);
 	while (tsize > 0) {
-
+		len = tsize;
 		for (i = 0; i < parity; i++) {
-			/*
-			 * If we are at the end of the gang ABD we
-			 * are done.
-			 */
-			if (cabds_is_gang_abd[i] && !c_cabds[i])
-				break;
-			if (tabds_is_gang_abd[i] && !c_tabds[i])
-				break;
+			IMPLY(abd_is_gang(cabds[i]), c_cabds[i] != NULL);
+			IMPLY(abd_is_gang(tabds[i]), c_tabds[i] != NULL);
 			abd_iter_map(&citers[i]);
 			abd_iter_map(&xiters[i]);
 			caddrs[i] = citers[i].iter_mapaddr;
 			xaddrs[i] = xiters[i].iter_mapaddr;
+			len = MIN(citers[i].iter_mapsize, len);
+			len = MIN(xiters[i].iter_mapsize, len);
 		}
 
-		len = tsize;
-		switch (parity) {
-		case 3:
-			len = MIN(xiters[2].iter_mapsize, len);
-			len = MIN(citers[2].iter_mapsize, len);
-			zfs_fallthrough;
-		case 2:
-			len = MIN(xiters[1].iter_mapsize, len);
-			len = MIN(citers[1].iter_mapsize, len);
-			zfs_fallthrough;
-		case 1:
-			len = MIN(xiters[0].iter_mapsize, len);
-			len = MIN(citers[0].iter_mapsize, len);
-		}
-
 		/* must be progressive */
 		ASSERT3S(len, >, 0);
 		/*
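The IMPLY() changes above convert what used to be silent loop exits into debug assertions. In ZFS, IMPLY(P, Q) is an ASSERT that fires when P holds but Q does not, so

	IMPLY(abd_is_gang(abd), c_abd != NULL);

is equivalent to

	ASSERT(!abd_is_gang(abd) || c_abd != NULL);

meaning a gang ABD whose iterator ran out of children now trips an assertion in debug builds instead of quietly truncating the iteration.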
@@ -8042,9 +8042,8 @@ l2arc_write_size(l2arc_dev_t *dev)
 	 */
 	size = l2arc_write_max;
 	if (size == 0) {
-		cmn_err(CE_NOTE, "Bad value for l2arc_write_max, value must "
-		    "be greater than zero, resetting it to the default (%d)",
-		    L2ARC_WRITE_SIZE);
+		cmn_err(CE_NOTE, "l2arc_write_max must be greater than zero, "
+		    "resetting it to the default (%d)", L2ARC_WRITE_SIZE);
 		size = l2arc_write_max = L2ARC_WRITE_SIZE;
 	}

@@ -8067,30 +8066,9 @@ l2arc_write_size(l2arc_dev_t *dev)
 	 * device. This is important in l2arc_evict(), otherwise infinite
 	 * iteration can occur.
 	 */
-	if (size > dev->l2ad_end - dev->l2ad_start) {
-		cmn_err(CE_NOTE, "l2arc_write_max or l2arc_write_boost "
-		    "plus the overhead of log blocks (persistent L2ARC, "
-		    "%llu bytes) exceeds the size of the cache device "
-		    "(guid %llu), resetting them to the default (%d)",
-		    (u_longlong_t)l2arc_log_blk_overhead(size, dev),
-		    (u_longlong_t)dev->l2ad_vdev->vdev_guid, L2ARC_WRITE_SIZE);
-		size = l2arc_write_max = l2arc_write_boost = L2ARC_WRITE_SIZE;
-
-		if (l2arc_trim_ahead > 1) {
-			cmn_err(CE_NOTE, "l2arc_trim_ahead set to 1");
-			l2arc_trim_ahead = 1;
-		}
-
-		if (arc_warm == B_FALSE)
-			size += l2arc_write_boost;
-
-		size += l2arc_log_blk_overhead(size, dev);
-		if (dev->l2ad_vdev->vdev_has_trim && l2arc_trim_ahead > 0) {
-			size += MAX(64 * 1024 * 1024,
-			    (size * l2arc_trim_ahead) / 100);
-		}
-	}
+	size = MIN(size, (dev->l2ad_end - dev->l2ad_start) / 4);
+
+	size = P2ROUNDUP(size, 1ULL << dev->l2ad_vdev->vdev_ashift);
 
 	return (size);
@ -157,10 +157,8 @@
* (copying the file content to the new dataset and removing the source file).
* In that case Block Cloning will only be used briefly, because the BRT entries
* will be removed when the source is removed.
* Note: currently it is not possible to clone blocks between encrypted
* datasets, even if those datasets use the same encryption key (this includes
* snapshots of encrypted datasets). Cloning blocks between datasets that use
* the same keys should be possible and should be implemented in the future.
* Block Cloning across encrypted datasets is supported as long as both
* datasets share the same master key (e.g. snapshots and clones).
*
* Block Cloning flow through ZFS layers.
*
@ -344,7 +342,7 @@ brt_vdev_entcount_get(const brt_vdev_t *brtvd, uint64_t idx)

ASSERT3U(idx, <, brtvd->bv_size);

if (brtvd->bv_need_byteswap) {
if (unlikely(brtvd->bv_need_byteswap)) {
return (BSWAP_16(brtvd->bv_entcount[idx]));
} else {
return (brtvd->bv_entcount[idx]);

@ -357,7 +355,7 @@ brt_vdev_entcount_set(brt_vdev_t *brtvd, uint64_t idx, uint16_t entcnt)

ASSERT3U(idx, <, brtvd->bv_size);

if (brtvd->bv_need_byteswap) {
if (unlikely(brtvd->bv_need_byteswap)) {
brtvd->bv_entcount[idx] = BSWAP_16(entcnt);
} else {
brtvd->bv_entcount[idx] = entcnt;
@ -392,55 +390,39 @@ brt_vdev_entcount_dec(brt_vdev_t *brtvd, uint64_t idx)

#ifdef ZFS_DEBUG
static void
brt_vdev_dump(brt_t *brt)
brt_vdev_dump(brt_vdev_t *brtvd)
{
brt_vdev_t *brtvd;
uint64_t vdevid;
uint64_t idx;

if ((zfs_flags & ZFS_DEBUG_BRT) == 0) {
return;
}

if (brt->brt_nvdevs == 0) {
zfs_dbgmsg("BRT empty");
return;
}

zfs_dbgmsg("BRT vdev dump:");
for (vdevid = 0; vdevid < brt->brt_nvdevs; vdevid++) {
uint64_t idx;

brtvd = &brt->brt_vdevs[vdevid];
zfs_dbgmsg(" vdevid=%llu/%llu meta_dirty=%d entcount_dirty=%d "
"size=%llu totalcount=%llu nblocks=%llu bitmapsize=%zu\n",
(u_longlong_t)vdevid, (u_longlong_t)brtvd->bv_vdevid,
brtvd->bv_meta_dirty, brtvd->bv_entcount_dirty,
(u_longlong_t)brtvd->bv_size,
(u_longlong_t)brtvd->bv_totalcount,
(u_longlong_t)brtvd->bv_nblocks,
(size_t)BT_SIZEOFMAP(brtvd->bv_nblocks));
if (brtvd->bv_totalcount > 0) {
zfs_dbgmsg(" entcounts:");
for (idx = 0; idx < brtvd->bv_size; idx++) {
if (brt_vdev_entcount_get(brtvd, idx) > 0) {
zfs_dbgmsg(" [%04llu] %hu",
(u_longlong_t)idx,
brt_vdev_entcount_get(brtvd, idx));
}
zfs_dbgmsg(" BRT vdevid=%llu meta_dirty=%d entcount_dirty=%d "
"size=%llu totalcount=%llu nblocks=%llu bitmapsize=%zu\n",
(u_longlong_t)brtvd->bv_vdevid,
brtvd->bv_meta_dirty, brtvd->bv_entcount_dirty,
(u_longlong_t)brtvd->bv_size,
(u_longlong_t)brtvd->bv_totalcount,
(u_longlong_t)brtvd->bv_nblocks,
(size_t)BT_SIZEOFMAP(brtvd->bv_nblocks));
if (brtvd->bv_totalcount > 0) {
zfs_dbgmsg(" entcounts:");
for (idx = 0; idx < brtvd->bv_size; idx++) {
uint16_t entcnt = brt_vdev_entcount_get(brtvd, idx);
if (entcnt > 0) {
zfs_dbgmsg(" [%04llu] %hu",
(u_longlong_t)idx, entcnt);
}
}
if (brtvd->bv_entcount_dirty) {
char *bitmap;
}
if (brtvd->bv_entcount_dirty) {
char *bitmap;

bitmap = kmem_alloc(brtvd->bv_nblocks + 1, KM_SLEEP);
for (idx = 0; idx < brtvd->bv_nblocks; idx++) {
bitmap[idx] =
BT_TEST(brtvd->bv_bitmap, idx) ? 'x' : '.';
}
bitmap[idx] = '\0';
zfs_dbgmsg(" bitmap: %s", bitmap);
kmem_free(bitmap, brtvd->bv_nblocks + 1);
bitmap = kmem_alloc(brtvd->bv_nblocks + 1, KM_SLEEP);
for (idx = 0; idx < brtvd->bv_nblocks; idx++) {
bitmap[idx] =
BT_TEST(brtvd->bv_bitmap, idx) ? 'x' : '.';
}
bitmap[idx] = '\0';
zfs_dbgmsg(" dirty: %s", bitmap);
kmem_free(bitmap, brtvd->bv_nblocks + 1);
}
}
#endif

@ -769,7 +751,8 @@ brt_vdev_addref(brt_t *brt, brt_vdev_t *brtvd, const brt_entry_t *bre,
BT_SET(brtvd->bv_bitmap, idx);

#ifdef ZFS_DEBUG
brt_vdev_dump(brt);
if (zfs_flags & ZFS_DEBUG_BRT)
brt_vdev_dump(brtvd);
#endif
}

@ -805,7 +788,8 @@ brt_vdev_decref(brt_t *brt, brt_vdev_t *brtvd, const brt_entry_t *bre,
BT_SET(brtvd->bv_bitmap, idx);

#ifdef ZFS_DEBUG
brt_vdev_dump(brt);
if (zfs_flags & ZFS_DEBUG_BRT)
brt_vdev_dump(brtvd);
#endif
}
@ -198,6 +198,18 @@ dataset_kstats_destroy(dataset_kstats_t *dk)
zil_sums_fini(&dk->dk_zil_sums);
}

void
dataset_kstats_rename(dataset_kstats_t *dk, const char *name)
{
dataset_kstat_values_t *dkv = dk->dk_kstats->ks_data;
char *ds_name;

ds_name = KSTAT_NAMED_STR_PTR(&dkv->dkv_ds_name);
ASSERT3S(ds_name, !=, NULL);
(void) strlcpy(ds_name, name,
KSTAT_NAMED_STR_BUFLEN(&dkv->dkv_ds_name));
}

void
dataset_kstats_update_write_kstats(dataset_kstats_t *dk,
int64_t nwritten)
@ -1619,8 +1619,6 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags,
*/
if (db->db_objset->os_encrypted && !BP_USES_CRYPT(bpp)) {
spa_log_error(db->db_objset->os_spa, &zb, &bpp->blk_birth);
zfs_panic_recover("unencrypted block in encrypted "
"object set %llu", dmu_objset_id(db->db_objset));
err = SET_ERROR(EIO);
goto early_unlock;
}

@ -1925,7 +1923,7 @@ dbuf_unoverride(dbuf_dirty_record_t *dr)
zio_free(db->db_objset->os_spa, txg, bp);

if (dr->dt.dl.dr_brtwrite) {
ASSERT0P(dr->dt.dl.dr_data);
ASSERT0(dr->dt.dl.dr_data);
dr->dt.dl.dr_data = db->db_buf;
}
dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN;
@ -1482,9 +1482,9 @@ dmu_assign_arcbuf_by_dnode(dnode_t *dn, uint64_t offset, arc_buf_t *buf,
rw_enter(&dn->dn_struct_rwlock, RW_READER);
blkid = dbuf_whichblock(dn, 0, offset);
db = dbuf_hold(dn, blkid, FTAG);
rw_exit(&dn->dn_struct_rwlock);
if (db == NULL)
return (SET_ERROR(EIO));
rw_exit(&dn->dn_struct_rwlock);

/*
* We can only assign if the offset is aligned and the arc buf is the
@ -1124,8 +1124,6 @@ send_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
if (sta->os->os_encrypted &&
!BP_IS_HOLE(bp) && !BP_USES_CRYPT(bp)) {
spa_log_error(spa, zb, &bp->blk_birth);
zfs_panic_recover("unencrypted block in encrypted "
"object set %llu", dmu_objset_id(sta->os));
return (SET_ERROR(EIO));
}
@ -266,6 +266,40 @@ spa_crypto_key_compare(const void *a, const void *b)
return (0);
}

/*
* This compares a crypto key based on zk_guid. See the comment on
* spa_crypto_key_compare for more information.
*/
boolean_t
dmu_objset_crypto_key_equal(objset_t *osa, objset_t *osb)
{
dsl_crypto_key_t *dcka = NULL;
dsl_crypto_key_t *dckb = NULL;
uint64_t obja, objb;
boolean_t equal;
spa_t *spa;

spa = dmu_objset_spa(osa);
if (spa != dmu_objset_spa(osb))
return (B_FALSE);
obja = dmu_objset_ds(osa)->ds_object;
objb = dmu_objset_ds(osb)->ds_object;

if (spa_keystore_lookup_key(spa, obja, FTAG, &dcka) != 0)
return (B_FALSE);
if (spa_keystore_lookup_key(spa, objb, FTAG, &dckb) != 0) {
spa_keystore_dsl_key_rele(spa, dcka, FTAG);
return (B_FALSE);
}

equal = (dcka->dck_key.zk_guid == dckb->dck_key.zk_guid);

spa_keystore_dsl_key_rele(spa, dcka, FTAG);
spa_keystore_dsl_key_rele(spa, dckb, FTAG);

return (equal);
}

static int
spa_key_mapping_compare(const void *a, const void *b)
{
@ -1000,8 +1000,6 @@ livelist_compare(const void *larg, const void *rarg)
/* if vdevs are equal, sort by offsets. */
uint64_t l_dva0_offset = DVA_GET_OFFSET(&l->blk_dva[0]);
uint64_t r_dva0_offset = DVA_GET_OFFSET(&r->blk_dva[0]);
if (l_dva0_offset == r_dva0_offset)
ASSERT3U(l->blk_birth, ==, r->blk_birth);
return (TREE_CMP(l_dva0_offset, r_dva0_offset));
}

@ -1016,9 +1014,9 @@ struct livelist_iter_arg {
* and used to match up ALLOC/FREE pairs. ALLOC'd blkptrs without a
* corresponding FREE are stored in the supplied bplist.
*
* Note that multiple FREE and ALLOC entries for the same blkptr may
* be encountered when dedup is involved. For this reason we keep a
* refcount for all the FREE entries of each blkptr and ensure that
* Note that multiple FREE and ALLOC entries for the same blkptr may be
* encountered when dedup or block cloning is involved. For this reason we
* keep a refcount for all the FREE entries of each blkptr and ensure that
* each of those FREE entries has a corresponding ALLOC preceding it.
*/
static int

@ -1037,6 +1035,13 @@ dsl_livelist_iterate(void *arg, const blkptr_t *bp, boolean_t bp_freed,
livelist_entry_t node;
node.le_bp = *bp;
livelist_entry_t *found = avl_find(avl, &node, NULL);
if (found) {
ASSERT3U(BP_GET_PSIZE(bp), ==, BP_GET_PSIZE(&found->le_bp));
ASSERT3U(BP_GET_CHECKSUM(bp), ==,
BP_GET_CHECKSUM(&found->le_bp));
ASSERT3U(BP_PHYSICAL_BIRTH(bp), ==,
BP_PHYSICAL_BIRTH(&found->le_bp));
}
if (bp_freed) {
if (found == NULL) {
/* first free entry for this blkptr */

@ -1046,10 +1051,10 @@ dsl_livelist_iterate(void *arg, const blkptr_t *bp, boolean_t bp_freed,
e->le_refcnt = 1;
avl_add(avl, e);
} else {
/* dedup block free */
ASSERT(BP_GET_DEDUP(bp));
ASSERT3U(BP_GET_CHECKSUM(bp), ==,
BP_GET_CHECKSUM(&found->le_bp));
/*
* Deduped or cloned block free. We could assert the D bit
* for dedup, but there is no such bit for cloning.
*/
ASSERT3U(found->le_refcnt + 1, >, found->le_refcnt);
found->le_refcnt++;
}

@ -1065,14 +1070,6 @@ dsl_livelist_iterate(void *arg, const blkptr_t *bp, boolean_t bp_freed,
/* all tracked free pairs have been matched */
avl_remove(avl, found);
kmem_free(found, sizeof (livelist_entry_t));
} else {
/*
* This is definitely a deduped blkptr so
* let's validate it.
*/
ASSERT(BP_GET_DEDUP(bp));
ASSERT3U(BP_GET_CHECKSUM(bp), ==,
BP_GET_CHECKSUM(&found->le_bp));
}
}
}
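
The FREE/ALLOC refcount rule in dsl_livelist_iterate() is easiest to see in a toy model: each FREE of a blkptr bumps a counter, and each matching ALLOC seen afterwards consumes one, so a FREE without a preceding ALLOC is an inconsistency. A minimal standalone sketch of that discipline (types and names here are illustrative, not the ZFS ones):

#include <assert.h>

typedef struct {
	int le_key;	/* stands in for the blkptr identity */
	int le_refcnt;	/* outstanding FREEs awaiting an ALLOC */
} toy_entry_t;

/* Called once per livelist record, in iteration order. */
static void
toy_track(toy_entry_t *e, int bp_freed)
{
	if (bp_freed) {
		/* dedup or block cloning: the same bp can be freed twice */
		e->le_refcnt++;
	} else {
		/* every tracked FREE needs an ALLOC to pair with */
		assert(e->le_refcnt > 0);
		e->le_refcnt--;
		/* when the count reaches zero the entry can be dropped */
	}
}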
module/zfs/spa.c
@ -151,7 +151,7 @@ static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = {
* and interrupt) and then to reserve threads for ZIO_PRIORITY_NOW I/Os that
* need to be handled with minimum delay.
*/
static const zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = {
static zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = {
/* ISSUE ISSUE_HIGH INTR INTR_HIGH */
{ ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* NULL */
{ ZTI_N(8), ZTI_NULL, ZTI_SCALE, ZTI_NULL }, /* READ */

@ -1164,6 +1164,275 @@ spa_taskqs_fini(spa_t *spa, zio_type_t t, zio_taskq_type_t q)
tqs->stqs_taskq = NULL;
}

#ifdef _KERNEL
/*
* The READ and WRITE rows of zio_taskqs are configurable at module load time
* by setting zio_taskq_read or zio_taskq_write.
*
* Example (the defaults for READ and WRITE)
* zio_taskq_read='fixed,1,8 null scale null'
* zio_taskq_write='batch fixed,1,5 scale fixed,1,5'
*
* Each sets the entire row at a time.
*
* 'fixed' is parameterised: fixed,Q,T where Q is number of taskqs, T is number
* of threads per taskq.
*
* 'null' can only be set on the high-priority queues (queue selection for
* high-priority queues will fall back to the regular queue if the high-pri
* is NULL).
*/
static const char *const modes[ZTI_NMODES] = {
"fixed", "batch", "scale", "null"
};

/* Parse the incoming config string. Modifies cfg */
static int
spa_taskq_param_set(zio_type_t t, char *cfg)
{
int err = 0;

zio_taskq_info_t row[ZIO_TASKQ_TYPES] = {{0}};

char *next = cfg, *tok, *c;

/*
* Parse out each element from the string and fill `row`. The entire
* row has to be set at once, so any errors are flagged by just
* breaking out of this loop early.
*/
uint_t q;
for (q = 0; q < ZIO_TASKQ_TYPES; q++) {
/* `next` is the start of the config */
if (next == NULL)
break;

/* Eat up leading space */
while (isspace(*next))
next++;
if (*next == '\0')
break;

/* Mode ends at space or end of string */
tok = next;
next = strchr(tok, ' ');
if (next != NULL) *next++ = '\0';

/* Parameters start after a comma */
c = strchr(tok, ',');
if (c != NULL) *c++ = '\0';

/* Match mode string */
uint_t mode;
for (mode = 0; mode < ZTI_NMODES; mode++)
if (strcmp(tok, modes[mode]) == 0)
break;
if (mode == ZTI_NMODES)
break;

/* Invalid canary */
row[q].zti_mode = ZTI_NMODES;

/* Per-mode setup */
switch (mode) {

/*
* FIXED is parameterised: number of queues, and number of
* threads per queue.
*/
case ZTI_MODE_FIXED: {
/* No parameters? */
if (c == NULL || *c == '\0')
break;

/* Find next parameter */
tok = c;
c = strchr(tok, ',');
if (c == NULL)
break;

/* Take digits and convert */
unsigned long long nq;
if (!(isdigit(*tok)))
break;
err = ddi_strtoull(tok, &tok, 10, &nq);
/* Must succeed and also end at the next param sep */
if (err != 0 || tok != c)
break;

/* Move past the comma */
tok++;
/* Need another number */
if (!(isdigit(*tok)))
break;
/* Remember start to make sure we moved */
c = tok;

/* Take digits */
unsigned long long ntpq;
err = ddi_strtoull(tok, &tok, 10, &ntpq);
/* Must succeed, and moved forward */
if (err != 0 || tok == c || *tok != '\0')
break;

/*
* sanity; zero queues/threads make no sense, and
* 16K is almost certainly more than anyone will ever
* need and avoids silly numbers like UINT32_MAX
*/
if (nq == 0 || nq >= 16384 ||
ntpq == 0 || ntpq >= 16384)
break;

const zio_taskq_info_t zti = ZTI_P(ntpq, nq);
row[q] = zti;
break;
}

case ZTI_MODE_BATCH: {
const zio_taskq_info_t zti = ZTI_BATCH;
row[q] = zti;
break;
}

case ZTI_MODE_SCALE: {
const zio_taskq_info_t zti = ZTI_SCALE;
row[q] = zti;
break;
}

case ZTI_MODE_NULL: {
/*
* Can only null the high-priority queues; the general-
* purpose ones have to exist.
*/
if (q != ZIO_TASKQ_ISSUE_HIGH &&
q != ZIO_TASKQ_INTERRUPT_HIGH)
break;

const zio_taskq_info_t zti = ZTI_NULL;
row[q] = zti;
break;
}

default:
break;
}

/* Ensure we set a mode */
if (row[q].zti_mode == ZTI_NMODES)
break;
}

/* Didn't get a full row, fail */
if (q < ZIO_TASKQ_TYPES)
return (SET_ERROR(EINVAL));

/* Eat trailing space */
if (next != NULL)
while (isspace(*next))
next++;

/* If there's anything left over then fail */
if (next != NULL && *next != '\0')
return (SET_ERROR(EINVAL));

/* Success! Copy it into the real config */
for (q = 0; q < ZIO_TASKQ_TYPES; q++)
zio_taskqs[t][q] = row[q];

return (0);
}

static int
spa_taskq_param_get(zio_type_t t, char *buf, boolean_t add_newline)
{
int pos = 0;

/* Build parameter string from live config */
const char *sep = "";
for (uint_t q = 0; q < ZIO_TASKQ_TYPES; q++) {
const zio_taskq_info_t *zti = &zio_taskqs[t][q];
if (zti->zti_mode == ZTI_MODE_FIXED)
pos += sprintf(&buf[pos], "%s%s,%u,%u", sep,
modes[zti->zti_mode], zti->zti_count,
zti->zti_value);
else
pos += sprintf(&buf[pos], "%s%s", sep,
modes[zti->zti_mode]);
sep = " ";
}

if (add_newline)
buf[pos++] = '\n';
buf[pos] = '\0';

return (pos);
}

#ifdef __linux__
static int
spa_taskq_read_param_set(const char *val, zfs_kernel_param_t *kp)
{
char *cfg = kmem_strdup(val);
int err = spa_taskq_param_set(ZIO_TYPE_READ, cfg);
kmem_free(cfg, strlen(val)+1);
return (-err);
}
static int
spa_taskq_read_param_get(char *buf, zfs_kernel_param_t *kp)
{
return (spa_taskq_param_get(ZIO_TYPE_READ, buf, TRUE));
}

static int
spa_taskq_write_param_set(const char *val, zfs_kernel_param_t *kp)
{
char *cfg = kmem_strdup(val);
int err = spa_taskq_param_set(ZIO_TYPE_WRITE, cfg);
kmem_free(cfg, strlen(val)+1);
return (-err);
}
static int
spa_taskq_write_param_get(char *buf, zfs_kernel_param_t *kp)
{
return (spa_taskq_param_get(ZIO_TYPE_WRITE, buf, TRUE));
}
#else
/*
* On FreeBSD load-time parameters can be set up before malloc() is available,
* so we have to do all the parsing work on the stack.
*/
#define SPA_TASKQ_PARAM_MAX (128)

static int
spa_taskq_read_param(ZFS_MODULE_PARAM_ARGS)
{
char buf[SPA_TASKQ_PARAM_MAX];
int err;

(void) spa_taskq_param_get(ZIO_TYPE_READ, buf, FALSE);
err = sysctl_handle_string(oidp, buf, sizeof (buf), req);
if (err || req->newptr == NULL)
return (err);
return (spa_taskq_param_set(ZIO_TYPE_READ, buf));
}

static int
spa_taskq_write_param(ZFS_MODULE_PARAM_ARGS)
{
char buf[SPA_TASKQ_PARAM_MAX];
int err;

(void) spa_taskq_param_get(ZIO_TYPE_WRITE, buf, FALSE);
err = sysctl_handle_string(oidp, buf, sizeof (buf), req);
if (err || req->newptr == NULL)
return (err);
return (spa_taskq_param_set(ZIO_TYPE_WRITE, buf));
}
#endif
#endif /* _KERNEL */

/*
* Dispatch a task to the appropriate taskq for the ZFS I/O type and priority.
* Note that a type may have multiple discrete taskqs to avoid lock contention

@ -10210,4 +10479,13 @@ ZFS_MODULE_PARAM(zfs_livelist_condense, zfs_livelist_condense_, new_alloc, INT,
ZMOD_RW,
"Whether extra ALLOC blkptrs were added to a livelist entry while it "
"was being condensed");

#ifdef _KERNEL
ZFS_MODULE_VIRTUAL_PARAM_CALL(zfs_zio, zio_, taskq_read,
spa_taskq_read_param_set, spa_taskq_read_param_get, ZMOD_RD,
"Configure IO queues for read IO");
ZFS_MODULE_VIRTUAL_PARAM_CALL(zfs_zio, zio_, taskq_write,
spa_taskq_write_param_set, spa_taskq_write_param_get, ZMOD_RD,
"Configure IO queues for write IO");
#endif
/* END CSTYLED */
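
A note on using the two new parameters: they are registered ZMOD_RD, so a row can only be chosen at module load time (for example, zio_taskq_read='fixed,8,4 null scale null' as a module option, using the same grammar spa_taskq_param_set() accepts), while the live row can still be read back at runtime. A small userspace sketch; the sysfs path is the conventional Linux location for ZFS module parameters and is an assumption here, not part of the change:

#include <stdio.h>

int
main(void)
{
	/* assumed path; Linux module parameters normally appear here */
	const char *path = "/sys/module/zfs/parameters/zio_taskq_read";
	char row[128];
	FILE *f = fopen(path, "r");

	if (f == NULL || fgets(row, sizeof (row), f) == NULL) {
		perror("zio_taskq_read");
		return (1);
	}
	fclose(f);
	/* prints something like: fixed,1,8 null scale null */
	printf("READ row: %s", row);
	return (0);
}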
@ -309,6 +309,8 @@ zfs_replay_create_acl(void *arg1, void *arg2, boolean_t byteswap)
uint64_t dnodesize;
int error;

ASSERT3U(lr->lr_common.lrc_reclen, >=, sizeof (*lracl));

txtype = (lr->lr_common.lrc_txtype & ~TX_CI);
if (byteswap) {
byteswap_uint64_array(lracl, sizeof (*lracl));

@ -470,6 +472,8 @@ zfs_replay_create(void *arg1, void *arg2, boolean_t byteswap)
uint64_t dnodesize;
int error;

ASSERT3U(lr->lr_common.lrc_reclen, >, sizeof (*lr));

txtype = (lr->lr_common.lrc_txtype & ~TX_CI);
if (byteswap) {
byteswap_uint64_array(lr, sizeof (*lr));

@ -613,6 +617,8 @@ zfs_replay_remove(void *arg1, void *arg2, boolean_t byteswap)
int error;
int vflg = 0;

ASSERT3U(lr->lr_common.lrc_reclen, >, sizeof (*lr));

if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));

@ -648,6 +654,8 @@ zfs_replay_link(void *arg1, void *arg2, boolean_t byteswap)
int error;
int vflg = 0;

ASSERT3U(lr->lr_common.lrc_reclen, >, sizeof (*lr));

if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));

@ -715,12 +723,14 @@ zfs_replay_rename(void *arg1, void *arg2, boolean_t byteswap)
{
zfsvfs_t *zfsvfs = arg1;
lr_rename_t *lr = arg2;
char *sname = (char *)(lr + 1); /* sname and tname follow lr_rename_t */
char *tname = sname + strlen(sname) + 1;

ASSERT3U(lr->lr_common.lrc_reclen, >, sizeof (*lr));

if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));

char *sname = (char *)(lr + 1); /* sname and tname follow lr_rename_t */
char *tname = sname + strlen(sname) + 1;
return (do_zfs_replay_rename(zfsvfs, lr, sname, tname, 0, NULL));
}

@ -730,12 +740,14 @@ zfs_replay_rename_exchange(void *arg1, void *arg2, boolean_t byteswap)
#ifdef __linux__
zfsvfs_t *zfsvfs = arg1;
lr_rename_t *lr = arg2;
char *sname = (char *)(lr + 1); /* sname and tname follow lr_rename_t */
char *tname = sname + strlen(sname) + 1;

ASSERT3U(lr->lr_common.lrc_reclen, >, sizeof (*lr));

if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));

char *sname = (char *)(lr + 1); /* sname and tname follow lr_rename_t */
char *tname = sname + strlen(sname) + 1;
return (do_zfs_replay_rename(zfsvfs, lr, sname, tname, RENAME_EXCHANGE,
NULL));
#else

@ -750,14 +762,13 @@ zfs_replay_rename_whiteout(void *arg1, void *arg2, boolean_t byteswap)
zfsvfs_t *zfsvfs = arg1;
lr_rename_whiteout_t *lr = arg2;
int error;
/* sname and tname follow lr_rename_whiteout_t */
char *sname = (char *)(lr + 1);
char *tname = sname + strlen(sname) + 1;
/* For the whiteout file. */
xvattr_t xva;
uint64_t objid;
uint64_t dnodesize;

ASSERT3U(lr->lr_rename.lr_common.lrc_reclen, >, sizeof (*lr));

if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));

@ -783,6 +794,9 @@ zfs_replay_rename_whiteout(void *arg1, void *arg2, boolean_t byteswap)
if (error)
return (error);

/* sname and tname follow lr_rename_whiteout_t */
char *sname = (char *)(lr + 1);
char *tname = sname + strlen(sname) + 1;
return (do_zfs_replay_rename(zfsvfs, &lr->lr_rename, sname, tname,
RENAME_WHITEOUT, &xva.xva_vattr));
#else

@ -800,6 +814,8 @@ zfs_replay_write(void *arg1, void *arg2, boolean_t byteswap)
int error;
uint64_t eod, offset, length;

ASSERT3U(lr->lr_common.lrc_reclen, >=, sizeof (*lr));

if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));

@ -863,6 +879,8 @@ zfs_replay_write2(void *arg1, void *arg2, boolean_t byteswap)
int error;
uint64_t end;

ASSERT3U(lr->lr_common.lrc_reclen, >=, sizeof (*lr));

if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));

@ -910,6 +928,8 @@ zfs_replay_truncate(void *arg1, void *arg2, boolean_t byteswap)
flock64_t fl = {0};
int error;

ASSERT3U(lr->lr_common.lrc_reclen, >=, sizeof (*lr));

if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));

@ -940,6 +960,8 @@ zfs_replay_setattr(void *arg1, void *arg2, boolean_t byteswap)
int error;
void *start;

ASSERT3U(lr->lr_common.lrc_reclen, >=, sizeof (*lr));

xva_init(&xva);
if (byteswap) {
byteswap_uint64_array(lr, sizeof (*lr));

@ -1002,6 +1024,9 @@ zfs_replay_setsaxattr(void *arg1, void *arg2, boolean_t byteswap)
size_t size;
int error = 0;

ASSERT3U(lr->lr_common.lrc_reclen, >=, sizeof (*lr));
ASSERT3U(lr->lr_common.lrc_reclen, >, sizeof (*lr) + lr->lr_size);

ASSERT(spa_feature_is_active(zfsvfs->z_os->os_spa,
SPA_FEATURE_ZILSAXATTR));
if (byteswap)

@ -1079,6 +1104,10 @@ zfs_replay_acl_v0(void *arg1, void *arg2, boolean_t byteswap)
znode_t *zp;
int error;

ASSERT3U(lr->lr_common.lrc_reclen, >=, sizeof (*lr));
ASSERT3U(lr->lr_common.lrc_reclen, >=, sizeof (*lr) +
sizeof (ace_t) * lr->lr_aclcnt);

if (byteswap) {
byteswap_uint64_array(lr, sizeof (*lr));
zfs_oldace_byteswap(ace, lr->lr_aclcnt);

@ -1124,6 +1153,9 @@ zfs_replay_acl(void *arg1, void *arg2, boolean_t byteswap)
znode_t *zp;
int error;

ASSERT3U(lr->lr_common.lrc_reclen, >=, sizeof (*lr));
ASSERT3U(lr->lr_common.lrc_reclen, >=, sizeof (*lr) + lr->lr_acl_bytes);

if (byteswap) {
byteswap_uint64_array(lr, sizeof (*lr));
zfs_ace_byteswap(ace, lr->lr_acl_bytes, B_FALSE);

@ -1171,6 +1203,10 @@ zfs_replay_clone_range(void *arg1, void *arg2, boolean_t byteswap)
znode_t *zp;
int error;

ASSERT3U(lr->lr_common.lrc_reclen, >=, sizeof (*lr));
ASSERT3U(lr->lr_common.lrc_reclen, >=, offsetof(lr_clone_range_t,
lr_bps[lr->lr_nbps]));

if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));
@ -47,6 +47,7 @@
#include <sys/fs/zfs.h>
#include <sys/dmu.h>
#include <sys/dmu_objset.h>
#include <sys/dsl_crypt.h>
#include <sys/spa.h>
#include <sys/txg.h>
#include <sys/dbuf.h>

@ -1103,6 +1104,16 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp,
return (SET_ERROR(EXDEV));
}

/*
* Cloning across encrypted datasets is possible only if they
* share the same master key.
*/
if (inos != outos && inos->os_encrypted &&
!dmu_objset_crypto_key_equal(inos, outos)) {
zfs_exit_two(inzfsvfs, outzfsvfs, FTAG);
return (SET_ERROR(EXDEV));
}

error = zfs_verify_zp(inzp);
if (error == 0)
error = zfs_verify_zp(outzp);

@ -1181,11 +1192,18 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp,
inblksz = inzp->z_blksz;

/*
* We cannot clone into files with different block size if we can't
* grow it (block size is already bigger or more than one block).
* We cannot clone into a file with a different block size if we can't
* grow it (block size is already bigger, has more than one block, or
* not locked for growth). There are other possible reasons for the
* grow to fail, but we cover what we can before opening the
* transaction and detect the rest after we try to do it.
*/
if (inblksz < outzp->z_blksz) {
error = SET_ERROR(EINVAL);
goto unlock;
}
if (inblksz != outzp->z_blksz && (outzp->z_size > outzp->z_blksz ||
outzp->z_size > inblksz)) {
outlr->lr_length != UINT64_MAX)) {
error = SET_ERROR(EINVAL);
goto unlock;
}

@ -1286,20 +1304,6 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp,
*/
break;
}
/*
* Encrypted data is fine as long as it comes from the same
* dataset.
* TODO: We want to extend it in the future to allow cloning to
* datasets with the same keys, like clones or to be able to
* clone a file from a snapshot of an encrypted dataset into the
* dataset itself.
*/
if (BP_IS_PROTECTED(&bps[0])) {
if (inzfsvfs != outzfsvfs) {
error = SET_ERROR(EXDEV);
break;
}
}

/*
* Start a transaction.

@ -1318,12 +1322,24 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp,
}

/*
* Copy source znode's block size. This only happens on the
* first iteration since zfs_rangelock_reduce() will shrink down
* lr_len to the appropriate size.
* Copy source znode's block size. This is done only if the
* whole znode is locked (see zfs_rangelock_cb()) and only
* on the first iteration since zfs_rangelock_reduce() will
* shrink down lr_length to the appropriate size.
*/
if (outlr->lr_length == UINT64_MAX) {
zfs_grow_blocksize(outzp, inblksz, tx);

/*
* Block growth may fail for many reasons we cannot
* predict here. If it happens the cloning is doomed.
*/
if (inblksz != outzp->z_blksz) {
error = SET_ERROR(EINVAL);
dmu_tx_abort(tx);
break;
}

/*
* Round range lock up to the block boundary, so we
* prevent appends until we are done.
@ -522,6 +522,7 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
lr_t *lr = (lr_t *)lrp;
reclen = lr->lrc_reclen;
ASSERT3U(reclen, >=, sizeof (lr_t));
ASSERT3U(reclen, <=, end - lrp);
if (lr->lrc_seq > claim_lr_seq) {
arc_buf_destroy(abuf, &abuf);
goto done;

@ -604,7 +605,7 @@ zil_claim_write(zilog_t *zilog, const lr_t *lrc, void *tx, uint64_t first_txg)
lr_write_t *lr = (lr_write_t *)lrc;
int error;

ASSERT(lrc->lrc_txtype == TX_WRITE);
ASSERT3U(lrc->lrc_reclen, >=, sizeof (*lr));

/*
* If the block is not readable, don't claim it. This can happen

@ -632,7 +633,9 @@ zil_claim_clone_range(zilog_t *zilog, const lr_t *lrc, void *tx,
spa_t *spa = zilog->zl_spa;
uint_t ii;

ASSERT(lrc->lrc_txtype == TX_CLONE_RANGE);
ASSERT3U(lrc->lrc_reclen, >=, sizeof (*lr));
ASSERT3U(lrc->lrc_reclen, >=, offsetof(lr_clone_range_t,
lr_bps[lr->lr_nbps]));

if (tx == NULL) {
return (0);

@ -646,9 +649,9 @@ zil_claim_clone_range(zilog_t *zilog, const lr_t *lrc, void *tx,
bp = &lr->lr_bps[ii];

/*
* When data are embedded into BP there is no need to create
* BRT entry as there is no data block. Just copy the BP as
* it contains the data.
* When data is embedded into the BP there is no need to create
* BRT entry as there is no data block. Just copy the BP as it
* contains the data.
*/
if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
continue;

@ -709,7 +712,7 @@ zil_free_write(zilog_t *zilog, const lr_t *lrc, void *tx, uint64_t claim_txg)
lr_write_t *lr = (lr_write_t *)lrc;
blkptr_t *bp = &lr->lr_blkptr;

ASSERT(lrc->lrc_txtype == TX_WRITE);
ASSERT3U(lrc->lrc_reclen, >=, sizeof (*lr));

/*
* If we previously claimed it, we need to free it.

@ -730,7 +733,9 @@ zil_free_clone_range(zilog_t *zilog, const lr_t *lrc, void *tx)
spa_t *spa;
uint_t ii;

ASSERT(lrc->lrc_txtype == TX_CLONE_RANGE);
ASSERT3U(lrc->lrc_reclen, >=, sizeof (*lr));
ASSERT3U(lrc->lrc_reclen, >=, offsetof(lr_clone_range_t,
lr_bps[lr->lr_nbps]));

if (tx == NULL) {
return (0);

@ -1810,6 +1815,7 @@ zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb)
itx = list_next(&lwb->lwb_itxs, itx))
zil_lwb_commit(zilog, lwb, itx);
lwb->lwb_nused = lwb->lwb_nfilled;
ASSERT3U(lwb->lwb_nused, <=, lwb->lwb_nmax);

lwb->lwb_root_zio = zio_root(spa, zil_lwb_flush_vdevs_done, lwb,
ZIO_FLAG_CANFAIL);

@ -2039,13 +2045,16 @@ zil_lwb_assign(zilog_t *zilog, lwb_t *lwb, itx_t *itx, list_t *ilwbs)
return (lwb);
}

reclen = lr->lrc_reclen;
if (lr->lrc_txtype == TX_WRITE && itx->itx_wr_state == WR_NEED_COPY) {
ASSERT3U(reclen, ==, sizeof (lr_write_t));
dlen = P2ROUNDUP_TYPED(
lrw->lr_length, sizeof (uint64_t), uint64_t);
} else {
ASSERT3U(reclen, >=, sizeof (lr_t));
dlen = 0;
}
reclen = lr->lrc_reclen;
ASSERT3U(reclen, <=, zil_max_log_data(zilog, 0));
zilog->zl_cur_used += (reclen + dlen);

cont:

@ -2064,19 +2073,19 @@ cont:
if (lwb == NULL)
return (NULL);
lwb_sp = lwb->lwb_nmax - lwb->lwb_nused;

/*
* There must be enough space in the new, empty log block to
* hold reclen. For WR_COPIED, we need to fit the whole
* record in one block, and reclen is the header size + the
* data size. For WR_NEED_COPY, we can create multiple
* records, splitting the data into multiple blocks, so we
* only need to fit one word of data per block; in this case
* reclen is just the header size (no data).
*/
ASSERT3U(reclen + MIN(dlen, sizeof (uint64_t)), <=, lwb_sp);
}

/*
* There must be enough space in the log block to hold reclen.
* For WR_COPIED, we need to fit the whole record in one block,
* and reclen is the write record header size + the data size.
* For WR_NEED_COPY, we can create multiple records, splitting
* the data into multiple blocks, so we only need to fit one
* word of data per block; in this case reclen is just the header
* size (no data).
*/
ASSERT3U(reclen + MIN(dlen, sizeof (uint64_t)), <=, lwb_sp);

dnow = MIN(dlen, lwb_sp - reclen);
if (dlen > dnow) {
ASSERT3U(lr->lrc_txtype, ==, TX_WRITE);

@ -2252,7 +2261,9 @@ zil_itx_create(uint64_t txtype, size_t olrsize)
size_t itxsize, lrsize;
itx_t *itx;

ASSERT3U(olrsize, >=, sizeof (lr_t));
lrsize = P2ROUNDUP_TYPED(olrsize, sizeof (uint64_t), size_t);
ASSERT3U(lrsize, >=, olrsize);
itxsize = offsetof(itx_t, itx_lr) + lrsize;

itx = zio_data_buf_alloc(itxsize);

@ -2271,6 +2282,10 @@ zil_itx_create(uint64_t txtype, size_t olrsize)
static itx_t *
zil_itx_clone(itx_t *oitx)
{
ASSERT3U(oitx->itx_size, >=, sizeof (itx_t));
ASSERT3U(oitx->itx_size, ==,
offsetof(itx_t, itx_lr) + oitx->itx_lr.lrc_reclen);

itx_t *itx = zio_data_buf_alloc(oitx->itx_size);
memcpy(itx, oitx, oitx->itx_size);
itx->itx_callback = NULL;

@ -2281,6 +2296,9 @@ zil_itx_clone(itx_t *oitx)
void
zil_itx_destroy(itx_t *itx)
{
ASSERT3U(itx->itx_size, >=, sizeof (itx_t));
ASSERT3U(itx->itx_lr.lrc_reclen, ==,
itx->itx_size - offsetof(itx_t, itx_lr));
IMPLY(itx->itx_lr.lrc_txtype == TX_COMMIT, itx->itx_callback == NULL);
IMPLY(itx->itx_callback != NULL, itx->itx_lr.lrc_txtype != TX_COMMIT);

@ -2364,7 +2382,7 @@ void
zil_remove_async(zilog_t *zilog, uint64_t oid)
{
uint64_t otxg, txg;
itx_async_node_t *ian;
itx_async_node_t *ian, ian_search;
avl_tree_t *t;
avl_index_t where;
list_t clean_list;

@ -2391,7 +2409,8 @@ zil_remove_async(zilog_t *zilog, uint64_t oid)
* Locate the object node and append its list.
*/
t = &itxg->itxg_itxs->i_async_tree;
ian = avl_find(t, &oid, &where);
ian_search.ia_foid = oid;
ian = avl_find(t, &ian_search, &where);
if (ian != NULL)
list_move_tail(&clean_list, &ian->ia_list);
mutex_exit(&itxg->itxg_lock);

@ -2589,7 +2608,7 @@ void
zil_async_to_sync(zilog_t *zilog, uint64_t foid)
{
uint64_t otxg, txg;
itx_async_node_t *ian;
itx_async_node_t *ian, ian_search;
avl_tree_t *t;
avl_index_t where;

@ -2619,7 +2638,8 @@ zil_async_to_sync(zilog_t *zilog, uint64_t foid)
*/
t = &itxg->itxg_itxs->i_async_tree;
if (foid != 0) {
ian = avl_find(t, &foid, &where);
ian_search.ia_foid = foid;
ian = avl_find(t, &ian_search, &where);
if (ian != NULL) {
list_move_tail(&itxg->itxg_itxs->i_sync_list,
&ian->ia_list);
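
The ian_search change in zil_remove_async() and zil_async_to_sync() deserves a comment: avl_find() hands its key argument straight to the tree's compare callback, which treats it as a full node, so the lookup key must be a node-typed object with the compared field set, not a bare uint64_t (which only happened to work while ia_foid sat at offset zero). A reduced sketch of the compare side; the struct layout shown is illustrative:

#include <stdint.h>

typedef struct itx_async_node {
	uint64_t ia_foid;	/* object id; the only field compared */
	/* the real struct carries an itx list and avl linkage */
} itx_async_node_t;

static int
ia_compare(const void *x1, const void *x2)
{
	/*
	 * Both arguments must point at itx_async_node_t, including the
	 * search key handed to avl_find().
	 */
	const itx_async_node_t *n1 = x1;
	const itx_async_node_t *n2 = x2;

	if (n1->ia_foid < n2->ia_foid)
		return (-1);
	if (n1->ia_foid > n2->ia_foid)
		return (1);
	return (0);
}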
@ -306,6 +306,53 @@ zio_fini(void)
* ==========================================================================
*/

#ifdef ZFS_DEBUG
static const ulong_t zio_buf_canary = (ulong_t)0xdeadc0dedead210b;
#endif

/*
* Use empty space after the buffer to detect overflows.
*
* Since zio_init() creates kmem caches only for a certain set of buffer
* sizes, allocations of different sizes may have some unused space after the
* data. Filling part of that space with a known pattern on allocation and
* checking it on free should allow us to detect some buffer overflows.
*/
static void
zio_buf_put_canary(ulong_t *p, size_t size, kmem_cache_t **cache, size_t c)
{
#ifdef ZFS_DEBUG
size_t off = P2ROUNDUP(size, sizeof (ulong_t));
ulong_t *canary = p + off / sizeof (ulong_t);
size_t asize = (c + 1) << SPA_MINBLOCKSHIFT;
if (c + 1 < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT &&
cache[c] == cache[c + 1])
asize = (c + 2) << SPA_MINBLOCKSHIFT;
for (; off < asize; canary++, off += sizeof (ulong_t))
*canary = zio_buf_canary;
#endif
}

static void
zio_buf_check_canary(ulong_t *p, size_t size, kmem_cache_t **cache, size_t c)
{
#ifdef ZFS_DEBUG
size_t off = P2ROUNDUP(size, sizeof (ulong_t));
ulong_t *canary = p + off / sizeof (ulong_t);
size_t asize = (c + 1) << SPA_MINBLOCKSHIFT;
if (c + 1 < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT &&
cache[c] == cache[c + 1])
asize = (c + 2) << SPA_MINBLOCKSHIFT;
for (; off < asize; canary++, off += sizeof (ulong_t)) {
if (unlikely(*canary != zio_buf_canary)) {
PANIC("ZIO buffer overflow %p (%zu) + %zu %#lx != %#lx",
p, size, (canary - p) * sizeof (ulong_t),
*canary, zio_buf_canary);
}
}
#endif
}

/*
* Use zio_buf_alloc to allocate ZFS metadata. This data will appear in a
* crashdump if the kernel panics, so use it judiciously. Obviously, it's

@ -322,7 +369,9 @@ zio_buf_alloc(size_t size)
atomic_add_64(&zio_buf_cache_allocs[c], 1);
#endif

return (kmem_cache_alloc(zio_buf_cache[c], KM_PUSHPAGE));
void *p = kmem_cache_alloc(zio_buf_cache[c], KM_PUSHPAGE);
zio_buf_put_canary(p, size, zio_buf_cache, c);
return (p);
}

/*

@ -338,7 +387,9 @@ zio_data_buf_alloc(size_t size)

VERIFY3U(c, <, SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT);

return (kmem_cache_alloc(zio_data_buf_cache[c], KM_PUSHPAGE));
void *p = kmem_cache_alloc(zio_data_buf_cache[c], KM_PUSHPAGE);
zio_buf_put_canary(p, size, zio_data_buf_cache, c);
return (p);
}

void

@ -351,6 +402,7 @@ zio_buf_free(void *buf, size_t size)
atomic_add_64(&zio_buf_cache_frees[c], 1);
#endif

zio_buf_check_canary(buf, size, zio_buf_cache, c);
kmem_cache_free(zio_buf_cache[c], buf);
}

@ -361,6 +413,7 @@ zio_data_buf_free(void *buf, size_t size)

VERIFY3U(c, <, SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT);

zio_buf_check_canary(buf, size, zio_data_buf_cache, c);
kmem_cache_free(zio_data_buf_cache[c], buf);
}
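
The canary scheme above is not kmem-specific; the same trick can be demonstrated in ordinary user space. A self-contained sketch of the same idea (constants and helper names are illustrative, not the kernel interfaces): fill the slack words past the requested size at allocation time, then verify them at free time.

#include <stdio.h>
#include <stdlib.h>

#define CANARY	0xdeadc0dedead210bUL
#define WORD	sizeof (unsigned long)

/* round a byte count up to a whole number of canary words */
static size_t
roundup_word(size_t size)
{
	return ((size + WORD - 1) & ~(WORD - 1));
}

/* allocate asize bytes, of which only size are meant to be used */
static unsigned long *
canary_alloc(size_t size, size_t asize)
{
	unsigned long *p = malloc(asize);

	for (size_t off = roundup_word(size); off < asize; off += WORD)
		p[off / WORD] = CANARY;
	return (p);
}

static void
canary_check(const unsigned long *p, size_t size, size_t asize)
{
	for (size_t off = roundup_word(size); off < asize; off += WORD) {
		if (p[off / WORD] != CANARY) {
			fprintf(stderr, "buffer overflow at +%zu\n", off);
			abort();
		}
	}
}

int
main(void)
{
	unsigned long *buf = canary_alloc(100, 128);

	((char *)buf)[110] = 'X';	/* write past the 100 usable bytes */
	canary_check(buf, 100, 128);	/* aborts: a slack word was hit */
	free(buf);
	return (0);
}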
@ -363,11 +363,14 @@ zio_checksum_compute(zio_t *zio, enum zio_checksum checksum,
zil_chain_t zilc;
abd_copy_to_buf(&zilc, abd, sizeof (zil_chain_t));

size = P2ROUNDUP_TYPED(zilc.zc_nused, ZIL_MIN_BLKSZ,
uint64_t);
uint64_t nused = P2ROUNDUP_TYPED(zilc.zc_nused,
ZIL_MIN_BLKSZ, uint64_t);
ASSERT3U(size, >=, nused);
size = nused;
eck = zilc.zc_eck;
eck_offset = offsetof(zil_chain_t, zc_eck);
} else {
ASSERT3U(size, >=, sizeof (zio_eck_t));
eck_offset = size - sizeof (zio_eck_t);
abd_copy_to_buf_off(&eck, abd, eck_offset,
sizeof (zio_eck_t));

@ -448,12 +451,13 @@ zio_checksum_error_impl(spa_t *spa, const blkptr_t *bp,
return (SET_ERROR(ECKSUM));
}

if (nused > size) {
nused = P2ROUNDUP_TYPED(nused, ZIL_MIN_BLKSZ, uint64_t);
if (size < nused)
return (SET_ERROR(ECKSUM));
}

size = P2ROUNDUP_TYPED(nused, ZIL_MIN_BLKSZ, uint64_t);
size = nused;
} else {
if (size < sizeof (zio_eck_t))
return (SET_ERROR(ECKSUM));
eck_offset = size - sizeof (zio_eck_t);
abd_copy_to_buf_off(&eck, abd, eck_offset,
sizeof (zio_eck_t));
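
The verifier side of the ZIL change reduces to one rule: take zc_nused from the embedded zil_chain_t, round it up to the ZIL allocation unit, and reject the block if it claims more bytes than the buffer holds. A hedged sketch of just that check (ZIL_MIN_BLKSZ is assumed to be its usual 4096 here; the standalone framing is not from the patch):

#include <stdint.h>

#define ZIL_MIN_BLKSZ	4096ULL	/* assumed value for this sketch */

static int
zil_chain_len_ok(uint64_t zc_nused, uint64_t blksz)
{
	/* round the claimed length up to the ZIL allocation unit */
	uint64_t nused = (zc_nused + ZIL_MIN_BLKSZ - 1) &
	    ~(ZIL_MIN_BLKSZ - 1);

	/* a chain claiming more than the block holds is corrupt */
	return (blksz >= nused);
}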
@ -451,6 +451,8 @@ zvol_replay_truncate(void *arg1, void *arg2, boolean_t byteswap)
lr_truncate_t *lr = arg2;
uint64_t offset, length;

ASSERT3U(lr->lr_common.lrc_reclen, >=, sizeof (*lr));

if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));

@ -487,6 +489,8 @@ zvol_replay_write(void *arg1, void *arg2, boolean_t byteswap)
dmu_tx_t *tx;
int error;

ASSERT3U(lr->lr_common.lrc_reclen, >=, sizeof (*lr));

if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));

@ -516,60 +520,6 @@ zvol_replay_write(void *arg1, void *arg2, boolean_t byteswap)
return (error);
}

/*
* Replay a TX_CLONE_RANGE ZIL transaction that didn't get committed
* after a system failure.
*
* TODO: For now we drop block cloning transactions for ZVOLs as they are
* unsupported, but we still need to inform BRT about that as we
* claimed them during pool import.
* This situation can occur when we try to import a pool from a ZFS
* version supporting block cloning for ZVOLs into a system running this
* ZFS version, which doesn't support block cloning for ZVOLs.
*/
static int
zvol_replay_clone_range(void *arg1, void *arg2, boolean_t byteswap)
{
char name[ZFS_MAX_DATASET_NAME_LEN];
zvol_state_t *zv = arg1;
objset_t *os = zv->zv_objset;
lr_clone_range_t *lr = arg2;
blkptr_t *bp;
dmu_tx_t *tx;
spa_t *spa;
uint_t ii;
int error;

dmu_objset_name(os, name);
cmn_err(CE_WARN, "ZFS dropping block cloning transaction for %s.",
name);

if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));

tx = dmu_tx_create(os);
error = dmu_tx_assign(tx, TXG_WAIT);
if (error) {
dmu_tx_abort(tx);
return (error);
}

spa = os->os_spa;

for (ii = 0; ii < lr->lr_nbps; ii++) {
bp = &lr->lr_bps[ii];

if (!BP_IS_HOLE(bp)) {
zio_free(spa, dmu_tx_get_txg(tx), bp);
}
}

(void) zil_replaying(zv->zv_zilog, tx);
dmu_tx_commit(tx);

return (0);
}

static int
zvol_replay_err(void *arg1, void *arg2, boolean_t byteswap)
{

@ -604,7 +554,7 @@ zil_replay_func_t *const zvol_replay_vector[TX_MAX_TYPE] = {
zvol_replay_err, /* TX_SETSAXATTR */
zvol_replay_err, /* TX_RENAME_EXCHANGE */
zvol_replay_err, /* TX_RENAME_WHITEOUT */
zvol_replay_clone_range /* TX_CLONE_RANGE */
zvol_replay_err, /* TX_CLONE_RANGE */
};

/*
@ -42,6 +42,7 @@ tests = ['block_cloning_copyfilerange', 'block_cloning_copyfilerange_partial',
'block_cloning_disabled_copyfilerange', 'block_cloning_disabled_ficlone',
'block_cloning_disabled_ficlonerange',
'block_cloning_copyfilerange_cross_dataset',
'block_cloning_cross_enc_dataset',
'block_cloning_copyfilerange_fallback_same_txg']
tags = ['functional', 'block_cloning']
@ -305,6 +305,8 @@ elif sys.platform.startswith('linux'):
['SKIP', cfr_cross_reason],
'block_cloning/block_cloning_copyfilerange_fallback_same_txg':
['SKIP', cfr_cross_reason],
'block_cloning/block_cloning_cross_enc_dataset':
['SKIP', cfr_cross_reason],
})
@ -451,6 +451,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/block_cloning/block_cloning_ficlone.ksh \
functional/block_cloning/block_cloning_ficlonerange.ksh \
functional/block_cloning/block_cloning_ficlonerange_partial.ksh \
functional/block_cloning/block_cloning_cross_enc_dataset.ksh \
functional/bootfs/bootfs_001_pos.ksh \
functional/bootfs/bootfs_002_neg.ksh \
functional/bootfs/bootfs_003_pos.ksh \
@ -28,8 +28,8 @@

function have_same_content
{
typeset hash1=$(cat $1 | md5sum)
typeset hash2=$(cat $2 | md5sum)
typeset hash1=$(md5digest $1)
typeset hash2=$(md5digest $2)

log_must [ "$hash1" = "$hash2" ]
}

@ -44,10 +44,14 @@ function have_same_content
#
function get_same_blocks
{
KEY=$5
if [ ${#KEY} -gt 0 ]; then
KEY="--key=$KEY"
fi
typeset zdbout=${TMPDIR:-$TEST_BASE_DIR}/zdbout.$$
zdb -vvvvv $1 -O $2 | \
zdb $KEY -vvvvv $1 -O $2 | \
awk '/ L0 / { print l++ " " $3 " " $7 }' > $zdbout.a
zdb -vvvvv $3 -O $4 | \
zdb $KEY -vvvvv $3 -O $4 | \
awk '/ L0 / { print l++ " " $3 " " $7 }' > $zdbout.b
echo $(sort $zdbout.a $zdbout.b | uniq -d | cut -f1 -d' ')
}
@ -0,0 +1,170 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#

#
# Copyright (c) 2023, Kay Pedersen <mail@mkwg.de>
#

. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/block_cloning/block_cloning.kshlib

verify_runnable "global"

if [[ $(linux_version) -lt $(linux_version "5.3") ]]; then
log_unsupported "copy_file_range can't copy cross-filesystem before Linux 5.3"
fi

claim="Block cloning across encrypted datasets."

log_assert $claim

DS1="$TESTPOOL/encrypted1"
DS2="$TESTPOOL/encrypted2"
DS1_NC="$TESTPOOL/notcrypted1"
PASSPHRASE="top_secret"

function prepare_enc
{
log_must zpool create -o feature@block_cloning=enabled $TESTPOOL $DISKS
log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \
"-o keyformat=passphrase -o keylocation=prompt $DS1"
log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \
"-o keyformat=passphrase -o keylocation=prompt $DS2"
log_must zfs create $DS1/child1
log_must zfs create $DS1/child2
log_must zfs create $DS1_NC

log_note "Create test file"
# we must wait until the src file txg is written to the disk, otherwise
# we will fall back to a normal copy. See "dmu_read_l0_bps" in
# "zfs/module/zfs/dmu.c" and "zfs_clone_range" in
# "zfs/module/zfs/zfs_vnops.c"
log_must dd if=/dev/urandom of=/$DS1/file bs=128K count=4
log_must dd if=/dev/urandom of=/$DS1/child1/file bs=128K count=4
log_must dd if=/dev/urandom of=/$DS1_NC/file bs=128K count=4
log_must sync_pool $TESTPOOL
}

function cleanup_enc
{
datasetexists $TESTPOOL && destroy_pool $TESTPOOL
}

function clone_and_check
{
I_FILE="$1"
O_FILE=$2
I_DS=$3
O_DS=$4
SAME_BLOCKS=$5
# the CLONE option chooses between copy_file_range, which should
# clone, and dd, which is a plain copy no matter what
CLONE=$6
SNAPSHOT=$7
if [ ${#SNAPSHOT} -gt 0 ]; then
I_FILE=".zfs/snapshot/$SNAPSHOT/$1"
fi
if [ $CLONE ]; then
log_must clonefile -f "/$I_DS/$I_FILE" "/$O_DS/$O_FILE" 0 0 524288
else
log_must dd if="/$I_DS/$I_FILE" of="/$O_DS/$O_FILE" bs=128K
fi
log_must sync_pool $TESTPOOL

log_must have_same_content "/$I_DS/$I_FILE" "/$O_DS/$O_FILE"

if [ ${#SNAPSHOT} -gt 0 ]; then
I_DS="$I_DS@$SNAPSHOT"
I_FILE="$1"
fi
typeset blocks=$(get_same_blocks \
$I_DS $I_FILE $O_DS $O_FILE $PASSPHRASE)
log_must [ "$blocks" = "$SAME_BLOCKS" ]
}

log_onexit cleanup_enc

prepare_enc

log_note "Cloning entire file with copy_file_range across different enc" \
"roots, should fall back"
# we are expecting no same block map.
clone_and_check "file" "clone" $DS1 $DS2 "" true
log_note "check if the file is still readable and the same after" \
"unmount and key unload, shouldn't fail"
typeset hash1=$(md5digest "/$DS1/file")
log_must zfs umount $DS1 && zfs unload-key $DS1
typeset hash2=$(md5digest "/$DS2/clone")
log_must [ "$hash1" = "$hash2" ]

cleanup_enc
prepare_enc

log_note "Cloning entire file with copy_file_range across different child datasets"
# the clone shouldn't work, because a new master key is derived for
# each child; we are expecting no same block map.
clone_and_check "file" "clone" $DS1 "$DS1/child1" "" true
clone_and_check "file" "clone" "$DS1/child1" "$DS1/child2" "" true

cleanup_enc
prepare_enc

log_note "Copying entire file with copy_file_range across same snapshot"
log_must zfs snapshot -r $DS1@s1
log_must sync_pool $TESTPOOL
log_must rm -f "/$DS1/file"
log_must sync_pool $TESTPOOL
clone_and_check "file" "clone" "$DS1" "$DS1" "0 1 2 3" true "s1"

cleanup_enc
prepare_enc

log_note "Copying entire file with copy_file_range across different snapshot"
clone_and_check "file" "file" $DS1 $DS2 "" true
log_must zfs snapshot -r $DS2@s1
log_must sync_pool $TESTPOOL
log_must rm -f "/$DS1/file" "/$DS2/file"
log_must sync_pool $TESTPOOL
clone_and_check "file" "clone" "$DS2" "$DS1" "" true "s1"
typeset hash1=$(md5digest "/$DS1/.zfs/snapshot/s1/file")
log_note "destroy the snapshot and check if the file is still readable and" \
"has the same content"
log_must zfs destroy -r $DS2@s1
log_must sync_pool $TESTPOOL
typeset hash2=$(md5digest "/$DS1/file")
log_must [ "$hash1" = "$hash2" ]

cleanup_enc
prepare_enc

log_note "Copying with copy_file_range from non-encrypted to encrypted"
clone_and_check "file" "copy" $DS1_NC $DS1 "" true

cleanup_enc
prepare_enc

log_note "Copying with copy_file_range from encrypted to non-encrypted"
clone_and_check "file" "copy" $DS1 $DS1_NC "" true

log_must sync_pool $TESTPOOL

log_pass $claim
@ -31,15 +31,13 @@
# 2. Set l2arc_write_max to a value larger than the cache device.
# 3. Create a file larger than the cache device and random read
# for 10 sec.
# 4. Verify that l2arc_write_max is set back to the default.
# 5. Set l2arc_write_max to a value less than the cache device size but
# 4. Set l2arc_write_max to a value less than the cache device size but
# larger than the default (256MB).
# 6. Record the l2_size.
# 7. Random read for 1 sec.
# 8. Record the l2_size again.
# 9. If (6) <= (8) then we have not looped around yet.
# 10. If (6) > (8) then we looped around. Break out of the loop and test.
# 11. Destroy pool.
# 5. Record the l2_size.
# 6. Random read for 1 sec.
# 7. Record the l2_size again.
# 8. If (5) <= (7) then we have not looped around yet.
# 9. Destroy pool.
#

verify_runnable "global"

@ -93,10 +91,6 @@ log_must zfs set relatime=off $TESTPOOL
log_must fio $FIO_SCRIPTS/mkfiles.fio
log_must fio $FIO_SCRIPTS/random_reads.fio

typeset write_max2=$(get_tunable L2ARC_WRITE_MAX)

log_must test $write_max2 -eq $write_max

log_must set_tunable32 L2ARC_WRITE_MAX $(( 256 * 1024 * 1024 ))
export RUNTIME=1

@ -108,8 +102,6 @@ while $do_once || [[ $l2_size1 -le $l2_size2 ]]; do
do_once=false
done

log_must test $l2_size1 -gt $l2_size2

log_must zpool destroy $TESTPOOL

log_pass "Looping around a cache device succeeds."

@ -44,6 +44,13 @@ if ! $(grep -q "CONFIG_IO_URING=y" /boot/config-$(uname -r)); then
log_unsupported "Requires io_uring support"
fi

if [ -e /etc/os-release ] ; then
source /etc/os-release
if [ -n "$REDHAT_SUPPORT_PRODUCT_VERSION" ] && ((floor($REDHAT_SUPPORT_PRODUCT_VERSION) == 9)) ; then
log_unsupported "Disabled on CentOS 9, fails with 'Operation not permitted'"
fi
fi

fio --ioengine=io_uring --parse-only || log_unsupported "fio io_uring support required"

function cleanup