Linux 5.0 compat: SIMD compatibility
Restore the SIMD optimization for 4.19.38 LTS, 4.14.120 LTS, and 5.0 and newer kernels. This commit squashes the following commits from master into a single commit which can be applied to 0.8.2.
- 10fa2545 Linux 4.14, 4.19, 5.0+ compat: SIMD save/restore
- b88ca2ac Enable SIMD for encryption
- 095b5412 Fix CONFIG_X86_DEBUG_FPU build failure
- e5db3134 Linux 5.0 compat: SIMD compatibility
Reviewed-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
TEST_ZIMPORT_SKIP="yes"
This commit is contained in:
parent
988b040476
commit
62c034f6d4
|
@ -107,6 +107,7 @@
|
||||||
#include <sys/vdev_impl.h>
|
#include <sys/vdev_impl.h>
|
||||||
#include <sys/vdev_file.h>
|
#include <sys/vdev_file.h>
|
||||||
#include <sys/vdev_initialize.h>
|
#include <sys/vdev_initialize.h>
|
||||||
|
#include <sys/vdev_raidz.h>
|
||||||
#include <sys/vdev_trim.h>
|
#include <sys/vdev_trim.h>
|
||||||
#include <sys/spa_impl.h>
|
#include <sys/spa_impl.h>
|
||||||
#include <sys/metaslab_impl.h>
|
#include <sys/metaslab_impl.h>
|
||||||
|
@ -7110,6 +7111,8 @@ ztest_run(ztest_shared_t *zs)
|
||||||
metaslab_preload_limit = ztest_random(20) + 1;
|
metaslab_preload_limit = ztest_random(20) + 1;
|
||||||
ztest_spa = spa;
|
ztest_spa = spa;
|
||||||
|
|
||||||
|
VERIFY0(vdev_raidz_impl_set("cycle"));
|
||||||
|
|
||||||
dmu_objset_stats_t dds;
|
dmu_objset_stats_t dds;
|
||||||
VERIFY0(ztest_dmu_objset_own(ztest_opts.zo_pool,
|
VERIFY0(ztest_dmu_objset_own(ztest_opts.zo_pool,
|
||||||
DMU_OST_ANY, B_TRUE, B_TRUE, FTAG, &os));
|
DMU_OST_ANY, B_TRUE, B_TRUE, FTAG, &os));
|
||||||
|
|
|
@ -2,8 +2,9 @@ dnl #
|
||||||
dnl # Handle differences in kernel FPU code.
|
dnl # Handle differences in kernel FPU code.
|
||||||
dnl #
|
dnl #
|
||||||
dnl # Kernel
|
dnl # Kernel
|
||||||
dnl # 5.0: All kernel fpu functions are GPL only, so we can't use them.
|
dnl # 5.0: Wrappers have been introduced to save/restore the FPU state.
|
||||||
dnl # (nothing defined)
|
dnl # This change was made to the 4.19.38 and 4.14.120 LTS kernels.
|
||||||
|
dnl # HAVE_KERNEL_FPU_INTERNAL
|
||||||
dnl #
|
dnl #
|
||||||
dnl # 4.2: Use __kernel_fpu_{begin,end}()
|
dnl # 4.2: Use __kernel_fpu_{begin,end}()
|
||||||
dnl # HAVE_UNDERSCORE_KERNEL_FPU & KERNEL_EXPORTS_X86_FPU
|
dnl # HAVE_UNDERSCORE_KERNEL_FPU & KERNEL_EXPORTS_X86_FPU
|
||||||
|
@ -12,7 +13,11 @@ dnl # Pre-4.2: Use kernel_fpu_{begin,end}()
|
||||||
dnl # HAVE_KERNEL_FPU & KERNEL_EXPORTS_X86_FPU
|
dnl # HAVE_KERNEL_FPU & KERNEL_EXPORTS_X86_FPU
|
||||||
dnl #
|
dnl #
|
||||||
AC_DEFUN([ZFS_AC_KERNEL_FPU], [
|
AC_DEFUN([ZFS_AC_KERNEL_FPU], [
|
||||||
AC_MSG_CHECKING([which kernel_fpu header to use])
|
dnl #
|
||||||
|
dnl # N.B. The header check is performed before all other checks since
|
||||||
|
dnl # it depends on HAVE_KERNEL_FPU_API_HEADER being set in confdefs.h.
|
||||||
|
dnl #
|
||||||
|
AC_MSG_CHECKING([whether fpu headers are available])
|
||||||
ZFS_LINUX_TRY_COMPILE([
|
ZFS_LINUX_TRY_COMPILE([
|
||||||
#include <linux/module.h>
|
#include <linux/module.h>
|
||||||
#include <asm/fpu/api.h>
|
#include <asm/fpu/api.h>
|
||||||
|
@ -25,9 +30,13 @@ AC_DEFUN([ZFS_AC_KERNEL_FPU], [
|
||||||
AC_MSG_RESULT(i387.h & xcr.h)
|
AC_MSG_RESULT(i387.h & xcr.h)
|
||||||
])
|
])
|
||||||
|
|
||||||
AC_MSG_CHECKING([which kernel_fpu function to use])
|
dnl #
|
||||||
|
dnl # Legacy kernel
|
||||||
|
dnl #
|
||||||
|
AC_MSG_CHECKING([whether kernel fpu is available])
|
||||||
ZFS_LINUX_TRY_COMPILE_SYMBOL([
|
ZFS_LINUX_TRY_COMPILE_SYMBOL([
|
||||||
#include <linux/module.h>
|
#include <linux/module.h>
|
||||||
|
#include <linux/types.h>
|
||||||
#ifdef HAVE_KERNEL_FPU_API_HEADER
|
#ifdef HAVE_KERNEL_FPU_API_HEADER
|
||||||
#include <asm/fpu/api.h>
|
#include <asm/fpu/api.h>
|
||||||
#else
|
#else
|
||||||
|
@ -45,8 +54,12 @@ AC_DEFUN([ZFS_AC_KERNEL_FPU], [
|
||||||
AC_DEFINE(KERNEL_EXPORTS_X86_FPU, 1,
|
AC_DEFINE(KERNEL_EXPORTS_X86_FPU, 1,
|
||||||
[kernel exports FPU functions])
|
[kernel exports FPU functions])
|
||||||
],[
|
],[
|
||||||
|
dnl #
|
||||||
|
dnl # Linux 4.2 kernel
|
||||||
|
dnl #
|
||||||
ZFS_LINUX_TRY_COMPILE_SYMBOL([
|
ZFS_LINUX_TRY_COMPILE_SYMBOL([
|
||||||
#include <linux/module.h>
|
#include <linux/module.h>
|
||||||
|
#include <linux/types.h>
|
||||||
#ifdef HAVE_KERNEL_FPU_API_HEADER
|
#ifdef HAVE_KERNEL_FPU_API_HEADER
|
||||||
#include <asm/fpu/api.h>
|
#include <asm/fpu/api.h>
|
||||||
#else
|
#else
|
||||||
|
@ -57,12 +70,60 @@ AC_DEFUN([ZFS_AC_KERNEL_FPU], [
|
||||||
],[
|
],[
|
||||||
__kernel_fpu_begin();
|
__kernel_fpu_begin();
|
||||||
__kernel_fpu_end();
|
__kernel_fpu_end();
|
||||||
], [__kernel_fpu_begin], [arch/x86/kernel/fpu/core.c arch/x86/kernel/i387.c], [
|
], [__kernel_fpu_begin],
|
||||||
|
[arch/x86/kernel/fpu/core.c arch/x86/kernel/i387.c], [
|
||||||
AC_MSG_RESULT(__kernel_fpu_*)
|
AC_MSG_RESULT(__kernel_fpu_*)
|
||||||
AC_DEFINE(HAVE_UNDERSCORE_KERNEL_FPU, 1, [kernel has __kernel_fpu_* functions])
|
AC_DEFINE(HAVE_UNDERSCORE_KERNEL_FPU, 1,
|
||||||
AC_DEFINE(KERNEL_EXPORTS_X86_FPU, 1, [kernel exports FPU functions])
|
[kernel has __kernel_fpu_* functions])
|
||||||
|
AC_DEFINE(KERNEL_EXPORTS_X86_FPU, 1,
|
||||||
|
[kernel exports FPU functions])
|
||||||
],[
|
],[
|
||||||
AC_MSG_RESULT(not exported)
|
ZFS_LINUX_TRY_COMPILE([
|
||||||
|
#include <linux/module.h>
|
||||||
|
|
||||||
|
#if defined(__x86_64) || defined(__x86_64__) || \
|
||||||
|
defined(__i386) || defined(__i386__)
|
||||||
|
#if !defined(__x86)
|
||||||
|
#define __x86
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if !defined(__x86)
|
||||||
|
#error Unsupported architecture
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <linux/types.h>
|
||||||
|
#ifdef HAVE_KERNEL_FPU_API_HEADER
|
||||||
|
#include <asm/fpu/api.h>
|
||||||
|
#include <asm/fpu/internal.h>
|
||||||
|
#else
|
||||||
|
#include <asm/i387.h>
|
||||||
|
#include <asm/xcr.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if !defined(XSTATE_XSAVE)
|
||||||
|
#error XSTATE_XSAVE not defined
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if !defined(XSTATE_XRESTORE)
|
||||||
|
#error XSTATE_XRESTORE not defined
|
||||||
|
#endif
|
||||||
|
],[
|
||||||
|
struct fpu *fpu = &current->thread.fpu;
|
||||||
|
union fpregs_state *st = &fpu->state;
|
||||||
|
struct fregs_state *fr __attribute__ ((unused)) =
|
||||||
|
&st->fsave;
|
||||||
|
struct fxregs_state *fxr __attribute__ ((unused)) =
|
||||||
|
&st->fxsave;
|
||||||
|
struct xregs_state *xr __attribute__ ((unused)) =
|
||||||
|
&st->xsave;
|
||||||
|
], [
|
||||||
|
AC_MSG_RESULT(internal)
|
||||||
|
AC_DEFINE(HAVE_KERNEL_FPU_INTERNAL, 1,
|
||||||
|
[kernel fpu internal])
|
||||||
|
],[
|
||||||
|
AC_MSG_RESULT(unavailable)
|
||||||
|
])
|
||||||
])
|
])
|
||||||
])
|
])
|
||||||
])
|
])
|
||||||
|
|
|
@ -7,6 +7,7 @@ KERNEL_H = \
|
||||||
$(top_srcdir)/include/linux/blkdev_compat.h \
|
$(top_srcdir)/include/linux/blkdev_compat.h \
|
||||||
$(top_srcdir)/include/linux/utsname_compat.h \
|
$(top_srcdir)/include/linux/utsname_compat.h \
|
||||||
$(top_srcdir)/include/linux/kmap_compat.h \
|
$(top_srcdir)/include/linux/kmap_compat.h \
|
||||||
|
$(top_srcdir)/include/linux/simd.h \
|
||||||
$(top_srcdir)/include/linux/simd_x86.h \
|
$(top_srcdir)/include/linux/simd_x86.h \
|
||||||
$(top_srcdir)/include/linux/simd_aarch64.h \
|
$(top_srcdir)/include/linux/simd_aarch64.h \
|
||||||
$(top_srcdir)/include/linux/mod_compat.h \
|
$(top_srcdir)/include/linux/mod_compat.h \
|
||||||
|
|
|
@ -0,0 +1,42 @@
|
||||||
|
/*
|
||||||
|
* CDDL HEADER START
|
||||||
|
*
|
||||||
|
* The contents of this file are subject to the terms of the
|
||||||
|
* Common Development and Distribution License (the "License").
|
||||||
|
* You may not use this file except in compliance with the License.
|
||||||
|
*
|
||||||
|
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||||
|
* or http://www.opensolaris.org/os/licensing.
|
||||||
|
* See the License for the specific language governing permissions
|
||||||
|
* and limitations under the License.
|
||||||
|
*
|
||||||
|
* When distributing Covered Code, include this CDDL HEADER in each
|
||||||
|
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||||
|
* If applicable, add the following below this CDDL HEADER, with the
|
||||||
|
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||||
|
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||||
|
*
|
||||||
|
* CDDL HEADER END
|
||||||
|
*/
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2019 Lawrence Livermore National Security, LLC.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef _SIMD_H
|
||||||
|
#define _SIMD_H
|
||||||
|
|
||||||
|
#if defined(__x86)
|
||||||
|
#include <linux/simd_x86.h>
|
||||||
|
|
||||||
|
#elif defined(__aarch64__)
|
||||||
|
#include <linux/simd_aarch64.h>
|
||||||
|
#else
|
||||||
|
|
||||||
|
#define kfpu_allowed() 0
|
||||||
|
#define kfpu_begin() do {} while (0)
|
||||||
|
#define kfpu_end() do {} while (0)
|
||||||
|
#define kfpu_init() 0
|
||||||
|
#define kfpu_fini() ((void) 0)
|
||||||
|
|
||||||
|
#endif
|
||||||
|
#endif /* _SIMD_H */
|
|
@ -27,9 +27,10 @@
|
||||||
*
|
*
|
||||||
* Kernel fpu methods:
|
* Kernel fpu methods:
|
||||||
* kfpu_allowed()
|
* kfpu_allowed()
|
||||||
* kfpu_initialize()
|
|
||||||
* kfpu_begin()
|
* kfpu_begin()
|
||||||
* kfpu_end()
|
* kfpu_end()
|
||||||
|
* kfpu_init()
|
||||||
|
* kfpu_fini()
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef _SIMD_AARCH64_H
|
#ifndef _SIMD_AARCH64_H
|
||||||
|
@ -43,20 +44,20 @@
|
||||||
|
|
||||||
#if defined(_KERNEL)
|
#if defined(_KERNEL)
|
||||||
#include <asm/neon.h>
|
#include <asm/neon.h>
|
||||||
#define kfpu_begin() \
|
#define kfpu_allowed() 1
|
||||||
{ \
|
#define kfpu_begin() kernel_neon_begin()
|
||||||
kernel_neon_begin(); \
|
#define kfpu_end() kernel_neon_end()
|
||||||
}
|
#define kfpu_init() 0
|
||||||
#define kfpu_end() \
|
#define kfpu_fini() ((void) 0)
|
||||||
{ \
|
|
||||||
kernel_neon_end(); \
|
|
||||||
}
|
|
||||||
#else
|
#else
|
||||||
/*
|
/*
|
||||||
* fpu dummy methods for userspace
|
* fpu dummy methods for userspace
|
||||||
*/
|
*/
|
||||||
|
#define kfpu_allowed() 1
|
||||||
#define kfpu_begin() do {} while (0)
|
#define kfpu_begin() do {} while (0)
|
||||||
#define kfpu_end() do {} while (0)
|
#define kfpu_end() do {} while (0)
|
||||||
|
#define kfpu_init() 0
|
||||||
|
#define kfpu_fini() ((void) 0)
|
||||||
#endif /* defined(_KERNEL) */
|
#endif /* defined(_KERNEL) */
|
||||||
|
|
||||||
#endif /* __aarch64__ */
|
#endif /* __aarch64__ */
|
||||||
|
|
|
@ -27,9 +27,10 @@
|
||||||
*
|
*
|
||||||
* Kernel fpu methods:
|
* Kernel fpu methods:
|
||||||
* kfpu_allowed()
|
* kfpu_allowed()
|
||||||
* kfpu_initialize()
|
|
||||||
* kfpu_begin()
|
* kfpu_begin()
|
||||||
* kfpu_end()
|
* kfpu_end()
|
||||||
|
* kfpu_init()
|
||||||
|
* kfpu_fini()
|
||||||
*
|
*
|
||||||
* SIMD support:
|
* SIMD support:
|
||||||
*
|
*
|
||||||
|
@ -84,6 +85,15 @@
|
||||||
|
|
||||||
#if defined(_KERNEL)
|
#if defined(_KERNEL)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Disable the WARN_ON_FPU() macro to prevent additional dependencies
|
||||||
|
* when providing the kfpu_* functions. Relevant warnings are included
|
||||||
|
* as appropriate and are unconditionally enabled.
|
||||||
|
*/
|
||||||
|
#if defined(CONFIG_X86_DEBUG_FPU) && !defined(KERNEL_EXPORTS_X86_FPU)
|
||||||
|
#undef CONFIG_X86_DEBUG_FPU
|
||||||
|
#endif
|
||||||
|
|
||||||
#if defined(HAVE_KERNEL_FPU_API_HEADER)
|
#if defined(HAVE_KERNEL_FPU_API_HEADER)
|
||||||
#include <asm/fpu/api.h>
|
#include <asm/fpu/api.h>
|
||||||
#include <asm/fpu/internal.h>
|
#include <asm/fpu/internal.h>
|
||||||
|
@ -92,6 +102,16 @@
|
||||||
#include <asm/xcr.h>
|
#include <asm/xcr.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The following cases are for kernels which export either the
|
||||||
|
* kernel_fpu_* or __kernel_fpu_* functions.
|
||||||
|
*/
|
||||||
|
#if defined(KERNEL_EXPORTS_X86_FPU)
|
||||||
|
|
||||||
|
#define kfpu_allowed() 1
|
||||||
|
#define kfpu_init() 0
|
||||||
|
#define kfpu_fini() ((void) 0)
|
||||||
|
|
||||||
#if defined(HAVE_UNDERSCORE_KERNEL_FPU)
|
#if defined(HAVE_UNDERSCORE_KERNEL_FPU)
|
||||||
#define kfpu_begin() \
|
#define kfpu_begin() \
|
||||||
{ \
|
{ \
|
||||||
|
@ -103,20 +123,208 @@
|
||||||
__kernel_fpu_end(); \
|
__kernel_fpu_end(); \
|
||||||
preempt_enable(); \
|
preempt_enable(); \
|
||||||
}
|
}
|
||||||
|
|
||||||
#elif defined(HAVE_KERNEL_FPU)
|
#elif defined(HAVE_KERNEL_FPU)
|
||||||
#define kfpu_begin() kernel_fpu_begin()
|
#define kfpu_begin() kernel_fpu_begin()
|
||||||
#define kfpu_end() kernel_fpu_end()
|
#define kfpu_end() kernel_fpu_end()
|
||||||
#else
|
|
||||||
/* Kernel doesn't export any kernel_fpu_* functions */
|
|
||||||
#include <asm/fpu/internal.h> /* For kernel xgetbv() */
|
|
||||||
#define kfpu_begin() panic("This code should never run")
|
|
||||||
#define kfpu_end() panic("This code should never run")
|
|
||||||
#endif /* defined(HAVE_KERNEL_FPU) */
|
|
||||||
|
|
||||||
#else
|
#else
|
||||||
/*
|
/*
|
||||||
* fpu dummy methods for userspace
|
* This case is unreachable. When KERNEL_EXPORTS_X86_FPU is defined then
|
||||||
|
* either HAVE_UNDERSCORE_KERNEL_FPU or HAVE_KERNEL_FPU must be defined.
|
||||||
*/
|
*/
|
||||||
|
#error "Unreachable kernel configuration"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#else /* defined(KERNEL_EXPORTS_X86_FPU) */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* When the kernel_fpu_* symbols are unavailable then provide our own
|
||||||
|
* versions which allow the FPU to be safely used.
|
||||||
|
*/
|
||||||
|
#if defined(HAVE_KERNEL_FPU_INTERNAL)
|
||||||
|
|
||||||
|
extern union fpregs_state **zfs_kfpu_fpregs;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Release the per-cpu buffers used to store FPU state: every non-NULL
* per-cpu buffer is freed first, then the pointer array itself.
* kfpu_init() also calls this to unwind a partially completed setup.
* Assumes zfs_kfpu_fpregs is non-NULL on entry - TODO confirm callers.
* NOTE(review): zfs_kfpu_fpregs is not reset after the final kfree().
|
||||||
|
*/
|
||||||
|
static inline void
|
||||||
|
kfpu_fini(void)
|
||||||
|
{
|
||||||
|
int cpu;
|
||||||
|
|
||||||
|
for_each_possible_cpu(cpu) {
|
||||||
|
if (zfs_kfpu_fpregs[cpu] != NULL) {
|
||||||
|
kfree(zfs_kfpu_fpregs[cpu]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Finally release the array of per-cpu pointers. */
kfree(zfs_kfpu_fpregs);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Allocate the storage used by kfpu_begin()/kfpu_end(): a zeroed array
 * of pointers plus one zeroed union fpregs_state per possible CPU.
 * Returns 0 on success or -ENOMEM if any allocation fails.
 */
static inline int
|
||||||
|
kfpu_init(void)
|
||||||
|
{
|
||||||
|
int cpu;
|
||||||
|
|
||||||
|
/*
 * kzalloc() zero-fills the array, so slots that are never allocated
 * stay NULL and kfpu_fini() skips them.
 * NOTE(review): the array has num_possible_cpus() slots but is indexed
 * by CPU id below; if possible CPU ids can be sparse an id could exceed
 * the slot count - confirm (nr_cpu_ids may be the safer bound).
 */
zfs_kfpu_fpregs = kzalloc(num_possible_cpus() *
|
||||||
|
sizeof (union fpregs_state *), GFP_KERNEL);
|
||||||
|
if (zfs_kfpu_fpregs == NULL)
|
||||||
|
return (-ENOMEM);
|
||||||
|
|
||||||
|
for_each_possible_cpu(cpu) {
|
||||||
|
/* Zeroed buffer requested from the node returned by cpu_to_node(). */
zfs_kfpu_fpregs[cpu] = kmalloc_node(sizeof (union fpregs_state),
|
||||||
|
GFP_KERNEL | __GFP_ZERO, cpu_to_node(cpu));
|
||||||
|
if (zfs_kfpu_fpregs[cpu] == NULL) {
|
||||||
|
/* Unwind everything allocated so far before failing. */
kfpu_fini();
|
||||||
|
return (-ENOMEM);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return (0);
|
||||||
|
}
|
||||||
|
|
||||||
|
#define kfpu_allowed() 1
|
||||||
|
#define ex_handler_fprestore ex_handler_default
|
||||||
|
|
||||||
|
/*
|
||||||
|
* FPU save and restore instructions.
|
||||||
|
*/
|
||||||
|
#define __asm __asm__ __volatile__
|
||||||
|
#define kfpu_fxsave(addr) __asm("fxsave %0" : "=m" (*(addr)))
|
||||||
|
#define kfpu_fxsaveq(addr) __asm("fxsaveq %0" : "=m" (*(addr)))
|
||||||
|
#define kfpu_fnsave(addr) __asm("fnsave %0; fwait" : "=m" (*(addr)))
|
||||||
|
#define kfpu_fxrstor(addr) __asm("fxrstor %0" : : "m" (*(addr)))
|
||||||
|
#define kfpu_fxrstorq(addr) __asm("fxrstorq %0" : : "m" (*(addr)))
|
||||||
|
#define kfpu_frstor(addr) __asm("frstor %0" : : "m" (*(addr)))
|
||||||
|
#define kfpu_fxsr_clean(rval) __asm("fnclex; emms; fildl %P[addr]" \
|
||||||
|
: : [addr] "m" (rval));
|
||||||
|
|
||||||
|
/*
 * Save FPU state into *addr through the kernel's XSTATE_XSAVE() macro.
 * The 64-bit component mask is passed as two 32-bit halves, and a
 * one-time warning is raised if the macro reports an error.
 * XSTATE_XSAVE() comes from the kernel headers and is not visible here;
 * presumably it issues the XSAVE-family instruction - TODO confirm.
 */
static inline void
|
||||||
|
kfpu_save_xsave(struct xregs_state *addr, uint64_t mask)
|
||||||
|
{
|
||||||
|
uint32_t low, hi;
|
||||||
|
int err;
|
||||||
|
|
||||||
|
/* Split the mask: low keeps bits 0-31, hi keeps bits 32-63. */
low = mask;
|
||||||
|
hi = mask >> 32;
|
||||||
|
XSTATE_XSAVE(addr, low, hi, err);
|
||||||
|
WARN_ON_ONCE(err);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Save FPU state into *addr with fxsave when CONFIG_X86_32 is enabled,
 * otherwise with fxsaveq.
 */
static inline void
|
||||||
|
kfpu_save_fxsr(struct fxregs_state *addr)
|
||||||
|
{
|
||||||
|
if (IS_ENABLED(CONFIG_X86_32))
|
||||||
|
kfpu_fxsave(addr);
|
||||||
|
else
|
||||||
|
kfpu_fxsaveq(addr);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Save FPU state into *addr using the kfpu_fnsave() macro
 * ("fnsave; fwait"). Used by kfpu_begin() as the last fallback.
 */
static inline void
|
||||||
|
kfpu_save_fsave(struct fregs_state *addr)
|
||||||
|
{
|
||||||
|
kfpu_fnsave(addr);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Start a region in which the FPU may be used: disable preemption and
 * local interrupts, then save the current FPU registers into this
 * CPU's buffer. Must be paired with kfpu_end().
 * NOTE(review): kfpu_end() re-enables interrupts unconditionally with
 * local_irq_enable(); confirm no caller enters with interrupts already
 * disabled.
 */
static inline void
|
||||||
|
kfpu_begin(void)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Preemption and interrupts must be disabled for the critical
|
||||||
|
* region where the FPU state is being modified.
|
||||||
|
*/
|
||||||
|
preempt_disable();
|
||||||
|
local_irq_disable();
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The current FPU registers need to be preserved by kfpu_begin()
|
||||||
|
* and restored by kfpu_end(). They are stored in a dedicated
|
||||||
|
* per-cpu variable, not in the task struct. This allows any user
|
||||||
|
* FPU state to be correctly preserved and restored.
|
||||||
|
*/
|
||||||
|
union fpregs_state *state = zfs_kfpu_fpregs[smp_processor_id()];
|
||||||
|
|
||||||
|
/* Pick the save method by CPU feature: XSAVE, then FXSR, then fsave. */
if (static_cpu_has(X86_FEATURE_XSAVE)) {
|
||||||
|
kfpu_save_xsave(&state->xsave, ~0);
|
||||||
|
} else if (static_cpu_has(X86_FEATURE_FXSR)) {
|
||||||
|
kfpu_save_fxsr(&state->fxsave);
|
||||||
|
} else {
|
||||||
|
kfpu_save_fsave(&state->fsave);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Restore FPU state from *addr through the kernel's XSTATE_XRESTORE()
 * macro, passing the 64-bit component mask as two 32-bit halves.
 * XSTATE_XRESTORE() comes from the kernel headers and is not visible
 * here; presumably it issues the XRSTOR-family instruction - TODO confirm.
 */
static inline void
|
||||||
|
kfpu_restore_xsave(struct xregs_state *addr, uint64_t mask)
|
||||||
|
{
|
||||||
|
uint32_t low, hi;
|
||||||
|
|
||||||
|
/* Split the mask: low keeps bits 0-31, hi keeps bits 32-63. */
low = mask;
|
||||||
|
hi = mask >> 32;
|
||||||
|
XSTATE_XRESTORE(addr, low, hi);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Restore FPU state from *addr with fxrstor when CONFIG_X86_32 is
 * enabled, otherwise with fxrstorq. On CPUs flagged with
 * X86_BUG_FXSAVE_LEAK, kfpu_fxsr_clean() is run first.
 */
static inline void
|
||||||
|
kfpu_restore_fxsr(struct fxregs_state *addr)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* On AuthenticAMD K7 and K8 processors the fxrstor instruction only
|
||||||
|
* restores the _x87 FOP, FIP, and FDP registers when an exception
|
||||||
|
* is pending. Clean the _x87 state to force the restore.
|
||||||
|
*/
|
||||||
|
if (unlikely(static_cpu_has_bug(X86_BUG_FXSAVE_LEAK)))
|
||||||
|
kfpu_fxsr_clean(addr);
|
||||||
|
|
||||||
|
if (IS_ENABLED(CONFIG_X86_32)) {
|
||||||
|
kfpu_fxrstor(addr);
|
||||||
|
} else {
|
||||||
|
kfpu_fxrstorq(addr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Restore FPU state from *addr using the kfpu_frstor() macro.
 * Used by kfpu_end() as the last fallback.
 */
static inline void
|
||||||
|
kfpu_restore_fsave(struct fregs_state *addr)
|
||||||
|
{
|
||||||
|
kfpu_frstor(addr);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * End a region opened by kfpu_begin(): reload the FPU registers from
 * this CPU's buffer, then re-enable local interrupts and preemption.
 * NOTE(review): interrupts are enabled unconditionally rather than
 * restored to their state before kfpu_begin() - confirm this is
 * acceptable for every caller.
 */
static inline void
|
||||||
|
kfpu_end(void)
|
||||||
|
{
|
||||||
|
union fpregs_state *state = zfs_kfpu_fpregs[smp_processor_id()];
|
||||||
|
|
||||||
|
/* Same feature checks, in the same order, as the save in kfpu_begin(). */
if (static_cpu_has(X86_FEATURE_XSAVE)) {
|
||||||
|
kfpu_restore_xsave(&state->xsave, ~0);
|
||||||
|
} else if (static_cpu_has(X86_FEATURE_FXSR)) {
|
||||||
|
kfpu_restore_fxsr(&state->fxsave);
|
||||||
|
} else {
|
||||||
|
kfpu_restore_fsave(&state->fsave);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Undo kfpu_begin() in reverse order: interrupts, then preemption. */
local_irq_enable();
|
||||||
|
preempt_enable();
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
/*
|
||||||
|
* FPU support is unavailable.
|
||||||
|
*/
|
||||||
|
#define kfpu_allowed() 0
|
||||||
|
#define kfpu_begin() do {} while (0)
|
||||||
|
#define kfpu_end() do {} while (0)
|
||||||
|
#define kfpu_init() 0
|
||||||
|
#define kfpu_fini() ((void) 0)
|
||||||
|
|
||||||
|
#endif /* defined(HAVE_KERNEL_FPU_INTERNAL) */
|
||||||
|
#endif /* defined(KERNEL_EXPORTS_X86_FPU) */
|
||||||
|
|
||||||
|
#else /* defined(_KERNEL) */
|
||||||
|
/*
|
||||||
|
* FPU dummy methods for user space.
|
||||||
|
*/
|
||||||
|
#define kfpu_allowed() 1
|
||||||
#define kfpu_begin() do {} while (0)
|
#define kfpu_begin() do {} while (0)
|
||||||
#define kfpu_end() do {} while (0)
|
#define kfpu_end() do {} while (0)
|
||||||
#endif /* defined(_KERNEL) */
|
#endif /* defined(_KERNEL) */
|
||||||
|
@ -289,7 +497,6 @@ CPUID_FEATURE_CHECK(pclmulqdq, PCLMULQDQ);
|
||||||
|
|
||||||
#endif /* !defined(_KERNEL) */
|
#endif /* !defined(_KERNEL) */
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Detect register set support
|
* Detect register set support
|
||||||
*/
|
*/
|
||||||
|
@ -300,7 +507,7 @@ __simd_state_enabled(const uint64_t state)
|
||||||
uint64_t xcr0;
|
uint64_t xcr0;
|
||||||
|
|
||||||
#if defined(_KERNEL)
|
#if defined(_KERNEL)
|
||||||
#if defined(X86_FEATURE_OSXSAVE) && defined(KERNEL_EXPORTS_X86_FPU)
|
#if defined(X86_FEATURE_OSXSAVE)
|
||||||
has_osxsave = !!boot_cpu_has(X86_FEATURE_OSXSAVE);
|
has_osxsave = !!boot_cpu_has(X86_FEATURE_OSXSAVE);
|
||||||
#else
|
#else
|
||||||
has_osxsave = B_FALSE;
|
has_osxsave = B_FALSE;
|
||||||
|
@ -330,11 +537,7 @@ static inline boolean_t
|
||||||
zfs_sse_available(void)
|
zfs_sse_available(void)
|
||||||
{
|
{
|
||||||
#if defined(_KERNEL)
|
#if defined(_KERNEL)
|
||||||
#if defined(KERNEL_EXPORTS_X86_FPU)
|
|
||||||
return (!!boot_cpu_has(X86_FEATURE_XMM));
|
return (!!boot_cpu_has(X86_FEATURE_XMM));
|
||||||
#else
|
|
||||||
return (B_FALSE);
|
|
||||||
#endif
|
|
||||||
#elif !defined(_KERNEL)
|
#elif !defined(_KERNEL)
|
||||||
return (__cpuid_has_sse());
|
return (__cpuid_has_sse());
|
||||||
#endif
|
#endif
|
||||||
|
@ -347,11 +550,7 @@ static inline boolean_t
|
||||||
zfs_sse2_available(void)
|
zfs_sse2_available(void)
|
||||||
{
|
{
|
||||||
#if defined(_KERNEL)
|
#if defined(_KERNEL)
|
||||||
#if defined(KERNEL_EXPORTS_X86_FPU)
|
|
||||||
return (!!boot_cpu_has(X86_FEATURE_XMM2));
|
return (!!boot_cpu_has(X86_FEATURE_XMM2));
|
||||||
#else
|
|
||||||
return (B_FALSE);
|
|
||||||
#endif
|
|
||||||
#elif !defined(_KERNEL)
|
#elif !defined(_KERNEL)
|
||||||
return (__cpuid_has_sse2());
|
return (__cpuid_has_sse2());
|
||||||
#endif
|
#endif
|
||||||
|
@ -364,11 +563,7 @@ static inline boolean_t
|
||||||
zfs_sse3_available(void)
|
zfs_sse3_available(void)
|
||||||
{
|
{
|
||||||
#if defined(_KERNEL)
|
#if defined(_KERNEL)
|
||||||
#if defined(KERNEL_EXPORTS_X86_FPU)
|
|
||||||
return (!!boot_cpu_has(X86_FEATURE_XMM3));
|
return (!!boot_cpu_has(X86_FEATURE_XMM3));
|
||||||
#else
|
|
||||||
return (B_FALSE);
|
|
||||||
#endif
|
|
||||||
#elif !defined(_KERNEL)
|
#elif !defined(_KERNEL)
|
||||||
return (__cpuid_has_sse3());
|
return (__cpuid_has_sse3());
|
||||||
#endif
|
#endif
|
||||||
|
@ -381,11 +576,7 @@ static inline boolean_t
|
||||||
zfs_ssse3_available(void)
|
zfs_ssse3_available(void)
|
||||||
{
|
{
|
||||||
#if defined(_KERNEL)
|
#if defined(_KERNEL)
|
||||||
#if defined(KERNEL_EXPORTS_X86_FPU)
|
|
||||||
return (!!boot_cpu_has(X86_FEATURE_SSSE3));
|
return (!!boot_cpu_has(X86_FEATURE_SSSE3));
|
||||||
#else
|
|
||||||
return (B_FALSE);
|
|
||||||
#endif
|
|
||||||
#elif !defined(_KERNEL)
|
#elif !defined(_KERNEL)
|
||||||
return (__cpuid_has_ssse3());
|
return (__cpuid_has_ssse3());
|
||||||
#endif
|
#endif
|
||||||
|
@ -398,11 +589,7 @@ static inline boolean_t
|
||||||
zfs_sse4_1_available(void)
|
zfs_sse4_1_available(void)
|
||||||
{
|
{
|
||||||
#if defined(_KERNEL)
|
#if defined(_KERNEL)
|
||||||
#if defined(KERNEL_EXPORTS_X86_FPU)
|
|
||||||
return (!!boot_cpu_has(X86_FEATURE_XMM4_1));
|
return (!!boot_cpu_has(X86_FEATURE_XMM4_1));
|
||||||
#else
|
|
||||||
return (B_FALSE);
|
|
||||||
#endif
|
|
||||||
#elif !defined(_KERNEL)
|
#elif !defined(_KERNEL)
|
||||||
return (__cpuid_has_sse4_1());
|
return (__cpuid_has_sse4_1());
|
||||||
#endif
|
#endif
|
||||||
|
@ -415,11 +602,7 @@ static inline boolean_t
|
||||||
zfs_sse4_2_available(void)
|
zfs_sse4_2_available(void)
|
||||||
{
|
{
|
||||||
#if defined(_KERNEL)
|
#if defined(_KERNEL)
|
||||||
#if defined(KERNEL_EXPORTS_X86_FPU)
|
|
||||||
return (!!boot_cpu_has(X86_FEATURE_XMM4_2));
|
return (!!boot_cpu_has(X86_FEATURE_XMM4_2));
|
||||||
#else
|
|
||||||
return (B_FALSE);
|
|
||||||
#endif
|
|
||||||
#elif !defined(_KERNEL)
|
#elif !defined(_KERNEL)
|
||||||
return (__cpuid_has_sse4_2());
|
return (__cpuid_has_sse4_2());
|
||||||
#endif
|
#endif
|
||||||
|
@ -433,11 +616,7 @@ zfs_avx_available(void)
|
||||||
{
|
{
|
||||||
boolean_t has_avx;
|
boolean_t has_avx;
|
||||||
#if defined(_KERNEL)
|
#if defined(_KERNEL)
|
||||||
#if defined(KERNEL_EXPORTS_X86_FPU)
|
|
||||||
has_avx = !!boot_cpu_has(X86_FEATURE_AVX);
|
has_avx = !!boot_cpu_has(X86_FEATURE_AVX);
|
||||||
#else
|
|
||||||
has_avx = B_FALSE;
|
|
||||||
#endif
|
|
||||||
#elif !defined(_KERNEL)
|
#elif !defined(_KERNEL)
|
||||||
has_avx = __cpuid_has_avx();
|
has_avx = __cpuid_has_avx();
|
||||||
#endif
|
#endif
|
||||||
|
@ -453,11 +632,7 @@ zfs_avx2_available(void)
|
||||||
{
|
{
|
||||||
boolean_t has_avx2;
|
boolean_t has_avx2;
|
||||||
#if defined(_KERNEL)
|
#if defined(_KERNEL)
|
||||||
#if defined(X86_FEATURE_AVX2) && defined(KERNEL_EXPORTS_X86_FPU)
|
|
||||||
has_avx2 = !!boot_cpu_has(X86_FEATURE_AVX2);
|
has_avx2 = !!boot_cpu_has(X86_FEATURE_AVX2);
|
||||||
#else
|
|
||||||
has_avx2 = B_FALSE;
|
|
||||||
#endif
|
|
||||||
#elif !defined(_KERNEL)
|
#elif !defined(_KERNEL)
|
||||||
has_avx2 = __cpuid_has_avx2();
|
has_avx2 = __cpuid_has_avx2();
|
||||||
#endif
|
#endif
|
||||||
|
@ -472,7 +647,7 @@ static inline boolean_t
|
||||||
zfs_bmi1_available(void)
|
zfs_bmi1_available(void)
|
||||||
{
|
{
|
||||||
#if defined(_KERNEL)
|
#if defined(_KERNEL)
|
||||||
#if defined(X86_FEATURE_BMI1) && defined(KERNEL_EXPORTS_X86_FPU)
|
#if defined(X86_FEATURE_BMI1)
|
||||||
return (!!boot_cpu_has(X86_FEATURE_BMI1));
|
return (!!boot_cpu_has(X86_FEATURE_BMI1));
|
||||||
#else
|
#else
|
||||||
return (B_FALSE);
|
return (B_FALSE);
|
||||||
|
@ -489,7 +664,7 @@ static inline boolean_t
|
||||||
zfs_bmi2_available(void)
|
zfs_bmi2_available(void)
|
||||||
{
|
{
|
||||||
#if defined(_KERNEL)
|
#if defined(_KERNEL)
|
||||||
#if defined(X86_FEATURE_BMI2) && defined(KERNEL_EXPORTS_X86_FPU)
|
#if defined(X86_FEATURE_BMI2)
|
||||||
return (!!boot_cpu_has(X86_FEATURE_BMI2));
|
return (!!boot_cpu_has(X86_FEATURE_BMI2));
|
||||||
#else
|
#else
|
||||||
return (B_FALSE);
|
return (B_FALSE);
|
||||||
|
@ -506,7 +681,7 @@ static inline boolean_t
|
||||||
zfs_aes_available(void)
|
zfs_aes_available(void)
|
||||||
{
|
{
|
||||||
#if defined(_KERNEL)
|
#if defined(_KERNEL)
|
||||||
#if defined(X86_FEATURE_AES) && defined(KERNEL_EXPORTS_X86_FPU)
|
#if defined(X86_FEATURE_AES)
|
||||||
return (!!boot_cpu_has(X86_FEATURE_AES));
|
return (!!boot_cpu_has(X86_FEATURE_AES));
|
||||||
#else
|
#else
|
||||||
return (B_FALSE);
|
return (B_FALSE);
|
||||||
|
@ -523,7 +698,7 @@ static inline boolean_t
|
||||||
zfs_pclmulqdq_available(void)
|
zfs_pclmulqdq_available(void)
|
||||||
{
|
{
|
||||||
#if defined(_KERNEL)
|
#if defined(_KERNEL)
|
||||||
#if defined(X86_FEATURE_PCLMULQDQ) && defined(KERNEL_EXPORTS_X86_FPU)
|
#if defined(X86_FEATURE_PCLMULQDQ)
|
||||||
return (!!boot_cpu_has(X86_FEATURE_PCLMULQDQ));
|
return (!!boot_cpu_has(X86_FEATURE_PCLMULQDQ));
|
||||||
#else
|
#else
|
||||||
return (B_FALSE);
|
return (B_FALSE);
|
||||||
|
@ -557,7 +732,7 @@ zfs_avx512f_available(void)
|
||||||
boolean_t has_avx512 = B_FALSE;
|
boolean_t has_avx512 = B_FALSE;
|
||||||
|
|
||||||
#if defined(_KERNEL)
|
#if defined(_KERNEL)
|
||||||
#if defined(X86_FEATURE_AVX512F) && defined(KERNEL_EXPORTS_X86_FPU)
|
#if defined(X86_FEATURE_AVX512F)
|
||||||
has_avx512 = !!boot_cpu_has(X86_FEATURE_AVX512F);
|
has_avx512 = !!boot_cpu_has(X86_FEATURE_AVX512F);
|
||||||
#else
|
#else
|
||||||
has_avx512 = B_FALSE;
|
has_avx512 = B_FALSE;
|
||||||
|
@ -576,7 +751,7 @@ zfs_avx512cd_available(void)
|
||||||
boolean_t has_avx512 = B_FALSE;
|
boolean_t has_avx512 = B_FALSE;
|
||||||
|
|
||||||
#if defined(_KERNEL)
|
#if defined(_KERNEL)
|
||||||
#if defined(X86_FEATURE_AVX512CD) && defined(KERNEL_EXPORTS_X86_FPU)
|
#if defined(X86_FEATURE_AVX512CD)
|
||||||
has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
|
has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
|
||||||
boot_cpu_has(X86_FEATURE_AVX512CD);
|
boot_cpu_has(X86_FEATURE_AVX512CD);
|
||||||
#else
|
#else
|
||||||
|
@ -596,7 +771,7 @@ zfs_avx512er_available(void)
|
||||||
boolean_t has_avx512 = B_FALSE;
|
boolean_t has_avx512 = B_FALSE;
|
||||||
|
|
||||||
#if defined(_KERNEL)
|
#if defined(_KERNEL)
|
||||||
#if defined(X86_FEATURE_AVX512ER) && defined(KERNEL_EXPORTS_X86_FPU)
|
#if defined(X86_FEATURE_AVX512ER)
|
||||||
has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
|
has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
|
||||||
boot_cpu_has(X86_FEATURE_AVX512ER);
|
boot_cpu_has(X86_FEATURE_AVX512ER);
|
||||||
#else
|
#else
|
||||||
|
@ -616,7 +791,7 @@ zfs_avx512pf_available(void)
|
||||||
boolean_t has_avx512 = B_FALSE;
|
boolean_t has_avx512 = B_FALSE;
|
||||||
|
|
||||||
#if defined(_KERNEL)
|
#if defined(_KERNEL)
|
||||||
#if defined(X86_FEATURE_AVX512PF) && defined(KERNEL_EXPORTS_X86_FPU)
|
#if defined(X86_FEATURE_AVX512PF)
|
||||||
has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
|
has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
|
||||||
boot_cpu_has(X86_FEATURE_AVX512PF);
|
boot_cpu_has(X86_FEATURE_AVX512PF);
|
||||||
#else
|
#else
|
||||||
|
@ -636,7 +811,7 @@ zfs_avx512bw_available(void)
|
||||||
boolean_t has_avx512 = B_FALSE;
|
boolean_t has_avx512 = B_FALSE;
|
||||||
|
|
||||||
#if defined(_KERNEL)
|
#if defined(_KERNEL)
|
||||||
#if defined(X86_FEATURE_AVX512BW) && defined(KERNEL_EXPORTS_X86_FPU)
|
#if defined(X86_FEATURE_AVX512BW)
|
||||||
has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
|
has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
|
||||||
boot_cpu_has(X86_FEATURE_AVX512BW);
|
boot_cpu_has(X86_FEATURE_AVX512BW);
|
||||||
#else
|
#else
|
||||||
|
@ -656,7 +831,7 @@ zfs_avx512dq_available(void)
|
||||||
boolean_t has_avx512 = B_FALSE;
|
boolean_t has_avx512 = B_FALSE;
|
||||||
|
|
||||||
#if defined(_KERNEL)
|
#if defined(_KERNEL)
|
||||||
#if defined(X86_FEATURE_AVX512DQ) && defined(KERNEL_EXPORTS_X86_FPU)
|
#if defined(X86_FEATURE_AVX512DQ)
|
||||||
has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
|
has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
|
||||||
boot_cpu_has(X86_FEATURE_AVX512DQ);
|
boot_cpu_has(X86_FEATURE_AVX512DQ);
|
||||||
#else
|
#else
|
||||||
|
@ -676,7 +851,7 @@ zfs_avx512vl_available(void)
|
||||||
boolean_t has_avx512 = B_FALSE;
|
boolean_t has_avx512 = B_FALSE;
|
||||||
|
|
||||||
#if defined(_KERNEL)
|
#if defined(_KERNEL)
|
||||||
#if defined(X86_FEATURE_AVX512VL) && defined(KERNEL_EXPORTS_X86_FPU)
|
#if defined(X86_FEATURE_AVX512VL)
|
||||||
has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
|
has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
|
||||||
boot_cpu_has(X86_FEATURE_AVX512VL);
|
boot_cpu_has(X86_FEATURE_AVX512VL);
|
||||||
#else
|
#else
|
||||||
|
@ -696,7 +871,7 @@ zfs_avx512ifma_available(void)
|
||||||
boolean_t has_avx512 = B_FALSE;
|
boolean_t has_avx512 = B_FALSE;
|
||||||
|
|
||||||
#if defined(_KERNEL)
|
#if defined(_KERNEL)
|
||||||
#if defined(X86_FEATURE_AVX512IFMA) && defined(KERNEL_EXPORTS_X86_FPU)
|
#if defined(X86_FEATURE_AVX512IFMA)
|
||||||
has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
|
has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
|
||||||
boot_cpu_has(X86_FEATURE_AVX512IFMA);
|
boot_cpu_has(X86_FEATURE_AVX512IFMA);
|
||||||
#else
|
#else
|
||||||
|
@ -716,7 +891,7 @@ zfs_avx512vbmi_available(void)
|
||||||
boolean_t has_avx512 = B_FALSE;
|
boolean_t has_avx512 = B_FALSE;
|
||||||
|
|
||||||
#if defined(_KERNEL)
|
#if defined(_KERNEL)
|
||||||
#if defined(X86_FEATURE_AVX512VBMI) && defined(KERNEL_EXPORTS_X86_FPU)
|
#if defined(X86_FEATURE_AVX512VBMI)
|
||||||
has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
|
has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
|
||||||
boot_cpu_has(X86_FEATURE_AVX512VBMI);
|
boot_cpu_has(X86_FEATURE_AVX512VBMI);
|
||||||
#else
|
#else
|
||||||
|
|
|
@ -51,7 +51,7 @@ int vdev_raidz_reconstruct(struct raidz_map *, const int *, int);
|
||||||
*/
|
*/
|
||||||
void vdev_raidz_math_init(void);
|
void vdev_raidz_math_init(void);
|
||||||
void vdev_raidz_math_fini(void);
|
void vdev_raidz_math_fini(void);
|
||||||
struct raidz_impl_ops *vdev_raidz_math_get_ops(void);
|
const struct raidz_impl_ops *vdev_raidz_math_get_ops(void);
|
||||||
int vdev_raidz_math_generate(struct raidz_map *);
|
int vdev_raidz_math_generate(struct raidz_map *);
|
||||||
int vdev_raidz_math_reconstruct(struct raidz_map *, const int *, const int *,
|
int vdev_raidz_math_reconstruct(struct raidz_map *, const int *, const int *,
|
||||||
const int);
|
const int);
|
||||||
|
|
|
@ -126,7 +126,7 @@ typedef struct raidz_map {
|
||||||
uintptr_t rm_reports; /* # of referencing checksum reports */
|
uintptr_t rm_reports; /* # of referencing checksum reports */
|
||||||
uint8_t rm_freed; /* map no longer has referencing ZIO */
|
uint8_t rm_freed; /* map no longer has referencing ZIO */
|
||||||
uint8_t rm_ecksuminjected; /* checksum error was injected */
|
uint8_t rm_ecksuminjected; /* checksum error was injected */
|
||||||
raidz_impl_ops_t *rm_ops; /* RAIDZ math operations */
|
const raidz_impl_ops_t *rm_ops; /* RAIDZ math operations */
|
||||||
raidz_col_t rm_col[1]; /* Flexible array of I/O columns */
|
raidz_col_t rm_col[1]; /* Flexible array of I/O columns */
|
||||||
} raidz_map_t;
|
} raidz_map_t;
|
||||||
|
|
||||||
|
|
|
@ -27,6 +27,7 @@
|
||||||
#include <sys/crypto/spi.h>
|
#include <sys/crypto/spi.h>
|
||||||
#include <modes/modes.h>
|
#include <modes/modes.h>
|
||||||
#include <aes/aes_impl.h>
|
#include <aes/aes_impl.h>
|
||||||
|
#include <linux/simd.h>
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Initialize AES encryption and decryption key schedules.
|
* Initialize AES encryption and decryption key schedules.
|
||||||
|
@ -40,7 +41,7 @@
|
||||||
void
|
void
|
||||||
aes_init_keysched(const uint8_t *cipherKey, uint_t keyBits, void *keysched)
|
aes_init_keysched(const uint8_t *cipherKey, uint_t keyBits, void *keysched)
|
||||||
{
|
{
|
||||||
aes_impl_ops_t *ops = aes_impl_get_ops();
|
const aes_impl_ops_t *ops = aes_impl_get_ops();
|
||||||
aes_key_t *newbie = keysched;
|
aes_key_t *newbie = keysched;
|
||||||
uint_t keysize, i, j;
|
uint_t keysize, i, j;
|
||||||
union {
|
union {
|
||||||
|
@ -252,12 +253,17 @@ static size_t aes_supp_impl_cnt = 0;
|
||||||
static aes_impl_ops_t *aes_supp_impl[ARRAY_SIZE(aes_all_impl)];
|
static aes_impl_ops_t *aes_supp_impl[ARRAY_SIZE(aes_all_impl)];
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Selects the aes operations for encrypt/decrypt/key setup
|
* Returns the AES operations for encrypt/decrypt/key setup. When a
|
||||||
|
* SIMD implementation is not allowed in the current context, then
|
||||||
|
* fallback to the fastest generic implementation.
|
||||||
*/
|
*/
|
||||||
aes_impl_ops_t *
|
const aes_impl_ops_t *
|
||||||
aes_impl_get_ops()
|
aes_impl_get_ops(void)
|
||||||
{
|
{
|
||||||
aes_impl_ops_t *ops = NULL;
|
if (!kfpu_allowed())
|
||||||
|
return (&aes_generic_impl);
|
||||||
|
|
||||||
|
const aes_impl_ops_t *ops = NULL;
|
||||||
const uint32_t impl = AES_IMPL_READ(icp_aes_impl);
|
const uint32_t impl = AES_IMPL_READ(icp_aes_impl);
|
||||||
|
|
||||||
switch (impl) {
|
switch (impl) {
|
||||||
|
@ -266,14 +272,12 @@ aes_impl_get_ops()
|
||||||
ops = &aes_fastest_impl;
|
ops = &aes_fastest_impl;
|
||||||
break;
|
break;
|
||||||
case IMPL_CYCLE:
|
case IMPL_CYCLE:
|
||||||
{
|
/* Cycle through supported implementations */
|
||||||
ASSERT(aes_impl_initialized);
|
ASSERT(aes_impl_initialized);
|
||||||
ASSERT3U(aes_supp_impl_cnt, >, 0);
|
ASSERT3U(aes_supp_impl_cnt, >, 0);
|
||||||
/* Cycle through supported implementations */
|
|
||||||
static size_t cycle_impl_idx = 0;
|
static size_t cycle_impl_idx = 0;
|
||||||
size_t idx = (++cycle_impl_idx) % aes_supp_impl_cnt;
|
size_t idx = (++cycle_impl_idx) % aes_supp_impl_cnt;
|
||||||
ops = aes_supp_impl[idx];
|
ops = aes_supp_impl[idx];
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
ASSERT3U(impl, <, aes_supp_impl_cnt);
|
ASSERT3U(impl, <, aes_supp_impl_cnt);
|
||||||
|
@ -288,13 +292,16 @@ aes_impl_get_ops()
|
||||||
return (ops);
|
return (ops);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Initialize all supported implementations.
|
||||||
|
*/
|
||||||
void
|
void
|
||||||
aes_impl_init(void)
|
aes_impl_init(void)
|
||||||
{
|
{
|
||||||
aes_impl_ops_t *curr_impl;
|
aes_impl_ops_t *curr_impl;
|
||||||
int i, c;
|
int i, c;
|
||||||
|
|
||||||
/* move supported impl into aes_supp_impls */
|
/* Move supported implementations into aes_supp_impls */
|
||||||
for (i = 0, c = 0; i < ARRAY_SIZE(aes_all_impl); i++) {
|
for (i = 0, c = 0; i < ARRAY_SIZE(aes_all_impl); i++) {
|
||||||
curr_impl = (aes_impl_ops_t *)aes_all_impl[i];
|
curr_impl = (aes_impl_ops_t *)aes_all_impl[i];
|
||||||
|
|
||||||
|
|
|
@ -108,7 +108,7 @@ aes_aesni_decrypt(const uint32_t rk[], int Nr, const uint32_t ct[4],
|
||||||
static boolean_t
|
static boolean_t
|
||||||
aes_aesni_will_work(void)
|
aes_aesni_will_work(void)
|
||||||
{
|
{
|
||||||
return (zfs_aes_available());
|
return (kfpu_allowed() && zfs_aes_available());
|
||||||
}
|
}
|
||||||
|
|
||||||
const aes_impl_ops_t aes_aesni_impl = {
|
const aes_impl_ops_t aes_aesni_impl = {
|
||||||
|
|
|
@ -29,6 +29,7 @@
|
||||||
#include <sys/crypto/impl.h>
|
#include <sys/crypto/impl.h>
|
||||||
#include <sys/byteorder.h>
|
#include <sys/byteorder.h>
|
||||||
#include <modes/gcm_impl.h>
|
#include <modes/gcm_impl.h>
|
||||||
|
#include <linux/simd.h>
|
||||||
|
|
||||||
#define GHASH(c, d, t, o) \
|
#define GHASH(c, d, t, o) \
|
||||||
xor_block((uint8_t *)(d), (uint8_t *)(c)->gcm_ghash); \
|
xor_block((uint8_t *)(d), (uint8_t *)(c)->gcm_ghash); \
|
||||||
|
@ -46,7 +47,7 @@ gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
|
||||||
void (*copy_block)(uint8_t *, uint8_t *),
|
void (*copy_block)(uint8_t *, uint8_t *),
|
||||||
void (*xor_block)(uint8_t *, uint8_t *))
|
void (*xor_block)(uint8_t *, uint8_t *))
|
||||||
{
|
{
|
||||||
gcm_impl_ops_t *gops;
|
const gcm_impl_ops_t *gops;
|
||||||
size_t remainder = length;
|
size_t remainder = length;
|
||||||
size_t need = 0;
|
size_t need = 0;
|
||||||
uint8_t *datap = (uint8_t *)data;
|
uint8_t *datap = (uint8_t *)data;
|
||||||
|
@ -168,7 +169,7 @@ gcm_encrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
|
||||||
void (*copy_block)(uint8_t *, uint8_t *),
|
void (*copy_block)(uint8_t *, uint8_t *),
|
||||||
void (*xor_block)(uint8_t *, uint8_t *))
|
void (*xor_block)(uint8_t *, uint8_t *))
|
||||||
{
|
{
|
||||||
gcm_impl_ops_t *gops;
|
const gcm_impl_ops_t *gops;
|
||||||
uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
|
uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
|
||||||
uint8_t *ghash, *macp = NULL;
|
uint8_t *ghash, *macp = NULL;
|
||||||
int i, rv;
|
int i, rv;
|
||||||
|
@ -320,7 +321,7 @@ gcm_decrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
|
||||||
int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
|
int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
|
||||||
void (*xor_block)(uint8_t *, uint8_t *))
|
void (*xor_block)(uint8_t *, uint8_t *))
|
||||||
{
|
{
|
||||||
gcm_impl_ops_t *gops;
|
const gcm_impl_ops_t *gops;
|
||||||
size_t pt_len;
|
size_t pt_len;
|
||||||
size_t remainder;
|
size_t remainder;
|
||||||
uint8_t *ghash;
|
uint8_t *ghash;
|
||||||
|
@ -427,7 +428,7 @@ gcm_format_initial_blocks(uchar_t *iv, ulong_t iv_len,
|
||||||
void (*copy_block)(uint8_t *, uint8_t *),
|
void (*copy_block)(uint8_t *, uint8_t *),
|
||||||
void (*xor_block)(uint8_t *, uint8_t *))
|
void (*xor_block)(uint8_t *, uint8_t *))
|
||||||
{
|
{
|
||||||
gcm_impl_ops_t *gops;
|
const gcm_impl_ops_t *gops;
|
||||||
uint8_t *cb;
|
uint8_t *cb;
|
||||||
ulong_t remainder = iv_len;
|
ulong_t remainder = iv_len;
|
||||||
ulong_t processed = 0;
|
ulong_t processed = 0;
|
||||||
|
@ -481,7 +482,7 @@ gcm_init(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len,
|
||||||
void (*copy_block)(uint8_t *, uint8_t *),
|
void (*copy_block)(uint8_t *, uint8_t *),
|
||||||
void (*xor_block)(uint8_t *, uint8_t *))
|
void (*xor_block)(uint8_t *, uint8_t *))
|
||||||
{
|
{
|
||||||
gcm_impl_ops_t *gops;
|
const gcm_impl_ops_t *gops;
|
||||||
uint8_t *ghash, *datap, *authp;
|
uint8_t *ghash, *datap, *authp;
|
||||||
size_t remainder, processed;
|
size_t remainder, processed;
|
||||||
|
|
||||||
|
@ -660,12 +661,17 @@ static size_t gcm_supp_impl_cnt = 0;
|
||||||
static gcm_impl_ops_t *gcm_supp_impl[ARRAY_SIZE(gcm_all_impl)];
|
static gcm_impl_ops_t *gcm_supp_impl[ARRAY_SIZE(gcm_all_impl)];
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Selects the gcm operation
|
* Returns the GCM operations for encrypt/decrypt/key setup. When a
|
||||||
|
* SIMD implementation is not allowed in the current context, then
|
||||||
|
* fallback to the fastest generic implementation.
|
||||||
*/
|
*/
|
||||||
gcm_impl_ops_t *
|
const gcm_impl_ops_t *
|
||||||
gcm_impl_get_ops()
|
gcm_impl_get_ops()
|
||||||
{
|
{
|
||||||
gcm_impl_ops_t *ops = NULL;
|
if (!kfpu_allowed())
|
||||||
|
return (&gcm_generic_impl);
|
||||||
|
|
||||||
|
const gcm_impl_ops_t *ops = NULL;
|
||||||
const uint32_t impl = GCM_IMPL_READ(icp_gcm_impl);
|
const uint32_t impl = GCM_IMPL_READ(icp_gcm_impl);
|
||||||
|
|
||||||
switch (impl) {
|
switch (impl) {
|
||||||
|
@ -674,14 +680,12 @@ gcm_impl_get_ops()
|
||||||
ops = &gcm_fastest_impl;
|
ops = &gcm_fastest_impl;
|
||||||
break;
|
break;
|
||||||
case IMPL_CYCLE:
|
case IMPL_CYCLE:
|
||||||
{
|
/* Cycle through supported implementations */
|
||||||
ASSERT(gcm_impl_initialized);
|
ASSERT(gcm_impl_initialized);
|
||||||
ASSERT3U(gcm_supp_impl_cnt, >, 0);
|
ASSERT3U(gcm_supp_impl_cnt, >, 0);
|
||||||
/* Cycle through supported implementations */
|
|
||||||
static size_t cycle_impl_idx = 0;
|
static size_t cycle_impl_idx = 0;
|
||||||
size_t idx = (++cycle_impl_idx) % gcm_supp_impl_cnt;
|
size_t idx = (++cycle_impl_idx) % gcm_supp_impl_cnt;
|
||||||
ops = gcm_supp_impl[idx];
|
ops = gcm_supp_impl[idx];
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
ASSERT3U(impl, <, gcm_supp_impl_cnt);
|
ASSERT3U(impl, <, gcm_supp_impl_cnt);
|
||||||
|
@ -696,13 +700,16 @@ gcm_impl_get_ops()
|
||||||
return (ops);
|
return (ops);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Initialize all supported implementations.
|
||||||
|
*/
|
||||||
void
|
void
|
||||||
gcm_impl_init(void)
|
gcm_impl_init(void)
|
||||||
{
|
{
|
||||||
gcm_impl_ops_t *curr_impl;
|
gcm_impl_ops_t *curr_impl;
|
||||||
int i, c;
|
int i, c;
|
||||||
|
|
||||||
/* move supported impl into aes_supp_impls */
|
/* Move supported implementations into gcm_supp_impls */
|
||||||
for (i = 0, c = 0; i < ARRAY_SIZE(gcm_all_impl); i++) {
|
for (i = 0, c = 0; i < ARRAY_SIZE(gcm_all_impl); i++) {
|
||||||
curr_impl = (gcm_impl_ops_t *)gcm_all_impl[i];
|
curr_impl = (gcm_impl_ops_t *)gcm_all_impl[i];
|
||||||
|
|
||||||
|
@ -711,7 +718,10 @@ gcm_impl_init(void)
|
||||||
}
|
}
|
||||||
gcm_supp_impl_cnt = c;
|
gcm_supp_impl_cnt = c;
|
||||||
|
|
||||||
/* set fastest implementation. assume hardware accelerated is fastest */
|
/*
|
||||||
|
* Set the fastest implementation given the assumption that the
|
||||||
|
* hardware accelerated version is the fastest.
|
||||||
|
*/
|
||||||
#if defined(__x86_64) && defined(HAVE_PCLMULQDQ)
|
#if defined(__x86_64) && defined(HAVE_PCLMULQDQ)
|
||||||
if (gcm_pclmulqdq_impl.is_supported()) {
|
if (gcm_pclmulqdq_impl.is_supported()) {
|
||||||
memcpy(&gcm_fastest_impl, &gcm_pclmulqdq_impl,
|
memcpy(&gcm_fastest_impl, &gcm_pclmulqdq_impl,
|
||||||
|
|
|
@ -52,7 +52,7 @@ gcm_pclmulqdq_mul(uint64_t *x_in, uint64_t *y, uint64_t *res)
|
||||||
static boolean_t
|
static boolean_t
|
||||||
gcm_pclmulqdq_will_work(void)
|
gcm_pclmulqdq_will_work(void)
|
||||||
{
|
{
|
||||||
return (zfs_pclmulqdq_available());
|
return (kfpu_allowed() && zfs_pclmulqdq_available());
|
||||||
}
|
}
|
||||||
|
|
||||||
const gcm_impl_ops_t gcm_pclmulqdq_impl = {
|
const gcm_impl_ops_t gcm_pclmulqdq_impl = {
|
||||||
|
|
|
@ -201,9 +201,9 @@ extern const aes_impl_ops_t aes_aesni_impl;
|
||||||
void aes_impl_init(void);
|
void aes_impl_init(void);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Get selected aes implementation
|
* Returns optimal allowed AES implementation
|
||||||
*/
|
*/
|
||||||
struct aes_impl_ops *aes_impl_get_ops(void);
|
const struct aes_impl_ops *aes_impl_get_ops(void);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
|
|
|
@ -64,9 +64,9 @@ extern const gcm_impl_ops_t gcm_pclmulqdq_impl;
|
||||||
void gcm_impl_init(void);
|
void gcm_impl_init(void);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Get selected aes implementation
|
* Returns optimal allowed GCM implementation
|
||||||
*/
|
*/
|
||||||
struct gcm_impl_ops *gcm_impl_get_ops(void);
|
const struct gcm_impl_ops *gcm_impl_get_ops(void);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
|
|
|
@ -206,7 +206,7 @@ aes_mod_init(void)
|
||||||
{
|
{
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
/* find fastest implementations and set any requested implementations */
|
/* Determine the fastest available implementation. */
|
||||||
aes_impl_init();
|
aes_impl_init();
|
||||||
gcm_impl_init();
|
gcm_impl_init();
|
||||||
|
|
||||||
|
|
|
@ -140,6 +140,7 @@
|
||||||
#include <sys/zio_checksum.h>
|
#include <sys/zio_checksum.h>
|
||||||
#include <sys/zfs_context.h>
|
#include <sys/zfs_context.h>
|
||||||
#include <zfs_fletcher.h>
|
#include <zfs_fletcher.h>
|
||||||
|
#include <linux/simd.h>
|
||||||
|
|
||||||
#define FLETCHER_MIN_SIMD_SIZE 64
|
#define FLETCHER_MIN_SIMD_SIZE 64
|
||||||
|
|
||||||
|
@ -205,21 +206,19 @@ static struct fletcher_4_impl_selector {
|
||||||
const char *fis_name;
|
const char *fis_name;
|
||||||
uint32_t fis_sel;
|
uint32_t fis_sel;
|
||||||
} fletcher_4_impl_selectors[] = {
|
} fletcher_4_impl_selectors[] = {
|
||||||
#if !defined(_KERNEL)
|
|
||||||
{ "cycle", IMPL_CYCLE },
|
{ "cycle", IMPL_CYCLE },
|
||||||
#endif
|
|
||||||
{ "fastest", IMPL_FASTEST },
|
{ "fastest", IMPL_FASTEST },
|
||||||
{ "scalar", IMPL_SCALAR }
|
{ "scalar", IMPL_SCALAR }
|
||||||
};
|
};
|
||||||
|
|
||||||
#if defined(_KERNEL)
|
#if defined(_KERNEL)
|
||||||
static kstat_t *fletcher_4_kstat;
|
static kstat_t *fletcher_4_kstat;
|
||||||
#endif
|
|
||||||
|
|
||||||
static struct fletcher_4_kstat {
|
static struct fletcher_4_kstat {
|
||||||
uint64_t native;
|
uint64_t native;
|
||||||
uint64_t byteswap;
|
uint64_t byteswap;
|
||||||
} fletcher_4_stat_data[ARRAY_SIZE(fletcher_4_impls) + 1];
|
} fletcher_4_stat_data[ARRAY_SIZE(fletcher_4_impls) + 1];
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Indicate that benchmark has been completed */
|
/* Indicate that benchmark has been completed */
|
||||||
static boolean_t fletcher_4_initialized = B_FALSE;
|
static boolean_t fletcher_4_initialized = B_FALSE;
|
||||||
|
@ -408,32 +407,36 @@ fletcher_4_impl_set(const char *val)
|
||||||
return (err);
|
return (err);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Returns the Fletcher 4 operations for checksums. When a SIMD
|
||||||
|
* implementation is not allowed in the current context, then fallback
|
||||||
|
* to the fastest generic implementation.
|
||||||
|
*/
|
||||||
static inline const fletcher_4_ops_t *
|
static inline const fletcher_4_ops_t *
|
||||||
fletcher_4_impl_get(void)
|
fletcher_4_impl_get(void)
|
||||||
{
|
{
|
||||||
fletcher_4_ops_t *ops = NULL;
|
if (!kfpu_allowed())
|
||||||
const uint32_t impl = IMPL_READ(fletcher_4_impl_chosen);
|
return (&fletcher_4_superscalar4_ops);
|
||||||
|
|
||||||
|
const fletcher_4_ops_t *ops = NULL;
|
||||||
|
uint32_t impl = IMPL_READ(fletcher_4_impl_chosen);
|
||||||
|
|
||||||
switch (impl) {
|
switch (impl) {
|
||||||
case IMPL_FASTEST:
|
case IMPL_FASTEST:
|
||||||
ASSERT(fletcher_4_initialized);
|
ASSERT(fletcher_4_initialized);
|
||||||
ops = &fletcher_4_fastest_impl;
|
ops = &fletcher_4_fastest_impl;
|
||||||
break;
|
break;
|
||||||
#if !defined(_KERNEL)
|
case IMPL_CYCLE:
|
||||||
case IMPL_CYCLE: {
|
/* Cycle through supported implementations */
|
||||||
ASSERT(fletcher_4_initialized);
|
ASSERT(fletcher_4_initialized);
|
||||||
ASSERT3U(fletcher_4_supp_impls_cnt, >, 0);
|
ASSERT3U(fletcher_4_supp_impls_cnt, >, 0);
|
||||||
|
|
||||||
static uint32_t cycle_count = 0;
|
static uint32_t cycle_count = 0;
|
||||||
uint32_t idx = (++cycle_count) % fletcher_4_supp_impls_cnt;
|
uint32_t idx = (++cycle_count) % fletcher_4_supp_impls_cnt;
|
||||||
ops = fletcher_4_supp_impls[idx];
|
ops = fletcher_4_supp_impls[idx];
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
#endif
|
|
||||||
default:
|
default:
|
||||||
ASSERT3U(fletcher_4_supp_impls_cnt, >, 0);
|
ASSERT3U(fletcher_4_supp_impls_cnt, >, 0);
|
||||||
ASSERT3U(impl, <, fletcher_4_supp_impls_cnt);
|
ASSERT3U(impl, <, fletcher_4_supp_impls_cnt);
|
||||||
|
|
||||||
ops = fletcher_4_supp_impls[impl];
|
ops = fletcher_4_supp_impls[impl];
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -659,6 +662,7 @@ fletcher_4_kstat_addr(kstat_t *ksp, loff_t n)
|
||||||
typedef void fletcher_checksum_func_t(const void *, uint64_t, const void *,
|
typedef void fletcher_checksum_func_t(const void *, uint64_t, const void *,
|
||||||
zio_cksum_t *);
|
zio_cksum_t *);
|
||||||
|
|
||||||
|
#if defined(_KERNEL)
|
||||||
static void
|
static void
|
||||||
fletcher_4_benchmark_impl(boolean_t native, char *data, uint64_t data_size)
|
fletcher_4_benchmark_impl(boolean_t native, char *data, uint64_t data_size)
|
||||||
{
|
{
|
||||||
|
@ -716,16 +720,18 @@ fletcher_4_benchmark_impl(boolean_t native, char *data, uint64_t data_size)
|
||||||
/* restore original selection */
|
/* restore original selection */
|
||||||
atomic_swap_32(&fletcher_4_impl_chosen, sel_save);
|
atomic_swap_32(&fletcher_4_impl_chosen, sel_save);
|
||||||
}
|
}
|
||||||
|
#endif /* _KERNEL */
|
||||||
|
|
||||||
void
|
/*
|
||||||
fletcher_4_init(void)
|
* Initialize and benchmark all supported implementations.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
fletcher_4_benchmark(void)
|
||||||
{
|
{
|
||||||
static const size_t data_size = 1 << SPA_OLD_MAXBLOCKSHIFT; /* 128kiB */
|
|
||||||
fletcher_4_ops_t *curr_impl;
|
fletcher_4_ops_t *curr_impl;
|
||||||
char *databuf;
|
|
||||||
int i, c;
|
int i, c;
|
||||||
|
|
||||||
/* move supported impl into fletcher_4_supp_impls */
|
/* Move supported implementations into fletcher_4_supp_impls */
|
||||||
for (i = 0, c = 0; i < ARRAY_SIZE(fletcher_4_impls); i++) {
|
for (i = 0, c = 0; i < ARRAY_SIZE(fletcher_4_impls); i++) {
|
||||||
curr_impl = (fletcher_4_ops_t *)fletcher_4_impls[i];
|
curr_impl = (fletcher_4_ops_t *)fletcher_4_impls[i];
|
||||||
|
|
||||||
|
@ -735,19 +741,10 @@ fletcher_4_init(void)
|
||||||
membar_producer(); /* complete fletcher_4_supp_impls[] init */
|
membar_producer(); /* complete fletcher_4_supp_impls[] init */
|
||||||
fletcher_4_supp_impls_cnt = c; /* number of supported impl */
|
fletcher_4_supp_impls_cnt = c; /* number of supported impl */
|
||||||
|
|
||||||
#if !defined(_KERNEL)
|
#if defined(_KERNEL)
|
||||||
/* Skip benchmarking and use last implementation as fastest */
|
static const size_t data_size = 1 << SPA_OLD_MAXBLOCKSHIFT; /* 128kiB */
|
||||||
memcpy(&fletcher_4_fastest_impl,
|
char *databuf = vmem_alloc(data_size, KM_SLEEP);
|
||||||
fletcher_4_supp_impls[fletcher_4_supp_impls_cnt-1],
|
|
||||||
sizeof (fletcher_4_fastest_impl));
|
|
||||||
fletcher_4_fastest_impl.name = "fastest";
|
|
||||||
membar_producer();
|
|
||||||
|
|
||||||
fletcher_4_initialized = B_TRUE;
|
|
||||||
return;
|
|
||||||
#endif
|
|
||||||
/* Benchmark all supported implementations */
|
|
||||||
databuf = vmem_alloc(data_size, KM_SLEEP);
|
|
||||||
for (i = 0; i < data_size / sizeof (uint64_t); i++)
|
for (i = 0; i < data_size / sizeof (uint64_t); i++)
|
||||||
((uint64_t *)databuf)[i] = (uintptr_t)(databuf+i); /* warm-up */
|
((uint64_t *)databuf)[i] = (uintptr_t)(databuf+i); /* warm-up */
|
||||||
|
|
||||||
|
@ -755,9 +752,28 @@ fletcher_4_init(void)
|
||||||
fletcher_4_benchmark_impl(B_TRUE, databuf, data_size);
|
fletcher_4_benchmark_impl(B_TRUE, databuf, data_size);
|
||||||
|
|
||||||
vmem_free(databuf, data_size);
|
vmem_free(databuf, data_size);
|
||||||
|
#else
|
||||||
|
/*
|
||||||
|
* Skip the benchmark in user space to avoid impacting libzpool
|
||||||
|
* consumers (zdb, zhack, zinject, ztest). The last implementation
|
||||||
|
* is assumed to be the fastest and used by default.
|
||||||
|
*/
|
||||||
|
memcpy(&fletcher_4_fastest_impl,
|
||||||
|
fletcher_4_supp_impls[fletcher_4_supp_impls_cnt - 1],
|
||||||
|
sizeof (fletcher_4_fastest_impl));
|
||||||
|
fletcher_4_fastest_impl.name = "fastest";
|
||||||
|
membar_producer();
|
||||||
|
#endif /* _KERNEL */
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
fletcher_4_init(void)
|
||||||
|
{
|
||||||
|
/* Determine the fastest available implementation. */
|
||||||
|
fletcher_4_benchmark();
|
||||||
|
|
||||||
#if defined(_KERNEL)
|
#if defined(_KERNEL)
|
||||||
/* install kstats for all implementations */
|
/* Install kstats for all implementations */
|
||||||
fletcher_4_kstat = kstat_create("zfs", 0, "fletcher_4_bench", "misc",
|
fletcher_4_kstat = kstat_create("zfs", 0, "fletcher_4_bench", "misc",
|
||||||
KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
|
KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
|
||||||
if (fletcher_4_kstat != NULL) {
|
if (fletcher_4_kstat != NULL) {
|
||||||
|
|
|
@ -198,7 +198,7 @@ unsigned char SRC __attribute__((vector_size(16)));
|
||||||
|
|
||||||
static boolean_t fletcher_4_aarch64_neon_valid(void)
|
static boolean_t fletcher_4_aarch64_neon_valid(void)
|
||||||
{
|
{
|
||||||
return (B_TRUE);
|
return (kfpu_allowed());
|
||||||
}
|
}
|
||||||
|
|
||||||
const fletcher_4_ops_t fletcher_4_aarch64_neon_ops = {
|
const fletcher_4_ops_t fletcher_4_aarch64_neon_ops = {
|
||||||
|
|
|
@ -157,7 +157,7 @@ STACK_FRAME_NON_STANDARD(fletcher_4_avx512f_byteswap);
|
||||||
static boolean_t
|
static boolean_t
|
||||||
fletcher_4_avx512f_valid(void)
|
fletcher_4_avx512f_valid(void)
|
||||||
{
|
{
|
||||||
return (zfs_avx512f_available());
|
return (kfpu_allowed() && zfs_avx512f_available());
|
||||||
}
|
}
|
||||||
|
|
||||||
const fletcher_4_ops_t fletcher_4_avx512f_ops = {
|
const fletcher_4_ops_t fletcher_4_avx512f_ops = {
|
||||||
|
|
|
@ -156,7 +156,7 @@ fletcher_4_avx2_byteswap(fletcher_4_ctx_t *ctx, const void *buf, uint64_t size)
|
||||||
|
|
||||||
static boolean_t fletcher_4_avx2_valid(void)
|
static boolean_t fletcher_4_avx2_valid(void)
|
||||||
{
|
{
|
||||||
return (zfs_avx_available() && zfs_avx2_available());
|
return (kfpu_allowed() && zfs_avx_available() && zfs_avx2_available());
|
||||||
}
|
}
|
||||||
|
|
||||||
const fletcher_4_ops_t fletcher_4_avx2_ops = {
|
const fletcher_4_ops_t fletcher_4_avx2_ops = {
|
||||||
|
|
|
@ -157,7 +157,7 @@ fletcher_4_sse2_byteswap(fletcher_4_ctx_t *ctx, const void *buf, uint64_t size)
|
||||||
|
|
||||||
static boolean_t fletcher_4_sse2_valid(void)
|
static boolean_t fletcher_4_sse2_valid(void)
|
||||||
{
|
{
|
||||||
return (zfs_sse2_available());
|
return (kfpu_allowed() && zfs_sse2_available());
|
||||||
}
|
}
|
||||||
|
|
||||||
const fletcher_4_ops_t fletcher_4_sse2_ops = {
|
const fletcher_4_ops_t fletcher_4_sse2_ops = {
|
||||||
|
@ -214,7 +214,8 @@ fletcher_4_ssse3_byteswap(fletcher_4_ctx_t *ctx, const void *buf, uint64_t size)
|
||||||
|
|
||||||
static boolean_t fletcher_4_ssse3_valid(void)
|
static boolean_t fletcher_4_ssse3_valid(void)
|
||||||
{
|
{
|
||||||
return (zfs_sse2_available() && zfs_ssse3_available());
|
return (kfpu_allowed() && zfs_sse2_available() &&
|
||||||
|
zfs_ssse3_available());
|
||||||
}
|
}
|
||||||
|
|
||||||
const fletcher_4_ops_t fletcher_4_ssse3_ops = {
|
const fletcher_4_ops_t fletcher_4_ssse3_ops = {
|
||||||
|
|
|
@ -853,10 +853,23 @@ zfs_prop_align_right(zfs_prop_t prop)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(_KERNEL)
|
#if defined(_KERNEL)
|
||||||
|
|
||||||
|
#include <linux/simd.h>
|
||||||
|
|
||||||
|
#if defined(HAVE_KERNEL_FPU_INTERNAL)
|
||||||
|
union fpregs_state **zfs_kfpu_fpregs;
|
||||||
|
EXPORT_SYMBOL(zfs_kfpu_fpregs);
|
||||||
|
#endif /* HAVE_KERNEL_FPU_INTERNAL */
|
||||||
|
|
||||||
static int __init
|
static int __init
|
||||||
zcommon_init(void)
|
zcommon_init(void)
|
||||||
{
|
{
|
||||||
|
int error = kfpu_init();
|
||||||
|
if (error)
|
||||||
|
return (error);
|
||||||
|
|
||||||
fletcher_4_init();
|
fletcher_4_init();
|
||||||
|
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -864,6 +877,7 @@ static void __exit
|
||||||
zcommon_fini(void)
|
zcommon_fini(void)
|
||||||
{
|
{
|
||||||
fletcher_4_fini();
|
fletcher_4_fini();
|
||||||
|
kfpu_fini();
|
||||||
}
|
}
|
||||||
|
|
||||||
module_init(zcommon_init);
|
module_init(zcommon_init);
|
||||||
|
|
|
@ -27,9 +27,9 @@
|
||||||
#include <sys/zio.h>
|
#include <sys/zio.h>
|
||||||
#include <sys/debug.h>
|
#include <sys/debug.h>
|
||||||
#include <sys/zfs_debug.h>
|
#include <sys/zfs_debug.h>
|
||||||
|
|
||||||
#include <sys/vdev_raidz.h>
|
#include <sys/vdev_raidz.h>
|
||||||
#include <sys/vdev_raidz_impl.h>
|
#include <sys/vdev_raidz_impl.h>
|
||||||
|
#include <linux/simd.h>
|
||||||
|
|
||||||
extern boolean_t raidz_will_scalar_work(void);
|
extern boolean_t raidz_will_scalar_work(void);
|
||||||
|
|
||||||
|
@ -87,6 +87,7 @@ static uint32_t user_sel_impl = IMPL_FASTEST;
|
||||||
static size_t raidz_supp_impl_cnt = 0;
|
static size_t raidz_supp_impl_cnt = 0;
|
||||||
static raidz_impl_ops_t *raidz_supp_impl[ARRAY_SIZE(raidz_all_maths)];
|
static raidz_impl_ops_t *raidz_supp_impl[ARRAY_SIZE(raidz_all_maths)];
|
||||||
|
|
||||||
|
#if defined(_KERNEL)
|
||||||
/*
|
/*
|
||||||
* kstats values for supported implementations
|
* kstats values for supported implementations
|
||||||
* Values represent per disk throughput of 8 disk+parity raidz vdev [B/s]
|
* Values represent per disk throughput of 8 disk+parity raidz vdev [B/s]
|
||||||
|
@ -95,14 +96,19 @@ static raidz_impl_kstat_t raidz_impl_kstats[ARRAY_SIZE(raidz_all_maths) + 1];
|
||||||
|
|
||||||
/* kstat for benchmarked implementations */
|
/* kstat for benchmarked implementations */
|
||||||
static kstat_t *raidz_math_kstat = NULL;
|
static kstat_t *raidz_math_kstat = NULL;
|
||||||
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Selects the raidz operation for raidz_map
|
* Returns the RAIDZ operations for raidz_map() parity calculations. When
|
||||||
* If rm_ops is set to NULL original raidz implementation will be used
|
* a SIMD implementation is not allowed in the current context, then fallback
|
||||||
|
* to the fastest generic implementation.
|
||||||
*/
|
*/
|
||||||
raidz_impl_ops_t *
|
const raidz_impl_ops_t *
|
||||||
vdev_raidz_math_get_ops()
|
vdev_raidz_math_get_ops(void)
|
||||||
{
|
{
|
||||||
|
if (!kfpu_allowed())
|
||||||
|
return (&vdev_raidz_scalar_impl);
|
||||||
|
|
||||||
raidz_impl_ops_t *ops = NULL;
|
raidz_impl_ops_t *ops = NULL;
|
||||||
const uint32_t impl = RAIDZ_IMPL_READ(zfs_vdev_raidz_impl);
|
const uint32_t impl = RAIDZ_IMPL_READ(zfs_vdev_raidz_impl);
|
||||||
|
|
||||||
|
@ -111,18 +117,14 @@ vdev_raidz_math_get_ops()
|
||||||
ASSERT(raidz_math_initialized);
|
ASSERT(raidz_math_initialized);
|
||||||
ops = &vdev_raidz_fastest_impl;
|
ops = &vdev_raidz_fastest_impl;
|
||||||
break;
|
break;
|
||||||
#if !defined(_KERNEL)
|
|
||||||
case IMPL_CYCLE:
|
case IMPL_CYCLE:
|
||||||
{
|
/* Cycle through all supported implementations */
|
||||||
ASSERT(raidz_math_initialized);
|
ASSERT(raidz_math_initialized);
|
||||||
ASSERT3U(raidz_supp_impl_cnt, >, 0);
|
ASSERT3U(raidz_supp_impl_cnt, >, 0);
|
||||||
/* Cycle through all supported implementations */
|
|
||||||
static size_t cycle_impl_idx = 0;
|
static size_t cycle_impl_idx = 0;
|
||||||
size_t idx = (++cycle_impl_idx) % raidz_supp_impl_cnt;
|
size_t idx = (++cycle_impl_idx) % raidz_supp_impl_cnt;
|
||||||
ops = raidz_supp_impl[idx];
|
ops = raidz_supp_impl[idx];
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
#endif
|
|
||||||
case IMPL_ORIGINAL:
|
case IMPL_ORIGINAL:
|
||||||
ops = (raidz_impl_ops_t *)&vdev_raidz_original_impl;
|
ops = (raidz_impl_ops_t *)&vdev_raidz_original_impl;
|
||||||
break;
|
break;
|
||||||
|
@ -273,6 +275,8 @@ const char *raidz_rec_name[] = {
|
||||||
"rec_pq", "rec_pr", "rec_qr", "rec_pqr"
|
"rec_pq", "rec_pr", "rec_qr", "rec_pqr"
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#if defined(_KERNEL)
|
||||||
|
|
||||||
#define RAIDZ_KSTAT_LINE_LEN (17 + 10*12 + 1)
|
#define RAIDZ_KSTAT_LINE_LEN (17 + 10*12 + 1)
|
||||||
|
|
||||||
static int
|
static int
|
||||||
|
@ -435,21 +439,21 @@ benchmark_raidz_impl(raidz_map_t *bench_rm, const int fn, benchmark_fn bench_fn)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
void
|
/*
|
||||||
vdev_raidz_math_init(void)
|
* Initialize and benchmark all supported implementations.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
benchmark_raidz(void)
|
||||||
{
|
{
|
||||||
raidz_impl_ops_t *curr_impl;
|
raidz_impl_ops_t *curr_impl;
|
||||||
zio_t *bench_zio = NULL;
|
int i, c;
|
||||||
raidz_map_t *bench_rm = NULL;
|
|
||||||
uint64_t bench_parity;
|
|
||||||
int i, c, fn;
|
|
||||||
|
|
||||||
/* move supported impl into raidz_supp_impl */
|
/* Move supported impl into raidz_supp_impl */
|
||||||
for (i = 0, c = 0; i < ARRAY_SIZE(raidz_all_maths); i++) {
|
for (i = 0, c = 0; i < ARRAY_SIZE(raidz_all_maths); i++) {
|
||||||
curr_impl = (raidz_impl_ops_t *)raidz_all_maths[i];
|
curr_impl = (raidz_impl_ops_t *)raidz_all_maths[i];
|
||||||
|
|
||||||
/* initialize impl */
|
|
||||||
if (curr_impl->init)
|
if (curr_impl->init)
|
||||||
curr_impl->init();
|
curr_impl->init();
|
||||||
|
|
||||||
|
@ -459,18 +463,10 @@ vdev_raidz_math_init(void)
|
||||||
membar_producer(); /* complete raidz_supp_impl[] init */
|
membar_producer(); /* complete raidz_supp_impl[] init */
|
||||||
raidz_supp_impl_cnt = c; /* number of supported impl */
|
raidz_supp_impl_cnt = c; /* number of supported impl */
|
||||||
|
|
||||||
#if !defined(_KERNEL)
|
#if defined(_KERNEL)
|
||||||
/* Skip benchmarking and use last implementation as fastest */
|
zio_t *bench_zio = NULL;
|
||||||
memcpy(&vdev_raidz_fastest_impl, raidz_supp_impl[raidz_supp_impl_cnt-1],
|
raidz_map_t *bench_rm = NULL;
|
||||||
sizeof (vdev_raidz_fastest_impl));
|
uint64_t bench_parity;
|
||||||
strcpy(vdev_raidz_fastest_impl.name, "fastest");
|
|
||||||
|
|
||||||
raidz_math_initialized = B_TRUE;
|
|
||||||
|
|
||||||
/* Use 'cycle' math selection method for userspace */
|
|
||||||
VERIFY0(vdev_raidz_impl_set("cycle"));
|
|
||||||
return;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Fake a zio and run the benchmark on a warmed up buffer */
|
/* Fake a zio and run the benchmark on a warmed up buffer */
|
||||||
bench_zio = kmem_zalloc(sizeof (zio_t), KM_SLEEP);
|
bench_zio = kmem_zalloc(sizeof (zio_t), KM_SLEEP);
|
||||||
|
@ -480,7 +476,7 @@ vdev_raidz_math_init(void)
|
||||||
memset(abd_to_buf(bench_zio->io_abd), 0xAA, BENCH_ZIO_SIZE);
|
memset(abd_to_buf(bench_zio->io_abd), 0xAA, BENCH_ZIO_SIZE);
|
||||||
|
|
||||||
/* Benchmark parity generation methods */
|
/* Benchmark parity generation methods */
|
||||||
for (fn = 0; fn < RAIDZ_GEN_NUM; fn++) {
|
for (int fn = 0; fn < RAIDZ_GEN_NUM; fn++) {
|
||||||
bench_parity = fn + 1;
|
bench_parity = fn + 1;
|
||||||
/* New raidz_map is needed for each generate_p/q/r */
|
/* New raidz_map is needed for each generate_p/q/r */
|
||||||
bench_rm = vdev_raidz_map_alloc(bench_zio, SPA_MINBLOCKSHIFT,
|
bench_rm = vdev_raidz_map_alloc(bench_zio, SPA_MINBLOCKSHIFT,
|
||||||
|
@ -495,7 +491,7 @@ vdev_raidz_math_init(void)
|
||||||
bench_rm = vdev_raidz_map_alloc(bench_zio, SPA_MINBLOCKSHIFT,
|
bench_rm = vdev_raidz_map_alloc(bench_zio, SPA_MINBLOCKSHIFT,
|
||||||
BENCH_COLS, PARITY_PQR);
|
BENCH_COLS, PARITY_PQR);
|
||||||
|
|
||||||
for (fn = 0; fn < RAIDZ_REC_NUM; fn++)
|
for (int fn = 0; fn < RAIDZ_REC_NUM; fn++)
|
||||||
benchmark_raidz_impl(bench_rm, fn, benchmark_rec_impl);
|
benchmark_raidz_impl(bench_rm, fn, benchmark_rec_impl);
|
||||||
|
|
||||||
vdev_raidz_map_free(bench_rm);
|
vdev_raidz_map_free(bench_rm);
|
||||||
|
@ -503,11 +499,29 @@ vdev_raidz_math_init(void)
|
||||||
/* cleanup the bench zio */
|
/* cleanup the bench zio */
|
||||||
abd_free(bench_zio->io_abd);
|
abd_free(bench_zio->io_abd);
|
||||||
kmem_free(bench_zio, sizeof (zio_t));
|
kmem_free(bench_zio, sizeof (zio_t));
|
||||||
|
#else
|
||||||
|
/*
|
||||||
|
* Skip the benchmark in user space to avoid impacting libzpool
|
||||||
|
* consumers (zdb, zhack, zinject, ztest). The last implementation
|
||||||
|
* is assumed to be the fastest and used by default.
|
||||||
|
*/
|
||||||
|
memcpy(&vdev_raidz_fastest_impl,
|
||||||
|
raidz_supp_impl[raidz_supp_impl_cnt - 1],
|
||||||
|
sizeof (vdev_raidz_fastest_impl));
|
||||||
|
strcpy(vdev_raidz_fastest_impl.name, "fastest");
|
||||||
|
#endif /* _KERNEL */
|
||||||
|
}
|
||||||
|
|
||||||
/* install kstats for all impl */
|
void
|
||||||
|
vdev_raidz_math_init(void)
|
||||||
|
{
|
||||||
|
/* Determine the fastest available implementation. */
|
||||||
|
benchmark_raidz();
|
||||||
|
|
||||||
|
#if defined(_KERNEL)
|
||||||
|
/* Install kstats for all implementations */
|
||||||
raidz_math_kstat = kstat_create("zfs", 0, "vdev_raidz_bench", "misc",
|
raidz_math_kstat = kstat_create("zfs", 0, "vdev_raidz_bench", "misc",
|
||||||
KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
|
KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
|
||||||
|
|
||||||
if (raidz_math_kstat != NULL) {
|
if (raidz_math_kstat != NULL) {
|
||||||
raidz_math_kstat->ks_data = NULL;
|
raidz_math_kstat->ks_data = NULL;
|
||||||
raidz_math_kstat->ks_ndata = UINT32_MAX;
|
raidz_math_kstat->ks_ndata = UINT32_MAX;
|
||||||
|
@ -517,6 +531,7 @@ vdev_raidz_math_init(void)
|
||||||
raidz_math_kstat_addr);
|
raidz_math_kstat_addr);
|
||||||
kstat_install(raidz_math_kstat);
|
kstat_install(raidz_math_kstat);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Finish initialization */
|
/* Finish initialization */
|
||||||
atomic_swap_32(&zfs_vdev_raidz_impl, user_sel_impl);
|
atomic_swap_32(&zfs_vdev_raidz_impl, user_sel_impl);
|
||||||
|
@ -527,15 +542,15 @@ void
|
||||||
vdev_raidz_math_fini(void)
|
vdev_raidz_math_fini(void)
|
||||||
{
|
{
|
||||||
raidz_impl_ops_t const *curr_impl;
|
raidz_impl_ops_t const *curr_impl;
|
||||||
int i;
|
|
||||||
|
|
||||||
|
#if defined(_KERNEL)
|
||||||
if (raidz_math_kstat != NULL) {
|
if (raidz_math_kstat != NULL) {
|
||||||
kstat_delete(raidz_math_kstat);
|
kstat_delete(raidz_math_kstat);
|
||||||
raidz_math_kstat = NULL;
|
raidz_math_kstat = NULL;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/* fini impl */
|
for (int i = 0; i < ARRAY_SIZE(raidz_all_maths); i++) {
|
||||||
for (i = 0; i < ARRAY_SIZE(raidz_all_maths); i++) {
|
|
||||||
curr_impl = raidz_all_maths[i];
|
curr_impl = raidz_all_maths[i];
|
||||||
if (curr_impl->fini)
|
if (curr_impl->fini)
|
||||||
curr_impl->fini();
|
curr_impl->fini();
|
||||||
|
@ -546,9 +561,7 @@ static const struct {
|
||||||
char *name;
|
char *name;
|
||||||
uint32_t sel;
|
uint32_t sel;
|
||||||
} math_impl_opts[] = {
|
} math_impl_opts[] = {
|
||||||
#if !defined(_KERNEL)
|
|
||||||
{ "cycle", IMPL_CYCLE },
|
{ "cycle", IMPL_CYCLE },
|
||||||
#endif
|
|
||||||
{ "fastest", IMPL_FASTEST },
|
{ "fastest", IMPL_FASTEST },
|
||||||
{ "original", IMPL_ORIGINAL },
|
{ "original", IMPL_ORIGINAL },
|
||||||
{ "scalar", IMPL_SCALAR }
|
{ "scalar", IMPL_SCALAR }
|
||||||
|
|
|
@ -207,7 +207,7 @@ DEFINE_REC_METHODS(aarch64_neon);
|
||||||
static boolean_t
|
static boolean_t
|
||||||
raidz_will_aarch64_neon_work(void)
|
raidz_will_aarch64_neon_work(void)
|
||||||
{
|
{
|
||||||
return (B_TRUE); // __arch64__ requires NEON
|
return (kfpu_allowed());
|
||||||
}
|
}
|
||||||
|
|
||||||
const raidz_impl_ops_t vdev_raidz_aarch64_neon_impl = {
|
const raidz_impl_ops_t vdev_raidz_aarch64_neon_impl = {
|
||||||
|
|
|
@ -217,7 +217,7 @@ DEFINE_REC_METHODS(aarch64_neonx2);
|
||||||
static boolean_t
|
static boolean_t
|
||||||
raidz_will_aarch64_neonx2_work(void)
|
raidz_will_aarch64_neonx2_work(void)
|
||||||
{
|
{
|
||||||
return (B_TRUE); // __arch64__ requires NEON
|
return (kfpu_allowed());
|
||||||
}
|
}
|
||||||
|
|
||||||
const raidz_impl_ops_t vdev_raidz_aarch64_neonx2_impl = {
|
const raidz_impl_ops_t vdev_raidz_aarch64_neonx2_impl = {
|
||||||
|
|
|
@ -396,7 +396,7 @@ DEFINE_REC_METHODS(avx2);
|
||||||
static boolean_t
|
static boolean_t
|
||||||
raidz_will_avx2_work(void)
|
raidz_will_avx2_work(void)
|
||||||
{
|
{
|
||||||
return (zfs_avx_available() && zfs_avx2_available());
|
return (kfpu_allowed() && zfs_avx_available() && zfs_avx2_available());
|
||||||
}
|
}
|
||||||
|
|
||||||
const raidz_impl_ops_t vdev_raidz_avx2_impl = {
|
const raidz_impl_ops_t vdev_raidz_avx2_impl = {
|
||||||
|
|
|
@ -393,9 +393,8 @@ DEFINE_REC_METHODS(avx512bw);
|
||||||
static boolean_t
|
static boolean_t
|
||||||
raidz_will_avx512bw_work(void)
|
raidz_will_avx512bw_work(void)
|
||||||
{
|
{
|
||||||
return (zfs_avx_available() &&
|
return (kfpu_allowed() && zfs_avx_available() &&
|
||||||
zfs_avx512f_available() &&
|
zfs_avx512f_available() && zfs_avx512bw_available());
|
||||||
zfs_avx512bw_available());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const raidz_impl_ops_t vdev_raidz_avx512bw_impl = {
|
const raidz_impl_ops_t vdev_raidz_avx512bw_impl = {
|
||||||
|
|
|
@ -470,9 +470,8 @@ DEFINE_REC_METHODS(avx512f);
|
||||||
static boolean_t
|
static boolean_t
|
||||||
raidz_will_avx512f_work(void)
|
raidz_will_avx512f_work(void)
|
||||||
{
|
{
|
||||||
return (zfs_avx_available() &&
|
return (kfpu_allowed() && zfs_avx_available() &&
|
||||||
zfs_avx2_available() &&
|
zfs_avx2_available() && zfs_avx512f_available());
|
||||||
zfs_avx512f_available());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const raidz_impl_ops_t vdev_raidz_avx512f_impl = {
|
const raidz_impl_ops_t vdev_raidz_avx512f_impl = {
|
||||||
|
|
|
@ -607,7 +607,7 @@ DEFINE_REC_METHODS(sse2);
|
||||||
static boolean_t
|
static boolean_t
|
||||||
raidz_will_sse2_work(void)
|
raidz_will_sse2_work(void)
|
||||||
{
|
{
|
||||||
return (zfs_sse_available() && zfs_sse2_available());
|
return (kfpu_allowed() && zfs_sse_available() && zfs_sse2_available());
|
||||||
}
|
}
|
||||||
|
|
||||||
const raidz_impl_ops_t vdev_raidz_sse2_impl = {
|
const raidz_impl_ops_t vdev_raidz_sse2_impl = {
|
||||||
|
|
|
@ -399,8 +399,8 @@ DEFINE_REC_METHODS(ssse3);
|
||||||
static boolean_t
|
static boolean_t
|
||||||
raidz_will_ssse3_work(void)
|
raidz_will_ssse3_work(void)
|
||||||
{
|
{
|
||||||
return (zfs_sse_available() && zfs_sse2_available() &&
|
return (kfpu_allowed() && zfs_sse_available() &&
|
||||||
zfs_ssse3_available());
|
zfs_sse2_available() && zfs_ssse3_available());
|
||||||
}
|
}
|
||||||
|
|
||||||
const raidz_impl_ops_t vdev_raidz_ssse3_impl = {
|
const raidz_impl_ops_t vdev_raidz_ssse3_impl = {
|
||||||
|
|
|
@ -549,12 +549,12 @@ zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint64_t version,
|
||||||
uint64_t guid, uint8_t *keydata, uint8_t *hmac_keydata, uint8_t *iv,
|
uint64_t guid, uint8_t *keydata, uint8_t *hmac_keydata, uint8_t *iv,
|
||||||
uint8_t *mac, zio_crypt_key_t *key)
|
uint8_t *mac, zio_crypt_key_t *key)
|
||||||
{
|
{
|
||||||
int ret;
|
|
||||||
crypto_mechanism_t mech;
|
crypto_mechanism_t mech;
|
||||||
uio_t puio, cuio;
|
uio_t puio, cuio;
|
||||||
uint64_t aad[3];
|
uint64_t aad[3];
|
||||||
iovec_t plain_iovecs[2], cipher_iovecs[3];
|
iovec_t plain_iovecs[2], cipher_iovecs[3];
|
||||||
uint_t enc_len, keydata_len, aad_len;
|
uint_t enc_len, keydata_len, aad_len;
|
||||||
|
int ret;
|
||||||
|
|
||||||
ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS);
|
ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS);
|
||||||
ASSERT3U(cwkey->ck_format, ==, CRYPTO_KEY_RAW);
|
ASSERT3U(cwkey->ck_format, ==, CRYPTO_KEY_RAW);
|
||||||
|
|
Loading…
Reference in New Issue