Linux x86 SIMD: factor out unneeded kernel dependencies
Clean up the kernel SIMD code by removing kernel dependencies. - Replace XSTATE_XSAVE with our own XSAVE implementation for all kernels not exporting kernel_fpu_{begin,end}(), see #13059 - Replace union fpregs_state by a uint8_t * buffer and get the size of the buffer from the hardware via the CPUID instruction - Replace the kernel's xgetbv() by our own implementation which was already there for userspace. Reviewed-by: Tony Hutter <hutter2@llnl.gov> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Attila Fülöp <attila@fueloep.org> Closes #13102
This commit is contained in:
parent
a86e089415
commit
ce7a5dbf4b
|
@ -2,12 +2,6 @@ dnl #
|
||||||
dnl # Handle differences in kernel FPU code.
|
dnl # Handle differences in kernel FPU code.
|
||||||
dnl #
|
dnl #
|
||||||
dnl # Kernel
|
dnl # Kernel
|
||||||
dnl # 5.16: XCR code put into asm/fpu/xcr.h
|
|
||||||
dnl # HAVE_KERNEL_FPU_XCR_HEADER
|
|
||||||
dnl #
|
|
||||||
dnl # XSTATE_XSAVE and XSTATE_XRESTORE aren't accessible any more
|
|
||||||
dnl # HAVE_KERNEL_FPU_XSAVE_INTERNAL
|
|
||||||
dnl #
|
|
||||||
dnl # 5.11: kernel_fpu_begin() is an inlined function now, so don't check
|
dnl # 5.11: kernel_fpu_begin() is an inlined function now, so don't check
|
||||||
dnl # for it inside the kernel symbols.
|
dnl # for it inside the kernel symbols.
|
||||||
dnl #
|
dnl #
|
||||||
|
@ -34,20 +28,8 @@ AC_DEFUN([ZFS_AC_KERNEL_FPU_HEADER], [
|
||||||
AC_DEFINE(HAVE_KERNEL_FPU_API_HEADER, 1,
|
AC_DEFINE(HAVE_KERNEL_FPU_API_HEADER, 1,
|
||||||
[kernel has asm/fpu/api.h])
|
[kernel has asm/fpu/api.h])
|
||||||
AC_MSG_RESULT(asm/fpu/api.h)
|
AC_MSG_RESULT(asm/fpu/api.h)
|
||||||
AC_MSG_CHECKING([whether fpu/xcr header is available])
|
|
||||||
ZFS_LINUX_TRY_COMPILE([
|
|
||||||
#include <linux/module.h>
|
|
||||||
#include <asm/fpu/xcr.h>
|
|
||||||
],[
|
],[
|
||||||
],[
|
AC_MSG_RESULT(i387.h)
|
||||||
AC_DEFINE(HAVE_KERNEL_FPU_XCR_HEADER, 1,
|
|
||||||
[kernel has asm/fpu/xcr.h])
|
|
||||||
AC_MSG_RESULT(asm/fpu/xcr.h)
|
|
||||||
],[
|
|
||||||
AC_MSG_RESULT(no asm/fpu/xcr.h)
|
|
||||||
])
|
|
||||||
],[
|
|
||||||
AC_MSG_RESULT(i387.h & xcr.h)
|
|
||||||
])
|
])
|
||||||
])
|
])
|
||||||
|
|
||||||
|
@ -56,9 +38,9 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_FPU], [
|
||||||
#include <linux/types.h>
|
#include <linux/types.h>
|
||||||
#ifdef HAVE_KERNEL_FPU_API_HEADER
|
#ifdef HAVE_KERNEL_FPU_API_HEADER
|
||||||
#include <asm/fpu/api.h>
|
#include <asm/fpu/api.h>
|
||||||
|
#include <asm/fpu/internal.h>
|
||||||
#else
|
#else
|
||||||
#include <asm/i387.h>
|
#include <asm/i387.h>
|
||||||
#include <asm/xcr.h>
|
|
||||||
#endif
|
#endif
|
||||||
], [
|
], [
|
||||||
kernel_fpu_begin();
|
kernel_fpu_begin();
|
||||||
|
@ -69,80 +51,15 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_FPU], [
|
||||||
#include <linux/types.h>
|
#include <linux/types.h>
|
||||||
#ifdef HAVE_KERNEL_FPU_API_HEADER
|
#ifdef HAVE_KERNEL_FPU_API_HEADER
|
||||||
#include <asm/fpu/api.h>
|
#include <asm/fpu/api.h>
|
||||||
|
#include <asm/fpu/internal.h>
|
||||||
#else
|
#else
|
||||||
#include <asm/i387.h>
|
#include <asm/i387.h>
|
||||||
#include <asm/xcr.h>
|
|
||||||
#endif
|
#endif
|
||||||
], [
|
], [
|
||||||
__kernel_fpu_begin();
|
__kernel_fpu_begin();
|
||||||
__kernel_fpu_end();
|
__kernel_fpu_end();
|
||||||
], [], [ZFS_META_LICENSE])
|
], [], [ZFS_META_LICENSE])
|
||||||
|
|
||||||
ZFS_LINUX_TEST_SRC([fpu_internal], [
|
|
||||||
#if defined(__x86_64) || defined(__x86_64__) || \
|
|
||||||
defined(__i386) || defined(__i386__)
|
|
||||||
#if !defined(__x86)
|
|
||||||
#define __x86
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__x86)
|
|
||||||
#error Unsupported architecture
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include <linux/types.h>
|
|
||||||
#ifdef HAVE_KERNEL_FPU_API_HEADER
|
|
||||||
#include <asm/fpu/api.h>
|
|
||||||
#include <asm/fpu/internal.h>
|
|
||||||
#else
|
|
||||||
#include <asm/i387.h>
|
|
||||||
#include <asm/xcr.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(XSTATE_XSAVE)
|
|
||||||
#error XSTATE_XSAVE not defined
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(XSTATE_XRESTORE)
|
|
||||||
#error XSTATE_XRESTORE not defined
|
|
||||||
#endif
|
|
||||||
],[
|
|
||||||
struct fpu *fpu = &current->thread.fpu;
|
|
||||||
union fpregs_state *st = &fpu->state;
|
|
||||||
struct fregs_state *fr __attribute__ ((unused)) = &st->fsave;
|
|
||||||
struct fxregs_state *fxr __attribute__ ((unused)) = &st->fxsave;
|
|
||||||
struct xregs_state *xr __attribute__ ((unused)) = &st->xsave;
|
|
||||||
])
|
|
||||||
|
|
||||||
ZFS_LINUX_TEST_SRC([fpu_xsave_internal], [
|
|
||||||
#include <linux/sched.h>
|
|
||||||
#if defined(__x86_64) || defined(__x86_64__) || \
|
|
||||||
defined(__i386) || defined(__i386__)
|
|
||||||
#if !defined(__x86)
|
|
||||||
#define __x86
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__x86)
|
|
||||||
#error Unsupported architecture
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include <linux/types.h>
|
|
||||||
#ifdef HAVE_KERNEL_FPU_API_HEADER
|
|
||||||
#include <asm/fpu/api.h>
|
|
||||||
#include <asm/fpu/internal.h>
|
|
||||||
#else
|
|
||||||
#include <asm/i387.h>
|
|
||||||
#include <asm/xcr.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
],[
|
|
||||||
struct fpu *fpu = &current->thread.fpu;
|
|
||||||
union fpregs_state *st = &fpu->fpstate->regs;
|
|
||||||
struct fregs_state *fr __attribute__ ((unused)) = &st->fsave;
|
|
||||||
struct fxregs_state *fxr __attribute__ ((unused)) = &st->fxsave;
|
|
||||||
struct xregs_state *xr __attribute__ ((unused)) = &st->xsave;
|
|
||||||
])
|
|
||||||
])
|
])
|
||||||
|
|
||||||
AC_DEFUN([ZFS_AC_KERNEL_FPU], [
|
AC_DEFUN([ZFS_AC_KERNEL_FPU], [
|
||||||
|
@ -169,19 +86,9 @@ AC_DEFUN([ZFS_AC_KERNEL_FPU], [
|
||||||
AC_DEFINE(KERNEL_EXPORTS_X86_FPU, 1,
|
AC_DEFINE(KERNEL_EXPORTS_X86_FPU, 1,
|
||||||
[kernel exports FPU functions])
|
[kernel exports FPU functions])
|
||||||
],[
|
],[
|
||||||
ZFS_LINUX_TEST_RESULT([fpu_internal], [
|
|
||||||
AC_MSG_RESULT(internal)
|
AC_MSG_RESULT(internal)
|
||||||
AC_DEFINE(HAVE_KERNEL_FPU_INTERNAL, 1,
|
AC_DEFINE(HAVE_KERNEL_FPU_INTERNAL, 1,
|
||||||
[kernel fpu internal])
|
[kernel fpu internal])
|
||||||
],[
|
|
||||||
ZFS_LINUX_TEST_RESULT([fpu_xsave_internal], [
|
|
||||||
AC_MSG_RESULT(internal with internal XSAVE)
|
|
||||||
AC_DEFINE(HAVE_KERNEL_FPU_XSAVE_INTERNAL, 1,
|
|
||||||
[kernel fpu and XSAVE internal])
|
|
||||||
],[
|
|
||||||
AC_MSG_RESULT(unavailable)
|
|
||||||
])
|
|
||||||
])
|
|
||||||
])
|
])
|
||||||
])
|
])
|
||||||
])
|
])
|
||||||
|
|
|
@ -85,23 +85,19 @@
|
||||||
#undef CONFIG_X86_DEBUG_FPU
|
#undef CONFIG_X86_DEBUG_FPU
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HAVE_KERNEL_FPU_API_HEADER)
|
|
||||||
#include <asm/fpu/api.h>
|
|
||||||
#include <asm/fpu/internal.h>
|
|
||||||
#if defined(HAVE_KERNEL_FPU_XCR_HEADER)
|
|
||||||
#include <asm/fpu/xcr.h>
|
|
||||||
#endif
|
|
||||||
#else
|
|
||||||
#include <asm/i387.h>
|
|
||||||
#include <asm/xcr.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The following cases are for kernels which export either the
|
* The following cases are for kernels which export either the
|
||||||
* kernel_fpu_* or __kernel_fpu_* functions.
|
* kernel_fpu_* or __kernel_fpu_* functions.
|
||||||
*/
|
*/
|
||||||
#if defined(KERNEL_EXPORTS_X86_FPU)
|
#if defined(KERNEL_EXPORTS_X86_FPU)
|
||||||
|
|
||||||
|
#if defined(HAVE_KERNEL_FPU_API_HEADER)
|
||||||
|
#include <asm/fpu/api.h>
|
||||||
|
#include <asm/fpu/internal.h>
|
||||||
|
#else
|
||||||
|
#include <asm/i387.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#define kfpu_allowed() 1
|
#define kfpu_allowed() 1
|
||||||
#define kfpu_init() 0
|
#define kfpu_init() 0
|
||||||
#define kfpu_fini() ((void) 0)
|
#define kfpu_fini() ((void) 0)
|
||||||
|
@ -136,29 +132,74 @@
|
||||||
* When the kernel_fpu_* symbols are unavailable then provide our own
|
* When the kernel_fpu_* symbols are unavailable then provide our own
|
||||||
* versions which allow the FPU to be safely used.
|
* versions which allow the FPU to be safely used.
|
||||||
*/
|
*/
|
||||||
#if defined(HAVE_KERNEL_FPU_INTERNAL) || defined(HAVE_KERNEL_FPU_XSAVE_INTERNAL)
|
|
||||||
|
|
||||||
#if defined(HAVE_KERNEL_FPU_XSAVE_INTERNAL)
|
|
||||||
/*
|
|
||||||
* Some sanity checks.
|
|
||||||
* HAVE_KERNEL_FPU_INTERNAL and HAVE_KERNEL_FPU_XSAVE_INTERNAL are exclusive.
|
|
||||||
*/
|
|
||||||
#if defined(HAVE_KERNEL_FPU_INTERNAL)
|
#if defined(HAVE_KERNEL_FPU_INTERNAL)
|
||||||
#error "HAVE_KERNEL_FPU_INTERNAL and HAVE_KERNEL_FPU_XSAVE_INTERNAL defined"
|
|
||||||
#endif
|
|
||||||
/*
|
/*
|
||||||
* For kernels >= 5.16 we have to use inline assembly with the XSAVE{,OPT,S}
|
 * For kernels not exporting *kernel_fpu_{begin,end} we rely on inline assembly
|
||||||
* instructions, so we need the toolchain to support at least XSAVE.
|
* with the XSAVE{,OPT,S} instructions, so we need the toolchain to support at
|
||||||
|
* least XSAVE.
|
||||||
*/
|
*/
|
||||||
#if !defined(HAVE_XSAVE)
|
#if !defined(HAVE_XSAVE)
|
||||||
#error "Toolchain needs to support the XSAVE assembler instruction"
|
#error "Toolchain needs to support the XSAVE assembler instruction"
|
||||||
#endif
|
#endif
|
||||||
#endif
|
|
||||||
|
|
||||||
#include <linux/mm.h>
|
#include <linux/mm.h>
|
||||||
#include <linux/slab.h>
|
#include <linux/slab.h>
|
||||||
|
|
||||||
extern union fpregs_state **zfs_kfpu_fpregs;
|
extern uint8_t **zfs_kfpu_fpregs;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Return the size in bytes required by the XSAVE instruction for an
|
||||||
|
* XSAVE area containing all the user state components supported by this CPU.
|
||||||
|
* See: Intel 64 and IA-32 Architectures Software Developer’s Manual.
|
||||||
|
* Dec. 2021. Vol. 2A p. 3-222.
|
||||||
|
*/
|
||||||
|
static inline uint32_t
|
||||||
|
get_xsave_area_size(void)
|
||||||
|
{
|
||||||
|
if (!boot_cpu_has(X86_FEATURE_OSXSAVE)) {
|
||||||
|
return (0);
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
* Call CPUID with leaf 13 and subleaf 0. The size is in ecx.
|
||||||
|
* We don't need to check for cpuid_max here, since if this CPU has
|
||||||
|
* OSXSAVE set, it has leaf 13 (0x0D) as well.
|
||||||
|
*/
|
||||||
|
uint32_t eax, ebx, ecx, edx;
|
||||||
|
|
||||||
|
eax = 13U;
|
||||||
|
ecx = 0U;
|
||||||
|
__asm__ __volatile__("cpuid"
|
||||||
|
: "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
|
||||||
|
: "a" (eax), "c" (ecx));
|
||||||
|
|
||||||
|
return (ecx);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Compute the page allocation order for a buffer large enough to hold the
 * saved FPU state on this CPU. The result comes straight from Linux'
 * get_order(), i.e. 2^order pages are required.
 *
 * When get_xsave_area_size() reports 0 (no XSAVE support) the FXSAVE area
 * size of 512 bytes is used instead. See: Intel 64 and IA-32 Architectures
 * Software Developer's Manual. Dec. 2021. Vol. 2A p. 3-451.
 *
 * NOTE(review): the original comment expects the result to be 0 because the
 * save area stays below 4k even with AVX-512; confirm for CPUs with larger
 * state components.
 */
static inline int
get_fpuregs_save_area_order(void)
{
	size_t xsave_bytes = (size_t)get_xsave_area_size();
	size_t needed = (xsave_bytes != 0) ? xsave_bytes : 512;

	return (get_order(needed));
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Initialize per-cpu variables to store FPU state.
|
* Initialize per-cpu variables to store FPU state.
|
||||||
|
@ -167,11 +208,11 @@ static inline void
|
||||||
kfpu_fini(void)
|
kfpu_fini(void)
|
||||||
{
|
{
|
||||||
int cpu;
|
int cpu;
|
||||||
|
int order = get_fpuregs_save_area_order();
|
||||||
|
|
||||||
for_each_possible_cpu(cpu) {
|
for_each_possible_cpu(cpu) {
|
||||||
if (zfs_kfpu_fpregs[cpu] != NULL) {
|
if (zfs_kfpu_fpregs[cpu] != NULL) {
|
||||||
free_pages((unsigned long)zfs_kfpu_fpregs[cpu],
|
free_pages((unsigned long)zfs_kfpu_fpregs[cpu], order);
|
||||||
get_order(sizeof (union fpregs_state)));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -181,8 +222,9 @@ kfpu_fini(void)
|
||||||
static inline int
|
static inline int
|
||||||
kfpu_init(void)
|
kfpu_init(void)
|
||||||
{
|
{
|
||||||
zfs_kfpu_fpregs = kzalloc(num_possible_cpus() *
|
zfs_kfpu_fpregs = kzalloc(num_possible_cpus() * sizeof (uint8_t *),
|
||||||
sizeof (union fpregs_state *), GFP_KERNEL);
|
GFP_KERNEL);
|
||||||
|
|
||||||
if (zfs_kfpu_fpregs == NULL)
|
if (zfs_kfpu_fpregs == NULL)
|
||||||
return (-ENOMEM);
|
return (-ENOMEM);
|
||||||
|
|
||||||
|
@ -191,8 +233,8 @@ kfpu_init(void)
|
||||||
* the target memory. Since kmalloc() provides no alignment
|
* the target memory. Since kmalloc() provides no alignment
|
||||||
* guarantee instead use alloc_pages_node().
|
* guarantee instead use alloc_pages_node().
|
||||||
*/
|
*/
|
||||||
unsigned int order = get_order(sizeof (union fpregs_state));
|
|
||||||
int cpu;
|
int cpu;
|
||||||
|
int order = get_fpuregs_save_area_order();
|
||||||
|
|
||||||
for_each_possible_cpu(cpu) {
|
for_each_possible_cpu(cpu) {
|
||||||
struct page *page = alloc_pages_node(cpu_to_node(cpu),
|
struct page *page = alloc_pages_node(cpu_to_node(cpu),
|
||||||
|
@ -209,9 +251,6 @@ kfpu_init(void)
|
||||||
}
|
}
|
||||||
|
|
||||||
#define kfpu_allowed() 1
|
#define kfpu_allowed() 1
|
||||||
#if defined(HAVE_KERNEL_FPU_INTERNAL)
|
|
||||||
#define ex_handler_fprestore ex_handler_default
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* FPU save and restore instructions.
|
* FPU save and restore instructions.
|
||||||
|
@ -226,21 +265,6 @@ kfpu_init(void)
|
||||||
#define kfpu_fxsr_clean(rval) __asm("fnclex; emms; fildl %P[addr]" \
|
#define kfpu_fxsr_clean(rval) __asm("fnclex; emms; fildl %P[addr]" \
|
||||||
: : [addr] "m" (rval));
|
: : [addr] "m" (rval));
|
||||||
|
|
||||||
#if defined(HAVE_KERNEL_FPU_INTERNAL)
|
|
||||||
static inline void
|
|
||||||
kfpu_save_xsave(struct xregs_state *addr, uint64_t mask)
|
|
||||||
{
|
|
||||||
uint32_t low, hi;
|
|
||||||
int err;
|
|
||||||
|
|
||||||
low = mask;
|
|
||||||
hi = mask >> 32;
|
|
||||||
XSTATE_XSAVE(addr, low, hi, err);
|
|
||||||
WARN_ON_ONCE(err);
|
|
||||||
}
|
|
||||||
#endif /* defined(HAVE_KERNEL_FPU_INTERNAL) */
|
|
||||||
|
|
||||||
#if defined(HAVE_KERNEL_FPU_XSAVE_INTERNAL)
|
|
||||||
#define kfpu_do_xsave(instruction, addr, mask) \
|
#define kfpu_do_xsave(instruction, addr, mask) \
|
||||||
{ \
|
{ \
|
||||||
uint32_t low, hi; \
|
uint32_t low, hi; \
|
||||||
|
@ -252,10 +276,9 @@ kfpu_save_xsave(struct xregs_state *addr, uint64_t mask)
|
||||||
: [dst] "m" (*(addr)), "a" (low), "d" (hi) \
|
: [dst] "m" (*(addr)), "a" (low), "d" (hi) \
|
||||||
: "memory"); \
|
: "memory"); \
|
||||||
}
|
}
|
||||||
#endif /* defined(HAVE_KERNEL_FPU_XSAVE_INTERNAL) */
|
|
||||||
|
|
||||||
static inline void
|
static inline void
|
||||||
kfpu_save_fxsr(struct fxregs_state *addr)
|
kfpu_save_fxsr(uint8_t *addr)
|
||||||
{
|
{
|
||||||
if (IS_ENABLED(CONFIG_X86_32))
|
if (IS_ENABLED(CONFIG_X86_32))
|
||||||
kfpu_fxsave(addr);
|
kfpu_fxsave(addr);
|
||||||
|
@ -264,12 +287,11 @@ kfpu_save_fxsr(struct fxregs_state *addr)
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void
|
static inline void
|
||||||
kfpu_save_fsave(struct fregs_state *addr)
|
kfpu_save_fsave(uint8_t *addr)
|
||||||
{
|
{
|
||||||
kfpu_fnsave(addr);
|
kfpu_fnsave(addr);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(HAVE_KERNEL_FPU_INTERNAL)
|
|
||||||
static inline void
|
static inline void
|
||||||
kfpu_begin(void)
|
kfpu_begin(void)
|
||||||
{
|
{
|
||||||
|
@ -286,70 +308,28 @@ kfpu_begin(void)
|
||||||
* per-cpu variable, not in the task struct, this allows any user
|
* per-cpu variable, not in the task struct, this allows any user
|
||||||
* FPU state to be correctly preserved and restored.
|
* FPU state to be correctly preserved and restored.
|
||||||
*/
|
*/
|
||||||
union fpregs_state *state = zfs_kfpu_fpregs[smp_processor_id()];
|
uint8_t *state = zfs_kfpu_fpregs[smp_processor_id()];
|
||||||
if (static_cpu_has(X86_FEATURE_XSAVE)) {
|
|
||||||
kfpu_save_xsave(&state->xsave, ~0);
|
|
||||||
} else if (static_cpu_has(X86_FEATURE_FXSR)) {
|
|
||||||
kfpu_save_fxsr(&state->fxsave);
|
|
||||||
} else {
|
|
||||||
kfpu_save_fsave(&state->fsave);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif /* defined(HAVE_KERNEL_FPU_INTERNAL) */
|
|
||||||
|
|
||||||
#if defined(HAVE_KERNEL_FPU_XSAVE_INTERNAL)
|
|
||||||
static inline void
|
|
||||||
kfpu_begin(void)
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* Preemption and interrupts must be disabled for the critical
|
|
||||||
* region where the FPU state is being modified.
|
|
||||||
*/
|
|
||||||
preempt_disable();
|
|
||||||
local_irq_disable();
|
|
||||||
|
|
||||||
/*
|
|
||||||
* The current FPU registers need to be preserved by kfpu_begin()
|
|
||||||
* and restored by kfpu_end(). They are stored in a dedicated
|
|
||||||
* per-cpu variable, not in the task struct, this allows any user
|
|
||||||
* FPU state to be correctly preserved and restored.
|
|
||||||
*/
|
|
||||||
union fpregs_state *state = zfs_kfpu_fpregs[smp_processor_id()];
|
|
||||||
#if defined(HAVE_XSAVES)
|
#if defined(HAVE_XSAVES)
|
||||||
if (static_cpu_has(X86_FEATURE_XSAVES)) {
|
if (static_cpu_has(X86_FEATURE_XSAVES)) {
|
||||||
kfpu_do_xsave("xsaves", &state->xsave, ~0);
|
kfpu_do_xsave("xsaves", state, ~0);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAVE_XSAVEOPT)
|
#if defined(HAVE_XSAVEOPT)
|
||||||
if (static_cpu_has(X86_FEATURE_XSAVEOPT)) {
|
if (static_cpu_has(X86_FEATURE_XSAVEOPT)) {
|
||||||
kfpu_do_xsave("xsaveopt", &state->xsave, ~0);
|
kfpu_do_xsave("xsaveopt", state, ~0);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
if (static_cpu_has(X86_FEATURE_XSAVE)) {
|
if (static_cpu_has(X86_FEATURE_XSAVE)) {
|
||||||
kfpu_do_xsave("xsave", &state->xsave, ~0);
|
kfpu_do_xsave("xsave", state, ~0);
|
||||||
} else if (static_cpu_has(X86_FEATURE_FXSR)) {
|
} else if (static_cpu_has(X86_FEATURE_FXSR)) {
|
||||||
kfpu_save_fxsr(&state->fxsave);
|
kfpu_save_fxsr(state);
|
||||||
} else {
|
} else {
|
||||||
kfpu_save_fsave(&state->fsave);
|
kfpu_save_fsave(state);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif /* defined(HAVE_KERNEL_FPU_XSAVE_INTERNAL) */
|
|
||||||
|
|
||||||
#if defined(HAVE_KERNEL_FPU_INTERNAL)
|
|
||||||
static inline void
|
|
||||||
kfpu_restore_xsave(struct xregs_state *addr, uint64_t mask)
|
|
||||||
{
|
|
||||||
uint32_t low, hi;
|
|
||||||
|
|
||||||
low = mask;
|
|
||||||
hi = mask >> 32;
|
|
||||||
XSTATE_XRESTORE(addr, low, hi);
|
|
||||||
}
|
|
||||||
#endif /* defined(HAVE_KERNEL_FPU_INTERNAL) */
|
|
||||||
|
|
||||||
#if defined(HAVE_KERNEL_FPU_XSAVE_INTERNAL)
|
|
||||||
#define kfpu_do_xrstor(instruction, addr, mask) \
|
#define kfpu_do_xrstor(instruction, addr, mask) \
|
||||||
{ \
|
{ \
|
||||||
uint32_t low, hi; \
|
uint32_t low, hi; \
|
||||||
|
@ -361,10 +341,9 @@ kfpu_restore_xsave(struct xregs_state *addr, uint64_t mask)
|
||||||
: [src] "m" (*(addr)), "a" (low), "d" (hi) \
|
: [src] "m" (*(addr)), "a" (low), "d" (hi) \
|
||||||
: "memory"); \
|
: "memory"); \
|
||||||
}
|
}
|
||||||
#endif /* defined(HAVE_KERNEL_FPU_XSAVE_INTERNAL) */
|
|
||||||
|
|
||||||
static inline void
|
static inline void
|
||||||
kfpu_restore_fxsr(struct fxregs_state *addr)
|
kfpu_restore_fxsr(uint8_t *addr)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* On AuthenticAMD K7 and K8 processors the fxrstor instruction only
|
* On AuthenticAMD K7 and K8 processors the fxrstor instruction only
|
||||||
|
@ -382,67 +361,40 @@ kfpu_restore_fxsr(struct fxregs_state *addr)
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void
|
static inline void
|
||||||
kfpu_restore_fsave(struct fregs_state *addr)
|
kfpu_restore_fsave(uint8_t *addr)
|
||||||
{
|
{
|
||||||
kfpu_frstor(addr);
|
kfpu_frstor(addr);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(HAVE_KERNEL_FPU_INTERNAL)
|
|
||||||
static inline void
|
static inline void
|
||||||
kfpu_end(void)
|
kfpu_end(void)
|
||||||
{
|
{
|
||||||
union fpregs_state *state = zfs_kfpu_fpregs[smp_processor_id()];
|
uint8_t *state = zfs_kfpu_fpregs[smp_processor_id()];
|
||||||
|
|
||||||
if (static_cpu_has(X86_FEATURE_XSAVE)) {
|
|
||||||
kfpu_restore_xsave(&state->xsave, ~0);
|
|
||||||
} else if (static_cpu_has(X86_FEATURE_FXSR)) {
|
|
||||||
kfpu_restore_fxsr(&state->fxsave);
|
|
||||||
} else {
|
|
||||||
kfpu_restore_fsave(&state->fsave);
|
|
||||||
}
|
|
||||||
|
|
||||||
local_irq_enable();
|
|
||||||
preempt_enable();
|
|
||||||
}
|
|
||||||
#endif /* defined(HAVE_KERNEL_FPU_INTERNAL) */
|
|
||||||
|
|
||||||
#if defined(HAVE_KERNEL_FPU_XSAVE_INTERNAL)
|
|
||||||
static inline void
|
|
||||||
kfpu_end(void)
|
|
||||||
{
|
|
||||||
union fpregs_state *state = zfs_kfpu_fpregs[smp_processor_id()];
|
|
||||||
#if defined(HAVE_XSAVES)
|
#if defined(HAVE_XSAVES)
|
||||||
if (static_cpu_has(X86_FEATURE_XSAVES)) {
|
if (static_cpu_has(X86_FEATURE_XSAVES)) {
|
||||||
kfpu_do_xrstor("xrstors", &state->xsave, ~0);
|
kfpu_do_xrstor("xrstors", state, ~0);
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
if (static_cpu_has(X86_FEATURE_XSAVE)) {
|
if (static_cpu_has(X86_FEATURE_XSAVE)) {
|
||||||
kfpu_do_xrstor("xrstor", &state->xsave, ~0);
|
kfpu_do_xrstor("xrstor", state, ~0);
|
||||||
} else if (static_cpu_has(X86_FEATURE_FXSR)) {
|
} else if (static_cpu_has(X86_FEATURE_FXSR)) {
|
||||||
kfpu_save_fxsr(&state->fxsave);
|
kfpu_save_fxsr(state);
|
||||||
} else {
|
} else {
|
||||||
kfpu_save_fsave(&state->fsave);
|
kfpu_save_fsave(state);
|
||||||
}
|
}
|
||||||
out:
|
out:
|
||||||
local_irq_enable();
|
local_irq_enable();
|
||||||
preempt_enable();
|
preempt_enable();
|
||||||
|
|
||||||
}
|
}
|
||||||
#endif /* defined(HAVE_KERNEL_FPU_XSAVE_INTERNAL) */
|
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
/*
|
#error "Exactly one of KERNEL_EXPORTS_X86_FPU or HAVE_KERNEL_FPU_INTERNAL" \
|
||||||
* FPU support is unavailable.
|
" must be defined"
|
||||||
*/
|
|
||||||
#define kfpu_allowed() 0
|
|
||||||
#define kfpu_begin() do {} while (0)
|
|
||||||
#define kfpu_end() do {} while (0)
|
|
||||||
#define kfpu_init() 0
|
|
||||||
#define kfpu_fini() ((void) 0)
|
|
||||||
|
|
||||||
#endif /* defined(HAVE_KERNEL_FPU_INTERNAL || HAVE_KERNEL_FPU_XSAVE_INTERNAL) */
|
#endif /* defined(HAVE_KERNEL_FPU_INTERNAL) */
|
||||||
#endif /* defined(KERNEL_EXPORTS_X86_FPU) */
|
#endif /* defined(KERNEL_EXPORTS_X86_FPU) */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -452,6 +404,25 @@ out:
|
||||||
/*
|
/*
|
||||||
* Detect register set support
|
* Detect register set support
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/*
 * Read the extended control register selected by index using XGETBV and
 * return its 64-bit value (edx holds the upper half, eax the lower half).
 * Index 0 selects XCR0, which __simd_state_enabled() inspects to learn
 * which register states the OS has enabled (e.g. for AVX and AVX2).
 *
 * Only call this after CPUID has confirmed the required support, otherwise
 * the instruction might be illegal. The original note names the AVX
 * feature; __simd_state_enabled() checks OSXSAVE before calling.
 */
static inline uint64_t
zfs_xgetbv(uint32_t index)
{
	uint32_t lo, hi;

	/*
	 * XGETBV is emitted as raw opcode bytes rather than by mnemonic,
	 * presumably so assemblers without the mnemonic still build this.
	 */
	__asm__ __volatile__(".byte 0x0f; .byte 0x01; .byte 0xd0"
	    : "=a" (lo), "=d" (hi)
	    : "c" (index));

	return (((uint64_t)hi << 32) | (uint64_t)lo);
}
|
||||||
|
|
||||||
|
|
||||||
static inline boolean_t
|
static inline boolean_t
|
||||||
__simd_state_enabled(const uint64_t state)
|
__simd_state_enabled(const uint64_t state)
|
||||||
{
|
{
|
||||||
|
@ -466,7 +437,7 @@ __simd_state_enabled(const uint64_t state)
|
||||||
if (!has_osxsave)
|
if (!has_osxsave)
|
||||||
return (B_FALSE);
|
return (B_FALSE);
|
||||||
|
|
||||||
xcr0 = xgetbv(0);
|
xcr0 = zfs_xgetbv(0);
|
||||||
return ((xcr0 & state) == state);
|
return ((xcr0 & state) == state);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1001,10 +1001,10 @@ zfs_prop_align_right(zfs_prop_t prop)
|
||||||
|
|
||||||
#include <sys/simd.h>
|
#include <sys/simd.h>
|
||||||
|
|
||||||
#if defined(HAVE_KERNEL_FPU_INTERNAL) || defined(HAVE_KERNEL_FPU_XSAVE_INTERNAL)
|
#if defined(HAVE_KERNEL_FPU_INTERNAL)
|
||||||
union fpregs_state **zfs_kfpu_fpregs;
|
uint8_t **zfs_kfpu_fpregs;
|
||||||
EXPORT_SYMBOL(zfs_kfpu_fpregs);
|
EXPORT_SYMBOL(zfs_kfpu_fpregs);
|
||||||
#endif /* HAVE_KERNEL_FPU_INTERNAL || HAVE_KERNEL_FPU_XSAVE_INTERNAL */
|
#endif /* defined(HAVE_KERNEL_FPU_INTERNAL) */
|
||||||
|
|
||||||
static int __init
|
static int __init
|
||||||
zcommon_init(void)
|
zcommon_init(void)
|
||||||
|
|
Loading…
Reference in New Issue