From ce7a5dbf4b37a0ba28dd6911e1a17f039fdd4f4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Attila=20F=C3=BCl=C3=B6p?= Date: Wed, 9 Mar 2022 01:19:15 +0100 Subject: [PATCH] Linux x86 SIMD: factor out unneeded kernel dependencies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cleanup the kernel SIMD code by removing kernel dependencies. - Replace XSTATE_XSAVE with our own XSAVE implementation for all kernels not exporting kernel_fpu{begin,end}(), see #13059 - Replace union fpregs_state by a uint8_t * buffer and get the size of the buffer from the hardware via the CPUID instruction - Replace kernels xgetbv() by our own implementation which was already there for userspace. Reviewed-by: Tony Hutter Reviewed-by: Brian Behlendorf Signed-off-by: Attila Fülöp Closes #13102 --- config/kernel-fpu.m4 | 105 +--------- include/os/linux/kernel/linux/simd_x86.h | 247 ++++++++++------------- module/zcommon/zfs_prop.c | 6 +- 3 files changed, 118 insertions(+), 240 deletions(-) diff --git a/config/kernel-fpu.m4 b/config/kernel-fpu.m4 index 7f8b028d04..eb9520c60a 100644 --- a/config/kernel-fpu.m4 +++ b/config/kernel-fpu.m4 @@ -2,12 +2,6 @@ dnl # dnl # Handle differences in kernel FPU code. dnl # dnl # Kernel -dnl # 5.16: XCR code put into asm/fpu/xcr.h -dnl # HAVE_KERNEL_FPU_XCR_HEADER -dnl # -dnl # XSTATE_XSAVE and XSTATE_XRESTORE aren't accessible any more -dnl # HAVE_KERNEL_FPU_XSAVE_INTERNAL -dnl # dnl # 5.11: kernel_fpu_begin() is an inlined function now, so don't check dnl # for it inside the kernel symbols. dnl # @@ -34,20 +28,8 @@ AC_DEFUN([ZFS_AC_KERNEL_FPU_HEADER], [ AC_DEFINE(HAVE_KERNEL_FPU_API_HEADER, 1, [kernel has asm/fpu/api.h]) AC_MSG_RESULT(asm/fpu/api.h) - AC_MSG_CHECKING([whether fpu/xcr header is available]) - ZFS_LINUX_TRY_COMPILE([ - #include - #include - ],[ - ],[ - AC_DEFINE(HAVE_KERNEL_FPU_XCR_HEADER, 1, - [kernel has asm/fpu/xcr.h]) - AC_MSG_RESULT(asm/fpu/xcr.h) - ],[ - AC_MSG_RESULT(no asm/fpu/xcr.h) - ]) ],[ - AC_MSG_RESULT(i387.h & xcr.h) + AC_MSG_RESULT(i387.h) ]) ]) @@ -56,9 +38,9 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_FPU], [ #include #ifdef HAVE_KERNEL_FPU_API_HEADER #include + #include #else #include - #include #endif ], [ kernel_fpu_begin(); @@ -69,80 +51,15 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_FPU], [ #include #ifdef HAVE_KERNEL_FPU_API_HEADER #include + #include #else #include - #include #endif ], [ __kernel_fpu_begin(); __kernel_fpu_end(); ], [], [ZFS_META_LICENSE]) - ZFS_LINUX_TEST_SRC([fpu_internal], [ - #if defined(__x86_64) || defined(__x86_64__) || \ - defined(__i386) || defined(__i386__) - #if !defined(__x86) - #define __x86 - #endif - #endif - - #if !defined(__x86) - #error Unsupported architecture - #endif - - #include - #ifdef HAVE_KERNEL_FPU_API_HEADER - #include - #include - #else - #include - #include - #endif - - #if !defined(XSTATE_XSAVE) - #error XSTATE_XSAVE not defined - #endif - - #if !defined(XSTATE_XRESTORE) - #error XSTATE_XRESTORE not defined - #endif - ],[ - struct fpu *fpu = ¤t->thread.fpu; - union fpregs_state *st = &fpu->state; - struct fregs_state *fr __attribute__ ((unused)) = &st->fsave; - struct fxregs_state *fxr __attribute__ ((unused)) = &st->fxsave; - struct xregs_state *xr __attribute__ ((unused)) = &st->xsave; - ]) - - ZFS_LINUX_TEST_SRC([fpu_xsave_internal], [ - #include - #if defined(__x86_64) || defined(__x86_64__) || \ - defined(__i386) || defined(__i386__) - #if !defined(__x86) - #define __x86 - #endif - #endif - - #if !defined(__x86) - #error Unsupported architecture - #endif - - #include - #ifdef HAVE_KERNEL_FPU_API_HEADER - #include - #include - #else - #include - #include - #endif - - ],[ - struct fpu *fpu = ¤t->thread.fpu; - union fpregs_state *st = &fpu->fpstate->regs; - struct fregs_state *fr __attribute__ ((unused)) = &st->fsave; - struct fxregs_state *fxr __attribute__ ((unused)) = &st->fxsave; - struct xregs_state *xr __attribute__ ((unused)) = &st->xsave; - ]) ]) AC_DEFUN([ZFS_AC_KERNEL_FPU], [ @@ -169,19 +86,9 @@ AC_DEFUN([ZFS_AC_KERNEL_FPU], [ AC_DEFINE(KERNEL_EXPORTS_X86_FPU, 1, [kernel exports FPU functions]) ],[ - ZFS_LINUX_TEST_RESULT([fpu_internal], [ - AC_MSG_RESULT(internal) - AC_DEFINE(HAVE_KERNEL_FPU_INTERNAL, 1, - [kernel fpu internal]) - ],[ - ZFS_LINUX_TEST_RESULT([fpu_xsave_internal], [ - AC_MSG_RESULT(internal with internal XSAVE) - AC_DEFINE(HAVE_KERNEL_FPU_XSAVE_INTERNAL, 1, - [kernel fpu and XSAVE internal]) - ],[ - AC_MSG_RESULT(unavailable) - ]) - ]) + AC_MSG_RESULT(internal) + AC_DEFINE(HAVE_KERNEL_FPU_INTERNAL, 1, + [kernel fpu internal]) ]) ]) ]) diff --git a/include/os/linux/kernel/linux/simd_x86.h b/include/os/linux/kernel/linux/simd_x86.h index 6d4c7a09fe..543ef3cb42 100644 --- a/include/os/linux/kernel/linux/simd_x86.h +++ b/include/os/linux/kernel/linux/simd_x86.h @@ -85,23 +85,19 @@ #undef CONFIG_X86_DEBUG_FPU #endif -#if defined(HAVE_KERNEL_FPU_API_HEADER) -#include -#include -#if defined(HAVE_KERNEL_FPU_XCR_HEADER) -#include -#endif -#else -#include -#include -#endif - /* * The following cases are for kernels which export either the * kernel_fpu_* or __kernel_fpu_* functions. */ #if defined(KERNEL_EXPORTS_X86_FPU) +#if defined(HAVE_KERNEL_FPU_API_HEADER) +#include +#include +#else +#include +#endif + #define kfpu_allowed() 1 #define kfpu_init() 0 #define kfpu_fini() ((void) 0) @@ -136,29 +132,74 @@ * When the kernel_fpu_* symbols are unavailable then provide our own * versions which allow the FPU to be safely used. */ -#if defined(HAVE_KERNEL_FPU_INTERNAL) || defined(HAVE_KERNEL_FPU_XSAVE_INTERNAL) - -#if defined(HAVE_KERNEL_FPU_XSAVE_INTERNAL) -/* - * Some sanity checks. - * HAVE_KERNEL_FPU_INTERNAL and HAVE_KERNEL_FPU_XSAVE_INTERNAL are exclusive. - */ #if defined(HAVE_KERNEL_FPU_INTERNAL) -#error "HAVE_KERNEL_FPU_INTERNAL and HAVE_KERNEL_FPU_XSAVE_INTERNAL defined" -#endif + /* - * For kernels >= 5.16 we have to use inline assembly with the XSAVE{,OPT,S} - * instructions, so we need the toolchain to support at least XSAVE. + * For kernels not exporting *kfpu_{begin,end} we have to use inline assembly + * with the XSAVE{,OPT,S} instructions, so we need the toolchain to support at + * least XSAVE. */ #if !defined(HAVE_XSAVE) #error "Toolchain needs to support the XSAVE assembler instruction" #endif -#endif #include #include -extern union fpregs_state **zfs_kfpu_fpregs; +extern uint8_t **zfs_kfpu_fpregs; + +/* + * Return the size in bytes required by the XSAVE instruction for an + * XSAVE area containing all the user state components supported by this CPU. + * See: Intel 64 and IA-32 Architectures Software Developer’s Manual. + * Dec. 2021. Vol. 2A p. 3-222. + */ +static inline uint32_t +get_xsave_area_size(void) +{ + if (!boot_cpu_has(X86_FEATURE_OSXSAVE)) { + return (0); + } + /* + * Call CPUID with leaf 13 and subleaf 0. The size is in ecx. + * We don't need to check for cpuid_max here, since if this CPU has + * OSXSAVE set, it has leaf 13 (0x0D) as well. + */ + uint32_t eax, ebx, ecx, edx; + + eax = 13U; + ecx = 0U; + __asm__ __volatile__("cpuid" + : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) + : "a" (eax), "c" (ecx)); + + return (ecx); +} + +/* + * Return the allocation order of the maximum buffer size required to save the + * FPU state on this architecture. The value returned is the same as Linux' + * get_order() function would return (i.e. 2^order = nr. of pages required). + * Currently this will always return 0 since the save area is below 4k even for + * a full fledged AVX-512 implementation. + */ +static inline int +get_fpuregs_save_area_order(void) +{ + size_t area_size = (size_t)get_xsave_area_size(); + + /* + * If we are dealing with a CPU not supporting XSAVE, + * get_xsave_area_size() will return 0. Thus the maximum memory + * required is the FXSAVE area size which is 512 bytes. See: Intel 64 + * and IA-32 Architectures Software Developer’s Manual. Dec. 2021. + * Vol. 2A p. 3-451. + */ + if (area_size == 0) { + area_size = 512; + } + return (get_order(area_size)); +} /* * Initialize per-cpu variables to store FPU state. @@ -167,11 +208,11 @@ static inline void kfpu_fini(void) { int cpu; + int order = get_fpuregs_save_area_order(); for_each_possible_cpu(cpu) { if (zfs_kfpu_fpregs[cpu] != NULL) { - free_pages((unsigned long)zfs_kfpu_fpregs[cpu], - get_order(sizeof (union fpregs_state))); + free_pages((unsigned long)zfs_kfpu_fpregs[cpu], order); } } @@ -181,8 +222,9 @@ kfpu_fini(void) static inline int kfpu_init(void) { - zfs_kfpu_fpregs = kzalloc(num_possible_cpus() * - sizeof (union fpregs_state *), GFP_KERNEL); + zfs_kfpu_fpregs = kzalloc(num_possible_cpus() * sizeof (uint8_t *), + GFP_KERNEL); + if (zfs_kfpu_fpregs == NULL) return (-ENOMEM); @@ -191,8 +233,8 @@ kfpu_init(void) * the target memory. Since kmalloc() provides no alignment * guarantee instead use alloc_pages_node(). */ - unsigned int order = get_order(sizeof (union fpregs_state)); int cpu; + int order = get_fpuregs_save_area_order(); for_each_possible_cpu(cpu) { struct page *page = alloc_pages_node(cpu_to_node(cpu), @@ -209,9 +251,6 @@ kfpu_init(void) } #define kfpu_allowed() 1 -#if defined(HAVE_KERNEL_FPU_INTERNAL) -#define ex_handler_fprestore ex_handler_default -#endif /* * FPU save and restore instructions. @@ -226,21 +265,6 @@ kfpu_init(void) #define kfpu_fxsr_clean(rval) __asm("fnclex; emms; fildl %P[addr]" \ : : [addr] "m" (rval)); -#if defined(HAVE_KERNEL_FPU_INTERNAL) -static inline void -kfpu_save_xsave(struct xregs_state *addr, uint64_t mask) -{ - uint32_t low, hi; - int err; - - low = mask; - hi = mask >> 32; - XSTATE_XSAVE(addr, low, hi, err); - WARN_ON_ONCE(err); -} -#endif /* defined(HAVE_KERNEL_FPU_INTERNAL) */ - -#if defined(HAVE_KERNEL_FPU_XSAVE_INTERNAL) #define kfpu_do_xsave(instruction, addr, mask) \ { \ uint32_t low, hi; \ @@ -252,10 +276,9 @@ kfpu_save_xsave(struct xregs_state *addr, uint64_t mask) : [dst] "m" (*(addr)), "a" (low), "d" (hi) \ : "memory"); \ } -#endif /* defined(HAVE_KERNEL_FPU_XSAVE_INTERNAL) */ static inline void -kfpu_save_fxsr(struct fxregs_state *addr) +kfpu_save_fxsr(uint8_t *addr) { if (IS_ENABLED(CONFIG_X86_32)) kfpu_fxsave(addr); @@ -264,12 +287,11 @@ kfpu_save_fxsr(struct fxregs_state *addr) } static inline void -kfpu_save_fsave(struct fregs_state *addr) +kfpu_save_fsave(uint8_t *addr) { kfpu_fnsave(addr); } -#if defined(HAVE_KERNEL_FPU_INTERNAL) static inline void kfpu_begin(void) { @@ -286,70 +308,28 @@ kfpu_begin(void) * per-cpu variable, not in the task struct, this allows any user * FPU state to be correctly preserved and restored. */ - union fpregs_state *state = zfs_kfpu_fpregs[smp_processor_id()]; - if (static_cpu_has(X86_FEATURE_XSAVE)) { - kfpu_save_xsave(&state->xsave, ~0); - } else if (static_cpu_has(X86_FEATURE_FXSR)) { - kfpu_save_fxsr(&state->fxsave); - } else { - kfpu_save_fsave(&state->fsave); - } -} -#endif /* defined(HAVE_KERNEL_FPU_INTERNAL) */ - -#if defined(HAVE_KERNEL_FPU_XSAVE_INTERNAL) -static inline void -kfpu_begin(void) -{ - /* - * Preemption and interrupts must be disabled for the critical - * region where the FPU state is being modified. - */ - preempt_disable(); - local_irq_disable(); - - /* - * The current FPU registers need to be preserved by kfpu_begin() - * and restored by kfpu_end(). They are stored in a dedicated - * per-cpu variable, not in the task struct, this allows any user - * FPU state to be correctly preserved and restored. - */ - union fpregs_state *state = zfs_kfpu_fpregs[smp_processor_id()]; + uint8_t *state = zfs_kfpu_fpregs[smp_processor_id()]; #if defined(HAVE_XSAVES) if (static_cpu_has(X86_FEATURE_XSAVES)) { - kfpu_do_xsave("xsaves", &state->xsave, ~0); + kfpu_do_xsave("xsaves", state, ~0); return; } #endif #if defined(HAVE_XSAVEOPT) if (static_cpu_has(X86_FEATURE_XSAVEOPT)) { - kfpu_do_xsave("xsaveopt", &state->xsave, ~0); + kfpu_do_xsave("xsaveopt", state, ~0); return; } #endif if (static_cpu_has(X86_FEATURE_XSAVE)) { - kfpu_do_xsave("xsave", &state->xsave, ~0); + kfpu_do_xsave("xsave", state, ~0); } else if (static_cpu_has(X86_FEATURE_FXSR)) { - kfpu_save_fxsr(&state->fxsave); + kfpu_save_fxsr(state); } else { - kfpu_save_fsave(&state->fsave); + kfpu_save_fsave(state); } } -#endif /* defined(HAVE_KERNEL_FPU_XSAVE_INTERNAL) */ -#if defined(HAVE_KERNEL_FPU_INTERNAL) -static inline void -kfpu_restore_xsave(struct xregs_state *addr, uint64_t mask) -{ - uint32_t low, hi; - - low = mask; - hi = mask >> 32; - XSTATE_XRESTORE(addr, low, hi); -} -#endif /* defined(HAVE_KERNEL_FPU_INTERNAL) */ - -#if defined(HAVE_KERNEL_FPU_XSAVE_INTERNAL) #define kfpu_do_xrstor(instruction, addr, mask) \ { \ uint32_t low, hi; \ @@ -361,10 +341,9 @@ kfpu_restore_xsave(struct xregs_state *addr, uint64_t mask) : [src] "m" (*(addr)), "a" (low), "d" (hi) \ : "memory"); \ } -#endif /* defined(HAVE_KERNEL_FPU_XSAVE_INTERNAL) */ static inline void -kfpu_restore_fxsr(struct fxregs_state *addr) +kfpu_restore_fxsr(uint8_t *addr) { /* * On AuthenticAMD K7 and K8 processors the fxrstor instruction only @@ -382,67 +361,40 @@ kfpu_restore_fxsr(struct fxregs_state *addr) } static inline void -kfpu_restore_fsave(struct fregs_state *addr) +kfpu_restore_fsave(uint8_t *addr) { kfpu_frstor(addr); } -#if defined(HAVE_KERNEL_FPU_INTERNAL) static inline void kfpu_end(void) { - union fpregs_state *state = zfs_kfpu_fpregs[smp_processor_id()]; - - if (static_cpu_has(X86_FEATURE_XSAVE)) { - kfpu_restore_xsave(&state->xsave, ~0); - } else if (static_cpu_has(X86_FEATURE_FXSR)) { - kfpu_restore_fxsr(&state->fxsave); - } else { - kfpu_restore_fsave(&state->fsave); - } - - local_irq_enable(); - preempt_enable(); -} -#endif /* defined(HAVE_KERNEL_FPU_INTERNAL) */ - -#if defined(HAVE_KERNEL_FPU_XSAVE_INTERNAL) -static inline void -kfpu_end(void) -{ - union fpregs_state *state = zfs_kfpu_fpregs[smp_processor_id()]; + uint8_t *state = zfs_kfpu_fpregs[smp_processor_id()]; #if defined(HAVE_XSAVES) if (static_cpu_has(X86_FEATURE_XSAVES)) { - kfpu_do_xrstor("xrstors", &state->xsave, ~0); + kfpu_do_xrstor("xrstors", state, ~0); goto out; } #endif if (static_cpu_has(X86_FEATURE_XSAVE)) { - kfpu_do_xrstor("xrstor", &state->xsave, ~0); + kfpu_do_xrstor("xrstor", state, ~0); } else if (static_cpu_has(X86_FEATURE_FXSR)) { - kfpu_save_fxsr(&state->fxsave); + kfpu_save_fxsr(state); } else { - kfpu_save_fsave(&state->fsave); + kfpu_save_fsave(state); } out: local_irq_enable(); preempt_enable(); } -#endif /* defined(HAVE_KERNEL_FPU_XSAVE_INTERNAL) */ #else -/* - * FPU support is unavailable. - */ -#define kfpu_allowed() 0 -#define kfpu_begin() do {} while (0) -#define kfpu_end() do {} while (0) -#define kfpu_init() 0 -#define kfpu_fini() ((void) 0) +#error "Exactly one of KERNEL_EXPORTS_X86_FPU or HAVE_KERNEL_FPU_INTERNAL" \ + " must be defined" -#endif /* defined(HAVE_KERNEL_FPU_INTERNAL || HAVE_KERNEL_FPU_XSAVE_INTERNAL) */ +#endif /* defined(HAVE_KERNEL_FPU_INTERNAL */ #endif /* defined(KERNEL_EXPORTS_X86_FPU) */ /* @@ -452,6 +404,25 @@ out: /* * Detect register set support */ + +/* + * Check if OS supports AVX and AVX2 by checking XCR0 + * Only call this function if CPUID indicates that AVX feature is + * supported by the CPU, otherwise it might be an illegal instruction. + */ +static inline uint64_t +zfs_xgetbv(uint32_t index) +{ + uint32_t eax, edx; + /* xgetbv - instruction byte code */ + __asm__ __volatile__(".byte 0x0f; .byte 0x01; .byte 0xd0" + : "=a" (eax), "=d" (edx) + : "c" (index)); + + return ((((uint64_t)edx)<<32) | (uint64_t)eax); +} + + static inline boolean_t __simd_state_enabled(const uint64_t state) { @@ -466,7 +437,7 @@ __simd_state_enabled(const uint64_t state) if (!has_osxsave) return (B_FALSE); - xcr0 = xgetbv(0); + xcr0 = zfs_xgetbv(0); return ((xcr0 & state) == state); } diff --git a/module/zcommon/zfs_prop.c b/module/zcommon/zfs_prop.c index 0d57350668..8b3e774d99 100644 --- a/module/zcommon/zfs_prop.c +++ b/module/zcommon/zfs_prop.c @@ -1001,10 +1001,10 @@ zfs_prop_align_right(zfs_prop_t prop) #include -#if defined(HAVE_KERNEL_FPU_INTERNAL) || defined(HAVE_KERNEL_FPU_XSAVE_INTERNAL) -union fpregs_state **zfs_kfpu_fpregs; +#if defined(HAVE_KERNEL_FPU_INTERNAL) +uint8_t **zfs_kfpu_fpregs; EXPORT_SYMBOL(zfs_kfpu_fpregs); -#endif /* HAVE_KERNEL_FPU_INTERNAL || HAVE_KERNEL_FPU_XSAVE_INTERNAL */ +#endif /* defined(HAVE_KERNEL_FPU_INTERNAL) */ static int __init zcommon_init(void)