Unify Assembler files between Linux and Windows

Add new macro ASMABI used by Windows to change
calling API to "sysv_abi".

Reviewed-by: Attila Fülöp <attila@fueloep.org>
Reviewed-by: Richard Yao <richard.yao@alumni.stonybrook.edu>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Jorgen Lundman <lundman@lundman.net>
Closes #14228
This commit is contained in:
Jorgen Lundman 2023-01-18 04:09:19 +09:00 committed by GitHub
parent 19d3961589
commit 68c0771cc9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
32 changed files with 815 additions and 488 deletions

View File

@ -20,6 +20,7 @@ COMMON_H = \
sys/aggsum.h \
sys/arc.h \
sys/arc_impl.h \
sys/asm_linkage.h \
sys/avl.h \
sys/avl_impl.h \
sys/bitops.h \

View File

@ -6,6 +6,8 @@ noinst_HEADERS = \
\
%D%/spl/rpc/xdr.h \
\
%D%/spl/sys/ia32/asm_linkage.h \
\
%D%/spl/sys/acl.h \
%D%/spl/sys/acl_impl.h \
%D%/spl/sys/atomic.h \
@ -88,3 +90,4 @@ noinst_HEADERS = \
%D%/zfs/sys/zfs_vnops_os.h \
%D%/zfs/sys/zfs_znode_impl.h \
%D%/zfs/sys/zpl.h

View File

@ -0,0 +1,178 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _IA32_SYS_ASM_LINKAGE_H
#define _IA32_SYS_ASM_LINKAGE_H
#define RET ret
/* Tell compiler to call assembler like Unix */
#undef ASMABI
#define ASMABI __attribute__((sysv_abi))
#define ENDBR
#define SECTION_TEXT .text
#define SECTION_STATIC .data
#ifdef __cplusplus
extern "C" {
#endif
#ifdef _ASM /* The remainder of this file is only for assembly files */
/*
* make annoying differences in assembler syntax go away
*/
/*
* D16 and A16 are used to insert instructions prefixes; the
* macros help the assembler code be slightly more portable.
*/
#if !defined(__GNUC_AS__)
/*
* /usr/ccs/bin/as prefixes are parsed as separate instructions
*/
#define D16 data16;
#define A16 addr16;
/*
* (There are some weird constructs in constant expressions)
*/
#define _CONST(const) [const]
#define _BITNOT(const) -1!_CONST(const)
#define _MUL(a, b) _CONST(a \* b)
#else
/*
* Why not use the 'data16' and 'addr16' prefixes .. well, the
* assembler doesn't quite believe in real mode, and thus argues with
* us about what we're trying to do.
*/
#define D16 .byte 0x66;
#define A16 .byte 0x67;
#define _CONST(const) (const)
#define _BITNOT(const) ~_CONST(const)
#define _MUL(a, b) _CONST(a * b)
#endif
/*
* C pointers are different sizes between i386 and amd64.
* These constants can be used to compute offsets into pointer arrays.
*/
#if defined(__amd64)
#define CLONGSHIFT 3
#define CLONGSIZE 8
#define CLONGMASK 7
#elif defined(__i386)
#define CLONGSHIFT 2
#define CLONGSIZE 4
#define CLONGMASK 3
#endif
/*
* Since we know we're either ILP32 or LP64 ..
*/
#define CPTRSHIFT CLONGSHIFT
#define CPTRSIZE CLONGSIZE
#define CPTRMASK CLONGMASK
#if CPTRSIZE != (1 << CPTRSHIFT) || CLONGSIZE != (1 << CLONGSHIFT)
#error "inconsistent shift constants"
#endif
#if CPTRMASK != (CPTRSIZE - 1) || CLONGMASK != (CLONGSIZE - 1)
#error "inconsistent mask constants"
#endif
#define ASM_ENTRY_ALIGN 16
/*
* SSE register alignment and save areas
*/
#define XMM_SIZE 16
#define XMM_ALIGN 16
/*
* ENTRY provides the standard procedure entry code and an easy way to
* insert the calls to mcount for profiling. ENTRY_NP is identical, but
* never calls mcount.
*/
#define ENTRY(x) \
.text; \
.align ASM_ENTRY_ALIGN; \
.globl x; \
x: MCOUNT(x)
#define ENTRY_NP(x) \
.text; \
.align ASM_ENTRY_ALIGN; \
.globl x; \
x:
#define ENTRY_ALIGN(x, a) \
.text; \
.align a; \
.globl x; \
x:
/*
* ENTRY2 is identical to ENTRY but provides two labels for the entry point.
*/
#define ENTRY2(x, y) \
.text; \
.align ASM_ENTRY_ALIGN; \
.globl x, y; \
x:; \
y: MCOUNT(x)
#define ENTRY_NP2(x, y) \
.text; \
.align ASM_ENTRY_ALIGN; \
.globl x, y; \
x:; \
y:
/*
* SET_SIZE trails a function and set the size for the ELF symbol table.
*/
#define SET_SIZE(x)
#define SET_OBJ(x)
#endif /* _ASM */
#ifdef __cplusplus
}
#endif
#endif /* _IA32_SYS_ASM_LINKAGE_H */

View File

@ -109,4 +109,8 @@ kernel_spl_sys_HEADERS = \
%D%/spl/sys/wmsum.h \
%D%/spl/sys/zmod.h \
%D%/spl/sys/zone.h
kernel_spl_ia32dir = $(kernel_spl_sysdir)/ia32
kernel_spl_ia32_HEADERS = \
%D%/spl/sys/ia32/asm_linkage.h
endif

View File

@ -0,0 +1,212 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _IA32_SYS_ASM_LINKAGE_H
#define _IA32_SYS_ASM_LINKAGE_H
#if defined(_KERNEL) && defined(__linux__)
#include <linux/linkage.h>
#endif
#ifndef ENDBR
#if defined(__ELF__) && defined(__CET__) && defined(__has_include)
/* CSTYLED */
#if __has_include(<cet.h>)
#include <cet.h>
#ifdef _CET_ENDBR
#define ENDBR _CET_ENDBR
#endif /* _CET_ENDBR */
#endif /* <cet.h> */
#endif /* __ELF__ && __CET__ && __has_include */
#endif /* !ENDBR */
#ifndef ENDBR
#define ENDBR
#endif
#ifndef RET
#define RET ret
#endif
/* You can set to nothing on Unix platforms */
#undef ASMABI
#define ASMABI __attribute__((sysv_abi))
#define SECTION_TEXT .text
#define SECTION_STATIC .section .rodata
#ifdef __cplusplus
extern "C" {
#endif
#ifdef _ASM /* The remainder of this file is only for assembly files */
/*
* make annoying differences in assembler syntax go away
*/
/*
* D16 and A16 are used to insert instructions prefixes; the
* macros help the assembler code be slightly more portable.
*/
#if !defined(__GNUC_AS__)
/*
* /usr/ccs/bin/as prefixes are parsed as separate instructions
*/
#define D16 data16;
#define A16 addr16;
/*
* (There are some weird constructs in constant expressions)
*/
#define _CONST(const) [const]
#define _BITNOT(const) -1!_CONST(const)
#define _MUL(a, b) _CONST(a \* b)
#else
/*
* Why not use the 'data16' and 'addr16' prefixes .. well, the
* assembler doesn't quite believe in real mode, and thus argues with
* us about what we're trying to do.
*/
#define D16 .byte 0x66;
#define A16 .byte 0x67;
#define _CONST(const) (const)
#define _BITNOT(const) ~_CONST(const)
#define _MUL(a, b) _CONST(a * b)
#endif
/*
* C pointers are different sizes between i386 and amd64.
* These constants can be used to compute offsets into pointer arrays.
*/
#if defined(__amd64)
#define CLONGSHIFT 3
#define CLONGSIZE 8
#define CLONGMASK 7
#elif defined(__i386)
#define CLONGSHIFT 2
#define CLONGSIZE 4
#define CLONGMASK 3
#endif
/*
* Since we know we're either ILP32 or LP64 ..
*/
#define CPTRSHIFT CLONGSHIFT
#define CPTRSIZE CLONGSIZE
#define CPTRMASK CLONGMASK
#if CPTRSIZE != (1 << CPTRSHIFT) || CLONGSIZE != (1 << CLONGSHIFT)
#error "inconsistent shift constants"
#endif
#if CPTRMASK != (CPTRSIZE - 1) || CLONGMASK != (CLONGSIZE - 1)
#error "inconsistent mask constants"
#endif
#define ASM_ENTRY_ALIGN 16
/*
* SSE register alignment and save areas
*/
#define XMM_SIZE 16
#define XMM_ALIGN 16
/*
* ENTRY provides the standard procedure entry code and an easy way to
* insert the calls to mcount for profiling. ENTRY_NP is identical, but
* never calls mcount.
*/
#undef ENTRY
#define ENTRY(x) \
.text; \
.align ASM_ENTRY_ALIGN; \
.globl x; \
.type x, @function; \
x: MCOUNT(x)
#define ENTRY_NP(x) \
.text; \
.align ASM_ENTRY_ALIGN; \
.globl x; \
.type x, @function; \
x:
#define ENTRY_ALIGN(x, a) \
.text; \
.align a; \
.globl x; \
.type x, @function; \
x:
#define FUNCTION(x) \
.type x, @function; \
x:
/*
* ENTRY2 is identical to ENTRY but provides two labels for the entry point.
*/
#define ENTRY2(x, y) \
.text; \
.align ASM_ENTRY_ALIGN; \
.globl x, y; \
.type x, @function; \
.type y, @function; \
x:; \
y: MCOUNT(x)
#define ENTRY_NP2(x, y) \
.text; \
.align ASM_ENTRY_ALIGN; \
.globl x, y; \
.type x, @function; \
.type y, @function; \
x:; \
y:
/*
* SET_SIZE trails a function and set the size for the ELF symbol table.
*/
#define SET_SIZE(x) \
.size x, [.-x]
#define SET_OBJ(x) .type x, @object
#endif /* _ASM */
#ifdef __cplusplus
}
#endif
#endif /* _IA32_SYS_ASM_LINKAGE_H */

48
include/sys/asm_linkage.h Normal file
View File

@ -0,0 +1,48 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_ASM_LINKAGE_H
#define _SYS_ASM_LINKAGE_H
#define ASMABI
#if defined(__i386) || defined(__amd64)
#include <sys/ia32/asm_linkage.h> /* XX64 x86/sys/asm_linkage.h */
#endif
#if defined(_KERNEL) && defined(HAVE_KERNEL_OBJTOOL)
#include <asm/frame.h>
#else /* userspace */
#define FRAME_BEGIN
#define FRAME_END
#endif
#endif /* _SYS_ASM_LINKAGE_H */

View File

@ -67,3 +67,4 @@ nodist_libicp_la_SOURCES += \
module/icp/asm-x86_64/blake3/blake3_sse2.S \
module/icp/asm-x86_64/blake3/blake3_sse41.S
endif

View File

@ -26,6 +26,7 @@ libspl_sysdir = $(libspldir)/sys
libspl_sys_HEADERS = \
%D%/sys/acl.h \
%D%/sys/acl_impl.h \
%D%/sys/asm_linkage.h \
%D%/sys/callb.h \
%D%/sys/cmn_err.h \
%D%/sys/cred.h \
@ -62,6 +63,8 @@ libspl_sys_HEADERS = \
%D%/sys/wmsum.h \
%D%/sys/zone.h
libspl_ia32dir = $(libspldir)/sys/ia32
if BUILD_LINUX
libspl_sys_HEADERS += \
%D%/os/linux/sys/byteorder.h \
@ -72,6 +75,9 @@ libspl_sys_HEADERS += \
%D%/os/linux/sys/stat.h \
%D%/os/linux/sys/sysmacros.h \
%D%/os/linux/sys/zfs_context_os.h
libspl_ia32_HEADERS = \
%D%/os/linux/sys/ia32/asm_linkage.h
endif
if BUILD_FREEBSD
@ -86,9 +92,13 @@ libspl_sys_HEADERS += \
%D%/os/freebsd/sys/sysmacros.h \
%D%/os/freebsd/sys/vfs.h \
%D%/os/freebsd/sys/zfs_context_os.h
libspl_ia32_HEADERS = \
%D%/os/freebsd/sys/ia32/asm_linkage.h
endif
libspl_sys_dktpdir = $(libspl_sysdir)/dktp
libspl_sys_dktp_HEADERS = \
%D%/sys/dktp/fdisk.h

View File

@ -0,0 +1,184 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _IA32_SYS_ASM_LINKAGE_H
#define _IA32_SYS_ASM_LINKAGE_H
#if defined(__linux__) && defined(CONFIG_SLS)
#define RET ret; int3
#else
#define RET ret
#endif
/* Tell compiler to call assembler like Unix */
#undef ASMABI
#define ASMABI __attribute__((sysv_abi))
#define ENDBR
#define SECTION_TEXT .text
#define SECTION_STATIC .data
#ifdef __cplusplus
extern "C" {
#endif
#ifdef _ASM /* The remainder of this file is only for assembly files */
/*
* make annoying differences in assembler syntax go away
*/
/*
* D16 and A16 are used to insert instructions prefixes; the
* macros help the assembler code be slightly more portable.
*/
#if !defined(__GNUC_AS__)
/*
* /usr/ccs/bin/as prefixes are parsed as separate instructions
*/
#define D16 data16;
#define A16 addr16;
/*
* (There are some weird constructs in constant expressions)
*/
#define _CONST(const) [const]
#define _BITNOT(const) -1!_CONST(const)
#define _MUL(a, b) _CONST(a \* b)
#else
/*
* Why not use the 'data16' and 'addr16' prefixes .. well, the
* assembler doesn't quite believe in real mode, and thus argues with
* us about what we're trying to do.
*/
#define D16 .byte 0x66;
#define A16 .byte 0x67;
#define _CONST(const) (const)
#define _BITNOT(const) ~_CONST(const)
#define _MUL(a, b) _CONST(a * b)
#endif
/*
* C pointers are different sizes between i386 and amd64.
* These constants can be used to compute offsets into pointer arrays.
*/
#if defined(__amd64)
#define CLONGSHIFT 3
#define CLONGSIZE 8
#define CLONGMASK 7
#elif defined(__i386)
#define CLONGSHIFT 2
#define CLONGSIZE 4
#define CLONGMASK 3
#endif
/*
* Since we know we're either ILP32 or LP64 ..
*/
#define CPTRSHIFT CLONGSHIFT
#define CPTRSIZE CLONGSIZE
#define CPTRMASK CLONGMASK
#if CPTRSIZE != (1 << CPTRSHIFT) || CLONGSIZE != (1 << CLONGSHIFT)
#error "inconsistent shift constants"
#endif
#if CPTRMASK != (CPTRSIZE - 1) || CLONGMASK != (CLONGSIZE - 1)
#error "inconsistent mask constants"
#endif
#define ASM_ENTRY_ALIGN 16
/*
* SSE register alignment and save areas
*/
#define XMM_SIZE 16
#define XMM_ALIGN 16
/*
* ENTRY provides the standard procedure entry code and an easy way to
* insert the calls to mcount for profiling. ENTRY_NP is identical, but
* never calls mcount.
*/
#define ENTRY(x) \
.text; \
.align ASM_ENTRY_ALIGN; \
.globl x; \
x: MCOUNT(x)
#define ENTRY_NP(x) \
.text; \
.align ASM_ENTRY_ALIGN; \
.globl x; \
x:
#define ENTRY_ALIGN(x, a) \
.text; \
.align a; \
.globl x; \
x:
#define FUNCTION(x) \
.type x, @function; \
x:
/*
* ENTRY2 is identical to ENTRY but provides two labels for the entry point.
*/
#define ENTRY2(x, y) \
.text; \
.align ASM_ENTRY_ALIGN; \
.globl x, y; \
x:; \
y: MCOUNT(x)
#define ENTRY_NP2(x, y) \
.text; \
.align ASM_ENTRY_ALIGN; \
.globl x, y; \
x:; \
y:
/*
* SET_SIZE trails a function and set the size for the ELF symbol table.
*/
#define SET_SIZE(x)
#define SET_OBJ(x)
#endif /* _ASM */
#ifdef __cplusplus
}
#endif
#endif /* _IA32_SYS_ASM_LINKAGE_H */

View File

@ -27,9 +27,6 @@
#ifndef _IA32_SYS_ASM_LINKAGE_H
#define _IA32_SYS_ASM_LINKAGE_H
#include <sys/stack.h>
#include <sys/trap.h>
#if defined(_KERNEL) && defined(__linux__)
#include <linux/linkage.h>
#endif
@ -56,6 +53,13 @@
#define RET ret
#endif
/* You can set to nothing on Unix platforms */
#undef ASMABI
#define ASMABI __attribute__((sysv_abi))
#define SECTION_TEXT .text
#define SECTION_STATIC .section .rodata
#ifdef __cplusplus
extern "C" {
#endif
@ -157,6 +161,17 @@ x: MCOUNT(x)
.type x, @function; \
x:
#define ENTRY_ALIGN(x, a) \
.text; \
.align a; \
.globl x; \
.type x, @function; \
x:
#define FUNCTION(x) \
.type x, @function; \
x:
/*
* ENTRY2 is identical to ENTRY but provides two labels for the entry point.
*/
@ -185,6 +200,8 @@ y:
#define SET_SIZE(x) \
.size x, [.-x]
#define SET_OBJ(x) .type x, @object
#endif /* _ASM */
#ifdef __cplusplus

View File

@ -151,10 +151,10 @@ zfs-$(CONFIG_PPC) += $(addprefix icp/,$(ICP_OBJS_PPC_PPC64))
zfs-$(CONFIG_PPC64) += $(addprefix icp/,$(ICP_OBJS_PPC_PPC64))
$(addprefix $(obj)/icp/,$(ICP_OBJS) $(ICP_OBJS_X86) $(ICP_OBJS_X86_64) \
$(ICP_OBJS_ARM64) $(ICP_OBJS_PPC_PPC64)) : asflags-y += -I$(icp_include)
$(ICP_OBJS_ARM64) $(ICP_OBJS_PPC_PPC64)) : asflags-y += -I$(icp_include) -I$(zfs_include)/os/linux/spl -I$(zfs_include)
$(addprefix $(obj)/icp/,$(ICP_OBJS) $(ICP_OBJS_X86) $(ICP_OBJS_X86_64) \
$(ICP_OBJS_ARM64) $(ICP_OBJS_PPC_PPC64)) : ccflags-y += -I$(icp_include)
$(ICP_OBJS_ARM64) $(ICP_OBJS_PPC_PPC64)) : ccflags-y += -I$(icp_include) -I$(zfs_include)/os/linux/spl -I$(zfs_include)
# Suppress objtool "return with modified stack frame" warnings.
OBJECT_FILES_NON_STANDARD_aesni-gcm-x86_64.o := y

View File

@ -26,15 +26,16 @@
#include <sys/simd.h>
#include <sys/types.h>
#include <sys/asm_linkage.h>
/* These functions are used to execute AES-NI instructions: */
extern int rijndael_key_setup_enc_intel(uint32_t rk[],
extern ASMABI int rijndael_key_setup_enc_intel(uint32_t rk[],
const uint32_t cipherKey[], uint64_t keyBits);
extern int rijndael_key_setup_dec_intel(uint32_t rk[],
extern ASMABI int rijndael_key_setup_dec_intel(uint32_t rk[],
const uint32_t cipherKey[], uint64_t keyBits);
extern void aes_encrypt_intel(const uint32_t rk[], int Nr,
extern ASMABI void aes_encrypt_intel(const uint32_t rk[], int Nr,
const uint32_t pt[4], uint32_t ct[4]);
extern void aes_decrypt_intel(const uint32_t rk[], int Nr,
extern ASMABI void aes_decrypt_intel(const uint32_t rk[], int Nr,
const uint32_t ct[4], uint32_t pt[4]);

View File

@ -35,6 +35,7 @@ extern "C" {
#include <sys/types.h>
#include <sys/blake3.h>
#include <sys/simd.h>
#include <sys/asm_linkage.h>
/*
* Methods used to define BLAKE3 assembler implementations

View File

@ -29,15 +29,15 @@
(defined(__x86_64) && defined(HAVE_SSE2)) || \
(defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
extern void zfs_blake3_compress_in_place_sse2(uint32_t cv[8],
extern void ASMABI zfs_blake3_compress_in_place_sse2(uint32_t cv[8],
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
uint64_t counter, uint8_t flags);
extern void zfs_blake3_compress_xof_sse2(const uint32_t cv[8],
extern void ASMABI zfs_blake3_compress_xof_sse2(const uint32_t cv[8],
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
uint64_t counter, uint8_t flags, uint8_t out[64]);
extern void zfs_blake3_hash_many_sse2(const uint8_t * const *inputs,
extern void ASMABI zfs_blake3_hash_many_sse2(const uint8_t * const *inputs,
size_t num_inputs, size_t blocks, const uint32_t key[8],
uint64_t counter, boolean_t increment_counter, uint8_t flags,
uint8_t flags_start, uint8_t flags_end, uint8_t *out);
@ -95,15 +95,15 @@ const blake3_ops_t blake3_sse2_impl = {
(defined(__x86_64) && defined(HAVE_SSE2)) || \
(defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
extern void zfs_blake3_compress_in_place_sse41(uint32_t cv[8],
extern void ASMABI zfs_blake3_compress_in_place_sse41(uint32_t cv[8],
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
uint64_t counter, uint8_t flags);
extern void zfs_blake3_compress_xof_sse41(const uint32_t cv[8],
extern void ASMABI zfs_blake3_compress_xof_sse41(const uint32_t cv[8],
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
uint64_t counter, uint8_t flags, uint8_t out[64]);
extern void zfs_blake3_hash_many_sse41(const uint8_t * const *inputs,
extern void ASMABI zfs_blake3_hash_many_sse41(const uint8_t * const *inputs,
size_t num_inputs, size_t blocks, const uint32_t key[8],
uint64_t counter, boolean_t increment_counter, uint8_t flags,
uint8_t flags_start, uint8_t flags_end, uint8_t *out);
@ -158,7 +158,7 @@ const blake3_ops_t blake3_sse41_impl = {
#endif
#if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2)
extern void zfs_blake3_hash_many_avx2(const uint8_t * const *inputs,
extern void ASMABI zfs_blake3_hash_many_avx2(const uint8_t * const *inputs,
size_t num_inputs, size_t blocks, const uint32_t key[8],
uint64_t counter, boolean_t increment_counter, uint8_t flags,
uint8_t flags_start, uint8_t flags_end, uint8_t *out);
@ -190,15 +190,15 @@ const blake3_ops_t blake3_avx2_impl = {
#endif
#if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL)
extern void zfs_blake3_compress_in_place_avx512(uint32_t cv[8],
extern void ASMABI zfs_blake3_compress_in_place_avx512(uint32_t cv[8],
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
uint64_t counter, uint8_t flags);
extern void zfs_blake3_compress_xof_avx512(const uint32_t cv[8],
extern void ASMABI zfs_blake3_compress_xof_avx512(const uint32_t cv[8],
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
uint64_t counter, uint8_t flags, uint8_t out[64]);
extern void zfs_blake3_hash_many_avx512(const uint8_t * const *inputs,
extern void ASMABI zfs_blake3_hash_many_avx512(const uint8_t * const *inputs,
size_t num_inputs, size_t blocks, const uint32_t key[8],
uint64_t counter, boolean_t increment_counter, uint8_t flags,
uint8_t flags_start, uint8_t flags_end, uint8_t *out);

View File

@ -59,7 +59,7 @@ boolean_t gcm_avx_can_use_movbe = B_FALSE;
static boolean_t gcm_use_avx = B_FALSE;
#define GCM_IMPL_USE_AVX (*(volatile boolean_t *)&gcm_use_avx)
extern boolean_t atomic_toggle_boolean_nv(volatile boolean_t *);
extern boolean_t ASMABI atomic_toggle_boolean_nv(volatile boolean_t *);
static inline boolean_t gcm_avx_will_work(void);
static inline void gcm_set_avx(boolean_t);
@ -1073,19 +1073,19 @@ MODULE_PARM_DESC(icp_gcm_impl, "Select gcm implementation.");
static uint32_t gcm_avx_chunk_size =
((32 * 1024) / GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES;
extern void clear_fpu_regs_avx(void);
extern void gcm_xor_avx(const uint8_t *src, uint8_t *dst);
extern void aes_encrypt_intel(const uint32_t rk[], int nr,
extern void ASMABI clear_fpu_regs_avx(void);
extern void ASMABI gcm_xor_avx(const uint8_t *src, uint8_t *dst);
extern void ASMABI aes_encrypt_intel(const uint32_t rk[], int nr,
const uint32_t pt[4], uint32_t ct[4]);
extern void gcm_init_htab_avx(uint64_t *Htable, const uint64_t H[2]);
extern void gcm_ghash_avx(uint64_t ghash[2], const uint64_t *Htable,
extern void ASMABI gcm_init_htab_avx(uint64_t *Htable, const uint64_t H[2]);
extern void ASMABI gcm_ghash_avx(uint64_t ghash[2], const uint64_t *Htable,
const uint8_t *in, size_t len);
extern size_t aesni_gcm_encrypt(const uint8_t *, uint8_t *, size_t,
extern size_t ASMABI aesni_gcm_encrypt(const uint8_t *, uint8_t *, size_t,
const void *, uint64_t *, uint64_t *);
extern size_t aesni_gcm_decrypt(const uint8_t *, uint8_t *, size_t,
extern size_t ASMABI aesni_gcm_decrypt(const uint8_t *, uint8_t *, size_t,
const void *, uint64_t *, uint64_t *);
static inline boolean_t

View File

@ -26,9 +26,10 @@
#include <sys/types.h>
#include <sys/simd.h>
#include <sys/asm_linkage.h>
/* These functions are used to execute pclmulqdq based assembly methods */
extern void gcm_mul_pclmulqdq(uint64_t *, uint64_t *, uint64_t *);
extern void ASMABI gcm_mul_pclmulqdq(uint64_t *, uint64_t *, uint64_t *);
#include <modes/gcm_impl.h>

View File

@ -48,6 +48,7 @@
#define HAVE_HTONL
#endif
#include <sys/isa_defs.h> /* for _ILP32 */
#include <sys/asm_linkage.h>
static void Encode(uint8_t *, uint32_t *, size_t);
static void Encode64(uint8_t *, uint64_t *, size_t);
@ -57,8 +58,8 @@ static void Encode64(uint8_t *, uint64_t *, size_t);
#define SHA512Transform(ctx, in) SHA512TransformBlocks((ctx), (in), 1)
#define SHA256Transform(ctx, in) SHA256TransformBlocks((ctx), (in), 1)
void SHA512TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num);
void SHA256TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num);
void ASMABI SHA512TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num);
void ASMABI SHA256TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num);
#else
static void SHA256Transform(SHA2_CTX *, const uint8_t *);

View File

@ -188,13 +188,13 @@
#include <sys/types.h>
void
aes_encrypt_amd64(const uint32_t rk[], int Nr, const uint32_t pt[4],
uint32_t ct[4]) {
(void) rk, (void) Nr, (void) pt, (void) ct;
uint32_t ct[4]) {
(void) rk, (void) Nr, (void) pt, (void) ct;
}
void
aes_decrypt_amd64(const uint32_t rk[], int Nr, const uint32_t ct[4],
uint32_t pt[4]) {
(void) rk, (void) Nr, (void) pt, (void) ct;
uint32_t pt[4]) {
(void) rk, (void) Nr, (void) pt, (void) ct;
}
@ -221,23 +221,23 @@ aes_decrypt_amd64(const uint32_t rk[], int Nr, const uint32_t ct[4],
// finite field multiplies by {02}, {04} and {08}
#define f2(x) [[x<<1]^[[[x>>7]&1]*0x11b]]
#define f4(x) [[x<<2]^[[[x>>6]&1]*0x11b]^[[[x>>6]&2]*0x11b]]
#define f8(x) [[x<<3]^[[[x>>5]&1]*0x11b]^[[[x>>5]&2]*0x11b]^[[[x>>5]&4]*0x11b]]
#define f2(x) ((x<<1)^(((x>>7)&1)*0x11b))
#define f4(x) ((x<<2)^(((x>>6)&1)*0x11b)^(((x>>6)&2)*0x11b))
#define f8(x) ((x<<3)^(((x>>5)&1)*0x11b)^(((x>>5)&2)*0x11b)^(((x>>5)&4)*0x11b))
// finite field multiplies required in table generation
#define f3(x) [[f2(x)] ^ [x]]
#define f9(x) [[f8(x)] ^ [x]]
#define fb(x) [[f8(x)] ^ [f2(x)] ^ [x]]
#define fd(x) [[f8(x)] ^ [f4(x)] ^ [x]]
#define fe(x) [[f8(x)] ^ [f4(x)] ^ [f2(x)]]
#define f3(x) ((f2(x)) ^ (x))
#define f9(x) ((f8(x)) ^ (x))
#define fb(x) ((f8(x)) ^ (f2(x)) ^ (x))
#define fd(x) ((f8(x)) ^ (f4(x)) ^ (x))
#define fe(x) ((f8(x)) ^ (f4(x)) ^ (f2(x)))
// macros for expanding S-box data
#define u8(x) [f2(x)], [x], [x], [f3(x)], [f2(x)], [x], [x], [f3(x)]
#define v8(x) [fe(x)], [f9(x)], [fd(x)], [fb(x)], [fe(x)], [f9(x)], [fd(x)], [x]
#define w8(x) [x], 0, 0, 0, [x], 0, 0, 0
#define u8(x) (f2(x)), (x), (x), (f3(x)), (f2(x)), (x), (x), (f3(x))
#define v8(x) (fe(x)), (f9(x)), (fd(x)), (fb(x)), (fe(x)), (f9(x)), (fd(x)), (x)
#define w8(x) (x), 0, 0, 0, (x), 0, 0, 0
#define enc_vals(x) \
.byte x(0x63),x(0x7c),x(0x77),x(0x7b),x(0xf2),x(0x6b),x(0x6f),x(0xc5); \
@ -693,7 +693,7 @@ aes_decrypt_amd64(const uint32_t rk[], int Nr, const uint32_t ct[4],
* int aes_encrypt(const unsigned char *in,
* unsigned char *out, const aes_encrypt_ctx cx[1])/
*/
.section .rodata
SECTION_STATIC
.align 64
enc_tab:
enc_vals(u8)
@ -718,7 +718,7 @@ ENTRY_NP(aes_encrypt_amd64)
#else
// OpenSolaris OS interface
sub $[4*8], %rsp // Make room on stack to save registers
sub $(4*8), %rsp // Make room on stack to save registers
mov %rcx, (%rsp) // Save output pointer (P4) on stack
mov %rdi, %r8 // context (P1)
mov %rdx, %rdi // P3: save input pointer
@ -749,11 +749,11 @@ ENTRY_NP(aes_encrypt_amd64)
lea (kptr,%rsi), kptr
// Jump based on byte key length * 16:
cmp $[10*16], %esi
cmp $(10*16), %esi
je 3f
cmp $[12*16], %esi
cmp $(12*16), %esi
je 2f
cmp $[14*16], %esi
cmp $(14*16), %esi
je 1f
mov $-1, %rax // error
jmp 4f
@ -785,7 +785,7 @@ ENTRY_NP(aes_encrypt_amd64)
mov 1*8(%rsp), %rbx
mov 2*8(%rsp), %rbp
mov 3*8(%rsp), %r12
add $[4*8], %rsp
add $(4*8), %rsp
RET
SET_SIZE(aes_encrypt_amd64)
@ -799,7 +799,7 @@ ENTRY_NP(aes_encrypt_amd64)
* int aes_decrypt(const unsigned char *in,
* unsigned char *out, const aes_encrypt_ctx cx[1])/
*/
.section .rodata
SECTION_STATIC
.align 64
dec_tab:
dec_vals(v8)
@ -824,7 +824,7 @@ ENTRY_NP(aes_decrypt_amd64)
#else
// OpenSolaris OS interface
sub $[4*8], %rsp // Make room on stack to save registers
sub $(4*8), %rsp // Make room on stack to save registers
mov %rcx, (%rsp) // Save output pointer (P4) on stack
mov %rdi, %r8 // context (P1)
mov %rdx, %rdi // P3: save input pointer
@ -861,11 +861,11 @@ ENTRY_NP(aes_decrypt_amd64)
xor rofs+12(%rdi), %edx
// Jump based on byte key length * 16:
cmp $[10*16], %esi
cmp $(10*16), %esi
je 3f
cmp $[12*16], %esi
cmp $(12*16), %esi
je 2f
cmp $[14*16], %esi
cmp $(14*16), %esi
je 1f
mov $-1, %rax // error
jmp 4f
@ -897,11 +897,11 @@ ENTRY_NP(aes_decrypt_amd64)
mov 1*8(%rsp), %rbx
mov 2*8(%rsp), %rbp
mov 3*8(%rsp), %r12
add $[4*8], %rsp
add $(4*8), %rsp
RET
SET_SIZE(aes_decrypt_amd64)
#endif /* lint || __lint */
#endif /* lint || __lint */
#ifdef __ELF__
.section .note.GNU-stack,"",%progbits

View File

@ -31,12 +31,9 @@
#include <sys/asm_linkage.h>
.intel_syntax noprefix
.global zfs_blake3_hash_many_avx2
.text
.type zfs_blake3_hash_many_avx2,@function
.p2align 6
zfs_blake3_hash_many_avx2:
ENTRY_ALIGN(zfs_blake3_hash_many_avx2, 64)
ENDBR
push r15
push r14
@ -1791,13 +1788,10 @@ zfs_blake3_hash_many_avx2:
vmovdqu xmmword ptr [rbx+0x10], xmm1
jmp 4b
.size zfs_blake3_hash_many_avx2, . - zfs_blake3_hash_many_avx2
SET_SIZE(zfs_blake3_hash_many_avx2)
#ifdef __APPLE__
.static_data
#else
SECTION_STATIC
.section .rodata
#endif
.p2align 6
ADD0:

View File

@ -31,17 +31,9 @@
#include <sys/asm_linkage.h>
.intel_syntax noprefix
.global zfs_blake3_hash_many_avx512
.global zfs_blake3_compress_in_place_avx512
.global zfs_blake3_compress_xof_avx512
.text
.type zfs_blake3_hash_many_avx512,@function
.type zfs_blake3_compress_xof_avx512,@function
.type zfs_blake3_compress_in_place_avx512,@function
.p2align 6
zfs_blake3_hash_many_avx512:
ENTRY_ALIGN(zfs_blake3_hash_many_avx512, 64)
ENDBR
push r15
push r14
@ -2397,8 +2389,8 @@ zfs_blake3_hash_many_avx512:
vmovdqu xmmword ptr [rbx], xmm0
vmovdqu xmmword ptr [rbx+0x10], xmm1
jmp 4b
.p2align 6
zfs_blake3_compress_in_place_avx512:
ENTRY_ALIGN(zfs_blake3_compress_in_place_avx512, 64)
ENDBR
vmovdqu xmm0, xmmword ptr [rdi]
vmovdqu xmm1, xmmword ptr [rdi+0x10]
@ -2479,8 +2471,7 @@ zfs_blake3_compress_in_place_avx512:
vmovdqu xmmword ptr [rdi+0x10], xmm1
RET
.p2align 6
zfs_blake3_compress_xof_avx512:
ENTRY_ALIGN(zfs_blake3_compress_xof_avx512, 64)
ENDBR
vmovdqu xmm0, xmmword ptr [rdi]
vmovdqu xmm1, xmmword ptr [rdi+0x10]
@ -2565,15 +2556,11 @@ zfs_blake3_compress_xof_avx512:
vmovdqu xmmword ptr [r9+0x30], xmm3
RET
.size zfs_blake3_hash_many_avx512, . - zfs_blake3_hash_many_avx512
.size zfs_blake3_compress_in_place_avx512, . - zfs_blake3_compress_in_place_avx512
.size zfs_blake3_compress_xof_avx512, . - zfs_blake3_compress_xof_avx512
SET_SIZE(zfs_blake3_hash_many_avx512)
SET_SIZE(zfs_blake3_compress_in_place_avx512)
SET_SIZE(zfs_blake3_compress_xof_avx512)
#ifdef __APPLE__
.static_data
#else
.section .rodata
#endif
SECTION_STATIC
.p2align 6
INDEX0:

View File

@ -31,17 +31,10 @@
#include <sys/asm_linkage.h>
.intel_syntax noprefix
.global zfs_blake3_hash_many_sse2
.global zfs_blake3_compress_in_place_sse2
.global zfs_blake3_compress_xof_sse2
.text
.type zfs_blake3_hash_many_sse2,@function
.type zfs_blake3_compress_in_place_sse2,@function
.type zfs_blake3_compress_xof_sse2,@function
SECTION_TEXT
.p2align 6
zfs_blake3_hash_many_sse2:
ENTRY_ALIGN(zfs_blake3_hash_many_sse2, 64)
ENDBR
push r15
push r14
@ -2038,8 +2031,7 @@ zfs_blake3_hash_many_sse2:
movups xmmword ptr [rbx+0x10], xmm1
jmp 4b
.p2align 6
zfs_blake3_compress_in_place_sse2:
ENTRY_ALIGN(zfs_blake3_compress_in_place_sse2, 64)
ENDBR
movups xmm0, xmmword ptr [rdi]
movups xmm1, xmmword ptr [rdi+0x10]
@ -2149,8 +2141,7 @@ zfs_blake3_compress_in_place_sse2:
movups xmmword ptr [rdi+0x10], xmm1
RET
.p2align 6
zfs_blake3_compress_xof_sse2:
ENTRY_ALIGN(zfs_blake3_compress_xof_sse2, 64)
ENDBR
movups xmm0, xmmword ptr [rdi]
movups xmm1, xmmword ptr [rdi+0x10]
@ -2268,15 +2259,11 @@ zfs_blake3_compress_xof_sse2:
movups xmmword ptr [r9+0x30], xmm3
RET
.size zfs_blake3_hash_many_sse2, . - zfs_blake3_hash_many_sse2
.size zfs_blake3_compress_in_place_sse2, . - zfs_blake3_compress_in_place_sse2
.size zfs_blake3_compress_xof_sse2, . - zfs_blake3_compress_xof_sse2
SET_SIZE(zfs_blake3_hash_many_sse2)
SET_SIZE(zfs_blake3_compress_in_place_sse2)
SET_SIZE(zfs_blake3_compress_xof_sse2)
#ifdef __APPLE__
.static_data
#else
.section .rodata
#endif
SECTION_STATIC
.p2align 6
BLAKE3_IV:
.long 0x6A09E667, 0xBB67AE85

View File

@ -31,17 +31,10 @@
#include <sys/asm_linkage.h>
.intel_syntax noprefix
.global zfs_blake3_compress_in_place_sse41
.global zfs_blake3_compress_xof_sse41
.global zfs_blake3_hash_many_sse41
.text
.type zfs_blake3_hash_many_sse41,@function
.type zfs_blake3_compress_in_place_sse41,@function
.type zfs_blake3_compress_xof_sse41,@function
.p2align 6
zfs_blake3_hash_many_sse41:
ENTRY_ALIGN(zfs_blake3_hash_many_sse41, 64)
ENDBR
push r15
push r14
@ -1800,8 +1793,8 @@ zfs_blake3_hash_many_sse41:
movups xmmword ptr [rbx], xmm0
movups xmmword ptr [rbx+0x10], xmm1
jmp 4b
.p2align 6
zfs_blake3_compress_in_place_sse41:
ENTRY_ALIGN(zfs_blake3_compress_in_place_sse41, 64)
ENDBR
movups xmm0, xmmword ptr [rdi]
movups xmm1, xmmword ptr [rdi+0x10]
@ -1899,8 +1892,8 @@ zfs_blake3_compress_in_place_sse41:
movups xmmword ptr [rdi], xmm0
movups xmmword ptr [rdi+0x10], xmm1
RET
.p2align 6
zfs_blake3_compress_xof_sse41:
ENTRY_ALIGN(zfs_blake3_compress_xof_sse41, 64)
ENDBR
movups xmm0, xmmword ptr [rdi]
movups xmm1, xmmword ptr [rdi+0x10]
@ -2007,15 +2000,12 @@ zfs_blake3_compress_xof_sse41:
movups xmmword ptr [r9+0x30], xmm3
RET
.size zfs_blake3_hash_many_sse41, . - zfs_blake3_hash_many_sse41
.size zfs_blake3_compress_in_place_sse41, . - zfs_blake3_compress_in_place_sse41
.size zfs_blake3_compress_xof_sse41, . - zfs_blake3_compress_xof_sse41
SET_SIZE(zfs_blake3_hash_many_sse41)
SET_SIZE(zfs_blake3_compress_in_place_sse41)
SET_SIZE(zfs_blake3_compress_xof_sse41)
SECTION_STATIC
#ifdef __APPLE__
.static_data
#else
.section .rodata
#endif
.p2align 6
BLAKE3_IV:
.long 0x6A09E667, 0xBB67AE85

View File

@ -50,14 +50,16 @@
#define _ASM
#include <sys/asm_linkage.h>
/* Windows userland links with OpenSSL */
#if !defined (_WIN32) || defined (_KERNEL)
.extern gcm_avx_can_use_movbe
.text
#ifdef HAVE_MOVBE
.type _aesni_ctr32_ghash_6x,@function
.align 32
_aesni_ctr32_ghash_6x:
.align 32
FUNCTION(_aesni_ctr32_ghash_6x)
.cfi_startproc
ENDBR
vmovdqu 32(%r11),%xmm2
@ -369,12 +371,11 @@ _aesni_ctr32_ghash_6x:
RET
.cfi_endproc
.size _aesni_ctr32_ghash_6x,.-_aesni_ctr32_ghash_6x
SET_SIZE(_aesni_ctr32_ghash_6x)
#endif /* ifdef HAVE_MOVBE */
.type _aesni_ctr32_ghash_no_movbe_6x,@function
.align 32
_aesni_ctr32_ghash_no_movbe_6x:
.align 32
FUNCTION(_aesni_ctr32_ghash_no_movbe_6x)
.cfi_startproc
ENDBR
vmovdqu 32(%r11),%xmm2
@ -698,12 +699,9 @@ _aesni_ctr32_ghash_no_movbe_6x:
RET
.cfi_endproc
.size _aesni_ctr32_ghash_no_movbe_6x,.-_aesni_ctr32_ghash_no_movbe_6x
SET_SIZE(_aesni_ctr32_ghash_no_movbe_6x)
.globl aesni_gcm_decrypt
.type aesni_gcm_decrypt,@function
.align 32
aesni_gcm_decrypt:
ENTRY_ALIGN(aesni_gcm_decrypt, 32)
.cfi_startproc
ENDBR
xorq %r10,%r10
@ -818,10 +816,10 @@ aesni_gcm_decrypt:
movq %r10,%rax
RET
.cfi_endproc
.size aesni_gcm_decrypt,.-aesni_gcm_decrypt
.type _aesni_ctr32_6x,@function
.align 32
_aesni_ctr32_6x:
SET_SIZE(aesni_gcm_decrypt)
.align 32
FUNCTION(_aesni_ctr32_6x)
.cfi_startproc
ENDBR
vmovdqu 0-128(%rcx),%xmm4
@ -911,12 +909,9 @@ _aesni_ctr32_6x:
vpxor %xmm4,%xmm14,%xmm14
jmp .Loop_ctr32
.cfi_endproc
.size _aesni_ctr32_6x,.-_aesni_ctr32_6x
SET_SIZE(_aesni_ctr32_6x)
.globl aesni_gcm_encrypt
.type aesni_gcm_encrypt,@function
.align 32
aesni_gcm_encrypt:
ENTRY_ALIGN(aesni_gcm_encrypt, 32)
.cfi_startproc
ENDBR
xorq %r10,%r10
@ -1196,7 +1191,9 @@ aesni_gcm_encrypt:
movq %r10,%rax
RET
.cfi_endproc
.size aesni_gcm_encrypt,.-aesni_gcm_encrypt
SET_SIZE(aesni_gcm_encrypt)
#endif /* !_WIN32 || _KERNEL */
/* Some utility routines */
@ -1204,13 +1201,10 @@ aesni_gcm_encrypt:
* clear all fpu registers
* void clear_fpu_regs_avx(void);
*/
.globl clear_fpu_regs_avx
.type clear_fpu_regs_avx,@function
.align 32
clear_fpu_regs_avx:
ENTRY_ALIGN(clear_fpu_regs_avx, 32)
vzeroall
RET
.size clear_fpu_regs_avx,.-clear_fpu_regs_avx
SET_SIZE(clear_fpu_regs_avx)
/*
* void gcm_xor_avx(const uint8_t *src, uint8_t *dst);
@ -1219,25 +1213,19 @@ clear_fpu_regs_avx:
* stores the result at `dst'. The XOR is performed using FPU registers,
* so make sure FPU state is saved when running this in the kernel.
*/
.globl gcm_xor_avx
.type gcm_xor_avx,@function
.align 32
gcm_xor_avx:
ENTRY_ALIGN(gcm_xor_avx, 32)
movdqu (%rdi), %xmm0
movdqu (%rsi), %xmm1
pxor %xmm1, %xmm0
movdqu %xmm0, (%rsi)
RET
.size gcm_xor_avx,.-gcm_xor_avx
SET_SIZE(gcm_xor_avx)
/*
* Toggle a boolean_t value atomically and return the new value.
* boolean_t atomic_toggle_boolean_nv(volatile boolean_t *);
*/
.globl atomic_toggle_boolean_nv
.type atomic_toggle_boolean_nv,@function
.align 32
atomic_toggle_boolean_nv:
ENTRY_ALIGN(atomic_toggle_boolean_nv, 32)
xorl %eax, %eax
lock
xorl $1, (%rdi)
@ -1245,9 +1233,10 @@ atomic_toggle_boolean_nv:
movl $1, %eax
1:
RET
.size atomic_toggle_boolean_nv,.-atomic_toggle_boolean_nv
SET_SIZE(atomic_toggle_boolean_nv)
SECTION_STATIC
.pushsection .rodata
.align 64
.Lbswap_mask:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
@ -1261,7 +1250,6 @@ atomic_toggle_boolean_nv:
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
.byte 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64
.popsection
/* Mark the stack non-executable. */
#if defined(__linux__) && defined(__ELF__)

View File

@ -102,12 +102,13 @@
.text
.globl gcm_gmult_clmul
.type gcm_gmult_clmul,@function
.align 16
gcm_gmult_clmul:
/* Windows userland links with OpenSSL */
#if !defined (_WIN32) || defined (_KERNEL)
ENTRY_ALIGN(gcm_gmult_clmul, 16)
.cfi_startproc
ENDBR
.L_gmult_clmul:
movdqu (%rdi),%xmm0
movdqa .Lbswap_mask(%rip),%xmm5
@ -155,12 +156,10 @@ gcm_gmult_clmul:
movdqu %xmm0,(%rdi)
RET
.cfi_endproc
.size gcm_gmult_clmul,.-gcm_gmult_clmul
SET_SIZE(gcm_gmult_clmul)
#endif /* !_WIN32 || _KERNEL */
.globl gcm_init_htab_avx
.type gcm_init_htab_avx,@function
.align 32
gcm_init_htab_avx:
ENTRY_ALIGN(gcm_init_htab_avx, 32)
.cfi_startproc
ENDBR
vzeroupper
@ -269,21 +268,17 @@ gcm_init_htab_avx:
vzeroupper
RET
.cfi_endproc
.size gcm_init_htab_avx,.-gcm_init_htab_avx
SET_SIZE(gcm_init_htab_avx)
.globl gcm_gmult_avx
.type gcm_gmult_avx,@function
.align 32
gcm_gmult_avx:
#if !defined (_WIN32) || defined (_KERNEL)
ENTRY_ALIGN(gcm_gmult_avx, 32)
.cfi_startproc
ENDBR
jmp .L_gmult_clmul
.cfi_endproc
.size gcm_gmult_avx,.-gcm_gmult_avx
.globl gcm_ghash_avx
.type gcm_ghash_avx,@function
.align 32
gcm_ghash_avx:
SET_SIZE(gcm_gmult_avx)
ENTRY_ALIGN(gcm_ghash_avx, 32)
.cfi_startproc
ENDBR
vzeroupper
@ -658,9 +653,11 @@ gcm_ghash_avx:
vzeroupper
RET
.cfi_endproc
.size gcm_ghash_avx,.-gcm_ghash_avx
SET_SIZE(gcm_ghash_avx)
.pushsection .rodata
#endif /* !_WIN32 || _KERNEL */
SECTION_STATIC
.align 64
.Lbswap_mask:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
@ -671,13 +668,13 @@ gcm_ghash_avx:
.L7_mask_poly:
.long 7,0,450,0
.align 64
.type .Lrem_4bit,@object
SET_OBJ(.Lrem_4bit)
.Lrem_4bit:
.long 0,0,0,471859200,0,943718400,0,610271232
.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208
.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008
.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160
.type .Lrem_8bit,@object
SET_OBJ(.Lrem_8bit)
.Lrem_8bit:
.value 0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E
.value 0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E
@ -714,7 +711,6 @@ gcm_ghash_avx:
.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64
.popsection
/* Mark the stack non-executable. */
#if defined(__linux__) && defined(__ELF__)

View File

@ -2065,7 +2065,7 @@ SET_SIZE(SHA256TransformBlocks)
.section .rodata
.align 64
.type K256,@object
SET_OBJ(K256)
K256:
.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5

View File

@ -2066,7 +2066,7 @@ SET_SIZE(SHA512TransformBlocks)
.section .rodata
.align 64
.type K512,@object
SET_OBJ(K512)
K512:
.quad 0x428a2f98d728ae22,0x7137449123ef65cd
.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
@ -2110,6 +2110,7 @@ K512:
.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
#endif /* !lint && !__lint */
#ifdef __ELF__
#if defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

View File

@ -36,6 +36,7 @@ extern "C" {
#include <sys/zfs_context.h>
#include <sys/crypto/common.h>
#include <sys/asm_linkage.h>
/* Similar to sysmacros.h IS_P2ALIGNED, but checks two pointers: */
#define IS_P2ALIGNED2(v, w, a) \
@ -190,13 +191,13 @@ extern const aes_impl_ops_t aes_generic_impl;
extern const aes_impl_ops_t aes_x86_64_impl;
/* These functions are used to execute amd64 instructions for AMD or Intel: */
extern int rijndael_key_setup_enc_amd64(uint32_t rk[],
extern ASMABI int rijndael_key_setup_enc_amd64(uint32_t rk[],
const uint32_t cipherKey[], int keyBits);
extern int rijndael_key_setup_dec_amd64(uint32_t rk[],
extern ASMABI int rijndael_key_setup_dec_amd64(uint32_t rk[],
const uint32_t cipherKey[], int keyBits);
extern void aes_encrypt_amd64(const uint32_t rk[], int Nr,
extern ASMABI void aes_encrypt_amd64(const uint32_t rk[], int Nr,
const uint32_t pt[4], uint32_t ct[4]);
extern void aes_decrypt_amd64(const uint32_t rk[], int Nr,
extern ASMABI void aes_decrypt_amd64(const uint32_t rk[], int Nr,
const uint32_t ct[4], uint32_t pt[4]);
#endif
#if defined(__x86_64) && defined(HAVE_AES)

View File

@ -1,160 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2004 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _IA32_SYS_STACK_H
#define _IA32_SYS_STACK_H
#if !defined(_ASM)
#include <sys/types.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif
/*
* In the x86 world, a stack frame looks like this:
*
* |--------------------------|
* 4n+8(%ebp) ->| argument word n |
* | ... | (Previous frame)
* 8(%ebp) ->| argument word 0 |
* |--------------------------|--------------------
* 4(%ebp) ->| return address |
* |--------------------------|
* 0(%ebp) ->| previous %ebp (optional) |
* |--------------------------|
* -4(%ebp) ->| unspecified | (Current frame)
* | ... |
* 0(%esp) ->| variable size |
* |--------------------------|
*/
/*
* Stack alignment macros.
*/
#define STACK_ALIGN32 4
#define STACK_ENTRY_ALIGN32 4
#define STACK_BIAS32 0
#define SA32(x) (((x)+(STACK_ALIGN32-1)) & ~(STACK_ALIGN32-1))
#define STACK_RESERVE32 0
#define MINFRAME32 0
#if defined(__amd64)
/*
* In the amd64 world, a stack frame looks like this:
*
* |--------------------------|
* 8n+16(%rbp)->| argument word n |
* | ... | (Previous frame)
* 16(%rbp) ->| argument word 0 |
* |--------------------------|--------------------
* 8(%rbp) ->| return address |
* |--------------------------|
* 0(%rbp) ->| previous %rbp |
* |--------------------------|
* -8(%rbp) ->| unspecified | (Current frame)
* | ... |
* 0(%rsp) ->| variable size |
* |--------------------------|
* -128(%rsp) ->| reserved for function |
* |--------------------------|
*
* The end of the input argument area must be aligned on a 16-byte
* boundary; i.e. (%rsp - 8) % 16 == 0 at function entry.
*
* The 128-byte location beyond %rsp is considered to be reserved for
* functions and is NOT modified by signal handlers. It can be used
* to store temporary data that is not needed across function calls.
*/
/*
* Stack alignment macros.
*/
#define STACK_ALIGN64 16
#define STACK_ENTRY_ALIGN64 8
#define STACK_BIAS64 0
#define SA64(x) (((x)+(STACK_ALIGN64-1)) & ~(STACK_ALIGN64-1))
#define STACK_RESERVE64 128
#define MINFRAME64 0
#define STACK_ALIGN STACK_ALIGN64
#define STACK_ENTRY_ALIGN STACK_ENTRY_ALIGN64
#define STACK_BIAS STACK_BIAS64
#define SA(x) SA64(x)
#define STACK_RESERVE STACK_RESERVE64
#define MINFRAME MINFRAME64
#elif defined(__i386)
#define STACK_ALIGN STACK_ALIGN32
#define STACK_ENTRY_ALIGN STACK_ENTRY_ALIGN32
#define STACK_BIAS STACK_BIAS32
#define SA(x) SA32(x)
#define STACK_RESERVE STACK_RESERVE32
#define MINFRAME MINFRAME32
#endif /* __i386 */
#if defined(_KERNEL) && !defined(_ASM)
#if defined(ZFS_DEBUG)
#if STACK_ALIGN == 4
#define ASSERT_STACK_ALIGNED() \
{ \
uint32_t __tmp; \
ASSERT((((uintptr_t)&__tmp) & (STACK_ALIGN - 1)) == 0); \
}
#elif (STACK_ALIGN == 16) && (_LONG_DOUBLE_ALIGNMENT == 16)
#define ASSERT_STACK_ALIGNED() \
{ \
long double __tmp; \
ASSERT((((uintptr_t)&__tmp) & (STACK_ALIGN - 1)) == 0); \
}
#endif
#else /* DEBUG */
#define ASSERT_STACK_ALIGNED()
#endif /* DEBUG */
struct regs;
void traceregs(struct regs *);
void traceback(caddr_t);
#endif /* defined(_KERNEL) && !defined(_ASM) */
#define STACK_GROWTH_DOWN /* stacks grow from high to low addresses */
#ifdef __cplusplus
}
#endif
#endif /* _IA32_SYS_STACK_H */

View File

@ -1,107 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
/* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */
/* All Rights Reserved */
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _IA32_SYS_TRAP_H
#define _IA32_SYS_TRAP_H
#ifdef __cplusplus
extern "C" {
#endif
/*
* Trap type values
*/
#define T_ZERODIV 0x0 /* #de divide by 0 error */
#define T_SGLSTP 0x1 /* #db single step */
#define T_NMIFLT 0x2 /* NMI */
#define T_BPTFLT 0x3 /* #bp breakpoint fault, INT3 insn */
#define T_OVFLW 0x4 /* #of INTO overflow fault */
#define T_BOUNDFLT 0x5 /* #br BOUND insn fault */
#define T_ILLINST 0x6 /* #ud invalid opcode fault */
#define T_NOEXTFLT 0x7 /* #nm device not available: x87 */
#define T_DBLFLT 0x8 /* #df double fault */
#define T_EXTOVRFLT 0x9 /* [not generated: 386 only] */
#define T_TSSFLT 0xa /* #ts invalid TSS fault */
#define T_SEGFLT 0xb /* #np segment not present fault */
#define T_STKFLT 0xc /* #ss stack fault */
#define T_GPFLT 0xd /* #gp general protection fault */
#define T_PGFLT 0xe /* #pf page fault */
#define T_EXTERRFLT 0x10 /* #mf x87 FPU error fault */
#define T_ALIGNMENT 0x11 /* #ac alignment check error */
#define T_MCE 0x12 /* #mc machine check exception */
#define T_SIMDFPE 0x13 /* #xm SSE/SSE exception */
#define T_DBGENTR 0x14 /* debugger entry */
#define T_ENDPERR 0x21 /* emulated extension error flt */
#define T_ENOEXTFLT 0x20 /* emulated ext not present */
#define T_FASTTRAP 0xd2 /* fast system call */
#define T_SYSCALLINT 0x91 /* general system call */
#define T_DTRACE_RET 0x7f /* DTrace pid return */
#define T_INT80 0x80 /* int80 handler for linux emulation */
#define T_SOFTINT 0x50fd /* pseudo softint trap type */
/*
* Pseudo traps.
*/
#define T_INTERRUPT 0x100
#define T_FAULT 0x200
#define T_AST 0x400
#define T_SYSCALL 0x180
/*
* Values of error code on stack in case of page fault
*/
#define PF_ERR_MASK 0x01 /* Mask for error bit */
#define PF_ERR_PAGE 0x00 /* page not present */
#define PF_ERR_PROT 0x01 /* protection error */
#define PF_ERR_WRITE 0x02 /* fault caused by write (else read) */
#define PF_ERR_USER 0x04 /* processor was in user mode */
/* (else supervisor) */
#define PF_ERR_EXEC 0x10 /* attempt to execute a No eXec page (AMD) */
/*
* Definitions for fast system call subfunctions
*/
#define T_FNULL 0 /* Null trap for testing */
#define T_FGETFP 1 /* Get emulated FP context */
#define T_FSETFP 2 /* Set emulated FP context */
#define T_GETHRTIME 3 /* Get high resolution time */
#define T_GETHRVTIME 4 /* Get high resolution virtual time */
#define T_GETHRESTIME 5 /* Get high resolution time */
#define T_GETLGRP 6 /* Get home lgrpid */
#define T_LASTFAST 6 /* Last valid subfunction */
#ifdef __cplusplus
}
#endif
#endif /* _IA32_SYS_TRAP_H */

View File

@ -9,6 +9,7 @@
#define LUA_CORE
#include <sys/lua/lua.h>
#include <sys/asm_linkage.h>
#include "lapi.h"
#include "ldebug.h"
@ -27,7 +28,6 @@
#include "lzio.h"
/* Return the number of bytes available on the stack. */
#if defined (_KERNEL) && defined(__linux__)
#include <asm/current.h>
@ -90,8 +90,8 @@ static intptr_t stack_remaining(void) {
typedef struct _label_t { long long unsigned val[JMP_BUF_CNT]; } label_t;
int setjmp(label_t *) __attribute__ ((__nothrow__));
extern __attribute__((noreturn)) void longjmp(label_t *);
int ASMABI setjmp(label_t *) __attribute__ ((__nothrow__));
extern __attribute__((noreturn)) void ASMABI longjmp(label_t *);
#define LUAI_THROW(L,c) longjmp(&(c)->b)
#define LUAI_TRY(L,c,a) if (setjmp(&(c)->b) == 0) { a }

View File

@ -27,28 +27,16 @@
#include <linux/linkage.h>
#endif
#ifndef RET
#define RET ret
#endif
#undef ENTRY
#define ENTRY(x) \
.text; \
.align 8; \
.globl x; \
.type x, @function; \
x:
#define SET_SIZE(x) \
.size x, [.-x]
/*
* Setjmp and longjmp implement non-local gotos using state vectors
* type label_t.
*/
#ifdef __x86_64__
ENTRY(setjmp)
#define _ASM
#include <sys/asm_linkage.h>
ENTRY_ALIGN(setjmp, 8)
movq %rsp, 0(%rdi)
movq %rbp, 8(%rdi)
movq %rbx, 16(%rdi)
@ -62,7 +50,7 @@ x:
RET
SET_SIZE(setjmp)
ENTRY(longjmp)
ENTRY_ALIGN(longjmp, 8)
movq 0(%rdi), %rsp
movq 8(%rdi), %rbp
movq 16(%rdi), %rbx