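/*
 * x86-64 assembly in AT&T (GNU as) syntax, run through the C preprocessor.
 * Arguments arrive in %rdi, %rsi, %rdx, %rcx (System V AMD64 order).
 */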
/*
|
|
* ====================================================================
|
|
* Written by Intel Corporation for the OpenSSL project to add support
|
|
* for Intel AES-NI instructions. Rights for redistribution and usage
|
|
* in source and binary forms are granted according to the OpenSSL
|
|
* license.
|
|
*
|
|
* Author: Huang Ying <ying.huang at intel dot com>
|
|
* Vinodh Gopal <vinodh.gopal at intel dot com>
|
|
* Kahraman Akdemir
|
|
*
|
|
* Intel AES-NI is a new set of Single Instruction Multiple Data (SIMD)
|
|
* instructions that are going to be introduced in the next generation
|
|
* of Intel processor, as of 2009. These instructions enable fast and
|
|
* secure data encryption and decryption, using the Advanced Encryption
|
|
* Standard (AES), defined by FIPS Publication number 197. The
|
|
* architecture introduces six instructions that offer full hardware
|
|
* support for AES. Four of them support high performance data
|
|
* encryption and decryption, and the other two instructions support
|
|
* the AES key expansion procedure.
|
|
* ====================================================================
|
|
*/
|
|
|
|
/*
|
|
* ====================================================================
|
|
* Copyright (c) 1998-2008 The OpenSSL Project. All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
*
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
*
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in
|
|
* the documentation and/or other materials provided with the
|
|
* distribution.
|
|
*
|
|
* 3. All advertising materials mentioning features or use of this
|
|
* software must display the following acknowledgment:
|
|
* "This product includes software developed by the OpenSSL Project
|
|
* for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
|
|
*
|
|
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
|
|
* endorse or promote products derived from this software without
|
|
* prior written permission. For written permission, please contact
|
|
* openssl-core@openssl.org.
|
|
*
|
|
* 5. Products derived from this software may not be called "OpenSSL"
|
|
* nor may "OpenSSL" appear in their names without prior written
|
|
* permission of the OpenSSL Project.
|
|
*
|
|
* 6. Redistributions of any form whatsoever must retain the following
|
|
* acknowledgment:
|
|
* "This product includes software developed by the OpenSSL Project
|
|
* for use in the OpenSSL Toolkit (http://www.openssl.org/)"
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
|
|
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
|
|
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
|
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
|
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
|
* OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
* ====================================================================
|
|
*/
|
|
|
|
/*
|
|
* ====================================================================
|
|
* OpenSolaris OS modifications
|
|
*
|
|
* This source originates as files aes-intel.S and eng_aesni_asm.pl, in
|
|
* patches sent Dec. 9, 2008 and Dec. 24, 2008, respectively, by
|
|
* Huang Ying of Intel to the openssl-dev mailing list under the subject
|
|
* of "Add support to Intel AES-NI instruction set for x86_64 platform".
|
|
*
|
|
* This OpenSolaris version has these major changes from the original source:
|
|
*
|
|
* 1. Added OpenSolaris ENTRY_NP/SET_SIZE macros from
|
|
* /usr/include/sys/asm_linkage.h, lint(1B) guards, and dummy C function
|
|
* definitions for lint.
|
|
*
|
|
* 2. Formatted code, added comments, and added #includes and #defines.
|
|
*
|
|
* 3. If bit CR0.TS is set, clear and set the TS bit, after and before
|
|
* calling kpreempt_disable() and kpreempt_enable().
|
|
* If the TS bit is not set, Save and restore %xmm registers at the beginning
|
|
* and end of function calls (%xmm* registers are not saved and restored
|
|
* during kernel thread preemption).
|
|
*
|
|
* 4. Renamed functions, reordered parameters, and changed return value
|
|
* to match OpenSolaris:
|
|
*
|
|
* OpenSSL interface:
|
|
* int intel_AES_set_encrypt_key(const unsigned char *userKey,
|
|
* const int bits, AES_KEY *key);
|
|
* int intel_AES_set_decrypt_key(const unsigned char *userKey,
|
|
* const int bits, AES_KEY *key);
|
|
* Return values for above are non-zero on error, 0 on success.
|
|
*
|
|
* void intel_AES_encrypt(const unsigned char *in, unsigned char *out,
|
|
* const AES_KEY *key);
|
|
* void intel_AES_decrypt(const unsigned char *in, unsigned char *out,
|
|
* const AES_KEY *key);
|
|
* typedef struct aes_key_st {
|
|
* unsigned int rd_key[4 *(AES_MAXNR + 1)];
|
|
* int rounds;
|
|
* unsigned int pad[3];
|
|
* } AES_KEY;
|
|
* Note: AES_LONG is undefined (that is, Intel uses 32-bit key schedules
|
|
* (ks32) instead of 64-bit (ks64)).
|
|
* Number of rounds (aka round count) is at offset 240 of AES_KEY.
|
|
*
|
|
* OpenSolaris OS interface (#ifdefs removed for readability):
|
|
* int rijndael_key_setup_dec_intel(uint32_t rk[],
|
|
* const uint32_t cipherKey[], uint64_t keyBits);
|
|
* int rijndael_key_setup_enc_intel(uint32_t rk[],
|
|
* const uint32_t cipherKey[], uint64_t keyBits);
|
|
* Return values for above are 0 on error, number of rounds on success.
|
|
*
|
|
* void aes_encrypt_intel(const aes_ks_t *ks, int Nr,
|
|
* const uint32_t pt[4], uint32_t ct[4]);
|
|
* void aes_decrypt_intel(const aes_ks_t *ks, int Nr,
|
|
* const uint32_t ct[4], uint32_t pt[4]);
|
|
* typedef union {uint64_t ks64[(MAX_AES_NR + 1) * 4];
|
|
* uint32_t ks32[(MAX_AES_NR + 1) * 4]; } aes_ks_t;
|
|
*
|
|
* typedef union {
|
|
* uint32_t ks32[((MAX_AES_NR) + 1) * (MAX_AES_NB)];
|
|
* } aes_ks_t;
|
|
* typedef struct aes_key {
|
|
* aes_ks_t encr_ks, decr_ks;
|
|
* long double align128;
|
|
* int flags, nr, type;
|
|
* } aes_key_t;
|
|
*
|
|
* Note: ks is the AES key schedule, Nr is number of rounds, pt is plain text,
|
|
* ct is crypto text, and MAX_AES_NR is 14.
|
|
* For the x86 64-bit architecture, OpenSolaris OS uses ks32 instead of ks64.
|
|
*
|
|
* Note2: aes_ks_t must be aligned on a 0 mod 128 byte boundary.
|
|
*
|
|
* ====================================================================
|
|
*/
|
|
|
|
|
|
#if defined(lint) || defined(__lint)
|
|
|
|
#include <sys/types.h>
|
|
|
|
/* ARGSUSED */
void
aes_encrypt_intel(const uint32_t rk[], int Nr, const uint32_t pt[4],
    uint32_t ct[4])
{
	/* Lint-only placeholder: no arguments are read, nothing is written. */
}
|
|
/* ARGSUSED */
void
aes_decrypt_intel(const uint32_t rk[], int Nr, const uint32_t ct[4],
    uint32_t pt[4])
{
	/* Lint-only placeholder: no arguments are read, nothing is written. */
}
|
|
/* ARGSUSED */
int
rijndael_key_setup_enc_intel(uint32_t rk[], const uint32_t cipherKey[],
    uint64_t keyBits)
{
	/* Lint-only placeholder: always reports 0 and touches no argument. */
	return (0);
}
|
|
/* ARGSUSED */
int
rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[],
    uint64_t keyBits)
{
	/* Lint-only placeholder: always reports 0 and touches no argument. */
	return (0);
}
|
|
|
|
|
|
#elif defined(HAVE_AES) /* guard by instruction set */
|
|
|
|
#define _ASM
|
|
#include <sys/asm_linkage.h>
|
|
|
|
/*
|
|
* _key_expansion_128(), _key_expansion_192a(), _key_expansion_192b(),
|
|
* _key_expansion_256a(), _key_expansion_256b()
|
|
*
|
|
* Helper functions called by rijndael_key_setup_enc_intel().
|
|
* Also used indirectly by rijndael_key_setup_dec_intel().
|
|
*
|
|
* Input:
|
|
* %xmm0 User-provided cipher key
|
|
* %xmm1 Round constant
|
|
* Output:
|
|
* (%rcx) AES key
|
|
*/
|
|
|
|
ENTRY_NP2(_key_expansion_128, _key_expansion_256a)
_key_expansion_128_local:
_key_expansion_256a_local:
	/*
	 * In:   %xmm0 = previous round key
	 *       %xmm1 = aeskeygenassist result (dword 3 is consumed)
	 *       %xmm4 = scratch whose dword 0 must be zero (it stays zero)
	 *       %rcx  = address to store the new round key
	 * Out:  %xmm0 = new round key, also stored at (%rcx);
	 *       %rcx advanced by 16
	 * Clobbers: %xmm1, %xmm4
	 */

	// Fold the four dwords of %xmm0 into a running XOR:
	// (x0, x0^x1, x0^x1^x2, x0^x1^x2^x3).
	shufps	$0b00010000, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	shufps	$0b10001100, %xmm0, %xmm4
	pxor	%xmm4, %xmm0

	// Broadcast dword 3 of the keygen-assist value and mix it in.
	pshufd	$0b11111111, %xmm1, %xmm1
	pxor	%xmm1, %xmm0

	movups	%xmm0, (%rcx)		// emit round key
	add	$0x10, %rcx		// next 16-byte slot
	RET
	nop
SET_SIZE(_key_expansion_128)
SET_SIZE(_key_expansion_256a)
|
|
|
|
|
|
ENTRY_NP(_key_expansion_192a)
_key_expansion_192a_local:
	/*
	 * In:   %xmm0 = first four key words of the previous 192-bit group
	 *       %xmm2 = remaining two key words (low qword is used)
	 *       %xmm1 = aeskeygenassist result (dword 1 is consumed)
	 *       %xmm4 = scratch whose dword 0 must be zero (it stays zero)
	 *       %rcx  = address to store output
	 * Out:  32 bytes stored at (%rcx); %rcx advanced by 32;
	 *       %xmm0 / %xmm2 hold the next group of key words
	 * Clobbers: %xmm1, %xmm3, %xmm4, %xmm5, %xmm6
	 */

	// Running XOR across the dwords of %xmm0, then mix in the
	// broadcast dword 1 of the keygen-assist value.
	shufps	$0b00010000, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	shufps	$0b10001100, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	pshufd	$0b01010101, %xmm1, %xmm1
	pxor	%xmm1, %xmm0

	// Derive the next two key words in %xmm2 from the old %xmm2 and
	// dword 3 of the freshly computed %xmm0.
	movups	%xmm2, %xmm6		// keep old %xmm2 for the first store
	movups	%xmm2, %xmm5
	pshufd	$0b11111111, %xmm0, %xmm3
	pslldq	$4, %xmm5		// shift old words up by one dword
	pxor	%xmm3, %xmm2
	pxor	%xmm5, %xmm2

	// First store: low qword of old %xmm2 | low qword of new %xmm0.
	movups	%xmm0, %xmm1
	shufps	$0b01000100, %xmm0, %xmm6
	movups	%xmm6, (%rcx)
	// Second store: high qword of new %xmm0 | low qword of new %xmm2.
	shufps	$0b01001110, %xmm2, %xmm1
	movups	%xmm1, 0x10(%rcx)
	add	$0x20, %rcx		// two 16-byte slots were written
	RET
SET_SIZE(_key_expansion_192a)
|
|
|
|
|
|
ENTRY_NP(_key_expansion_192b)
_key_expansion_192b_local:
	/*
	 * In:   %xmm0 = first four key words of the previous 192-bit group
	 *       %xmm2 = remaining two key words (low qword is used)
	 *       %xmm1 = aeskeygenassist result (dword 1 is consumed)
	 *       %xmm4 = scratch whose dword 0 must be zero (it stays zero)
	 *       %rcx  = address to store output
	 * Out:  new %xmm0 stored at (%rcx); %rcx advanced by 16;
	 *       %xmm2 updated for the following expansion step
	 * Clobbers: %xmm1, %xmm3, %xmm4, %xmm5
	 */

	// Running XOR across the dwords of %xmm0, then mix in the
	// broadcast dword 1 of the keygen-assist value.
	shufps	$0b00010000, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	shufps	$0b10001100, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	pshufd	$0b01010101, %xmm1, %xmm1
	pxor	%xmm1, %xmm0

	// Advance the two trailing key words held in %xmm2.
	movups	%xmm2, %xmm5
	pshufd	$0b11111111, %xmm0, %xmm3
	pslldq	$4, %xmm5		// shift old words up by one dword
	pxor	%xmm3, %xmm2
	pxor	%xmm5, %xmm2

	movups	%xmm0, (%rcx)		// only %xmm0 is emitted here
	add	$0x10, %rcx
	RET
SET_SIZE(_key_expansion_192b)
|
|
|
|
|
|
ENTRY_NP(_key_expansion_256b)
_key_expansion_256b_local:
	/*
	 * Same shape as _key_expansion_256a, but it operates on %xmm2 and
	 * consumes dword 2 of the keygen-assist value.
	 *
	 * In:   %xmm2 = round key to advance
	 *       %xmm1 = aeskeygenassist result (dword 2 is consumed)
	 *       %xmm4 = scratch whose dword 0 must be zero (it stays zero)
	 *       %rcx  = address to store the new round key
	 * Out:  %xmm2 = new round key, also stored at (%rcx);
	 *       %rcx advanced by 16
	 * Clobbers: %xmm1, %xmm4
	 */

	// Running XOR across the dwords of %xmm2.
	shufps	$0b00010000, %xmm2, %xmm4
	pxor	%xmm4, %xmm2
	shufps	$0b10001100, %xmm2, %xmm4
	pxor	%xmm4, %xmm2

	// Broadcast dword 2 of the keygen-assist value and mix it in.
	pshufd	$0b10101010, %xmm1, %xmm1
	pxor	%xmm1, %xmm2

	movups	%xmm2, (%rcx)		// emit round key
	add	$0x10, %rcx		// next 16-byte slot
	RET
SET_SIZE(_key_expansion_256b)
|
|
|
|
|
|
/*
|
|
* rijndael_key_setup_enc_intel()
|
|
* Expand the cipher key into the encryption key schedule.
|
|
*
|
|
* For kernel code, caller is responsible for ensuring kpreempt_disable()
|
|
* has been called. This is because %xmm registers are not saved/restored.
|
|
* Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set
|
|
* on entry. Otherwise, if TS is not set, save and restore %xmm registers
|
|
* on the stack.
|
|
*
|
|
* OpenSolaris interface:
|
|
* int rijndael_key_setup_enc_intel(uint32_t rk[], const uint32_t cipherKey[],
|
|
* uint64_t keyBits);
|
|
* Return value is 0 on error, number of rounds on success.
|
|
*
|
|
* Original Intel OpenSSL interface:
|
|
* int intel_AES_set_encrypt_key(const unsigned char *userKey,
|
|
* const int bits, AES_KEY *key);
|
|
* Return value is non-zero on error, 0 on success.
|
|
*/
|
|
|
|
/*
 * Register names used by the two key-setup routines.  The parameter order
 * differs between the OpenSSL and the OpenSolaris prototypes, so the same
 * code is written against these symbolic names.
 */
#if defined(OPENSSL_INTERFACE)
/* Export the routines under their OpenSSL names. */
#define	rijndael_key_setup_enc_intel	intel_AES_set_encrypt_key
#define	rijndael_key_setup_dec_intel	intel_AES_set_decrypt_key

#define	USERCIPHERKEY	rdi	/* P1, 64 bits */
#define	KEYSIZE32	esi	/* P2, 32 bits */
#define	KEYSIZE64	rsi	/* P2, 64 bits */
#define	AESKEY		rdx	/* P3, 64 bits */

#else	/* OpenSolaris Interface */
#define	AESKEY		rdi	/* P1, 64 bits */
#define	USERCIPHERKEY	rsi	/* P2, 64 bits */
#define	KEYSIZE32	edx	/* P3, 32 bits */
#define	KEYSIZE64	rdx	/* P3, 64 bits */
#endif	/* OPENSSL_INTERFACE */

/*
 * Aliases that reuse argument registers as temporaries once the original
 * argument value is no longer needed.
 */
#define	ROUNDS32	KEYSIZE32	/* temp */
#define	ROUNDS64	KEYSIZE64	/* temp */
#define	ENDAESKEY	USERCIPHERKEY	/* temp */
|
|
|
|
ENTRY_NP(rijndael_key_setup_enc_intel)
rijndael_key_setup_enc_intel_local:
	/*
	 * Expand the user key at (%USERCIPHERKEY) into the encryption key
	 * schedule at (%AESKEY).
	 *
	 * In:   %USERCIPHERKEY = user key (16, 24 or 32 bytes are read)
	 *       %KEYSIZE32     = key size in bits: 128, 192 or 256
	 *                        (only the low 32 bits are compared)
	 *       %AESKEY        = output key schedule
	 * Out (OpenSolaris): %eax = 10, 12 or 14 rounds; 0 if a pointer is
	 *       NULL or the key size is unsupported.
	 * Out (OpenSSL):     %eax = 0 on success, -1 if a pointer is NULL,
	 *       -2 if the key size is unsupported; the round count is also
	 *       stored at offset 240 of the schedule.
	 * Clobbers: %rcx, %xmm0-%xmm6 (via the _key_expansion_* helpers),
	 *       and, for the OpenSSL interface, the key-size register.
	 *
	 * Note: the first 16 bytes of the schedule are written before the
	 * key size is validated, so they are overwritten even when an
	 * unsupported size is rejected.
	 *
	 * FRAME_BEGIN/FRAME_END are supplied by the included linkage header.
	 */
	FRAME_BEGIN
	// NULL pointer sanity check
	test	%USERCIPHERKEY, %USERCIPHERKEY
	jz	.Lenc_key_invalid_param
	test	%AESKEY, %AESKEY
	jz	.Lenc_key_invalid_param

	movups	(%USERCIPHERKEY), %xmm0	// user key (first 16 bytes)
	movups	%xmm0, (%AESKEY)	// round key 0 is the raw key
	lea	0x10(%AESKEY), %rcx	// %rcx = next round-key slot
	pxor	%xmm4, %xmm4		// helpers require dword 0 of xmm4 == 0

	cmp	$256, %KEYSIZE32
	jnz	.Lenc_key192

	// AES 256: 14 rounds in encryption key schedule
#ifdef OPENSSL_INTERFACE
	mov	$14, %ROUNDS32
	movl	%ROUNDS32, 240(%AESKEY)		// key.rounds = 14
#endif	/* OPENSSL_INTERFACE */

	movups	0x10(%USERCIPHERKEY), %xmm2	// other user key (2nd 16 bytes)
	movups	%xmm2, (%rcx)			// round key 1 is the raw key too
	add	$0x10, %rcx

	// Each a/b pair produces two round keys; the immediate is the
	// round constant for that pair.
	aeskeygenassist $0x1, %xmm2, %xmm1
	call	_key_expansion_256a_local
	aeskeygenassist $0x1, %xmm0, %xmm1
	call	_key_expansion_256b_local
	aeskeygenassist $0x2, %xmm2, %xmm1
	call	_key_expansion_256a_local
	aeskeygenassist $0x2, %xmm0, %xmm1
	call	_key_expansion_256b_local
	aeskeygenassist $0x4, %xmm2, %xmm1
	call	_key_expansion_256a_local
	aeskeygenassist $0x4, %xmm0, %xmm1
	call	_key_expansion_256b_local
	aeskeygenassist $0x8, %xmm2, %xmm1
	call	_key_expansion_256a_local
	aeskeygenassist $0x8, %xmm0, %xmm1
	call	_key_expansion_256b_local
	aeskeygenassist $0x10, %xmm2, %xmm1
	call	_key_expansion_256a_local
	aeskeygenassist $0x10, %xmm0, %xmm1
	call	_key_expansion_256b_local
	aeskeygenassist $0x20, %xmm2, %xmm1
	call	_key_expansion_256a_local
	aeskeygenassist $0x20, %xmm0, %xmm1
	call	_key_expansion_256b_local
	aeskeygenassist $0x40, %xmm2, %xmm1
	call	_key_expansion_256a_local	// final (15th) round key

#ifdef OPENSSL_INTERFACE
	xor	%eax, %eax			// return 0 (OK)
#else	/* OpenSolaris Interface */
	mov	$14, %eax			// return # rounds = 14
#endif
	FRAME_END
	RET

.align 4
.Lenc_key192:
	cmp	$192, %KEYSIZE32
	jnz	.Lenc_key128

	// AES 192: 12 rounds in encryption key schedule
#ifdef OPENSSL_INTERFACE
	mov	$12, %ROUNDS32
	movl	%ROUNDS32, 240(%AESKEY)	// key.rounds = 12
#endif	/* OPENSSL_INTERFACE */

	movq	0x10(%USERCIPHERKEY), %xmm2	// other user key (last 8 bytes)
	// Each a/b pair writes 48 bytes (three round keys).
	aeskeygenassist $0x1, %xmm2, %xmm1
	call	_key_expansion_192a_local
	aeskeygenassist $0x2, %xmm2, %xmm1
	call	_key_expansion_192b_local
	aeskeygenassist $0x4, %xmm2, %xmm1
	call	_key_expansion_192a_local
	aeskeygenassist $0x8, %xmm2, %xmm1
	call	_key_expansion_192b_local
	aeskeygenassist $0x10, %xmm2, %xmm1
	call	_key_expansion_192a_local
	aeskeygenassist $0x20, %xmm2, %xmm1
	call	_key_expansion_192b_local
	aeskeygenassist $0x40, %xmm2, %xmm1
	call	_key_expansion_192a_local
	aeskeygenassist $0x80, %xmm2, %xmm1
	call	_key_expansion_192b_local

#ifdef OPENSSL_INTERFACE
	xor	%eax, %eax			// return 0 (OK)
#else	/* OpenSolaris Interface */
	mov	$12, %eax			// return # rounds = 12
#endif
	FRAME_END
	RET

.align 4
.Lenc_key128:
	cmp	$128, %KEYSIZE32
	jnz	.Lenc_key_invalid_key_bits

	// AES 128: 10 rounds in encryption key schedule
#ifdef OPENSSL_INTERFACE
	mov	$10, %ROUNDS32
	movl	%ROUNDS32, 240(%AESKEY)		// key.rounds = 10
#endif	/* OPENSSL_INTERFACE */

	// One helper call per round key; the immediate is the round constant.
	aeskeygenassist $0x1, %xmm0, %xmm1
	call	_key_expansion_128_local
	aeskeygenassist $0x2, %xmm0, %xmm1
	call	_key_expansion_128_local
	aeskeygenassist $0x4, %xmm0, %xmm1
	call	_key_expansion_128_local
	aeskeygenassist $0x8, %xmm0, %xmm1
	call	_key_expansion_128_local
	aeskeygenassist $0x10, %xmm0, %xmm1
	call	_key_expansion_128_local
	aeskeygenassist $0x20, %xmm0, %xmm1
	call	_key_expansion_128_local
	aeskeygenassist $0x40, %xmm0, %xmm1
	call	_key_expansion_128_local
	aeskeygenassist $0x80, %xmm0, %xmm1
	call	_key_expansion_128_local
	aeskeygenassist $0x1b, %xmm0, %xmm1
	call	_key_expansion_128_local
	aeskeygenassist $0x36, %xmm0, %xmm1
	call	_key_expansion_128_local

#ifdef OPENSSL_INTERFACE
	xor	%eax, %eax			// return 0 (OK)
#else	/* OpenSolaris Interface */
	mov	$10, %eax			// return # rounds = 10
#endif
	FRAME_END
	RET

.Lenc_key_invalid_param:
#ifdef OPENSSL_INTERFACE
	mov	$-1, %rax	// user key or AES key pointer is NULL
	FRAME_END
	RET
#else
	/* FALLTHROUGH */
#endif	/* OPENSSL_INTERFACE */

.Lenc_key_invalid_key_bits:
#ifdef OPENSSL_INTERFACE
	mov	$-2, %rax	// keysize is invalid
#else	/* OpenSolaris Interface */
	xor	%eax, %eax	// a key pointer is NULL or invalid keysize
#endif	/* OPENSSL_INTERFACE */
	FRAME_END
	RET
	SET_SIZE(rijndael_key_setup_enc_intel)
|
|
|
|
|
|
/*
|
|
* rijndael_key_setup_dec_intel()
|
|
* Expand the cipher key into the decryption key schedule.
|
|
*
|
|
* For kernel code, caller is responsible for ensuring kpreempt_disable()
|
|
* has been called. This is because %xmm registers are not saved/restored.
|
|
* Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set
|
|
* on entry. Otherwise, if TS is not set, save and restore %xmm registers
|
|
* on the stack.
|
|
*
|
|
* OpenSolaris interface:
|
|
* int rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[],
|
|
* uint64_t keyBits);
|
|
* Return value is 0 on error, number of rounds on success.
|
|
* P1->P2, P2->P3, P3->P1
|
|
*
|
|
* Original Intel OpenSSL interface:
|
|
* int intel_AES_set_decrypt_key(const unsigned char *userKey,
|
|
* const int bits, AES_KEY *key);
|
|
* Return value is non-zero on error, 0 on success.
|
|
*/
|
|
|
|
ENTRY_NP(rijndael_key_setup_dec_intel)
	/*
	 * Build the decryption key schedule in three steps:
	 *   1. generate the encryption schedule,
	 *   2. reverse the order of its round keys in place,
	 *   3. run aesimc over every round key except the first and last.
	 * %rax is passed through unchanged from step 1, so the return value
	 * follows the same convention as the encryption key setup.
	 */
	FRAME_BEGIN
	// Step 1: generate round keys used for encryption
	call	rijndael_key_setup_enc_intel_local
	test	%rax, %rax
#ifdef OPENSSL_INTERFACE
	jnz	.Ldec_key_exit	// Failed if returned non-0
#else	/* OpenSolaris Interface */
	jz	.Ldec_key_exit	// Failed if returned 0
#endif	/* OPENSSL_INTERFACE */

#ifndef OPENSSL_INTERFACE	/* OpenSolaris Interface */
	mov	%rax, %ROUNDS64		// set # rounds (10, 12, or 14)
					// (already set for OpenSSL)
#endif

	lea	0x10(%AESKEY), %rcx	// %rcx = address of round key 1
	shl	$4, %ROUNDS32		// rounds * 16 = offset of last key
	add	%AESKEY, %ROUNDS64	// ROUNDS64 = address of last round key
	mov	%ROUNDS64, %ENDAESKEY	// remember it as the aesimc stop point

	// Step 2: swap round keys from both ends, moving inward, until the
	// two pointers meet or cross.
.align 4
.Ldec_key_reorder_loop:
	movups	(%AESKEY), %xmm0
	movups	(%ROUNDS64), %xmm1
	movups	%xmm0, (%ROUNDS64)
	movups	%xmm1, (%AESKEY)
	lea	0x10(%AESKEY), %AESKEY
	lea	-0x10(%ROUNDS64), %ROUNDS64
	cmp	%AESKEY, %ROUNDS64
	ja	.Ldec_key_reorder_loop	// unsigned: high pointer still above low

	// Step 3: convert the inner round keys with the
	// "AES Inverse Mix Columns" instruction.
.align 4
.Ldec_key_inv_loop:
	movups	(%rcx), %xmm0
	aesimc	%xmm0, %xmm1
	movups	%xmm1, (%rcx)
	lea	0x10(%rcx), %rcx
	cmp	%ENDAESKEY, %rcx
	jnz	.Ldec_key_inv_loop	// stop before the last round key

.Ldec_key_exit:
	// OpenSolaris: rax = # rounds (10, 12, or 14) or 0 for error
	// OpenSSL: rax = 0 for OK, or non-zero for error
	FRAME_END
	RET
	SET_SIZE(rijndael_key_setup_dec_intel)
|
|
|
|
|
|
/*
|
|
* aes_encrypt_intel()
|
|
* Encrypt a single block (in and out can overlap).
|
|
*
|
|
* For kernel code, caller is responsible for ensuring kpreempt_disable()
|
|
* has been called. This is because %xmm registers are not saved/restored.
|
|
* Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set
|
|
* on entry. Otherwise, if TS is not set, save and restore %xmm registers
|
|
* on the stack.
|
|
*
|
|
* Temporary register usage:
|
|
* %xmm0 State
|
|
* %xmm1 Key
|
|
*
|
|
* Original OpenSolaris Interface:
|
|
* void aes_encrypt_intel(const aes_ks_t *ks, int Nr,
|
|
* const uint32_t pt[4], uint32_t ct[4])
|
|
*
|
|
* Original Intel OpenSSL Interface:
|
|
* void intel_AES_encrypt(const unsigned char *in, unsigned char *out,
|
|
* const AES_KEY *key)
|
|
*/
|
|
|
|
/*
 * Register names used by the single-block encrypt and decrypt routines.
 * The parameter order differs between the two prototypes, so the code is
 * written against these symbolic names.
 */
#if defined(OPENSSL_INTERFACE)
/* Export the routines under their OpenSSL names. */
#define	aes_encrypt_intel	intel_AES_encrypt
#define	aes_decrypt_intel	intel_AES_decrypt

#define	INP		rdi	/* P1, 64 bits */
#define	OUTP		rsi	/* P2, 64 bits */
#define	KEYP		rdx	/* P3, 64 bits */

/* No NROUNDS parameter--offset 240 from KEYP saved in %ecx:  */
#define	NROUNDS32	ecx	/* temporary, 32 bits */
#define	NROUNDS		cl	/* temporary,  8 bits */

#else	/* OpenSolaris Interface */
#define	KEYP		rdi	/* P1, 64 bits */
#define	NROUNDS		esi	/* P2, 32 bits */
#define	INP		rdx	/* P3, 64 bits */
#define	OUTP		rcx	/* P4, 64 bits */
#endif	/* OPENSSL_INTERFACE */

/* Scratch vector registers shared by both routines. */
#define	STATE		xmm0	/* temporary, 128 bits */
#define	KEY		xmm1	/* temporary, 128 bits */
|
|
|
|
|
|
ENTRY_NP(aes_encrypt_intel)
	/*
	 * Encrypt the 16-byte block at (%INP) with the key schedule at
	 * (%KEYP) and store the result at (%OUTP).
	 *
	 * %KEYP is advanced by 0x30, 0x50 or 0x70 depending on the round
	 * count so that the final ten rounds always use displacements
	 * -0x20 .. 0x70, which lets all key sizes share one tail.
	 * A round count below 12 takes the 10-round path, exactly 12 the
	 * 12-round path, anything larger the 14-round path.
	 *
	 * Clobbers: %STATE, %KEY, %KEYP, flags (and the round-count register
	 * for the OpenSSL interface).
	 */

	movups	(%KEYP), %KEY			// round key 0
	movups	(%INP), %STATE			// input block
#ifdef OPENSSL_INTERFACE
	mov	240(%KEYP), %NROUNDS32		// round count
#else	/* OpenSolaris Interface */
	/* Round count is already present as P2 in %rsi/%esi */
#endif	/* OPENSSL_INTERFACE */

	pxor	%KEY, %STATE			// round 0
	lea	0x30(%KEYP), %KEYP
	cmp	$12, %NROUNDS
	jb	.Lenc128
	lea	0x20(%KEYP), %KEYP		// lea leaves the cmp flags intact
	je	.Lenc192

	// AES 256: two extra rounds
	lea	0x20(%KEYP), %KEYP
	movups	-0x60(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	-0x50(%KEYP), %KEY
	aesenc	%KEY, %STATE

.align 4
.Lenc192:
	// AES 192 and 256: two extra rounds
	movups	-0x40(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	-0x30(%KEYP), %KEY
	aesenc	%KEY, %STATE

.align 4
.Lenc128:
	// AES 128, 192, and 256: the common last ten rounds
	movups	-0x20(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	-0x10(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	0x10(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	0x20(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	0x30(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	0x40(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	0x50(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	0x60(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	0x70(%KEYP), %KEY
	aesenclast	 %KEY, %STATE		// last round
	movups	%STATE, (%OUTP)			// output block

	RET
	SET_SIZE(aes_encrypt_intel)
|
|
|
|
|
|
/*
|
|
* aes_decrypt_intel()
|
|
* Decrypt a single block (in and out can overlap).
|
|
*
|
|
* For kernel code, caller is responsible for ensuring kpreempt_disable()
|
|
* has been called. This is because %xmm registers are not saved/restored.
|
|
* Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set
|
|
* on entry. Otherwise, if TS is not set, save and restore %xmm registers
|
|
* on the stack.
|
|
*
|
|
* Temporary register usage:
|
|
* %xmm0 State
|
|
* %xmm1 Key
|
|
*
|
|
* Original OpenSolaris Interface:
|
|
* void aes_decrypt_intel(const aes_ks_t *ks, int Nr,
|
|
* const uint32_t ct[4], uint32_t pt[4]);
|
|
*
|
|
* Original Intel OpenSSL Interface:
|
|
* void intel_AES_decrypt(const unsigned char *in, unsigned char *out,
|
|
* const AES_KEY *key);
|
|
*/
|
|
ENTRY_NP(aes_decrypt_intel)
	/*
	 * Decrypt the 16-byte block at (%INP) with the key schedule at
	 * (%KEYP) and store the result at (%OUTP).
	 *
	 * %KEYP is advanced by 0x30, 0x50 or 0x70 depending on the round
	 * count so that the final ten rounds always use displacements
	 * -0x20 .. 0x70, which lets all key sizes share one tail.
	 * A round count below 12 takes the 10-round path, exactly 12 the
	 * 12-round path, anything larger the 14-round path.
	 *
	 * Clobbers: %STATE, %KEY, %KEYP, flags (and the round-count register
	 * for the OpenSSL interface).
	 */

	movups	(%KEYP), %KEY			// round key 0
	movups	(%INP), %STATE			// input block
#ifdef OPENSSL_INTERFACE
	mov	240(%KEYP), %NROUNDS32		// round count
#else	/* OpenSolaris Interface */
	/* Round count is already present as P2 in %rsi/%esi */
#endif	/* OPENSSL_INTERFACE */

	pxor	%KEY, %STATE			// round 0
	lea	0x30(%KEYP), %KEYP
	cmp	$12, %NROUNDS
	jb	.Ldec128
	lea	0x20(%KEYP), %KEYP		// lea leaves the cmp flags intact
	je	.Ldec192

	// AES 256: two extra rounds
	lea	0x20(%KEYP), %KEYP
	movups	-0x60(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	-0x50(%KEYP), %KEY
	aesdec	%KEY, %STATE

.align 4
.Ldec192:
	// AES 192 and 256: two extra rounds
	movups	-0x40(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	-0x30(%KEYP), %KEY
	aesdec	%KEY, %STATE

.align 4
.Ldec128:
	// AES 128, 192, and 256: the common last ten rounds
	movups	-0x20(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	-0x10(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	0x10(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	0x20(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	0x30(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	0x40(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	0x50(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	0x60(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	0x70(%KEYP), %KEY
	aesdeclast	%KEY, %STATE		// last round
	movups	%STATE, (%OUTP)			// output block

	RET
	SET_SIZE(aes_decrypt_intel)
|
|
|
|
#endif /* lint || __lint */
|
|
|
|
#if defined(__ELF__)
/* Emit an empty .note.GNU-stack section on ELF targets. */
.section .note.GNU-stack,"",%progbits
#endif	/* __ELF__ */
|