Add generic implementation handling and SHA2 impl
The skeleton file module/icp/include/generic_impl.c can be used for
iterating over different implementations of algorithms.
It is used by SHA256, SHA512 and BLAKE3 currently.
The Solaris SHA2 implementation got replaced with a version which is
based on public domain code of cppcrypto v0.10.
These assembly files are taken from current openssl master:
- sha256-x86_64.S: x64, SSSE3, AVX, AVX2, SHA-NI (x86_64)
- sha512-x86_64.S: x64, AVX, AVX2 (x86_64)
- sha256-armv7.S: ARMv7, NEON, ARMv8-CE (arm)
- sha512-armv7.S: ARMv7, NEON (arm)
- sha256-armv8.S: ARMv7, NEON, ARMv8-CE (aarch64)
- sha512-armv8.S: ARMv7, ARMv8-CE (aarch64)
- sha256-ppc.S: Generic PPC64 LE/BE (ppc64)
- sha512-ppc.S: Generic PPC64 LE/BE (ppc64)
- sha256-p8.S: Power8 ISA Version 2.07 LE/BE (ppc64)
- sha512-p8.S: Power8 ISA Version 2.07 LE/BE (ppc64)
Tested-by: Rich Ercolani <rincebrain@gmail.com>
Tested-by: Sebastian Gottschall <s.gottschall@dd-wrt.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Tino Reichardt <milky-zfs@mcmilk.de>
Closes #13741
2023-03-01 08:40:28 +00:00
|
|
|
/*
|
|
|
|
* Copyright 2004-2022 The OpenSSL Project Authors. All Rights Reserved.
|
|
|
|
*
|
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
|
* You may obtain a copy of the License at
|
|
|
|
*
|
|
|
|
* https://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
*
|
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
* limitations under the License.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Portions Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
|
|
|
|
* - modified assembly to fit into OpenZFS
|
|
|
|
*/
|
|
|
|
|
|
|
|
#if defined(__x86_64)
|
|
|
|
|
|
|
|
#define _ASM
|
|
|
|
#include <sys/asm_linkage.h>
|
|
|
|
|
2023-03-01 19:44:49 +00:00
|
|
|
SECTION_STATIC
|
Add generic implementation handling and SHA2 impl
The skeleton file module/icp/include/generic_impl.c can be used for
iterating over different implementations of algorithms.
It is used by SHA256, SHA512 and BLAKE3 currently.
The Solaris SHA2 implementation got replaced with a version which is
based on public domain code of cppcrypto v0.10.
These assembly files are taken from current openssl master:
- sha256-x86_64.S: x64, SSSE3, AVX, AVX2, SHA-NI (x86_64)
- sha512-x86_64.S: x64, AVX, AVX2 (x86_64)
- sha256-armv7.S: ARMv7, NEON, ARMv8-CE (arm)
- sha512-armv7.S: ARMv7, NEON (arm)
- sha256-armv8.S: ARMv7, NEON, ARMv8-CE (aarch64)
- sha512-armv8.S: ARMv7, ARMv8-CE (aarch64)
- sha256-ppc.S: Generic PPC64 LE/BE (ppc64)
- sha512-ppc.S: Generic PPC64 LE/BE (ppc64)
- sha256-p8.S: Power8 ISA Version 2.07 LE/BE (ppc64)
- sha512-p8.S: Power8 ISA Version 2.07 LE/BE (ppc64)
Tested-by: Rich Ercolani <rincebrain@gmail.com>
Tested-by: Sebastian Gottschall <s.gottschall@dd-wrt.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Tino Reichardt <milky-zfs@mcmilk.de>
Closes #13741
2023-03-01 08:40:28 +00:00
|
|
|
|
2023-03-06 23:24:05 +00:00
|
|
|
.balign 64
|
|
|
|
SET_OBJ(K256)
|
Add generic implementation handling and SHA2 impl
The skeleton file module/icp/include/generic_impl.c can be used for
iterating over different implementations of algorithms.
It is used by SHA256, SHA512 and BLAKE3 currently.
The Solaris SHA2 implementation got replaced with a version which is
based on public domain code of cppcrypto v0.10.
These assembly files are taken from current openssl master:
- sha256-x86_64.S: x64, SSSE3, AVX, AVX2, SHA-NI (x86_64)
- sha512-x86_64.S: x64, AVX, AVX2 (x86_64)
- sha256-armv7.S: ARMv7, NEON, ARMv8-CE (arm)
- sha512-armv7.S: ARMv7, NEON (arm)
- sha256-armv8.S: ARMv7, NEON, ARMv8-CE (aarch64)
- sha512-armv8.S: ARMv7, ARMv8-CE (aarch64)
- sha256-ppc.S: Generic PPC64 LE/BE (ppc64)
- sha512-ppc.S: Generic PPC64 LE/BE (ppc64)
- sha256-p8.S: Power8 ISA Version 2.07 LE/BE (ppc64)
- sha512-p8.S: Power8 ISA Version 2.07 LE/BE (ppc64)
Tested-by: Rich Ercolani <rincebrain@gmail.com>
Tested-by: Sebastian Gottschall <s.gottschall@dd-wrt.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Tino Reichardt <milky-zfs@mcmilk.de>
Closes #13741
2023-03-01 08:40:28 +00:00
|
|
|
K256:
/*
 * K256: table of 32-bit constants that the round code addresses
 * through %rbp (loaded with "leaq K256(%rip),%rbp").
 *
 * The first 32 .long rows are 16 distinct rows of four words, each row
 * emitted twice back to back.  The 64 distinct words are the SHA-256
 * round constants K[0..63] in order.
 *
 * The scalar rounds in zfs_sha256_transform_x64 read one word with
 * "addl (%rbp),%r12d" and then advance %rbp by 4, 4, 4 and 20 bytes,
 * i.e. 32 bytes per four rounds: they consume the first copy of a row
 * and step over its duplicate.
 *
 * NOTE(review): the duplicated rows presumably serve wider SIMD code
 * paths that are not visible in this part of the file - confirm.
 */
|
|
|
|
.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
|
|
|
|
.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
|
|
|
|
.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
|
|
|
|
.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
|
|
|
|
.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
|
|
|
|
.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
|
|
|
|
.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
|
|
|
|
.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
|
|
|
|
.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
|
|
|
|
.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
|
|
|
|
.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
|
|
|
|
.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
|
|
|
|
.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
|
|
|
|
.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
|
|
|
|
.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
|
|
|
|
.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
|
|
|
|
.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
|
|
|
|
.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
|
|
|
|
.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
|
|
|
|
.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
|
|
|
|
.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
|
|
|
|
.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
|
|
|
|
.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
|
|
|
|
.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
|
|
|
|
.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
|
|
|
|
.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
|
|
|
|
.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
|
|
|
|
.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
|
|
|
|
.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
|
|
|
|
.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
|
|
|
|
.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
|
|
|
|
.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
|
|
|
|
|
|
|
|
/*
 * Trailing rows, again emitted in identical pairs.  These are not
 * round constants and the scalar rounds visible in this file never
 * reach them.
 *
 * NOTE(review): they look like byte-shuffle control masks (the first
 * pattern would reverse the bytes of each 32-bit word, the 0xffffffff
 * lanes would zero their destination bytes) for SIMD variants defined
 * elsewhere in the file - confirm against their users.
 */
.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
|
|
|
|
.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
|
|
|
|
.long 0x03020100,0x0b0a0908,0xffffffff,0xffffffff
|
|
|
|
.long 0x03020100,0x0b0a0908,0xffffffff,0xffffffff
|
|
|
|
.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908
|
|
|
|
.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908
|
|
|
|
|
|
|
|
ENTRY_ALIGN(zfs_sha256_transform_x64, 16)
|
|
|
|
.cfi_startproc
|
|
|
|
ENDBR
|
|
|
|
movq %rsp,%rax
|
|
|
|
.cfi_def_cfa_register %rax
|
|
|
|
pushq %rbx
|
|
|
|
.cfi_offset %rbx,-16
|
|
|
|
pushq %rbp
|
|
|
|
.cfi_offset %rbp,-24
|
|
|
|
pushq %r12
|
|
|
|
.cfi_offset %r12,-32
|
|
|
|
pushq %r13
|
|
|
|
.cfi_offset %r13,-40
|
|
|
|
pushq %r14
|
|
|
|
.cfi_offset %r14,-48
|
|
|
|
pushq %r15
|
|
|
|
.cfi_offset %r15,-56
|
|
|
|
shlq $4,%rdx
|
|
|
|
subq $64+32,%rsp
|
|
|
|
leaq (%rsi,%rdx,4),%rdx
|
|
|
|
andq $-64,%rsp
|
|
|
|
movq %rdi,64+0(%rsp)
|
|
|
|
movq %rsi,64+8(%rsp)
|
|
|
|
movq %rdx,64+16(%rsp)
|
|
|
|
movq %rax,88(%rsp)
|
|
|
|
.cfi_escape 0x0f,0x06,0x77,0xd8,0x00,0x06,0x23,0x08
|
|
|
|
.Lprologue:
|
|
|
|
movl 0(%rdi),%eax
|
|
|
|
movl 4(%rdi),%ebx
|
|
|
|
movl 8(%rdi),%ecx
|
|
|
|
movl 12(%rdi),%edx
|
|
|
|
movl 16(%rdi),%r8d
|
|
|
|
movl 20(%rdi),%r9d
|
|
|
|
movl 24(%rdi),%r10d
|
|
|
|
movl 28(%rdi),%r11d
|
|
|
|
jmp .Lloop
|
2023-03-06 23:24:05 +00:00
|
|
|
.balign 16
|
Add generic implementation handling and SHA2 impl
The skeleton file module/icp/include/generic_impl.c can be used for
iterating over different implementations of algorithms.
It is used by SHA256, SHA512 and BLAKE3 currently.
The Solaris SHA2 implementation got replaced with a version which is
based on public domain code of cppcrypto v0.10.
These assembly files are taken from current openssl master:
- sha256-x86_64.S: x64, SSSE3, AVX, AVX2, SHA-NI (x86_64)
- sha512-x86_64.S: x64, AVX, AVX2 (x86_64)
- sha256-armv7.S: ARMv7, NEON, ARMv8-CE (arm)
- sha512-armv7.S: ARMv7, NEON (arm)
- sha256-armv8.S: ARMv7, NEON, ARMv8-CE (aarch64)
- sha512-armv8.S: ARMv7, ARMv8-CE (aarch64)
- sha256-ppc.S: Generic PPC64 LE/BE (ppc64)
- sha512-ppc.S: Generic PPC64 LE/BE (ppc64)
- sha256-p8.S: Power8 ISA Version 2.07 LE/BE (ppc64)
- sha512-p8.S: Power8 ISA Version 2.07 LE/BE (ppc64)
Tested-by: Rich Ercolani <rincebrain@gmail.com>
Tested-by: Sebastian Gottschall <s.gottschall@dd-wrt.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Tino Reichardt <milky-zfs@mcmilk.de>
Closes #13741
2023-03-01 08:40:28 +00:00
|
|
|
.Lloop:
|
|
|
|
movl %ebx,%edi
|
|
|
|
leaq K256(%rip),%rbp
|
|
|
|
xorl %ecx,%edi
|
|
|
|
movl 0(%rsi),%r12d
|
|
|
|
movl %r8d,%r13d
|
|
|
|
movl %eax,%r14d
|
|
|
|
bswapl %r12d
|
|
|
|
rorl $14,%r13d
|
|
|
|
movl %r9d,%r15d
|
|
|
|
xorl %r8d,%r13d
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %r10d,%r15d
|
|
|
|
movl %r12d,0(%rsp)
|
|
|
|
xorl %eax,%r14d
|
|
|
|
andl %r8d,%r15d
|
|
|
|
rorl $5,%r13d
|
|
|
|
addl %r11d,%r12d
|
|
|
|
xorl %r10d,%r15d
|
|
|
|
rorl $11,%r14d
|
|
|
|
xorl %r8d,%r13d
|
|
|
|
addl %r15d,%r12d
|
|
|
|
movl %eax,%r15d
|
|
|
|
addl (%rbp),%r12d
|
|
|
|
xorl %eax,%r14d
|
|
|
|
xorl %ebx,%r15d
|
|
|
|
rorl $6,%r13d
|
|
|
|
movl %ebx,%r11d
|
|
|
|
andl %r15d,%edi
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %r13d,%r12d
|
|
|
|
xorl %edi,%r11d
|
|
|
|
addl %r12d,%edx
|
|
|
|
addl %r12d,%r11d
|
|
|
|
leaq 4(%rbp),%rbp
|
|
|
|
addl %r14d,%r11d
|
|
|
|
movl 4(%rsi),%r12d
|
|
|
|
movl %edx,%r13d
|
|
|
|
movl %r11d,%r14d
|
|
|
|
bswapl %r12d
|
|
|
|
rorl $14,%r13d
|
|
|
|
movl %r8d,%edi
|
|
|
|
xorl %edx,%r13d
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %r9d,%edi
|
|
|
|
movl %r12d,4(%rsp)
|
|
|
|
xorl %r11d,%r14d
|
|
|
|
andl %edx,%edi
|
|
|
|
rorl $5,%r13d
|
|
|
|
addl %r10d,%r12d
|
|
|
|
xorl %r9d,%edi
|
|
|
|
rorl $11,%r14d
|
|
|
|
xorl %edx,%r13d
|
|
|
|
addl %edi,%r12d
|
|
|
|
movl %r11d,%edi
|
|
|
|
addl (%rbp),%r12d
|
|
|
|
xorl %r11d,%r14d
|
|
|
|
xorl %eax,%edi
|
|
|
|
rorl $6,%r13d
|
|
|
|
movl %eax,%r10d
|
|
|
|
andl %edi,%r15d
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %r13d,%r12d
|
|
|
|
xorl %r15d,%r10d
|
|
|
|
addl %r12d,%ecx
|
|
|
|
addl %r12d,%r10d
|
|
|
|
leaq 4(%rbp),%rbp
|
|
|
|
addl %r14d,%r10d
|
|
|
|
movl 8(%rsi),%r12d
|
|
|
|
movl %ecx,%r13d
|
|
|
|
movl %r10d,%r14d
|
|
|
|
bswapl %r12d
|
|
|
|
rorl $14,%r13d
|
|
|
|
movl %edx,%r15d
|
|
|
|
xorl %ecx,%r13d
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %r8d,%r15d
|
|
|
|
movl %r12d,8(%rsp)
|
|
|
|
xorl %r10d,%r14d
|
|
|
|
andl %ecx,%r15d
|
|
|
|
rorl $5,%r13d
|
|
|
|
addl %r9d,%r12d
|
|
|
|
xorl %r8d,%r15d
|
|
|
|
rorl $11,%r14d
|
|
|
|
xorl %ecx,%r13d
|
|
|
|
addl %r15d,%r12d
|
|
|
|
movl %r10d,%r15d
|
|
|
|
addl (%rbp),%r12d
|
|
|
|
xorl %r10d,%r14d
|
|
|
|
xorl %r11d,%r15d
|
|
|
|
rorl $6,%r13d
|
|
|
|
movl %r11d,%r9d
|
|
|
|
andl %r15d,%edi
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %r13d,%r12d
|
|
|
|
xorl %edi,%r9d
|
|
|
|
addl %r12d,%ebx
|
|
|
|
addl %r12d,%r9d
|
|
|
|
leaq 4(%rbp),%rbp
|
|
|
|
addl %r14d,%r9d
|
|
|
|
movl 12(%rsi),%r12d
|
|
|
|
movl %ebx,%r13d
|
|
|
|
movl %r9d,%r14d
|
|
|
|
bswapl %r12d
|
|
|
|
rorl $14,%r13d
|
|
|
|
movl %ecx,%edi
|
|
|
|
xorl %ebx,%r13d
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %edx,%edi
|
|
|
|
movl %r12d,12(%rsp)
|
|
|
|
xorl %r9d,%r14d
|
|
|
|
andl %ebx,%edi
|
|
|
|
rorl $5,%r13d
|
|
|
|
addl %r8d,%r12d
|
|
|
|
xorl %edx,%edi
|
|
|
|
rorl $11,%r14d
|
|
|
|
xorl %ebx,%r13d
|
|
|
|
addl %edi,%r12d
|
|
|
|
movl %r9d,%edi
|
|
|
|
addl (%rbp),%r12d
|
|
|
|
xorl %r9d,%r14d
|
|
|
|
xorl %r10d,%edi
|
|
|
|
rorl $6,%r13d
|
|
|
|
movl %r10d,%r8d
|
|
|
|
andl %edi,%r15d
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %r13d,%r12d
|
|
|
|
xorl %r15d,%r8d
|
|
|
|
addl %r12d,%eax
|
|
|
|
addl %r12d,%r8d
|
|
|
|
leaq 20(%rbp),%rbp
|
|
|
|
addl %r14d,%r8d
|
|
|
|
movl 16(%rsi),%r12d
|
|
|
|
movl %eax,%r13d
|
|
|
|
movl %r8d,%r14d
|
|
|
|
bswapl %r12d
|
|
|
|
rorl $14,%r13d
|
|
|
|
movl %ebx,%r15d
|
|
|
|
xorl %eax,%r13d
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %ecx,%r15d
|
|
|
|
movl %r12d,16(%rsp)
|
|
|
|
xorl %r8d,%r14d
|
|
|
|
andl %eax,%r15d
|
|
|
|
rorl $5,%r13d
|
|
|
|
addl %edx,%r12d
|
|
|
|
xorl %ecx,%r15d
|
|
|
|
rorl $11,%r14d
|
|
|
|
xorl %eax,%r13d
|
|
|
|
addl %r15d,%r12d
|
|
|
|
movl %r8d,%r15d
|
|
|
|
addl (%rbp),%r12d
|
|
|
|
xorl %r8d,%r14d
|
|
|
|
xorl %r9d,%r15d
|
|
|
|
rorl $6,%r13d
|
|
|
|
movl %r9d,%edx
|
|
|
|
andl %r15d,%edi
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %r13d,%r12d
|
|
|
|
xorl %edi,%edx
|
|
|
|
addl %r12d,%r11d
|
|
|
|
addl %r12d,%edx
|
|
|
|
leaq 4(%rbp),%rbp
|
|
|
|
addl %r14d,%edx
|
|
|
|
movl 20(%rsi),%r12d
|
|
|
|
movl %r11d,%r13d
|
|
|
|
movl %edx,%r14d
|
|
|
|
bswapl %r12d
|
|
|
|
rorl $14,%r13d
|
|
|
|
movl %eax,%edi
|
|
|
|
xorl %r11d,%r13d
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %ebx,%edi
|
|
|
|
movl %r12d,20(%rsp)
|
|
|
|
xorl %edx,%r14d
|
|
|
|
andl %r11d,%edi
|
|
|
|
rorl $5,%r13d
|
|
|
|
addl %ecx,%r12d
|
|
|
|
xorl %ebx,%edi
|
|
|
|
rorl $11,%r14d
|
|
|
|
xorl %r11d,%r13d
|
|
|
|
addl %edi,%r12d
|
|
|
|
movl %edx,%edi
|
|
|
|
addl (%rbp),%r12d
|
|
|
|
xorl %edx,%r14d
|
|
|
|
xorl %r8d,%edi
|
|
|
|
rorl $6,%r13d
|
|
|
|
movl %r8d,%ecx
|
|
|
|
andl %edi,%r15d
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %r13d,%r12d
|
|
|
|
xorl %r15d,%ecx
|
|
|
|
addl %r12d,%r10d
|
|
|
|
addl %r12d,%ecx
|
|
|
|
leaq 4(%rbp),%rbp
|
|
|
|
addl %r14d,%ecx
|
|
|
|
movl 24(%rsi),%r12d
|
|
|
|
movl %r10d,%r13d
|
|
|
|
movl %ecx,%r14d
|
|
|
|
bswapl %r12d
|
|
|
|
rorl $14,%r13d
|
|
|
|
movl %r11d,%r15d
|
|
|
|
xorl %r10d,%r13d
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %eax,%r15d
|
|
|
|
movl %r12d,24(%rsp)
|
|
|
|
xorl %ecx,%r14d
|
|
|
|
andl %r10d,%r15d
|
|
|
|
rorl $5,%r13d
|
|
|
|
addl %ebx,%r12d
|
|
|
|
xorl %eax,%r15d
|
|
|
|
rorl $11,%r14d
|
|
|
|
xorl %r10d,%r13d
|
|
|
|
addl %r15d,%r12d
|
|
|
|
movl %ecx,%r15d
|
|
|
|
addl (%rbp),%r12d
|
|
|
|
xorl %ecx,%r14d
|
|
|
|
xorl %edx,%r15d
|
|
|
|
rorl $6,%r13d
|
|
|
|
movl %edx,%ebx
|
|
|
|
andl %r15d,%edi
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %r13d,%r12d
|
|
|
|
xorl %edi,%ebx
|
|
|
|
addl %r12d,%r9d
|
|
|
|
addl %r12d,%ebx
|
|
|
|
leaq 4(%rbp),%rbp
|
|
|
|
addl %r14d,%ebx
|
|
|
|
movl 28(%rsi),%r12d
|
|
|
|
movl %r9d,%r13d
|
|
|
|
movl %ebx,%r14d
|
|
|
|
bswapl %r12d
|
|
|
|
rorl $14,%r13d
|
|
|
|
movl %r10d,%edi
|
|
|
|
xorl %r9d,%r13d
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %r11d,%edi
|
|
|
|
movl %r12d,28(%rsp)
|
|
|
|
xorl %ebx,%r14d
|
|
|
|
andl %r9d,%edi
|
|
|
|
rorl $5,%r13d
|
|
|
|
addl %eax,%r12d
|
|
|
|
xorl %r11d,%edi
|
|
|
|
rorl $11,%r14d
|
|
|
|
xorl %r9d,%r13d
|
|
|
|
addl %edi,%r12d
|
|
|
|
movl %ebx,%edi
|
|
|
|
addl (%rbp),%r12d
|
|
|
|
xorl %ebx,%r14d
|
|
|
|
xorl %ecx,%edi
|
|
|
|
rorl $6,%r13d
|
|
|
|
movl %ecx,%eax
|
|
|
|
andl %edi,%r15d
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %r13d,%r12d
|
|
|
|
xorl %r15d,%eax
|
|
|
|
addl %r12d,%r8d
|
|
|
|
addl %r12d,%eax
|
|
|
|
leaq 20(%rbp),%rbp
|
|
|
|
addl %r14d,%eax
|
|
|
|
movl 32(%rsi),%r12d
|
|
|
|
movl %r8d,%r13d
|
|
|
|
movl %eax,%r14d
|
|
|
|
bswapl %r12d
|
|
|
|
rorl $14,%r13d
|
|
|
|
movl %r9d,%r15d
|
|
|
|
xorl %r8d,%r13d
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %r10d,%r15d
|
|
|
|
movl %r12d,32(%rsp)
|
|
|
|
xorl %eax,%r14d
|
|
|
|
andl %r8d,%r15d
|
|
|
|
rorl $5,%r13d
|
|
|
|
addl %r11d,%r12d
|
|
|
|
xorl %r10d,%r15d
|
|
|
|
rorl $11,%r14d
|
|
|
|
xorl %r8d,%r13d
|
|
|
|
addl %r15d,%r12d
|
|
|
|
movl %eax,%r15d
|
|
|
|
addl (%rbp),%r12d
|
|
|
|
xorl %eax,%r14d
|
|
|
|
xorl %ebx,%r15d
|
|
|
|
rorl $6,%r13d
|
|
|
|
movl %ebx,%r11d
|
|
|
|
andl %r15d,%edi
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %r13d,%r12d
|
|
|
|
xorl %edi,%r11d
|
|
|
|
addl %r12d,%edx
|
|
|
|
addl %r12d,%r11d
|
|
|
|
leaq 4(%rbp),%rbp
|
|
|
|
addl %r14d,%r11d
|
|
|
|
movl 36(%rsi),%r12d
|
|
|
|
movl %edx,%r13d
|
|
|
|
movl %r11d,%r14d
|
|
|
|
bswapl %r12d
|
|
|
|
rorl $14,%r13d
|
|
|
|
movl %r8d,%edi
|
|
|
|
xorl %edx,%r13d
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %r9d,%edi
|
|
|
|
movl %r12d,36(%rsp)
|
|
|
|
xorl %r11d,%r14d
|
|
|
|
andl %edx,%edi
|
|
|
|
rorl $5,%r13d
|
|
|
|
addl %r10d,%r12d
|
|
|
|
xorl %r9d,%edi
|
|
|
|
rorl $11,%r14d
|
|
|
|
xorl %edx,%r13d
|
|
|
|
addl %edi,%r12d
|
|
|
|
movl %r11d,%edi
|
|
|
|
addl (%rbp),%r12d
|
|
|
|
xorl %r11d,%r14d
|
|
|
|
xorl %eax,%edi
|
|
|
|
rorl $6,%r13d
|
|
|
|
movl %eax,%r10d
|
|
|
|
andl %edi,%r15d
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %r13d,%r12d
|
|
|
|
xorl %r15d,%r10d
|
|
|
|
addl %r12d,%ecx
|
|
|
|
addl %r12d,%r10d
|
|
|
|
leaq 4(%rbp),%rbp
|
|
|
|
addl %r14d,%r10d
|
|
|
|
movl 40(%rsi),%r12d
|
|
|
|
movl %ecx,%r13d
|
|
|
|
movl %r10d,%r14d
|
|
|
|
bswapl %r12d
|
|
|
|
rorl $14,%r13d
|
|
|
|
movl %edx,%r15d
|
|
|
|
xorl %ecx,%r13d
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %r8d,%r15d
|
|
|
|
movl %r12d,40(%rsp)
|
|
|
|
xorl %r10d,%r14d
|
|
|
|
andl %ecx,%r15d
|
|
|
|
rorl $5,%r13d
|
|
|
|
addl %r9d,%r12d
|
|
|
|
xorl %r8d,%r15d
|
|
|
|
rorl $11,%r14d
|
|
|
|
xorl %ecx,%r13d
|
|
|
|
addl %r15d,%r12d
|
|
|
|
movl %r10d,%r15d
|
|
|
|
addl (%rbp),%r12d
|
|
|
|
xorl %r10d,%r14d
|
|
|
|
xorl %r11d,%r15d
|
|
|
|
rorl $6,%r13d
|
|
|
|
movl %r11d,%r9d
|
|
|
|
andl %r15d,%edi
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %r13d,%r12d
|
|
|
|
xorl %edi,%r9d
|
|
|
|
addl %r12d,%ebx
|
|
|
|
addl %r12d,%r9d
|
|
|
|
leaq 4(%rbp),%rbp
|
|
|
|
addl %r14d,%r9d
|
|
|
|
movl 44(%rsi),%r12d
|
|
|
|
movl %ebx,%r13d
|
|
|
|
movl %r9d,%r14d
|
|
|
|
bswapl %r12d
|
|
|
|
rorl $14,%r13d
|
|
|
|
movl %ecx,%edi
|
|
|
|
xorl %ebx,%r13d
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %edx,%edi
|
|
|
|
movl %r12d,44(%rsp)
|
|
|
|
xorl %r9d,%r14d
|
|
|
|
andl %ebx,%edi
|
|
|
|
rorl $5,%r13d
|
|
|
|
addl %r8d,%r12d
|
|
|
|
xorl %edx,%edi
|
|
|
|
rorl $11,%r14d
|
|
|
|
xorl %ebx,%r13d
|
|
|
|
addl %edi,%r12d
|
|
|
|
movl %r9d,%edi
|
|
|
|
addl (%rbp),%r12d
|
|
|
|
xorl %r9d,%r14d
|
|
|
|
xorl %r10d,%edi
|
|
|
|
rorl $6,%r13d
|
|
|
|
movl %r10d,%r8d
|
|
|
|
andl %edi,%r15d
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %r13d,%r12d
|
|
|
|
xorl %r15d,%r8d
|
|
|
|
addl %r12d,%eax
|
|
|
|
addl %r12d,%r8d
|
|
|
|
leaq 20(%rbp),%rbp
|
|
|
|
addl %r14d,%r8d
|
|
|
|
movl 48(%rsi),%r12d
|
|
|
|
movl %eax,%r13d
|
|
|
|
movl %r8d,%r14d
|
|
|
|
bswapl %r12d
|
|
|
|
rorl $14,%r13d
|
|
|
|
movl %ebx,%r15d
|
|
|
|
xorl %eax,%r13d
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %ecx,%r15d
|
|
|
|
movl %r12d,48(%rsp)
|
|
|
|
xorl %r8d,%r14d
|
|
|
|
andl %eax,%r15d
|
|
|
|
rorl $5,%r13d
|
|
|
|
addl %edx,%r12d
|
|
|
|
xorl %ecx,%r15d
|
|
|
|
rorl $11,%r14d
|
|
|
|
xorl %eax,%r13d
|
|
|
|
addl %r15d,%r12d
|
|
|
|
movl %r8d,%r15d
|
|
|
|
addl (%rbp),%r12d
|
|
|
|
xorl %r8d,%r14d
|
|
|
|
xorl %r9d,%r15d
|
|
|
|
rorl $6,%r13d
|
|
|
|
movl %r9d,%edx
|
|
|
|
andl %r15d,%edi
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %r13d,%r12d
|
|
|
|
xorl %edi,%edx
|
|
|
|
addl %r12d,%r11d
|
|
|
|
addl %r12d,%edx
|
|
|
|
leaq 4(%rbp),%rbp
|
|
|
|
addl %r14d,%edx
|
|
|
|
movl 52(%rsi),%r12d
|
|
|
|
movl %r11d,%r13d
|
|
|
|
movl %edx,%r14d
|
|
|
|
bswapl %r12d
|
|
|
|
rorl $14,%r13d
|
|
|
|
movl %eax,%edi
|
|
|
|
xorl %r11d,%r13d
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %ebx,%edi
|
|
|
|
movl %r12d,52(%rsp)
|
|
|
|
xorl %edx,%r14d
|
|
|
|
andl %r11d,%edi
|
|
|
|
rorl $5,%r13d
|
|
|
|
addl %ecx,%r12d
|
|
|
|
xorl %ebx,%edi
|
|
|
|
rorl $11,%r14d
|
|
|
|
xorl %r11d,%r13d
|
|
|
|
addl %edi,%r12d
|
|
|
|
movl %edx,%edi
|
|
|
|
addl (%rbp),%r12d
|
|
|
|
xorl %edx,%r14d
|
|
|
|
xorl %r8d,%edi
|
|
|
|
rorl $6,%r13d
|
|
|
|
movl %r8d,%ecx
|
|
|
|
andl %edi,%r15d
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %r13d,%r12d
|
|
|
|
xorl %r15d,%ecx
|
|
|
|
addl %r12d,%r10d
|
|
|
|
addl %r12d,%ecx
|
|
|
|
leaq 4(%rbp),%rbp
|
|
|
|
addl %r14d,%ecx
|
|
|
|
movl 56(%rsi),%r12d
|
|
|
|
movl %r10d,%r13d
|
|
|
|
movl %ecx,%r14d
|
|
|
|
bswapl %r12d
|
|
|
|
rorl $14,%r13d
|
|
|
|
movl %r11d,%r15d
|
|
|
|
xorl %r10d,%r13d
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %eax,%r15d
|
|
|
|
movl %r12d,56(%rsp)
|
|
|
|
xorl %ecx,%r14d
|
|
|
|
andl %r10d,%r15d
|
|
|
|
rorl $5,%r13d
|
|
|
|
addl %ebx,%r12d
|
|
|
|
xorl %eax,%r15d
|
|
|
|
rorl $11,%r14d
|
|
|
|
xorl %r10d,%r13d
|
|
|
|
addl %r15d,%r12d
|
|
|
|
movl %ecx,%r15d
|
|
|
|
addl (%rbp),%r12d
|
|
|
|
xorl %ecx,%r14d
|
|
|
|
xorl %edx,%r15d
|
|
|
|
rorl $6,%r13d
|
|
|
|
movl %edx,%ebx
|
|
|
|
andl %r15d,%edi
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %r13d,%r12d
|
|
|
|
xorl %edi,%ebx
|
|
|
|
addl %r12d,%r9d
|
|
|
|
addl %r12d,%ebx
|
|
|
|
leaq 4(%rbp),%rbp
|
|
|
|
addl %r14d,%ebx
|
|
|
|
movl 60(%rsi),%r12d
|
|
|
|
movl %r9d,%r13d
|
|
|
|
movl %ebx,%r14d
|
|
|
|
bswapl %r12d
|
|
|
|
rorl $14,%r13d
|
|
|
|
movl %r10d,%edi
|
|
|
|
xorl %r9d,%r13d
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %r11d,%edi
|
|
|
|
movl %r12d,60(%rsp)
|
|
|
|
xorl %ebx,%r14d
|
|
|
|
andl %r9d,%edi
|
|
|
|
rorl $5,%r13d
|
|
|
|
addl %eax,%r12d
|
|
|
|
xorl %r11d,%edi
|
|
|
|
rorl $11,%r14d
|
|
|
|
xorl %r9d,%r13d
|
|
|
|
addl %edi,%r12d
|
|
|
|
movl %ebx,%edi
|
|
|
|
addl (%rbp),%r12d
|
|
|
|
xorl %ebx,%r14d
|
|
|
|
xorl %ecx,%edi
|
|
|
|
rorl $6,%r13d
|
|
|
|
movl %ecx,%eax
|
|
|
|
andl %edi,%r15d
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %r13d,%r12d
|
|
|
|
xorl %r15d,%eax
|
|
|
|
addl %r12d,%r8d
|
|
|
|
addl %r12d,%eax
|
|
|
|
leaq 20(%rbp),%rbp
|
|
|
|
jmp .Lrounds_16_xx
|
2023-03-06 23:24:05 +00:00
|
|
|
.balign 16
|
Add generic implementation handling and SHA2 impl
The skeleton file module/icp/include/generic_impl.c can be used for
iterating over different implementations of algorithms.
It is used by SHA256, SHA512 and BLAKE3 currently.
The Solaris SHA2 implementation got replaced with a version which is
based on public domain code of cppcrypto v0.10.
These assembly files are taken from current openssl master:
- sha256-x86_64.S: x64, SSSE3, AVX, AVX2, SHA-NI (x86_64)
- sha512-x86_64.S: x64, AVX, AVX2 (x86_64)
- sha256-armv7.S: ARMv7, NEON, ARMv8-CE (arm)
- sha512-armv7.S: ARMv7, NEON (arm)
- sha256-armv8.S: ARMv7, NEON, ARMv8-CE (aarch64)
- sha512-armv8.S: ARMv7, ARMv8-CE (aarch64)
- sha256-ppc.S: Generic PPC64 LE/BE (ppc64)
- sha512-ppc.S: Generic PPC64 LE/BE (ppc64)
- sha256-p8.S: Power8 ISA Version 2.07 LE/BE (ppc64)
- sha512-p8.S: Power8 ISA Version 2.07 LE/BE (ppc64)
Tested-by: Rich Ercolani <rincebrain@gmail.com>
Tested-by: Sebastian Gottschall <s.gottschall@dd-wrt.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Tino Reichardt <milky-zfs@mcmilk.de>
Closes #13741
2023-03-01 08:40:28 +00:00
|
|
|
.Lrounds_16_xx:
|
|
|
|
movl 4(%rsp),%r13d
|
|
|
|
movl 56(%rsp),%r15d
|
|
|
|
movl %r13d,%r12d
|
|
|
|
rorl $11,%r13d
|
|
|
|
addl %r14d,%eax
|
|
|
|
movl %r15d,%r14d
|
|
|
|
rorl $2,%r15d
|
|
|
|
xorl %r12d,%r13d
|
|
|
|
shrl $3,%r12d
|
|
|
|
rorl $7,%r13d
|
|
|
|
xorl %r14d,%r15d
|
|
|
|
shrl $10,%r14d
|
|
|
|
rorl $17,%r15d
|
|
|
|
xorl %r13d,%r12d
|
|
|
|
xorl %r14d,%r15d
|
|
|
|
addl 36(%rsp),%r12d
|
|
|
|
addl 0(%rsp),%r12d
|
|
|
|
movl %r8d,%r13d
|
|
|
|
addl %r15d,%r12d
|
|
|
|
movl %eax,%r14d
|
|
|
|
rorl $14,%r13d
|
|
|
|
movl %r9d,%r15d
|
|
|
|
xorl %r8d,%r13d
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %r10d,%r15d
|
|
|
|
movl %r12d,0(%rsp)
|
|
|
|
xorl %eax,%r14d
|
|
|
|
andl %r8d,%r15d
|
|
|
|
rorl $5,%r13d
|
|
|
|
addl %r11d,%r12d
|
|
|
|
xorl %r10d,%r15d
|
|
|
|
rorl $11,%r14d
|
|
|
|
xorl %r8d,%r13d
|
|
|
|
addl %r15d,%r12d
|
|
|
|
movl %eax,%r15d
|
|
|
|
addl (%rbp),%r12d
|
|
|
|
xorl %eax,%r14d
|
|
|
|
xorl %ebx,%r15d
|
|
|
|
rorl $6,%r13d
|
|
|
|
movl %ebx,%r11d
|
|
|
|
andl %r15d,%edi
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %r13d,%r12d
|
|
|
|
xorl %edi,%r11d
|
|
|
|
addl %r12d,%edx
|
|
|
|
addl %r12d,%r11d
|
|
|
|
leaq 4(%rbp),%rbp
|
|
|
|
movl 8(%rsp),%r13d
|
|
|
|
movl 60(%rsp),%edi
|
|
|
|
movl %r13d,%r12d
|
|
|
|
rorl $11,%r13d
|
|
|
|
addl %r14d,%r11d
|
|
|
|
movl %edi,%r14d
|
|
|
|
rorl $2,%edi
|
|
|
|
xorl %r12d,%r13d
|
|
|
|
shrl $3,%r12d
|
|
|
|
rorl $7,%r13d
|
|
|
|
xorl %r14d,%edi
|
|
|
|
shrl $10,%r14d
|
|
|
|
rorl $17,%edi
|
|
|
|
xorl %r13d,%r12d
|
|
|
|
xorl %r14d,%edi
|
|
|
|
addl 40(%rsp),%r12d
|
|
|
|
addl 4(%rsp),%r12d
|
|
|
|
movl %edx,%r13d
|
|
|
|
addl %edi,%r12d
|
|
|
|
movl %r11d,%r14d
|
|
|
|
rorl $14,%r13d
|
|
|
|
movl %r8d,%edi
|
|
|
|
xorl %edx,%r13d
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %r9d,%edi
|
|
|
|
movl %r12d,4(%rsp)
|
|
|
|
xorl %r11d,%r14d
|
|
|
|
andl %edx,%edi
|
|
|
|
rorl $5,%r13d
|
|
|
|
addl %r10d,%r12d
|
|
|
|
xorl %r9d,%edi
|
|
|
|
rorl $11,%r14d
|
|
|
|
xorl %edx,%r13d
|
|
|
|
addl %edi,%r12d
|
|
|
|
movl %r11d,%edi
|
|
|
|
addl (%rbp),%r12d
|
|
|
|
xorl %r11d,%r14d
|
|
|
|
xorl %eax,%edi
|
|
|
|
rorl $6,%r13d
|
|
|
|
movl %eax,%r10d
|
|
|
|
andl %edi,%r15d
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %r13d,%r12d
|
|
|
|
xorl %r15d,%r10d
|
|
|
|
addl %r12d,%ecx
|
|
|
|
addl %r12d,%r10d
|
|
|
|
leaq 4(%rbp),%rbp
|
|
|
|
movl 12(%rsp),%r13d
|
|
|
|
movl 0(%rsp),%r15d
|
|
|
|
movl %r13d,%r12d
|
|
|
|
rorl $11,%r13d
|
|
|
|
addl %r14d,%r10d
|
|
|
|
movl %r15d,%r14d
|
|
|
|
rorl $2,%r15d
|
|
|
|
xorl %r12d,%r13d
|
|
|
|
shrl $3,%r12d
|
|
|
|
rorl $7,%r13d
|
|
|
|
xorl %r14d,%r15d
|
|
|
|
shrl $10,%r14d
|
|
|
|
rorl $17,%r15d
|
|
|
|
xorl %r13d,%r12d
|
|
|
|
xorl %r14d,%r15d
|
|
|
|
addl 44(%rsp),%r12d
|
|
|
|
addl 8(%rsp),%r12d
|
|
|
|
movl %ecx,%r13d
|
|
|
|
addl %r15d,%r12d
|
|
|
|
movl %r10d,%r14d
|
|
|
|
rorl $14,%r13d
|
|
|
|
movl %edx,%r15d
|
|
|
|
xorl %ecx,%r13d
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %r8d,%r15d
|
|
|
|
movl %r12d,8(%rsp)
|
|
|
|
xorl %r10d,%r14d
|
|
|
|
andl %ecx,%r15d
|
|
|
|
rorl $5,%r13d
|
|
|
|
addl %r9d,%r12d
|
|
|
|
xorl %r8d,%r15d
|
|
|
|
rorl $11,%r14d
|
|
|
|
xorl %ecx,%r13d
|
|
|
|
addl %r15d,%r12d
|
|
|
|
movl %r10d,%r15d
|
|
|
|
addl (%rbp),%r12d
|
|
|
|
xorl %r10d,%r14d
|
|
|
|
xorl %r11d,%r15d
|
|
|
|
rorl $6,%r13d
|
|
|
|
movl %r11d,%r9d
|
|
|
|
andl %r15d,%edi
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %r13d,%r12d
|
|
|
|
xorl %edi,%r9d
|
|
|
|
addl %r12d,%ebx
|
|
|
|
addl %r12d,%r9d
|
|
|
|
leaq 4(%rbp),%rbp
|
|
|
|
movl 16(%rsp),%r13d
|
|
|
|
movl 4(%rsp),%edi
|
|
|
|
movl %r13d,%r12d
|
|
|
|
rorl $11,%r13d
|
|
|
|
addl %r14d,%r9d
|
|
|
|
movl %edi,%r14d
|
|
|
|
rorl $2,%edi
|
|
|
|
xorl %r12d,%r13d
|
|
|
|
shrl $3,%r12d
|
|
|
|
rorl $7,%r13d
|
|
|
|
xorl %r14d,%edi
|
|
|
|
shrl $10,%r14d
|
|
|
|
rorl $17,%edi
|
|
|
|
xorl %r13d,%r12d
|
|
|
|
xorl %r14d,%edi
|
|
|
|
addl 48(%rsp),%r12d
|
|
|
|
addl 12(%rsp),%r12d
|
|
|
|
movl %ebx,%r13d
|
|
|
|
addl %edi,%r12d
|
|
|
|
movl %r9d,%r14d
|
|
|
|
rorl $14,%r13d
|
|
|
|
movl %ecx,%edi
|
|
|
|
xorl %ebx,%r13d
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %edx,%edi
|
|
|
|
movl %r12d,12(%rsp)
|
|
|
|
xorl %r9d,%r14d
|
|
|
|
andl %ebx,%edi
|
|
|
|
rorl $5,%r13d
|
|
|
|
addl %r8d,%r12d
|
|
|
|
xorl %edx,%edi
|
|
|
|
rorl $11,%r14d
|
|
|
|
xorl %ebx,%r13d
|
|
|
|
addl %edi,%r12d
|
|
|
|
movl %r9d,%edi
|
|
|
|
addl (%rbp),%r12d
|
|
|
|
xorl %r9d,%r14d
|
|
|
|
xorl %r10d,%edi
|
|
|
|
rorl $6,%r13d
|
|
|
|
movl %r10d,%r8d
|
|
|
|
andl %edi,%r15d
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %r13d,%r12d
|
|
|
|
xorl %r15d,%r8d
|
|
|
|
addl %r12d,%eax
|
|
|
|
addl %r12d,%r8d
|
|
|
|
leaq 20(%rbp),%rbp
|
|
|
|
movl 20(%rsp),%r13d
|
|
|
|
movl 8(%rsp),%r15d
|
|
|
|
movl %r13d,%r12d
|
|
|
|
rorl $11,%r13d
|
|
|
|
addl %r14d,%r8d
|
|
|
|
movl %r15d,%r14d
|
|
|
|
rorl $2,%r15d
|
|
|
|
xorl %r12d,%r13d
|
|
|
|
shrl $3,%r12d
|
|
|
|
rorl $7,%r13d
|
|
|
|
xorl %r14d,%r15d
|
|
|
|
shrl $10,%r14d
|
|
|
|
rorl $17,%r15d
|
|
|
|
xorl %r13d,%r12d
|
|
|
|
xorl %r14d,%r15d
|
|
|
|
addl 52(%rsp),%r12d
|
|
|
|
addl 16(%rsp),%r12d
|
|
|
|
movl %eax,%r13d
|
|
|
|
addl %r15d,%r12d
|
|
|
|
movl %r8d,%r14d
|
|
|
|
rorl $14,%r13d
|
|
|
|
movl %ebx,%r15d
|
|
|
|
xorl %eax,%r13d
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %ecx,%r15d
|
|
|
|
movl %r12d,16(%rsp)
|
|
|
|
xorl %r8d,%r14d
|
|
|
|
andl %eax,%r15d
|
|
|
|
rorl $5,%r13d
|
|
|
|
addl %edx,%r12d
|
|
|
|
xorl %ecx,%r15d
|
|
|
|
rorl $11,%r14d
|
|
|
|
xorl %eax,%r13d
|
|
|
|
addl %r15d,%r12d
|
|
|
|
movl %r8d,%r15d
|
|
|
|
addl (%rbp),%r12d
|
|
|
|
xorl %r8d,%r14d
|
|
|
|
xorl %r9d,%r15d
|
|
|
|
rorl $6,%r13d
|
|
|
|
movl %r9d,%edx
|
|
|
|
andl %r15d,%edi
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %r13d,%r12d
|
|
|
|
xorl %edi,%edx
|
|
|
|
addl %r12d,%r11d
|
|
|
|
addl %r12d,%edx
|
|
|
|
leaq 4(%rbp),%rbp
|
|
|
|
movl 24(%rsp),%r13d
|
|
|
|
movl 12(%rsp),%edi
|
|
|
|
movl %r13d,%r12d
|
|
|
|
rorl $11,%r13d
|
|
|
|
addl %r14d,%edx
|
|
|
|
movl %edi,%r14d
|
|
|
|
rorl $2,%edi
|
|
|
|
xorl %r12d,%r13d
|
|
|
|
shrl $3,%r12d
|
|
|
|
rorl $7,%r13d
|
|
|
|
xorl %r14d,%edi
|
|
|
|
shrl $10,%r14d
|
|
|
|
rorl $17,%edi
|
|
|
|
xorl %r13d,%r12d
|
|
|
|
xorl %r14d,%edi
|
|
|
|
addl 56(%rsp),%r12d
|
|
|
|
addl 20(%rsp),%r12d
|
|
|
|
movl %r11d,%r13d
|
|
|
|
addl %edi,%r12d
|
|
|
|
movl %edx,%r14d
|
|
|
|
rorl $14,%r13d
|
|
|
|
movl %eax,%edi
|
|
|
|
xorl %r11d,%r13d
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %ebx,%edi
|
|
|
|
movl %r12d,20(%rsp)
|
|
|
|
xorl %edx,%r14d
|
|
|
|
andl %r11d,%edi
|
|
|
|
rorl $5,%r13d
|
|
|
|
addl %ecx,%r12d
|
|
|
|
xorl %ebx,%edi
|
|
|
|
rorl $11,%r14d
|
|
|
|
xorl %r11d,%r13d
|
|
|
|
addl %edi,%r12d
|
|
|
|
movl %edx,%edi
|
|
|
|
addl (%rbp),%r12d
|
|
|
|
xorl %edx,%r14d
|
|
|
|
xorl %r8d,%edi
|
|
|
|
rorl $6,%r13d
|
|
|
|
movl %r8d,%ecx
|
|
|
|
andl %edi,%r15d
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %r13d,%r12d
|
|
|
|
xorl %r15d,%ecx
|
|
|
|
addl %r12d,%r10d
|
|
|
|
addl %r12d,%ecx
|
|
|
|
leaq 4(%rbp),%rbp
|
|
|
|
movl 28(%rsp),%r13d
|
|
|
|
movl 16(%rsp),%r15d
|
|
|
|
movl %r13d,%r12d
|
|
|
|
rorl $11,%r13d
|
|
|
|
addl %r14d,%ecx
|
|
|
|
movl %r15d,%r14d
|
|
|
|
rorl $2,%r15d
|
|
|
|
xorl %r12d,%r13d
|
|
|
|
shrl $3,%r12d
|
|
|
|
rorl $7,%r13d
|
|
|
|
xorl %r14d,%r15d
|
|
|
|
shrl $10,%r14d
|
|
|
|
rorl $17,%r15d
|
|
|
|
xorl %r13d,%r12d
|
|
|
|
xorl %r14d,%r15d
|
|
|
|
addl 60(%rsp),%r12d
|
|
|
|
addl 24(%rsp),%r12d
|
|
|
|
movl %r10d,%r13d
|
|
|
|
addl %r15d,%r12d
|
|
|
|
movl %ecx,%r14d
|
|
|
|
rorl $14,%r13d
|
|
|
|
movl %r11d,%r15d
|
|
|
|
xorl %r10d,%r13d
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %eax,%r15d
|
|
|
|
movl %r12d,24(%rsp)
|
|
|
|
xorl %ecx,%r14d
|
|
|
|
andl %r10d,%r15d
|
|
|
|
rorl $5,%r13d
|
|
|
|
addl %ebx,%r12d
|
|
|
|
xorl %eax,%r15d
|
|
|
|
rorl $11,%r14d
|
|
|
|
xorl %r10d,%r13d
|
|
|
|
addl %r15d,%r12d
|
|
|
|
movl %ecx,%r15d
|
|
|
|
addl (%rbp),%r12d
|
|
|
|
xorl %ecx,%r14d
|
|
|
|
xorl %edx,%r15d
|
|
|
|
rorl $6,%r13d
|
|
|
|
movl %edx,%ebx
|
|
|
|
andl %r15d,%edi
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %r13d,%r12d
|
|
|
|
xorl %edi,%ebx
|
|
|
|
addl %r12d,%r9d
|
|
|
|
addl %r12d,%ebx
|
|
|
|
leaq 4(%rbp),%rbp
|
|
|
|
movl 32(%rsp),%r13d
|
|
|
|
movl 20(%rsp),%edi
|
|
|
|
movl %r13d,%r12d
|
|
|
|
rorl $11,%r13d
|
|
|
|
addl %r14d,%ebx
|
|
|
|
movl %edi,%r14d
|
|
|
|
rorl $2,%edi
|
|
|
|
xorl %r12d,%r13d
|
|
|
|
shrl $3,%r12d
|
|
|
|
rorl $7,%r13d
|
|
|
|
xorl %r14d,%edi
|
|
|
|
shrl $10,%r14d
|
|
|
|
rorl $17,%edi
|
|
|
|
xorl %r13d,%r12d
|
|
|
|
xorl %r14d,%edi
|
|
|
|
addl 0(%rsp),%r12d
|
|
|
|
addl 28(%rsp),%r12d
|
|
|
|
movl %r9d,%r13d
|
|
|
|
addl %edi,%r12d
|
|
|
|
movl %ebx,%r14d
|
|
|
|
rorl $14,%r13d
|
|
|
|
movl %r10d,%edi
|
|
|
|
xorl %r9d,%r13d
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %r11d,%edi
|
|
|
|
movl %r12d,28(%rsp)
|
|
|
|
xorl %ebx,%r14d
|
|
|
|
andl %r9d,%edi
|
|
|
|
rorl $5,%r13d
|
|
|
|
addl %eax,%r12d
|
|
|
|
xorl %r11d,%edi
|
|
|
|
rorl $11,%r14d
|
|
|
|
xorl %r9d,%r13d
|
|
|
|
addl %edi,%r12d
|
|
|
|
movl %ebx,%edi
|
|
|
|
addl (%rbp),%r12d
|
|
|
|
xorl %ebx,%r14d
|
|
|
|
xorl %ecx,%edi
|
|
|
|
rorl $6,%r13d
|
|
|
|
movl %ecx,%eax
|
|
|
|
andl %edi,%r15d
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %r13d,%r12d
|
|
|
|
xorl %r15d,%eax
|
|
|
|
addl %r12d,%r8d
|
|
|
|
addl %r12d,%eax
|
|
|
|
leaq 20(%rbp),%rbp
|
|
|
|
movl 36(%rsp),%r13d
|
|
|
|
movl 24(%rsp),%r15d
|
|
|
|
movl %r13d,%r12d
|
|
|
|
rorl $11,%r13d
|
|
|
|
addl %r14d,%eax
|
|
|
|
movl %r15d,%r14d
|
|
|
|
rorl $2,%r15d
|
|
|
|
xorl %r12d,%r13d
|
|
|
|
shrl $3,%r12d
|
|
|
|
rorl $7,%r13d
|
|
|
|
xorl %r14d,%r15d
|
|
|
|
shrl $10,%r14d
|
|
|
|
rorl $17,%r15d
|
|
|
|
xorl %r13d,%r12d
|
|
|
|
xorl %r14d,%r15d
|
|
|
|
addl 4(%rsp),%r12d
|
|
|
|
addl 32(%rsp),%r12d
|
|
|
|
movl %r8d,%r13d
|
|
|
|
addl %r15d,%r12d
|
|
|
|
movl %eax,%r14d
|
|
|
|
rorl $14,%r13d
|
|
|
|
movl %r9d,%r15d
|
|
|
|
xorl %r8d,%r13d
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %r10d,%r15d
|
|
|
|
movl %r12d,32(%rsp)
|
|
|
|
xorl %eax,%r14d
|
|
|
|
andl %r8d,%r15d
|
|
|
|
rorl $5,%r13d
|
|
|
|
addl %r11d,%r12d
|
|
|
|
xorl %r10d,%r15d
|
|
|
|
rorl $11,%r14d
|
|
|
|
xorl %r8d,%r13d
|
|
|
|
addl %r15d,%r12d
|
|
|
|
movl %eax,%r15d
|
|
|
|
addl (%rbp),%r12d
|
|
|
|
xorl %eax,%r14d
|
|
|
|
xorl %ebx,%r15d
|
|
|
|
rorl $6,%r13d
|
|
|
|
movl %ebx,%r11d
|
|
|
|
andl %r15d,%edi
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %r13d,%r12d
|
|
|
|
xorl %edi,%r11d
|
|
|
|
addl %r12d,%edx
|
|
|
|
addl %r12d,%r11d
|
|
|
|
leaq 4(%rbp),%rbp
|
|
|
|
movl 40(%rsp),%r13d
|
|
|
|
movl 28(%rsp),%edi
|
|
|
|
movl %r13d,%r12d
|
|
|
|
rorl $11,%r13d
|
|
|
|
addl %r14d,%r11d
|
|
|
|
movl %edi,%r14d
|
|
|
|
rorl $2,%edi
|
|
|
|
xorl %r12d,%r13d
|
|
|
|
shrl $3,%r12d
|
|
|
|
rorl $7,%r13d
|
|
|
|
xorl %r14d,%edi
|
|
|
|
shrl $10,%r14d
|
|
|
|
rorl $17,%edi
|
|
|
|
xorl %r13d,%r12d
|
|
|
|
xorl %r14d,%edi
|
|
|
|
addl 8(%rsp),%r12d
|
|
|
|
addl 36(%rsp),%r12d
|
|
|
|
movl %edx,%r13d
|
|
|
|
addl %edi,%r12d
|
|
|
|
movl %r11d,%r14d
|
|
|
|
rorl $14,%r13d
|
|
|
|
movl %r8d,%edi
|
|
|
|
xorl %edx,%r13d
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %r9d,%edi
|
|
|
|
movl %r12d,36(%rsp)
|
|
|
|
xorl %r11d,%r14d
|
|
|
|
andl %edx,%edi
|
|
|
|
rorl $5,%r13d
|
|
|
|
addl %r10d,%r12d
|
|
|
|
xorl %r9d,%edi
|
|
|
|
rorl $11,%r14d
|
|
|
|
xorl %edx,%r13d
|
|
|
|
addl %edi,%r12d
|
|
|
|
movl %r11d,%edi
|
|
|
|
addl (%rbp),%r12d
|
|
|
|
xorl %r11d,%r14d
|
|
|
|
xorl %eax,%edi
|
|
|
|
rorl $6,%r13d
|
|
|
|
movl %eax,%r10d
|
|
|
|
andl %edi,%r15d
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %r13d,%r12d
|
|
|
|
xorl %r15d,%r10d
|
|
|
|
addl %r12d,%ecx
|
|
|
|
addl %r12d,%r10d
|
|
|
|
leaq 4(%rbp),%rbp
|
|
|
|
movl 44(%rsp),%r13d
|
|
|
|
movl 32(%rsp),%r15d
|
|
|
|
movl %r13d,%r12d
|
|
|
|
rorl $11,%r13d
|
|
|
|
addl %r14d,%r10d
|
|
|
|
movl %r15d,%r14d
|
|
|
|
rorl $2,%r15d
|
|
|
|
xorl %r12d,%r13d
|
|
|
|
shrl $3,%r12d
|
|
|
|
rorl $7,%r13d
|
|
|
|
xorl %r14d,%r15d
|
|
|
|
shrl $10,%r14d
|
|
|
|
rorl $17,%r15d
|
|
|
|
xorl %r13d,%r12d
|
|
|
|
xorl %r14d,%r15d
|
|
|
|
addl 12(%rsp),%r12d
|
|
|
|
addl 40(%rsp),%r12d
|
|
|
|
movl %ecx,%r13d
|
|
|
|
addl %r15d,%r12d
|
|
|
|
movl %r10d,%r14d
|
|
|
|
rorl $14,%r13d
|
|
|
|
movl %edx,%r15d
|
|
|
|
xorl %ecx,%r13d
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %r8d,%r15d
|
|
|
|
movl %r12d,40(%rsp)
|
|
|
|
xorl %r10d,%r14d
|
|
|
|
andl %ecx,%r15d
|
|
|
|
rorl $5,%r13d
|
|
|
|
addl %r9d,%r12d
|
|
|
|
xorl %r8d,%r15d
|
|
|
|
rorl $11,%r14d
|
|
|
|
xorl %ecx,%r13d
|
|
|
|
addl %r15d,%r12d
|
|
|
|
movl %r10d,%r15d
|
|
|
|
addl (%rbp),%r12d
|
|
|
|
xorl %r10d,%r14d
|
|
|
|
xorl %r11d,%r15d
|
|
|
|
rorl $6,%r13d
|
|
|
|
movl %r11d,%r9d
|
|
|
|
andl %r15d,%edi
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %r13d,%r12d
|
|
|
|
xorl %edi,%r9d
|
|
|
|
addl %r12d,%ebx
|
|
|
|
addl %r12d,%r9d
|
|
|
|
leaq 4(%rbp),%rbp
|
|
|
|
movl 48(%rsp),%r13d
|
|
|
|
movl 36(%rsp),%edi
|
|
|
|
movl %r13d,%r12d
|
|
|
|
rorl $11,%r13d
|
|
|
|
addl %r14d,%r9d
|
|
|
|
movl %edi,%r14d
|
|
|
|
rorl $2,%edi
|
|
|
|
xorl %r12d,%r13d
|
|
|
|
shrl $3,%r12d
|
|
|
|
rorl $7,%r13d
|
|
|
|
xorl %r14d,%edi
|
|
|
|
shrl $10,%r14d
|
|
|
|
rorl $17,%edi
|
|
|
|
xorl %r13d,%r12d
|
|
|
|
xorl %r14d,%edi
|
|
|
|
addl 16(%rsp),%r12d
|
|
|
|
addl 44(%rsp),%r12d
|
|
|
|
movl %ebx,%r13d
|
|
|
|
addl %edi,%r12d
|
|
|
|
movl %r9d,%r14d
|
|
|
|
rorl $14,%r13d
|
|
|
|
movl %ecx,%edi
|
|
|
|
xorl %ebx,%r13d
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %edx,%edi
|
|
|
|
movl %r12d,44(%rsp)
|
|
|
|
xorl %r9d,%r14d
|
|
|
|
andl %ebx,%edi
|
|
|
|
rorl $5,%r13d
|
|
|
|
addl %r8d,%r12d
|
|
|
|
xorl %edx,%edi
|
|
|
|
rorl $11,%r14d
|
|
|
|
xorl %ebx,%r13d
|
|
|
|
addl %edi,%r12d
|
|
|
|
movl %r9d,%edi
|
|
|
|
addl (%rbp),%r12d
|
|
|
|
xorl %r9d,%r14d
|
|
|
|
xorl %r10d,%edi
|
|
|
|
rorl $6,%r13d
|
|
|
|
movl %r10d,%r8d
|
|
|
|
andl %edi,%r15d
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %r13d,%r12d
|
|
|
|
xorl %r15d,%r8d
|
|
|
|
addl %r12d,%eax
|
|
|
|
addl %r12d,%r8d
|
|
|
|
leaq 20(%rbp),%rbp
|
|
|
|
movl 52(%rsp),%r13d
|
|
|
|
movl 40(%rsp),%r15d
|
|
|
|
movl %r13d,%r12d
|
|
|
|
rorl $11,%r13d
|
|
|
|
addl %r14d,%r8d
|
|
|
|
movl %r15d,%r14d
|
|
|
|
rorl $2,%r15d
|
|
|
|
xorl %r12d,%r13d
|
|
|
|
shrl $3,%r12d
|
|
|
|
rorl $7,%r13d
|
|
|
|
xorl %r14d,%r15d
|
|
|
|
shrl $10,%r14d
|
|
|
|
rorl $17,%r15d
|
|
|
|
xorl %r13d,%r12d
|
|
|
|
xorl %r14d,%r15d
|
|
|
|
addl 20(%rsp),%r12d
|
|
|
|
addl 48(%rsp),%r12d
|
|
|
|
movl %eax,%r13d
|
|
|
|
addl %r15d,%r12d
|
|
|
|
movl %r8d,%r14d
|
|
|
|
rorl $14,%r13d
|
|
|
|
movl %ebx,%r15d
|
|
|
|
xorl %eax,%r13d
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %ecx,%r15d
|
|
|
|
movl %r12d,48(%rsp)
|
|
|
|
xorl %r8d,%r14d
|
|
|
|
andl %eax,%r15d
|
|
|
|
rorl $5,%r13d
|
|
|
|
addl %edx,%r12d
|
|
|
|
xorl %ecx,%r15d
|
|
|
|
rorl $11,%r14d
|
|
|
|
xorl %eax,%r13d
|
|
|
|
addl %r15d,%r12d
|
|
|
|
movl %r8d,%r15d
|
|
|
|
addl (%rbp),%r12d
|
|
|
|
xorl %r8d,%r14d
|
|
|
|
xorl %r9d,%r15d
|
|
|
|
rorl $6,%r13d
|
|
|
|
movl %r9d,%edx
|
|
|
|
andl %r15d,%edi
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %r13d,%r12d
|
|
|
|
xorl %edi,%edx
|
|
|
|
addl %r12d,%r11d
|
|
|
|
addl %r12d,%edx
|
|
|
|
leaq 4(%rbp),%rbp
|
|
|
|
movl 56(%rsp),%r13d
|
|
|
|
movl 44(%rsp),%edi
|
|
|
|
movl %r13d,%r12d
|
|
|
|
rorl $11,%r13d
|
|
|
|
addl %r14d,%edx
|
|
|
|
movl %edi,%r14d
|
|
|
|
rorl $2,%edi
|
|
|
|
xorl %r12d,%r13d
|
|
|
|
shrl $3,%r12d
|
|
|
|
rorl $7,%r13d
|
|
|
|
xorl %r14d,%edi
|
|
|
|
shrl $10,%r14d
|
|
|
|
rorl $17,%edi
|
|
|
|
xorl %r13d,%r12d
|
|
|
|
xorl %r14d,%edi
|
|
|
|
addl 24(%rsp),%r12d
|
|
|
|
addl 52(%rsp),%r12d
|
|
|
|
movl %r11d,%r13d
|
|
|
|
addl %edi,%r12d
|
|
|
|
movl %edx,%r14d
|
|
|
|
rorl $14,%r13d
|
|
|
|
movl %eax,%edi
|
|
|
|
xorl %r11d,%r13d
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %ebx,%edi
|
|
|
|
movl %r12d,52(%rsp)
|
|
|
|
xorl %edx,%r14d
|
|
|
|
andl %r11d,%edi
|
|
|
|
rorl $5,%r13d
|
|
|
|
addl %ecx,%r12d
|
|
|
|
xorl %ebx,%edi
|
|
|
|
rorl $11,%r14d
|
|
|
|
xorl %r11d,%r13d
|
|
|
|
addl %edi,%r12d
|
|
|
|
movl %edx,%edi
|
|
|
|
addl (%rbp),%r12d
|
|
|
|
xorl %edx,%r14d
|
|
|
|
xorl %r8d,%edi
|
|
|
|
rorl $6,%r13d
|
|
|
|
movl %r8d,%ecx
|
|
|
|
andl %edi,%r15d
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %r13d,%r12d
|
|
|
|
xorl %r15d,%ecx
|
|
|
|
addl %r12d,%r10d
|
|
|
|
addl %r12d,%ecx
|
|
|
|
leaq 4(%rbp),%rbp
|
|
|
|
movl 60(%rsp),%r13d
|
|
|
|
movl 48(%rsp),%r15d
|
|
|
|
movl %r13d,%r12d
|
|
|
|
rorl $11,%r13d
|
|
|
|
addl %r14d,%ecx
|
|
|
|
movl %r15d,%r14d
|
|
|
|
rorl $2,%r15d
|
|
|
|
xorl %r12d,%r13d
|
|
|
|
shrl $3,%r12d
|
|
|
|
rorl $7,%r13d
|
|
|
|
xorl %r14d,%r15d
|
|
|
|
shrl $10,%r14d
|
|
|
|
rorl $17,%r15d
|
|
|
|
xorl %r13d,%r12d
|
|
|
|
xorl %r14d,%r15d
|
|
|
|
addl 28(%rsp),%r12d
|
|
|
|
addl 56(%rsp),%r12d
|
|
|
|
movl %r10d,%r13d
|
|
|
|
addl %r15d,%r12d
|
|
|
|
movl %ecx,%r14d
|
|
|
|
rorl $14,%r13d
|
|
|
|
movl %r11d,%r15d
|
|
|
|
xorl %r10d,%r13d
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %eax,%r15d
|
|
|
|
movl %r12d,56(%rsp)
|
|
|
|
xorl %ecx,%r14d
|
|
|
|
andl %r10d,%r15d
|
|
|
|
rorl $5,%r13d
|
|
|
|
addl %ebx,%r12d
|
|
|
|
xorl %eax,%r15d
|
|
|
|
rorl $11,%r14d
|
|
|
|
xorl %r10d,%r13d
|
|
|
|
addl %r15d,%r12d
|
|
|
|
movl %ecx,%r15d
|
|
|
|
addl (%rbp),%r12d
|
|
|
|
xorl %ecx,%r14d
|
|
|
|
xorl %edx,%r15d
|
|
|
|
rorl $6,%r13d
|
|
|
|
movl %edx,%ebx
|
|
|
|
andl %r15d,%edi
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %r13d,%r12d
|
|
|
|
xorl %edi,%ebx
|
|
|
|
addl %r12d,%r9d
|
|
|
|
addl %r12d,%ebx
|
|
|
|
leaq 4(%rbp),%rbp
|
|
|
|
movl 0(%rsp),%r13d
|
|
|
|
movl 52(%rsp),%edi
|
|
|
|
movl %r13d,%r12d
|
|
|
|
rorl $11,%r13d
|
|
|
|
addl %r14d,%ebx
|
|
|
|
movl %edi,%r14d
|
|
|
|
rorl $2,%edi
|
|
|
|
xorl %r12d,%r13d
|
|
|
|
shrl $3,%r12d
|
|
|
|
rorl $7,%r13d
|
|
|
|
xorl %r14d,%edi
|
|
|
|
shrl $10,%r14d
|
|
|
|
rorl $17,%edi
|
|
|
|
xorl %r13d,%r12d
|
|
|
|
xorl %r14d,%edi
|
|
|
|
addl 32(%rsp),%r12d
|
|
|
|
addl 60(%rsp),%r12d
|
|
|
|
movl %r9d,%r13d
|
|
|
|
addl %edi,%r12d
|
|
|
|
movl %ebx,%r14d
|
|
|
|
rorl $14,%r13d
|
|
|
|
movl %r10d,%edi
|
|
|
|
xorl %r9d,%r13d
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %r11d,%edi
|
|
|
|
movl %r12d,60(%rsp)
|
|
|
|
xorl %ebx,%r14d
|
|
|
|
andl %r9d,%edi
|
|
|
|
rorl $5,%r13d
|
|
|
|
addl %eax,%r12d
|
|
|
|
xorl %r11d,%edi
|
|
|
|
rorl $11,%r14d
|
|
|
|
xorl %r9d,%r13d
|
|
|
|
addl %edi,%r12d
|
|
|
|
movl %ebx,%edi
|
|
|
|
addl (%rbp),%r12d
|
|
|
|
xorl %ebx,%r14d
|
|
|
|
xorl %ecx,%edi
|
|
|
|
rorl $6,%r13d
|
|
|
|
movl %ecx,%eax
|
|
|
|
andl %edi,%r15d
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %r13d,%r12d
|
|
|
|
xorl %r15d,%eax
|
|
|
|
addl %r12d,%r8d
|
|
|
|
addl %r12d,%eax
|
|
|
|
leaq 20(%rbp),%rbp
|
|
|
|
cmpb $0,3(%rbp)
|
|
|
|
jnz .Lrounds_16_xx
|
|
|
|
movq 64+0(%rsp),%rdi
|
|
|
|
addl %r14d,%eax
|
|
|
|
leaq 64(%rsi),%rsi
|
|
|
|
addl 0(%rdi),%eax
|
|
|
|
addl 4(%rdi),%ebx
|
|
|
|
addl 8(%rdi),%ecx
|
|
|
|
addl 12(%rdi),%edx
|
|
|
|
addl 16(%rdi),%r8d
|
|
|
|
addl 20(%rdi),%r9d
|
|
|
|
addl 24(%rdi),%r10d
|
|
|
|
addl 28(%rdi),%r11d
|
|
|
|
cmpq 64+16(%rsp),%rsi
|
|
|
|
movl %eax,0(%rdi)
|
|
|
|
movl %ebx,4(%rdi)
|
|
|
|
movl %ecx,8(%rdi)
|
|
|
|
movl %edx,12(%rdi)
|
|
|
|
movl %r8d,16(%rdi)
|
|
|
|
movl %r9d,20(%rdi)
|
|
|
|
movl %r10d,24(%rdi)
|
|
|
|
movl %r11d,28(%rdi)
|
|
|
|
jb .Lloop
|
|
|
|
movq 88(%rsp),%rsi
|
|
|
|
.cfi_def_cfa %rsi,8
|
|
|
|
movq -48(%rsi),%r15
|
|
|
|
.cfi_restore %r15
|
|
|
|
movq -40(%rsi),%r14
|
|
|
|
.cfi_restore %r14
|
|
|
|
movq -32(%rsi),%r13
|
|
|
|
.cfi_restore %r13
|
|
|
|
movq -24(%rsi),%r12
|
|
|
|
.cfi_restore %r12
|
|
|
|
movq -16(%rsi),%rbp
|
|
|
|
.cfi_restore %rbp
|
|
|
|
movq -8(%rsi),%rbx
|
|
|
|
.cfi_restore %rbx
|
|
|
|
leaq (%rsi),%rsp
|
|
|
|
.cfi_def_cfa_register %rsp
|
|
|
|
.Lepilogue:
|
|
|
|
RET
|
|
|
|
.cfi_endproc
|
|
|
|
SET_SIZE(zfs_sha256_transform_x64)
|
|
|
|
|
|
|
|
/*
 * zfs_sha256_transform_shani - SHA-256 block transform using the x86 SHA
 * extensions (opcodes are emitted as raw .byte sequences; the decoded
 * mnemonic is given next to each one).
 *
 * In:    %rdi = pointer to the eight 32-bit state words (32 bytes, read at
 *               entry and written back before returning)
 *        %rsi = pointer to the input data, consumed 64 bytes per iteration
 *        %rdx = number of 64-byte blocks to process
 * Out:   state at (%rdi) updated in place
 * Clobb: %rcx, %rdx, %rsi, %xmm0-%xmm10, flags
 *
 * NOTE: the loop is bottom-tested (decq/jnz), so a block count of zero is
 * not handled here; the caller has to pass %rdx >= 1.
 *
 * No stack is used and no callee-saved register is touched.
 */
ENTRY_ALIGN(zfs_sha256_transform_shani, 64)
.cfi_startproc
	ENDBR
	/* %rcx is biased by +128, so table entries are addressed as N-128(%rcx) */
	leaq	K256+128(%rip),%rcx
	movdqu	(%rdi),%xmm1			/* state words 0..3 */
	movdqu	16(%rdi),%xmm2			/* state words 4..7 */
	/* pshufb mask stored at K256+512 (presumably the byte-swap mask) */
	movdqa	512-128(%rcx),%xmm7

	/*
	 * Rearrange the state words into the two-register layout that
	 * sha256rnds2 operates on; the inverse shuffle is applied before the
	 * state is stored back at the end.
	 */
	pshufd	$0x1b,%xmm1,%xmm0
	pshufd	$0xb1,%xmm1,%xmm1
	pshufd	$0x1b,%xmm2,%xmm2
	movdqa	%xmm7,%xmm8			/* keep the mask; %xmm7 doubles as scratch */
	.byte	102,15,58,15,202,8		/* palignr $8,%xmm2,%xmm1 */
	punpcklqdq	%xmm0,%xmm2
	jmp	.Loop_shani

.balign 16
.Loop_shani:
	/* load one 64-byte block and apply the pshufb mask to each 16 bytes */
	movdqu	(%rsi),%xmm3
	movdqu	16(%rsi),%xmm4
	movdqu	32(%rsi),%xmm5
	.byte	102,15,56,0,223			/* pshufb %xmm7,%xmm3 */
	movdqu	48(%rsi),%xmm6

	/*
	 * Each group below adds four round constants (read from K256 at a
	 * 32-byte stride) to four message words in %xmm0, then issues
	 * sha256rnds2 twice; pshufd $0x0e moves the upper two dwords of
	 * %xmm0 down for the second sha256rnds2.
	 */

	/* rounds 0-3 */
	movdqa	0-128(%rcx),%xmm0
	paddd	%xmm3,%xmm0
	.byte	102,15,56,0,231			/* pshufb %xmm7,%xmm4 */
	movdqa	%xmm2,%xmm10			/* save state for the final add */
	.byte	15,56,203,209			/* sha256rnds2 %xmm0,%xmm1,%xmm2 */
	pshufd	$0x0e,%xmm0,%xmm0
	nop
	movdqa	%xmm1,%xmm9			/* save state for the final add */
	.byte	15,56,203,202			/* sha256rnds2 %xmm0,%xmm2,%xmm1 */

	/* rounds 4-7 */
	movdqa	32-128(%rcx),%xmm0
	paddd	%xmm4,%xmm0
	.byte	102,15,56,0,239			/* pshufb %xmm7,%xmm5 */
	.byte	15,56,203,209			/* sha256rnds2 %xmm0,%xmm1,%xmm2 */
	pshufd	$0x0e,%xmm0,%xmm0
	leaq	64(%rsi),%rsi			/* advance to the next input block */
	.byte	15,56,204,220			/* sha256msg1 %xmm4,%xmm3 */
	.byte	15,56,203,202			/* sha256rnds2 %xmm0,%xmm2,%xmm1 */

	/* rounds 8-11 */
	movdqa	64-128(%rcx),%xmm0
	paddd	%xmm5,%xmm0
	.byte	102,15,56,0,247			/* pshufb %xmm7,%xmm6 */
	.byte	15,56,203,209			/* sha256rnds2 %xmm0,%xmm1,%xmm2 */
	pshufd	$0x0e,%xmm0,%xmm0
	movdqa	%xmm6,%xmm7			/* mask no longer needed; %xmm7 is scratch */
	.byte	102,15,58,15,253,4		/* palignr $4,%xmm5,%xmm7 */
	nop
	paddd	%xmm7,%xmm3
	.byte	15,56,204,229			/* sha256msg1 %xmm5,%xmm4 */
	.byte	15,56,203,202			/* sha256rnds2 %xmm0,%xmm2,%xmm1 */

	/* rounds 12-15 */
	movdqa	96-128(%rcx),%xmm0
	paddd	%xmm6,%xmm0
	.byte	15,56,205,222			/* sha256msg2 %xmm6,%xmm3 */
	.byte	15,56,203,209			/* sha256rnds2 %xmm0,%xmm1,%xmm2 */
	pshufd	$0x0e,%xmm0,%xmm0
	movdqa	%xmm3,%xmm7
	.byte	102,15,58,15,254,4		/* palignr $4,%xmm6,%xmm7 */
	nop
	paddd	%xmm7,%xmm4
	.byte	15,56,204,238			/* sha256msg1 %xmm6,%xmm5 */
	.byte	15,56,203,202			/* sha256rnds2 %xmm0,%xmm2,%xmm1 */
	/* rounds 16-19 */
	movdqa	128-128(%rcx),%xmm0
	paddd	%xmm3,%xmm0
	.byte	15,56,205,227			/* sha256msg2 %xmm3,%xmm4 */
	.byte	15,56,203,209			/* sha256rnds2 %xmm0,%xmm1,%xmm2 */
	pshufd	$0x0e,%xmm0,%xmm0
	movdqa	%xmm4,%xmm7
	.byte	102,15,58,15,251,4		/* palignr $4,%xmm3,%xmm7 */
	nop
	paddd	%xmm7,%xmm5
	.byte	15,56,204,243			/* sha256msg1 %xmm3,%xmm6 */
	.byte	15,56,203,202			/* sha256rnds2 %xmm0,%xmm2,%xmm1 */
	/* rounds 20-23 */
	movdqa	160-128(%rcx),%xmm0
	paddd	%xmm4,%xmm0
	.byte	15,56,205,236			/* sha256msg2 %xmm4,%xmm5 */
	.byte	15,56,203,209			/* sha256rnds2 %xmm0,%xmm1,%xmm2 */
	pshufd	$0x0e,%xmm0,%xmm0
	movdqa	%xmm5,%xmm7
	.byte	102,15,58,15,252,4		/* palignr $4,%xmm4,%xmm7 */
	nop
	paddd	%xmm7,%xmm6
	.byte	15,56,204,220			/* sha256msg1 %xmm4,%xmm3 */
	.byte	15,56,203,202			/* sha256rnds2 %xmm0,%xmm2,%xmm1 */
	/* rounds 24-27 */
	movdqa	192-128(%rcx),%xmm0
	paddd	%xmm5,%xmm0
	.byte	15,56,205,245			/* sha256msg2 %xmm5,%xmm6 */
	.byte	15,56,203,209			/* sha256rnds2 %xmm0,%xmm1,%xmm2 */
	pshufd	$0x0e,%xmm0,%xmm0
	movdqa	%xmm6,%xmm7
	.byte	102,15,58,15,253,4		/* palignr $4,%xmm5,%xmm7 */
	nop
	paddd	%xmm7,%xmm3
	.byte	15,56,204,229			/* sha256msg1 %xmm5,%xmm4 */
	.byte	15,56,203,202			/* sha256rnds2 %xmm0,%xmm2,%xmm1 */
	/* rounds 28-31 */
	movdqa	224-128(%rcx),%xmm0
	paddd	%xmm6,%xmm0
	.byte	15,56,205,222			/* sha256msg2 %xmm6,%xmm3 */
	.byte	15,56,203,209			/* sha256rnds2 %xmm0,%xmm1,%xmm2 */
	pshufd	$0x0e,%xmm0,%xmm0
	movdqa	%xmm3,%xmm7
	.byte	102,15,58,15,254,4		/* palignr $4,%xmm6,%xmm7 */
	nop
	paddd	%xmm7,%xmm4
	.byte	15,56,204,238			/* sha256msg1 %xmm6,%xmm5 */
	.byte	15,56,203,202			/* sha256rnds2 %xmm0,%xmm2,%xmm1 */
	/* rounds 32-35 */
	movdqa	256-128(%rcx),%xmm0
	paddd	%xmm3,%xmm0
	.byte	15,56,205,227			/* sha256msg2 %xmm3,%xmm4 */
	.byte	15,56,203,209			/* sha256rnds2 %xmm0,%xmm1,%xmm2 */
	pshufd	$0x0e,%xmm0,%xmm0
	movdqa	%xmm4,%xmm7
	.byte	102,15,58,15,251,4		/* palignr $4,%xmm3,%xmm7 */
	nop
	paddd	%xmm7,%xmm5
	.byte	15,56,204,243			/* sha256msg1 %xmm3,%xmm6 */
	.byte	15,56,203,202			/* sha256rnds2 %xmm0,%xmm2,%xmm1 */
	/* rounds 36-39 */
	movdqa	288-128(%rcx),%xmm0
	paddd	%xmm4,%xmm0
	.byte	15,56,205,236			/* sha256msg2 %xmm4,%xmm5 */
	.byte	15,56,203,209			/* sha256rnds2 %xmm0,%xmm1,%xmm2 */
	pshufd	$0x0e,%xmm0,%xmm0
	movdqa	%xmm5,%xmm7
	.byte	102,15,58,15,252,4		/* palignr $4,%xmm4,%xmm7 */
	nop
	paddd	%xmm7,%xmm6
	.byte	15,56,204,220			/* sha256msg1 %xmm4,%xmm3 */
	.byte	15,56,203,202			/* sha256rnds2 %xmm0,%xmm2,%xmm1 */
	/* rounds 40-43 */
	movdqa	320-128(%rcx),%xmm0
	paddd	%xmm5,%xmm0
	.byte	15,56,205,245			/* sha256msg2 %xmm5,%xmm6 */
	.byte	15,56,203,209			/* sha256rnds2 %xmm0,%xmm1,%xmm2 */
	pshufd	$0x0e,%xmm0,%xmm0
	movdqa	%xmm6,%xmm7
	.byte	102,15,58,15,253,4		/* palignr $4,%xmm5,%xmm7 */
	nop
	paddd	%xmm7,%xmm3
	.byte	15,56,204,229			/* sha256msg1 %xmm5,%xmm4 */
	.byte	15,56,203,202			/* sha256rnds2 %xmm0,%xmm2,%xmm1 */
	/* rounds 44-47 */
	movdqa	352-128(%rcx),%xmm0
	paddd	%xmm6,%xmm0
	.byte	15,56,205,222			/* sha256msg2 %xmm6,%xmm3 */
	.byte	15,56,203,209			/* sha256rnds2 %xmm0,%xmm1,%xmm2 */
	pshufd	$0x0e,%xmm0,%xmm0
	movdqa	%xmm3,%xmm7
	.byte	102,15,58,15,254,4		/* palignr $4,%xmm6,%xmm7 */
	nop
	paddd	%xmm7,%xmm4
	.byte	15,56,204,238			/* sha256msg1 %xmm6,%xmm5 */
	.byte	15,56,203,202			/* sha256rnds2 %xmm0,%xmm2,%xmm1 */
	/* rounds 48-51 */
	movdqa	384-128(%rcx),%xmm0
	paddd	%xmm3,%xmm0
	.byte	15,56,205,227			/* sha256msg2 %xmm3,%xmm4 */
	.byte	15,56,203,209			/* sha256rnds2 %xmm0,%xmm1,%xmm2 */
	pshufd	$0x0e,%xmm0,%xmm0
	movdqa	%xmm4,%xmm7
	.byte	102,15,58,15,251,4		/* palignr $4,%xmm3,%xmm7 */
	nop
	paddd	%xmm7,%xmm5
	.byte	15,56,204,243			/* sha256msg1 %xmm3,%xmm6 */
	.byte	15,56,203,202			/* sha256rnds2 %xmm0,%xmm2,%xmm1 */
	/* rounds 52-55 */
	movdqa	416-128(%rcx),%xmm0
	paddd	%xmm4,%xmm0
	.byte	15,56,205,236			/* sha256msg2 %xmm4,%xmm5 */
	.byte	15,56,203,209			/* sha256rnds2 %xmm0,%xmm1,%xmm2 */
	pshufd	$0x0e,%xmm0,%xmm0
	movdqa	%xmm5,%xmm7
	.byte	102,15,58,15,252,4		/* palignr $4,%xmm4,%xmm7 */
	.byte	15,56,203,202			/* sha256rnds2 %xmm0,%xmm2,%xmm1 */
	paddd	%xmm7,%xmm6

	/* rounds 56-59 */
	movdqa	448-128(%rcx),%xmm0
	paddd	%xmm5,%xmm0
	.byte	15,56,203,209			/* sha256rnds2 %xmm0,%xmm1,%xmm2 */
	pshufd	$0x0e,%xmm0,%xmm0
	.byte	15,56,205,245			/* sha256msg2 %xmm5,%xmm6 */
	movdqa	%xmm8,%xmm7			/* restore the pshufb mask for the next block */
	.byte	15,56,203,202			/* sha256rnds2 %xmm0,%xmm2,%xmm1 */

	/* rounds 60-63 */
	movdqa	480-128(%rcx),%xmm0
	paddd	%xmm6,%xmm0
	nop
	.byte	15,56,203,209			/* sha256rnds2 %xmm0,%xmm1,%xmm2 */
	pshufd	$0x0e,%xmm0,%xmm0
	/*
	 * Block counter; ZF set here is consumed by the jnz below. Nothing
	 * in between (nop, sha256rnds2, paddd) modifies the flags.
	 */
	decq	%rdx
	nop
	.byte	15,56,203,202			/* sha256rnds2 %xmm0,%xmm2,%xmm1 */

	/* add the state saved at the top of the iteration */
	paddd	%xmm10,%xmm2
	paddd	%xmm9,%xmm1
	jnz	.Loop_shani

	/* undo the entry shuffle so the state is back in memory order */
	pshufd	$0xb1,%xmm2,%xmm2
	pshufd	$0x1b,%xmm1,%xmm7
	pshufd	$0xb1,%xmm1,%xmm1
	punpckhqdq	%xmm2,%xmm1
	.byte	102,15,58,15,215,8		/* palignr $8,%xmm7,%xmm2 */

	movdqu	%xmm1,(%rdi)
	movdqu	%xmm2,16(%rdi)
	RET
.cfi_endproc
SET_SIZE(zfs_sha256_transform_shani)
|
|
|
|
|
|
|
|
ENTRY_ALIGN(zfs_sha256_transform_ssse3, 64)
|
|
|
|
.cfi_startproc
|
|
|
|
ENDBR
|
|
|
|
movq %rsp,%rax
|
|
|
|
.cfi_def_cfa_register %rax
|
|
|
|
pushq %rbx
|
|
|
|
.cfi_offset %rbx,-16
|
|
|
|
pushq %rbp
|
|
|
|
.cfi_offset %rbp,-24
|
|
|
|
pushq %r12
|
|
|
|
.cfi_offset %r12,-32
|
|
|
|
pushq %r13
|
|
|
|
.cfi_offset %r13,-40
|
|
|
|
pushq %r14
|
|
|
|
.cfi_offset %r14,-48
|
|
|
|
pushq %r15
|
|
|
|
.cfi_offset %r15,-56
|
|
|
|
shlq $4,%rdx
|
|
|
|
subq $96,%rsp
|
|
|
|
leaq (%rsi,%rdx,4),%rdx
|
|
|
|
andq $-64,%rsp
|
|
|
|
movq %rdi,64+0(%rsp)
|
|
|
|
movq %rsi,64+8(%rsp)
|
|
|
|
movq %rdx,64+16(%rsp)
|
|
|
|
movq %rax,88(%rsp)
|
|
|
|
.cfi_escape 0x0f,0x06,0x77,0xd8,0x00,0x06,0x23,0x08
|
|
|
|
.Lprologue_ssse3:
|
|
|
|
|
|
|
|
movl 0(%rdi),%eax
|
|
|
|
movl 4(%rdi),%ebx
|
|
|
|
movl 8(%rdi),%ecx
|
|
|
|
movl 12(%rdi),%edx
|
|
|
|
movl 16(%rdi),%r8d
|
|
|
|
movl 20(%rdi),%r9d
|
|
|
|
movl 24(%rdi),%r10d
|
|
|
|
movl 28(%rdi),%r11d
|
|
|
|
|
|
|
|
jmp .Lloop_ssse3
|
2023-03-06 23:24:05 +00:00
|
|
|
.balign 16
|
Add generic implementation handling and SHA2 impl
The skeleton file module/icp/include/generic_impl.c can be used for
iterating over different implementations of algorithms.
It is used by SHA256, SHA512 and BLAKE3 currently.
The Solaris SHA2 implementation got replaced with a version which is
based on public domain code of cppcrypto v0.10.
These assembly files are taken from current openssl master:
- sha256-x86_64.S: x64, SSSE3, AVX, AVX2, SHA-NI (x86_64)
- sha512-x86_64.S: x64, AVX, AVX2 (x86_64)
- sha256-armv7.S: ARMv7, NEON, ARMv8-CE (arm)
- sha512-armv7.S: ARMv7, NEON (arm)
- sha256-armv8.S: ARMv7, NEON, ARMv8-CE (aarch64)
- sha512-armv8.S: ARMv7, ARMv8-CE (aarch64)
- sha256-ppc.S: Generic PPC64 LE/BE (ppc64)
- sha512-ppc.S: Generic PPC64 LE/BE (ppc64)
- sha256-p8.S: Power8 ISA Version 2.07 LE/BE (ppc64)
- sha512-p8.S: Power8 ISA Version 2.07 LE/BE (ppc64)
Tested-by: Rich Ercolani <rincebrain@gmail.com>
Tested-by: Sebastian Gottschall <s.gottschall@dd-wrt.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Tino Reichardt <milky-zfs@mcmilk.de>
Closes #13741
2023-03-01 08:40:28 +00:00
|
|
|
.Lloop_ssse3:
|
|
|
|
movdqa K256+512(%rip),%xmm7
|
|
|
|
movdqu 0(%rsi),%xmm0
|
|
|
|
movdqu 16(%rsi),%xmm1
|
|
|
|
movdqu 32(%rsi),%xmm2
|
|
|
|
.byte 102,15,56,0,199
|
|
|
|
movdqu 48(%rsi),%xmm3
|
|
|
|
leaq K256(%rip),%rbp
|
|
|
|
.byte 102,15,56,0,207
|
|
|
|
movdqa 0(%rbp),%xmm4
|
|
|
|
movdqa 32(%rbp),%xmm5
|
|
|
|
.byte 102,15,56,0,215
|
|
|
|
paddd %xmm0,%xmm4
|
|
|
|
movdqa 64(%rbp),%xmm6
|
|
|
|
.byte 102,15,56,0,223
|
|
|
|
movdqa 96(%rbp),%xmm7
|
|
|
|
paddd %xmm1,%xmm5
|
|
|
|
paddd %xmm2,%xmm6
|
|
|
|
paddd %xmm3,%xmm7
|
|
|
|
movdqa %xmm4,0(%rsp)
|
|
|
|
movl %eax,%r14d
|
|
|
|
movdqa %xmm5,16(%rsp)
|
|
|
|
movl %ebx,%edi
|
|
|
|
movdqa %xmm6,32(%rsp)
|
|
|
|
xorl %ecx,%edi
|
|
|
|
movdqa %xmm7,48(%rsp)
|
|
|
|
movl %r8d,%r13d
|
|
|
|
jmp .Lssse3_00_47
|
|
|
|
|
2023-03-06 23:24:05 +00:00
|
|
|
.balign 16
|
Add generic implementation handling and SHA2 impl
The skeleton file module/icp/include/generic_impl.c can be used for
iterating over different implementations of algorithms.
It is used by SHA256, SHA512 and BLAKE3 currently.
The Solaris SHA2 implementation got replaced with a version which is
based on public domain code of cppcrypto v0.10.
These assembly files are taken from current openssl master:
- sha256-x86_64.S: x64, SSSE3, AVX, AVX2, SHA-NI (x86_64)
- sha512-x86_64.S: x64, AVX, AVX2 (x86_64)
- sha256-armv7.S: ARMv7, NEON, ARMv8-CE (arm)
- sha512-armv7.S: ARMv7, NEON (arm)
- sha256-armv8.S: ARMv7, NEON, ARMv8-CE (aarch64)
- sha512-armv8.S: ARMv7, ARMv8-CE (aarch64)
- sha256-ppc.S: Generic PPC64 LE/BE (ppc64)
- sha512-ppc.S: Generic PPC64 LE/BE (ppc64)
- sha256-p8.S: Power8 ISA Version 2.07 LE/BE (ppc64)
- sha512-p8.S: Power8 ISA Version 2.07 LE/BE (ppc64)
Tested-by: Rich Ercolani <rincebrain@gmail.com>
Tested-by: Sebastian Gottschall <s.gottschall@dd-wrt.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Tino Reichardt <milky-zfs@mcmilk.de>
Closes #13741
2023-03-01 08:40:28 +00:00
|
|
|
.Lssse3_00_47:
|
|
|
|
subq $-128,%rbp
|
|
|
|
rorl $14,%r13d
|
|
|
|
movdqa %xmm1,%xmm4
|
|
|
|
movl %r14d,%eax
|
|
|
|
movl %r9d,%r12d
|
|
|
|
movdqa %xmm3,%xmm7
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %r8d,%r13d
|
|
|
|
xorl %r10d,%r12d
|
|
|
|
rorl $5,%r13d
|
|
|
|
xorl %eax,%r14d
|
|
|
|
.byte 102,15,58,15,224,4
|
|
|
|
andl %r8d,%r12d
|
|
|
|
xorl %r8d,%r13d
|
|
|
|
.byte 102,15,58,15,250,4
|
|
|
|
addl 0(%rsp),%r11d
|
|
|
|
movl %eax,%r15d
|
|
|
|
xorl %r10d,%r12d
|
|
|
|
rorl $11,%r14d
|
|
|
|
movdqa %xmm4,%xmm5
|
|
|
|
xorl %ebx,%r15d
|
|
|
|
addl %r12d,%r11d
|
|
|
|
movdqa %xmm4,%xmm6
|
|
|
|
rorl $6,%r13d
|
|
|
|
andl %r15d,%edi
|
|
|
|
psrld $3,%xmm4
|
|
|
|
xorl %eax,%r14d
|
|
|
|
addl %r13d,%r11d
|
|
|
|
xorl %ebx,%edi
|
|
|
|
paddd %xmm7,%xmm0
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %r11d,%edx
|
|
|
|
psrld $7,%xmm6
|
|
|
|
addl %edi,%r11d
|
|
|
|
movl %edx,%r13d
|
|
|
|
pshufd $250,%xmm3,%xmm7
|
|
|
|
addl %r11d,%r14d
|
|
|
|
rorl $14,%r13d
|
|
|
|
pslld $14,%xmm5
|
|
|
|
movl %r14d,%r11d
|
|
|
|
movl %r8d,%r12d
|
|
|
|
pxor %xmm6,%xmm4
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %edx,%r13d
|
|
|
|
xorl %r9d,%r12d
|
|
|
|
rorl $5,%r13d
|
|
|
|
psrld $11,%xmm6
|
|
|
|
xorl %r11d,%r14d
|
|
|
|
pxor %xmm5,%xmm4
|
|
|
|
andl %edx,%r12d
|
|
|
|
xorl %edx,%r13d
|
|
|
|
pslld $11,%xmm5
|
|
|
|
addl 4(%rsp),%r10d
|
|
|
|
movl %r11d,%edi
|
|
|
|
pxor %xmm6,%xmm4
|
|
|
|
xorl %r9d,%r12d
|
|
|
|
rorl $11,%r14d
|
|
|
|
movdqa %xmm7,%xmm6
|
|
|
|
xorl %eax,%edi
|
|
|
|
addl %r12d,%r10d
|
|
|
|
pxor %xmm5,%xmm4
|
|
|
|
rorl $6,%r13d
|
|
|
|
andl %edi,%r15d
|
|
|
|
xorl %r11d,%r14d
|
|
|
|
psrld $10,%xmm7
|
|
|
|
addl %r13d,%r10d
|
|
|
|
xorl %eax,%r15d
|
|
|
|
paddd %xmm4,%xmm0
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %r10d,%ecx
|
|
|
|
psrlq $17,%xmm6
|
|
|
|
addl %r15d,%r10d
|
|
|
|
movl %ecx,%r13d
|
|
|
|
addl %r10d,%r14d
|
|
|
|
pxor %xmm6,%xmm7
|
|
|
|
rorl $14,%r13d
|
|
|
|
movl %r14d,%r10d
|
|
|
|
movl %edx,%r12d
|
|
|
|
rorl $9,%r14d
|
|
|
|
psrlq $2,%xmm6
|
|
|
|
xorl %ecx,%r13d
|
|
|
|
xorl %r8d,%r12d
|
|
|
|
pxor %xmm6,%xmm7
|
|
|
|
rorl $5,%r13d
|
|
|
|
xorl %r10d,%r14d
|
|
|
|
andl %ecx,%r12d
|
|
|
|
pshufd $128,%xmm7,%xmm7
|
|
|
|
xorl %ecx,%r13d
|
|
|
|
addl 8(%rsp),%r9d
|
|
|
|
movl %r10d,%r15d
|
|
|
|
psrldq $8,%xmm7
|
|
|
|
xorl %r8d,%r12d
|
|
|
|
rorl $11,%r14d
|
|
|
|
xorl %r11d,%r15d
|
|
|
|
addl %r12d,%r9d
|
|
|
|
rorl $6,%r13d
|
|
|
|
paddd %xmm7,%xmm0
|
|
|
|
andl %r15d,%edi
|
|
|
|
xorl %r10d,%r14d
|
|
|
|
addl %r13d,%r9d
|
|
|
|
pshufd $80,%xmm0,%xmm7
|
|
|
|
xorl %r11d,%edi
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %r9d,%ebx
|
|
|
|
movdqa %xmm7,%xmm6
|
|
|
|
addl %edi,%r9d
|
|
|
|
movl %ebx,%r13d
|
|
|
|
psrld $10,%xmm7
|
|
|
|
addl %r9d,%r14d
|
|
|
|
rorl $14,%r13d
|
|
|
|
psrlq $17,%xmm6
|
|
|
|
movl %r14d,%r9d
|
|
|
|
movl %ecx,%r12d
|
|
|
|
pxor %xmm6,%xmm7
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %ebx,%r13d
|
|
|
|
xorl %edx,%r12d
|
|
|
|
rorl $5,%r13d
|
|
|
|
xorl %r9d,%r14d
|
|
|
|
psrlq $2,%xmm6
|
|
|
|
andl %ebx,%r12d
|
|
|
|
xorl %ebx,%r13d
|
|
|
|
addl 12(%rsp),%r8d
|
|
|
|
pxor %xmm6,%xmm7
|
|
|
|
movl %r9d,%edi
|
|
|
|
xorl %edx,%r12d
|
|
|
|
rorl $11,%r14d
|
|
|
|
pshufd $8,%xmm7,%xmm7
|
|
|
|
xorl %r10d,%edi
|
|
|
|
addl %r12d,%r8d
|
|
|
|
movdqa 0(%rbp),%xmm6
|
|
|
|
rorl $6,%r13d
|
|
|
|
andl %edi,%r15d
|
|
|
|
pslldq $8,%xmm7
|
|
|
|
xorl %r9d,%r14d
|
|
|
|
addl %r13d,%r8d
|
|
|
|
xorl %r10d,%r15d
|
|
|
|
paddd %xmm7,%xmm0
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %r8d,%eax
|
|
|
|
addl %r15d,%r8d
|
|
|
|
paddd %xmm0,%xmm6
|
|
|
|
movl %eax,%r13d
|
|
|
|
addl %r8d,%r14d
|
|
|
|
movdqa %xmm6,0(%rsp)
|
|
|
|
rorl $14,%r13d
|
|
|
|
movdqa %xmm2,%xmm4
|
|
|
|
movl %r14d,%r8d
|
|
|
|
movl %ebx,%r12d
|
|
|
|
movdqa %xmm0,%xmm7
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %eax,%r13d
|
|
|
|
xorl %ecx,%r12d
|
|
|
|
rorl $5,%r13d
|
|
|
|
xorl %r8d,%r14d
|
|
|
|
.byte 102,15,58,15,225,4
|
|
|
|
andl %eax,%r12d
|
|
|
|
xorl %eax,%r13d
|
|
|
|
.byte 102,15,58,15,251,4
|
|
|
|
addl 16(%rsp),%edx
|
|
|
|
movl %r8d,%r15d
|
|
|
|
xorl %ecx,%r12d
|
|
|
|
rorl $11,%r14d
|
|
|
|
movdqa %xmm4,%xmm5
|
|
|
|
xorl %r9d,%r15d
|
|
|
|
addl %r12d,%edx
|
|
|
|
movdqa %xmm4,%xmm6
|
|
|
|
rorl $6,%r13d
|
|
|
|
andl %r15d,%edi
|
|
|
|
psrld $3,%xmm4
|
|
|
|
xorl %r8d,%r14d
|
|
|
|
addl %r13d,%edx
|
|
|
|
xorl %r9d,%edi
|
|
|
|
paddd %xmm7,%xmm1
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %edx,%r11d
|
|
|
|
psrld $7,%xmm6
|
|
|
|
addl %edi,%edx
|
|
|
|
movl %r11d,%r13d
|
|
|
|
pshufd $250,%xmm0,%xmm7
|
|
|
|
addl %edx,%r14d
|
|
|
|
rorl $14,%r13d
|
|
|
|
pslld $14,%xmm5
|
|
|
|
movl %r14d,%edx
|
|
|
|
movl %eax,%r12d
|
|
|
|
pxor %xmm6,%xmm4
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %r11d,%r13d
|
|
|
|
xorl %ebx,%r12d
|
|
|
|
rorl $5,%r13d
|
|
|
|
psrld $11,%xmm6
|
|
|
|
xorl %edx,%r14d
|
|
|
|
pxor %xmm5,%xmm4
|
|
|
|
andl %r11d,%r12d
|
|
|
|
xorl %r11d,%r13d
|
|
|
|
pslld $11,%xmm5
|
|
|
|
addl 20(%rsp),%ecx
|
|
|
|
movl %edx,%edi
|
|
|
|
pxor %xmm6,%xmm4
|
|
|
|
xorl %ebx,%r12d
|
|
|
|
rorl $11,%r14d
|
|
|
|
movdqa %xmm7,%xmm6
|
|
|
|
xorl %r8d,%edi
|
|
|
|
addl %r12d,%ecx
|
|
|
|
pxor %xmm5,%xmm4
|
|
|
|
rorl $6,%r13d
|
|
|
|
andl %edi,%r15d
|
|
|
|
xorl %edx,%r14d
|
|
|
|
psrld $10,%xmm7
|
|
|
|
addl %r13d,%ecx
|
|
|
|
xorl %r8d,%r15d
|
|
|
|
paddd %xmm4,%xmm1
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %ecx,%r10d
|
|
|
|
psrlq $17,%xmm6
|
|
|
|
addl %r15d,%ecx
|
|
|
|
movl %r10d,%r13d
|
|
|
|
addl %ecx,%r14d
|
|
|
|
pxor %xmm6,%xmm7
|
|
|
|
rorl $14,%r13d
|
|
|
|
movl %r14d,%ecx
|
|
|
|
movl %r11d,%r12d
|
|
|
|
rorl $9,%r14d
|
|
|
|
psrlq $2,%xmm6
|
|
|
|
xorl %r10d,%r13d
|
|
|
|
xorl %eax,%r12d
|
|
|
|
pxor %xmm6,%xmm7
|
|
|
|
rorl $5,%r13d
|
|
|
|
xorl %ecx,%r14d
|
|
|
|
andl %r10d,%r12d
|
|
|
|
pshufd $128,%xmm7,%xmm7
|
|
|
|
xorl %r10d,%r13d
|
|
|
|
addl 24(%rsp),%ebx
|
|
|
|
movl %ecx,%r15d
|
|
|
|
psrldq $8,%xmm7
|
|
|
|
xorl %eax,%r12d
|
|
|
|
rorl $11,%r14d
|
|
|
|
xorl %edx,%r15d
|
|
|
|
addl %r12d,%ebx
|
|
|
|
rorl $6,%r13d
|
|
|
|
paddd %xmm7,%xmm1
|
|
|
|
andl %r15d,%edi
|
|
|
|
xorl %ecx,%r14d
|
|
|
|
addl %r13d,%ebx
|
|
|
|
pshufd $80,%xmm1,%xmm7
|
|
|
|
xorl %edx,%edi
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %ebx,%r9d
|
|
|
|
movdqa %xmm7,%xmm6
|
|
|
|
addl %edi,%ebx
|
|
|
|
movl %r9d,%r13d
|
|
|
|
psrld $10,%xmm7
|
|
|
|
addl %ebx,%r14d
|
|
|
|
rorl $14,%r13d
|
|
|
|
psrlq $17,%xmm6
|
|
|
|
movl %r14d,%ebx
|
|
|
|
movl %r10d,%r12d
|
|
|
|
pxor %xmm6,%xmm7
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %r9d,%r13d
|
|
|
|
xorl %r11d,%r12d
|
|
|
|
rorl $5,%r13d
|
|
|
|
xorl %ebx,%r14d
|
|
|
|
psrlq $2,%xmm6
|
|
|
|
andl %r9d,%r12d
|
|
|
|
xorl %r9d,%r13d
|
|
|
|
addl 28(%rsp),%eax
|
|
|
|
pxor %xmm6,%xmm7
|
|
|
|
movl %ebx,%edi
|
|
|
|
xorl %r11d,%r12d
|
|
|
|
rorl $11,%r14d
|
|
|
|
pshufd $8,%xmm7,%xmm7
|
|
|
|
xorl %ecx,%edi
|
|
|
|
addl %r12d,%eax
|
|
|
|
movdqa 32(%rbp),%xmm6
|
|
|
|
rorl $6,%r13d
|
|
|
|
andl %edi,%r15d
|
|
|
|
pslldq $8,%xmm7
|
|
|
|
xorl %ebx,%r14d
|
|
|
|
addl %r13d,%eax
|
|
|
|
xorl %ecx,%r15d
|
|
|
|
paddd %xmm7,%xmm1
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %eax,%r8d
|
|
|
|
addl %r15d,%eax
|
|
|
|
paddd %xmm1,%xmm6
|
|
|
|
movl %r8d,%r13d
|
|
|
|
addl %eax,%r14d
|
|
|
|
movdqa %xmm6,16(%rsp)
|
|
|
|
rorl $14,%r13d
|
|
|
|
movdqa %xmm3,%xmm4
|
|
|
|
movl %r14d,%eax
|
|
|
|
movl %r9d,%r12d
|
|
|
|
movdqa %xmm1,%xmm7
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %r8d,%r13d
|
|
|
|
xorl %r10d,%r12d
|
|
|
|
rorl $5,%r13d
|
|
|
|
xorl %eax,%r14d
|
|
|
|
.byte 102,15,58,15,226,4
|
|
|
|
andl %r8d,%r12d
|
|
|
|
xorl %r8d,%r13d
|
|
|
|
.byte 102,15,58,15,248,4
|
|
|
|
addl 32(%rsp),%r11d
|
|
|
|
movl %eax,%r15d
|
|
|
|
xorl %r10d,%r12d
|
|
|
|
rorl $11,%r14d
|
|
|
|
movdqa %xmm4,%xmm5
|
|
|
|
xorl %ebx,%r15d
|
|
|
|
addl %r12d,%r11d
|
|
|
|
movdqa %xmm4,%xmm6
|
|
|
|
rorl $6,%r13d
|
|
|
|
andl %r15d,%edi
|
|
|
|
psrld $3,%xmm4
|
|
|
|
xorl %eax,%r14d
|
|
|
|
addl %r13d,%r11d
|
|
|
|
xorl %ebx,%edi
|
|
|
|
paddd %xmm7,%xmm2
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %r11d,%edx
|
|
|
|
psrld $7,%xmm6
|
|
|
|
addl %edi,%r11d
|
|
|
|
movl %edx,%r13d
|
|
|
|
pshufd $250,%xmm1,%xmm7
|
|
|
|
addl %r11d,%r14d
|
|
|
|
rorl $14,%r13d
|
|
|
|
pslld $14,%xmm5
|
|
|
|
movl %r14d,%r11d
|
|
|
|
movl %r8d,%r12d
|
|
|
|
pxor %xmm6,%xmm4
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %edx,%r13d
|
|
|
|
xorl %r9d,%r12d
|
|
|
|
rorl $5,%r13d
|
|
|
|
psrld $11,%xmm6
|
|
|
|
xorl %r11d,%r14d
|
|
|
|
pxor %xmm5,%xmm4
|
|
|
|
andl %edx,%r12d
|
|
|
|
xorl %edx,%r13d
|
|
|
|
pslld $11,%xmm5
|
|
|
|
addl 36(%rsp),%r10d
|
|
|
|
movl %r11d,%edi
|
|
|
|
pxor %xmm6,%xmm4
|
|
|
|
xorl %r9d,%r12d
|
|
|
|
rorl $11,%r14d
|
|
|
|
movdqa %xmm7,%xmm6
|
|
|
|
xorl %eax,%edi
|
|
|
|
addl %r12d,%r10d
|
|
|
|
pxor %xmm5,%xmm4
|
|
|
|
rorl $6,%r13d
|
|
|
|
andl %edi,%r15d
|
|
|
|
xorl %r11d,%r14d
|
|
|
|
psrld $10,%xmm7
|
|
|
|
addl %r13d,%r10d
|
|
|
|
xorl %eax,%r15d
|
|
|
|
paddd %xmm4,%xmm2
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %r10d,%ecx
|
|
|
|
psrlq $17,%xmm6
|
|
|
|
addl %r15d,%r10d
|
|
|
|
movl %ecx,%r13d
|
|
|
|
addl %r10d,%r14d
|
|
|
|
pxor %xmm6,%xmm7
|
|
|
|
rorl $14,%r13d
|
|
|
|
movl %r14d,%r10d
|
|
|
|
movl %edx,%r12d
|
|
|
|
rorl $9,%r14d
|
|
|
|
psrlq $2,%xmm6
|
|
|
|
xorl %ecx,%r13d
|
|
|
|
xorl %r8d,%r12d
|
|
|
|
pxor %xmm6,%xmm7
|
|
|
|
rorl $5,%r13d
|
|
|
|
xorl %r10d,%r14d
|
|
|
|
andl %ecx,%r12d
|
|
|
|
pshufd $128,%xmm7,%xmm7
|
|
|
|
xorl %ecx,%r13d
|
|
|
|
addl 40(%rsp),%r9d
|
|
|
|
movl %r10d,%r15d
|
|
|
|
psrldq $8,%xmm7
|
|
|
|
xorl %r8d,%r12d
|
|
|
|
rorl $11,%r14d
|
|
|
|
xorl %r11d,%r15d
|
|
|
|
addl %r12d,%r9d
|
|
|
|
rorl $6,%r13d
|
|
|
|
paddd %xmm7,%xmm2
|
|
|
|
andl %r15d,%edi
|
|
|
|
xorl %r10d,%r14d
|
|
|
|
addl %r13d,%r9d
|
|
|
|
pshufd $80,%xmm2,%xmm7
|
|
|
|
xorl %r11d,%edi
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %r9d,%ebx
|
|
|
|
movdqa %xmm7,%xmm6
|
|
|
|
addl %edi,%r9d
|
|
|
|
movl %ebx,%r13d
|
|
|
|
psrld $10,%xmm7
|
|
|
|
addl %r9d,%r14d
|
|
|
|
rorl $14,%r13d
|
|
|
|
psrlq $17,%xmm6
|
|
|
|
movl %r14d,%r9d
|
|
|
|
movl %ecx,%r12d
|
|
|
|
pxor %xmm6,%xmm7
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %ebx,%r13d
|
|
|
|
xorl %edx,%r12d
|
|
|
|
rorl $5,%r13d
|
|
|
|
xorl %r9d,%r14d
|
|
|
|
psrlq $2,%xmm6
|
|
|
|
andl %ebx,%r12d
|
|
|
|
xorl %ebx,%r13d
|
|
|
|
addl 44(%rsp),%r8d
|
|
|
|
pxor %xmm6,%xmm7
|
|
|
|
movl %r9d,%edi
|
|
|
|
xorl %edx,%r12d
|
|
|
|
rorl $11,%r14d
|
|
|
|
pshufd $8,%xmm7,%xmm7
|
|
|
|
xorl %r10d,%edi
|
|
|
|
addl %r12d,%r8d
|
|
|
|
movdqa 64(%rbp),%xmm6
|
|
|
|
rorl $6,%r13d
|
|
|
|
andl %edi,%r15d
|
|
|
|
pslldq $8,%xmm7
|
|
|
|
xorl %r9d,%r14d
|
|
|
|
addl %r13d,%r8d
|
|
|
|
xorl %r10d,%r15d
|
|
|
|
paddd %xmm7,%xmm2
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %r8d,%eax
|
|
|
|
addl %r15d,%r8d
|
|
|
|
paddd %xmm2,%xmm6
|
|
|
|
movl %eax,%r13d
|
|
|
|
addl %r8d,%r14d
|
|
|
|
movdqa %xmm6,32(%rsp)
|
|
|
|
rorl $14,%r13d
|
|
|
|
movdqa %xmm0,%xmm4
|
|
|
|
movl %r14d,%r8d
|
|
|
|
movl %ebx,%r12d
|
|
|
|
movdqa %xmm2,%xmm7
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %eax,%r13d
|
|
|
|
xorl %ecx,%r12d
|
|
|
|
rorl $5,%r13d
|
|
|
|
xorl %r8d,%r14d
|
|
|
|
.byte 102,15,58,15,227,4
|
|
|
|
andl %eax,%r12d
|
|
|
|
xorl %eax,%r13d
|
|
|
|
.byte 102,15,58,15,249,4
|
|
|
|
addl 48(%rsp),%edx
|
|
|
|
movl %r8d,%r15d
|
|
|
|
xorl %ecx,%r12d
|
|
|
|
rorl $11,%r14d
|
|
|
|
movdqa %xmm4,%xmm5
|
|
|
|
xorl %r9d,%r15d
|
|
|
|
addl %r12d,%edx
|
|
|
|
movdqa %xmm4,%xmm6
|
|
|
|
rorl $6,%r13d
|
|
|
|
andl %r15d,%edi
|
|
|
|
psrld $3,%xmm4
|
|
|
|
xorl %r8d,%r14d
|
|
|
|
addl %r13d,%edx
|
|
|
|
xorl %r9d,%edi
|
|
|
|
paddd %xmm7,%xmm3
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %edx,%r11d
|
|
|
|
psrld $7,%xmm6
|
|
|
|
addl %edi,%edx
|
|
|
|
movl %r11d,%r13d
|
|
|
|
pshufd $250,%xmm2,%xmm7
|
|
|
|
addl %edx,%r14d
|
|
|
|
rorl $14,%r13d
|
|
|
|
pslld $14,%xmm5
|
|
|
|
movl %r14d,%edx
|
|
|
|
movl %eax,%r12d
|
|
|
|
pxor %xmm6,%xmm4
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %r11d,%r13d
|
|
|
|
xorl %ebx,%r12d
|
|
|
|
rorl $5,%r13d
|
|
|
|
psrld $11,%xmm6
|
|
|
|
xorl %edx,%r14d
|
|
|
|
pxor %xmm5,%xmm4
|
|
|
|
andl %r11d,%r12d
|
|
|
|
xorl %r11d,%r13d
|
|
|
|
pslld $11,%xmm5
|
|
|
|
addl 52(%rsp),%ecx
|
|
|
|
movl %edx,%edi
|
|
|
|
pxor %xmm6,%xmm4
|
|
|
|
xorl %ebx,%r12d
|
|
|
|
rorl $11,%r14d
|
|
|
|
movdqa %xmm7,%xmm6
|
|
|
|
xorl %r8d,%edi
|
|
|
|
addl %r12d,%ecx
|
|
|
|
pxor %xmm5,%xmm4
|
|
|
|
rorl $6,%r13d
|
|
|
|
andl %edi,%r15d
|
|
|
|
xorl %edx,%r14d
|
|
|
|
psrld $10,%xmm7
|
|
|
|
addl %r13d,%ecx
|
|
|
|
xorl %r8d,%r15d
|
|
|
|
paddd %xmm4,%xmm3
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %ecx,%r10d
|
|
|
|
psrlq $17,%xmm6
|
|
|
|
addl %r15d,%ecx
|
|
|
|
movl %r10d,%r13d
|
|
|
|
addl %ecx,%r14d
|
|
|
|
pxor %xmm6,%xmm7
|
|
|
|
rorl $14,%r13d
|
|
|
|
movl %r14d,%ecx
|
|
|
|
movl %r11d,%r12d
|
|
|
|
rorl $9,%r14d
|
|
|
|
psrlq $2,%xmm6
|
|
|
|
xorl %r10d,%r13d
|
|
|
|
xorl %eax,%r12d
|
|
|
|
pxor %xmm6,%xmm7
|
|
|
|
rorl $5,%r13d
|
|
|
|
xorl %ecx,%r14d
|
|
|
|
andl %r10d,%r12d
|
|
|
|
pshufd $128,%xmm7,%xmm7
|
|
|
|
xorl %r10d,%r13d
|
|
|
|
addl 56(%rsp),%ebx
|
|
|
|
movl %ecx,%r15d
|
|
|
|
psrldq $8,%xmm7
|
|
|
|
xorl %eax,%r12d
|
|
|
|
rorl $11,%r14d
|
|
|
|
xorl %edx,%r15d
|
|
|
|
addl %r12d,%ebx
|
|
|
|
rorl $6,%r13d
|
|
|
|
paddd %xmm7,%xmm3
|
|
|
|
andl %r15d,%edi
|
|
|
|
xorl %ecx,%r14d
|
|
|
|
addl %r13d,%ebx
|
|
|
|
pshufd $80,%xmm3,%xmm7
|
|
|
|
xorl %edx,%edi
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %ebx,%r9d
|
|
|
|
movdqa %xmm7,%xmm6
|
|
|
|
addl %edi,%ebx
|
|
|
|
movl %r9d,%r13d
|
|
|
|
psrld $10,%xmm7
|
|
|
|
addl %ebx,%r14d
|
|
|
|
rorl $14,%r13d
|
|
|
|
psrlq $17,%xmm6
|
|
|
|
movl %r14d,%ebx
|
|
|
|
movl %r10d,%r12d
|
|
|
|
pxor %xmm6,%xmm7
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %r9d,%r13d
|
|
|
|
xorl %r11d,%r12d
|
|
|
|
rorl $5,%r13d
|
|
|
|
xorl %ebx,%r14d
|
|
|
|
psrlq $2,%xmm6
|
|
|
|
andl %r9d,%r12d
|
|
|
|
xorl %r9d,%r13d
|
|
|
|
addl 60(%rsp),%eax
|
|
|
|
pxor %xmm6,%xmm7
|
|
|
|
movl %ebx,%edi
|
|
|
|
xorl %r11d,%r12d
|
|
|
|
rorl $11,%r14d
|
|
|
|
pshufd $8,%xmm7,%xmm7
|
|
|
|
xorl %ecx,%edi
|
|
|
|
addl %r12d,%eax
|
|
|
|
movdqa 96(%rbp),%xmm6
|
|
|
|
rorl $6,%r13d
|
|
|
|
andl %edi,%r15d
|
|
|
|
pslldq $8,%xmm7
|
|
|
|
xorl %ebx,%r14d
|
|
|
|
addl %r13d,%eax
|
|
|
|
xorl %ecx,%r15d
|
|
|
|
paddd %xmm7,%xmm3
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %eax,%r8d
|
|
|
|
addl %r15d,%eax
|
|
|
|
paddd %xmm3,%xmm6
|
|
|
|
movl %r8d,%r13d
|
|
|
|
addl %eax,%r14d
|
|
|
|
movdqa %xmm6,48(%rsp)
|
|
|
|
cmpb $0,131(%rbp)
|
|
|
|
jne .Lssse3_00_47
|
|
|
|
rorl $14,%r13d
|
|
|
|
movl %r14d,%eax
|
|
|
|
movl %r9d,%r12d
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %r8d,%r13d
|
|
|
|
xorl %r10d,%r12d
|
|
|
|
rorl $5,%r13d
|
|
|
|
xorl %eax,%r14d
|
|
|
|
andl %r8d,%r12d
|
|
|
|
xorl %r8d,%r13d
|
|
|
|
addl 0(%rsp),%r11d
|
|
|
|
movl %eax,%r15d
|
|
|
|
xorl %r10d,%r12d
|
|
|
|
rorl $11,%r14d
|
|
|
|
xorl %ebx,%r15d
|
|
|
|
addl %r12d,%r11d
|
|
|
|
rorl $6,%r13d
|
|
|
|
andl %r15d,%edi
|
|
|
|
xorl %eax,%r14d
|
|
|
|
addl %r13d,%r11d
|
|
|
|
xorl %ebx,%edi
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %r11d,%edx
|
|
|
|
addl %edi,%r11d
|
|
|
|
movl %edx,%r13d
|
|
|
|
addl %r11d,%r14d
|
|
|
|
rorl $14,%r13d
|
|
|
|
movl %r14d,%r11d
|
|
|
|
movl %r8d,%r12d
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %edx,%r13d
|
|
|
|
xorl %r9d,%r12d
|
|
|
|
rorl $5,%r13d
|
|
|
|
xorl %r11d,%r14d
|
|
|
|
andl %edx,%r12d
|
|
|
|
xorl %edx,%r13d
|
|
|
|
addl 4(%rsp),%r10d
|
|
|
|
movl %r11d,%edi
|
|
|
|
xorl %r9d,%r12d
|
|
|
|
rorl $11,%r14d
|
|
|
|
xorl %eax,%edi
|
|
|
|
addl %r12d,%r10d
|
|
|
|
rorl $6,%r13d
|
|
|
|
andl %edi,%r15d
|
|
|
|
xorl %r11d,%r14d
|
|
|
|
addl %r13d,%r10d
|
|
|
|
xorl %eax,%r15d
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %r10d,%ecx
|
|
|
|
addl %r15d,%r10d
|
|
|
|
movl %ecx,%r13d
|
|
|
|
addl %r10d,%r14d
|
|
|
|
rorl $14,%r13d
|
|
|
|
movl %r14d,%r10d
|
|
|
|
movl %edx,%r12d
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %ecx,%r13d
|
|
|
|
xorl %r8d,%r12d
|
|
|
|
rorl $5,%r13d
|
|
|
|
xorl %r10d,%r14d
|
|
|
|
andl %ecx,%r12d
|
|
|
|
xorl %ecx,%r13d
|
|
|
|
addl 8(%rsp),%r9d
|
|
|
|
movl %r10d,%r15d
|
|
|
|
xorl %r8d,%r12d
|
|
|
|
rorl $11,%r14d
|
|
|
|
xorl %r11d,%r15d
|
|
|
|
addl %r12d,%r9d
|
|
|
|
rorl $6,%r13d
|
|
|
|
andl %r15d,%edi
|
|
|
|
xorl %r10d,%r14d
|
|
|
|
addl %r13d,%r9d
|
|
|
|
xorl %r11d,%edi
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %r9d,%ebx
|
|
|
|
addl %edi,%r9d
|
|
|
|
movl %ebx,%r13d
|
|
|
|
addl %r9d,%r14d
|
|
|
|
rorl $14,%r13d
|
|
|
|
movl %r14d,%r9d
|
|
|
|
movl %ecx,%r12d
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %ebx,%r13d
|
|
|
|
xorl %edx,%r12d
|
|
|
|
rorl $5,%r13d
|
|
|
|
xorl %r9d,%r14d
|
|
|
|
andl %ebx,%r12d
|
|
|
|
xorl %ebx,%r13d
|
|
|
|
addl 12(%rsp),%r8d
|
|
|
|
movl %r9d,%edi
|
|
|
|
xorl %edx,%r12d
|
|
|
|
rorl $11,%r14d
|
|
|
|
xorl %r10d,%edi
|
|
|
|
addl %r12d,%r8d
|
|
|
|
rorl $6,%r13d
|
|
|
|
andl %edi,%r15d
|
|
|
|
xorl %r9d,%r14d
|
|
|
|
addl %r13d,%r8d
|
|
|
|
xorl %r10d,%r15d
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %r8d,%eax
|
|
|
|
addl %r15d,%r8d
|
|
|
|
movl %eax,%r13d
|
|
|
|
addl %r8d,%r14d
|
|
|
|
rorl $14,%r13d
|
|
|
|
movl %r14d,%r8d
|
|
|
|
movl %ebx,%r12d
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %eax,%r13d
|
|
|
|
xorl %ecx,%r12d
|
|
|
|
rorl $5,%r13d
|
|
|
|
xorl %r8d,%r14d
|
|
|
|
andl %eax,%r12d
|
|
|
|
xorl %eax,%r13d
|
|
|
|
addl 16(%rsp),%edx
|
|
|
|
movl %r8d,%r15d
|
|
|
|
xorl %ecx,%r12d
|
|
|
|
rorl $11,%r14d
|
|
|
|
xorl %r9d,%r15d
|
|
|
|
addl %r12d,%edx
|
|
|
|
rorl $6,%r13d
|
|
|
|
andl %r15d,%edi
|
|
|
|
xorl %r8d,%r14d
|
|
|
|
addl %r13d,%edx
|
|
|
|
xorl %r9d,%edi
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %edx,%r11d
|
|
|
|
addl %edi,%edx
|
|
|
|
movl %r11d,%r13d
|
|
|
|
addl %edx,%r14d
|
|
|
|
rorl $14,%r13d
|
|
|
|
movl %r14d,%edx
|
|
|
|
movl %eax,%r12d
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %r11d,%r13d
|
|
|
|
xorl %ebx,%r12d
|
|
|
|
rorl $5,%r13d
|
|
|
|
xorl %edx,%r14d
|
|
|
|
andl %r11d,%r12d
|
|
|
|
xorl %r11d,%r13d
|
|
|
|
addl 20(%rsp),%ecx
|
|
|
|
movl %edx,%edi
|
|
|
|
xorl %ebx,%r12d
|
|
|
|
rorl $11,%r14d
|
|
|
|
xorl %r8d,%edi
|
|
|
|
addl %r12d,%ecx
|
|
|
|
rorl $6,%r13d
|
|
|
|
andl %edi,%r15d
|
|
|
|
xorl %edx,%r14d
|
|
|
|
addl %r13d,%ecx
|
|
|
|
xorl %r8d,%r15d
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %ecx,%r10d
|
|
|
|
addl %r15d,%ecx
|
|
|
|
movl %r10d,%r13d
|
|
|
|
addl %ecx,%r14d
|
|
|
|
rorl $14,%r13d
|
|
|
|
movl %r14d,%ecx
|
|
|
|
movl %r11d,%r12d
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %r10d,%r13d
|
|
|
|
xorl %eax,%r12d
|
|
|
|
rorl $5,%r13d
|
|
|
|
xorl %ecx,%r14d
|
|
|
|
andl %r10d,%r12d
|
|
|
|
xorl %r10d,%r13d
|
|
|
|
addl 24(%rsp),%ebx
|
|
|
|
movl %ecx,%r15d
|
|
|
|
xorl %eax,%r12d
|
|
|
|
rorl $11,%r14d
|
|
|
|
xorl %edx,%r15d
|
|
|
|
addl %r12d,%ebx
|
|
|
|
rorl $6,%r13d
|
|
|
|
andl %r15d,%edi
|
|
|
|
xorl %ecx,%r14d
|
|
|
|
addl %r13d,%ebx
|
|
|
|
xorl %edx,%edi
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %ebx,%r9d
|
|
|
|
addl %edi,%ebx
|
|
|
|
movl %r9d,%r13d
|
|
|
|
addl %ebx,%r14d
|
|
|
|
rorl $14,%r13d
|
|
|
|
movl %r14d,%ebx
|
|
|
|
movl %r10d,%r12d
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %r9d,%r13d
|
|
|
|
xorl %r11d,%r12d
|
|
|
|
rorl $5,%r13d
|
|
|
|
xorl %ebx,%r14d
|
|
|
|
andl %r9d,%r12d
|
|
|
|
xorl %r9d,%r13d
|
|
|
|
addl 28(%rsp),%eax
|
|
|
|
movl %ebx,%edi
|
|
|
|
xorl %r11d,%r12d
|
|
|
|
rorl $11,%r14d
|
|
|
|
xorl %ecx,%edi
|
|
|
|
addl %r12d,%eax
|
|
|
|
rorl $6,%r13d
|
|
|
|
andl %edi,%r15d
|
|
|
|
xorl %ebx,%r14d
|
|
|
|
addl %r13d,%eax
|
|
|
|
xorl %ecx,%r15d
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %eax,%r8d
|
|
|
|
addl %r15d,%eax
|
|
|
|
movl %r8d,%r13d
|
|
|
|
addl %eax,%r14d
|
|
|
|
rorl $14,%r13d
|
|
|
|
movl %r14d,%eax
|
|
|
|
movl %r9d,%r12d
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %r8d,%r13d
|
|
|
|
xorl %r10d,%r12d
|
|
|
|
rorl $5,%r13d
|
|
|
|
xorl %eax,%r14d
|
|
|
|
andl %r8d,%r12d
|
|
|
|
xorl %r8d,%r13d
|
|
|
|
addl 32(%rsp),%r11d
|
|
|
|
movl %eax,%r15d
|
|
|
|
xorl %r10d,%r12d
|
|
|
|
rorl $11,%r14d
|
|
|
|
xorl %ebx,%r15d
|
|
|
|
addl %r12d,%r11d
|
|
|
|
rorl $6,%r13d
|
|
|
|
andl %r15d,%edi
|
|
|
|
xorl %eax,%r14d
|
|
|
|
addl %r13d,%r11d
|
|
|
|
xorl %ebx,%edi
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %r11d,%edx
|
|
|
|
addl %edi,%r11d
|
|
|
|
movl %edx,%r13d
|
|
|
|
addl %r11d,%r14d
|
|
|
|
rorl $14,%r13d
|
|
|
|
movl %r14d,%r11d
|
|
|
|
movl %r8d,%r12d
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %edx,%r13d
|
|
|
|
xorl %r9d,%r12d
|
|
|
|
rorl $5,%r13d
|
|
|
|
xorl %r11d,%r14d
|
|
|
|
andl %edx,%r12d
|
|
|
|
xorl %edx,%r13d
|
|
|
|
addl 36(%rsp),%r10d
|
|
|
|
movl %r11d,%edi
|
|
|
|
xorl %r9d,%r12d
|
|
|
|
rorl $11,%r14d
|
|
|
|
xorl %eax,%edi
|
|
|
|
addl %r12d,%r10d
|
|
|
|
rorl $6,%r13d
|
|
|
|
andl %edi,%r15d
|
|
|
|
xorl %r11d,%r14d
|
|
|
|
addl %r13d,%r10d
|
|
|
|
xorl %eax,%r15d
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %r10d,%ecx
|
|
|
|
addl %r15d,%r10d
|
|
|
|
movl %ecx,%r13d
|
|
|
|
addl %r10d,%r14d
|
|
|
|
rorl $14,%r13d
|
|
|
|
movl %r14d,%r10d
|
|
|
|
movl %edx,%r12d
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %ecx,%r13d
|
|
|
|
xorl %r8d,%r12d
|
|
|
|
rorl $5,%r13d
|
|
|
|
xorl %r10d,%r14d
|
|
|
|
andl %ecx,%r12d
|
|
|
|
xorl %ecx,%r13d
|
|
|
|
addl 40(%rsp),%r9d
|
|
|
|
movl %r10d,%r15d
|
|
|
|
xorl %r8d,%r12d
|
|
|
|
rorl $11,%r14d
|
|
|
|
xorl %r11d,%r15d
|
|
|
|
addl %r12d,%r9d
|
|
|
|
rorl $6,%r13d
|
|
|
|
andl %r15d,%edi
|
|
|
|
xorl %r10d,%r14d
|
|
|
|
addl %r13d,%r9d
|
|
|
|
xorl %r11d,%edi
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %r9d,%ebx
|
|
|
|
addl %edi,%r9d
|
|
|
|
movl %ebx,%r13d
|
|
|
|
addl %r9d,%r14d
|
|
|
|
rorl $14,%r13d
|
|
|
|
movl %r14d,%r9d
|
|
|
|
movl %ecx,%r12d
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %ebx,%r13d
|
|
|
|
xorl %edx,%r12d
|
|
|
|
rorl $5,%r13d
|
|
|
|
xorl %r9d,%r14d
|
|
|
|
andl %ebx,%r12d
|
|
|
|
xorl %ebx,%r13d
|
|
|
|
addl 44(%rsp),%r8d
|
|
|
|
movl %r9d,%edi
|
|
|
|
xorl %edx,%r12d
|
|
|
|
rorl $11,%r14d
|
|
|
|
xorl %r10d,%edi
|
|
|
|
addl %r12d,%r8d
|
|
|
|
rorl $6,%r13d
|
|
|
|
andl %edi,%r15d
|
|
|
|
xorl %r9d,%r14d
|
|
|
|
addl %r13d,%r8d
|
|
|
|
xorl %r10d,%r15d
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %r8d,%eax
|
|
|
|
addl %r15d,%r8d
|
|
|
|
movl %eax,%r13d
|
|
|
|
addl %r8d,%r14d
|
|
|
|
rorl $14,%r13d
|
|
|
|
movl %r14d,%r8d
|
|
|
|
movl %ebx,%r12d
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %eax,%r13d
|
|
|
|
xorl %ecx,%r12d
|
|
|
|
rorl $5,%r13d
|
|
|
|
xorl %r8d,%r14d
|
|
|
|
andl %eax,%r12d
|
|
|
|
xorl %eax,%r13d
|
|
|
|
addl 48(%rsp),%edx
|
|
|
|
movl %r8d,%r15d
|
|
|
|
xorl %ecx,%r12d
|
|
|
|
rorl $11,%r14d
|
|
|
|
xorl %r9d,%r15d
|
|
|
|
addl %r12d,%edx
|
|
|
|
rorl $6,%r13d
|
|
|
|
andl %r15d,%edi
|
|
|
|
xorl %r8d,%r14d
|
|
|
|
addl %r13d,%edx
|
|
|
|
xorl %r9d,%edi
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %edx,%r11d
|
|
|
|
addl %edi,%edx
|
|
|
|
movl %r11d,%r13d
|
|
|
|
addl %edx,%r14d
|
|
|
|
rorl $14,%r13d
|
|
|
|
movl %r14d,%edx
|
|
|
|
movl %eax,%r12d
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %r11d,%r13d
|
|
|
|
xorl %ebx,%r12d
|
|
|
|
rorl $5,%r13d
|
|
|
|
xorl %edx,%r14d
|
|
|
|
andl %r11d,%r12d
|
|
|
|
xorl %r11d,%r13d
|
|
|
|
addl 52(%rsp),%ecx
|
|
|
|
movl %edx,%edi
|
|
|
|
xorl %ebx,%r12d
|
|
|
|
rorl $11,%r14d
|
|
|
|
xorl %r8d,%edi
|
|
|
|
addl %r12d,%ecx
|
|
|
|
rorl $6,%r13d
|
|
|
|
andl %edi,%r15d
|
|
|
|
xorl %edx,%r14d
|
|
|
|
addl %r13d,%ecx
|
|
|
|
xorl %r8d,%r15d
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %ecx,%r10d
|
|
|
|
addl %r15d,%ecx
|
|
|
|
movl %r10d,%r13d
|
|
|
|
addl %ecx,%r14d
|
|
|
|
rorl $14,%r13d
|
|
|
|
movl %r14d,%ecx
|
|
|
|
movl %r11d,%r12d
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %r10d,%r13d
|
|
|
|
xorl %eax,%r12d
|
|
|
|
rorl $5,%r13d
|
|
|
|
xorl %ecx,%r14d
|
|
|
|
andl %r10d,%r12d
|
|
|
|
xorl %r10d,%r13d
|
|
|
|
addl 56(%rsp),%ebx
|
|
|
|
movl %ecx,%r15d
|
|
|
|
xorl %eax,%r12d
|
|
|
|
rorl $11,%r14d
|
|
|
|
xorl %edx,%r15d
|
|
|
|
addl %r12d,%ebx
|
|
|
|
rorl $6,%r13d
|
|
|
|
andl %r15d,%edi
|
|
|
|
xorl %ecx,%r14d
|
|
|
|
addl %r13d,%ebx
|
|
|
|
xorl %edx,%edi
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %ebx,%r9d
|
|
|
|
addl %edi,%ebx
|
|
|
|
movl %r9d,%r13d
|
|
|
|
addl %ebx,%r14d
|
|
|
|
rorl $14,%r13d
|
|
|
|
movl %r14d,%ebx
|
|
|
|
movl %r10d,%r12d
|
|
|
|
rorl $9,%r14d
|
|
|
|
xorl %r9d,%r13d
|
|
|
|
xorl %r11d,%r12d
|
|
|
|
rorl $5,%r13d
|
|
|
|
xorl %ebx,%r14d
|
|
|
|
andl %r9d,%r12d
|
|
|
|
xorl %r9d,%r13d
|
|
|
|
addl 60(%rsp),%eax
|
|
|
|
movl %ebx,%edi
|
|
|
|
xorl %r11d,%r12d
|
|
|
|
rorl $11,%r14d
|
|
|
|
xorl %ecx,%edi
|
|
|
|
addl %r12d,%eax
|
|
|
|
rorl $6,%r13d
|
|
|
|
andl %edi,%r15d
|
|
|
|
xorl %ebx,%r14d
|
|
|
|
addl %r13d,%eax
|
|
|
|
xorl %ecx,%r15d
|
|
|
|
rorl $2,%r14d
|
|
|
|
addl %eax,%r8d
|
|
|
|
addl %r15d,%eax
|
|
|
|
movl %r8d,%r13d
|
|
|
|
addl %eax,%r14d
|
|
|
|
movq 64+0(%rsp),%rdi
|
|
|
|
movl %r14d,%eax
|
|
|
|
|
|
|
|
addl 0(%rdi),%eax
|
|
|
|
leaq 64(%rsi),%rsi
|
|
|
|
addl 4(%rdi),%ebx
|
|
|
|
addl 8(%rdi),%ecx
|
|
|
|
addl 12(%rdi),%edx
|
|
|
|
addl 16(%rdi),%r8d
|
|
|
|
addl 20(%rdi),%r9d
|
|
|
|
addl 24(%rdi),%r10d
|
|
|
|
addl 28(%rdi),%r11d
|
|
|
|
|
|
|
|
cmpq 64+16(%rsp),%rsi
|
|
|
|
|
|
|
|
movl %eax,0(%rdi)
|
|
|
|
movl %ebx,4(%rdi)
|
|
|
|
movl %ecx,8(%rdi)
|
|
|
|
movl %edx,12(%rdi)
|
|
|
|
movl %r8d,16(%rdi)
|
|
|
|
movl %r9d,20(%rdi)
|
|
|
|
movl %r10d,24(%rdi)
|
|
|
|
movl %r11d,28(%rdi)
|
|
|
|
jb .Lloop_ssse3
|
|
|
|
|
|
|
|
movq 88(%rsp),%rsi
|
|
|
|
.cfi_def_cfa %rsi,8
|
|
|
|
movq -48(%rsi),%r15
|
|
|
|
.cfi_restore %r15
|
|
|
|
movq -40(%rsi),%r14
|
|
|
|
.cfi_restore %r14
|
|
|
|
movq -32(%rsi),%r13
|
|
|
|
.cfi_restore %r13
|
|
|
|
movq -24(%rsi),%r12
|
|
|
|
.cfi_restore %r12
|
|
|
|
movq -16(%rsi),%rbp
|
|
|
|
.cfi_restore %rbp
|
|
|
|
movq -8(%rsi),%rbx
|
|
|
|
.cfi_restore %rbx
|
|
|
|
leaq (%rsi),%rsp
|
|
|
|
.cfi_def_cfa_register %rsp
|
|
|
|
.Lepilogue_ssse3:
|
|
|
|
RET
|
|
|
|
.cfi_endproc
|
|
|
|
SET_SIZE(zfs_sha256_transform_ssse3)
|
|
|
|
|
|
|
|
ENTRY_ALIGN(zfs_sha256_transform_avx, 64)
|
|
|
|
.cfi_startproc
|
|
|
|
ENDBR
|
|
|
|
movq %rsp,%rax
|
|
|
|
.cfi_def_cfa_register %rax
|
|
|
|
pushq %rbx
|
|
|
|
.cfi_offset %rbx,-16
|
|
|
|
pushq %rbp
|
|
|
|
.cfi_offset %rbp,-24
|
|
|
|
pushq %r12
|
|
|
|
.cfi_offset %r12,-32
|
|
|
|
pushq %r13
|
|
|
|
.cfi_offset %r13,-40
|
|
|
|
pushq %r14
|
|
|
|
.cfi_offset %r14,-48
|
|
|
|
pushq %r15
|
|
|
|
.cfi_offset %r15,-56
|
|
|
|
shlq $4,%rdx
|
|
|
|
subq $96,%rsp
|
|
|
|
leaq (%rsi,%rdx,4),%rdx
|
|
|
|
andq $-64,%rsp
|
|
|
|
movq %rdi,64+0(%rsp)
|
|
|
|
movq %rsi,64+8(%rsp)
|
|
|
|
movq %rdx,64+16(%rsp)
|
|
|
|
movq %rax,88(%rsp)
|
|
|
|
.cfi_escape 0x0f,0x06,0x77,0xd8,0x00,0x06,0x23,0x08
|
|
|
|
.Lprologue_avx:
|
|
|
|
|
|
|
|
vzeroupper
|
|
|
|
movl 0(%rdi),%eax
|
|
|
|
movl 4(%rdi),%ebx
|
|
|
|
movl 8(%rdi),%ecx
|
|
|
|
movl 12(%rdi),%edx
|
|
|
|
movl 16(%rdi),%r8d
|
|
|
|
movl 20(%rdi),%r9d
|
|
|
|
movl 24(%rdi),%r10d
|
|
|
|
movl 28(%rdi),%r11d
|
|
|
|
vmovdqa K256+512+32(%rip),%xmm8
|
|
|
|
vmovdqa K256+512+64(%rip),%xmm9
|
|
|
|
jmp .Lloop_avx
|
2023-03-06 23:24:05 +00:00
|
|
|
.balign 16
|
Add generic implementation handling and SHA2 impl
The skeleton file module/icp/include/generic_impl.c can be used for
iterating over different implementations of algorithms.
It is used by SHA256, SHA512 and BLAKE3 currently.
The Solaris SHA2 implementation got replaced with a version which is
based on public domain code of cppcrypto v0.10.
These assembly files are taken from current openssl master:
- sha256-x86_64.S: x64, SSSE3, AVX, AVX2, SHA-NI (x86_64)
- sha512-x86_64.S: x64, AVX, AVX2 (x86_64)
- sha256-armv7.S: ARMv7, NEON, ARMv8-CE (arm)
- sha512-armv7.S: ARMv7, NEON (arm)
- sha256-armv8.S: ARMv7, NEON, ARMv8-CE (aarch64)
- sha512-armv8.S: ARMv7, ARMv8-CE (aarch64)
- sha256-ppc.S: Generic PPC64 LE/BE (ppc64)
- sha512-ppc.S: Generic PPC64 LE/BE (ppc64)
- sha256-p8.S: Power8 ISA Version 2.07 LE/BE (ppc64)
- sha512-p8.S: Power8 ISA Version 2.07 LE/BE (ppc64)
Tested-by: Rich Ercolani <rincebrain@gmail.com>
Tested-by: Sebastian Gottschall <s.gottschall@dd-wrt.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Tino Reichardt <milky-zfs@mcmilk.de>
Closes #13741
2023-03-01 08:40:28 +00:00
|
|
|
.Lloop_avx:
|
|
|
|
vmovdqa K256+512(%rip),%xmm7
|
|
|
|
vmovdqu 0(%rsi),%xmm0
|
|
|
|
vmovdqu 16(%rsi),%xmm1
|
|
|
|
vmovdqu 32(%rsi),%xmm2
|
|
|
|
vmovdqu 48(%rsi),%xmm3
|
|
|
|
vpshufb %xmm7,%xmm0,%xmm0
|
|
|
|
leaq K256(%rip),%rbp
|
|
|
|
vpshufb %xmm7,%xmm1,%xmm1
|
|
|
|
vpshufb %xmm7,%xmm2,%xmm2
|
|
|
|
vpaddd 0(%rbp),%xmm0,%xmm4
|
|
|
|
vpshufb %xmm7,%xmm3,%xmm3
|
|
|
|
vpaddd 32(%rbp),%xmm1,%xmm5
|
|
|
|
vpaddd 64(%rbp),%xmm2,%xmm6
|
|
|
|
vpaddd 96(%rbp),%xmm3,%xmm7
|
|
|
|
vmovdqa %xmm4,0(%rsp)
|
|
|
|
movl %eax,%r14d
|
|
|
|
vmovdqa %xmm5,16(%rsp)
|
|
|
|
movl %ebx,%edi
|
|
|
|
vmovdqa %xmm6,32(%rsp)
|
|
|
|
xorl %ecx,%edi
|
|
|
|
vmovdqa %xmm7,48(%rsp)
|
|
|
|
movl %r8d,%r13d
|
|
|
|
jmp .Lavx_00_47
|
|
|
|
|
2023-03-06 23:24:05 +00:00
|
|
|
.balign 16
|
Add generic implementation handling and SHA2 impl
The skeleton file module/icp/include/generic_impl.c can be used for
iterating over different implementations of algorithms.
It is used by SHA256, SHA512 and BLAKE3 currently.
The Solaris SHA2 implementation got replaced with a version which is
based on public domain code of cppcrypto v0.10.
These assembly files are taken from current openssl master:
- sha256-x86_64.S: x64, SSSE3, AVX, AVX2, SHA-NI (x86_64)
- sha512-x86_64.S: x64, AVX, AVX2 (x86_64)
- sha256-armv7.S: ARMv7, NEON, ARMv8-CE (arm)
- sha512-armv7.S: ARMv7, NEON (arm)
- sha256-armv8.S: ARMv7, NEON, ARMv8-CE (aarch64)
- sha512-armv8.S: ARMv7, ARMv8-CE (aarch64)
- sha256-ppc.S: Generic PPC64 LE/BE (ppc64)
- sha512-ppc.S: Generic PPC64 LE/BE (ppc64)
- sha256-p8.S: Power8 ISA Version 2.07 LE/BE (ppc64)
- sha512-p8.S: Power8 ISA Version 2.07 LE/BE (ppc64)
Tested-by: Rich Ercolani <rincebrain@gmail.com>
Tested-by: Sebastian Gottschall <s.gottschall@dd-wrt.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Tino Reichardt <milky-zfs@mcmilk.de>
Closes #13741
2023-03-01 08:40:28 +00:00
|
|
|
.Lavx_00_47:
|
|
|
|
subq $-128,%rbp
|
|
|
|
vpalignr $4,%xmm0,%xmm1,%xmm4
|
|
|
|
shrdl $14,%r13d,%r13d
|
|
|
|
movl %r14d,%eax
|
|
|
|
movl %r9d,%r12d
|
|
|
|
vpalignr $4,%xmm2,%xmm3,%xmm7
|
|
|
|
shrdl $9,%r14d,%r14d
|
|
|
|
xorl %r8d,%r13d
|
|
|
|
xorl %r10d,%r12d
|
|
|
|
vpsrld $7,%xmm4,%xmm6
|
|
|
|
shrdl $5,%r13d,%r13d
|
|
|
|
xorl %eax,%r14d
|
|
|
|
andl %r8d,%r12d
|
|
|
|
vpaddd %xmm7,%xmm0,%xmm0
|
|
|
|
xorl %r8d,%r13d
|
|
|
|
addl 0(%rsp),%r11d
|
|
|
|
movl %eax,%r15d
|
|
|
|
vpsrld $3,%xmm4,%xmm7
|
|
|
|
xorl %r10d,%r12d
|
|
|
|
shrdl $11,%r14d,%r14d
|
|
|
|
xorl %ebx,%r15d
|
|
|
|
vpslld $14,%xmm4,%xmm5
|
|
|
|
addl %r12d,%r11d
|
|
|
|
shrdl $6,%r13d,%r13d
|
|
|
|
andl %r15d,%edi
|
|
|
|
vpxor %xmm6,%xmm7,%xmm4
|
|
|
|
xorl %eax,%r14d
|
|
|
|
addl %r13d,%r11d
|
|
|
|
xorl %ebx,%edi
|
|
|
|
vpshufd $250,%xmm3,%xmm7
|
|
|
|
shrdl $2,%r14d,%r14d
|
|
|
|
addl %r11d,%edx
|
|
|
|
addl %edi,%r11d
|
|
|
|
vpsrld $11,%xmm6,%xmm6
|
|
|
|
movl %edx,%r13d
|
|
|
|
addl %r11d,%r14d
|
|
|
|
shrdl $14,%r13d,%r13d
|
|
|
|
vpxor %xmm5,%xmm4,%xmm4
|
|
|
|
movl %r14d,%r11d
|
|
|
|
movl %r8d,%r12d
|
|
|
|
shrdl $9,%r14d,%r14d
|
|
|
|
vpslld $11,%xmm5,%xmm5
|
|
|
|
xorl %edx,%r13d
|
|
|
|
xorl %r9d,%r12d
|
|
|
|
shrdl $5,%r13d,%r13d
|
|
|
|
vpxor %xmm6,%xmm4,%xmm4
|
|
|
|
xorl %r11d,%r14d
|
|
|
|
andl %edx,%r12d
|
|
|
|
xorl %edx,%r13d
|
|
|
|
vpsrld $10,%xmm7,%xmm6
|
|
|
|
addl 4(%rsp),%r10d
|
|
|
|
movl %r11d,%edi
|
|
|
|
xorl %r9d,%r12d
|
|
|
|
vpxor %xmm5,%xmm4,%xmm4
|
|
|
|
shrdl $11,%r14d,%r14d
|
|
|
|
xorl %eax,%edi
|
|
|
|
addl %r12d,%r10d
|
|
|
|
vpsrlq $17,%xmm7,%xmm7
|
|
|
|
shrdl $6,%r13d,%r13d
|
|
|
|
andl %edi,%r15d
|
|
|
|
xorl %r11d,%r14d
|
|
|
|
vpaddd %xmm4,%xmm0,%xmm0
|
|
|
|
addl %r13d,%r10d
|
|
|
|
xorl %eax,%r15d
|
|
|
|
shrdl $2,%r14d,%r14d
|
|
|
|
vpxor %xmm7,%xmm6,%xmm6
|
|
|
|
addl %r10d,%ecx
|
|
|
|
addl %r15d,%r10d
|
|
|
|
movl %ecx,%r13d
|
|
|
|
vpsrlq $2,%xmm7,%xmm7
|
|
|
|
addl %r10d,%r14d
|
|
|
|
shrdl $14,%r13d,%r13d
|
|
|
|
movl %r14d,%r10d
|
|
|
|
vpxor %xmm7,%xmm6,%xmm6
|
|
|
|
movl %edx,%r12d
|
|
|
|
shrdl $9,%r14d,%r14d
|
|
|
|
xorl %ecx,%r13d
|
|
|
|
vpshufb %xmm8,%xmm6,%xmm6
|
|
|
|
xorl %r8d,%r12d
|
|
|
|
shrdl $5,%r13d,%r13d
|
|
|
|
xorl %r10d,%r14d
|
|
|
|
vpaddd %xmm6,%xmm0,%xmm0
|
|
|
|
andl %ecx,%r12d
|
|
|
|
xorl %ecx,%r13d
|
|
|
|
addl 8(%rsp),%r9d
|
|
|
|
vpshufd $80,%xmm0,%xmm7
|
|
|
|
movl %r10d,%r15d
|
|
|
|
xorl %r8d,%r12d
|
|
|
|
shrdl $11,%r14d,%r14d
|
|
|
|
vpsrld $10,%xmm7,%xmm6
|
|
|
|
xorl %r11d,%r15d
|
|
|
|
addl %r12d,%r9d
|
|
|
|
shrdl $6,%r13d,%r13d
|
|
|
|
vpsrlq $17,%xmm7,%xmm7
|
|
|
|
andl %r15d,%edi
|
|
|
|
xorl %r10d,%r14d
|
|
|
|
addl %r13d,%r9d
|
|
|
|
vpxor %xmm7,%xmm6,%xmm6
|
|
|
|
xorl %r11d,%edi
|
|
|
|
shrdl $2,%r14d,%r14d
|
|
|
|
addl %r9d,%ebx
|
|
|
|
vpsrlq $2,%xmm7,%xmm7
|
|
|
|
addl %edi,%r9d
|
|
|
|
movl %ebx,%r13d
|
|
|
|
addl %r9d,%r14d
|
|
|
|
vpxor %xmm7,%xmm6,%xmm6
|
|
|
|
shrdl $14,%r13d,%r13d
|
|
|
|
movl %r14d,%r9d
|
|
|
|
movl %ecx,%r12d
|
|
|
|
vpshufb %xmm9,%xmm6,%xmm6
|
|
|
|
shrdl $9,%r14d,%r14d
|
|
|
|
xorl %ebx,%r13d
|
|
|
|
xorl %edx,%r12d
|
|
|
|
vpaddd %xmm6,%xmm0,%xmm0
|
|
|
|
shrdl $5,%r13d,%r13d
|
|
|
|
xorl %r9d,%r14d
|
|
|
|
andl %ebx,%r12d
|
|
|
|
vpaddd 0(%rbp),%xmm0,%xmm6
|
|
|
|
xorl %ebx,%r13d
|
|
|
|
addl 12(%rsp),%r8d
|
|
|
|
movl %r9d,%edi
|
|
|
|
xorl %edx,%r12d
|
|
|
|
shrdl $11,%r14d,%r14d
|
|
|
|
xorl %r10d,%edi
|
|
|
|
addl %r12d,%r8d
|
|
|
|
shrdl $6,%r13d,%r13d
|
|
|
|
andl %edi,%r15d
|
|
|
|
xorl %r9d,%r14d
|
|
|
|
addl %r13d,%r8d
|
|
|
|
xorl %r10d,%r15d
|
|
|
|
shrdl $2,%r14d,%r14d
|
|
|
|
addl %r8d,%eax
|
|
|
|
addl %r15d,%r8d
|
|
|
|
movl %eax,%r13d
|
|
|
|
addl %r8d,%r14d
|
|
|
|
vmovdqa %xmm6,0(%rsp)
|
|
|
|
vpalignr $4,%xmm1,%xmm2,%xmm4
|
|
|
|
shrdl $14,%r13d,%r13d
|
|
|
|
movl %r14d,%r8d
|
|
|
|
movl %ebx,%r12d
|
|
|
|
vpalignr $4,%xmm3,%xmm0,%xmm7
|
|
|
|
shrdl $9,%r14d,%r14d
|
|
|
|
xorl %eax,%r13d
|
|
|
|
xorl %ecx,%r12d
|
|
|
|
vpsrld $7,%xmm4,%xmm6
|
|
|
|
shrdl $5,%r13d,%r13d
|
|
|
|
xorl %r8d,%r14d
|
|
|
|
andl %eax,%r12d
|
|
|
|
vpaddd %xmm7,%xmm1,%xmm1
|
|
|
|
xorl %eax,%r13d
|
|
|
|
addl 16(%rsp),%edx
|
|
|
|
movl %r8d,%r15d
|
|
|
|
vpsrld $3,%xmm4,%xmm7
|
|
|
|
xorl %ecx,%r12d
|
|
|
|
shrdl $11,%r14d,%r14d
|
|
|
|
xorl %r9d,%r15d
|
|
|
|
vpslld $14,%xmm4,%xmm5
|
|
|
|
addl %r12d,%edx
|
|
|
|
shrdl $6,%r13d,%r13d
|
|
|
|
andl %r15d,%edi
|
|
|
|
vpxor %xmm6,%xmm7,%xmm4
|
|
|
|
xorl %r8d,%r14d
|
|
|
|
addl %r13d,%edx
|
|
|
|
xorl %r9d,%edi
|
|
|
|
vpshufd $250,%xmm0,%xmm7
|
|
|
|
shrdl $2,%r14d,%r14d
|
|
|
|
addl %edx,%r11d
|
|
|
|
addl %edi,%edx
|
|
|
|
vpsrld $11,%xmm6,%xmm6
|
|
|
|
movl %r11d,%r13d
|
|
|
|
addl %edx,%r14d
|
|
|
|
shrdl $14,%r13d,%r13d
|
|
|
|
vpxor %xmm5,%xmm4,%xmm4
|
|
|
|
movl %r14d,%edx
|
|
|
|
movl %eax,%r12d
|
|
|
|
shrdl $9,%r14d,%r14d
|
|
|
|
vpslld $11,%xmm5,%xmm5
|
|
|
|
xorl %r11d,%r13d
|
|
|
|
xorl %ebx,%r12d
|
|
|
|
shrdl $5,%r13d,%r13d
|
|
|
|
vpxor %xmm6,%xmm4,%xmm4
|
|
|
|
xorl %edx,%r14d
|
|
|
|
andl %r11d,%r12d
|
|
|
|
xorl %r11d,%r13d
|
|
|
|
vpsrld $10,%xmm7,%xmm6
|
|
|
|
addl 20(%rsp),%ecx
|
|
|
|
movl %edx,%edi
|
|
|
|
xorl %ebx,%r12d
|
|
|
|
vpxor %xmm5,%xmm4,%xmm4
|
|
|
|
shrdl $11,%r14d,%r14d
|
|
|
|
xorl %r8d,%edi
|
|
|
|
addl %r12d,%ecx
|
|
|
|
vpsrlq $17,%xmm7,%xmm7
|
|
|
|
shrdl $6,%r13d,%r13d
|
|
|
|
andl %edi,%r15d
|
|
|
|
xorl %edx,%r14d
|
|
|
|
vpaddd %xmm4,%xmm1,%xmm1
|
|
|
|
addl %r13d,%ecx
|
|
|
|
xorl %r8d,%r15d
|
|
|
|
shrdl $2,%r14d,%r14d
|
|
|
|
vpxor %xmm7,%xmm6,%xmm6
|
|
|
|
addl %ecx,%r10d
|
|
|
|
addl %r15d,%ecx
|
|
|
|
movl %r10d,%r13d
|
|
|
|
vpsrlq $2,%xmm7,%xmm7
|
|
|
|
addl %ecx,%r14d
|
|
|
|
shrdl $14,%r13d,%r13d
|
|
|
|
movl %r14d,%ecx
|
|
|
|
vpxor %xmm7,%xmm6,%xmm6
|
|
|
|
movl %r11d,%r12d
|
|
|
|
shrdl $9,%r14d,%r14d
|
|
|
|
xorl %r10d,%r13d
|
|
|
|
vpshufb %xmm8,%xmm6,%xmm6
|
|
|
|
xorl %eax,%r12d
|
|
|
|
shrdl $5,%r13d,%r13d
|
|
|
|
xorl %ecx,%r14d
|
|
|
|
vpaddd %xmm6,%xmm1,%xmm1
|
|
|
|
andl %r10d,%r12d
|
|
|
|
xorl %r10d,%r13d
|
|
|
|
addl 24(%rsp),%ebx
|
|
|
|
vpshufd $80,%xmm1,%xmm7
|
|
|
|
movl %ecx,%r15d
|
|
|
|
xorl %eax,%r12d
|
|
|
|
shrdl $11,%r14d,%r14d
|
|
|
|
vpsrld $10,%xmm7,%xmm6
|
|
|
|
xorl %edx,%r15d
|
|
|
|
addl %r12d,%ebx
|
|
|
|
shrdl $6,%r13d,%r13d
|
|
|
|
vpsrlq $17,%xmm7,%xmm7
|
|
|
|
andl %r15d,%edi
|
|
|
|
xorl %ecx,%r14d
|
|
|
|
addl %r13d,%ebx
|
|
|
|
vpxor %xmm7,%xmm6,%xmm6
|
|
|
|
xorl %edx,%edi
|
|
|
|
shrdl $2,%r14d,%r14d
|
|
|
|
addl %ebx,%r9d
|
|
|
|
vpsrlq $2,%xmm7,%xmm7
|
|
|
|
addl %edi,%ebx
|
|
|
|
movl %r9d,%r13d
|
|
|
|
addl %ebx,%r14d
|
|
|
|
vpxor %xmm7,%xmm6,%xmm6
|
|
|
|
shrdl $14,%r13d,%r13d
|
|
|
|
movl %r14d,%ebx
|
|
|
|
movl %r10d,%r12d
|
|
|
|
vpshufb %xmm9,%xmm6,%xmm6
|
|
|
|
shrdl $9,%r14d,%r14d
|
|
|
|
xorl %r9d,%r13d
|
|
|
|
xorl %r11d,%r12d
|
|
|
|
vpaddd %xmm6,%xmm1,%xmm1
|
|
|
|
shrdl $5,%r13d,%r13d
|
|
|
|
xorl %ebx,%r14d
|
|
|
|
andl %r9d,%r12d
|
|
|
|
vpaddd 32(%rbp),%xmm1,%xmm6
|
|
|
|
xorl %r9d,%r13d
|
|
|
|
addl 28(%rsp),%eax
|
|
|
|
movl %ebx,%edi
|
|
|
|
xorl %r11d,%r12d
|
|
|
|
shrdl $11,%r14d,%r14d
|
|
|
|
xorl %ecx,%edi
|
|
|
|
addl %r12d,%eax
|
|
|
|
shrdl $6,%r13d,%r13d
|
|
|
|
andl %edi,%r15d
|
|
|
|
xorl %ebx,%r14d
|
|
|
|
addl %r13d,%eax
|
|
|
|
xorl %ecx,%r15d
|
|
|
|
shrdl $2,%r14d,%r14d
|
|
|
|
addl %eax,%r8d
|
|
|
|
addl %r15d,%eax
|
|
|
|
movl %r8d,%r13d
|
|
|
|
addl %eax,%r14d
|
|
|
|
vmovdqa %xmm6,16(%rsp)
|
|
|
|
vpalignr $4,%xmm2,%xmm3,%xmm4
|
|
|
|
shrdl $14,%r13d,%r13d
|
|
|
|
movl %r14d,%eax
|
|
|
|
movl %r9d,%r12d
|
|
|
|
vpalignr $4,%xmm0,%xmm1,%xmm7
|
|
|
|
shrdl $9,%r14d,%r14d
|
|
|
|
xorl %r8d,%r13d
|
|
|
|
xorl %r10d,%r12d
|
|
|
|
vpsrld $7,%xmm4,%xmm6
|
|
|
|
shrdl $5,%r13d,%r13d
|
|
|
|
xorl %eax,%r14d
|
|
|
|
andl %r8d,%r12d
|
|
|
|
vpaddd %xmm7,%xmm2,%xmm2
|
|
|
|
xorl %r8d,%r13d
|
|
|
|
addl 32(%rsp),%r11d
|
|
|
|
movl %eax,%r15d
|
|
|
|
vpsrld $3,%xmm4,%xmm7
|
|
|
|
xorl %r10d,%r12d
|
|
|
|
shrdl $11,%r14d,%r14d
|
|
|
|
xorl %ebx,%r15d
|
|
|
|
vpslld $14,%xmm4,%xmm5
|
|
|
|
addl %r12d,%r11d
|
|
|
|
shrdl $6,%r13d,%r13d
|
|
|
|
andl %r15d,%edi
|
|
|
|
vpxor %xmm6,%xmm7,%xmm4
|
|
|
|
xorl %eax,%r14d
|
|
|
|
addl %r13d,%r11d
|
|
|
|
xorl %ebx,%edi
|
|
|
|
vpshufd $250,%xmm1,%xmm7
|
|
|
|
shrdl $2,%r14d,%r14d
|
|
|
|
addl %r11d,%edx
|
|
|
|
addl %edi,%r11d
|
|
|
|
vpsrld $11,%xmm6,%xmm6
|
|
|
|
movl %edx,%r13d
|
|
|
|
addl %r11d,%r14d
|
|
|
|
shrdl $14,%r13d,%r13d
|
|
|
|
vpxor %xmm5,%xmm4,%xmm4
|
|
|
|
movl %r14d,%r11d
|
|
|
|
movl %r8d,%r12d
|
|
|
|
shrdl $9,%r14d,%r14d
|
|
|
|
vpslld $11,%xmm5,%xmm5
|
|
|
|
xorl %edx,%r13d
|
|
|
|
xorl %r9d,%r12d
|
|
|
|
shrdl $5,%r13d,%r13d
|
|
|
|
vpxor %xmm6,%xmm4,%xmm4
|
|
|
|
xorl %r11d,%r14d
|
|
|
|
andl %edx,%r12d
|
|
|
|
xorl %edx,%r13d
|
|
|
|
vpsrld $10,%xmm7,%xmm6
|
|
|
|
addl 36(%rsp),%r10d
|
|
|
|
movl %r11d,%edi
|
|
|
|
xorl %r9d,%r12d
|
|
|
|
vpxor %xmm5,%xmm4,%xmm4
|
|
|
|
shrdl $11,%r14d,%r14d
|
|
|
|
xorl %eax,%edi
|
|
|
|
addl %r12d,%r10d
|
|
|
|
vpsrlq $17,%xmm7,%xmm7
|
|
|
|
shrdl $6,%r13d,%r13d
|
|
|
|
andl %edi,%r15d
|
|
|
|
xorl %r11d,%r14d
|
|
|
|
vpaddd %xmm4,%xmm2,%xmm2
|
|
|
|
addl %r13d,%r10d
|
|
|
|
xorl %eax,%r15d
|
|
|
|
shrdl $2,%r14d,%r14d
|
|
|
|
vpxor %xmm7,%xmm6,%xmm6
|
|
|
|
addl %r10d,%ecx
|
|
|
|
addl %r15d,%r10d
|
|
|
|
movl %ecx,%r13d
|
|
|
|
vpsrlq $2,%xmm7,%xmm7
|
|
|
|
addl %r10d,%r14d
|
|
|
|
shrdl $14,%r13d,%r13d
|
|
|
|
movl %r14d,%r10d
|
|
|
|
vpxor %xmm7,%xmm6,%xmm6
|
|
|
|
movl %edx,%r12d
|
|
|
|
shrdl $9,%r14d,%r14d
|
|
|
|
xorl %ecx,%r13d
|
|
|
|
vpshufb %xmm8,%xmm6,%xmm6
|
|
|
|
xorl %r8d,%r12d
|
|
|
|
shrdl $5,%r13d,%r13d
|
|
|
|
xorl %r10d,%r14d
|
|
|
|
vpaddd %xmm6,%xmm2,%xmm2
|
|
|
|
andl %ecx,%r12d
|
|
|
|
xorl %ecx,%r13d
|
|
|
|
addl 40(%rsp),%r9d
|
|
|
|
vpshufd $80,%xmm2,%xmm7
|
|
|
|
movl %r10d,%r15d
|
|
|
|
xorl %r8d,%r12d
|
|
|
|
shrdl $11,%r14d,%r14d
|
|
|
|
vpsrld $10,%xmm7,%xmm6
|
|
|
|
xorl %r11d,%r15d
|
|
|
|
addl %r12d,%r9d
|
|
|
|
shrdl $6,%r13d,%r13d
|
|
|
|
vpsrlq $17,%xmm7,%xmm7
|
|
|
|
andl %r15d,%edi
|
|
|
|
xorl %r10d,%r14d
|
|
|
|
addl %r13d,%r9d
|
|
|
|
vpxor %xmm7,%xmm6,%xmm6
|
|
|
|
xorl %r11d,%edi
|
|
|
|
shrdl $2,%r14d,%r14d
|
|
|
|
addl %r9d,%ebx
|
|
|
|
vpsrlq $2,%xmm7,%xmm7
|
|
|
|
addl %edi,%r9d
|
|
|
|
movl %ebx,%r13d
|
|
|
|
addl %r9d,%r14d
|
|
|
|
vpxor %xmm7,%xmm6,%xmm6
|
|
|
|
shrdl $14,%r13d,%r13d
|
|
|
|
movl %r14d,%r9d
|
|
|
|
movl %ecx,%r12d
|
|
|
|
vpshufb %xmm9,%xmm6,%xmm6
|
|
|
|
shrdl $9,%r14d,%r14d
|
|
|
|
xorl %ebx,%r13d
|
|
|
|
xorl %edx,%r12d
|
|
|
|
vpaddd %xmm6,%xmm2,%xmm2
|
|
|
|
shrdl $5,%r13d,%r13d
|
|
|
|
xorl %r9d,%r14d
|
|
|
|
andl %ebx,%r12d
|
|
|
|
vpaddd 64(%rbp),%xmm2,%xmm6
|
|
|
|
xorl %ebx,%r13d
|
|
|
|
addl 44(%rsp),%r8d
|
|
|
|
movl %r9d,%edi
|
|
|
|
xorl %edx,%r12d
|
|
|
|
shrdl $11,%r14d,%r14d
|
|
|
|
xorl %r10d,%edi
|
|
|
|
addl %r12d,%r8d
|
|
|
|
shrdl $6,%r13d,%r13d
|
|
|
|
andl %edi,%r15d
|
|
|
|
xorl %r9d,%r14d
|
|
|
|
addl %r13d,%r8d
|
|
|
|
xorl %r10d,%r15d
|
|
|
|
shrdl $2,%r14d,%r14d
|
|
|
|
addl %r8d,%eax
|
|
|
|
addl %r15d,%r8d
|
|
|
|
movl %eax,%r13d
|
|
|
|
addl %r8d,%r14d
|
|
|
|
vmovdqa %xmm6,32(%rsp)
|
|
|
|
vpalignr $4,%xmm3,%xmm0,%xmm4
|
|
|
|
shrdl $14,%r13d,%r13d
|
|
|
|
movl %r14d,%r8d
|
|
|
|
movl %ebx,%r12d
|
|
|
|
vpalignr $4,%xmm1,%xmm2,%xmm7
|
|
|
|
shrdl $9,%r14d,%r14d
|
|
|
|
xorl %eax,%r13d
|
|
|
|
xorl %ecx,%r12d
|
|
|
|
vpsrld $7,%xmm4,%xmm6
|
|
|
|
shrdl $5,%r13d,%r13d
|
|
|
|
xorl %r8d,%r14d
|
|
|
|
andl %eax,%r12d
|
|
|
|
vpaddd %xmm7,%xmm3,%xmm3
|
|
|
|
xorl %eax,%r13d
|
|
|
|
addl 48(%rsp),%edx
|
|
|
|
movl %r8d,%r15d
|
|
|
|
vpsrld $3,%xmm4,%xmm7
|
|
|
|
xorl %ecx,%r12d
|
|
|
|
shrdl $11,%r14d,%r14d
|
|
|
|
xorl %r9d,%r15d
|
|
|
|
vpslld $14,%xmm4,%xmm5
|
|
|
|
addl %r12d,%edx
|
|
|
|
shrdl $6,%r13d,%r13d
|
|
|
|
andl %r15d,%edi
|
|
|
|
vpxor %xmm6,%xmm7,%xmm4
|
|
|
|
xorl %r8d,%r14d
|
|
|
|
addl %r13d,%edx
|
|
|
|
xorl %r9d,%edi
|
|
|
|
vpshufd $250,%xmm2,%xmm7
|
|
|
|
shrdl $2,%r14d,%r14d
|
|
|
|
addl %edx,%r11d
|
|
|
|
addl %edi,%edx
|
|
|
|
vpsrld $11,%xmm6,%xmm6
|
|
|
|
movl %r11d,%r13d
|
|
|
|
addl %edx,%r14d
|
|
|
|
shrdl $14,%r13d,%r13d
|
|
|
|
vpxor %xmm5,%xmm4,%xmm4
|
|
|
|
movl %r14d,%edx
|
|
|
|
movl %eax,%r12d
|
|
|
|
shrdl $9,%r14d,%r14d
|
|
|
|
vpslld $11,%xmm5,%xmm5
|
|
|
|
xorl %r11d,%r13d
|
|
|
|
xorl %ebx,%r12d
|
|
|
|
shrdl $5,%r13d,%r13d
|
|
|
|
vpxor %xmm6,%xmm4,%xmm4
|
|
|
|
xorl %edx,%r14d
|
|
|
|
andl %r11d,%r12d
|
|
|
|
xorl %r11d,%r13d
|
|
|
|
vpsrld $10,%xmm7,%xmm6
|
|
|
|
addl 52(%rsp),%ecx
|
|
|
|
movl %edx,%edi
|
|
|
|
xorl %ebx,%r12d
|
|
|
|
vpxor %xmm5,%xmm4,%xmm4
|
|
|
|
shrdl $11,%r14d,%r14d
|
|
|
|
xorl %r8d,%edi
|
|
|
|
addl %r12d,%ecx
|
|
|
|
vpsrlq $17,%xmm7,%xmm7
|
|
|
|
shrdl $6,%r13d,%r13d
|
|
|
|
andl %edi,%r15d
|
|
|
|
xorl %edx,%r14d
|
|
|
|
vpaddd %xmm4,%xmm3,%xmm3
|
|
|
|
addl %r13d,%ecx
|
|
|
|
xorl %r8d,%r15d
|
|
|
|
shrdl $2,%r14d,%r14d
|
|
|
|
vpxor %xmm7,%xmm6,%xmm6
|
|
|
|
addl %ecx,%r10d
|
|
|
|
addl %r15d,%ecx
|
|
|
|
movl %r10d,%r13d
|
|
|
|
vpsrlq $2,%xmm7,%xmm7
|
|
|
|
addl %ecx,%r14d
|
|
|
|
shrdl $14,%r13d,%r13d
|
|
|
|
movl %r14d,%ecx
|
|
|
|
vpxor %xmm7,%xmm6,%xmm6
|
|
|
|
movl %r11d,%r12d
|
|
|
|
shrdl $9,%r14d,%r14d
|
|
|
|
xorl %r10d,%r13d
|
|
|
|
vpshufb %xmm8,%xmm6,%xmm6
|
|
|
|
xorl %eax,%r12d
|
|
|
|
shrdl $5,%r13d,%r13d
|
|
|
|
xorl %ecx,%r14d
|
|
|
|
vpaddd %xmm6,%xmm3,%xmm3
|
|
|
|
andl %r10d,%r12d
|
|
|
|
xorl %r10d,%r13d
|
|
|
|
addl 56(%rsp),%ebx
|
|
|
|
vpshufd $80,%xmm3,%xmm7
|
|
|
|
movl %ecx,%r15d
|
|
|
|
xorl %eax,%r12d
|
|
|
|
shrdl $11,%r14d,%r14d
|
|
|
|
vpsrld $10,%xmm7,%xmm6
|
|
|
|
xorl %edx,%r15d
|
|
|
|
addl %r12d,%ebx
|
|
|
|
shrdl $6,%r13d,%r13d
|
|
|
|
vpsrlq $17,%xmm7,%xmm7
|
|
|
|
andl %r15d,%edi
|
|
|
|
xorl %ecx,%r14d
|
|
|
|
addl %r13d,%ebx
|
|
|
|
vpxor %xmm7,%xmm6,%xmm6
|
|
|
|
xorl %edx,%edi
|
|
|
|
shrdl $2,%r14d,%r14d
|
|
|
|
addl %ebx,%r9d
|
|
|
|
vpsrlq $2,%xmm7,%xmm7
|
|
|
|
addl %edi,%ebx
|
|
|
|
movl %r9d,%r13d
|
|
|
|
addl %ebx,%r14d
|
|
|
|
vpxor %xmm7,%xmm6,%xmm6
|
|
|
|
shrdl $14,%r13d,%r13d
|
|
|
|
movl %r14d,%ebx
|
|
|
|
movl %r10d,%r12d
|
|
|
|
vpshufb %xmm9,%xmm6,%xmm6
|
|
|
|
shrdl $9,%r14d,%r14d
|
|
|
|
xorl %r9d,%r13d
|
|
|
|
xorl %r11d,%r12d
|
|
|
|
vpaddd %xmm6,%xmm3,%xmm3
|
|
|
|
shrdl $5,%r13d,%r13d
|
|
|
|
xorl %ebx,%r14d
|
|
|
|
andl %r9d,%r12d
|
|
|
|
vpaddd 96(%rbp),%xmm3,%xmm6
|
|
|
|
xorl %r9d,%r13d
|
|
|
|
addl 60(%rsp),%eax
|
|
|
|
movl %ebx,%edi
|
|
|
|
xorl %r11d,%r12d
|
|
|
|
shrdl $11,%r14d,%r14d
|
|
|
|
xorl %ecx,%edi
|
|
|
|
addl %r12d,%eax
|
|
|
|
shrdl $6,%r13d,%r13d
|
|
|
|
andl %edi,%r15d
|
|
|
|
xorl %ebx,%r14d
|
|
|
|
addl %r13d,%eax
|
|
|
|
xorl %ecx,%r15d
|
|
|
|
shrdl $2,%r14d,%r14d
|
|
|
|
addl %eax,%r8d
|
|
|
|
addl %r15d,%eax
|
|
|
|
movl %r8d,%r13d
|
|
|
|
addl %eax,%r14d
|
|
|
|
vmovdqa %xmm6,48(%rsp)
|
|
|
|
cmpb $0,131(%rbp)
|
|
|
|
jne .Lavx_00_47
|
|
|
|
shrdl $14,%r13d,%r13d
|
|
|
|
movl %r14d,%eax
|
|
|
|
movl %r9d,%r12d
|
|
|
|
shrdl $9,%r14d,%r14d
|
|
|
|
xorl %r8d,%r13d
|
|
|
|
xorl %r10d,%r12d
|
|
|
|
shrdl $5,%r13d,%r13d
|
|
|
|
xorl %eax,%r14d
|
|
|
|
andl %r8d,%r12d
|
|
|
|
xorl %r8d,%r13d
|
|
|
|
addl 0(%rsp),%r11d
|
|
|
|
movl %eax,%r15d
|
|
|
|
xorl %r10d,%r12d
|
|
|
|
shrdl $11,%r14d,%r14d
|
|
|
|
xorl %ebx,%r15d
|
|
|
|
addl %r12d,%r11d
|
|
|
|
shrdl $6,%r13d,%r13d
|
|
|
|
andl %r15d,%edi
|
|
|
|
xorl %eax,%r14d
|
|
|
|
addl %r13d,%r11d
|
|
|
|
xorl %ebx,%edi
|
|
|
|
shrdl $2,%r14d,%r14d
|
|
|
|
addl %r11d,%edx
|
|
|
|
addl %edi,%r11d
|
|
|
|
movl %edx,%r13d
|
|
|
|
addl %r11d,%r14d
|
|
|
|
shrdl $14,%r13d,%r13d
|
|
|
|
movl %r14d,%r11d
|
|
|
|
movl %r8d,%r12d
|
|
|
|
shrdl $9,%r14d,%r14d
|
|
|
|
xorl %edx,%r13d
|
|
|
|
xorl %r9d,%r12d
|
|
|
|
shrdl $5,%r13d,%r13d
|
|
|
|
xorl %r11d,%r14d
|
|
|
|
andl %edx,%r12d
|
|
|
|
xorl %edx,%r13d
|
|
|
|
addl 4(%rsp),%r10d
|
|
|
|
movl %r11d,%edi
|
|
|
|
xorl %r9d,%r12d
|
|
|
|
shrdl $11,%r14d,%r14d
|
|
|
|
xorl %eax,%edi
|
|
|
|
addl %r12d,%r10d
|
|
|
|
shrdl $6,%r13d,%r13d
|
|
|
|
andl %edi,%r15d
|
|
|
|
xorl %r11d,%r14d
|
|
|
|
addl %r13d,%r10d
|
|
|
|
xorl %eax,%r15d
|
|
|
|
shrdl $2,%r14d,%r14d
|
|
|
|
addl %r10d,%ecx
|
|
|
|
addl %r15d,%r10d
|
|
|
|
movl %ecx,%r13d
|
|
|
|
addl %r10d,%r14d
|
|
|
|
shrdl $14,%r13d,%r13d
|
|
|
|
movl %r14d,%r10d
|
|
|
|
movl %edx,%r12d
|
|
|
|
shrdl $9,%r14d,%r14d
|
|
|
|
xorl %ecx,%r13d
|
|
|
|
xorl %r8d,%r12d
|
|
|
|
shrdl $5,%r13d,%r13d
|
|
|
|
xorl %r10d,%r14d
|
|
|
|
andl %ecx,%r12d
|
|
|
|
xorl %ecx,%r13d
|
|
|
|
addl 8(%rsp),%r9d
|
|
|
|
movl %r10d,%r15d
|
|
|
|
xorl %r8d,%r12d
|
|
|
|
shrdl $11,%r14d,%r14d
|
|
|
|
xorl %r11d,%r15d
|
|
|
|
addl %r12d,%r9d
|
|
|
|
shrdl $6,%r13d,%r13d
|
|
|
|
andl %r15d,%edi
|
|
|
|
xorl %r10d,%r14d
|
|
|
|
addl %r13d,%r9d
|
|
|
|
xorl %r11d,%edi
|
|
|
|
shrdl $2,%r14d,%r14d
|
|
|
|
addl %r9d,%ebx
|
|
|
|
addl %edi,%r9d
|
|
|
|
movl %ebx,%r13d
|
|
|
|
addl %r9d,%r14d
|
|
|
|
shrdl $14,%r13d,%r13d
|
|
|
|
movl %r14d,%r9d
|
|
|
|
movl %ecx,%r12d
|
|
|
|
shrdl $9,%r14d,%r14d
|
|
|
|
xorl %ebx,%r13d
|
|
|
|
xorl %edx,%r12d
|
|
|
|
shrdl $5,%r13d,%r13d
|
|
|
|
xorl %r9d,%r14d
|
|
|
|
andl %ebx,%r12d
|
|
|
|
xorl %ebx,%r13d
|
|
|
|
addl 12(%rsp),%r8d
|
|
|
|
movl %r9d,%edi
|
|
|
|
xorl %edx,%r12d
|
|
|
|
shrdl $11,%r14d,%r14d
|
|
|
|
xorl %r10d,%edi
|
|
|
|
addl %r12d,%r8d
|
|
|
|
shrdl $6,%r13d,%r13d
|
|
|
|
andl %edi,%r15d
|
|
|
|
xorl %r9d,%r14d
|
|
|
|
addl %r13d,%r8d
|
|
|
|
xorl %r10d,%r15d
|
|
|
|
shrdl $2,%r14d,%r14d
|
|
|
|
addl %r8d,%eax
|
|
|
|
addl %r15d,%r8d
|
|
|
|
movl %eax,%r13d
|
|
|
|
addl %r8d,%r14d
|
|
|
|
shrdl $14,%r13d,%r13d
|
|
|
|
movl %r14d,%r8d
|
|
|
|
movl %ebx,%r12d
|
|
|
|
shrdl $9,%r14d,%r14d
|
|
|
|
xorl %eax,%r13d
|
|
|
|
xorl %ecx,%r12d
|
|
|
|
shrdl $5,%r13d,%r13d
|
|
|
|
xorl %r8d,%r14d
|
|
|
|
andl %eax,%r12d
|
|
|
|
xorl %eax,%r13d
|
|
|
|
addl 16(%rsp),%edx
|
|
|
|
movl %r8d,%r15d
|
|
|
|
xorl %ecx,%r12d
|
|
|
|
shrdl $11,%r14d,%r14d
|
|
|
|
xorl %r9d,%r15d
|
|
|
|
addl %r12d,%edx
|
|
|
|
shrdl $6,%r13d,%r13d
|
|
|
|
andl %r15d,%edi
|
|
|
|
xorl %r8d,%r14d
|
|
|
|
addl %r13d,%edx
|
|
|
|
xorl %r9d,%edi
|
|
|
|
shrdl $2,%r14d,%r14d
|
|
|
|
addl %edx,%r11d
|
|
|
|
addl %edi,%edx
|
|
|
|
movl %r11d,%r13d
|
|
|
|
addl %edx,%r14d
|
|
|
|
shrdl $14,%r13d,%r13d
|
|
|
|
movl %r14d,%edx
|
|
|
|
movl %eax,%r12d
|
|
|
|
shrdl $9,%r14d,%r14d
|
|
|
|
xorl %r11d,%r13d
|
|
|
|
xorl %ebx,%r12d
|
|
|
|
shrdl $5,%r13d,%r13d
|
|
|
|
xorl %edx,%r14d
|
|
|
|
andl %r11d,%r12d
|
|
|
|
xorl %r11d,%r13d
|
|
|
|
addl 20(%rsp),%ecx
|
|
|
|
movl %edx,%edi
|
|
|
|
xorl %ebx,%r12d
|
|
|
|
shrdl $11,%r14d,%r14d
|
|
|
|
xorl %r8d,%edi
|
|
|
|
addl %r12d,%ecx
|
|
|
|
shrdl $6,%r13d,%r13d
|
|
|
|
andl %edi,%r15d
|
|
|
|
xorl %edx,%r14d
|
|
|
|
addl %r13d,%ecx
|
|
|
|
xorl %r8d,%r15d
|
|
|
|
shrdl $2,%r14d,%r14d
|
|
|
|
addl %ecx,%r10d
|
|
|
|
addl %r15d,%ecx
|
|
|
|
movl %r10d,%r13d
|
|
|
|
addl %ecx,%r14d
|
|
|
|
shrdl $14,%r13d,%r13d
|
|
|
|
movl %r14d,%ecx
|
|
|
|
movl %r11d,%r12d
|
|
|
|
shrdl $9,%r14d,%r14d
|
|
|
|
xorl %r10d,%r13d
|
|
|
|
xorl %eax,%r12d
|
|
|
|
shrdl $5,%r13d,%r13d
|
|
|
|
xorl %ecx,%r14d
|
|
|
|
andl %r10d,%r12d
|
|
|
|
xorl %r10d,%r13d
|
|
|
|
addl 24(%rsp),%ebx
|
|
|
|
movl %ecx,%r15d
|
|
|
|
xorl %eax,%r12d
|
|
|
|
shrdl $11,%r14d,%r14d
|
|
|
|
xorl %edx,%r15d
|
|
|
|
addl %r12d,%ebx
|
|
|
|
shrdl $6,%r13d,%r13d
|
|
|
|
andl %r15d,%edi
|
|
|
|
xorl %ecx,%r14d
|
|
|
|
addl %r13d,%ebx
|
|
|
|
xorl %edx,%edi
|
|
|
|
shrdl $2,%r14d,%r14d
|
|
|
|
addl %ebx,%r9d
|
|
|
|
addl %edi,%ebx
|
|
|
|
movl %r9d,%r13d
|
|
|
|
addl %ebx,%r14d
|
|
|
|
shrdl $14,%r13d,%r13d
|
|
|
|
movl %r14d,%ebx
|
|
|
|
movl %r10d,%r12d
|
|
|
|
shrdl $9,%r14d,%r14d
|
|
|
|
xorl %r9d,%r13d
|
|
|
|
xorl %r11d,%r12d
|
|
|
|
shrdl $5,%r13d,%r13d
|
|
|
|
xorl %ebx,%r14d
|
|
|
|
andl %r9d,%r12d
|
|
|
|
xorl %r9d,%r13d
|
|
|
|
addl 28(%rsp),%eax
|
|
|
|
movl %ebx,%edi
|
|
|
|
xorl %r11d,%r12d
|
|
|
|
shrdl $11,%r14d,%r14d
|
|
|
|
xorl %ecx,%edi
|
|
|
|
addl %r12d,%eax
|
|
|
|
shrdl $6,%r13d,%r13d
|
|
|
|
andl %edi,%r15d
|
|
|
|
xorl %ebx,%r14d
|
|
|
|
addl %r13d,%eax
|
|
|
|
xorl %ecx,%r15d
|
|
|
|
shrdl $2,%r14d,%r14d
|
|
|
|
addl %eax,%r8d
|
|
|
|
addl %r15d,%eax
|
|
|
|
movl %r8d,%r13d
|
|
|
|
addl %eax,%r14d
|
|
|
|
shrdl $14,%r13d,%r13d
|
|
|
|
movl %r14d,%eax
|
|
|
|
movl %r9d,%r12d
|
|
|
|
shrdl $9,%r14d,%r14d
|
|
|
|
xorl %r8d,%r13d
|
|
|
|
xorl %r10d,%r12d
|
|
|
|
shrdl $5,%r13d,%r13d
|
|
|
|
xorl %eax,%r14d
|
|
|
|
andl %r8d,%r12d
|
|
|
|
xorl %r8d,%r13d
|
|
|
|
addl 32(%rsp),%r11d
|
|
|
|
movl %eax,%r15d
|
|
|
|
xorl %r10d,%r12d
|
|
|
|
shrdl $11,%r14d,%r14d
|
|
|
|
xorl %ebx,%r15d
|
|
|
|
addl %r12d,%r11d
|
|
|
|
shrdl $6,%r13d,%r13d
|
|
|
|
andl %r15d,%edi
|
|
|
|
xorl %eax,%r14d
|
|
|
|
addl %r13d,%r11d
|
|
|
|
xorl %ebx,%edi
|
|
|
|
shrdl $2,%r14d,%r14d
|
|
|
|
addl %r11d,%edx
|
|
|
|
addl %edi,%r11d
|
|
|
|
movl %edx,%r13d
|
|
|
|
addl %r11d,%r14d
|
|
|
|
shrdl $14,%r13d,%r13d
|
|
|
|
movl %r14d,%r11d
|
|
|
|
movl %r8d,%r12d
|
|
|
|
shrdl $9,%r14d,%r14d
|
|
|
|
xorl %edx,%r13d
|
|
|
|
xorl %r9d,%r12d
|
|
|
|
shrdl $5,%r13d,%r13d
|
|
|
|
xorl %r11d,%r14d
|
|
|
|
andl %edx,%r12d
|
|
|
|
xorl %edx,%r13d
|
|
|
|
addl 36(%rsp),%r10d
|
|
|
|
movl %r11d,%edi
|
|
|
|
xorl %r9d,%r12d
|
|
|
|
shrdl $11,%r14d,%r14d
|
|
|
|
xorl %eax,%edi
|
|
|
|
addl %r12d,%r10d
|
|
|
|
shrdl $6,%r13d,%r13d
|
|
|
|
andl %edi,%r15d
|
|
|
|
xorl %r11d,%r14d
|
|
|
|
addl %r13d,%r10d
|
|
|
|
xorl %eax,%r15d
|
|
|
|
shrdl $2,%r14d,%r14d
|
|
|
|
addl %r10d,%ecx
|
|
|
|
addl %r15d,%r10d
|
|
|
|
movl %ecx,%r13d
|
|
|
|
addl %r10d,%r14d
|
|
|
|
shrdl $14,%r13d,%r13d
|
|
|
|
movl %r14d,%r10d
|
|
|
|
movl %edx,%r12d
|
|
|
|
shrdl $9,%r14d,%r14d
|
|
|
|
xorl %ecx,%r13d
|
|
|
|
xorl %r8d,%r12d
|
|
|
|
shrdl $5,%r13d,%r13d
|
|
|
|
xorl %r10d,%r14d
|
|
|
|
andl %ecx,%r12d
|
|
|
|
xorl %ecx,%r13d
|
|
|
|
addl 40(%rsp),%r9d
|
|
|
|
movl %r10d,%r15d
|
|
|
|
xorl %r8d,%r12d
|
|
|
|
shrdl $11,%r14d,%r14d
|
|
|
|
xorl %r11d,%r15d
|
|
|
|
addl %r12d,%r9d
|
|
|
|
shrdl $6,%r13d,%r13d
|
|
|
|
andl %r15d,%edi
|
|
|
|
xorl %r10d,%r14d
|
|
|
|
addl %r13d,%r9d
|
|
|
|
xorl %r11d,%edi
|
|
|
|
shrdl $2,%r14d,%r14d
|
|
|
|
addl %r9d,%ebx
|
|
|
|
addl %edi,%r9d
|
|
|
|
movl %ebx,%r13d
|
|
|
|
addl %r9d,%r14d
|
|
|
|
shrdl $14,%r13d,%r13d
|
|
|
|
movl %r14d,%r9d
|
|
|
|
movl %ecx,%r12d
|
|
|
|
shrdl $9,%r14d,%r14d
|
|
|
|
xorl %ebx,%r13d
|
|
|
|
xorl %edx,%r12d
|
|
|
|
shrdl $5,%r13d,%r13d
|
|
|
|
xorl %r9d,%r14d
|
|
|
|
andl %ebx,%r12d
|
|
|
|
xorl %ebx,%r13d
|
|
|
|
addl 44(%rsp),%r8d
|
|
|
|
movl %r9d,%edi
|
|
|
|
xorl %edx,%r12d
|
|
|
|
shrdl $11,%r14d,%r14d
|
|
|
|
xorl %r10d,%edi
|
|
|
|
addl %r12d,%r8d
|
|
|
|
shrdl $6,%r13d,%r13d
|
|
|
|
andl %edi,%r15d
|
|
|
|
xorl %r9d,%r14d
|
|
|
|
addl %r13d,%r8d
|
|
|
|
xorl %r10d,%r15d
|
|
|
|
shrdl $2,%r14d,%r14d
|
|
|
|
addl %r8d,%eax
|
|
|
|
addl %r15d,%r8d
|
|
|
|
movl %eax,%r13d
|
|
|
|
addl %r8d,%r14d
|
|
|
|
shrdl $14,%r13d,%r13d
|
|
|
|
movl %r14d,%r8d
|
|
|
|
movl %ebx,%r12d
|
|
|
|
shrdl $9,%r14d,%r14d
|
|
|
|
xorl %eax,%r13d
|
|
|
|
xorl %ecx,%r12d
|
|
|
|
shrdl $5,%r13d,%r13d
|
|
|
|
xorl %r8d,%r14d
|
|
|
|
andl %eax,%r12d
|
|
|
|
xorl %eax,%r13d
|
|
|
|
addl 48(%rsp),%edx
|
|
|
|
movl %r8d,%r15d
|
|
|
|
xorl %ecx,%r12d
|
|
|
|
shrdl $11,%r14d,%r14d
|
|
|
|
xorl %r9d,%r15d
|
|
|
|
addl %r12d,%edx
|
|
|
|
shrdl $6,%r13d,%r13d
|
|
|
|
andl %r15d,%edi
|
|
|
|
xorl %r8d,%r14d
|
|
|
|
addl %r13d,%edx
|
|
|
|
xorl %r9d,%edi
|
|
|
|
shrdl $2,%r14d,%r14d
|
|
|
|
addl %edx,%r11d
|
|
|
|
addl %edi,%edx
|
|
|
|
movl %r11d,%r13d
|
|
|
|
addl %edx,%r14d
|
|
|
|
shrdl $14,%r13d,%r13d
|
|
|
|
movl %r14d,%edx
|
|
|
|
movl %eax,%r12d
|
|
|
|
shrdl $9,%r14d,%r14d
|
|
|
|
xorl %r11d,%r13d
|
|
|
|
xorl %ebx,%r12d
|
|
|
|
shrdl $5,%r13d,%r13d
|
|
|
|
xorl %edx,%r14d
|
|
|
|
andl %r11d,%r12d
|
|
|
|
xorl %r11d,%r13d
|
|
|
|
addl 52(%rsp),%ecx
|
|
|
|
movl %edx,%edi
|
|
|
|
xorl %ebx,%r12d
|
|
|
|
shrdl $11,%r14d,%r14d
|
|
|
|
xorl %r8d,%edi
|
|
|
|
addl %r12d,%ecx
|
|
|
|
shrdl $6,%r13d,%r13d
|
|
|
|
andl %edi,%r15d
|
|
|
|
xorl %edx,%r14d
|
|
|
|
addl %r13d,%ecx
|
|
|
|
xorl %r8d,%r15d
|
|
|
|
shrdl $2,%r14d,%r14d
|
|
|
|
addl %ecx,%r10d
|
|
|
|
addl %r15d,%ecx
|
|
|
|
movl %r10d,%r13d
|
|
|
|
addl %ecx,%r14d
|
|
|
|
shrdl $14,%r13d,%r13d
|
|
|
|
movl %r14d,%ecx
|
|
|
|
movl %r11d,%r12d
|
|
|
|
shrdl $9,%r14d,%r14d
|
|
|
|
xorl %r10d,%r13d
|
|
|
|
xorl %eax,%r12d
|
|
|
|
shrdl $5,%r13d,%r13d
|
|
|
|
xorl %ecx,%r14d
|
|
|
|
andl %r10d,%r12d
|
|
|
|
xorl %r10d,%r13d
|
|
|
|
addl 56(%rsp),%ebx
|
|
|
|
movl %ecx,%r15d
|
|
|
|
xorl %eax,%r12d
|
|
|
|
shrdl $11,%r14d,%r14d
|
|
|
|
xorl %edx,%r15d
|
|
|
|
addl %r12d,%ebx
|
|
|
|
shrdl $6,%r13d,%r13d
|
|
|
|
andl %r15d,%edi
|
|
|
|
xorl %ecx,%r14d
|
|
|
|
addl %r13d,%ebx
|
|
|
|
xorl %edx,%edi
|
|
|
|
shrdl $2,%r14d,%r14d
|
|
|
|
addl %ebx,%r9d
|
|
|
|
addl %edi,%ebx
|
|
|
|
movl %r9d,%r13d
|
|
|
|
addl %ebx,%r14d
|
|
|
|
shrdl $14,%r13d,%r13d
|
|
|
|
movl %r14d,%ebx
|
|
|
|
movl %r10d,%r12d
|
|
|
|
shrdl $9,%r14d,%r14d
|
|
|
|
xorl %r9d,%r13d
|
|
|
|
xorl %r11d,%r12d
|
|
|
|
shrdl $5,%r13d,%r13d
|
|
|
|
xorl %ebx,%r14d
|
|
|
|
andl %r9d,%r12d
|
|
|
|
xorl %r9d,%r13d
|
|
|
|
addl 60(%rsp),%eax
|
|
|
|
movl %ebx,%edi
|
|
|
|
xorl %r11d,%r12d
|
|
|
|
shrdl $11,%r14d,%r14d
|
|
|
|
xorl %ecx,%edi
|
|
|
|
addl %r12d,%eax
|
|
|
|
shrdl $6,%r13d,%r13d
|
|
|
|
andl %edi,%r15d
|
|
|
|
xorl %ebx,%r14d
|
|
|
|
addl %r13d,%eax
|
|
|
|
xorl %ecx,%r15d
|
|
|
|
shrdl $2,%r14d,%r14d
|
|
|
|
addl %eax,%r8d
|
|
|
|
addl %r15d,%eax
|
|
|
|
movl %r8d,%r13d
|
|
|
|
addl %eax,%r14d
|
|
|
|
movq 64+0(%rsp),%rdi
|
|
|
|
movl %r14d,%eax
|
|
|
|
|
|
|
|
addl 0(%rdi),%eax
|
|
|
|
leaq 64(%rsi),%rsi
|
|
|
|
addl 4(%rdi),%ebx
|
|
|
|
addl 8(%rdi),%ecx
|
|
|
|
addl 12(%rdi),%edx
|
|
|
|
addl 16(%rdi),%r8d
|
|
|
|
addl 20(%rdi),%r9d
|
|
|
|
addl 24(%rdi),%r10d
|
|
|
|
addl 28(%rdi),%r11d
|
|
|
|
|
|
|
|
cmpq 64+16(%rsp),%rsi
|
|
|
|
|
|
|
|
movl %eax,0(%rdi)
|
|
|
|
movl %ebx,4(%rdi)
|
|
|
|
movl %ecx,8(%rdi)
|
|
|
|
movl %edx,12(%rdi)
|
|
|
|
movl %r8d,16(%rdi)
|
|
|
|
movl %r9d,20(%rdi)
|
|
|
|
movl %r10d,24(%rdi)
|
|
|
|
movl %r11d,28(%rdi)
|
|
|
|
jb .Lloop_avx
|
|
|
|
|
|
|
|
movq 88(%rsp),%rsi
|
|
|
|
.cfi_def_cfa %rsi,8
|
|
|
|
vzeroupper
|
|
|
|
movq -48(%rsi),%r15
|
|
|
|
.cfi_restore %r15
|
|
|
|
movq -40(%rsi),%r14
|
|
|
|
.cfi_restore %r14
|
|
|
|
movq -32(%rsi),%r13
|
|
|
|
.cfi_restore %r13
|
|
|
|
movq -24(%rsi),%r12
|
|
|
|
.cfi_restore %r12
|
|
|
|
movq -16(%rsi),%rbp
|
|
|
|
.cfi_restore %rbp
|
|
|
|
movq -8(%rsi),%rbx
|
|
|
|
.cfi_restore %rbx
|
|
|
|
leaq (%rsi),%rsp
|
|
|
|
.cfi_def_cfa_register %rsp
|
|
|
|
.Lepilogue_avx:
|
|
|
|
RET
|
|
|
|
.cfi_endproc
|
|
|
|
SET_SIZE(zfs_sha256_transform_avx)
|
|
|
|
|
|
|
|
ENTRY_ALIGN(zfs_sha256_transform_avx2, 64)
|
|
|
|
.cfi_startproc
|
|
|
|
ENDBR
|
|
|
|
movq %rsp,%rax
|
|
|
|
.cfi_def_cfa_register %rax
|
|
|
|
pushq %rbx
|
|
|
|
.cfi_offset %rbx,-16
|
|
|
|
pushq %rbp
|
|
|
|
.cfi_offset %rbp,-24
|
|
|
|
pushq %r12
|
|
|
|
.cfi_offset %r12,-32
|
|
|
|
pushq %r13
|
|
|
|
.cfi_offset %r13,-40
|
|
|
|
pushq %r14
|
|
|
|
.cfi_offset %r14,-48
|
|
|
|
pushq %r15
|
|
|
|
.cfi_offset %r15,-56
|
|
|
|
subq $544,%rsp
|
|
|
|
shlq $4,%rdx
|
|
|
|
andq $-1024,%rsp
|
|
|
|
leaq (%rsi,%rdx,4),%rdx
|
|
|
|
addq $448,%rsp
|
|
|
|
movq %rdi,64+0(%rsp)
|
|
|
|
movq %rsi,64+8(%rsp)
|
|
|
|
movq %rdx,64+16(%rsp)
|
|
|
|
movq %rax,88(%rsp)
|
|
|
|
.cfi_escape 0x0f,0x06,0x77,0xd8,0x00,0x06,0x23,0x08
|
|
|
|
.Lprologue_avx2:
|
|
|
|
|
|
|
|
vzeroupper
|
|
|
|
subq $-64,%rsi
|
|
|
|
movl 0(%rdi),%eax
|
|
|
|
movq %rsi,%r12
|
|
|
|
movl 4(%rdi),%ebx
|
|
|
|
cmpq %rdx,%rsi
|
|
|
|
movl 8(%rdi),%ecx
|
|
|
|
cmoveq %rsp,%r12
|
|
|
|
movl 12(%rdi),%edx
|
|
|
|
movl 16(%rdi),%r8d
|
|
|
|
movl 20(%rdi),%r9d
|
|
|
|
movl 24(%rdi),%r10d
|
|
|
|
movl 28(%rdi),%r11d
|
|
|
|
vmovdqa K256+512+32(%rip),%ymm8
|
|
|
|
vmovdqa K256+512+64(%rip),%ymm9
|
|
|
|
jmp .Loop_avx2
|
2023-03-06 23:24:05 +00:00
|
|
|
.balign 16
|
Add generic implementation handling and SHA2 impl
The skeleton file module/icp/include/generic_impl.c can be used for
iterating over different implementations of algorithms.
It is used by SHA256, SHA512 and BLAKE3 currently.
The Solaris SHA2 implementation got replaced with a version which is
based on public domain code of cppcrypto v0.10.
These assembly files are taken from current openssl master:
- sha256-x86_64.S: x64, SSSE3, AVX, AVX2, SHA-NI (x86_64)
- sha512-x86_64.S: x64, AVX, AVX2 (x86_64)
- sha256-armv7.S: ARMv7, NEON, ARMv8-CE (arm)
- sha512-armv7.S: ARMv7, NEON (arm)
- sha256-armv8.S: ARMv7, NEON, ARMv8-CE (aarch64)
- sha512-armv8.S: ARMv7, ARMv8-CE (aarch64)
- sha256-ppc.S: Generic PPC64 LE/BE (ppc64)
- sha512-ppc.S: Generic PPC64 LE/BE (ppc64)
- sha256-p8.S: Power8 ISA Version 2.07 LE/BE (ppc64)
- sha512-p8.S: Power8 ISA Version 2.07 LE/BE (ppc64)
Tested-by: Rich Ercolani <rincebrain@gmail.com>
Tested-by: Sebastian Gottschall <s.gottschall@dd-wrt.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Tino Reichardt <milky-zfs@mcmilk.de>
Closes #13741
2023-03-01 08:40:28 +00:00
|
|
|
.Loop_avx2:
|
|
|
|
vmovdqa K256+512(%rip),%ymm7
|
|
|
|
vmovdqu -64+0(%rsi),%xmm0
|
|
|
|
vmovdqu -64+16(%rsi),%xmm1
|
|
|
|
vmovdqu -64+32(%rsi),%xmm2
|
|
|
|
vmovdqu -64+48(%rsi),%xmm3
|
|
|
|
|
|
|
|
vinserti128 $1,(%r12),%ymm0,%ymm0
|
|
|
|
vinserti128 $1,16(%r12),%ymm1,%ymm1
|
|
|
|
vpshufb %ymm7,%ymm0,%ymm0
|
|
|
|
vinserti128 $1,32(%r12),%ymm2,%ymm2
|
|
|
|
vpshufb %ymm7,%ymm1,%ymm1
|
|
|
|
vinserti128 $1,48(%r12),%ymm3,%ymm3
|
|
|
|
|
|
|
|
leaq K256(%rip),%rbp
|
|
|
|
vpshufb %ymm7,%ymm2,%ymm2
|
|
|
|
vpaddd 0(%rbp),%ymm0,%ymm4
|
|
|
|
vpshufb %ymm7,%ymm3,%ymm3
|
|
|
|
vpaddd 32(%rbp),%ymm1,%ymm5
|
|
|
|
vpaddd 64(%rbp),%ymm2,%ymm6
|
|
|
|
vpaddd 96(%rbp),%ymm3,%ymm7
|
|
|
|
vmovdqa %ymm4,0(%rsp)
|
|
|
|
xorl %r14d,%r14d
|
|
|
|
vmovdqa %ymm5,32(%rsp)
|
|
|
|
|
|
|
|
movq 88(%rsp),%rdi
|
|
|
|
.cfi_def_cfa %rdi,8
|
|
|
|
leaq -64(%rsp),%rsp
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
movq %rdi,-8(%rsp)
|
|
|
|
.cfi_escape 0x0f,0x05,0x77,0x78,0x06,0x23,0x08
|
|
|
|
movl %ebx,%edi
|
|
|
|
vmovdqa %ymm6,0(%rsp)
|
|
|
|
xorl %ecx,%edi
|
|
|
|
vmovdqa %ymm7,32(%rsp)
|
|
|
|
movl %r9d,%r12d
|
|
|
|
subq $-32*4,%rbp
|
|
|
|
jmp .Lavx2_00_47
|
|
|
|
|
2023-03-06 23:24:05 +00:00
|
|
|
.balign 16
|
Add generic implementation handling and SHA2 impl
The skeleton file module/icp/include/generic_impl.c can be used for
iterating over different implementations of algorithms.
It is used by SHA256, SHA512 and BLAKE3 currently.
The Solaris SHA2 implementation got replaced with a version which is
based on public domain code of cppcrypto v0.10.
These assembly files are taken from current openssl master:
- sha256-x86_64.S: x64, SSSE3, AVX, AVX2, SHA-NI (x86_64)
- sha512-x86_64.S: x64, AVX, AVX2 (x86_64)
- sha256-armv7.S: ARMv7, NEON, ARMv8-CE (arm)
- sha512-armv7.S: ARMv7, NEON (arm)
- sha256-armv8.S: ARMv7, NEON, ARMv8-CE (aarch64)
- sha512-armv8.S: ARMv7, ARMv8-CE (aarch64)
- sha256-ppc.S: Generic PPC64 LE/BE (ppc64)
- sha512-ppc.S: Generic PPC64 LE/BE (ppc64)
- sha256-p8.S: Power8 ISA Version 2.07 LE/BE (ppc64)
- sha512-p8.S: Power8 ISA Version 2.07 LE/BE (ppc64)
Tested-by: Rich Ercolani <rincebrain@gmail.com>
Tested-by: Sebastian Gottschall <s.gottschall@dd-wrt.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Tino Reichardt <milky-zfs@mcmilk.de>
Closes #13741
2023-03-01 08:40:28 +00:00
|
|
|
.Lavx2_00_47:
|
|
|
|
leaq -64(%rsp),%rsp
|
|
|
|
.cfi_escape 0x0f,0x05,0x77,0x38,0x06,0x23,0x08
|
|
|
|
|
|
|
|
pushq 64-8(%rsp)
|
|
|
|
.cfi_escape 0x0f,0x05,0x77,0x00,0x06,0x23,0x08
|
|
|
|
leaq 8(%rsp),%rsp
|
|
|
|
.cfi_escape 0x0f,0x05,0x77,0x78,0x06,0x23,0x08
|
|
|
|
vpalignr $4,%ymm0,%ymm1,%ymm4
|
|
|
|
addl 0+128(%rsp),%r11d
|
|
|
|
andl %r8d,%r12d
|
|
|
|
rorxl $25,%r8d,%r13d
|
|
|
|
vpalignr $4,%ymm2,%ymm3,%ymm7
|
|
|
|
rorxl $11,%r8d,%r15d
|
|
|
|
leal (%rax,%r14,1),%eax
|
|
|
|
leal (%r11,%r12,1),%r11d
|
|
|
|
vpsrld $7,%ymm4,%ymm6
|
|
|
|
andnl %r10d,%r8d,%r12d
|
|
|
|
xorl %r15d,%r13d
|
|
|
|
rorxl $6,%r8d,%r14d
|
|
|
|
vpaddd %ymm7,%ymm0,%ymm0
|
|
|
|
leal (%r11,%r12,1),%r11d
|
|
|
|
xorl %r14d,%r13d
|
|
|
|
movl %eax,%r15d
|
|
|
|
vpsrld $3,%ymm4,%ymm7
|
|
|
|
rorxl $22,%eax,%r12d
|
|
|
|
leal (%r11,%r13,1),%r11d
|
|
|
|
xorl %ebx,%r15d
|
|
|
|
vpslld $14,%ymm4,%ymm5
|
|
|
|
rorxl $13,%eax,%r14d
|
|
|
|
rorxl $2,%eax,%r13d
|
|
|
|
leal (%rdx,%r11,1),%edx
|
|
|
|
vpxor %ymm6,%ymm7,%ymm4
|
|
|
|
andl %r15d,%edi
|
|
|
|
xorl %r12d,%r14d
|
|
|
|
xorl %ebx,%edi
|
|
|
|
vpshufd $250,%ymm3,%ymm7
|
|
|
|
xorl %r13d,%r14d
|
|
|
|
leal (%r11,%rdi,1),%r11d
|
|
|
|
movl %r8d,%r12d
|
|
|
|
vpsrld $11,%ymm6,%ymm6
|
|
|
|
addl 4+128(%rsp),%r10d
|
|
|
|
andl %edx,%r12d
|
|
|
|
rorxl $25,%edx,%r13d
|
|
|
|
vpxor %ymm5,%ymm4,%ymm4
|
|
|
|
rorxl $11,%edx,%edi
|
|
|
|
leal (%r11,%r14,1),%r11d
|
|
|
|
leal (%r10,%r12,1),%r10d
|
|
|
|
vpslld $11,%ymm5,%ymm5
|
|
|
|
andnl %r9d,%edx,%r12d
|
|
|
|
xorl %edi,%r13d
|
|
|
|
rorxl $6,%edx,%r14d
|
|
|
|
vpxor %ymm6,%ymm4,%ymm4
|
|
|
|
leal (%r10,%r12,1),%r10d
|
|
|
|
xorl %r14d,%r13d
|
|
|
|
movl %r11d,%edi
|
|
|
|
vpsrld $10,%ymm7,%ymm6
|
|
|
|
rorxl $22,%r11d,%r12d
|
|
|
|
leal (%r10,%r13,1),%r10d
|
|
|
|
xorl %eax,%edi
|
|
|
|
vpxor %ymm5,%ymm4,%ymm4
|
|
|
|
rorxl $13,%r11d,%r14d
|
|
|
|
rorxl $2,%r11d,%r13d
|
|
|
|
leal (%rcx,%r10,1),%ecx
|
|
|
|
vpsrlq $17,%ymm7,%ymm7
|
|
|
|
andl %edi,%r15d
|
|
|
|
xorl %r12d,%r14d
|
|
|
|
xorl %eax,%r15d
|
|
|
|
vpaddd %ymm4,%ymm0,%ymm0
|
|
|
|
xorl %r13d,%r14d
|
|
|
|
leal (%r10,%r15,1),%r10d
|
|
|
|
movl %edx,%r12d
|
|
|
|
vpxor %ymm7,%ymm6,%ymm6
|
|
|
|
addl 8+128(%rsp),%r9d
|
|
|
|
andl %ecx,%r12d
|
|
|
|
rorxl $25,%ecx,%r13d
|
|
|
|
vpsrlq $2,%ymm7,%ymm7
|
|
|
|
rorxl $11,%ecx,%r15d
|
|
|
|
leal (%r10,%r14,1),%r10d
|
|
|
|
leal (%r9,%r12,1),%r9d
|
|
|
|
vpxor %ymm7,%ymm6,%ymm6
|
|
|
|
andnl %r8d,%ecx,%r12d
|
|
|
|
xorl %r15d,%r13d
|
|
|
|
rorxl $6,%ecx,%r14d
|
|
|
|
vpshufb %ymm8,%ymm6,%ymm6
|
|
|
|
leal (%r9,%r12,1),%r9d
|
|
|
|
xorl %r14d,%r13d
|
|
|
|
movl %r10d,%r15d
|
|
|
|
vpaddd %ymm6,%ymm0,%ymm0
|
|
|
|
rorxl $22,%r10d,%r12d
|
|
|
|
leal (%r9,%r13,1),%r9d
|
|
|
|
xorl %r11d,%r15d
|
|
|
|
vpshufd $80,%ymm0,%ymm7
|
|
|
|
rorxl $13,%r10d,%r14d
|
|
|
|
rorxl $2,%r10d,%r13d
|
|
|
|
leal (%rbx,%r9,1),%ebx
|
|
|
|
vpsrld $10,%ymm7,%ymm6
|
|
|
|
andl %r15d,%edi
|
|
|
|
xorl %r12d,%r14d
|
|
|
|
xorl %r11d,%edi
|
|
|
|
vpsrlq $17,%ymm7,%ymm7
|
|
|
|
xorl %r13d,%r14d
|
|
|
|
leal (%r9,%rdi,1),%r9d
|
|
|
|
movl %ecx,%r12d
|
|
|
|
vpxor %ymm7,%ymm6,%ymm6
|
|
|
|
addl 12+128(%rsp),%r8d
|
|
|
|
andl %ebx,%r12d
|
|
|
|
rorxl $25,%ebx,%r13d
|
|
|
|
vpsrlq $2,%ymm7,%ymm7
|
|
|
|
rorxl $11,%ebx,%edi
|
|
|
|
leal (%r9,%r14,1),%r9d
|
|
|
|
leal (%r8,%r12,1),%r8d
|
|
|
|
vpxor %ymm7,%ymm6,%ymm6
|
|
|
|
andnl %edx,%ebx,%r12d
|
|
|
|
xorl %edi,%r13d
|
|
|
|
rorxl $6,%ebx,%r14d
|
|
|
|
vpshufb %ymm9,%ymm6,%ymm6
|
|
|
|
leal (%r8,%r12,1),%r8d
|
|
|
|
xorl %r14d,%r13d
|
|
|
|
movl %r9d,%edi
|
|
|
|
vpaddd %ymm6,%ymm0,%ymm0
|
|
|
|
rorxl $22,%r9d,%r12d
|
|
|
|
leal (%r8,%r13,1),%r8d
|
|
|
|
xorl %r10d,%edi
|
|
|
|
vpaddd 0(%rbp),%ymm0,%ymm6
|
|
|
|
rorxl $13,%r9d,%r14d
|
|
|
|
rorxl $2,%r9d,%r13d
|
|
|
|
leal (%rax,%r8,1),%eax
|
|
|
|
andl %edi,%r15d
|
|
|
|
xorl %r12d,%r14d
|
|
|
|
xorl %r10d,%r15d
|
|
|
|
xorl %r13d,%r14d
|
|
|
|
leal (%r8,%r15,1),%r8d
|
|
|
|
movl %ebx,%r12d
|
|
|
|
vmovdqa %ymm6,0(%rsp)
|
|
|
|
vpalignr $4,%ymm1,%ymm2,%ymm4
|
|
|
|
addl 32+128(%rsp),%edx
|
|
|
|
andl %eax,%r12d
|
|
|
|
rorxl $25,%eax,%r13d
|
|
|
|
vpalignr $4,%ymm3,%ymm0,%ymm7
|
|
|
|
rorxl $11,%eax,%r15d
|
|
|
|
leal (%r8,%r14,1),%r8d
|
|
|
|
leal (%rdx,%r12,1),%edx
|
|
|
|
vpsrld $7,%ymm4,%ymm6
|
|
|
|
andnl %ecx,%eax,%r12d
|
|
|
|
xorl %r15d,%r13d
|
|
|
|
rorxl $6,%eax,%r14d
|
|
|
|
vpaddd %ymm7,%ymm1,%ymm1
|
|
|
|
leal (%rdx,%r12,1),%edx
|
|
|
|
xorl %r14d,%r13d
|
|
|
|
movl %r8d,%r15d
|
|
|
|
vpsrld $3,%ymm4,%ymm7
|
|
|
|
rorxl $22,%r8d,%r12d
|
|
|
|
leal (%rdx,%r13,1),%edx
|
|
|
|
xorl %r9d,%r15d
|
|
|
|
vpslld $14,%ymm4,%ymm5
|
|
|
|
rorxl $13,%r8d,%r14d
|
|
|
|
rorxl $2,%r8d,%r13d
|
|
|
|
leal (%r11,%rdx,1),%r11d
|
|
|
|
vpxor %ymm6,%ymm7,%ymm4
|
|
|
|
andl %r15d,%edi
|
|
|
|
xorl %r12d,%r14d
|
|
|
|
xorl %r9d,%edi
|
|
|
|
vpshufd $250,%ymm0,%ymm7
|
|
|
|
xorl %r13d,%r14d
|
|
|
|
leal (%rdx,%rdi,1),%edx
|
|
|
|
movl %eax,%r12d
|
|
|
|
vpsrld $11,%ymm6,%ymm6
|
|
|
|
addl 36+128(%rsp),%ecx
|
|
|
|
andl %r11d,%r12d
|
|
|
|
rorxl $25,%r11d,%r13d
|
|
|
|
vpxor %ymm5,%ymm4,%ymm4
|
|
|
|
rorxl $11,%r11d,%edi
|
|
|
|
leal (%rdx,%r14,1),%edx
|
|
|
|
leal (%rcx,%r12,1),%ecx
|
|
|
|
vpslld $11,%ymm5,%ymm5
|
|
|
|
andnl %ebx,%r11d,%r12d
|
|
|
|
xorl %edi,%r13d
|
|
|
|
rorxl $6,%r11d,%r14d
|
|
|
|
vpxor %ymm6,%ymm4,%ymm4
|
|
|
|
leal (%rcx,%r12,1),%ecx
|
|
|
|
xorl %r14d,%r13d
|
|
|
|
movl %edx,%edi
|
|
|
|
vpsrld $10,%ymm7,%ymm6
|
|
|
|
rorxl $22,%edx,%r12d
|
|
|
|
leal (%rcx,%r13,1),%ecx
|
|
|
|
xorl %r8d,%edi
|
|
|
|
vpxor %ymm5,%ymm4,%ymm4
|
|
|
|
rorxl $13,%edx,%r14d
|
|
|
|
rorxl $2,%edx,%r13d
|
|
|
|
leal (%r10,%rcx,1),%r10d
|
|
|
|
vpsrlq $17,%ymm7,%ymm7
|
|
|
|
andl %edi,%r15d
|
|
|
|
xorl %r12d,%r14d
|
|
|
|
xorl %r8d,%r15d
|
|
|
|
vpaddd %ymm4,%ymm1,%ymm1
|
|
|
|
xorl %r13d,%r14d
|
|
|
|
leal (%rcx,%r15,1),%ecx
|
|
|
|
movl %r11d,%r12d
|
|
|
|
vpxor %ymm7,%ymm6,%ymm6
|
|
|
|
addl 40+128(%rsp),%ebx
|
|
|
|
andl %r10d,%r12d
|
|
|
|
rorxl $25,%r10d,%r13d
|
|
|
|
vpsrlq $2,%ymm7,%ymm7
|
|
|
|
rorxl $11,%r10d,%r15d
|
|
|
|
leal (%rcx,%r14,1),%ecx
|
|
|
|
leal (%rbx,%r12,1),%ebx
|
|
|
|
vpxor %ymm7,%ymm6,%ymm6
|
|
|
|
andnl %eax,%r10d,%r12d
|
|
|
|
xorl %r15d,%r13d
|
|
|
|
rorxl $6,%r10d,%r14d
|
|
|
|
vpshufb %ymm8,%ymm6,%ymm6
|
|
|
|
leal (%rbx,%r12,1),%ebx
|
|
|
|
xorl %r14d,%r13d
|
|
|
|
movl %ecx,%r15d
|
|
|
|
vpaddd %ymm6,%ymm1,%ymm1
|
|
|
|
rorxl $22,%ecx,%r12d
|
|
|
|
leal (%rbx,%r13,1),%ebx
|
|
|
|
xorl %edx,%r15d
|
|
|
|
vpshufd $80,%ymm1,%ymm7
|
|
|
|
rorxl $13,%ecx,%r14d
|
|
|
|
rorxl $2,%ecx,%r13d
|
|
|
|
leal (%r9,%rbx,1),%r9d
|
|
|
|
vpsrld $10,%ymm7,%ymm6
|
|
|
|
andl %r15d,%edi
|
|
|
|
xorl %r12d,%r14d
|
|
|
|
xorl %edx,%edi
|
|
|
|
vpsrlq $17,%ymm7,%ymm7
|
|
|
|
xorl %r13d,%r14d
|
|
|
|
leal (%rbx,%rdi,1),%ebx
|
|
|
|
movl %r10d,%r12d
|
|
|
|
vpxor %ymm7,%ymm6,%ymm6
|
|
|
|
addl 44+128(%rsp),%eax
|
|
|
|
andl %r9d,%r12d
|
|
|
|
rorxl $25,%r9d,%r13d
|
|
|
|
vpsrlq $2,%ymm7,%ymm7
|
|
|
|
rorxl $11,%r9d,%edi
|
|
|
|
leal (%rbx,%r14,1),%ebx
|
|
|
|
leal (%rax,%r12,1),%eax
|
|
|
|
vpxor %ymm7,%ymm6,%ymm6
|
|
|
|
andnl %r11d,%r9d,%r12d
|
|
|
|
xorl %edi,%r13d
|
|
|
|
rorxl $6,%r9d,%r14d
|
|
|
|
vpshufb %ymm9,%ymm6,%ymm6
|
|
|
|
leal (%rax,%r12,1),%eax
|
|
|
|
xorl %r14d,%r13d
|
|
|
|
movl %ebx,%edi
|
|
|
|
vpaddd %ymm6,%ymm1,%ymm1
|
|
|
|
rorxl $22,%ebx,%r12d
|
|
|
|
leal (%rax,%r13,1),%eax
|
|
|
|
xorl %ecx,%edi
|
|
|
|
vpaddd 32(%rbp),%ymm1,%ymm6
|
|
|
|
rorxl $13,%ebx,%r14d
|
|
|
|
rorxl $2,%ebx,%r13d
|
|
|
|
leal (%r8,%rax,1),%r8d
|
|
|
|
andl %edi,%r15d
|
|
|
|
xorl %r12d,%r14d
|
|
|
|
xorl %ecx,%r15d
|
|
|
|
xorl %r13d,%r14d
|
|
|
|
leal (%rax,%r15,1),%eax
|
|
|
|
movl %r9d,%r12d
|
|
|
|
vmovdqa %ymm6,32(%rsp)
|
|
|
|
leaq -64(%rsp),%rsp
|
|
|
|
.cfi_escape 0x0f,0x05,0x77,0x38,0x06,0x23,0x08
|
|
|
|
|
|
|
|
pushq 64-8(%rsp)
|
|
|
|
.cfi_escape 0x0f,0x05,0x77,0x00,0x06,0x23,0x08
|
|
|
|
leaq 8(%rsp),%rsp
|
|
|
|
.cfi_escape 0x0f,0x05,0x77,0x78,0x06,0x23,0x08
|
|
|
|
vpalignr $4,%ymm2,%ymm3,%ymm4
|
|
|
|
addl 0+128(%rsp),%r11d
|
|
|
|
andl %r8d,%r12d
|
|
|
|
rorxl $25,%r8d,%r13d
|
|
|
|
vpalignr $4,%ymm0,%ymm1,%ymm7
|
|
|
|
rorxl $11,%r8d,%r15d
|
|
|
|
leal (%rax,%r14,1),%eax
|
|
|
|
leal (%r11,%r12,1),%r11d
|
|
|
|
vpsrld $7,%ymm4,%ymm6
|
|
|
|
andnl %r10d,%r8d,%r12d
|
|
|
|
xorl %r15d,%r13d
|
|
|
|
rorxl $6,%r8d,%r14d
|
|
|
|
vpaddd %ymm7,%ymm2,%ymm2
|
|
|
|
leal (%r11,%r12,1),%r11d
|
|
|
|
xorl %r14d,%r13d
|
|
|
|
movl %eax,%r15d
|
|
|
|
vpsrld $3,%ymm4,%ymm7
|
|
|
|
rorxl $22,%eax,%r12d
|
|
|
|
leal (%r11,%r13,1),%r11d
|
|
|
|
xorl %ebx,%r15d
|
|
|
|
vpslld $14,%ymm4,%ymm5
|
|
|
|
rorxl $13,%eax,%r14d
|
|
|
|
rorxl $2,%eax,%r13d
|
|
|
|
leal (%rdx,%r11,1),%edx
|
|
|
|
vpxor %ymm6,%ymm7,%ymm4
|
|
|
|
andl %r15d,%edi
|
|
|
|
xorl %r12d,%r14d
|
|
|
|
xorl %ebx,%edi
|
|
|
|
vpshufd $250,%ymm1,%ymm7
|
|
|
|
xorl %r13d,%r14d
|
|
|
|
leal (%r11,%rdi,1),%r11d
|
|
|
|
movl %r8d,%r12d
|
|
|
|
vpsrld $11,%ymm6,%ymm6
|
|
|
|
addl 4+128(%rsp),%r10d
|
|
|
|
andl %edx,%r12d
|
|
|
|
rorxl $25,%edx,%r13d
|
|
|
|
vpxor %ymm5,%ymm4,%ymm4
|
|
|
|
rorxl $11,%edx,%edi
|
|
|
|
leal (%r11,%r14,1),%r11d
|
|
|
|
leal (%r10,%r12,1),%r10d
|
|
|
|
vpslld $11,%ymm5,%ymm5
|
|
|
|
andnl %r9d,%edx,%r12d
|
|
|
|
xorl %edi,%r13d
|
|
|
|
rorxl $6,%edx,%r14d
|
|
|
|
vpxor %ymm6,%ymm4,%ymm4
|
|
|
|
leal (%r10,%r12,1),%r10d
|
|
|
|
xorl %r14d,%r13d
|
|
|
|
movl %r11d,%edi
|
|
|
|
vpsrld $10,%ymm7,%ymm6
|
|
|
|
rorxl $22,%r11d,%r12d
|
|
|
|
leal (%r10,%r13,1),%r10d
|
|
|
|
xorl %eax,%edi
|
|
|
|
vpxor %ymm5,%ymm4,%ymm4
|
|
|
|
rorxl $13,%r11d,%r14d
|
|
|
|
rorxl $2,%r11d,%r13d
|
|
|
|
leal (%rcx,%r10,1),%ecx
|
|
|
|
vpsrlq $17,%ymm7,%ymm7
|
|
|
|
andl %edi,%r15d
|
|
|
|
xorl %r12d,%r14d
|
|
|
|
xorl %eax,%r15d
|
|
|
|
vpaddd %ymm4,%ymm2,%ymm2
|
|
|
|
xorl %r13d,%r14d
|
|
|
|
leal (%r10,%r15,1),%r10d
|
|
|
|
movl %edx,%r12d
|
|
|
|
vpxor %ymm7,%ymm6,%ymm6
|
|
|
|
addl 8+128(%rsp),%r9d
|
|
|
|
andl %ecx,%r12d
|
|
|
|
rorxl $25,%ecx,%r13d
|
|
|
|
vpsrlq $2,%ymm7,%ymm7
|
|
|
|
rorxl $11,%ecx,%r15d
|
|
|
|
leal (%r10,%r14,1),%r10d
|
|
|
|
leal (%r9,%r12,1),%r9d
|
|
|
|
vpxor %ymm7,%ymm6,%ymm6
|
|
|
|
andnl %r8d,%ecx,%r12d
|
|
|
|
xorl %r15d,%r13d
|
|
|
|
rorxl $6,%ecx,%r14d
|
|
|
|
vpshufb %ymm8,%ymm6,%ymm6
|
|
|
|
leal (%r9,%r12,1),%r9d
|
|
|
|
xorl %r14d,%r13d
|
|
|
|
movl %r10d,%r15d
|
|
|
|
vpaddd %ymm6,%ymm2,%ymm2
|
|
|
|
rorxl $22,%r10d,%r12d
|
|
|
|
leal (%r9,%r13,1),%r9d
|
|
|
|
xorl %r11d,%r15d
|
|
|
|
vpshufd $80,%ymm2,%ymm7
|
|
|
|
rorxl $13,%r10d,%r14d
|
|
|
|
rorxl $2,%r10d,%r13d
|
|
|
|
leal (%rbx,%r9,1),%ebx
|
|
|
|
vpsrld $10,%ymm7,%ymm6
|
|
|
|
andl %r15d,%edi
|
|
|
|
xorl %r12d,%r14d
|
|
|
|
xorl %r11d,%edi
|
|
|
|
vpsrlq $17,%ymm7,%ymm7
|
|
|
|
xorl %r13d,%r14d
|
|
|
|
leal (%r9,%rdi,1),%r9d
|
|
|
|
movl %ecx,%r12d
|
|
|
|
vpxor %ymm7,%ymm6,%ymm6
|
|
|
|
addl 12+128(%rsp),%r8d
|
|
|
|
andl %ebx,%r12d
|
|
|
|
rorxl $25,%ebx,%r13d
|
|
|
|
vpsrlq $2,%ymm7,%ymm7
|
|
|
|
rorxl $11,%ebx,%edi
|
|
|
|
leal (%r9,%r14,1),%r9d
|
|
|
|
leal (%r8,%r12,1),%r8d
|
|
|
|
vpxor %ymm7,%ymm6,%ymm6
|
|
|
|
andnl %edx,%ebx,%r12d
|
|
|
|
xorl %edi,%r13d
|
|
|
|
rorxl $6,%ebx,%r14d
|
|
|
|
vpshufb %ymm9,%ymm6,%ymm6
|
|
|
|
leal (%r8,%r12,1),%r8d
|
|
|
|
xorl %r14d,%r13d
|
|
|
|
movl %r9d,%edi
|
|
|
|
vpaddd %ymm6,%ymm2,%ymm2
|
|
|
|
rorxl $22,%r9d,%r12d
|
|
|
|
leal (%r8,%r13,1),%r8d
|
|
|
|
xorl %r10d,%edi
|
|
|
|
vpaddd 64(%rbp),%ymm2,%ymm6
|
|
|
|
rorxl $13,%r9d,%r14d
|
|
|
|
rorxl $2,%r9d,%r13d
|
|
|
|
leal (%rax,%r8,1),%eax
|
|
|
|
andl %edi,%r15d
|
|
|
|
xorl %r12d,%r14d
|
|
|
|
xorl %r10d,%r15d
|
|
|
|
xorl %r13d,%r14d
|
|
|
|
leal (%r8,%r15,1),%r8d
|
|
|
|
movl %ebx,%r12d
|
|
|
|
vmovdqa %ymm6,0(%rsp)
|
|
|
|
vpalignr $4,%ymm3,%ymm0,%ymm4
|
|
|
|
addl 32+128(%rsp),%edx
|
|
|
|
andl %eax,%r12d
|
|
|
|
rorxl $25,%eax,%r13d
|
|
|
|
vpalignr $4,%ymm1,%ymm2,%ymm7
|
|
|
|
rorxl $11,%eax,%r15d
|
|
|
|
leal (%r8,%r14,1),%r8d
|
|
|
|
leal (%rdx,%r12,1),%edx
|
|
|
|
vpsrld $7,%ymm4,%ymm6
|
|
|
|
andnl %ecx,%eax,%r12d
|
|
|
|
xorl %r15d,%r13d
|
|
|
|
rorxl $6,%eax,%r14d
|
|
|
|
vpaddd %ymm7,%ymm3,%ymm3
|
|
|
|
leal (%rdx,%r12,1),%edx
|
|
|
|
xorl %r14d,%r13d
|
|
|
|
movl %r8d,%r15d
|
|
|
|
vpsrld $3,%ymm4,%ymm7
|
|
|
|
rorxl $22,%r8d,%r12d
|
|
|
|
leal (%rdx,%r13,1),%edx
|
|
|
|
xorl %r9d,%r15d
|
|
|
|
vpslld $14,%ymm4,%ymm5
|
|
|
|
rorxl $13,%r8d,%r14d
|
|
|
|
rorxl $2,%r8d,%r13d
|
|
|
|
leal (%r11,%rdx,1),%r11d
|
|
|
|
vpxor %ymm6,%ymm7,%ymm4
|
|
|
|
andl %r15d,%edi
|
|
|
|
xorl %r12d,%r14d
|
|
|
|
xorl %r9d,%edi
|
|
|
|
vpshufd $250,%ymm2,%ymm7
|
|
|
|
xorl %r13d,%r14d
|
|
|
|
leal (%rdx,%rdi,1),%edx
|
|
|
|
movl %eax,%r12d
|
|
|
|
vpsrld $11,%ymm6,%ymm6
|
|
|
|
addl 36+128(%rsp),%ecx
|
|
|
|
andl %r11d,%r12d
|
|
|
|
rorxl $25,%r11d,%r13d
|
|
|
|
vpxor %ymm5,%ymm4,%ymm4
|
|
|
|
rorxl $11,%r11d,%edi
|
|
|
|
leal (%rdx,%r14,1),%edx
|
|
|
|
leal (%rcx,%r12,1),%ecx
|
|
|
|
vpslld $11,%ymm5,%ymm5
|
|
|
|
andnl %ebx,%r11d,%r12d
|
|
|
|
xorl %edi,%r13d
|
|
|
|
rorxl $6,%r11d,%r14d
|
|
|
|
vpxor %ymm6,%ymm4,%ymm4
|
|
|
|
leal (%rcx,%r12,1),%ecx
|
|
|
|
xorl %r14d,%r13d
|
|
|
|
movl %edx,%edi
|
|
|
|
vpsrld $10,%ymm7,%ymm6
|
|
|
|
rorxl $22,%edx,%r12d
|
|
|
|
leal (%rcx,%r13,1),%ecx
|
|
|
|
xorl %r8d,%edi
|
|
|
|
vpxor %ymm5,%ymm4,%ymm4
|
|
|
|
rorxl $13,%edx,%r14d
|
|
|
|
rorxl $2,%edx,%r13d
|
|
|
|
leal (%r10,%rcx,1),%r10d
|
|
|
|
vpsrlq $17,%ymm7,%ymm7
|
|
|
|
andl %edi,%r15d
|
|
|
|
xorl %r12d,%r14d
|
|
|
|
xorl %r8d,%r15d
|
|
|
|
vpaddd %ymm4,%ymm3,%ymm3
|
|
|
|
xorl %r13d,%r14d
|
|
|
|
leal (%rcx,%r15,1),%ecx
|
|
|
|
movl %r11d,%r12d
|
|
|
|
vpxor %ymm7,%ymm6,%ymm6
|
|
|
|
addl 40+128(%rsp),%ebx
|
|
|
|
andl %r10d,%r12d
|
|
|
|
rorxl $25,%r10d,%r13d
|
|
|
|
vpsrlq $2,%ymm7,%ymm7
|
|
|
|
rorxl $11,%r10d,%r15d
|
|
|
|
leal (%rcx,%r14,1),%ecx
|
|
|
|
leal (%rbx,%r12,1),%ebx
|
|
|
|
vpxor %ymm7,%ymm6,%ymm6
|
|
|
|
andnl %eax,%r10d,%r12d
|
|
|
|
xorl %r15d,%r13d
|
|
|
|
rorxl $6,%r10d,%r14d
|
|
|
|
vpshufb %ymm8,%ymm6,%ymm6
|
|
|
|
leal (%rbx,%r12,1),%ebx
|
|
|
|
xorl %r14d,%r13d
|
|
|
|
movl %ecx,%r15d
|
|
|
|
vpaddd %ymm6,%ymm3,%ymm3
|
|
|
|
rorxl $22,%ecx,%r12d
|
|
|
|
leal (%rbx,%r13,1),%ebx
|
|
|
|
xorl %edx,%r15d
|
|
|
|
vpshufd $80,%ymm3,%ymm7
|
|
|
|
rorxl $13,%ecx,%r14d
|
|
|
|
rorxl $2,%ecx,%r13d
|
|
|
|
leal (%r9,%rbx,1),%r9d
|
|
|
|
vpsrld $10,%ymm7,%ymm6
|
|
|
|
andl %r15d,%edi
|
|
|
|
xorl %r12d,%r14d
|
|
|
|
xorl %edx,%edi
|
|
|
|
vpsrlq $17,%ymm7,%ymm7
|
|
|
|
xorl %r13d,%r14d
|
|
|
|
leal (%rbx,%rdi,1),%ebx
|
|
|
|
movl %r10d,%r12d
|
|
|
|
vpxor %ymm7,%ymm6,%ymm6
|
|
|
|
addl 44+128(%rsp),%eax
|
|
|
|
andl %r9d,%r12d
|
|
|
|
rorxl $25,%r9d,%r13d
|
|
|
|
vpsrlq $2,%ymm7,%ymm7
|
|
|
|
rorxl $11,%r9d,%edi
|
|
|
|
leal (%rbx,%r14,1),%ebx
|
|
|
|
leal (%rax,%r12,1),%eax
|
|
|
|
vpxor %ymm7,%ymm6,%ymm6
|
|
|
|
andnl %r11d,%r9d,%r12d
|
|
|
|
xorl %edi,%r13d
|
|
|
|
rorxl $6,%r9d,%r14d
|
|
|
|
vpshufb %ymm9,%ymm6,%ymm6
|
|
|
|
leal (%rax,%r12,1),%eax
|
|
|
|
xorl %r14d,%r13d
|
|
|
|
movl %ebx,%edi
|
|
|
|
vpaddd %ymm6,%ymm3,%ymm3
|
|
|
|
rorxl $22,%ebx,%r12d
|
|
|
|
leal (%rax,%r13,1),%eax
|
|
|
|
xorl %ecx,%edi
|
|
|
|
vpaddd 96(%rbp),%ymm3,%ymm6
|
|
|
|
rorxl $13,%ebx,%r14d
|
|
|
|
rorxl $2,%ebx,%r13d
|
|
|
|
leal (%r8,%rax,1),%r8d
|
|
|
|
andl %edi,%r15d
|
|
|
|
xorl %r12d,%r14d
|
|
|
|
xorl %ecx,%r15d
|
|
|
|
xorl %r13d,%r14d
|
|
|
|
leal (%rax,%r15,1),%eax
|
|
|
|
movl %r9d,%r12d
|
|
|
|
vmovdqa %ymm6,32(%rsp)
|
|
|
|
leaq 128(%rbp),%rbp
|
|
|
|
cmpb $0,3(%rbp)
|
|
|
|
jne .Lavx2_00_47
|
|
|
|
addl 0+64(%rsp),%r11d
|
|
|
|
andl %r8d,%r12d
|
|
|
|
rorxl $25,%r8d,%r13d
|
|
|
|
rorxl $11,%r8d,%r15d
|
|
|
|
leal (%rax,%r14,1),%eax
|
|
|
|
leal (%r11,%r12,1),%r11d
|
|
|
|
andnl %r10d,%r8d,%r12d
|
|
|
|
xorl %r15d,%r13d
|
|
|
|
rorxl $6,%r8d,%r14d
|
|
|
|
leal (%r11,%r12,1),%r11d
|
|
|
|
xorl %r14d,%r13d
|
|
|
|
movl %eax,%r15d
|
|
|
|
rorxl $22,%eax,%r12d
|
|
|
|
leal (%r11,%r13,1),%r11d
|
|
|
|
xorl %ebx,%r15d
|
|
|
|
rorxl $13,%eax,%r14d
|
|
|
|
rorxl $2,%eax,%r13d
|
|
|
|
leal (%rdx,%r11,1),%edx
|
|
|
|
andl %r15d,%edi
|
|
|
|
xorl %r12d,%r14d
|
|
|
|
xorl %ebx,%edi
|
|
|
|
xorl %r13d,%r14d
|
|
|
|
leal (%r11,%rdi,1),%r11d
|
|
|
|
movl %r8d,%r12d
|
|
|
|
addl 4+64(%rsp),%r10d
|
|
|
|
andl %edx,%r12d
|
|
|
|
rorxl $25,%edx,%r13d
|
|
|
|
rorxl $11,%edx,%edi
|
|
|
|
leal (%r11,%r14,1),%r11d
|
|
|
|
leal (%r10,%r12,1),%r10d
|
|
|
|
andnl %r9d,%edx,%r12d
|
|
|
|
xorl %edi,%r13d
|
|
|
|
rorxl $6,%edx,%r14d
|
|
|
|
leal (%r10,%r12,1),%r10d
|
|
|
|
xorl %r14d,%r13d
|
|
|
|
movl %r11d,%edi
|
|
|
|
rorxl $22,%r11d,%r12d
|
|
|
|
leal (%r10,%r13,1),%r10d
|
|
|
|
xorl %eax,%edi
|
|
|
|
rorxl $13,%r11d,%r14d
|
|
|
|
rorxl $2,%r11d,%r13d
|
|
|
|
leal (%rcx,%r10,1),%ecx
|
|
|
|
andl %edi,%r15d
|
|
|
|
xorl %r12d,%r14d
|
|
|
|
xorl %eax,%r15d
|
|
|
|
xorl %r13d,%r14d
|
|
|
|
leal (%r10,%r15,1),%r10d
|
|
|
|
movl %edx,%r12d
|
|
|
|
addl 8+64(%rsp),%r9d
|
|
|
|
andl %ecx,%r12d
|
|
|
|
rorxl $25,%ecx,%r13d
|
|
|
|
rorxl $11,%ecx,%r15d
|
|
|
|
leal (%r10,%r14,1),%r10d
|
|
|
|
leal (%r9,%r12,1),%r9d
|
|
|
|
andnl %r8d,%ecx,%r12d
|
|
|
|
xorl %r15d,%r13d
|
|
|
|
rorxl $6,%ecx,%r14d
|
|
|
|
leal (%r9,%r12,1),%r9d
|
|
|
|
xorl %r14d,%r13d
|
|
|
|
movl %r10d,%r15d
|
|
|
|
rorxl $22,%r10d,%r12d
|
|
|
|
leal (%r9,%r13,1),%r9d
|
|
|
|
xorl %r11d,%r15d
|
|
|
|
rorxl $13,%r10d,%r14d
|
|
|
|
rorxl $2,%r10d,%r13d
|
|
|
|
leal (%rbx,%r9,1),%ebx
|
|
|
|
andl %r15d,%edi
|
|
|
|
xorl %r12d,%r14d
|
|
|
|
xorl %r11d,%edi
|
|
|
|
xorl %r13d,%r14d
|
|
|
|
leal (%r9,%rdi,1),%r9d
|
|
|
|
movl %ecx,%r12d
|
|
|
|
addl 12+64(%rsp),%r8d
|
|
|
|
andl %ebx,%r12d
|
|
|
|
rorxl $25,%ebx,%r13d
|
|
|
|
rorxl $11,%ebx,%edi
|
|
|
|
leal (%r9,%r14,1),%r9d
|
|
|
|
leal (%r8,%r12,1),%r8d
|
|
|
|
andnl %edx,%ebx,%r12d
|
|
|
|
xorl %edi,%r13d
|
|
|
|
rorxl $6,%ebx,%r14d
|
|
|
|
leal (%r8,%r12,1),%r8d
|
|
|
|
xorl %r14d,%r13d
|
|
|
|
movl %r9d,%edi
|
|
|
|
rorxl $22,%r9d,%r12d
|
|
|
|
leal (%r8,%r13,1),%r8d
|
|
|
|
xorl %r10d,%edi
|
|
|
|
rorxl $13,%r9d,%r14d
|
|
|
|
rorxl $2,%r9d,%r13d
|
|
|
|
leal (%rax,%r8,1),%eax
|
|
|
|
andl %edi,%r15d
|
|
|
|
xorl %r12d,%r14d
|
|
|
|
xorl %r10d,%r15d
|
|
|
|
xorl %r13d,%r14d
|
|
|
|
leal (%r8,%r15,1),%r8d
|
|
|
|
movl %ebx,%r12d
|
|
|
|
addl 32+64(%rsp),%edx
|
|
|
|
andl %eax,%r12d
|
|
|
|
rorxl $25,%eax,%r13d
|
|
|
|
rorxl $11,%eax,%r15d
|
|
|
|
leal (%r8,%r14,1),%r8d
|
|
|
|
leal (%rdx,%r12,1),%edx
|
|
|
|
andnl %ecx,%eax,%r12d
|
|
|
|
xorl %r15d,%r13d
|
|
|
|
rorxl $6,%eax,%r14d
|
|
|
|
leal (%rdx,%r12,1),%edx
|
|
|
|
xorl %r14d,%r13d
|
|
|
|
movl %r8d,%r15d
|
|
|
|
rorxl $22,%r8d,%r12d
|
|
|
|
leal (%rdx,%r13,1),%edx
|
|
|
|
xorl %r9d,%r15d
|
|
|
|
rorxl $13,%r8d,%r14d
|
|
|
|
rorxl $2,%r8d,%r13d
|
|
|
|
leal (%r11,%rdx,1),%r11d
|
|
|
|
andl %r15d,%edi
|
|
|
|
xorl %r12d,%r14d
|
|
|
|
xorl %r9d,%edi
|
|
|
|
xorl %r13d,%r14d
|
|
|
|
leal (%rdx,%rdi,1),%edx
|
|
|
|
movl %eax,%r12d
|
|
|
|
addl 36+64(%rsp),%ecx
|
|
|
|
andl %r11d,%r12d
|
|
|
|
rorxl $25,%r11d,%r13d
|
|
|
|
rorxl $11,%r11d,%edi
|
|
|
|
leal (%rdx,%r14,1),%edx
|
|
|
|
leal (%rcx,%r12,1),%ecx
|
|
|
|
andnl %ebx,%r11d,%r12d
|
|
|
|
xorl %edi,%r13d
|
|
|
|
rorxl $6,%r11d,%r14d
|
|
|
|
leal (%rcx,%r12,1),%ecx
|
|
|
|
xorl %r14d,%r13d
|
|
|
|
movl %edx,%edi
|
|
|
|
rorxl $22,%edx,%r12d
|
|
|
|
leal (%rcx,%r13,1),%ecx
|
|
|
|
xorl %r8d,%edi
|
|
|
|
rorxl $13,%edx,%r14d
|
|
|
|
rorxl $2,%edx,%r13d
|
|
|
|
leal (%r10,%rcx,1),%r10d
|
|
|
|
andl %edi,%r15d
|
|
|
|
xorl %r12d,%r14d
|
|
|
|
xorl %r8d,%r15d
|
|
|
|
xorl %r13d,%r14d
|
|
|
|
leal (%rcx,%r15,1),%ecx
|
|
|
|
movl %r11d,%r12d
|
|
|
|
addl 40+64(%rsp),%ebx
|
|
|
|
andl %r10d,%r12d
|
|
|
|
rorxl $25,%r10d,%r13d
|
|
|
|
rorxl $11,%r10d,%r15d
|
|
|
|
leal (%rcx,%r14,1),%ecx
|
|
|
|
leal (%rbx,%r12,1),%ebx
|
|
|
|
andnl %eax,%r10d,%r12d
|
|
|
|
xorl %r15d,%r13d
|
|
|
|
rorxl $6,%r10d,%r14d
|
|
|
|
leal (%rbx,%r12,1),%ebx
|
|
|
|
xorl %r14d,%r13d
|
|
|
|
movl %ecx,%r15d
|
|
|
|
rorxl $22,%ecx,%r12d
|
|
|
|
leal (%rbx,%r13,1),%ebx
|
|
|
|
xorl %edx,%r15d
|
|
|
|
rorxl $13,%ecx,%r14d
|
|
|
|
rorxl $2,%ecx,%r13d
|
|
|
|
leal (%r9,%rbx,1),%r9d
|
|
|
|
andl %r15d,%edi
|
|
|
|
xorl %r12d,%r14d
|
|
|
|
xorl %edx,%edi
|
|
|
|
xorl %r13d,%r14d
|
|
|
|
leal (%rbx,%rdi,1),%ebx
|
|
|
|
movl %r10d,%r12d
|
|
|
|
addl 44+64(%rsp),%eax
|
|
|
|
andl %r9d,%r12d
|
|
|
|
rorxl $25,%r9d,%r13d
|
|
|
|
rorxl $11,%r9d,%edi
|
|
|
|
leal (%rbx,%r14,1),%ebx
|
|
|
|
leal (%rax,%r12,1),%eax
|
|
|
|
andnl %r11d,%r9d,%r12d
|
|
|
|
xorl %edi,%r13d
|
|
|
|
rorxl $6,%r9d,%r14d
|
|
|
|
leal (%rax,%r12,1),%eax
|
|
|
|
xorl %r14d,%r13d
|
|
|
|
movl %ebx,%edi
|
|
|
|
rorxl $22,%ebx,%r12d
|
|
|
|
leal (%rax,%r13,1),%eax
|
|
|
|
xorl %ecx,%edi
|
|
|
|
rorxl $13,%ebx,%r14d
|
|
|
|
rorxl $2,%ebx,%r13d
|
|
|
|
leal (%r8,%rax,1),%r8d
|
|
|
|
andl %edi,%r15d
|
|
|
|
xorl %r12d,%r14d
|
|
|
|
xorl %ecx,%r15d
|
|
|
|
xorl %r13d,%r14d
|
|
|
|
leal (%rax,%r15,1),%eax
|
|
|
|
movl %r9d,%r12d
|
|
|
|
addl 0(%rsp),%r11d
|
|
|
|
andl %r8d,%r12d
|
|
|
|
rorxl $25,%r8d,%r13d
|
|
|
|
rorxl $11,%r8d,%r15d
|
|
|
|
leal (%rax,%r14,1),%eax
|
|
|
|
leal (%r11,%r12,1),%r11d
|
|
|
|
andnl %r10d,%r8d,%r12d
|
|
|
|
xorl %r15d,%r13d
|
|
|
|
rorxl $6,%r8d,%r14d
|
|
|
|
leal (%r11,%r12,1),%r11d
|
|
|
|
xorl %r14d,%r13d
|
|
|
|
movl %eax,%r15d
|
|
|
|
rorxl $22,%eax,%r12d
|
|
|
|
leal (%r11,%r13,1),%r11d
|
|
|
|
xorl %ebx,%r15d
|
|
|
|
rorxl $13,%eax,%r14d
|
|
|
|
rorxl $2,%eax,%r13d
|
|
|
|
leal (%rdx,%r11,1),%edx
|
|
|
|
andl %r15d,%edi
|
|
|
|
xorl %r12d,%r14d
|
|
|
|
xorl %ebx,%edi
|
|
|
|
xorl %r13d,%r14d
|
|
|
|
leal (%r11,%rdi,1),%r11d
|
|
|
|
movl %r8d,%r12d
|
|
|
|
addl 4(%rsp),%r10d
|
|
|
|
andl %edx,%r12d
|
|
|
|
rorxl $25,%edx,%r13d
|
|
|
|
rorxl $11,%edx,%edi
|
|
|
|
leal (%r11,%r14,1),%r11d
|
|
|
|
leal (%r10,%r12,1),%r10d
|
|
|
|
andnl %r9d,%edx,%r12d
|
|
|
|
xorl %edi,%r13d
|
|
|
|
rorxl $6,%edx,%r14d
|
|
|
|
leal (%r10,%r12,1),%r10d
|
|
|
|
xorl %r14d,%r13d
|
|
|
|
movl %r11d,%edi
|
|
|
|
rorxl $22,%r11d,%r12d
|
|
|
|
leal (%r10,%r13,1),%r10d
|
|
|
|
xorl %eax,%edi
|
|
|
|
rorxl $13,%r11d,%r14d
|
|
|
|
rorxl $2,%r11d,%r13d
|
|
|
|
leal (%rcx,%r10,1),%ecx
|
|
|
|
andl %edi,%r15d
|
|
|
|
xorl %r12d,%r14d
|
|
|
|
xorl %eax,%r15d
|
|
|
|
xorl %r13d,%r14d
|
|
|
|
leal (%r10,%r15,1),%r10d
|
|
|
|
movl %edx,%r12d
|
|
|
|
addl 8(%rsp),%r9d
|
|
|
|
andl %ecx,%r12d
|
|
|
|
rorxl $25,%ecx,%r13d
|
|
|
|
rorxl $11,%ecx,%r15d
|
|
|
|
leal (%r10,%r14,1),%r10d
|
|
|
|
leal (%r9,%r12,1),%r9d
|
|
|
|
andnl %r8d,%ecx,%r12d
|
|
|
|
xorl %r15d,%r13d
|
|
|
|
rorxl $6,%ecx,%r14d
|
|
|
|
leal (%r9,%r12,1),%r9d
|
|
|
|
xorl %r14d,%r13d
|
|
|
|
movl %r10d,%r15d
|
|
|
|
rorxl $22,%r10d,%r12d
|
|
|
|
leal (%r9,%r13,1),%r9d
|
|
|
|
xorl %r11d,%r15d
|
|
|
|
rorxl $13,%r10d,%r14d
|
|
|
|
rorxl $2,%r10d,%r13d
|
|
|
|
leal (%rbx,%r9,1),%ebx
|
|
|
|
andl %r15d,%edi
|
|
|
|
xorl %r12d,%r14d
|
|
|
|
xorl %r11d,%edi
|
|
|
|
xorl %r13d,%r14d
|
|
|
|
leal (%r9,%rdi,1),%r9d
|
|
|
|
movl %ecx,%r12d
|
|
|
|
addl 12(%rsp),%r8d
|
|
|
|
andl %ebx,%r12d
|
|
|
|
rorxl $25,%ebx,%r13d
|
|
|
|
rorxl $11,%ebx,%edi
|
|
|
|
leal (%r9,%r14,1),%r9d
|
|
|
|
leal (%r8,%r12,1),%r8d
|
|
|
|
andnl %edx,%ebx,%r12d
|
|
|
|
xorl %edi,%r13d
|
|
|
|
rorxl $6,%ebx,%r14d
|
|
|
|
leal (%r8,%r12,1),%r8d
|
|
|
|
xorl %r14d,%r13d
|
|
|
|
movl %r9d,%edi
|
|
|
|
rorxl $22,%r9d,%r12d
|
|
|
|
leal (%r8,%r13,1),%r8d
|
|
|
|
xorl %r10d,%edi
|
|
|
|
rorxl $13,%r9d,%r14d
|
|
|
|
rorxl $2,%r9d,%r13d
|
|
|
|
leal (%rax,%r8,1),%eax
|
|
|
|
andl %edi,%r15d
|
|
|
|
xorl %r12d,%r14d
|
|
|
|
xorl %r10d,%r15d
|
|
|
|
xorl %r13d,%r14d
|
|
|
|
leal (%r8,%r15,1),%r8d
|
|
|
|
movl %ebx,%r12d
|
|
|
|
addl 32(%rsp),%edx
|
|
|
|
andl %eax,%r12d
|
|
|
|
rorxl $25,%eax,%r13d
|
|
|
|
rorxl $11,%eax,%r15d
|
|
|
|
leal (%r8,%r14,1),%r8d
|
|
|
|
leal (%rdx,%r12,1),%edx
|
|
|
|
andnl %ecx,%eax,%r12d
|
|
|
|
xorl %r15d,%r13d
|
|
|
|
rorxl $6,%eax,%r14d
|
|
|
|
leal (%rdx,%r12,1),%edx
|
|
|
|
xorl %r14d,%r13d
|
|
|
|
movl %r8d,%r15d
|
|
|
|
rorxl $22,%r8d,%r12d
|
|
|
|
leal (%rdx,%r13,1),%edx
|
|
|
|
xorl %r9d,%r15d
|
|
|
|
rorxl $13,%r8d,%r14d
|
|
|
|
rorxl $2,%r8d,%r13d
|
|
|
|
leal (%r11,%rdx,1),%r11d
|
|
|
|
andl %r15d,%edi
|
|
|
|
xorl %r12d,%r14d
|
|
|
|
xorl %r9d,%edi
|
|
|
|
xorl %r13d,%r14d
|
|
|
|
leal (%rdx,%rdi,1),%edx
|
|
|
|
movl %eax,%r12d
|
|
|
|
addl 36(%rsp),%ecx
|
|
|
|
andl %r11d,%r12d
|
|
|
|
rorxl $25,%r11d,%r13d
|
|
|
|
rorxl $11,%r11d,%edi
|
|
|
|
leal (%rdx,%r14,1),%edx
|
|
|
|
leal (%rcx,%r12,1),%ecx
|
|
|
|
andnl %ebx,%r11d,%r12d
|
|
|
|
xorl %edi,%r13d
|
|
|
|
rorxl $6,%r11d,%r14d
|
|
|
|
leal (%rcx,%r12,1),%ecx
|
|
|
|
xorl %r14d,%r13d
|
|
|
|
movl %edx,%edi
|
|
|
|
rorxl $22,%edx,%r12d
|
|
|
|
leal (%rcx,%r13,1),%ecx
|
|
|
|
xorl %r8d,%edi
|
|
|
|
rorxl $13,%edx,%r14d
|
|
|
|
rorxl $2,%edx,%r13d
|
|
|
|
leal (%r10,%rcx,1),%r10d
|
|
|
|
andl %edi,%r15d
|
|
|
|
xorl %r12d,%r14d
|
|
|
|
xorl %r8d,%r15d
|
|
|
|
xorl %r13d,%r14d
|
|
|
|
leal (%rcx,%r15,1),%ecx
|
|
|
|
movl %r11d,%r12d
|
|
|
|
addl 40(%rsp),%ebx
|
|
|
|
andl %r10d,%r12d
|
|
|
|
rorxl $25,%r10d,%r13d
|
|
|
|
rorxl $11,%r10d,%r15d
|
|
|
|
leal (%rcx,%r14,1),%ecx
|
|
|
|
leal (%rbx,%r12,1),%ebx
|
|
|
|
andnl %eax,%r10d,%r12d
|
|
|
|
xorl %r15d,%r13d
|
|
|
|
rorxl $6,%r10d,%r14d
|
|
|
|
leal (%rbx,%r12,1),%ebx
|
|
|
|
xorl %r14d,%r13d
|
|
|
|
movl %ecx,%r15d
|
|
|
|
rorxl $22,%ecx,%r12d
|
|
|
|
leal (%rbx,%r13,1),%ebx
|
|
|
|
xorl %edx,%r15d
|
|
|
|
rorxl $13,%ecx,%r14d
|
|
|
|
rorxl $2,%ecx,%r13d
|
|
|
|
leal (%r9,%rbx,1),%r9d
|
|
|
|
andl %r15d,%edi
|
|
|
|
xorl %r12d,%r14d
|
|
|
|
xorl %edx,%edi
|
|
|
|
xorl %r13d,%r14d
|
|
|
|
leal (%rbx,%rdi,1),%ebx
|
|
|
|
movl %r10d,%r12d
|
|
|
|
addl 44(%rsp),%eax
|
|
|
|
andl %r9d,%r12d
|
|
|
|
rorxl $25,%r9d,%r13d
|
|
|
|
rorxl $11,%r9d,%edi
|
|
|
|
leal (%rbx,%r14,1),%ebx
|
|
|
|
leal (%rax,%r12,1),%eax
|
|
|
|
andnl %r11d,%r9d,%r12d
|
|
|
|
xorl %edi,%r13d
|
|
|
|
rorxl $6,%r9d,%r14d
|
|
|
|
leal (%rax,%r12,1),%eax
|
|
|
|
xorl %r14d,%r13d
|
|
|
|
movl %ebx,%edi
|
|
|
|
rorxl $22,%ebx,%r12d
|
|
|
|
leal (%rax,%r13,1),%eax
|
|
|
|
xorl %ecx,%edi
|
|
|
|
rorxl $13,%ebx,%r14d
|
|
|
|
rorxl $2,%ebx,%r13d
|
|
|
|
leal (%r8,%rax,1),%r8d
|
|
|
|
andl %edi,%r15d
|
|
|
|
xorl %r12d,%r14d
|
|
|
|
xorl %ecx,%r15d
|
|
|
|
xorl %r13d,%r14d
|
|
|
|
leal (%rax,%r15,1),%eax
|
|
|
|
movl %r9d,%r12d
|
|
|
|
movq 512(%rsp),%rdi
|
|
|
|
addl %r14d,%eax
|
|
|
|
|
|
|
|
leaq 448(%rsp),%rbp
|
|
|
|
|
|
|
|
addl 0(%rdi),%eax
|
|
|
|
addl 4(%rdi),%ebx
|
|
|
|
addl 8(%rdi),%ecx
|
|
|
|
addl 12(%rdi),%edx
|
|
|
|
addl 16(%rdi),%r8d
|
|
|
|
addl 20(%rdi),%r9d
|
|
|
|
addl 24(%rdi),%r10d
|
|
|
|
addl 28(%rdi),%r11d
|
|
|
|
|
|
|
|
movl %eax,0(%rdi)
|
|
|
|
movl %ebx,4(%rdi)
|
|
|
|
movl %ecx,8(%rdi)
|
|
|
|
movl %edx,12(%rdi)
|
|
|
|
movl %r8d,16(%rdi)
|
|
|
|
movl %r9d,20(%rdi)
|
|
|
|
movl %r10d,24(%rdi)
|
|
|
|
movl %r11d,28(%rdi)
|
|
|
|
|
|
|
|
cmpq 80(%rbp),%rsi
|
|
|
|
je .Ldone_avx2
|
|
|
|
|
|
|
|
xorl %r14d,%r14d
|
|
|
|
movl %ebx,%edi
|
|
|
|
xorl %ecx,%edi
|
|
|
|
movl %r9d,%r12d
|
|
|
|
jmp .Lower_avx2
|
2023-03-06 23:24:05 +00:00
|
|
|
.balign 16
|
Add generic implementation handling and SHA2 impl
The skeleton file module/icp/include/generic_impl.c can be used for
iterating over different implementations of algorithms.
It is used by SHA256, SHA512 and BLAKE3 currently.
The Solaris SHA2 implementation got replaced with a version which is
based on public domain code of cppcrypto v0.10.
These assembly files are taken from current openssl master:
- sha256-x86_64.S: x64, SSSE3, AVX, AVX2, SHA-NI (x86_64)
- sha512-x86_64.S: x64, AVX, AVX2 (x86_64)
- sha256-armv7.S: ARMv7, NEON, ARMv8-CE (arm)
- sha512-armv7.S: ARMv7, NEON (arm)
- sha256-armv8.S: ARMv7, NEON, ARMv8-CE (aarch64)
- sha512-armv8.S: ARMv7, ARMv8-CE (aarch64)
- sha256-ppc.S: Generic PPC64 LE/BE (ppc64)
- sha512-ppc.S: Generic PPC64 LE/BE (ppc64)
- sha256-p8.S: Power8 ISA Version 2.07 LE/BE (ppc64)
- sha512-p8.S: Power8 ISA Version 2.07 LE/BE (ppc64)
Tested-by: Rich Ercolani <rincebrain@gmail.com>
Tested-by: Sebastian Gottschall <s.gottschall@dd-wrt.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Tino Reichardt <milky-zfs@mcmilk.de>
Closes #13741
2023-03-01 08:40:28 +00:00
|
|
|
.Lower_avx2:
|
|
|
|
addl 0+16(%rbp),%r11d
|
|
|
|
andl %r8d,%r12d
|
|
|
|
rorxl $25,%r8d,%r13d
|
|
|
|
rorxl $11,%r8d,%r15d
|
|
|
|
leal (%rax,%r14,1),%eax
|
|
|
|
leal (%r11,%r12,1),%r11d
|
|
|
|
andnl %r10d,%r8d,%r12d
|
|
|
|
xorl %r15d,%r13d
|
|
|
|
rorxl $6,%r8d,%r14d
|
|
|
|
leal (%r11,%r12,1),%r11d
|
|
|
|
xorl %r14d,%r13d
|
|
|
|
movl %eax,%r15d
|
|
|
|
rorxl $22,%eax,%r12d
|
|
|
|
leal (%r11,%r13,1),%r11d
|
|
|
|
xorl %ebx,%r15d
|
|
|
|
rorxl $13,%eax,%r14d
|
|
|
|
rorxl $2,%eax,%r13d
|
|
|
|
leal (%rdx,%r11,1),%edx
|
|
|
|
andl %r15d,%edi
|
|
|
|
xorl %r12d,%r14d
|
|
|
|
xorl %ebx,%edi
|
|
|
|
xorl %r13d,%r14d
|
|
|
|
leal (%r11,%rdi,1),%r11d
|
|
|
|
movl %r8d,%r12d
|
|
|
|
addl 4+16(%rbp),%r10d
|
|
|
|
andl %edx,%r12d
|
|
|
|
rorxl $25,%edx,%r13d
|
|
|
|
rorxl $11,%edx,%edi
|
|
|
|
leal (%r11,%r14,1),%r11d
|
|
|
|
leal (%r10,%r12,1),%r10d
|
|
|
|
andnl %r9d,%edx,%r12d
|
|
|
|
xorl %edi,%r13d
|
|
|
|
rorxl $6,%edx,%r14d
|
|
|
|
leal (%r10,%r12,1),%r10d
|
|
|
|
xorl %r14d,%r13d
|
|
|
|
movl %r11d,%edi
|
|
|
|
rorxl $22,%r11d,%r12d
|
|
|
|
leal (%r10,%r13,1),%r10d
|
|
|
|
xorl %eax,%edi
|
|
|
|
rorxl $13,%r11d,%r14d
|
|
|
|
rorxl $2,%r11d,%r13d
|
|
|
|
leal (%rcx,%r10,1),%ecx
|
|
|
|
andl %edi,%r15d
|
|
|
|
xorl %r12d,%r14d
|
|
|
|
xorl %eax,%r15d
|
|
|
|
xorl %r13d,%r14d
|
|
|
|
leal (%r10,%r15,1),%r10d
|
|
|
|
movl %edx,%r12d
|
|
|
|
addl 8+16(%rbp),%r9d
|
|
|
|
andl %ecx,%r12d
|
|
|
|
rorxl $25,%ecx,%r13d
|
|
|
|
rorxl $11,%ecx,%r15d
|
|
|
|
leal (%r10,%r14,1),%r10d
|
|
|
|
leal (%r9,%r12,1),%r9d
|
|
|
|
andnl %r8d,%ecx,%r12d
|
|
|
|
xorl %r15d,%r13d
|
|
|
|
rorxl $6,%ecx,%r14d
|
|
|
|
leal (%r9,%r12,1),%r9d
|
|
|
|
xorl %r14d,%r13d
|
|
|
|
movl %r10d,%r15d
|
|
|
|
rorxl $22,%r10d,%r12d
|
|
|
|
leal (%r9,%r13,1),%r9d
|
|
|
|
xorl %r11d,%r15d
|
|
|
|
rorxl $13,%r10d,%r14d
|
|
|
|
rorxl $2,%r10d,%r13d
|
|
|
|
leal (%rbx,%r9,1),%ebx
|
|
|
|
andl %r15d,%edi
|
|
|
|
xorl %r12d,%r14d
|
|
|
|
xorl %r11d,%edi
|
|
|
|
xorl %r13d,%r14d
|
|
|
|
leal (%r9,%rdi,1),%r9d
|
|
|
|
movl %ecx,%r12d
|
|
|
|
addl 12+16(%rbp),%r8d
|
|
|
|
andl %ebx,%r12d
|
|
|
|
rorxl $25,%ebx,%r13d
|
|
|
|
rorxl $11,%ebx,%edi
|
|
|
|
leal (%r9,%r14,1),%r9d
|
|
|
|
leal (%r8,%r12,1),%r8d
|
|
|
|
andnl %edx,%ebx,%r12d
|
|
|
|
xorl %edi,%r13d
|
|
|
|
rorxl $6,%ebx,%r14d
|
|
|
|
leal (%r8,%r12,1),%r8d
|
|
|
|
xorl %r14d,%r13d
|
|
|
|
movl %r9d,%edi
|
|
|
|
rorxl $22,%r9d,%r12d
|
|
|
|
leal (%r8,%r13,1),%r8d
|
|
|
|
xorl %r10d,%edi
|
|
|
|
rorxl $13,%r9d,%r14d
|
|
|
|
rorxl $2,%r9d,%r13d
|
|
|
|
leal (%rax,%r8,1),%eax
|
|
|
|
andl %edi,%r15d
|
|
|
|
xorl %r12d,%r14d
|
|
|
|
xorl %r10d,%r15d
|
|
|
|
xorl %r13d,%r14d
|
|
|
|
leal (%r8,%r15,1),%r8d
|
|
|
|
movl %ebx,%r12d
|
|
|
|
addl 32+16(%rbp),%edx
|
|
|
|
andl %eax,%r12d
|
|
|
|
rorxl $25,%eax,%r13d
|
|
|
|
rorxl $11,%eax,%r15d
|
|
|
|
leal (%r8,%r14,1),%r8d
|
|
|
|
leal (%rdx,%r12,1),%edx
|
|
|
|
andnl %ecx,%eax,%r12d
|
|
|
|
xorl %r15d,%r13d
|
|
|
|
rorxl $6,%eax,%r14d
|
|
|
|
leal (%rdx,%r12,1),%edx
|
|
|
|
xorl %r14d,%r13d
|
|
|
|
movl %r8d,%r15d
|
|
|
|
rorxl $22,%r8d,%r12d
|
|
|
|
leal (%rdx,%r13,1),%edx
|
|
|
|
xorl %r9d,%r15d
|
|
|
|
rorxl $13,%r8d,%r14d
|
|
|
|
rorxl $2,%r8d,%r13d
|
|
|
|
leal (%r11,%rdx,1),%r11d
|
|
|
|
andl %r15d,%edi
|
|
|
|
xorl %r12d,%r14d
|
|
|
|
xorl %r9d,%edi
|
|
|
|
xorl %r13d,%r14d
|
|
|
|
leal (%rdx,%rdi,1),%edx
|
|
|
|
movl %eax,%r12d
|
|
|
|
addl 36+16(%rbp),%ecx
|
|
|
|
andl %r11d,%r12d
|
|
|
|
rorxl $25,%r11d,%r13d
|
|
|
|
rorxl $11,%r11d,%edi
|
|
|
|
leal (%rdx,%r14,1),%edx
|
|
|
|
leal (%rcx,%r12,1),%ecx
|
|
|
|
andnl %ebx,%r11d,%r12d
|
|
|
|
xorl %edi,%r13d
|
|
|
|
rorxl $6,%r11d,%r14d
|
|
|
|
leal (%rcx,%r12,1),%ecx
|
|
|
|
xorl %r14d,%r13d
|
|
|
|
movl %edx,%edi
|
|
|
|
rorxl $22,%edx,%r12d
|
|
|
|
leal (%rcx,%r13,1),%ecx
|
|
|
|
xorl %r8d,%edi
|
|
|
|
rorxl $13,%edx,%r14d
|
|
|
|
rorxl $2,%edx,%r13d
|
|
|
|
leal (%r10,%rcx,1),%r10d
|
|
|
|
andl %edi,%r15d
|
|
|
|
xorl %r12d,%r14d
|
|
|
|
xorl %r8d,%r15d
|
|
|
|
xorl %r13d,%r14d
|
|
|
|
leal (%rcx,%r15,1),%ecx
|
|
|
|
movl %r11d,%r12d
|
|
|
|
addl 40+16(%rbp),%ebx
|
|
|
|
andl %r10d,%r12d
|
|
|
|
rorxl $25,%r10d,%r13d
|
|
|
|
rorxl $11,%r10d,%r15d
|
|
|
|
leal (%rcx,%r14,1),%ecx
|
|
|
|
leal (%rbx,%r12,1),%ebx
|
|
|
|
andnl %eax,%r10d,%r12d
|
|
|
|
xorl %r15d,%r13d
|
|
|
|
rorxl $6,%r10d,%r14d
|
|
|
|
leal (%rbx,%r12,1),%ebx
|
|
|
|
xorl %r14d,%r13d
|
|
|
|
movl %ecx,%r15d
|
|
|
|
rorxl $22,%ecx,%r12d
|
|
|
|
leal (%rbx,%r13,1),%ebx
|
|
|
|
xorl %edx,%r15d
|
|
|
|
rorxl $13,%ecx,%r14d
|
|
|
|
rorxl $2,%ecx,%r13d
|
|
|
|
leal (%r9,%rbx,1),%r9d
|
|
|
|
andl %r15d,%edi
|
|
|
|
xorl %r12d,%r14d
|
|
|
|
xorl %edx,%edi
|
|
|
|
xorl %r13d,%r14d
|
|
|
|
leal (%rbx,%rdi,1),%ebx
|
|
|
|
movl %r10d,%r12d
|
|
|
|
addl 44+16(%rbp),%eax
|
|
|
|
andl %r9d,%r12d
|
|
|
|
rorxl $25,%r9d,%r13d
|
|
|
|
rorxl $11,%r9d,%edi
|
|
|
|
leal (%rbx,%r14,1),%ebx
|
|
|
|
leal (%rax,%r12,1),%eax
|
|
|
|
andnl %r11d,%r9d,%r12d
|
|
|
|
xorl %edi,%r13d
|
|
|
|
rorxl $6,%r9d,%r14d
|
|
|
|
leal (%rax,%r12,1),%eax
|
|
|
|
xorl %r14d,%r13d
|
|
|
|
movl %ebx,%edi
|
|
|
|
rorxl $22,%ebx,%r12d
|
|
|
|
leal (%rax,%r13,1),%eax
|
|
|
|
xorl %ecx,%edi
|
|
|
|
rorxl $13,%ebx,%r14d
|
|
|
|
rorxl $2,%ebx,%r13d
|
|
|
|
leal (%r8,%rax,1),%r8d
|
|
|
|
andl %edi,%r15d
|
|
|
|
xorl %r12d,%r14d
|
|
|
|
xorl %ecx,%r15d
|
|
|
|
xorl %r13d,%r14d
|
|
|
|
leal (%rax,%r15,1),%eax
|
|
|
|
movl %r9d,%r12d
|
|
|
|
leaq -64(%rbp),%rbp
|
|
|
|
cmpq %rsp,%rbp
|
|
|
|
jae .Lower_avx2
|
|
|
|
|
|
|
|
movq 512(%rsp),%rdi
|
|
|
|
addl %r14d,%eax
|
|
|
|
|
|
|
|
leaq 448(%rsp),%rsp
|
|
|
|
|
|
|
|
.cfi_escape 0x0f,0x06,0x77,0xd8,0x00,0x06,0x23,0x08
|
|
|
|
|
|
|
|
addl 0(%rdi),%eax
|
|
|
|
addl 4(%rdi),%ebx
|
|
|
|
addl 8(%rdi),%ecx
|
|
|
|
addl 12(%rdi),%edx
|
|
|
|
addl 16(%rdi),%r8d
|
|
|
|
addl 20(%rdi),%r9d
|
|
|
|
leaq 128(%rsi),%rsi
|
|
|
|
addl 24(%rdi),%r10d
|
|
|
|
movq %rsi,%r12
|
|
|
|
addl 28(%rdi),%r11d
|
|
|
|
cmpq 64+16(%rsp),%rsi
|
|
|
|
|
|
|
|
movl %eax,0(%rdi)
|
|
|
|
cmoveq %rsp,%r12
|
|
|
|
movl %ebx,4(%rdi)
|
|
|
|
movl %ecx,8(%rdi)
|
|
|
|
movl %edx,12(%rdi)
|
|
|
|
movl %r8d,16(%rdi)
|
|
|
|
movl %r9d,20(%rdi)
|
|
|
|
movl %r10d,24(%rdi)
|
|
|
|
movl %r11d,28(%rdi)
|
|
|
|
|
|
|
|
jbe .Loop_avx2
|
|
|
|
leaq (%rsp),%rbp
|
|
|
|
|
|
|
|
|
|
|
|
.cfi_escape 0x0f,0x06,0x76,0xd8,0x00,0x06,0x23,0x08
|
|
|
|
|
|
|
|
.Ldone_avx2:
|
|
|
|
movq 88(%rbp),%rsi
|
|
|
|
.cfi_def_cfa %rsi,8
|
|
|
|
vzeroupper
|
|
|
|
movq -48(%rsi),%r15
|
|
|
|
.cfi_restore %r15
|
|
|
|
movq -40(%rsi),%r14
|
|
|
|
.cfi_restore %r14
|
|
|
|
movq -32(%rsi),%r13
|
|
|
|
.cfi_restore %r13
|
|
|
|
movq -24(%rsi),%r12
|
|
|
|
.cfi_restore %r12
|
|
|
|
movq -16(%rsi),%rbp
|
|
|
|
.cfi_restore %rbp
|
|
|
|
movq -8(%rsi),%rbx
|
|
|
|
.cfi_restore %rbx
|
|
|
|
leaq (%rsi),%rsp
|
|
|
|
.cfi_def_cfa_register %rsp
|
|
|
|
.Lepilogue_avx2:
|
|
|
|
RET
|
|
|
|
.cfi_endproc
|
|
|
|
SET_SIZE(zfs_sha256_transform_avx2)
|
|
|
|
|
|
|
|
/*
 * On ELF targets, emit an empty .note.GNU-stack section.  The section is
 * declared with no flags (in particular without the "x" executable flag),
 * which tells the GNU toolchain that this hand-written assembly object does
 * not require an executable stack.
 */
#if defined(__ELF__)
|
|
|
|
.section .note.GNU-stack,"",%progbits
|
|
|
|
#endif /* defined(__ELF__) */
|
|
|
|
#endif
|