diff --git a/include/os/freebsd/Makefile.am b/include/os/freebsd/Makefile.am index 89d4ef564d..c1ad40e2bc 100644 --- a/include/os/freebsd/Makefile.am +++ b/include/os/freebsd/Makefile.am @@ -79,7 +79,6 @@ noinst_HEADERS = \ %D%/spl/sys/zone.h \ \ %D%/zfs/sys/freebsd_crypto.h \ - %D%/zfs/sys/sha2.h \ %D%/zfs/sys/vdev_os.h \ %D%/zfs/sys/zfs_bootenv_os.h \ %D%/zfs/sys/zfs_context_os.h \ diff --git a/include/os/freebsd/zfs/sys/sha2.h b/include/os/freebsd/zfs/sys/sha2.h deleted file mode 100644 index 1f520eba00..0000000000 --- a/include/os/freebsd/zfs/sys/sha2.h +++ /dev/null @@ -1,197 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or https://opensource.org/licenses/CDDL-1.0. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ -/* Copyright 2013 Saso Kiselkov. All rights reserved. */ - -#ifndef _SYS_SHA2_H -#define _SYS_SHA2_H - -#include /* for uint_* */ - -#ifdef __cplusplus -extern "C" { -#endif - -#define SHA256_DIGEST_LENGTH 32 /* SHA256 digest length in bytes */ -#define SHA384_DIGEST_LENGTH 48 /* SHA384 digest length in bytes */ -#define SHA512_DIGEST_LENGTH 64 /* SHA512 digest length in bytes */ - -/* Truncated versions of SHA-512 according to FIPS-180-4, section 5.3.6 */ -#define SHA512_224_DIGEST_LENGTH 28 /* SHA512/224 digest length */ -#define SHA512_256_DIGEST_LENGTH 32 /* SHA512/256 digest length */ - -#define SHA256_HMAC_BLOCK_SIZE 64 /* SHA256-HMAC block size */ -#define SHA512_HMAC_BLOCK_SIZE 128 /* SHA512-HMAC block size */ - -#define SHA256 0 -#define SHA256_HMAC 1 -#define SHA256_HMAC_GEN 2 -#define SHA384 3 -#define SHA384_HMAC 4 -#define SHA384_HMAC_GEN 5 -#define SHA512 6 -#define SHA512_HMAC 7 -#define SHA512_HMAC_GEN 8 -#define SHA512_224 9 -#define SHA512_256 10 - -/* - * SHA2 context. - * The contents of this structure are a private interface between the - * Init/Update/Final calls of the functions defined below. - * Callers must never attempt to read or write any of the fields - * in this structure directly. 
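[Editorial aside — not part of this patch. A minimal usage sketch of the Init/Update/Final interface declared in the sys/sha2.h headers being removed here, for readers unfamiliar with the old API. The function name example_sha256 and the buf/len/digest parameters are made up for illustration; the macros and entry points are the ones shown in the deleted header above.]

/* Illustrative only -- not part of this patch. */
#include <sys/sha2.h>		/* the header deleted by this change */

static void
example_sha256(const void *buf, size_t len,
    uint8_t digest[SHA256_DIGEST_LENGTH])
{
	SHA2_CTX ctx;

	SHA2Init(SHA256, &ctx);		/* mech constant from the header */
	SHA2Update(&ctx, buf, len);	/* may be called repeatedly to stream data */
	SHA2Final(digest, &ctx);	/* writes SHA256_DIGEST_LENGTH (32) bytes */
}

[The same three calls drive SHA-384, SHA-512, and the truncated SHA-512/256 variant; only the mechanism constant passed to SHA2Init() changes.]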
- */ - -#include -#include -#include -#include -typedef struct { - uint32_t algotype; /* Algorithm Type */ - union { - SHA256_CTX SHA256_ctx; - SHA384_CTX SHA384_ctx; - SHA512_CTX SHA512_ctx; - }; -} SHA2_CTX; - -extern void SHA256Init(SHA256_CTX *); - -extern void SHA256Update(SHA256_CTX *, const void *, size_t); - -extern void SHA256Final(void *, SHA256_CTX *); - -extern void SHA384Init(SHA384_CTX *); - -extern void SHA384Update(SHA384_CTX *, const void *, size_t); - -extern void SHA384Final(void *, SHA384_CTX *); - -extern void SHA512Init(SHA512_CTX *); - -extern void SHA512Update(SHA512_CTX *, const void *, size_t); - -extern void SHA512Final(void *, SHA512_CTX *); - - -static inline void -SHA2Init(uint64_t mech, SHA2_CTX *c) -{ - switch (mech) { - case SHA256: - SHA256_Init(&c->SHA256_ctx); - break; - case SHA384: - SHA384_Init(&c->SHA384_ctx); - break; - case SHA512: - SHA512_Init(&c->SHA512_ctx); - break; - case SHA512_256: - SHA512_256_Init(&c->SHA512_ctx); - break; - default: - panic("unknown mechanism %ju", (uintmax_t)mech); - } - c->algotype = (uint32_t)mech; -} - -static inline void -SHA2Update(SHA2_CTX *c, const void *p, size_t s) -{ - switch (c->algotype) { - case SHA256: - SHA256_Update(&c->SHA256_ctx, p, s); - break; - case SHA384: - SHA384_Update(&c->SHA384_ctx, p, s); - break; - case SHA512: - SHA512_Update(&c->SHA512_ctx, p, s); - break; - case SHA512_256: - SHA512_256_Update(&c->SHA512_ctx, p, s); - break; - default: - panic("unknown mechanism %d", c->algotype); - } -} - -static inline void -SHA2Final(void *p, SHA2_CTX *c) -{ - switch (c->algotype) { - case SHA256: - SHA256_Final(p, &c->SHA256_ctx); - break; - case SHA384: - SHA384_Final(p, &c->SHA384_ctx); - break; - case SHA512: - SHA512_Final(p, &c->SHA512_ctx); - break; - case SHA512_256: - SHA512_256_Final(p, &c->SHA512_ctx); - break; - default: - panic("unknown mechanism %d", c->algotype); - } -} - -#ifdef _SHA2_IMPL -/* - * The following types/functions are all private to the implementation - * of the SHA2 functions and must not be used by consumers of the interface - */ - -/* - * List of support mechanisms in this module. 
- * - * It is important to note that in the module, division or modulus calculations - * are used on the enumerated type to determine which mechanism is being used; - * therefore, changing the order or additional mechanisms should be done - * carefully - */ -typedef enum sha2_mech_type { - SHA256_MECH_INFO_TYPE, /* SUN_CKM_SHA256 */ - SHA256_HMAC_MECH_INFO_TYPE, /* SUN_CKM_SHA256_HMAC */ - SHA256_HMAC_GEN_MECH_INFO_TYPE, /* SUN_CKM_SHA256_HMAC_GENERAL */ - SHA384_MECH_INFO_TYPE, /* SUN_CKM_SHA384 */ - SHA384_HMAC_MECH_INFO_TYPE, /* SUN_CKM_SHA384_HMAC */ - SHA384_HMAC_GEN_MECH_INFO_TYPE, /* SUN_CKM_SHA384_HMAC_GENERAL */ - SHA512_MECH_INFO_TYPE, /* SUN_CKM_SHA512 */ - SHA512_HMAC_MECH_INFO_TYPE, /* SUN_CKM_SHA512_HMAC */ - SHA512_HMAC_GEN_MECH_INFO_TYPE, /* SUN_CKM_SHA512_HMAC_GENERAL */ - SHA512_224_MECH_INFO_TYPE, /* SUN_CKM_SHA512_224 */ - SHA512_256_MECH_INFO_TYPE /* SUN_CKM_SHA512_256 */ -} sha2_mech_type_t; - -#endif /* _SHA2_IMPL */ - -#ifdef __cplusplus -} -#endif - -#endif /* _SYS_SHA2_H */ diff --git a/include/os/linux/Makefile.am b/include/os/linux/Makefile.am index e20702d332..e821e075de 100644 --- a/include/os/linux/Makefile.am +++ b/include/os/linux/Makefile.am @@ -19,7 +19,6 @@ kernel_linux_HEADERS = \ kernel_sysdir = $(kerneldir)/sys kernel_sys_HEADERS = \ %D%/zfs/sys/policy.h \ - %D%/zfs/sys/sha2.h \ %D%/zfs/sys/trace_acl.h \ %D%/zfs/sys/trace_arc.h \ %D%/zfs/sys/trace_common.h \ diff --git a/include/os/linux/zfs/sys/sha2.h b/include/os/linux/zfs/sys/sha2.h deleted file mode 100644 index ef37139dd4..0000000000 --- a/include/os/linux/zfs/sys/sha2.h +++ /dev/null @@ -1,148 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or https://opensource.org/licenses/CDDL-1.0. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ -/* Copyright 2013 Saso Kiselkov. All rights reserved. 
*/ - -#ifndef _SYS_SHA2_H -#define _SYS_SHA2_H - -#include /* for uint_* */ - -#ifdef __cplusplus -extern "C" { -#endif - -#define SHA256_DIGEST_LENGTH 32 /* SHA256 digest length in bytes */ -#define SHA384_DIGEST_LENGTH 48 /* SHA384 digest length in bytes */ -#define SHA512_DIGEST_LENGTH 64 /* SHA512 digest length in bytes */ - -/* Truncated versions of SHA-512 according to FIPS-180-4, section 5.3.6 */ -#define SHA512_224_DIGEST_LENGTH 28 /* SHA512/224 digest length */ -#define SHA512_256_DIGEST_LENGTH 32 /* SHA512/256 digest length */ - -#define SHA256_HMAC_BLOCK_SIZE 64 /* SHA256-HMAC block size */ -#define SHA512_HMAC_BLOCK_SIZE 128 /* SHA512-HMAC block size */ - -#define SHA256 0 -#define SHA256_HMAC 1 -#define SHA256_HMAC_GEN 2 -#define SHA384 3 -#define SHA384_HMAC 4 -#define SHA384_HMAC_GEN 5 -#define SHA512 6 -#define SHA512_HMAC 7 -#define SHA512_HMAC_GEN 8 -#define SHA512_224 9 -#define SHA512_256 10 - -/* - * SHA2 context. - * The contents of this structure are a private interface between the - * Init/Update/Final calls of the functions defined below. - * Callers must never attempt to read or write any of the fields - * in this structure directly. - */ -typedef struct { - uint32_t algotype; /* Algorithm Type */ - - /* state (ABCDEFGH) */ - union { - uint32_t s32[8]; /* for SHA256 */ - uint64_t s64[8]; /* for SHA384/512 */ - } state; - /* number of bits */ - union { - uint32_t c32[2]; /* for SHA256 , modulo 2^64 */ - uint64_t c64[2]; /* for SHA384/512, modulo 2^128 */ - } count; - union { - uint8_t buf8[128]; /* undigested input */ - uint32_t buf32[32]; /* realigned input */ - uint64_t buf64[16]; /* realigned input */ - } buf_un; -} SHA2_CTX; - -typedef SHA2_CTX SHA256_CTX; -typedef SHA2_CTX SHA384_CTX; -typedef SHA2_CTX SHA512_CTX; - -extern void SHA2Init(uint64_t mech, SHA2_CTX *); - -extern void SHA2Update(SHA2_CTX *, const void *, size_t); - -extern void SHA2Final(void *, SHA2_CTX *); - -extern void SHA256Init(SHA256_CTX *); - -extern void SHA256Update(SHA256_CTX *, const void *, size_t); - -extern void SHA256Final(void *, SHA256_CTX *); - -extern void SHA384Init(SHA384_CTX *); - -extern void SHA384Update(SHA384_CTX *, const void *, size_t); - -extern void SHA384Final(void *, SHA384_CTX *); - -extern void SHA512Init(SHA512_CTX *); - -extern void SHA512Update(SHA512_CTX *, const void *, size_t); - -extern void SHA512Final(void *, SHA512_CTX *); - -#ifdef _SHA2_IMPL -/* - * The following types/functions are all private to the implementation - * of the SHA2 functions and must not be used by consumers of the interface - */ - -/* - * List of support mechanisms in this module. 
- * - * It is important to note that in the module, division or modulus calculations - * are used on the enumerated type to determine which mechanism is being used; - * therefore, changing the order or additional mechanisms should be done - * carefully - */ -typedef enum sha2_mech_type { - SHA256_MECH_INFO_TYPE, /* SUN_CKM_SHA256 */ - SHA256_HMAC_MECH_INFO_TYPE, /* SUN_CKM_SHA256_HMAC */ - SHA256_HMAC_GEN_MECH_INFO_TYPE, /* SUN_CKM_SHA256_HMAC_GENERAL */ - SHA384_MECH_INFO_TYPE, /* SUN_CKM_SHA384 */ - SHA384_HMAC_MECH_INFO_TYPE, /* SUN_CKM_SHA384_HMAC */ - SHA384_HMAC_GEN_MECH_INFO_TYPE, /* SUN_CKM_SHA384_HMAC_GENERAL */ - SHA512_MECH_INFO_TYPE, /* SUN_CKM_SHA512 */ - SHA512_HMAC_MECH_INFO_TYPE, /* SUN_CKM_SHA512_HMAC */ - SHA512_HMAC_GEN_MECH_INFO_TYPE, /* SUN_CKM_SHA512_HMAC_GENERAL */ - SHA512_224_MECH_INFO_TYPE, /* SUN_CKM_SHA512_224 */ - SHA512_256_MECH_INFO_TYPE /* SUN_CKM_SHA512_256 */ -} sha2_mech_type_t; - -#endif /* _SHA2_IMPL */ - -#ifdef __cplusplus -} -#endif - -#endif /* _SYS_SHA2_H */ diff --git a/lib/libspl/include/sys/sha2.h b/lib/libspl/include/sys/sha2.h deleted file mode 100644 index 40db1a678c..0000000000 --- a/lib/libspl/include/sys/sha2.h +++ /dev/null @@ -1,148 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or https://opensource.org/licenses/CDDL-1.0. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ -/* Copyright 2013 Saso Kiselkov. All rights reserved. */ - -#ifndef _SYS_SHA2_H -#define _SYS_SHA2_H - -#include - -#ifdef __cplusplus -extern "C" { -#endif - -#define SHA256_DIGEST_LENGTH 32 /* SHA256 digest length in bytes */ -#define SHA384_DIGEST_LENGTH 48 /* SHA384 digest length in bytes */ -#define SHA512_DIGEST_LENGTH 64 /* SHA512 digest length in bytes */ - -/* Truncated versions of SHA-512 according to FIPS-180-4, section 5.3.6 */ -#define SHA512_224_DIGEST_LENGTH 28 /* SHA512/224 digest length */ -#define SHA512_256_DIGEST_LENGTH 32 /* SHA512/256 digest length */ - -#define SHA256_HMAC_BLOCK_SIZE 64 /* SHA256-HMAC block size */ -#define SHA512_HMAC_BLOCK_SIZE 128 /* SHA512-HMAC block size */ - -#define SHA256 0 -#define SHA256_HMAC 1 -#define SHA256_HMAC_GEN 2 -#define SHA384 3 -#define SHA384_HMAC 4 -#define SHA384_HMAC_GEN 5 -#define SHA512 6 -#define SHA512_HMAC 7 -#define SHA512_HMAC_GEN 8 -#define SHA512_224 9 -#define SHA512_256 10 - -/* - * SHA2 context. - * The contents of this structure are a private interface between the - * Init/Update/Final calls of the functions defined below. - * Callers must never attempt to read or write any of the fields - * in this structure directly. 
- */ -typedef struct { - uint32_t algotype; /* Algorithm Type */ - - /* state (ABCDEFGH) */ - union { - uint32_t s32[8]; /* for SHA256 */ - uint64_t s64[8]; /* for SHA384/512 */ - } state; - /* number of bits */ - union { - uint32_t c32[2]; /* for SHA256 , modulo 2^64 */ - uint64_t c64[2]; /* for SHA384/512, modulo 2^128 */ - } count; - union { - uint8_t buf8[128]; /* undigested input */ - uint32_t buf32[32]; /* realigned input */ - uint64_t buf64[16]; /* realigned input */ - } buf_un; -} SHA2_CTX; - -typedef SHA2_CTX SHA256_CTX; -typedef SHA2_CTX SHA384_CTX; -typedef SHA2_CTX SHA512_CTX; - -extern void SHA256Init(SHA256_CTX *); - -extern void SHA256Update(SHA256_CTX *, const void *, size_t); - -extern void SHA256Final(void *, SHA256_CTX *); - -extern void SHA384Init(SHA384_CTX *); - -extern void SHA384Update(SHA384_CTX *, const void *, size_t); - -extern void SHA384Final(void *, SHA384_CTX *); - -extern void SHA512Init(SHA512_CTX *); - -extern void SHA512Update(SHA512_CTX *, const void *, size_t); - -extern void SHA512Final(void *, SHA512_CTX *); - -extern void SHA2Init(uint64_t mech, SHA2_CTX *); - -extern void SHA2Update(SHA2_CTX *, const void *, size_t); - -extern void SHA2Final(void *, SHA2_CTX *); - -#ifdef _SHA2_IMPL -/* - * The following types/functions are all private to the implementation - * of the SHA2 functions and must not be used by consumers of the interface - */ - -/* - * List of support mechanisms in this module. - * - * It is important to note that in the module, division or modulus calculations - * are used on the enumerated type to determine which mechanism is being used; - * therefore, changing the order or additional mechanisms should be done - * carefully - */ -typedef enum sha2_mech_type { - SHA256_MECH_INFO_TYPE, /* SUN_CKM_SHA256 */ - SHA256_HMAC_MECH_INFO_TYPE, /* SUN_CKM_SHA256_HMAC */ - SHA256_HMAC_GEN_MECH_INFO_TYPE, /* SUN_CKM_SHA256_HMAC_GENERAL */ - SHA384_MECH_INFO_TYPE, /* SUN_CKM_SHA384 */ - SHA384_HMAC_MECH_INFO_TYPE, /* SUN_CKM_SHA384_HMAC */ - SHA384_HMAC_GEN_MECH_INFO_TYPE, /* SUN_CKM_SHA384_HMAC_GENERAL */ - SHA512_MECH_INFO_TYPE, /* SUN_CKM_SHA512 */ - SHA512_HMAC_MECH_INFO_TYPE, /* SUN_CKM_SHA512_HMAC */ - SHA512_HMAC_GEN_MECH_INFO_TYPE, /* SUN_CKM_SHA512_HMAC_GENERAL */ - SHA512_224_MECH_INFO_TYPE, /* SUN_CKM_SHA512_224 */ - SHA512_256_MECH_INFO_TYPE /* SUN_CKM_SHA512_256 */ -} sha2_mech_type_t; - -#endif /* _SHA2_IMPL */ - -#ifdef __cplusplus -} -#endif - -#endif /* _SYS_SHA2_H */ diff --git a/module/Kbuild.in b/module/Kbuild.in index a1ea08cd43..6b1c9c48b1 100644 --- a/module/Kbuild.in +++ b/module/Kbuild.in @@ -94,7 +94,6 @@ ICP_OBJS := \ algs/modes/gcm.o \ algs/modes/gcm_generic.o \ algs/modes/modes.o \ - algs/sha2/sha2.o \ algs/skein/skein.o \ algs/skein/skein_block.o \ algs/skein/skein_iv.o \ @@ -122,9 +121,7 @@ ICP_OBJS_X86_64 := \ asm-x86_64/blake3/blake3_sse41.o \ asm-x86_64/modes/aesni-gcm-x86_64.o \ asm-x86_64/modes/gcm_pclmulqdq.o \ - asm-x86_64/modes/ghash-x86_64.o \ - asm-x86_64/sha2/sha256_impl.o \ - asm-x86_64/sha2/sha512_impl.o + asm-x86_64/modes/ghash-x86_64.o ICP_OBJS_X86 := \ @@ -159,13 +156,6 @@ $(addprefix $(obj)/icp/,$(ICP_OBJS) $(ICP_OBJS_X86) $(ICP_OBJS_X86_64) \ # Suppress objtool "return with modified stack frame" warnings. OBJECT_FILES_NON_STANDARD_aesni-gcm-x86_64.o := y -# Suppress objtool "unsupported stack pointer realignment" warnings. We are -# not using a DRAP register while aligning the stack to a 64 byte boundary. -# See #6950 for the reasoning. 
-OBJECT_FILES_NON_STANDARD_sha256_impl.o := y -OBJECT_FILES_NON_STANDARD_sha512_impl.o := y - - LUA_OBJS := \ lapi.o \ lauxlib.o \ @@ -344,7 +334,7 @@ ZFS_OBJS := \ refcount.o \ rrwlock.o \ sa.o \ - sha256.o \ + sha2_zfs.o \ skein_zfs.o \ spa.o \ spa_checkpoint.o \ diff --git a/module/Makefile.bsd b/module/Makefile.bsd index 999dc90ff5..1663dcec63 100644 --- a/module/Makefile.bsd +++ b/module/Makefile.bsd @@ -141,8 +141,6 @@ SRCS+= nvpair.c \ SRCS+= acl_common.c \ callb.c \ list.c \ - sha256c.c \ - sha512c.c \ spl_acl.c \ spl_cmn_err.c \ spl_dtrace.c \ @@ -268,7 +266,7 @@ SRCS+= abd.c \ refcount.c \ rrwlock.c \ sa.c \ - sha256.c \ + sha2_zfs.c \ skein_zfs.c \ spa.c \ spa_checkpoint.c \ diff --git a/module/icp/algs/sha2/sha2.c b/module/icp/algs/sha2/sha2.c deleted file mode 100644 index e6bbe34eaa..0000000000 --- a/module/icp/algs/sha2/sha2.c +++ /dev/null @@ -1,957 +0,0 @@ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ -/* - * Copyright 2013 Saso Kiselkov. All rights reserved. - */ - -/* - * The basic framework for this code came from the reference - * implementation for MD5. That implementation is Copyright (C) - * 1991-2, RSA Data Security, Inc. Created 1991. All rights reserved. - * - * License to copy and use this software is granted provided that it - * is identified as the "RSA Data Security, Inc. MD5 Message-Digest - * Algorithm" in all material mentioning or referencing this software - * or this function. - * - * License is also granted to make and use derivative works provided - * that such works are identified as "derived from the RSA Data - * Security, Inc. MD5 Message-Digest Algorithm" in all material - * mentioning or referencing the derived work. - * - * RSA Data Security, Inc. makes no representations concerning either - * the merchantability of this software or the suitability of this - * software for any particular purpose. It is provided "as is" - * without express or implied warranty of any kind. - * - * These notices must be retained in any copies of any part of this - * documentation and/or software. - * - * NOTE: Cleaned-up and optimized, version of SHA2, based on the FIPS 180-2 - * standard, available at - * http://csrc.nist.gov/publications/fips/fips180-2/fips180-2.pdf - * Not as fast as one would like -- further optimizations are encouraged - * and appreciated. - */ - -#include -#define _SHA2_IMPL -#include -#include - -#define _RESTRICT_KYWD - -#ifdef _ZFS_LITTLE_ENDIAN -#include -#define HAVE_HTONL -#endif -#include /* for _ILP32 */ -#include - -static void Encode(uint8_t *, uint32_t *, size_t); -static void Encode64(uint8_t *, uint64_t *, size_t); - -/* userspace only supports the generic version */ -#if defined(__amd64) && defined(_KERNEL) -#define SHA512Transform(ctx, in) SHA512TransformBlocks((ctx), (in), 1) -#define SHA256Transform(ctx, in) SHA256TransformBlocks((ctx), (in), 1) - -void ASMABI SHA512TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num); -void ASMABI SHA256TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num); - -#else -static void SHA256Transform(SHA2_CTX *, const uint8_t *); -static void SHA512Transform(SHA2_CTX *, const uint8_t *); -#endif /* __amd64 && _KERNEL */ - -static const uint8_t PADDING[128] = { 0x80, /* all zeros */ }; - -/* - * The low-level checksum routines use a lot of stack space. On systems where - * small stacks are enforced (like 32-bit kernel builds), insert compiler memory - * barriers to reduce stack frame size. 
This can reduce the SHA512Transform() - * stack frame usage from 3k to <1k on ARM32, for example. - */ -#if defined(_ILP32) || defined(__powerpc) /* small stack */ -#define SMALL_STACK_MEMORY_BARRIER asm volatile("": : :"memory"); -#else -#define SMALL_STACK_MEMORY_BARRIER -#endif - -/* Ch and Maj are the basic SHA2 functions. */ -#define Ch(b, c, d) (((b) & (c)) ^ ((~b) & (d))) -#define Maj(b, c, d) (((b) & (c)) ^ ((b) & (d)) ^ ((c) & (d))) - -/* Rotates x right n bits. */ -#define ROTR(x, n) \ - (((x) >> (n)) | ((x) << ((sizeof (x) * NBBY)-(n)))) - -/* Shift x right n bits */ -#define SHR(x, n) ((x) >> (n)) - -/* SHA256 Functions */ -#define BIGSIGMA0_256(x) (ROTR((x), 2) ^ ROTR((x), 13) ^ ROTR((x), 22)) -#define BIGSIGMA1_256(x) (ROTR((x), 6) ^ ROTR((x), 11) ^ ROTR((x), 25)) -#define SIGMA0_256(x) (ROTR((x), 7) ^ ROTR((x), 18) ^ SHR((x), 3)) -#define SIGMA1_256(x) (ROTR((x), 17) ^ ROTR((x), 19) ^ SHR((x), 10)) - -#define SHA256ROUND(a, b, c, d, e, f, g, h, i, w) \ - T1 = h + BIGSIGMA1_256(e) + Ch(e, f, g) + SHA256_CONST(i) + w; \ - d += T1; \ - T2 = BIGSIGMA0_256(a) + Maj(a, b, c); \ - h = T1 + T2 - -/* SHA384/512 Functions */ -#define BIGSIGMA0(x) (ROTR((x), 28) ^ ROTR((x), 34) ^ ROTR((x), 39)) -#define BIGSIGMA1(x) (ROTR((x), 14) ^ ROTR((x), 18) ^ ROTR((x), 41)) -#define SIGMA0(x) (ROTR((x), 1) ^ ROTR((x), 8) ^ SHR((x), 7)) -#define SIGMA1(x) (ROTR((x), 19) ^ ROTR((x), 61) ^ SHR((x), 6)) -#define SHA512ROUND(a, b, c, d, e, f, g, h, i, w) \ - T1 = h + BIGSIGMA1(e) + Ch(e, f, g) + SHA512_CONST(i) + w; \ - d += T1; \ - T2 = BIGSIGMA0(a) + Maj(a, b, c); \ - h = T1 + T2; \ - SMALL_STACK_MEMORY_BARRIER; - -/* - * sparc optimization: - * - * on the sparc, we can load big endian 32-bit data easily. note that - * special care must be taken to ensure the address is 32-bit aligned. - * in the interest of speed, we don't check to make sure, since - * careful programming can guarantee this for us. 
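[Editorial aside — not part of this patch. A plain-function restatement of the SHA-256 round helpers defined by the macros above, useful for checking them against FIPS 180-4 §4.1.2. The names rotr32, ch, maj, and sha256_round are mine; the deleted code unrolls the same round 64 times by permuting the working-variable names instead of moving data as done here.]

/* Illustrative only -- not part of this patch. */
#include <stdint.h>

static inline uint32_t rotr32(uint32_t x, unsigned n)
{
	return ((x >> n) | (x << (32 - n)));	/* ROTR() for uint32_t */
}

static inline uint32_t ch(uint32_t e, uint32_t f, uint32_t g)
{
	return ((e & f) ^ (~e & g));		/* Ch() above */
}

static inline uint32_t maj(uint32_t a, uint32_t b, uint32_t c)
{
	return ((a & b) ^ (a & c) ^ (b & c));	/* Maj() above */
}

/*
 * One SHA-256 round; s[] holds the working variables {a..h}, k is the
 * round constant SHA256_CONST(i), w is the message-schedule word.
 */
static void
sha256_round(uint32_t s[8], uint32_t k, uint32_t w)
{
	uint32_t t1 = s[7] + (rotr32(s[4], 6) ^ rotr32(s[4], 11) ^
	    rotr32(s[4], 25)) + ch(s[4], s[5], s[6]) + k + w;
	uint32_t t2 = (rotr32(s[0], 2) ^ rotr32(s[0], 13) ^
	    rotr32(s[0], 22)) + maj(s[0], s[1], s[2]);

	/* h=g, g=f, f=e, e=d+t1, d=c, c=b, b=a, a=t1+t2 */
	for (int i = 7; i > 0; i--)
		s[i] = s[i - 1];
	s[4] += t1;
	s[0] = t1 + t2;
}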
- */ - -#if defined(_ZFS_BIG_ENDIAN) -#define LOAD_BIG_32(addr) (*(uint32_t *)(addr)) -#define LOAD_BIG_64(addr) (*(uint64_t *)(addr)) - -#elif defined(HAVE_HTONL) -#define LOAD_BIG_32(addr) htonl(*((uint32_t *)(addr))) -#define LOAD_BIG_64(addr) htonll(*((uint64_t *)(addr))) - -#else -/* little endian -- will work on big endian, but slowly */ -#define LOAD_BIG_32(addr) \ - (((addr)[0] << 24) | ((addr)[1] << 16) | ((addr)[2] << 8) | (addr)[3]) -#define LOAD_BIG_64(addr) \ - (((uint64_t)(addr)[0] << 56) | ((uint64_t)(addr)[1] << 48) | \ - ((uint64_t)(addr)[2] << 40) | ((uint64_t)(addr)[3] << 32) | \ - ((uint64_t)(addr)[4] << 24) | ((uint64_t)(addr)[5] << 16) | \ - ((uint64_t)(addr)[6] << 8) | (uint64_t)(addr)[7]) -#endif /* _BIG_ENDIAN */ - - -#if !defined(__amd64) || !defined(_KERNEL) -/* SHA256 Transform */ - -static void -SHA256Transform(SHA2_CTX *ctx, const uint8_t *blk) -{ - uint32_t a = ctx->state.s32[0]; - uint32_t b = ctx->state.s32[1]; - uint32_t c = ctx->state.s32[2]; - uint32_t d = ctx->state.s32[3]; - uint32_t e = ctx->state.s32[4]; - uint32_t f = ctx->state.s32[5]; - uint32_t g = ctx->state.s32[6]; - uint32_t h = ctx->state.s32[7]; - - uint32_t w0, w1, w2, w3, w4, w5, w6, w7; - uint32_t w8, w9, w10, w11, w12, w13, w14, w15; - uint32_t T1, T2; - -#if defined(__sparc) - static const uint32_t sha256_consts[] = { - SHA256_CONST_0, SHA256_CONST_1, SHA256_CONST_2, - SHA256_CONST_3, SHA256_CONST_4, SHA256_CONST_5, - SHA256_CONST_6, SHA256_CONST_7, SHA256_CONST_8, - SHA256_CONST_9, SHA256_CONST_10, SHA256_CONST_11, - SHA256_CONST_12, SHA256_CONST_13, SHA256_CONST_14, - SHA256_CONST_15, SHA256_CONST_16, SHA256_CONST_17, - SHA256_CONST_18, SHA256_CONST_19, SHA256_CONST_20, - SHA256_CONST_21, SHA256_CONST_22, SHA256_CONST_23, - SHA256_CONST_24, SHA256_CONST_25, SHA256_CONST_26, - SHA256_CONST_27, SHA256_CONST_28, SHA256_CONST_29, - SHA256_CONST_30, SHA256_CONST_31, SHA256_CONST_32, - SHA256_CONST_33, SHA256_CONST_34, SHA256_CONST_35, - SHA256_CONST_36, SHA256_CONST_37, SHA256_CONST_38, - SHA256_CONST_39, SHA256_CONST_40, SHA256_CONST_41, - SHA256_CONST_42, SHA256_CONST_43, SHA256_CONST_44, - SHA256_CONST_45, SHA256_CONST_46, SHA256_CONST_47, - SHA256_CONST_48, SHA256_CONST_49, SHA256_CONST_50, - SHA256_CONST_51, SHA256_CONST_52, SHA256_CONST_53, - SHA256_CONST_54, SHA256_CONST_55, SHA256_CONST_56, - SHA256_CONST_57, SHA256_CONST_58, SHA256_CONST_59, - SHA256_CONST_60, SHA256_CONST_61, SHA256_CONST_62, - SHA256_CONST_63 - }; -#endif /* __sparc */ - - if ((uintptr_t)blk & 0x3) { /* not 4-byte aligned? 
*/ - memcpy(ctx->buf_un.buf32, blk, sizeof (ctx->buf_un.buf32)); - blk = (uint8_t *)ctx->buf_un.buf32; - } - - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w0 = LOAD_BIG_32(blk + 4 * 0); - SHA256ROUND(a, b, c, d, e, f, g, h, 0, w0); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w1 = LOAD_BIG_32(blk + 4 * 1); - SHA256ROUND(h, a, b, c, d, e, f, g, 1, w1); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w2 = LOAD_BIG_32(blk + 4 * 2); - SHA256ROUND(g, h, a, b, c, d, e, f, 2, w2); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w3 = LOAD_BIG_32(blk + 4 * 3); - SHA256ROUND(f, g, h, a, b, c, d, e, 3, w3); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w4 = LOAD_BIG_32(blk + 4 * 4); - SHA256ROUND(e, f, g, h, a, b, c, d, 4, w4); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w5 = LOAD_BIG_32(blk + 4 * 5); - SHA256ROUND(d, e, f, g, h, a, b, c, 5, w5); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w6 = LOAD_BIG_32(blk + 4 * 6); - SHA256ROUND(c, d, e, f, g, h, a, b, 6, w6); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w7 = LOAD_BIG_32(blk + 4 * 7); - SHA256ROUND(b, c, d, e, f, g, h, a, 7, w7); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w8 = LOAD_BIG_32(blk + 4 * 8); - SHA256ROUND(a, b, c, d, e, f, g, h, 8, w8); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w9 = LOAD_BIG_32(blk + 4 * 9); - SHA256ROUND(h, a, b, c, d, e, f, g, 9, w9); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w10 = LOAD_BIG_32(blk + 4 * 10); - SHA256ROUND(g, h, a, b, c, d, e, f, 10, w10); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w11 = LOAD_BIG_32(blk + 4 * 11); - SHA256ROUND(f, g, h, a, b, c, d, e, 11, w11); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w12 = LOAD_BIG_32(blk + 4 * 12); - SHA256ROUND(e, f, g, h, a, b, c, d, 12, w12); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w13 = LOAD_BIG_32(blk + 4 * 13); - SHA256ROUND(d, e, f, g, h, a, b, c, 13, w13); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w14 = LOAD_BIG_32(blk + 4 * 14); - SHA256ROUND(c, d, e, f, g, h, a, b, 14, w14); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w15 = LOAD_BIG_32(blk + 4 * 15); - SHA256ROUND(b, c, d, e, f, g, h, a, 15, w15); - - w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0; - SHA256ROUND(a, b, c, d, e, f, g, h, 16, w0); - w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1; - SHA256ROUND(h, a, b, c, d, e, f, g, 17, w1); - w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2; - SHA256ROUND(g, h, a, b, c, d, e, f, 18, w2); - w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3; - SHA256ROUND(f, g, h, a, b, c, d, e, 19, w3); - w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4; - SHA256ROUND(e, f, g, h, a, b, c, d, 20, w4); - w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5; - SHA256ROUND(d, e, f, g, h, a, b, c, 21, w5); - w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6; - SHA256ROUND(c, d, e, f, g, h, a, b, 22, w6); - w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7; - SHA256ROUND(b, c, d, e, f, g, h, a, 23, w7); - w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8; - SHA256ROUND(a, b, c, d, e, f, g, h, 24, w8); - w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9; - SHA256ROUND(h, a, b, c, d, e, f, g, 25, w9); - w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10; - SHA256ROUND(g, h, a, b, c, d, e, f, 26, w10); - w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11; - SHA256ROUND(f, g, h, a, b, c, d, e, 27, w11); - w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12; - SHA256ROUND(e, f, g, h, a, b, c, d, 28, w12); - w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13; - SHA256ROUND(d, e, f, g, h, a, b, c, 29, w13); - w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14; - SHA256ROUND(c, d, e, f, g, h, a, b, 30, w14); - w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15; - SHA256ROUND(b, c, d, e, f, g, h, a, 
31, w15); - - w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0; - SHA256ROUND(a, b, c, d, e, f, g, h, 32, w0); - w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1; - SHA256ROUND(h, a, b, c, d, e, f, g, 33, w1); - w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2; - SHA256ROUND(g, h, a, b, c, d, e, f, 34, w2); - w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3; - SHA256ROUND(f, g, h, a, b, c, d, e, 35, w3); - w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4; - SHA256ROUND(e, f, g, h, a, b, c, d, 36, w4); - w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5; - SHA256ROUND(d, e, f, g, h, a, b, c, 37, w5); - w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6; - SHA256ROUND(c, d, e, f, g, h, a, b, 38, w6); - w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7; - SHA256ROUND(b, c, d, e, f, g, h, a, 39, w7); - w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8; - SHA256ROUND(a, b, c, d, e, f, g, h, 40, w8); - w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9; - SHA256ROUND(h, a, b, c, d, e, f, g, 41, w9); - w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10; - SHA256ROUND(g, h, a, b, c, d, e, f, 42, w10); - w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11; - SHA256ROUND(f, g, h, a, b, c, d, e, 43, w11); - w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12; - SHA256ROUND(e, f, g, h, a, b, c, d, 44, w12); - w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13; - SHA256ROUND(d, e, f, g, h, a, b, c, 45, w13); - w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14; - SHA256ROUND(c, d, e, f, g, h, a, b, 46, w14); - w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15; - SHA256ROUND(b, c, d, e, f, g, h, a, 47, w15); - - w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0; - SHA256ROUND(a, b, c, d, e, f, g, h, 48, w0); - w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1; - SHA256ROUND(h, a, b, c, d, e, f, g, 49, w1); - w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2; - SHA256ROUND(g, h, a, b, c, d, e, f, 50, w2); - w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3; - SHA256ROUND(f, g, h, a, b, c, d, e, 51, w3); - w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4; - SHA256ROUND(e, f, g, h, a, b, c, d, 52, w4); - w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5; - SHA256ROUND(d, e, f, g, h, a, b, c, 53, w5); - w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6; - SHA256ROUND(c, d, e, f, g, h, a, b, 54, w6); - w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7; - SHA256ROUND(b, c, d, e, f, g, h, a, 55, w7); - w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8; - SHA256ROUND(a, b, c, d, e, f, g, h, 56, w8); - w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9; - SHA256ROUND(h, a, b, c, d, e, f, g, 57, w9); - w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10; - SHA256ROUND(g, h, a, b, c, d, e, f, 58, w10); - w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11; - SHA256ROUND(f, g, h, a, b, c, d, e, 59, w11); - w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12; - SHA256ROUND(e, f, g, h, a, b, c, d, 60, w12); - w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13; - SHA256ROUND(d, e, f, g, h, a, b, c, 61, w13); - w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14; - SHA256ROUND(c, d, e, f, g, h, a, b, 62, w14); - w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15; - SHA256ROUND(b, c, d, e, f, g, h, a, 63, w15); - - ctx->state.s32[0] += a; - ctx->state.s32[1] += b; - ctx->state.s32[2] += c; - ctx->state.s32[3] += d; - ctx->state.s32[4] += e; - ctx->state.s32[5] += f; - ctx->state.s32[6] += g; - ctx->state.s32[7] += h; -} - - -/* SHA384 and SHA512 Transform */ - -static void -SHA512Transform(SHA2_CTX *ctx, const uint8_t *blk) -{ - - uint64_t a = ctx->state.s64[0]; - 
uint64_t b = ctx->state.s64[1]; - uint64_t c = ctx->state.s64[2]; - uint64_t d = ctx->state.s64[3]; - uint64_t e = ctx->state.s64[4]; - uint64_t f = ctx->state.s64[5]; - uint64_t g = ctx->state.s64[6]; - uint64_t h = ctx->state.s64[7]; - - uint64_t w0, w1, w2, w3, w4, w5, w6, w7; - uint64_t w8, w9, w10, w11, w12, w13, w14, w15; - uint64_t T1, T2; - -#if defined(__sparc) - static const uint64_t sha512_consts[] = { - SHA512_CONST_0, SHA512_CONST_1, SHA512_CONST_2, - SHA512_CONST_3, SHA512_CONST_4, SHA512_CONST_5, - SHA512_CONST_6, SHA512_CONST_7, SHA512_CONST_8, - SHA512_CONST_9, SHA512_CONST_10, SHA512_CONST_11, - SHA512_CONST_12, SHA512_CONST_13, SHA512_CONST_14, - SHA512_CONST_15, SHA512_CONST_16, SHA512_CONST_17, - SHA512_CONST_18, SHA512_CONST_19, SHA512_CONST_20, - SHA512_CONST_21, SHA512_CONST_22, SHA512_CONST_23, - SHA512_CONST_24, SHA512_CONST_25, SHA512_CONST_26, - SHA512_CONST_27, SHA512_CONST_28, SHA512_CONST_29, - SHA512_CONST_30, SHA512_CONST_31, SHA512_CONST_32, - SHA512_CONST_33, SHA512_CONST_34, SHA512_CONST_35, - SHA512_CONST_36, SHA512_CONST_37, SHA512_CONST_38, - SHA512_CONST_39, SHA512_CONST_40, SHA512_CONST_41, - SHA512_CONST_42, SHA512_CONST_43, SHA512_CONST_44, - SHA512_CONST_45, SHA512_CONST_46, SHA512_CONST_47, - SHA512_CONST_48, SHA512_CONST_49, SHA512_CONST_50, - SHA512_CONST_51, SHA512_CONST_52, SHA512_CONST_53, - SHA512_CONST_54, SHA512_CONST_55, SHA512_CONST_56, - SHA512_CONST_57, SHA512_CONST_58, SHA512_CONST_59, - SHA512_CONST_60, SHA512_CONST_61, SHA512_CONST_62, - SHA512_CONST_63, SHA512_CONST_64, SHA512_CONST_65, - SHA512_CONST_66, SHA512_CONST_67, SHA512_CONST_68, - SHA512_CONST_69, SHA512_CONST_70, SHA512_CONST_71, - SHA512_CONST_72, SHA512_CONST_73, SHA512_CONST_74, - SHA512_CONST_75, SHA512_CONST_76, SHA512_CONST_77, - SHA512_CONST_78, SHA512_CONST_79 - }; -#endif /* __sparc */ - - - if ((uintptr_t)blk & 0x7) { /* not 8-byte aligned? 
*/ - memcpy(ctx->buf_un.buf64, blk, sizeof (ctx->buf_un.buf64)); - blk = (uint8_t *)ctx->buf_un.buf64; - } - - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w0 = LOAD_BIG_64(blk + 8 * 0); - SHA512ROUND(a, b, c, d, e, f, g, h, 0, w0); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w1 = LOAD_BIG_64(blk + 8 * 1); - SHA512ROUND(h, a, b, c, d, e, f, g, 1, w1); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w2 = LOAD_BIG_64(blk + 8 * 2); - SHA512ROUND(g, h, a, b, c, d, e, f, 2, w2); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w3 = LOAD_BIG_64(blk + 8 * 3); - SHA512ROUND(f, g, h, a, b, c, d, e, 3, w3); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w4 = LOAD_BIG_64(blk + 8 * 4); - SHA512ROUND(e, f, g, h, a, b, c, d, 4, w4); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w5 = LOAD_BIG_64(blk + 8 * 5); - SHA512ROUND(d, e, f, g, h, a, b, c, 5, w5); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w6 = LOAD_BIG_64(blk + 8 * 6); - SHA512ROUND(c, d, e, f, g, h, a, b, 6, w6); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w7 = LOAD_BIG_64(blk + 8 * 7); - SHA512ROUND(b, c, d, e, f, g, h, a, 7, w7); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w8 = LOAD_BIG_64(blk + 8 * 8); - SHA512ROUND(a, b, c, d, e, f, g, h, 8, w8); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w9 = LOAD_BIG_64(blk + 8 * 9); - SHA512ROUND(h, a, b, c, d, e, f, g, 9, w9); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w10 = LOAD_BIG_64(blk + 8 * 10); - SHA512ROUND(g, h, a, b, c, d, e, f, 10, w10); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w11 = LOAD_BIG_64(blk + 8 * 11); - SHA512ROUND(f, g, h, a, b, c, d, e, 11, w11); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w12 = LOAD_BIG_64(blk + 8 * 12); - SHA512ROUND(e, f, g, h, a, b, c, d, 12, w12); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w13 = LOAD_BIG_64(blk + 8 * 13); - SHA512ROUND(d, e, f, g, h, a, b, c, 13, w13); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w14 = LOAD_BIG_64(blk + 8 * 14); - SHA512ROUND(c, d, e, f, g, h, a, b, 14, w14); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w15 = LOAD_BIG_64(blk + 8 * 15); - SHA512ROUND(b, c, d, e, f, g, h, a, 15, w15); - - w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0; - SHA512ROUND(a, b, c, d, e, f, g, h, 16, w0); - w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1; - SHA512ROUND(h, a, b, c, d, e, f, g, 17, w1); - w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2; - SHA512ROUND(g, h, a, b, c, d, e, f, 18, w2); - w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3; - SHA512ROUND(f, g, h, a, b, c, d, e, 19, w3); - w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4; - SHA512ROUND(e, f, g, h, a, b, c, d, 20, w4); - w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5; - SHA512ROUND(d, e, f, g, h, a, b, c, 21, w5); - w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6; - SHA512ROUND(c, d, e, f, g, h, a, b, 22, w6); - w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7; - SHA512ROUND(b, c, d, e, f, g, h, a, 23, w7); - w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8; - SHA512ROUND(a, b, c, d, e, f, g, h, 24, w8); - w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9; - SHA512ROUND(h, a, b, c, d, e, f, g, 25, w9); - w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10; - SHA512ROUND(g, h, a, b, c, d, e, f, 26, w10); - w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11; - SHA512ROUND(f, g, h, a, b, c, d, e, 27, w11); - w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12; - SHA512ROUND(e, f, g, h, a, b, c, d, 28, w12); - w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13; - SHA512ROUND(d, e, f, g, h, a, b, c, 29, w13); - w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14; - SHA512ROUND(c, d, e, f, g, h, a, b, 30, w14); - w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15; - SHA512ROUND(b, c, d, e, f, g, h, a, 31, w15); - - w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0; - SHA512ROUND(a, b, c, d, e, f, g, h, 32, w0); - w1 = SIGMA1(w15) + w10 + 
SIGMA0(w2) + w1; - SHA512ROUND(h, a, b, c, d, e, f, g, 33, w1); - w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2; - SHA512ROUND(g, h, a, b, c, d, e, f, 34, w2); - w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3; - SHA512ROUND(f, g, h, a, b, c, d, e, 35, w3); - w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4; - SHA512ROUND(e, f, g, h, a, b, c, d, 36, w4); - w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5; - SHA512ROUND(d, e, f, g, h, a, b, c, 37, w5); - w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6; - SHA512ROUND(c, d, e, f, g, h, a, b, 38, w6); - w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7; - SHA512ROUND(b, c, d, e, f, g, h, a, 39, w7); - w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8; - SHA512ROUND(a, b, c, d, e, f, g, h, 40, w8); - w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9; - SHA512ROUND(h, a, b, c, d, e, f, g, 41, w9); - w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10; - SHA512ROUND(g, h, a, b, c, d, e, f, 42, w10); - w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11; - SHA512ROUND(f, g, h, a, b, c, d, e, 43, w11); - w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12; - SHA512ROUND(e, f, g, h, a, b, c, d, 44, w12); - w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13; - SHA512ROUND(d, e, f, g, h, a, b, c, 45, w13); - w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14; - SHA512ROUND(c, d, e, f, g, h, a, b, 46, w14); - w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15; - SHA512ROUND(b, c, d, e, f, g, h, a, 47, w15); - - w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0; - SHA512ROUND(a, b, c, d, e, f, g, h, 48, w0); - w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1; - SHA512ROUND(h, a, b, c, d, e, f, g, 49, w1); - w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2; - SHA512ROUND(g, h, a, b, c, d, e, f, 50, w2); - w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3; - SHA512ROUND(f, g, h, a, b, c, d, e, 51, w3); - w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4; - SHA512ROUND(e, f, g, h, a, b, c, d, 52, w4); - w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5; - SHA512ROUND(d, e, f, g, h, a, b, c, 53, w5); - w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6; - SHA512ROUND(c, d, e, f, g, h, a, b, 54, w6); - w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7; - SHA512ROUND(b, c, d, e, f, g, h, a, 55, w7); - w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8; - SHA512ROUND(a, b, c, d, e, f, g, h, 56, w8); - w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9; - SHA512ROUND(h, a, b, c, d, e, f, g, 57, w9); - w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10; - SHA512ROUND(g, h, a, b, c, d, e, f, 58, w10); - w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11; - SHA512ROUND(f, g, h, a, b, c, d, e, 59, w11); - w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12; - SHA512ROUND(e, f, g, h, a, b, c, d, 60, w12); - w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13; - SHA512ROUND(d, e, f, g, h, a, b, c, 61, w13); - w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14; - SHA512ROUND(c, d, e, f, g, h, a, b, 62, w14); - w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15; - SHA512ROUND(b, c, d, e, f, g, h, a, 63, w15); - - w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0; - SHA512ROUND(a, b, c, d, e, f, g, h, 64, w0); - w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1; - SHA512ROUND(h, a, b, c, d, e, f, g, 65, w1); - w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2; - SHA512ROUND(g, h, a, b, c, d, e, f, 66, w2); - w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3; - SHA512ROUND(f, g, h, a, b, c, d, e, 67, w3); - w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4; - SHA512ROUND(e, f, g, h, a, b, c, d, 68, w4); - w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5; - SHA512ROUND(d, e, f, g, h, a, b, c, 69, w5); - w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6; - SHA512ROUND(c, d, e, f, g, h, a, b, 70, w6); - w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7; - SHA512ROUND(b, c, d, e, f, g, h, a, 71, w7); - w8 = SIGMA1(w6) + w1 + 
SIGMA0(w9) + w8; - SHA512ROUND(a, b, c, d, e, f, g, h, 72, w8); - w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9; - SHA512ROUND(h, a, b, c, d, e, f, g, 73, w9); - w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10; - SHA512ROUND(g, h, a, b, c, d, e, f, 74, w10); - w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11; - SHA512ROUND(f, g, h, a, b, c, d, e, 75, w11); - w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12; - SHA512ROUND(e, f, g, h, a, b, c, d, 76, w12); - w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13; - SHA512ROUND(d, e, f, g, h, a, b, c, 77, w13); - w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14; - SHA512ROUND(c, d, e, f, g, h, a, b, 78, w14); - w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15; - SHA512ROUND(b, c, d, e, f, g, h, a, 79, w15); - - ctx->state.s64[0] += a; - ctx->state.s64[1] += b; - ctx->state.s64[2] += c; - ctx->state.s64[3] += d; - ctx->state.s64[4] += e; - ctx->state.s64[5] += f; - ctx->state.s64[6] += g; - ctx->state.s64[7] += h; - -} -#endif /* !__amd64 || !_KERNEL */ - - -/* - * Encode() - * - * purpose: to convert a list of numbers from little endian to big endian - * input: uint8_t * : place to store the converted big endian numbers - * uint32_t * : place to get numbers to convert from - * size_t : the length of the input in bytes - * output: void - */ - -static void -Encode(uint8_t *_RESTRICT_KYWD output, uint32_t *_RESTRICT_KYWD input, - size_t len) -{ - size_t i, j; - -#if defined(__sparc) - if (IS_P2ALIGNED(output, sizeof (uint32_t))) { - for (i = 0, j = 0; j < len; i++, j += 4) { - /* LINTED E_BAD_PTR_CAST_ALIGN */ - *((uint32_t *)(output + j)) = input[i]; - } - } else { -#endif /* little endian -- will work on big endian, but slowly */ - for (i = 0, j = 0; j < len; i++, j += 4) { - output[j] = (input[i] >> 24) & 0xff; - output[j + 1] = (input[i] >> 16) & 0xff; - output[j + 2] = (input[i] >> 8) & 0xff; - output[j + 3] = input[i] & 0xff; - } -#if defined(__sparc) - } -#endif -} - -static void -Encode64(uint8_t *_RESTRICT_KYWD output, uint64_t *_RESTRICT_KYWD input, - size_t len) -{ - size_t i, j; - -#if defined(__sparc) - if (IS_P2ALIGNED(output, sizeof (uint64_t))) { - for (i = 0, j = 0; j < len; i++, j += 8) { - /* LINTED E_BAD_PTR_CAST_ALIGN */ - *((uint64_t *)(output + j)) = input[i]; - } - } else { -#endif /* little endian -- will work on big endian, but slowly */ - for (i = 0, j = 0; j < len; i++, j += 8) { - - output[j] = (input[i] >> 56) & 0xff; - output[j + 1] = (input[i] >> 48) & 0xff; - output[j + 2] = (input[i] >> 40) & 0xff; - output[j + 3] = (input[i] >> 32) & 0xff; - output[j + 4] = (input[i] >> 24) & 0xff; - output[j + 5] = (input[i] >> 16) & 0xff; - output[j + 6] = (input[i] >> 8) & 0xff; - output[j + 7] = input[i] & 0xff; - } -#if defined(__sparc) - } -#endif -} - - -void -SHA2Init(uint64_t mech, SHA2_CTX *ctx) -{ - - switch (mech) { - case SHA256_MECH_INFO_TYPE: - case SHA256_HMAC_MECH_INFO_TYPE: - case SHA256_HMAC_GEN_MECH_INFO_TYPE: - ctx->state.s32[0] = 0x6a09e667U; - ctx->state.s32[1] = 0xbb67ae85U; - ctx->state.s32[2] = 0x3c6ef372U; - ctx->state.s32[3] = 0xa54ff53aU; - ctx->state.s32[4] = 0x510e527fU; - ctx->state.s32[5] = 0x9b05688cU; - ctx->state.s32[6] = 0x1f83d9abU; - ctx->state.s32[7] = 0x5be0cd19U; - break; - case SHA384_MECH_INFO_TYPE: - case SHA384_HMAC_MECH_INFO_TYPE: - case SHA384_HMAC_GEN_MECH_INFO_TYPE: - ctx->state.s64[0] = 0xcbbb9d5dc1059ed8ULL; - ctx->state.s64[1] = 0x629a292a367cd507ULL; - ctx->state.s64[2] = 0x9159015a3070dd17ULL; - ctx->state.s64[3] = 0x152fecd8f70e5939ULL; - ctx->state.s64[4] = 0x67332667ffc00b31ULL; - ctx->state.s64[5] = 
0x8eb44a8768581511ULL; - ctx->state.s64[6] = 0xdb0c2e0d64f98fa7ULL; - ctx->state.s64[7] = 0x47b5481dbefa4fa4ULL; - break; - case SHA512_MECH_INFO_TYPE: - case SHA512_HMAC_MECH_INFO_TYPE: - case SHA512_HMAC_GEN_MECH_INFO_TYPE: - ctx->state.s64[0] = 0x6a09e667f3bcc908ULL; - ctx->state.s64[1] = 0xbb67ae8584caa73bULL; - ctx->state.s64[2] = 0x3c6ef372fe94f82bULL; - ctx->state.s64[3] = 0xa54ff53a5f1d36f1ULL; - ctx->state.s64[4] = 0x510e527fade682d1ULL; - ctx->state.s64[5] = 0x9b05688c2b3e6c1fULL; - ctx->state.s64[6] = 0x1f83d9abfb41bd6bULL; - ctx->state.s64[7] = 0x5be0cd19137e2179ULL; - break; - case SHA512_224_MECH_INFO_TYPE: - ctx->state.s64[0] = 0x8C3D37C819544DA2ULL; - ctx->state.s64[1] = 0x73E1996689DCD4D6ULL; - ctx->state.s64[2] = 0x1DFAB7AE32FF9C82ULL; - ctx->state.s64[3] = 0x679DD514582F9FCFULL; - ctx->state.s64[4] = 0x0F6D2B697BD44DA8ULL; - ctx->state.s64[5] = 0x77E36F7304C48942ULL; - ctx->state.s64[6] = 0x3F9D85A86A1D36C8ULL; - ctx->state.s64[7] = 0x1112E6AD91D692A1ULL; - break; - case SHA512_256_MECH_INFO_TYPE: - ctx->state.s64[0] = 0x22312194FC2BF72CULL; - ctx->state.s64[1] = 0x9F555FA3C84C64C2ULL; - ctx->state.s64[2] = 0x2393B86B6F53B151ULL; - ctx->state.s64[3] = 0x963877195940EABDULL; - ctx->state.s64[4] = 0x96283EE2A88EFFE3ULL; - ctx->state.s64[5] = 0xBE5E1E2553863992ULL; - ctx->state.s64[6] = 0x2B0199FC2C85B8AAULL; - ctx->state.s64[7] = 0x0EB72DDC81C52CA2ULL; - break; -#ifdef _KERNEL - default: - cmn_err(CE_PANIC, - "sha2_init: failed to find a supported algorithm: 0x%x", - (uint32_t)mech); - -#endif /* _KERNEL */ - } - - ctx->algotype = (uint32_t)mech; - ctx->count.c64[0] = ctx->count.c64[1] = 0; -} - -#ifndef _KERNEL - -// #pragma inline(SHA256Init, SHA384Init, SHA512Init) -void -SHA256Init(SHA256_CTX *ctx) -{ - SHA2Init(SHA256, ctx); -} - -void -SHA384Init(SHA384_CTX *ctx) -{ - SHA2Init(SHA384, ctx); -} - -void -SHA512Init(SHA512_CTX *ctx) -{ - SHA2Init(SHA512, ctx); -} - -#endif /* _KERNEL */ - -/* - * SHA2Update() - * - * purpose: continues an sha2 digest operation, using the message block - * to update the context. - * input: SHA2_CTX * : the context to update - * void * : the message block - * size_t : the length of the message block, in bytes - * output: void - */ - -void -SHA2Update(SHA2_CTX *ctx, const void *inptr, size_t input_len) -{ - uint32_t i, buf_index, buf_len, buf_limit; - const uint8_t *input = inptr; - uint32_t algotype = ctx->algotype; - - /* check for noop */ - if (input_len == 0) - return; - - if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) { - buf_limit = 64; - - /* compute number of bytes mod 64 */ - buf_index = (ctx->count.c32[1] >> 3) & 0x3F; - - /* update number of bits */ - if ((ctx->count.c32[1] += (input_len << 3)) < (input_len << 3)) - ctx->count.c32[0]++; - - ctx->count.c32[0] += (input_len >> 29); - - } else { - buf_limit = 128; - - /* compute number of bytes mod 128 */ - buf_index = (ctx->count.c64[1] >> 3) & 0x7F; - - /* update number of bits */ - if ((ctx->count.c64[1] += (input_len << 3)) < (input_len << 3)) - ctx->count.c64[0]++; - - ctx->count.c64[0] += (input_len >> 29); - } - - buf_len = buf_limit - buf_index; - - /* transform as many times as possible */ - i = 0; - if (input_len >= buf_len) { - - /* - * general optimization: - * - * only do initial memcpy() and SHA2Transform() if - * buf_index != 0. if buf_index == 0, we're just - * wasting our time doing the memcpy() since there - * wasn't any data left over from a previous call to - * SHA2Update(). 
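[Editorial aside — not part of this patch. SHA2Update() above keeps the running message length in bits, so the number of bytes already buffered in the context is the bit count shifted down by 3 and masked to the block size (64 bytes for SHA-256, 128 for SHA-384/512). A small sketch of that arithmetic, with pending_bytes being a name invented for the example:]

/* Illustrative only -- not part of this patch. */
#include <stdint.h>
#include <stddef.h>

static size_t
pending_bytes(uint64_t bitcount, size_t block_size)	/* block_size: 64 or 128 */
{
	return ((size_t)(bitcount >> 3) & (block_size - 1));
}

/*
 * Example: after hashing 100 bytes with SHA-256 the bit count is 800, so
 * pending_bytes(800, 64) == 36.  The next SHA2Update() call first tops the
 * 64-byte buffer up with 28 bytes (the memcpy()/Transform step discussed in
 * the comment above) and then transforms whole blocks directly from the
 * caller's input.
 */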
- */ - if (buf_index) { - memcpy(&ctx->buf_un.buf8[buf_index], input, buf_len); - if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) - SHA256Transform(ctx, ctx->buf_un.buf8); - else - SHA512Transform(ctx, ctx->buf_un.buf8); - - i = buf_len; - } - -#if !defined(__amd64) || !defined(_KERNEL) - if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) { - for (; i + buf_limit - 1 < input_len; i += buf_limit) { - SHA256Transform(ctx, &input[i]); - } - } else { - for (; i + buf_limit - 1 < input_len; i += buf_limit) { - SHA512Transform(ctx, &input[i]); - } - } - -#else - uint32_t block_count; - if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) { - block_count = (input_len - i) >> 6; - if (block_count > 0) { - SHA256TransformBlocks(ctx, &input[i], - block_count); - i += block_count << 6; - } - } else { - block_count = (input_len - i) >> 7; - if (block_count > 0) { - SHA512TransformBlocks(ctx, &input[i], - block_count); - i += block_count << 7; - } - } -#endif /* !__amd64 || !_KERNEL */ - - /* - * general optimization: - * - * if i and input_len are the same, return now instead - * of calling memcpy(), since the memcpy() in this case - * will be an expensive noop. - */ - - if (input_len == i) - return; - - buf_index = 0; - } - - /* buffer remaining input */ - memcpy(&ctx->buf_un.buf8[buf_index], &input[i], input_len - i); -} - - -/* - * SHA2Final() - * - * purpose: ends an sha2 digest operation, finalizing the message digest and - * zeroing the context. - * input: uchar_t * : a buffer to store the digest - * : The function actually uses void* because many - * : callers pass things other than uchar_t here. - * SHA2_CTX * : the context to finalize, save, and zero - * output: void - */ - -void -SHA2Final(void *digest, SHA2_CTX *ctx) -{ - uint8_t bitcount_be[sizeof (ctx->count.c32)]; - uint8_t bitcount_be64[sizeof (ctx->count.c64)]; - uint32_t index; - uint32_t algotype = ctx->algotype; - - if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) { - index = (ctx->count.c32[1] >> 3) & 0x3f; - Encode(bitcount_be, ctx->count.c32, sizeof (bitcount_be)); - SHA2Update(ctx, PADDING, ((index < 56) ? 56 : 120) - index); - SHA2Update(ctx, bitcount_be, sizeof (bitcount_be)); - Encode(digest, ctx->state.s32, sizeof (ctx->state.s32)); - } else { - index = (ctx->count.c64[1] >> 3) & 0x7f; - Encode64(bitcount_be64, ctx->count.c64, - sizeof (bitcount_be64)); - SHA2Update(ctx, PADDING, ((index < 112) ? 
112 : 240) - index); - SHA2Update(ctx, bitcount_be64, sizeof (bitcount_be64)); - if (algotype <= SHA384_HMAC_GEN_MECH_INFO_TYPE) { - ctx->state.s64[6] = ctx->state.s64[7] = 0; - Encode64(digest, ctx->state.s64, - sizeof (uint64_t) * 6); - } else if (algotype == SHA512_224_MECH_INFO_TYPE) { - uint8_t last[sizeof (uint64_t)]; - /* - * Since SHA-512/224 doesn't align well to 64-bit - * boundaries, we must do the encoding in three steps: - * 1) encode the three 64-bit words that fit neatly - * 2) encode the last 64-bit word to a temp buffer - * 3) chop out the lower 32-bits from the temp buffer - * and append them to the digest - */ - Encode64(digest, ctx->state.s64, sizeof (uint64_t) * 3); - Encode64(last, &ctx->state.s64[3], sizeof (uint64_t)); - memcpy((uint8_t *)digest + 24, last, 4); - } else if (algotype == SHA512_256_MECH_INFO_TYPE) { - Encode64(digest, ctx->state.s64, sizeof (uint64_t) * 4); - } else { - Encode64(digest, ctx->state.s64, - sizeof (ctx->state.s64)); - } - } - - /* zeroize sensitive information */ - memset(ctx, 0, sizeof (*ctx)); -} - -#ifdef _KERNEL -EXPORT_SYMBOL(SHA2Init); -EXPORT_SYMBOL(SHA2Update); -EXPORT_SYMBOL(SHA2Final); -#endif diff --git a/module/icp/asm-x86_64/sha2/sha256_impl.S b/module/icp/asm-x86_64/sha2/sha256_impl.S deleted file mode 100644 index f1fde51c1d..0000000000 --- a/module/icp/asm-x86_64/sha2/sha256_impl.S +++ /dev/null @@ -1,2090 +0,0 @@ -/* - * ==================================================================== - * Written by Andy Polyakov for the OpenSSL - * project. Rights for redistribution and usage in source and binary - * forms are granted according to the OpenSSL license. - * ==================================================================== - * - * sha256/512_block procedure for x86_64. - * - * 40% improvement over compiler-generated code on Opteron. On EM64T - * sha256 was observed to run >80% faster and sha512 - >40%. No magical - * tricks, just straight implementation... I really wonder why gcc - * [being armed with inline assembler] fails to generate as fast code. - * The only thing which is cool about this module is that it's very - * same instruction sequence used for both SHA-256 and SHA-512. In - * former case the instructions operate on 32-bit operands, while in - * latter - on 64-bit ones. All I had to do is to get one flavor right, - * the other one passed the test right away:-) - * - * sha256_block runs in ~1005 cycles on Opteron, which gives you - * asymptotic performance of 64*1000/1005=63.7MBps times CPU clock - * frequency in GHz. sha512_block runs in ~1275 cycles, which results - * in 128*1000/1275=100MBps per GHz. Is there room for improvement? - * Well, if you compare it to IA-64 implementation, which maintains - * X[16] in register bank[!], tends to 4 instructions per CPU clock - * cycle and runs in 1003 cycles, 1275 is very good result for 3-way - * issue Opteron pipeline and X[16] maintained in memory. So that *if* - * there is a way to improve it, *then* the only way would be to try to - * offload X[16] updates to SSE unit, but that would require "deeper" - * loop unroll, which in turn would naturally cause size blow-up, not - * to mention increased complexity! And once again, only *if* it's - * actually possible to noticeably improve overall ILP, instruction - * level parallelism, on a given CPU implementation in this case. - * - * Special note on Intel EM64T. 
While Opteron CPU exhibits perfect - * performance ratio of 1.5 between 64- and 32-bit flavors [see above], - * [currently available] EM64T CPUs apparently are far from it. On the - * contrary, 64-bit version, sha512_block, is ~30% *slower* than 32-bit - * sha256_block:-( This is presumably because 64-bit shifts/rotates - * apparently are not atomic instructions, but implemented in microcode. - */ - -/* - * OpenSolaris OS modifications - * - * Sun elects to use this software under the BSD license. - * - * This source originates from OpenSSL file sha512-x86_64.pl at - * ftp://ftp.openssl.org/snapshot/openssl-0.9.8-stable-SNAP-20080131.tar.gz - * (presumably for future OpenSSL release 0.9.8h), with these changes: - * - * 1. Added perl "use strict" and declared variables. - * - * 2. Added OpenSolaris ENTRY_NP/SET_SIZE macros from - * /usr/include/sys/asm_linkage.h, .ident keywords, and lint(1B) guards. - * - * 3. Removed x86_64-xlate.pl script (not needed for as(1) or gas(1) - * assemblers). Replaced the .picmeup macro with assembler code. - * - * 4. Added 8 to $ctx, as OpenSolaris OS has an extra 4-byte field, "algotype", - * at the beginning of SHA2_CTX (the next field is 8-byte aligned). - */ - -/* - * This file was generated by a perl script (sha512-x86_64.pl) that were - * used to generate sha256 and sha512 variants from the same code base. - * The comments from the original file have been pasted above. - */ - -#if defined(lint) || defined(__lint) -#include -#include - -void -SHA256TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num) -{ - (void) ctx, (void) in, (void) num; -} - - -#else -#define _ASM -#include - -ENTRY_NP(SHA256TransformBlocks) -.cfi_startproc - ENDBR - movq %rsp, %rax -.cfi_def_cfa_register %rax - push %rbx -.cfi_offset %rbx,-16 - push %rbp -.cfi_offset %rbp,-24 - push %r12 -.cfi_offset %r12,-32 - push %r13 -.cfi_offset %r13,-40 - push %r14 -.cfi_offset %r14,-48 - push %r15 -.cfi_offset %r15,-56 - mov %rsp,%rbp # copy %rsp - shl $4,%rdx # num*16 - sub $16*4+4*8,%rsp - lea (%rsi,%rdx,4),%rdx # inp+num*16*4 - and $-64,%rsp # align stack frame - add $8,%rdi # Skip OpenSolaris field, "algotype" - mov %rdi,16*4+0*8(%rsp) # save ctx, 1st arg - mov %rsi,16*4+1*8(%rsp) # save inp, 2nd arg - mov %rdx,16*4+2*8(%rsp) # save end pointer, "3rd" arg - mov %rbp,16*4+3*8(%rsp) # save copy of %rsp -# echo ".cfi_cfa_expression %rsp+88,deref,+56" | -# openssl/crypto/perlasm/x86_64-xlate.pl -.cfi_escape 0x0f,0x06,0x77,0xd8,0x00,0x06,0x23,0x38 - - #.picmeup %rbp - # The .picmeup pseudo-directive, from perlasm/x86_64_xlate.pl, puts - # the address of the "next" instruction into the target register - # (%rbp). 
This generates these 2 instructions: - lea .Llea(%rip),%rbp - #nop # .picmeup generates a nop for mod 8 alignment--not needed here - -.Llea: - lea K256-.(%rbp),%rbp - - mov 4*0(%rdi),%eax - mov 4*1(%rdi),%ebx - mov 4*2(%rdi),%ecx - mov 4*3(%rdi),%edx - mov 4*4(%rdi),%r8d - mov 4*5(%rdi),%r9d - mov 4*6(%rdi),%r10d - mov 4*7(%rdi),%r11d - jmp .Lloop - -.balign 16 -.Lloop: - xor %rdi,%rdi - mov 4*0(%rsi),%r12d - bswap %r12d - mov %r8d,%r13d - mov %r8d,%r14d - mov %r9d,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %r10d,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %r8d,%r15d # (f^g)&e - mov %r12d,0(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %r10d,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %r11d,%r12d # T1+=h - - mov %eax,%r11d - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %eax,%r13d - mov %eax,%r14d - - ror $2,%r11d - ror $13,%r13d - mov %eax,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%r11d - ror $9,%r13d - or %ecx,%r14d # a|c - - xor %r13d,%r11d # h=Sigma0(a) - and %ecx,%r15d # a&c - add %r12d,%edx # d+=T1 - - and %ebx,%r14d # (a|c)&b - add %r12d,%r11d # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%r11d # h+=Maj(a,b,c) - mov 4*1(%rsi),%r12d - bswap %r12d - mov %edx,%r13d - mov %edx,%r14d - mov %r8d,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %r9d,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %edx,%r15d # (f^g)&e - mov %r12d,4(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %r9d,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %r10d,%r12d # T1+=h - - mov %r11d,%r10d - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %r11d,%r13d - mov %r11d,%r14d - - ror $2,%r10d - ror $13,%r13d - mov %r11d,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%r10d - ror $9,%r13d - or %ebx,%r14d # a|c - - xor %r13d,%r10d # h=Sigma0(a) - and %ebx,%r15d # a&c - add %r12d,%ecx # d+=T1 - - and %eax,%r14d # (a|c)&b - add %r12d,%r10d # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%r10d # h+=Maj(a,b,c) - mov 4*2(%rsi),%r12d - bswap %r12d - mov %ecx,%r13d - mov %ecx,%r14d - mov %edx,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %r8d,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %ecx,%r15d # (f^g)&e - mov %r12d,8(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %r8d,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %r9d,%r12d # T1+=h - - mov %r10d,%r9d - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %r10d,%r13d - mov %r10d,%r14d - - ror $2,%r9d - ror $13,%r13d - mov %r10d,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%r9d - ror $9,%r13d - or %eax,%r14d # a|c - - xor %r13d,%r9d # h=Sigma0(a) - and %eax,%r15d # a&c - add %r12d,%ebx # d+=T1 - - and %r11d,%r14d # (a|c)&b - add %r12d,%r9d # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%r9d # h+=Maj(a,b,c) - mov 4*3(%rsi),%r12d - bswap %r12d - mov %ebx,%r13d - mov %ebx,%r14d - mov %ecx,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %edx,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %ebx,%r15d # (f^g)&e - mov %r12d,12(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %edx,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %r8d,%r12d # T1+=h - - mov %r9d,%r8d - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %r9d,%r13d - mov %r9d,%r14d - - ror $2,%r8d - ror $13,%r13d - mov %r9d,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%r8d - ror $9,%r13d - or %r11d,%r14d # a|c - - xor %r13d,%r8d # 
h=Sigma0(a) - and %r11d,%r15d # a&c - add %r12d,%eax # d+=T1 - - and %r10d,%r14d # (a|c)&b - add %r12d,%r8d # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%r8d # h+=Maj(a,b,c) - mov 4*4(%rsi),%r12d - bswap %r12d - mov %eax,%r13d - mov %eax,%r14d - mov %ebx,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %ecx,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %eax,%r15d # (f^g)&e - mov %r12d,16(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %ecx,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %edx,%r12d # T1+=h - - mov %r8d,%edx - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %r8d,%r13d - mov %r8d,%r14d - - ror $2,%edx - ror $13,%r13d - mov %r8d,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%edx - ror $9,%r13d - or %r10d,%r14d # a|c - - xor %r13d,%edx # h=Sigma0(a) - and %r10d,%r15d # a&c - add %r12d,%r11d # d+=T1 - - and %r9d,%r14d # (a|c)&b - add %r12d,%edx # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%edx # h+=Maj(a,b,c) - mov 4*5(%rsi),%r12d - bswap %r12d - mov %r11d,%r13d - mov %r11d,%r14d - mov %eax,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %ebx,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %r11d,%r15d # (f^g)&e - mov %r12d,20(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %ebx,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %ecx,%r12d # T1+=h - - mov %edx,%ecx - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %edx,%r13d - mov %edx,%r14d - - ror $2,%ecx - ror $13,%r13d - mov %edx,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%ecx - ror $9,%r13d - or %r9d,%r14d # a|c - - xor %r13d,%ecx # h=Sigma0(a) - and %r9d,%r15d # a&c - add %r12d,%r10d # d+=T1 - - and %r8d,%r14d # (a|c)&b - add %r12d,%ecx # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%ecx # h+=Maj(a,b,c) - mov 4*6(%rsi),%r12d - bswap %r12d - mov %r10d,%r13d - mov %r10d,%r14d - mov %r11d,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %eax,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %r10d,%r15d # (f^g)&e - mov %r12d,24(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %eax,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %ebx,%r12d # T1+=h - - mov %ecx,%ebx - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %ecx,%r13d - mov %ecx,%r14d - - ror $2,%ebx - ror $13,%r13d - mov %ecx,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%ebx - ror $9,%r13d - or %r8d,%r14d # a|c - - xor %r13d,%ebx # h=Sigma0(a) - and %r8d,%r15d # a&c - add %r12d,%r9d # d+=T1 - - and %edx,%r14d # (a|c)&b - add %r12d,%ebx # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%ebx # h+=Maj(a,b,c) - mov 4*7(%rsi),%r12d - bswap %r12d - mov %r9d,%r13d - mov %r9d,%r14d - mov %r10d,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %r11d,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %r9d,%r15d # (f^g)&e - mov %r12d,28(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %r11d,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %eax,%r12d # T1+=h - - mov %ebx,%eax - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %ebx,%r13d - mov %ebx,%r14d - - ror $2,%eax - ror $13,%r13d - mov %ebx,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%eax - ror $9,%r13d - or %edx,%r14d # a|c - - xor %r13d,%eax # h=Sigma0(a) - and %edx,%r15d # a&c - add %r12d,%r8d # d+=T1 - - and %ecx,%r14d # (a|c)&b - add %r12d,%eax # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 
1(%rdi),%rdi # round++ - - add %r14d,%eax # h+=Maj(a,b,c) - mov 4*8(%rsi),%r12d - bswap %r12d - mov %r8d,%r13d - mov %r8d,%r14d - mov %r9d,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %r10d,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %r8d,%r15d # (f^g)&e - mov %r12d,32(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %r10d,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %r11d,%r12d # T1+=h - - mov %eax,%r11d - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %eax,%r13d - mov %eax,%r14d - - ror $2,%r11d - ror $13,%r13d - mov %eax,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%r11d - ror $9,%r13d - or %ecx,%r14d # a|c - - xor %r13d,%r11d # h=Sigma0(a) - and %ecx,%r15d # a&c - add %r12d,%edx # d+=T1 - - and %ebx,%r14d # (a|c)&b - add %r12d,%r11d # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%r11d # h+=Maj(a,b,c) - mov 4*9(%rsi),%r12d - bswap %r12d - mov %edx,%r13d - mov %edx,%r14d - mov %r8d,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %r9d,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %edx,%r15d # (f^g)&e - mov %r12d,36(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %r9d,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %r10d,%r12d # T1+=h - - mov %r11d,%r10d - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %r11d,%r13d - mov %r11d,%r14d - - ror $2,%r10d - ror $13,%r13d - mov %r11d,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%r10d - ror $9,%r13d - or %ebx,%r14d # a|c - - xor %r13d,%r10d # h=Sigma0(a) - and %ebx,%r15d # a&c - add %r12d,%ecx # d+=T1 - - and %eax,%r14d # (a|c)&b - add %r12d,%r10d # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%r10d # h+=Maj(a,b,c) - mov 4*10(%rsi),%r12d - bswap %r12d - mov %ecx,%r13d - mov %ecx,%r14d - mov %edx,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %r8d,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %ecx,%r15d # (f^g)&e - mov %r12d,40(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %r8d,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %r9d,%r12d # T1+=h - - mov %r10d,%r9d - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %r10d,%r13d - mov %r10d,%r14d - - ror $2,%r9d - ror $13,%r13d - mov %r10d,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%r9d - ror $9,%r13d - or %eax,%r14d # a|c - - xor %r13d,%r9d # h=Sigma0(a) - and %eax,%r15d # a&c - add %r12d,%ebx # d+=T1 - - and %r11d,%r14d # (a|c)&b - add %r12d,%r9d # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%r9d # h+=Maj(a,b,c) - mov 4*11(%rsi),%r12d - bswap %r12d - mov %ebx,%r13d - mov %ebx,%r14d - mov %ecx,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %edx,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %ebx,%r15d # (f^g)&e - mov %r12d,44(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %edx,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %r8d,%r12d # T1+=h - - mov %r9d,%r8d - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %r9d,%r13d - mov %r9d,%r14d - - ror $2,%r8d - ror $13,%r13d - mov %r9d,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%r8d - ror $9,%r13d - or %r11d,%r14d # a|c - - xor %r13d,%r8d # h=Sigma0(a) - and %r11d,%r15d # a&c - add %r12d,%eax # d+=T1 - - and %r10d,%r14d # (a|c)&b - add %r12d,%r8d # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%r8d # h+=Maj(a,b,c) - mov 4*12(%rsi),%r12d - bswap %r12d - mov %eax,%r13d - mov %eax,%r14d - mov %ebx,%r15d - - ror $6,%r13d - 
ror $11,%r14d - xor %ecx,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %eax,%r15d # (f^g)&e - mov %r12d,48(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %ecx,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %edx,%r12d # T1+=h - - mov %r8d,%edx - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %r8d,%r13d - mov %r8d,%r14d - - ror $2,%edx - ror $13,%r13d - mov %r8d,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%edx - ror $9,%r13d - or %r10d,%r14d # a|c - - xor %r13d,%edx # h=Sigma0(a) - and %r10d,%r15d # a&c - add %r12d,%r11d # d+=T1 - - and %r9d,%r14d # (a|c)&b - add %r12d,%edx # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%edx # h+=Maj(a,b,c) - mov 4*13(%rsi),%r12d - bswap %r12d - mov %r11d,%r13d - mov %r11d,%r14d - mov %eax,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %ebx,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %r11d,%r15d # (f^g)&e - mov %r12d,52(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %ebx,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %ecx,%r12d # T1+=h - - mov %edx,%ecx - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %edx,%r13d - mov %edx,%r14d - - ror $2,%ecx - ror $13,%r13d - mov %edx,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%ecx - ror $9,%r13d - or %r9d,%r14d # a|c - - xor %r13d,%ecx # h=Sigma0(a) - and %r9d,%r15d # a&c - add %r12d,%r10d # d+=T1 - - and %r8d,%r14d # (a|c)&b - add %r12d,%ecx # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%ecx # h+=Maj(a,b,c) - mov 4*14(%rsi),%r12d - bswap %r12d - mov %r10d,%r13d - mov %r10d,%r14d - mov %r11d,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %eax,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %r10d,%r15d # (f^g)&e - mov %r12d,56(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %eax,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %ebx,%r12d # T1+=h - - mov %ecx,%ebx - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %ecx,%r13d - mov %ecx,%r14d - - ror $2,%ebx - ror $13,%r13d - mov %ecx,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%ebx - ror $9,%r13d - or %r8d,%r14d # a|c - - xor %r13d,%ebx # h=Sigma0(a) - and %r8d,%r15d # a&c - add %r12d,%r9d # d+=T1 - - and %edx,%r14d # (a|c)&b - add %r12d,%ebx # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%ebx # h+=Maj(a,b,c) - mov 4*15(%rsi),%r12d - bswap %r12d - mov %r9d,%r13d - mov %r9d,%r14d - mov %r10d,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %r11d,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %r9d,%r15d # (f^g)&e - mov %r12d,60(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %r11d,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %eax,%r12d # T1+=h - - mov %ebx,%eax - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %ebx,%r13d - mov %ebx,%r14d - - ror $2,%eax - ror $13,%r13d - mov %ebx,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%eax - ror $9,%r13d - or %edx,%r14d # a|c - - xor %r13d,%eax # h=Sigma0(a) - and %edx,%r15d # a&c - add %r12d,%r8d # d+=T1 - - and %ecx,%r14d # (a|c)&b - add %r12d,%eax # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%eax # h+=Maj(a,b,c) - jmp .Lrounds_16_xx -.balign 16 -.Lrounds_16_xx: - mov 4(%rsp),%r13d - mov 56(%rsp),%r12d - - mov %r13d,%r15d - - shr $3,%r13d - ror $7,%r15d - - xor %r15d,%r13d - ror $11,%r15d - - xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) - mov %r12d,%r14d - - shr $10,%r12d - ror $17,%r14d - - xor 
%r14d,%r12d - ror $2,%r14d - - xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) - - add %r13d,%r12d - - add 36(%rsp),%r12d - - add 0(%rsp),%r12d - mov %r8d,%r13d - mov %r8d,%r14d - mov %r9d,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %r10d,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %r8d,%r15d # (f^g)&e - mov %r12d,0(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %r10d,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %r11d,%r12d # T1+=h - - mov %eax,%r11d - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %eax,%r13d - mov %eax,%r14d - - ror $2,%r11d - ror $13,%r13d - mov %eax,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%r11d - ror $9,%r13d - or %ecx,%r14d # a|c - - xor %r13d,%r11d # h=Sigma0(a) - and %ecx,%r15d # a&c - add %r12d,%edx # d+=T1 - - and %ebx,%r14d # (a|c)&b - add %r12d,%r11d # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%r11d # h+=Maj(a,b,c) - mov 8(%rsp),%r13d - mov 60(%rsp),%r12d - - mov %r13d,%r15d - - shr $3,%r13d - ror $7,%r15d - - xor %r15d,%r13d - ror $11,%r15d - - xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) - mov %r12d,%r14d - - shr $10,%r12d - ror $17,%r14d - - xor %r14d,%r12d - ror $2,%r14d - - xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) - - add %r13d,%r12d - - add 40(%rsp),%r12d - - add 4(%rsp),%r12d - mov %edx,%r13d - mov %edx,%r14d - mov %r8d,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %r9d,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %edx,%r15d # (f^g)&e - mov %r12d,4(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %r9d,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %r10d,%r12d # T1+=h - - mov %r11d,%r10d - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %r11d,%r13d - mov %r11d,%r14d - - ror $2,%r10d - ror $13,%r13d - mov %r11d,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%r10d - ror $9,%r13d - or %ebx,%r14d # a|c - - xor %r13d,%r10d # h=Sigma0(a) - and %ebx,%r15d # a&c - add %r12d,%ecx # d+=T1 - - and %eax,%r14d # (a|c)&b - add %r12d,%r10d # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%r10d # h+=Maj(a,b,c) - mov 12(%rsp),%r13d - mov 0(%rsp),%r12d - - mov %r13d,%r15d - - shr $3,%r13d - ror $7,%r15d - - xor %r15d,%r13d - ror $11,%r15d - - xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) - mov %r12d,%r14d - - shr $10,%r12d - ror $17,%r14d - - xor %r14d,%r12d - ror $2,%r14d - - xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) - - add %r13d,%r12d - - add 44(%rsp),%r12d - - add 8(%rsp),%r12d - mov %ecx,%r13d - mov %ecx,%r14d - mov %edx,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %r8d,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %ecx,%r15d # (f^g)&e - mov %r12d,8(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %r8d,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %r9d,%r12d # T1+=h - - mov %r10d,%r9d - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %r10d,%r13d - mov %r10d,%r14d - - ror $2,%r9d - ror $13,%r13d - mov %r10d,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%r9d - ror $9,%r13d - or %eax,%r14d # a|c - - xor %r13d,%r9d # h=Sigma0(a) - and %eax,%r15d # a&c - add %r12d,%ebx # d+=T1 - - and %r11d,%r14d # (a|c)&b - add %r12d,%r9d # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%r9d # h+=Maj(a,b,c) - mov 16(%rsp),%r13d - mov 4(%rsp),%r12d - - mov %r13d,%r15d - - shr $3,%r13d - ror $7,%r15d - - xor %r15d,%r13d - ror $11,%r15d - - xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) - mov %r12d,%r14d - - shr $10,%r12d - ror $17,%r14d - - xor %r14d,%r12d - 
ror $2,%r14d - - xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) - - add %r13d,%r12d - - add 48(%rsp),%r12d - - add 12(%rsp),%r12d - mov %ebx,%r13d - mov %ebx,%r14d - mov %ecx,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %edx,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %ebx,%r15d # (f^g)&e - mov %r12d,12(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %edx,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %r8d,%r12d # T1+=h - - mov %r9d,%r8d - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %r9d,%r13d - mov %r9d,%r14d - - ror $2,%r8d - ror $13,%r13d - mov %r9d,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%r8d - ror $9,%r13d - or %r11d,%r14d # a|c - - xor %r13d,%r8d # h=Sigma0(a) - and %r11d,%r15d # a&c - add %r12d,%eax # d+=T1 - - and %r10d,%r14d # (a|c)&b - add %r12d,%r8d # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%r8d # h+=Maj(a,b,c) - mov 20(%rsp),%r13d - mov 8(%rsp),%r12d - - mov %r13d,%r15d - - shr $3,%r13d - ror $7,%r15d - - xor %r15d,%r13d - ror $11,%r15d - - xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) - mov %r12d,%r14d - - shr $10,%r12d - ror $17,%r14d - - xor %r14d,%r12d - ror $2,%r14d - - xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) - - add %r13d,%r12d - - add 52(%rsp),%r12d - - add 16(%rsp),%r12d - mov %eax,%r13d - mov %eax,%r14d - mov %ebx,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %ecx,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %eax,%r15d # (f^g)&e - mov %r12d,16(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %ecx,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %edx,%r12d # T1+=h - - mov %r8d,%edx - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %r8d,%r13d - mov %r8d,%r14d - - ror $2,%edx - ror $13,%r13d - mov %r8d,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%edx - ror $9,%r13d - or %r10d,%r14d # a|c - - xor %r13d,%edx # h=Sigma0(a) - and %r10d,%r15d # a&c - add %r12d,%r11d # d+=T1 - - and %r9d,%r14d # (a|c)&b - add %r12d,%edx # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%edx # h+=Maj(a,b,c) - mov 24(%rsp),%r13d - mov 12(%rsp),%r12d - - mov %r13d,%r15d - - shr $3,%r13d - ror $7,%r15d - - xor %r15d,%r13d - ror $11,%r15d - - xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) - mov %r12d,%r14d - - shr $10,%r12d - ror $17,%r14d - - xor %r14d,%r12d - ror $2,%r14d - - xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) - - add %r13d,%r12d - - add 56(%rsp),%r12d - - add 20(%rsp),%r12d - mov %r11d,%r13d - mov %r11d,%r14d - mov %eax,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %ebx,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %r11d,%r15d # (f^g)&e - mov %r12d,20(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %ebx,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %ecx,%r12d # T1+=h - - mov %edx,%ecx - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %edx,%r13d - mov %edx,%r14d - - ror $2,%ecx - ror $13,%r13d - mov %edx,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%ecx - ror $9,%r13d - or %r9d,%r14d # a|c - - xor %r13d,%ecx # h=Sigma0(a) - and %r9d,%r15d # a&c - add %r12d,%r10d # d+=T1 - - and %r8d,%r14d # (a|c)&b - add %r12d,%ecx # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%ecx # h+=Maj(a,b,c) - mov 28(%rsp),%r13d - mov 16(%rsp),%r12d - - mov %r13d,%r15d - - shr $3,%r13d - ror $7,%r15d - - xor %r15d,%r13d - ror $11,%r15d - - xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) - mov %r12d,%r14d - - shr $10,%r12d - ror $17,%r14d - - xor %r14d,%r12d - ror $2,%r14d - - xor 
%r14d,%r12d # sigma1(X[(i+14)&0xf]) - - add %r13d,%r12d - - add 60(%rsp),%r12d - - add 24(%rsp),%r12d - mov %r10d,%r13d - mov %r10d,%r14d - mov %r11d,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %eax,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %r10d,%r15d # (f^g)&e - mov %r12d,24(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %eax,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %ebx,%r12d # T1+=h - - mov %ecx,%ebx - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %ecx,%r13d - mov %ecx,%r14d - - ror $2,%ebx - ror $13,%r13d - mov %ecx,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%ebx - ror $9,%r13d - or %r8d,%r14d # a|c - - xor %r13d,%ebx # h=Sigma0(a) - and %r8d,%r15d # a&c - add %r12d,%r9d # d+=T1 - - and %edx,%r14d # (a|c)&b - add %r12d,%ebx # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%ebx # h+=Maj(a,b,c) - mov 32(%rsp),%r13d - mov 20(%rsp),%r12d - - mov %r13d,%r15d - - shr $3,%r13d - ror $7,%r15d - - xor %r15d,%r13d - ror $11,%r15d - - xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) - mov %r12d,%r14d - - shr $10,%r12d - ror $17,%r14d - - xor %r14d,%r12d - ror $2,%r14d - - xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) - - add %r13d,%r12d - - add 0(%rsp),%r12d - - add 28(%rsp),%r12d - mov %r9d,%r13d - mov %r9d,%r14d - mov %r10d,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %r11d,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %r9d,%r15d # (f^g)&e - mov %r12d,28(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %r11d,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %eax,%r12d # T1+=h - - mov %ebx,%eax - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %ebx,%r13d - mov %ebx,%r14d - - ror $2,%eax - ror $13,%r13d - mov %ebx,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%eax - ror $9,%r13d - or %edx,%r14d # a|c - - xor %r13d,%eax # h=Sigma0(a) - and %edx,%r15d # a&c - add %r12d,%r8d # d+=T1 - - and %ecx,%r14d # (a|c)&b - add %r12d,%eax # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%eax # h+=Maj(a,b,c) - mov 36(%rsp),%r13d - mov 24(%rsp),%r12d - - mov %r13d,%r15d - - shr $3,%r13d - ror $7,%r15d - - xor %r15d,%r13d - ror $11,%r15d - - xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) - mov %r12d,%r14d - - shr $10,%r12d - ror $17,%r14d - - xor %r14d,%r12d - ror $2,%r14d - - xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) - - add %r13d,%r12d - - add 4(%rsp),%r12d - - add 32(%rsp),%r12d - mov %r8d,%r13d - mov %r8d,%r14d - mov %r9d,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %r10d,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %r8d,%r15d # (f^g)&e - mov %r12d,32(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %r10d,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %r11d,%r12d # T1+=h - - mov %eax,%r11d - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %eax,%r13d - mov %eax,%r14d - - ror $2,%r11d - ror $13,%r13d - mov %eax,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%r11d - ror $9,%r13d - or %ecx,%r14d # a|c - - xor %r13d,%r11d # h=Sigma0(a) - and %ecx,%r15d # a&c - add %r12d,%edx # d+=T1 - - and %ebx,%r14d # (a|c)&b - add %r12d,%r11d # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%r11d # h+=Maj(a,b,c) - mov 40(%rsp),%r13d - mov 28(%rsp),%r12d - - mov %r13d,%r15d - - shr $3,%r13d - ror $7,%r15d - - xor %r15d,%r13d - ror $11,%r15d - - xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) - mov %r12d,%r14d - - shr $10,%r12d - ror $17,%r14d - - xor %r14d,%r12d - ror $2,%r14d - - xor %r14d,%r12d # 
sigma1(X[(i+14)&0xf]) - - add %r13d,%r12d - - add 8(%rsp),%r12d - - add 36(%rsp),%r12d - mov %edx,%r13d - mov %edx,%r14d - mov %r8d,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %r9d,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %edx,%r15d # (f^g)&e - mov %r12d,36(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %r9d,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %r10d,%r12d # T1+=h - - mov %r11d,%r10d - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %r11d,%r13d - mov %r11d,%r14d - - ror $2,%r10d - ror $13,%r13d - mov %r11d,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%r10d - ror $9,%r13d - or %ebx,%r14d # a|c - - xor %r13d,%r10d # h=Sigma0(a) - and %ebx,%r15d # a&c - add %r12d,%ecx # d+=T1 - - and %eax,%r14d # (a|c)&b - add %r12d,%r10d # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%r10d # h+=Maj(a,b,c) - mov 44(%rsp),%r13d - mov 32(%rsp),%r12d - - mov %r13d,%r15d - - shr $3,%r13d - ror $7,%r15d - - xor %r15d,%r13d - ror $11,%r15d - - xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) - mov %r12d,%r14d - - shr $10,%r12d - ror $17,%r14d - - xor %r14d,%r12d - ror $2,%r14d - - xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) - - add %r13d,%r12d - - add 12(%rsp),%r12d - - add 40(%rsp),%r12d - mov %ecx,%r13d - mov %ecx,%r14d - mov %edx,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %r8d,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %ecx,%r15d # (f^g)&e - mov %r12d,40(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %r8d,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %r9d,%r12d # T1+=h - - mov %r10d,%r9d - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %r10d,%r13d - mov %r10d,%r14d - - ror $2,%r9d - ror $13,%r13d - mov %r10d,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%r9d - ror $9,%r13d - or %eax,%r14d # a|c - - xor %r13d,%r9d # h=Sigma0(a) - and %eax,%r15d # a&c - add %r12d,%ebx # d+=T1 - - and %r11d,%r14d # (a|c)&b - add %r12d,%r9d # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%r9d # h+=Maj(a,b,c) - mov 48(%rsp),%r13d - mov 36(%rsp),%r12d - - mov %r13d,%r15d - - shr $3,%r13d - ror $7,%r15d - - xor %r15d,%r13d - ror $11,%r15d - - xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) - mov %r12d,%r14d - - shr $10,%r12d - ror $17,%r14d - - xor %r14d,%r12d - ror $2,%r14d - - xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) - - add %r13d,%r12d - - add 16(%rsp),%r12d - - add 44(%rsp),%r12d - mov %ebx,%r13d - mov %ebx,%r14d - mov %ecx,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %edx,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %ebx,%r15d # (f^g)&e - mov %r12d,44(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %edx,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %r8d,%r12d # T1+=h - - mov %r9d,%r8d - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %r9d,%r13d - mov %r9d,%r14d - - ror $2,%r8d - ror $13,%r13d - mov %r9d,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%r8d - ror $9,%r13d - or %r11d,%r14d # a|c - - xor %r13d,%r8d # h=Sigma0(a) - and %r11d,%r15d # a&c - add %r12d,%eax # d+=T1 - - and %r10d,%r14d # (a|c)&b - add %r12d,%r8d # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%r8d # h+=Maj(a,b,c) - mov 52(%rsp),%r13d - mov 40(%rsp),%r12d - - mov %r13d,%r15d - - shr $3,%r13d - ror $7,%r15d - - xor %r15d,%r13d - ror $11,%r15d - - xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) - mov %r12d,%r14d - - shr $10,%r12d - ror $17,%r14d - - xor %r14d,%r12d - ror $2,%r14d - - xor %r14d,%r12d # 
sigma1(X[(i+14)&0xf]) - - add %r13d,%r12d - - add 20(%rsp),%r12d - - add 48(%rsp),%r12d - mov %eax,%r13d - mov %eax,%r14d - mov %ebx,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %ecx,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %eax,%r15d # (f^g)&e - mov %r12d,48(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %ecx,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %edx,%r12d # T1+=h - - mov %r8d,%edx - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %r8d,%r13d - mov %r8d,%r14d - - ror $2,%edx - ror $13,%r13d - mov %r8d,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%edx - ror $9,%r13d - or %r10d,%r14d # a|c - - xor %r13d,%edx # h=Sigma0(a) - and %r10d,%r15d # a&c - add %r12d,%r11d # d+=T1 - - and %r9d,%r14d # (a|c)&b - add %r12d,%edx # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%edx # h+=Maj(a,b,c) - mov 56(%rsp),%r13d - mov 44(%rsp),%r12d - - mov %r13d,%r15d - - shr $3,%r13d - ror $7,%r15d - - xor %r15d,%r13d - ror $11,%r15d - - xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) - mov %r12d,%r14d - - shr $10,%r12d - ror $17,%r14d - - xor %r14d,%r12d - ror $2,%r14d - - xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) - - add %r13d,%r12d - - add 24(%rsp),%r12d - - add 52(%rsp),%r12d - mov %r11d,%r13d - mov %r11d,%r14d - mov %eax,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %ebx,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %r11d,%r15d # (f^g)&e - mov %r12d,52(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %ebx,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %ecx,%r12d # T1+=h - - mov %edx,%ecx - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %edx,%r13d - mov %edx,%r14d - - ror $2,%ecx - ror $13,%r13d - mov %edx,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%ecx - ror $9,%r13d - or %r9d,%r14d # a|c - - xor %r13d,%ecx # h=Sigma0(a) - and %r9d,%r15d # a&c - add %r12d,%r10d # d+=T1 - - and %r8d,%r14d # (a|c)&b - add %r12d,%ecx # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%ecx # h+=Maj(a,b,c) - mov 60(%rsp),%r13d - mov 48(%rsp),%r12d - - mov %r13d,%r15d - - shr $3,%r13d - ror $7,%r15d - - xor %r15d,%r13d - ror $11,%r15d - - xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) - mov %r12d,%r14d - - shr $10,%r12d - ror $17,%r14d - - xor %r14d,%r12d - ror $2,%r14d - - xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) - - add %r13d,%r12d - - add 28(%rsp),%r12d - - add 56(%rsp),%r12d - mov %r10d,%r13d - mov %r10d,%r14d - mov %r11d,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %eax,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %r10d,%r15d # (f^g)&e - mov %r12d,56(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %eax,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %ebx,%r12d # T1+=h - - mov %ecx,%ebx - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %ecx,%r13d - mov %ecx,%r14d - - ror $2,%ebx - ror $13,%r13d - mov %ecx,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%ebx - ror $9,%r13d - or %r8d,%r14d # a|c - - xor %r13d,%ebx # h=Sigma0(a) - and %r8d,%r15d # a&c - add %r12d,%r9d # d+=T1 - - and %edx,%r14d # (a|c)&b - add %r12d,%ebx # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%ebx # h+=Maj(a,b,c) - mov 0(%rsp),%r13d - mov 52(%rsp),%r12d - - mov %r13d,%r15d - - shr $3,%r13d - ror $7,%r15d - - xor %r15d,%r13d - ror $11,%r15d - - xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) - mov %r12d,%r14d - - shr $10,%r12d - ror $17,%r14d - - xor %r14d,%r12d - ror $2,%r14d - - xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) 
- - add %r13d,%r12d - - add 32(%rsp),%r12d - - add 60(%rsp),%r12d - mov %r9d,%r13d - mov %r9d,%r14d - mov %r10d,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %r11d,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %r9d,%r15d # (f^g)&e - mov %r12d,60(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %r11d,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %eax,%r12d # T1+=h - - mov %ebx,%eax - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %ebx,%r13d - mov %ebx,%r14d - - ror $2,%eax - ror $13,%r13d - mov %ebx,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%eax - ror $9,%r13d - or %edx,%r14d # a|c - - xor %r13d,%eax # h=Sigma0(a) - and %edx,%r15d # a&c - add %r12d,%r8d # d+=T1 - - and %ecx,%r14d # (a|c)&b - add %r12d,%eax # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%eax # h+=Maj(a,b,c) - cmp $64,%rdi - jb .Lrounds_16_xx - - mov 16*4+0*8(%rsp),%rdi - lea 16*4(%rsi),%rsi - - add 4*0(%rdi),%eax - add 4*1(%rdi),%ebx - add 4*2(%rdi),%ecx - add 4*3(%rdi),%edx - add 4*4(%rdi),%r8d - add 4*5(%rdi),%r9d - add 4*6(%rdi),%r10d - add 4*7(%rdi),%r11d - - cmp 16*4+2*8(%rsp),%rsi - - mov %eax,4*0(%rdi) - mov %ebx,4*1(%rdi) - mov %ecx,4*2(%rdi) - mov %edx,4*3(%rdi) - mov %r8d,4*4(%rdi) - mov %r9d,4*5(%rdi) - mov %r10d,4*6(%rdi) - mov %r11d,4*7(%rdi) - jb .Lloop - - mov 16*4+3*8(%rsp),%rsp -.cfi_def_cfa %rsp,56 - pop %r15 -.cfi_adjust_cfa_offset -8 -.cfi_restore %r15 - pop %r14 -.cfi_adjust_cfa_offset -8 -.cfi_restore %r14 - pop %r13 -.cfi_adjust_cfa_offset -8 -.cfi_restore %r13 - pop %r12 -.cfi_adjust_cfa_offset -8 -.cfi_restore %r12 - pop %rbp -.cfi_adjust_cfa_offset -8 -.cfi_restore %rbp - pop %rbx -.cfi_adjust_cfa_offset -8 -.cfi_restore %rbx - - RET -.cfi_endproc -SET_SIZE(SHA256TransformBlocks) - -SECTION_STATIC -.balign 64 -SET_OBJ(K256) -K256: - .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 - .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 - .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 - .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 - .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc - .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da - .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 - .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 - .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 - .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 - .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 - .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 - .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 - .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 - .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 - .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 -#endif /* !lint && !__lint */ - -#ifdef __ELF__ -.section .note.GNU-stack,"",%progbits -#endif diff --git a/module/icp/asm-x86_64/sha2/sha512_impl.S b/module/icp/asm-x86_64/sha2/sha512_impl.S deleted file mode 100644 index b2f7d4863d..0000000000 --- a/module/icp/asm-x86_64/sha2/sha512_impl.S +++ /dev/null @@ -1,2115 +0,0 @@ -/* - * ==================================================================== - * Written by Andy Polyakov for the OpenSSL - * project. Rights for redistribution and usage in source and binary - * forms are granted according to the OpenSSL license. - * ==================================================================== - * - * sha256/512_block procedure for x86_64. - * - * 40% improvement over compiler-generated code on Opteron. On EM64T - * sha256 was observed to run >80% faster and sha512 - >40%. 
No magical - * tricks, just straight implementation... I really wonder why gcc - * [being armed with inline assembler] fails to generate as fast code. - * The only thing which is cool about this module is that it's very - * same instruction sequence used for both SHA-256 and SHA-512. In - * former case the instructions operate on 32-bit operands, while in - * latter - on 64-bit ones. All I had to do is to get one flavor right, - * the other one passed the test right away:-) - * - * sha256_block runs in ~1005 cycles on Opteron, which gives you - * asymptotic performance of 64*1000/1005=63.7MBps times CPU clock - * frequency in GHz. sha512_block runs in ~1275 cycles, which results - * in 128*1000/1275=100MBps per GHz. Is there room for improvement? - * Well, if you compare it to IA-64 implementation, which maintains - * X[16] in register bank[!], tends to 4 instructions per CPU clock - * cycle and runs in 1003 cycles, 1275 is very good result for 3-way - * issue Opteron pipeline and X[16] maintained in memory. So that *if* - * there is a way to improve it, *then* the only way would be to try to - * offload X[16] updates to SSE unit, but that would require "deeper" - * loop unroll, which in turn would naturally cause size blow-up, not - * to mention increased complexity! And once again, only *if* it's - * actually possible to noticeably improve overall ILP, instruction - * level parallelism, on a given CPU implementation in this case. - * - * Special note on Intel EM64T. While Opteron CPU exhibits perfect - * performance ratio of 1.5 between 64- and 32-bit flavors [see above], - * [currently available] EM64T CPUs apparently are far from it. On the - * contrary, 64-bit version, sha512_block, is ~30% *slower* than 32-bit - * sha256_block:-( This is presumably because 64-bit shifts/rotates - * apparently are not atomic instructions, but implemented in microcode. - */ - -/* - * OpenSolaris OS modifications - * - * Sun elects to use this software under the BSD license. - * - * This source originates from OpenSSL file sha512-x86_64.pl at - * ftp://ftp.openssl.org/snapshot/openssl-0.9.8-stable-SNAP-20080131.tar.gz - * (presumably for future OpenSSL release 0.9.8h), with these changes: - * - * 1. Added perl "use strict" and declared variables. - * - * 2. Added OpenSolaris ENTRY_NP/SET_SIZE macros from - * /usr/include/sys/asm_linkage.h, .ident keywords, and lint(1B) guards. - * - * 3. Removed x86_64-xlate.pl script (not needed for as(1) or gas(1) - * assemblers). Replaced the .picmeup macro with assembler code. - * - * 4. Added 8 to $ctx, as OpenSolaris OS has an extra 4-byte field, "algotype", - * at the beginning of SHA2_CTX (the next field is 8-byte aligned). - */ - -/* - * This file was generated by a perl script (sha512-x86_64.pl) that were - * used to generate sha256 and sha512 variants from the same code base. - * The comments from the original file have been pasted above. 
- */ - - -#if defined(lint) || defined(__lint) -#include -#include - -void -SHA512TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num) -{ - (void) ctx, (void) in, (void) num; -} - - -#else -#define _ASM -#include - -ENTRY_NP(SHA512TransformBlocks) -.cfi_startproc - ENDBR - movq %rsp, %rax -.cfi_def_cfa_register %rax - push %rbx -.cfi_offset %rbx,-16 - push %rbp -.cfi_offset %rbp,-24 - push %r12 -.cfi_offset %r12,-32 - push %r13 -.cfi_offset %r13,-40 - push %r14 -.cfi_offset %r14,-48 - push %r15 -.cfi_offset %r15,-56 - mov %rsp,%rbp # copy %rsp - shl $4,%rdx # num*16 - sub $16*8+4*8,%rsp - lea (%rsi,%rdx,8),%rdx # inp+num*16*8 - and $-64,%rsp # align stack frame - add $8,%rdi # Skip OpenSolaris field, "algotype" - mov %rdi,16*8+0*8(%rsp) # save ctx, 1st arg - mov %rsi,16*8+1*8(%rsp) # save inp, 2nd arg - mov %rdx,16*8+2*8(%rsp) # save end pointer, "3rd" arg - mov %rbp,16*8+3*8(%rsp) # save copy of %rsp -# echo ".cfi_cfa_expression %rsp+152,deref,+56" | -# openssl/crypto/perlasm/x86_64-xlate.pl -.cfi_escape 0x0f,0x06,0x77,0x98,0x01,0x06,0x23,0x38 - - #.picmeup %rbp - # The .picmeup pseudo-directive, from perlasm/x86_64_xlate.pl, puts - # the address of the "next" instruction into the target register - # (%rbp). This generates these 2 instructions: - lea .Llea(%rip),%rbp - #nop # .picmeup generates a nop for mod 8 alignment--not needed here - -.Llea: - lea K512-.(%rbp),%rbp - - mov 8*0(%rdi),%rax - mov 8*1(%rdi),%rbx - mov 8*2(%rdi),%rcx - mov 8*3(%rdi),%rdx - mov 8*4(%rdi),%r8 - mov 8*5(%rdi),%r9 - mov 8*6(%rdi),%r10 - mov 8*7(%rdi),%r11 - jmp .Lloop - -.balign 16 -.Lloop: - xor %rdi,%rdi - mov 8*0(%rsi),%r12 - bswap %r12 - mov %r8,%r13 - mov %r8,%r14 - mov %r9,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %r10,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %r8,%r15 # (f^g)&e - mov %r12,0(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %r10,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %r11,%r12 # T1+=h - - mov %rax,%r11 - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %rax,%r13 - mov %rax,%r14 - - ror $28,%r11 - ror $34,%r13 - mov %rax,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%r11 - ror $5,%r13 - or %rcx,%r14 # a|c - - xor %r13,%r11 # h=Sigma0(a) - and %rcx,%r15 # a&c - add %r12,%rdx # d+=T1 - - and %rbx,%r14 # (a|c)&b - add %r12,%r11 # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%r11 # h+=Maj(a,b,c) - mov 8*1(%rsi),%r12 - bswap %r12 - mov %rdx,%r13 - mov %rdx,%r14 - mov %r8,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %r9,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %rdx,%r15 # (f^g)&e - mov %r12,8(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %r9,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %r10,%r12 # T1+=h - - mov %r11,%r10 - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %r11,%r13 - mov %r11,%r14 - - ror $28,%r10 - ror $34,%r13 - mov %r11,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%r10 - ror $5,%r13 - or %rbx,%r14 # a|c - - xor %r13,%r10 # h=Sigma0(a) - and %rbx,%r15 # a&c - add %r12,%rcx # d+=T1 - - and %rax,%r14 # (a|c)&b - add %r12,%r10 # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%r10 # h+=Maj(a,b,c) - mov 8*2(%rsi),%r12 - bswap %r12 - mov %rcx,%r13 - mov %rcx,%r14 - mov %rdx,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %r8,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %rcx,%r15 # (f^g)&e - mov %r12,16(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %r8,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %r9,%r12 # T1+=h - - mov %r10,%r9 - add 
%r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %r10,%r13 - mov %r10,%r14 - - ror $28,%r9 - ror $34,%r13 - mov %r10,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%r9 - ror $5,%r13 - or %rax,%r14 # a|c - - xor %r13,%r9 # h=Sigma0(a) - and %rax,%r15 # a&c - add %r12,%rbx # d+=T1 - - and %r11,%r14 # (a|c)&b - add %r12,%r9 # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%r9 # h+=Maj(a,b,c) - mov 8*3(%rsi),%r12 - bswap %r12 - mov %rbx,%r13 - mov %rbx,%r14 - mov %rcx,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %rdx,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %rbx,%r15 # (f^g)&e - mov %r12,24(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %rdx,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %r8,%r12 # T1+=h - - mov %r9,%r8 - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %r9,%r13 - mov %r9,%r14 - - ror $28,%r8 - ror $34,%r13 - mov %r9,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%r8 - ror $5,%r13 - or %r11,%r14 # a|c - - xor %r13,%r8 # h=Sigma0(a) - and %r11,%r15 # a&c - add %r12,%rax # d+=T1 - - and %r10,%r14 # (a|c)&b - add %r12,%r8 # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%r8 # h+=Maj(a,b,c) - mov 8*4(%rsi),%r12 - bswap %r12 - mov %rax,%r13 - mov %rax,%r14 - mov %rbx,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %rcx,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %rax,%r15 # (f^g)&e - mov %r12,32(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %rcx,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %rdx,%r12 # T1+=h - - mov %r8,%rdx - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %r8,%r13 - mov %r8,%r14 - - ror $28,%rdx - ror $34,%r13 - mov %r8,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%rdx - ror $5,%r13 - or %r10,%r14 # a|c - - xor %r13,%rdx # h=Sigma0(a) - and %r10,%r15 # a&c - add %r12,%r11 # d+=T1 - - and %r9,%r14 # (a|c)&b - add %r12,%rdx # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%rdx # h+=Maj(a,b,c) - mov 8*5(%rsi),%r12 - bswap %r12 - mov %r11,%r13 - mov %r11,%r14 - mov %rax,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %rbx,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %r11,%r15 # (f^g)&e - mov %r12,40(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %rbx,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %rcx,%r12 # T1+=h - - mov %rdx,%rcx - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %rdx,%r13 - mov %rdx,%r14 - - ror $28,%rcx - ror $34,%r13 - mov %rdx,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%rcx - ror $5,%r13 - or %r9,%r14 # a|c - - xor %r13,%rcx # h=Sigma0(a) - and %r9,%r15 # a&c - add %r12,%r10 # d+=T1 - - and %r8,%r14 # (a|c)&b - add %r12,%rcx # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%rcx # h+=Maj(a,b,c) - mov 8*6(%rsi),%r12 - bswap %r12 - mov %r10,%r13 - mov %r10,%r14 - mov %r11,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %rax,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %r10,%r15 # (f^g)&e - mov %r12,48(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %rax,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %rbx,%r12 # T1+=h - - mov %rcx,%rbx - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %rcx,%r13 - mov %rcx,%r14 - - ror $28,%rbx - ror $34,%r13 - mov %rcx,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%rbx - ror $5,%r13 - or %r8,%r14 # a|c - - xor %r13,%rbx # h=Sigma0(a) - and %r8,%r15 # a&c - add %r12,%r9 # d+=T1 - - and %rdx,%r14 # (a|c)&b - add %r12,%rbx # 
h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%rbx # h+=Maj(a,b,c) - mov 8*7(%rsi),%r12 - bswap %r12 - mov %r9,%r13 - mov %r9,%r14 - mov %r10,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %r11,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %r9,%r15 # (f^g)&e - mov %r12,56(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %r11,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %rax,%r12 # T1+=h - - mov %rbx,%rax - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %rbx,%r13 - mov %rbx,%r14 - - ror $28,%rax - ror $34,%r13 - mov %rbx,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%rax - ror $5,%r13 - or %rdx,%r14 # a|c - - xor %r13,%rax # h=Sigma0(a) - and %rdx,%r15 # a&c - add %r12,%r8 # d+=T1 - - and %rcx,%r14 # (a|c)&b - add %r12,%rax # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%rax # h+=Maj(a,b,c) - mov 8*8(%rsi),%r12 - bswap %r12 - mov %r8,%r13 - mov %r8,%r14 - mov %r9,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %r10,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %r8,%r15 # (f^g)&e - mov %r12,64(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %r10,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %r11,%r12 # T1+=h - - mov %rax,%r11 - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %rax,%r13 - mov %rax,%r14 - - ror $28,%r11 - ror $34,%r13 - mov %rax,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%r11 - ror $5,%r13 - or %rcx,%r14 # a|c - - xor %r13,%r11 # h=Sigma0(a) - and %rcx,%r15 # a&c - add %r12,%rdx # d+=T1 - - and %rbx,%r14 # (a|c)&b - add %r12,%r11 # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%r11 # h+=Maj(a,b,c) - mov 8*9(%rsi),%r12 - bswap %r12 - mov %rdx,%r13 - mov %rdx,%r14 - mov %r8,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %r9,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %rdx,%r15 # (f^g)&e - mov %r12,72(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %r9,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %r10,%r12 # T1+=h - - mov %r11,%r10 - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %r11,%r13 - mov %r11,%r14 - - ror $28,%r10 - ror $34,%r13 - mov %r11,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%r10 - ror $5,%r13 - or %rbx,%r14 # a|c - - xor %r13,%r10 # h=Sigma0(a) - and %rbx,%r15 # a&c - add %r12,%rcx # d+=T1 - - and %rax,%r14 # (a|c)&b - add %r12,%r10 # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%r10 # h+=Maj(a,b,c) - mov 8*10(%rsi),%r12 - bswap %r12 - mov %rcx,%r13 - mov %rcx,%r14 - mov %rdx,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %r8,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %rcx,%r15 # (f^g)&e - mov %r12,80(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %r8,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %r9,%r12 # T1+=h - - mov %r10,%r9 - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %r10,%r13 - mov %r10,%r14 - - ror $28,%r9 - ror $34,%r13 - mov %r10,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%r9 - ror $5,%r13 - or %rax,%r14 # a|c - - xor %r13,%r9 # h=Sigma0(a) - and %rax,%r15 # a&c - add %r12,%rbx # d+=T1 - - and %r11,%r14 # (a|c)&b - add %r12,%r9 # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%r9 # h+=Maj(a,b,c) - mov 8*11(%rsi),%r12 - bswap %r12 - mov %rbx,%r13 - mov %rbx,%r14 - mov %rcx,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %rdx,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %rbx,%r15 # (f^g)&e - mov %r12,88(%rsp) - - xor %r14,%r13 # 
Sigma1(e) - xor %rdx,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %r8,%r12 # T1+=h - - mov %r9,%r8 - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %r9,%r13 - mov %r9,%r14 - - ror $28,%r8 - ror $34,%r13 - mov %r9,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%r8 - ror $5,%r13 - or %r11,%r14 # a|c - - xor %r13,%r8 # h=Sigma0(a) - and %r11,%r15 # a&c - add %r12,%rax # d+=T1 - - and %r10,%r14 # (a|c)&b - add %r12,%r8 # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%r8 # h+=Maj(a,b,c) - mov 8*12(%rsi),%r12 - bswap %r12 - mov %rax,%r13 - mov %rax,%r14 - mov %rbx,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %rcx,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %rax,%r15 # (f^g)&e - mov %r12,96(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %rcx,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %rdx,%r12 # T1+=h - - mov %r8,%rdx - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %r8,%r13 - mov %r8,%r14 - - ror $28,%rdx - ror $34,%r13 - mov %r8,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%rdx - ror $5,%r13 - or %r10,%r14 # a|c - - xor %r13,%rdx # h=Sigma0(a) - and %r10,%r15 # a&c - add %r12,%r11 # d+=T1 - - and %r9,%r14 # (a|c)&b - add %r12,%rdx # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%rdx # h+=Maj(a,b,c) - mov 8*13(%rsi),%r12 - bswap %r12 - mov %r11,%r13 - mov %r11,%r14 - mov %rax,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %rbx,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %r11,%r15 # (f^g)&e - mov %r12,104(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %rbx,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %rcx,%r12 # T1+=h - - mov %rdx,%rcx - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %rdx,%r13 - mov %rdx,%r14 - - ror $28,%rcx - ror $34,%r13 - mov %rdx,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%rcx - ror $5,%r13 - or %r9,%r14 # a|c - - xor %r13,%rcx # h=Sigma0(a) - and %r9,%r15 # a&c - add %r12,%r10 # d+=T1 - - and %r8,%r14 # (a|c)&b - add %r12,%rcx # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%rcx # h+=Maj(a,b,c) - mov 8*14(%rsi),%r12 - bswap %r12 - mov %r10,%r13 - mov %r10,%r14 - mov %r11,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %rax,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %r10,%r15 # (f^g)&e - mov %r12,112(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %rax,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %rbx,%r12 # T1+=h - - mov %rcx,%rbx - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %rcx,%r13 - mov %rcx,%r14 - - ror $28,%rbx - ror $34,%r13 - mov %rcx,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%rbx - ror $5,%r13 - or %r8,%r14 # a|c - - xor %r13,%rbx # h=Sigma0(a) - and %r8,%r15 # a&c - add %r12,%r9 # d+=T1 - - and %rdx,%r14 # (a|c)&b - add %r12,%rbx # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%rbx # h+=Maj(a,b,c) - mov 8*15(%rsi),%r12 - bswap %r12 - mov %r9,%r13 - mov %r9,%r14 - mov %r10,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %r11,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %r9,%r15 # (f^g)&e - mov %r12,120(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %r11,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %rax,%r12 # T1+=h - - mov %rbx,%rax - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %rbx,%r13 - mov %rbx,%r14 - - ror $28,%rax - ror $34,%r13 - mov %rbx,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%rax - ror $5,%r13 - or %rdx,%r14 # a|c - - xor %r13,%rax # 
h=Sigma0(a) - and %rdx,%r15 # a&c - add %r12,%r8 # d+=T1 - - and %rcx,%r14 # (a|c)&b - add %r12,%rax # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%rax # h+=Maj(a,b,c) - jmp .Lrounds_16_xx -.balign 16 -.Lrounds_16_xx: - mov 8(%rsp),%r13 - mov 112(%rsp),%r12 - - mov %r13,%r15 - - shr $7,%r13 - ror $1,%r15 - - xor %r15,%r13 - ror $7,%r15 - - xor %r15,%r13 # sigma0(X[(i+1)&0xf]) - mov %r12,%r14 - - shr $6,%r12 - ror $19,%r14 - - xor %r14,%r12 - ror $42,%r14 - - xor %r14,%r12 # sigma1(X[(i+14)&0xf]) - - add %r13,%r12 - - add 72(%rsp),%r12 - - add 0(%rsp),%r12 - mov %r8,%r13 - mov %r8,%r14 - mov %r9,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %r10,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %r8,%r15 # (f^g)&e - mov %r12,0(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %r10,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %r11,%r12 # T1+=h - - mov %rax,%r11 - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %rax,%r13 - mov %rax,%r14 - - ror $28,%r11 - ror $34,%r13 - mov %rax,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%r11 - ror $5,%r13 - or %rcx,%r14 # a|c - - xor %r13,%r11 # h=Sigma0(a) - and %rcx,%r15 # a&c - add %r12,%rdx # d+=T1 - - and %rbx,%r14 # (a|c)&b - add %r12,%r11 # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%r11 # h+=Maj(a,b,c) - mov 16(%rsp),%r13 - mov 120(%rsp),%r12 - - mov %r13,%r15 - - shr $7,%r13 - ror $1,%r15 - - xor %r15,%r13 - ror $7,%r15 - - xor %r15,%r13 # sigma0(X[(i+1)&0xf]) - mov %r12,%r14 - - shr $6,%r12 - ror $19,%r14 - - xor %r14,%r12 - ror $42,%r14 - - xor %r14,%r12 # sigma1(X[(i+14)&0xf]) - - add %r13,%r12 - - add 80(%rsp),%r12 - - add 8(%rsp),%r12 - mov %rdx,%r13 - mov %rdx,%r14 - mov %r8,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %r9,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %rdx,%r15 # (f^g)&e - mov %r12,8(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %r9,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %r10,%r12 # T1+=h - - mov %r11,%r10 - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %r11,%r13 - mov %r11,%r14 - - ror $28,%r10 - ror $34,%r13 - mov %r11,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%r10 - ror $5,%r13 - or %rbx,%r14 # a|c - - xor %r13,%r10 # h=Sigma0(a) - and %rbx,%r15 # a&c - add %r12,%rcx # d+=T1 - - and %rax,%r14 # (a|c)&b - add %r12,%r10 # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%r10 # h+=Maj(a,b,c) - mov 24(%rsp),%r13 - mov 0(%rsp),%r12 - - mov %r13,%r15 - - shr $7,%r13 - ror $1,%r15 - - xor %r15,%r13 - ror $7,%r15 - - xor %r15,%r13 # sigma0(X[(i+1)&0xf]) - mov %r12,%r14 - - shr $6,%r12 - ror $19,%r14 - - xor %r14,%r12 - ror $42,%r14 - - xor %r14,%r12 # sigma1(X[(i+14)&0xf]) - - add %r13,%r12 - - add 88(%rsp),%r12 - - add 16(%rsp),%r12 - mov %rcx,%r13 - mov %rcx,%r14 - mov %rdx,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %r8,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %rcx,%r15 # (f^g)&e - mov %r12,16(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %r8,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %r9,%r12 # T1+=h - - mov %r10,%r9 - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %r10,%r13 - mov %r10,%r14 - - ror $28,%r9 - ror $34,%r13 - mov %r10,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%r9 - ror $5,%r13 - or %rax,%r14 # a|c - - xor %r13,%r9 # h=Sigma0(a) - and %rax,%r15 # a&c - add %r12,%rbx # d+=T1 - - and %r11,%r14 # (a|c)&b - add %r12,%r9 # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # 
round++ - - add %r14,%r9 # h+=Maj(a,b,c) - mov 32(%rsp),%r13 - mov 8(%rsp),%r12 - - mov %r13,%r15 - - shr $7,%r13 - ror $1,%r15 - - xor %r15,%r13 - ror $7,%r15 - - xor %r15,%r13 # sigma0(X[(i+1)&0xf]) - mov %r12,%r14 - - shr $6,%r12 - ror $19,%r14 - - xor %r14,%r12 - ror $42,%r14 - - xor %r14,%r12 # sigma1(X[(i+14)&0xf]) - - add %r13,%r12 - - add 96(%rsp),%r12 - - add 24(%rsp),%r12 - mov %rbx,%r13 - mov %rbx,%r14 - mov %rcx,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %rdx,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %rbx,%r15 # (f^g)&e - mov %r12,24(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %rdx,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %r8,%r12 # T1+=h - - mov %r9,%r8 - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %r9,%r13 - mov %r9,%r14 - - ror $28,%r8 - ror $34,%r13 - mov %r9,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%r8 - ror $5,%r13 - or %r11,%r14 # a|c - - xor %r13,%r8 # h=Sigma0(a) - and %r11,%r15 # a&c - add %r12,%rax # d+=T1 - - and %r10,%r14 # (a|c)&b - add %r12,%r8 # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%r8 # h+=Maj(a,b,c) - mov 40(%rsp),%r13 - mov 16(%rsp),%r12 - - mov %r13,%r15 - - shr $7,%r13 - ror $1,%r15 - - xor %r15,%r13 - ror $7,%r15 - - xor %r15,%r13 # sigma0(X[(i+1)&0xf]) - mov %r12,%r14 - - shr $6,%r12 - ror $19,%r14 - - xor %r14,%r12 - ror $42,%r14 - - xor %r14,%r12 # sigma1(X[(i+14)&0xf]) - - add %r13,%r12 - - add 104(%rsp),%r12 - - add 32(%rsp),%r12 - mov %rax,%r13 - mov %rax,%r14 - mov %rbx,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %rcx,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %rax,%r15 # (f^g)&e - mov %r12,32(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %rcx,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %rdx,%r12 # T1+=h - - mov %r8,%rdx - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %r8,%r13 - mov %r8,%r14 - - ror $28,%rdx - ror $34,%r13 - mov %r8,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%rdx - ror $5,%r13 - or %r10,%r14 # a|c - - xor %r13,%rdx # h=Sigma0(a) - and %r10,%r15 # a&c - add %r12,%r11 # d+=T1 - - and %r9,%r14 # (a|c)&b - add %r12,%rdx # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%rdx # h+=Maj(a,b,c) - mov 48(%rsp),%r13 - mov 24(%rsp),%r12 - - mov %r13,%r15 - - shr $7,%r13 - ror $1,%r15 - - xor %r15,%r13 - ror $7,%r15 - - xor %r15,%r13 # sigma0(X[(i+1)&0xf]) - mov %r12,%r14 - - shr $6,%r12 - ror $19,%r14 - - xor %r14,%r12 - ror $42,%r14 - - xor %r14,%r12 # sigma1(X[(i+14)&0xf]) - - add %r13,%r12 - - add 112(%rsp),%r12 - - add 40(%rsp),%r12 - mov %r11,%r13 - mov %r11,%r14 - mov %rax,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %rbx,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %r11,%r15 # (f^g)&e - mov %r12,40(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %rbx,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %rcx,%r12 # T1+=h - - mov %rdx,%rcx - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %rdx,%r13 - mov %rdx,%r14 - - ror $28,%rcx - ror $34,%r13 - mov %rdx,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%rcx - ror $5,%r13 - or %r9,%r14 # a|c - - xor %r13,%rcx # h=Sigma0(a) - and %r9,%r15 # a&c - add %r12,%r10 # d+=T1 - - and %r8,%r14 # (a|c)&b - add %r12,%rcx # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%rcx # h+=Maj(a,b,c) - mov 56(%rsp),%r13 - mov 32(%rsp),%r12 - - mov %r13,%r15 - - shr $7,%r13 - ror $1,%r15 - - xor %r15,%r13 - ror $7,%r15 - - xor %r15,%r13 # sigma0(X[(i+1)&0xf]) - mov %r12,%r14 - - 
shr $6,%r12 - ror $19,%r14 - - xor %r14,%r12 - ror $42,%r14 - - xor %r14,%r12 # sigma1(X[(i+14)&0xf]) - - add %r13,%r12 - - add 120(%rsp),%r12 - - add 48(%rsp),%r12 - mov %r10,%r13 - mov %r10,%r14 - mov %r11,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %rax,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %r10,%r15 # (f^g)&e - mov %r12,48(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %rax,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %rbx,%r12 # T1+=h - - mov %rcx,%rbx - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %rcx,%r13 - mov %rcx,%r14 - - ror $28,%rbx - ror $34,%r13 - mov %rcx,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%rbx - ror $5,%r13 - or %r8,%r14 # a|c - - xor %r13,%rbx # h=Sigma0(a) - and %r8,%r15 # a&c - add %r12,%r9 # d+=T1 - - and %rdx,%r14 # (a|c)&b - add %r12,%rbx # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%rbx # h+=Maj(a,b,c) - mov 64(%rsp),%r13 - mov 40(%rsp),%r12 - - mov %r13,%r15 - - shr $7,%r13 - ror $1,%r15 - - xor %r15,%r13 - ror $7,%r15 - - xor %r15,%r13 # sigma0(X[(i+1)&0xf]) - mov %r12,%r14 - - shr $6,%r12 - ror $19,%r14 - - xor %r14,%r12 - ror $42,%r14 - - xor %r14,%r12 # sigma1(X[(i+14)&0xf]) - - add %r13,%r12 - - add 0(%rsp),%r12 - - add 56(%rsp),%r12 - mov %r9,%r13 - mov %r9,%r14 - mov %r10,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %r11,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %r9,%r15 # (f^g)&e - mov %r12,56(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %r11,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %rax,%r12 # T1+=h - - mov %rbx,%rax - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %rbx,%r13 - mov %rbx,%r14 - - ror $28,%rax - ror $34,%r13 - mov %rbx,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%rax - ror $5,%r13 - or %rdx,%r14 # a|c - - xor %r13,%rax # h=Sigma0(a) - and %rdx,%r15 # a&c - add %r12,%r8 # d+=T1 - - and %rcx,%r14 # (a|c)&b - add %r12,%rax # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%rax # h+=Maj(a,b,c) - mov 72(%rsp),%r13 - mov 48(%rsp),%r12 - - mov %r13,%r15 - - shr $7,%r13 - ror $1,%r15 - - xor %r15,%r13 - ror $7,%r15 - - xor %r15,%r13 # sigma0(X[(i+1)&0xf]) - mov %r12,%r14 - - shr $6,%r12 - ror $19,%r14 - - xor %r14,%r12 - ror $42,%r14 - - xor %r14,%r12 # sigma1(X[(i+14)&0xf]) - - add %r13,%r12 - - add 8(%rsp),%r12 - - add 64(%rsp),%r12 - mov %r8,%r13 - mov %r8,%r14 - mov %r9,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %r10,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %r8,%r15 # (f^g)&e - mov %r12,64(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %r10,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %r11,%r12 # T1+=h - - mov %rax,%r11 - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %rax,%r13 - mov %rax,%r14 - - ror $28,%r11 - ror $34,%r13 - mov %rax,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%r11 - ror $5,%r13 - or %rcx,%r14 # a|c - - xor %r13,%r11 # h=Sigma0(a) - and %rcx,%r15 # a&c - add %r12,%rdx # d+=T1 - - and %rbx,%r14 # (a|c)&b - add %r12,%r11 # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%r11 # h+=Maj(a,b,c) - mov 80(%rsp),%r13 - mov 56(%rsp),%r12 - - mov %r13,%r15 - - shr $7,%r13 - ror $1,%r15 - - xor %r15,%r13 - ror $7,%r15 - - xor %r15,%r13 # sigma0(X[(i+1)&0xf]) - mov %r12,%r14 - - shr $6,%r12 - ror $19,%r14 - - xor %r14,%r12 - ror $42,%r14 - - xor %r14,%r12 # sigma1(X[(i+14)&0xf]) - - add %r13,%r12 - - add 16(%rsp),%r12 - - add 72(%rsp),%r12 - mov %rdx,%r13 - mov %rdx,%r14 - mov %r8,%r15 - - ror 
$14,%r13 - ror $18,%r14 - xor %r9,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %rdx,%r15 # (f^g)&e - mov %r12,72(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %r9,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %r10,%r12 # T1+=h - - mov %r11,%r10 - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %r11,%r13 - mov %r11,%r14 - - ror $28,%r10 - ror $34,%r13 - mov %r11,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%r10 - ror $5,%r13 - or %rbx,%r14 # a|c - - xor %r13,%r10 # h=Sigma0(a) - and %rbx,%r15 # a&c - add %r12,%rcx # d+=T1 - - and %rax,%r14 # (a|c)&b - add %r12,%r10 # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%r10 # h+=Maj(a,b,c) - mov 88(%rsp),%r13 - mov 64(%rsp),%r12 - - mov %r13,%r15 - - shr $7,%r13 - ror $1,%r15 - - xor %r15,%r13 - ror $7,%r15 - - xor %r15,%r13 # sigma0(X[(i+1)&0xf]) - mov %r12,%r14 - - shr $6,%r12 - ror $19,%r14 - - xor %r14,%r12 - ror $42,%r14 - - xor %r14,%r12 # sigma1(X[(i+14)&0xf]) - - add %r13,%r12 - - add 24(%rsp),%r12 - - add 80(%rsp),%r12 - mov %rcx,%r13 - mov %rcx,%r14 - mov %rdx,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %r8,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %rcx,%r15 # (f^g)&e - mov %r12,80(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %r8,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %r9,%r12 # T1+=h - - mov %r10,%r9 - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %r10,%r13 - mov %r10,%r14 - - ror $28,%r9 - ror $34,%r13 - mov %r10,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%r9 - ror $5,%r13 - or %rax,%r14 # a|c - - xor %r13,%r9 # h=Sigma0(a) - and %rax,%r15 # a&c - add %r12,%rbx # d+=T1 - - and %r11,%r14 # (a|c)&b - add %r12,%r9 # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%r9 # h+=Maj(a,b,c) - mov 96(%rsp),%r13 - mov 72(%rsp),%r12 - - mov %r13,%r15 - - shr $7,%r13 - ror $1,%r15 - - xor %r15,%r13 - ror $7,%r15 - - xor %r15,%r13 # sigma0(X[(i+1)&0xf]) - mov %r12,%r14 - - shr $6,%r12 - ror $19,%r14 - - xor %r14,%r12 - ror $42,%r14 - - xor %r14,%r12 # sigma1(X[(i+14)&0xf]) - - add %r13,%r12 - - add 32(%rsp),%r12 - - add 88(%rsp),%r12 - mov %rbx,%r13 - mov %rbx,%r14 - mov %rcx,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %rdx,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %rbx,%r15 # (f^g)&e - mov %r12,88(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %rdx,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %r8,%r12 # T1+=h - - mov %r9,%r8 - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %r9,%r13 - mov %r9,%r14 - - ror $28,%r8 - ror $34,%r13 - mov %r9,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%r8 - ror $5,%r13 - or %r11,%r14 # a|c - - xor %r13,%r8 # h=Sigma0(a) - and %r11,%r15 # a&c - add %r12,%rax # d+=T1 - - and %r10,%r14 # (a|c)&b - add %r12,%r8 # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%r8 # h+=Maj(a,b,c) - mov 104(%rsp),%r13 - mov 80(%rsp),%r12 - - mov %r13,%r15 - - shr $7,%r13 - ror $1,%r15 - - xor %r15,%r13 - ror $7,%r15 - - xor %r15,%r13 # sigma0(X[(i+1)&0xf]) - mov %r12,%r14 - - shr $6,%r12 - ror $19,%r14 - - xor %r14,%r12 - ror $42,%r14 - - xor %r14,%r12 # sigma1(X[(i+14)&0xf]) - - add %r13,%r12 - - add 40(%rsp),%r12 - - add 96(%rsp),%r12 - mov %rax,%r13 - mov %rax,%r14 - mov %rbx,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %rcx,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %rax,%r15 # (f^g)&e - mov %r12,96(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %rcx,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %rdx,%r12 # T1+=h - - mov 
%r8,%rdx - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %r8,%r13 - mov %r8,%r14 - - ror $28,%rdx - ror $34,%r13 - mov %r8,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%rdx - ror $5,%r13 - or %r10,%r14 # a|c - - xor %r13,%rdx # h=Sigma0(a) - and %r10,%r15 # a&c - add %r12,%r11 # d+=T1 - - and %r9,%r14 # (a|c)&b - add %r12,%rdx # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%rdx # h+=Maj(a,b,c) - mov 112(%rsp),%r13 - mov 88(%rsp),%r12 - - mov %r13,%r15 - - shr $7,%r13 - ror $1,%r15 - - xor %r15,%r13 - ror $7,%r15 - - xor %r15,%r13 # sigma0(X[(i+1)&0xf]) - mov %r12,%r14 - - shr $6,%r12 - ror $19,%r14 - - xor %r14,%r12 - ror $42,%r14 - - xor %r14,%r12 # sigma1(X[(i+14)&0xf]) - - add %r13,%r12 - - add 48(%rsp),%r12 - - add 104(%rsp),%r12 - mov %r11,%r13 - mov %r11,%r14 - mov %rax,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %rbx,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %r11,%r15 # (f^g)&e - mov %r12,104(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %rbx,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %rcx,%r12 # T1+=h - - mov %rdx,%rcx - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %rdx,%r13 - mov %rdx,%r14 - - ror $28,%rcx - ror $34,%r13 - mov %rdx,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%rcx - ror $5,%r13 - or %r9,%r14 # a|c - - xor %r13,%rcx # h=Sigma0(a) - and %r9,%r15 # a&c - add %r12,%r10 # d+=T1 - - and %r8,%r14 # (a|c)&b - add %r12,%rcx # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%rcx # h+=Maj(a,b,c) - mov 120(%rsp),%r13 - mov 96(%rsp),%r12 - - mov %r13,%r15 - - shr $7,%r13 - ror $1,%r15 - - xor %r15,%r13 - ror $7,%r15 - - xor %r15,%r13 # sigma0(X[(i+1)&0xf]) - mov %r12,%r14 - - shr $6,%r12 - ror $19,%r14 - - xor %r14,%r12 - ror $42,%r14 - - xor %r14,%r12 # sigma1(X[(i+14)&0xf]) - - add %r13,%r12 - - add 56(%rsp),%r12 - - add 112(%rsp),%r12 - mov %r10,%r13 - mov %r10,%r14 - mov %r11,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %rax,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %r10,%r15 # (f^g)&e - mov %r12,112(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %rax,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %rbx,%r12 # T1+=h - - mov %rcx,%rbx - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %rcx,%r13 - mov %rcx,%r14 - - ror $28,%rbx - ror $34,%r13 - mov %rcx,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%rbx - ror $5,%r13 - or %r8,%r14 # a|c - - xor %r13,%rbx # h=Sigma0(a) - and %r8,%r15 # a&c - add %r12,%r9 # d+=T1 - - and %rdx,%r14 # (a|c)&b - add %r12,%rbx # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%rbx # h+=Maj(a,b,c) - mov 0(%rsp),%r13 - mov 104(%rsp),%r12 - - mov %r13,%r15 - - shr $7,%r13 - ror $1,%r15 - - xor %r15,%r13 - ror $7,%r15 - - xor %r15,%r13 # sigma0(X[(i+1)&0xf]) - mov %r12,%r14 - - shr $6,%r12 - ror $19,%r14 - - xor %r14,%r12 - ror $42,%r14 - - xor %r14,%r12 # sigma1(X[(i+14)&0xf]) - - add %r13,%r12 - - add 64(%rsp),%r12 - - add 120(%rsp),%r12 - mov %r9,%r13 - mov %r9,%r14 - mov %r10,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %r11,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %r9,%r15 # (f^g)&e - mov %r12,120(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %r11,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %rax,%r12 # T1+=h - - mov %rbx,%rax - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %rbx,%r13 - mov %rbx,%r14 - - ror $28,%rax - ror $34,%r13 - mov %rbx,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor 
%r13,%rax - ror $5,%r13 - or %rdx,%r14 # a|c - - xor %r13,%rax # h=Sigma0(a) - and %rdx,%r15 # a&c - add %r12,%r8 # d+=T1 - - and %rcx,%r14 # (a|c)&b - add %r12,%rax # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%rax # h+=Maj(a,b,c) - cmp $80,%rdi - jb .Lrounds_16_xx - - mov 16*8+0*8(%rsp),%rdi - lea 16*8(%rsi),%rsi - - add 8*0(%rdi),%rax - add 8*1(%rdi),%rbx - add 8*2(%rdi),%rcx - add 8*3(%rdi),%rdx - add 8*4(%rdi),%r8 - add 8*5(%rdi),%r9 - add 8*6(%rdi),%r10 - add 8*7(%rdi),%r11 - - cmp 16*8+2*8(%rsp),%rsi - - mov %rax,8*0(%rdi) - mov %rbx,8*1(%rdi) - mov %rcx,8*2(%rdi) - mov %rdx,8*3(%rdi) - mov %r8,8*4(%rdi) - mov %r9,8*5(%rdi) - mov %r10,8*6(%rdi) - mov %r11,8*7(%rdi) - jb .Lloop - - mov 16*8+3*8(%rsp),%rsp -.cfi_def_cfa %rsp,56 - pop %r15 -.cfi_adjust_cfa_offset -8 -.cfi_restore %r15 - pop %r14 -.cfi_adjust_cfa_offset -8 -.cfi_restore %r14 - pop %r13 -.cfi_adjust_cfa_offset -8 -.cfi_restore %r13 - pop %r12 -.cfi_adjust_cfa_offset -8 -.cfi_restore %r12 - pop %rbp -.cfi_adjust_cfa_offset -8 -.cfi_restore %rbp - pop %rbx -.cfi_adjust_cfa_offset -8 -.cfi_restore %rbx - - RET -.cfi_endproc -SET_SIZE(SHA512TransformBlocks) - -SECTION_STATIC -.balign 64 -SET_OBJ(K512) -K512: - .quad 0x428a2f98d728ae22,0x7137449123ef65cd - .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc - .quad 0x3956c25bf348b538,0x59f111f1b605d019 - .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 - .quad 0xd807aa98a3030242,0x12835b0145706fbe - .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 - .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 - .quad 0x9bdc06a725c71235,0xc19bf174cf692694 - .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 - .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 - .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 - .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 - .quad 0x983e5152ee66dfab,0xa831c66d2db43210 - .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 - .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 - .quad 0x06ca6351e003826f,0x142929670a0e6e70 - .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 - .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df - .quad 0x650a73548baf63de,0x766a0abb3c77b2a8 - .quad 0x81c2c92e47edaee6,0x92722c851482353b - .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 - .quad 0xc24b8b70d0f89791,0xc76c51a30654be30 - .quad 0xd192e819d6ef5218,0xd69906245565a910 - .quad 0xf40e35855771202a,0x106aa07032bbd1b8 - .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 - .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 - .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb - .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 - .quad 0x748f82ee5defb2fc,0x78a5636f43172f60 - .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec - .quad 0x90befffa23631e28,0xa4506cebde82bde9 - .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b - .quad 0xca273eceea26619c,0xd186b8c721c0c207 - .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 - .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 - .quad 0x113f9804bef90dae,0x1b710b35131c471b - .quad 0x28db77f523047d84,0x32caab7b40c72493 - .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c - .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a - .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 -#endif /* !lint && !__lint */ - -#if defined(__ELF__) -.section .note.GNU-stack,"",%progbits -#endif diff --git a/module/icp/include/sha2/sha2_consts.h b/module/icp/include/sha2/sha2_consts.h deleted file mode 100644 index b33ddf8218..0000000000 --- a/module/icp/include/sha2/sha2_consts.h +++ /dev/null @@ -1,219 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 
1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or https://opensource.org/licenses/CDDL-1.0. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _SYS_SHA2_CONSTS_H -#define _SYS_SHA2_CONSTS_H - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * Loading 32-bit constants on a sparc is expensive since it involves both - * a `sethi' and an `or'. thus, we instead use `ld' to load the constants - * from an array called `sha2_consts'. however, on intel (and perhaps other - * processors), it is cheaper to load the constant directly. thus, the c - * code in SHA transform functions uses the macro SHA2_CONST() which either - * expands to a constant or an array reference, depending on - * the architecture the code is being compiled for. - * - * SHA512 constants are used for SHA384 - */ - -#include /* uint32_t */ - -extern const uint32_t sha256_consts[]; -extern const uint64_t sha512_consts[]; - -#if defined(__sparc) -#define SHA256_CONST(x) (sha256_consts[x]) -#define SHA512_CONST(x) (sha512_consts[x]) -#else -#define SHA256_CONST(x) (SHA256_CONST_ ## x) -#define SHA512_CONST(x) (SHA512_CONST_ ## x) -#endif - -/* constants, as provided in FIPS 180-2 */ - -#define SHA256_CONST_0 0x428a2f98U -#define SHA256_CONST_1 0x71374491U -#define SHA256_CONST_2 0xb5c0fbcfU -#define SHA256_CONST_3 0xe9b5dba5U -#define SHA256_CONST_4 0x3956c25bU -#define SHA256_CONST_5 0x59f111f1U -#define SHA256_CONST_6 0x923f82a4U -#define SHA256_CONST_7 0xab1c5ed5U - -#define SHA256_CONST_8 0xd807aa98U -#define SHA256_CONST_9 0x12835b01U -#define SHA256_CONST_10 0x243185beU -#define SHA256_CONST_11 0x550c7dc3U -#define SHA256_CONST_12 0x72be5d74U -#define SHA256_CONST_13 0x80deb1feU -#define SHA256_CONST_14 0x9bdc06a7U -#define SHA256_CONST_15 0xc19bf174U - -#define SHA256_CONST_16 0xe49b69c1U -#define SHA256_CONST_17 0xefbe4786U -#define SHA256_CONST_18 0x0fc19dc6U -#define SHA256_CONST_19 0x240ca1ccU -#define SHA256_CONST_20 0x2de92c6fU -#define SHA256_CONST_21 0x4a7484aaU -#define SHA256_CONST_22 0x5cb0a9dcU -#define SHA256_CONST_23 0x76f988daU - -#define SHA256_CONST_24 0x983e5152U -#define SHA256_CONST_25 0xa831c66dU -#define SHA256_CONST_26 0xb00327c8U -#define SHA256_CONST_27 0xbf597fc7U -#define SHA256_CONST_28 0xc6e00bf3U -#define SHA256_CONST_29 0xd5a79147U -#define SHA256_CONST_30 0x06ca6351U -#define SHA256_CONST_31 0x14292967U - -#define SHA256_CONST_32 0x27b70a85U -#define SHA256_CONST_33 0x2e1b2138U -#define SHA256_CONST_34 0x4d2c6dfcU -#define SHA256_CONST_35 0x53380d13U -#define SHA256_CONST_36 0x650a7354U -#define SHA256_CONST_37 0x766a0abbU -#define SHA256_CONST_38 0x81c2c92eU -#define SHA256_CONST_39 0x92722c85U - -#define SHA256_CONST_40 0xa2bfe8a1U -#define SHA256_CONST_41 0xa81a664bU -#define SHA256_CONST_42 0xc24b8b70U -#define SHA256_CONST_43 0xc76c51a3U -#define SHA256_CONST_44 0xd192e819U -#define SHA256_CONST_45 0xd6990624U 
-#define SHA256_CONST_46 0xf40e3585U -#define SHA256_CONST_47 0x106aa070U - -#define SHA256_CONST_48 0x19a4c116U -#define SHA256_CONST_49 0x1e376c08U -#define SHA256_CONST_50 0x2748774cU -#define SHA256_CONST_51 0x34b0bcb5U -#define SHA256_CONST_52 0x391c0cb3U -#define SHA256_CONST_53 0x4ed8aa4aU -#define SHA256_CONST_54 0x5b9cca4fU -#define SHA256_CONST_55 0x682e6ff3U - -#define SHA256_CONST_56 0x748f82eeU -#define SHA256_CONST_57 0x78a5636fU -#define SHA256_CONST_58 0x84c87814U -#define SHA256_CONST_59 0x8cc70208U -#define SHA256_CONST_60 0x90befffaU -#define SHA256_CONST_61 0xa4506cebU -#define SHA256_CONST_62 0xbef9a3f7U -#define SHA256_CONST_63 0xc67178f2U - -#define SHA512_CONST_0 0x428a2f98d728ae22ULL -#define SHA512_CONST_1 0x7137449123ef65cdULL -#define SHA512_CONST_2 0xb5c0fbcfec4d3b2fULL -#define SHA512_CONST_3 0xe9b5dba58189dbbcULL -#define SHA512_CONST_4 0x3956c25bf348b538ULL -#define SHA512_CONST_5 0x59f111f1b605d019ULL -#define SHA512_CONST_6 0x923f82a4af194f9bULL -#define SHA512_CONST_7 0xab1c5ed5da6d8118ULL -#define SHA512_CONST_8 0xd807aa98a3030242ULL -#define SHA512_CONST_9 0x12835b0145706fbeULL -#define SHA512_CONST_10 0x243185be4ee4b28cULL -#define SHA512_CONST_11 0x550c7dc3d5ffb4e2ULL -#define SHA512_CONST_12 0x72be5d74f27b896fULL -#define SHA512_CONST_13 0x80deb1fe3b1696b1ULL -#define SHA512_CONST_14 0x9bdc06a725c71235ULL -#define SHA512_CONST_15 0xc19bf174cf692694ULL -#define SHA512_CONST_16 0xe49b69c19ef14ad2ULL -#define SHA512_CONST_17 0xefbe4786384f25e3ULL -#define SHA512_CONST_18 0x0fc19dc68b8cd5b5ULL -#define SHA512_CONST_19 0x240ca1cc77ac9c65ULL -#define SHA512_CONST_20 0x2de92c6f592b0275ULL -#define SHA512_CONST_21 0x4a7484aa6ea6e483ULL -#define SHA512_CONST_22 0x5cb0a9dcbd41fbd4ULL -#define SHA512_CONST_23 0x76f988da831153b5ULL -#define SHA512_CONST_24 0x983e5152ee66dfabULL -#define SHA512_CONST_25 0xa831c66d2db43210ULL -#define SHA512_CONST_26 0xb00327c898fb213fULL -#define SHA512_CONST_27 0xbf597fc7beef0ee4ULL -#define SHA512_CONST_28 0xc6e00bf33da88fc2ULL -#define SHA512_CONST_29 0xd5a79147930aa725ULL -#define SHA512_CONST_30 0x06ca6351e003826fULL -#define SHA512_CONST_31 0x142929670a0e6e70ULL -#define SHA512_CONST_32 0x27b70a8546d22ffcULL -#define SHA512_CONST_33 0x2e1b21385c26c926ULL -#define SHA512_CONST_34 0x4d2c6dfc5ac42aedULL -#define SHA512_CONST_35 0x53380d139d95b3dfULL -#define SHA512_CONST_36 0x650a73548baf63deULL -#define SHA512_CONST_37 0x766a0abb3c77b2a8ULL -#define SHA512_CONST_38 0x81c2c92e47edaee6ULL -#define SHA512_CONST_39 0x92722c851482353bULL -#define SHA512_CONST_40 0xa2bfe8a14cf10364ULL -#define SHA512_CONST_41 0xa81a664bbc423001ULL -#define SHA512_CONST_42 0xc24b8b70d0f89791ULL -#define SHA512_CONST_43 0xc76c51a30654be30ULL -#define SHA512_CONST_44 0xd192e819d6ef5218ULL -#define SHA512_CONST_45 0xd69906245565a910ULL -#define SHA512_CONST_46 0xf40e35855771202aULL -#define SHA512_CONST_47 0x106aa07032bbd1b8ULL -#define SHA512_CONST_48 0x19a4c116b8d2d0c8ULL -#define SHA512_CONST_49 0x1e376c085141ab53ULL -#define SHA512_CONST_50 0x2748774cdf8eeb99ULL -#define SHA512_CONST_51 0x34b0bcb5e19b48a8ULL -#define SHA512_CONST_52 0x391c0cb3c5c95a63ULL -#define SHA512_CONST_53 0x4ed8aa4ae3418acbULL -#define SHA512_CONST_54 0x5b9cca4f7763e373ULL -#define SHA512_CONST_55 0x682e6ff3d6b2b8a3ULL -#define SHA512_CONST_56 0x748f82ee5defb2fcULL -#define SHA512_CONST_57 0x78a5636f43172f60ULL -#define SHA512_CONST_58 0x84c87814a1f0ab72ULL -#define SHA512_CONST_59 0x8cc702081a6439ecULL -#define SHA512_CONST_60 0x90befffa23631e28ULL -#define SHA512_CONST_61 
0xa4506cebde82bde9ULL -#define SHA512_CONST_62 0xbef9a3f7b2c67915ULL -#define SHA512_CONST_63 0xc67178f2e372532bULL -#define SHA512_CONST_64 0xca273eceea26619cULL -#define SHA512_CONST_65 0xd186b8c721c0c207ULL -#define SHA512_CONST_66 0xeada7dd6cde0eb1eULL -#define SHA512_CONST_67 0xf57d4f7fee6ed178ULL -#define SHA512_CONST_68 0x06f067aa72176fbaULL -#define SHA512_CONST_69 0x0a637dc5a2c898a6ULL -#define SHA512_CONST_70 0x113f9804bef90daeULL -#define SHA512_CONST_71 0x1b710b35131c471bULL -#define SHA512_CONST_72 0x28db77f523047d84ULL -#define SHA512_CONST_73 0x32caab7b40c72493ULL -#define SHA512_CONST_74 0x3c9ebe0a15c9bebcULL -#define SHA512_CONST_75 0x431d67c49c100d4cULL -#define SHA512_CONST_76 0x4cc5d4becb3e42b6ULL -#define SHA512_CONST_77 0x597f299cfc657e2aULL -#define SHA512_CONST_78 0x5fcb6fab3ad6faecULL -#define SHA512_CONST_79 0x6c44198c4a475817ULL - - -#ifdef __cplusplus -} -#endif - -#endif /* _SYS_SHA2_CONSTS_H */ diff --git a/module/os/freebsd/spl/sha224.h b/module/os/freebsd/spl/sha224.h deleted file mode 100644 index 0abd430687..0000000000 --- a/module/os/freebsd/spl/sha224.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright 2005 Colin Percival - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
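
Editorial aside: the unrolled x86-64 rounds deleted above and the portable sha512c.c code removed further down compute the same per-round update, as the assembly comments (Sigma1(e), Ch(e,f,g), T1+=K[round], d+=T1, h+=Maj(a,b,c)) spell out. The following is a minimal C sketch of one SHA-512 round under those definitions; the helper name sha512_round() is hypothetical and not part of any file in this patch.

/*
 * Illustrative only: one SHA-512 round in portable C, mirroring the
 * operations annotated in the unrolled assembly above.
 */
#include <stdint.h>

#define ROTR64(x, n)	(((x) >> (n)) | ((x) << (64 - (n))))

static inline void
sha512_round(uint64_t s[8], uint64_t w, uint64_t k)
{
	uint64_t a = s[0], b = s[1], c = s[2], d = s[3];
	uint64_t e = s[4], f = s[5], g = s[6], h = s[7];

	/* T1 = h + Sigma1(e) + Ch(e,f,g) + K[round] + W[round] */
	uint64_t sig1 = ROTR64(e, 14) ^ ROTR64(e, 18) ^ ROTR64(e, 41);
	uint64_t ch = (e & (f ^ g)) ^ g;
	uint64_t t1 = h + sig1 + ch + k + w;

	/* T2 = Sigma0(a) + Maj(a,b,c), with Maj as ((a|c)&b)|(a&c) */
	uint64_t sig0 = ROTR64(a, 28) ^ ROTR64(a, 34) ^ ROTR64(a, 39);
	uint64_t maj = ((a | c) & b) | (a & c);
	uint64_t t2 = sig0 + maj;

	/* Rotate the working variables: d += T1, new a = T1 + T2 */
	s[7] = g; s[6] = f; s[5] = e; s[4] = d + t1;
	s[3] = c; s[2] = b; s[1] = a; s[0] = t1 + t2;
}
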
- * - * $FreeBSD$ - */ - -#ifndef _SHA224_H_ -#define _SHA224_H_ - -#ifndef _KERNEL -#include -#endif - -#define SHA224_BLOCK_LENGTH 64 -#define SHA224_DIGEST_LENGTH 28 -#define SHA224_DIGEST_STRING_LENGTH (SHA224_DIGEST_LENGTH * 2 + 1) - -typedef struct SHA224Context { - uint32_t state[8]; - uint64_t count; - uint8_t buf[SHA224_BLOCK_LENGTH]; -} SHA224_CTX; - -__BEGIN_DECLS - -/* Ensure libmd symbols do not clash with libcrypto */ - -#ifndef SHA224_Init -#define SHA224_Init _libmd_SHA224_Init -#endif -#ifndef SHA224_Update -#define SHA224_Update _libmd_SHA224_Update -#endif -#ifndef SHA224_Final -#define SHA224_Final _libmd_SHA224_Final -#endif -#ifndef SHA224_End -#define SHA224_End _libmd_SHA224_End -#endif -#ifndef SHA224_Fd -#define SHA224_Fd _libmd_SHA224_Fd -#endif -#ifndef SHA224_FdChunk -#define SHA224_FdChunk _libmd_SHA224_FdChunk -#endif -#ifndef SHA224_File -#define SHA224_File _libmd_SHA224_File -#endif -#ifndef SHA224_FileChunk -#define SHA224_FileChunk _libmd_SHA224_FileChunk -#endif -#ifndef SHA224_Data -#define SHA224_Data _libmd_SHA224_Data -#endif - -#ifndef SHA224_version -#define SHA224_version _libmd_SHA224_version -#endif - -void SHA224_Init(SHA224_CTX *); -void SHA224_Update(SHA224_CTX *, const void *, size_t); -void SHA224_Final(unsigned char [__min_size(SHA224_DIGEST_LENGTH)], - SHA224_CTX *); -#ifndef _KERNEL -char *SHA224_End(SHA224_CTX *, char *); -char *SHA224_Data(const void *, unsigned int, char *); -char *SHA224_Fd(int, char *); -char *SHA224_FdChunk(int, char *, off_t, off_t); -char *SHA224_File(const char *, char *); -char *SHA224_FileChunk(const char *, char *, off_t, off_t); -#endif -__END_DECLS - -#endif /* !_SHA224_H_ */ diff --git a/module/os/freebsd/spl/sha256.h b/module/os/freebsd/spl/sha256.h deleted file mode 100644 index 193c0c0251..0000000000 --- a/module/os/freebsd/spl/sha256.h +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright 2005 Colin Percival - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * $FreeBSD$ - */ - -#ifndef _SHA256_H_ -#define _SHA256_H_ - -#ifndef _KERNEL -#include -#endif - -#define SHA256_BLOCK_LENGTH 64 -#define SHA256_DIGEST_LENGTH 32 -#define SHA256_DIGEST_STRING_LENGTH (SHA256_DIGEST_LENGTH * 2 + 1) - -typedef struct SHA256Context { - uint32_t state[8]; - uint64_t count; - uint8_t buf[SHA256_BLOCK_LENGTH]; -} SHA256_CTX; - -__BEGIN_DECLS - -/* Ensure libmd symbols do not clash with libcrypto */ - -#ifndef SHA256_Init -#define SHA256_Init _libmd_SHA256_Init -#endif -#ifndef SHA256_Update -#define SHA256_Update _libmd_SHA256_Update -#endif -#ifndef SHA256_Final -#define SHA256_Final _libmd_SHA256_Final -#endif -#ifndef SHA256_End -#define SHA256_End _libmd_SHA256_End -#endif -#ifndef SHA256_Fd -#define SHA256_Fd _libmd_SHA256_Fd -#endif -#ifndef SHA256_FdChunk -#define SHA256_FdChunk _libmd_SHA256_FdChunk -#endif -#ifndef SHA256_File -#define SHA256_File _libmd_SHA256_File -#endif -#ifndef SHA256_FileChunk -#define SHA256_FileChunk _libmd_SHA256_FileChunk -#endif -#ifndef SHA256_Data -#define SHA256_Data _libmd_SHA256_Data -#endif - -#ifndef SHA256_Transform -#define SHA256_Transform _libmd_SHA256_Transform -#endif -#ifndef SHA256_version -#define SHA256_version _libmd_SHA256_version -#endif - -void SHA256_Init(SHA256_CTX *); -void SHA256_Update(SHA256_CTX *, const void *, size_t); -void SHA256_Final(unsigned char [__min_size(SHA256_DIGEST_LENGTH)], - SHA256_CTX *); -#ifndef _KERNEL -char *SHA256_End(SHA256_CTX *, char *); -char *SHA256_Data(const void *, unsigned int, char *); -char *SHA256_Fd(int, char *); -char *SHA256_FdChunk(int, char *, off_t, off_t); -char *SHA256_File(const char *, char *); -char *SHA256_FileChunk(const char *, char *, off_t, off_t); -#endif -__END_DECLS - -#endif /* !_SHA256_H_ */ diff --git a/module/os/freebsd/spl/sha256c.c b/module/os/freebsd/spl/sha256c.c deleted file mode 100644 index 52cf0df6c9..0000000000 --- a/module/os/freebsd/spl/sha256c.c +++ /dev/null @@ -1,378 +0,0 @@ -/* - * Copyright 2005 Colin Percival - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#include -__FBSDID("$FreeBSD$"); - -#include - -#ifdef _KERNEL -#include -#else -#include -#endif - - -#include -#include -#include "sha224.h" -#include "sha256.h" - -#if BYTE_ORDER == BIG_ENDIAN - -/* Copy a vector of big-endian uint32_t into a vector of bytes */ -#define be32enc_vect(dst, src, len) \ - memcpy((void *)dst, (const void *)src, (size_t)len) - -/* Copy a vector of bytes into a vector of big-endian uint32_t */ -#define be32dec_vect(dst, src, len) \ - memcpy((void *)dst, (const void *)src, (size_t)len) - -#else /* BYTE_ORDER != BIG_ENDIAN */ - -/* - * Encode a length len/4 vector of (uint32_t) into a length len vector of - * (unsigned char) in big-endian form. Assumes len is a multiple of 4. - */ -static void -be32enc_vect(unsigned char *dst, const uint32_t *src, size_t len) -{ - size_t i; - - for (i = 0; i < len / 4; i++) - be32enc(dst + i * 4, src[i]); -} - -/* - * Decode a big-endian length len vector of (unsigned char) into a length - * len/4 vector of (uint32_t). Assumes len is a multiple of 4. - */ -static void -be32dec_vect(uint32_t *dst, const unsigned char *src, size_t len) -{ - size_t i; - - for (i = 0; i < len / 4; i++) - dst[i] = be32dec(src + i * 4); -} - -#endif /* BYTE_ORDER != BIG_ENDIAN */ - -/* SHA256 round constants. */ -static const uint32_t K[64] = { - 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, - 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, - 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, - 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, - 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, - 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, - 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, - 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, - 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, - 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, - 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, - 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, - 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, - 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, - 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, - 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 -}; - -/* Elementary functions used by SHA256 */ -#define Ch(x, y, z) ((x & (y ^ z)) ^ z) -#define Maj(x, y, z) ((x & (y | z)) | (y & z)) -#define SHR(x, n) (x >> n) -#define ROTR(x, n) ((x >> n) | (x << (32 - n))) -#define S0(x) (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22)) -#define S1(x) (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25)) -#define s0(x) (ROTR(x, 7) ^ ROTR(x, 18) ^ SHR(x, 3)) -#define s1(x) (ROTR(x, 17) ^ ROTR(x, 19) ^ SHR(x, 10)) - -/* SHA256 round function */ -#define RND(a, b, c, d, e, f, g, h, k) \ - h += S1(e) + Ch(e, f, g) + k; \ - d += h; \ - h += S0(a) + Maj(a, b, c); - -/* Adjusted round function for rotating state */ -#define RNDr(S, W, i, ii) \ - RND(S[(64 - i) % 8], S[(65 - i) % 8], \ - S[(66 - i) % 8], S[(67 - i) % 8], \ - S[(68 - i) % 8], S[(69 - i) % 8], \ - S[(70 - i) % 8], S[(71 - i) % 8], \ - W[i + ii] + K[i + ii]) - -/* Message schedule computation */ -#define MSCH(W, ii, i) \ - W[i + ii + 16] = s1(W[i + ii + 14]) + W[i + ii + 9] + \ - s0(W[i + ii + 1]) + W[i + ii] - -/* - * SHA256 block compression function. The 256-bit state is transformed via - * the 512-bit input block to produce a new state. - */ -static void -SHA256_Transform(uint32_t *state, const unsigned char block[64]) -{ - uint32_t W[64]; - uint32_t S[8]; - int i; - - /* 1. Prepare the first part of the message schedule W. */ - be32dec_vect(W, block, 64); - - /* 2. Initialize working variables. */ - memcpy(S, state, 32); - - /* 3. Mix. 
*/ - for (i = 0; i < 64; i += 16) { - RNDr(S, W, 0, i); - RNDr(S, W, 1, i); - RNDr(S, W, 2, i); - RNDr(S, W, 3, i); - RNDr(S, W, 4, i); - RNDr(S, W, 5, i); - RNDr(S, W, 6, i); - RNDr(S, W, 7, i); - RNDr(S, W, 8, i); - RNDr(S, W, 9, i); - RNDr(S, W, 10, i); - RNDr(S, W, 11, i); - RNDr(S, W, 12, i); - RNDr(S, W, 13, i); - RNDr(S, W, 14, i); - RNDr(S, W, 15, i); - - if (i == 48) - break; - MSCH(W, 0, i); - MSCH(W, 1, i); - MSCH(W, 2, i); - MSCH(W, 3, i); - MSCH(W, 4, i); - MSCH(W, 5, i); - MSCH(W, 6, i); - MSCH(W, 7, i); - MSCH(W, 8, i); - MSCH(W, 9, i); - MSCH(W, 10, i); - MSCH(W, 11, i); - MSCH(W, 12, i); - MSCH(W, 13, i); - MSCH(W, 14, i); - MSCH(W, 15, i); - } - - /* 4. Mix local working variables into global state */ - for (i = 0; i < 8; i++) - state[i] += S[i]; -} - -static unsigned char PAD[64] = { - 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -}; - -/* Add padding and terminating bit-count. */ -static void -SHA256_Pad(SHA256_CTX * ctx) -{ - size_t r; - - /* Figure out how many bytes we have buffered. */ - r = (ctx->count >> 3) & 0x3f; - - /* Pad to 56 mod 64, transforming if we finish a block en route. */ - if (r < 56) { - /* Pad to 56 mod 64. */ - memcpy(&ctx->buf[r], PAD, 56 - r); - } else { - /* Finish the current block and mix. */ - memcpy(&ctx->buf[r], PAD, 64 - r); - SHA256_Transform(ctx->state, ctx->buf); - - /* The start of the final block is all zeroes. */ - memset(&ctx->buf[0], 0, 56); - } - - /* Add the terminating bit-count. */ - be64enc(&ctx->buf[56], ctx->count); - - /* Mix in the final block. */ - SHA256_Transform(ctx->state, ctx->buf); -} - -/* SHA-256 initialization. Begins a SHA-256 operation. */ -void -SHA256_Init(SHA256_CTX * ctx) -{ - - /* Zero bits processed so far */ - ctx->count = 0; - - /* Magic initialization constants */ - ctx->state[0] = 0x6A09E667; - ctx->state[1] = 0xBB67AE85; - ctx->state[2] = 0x3C6EF372; - ctx->state[3] = 0xA54FF53A; - ctx->state[4] = 0x510E527F; - ctx->state[5] = 0x9B05688C; - ctx->state[6] = 0x1F83D9AB; - ctx->state[7] = 0x5BE0CD19; -} - -/* Add bytes into the hash */ -void -SHA256_Update(SHA256_CTX * ctx, const void *in, size_t len) -{ - uint64_t bitlen; - uint32_t r; - const unsigned char *src = in; - - /* Number of bytes left in the buffer from previous updates */ - r = (ctx->count >> 3) & 0x3f; - - /* Convert the length into a number of bits */ - bitlen = len << 3; - - /* Update number of bits */ - ctx->count += bitlen; - - /* Handle the case where we don't need to perform any transforms */ - if (len < 64 - r) { - memcpy(&ctx->buf[r], src, len); - return; - } - - /* Finish the current block */ - memcpy(&ctx->buf[r], src, 64 - r); - SHA256_Transform(ctx->state, ctx->buf); - src += 64 - r; - len -= 64 - r; - - /* Perform complete blocks */ - while (len >= 64) { - SHA256_Transform(ctx->state, src); - src += 64; - len -= 64; - } - - /* Copy left over data into buffer */ - memcpy(ctx->buf, src, len); -} - -/* - * SHA-256 finalization. Pads the input data, exports the hash value, - * and clears the context state. 
- */ -void -SHA256_Final(unsigned char digest[static SHA256_DIGEST_LENGTH], SHA256_CTX *ctx) -{ - - /* Add padding */ - SHA256_Pad(ctx); - - /* Write the hash */ - be32enc_vect(digest, ctx->state, SHA256_DIGEST_LENGTH); - - /* Clear the context state */ - memset(ctx, 0, sizeof (*ctx)); -} - -/* SHA-224: ******************************************************* */ -/* - * the SHA224 and SHA256 transforms are identical - */ - -/* SHA-224 initialization. Begins a SHA-224 operation. */ -void -SHA224_Init(SHA224_CTX * ctx) -{ - - /* Zero bits processed so far */ - ctx->count = 0; - - /* Magic initialization constants */ - ctx->state[0] = 0xC1059ED8; - ctx->state[1] = 0x367CD507; - ctx->state[2] = 0x3070DD17; - ctx->state[3] = 0xF70E5939; - ctx->state[4] = 0xFFC00B31; - ctx->state[5] = 0x68581511; - ctx->state[6] = 0x64f98FA7; - ctx->state[7] = 0xBEFA4FA4; -} - -/* Add bytes into the SHA-224 hash */ -void -SHA224_Update(SHA224_CTX * ctx, const void *in, size_t len) -{ - - SHA256_Update((SHA256_CTX *)ctx, in, len); -} - -/* - * SHA-224 finalization. Pads the input data, exports the hash value, - * and clears the context state. - */ -void -SHA224_Final(unsigned char digest[static SHA224_DIGEST_LENGTH], SHA224_CTX *ctx) -{ - - /* Add padding */ - SHA256_Pad((SHA256_CTX *)ctx); - - /* Write the hash */ - be32enc_vect(digest, ctx->state, SHA224_DIGEST_LENGTH); - - /* Clear the context state */ - memset(ctx, 0, sizeof (*ctx)); -} - -#ifdef WEAK_REFS -/* - * When building libmd, provide weak references. Note: this is not - * activated in the context of compiling these sources for internal - * use in libcrypt. - */ -#undef SHA256_Init -__weak_reference(_libmd_SHA256_Init, SHA256_Init); -#undef SHA256_Update -__weak_reference(_libmd_SHA256_Update, SHA256_Update); -#undef SHA256_Final -__weak_reference(_libmd_SHA256_Final, SHA256_Final); -#undef SHA256_Transform -__weak_reference(_libmd_SHA256_Transform, SHA256_Transform); - -#undef SHA224_Init -__weak_reference(_libmd_SHA224_Init, SHA224_Init); -#undef SHA224_Update -__weak_reference(_libmd_SHA224_Update, SHA224_Update); -#undef SHA224_Final -__weak_reference(_libmd_SHA224_Final, SHA224_Final); -#endif diff --git a/module/os/freebsd/spl/sha384.h b/module/os/freebsd/spl/sha384.h deleted file mode 100644 index 67250cee03..0000000000 --- a/module/os/freebsd/spl/sha384.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright 2005 Colin Percival - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
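
Editorial aside: the sha256c.c implementation deleted above follows the usual Init/Update/Final pattern, with SHA256_Init() loading the FIPS 180-4 initial state, SHA256_Update() buffering and compressing 64-byte blocks, and SHA256_Final() padding, writing the digest, and clearing the context. A minimal caller-side sketch, assuming the pre-removal header layout; hash_buffer() is a hypothetical helper, not part of this patch.

/* Illustrative only: consuming the removed FreeBSD SPL SHA-256 wrapper. */
#include <stddef.h>
#include "sha256.h"

static void
hash_buffer(const void *buf, size_t len,
    unsigned char digest[SHA256_DIGEST_LENGTH])
{
	SHA256_CTX ctx;

	SHA256_Init(&ctx);		/* load the SHA-256 initial state */
	SHA256_Update(&ctx, buf, len);	/* may be called repeatedly */
	SHA256_Final(digest, &ctx);	/* pads, emits digest, zeroes ctx */
}
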
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#ifndef _SHA384_H_ -#define _SHA384_H_ - -#ifndef _KERNEL -#include -#endif - -#define SHA384_BLOCK_LENGTH 128 -#define SHA384_DIGEST_LENGTH 48 -#define SHA384_DIGEST_STRING_LENGTH (SHA384_DIGEST_LENGTH * 2 + 1) - -typedef struct SHA384Context { - uint64_t state[8]; - uint64_t count[2]; - uint8_t buf[SHA384_BLOCK_LENGTH]; -} SHA384_CTX; - -__BEGIN_DECLS - -/* Ensure libmd symbols do not clash with libcrypto */ -#ifndef SHA384_Init -#define SHA384_Init _libmd_SHA384_Init -#endif -#ifndef SHA384_Update -#define SHA384_Update _libmd_SHA384_Update -#endif -#ifndef SHA384_Final -#define SHA384_Final _libmd_SHA384_Final -#endif -#ifndef SHA384_End -#define SHA384_End _libmd_SHA384_End -#endif -#ifndef SHA384_Fd -#define SHA384_Fd _libmd_SHA384_Fd -#endif -#ifndef SHA384_FdChunk -#define SHA384_FdChunk _libmd_SHA384_FdChunk -#endif -#ifndef SHA384_File -#define SHA384_File _libmd_SHA384_File -#endif -#ifndef SHA384_FileChunk -#define SHA384_FileChunk _libmd_SHA384_FileChunk -#endif -#ifndef SHA384_Data -#define SHA384_Data _libmd_SHA384_Data -#endif - -#ifndef SHA384_version -#define SHA384_version _libmd_SHA384_version -#endif - -void SHA384_Init(SHA384_CTX *); -void SHA384_Update(SHA384_CTX *, const void *, size_t); -void SHA384_Final(unsigned char [__min_size(SHA384_DIGEST_LENGTH)], - SHA384_CTX *); -#ifndef _KERNEL -char *SHA384_End(SHA384_CTX *, char *); -char *SHA384_Data(const void *, unsigned int, char *); -char *SHA384_Fd(int, char *); -char *SHA384_FdChunk(int, char *, off_t, off_t); -char *SHA384_File(const char *, char *); -char *SHA384_FileChunk(const char *, char *, off_t, off_t); -#endif - -__END_DECLS - -#endif /* !_SHA384_H_ */ diff --git a/module/os/freebsd/spl/sha512.h b/module/os/freebsd/spl/sha512.h deleted file mode 100644 index b6fb733ca5..0000000000 --- a/module/os/freebsd/spl/sha512.h +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright 2005 Colin Percival - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#ifndef _SHA512_H_ -#define _SHA512_H_ - -#ifndef _KERNEL -#include -#endif - -#define SHA512_BLOCK_LENGTH 128 -#define SHA512_DIGEST_LENGTH 64 -#define SHA512_DIGEST_STRING_LENGTH (SHA512_DIGEST_LENGTH * 2 + 1) - -typedef struct SHA512Context { - uint64_t state[8]; - uint64_t count[2]; - uint8_t buf[SHA512_BLOCK_LENGTH]; -} SHA512_CTX; - -__BEGIN_DECLS - -/* Ensure libmd symbols do not clash with libcrypto */ -#if 0 -#ifndef SHA512_Init -#define SHA512_Init _libmd_SHA512_Init -#endif -#ifndef SHA512_Update -#define SHA512_Update _libmd_SHA512_Update -#endif -#ifndef SHA512_Final -#define SHA512_Final _libmd_SHA512_Final -#endif -#endif -#ifndef SHA512_End -#define SHA512_End _libmd_SHA512_End -#endif -#ifndef SHA512_Fd -#define SHA512_Fd _libmd_SHA512_Fd -#endif -#ifndef SHA512_FdChunk -#define SHA512_FdChunk _libmd_SHA512_FdChunk -#endif -#ifndef SHA512_File -#define SHA512_File _libmd_SHA512_File -#endif -#ifndef SHA512_FileChunk -#define SHA512_FileChunk _libmd_SHA512_FileChunk -#endif -#ifndef SHA512_Data -#define SHA512_Data _libmd_SHA512_Data -#endif - -#ifndef SHA512_Transform -#define SHA512_Transform _libmd_SHA512_Transform -#endif -#ifndef SHA512_version -#define SHA512_version _libmd_SHA512_version -#endif - -void SHA512_Init(SHA512_CTX *); -void SHA512_Update(SHA512_CTX *, const void *, size_t); -void SHA512_Final(unsigned char [__min_size(SHA512_DIGEST_LENGTH)], - SHA512_CTX *); -#ifndef _KERNEL -char *SHA512_End(SHA512_CTX *, char *); -char *SHA512_Data(const void *, unsigned int, char *); -char *SHA512_Fd(int, char *); -char *SHA512_FdChunk(int, char *, off_t, off_t); -char *SHA512_File(const char *, char *); -char *SHA512_FileChunk(const char *, char *, off_t, off_t); -#endif - -__END_DECLS - -#endif /* !_SHA512_H_ */ diff --git a/module/os/freebsd/spl/sha512c.c b/module/os/freebsd/spl/sha512c.c deleted file mode 100644 index 254cc21565..0000000000 --- a/module/os/freebsd/spl/sha512c.c +++ /dev/null @@ -1,508 +0,0 @@ -/* - * Copyright 2005 Colin Percival - * Copyright (c) 2015 Allan Jude - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include -__FBSDID("$FreeBSD$"); - -#include -#include - -#ifdef _KERNEL -#include -#else -#include -#endif - -#include "sha512.h" -#include "sha512t.h" -#include "sha384.h" - -#if BYTE_ORDER == BIG_ENDIAN - -/* Copy a vector of big-endian uint64_t into a vector of bytes */ -#define be64enc_vect(dst, src, len) \ - memcpy((void *)dst, (const void *)src, (size_t)len) - -/* Copy a vector of bytes into a vector of big-endian uint64_t */ -#define be64dec_vect(dst, src, len) \ - memcpy((void *)dst, (const void *)src, (size_t)len) - -#else /* BYTE_ORDER != BIG_ENDIAN */ - -/* - * Encode a length len/4 vector of (uint64_t) into a length len vector of - * (unsigned char) in big-endian form. Assumes len is a multiple of 8. - */ -static void -be64enc_vect(unsigned char *dst, const uint64_t *src, size_t len) -{ - size_t i; - - for (i = 0; i < len / 8; i++) - be64enc(dst + i * 8, src[i]); -} - -/* - * Decode a big-endian length len vector of (unsigned char) into a length - * len/4 vector of (uint64_t). Assumes len is a multiple of 8. - */ -static void -be64dec_vect(uint64_t *dst, const unsigned char *src, size_t len) -{ - size_t i; - - for (i = 0; i < len / 8; i++) - dst[i] = be64dec(src + i * 8); -} - -#endif /* BYTE_ORDER != BIG_ENDIAN */ - -/* SHA512 round constants. 
*/ -static const uint64_t K[80] = { - 0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL, - 0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL, - 0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL, - 0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL, - 0xd807aa98a3030242ULL, 0x12835b0145706fbeULL, - 0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL, - 0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL, - 0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL, - 0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL, - 0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL, - 0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL, - 0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL, - 0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL, - 0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL, - 0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL, - 0x06ca6351e003826fULL, 0x142929670a0e6e70ULL, - 0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL, - 0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL, - 0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL, - 0x81c2c92e47edaee6ULL, 0x92722c851482353bULL, - 0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL, - 0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL, - 0xd192e819d6ef5218ULL, 0xd69906245565a910ULL, - 0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL, - 0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL, - 0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL, - 0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL, - 0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL, - 0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL, - 0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL, - 0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL, - 0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL, - 0xca273eceea26619cULL, 0xd186b8c721c0c207ULL, - 0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL, - 0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL, - 0x113f9804bef90daeULL, 0x1b710b35131c471bULL, - 0x28db77f523047d84ULL, 0x32caab7b40c72493ULL, - 0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL, - 0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL, - 0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL -}; - -/* Elementary functions used by SHA512 */ -#define Ch(x, y, z) ((x & (y ^ z)) ^ z) -#define Maj(x, y, z) ((x & (y | z)) | (y & z)) -#define SHR(x, n) (x >> n) -#define ROTR(x, n) ((x >> n) | (x << (64 - n))) -#define S0(x) (ROTR(x, 28) ^ ROTR(x, 34) ^ ROTR(x, 39)) -#define S1(x) (ROTR(x, 14) ^ ROTR(x, 18) ^ ROTR(x, 41)) -#define s0(x) (ROTR(x, 1) ^ ROTR(x, 8) ^ SHR(x, 7)) -#define s1(x) (ROTR(x, 19) ^ ROTR(x, 61) ^ SHR(x, 6)) - -/* SHA512 round function */ -#define RND(a, b, c, d, e, f, g, h, k) \ - h += S1(e) + Ch(e, f, g) + k; \ - d += h; \ - h += S0(a) + Maj(a, b, c); - -/* Adjusted round function for rotating state */ -#define RNDr(S, W, i, ii) \ - RND(S[(80 - i) % 8], S[(81 - i) % 8], \ - S[(82 - i) % 8], S[(83 - i) % 8], \ - S[(84 - i) % 8], S[(85 - i) % 8], \ - S[(86 - i) % 8], S[(87 - i) % 8], \ - W[i + ii] + K[i + ii]) - -/* Message schedule computation */ -#define MSCH(W, ii, i) \ - W[i + ii + 16] = s1(W[i + ii + 14]) + W[i + ii + 9] + \ - s0(W[i + ii + 1]) + W[i + ii] - -/* - * SHA512 block compression function. The 512-bit state is transformed via - * the 512-bit input block to produce a new state. - */ -static void -SHA512_Transform(uint64_t *state, - const unsigned char block[SHA512_BLOCK_LENGTH]) -{ - uint64_t W[80]; - uint64_t S[8]; - int i; - - /* 1. Prepare the first part of the message schedule W. */ - be64dec_vect(W, block, SHA512_BLOCK_LENGTH); - - /* 2. Initialize working variables. */ - memcpy(S, state, SHA512_DIGEST_LENGTH); - - /* 3. Mix. 
*/ - for (i = 0; i < 80; i += 16) { - RNDr(S, W, 0, i); - RNDr(S, W, 1, i); - RNDr(S, W, 2, i); - RNDr(S, W, 3, i); - RNDr(S, W, 4, i); - RNDr(S, W, 5, i); - RNDr(S, W, 6, i); - RNDr(S, W, 7, i); - RNDr(S, W, 8, i); - RNDr(S, W, 9, i); - RNDr(S, W, 10, i); - RNDr(S, W, 11, i); - RNDr(S, W, 12, i); - RNDr(S, W, 13, i); - RNDr(S, W, 14, i); - RNDr(S, W, 15, i); - - if (i == 64) - break; - MSCH(W, 0, i); - MSCH(W, 1, i); - MSCH(W, 2, i); - MSCH(W, 3, i); - MSCH(W, 4, i); - MSCH(W, 5, i); - MSCH(W, 6, i); - MSCH(W, 7, i); - MSCH(W, 8, i); - MSCH(W, 9, i); - MSCH(W, 10, i); - MSCH(W, 11, i); - MSCH(W, 12, i); - MSCH(W, 13, i); - MSCH(W, 14, i); - MSCH(W, 15, i); - } - - /* 4. Mix local working variables into global state */ - for (i = 0; i < 8; i++) - state[i] += S[i]; -} - -static unsigned char PAD[SHA512_BLOCK_LENGTH] = { - 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -}; - -/* Add padding and terminating bit-count. */ -static void -SHA512_Pad(SHA512_CTX * ctx) -{ - size_t r; - - /* Figure out how many bytes we have buffered. */ - r = (ctx->count[1] >> 3) & 0x7f; - - /* Pad to 112 mod 128, transforming if we finish a block en route. */ - if (r < 112) { - /* Pad to 112 mod 128. */ - memcpy(&ctx->buf[r], PAD, 112 - r); - } else { - /* Finish the current block and mix. */ - memcpy(&ctx->buf[r], PAD, 128 - r); - SHA512_Transform(ctx->state, ctx->buf); - - /* The start of the final block is all zeroes. */ - memset(&ctx->buf[0], 0, 112); - } - - /* Add the terminating bit-count. */ - be64enc_vect(&ctx->buf[112], ctx->count, 16); - - /* Mix in the final block. */ - SHA512_Transform(ctx->state, ctx->buf); -} - -/* SHA-512 initialization. Begins a SHA-512 operation. 
*/ -void -SHA512_Init(SHA512_CTX * ctx) -{ - - /* Zero bits processed so far */ - ctx->count[0] = ctx->count[1] = 0; - - /* Magic initialization constants */ - ctx->state[0] = 0x6a09e667f3bcc908ULL; - ctx->state[1] = 0xbb67ae8584caa73bULL; - ctx->state[2] = 0x3c6ef372fe94f82bULL; - ctx->state[3] = 0xa54ff53a5f1d36f1ULL; - ctx->state[4] = 0x510e527fade682d1ULL; - ctx->state[5] = 0x9b05688c2b3e6c1fULL; - ctx->state[6] = 0x1f83d9abfb41bd6bULL; - ctx->state[7] = 0x5be0cd19137e2179ULL; -} - -/* Add bytes into the hash */ -void -SHA512_Update(SHA512_CTX * ctx, const void *in, size_t len) -{ - uint64_t bitlen[2]; - uint64_t r; - const unsigned char *src = in; - - /* Number of bytes left in the buffer from previous updates */ - r = (ctx->count[1] >> 3) & 0x7f; - - /* Convert the length into a number of bits */ - bitlen[1] = ((uint64_t)len) << 3; - bitlen[0] = ((uint64_t)len) >> 61; - - /* Update number of bits */ - if ((ctx->count[1] += bitlen[1]) < bitlen[1]) - ctx->count[0]++; - ctx->count[0] += bitlen[0]; - - /* Handle the case where we don't need to perform any transforms */ - if (len < SHA512_BLOCK_LENGTH - r) { - memcpy(&ctx->buf[r], src, len); - return; - } - - /* Finish the current block */ - memcpy(&ctx->buf[r], src, SHA512_BLOCK_LENGTH - r); - SHA512_Transform(ctx->state, ctx->buf); - src += SHA512_BLOCK_LENGTH - r; - len -= SHA512_BLOCK_LENGTH - r; - - /* Perform complete blocks */ - while (len >= SHA512_BLOCK_LENGTH) { - SHA512_Transform(ctx->state, src); - src += SHA512_BLOCK_LENGTH; - len -= SHA512_BLOCK_LENGTH; - } - - /* Copy left over data into buffer */ - memcpy(ctx->buf, src, len); -} - -/* - * SHA-512 finalization. Pads the input data, exports the hash value, - * and clears the context state. - */ -void -SHA512_Final(unsigned char digest[static SHA512_DIGEST_LENGTH], SHA512_CTX *ctx) -{ - - /* Add padding */ - SHA512_Pad(ctx); - - /* Write the hash */ - be64enc_vect(digest, ctx->state, SHA512_DIGEST_LENGTH); - - /* Clear the context state */ - memset(ctx, 0, sizeof (*ctx)); -} - -/* SHA-512t: ******************************************************** */ -/* - * the SHA512t transforms are identical to SHA512 so reuse the existing function - */ -void -SHA512_224_Init(SHA512_CTX * ctx) -{ - - /* Zero bits processed so far */ - ctx->count[0] = ctx->count[1] = 0; - - /* Magic initialization constants */ - ctx->state[0] = 0x8c3d37c819544da2ULL; - ctx->state[1] = 0x73e1996689dcd4d6ULL; - ctx->state[2] = 0x1dfab7ae32ff9c82ULL; - ctx->state[3] = 0x679dd514582f9fcfULL; - ctx->state[4] = 0x0f6d2b697bd44da8ULL; - ctx->state[5] = 0x77e36f7304c48942ULL; - ctx->state[6] = 0x3f9d85a86a1d36c8ULL; - ctx->state[7] = 0x1112e6ad91d692a1ULL; -} - -void -SHA512_224_Update(SHA512_CTX * ctx, const void *in, size_t len) -{ - - SHA512_Update(ctx, in, len); -} - -void -SHA512_224_Final(unsigned char digest[static SHA512_224_DIGEST_LENGTH], - SHA512_CTX *ctx) -{ - - /* Add padding */ - SHA512_Pad(ctx); - - /* Write the hash */ - be64enc_vect(digest, ctx->state, SHA512_224_DIGEST_LENGTH); - - /* Clear the context state */ - memset(ctx, 0, sizeof (*ctx)); -} - -void -SHA512_256_Init(SHA512_CTX * ctx) -{ - - /* Zero bits processed so far */ - ctx->count[0] = ctx->count[1] = 0; - - /* Magic initialization constants */ - ctx->state[0] = 0x22312194fc2bf72cULL; - ctx->state[1] = 0x9f555fa3c84c64c2ULL; - ctx->state[2] = 0x2393b86b6f53b151ULL; - ctx->state[3] = 0x963877195940eabdULL; - ctx->state[4] = 0x96283ee2a88effe3ULL; - ctx->state[5] = 0xbe5e1e2553863992ULL; - ctx->state[6] = 0x2b0199fc2c85b8aaULL; - 
ctx->state[7] = 0x0eb72ddc81c52ca2ULL; -} - -void -SHA512_256_Update(SHA512_CTX * ctx, const void *in, size_t len) -{ - - SHA512_Update(ctx, in, len); -} - -void -SHA512_256_Final(unsigned char digest[static SHA512_256_DIGEST_LENGTH], - SHA512_CTX * ctx) -{ - - /* Add padding */ - SHA512_Pad(ctx); - - /* Write the hash */ - be64enc_vect(digest, ctx->state, SHA512_256_DIGEST_LENGTH); - - /* Clear the context state */ - memset(ctx, 0, sizeof (*ctx)); -} - -/* ** SHA-384: ******************************************************** */ -/* - * the SHA384 and SHA512 transforms are identical, so SHA384 is skipped - */ - -/* SHA-384 initialization. Begins a SHA-384 operation. */ -void -SHA384_Init(SHA384_CTX * ctx) -{ - - /* Zero bits processed so far */ - ctx->count[0] = ctx->count[1] = 0; - - /* Magic initialization constants */ - ctx->state[0] = 0xcbbb9d5dc1059ed8ULL; - ctx->state[1] = 0x629a292a367cd507ULL; - ctx->state[2] = 0x9159015a3070dd17ULL; - ctx->state[3] = 0x152fecd8f70e5939ULL; - ctx->state[4] = 0x67332667ffc00b31ULL; - ctx->state[5] = 0x8eb44a8768581511ULL; - ctx->state[6] = 0xdb0c2e0d64f98fa7ULL; - ctx->state[7] = 0x47b5481dbefa4fa4ULL; -} - -/* Add bytes into the SHA-384 hash */ -void -SHA384_Update(SHA384_CTX * ctx, const void *in, size_t len) -{ - - SHA512_Update((SHA512_CTX *)ctx, in, len); -} - -/* - * SHA-384 finalization. Pads the input data, exports the hash value, - * and clears the context state. - */ -void -SHA384_Final(unsigned char digest[static SHA384_DIGEST_LENGTH], SHA384_CTX *ctx) -{ - - /* Add padding */ - SHA512_Pad((SHA512_CTX *)ctx); - - /* Write the hash */ - be64enc_vect(digest, ctx->state, SHA384_DIGEST_LENGTH); - - /* Clear the context state */ - memset(ctx, 0, sizeof (*ctx)); -} - -#if 0 -/* - * When building libmd, provide weak references. Note: this is not - * activated in the context of compiling these sources for internal - * use in libcrypt. - */ -#undef SHA512_Init -__weak_reference(_libmd_SHA512_Init, SHA512_Init); -#undef SHA512_Update -__weak_reference(_libmd_SHA512_Update, SHA512_Update); -#undef SHA512_Final -__weak_reference(_libmd_SHA512_Final, SHA512_Final); -#undef SHA512_Transform -__weak_reference(_libmd_SHA512_Transform, SHA512_Transform); - -#undef SHA512_224_Init -__weak_reference(_libmd_SHA512_224_Init, SHA512_224_Init); -#undef SHA512_224_Update -__weak_reference(_libmd_SHA512_224_Update, SHA512_224_Update); -#undef SHA512_224_Final -__weak_reference(_libmd_SHA512_224_Final, SHA512_224_Final); - -#undef SHA512_256_Init -__weak_reference(_libmd_SHA512_256_Init, SHA512_256_Init); -#undef SHA512_256_Update -__weak_reference(_libmd_SHA512_256_Update, SHA512_256_Update); -#undef SHA512_256_Final -__weak_reference(_libmd_SHA512_256_Final, SHA512_256_Final); - -#undef SHA384_Init -__weak_reference(_libmd_SHA384_Init, SHA384_Init); -#undef SHA384_Update -__weak_reference(_libmd_SHA384_Update, SHA384_Update); -#undef SHA384_Final -__weak_reference(_libmd_SHA384_Final, SHA384_Final); -#endif diff --git a/module/os/freebsd/spl/sha512t.h b/module/os/freebsd/spl/sha512t.h deleted file mode 100644 index 703867fc02..0000000000 --- a/module/os/freebsd/spl/sha512t.h +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Copyright (c) 2015 Allan Jude - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#ifndef _SHA512T_H_
-#define _SHA512T_H_
-
-#include "sha512.h"
-
-#ifndef _KERNEL
-#include <sys/types.h>
-#endif
-
-#define SHA512_224_DIGEST_LENGTH 28
-#define SHA512_224_DIGEST_STRING_LENGTH (SHA512_224_DIGEST_LENGTH * 2 + 1)
-#define SHA512_256_DIGEST_LENGTH 32
-#define SHA512_256_DIGEST_STRING_LENGTH (SHA512_256_DIGEST_LENGTH * 2 + 1)
-
-__BEGIN_DECLS
-
-/* Ensure libmd symbols do not clash with libcrypto */
-#ifndef SHA512_224_Init
-#define SHA512_224_Init _libmd_SHA512_224_Init
-#endif
-#ifndef SHA512_224_Update
-#define SHA512_224_Update _libmd_SHA512_224_Update
-#endif
-#ifndef SHA512_224_Final
-#define SHA512_224_Final _libmd_SHA512_224_Final
-#endif
-#ifndef SHA512_224_End
-#define SHA512_224_End _libmd_SHA512_224_End
-#endif
-#ifndef SHA512_224_Fd
-#define SHA512_224_Fd _libmd_SHA512_224_Fd
-#endif
-#ifndef SHA512_224_FdChunk
-#define SHA512_224_FdChunk _libmd_SHA512_224_FdChunk
-#endif
-#ifndef SHA512_224_File
-#define SHA512_224_File _libmd_SHA512_224_File
-#endif
-#ifndef SHA512_224_FileChunk
-#define SHA512_224_FileChunk _libmd_SHA512_224_FileChunk
-#endif
-#ifndef SHA512_224_Data
-#define SHA512_224_Data _libmd_SHA512_224_Data
-#endif
-
-#ifndef SHA512_224_Transform
-#define SHA512_224_Transform _libmd_SHA512_224_Transform
-#endif
-#ifndef SHA512_224_version
-#define SHA512_224_version _libmd_SHA512_224_version
-#endif
-
-#ifndef SHA512_256_Init
-#define SHA512_256_Init _libmd_SHA512_256_Init
-#endif
-#ifndef SHA512_256_Update
-#define SHA512_256_Update _libmd_SHA512_256_Update
-#endif
-#ifndef SHA512_256_Final
-#define SHA512_256_Final _libmd_SHA512_256_Final
-#endif
-#ifndef SHA512_256_End
-#define SHA512_256_End _libmd_SHA512_256_End
-#endif
-#ifndef SHA512_256_Fd
-#define SHA512_256_Fd _libmd_SHA512_256_Fd
-#endif
-#ifndef SHA512_256_FdChunk
-#define SHA512_256_FdChunk _libmd_SHA512_256_FdChunk
-#endif
-#ifndef SHA512_256_File
-#define SHA512_256_File _libmd_SHA512_256_File
-#endif
-#ifndef SHA512_256_FileChunk
-#define SHA512_256_FileChunk _libmd_SHA512_256_FileChunk
-#endif
-#ifndef SHA512_256_Data
-#define SHA512_256_Data _libmd_SHA512_256_Data
-#endif
-
-#ifndef SHA512_256_Transform
-#define SHA512_256_Transform _libmd_SHA512_256_Transform
-#endif
-#ifndef SHA512_256_version
-#define SHA512_256_version _libmd_SHA512_256_version
-#endif
-
-void SHA512_224_Init(SHA512_CTX *);
-void SHA512_224_Update(SHA512_CTX *, const void *, size_t);
-void SHA512_224_Final(unsigned char [__min_size(SHA512_224_DIGEST_LENGTH)],
-    SHA512_CTX *);
-#ifndef _KERNEL
-char *SHA512_224_End(SHA512_CTX *, char *);
-char *SHA512_224_Data(const void *, unsigned int, char *);
-char *SHA512_224_Fd(int, char *);
-char *SHA512_224_FdChunk(int, char *, off_t, off_t);
-char *SHA512_224_File(const char *, char *);
-char *SHA512_224_FileChunk(const char *, char *, off_t, off_t);
-#endif
-void SHA512_256_Init(SHA512_CTX *);
-void SHA512_256_Update(SHA512_CTX *, const void *, size_t);
-void SHA512_256_Final(unsigned char [__min_size(SHA512_256_DIGEST_LENGTH)],
-    SHA512_CTX *);
-#ifndef _KERNEL
-char *SHA512_256_End(SHA512_CTX *, char *);
-char *SHA512_256_Data(const void *, unsigned int, char *);
-char *SHA512_256_Fd(int, char *);
-char *SHA512_256_FdChunk(int, char *, off_t, off_t);
-char *SHA512_256_File(const char *, char *);
-char *SHA512_256_FileChunk(const char *, char *, off_t, off_t);
-#endif
-
-__END_DECLS
-
-#endif /* !_SHA512T_H_ */
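
For readers unfamiliar with the libmd-style interface declared in the deleted sha512t.h, the sketch below shows how a consumer would typically drive the SHA-512/256 Init/Update/Final calls. It is a minimal illustration under stated assumptions, not part of this patch: it assumes a FreeBSD userland where the header is available as <sha512t.h> and the program is linked against libmd (-lmd).

/*
 * Illustrative sketch only: compute a SHA-512/256 digest with the
 * libmd-style streaming API (assumes FreeBSD userland, <sha512t.h>,
 * link with -lmd).
 */
#include <sha512t.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
	SHA512_CTX ctx;
	unsigned char digest[SHA512_256_DIGEST_LENGTH];
	const char *msg = "abc";

	SHA512_256_Init(&ctx);				/* load the SHA-512/256 IVs */
	SHA512_256_Update(&ctx, msg, strlen(msg));	/* data may be fed in any chunking */
	SHA512_256_Final(digest, &ctx);			/* pad, emit digest, wipe the context */

	for (size_t i = 0; i < sizeof (digest); i++)
		printf("%02x", digest[i]);
	printf("\n");
	return (0);
}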