zfs/module/icp/algs/sha2/sha2.c

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
/*
 * Copyright 2013 Saso Kiselkov.  All rights reserved.
 */

/*
 * The basic framework for this code came from the reference
 * implementation for MD5.  That implementation is Copyright (C)
 * 1991-2, RSA Data Security, Inc. Created 1991. All rights reserved.
 *
 * License to copy and use this software is granted provided that it
 * is identified as the "RSA Data Security, Inc. MD5 Message-Digest
 * Algorithm" in all material mentioning or referencing this software
 * or this function.
 *
 * License is also granted to make and use derivative works provided
 * that such works are identified as "derived from the RSA Data
 * Security, Inc. MD5 Message-Digest Algorithm" in all material
 * mentioning or referencing the derived work.
 *
 * RSA Data Security, Inc. makes no representations concerning either
 * the merchantability of this software or the suitability of this
 * software for any particular purpose. It is provided "as is"
 * without express or implied warranty of any kind.
 *
 * These notices must be retained in any copies of any part of this
 * documentation and/or software.
 *
 * NOTE: Cleaned-up and optimized, version of SHA2, based on the FIPS 180-2
 * standard, available at
 * http://csrc.nist.gov/publications/fips/fips180-2/fips180-2.pdf
 * Not as fast as one would like -- further optimizations are encouraged
 * and appreciated.
 */

#include <sys/zfs_context.h>
#define	_SHA2_IMPL
#include <sha2/sha2.h>
#include <sha2/sha2_consts.h>

#define	_RESTRICT_KYWD

#ifdef _LITTLE_ENDIAN
#include <sys/byteorder.h>
#define	HAVE_HTONL
#endif

/* Serializes 32-bit words as big endian bytes; defined later in this file. */
static void Encode(uint8_t *, uint32_t *, size_t);

/*
 * On amd64, SHA256Transform() is a macro that hands exactly one block to
 * SHA256TransformBlocks(), which is only declared here (its definition is
 * not in this file).  Every other target uses the static C implementation
 * of SHA256Transform() found further down.
 */
#if	defined(__amd64)
#define	SHA256Transform(ctx, in) SHA256TransformBlocks((ctx), (in), 1)
void SHA256TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num);
#else
static void SHA256Transform(SHA2_CTX *, const uint8_t *);
#endif	/* __amd64 */

/*
 * Padding source used by SHA2Final(): one 0x80 marker byte followed by
 * zeros.  It is only ever read (it is passed to SHA2Update() as the input
 * pointer, which is const-qualified), so it is declared const.
 */
static const uint8_t PADDING[128] = { 0x80, /* all zeros */ };

/*
 * Ch and Maj are the basic SHA2 functions.
 * Every macro argument is parenthesized so that compound expressions
 * (e.g. "x | y") expand with the intended precedence.
 */
#define	Ch(b, c, d)	(((b) & (c)) ^ ((~(b)) & (d)))
#define	Maj(b, c, d)	(((b) & (c)) ^ ((b) & (d)) ^ ((c) & (d)))

/*
 * Rotates x right n bits.
 * The width is taken from sizeof (x), so x must already have the intended
 * unsigned type; n must be greater than 0 and less than that width, since
 * the left shift count is (width - n).
 */
#define	ROTR(x, n)	\
	(((x) >> (n)) | ((x) << ((sizeof (x) * NBBY)-(n))))

/* Shift x right n bits */
#define	SHR(x, n)	((x) >> (n))

/* SHA256 Functions */
#define	BIGSIGMA0_256(x)	(ROTR((x), 2) ^ ROTR((x), 13) ^ ROTR((x), 22))
#define	BIGSIGMA1_256(x)	(ROTR((x), 6) ^ ROTR((x), 11) ^ ROTR((x), 25))
#define	SIGMA0_256(x)		(ROTR((x), 7) ^ ROTR((x), 18) ^ SHR((x), 3))
#define	SIGMA1_256(x)		(ROTR((x), 17) ^ ROTR((x), 19) ^ SHR((x), 10))

/*
 * One SHA256 round.  Only d and h are written; the caller rotates the
 * argument order from round to round instead of moving values between
 * variables.  Requires scratch variables T1 and T2 in the calling scope.
 * i is the round number handed to SHA256_CONST(), w the schedule word.
 * Wrapped in do/while (0) so the macro behaves as a single statement.
 */
#define	SHA256ROUND(a, b, c, d, e, f, g, h, i, w)			\
	do {								\
		T1 = (h) + BIGSIGMA1_256(e) + Ch(e, f, g) +		\
		    SHA256_CONST(i) + (w);				\
		(d) += T1;						\
		T2 = BIGSIGMA0_256(a) + Maj(a, b, c);			\
		(h) = T1 + T2;						\
	} while (0)

/*
 * sparc optimization:
 *
 * on the sparc, we can load big endian 32-bit data easily.  note that
 * special care must be taken to ensure the address is 32-bit aligned.
 * in the interest of speed, we don't check to make sure, since
 * careful programming can guarantee this for us.
 */

#if	defined(_BIG_ENDIAN)
/* host order is already big endian: plain (aligned) loads */
#define	LOAD_BIG_32(addr)	(*(const uint32_t *)(addr))
#define	LOAD_BIG_64(addr)	(*(const uint64_t *)(addr))

#elif	defined(HAVE_HTONL)
/* aligned load followed by a byte swap */
#define	LOAD_BIG_32(addr) htonl(*((const uint32_t *)(addr)))
#define	LOAD_BIG_64(addr) htonll(*((const uint64_t *)(addr)))

#else
/*
 * Byte-wise fallback: independent of host byte order and alignment, but
 * slower.  addr must point at uint8_t.  Each byte is widened to the
 * unsigned result type BEFORE shifting; shifting the int-promoted byte
 * left by 24 would overflow a signed int when the top bit is set.
 */
#define	LOAD_BIG_32(addr)	\
	(((uint32_t)(addr)[0] << 24) | ((uint32_t)(addr)[1] << 16) |	\
	    ((uint32_t)(addr)[2] << 8) | (uint32_t)(addr)[3])
#define	LOAD_BIG_64(addr)	\
	(((uint64_t)(addr)[0] << 56) | ((uint64_t)(addr)[1] << 48) |	\
	    ((uint64_t)(addr)[2] << 40) | ((uint64_t)(addr)[3] << 32) |	\
	    ((uint64_t)(addr)[4] << 24) | ((uint64_t)(addr)[5] << 16) |	\
	    ((uint64_t)(addr)[6] << 8) | (uint64_t)(addr)[7])
#endif	/* _BIG_ENDIAN */


#if	!defined(__amd64)
/* SHA256 Transform */

/*
 * SHA256Transform()
 *
 * purpose: runs the 64 SHA256 rounds over one message block and adds the
 *          result into the context's chaining state.  Only built when
 *          __amd64 is not defined.
 *   input: SHA2_CTX *	: the context whose state.s32[0..7] is updated
 *          uint8_t *	: the block to digest; sixteen 32-bit big endian
 *			: words (64 bytes) are read from it
 *  output: void
 *
 * note: when blk is not 4-byte aligned the block is first staged in
 *       ctx->buf_un.buf32, overwriting the start of that buffer.
 */
static void
SHA256Transform(SHA2_CTX *ctx, const uint8_t *blk)
{
	/* working variables start from the current chaining state */
	uint32_t a = ctx->state.s32[0];
	uint32_t b = ctx->state.s32[1];
	uint32_t c = ctx->state.s32[2];
	uint32_t d = ctx->state.s32[3];
	uint32_t e = ctx->state.s32[4];
	uint32_t f = ctx->state.s32[5];
	uint32_t g = ctx->state.s32[6];
	uint32_t h = ctx->state.s32[7];

	/* rolling 16-word window of the message schedule */
	uint32_t w0, w1, w2, w3, w4, w5, w6, w7;
	uint32_t w8, w9, w10, w11, w12, w13, w14, w15;
	/* scratch variables written by SHA256ROUND */
	uint32_t T1, T2;

	if ((uintptr_t)blk & 0x3) {		/* not 4-byte aligned? */
		/*
		 * LOAD_BIG_32 may dereference blk as a uint32_t, so stage
		 * the block in the context buffer.  Copy exactly the 64
		 * bytes (16 words) consumed below: sizeof (buf32) may be
		 * larger than one block (its declaration is not visible
		 * here), and copying that much could read past the end of
		 * the caller's input.
		 */
		bcopy(blk, ctx->buf_un.buf32, 16 * sizeof (uint32_t));
		blk = (const uint8_t *)ctx->buf_un.buf32;
	}

	/* rounds 0-15: schedule words come straight from the block */
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w0 =  LOAD_BIG_32(blk + 4 * 0);
	SHA256ROUND(a, b, c, d, e, f, g, h, 0, w0);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w1 =  LOAD_BIG_32(blk + 4 * 1);
	SHA256ROUND(h, a, b, c, d, e, f, g, 1, w1);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w2 =  LOAD_BIG_32(blk + 4 * 2);
	SHA256ROUND(g, h, a, b, c, d, e, f, 2, w2);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w3 =  LOAD_BIG_32(blk + 4 * 3);
	SHA256ROUND(f, g, h, a, b, c, d, e, 3, w3);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w4 =  LOAD_BIG_32(blk + 4 * 4);
	SHA256ROUND(e, f, g, h, a, b, c, d, 4, w4);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w5 =  LOAD_BIG_32(blk + 4 * 5);
	SHA256ROUND(d, e, f, g, h, a, b, c, 5, w5);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w6 =  LOAD_BIG_32(blk + 4 * 6);
	SHA256ROUND(c, d, e, f, g, h, a, b, 6, w6);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w7 =  LOAD_BIG_32(blk + 4 * 7);
	SHA256ROUND(b, c, d, e, f, g, h, a, 7, w7);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w8 =  LOAD_BIG_32(blk + 4 * 8);
	SHA256ROUND(a, b, c, d, e, f, g, h, 8, w8);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w9 =  LOAD_BIG_32(blk + 4 * 9);
	SHA256ROUND(h, a, b, c, d, e, f, g, 9, w9);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w10 =  LOAD_BIG_32(blk + 4 * 10);
	SHA256ROUND(g, h, a, b, c, d, e, f, 10, w10);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w11 =  LOAD_BIG_32(blk + 4 * 11);
	SHA256ROUND(f, g, h, a, b, c, d, e, 11, w11);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w12 =  LOAD_BIG_32(blk + 4 * 12);
	SHA256ROUND(e, f, g, h, a, b, c, d, 12, w12);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w13 =  LOAD_BIG_32(blk + 4 * 13);
	SHA256ROUND(d, e, f, g, h, a, b, c, 13, w13);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w14 =  LOAD_BIG_32(blk + 4 * 14);
	SHA256ROUND(c, d, e, f, g, h, a, b, 14, w14);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w15 =  LOAD_BIG_32(blk + 4 * 15);
	SHA256ROUND(b, c, d, e, f, g, h, a, 15, w15);

	/*
	 * rounds 16-63: each schedule word is derived in place from the
	 * words 2, 7, 15 and 16 positions behind it in the rolling window
	 */
	w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0;
	SHA256ROUND(a, b, c, d, e, f, g, h, 16, w0);
	w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1;
	SHA256ROUND(h, a, b, c, d, e, f, g, 17, w1);
	w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2;
	SHA256ROUND(g, h, a, b, c, d, e, f, 18, w2);
	w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3;
	SHA256ROUND(f, g, h, a, b, c, d, e, 19, w3);
	w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4;
	SHA256ROUND(e, f, g, h, a, b, c, d, 20, w4);
	w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5;
	SHA256ROUND(d, e, f, g, h, a, b, c, 21, w5);
	w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6;
	SHA256ROUND(c, d, e, f, g, h, a, b, 22, w6);
	w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7;
	SHA256ROUND(b, c, d, e, f, g, h, a, 23, w7);
	w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8;
	SHA256ROUND(a, b, c, d, e, f, g, h, 24, w8);
	w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9;
	SHA256ROUND(h, a, b, c, d, e, f, g, 25, w9);
	w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10;
	SHA256ROUND(g, h, a, b, c, d, e, f, 26, w10);
	w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11;
	SHA256ROUND(f, g, h, a, b, c, d, e, 27, w11);
	w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12;
	SHA256ROUND(e, f, g, h, a, b, c, d, 28, w12);
	w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13;
	SHA256ROUND(d, e, f, g, h, a, b, c, 29, w13);
	w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14;
	SHA256ROUND(c, d, e, f, g, h, a, b, 30, w14);
	w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15;
	SHA256ROUND(b, c, d, e, f, g, h, a, 31, w15);

	w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0;
	SHA256ROUND(a, b, c, d, e, f, g, h, 32, w0);
	w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1;
	SHA256ROUND(h, a, b, c, d, e, f, g, 33, w1);
	w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2;
	SHA256ROUND(g, h, a, b, c, d, e, f, 34, w2);
	w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3;
	SHA256ROUND(f, g, h, a, b, c, d, e, 35, w3);
	w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4;
	SHA256ROUND(e, f, g, h, a, b, c, d, 36, w4);
	w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5;
	SHA256ROUND(d, e, f, g, h, a, b, c, 37, w5);
	w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6;
	SHA256ROUND(c, d, e, f, g, h, a, b, 38, w6);
	w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7;
	SHA256ROUND(b, c, d, e, f, g, h, a, 39, w7);
	w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8;
	SHA256ROUND(a, b, c, d, e, f, g, h, 40, w8);
	w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9;
	SHA256ROUND(h, a, b, c, d, e, f, g, 41, w9);
	w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10;
	SHA256ROUND(g, h, a, b, c, d, e, f, 42, w10);
	w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11;
	SHA256ROUND(f, g, h, a, b, c, d, e, 43, w11);
	w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12;
	SHA256ROUND(e, f, g, h, a, b, c, d, 44, w12);
	w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13;
	SHA256ROUND(d, e, f, g, h, a, b, c, 45, w13);
	w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14;
	SHA256ROUND(c, d, e, f, g, h, a, b, 46, w14);
	w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15;
	SHA256ROUND(b, c, d, e, f, g, h, a, 47, w15);

	w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0;
	SHA256ROUND(a, b, c, d, e, f, g, h, 48, w0);
	w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1;
	SHA256ROUND(h, a, b, c, d, e, f, g, 49, w1);
	w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2;
	SHA256ROUND(g, h, a, b, c, d, e, f, 50, w2);
	w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3;
	SHA256ROUND(f, g, h, a, b, c, d, e, 51, w3);
	w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4;
	SHA256ROUND(e, f, g, h, a, b, c, d, 52, w4);
	w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5;
	SHA256ROUND(d, e, f, g, h, a, b, c, 53, w5);
	w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6;
	SHA256ROUND(c, d, e, f, g, h, a, b, 54, w6);
	w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7;
	SHA256ROUND(b, c, d, e, f, g, h, a, 55, w7);
	w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8;
	SHA256ROUND(a, b, c, d, e, f, g, h, 56, w8);
	w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9;
	SHA256ROUND(h, a, b, c, d, e, f, g, 57, w9);
	w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10;
	SHA256ROUND(g, h, a, b, c, d, e, f, 58, w10);
	w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11;
	SHA256ROUND(f, g, h, a, b, c, d, e, 59, w11);
	w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12;
	SHA256ROUND(e, f, g, h, a, b, c, d, 60, w12);
	w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13;
	SHA256ROUND(d, e, f, g, h, a, b, c, 61, w13);
	w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14;
	SHA256ROUND(c, d, e, f, g, h, a, b, 62, w14);
	w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15;
	SHA256ROUND(b, c, d, e, f, g, h, a, 63, w15);

	/* fold the working variables back into the chaining state */
	ctx->state.s32[0] += a;
	ctx->state.s32[1] += b;
	ctx->state.s32[2] += c;
	ctx->state.s32[3] += d;
	ctx->state.s32[4] += e;
	ctx->state.s32[5] += f;
	ctx->state.s32[6] += g;
	ctx->state.s32[7] += h;
}
#endif	/* !__amd64 */


/*
 * Encode()
 *
 * purpose: to serialize an array of 32-bit words as big endian bytes,
 *          most significant byte of each word first
 *   input: uint8_t *	: place to store the big endian bytes
 *	    uint32_t *	: the words to serialize
 *          size_t	: the number of bytes to produce
 *  output: void
 *
 * Bytes are emitted one whole word (four bytes) per pass, so callers are
 * expected to pass a length that is a multiple of four.
 */

static void
Encode(uint8_t *_RESTRICT_KYWD output, uint32_t *_RESTRICT_KYWD input,
    size_t len)
{
	size_t	off = 0;

	while (off < len) {
		/* off is a multiple of 4 here, so off / 4 is the word index */
		uint32_t	word = input[off / 4];

		output[off++] = (uint8_t)(word >> 24);
		output[off++] = (uint8_t)(word >> 16);
		output[off++] = (uint8_t)(word >> 8);
		output[off++] = (uint8_t)word;
	}
}

/*
 * SHA2Init()
 *
 * purpose: prepares a context for a new digest: loads the initial hash
 *          value for the requested mechanism, records the mechanism in
 *          the context and clears the bit counter.
 *   input: uint64_t	: the mechanism; only the three SHA256 mechanism
 *			: types are accepted, anything else is reported
 *			: through cmn_err(CE_PANIC, ...)
 *          SHA2_CTX *	: the context to initialize
 *  output: void
 */
void
SHA2Init(uint64_t mech, SHA2_CTX *ctx)
{
	/* initial hash value loaded for the SHA256 mechanisms */
	static const uint32_t sha256_iv[] = {
		0x6a09e667U, 0xbb67ae85U, 0x3c6ef372U, 0xa54ff53aU,
		0x510e527fU, 0x9b05688cU, 0x1f83d9abU, 0x5be0cd19U
	};
	size_t	k;

	if (mech == SHA256_MECH_INFO_TYPE ||
	    mech == SHA256_HMAC_MECH_INFO_TYPE ||
	    mech == SHA256_HMAC_GEN_MECH_INFO_TYPE) {
		for (k = 0; k < sizeof (sha256_iv) / sizeof (sha256_iv[0]);
		    k++)
			ctx->state.s32[k] = sha256_iv[k];
	} else {
		cmn_err(CE_PANIC,
		    "sha2_init: failed to find a supported algorithm: 0x%x",
		    (uint32_t)mech);
	}

	/* no input has been absorbed yet */
	ctx->count.c64[1] = 0;
	ctx->count.c64[0] = 0;
	ctx->algotype = (uint32_t)mech;
}

/*
 * SHA256Init()
 *
 * purpose: convenience entry point; initializes the context by calling
 *          SHA2Init() with SHA256 as the mechanism.
 *   input: SHA256_CTX * : the context to initialize (handed to SHA2Init()
 *			 : unchanged, so it is presumably the same type as
 *			 : SHA2_CTX -- confirm in sha2.h)
 *  output: void
 */
void
SHA256Init(SHA256_CTX *ctx)
{
	SHA2Init(SHA256, ctx);
}

/*
 * SHA2Update()
 *
 * purpose: continues an sha2 digest operation, using the message block
 *          to update the context.  Whole blocks are transformed right
 *          away; any tail shorter than a block is kept in the context
 *          buffer for the next call.
 *   input: SHA2_CTX *	: the context to update
 *          void *	: the message block
 *          size_t      : the length of the message block, in bytes
 *  output: void
 *
 * The running bit count is kept as a high/low word pair.  The low and
 * high parts of (input_len * 8) are computed at the width of the counter
 * word before being added, so the carry test is exact for every size_t
 * value, and the input offset is a size_t so single calls of 4 GiB or
 * more are walked correctly.
 */

void
SHA2Update(SHA2_CTX *ctx, const void *inptr, size_t input_len)
{
	size_t		i;
	uint32_t	buf_index, buf_len, buf_limit;
	const uint8_t	*input = inptr;
	uint32_t	algotype = ctx->algotype;
#if defined(__amd64)
	size_t		block_count;
#endif	/* __amd64 */


	/* check for noop */
	if (input_len == 0)
		return;

	if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
		/* input_len * 8, split into the low and high 32 bits */
		uint32_t	lo_bits = (uint32_t)(input_len << 3);
		uint32_t	hi_bits =
		    (uint32_t)((uint64_t)input_len >> 29);

		buf_limit = 64;

		/* compute number of bytes mod 64 */
		buf_index = (ctx->count.c32[1] >> 3) & 0x3F;

		/* update number of bits, carrying into the high word */
		ctx->count.c32[1] += lo_bits;
		if (ctx->count.c32[1] < lo_bits)
			ctx->count.c32[0]++;

		ctx->count.c32[0] += hi_bits;

	} else {
		/*
		 * 128-byte block path.  NOTE(review): this file has no
		 * transform for it and SHA2Init() only accepts the SHA256
		 * mechanisms, so it looks unreachable here -- confirm.
		 */
		/* input_len * 8, split into the low and high 64 bits */
		uint64_t	lo_bits = (uint64_t)input_len << 3;
		uint64_t	hi_bits = (uint64_t)input_len >> 61;

		buf_limit = 128;

		/* compute number of bytes mod 128 */
		buf_index = (ctx->count.c64[1] >> 3) & 0x7F;

		/* update number of bits, carrying into the high word */
		ctx->count.c64[1] += lo_bits;
		if (ctx->count.c64[1] < lo_bits)
			ctx->count.c64[0]++;

		ctx->count.c64[0] += hi_bits;
	}

	/* bytes needed to complete the partially filled buffer */
	buf_len = buf_limit - buf_index;

	/* transform as many times as possible */
	i = 0;
	if (input_len >= buf_len) {

		/*
		 * general optimization:
		 *
		 * only do initial bcopy() and SHA2Transform() if
		 * buf_index != 0.  if buf_index == 0, we're just
		 * wasting our time doing the bcopy() since there
		 * wasn't any data left over from a previous call to
		 * SHA2Update().
		 */
		if (buf_index) {
			bcopy(input, &ctx->buf_un.buf8[buf_index], buf_len);
			if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE)
				SHA256Transform(ctx, ctx->buf_un.buf8);

			i = buf_len;
		}

#if !defined(__amd64)
		if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
			/* i never exceeds input_len, so this cannot wrap */
			for (; input_len - i >= buf_limit; i += buf_limit) {
				SHA256Transform(ctx, &input[i]);
			}
		}

#else
		if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
			/* hand every remaining whole 64-byte block over */
			block_count = (input_len - i) >> 6;
			if (block_count > 0) {
				SHA256TransformBlocks(ctx, &input[i],
				    block_count);
				i += block_count << 6;
			}
		}
#endif	/* !__amd64 */

		/*
		 * general optimization:
		 *
		 * if i and input_len are the same, return now instead
		 * of calling bcopy(), since the bcopy() in this case
		 * will be an expensive noop.
		 */

		if (input_len == i)
			return;

		/* the buffer was consumed; the tail starts at its front */
		buf_index = 0;
	}

	/* buffer remaining input */
	bcopy(&input[i], &ctx->buf_un.buf8[buf_index], input_len - i);
}


/*
 * SHA2Final()
 *
 * purpose: ends an sha2 digest operation: appends the padding and the
 *          big endian bit count, writes the digest and wipes the context.
 *   input: uchar_t *	: a buffer to store the digest
 *			: (declared void * because callers pass other
 *			: pointer types); sizeof (ctx->state.s32) bytes
 *			: are written
 *          SHA2_CTX *  : the context to finalize and zero
 *  output: void
 *
 * For an algotype above SHA256_HMAC_GEN_MECH_INFO_TYPE nothing is written
 * to digest; the context is still zeroed.
 */

void
SHA2Final(void *digest, SHA2_CTX *ctx)
{
	if (ctx->algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
		uint8_t		len_be[sizeof (ctx->count.c32)];
		/* bytes currently sitting in the 64-byte block buffer */
		uint32_t	used = (ctx->count.c32[1] >> 3) & 0x3f;
		uint32_t	pad_len;

		/*
		 * Pad up to byte 56 of a block so the encoded bit count
		 * that follows ends exactly on a block boundary; spill
		 * into the next block when fewer than 56 bytes are used up.
		 */
		if (used < 56)
			pad_len = 56 - used;
		else
			pad_len = 120 - used;

		/* capture the count before the padding updates change it */
		Encode(len_be, ctx->count.c32, sizeof (len_be));

		SHA2Update(ctx, PADDING, pad_len);
		SHA2Update(ctx, len_be, sizeof (len_be));

		Encode(digest, ctx->state.s32, sizeof (ctx->state.s32));
	}

	/* zeroize sensitive information */
	bzero(ctx, sizeof (*ctx));
}
Illumos Crypto Port module added to enable native encryption in ZFS.

A port of the Illumos Crypto Framework to a Linux kernel module (found in module/icp). This is needed to do the actual encryption work. We cannot use the Linux kernel's built-in crypto API because it is only exported to GPL-licensed modules. Having the ICP also means the crypto code can run on any of the other kernels under OpenZFS. I ended up porting over most of the internals of the framework, which means that porting over other API calls (if we need them) should be fairly easy. Specifically, I have ported over the API functions related to encryption, digests, MACs, and crypto templates. The ICP is able to use assembly-accelerated encryption on amd64 machines and AES-NI instructions on Intel chips that support it. There are place-holder directories for similar assembly optimizations for other architectures (although they have not been written).

Signed-off-by: Tom Caputi <tcaputi@datto.com>
Signed-off-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Issue #4329
2016-05-12 14:51:24 +00:00
			`/*`
			`* Copyright 2009 Sun Microsystems, Inc. All rights reserved.`
			`* Use is subject to license terms.`
			`*/`
			`/*`
			`* Copyright 2013 Saso Kiselkov. All rights reserved.`
			`*/`

			`/*`
			`* The basic framework for this code came from the reference`
			`* implementation for MD5. That implementation is Copyright (C)`
			`* 1991-2, RSA Data Security, Inc. Created 1991. All rights reserved.`
			`*`
			`* License to copy and use this software is granted provided that it`
			`* is identified as the "RSA Data Security, Inc. MD5 Message-Digest`
			`* Algorithm" in all material mentioning or referencing this software`
			`* or this function.`
			`*`
			`* License is also granted to make and use derivative works provided`
			`* that such works are identified as "derived from the RSA Data`
			`* Security, Inc. MD5 Message-Digest Algorithm" in all material`
			`* mentioning or referencing the derived work.`
			`*`
			`* RSA Data Security, Inc. makes no representations concerning either`
			`* the merchantability of this software or the suitability of this`
			`* software for any particular purpose. It is provided "as is"`
			`* without express or implied warranty of any kind.`
			`*`
			`* These notices must be retained in any copies of any part of this`
			`* documentation and/or software.`
			`*`
			`* NOTE: Cleaned-up and optimized, version of SHA2, based on the FIPS 180-2`
			`* standard, available at`
			`* http://csrc.nist.gov/publications/fips/fips180-2/fips180-2.pdf`
			`* Not as fast as one would like -- further optimizations are encouraged`
			`* and appreciated.`
			`*/`

			`#include <sys/zfs_context.h>`
			`#define _SHA2_IMPL`
			`#include <sha2/sha2.h>`
			`#include <sha2/sha2_consts.h>`

			`#define _RESTRICT_KYWD`

			`#ifdef _LITTLE_ENDIAN`
			`#include <sys/byteorder.h>`
			`#define HAVE_HTONL`
			`#endif`

			`static void Encode(uint8_t , uint32_t , size_t);`

			`#if defined(__amd64)`
			`#define SHA256Transform(ctx, in) SHA256TransformBlocks((ctx), (in), 1)`
			`void SHA256TransformBlocks(SHA2_CTX ctx, const void in, size_t num);`
			`#else`
			`static void SHA256Transform(SHA2_CTX , const uint8_t );`
			`#endif /* __amd64 */`

			`static uint8_t PADDING[128] = { 0x80, /* all zeros */ };`

			`/* Ch and Maj are the basic SHA2 functions. */`
			`#define Ch(b, c, d) (((b) & (c)) ^ ((~b) & (d)))`
			`#define Maj(b, c, d) (((b) & (c)) ^ ((b) & (d)) ^ ((c) & (d)))`

			`/* Rotates x right n bits. */`
			`#define ROTR(x, n) \`
			`(((x) >> (n)) \| ((x) << ((sizeof (x) * NBBY)-(n))))`

			`/* Shift x right n bits */`
			`#define SHR(x, n) ((x) >> (n))`

			`/* SHA256 Functions */`
			`#define BIGSIGMA0_256(x) (ROTR((x), 2) ^ ROTR((x), 13) ^ ROTR((x), 22))`
			`#define BIGSIGMA1_256(x) (ROTR((x), 6) ^ ROTR((x), 11) ^ ROTR((x), 25))`
			`#define SIGMA0_256(x) (ROTR((x), 7) ^ ROTR((x), 18) ^ SHR((x), 3))`
			`#define SIGMA1_256(x) (ROTR((x), 17) ^ ROTR((x), 19) ^ SHR((x), 10))`

			`#define SHA256ROUND(a, b, c, d, e, f, g, h, i, w) \`
			`T1 = h + BIGSIGMA1_256(e) + Ch(e, f, g) + SHA256_CONST(i) + w; \`
			`d += T1; \`
			`T2 = BIGSIGMA0_256(a) + Maj(a, b, c); \`
			`h = T1 + T2`

			`/*`
			`* sparc optimization:`
			`*`
			`* on the sparc, we can load big endian 32-bit data easily. note that`
			`* special care must be taken to ensure the address is 32-bit aligned.`
			`* in the interest of speed, we don't check to make sure, since`
			`* careful programming can guarantee this for us.`
			`*/`

			`#if defined(_BIG_ENDIAN)`
			`#define LOAD_BIG_32(addr) ((uint32_t )(addr))`
			`#define LOAD_BIG_64(addr) ((uint64_t )(addr))`

			`#elif defined(HAVE_HTONL)`
			`#define LOAD_BIG_32(addr) htonl(((uint32_t )(addr)))`
			`#define LOAD_BIG_64(addr) htonll(((uint64_t )(addr)))`

			`#else`
			`/* little endian -- will work on big endian, but slowly */`
			`#define LOAD_BIG_32(addr) \`
			`(((addr)[0] << 24) \| ((addr)[1] << 16) \| ((addr)[2] << 8) \| (addr)[3])`
			`#define LOAD_BIG_64(addr) \`
			`(((uint64_t)(addr)[0] << 56) \| ((uint64_t)(addr)[1] << 48) \| \`
			`((uint64_t)(addr)[2] << 40) \| ((uint64_t)(addr)[3] << 32) \| \`
			`((uint64_t)(addr)[4] << 24) \| ((uint64_t)(addr)[5] << 16) \| \`
			`((uint64_t)(addr)[6] << 8) \| (uint64_t)(addr)[7])`
			`#endif /* _BIG_ENDIAN */`


			`#if !defined(__amd64)`
			`/* SHA256 Transform */`

			`static void`
			`SHA256Transform(SHA2_CTX ctx, const uint8_t blk)`
			`{`
			`uint32_t a = ctx->state.s32[0];`
			`uint32_t b = ctx->state.s32[1];`
			`uint32_t c = ctx->state.s32[2];`
			`uint32_t d = ctx->state.s32[3];`
			`uint32_t e = ctx->state.s32[4];`
			`uint32_t f = ctx->state.s32[5];`
			`uint32_t g = ctx->state.s32[6];`
			`uint32_t h = ctx->state.s32[7];`

			`uint32_t w0, w1, w2, w3, w4, w5, w6, w7;`
			`uint32_t w8, w9, w10, w11, w12, w13, w14, w15;`
			`uint32_t T1, T2;`

			`if ((uintptr_t)blk & 0x3) { /* not 4-byte aligned? */`
			`bcopy(blk, ctx->buf_un.buf32, sizeof (ctx->buf_un.buf32));`
			`blk = (uint8_t *)ctx->buf_un.buf32;`
			`}`

			`/* LINTED E_BAD_PTR_CAST_ALIGN */`
			`w0 = LOAD_BIG_32(blk + 4 * 0);`
			`SHA256ROUND(a, b, c, d, e, f, g, h, 0, w0);`
			`/* LINTED E_BAD_PTR_CAST_ALIGN */`
			`w1 = LOAD_BIG_32(blk + 4 * 1);`
			`SHA256ROUND(h, a, b, c, d, e, f, g, 1, w1);`
			`/* LINTED E_BAD_PTR_CAST_ALIGN */`
			`w2 = LOAD_BIG_32(blk + 4 * 2);`
			`SHA256ROUND(g, h, a, b, c, d, e, f, 2, w2);`
			`/* LINTED E_BAD_PTR_CAST_ALIGN */`
			`w3 = LOAD_BIG_32(blk + 4 * 3);`
			`SHA256ROUND(f, g, h, a, b, c, d, e, 3, w3);`
			`/* LINTED E_BAD_PTR_CAST_ALIGN */`
			`w4 = LOAD_BIG_32(blk + 4 * 4);`
			`SHA256ROUND(e, f, g, h, a, b, c, d, 4, w4);`
			`/* LINTED E_BAD_PTR_CAST_ALIGN */`
			`w5 = LOAD_BIG_32(blk + 4 * 5);`
			`SHA256ROUND(d, e, f, g, h, a, b, c, 5, w5);`
			`/* LINTED E_BAD_PTR_CAST_ALIGN */`
			`w6 = LOAD_BIG_32(blk + 4 * 6);`
			`SHA256ROUND(c, d, e, f, g, h, a, b, 6, w6);`
			`/* LINTED E_BAD_PTR_CAST_ALIGN */`
			`w7 = LOAD_BIG_32(blk + 4 * 7);`
			`SHA256ROUND(b, c, d, e, f, g, h, a, 7, w7);`
			`/* LINTED E_BAD_PTR_CAST_ALIGN */`
			`w8 = LOAD_BIG_32(blk + 4 * 8);`
			`SHA256ROUND(a, b, c, d, e, f, g, h, 8, w8);`
			`/* LINTED E_BAD_PTR_CAST_ALIGN */`
			`w9 = LOAD_BIG_32(blk + 4 * 9);`
			`SHA256ROUND(h, a, b, c, d, e, f, g, 9, w9);`
			`/* LINTED E_BAD_PTR_CAST_ALIGN */`
			`w10 = LOAD_BIG_32(blk + 4 * 10);`
			`SHA256ROUND(g, h, a, b, c, d, e, f, 10, w10);`
			`/* LINTED E_BAD_PTR_CAST_ALIGN */`
			`w11 = LOAD_BIG_32(blk + 4 * 11);`
			`SHA256ROUND(f, g, h, a, b, c, d, e, 11, w11);`
			`/* LINTED E_BAD_PTR_CAST_ALIGN */`
			`w12 = LOAD_BIG_32(blk + 4 * 12);`
			`SHA256ROUND(e, f, g, h, a, b, c, d, 12, w12);`
			`/* LINTED E_BAD_PTR_CAST_ALIGN */`
			`w13 = LOAD_BIG_32(blk + 4 * 13);`
			`SHA256ROUND(d, e, f, g, h, a, b, c, 13, w13);`
			`/* LINTED E_BAD_PTR_CAST_ALIGN */`
			`w14 = LOAD_BIG_32(blk + 4 * 14);`
			`SHA256ROUND(c, d, e, f, g, h, a, b, 14, w14);`
			`/* LINTED E_BAD_PTR_CAST_ALIGN */`
			`w15 = LOAD_BIG_32(blk + 4 * 15);`
			`SHA256ROUND(b, c, d, e, f, g, h, a, 15, w15);`

			`w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0;`
			`SHA256ROUND(a, b, c, d, e, f, g, h, 16, w0);`
			`w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1;`
			`SHA256ROUND(h, a, b, c, d, e, f, g, 17, w1);`
			`w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2;`
			`SHA256ROUND(g, h, a, b, c, d, e, f, 18, w2);`
			`w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3;`
			`SHA256ROUND(f, g, h, a, b, c, d, e, 19, w3);`
			`w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4;`
			`SHA256ROUND(e, f, g, h, a, b, c, d, 20, w4);`
			`w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5;`
			`SHA256ROUND(d, e, f, g, h, a, b, c, 21, w5);`
			`w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6;`
			`SHA256ROUND(c, d, e, f, g, h, a, b, 22, w6);`
			`w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7;`
			`SHA256ROUND(b, c, d, e, f, g, h, a, 23, w7);`
			`w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8;`
			`SHA256ROUND(a, b, c, d, e, f, g, h, 24, w8);`
			`w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9;`
			`SHA256ROUND(h, a, b, c, d, e, f, g, 25, w9);`
			`w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10;`
			`SHA256ROUND(g, h, a, b, c, d, e, f, 26, w10);`
			`w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11;`
			`SHA256ROUND(f, g, h, a, b, c, d, e, 27, w11);`
			`w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12;`
			`SHA256ROUND(e, f, g, h, a, b, c, d, 28, w12);`
			`w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13;`
			`SHA256ROUND(d, e, f, g, h, a, b, c, 29, w13);`
			`w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14;`
			`SHA256ROUND(c, d, e, f, g, h, a, b, 30, w14);`
			`w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15;`
			`SHA256ROUND(b, c, d, e, f, g, h, a, 31, w15);`

			`w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0;`
			`SHA256ROUND(a, b, c, d, e, f, g, h, 32, w0);`
			`w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1;`
			`SHA256ROUND(h, a, b, c, d, e, f, g, 33, w1);`
			`w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2;`
			`SHA256ROUND(g, h, a, b, c, d, e, f, 34, w2);`
			`w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3;`
			`SHA256ROUND(f, g, h, a, b, c, d, e, 35, w3);`
			`w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4;`
			`SHA256ROUND(e, f, g, h, a, b, c, d, 36, w4);`
			`w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5;`
			`SHA256ROUND(d, e, f, g, h, a, b, c, 37, w5);`
			`w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6;`
			`SHA256ROUND(c, d, e, f, g, h, a, b, 38, w6);`
			`w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7;`
			`SHA256ROUND(b, c, d, e, f, g, h, a, 39, w7);`
			`w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8;`
			`SHA256ROUND(a, b, c, d, e, f, g, h, 40, w8);`
			`w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9;`
			`SHA256ROUND(h, a, b, c, d, e, f, g, 41, w9);`
			`w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10;`
			`SHA256ROUND(g, h, a, b, c, d, e, f, 42, w10);`
			`w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11;`
			`SHA256ROUND(f, g, h, a, b, c, d, e, 43, w11);`
			`w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12;`
			`SHA256ROUND(e, f, g, h, a, b, c, d, 44, w12);`
			`w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13;`
			`SHA256ROUND(d, e, f, g, h, a, b, c, 45, w13);`
			`w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14;`
			`SHA256ROUND(c, d, e, f, g, h, a, b, 46, w14);`
			`w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15;`
			`SHA256ROUND(b, c, d, e, f, g, h, a, 47, w15);`

			`w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0;`
			`SHA256ROUND(a, b, c, d, e, f, g, h, 48, w0);`
			`w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1;`
			`SHA256ROUND(h, a, b, c, d, e, f, g, 49, w1);`
			`w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2;`
			`SHA256ROUND(g, h, a, b, c, d, e, f, 50, w2);`
			`w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3;`
			`SHA256ROUND(f, g, h, a, b, c, d, e, 51, w3);`
			`w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4;`
			`SHA256ROUND(e, f, g, h, a, b, c, d, 52, w4);`
			`w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5;`
			`SHA256ROUND(d, e, f, g, h, a, b, c, 53, w5);`
			`w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6;`
			`SHA256ROUND(c, d, e, f, g, h, a, b, 54, w6);`
			`w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7;`
			`SHA256ROUND(b, c, d, e, f, g, h, a, 55, w7);`
			`w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8;`
			`SHA256ROUND(a, b, c, d, e, f, g, h, 56, w8);`
			`w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9;`
			`SHA256ROUND(h, a, b, c, d, e, f, g, 57, w9);`
			`w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10;`
			`SHA256ROUND(g, h, a, b, c, d, e, f, 58, w10);`
			`w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11;`
			`SHA256ROUND(f, g, h, a, b, c, d, e, 59, w11);`
			`w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12;`
			`SHA256ROUND(e, f, g, h, a, b, c, d, 60, w12);`
			`w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13;`
			`SHA256ROUND(d, e, f, g, h, a, b, c, 61, w13);`
			`w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14;`
			`SHA256ROUND(c, d, e, f, g, h, a, b, 62, w14);`
			`w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15;`
			`SHA256ROUND(b, c, d, e, f, g, h, a, 63, w15);`

			`ctx->state.s32[0] += a;`
			`ctx->state.s32[1] += b;`
			`ctx->state.s32[2] += c;`
			`ctx->state.s32[3] += d;`
			`ctx->state.s32[4] += e;`
			`ctx->state.s32[5] += f;`
			`ctx->state.s32[6] += g;`
			`ctx->state.s32[7] += h;`
			`}`
			`#endif /* !__amd64 */`


			`/*`
			`* Encode()`
			`*`
			`* purpose: to convert a list of numbers from little endian to big endian`
			`* input: uint8_t * : place to store the converted big endian numbers`
			`* uint32_t * : place to get numbers to convert from`
			`* size_t : the length of the input in bytes`
			`* output: void`
			`*/`

			`static void`
			`Encode(uint8_t _RESTRICT_KYWD output, uint32_t _RESTRICT_KYWD input,`
			`size_t len)`
			`{`
			`size_t i, j;`

			`for (i = 0, j = 0; j < len; i++, j += 4) {`
			`output[j] = (input[i] >> 24) & 0xff;`
			`output[j + 1] = (input[i] >> 16) & 0xff;`
			`output[j + 2] = (input[i] >> 8) & 0xff;`
			`output[j + 3] = input[i] & 0xff;`
			`}`
			`}`

			`void`
			`SHA2Init(uint64_t mech, SHA2_CTX *ctx)`
			`{`

			`switch (mech) {`
			`case SHA256_MECH_INFO_TYPE:`
			`case SHA256_HMAC_MECH_INFO_TYPE:`
			`case SHA256_HMAC_GEN_MECH_INFO_TYPE:`
			`ctx->state.s32[0] = 0x6a09e667U;`
			`ctx->state.s32[1] = 0xbb67ae85U;`
			`ctx->state.s32[2] = 0x3c6ef372U;`
			`ctx->state.s32[3] = 0xa54ff53aU;`
			`ctx->state.s32[4] = 0x510e527fU;`
			`ctx->state.s32[5] = 0x9b05688cU;`
			`ctx->state.s32[6] = 0x1f83d9abU;`
			`ctx->state.s32[7] = 0x5be0cd19U;`
			`break;`
			`default:`
			`cmn_err(CE_PANIC,`
			`"sha2_init: failed to find a supported algorithm: 0x%x",`
			`(uint32_t)mech);`
			`}`

			`ctx->algotype = (uint32_t)mech;`
			`ctx->count.c64[0] = ctx->count.c64[1] = 0;`
			`}`

			`void`
			`SHA256Init(SHA256_CTX *ctx)`
			`{`
			`SHA2Init(SHA256, ctx);`
			`}`

			`/*`
			`* SHA2Update()`
			`*`
			`* purpose: continues an sha2 digest operation, using the message block`
			`* to update the context.`
			`* input: SHA2_CTX * : the context to update`
			`* void * : the message block`
			`* size_t : the length of the message block, in bytes`
			`* output: void`
			`*/`

			`void`
			`SHA2Update(SHA2_CTX ctx, const void inptr, size_t input_len)`
			`{`
			`uint32_t i, buf_index, buf_len, buf_limit;`
			`const uint8_t *input = inptr;`
			`uint32_t algotype = ctx->algotype;`
			`#if defined(__amd64)`
			`uint32_t block_count;`
			`#endif /* !__amd64 */`


			`/* check for noop */`
			`if (input_len == 0)`
			`return;`

			`if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {`
			`buf_limit = 64;`

			`/* compute number of bytes mod 64 */`
			`buf_index = (ctx->count.c32[1] >> 3) & 0x3F;`

			`/* update number of bits */`
			`if ((ctx->count.c32[1] += (input_len << 3)) < (input_len << 3))`
			`ctx->count.c32[0]++;`

			`ctx->count.c32[0] += (input_len >> 29);`

			`} else {`
			`buf_limit = 128;`

			`/* compute number of bytes mod 128 */`
			`buf_index = (ctx->count.c64[1] >> 3) & 0x7F;`

			`/* update number of bits */`
			`if ((ctx->count.c64[1] += (input_len << 3)) < (input_len << 3))`
			`ctx->count.c64[0]++;`

			`ctx->count.c64[0] += (input_len >> 29);`
			`}`

			`buf_len = buf_limit - buf_index;`

			`/* transform as many times as possible */`
			`i = 0;`
			`if (input_len >= buf_len) {`

			`/*`
			`* general optimization:`
			`*`
			`* only do initial bcopy() and SHA2Transform() if`
			`* buf_index != 0. if buf_index == 0, we're just`
			`* wasting our time doing the bcopy() since there`
			`* wasn't any data left over from a previous call to`
			`* SHA2Update().`
			`*/`
			`if (buf_index) {`
			`bcopy(input, &ctx->buf_un.buf8[buf_index], buf_len);`
			`if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE)`
			`SHA256Transform(ctx, ctx->buf_un.buf8);`

			`i = buf_len;`
			`}`

			`#if !defined(__amd64)`
			`if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {`
			`for (; i + buf_limit - 1 < input_len; i += buf_limit) {`
			`SHA256Transform(ctx, &input[i]);`
			`}`
			`}`

			`#else`
			`if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {`
			`block_count = (input_len - i) >> 6;`
			`if (block_count > 0) {`
			`SHA256TransformBlocks(ctx, &input[i],`
			`block_count);`
			`i += block_count << 6;`
			`}`
			`}`
			`#endif /* !__amd64 */`

			`/*`
			`* general optimization:`
			`*`
			`* if i and input_len are the same, return now instead`
			`* of calling bcopy(), since the bcopy() in this case`
			`* will be an expensive noop.`
			`*/`

			`if (input_len == i)`
			`return;`

			`buf_index = 0;`
			`}`

			`/* buffer remaining input */`
			`bcopy(&input[i], &ctx->buf_un.buf8[buf_index], input_len - i);`
			`}`


			`/*`
			`* SHA2Final()`
			`*`
			`* purpose: ends an sha2 digest operation, finalizing the message digest and`
			`* zeroing the context.`
			`* input: uchar_t * : a buffer to store the digest`
			`* : The function actually uses void* because many`
			`* : callers pass things other than uchar_t here.`
			`* SHA2_CTX * : the context to finalize, save, and zero`
			`* output: void`
			`*/`

			`void`
			`SHA2Final(void digest, SHA2_CTX ctx)`
			`{`
			`uint8_t bitcount_be[sizeof (ctx->count.c32)];`
			`uint32_t index;`
			`uint32_t algotype = ctx->algotype;`

			`if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {`
			`index = (ctx->count.c32[1] >> 3) & 0x3f;`
			`Encode(bitcount_be, ctx->count.c32, sizeof (bitcount_be));`
			`SHA2Update(ctx, PADDING, ((index < 56) ? 56 : 120) - index);`
			`SHA2Update(ctx, bitcount_be, sizeof (bitcount_be));`
			`Encode(digest, ctx->state.s32, sizeof (ctx->state.s32));`
			`}`

			`/* zeroize sensitive information */`
			`bzero(ctx, sizeof (*ctx));`
			`}`