/* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* * Copyright 2013 Saso Kiselkov. All rights reserved. */ /* * The basic framework for this code came from the reference * implementation for MD5. That implementation is Copyright (C) * 1991-2, RSA Data Security, Inc. Created 1991. All rights reserved. * * License to copy and use this software is granted provided that it * is identified as the "RSA Data Security, Inc. MD5 Message-Digest * Algorithm" in all material mentioning or referencing this software * or this function. * * License is also granted to make and use derivative works provided * that such works are identified as "derived from the RSA Data * Security, Inc. MD5 Message-Digest Algorithm" in all material * mentioning or referencing the derived work. * * RSA Data Security, Inc. makes no representations concerning either * the merchantability of this software or the suitability of this * software for any particular purpose. It is provided "as is" * without express or implied warranty of any kind. * * These notices must be retained in any copies of any part of this * documentation and/or software. * * NOTE: Cleaned-up and optimized, version of SHA2, based on the FIPS 180-2 * standard, available at * http://csrc.nist.gov/publications/fips/fips180-2/fips180-2.pdf * Not as fast as one would like -- further optimizations are encouraged * and appreciated. */ #include #define _SHA2_IMPL #include #include #define _RESTRICT_KYWD #ifdef _LITTLE_ENDIAN #include #define HAVE_HTONL #endif static void Encode(uint8_t *, uint32_t *, size_t); #if defined(__amd64) #define SHA256Transform(ctx, in) SHA256TransformBlocks((ctx), (in), 1) void SHA256TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num); #else static void SHA256Transform(SHA2_CTX *, const uint8_t *); #endif /* __amd64 */ static uint8_t PADDING[128] = { 0x80, /* all zeros */ }; /* Ch and Maj are the basic SHA2 functions. */ #define Ch(b, c, d) (((b) & (c)) ^ ((~b) & (d))) #define Maj(b, c, d) (((b) & (c)) ^ ((b) & (d)) ^ ((c) & (d))) /* Rotates x right n bits. */ #define ROTR(x, n) \ (((x) >> (n)) | ((x) << ((sizeof (x) * NBBY)-(n)))) /* Shift x right n bits */ #define SHR(x, n) ((x) >> (n)) /* SHA256 Functions */ #define BIGSIGMA0_256(x) (ROTR((x), 2) ^ ROTR((x), 13) ^ ROTR((x), 22)) #define BIGSIGMA1_256(x) (ROTR((x), 6) ^ ROTR((x), 11) ^ ROTR((x), 25)) #define SIGMA0_256(x) (ROTR((x), 7) ^ ROTR((x), 18) ^ SHR((x), 3)) #define SIGMA1_256(x) (ROTR((x), 17) ^ ROTR((x), 19) ^ SHR((x), 10)) #define SHA256ROUND(a, b, c, d, e, f, g, h, i, w) \ T1 = h + BIGSIGMA1_256(e) + Ch(e, f, g) + SHA256_CONST(i) + w; \ d += T1; \ T2 = BIGSIGMA0_256(a) + Maj(a, b, c); \ h = T1 + T2 /* * sparc optimization: * * on the sparc, we can load big endian 32-bit data easily. note that * special care must be taken to ensure the address is 32-bit aligned. * in the interest of speed, we don't check to make sure, since * careful programming can guarantee this for us. */ #if defined(_BIG_ENDIAN) #define LOAD_BIG_32(addr) (*(uint32_t *)(addr)) #define LOAD_BIG_64(addr) (*(uint64_t *)(addr)) #elif defined(HAVE_HTONL) #define LOAD_BIG_32(addr) htonl(*((uint32_t *)(addr))) #define LOAD_BIG_64(addr) htonll(*((uint64_t *)(addr))) #else /* little endian -- will work on big endian, but slowly */ #define LOAD_BIG_32(addr) \ (((addr)[0] << 24) | ((addr)[1] << 16) | ((addr)[2] << 8) | (addr)[3]) #define LOAD_BIG_64(addr) \ (((uint64_t)(addr)[0] << 56) | ((uint64_t)(addr)[1] << 48) | \ ((uint64_t)(addr)[2] << 40) | ((uint64_t)(addr)[3] << 32) | \ ((uint64_t)(addr)[4] << 24) | ((uint64_t)(addr)[5] << 16) | \ ((uint64_t)(addr)[6] << 8) | (uint64_t)(addr)[7]) #endif /* _BIG_ENDIAN */ #if !defined(__amd64) /* SHA256 Transform */ static void SHA256Transform(SHA2_CTX *ctx, const uint8_t *blk) { uint32_t a = ctx->state.s32[0]; uint32_t b = ctx->state.s32[1]; uint32_t c = ctx->state.s32[2]; uint32_t d = ctx->state.s32[3]; uint32_t e = ctx->state.s32[4]; uint32_t f = ctx->state.s32[5]; uint32_t g = ctx->state.s32[6]; uint32_t h = ctx->state.s32[7]; uint32_t w0, w1, w2, w3, w4, w5, w6, w7; uint32_t w8, w9, w10, w11, w12, w13, w14, w15; uint32_t T1, T2; if ((uintptr_t)blk & 0x3) { /* not 4-byte aligned? */ bcopy(blk, ctx->buf_un.buf32, sizeof (ctx->buf_un.buf32)); blk = (uint8_t *)ctx->buf_un.buf32; } /* LINTED E_BAD_PTR_CAST_ALIGN */ w0 = LOAD_BIG_32(blk + 4 * 0); SHA256ROUND(a, b, c, d, e, f, g, h, 0, w0); /* LINTED E_BAD_PTR_CAST_ALIGN */ w1 = LOAD_BIG_32(blk + 4 * 1); SHA256ROUND(h, a, b, c, d, e, f, g, 1, w1); /* LINTED E_BAD_PTR_CAST_ALIGN */ w2 = LOAD_BIG_32(blk + 4 * 2); SHA256ROUND(g, h, a, b, c, d, e, f, 2, w2); /* LINTED E_BAD_PTR_CAST_ALIGN */ w3 = LOAD_BIG_32(blk + 4 * 3); SHA256ROUND(f, g, h, a, b, c, d, e, 3, w3); /* LINTED E_BAD_PTR_CAST_ALIGN */ w4 = LOAD_BIG_32(blk + 4 * 4); SHA256ROUND(e, f, g, h, a, b, c, d, 4, w4); /* LINTED E_BAD_PTR_CAST_ALIGN */ w5 = LOAD_BIG_32(blk + 4 * 5); SHA256ROUND(d, e, f, g, h, a, b, c, 5, w5); /* LINTED E_BAD_PTR_CAST_ALIGN */ w6 = LOAD_BIG_32(blk + 4 * 6); SHA256ROUND(c, d, e, f, g, h, a, b, 6, w6); /* LINTED E_BAD_PTR_CAST_ALIGN */ w7 = LOAD_BIG_32(blk + 4 * 7); SHA256ROUND(b, c, d, e, f, g, h, a, 7, w7); /* LINTED E_BAD_PTR_CAST_ALIGN */ w8 = LOAD_BIG_32(blk + 4 * 8); SHA256ROUND(a, b, c, d, e, f, g, h, 8, w8); /* LINTED E_BAD_PTR_CAST_ALIGN */ w9 = LOAD_BIG_32(blk + 4 * 9); SHA256ROUND(h, a, b, c, d, e, f, g, 9, w9); /* LINTED E_BAD_PTR_CAST_ALIGN */ w10 = LOAD_BIG_32(blk + 4 * 10); SHA256ROUND(g, h, a, b, c, d, e, f, 10, w10); /* LINTED E_BAD_PTR_CAST_ALIGN */ w11 = LOAD_BIG_32(blk + 4 * 11); SHA256ROUND(f, g, h, a, b, c, d, e, 11, w11); /* LINTED E_BAD_PTR_CAST_ALIGN */ w12 = LOAD_BIG_32(blk + 4 * 12); SHA256ROUND(e, f, g, h, a, b, c, d, 12, w12); /* LINTED E_BAD_PTR_CAST_ALIGN */ w13 = LOAD_BIG_32(blk + 4 * 13); SHA256ROUND(d, e, f, g, h, a, b, c, 13, w13); /* LINTED E_BAD_PTR_CAST_ALIGN */ w14 = LOAD_BIG_32(blk + 4 * 14); SHA256ROUND(c, d, e, f, g, h, a, b, 14, w14); /* LINTED E_BAD_PTR_CAST_ALIGN */ w15 = LOAD_BIG_32(blk + 4 * 15); SHA256ROUND(b, c, d, e, f, g, h, a, 15, w15); w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0; SHA256ROUND(a, b, c, d, e, f, g, h, 16, w0); w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1; SHA256ROUND(h, a, b, c, d, e, f, g, 17, w1); w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2; SHA256ROUND(g, h, a, b, c, d, e, f, 18, w2); w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3; SHA256ROUND(f, g, h, a, b, c, d, e, 19, w3); w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4; SHA256ROUND(e, f, g, h, a, b, c, d, 20, w4); w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5; SHA256ROUND(d, e, f, g, h, a, b, c, 21, w5); w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6; SHA256ROUND(c, d, e, f, g, h, a, b, 22, w6); w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7; SHA256ROUND(b, c, d, e, f, g, h, a, 23, w7); w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8; SHA256ROUND(a, b, c, d, e, f, g, h, 24, w8); w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9; SHA256ROUND(h, a, b, c, d, e, f, g, 25, w9); w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10; SHA256ROUND(g, h, a, b, c, d, e, f, 26, w10); w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11; SHA256ROUND(f, g, h, a, b, c, d, e, 27, w11); w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12; SHA256ROUND(e, f, g, h, a, b, c, d, 28, w12); w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13; SHA256ROUND(d, e, f, g, h, a, b, c, 29, w13); w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14; SHA256ROUND(c, d, e, f, g, h, a, b, 30, w14); w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15; SHA256ROUND(b, c, d, e, f, g, h, a, 31, w15); w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0; SHA256ROUND(a, b, c, d, e, f, g, h, 32, w0); w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1; SHA256ROUND(h, a, b, c, d, e, f, g, 33, w1); w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2; SHA256ROUND(g, h, a, b, c, d, e, f, 34, w2); w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3; SHA256ROUND(f, g, h, a, b, c, d, e, 35, w3); w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4; SHA256ROUND(e, f, g, h, a, b, c, d, 36, w4); w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5; SHA256ROUND(d, e, f, g, h, a, b, c, 37, w5); w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6; SHA256ROUND(c, d, e, f, g, h, a, b, 38, w6); w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7; SHA256ROUND(b, c, d, e, f, g, h, a, 39, w7); w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8; SHA256ROUND(a, b, c, d, e, f, g, h, 40, w8); w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9; SHA256ROUND(h, a, b, c, d, e, f, g, 41, w9); w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10; SHA256ROUND(g, h, a, b, c, d, e, f, 42, w10); w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11; SHA256ROUND(f, g, h, a, b, c, d, e, 43, w11); w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12; SHA256ROUND(e, f, g, h, a, b, c, d, 44, w12); w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13; SHA256ROUND(d, e, f, g, h, a, b, c, 45, w13); w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14; SHA256ROUND(c, d, e, f, g, h, a, b, 46, w14); w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15; SHA256ROUND(b, c, d, e, f, g, h, a, 47, w15); w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0; SHA256ROUND(a, b, c, d, e, f, g, h, 48, w0); w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1; SHA256ROUND(h, a, b, c, d, e, f, g, 49, w1); w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2; SHA256ROUND(g, h, a, b, c, d, e, f, 50, w2); w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3; SHA256ROUND(f, g, h, a, b, c, d, e, 51, w3); w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4; SHA256ROUND(e, f, g, h, a, b, c, d, 52, w4); w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5; SHA256ROUND(d, e, f, g, h, a, b, c, 53, w5); w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6; SHA256ROUND(c, d, e, f, g, h, a, b, 54, w6); w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7; SHA256ROUND(b, c, d, e, f, g, h, a, 55, w7); w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8; SHA256ROUND(a, b, c, d, e, f, g, h, 56, w8); w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9; SHA256ROUND(h, a, b, c, d, e, f, g, 57, w9); w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10; SHA256ROUND(g, h, a, b, c, d, e, f, 58, w10); w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11; SHA256ROUND(f, g, h, a, b, c, d, e, 59, w11); w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12; SHA256ROUND(e, f, g, h, a, b, c, d, 60, w12); w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13; SHA256ROUND(d, e, f, g, h, a, b, c, 61, w13); w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14; SHA256ROUND(c, d, e, f, g, h, a, b, 62, w14); w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15; SHA256ROUND(b, c, d, e, f, g, h, a, 63, w15); ctx->state.s32[0] += a; ctx->state.s32[1] += b; ctx->state.s32[2] += c; ctx->state.s32[3] += d; ctx->state.s32[4] += e; ctx->state.s32[5] += f; ctx->state.s32[6] += g; ctx->state.s32[7] += h; } #endif /* !__amd64 */ /* * Encode() * * purpose: to convert a list of numbers from little endian to big endian * input: uint8_t * : place to store the converted big endian numbers * uint32_t * : place to get numbers to convert from * size_t : the length of the input in bytes * output: void */ static void Encode(uint8_t *_RESTRICT_KYWD output, uint32_t *_RESTRICT_KYWD input, size_t len) { size_t i, j; for (i = 0, j = 0; j < len; i++, j += 4) { output[j] = (input[i] >> 24) & 0xff; output[j + 1] = (input[i] >> 16) & 0xff; output[j + 2] = (input[i] >> 8) & 0xff; output[j + 3] = input[i] & 0xff; } } void SHA2Init(uint64_t mech, SHA2_CTX *ctx) { switch (mech) { case SHA256_MECH_INFO_TYPE: case SHA256_HMAC_MECH_INFO_TYPE: case SHA256_HMAC_GEN_MECH_INFO_TYPE: ctx->state.s32[0] = 0x6a09e667U; ctx->state.s32[1] = 0xbb67ae85U; ctx->state.s32[2] = 0x3c6ef372U; ctx->state.s32[3] = 0xa54ff53aU; ctx->state.s32[4] = 0x510e527fU; ctx->state.s32[5] = 0x9b05688cU; ctx->state.s32[6] = 0x1f83d9abU; ctx->state.s32[7] = 0x5be0cd19U; break; default: cmn_err(CE_PANIC, "sha2_init: failed to find a supported algorithm: 0x%x", (uint32_t)mech); } ctx->algotype = (uint32_t)mech; ctx->count.c64[0] = ctx->count.c64[1] = 0; } void SHA256Init(SHA256_CTX *ctx) { SHA2Init(SHA256, ctx); } /* * SHA2Update() * * purpose: continues an sha2 digest operation, using the message block * to update the context. * input: SHA2_CTX * : the context to update * void * : the message block * size_t : the length of the message block, in bytes * output: void */ void SHA2Update(SHA2_CTX *ctx, const void *inptr, size_t input_len) { uint32_t i, buf_index, buf_len, buf_limit; const uint8_t *input = inptr; uint32_t algotype = ctx->algotype; #if defined(__amd64) uint32_t block_count; #endif /* !__amd64 */ /* check for noop */ if (input_len == 0) return; if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) { buf_limit = 64; /* compute number of bytes mod 64 */ buf_index = (ctx->count.c32[1] >> 3) & 0x3F; /* update number of bits */ if ((ctx->count.c32[1] += (input_len << 3)) < (input_len << 3)) ctx->count.c32[0]++; ctx->count.c32[0] += (input_len >> 29); } else { buf_limit = 128; /* compute number of bytes mod 128 */ buf_index = (ctx->count.c64[1] >> 3) & 0x7F; /* update number of bits */ if ((ctx->count.c64[1] += (input_len << 3)) < (input_len << 3)) ctx->count.c64[0]++; ctx->count.c64[0] += (input_len >> 29); } buf_len = buf_limit - buf_index; /* transform as many times as possible */ i = 0; if (input_len >= buf_len) { /* * general optimization: * * only do initial bcopy() and SHA2Transform() if * buf_index != 0. if buf_index == 0, we're just * wasting our time doing the bcopy() since there * wasn't any data left over from a previous call to * SHA2Update(). */ if (buf_index) { bcopy(input, &ctx->buf_un.buf8[buf_index], buf_len); if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) SHA256Transform(ctx, ctx->buf_un.buf8); i = buf_len; } #if !defined(__amd64) if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) { for (; i + buf_limit - 1 < input_len; i += buf_limit) { SHA256Transform(ctx, &input[i]); } } #else if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) { block_count = (input_len - i) >> 6; if (block_count > 0) { SHA256TransformBlocks(ctx, &input[i], block_count); i += block_count << 6; } } #endif /* !__amd64 */ /* * general optimization: * * if i and input_len are the same, return now instead * of calling bcopy(), since the bcopy() in this case * will be an expensive noop. */ if (input_len == i) return; buf_index = 0; } /* buffer remaining input */ bcopy(&input[i], &ctx->buf_un.buf8[buf_index], input_len - i); } /* * SHA2Final() * * purpose: ends an sha2 digest operation, finalizing the message digest and * zeroing the context. * input: uchar_t * : a buffer to store the digest * : The function actually uses void* because many * : callers pass things other than uchar_t here. * SHA2_CTX * : the context to finalize, save, and zero * output: void */ void SHA2Final(void *digest, SHA2_CTX *ctx) { uint8_t bitcount_be[sizeof (ctx->count.c32)]; uint32_t index; uint32_t algotype = ctx->algotype; if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) { index = (ctx->count.c32[1] >> 3) & 0x3f; Encode(bitcount_be, ctx->count.c32, sizeof (bitcount_be)); SHA2Update(ctx, PADDING, ((index < 56) ? 56 : 120) - index); SHA2Update(ctx, bitcount_be, sizeof (bitcount_be)); Encode(digest, ctx->state.s32, sizeof (ctx->state.s32)); } /* zeroize sensitive information */ bzero(ctx, sizeof (*ctx)); }