From e8beeaa1116cc771360a24c9c1f9e6f47ced0e28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Attila=20F=C3=BCl=C3=B6p?= Date: Fri, 30 Oct 2020 23:24:21 +0100 Subject: [PATCH] ICP: gcm: Allocate hash subkey table separately MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While evaluating other assembler implementations it turns out that the precomputed hash subkey tables vary in size, from 8*16 bytes (avx2/avx512) up to 48*16 bytes (avx512-vaes), depending on the implementation. To be able to handle the size differences later, allocate `gcm_Htable` dynamically rather than having a fixed size array, and adapt consumers. Reviewed-by: Brian Behlendorf Signed-off-by: Attila Fülöp Closes #11102 --- module/icp/algs/modes/gcm.c | 54 +++++++++++++++++-- module/icp/algs/modes/modes.c | 8 +++ .../icp/asm-x86_64/modes/aesni-gcm-x86_64.S | 18 +++++-- module/icp/include/modes/modes.h | 8 +-- module/icp/io/aes.c | 18 +++++++ 5 files changed, 93 insertions(+), 13 deletions(-) diff --git a/module/icp/algs/modes/gcm.c b/module/icp/algs/modes/gcm.c index 5553c55e11..23686c59e8 100644 --- a/module/icp/algs/modes/gcm.c +++ b/module/icp/algs/modes/gcm.c @@ -59,10 +59,12 @@ boolean_t gcm_avx_can_use_movbe = B_FALSE; static boolean_t gcm_use_avx = B_FALSE; #define GCM_IMPL_USE_AVX (*(volatile boolean_t *)&gcm_use_avx) +extern boolean_t atomic_toggle_boolean_nv(volatile boolean_t *); + static inline boolean_t gcm_avx_will_work(void); static inline void gcm_set_avx(boolean_t); static inline boolean_t gcm_toggle_avx(void); -extern boolean_t atomic_toggle_boolean_nv(volatile boolean_t *); +static inline size_t gcm_simd_get_htab_size(boolean_t); static int gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *, char *, size_t, crypto_data_t *, size_t); @@ -629,6 +631,21 @@ gcm_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size, (volatile boolean_t *)&gcm_avx_can_use_movbe); } } + /* Allocate Htab memory as needed. 
*/ + if (gcm_ctx->gcm_use_avx == B_TRUE) { + size_t htab_len = gcm_simd_get_htab_size(gcm_ctx->gcm_use_avx); + + if (htab_len == 0) { + return (CRYPTO_MECHANISM_PARAM_INVALID); + } + gcm_ctx->gcm_htab_len = htab_len; + gcm_ctx->gcm_Htable = + (uint64_t *)kmem_alloc(htab_len, gcm_ctx->gcm_kmflag); + + if (gcm_ctx->gcm_Htable == NULL) { + return (CRYPTO_HOST_MEMORY); + } + } /* Avx and non avx context initialization differs from here on. */ if (gcm_ctx->gcm_use_avx == B_FALSE) { #endif /* ifdef CAN_USE_GCM_ASM */ @@ -689,6 +706,22 @@ gmac_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size, if (ks->ops->needs_byteswap == B_TRUE) { gcm_ctx->gcm_use_avx = B_FALSE; } + /* Allocate Htab memory as needed. */ + if (gcm_ctx->gcm_use_avx == B_TRUE) { + size_t htab_len = gcm_simd_get_htab_size(gcm_ctx->gcm_use_avx); + + if (htab_len == 0) { + return (CRYPTO_MECHANISM_PARAM_INVALID); + } + gcm_ctx->gcm_htab_len = htab_len; + gcm_ctx->gcm_Htable = + (uint64_t *)kmem_alloc(htab_len, gcm_ctx->gcm_kmflag); + + if (gcm_ctx->gcm_Htable == NULL) { + return (CRYPTO_HOST_MEMORY); + } + } + /* Avx and non avx context initialization differs from here on. */ if (gcm_ctx->gcm_use_avx == B_FALSE) { #endif /* ifdef CAN_USE_GCM_ASM */ @@ -1018,7 +1051,7 @@ MODULE_PARM_DESC(icp_gcm_impl, "Select gcm implementation."); /* Clear the FPU registers since they hold sensitive internal state. 
*/ #define clear_fpu_regs() clear_fpu_regs_avx() #define GHASH_AVX(ctx, in, len) \ - gcm_ghash_avx((ctx)->gcm_ghash, (const uint64_t (*)[2])(ctx)->gcm_Htable, \ + gcm_ghash_avx((ctx)->gcm_ghash, (const uint64_t *)(ctx)->gcm_Htable, \ in, len) #define gcm_incr_counter_block(ctx) gcm_incr_counter_block_by(ctx, 1) @@ -1036,8 +1069,8 @@ extern void gcm_xor_avx(const uint8_t *src, uint8_t *dst); extern void aes_encrypt_intel(const uint32_t rk[], int nr, const uint32_t pt[4], uint32_t ct[4]); -extern void gcm_init_htab_avx(uint64_t Htable[16][2], const uint64_t H[2]); -extern void gcm_ghash_avx(uint64_t ghash[2], const uint64_t Htable[16][2], +extern void gcm_init_htab_avx(uint64_t *Htable, const uint64_t H[2]); +extern void gcm_ghash_avx(uint64_t ghash[2], const uint64_t *Htable, const uint8_t *in, size_t len); extern size_t aesni_gcm_encrypt(const uint8_t *, uint8_t *, size_t, @@ -1073,6 +1106,18 @@ gcm_toggle_avx(void) } } +static inline size_t +gcm_simd_get_htab_size(boolean_t simd_mode) +{ + switch (simd_mode) { + case B_TRUE: + return (2 * 6 * 2 * sizeof (uint64_t)); + + default: + return (0); + } +} + /* * Clear sensitive data in the context. 
* @@ -1088,7 +1133,6 @@ gcm_clear_ctx(gcm_ctx_t *ctx) { bzero(ctx->gcm_remainder, sizeof (ctx->gcm_remainder)); bzero(ctx->gcm_H, sizeof (ctx->gcm_H)); - bzero(ctx->gcm_Htable, sizeof (ctx->gcm_Htable)); bzero(ctx->gcm_J0, sizeof (ctx->gcm_J0)); bzero(ctx->gcm_tmp, sizeof (ctx->gcm_tmp)); } diff --git a/module/icp/algs/modes/modes.c b/module/icp/algs/modes/modes.c index f07876a478..faae9722bd 100644 --- a/module/icp/algs/modes/modes.c +++ b/module/icp/algs/modes/modes.c @@ -152,6 +152,14 @@ crypto_free_mode_ctx(void *ctx) vmem_free(((gcm_ctx_t *)ctx)->gcm_pt_buf, ((gcm_ctx_t *)ctx)->gcm_pt_buf_len); +#ifdef CAN_USE_GCM_ASM + if (((gcm_ctx_t *)ctx)->gcm_Htable != NULL) { + gcm_ctx_t *gcm_ctx = (gcm_ctx_t *)ctx; + bzero(gcm_ctx->gcm_Htable, gcm_ctx->gcm_htab_len); + kmem_free(gcm_ctx->gcm_Htable, gcm_ctx->gcm_htab_len); + } +#endif + kmem_free(ctx, sizeof (gcm_ctx_t)); } } diff --git a/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S b/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S index 4e05255728..dc71ae2c1c 100644 --- a/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S +++ b/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S @@ -718,6 +718,8 @@ aesni_gcm_decrypt: .cfi_offset %r14,-48 pushq %r15 .cfi_offset %r15,-56 + pushq %r9 +.cfi_offset %r9,-64 vzeroupper vmovdqu (%r8),%xmm1 @@ -730,7 +732,8 @@ aesni_gcm_decrypt: andq $-128,%rsp vmovdqu (%r11),%xmm0 leaq 128(%rcx),%rcx - leaq 32+32(%r9),%r9 + movq 32(%r9),%r9 + leaq 32(%r9),%r9 movl 504-128(%rcx),%ebp // ICP has a larger offset for rounds. 
vpshufb %xmm0,%xmm8,%xmm8 @@ -786,7 +789,9 @@ aesni_gcm_decrypt: vmovups %xmm14,-16(%rsi) vpshufb (%r11),%xmm8,%xmm8 - vmovdqu %xmm8,-64(%r9) + movq -56(%rax),%r9 +.cfi_restore %r9 + vmovdqu %xmm8,(%r9) vzeroupper movq -48(%rax),%r15 @@ -924,6 +929,8 @@ aesni_gcm_encrypt: .cfi_offset %r14,-48 pushq %r15 .cfi_offset %r15,-56 + pushq %r9 +.cfi_offset %r9,-64 vzeroupper vmovdqu (%r8),%xmm1 @@ -966,7 +973,8 @@ aesni_gcm_encrypt: call _aesni_ctr32_6x vmovdqu (%r9),%xmm8 - leaq 32+32(%r9),%r9 + movq 32(%r9),%r9 + leaq 32(%r9),%r9 subq $12,%rdx movq $192,%r10 vpshufb %xmm0,%xmm8,%xmm8 @@ -1157,7 +1165,9 @@ aesni_gcm_encrypt: vpxor %xmm7,%xmm2,%xmm2 vpxor %xmm2,%xmm8,%xmm8 vpshufb (%r11),%xmm8,%xmm8 - vmovdqu %xmm8,-64(%r9) + movq -56(%rax),%r9 +.cfi_restore %r9 + vmovdqu %xmm8,(%r9) vzeroupper movq -48(%rax),%r15 diff --git a/module/icp/include/modes/modes.h b/module/icp/include/modes/modes.h index 57a211ccf1..ab71197542 100644 --- a/module/icp/include/modes/modes.h +++ b/module/icp/include/modes/modes.h @@ -219,14 +219,14 @@ typedef struct gcm_ctx { size_t gcm_pt_buf_len; uint32_t gcm_tmp[4]; /* - * The relative positions of gcm_ghash, gcm_H and pre-computed - * gcm_Htable are hard coded in aesni-gcm-x86_64.S and ghash-x86_64.S, - * so please don't change (or adjust accordingly). + * The offset of gcm_Htable relative to gcm_ghash, (32), is hard coded + * in aesni-gcm-x86_64.S, so please don't change (or adjust there). 
*/ uint64_t gcm_ghash[2]; uint64_t gcm_H[2]; #ifdef CAN_USE_GCM_ASM - uint64_t gcm_Htable[12][2]; + uint64_t *gcm_Htable; + size_t gcm_htab_len; #endif uint64_t gcm_J0[2]; uint64_t gcm_len_a_len_c[2]; diff --git a/module/icp/io/aes.c b/module/icp/io/aes.c index 96fb6bb1af..e540af4473 100644 --- a/module/icp/io/aes.c +++ b/module/icp/io/aes.c @@ -1051,6 +1051,16 @@ out: bzero(aes_ctx.ac_keysched, aes_ctx.ac_keysched_len); kmem_free(aes_ctx.ac_keysched, aes_ctx.ac_keysched_len); } +#ifdef CAN_USE_GCM_ASM + if (aes_ctx.ac_flags & (GCM_MODE|GMAC_MODE) && + ((gcm_ctx_t *)&aes_ctx)->gcm_Htable != NULL) { + + gcm_ctx_t *ctx = (gcm_ctx_t *)&aes_ctx; + + bzero(ctx->gcm_Htable, ctx->gcm_htab_len); + kmem_free(ctx->gcm_Htable, ctx->gcm_htab_len); + } +#endif return (ret); } @@ -1209,6 +1219,14 @@ out: vmem_free(((gcm_ctx_t *)&aes_ctx)->gcm_pt_buf, ((gcm_ctx_t *)&aes_ctx)->gcm_pt_buf_len); } +#ifdef CAN_USE_GCM_ASM + if (((gcm_ctx_t *)&aes_ctx)->gcm_Htable != NULL) { + gcm_ctx_t *ctx = (gcm_ctx_t *)&aes_ctx; + + bzero(ctx->gcm_Htable, ctx->gcm_htab_len); + kmem_free(ctx->gcm_Htable, ctx->gcm_htab_len); + } +#endif } return (ret);