From ee93cbc9d477d8ce0a6ebffe9a582dbe83144577 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Sun, 16 Oct 2022 16:53:22 +0200 Subject: [PATCH] icp: properly fix all RETs in x86_64 Asm code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 43569ee37420 ("Fix objtool: missing int3 after ret warning") addressed replacing all `ret`s in x86 asm code to a macro in the Linux kernel in order to enable SLS. That was done by copying the upstream macro definitions and fixed objtool complaints. Since then, several more mitigations were introduced, including Rethunk. It requires to have a jump to one of the thunks in order to work, so the RET macro was changed again. And, as ZFS code didn't use the mainline defition, but copied it, this is currently missing. Objtool reminds about it time to time (Clang 16, CONFIG_RETHUNK=y): fs/zfs/lua/zlua.o: warning: objtool: setjmp+0x25: 'naked' return found in RETHUNK build fs/zfs/lua/zlua.o: warning: objtool: longjmp+0x27: 'naked' return found in RETHUNK build Do it the following way: * if we're building under Linux, unconditionally include in the related files. It is available in x86 sources since even pre-2.6 times, so doesn't need any conftests; * then, if RET macro is available, it will be used directly, so that we will always have the version actual to the kernel we build; * if there's no such macro, we define it as a simple `ret`, as it was on pre-SLS times. This ensures we always have the up-to-date definition with no need to update it manually, and at the same time is safe for the whole variety of kernels ZFS module supports. Then, there's a couple more "naked" rets left in the code, they're just defined as: .byte 0xf3,0xc3 In fact, this is just: rep ret `rep ret` instead of just `ret` seems to mitigate performance issues on some old AMD processors and most likely makes no sense as of today. Anyways, address those rets, so that they will be protected with Rethunk and SLS. Include here which now always has RET definition and replace those constructs with just RET. This wipes the last couple of places with unpatched rets objtool's been complaining about. Reviewed-by: Attila Fülöp Reviewed-by: Tino Reichardt Reviewed-by: Richard Yao Reviewed-by: Brian Behlendorf Signed-off-by: Alexander Lobakin Closes #14035 --- module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S | 13 ++++++++----- module/icp/asm-x86_64/modes/ghash-x86_64.S | 9 ++++++--- module/icp/include/sys/ia32/asm_linkage.h | 9 ++++++--- module/lua/setjmp/setjmp_x86_64.S | 15 ++++++++------- 4 files changed, 28 insertions(+), 18 deletions(-) diff --git a/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S b/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S index 70e419c2e4..7414b3540f 100644 --- a/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S +++ b/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S @@ -47,6 +47,9 @@ #if defined(__x86_64__) && defined(HAVE_AVX) && \ defined(HAVE_AES) && defined(HAVE_PCLMULQDQ) +#define _ASM +#include + .extern gcm_avx_can_use_movbe .text @@ -363,7 +366,7 @@ _aesni_ctr32_ghash_6x: vpxor 16+8(%rsp),%xmm8,%xmm8 vpxor %xmm4,%xmm8,%xmm8 - .byte 0xf3,0xc3 + RET .cfi_endproc .size _aesni_ctr32_ghash_6x,.-_aesni_ctr32_ghash_6x #endif /* ifdef HAVE_MOVBE */ @@ -691,7 +694,7 @@ _aesni_ctr32_ghash_no_movbe_6x: vpxor 16+8(%rsp),%xmm8,%xmm8 vpxor %xmm4,%xmm8,%xmm8 - .byte 0xf3,0xc3 + RET .cfi_endproc .size _aesni_ctr32_ghash_no_movbe_6x,.-_aesni_ctr32_ghash_no_movbe_6x @@ -810,7 +813,7 @@ aesni_gcm_decrypt: .cfi_def_cfa_register %rsp .Lgcm_dec_abort: movq %r10,%rax - .byte 0xf3,0xc3 + RET .cfi_endproc .size aesni_gcm_decrypt,.-aesni_gcm_decrypt .type _aesni_ctr32_6x,@function @@ -880,7 +883,7 @@ _aesni_ctr32_6x: vmovups %xmm14,80(%rsi) leaq 96(%rsi),%rsi - .byte 0xf3,0xc3 + RET .align 32 .Lhandle_ctr32_2: vpshufb %xmm0,%xmm1,%xmm6 @@ -1186,7 +1189,7 @@ aesni_gcm_encrypt: .cfi_def_cfa_register %rsp .Lgcm_enc_abort: movq %r10,%rax - .byte 0xf3,0xc3 + RET .cfi_endproc .size aesni_gcm_encrypt,.-aesni_gcm_encrypt diff --git a/module/icp/asm-x86_64/modes/ghash-x86_64.S b/module/icp/asm-x86_64/modes/ghash-x86_64.S index 90cc36b43a..77a3ce1859 100644 --- a/module/icp/asm-x86_64/modes/ghash-x86_64.S +++ b/module/icp/asm-x86_64/modes/ghash-x86_64.S @@ -97,6 +97,9 @@ #if defined(__x86_64__) && defined(HAVE_AVX) && \ defined(HAVE_AES) && defined(HAVE_PCLMULQDQ) +#define _ASM +#include + .text .globl gcm_gmult_clmul @@ -149,7 +152,7 @@ gcm_gmult_clmul: pxor %xmm1,%xmm0 .byte 102,15,56,0,197 movdqu %xmm0,(%rdi) - .byte 0xf3,0xc3 + RET .cfi_endproc .size gcm_gmult_clmul,.-gcm_gmult_clmul @@ -262,7 +265,7 @@ gcm_init_htab_avx: vmovdqu %xmm5,-16(%rdi) vzeroupper - .byte 0xf3,0xc3 + RET .cfi_endproc .size gcm_init_htab_avx,.-gcm_init_htab_avx @@ -649,7 +652,7 @@ gcm_ghash_avx: vpshufb %xmm13,%xmm10,%xmm10 vmovdqu %xmm10,(%rdi) vzeroupper - .byte 0xf3,0xc3 + RET .cfi_endproc .size gcm_ghash_avx,.-gcm_ghash_avx .align 64 diff --git a/module/icp/include/sys/ia32/asm_linkage.h b/module/icp/include/sys/ia32/asm_linkage.h index 876e21e5f1..0717db4ab1 100644 --- a/module/icp/include/sys/ia32/asm_linkage.h +++ b/module/icp/include/sys/ia32/asm_linkage.h @@ -30,9 +30,11 @@ #include #include -#if defined(__linux__) && defined(CONFIG_SLS) -#define RET ret; int3 -#else +#if defined(_KERNEL) && defined(__linux__) +#include +#endif + +#ifndef RET #define RET ret #endif @@ -204,6 +206,7 @@ sym1 = sym2 * insert the calls to mcount for profiling. ENTRY_NP is identical, but * never calls mcount. */ +#undef ENTRY #define ENTRY(x) \ .text; \ .align ASM_ENTRY_ALIGN; \ diff --git a/module/lua/setjmp/setjmp_x86_64.S b/module/lua/setjmp/setjmp_x86_64.S index 34cf2c7dce..47c886709c 100644 --- a/module/lua/setjmp/setjmp_x86_64.S +++ b/module/lua/setjmp/setjmp_x86_64.S @@ -23,7 +23,15 @@ * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved. */ +#if defined(_KERNEL) && defined(__linux__) +#include +#endif +#ifndef RET +#define RET ret +#endif + +#undef ENTRY #define ENTRY(x) \ .text; \ .align 8; \ @@ -34,13 +42,6 @@ x: #define SET_SIZE(x) \ .size x, [.-x] - -#if defined(__linux__) && defined(CONFIG_SLS) -#define RET ret; int3 -#else -#define RET ret -#endif - /* * Setjmp and longjmp implement non-local gotos using state vectors * type label_t.