Skip to content

Commit f8bf674

Browse files
committed
CVPN-1725 Fix illegal instruction error on older CPUs
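Fix an illegal-instruction error on older desktop CPUs by applying the two patches (PRs) provided by wolfSSL: 0008 removes `vzeroupper` from the AVX1/SSE2 assembly routines, which do not use ymm/zmm registers, and 0009 makes the SHA-256 byte-reversal logic treat a CPU that reports SHA but neither AVX1 nor AVX2 the same way as an AVX1/AVX2 CPU. Tested on the platform that originally failed and verified that both fixes work.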
1 parent e96bc58 commit f8bf674

File tree

6 files changed

+262
-0
lines changed

6 files changed

+262
-0
lines changed

windows_32.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919
- git apply ../../wolfssl/0005-fix-mlkem-get-curve-name.patch
2020
- git apply ../../wolfssl/0006-fix-kyber-get-curve-name.patch
2121
- git apply ../../wolfssl/0007-fix-kyber-prf-non-avx2.patch
22+
- git apply ../../wolfssl/0008-intel-illegal-instruction.patch
23+
- git apply ../../wolfssl/0009-reverse-only-with-avx12.patch
2224
- "cp ../../windows/wolfssl-user_settings-common.h wolfssl/user_settings.h"
2325
- "cat ../../windows/wolfssl-user_settings-32.h >> wolfssl/user_settings.h"
2426
- "cp -f wolfssl/user_settings.h IDE/WIN/user_settings.h"

windows_64.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919
- git apply ../../wolfssl/0005-fix-mlkem-get-curve-name.patch
2020
- git apply ../../wolfssl/0006-fix-kyber-get-curve-name.patch
2121
- git apply ../../wolfssl/0007-fix-kyber-prf-non-avx2.patch
22+
- git apply ../../wolfssl/0008-intel-illegal-instruction.patch
23+
- git apply ../../wolfssl/0009-reverse-only-with-avx12.patch
2224
- "cp ../../windows/wolfssl-user_settings-common.h wolfssl/user_settings.h"
2325
- "cat ../../windows/wolfssl-user_settings-64.h >> wolfssl/user_settings.h"
2426
- "cp -f wolfssl/user_settings.h IDE/WIN/user_settings.h"

windows_64_multithread.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919
- git apply ../../wolfssl/0005-fix-mlkem-get-curve-name.patch
2020
- git apply ../../wolfssl/0006-fix-kyber-get-curve-name.patch
2121
- git apply ../../wolfssl/0007-fix-kyber-prf-non-avx2.patch
22+
- git apply ../../wolfssl/0008-intel-illegal-instruction.patch
23+
- git apply ../../wolfssl/0009-reverse-only-with-avx12.patch
2224
- "cp ../../windows/wolfssl-user_settings-common.h wolfssl/user_settings.h"
2325
- "cat ../../windows/wolfssl-user_settings-64.h >> wolfssl/user_settings.h"
2426
- "cat ../../windows/wolfssl-user_settings-multithread.h >> wolfssl/user_settings.h"

windows_arm64.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919
- git apply ../../wolfssl/0005-fix-mlkem-get-curve-name.patch
2020
- git apply ../../wolfssl/0006-fix-kyber-get-curve-name.patch
2121
- git apply ../../wolfssl/0007-fix-kyber-prf-non-avx2.patch
22+
- git apply ../../wolfssl/0008-intel-illegal-instruction.patch
23+
- git apply ../../wolfssl/0009-reverse-only-with-avx12.patch
2224
- "cp ../../windows/wolfssl-user_settings-common.h wolfssl/user_settings.h"
2325
- "cat ../../windows/wolfssl-user_settings-arm-64.h >> wolfssl/user_settings.h"
2426
- "cp -f wolfssl/user_settings.h IDE/WIN/user_settings.h"
Lines changed: 220 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,220 @@
1+
From e90e3aa7c69169a7c398edd274be3c0234444b6d Mon Sep 17 00:00:00 2001
2+
From: Sean Parkinson <sean@wolfssl.com>
3+
Date: Thu, 20 Feb 2025 22:28:40 +1000
4+
Subject: [PATCH] Intel AVX1/SSE2 ASM: no ymm/zmm regs no vzeroupper
5+
6+
The vzeroupper instruction does not need to be invoked unless ymm or zmm
7+
registers are used.
8+
---
9+
wolfcrypt/src/aes_gcm_asm.S | 2 --
10+
wolfcrypt/src/aes_gcm_asm.asm | 2 --
11+
wolfcrypt/src/chacha_asm.S | 1 -
12+
wolfcrypt/src/chacha_asm.asm | 1 -
13+
wolfcrypt/src/sha256_asm.S | 8 --------
14+
wolfcrypt/src/sha512_asm.S | 12 ++++--------
15+
6 files changed, 4 insertions(+), 22 deletions(-)
16+
17+
diff --git a/wolfcrypt/src/aes_gcm_asm.S b/wolfcrypt/src/aes_gcm_asm.S
18+
index b14620be0f..95ac60ae20 100644
19+
--- a/wolfcrypt/src/aes_gcm_asm.S
20+
+++ b/wolfcrypt/src/aes_gcm_asm.S
21+
@@ -9910,7 +9910,6 @@ L_AES_GCM_init_avx1_iv_done:
22+
vpaddd L_avx1_aes_gcm_one(%rip), %xmm4, %xmm4
23+
vmovdqa %xmm5, (%r8)
24+
vmovdqa %xmm4, (%r9)
25+
- vzeroupper
26+
addq $16, %rsp
27+
popq %r13
28+
popq %r12
29+
@@ -9985,7 +9984,6 @@ L_AES_GCM_aad_update_avx1_16_loop:
30+
cmpl %esi, %ecx
31+
jl L_AES_GCM_aad_update_avx1_16_loop
32+
vmovdqa %xmm5, (%rdx)
33+
- vzeroupper
34+
repz retq
35+
#ifndef __APPLE__
36+
.size AES_GCM_aad_update_avx1,.-AES_GCM_aad_update_avx1
37+
diff --git a/wolfcrypt/src/aes_gcm_asm.asm b/wolfcrypt/src/aes_gcm_asm.asm
38+
index 2e4683cdd5..a818e86583 100644
39+
--- a/wolfcrypt/src/aes_gcm_asm.asm
40+
+++ b/wolfcrypt/src/aes_gcm_asm.asm
41+
@@ -9832,7 +9832,6 @@ L_AES_GCM_init_avx1_iv_done:
42+
vpaddd xmm4, xmm4, OWORD PTR L_avx1_aes_gcm_one
43+
vmovdqa OWORD PTR [rax], xmm5
44+
vmovdqa OWORD PTR [r8], xmm4
45+
- vzeroupper
46+
vmovdqu xmm6, OWORD PTR [rsp+16]
47+
vmovdqu xmm7, OWORD PTR [rsp+32]
48+
vmovdqu xmm8, OWORD PTR [rsp+48]
49+
@@ -9905,7 +9904,6 @@ L_AES_GCM_aad_update_avx1_16_loop:
50+
cmp ecx, edx
51+
jl L_AES_GCM_aad_update_avx1_16_loop
52+
vmovdqa OWORD PTR [r8], xmm5
53+
- vzeroupper
54+
vmovdqu xmm6, OWORD PTR [rsp]
55+
vmovdqu xmm7, OWORD PTR [rsp+16]
56+
add rsp, 32
57+
diff --git a/wolfcrypt/src/chacha_asm.S b/wolfcrypt/src/chacha_asm.S
58+
index 6616e5b3d0..37e2a59306 100644
59+
--- a/wolfcrypt/src/chacha_asm.S
60+
+++ b/wolfcrypt/src/chacha_asm.S
61+
@@ -1033,7 +1033,6 @@ L_chacha20_avx1_partial_end64:
62+
subl %r11d, %r8d
63+
movl %r8d, 76(%rdi)
64+
L_chacha20_avx1_partial_done:
65+
- vzeroupper
66+
addq $0x190, %rsp
67+
popq %r15
68+
popq %r14
69+
diff --git a/wolfcrypt/src/chacha_asm.asm b/wolfcrypt/src/chacha_asm.asm
70+
index 334b0555f6..e9988945b1 100644
71+
--- a/wolfcrypt/src/chacha_asm.asm
72+
+++ b/wolfcrypt/src/chacha_asm.asm
73+
@@ -990,7 +990,6 @@ L_chacha20_avx1_partial_end64:
74+
sub r10d, r13d
75+
mov DWORD PTR [rcx+76], r10d
76+
L_chacha20_avx1_partial_done:
77+
- vzeroupper
78+
vmovdqu xmm6, OWORD PTR [rsp+400]
79+
vmovdqu xmm7, OWORD PTR [rsp+416]
80+
vmovdqu xmm8, OWORD PTR [rsp+432]
81+
diff --git a/wolfcrypt/src/sha256_asm.S b/wolfcrypt/src/sha256_asm.S
82+
index e180a5fc37..5d2d600490 100644
83+
--- a/wolfcrypt/src/sha256_asm.S
84+
+++ b/wolfcrypt/src/sha256_asm.S
85+
@@ -273,7 +273,6 @@ _Transform_Sha256_SSE2_Sha:
86+
movhpd %xmm1, 16(%rdi)
87+
movhpd %xmm2, 24(%rdi)
88+
xorq %rax, %rax
89+
- vzeroupper
90+
repz retq
91+
#ifndef __APPLE__
92+
.size Transform_Sha256_SSE2_Sha,.-Transform_Sha256_SSE2_Sha
93+
@@ -476,7 +475,6 @@ L_sha256_sha_len_sse2_start:
94+
movhpd %xmm1, 16(%rdi)
95+
movhpd %xmm2, 24(%rdi)
96+
xorq %rax, %rax
97+
- vzeroupper
98+
repz retq
99+
#ifndef __APPLE__
100+
.size Transform_Sha256_SSE2_Sha_Len,.-Transform_Sha256_SSE2_Sha_Len
101+
@@ -2920,7 +2918,6 @@ _Transform_Sha256_AVX1:
102+
addl %r14d, 24(%rdi)
103+
addl %r15d, 28(%rdi)
104+
xorq %rax, %rax
105+
- vzeroupper
106+
addq $0x40, %rsp
107+
popq %rbp
108+
popq %r15
109+
@@ -5327,7 +5324,6 @@ L_sha256_len_avx1_start:
110+
movl %r15d, 28(%rdi)
111+
jnz L_sha256_len_avx1_start
112+
xorq %rax, %rax
113+
- vzeroupper
114+
addq $0x44, %rsp
115+
popq %rbp
116+
popq %r15
117+
@@ -7735,7 +7731,6 @@ _Transform_Sha256_AVX1_RORX:
118+
addl %r14d, 24(%rdi)
119+
addl %r15d, 28(%rdi)
120+
xorq %rax, %rax
121+
- vzeroupper
122+
addq $0x40, %rsp
123+
popq %rbp
124+
popq %r15
125+
@@ -10101,7 +10096,6 @@ L_sha256_len_avx1_len_rorx_start:
126+
movl %r15d, 28(%rdi)
127+
jnz L_sha256_len_avx1_len_rorx_start
128+
xorq %rax, %rax
129+
- vzeroupper
130+
addq $0x44, %rsp
131+
popq %rbp
132+
popq %r15
133+
@@ -10312,7 +10306,6 @@ _Transform_Sha256_AVX1_Sha:
134+
vmovhpd %xmm1, 16(%rdi)
135+
vmovhpd %xmm2, 24(%rdi)
136+
xorq %rax, %rax
137+
- vzeroupper
138+
repz retq
139+
#ifndef __APPLE__
140+
.size Transform_Sha256_AVX1_Sha,.-Transform_Sha256_AVX1_Sha
141+
@@ -10487,7 +10480,6 @@ L_sha256_sha_len_avx1_start:
142+
vmovhpd %xmm1, 16(%rdi)
143+
vmovhpd %xmm2, 24(%rdi)
144+
xorq %rax, %rax
145+
- vzeroupper
146+
repz retq
147+
#ifndef __APPLE__
148+
.size Transform_Sha256_AVX1_Sha_Len,.-Transform_Sha256_AVX1_Sha_Len
149+
diff --git a/wolfcrypt/src/sha512_asm.S b/wolfcrypt/src/sha512_asm.S
150+
index 84cb7c8269..fe7278541d 100644
151+
--- a/wolfcrypt/src/sha512_asm.S
152+
+++ b/wolfcrypt/src/sha512_asm.S
153+
@@ -159,7 +159,7 @@ _Transform_Sha512_AVX1:
154+
movq %r12, %rax
155+
xorq %r10, %rbx
156+
# Start of 16 rounds
157+
-L_sha256_len_avx1_start:
158+
+L_transform_sha512_avx1_start:
159+
vpaddq (%rsi), %xmm0, %xmm8
160+
vpaddq 16(%rsi), %xmm1, %xmm9
161+
vmovdqu %xmm8, (%rsp)
162+
@@ -906,7 +906,7 @@ L_sha256_len_avx1_start:
163+
vpaddq %xmm7, %xmm8, %xmm7
164+
# msg_sched done: 14-17
165+
subl $0x01, 128(%rsp)
166+
- jne L_sha256_len_avx1_start
167+
+ jne L_transform_sha512_avx1_start
168+
vpaddq (%rsi), %xmm0, %xmm8
169+
vpaddq 16(%rsi), %xmm1, %xmm9
170+
vmovdqu %xmm8, (%rsp)
171+
@@ -1372,7 +1372,6 @@ L_sha256_len_avx1_start:
172+
addq %r14, 48(%rdi)
173+
addq %r15, 56(%rdi)
174+
xorq %rax, %rax
175+
- vzeroupper
176+
addq $0x88, %rsp
177+
popq %r15
178+
popq %r14
179+
@@ -2664,7 +2663,6 @@ L_sha512_len_avx1_start:
180+
movq %r15, 56(%rdi)
181+
jnz L_sha512_len_avx1_begin
182+
xorq %rax, %rax
183+
- vzeroupper
184+
addq $0x90, %rsp
185+
popq %rbp
186+
popq %r15
187+
@@ -2805,7 +2803,7 @@ _Transform_Sha512_AVX1_RORX:
188+
vmovdqu %xmm8, 96(%rsp)
189+
vmovdqu %xmm9, 112(%rsp)
190+
# Start of 16 rounds
191+
-L_sha256_len_avx1_rorx_start:
192+
+L_transform_sha512_avx1_rorx_start:
193+
addq $0x80, %rsi
194+
# msg_sched: 0-1
195+
# rnd_0: 0 - 0
196+
@@ -3512,7 +3510,7 @@ L_sha256_len_avx1_rorx_start:
197+
vmovdqu %xmm8, 96(%rsp)
198+
vmovdqu %xmm9, 112(%rsp)
199+
subl $0x01, 128(%rsp)
200+
- jne L_sha256_len_avx1_rorx_start
201+
+ jne L_transform_sha512_avx1_rorx_start
202+
# rnd_all_2: 0-1
203+
# rnd_0: 0 - 7
204+
rorxq $14, %r12, %rax
205+
@@ -3931,7 +3929,6 @@ L_sha256_len_avx1_rorx_start:
206+
addq %r14, 48(%rdi)
207+
addq %r15, 56(%rdi)
208+
xorq %rax, %rax
209+
- vzeroupper
210+
addq $0x88, %rsp
211+
popq %r15
212+
popq %r14
213+
@@ -5168,7 +5165,6 @@ L_sha512_len_avx1_rorx_start:
214+
movq %r15, 56(%rdi)
215+
jnz L_sha512_len_avx1_rorx_begin
216+
xorq %rax, %rax
217+
- vzeroupper
218+
addq $0x90, %rsp
219+
popq %rbp
220+
popq %r15
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
From b1048870420bef0294b495e4b88871aa4e3c78ee Mon Sep 17 00:00:00 2001
2+
From: Sean Parkinson <sean@wolfssl.com>
3+
Date: Thu, 27 Feb 2025 09:25:13 +1000
4+
Subject: [PATCH] SHA256: Intel flags has SHA but not AVX1 or AVX2
5+
6+
Byte reversal when only IS_INTEL_SHA is set is handled the same as when AVX1 or AVX2 is set.
7+
---
8+
wolfcrypt/src/sha256.c | 6 ++++--
9+
1 file changed, 4 insertions(+), 2 deletions(-)
10+
11+
diff --git a/wolfcrypt/src/sha256.c b/wolfcrypt/src/sha256.c
12+
index 93b6afc546..31a557c8d0 100644
13+
--- a/wolfcrypt/src/sha256.c
14+
+++ b/wolfcrypt/src/sha256.c
15+
@@ -209,7 +209,8 @@ on the specific device platform.
16+
#define SHA256_UPDATE_REV_BYTES(ctx) (sha256->sha_method == SHA256_C)
17+
#else
18+
#define SHA256_UPDATE_REV_BYTES(ctx) \
19+
- (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags))
20+
+ (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags) && \
21+
+ !IS_INTEL_SHA(intel_flags))
22+
#endif
23+
#elif defined(FREESCALE_MMCAU_SHA)
24+
#define SHA256_UPDATE_REV_BYTES(ctx) 0 /* reverse not needed on update */
25+
@@ -1652,7 +1653,8 @@ static int InitSha256(wc_Sha256* sha256)
26+
#ifdef WC_C_DYNAMIC_FALLBACK
27+
if (sha256->sha_method != SHA256_C)
28+
#else
29+
- if (IS_INTEL_AVX1(intel_flags) || IS_INTEL_AVX2(intel_flags))
30+
+ if (IS_INTEL_AVX1(intel_flags) || IS_INTEL_AVX2(intel_flags) ||
31+
+ IS_INTEL_SHA(intel_flags))
32+
#endif
33+
#endif
34+
{

0 commit comments

Comments
 (0)