From db1afe88c361ceea22c6ac8abdb9b0ff41a39aa2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nikola=20Forr=C3=B3?= <nforro@redhat.com>
Date: Mon, 15 Apr 2019 16:56:45 +0200
Subject: [PATCH] x86 SIMD: Add endbr32/endbr64 instructions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Allow for indirect branch tracking with Intel CET (Control-Flow
Enforcement Technology) [1] by placing an ENDBR32/ENDBR64 instruction
at the entry point of every exported routine, which makes each of them
a valid target for an indirect call or jump.

Signed-off-by: Nikola Forró <nforro@redhat.com>

[1] https://software.intel.com/sites/default/files/managed/4d/2a/control-flow-enforcement-technology-preview.pdf
---
 simd/i386/jccolext-avx2.asm   | 1 +
 simd/i386/jccolext-mmx.asm    | 1 +
 simd/i386/jccolext-sse2.asm   | 1 +
 simd/i386/jcgryext-avx2.asm   | 1 +
 simd/i386/jcgryext-mmx.asm    | 1 +
 simd/i386/jcgryext-sse2.asm   | 1 +
 simd/i386/jchuff-sse2.asm     | 1 +
 simd/i386/jcphuff-sse2.asm    | 2 ++
 simd/i386/jcsample-avx2.asm   | 2 ++
 simd/i386/jcsample-mmx.asm    | 2 ++
 simd/i386/jcsample-sse2.asm   | 2 ++
 simd/i386/jdcolext-avx2.asm   | 1 +
 simd/i386/jdcolext-mmx.asm    | 1 +
 simd/i386/jdcolext-sse2.asm   | 1 +
 simd/i386/jdmrgext-avx2.asm   | 2 ++
 simd/i386/jdmrgext-mmx.asm    | 2 ++
 simd/i386/jdmrgext-sse2.asm   | 2 ++
 simd/i386/jdsample-avx2.asm   | 4 ++++
 simd/i386/jdsample-mmx.asm    | 4 ++++
 simd/i386/jdsample-sse2.asm   | 4 ++++
 simd/i386/jfdctflt-3dn.asm    | 1 +
 simd/i386/jfdctflt-sse.asm    | 1 +
 simd/i386/jfdctfst-mmx.asm    | 1 +
 simd/i386/jfdctfst-sse2.asm   | 1 +
 simd/i386/jfdctint-avx2.asm   | 1 +
 simd/i386/jfdctint-mmx.asm    | 1 +
 simd/i386/jfdctint-sse2.asm   | 1 +
 simd/i386/jidctflt-3dn.asm    | 1 +
 simd/i386/jidctflt-sse.asm    | 1 +
 simd/i386/jidctflt-sse2.asm   | 1 +
 simd/i386/jidctfst-mmx.asm    | 1 +
 simd/i386/jidctfst-sse2.asm   | 1 +
 simd/i386/jidctint-avx2.asm   | 1 +
 simd/i386/jidctint-mmx.asm    | 1 +
 simd/i386/jidctint-sse2.asm   | 1 +
 simd/i386/jidctred-mmx.asm    | 2 ++
 simd/i386/jidctred-sse2.asm   | 2 ++
 simd/i386/jquant-3dn.asm      | 2 ++
 simd/i386/jquant-mmx.asm      | 2 ++
 simd/i386/jquant-sse.asm      | 2 ++
 simd/i386/jquantf-sse2.asm    | 2 ++
 simd/i386/jquanti-avx2.asm    | 2 ++
 simd/i386/jquanti-sse2.asm    | 2 ++
 simd/nasm/jsimdext.inc        | 8 ++++++++
 simd/x86_64/jccolext-avx2.asm | 1 +
 simd/x86_64/jccolext-sse2.asm | 1 +
 simd/x86_64/jcgryext-avx2.asm | 1 +
 simd/x86_64/jcgryext-sse2.asm | 1 +
 simd/x86_64/jchuff-sse2.asm   | 1 +
 simd/x86_64/jcphuff-sse2.asm  | 2 ++
 simd/x86_64/jcsample-avx2.asm | 2 ++
 simd/x86_64/jcsample-sse2.asm | 2 ++
 simd/x86_64/jdcolext-avx2.asm | 1 +
 simd/x86_64/jdcolext-sse2.asm | 1 +
 simd/x86_64/jdmrgext-avx2.asm | 2 ++
 simd/x86_64/jdmrgext-sse2.asm | 2 ++
 simd/x86_64/jdsample-avx2.asm | 4 ++++
 simd/x86_64/jdsample-sse2.asm | 4 ++++
 simd/x86_64/jfdctflt-sse.asm  | 1 +
 simd/x86_64/jfdctfst-sse2.asm | 1 +
 simd/x86_64/jfdctint-avx2.asm | 1 +
 simd/x86_64/jfdctint-sse2.asm | 1 +
 simd/x86_64/jidctflt-sse2.asm | 1 +
 simd/x86_64/jidctfst-sse2.asm | 1 +
 simd/x86_64/jidctint-avx2.asm | 1 +
 simd/x86_64/jidctint-sse2.asm | 1 +
 simd/x86_64/jidctred-sse2.asm | 2 ++
 simd/x86_64/jquantf-sse2.asm  | 2 ++
 simd/x86_64/jquanti-avx2.asm  | 2 ++
 simd/x86_64/jquanti-sse2.asm  | 2 ++
 70 files changed, 116 insertions(+)

diff --git a/simd/i386/jccolext-avx2.asm b/simd/i386/jccolext-avx2.asm
index c46d684..7dc6e08 100644
--- a/simd/i386/jccolext-avx2.asm
+++ b/simd/i386/jccolext-avx2.asm
@@ -42,6 +42,7 @@
     GLOBAL_FUNCTION(jsimd_rgb_ycc_convert_avx2)

 EXTN(jsimd_rgb_ycc_convert_avx2):
+    _endbr32
     push        ebp
     mov         eax, esp                     ; eax = original ebp
     sub         esp, byte 4
diff --git a/simd/i386/jccolext-mmx.asm b/simd/i386/jccolext-mmx.asm
index 6357a42..8048abb 100644
--- a/simd/i386/jccolext-mmx.asm
+++ b/simd/i386/jccolext-mmx.asm
@@ -42,6 +42,7 @@
     GLOBAL_FUNCTION(jsimd_rgb_ycc_convert_mmx)

 EXTN(jsimd_rgb_ycc_convert_mmx):
+    _endbr32
     push        ebp
     mov         eax, esp                    ; eax = original ebp
     sub         esp, byte 4
diff --git a/simd/i386/jccolext-sse2.asm b/simd/i386/jccolext-sse2.asm
index c6c8085..5307ddc 100644
--- a/simd/i386/jccolext-sse2.asm
+++ b/simd/i386/jccolext-sse2.asm
@@ -41,6 +41,7 @@
     GLOBAL_FUNCTION(jsimd_rgb_ycc_convert_sse2)

 EXTN(jsimd_rgb_ycc_convert_sse2):
+    _endbr32
     push        ebp
     mov         eax, esp                     ; eax = original ebp
     sub         esp, byte 4
diff --git a/simd/i386/jcgryext-avx2.asm b/simd/i386/jcgryext-avx2.asm
index 3fa7973..27a0e11 100644
--- a/simd/i386/jcgryext-avx2.asm
+++ b/simd/i386/jcgryext-avx2.asm
@@ -42,6 +42,7 @@
     GLOBAL_FUNCTION(jsimd_rgb_gray_convert_avx2)

 EXTN(jsimd_rgb_gray_convert_avx2):
+    _endbr32
     push        ebp
     mov         eax, esp                     ; eax = original ebp
     sub         esp, byte 4
diff --git a/simd/i386/jcgryext-mmx.asm b/simd/i386/jcgryext-mmx.asm
index 8af42e5..dda0e05 100644
--- a/simd/i386/jcgryext-mmx.asm
+++ b/simd/i386/jcgryext-mmx.asm
@@ -42,6 +42,7 @@
     GLOBAL_FUNCTION(jsimd_rgb_gray_convert_mmx)

 EXTN(jsimd_rgb_gray_convert_mmx):
+    _endbr32
     push        ebp
     mov         eax, esp                    ; eax = original ebp
     sub         esp, byte 4
diff --git a/simd/i386/jcgryext-sse2.asm b/simd/i386/jcgryext-sse2.asm
index c9d6ff1..f8835bb 100644
--- a/simd/i386/jcgryext-sse2.asm
+++ b/simd/i386/jcgryext-sse2.asm
@@ -41,6 +41,7 @@
     GLOBAL_FUNCTION(jsimd_rgb_gray_convert_sse2)

 EXTN(jsimd_rgb_gray_convert_sse2):
+    _endbr32
     push        ebp
     mov         eax, esp                     ; eax = original ebp
     sub         esp, byte 4
diff --git a/simd/i386/jchuff-sse2.asm b/simd/i386/jchuff-sse2.asm
index 76cc85f..0217480 100644
--- a/simd/i386/jchuff-sse2.asm
+++ b/simd/i386/jchuff-sse2.asm
@@ -350,6 +350,7 @@ times 1 << 14 db 15
     GLOBAL_FUNCTION(jsimd_huff_encode_one_block_sse2)

 EXTN(jsimd_huff_encode_one_block_sse2):
+    _endbr32

 %assign stack_offset      0
 %define arg_state         4 + stack_offset
diff --git a/simd/i386/jcphuff-sse2.asm b/simd/i386/jcphuff-sse2.asm
index c26b48a..7fb01e5 100644
--- a/simd/i386/jcphuff-sse2.asm
+++ b/simd/i386/jcphuff-sse2.asm
@@ -281,6 +281,7 @@
     GLOBAL_FUNCTION(jsimd_encode_mcu_AC_first_prepare_sse2)

 EXTN(jsimd_encode_mcu_AC_first_prepare_sse2):
+    _endbr32
     push        ebp
     mov         eax, esp                     ; eax = original ebp
     sub         esp, byte 4
@@ -460,6 +461,7 @@ EXTN(jsimd_encode_mcu_AC_first_prepare_sse2):
     GLOBAL_FUNCTION(jsimd_encode_mcu_AC_refine_prepare_sse2)

 EXTN(jsimd_encode_mcu_AC_refine_prepare_sse2):
+    _endbr32
     push        ebp
     mov         eax, esp                     ; eax = original ebp
     sub         esp, byte 4
diff --git a/simd/i386/jcsample-avx2.asm b/simd/i386/jcsample-avx2.asm
index 0a20802..46eba8c 100644
--- a/simd/i386/jcsample-avx2.asm
+++ b/simd/i386/jcsample-avx2.asm
@@ -43,6 +43,7 @@
     GLOBAL_FUNCTION(jsimd_h2v1_downsample_avx2)

 EXTN(jsimd_h2v1_downsample_avx2):
+    _endbr32
     push        ebp
     mov         ebp, esp
 ;   push        ebx                     ; unused
@@ -216,6 +217,7 @@ EXTN(jsimd_h2v1_downsample_avx2):
     GLOBAL_FUNCTION(jsimd_h2v2_downsample_avx2)

 EXTN(jsimd_h2v2_downsample_avx2):
+    _endbr32
     push        ebp
     mov         ebp, esp
 ;   push        ebx                     ; unused
diff --git a/simd/i386/jcsample-mmx.asm b/simd/i386/jcsample-mmx.asm
index 2c223ee..b2b8ded 100644
--- a/simd/i386/jcsample-mmx.asm
+++ b/simd/i386/jcsample-mmx.asm
@@ -42,6 +42,7 @@
     GLOBAL_FUNCTION(jsimd_h2v1_downsample_mmx)

 EXTN(jsimd_h2v1_downsample_mmx):
+    _endbr32
     push        ebp
     mov         ebp, esp
 ;   push        ebx                     ; unused
@@ -185,6 +186,7 @@ EXTN(jsimd_h2v1_downsample_mmx):
     GLOBAL_FUNCTION(jsimd_h2v2_downsample_mmx)

 EXTN(jsimd_h2v2_downsample_mmx):
+    _endbr32
     push        ebp
     mov         ebp, esp
 ;   push        ebx                     ; unused
diff --git a/simd/i386/jcsample-sse2.asm b/simd/i386/jcsample-sse2.asm
index 4fea60d..4c22b40 100644
--- a/simd/i386/jcsample-sse2.asm
+++ b/simd/i386/jcsample-sse2.asm
@@ -42,6 +42,7 @@
     GLOBAL_FUNCTION(jsimd_h2v1_downsample_sse2)

 EXTN(jsimd_h2v1_downsample_sse2):
+    _endbr32
     push        ebp
     mov         ebp, esp
 ;   push        ebx                     ; unused
@@ -198,6 +199,7 @@ EXTN(jsimd_h2v1_downsample_sse2):
     GLOBAL_FUNCTION(jsimd_h2v2_downsample_sse2)

 EXTN(jsimd_h2v2_downsample_sse2):
+    _endbr32
     push        ebp
     mov         ebp, esp
 ;   push        ebx                     ; unused
diff --git a/simd/i386/jdcolext-avx2.asm b/simd/i386/jdcolext-avx2.asm
index 015be04..b076765 100644
--- a/simd/i386/jdcolext-avx2.asm
+++ b/simd/i386/jdcolext-avx2.asm
@@ -43,6 +43,7 @@
     GLOBAL_FUNCTION(jsimd_ycc_rgb_convert_avx2)

 EXTN(jsimd_ycc_rgb_convert_avx2):
+    _endbr32
     push        ebp
     mov         eax, esp                     ; eax = original ebp
     sub         esp, byte 4
diff --git a/simd/i386/jdcolext-mmx.asm b/simd/i386/jdcolext-mmx.asm
index 5813cfc..150f5b6 100644
--- a/simd/i386/jdcolext-mmx.asm
+++ b/simd/i386/jdcolext-mmx.asm
@@ -42,6 +42,7 @@
     GLOBAL_FUNCTION(jsimd_ycc_rgb_convert_mmx)

 EXTN(jsimd_ycc_rgb_convert_mmx):
+    _endbr32
     push        ebp
     mov         eax, esp                    ; eax = original ebp
     sub         esp, byte 4
diff --git a/simd/i386/jdcolext-sse2.asm b/simd/i386/jdcolext-sse2.asm
index d5572b3..cd3ac70 100644
--- a/simd/i386/jdcolext-sse2.asm
+++ b/simd/i386/jdcolext-sse2.asm
@@ -42,6 +42,7 @@
     GLOBAL_FUNCTION(jsimd_ycc_rgb_convert_sse2)

 EXTN(jsimd_ycc_rgb_convert_sse2):
+    _endbr32
     push        ebp
     mov         eax, esp                     ; eax = original ebp
     sub         esp, byte 4
diff --git a/simd/i386/jdmrgext-avx2.asm b/simd/i386/jdmrgext-avx2.asm
index e35f728..0db0aa4 100644
--- a/simd/i386/jdmrgext-avx2.asm
+++ b/simd/i386/jdmrgext-avx2.asm
@@ -43,6 +43,7 @@
     GLOBAL_FUNCTION(jsimd_h2v1_merged_upsample_avx2)

 EXTN(jsimd_h2v1_merged_upsample_avx2):
+    _endbr32
     push        ebp
     mov         eax, esp                     ; eax = original ebp
     sub         esp, byte 4
@@ -523,6 +524,7 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
     GLOBAL_FUNCTION(jsimd_h2v2_merged_upsample_avx2)

 EXTN(jsimd_h2v2_merged_upsample_avx2):
+    _endbr32
     push        ebp
     mov         ebp, esp
     push        ebx
diff --git a/simd/i386/jdmrgext-mmx.asm b/simd/i386/jdmrgext-mmx.asm
index eb3e36b..6427a1a 100644
--- a/simd/i386/jdmrgext-mmx.asm
+++ b/simd/i386/jdmrgext-mmx.asm
@@ -40,6 +40,7 @@
     GLOBAL_FUNCTION(jsimd_h2v1_merged_upsample_mmx)

 EXTN(jsimd_h2v1_merged_upsample_mmx):
+    _endbr32
     push        ebp
     mov         eax, esp                    ; eax = original ebp
     sub         esp, byte 4
@@ -408,6 +409,7 @@ EXTN(jsimd_h2v1_merged_upsample_mmx):
     GLOBAL_FUNCTION(jsimd_h2v2_merged_upsample_mmx)

 EXTN(jsimd_h2v2_merged_upsample_mmx):
+    _endbr32
     push        ebp
     mov         ebp, esp
     push        ebx
diff --git a/simd/i386/jdmrgext-sse2.asm b/simd/i386/jdmrgext-sse2.asm
index c113dc4..6897fa1 100644
--- a/simd/i386/jdmrgext-sse2.asm
+++ b/simd/i386/jdmrgext-sse2.asm
@@ -42,6 +42,7 @@
     GLOBAL_FUNCTION(jsimd_h2v1_merged_upsample_sse2)

 EXTN(jsimd_h2v1_merged_upsample_sse2):
+    _endbr32
     push        ebp
     mov         eax, esp                     ; eax = original ebp
     sub         esp, byte 4
@@ -465,6 +466,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
     GLOBAL_FUNCTION(jsimd_h2v2_merged_upsample_sse2)

 EXTN(jsimd_h2v2_merged_upsample_sse2):
+    _endbr32
     push        ebp
     mov         ebp, esp
     push        ebx
diff --git a/simd/i386/jdsample-avx2.asm b/simd/i386/jdsample-avx2.asm
index a800c35..7d52708 100644
--- a/simd/i386/jdsample-avx2.asm
+++ b/simd/i386/jdsample-avx2.asm
@@ -60,6 +60,7 @@ PW_EIGHT times 16 dw 8
     GLOBAL_FUNCTION(jsimd_h2v1_fancy_upsample_avx2)

 EXTN(jsimd_h2v1_fancy_upsample_avx2):
+    _endbr32
     push        ebp
     mov         ebp, esp
     pushpic     ebx
@@ -227,6 +228,7 @@ EXTN(jsimd_h2v1_fancy_upsample_avx2):
     GLOBAL_FUNCTION(jsimd_h2v2_fancy_upsample_avx2)

 EXTN(jsimd_h2v2_fancy_upsample_avx2):
+    _endbr32
     push        ebp
     mov         eax, esp                     ; eax = original ebp
     sub         esp, byte 4
@@ -570,6 +572,7 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
     GLOBAL_FUNCTION(jsimd_h2v1_upsample_avx2)

 EXTN(jsimd_h2v1_upsample_avx2):
+    _endbr32
     push        ebp
     mov         ebp, esp
 ;   push        ebx                     ; unused
@@ -669,6 +672,7 @@ EXTN(jsimd_h2v1_upsample_avx2):
     GLOBAL_FUNCTION(jsimd_h2v2_upsample_avx2)

 EXTN(jsimd_h2v2_upsample_avx2):
+    _endbr32
     push        ebp
     mov         ebp, esp
     push        ebx
diff --git a/simd/i386/jdsample-mmx.asm b/simd/i386/jdsample-mmx.asm
index 12c49f0..7f2ab40 100644
--- a/simd/i386/jdsample-mmx.asm
+++ b/simd/i386/jdsample-mmx.asm
@@ -59,6 +59,7 @@ PW_EIGHT times 4 dw 8
     GLOBAL_FUNCTION(jsimd_h2v1_fancy_upsample_mmx)

 EXTN(jsimd_h2v1_fancy_upsample_mmx):
+    _endbr32
     push        ebp
     mov         ebp, esp
     pushpic     ebx
@@ -217,6 +218,7 @@ EXTN(jsimd_h2v1_fancy_upsample_mmx):
     GLOBAL_FUNCTION(jsimd_h2v2_fancy_upsample_mmx)

 EXTN(jsimd_h2v2_fancy_upsample_mmx):
+    _endbr32
     push        ebp
     mov         eax, esp                    ; eax = original ebp
     sub         esp, byte 4
@@ -541,6 +543,7 @@ EXTN(jsimd_h2v2_fancy_upsample_mmx):
     GLOBAL_FUNCTION(jsimd_h2v1_upsample_mmx)

 EXTN(jsimd_h2v1_upsample_mmx):
+    _endbr32
     push        ebp
     mov         ebp, esp
 ;   push        ebx                     ; unused
@@ -640,6 +643,7 @@ EXTN(jsimd_h2v1_upsample_mmx):
     GLOBAL_FUNCTION(jsimd_h2v2_upsample_mmx)

 EXTN(jsimd_h2v2_upsample_mmx):
+    _endbr32
     push        ebp
     mov         ebp, esp
     push        ebx
diff --git a/simd/i386/jdsample-sse2.asm b/simd/i386/jdsample-sse2.asm
index 4e28d2f..3311b25 100644
--- a/simd/i386/jdsample-sse2.asm
+++ b/simd/i386/jdsample-sse2.asm
@@ -59,6 +59,7 @@ PW_EIGHT times 8 dw 8
     GLOBAL_FUNCTION(jsimd_h2v1_fancy_upsample_sse2)

 EXTN(jsimd_h2v1_fancy_upsample_sse2):
+    _endbr32
     push        ebp
     mov         ebp, esp
     pushpic     ebx
@@ -216,6 +217,7 @@ EXTN(jsimd_h2v1_fancy_upsample_sse2):
     GLOBAL_FUNCTION(jsimd_h2v2_fancy_upsample_sse2)

 EXTN(jsimd_h2v2_fancy_upsample_sse2):
+    _endbr32
     push        ebp
     mov         eax, esp                     ; eax = original ebp
     sub         esp, byte 4
@@ -538,6 +540,7 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
     GLOBAL_FUNCTION(jsimd_h2v1_upsample_sse2)

 EXTN(jsimd_h2v1_upsample_sse2):
+    _endbr32
     push        ebp
     mov         ebp, esp
 ;   push        ebx                     ; unused
@@ -635,6 +638,7 @@ EXTN(jsimd_h2v1_upsample_sse2):
     GLOBAL_FUNCTION(jsimd_h2v2_upsample_sse2)

 EXTN(jsimd_h2v2_upsample_sse2):
+    _endbr32
     push        ebp
     mov         ebp, esp
     push        ebx
diff --git a/simd/i386/jfdctflt-3dn.asm b/simd/i386/jfdctflt-3dn.asm
index 322ab16..109e36e 100644
--- a/simd/i386/jfdctflt-3dn.asm
+++ b/simd/i386/jfdctflt-3dn.asm
@@ -56,6 +56,7 @@ PD_1_306 times 2 dd 1.306562964876376527856643
     GLOBAL_FUNCTION(jsimd_fdct_float_3dnow)

 EXTN(jsimd_fdct_float_3dnow):
+    _endbr32
     push        ebp
     mov         eax, esp                    ; eax = original ebp
     sub         esp, byte 4
diff --git a/simd/i386/jfdctflt-sse.asm b/simd/i386/jfdctflt-sse.asm
index 86952c6..b1e0576 100644
--- a/simd/i386/jfdctflt-sse.asm
+++ b/simd/i386/jfdctflt-sse.asm
@@ -67,6 +67,7 @@ PD_1_306 times 4 dd 1.306562964876376527856643
     GLOBAL_FUNCTION(jsimd_fdct_float_sse)

 EXTN(jsimd_fdct_float_sse):
+    _endbr32
     push        ebp
     mov         eax, esp                     ; eax = original ebp
     sub         esp, byte 4
diff --git a/simd/i386/jfdctfst-mmx.asm b/simd/i386/jfdctfst-mmx.asm
index 80645a5..be84fdb 100644
--- a/simd/i386/jfdctfst-mmx.asm
+++ b/simd/i386/jfdctfst-mmx.asm
@@ -81,6 +81,7 @@ PW_F1306 times 4 dw F_1_306 << CONST_SHIFT
     GLOBAL_FUNCTION(jsimd_fdct_ifast_mmx)

 EXTN(jsimd_fdct_ifast_mmx):
+    _endbr32
     push        ebp
     mov         eax, esp                    ; eax = original ebp
     sub         esp, byte 4
diff --git a/simd/i386/jfdctfst-sse2.asm b/simd/i386/jfdctfst-sse2.asm
index 446fa7a..945f9cf 100644
--- a/simd/i386/jfdctfst-sse2.asm
+++ b/simd/i386/jfdctfst-sse2.asm
@@ -82,6 +82,7 @@ PW_F1306 times 8 dw F_1_306 << CONST_SHIFT
     GLOBAL_FUNCTION(jsimd_fdct_ifast_sse2)

 EXTN(jsimd_fdct_ifast_sse2):
+    _endbr32
     push        ebp
     mov         eax, esp                     ; eax = original ebp
     sub         esp, byte 4
diff --git a/simd/i386/jfdctint-avx2.asm b/simd/i386/jfdctint-avx2.asm
index 23cf733..56acb63 100644
--- a/simd/i386/jfdctint-avx2.asm
+++ b/simd/i386/jfdctint-avx2.asm
@@ -260,6 +260,7 @@ PW_1_NEG1                  times 8  dw  1
     GLOBAL_FUNCTION(jsimd_fdct_islow_avx2)

 EXTN(jsimd_fdct_islow_avx2):
+    _endbr32
     push        ebp
     mov         ebp, esp
     pushpic     ebx
diff --git a/simd/i386/jfdctint-mmx.asm b/simd/i386/jfdctint-mmx.asm
index 34a43b9..4d1e773 100644
--- a/simd/i386/jfdctint-mmx.asm
+++ b/simd/i386/jfdctint-mmx.asm
@@ -102,6 +102,7 @@ PW_DESCALE_P2X times 4 dw  1 << (PASS1_BITS - 1)
     GLOBAL_FUNCTION(jsimd_fdct_islow_mmx)

 EXTN(jsimd_fdct_islow_mmx):
+    _endbr32
     push        ebp
     mov         eax, esp                    ; eax = original ebp
     sub         esp, byte 4
diff --git a/simd/i386/jfdctint-sse2.asm b/simd/i386/jfdctint-sse2.asm
index 6f8e18c..3954c8f 100644
--- a/simd/i386/jfdctint-sse2.asm
+++ b/simd/i386/jfdctint-sse2.asm
@@ -103,6 +103,7 @@ PW_DESCALE_P2X times 8 dw  1 << (PASS1_BITS - 1)
     GLOBAL_FUNCTION(jsimd_fdct_islow_sse2)

 EXTN(jsimd_fdct_islow_sse2):
+    _endbr32
     push        ebp
     mov         eax, esp                     ; eax = original ebp
     sub         esp, byte 4
diff --git a/simd/i386/jidctflt-3dn.asm b/simd/i386/jidctflt-3dn.asm
index 8795191..eb49902 100644
--- a/simd/i386/jidctflt-3dn.asm
+++ b/simd/i386/jidctflt-3dn.asm
@@ -65,6 +65,7 @@ PB_CENTERJSAMP  times 8 db CENTERJSAMPLE
     GLOBAL_FUNCTION(jsimd_idct_float_3dnow)

 EXTN(jsimd_idct_float_3dnow):
+    _endbr32
     push        ebp
     mov         eax, esp                    ; eax = original ebp
     sub         esp, byte 4
diff --git a/simd/i386/jidctflt-sse.asm b/simd/i386/jidctflt-sse.asm
index b27ecfd..ffe54f8 100644
--- a/simd/i386/jidctflt-sse.asm
+++ b/simd/i386/jidctflt-sse.asm
@@ -75,6 +75,7 @@ PB_CENTERJSAMP times 8 db  CENTERJSAMPLE
     GLOBAL_FUNCTION(jsimd_idct_float_sse)

 EXTN(jsimd_idct_float_sse):
+    _endbr32
     push        ebp
     mov         eax, esp                     ; eax = original ebp
     sub         esp, byte 4
diff --git a/simd/i386/jidctflt-sse2.asm b/simd/i386/jidctflt-sse2.asm
index c646eae..fd1fe35 100644
--- a/simd/i386/jidctflt-sse2.asm
+++ b/simd/i386/jidctflt-sse2.asm
@@ -75,6 +75,7 @@ PB_CENTERJSAMP  times 16 db  CENTERJSAMPLE
     GLOBAL_FUNCTION(jsimd_idct_float_sse2)

 EXTN(jsimd_idct_float_sse2):
+    _endbr32
     push        ebp
     mov         eax, esp                     ; eax = original ebp
     sub         esp, byte 4
diff --git a/simd/i386/jidctfst-mmx.asm b/simd/i386/jidctfst-mmx.asm
index 24622d4..00940b8 100644
--- a/simd/i386/jidctfst-mmx.asm
+++ b/simd/i386/jidctfst-mmx.asm
@@ -96,6 +96,7 @@ PB_CENTERJSAMP times 8 db  CENTERJSAMPLE
     GLOBAL_FUNCTION(jsimd_idct_ifast_mmx)

 EXTN(jsimd_idct_ifast_mmx):
+    _endbr32
     push        ebp
     mov         eax, esp                    ; eax = original ebp
     sub         esp, byte 4
diff --git a/simd/i386/jidctfst-sse2.asm b/simd/i386/jidctfst-sse2.asm
index 19704ff..1f4af33 100644
--- a/simd/i386/jidctfst-sse2.asm
+++ b/simd/i386/jidctfst-sse2.asm
@@ -94,6 +94,7 @@ PB_CENTERJSAMP times 16 db  CENTERJSAMPLE
     GLOBAL_FUNCTION(jsimd_idct_ifast_sse2)

 EXTN(jsimd_idct_ifast_sse2):
+    _endbr32
     push        ebp
     mov         eax, esp                     ; eax = original ebp
     sub         esp, byte 4
diff --git a/simd/i386/jidctint-avx2.asm b/simd/i386/jidctint-avx2.asm
index 199c7df..2eb606a 100644
--- a/simd/i386/jidctint-avx2.asm
+++ b/simd/i386/jidctint-avx2.asm
@@ -296,6 +296,7 @@ PW_1_NEG1                  times 8  dw  1
     GLOBAL_FUNCTION(jsimd_idct_islow_avx2)

 EXTN(jsimd_idct_islow_avx2):
+    _endbr32
     push        ebp
     mov         eax, esp                     ; eax = original ebp
     sub         esp, byte 4
diff --git a/simd/i386/jidctint-mmx.asm b/simd/i386/jidctint-mmx.asm
index f15c8d3..2d91b7e 100644
--- a/simd/i386/jidctint-mmx.asm
+++ b/simd/i386/jidctint-mmx.asm
@@ -109,6 +109,7 @@ PB_CENTERJSAMP times 8 db  CENTERJSAMPLE
     GLOBAL_FUNCTION(jsimd_idct_islow_mmx)

 EXTN(jsimd_idct_islow_mmx):
+    _endbr32
     push        ebp
     mov         eax, esp                    ; eax = original ebp
     sub         esp, byte 4
diff --git a/simd/i386/jidctint-sse2.asm b/simd/i386/jidctint-sse2.asm
index 43e3201..804be19 100644
--- a/simd/i386/jidctint-sse2.asm
+++ b/simd/i386/jidctint-sse2.asm
@@ -107,6 +107,7 @@ PB_CENTERJSAMP times 16 db  CENTERJSAMPLE
     GLOBAL_FUNCTION(jsimd_idct_islow_sse2)

 EXTN(jsimd_idct_islow_sse2):
+    _endbr32
     push        ebp
     mov         eax, esp                     ; eax = original ebp
     sub         esp, byte 4
diff --git a/simd/i386/jidctred-mmx.asm b/simd/i386/jidctred-mmx.asm
index e2307e1..cb43106 100644
--- a/simd/i386/jidctred-mmx.asm
+++ b/simd/i386/jidctred-mmx.asm
@@ -117,6 +117,7 @@ PB_CENTERJSAMP  times 8 db  CENTERJSAMPLE
     GLOBAL_FUNCTION(jsimd_idct_4x4_mmx)

 EXTN(jsimd_idct_4x4_mmx):
+    _endbr32
     push        ebp
     mov         eax, esp                    ; eax = original ebp
     sub         esp, byte 4
@@ -504,6 +505,7 @@ EXTN(jsimd_idct_4x4_mmx):
     GLOBAL_FUNCTION(jsimd_idct_2x2_mmx)

 EXTN(jsimd_idct_2x2_mmx):
+    _endbr32
     push        ebp
     mov         ebp, esp
     push        ebx
diff --git a/simd/i386/jidctred-sse2.asm b/simd/i386/jidctred-sse2.asm
index 6e56494..2a61b9e 100644
--- a/simd/i386/jidctred-sse2.asm
+++ b/simd/i386/jidctred-sse2.asm
@@ -115,6 +115,7 @@ PB_CENTERJSAMP  times 16 db  CENTERJSAMPLE
     GLOBAL_FUNCTION(jsimd_idct_4x4_sse2)

 EXTN(jsimd_idct_4x4_sse2):
+    _endbr32
     push        ebp
     mov         eax, esp                     ; eax = original ebp
     sub         esp, byte 4
@@ -425,6 +426,7 @@ EXTN(jsimd_idct_4x4_sse2):
     GLOBAL_FUNCTION(jsimd_idct_2x2_sse2)

 EXTN(jsimd_idct_2x2_sse2):
+    _endbr32
     push        ebp
     mov         ebp, esp
     push        ebx
diff --git a/simd/i386/jquant-3dn.asm b/simd/i386/jquant-3dn.asm
index 5cb60ca..a0599eb 100644
--- a/simd/i386/jquant-3dn.asm
+++ b/simd/i386/jquant-3dn.asm
@@ -36,6 +36,7 @@
     GLOBAL_FUNCTION(jsimd_convsamp_float_3dnow)

 EXTN(jsimd_convsamp_float_3dnow):
+    _endbr32
     push        ebp
     mov         ebp, esp
     push        ebx
@@ -138,6 +139,7 @@ EXTN(jsimd_convsamp_float_3dnow):
     GLOBAL_FUNCTION(jsimd_quantize_float_3dnow)

 EXTN(jsimd_quantize_float_3dnow):
+    _endbr32
     push        ebp
     mov         ebp, esp
 ;   push        ebx                     ; unused
diff --git a/simd/i386/jquant-mmx.asm b/simd/i386/jquant-mmx.asm
index 61305c6..080021b 100644
--- a/simd/i386/jquant-mmx.asm
+++ b/simd/i386/jquant-mmx.asm
@@ -36,6 +36,7 @@
     GLOBAL_FUNCTION(jsimd_convsamp_mmx)

 EXTN(jsimd_convsamp_mmx):
+    _endbr32
     push        ebp
     mov         ebp, esp
     push        ebx
@@ -145,6 +146,7 @@ EXTN(jsimd_convsamp_mmx):
     GLOBAL_FUNCTION(jsimd_quantize_mmx)

 EXTN(jsimd_quantize_mmx):
+    _endbr32
     push        ebp
     mov         ebp, esp
 ;   push        ebx                     ; unused
diff --git a/simd/i386/jquant-sse.asm b/simd/i386/jquant-sse.asm
index 218adc9..cacd2a9 100644
--- a/simd/i386/jquant-sse.asm
+++ b/simd/i386/jquant-sse.asm
@@ -36,6 +36,7 @@
     GLOBAL_FUNCTION(jsimd_convsamp_float_sse)

 EXTN(jsimd_convsamp_float_sse):
+    _endbr32
     push        ebp
     mov         ebp, esp
     push        ebx
@@ -138,6 +139,7 @@ EXTN(jsimd_convsamp_float_sse):
     GLOBAL_FUNCTION(jsimd_quantize_float_sse)

 EXTN(jsimd_quantize_float_sse):
+    _endbr32
     push        ebp
     mov         ebp, esp
 ;   push        ebx                     ; unused
diff --git a/simd/i386/jquantf-sse2.asm b/simd/i386/jquantf-sse2.asm
index a881ab5..6f4789c 100644
--- a/simd/i386/jquantf-sse2.asm
+++ b/simd/i386/jquantf-sse2.asm
@@ -36,6 +36,7 @@
     GLOBAL_FUNCTION(jsimd_convsamp_float_sse2)

 EXTN(jsimd_convsamp_float_sse2):
+    _endbr32
     push        ebp
     mov         ebp, esp
     push        ebx
@@ -115,6 +116,7 @@ EXTN(jsimd_convsamp_float_sse2):
     GLOBAL_FUNCTION(jsimd_quantize_float_sse2)

 EXTN(jsimd_quantize_float_sse2):
+    _endbr32
     push        ebp
     mov         ebp, esp
 ;   push        ebx                     ; unused
diff --git a/simd/i386/jquanti-avx2.asm b/simd/i386/jquanti-avx2.asm
index 5ed6bec..efcddd2 100644
--- a/simd/i386/jquanti-avx2.asm
+++ b/simd/i386/jquanti-avx2.asm
@@ -37,6 +37,7 @@
     GLOBAL_FUNCTION(jsimd_convsamp_avx2)

 EXTN(jsimd_convsamp_avx2):
+    _endbr32
     push        ebp
     mov         ebp, esp
     push        ebx
@@ -130,6 +131,7 @@ EXTN(jsimd_convsamp_avx2):
     GLOBAL_FUNCTION(jsimd_quantize_avx2)

 EXTN(jsimd_quantize_avx2):
+    _endbr32
     push        ebp
     mov         ebp, esp
 ;   push        ebx                     ; unused
diff --git a/simd/i386/jquanti-sse2.asm b/simd/i386/jquanti-sse2.asm
index 0a50940..98d39e0 100644
--- a/simd/i386/jquanti-sse2.asm
+++ b/simd/i386/jquanti-sse2.asm
@@ -36,6 +36,7 @@
     GLOBAL_FUNCTION(jsimd_convsamp_sse2)

 EXTN(jsimd_convsamp_sse2):
+    _endbr32
     push        ebp
     mov         ebp, esp
     push        ebx
@@ -121,6 +122,7 @@ EXTN(jsimd_convsamp_sse2):
     GLOBAL_FUNCTION(jsimd_quantize_sse2)

 EXTN(jsimd_quantize_sse2):
+    _endbr32
     push        ebp
     mov         ebp, esp
 ;   push        ebx                     ; unused
diff --git a/simd/nasm/jsimdext.inc b/simd/nasm/jsimdext.inc
index bebcb20..4b0cd69 100644
--- a/simd/nasm/jsimdext.inc
+++ b/simd/nasm/jsimdext.inc
@@ -513,6 +513,14 @@ const_base:

 %endif

+%imacro _endbr32 0
+    dd 0xfb1e0ff3                       ; raw ENDBR32 encoding: stored little-endian as bytes F3 0F 1E FB
+%endmacro
+
+%imacro _endbr64 0
+    dd 0xfa1e0ff3                       ; raw ENDBR64 encoding: stored little-endian as bytes F3 0F 1E FA
+%endmacro
+
 ; --------------------------------------------------------------------------
 ;  Defines picked up from the C headers
 ;
diff --git a/simd/x86_64/jccolext-avx2.asm b/simd/x86_64/jccolext-avx2.asm
index dd7ea39..e6c8283 100644
--- a/simd/x86_64/jccolext-avx2.asm
+++ b/simd/x86_64/jccolext-avx2.asm
@@ -41,6 +41,7 @@
     GLOBAL_FUNCTION(jsimd_rgb_ycc_convert_avx2)

 EXTN(jsimd_rgb_ycc_convert_avx2):
+    _endbr64
     push        rbp
     mov         rbp, rsp
     push        r15
diff --git a/simd/x86_64/jccolext-sse2.asm b/simd/x86_64/jccolext-sse2.asm
index bc1e817..4bb1af6 100644
--- a/simd/x86_64/jccolext-sse2.asm
+++ b/simd/x86_64/jccolext-sse2.asm
@@ -40,6 +40,7 @@
     GLOBAL_FUNCTION(jsimd_rgb_ycc_convert_sse2)

 EXTN(jsimd_rgb_ycc_convert_sse2):
+    _endbr64
     push        rbp
     mov         rbp, rsp
     push        r15
diff --git a/simd/x86_64/jcgryext-avx2.asm b/simd/x86_64/jcgryext-avx2.asm
index c8c8d12..12ecb7e 100644
--- a/simd/x86_64/jcgryext-avx2.asm
+++ b/simd/x86_64/jcgryext-avx2.asm
@@ -41,6 +41,7 @@
     GLOBAL_FUNCTION(jsimd_rgb_gray_convert_avx2)

 EXTN(jsimd_rgb_gray_convert_avx2):
+    _endbr64
     push        rbp
     mov         rbp, rsp
     push        r15
diff --git a/simd/x86_64/jcgryext-sse2.asm b/simd/x86_64/jcgryext-sse2.asm
index 7e5a0f2..e3a2413 100644
--- a/simd/x86_64/jcgryext-sse2.asm
+++ b/simd/x86_64/jcgryext-sse2.asm
@@ -40,6 +40,7 @@
     GLOBAL_FUNCTION(jsimd_rgb_gray_convert_sse2)

 EXTN(jsimd_rgb_gray_convert_sse2):
+    _endbr64
     push        rbp
     mov         rbp, rsp
     push        r15
diff --git a/simd/x86_64/jchuff-sse2.asm b/simd/x86_64/jchuff-sse2.asm
index 0c2cdd6..44ea81d 100644
--- a/simd/x86_64/jchuff-sse2.asm
+++ b/simd/x86_64/jchuff-sse2.asm
@@ -261,6 +261,7 @@ times 1 << 15 db 16
     GLOBAL_FUNCTION(jsimd_huff_encode_one_block_sse2)

 EXTN(jsimd_huff_encode_one_block_sse2):
+    _endbr64
     push        rbp
     mov         rbp, rsp

diff --git a/simd/x86_64/jcphuff-sse2.asm b/simd/x86_64/jcphuff-sse2.asm
index 11db4b2..2157e97 100644
--- a/simd/x86_64/jcphuff-sse2.asm
+++ b/simd/x86_64/jcphuff-sse2.asm
@@ -282,6 +282,7 @@
     GLOBAL_FUNCTION(jsimd_encode_mcu_AC_first_prepare_sse2)

 EXTN(jsimd_encode_mcu_AC_first_prepare_sse2):
+    _endbr64
     push        rbp
     mov         rbp, rsp
     and         rsp, byte (-SIZEOF_XMMWORD)  ; align to 128 bits
@@ -445,6 +446,7 @@ EXTN(jsimd_encode_mcu_AC_first_prepare_sse2):
     GLOBAL_FUNCTION(jsimd_encode_mcu_AC_refine_prepare_sse2)

 EXTN(jsimd_encode_mcu_AC_refine_prepare_sse2):
+    _endbr64
     push        rbp
     mov         rbp, rsp
     and         rsp, byte (-SIZEOF_XMMWORD)  ; align to 128 bits
diff --git a/simd/x86_64/jcsample-avx2.asm b/simd/x86_64/jcsample-avx2.asm
index 589c52b..7d8d4e0 100644
--- a/simd/x86_64/jcsample-avx2.asm
+++ b/simd/x86_64/jcsample-avx2.asm
@@ -44,6 +44,7 @@
     GLOBAL_FUNCTION(jsimd_h2v1_downsample_avx2)

 EXTN(jsimd_h2v1_downsample_avx2):
+    _endbr64
     push        rbp
     mov         rbp, rsp
     collect_args 6
@@ -205,6 +206,7 @@ EXTN(jsimd_h2v1_downsample_avx2):
     GLOBAL_FUNCTION(jsimd_h2v2_downsample_avx2)

 EXTN(jsimd_h2v2_downsample_avx2):
+    _endbr64
     push        rbp
     mov         rbp, rsp
     collect_args 6
diff --git a/simd/x86_64/jcsample-sse2.asm b/simd/x86_64/jcsample-sse2.asm
index 7a4f1bc..8932b94 100644
--- a/simd/x86_64/jcsample-sse2.asm
+++ b/simd/x86_64/jcsample-sse2.asm
@@ -43,6 +43,7 @@
     GLOBAL_FUNCTION(jsimd_h2v1_downsample_sse2)

 EXTN(jsimd_h2v1_downsample_sse2):
+    _endbr64
     push        rbp
     mov         rbp, rsp
     collect_args 6
@@ -187,6 +188,7 @@ EXTN(jsimd_h2v1_downsample_sse2):
     GLOBAL_FUNCTION(jsimd_h2v2_downsample_sse2)

 EXTN(jsimd_h2v2_downsample_sse2):
+    _endbr64
     push        rbp
     mov         rbp, rsp
     collect_args 6
diff --git a/simd/x86_64/jdcolext-avx2.asm b/simd/x86_64/jdcolext-avx2.asm
index 070436c..6c0a212 100644
--- a/simd/x86_64/jdcolext-avx2.asm
+++ b/simd/x86_64/jdcolext-avx2.asm
@@ -42,6 +42,7 @@
     GLOBAL_FUNCTION(jsimd_ycc_rgb_convert_avx2)

 EXTN(jsimd_ycc_rgb_convert_avx2):
+    _endbr64
     push        rbp
     mov         rbp, rsp
     push        r15
diff --git a/simd/x86_64/jdcolext-sse2.asm b/simd/x86_64/jdcolext-sse2.asm
index bba3a30..28d05e4 100644
--- a/simd/x86_64/jdcolext-sse2.asm
+++ b/simd/x86_64/jdcolext-sse2.asm
@@ -41,6 +41,7 @@
     GLOBAL_FUNCTION(jsimd_ycc_rgb_convert_sse2)

 EXTN(jsimd_ycc_rgb_convert_sse2):
+    _endbr64
     push        rbp
     mov         rbp, rsp
     push        r15
diff --git a/simd/x86_64/jdmrgext-avx2.asm b/simd/x86_64/jdmrgext-avx2.asm
index 1191645..f58384c 100644
--- a/simd/x86_64/jdmrgext-avx2.asm
+++ b/simd/x86_64/jdmrgext-avx2.asm
@@ -42,6 +42,7 @@
     GLOBAL_FUNCTION(jsimd_h2v1_merged_upsample_avx2)

 EXTN(jsimd_h2v1_merged_upsample_avx2):
+    _endbr64
     push        rbp
     mov         rbp, rsp
     push        r15
@@ -506,6 +507,7 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
     GLOBAL_FUNCTION(jsimd_h2v2_merged_upsample_avx2)

 EXTN(jsimd_h2v2_merged_upsample_avx2):
+    _endbr64
     push        rbp
     mov         rbp, rsp
     collect_args 4
diff --git a/simd/x86_64/jdmrgext-sse2.asm b/simd/x86_64/jdmrgext-sse2.asm
index 8988dd0..8641aa3 100644
--- a/simd/x86_64/jdmrgext-sse2.asm
+++ b/simd/x86_64/jdmrgext-sse2.asm
@@ -41,6 +41,7 @@
     GLOBAL_FUNCTION(jsimd_h2v1_merged_upsample_sse2)

 EXTN(jsimd_h2v1_merged_upsample_sse2):
+    _endbr64
     push        rbp
     mov         rbp, rsp
     push        r15
@@ -448,6 +449,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
     GLOBAL_FUNCTION(jsimd_h2v2_merged_upsample_sse2)

 EXTN(jsimd_h2v2_merged_upsample_sse2):
+    _endbr64
     push        rbp
     mov         rbp, rsp
     collect_args 4
diff --git a/simd/x86_64/jdsample-avx2.asm b/simd/x86_64/jdsample-avx2.asm
index c6ddbb5..c5594ed 100644
--- a/simd/x86_64/jdsample-avx2.asm
+++ b/simd/x86_64/jdsample-avx2.asm
@@ -62,6 +62,7 @@ PW_EIGHT times 16 dw 8
     GLOBAL_FUNCTION(jsimd_h2v1_fancy_upsample_avx2)

 EXTN(jsimd_h2v1_fancy_upsample_avx2):
+    _endbr64
     push        rbp
     mov         rbp, rsp
     push_xmm    3
@@ -215,6 +216,7 @@ EXTN(jsimd_h2v1_fancy_upsample_avx2):
     GLOBAL_FUNCTION(jsimd_h2v2_fancy_upsample_avx2)

 EXTN(jsimd_h2v2_fancy_upsample_avx2):
+    _endbr64
     push        rbp
     mov         rbp, rsp
     push        r15
@@ -524,6 +526,7 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
     GLOBAL_FUNCTION(jsimd_h2v1_upsample_avx2)

 EXTN(jsimd_h2v1_upsample_avx2):
+    _endbr64
     push        rbp
     mov         rbp, rsp
     collect_args 4
@@ -612,6 +615,7 @@ EXTN(jsimd_h2v1_upsample_avx2):
     GLOBAL_FUNCTION(jsimd_h2v2_upsample_avx2)

 EXTN(jsimd_h2v2_upsample_avx2):
+    _endbr64
     push        rbp
     mov         rbp, rsp
     collect_args 4
diff --git a/simd/x86_64/jdsample-sse2.asm b/simd/x86_64/jdsample-sse2.asm
index 24cd389..27bf771 100644
--- a/simd/x86_64/jdsample-sse2.asm
+++ b/simd/x86_64/jdsample-sse2.asm
@@ -61,6 +61,7 @@ PW_EIGHT times 8 dw 8
     GLOBAL_FUNCTION(jsimd_h2v1_fancy_upsample_sse2)

 EXTN(jsimd_h2v1_fancy_upsample_sse2):
+    _endbr64
     push        rbp
     mov         rbp, rsp
     collect_args 4
@@ -202,6 +203,7 @@ EXTN(jsimd_h2v1_fancy_upsample_sse2):
     GLOBAL_FUNCTION(jsimd_h2v2_fancy_upsample_sse2)

 EXTN(jsimd_h2v2_fancy_upsample_sse2):
+    _endbr64
     push        rbp
     mov         rbp, rsp
     push        r15
@@ -497,6 +499,7 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
     GLOBAL_FUNCTION(jsimd_h2v1_upsample_sse2)

 EXTN(jsimd_h2v1_upsample_sse2):
+    _endbr64
     push        rbp
     mov         rbp, rsp
     collect_args 4
@@ -583,6 +586,7 @@ EXTN(jsimd_h2v1_upsample_sse2):
     GLOBAL_FUNCTION(jsimd_h2v2_upsample_sse2)

 EXTN(jsimd_h2v2_upsample_sse2):
+    _endbr64
     push        rbp
     mov         rbp, rsp
     collect_args 4
diff --git a/simd/x86_64/jfdctflt-sse.asm b/simd/x86_64/jfdctflt-sse.asm
index 3595496..b5abeee 100644
--- a/simd/x86_64/jfdctflt-sse.asm
+++ b/simd/x86_64/jfdctflt-sse.asm
@@ -66,6 +66,7 @@ PD_1_306 times 4 dd 1.306562964876376527856643
     GLOBAL_FUNCTION(jsimd_fdct_float_sse)

 EXTN(jsimd_fdct_float_sse):
+    _endbr64
     push        rbp
     mov         rbp, rsp
     push        r15
diff --git a/simd/x86_64/jfdctfst-sse2.asm b/simd/x86_64/jfdctfst-sse2.asm
index d33c58a..78af5e4 100644
--- a/simd/x86_64/jfdctfst-sse2.asm
+++ b/simd/x86_64/jfdctfst-sse2.asm
@@ -81,6 +81,7 @@ PW_F1306 times 8 dw F_1_306 << CONST_SHIFT
     GLOBAL_FUNCTION(jsimd_fdct_ifast_sse2)

 EXTN(jsimd_fdct_ifast_sse2):
+    _endbr64
     push        rbp
     mov         rbp, rsp
     push        r15
diff --git a/simd/x86_64/jfdctint-avx2.asm b/simd/x86_64/jfdctint-avx2.asm
index d0afe5e..b083fbb 100644
--- a/simd/x86_64/jfdctint-avx2.asm
+++ b/simd/x86_64/jfdctint-avx2.asm
@@ -260,6 +260,7 @@ PW_1_NEG1                  times 8  dw  1
     GLOBAL_FUNCTION(jsimd_fdct_islow_avx2)

 EXTN(jsimd_fdct_islow_avx2):
+    _endbr64
     push        rbp
     mov         rbp, rsp
     collect_args 1
diff --git a/simd/x86_64/jfdctint-sse2.asm b/simd/x86_64/jfdctint-sse2.asm
index 024ce90..88ea491 100644
--- a/simd/x86_64/jfdctint-sse2.asm
+++ b/simd/x86_64/jfdctint-sse2.asm
@@ -102,6 +102,7 @@ PW_DESCALE_P2X times 8 dw  1 << (PASS1_BITS - 1)
     GLOBAL_FUNCTION(jsimd_fdct_islow_sse2)

 EXTN(jsimd_fdct_islow_sse2):
+    _endbr64
     push        rbp
     mov         rbp, rsp
     push        r15
diff --git a/simd/x86_64/jidctflt-sse2.asm b/simd/x86_64/jidctflt-sse2.asm
index 952fbe3..8610710 100644
--- a/simd/x86_64/jidctflt-sse2.asm
+++ b/simd/x86_64/jidctflt-sse2.asm
@@ -76,6 +76,7 @@ PB_CENTERJSAMP  times 16 db  CENTERJSAMPLE
     GLOBAL_FUNCTION(jsimd_idct_float_sse2)

 EXTN(jsimd_idct_float_sse2):
+    _endbr64
     push        rbp
     mov         rbp, rsp
     push        r15
diff --git a/simd/x86_64/jidctfst-sse2.asm b/simd/x86_64/jidctfst-sse2.asm
index a3da8d8..351ee06 100644
--- a/simd/x86_64/jidctfst-sse2.asm
+++ b/simd/x86_64/jidctfst-sse2.asm
@@ -95,6 +95,7 @@ PB_CENTERJSAMP times 16 db  CENTERJSAMPLE
     GLOBAL_FUNCTION(jsimd_idct_ifast_sse2)

 EXTN(jsimd_idct_ifast_sse2):
+    _endbr64
     push        rbp
     mov         rbp, rsp
     push        r15
diff --git a/simd/x86_64/jidctint-avx2.asm b/simd/x86_64/jidctint-avx2.asm
index 528da01..00087c7 100644
--- a/simd/x86_64/jidctint-avx2.asm
+++ b/simd/x86_64/jidctint-avx2.asm
@@ -282,6 +282,7 @@ PW_1_NEG1                  times 8  dw  1
     GLOBAL_FUNCTION(jsimd_idct_islow_avx2)

 EXTN(jsimd_idct_islow_avx2):
+    _endbr64
     push        rbp
     mov         rbp, rsp                     ; rbp = aligned rbp
     push_xmm    4
diff --git a/simd/x86_64/jidctint-sse2.asm b/simd/x86_64/jidctint-sse2.asm
index 92f633e..9301e81 100644
--- a/simd/x86_64/jidctint-sse2.asm
+++ b/simd/x86_64/jidctint-sse2.asm
@@ -108,6 +108,7 @@ PB_CENTERJSAMP times 16 db  CENTERJSAMPLE
     GLOBAL_FUNCTION(jsimd_idct_islow_sse2)

 EXTN(jsimd_idct_islow_sse2):
+    _endbr64
     push        rbp
     mov         rbp, rsp
     push        r15
diff --git a/simd/x86_64/jidctred-sse2.asm b/simd/x86_64/jidctred-sse2.asm
index 1ec500c..e74162d 100644
--- a/simd/x86_64/jidctred-sse2.asm
+++ b/simd/x86_64/jidctred-sse2.asm
@@ -116,6 +116,7 @@ PB_CENTERJSAMP  times 16 db  CENTERJSAMPLE
     GLOBAL_FUNCTION(jsimd_idct_4x4_sse2)

 EXTN(jsimd_idct_4x4_sse2):
+    _endbr64
     push        rbp
     mov         rbp, rsp
     push        r15
@@ -413,6 +414,7 @@ EXTN(jsimd_idct_4x4_sse2):
     GLOBAL_FUNCTION(jsimd_idct_2x2_sse2)

 EXTN(jsimd_idct_2x2_sse2):
+    _endbr64
     push        rbp
     mov         rbp, rsp
     collect_args 4
diff --git a/simd/x86_64/jquantf-sse2.asm b/simd/x86_64/jquantf-sse2.asm
index 232bbb2..2d65986 100644
--- a/simd/x86_64/jquantf-sse2.asm
+++ b/simd/x86_64/jquantf-sse2.asm
@@ -37,6 +37,7 @@
     GLOBAL_FUNCTION(jsimd_convsamp_float_sse2)

 EXTN(jsimd_convsamp_float_sse2):
+    _endbr64
     push        rbp
     mov         rbp, rsp
     collect_args 3
@@ -109,6 +110,7 @@ EXTN(jsimd_convsamp_float_sse2):
     GLOBAL_FUNCTION(jsimd_quantize_float_sse2)

 EXTN(jsimd_quantize_float_sse2):
+    _endbr64
     push        rbp
     mov         rbp, rsp
     collect_args 3
diff --git a/simd/x86_64/jquanti-avx2.asm b/simd/x86_64/jquanti-avx2.asm
index 66104d7..a7ea496 100644
--- a/simd/x86_64/jquanti-avx2.asm
+++ b/simd/x86_64/jquanti-avx2.asm
@@ -38,6 +38,7 @@
     GLOBAL_FUNCTION(jsimd_convsamp_avx2)

 EXTN(jsimd_convsamp_avx2):
+    _endbr64
     push        rbp
     mov         rbp, rsp
     collect_args 3
@@ -115,6 +116,7 @@ EXTN(jsimd_convsamp_avx2):
     GLOBAL_FUNCTION(jsimd_quantize_avx2)

 EXTN(jsimd_quantize_avx2):
+    _endbr64
     push        rbp
     mov         rbp, rsp
     collect_args 3
diff --git a/simd/x86_64/jquanti-sse2.asm b/simd/x86_64/jquanti-sse2.asm
index 11e9f4c..24cb1cf 100644
--- a/simd/x86_64/jquanti-sse2.asm
+++ b/simd/x86_64/jquanti-sse2.asm
@@ -37,6 +37,7 @@
     GLOBAL_FUNCTION(jsimd_convsamp_sse2)

 EXTN(jsimd_convsamp_sse2):
+    _endbr64
     push        rbp
     mov         rbp, rsp
     collect_args 3
@@ -115,6 +116,7 @@ EXTN(jsimd_convsamp_sse2):
     GLOBAL_FUNCTION(jsimd_quantize_sse2)

 EXTN(jsimd_quantize_sse2):
+    _endbr64
     push        rbp
     mov         rbp, rsp
     collect_args 3
--