Blob Blame History Raw
From 525936a83e019540870f54ffb60d2281805d84bf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nikola=20Forr=C3=B3?= <nforro@redhat.com>
Date: Mon, 29 Apr 2019 11:28:51 +0200
Subject: [PATCH] x86 SIMD: Add endbr32/endbr64 instructions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Allow for indirect branch tracking with Intel CET (Control-Flow
Enforcement Technology) [1] by starting every exported routine with an
endbr32/endbr64 instruction, which makes it a valid target for an
indirect call or jump.

Signed-off-by: Nikola Forró <nforro@redhat.com>

[1] https://software.intel.com/sites/default/files/managed/4d/2a/control-flow-enforcement-technology-preview.pdf
---
 simd/i386/jccolext-avx2.asm   | 1 +
 simd/i386/jccolext-mmx.asm    | 1 +
 simd/i386/jccolext-sse2.asm   | 1 +
 simd/i386/jcgryext-avx2.asm   | 1 +
 simd/i386/jcgryext-mmx.asm    | 1 +
 simd/i386/jcgryext-sse2.asm   | 1 +
 simd/i386/jchuff-sse2.asm     | 1 +
 simd/i386/jcphuff-sse2.asm    | 2 ++
 simd/i386/jcsample-avx2.asm   | 2 ++
 simd/i386/jcsample-mmx.asm    | 2 ++
 simd/i386/jcsample-sse2.asm   | 2 ++
 simd/i386/jdcolext-avx2.asm   | 1 +
 simd/i386/jdcolext-mmx.asm    | 1 +
 simd/i386/jdcolext-sse2.asm   | 1 +
 simd/i386/jdmrgext-avx2.asm   | 2 ++
 simd/i386/jdmrgext-mmx.asm    | 2 ++
 simd/i386/jdmrgext-sse2.asm   | 2 ++
 simd/i386/jdsample-avx2.asm   | 4 ++++
 simd/i386/jdsample-mmx.asm    | 4 ++++
 simd/i386/jdsample-sse2.asm   | 4 ++++
 simd/i386/jfdctflt-3dn.asm    | 1 +
 simd/i386/jfdctflt-sse.asm    | 1 +
 simd/i386/jfdctfst-mmx.asm    | 1 +
 simd/i386/jfdctfst-sse2.asm   | 1 +
 simd/i386/jfdctint-avx2.asm   | 1 +
 simd/i386/jfdctint-mmx.asm    | 1 +
 simd/i386/jfdctint-sse2.asm   | 1 +
 simd/i386/jidctflt-3dn.asm    | 1 +
 simd/i386/jidctflt-sse.asm    | 1 +
 simd/i386/jidctflt-sse2.asm   | 1 +
 simd/i386/jidctfst-mmx.asm    | 1 +
 simd/i386/jidctfst-sse2.asm   | 1 +
 simd/i386/jidctint-avx2.asm   | 1 +
 simd/i386/jidctint-mmx.asm    | 1 +
 simd/i386/jidctint-sse2.asm   | 1 +
 simd/i386/jidctred-mmx.asm    | 2 ++
 simd/i386/jidctred-sse2.asm   | 2 ++
 simd/i386/jquant-3dn.asm      | 2 ++
 simd/i386/jquant-mmx.asm      | 2 ++
 simd/i386/jquant-sse.asm      | 2 ++
 simd/i386/jquantf-sse2.asm    | 2 ++
 simd/i386/jquanti-avx2.asm    | 2 ++
 simd/i386/jquanti-sse2.asm    | 2 ++
 simd/nasm/jsimdext.inc        | 8 ++++++++
 simd/x86_64/jccolext-avx2.asm | 1 +
 simd/x86_64/jccolext-sse2.asm | 1 +
 simd/x86_64/jcgryext-avx2.asm | 1 +
 simd/x86_64/jcgryext-sse2.asm | 1 +
 simd/x86_64/jchuff-sse2.asm   | 1 +
 simd/x86_64/jcphuff-sse2.asm  | 2 ++
 simd/x86_64/jcsample-avx2.asm | 2 ++
 simd/x86_64/jcsample-sse2.asm | 2 ++
 simd/x86_64/jdcolext-avx2.asm | 1 +
 simd/x86_64/jdcolext-sse2.asm | 1 +
 simd/x86_64/jdmrgext-avx2.asm | 2 ++
 simd/x86_64/jdmrgext-sse2.asm | 2 ++
 simd/x86_64/jdsample-avx2.asm | 4 ++++
 simd/x86_64/jdsample-sse2.asm | 4 ++++
 simd/x86_64/jfdctflt-sse.asm  | 1 +
 simd/x86_64/jfdctfst-sse2.asm | 1 +
 simd/x86_64/jfdctint-avx2.asm | 1 +
 simd/x86_64/jfdctint-sse2.asm | 1 +
 simd/x86_64/jidctflt-sse2.asm | 1 +
 simd/x86_64/jidctfst-sse2.asm | 1 +
 simd/x86_64/jidctint-avx2.asm | 1 +
 simd/x86_64/jidctint-sse2.asm | 1 +
 simd/x86_64/jidctred-sse2.asm | 2 ++
 simd/x86_64/jquantf-sse2.asm  | 2 ++
 simd/x86_64/jquanti-avx2.asm  | 2 ++
 simd/x86_64/jquanti-sse2.asm  | 2 ++
 70 files changed, 116 insertions(+)

diff --git a/simd/i386/jccolext-avx2.asm b/simd/i386/jccolext-avx2.asm
index 7a8d784..1a94b79 100644
--- a/simd/i386/jccolext-avx2.asm
+++ b/simd/i386/jccolext-avx2.asm
@@ -44,6 +44,7 @@
     GLOBAL_FUNCTION(jsimd_rgb_ycc_convert_avx2)
 
 EXTN(jsimd_rgb_ycc_convert_avx2):
+    _endbr32
     push        ebp
     mov         eax, esp                     ; eax = original ebp
     sub         esp, byte 4
diff --git a/simd/i386/jccolext-mmx.asm b/simd/i386/jccolext-mmx.asm
index 9a2c30e..3b526bf 100644
--- a/simd/i386/jccolext-mmx.asm
+++ b/simd/i386/jccolext-mmx.asm
@@ -44,6 +44,7 @@
     GLOBAL_FUNCTION(jsimd_rgb_ycc_convert_mmx)
 
 EXTN(jsimd_rgb_ycc_convert_mmx):
+    _endbr32
     push        ebp
     mov         eax, esp                    ; eax = original ebp
     sub         esp, byte 4
diff --git a/simd/i386/jccolext-sse2.asm b/simd/i386/jccolext-sse2.asm
index e830562..f84ed39 100644
--- a/simd/i386/jccolext-sse2.asm
+++ b/simd/i386/jccolext-sse2.asm
@@ -43,6 +43,7 @@
     GLOBAL_FUNCTION(jsimd_rgb_ycc_convert_sse2)
 
 EXTN(jsimd_rgb_ycc_convert_sse2):
+    _endbr32
     push        ebp
     mov         eax, esp                     ; eax = original ebp
     sub         esp, byte 4
diff --git a/simd/i386/jcgryext-avx2.asm b/simd/i386/jcgryext-avx2.asm
index 52e99a8..f32540c 100644
--- a/simd/i386/jcgryext-avx2.asm
+++ b/simd/i386/jcgryext-avx2.asm
@@ -44,6 +44,7 @@
     GLOBAL_FUNCTION(jsimd_rgb_gray_convert_avx2)
 
 EXTN(jsimd_rgb_gray_convert_avx2):
+    _endbr32
     push        ebp
     mov         eax, esp                     ; eax = original ebp
     sub         esp, byte 4
diff --git a/simd/i386/jcgryext-mmx.asm b/simd/i386/jcgryext-mmx.asm
index 4a9ab0d..5ef3da2 100644
--- a/simd/i386/jcgryext-mmx.asm
+++ b/simd/i386/jcgryext-mmx.asm
@@ -44,6 +44,7 @@
     GLOBAL_FUNCTION(jsimd_rgb_gray_convert_mmx)
 
 EXTN(jsimd_rgb_gray_convert_mmx):
+    _endbr32
     push        ebp
     mov         eax, esp                    ; eax = original ebp
     sub         esp, byte 4
diff --git a/simd/i386/jcgryext-sse2.asm b/simd/i386/jcgryext-sse2.asm
index 04d891c..14f1975 100644
--- a/simd/i386/jcgryext-sse2.asm
+++ b/simd/i386/jcgryext-sse2.asm
@@ -43,6 +43,7 @@
     GLOBAL_FUNCTION(jsimd_rgb_gray_convert_sse2)
 
 EXTN(jsimd_rgb_gray_convert_sse2):
+    _endbr32
     push        ebp
     mov         eax, esp                     ; eax = original ebp
     sub         esp, byte 4
diff --git a/simd/i386/jchuff-sse2.asm b/simd/i386/jchuff-sse2.asm
index 6ea69f6..98edd0a 100644
--- a/simd/i386/jchuff-sse2.asm
+++ b/simd/i386/jchuff-sse2.asm
@@ -182,6 +182,7 @@ EXTN(jconst_huff_encode_one_block):
     GLOBAL_FUNCTION(jsimd_huff_encode_one_block_sse2)
 
 EXTN(jsimd_huff_encode_one_block_sse2):
+    _endbr32
     push        ebp
     mov         eax, esp                     ; eax = original ebp
     sub         esp, byte 4
diff --git a/simd/i386/jcphuff-sse2.asm b/simd/i386/jcphuff-sse2.asm
index 25c63c7..5ab8bdb 100644
--- a/simd/i386/jcphuff-sse2.asm
+++ b/simd/i386/jcphuff-sse2.asm
@@ -283,6 +283,7 @@
     GLOBAL_FUNCTION(jsimd_encode_mcu_AC_first_prepare_sse2)
 
 EXTN(jsimd_encode_mcu_AC_first_prepare_sse2):
+    _endbr32
     push        ebp
     mov         eax, esp                     ; eax = original ebp
     sub         esp, byte 4
@@ -460,6 +461,7 @@ EXTN(jsimd_encode_mcu_AC_first_prepare_sse2):
     GLOBAL_FUNCTION(jsimd_encode_mcu_AC_refine_prepare_sse2)
 
 EXTN(jsimd_encode_mcu_AC_refine_prepare_sse2):
+    _endbr32
     push        ebp
     mov         eax, esp                     ; eax = original ebp
     sub         esp, byte 4
diff --git a/simd/i386/jcsample-avx2.asm b/simd/i386/jcsample-avx2.asm
index 5bcdefd..1f0ca65 100644
--- a/simd/i386/jcsample-avx2.asm
+++ b/simd/i386/jcsample-avx2.asm
@@ -45,6 +45,7 @@
     GLOBAL_FUNCTION(jsimd_h2v1_downsample_avx2)
 
 EXTN(jsimd_h2v1_downsample_avx2):
+    _endbr32
     push        ebp
     mov         ebp, esp
 ;   push        ebx                     ; unused
@@ -218,6 +219,7 @@ EXTN(jsimd_h2v1_downsample_avx2):
     GLOBAL_FUNCTION(jsimd_h2v2_downsample_avx2)
 
 EXTN(jsimd_h2v2_downsample_avx2):
+    _endbr32
     push        ebp
     mov         ebp, esp
 ;   push        ebx                     ; unused
diff --git a/simd/i386/jcsample-mmx.asm b/simd/i386/jcsample-mmx.asm
index faf4234..a2c14df 100644
--- a/simd/i386/jcsample-mmx.asm
+++ b/simd/i386/jcsample-mmx.asm
@@ -44,6 +44,7 @@
     GLOBAL_FUNCTION(jsimd_h2v1_downsample_mmx)
 
 EXTN(jsimd_h2v1_downsample_mmx):
+    _endbr32
     push        ebp
     mov         ebp, esp
 ;   push        ebx                     ; unused
@@ -187,6 +188,7 @@ EXTN(jsimd_h2v1_downsample_mmx):
     GLOBAL_FUNCTION(jsimd_h2v2_downsample_mmx)
 
 EXTN(jsimd_h2v2_downsample_mmx):
+    _endbr32
     push        ebp
     mov         ebp, esp
 ;   push        ebx                     ; unused
diff --git a/simd/i386/jcsample-sse2.asm b/simd/i386/jcsample-sse2.asm
index b10fa83..74205d0 100644
--- a/simd/i386/jcsample-sse2.asm
+++ b/simd/i386/jcsample-sse2.asm
@@ -44,6 +44,7 @@
     GLOBAL_FUNCTION(jsimd_h2v1_downsample_sse2)
 
 EXTN(jsimd_h2v1_downsample_sse2):
+    _endbr32
     push        ebp
     mov         ebp, esp
 ;   push        ebx                     ; unused
@@ -200,6 +201,7 @@ EXTN(jsimd_h2v1_downsample_sse2):
     GLOBAL_FUNCTION(jsimd_h2v2_downsample_sse2)
 
 EXTN(jsimd_h2v2_downsample_sse2):
+    _endbr32
     push        ebp
     mov         ebp, esp
 ;   push        ebx                     ; unused
diff --git a/simd/i386/jdcolext-avx2.asm b/simd/i386/jdcolext-avx2.asm
index 46de9b9..7119de4 100644
--- a/simd/i386/jdcolext-avx2.asm
+++ b/simd/i386/jdcolext-avx2.asm
@@ -45,6 +45,7 @@
     GLOBAL_FUNCTION(jsimd_ycc_rgb_convert_avx2)
 
 EXTN(jsimd_ycc_rgb_convert_avx2):
+    _endbr32
     push        ebp
     mov         eax, esp                     ; eax = original ebp
     sub         esp, byte 4
diff --git a/simd/i386/jdcolext-mmx.asm b/simd/i386/jdcolext-mmx.asm
index cd2cb3f..10f135a 100644
--- a/simd/i386/jdcolext-mmx.asm
+++ b/simd/i386/jdcolext-mmx.asm
@@ -44,6 +44,7 @@
     GLOBAL_FUNCTION(jsimd_ycc_rgb_convert_mmx)
 
 EXTN(jsimd_ycc_rgb_convert_mmx):
+    _endbr32
     push        ebp
     mov         eax, esp                    ; eax = original ebp
     sub         esp, byte 4
diff --git a/simd/i386/jdcolext-sse2.asm b/simd/i386/jdcolext-sse2.asm
index 0fcb006..10d264d 100644
--- a/simd/i386/jdcolext-sse2.asm
+++ b/simd/i386/jdcolext-sse2.asm
@@ -44,6 +44,7 @@
     GLOBAL_FUNCTION(jsimd_ycc_rgb_convert_sse2)
 
 EXTN(jsimd_ycc_rgb_convert_sse2):
+    _endbr32
     push        ebp
     mov         eax, esp                     ; eax = original ebp
     sub         esp, byte 4
diff --git a/simd/i386/jdmrgext-avx2.asm b/simd/i386/jdmrgext-avx2.asm
index cde4865..8621951 100644
--- a/simd/i386/jdmrgext-avx2.asm
+++ b/simd/i386/jdmrgext-avx2.asm
@@ -45,6 +45,7 @@
     GLOBAL_FUNCTION(jsimd_h2v1_merged_upsample_avx2)
 
 EXTN(jsimd_h2v1_merged_upsample_avx2):
+    _endbr32
     push        ebp
     mov         eax, esp                     ; eax = original ebp
     sub         esp, byte 4
@@ -525,6 +526,7 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
     GLOBAL_FUNCTION(jsimd_h2v2_merged_upsample_avx2)
 
 EXTN(jsimd_h2v2_merged_upsample_avx2):
+    _endbr32
     push        ebp
     mov         ebp, esp
     push        ebx
diff --git a/simd/i386/jdmrgext-mmx.asm b/simd/i386/jdmrgext-mmx.asm
index 4b9e35d..51741db 100644
--- a/simd/i386/jdmrgext-mmx.asm
+++ b/simd/i386/jdmrgext-mmx.asm
@@ -42,6 +42,7 @@
     GLOBAL_FUNCTION(jsimd_h2v1_merged_upsample_mmx)
 
 EXTN(jsimd_h2v1_merged_upsample_mmx):
+    _endbr32
     push        ebp
     mov         eax, esp                    ; eax = original ebp
     sub         esp, byte 4
@@ -410,6 +411,7 @@ EXTN(jsimd_h2v1_merged_upsample_mmx):
     GLOBAL_FUNCTION(jsimd_h2v2_merged_upsample_mmx)
 
 EXTN(jsimd_h2v2_merged_upsample_mmx):
+    _endbr32
     push        ebp
     mov         ebp, esp
     push        ebx
diff --git a/simd/i386/jdmrgext-sse2.asm b/simd/i386/jdmrgext-sse2.asm
index ac4697e..4cb088d 100644
--- a/simd/i386/jdmrgext-sse2.asm
+++ b/simd/i386/jdmrgext-sse2.asm
@@ -44,6 +44,7 @@
     GLOBAL_FUNCTION(jsimd_h2v1_merged_upsample_sse2)
 
 EXTN(jsimd_h2v1_merged_upsample_sse2):
+    _endbr32
     push        ebp
     mov         eax, esp                     ; eax = original ebp
     sub         esp, byte 4
@@ -467,6 +468,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
     GLOBAL_FUNCTION(jsimd_h2v2_merged_upsample_sse2)
 
 EXTN(jsimd_h2v2_merged_upsample_sse2):
+    _endbr32
     push        ebp
     mov         ebp, esp
     push        ebx
diff --git a/simd/i386/jdsample-avx2.asm b/simd/i386/jdsample-avx2.asm
index 61ce511..4dcb328 100644
--- a/simd/i386/jdsample-avx2.asm
+++ b/simd/i386/jdsample-avx2.asm
@@ -62,6 +62,7 @@ PW_EIGHT times 16 dw 8
     GLOBAL_FUNCTION(jsimd_h2v1_fancy_upsample_avx2)
 
 EXTN(jsimd_h2v1_fancy_upsample_avx2):
+    _endbr32
     push        ebp
     mov         ebp, esp
     pushpic     ebx
@@ -229,6 +230,7 @@ EXTN(jsimd_h2v1_fancy_upsample_avx2):
     GLOBAL_FUNCTION(jsimd_h2v2_fancy_upsample_avx2)
 
 EXTN(jsimd_h2v2_fancy_upsample_avx2):
+    _endbr32
     push        ebp
     mov         eax, esp                     ; eax = original ebp
     sub         esp, byte 4
@@ -572,6 +574,7 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
     GLOBAL_FUNCTION(jsimd_h2v1_upsample_avx2)
 
 EXTN(jsimd_h2v1_upsample_avx2):
+    _endbr32
     push        ebp
     mov         ebp, esp
 ;   push        ebx                     ; unused
@@ -671,6 +674,7 @@ EXTN(jsimd_h2v1_upsample_avx2):
     GLOBAL_FUNCTION(jsimd_h2v2_upsample_avx2)
 
 EXTN(jsimd_h2v2_upsample_avx2):
+    _endbr32
     push        ebp
     mov         ebp, esp
     push        ebx
diff --git a/simd/i386/jdsample-mmx.asm b/simd/i386/jdsample-mmx.asm
index 1f810fa..5d0909d 100644
--- a/simd/i386/jdsample-mmx.asm
+++ b/simd/i386/jdsample-mmx.asm
@@ -61,6 +61,7 @@ PW_EIGHT times 4 dw 8
     GLOBAL_FUNCTION(jsimd_h2v1_fancy_upsample_mmx)
 
 EXTN(jsimd_h2v1_fancy_upsample_mmx):
+    _endbr32
     push        ebp
     mov         ebp, esp
     pushpic     ebx
@@ -219,6 +220,7 @@ EXTN(jsimd_h2v1_fancy_upsample_mmx):
     GLOBAL_FUNCTION(jsimd_h2v2_fancy_upsample_mmx)
 
 EXTN(jsimd_h2v2_fancy_upsample_mmx):
+    _endbr32
     push        ebp
     mov         eax, esp                    ; eax = original ebp
     sub         esp, byte 4
@@ -543,6 +545,7 @@ EXTN(jsimd_h2v2_fancy_upsample_mmx):
     GLOBAL_FUNCTION(jsimd_h2v1_upsample_mmx)
 
 EXTN(jsimd_h2v1_upsample_mmx):
+    _endbr32
     push        ebp
     mov         ebp, esp
 ;   push        ebx                     ; unused
@@ -642,6 +645,7 @@ EXTN(jsimd_h2v1_upsample_mmx):
     GLOBAL_FUNCTION(jsimd_h2v2_upsample_mmx)
 
 EXTN(jsimd_h2v2_upsample_mmx):
+    _endbr32
     push        ebp
     mov         ebp, esp
     push        ebx
diff --git a/simd/i386/jdsample-sse2.asm b/simd/i386/jdsample-sse2.asm
index f0da626..84d7433 100644
--- a/simd/i386/jdsample-sse2.asm
+++ b/simd/i386/jdsample-sse2.asm
@@ -61,6 +61,7 @@ PW_EIGHT times 8 dw 8
     GLOBAL_FUNCTION(jsimd_h2v1_fancy_upsample_sse2)
 
 EXTN(jsimd_h2v1_fancy_upsample_sse2):
+    _endbr32
     push        ebp
     mov         ebp, esp
     pushpic     ebx
@@ -218,6 +219,7 @@ EXTN(jsimd_h2v1_fancy_upsample_sse2):
     GLOBAL_FUNCTION(jsimd_h2v2_fancy_upsample_sse2)
 
 EXTN(jsimd_h2v2_fancy_upsample_sse2):
+    _endbr32
     push        ebp
     mov         eax, esp                     ; eax = original ebp
     sub         esp, byte 4
@@ -540,6 +542,7 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
     GLOBAL_FUNCTION(jsimd_h2v1_upsample_sse2)
 
 EXTN(jsimd_h2v1_upsample_sse2):
+    _endbr32
     push        ebp
     mov         ebp, esp
 ;   push        ebx                     ; unused
@@ -637,6 +640,7 @@ EXTN(jsimd_h2v1_upsample_sse2):
     GLOBAL_FUNCTION(jsimd_h2v2_upsample_sse2)
 
 EXTN(jsimd_h2v2_upsample_sse2):
+    _endbr32
     push        ebp
     mov         ebp, esp
     push        ebx
diff --git a/simd/i386/jfdctflt-3dn.asm b/simd/i386/jfdctflt-3dn.asm
index 1d45865..c7f3fb8 100644
--- a/simd/i386/jfdctflt-3dn.asm
+++ b/simd/i386/jfdctflt-3dn.asm
@@ -58,6 +58,7 @@ PD_1_306 times 2 dd 1.306562964876376527856643
     GLOBAL_FUNCTION(jsimd_fdct_float_3dnow)
 
 EXTN(jsimd_fdct_float_3dnow):
+    _endbr32
     push        ebp
     mov         eax, esp                    ; eax = original ebp
     sub         esp, byte 4
diff --git a/simd/i386/jfdctflt-sse.asm b/simd/i386/jfdctflt-sse.asm
index 1faf835..c70aeb0 100644
--- a/simd/i386/jfdctflt-sse.asm
+++ b/simd/i386/jfdctflt-sse.asm
@@ -69,6 +69,7 @@ PD_1_306 times 4 dd 1.306562964876376527856643
     GLOBAL_FUNCTION(jsimd_fdct_float_sse)
 
 EXTN(jsimd_fdct_float_sse):
+    _endbr32
     push        ebp
     mov         eax, esp                     ; eax = original ebp
     sub         esp, byte 4
diff --git a/simd/i386/jfdctfst-mmx.asm b/simd/i386/jfdctfst-mmx.asm
index 0271901..efc2c97 100644
--- a/simd/i386/jfdctfst-mmx.asm
+++ b/simd/i386/jfdctfst-mmx.asm
@@ -83,6 +83,7 @@ PW_F1306 times 4 dw F_1_306 << CONST_SHIFT
     GLOBAL_FUNCTION(jsimd_fdct_ifast_mmx)
 
 EXTN(jsimd_fdct_ifast_mmx):
+    _endbr32
     push        ebp
     mov         eax, esp                    ; eax = original ebp
     sub         esp, byte 4
diff --git a/simd/i386/jfdctfst-sse2.asm b/simd/i386/jfdctfst-sse2.asm
index f09dadd..919ed27 100644
--- a/simd/i386/jfdctfst-sse2.asm
+++ b/simd/i386/jfdctfst-sse2.asm
@@ -84,6 +84,7 @@ PW_F1306 times 8 dw F_1_306 << CONST_SHIFT
     GLOBAL_FUNCTION(jsimd_fdct_ifast_sse2)
 
 EXTN(jsimd_fdct_ifast_sse2):
+    _endbr32
     push        ebp
     mov         eax, esp                     ; eax = original ebp
     sub         esp, byte 4
diff --git a/simd/i386/jfdctint-avx2.asm b/simd/i386/jfdctint-avx2.asm
index ae258ee..921c792 100644
--- a/simd/i386/jfdctint-avx2.asm
+++ b/simd/i386/jfdctint-avx2.asm
@@ -262,6 +262,7 @@ PW_1_NEG1                  times 8  dw  1
     GLOBAL_FUNCTION(jsimd_fdct_islow_avx2)
 
 EXTN(jsimd_fdct_islow_avx2):
+    _endbr32
     push        ebp
     mov         ebp, esp
     pushpic     ebx
diff --git a/simd/i386/jfdctint-mmx.asm b/simd/i386/jfdctint-mmx.asm
index c6bd959..b59bdb1 100644
--- a/simd/i386/jfdctint-mmx.asm
+++ b/simd/i386/jfdctint-mmx.asm
@@ -104,6 +104,7 @@ PW_DESCALE_P2X times 4 dw  1 << (PASS1_BITS - 1)
     GLOBAL_FUNCTION(jsimd_fdct_islow_mmx)
 
 EXTN(jsimd_fdct_islow_mmx):
+    _endbr32
     push        ebp
     mov         eax, esp                    ; eax = original ebp
     sub         esp, byte 4
diff --git a/simd/i386/jfdctint-sse2.asm b/simd/i386/jfdctint-sse2.asm
index d67dcc1..8d3c6f8 100644
--- a/simd/i386/jfdctint-sse2.asm
+++ b/simd/i386/jfdctint-sse2.asm
@@ -105,6 +105,7 @@ PW_DESCALE_P2X times 8 dw  1 << (PASS1_BITS - 1)
     GLOBAL_FUNCTION(jsimd_fdct_islow_sse2)
 
 EXTN(jsimd_fdct_islow_sse2):
+    _endbr32
     push        ebp
     mov         eax, esp                     ; eax = original ebp
     sub         esp, byte 4
diff --git a/simd/i386/jidctflt-3dn.asm b/simd/i386/jidctflt-3dn.asm
index 73aa18d..73afee9 100644
--- a/simd/i386/jidctflt-3dn.asm
+++ b/simd/i386/jidctflt-3dn.asm
@@ -67,6 +67,7 @@ PB_CENTERJSAMP  times 8 db CENTERJSAMPLE
     GLOBAL_FUNCTION(jsimd_idct_float_3dnow)
 
 EXTN(jsimd_idct_float_3dnow):
+    _endbr32
     push        ebp
     mov         eax, esp                    ; eax = original ebp
     sub         esp, byte 4
diff --git a/simd/i386/jidctflt-sse.asm b/simd/i386/jidctflt-sse.asm
index 386650f..5d6ab0c 100644
--- a/simd/i386/jidctflt-sse.asm
+++ b/simd/i386/jidctflt-sse.asm
@@ -77,6 +77,7 @@ PB_CENTERJSAMP times 8 db  CENTERJSAMPLE
     GLOBAL_FUNCTION(jsimd_idct_float_sse)
 
 EXTN(jsimd_idct_float_sse):
+    _endbr32
     push        ebp
     mov         eax, esp                     ; eax = original ebp
     sub         esp, byte 4
diff --git a/simd/i386/jidctflt-sse2.asm b/simd/i386/jidctflt-sse2.asm
index 9de7139..94bd5a0 100644
--- a/simd/i386/jidctflt-sse2.asm
+++ b/simd/i386/jidctflt-sse2.asm
@@ -77,6 +77,7 @@ PB_CENTERJSAMP  times 16 db  CENTERJSAMPLE
     GLOBAL_FUNCTION(jsimd_idct_float_sse2)
 
 EXTN(jsimd_idct_float_sse2):
+    _endbr32
     push        ebp
     mov         eax, esp                     ; eax = original ebp
     sub         esp, byte 4
diff --git a/simd/i386/jidctfst-mmx.asm b/simd/i386/jidctfst-mmx.asm
index d3e8a5d..d458df9 100644
--- a/simd/i386/jidctfst-mmx.asm
+++ b/simd/i386/jidctfst-mmx.asm
@@ -98,6 +98,7 @@ PB_CENTERJSAMP times 8 db  CENTERJSAMPLE
     GLOBAL_FUNCTION(jsimd_idct_ifast_mmx)
 
 EXTN(jsimd_idct_ifast_mmx):
+    _endbr32
     push        ebp
     mov         eax, esp                    ; eax = original ebp
     sub         esp, byte 4
diff --git a/simd/i386/jidctfst-sse2.asm b/simd/i386/jidctfst-sse2.asm
index 83bc414..5e8566f 100644
--- a/simd/i386/jidctfst-sse2.asm
+++ b/simd/i386/jidctfst-sse2.asm
@@ -96,6 +96,7 @@ PB_CENTERJSAMP times 16 db  CENTERJSAMPLE
     GLOBAL_FUNCTION(jsimd_idct_ifast_sse2)
 
 EXTN(jsimd_idct_ifast_sse2):
+    _endbr32
     push        ebp
     mov         eax, esp                     ; eax = original ebp
     sub         esp, byte 4
diff --git a/simd/i386/jidctint-avx2.asm b/simd/i386/jidctint-avx2.asm
index b3b7b14..6c77546 100644
--- a/simd/i386/jidctint-avx2.asm
+++ b/simd/i386/jidctint-avx2.asm
@@ -298,6 +298,7 @@ PW_1_NEG1                  times 8  dw  1
     GLOBAL_FUNCTION(jsimd_idct_islow_avx2)
 
 EXTN(jsimd_idct_islow_avx2):
+    _endbr32
     push        ebp
     mov         eax, esp                     ; eax = original ebp
     sub         esp, byte 4
diff --git a/simd/i386/jidctint-mmx.asm b/simd/i386/jidctint-mmx.asm
index 6ca6d06..2fa0107 100644
--- a/simd/i386/jidctint-mmx.asm
+++ b/simd/i386/jidctint-mmx.asm
@@ -111,6 +111,7 @@ PB_CENTERJSAMP times 8 db  CENTERJSAMPLE
     GLOBAL_FUNCTION(jsimd_idct_islow_mmx)
 
 EXTN(jsimd_idct_islow_mmx):
+    _endbr32
     push        ebp
     mov         eax, esp                    ; eax = original ebp
     sub         esp, byte 4
diff --git a/simd/i386/jidctint-sse2.asm b/simd/i386/jidctint-sse2.asm
index a6bd00a..381abd9 100644
--- a/simd/i386/jidctint-sse2.asm
+++ b/simd/i386/jidctint-sse2.asm
@@ -109,6 +109,7 @@ PB_CENTERJSAMP times 16 db  CENTERJSAMPLE
     GLOBAL_FUNCTION(jsimd_idct_islow_sse2)
 
 EXTN(jsimd_idct_islow_sse2):
+    _endbr32
     push        ebp
     mov         eax, esp                     ; eax = original ebp
     sub         esp, byte 4
diff --git a/simd/i386/jidctred-mmx.asm b/simd/i386/jidctred-mmx.asm
index 336ee3b..c7ba445 100644
--- a/simd/i386/jidctred-mmx.asm
+++ b/simd/i386/jidctred-mmx.asm
@@ -119,6 +119,7 @@ PB_CENTERJSAMP  times 8 db  CENTERJSAMPLE
     GLOBAL_FUNCTION(jsimd_idct_4x4_mmx)
 
 EXTN(jsimd_idct_4x4_mmx):
+    _endbr32
     push        ebp
     mov         eax, esp                    ; eax = original ebp
     sub         esp, byte 4
@@ -506,6 +507,7 @@ EXTN(jsimd_idct_4x4_mmx):
     GLOBAL_FUNCTION(jsimd_idct_2x2_mmx)
 
 EXTN(jsimd_idct_2x2_mmx):
+    _endbr32
     push        ebp
     mov         ebp, esp
     push        ebx
diff --git a/simd/i386/jidctred-sse2.asm b/simd/i386/jidctred-sse2.asm
index 97838ba..cc6c915 100644
--- a/simd/i386/jidctred-sse2.asm
+++ b/simd/i386/jidctred-sse2.asm
@@ -117,6 +117,7 @@ PB_CENTERJSAMP  times 16 db  CENTERJSAMPLE
     GLOBAL_FUNCTION(jsimd_idct_4x4_sse2)
 
 EXTN(jsimd_idct_4x4_sse2):
+    _endbr32
     push        ebp
     mov         eax, esp                     ; eax = original ebp
     sub         esp, byte 4
@@ -427,6 +428,7 @@ EXTN(jsimd_idct_4x4_sse2):
     GLOBAL_FUNCTION(jsimd_idct_2x2_sse2)
 
 EXTN(jsimd_idct_2x2_sse2):
+    _endbr32
     push        ebp
     mov         ebp, esp
     push        ebx
diff --git a/simd/i386/jquant-3dn.asm b/simd/i386/jquant-3dn.asm
index 1767f44..fa350f3 100644
--- a/simd/i386/jquant-3dn.asm
+++ b/simd/i386/jquant-3dn.asm
@@ -38,6 +38,7 @@
     GLOBAL_FUNCTION(jsimd_convsamp_float_3dnow)
 
 EXTN(jsimd_convsamp_float_3dnow):
+    _endbr32
     push        ebp
     mov         ebp, esp
     push        ebx
@@ -140,6 +141,7 @@ EXTN(jsimd_convsamp_float_3dnow):
     GLOBAL_FUNCTION(jsimd_quantize_float_3dnow)
 
 EXTN(jsimd_quantize_float_3dnow):
+    _endbr32
     push        ebp
     mov         ebp, esp
 ;   push        ebx                     ; unused
diff --git a/simd/i386/jquant-mmx.asm b/simd/i386/jquant-mmx.asm
index 98932db..6903db1 100644
--- a/simd/i386/jquant-mmx.asm
+++ b/simd/i386/jquant-mmx.asm
@@ -38,6 +38,7 @@
     GLOBAL_FUNCTION(jsimd_convsamp_mmx)
 
 EXTN(jsimd_convsamp_mmx):
+    _endbr32
     push        ebp
     mov         ebp, esp
     push        ebx
@@ -147,6 +148,7 @@ EXTN(jsimd_convsamp_mmx):
     GLOBAL_FUNCTION(jsimd_quantize_mmx)
 
 EXTN(jsimd_quantize_mmx):
+    _endbr32
     push        ebp
     mov         ebp, esp
 ;   push        ebx                     ; unused
diff --git a/simd/i386/jquant-sse.asm b/simd/i386/jquant-sse.asm
index cc244c4..99c97f0 100644
--- a/simd/i386/jquant-sse.asm
+++ b/simd/i386/jquant-sse.asm
@@ -38,6 +38,7 @@
     GLOBAL_FUNCTION(jsimd_convsamp_float_sse)
 
 EXTN(jsimd_convsamp_float_sse):
+    _endbr32
     push        ebp
     mov         ebp, esp
     push        ebx
@@ -140,6 +141,7 @@ EXTN(jsimd_convsamp_float_sse):
     GLOBAL_FUNCTION(jsimd_quantize_float_sse)
 
 EXTN(jsimd_quantize_float_sse):
+    _endbr32
     push        ebp
     mov         ebp, esp
 ;   push        ebx                     ; unused
diff --git a/simd/i386/jquantf-sse2.asm b/simd/i386/jquantf-sse2.asm
index 8d1201c..5118837 100644
--- a/simd/i386/jquantf-sse2.asm
+++ b/simd/i386/jquantf-sse2.asm
@@ -38,6 +38,7 @@
     GLOBAL_FUNCTION(jsimd_convsamp_float_sse2)
 
 EXTN(jsimd_convsamp_float_sse2):
+    _endbr32
     push        ebp
     mov         ebp, esp
     push        ebx
@@ -117,6 +118,7 @@ EXTN(jsimd_convsamp_float_sse2):
     GLOBAL_FUNCTION(jsimd_quantize_float_sse2)
 
 EXTN(jsimd_quantize_float_sse2):
+    _endbr32
     push        ebp
     mov         ebp, esp
 ;   push        ebx                     ; unused
diff --git a/simd/i386/jquanti-avx2.asm b/simd/i386/jquanti-avx2.asm
index ea8e1a1..5df0d10 100644
--- a/simd/i386/jquanti-avx2.asm
+++ b/simd/i386/jquanti-avx2.asm
@@ -39,6 +39,7 @@
     GLOBAL_FUNCTION(jsimd_convsamp_avx2)
 
 EXTN(jsimd_convsamp_avx2):
+    _endbr32
     push        ebp
     mov         ebp, esp
     push        ebx
@@ -132,6 +133,7 @@ EXTN(jsimd_convsamp_avx2):
     GLOBAL_FUNCTION(jsimd_quantize_avx2)
 
 EXTN(jsimd_quantize_avx2):
+    _endbr32
     push        ebp
     mov         ebp, esp
 ;   push        ebx                     ; unused
diff --git a/simd/i386/jquanti-sse2.asm b/simd/i386/jquanti-sse2.asm
index 2a69494..67a423a 100644
--- a/simd/i386/jquanti-sse2.asm
+++ b/simd/i386/jquanti-sse2.asm
@@ -38,6 +38,7 @@
     GLOBAL_FUNCTION(jsimd_convsamp_sse2)
 
 EXTN(jsimd_convsamp_sse2):
+    _endbr32
     push        ebp
     mov         ebp, esp
     push        ebx
@@ -123,6 +124,7 @@ EXTN(jsimd_convsamp_sse2):
     GLOBAL_FUNCTION(jsimd_quantize_sse2)
 
 EXTN(jsimd_quantize_sse2):
+    _endbr32
     push        ebp
     mov         ebp, esp
 ;   push        ebx                     ; unused
diff --git a/simd/nasm/jsimdext.inc b/simd/nasm/jsimdext.inc
index b40901f..9c25f7a 100644
--- a/simd/nasm/jsimdext.inc
+++ b/simd/nasm/jsimdext.inc
@@ -468,6 +468,14 @@ const_base:
 
 %endif
 
+%imacro _endbr32 0                  ; zero-argument macro placed at the entry of each exported i386 routine
+    dd 0xfb1e0ff3                   ; little-endian dword -> bytes F3 0F 1E FB (endbr32), emitted as raw data; presumably so assemblers without the mnemonic still build - confirm
+%endmacro
+
+%imacro _endbr64 0                  ; zero-argument macro placed at the entry of each exported x86_64 routine
+    dd 0xfa1e0ff3                   ; little-endian dword -> bytes F3 0F 1E FA (endbr64), emitted as raw data; presumably so assemblers without the mnemonic still build - confirm
+%endmacro
+
 ; --------------------------------------------------------------------------
 ;  Defines picked up from the C headers
 ;
diff --git a/simd/x86_64/jccolext-avx2.asm b/simd/x86_64/jccolext-avx2.asm
index 5fa3848..b0c1e58 100644
--- a/simd/x86_64/jccolext-avx2.asm
+++ b/simd/x86_64/jccolext-avx2.asm
@@ -41,6 +41,7 @@
     GLOBAL_FUNCTION(jsimd_rgb_ycc_convert_avx2)
 
 EXTN(jsimd_rgb_ycc_convert_avx2):
+    _endbr64
     push        rbp
     mov         rax, rsp                     ; rax = original rbp
     sub         rsp, byte 4
diff --git a/simd/x86_64/jccolext-sse2.asm b/simd/x86_64/jccolext-sse2.asm
index b1486c0..0d776ff 100644
--- a/simd/x86_64/jccolext-sse2.asm
+++ b/simd/x86_64/jccolext-sse2.asm
@@ -40,6 +40,7 @@
     GLOBAL_FUNCTION(jsimd_rgb_ycc_convert_sse2)
 
 EXTN(jsimd_rgb_ycc_convert_sse2):
+    _endbr64
     push        rbp
     mov         rax, rsp                     ; rax = original rbp
     sub         rsp, byte 4
diff --git a/simd/x86_64/jcgryext-avx2.asm b/simd/x86_64/jcgryext-avx2.asm
index 79e2aa0..8dc16cf 100644
--- a/simd/x86_64/jcgryext-avx2.asm
+++ b/simd/x86_64/jcgryext-avx2.asm
@@ -41,6 +41,7 @@
     GLOBAL_FUNCTION(jsimd_rgb_gray_convert_avx2)
 
 EXTN(jsimd_rgb_gray_convert_avx2):
+    _endbr64
     push        rbp
     mov         rax, rsp                     ; rax = original rbp
     sub         rsp, byte 4
diff --git a/simd/x86_64/jcgryext-sse2.asm b/simd/x86_64/jcgryext-sse2.asm
index 9c3ae5e..11e0a5d 100644
--- a/simd/x86_64/jcgryext-sse2.asm
+++ b/simd/x86_64/jcgryext-sse2.asm
@@ -40,6 +40,7 @@
     GLOBAL_FUNCTION(jsimd_rgb_gray_convert_sse2)
 
 EXTN(jsimd_rgb_gray_convert_sse2):
+    _endbr64
     push        rbp
     mov         rax, rsp                     ; rax = original rbp
     sub         rsp, byte 4
diff --git a/simd/x86_64/jchuff-sse2.asm b/simd/x86_64/jchuff-sse2.asm
index 1b091ad..5265f3b 100644
--- a/simd/x86_64/jchuff-sse2.asm
+++ b/simd/x86_64/jchuff-sse2.asm
@@ -186,6 +186,7 @@ EXTN(jconst_huff_encode_one_block):
     GLOBAL_FUNCTION(jsimd_huff_encode_one_block_sse2)
 
 EXTN(jsimd_huff_encode_one_block_sse2):
+    _endbr64
     push        rbp
     mov         rax, rsp                     ; rax = original rbp
     sub         rsp, byte 4
diff --git a/simd/x86_64/jcphuff-sse2.asm b/simd/x86_64/jcphuff-sse2.asm
index b17488a..59d4d8d 100644
--- a/simd/x86_64/jcphuff-sse2.asm
+++ b/simd/x86_64/jcphuff-sse2.asm
@@ -283,6 +283,7 @@
     GLOBAL_FUNCTION(jsimd_encode_mcu_AC_first_prepare_sse2)
 
 EXTN(jsimd_encode_mcu_AC_first_prepare_sse2):
+    _endbr64
     push        rbp
     mov         rax, rsp                     ; rax = original rbp
     sub         rsp, byte 4
@@ -449,6 +450,7 @@ EXTN(jsimd_encode_mcu_AC_first_prepare_sse2):
     GLOBAL_FUNCTION(jsimd_encode_mcu_AC_refine_prepare_sse2)
 
 EXTN(jsimd_encode_mcu_AC_refine_prepare_sse2):
+    _endbr64
     push        rbp
     mov         rax, rsp                     ; rax = original rbp
     sub         rsp, byte 4
diff --git a/simd/x86_64/jcsample-avx2.asm b/simd/x86_64/jcsample-avx2.asm
index 9d5a861..1208dc3 100644
--- a/simd/x86_64/jcsample-avx2.asm
+++ b/simd/x86_64/jcsample-avx2.asm
@@ -45,6 +45,7 @@
     GLOBAL_FUNCTION(jsimd_h2v1_downsample_avx2)
 
 EXTN(jsimd_h2v1_downsample_avx2):
+    _endbr64
     push        rbp
     mov         rax, rsp
     mov         rbp, rsp
@@ -207,6 +208,7 @@ EXTN(jsimd_h2v1_downsample_avx2):
     GLOBAL_FUNCTION(jsimd_h2v2_downsample_avx2)
 
 EXTN(jsimd_h2v2_downsample_avx2):
+    _endbr64
     push        rbp
     mov         rax, rsp
     mov         rbp, rsp
diff --git a/simd/x86_64/jcsample-sse2.asm b/simd/x86_64/jcsample-sse2.asm
index 1b31536..046b23f 100644
--- a/simd/x86_64/jcsample-sse2.asm
+++ b/simd/x86_64/jcsample-sse2.asm
@@ -44,6 +44,7 @@
     GLOBAL_FUNCTION(jsimd_h2v1_downsample_sse2)
 
 EXTN(jsimd_h2v1_downsample_sse2):
+    _endbr64
     push        rbp
     mov         rax, rsp
     mov         rbp, rsp
@@ -189,6 +190,7 @@ EXTN(jsimd_h2v1_downsample_sse2):
     GLOBAL_FUNCTION(jsimd_h2v2_downsample_sse2)
 
 EXTN(jsimd_h2v2_downsample_sse2):
+    _endbr64
     push        rbp
     mov         rax, rsp
     mov         rbp, rsp
diff --git a/simd/x86_64/jdcolext-avx2.asm b/simd/x86_64/jdcolext-avx2.asm
index e2b96c7..ab7bfa4 100644
--- a/simd/x86_64/jdcolext-avx2.asm
+++ b/simd/x86_64/jdcolext-avx2.asm
@@ -42,6 +42,7 @@
     GLOBAL_FUNCTION(jsimd_ycc_rgb_convert_avx2)
 
 EXTN(jsimd_ycc_rgb_convert_avx2):
+    _endbr64
     push        rbp
     mov         rax, rsp                     ; rax = original rbp
     sub         rsp, byte 4
diff --git a/simd/x86_64/jdcolext-sse2.asm b/simd/x86_64/jdcolext-sse2.asm
index a94954b..458fac1 100644
--- a/simd/x86_64/jdcolext-sse2.asm
+++ b/simd/x86_64/jdcolext-sse2.asm
@@ -41,6 +41,7 @@
     GLOBAL_FUNCTION(jsimd_ycc_rgb_convert_sse2)
 
 EXTN(jsimd_ycc_rgb_convert_sse2):
+    _endbr64
     push        rbp
     mov         rax, rsp                     ; rax = original rbp
     sub         rsp, byte 4
diff --git a/simd/x86_64/jdmrgext-avx2.asm b/simd/x86_64/jdmrgext-avx2.asm
index 04e8a94..15dbb53 100644
--- a/simd/x86_64/jdmrgext-avx2.asm
+++ b/simd/x86_64/jdmrgext-avx2.asm
@@ -42,6 +42,7 @@
     GLOBAL_FUNCTION(jsimd_h2v1_merged_upsample_avx2)
 
 EXTN(jsimd_h2v1_merged_upsample_avx2):
+    _endbr64
     push        rbp
     mov         rax, rsp                     ; rax = original rbp
     sub         rsp, byte 4
@@ -506,6 +507,7 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
     GLOBAL_FUNCTION(jsimd_h2v2_merged_upsample_avx2)
 
 EXTN(jsimd_h2v2_merged_upsample_avx2):
+    _endbr64
     push        rbp
     mov         rax, rsp
     mov         rbp, rsp
diff --git a/simd/x86_64/jdmrgext-sse2.asm b/simd/x86_64/jdmrgext-sse2.asm
index 1cc3345..78a6922 100644
--- a/simd/x86_64/jdmrgext-sse2.asm
+++ b/simd/x86_64/jdmrgext-sse2.asm
@@ -41,6 +41,7 @@
     GLOBAL_FUNCTION(jsimd_h2v1_merged_upsample_sse2)
 
 EXTN(jsimd_h2v1_merged_upsample_sse2):
+    _endbr64
     push        rbp
     mov         rax, rsp                     ; rax = original rbp
     sub         rsp, byte 4
@@ -448,6 +449,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
     GLOBAL_FUNCTION(jsimd_h2v2_merged_upsample_sse2)
 
 EXTN(jsimd_h2v2_merged_upsample_sse2):
+    _endbr64
     push        rbp
     mov         rax, rsp
     mov         rbp, rsp
diff --git a/simd/x86_64/jdsample-avx2.asm b/simd/x86_64/jdsample-avx2.asm
index 10fa5c4..025fffd 100644
--- a/simd/x86_64/jdsample-avx2.asm
+++ b/simd/x86_64/jdsample-avx2.asm
@@ -62,6 +62,7 @@ PW_EIGHT times 16 dw 8
     GLOBAL_FUNCTION(jsimd_h2v1_fancy_upsample_avx2)
 
 EXTN(jsimd_h2v1_fancy_upsample_avx2):
+    _endbr64
     push        rbp
     mov         rax, rsp
     mov         rbp, rsp
@@ -216,6 +217,7 @@ EXTN(jsimd_h2v1_fancy_upsample_avx2):
     GLOBAL_FUNCTION(jsimd_h2v2_fancy_upsample_avx2)
 
 EXTN(jsimd_h2v2_fancy_upsample_avx2):
+    _endbr64
     push        rbp
     mov         rax, rsp                     ; rax = original rbp
     sub         rsp, byte 4
@@ -525,6 +527,7 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
     GLOBAL_FUNCTION(jsimd_h2v1_upsample_avx2)
 
 EXTN(jsimd_h2v1_upsample_avx2):
+    _endbr64
     push        rbp
     mov         rax, rsp
     mov         rbp, rsp
@@ -614,6 +617,7 @@ EXTN(jsimd_h2v1_upsample_avx2):
     GLOBAL_FUNCTION(jsimd_h2v2_upsample_avx2)
 
 EXTN(jsimd_h2v2_upsample_avx2):
+    _endbr64
     push        rbp
     mov         rax, rsp
     mov         rbp, rsp
diff --git a/simd/x86_64/jdsample-sse2.asm b/simd/x86_64/jdsample-sse2.asm
index d8ccda9..cbd1543 100644
--- a/simd/x86_64/jdsample-sse2.asm
+++ b/simd/x86_64/jdsample-sse2.asm
@@ -61,6 +61,7 @@ PW_EIGHT times 8 dw 8
     GLOBAL_FUNCTION(jsimd_h2v1_fancy_upsample_sse2)
 
 EXTN(jsimd_h2v1_fancy_upsample_sse2):
+    _endbr64
     push        rbp
     mov         rax, rsp
     mov         rbp, rsp
@@ -203,6 +204,7 @@ EXTN(jsimd_h2v1_fancy_upsample_sse2):
     GLOBAL_FUNCTION(jsimd_h2v2_fancy_upsample_sse2)
 
 EXTN(jsimd_h2v2_fancy_upsample_sse2):
+    _endbr64
     push        rbp
     mov         rax, rsp                     ; rax = original rbp
     sub         rsp, byte 4
@@ -498,6 +500,7 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
     GLOBAL_FUNCTION(jsimd_h2v1_upsample_sse2)
 
 EXTN(jsimd_h2v1_upsample_sse2):
+    _endbr64
     push        rbp
     mov         rax, rsp
     mov         rbp, rsp
@@ -585,6 +588,7 @@ EXTN(jsimd_h2v1_upsample_sse2):
     GLOBAL_FUNCTION(jsimd_h2v2_upsample_sse2)
 
 EXTN(jsimd_h2v2_upsample_sse2):
+    _endbr64
     push        rbp
     mov         rax, rsp
     mov         rbp, rsp
diff --git a/simd/x86_64/jfdctflt-sse.asm b/simd/x86_64/jfdctflt-sse.asm
index 26f9fb6..45ddfc2 100644
--- a/simd/x86_64/jfdctflt-sse.asm
+++ b/simd/x86_64/jfdctflt-sse.asm
@@ -67,6 +67,7 @@ PD_1_306 times 4 dd 1.306562964876376527856643
     GLOBAL_FUNCTION(jsimd_fdct_float_sse)
 
 EXTN(jsimd_fdct_float_sse):
+    _endbr64
     push        rbp
     mov         rax, rsp                     ; rax = original rbp
     sub         rsp, byte 4
diff --git a/simd/x86_64/jfdctfst-sse2.asm b/simd/x86_64/jfdctfst-sse2.asm
index aaf8b9e..c14a184 100644
--- a/simd/x86_64/jfdctfst-sse2.asm
+++ b/simd/x86_64/jfdctfst-sse2.asm
@@ -82,6 +82,7 @@ PW_F1306 times 8 dw F_1_306 << CONST_SHIFT
     GLOBAL_FUNCTION(jsimd_fdct_ifast_sse2)
 
 EXTN(jsimd_fdct_ifast_sse2):
+    _endbr64
     push        rbp
     mov         rax, rsp                     ; rax = original rbp
     sub         rsp, byte 4
diff --git a/simd/x86_64/jfdctint-avx2.asm b/simd/x86_64/jfdctint-avx2.asm
index 448f47d..e5aaa8e 100644
--- a/simd/x86_64/jfdctint-avx2.asm
+++ b/simd/x86_64/jfdctint-avx2.asm
@@ -262,6 +262,7 @@ PW_1_NEG1                  times 8  dw  1
     GLOBAL_FUNCTION(jsimd_fdct_islow_avx2)
 
 EXTN(jsimd_fdct_islow_avx2):
+    _endbr64
     push        rbp
     mov         rax, rsp
     mov         rbp, rsp
diff --git a/simd/x86_64/jfdctint-sse2.asm b/simd/x86_64/jfdctint-sse2.asm
index ef16a52..092cc3c 100644
--- a/simd/x86_64/jfdctint-sse2.asm
+++ b/simd/x86_64/jfdctint-sse2.asm
@@ -103,6 +103,7 @@ PW_DESCALE_P2X times 8 dw  1 << (PASS1_BITS - 1)
     GLOBAL_FUNCTION(jsimd_fdct_islow_sse2)
 
 EXTN(jsimd_fdct_islow_sse2):
+    _endbr64
     push        rbp
     mov         rax, rsp                     ; rax = original rbp
     sub         rsp, byte 4
diff --git a/simd/x86_64/jidctflt-sse2.asm b/simd/x86_64/jidctflt-sse2.asm
index b676ef3..d7c7298 100644
--- a/simd/x86_64/jidctflt-sse2.asm
+++ b/simd/x86_64/jidctflt-sse2.asm
@@ -77,6 +77,7 @@ PB_CENTERJSAMP  times 16 db  CENTERJSAMPLE
     GLOBAL_FUNCTION(jsimd_idct_float_sse2)
 
 EXTN(jsimd_idct_float_sse2):
+    _endbr64
     push        rbp
     mov         rax, rsp                     ; rax = original rbp
     sub         rsp, byte 4
diff --git a/simd/x86_64/jidctfst-sse2.asm b/simd/x86_64/jidctfst-sse2.asm
index c6c42f9..b7d3f01 100644
--- a/simd/x86_64/jidctfst-sse2.asm
+++ b/simd/x86_64/jidctfst-sse2.asm
@@ -96,6 +96,7 @@ PB_CENTERJSAMP times 16 db  CENTERJSAMPLE
     GLOBAL_FUNCTION(jsimd_idct_ifast_sse2)
 
 EXTN(jsimd_idct_ifast_sse2):
+    _endbr64
     push        rbp
     mov         rax, rsp                     ; rax = original rbp
     sub         rsp, byte 4
diff --git a/simd/x86_64/jidctint-avx2.asm b/simd/x86_64/jidctint-avx2.asm
index b60b44f..c33f381 100644
--- a/simd/x86_64/jidctint-avx2.asm
+++ b/simd/x86_64/jidctint-avx2.asm
@@ -283,6 +283,7 @@ PW_1_NEG1                  times 8  dw  1
     GLOBAL_FUNCTION(jsimd_idct_islow_avx2)
 
 EXTN(jsimd_idct_islow_avx2):
+    _endbr64
     push        rbp
     mov         rax, rsp                     ; rax = original rbp
     mov         rbp, rsp                     ; rbp = aligned rbp
diff --git a/simd/x86_64/jidctint-sse2.asm b/simd/x86_64/jidctint-sse2.asm
index 83fc344..7a65f3c 100644
--- a/simd/x86_64/jidctint-sse2.asm
+++ b/simd/x86_64/jidctint-sse2.asm
@@ -109,6 +109,7 @@ PB_CENTERJSAMP times 16 db  CENTERJSAMPLE
     GLOBAL_FUNCTION(jsimd_idct_islow_sse2)
 
 EXTN(jsimd_idct_islow_sse2):
+    _endbr64
     push        rbp
     mov         rax, rsp                     ; rax = original rbp
     sub         rsp, byte 4
diff --git a/simd/x86_64/jidctred-sse2.asm b/simd/x86_64/jidctred-sse2.asm
index af64fdc..4d53c58 100644
--- a/simd/x86_64/jidctred-sse2.asm
+++ b/simd/x86_64/jidctred-sse2.asm
@@ -117,6 +117,7 @@ PB_CENTERJSAMP  times 16 db  CENTERJSAMPLE
     GLOBAL_FUNCTION(jsimd_idct_4x4_sse2)
 
 EXTN(jsimd_idct_4x4_sse2):
+    _endbr64
     push        rbp
     mov         rax, rsp                     ; rax = original rbp
     sub         rsp, byte 4
@@ -415,6 +416,7 @@ EXTN(jsimd_idct_4x4_sse2):
     GLOBAL_FUNCTION(jsimd_idct_2x2_sse2)
 
 EXTN(jsimd_idct_2x2_sse2):
+    _endbr64
     push        rbp
     mov         rax, rsp
     mov         rbp, rsp
diff --git a/simd/x86_64/jquantf-sse2.asm b/simd/x86_64/jquantf-sse2.asm
index 4600eec..13c1710 100644
--- a/simd/x86_64/jquantf-sse2.asm
+++ b/simd/x86_64/jquantf-sse2.asm
@@ -38,6 +38,7 @@
     GLOBAL_FUNCTION(jsimd_convsamp_float_sse2)
 
 EXTN(jsimd_convsamp_float_sse2):
+    _endbr64
     push        rbp
     mov         rax, rsp
     mov         rbp, rsp
@@ -111,6 +112,7 @@ EXTN(jsimd_convsamp_float_sse2):
     GLOBAL_FUNCTION(jsimd_quantize_float_sse2)
 
 EXTN(jsimd_quantize_float_sse2):
+    _endbr64
     push        rbp
     mov         rax, rsp
     mov         rbp, rsp
diff --git a/simd/x86_64/jquanti-avx2.asm b/simd/x86_64/jquanti-avx2.asm
index b7243e4..6b20791 100644
--- a/simd/x86_64/jquanti-avx2.asm
+++ b/simd/x86_64/jquanti-avx2.asm
@@ -39,6 +39,7 @@
     GLOBAL_FUNCTION(jsimd_convsamp_avx2)
 
 EXTN(jsimd_convsamp_avx2):
+    _endbr64
     push        rbp
     mov         rax, rsp
     mov         rbp, rsp
@@ -117,6 +118,7 @@ EXTN(jsimd_convsamp_avx2):
     GLOBAL_FUNCTION(jsimd_quantize_avx2)
 
 EXTN(jsimd_quantize_avx2):
+    _endbr64
     push        rbp
     mov         rax, rsp
     mov         rbp, rsp
diff --git a/simd/x86_64/jquanti-sse2.asm b/simd/x86_64/jquanti-sse2.asm
index 7ff7275..7d5cf5a 100644
--- a/simd/x86_64/jquanti-sse2.asm
+++ b/simd/x86_64/jquanti-sse2.asm
@@ -38,6 +38,7 @@
     GLOBAL_FUNCTION(jsimd_convsamp_sse2)
 
 EXTN(jsimd_convsamp_sse2):
+    _endbr64
     push        rbp
     mov         rax, rsp
     mov         rbp, rsp
@@ -117,6 +118,7 @@ EXTN(jsimd_convsamp_sse2):
     GLOBAL_FUNCTION(jsimd_quantize_sse2)
 
 EXTN(jsimd_quantize_sse2):
+    _endbr64
     push        rbp
     mov         rax, rsp
     mov         rbp, rsp
-- 
2.21.0