From 1b1cdf661126b581e948de3a0c2051b654f51543 Mon Sep 17 00:00:00 2001 From: Mark Wielaard Date: Apr 20 2024 18:14:33 +0000 Subject: 3.23.0-0.1.RC1 - Upstream 3.23.0-RC1 - Remove all upstreamed patches --- diff --git a/.gitignore b/.gitignore index 4ddb7e4..27bdf5b 100644 --- a/.gitignore +++ b/.gitignore @@ -51,3 +51,4 @@ /valgrind-3.22.0.RC1.tar.bz2 /valgrind-3.22.0.RC2.tar.bz2 /valgrind-3.22.0.tar.bz2 +/valgrind-3.23.0.RC1.tar.bz2 diff --git a/sources b/sources index 9965f6f..b1c83bf 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -SHA512 (valgrind-3.22.0.tar.bz2) = 2904c13f68245bbafcea70998c6bd20725271300a7e94b6751ca00916943595fc3fac8557da7ea8db31b54a43f092823a0a947bc142829da811d074e1fe49777 +SHA512 (valgrind-3.23.0.RC1.tar.bz2) = 6d9b95ce919ffa1993265c2972b407beb2879b87f84efd61ada4e75f334b332917a726b053c5dbc64b970a55334189c48ccae9231934c9474b65eb6ec1e2c18f diff --git a/valgrind-3.22.0-amd64-VFMADD213.patch b/valgrind-3.22.0-amd64-VFMADD213.patch deleted file mode 100644 index 1b58b3d..0000000 --- a/valgrind-3.22.0-amd64-VFMADD213.patch +++ /dev/null @@ -1,530 +0,0 @@ -commit a5693c1203c3a26443af13182a8082c2e9152f6c -Author: Mark Wielaard -Date: Sat Apr 13 14:33:19 2024 +0200 - - amd64: Implement VFMADD213 for Iop_MAddF32 and Iop_MAddF64 - - Speed up F32 and F64 FMA on amd64. Add priv/host_amd64_maddf.c - implementing h_amd64_calc_MAddF32_fma4 and h_amd64_calc_MAddF64_fma4 - to be used instead of the generic variants h_generic_calc_MAddF32 - and h_generic_calc_MAddF64 when host has VEX_HWCAPS_AMD64_FMA4. - Add fma3 and fma4 detection m_machine.c (machine_get_hwcaps). - - This patch also fixes the memcheck/tests/vcpu_fnfns and - none/tests/amd64/fma testcases when run on a x86-64-v3 system. - - Patch contributed by Grazvydas Ignotas and - Bruno Lathuilière - - https://bugs.kde.org/show_bug.cgi?id=481127 - https://bugs.kde.org/show_bug.cgi?id=463463 - https://bugs.kde.org/show_bug.cgi?id=463458 - -diff --git a/Makefile.vex.am b/Makefile.vex.am -index 98d84835946b..c1244a69d25c 100644 ---- a/Makefile.vex.am -+++ b/Makefile.vex.am -@@ -54,6 +54,7 @@ noinst_HEADERS = \ - priv/host_generic_simd128.h \ - priv/host_generic_simd256.h \ - priv/host_generic_maddf.h \ -+ priv/host_amd64_maddf.h \ - priv/host_x86_defs.h \ - priv/host_amd64_defs.h \ - priv/host_ppc_defs.h \ -@@ -156,6 +157,7 @@ LIBVEX_SOURCES_COMMON = \ - priv/host_generic_simd128.c \ - priv/host_generic_simd256.c \ - priv/host_generic_maddf.c \ -+ priv/host_amd64_maddf.c \ - priv/host_generic_reg_alloc2.c \ - priv/host_generic_reg_alloc3.c \ - priv/host_x86_defs.c \ -diff --git a/VEX/priv/host_amd64_defs.c b/VEX/priv/host_amd64_defs.c -index 69afab739ce8..253ed65150e5 100644 ---- a/VEX/priv/host_amd64_defs.c -+++ b/VEX/priv/host_amd64_defs.c -@@ -590,6 +590,7 @@ const HChar* showAMD64SseOp ( AMD64SseOp op ) { - case Asse_PMADDUBSW: return "pmaddubsw"; - case Asse_F32toF16: return "vcvtps2ph(rm_field=$0x4)."; - case Asse_F16toF32: return "vcvtph2ps."; -+ case Asse_VFMADD213: return "vfmadd213"; - default: vpanic("showAMD64SseOp"); - } - } -@@ -1056,6 +1057,28 @@ AMD64Instr* AMD64Instr_SseMOVQ ( HReg gpr, HReg xmm, Bool toXMM ) { - //uu i->Ain.AvxReRg.dst = rg; - //uu return i; - //uu } -+AMD64Instr* AMD64Instr_Avx32FLo ( AMD64SseOp op, HReg src1, HReg src2, HReg dst ) { -+ AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr)); -+ i->tag = Ain_Avx32FLo; -+ i->Ain.Avx32FLo.op = op; -+ i->Ain.Avx32FLo.src1 = src1; -+ i->Ain.Avx32FLo.src2 = src2; -+ i->Ain.Avx32FLo.dst = dst; -+ vassert(op != Asse_MOV); -+ return i; -+} -+ -+AMD64Instr* AMD64Instr_Avx64FLo ( AMD64SseOp op, HReg src1, HReg src2, HReg dst ) { -+ AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr)); -+ i->tag = Ain_Avx64FLo; -+ i->Ain.Avx64FLo.op = op; -+ i->Ain.Avx64FLo.src1 = src1; -+ i->Ain.Avx64FLo.src2 = src2; -+ i->Ain.Avx64FLo.dst = dst; -+ vassert(op != Asse_MOV); -+ return i; -+} -+ - AMD64Instr* AMD64Instr_EvCheck ( AMD64AMode* amCounter, - AMD64AMode* amFailAddr ) { - AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr)); -@@ -1434,6 +1457,22 @@ void ppAMD64Instr ( const AMD64Instr* i, Bool mode64 ) - //uu vex_printf(","); - //uu ppHRegAMD64(i->Ain.AvxReRg.dst); - //uu return; -+ case Ain_Avx32FLo: -+ vex_printf("%sss ", showAMD64SseOp(i->Ain.Avx32FLo.op)); -+ ppHRegAMD64(i->Ain.Avx32FLo.src2); -+ vex_printf(","); -+ ppHRegAMD64(i->Ain.Avx32FLo.src1); -+ vex_printf(","); -+ ppHRegAMD64(i->Ain.Avx32FLo.dst); -+ return; -+ case Ain_Avx64FLo: -+ vex_printf("%ssd ", showAMD64SseOp(i->Ain.Avx64FLo.op)); -+ ppHRegAMD64(i->Ain.Avx64FLo.src2); -+ vex_printf(","); -+ ppHRegAMD64(i->Ain.Avx64FLo.src1); -+ vex_printf(","); -+ ppHRegAMD64(i->Ain.Avx64FLo.dst); -+ return; - case Ain_EvCheck: - vex_printf("(evCheck) decl "); - ppAMD64AMode(i->Ain.EvCheck.amCounter); -@@ -1790,6 +1829,18 @@ void getRegUsage_AMD64Instr ( HRegUsage* u, const AMD64Instr* i, Bool mode64 ) - //uu } - //uu } - //uu return; -+ case Ain_Avx32FLo: -+ vassert(i->Ain.Avx32FLo.op != Asse_MOV); -+ addHRegUse(u, HRmRead, i->Ain.Avx32FLo.src1); -+ addHRegUse(u, HRmRead, i->Ain.Avx32FLo.src2); -+ addHRegUse(u, HRmModify, i->Ain.Avx32FLo.dst); -+ return; -+ case Ain_Avx64FLo: -+ vassert(i->Ain.Avx64FLo.op != Asse_MOV); -+ addHRegUse(u, HRmRead, i->Ain.Avx64FLo.src1); -+ addHRegUse(u, HRmRead, i->Ain.Avx64FLo.src2); -+ addHRegUse(u, HRmModify, i->Ain.Avx64FLo.dst); -+ return; - case Ain_EvCheck: - /* We expect both amodes only to mention %rbp, so this is in - fact pointless, since %rbp isn't allocatable, but anyway.. */ -@@ -1999,6 +2050,16 @@ void mapRegs_AMD64Instr ( HRegRemap* m, AMD64Instr* i, Bool mode64 ) - //uu mapReg(m, &i->Ain.AvxReRg.src); - //uu mapReg(m, &i->Ain.AvxReRg.dst); - //uu return; -+ case Ain_Avx32FLo: -+ mapReg(m, &i->Ain.Avx32FLo.src1); -+ mapReg(m, &i->Ain.Avx32FLo.src2); -+ mapReg(m, &i->Ain.Avx32FLo.dst); -+ return; -+ case Ain_Avx64FLo: -+ mapReg(m, &i->Ain.Avx64FLo.src1); -+ mapReg(m, &i->Ain.Avx64FLo.src2); -+ mapReg(m, &i->Ain.Avx64FLo.dst); -+ return; - case Ain_EvCheck: - /* We expect both amodes only to mention %rbp, so this is in - fact pointless, since %rbp isn't allocatable, but anyway.. */ -@@ -4061,6 +4122,53 @@ Int emit_AMD64Instr ( /*MB_MOD*/Bool* is_profInc, - //uu goto done; - //uu } - -+ case Ain_Avx32FLo: { -+ UInt d = vregEnc3210(i->Ain.Avx32FLo.dst); -+ UInt v = vregEnc3210(i->Ain.Avx32FLo.src1); -+ UInt s = vregEnc3210(i->Ain.Avx32FLo.src2); -+ UInt m = 2, pp = 1; -+ UInt opcode; -+ switch (i->Ain.Avx32FLo.op) { -+ case Asse_VFMADD213: -+ // VFMADD213SS %xmmS2, %xmmS1, %xmmD (xmm regs range 0 .. 15) -+ opcode = 0xa9; -+ break; -+ default: -+ goto bad; -+ } -+ // 0xC4 : ~d3 1 ~s3 o4 o3 o2 o1 o0 : 0 ~v3 ~v2 ~v1 ~v0 0 p1 p0 : opcode_byte -+ // : 1 1 d2 d1 d0 s2 s1 s0 -+ *p++ = 0xC4; // 3-byte VEX -+ *p++ = ((((~d)>>3)&1)<<7) | (1<<6) | ((((~s)>>3)&1)<<5) | m; -+ *p++ = ((~v&0x0f) << 3) | pp; -+ *p++ = opcode; -+ *p++ = (1<<7) | (1<<6) | ((d&7) << 3) | ((s&7) << 0); -+ goto done; -+ } -+ case Ain_Avx64FLo: { -+ UInt d = vregEnc3210(i->Ain.Avx64FLo.dst); -+ UInt v = vregEnc3210(i->Ain.Avx64FLo.src1); -+ UInt s = vregEnc3210(i->Ain.Avx64FLo.src2); -+ UInt m = 2, pp = 1; -+ UInt opcode; -+ switch (i->Ain.Avx64FLo.op) { -+ case Asse_VFMADD213: -+ // VFMADD213SD %xmmS2, %xmmS1, %xmmD (xmm regs range 0 .. 15) -+ opcode = 0xa9; -+ break; -+ default: -+ goto bad; -+ } -+ // 0xC4 : ~d3 1 ~s3 o4 o3 o2 o1 o0 : 1 ~v3 ~v2 ~v1 ~v0 0 p1 p0 : opcode_byte -+ // : 1 1 d2 d1 d0 s2 s1 s0 -+ *p++ = 0xC4; // 3-byte VEX -+ *p++ = ((((~d)>>3)&1)<<7) | (1<<6) | ((((~s)>>3)&1)<<5) | m; -+ *p++ = (1<<7)|((~v&0x0f) << 3) | pp; -+ *p++ = opcode; -+ *p++ = (1<<7) | (1<<6) | ((d&7) << 3) | ((s&7) << 0); -+ goto done; -+ } -+ - case Ain_EvCheck: { - /* We generate: - (3 bytes) decl 8(%rbp) 8 == offsetof(host_EvC_COUNTER) -diff --git a/VEX/priv/host_amd64_defs.h b/VEX/priv/host_amd64_defs.h -index e2ed2613bf67..eae878e312d7 100644 ---- a/VEX/priv/host_amd64_defs.h -+++ b/VEX/priv/host_amd64_defs.h -@@ -347,6 +347,8 @@ typedef - // Only for F16C capable hosts: - Asse_F32toF16, // F32 to F16 conversion, aka vcvtps2ph - Asse_F16toF32, // F16 to F32 conversion, aka vcvtph2ps -+ // Only for FMA (FMA3) capable hosts: -+ Asse_VFMADD213, // Fused Multiply-Add, aka vfmadd213ss - } - AMD64SseOp; - -@@ -412,6 +414,8 @@ typedef - //uu Ain_AvxLdSt, /* AVX load/store 256 bits, - //uu no alignment constraints */ - //uu Ain_AvxReRg, /* AVX binary general reg-reg, Re, Rg */ -+ Ain_Avx32FLo, /* AVX binary 3 operand, 32F in lowest lane only */ -+ Ain_Avx64FLo, /* AVX binary 3 operand, 64F in lowest lane only */ - Ain_EvCheck, /* Event check */ - Ain_ProfInc /* 64-bit profile counter increment */ - } -@@ -729,6 +733,18 @@ typedef - //uu HReg src; - //uu HReg dst; - //uu } AvxReRg; -+ struct { -+ AMD64SseOp op; -+ HReg src1; -+ HReg src2; -+ HReg dst; -+ } Avx32FLo; -+ struct { -+ AMD64SseOp op; -+ HReg src1; -+ HReg src2; -+ HReg dst; -+ } Avx64FLo; - struct { - AMD64AMode* amCounter; - AMD64AMode* amFailAddr; -@@ -803,6 +819,8 @@ extern AMD64Instr* AMD64Instr_SseShiftN ( AMD64SseOp, - extern AMD64Instr* AMD64Instr_SseMOVQ ( HReg gpr, HReg xmm, Bool toXMM ); - //uu extern AMD64Instr* AMD64Instr_AvxLdSt ( Bool isLoad, HReg, AMD64AMode* ); - //uu extern AMD64Instr* AMD64Instr_AvxReRg ( AMD64SseOp, HReg, HReg ); -+extern AMD64Instr* AMD64Instr_Avx32FLo ( AMD64SseOp, HReg, HReg, HReg ); -+extern AMD64Instr* AMD64Instr_Avx64FLo ( AMD64SseOp, HReg, HReg, HReg ); - extern AMD64Instr* AMD64Instr_EvCheck ( AMD64AMode* amCounter, - AMD64AMode* amFailAddr ); - extern AMD64Instr* AMD64Instr_ProfInc ( void ); -diff --git a/VEX/priv/host_amd64_isel.c b/VEX/priv/host_amd64_isel.c -index e15e1e60fb85..21d20c77f090 100644 ---- a/VEX/priv/host_amd64_isel.c -+++ b/VEX/priv/host_amd64_isel.c -@@ -42,6 +42,7 @@ - #include "host_generic_simd64.h" - #include "host_generic_simd128.h" - #include "host_generic_simd256.h" -+#include "host_amd64_maddf.h" - #include "host_generic_maddf.h" - #include "host_amd64_defs.h" - -@@ -2832,6 +2833,13 @@ static HReg iselFltExpr_wrk ( ISelEnv* env, const IRExpr* e ) - HReg argX = iselFltExpr(env, qop->arg2); - HReg argY = iselFltExpr(env, qop->arg3); - HReg argZ = iselFltExpr(env, qop->arg4); -+ if (env->hwcaps & VEX_HWCAPS_AMD64_FMA3) { -+ vassert(dst.u32 != argY.u32 && dst.u32 != argZ.u32); -+ if (dst.u32 != argX.u32) -+ addInstr(env, AMD64Instr_SseReRg(Asse_MOV, argX, dst)); -+ addInstr(env, AMD64Instr_Avx32FLo(Asse_VFMADD213, argY, argZ, dst)); -+ return dst; -+ } - /* XXXROUNDINGFIXME */ - /* set roundingmode here */ - /* subq $16, %rsp -- make a space*/ -@@ -2861,10 +2869,22 @@ static HReg iselFltExpr_wrk ( ISelEnv* env, const IRExpr* e ) - AMD64AMode_IR(0, hregAMD64_RDX()))); - addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 4, argZ, - AMD64AMode_IR(0, hregAMD64_RCX()))); -- /* call the helper */ -- addInstr(env, AMD64Instr_Call( Acc_ALWAYS, -- (ULong)(HWord)h_generic_calc_MAddF32, -- 4, mk_RetLoc_simple(RLPri_None) )); -+ -+ /* call the helper with priority order : fma4 -> fallback generic -+ remark: the fma3 case is handled before without helper*/ -+#if defined(VGA_amd64) -+ if (env->hwcaps & VEX_HWCAPS_AMD64_FMA4) { -+ addInstr(env, AMD64Instr_Call( Acc_ALWAYS, -+ (ULong)(HWord)h_amd64_calc_MAddF32_fma4, -+ 4, mk_RetLoc_simple(RLPri_None) )); -+ }else -+#endif -+ { -+ addInstr(env, AMD64Instr_Call( Acc_ALWAYS, -+ (ULong)(HWord)h_generic_calc_MAddF32, -+ 4, mk_RetLoc_simple(RLPri_None) )); -+ } -+ - /* fetch the result from memory, using %r_argp, which the - register allocator will keep alive across the call. */ - addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 4, dst, -@@ -3024,6 +3044,14 @@ static HReg iselDblExpr_wrk ( ISelEnv* env, const IRExpr* e ) - HReg argX = iselDblExpr(env, qop->arg2); - HReg argY = iselDblExpr(env, qop->arg3); - HReg argZ = iselDblExpr(env, qop->arg4); -+ if (env->hwcaps & VEX_HWCAPS_AMD64_FMA3) { -+ vassert(dst.u32 != argY.u32 && dst.u32 != argZ.u32); -+ if (dst.u32 != argX.u32) -+ addInstr(env, AMD64Instr_SseReRg(Asse_MOV, argX, dst)); -+ addInstr(env, AMD64Instr_Avx64FLo(Asse_VFMADD213, argY, argZ, dst)); -+ return dst; -+ } -+ - /* XXXROUNDINGFIXME */ - /* set roundingmode here */ - /* subq $32, %rsp -- make a space*/ -@@ -3053,10 +3081,22 @@ static HReg iselDblExpr_wrk ( ISelEnv* env, const IRExpr* e ) - AMD64AMode_IR(0, hregAMD64_RDX()))); - addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 8, argZ, - AMD64AMode_IR(0, hregAMD64_RCX()))); -- /* call the helper */ -- addInstr(env, AMD64Instr_Call( Acc_ALWAYS, -- (ULong)(HWord)h_generic_calc_MAddF64, -- 4, mk_RetLoc_simple(RLPri_None) )); -+ -+ /* call the helper with priority order : fma4 -> fallback generic -+ remark: the fma3 case is handled before without helper*/ -+#if defined(VGA_amd64) -+ if (env->hwcaps & VEX_HWCAPS_AMD64_FMA4) { -+ addInstr(env, AMD64Instr_Call( Acc_ALWAYS, -+ (ULong)(HWord)h_amd64_calc_MAddF64_fma4, -+ 4, mk_RetLoc_simple(RLPri_None) )); -+ }else -+#endif -+ { -+ addInstr(env, AMD64Instr_Call( Acc_ALWAYS, -+ (ULong)(HWord)h_generic_calc_MAddF64, -+ 4, mk_RetLoc_simple(RLPri_None) )); -+ } -+ - /* fetch the result from memory, using %r_argp, which the - register allocator will keep alive across the call. */ - addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 8, dst, -@@ -5372,7 +5412,9 @@ HInstrArray* iselSB_AMD64 ( const IRSB* bb, - | VEX_HWCAPS_AMD64_AVX2 - | VEX_HWCAPS_AMD64_F16C - | VEX_HWCAPS_AMD64_RDRAND -- | VEX_HWCAPS_AMD64_RDSEED))); -+ | VEX_HWCAPS_AMD64_RDSEED -+ | VEX_HWCAPS_AMD64_FMA3 -+ | VEX_HWCAPS_AMD64_FMA4))); - - /* Check that the host's endianness is as expected. */ - vassert(archinfo_host->endness == VexEndnessLE); -diff --git a/VEX/priv/host_amd64_maddf.c b/VEX/priv/host_amd64_maddf.c -new file mode 100644 -index 000000000000..579abb4389ac ---- /dev/null -+++ b/VEX/priv/host_amd64_maddf.c -@@ -0,0 +1,35 @@ -+ -+/*---------------------------------------------------------------*/ -+/*--- begin host_amd64_maddf.c ---*/ -+/*---------------------------------------------------------------*/ -+ -+/* -+ Compute x * y + z as ternary operation with intrinsics. -+*/ -+ -+ -+#include "libvex_basictypes.h" -+#include "host_amd64_maddf.h" -+ -+#if defined(VGA_amd64) -+void VEX_REGPARM(3) -+ h_amd64_calc_MAddF32_fma4 ( /*OUT*/Float* res, -+ Float* argX, Float* argY, Float* argZ ) -+{ -+ __asm__ ("vfmaddss %3,%2,%1,%0;" : -+ "=x"(*res): "x"(*argX),"x"(*argY), "x"(*argZ)); -+ return ; -+} -+ -+void VEX_REGPARM(3) -+ h_amd64_calc_MAddF64_fma4 ( /*OUT*/Double* res, -+ Double* argX, Double* argY, Double* argZ ) -+{ -+ __asm__ ("vfmaddsd %3,%2,%1,%0;" : -+ "=x"(*res): "x"(*argX),"x"(*argY), "x"(*argZ)); -+ return; -+} -+#endif -+/*---------------------------------------------------------------*/ -+/*--- end host_amd64_maddf.c --*/ -+/*---------------------------------------------------------------*/ -diff --git a/VEX/priv/host_amd64_maddf.h b/VEX/priv/host_amd64_maddf.h -new file mode 100644 -index 000000000000..b592a44e17d7 ---- /dev/null -+++ b/VEX/priv/host_amd64_maddf.h -@@ -0,0 +1,32 @@ -+ -+/*---------------------------------------------------------------*/ -+/*--- begin host_amd64_maddf.h ---*/ -+/*---------------------------------------------------------------*/ -+ -+/* -+ Compute x * y + z as ternary operation with intrinsics -+*/ -+ -+/* Generic helper functions for doing FMA, i.e. compute x * y + z -+ as ternary operation. -+ These are purely back-end entities and cannot be seen/referenced -+ from IR. */ -+ -+#ifndef __VEX_HOST_AMD64_MADDF_H -+#define __VEX_HOST_AMD64_MADDF_H -+ -+#include "libvex_basictypes.h" -+ -+#if defined(VGA_amd64) -+extern VEX_REGPARM(3) -+ void h_amd64_calc_MAddF32_fma4 ( /*OUT*/Float*, Float*, Float*, Float* ); -+ -+extern VEX_REGPARM(3) -+ void h_amd64_calc_MAddF64_fma4 ( /*OUT*/Double*, Double*, Double*, -+ Double* ); -+#endif -+#endif /* ndef __VEX_HOST_AMD64_MADDF_H */ -+ -+/*---------------------------------------------------------------*/ -+/*--- end host_amd64_maddf.h --*/ -+/*---------------------------------------------------------------*/ -diff --git a/VEX/priv/main_main.c b/VEX/priv/main_main.c -index 482047c7aa1a..eda2fe6eeecd 100644 ---- a/VEX/priv/main_main.c -+++ b/VEX/priv/main_main.c -@@ -1650,6 +1650,8 @@ static const HChar* show_hwcaps_amd64 ( UInt hwcaps ) - { VEX_HWCAPS_AMD64_F16C, "f16c" }, - { VEX_HWCAPS_AMD64_RDRAND, "rdrand" }, - { VEX_HWCAPS_AMD64_RDSEED, "rdseed" }, -+ { VEX_HWCAPS_AMD64_FMA3, "fma" }, /*fma to keep the same naming as /proc/cpuinfo*/ -+ { VEX_HWCAPS_AMD64_FMA4, "fma4" }, - }; - /* Allocate a large enough buffer */ - static HChar buf[sizeof prefix + -diff --git a/VEX/pub/libvex.h b/VEX/pub/libvex.h -index 42c013c1ece6..15e2d39deb10 100644 ---- a/VEX/pub/libvex.h -+++ b/VEX/pub/libvex.h -@@ -101,6 +101,8 @@ typedef - #define VEX_HWCAPS_AMD64_RDRAND (1<<13) /* RDRAND instructions */ - #define VEX_HWCAPS_AMD64_F16C (1<<14) /* F16C instructions */ - #define VEX_HWCAPS_AMD64_RDSEED (1<<15) /* RDSEED instructions */ -+#define VEX_HWCAPS_AMD64_FMA3 (1<<16) /* FMA3 instructions */ -+#define VEX_HWCAPS_AMD64_FMA4 (1<<17) /* FMA4 instructions */ - - /* ppc32: baseline capability is integer only */ - #define VEX_HWCAPS_PPC32_F (1<<8) /* basic (non-optional) FP */ -diff --git a/coregrind/m_machine.c b/coregrind/m_machine.c -index 079383651226..81fb810643cf 100644 ---- a/coregrind/m_machine.c -+++ b/coregrind/m_machine.c -@@ -984,6 +984,7 @@ Bool VG_(machine_get_hwcaps)( void ) - #elif defined(VGA_amd64) - { Bool have_sse3, have_ssse3, have_cx8, have_cx16; - Bool have_lzcnt, have_avx, have_bmi, have_avx2; -+ Bool have_fma3, have_fma4; - Bool have_rdtscp, have_rdrand, have_f16c, have_rdseed; - UInt eax, ebx, ecx, edx, max_basic, max_extended; - ULong xgetbv_0 = 0; -@@ -992,7 +993,8 @@ Bool VG_(machine_get_hwcaps)( void ) - - have_sse3 = have_ssse3 = have_cx8 = have_cx16 - = have_lzcnt = have_avx = have_bmi = have_avx2 -- = have_rdtscp = have_rdrand = have_f16c = have_rdseed = False; -+ = have_rdtscp = have_rdrand = have_f16c = have_rdseed -+ = have_fma3 = have_fma4 = False; - - eax = ebx = ecx = edx = max_basic = max_extended = 0; - -@@ -1022,7 +1024,7 @@ Bool VG_(machine_get_hwcaps)( void ) - // we assume that SSE1 and SSE2 are available by default - have_sse3 = (ecx & (1<<0)) != 0; /* True => have sse3 insns */ - have_ssse3 = (ecx & (1<<9)) != 0; /* True => have Sup SSE3 insns */ -- // fma is ecx:12 -+ have_fma3 = (ecx & (1<<12))!= 0; /* True => have fma3 insns */ - // sse41 is ecx:19 - // sse42 is ecx:20 - // xsave is ecx:26 -@@ -1032,7 +1034,7 @@ Bool VG_(machine_get_hwcaps)( void ) - have_rdrand = (ecx & (1<<30)) != 0; /* True => have RDRAND insns */ - - have_avx = False; -- /* have_fma = False; */ -+ - if ( (ecx & ((1<<28)|(1<<27)|(1<<26))) == ((1<<28)|(1<<27)|(1<<26)) ) { - /* Processor supports AVX instructions and XGETBV is enabled - by OS and AVX instructions are enabled by the OS. */ -@@ -1059,9 +1061,6 @@ Bool VG_(machine_get_hwcaps)( void ) - if (ebx2 == 576 && eax2 == 256) { - have_avx = True; - } -- /* have_fma = (ecx & (1<<12)) != 0; */ -- /* have_fma: Probably correct, but gcc complains due to -- unusedness. */ - } - } - -@@ -1089,6 +1088,11 @@ Bool VG_(machine_get_hwcaps)( void ) - have_rdtscp = (edx & (1<<27)) != 0; /* True => have RDTSVCP */ - } - -+ if (max_extended >= 0x80000001) { -+ VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx); -+ have_fma4= (ecx & (1<<16)) != 0; /* True => have fma4 */ -+ } -+ - /* Check for BMI1 and AVX2. If we have AVX1 (plus OS support). */ - have_bmi = False; - have_avx2 = False; -@@ -1120,7 +1124,9 @@ Bool VG_(machine_get_hwcaps)( void ) - | (have_rdtscp ? VEX_HWCAPS_AMD64_RDTSCP : 0) - | (have_f16c ? VEX_HWCAPS_AMD64_F16C : 0) - | (have_rdrand ? VEX_HWCAPS_AMD64_RDRAND : 0) -- | (have_rdseed ? VEX_HWCAPS_AMD64_RDSEED : 0); -+ | (have_rdseed ? VEX_HWCAPS_AMD64_RDSEED : 0) -+ | (have_fma3 ? VEX_HWCAPS_AMD64_FMA3 : 0) -+ | (have_fma4 ? VEX_HWCAPS_AMD64_FMA4 : 0); - - VG_(machine_get_cache_info)(&vai); - diff --git a/valgrind-3.22.0-amd64-redir-strcmp.patch b/valgrind-3.22.0-amd64-redir-strcmp.patch deleted file mode 100644 index 6e37ba1..0000000 --- a/valgrind-3.22.0-amd64-redir-strcmp.patch +++ /dev/null @@ -1,66 +0,0 @@ -diff --git a/coregrind/m_redir.c b/coregrind/m_redir.c -index cef241b4f85aad23..ce6073a62190a639 100644 ---- a/coregrind/m_redir.c -+++ b/coregrind/m_redir.c -@@ -1410,6 +1410,15 @@ void VG_(redir_initialise) ( void ) - complain_about_stripped_glibc_ldso - # endif - ); -+ add_hardwired_spec( -+ "ld-linux-x86-64.so.2", "strcmp", -+ (Addr)&VG_(amd64_linux_REDIR_FOR_strcmp), -+# ifndef GLIBC_MANDATORY_STRLEN_REDIRECT -+ NULL -+# else -+ complain_about_stripped_glibc_ldso -+# endif -+ ); - } - - # elif defined(VGP_ppc32_linux) -diff --git a/coregrind/m_trampoline.S b/coregrind/m_trampoline.S -index da96972323824bdb..716afb2121b84b57 100644 ---- a/coregrind/m_trampoline.S -+++ b/coregrind/m_trampoline.S -@@ -218,6 +218,29 @@ VG_(amd64_linux_REDIR_FOR_strlen): - .LfnE5: - .size VG_(amd64_linux_REDIR_FOR_strlen), .-VG_(amd64_linux_REDIR_FOR_strlen) - -+/* There's no particular reason that this needs to be handwritten -+ assembly, but since that's what this file contains, here's a -+ simple strcmp.) -+*/ -+.global VG_(amd64_linux_REDIR_FOR_strcmp) -+.type VG_(amd64_linux_REDIR_FOR_strcmp), @function -+VG_(amd64_linux_REDIR_FOR_strcmp): -+ xorl %ecx, %ecx -+1: -+ movzbl (%rdi, %rcx), %eax -+ movzbl (%rsi, %rcx), %edx -+ testb %al, %al -+ jne 2f -+ movl %edx, %eax -+ negl %eax -+ ret -+2: -+ incq %rcx -+ subl %edx, %eax -+ je 1b -+ ret -+.size VG_(amd64_linux_REDIR_FOR_strcmp), .-VG_(amd64_linux_REDIR_FOR_strcmp) -+ - .global VG_(amd64_linux_REDIR_FOR_index) - .type VG_(amd64_linux_REDIR_FOR_index), @function - VG_(amd64_linux_REDIR_FOR_index): -diff --git a/coregrind/pub_core_trampoline.h b/coregrind/pub_core_trampoline.h -index 54c575a7233a9451..c11060139e576616 100644 ---- a/coregrind/pub_core_trampoline.h -+++ b/coregrind/pub_core_trampoline.h -@@ -77,6 +77,7 @@ extern Addr VG_(amd64_linux_REDIR_FOR_vgettimeofday); - extern Addr VG_(amd64_linux_REDIR_FOR_vtime); - extern Addr VG_(amd64_linux_REDIR_FOR_vgetcpu); - extern UInt VG_(amd64_linux_REDIR_FOR_strlen)( void* ); -+extern UInt VG_(amd64_linux_REDIR_FOR_strcmp)( void*, void* ); - extern Char* VG_(amd64_linux_REDIR_FOR_index) ( const Char*, Int ); - #endif - diff --git a/valgrind-3.22.0-fchmodat2.patch b/valgrind-3.22.0-fchmodat2.patch deleted file mode 100644 index b1ba313..0000000 --- a/valgrind-3.22.0-fchmodat2.patch +++ /dev/null @@ -1,206 +0,0 @@ -From a43e62dddcf51ec6578a90c5988a41e856b44b05 Mon Sep 17 00:00:00 2001 -From: Mark Wielaard -Date: Sat, 18 Nov 2023 21:17:02 +0100 -Subject: [PATCH] Add fchmodat2 syscall on linux - -fchmodat2 is a new syscall on linux 6.6. It is a variant of fchmodat -that takes an extra flags argument. - -https://bugs.kde.org/show_bug.cgi?id=477198 - -(cherry picked from commit 372d09fd9a8d76847c81092ebff71c80fd6c145d) ---- - NEWS | 1 + - coregrind/m_syswrap/priv_syswrap-linux.h | 3 +++ - coregrind/m_syswrap/syswrap-amd64-linux.c | 2 ++ - coregrind/m_syswrap/syswrap-arm-linux.c | 2 ++ - coregrind/m_syswrap/syswrap-arm64-linux.c | 2 ++ - coregrind/m_syswrap/syswrap-linux.c | 11 +++++++++++ - coregrind/m_syswrap/syswrap-mips32-linux.c | 2 ++ - coregrind/m_syswrap/syswrap-mips64-linux.c | 1 + - coregrind/m_syswrap/syswrap-nanomips-linux.c | 1 + - coregrind/m_syswrap/syswrap-ppc32-linux.c | 2 ++ - coregrind/m_syswrap/syswrap-ppc64-linux.c | 2 ++ - coregrind/m_syswrap/syswrap-s390x-linux.c | 2 ++ - coregrind/m_syswrap/syswrap-x86-linux.c | 2 ++ - include/vki/vki-scnums-shared-linux.h | 2 ++ - 14 files changed, 35 insertions(+) - -diff --git a/coregrind/m_syswrap/priv_syswrap-linux.h b/coregrind/m_syswrap/priv_syswrap-linux.h -index 7c9decf5a..798c456c9 100644 ---- a/coregrind/m_syswrap/priv_syswrap-linux.h -+++ b/coregrind/m_syswrap/priv_syswrap-linux.h -@@ -331,6 +331,9 @@ DECL_TEMPLATE(linux, sys_openat2); - // Linux-specific (new in Linux 5.14) - DECL_TEMPLATE(linux, sys_memfd_secret); - -+// Since Linux 6.6 -+DECL_TEMPLATE(linux, sys_fchmodat2); -+ - /* --------------------------------------------------------------------- - Wrappers for sockets and ipc-ery. These are split into standalone - procedures because x86-linux hides them inside multiplexors -diff --git a/coregrind/m_syswrap/syswrap-amd64-linux.c b/coregrind/m_syswrap/syswrap-amd64-linux.c -index 008600798..fe17d118b 100644 ---- a/coregrind/m_syswrap/syswrap-amd64-linux.c -+++ b/coregrind/m_syswrap/syswrap-amd64-linux.c -@@ -886,6 +886,8 @@ static SyscallTableEntry syscall_table[] = { - LINXY(__NR_epoll_pwait2, sys_epoll_pwait2), // 441 - - LINXY(__NR_memfd_secret, sys_memfd_secret), // 447 -+ -+ LINX_(__NR_fchmodat2, sys_fchmodat2), // 452 - }; - - SyscallTableEntry* ML_(get_linux_syscall_entry) ( UInt sysno ) -diff --git a/coregrind/m_syswrap/syswrap-arm-linux.c b/coregrind/m_syswrap/syswrap-arm-linux.c -index 9a7a1e0d2..811931d3b 100644 ---- a/coregrind/m_syswrap/syswrap-arm-linux.c -+++ b/coregrind/m_syswrap/syswrap-arm-linux.c -@@ -1059,6 +1059,8 @@ static SyscallTableEntry syscall_main_table[] = { - LINX_(__NR_faccessat2, sys_faccessat2), // 439 - - LINXY(__NR_epoll_pwait2, sys_epoll_pwait2), // 441 -+ -+ LINX_(__NR_fchmodat2, sys_fchmodat2), // 452 - }; - - -diff --git a/coregrind/m_syswrap/syswrap-arm64-linux.c b/coregrind/m_syswrap/syswrap-arm64-linux.c -index 6af7bab83..3307bc2ca 100644 ---- a/coregrind/m_syswrap/syswrap-arm64-linux.c -+++ b/coregrind/m_syswrap/syswrap-arm64-linux.c -@@ -840,6 +840,8 @@ static SyscallTableEntry syscall_main_table[] = { - LINXY(__NR_epoll_pwait2, sys_epoll_pwait2), // 441 - - LINXY(__NR_memfd_secret, sys_memfd_secret), // 447 -+ -+ LINX_(__NR_fchmodat2, sys_fchmodat2), // 452 - }; - - -diff --git a/coregrind/m_syswrap/syswrap-linux.c b/coregrind/m_syswrap/syswrap-linux.c -index d571fc327..efa47f2e6 100644 ---- a/coregrind/m_syswrap/syswrap-linux.c -+++ b/coregrind/m_syswrap/syswrap-linux.c -@@ -6059,6 +6059,17 @@ PRE(sys_fchmodat) - PRE_MEM_RASCIIZ( "fchmodat(path)", ARG2 ); - } - -+PRE(sys_fchmodat2) -+{ -+ PRINT("sys_fchmodat2 ( %ld, %#" FMT_REGWORD "x(%s), %" FMT_REGWORD "u, %" -+ FMT_REGWORD "u )", -+ SARG1, ARG2, (HChar*)(Addr)ARG2, ARG3, ARG4); -+ PRE_REG_READ4(long, "fchmodat2", -+ int, dfd, const char *, path, vki_mode_t, mode, -+ unsigned int, flags); -+ PRE_MEM_RASCIIZ( "fchmodat2(pathname)", ARG2 ); -+} -+ - PRE(sys_faccessat) - { - PRINT("sys_faccessat ( %ld, %#" FMT_REGWORD "x(%s), %ld )", -diff --git a/coregrind/m_syswrap/syswrap-mips32-linux.c b/coregrind/m_syswrap/syswrap-mips32-linux.c -index 6268a00dd..74a1f6eac 100644 ---- a/coregrind/m_syswrap/syswrap-mips32-linux.c -+++ b/coregrind/m_syswrap/syswrap-mips32-linux.c -@@ -1143,6 +1143,8 @@ static SyscallTableEntry syscall_main_table[] = { - LINX_ (__NR_faccessat2, sys_faccessat2), // 439 - - LINXY(__NR_epoll_pwait2, sys_epoll_pwait2), // 441 -+ -+ LINX_(__NR_fchmodat2, sys_fchmodat2), // 452 - }; - - SyscallTableEntry* ML_(get_linux_syscall_entry) (UInt sysno) -diff --git a/coregrind/m_syswrap/syswrap-mips64-linux.c b/coregrind/m_syswrap/syswrap-mips64-linux.c -index 6cdf25893..4e8508b7a 100644 ---- a/coregrind/m_syswrap/syswrap-mips64-linux.c -+++ b/coregrind/m_syswrap/syswrap-mips64-linux.c -@@ -820,6 +820,7 @@ static SyscallTableEntry syscall_main_table[] = { - LINXY (__NR_close_range, sys_close_range), - LINX_ (__NR_faccessat2, sys_faccessat2), - LINXY(__NR_epoll_pwait2, sys_epoll_pwait2), -+ LINX_ (__NR_fchmodat2, sys_fchmodat2), - }; - - SyscallTableEntry * ML_(get_linux_syscall_entry) ( UInt sysno ) -diff --git a/coregrind/m_syswrap/syswrap-nanomips-linux.c b/coregrind/m_syswrap/syswrap-nanomips-linux.c -index d724cde74..7859900c1 100644 ---- a/coregrind/m_syswrap/syswrap-nanomips-linux.c -+++ b/coregrind/m_syswrap/syswrap-nanomips-linux.c -@@ -829,6 +829,7 @@ static SyscallTableEntry syscall_main_table[] = { - LINXY (__NR_close_range, sys_close_range), - LINX_ (__NR_faccessat2, sys_faccessat2), - LINXY (__NR_epoll_pwait2, sys_epoll_pwait2), -+ LINX_ (__NR_fchmodat2, sys_fchmodat2), - }; - - SyscallTableEntry* ML_(get_linux_syscall_entry) (UInt sysno) -diff --git a/coregrind/m_syswrap/syswrap-ppc32-linux.c b/coregrind/m_syswrap/syswrap-ppc32-linux.c -index c0cfef235..1e19116ee 100644 ---- a/coregrind/m_syswrap/syswrap-ppc32-linux.c -+++ b/coregrind/m_syswrap/syswrap-ppc32-linux.c -@@ -1063,6 +1063,8 @@ static SyscallTableEntry syscall_table[] = { - LINX_(__NR_faccessat2, sys_faccessat2), // 439 - - LINXY (__NR_epoll_pwait2, sys_epoll_pwait2), // 441 -+ -+ LINX_ (__NR_fchmodat2, sys_fchmodat2), // 452 - }; - - SyscallTableEntry* ML_(get_linux_syscall_entry) ( UInt sysno ) -diff --git a/coregrind/m_syswrap/syswrap-ppc64-linux.c b/coregrind/m_syswrap/syswrap-ppc64-linux.c -index f5976f30c..1097212a4 100644 ---- a/coregrind/m_syswrap/syswrap-ppc64-linux.c -+++ b/coregrind/m_syswrap/syswrap-ppc64-linux.c -@@ -1032,6 +1032,8 @@ static SyscallTableEntry syscall_table[] = { - LINX_(__NR_faccessat2, sys_faccessat2), // 439 - - LINXY (__NR_epoll_pwait2, sys_epoll_pwait2), // 441 -+ -+ LINX_ (__NR_fchmodat2, sys_fchmodat2), // 452 - }; - - SyscallTableEntry* ML_(get_linux_syscall_entry) ( UInt sysno ) -diff --git a/coregrind/m_syswrap/syswrap-s390x-linux.c b/coregrind/m_syswrap/syswrap-s390x-linux.c -index afba154e7..3588672c7 100644 ---- a/coregrind/m_syswrap/syswrap-s390x-linux.c -+++ b/coregrind/m_syswrap/syswrap-s390x-linux.c -@@ -873,6 +873,8 @@ static SyscallTableEntry syscall_table[] = { - LINX_(__NR_faccessat2, sys_faccessat2), // 439 - - LINXY(__NR_epoll_pwait2, sys_epoll_pwait2), // 441 -+ -+ LINX_ (__NR_fchmodat2, sys_fchmodat2), // 452 - }; - - SyscallTableEntry* ML_(get_linux_syscall_entry) ( UInt sysno ) -diff --git a/coregrind/m_syswrap/syswrap-x86-linux.c b/coregrind/m_syswrap/syswrap-x86-linux.c -index da4fd8fa2..58badc6b0 100644 ---- a/coregrind/m_syswrap/syswrap-x86-linux.c -+++ b/coregrind/m_syswrap/syswrap-x86-linux.c -@@ -1658,6 +1658,8 @@ static SyscallTableEntry syscall_table[] = { - LINXY(__NR_epoll_pwait2, sys_epoll_pwait2), // 441 - - LINXY(__NR_memfd_secret, sys_memfd_secret), // 447 -+ -+ LINX_(__NR_fchmodat2, sys_fchmodat2), // 452 - }; - - SyscallTableEntry* ML_(get_linux_syscall_entry) ( UInt sysno ) -diff --git a/include/vki/vki-scnums-shared-linux.h b/include/vki/vki-scnums-shared-linux.h -index 542382b53..a4cd87149 100644 ---- a/include/vki/vki-scnums-shared-linux.h -+++ b/include/vki/vki-scnums-shared-linux.h -@@ -50,4 +50,6 @@ - - #define __NR_memfd_secret 447 - -+#define __NR_fchmodat2 452 -+ - #endif --- -2.39.3 - diff --git a/valgrind-3.22.0-gcc-builtin_strcmp-128-256-bit-vector.patch b/valgrind-3.22.0-gcc-builtin_strcmp-128-256-bit-vector.patch deleted file mode 100644 index cbcf6e0..0000000 --- a/valgrind-3.22.0-gcc-builtin_strcmp-128-256-bit-vector.patch +++ /dev/null @@ -1,355 +0,0 @@ -From 316ef0d22b6f41722cc71316a6b59e60e7c9576c Mon Sep 17 00:00:00 2001 -From: Julian Seward -Date: Mon, 11 Mar 2024 17:53:14 +0100 -Subject: [PATCH] Handle gcc __builtin_strcmp using 128/256 bit vectors with - sse4.1, avx/avx2 - -* amd64 front end: redo the translation into IR for PTEST, so as to - use only IROps which we know Memcheck can do exact instrumentation - for. Handling for both the 128- and 256-bit cases is has been - changed. - -* ir_opt.c: add some constant folding rules to support the above. In - particular, for the case `ptest %reg, %reg` (the same reg twice), we - want rflags.C to be set to a defined-1 even if %reg is completely - undefined. Doing that requires folding `x and not(x)` to zero when - x has type V128 or V256. - -* memcheck/tests/amd64/rh2257546_{128,256}.c: new test cases - -https://bugzilla.redhat.com/show_bug.cgi?id=2257546 ---- - VEX/priv/guest_amd64_toIR.c | 64 +++++++++---------- - VEX/priv/ir_opt.c | 34 ++++++++++ - memcheck/tests/amd64/Makefile.am | 7 +- - memcheck/tests/amd64/rh2257546_128.c | 32 ++++++++++ - memcheck/tests/amd64/rh2257546_128.stderr.exp | 0 - memcheck/tests/amd64/rh2257546_128.stdout.exp | 1 + - memcheck/tests/amd64/rh2257546_128.vgtest | 2 + - memcheck/tests/amd64/rh2257546_256.c | 32 ++++++++++ - memcheck/tests/amd64/rh2257546_256.stderr.exp | 0 - memcheck/tests/amd64/rh2257546_256.stdout.exp | 1 + - memcheck/tests/amd64/rh2257546_256.vgtest | 3 + - 11 files changed, 141 insertions(+), 35 deletions(-) - create mode 100644 memcheck/tests/amd64/rh2257546_128.c - create mode 100644 memcheck/tests/amd64/rh2257546_128.stderr.exp - create mode 100644 memcheck/tests/amd64/rh2257546_128.stdout.exp - create mode 100644 memcheck/tests/amd64/rh2257546_128.vgtest - create mode 100644 memcheck/tests/amd64/rh2257546_256.c - create mode 100644 memcheck/tests/amd64/rh2257546_256.stderr.exp - create mode 100644 memcheck/tests/amd64/rh2257546_256.stdout.exp - create mode 100644 memcheck/tests/amd64/rh2257546_256.vgtest - -diff --git a/VEX/priv/guest_amd64_toIR.c b/VEX/priv/guest_amd64_toIR.c -index 0414aa5c5..d7c25042d 100644 ---- a/VEX/priv/guest_amd64_toIR.c -+++ b/VEX/priv/guest_amd64_toIR.c -@@ -16826,13 +16826,18 @@ static Long dis_VBLENDV_256 ( const VexAbiInfo* vbi, Prefix pfx, Long delta, - - static void finish_xTESTy ( IRTemp andV, IRTemp andnV, Int sign ) - { -- /* Set Z=1 iff (vecE & vecG) == 0 -- Set C=1 iff (vecE & not vecG) == 0 -+ /* Set Z=1 iff (vecE & vecG) == 0--(128)--0 -+ Set C=1 iff (vecE & not vecG) == 0--(128)--0 -+ -+ For the case `sign == 0`, be careful to use only IROps that can be -+ instrumented exactly by memcheck. This is because PTEST is used for -+ __builtin_strcmp in gcc14. See -+ https://bugzilla.redhat.com/show_bug.cgi?id=2257546 - */ - - /* andV, andnV: vecE & vecG, vecE and not(vecG) */ - -- /* andV resp. andnV, reduced to 64-bit values, by or-ing the top -+ /* andV resp. andnV, are reduced to 64-bit values by or-ing the top - and bottom 64-bits together. It relies on this trick: - - InterleaveLO64x2([a,b],[c,d]) == [b,d] hence -@@ -16862,11 +16867,13 @@ static void finish_xTESTy ( IRTemp andV, IRTemp andnV, Int sign ) - binop(Iop_InterleaveHI64x2, - mkexpr(andnV), mkexpr(andnV))))); - -+ // Make z64 and c64 be either all-0s or all-1s - IRTemp z64 = newTemp(Ity_I64); - IRTemp c64 = newTemp(Ity_I64); -+ - if (sign == 64) { -- /* When only interested in the most significant bit, just shift -- arithmetically right and negate. */ -+ /* When only interested in the most significant bit, just copy bit 63 -+ into all bit positions, then invert. */ - assign(z64, - unop(Iop_Not64, - binop(Iop_Sar64, mkexpr(and64), mkU8(63)))); -@@ -16874,37 +16881,28 @@ static void finish_xTESTy ( IRTemp andV, IRTemp andnV, Int sign ) - assign(c64, - unop(Iop_Not64, - binop(Iop_Sar64, mkexpr(andn64), mkU8(63)))); -- } else { -- if (sign == 32) { -- /* When interested in bit 31 and bit 63, mask those bits and -- fallthrough into the PTEST handling. */ -- IRTemp t0 = newTemp(Ity_I64); -- IRTemp t1 = newTemp(Ity_I64); -- IRTemp t2 = newTemp(Ity_I64); -- assign(t0, mkU64(0x8000000080000000ULL)); -- assign(t1, binop(Iop_And64, mkexpr(and64), mkexpr(t0))); -- assign(t2, binop(Iop_And64, mkexpr(andn64), mkexpr(t0))); -- and64 = t1; -- andn64 = t2; -- } -- /* Now convert and64, andn64 to all-zeroes or all-1s, so we can -- slice out the Z and C bits conveniently. We use the standard -- trick all-zeroes -> all-zeroes, anything-else -> all-ones -- done by "(x | -x) >>s (word-size - 1)". -- */ -+ } else if (sign == 32) { -+ /* If we're interested into bits 63 and 31, OR bit 31 into bit 63, copy -+ bit 63 into all bit positions, then invert. */ -+ IRTemp and3264 = newTemp(Ity_I64); -+ assign(and3264, binop(Iop_Or64, mkexpr(and64), -+ binop(Iop_Shl64, mkexpr(and64), mkU8(32)))); - assign(z64, - unop(Iop_Not64, -- binop(Iop_Sar64, -- binop(Iop_Or64, -- binop(Iop_Sub64, mkU64(0), mkexpr(and64)), -- mkexpr(and64)), mkU8(63)))); -+ binop(Iop_Sar64, mkexpr(and3264), mkU8(63)))); - -+ IRTemp andn3264 = newTemp(Ity_I64); -+ assign(andn3264, binop(Iop_Or64, mkexpr(andn64), -+ binop(Iop_Shl64, mkexpr(andn64), mkU8(32)))); - assign(c64, - unop(Iop_Not64, -- binop(Iop_Sar64, -- binop(Iop_Or64, -- binop(Iop_Sub64, mkU64(0), mkexpr(andn64)), -- mkexpr(andn64)), mkU8(63)))); -+ binop(Iop_Sar64, mkexpr(andn3264), mkU8(63)))); -+ } else { -+ vassert(sign == 0); -+ assign(z64, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(and64), mkU64(0)), -+ mkU64(~0ULL), mkU64(0ULL))); -+ assign(c64, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(andn64), mkU64(0)), -+ mkU64(~0ULL), mkU64(0ULL))); - } - - /* And finally, slice out the Z and C flags and set the flags -@@ -16966,9 +16964,7 @@ static Long dis_xTESTy_128 ( const VexAbiInfo* vbi, Prefix pfx, - IRTemp andnV = newTemp(Ity_V128); - assign(andV, binop(Iop_AndV128, mkexpr(vecE), mkexpr(vecG))); - assign(andnV, binop(Iop_AndV128, -- mkexpr(vecE), -- binop(Iop_XorV128, mkexpr(vecG), -- mkV128(0xFFFF)))); -+ mkexpr(vecE), unop(Iop_NotV128, mkexpr(vecG)))); - - finish_xTESTy ( andV, andnV, sign ); - return delta; -diff --git a/VEX/priv/ir_opt.c b/VEX/priv/ir_opt.c -index f918e9f85..6453f4fdf 100644 ---- a/VEX/priv/ir_opt.c -+++ b/VEX/priv/ir_opt.c -@@ -1693,6 +1693,17 @@ static IRExpr* fold_Expr_WRK ( IRExpr** env, IRExpr* e ) - break; - } - -+ /* Similarly .. */ -+ case Iop_V256toV128_0: case Iop_V256toV128_1: { -+ UInt v256 = e->Iex.Unop.arg->Iex.Const.con->Ico.V256; -+ if (v256 == 0x00000000) { -+ e2 = IRExpr_Const(IRConst_V128(0x0000)); -+ } else { -+ goto unhandled; -+ } -+ break; -+ } -+ - case Iop_ZeroHI64ofV128: { - /* Could do better here -- only need to look at the bottom 64 bits - of the argument, really. */ -@@ -2129,6 +2140,8 @@ static IRExpr* fold_Expr_WRK ( IRExpr** env, IRExpr* e ) - } - - /* -- V128 stuff -- */ -+ case Iop_InterleaveLO64x2: // This, and the HI64, are created -+ case Iop_InterleaveHI64x2: // by the amd64 PTEST translation - case Iop_InterleaveLO8x16: { - /* This turns up a lot in Memcheck instrumentation of - Icc generated code. I don't know why. */ -@@ -2321,6 +2334,27 @@ static IRExpr* fold_Expr_WRK ( IRExpr** env, IRExpr* e ) - break; - } - } -+ /* AndV128( x, NotV128( x ) ) ==> 0...0 and -+ AndV256( x, NotV256( x ) ) ==> 0...0 -+ This is generated by amd64 `ptest %xmmReg, %xmmReg` -+ (the same reg both times) -+ See https://bugzilla.redhat.com/show_bug.cgi?id=2257546 */ -+ if (e->Iex.Binop.op == Iop_AndV128 -+ || e->Iex.Binop.op == Iop_AndV256) { -+ Bool isV256 = e->Iex.Binop.op == Iop_AndV256; -+ IRExpr* x1 = chase(env, e->Iex.Binop.arg1); -+ IRExpr* rhs = chase(env, e->Iex.Binop.arg2); -+ if (x1 && rhs -+ && rhs->tag == Iex_Unop -+ && rhs->Iex.Unop.op == (isV256 ? Iop_NotV256 -+ : Iop_NotV128)) { -+ IRExpr* x2 = chase(env, rhs->Iex.Unop.arg); -+ if (x2 && sameIRExprs(env, x1, x2)) { -+ e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op); -+ break; -+ } -+ } -+ } - break; - - case Iop_OrV128: -diff --git a/memcheck/tests/amd64/Makefile.am b/memcheck/tests/amd64/Makefile.am -index 0d2812dd8..906b8f393 100644 ---- a/memcheck/tests/amd64/Makefile.am -+++ b/memcheck/tests/amd64/Makefile.am -@@ -20,6 +20,10 @@ EXTRA_DIST = \ - insn-pmovmskb.stderr.exp-clang \ - more_x87_fp.stderr.exp more_x87_fp.stdout.exp more_x87_fp.vgtest \ - pcmpgt.stderr.exp pcmpgt.vgtest \ -+ rh2257546_128.vgtest \ -+ rh2257546_128.stderr.exp rh2257546_128.stdout.exp \ -+ rh2257546_256.vgtest \ -+ rh2257546_256.stderr.exp rh2257546_256.stdout.exp \ - sh-mem-vec128-plo-no.vgtest \ - sh-mem-vec128-plo-no.stderr.exp \ - sh-mem-vec128-plo-no.stdout.exp \ -@@ -46,12 +50,13 @@ check_PROGRAMS = \ - insn-bsfl \ - insn-pmovmskb \ - pcmpgt \ -+ rh2257546_128 \ - sh-mem-vec128 \ - sse_memory \ - xor-undef-amd64 - - if BUILD_AVX_TESTS -- check_PROGRAMS += sh-mem-vec256 xsave-avx -+ check_PROGRAMS += rh2257546_256 sh-mem-vec256 xsave-avx - endif - if HAVE_ASM_CONSTRAINT_P - check_PROGRAMS += insn-pcmpistri -diff --git a/memcheck/tests/amd64/rh2257546_128.c b/memcheck/tests/amd64/rh2257546_128.c -new file mode 100644 -index 000000000..a405aa775 ---- /dev/null -+++ b/memcheck/tests/amd64/rh2257546_128.c -@@ -0,0 +1,32 @@ -+ -+// This should run on memcheck without reporting an undef-value error. -+// See https://bugzilla.redhat.com/show_bug.cgi?id=2257546 -+ -+#include -+#include -+ -+int main ( void ) -+{ -+ char* c1 = malloc(16); -+ c1[0] = 'x'; c1[1] = 'y'; c1[2] = 'x'; c1[3] = 0; -+ -+ char* c2 = "foobarxyzzyfoobarzyzzy"; // strlen > 16 -+ -+ long long int res; -+ __asm__ __volatile__( -+ "movdqu (%1), %%xmm4" "\n\t" -+ "movdqu (%2), %%xmm5" "\n\t" -+ "pxor %%xmm4, %%xmm5" "\n\t" -+ "ptest %%xmm5, %%xmm5" "\n\t" -+ "je zzz1f" "\n\t" -+ "mov $99, %0" "\n\t" -+ "jmp zzzafter" "\n" -+ "zzz1f:" "\n\t" -+ "mov $88, %0" "\n" -+ "zzzafter:" "\n\t" -+ : /*OUT*/"=r"(res) : /*IN*/"r"(c1),"r"(c2) : /*TRASH*/"xmm4","xmm5","cc" -+ ); -+ printf("res = %lld\n", res); -+ free(c1); -+ return 0; -+} -diff --git a/memcheck/tests/amd64/rh2257546_128.stderr.exp b/memcheck/tests/amd64/rh2257546_128.stderr.exp -new file mode 100644 -index 000000000..e69de29bb -diff --git a/memcheck/tests/amd64/rh2257546_128.stdout.exp b/memcheck/tests/amd64/rh2257546_128.stdout.exp -new file mode 100644 -index 000000000..454131089 ---- /dev/null -+++ b/memcheck/tests/amd64/rh2257546_128.stdout.exp -@@ -0,0 +1 @@ -+res = 99 -diff --git a/memcheck/tests/amd64/rh2257546_128.vgtest b/memcheck/tests/amd64/rh2257546_128.vgtest -new file mode 100644 -index 000000000..94414cd6f ---- /dev/null -+++ b/memcheck/tests/amd64/rh2257546_128.vgtest -@@ -0,0 +1,2 @@ -+prog: rh2257546_128 -+vgopts: -q -diff --git a/memcheck/tests/amd64/rh2257546_256.c b/memcheck/tests/amd64/rh2257546_256.c -new file mode 100644 -index 000000000..235005ca6 ---- /dev/null -+++ b/memcheck/tests/amd64/rh2257546_256.c -@@ -0,0 +1,32 @@ -+ -+// This should run on memcheck without reporting an undef-value error. -+// See https://bugzilla.redhat.com/show_bug.cgi?id=2257546 -+ -+#include -+#include -+ -+int main ( void ) -+{ -+ char* c1 = malloc(32); -+ c1[0] = 'x'; c1[1] = 'y'; c1[2] = 'x'; c1[3] = 0; -+ -+ char* c2 = "foobarxyzzyfoobarzyzzyandawholelotmoretoo"; // strlen > 32 -+ -+ long long int res; -+ __asm__ __volatile__( -+ "vmovdqu (%1), %%ymm4" "\n\t" -+ "vmovdqu (%2), %%ymm5" "\n\t" -+ "vpxor %%ymm4, %%ymm5, %%ymm5" "\n\t" -+ "vptest %%ymm5, %%ymm5" "\n\t" -+ "je zzz1f" "\n\t" -+ "mov $99, %0" "\n\t" -+ "jmp zzzafter" "\n" -+ "zzz1f:" "\n\t" -+ "mov $88, %0" "\n" -+ "zzzafter:" "\n\t" -+ : /*OUT*/"=r"(res) : /*IN*/"r"(c1),"r"(c2) : /*TRASH*/"ymm4","ymm5","cc" -+ ); -+ printf("res = %lld\n", res); -+ free(c1); -+ return 0; -+} -diff --git a/memcheck/tests/amd64/rh2257546_256.stderr.exp b/memcheck/tests/amd64/rh2257546_256.stderr.exp -new file mode 100644 -index 000000000..e69de29bb -diff --git a/memcheck/tests/amd64/rh2257546_256.stdout.exp b/memcheck/tests/amd64/rh2257546_256.stdout.exp -new file mode 100644 -index 000000000..454131089 ---- /dev/null -+++ b/memcheck/tests/amd64/rh2257546_256.stdout.exp -@@ -0,0 +1 @@ -+res = 99 -diff --git a/memcheck/tests/amd64/rh2257546_256.vgtest b/memcheck/tests/amd64/rh2257546_256.vgtest -new file mode 100644 -index 000000000..86eef8fe2 ---- /dev/null -+++ b/memcheck/tests/amd64/rh2257546_256.vgtest -@@ -0,0 +1,3 @@ -+prereq: test -e rh2257546_256 -+prog: rh2257546_256 -+vgopts: -q --- -2.43.0 - diff --git a/valgrind-3.22.0-gdb-thread-exited.patch b/valgrind-3.22.0-gdb-thread-exited.patch deleted file mode 100644 index 4a4d451..0000000 --- a/valgrind-3.22.0-gdb-thread-exited.patch +++ /dev/null @@ -1,23 +0,0 @@ -commit 7aa63de01a3cda2a346f90be16bd29fd7b410c63 -Author: Mark Wielaard -Date: Mon Jan 22 14:08:30 2024 +0100 - - gdbserver_tests/filter_gdb.in: delete thread exiting - - The hginfo testcase would fail with GDB 14.1 because it would output - an extra line saying: [Thread .... exited] - -diff --git a/gdbserver_tests/filter_gdb.in b/gdbserver_tests/filter_gdb.in -index 16186dfe2c9b..6c85284b7531 100755 ---- a/gdbserver_tests/filter_gdb.in -+++ b/gdbserver_tests/filter_gdb.in -@@ -65,6 +65,9 @@ s/Thread [0-9][0-9]*/Thread ..../g - # delete thread switches - /\[Switching to Thread ....\]/d - -+# delete thread exiting -+/\[Thread .... exited\]/d -+ - # ??? Probably we had a 'Switching ' message in front of - # a frame line ? - s/\(\[Switching to thread [1234] (Thread ....)\]\)#0/\1\n#0/ diff --git a/valgrind-3.22.0-pth_mempcpy_false_races.patch b/valgrind-3.22.0-pth_mempcpy_false_races.patch deleted file mode 100644 index d1d426d..0000000 --- a/valgrind-3.22.0-pth_mempcpy_false_races.patch +++ /dev/null @@ -1,33 +0,0 @@ -commit f47109d30ca319a3bcd4288803e30a473d1506d2 -Author: Mark Wielaard -Date: Fri Apr 12 15:18:51 2024 +0200 - - Add new helgrind race suppression for pth_mempcpy_false_races - - In glibc 2.39+ the internal __printf_buffer_write might use memmove - instead of a memcpy variant. Add an extra suppression variant using - memmove. - - This makes pth_mempcpy_false_races pass again. - - See also https://bugs.kde.org/show_bug.cgi?id=450962 - -diff --git a/glibc-2.X-helgrind.supp.in b/glibc-2.X-helgrind.supp.in -index 2b576b982e42..dce611a09e38 100644 ---- a/glibc-2.X-helgrind.supp.in -+++ b/glibc-2.X-helgrind.supp.in -@@ -63,6 +63,14 @@ - fun:__printf_buffer_write - } - -+{ -+ helgrind-glibc-__printf_buffer_write2 -+ Helgrind:Race -+ fun:*memmove* -+ ... -+ fun:__printf_buffer_write -+} -+ - { - helgrind-glibc2X-005 - Helgrind:Race diff --git a/valgrind-3.22.0-rodata.patch b/valgrind-3.22.0-rodata.patch deleted file mode 100644 index 47f31ed..0000000 --- a/valgrind-3.22.0-rodata.patch +++ /dev/null @@ -1,122 +0,0 @@ -commit 1d00e5ce0fb069911c4b525ec38289fb5d9021b0 -Author: Paul Floyd -Date: Sat Nov 18 08:49:34 2023 +0100 - - Bug 476548 - valgrind 3.22.0 fails on assertion when loading debuginfo file produced by mold - - (cherry picked from commit 9ea4ae66707a4dcc6f4328e11911652e4418c585) - -diff --git a/coregrind/m_debuginfo/image.c b/coregrind/m_debuginfo/image.c -index 02e509071..445f95555 100644 ---- a/coregrind/m_debuginfo/image.c -+++ b/coregrind/m_debuginfo/image.c -@@ -1221,6 +1221,20 @@ Int ML_(img_strcmp_c)(DiImage* img, DiOffT off1, const HChar* str2) - } - } - -+Int ML_(img_strcmp_n)(DiImage* img, DiOffT off1, const HChar* str2, Word n) -+{ -+ ensure_valid(img, off1, 1, "ML_(img_strcmp_c)"); -+ while (n) { -+ UChar c1 = get(img, off1); -+ UChar c2 = *(const UChar*)str2; -+ if (c1 < c2) return -1; -+ if (c1 > c2) return 1; -+ if (c1 == 0) return 0; -+ off1++; str2++; --n; -+ } -+ return 0; -+} -+ - UChar ML_(img_get_UChar)(DiImage* img, DiOffT offset) - { - ensure_valid(img, offset, 1, "ML_(img_get_UChar)"); -diff --git a/coregrind/m_debuginfo/priv_image.h b/coregrind/m_debuginfo/priv_image.h -index a49846f14..c91e49f01 100644 ---- a/coregrind/m_debuginfo/priv_image.h -+++ b/coregrind/m_debuginfo/priv_image.h -@@ -115,6 +115,10 @@ Int ML_(img_strcmp)(DiImage* img, DiOffT off1, DiOffT off2); - cast to HChar before comparison. */ - Int ML_(img_strcmp_c)(DiImage* img, DiOffT off1, const HChar* str2); - -+/* Do strncmp of a C string in the image vs a normal one. Chars are -+ cast to HChar before comparison. */ -+Int ML_(img_strcmp_n)(DiImage* img, DiOffT off1, const HChar* str2, Word n); -+ - /* Do strlen of a C string in the image. */ - SizeT ML_(img_strlen)(DiImage* img, DiOffT off); - -diff --git a/coregrind/m_debuginfo/readelf.c b/coregrind/m_debuginfo/readelf.c -index fb64ed976..46f8c8343 100644 ---- a/coregrind/m_debuginfo/readelf.c -+++ b/coregrind/m_debuginfo/readelf.c -@@ -2501,8 +2501,7 @@ Bool ML_(read_elf_object) ( struct _DebugInfo* di ) - di->rodata_avma += inrw1->bias; - di->rodata_bias = inrw1->bias; - di->rodata_debug_bias = inrw1->bias; -- } -- else { -+ } else { - BAD(".rodata"); /* should not happen? */ - } - di->rodata_present = True; -@@ -2977,6 +2976,46 @@ Bool ML_(read_elf_object) ( struct _DebugInfo* di ) - return retval; - } - -+static void find_rodata(Word i, Word shnum, DiImage* dimg, struct _DebugInfo* di, DiOffT shdr_dioff, -+ UWord shdr_dent_szB, DiOffT shdr_strtab_dioff, PtrdiffT rw_dbias) -+{ -+ ElfXX_Shdr a_shdr; -+ ElfXX_Shdr a_extra_shdr; -+ ML_(img_get)(&a_shdr, dimg, -+ INDEX_BIS(shdr_dioff, i, shdr_dent_szB), -+ sizeof(a_shdr)); -+ if (di->rodata_present && -+ 0 == ML_(img_strcmp_c)(dimg, shdr_strtab_dioff -+ + a_shdr.sh_name, ".rodata")) { -+ Word sh_size = a_shdr.sh_size; -+ Word j; -+ Word next_addr = a_shdr.sh_addr + a_shdr.sh_size; -+ for (j = i + 1; j < shnum; ++j) { -+ ML_(img_get)(&a_extra_shdr, dimg, -+ INDEX_BIS(shdr_dioff, j, shdr_dent_szB), -+ sizeof(a_shdr)); -+ if (0 == ML_(img_strcmp_n)(dimg, shdr_strtab_dioff -+ + a_extra_shdr.sh_name, ".rodata", 7)) { -+ if (a_extra_shdr.sh_addr == -+ VG_ROUNDUP(next_addr, a_extra_shdr.sh_addralign)) { -+ sh_size = VG_ROUNDUP(sh_size, a_extra_shdr.sh_addralign) + a_extra_shdr.sh_size; -+ } -+ next_addr = a_extra_shdr.sh_addr + a_extra_shdr.sh_size; -+ } else { -+ break; -+ } -+ } -+ vg_assert(di->rodata_size == sh_size); -+ vg_assert(di->rodata_avma + a_shdr.sh_addr + rw_dbias); -+ di->rodata_debug_svma = a_shdr.sh_addr; -+ di->rodata_debug_bias = di->rodata_bias + -+ di->rodata_svma - di->rodata_debug_svma; -+ TRACE_SYMTAB("acquiring .rodata debug svma = %#lx .. %#lx\n", -+ di->rodata_debug_svma, -+ di->rodata_debug_svma + di->rodata_size - 1); -+ TRACE_SYMTAB("acquiring .rodata debug bias = %#lx\n", (UWord)di->rodata_debug_bias); -+ } -+} - Bool ML_(read_elf_debug) ( struct _DebugInfo* di ) - { - Word i, j; -@@ -3391,7 +3430,11 @@ Bool ML_(read_elf_debug) ( struct _DebugInfo* di ) - FIND(text, rx) - FIND(data, rw) - FIND(sdata, rw) -- FIND(rodata, rw) -+ // https://bugs.kde.org/show_bug.cgi?id=476548 -+ // special handling for rodata as adjacent -+ // rodata sections may have been merged in ML_(read_elf_object) -+ //FIND(rodata, rw) -+ find_rodata(i, ehdr_dimg.e_shnum, dimg, di, shdr_dioff, shdr_dent_szB, shdr_strtab_dioff, rw_dbias); - FIND(bss, rw) - FIND(sbss, rw) - diff --git a/valgrind-3.22.0-set_vma_name-supp.patch b/valgrind-3.22.0-set_vma_name-supp.patch deleted file mode 100644 index 42b39a0..0000000 --- a/valgrind-3.22.0-set_vma_name-supp.patch +++ /dev/null @@ -1,24 +0,0 @@ -commit 5505e7b371f26f9f9588f4aa3a156dfa6856b90a -Author: Mark Wielaard -Date: Thu Apr 11 19:48:22 2024 +0200 - - Add __set_vma_name suppression to drd/tests/std_thread2.supp - - glibc uses a atomic var to set whether the kernel supports - PR_SET_VMA_ANON_NAME. This looks like a conflicting access to drd. - Suppress it for this testcase. - -diff --git a/drd/tests/std_thread2.supp b/drd/tests/std_thread2.supp -index 4f30560cb924..22052cf2c935 100644 ---- a/drd/tests/std_thread2.supp -+++ b/drd/tests/std_thread2.supp -@@ -44,4 +44,9 @@ - drd:ConflictingAccess - obj:/libexec/ld-elf*.so.1 - } -+{ -+ drd-glibc-set-vma-name -+ drd:ConflictingAccess -+ fun:__set_vma_name -+} - diff --git a/valgrind-3.22.0-valgrind-monitor-python-re.patch b/valgrind-3.22.0-valgrind-monitor-python-re.patch deleted file mode 100644 index 61d26cd..0000000 --- a/valgrind-3.22.0-valgrind-monitor-python-re.patch +++ /dev/null @@ -1,32 +0,0 @@ -commit 0fbfbe05028ad18efda786a256a2738d2c231ed4 -Author: Mark Wielaard -Date: Fri Nov 17 13:31:52 2023 +0100 - - valgrind-monitor.py regular expressions should use raw strings - - With python 3.12 gdb will produce the following SyntaxWarning when - loading valgrind-monitor-def.py: - - /usr/share/gdb/auto-load/valgrind-monitor-def.py:214: - SyntaxWarning: invalid escape sequence '\[' - if re.fullmatch("^0x[0123456789ABCDEFabcdef]+\[[^\[\]]+\]$", arg_str): - - In a future python version this will become an SyntaxError. - - Use a raw strings for the regular expression. - - https://bugs.kde.org/show_bug.cgi?id=476708 - -diff --git a/coregrind/m_gdbserver/valgrind-monitor-def.py b/coregrind/m_gdbserver/valgrind-monitor-def.py -index b4e7b992d..d74b1590c 100644 ---- a/coregrind/m_gdbserver/valgrind-monitor-def.py -+++ b/coregrind/m_gdbserver/valgrind-monitor-def.py -@@ -211,7 +211,7 @@ class Valgrind_ADDR_LEN_opt(Valgrind_Command): - For compatibility reason with the Valgrind gdbserver monitor command, - we detect and accept usages such as 0x1234ABCD[10].""" - def invoke(self, arg_str : str, from_tty : bool) -> None: -- if re.fullmatch("^0x[0123456789ABCDEFabcdef]+\[[^\[\]]+\]$", arg_str): -+ if re.fullmatch(r"^0x[0123456789ABCDEFabcdef]+\[[^\[\]]+\]$", arg_str): - arg_str = arg_str.replace("[", " ") - arg_str = arg_str.replace("]", " ") - eval_execute_2(self, arg_str, diff --git a/valgrind-3.22.0-x86-nop.patch b/valgrind-3.22.0-x86-nop.patch deleted file mode 100644 index 421b291..0000000 --- a/valgrind-3.22.0-x86-nop.patch +++ /dev/null @@ -1,136 +0,0 @@ -From d35005cef8ad8207542738812705ceabf137d7e0 Mon Sep 17 00:00:00 2001 -From: Paul Floyd -Date: Sun, 17 Dec 2023 14:18:51 +0100 -Subject: [PATCH] Bug 478624 - Valgrind incompatibility with binutils-2.42 on - x86 with new nop patterns (unhandled instruction bytes: 0x2E 0x8D 0xB4 0x26) - -It was a bit of a struggle to get the testcase to build -with both clang and gcc (oddly enough gcc was more difficult) so -I just resorted to using .byte arrays. ---- - .gitignore | 1 + - NEWS | 2 ++ - VEX/priv/guest_x86_toIR.c | 22 +++++++++++++- - none/tests/x86/Makefile.am | 2 ++ - none/tests/x86/gnu_binutils_nop.c | 34 ++++++++++++++++++++++ - none/tests/x86/gnu_binutils_nop.stderr.exp | 0 - none/tests/x86/gnu_binutils_nop.vgtest | 2 ++ - 7 files changed, 62 insertions(+), 1 deletion(-) - create mode 100644 none/tests/x86/gnu_binutils_nop.c - create mode 100644 none/tests/x86/gnu_binutils_nop.stderr.exp - create mode 100644 none/tests/x86/gnu_binutils_nop.vgtest - -diff --git a/VEX/priv/guest_x86_toIR.c b/VEX/priv/guest_x86_toIR.c -index 5d6e6dc64..3b6efb387 100644 ---- a/VEX/priv/guest_x86_toIR.c -+++ b/VEX/priv/guest_x86_toIR.c -@@ -8198,7 +8198,7 @@ DisResult disInstr_X86_WRK ( - delta += 5; - goto decode_success; - } -- /* Don't barf on recent binutils padding, -+ /* Don't barf on recent (2010) binutils padding, - all variants of which are: nopw %cs:0x0(%eax,%eax,1) - 66 2e 0f 1f 84 00 00 00 00 00 - 66 66 2e 0f 1f 84 00 00 00 00 00 -@@ -8223,6 +8223,26 @@ DisResult disInstr_X86_WRK ( - } - } - -+ /* bug478624 GNU binutils uses a leal of esi into itself with -+ a zero offset and CS prefix as an 8 byte no-op (Dec 2023). -+ Since the CS prefix is hardly ever used we don't do much -+ to decode it, just a few cases for conditional branches. -+ So add handling here with other pseudo-no-ops. -+ */ -+ if (code[0] == 0x2E && code[1] == 0x8D) { -+ if (code[2] == 0x74 && code[3] == 0x26 && code[4] == 0x00) { -+ DIP("leal %%cs:0(%%esi,%%eiz,1),%%esi\n"); -+ delta += 5; -+ goto decode_success; -+ } -+ if (code[2] == 0xB4 && code[3] == 0x26 && code[4] == 0x00 -+ && code[5] == 0x00 && code[6] == 0x00 && code[7] == 0x00) { -+ DIP("leal %%cs:0(%%esi,%%eiz,1),%%esi\n"); -+ delta += 8; -+ goto decode_success; -+ } -+ } -+ - // Intel CET requires the following opcodes to be treated as NOPs - // with any prefix and ModRM, SIB and disp combination: - // "0F 19", "0F 1C", "0F 1D", "0F 1E", "0F 1F" -diff --git a/none/tests/x86/Makefile.am b/none/tests/x86/Makefile.am -index 3ecd1ad3c..dbae86571 100644 ---- a/none/tests/x86/Makefile.am -+++ b/none/tests/x86/Makefile.am -@@ -52,6 +52,7 @@ EXTRA_DIST = \ - fxtract.stdout.exp fxtract.stderr.exp fxtract.vgtest \ - fxtract.stdout.exp-older-glibc \ - getseg.stdout.exp getseg.stderr.exp getseg.vgtest \ -+ gnu_binutils_nop.stderr.exp gnu_binutils_nop.vgtest \ - incdec_alt.stdout.exp incdec_alt.stderr.exp incdec_alt.vgtest \ - int.stderr.exp int.stdout.exp int.disabled \ - $(addsuffix .stderr.exp,$(INSN_TESTS)) \ -@@ -100,6 +101,7 @@ check_PROGRAMS = \ - fpu_lazy_eflags \ - fxtract \ - getseg \ -+ gnu_binutils_nop \ - incdec_alt \ - $(INSN_TESTS) \ - int \ -diff --git a/none/tests/x86/gnu_binutils_nop.c b/none/tests/x86/gnu_binutils_nop.c -new file mode 100644 -index 000000000..412a4c2cb ---- /dev/null -+++ b/none/tests/x86/gnu_binutils_nop.c -@@ -0,0 +1,34 @@ -+int main(void) -+{ -+ // GNU binutils uses various opcodes as alternatives for nop -+ // the idea is that it is faster to execute one large opcode -+ // with no side-effects than multiple repetitions of the -+ // single byte 'nop'. This gives more choice when code -+ // needs to be padded. -+ -+ // the following is based on -+ // https://sourceware.org/cgit/binutils-gdb/tree/gas/config/tc-i386.c#n1256 -+ -+ // one byte -+ __asm__ __volatile__("nop"); -+ // two bytes -+ __asm__ __volatile__("xchg %ax,%ax"); -+ // three bytes -+ //__asm__ __volatile__("leal 0(%esi),%esi"); -+ __asm__ __volatile__(".byte 0x8d,0x76,0x00"); -+ // four bytes -+ //__asm__ __volatile__("leal 0(%esi,%eiz),%esi"); -+ __asm__ __volatile__(".byte 0x8d,0x74,0x26,0x00"); -+ // five bytes -+ //__asm__ __volatile__("leal %cs:0(%esi,%eiz),%esi"); -+ __asm__ __volatile__(".byte 0x2e,0x8d,0x74,0x26,0x00"); -+ // six bytes -+ //__asm__ __volatile__("leal 0L(%esi),%esi"); -+ __asm__ __volatile__(".byte 0x8d,0xb6,0x00,0x00,0x00,0x00"); -+ // seven bytes -+ //__asm__ __volatile__("leal 0L(%esi,%eiz),%esi"); -+ __asm__ __volatile__(".byte 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00"); -+ // eight bytes -+ //__asm__ __volatile__("leal %cs:0L(%esi,%eiz),%esi"); -+ __asm__ __volatile__(".byte 0x2e,0x8d,0xb4,0x26,0x00,0x00,0x00,0x00"); -+} -diff --git a/none/tests/x86/gnu_binutils_nop.stderr.exp b/none/tests/x86/gnu_binutils_nop.stderr.exp -new file mode 100644 -index 000000000..e69de29bb -diff --git a/none/tests/x86/gnu_binutils_nop.vgtest b/none/tests/x86/gnu_binutils_nop.vgtest -new file mode 100644 -index 000000000..7f378dd53 ---- /dev/null -+++ b/none/tests/x86/gnu_binutils_nop.vgtest -@@ -0,0 +1,2 @@ -+prog: gnu_binutils_nop -+vgopts: -q --- -2.43.0 - diff --git a/valgrind.spec b/valgrind.spec index 766d77f..f264430 100644 --- a/valgrind.spec +++ b/valgrind.spec @@ -2,8 +2,8 @@ Summary: Dynamic analysis tools to detect memory or thread bugs and profile Name: %{?scl_prefix}valgrind -Version: 3.22.0 -Release: 8%{?dist} +Version: 3.23.0 +Release: 0.1.RC1%{?dist} Epoch: 1 # This ignores licenses that are only found in the test or perf sources @@ -71,7 +71,7 @@ URL: https://www.valgrind.org/ # So those will already have their full symbol table. %undefine _include_minidebuginfo -Source0: https://sourceware.org/pub/valgrind/valgrind-%{version}.tar.bz2 +Source0: https://sourceware.org/pub/valgrind/valgrind-%{version}.RC1.tar.bz2 # Needs investigation and pushing upstream Patch1: valgrind-3.9.0-cachegrind-improvements.patch @@ -85,47 +85,6 @@ Patch3: valgrind-3.16.0-some-stack-protector.patch # Add some -Wl,z,now. Patch4: valgrind-3.16.0-some-Wl-z-now.patch -# valgrind-monitor.py regular expressions should use raw strings -# https://bugs.kde.org/show_bug.cgi?id=476708 -Patch5: valgrind-3.22.0-valgrind-monitor-python-re.patch - -# valgrind 3.22.0 fails on assertion when loading debuginfo -# https://bugs.kde.org/show_bug.cgi?id=476548 -Patch6: valgrind-3.22.0-rodata.patch - -# Add fchmodat2 syscall on linux -# https://bugs.kde.org/show_bug.cgi?id=477198 -Patch7: valgrind-3.22.0-fchmodat2.patch - -# Valgrind incompatibility with binutils-2.42 on x86 with new nop patterns -# (unhandled instruction bytes: 0x2E 0x8D 0xB4 0x26) -# https://bugs.kde.org/show_bug.cgi?id=478624 -Patch8: valgrind-3.22.0-x86-nop.patch - -# Handle gcc __builtin_strcmp using 128/256 bit vectors with sse4.1, avx/avx2 -# https://bugzilla.redhat.com/show_bug.cgi?id=2257546 -Patch9: valgrind-3.22.0-gcc-builtin_strcmp-128-256-bit-vector.patch - -# Upstream commit 7aa63de01a3cda2a346f90be16bd29fd7b410c63 -# Testsuite fix gdbserver_tests/filter_gdb.in: delete thread exiting -Patch10: valgrind-3.22.0-gdb-thread-exited.patch - -# Upstream commit 5505e7b371f26f9f9588f4aa3a156dfa6856b90a -# Add __set_vma_name suppression to drd/tests/std_thread2.supp -Patch11: valgrind-3.22.0-set_vma_name-supp.patch - -# Upstream commit f47109d30ca319a3bcd4288803e30a473d1506d2 -# Add new helgrind race suppression for pth_mempcpy_false_races -Patch12: valgrind-3.22.0-pth_mempcpy_false_races.patch - -# amd64: Implement VFMADD213 for Iop_MAddF32 and Iop_MAddF64 -# https://bugs.kde.org/show_bug.cgi?id=481127 -Patch13: valgrind-3.22.0-amd64-VFMADD213.patch - -# glibc built with -march=x86-64-v3 does not work due to ld.so strcmp -# https://bugs.kde.org/show_bug.cgi?id=485487 -Patch14: valgrind-3.22.0-amd64-redir-strcmp.patch - BuildRequires: make BuildRequires: glibc-devel @@ -257,23 +216,13 @@ Valgrind User Manual for details. %endif %prep -%setup -q -n %{?scl:%{pkg_name}}%{!?scl:%{name}}-%{version} +%setup -q -n %{?scl:%{pkg_name}}%{!?scl:%{name}}-%{version}.RC1 %patch -P1 -p1 %patch -P2 -p1 %patch -P3 -p1 %patch -P4 -p1 -%patch -P5 -p1 -%patch -P6 -p1 -%patch -P7 -p1 -%patch -P8 -p1 -%patch -P9 -p1 -%patch -P10 -p1 -%patch -P11 -p1 -%patch -P12 -p1 -%patch -P13 -p1 -%patch -P14 -p1 %build # LTO triggers undefined symbols in valgrind. Valgrind has a --enable-lto @@ -490,6 +439,10 @@ echo ===============END TESTING=============== %endif %changelog +* Sat Apr 20 2024 Mark Wielaard - 3.23.0-0.1.RC1 ++- Upstream 3.23.0-RC1 ++- Remove all upstreamed patches + * Sat Apr 13 2024 Mark Wielaard - 3.22.0-8 - Add BuildRequires: python3-devel for running testsuite. - Add valgrind-3.22.0-gdb-thread-exited.patch