diff --git a/0001-Merging-r324449.patch b/0001-Merging-r324449.patch new file mode 100644 index 0000000..864962a --- /dev/null +++ b/0001-Merging-r324449.patch @@ -0,0 +1,237 @@ +From 4e5fddc22a28e0e59d6409a98fb22eba32d0eae7 Mon Sep 17 00:00:00 2001 +From: Reid Kleckner +Date: Wed, 14 Feb 2018 00:32:26 +0000 +Subject: [PATCH 1/4] Merging r324449: + ------------------------------------------------------------------------ + r324449 | chandlerc | 2018-02-06 22:16:24 -0800 (Tue, 06 Feb 2018) | 15 lines + +[x86/retpoline] Make the external thunk names exactly match the names +that happened to end up in GCC. + +This is really unfortunate, as the names don't have much rhyme or reason +to them. Originally in the discussions it seemed fine to rely on aliases +to map different names to whatever external thunk code developers wished +to use but there are practical problems with that in the kernel it turns +out. And since we're discovering these practical problems late and since +GCC has already shipped a release with one set of names, we are forced, +yet again, to blindly match what is there. + +Somewhat rushing this patch out for the Linux kernel folks to test and +so we can get it patched into our releases. 
+ +Differential Revision: https://reviews.llvm.org/D42998 +------------------------------------------------------------------------ + + +git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_50@325088 91177308-0d34-0410-b5e6-96231b3b80d8 +--- + lib/Target/X86/X86ISelLowering.cpp | 59 +++++++++++++++++++++++++--------- + test/CodeGen/X86/retpoline-external.ll | 48 +++++++++++++-------------- + 2 files changed, 68 insertions(+), 39 deletions(-) + +diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp +index 2c2294d..9aa3023 100644 +--- a/lib/Target/X86/X86ISelLowering.cpp ++++ b/lib/Target/X86/X86ISelLowering.cpp +@@ -26250,28 +26250,57 @@ static unsigned getOpcodeForRetpoline(unsigned RPOpc) { + + static const char *getRetpolineSymbol(const X86Subtarget &Subtarget, + unsigned Reg) { ++ if (Subtarget.useRetpolineExternalThunk()) { ++ // When using an external thunk for retpolines, we pick names that match the ++ // names GCC happens to use as well. This helps simplify the implementation ++ // of the thunks for kernels where they have no easy ability to create ++ // aliases and are doing non-trivial configuration of the thunk's body. For ++ // example, the Linux kernel will do boot-time hot patching of the thunk ++ // bodies and cannot easily export aliases of these to loaded modules. ++ // ++ // Note that at any point in the future, we may need to change the semantics ++ // of how we implement retpolines and at that time will likely change the ++ // name of the called thunk. Essentially, there is no hard guarantee that ++ // LLVM will generate calls to specific thunks, we merely make a best-effort ++ // attempt to help out kernels and other systems where duplicating the ++ // thunks is costly. 
++ switch (Reg) { ++ case 0: ++ assert(!Subtarget.is64Bit() && "R11 should always be available on x64"); ++ return "__x86_indirect_thunk"; ++ case X86::EAX: ++ assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); ++ return "__x86_indirect_thunk_eax"; ++ case X86::ECX: ++ assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); ++ return "__x86_indirect_thunk_ecx"; ++ case X86::EDX: ++ assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); ++ return "__x86_indirect_thunk_edx"; ++ case X86::R11: ++ assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!"); ++ return "__x86_indirect_thunk_r11"; ++ } ++ llvm_unreachable("unexpected reg for retpoline"); ++ } ++ ++ // When targeting an internal COMDAT thunk use an LLVM-specific name. + switch (Reg) { + case 0: + assert(!Subtarget.is64Bit() && "R11 should always be available on x64"); +- return Subtarget.useRetpolineExternalThunk() +- ? "__llvm_external_retpoline_push" +- : "__llvm_retpoline_push"; ++ return "__llvm_retpoline_push"; + case X86::EAX: +- return Subtarget.useRetpolineExternalThunk() +- ? "__llvm_external_retpoline_eax" +- : "__llvm_retpoline_eax"; ++ assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); ++ return "__llvm_retpoline_eax"; + case X86::ECX: +- return Subtarget.useRetpolineExternalThunk() +- ? "__llvm_external_retpoline_ecx" +- : "__llvm_retpoline_ecx"; ++ assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); ++ return "__llvm_retpoline_ecx"; + case X86::EDX: +- return Subtarget.useRetpolineExternalThunk() +- ? "__llvm_external_retpoline_edx" +- : "__llvm_retpoline_edx"; ++ assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); ++ return "__llvm_retpoline_edx"; + case X86::R11: +- return Subtarget.useRetpolineExternalThunk() +- ? 
"__llvm_external_retpoline_r11" +- : "__llvm_retpoline_r11"; ++ assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!"); ++ return "__llvm_retpoline_r11"; + } + llvm_unreachable("unexpected reg for retpoline"); + } +diff --git a/test/CodeGen/X86/retpoline-external.ll b/test/CodeGen/X86/retpoline-external.ll +index 66d32ba..2f21bb2 100644 +--- a/test/CodeGen/X86/retpoline-external.ll ++++ b/test/CodeGen/X86/retpoline-external.ll +@@ -23,18 +23,18 @@ entry: + ; X64: callq bar + ; X64-DAG: movl %[[x]], %edi + ; X64-DAG: movq %[[fp]], %r11 +-; X64: callq __llvm_external_retpoline_r11 ++; X64: callq __x86_indirect_thunk_r11 + ; X64: movl %[[x]], %edi + ; X64: callq bar + ; X64-DAG: movl %[[x]], %edi + ; X64-DAG: movq %[[fp]], %r11 +-; X64: jmp __llvm_external_retpoline_r11 # TAILCALL ++; X64: jmp __x86_indirect_thunk_r11 # TAILCALL + + ; X64FAST-LABEL: icall_reg: + ; X64FAST: callq bar +-; X64FAST: callq __llvm_external_retpoline_r11 ++; X64FAST: callq __x86_indirect_thunk_r11 + ; X64FAST: callq bar +-; X64FAST: jmp __llvm_external_retpoline_r11 # TAILCALL ++; X64FAST: jmp __x86_indirect_thunk_r11 # TAILCALL + + ; X86-LABEL: icall_reg: + ; X86-DAG: movl 12(%esp), %[[fp:[^ ]*]] +@@ -43,19 +43,19 @@ entry: + ; X86: calll bar + ; X86: movl %[[fp]], %eax + ; X86: pushl %[[x]] +-; X86: calll __llvm_external_retpoline_eax ++; X86: calll __x86_indirect_thunk_eax + ; X86: pushl %[[x]] + ; X86: calll bar + ; X86: movl %[[fp]], %eax + ; X86: pushl %[[x]] +-; X86: calll __llvm_external_retpoline_eax ++; X86: calll __x86_indirect_thunk_eax + ; X86-NOT: # TAILCALL + + ; X86FAST-LABEL: icall_reg: + ; X86FAST: calll bar +-; X86FAST: calll __llvm_external_retpoline_eax ++; X86FAST: calll __x86_indirect_thunk_eax + ; X86FAST: calll bar +-; X86FAST: calll __llvm_external_retpoline_eax ++; X86FAST: calll __x86_indirect_thunk_eax + + + @global_fp = external global void (i32)* +@@ -72,28 +72,28 @@ define void @icall_global_fp(i32 %x, void (i32)** %fpp) #0 { + ; X64-LABEL: 
icall_global_fp: + ; X64-DAG: movl %edi, %[[x:[^ ]*]] + ; X64-DAG: movq global_fp(%rip), %r11 +-; X64: callq __llvm_external_retpoline_r11 ++; X64: callq __x86_indirect_thunk_r11 + ; X64-DAG: movl %[[x]], %edi + ; X64-DAG: movq global_fp(%rip), %r11 +-; X64: jmp __llvm_external_retpoline_r11 # TAILCALL ++; X64: jmp __x86_indirect_thunk_r11 # TAILCALL + + ; X64FAST-LABEL: icall_global_fp: + ; X64FAST: movq global_fp(%rip), %r11 +-; X64FAST: callq __llvm_external_retpoline_r11 ++; X64FAST: callq __x86_indirect_thunk_r11 + ; X64FAST: movq global_fp(%rip), %r11 +-; X64FAST: jmp __llvm_external_retpoline_r11 # TAILCALL ++; X64FAST: jmp __x86_indirect_thunk_r11 # TAILCALL + + ; X86-LABEL: icall_global_fp: + ; X86: movl global_fp, %eax + ; X86: pushl 4(%esp) +-; X86: calll __llvm_external_retpoline_eax ++; X86: calll __x86_indirect_thunk_eax + ; X86: addl $4, %esp + ; X86: movl global_fp, %eax +-; X86: jmp __llvm_external_retpoline_eax # TAILCALL ++; X86: jmp __x86_indirect_thunk_eax # TAILCALL + + ; X86FAST-LABEL: icall_global_fp: +-; X86FAST: calll __llvm_external_retpoline_eax +-; X86FAST: jmp __llvm_external_retpoline_eax # TAILCALL ++; X86FAST: calll __x86_indirect_thunk_eax ++; X86FAST: jmp __x86_indirect_thunk_eax # TAILCALL + + + %struct.Foo = type { void (%struct.Foo*)** } +@@ -114,14 +114,14 @@ define void @vcall(%struct.Foo* %obj) #0 { + ; X64: movq (%[[obj]]), %[[vptr:[^ ]*]] + ; X64: movq 8(%[[vptr]]), %[[fp:[^ ]*]] + ; X64: movq %[[fp]], %r11 +-; X64: callq __llvm_external_retpoline_r11 ++; X64: callq __x86_indirect_thunk_r11 + ; X64-DAG: movq %[[obj]], %rdi + ; X64-DAG: movq %[[fp]], %r11 +-; X64: jmp __llvm_external_retpoline_r11 # TAILCALL ++; X64: jmp __x86_indirect_thunk_r11 # TAILCALL + + ; X64FAST-LABEL: vcall: +-; X64FAST: callq __llvm_external_retpoline_r11 +-; X64FAST: jmp __llvm_external_retpoline_r11 # TAILCALL ++; X64FAST: callq __x86_indirect_thunk_r11 ++; X64FAST: jmp __x86_indirect_thunk_r11 # TAILCALL + + ; X86-LABEL: vcall: + ; X86: movl 
8(%esp), %[[obj:[^ ]*]] +@@ -129,14 +129,14 @@ define void @vcall(%struct.Foo* %obj) #0 { + ; X86: movl 4(%[[vptr]]), %[[fp:[^ ]*]] + ; X86: movl %[[fp]], %eax + ; X86: pushl %[[obj]] +-; X86: calll __llvm_external_retpoline_eax ++; X86: calll __x86_indirect_thunk_eax + ; X86: addl $4, %esp + ; X86: movl %[[fp]], %eax +-; X86: jmp __llvm_external_retpoline_eax # TAILCALL ++; X86: jmp __x86_indirect_thunk_eax # TAILCALL + + ; X86FAST-LABEL: vcall: +-; X86FAST: calll __llvm_external_retpoline_eax +-; X86FAST: jmp __llvm_external_retpoline_eax # TAILCALL ++; X86FAST: calll __x86_indirect_thunk_eax ++; X86FAST: jmp __x86_indirect_thunk_eax # TAILCALL + + + declare void @direct_callee() +-- +1.8.3.1 + diff --git a/0002-Merging-r324645.patch b/0002-Merging-r324645.patch new file mode 100644 index 0000000..381f18e --- /dev/null +++ b/0002-Merging-r324645.patch @@ -0,0 +1,88 @@ +From 8f5f7f9cb15387ddb010894c17e788b3116fe26d Mon Sep 17 00:00:00 2001 +From: Reid Kleckner +Date: Wed, 14 Feb 2018 00:33:00 +0000 +Subject: [PATCH 2/4] Merging r324645: + ------------------------------------------------------------------------ + r324645 | dwmw2 | 2018-02-08 12:06:05 -0800 (Thu, 08 Feb 2018) | 5 lines + +[X86] Support 'V' register operand modifier + +This allows the register name to be printed without the leading '%'. +This can be used for emitting calls to the retpoline thunks from inline +asm. 
+------------------------------------------------------------------------ + + +git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_50@325089 91177308-0d34-0410-b5e6-96231b3b80d8 +--- + lib/Target/X86/X86AsmPrinter.cpp | 11 ++++++++++- + test/CodeGen/X86/inline-asm-modifier-V.ll | 14 ++++++++++++++ + 2 files changed, 24 insertions(+), 1 deletion(-) + create mode 100644 test/CodeGen/X86/inline-asm-modifier-V.ll + +diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp +index dc15aea..8c7ddd9 100644 +--- a/lib/Target/X86/X86AsmPrinter.cpp ++++ b/lib/Target/X86/X86AsmPrinter.cpp +@@ -344,6 +344,8 @@ static void printIntelMemReference(X86AsmPrinter &P, const MachineInstr *MI, + static bool printAsmMRegister(X86AsmPrinter &P, const MachineOperand &MO, + char Mode, raw_ostream &O) { + unsigned Reg = MO.getReg(); ++ bool EmitPercent = true; ++ + switch (Mode) { + default: return true; // Unknown mode. + case 'b': // Print QImode register +@@ -358,6 +360,9 @@ static bool printAsmMRegister(X86AsmPrinter &P, const MachineOperand &MO, + case 'k': // Print SImode register + Reg = getX86SubSuperRegister(Reg, 32); + break; ++ case 'V': ++ EmitPercent = false; ++ LLVM_FALLTHROUGH; + case 'q': + // Print 64-bit register names if 64-bit integer registers are available. + // Otherwise, print 32-bit register names. 
+@@ -365,7 +370,10 @@ static bool printAsmMRegister(X86AsmPrinter &P, const MachineOperand &MO, + break; + } + +- O << '%' << X86ATTInstPrinter::getRegisterName(Reg); ++ if (EmitPercent) ++ O << '%'; ++ ++ O << X86ATTInstPrinter::getRegisterName(Reg); + return false; + } + +@@ -438,6 +446,7 @@ bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + case 'w': // Print HImode register + case 'k': // Print SImode register + case 'q': // Print DImode register ++ case 'V': // Print native register without '%' + if (MO.isReg()) + return printAsmMRegister(*this, MO, ExtraCode[0], O); + printOperand(*this, MI, OpNo, O); +diff --git a/test/CodeGen/X86/inline-asm-modifier-V.ll b/test/CodeGen/X86/inline-asm-modifier-V.ll +new file mode 100644 +index 0000000..5a7f3fd +--- /dev/null ++++ b/test/CodeGen/X86/inline-asm-modifier-V.ll +@@ -0,0 +1,14 @@ ++; RUN: llc < %s -mtriple=i686-- -no-integrated-as | FileCheck -check-prefix=X86 %s ++; RUN: llc < %s -mtriple=x86_64-- -no-integrated-as | FileCheck -check-prefix=X64 %s ++ ++; If the target does not have 64-bit integer registers, emit 32-bit register ++; names. 
++ ++; X86: call __x86_indirect_thunk_e{{[abcd]}}x ++; X64: call __x86_indirect_thunk_r ++ ++define void @q_modifier(i32* %p) { ++entry: ++ tail call void asm sideeffect "call __x86_indirect_thunk_${0:V}", "r,~{dirflag},~{fpsr},~{flags}"(i32* %p) ++ ret void ++} +-- +1.8.3.1 + diff --git a/0003-Merging-r325049.patch b/0003-Merging-r325049.patch new file mode 100644 index 0000000..b207dc7 --- /dev/null +++ b/0003-Merging-r325049.patch @@ -0,0 +1,308 @@ +From 4594a6164d5ae9252825e23a95aa6f2fce304d6e Mon Sep 17 00:00:00 2001 +From: Reid Kleckner +Date: Wed, 14 Feb 2018 00:34:13 +0000 +Subject: [PATCH 3/4] Merging r325049: + ------------------------------------------------------------------------ + r325049 | rnk | 2018-02-13 12:47:49 -0800 (Tue, 13 Feb 2018) | 17 lines + +[X86] Use EDI for retpoline when no scratch regs are left + +Summary: +Instead of solving the hard problem of how to pass the callee to the indirect +jump thunk without a register, just use a CSR. At a call boundary, there's +nothing stopping us from using a CSR to hold the callee as long as we save and +restore it in the prologue. + +Also, add tests for this mregparm=3 case. I wrote execution tests for +__llvm_retpoline_push, but they never got committed as lit tests, either +because I never rewrote them or because they got lost in merge conflicts. 
+ +Reviewers: chandlerc, dwmw2 + +Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits + +Differential Revision: https://reviews.llvm.org/D43214 +------------------------------------------------------------------------ + + +git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_50@325090 91177308-0d34-0410-b5e6-96231b3b80d8 +--- + lib/Target/X86/X86ISelLowering.cpp | 50 +++++++++++++---------------------- + lib/Target/X86/X86RetpolineThunks.cpp | 42 ++++++++--------------------- + test/CodeGen/X86/retpoline-regparm.ll | 42 +++++++++++++++++++++++++++++ + test/CodeGen/X86/retpoline.ll | 14 ++++------ + 4 files changed, 76 insertions(+), 72 deletions(-) + create mode 100644 test/CodeGen/X86/retpoline-regparm.ll + +diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp +index 9aa3023..59a9832 100644 +--- a/lib/Target/X86/X86ISelLowering.cpp ++++ b/lib/Target/X86/X86ISelLowering.cpp +@@ -26265,9 +26265,6 @@ static const char *getRetpolineSymbol(const X86Subtarget &Subtarget, + // attempt to help out kernels and other systems where duplicating the + // thunks is costly. 
+ switch (Reg) { +- case 0: +- assert(!Subtarget.is64Bit() && "R11 should always be available on x64"); +- return "__x86_indirect_thunk"; + case X86::EAX: + assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); + return "__x86_indirect_thunk_eax"; +@@ -26277,6 +26274,9 @@ static const char *getRetpolineSymbol(const X86Subtarget &Subtarget, + case X86::EDX: + assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); + return "__x86_indirect_thunk_edx"; ++ case X86::EDI: ++ assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); ++ return "__x86_indirect_thunk_edi"; + case X86::R11: + assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!"); + return "__x86_indirect_thunk_r11"; +@@ -26286,9 +26286,6 @@ static const char *getRetpolineSymbol(const X86Subtarget &Subtarget, + + // When targeting an internal COMDAT thunk use an LLVM-specific name. + switch (Reg) { +- case 0: +- assert(!Subtarget.is64Bit() && "R11 should always be available on x64"); +- return "__llvm_retpoline_push"; + case X86::EAX: + assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); + return "__llvm_retpoline_eax"; +@@ -26298,6 +26295,9 @@ static const char *getRetpolineSymbol(const X86Subtarget &Subtarget, + case X86::EDX: + assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); + return "__llvm_retpoline_edx"; ++ case X86::EDI: ++ assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); ++ return "__llvm_retpoline_edi"; + case X86::R11: + assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!"); + return "__llvm_retpoline_r11"; +@@ -26319,15 +26319,13 @@ X86TargetLowering::EmitLoweredRetpoline(MachineInstr &MI, + // just use R11, but we scan for uses anyway to ensure we don't generate + // incorrect code. On 32-bit, we use one of EAX, ECX, or EDX that isn't + // already a register use operand to the call to hold the callee. If none +- // are available, push the callee instead. 
This is less efficient, but is +- // necessary for functions using 3 regparms. Such function calls are +- // (currently) not eligible for tail call optimization, because there is no +- // scratch register available to hold the address of the callee. ++ // are available, use EDI instead. EDI is chosen because EBX is the PIC base ++ // register and ESI is the base pointer to realigned stack frames with VLAs. + SmallVector AvailableRegs; + if (Subtarget.is64Bit()) + AvailableRegs.push_back(X86::R11); + else +- AvailableRegs.append({X86::EAX, X86::ECX, X86::EDX}); ++ AvailableRegs.append({X86::EAX, X86::ECX, X86::EDX, X86::EDI}); + + // Zero out any registers that are already used. + for (const auto &MO : MI.operands()) { +@@ -26345,30 +26343,18 @@ X86TargetLowering::EmitLoweredRetpoline(MachineInstr &MI, + break; + } + } ++ if (!AvailableReg) ++ report_fatal_error("calling convention incompatible with retpoline, no " ++ "available registers"); + + const char *Symbol = getRetpolineSymbol(Subtarget, AvailableReg); + +- if (AvailableReg == 0) { +- // No register available. Use PUSH. This must not be a tailcall, and this +- // must not be x64. 
+- if (Subtarget.is64Bit()) +- report_fatal_error( +- "Cannot make an indirect call on x86-64 using both retpoline and a " +- "calling convention that preservers r11"); +- if (Opc != X86::CALLpcrel32) +- report_fatal_error("Cannot make an indirect tail call on x86 using " +- "retpoline without a preserved register"); +- BuildMI(*BB, MI, DL, TII->get(X86::PUSH32r)).addReg(CalleeVReg); +- MI.getOperand(0).ChangeToES(Symbol); +- MI.setDesc(TII->get(Opc)); +- } else { +- BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), AvailableReg) +- .addReg(CalleeVReg); +- MI.getOperand(0).ChangeToES(Symbol); +- MI.setDesc(TII->get(Opc)); +- MachineInstrBuilder(*BB->getParent(), &MI) +- .addReg(AvailableReg, RegState::Implicit | RegState::Kill); +- } ++ BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), AvailableReg) ++ .addReg(CalleeVReg); ++ MI.getOperand(0).ChangeToES(Symbol); ++ MI.setDesc(TII->get(Opc)); ++ MachineInstrBuilder(*BB->getParent(), &MI) ++ .addReg(AvailableReg, RegState::Implicit | RegState::Kill); + return BB; + } + +diff --git a/lib/Target/X86/X86RetpolineThunks.cpp b/lib/Target/X86/X86RetpolineThunks.cpp +index 223fa57..59ace3f 100644 +--- a/lib/Target/X86/X86RetpolineThunks.cpp ++++ b/lib/Target/X86/X86RetpolineThunks.cpp +@@ -43,7 +43,7 @@ static const char R11ThunkName[] = "__llvm_retpoline_r11"; + static const char EAXThunkName[] = "__llvm_retpoline_eax"; + static const char ECXThunkName[] = "__llvm_retpoline_ecx"; + static const char EDXThunkName[] = "__llvm_retpoline_edx"; +-static const char PushThunkName[] = "__llvm_retpoline_push"; ++static const char EDIThunkName[] = "__llvm_retpoline_edi"; + + namespace { + class X86RetpolineThunks : public MachineFunctionPass { +@@ -127,7 +127,7 @@ bool X86RetpolineThunks::runOnMachineFunction(MachineFunction &MF) { + createThunkFunction(M, R11ThunkName); + else + for (StringRef Name : +- {EAXThunkName, ECXThunkName, EDXThunkName, PushThunkName}) ++ {EAXThunkName, ECXThunkName, EDXThunkName, EDIThunkName}) + 
createThunkFunction(M, Name); + InsertedThunks = true; + return true; +@@ -151,9 +151,8 @@ bool X86RetpolineThunks::runOnMachineFunction(MachineFunction &MF) { + populateThunk(MF, X86::R11); + } else { + // For 32-bit targets we need to emit a collection of thunks for various +- // possible scratch registers as well as a fallback that is used when +- // there are no scratch registers and assumes the retpoline target has +- // been pushed. ++ // possible scratch registers as well as a fallback that uses EDI, which is ++ // normally callee saved. + // __llvm_retpoline_eax: + // calll .Leax_call_target + // .Leax_capture_spec: +@@ -174,32 +173,18 @@ bool X86RetpolineThunks::runOnMachineFunction(MachineFunction &MF) { + // movl %edx, (%esp) + // retl + // +- // This last one is a bit more special and so needs a little extra +- // handling. +- // __llvm_retpoline_push: +- // calll .Lpush_call_target +- // .Lpush_capture_spec: +- // pause +- // lfence +- // jmp .Lpush_capture_spec +- // .align 16 +- // .Lpush_call_target: +- // # Clear pause_loop return address. +- // addl $4, %esp +- // # Top of stack words are: Callee, RA. Exchange Callee and RA. +- // pushl 4(%esp) # Push callee +- // pushl 4(%esp) # Push RA +- // popl 8(%esp) # Pop RA to final RA +- // popl (%esp) # Pop callee to next top of stack +- // retl # Ret to callee ++ // __llvm_retpoline_edi: ++ // ... 
# Same setup ++ // movl %edi, (%esp) ++ // retl + if (MF.getName() == EAXThunkName) + populateThunk(MF, X86::EAX); + else if (MF.getName() == ECXThunkName) + populateThunk(MF, X86::ECX); + else if (MF.getName() == EDXThunkName) + populateThunk(MF, X86::EDX); +- else if (MF.getName() == PushThunkName) +- populateThunk(MF); ++ else if (MF.getName() == EDIThunkName) ++ populateThunk(MF, X86::EDI); + else + llvm_unreachable("Invalid thunk name on x86-32!"); + } +@@ -301,11 +286,6 @@ void X86RetpolineThunks::populateThunk(MachineFunction &MF, + CaptureSpec->addSuccessor(CaptureSpec); + + CallTarget->setAlignment(4); +- if (Reg) { +- insertRegReturnAddrClobber(*CallTarget, *Reg); +- } else { +- assert(!Is64Bit && "We only support non-reg thunks on 32-bit x86!"); +- insert32BitPushReturnAddrClobber(*CallTarget); +- } ++ insertRegReturnAddrClobber(*CallTarget, *Reg); + BuildMI(CallTarget, DebugLoc(), TII->get(RetOpc)); + } +diff --git a/test/CodeGen/X86/retpoline-regparm.ll b/test/CodeGen/X86/retpoline-regparm.ll +new file mode 100644 +index 0000000..13b3274 +--- /dev/null ++++ b/test/CodeGen/X86/retpoline-regparm.ll +@@ -0,0 +1,42 @@ ++; RUN: llc -mtriple=i686-linux < %s | FileCheck --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" %s ++ ++; Test 32-bit retpoline when -mregparm=3 is used. This case is interesting ++; because there are no available scratch registers. The Linux kernel builds ++; with -mregparm=3, so we need to support it. TCO should fail because we need ++; to restore EDI. ++ ++define void @call_edi(void (i32, i32, i32)* %fp) #0 { ++entry: ++ tail call void %fp(i32 inreg 0, i32 inreg 0, i32 inreg 0) ++ ret void ++} ++ ++; CHECK-LABEL: call_edi: ++; EDI is used, so it must be saved. 
++; CHECK: pushl %edi ++; CHECK-DAG: xorl %eax, %eax ++; CHECK-DAG: xorl %edx, %edx ++; CHECK-DAG: xorl %ecx, %ecx ++; CHECK-DAG: movl {{.*}}, %edi ++; CHECK: calll __llvm_retpoline_edi ++; CHECK: popl %edi ++; CHECK: retl ++ ++define void @edi_external(void (i32, i32, i32)* %fp) #1 { ++entry: ++ tail call void %fp(i32 inreg 0, i32 inreg 0, i32 inreg 0) ++ ret void ++} ++ ++; CHECK-LABEL: edi_external: ++; CHECK: pushl %edi ++; CHECK-DAG: xorl %eax, %eax ++; CHECK-DAG: xorl %edx, %edx ++; CHECK-DAG: xorl %ecx, %ecx ++; CHECK-DAG: movl {{.*}}, %edi ++; CHECK: calll __x86_indirect_thunk_edi ++; CHECK: popl %edi ++; CHECK: retl ++ ++attributes #0 = { "target-features"="+retpoline" } ++attributes #1 = { "target-features"="+retpoline-external-thunk" } +diff --git a/test/CodeGen/X86/retpoline.ll b/test/CodeGen/X86/retpoline.ll +index b0d4c85..562386e 100644 +--- a/test/CodeGen/X86/retpoline.ll ++++ b/test/CodeGen/X86/retpoline.ll +@@ -336,10 +336,10 @@ latch: + ; X86-NEXT: movl %edx, (%esp) + ; X86-NEXT: retl + ; +-; X86-LABEL: .section .text.__llvm_retpoline_push,{{.*}},__llvm_retpoline_push,comdat +-; X86-NEXT: .hidden __llvm_retpoline_push +-; X86-NEXT: .weak __llvm_retpoline_push +-; X86: __llvm_retpoline_push: ++; X86-LABEL: .section .text.__llvm_retpoline_edi,{{.*}},__llvm_retpoline_edi,comdat ++; X86-NEXT: .hidden __llvm_retpoline_edi ++; X86-NEXT: .weak __llvm_retpoline_edi ++; X86: __llvm_retpoline_edi: + ; X86-NEXT: # {{.*}} # %entry + ; X86-NEXT: calll [[CALL_TARGET:.*]] + ; X86-NEXT: [[CAPTURE_SPEC:.*]]: # Block address taken +@@ -351,11 +351,7 @@ latch: + ; X86-NEXT: .p2align 4, 0x90 + ; X86-NEXT: [[CALL_TARGET]]: # Block address taken + ; X86-NEXT: # %entry +-; X86-NEXT: addl $4, %esp +-; X86-NEXT: pushl 4(%esp) +-; X86-NEXT: pushl 4(%esp) +-; X86-NEXT: popl 8(%esp) +-; X86-NEXT: popl (%esp) ++; X86-NEXT: movl %edi, (%esp) + ; X86-NEXT: retl + + +-- +1.8.3.1 + diff --git a/0004-Merging-r325085.patch b/0004-Merging-r325085.patch new file mode 100644 index 
0000000..6b5bd85 --- /dev/null +++ b/0004-Merging-r325085.patch @@ -0,0 +1,65 @@ +From de9a0f9c449d4b13c70eff8c9a3023948dc21cb7 Mon Sep 17 00:00:00 2001 +From: Reid Kleckner +Date: Wed, 14 Feb 2018 00:34:35 +0000 +Subject: [PATCH 4/4] Merging r325085: + ------------------------------------------------------------------------ + r325085 | rnk | 2018-02-13 16:24:29 -0800 (Tue, 13 Feb 2018) | 3 lines + +[X86] Remove dead code from retpoline thunk generation + +Follow-up to r325049 +------------------------------------------------------------------------ + + +git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_50@325091 91177308-0d34-0410-b5e6-96231b3b80d8 +--- + lib/Target/X86/X86RetpolineThunks.cpp | 26 -------------------------- + 1 file changed, 26 deletions(-) + +diff --git a/lib/Target/X86/X86RetpolineThunks.cpp b/lib/Target/X86/X86RetpolineThunks.cpp +index 59ace3f..d03826b 100644 +--- a/lib/Target/X86/X86RetpolineThunks.cpp ++++ b/lib/Target/X86/X86RetpolineThunks.cpp +@@ -74,7 +74,6 @@ private: + + void createThunkFunction(Module &M, StringRef Name); + void insertRegReturnAddrClobber(MachineBasicBlock &MBB, unsigned Reg); +- void insert32BitPushReturnAddrClobber(MachineBasicBlock &MBB); + void populateThunk(MachineFunction &MF, Optional Reg = None); + }; + +@@ -225,31 +224,6 @@ void X86RetpolineThunks::insertRegReturnAddrClobber(MachineBasicBlock &MBB, + .addReg(Reg); + } + +-void X86RetpolineThunks::insert32BitPushReturnAddrClobber( +- MachineBasicBlock &MBB) { +- // The instruction sequence we use to replace the return address without +- // a scratch register is somewhat complicated: +- // # Clear capture_spec from return address. +- // addl $4, %esp +- // # Top of stack words are: Callee, RA. Exchange Callee and RA. 
+- // pushl 4(%esp) # Push callee +- // pushl 4(%esp) # Push RA +- // popl 8(%esp) # Pop RA to final RA +- // popl (%esp) # Pop callee to next top of stack +- // retl # Ret to callee +- BuildMI(&MBB, DebugLoc(), TII->get(X86::ADD32ri), X86::ESP) +- .addReg(X86::ESP) +- .addImm(4); +- addRegOffset(BuildMI(&MBB, DebugLoc(), TII->get(X86::PUSH32rmm)), X86::ESP, +- false, 4); +- addRegOffset(BuildMI(&MBB, DebugLoc(), TII->get(X86::PUSH32rmm)), X86::ESP, +- false, 4); +- addRegOffset(BuildMI(&MBB, DebugLoc(), TII->get(X86::POP32rmm)), X86::ESP, +- false, 8); +- addRegOffset(BuildMI(&MBB, DebugLoc(), TII->get(X86::POP32rmm)), X86::ESP, +- false, 0); +-} +- + void X86RetpolineThunks::populateThunk(MachineFunction &MF, + Optional Reg) { + // Set MF properties. We never use vregs... +-- +1.8.3.1 + diff --git a/llvm.spec b/llvm.spec index 09f59e1..bcc4614 100644 --- a/llvm.spec +++ b/llvm.spec @@ -12,7 +12,7 @@ Name: llvm Version: %{maj_ver}.%{min_ver}.%{patch_ver} -Release: 3%{?dist} +Release: 4%{?dist} Summary: The Low Level Virtual Machine License: NCSA @@ -30,6 +30,10 @@ Patch6: 0001-Ignore-all-duplicate-frame-index-expression.patch Patch7: 0002-Reinstantiate-old-bad-deduplication-logic-that-was-r.patch Patch8: 0001-Merging-r323155.patch Patch9: 0001-Merging-r323915.patch +Patch10: 0001-Merging-r324449.patch +Patch11: 0002-Merging-r324645.patch +Patch12: 0003-Merging-r325049.patch +Patch13: 0004-Merging-r325085.patch BuildRequires: cmake BuildRequires: zlib-devel @@ -220,6 +224,9 @@ fi %{_libdir}/cmake/llvm/LLVMStaticExports.cmake %changelog +* Wed Mar 07 2018 Tom Stellard - 5.0.1-4 +- Backport more retpoline patches + * Tue Feb 06 2018 Tom Stellard - 5.0.1-3 - Backport retpoline support