tstellar / rpms / llvm

Forked from rpms/llvm 5 years ago
Clone
64558d2
From b4b2cc0cca3595185683aa7aa4d29c4a151a679e Mon Sep 17 00:00:00 2001
64558d2
From: Reid Kleckner <rnk@google.com>
64558d2
Date: Thu, 1 Feb 2018 21:31:35 +0000
64558d2
Subject: [PATCH] Merging r323915:
64558d2
 ------------------------------------------------------------------------
64558d2
 r323915 | chandlerc | 2018-01-31 12:56:37 -0800 (Wed, 31 Jan 2018) | 17 lines
64558d2
64558d2
[x86] Make the retpoline thunk insertion a machine function pass.
64558d2
64558d2
Summary:
64558d2
This removes the need for a machine module pass using some deeply
64558d2
questionable hacks. This should address PR36123 which is a case where in
64558d2
full LTO the memory usage of a machine module pass actually ended up
64558d2
being significant.
64558d2
64558d2
We should revert this on trunk as soon as we understand and fix the
64558d2
memory usage issue, but we should include this in any backports of
64558d2
retpolines themselves.
64558d2
64558d2
Reviewers: echristo, MatzeB
64558d2
64558d2
Subscribers: sanjoy, mcrosier, mehdi_amini, hiraditya, llvm-commits
64558d2
64558d2
Differential Revision: https://reviews.llvm.org/D42726
64558d2
------------------------------------------------------------------------
64558d2
64558d2
64558d2
git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_50@324009 91177308-0d34-0410-b5e6-96231b3b80d8
64558d2
---
64558d2
 lib/Target/X86/X86.h                  |   2 +-
64558d2
 lib/Target/X86/X86RetpolineThunks.cpp | 135 +++++++++++++++++++++-------------
64558d2
 test/CodeGen/X86/O0-pipeline.ll       |   3 +-
64558d2
 3 files changed, 87 insertions(+), 53 deletions(-)
64558d2
64558d2
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
64558d2
index 25e4b89..2e3ace2 100644
64558d2
--- a/lib/Target/X86/X86.h
64558d2
+++ b/lib/Target/X86/X86.h
64558d2
@@ -100,7 +100,7 @@ void initializeFixupBWInstPassPass(PassRegistry &);
64558d2
 FunctionPass *createX86EvexToVexInsts();
64558d2
 
64558d2
 /// This pass creates the thunks for the retpoline feature.
64558d2
-ModulePass *createX86RetpolineThunksPass();
64558d2
+FunctionPass *createX86RetpolineThunksPass();
64558d2
 
64558d2
 InstructionSelector *createX86InstructionSelector(const X86TargetMachine &TM,
64558d2
                                                   X86Subtarget &,
64558d2
diff --git a/lib/Target/X86/X86RetpolineThunks.cpp b/lib/Target/X86/X86RetpolineThunks.cpp
64558d2
index 6b4bc8a..223fa57 100644
64558d2
--- a/lib/Target/X86/X86RetpolineThunks.cpp
64558d2
+++ b/lib/Target/X86/X86RetpolineThunks.cpp
64558d2
@@ -38,18 +38,27 @@ using namespace llvm;
64558d2
 
64558d2
 #define DEBUG_TYPE "x86-retpoline-thunks"
64558d2
 
64558d2
+static const char ThunkNamePrefix[] = "__llvm_retpoline_";
64558d2
+static const char R11ThunkName[]    = "__llvm_retpoline_r11";
64558d2
+static const char EAXThunkName[]    = "__llvm_retpoline_eax";
64558d2
+static const char ECXThunkName[]    = "__llvm_retpoline_ecx";
64558d2
+static const char EDXThunkName[]    = "__llvm_retpoline_edx";
64558d2
+static const char PushThunkName[]   = "__llvm_retpoline_push";
64558d2
+
64558d2
 namespace {
64558d2
-class X86RetpolineThunks : public ModulePass {
64558d2
+class X86RetpolineThunks : public MachineFunctionPass {
64558d2
 public:
64558d2
   static char ID;
64558d2
 
64558d2
-  X86RetpolineThunks() : ModulePass(ID) {}
64558d2
+  X86RetpolineThunks() : MachineFunctionPass(ID) {}
64558d2
 
64558d2
   StringRef getPassName() const override { return "X86 Retpoline Thunks"; }
64558d2
 
64558d2
-  bool runOnModule(Module &M) override;
64558d2
+  bool doInitialization(Module &M) override;
64558d2
+  bool runOnMachineFunction(MachineFunction &F) override;
64558d2
 
64558d2
   void getAnalysisUsage(AnalysisUsage &AU) const override {
64558d2
+    MachineFunctionPass::getAnalysisUsage(AU);
64558d2
     AU.addRequired<MachineModuleInfo>();
64558d2
     AU.addPreserved<MachineModuleInfo>();
64558d2
   }
64558d2
@@ -61,51 +70,74 @@ private:
64558d2
   const X86Subtarget *STI;
64558d2
   const X86InstrInfo *TII;
64558d2
 
64558d2
-  Function *createThunkFunction(Module &M, StringRef Name);
64558d2
+  bool InsertedThunks;
64558d2
+
64558d2
+  void createThunkFunction(Module &M, StringRef Name);
64558d2
   void insertRegReturnAddrClobber(MachineBasicBlock &MBB, unsigned Reg);
64558d2
   void insert32BitPushReturnAddrClobber(MachineBasicBlock &MBB);
64558d2
-  void createThunk(Module &M, StringRef NameSuffix,
64558d2
-                   Optional<unsigned> Reg = None);
64558d2
+  void populateThunk(MachineFunction &MF, Optional<unsigned> Reg = None);
64558d2
 };
64558d2
 
64558d2
 } // end anonymous namespace
64558d2
 
64558d2
-ModulePass *llvm::createX86RetpolineThunksPass() {
64558d2
+FunctionPass *llvm::createX86RetpolineThunksPass() {
64558d2
   return new X86RetpolineThunks();
64558d2
 }
64558d2
 
64558d2
 char X86RetpolineThunks::ID = 0;
64558d2
 
64558d2
-bool X86RetpolineThunks::runOnModule(Module &M) {
64558d2
-  DEBUG(dbgs() << getPassName() << '\n');
64558d2
+bool X86RetpolineThunks::doInitialization(Module &M) {
64558d2
+  InsertedThunks = false;
64558d2
+  return false;
64558d2
+}
64558d2
 
64558d2
-  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
64558d2
-  assert(TPC && "X86-specific target pass should not be run without a target "
64558d2
-                "pass config!");
64558d2
+bool X86RetpolineThunks::runOnMachineFunction(MachineFunction &MF) {
64558d2
+  DEBUG(dbgs() << getPassName() << '\n');
64558d2
 
64558d2
-  MMI = &getAnalysis<MachineModuleInfo>();
64558d2
-  TM = &TPC->getTM<TargetMachine>();
64558d2
+  TM = &MF.getTarget();;
64558d2
+  STI = &MF.getSubtarget<X86Subtarget>();
64558d2
+  TII = STI->getInstrInfo();
64558d2
   Is64Bit = TM->getTargetTriple().getArch() == Triple::x86_64;
64558d2
 
64558d2
-  // Only add a thunk if we have at least one function that has the retpoline
64558d2
-  // feature enabled in its subtarget.
64558d2
-  // FIXME: Conditionalize on indirect calls so we don't emit a thunk when
64558d2
-  // nothing will end up calling it.
64558d2
-  // FIXME: It's a little silly to look at every function just to enumerate
64558d2
-  // the subtargets, but eventually we'll want to look at them for indirect
64558d2
-  // calls, so maybe this is OK.
64558d2
-  if (!llvm::any_of(M, [&](const Function &F) {
64558d2
-        // Save the subtarget we find for use in emitting the subsequent
64558d2
-        // thunk.
64558d2
-        STI = &TM->getSubtarget<X86Subtarget>(F);
64558d2
-        return STI->useRetpoline() && !STI->useRetpolineExternalThunk();
64558d2
-      }))
64558d2
-    return false;
64558d2
-
64558d2
-  // If we have a relevant subtarget, get the instr info as well.
64558d2
-  TII = STI->getInstrInfo();
64558d2
+  MMI = &getAnalysis<MachineModuleInfo>();
64558d2
+  Module &M = const_cast<Module &>(*MMI->getModule());
64558d2
+
64558d2
+  // If this function is not a thunk, check to see if we need to insert
64558d2
+  // a thunk.
64558d2
+  if (!MF.getName().startswith(ThunkNamePrefix)) {
64558d2
+    // If we've already inserted a thunk, nothing else to do.
64558d2
+    if (InsertedThunks)
64558d2
+      return false;
64558d2
+
64558d2
+    // Only add a thunk if one of the functions has the retpoline feature
64558d2
+    // enabled in its subtarget, and doesn't enable external thunks.
64558d2
+    // FIXME: Conditionalize on indirect calls so we don't emit a thunk when
64558d2
+    // nothing will end up calling it.
64558d2
+    // FIXME: It's a little silly to check the subtarget of every function
64558d2
+    // until one enables retpolines, but eventually we'll want to scan each
64558d2
+    // function for indirect calls, so maybe this is OK.
64558d2
+    if (!STI->useRetpoline() || STI->useRetpolineExternalThunk())
64558d2
+      return false;
64558d2
+
64558d2
+    // Otherwise, we need to insert the thunk.
64558d2
+    // WARNING: This is not really a well-behaved thing to do in a function
64558d2
+    // pass. We extract the module and insert a new function (and machine
64558d2
+    // function) directly into the module.
64558d2
+    if (Is64Bit)
64558d2
+      createThunkFunction(M, R11ThunkName);
64558d2
+    else
64558d2
+      for (StringRef Name :
64558d2
+           {EAXThunkName, ECXThunkName, EDXThunkName, PushThunkName})
64558d2
+        createThunkFunction(M, Name);
64558d2
+    InsertedThunks = true;
64558d2
+    return true;
64558d2
+  }
64558d2
 
64558d2
+  // If this *is* a thunk function, we need to populate it with the correct MI.
64558d2
   if (Is64Bit) {
64558d2
+    assert(MF.getName() == "__llvm_retpoline_r11" &&
64558d2
+           "Should only have an r11 thunk on 64-bit targets");
64558d2
+
64558d2
     // __llvm_retpoline_r11:
64558d2
     //   callq .Lr11_call_target
64558d2
     // .Lr11_capture_spec:
64558d2
@@ -116,8 +148,7 @@ bool X86RetpolineThunks::runOnModule(Module &M) {
64558d2
     // .Lr11_call_target:
64558d2
     //   movq %r11, (%rsp)
64558d2
     //   retq
64558d2
-
64558d2
-    createThunk(M, "r11", X86::R11);
64558d2
+    populateThunk(MF, X86::R11);
64558d2
   } else {
64558d2
     // For 32-bit targets we need to emit a collection of thunks for various
64558d2
     // possible scratch registers as well as a fallback that is used when
64558d2
@@ -161,16 +192,25 @@ bool X86RetpolineThunks::runOnModule(Module &M) {
64558d2
     //         popl 8(%esp)   # Pop RA to final RA
64558d2
     //         popl (%esp)    # Pop callee to next top of stack
64558d2
     //         retl           # Ret to callee
64558d2
-    createThunk(M, "eax", X86::EAX);
64558d2
-    createThunk(M, "ecx", X86::ECX);
64558d2
-    createThunk(M, "edx", X86::EDX);
64558d2
-    createThunk(M, "push");
64558d2
+    if (MF.getName() == EAXThunkName)
64558d2
+      populateThunk(MF, X86::EAX);
64558d2
+    else if (MF.getName() == ECXThunkName)
64558d2
+      populateThunk(MF, X86::ECX);
64558d2
+    else if (MF.getName() == EDXThunkName)
64558d2
+      populateThunk(MF, X86::EDX);
64558d2
+    else if (MF.getName() == PushThunkName)
64558d2
+      populateThunk(MF);
64558d2
+    else
64558d2
+      llvm_unreachable("Invalid thunk name on x86-32!");
64558d2
   }
64558d2
 
64558d2
   return true;
64558d2
 }
64558d2
 
64558d2
-Function *X86RetpolineThunks::createThunkFunction(Module &M, StringRef Name) {
64558d2
+void X86RetpolineThunks::createThunkFunction(Module &M, StringRef Name) {
64558d2
+  assert(Name.startswith(ThunkNamePrefix) &&
64558d2
+         "Created a thunk with an unexpected prefix!");
64558d2
+
64558d2
   LLVMContext &Ctx = M.getContext();
64558d2
   auto Type = FunctionType::get(Type::getVoidTy(Ctx), false);
64558d2
   Function *F =
64558d2
@@ -190,7 +230,6 @@ Function *X86RetpolineThunks::createThunkFunction(Module &M, StringRef Name) {
64558d2
   IRBuilder<> Builder(Entry);
64558d2
 
64558d2
   Builder.CreateRetVoid();
64558d2
-  return F;
64558d2
 }
64558d2
 
64558d2
 void X86RetpolineThunks::insertRegReturnAddrClobber(MachineBasicBlock &MBB,
64558d2
@@ -200,6 +239,7 @@ void X86RetpolineThunks::insertRegReturnAddrClobber(MachineBasicBlock &MBB,
64558d2
   addRegOffset(BuildMI(&MBB, DebugLoc(), TII->get(MovOpc)), SPReg, false, 0)
64558d2
       .addReg(Reg);
64558d2
 }
64558d2
+
64558d2
 void X86RetpolineThunks::insert32BitPushReturnAddrClobber(
64558d2
     MachineBasicBlock &MBB) {
64558d2
   // The instruction sequence we use to replace the return address without
64558d2
@@ -225,21 +265,16 @@ void X86RetpolineThunks::insert32BitPushReturnAddrClobber(
64558d2
                false, 0);
64558d2
 }
64558d2
 
64558d2
-void X86RetpolineThunks::createThunk(Module &M, StringRef NameSuffix,
64558d2
-                                     Optional<unsigned> Reg) {
64558d2
-  Function &F =
64558d2
-      *createThunkFunction(M, (Twine("__llvm_retpoline_") + NameSuffix).str());
64558d2
-  MachineFunction &MF = MMI->getOrCreateMachineFunction(F);
64558d2
-
64558d2
+void X86RetpolineThunks::populateThunk(MachineFunction &MF,
64558d2
+                                       Optional<unsigned> Reg) {
64558d2
   // Set MF properties. We never use vregs...
64558d2
   MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
64558d2
 
64558d2
-  BasicBlock &OrigEntryBB = F.getEntryBlock();
64558d2
-  MachineBasicBlock *Entry = MF.CreateMachineBasicBlock(&OrigEntryBB);
64558d2
-  MachineBasicBlock *CaptureSpec = MF.CreateMachineBasicBlock(&OrigEntryBB);
64558d2
-  MachineBasicBlock *CallTarget = MF.CreateMachineBasicBlock(&OrigEntryBB);
64558d2
+  MachineBasicBlock *Entry = &MF.front();
64558d2
+  Entry->clear();
64558d2
 
64558d2
-  MF.push_back(Entry);
64558d2
+  MachineBasicBlock *CaptureSpec = MF.CreateMachineBasicBlock(Entry->getBasicBlock());
64558d2
+  MachineBasicBlock *CallTarget = MF.CreateMachineBasicBlock(Entry->getBasicBlock());
64558d2
   MF.push_back(CaptureSpec);
64558d2
   MF.push_back(CallTarget);
64558d2
 
64558d2
diff --git a/test/CodeGen/X86/O0-pipeline.ll b/test/CodeGen/X86/O0-pipeline.ll
64558d2
index f9bd66f..123dcf6 100644
64558d2
--- a/test/CodeGen/X86/O0-pipeline.ll
64558d2
+++ b/test/CodeGen/X86/O0-pipeline.ll
64558d2
@@ -56,8 +56,7 @@
64558d2
 ; CHECK-NEXT:       Machine Natural Loop Construction
64558d2
 ; CHECK-NEXT:       Insert XRay ops
64558d2
 ; CHECK-NEXT:       Implement the 'patchable-function' attribute
64558d2
-; CHECK-NEXT:     X86 Retpoline Thunks
64558d2
-; CHECK-NEXT:     FunctionPass Manager
64558d2
+; CHECK-NEXT:       X86 Retpoline Thunks
64558d2
 ; CHECK-NEXT:       Lazy Machine Block Frequency Analysis
64558d2
 ; CHECK-NEXT:       Machine Optimization Remark Emitter
64558d2
 ; CHECK-NEXT:       MachineDominator Tree Construction
64558d2
-- 
64558d2
1.8.3.1
64558d2