40733c
From 6e361d895dda4600a85e01c72ff219474b5c7190 Mon Sep 17 00:00:00 2001
40733c
From: Kavon Farvardin <kavon@farvard.in>
40733c
Date: Thu, 4 Oct 2018 13:44:55 -0400
40733c
Subject: [PATCH] Multiple fixes / improvements for LLVM backend
40733c
40733c
- Fix for #13904 -- stop "trashing" callee-saved registers, since it is
40733c
  not actually doing anything useful.
40733c
40733c
- Fix for #14251 -- fixes the calling convention for functions passing
40733c
  raw SSE-register values by adding padding as needed to get the values
40733c
  in the right registers. This problem cropped up when some args were
40733c
  unused an dropped from the live list.
40733c
40733c
- Fixed a typo in 'readnone' attribute
40733c
40733c
- Added 'lower-expect' pass to level 0 LLVM optimization passes to
40733c
  improve block layout in LLVM for stack checks, etc.
40733c
40733c
Test Plan: `make test WAYS=optllvm` and `make test WAYS=llvm`
40733c
40733c
Reviewers: bgamari, simonmar, angerman
40733c
40733c
Reviewed By: angerman
40733c
40733c
Subscribers: rwbarton, carter
40733c
40733c
GHC Trac Issues: #13904, #14251
40733c
40733c
Differential Revision: https://phabricator.haskell.org/D5190
40733c
40733c
(cherry picked from commit adcb5fb47c0942671d409b940d8884daa9359ca4)
40733c
---
40733c
 compiler/llvmGen/Llvm/Types.hs           |  2 +-
40733c
 compiler/llvmGen/LlvmCodeGen/Base.hs     | 62 ++++++++++++++++++++----
40733c
 compiler/llvmGen/LlvmCodeGen/CodeGen.hs  | 59 +++++-----------------
40733c
 compiler/main/DriverPipeline.hs          |  2 +-
40733c
 testsuite/tests/codeGen/should_run/all.T |  4 +-
40733c
 5 files changed, 67 insertions(+), 62 deletions(-)
40733c
40733c
diff --git a/compiler/llvmGen/Llvm/Types.hs b/compiler/llvmGen/Llvm/Types.hs
40733c
index 87111499fc0..c1c51afcf0f 100644
40733c
--- a/compiler/llvmGen/Llvm/Types.hs
40733c
+++ b/compiler/llvmGen/Llvm/Types.hs
40733c
@@ -560,7 +560,7 @@ instance Outputable LlvmFuncAttr where
40733c
   ppr OptSize            = text "optsize"
40733c
   ppr NoReturn           = text "noreturn"
40733c
   ppr NoUnwind           = text "nounwind"
40733c
-  ppr ReadNone           = text "readnon"
40733c
+  ppr ReadNone           = text "readnone"
40733c
   ppr ReadOnly           = text "readonly"
40733c
   ppr Ssp                = text "ssp"
40733c
   ppr SspReq             = text "ssqreq"
40733c
diff --git a/compiler/llvmGen/LlvmCodeGen/Base.hs b/compiler/llvmGen/LlvmCodeGen/Base.hs
40733c
index 6e20da48c1b..ec91bacc4c8 100644
40733c
--- a/compiler/llvmGen/LlvmCodeGen/Base.hs
40733c
+++ b/compiler/llvmGen/LlvmCodeGen/Base.hs
40733c
@@ -26,7 +26,7 @@ module LlvmCodeGen.Base (
40733c
 
40733c
         cmmToLlvmType, widthToLlvmFloat, widthToLlvmInt, llvmFunTy,
40733c
         llvmFunSig, llvmFunArgs, llvmStdFunAttrs, llvmFunAlign, llvmInfAlign,
40733c
-        llvmPtrBits, tysToParams, llvmFunSection,
40733c
+        llvmPtrBits, tysToParams, llvmFunSection, padLiveArgs, isSSE,
40733c
 
40733c
         strCLabel_llvm, strDisplayName_llvm, strProcedureName_llvm,
40733c
         getGlobalPtr, generateExternDecls,
40733c
@@ -58,6 +58,8 @@ import ErrUtils
40733c
 import qualified Stream
40733c
 
40733c
 import Control.Monad (ap)
40733c
+import Data.List (sort)
40733c
+import Data.Maybe (mapMaybe)
40733c
 
40733c
 -- ----------------------------------------------------------------------------
40733c
 -- * Some Data Types
40733c
@@ -147,16 +149,58 @@ llvmFunSection dflags lbl
40733c
 -- | A Function's arguments
40733c
 llvmFunArgs :: DynFlags -> LiveGlobalRegs -> [LlvmVar]
40733c
 llvmFunArgs dflags live =
40733c
-    map (lmGlobalRegArg dflags) (filter isPassed (activeStgRegs platform))
40733c
+    map (lmGlobalRegArg dflags) (filter isPassed allRegs)
40733c
     where platform = targetPlatform dflags
40733c
-          isLive r = not (isSSE r) || r `elem` alwaysLive || r `elem` live
40733c
+          allRegs = activeStgRegs platform
40733c
+          paddedLive = map (\(_,r) -> r) $ padLiveArgs live
40733c
+          isLive r = r `elem` alwaysLive || r `elem` paddedLive
40733c
           isPassed r = not (isSSE r) || isLive r
40733c
-          isSSE (FloatReg _)  = True
40733c
-          isSSE (DoubleReg _) = True
40733c
-          isSSE (XmmReg _)    = True
40733c
-          isSSE (YmmReg _)    = True
40733c
-          isSSE (ZmmReg _)    = True
40733c
-          isSSE _             = False
40733c
+
40733c
+
40733c
+isSSE :: GlobalReg -> Bool
40733c
+isSSE (FloatReg _)  = True
40733c
+isSSE (DoubleReg _) = True
40733c
+isSSE (XmmReg _)    = True
40733c
+isSSE (YmmReg _)    = True
40733c
+isSSE (ZmmReg _)    = True
40733c
+isSSE _             = False
40733c
+
40733c
+sseRegNum :: GlobalReg -> Maybe Int
40733c
+sseRegNum (FloatReg i)  = Just i
40733c
+sseRegNum (DoubleReg i) = Just i
40733c
+sseRegNum (XmmReg i)    = Just i
40733c
+sseRegNum (YmmReg i)    = Just i
40733c
+sseRegNum (ZmmReg i)    = Just i
40733c
+sseRegNum _             = Nothing
40733c
+
40733c
+-- the bool indicates whether the global reg was added as padding.
40733c
+-- the returned list is not sorted in any particular order,
40733c
+-- but does indicate the set of live registers needed, with SSE padding.
40733c
+padLiveArgs :: LiveGlobalRegs -> [(Bool, GlobalReg)]
40733c
+padLiveArgs live = allRegs
40733c
+    where
40733c
+        sseRegNums = sort $ mapMaybe sseRegNum live
40733c
+        (_, padding) = foldl assignSlots (1, []) $ sseRegNums
40733c
+        allRegs = padding ++ map (\r -> (False, r)) live
40733c
+
40733c
+        assignSlots (i, acc) regNum
40733c
+            | i == regNum = -- don't need padding here
40733c
+                  (i+1, acc)
40733c
+            | i < regNum = let -- add padding for slots i .. regNum-1
40733c
+                  numNeeded = regNum-i
40733c
+                  acc' = genPad i numNeeded ++ acc
40733c
+                in
40733c
+                  (regNum+1, acc')
40733c
+            | otherwise = error "padLiveArgs -- i > regNum ??"
40733c
+
40733c
+        genPad start n =
40733c
+            take n $ flip map (iterate (+1) start) (\i ->
40733c
+                (True, FloatReg i))
40733c
+                -- NOTE: Picking float should be fine for the following reasons:
40733c
+                -- (1) Float aliases with all the other SSE register types on
40733c
+                -- the given platform.
40733c
+                -- (2) The argument is not live anyways.
40733c
+
40733c
 
40733c
 -- | Llvm standard fun attributes
40733c
 llvmStdFunAttrs :: [LlvmFuncAttr]
40733c
diff --git a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
40733c
index e812dd445f1..a7121b7909a 100644
40733c
--- a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
40733c
+++ b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
40733c
@@ -14,7 +14,7 @@ import LlvmCodeGen.Base
40733c
 import LlvmCodeGen.Regs
40733c
 
40733c
 import BlockId
40733c
-import CodeGen.Platform ( activeStgRegs, callerSaves )
40733c
+import CodeGen.Platform ( activeStgRegs )
40733c
 import CLabel
40733c
 import Cmm
40733c
 import PprCmm
40733c
@@ -211,7 +211,6 @@ genCall t@(PrimTarget (MO_Prefetch_Data localityInt)) [] args
40733c
     fptr    <- liftExprData $ getFunPtr funTy t
40733c
     argVars' <- castVarsW Signed $ zip argVars argTy
40733c
 
40733c
-    doTrashStmts
40733c
     let argSuffix = [mkIntLit i32 0, mkIntLit i32 localityInt, mkIntLit i32 1]
40733c
     statement $ Expr $ Call StdCall fptr (argVars' ++ argSuffix) []
40733c
   | otherwise = panic $ "prefetch locality level integer must be between 0 and 3, given: " ++ (show localityInt)
40733c
@@ -294,7 +293,6 @@ genCall t@(PrimTarget op) [] args
40733c
     fptr          <- getFunPtrW funTy t
40733c
     argVars' <- castVarsW Signed $ zip argVars argTy
40733c
 
40733c
-    doTrashStmts
40733c
     let alignVal = mkIntLit i32 align
40733c
         arguments = argVars' ++ (alignVal:isVolVal)
40733c
     statement $ Expr $ Call StdCall fptr arguments []
40733c
@@ -446,7 +444,6 @@ genCall target res args = runStmtsDecls $ do
40733c
                  | never_returns     = statement $ Unreachable
40733c
                  | otherwise         = return ()
40733c
 
40733c
-    doTrashStmts
40733c
 
40733c
     -- make the actual call
40733c
     case retTy of
40733c
@@ -1759,12 +1756,9 @@ genLit _ CmmHighStackMark
40733c
 funPrologue :: LiveGlobalRegs -> [CmmBlock] -> LlvmM StmtData
40733c
 funPrologue live cmmBlocks = do
40733c
 
40733c
-  trash <- getTrashRegs
40733c
   let getAssignedRegs :: CmmNode O O -> [CmmReg]
40733c
       getAssignedRegs (CmmAssign reg _)  = [reg]
40733c
-      -- Calls will trash all registers. Unfortunately, this needs them to
40733c
-      -- be stack-allocated in the first place.
40733c
-      getAssignedRegs (CmmUnsafeForeignCall _ rs _) = map CmmGlobal trash ++ map CmmLocal rs
40733c
+      getAssignedRegs (CmmUnsafeForeignCall _ rs _) = map CmmLocal rs
40733c
       getAssignedRegs _                  = []
40733c
       getRegsBlock (_, body, _)          = concatMap getAssignedRegs $ blockToList body
40733c
       assignedRegs = nub $ concatMap (getRegsBlock . blockSplit) cmmBlocks
40733c
@@ -1794,14 +1788,9 @@ funPrologue live cmmBlocks = do
40733c
 funEpilogue :: LiveGlobalRegs -> LlvmM ([LlvmVar], LlvmStatements)
40733c
 funEpilogue live = do
40733c
 
40733c
-    -- Have information and liveness optimisation is enabled?
40733c
-    let liveRegs = alwaysLive ++ live
40733c
-        isSSE (FloatReg _)  = True
40733c
-        isSSE (DoubleReg _) = True
40733c
-        isSSE (XmmReg _)    = True
40733c
-        isSSE (YmmReg _)    = True
40733c
-        isSSE (ZmmReg _)    = True
40733c
-        isSSE _             = False
40733c
+    -- the bool indicates whether the register is padding.
40733c
+    let alwaysNeeded = map (\r -> (False, r)) alwaysLive
40733c
+        livePadded = alwaysNeeded ++ padLiveArgs live
40733c
 
40733c
     -- Set to value or "undef" depending on whether the register is
40733c
     -- actually live
40733c
@@ -1813,39 +1802,17 @@ funEpilogue live = do
40733c
           let ty = (pLower . getVarType $ lmGlobalRegVar dflags r)
40733c
           return (Just $ LMLitVar $ LMUndefLit ty, nilOL)
40733c
     platform <- getDynFlag targetPlatform
40733c
-    loads <- flip mapM (activeStgRegs platform) $ \r -> case () of
40733c
-      _ | r `elem` liveRegs  -> loadExpr r
40733c
-        | not (isSSE r)      -> loadUndef r
40733c
+    let allRegs = activeStgRegs platform
40733c
+    loads <- flip mapM allRegs $ \r -> case () of
40733c
+      _ | (False, r) `elem` livePadded
40733c
+                             -> loadExpr r   -- if r is not padding, load it
40733c
+        | not (isSSE r) || (True, r) `elem` livePadded
40733c
+                             -> loadUndef r
40733c
         | otherwise          -> return (Nothing, nilOL)
40733c
 
40733c
     let (vars, stmts) = unzip loads
40733c
     return (catMaybes vars, concatOL stmts)
40733c
 
40733c
-
40733c
--- | A series of statements to trash all the STG registers.
40733c
---
40733c
--- In LLVM we pass the STG registers around everywhere in function calls.
40733c
--- So this means LLVM considers them live across the entire function, when
40733c
--- in reality they usually aren't. For Caller save registers across C calls
40733c
--- the saving and restoring of them is done by the Cmm code generator,
40733c
--- using Cmm local vars. So to stop LLVM saving them as well (and saving
40733c
--- all of them since it thinks they're always live, we trash them just
40733c
--- before the call by assigning the 'undef' value to them. The ones we
40733c
--- need are restored from the Cmm local var and the ones we don't need
40733c
--- are fine to be trashed.
40733c
-getTrashStmts :: LlvmM LlvmStatements
40733c
-getTrashStmts = do
40733c
-  regs <- getTrashRegs
40733c
-  stmts <- flip mapM regs $ \ r -> do
40733c
-    reg <- getCmmReg (CmmGlobal r)
40733c
-    let ty = (pLower . getVarType) reg
40733c
-    return $ Store (LMLitVar $ LMUndefLit ty) reg
40733c
-  return $ toOL stmts
40733c
-
40733c
-getTrashRegs :: LlvmM [GlobalReg]
40733c
-getTrashRegs = do plat <- getLlvmPlatform
40733c
-                  return $ filter (callerSaves plat) (activeStgRegs plat)
40733c
-
40733c
 -- | Get a function pointer to the CLabel specified.
40733c
 --
40733c
 -- This is for Haskell functions, function type is assumed, so doesn't work
40733c
@@ -1967,7 +1934,3 @@ getCmmRegW = lift . getCmmReg
40733c
 genLoadW :: Atomic -> CmmExpr -> CmmType -> WriterT LlvmAccum LlvmM LlvmVar
40733c
 genLoadW atomic e ty = liftExprData $ genLoad atomic e ty
40733c
 
40733c
-doTrashStmts :: WriterT LlvmAccum LlvmM ()
40733c
-doTrashStmts = do
40733c
-    stmts <- lift getTrashStmts
40733c
-    tell $ LlvmAccum stmts mempty
40733c
diff --git a/compiler/main/DriverPipeline.hs b/compiler/main/DriverPipeline.hs
40733c
index 86dd913461c..f4d5e7f553c 100644
40733c
--- a/compiler/main/DriverPipeline.hs
40733c
+++ b/compiler/main/DriverPipeline.hs
40733c
@@ -1465,7 +1465,7 @@ runPhase (RealPhase LlvmOpt) input_fn dflags
40733c
         -- we always (unless -optlo specified) run Opt since we rely on it to
40733c
         -- fix up some pretty big deficiencies in the code we generate
40733c
         llvmOpts = case optLevel dflags of
40733c
-          0 -> "-mem2reg -globalopt"
40733c
+          0 -> "-mem2reg -globalopt -lower-expect"
40733c
           1 -> "-O1 -globalopt"
40733c
           _ -> "-O2"
40733c