From 7a93d34863cf1492988172c3040b5e7a7c384907 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Aug 10 2018 15:32:39 +0000 Subject: 7.0.0-rc1 Release - Reduce the number of enabled targets based on the architecture. - Drop s390 detection patch, LLVM does not support s390 codegen. --- diff --git a/0001-Export-LLVM_DYLIB_COMPONENTS-in-LLVMConfig.cmake.patch b/0001-Export-LLVM_DYLIB_COMPONENTS-in-LLVMConfig.cmake.patch deleted file mode 100644 index c5c6c63..0000000 --- a/0001-Export-LLVM_DYLIB_COMPONENTS-in-LLVMConfig.cmake.patch +++ /dev/null @@ -1,39 +0,0 @@ -From 4d613a84ce271c6225068bef67d727ae02b2e3b1 Mon Sep 17 00:00:00 2001 -From: Pavel Labath -Date: Wed, 14 Mar 2018 09:28:38 +0000 -Subject: [PATCH] Export LLVM_DYLIB_COMPONENTS in LLVMConfig.cmake - -Summary: -This is needed so that external projects (e.g. a standalone build of -lldb) can link to the LLVM shared library via the USE_SHARED argument of -llvm_config. Without this, llvm_config would add LLVM to the link list, -but then also add the constituent static libraries, resulting in -multiply defined symbols. 
- -Reviewers: beanz, mgorny - -Subscribers: llvm-commits - -Differential Revision: https://reviews.llvm.org/D44391 - -git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@327484 91177308-0d34-0410-b5e6-96231b3b80d8 ---- - cmake/modules/LLVMConfig.cmake.in | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/cmake/modules/LLVMConfig.cmake.in b/cmake/modules/LLVMConfig.cmake.in -index fe4df52..e700186 100644 ---- a/cmake/modules/LLVMConfig.cmake.in -+++ b/cmake/modules/LLVMConfig.cmake.in -@@ -13,6 +13,8 @@ set(LLVM_COMMON_DEPENDS @LLVM_COMMON_DEPENDS@) - - set(LLVM_AVAILABLE_LIBS @LLVM_AVAILABLE_LIBS@) - -+set(LLVM_DYLIB_COMPONENTS @LLVM_DYLIB_COMPONENTS@) -+ - set(LLVM_ALL_TARGETS @LLVM_ALL_TARGETS@) - - set(LLVM_TARGETS_TO_BUILD @LLVM_TARGETS_TO_BUILD@) --- -1.8.3.1 - diff --git a/0001-Filter-out-cxxflags-not-supported-by-clang.patch b/0001-Filter-out-cxxflags-not-supported-by-clang.patch index 5511315..31b819b 100644 --- a/0001-Filter-out-cxxflags-not-supported-by-clang.patch +++ b/0001-Filter-out-cxxflags-not-supported-by-clang.patch @@ -1,4 +1,4 @@ -From acdb4ab5aa8076469fa551cc79e6bc7bbe8c06a5 Mon Sep 17 00:00:00 2001 +From 5f7fd92155db77c7608e3a07e5dcfad1ec7bd4e4 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 16 Mar 2018 07:52:33 -0700 Subject: [PATCH] Filter out cxxflags not supported by clang @@ -8,10 +8,10 @@ Subject: [PATCH] Filter out cxxflags not supported by clang 1 file changed, 4 insertions(+) diff --git a/tools/llvm-config/CMakeLists.txt b/tools/llvm-config/CMakeLists.txt -index 25f99ce..922d4c5 100644 +index a0bd36c..4193b0e 100644 --- a/tools/llvm-config/CMakeLists.txt +++ b/tools/llvm-config/CMakeLists.txt -@@ -33,7 +33,11 @@ set(LLVM_SRC_ROOT ${LLVM_MAIN_SRC_DIR}) +@@ -34,7 +34,11 @@ set(LLVM_SRC_ROOT ${LLVM_MAIN_SRC_DIR}) set(LLVM_OBJ_ROOT ${LLVM_BINARY_DIR}) set(LLVM_CPPFLAGS "${CMAKE_CPP_FLAGS} ${CMAKE_CPP_FLAGS_${uppercase_CMAKE_BUILD_TYPE}} ${LLVM_DEFINITIONS}") set(LLVM_CFLAGS "${CMAKE_C_FLAGS} 
${CMAKE_C_FLAGS_${uppercase_CMAKE_BUILD_TYPE}} ${LLVM_DEFINITIONS}") @@ -22,7 +22,7 @@ index 25f99ce..922d4c5 100644 +STRING(REGEX REPLACE "-fcf-protection" "" LLVM_CXXFLAGS ${LLVM_CXXFLAGS}) set(LLVM_BUILD_SYSTEM cmake) set(LLVM_HAS_RTTI ${LLVM_CONFIG_HAS_RTTI}) - set(LLVM_DYLIB_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}${LLVM_VERSION_SUFFIX}") + set(LLVM_DYLIB_VERSION "${LLVM_VERSION_MAJOR}${LLVM_VERSION_SUFFIX}") -- 1.8.3.1 diff --git a/0001-PowerPC-Do-not-round-values-prior-to-converting-to-i.patch b/0001-PowerPC-Do-not-round-values-prior-to-converting-to-i.patch deleted file mode 100644 index 4c97cc6..0000000 --- a/0001-PowerPC-Do-not-round-values-prior-to-converting-to-i.patch +++ /dev/null @@ -1,919 +0,0 @@ -From 88ad713b81c2f51dd8405b251f9825b0bca6e57d Mon Sep 17 00:00:00 2001 -From: Nemanja Ivanovic -Date: Thu, 2 Aug 2018 00:03:22 +0000 -Subject: [PATCH] [PowerPC] Do not round values prior to converting to integer - -Adding the FP_ROUND nodes when combining FP_TO_[SU]INT of elements -feeding a BUILD_VECTOR into an FP_TO_[SU]INT of the built vector -loses precision. This patch removes the code that adds these nodes -to true f64 operands. It also adds patterns required to ensure -the code is still vectorized rather than converting individual -elements and inserting into a vector. 
- -Fixes https://bugs.llvm.org/show_bug.cgi?id=38342 - -Differential Revision: https://reviews.llvm.org/D50121 - -git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@338658 91177308-0d34-0410-b5e6-96231b3b80d8 ---- - lib/Target/PowerPC/PPCISelLowering.cpp | 22 +- - lib/Target/PowerPC/PPCInstrVSX.td | 86 +++++++ - test/CodeGen/PowerPC/build-vector-tests.ll | 357 +++++++++++++---------------- - 3 files changed, 258 insertions(+), 207 deletions(-) - -diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp -index f622b05..527ec5a 100644 ---- a/lib/Target/PowerPC/PPCISelLowering.cpp -+++ b/lib/Target/PowerPC/PPCISelLowering.cpp -@@ -11560,6 +11560,14 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N, - ShiftCst); - } - -+// Is this an extending load from an f32 to an f64? -+static bool isFPExtLoad(SDValue Op) { -+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode())) -+ return LD->getExtensionType() == ISD::EXTLOAD && -+ Op.getValueType() == MVT::f64; -+ return false; -+} -+ - /// \brief Reduces the number of fp-to-int conversion when building a vector. - /// - /// If this vector is built out of floating to integer conversions, -@@ -11594,11 +11602,18 @@ combineElementTruncationToVectorTruncation(SDNode *N, - SmallVector<SDValue, 4> Ops; - EVT TargetVT = N->getValueType(0); - for (int i = 0, e = N->getNumOperands(); i < e; ++i) { -- if (N->getOperand(i).getOpcode() != PPCISD::MFVSR) -+ SDValue NextOp = N->getOperand(i); -+ if (NextOp.getOpcode() != PPCISD::MFVSR) - return SDValue(); -- unsigned NextConversion = N->getOperand(i).getOperand(0).getOpcode(); -+ unsigned NextConversion = NextOp.getOperand(0).getOpcode(); - if (NextConversion != FirstConversion) - return SDValue(); -+ // If we are converting to 32-bit integers, we need to add an FP_ROUND. -+ // This is not valid if the input was originally double precision. 
It is -+ // also not profitable to do unless this is an extending load in which -+ // case doing this combine will allow us to combine consecutive loads. -+ if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0))) -+ return SDValue(); - if (N->getOperand(i) != FirstInput) - IsSplat = false; - } -@@ -11612,8 +11627,9 @@ combineElementTruncationToVectorTruncation(SDNode *N, - // Now that we know we have the right type of node, get its operands - for (int i = 0, e = N->getNumOperands(); i < e; ++i) { - SDValue In = N->getOperand(i).getOperand(0); -- // For 32-bit values, we need to add an FP_ROUND node. - if (Is32Bit) { -+ // For 32-bit values, we need to add an FP_ROUND node (if we made it -+ // here, we know that all inputs are extending loads so this is safe). - if (In.isUndef()) - Ops.push_back(DAG.getUNDEF(SrcVT)); - else { -diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td -index 6f71978..1f48473 100644 ---- a/lib/Target/PowerPC/PPCInstrVSX.td -+++ b/lib/Target/PowerPC/PPCInstrVSX.td -@@ -3100,6 +3100,17 @@ def DblToFlt { - dag B1 = (f32 (fpround (f64 (extractelt v2f64:$B, 1)))); - } - -+def ExtDbl { -+ dag A0S = (i32 (PPCmfvsr (f64 (PPCfctiwz (f64 (extractelt v2f64:$A, 0)))))); -+ dag A1S = (i32 (PPCmfvsr (f64 (PPCfctiwz (f64 (extractelt v2f64:$A, 1)))))); -+ dag B0S = (i32 (PPCmfvsr (f64 (PPCfctiwz (f64 (extractelt v2f64:$B, 0)))))); -+ dag B1S = (i32 (PPCmfvsr (f64 (PPCfctiwz (f64 (extractelt v2f64:$B, 1)))))); -+ dag A0U = (i32 (PPCmfvsr (f64 (PPCfctiwuz (f64 (extractelt v2f64:$A, 0)))))); -+ dag A1U = (i32 (PPCmfvsr (f64 (PPCfctiwuz (f64 (extractelt v2f64:$A, 1)))))); -+ dag B0U = (i32 (PPCmfvsr (f64 (PPCfctiwuz (f64 (extractelt v2f64:$B, 0)))))); -+ dag B1U = (i32 (PPCmfvsr (f64 (PPCfctiwuz (f64 (extractelt v2f64:$B, 1)))))); -+} -+ - def ByteToWord { - dag LE_A0 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 0)), i8)); - dag LE_A1 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 4)), i8)); -@@ -3177,9 +3188,15 
@@ def FltToULong { - } - def DblToInt { - dag A = (i32 (PPCmfvsr (f64 (PPCfctiwz f64:$A)))); -+ dag B = (i32 (PPCmfvsr (f64 (PPCfctiwz f64:$B)))); -+ dag C = (i32 (PPCmfvsr (f64 (PPCfctiwz f64:$C)))); -+ dag D = (i32 (PPCmfvsr (f64 (PPCfctiwz f64:$D)))); - } - def DblToUInt { - dag A = (i32 (PPCmfvsr (f64 (PPCfctiwuz f64:$A)))); -+ dag B = (i32 (PPCmfvsr (f64 (PPCfctiwuz f64:$B)))); -+ dag C = (i32 (PPCmfvsr (f64 (PPCfctiwuz f64:$C)))); -+ dag D = (i32 (PPCmfvsr (f64 (PPCfctiwuz f64:$D)))); - } - def DblToLong { - dag A = (i64 (PPCmfvsr (f64 (PPCfctidz f64:$A)))); -@@ -3218,6 +3235,47 @@ def MrgFP { - dag BAlToFlt = (XVCVDPSP (XXPERMDI $B, $A, 3)); - } - -+// Word-element merge dags - conversions from f64 to i32 merged into vectors. -+def MrgWords { -+ // For big endian, we merge low and hi doublewords (A, B). -+ dag A0B0 = (v2f64 (XXPERMDI v2f64:$A, v2f64:$B, 0)); -+ dag A1B1 = (v2f64 (XXPERMDI v2f64:$A, v2f64:$B, 3)); -+ dag CVA1B1S = (v4i32 (XVCVDPSXWS A1B1)); -+ dag CVA0B0S = (v4i32 (XVCVDPSXWS A0B0)); -+ dag CVA1B1U = (v4i32 (XVCVDPUXWS A1B1)); -+ dag CVA0B0U = (v4i32 (XVCVDPUXWS A0B0)); -+ -+ // For little endian, we merge low and hi doublewords (B, A). -+ dag B1A1 = (v2f64 (XXPERMDI v2f64:$B, v2f64:$A, 0)); -+ dag B0A0 = (v2f64 (XXPERMDI v2f64:$B, v2f64:$A, 3)); -+ dag CVB1A1S = (v4i32 (XVCVDPSXWS B1A1)); -+ dag CVB0A0S = (v4i32 (XVCVDPSXWS B0A0)); -+ dag CVB1A1U = (v4i32 (XVCVDPUXWS B1A1)); -+ dag CVB0A0U = (v4i32 (XVCVDPUXWS B0A0)); -+ -+ // For big endian, we merge hi doublewords of (A, C) and (B, D), convert -+ // then merge. 
-+ dag AC = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$A, VSRC), -+ (COPY_TO_REGCLASS f64:$C, VSRC), 0)); -+ dag BD = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$B, VSRC), -+ (COPY_TO_REGCLASS f64:$D, VSRC), 0)); -+ dag CVACS = (v4i32 (XVCVDPSXWS AC)); -+ dag CVBDS = (v4i32 (XVCVDPSXWS BD)); -+ dag CVACU = (v4i32 (XVCVDPUXWS AC)); -+ dag CVBDU = (v4i32 (XVCVDPUXWS BD)); -+ -+ // For little endian, we merge hi doublewords of (D, B) and (C, A), convert -+ // then merge. -+ dag DB = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$D, VSRC), -+ (COPY_TO_REGCLASS f64:$B, VSRC), 0)); -+ dag CA = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$C, VSRC), -+ (COPY_TO_REGCLASS f64:$A, VSRC), 0)); -+ dag CVDBS = (v4i32 (XVCVDPSXWS DB)); -+ dag CVCAS = (v4i32 (XVCVDPSXWS CA)); -+ dag CVDBU = (v4i32 (XVCVDPUXWS DB)); -+ dag CVCAU = (v4i32 (XVCVDPUXWS CA)); -+} -+ - // Patterns for BUILD_VECTOR nodes. - def NoP9Vector : Predicate<"!PPCSubTarget->hasP9Vector()">; - let AddedComplexity = 400 in { -@@ -3286,6 +3344,20 @@ let AddedComplexity = 400 in { - def : Pat<(v4f32 (build_vector DblToFlt.A0, DblToFlt.A1, - DblToFlt.B0, DblToFlt.B1)), - (v4f32 (VMRGEW MrgFP.ABhToFlt, MrgFP.ABlToFlt))>; -+ -+ // Convert 4 doubles to a vector of ints. 
-+ def : Pat<(v4i32 (build_vector DblToInt.A, DblToInt.B, -+ DblToInt.C, DblToInt.D)), -+ (v4i32 (VMRGEW MrgWords.CVACS, MrgWords.CVBDS))>; -+ def : Pat<(v4i32 (build_vector DblToUInt.A, DblToUInt.B, -+ DblToUInt.C, DblToUInt.D)), -+ (v4i32 (VMRGEW MrgWords.CVACU, MrgWords.CVBDU))>; -+ def : Pat<(v4i32 (build_vector ExtDbl.A0S, ExtDbl.A1S, -+ ExtDbl.B0S, ExtDbl.B1S)), -+ (v4i32 (VMRGEW MrgWords.CVA0B0S, MrgWords.CVA1B1S))>; -+ def : Pat<(v4i32 (build_vector ExtDbl.A0U, ExtDbl.A1U, -+ ExtDbl.B0U, ExtDbl.B1U)), -+ (v4i32 (VMRGEW MrgWords.CVA0B0U, MrgWords.CVA1B1U))>; - } - - let Predicates = [IsLittleEndian, HasVSX] in { -@@ -3300,6 +3372,20 @@ let AddedComplexity = 400 in { - def : Pat<(v4f32 (build_vector DblToFlt.A0, DblToFlt.A1, - DblToFlt.B0, DblToFlt.B1)), - (v4f32 (VMRGEW MrgFP.BAhToFlt, MrgFP.BAlToFlt))>; -+ -+ // Convert 4 doubles to a vector of ints. -+ def : Pat<(v4i32 (build_vector DblToInt.A, DblToInt.B, -+ DblToInt.C, DblToInt.D)), -+ (v4i32 (VMRGEW MrgWords.CVDBS, MrgWords.CVCAS))>; -+ def : Pat<(v4i32 (build_vector DblToUInt.A, DblToUInt.B, -+ DblToUInt.C, DblToUInt.D)), -+ (v4i32 (VMRGEW MrgWords.CVDBU, MrgWords.CVCAU))>; -+ def : Pat<(v4i32 (build_vector ExtDbl.A0S, ExtDbl.A1S, -+ ExtDbl.B0S, ExtDbl.B1S)), -+ (v4i32 (VMRGEW MrgWords.CVB1A1S, MrgWords.CVB0A0S))>; -+ def : Pat<(v4i32 (build_vector ExtDbl.A0U, ExtDbl.A1U, -+ ExtDbl.B0U, ExtDbl.B1U)), -+ (v4i32 (VMRGEW MrgWords.CVB1A1U, MrgWords.CVB0A0U))>; - } - - let Predicates = [HasDirectMove] in { -diff --git a/test/CodeGen/PowerPC/build-vector-tests.ll b/test/CodeGen/PowerPC/build-vector-tests.ll -index 16b562b..3785b2a 100644 ---- a/test/CodeGen/PowerPC/build-vector-tests.ll -+++ b/test/CodeGen/PowerPC/build-vector-tests.ll -@@ -119,8 +119,8 @@ - ;vector int spltCnstConvftoi() { // - ; return (vector int) 4.74f; // - ;} // --;// P8: 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws // --;// P9: 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvdpsxws // -+;// P8: 2 x xxmrghd, 2 x xvcvspsxws, vmrgew // -+;// P9: 
2 x xxmrghd, 2 x xvcvspsxws, vmrgew // - ;vector int fromRegsConvftoi(float a, float b, float c, float d) { // - ; return (vector int) { a, b, c, d }; // - ;} // -@@ -139,15 +139,15 @@ - ;vector int fromDiffMemConsDConvftoi(float *ptr) { // - ; return (vector int) { ptr[3], ptr[2], ptr[1], ptr[0] }; // - ;} // --;// P8: 4 x lxsspx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws // --;// P9: 4 x lxssp, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws // -+;// P8: 4 x lxsspx, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew // -+;// P9: 4 x lxssp, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew // - ;// Note: if the consecutive loads learns to handle pre-inc, this can be: // - ;// sldi 2, load, xvcvspuxws // - ;vector int fromDiffMemVarAConvftoi(float *arr, int elem) { // - ; return (vector int) { arr[elem], arr[elem+1], arr[elem+2], arr[elem+3] }; // - ;} // --;// P8: 4 x lxsspx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws // --;// P9: 4 x lxssp, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws // -+;// P8: 4 x lxsspx, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew // -+;// P9: 4 x lxssp, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew // - ;// Note: if the consecutive loads learns to handle pre-inc, this can be: // - ;// sldi 2, 2 x load, vperm, xvcvspuxws // - ;vector int fromDiffMemVarDConvftoi(float *arr, int elem) { // -@@ -168,8 +168,8 @@ - ;vector int spltCnstConvdtoi() { // - ; return (vector int) 4.74; // - ;} // --;// P8: 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws // --;// P9: 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws // -+;// P8: 2 x xxmrghd, 2 x xvcvspsxws, vmrgew // -+;// P9: 2 x xxmrghd, 2 x xvcvspsxws, vmrgew // - ;vector int fromRegsConvdtoi(double a, double b, double c, double d) { // - ; return (vector int) { a, b, c, d }; // - ;} // -@@ -178,25 +178,23 @@ - ;vector int fromDiffConstsConvdtoi() { // - ; return (vector int) { 24.46, 234., 988.19, 422.39 }; // - ;} // --;// P8: 2 x lxvd2x, 2 x xxswapd, xxmrgld, xxmrghd, 2 x xvcvdpsp, vmrgew, // --;// xvcvspsxws // --;// P9: 2 x lxvx, 2 x 
xxswapd, xxmrgld, xxmrghd, 2 x xvcvdpsp, vmrgew, // --;// xvcvspsxws // -+;// P8: 2 x lxvd2x, 2 x xxswapd, xxmrgld, xxmrghd, 2 x xvcvspsxws, vmrgew // -+;// P9: 2 x lxvx, 2 x xxswapd, xxmrgld, xxmrghd, 2 x xvcvspsxws, vmrgew // - ;vector int fromDiffMemConsAConvdtoi(double *ptr) { // - ; return (vector int) { ptr[0], ptr[1], ptr[2], ptr[3] }; // - ;} // --;// P8: 4 x lxsdx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws // --;// P9: 4 x lfd, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws // -+;// P8: 4 x lxsdx, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew // -+;// P9: 4 x lfd, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew // - ;vector int fromDiffMemConsDConvdtoi(double *ptr) { // - ; return (vector int) { ptr[3], ptr[2], ptr[1], ptr[0] }; // - ;} // --;// P8: lfdux, 3 x lxsdx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws // --;// P9: lfdux, 3 x lfd, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws // -+;// P8: lfdux, 3 x lxsdx, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew // -+;// P9: lfdux, 3 x lfd, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew // - ;vector int fromDiffMemVarAConvdtoi(double *arr, int elem) { // - ; return (vector int) { arr[elem], arr[elem+1], arr[elem+2], arr[elem+3] }; // - ;} // --;// P8: lfdux, 3 x lxsdx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws // --;// P9: lfdux, 3 x lfd, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws // -+;// P8: lfdux, 3 x lxsdx, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew // -+;// P9: lfdux, 3 x lfd, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew // - ;vector int fromDiffMemVarDConvdtoi(double *arr, int elem) { // - ; return (vector int) { arr[elem], arr[elem-1], arr[elem-2], arr[elem-3] }; // - ;} // -@@ -296,8 +294,8 @@ - ;vector unsigned int spltCnstConvftoui() { // - ; return (vector unsigned int) 4.74f; // - ;} // --;// P8: 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws // --;// P9: 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws // -+;// P8: 2 x xxmrghd, 2 x xvcvspuxws, vmrgew // -+;// P9: 2 x xxmrghd, 2 x xvcvspuxws, vmrgew // - ;vector unsigned int 
fromRegsConvftoui(float a, float b, float c, float d) { // - ; return (vector unsigned int) { a, b, c, d }; // - ;} // -@@ -316,16 +314,16 @@ - ;vector unsigned int fromDiffMemConsDConvftoui(float *ptr) { // - ; return (vector unsigned int) { ptr[3], ptr[2], ptr[1], ptr[0] }; // - ;} // --;// P8: lfsux, 3 x lxsspx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws // --;// P9: lfsux, 3 x lfs, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws // -+;// P8: lfsux, 3 x lxsspx, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew // -+;// P9: lfsux, 3 x lfs, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew // - ;// Note: if the consecutive loads learns to handle pre-inc, this can be: // - ;// sldi 2, load, xvcvspuxws // - ;vector unsigned int fromDiffMemVarAConvftoui(float *arr, int elem) { // - ; return (vector unsigned int) { arr[elem], arr[elem+1], // - ; arr[elem+2], arr[elem+3] }; // - ;} // --;// P8: lfsux, 3 x lxsspx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws // --;// P9: lfsux, 3 x lfs, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws // -+;// P8: lfsux, 3 x lxsspx, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew // -+;// P9: lfsux, 3 x lfs, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew // - ;// Note: if the consecutive loads learns to handle pre-inc, this can be: // - ;// sldi 2, 2 x load, vperm, xvcvspuxws // - ;vector unsigned int fromDiffMemVarDConvftoui(float *arr, int elem) { // -@@ -347,8 +345,8 @@ - ;vector unsigned int spltCnstConvdtoui() { // - ; return (vector unsigned int) 4.74; // - ;} // --;// P8: 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws // --;// P9: 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws // -+;// P8: 2 x xxmrghd, 2 x xvcvspuxws, vmrgew // -+;// P9: 2 x xxmrghd, 2 x xvcvspuxws, vmrgew // - ;vector unsigned int fromRegsConvdtoui(double a, double b, // - ; double c, double d) { // - ; return (vector unsigned int) { a, b, c, d }; // -@@ -358,25 +356,24 @@ - ;vector unsigned int fromDiffConstsConvdtoui() { // - ; return (vector unsigned int) { 24.46, 234., 988.19, 422.39 }; // - ;} // --;// P8: 2 x 
lxvd2x, 2 x xxswapd, xxmrgld, xxmrghd, 2 x xvcvdpsp, vmrgew, // --;// xvcvspuxws // --;// P9: 2 x lxvx, xxmrgld, xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws // -+;// P8: 2 x lxvd2x, 2 x xxswapd, xxmrgld, xxmrghd, 2 x xvcvspuxws, vmrgew // -+;// P9: 2 x lxvx, xxmrgld, xxmrghd, 2 x xvcvspuxws, vmrgew // - ;vector unsigned int fromDiffMemConsAConvdtoui(double *ptr) { // - ; return (vector unsigned int) { ptr[0], ptr[1], ptr[2], ptr[3] }; // - ;} // --;// P8: 4 x lxsdx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws // --;// P9: 4 x lfd, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws // -+;// P8: 4 x lxsdx, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew // -+;// P9: 4 x lfd, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew // - ;vector unsigned int fromDiffMemConsDConvdtoui(double *ptr) { // - ; return (vector unsigned int) { ptr[3], ptr[2], ptr[1], ptr[0] }; // - ;} // --;// P8: lfdux, 3 x lxsdx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws // --;// P9: lfdux, 3 x lfd, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws // -+;// P8: lfdux, 3 x lxsdx, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew // -+;// P9: lfdux, 3 x lfd, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew // - ;vector unsigned int fromDiffMemVarAConvdtoui(double *arr, int elem) { // - ; return (vector unsigned int) { arr[elem], arr[elem+1], // - ; arr[elem+2], arr[elem+3] }; // - ;} // --;// P8: lfdux, 3 x lxsdx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws // --;// P9: lfdux, 3 x lfd, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws // -+;// P8: lfdux, 3 x lxsdx, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew // -+;// P9: lfdux, 3 x lfd, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew // - ;vector unsigned int fromDiffMemVarDConvdtoui(double *arr, int elem) { // - ; return (vector unsigned int) { arr[elem], arr[elem-1], // - ; arr[elem-2], arr[elem-3] }; // -@@ -1253,28 +1250,24 @@ entry: - ; P8LE-LABEL: fromRegsConvftoi - ; P9BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3 - ; P9BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4 --; P9BE-DAG: xvcvdpsp [[REG3:v[0-9]+]], 
{{[vs]+}}[[REG1]] --; P9BE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] -+; P9BE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] -+; P9BE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] - ; P9BE: vmrgew v2, [[REG3]], [[REG4]] --; P9BE: xvcvspsxws v2, v2 - ; P9LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1 - ; P9LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2 --; P9LE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] --; P9LE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] -+; P9LE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] -+; P9LE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] - ; P9LE: vmrgew v2, [[REG4]], [[REG3]] --; P9LE: xvcvspsxws v2, v2 - ; P8BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3 - ; P8BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4 --; P8BE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] --; P8BE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] -+; P8BE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] -+; P8BE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] - ; P8BE: vmrgew v2, [[REG3]], [[REG4]] --; P8BE: xvcvspsxws v2, v2 - ; P8LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1 - ; P8LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2 --; P8LE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] --; P8LE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] -+; P8LE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] -+; P8LE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] - ; P8LE: vmrgew v2, [[REG4]], [[REG3]] --; P8LE: xvcvspsxws v2, v2 - } - - ; Function Attrs: norecurse nounwind readnone -@@ -1529,28 +1522,24 @@ entry: - ; P8LE-LABEL: fromRegsConvdtoi - ; P9BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3 - ; P9BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4 --; P9BE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] --; P9BE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] -+; P9BE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] -+; P9BE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], 
{{[vs]+}}[[REG2]] - ; P9BE: vmrgew v2, [[REG3]], [[REG4]] --; P9BE: xvcvspsxws v2, v2 - ; P9LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1 - ; P9LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2 --; P9LE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] --; P9LE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] -+; P9LE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] -+; P9LE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] - ; P9LE: vmrgew v2, [[REG4]], [[REG3]] --; P9LE: xvcvspsxws v2, v2 - ; P8BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3 - ; P8BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4 --; P8BE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] --; P8BE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] -+; P8BE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] -+; P8BE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] - ; P8BE: vmrgew v2, [[REG3]], [[REG4]] --; P8BE: xvcvspsxws v2, v2 - ; P8LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1 - ; P8LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2 --; P8LE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] --; P8LE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] -+; P8LE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] -+; P8LE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] - ; P8LE: vmrgew v2, [[REG4]], [[REG3]] --; P8LE: xvcvspsxws v2, v2 - } - - ; Function Attrs: norecurse nounwind readnone -@@ -1592,36 +1581,32 @@ entry: - ; P9BE-DAG: lxv [[REG2:[vs0-9]+]], 16(r3) - ; P9BE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG1]], [[REG2]] - ; P9BE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG1]], [[REG2]] --; P9BE-DAG: xvcvdpsp [[REG5:[vs0-9]+]], [[REG3]] --; P9BE-DAG: xvcvdpsp [[REG6:[vs0-9]+]], [[REG4]] -+; P9BE-DAG: xvcvdpsxws [[REG5:[vs0-9]+]], [[REG3]] -+; P9BE-DAG: xvcvdpsxws [[REG6:[vs0-9]+]], [[REG4]] - ; P9BE: vmrgew v2, [[REG6]], [[REG5]] --; P9BE: xvcvspsxws v2, v2 - ; P9LE-DAG: lxv [[REG1:[vs0-9]+]], 0(r3) - ; P9LE-DAG: lxv [[REG2:[vs0-9]+]], 16(r3) - ; P9LE-DAG: xxmrgld 
[[REG3:[vs0-9]+]], [[REG2]], [[REG1]] - ; P9LE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG2]], [[REG1]] --; P9LE-DAG: xvcvdpsp [[REG5:[vs0-9]+]], [[REG3]] --; P9LE-DAG: xvcvdpsp [[REG6:[vs0-9]+]], [[REG4]] -+; P9LE-DAG: xvcvdpsxws [[REG5:[vs0-9]+]], [[REG3]] -+; P9LE-DAG: xvcvdpsxws [[REG6:[vs0-9]+]], [[REG4]] - ; P9LE: vmrgew v2, [[REG6]], [[REG5]] --; P9LE: xvcvspsxws v2, v2 - ; P8BE: lxvd2x [[REG1:[vs0-9]+]], 0, r3 - ; P8BE: lxvd2x [[REG2:[vs0-9]+]], r3, r4 - ; P8BE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG1]], [[REG2]] - ; P8BE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG1]], [[REG2]] --; P8BE-DAG: xvcvdpsp [[REG5:[vs0-9]+]], [[REG3]] --; P8BE-DAG: xvcvdpsp [[REG6:[vs0-9]+]], [[REG4]] -+; P8BE-DAG: xvcvdpsxws [[REG5:[vs0-9]+]], [[REG3]] -+; P8BE-DAG: xvcvdpsxws [[REG6:[vs0-9]+]], [[REG4]] - ; P8BE: vmrgew v2, [[REG6]], [[REG5]] --; P8BE: xvcvspsxws v2, v2 - ; P8LE: lxvd2x [[REG1:[vs0-9]+]], 0, r3 - ; P8LE: lxvd2x [[REG2:[vs0-9]+]], r3, r4 - ; P8LE-DAG: xxswapd [[REG3:[vs0-9]+]], [[REG1]] - ; P8LE-DAG: xxswapd [[REG4:[vs0-9]+]], [[REG2]] - ; P8LE-DAG: xxmrgld [[REG5:[vs0-9]+]], [[REG4]], [[REG3]] - ; P8LE-DAG: xxmrghd [[REG6:[vs0-9]+]], [[REG4]], [[REG3]] --; P8LE-DAG: xvcvdpsp [[REG7:[vs0-9]+]], [[REG5]] --; P8LE-DAG: xvcvdpsp [[REG8:[vs0-9]+]], [[REG6]] -+; P8LE-DAG: xvcvdpsxws [[REG7:[vs0-9]+]], [[REG5]] -+; P8LE-DAG: xvcvdpsxws [[REG8:[vs0-9]+]], [[REG6]] - ; P8LE: vmrgew v2, [[REG8]], [[REG7]] --; P8LE: xvcvspsxws v2, v2 - } - - ; Function Attrs: norecurse nounwind readonly -@@ -1653,40 +1638,36 @@ entry: - ; P9BE: lfd - ; P9BE: xxmrghd - ; P9BE: xxmrghd --; P9BE: xvcvdpsp --; P9BE: xvcvdpsp --; P9BE: vmrgew --; P9BE: xvcvspsxws v2 -+; P9BE: xvcvdpsxws -+; P9BE: xvcvdpsxws -+; P9BE: vmrgew v2 - ; P9LE: lfd - ; P9LE: lfd - ; P9LE: lfd - ; P9LE: lfd - ; P9LE: xxmrghd - ; P9LE: xxmrghd --; P9LE: xvcvdpsp --; P9LE: xvcvdpsp --; P9LE: vmrgew --; P9LE: xvcvspsxws v2 -+; P9LE: xvcvdpsxws -+; P9LE: xvcvdpsxws -+; P9LE: vmrgew v2 - ; P8BE: lxsdx - ; P8BE: lxsdx - ; P8BE: lxsdx - ; 
P8BE: lxsdx - ; P8BE: xxmrghd - ; P8BE: xxmrghd --; P8BE: xvcvdpsp --; P8BE: xvcvdpsp --; P8BE: vmrgew --; P8BE: xvcvspsxws v2 -+; P8BE: xvcvdpsxws -+; P8BE: xvcvdpsxws -+; P8BE: vmrgew v2 - ; P8LE: lxsdx - ; P8LE: lxsdx - ; P8LE: lxsdx - ; P8LE: lxsdx - ; P8LE: xxmrghd - ; P8LE: xxmrghd --; P8LE: xvcvdpsp --; P8LE: xvcvdpsp --; P8LE: vmrgew --; P8LE: xvcvspsxws v2 -+; P8LE: xvcvdpsxws -+; P8LE: xvcvdpsxws -+; P8LE: vmrgew v2 - } - - ; Function Attrs: norecurse nounwind readonly -@@ -1726,40 +1707,36 @@ entry: - ; P9BE: lfd - ; P9BE: xxmrghd - ; P9BE: xxmrghd --; P9BE: xvcvdpsp --; P9BE: xvcvdpsp --; P9BE: vmrgew --; P9BE: xvcvspsxws v2 -+; P9BE: xvcvdpsxws -+; P9BE: xvcvdpsxws -+; P9BE: vmrgew v2 - ; P9LE: lfdux - ; P9LE: lfd - ; P9LE: lfd - ; P9LE: lfd - ; P9LE: xxmrghd - ; P9LE: xxmrghd --; P9LE: xvcvdpsp --; P9LE: xvcvdpsp --; P9LE: vmrgew --; P9LE: xvcvspsxws v2 -+; P9LE: xvcvdpsxws -+; P9LE: xvcvdpsxws -+; P9LE: vmrgew v2 - ; P8BE: lfdux - ; P8BE: lxsdx - ; P8BE: lxsdx - ; P8BE: lxsdx - ; P8BE: xxmrghd - ; P8BE: xxmrghd --; P8BE: xvcvdpsp --; P8BE: xvcvdpsp --; P8BE: vmrgew --; P8BE: xvcvspsxws v2 -+; P8BE: xvcvdpsxws -+; P8BE: xvcvdpsxws -+; P8BE: vmrgew v2 - ; P8LE: lfdux - ; P8LE: lxsdx - ; P8LE: lxsdx - ; P8LE: lxsdx - ; P8LE: xxmrghd - ; P8LE: xxmrghd --; P8LE: xvcvdpsp --; P8LE: xvcvdpsp --; P8LE: vmrgew --; P8LE: xvcvspsxws v2 -+; P8LE: xvcvdpsxws -+; P8LE: xvcvdpsxws -+; P8LE: vmrgew v2 - } - - ; Function Attrs: norecurse nounwind readonly -@@ -1799,40 +1776,36 @@ entry: - ; P9BE: lfd - ; P9BE: xxmrghd - ; P9BE: xxmrghd --; P9BE: xvcvdpsp --; P9BE: xvcvdpsp --; P9BE: vmrgew --; P9BE: xvcvspsxws v2 -+; P9BE: xvcvdpsxws -+; P9BE: xvcvdpsxws -+; P9BE: vmrgew v2 - ; P9LE: lfdux - ; P9LE: lfd - ; P9LE: lfd - ; P9LE: lfd - ; P9LE: xxmrghd - ; P9LE: xxmrghd --; P9LE: xvcvdpsp --; P9LE: xvcvdpsp --; P9LE: vmrgew --; P9LE: xvcvspsxws v2 -+; P9LE: xvcvdpsxws -+; P9LE: xvcvdpsxws -+; P9LE: vmrgew v2 - ; P8BE: lfdux - ; P8BE: lxsdx - ; P8BE: lxsdx - ; P8BE: lxsdx 
- ; P8BE: xxmrghd - ; P8BE: xxmrghd --; P8BE: xvcvdpsp --; P8BE: xvcvdpsp --; P8BE: vmrgew --; P8BE: xvcvspsxws v2 -+; P8BE: xvcvdpsxws -+; P8BE: xvcvdpsxws -+; P8BE: vmrgew v2 - ; P8LE: lfdux - ; P8LE: lxsdx - ; P8LE: lxsdx - ; P8LE: lxsdx - ; P8LE: xxmrghd - ; P8LE: xxmrghd --; P8LE: xvcvdpsp --; P8LE: xvcvdpsp --; P8LE: vmrgew --; P8LE: xvcvspsxws v2 -+; P8LE: xvcvdpsxws -+; P8LE: xvcvdpsxws -+; P8LE: vmrgew v2 - } - - ; Function Attrs: norecurse nounwind readnone -@@ -2413,28 +2386,24 @@ entry: - ; P8LE-LABEL: fromRegsConvftoui - ; P9BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3 - ; P9BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4 --; P9BE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] --; P9BE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] -+; P9BE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] -+; P9BE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] - ; P9BE: vmrgew v2, [[REG3]], [[REG4]] --; P9BE: xvcvspuxws v2, v2 - ; P9LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1 - ; P9LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2 --; P9LE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] --; P9LE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] -+; P9LE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] -+; P9LE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] - ; P9LE: vmrgew v2, [[REG4]], [[REG3]] --; P9LE: xvcvspuxws v2, v2 - ; P8BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3 - ; P8BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4 --; P8BE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] --; P8BE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] -+; P8BE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] -+; P8BE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] - ; P8BE: vmrgew v2, [[REG3]], [[REG4]] --; P8BE: xvcvspuxws v2, v2 - ; P8LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1 - ; P8LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2 --; P8LE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] --; P8LE-DAG: xvcvdpsp 
[[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] -+; P8LE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] -+; P8LE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] - ; P8LE: vmrgew v2, [[REG4]], [[REG3]] --; P8LE: xvcvspuxws v2, v2 - } - - ; Function Attrs: norecurse nounwind readnone -@@ -2689,28 +2658,24 @@ entry: - ; P8LE-LABEL: fromRegsConvdtoui - ; P9BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3 - ; P9BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4 --; P9BE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] --; P9BE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] -+; P9BE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] -+; P9BE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] - ; P9BE: vmrgew v2, [[REG3]], [[REG4]] --; P9BE: xvcvspuxws v2, v2 - ; P9LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1 - ; P9LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2 --; P9LE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] --; P9LE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] -+; P9LE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] -+; P9LE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] - ; P9LE: vmrgew v2, [[REG4]], [[REG3]] --; P9LE: xvcvspuxws v2, v2 - ; P8BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3 - ; P8BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4 --; P8BE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] --; P8BE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] -+; P8BE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] -+; P8BE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] - ; P8BE: vmrgew v2, [[REG3]], [[REG4]] --; P8BE: xvcvspuxws v2, v2 - ; P8LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1 - ; P8LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2 --; P8LE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] --; P8LE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] -+; P8LE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] -+; P8LE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] - ; P8LE: vmrgew v2, [[REG4]], 
[[REG3]] --; P8LE: xvcvspuxws v2, v2 - } - - ; Function Attrs: norecurse nounwind readnone -@@ -2752,36 +2717,32 @@ entry: - ; P9BE-DAG: lxv [[REG2:[vs0-9]+]], 16(r3) - ; P9BE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG1]], [[REG2]] - ; P9BE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG1]], [[REG2]] --; P9BE-DAG: xvcvdpsp [[REG5:[vs0-9]+]], [[REG3]] --; P9BE-DAG: xvcvdpsp [[REG6:[vs0-9]+]], [[REG4]] -+; P9BE-DAG: xvcvdpuxws [[REG5:[vs0-9]+]], [[REG3]] -+; P9BE-DAG: xvcvdpuxws [[REG6:[vs0-9]+]], [[REG4]] - ; P9BE: vmrgew v2, [[REG6]], [[REG5]] --; P9BE: xvcvspuxws v2, v2 - ; P9LE-DAG: lxv [[REG1:[vs0-9]+]], 0(r3) - ; P9LE-DAG: lxv [[REG2:[vs0-9]+]], 16(r3) --; P9LE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG2]], [[REG1]] - ; P9LE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG2]], [[REG1]] --; P9LE-DAG: xvcvdpsp [[REG5:[vs0-9]+]], [[REG3]] --; P9LE-DAG: xvcvdpsp [[REG6:[vs0-9]+]], [[REG4]] -+; P9LE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG2]], [[REG1]] -+; P9LE-DAG: xvcvdpuxws [[REG5:[vs0-9]+]], [[REG3]] -+; P9LE-DAG: xvcvdpuxws [[REG6:[vs0-9]+]], [[REG4]] - ; P9LE: vmrgew v2, [[REG6]], [[REG5]] --; P9LE: xvcvspuxws v2, v2 - ; P8BE: lxvd2x [[REG1:[vs0-9]+]], 0, r3 - ; P8BE: lxvd2x [[REG2:[vs0-9]+]], r3, r4 - ; P8BE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG1]], [[REG2]] - ; P8BE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG1]], [[REG2]] --; P8BE-DAG: xvcvdpsp [[REG5:[vs0-9]+]], [[REG3]] --; P8BE-DAG: xvcvdpsp [[REG6:[vs0-9]+]], [[REG4]] -+; P8BE-DAG: xvcvdpuxws [[REG5:[vs0-9]+]], [[REG3]] -+; P8BE-DAG: xvcvdpuxws [[REG6:[vs0-9]+]], [[REG4]] - ; P8BE: vmrgew v2, [[REG6]], [[REG5]] --; P8BE: xvcvspuxws v2, v2 - ; P8LE: lxvd2x [[REG1:[vs0-9]+]], 0, r3 - ; P8LE: lxvd2x [[REG2:[vs0-9]+]], r3, r4 - ; P8LE-DAG: xxswapd [[REG3:[vs0-9]+]], [[REG1]] - ; P8LE-DAG: xxswapd [[REG4:[vs0-9]+]], [[REG2]] - ; P8LE-DAG: xxmrgld [[REG5:[vs0-9]+]], [[REG4]], [[REG3]] - ; P8LE-DAG: xxmrghd [[REG6:[vs0-9]+]], [[REG4]], [[REG3]] --; P8LE-DAG: xvcvdpsp [[REG7:[vs0-9]+]], [[REG5]] --; P8LE-DAG: xvcvdpsp [[REG8:[vs0-9]+]], [[REG6]] -+; 
P8LE-DAG: xvcvdpuxws [[REG7:[vs0-9]+]], [[REG5]] -+; P8LE-DAG: xvcvdpuxws [[REG8:[vs0-9]+]], [[REG6]] - ; P8LE: vmrgew v2, [[REG8]], [[REG7]] --; P8LE: xvcvspuxws v2, v2 - } - - ; Function Attrs: norecurse nounwind readonly -@@ -2813,40 +2774,36 @@ entry: - ; P9BE: lfd - ; P9BE: xxmrghd - ; P9BE: xxmrghd --; P9BE: xvcvdpsp --; P9BE: xvcvdpsp --; P9BE: vmrgew --; P9BE: xvcvspuxws v2 -+; P9BE: xvcvdpuxws -+; P9BE: xvcvdpuxws -+; P9BE: vmrgew v2 - ; P9LE: lfd - ; P9LE: lfd - ; P9LE: lfd - ; P9LE: lfd - ; P9LE: xxmrghd - ; P9LE: xxmrghd --; P9LE: xvcvdpsp --; P9LE: xvcvdpsp --; P9LE: vmrgew --; P9LE: xvcvspuxws v2 -+; P9LE: xvcvdpuxws -+; P9LE: xvcvdpuxws -+; P9LE: vmrgew v2 - ; P8BE: lxsdx - ; P8BE: lxsdx - ; P8BE: lxsdx - ; P8BE: lxsdx - ; P8BE: xxmrghd - ; P8BE: xxmrghd --; P8BE: xvcvdpsp --; P8BE: xvcvdpsp --; P8BE: vmrgew --; P8BE: xvcvspuxws v2 -+; P8BE: xvcvdpuxws -+; P8BE: xvcvdpuxws -+; P8BE: vmrgew v2 - ; P8LE: lxsdx - ; P8LE: lxsdx - ; P8LE: lxsdx - ; P8LE: lxsdx - ; P8LE: xxmrghd - ; P8LE: xxmrghd --; P8LE: xvcvdpsp --; P8LE: xvcvdpsp --; P8LE: vmrgew --; P8LE: xvcvspuxws v2 -+; P8LE: xvcvdpuxws -+; P8LE: xvcvdpuxws -+; P8LE: vmrgew v2 - } - - ; Function Attrs: norecurse nounwind readonly -@@ -2886,40 +2843,36 @@ entry: - ; P9BE: lfd - ; P9BE: xxmrghd - ; P9BE: xxmrghd --; P9BE: xvcvdpsp --; P9BE: xvcvdpsp --; P9BE: vmrgew --; P9BE: xvcvspuxws v2 -+; P9BE: xvcvdpuxws -+; P9BE: xvcvdpuxws -+; P9BE: vmrgew v2 - ; P9LE: lfdux - ; P9LE: lfd - ; P9LE: lfd - ; P9LE: lfd - ; P9LE: xxmrghd - ; P9LE: xxmrghd --; P9LE: xvcvdpsp --; P9LE: xvcvdpsp --; P9LE: vmrgew --; P9LE: xvcvspuxws v2 -+; P9LE: xvcvdpuxws -+; P9LE: xvcvdpuxws -+; P9LE: vmrgew v2 - ; P8BE: lfdux - ; P8BE: lxsdx - ; P8BE: lxsdx - ; P8BE: lxsdx - ; P8BE: xxmrghd - ; P8BE: xxmrghd --; P8BE: xvcvdpsp --; P8BE: xvcvdpsp --; P8BE: vmrgew --; P8BE: xvcvspuxws v2 -+; P8BE: xvcvdpuxws -+; P8BE: xvcvdpuxws -+; P8BE: vmrgew v2 - ; P8LE: lfdux - ; P8LE: lxsdx - ; P8LE: lxsdx - ; P8LE: lxsdx - ; P8LE: xxmrghd - 
; P8LE: xxmrghd --; P8LE: xvcvdpsp --; P8LE: xvcvdpsp --; P8LE: vmrgew --; P8LE: xvcvspuxws v2 -+; P8LE: xvcvdpuxws -+; P8LE: xvcvdpuxws -+; P8LE: vmrgew v2 - } - - ; Function Attrs: norecurse nounwind readonly -@@ -2959,40 +2912,36 @@ entry: - ; P9BE: lfd - ; P9BE: xxmrghd - ; P9BE: xxmrghd --; P9BE: xvcvdpsp --; P9BE: xvcvdpsp --; P9BE: vmrgew --; P9BE: xvcvspuxws v2 -+; P9BE: xvcvdpuxws -+; P9BE: xvcvdpuxws -+; P9BE: vmrgew v2 - ; P9LE: lfdux - ; P9LE: lfd - ; P9LE: lfd - ; P9LE: lfd - ; P9LE: xxmrghd - ; P9LE: xxmrghd --; P9LE: xvcvdpsp --; P9LE: xvcvdpsp --; P9LE: vmrgew --; P9LE: xvcvspuxws v2 -+; P9LE: xvcvdpuxws -+; P9LE: xvcvdpuxws -+; P9LE: vmrgew v2 - ; P8BE: lfdux - ; P8BE: lxsdx - ; P8BE: lxsdx - ; P8BE: lxsdx - ; P8BE: xxmrghd - ; P8BE: xxmrghd --; P8BE: xvcvdpsp --; P8BE: xvcvdpsp --; P8BE: vmrgew --; P8BE: xvcvspuxws v2 -+; P8BE: xvcvdpuxws -+; P8BE: xvcvdpuxws -+; P8BE: vmrgew v2 - ; P8LE: lfdux - ; P8LE: lxsdx - ; P8LE: lxsdx - ; P8LE: lxsdx - ; P8LE: xxmrghd - ; P8LE: xxmrghd --; P8LE: xvcvdpsp --; P8LE: xvcvdpsp --; P8LE: vmrgew --; P8LE: xvcvspuxws v2 -+; P8LE: xvcvdpuxws -+; P8LE: xvcvdpuxws -+; P8LE: vmrgew v2 - } - - ; Function Attrs: norecurse nounwind readnone --- -1.8.3.1 - diff --git a/0001-SystemZ-TableGen-Fix-shift-count-handling.patch b/0001-SystemZ-TableGen-Fix-shift-count-handling.patch deleted file mode 100644 index 5777e3d..0000000 --- a/0001-SystemZ-TableGen-Fix-shift-count-handling.patch +++ /dev/null @@ -1,360 +0,0 @@ -From 2ac90db51fc323d183aabe744e57f4feca6d3008 Mon Sep 17 00:00:00 2001 -From: Ulrich Weigand -Date: Wed, 1 Aug 2018 11:57:58 +0000 -Subject: [PATCH] [SystemZ, TableGen] Fix shift count handling - -*Backport of this patch from trunk without the TableGen fix and modified -to work with LLVM 6.0 TableGen. * - -The DAG combiner logic to simplify AND masks in shift counts is invalid. 
-While it is true that the SystemZ shift instructions ignore all but the -low 6 bits of the shift count, it is still invalid to simplify the AND -masks while the DAG still uses the standard shift operators (which are -*not* defined to match the SystemZ instruction behavior). - -Instead, this patch performs equivalent operations during instruction -selection. For completely removing the AND, this now happens via -additional DAG match patterns implemented by a multi-alternative -PatFrags. For simplifying a 32-bit AND to a 16-bit AND, the existing DAG -patterns were already mostly OK, they just needed an output XForm to -actually truncate the immediate value. - -Unfortunately, the latter change also exposed a bug in TableGen: it -seems XForms are currently only handled correctly for direct operands of -the outermost operation node. This patch also fixes that bug by simply -recurring through the whole pattern. This should be NFC for all other -targets. - -Differential Revision: https://reviews.llvm.org/D50096 - -git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@338521 91177308-0d34-0410-b5e6-96231b3b80d8 ---- - lib/Target/SystemZ/SystemZISelLowering.cpp | 78 ------------------------------ - lib/Target/SystemZ/SystemZISelLowering.h | 1 - - lib/Target/SystemZ/SystemZInstrInfo.td | 49 +++++++++++++------ - lib/Target/SystemZ/SystemZOperands.td | 1 + - lib/Target/SystemZ/SystemZOperators.td | 6 +++ - test/CodeGen/SystemZ/shift-12.ll | 12 +++++ - utils/TableGen/CodeGenDAGPatterns.cpp | 39 ++++++++------- - 7 files changed, 71 insertions(+), 115 deletions(-) - -diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp -index adf3683..505b143 100644 ---- a/lib/Target/SystemZ/SystemZISelLowering.cpp -+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp -@@ -522,10 +522,6 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, - setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); - setTargetDAGCombine(ISD::FP_ROUND); - 
setTargetDAGCombine(ISD::BSWAP); -- setTargetDAGCombine(ISD::SHL); -- setTargetDAGCombine(ISD::SRA); -- setTargetDAGCombine(ISD::SRL); -- setTargetDAGCombine(ISD::ROTL); - - // Handle intrinsics. - setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); -@@ -5405,76 +5401,6 @@ SDValue SystemZTargetLowering::combineBSWAP( - return SDValue(); - } - --SDValue SystemZTargetLowering::combineSHIFTROT( -- SDNode *N, DAGCombinerInfo &DCI) const { -- -- SelectionDAG &DAG = DCI.DAG; -- -- // Shift/rotate instructions only use the last 6 bits of the second operand -- // register. If the second operand is the result of an AND with an immediate -- // value that has its last 6 bits set, we can safely remove the AND operation. -- // -- // If the AND operation doesn't have the last 6 bits set, we can't remove it -- // entirely, but we can still truncate it to a 16-bit value. This prevents -- // us from ending up with a NILL with a signed operand, which will cause the -- // instruction printer to abort. -- SDValue N1 = N->getOperand(1); -- if (N1.getOpcode() == ISD::AND) { -- SDValue AndMaskOp = N1->getOperand(1); -- auto *AndMask = dyn_cast(AndMaskOp); -- -- // The AND mask is constant -- if (AndMask) { -- auto AmtVal = AndMask->getZExtValue(); -- -- // Bottom 6 bits are set -- if ((AmtVal & 0x3f) == 0x3f) { -- SDValue AndOp = N1->getOperand(0); -- -- // This is the only use, so remove the node -- if (N1.hasOneUse()) { -- // Combine the AND away -- DCI.CombineTo(N1.getNode(), AndOp); -- -- // Return N so it isn't rechecked -- return SDValue(N, 0); -- -- // The node will be reused, so create a new node for this one use -- } else { -- SDValue Replace = DAG.getNode(N->getOpcode(), SDLoc(N), -- N->getValueType(0), N->getOperand(0), -- AndOp); -- DCI.AddToWorklist(Replace.getNode()); -- -- return Replace; -- } -- -- // We can't remove the AND, but we can use NILL here (normally we would -- // use NILF). Only keep the last 16 bits of the mask. 
The actual -- // transformation will be handled by .td definitions. -- } else if (AmtVal >> 16 != 0) { -- SDValue AndOp = N1->getOperand(0); -- -- auto NewMask = DAG.getConstant(AndMask->getZExtValue() & 0x0000ffff, -- SDLoc(AndMaskOp), -- AndMaskOp.getValueType()); -- -- auto NewAnd = DAG.getNode(N1.getOpcode(), SDLoc(N1), N1.getValueType(), -- AndOp, NewMask); -- -- SDValue Replace = DAG.getNode(N->getOpcode(), SDLoc(N), -- N->getValueType(0), N->getOperand(0), -- NewAnd); -- DCI.AddToWorklist(Replace.getNode()); -- -- return Replace; -- } -- } -- } -- -- return SDValue(); --} -- - SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N, - DAGCombinerInfo &DCI) const { - switch(N->getOpcode()) { -@@ -5487,10 +5413,6 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N, - case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI); - case ISD::FP_ROUND: return combineFP_ROUND(N, DCI); - case ISD::BSWAP: return combineBSWAP(N, DCI); -- case ISD::SHL: -- case ISD::SRA: -- case ISD::SRL: -- case ISD::ROTL: return combineSHIFTROT(N, DCI); - } - - return SDValue(); -diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h -index 2cdc88d..1918d45 100644 ---- a/lib/Target/SystemZ/SystemZISelLowering.h -+++ b/lib/Target/SystemZ/SystemZISelLowering.h -@@ -570,7 +570,6 @@ private: - SDValue combineJOIN_DWORDS(SDNode *N, DAGCombinerInfo &DCI) const; - SDValue combineFP_ROUND(SDNode *N, DAGCombinerInfo &DCI) const; - SDValue combineBSWAP(SDNode *N, DAGCombinerInfo &DCI) const; -- SDValue combineSHIFTROT(SDNode *N, DAGCombinerInfo &DCI) const; - - // If the last instruction before MBBI in MBB was some form of COMPARE, - // try to replace it with a COMPARE AND BRANCH just before MBBI. 
-diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td -index abb8045..fb40cb4 100644 ---- a/lib/Target/SystemZ/SystemZInstrInfo.td -+++ b/lib/Target/SystemZ/SystemZInstrInfo.td -@@ -1318,9 +1318,20 @@ def : Pat<(z_udivrem GR64:$src1, (i64 (load bdxaddr20only:$src2))), - // Shifts - //===----------------------------------------------------------------------===// - -+// Complexity is 8 so we match it before the NILL paterns below. -+let AddedComplexity = 8 in { -+ -+class ShiftAndPat : Pat < -+ (node vt:$val, (and i32:$count, imm32bottom6set)), -+ (inst vt:$val, i32:$count, 0) -+>; -+} -+ - // Logical shift left. - defm SLL : BinaryRSAndK<"sll", 0x89, 0xEBDF, shl, GR32>; -+def : ShiftAndPat ; - def SLLG : BinaryRSY<"sllg", 0xEB0D, shl, GR64>; -+def : ShiftAndPat ; - def SLDL : BinaryRS<"sldl", 0x8D, null_frag, GR128>; - - // Arithmetic shift left. -@@ -1332,7 +1343,9 @@ let Defs = [CC] in { - - // Logical shift right. - defm SRL : BinaryRSAndK<"srl", 0x88, 0xEBDE, srl, GR32>; -+def : ShiftAndPat ; - def SRLG : BinaryRSY<"srlg", 0xEB0C, srl, GR64>; -+def : ShiftAndPat ; - def SRDL : BinaryRS<"srdl", 0x8C, null_frag, GR128>; - - // Arithmetic shift right. -@@ -1341,10 +1354,14 @@ let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in { - def SRAG : BinaryRSY<"srag", 0xEB0A, sra, GR64>; - def SRDA : BinaryRS<"srda", 0x8E, null_frag, GR128>; - } -+def : ShiftAndPat ; -+def : ShiftAndPat ; - - // Rotate left. - def RLL : BinaryRSY<"rll", 0xEB1D, rotl, GR32>; -+def : ShiftAndPat ; - def RLLG : BinaryRSY<"rllg", 0xEB1C, rotl, GR64>; -+def : ShiftAndPat ; - - // Rotate second operand left and inserted selected bits into first operand. - // These can act like 32-bit operands provided that the constant start and -@@ -2154,29 +2171,29 @@ def : Pat<(and (xor GR64:$x, (i64 -1)), GR64:$y), - // Complexity is added so that we match this before we match NILF on the AND - // operation alone. 
- let AddedComplexity = 4 in { -- def : Pat<(shl GR32:$val, (and GR32:$shift, uimm32:$imm)), -- (SLL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>; -+ def : Pat<(shl GR32:$val, (and GR32:$shift, imm32zx16trunc:$imm)), -+ (SLL GR32:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>; - -- def : Pat<(sra GR32:$val, (and GR32:$shift, uimm32:$imm)), -- (SRA GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>; -+ def : Pat<(sra GR32:$val, (and GR32:$shift, imm32zx16trunc:$imm)), -+ (SRA GR32:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>; - -- def : Pat<(srl GR32:$val, (and GR32:$shift, uimm32:$imm)), -- (SRL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>; -+ def : Pat<(srl GR32:$val, (and GR32:$shift, imm32zx16trunc:$imm)), -+ (SRL GR32:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>; - -- def : Pat<(shl GR64:$val, (and GR32:$shift, uimm32:$imm)), -- (SLLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>; -+ def : Pat<(shl GR64:$val, (and GR32:$shift, imm32zx16trunc:$imm)), -+ (SLLG GR64:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>; - -- def : Pat<(sra GR64:$val, (and GR32:$shift, uimm32:$imm)), -- (SRAG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>; -+ def : Pat<(sra GR64:$val, (and GR32:$shift, imm32zx16trunc:$imm)), -+ (SRAG GR64:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>; - -- def : Pat<(srl GR64:$val, (and GR32:$shift, uimm32:$imm)), -- (SRLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>; -+ def : Pat<(srl GR64:$val, (and GR32:$shift, imm32zx16trunc:$imm)), -+ (SRLG GR64:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>; - -- def : Pat<(rotl GR32:$val, (and GR32:$shift, uimm32:$imm)), -- (RLL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>; -+ def : Pat<(rotl GR32:$val, (and GR32:$shift, imm32zx16trunc:$imm)), -+ (RLL GR32:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>; - -- def : Pat<(rotl GR64:$val, (and GR32:$shift, uimm32:$imm)), -- (RLLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>; -+ def : Pat<(rotl GR64:$val, (and GR32:$shift, 
imm32zx16trunc:$imm)), -+ (RLLG GR64:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>; - } - - // Peepholes for turning scalar operations into block operations. -diff --git a/lib/Target/SystemZ/SystemZOperands.td b/lib/Target/SystemZ/SystemZOperands.td -index 7136121..61a1124 100644 ---- a/lib/Target/SystemZ/SystemZOperands.td -+++ b/lib/Target/SystemZ/SystemZOperands.td -@@ -341,6 +341,7 @@ def imm32zx16 : Immediate; - - def imm32sx16trunc : Immediate; -+def imm32zx16trunc : Immediate; - - // Full 32-bit immediates. we need both signed and unsigned versions - // because the assembler is picky. E.g. AFI requires signed operands -diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td -index d067f33..269c3d0 100644 ---- a/lib/Target/SystemZ/SystemZOperators.td -+++ b/lib/Target/SystemZ/SystemZOperators.td -@@ -611,6 +611,12 @@ class storei - : PatFrag<(ops node:$addr), - (store (operator), node:$addr)>; - -+// Create a shift operator that optionally ignores an AND of the -+// shift count with an immediate if the bottom 6 bits are all set. -+def imm32bottom6set : PatLeaf<(i32 imm), [{ -+ return (N->getZExtValue() & 0x3f) == 0x3f; -+}]>; -+ - // Vector representation of all-zeros and all-ones. 
- def z_vzero : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 0))))>; - def z_vones : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 65535))))>; -diff --git a/test/CodeGen/SystemZ/shift-12.ll b/test/CodeGen/SystemZ/shift-12.ll -index 4ebc42b..53d3d53 100644 ---- a/test/CodeGen/SystemZ/shift-12.ll -+++ b/test/CodeGen/SystemZ/shift-12.ll -@@ -104,3 +104,15 @@ define i32 @f10(i32 %a, i32 %sh) { - %reuse = add i32 %and, %shift - ret i32 %reuse - } -+ -+; Test that AND is not removed for i128 (which calls __ashlti3) -+define i128 @f11(i128 %a, i32 %sh) { -+; CHECK-LABEL: f11: -+; CHECK: risbg %r4, %r4, 57, 191, 0 -+; CHECK: brasl %r14, __ashlti3@PLT -+ %and = and i32 %sh, 127 -+ %ext = zext i32 %and to i128 -+ %shift = shl i128 %a, %ext -+ ret i128 %shift -+} -+ -diff --git a/utils/TableGen/CodeGenDAGPatterns.cpp b/utils/TableGen/CodeGenDAGPatterns.cpp -index 493066e..74af62b 100644 ---- a/utils/TableGen/CodeGenDAGPatterns.cpp -+++ b/utils/TableGen/CodeGenDAGPatterns.cpp -@@ -3919,6 +3919,24 @@ static bool ForceArbitraryInstResultType(TreePatternNode *N, TreePattern &TP) { - return false; - } - -+// Promote xform function to be an explicit node wherever set. -+static TreePatternNode* PromoteXForms(TreePatternNode* N) { -+ if (Record *Xform = N->getTransformFn()) { -+ N->setTransformFn(nullptr); -+ std::vector Children; -+ Children.push_back(PromoteXForms(N)); -+ return new TreePatternNode(Xform, std::move(Children), -+ N->getNumTypes()); -+ } -+ -+ if (!N->isLeaf()) -+ for (unsigned i = 0, e = N->getNumChildren(); i != e; ++i) { -+ TreePatternNode* Child = N->getChild(i); -+ N->setChild(i, std::move(PromoteXForms(Child))); -+ } -+ return N; -+} -+ - void CodeGenDAGPatterns::ParsePatterns() { - std::vector Patterns = Records.getAllDerivedDefinitions("Pattern"); - -@@ -4009,26 +4027,7 @@ void CodeGenDAGPatterns::ParsePatterns() { - InstImpResults); - - // Promote the xform function to be an explicit node if set. 
-- TreePatternNode *DstPattern = Result.getOnlyTree(); -- std::vector ResultNodeOperands; -- for (unsigned ii = 0, ee = DstPattern->getNumChildren(); ii != ee; ++ii) { -- TreePatternNode *OpNode = DstPattern->getChild(ii); -- if (Record *Xform = OpNode->getTransformFn()) { -- OpNode->setTransformFn(nullptr); -- std::vector Children; -- Children.push_back(OpNode); -- OpNode = new TreePatternNode(Xform, Children, OpNode->getNumTypes()); -- } -- ResultNodeOperands.push_back(OpNode); -- } -- DstPattern = Result.getOnlyTree(); -- if (!DstPattern->isLeaf()) -- DstPattern = new TreePatternNode(DstPattern->getOperator(), -- ResultNodeOperands, -- DstPattern->getNumTypes()); -- -- for (unsigned i = 0, e = Result.getOnlyTree()->getNumTypes(); i != e; ++i) -- DstPattern->setType(i, Result.getOnlyTree()->getExtType(i)); -+ TreePatternNode* DstPattern = PromoteXForms(Result.getOnlyTree()); - - TreePattern Temp(Result.getRecord(), DstPattern, false, *this); - Temp.InferAllTypes(); --- -1.8.3.1 - diff --git a/llvm-3.7.1-cmake-s390.patch b/llvm-3.7.1-cmake-s390.patch deleted file mode 100644 index bc9b583..0000000 --- a/llvm-3.7.1-cmake-s390.patch +++ /dev/null @@ -1,12 +0,0 @@ -diff -up llvm-3.7.1.src/cmake/config-ix.cmake.s390 llvm-3.7.1.src/cmake/config-ix.cmake ---- llvm-3.7.1.src/cmake/config-ix.cmake.s390 2016-02-16 12:27:36.000000000 +0100 -+++ llvm-3.7.1.src/cmake/config-ix.cmake 2016-02-16 12:27:52.000000000 +0100 -@@ -356,6 +356,8 @@ elseif (LLVM_NATIVE_ARCH MATCHES "msp430 - set(LLVM_NATIVE_ARCH MSP430) - elseif (LLVM_NATIVE_ARCH MATCHES "hexagon") - set(LLVM_NATIVE_ARCH Hexagon) -+elseif (LLVM_NATIVE_ARCH MATCHES "s390") -+ set(LLVM_NATIVE_ARCH SystemZ) - elseif (LLVM_NATIVE_ARCH MATCHES "s390x") - set(LLVM_NATIVE_ARCH SystemZ) - elseif (LLVM_NATIVE_ARCH MATCHES "wasm32") diff --git a/llvm.spec b/llvm.spec index f100956..6b82eff 100644 --- a/llvm.spec +++ b/llvm.spec @@ -8,9 +8,26 @@ %global compat_build 0 %global llvm_bindir %{_libdir}/%{name} -%global maj_ver 6 
+%global maj_ver 7 %global min_ver 0 -%global patch_ver 1 +%global patch_ver 0 +%global rc_ver 1 + +%ifarch s390x +%global llvm_targets SystemZ;BPF +%endif +%ifarch ppc64 ppc64le +%global llvm_targets PowerPC;AMDGPU;BPF +%endif +%ifarch %ix86 x86_64 +%global llvm_targets X86;AMDGPU;NVPTX;BPF +%endif +%ifarch aarch64 +%global llvm_targets AArch64;AMDGPU;BPF +%endif +%ifarch %{arm} +%global llvm_targets ARM;BPF +%endif %if 0%{?compat_build} %global pkg_name llvm%{maj_ver}.%{min_ver} @@ -26,26 +43,24 @@ %else %global pkg_name llvm %global install_prefix /usr +%global install_libdir %{_libdir} +%global pkg_libdir %{install_libdir} %endif Name: %{pkg_name} Version: %{maj_ver}.%{min_ver}.%{patch_ver} -Release: 6%{?dist} +Release: 0.1.rc%{rc_ver}%{?dist} Summary: The Low Level Virtual Machine License: NCSA URL: http://llvm.org -Source0: http://llvm.org/releases/%{version}/llvm-%{version}%{?rc_ver:rc%{rc_ver}}.src.tar.xz +Source0: http://%{?rc_ver:pre}releases.llvm.org/%{version}/%{?rc_ver:rc%{rc_ver}}/llvm-%{version}%{?rc_ver:rc%{rc_ver}}.src.tar.xz # recognize s390 as SystemZ when configuring build -Patch0: llvm-3.7.1-cmake-s390.patch Patch3: 0001-CMake-Split-static-library-exports-into-their-own-ex.patch Patch7: 0001-Filter-out-cxxflags-not-supported-by-clang.patch -Patch9: 0001-Export-LLVM_DYLIB_COMPONENTS-in-LLVMConfig.cmake.patch Patch10: 0001-Don-t-run-BV-DAG-Combine-before-legalization-if-it-a.patch -Patch11: 0001-PowerPC-Do-not-round-values-prior-to-converting-to-i.patch -Patch12: 0001-SystemZ-TableGen-Fix-shift-count-handling.patch BuildRequires: gcc BuildRequires: gcc-c++ @@ -135,7 +150,7 @@ cd _build %endif %endif \ - -DLLVM_TARGETS_TO_BUILD="X86;AMDGPU;PowerPC;NVPTX;SystemZ;AArch64;ARM;Mips;BPF" \ + -DLLVM_TARGETS_TO_BUILD="%{llvm_targets}" \ -DLLVM_ENABLE_LIBCXX:BOOL=OFF \ -DLLVM_ENABLE_ZLIB:BOOL=ON \ -DLLVM_ENABLE_FFI:BOOL=ON \ @@ -185,6 +200,9 @@ ninja -v cd _build ninja -v install +# FIXME: Patch upstream to not install this +rm 
%{buildroot}%{install_libdir}/TestPlugin.so + %if !0%{?compat_build} # fix multi-lib mv -v %{buildroot}%{_bindir}/llvm-config{,-%{__isa_bits}} @@ -261,6 +279,7 @@ fi %endif %files libs +%{pkg_libdir}/libLLVM-%{maj_ver}.so %if !0%{?compat_build} %{_libdir}/BugpointPasses.so %{_libdir}/LLVMHello.so @@ -314,6 +333,11 @@ fi %endif %changelog +* Fri Aug 10 2018 Tom Stellard - 7.0.0-0.1.rc1 +- 7.0.0-rc1 Release +- Reduce number of enabled targets on all arches. +- Drop s390 detection patch, LLVM does not support s390 codegen. + * Mon Aug 06 2018 Tom Stellard - 6.0.1-6 - Backport some fixes needed by mesa and rust