#1 Rebase to latest master branch for LLVM 5.0 support
Merged 6 years ago by tstellar. Opened 6 years ago by tstellar.
rpms/ tstellar/pocl llvm5-f27  into  f27

@@ -0,0 +1,192 @@ 

+ From 14c2f22ae98f854e927b24ba4c4c6c8dcd2ba90c Mon Sep 17 00:00:00 2001

+ From: Tom Stellard <tstellar@redhat.com>

+ Date: Tue, 24 Oct 2017 14:44:56 -0700

+ Subject: [PATCH] Fix build on i686

+ 

+ Use the clang builtin names for intrinsics in vload_store_half_f16c.c

+ so we don't need to include x86intrin.h which redefines the size_t

+ typedef which is also defined in pocl_types.h

+ 

+ In file included from /builddir/build/BUILD/pocl-53ef5e89aa99b5e85ad512ff46aa8f38b36acf4f/lib/kernel/vload_store_half_f16c.c:50:

+ In file included from /usr/lib/clang/5.0.0/include/x86intrin.h:29:

+ In file included from /usr/lib/clang/5.0.0/include/immintrin.h:32:

+ In file included from /usr/lib/clang/5.0.0/include/xmmintrin.h:39:

+ In file included from /usr/lib/clang/5.0.0/include/mm_malloc.h:27:

+ In file included from /usr/include/stdlib.h:31:

+ /usr/lib/clang/5.0.0/include/stddef.h:62:23: error: typedef redefinition with different types ('unsigned int' vs 'ulong' (aka 'unsigned long'))

+ typedef __SIZE_TYPE__ size_t;

+                       ^

+ /builddir/build/BUILD/pocl-53ef5e89aa99b5e85ad512ff46aa8f38b36acf4f/include/pocl_types.h:23:15: note: previous definition is here

+ typedef ulong size_t;

+               ^

+ ---

+  lib/kernel/vload_store_half_f16c.c | 38 ++++++++++++++++++--------------------

+  1 file changed, 18 insertions(+), 20 deletions(-)

+ 

+ diff --git a/lib/kernel/vload_store_half_f16c.c b/lib/kernel/vload_store_half_f16c.c

+ index ae21e1f..a494380 100644

+ --- a/lib/kernel/vload_store_half_f16c.c

+ +++ b/lib/kernel/vload_store_half_f16c.c

+ @@ -47,19 +47,17 @@

+  

+  

+  

+ -#include <x86intrin.h>

+ -

+  /** FLOAT -> HALF vec4 ************************************************/

+  

+  typedef union

+  {

+ -  __m128 i;

+ +  float4 i;

+    float4 low, hi;

+  } f2h4_i;

+  

+  typedef union

+  {

+ -  __m128i o;

+ +  short8 o;

+    ushort4 low, hi;

+  } f2h4_o;

+  

+ @@ -69,7 +67,7 @@ _cl_float2half4_rte (const float4 data)

+    f2h4_i ui;

+    f2h4_o uo;

+    ui.low = data;

+ -  uo.o = _mm_cvtps_ph (ui.i, 0);

+ +  uo.o = __builtin_ia32_vcvtps2ph (ui.i, 0);

+    return uo.low;

+  }

+  

+ @@ -79,7 +77,7 @@ _cl_float2half4_rtn (const float4 data)

+    f2h4_i ui;

+    f2h4_o uo;

+    ui.low = data;

+ -  uo.o = _mm_cvtps_ph (ui.i, 1);

+ +  uo.o = __builtin_ia32_vcvtps2ph (ui.i, 1);

+    return uo.low;

+  }

+  

+ @@ -89,7 +87,7 @@ _cl_float2half4_rtp (const float4 data)

+    f2h4_i ui;

+    f2h4_o uo;

+    ui.low = data;

+ -  uo.o = _mm_cvtps_ph (ui.i, 2);

+ +  uo.o = __builtin_ia32_vcvtps2ph (ui.i, 2);

+    return uo.low;

+  }

+  

+ @@ -99,7 +97,7 @@ _cl_float2half4_rtz (const float4 data)

+    f2h4_i ui;

+    f2h4_o uo;

+    ui.low = data;

+ -  uo.o = _mm_cvtps_ph (ui.i, 3);

+ +  uo.o = __builtin_ia32_vcvtps2ph (ui.i, 3);

+    return uo.low;

+  }

+  

+ @@ -113,13 +111,13 @@ _cl_float2half4 (const float4 data)

+  

+  typedef union

+  {

+ -  __m256 i;

+ +  float8 i;

+    float8 f;

+  } f2h8_i;

+  

+  typedef union

+  {

+ -  __m128i o;

+ +  ushort8 o;

+    ushort8 f;

+  } f2h8_o;

+  

+ @@ -129,7 +127,7 @@ _cl_float2half8_rte (const float8 data)

+    f2h8_i ui;

+    f2h8_o uo;

+    ui.f = data;

+ -  uo.o = _mm256_cvtps_ph (ui.i, 0);

+ +  uo.o = __builtin_ia32_vcvtps2ph256 (ui.i, 0);

+    return uo.f;

+  }

+  

+ @@ -139,7 +137,7 @@ _cl_float2half8_rtn (const float8 data)

+    f2h8_i ui;

+    f2h8_o uo;

+    ui.f = data;

+ -  uo.o = _mm256_cvtps_ph (ui.i, 1);

+ +  uo.o = __builtin_ia32_vcvtps2ph256 (ui.i, 1);

+    return uo.f;

+  }

+  

+ @@ -149,7 +147,7 @@ _cl_float2half8_rtp (const float8 data)

+    f2h8_i ui;

+    f2h8_o uo;

+    ui.f = data;

+ -  uo.o = _mm256_cvtps_ph (ui.i, 2);

+ +  uo.o = __builtin_ia32_vcvtps2ph256 (ui.i, 2);

+    return uo.f;

+  }

+  

+ @@ -159,7 +157,7 @@ _cl_float2half8_rtz (const float8 data)

+    f2h8_i ui;

+    f2h8_o uo;

+    ui.f = data;

+ -  uo.o = _mm256_cvtps_ph (ui.i, 3);

+ +  uo.o = __builtin_ia32_vcvtps2ph256 (ui.i, 3);

+    return uo.f;

+  }

+  

+ @@ -173,13 +171,13 @@ _cl_float2half8 (const float8 data)

+  

+  typedef union

+  {

+ -  __m128i i;

+ +  short8 i;

+    ushort4 low, hi;

+  } h2f4_i;

+  

+  typedef union

+  {

+ -  __m128 o;

+ +  float4 o;

+    float4 f;

+  } h2f4_o;

+  

+ @@ -189,7 +187,7 @@ _cl_half2float4 (const ushort4 data)

+    h2f4_i ui;

+    h2f4_o uo;

+    ui.low = data;

+ -  uo.o = _mm_cvtph_ps (ui.i);

+ +  uo.o = __builtin_ia32_vcvtph2ps (ui.i);

+    return uo.f;

+  }

+  

+ @@ -197,13 +195,13 @@ _cl_half2float4 (const ushort4 data)

+  

+  typedef union

+  {

+ -  __m128i i;

+ +  short8 i;

+    ushort8 u;

+  } h2f8_i;

+  

+  typedef union

+  {

+ -  __m256 o;

+ +  float8 o;

+    float8 f;

+  } h2f8_o;

+  

+ @@ -213,7 +211,7 @@ _cl_half2float8 (const ushort8 data)

+    h2f8_i ui;

+    h2f8_o uo;

+    ui.u = data;

+ -  uo.o = _mm256_cvtph_ps (ui.i);

+ +  uo.o = __builtin_ia32_vcvtph2ps256 (ui.i);

+    return uo.f;

+  }

+  

+ -- 

+ 1.8.3.1

+ 

@@ -0,0 +1,56 @@ 

+ From e9bab88cffde2871c3f7ca4339552fb3dcb66154 Mon Sep 17 00:00:00 2001

+ From: Tom Stellard <tstellar@redhat.com>

+ Date: Wed, 25 Oct 2017 15:04:43 -0700

+ Subject: [PATCH] Revert "lib/kernel/printf.c: fix "-nan" on output"

+ 

+ This reverts commit 3877d6316985fdf36a54cfee745952436744ebcc.

+ ---

+  lib/kernel/printf.c | 11 +++++------

+  1 file changed, 5 insertions(+), 6 deletions(-)

+ 

+ diff --git a/lib/kernel/printf.c b/lib/kernel/printf.c

+ index bd522fb..1564121 100644

+ --- a/lib/kernel/printf.c

+ +++ b/lib/kernel/printf.c

+ @@ -28,7 +28,6 @@

+  #include <limits.h>

+  #include <stdarg.h>

+  #include <stdbool.h>

+ -#include <math.h>

+  

+  // We implement the OpenCL printf by calling the C99 printf. This is

+  // not very efficient, but is easy to implement.

+ @@ -111,6 +110,9 @@ float __attribute__((overloadable)) vload_half(size_t offset,

+  

+  // Note: To simplify implementation, we print double values with %lf,

+  // although %f would suffice as well

+ +#define FLOAT_CONV_half   "h"

+ +#define FLOAT_CONV_float  ""

+ +#define FLOAT_CONV_double "l"

+  #define FLOAT_GET_half(ptr)   vload_half(0, ptr)

+  #define FLOAT_GET_float(ptr)  (*(ptr))

+  #define FLOAT_GET_double(ptr) (*(ptr))

+ @@ -121,7 +123,7 @@ float __attribute__((overloadable)) vload_half(size_t offset,

+    {                                                                     \

+      DEBUG_PRINTF(("[printf:floats:n=%dd]\n", n));                       \

+      char outfmt[1000];                                                  \

+ -    OCL_C_AS char str[] = "%%%s%s%s%s%s%.0d%s%.0d" "%c";                \

+ +    OCL_C_AS char str[] = "%%%s%s%s%s%s%.0d%s%.0d" FLOAT_CONV_##WIDTH "%c"; \

+      snprintf(outfmt, sizeof outfmt,                                     \

+               str,                                                       \

+               flags.left ? "-" : "",                                     \

+ @@ -138,10 +140,7 @@ float __attribute__((overloadable)) vload_half(size_t offset,

+      for (int d=0; d<n; ++d) {                                           \

+        DEBUG_PRINTF(("[printf:floats:d=%d]\n", d));                      \

+        if (d != 0) printf(comma);                                        \

+ -      WIDTH val = (FLOAT_GET_##WIDTH((OCL_C_AS const WIDTH*)vals+d));   \

+ -      if (isnan (val))                                                  \

+ -        val = NAN;                                                      \

+ -      printf(outfmt, (double)val);                                      \

+ +      printf(outfmt, FLOAT_GET_##WIDTH((OCL_C_AS const WIDTH*)vals+d)); \

+      }                                                                   \

+      DEBUG_PRINTF(("[printf:floats:done]\n"));                           \

+    }

+ -- 

+ 1.8.3.1

+ 

@@ -1,42 +0,0 @@ 

- From d7f89ad2696a253296a5116bd89fe3031de1fb1c Mon Sep 17 00:00:00 2001

- From: Tom Stellard <tstellar@redhat.com>

- Date: Thu, 25 May 2017 19:46:47 +0000

- Subject: [PATCH] Use generic cpu when LLVM can't detect the host processor

- 

- LLVM's CPU detection is very basic, it parses /proc/cpuinfo and uses

- information from there to look up CPUs in its own internal tables.

- These tables are incomplete, so it's not uncommon for LLVM to not

- be able to detect the CPU.

- 

- Ideally pocl would use something better than llc --version for CPU

- detection, but for now we can at least fallback to the generic

- cpu when LLVM can't detect the host.

- ---

-  cmake/LLVM.cmake | 7 ++++++-

-  1 file changed, 6 insertions(+), 1 deletion(-)

- 

- diff --git a/cmake/LLVM.cmake b/cmake/LLVM.cmake

- index 22e9b851..440fc0ca 100644

- --- a/cmake/LLVM.cmake

- +++ b/cmake/LLVM.cmake

- @@ -567,6 +567,10 @@ endif()

-  

-  set_cache_var(LLC_TRIPLE "LLC_TRIPLE")

-  

- +# FIXME: The cpu name printed by llc --version is the same cpu that will be

- +# targeted if you pass -mcpu=native to llc, so we could replace this auto-detection

- +# with just: set(LLC_HOST_CPU "native"), however, we can't do this at the moment

- +# because of the work-around for arm1176jz-s.

-  if(NOT DEFINED LLC_HOST_CPU AND NOT CMAKE_CROSSCOMPILING)

-    message(STATUS "Find out LLC host CPU with ${LLVM_LLC}")

-    execute_process(COMMAND ${LLVM_LLC} "--version" RESULT_VARIABLE RES_VAR OUTPUT_VARIABLE OUTPUT_VAR)

- @@ -589,7 +593,8 @@ if(NOT DEFINED LLC_HOST_CPU AND NOT CMAKE_CROSSCOMPILING)

-  endif()

-  

-  if(LLC_HOST_CPU MATCHES "unknown")

- -  message(FATAL_ERROR "LLVM could not recognize your CPU model automatically. Please rerun cmake with -DLLC_HOST_CPU=<model> (to see a list of models, try: llc -mcpu help)")

- +  message(WARNING "LLVM could not recognize your CPU model automatically.  Using a generic CPU target.")

- +  set(LLC_HOST_CPU "generic")

-  endif()

-  

-  set(LLC_HOST_CPU "${LLC_HOST_CPU}" CACHE STRING "The Host CPU to use with llc")

file modified
+17 -5
@@ -1,22 +1,29 @@ 

  %global sover 1

  

+ %global commit 53ef5e89aa99b5e85ad512ff46aa8f38b36acf4f

+ %global shortcommit %(c=%{commit}; echo ${c:0:7})

+ 

  # hardened builds are breaking the pocl tests with llvm 3.9

  #undefine _hardened_build

  

  Summary:        Portable Computing Language - an OpenCL implementation

  Name:           pocl

- Version:        0.14

- Release:        4%{?dist}

+ Version:        0.15

+ Release:        0.1.20171023git%{shortcommit}%{?dist}

  

  # The whole code is under MIT

  # except include/utlist.h which is under BSD (and unbundled) and

  # except lib/kernel/vecmath which is under GPLv3+ or LGPLv3+ (and unbundled in future)

  License:        MIT and BSD and (GPLv3+ or LGPLv3+)

  URL:            http://pocl.sourceforge.net

- Source0:        http://portablecl.org/downloads/%{name}-%{version}.tar.gz

+ #Source0:        http://portablecl.org/downloads/%{name}-%{version}.tar.gz

+ Source0:        https://github.com/pocl/pocl/archive/%{commit}/%{name}-%{shortcommit}.tar.gz

  

  Patch1:         0001-wrap-more-stuff-for-64-bit-atomics.patch

- Patch2:         pocl-fix-arm-detection.patch

+ Patch2:         0001-Fix-build-on-i686.patch

+ #https://github.com/pocl/pocl/issues/544

+ Patch3:         0001-Revert-lib-kernel-printf.c-fix-nan-on-output.patch

+ 

  # aarch64 seems to have a build system problem that makes it pass -march=(unknown).

  # ppc64 builds fine, but: 41% tests passed, 64 tests failed out of 108.

  # ppc64le fails with compilation errors.
@@ -69,7 +76,7 @@ 

  Portable Computing Language development files.

  

  %prep

- %autosetup -p1

+ %autosetup -p1 -n %{name}-%{commit}

  

  # Unbundle uthash

  find . -depth -name utlist* | xargs rm -f
@@ -78,6 +85,7 @@ 

  %build

  mkdir %{_target_platform}

  pushd %{_target_platform}

+ # CPU detection fails on ARM, so we need to manually specify the CPU as generic.

    %cmake ../ \

      -DENABLE_ICD=1 \

      -DPOCL_INSTALL_ICD_VENDORDIR=%{_sysconfdir}/OpenCL/vendors \
@@ -86,6 +94,7 @@ 

      -DKERNELLIB_HOST_CPU_VARIANTS=distro

  %endif

  %ifarch %{arm}

+     -DLLC_HOST_CPU="generic" \

      -DCL_DISABLE_LONG=true

  %endif

      # -DENABLE_TESTSUITES=all Requires clBLAS
@@ -129,6 +138,9 @@ 

  %{_includedir}/%{name}u.h

  

  %changelog

+ * Tue Oct 24 2017 Tom Stellard <tstellar@redhat.com> - 0.15-0.1.20171023git53ef5e8

+ - Rebase to latest master branch for LLVM 5.0 support

+ 

  * Thu Aug 03 2017 Fedora Release Engineering <releng@fedoraproject.org> - 0.14-4

  - Rebuilt for https://fedoraproject.org/wiki/Fedora_27_Binutils_Mass_Rebuild

  

file modified
+1 -1
@@ -1,1 +1,1 @@ 

- SHA512 (pocl-0.14.tar.gz) = 36bf41222315e13bac6c37cc942b9c09e22af0374abf63052fa8deb1ffe383da0ed9b5dc3548844d52749642d0b1288af5e0128e8dd4deb38e70128adb28c066

+ SHA512 (pocl-53ef5e8.tar.gz) = 313c3dd937825178823bcba0254be462748c221025df2d79ea93ae0201bc7e04c0e7fa965d39cac0e05ae8e1105f604c48177b4cb54d08a1d38a78e756f783d1

This is for the LLVM 5.0 rebase in f27. I will take care of creating the update once the package has been built. The necessary build overrides are already in place.

Pull-Request has been merged by tstellar

6 years ago