diff --git a/.gitignore b/.gitignore index 4a406bf..8cba2ba 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,3 @@ -*~ *.rpm *.tar* results_*/ diff --git a/0001-Fix-issue-597-for-32bit-ARM.patch b/0001-Fix-issue-597-for-32bit-ARM.patch deleted file mode 100644 index 1473b15..0000000 --- a/0001-Fix-issue-597-for-32bit-ARM.patch +++ /dev/null @@ -1,32 +0,0 @@ -From dca128f42d3b9401eeb5760b129987091657753f Mon Sep 17 00:00:00 2001 -From: Michal Babej -Date: Wed, 3 Jan 2018 11:07:35 +0100 -Subject: [PATCH 01/14] Fix issue #597 for 32bit ARM - ---- - lib/kernel/libclc/vtables_fp64.cl | 5 ++--- - 1 file changed, 2 insertions(+), 3 deletions(-) - -diff --git a/lib/kernel/libclc/vtables_fp64.cl b/lib/kernel/libclc/vtables_fp64.cl -index 44142909..0f4d5753 100644 ---- a/lib/kernel/libclc/vtables_fp64.cl -+++ b/lib/kernel/libclc/vtables_fp64.cl -@@ -1025,9 +1025,6 @@ DECLARE_VTABLE(double2, CBRT_REM_TBL, 5) = { - (double2)(0x1.965fea0000000p+0, 0x1.4f5b8f20ac166p-26), - }; - --#endif // cl_khr_fp64 -- -- - VTABLE_FUNCTION(double, CBRT_INV_TBL, cbrt_inv_tbl); - - VTABLE_FUNCTION2(v2double, LN_TBL, ln_tbl); -@@ -1037,3 +1034,5 @@ VTABLE_FUNCTION2(v2double, SINH_TBL, sinh_tbl); - VTABLE_FUNCTION2(v2double, COSH_TBL, cosh_tbl); - VTABLE_FUNCTION2(v2double, CBRT_DBL_TBL, cbrt_dbl_tbl); - VTABLE_FUNCTION2(v2double, CBRT_REM_TBL, cbrt_rem_tbl); -+ -+#endif // cl_khr_fp64 --- -2.15.1 - diff --git a/0002-Try-to-fix-596-provide-additional-pocl-build-informa.patch b/0002-Try-to-fix-596-provide-additional-pocl-build-informa.patch deleted file mode 100644 index 709f6a9..0000000 --- a/0002-Try-to-fix-596-provide-additional-pocl-build-informa.patch +++ /dev/null @@ -1,177 +0,0 @@ -From 020cd9dbd8e356cc370b090d416c6e8b912c6b47 Mon Sep 17 00:00:00 2001 -From: Michal Babej -Date: Wed, 3 Jan 2018 12:55:54 +0100 -Subject: [PATCH 02/14] Try to fix #596 - provide additional pocl build - information - ---- - CMakeLists.txt | 6 +++++- - config.h.in.cmake | 5 +++++ - lib/CL/clGetPlatformInfo.c | 43 ++++++++++++++++++++++++++++++++++++++-- - lib/CL/devices/basic/basic.c | 8 +++++++- - lib/CL/devices/pthread/pthread.c | 12 ++++++++++- - tests/runtime/test_version.c | 12 ++++++----- - 6 files changed, 76 insertions(+), 10 deletions(-) - -diff --git a/CMakeLists.txt b/CMakeLists.txt -index 44a51c94..a836da90 100644 ---- a/CMakeLists.txt -+++ b/CMakeLists.txt -@@ -308,7 +308,11 @@ if (OCS_AVAILABLE) - endif() - - if(NOT DEFINED HOST_DEVICE_BUILD_HASH) -- set(HOST_DEVICE_BUILD_HASH "${LLC_TRIPLE}-${LLC_HOST_CPU}") -+ if(KERNELLIB_HOST_CPU_VARIANTS STREQUAL "distro") -+ set(HOST_DEVICE_BUILD_HASH "${LLC_TRIPLE}") -+ else() -+ set(HOST_DEVICE_BUILD_HASH "${LLC_TRIPLE}-${LLC_HOST_CPU}") -+ endif() - endif() - - if(ARM AND LLVM_3_9) -diff --git a/config.h.in.cmake b/config.h.in.cmake -index 7a6f72a1..cd75d7ce 100644 ---- a/config.h.in.cmake -+++ b/config.h.in.cmake -@@ -28,6 +28,11 @@ - - #define FORCED_CLFLAGS "@FORCED_CLFLAGS@" - -+#cmakedefine ENABLE_ASAN -+#cmakedefine ENABLE_LSAN -+#cmakedefine ENABLE_TSAN -+#cmakedefine ENABLE_UBSAN -+ - #cmakedefine ENABLE_CONFORMANCE - - #cmakedefine ENABLE_POCL_BUILDING -diff --git a/lib/CL/clGetPlatformInfo.c b/lib/CL/clGetPlatformInfo.c -index 74c15c92..5bb9c0ac 100644 ---- a/lib/CL/clGetPlatformInfo.c -+++ b/lib/CL/clGetPlatformInfo.c -@@ -51,8 +51,47 @@ POname(clGetPlatformInfo)(cl_platform_id platform, - - case CL_PLATFORM_VERSION: - #ifdef OCS_AVAILABLE -- POCL_RETURN_GETINFO_STR("OpenCL " POCL_CL_VERSION\ -- " pocl " PACKAGE_VERSION ", LLVM " LLVM_VERSION); -+ POCL_RETURN_GETINFO_STR ("OpenCL " POCL_CL_VERSION -+ " pocl " PACKAGE_VERSION ", LLVM " LLVM_VERSION -+#ifdef LLVM_BUILD_MODE_DEBUG -+ " - debug" -+#endif -+ -+#ifdef ENABLE_ASAN -+ ", ASAN" -+#endif -+#ifdef ENABLE_TSAN -+ ", TSAN" -+#endif -+#ifdef ENABLE_LSAN -+ ", LSAN" -+#endif -+#ifdef ENABLE_UBSAN -+ ", UBSAN" -+#endif -+ -+#ifdef BUILD_CUDA -+ ", CUDA" -+#endif -+#ifdef BUILD_HSA -+ ", HSA" -+#endif -+#ifdef TCE_AVAILABLE -+ ", TCE" -+#endif -+#ifdef HAVE_LTTNG_UST -+ ", LTTNG" -+#endif -+#ifdef KERNELLIB_HOST_DISTRO_VARIANTS -+ ", DISTRO" -+#endif -+#ifdef POCL_DEBUG_MESSAGES -+ ", POCL_DEBUG" -+#endif -+#ifndef _CL_DISABLE_HALF -+ ", FP16" -+#endif -+ ); - #else - POCL_RETURN_GETINFO_STR("OpenCL " POCL_CL_VERSION\ - " pocl " PACKAGE_VERSION ", no online compiler support"); -diff --git a/lib/CL/devices/basic/basic.c b/lib/CL/devices/basic/basic.c -index 433ce943..8d1d8910 100644 ---- a/lib/CL/devices/basic/basic.c -+++ b/lib/CL/devices/basic/basic.c -@@ -134,7 +134,13 @@ char * - pocl_basic_build_hash (cl_device_id device) - { - char* res = calloc(1000, sizeof(char)); -- snprintf(res, 1000, "basic-%s", HOST_DEVICE_BUILD_HASH); -+#ifdef KERNELLIB_HOST_DISTRO_VARIANTS -+ char *name = get_cpu_name (); -+ snprintf (res, 1000, "basic-%s-%s", HOST_DEVICE_BUILD_HASH, name); -+ POCL_MEM_FREE (name); -+#else -+ snprintf (res, 1000, "basic-%s", HOST_DEVICE_BUILD_HASH); -+#endif - return res; - } - -diff --git a/lib/CL/devices/pthread/pthread.c b/lib/CL/devices/pthread/pthread.c -index b3d91fdb..0b88a74c 100644 ---- a/lib/CL/devices/pthread/pthread.c -+++ b/lib/CL/devices/pthread/pthread.c -@@ -51,6 +51,10 @@ - #include "pocl_util.h" - #include "pocl_mem_management.h" - -+#ifdef OCS_AVAILABLE -+#include "pocl_llvm.h" -+#endif -+ - //#define DEBUG_MT - - #ifdef CUSTOM_BUFFER_ALLOCATOR -@@ -149,7 +153,13 @@ char * - pocl_pthread_build_hash (cl_device_id device) - { - char* res = calloc(1000, sizeof(char)); -- snprintf(res, 1000, "pthread-%s", HOST_DEVICE_BUILD_HASH); -+#ifdef KERNELLIB_HOST_DISTRO_VARIANTS -+ char *name = get_cpu_name (); -+ snprintf (res, 1000, "pthread-%s-%s", HOST_DEVICE_BUILD_HASH, name); -+ POCL_MEM_FREE (name); -+#else -+ snprintf (res, 1000, "pthread-%s", HOST_DEVICE_BUILD_HASH); -+#endif - return res; - } - -diff --git a/tests/runtime/test_version.c b/tests/runtime/test_version.c -index c41ef515..6bbc70de 100644 ---- a/tests/runtime/test_version.c -+++ b/tests/runtime/test_version.c -@@ -33,11 +33,13 @@ int main(void) - sizeof(result), result, &rvs)); - - result[rvs]=0; // spec doesn't say it is null-terminated. -- if( strcmp( result, -- "OpenCL " POCL_CL_VERSION " pocl " PACKAGE_VERSION ", LLVM " LLVM_VERSION) != 0 ) { -- printf("Error: platform is: %s\n", result); -- return 2; -- } -+ const char *expected = "OpenCL " POCL_CL_VERSION -+ " pocl " PACKAGE_VERSION ", LLVM " LLVM_VERSION; -+ if (strncmp (result, expected, strlen (expected)) != 0) -+ { -+ printf ("Error: platform is: %s\n", result); -+ return 2; -+ } - - - /* Pocl devices have the form 'type'-'details', if details are --- -2.15.1 - diff --git a/0003-Remove-CL_DISABLE_LONG.patch b/0003-Remove-CL_DISABLE_LONG.patch deleted file mode 100644 index 434764d..0000000 --- a/0003-Remove-CL_DISABLE_LONG.patch +++ /dev/null @@ -1,139 +0,0 @@ -From 019531349964b7d61a2b658eb81a6b168cde3f29 Mon Sep 17 00:00:00 2001 -From: Michal Babej -Date: Wed, 3 Jan 2018 10:46:47 +0100 -Subject: [PATCH 03/14] Remove CL_DISABLE_LONG - -This was a workaround for an OpenCL bug in LLVM 3.3 ---- - CMakeLists.txt | 37 +++++++++++++++++-------------------- - config.h.in.cmake | 3 --- - lib/CL/devices/basic/basic.c | 5 ----- - lib/CL/devices/pthread/pthread.c | 5 ----- - 4 files changed, 17 insertions(+), 33 deletions(-) - -diff --git a/CMakeLists.txt b/CMakeLists.txt -index a836da90..60d570b8 100644 ---- a/CMakeLists.txt -+++ b/CMakeLists.txt -@@ -744,9 +744,13 @@ endif() - set(DEFAULT_HOST_CLANG_FLAGS "${CLANG_TARGET_OPTION}${LLC_TRIPLE}") - set(DEFAULT_HOST_LLC_FLAGS "-relocation-model=pic -mtriple=${LLC_TRIPLE}") - --if(ARM AND (NOT LLVM_OLDER_THAN_4_0)) -- #ARMs need to enable FP64 manually with 4.0 -- option(ENABLE_FP64 "Enable FP64" ON) -+if(ARM) -+ #ARMs need to enable FP64 manually with LLVM < 4.0 -+ if(LLVM_OLDER_THAN_4_0) -+ option(ENABLE_FP64 "Enable FP64 on ARM32 - disabled by default for LLVM < 4" OFF) -+ else() -+ option(ENABLE_FP64 "Enable FP64 on ARM32 - if you have at least VFP support for doubles, you can leave it ON" ON ) -+ endif() - endif() - - if(ARM32 OR (LLC_TRIPLE MATCHES "^arm")) -@@ -764,9 +768,6 @@ if(ARM32 OR (LLC_TRIPLE MATCHES "^arm")) - endif() - endif() - --if(CL_DISABLE_LONG) -- set(DEFAULT_HOST_CLANG_FLAGS "${DEFAULT_HOST_CLANG_FLAGS} -D_CL_DISABLE_LONG") --endif() - if(CL_DISABLE_HALF) - set(DEFAULT_HOST_CLANG_FLAGS "${DEFAULT_HOST_CLANG_FLAGS} -D_CL_DISABLE_HALF") - endif() -@@ -790,19 +791,17 @@ if(NOT CL_DISABLE_HALF) - set(HOST_DEVICE_EXTENSIONS "${HOST_DEVICE_EXTENSIONS} cl_khr_fp16") - endif() - --if(NOT CL_DISABLE_LONG) -- # must not be defined in HOST_DEVICE_EXTENSIONS list, because -- # this extension doesn't exist in official extension list -- set(HOST_DEVICE_EXTENSION_DEFINES "-Dcl_khr_int64") -+# must not be defined in HOST_DEVICE_EXTENSIONS list, because -+# this extension doesn't exist in official extension list -+# there is "cles_khr_int64" which indicates int64 support for embedded profiles -+set(HOST_DEVICE_EXTENSION_DEFINES "-Dcl_khr_int64") - -- # fp64 requires int64 -- if(X86) -- set(HOST_DEVICE_EXTENSIONS "${HOST_DEVICE_EXTENSIONS} cl_khr_fp64 cl_khr_int64_base_atomics cl_khr_int64_extended_atomics") -- endif() -- if(ENABLE_FP64 AND (NOT LLVM_OLDER_THAN_4_0)) -- # 32bit arm doesnt always uspport doubles -- set(HOST_DEVICE_EXTENSIONS "${HOST_DEVICE_EXTENSIONS} cl_khr_fp64") -- endif() -+if(X86) -+ set(HOST_DEVICE_EXTENSIONS "${HOST_DEVICE_EXTENSIONS} cl_khr_fp64 cl_khr_int64_base_atomics cl_khr_int64_extended_atomics") -+endif() -+# 32bit arm doesnt always support doubles -+if(ENABLE_FP64 AND (NOT LLVM_OLDER_THAN_4_0)) -+ set(HOST_DEVICE_EXTENSIONS "${HOST_DEVICE_EXTENSIONS} cl_khr_fp64") - endif() - - set(TEMP_EXT "${HOST_DEVICE_EXTENSIONS}") -@@ -1145,7 +1144,6 @@ set(BUILD_ICD ${ENABLE_ICD}) - set(BUILD_HSA ${ENABLE_HSA}) - set(TCE_AVAILABLE ${ENABLE_TCE}) - set(TCEMC_AVAILABLE ${ENABLE_TCEMC}) --set(_CL_DISABLE_LONG ${CL_DISABLE_LONG}) - set(_CL_DISABLE_HALF ${CL_DISABLE_HALF}) - set(PACKAGE_VERSION "${POCL_VERSION}") - -@@ -1277,7 +1275,6 @@ MESSAGE(STATUS " ") - MESSAGE(STATUS "CLANG_MARCH_FLAG: ${CLANG_MARCH_FLAG}") - MESSAGE(STATUS "CLANG_TARGET_OPTION: ${CLANG_TARGET_OPTION}") - MESSAGE(STATUS "CL_DISABLE_HALF: ${CL_DISABLE_HALF}") --MESSAGE(STATUS "CL_DISABLE_LONG: ${CL_DISABLE_LONG}") - MESSAGE(STATUS "HAVE_CLOCK_GETTIME: ${HAVE_CLOCK_GETTIME}") - MESSAGE(STATUS "HAVE_GLEW: ${HAVE_GLEW}") - MESSAGE(STATUS "HAVE_LTTNG_UST: ${HAVE_LTTNG_UST}") -diff --git a/config.h.in.cmake b/config.h.in.cmake -index cd75d7ce..7b7e87e6 100644 ---- a/config.h.in.cmake -+++ b/config.h.in.cmake -@@ -149,9 +149,6 @@ - /* Defined on big endian systems */ - #define WORDS_BIGENDIAN @WORDS_BIGENDIAN@ - --/* Disable 64bit ints when a clang bug is present */ --#cmakedefine _CL_DISABLE_LONG -- - /* Disable cl_khr_fp16 because fp16 is not supported */ - #cmakedefine _CL_DISABLE_HALF - -diff --git a/lib/CL/devices/basic/basic.c b/lib/CL/devices/basic/basic.c -index 8d1d8910..1dc7d303 100644 ---- a/lib/CL/devices/basic/basic.c -+++ b/lib/CL/devices/basic/basic.c -@@ -381,11 +381,6 @@ pocl_basic_init (unsigned j, cl_device_id device, const char* parameters) - if(device->llvm_cpu && (!strcmp(device->llvm_cpu, "(unknown)"))) - device->llvm_cpu = NULL; - -- // work-around LLVM bug where sizeof(long)=4 -- #ifdef _CL_DISABLE_LONG -- device->has_64bit_long=0; -- #endif -- - return ret; - } - -diff --git a/lib/CL/devices/pthread/pthread.c b/lib/CL/devices/pthread/pthread.c -index 0b88a74c..494ae8b2 100644 ---- a/lib/CL/devices/pthread/pthread.c -+++ b/lib/CL/devices/pthread/pthread.c -@@ -263,11 +263,6 @@ pocl_pthread_init (unsigned j, cl_device_id device, const char* parameters) - if(device->llvm_cpu && (!strcmp(device->llvm_cpu, "(unknown)"))) - device->llvm_cpu = NULL; - -- // work-around LLVM bug where sizeof(long)=4 -- #ifdef _CL_DISABLE_LONG -- device->has_64bit_long=0; -- #endif -- - if (!scheduler_initialized) - { - scheduler_initialized = 1; --- -2.15.1 - diff --git a/0004-Fix-an-incorrect-env-var-check.patch b/0004-Fix-an-incorrect-env-var-check.patch deleted file mode 100644 index 3431131..0000000 --- a/0004-Fix-an-incorrect-env-var-check.patch +++ /dev/null @@ -1,27 +0,0 @@ -From 6c2059eace57aa6fe12db161ac92bd72556680e7 Mon Sep 17 00:00:00 2001 -From: Michal Babej -Date: Thu, 4 Jan 2018 18:17:17 +0100 -Subject: [PATCH 04/14] Fix an incorrect env var check - -Thish should be a check for compiler files, not POCL_DEBUG. ---- - lib/CL/pocl_llvm_build.cc | 3 +-- - 1 file changed, 1 insertion(+), 2 deletions(-) - -diff --git a/lib/CL/pocl_llvm_build.cc b/lib/CL/pocl_llvm_build.cc -index 0d67d22f..9ad22796 100644 ---- a/lib/CL/pocl_llvm_build.cc -+++ b/lib/CL/pocl_llvm_build.cc -@@ -109,8 +109,7 @@ load_source(FrontendOptions &fe, - static inline int - unlink_source(FrontendOptions &fe) - { -- // don't unlink in debug mode -- if (pocl_get_bool_option("POCL_DEBUG", 0)) -+ if (pocl_get_bool_option("POCL_LEAVE_KERNEL_COMPILER_TEMP_FILES", 0) != 0) - return 0; - - FrontendInputFile const& file = fe.Inputs.front(); --- -2.15.1 - diff --git a/0005-Add-asserts-to-pthread-API-calls.patch b/0005-Add-asserts-to-pthread-API-calls.patch deleted file mode 100644 index e157788..0000000 --- a/0005-Add-asserts-to-pthread-API-calls.patch +++ /dev/null @@ -1,194 +0,0 @@ -From ae9c7bc62f6fe78fe894e1a9a9aaca8ed5e04878 Mon Sep 17 00:00:00 2001 -From: Michal Babej -Date: Fri, 5 Jan 2018 12:47:02 +0100 -Subject: [PATCH 05/14] Add asserts to pthread API calls - ---- - lib/CL/devices/common.c | 8 ++-- - lib/CL/devices/pthread/pocl-pthread_utils.h | 64 +++++++++++++++++++++++++---- - lib/CL/pocl_cl.h | 49 ++++++++++++++++++---- - 3 files changed, 102 insertions(+), 19 deletions(-) - -diff --git a/lib/CL/devices/common.c b/lib/CL/devices/common.c -index 8c01191e..9895f1ae 100644 ---- a/lib/CL/devices/common.c -+++ b/lib/CL/devices/common.c -@@ -1070,12 +1070,12 @@ pocl_memalign_alloc_global_mem(cl_device_id device, size_t align, size_t size) - if (!ptr) - return NULL; - -- POCL_LOCK_OBJ (mem); -+ POCL_LOCK (mem->pocl_lock); - mem->currently_allocated += size; - if (mem->max_ever_allocated < mem->currently_allocated) - mem->max_ever_allocated = mem->currently_allocated; - assert(mem->currently_allocated <= mem->total_alloc_limit); -- POCL_UNLOCK_OBJ (mem); -+ POCL_UNLOCK (mem->pocl_lock); - - return ptr; - } -@@ -1085,10 +1085,10 @@ pocl_free_global_mem(cl_device_id device, void* ptr, size_t size) - { - pocl_global_mem_t *mem = device->global_memory; - -- POCL_LOCK_OBJ (mem); -+ POCL_LOCK (mem->pocl_lock); - assert(mem->currently_allocated >= size); - mem->currently_allocated -= size; -- POCL_UNLOCK_OBJ (mem); -+ POCL_UNLOCK (mem->pocl_lock); - - POCL_MEM_FREE(ptr); - } -diff --git a/lib/CL/devices/pthread/pocl-pthread_utils.h b/lib/CL/devices/pthread/pocl-pthread_utils.h -index 310bab6c..47afc65d 100644 ---- a/lib/CL/devices/pthread/pocl-pthread_utils.h -+++ b/lib/CL/devices/pthread/pocl-pthread_utils.h -@@ -8,19 +8,67 @@ - #endif - - /* locking macros */ --#define PTHREAD_LOCK(__lock) pthread_mutex_lock(__lock) --#define PTHREAD_UNLOCK(__lock) pthread_mutex_unlock(__lock) --#define PTHREAD_INIT_LOCK(__lock) pthread_mutex_init(__lock, NULL) --#define PTHREAD_DESTROY_LOCK(__lock) pthread_mutex_destroy(__lock) -+#define PTHREAD_LOCK(__lock) \ -+ do \ -+ { \ -+ int r = pthread_mutex_lock (__lock); \ -+ assert (r == 0); \ -+ } \ -+ while (0) -+#define PTHREAD_UNLOCK(__lock) \ -+ do \ -+ { \ -+ int r = pthread_mutex_unlock (__lock); \ -+ assert (r == 0); \ -+ } \ -+ while (0) -+#define PTHREAD_INIT_LOCK(__lock) \ -+ do \ -+ { \ -+ int r = pthread_mutex_init (__lock, NULL); \ -+ assert (r == 0); \ -+ } \ -+ while (0) -+#define PTHREAD_DESTROY_LOCK(__lock) \ -+ do \ -+ { \ -+ int r = pthread_mutex_destroy (__lock); \ -+ assert (r == 0); \ -+ } \ -+ while (0) - - /* Apparently Mac OS X does not have spinlock, despite having pthreads. - * for now only enable spinlocks on linux.*/ - #ifdef __linux__ - #define PTHREAD_FAST_LOCK_T pthread_spinlock_t -- #define PTHREAD_FAST_LOCK(l) pthread_spin_lock(l) -- #define PTHREAD_FAST_UNLOCK(l) pthread_spin_unlock(l) -- #define PTHREAD_FAST_INIT(l) pthread_spin_init(l, PTHREAD_PROCESS_PRIVATE) -- #define PTHREAD_FAST_DESTROY(l) pthread_spin_destroy(l) -+#define PTHREAD_FAST_LOCK(l) \ -+ do \ -+ { \ -+ int r = pthread_spin_lock (l); \ -+ assert (r == 0); \ -+ } \ -+ while (0) -+#define PTHREAD_FAST_UNLOCK(l) \ -+ do \ -+ { \ -+ int r = pthread_spin_unlock (l); \ -+ assert (r == 0); \ -+ } \ -+ while (0) -+#define PTHREAD_FAST_INIT(l) \ -+ do \ -+ { \ -+ int r = pthread_spin_init (l, PTHREAD_PROCESS_PRIVATE); \ -+ assert (r == 0); \ -+ } \ -+ while (0) -+#define PTHREAD_FAST_DESTROY(l) \ -+ do \ -+ { \ -+ int r = pthread_spin_destroy (l); \ -+ assert (r == 0); \ -+ } \ -+ while (0) - #else - #define PTHREAD_FAST_LOCK_T pthread_mutex_t - #define PTHREAD_FAST_LOCK(l) pthread_mutex_lock(l) -diff --git a/lib/CL/pocl_cl.h b/lib/CL/pocl_cl.h -index 03a0e91e..4c399784 100644 ---- a/lib/CL/pocl_cl.h -+++ b/lib/CL/pocl_cl.h -@@ -63,21 +63,56 @@ typedef pthread_mutex_t pocl_lock_t; - /* Generic functionality for handling different types of - OpenCL (host) objects. */ - --#define POCL_LOCK(__LOCK__) pthread_mutex_lock (&(__LOCK__)) --#define POCL_UNLOCK(__LOCK__) pthread_mutex_unlock (&(__LOCK__)) --#define POCL_INIT_LOCK(__LOCK__) pthread_mutex_init (&(__LOCK__), NULL) -+#define POCL_LOCK(__LOCK__) \ -+ do \ -+ { \ -+ int r = pthread_mutex_lock (&(__LOCK__)); \ -+ assert (r == 0); \ -+ } \ -+ while (0) -+#define POCL_UNLOCK(__LOCK__) \ -+ do \ -+ { \ -+ int r = pthread_mutex_unlock (&(__LOCK__)); \ -+ assert (r == 0); \ -+ } \ -+ while (0) -+#define POCL_INIT_LOCK(__LOCK__) \ -+ do \ -+ { \ -+ int r = pthread_mutex_init (&(__LOCK__), NULL); \ -+ assert (r == 0); \ -+ } \ -+ while (0) - /* We recycle OpenCL objects by not actually freeing them until the - very end. Thus, the lock should not be destoryed at the refcount 0. */ --#define POCL_DESTROY_LOCK(__LOCK__) pthread_mutex_destroy (&(__LOCK__)) -+#define POCL_DESTROY_LOCK(__LOCK__) \ -+ do \ -+ { \ -+ int r = pthread_mutex_destroy (&(__LOCK__)); \ -+ assert (r == 0); \ -+ } \ -+ while (0) - --#define POCL_LOCK_OBJ(__OBJ__) POCL_LOCK((__OBJ__)->pocl_lock) --#define POCL_UNLOCK_OBJ(__OBJ__) POCL_UNLOCK((__OBJ__)->pocl_lock) -+#define POCL_LOCK_OBJ(__OBJ__) \ -+ do \ -+ { \ -+ POCL_LOCK ((__OBJ__)->pocl_lock); \ -+ assert ((__OBJ__)->pocl_refcount > 0); \ -+ } \ -+ while (0) -+#define POCL_UNLOCK_OBJ(__OBJ__) \ -+ do \ -+ { \ -+ assert ((__OBJ__)->pocl_refcount >= 0); \ -+ POCL_UNLOCK ((__OBJ__)->pocl_lock); \ -+ } \ -+ while (0) - - #define POCL_RELEASE_OBJECT(__OBJ__, __NEW_REFCOUNT__) \ - do { \ - POCL_LOCK_OBJ (__OBJ__); \ - __NEW_REFCOUNT__ = --(__OBJ__)->pocl_refcount; \ -- assert((__OBJ__)->pocl_refcount >= 0); \ - POCL_UNLOCK_OBJ (__OBJ__); \ - } while (0) - --- -2.15.1 - diff --git a/0006-Do-not-call-clFinish-on-CmdQueues-with-refcount-0.patch b/0006-Do-not-call-clFinish-on-CmdQueues-with-refcount-0.patch deleted file mode 100644 index 2c49751..0000000 --- a/0006-Do-not-call-clFinish-on-CmdQueues-with-refcount-0.patch +++ /dev/null @@ -1,28 +0,0 @@ -From 4e1bb5729dca7e294c70c1ed4b3959561d94acb4 Mon Sep 17 00:00:00 2001 -From: Michal Babej -Date: Tue, 9 Jan 2018 12:09:41 +0100 -Subject: [PATCH 06/14] Do not call clFinish on CmdQueues with refcount == 0 - -This fails with the additional asserts from previous commit. -Also it's just wrong :) ---- - lib/CL/clReleaseCommandQueue.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/lib/CL/clReleaseCommandQueue.c b/lib/CL/clReleaseCommandQueue.c -index 6feaa450..4e6920bd 100644 ---- a/lib/CL/clReleaseCommandQueue.c -+++ b/lib/CL/clReleaseCommandQueue.c -@@ -39,8 +39,8 @@ POname(clReleaseCommandQueue)(cl_command_queue command_queue) CL_API_SUFFIX__VER - - if (new_refcount == 0) - { -+ assert (command_queue->command_count == 0); - POCL_MSG_PRINT_REFCOUNTS ("Free Command Queue %p\n", command_queue); -- POname(clFinish)(command_queue); - if (command_queue->device->ops->free_queue) - command_queue->device->ops->free_queue (command_queue); - POCL_DESTROY_OBJECT (command_queue); --- -2.15.1 - diff --git a/0007-Fix-a-potential-data-race-in-pocl_memalign_alloc_glo.patch b/0007-Fix-a-potential-data-race-in-pocl_memalign_alloc_glo.patch deleted file mode 100644 index ab8a56d..0000000 --- a/0007-Fix-a-potential-data-race-in-pocl_memalign_alloc_glo.patch +++ /dev/null @@ -1,49 +0,0 @@ -From 638b0c3a37d599876b83faf42df79079512193a2 Mon Sep 17 00:00:00 2001 -From: Michal Babej -Date: Tue, 9 Jan 2018 13:57:47 +0100 -Subject: [PATCH 07/14] Fix a potential data race in - pocl_memalign_alloc_global_mem() - ---- - lib/CL/devices/common.c | 16 ++++++++++------ - 1 file changed, 10 insertions(+), 6 deletions(-) - -diff --git a/lib/CL/devices/common.c b/lib/CL/devices/common.c -index 9895f1ae..f90d2321 100644 ---- a/lib/CL/devices/common.c -+++ b/lib/CL/devices/common.c -@@ -1063,21 +1063,25 @@ void* - pocl_memalign_alloc_global_mem(cl_device_id device, size_t align, size_t size) - { - pocl_global_mem_t *mem = device->global_memory; -+ void *retval = NULL; -+ -+ POCL_LOCK (mem->pocl_lock); - if ((mem->total_alloc_limit - mem->currently_allocated) < size) -- return NULL; -+ goto ERROR; - -- void* ptr = pocl_memalign_alloc(align, size); -- if (!ptr) -- return NULL; -+ retval = pocl_memalign_alloc (align, size); -+ if (!retval) -+ goto ERROR; - -- POCL_LOCK (mem->pocl_lock); - mem->currently_allocated += size; - if (mem->max_ever_allocated < mem->currently_allocated) - mem->max_ever_allocated = mem->currently_allocated; - assert(mem->currently_allocated <= mem->total_alloc_limit); -+ -+ERROR: - POCL_UNLOCK (mem->pocl_lock); - -- return ptr; -+ return retval; - } - - void --- -2.15.1 - diff --git a/0008-Fix-a-potential-data-race-in-pocl_create_event_sync.patch b/0008-Fix-a-potential-data-race-in-pocl_create_event_sync.patch deleted file mode 100644 index 8a9e5ae..0000000 --- a/0008-Fix-a-potential-data-race-in-pocl_create_event_sync.patch +++ /dev/null @@ -1,37 +0,0 @@ -From 59c55c4d58f8f4b60796efb6e53a498086b8f93e Mon Sep 17 00:00:00 2001 -From: Michal Babej -Date: Tue, 9 Jan 2018 13:58:53 +0100 -Subject: [PATCH 08/14] Fix a potential data race in pocl_create_event_sync() - ---- - lib/CL/pocl_util.c | 12 ++++-------- - 1 file changed, 4 insertions(+), 8 deletions(-) - -diff --git a/lib/CL/pocl_util.c b/lib/CL/pocl_util.c -index 1c342924..20bc63b4 100644 ---- a/lib/CL/pocl_util.c -+++ b/lib/CL/pocl_util.c -@@ -374,16 +374,12 @@ pocl_create_event_sync(cl_event waiting_event, - if (notifier_event == NULL) - return CL_SUCCESS; - -- assert(notifier_event->pocl_refcount != 0); -+ pocl_lock_events_inorder (waiting_event, notifier_event); -+ - POCL_MSG_PRINT_INFO("create event sync: waiting %d, notifier %d\n", waiting_event->id, notifier_event->id); -- if (waiting_event == notifier_event) -- { -- printf("waiting id %d, notifier id = %d\n", waiting_event->id, -- notifier_event->id); -- assert(waiting_event != notifier_event); -- } - -- pocl_lock_events_inorder (waiting_event, notifier_event); -+ assert (notifier_event->pocl_refcount != 0); -+ assert (waiting_event != notifier_event); - - LL_FOREACH (waiting_event->wait_list, wait_list_item) - { --- -2.15.1 - diff --git a/0009-Fix-potential-race-in-clGetEventInfo.patch b/0009-Fix-potential-race-in-clGetEventInfo.patch deleted file mode 100644 index b771ce1..0000000 --- a/0009-Fix-potential-race-in-clGetEventInfo.patch +++ /dev/null @@ -1,48 +0,0 @@ -From 8b77a178d1e6fa657b6b4b66a67a63dd608ca702 Mon Sep 17 00:00:00 2001 -From: Michal Babej -Date: Tue, 9 Jan 2018 14:49:37 +0100 -Subject: [PATCH 09/14] Fix potential race in clGetEventInfo() - ---- - lib/CL/clGetEventInfo.c | 17 ++++++++++++----- - 1 file changed, 12 insertions(+), 5 deletions(-) - -diff --git a/lib/CL/clGetEventInfo.c b/lib/CL/clGetEventInfo.c -index 5ab504b8..f36f6445 100644 ---- a/lib/CL/clGetEventInfo.c -+++ b/lib/CL/clGetEventInfo.c -@@ -9,19 +9,26 @@ POname(clGetEventInfo)(cl_event event , - CL_API_SUFFIX__VERSION_1_0 - { - POCL_RETURN_ERROR_COND((event == NULL), CL_INVALID_EVENT); -+ POCL_LOCK_OBJ (event); -+ cl_int s = event->status; -+ cl_command_queue q = event->queue; -+ cl_command_type t = event->command_type; -+ cl_uint r = event->pocl_refcount; -+ cl_context c = event->context; -+ POCL_UNLOCK_OBJ (event); - - switch (param_name) - { - case CL_EVENT_COMMAND_EXECUTION_STATUS: -- POCL_RETURN_GETINFO (cl_int, event->status); -+ POCL_RETURN_GETINFO (cl_int, s); - case CL_EVENT_COMMAND_QUEUE: -- POCL_RETURN_GETINFO(cl_command_queue, event->queue); -+ POCL_RETURN_GETINFO (cl_command_queue, q); - case CL_EVENT_COMMAND_TYPE: -- POCL_RETURN_GETINFO(cl_command_type, event->command_type); -+ POCL_RETURN_GETINFO (cl_command_type, t); - case CL_EVENT_REFERENCE_COUNT: -- POCL_RETURN_GETINFO(cl_uint, event->pocl_refcount); -+ POCL_RETURN_GETINFO (cl_uint, r); - case CL_EVENT_CONTEXT: -- POCL_RETURN_GETINFO(cl_context, event->context); -+ POCL_RETURN_GETINFO (cl_context, c); - default: - break; - } --- -2.15.1 - diff --git a/0010-Add-fsanitize-recover-address-to-ASAN-flags.patch b/0010-Add-fsanitize-recover-address-to-ASAN-flags.patch deleted file mode 100644 index d6a5136..0000000 --- a/0010-Add-fsanitize-recover-address-to-ASAN-flags.patch +++ /dev/null @@ -1,28 +0,0 @@ -From 16d2c90e7b046137d5a0386ffda9c111ebd3761c Mon Sep 17 00:00:00 2001 -From: Michal Babej -Date: Tue, 9 Jan 2018 14:22:42 +0100 -Subject: [PATCH 10/14] Add "-fsanitize-recover=address" to ASAN flags - -When used with env variable ASAN_OPTIONS=halt_on_error=0, this -should enable ASAN builds to continue after first error - unlike -the default which is to exit after first error. ---- - lib/CMakeLists.txt | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt -index 57690091..2cd3291d 100644 ---- a/lib/CMakeLists.txt -+++ b/lib/CMakeLists.txt -@@ -34,7 +34,7 @@ endif() - set(SANITIZER_OPTIONS "") - - if(ENABLE_ASAN) -- list(APPEND SANITIZER_OPTIONS "-fsanitize=address") -+ list(APPEND SANITIZER_OPTIONS "-fsanitize=address" "-fsanitize-recover=address") - list(APPEND SANITIZER_LIBS "asan") - endif() - --- -2.15.1 - diff --git a/0011-Fix-a-few-incorrect-missing-ifdef-cl_khr_fp64.patch b/0011-Fix-a-few-incorrect-missing-ifdef-cl_khr_fp64.patch deleted file mode 100644 index 15e7a97..0000000 --- a/0011-Fix-a-few-incorrect-missing-ifdef-cl_khr_fp64.patch +++ /dev/null @@ -1,361 +0,0 @@ -From c7102b1d347639d12545f1f00d8c8b1aa076182e Mon Sep 17 00:00:00 2001 -From: Michal Babej -Date: Thu, 11 Jan 2018 09:33:58 +0100 -Subject: [PATCH 11/14] Fix a few incorrect / missing #ifdef cl_khr_fp64 - -Simplifies printf.c to not require math.h, which was conflicting -with pocl-types.h in case cl_khr_fp64 was disabled. - -Also fixes CMake variable ENABLE_FP64 to properly disable -doubles. Pocl now builds with -DENABLE_FP64=OFF on 32bit ARM. ---- - CMakeLists.txt | 4 +++- - doc/sphinx/source/install.rst | 4 ++++ - lib/kernel/host/CMakeLists.txt | 14 +++++++---- - lib/kernel/printf.c | 48 ++++++++++++++++++++----------------- - lib/kernel/sleef-pocl/expfrexp.cl | 9 ++++--- - lib/kernel/sleef/include/sleef_cl.h | 47 ++++++++++++++++++++++++++---------- - lib/kernel/sleef/libm/sleef_glue.cl | 2 ++ - 7 files changed, 83 insertions(+), 45 deletions(-) - -diff --git a/CMakeLists.txt b/CMakeLists.txt -index 60d570b8..b89ccd80 100644 ---- a/CMakeLists.txt -+++ b/CMakeLists.txt -@@ -751,6 +751,8 @@ if(ARM) - else() - option(ENABLE_FP64 "Enable FP64 on ARM32 - if you have at least VFP support for doubles, you can leave it ON" ON ) - endif() -+else() -+ set(ENABLE_FP64 ON CACHE INTERNAL "FP64, always on except ARM") - endif() - - if(ARM32 OR (LLC_TRIPLE MATCHES "^arm")) -@@ -800,7 +802,7 @@ if(X86) - set(HOST_DEVICE_EXTENSIONS "${HOST_DEVICE_EXTENSIONS} cl_khr_fp64 cl_khr_int64_base_atomics cl_khr_int64_extended_atomics") - endif() - # 32bit arm doesnt always support doubles --if(ENABLE_FP64 AND (NOT LLVM_OLDER_THAN_4_0)) -+if(ARM AND ENABLE_FP64) - set(HOST_DEVICE_EXTENSIONS "${HOST_DEVICE_EXTENSIONS} cl_khr_fp64") - endif() - -diff --git a/doc/sphinx/source/install.rst b/doc/sphinx/source/install.rst -index ea983cef..9703668e 100644 ---- a/doc/sphinx/source/install.rst -+++ b/doc/sphinx/source/install.rst -@@ -134,6 +134,10 @@ use ";" as separator (you'll have to escape it for bash). - if you want to avoid ICD and build pocl directly as libOpenCL library. - See also :ref:`linking-with-icd` - -+- ``-DENABLE_FP64`` - for ARM platform only. If your CPU doesn't support any -+ doubles (VFP is enough), disable this. Defaults to OFF when LLVM is older -+ than 4.0, otherwise defaults to ON. -+ - - ``-DPOCL_INSTALL__DIR`` The equivalent of ``--bindir``, - ``--sbindir`` etc fine-tuning of paths for autotools. See the beginning - of toplevel CMakeLists.txt for all the variables. -diff --git a/lib/kernel/host/CMakeLists.txt b/lib/kernel/host/CMakeLists.txt -index c3657a06..7a08f2ee 100644 ---- a/lib/kernel/host/CMakeLists.txt -+++ b/lib/kernel/host/CMakeLists.txt -@@ -118,13 +118,13 @@ function(compile_sleef VARIANT SLEEF_CONFIG SLEEF_CONFIG_NEW SLEEF_BC) - - set(EXTRA_FLAGS "-DDORENAME;-DPURE_C;-I${CMAKE_SOURCE_DIR}/lib/kernel/sleef/include") - -- if(ENABLE_CONFORMANCE) -+# disabled - this code uses libm - # compile_sleef_c_to_bc("c" "sleef/libm/sleef_builtin.c" - # "${VARIANT}" BC_FILE_LIST ${EXTRA_FLAGS}) --# else() -- # these may be faster than using libm, but not always precise -- compile_sleef_c_to_bc("c" "sleef/libm/sleefsp.c" -- "${VARIANT}" BC_FILE_LIST ${EXTRA_FLAGS}) -+ compile_sleef_c_to_bc("c" "sleef/libm/sleefsp.c" -+ "${VARIANT}" BC_FILE_LIST ${EXTRA_FLAGS}) -+ -+ if(ENABLE_FP64) - compile_sleef_c_to_bc("c" "sleef/libm/sleefdp.c" - "${VARIANT}" BC_FILE_LIST ${EXTRA_FLAGS}) - endif() -@@ -135,6 +135,10 @@ function(compile_sleef VARIANT SLEEF_CONFIG SLEEF_CONFIG_NEW SLEEF_BC) - file(READ "${CMAKE_SOURCE_DIR}/lib/kernel/sleef/test.c" TEST_SRC) - file(READ "${CMAKE_SOURCE_DIR}/lib/kernel/sleef/fma_test.c" FMA_TEST_SRC) - -+ if(ENABLE_FP64) -+ set(STR "#define SLEEF_DOUBLE_AVAILABLE\n") -+ file(APPEND "${SLEEF_CONFIG_NEW}" "${STR}") -+ endif() - # current SLEEF code does not have code for - # ARM32 NEON double vectors (if they even exist) - if(NOT ARM32) -diff --git a/lib/kernel/printf.c b/lib/kernel/printf.c -index bd522fb0..37c7d6d6 100644 ---- a/lib/kernel/printf.c -+++ b/lib/kernel/printf.c -@@ -25,10 +25,16 @@ - #pragma clang diagnostic push - #pragma clang diagnostic ignored "-Wformat-security" - --#include -+#ifdef cl_khr_fp64 -+#define LARGEST_FLOAT_TYPE double -+#define NAN __builtin_nan("1") -+#else -+#define LARGEST_FLOAT_TYPE float -+#define NAN __builtin_nanf("1") -+#endif -+ - #include --#include --#include -+#include - - // We implement the OpenCL printf by calling the C99 printf. This is - // not very efficient, but is easy to implement. -@@ -48,15 +54,13 @@ int snprintf(OCL_C_AS char* restrict str, size_t size, - - // Conversion flags - typedef struct { -- bool left; -- bool plus; -- bool space; -- bool alt; -- bool zero; -+ int left:1; -+ int plus:1; -+ int space:1; -+ int alt:1; -+ int zero:1; - } flags_t; - -- -- - // Helper routines to output integers - - #define INT_CONV_char "hh" -@@ -139,9 +143,9 @@ float __attribute__((overloadable)) vload_half(size_t offset, - DEBUG_PRINTF(("[printf:floats:d=%d]\n", d)); \ - if (d != 0) printf(comma); \ - WIDTH val = (FLOAT_GET_##WIDTH((OCL_C_AS const WIDTH*)vals+d)); \ -- if (isnan (val)) \ -+ if (val != val) \ - val = NAN; \ -- printf(outfmt, (double)val); \ -+ printf (outfmt, (LARGEST_FLOAT_TYPE)val); \ - } \ - DEBUG_PRINTF(("[printf:floats:done]\n")); \ - } -@@ -249,18 +253,18 @@ int __cl_printf(const OCL_CONSTANT_AS char* restrict format, ...) - DEBUG_PRINTF(("[printf:arg]\n")); - // Flags - flags_t flags; -- flags.left = false; -- flags.plus = false; -- flags.space = false; -- flags.alt = false; -- flags.zero = false; -+ flags.left = 0; -+ flags.plus = 0; -+ flags.space = 0; -+ flags.alt = 0; -+ flags.zero = 0; - for (;;) { - switch (ch) { -- case '-': if (flags.left) goto error; flags.left = true; break; -- case '+': if (flags.plus) goto error; flags.plus = true; break; -- case ' ': if (flags.space) goto error; flags.space = true; break; -- case '#': if (flags.alt) goto error; flags.alt = true; break; -- case '0': if (flags.zero) goto error; flags.zero = true; break; -+ case '-': if (flags.left) goto error; flags.left = 1; break; -+ case '+': if (flags.plus) goto error; flags.plus = 1; break; -+ case ' ': if (flags.space) goto error; flags.space = 1; break; -+ case '#': if (flags.alt) goto error; flags.alt = 1; break; -+ case '0': if (flags.zero) goto error; flags.zero = 1; break; - default: goto flags_done; - } - ch = *++format; -diff --git a/lib/kernel/sleef-pocl/expfrexp.cl b/lib/kernel/sleef-pocl/expfrexp.cl -index 7c3c595b..60d5f2aa 100644 ---- a/lib/kernel/sleef-pocl/expfrexp.cl -+++ b/lib/kernel/sleef-pocl/expfrexp.cl -@@ -1,10 +1,5 @@ - #include "sleef_cl.h" - --_CL_ALWAYSINLINE long2 Sleef_expfrexpd2_long (double2 x); --_CL_ALWAYSINLINE long4 Sleef_expfrexpd4_long (double4 x); --_CL_ALWAYSINLINE long8 Sleef_expfrexpd8_long (double8 x); -- -- - _CL_OVERLOADABLE - int - _cl_expfrexp (float x) -@@ -91,6 +86,10 @@ _cl_expfrexp (float16 x) - - #ifdef cl_khr_fp64 - -+_CL_ALWAYSINLINE long2 Sleef_expfrexpd2_long (double2 x); -+_CL_ALWAYSINLINE long4 Sleef_expfrexpd4_long (double4 x); -+_CL_ALWAYSINLINE long8 Sleef_expfrexpd8_long (double8 x); -+ - _CL_OVERLOADABLE - long - _cl_expfrexp (double x) -diff --git a/lib/kernel/sleef/include/sleef_cl.h b/lib/kernel/sleef/include/sleef_cl.h -index 89d2b3aa..6f436271 100644 ---- a/lib/kernel/sleef/include/sleef_cl.h -+++ b/lib/kernel/sleef/include/sleef_cl.h -@@ -59,14 +59,21 @@ typedef float float4 __attribute__ ((__ext_vector_type__ (4))); - typedef float float8 __attribute__ ((__ext_vector_type__ (8))); - typedef float float16 __attribute__ ((__ext_vector_type__ (16))); - -+#ifdef SLEEF_DOUBLE_AVAILABLE - typedef double double2 __attribute__ ((__ext_vector_type__ (2))); - typedef double double3 __attribute__ ((__ext_vector_type__ (3))); - typedef double double4 __attribute__ ((__ext_vector_type__ (4))); - typedef double double8 __attribute__ ((__ext_vector_type__ (8))); - typedef double double16 __attribute__ ((__ext_vector_type__ (16))); - -+#define cl_khr_fp64 -+ -+#endif -+ - #endif - -+#ifdef cl_khr_fp64 -+ - #ifndef Sleef_double2_DEFINED - #define Sleef_double2_DEFINED - typedef struct -@@ -75,14 +82,6 @@ typedef struct - } Sleef_double2; - #endif - --#ifndef Sleef_float2_DEFINED --#define Sleef_float2_DEFINED --typedef struct --{ -- float x, y; --} Sleef_float2; --#endif -- - double Sleef_sin_u35 (double); - double Sleef_cos_u35 (double); - Sleef_double2 Sleef_sincos_u35 (double); -@@ -147,6 +146,19 @@ double Sleef_tgamma_u10 (double); - double Sleef_erf_u10 (double); - double Sleef_erfc_u15 (double); - -+double Sleef_pown_u10 (double, int); -+double Sleef_powr_u10 (double, double); -+ -+#endif -+ -+#ifndef Sleef_float2_DEFINED -+#define Sleef_float2_DEFINED -+typedef struct -+{ -+ float x, y; -+} Sleef_float2; -+#endif -+ - float Sleef_sinf_u35 (float); - float Sleef_cosf_u35 (float); - Sleef_float2 Sleef_sincosf_u35 (float); -@@ -212,9 +224,7 @@ float Sleef_tgammaf_u10 (float); - float Sleef_erff_u10 (float); - float Sleef_erfcf_u15 (float); - --double Sleef_pown_u10 (double, int); - float Sleef_pownf_u10 (float, int); --double Sleef_powr_u10 (double, double); - float Sleef_powrf_u10 (float, float); - - -@@ -222,6 +232,8 @@ float Sleef_powrf_u10 (float, float); - - #ifdef SLEEF_VEC_512_AVAILABLE - -+#ifdef cl_khr_fp64 -+ - #ifndef Sleef_double8_2_DEFINED - typedef struct - { -@@ -295,6 +307,8 @@ double8 Sleef_tgammad8_u10 (double8); - double8 Sleef_erfd8_u10 (double8); - double8 Sleef_erfcd8_u15 (double8); - -+#endif -+ - #ifndef Sleef_float16_2_DEFINED - typedef struct - { -@@ -380,6 +394,8 @@ float16 Sleef_powrf16_u10 (float16, float16); - - #ifdef SLEEF_VEC_256_AVAILABLE - -+#ifdef cl_khr_fp64 -+ - #ifndef Sleef_double4_2_DEFINED - typedef struct - { -@@ -453,6 +469,8 @@ double4 Sleef_tgammad4_u10 (double4); - double4 Sleef_erfd4_u10 (double4); - double4 Sleef_erfcd4_u15 (double4); - -+#endif -+ - #ifndef Sleef_float8_2_DEFINED - typedef struct - { -@@ -535,6 +553,8 @@ float8 Sleef_powrf8_u10 (float8, float8); - - #ifdef SLEEF_VEC_128_AVAILABLE - -+#ifdef cl_khr_fp64 -+ - #ifndef Sleef_double2_2_DEFINED - typedef struct - { -@@ -608,6 +628,11 @@ double2 Sleef_tgammad2_u10 (double2); - double2 Sleef_erfd2_u10 (double2); - double2 Sleef_erfcd2_u15 (double2); - -+double2 Sleef_pownd2_u10 (double2, int2); -+double2 Sleef_powrd2_u10 (double2, double2); -+ -+#endif -+ - #ifndef Sleef_float4_2_DEFINED - typedef struct - { -@@ -681,9 +706,7 @@ float4 Sleef_tgammaf4_u10 (float4); - float4 Sleef_erff4_u10 (float4); - float4 Sleef_erfcf4_u15 (float4); - --double2 Sleef_pownd2_u10 (double2, int2); - float4 Sleef_pownf4_u10 (float4, int4); --double2 Sleef_powrd2_u10 (double2, double2); - float4 Sleef_powrf4_u10 (float4, float4); - - #endif -diff --git a/lib/kernel/sleef/libm/sleef_glue.cl b/lib/kernel/sleef/libm/sleef_glue.cl -index ad98b43e..cd4bfa34 100644 ---- a/lib/kernel/sleef/libm/sleef_glue.cl -+++ b/lib/kernel/sleef/libm/sleef_glue.cl -@@ -23,6 +23,7 @@ - - #include "sleef_cl.h" - -+#ifdef cl_khr_fp64 - #ifdef SLEEF_VEC_128_AVAILABLE - - _CL_ALWAYSINLINE double2 Sleef_ldexpd2_long (double2 x, long2 k); -@@ -76,3 +77,4 @@ Sleef_expfrexpd8 (double8 x) - } - - #endif -+#endif --- -2.15.1 - diff --git a/0012-Mark-test_clSetEventCallback-as-WILL_FAIL-when-TSan-.patch b/0012-Mark-test_clSetEventCallback-as-WILL_FAIL-when-TSan-.patch deleted file mode 100644 index e6c8dfb..0000000 --- a/0012-Mark-test_clSetEventCallback-as-WILL_FAIL-when-TSan-.patch +++ /dev/null @@ -1,32 +0,0 @@ -From 0029cab0c15179cb8a46b419104227c2c1d877a8 Mon Sep 17 00:00:00 2001 -From: Michal Babej -Date: Thu, 11 Jan 2018 12:48:19 +0100 -Subject: [PATCH 12/14] Mark test_clSetEventCallback as WILL_FAIL when TSan is - enabled - -due to how the test is written, TSan will inevitably complain. ---- - tests/runtime/CMakeLists.txt | 7 +++++++ - 1 file changed, 7 insertions(+) - -diff --git a/tests/runtime/CMakeLists.txt b/tests/runtime/CMakeLists.txt -index c68fe327..a0409da9 100644 ---- a/tests/runtime/CMakeLists.txt -+++ b/tests/runtime/CMakeLists.txt -@@ -126,6 +126,13 @@ set_tests_properties("runtime/test_kernel_cache_includes" - PROPERTIES PASS_REGULAR_EXPRESSION - "function 1.*first include.*function 2.*second include") - -+# event callback test changes shared variables in the callbacks -+# doesn't need fixing in test, but TSan inevitably complains -+if(ENABLE_TSAN) -+ set_tests_properties("runtime/clSetEventCallback" -+ PROPERTIES WILL_FAIL 1) -+endif() -+ - # Label tests that work with CUDA backend - set_property(TEST - "runtime/clGetDeviceInfo" --- -2.15.1 - diff --git a/0013-Fix-a-compilation-warning.patch b/0013-Fix-a-compilation-warning.patch deleted file mode 100644 index 13b1491..0000000 --- a/0013-Fix-a-compilation-warning.patch +++ /dev/null @@ -1,234 +0,0 @@ -From 84029fdf7dde331a45632272854922db0da8e3ad Mon Sep 17 00:00:00 2001 -From: Michal Babej -Date: Thu, 11 Jan 2018 12:50:15 +0100 -Subject: [PATCH 13/14] Fix a compilation warning - -(float)(double literal) complains when cl_khr_fp64 is disabled ---- - lib/kernel/convert_type.cl | 48 +++++++++++++++++++++++----------------------- - lib/kernel/convert_type.py | 4 ++-- - 2 files changed, 26 insertions(+), 26 deletions(-) - -diff --git a/lib/kernel/convert_type.cl b/lib/kernel/convert_type.cl -index 8da102a3..11aca55c 100644 ---- a/lib/kernel/convert_type.cl -+++ b/lib/kernel/convert_type.cl -@@ -38567,7 +38567,7 @@ float convert_float_rtz(long x) - ulong abs_x = abs(x); - ulong abs_y = abs(y); - float res = select(r, nextafter(r, sign(r) * (float)-INFINITY), convert_int(abs_y > abs_x)); -- return select(res, (float)(0x1.fffffep+62), convert_int(x >= 0x7fffffffffffffffL)); -+ return select(res, (float)(0x1.fffffep+62f), convert_int(x >= 0x7fffffffffffffffL)); - } - #endif - -@@ -38597,7 +38597,7 @@ float convert_float_rtn(long x) - float r = convert_float(x); - long y = convert_long_sat(r); - float res = select(r, nextafter(r, (float)-INFINITY), convert_int(y > x)); -- return select(res, (float)(0x1.fffffep+62), convert_int(x >= 0x7fffffffffffffffL)); -+ return select(res, (float)(0x1.fffffep+62f), convert_int(x >= 0x7fffffffffffffffL)); - } - #endif - -@@ -38610,7 +38610,7 @@ float2 convert_float2_rtz(long2 x) - ulong2 abs_x = abs(x); - ulong2 abs_y = abs(y); - float2 res = select(r, nextafter(r, sign(r) * (float2)-INFINITY), convert_int2(abs_y > abs_x)); -- return select(res, (float2)(0x1.fffffep+62), convert_int2(x >= 0x7fffffffffffffffL)); -+ return select(res, (float2)(0x1.fffffep+62f), convert_int2(x >= 0x7fffffffffffffffL)); - } - #endif - -@@ -38640,7 +38640,7 @@ float2 convert_float2_rtn(long2 x) - float2 r = convert_float2(x); - long2 y = convert_long2_sat(r); - float2 res = select(r, nextafter(r, (float2)-INFINITY), convert_int2(y > x)); -- return select(res, (float2)(0x1.fffffep+62), convert_int2(x >= 0x7fffffffffffffffL)); -+ return select(res, (float2)(0x1.fffffep+62f), convert_int2(x >= 0x7fffffffffffffffL)); - } - #endif - -@@ -38653,7 +38653,7 @@ float3 convert_float3_rtz(long3 x) - ulong3 abs_x = abs(x); - ulong3 abs_y = abs(y); - float3 res = select(r, nextafter(r, sign(r) * (float3)-INFINITY), convert_int3(abs_y > abs_x)); -- return select(res, (float3)(0x1.fffffep+62), convert_int3(x >= 0x7fffffffffffffffL)); -+ return select(res, (float3)(0x1.fffffep+62f), convert_int3(x >= 0x7fffffffffffffffL)); - } - #endif - -@@ -38683,7 +38683,7 @@ float3 convert_float3_rtn(long3 x) - float3 r = convert_float3(x); - long3 y = convert_long3_sat(r); - float3 res = select(r, nextafter(r, (float3)-INFINITY), convert_int3(y > x)); -- return select(res, (float3)(0x1.fffffep+62), convert_int3(x >= 0x7fffffffffffffffL)); -+ return select(res, (float3)(0x1.fffffep+62f), convert_int3(x >= 0x7fffffffffffffffL)); - } - #endif - -@@ -38696,7 +38696,7 @@ float4 convert_float4_rtz(long4 x) - ulong4 abs_x = abs(x); - ulong4 abs_y = abs(y); - float4 res = select(r, nextafter(r, sign(r) * (float4)-INFINITY), convert_int4(abs_y > abs_x)); -- return select(res, (float4)(0x1.fffffep+62), convert_int4(x >= 0x7fffffffffffffffL)); -+ return select(res, (float4)(0x1.fffffep+62f), convert_int4(x >= 0x7fffffffffffffffL)); - } - #endif - -@@ -38726,7 +38726,7 @@ float4 convert_float4_rtn(long4 x) - float4 r = convert_float4(x); - long4 y = convert_long4_sat(r); - float4 res = select(r, nextafter(r, (float4)-INFINITY), convert_int4(y > x)); -- return select(res, (float4)(0x1.fffffep+62), convert_int4(x >= 0x7fffffffffffffffL)); -+ return select(res, (float4)(0x1.fffffep+62f), convert_int4(x >= 0x7fffffffffffffffL)); - } - #endif - -@@ -38739,7 +38739,7 @@ float8 convert_float8_rtz(long8 x) - ulong8 abs_x = abs(x); - ulong8 abs_y = abs(y); - float8 res = select(r, nextafter(r, sign(r) * (float8)-INFINITY), convert_int8(abs_y > abs_x)); -- return select(res, (float8)(0x1.fffffep+62), convert_int8(x >= 0x7fffffffffffffffL)); -+ return select(res, (float8)(0x1.fffffep+62f), convert_int8(x >= 0x7fffffffffffffffL)); - } - #endif - -@@ -38769,7 +38769,7 @@ float8 convert_float8_rtn(long8 x) - float8 r = convert_float8(x); - long8 y = convert_long8_sat(r); - float8 res = select(r, nextafter(r, (float8)-INFINITY), convert_int8(y > x)); -- return select(res, (float8)(0x1.fffffep+62), convert_int8(x >= 0x7fffffffffffffffL)); -+ return select(res, (float8)(0x1.fffffep+62f), convert_int8(x >= 0x7fffffffffffffffL)); - } - #endif - -@@ -38782,7 +38782,7 @@ float16 convert_float16_rtz(long16 x) - ulong16 abs_x = abs(x); - ulong16 abs_y = abs(y); - float16 res = select(r, nextafter(r, sign(r) * (float16)-INFINITY), convert_int16(abs_y > abs_x)); -- return select(res, (float16)(0x1.fffffep+62), convert_int16(x >= 0x7fffffffffffffffL)); -+ return select(res, (float16)(0x1.fffffep+62f), convert_int16(x >= 0x7fffffffffffffffL)); - } - #endif - -@@ -38812,7 +38812,7 @@ float16 convert_float16_rtn(long16 x) - float16 r = convert_float16(x); - long16 y = convert_long16_sat(r); - float16 res = select(r, nextafter(r, (float16)-INFINITY), convert_int16(y > x)); -- return select(res, (float16)(0x1.fffffep+62), convert_int16(x >= 0x7fffffffffffffffL)); -+ return select(res, (float16)(0x1.fffffep+62f), convert_int16(x >= 0x7fffffffffffffffL)); - } - #endif - -@@ -39083,7 +39083,7 @@ float convert_float_rtz(ulong x) - ulong abs_x = abs(x); - ulong abs_y = abs(y); - float res = select(r, nextafter(r, sign(r) * (float)-INFINITY), convert_int(abs_y > abs_x)); -- return select(res, (float)(0x1.fffffep+63), convert_int(x >= 0xfffffffffffffffeUL)); -+ return select(res, (float)(0x1.fffffep+63f), convert_int(x >= 0xfffffffffffffffeUL)); - } - #endif - -@@ -39113,7 +39113,7 @@ float convert_float_rtn(ulong x) - float r = convert_float(x); - ulong y = convert_ulong_sat(r); - float res = select(r, nextafter(r, (float)-INFINITY), convert_int(y > x)); -- return select(res, (float)(0x1.fffffep+63), convert_int(x >= 0xfffffffffffffffeUL)); -+ return select(res, (float)(0x1.fffffep+63f), convert_int(x >= 0xfffffffffffffffeUL)); - } - #endif - -@@ -39126,7 +39126,7 @@ float2 convert_float2_rtz(ulong2 x) - ulong2 abs_x = abs(x); - ulong2 abs_y = abs(y); - float2 res = select(r, nextafter(r, sign(r) * (float2)-INFINITY), convert_int2(abs_y > abs_x)); -- return select(res, (float2)(0x1.fffffep+63), convert_int2(x >= 0xfffffffffffffffeUL)); -+ return select(res, (float2)(0x1.fffffep+63f), convert_int2(x >= 0xfffffffffffffffeUL)); - } - #endif - -@@ -39156,7 +39156,7 @@ float2 convert_float2_rtn(ulong2 x) - float2 r = convert_float2(x); - ulong2 y = convert_ulong2_sat(r); - float2 res = select(r, nextafter(r, (float2)-INFINITY), convert_int2(y > x)); -- return select(res, (float2)(0x1.fffffep+63), convert_int2(x >= 0xfffffffffffffffeUL)); -+ return select(res, (float2)(0x1.fffffep+63f), convert_int2(x >= 0xfffffffffffffffeUL)); - } - #endif - -@@ -39169,7 +39169,7 @@ float3 convert_float3_rtz(ulong3 x) - ulong3 abs_x = abs(x); - ulong3 abs_y = abs(y); - float3 res = select(r, nextafter(r, sign(r) * (float3)-INFINITY), convert_int3(abs_y > abs_x)); -- return select(res, (float3)(0x1.fffffep+63), convert_int3(x >= 0xfffffffffffffffeUL)); -+ return select(res, (float3)(0x1.fffffep+63f), convert_int3(x >= 0xfffffffffffffffeUL)); - } - #endif - -@@ -39199,7 +39199,7 @@ float3 convert_float3_rtn(ulong3 x) - float3 r = convert_float3(x); - ulong3 y = convert_ulong3_sat(r); - float3 res = select(r, nextafter(r, (float3)-INFINITY), convert_int3(y > x)); -- return select(res, (float3)(0x1.fffffep+63), convert_int3(x >= 0xfffffffffffffffeUL)); -+ return select(res, (float3)(0x1.fffffep+63f), convert_int3(x >= 0xfffffffffffffffeUL)); - } - #endif - -@@ -39212,7 +39212,7 @@ float4 convert_float4_rtz(ulong4 x) - ulong4 abs_x = abs(x); - ulong4 abs_y = abs(y); - float4 res = select(r, nextafter(r, sign(r) * (float4)-INFINITY), convert_int4(abs_y > abs_x)); -- return select(res, (float4)(0x1.fffffep+63), convert_int4(x >= 0xfffffffffffffffeUL)); -+ return select(res, (float4)(0x1.fffffep+63f), convert_int4(x >= 0xfffffffffffffffeUL)); - } - #endif - -@@ -39242,7 +39242,7 @@ float4 convert_float4_rtn(ulong4 x) - float4 r = convert_float4(x); - ulong4 y = convert_ulong4_sat(r); - float4 res = select(r, nextafter(r, (float4)-INFINITY), convert_int4(y > x)); -- return select(res, (float4)(0x1.fffffep+63), convert_int4(x >= 0xfffffffffffffffeUL)); -+ return select(res, (float4)(0x1.fffffep+63f), convert_int4(x >= 0xfffffffffffffffeUL)); - } - #endif - -@@ -39255,7 +39255,7 @@ float8 convert_float8_rtz(ulong8 x) - ulong8 abs_x = abs(x); - ulong8 abs_y = abs(y); - float8 res = select(r, nextafter(r, sign(r) * (float8)-INFINITY), convert_int8(abs_y > abs_x)); -- return select(res, (float8)(0x1.fffffep+63), convert_int8(x >= 0xfffffffffffffffeUL)); -+ return select(res, (float8)(0x1.fffffep+63f), convert_int8(x >= 0xfffffffffffffffeUL)); - } - #endif - -@@ -39285,7 +39285,7 @@ float8 convert_float8_rtn(ulong8 x) - float8 r = convert_float8(x); - ulong8 y = convert_ulong8_sat(r); - float8 res = select(r, nextafter(r, (float8)-INFINITY), convert_int8(y > x)); -- return select(res, (float8)(0x1.fffffep+63), convert_int8(x >= 0xfffffffffffffffeUL)); -+ return select(res, (float8)(0x1.fffffep+63f), convert_int8(x >= 0xfffffffffffffffeUL)); - } - #endif - -@@ -39298,7 +39298,7 @@ float16 convert_float16_rtz(ulong16 x) - ulong16 abs_x = abs(x); - ulong16 abs_y = abs(y); - float16 res = select(r, nextafter(r, sign(r) * (float16)-INFINITY), convert_int16(abs_y > abs_x)); -- return select(res, (float16)(0x1.fffffep+63), convert_int16(x >= 0xfffffffffffffffeUL)); -+ return select(res, (float16)(0x1.fffffep+63f), convert_int16(x >= 0xfffffffffffffffeUL)); - } - #endif - -@@ -39328,7 +39328,7 @@ float16 convert_float16_rtn(ulong16 x) - float16 r = convert_float16(x); - ulong16 y = convert_ulong16_sat(r); - float16 res = select(r, nextafter(r, (float16)-INFINITY), convert_int16(y > x)); -- return select(res, (float16)(0x1.fffffep+63), convert_int16(x >= 0xfffffffffffffffeUL)); -+ return select(res, (float16)(0x1.fffffep+63f), convert_int16(x >= 0xfffffffffffffffeUL)); - } - #endif - --- -2.15.1 - diff --git a/0014-Fix-a-data-race-in-pocl_runtime_config.c.patch b/0014-Fix-a-data-race-in-pocl_runtime_config.c.patch deleted file mode 100644 index 259637f..0000000 --- a/0014-Fix-a-data-race-in-pocl_runtime_config.c.patch +++ /dev/null @@ -1,27 +0,0 @@ -From da7d8cee0fffeebe5490d0a9770e760af0197909 Mon Sep 17 00:00:00 2001 -From: Michal Babej -Date: Thu, 11 Jan 2018 13:17:46 +0100 -Subject: [PATCH 14/14] Fix a data race in pocl_runtime_config.c - -LL_PREPEND(head, item) changes the head (env_cache) on every call, -which conflicts with other threads calling find_env() with env_cache. ---- - lib/CL/pocl_runtime_config.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/lib/CL/pocl_runtime_config.c b/lib/CL/pocl_runtime_config.c -index d84ea263..c7000068 100644 ---- a/lib/CL/pocl_runtime_config.c -+++ b/lib/CL/pocl_runtime_config.c -@@ -62,7 +62,7 @@ static env_data* find_env (env_data* cache, const char* key) - ed->env = strdup (key); - ed->value = strdup (value); - ed->next = NULL; -- LL_PREPEND(env_cache, ed); -+ LL_APPEND(env_cache, ed); - POCL_UNLOCK(lock); - return ed; - } --- -2.15.1 - diff --git a/pocl.spec b/pocl.spec index d386314..4e638f2 100644 --- a/pocl.spec +++ b/pocl.spec @@ -1,9 +1,10 @@ %global sover 2 +%global with_tests 1 Summary: Portable Computing Language - an OpenCL implementation Name: pocl Version: 1.1 -Release: 1%{?dist} +Release: 2%{?dist} # The whole code is under MIT # except include/utlist.h which is under BSD (and unbundled) and @@ -15,11 +16,9 @@ Source0: https://github.com/pocl/pocl/archive/v%{version}.tar.gz # FIXME: link Patch1001: 0001-wrap-more-stuff-for-64-bit-atomics.patch -# aarch64 seems to have a build system problem that makes it pass -march=(unknown). -# ppc64 builds fine, but: 41% tests passed, 64 tests failed out of 108. -# ppc64le fails with compilation errors. -# s390(x) clang build still seems to be suffering from bug 1309055. -ExcludeArch: aarch64 ppc64 ppc64le s390 s390x +# aarch64 builds fine, but: 15% tests passed, 103 tests failed out of 121 +# ppc64/ppc64le/s390x builds fine, but: 15% tests passed, 102 tests failed out of 120 +ExclusiveArch: %{arm} %{x86} x86_64 BuildRequires: cmake BuildRequires: clang clang-devel @@ -101,6 +100,7 @@ popd #ln -vs %%{_includedir}/vecmath %%{buildroot}/%%{_libdir}/pocl/vecmath # but you need to run the .py to generate the files under the pocl dir +%if 0%{?with_tests} %check pushd %{_target_platform} # https://github.com/pocl/pocl/issues/602 @@ -112,6 +112,7 @@ pushd %{_target_platform} ; %endif popd +%endif %ldconfig_scriptlets @@ -128,6 +129,9 @@ popd %{_libdir}/pkgconfig/%{name}.pc %changelog +* Mon Mar 26 2018 Peter Robinson 1.1-2 +- Minor spec updates/cleanups + * Fri Mar 23 2018 Tom Stellard - 1.1-1 - Update to 1.1