diff --git a/.gitignore b/.gitignore index 8c81fe6..ad7f91f 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ /pyfastnoisesimd-0.3.2.tar.gz +/pyfastnoisesimd-0.4.1.tar.gz diff --git a/0001-Add-cstdlib-for-posix_memalign.patch b/0001-Add-cstdlib-for-posix_memalign.patch deleted file mode 100644 index 773ffa6..0000000 --- a/0001-Add-cstdlib-for-posix_memalign.patch +++ /dev/null @@ -1,25 +0,0 @@ -From cc83e9986ad96f93f3737128a34d9e1e9fae4cae Mon Sep 17 00:00:00 2001 -From: Elliott Sales de Andrade -Date: Tue, 16 Jan 2018 02:32:57 -0500 -Subject: [PATCH 1/6] Add cstdlib for posix_memalign. - -Signed-off-by: Elliott Sales de Andrade ---- - pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD_internal.cpp | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD_internal.cpp b/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD_internal.cpp -index 975d6cb..0af1bf8 100644 ---- a/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD_internal.cpp -+++ b/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD_internal.cpp -@@ -112,6 +112,7 @@ typedef int SIMDi; - #ifdef _WIN32 - #define SIMD_ALLOCATE_SET(floatP, floatCount) floatP = (float*)_aligned_malloc((floatCount)* sizeof(float), MEMORY_ALIGNMENT) - #else -+#include <cstdlib> - #define SIMD_ALLOCATE_SET(floatP, floatCount) posix_memalign((void**)&floatP, MEMORY_ALIGNMENT, (floatCount)* sizeof(float)) - #endif - #else --- -2.17.1 - diff --git a/0001-Update-casts-for-NEON.patch b/0001-Update-casts-for-NEON.patch new file mode 100644 index 0000000..d9f2cc5 --- /dev/null +++ b/0001-Update-casts-for-NEON.patch @@ -0,0 +1,96 @@ +From 4346f6cdb02192edad480c323e7ab34d0f00a6e5 Mon Sep 17 00:00:00 2001 +From: Elliott Sales de Andrade +Date: Fri, 20 Jul 2018 16:46:43 -0400 +Subject: [PATCH 1/5] Update casts for NEON. + +A merge of various upstream commits. 
+ +Signed-off-by: Elliott Sales de Andrade +--- + pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD.h | 7 +++++-- + .../fastnoisesimd/FastNoiseSIMD_internal.cpp | 16 +++++++++------- + 2 files changed, 14 insertions(+), 9 deletions(-) + +diff --git a/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD.h b/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD.h +index d0b5ed6..9554384 100644 +--- a/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD.h ++++ b/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD.h +@@ -77,6 +77,9 @@ AMD Carrizo - Q2 2015 + FMA3 + Intel Haswell - Q2 2013 + AMD Piledriver - 2012 ++ ++AVX-512F ++Intel Skylake-X - Q2 2017 + */ + + struct FastNoiseVectorSet; +@@ -97,7 +100,7 @@ public: + + // Returns highest detected level of CPU support + // 5: ARM NEON +- // 4: AVX512 ++ // 4: AVX-512F + // 3: AVX2 & FMA3 + // 2: SSE4.1 + // 1: SSE2 +@@ -106,7 +109,7 @@ public: + + // Sets the SIMD level for newly created FastNoiseSIMD objects + // 5: ARM NEON +- // 4: AVX512 ++ // 4: AVX-512F + // 3: AVX2 & FMA3 + // 2: SSE4.1 + // 1: SSE2 +diff --git a/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD_internal.cpp b/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD_internal.cpp +index d7f5b42..ba5c83e 100644 +--- a/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD_internal.cpp ++++ b/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD_internal.cpp +@@ -178,10 +178,10 @@ static SIMDf VECTORCALL FUNC(DIV)(SIMDf a, SIMDf b) + #define SIMDf_MAX(a,b) vmaxq_f32(a,b) + #define SIMDf_INV_SQRT(a) vrsqrteq_f32(a) + +-#define SIMDf_LESS_THAN(a,b) vreinterpretq_f32_u32(vcltq_f32(a,b)) +-#define SIMDf_GREATER_THAN(a,b) vreinterpretq_f32_u32(vcgtq_f32(a,b)) +-#define SIMDf_LESS_EQUAL(a,b) vreinterpretq_f32_u32(vcleq_f32(a,b)) +-#define SIMDf_GREATER_EQUAL(a,b) vreinterpretq_f32_u32(vcgeq_f32(a,b)) ++#define SIMDf_LESS_THAN(a,b) vreinterpretq_s32_u32(vcltq_f32(a,b)) ++#define SIMDf_GREATER_THAN(a,b) vreinterpretq_s32_u32(vcgtq_f32(a,b)) ++#define SIMDf_LESS_EQUAL(a,b) vreinterpretq_s32_u32(vcleq_f32(a,b)) ++#define 
SIMDf_GREATER_EQUAL(a,b) vreinterpretq_s32_u32(vcgeq_f32(a,b)) + + #define SIMDf_AND(a,b) SIMDf_CAST_TO_FLOAT(vandq_s32(vreinterpretq_s32_f32(a),vreinterpretq_s32_f32(b))) + #define SIMDf_AND_NOT(a,b) SIMDf_CAST_TO_FLOAT(vandq_s32(vmvnq_s32(vreinterpretq_s32_f32(a)),vreinterpretq_s32_f32(b))) +@@ -192,7 +192,9 @@ static SIMDf VECTORCALL FUNC(FLOOR)(SIMDf a) + { + SIMDf fval = SIMDf_CONVERT_TO_FLOAT(SIMDi_CONVERT_TO_INT(a)); + +- return vsubq_f32(fval, SIMDf_AND(SIMDf_LESS_THAN(a, fval), SIMDf_NUM(1))); ++ return vsubq_f32(fval, ++ SIMDf_CAST_TO_FLOAT(vandq_s32(SIMDf_LESS_THAN(a, fval), ++ SIMDi_CAST_TO_INT(SIMDf_NUM(1))))); + } + #define SIMDf_FLOOR(a) FUNC(FLOOR)(a) + #else +@@ -201,7 +203,7 @@ static SIMDf VECTORCALL FUNC(FLOOR)(SIMDf a) + #endif + + #define SIMDf_ABS(a) vabsq_f32(a) +-#define SIMDf_BLENDV(a,b,mask) vbslq_f32(mask,b,a) ++#define SIMDf_BLENDV(a,b,mask) vbslq_f32(vreinterpretq_u32_s32(mask),b,a) + + #define SIMDi_ADD(a,b) vaddq_s32(a,b) + #define SIMDi_SUB(a,b) vsubq_s32(a,b) +@@ -1834,7 +1836,7 @@ static SIMDf VECTORCALL FUNC(CellularDistance##distanceFunc##Single)(SIMDi seed, + #define CELLULAR_DISTANCE2_SINGLE(distanceFunc, returnFunc)\ + static SIMDf VECTORCALL FUNC(Cellular##returnFunc##distanceFunc##Single)(SIMDi seed, SIMDf x, SIMDf y, SIMDf z, SIMDf cellJitter, int index0, int index1)\ + {\ +- SIMDf distance[4] = {SIMDf_NUM(999999),SIMDf_NUM(999999),SIMDf_NUM(999999),SIMDf_NUM(999999)};\ ++ SIMDf distance[FN_CELLULAR_INDEX_MAX+1] = {SIMDf_NUM(999999),SIMDf_NUM(999999),SIMDf_NUM(999999),SIMDf_NUM(999999)};\ + \ + SIMDi xc = SIMDi_SUB(SIMDi_CONVERT_TO_INT(x), SIMDi_NUM(1));\ + SIMDi ycBase = SIMDi_SUB(SIMDi_CONVERT_TO_INT(y), SIMDi_NUM(1));\ +-- +2.20.1 + diff --git a/0002-Update-casts-for-NEON.patch b/0002-Update-casts-for-NEON.patch deleted file mode 100644 index 82f249b..0000000 --- a/0002-Update-casts-for-NEON.patch +++ /dev/null @@ -1,96 +0,0 @@ -From 2e3868c40a667ec1c3fe02b997b66d5f8393e22f Mon Sep 17 00:00:00 2001 -From: Elliott Sales 
de Andrade -Date: Fri, 20 Jul 2018 16:46:43 -0400 -Subject: [PATCH 2/6] Update casts for NEON. - -A merge of various upstream commits. - -Signed-off-by: Elliott Sales de Andrade ---- - pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD.h | 7 +++++-- - .../fastnoisesimd/FastNoiseSIMD_internal.cpp | 16 +++++++++------- - 2 files changed, 14 insertions(+), 9 deletions(-) - -diff --git a/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD.h b/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD.h -index d0b5ed6..9554384 100644 ---- a/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD.h -+++ b/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD.h -@@ -77,6 +77,9 @@ AMD Carrizo - Q2 2015 - FMA3 - Intel Haswell - Q2 2013 - AMD Piledriver - 2012 -+ -+AVX-512F -+Intel Skylake-X - Q2 2017 - */ - - struct FastNoiseVectorSet; -@@ -97,7 +100,7 @@ public: - - // Returns highest detected level of CPU support - // 5: ARM NEON -- // 4: AVX512 -+ // 4: AVX-512F - // 3: AVX2 & FMA3 - // 2: SSE4.1 - // 1: SSE2 -@@ -106,7 +109,7 @@ public: - - // Sets the SIMD level for newly created FastNoiseSIMD objects - // 5: ARM NEON -- // 4: AVX512 -+ // 4: AVX-512F - // 3: AVX2 & FMA3 - // 2: SSE4.1 - // 1: SSE2 -diff --git a/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD_internal.cpp b/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD_internal.cpp -index 0af1bf8..d63143a 100644 ---- a/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD_internal.cpp -+++ b/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD_internal.cpp -@@ -176,10 +176,10 @@ static SIMDf VECTORCALL FUNC(DIV)(SIMDf a, SIMDf b) - #define SIMDf_MAX(a,b) vmaxq_f32(a,b) - #define SIMDf_INV_SQRT(a) vrsqrteq_f32(a) - --#define SIMDf_LESS_THAN(a,b) vreinterpretq_f32_u32(vcltq_f32(a,b)) --#define SIMDf_GREATER_THAN(a,b) vreinterpretq_f32_u32(vcgtq_f32(a,b)) --#define SIMDf_LESS_EQUAL(a,b) vreinterpretq_f32_u32(vcleq_f32(a,b)) --#define SIMDf_GREATER_EQUAL(a,b) vreinterpretq_f32_u32(vcgeq_f32(a,b)) -+#define SIMDf_LESS_THAN(a,b) vreinterpretq_s32_u32(vcltq_f32(a,b)) -+#define SIMDf_GREATER_THAN(a,b) 
vreinterpretq_s32_u32(vcgtq_f32(a,b)) -+#define SIMDf_LESS_EQUAL(a,b) vreinterpretq_s32_u32(vcleq_f32(a,b)) -+#define SIMDf_GREATER_EQUAL(a,b) vreinterpretq_s32_u32(vcgeq_f32(a,b)) - - #define SIMDf_AND(a,b) SIMDf_CAST_TO_FLOAT(vandq_s32(vreinterpretq_s32_f32(a),vreinterpretq_s32_f32(b))) - #define SIMDf_AND_NOT(a,b) SIMDf_CAST_TO_FLOAT(vandq_s32(vmvnq_s32(vreinterpretq_s32_f32(a)),vreinterpretq_s32_f32(b))) -@@ -190,7 +190,9 @@ static SIMDf VECTORCALL FUNC(FLOOR)(SIMDf a) - { - SIMDf fval = SIMDf_CONVERT_TO_FLOAT(SIMDi_CONVERT_TO_INT(a)); - -- return vsubq_f32(fval, SIMDf_AND(SIMDf_LESS_THAN(a, fval), SIMDf_NUM(1))); -+ return vsubq_f32(fval, -+ SIMDf_CAST_TO_FLOAT(vandq_s32(SIMDf_LESS_THAN(a, fval), -+ SIMDi_CAST_TO_INT(SIMDf_NUM(1))))); - } - #define SIMDf_FLOOR(a) FUNC(FLOOR)(a) - #else -@@ -199,7 +201,7 @@ static SIMDf VECTORCALL FUNC(FLOOR)(SIMDf a) - #endif - - #define SIMDf_ABS(a) vabsq_f32(a) --#define SIMDf_BLENDV(a,b,mask) vbslq_f32(mask,b,a) -+#define SIMDf_BLENDV(a,b,mask) vbslq_f32(vreinterpretq_u32_s32(mask),b,a) - - #define SIMDi_ADD(a,b) vaddq_s32(a,b) - #define SIMDi_SUB(a,b) vsubq_s32(a,b) -@@ -1832,7 +1834,7 @@ static SIMDf VECTORCALL FUNC(CellularDistance##distanceFunc##Single)(SIMDi seed, - #define CELLULAR_DISTANCE2_SINGLE(distanceFunc, returnFunc)\ - static SIMDf VECTORCALL FUNC(Cellular##returnFunc##distanceFunc##Single)(SIMDi seed, SIMDf x, SIMDf y, SIMDf z, SIMDf cellJitter, int index0, int index1)\ - {\ -- SIMDf distance[4] = {SIMDf_NUM(999999),SIMDf_NUM(999999),SIMDf_NUM(999999),SIMDf_NUM(999999)};\ -+ SIMDf distance[FN_CELLULAR_INDEX_MAX+1] = {SIMDf_NUM(999999),SIMDf_NUM(999999),SIMDf_NUM(999999),SIMDf_NUM(999999)};\ - \ - SIMDi xc = SIMDi_SUB(SIMDi_CONVERT_TO_INT(x), SIMDi_NUM(1));\ - SIMDi ycBase = SIMDi_SUB(SIMDi_CONVERT_TO_INT(y), SIMDi_NUM(1));\ --- -2.17.1 - diff --git a/0002-Use-fallback-for-PPC64-and-S390x.patch b/0002-Use-fallback-for-PPC64-and-S390x.patch new file mode 100644 index 0000000..d7bc795 --- /dev/null +++ 
b/0002-Use-fallback-for-PPC64-and-S390x.patch @@ -0,0 +1,68 @@ +From cde6cd59345e6a950d10f77a911895840e680488 Mon Sep 17 00:00:00 2001 +From: Elliott Sales de Andrade +Date: Fri, 20 Jul 2018 05:50:13 -0400 +Subject: [PATCH 2/5] Use fallback for PPC64 and S390x. + +There is no SSE2 on PPC64, and no other optimized version is implemented +yet. + +Signed-off-by: Elliott Sales de Andrade +--- + pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD.cpp | 12 ++++++++++++ + pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD.h | 2 +- + 2 files changed, 13 insertions(+), 1 deletion(-) + +diff --git a/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD.cpp b/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD.cpp +index fac36b6..6da16c4 100644 +--- a/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD.cpp ++++ b/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD.cpp +@@ -75,7 +75,9 @@ + #include "ARM/cpu-features.h" + #endif + #else // 'nix ++#if !(defined(__ppc64__) || defined(__PPC64__) || defined(__s390x__)) + #include <cpuid.h> ++#endif + #include "inttypes.h" + #endif + int FastNoiseSIMD::s_currentSIMDLevel = -1; +@@ -111,6 +113,15 @@ uint64_t xgetbv(unsigned int x) { + return _xgetbv(x); + } + #else ++#if defined(__ppc64__) || defined(__PPC64__) || defined(__s390x__) ++void cpuid(int32_t out[4], int32_t x) { ++ /* Just disable it as anything better is unimplemented. 
*/ ++ out[0] = 0; ++} ++uint64_t xgetbv(unsigned int index) { ++ return 0; ++} ++#else + void cpuid(int32_t out[4], int32_t x) { + __cpuid_count(x, 0, out[0], out[1], out[2], out[3]); + } +@@ -119,6 +130,7 @@ uint64_t xgetbv(unsigned int index) { + __asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index)); + return ((uint64_t)edx << 32) | eax; + } ++#endif + #define _XCR_XFEATURE_ENABLED_MASK 0 + #endif + +diff --git a/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD.h b/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD.h +index 9554384..6fff8bf 100644 +--- a/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD.h ++++ b/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD.h +@@ -45,7 +45,7 @@ + #define FN_ALIGNED_SETS + + // SSE2/NEON support is guaranteed on 64bit CPUs so no fallback is needed +-#if !(defined(_WIN64) || defined(__x86_64__) || defined(__ppc64__) || defined(__aarch64__) || defined(FN_IOS)) || defined(_DEBUG) ++#if !(defined(_WIN64) || defined(__x86_64__) || defined(__aarch64__) || defined(FN_IOS)) || defined(_DEBUG) + #define FN_COMPILE_NO_SIMD_FALLBACK + #endif + +-- +2.20.1 + diff --git a/0003-Add-platform-specific-flags-for-NEON.patch b/0003-Add-platform-specific-flags-for-NEON.patch new file mode 100644 index 0000000..bd227cf --- /dev/null +++ b/0003-Add-platform-specific-flags-for-NEON.patch @@ -0,0 +1,93 @@ +From 329df8d668477934f9fd5d1b587a8f7ac66414d1 Mon Sep 17 00:00:00 2001 +From: Elliott Sales de Andrade +Date: Fri, 20 Jul 2018 21:14:19 -0400 +Subject: [PATCH 3/5] Add platform-specific flags for NEON. 
+ +Signed-off-by: Elliott Sales de Andrade +--- + setup.py | 27 ++++++++++++++++++++++++++- + 1 file changed, 26 insertions(+), 1 deletion(-) + +diff --git a/setup.py b/setup.py +index 552fdfb..9c7bd4e 100644 +--- a/setup.py ++++ b/setup.py +@@ -44,7 +44,6 @@ open('pyfastnoisesimd/version.py', 'w').write('__version__ = "%s"\n' % VERSION) + sources = [ + 'pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD.cpp', + 'pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD_internal.cpp', +- 'pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD_neon.cpp', + 'pyfastnoisesimd/wrapper.cpp', + ] + +@@ -85,6 +84,14 @@ if os.name == 'nt': + '/arch:AVX2', + ] + } ++ neon = { ++ 'sources': [ ++ 'pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD_neon.cpp', ++ ], ++ 'cflags': [ ++ '/Oi', ++ ], ++ } + + if platform.machine() == 'AMD64': # 64-bit windows + #`/arch:SSE2` doesn't exist on Windows x64 builds, and generates a needless warnings +@@ -158,6 +165,19 @@ else: # Linux + '-msse2', + ], + } ++ neon = { ++ 'sources': [ ++ 'pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD_neon.cpp', ++ ], ++ 'cflags': [ ++ '-std=c++11', ++ '-mfpu=neon', ++ ], ++ } ++ if platform.machine() == 'aarch64': ++ # Flag is not supported, but NEON is always available. 
++ neon['cflags'].remove('-mfpu=neon') ++ + fma_flags = ['-mfma'] + + clibs = [ +@@ -165,6 +185,7 @@ clibs = [ + ('avx2', avx2), + ('sse41', sse41), + ('sse2', sse2), ++ ('neon', neon), + ] + + +@@ -174,6 +195,7 @@ class build(_build): + ('with-avx2=', None, 'Use AVX2 instructions: auto|yes|no'), + ('with-sse41=', None, 'Use SSE4.1 instructions: auto|yes|no'), + ('with-sse2=', None, 'Use SSE2 instructions: auto|yes|no'), ++ ('with-neon=', None, 'Use NEON instructions: auto|yes|no'), + ('with-fma=', None, 'Use FMA instructions: auto|yes|no'), + ] + +@@ -183,6 +205,7 @@ class build(_build): + self.with_avx2 = 'auto' + self.with_sse41 = 'auto' + self.with_sse2 = 'auto' ++ self.with_neon = 'auto' + self.with_fma = 'auto' + + def finalize_options(self): +@@ -221,6 +244,8 @@ class build(_build): + disabled_libraries.append('avx512') + if msc_version < 1900: + disabled_libraries.append('avx2') ++ if not platform.machine().startswith('arm'): ++ disabled_libraries.append('neon') + # End of SIMD limits + + for name, lib in self.distribution.libraries: +-- +2.20.1 + diff --git a/0003-Use-fallback-for-PPC64-and-S390x.patch b/0003-Use-fallback-for-PPC64-and-S390x.patch deleted file mode 100644 index 4c715bc..0000000 --- a/0003-Use-fallback-for-PPC64-and-S390x.patch +++ /dev/null @@ -1,68 +0,0 @@ -From adf5365d8105c8fdc9890e6abbc5c603fa5d1586 Mon Sep 17 00:00:00 2001 -From: Elliott Sales de Andrade -Date: Fri, 20 Jul 2018 05:50:13 -0400 -Subject: [PATCH 3/6] Use fallback for PPC64 and S390x. - -There is no SSE2 on PPC64, and no other optimized version is implemented -yet. 
- -Signed-off-by: Elliott Sales de Andrade ---- - pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD.cpp | 12 ++++++++++++ - pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD.h | 2 +- - 2 files changed, 13 insertions(+), 1 deletion(-) - -diff --git a/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD.cpp b/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD.cpp -index be8e183..0d09b18 100644 ---- a/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD.cpp -+++ b/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD.cpp -@@ -74,7 +74,9 @@ - #include "ARM/cpu-features.h" - #endif - #else // 'nix -+#if !(defined(__ppc64__) || defined(__PPC64__) || defined(__s390x__)) - #include <cpuid.h> -+#endif - #include "inttypes.h" - #endif - int FastNoiseSIMD::s_currentSIMDLevel = -1; -@@ -110,6 +112,15 @@ uint64_t xgetbv(unsigned int x) { - return _xgetbv(x); - } - #else -+#if defined(__ppc64__) || defined(__PPC64__) || defined(__s390x__) -+void cpuid(int32_t out[4], int32_t x) { -+ /* Just disable it as anything better is unimplemented. */ -+ out[0] = 0; -+} -+uint64_t xgetbv(unsigned int index) { -+ return 0; -+} -+#else - void cpuid(int32_t out[4], int32_t x) { - __cpuid_count(x, 0, out[0], out[1], out[2], out[3]); - } -@@ -118,6 +129,7 @@ uint64_t xgetbv(unsigned int index) { - __asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index)); - return ((uint64_t)edx << 32) | eax; - } -+#endif - #define _XCR_XFEATURE_ENABLED_MASK 0 - #endif - -diff --git a/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD.h b/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD.h -index 9554384..6fff8bf 100644 ---- a/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD.h -+++ b/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD.h -@@ -45,7 +45,7 @@ - #define FN_ALIGNED_SETS - - // SSE2/NEON support is guaranteed on 64bit CPUs so no fallback is needed --#if !(defined(_WIN64) || defined(__x86_64__) || defined(__ppc64__) || defined(__aarch64__) || defined(FN_IOS)) || defined(_DEBUG) -+#if !(defined(_WIN64) || defined(__x86_64__) || defined(__aarch64__) || defined(FN_IOS)) || 
defined(_DEBUG) - #define FN_COMPILE_NO_SIMD_FALLBACK - #endif - --- -2.17.1 - diff --git a/0004-Use-correct-types-to-parse-NumPy-array-dimensions.patch b/0004-Use-correct-types-to-parse-NumPy-array-dimensions.patch deleted file mode 100644 index 409d003..0000000 --- a/0004-Use-correct-types-to-parse-NumPy-array-dimensions.patch +++ /dev/null @@ -1,38 +0,0 @@ -From 1d128f884eaf397738fd36497d09b349ff842ebc Mon Sep 17 00:00:00 2001 -From: Elliott Sales de Andrade -Date: Fri, 20 Jul 2018 19:09:13 -0400 -Subject: [PATCH 4/6] Use correct types to parse NumPy array dimensions. - -The wrong type can cause errors on big-endian systems or others with -differing int and pointer sizes. - -Signed-off-by: Elliott Sales de Andrade ---- - pyfastnoisesimd/wrapper.cpp | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/pyfastnoisesimd/wrapper.cpp b/pyfastnoisesimd/wrapper.cpp -index 019ef55..4ebcee8 100644 ---- a/pyfastnoisesimd/wrapper.cpp -+++ b/pyfastnoisesimd/wrapper.cpp -@@ -93,7 +93,7 @@ PyFNS_GetEmptySet(PyObject *self, PyObject *args) - { - // Make a NumPy array and return it. Note the array is empty, not zeroed. 
- npy_intp dims[3] = {0, 0, 0}; -- const char *format = "i|ii"; -+ const char *format = "n|nn"; - float *data; - - if (!PyArg_ParseTuple(args, format, &dims[0], &dims[1], &dims[2])) { -@@ -538,7 +538,7 @@ PyFNS_GetNoiseSet(FNSObject *self, PyObject *args) - int xStart, yStart, zStart; - npy_intp dims[3] = {0, 0, 0}; - float scaleMod = 1.0; -- const char *format = "iiiiii|f"; -+ const char *format = "iiinnn|f"; - float *data = NULL; - - if (!PyArg_ParseTuple(args, format, &zStart, &yStart, &xStart, &dims[0], &dims[1], &dims[2], &scaleMod)) --- -2.17.1 - diff --git a/0004-Use-getauxval-to-check-for-NEON-on-Linux.patch b/0004-Use-getauxval-to-check-for-NEON-on-Linux.patch new file mode 100644 index 0000000..1bfdcf8 --- /dev/null +++ b/0004-Use-getauxval-to-check-for-NEON-on-Linux.patch @@ -0,0 +1,51 @@ +From 4e731ae8a88e5275033c3561926ac53032e41f05 Mon Sep 17 00:00:00 2001 +From: Elliott Sales de Andrade +Date: Fri, 20 Jul 2018 23:17:13 -0400 +Subject: [PATCH 4/5] Use getauxval to check for NEON on Linux. 
+ +Signed-off-by: Elliott Sales de Andrade +--- + pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD.cpp | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +diff --git a/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD.cpp b/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD.cpp +index 6da16c4..389b3b3 100644 +--- a/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD.cpp ++++ b/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD.cpp +@@ -71,7 +71,10 @@ + #ifdef _WIN32 + #include <intrin.h> + #elif defined(FN_ARM) +-#if !defined(__aarch64__) && !defined(FN_IOS) ++#if defined(__linux__) || defined(linux) || defined(__LINUX__) || defined(__linux) ++#include <sys/auxv.h> ++#include <asm/hwcap.h> ++#elif !defined(__aarch64__) && !defined(FN_IOS) + #include "ARM/cpu-features.h" + #endif + #else // 'nix +@@ -88,6 +91,10 @@ int GetFastestSIMD() + { + #if defined(__aarch64__) || defined(FN_IOS) + return FN_NEON; ++#elif defined(__linux__) || defined(linux) || defined(__LINUX__) || defined(__linux) ++ if ((getauxval(AT_HWCAP) & HWCAP_NEON) != 0) { ++ return FN_NEON; ++ } + #else + if (android_getCpuFamily() == ANDROID_CPU_FAMILY_ARM) + { +@@ -99,9 +106,9 @@ int GetFastestSIMD() + #endif + return FN_NEON; + } ++#endif + + return FN_NO_SIMD_FALLBACK; +-#endif + } + #else + +-- +2.20.1 + diff --git a/0005-Add-platform-specific-flags-for-NEON.patch b/0005-Add-platform-specific-flags-for-NEON.patch deleted file mode 100644 index 7ada2f9..0000000 --- a/0005-Add-platform-specific-flags-for-NEON.patch +++ /dev/null @@ -1,93 +0,0 @@ -From 078dc6b42caa3a93fc8cfa6456bd224dcd928ec1 Mon Sep 17 00:00:00 2001 -From: Elliott Sales de Andrade -Date: Fri, 20 Jul 2018 21:14:19 -0400 -Subject: [PATCH 5/6] Add platform-specific flags for NEON. 
- -Signed-off-by: Elliott Sales de Andrade ---- - setup.py | 27 ++++++++++++++++++++++++++- - 1 file changed, 26 insertions(+), 1 deletion(-) - -diff --git a/setup.py b/setup.py -index 5aec105..f65cc8d 100644 ---- a/setup.py -+++ b/setup.py -@@ -44,7 +44,6 @@ open('pyfastnoisesimd/version.py', 'w').write('__version__ = "%s"\n' % VERSION) - sources = [ - 'pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD.cpp', - 'pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD_internal.cpp', -- 'pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD_neon.cpp', - 'pyfastnoisesimd/wrapper.cpp' - ] - -@@ -85,6 +84,14 @@ if os.name == 'nt': - '/arch:AVX2', - ] - } -+ neon = { -+ 'sources': [ -+ 'pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD_neon.cpp', -+ ], -+ 'cflags': [ -+ '/Oi', -+ ], -+ } - - if platform.machine() == 'AMD64': # 64-bit windows - #`/arch:SSE2` doesn't exist on Windows x64 builds, and generates a needless warnings -@@ -158,6 +165,19 @@ else: # Linux - '-msse2', - ], - } -+ neon = { -+ 'sources': [ -+ 'pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD_neon.cpp', -+ ], -+ 'cflags': [ -+ '-std=c++11', -+ '-mfpu=neon', -+ ], -+ } -+ if platform.machine() == 'aarch64': -+ # Flag is not supported, but NEON is always available. 
-+ neon['cflags'].remove('-mfpu=neon') -+ - fma_flags = ['-mfma'] - - clibs = [ -@@ -165,6 +185,7 @@ clibs = [ - ('avx2', avx2), - ('sse41', sse41), - ('sse2', sse2), -+ ('neon', neon), - ] - - -@@ -174,6 +195,7 @@ class build(_build): - ('with-avx2=', None, 'Use AVX2 instructions: auto|yes|no'), - ('with-sse41=', None, 'Use SSE4.1 instructions: auto|yes|no'), - ('with-sse2=', None, 'Use SSE2 instructions: auto|yes|no'), -+ ('with-neon=', None, 'Use NEON instructions: auto|yes|no'), - ('with-fma=', None, 'Use FMA instructions: auto|yes|no'), - ] - -@@ -183,6 +205,7 @@ class build(_build): - self.with_avx2 = 'auto' - self.with_sse41 = 'auto' - self.with_sse2 = 'auto' -+ self.with_neon = 'auto' - self.with_fma = 'auto' - - def finalize_options(self): -@@ -219,6 +242,8 @@ class build(_build): - disabled_libraries.append('avx512') - if msc_version < 1900: - disabled_libraries.append('avx2') -+ if not platform.machine().startswith('arm'): -+ disabled_libraries.append('neon') - # End of SIMD limits - - for name, lib in self.distribution.libraries: --- -2.17.1 - diff --git a/0005-Fix-alignment-on-non-optimized-systems.patch b/0005-Fix-alignment-on-non-optimized-systems.patch new file mode 100644 index 0000000..a9d04b4 --- /dev/null +++ b/0005-Fix-alignment-on-non-optimized-systems.patch @@ -0,0 +1,48 @@ +From d1ac49b993d625651e61dd115115b609a0c8b22a Mon Sep 17 00:00:00 2001 +From: Elliott Sales de Andrade +Date: Sun, 13 Jan 2019 01:17:40 -0500 +Subject: [PATCH 5/5] Fix alignment on non-optimized systems. + +PPC64LE, for example, does not have any optimized code, so alignment is +2, and dividing by item size (which is greater than 2), the result is 0. +This produces a divide-by-zero later, so it should be limited to 1 at +minimum. 
+ +Signed-off-by: Elliott Sales de Andrade +--- + pyfastnoisesimd/helpers.py | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/pyfastnoisesimd/helpers.py b/pyfastnoisesimd/helpers.py +index b62aeba..a9e8c58 100644 +--- a/pyfastnoisesimd/helpers.py ++++ b/pyfastnoisesimd/helpers.py +@@ -59,7 +59,7 @@ def empty_coords(length, dtype=np.float32, n_byte=ext.SIMD_ALIGNMENT): + itemsize = dtype.itemsize + + # We need to expand length to be a multiple of the vector size +- vect_len = ext.SIMD_ALIGNMENT // itemsize ++ vect_len = max(ext.SIMD_ALIGNMENT // itemsize, 1) + aligned_len = int(vect_len*np.ceil(length/vect_len)) + shape = (3, aligned_len) + +@@ -125,7 +125,7 @@ def aligned_chunks(array, n_chunks, axis=0): + block_size = np.product(array.shape[axis:]) + # print(f'Got blocksize of {block_size}') + +- vect_len = ext.SIMD_ALIGNMENT // array.dtype.itemsize ++ vect_len = max(ext.SIMD_ALIGNMENT // array.dtype.itemsize, 1) + + if block_size % vect_len == 0: + # Iterate at-will, the underlying blocks have the correct shape +@@ -880,7 +880,6 @@ class Noise(object): + # for I, ((result_chunk, r_offset), (coord_chunk, offset)) in enumerate(zip( + # aligned_chunks(result, self._num_workers, axis=0), + # aligned_chunks(coords, self._num_workers, axis=1))): +- vect_len = ext.SIMD_ALIGNMENT // itemsize + for I, (result_chunk, offset) in enumerate( + aligned_chunks(result, self._num_workers, axis=0)): + +-- +2.20.1 + diff --git a/0006-Use-getauxval-to-check-for-NEON-on-Linux.patch b/0006-Use-getauxval-to-check-for-NEON-on-Linux.patch deleted file mode 100644 index a804bcc..0000000 --- a/0006-Use-getauxval-to-check-for-NEON-on-Linux.patch +++ /dev/null @@ -1,51 +0,0 @@ -From 26c18ebe8438d28bfc047a1145131c17b63cad60 Mon Sep 17 00:00:00 2001 -From: Elliott Sales de Andrade -Date: Fri, 20 Jul 2018 23:17:13 -0400 -Subject: [PATCH 6/6] Use getauxval to check for NEON on Linux. 
- -Signed-off-by: Elliott Sales de Andrade ---- - pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD.cpp | 11 +++++++++-- - 1 file changed, 9 insertions(+), 2 deletions(-) - -diff --git a/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD.cpp b/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD.cpp -index 0d09b18..42137c9 100644 ---- a/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD.cpp -+++ b/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD.cpp -@@ -70,7 +70,10 @@ - #ifdef _WIN32 - #include <intrin.h> - #elif defined(FN_ARM) --#if !defined(__aarch64__) && !defined(FN_IOS) -+#if defined(__linux__) || defined(linux) || defined(__LINUX__) || defined(__linux) -+#include <sys/auxv.h> -+#include <asm/hwcap.h> -+#elif !defined(__aarch64__) && !defined(FN_IOS) - #include "ARM/cpu-features.h" - #endif - #else // 'nix -@@ -87,6 +90,10 @@ int GetFastestSIMD() - { - #if defined(__aarch64__) || defined(FN_IOS) - return FN_NEON; -+#elif defined(__linux__) || defined(linux) || defined(__LINUX__) || defined(__linux) -+ if ((getauxval(AT_HWCAP) & HWCAP_NEON) != 0) { -+ return FN_NEON; -+ } - #else - if (android_getCpuFamily() == ANDROID_CPU_FAMILY_ARM) - { -@@ -98,9 +105,9 @@ int GetFastestSIMD() - #endif - return FN_NEON; - } -+#endif - - return FN_NO_SIMD_FALLBACK; --#endif - } - #else - --- -2.17.1 - diff --git a/python-pyfastnoisesimd.spec b/python-pyfastnoisesimd.spec index a702e41..9f2e2e9 100644 --- a/python-pyfastnoisesimd.spec +++ b/python-pyfastnoisesimd.spec @@ -1,27 +1,25 @@ %global srcname pyfastnoisesimd Name: python-%{srcname} -Version: 0.3.2 +Version: 0.4.1 Release: 1%{?dist} Summary: Python Fast Noise with SIMD License: BSD URL: http://github.com/robbmcleod/pyfastnoisesimd Source0: https://files.pythonhosted.org/packages/source/p/%{srcname}/%{srcname}-%{version}.tar.gz -# https://github.com/Auburns/FastNoiseSIMD/pull/26 -Patch0001: 0001-Add-cstdlib-for-posix_memalign.patch # https://github.com/Auburns/FastNoiseSIMD/commit/32873404111701397781fe9ef21931fed4f7f766 # 
https://github.com/Auburns/FastNoiseSIMD/commit/575c0047bbfd2bac841359daa9db220a9f97a638 # https://github.com/Auburns/FastNoiseSIMD/pull/27 -Patch0002: 0002-Update-casts-for-NEON.patch +Patch0001: 0001-Update-casts-for-NEON.patch # https://github.com/Auburns/FastNoiseSIMD/pull/31 -Patch0003: 0003-Use-fallback-for-PPC64-and-S390x.patch -# https://github.com/robbmcleod/pyfastnoisesimd/pull/14 -Patch0004: 0004-Use-correct-types-to-parse-NumPy-array-dimensions.patch +Patch0002: 0002-Use-fallback-for-PPC64-and-S390x.patch # https://github.com/robbmcleod/pyfastnoisesimd/pull/15 -Patch0005: 0005-Add-platform-specific-flags-for-NEON.patch +Patch0003: 0003-Add-platform-specific-flags-for-NEON.patch # https://github.com/Auburns/FastNoiseSIMD/pull/32 -Patch0006: 0006-Use-getauxval-to-check-for-NEON-on-Linux.patch +Patch0004: 0004-Use-getauxval-to-check-for-NEON-on-Linux.patch +# https://github.com/robbmcleod/pyfastnoisesimd/pull/20 +Patch0005: 0005-Fix-alignment-on-non-optimized-systems.patch %global _description \ PyFastNoiseSIMD is a wrapper around Jordan Peck's synthetic noise library which \ @@ -55,8 +53,6 @@ Requires: python3dist(numpy) > 1.7 # Remove bundled egg-info rm -rf %{srcname}.egg-info -# Remove no longer necessary file -rm %{srcname}/cpuinfo.py # Fix line endings for file in README.rst; do sed "s|\r||g" ${file} > ${file}.new @@ -89,5 +85,8 @@ popd %changelog +* Sun Jan 13 2019 Elliott Sales de Andrade - 0.4.1-1 +- Update to latest version + * Fri Jul 20 2018 Elliott Sales de Andrade - 0.3.2-1 - Initial package. diff --git a/sources b/sources index b77911a..e702da5 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -SHA512 (pyfastnoisesimd-0.3.2.tar.gz) = 90468a86587a7ff9773fca7fa11d0e0249561c92faef5a4a827238112a83c0dd1f88119f7355b344241361eac39b288606247ab94f92fe1221d8b354a5887336 +SHA512 (pyfastnoisesimd-0.4.1.tar.gz) = 6f8f0957b3e71e7221221af3bbfff0617ffd550461803c936baeb0dba217b42f223d3b0121b5072d5f61a3a45f0718bcca31e78196a53b30a513b4cf34c3f301