From 1f178940dc55dc50bcc6ddf1a1b1b50d0e385c0b Mon Sep 17 00:00:00 2001 From: Elliott Sales de Andrade Date: Fri, 20 Jul 2018 16:46:43 -0400 Subject: [PATCH 1/5] Update casts for NEON. A merge of various upstream commits. Signed-off-by: Elliott Sales de Andrade --- pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD.h | 7 +++++-- .../fastnoisesimd/FastNoiseSIMD_internal.cpp | 16 +++++++++------- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD.h b/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD.h index d0b5ed6..9554384 100644 --- a/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD.h +++ b/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD.h @@ -77,6 +77,9 @@ AMD Carrizo - Q2 2015 FMA3 Intel Haswell - Q2 2013 AMD Piledriver - 2012 + +AVX-512F +Intel Skylake-X - Q2 2017 */ struct FastNoiseVectorSet; @@ -97,7 +100,7 @@ public: // Returns highest detected level of CPU support // 5: ARM NEON - // 4: AVX512 + // 4: AVX-512F // 3: AVX2 & FMA3 // 2: SSE4.1 // 1: SSE2 @@ -106,7 +109,7 @@ public: // Sets the SIMD level for newly created FastNoiseSIMD objects // 5: ARM NEON - // 4: AVX512 + // 4: AVX-512F // 3: AVX2 & FMA3 // 2: SSE4.1 // 1: SSE2 diff --git a/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD_internal.cpp b/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD_internal.cpp index d7f5b42..ba5c83e 100644 --- a/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD_internal.cpp +++ b/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD_internal.cpp @@ -178,10 +178,10 @@ static SIMDf VECTORCALL FUNC(DIV)(SIMDf a, SIMDf b) #define SIMDf_MAX(a,b) vmaxq_f32(a,b) #define SIMDf_INV_SQRT(a) vrsqrteq_f32(a) -#define SIMDf_LESS_THAN(a,b) vreinterpretq_f32_u32(vcltq_f32(a,b)) -#define SIMDf_GREATER_THAN(a,b) vreinterpretq_f32_u32(vcgtq_f32(a,b)) -#define SIMDf_LESS_EQUAL(a,b) vreinterpretq_f32_u32(vcleq_f32(a,b)) -#define SIMDf_GREATER_EQUAL(a,b) vreinterpretq_f32_u32(vcgeq_f32(a,b)) +#define SIMDf_LESS_THAN(a,b) vreinterpretq_s32_u32(vcltq_f32(a,b)) +#define SIMDf_GREATER_THAN(a,b) vreinterpretq_s32_u32(vcgtq_f32(a,b)) +#define SIMDf_LESS_EQUAL(a,b) vreinterpretq_s32_u32(vcleq_f32(a,b)) +#define SIMDf_GREATER_EQUAL(a,b) vreinterpretq_s32_u32(vcgeq_f32(a,b)) #define SIMDf_AND(a,b) SIMDf_CAST_TO_FLOAT(vandq_s32(vreinterpretq_s32_f32(a),vreinterpretq_s32_f32(b))) #define SIMDf_AND_NOT(a,b) SIMDf_CAST_TO_FLOAT(vandq_s32(vmvnq_s32(vreinterpretq_s32_f32(a)),vreinterpretq_s32_f32(b))) @@ -192,7 +192,9 @@ static SIMDf VECTORCALL FUNC(FLOOR)(SIMDf a) { SIMDf fval = SIMDf_CONVERT_TO_FLOAT(SIMDi_CONVERT_TO_INT(a)); - return vsubq_f32(fval, SIMDf_AND(SIMDf_LESS_THAN(a, fval), SIMDf_NUM(1))); + return vsubq_f32(fval, + SIMDf_CAST_TO_FLOAT(vandq_s32(SIMDf_LESS_THAN(a, fval), + SIMDi_CAST_TO_INT(SIMDf_NUM(1))))); } #define SIMDf_FLOOR(a) FUNC(FLOOR)(a) #else @@ -201,7 +203,7 @@ static SIMDf VECTORCALL FUNC(FLOOR)(SIMDf a) #endif #define SIMDf_ABS(a) vabsq_f32(a) -#define SIMDf_BLENDV(a,b,mask) vbslq_f32(mask,b,a) +#define SIMDf_BLENDV(a,b,mask) vbslq_f32(vreinterpretq_u32_s32(mask),b,a) #define SIMDi_ADD(a,b) vaddq_s32(a,b) #define SIMDi_SUB(a,b) vsubq_s32(a,b) @@ -1834,7 +1836,7 @@ static SIMDf VECTORCALL FUNC(CellularDistance##distanceFunc##Single)(SIMDi seed, #define CELLULAR_DISTANCE2_SINGLE(distanceFunc, returnFunc)\ static SIMDf VECTORCALL FUNC(Cellular##returnFunc##distanceFunc##Single)(SIMDi seed, SIMDf x, SIMDf y, SIMDf z, SIMDf cellJitter, int index0, int index1)\ {\ - SIMDf distance[4] = {SIMDf_NUM(999999),SIMDf_NUM(999999),SIMDf_NUM(999999),SIMDf_NUM(999999)};\ + SIMDf distance[FN_CELLULAR_INDEX_MAX+1] = {SIMDf_NUM(999999),SIMDf_NUM(999999),SIMDf_NUM(999999),SIMDf_NUM(999999)};\ \ SIMDi xc = SIMDi_SUB(SIMDi_CONVERT_TO_INT(x), SIMDi_NUM(1));\ SIMDi ycBase = SIMDi_SUB(SIMDi_CONVERT_TO_INT(y), SIMDi_NUM(1));\ -- 2.29.2