From 1f178940dc55dc50bcc6ddf1a1b1b50d0e385c0b Mon Sep 17 00:00:00 2001
From: Elliott Sales de Andrade <quantum.analyst@gmail.com>
Date: Fri, 20 Jul 2018 16:46:43 -0400
Subject: [PATCH 1/5] Update casts for NEON.
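A merge of various upstream commits:
 * Document SIMD level 4 as AVX-512F (Intel Skylake-X) in FastNoiseSIMD.h.
 * Make the NEON float comparison macros yield signed 32-bit integer masks,
   and reinterpret the mask as unsigned in SIMDf_BLENDV.
 * Apply the mask in the NEON FLOOR fallback with an integer AND.
 * Size the cellular distance array as FN_CELLULAR_INDEX_MAX+1.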
Signed-off-by: Elliott Sales de Andrade <quantum.analyst@gmail.com>
---
pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD.h | 7 +++++--
.../fastnoisesimd/FastNoiseSIMD_internal.cpp | 16 +++++++++-------
2 files changed, 14 insertions(+), 9 deletions(-)
diff --git a/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD.h b/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD.h
index d0b5ed6..9554384 100644
--- a/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD.h
+++ b/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD.h
@@ -77,6 +77,9 @@ AMD Carrizo - Q2 2015
FMA3
Intel Haswell - Q2 2013
AMD Piledriver - 2012
+
+AVX-512F
+Intel Skylake-X - Q2 2017
*/
struct FastNoiseVectorSet;
@@ -97,7 +100,7 @@ public:
// Returns highest detected level of CPU support
// 5: ARM NEON
- // 4: AVX512
+ // 4: AVX-512F
// 3: AVX2 & FMA3
// 2: SSE4.1
// 1: SSE2
@@ -106,7 +109,7 @@ public:
// Sets the SIMD level for newly created FastNoiseSIMD objects
// 5: ARM NEON
- // 4: AVX512
+ // 4: AVX-512F
// 3: AVX2 & FMA3
// 2: SSE4.1
// 1: SSE2
diff --git a/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD_internal.cpp b/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD_internal.cpp
index d7f5b42..ba5c83e 100644
--- a/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD_internal.cpp
+++ b/pyfastnoisesimd/fastnoisesimd/FastNoiseSIMD_internal.cpp
@@ -178,10 +178,10 @@ static SIMDf VECTORCALL FUNC(DIV)(SIMDf a, SIMDf b)
#define SIMDf_MAX(a,b) vmaxq_f32(a,b)
#define SIMDf_INV_SQRT(a) vrsqrteq_f32(a)
-#define SIMDf_LESS_THAN(a,b) vreinterpretq_f32_u32(vcltq_f32(a,b))
-#define SIMDf_GREATER_THAN(a,b) vreinterpretq_f32_u32(vcgtq_f32(a,b))
-#define SIMDf_LESS_EQUAL(a,b) vreinterpretq_f32_u32(vcleq_f32(a,b))
-#define SIMDf_GREATER_EQUAL(a,b) vreinterpretq_f32_u32(vcgeq_f32(a,b))
+#define SIMDf_LESS_THAN(a,b) vreinterpretq_s32_u32(vcltq_f32(a,b))
+#define SIMDf_GREATER_THAN(a,b) vreinterpretq_s32_u32(vcgtq_f32(a,b))
+#define SIMDf_LESS_EQUAL(a,b) vreinterpretq_s32_u32(vcleq_f32(a,b))
+#define SIMDf_GREATER_EQUAL(a,b) vreinterpretq_s32_u32(vcgeq_f32(a,b))
#define SIMDf_AND(a,b) SIMDf_CAST_TO_FLOAT(vandq_s32(vreinterpretq_s32_f32(a),vreinterpretq_s32_f32(b)))
#define SIMDf_AND_NOT(a,b) SIMDf_CAST_TO_FLOAT(vandq_s32(vmvnq_s32(vreinterpretq_s32_f32(a)),vreinterpretq_s32_f32(b)))
@@ -192,7 +192,9 @@ static SIMDf VECTORCALL FUNC(FLOOR)(SIMDf a)
{
SIMDf fval = SIMDf_CONVERT_TO_FLOAT(SIMDi_CONVERT_TO_INT(a));
- return vsubq_f32(fval, SIMDf_AND(SIMDf_LESS_THAN(a, fval), SIMDf_NUM(1)));
+ return vsubq_f32(fval,
+ SIMDf_CAST_TO_FLOAT(vandq_s32(SIMDf_LESS_THAN(a, fval),
+ SIMDi_CAST_TO_INT(SIMDf_NUM(1)))));
}
#define SIMDf_FLOOR(a) FUNC(FLOOR)(a)
#else
@@ -201,7 +203,7 @@ static SIMDf VECTORCALL FUNC(FLOOR)(SIMDf a)
#endif
#define SIMDf_ABS(a) vabsq_f32(a)
-#define SIMDf_BLENDV(a,b,mask) vbslq_f32(mask,b,a)
+#define SIMDf_BLENDV(a,b,mask) vbslq_f32(vreinterpretq_u32_s32(mask),b,a)
#define SIMDi_ADD(a,b) vaddq_s32(a,b)
#define SIMDi_SUB(a,b) vsubq_s32(a,b)
@@ -1834,7 +1836,7 @@ static SIMDf VECTORCALL FUNC(CellularDistance##distanceFunc##Single)(SIMDi seed,
#define CELLULAR_DISTANCE2_SINGLE(distanceFunc, returnFunc)\
static SIMDf VECTORCALL FUNC(Cellular##returnFunc##distanceFunc##Single)(SIMDi seed, SIMDf x, SIMDf y, SIMDf z, SIMDf cellJitter, int index0, int index1)\
{\
- SIMDf distance[4] = {SIMDf_NUM(999999),SIMDf_NUM(999999),SIMDf_NUM(999999),SIMDf_NUM(999999)};\
+ SIMDf distance[FN_CELLULAR_INDEX_MAX+1] = {SIMDf_NUM(999999),SIMDf_NUM(999999),SIMDf_NUM(999999),SIMDf_NUM(999999)};\
\
SIMDi xc = SIMDi_SUB(SIMDi_CONVERT_TO_INT(x), SIMDi_NUM(1));\
SIMDi ycBase = SIMDi_SUB(SIMDi_CONVERT_TO_INT(y), SIMDi_NUM(1));\
--
2.29.2