|
|
d965f86 |
From f0f41e08f88560ed5cc97a8062d1f813b7693068 Mon Sep 17 00:00:00 2001
|
|
|
d965f86 |
From: Larry Gritz <lg@larrygritz.com>
|
|
|
d965f86 |
Date: Wed, 7 Feb 2024 12:01:35 -0800
|
|
|
d965f86 |
Subject: [PATCH] fix(simd.h): Address NEON issues
|
|
|
d965f86 |
|
|
|
d965f86 |
Primarily, recent changes (PR 4071) to vint4::store for the NEON case
|
|
|
d965f86 |
appear to have some type mismatches, which Apple Clang on ARM-based
|
|
|
d965f86 |
Mac (including our CI) seems ok with, but which is generating type
|
|
|
d965f86 |
errors on other ARM Linux platforms.
|
|
|
d965f86 |
|
|
|
d965f86 |
I think the types were weird here, so I tightened it up to get the
|
|
|
d965f86 |
types right for temporary variables in that function. That's the
|
|
|
d965f86 |
primary fix here.
|
|
|
d965f86 |
|
|
|
d965f86 |
Secondarily, I modified simd.h and the CMake setup so that build option
|
|
|
d965f86 |
USE_SIMD=0 will disable NEON in the same way that it disables SSE. (I
|
|
|
d965f86 |
realized that USE_SIMD=0 was not disabling NEON, so there was no way
|
|
|
d965f86 |
for a NEON platform to completely disable SIMD if they needed to.)
|
|
|
d965f86 |
|
|
|
d965f86 |
Signed-off-by: Larry Gritz <lg@larrygritz.com>
|
|
|
d965f86 |
---
|
|
|
d965f86 |
src/cmake/compiler.cmake | 2 +-
|
|
|
d965f86 |
src/include/OpenImageIO/simd.h | 13 ++++++++++---
|
|
|
d965f86 |
2 files changed, 11 insertions(+), 4 deletions(-)
|
|
|
d965f86 |
|
|
|
d965f86 |
diff --git a/src/cmake/compiler.cmake b/src/cmake/compiler.cmake
|
|
|
d965f86 |
index 657277e0d3..83d6788731 100644
|
|
|
d965f86 |
--- a/src/cmake/compiler.cmake
|
|
|
d965f86 |
+++ b/src/cmake/compiler.cmake
|
|
|
d965f86 |
@@ -303,7 +303,7 @@ set (SIMD_COMPILE_FLAGS "")
|
|
|
d965f86 |
if (NOT USE_SIMD STREQUAL "")
|
|
|
d965f86 |
message (STATUS "Compiling with SIMD level ${USE_SIMD}")
|
|
|
d965f86 |
if (USE_SIMD STREQUAL "0")
|
|
|
d965f86 |
- set (SIMD_COMPILE_FLAGS ${SIMD_COMPILE_FLAGS} "-DOIIO_NO_SSE=1")
|
|
|
d965f86 |
+ set (SIMD_COMPILE_FLAGS ${SIMD_COMPILE_FLAGS} "-DOIIO_NO_SIMD=1")
|
|
|
d965f86 |
else ()
|
|
|
d965f86 |
string (REPLACE "," ";" SIMD_FEATURE_LIST ${USE_SIMD})
|
|
|
d965f86 |
foreach (feature ${SIMD_FEATURE_LIST})
|
|
|
d965f86 |
diff --git a/src/include/OpenImageIO/simd.h b/src/include/OpenImageIO/simd.h
|
|
|
d965f86 |
index 72e771a43d..46c8b392b8 100644
|
|
|
d965f86 |
--- a/src/include/OpenImageIO/simd.h
|
|
|
d965f86 |
+++ b/src/include/OpenImageIO/simd.h
|
|
|
d965f86 |
@@ -77,6 +77,13 @@
|
|
|
d965f86 |
// OIIO_SIMD_HAS_SIMD8 : nonzero if vfloat8, vint8, vbool8 are defined
|
|
|
d965f86 |
// OIIO_SIMD_HAS_SIMD16 : nonzero if vfloat16, vint16, vbool16 are defined
|
|
|
d965f86 |
|
|
|
d965f86 |
+#ifdef OIIO_NO_SIMD /* Request to disable all SIMD */
|
|
|
d965f86 |
+# define OIIO_NO_SSE 1
|
|
|
d965f86 |
+# define OIIO_NO_AVX 1
|
|
|
d965f86 |
+# define OIIO_NO_AVX2 1
|
|
|
d965f86 |
+# define OIIO_NO_NEON 1
|
|
|
d965f86 |
+#endif
|
|
|
d965f86 |
+
|
|
|
d965f86 |
#if defined(_M_ARM64) || defined(__aarch64__) || defined(__aarch64)
|
|
|
d965f86 |
# ifndef __ARM_NEON__
|
|
|
d965f86 |
# define __ARM_NEON__
|
|
|
d965f86 |
@@ -4788,9 +4795,9 @@ OIIO_FORCEINLINE void vint4::store (unsigned char *values) const {
|
|
|
d965f86 |
_mm_store_ss((float*)values, _mm_castsi128_ps(val8));
|
|
|
d965f86 |
#elif OIIO_SIMD_NEON
|
|
|
d965f86 |
vint4 clamped = m_simd & vint4(0xff);
|
|
|
d965f86 |
- simd_t val16 = vcombine_s16(vqmovn_s32(clamped), vdup_n_s16(0));
|
|
|
d965f86 |
- simd_t val8 = vcombine_u8(vqmovun_s16(val16), vdup_n_u8(0));
|
|
|
d965f86 |
- vst1q_lane_u32((uint32_t*)values, val8, 0);
|
|
|
d965f86 |
+ int16x8_t val16 = vcombine_s16(vqmovn_s32(clamped), vdup_n_s16(0));
|
|
|
d965f86 |
+ uint8x16_t val8 = vcombine_u8(vqmovun_s16(val16), vdup_n_u8(0));
|
|
|
d965f86 |
+ vst1q_lane_u32((uint32_t*)values, vreinterpretq_u32_u8(val8), 0);
|
|
|
d965f86 |
#else
|
|
|
d965f86 |
SIMD_DO (values[i] = m_val[i]);
|
|
|
d965f86 |
#endif
|