From a06fa0fcfd3cbcd7a90a63002a4742073a181c21 Mon Sep 17 00:00:00 2001
From: Leon Marz <main@lmarz.org>
Date: Sat, 18 Nov 2023 15:06:55 +0100
Subject: [PATCH 1/2] Fix build error on architectures without SSE or sse2neon

color_srgb_to_linear is only defined when SSE2 or sse2neon is available,
so use color_srgb_to_linear_v4, which also has a non-SSE2 fallback.

Pull Request: https://projects.blender.org/blender/blender/pulls/115098
---
 intern/cycles/blender/attribute_convert.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/intern/cycles/blender/attribute_convert.h b/intern/cycles/blender/attribute_convert.h
index ee26e1d9571..34b75d55855 100644
--- a/intern/cycles/blender/attribute_convert.h
+++ b/intern/cycles/blender/attribute_convert.h
@@ -66,10 +66,10 @@ template<> struct AttributeConverter<blender::ColorGeometry4b> {
   static constexpr auto type_desc = TypeRGBA;
   static CyclesT convert(const blender::ColorGeometry4b &value)
   {
-    return color_srgb_to_linear(make_float4(byte_to_float(value[0]),
-                                            byte_to_float(value[1]),
-                                            byte_to_float(value[2]),
-                                            byte_to_float(value[3])));
+    return color_srgb_to_linear_v4(make_float4(byte_to_float(value[0]),
+                                               byte_to_float(value[1]),
+                                               byte_to_float(value[2]),
+                                               byte_to_float(value[3])));
   }
 };
 template<> struct AttributeConverter<bool> {
-- 
2.30.2


From 67bc11c859e73058ead1081a82726de2bb3bdc7c Mon Sep 17 00:00:00 2001
From: Brecht Van Lommel <brecht@blender.org>
Date: Wed, 22 Nov 2023 19:41:47 +0100
Subject: [PATCH 2/2] Cleanup: rename sse2 specific functions to avoid
 accidentally using them

Ref #115098
---
 intern/cycles/util/color.h | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/intern/cycles/util/color.h b/intern/cycles/util/color.h
index 8b3082a3ade..842b186899b 100644
--- a/intern/cycles/util/color.h
+++ b/intern/cycles/util/color.h
@@ -253,7 +253,7 @@ ccl_device float3 xyY_to_xyz(float x, float y, float Y)
  * exp = exponent, encoded as uint32_t
  * e2coeff = 2^(127/exponent - 127) * bias_coeff^(1/exponent), encoded as uint32_t
  */
-template<unsigned exp, unsigned e2coeff> ccl_device_inline float4 fastpow(const float4 &arg)
+template<unsigned exp, unsigned e2coeff> ccl_device_inline float4 fastpow_sse2(const float4 &arg)
 {
   float4 ret = arg * cast(make_int4(e2coeff));
   ret = make_float4(cast(ret));
@@ -263,7 +263,7 @@ template<unsigned exp, unsigned e2coeff> ccl_device_inline float4 fastpow(const
 }
 
 /* Improve x ^ 1.0f/5.0f solution with Newton-Raphson method */
-ccl_device_inline float4 improve_5throot_solution(const float4 &old_result, const float4 &x)
+ccl_device_inline float4 improve_5throot_solution_sse2(const float4 &old_result, const float4 &x)
 {
   float4 approx2 = old_result * old_result;
   float4 approx4 = approx2 * approx2;
@@ -273,7 +273,7 @@ ccl_device_inline float4 improve_5throot_solution(const float4 &old_result, cons
 }
 
 /* Calculate powf(x, 2.4). Working domain: 1e-10 < x < 1e+10 */
-ccl_device_inline float4 fastpow24(const float4 &arg)
+ccl_device_inline float4 fastpow24_sse2(const float4 &arg)
 {
   /* max, avg and |avg| errors were calculated in gcc without FMA instructions
    * The final precision should be better than powf in glibc */
@@ -281,27 +281,27 @@ ccl_device_inline float4 fastpow24(const float4 &arg)
   /* Calculate x^4/5, coefficient 0.994 was constructed manually to minimize avg error */
   /* 0x3F4CCCCD = 4/5 */
   /* 0x4F55A7FB = 2^(127/(4/5) - 127) * 0.994^(1/(4/5)) */
-  float4 x = fastpow<0x3F4CCCCD, 0x4F55A7FB>(
+  float4 x = fastpow_sse2<0x3F4CCCCD, 0x4F55A7FB>(
       arg);  // error max = 0.17  avg = 0.0018    |avg| = 0.05
   float4 arg2 = arg * arg;
   float4 arg4 = arg2 * arg2;
 
   /* error max = 0.018     avg = 0.0031    |avg| = 0.0031 */
-  x = improve_5throot_solution(x, arg4);
+  x = improve_5throot_solution_sse2(x, arg4);
   /* error max = 0.00021   avg = 1.6e-05   |avg| = 1.6e-05 */
-  x = improve_5throot_solution(x, arg4);
+  x = improve_5throot_solution_sse2(x, arg4);
   /* error max = 6.1e-07   avg = 5.2e-08   |avg| = 1.1e-07 */
-  x = improve_5throot_solution(x, arg4);
+  x = improve_5throot_solution_sse2(x, arg4);
 
   return x * (x * x);
 }
 
-ccl_device float4 color_srgb_to_linear(const float4 &c)
+ccl_device float4 color_srgb_to_linear_sse2(const float4 &c)
 {
   int4 cmp = c < make_float4(0.04045f);
   float4 lt = max(c * make_float4(1.0f / 12.92f), make_float4(0.0f));
   float4 gtebase = (c + make_float4(0.055f)) * make_float4(1.0f / 1.055f); /* fma */
-  float4 gte = fastpow24(gtebase);
+  float4 gte = fastpow24_sse2(gtebase);
   return select(cmp, lt, gte);
 }
 #endif /* __KERNEL_SSE2__ */
@@ -328,7 +328,7 @@ ccl_device float4 color_srgb_to_linear_v4(float4 c)
 {
 #ifdef __KERNEL_SSE2__
   float4 r = c;
-  r = color_srgb_to_linear(r);
+  r = color_srgb_to_linear_sse2(r);
   r.w = c.w;
   return r;
 #else
-- 
2.30.2