3f72327
Index: chromium-125.0.6422.41/third_party/skia/BUILD.gn
3f72327
===================================================================
3f72327
--- chromium-125.0.6422.41.orig/third_party/skia/BUILD.gn
3f72327
+++ chromium-125.0.6422.41/third_party/skia/BUILD.gn
765ac9d
@@ -188,6 +188,12 @@ opts("skx") {
aacff36
   }
aacff36
 }
aacff36
 
aacff36
+opts("vsx") {
aacff36
+  enabled = current_cpu == "ppc64"
aacff36
+  sources = skia_opts.vsx_sources
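+  # Note: -mcpu=power9 below bakes a POWER9 baseline into these VSX sources.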
aacff36
+  cflags = [ "-mcpu=power9", "-mtune=power9" ]
aacff36
+}
aacff36
+
aacff36
 # Any feature of Skia that requires third-party code should be optional and use this template.
aacff36
 template("optional") {
aacff36
   if (invoker.enabled) {
765ac9d
@@ -1478,6 +1484,7 @@ skia_component("skia") {
aacff36
     ":skx",
aacff36
     ":typeface_fontations",
aacff36
     ":vello",
aacff36
+    ":vsx",
aacff36
     ":webp_decode",
aacff36
     ":wuffs",
aacff36
     ":xml",
765ac9d
@@ -1653,7 +1660,10 @@ skia_static_library("pathkit") {
aacff36
   public_configs = [ ":skia_public" ]
aacff36
   configs = skia_library_configs
aacff36
 
aacff36
-  deps = [ ":hsw" ]
aacff36
+  deps = [
aacff36
+    ":hsw",
aacff36
+    ":vsx",
aacff36
+  ]
aacff36
 
aacff36
   sources = []
aacff36
   sources += skia_pathops_sources
3f72327
Index: chromium-125.0.6422.41/third_party/skia/gn/skia/BUILD.gn
3f72327
===================================================================
3f72327
--- chromium-125.0.6422.41.orig/third_party/skia/gn/skia/BUILD.gn
3f72327
+++ chromium-125.0.6422.41/third_party/skia/gn/skia/BUILD.gn
aacff36
@@ -163,6 +163,8 @@ config("default") {
aacff36
       "-mfpmath=sse",
aacff36
     ]
aacff36
     ldflags += [ "-m32" ]
aacff36
+  } else if (current_cpu == "ppc64") {
aacff36
+    cflags += [ "-mcpu=power9", "-mtune=power9" ]
765ac9d
   } else if (current_cpu == "loong64") {
765ac9d
     cflags += [
765ac9d
       "-mlsx",
3f72327
Index: chromium-125.0.6422.41/third_party/skia/include/core/SkTypes.h
3f72327
===================================================================
3f72327
--- chromium-125.0.6422.41.orig/third_party/skia/include/core/SkTypes.h
3f72327
+++ chromium-125.0.6422.41/third_party/skia/include/core/SkTypes.h
aacff36
@@ -195,5 +195,44 @@ static constexpr uint32_t SK_InvalidGenI
aacff36
 */
aacff36
 static constexpr uint32_t SK_InvalidUniqueID = 0;
aacff36
 
aacff36
+//////////////////////////////////////////////////////////////////////
aacff36
+// PPC defines
aacff36
+
aacff36
+#if defined(__powerpc64__) || defined(__PPC64__)
aacff36
+    #ifndef SK_CPU_PPC64
aacff36
+        #define SK_CPU_PPC64
aacff36
+    #endif
aacff36
+    #undef SK_CPU_SSE_LEVEL
aacff36
+#endif
aacff36
+
aacff36
+// Newer versions of clang and gcc for ppc64 ship with wrappers that translate
aacff36
+// Intel vector intrinsics into PPC VSX intrinsics, so we can pretend to be
aacff36
+// Intel. There is currently full API support for SSSE3 on POWER8 and later
aacff36
+// processors.
aacff36
+#if defined(__POWER8_VECTOR__) && defined(__has_include) && \
aacff36
+  !defined(SK_CPU_SSE_LEVEL)
aacff36
+
aacff36
+    // Clang ships both Intel and PPC headers in its PPC version, storing the
aacff36
+    // PPC compatibility headers in a subdirectory that the compiler searches before
aacff36
+    // its standard library include directory.
aacff36
+    #if (__has_include(<tmmintrin.h>) && !defined(__clang__)) || \
aacff36
+         __has_include(<ppc_wrappers/tmmintrin.h>)
aacff36
+        #define SK_CPU_SSE_LEVEL    SK_CPU_SSE_LEVEL_SSSE3
aacff36
+    #elif (__has_include(<emmintrin.h>) && !defined(__clang__)) || \
aacff36
+           __has_include(<ppc_wrappers/emmintrin.h>)
aacff36
+        #define SK_CPU_SSE_LEVEL    SK_CPU_SSE_LEVEL_SSE2
aacff36
+    #endif
aacff36
+
aacff36
+    #ifdef SK_CPU_SSE_LEVEL
aacff36
+        #define SK_PPC64_HAS_SSE_COMPAT
aacff36
+        #ifndef NO_WARN_X86_INTRINSICS
aacff36
+            #define NO_WARN_X86_INTRINSICS
aacff36
+        #endif
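+        // SkSpinlock.cpp checks this define to avoid the ppc_wrappers _mm_pause(), which appears to hit a Clang mfppr issue.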
aacff36
+        #if defined(__clang__)
aacff36
+            #define SK_PPC64_CLANG_MFPPR_BUG
aacff36
+        #endif
aacff36
+    #endif
aacff36
+#endif
aacff36
+
aacff36
 
aacff36
 #endif
3f72327
Index: chromium-125.0.6422.41/third_party/skia/src/base/SkSpinlock.cpp
3f72327
===================================================================
3f72327
--- chromium-125.0.6422.41.orig/third_party/skia/src/base/SkSpinlock.cpp
3f72327
+++ chromium-125.0.6422.41/third_party/skia/src/base/SkSpinlock.cpp
aacff36
@@ -33,7 +33,8 @@
aacff36
 #endif
aacff36
 
aacff36
 // Renamed from "pause" to avoid conflict with function defined in unistd.h
aacff36
-#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
aacff36
+#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 && \
aacff36
+    !defined(SK_PPC64_CLANG_MFPPR_BUG)
aacff36
     #include <emmintrin.h>
aacff36
     static void do_pause() { _mm_pause(); }
aacff36
 #else
3f72327
Index: chromium-125.0.6422.41/third_party/skia/src/opts/SkBitmapProcState_opts.h
3f72327
===================================================================
3f72327
--- chromium-125.0.6422.41.orig/third_party/skia/src/opts/SkBitmapProcState_opts.h
3f72327
+++ chromium-125.0.6422.41/third_party/skia/src/opts/SkBitmapProcState_opts.h
aacff36
@@ -21,7 +21,13 @@
aacff36
 // The rest are scattershot at the moment but I want to get them
aacff36
 // all migrated to be normal code inside SkBitmapProcState.cpp.
aacff36
 
aacff36
-#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
aacff36
+#if defined(SK_PPC64_HAS_SSE_COMPAT)
aacff36
+    #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
aacff36
+        #include <tmmintrin.h>
aacff36
+    #else
aacff36
+        #include <emmintrin.h>
aacff36
+    #endif
aacff36
+#elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
aacff36
     #include <immintrin.h>
aacff36
 #elif defined(SK_ARM_HAS_NEON)
aacff36
     #include <arm_neon.h>
3f72327
Index: chromium-125.0.6422.41/third_party/skia/src/opts/SkBlitRow_opts.h
3f72327
===================================================================
3f72327
--- chromium-125.0.6422.41.orig/third_party/skia/src/opts/SkBlitRow_opts.h
3f72327
+++ chromium-125.0.6422.41/third_party/skia/src/opts/SkBlitRow_opts.h
aacff36
@@ -69,7 +69,7 @@
aacff36
 #endif
aacff36
 
aacff36
 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
aacff36
-    #include <immintrin.h>
aacff36
+    #include <emmintrin.h>
aacff36
 
aacff36
     static inline __m128i SkPMSrcOver_SSE2(const __m128i& src, const __m128i& dst) {
aacff36
         __m128i scale = _mm_sub_epi32(_mm_set1_epi32(256),
3f72327
Index: chromium-125.0.6422.41/third_party/skia/src/opts/SkRasterPipeline_opts.h
3f72327
===================================================================
3f72327
--- chromium-125.0.6422.41.orig/third_party/skia/src/opts/SkRasterPipeline_opts.h
3f72327
+++ chromium-125.0.6422.41/third_party/skia/src/opts/SkRasterPipeline_opts.h
aacff36
@@ -1,5 +1,6 @@
aacff36
 /*
aacff36
  * Copyright 2018 Google Inc.
3f72327
+ * Copyright 2023-2024 Raptor Engineering, LLC
aacff36
  *
aacff36
  * Use of this source code is governed by a BSD-style license that can be
aacff36
  * found in the LICENSE file.
3f8e3da
@@ -75,6 +76,8 @@ using NoCtx = const void*;
aacff36
     #define JUMPER_IS_SCALAR
aacff36
 #elif defined(SK_ARM_HAS_NEON)
aacff36
     #define JUMPER_IS_NEON
aacff36
+#elif defined(SK_PPC64_HAS_SSE_COMPAT)
aacff36
+    #define JUMPER_IS_VSX
aacff36
 #elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SKX
aacff36
     #define JUMPER_IS_SKX
aacff36
 #elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX2
3f72327
@@ -111,6 +114,8 @@ using NoCtx = const void*;
3f72327
     #include <math.h>
3f72327
 #elif defined(JUMPER_IS_NEON)
3f72327
     #include <arm_neon.h>
aacff36
+#elif defined(JUMPER_IS_VSX)
aacff36
+    #include <emmintrin.h>
3f72327
 #elif defined(JUMPER_IS_LASX)
3f72327
     #include <lasxintrin.h>
3f72327
     #include <lsxintrin.h>
765ac9d
@@ -209,6 +214,184 @@ namespace SK_OPTS_NS {
aacff36
         ptr[3] = a;
aacff36
     }
aacff36
 
aacff36
+#elif defined(JUMPER_IS_VSX)
aacff36
+    // Since we know we're using Clang, we can use its vector extensions.
aacff36
+    template <typename T> using V = T __attribute__((ext_vector_type(4)));
aacff36
+    using F   = V<float   >;
aacff36
+    using I32 = V< int32_t>;
aacff36
+    using U64 = V<uint64_t>;
aacff36
+    using U32 = V<uint32_t>;
aacff36
+    using U16 = V<uint16_t>;
aacff36
+    using U8  = V<uint8_t >;
aacff36
+
aacff36
+    // We polyfill a few routines that Clang doesn't build into ext_vector_types.
aacff36
+    SI F   min(F a, F b)     { return vec_min(a,b); }
aacff36
+    SI I32 min(I32 a, I32 b) { return vec_min(a,b); }
aacff36
+    SI U32 min(U32 a, U32 b) { return vec_min(a,b); }
aacff36
+    SI F   max(F a, F b)     { return vec_max(a,b); }
aacff36
+    SI I32 max(I32 a, I32 b) { return vec_max(a,b); }
aacff36
+    SI U32 max(U32 a, U32 b) { return vec_max(a,b); }
aacff36
+
aacff36
+    SI F   abs_  (F v)   { return vec_abs(v); }
aacff36
+    SI I32 abs_  (I32 v) { return vec_abs(v); }
aacff36
+    SI F   rcp_approx(F v) { return vec_re(v); }
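+    // One Newton-Raphson step below refines the vec_re() estimate: e * (2 - v*e).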
aacff36
+    SI F   rcp_precise (F v) { F e = rcp_approx(v); return e * (2.0f - v * e); }
aacff36
+    SI F   rsqrt_approx (F v)   { return vec_rsqrte(v); }
aacff36
+
aacff36
+    SI U16 pack(U32 v)       { return __builtin_convertvector(v, U16); }
aacff36
+    SI U8  pack(U16 v)       { return __builtin_convertvector(v,  U8); }
aacff36
+
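+    // Branchless select: (c & t) | (e & ~c), built from vec_and, vec_andc, and vec_or.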
aacff36
+    SI F if_then_else(I32 c, F t, F e) {
aacff36
+        return vec_or((vector float)vec_and((vector float)c, (vector float)t), (vector float)vec_andc((vector float)e, (vector float)c));
aacff36
+    }
aacff36
+    SI I32 if_then_else(I32 c, I32 t, I32 e) {
aacff36
+        return vec_or((vector unsigned int)vec_and((vector unsigned int)c, (vector unsigned int)t), (vector unsigned int)vec_andc((vector unsigned int)e, (vector unsigned int)c));
aacff36
+    }
aacff36
+
aacff36
+    // Unlike ARM, neither AltiVec nor SSE has a horizontal element compare, so fall back to scalar checks here...
aacff36
+    SI bool any(I32 c) {
aacff36
+        if (vec_extract((U32)c, 0) != 0) return 1;
aacff36
+        if (vec_extract((U32)c, 1) != 0) return 1;
aacff36
+        if (vec_extract((U32)c, 2) != 0) return 1;
aacff36
+        if (vec_extract((U32)c, 3) != 0) return 1;
aacff36
+        return 0;
aacff36
+    }
aacff36
+    SI bool all(I32 c) {
aacff36
+        if (vec_extract((U32)c, 0) == 0) return 0;
aacff36
+        if (vec_extract((U32)c, 1) == 0) return 0;
aacff36
+        if (vec_extract((U32)c, 2) == 0) return 0;
aacff36
+        if (vec_extract((U32)c, 3) == 0) return 0;
aacff36
+        return 1;
aacff36
+    }
aacff36
+
aacff36
+    SI F     mad(F f, F m, F a) { return vec_madd(f,m,a); }
a85a4df
+    SI F    nmad(F f, F m, F a) { return vec_nmsub(f,m,a); }
aacff36
+    SI F  floor_(F v) { return vec_floor(v); }
aacff36
+    SI F   ceil_(F v) { return vec_ceil(v); }
aacff36
+    SI F   sqrt_(F v) { return vec_sqrt(v); }
cd1739f
+    SI I32 iround(F v) { return vec_cts((vector float)vec_rint(v), 0); }
cd1739f
+    SI U32 round(F v)  { return vec_ctu((vector float)vec_rint(v), 0); }
aacff36
+    SI U32 round(F v, F scale) { return vec_cts((vector float)vec_rint(v*scale), 0); }
aacff36
+
aacff36
+    template <typename T>
aacff36
+    SI V<T> gather(const T* p, U32 ix) {
aacff36
+        return {p[ix[0]], p[ix[1]], p[ix[2]], p[ix[3]]};
aacff36
+    }
aacff36
+    template <typename V, typename S>
aacff36
+    SI void scatter_masked(V src, S* dst, U32 ix, I32 mask) {
aacff36
+        V before = gather(dst, ix);
aacff36
+        V after = if_then_else(mask, src, before);
aacff36
+        dst[ix[0]] = after[0];
aacff36
+        dst[ix[1]] = after[1];
aacff36
+        dst[ix[2]] = after[2];
aacff36
+        dst[ix[3]] = after[3];
aacff36
+    }
aacff36
+
aacff36
+    // TODO
aacff36
+    // Finish converting these functions from the SSE translation layer to native AltiVec / VSX
aacff36
+    SI void load2(const uint16_t* ptr, U16* r, U16* g) {
aacff36
+        __m128i _01;
aacff36
+        _01 = _mm_loadu_si128(((__m128i*)ptr) + 0);  // r0 g0 r1 g1 r2 g2 r3 g3
aacff36
+        auto rg01_23 = _mm_shufflelo_epi16(_01, 0xD8);      // r0 r1 g0 g1 r2 g2 r3 g3
aacff36
+        auto rg      = _mm_shufflehi_epi16(rg01_23, 0xD8);  // r0 r1 g0 g1 r2 r3 g2 g3
aacff36
+
aacff36
+        auto R = _mm_shuffle_epi32(rg, 0x88);  // r0 r1 r2 r3 r0 r1 r2 r3
aacff36
+        auto G = _mm_shuffle_epi32(rg, 0xDD);  // g0 g1 g2 g3 g0 g1 g2 g3
aacff36
+        *r = sk_unaligned_load<U16>(&R);
aacff36
+        *g = sk_unaligned_load<U16>(&G);
aacff36
+    }
aacff36
+
aacff36
+    SI void store2(uint16_t* ptr, U16 r, U16 g) {
aacff36
+        U32 rg = _mm_unpacklo_epi16(widen_cast<__m128i>(r), widen_cast<__m128i>(g));
aacff36
+        _mm_storeu_si128((__m128i*)ptr + 0, rg);
aacff36
+    }
aacff36
+
aacff36
+    SI void load3(const uint16_t* ptr, U16* r, U16* g, U16* b) {
aacff36
+        __m128i _0, _1, _2, _3;
aacff36
+        // Load slightly weirdly to make sure we don't load past the end of 4x48 bits.
aacff36
+        auto _01 =                _mm_loadu_si128((const __m128i*)(ptr + 0))    ,
aacff36
+             _23 = _mm_srli_si128(_mm_loadu_si128((const __m128i*)(ptr + 4)), 4);
aacff36
+
aacff36
+        // Each _N holds R,G,B for pixel N in its lower 3 lanes (upper 5 are ignored).
aacff36
+        _0 = _01;
aacff36
+        _1 = _mm_srli_si128(_01, 6);
aacff36
+        _2 = _23;
aacff36
+        _3 = _mm_srli_si128(_23, 6);
aacff36
+
aacff36
+        // De-interlace to R,G,B.
aacff36
+        auto _02 = _mm_unpacklo_epi16(_0, _2),  // r0 r2 g0 g2 b0 b2 xx xx
aacff36
+             _13 = _mm_unpacklo_epi16(_1, _3);  // r1 r3 g1 g3 b1 b3 xx xx
aacff36
+
aacff36
+        auto R = _mm_unpacklo_epi16(_02, _13),  // r0 r1 r2 r3 g0 g1 g2 g3
aacff36
+             G = _mm_srli_si128(R, 8),
aacff36
+             B = _mm_unpackhi_epi16(_02, _13);  // b0 b1 b2 b3 xx xx xx xx
aacff36
+
aacff36
+        *r = sk_unaligned_load<U16>(&R);
aacff36
+        *g = sk_unaligned_load<U16>(&G);
aacff36
+        *b = sk_unaligned_load<U16>(&B);
aacff36
+    }
aacff36
+
aacff36
+    SI void load4(const uint16_t* ptr, U16* r, U16* g, U16* b, U16* a) {
aacff36
+        __m128i _01, _23;
aacff36
+        _01 = _mm_loadu_si128(((__m128i*)ptr) + 0); // r0 g0 b0 a0 r1 g1 b1 a1
aacff36
+        _23 = _mm_loadu_si128(((__m128i*)ptr) + 1); // r2 g2 b2 a2 r3 g3 b3 a3
aacff36
+
aacff36
+        auto _02 = _mm_unpacklo_epi16(_01, _23),  // r0 r2 g0 g2 b0 b2 a0 a2
aacff36
+             _13 = _mm_unpackhi_epi16(_01, _23);  // r1 r3 g1 g3 b1 b3 a1 a3
aacff36
+
aacff36
+        auto rg = _mm_unpacklo_epi16(_02, _13),  // r0 r1 r2 r3 g0 g1 g2 g3
aacff36
+             ba = _mm_unpackhi_epi16(_02, _13);  // b0 b1 b2 b3 a0 a1 a2 a3
aacff36
+
aacff36
+        *r = sk_unaligned_load<U16>((uint16_t*)&rg + 0);
aacff36
+        *g = sk_unaligned_load<U16>((uint16_t*)&rg + 4);
aacff36
+        *b = sk_unaligned_load<U16>((uint16_t*)&ba + 0);
aacff36
+        *a = sk_unaligned_load<U16>((uint16_t*)&ba + 4);
aacff36
+    }
aacff36
+
aacff36
+    SI void store4(uint16_t* ptr, U16 r, U16 g, U16 b, U16 a) {
aacff36
+        auto rg = _mm_unpacklo_epi16(widen_cast<__m128i>(r), widen_cast<__m128i>(g)),
aacff36
+             ba = _mm_unpacklo_epi16(widen_cast<__m128i>(b), widen_cast<__m128i>(a));
aacff36
+
aacff36
+        _mm_storeu_si128((__m128i*)ptr + 0, _mm_unpacklo_epi32(rg, ba));
aacff36
+        _mm_storeu_si128((__m128i*)ptr + 1, _mm_unpackhi_epi32(rg, ba));
aacff36
+    }
aacff36
+
aacff36
+    SI void load2(const float* ptr, F* r, F* g) {
aacff36
+        F _01, _23;
aacff36
+        _01 = _mm_loadu_ps(ptr + 0);
aacff36
+        _23 = _mm_loadu_ps(ptr + 4);
aacff36
+        *r = _mm_shuffle_ps(_01, _23, 0x88);
aacff36
+        *g = _mm_shuffle_ps(_01, _23, 0xDD);
aacff36
+    }
aacff36
+
aacff36
+    SI void store2(float* ptr, F r, F g) {
aacff36
+        F _01 = _mm_unpacklo_ps(r, g),
aacff36
+          _23 = _mm_unpackhi_ps(r, g);
aacff36
+        _mm_storeu_ps(ptr + 0, _01);
aacff36
+        _mm_storeu_ps(ptr + 4, _23);
aacff36
+    }
aacff36
+
aacff36
+    SI void load4(const float* ptr, F* r, F* g, F* b, F* a) {
aacff36
+        F _0, _1, _2, _3;
aacff36
+        _0 = _mm_loadu_ps(ptr + 0);
aacff36
+        _1 = _mm_loadu_ps(ptr + 4);
aacff36
+        _2 = _mm_loadu_ps(ptr + 8);
aacff36
+        _3 = _mm_loadu_ps(ptr +12);
aacff36
+        _MM_TRANSPOSE4_PS(_0,_1,_2,_3);
aacff36
+        *r = _0;
aacff36
+        *g = _1;
aacff36
+        *b = _2;
aacff36
+        *a = _3;
aacff36
+    }
aacff36
+
aacff36
+    SI void store4(float* ptr, F r, F g, F b, F a) {
aacff36
+        _MM_TRANSPOSE4_PS(r,g,b,a);
aacff36
+        _mm_storeu_ps(ptr + 0, r);
aacff36
+        _mm_storeu_ps(ptr + 4, g);
aacff36
+        _mm_storeu_ps(ptr + 8, b);
aacff36
+        _mm_storeu_ps(ptr +12, a);
aacff36
+    }
aacff36
+
aacff36
 #elif defined(JUMPER_IS_NEON)
aacff36
     template <typename T> using V = Vec<4, T>;
aacff36
     using F   = V<float   >;
765ac9d
@@ -1406,6 +1589,15 @@ SI F from_half(U16 h) {
aacff36
 #elif defined(JUMPER_IS_HSW)
aacff36
     return _mm256_cvtph_ps((__m128i)h);
aacff36
 
aacff36
+// Disabled for now as this is not a particularly hot function
aacff36
+// and there is no good reason to lock Chromium to POWER9+ yet.
aacff36
+#elif 0 && defined(JUMPER_IS_VSX) && __has_builtin(__builtin_vsx_xvcvhpsp)
aacff36
+    #if defined(SK_CPU_LENDIAN)
aacff36
+        return __builtin_vsx_xvcvhpsp({h[0], 0, h[1], 0, h[2], 0, h[3], 0});
aacff36
+    #else
aacff36
+        return __builtin_vsx_xvcvhpsp({0, h[0], 0, h[1], 0, h[2], 0, h[3]});
aacff36
+    #endif
aacff36
+
aacff36
 #else
aacff36
     // Remember, a half is 1-5-10 (sign-exponent-mantissa) with 15 exponent bias.
aacff36
     U32 sem = expand(h),
765ac9d
@@ -1429,6 +1621,16 @@ SI U16 to_half(F f) {
aacff36
 #elif defined(JUMPER_IS_HSW)
aacff36
     return (U16)_mm256_cvtps_ph(f, _MM_FROUND_CUR_DIRECTION);
aacff36
 
aacff36
+// Disabled for now as this is not a particularly hot function
aacff36
+// and there is no good reason to lock Chromium to POWER9+ yet.
aacff36
+#elif 0 && defined(JUMPER_IS_VSX) && __has_builtin(__builtin_vsx_xvcvsphp)
aacff36
+    __vector unsigned short v = __builtin_vsx_xvcvsphp(f);
aacff36
+    #if defined(SK_CPU_LENDIAN)
aacff36
+        return U16{v[0], v[2], v[4], v[6]};
aacff36
+    #else
aacff36
+        return U16{v[1], v[3], v[5], v[7]};
aacff36
+    #endif
aacff36
+
aacff36
 #else
aacff36
     // Remember, a float is 1-8-23 (sign-exponent-mantissa) with 127 exponent bias.
aacff36
     U32 sem = sk_bit_cast<U32>(f),
765ac9d
@@ -1504,7 +1706,7 @@ static constexpr size_t N = sizeof(F) /
aacff36
     // instead of {b,a} on the stack.  Narrow stages work best for __vectorcall.
aacff36
     #define ABI __vectorcall
aacff36
     #define JUMPER_NARROW_STAGES 1
765ac9d
-#elif defined(__x86_64__) || defined(SK_CPU_ARM64) || defined(SK_CPU_LOONGARCH)
765ac9d
+#elif defined(__x86_64__) || defined(SK_CPU_ARM64) || defined(SK_CPU_LOONGARCH) || defined(SK_CPU_PPC64)
aacff36
     // These platforms are ideal for wider stages, and their default ABI is ideal.
aacff36
     #define ABI
aacff36
     #define JUMPER_NARROW_STAGES 0
3f72327
@@ -5467,6 +5669,10 @@ SI F sqrt_(F x) {
3f72327
     float32x4_t lo,hi;
aacff36
     split(x, &lo,&hi);
3f72327
     return join<F>(sqrt(lo), sqrt(hi));
aacff36
+#elif defined(JUMPER_IS_VSX)
aacff36
+    vector float lo,hi;
aacff36
+    split(x, &lo,&hi);
aacff36
+    return join<F>(vec_sqrt(lo), vec_sqrt(hi));
3f72327
 #elif defined(JUMPER_IS_LASX)
3f72327
     __m256 lo,hi;
3f72327
     split(x, &lo,&hi);
3f72327
@@ -5498,6 +5704,10 @@ SI F floor_(F x) {
aacff36
     __m128 lo,hi;
aacff36
     split(x, &lo,&hi);
3f72327
     return join<F>(_mm_floor_ps(lo), _mm_floor_ps(hi));
aacff36
+#elif defined(JUMPER_IS_VSX)
aacff36
+    vector float lo,hi;
aacff36
+    split(x, &lo,&hi);
aacff36
+    return join<F>(vec_floor(lo), vec_floor(hi));
3f72327
 #elif defined(JUMPER_IS_LASX)
3f72327
     __m256 lo,hi;
3f72327
     split(x, &lo,&hi);
765ac9d
@@ -5517,6 +5727,7 @@ SI F floor_(F x) {
aacff36
 //     (2 * a * b + (1 << 15)) >> 16
aacff36
 // The result is a number on [-1, 1).
aacff36
 // Note: on neon this is a saturating multiply while the others are not.
aacff36
+// Note: for POWER, the code below was borrowed from emmintrin.h
aacff36
 SI I16 scaled_mult(I16 a, I16 b) {
aacff36
 #if defined(JUMPER_IS_SKX)
aacff36
     return (I16)_mm256_mulhrs_epi16((__m256i)a, (__m256i)b);
3f72327
@@ -5528,6 +5739,22 @@ SI I16 scaled_mult(I16 a, I16 b) {
3f72327
     return vqrdmulhq_s16(a, b);
3f72327
 #elif defined(JUMPER_IS_NEON)
3f72327
     return vqrdmulhq_s16(a, b);
aacff36
+#elif defined(JUMPER_IS_VSX)
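+    // Widen each 16-bit lane to 32 bits, multiply, shift right 14, round (+1, >> 1), then pack back to 16 bits.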
aacff36
+    const vector unsigned int shift = vec_splats((unsigned int)14);
aacff36
+    const vector int ones = vec_splats((signed int)1);
aacff36
+    vector int c = vec_unpackh((vector short)a);
aacff36
+    vector int d = vec_unpackh((vector short)b);
aacff36
+    vector int e = vec_unpackl((vector short)b);
aacff36
+    c = vec_mul(c, d);
aacff36
+    d = vec_unpackl((vector short)a);
aacff36
+    d = vec_mul(d, e);
aacff36
+    c = vec_sr(c, shift);
aacff36
+    d = vec_sr(d, shift);
aacff36
+    c = vec_add(c, ones);
aacff36
+    c = vec_sr(c,(vector unsigned int)ones);
aacff36
+    d = vec_add(d, ones);
aacff36
+    d = vec_sr(d,(vector unsigned int)ones);
aacff36
+    return vec_pack(c, d);
3f72327
 #elif defined(JUMPER_IS_LASX)
3f72327
     I16 res = __lasx_xvmuh_h(a, b);
3f72327
     return __lasx_xvslli_h(res, 1);
765ac9d
@@ -5555,7 +5782,26 @@ SI U16 constrained_add(I16 a, U16 b) {
aacff36
             SkASSERT(-ib <= ia && ia <= 65535 - ib);
aacff36
         }
aacff36
     #endif
aacff36
+
aacff36
+    // Technically, trying to add a signed and unsigned vector invokes undefined behavior
aacff36
+    // Just because it sort of seems to work on Intel/ARM on Clang doesn't mean it works everywhere...
aacff36
+    // FIXME: For added fun, the existing Skia unit tests do NOT properly test for issues in the
aacff36
+    // lowp bilerp path.  Investigate and write an appropriate test case...
aacff36
+#if defined(JUMPER_IS_VSX)
aacff36
+    // Most POWER compilers end up doing some kind of width promotion that causes memory corruption
aacff36
+    // and/or incorrect results.  This shows up as snow and general graphics corruption, especially
aacff36
+    // noticeable when trying to display a PNG at less than 50% size (resize the browser window down
aacff36
+    // until the artifacts appear).
aacff36
+    // Take the (likely invisible) loss of precision, convert b to a signed 16-bit vector immediately, and do
aacff36
+    // a proper saturated add here.  This seems to fully resolve the issue for all test cases Raptor
aacff36
+    // has seen so far...
aacff36
+    // In half precision mode, this function expects both input arguments to have been divided by
aacff36
+    // two prior to being called, and returns the result without scaling it back up by two.
aacff36
+    return vec_adds(a, (I16)b);
aacff36
+#else
aacff36
+    // Hic Sunt Dragones!
aacff36
     return b + sk_bit_cast<U16>(a);
aacff36
+#endif
aacff36
 }
aacff36
 
aacff36
 SI F fract(F x) { return x - floor_(x); }
765ac9d
@@ -6479,8 +6725,14 @@ STAGE_GP(bilerp_clamp_8888, const SkRast
aacff36
     //         2^-8 * v = 2^-9 * (tx*(R - L) + (R + L))
aacff36
     //                v = 1/2 * (tx*(R - L) + (R + L))
aacff36
     auto lerpX = [&](U16 left, U16 right) -> U16 {
aacff36
+#if defined(JUMPER_IS_VSX)
aacff36
+        // constrained_add() on POWER is run in half precision mode to avoid undefined behavior
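+        // Both terms use << 6 rather than << 7 so they arrive pre-halved; the final >> 1 used on other platforms is skipped on this path.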
aacff36
+        I16 width  = (I16)(right - left) << 6;
aacff36
+        U16 middle = (right + left) << 6;
aacff36
+#else
aacff36
         I16 width  = (I16)(right - left) << 7;
aacff36
         U16 middle = (right + left) << 7;
aacff36
+#endif
aacff36
         // The constrained_add is the most subtle part of lerp. The first term is on the interval
aacff36
         // [-1, 1), and the second term is on the interval is on the interval [0, 1) because
aacff36
         // both terms are too high by a factor of 2 which will be handled below. (Both R and L are
765ac9d
@@ -6492,7 +6744,12 @@ STAGE_GP(bilerp_clamp_8888, const SkRast
aacff36
         U16 v2  = constrained_add(scaled_mult(tx, width), middle) + 1;
aacff36
         // Divide by 2 to calculate v and at the same time bring the intermediate value onto the
aacff36
         // interval [0, 1/2] to set up for the lerpY.
aacff36
+#if defined(JUMPER_IS_VSX)
aacff36
+        // constrained_add() on POWER is run in half precision mode to avoid undefined behavior
aacff36
+        return v2;
aacff36
+#else
aacff36
         return v2 >> 1;
aacff36
+#endif
aacff36
     };
aacff36
 
aacff36
     const uint32_t* ptr;
765ac9d
@@ -6526,9 +6783,15 @@ STAGE_GP(bilerp_clamp_8888, const SkRast
aacff36
         I16 width  = (I16)bottom - (I16)top;
aacff36
         U16 middle = bottom + top;
aacff36
         // Add + 0x80 for rounding.
aacff36
+#if defined(JUMPER_IS_VSX)
aacff36
+        // constrained_add() on POWER is run in half precision mode to avoid undefined behavior
aacff36
+        U16 blend  = constrained_add(scaled_mult(ty, width) / 2, middle / 2) + (0x80 / 2);
aacff36
+        return blend >> 7;
aacff36
+#else
aacff36
         U16 blend  = constrained_add(scaled_mult(ty, width), middle) + 0x80;
aacff36
-
aacff36
         return blend >> 8;
aacff36
+#endif
aacff36
+
aacff36
     };
aacff36
 
aacff36
     r = lerpY(topR, bottomR);
3f72327
Index: chromium-125.0.6422.41/third_party/skia/src/base/SkVx.h
3f72327
===================================================================
3f72327
--- chromium-125.0.6422.41.orig/third_party/skia/src/base/SkVx.h
3f72327
+++ chromium-125.0.6422.41/third_party/skia/src/base/SkVx.h
3f72327
@@ -42,7 +42,13 @@
3f72327
 
3f72327
 #if SKVX_USE_SIMD
3f72327
     #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE1
3f72327
-        #include <immintrin.h>
3f72327
+        #if __PPC64__
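+            // immintrin.h is an x86 umbrella header; on ppc64 include just the individual wrapper headers that are needed.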
3f72327
+            #include <mmintrin.h>
3f72327
+            #include <emmintrin.h>
3f72327
+            #include <tmmintrin.h>
3f72327
+        #else
3f72327
+            #include <immintrin.h>
3f72327
+        #endif
3f72327
     #elif defined(SK_ARM_HAS_NEON)
3f72327
         #include <arm_neon.h>
3f72327
     #elif defined(__wasm_simd128__)
3f72327
Index: chromium-125.0.6422.41/third_party/skia/src/core/SkBlitMask_opts_ssse3.cpp
3f72327
===================================================================
3f72327
--- chromium-125.0.6422.41.orig/third_party/skia/src/core/SkBlitMask_opts_ssse3.cpp
3f72327
+++ chromium-125.0.6422.41/third_party/skia/src/core/SkBlitMask_opts_ssse3.cpp
3f72327
@@ -9,7 +9,7 @@
3f72327
 #include "src/core/SkBlitMask.h"
3f72327
 #include "src/core/SkOptsTargets.h"
3f72327
 
3f72327
-#if defined(SK_CPU_X86) && !defined(SK_ENABLE_OPTIMIZE_SIZE)
3f72327
+#if (defined(SK_CPU_X86) || defined(SK_CPU_PPC64)) && !defined(SK_ENABLE_OPTIMIZE_SIZE)
3f72327
 
3f72327
 // The order of these includes is important:
3f72327
 // 1) Select the target CPU architecture by defining SK_OPTS_TARGET and including SkOpts_SetTarget
3f72327
Index: chromium-125.0.6422.41/third_party/skia/src/core/SkSwizzler_opts_ssse3.cpp
3f72327
===================================================================
3f72327
--- chromium-125.0.6422.41.orig/third_party/skia/src/core/SkSwizzler_opts_ssse3.cpp
3f72327
+++ chromium-125.0.6422.41/third_party/skia/src/core/SkSwizzler_opts_ssse3.cpp
3f72327
@@ -10,7 +10,7 @@
3f72327
 #include "src/core/SkOptsTargets.h"
3f72327
 #include "src/core/SkSwizzlePriv.h"
3f72327
 
3f72327
-#if defined(SK_CPU_X86) && \
3f72327
+#if (defined(SK_CPU_X86) || defined(SK_CPU_PPC64)) && \
3f72327
     !defined(SK_ENABLE_OPTIMIZE_SIZE) && \
3f72327
     SK_CPU_SSE_LEVEL < SK_CPU_SSE_LEVEL_SSSE3
3f72327
 
3f72327
Index: chromium-125.0.6422.41/third_party/skia/src/core/SkBlitMask_opts.cpp
3f72327
===================================================================
3f72327
--- chromium-125.0.6422.41.orig/third_party/skia/src/core/SkBlitMask_opts.cpp
3f72327
+++ chromium-125.0.6422.41/third_party/skia/src/core/SkBlitMask_opts.cpp
3f72327
@@ -25,7 +25,7 @@ namespace SkOpts {
3f72327
     static bool init() {
3f72327
     #if defined(SK_ENABLE_OPTIMIZE_SIZE)
3f72327
         // All Init_foo functions are omitted when optimizing for size
3f72327
-    #elif defined(SK_CPU_X86)
3f72327
+    #elif defined(SK_CPU_X86) || defined(SK_CPU_PPC64)
3f72327
         #if SK_CPU_SSE_LEVEL < SK_CPU_SSE_LEVEL_SSSE3
3f72327
             if (SkCpu::Supports(SkCpu::SSSE3)) { Init_BlitMask_ssse3(); }
3f72327
         #endif
3f72327
Index: chromium-125.0.6422.41/third_party/skia/src/core/SkBitmapProcState_opts.cpp
3f72327
===================================================================
3f72327
--- chromium-125.0.6422.41.orig/third_party/skia/src/core/SkBitmapProcState_opts.cpp
3f72327
+++ chromium-125.0.6422.41/third_party/skia/src/core/SkBitmapProcState_opts.cpp
3f72327
@@ -25,7 +25,7 @@ namespace SkOpts {
3f72327
     static bool init() {
3f72327
     #if defined(SK_ENABLE_OPTIMIZE_SIZE)
3f72327
         // All Init_foo functions are omitted when optimizing for size
3f72327
-    #elif defined(SK_CPU_X86)
3f72327
+    #elif defined(SK_CPU_X86) || defined(SK_CPU_PPC64)
3f72327
         #if SK_CPU_SSE_LEVEL < SK_CPU_SSE_LEVEL_SSSE3
3f72327
             if (SkCpu::Supports(SkCpu::SSSE3)) { Init_BitmapProcState_ssse3(); }
3f72327
         #endif
3f72327
Index: chromium-125.0.6422.41/third_party/skia/src/core/SkCpu.h
3f72327
===================================================================
3f72327
--- chromium-125.0.6422.41.orig/third_party/skia/src/core/SkCpu.h
3f72327
+++ chromium-125.0.6422.41/third_party/skia/src/core/SkCpu.h
3f72327
@@ -60,7 +60,7 @@ inline bool SkCpu::Supports(uint32_t mas
3f72327
 
3f72327
     // If we mask in compile-time known lower limits, the compiler can
3f72327
     // often compile away this entire function.
3f72327
-#if SK_CPU_X86
3f72327
+#if SK_CPU_X86 || defined(SK_CPU_PPC64)
3f72327
     #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE1
3f72327
     features |= SSE1;
3f72327
     #endif
3f72327
Index: chromium-125.0.6422.41/third_party/skia/src/core/SkBitmapProcState_opts_ssse3.cpp
3f72327
===================================================================
3f72327
--- chromium-125.0.6422.41.orig/third_party/skia/src/core/SkBitmapProcState_opts_ssse3.cpp
3f72327
+++ chromium-125.0.6422.41/third_party/skia/src/core/SkBitmapProcState_opts_ssse3.cpp
3f72327
@@ -8,7 +8,7 @@
3f72327
 #include "include/private/base/SkFeatures.h"
3f72327
 #include "src/core/SkOptsTargets.h"
3f72327
 
3f72327
-#if defined(SK_CPU_X86) && !defined(SK_ENABLE_OPTIMIZE_SIZE)
3f72327
+#if (defined(SK_CPU_X86) || defined(SK_CPU_PPC64)) && !defined(SK_ENABLE_OPTIMIZE_SIZE)
3f72327
 
3f72327
 // The order of these includes is important:
3f72327
 // 1) Select the target CPU architecture by defining SK_OPTS_TARGET and including SkOpts_SetTarget
3f72327
Index: chromium-125.0.6422.41/third_party/skia/include/private/base/SkFeatures.h
3f72327
===================================================================
3f72327
--- chromium-125.0.6422.41.orig/third_party/skia/include/private/base/SkFeatures.h
3f72327
+++ chromium-125.0.6422.41/third_party/skia/include/private/base/SkFeatures.h
3f72327
@@ -63,6 +63,8 @@
3f72327
 
3f72327
 #if defined(__i386) || defined(_M_IX86) ||  defined(__x86_64__) || defined(_M_X64)
3f72327
   #define SK_CPU_X86 1
3f72327
+#elif defined(__powerpc64__) || defined(__PPC64__)
3f72327
+  #define SK_CPU_PPC64 1
3f72327
 #endif
3f72327
 
3f72327
 #if defined(__loongarch__) || defined (__loongarch64)
3f72327
Index: chromium-125.0.6422.41/third_party/skia/modules/skcms/src/skcms_internals.h
3f72327
===================================================================
3f72327
--- chromium-125.0.6422.41.orig/third_party/skia/modules/skcms/src/skcms_internals.h
3f72327
+++ chromium-125.0.6422.41/third_party/skia/modules/skcms/src/skcms_internals.h
3f72327
@@ -47,6 +47,7 @@ extern "C" {
3f72327
                                                  && !defined(__EMSCRIPTEN__) \
3f72327
                                                  && !defined(__arm__) \
3f72327
                                                  && !defined(__riscv) \
3f72327
+                                                 && !defined(__powerpc64__) \
3f72327
                                                  && !defined(__loongarch__) \
3f72327
                                                  && !defined(_WIN32) && !defined(__SYMBIAN32__)
3f72327
             #define SKCMS_HAS_MUSTTAIL 1
3f72327
Index: chromium-125.0.6422.41/third_party/skia/src/opts/SkSwizzler_opts.inc
3f72327
===================================================================
3f72327
--- chromium-125.0.6422.41.orig/third_party/skia/src/opts/SkSwizzler_opts.inc
3f72327
+++ chromium-125.0.6422.41/third_party/skia/src/opts/SkSwizzler_opts.inc
3f8e3da
@@ -14,7 +14,10 @@
3f8e3da
 #include <cmath>
3f8e3da
 #include <utility>
3f8e3da
 
3f8e3da
-#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE1
3f8e3da
+#if defined(SK_PPC64_HAS_SSE_COMPAT)
3f8e3da
+    #include <emmintrin.h>
3f8e3da
+    #include <tmmintrin.h>
3f8e3da
+#elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE1
3f8e3da
     #include <immintrin.h>
3f8e3da
 #elif defined(SK_ARM_HAS_NEON)
3f8e3da
     #include <arm_neon.h>
765ac9d
@@ -65,6 +68,33 @@ SI float reciprocal_alpha_times_255(floa
3f8e3da
 SI float reciprocal_alpha(float a) {
3f8e3da
     return reciprocal_alpha_portable(a);
3f8e3da
 }
3f8e3da
+#elif defined(SK_PPC64_HAS_SSE_COMPAT)
3f8e3da
+// -- VSX -- Harden against timing attacks
3f8e3da
+SK_NO_SANITIZE("float-divide-by-zero")
cd1739f
+static inline float reciprocal_alpha_times_255(float a) {
3f8e3da
+    SkASSERT(0 <= a && a <= 255);
3f8e3da
+
3f8e3da
+    vector float vA{a,a,a,a};
3f8e3da
+    vector float vB{255.0f,255.0f,255.0f,255.0f};
3f8e3da
+    vector float vC{0.0f,0.0f,0.0f,0.0f};
3f8e3da
+    vector float q = vec_div(vB, vA);
3f8e3da
+    vector float vCmp{static_cast<float>(vA != vC)};
3f8e3da
+
3f8e3da
+    return vec_and(vCmp, q)[0];
3f8e3da
+}
3f8e3da
+
3f8e3da
+SK_NO_SANITIZE("float-divide-by-zero")
cd1739f
+static inline float reciprocal_alpha(float a) {
3f8e3da
+    SkASSERT(0 <= a && a <= 1);
3f8e3da
+
3f8e3da
+    vector float vA{a,a,a,a};
3f8e3da
+    vector float vB{1.0f,1.0f,1.0f,1.0f};
3f8e3da
+    vector float vC{0.0f,0.0f,0.0f,0.0f};
3f8e3da
+    vector float q = vec_div(vB, vA);
3f8e3da
+    vector float vCmp{static_cast<float>(vA != vC)};
3f8e3da
+
3f8e3da
+    return vec_and(vCmp, q)[0];
3f8e3da
+}
3f8e3da
 #elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE1 && (defined(__clang__) || !defined(_MSC_VER))
3f8e3da
 // -- SSE -- Harden against timing attacks -- MSVC is not supported.
3f8e3da
 using F4 = __m128;
3f72327
Index: chromium-125.0.6422.41/third_party/skia/src/core/SkBlitter_ARGB32.cpp
3f72327
===================================================================
3f72327
--- chromium-125.0.6422.41.orig/third_party/skia/src/core/SkBlitter_ARGB32.cpp
3f72327
+++ chromium-125.0.6422.41/third_party/skia/src/core/SkBlitter_ARGB32.cpp
3f72327
@@ -126,6 +126,16 @@ static inline SkPMColor blend_lcd16_opaq
3f72327
 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
3f72327
     #include <emmintrin.h>
3f72327
 
3f72327
+#if defined(SK_CPU_PPC64)
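+    /* The VSX emmintrin.h wrapper does not appear to provide _mm_loadu_si64, so a local equivalent is defined here. */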
3f72327
+    /* Load signed 64-bit integer from P into vector element 0.  The address need not be 16-byte aligned.  */
3f72327
+    extern __inline __m128i
3f72327
+        __attribute__((__gnu_inline__, __always_inline__, __artificial__))
3f72327
+        _mm_loadu_si64 (void const *__P)
3f72327
+    {
3f72327
+      return _mm_set_epi64((__m64)0LL, *(__m64 *)__P);
3f72327
+    }
3f72327
+#endif
3f72327
+
3f72327
     // The following (left) shifts cause the top 5 bits of the mask components to
3f72327
     // line up with the corresponding components in an SkPMColor.
3f72327
     // Note that the mask's RGB16 order may differ from the SkPMColor order.