Blob Blame History Raw
diff --git a/gfx/wr/swgl/src/blend.h b/gfx/wr/swgl/src/blend.h
--- a/gfx/wr/swgl/src/blend.h
+++ b/gfx/wr/swgl/src/blend.h
@@ -405,7 +405,7 @@
   blend_key = BlendKey(AA_BLEND_KEY_NONE + blend_key);
 }
 
-static ALWAYS_INLINE WideRGBA8 blend_pixels(uint32_t* buf, PackedRGBA8 pdst,
+static PREFER_INLINE WideRGBA8 blend_pixels(uint32_t* buf, PackedRGBA8 pdst,
                                             WideRGBA8 src, int span = 4) {
   WideRGBA8 dst = unpack(pdst);
   const WideRGBA8 RGB_MASK = {0xFFFF, 0xFFFF, 0xFFFF, 0,      0xFFFF, 0xFFFF,
@@ -686,7 +686,7 @@
   // clang-format on
 }
 
-static ALWAYS_INLINE WideR8 blend_pixels(uint8_t* buf, WideR8 dst, WideR8 src,
+static PREFER_INLINE WideR8 blend_pixels(uint8_t* buf, WideR8 dst, WideR8 src,
                                          int span = 4) {
 // clang-format off
 #define BLEND_CASE_KEY(key)                          \
diff --git a/gfx/wr/swgl/src/gl.cc b/gfx/wr/swgl/src/gl.cc
--- a/gfx/wr/swgl/src/gl.cc
+++ b/gfx/wr/swgl/src/gl.cc
@@ -58,10 +58,24 @@
 }
 
 #else
-#  define ALWAYS_INLINE __attribute__((always_inline)) inline
+// GCC is slower when dealing with always_inline, especially in debug builds.
+// When using Clang, use always_inline more aggressively.
+#  if defined(__clang__) || defined(NDEBUG)
+#    define ALWAYS_INLINE __attribute__((always_inline)) inline
+#  else
+#    define ALWAYS_INLINE inline
+#  endif
 #  define NO_INLINE __attribute__((noinline))
 #endif
 
+// Some functions may cause excessive binary bloat if inlined in debug or with
+// GCC builds, so use PREFER_INLINE on these instead of ALWAYS_INLINE.
+#if defined(__clang__) && defined(NDEBUG)
+#  define PREFER_INLINE ALWAYS_INLINE
+#else
+#  define PREFER_INLINE inline
+#endif
+
 #define UNREACHABLE __builtin_unreachable()
 
 #define UNUSED [[maybe_unused]]