Description: libvpx/ppc: route VSX vector stores through an explicit stxvd2x helper
 The VSX quantization (vp9_quantize_fp_vsx, vp9_quantize_fp_32x32_vsx,
 vpx_quantize_b_vsx, vpx_quantize_b_32x32_vsx) and 32x32 forward-transform
 store paths write 16-bit coefficient vectors with the vec_vsx_st()
 intrinsic.  This patch adds a static inline helper, vec_u64_store(),
 which swaps the two doublewords on little-endian targets (xxswapd) and
 then issues a raw stxvd2x, and converts every vec_vsx_st() call site to
 use it.  The "r0" clobber on the register-offset form keeps the compiler
 from allocating r0 for the offset operand (r0 in the RA slot of stxvd2x
 reads as literal zero, not the register).
 .
 NOTE(review): the rationale is not recorded in the patch itself --
 presumably it works around incorrect element ordering / lowering of the
 intrinsic on ppc64le; confirm against the originating bug report before
 carrying this forward.
 NOTE(review): vec_u64_store() takes vector unsigned long long but every
 call site passes int16x8_t (vector signed short); this implicit vector
 conversion compiles only under lax vector-conversion rules -- verify the
 build flags or add casts at the call sites.
 NOTE(review): WORDS_BIGENDIAN is an autoconf-style macro; verify the
 libvpx/Chromium build actually defines it on big-endian targets,
 otherwise __BIG_ENDIAN__ would be the safer test.
 NOTE(review): the helper is duplicated verbatim in three files; a shared
 header (e.g. vpx_dsp/ppc/types_vsx.h) would avoid the triplication.
Forwarded: not-needed
Index: chromium-120.0.6099.71/third_party/libvpx/source/libvpx/vp9/encoder/ppc/vp9_quantize_vsx.c
===================================================================
--- chromium-120.0.6099.71.orig/third_party/libvpx/source/libvpx/vp9/encoder/ppc/vp9_quantize_vsx.c
+++ chromium-120.0.6099.71/third_party/libvpx/source/libvpx/vp9/encoder/ppc/vp9_quantize_vsx.c
@@ -38,6 +38,28 @@ static INLINE int16x8_t vec_max_across(i
   return vec_max(a, vec_perm(a, a, vec_perm16));
 }
 
+static INLINE void
+vec_u64_store(vector unsigned long long vecu64, unsigned long offset, void *ptr)
+{
+#ifndef WORDS_BIGENDIAN
+  __asm__ ("xxswapd %x0, %x1"
+           : "=wa" (vecu64)
+           : "wa" (vecu64));
+#endif
+#if __GNUC__ >= 4
+  if (__builtin_constant_p (offset) && offset == 0)
+    __asm__ ("stxvd2x %x0,0,%1\n\t"
+             :
+             : "wa" (vecu64), "r" ((uintptr_t)ptr)
+             : "memory");
+  else
+#endif
+    __asm__ ("stxvd2x %x0,%1,%2\n\t"
+             :
+             : "wa" (vecu64), "r" (offset), "r" ((uintptr_t)ptr)
+             : "memory", "r0");
+}
+
 void vp9_quantize_fp_vsx(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
                          const int16_t *round_ptr, const int16_t *quant_ptr,
                          tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
@@ -60,10 +82,10 @@ void vp9_quantize_fp_vsx(const tran_low_
   qcoeff0 = vec_mulhi(vec_vaddshs(vec_abs(coeff0), round), quant);
   zero_coeff0 = vec_cmpeq(qcoeff0, vec_zeros_s16);
   qcoeff0 = vec_sign(qcoeff0, coeff0);
-  vec_vsx_st(qcoeff0, 0, qcoeff_ptr);
+  vec_u64_store(qcoeff0, 0, qcoeff_ptr);
 
   dqcoeff0 = vec_mladd(qcoeff0, dequant, vec_zeros_s16);
-  vec_vsx_st(dqcoeff0, 0, dqcoeff_ptr);
+  vec_u64_store(dqcoeff0, 0, dqcoeff_ptr);
 
   // Remove DC value from round and quant
   round = vec_splat(round, 1);
@@ -76,10 +98,10 @@ void vp9_quantize_fp_vsx(const tran_low_
   qcoeff1 = vec_mulhi(vec_vaddshs(vec_abs(coeff1), round), quant);
   zero_coeff1 = vec_cmpeq(qcoeff1, vec_zeros_s16);
   qcoeff1 = vec_sign(qcoeff1, coeff1);
-  vec_vsx_st(qcoeff1, 16, qcoeff_ptr);
+  vec_u64_store(qcoeff1, 16, qcoeff_ptr);
 
   dqcoeff1 = vec_mladd(qcoeff1, dequant, vec_zeros_s16);
-  vec_vsx_st(dqcoeff1, 16, dqcoeff_ptr);
+  vec_u64_store(dqcoeff1, 16, dqcoeff_ptr);
 
   eob = vec_max(vec_or(scan0, zero_coeff0), vec_or(scan1, zero_coeff1));
 
@@ -107,23 +129,23 @@ void vp9_quantize_fp_vsx(const tran_low_
     qcoeff0 = vec_mulhi(vec_vaddshs(vec_abs(coeff0), round), quant);
     zero_coeff0 = vec_cmpeq(qcoeff0, vec_zeros_s16);
     qcoeff0 = vec_sign(qcoeff0, coeff0);
-    vec_vsx_st(qcoeff0, off0, qcoeff_ptr);
+    vec_u64_store(qcoeff0, off0, qcoeff_ptr);
     dqcoeff0 = vec_mladd(qcoeff0, dequant, vec_zeros_s16);
-    vec_vsx_st(dqcoeff0, off0, dqcoeff_ptr);
+    vec_u64_store(dqcoeff0, off0, dqcoeff_ptr);
 
     qcoeff1 = vec_mulhi(vec_vaddshs(vec_abs(coeff1), round), quant);
     zero_coeff1 = vec_cmpeq(qcoeff1, vec_zeros_s16);
     qcoeff1 = vec_sign(qcoeff1, coeff1);
-    vec_vsx_st(qcoeff1, off1, qcoeff_ptr);
+    vec_u64_store(qcoeff1, off1, qcoeff_ptr);
     dqcoeff1 = vec_mladd(qcoeff1, dequant, vec_zeros_s16);
-    vec_vsx_st(dqcoeff1, off1, dqcoeff_ptr);
+    vec_u64_store(dqcoeff1, off1, dqcoeff_ptr);
 
     qcoeff2 = vec_mulhi(vec_vaddshs(vec_abs(coeff2), round), quant);
     zero_coeff2 = vec_cmpeq(qcoeff2, vec_zeros_s16);
     qcoeff2 = vec_sign(qcoeff2, coeff2);
-    vec_vsx_st(qcoeff2, off2, qcoeff_ptr);
+    vec_u64_store(qcoeff2, off2, qcoeff_ptr);
     dqcoeff2 = vec_mladd(qcoeff2, dequant, vec_zeros_s16);
-    vec_vsx_st(dqcoeff2, off2, dqcoeff_ptr);
+    vec_u64_store(dqcoeff2, off2, dqcoeff_ptr);
 
     eob = vec_max(eob, vec_or(scan0, zero_coeff0));
     eob2 = vec_max(vec_or(scan1, zero_coeff1), vec_or(scan2, zero_coeff2));
@@ -200,10 +222,10 @@ void vp9_quantize_fp_32x32_vsx(const tra
   qcoeff0 = vec_and(qcoeff0, mask0);
   zero_coeff0 = vec_cmpeq(qcoeff0, vec_zeros_s16);
   qcoeff0 = vec_sign(qcoeff0, coeff0);
-  vec_vsx_st(qcoeff0, 0, qcoeff_ptr);
+  vec_u64_store(qcoeff0, 0, qcoeff_ptr);
 
   dqcoeff0 = dequantize_coeff_32(qcoeff0, dequant);
-  vec_vsx_st(dqcoeff0, 0, dqcoeff_ptr);
+  vec_u64_store(dqcoeff0, 0, dqcoeff_ptr);
 
   // Remove DC value from thres, round, quant and dequant
   thres = vec_splat(thres, 1);
@@ -219,10 +241,10 @@ void vp9_quantize_fp_32x32_vsx(const tra
   qcoeff1 = vec_and(qcoeff1, mask1);
   zero_coeff1 = vec_cmpeq(qcoeff1, vec_zeros_s16);
   qcoeff1 = vec_sign(qcoeff1, coeff1);
-  vec_vsx_st(qcoeff1, 16, qcoeff_ptr);
+  vec_u64_store(qcoeff1, 16, qcoeff_ptr);
 
   dqcoeff1 = dequantize_coeff_32(qcoeff1, dequant);
-  vec_vsx_st(dqcoeff1, 16, dqcoeff_ptr);
+  vec_u64_store(dqcoeff1, 16, dqcoeff_ptr);
 
   eob = vec_max(vec_or(scan0, zero_coeff0), vec_or(scan1, zero_coeff1));
 
@@ -260,17 +282,17 @@ void vp9_quantize_fp_32x32_vsx(const tra
     qcoeff1 = vec_sign(qcoeff1, coeff1);
     qcoeff2 = vec_sign(qcoeff2, coeff2);
 
-    vec_vsx_st(qcoeff0, off0, qcoeff_ptr);
-    vec_vsx_st(qcoeff1, off1, qcoeff_ptr);
-    vec_vsx_st(qcoeff2, off2, qcoeff_ptr);
+    vec_u64_store(qcoeff0, off0, qcoeff_ptr);
+    vec_u64_store(qcoeff1, off1, qcoeff_ptr);
+    vec_u64_store(qcoeff2, off2, qcoeff_ptr);
 
     dqcoeff0 = dequantize_coeff_32(qcoeff0, dequant);
     dqcoeff1 = dequantize_coeff_32(qcoeff1, dequant);
     dqcoeff2 = dequantize_coeff_32(qcoeff2, dequant);
 
-    vec_vsx_st(dqcoeff0, off0, dqcoeff_ptr);
-    vec_vsx_st(dqcoeff1, off1, dqcoeff_ptr);
-    vec_vsx_st(dqcoeff2, off2, dqcoeff_ptr);
+    vec_u64_store(dqcoeff0, off0, dqcoeff_ptr);
+    vec_u64_store(dqcoeff1, off1, dqcoeff_ptr);
+    vec_u64_store(dqcoeff2, off2, dqcoeff_ptr);
 
     eob = vec_max(eob, vec_or(scan0, zero_coeff0));
     eob2 = vec_max(vec_or(scan1, zero_coeff1), vec_or(scan2, zero_coeff2));
Index: chromium-120.0.6099.71/third_party/libvpx/source/libvpx/vpx_dsp/ppc/fdct32x32_vsx.c
===================================================================
--- chromium-120.0.6099.71.orig/third_party/libvpx/source/libvpx/vpx_dsp/ppc/fdct32x32_vsx.c
+++ chromium-120.0.6099.71/third_party/libvpx/source/libvpx/vpx_dsp/ppc/fdct32x32_vsx.c
@@ -15,6 +15,28 @@
 #include "vpx_dsp/ppc/txfm_common_vsx.h"
 #include "vpx_dsp/ppc/types_vsx.h"
 
+static INLINE void
+vec_u64_store(vector unsigned long long vecu64, unsigned long offset, void *ptr)
+{
+#ifndef WORDS_BIGENDIAN
+  __asm__ ("xxswapd %x0, %x1"
+           : "=wa" (vecu64)
+           : "wa" (vecu64));
+#endif
+#if __GNUC__ >= 4
+  if (__builtin_constant_p (offset) && offset == 0)
+    __asm__ ("stxvd2x %x0,0,%1\n\t"
+             :
+             : "wa" (vecu64), "r" ((uintptr_t)ptr)
+             : "memory");
+  else
+#endif
+    __asm__ ("stxvd2x %x0,%1,%2\n\t"
+             :
+             : "wa" (vecu64), "r" (offset), "r" ((uintptr_t)ptr)
+             : "memory", "r0");
+}
+
 // Returns ((a +/- b) * cospi16 + (2 << 13)) >> 14.
 static INLINE void single_butterfly(int16x8_t a, int16x8_t b, int16x8_t *add,
                                     int16x8_t *sub) {
@@ -164,45 +186,45 @@ static INLINE void load(const int16_t *a
 }
 
 static INLINE void store(tran_low_t *a, const int16x8_t *b) {
-  vec_vsx_st(b[0], 0, a);
-  vec_vsx_st(b[8], 0, a + 8);
-  vec_vsx_st(b[16], 0, a + 16);
-  vec_vsx_st(b[24], 0, a + 24);
-
-  vec_vsx_st(b[1], 0, a + 32);
-  vec_vsx_st(b[9], 0, a + 40);
-  vec_vsx_st(b[17], 0, a + 48);
-  vec_vsx_st(b[25], 0, a + 56);
-
-  vec_vsx_st(b[2], 0, a + 64);
-  vec_vsx_st(b[10], 0, a + 72);
-  vec_vsx_st(b[18], 0, a + 80);
-  vec_vsx_st(b[26], 0, a + 88);
-
-  vec_vsx_st(b[3], 0, a + 96);
-  vec_vsx_st(b[11], 0, a + 104);
-  vec_vsx_st(b[19], 0, a + 112);
-  vec_vsx_st(b[27], 0, a + 120);
-
-  vec_vsx_st(b[4], 0, a + 128);
-  vec_vsx_st(b[12], 0, a + 136);
-  vec_vsx_st(b[20], 0, a + 144);
-  vec_vsx_st(b[28], 0, a + 152);
-
-  vec_vsx_st(b[5], 0, a + 160);
-  vec_vsx_st(b[13], 0, a + 168);
-  vec_vsx_st(b[21], 0, a + 176);
-  vec_vsx_st(b[29], 0, a + 184);
-
-  vec_vsx_st(b[6], 0, a + 192);
-  vec_vsx_st(b[14], 0, a + 200);
-  vec_vsx_st(b[22], 0, a + 208);
-  vec_vsx_st(b[30], 0, a + 216);
-
-  vec_vsx_st(b[7], 0, a + 224);
-  vec_vsx_st(b[15], 0, a + 232);
-  vec_vsx_st(b[23], 0, a + 240);
-  vec_vsx_st(b[31], 0, a + 248);
+  vec_u64_store(b[0], 0, a);
+  vec_u64_store(b[8], 0, a + 8);
+  vec_u64_store(b[16], 0, a + 16);
+  vec_u64_store(b[24], 0, a + 24);
+
+  vec_u64_store(b[1], 0, a + 32);
+  vec_u64_store(b[9], 0, a + 40);
+  vec_u64_store(b[17], 0, a + 48);
+  vec_u64_store(b[25], 0, a + 56);
+
+  vec_u64_store(b[2], 0, a + 64);
+  vec_u64_store(b[10], 0, a + 72);
+  vec_u64_store(b[18], 0, a + 80);
+  vec_u64_store(b[26], 0, a + 88);
+
+  vec_u64_store(b[3], 0, a + 96);
+  vec_u64_store(b[11], 0, a + 104);
+  vec_u64_store(b[19], 0, a + 112);
+  vec_u64_store(b[27], 0, a + 120);
+
+  vec_u64_store(b[4], 0, a + 128);
+  vec_u64_store(b[12], 0, a + 136);
+  vec_u64_store(b[20], 0, a + 144);
+  vec_u64_store(b[28], 0, a + 152);
+
+  vec_u64_store(b[5], 0, a + 160);
+  vec_u64_store(b[13], 0, a + 168);
+  vec_u64_store(b[21], 0, a + 176);
+  vec_u64_store(b[29], 0, a + 184);
+
+  vec_u64_store(b[6], 0, a + 192);
+  vec_u64_store(b[14], 0, a + 200);
+  vec_u64_store(b[22], 0, a + 208);
+  vec_u64_store(b[30], 0, a + 216);
+
+  vec_u64_store(b[7], 0, a + 224);
+  vec_u64_store(b[15], 0, a + 232);
+  vec_u64_store(b[23], 0, a + 240);
+  vec_u64_store(b[31], 0, a + 248);
 }
 
 // Returns 1 if negative 0 if positive
Index: chromium-120.0.6099.71/third_party/libvpx/source/libvpx/vpx_dsp/ppc/quantize_vsx.c
===================================================================
--- chromium-120.0.6099.71.orig/third_party/libvpx/source/libvpx/vpx_dsp/ppc/quantize_vsx.c
+++ chromium-120.0.6099.71/third_party/libvpx/source/libvpx/vpx_dsp/ppc/quantize_vsx.c
@@ -13,6 +13,28 @@
 #include "./vpx_dsp_rtcd.h"
 #include "vpx_dsp/ppc/types_vsx.h"
 
+static INLINE void
+vec_u64_store(vector unsigned long long vecu64, unsigned long offset, void *ptr)
+{
+#ifndef WORDS_BIGENDIAN
+  __asm__ ("xxswapd %x0, %x1"
+           : "=wa" (vecu64)
+           : "wa" (vecu64));
+#endif
+#if __GNUC__ >= 4
+  if (__builtin_constant_p (offset) && offset == 0)
+    __asm__ ("stxvd2x %x0,0,%1\n\t"
+             :
+             : "wa" (vecu64), "r" ((uintptr_t)ptr)
+             : "memory");
+  else
+#endif
+    __asm__ ("stxvd2x %x0,%1,%2\n\t"
+             :
+             : "wa" (vecu64), "r" (offset), "r" ((uintptr_t)ptr)
+             : "memory", "r0");
+}
+
 // Negate 16-bit integers in a when the corresponding signed 16-bit
 // integer in b is negative.
 static INLINE int16x8_t vec_sign(int16x8_t a, int16x8_t b) {
@@ -124,19 +146,19 @@ void vpx_quantize_b_vsx(const tran_low_t
   qcoeff0 = quantize_coeff(coeff0, coeff0_abs, round, quant, quant_shift,
                            zero_mask0);
-  vec_vsx_st(qcoeff0, 0, qcoeff_ptr);
+  vec_u64_store(qcoeff0, 0, qcoeff_ptr);
 
   round = vec_splat(round, 1);
   quant = vec_splat(quant, 1);
   quant_shift = vec_splat(quant_shift, 1);
   qcoeff1 = quantize_coeff(coeff1, coeff1_abs, round, quant, quant_shift,
                            zero_mask1);
-  vec_vsx_st(qcoeff1, 16, qcoeff_ptr);
+  vec_u64_store(qcoeff1, 16, qcoeff_ptr);
 
   dqcoeff0 = vec_mladd(qcoeff0, dequant, vec_zeros_s16);
-  vec_vsx_st(dqcoeff0, 0, dqcoeff_ptr);
+  vec_u64_store(dqcoeff0, 0, dqcoeff_ptr);
   dequant = vec_splat(dequant, 1);
   dqcoeff1 = vec_mladd(qcoeff1, dequant, vec_zeros_s16);
-  vec_vsx_st(dqcoeff1, 16, dqcoeff_ptr);
+  vec_u64_store(dqcoeff1, 16, dqcoeff_ptr);
 
   eob = vec_max(nonzero_scanindex(qcoeff0, iscan_ptr, 0),
                 nonzero_scanindex(qcoeff1, iscan_ptr, 16));
@@ -164,17 +186,17 @@ void vpx_quantize_b_vsx(const tran_low_t
                              zero_mask1);
     qcoeff2 = quantize_coeff(coeff2, coeff2_abs, round, quant, quant_shift,
                              zero_mask2);
-    vec_vsx_st(qcoeff0, off0, qcoeff_ptr);
-    vec_vsx_st(qcoeff1, off1, qcoeff_ptr);
-    vec_vsx_st(qcoeff2, off2, qcoeff_ptr);
+    vec_u64_store(qcoeff0, off0, qcoeff_ptr);
+    vec_u64_store(qcoeff1, off1, qcoeff_ptr);
+    vec_u64_store(qcoeff2, off2, qcoeff_ptr);
 
     dqcoeff0 = vec_mladd(qcoeff0, dequant, vec_zeros_s16);
     dqcoeff1 = vec_mladd(qcoeff1, dequant, vec_zeros_s16);
     dqcoeff2 = vec_mladd(qcoeff2, dequant, vec_zeros_s16);
 
-    vec_vsx_st(dqcoeff0, off0, dqcoeff_ptr);
-    vec_vsx_st(dqcoeff1, off1, dqcoeff_ptr);
-    vec_vsx_st(dqcoeff2, off2, dqcoeff_ptr);
+    vec_u64_store(dqcoeff0, off0, dqcoeff_ptr);
+    vec_u64_store(dqcoeff1, off1, dqcoeff_ptr);
+    vec_u64_store(dqcoeff2, off2, dqcoeff_ptr);
 
     eob = vec_max(eob, nonzero_scanindex(qcoeff0, iscan_ptr, off0));
     eob2 = vec_max(nonzero_scanindex(qcoeff1, iscan_ptr, off1),
@@ -243,12 +265,12 @@ void vpx_quantize_b_32x32_vsx(const tran
   qcoeff1 = quantize_coeff_32(coeff1, coeff1_abs, round, quant, quant_shift,
                               zero_mask1);
 
-  vec_vsx_st(qcoeff0, 0, qcoeff_ptr);
-  vec_vsx_st(qcoeff1, 16, qcoeff_ptr);
+  vec_u64_store(qcoeff0, 0, qcoeff_ptr);
+  vec_u64_store(qcoeff1, 16, qcoeff_ptr);
 
-  vec_vsx_st(dequantize_coeff_32(qcoeff0, dequant), 0, dqcoeff_ptr);
+  vec_u64_store(dequantize_coeff_32(qcoeff0, dequant), 0, dqcoeff_ptr);
   dequant = vec_splat(dequant, 1);  // remove DC from dequant
-  vec_vsx_st(dequantize_coeff_32(qcoeff1, dequant), 16, dqcoeff_ptr);
+  vec_u64_store(dequantize_coeff_32(qcoeff1, dequant), 16, dqcoeff_ptr);
 
   eob = vec_max(nonzero_scanindex(qcoeff0, iscan_ptr, 0),
                 nonzero_scanindex(qcoeff1, iscan_ptr, 16));
@@ -276,13 +298,13 @@ void vpx_quantize_b_32x32_vsx(const tran
     qcoeff2 = quantize_coeff_32(coeff2, coeff2_abs, round, quant, quant_shift,
                                 zero_mask2);
 
-    vec_vsx_st(qcoeff0, off0, qcoeff_ptr);
-    vec_vsx_st(qcoeff1, off1, qcoeff_ptr);
-    vec_vsx_st(qcoeff2, off2, qcoeff_ptr);
-
-    vec_vsx_st(dequantize_coeff_32(qcoeff0, dequant), off0, dqcoeff_ptr);
-    vec_vsx_st(dequantize_coeff_32(qcoeff1, dequant), off1, dqcoeff_ptr);
-    vec_vsx_st(dequantize_coeff_32(qcoeff2, dequant), off2, dqcoeff_ptr);
+    vec_u64_store(qcoeff0, off0, qcoeff_ptr);
+    vec_u64_store(qcoeff1, off1, qcoeff_ptr);
+    vec_u64_store(qcoeff2, off2, qcoeff_ptr);
+
+    vec_u64_store(dequantize_coeff_32(qcoeff0, dequant), off0, dqcoeff_ptr);
+    vec_u64_store(dequantize_coeff_32(qcoeff1, dequant), off1, dqcoeff_ptr);
+    vec_u64_store(dequantize_coeff_32(qcoeff2, dequant), off2, dqcoeff_ptr);
 
     eob = vec_max(eob, nonzero_scanindex(qcoeff0, iscan_ptr, off0));
     eob2 = vec_max(nonzero_scanindex(qcoeff1, iscan_ptr, off1),