@@ -0,0 +1,914 @@
+ --- /dev/null
+ +++ b/third_party/libpng/powerpc/filter_vsx_intrinsics.c
+ @@ -0,0 +1,768 @@
+ +/* filter_vsx_intrinsics.c - PowerPC optimised filter functions
+ + *
+ + * Copyright (c) 2018 Cosmin Truta
+ + * Copyright (c) 2017 Glenn Randers-Pehrson
+ + * Written by Vadim Barkov, 2017.
+ + *
+ + * This code is released under the libpng license.
+ + * For conditions of distribution and use, see the disclaimer
+ + * and license in png.h
+ + */
+ +
+ +#include <stdio.h>
+ +#include <stdint.h>
+ +#include "../pngpriv.h"
+ +
+ +#ifdef PNG_READ_SUPPORTED
+ +
+ +/* This code requires -maltivec and -mvsx on the command line: */
+ +#if PNG_POWERPC_VSX_IMPLEMENTATION == 1 /* intrinsics code from pngpriv.h */
+ +
+ +#include <altivec.h>
+ +
+ +#if PNG_POWERPC_VSX_OPT > 0
+ +
+ +#ifndef __VSX__
+ +# error "This code requires VSX support (POWER7 and later). Please provide -mvsx compiler flag."
+ +#endif
+ +
+ +#define vec_ld_unaligned(vec,data) vec = vec_vsx_ld(0,data)
+ +#define vec_st_unaligned(vec,data) vec_vsx_st(vec,0,data)
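A note on the two wrapper macros above: classic AltiVec vec_ld and vec_st can only address 16-byte-aligned memory (the low four bits of the effective address are ignored), while the VSX forms vec_vsx_ld and vec_vsx_st accept any alignment. A minimal sketch of the difference, illustrative only and not part of the patch:

#include <altivec.h>

/* Safe for any pointer: VSX loads handle unaligned addresses. */
static vector unsigned char load_any(const unsigned char *p)
{
   return vec_vsx_ld(0, p);
}

/* Correct only when p is 16-byte aligned: vec_ld behaves as if the low
 * four bits of the address were zero, so a misaligned p would silently
 * fetch the wrong 16 bytes.
 */
static vector unsigned char load_aligned(const unsigned char *p)
{
   return vec_ld(0, p);
}

This is why the filter functions below use vec_ld only on the row pointer, which the scalar head loop has already advanced to a 16-byte boundary, and vec_ld_unaligned on the previous-row pointer, whose alignment is arbitrary.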
+ +
+ +
+ +/* Functions in this file look at most 3 pixels (a,b,c) to predict the 4th (d).
+ + * They're positioned like this:
+ + *    prev:  c b
+ + *    row:   a d
+ + * The Sub filter predicts d=a, Avg d=(a+b)/2, and Paeth predicts d to be
+ + * whichever of a, b, or c is closest to p=a+b-c.
+ + * ( this is taken from ../intel/filter_sse2_intrinsics.c )
+ + */
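To make the comment above concrete, here is a standalone scalar check, not part of the patch, that the PNG specification's Paeth predictor really does return whichever of a, b, c is closest to p = a+b-c, with ties broken in the order a, b, c:

#include <stdio.h>
#include <stdlib.h>

/* Paeth predictor exactly as in the PNG specification. */
static int paeth(int a, int b, int c)
{
   int p = a + b - c;
   int pa = abs(p - a), pb = abs(p - b), pc = abs(p - c);
   if (pa <= pb && pa <= pc) return a;
   if (pb <= pc) return b;
   return c;
}

int main(void)
{
   /* prev: c=3 b=9 / row: a=5 d=? ; p = 5+9-3 = 11 and b=9 is closest. */
   printf("%d\n", paeth(5, 9, 3));   /* prints 9 */
   return 0;
}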
+ +
+ +#define vsx_declare_common_vars(row_info,row,prev_row,offset) \
+ +   png_byte i;\
+ +   png_bytep rp = row + offset;\
+ +   png_const_bytep pp = prev_row;\
+ +   size_t unaligned_top = 16 - (((size_t)rp % 16));\
+ +   size_t istop;\
+ +   if(unaligned_top == 16)\
+ +      unaligned_top = 0;\
+ +   istop = row_info->rowbytes;\
+ +   if((unaligned_top < istop))\
+ +      istop -= unaligned_top;\
+ +   else{\
+ +      unaligned_top = istop;\
+ +      istop = 0;\
+ +   }
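The arithmetic in this macro just splits the row into a scalar head (the bytes up to the first 16-byte boundary) and a vector body. A standalone model with made-up example values, not part of the patch:

#include <stdio.h>
#include <stddef.h>

int main(void)
{
   /* Hypothetical row: pointer at address 0x1009, 100 bytes long. */
   size_t rp = 0x1009, rowbytes = 100;
   size_t unaligned_top = 16 - (rp % 16);   /* 7 bytes to the boundary */
   size_t istop = rowbytes;

   if (unaligned_top == 16)      /* already aligned: no scalar head */
      unaligned_top = 0;

   if (unaligned_top < istop)
      istop -= unaligned_top;    /* 93 bytes left for the vector loop */
   else
   {
      unaligned_top = istop;     /* row shorter than the head: all scalar */
      istop = 0;
   }

   printf("scalar head = %zu bytes, vector body = %zu bytes\n",
       unaligned_top, istop);
   return 0;
}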
+ +
+ +void png_read_filter_row_up_vsx(png_row_infop row_info, png_bytep row,
+ +                                png_const_bytep prev_row)
+ +{
+ +   vector unsigned char rp_vec;
+ +   vector unsigned char pp_vec;
+ +   vsx_declare_common_vars(row_info,row,prev_row,0)
+ +
+ +   /* Altivec operations require 16-byte aligned data
+ +    * but input can be unaligned. So we calculate
+ +    * unaligned part as usual.
+ +    */
+ +   for (i = 0; i < unaligned_top; i++)
+ +   {
+ +      *rp = (png_byte)(((int)(*rp) + (int)(*pp++)) & 0xff);
+ +      rp++;
+ +   }
+ +
+ +   /* Using SIMD while we can */
+ +   while( istop >= 16 )
+ +   {
+ +      rp_vec = vec_ld(0,rp);
+ +      vec_ld_unaligned(pp_vec,pp);
+ +
+ +      rp_vec = vec_add(rp_vec,pp_vec);
+ +
+ +      vec_st(rp_vec,0,rp);
+ +
+ +      pp += 16;
+ +      rp += 16;
+ +      istop -= 16;
+ +   }
+ +
+ +   if(istop > 0)
+ +   {
+ +      /* If byte count of row is not divisible by 16
+ +       * we will process remaining part as usual
+ +       */
+ +      for (i = 0; i < istop; i++)
+ +      {
+ +         *rp = (png_byte)(((int)(*rp) + (int)(*pp++)) & 0xff);
+ +         rp++;
+ +      }
+ +   }
+ +
+ +}
+ +
+ +static const vector unsigned char VSX_LEFTSHIFTED1_4 = {16,16,16,16, 0, 1, 2, 3,16,16,16,16,16,16,16,16};
+ +static const vector unsigned char VSX_LEFTSHIFTED2_4 = {16,16,16,16,16,16,16,16, 4, 5, 6, 7,16,16,16,16};
+ +static const vector unsigned char VSX_LEFTSHIFTED3_4 = {16,16,16,16,16,16,16,16,16,16,16,16, 8, 9,10,11};
+ +
+ +static const vector unsigned char VSX_LEFTSHIFTED1_3 = {16,16,16, 0, 1, 2,16,16,16,16,16,16,16,16,16,16};
+ +static const vector unsigned char VSX_LEFTSHIFTED2_3 = {16,16,16,16,16,16, 3, 4, 5,16,16,16,16,16,16,16};
+ +static const vector unsigned char VSX_LEFTSHIFTED3_3 = {16,16,16,16,16,16,16,16,16, 6, 7, 8,16,16,16,16};
+ +static const vector unsigned char VSX_LEFTSHIFTED4_3 = {16,16,16,16,16,16,16,16,16,16,16,16, 9,10,11,16};
+ +
+ +static const vector unsigned char VSX_NOT_SHIFTED1_4 = {16,16,16,16, 4, 5, 6, 7,16,16,16,16,16,16,16,16};
+ +static const vector unsigned char VSX_NOT_SHIFTED2_4 = {16,16,16,16,16,16,16,16, 8, 9,10,11,16,16,16,16};
+ +static const vector unsigned char VSX_NOT_SHIFTED3_4 = {16,16,16,16,16,16,16,16,16,16,16,16,12,13,14,15};
+ +
+ +static const vector unsigned char VSX_NOT_SHIFTED1_3 = {16,16,16, 3, 4, 5,16,16,16,16,16,16,16,16,16,16};
+ +static const vector unsigned char VSX_NOT_SHIFTED2_3 = {16,16,16,16,16,16, 6, 7, 8,16,16,16,16,16,16,16};
+ +static const vector unsigned char VSX_NOT_SHIFTED3_3 = {16,16,16,16,16,16,16,16,16, 9,10,11,16,16,16,16};
+ +static const vector unsigned char VSX_NOT_SHIFTED4_3 = {16,16,16,16,16,16,16,16,16,16,16,16,12,13,14,16};
+ +
+ +static const vector unsigned char VSX_CHAR_ZERO = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+ +#ifdef __LITTLE_ENDIAN__
+ +
+ +static const vector unsigned char VSX_CHAR_TO_SHORT1_4 = { 4,16, 5,16, 6,16, 7,16,16,16,16,16,16,16,16,16};
+ +static const vector unsigned char VSX_CHAR_TO_SHORT2_4 = { 8,16, 9,16,10,16,11,16,16,16,16,16,16,16,16,16};
+ +static const vector unsigned char VSX_CHAR_TO_SHORT3_4 = {12,16,13,16,14,16,15,16,16,16,16,16,16,16,16,16};
+ +
+ +static const vector unsigned char VSX_SHORT_TO_CHAR1_4 = {16,16,16,16, 0, 2, 4, 6,16,16,16,16,16,16,16,16};
+ +static const vector unsigned char VSX_SHORT_TO_CHAR2_4 = {16,16,16,16,16,16,16,16, 0, 2, 4, 6,16,16,16,16};
+ +static const vector unsigned char VSX_SHORT_TO_CHAR3_4 = {16,16,16,16,16,16,16,16,16,16,16,16, 0, 2, 4, 6};
+ +
+ +static const vector unsigned char VSX_CHAR_TO_SHORT1_3 = { 3,16, 4,16, 5,16,16,16,16,16,16,16,16,16,16,16};
+ +static const vector unsigned char VSX_CHAR_TO_SHORT2_3 = { 6,16, 7,16, 8,16,16,16,16,16,16,16,16,16,16,16};
+ +static const vector unsigned char VSX_CHAR_TO_SHORT3_3 = { 9,16,10,16,11,16,16,16,16,16,16,16,16,16,16,16};
+ +static const vector unsigned char VSX_CHAR_TO_SHORT4_3 = {12,16,13,16,14,16,16,16,16,16,16,16,16,16,16,16};
+ +
+ +static const vector unsigned char VSX_SHORT_TO_CHAR1_3 = {16,16,16, 0, 2, 4,16,16,16,16,16,16,16,16,16,16};
+ +static const vector unsigned char VSX_SHORT_TO_CHAR2_3 = {16,16,16,16,16,16, 0, 2, 4,16,16,16,16,16,16,16};
+ +static const vector unsigned char VSX_SHORT_TO_CHAR3_3 = {16,16,16,16,16,16,16,16,16, 0, 2, 4,16,16,16,16};
+ +static const vector unsigned char VSX_SHORT_TO_CHAR4_3 = {16,16,16,16,16,16,16,16,16,16,16,16, 0, 2, 4,16};
+ +
+ +#elif defined(__BIG_ENDIAN__)
+ +
+ +static const vector unsigned char VSX_CHAR_TO_SHORT1_4 = {16, 4,16, 5,16, 6,16, 7,16,16,16,16,16,16,16,16};
+ +static const vector unsigned char VSX_CHAR_TO_SHORT2_4 = {16, 8,16, 9,16,10,16,11,16,16,16,16,16,16,16,16};
+ +static const vector unsigned char VSX_CHAR_TO_SHORT3_4 = {16,12,16,13,16,14,16,15,16,16,16,16,16,16,16,16};
+ +
+ +static const vector unsigned char VSX_SHORT_TO_CHAR1_4 = {16,16,16,16, 1, 3, 5, 7,16,16,16,16,16,16,16,16};
+ +static const vector unsigned char VSX_SHORT_TO_CHAR2_4 = {16,16,16,16,16,16,16,16, 1, 3, 5, 7,16,16,16,16};
+ +static const vector unsigned char VSX_SHORT_TO_CHAR3_4 = {16,16,16,16,16,16,16,16,16,16,16,16, 1, 3, 5, 7};
+ +
+ +static const vector unsigned char VSX_CHAR_TO_SHORT1_3 = {16, 3,16, 4,16, 5,16,16,16,16,16,16,16,16,16,16};
+ +static const vector unsigned char VSX_CHAR_TO_SHORT2_3 = {16, 6,16, 7,16, 8,16,16,16,16,16,16,16,16,16,16};
+ +static const vector unsigned char VSX_CHAR_TO_SHORT3_3 = {16, 9,16,10,16,11,16,16,16,16,16,16,16,16,16,16};
+ +static const vector unsigned char VSX_CHAR_TO_SHORT4_3 = {16,12,16,13,16,14,16,16,16,16,16,16,16,16,16,16};
+ +
+ +static const vector unsigned char VSX_SHORT_TO_CHAR1_3 = {16,16,16, 1, 3, 5,16,16,16,16,16,16,16,16,16,16};
+ +static const vector unsigned char VSX_SHORT_TO_CHAR2_3 = {16,16,16,16,16,16, 1, 3, 5,16,16,16,16,16,16,16};
+ +static const vector unsigned char VSX_SHORT_TO_CHAR3_3 = {16,16,16,16,16,16,16,16,16, 1, 3, 5,16,16,16,16};
+ +static const vector unsigned char VSX_SHORT_TO_CHAR4_3 = {16,16,16,16,16,16,16,16,16,16,16,16, 1, 3, 5,16};
+ +
+ +#endif
+ +
+ +#define vsx_char_to_short(vec,offset,bpp) (vector unsigned short)vec_perm((vec),VSX_CHAR_ZERO,VSX_CHAR_TO_SHORT##offset##_##bpp)
+ +#define vsx_short_to_char(vec,offset,bpp) vec_perm(((vector unsigned char)(vec)),VSX_CHAR_ZERO,VSX_SHORT_TO_CHAR##offset##_##bpp)
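For readers who have not used vec_perm: each byte of the index vector selects one byte from the 32-byte concatenation of the two source operands, so the value 16 in the tables above lands on the first byte of VSX_CHAR_ZERO and yields 0. A scalar model, illustrative only and not part of the patch:

/* Scalar model of vec_perm(a, b, idx): byte i of the result comes from
 * the concatenation a||b at position idx[i] (the low five bits of the
 * index are used). With b = VSX_CHAR_ZERO, index 16 produces 0, which
 * is how the tables above clear the lanes they do not select, and how
 * the CHAR_TO_SHORT tables zero-extend bytes into 16-bit lanes.
 */
static void perm_model(const unsigned char a[16], const unsigned char b[16],
    const unsigned char idx[16], unsigned char out[16])
{
   int i;
   for (i = 0; i < 16; i++)
   {
      unsigned char k = (unsigned char)(idx[i] & 31);
      out[i] = (k < 16) ? a[k] : b[k - 16];
   }
}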
+ +
+ +#ifdef PNG_USE_ABS
+ +# define vsx_abs(number) abs(number)
+ +#else
+ +# define vsx_abs(number) (number > 0) ? (number) : -(number)
+ +#endif
+ +
+ +void png_read_filter_row_sub4_vsx(png_row_infop row_info, png_bytep row,
+ +                                  png_const_bytep prev_row)
+ +{
+ +   png_byte bpp = 4;
+ +
+ +   vector unsigned char rp_vec;
+ +   vector unsigned char part_vec;
+ +
+ +   vsx_declare_common_vars(row_info,row,prev_row,bpp)
+ +
+ +   PNG_UNUSED(pp)
+ +
+ +   /* Altivec operations require 16-byte aligned data
+ +    * but input can be unaligned. So we calculate
+ +    * unaligned part as usual.
+ +    */
+ +   for (i = 0; i < unaligned_top; i++)
+ +   {
+ +      *rp = (png_byte)(((int)(*rp) + (int)(*(rp-bpp))) & 0xff);
+ +      rp++;
+ +   }
+ +
+ +   /* Using SIMD while we can */
+ +   while( istop >= 16 )
+ +   {
+ +      for(i=0;i < bpp ; i++)
+ +      {
+ +         *rp = (png_byte)(((int)(*rp) + (int)(*(rp-bpp))) & 0xff);
+ +         rp++;
+ +      }
+ +      rp -= bpp;
+ +
+ +      rp_vec = vec_ld(0,rp);
+ +      part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED1_4);
+ +      rp_vec = vec_add(rp_vec,part_vec);
+ +
+ +      part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED2_4);
+ +      rp_vec = vec_add(rp_vec,part_vec);
+ +
+ +      part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED3_4);
+ +      rp_vec = vec_add(rp_vec,part_vec);
+ +
+ +      vec_st(rp_vec,0,rp);
+ +
+ +      rp += 16;
+ +      istop -= 16;
+ +   }
+ +
+ +   if(istop > 0)
+ +      for (i = 0; i < istop % 16; i++)
+ +      {
+ +         *rp = (png_byte)(((int)(*rp) + (int)(*(rp - bpp))) & 0xff);
+ +         rp++;
+ +      }
+ +
+ +}
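A word on the three vec_perm/vec_add steps in the loop above: each VSX_LEFTSHIFTEDn_4 table moves one just-completed pixel into the slot of the next one, so the adds run the Sub filter's recurrence across the four pixels of the block, one pixel per step. A scalar model, not part of the patch:

/* Scalar model of the sub4 cascade: pixel 0 of the 16-byte block is
 * already final (the small scalar loop before the vector code added
 * the previous pixel into it); each step then adds the newly finished
 * pixel into its right neighbour, bytewise.
 */
static void sub4_block_model(unsigned char px[4][4])
{
   int i, k;
   for (i = 1; i < 4; i++)          /* VSX_LEFTSHIFTED1_4 .. 3_4 */
      for (k = 0; k < 4; k++)
         px[i][k] = (unsigned char)(px[i][k] + px[i - 1][k]);
}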
+ +
+ +void png_read_filter_row_sub3_vsx(png_row_infop row_info, png_bytep row,
+ +                                  png_const_bytep prev_row)
+ +{
+ +   png_byte bpp = 3;
+ +
+ +   vector unsigned char rp_vec;
+ +   vector unsigned char part_vec;
+ +
+ +   vsx_declare_common_vars(row_info,row,prev_row,bpp)
+ +
+ +   PNG_UNUSED(pp)
+ +
+ +   /* Altivec operations require 16-byte aligned data
+ +    * but input can be unaligned. So we calculate
+ +    * unaligned part as usual.
+ +    */
+ +   for (i = 0; i < unaligned_top; i++)
+ +   {
+ +      *rp = (png_byte)(((int)(*rp) + (int)(*(rp-bpp))) & 0xff);
+ +      rp++;
+ +   }
+ +
+ +   /* Using SIMD while we can */
+ +   while( istop >= 16 )
+ +   {
+ +      for(i=0;i < bpp ; i++)
+ +      {
+ +         *rp = (png_byte)(((int)(*rp) + (int)(*(rp-bpp))) & 0xff);
+ +         rp++;
+ +      }
+ +      rp -= bpp;
+ +
+ +      rp_vec = vec_ld(0,rp);
+ +      part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED1_3);
+ +      rp_vec = vec_add(rp_vec,part_vec);
+ +
+ +      part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED2_3);
+ +      rp_vec = vec_add(rp_vec,part_vec);
+ +
+ +      part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED3_3);
+ +      rp_vec = vec_add(rp_vec,part_vec);
+ +
+ +      part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED4_3);
+ +      rp_vec = vec_add(rp_vec,part_vec);
+ +
+ +      vec_st(rp_vec,0,rp);
+ +      rp += 15;
+ +      istop -= 16;
+ +
+ +      /* Since 16 % bpp = 16 % 3 = 1, the last element of the array must
+ +       * be processed manually
+ +       */
+ +      *rp = (png_byte)(((int)(*rp) + (int)(*(rp-bpp))) & 0xff);
+ +      rp++;
+ +   }
+ +
+ +   if(istop > 0)
+ +      for (i = 0; i < istop % 16; i++)
+ +      {
+ +         *rp = (png_byte)(((int)(*rp) + (int)(*(rp-bpp))) & 0xff);
+ +         rp++;
+ +      }
+ +}
+ +
+ +void png_read_filter_row_avg4_vsx(png_row_infop row_info, png_bytep row,
+ +                                  png_const_bytep prev_row)
+ +{
+ +   png_byte bpp = 4;
+ +
+ +   vector unsigned char rp_vec;
+ +   vector unsigned char pp_vec;
+ +   vector unsigned char pp_part_vec;
+ +   vector unsigned char rp_part_vec;
+ +   vector unsigned char avg_vec;
+ +
+ +   vsx_declare_common_vars(row_info,row,prev_row,bpp)
+ +   rp -= bpp;
+ +   if(istop >= bpp)
+ +      istop -= bpp;
+ +
+ +   for (i = 0; i < bpp; i++)
+ +   {
+ +      *rp = (png_byte)(((int)(*rp) +
+ +         ((int)(*pp++) / 2 )) & 0xff);
+ +
+ +      rp++;
+ +   }
+ +
+ +   /* Altivec operations require 16-byte aligned data
+ +    * but input can be unaligned. So we calculate
+ +    * unaligned part as usual.
+ +    */
+ +   for (i = 0; i < unaligned_top; i++)
+ +   {
+ +      *rp = (png_byte)(((int)(*rp) +
+ +         (int)(*pp++ + *(rp-bpp)) / 2 ) & 0xff);
+ +
+ +      rp++;
+ +   }
+ +
+ +   /* Using SIMD while we can */
+ +   while( istop >= 16 )
+ +   {
+ +      for(i=0;i < bpp ; i++)
+ +      {
+ +         *rp = (png_byte)(((int)(*rp) +
+ +            (int)(*pp++ + *(rp-bpp)) / 2 ) & 0xff);
+ +
+ +         rp++;
+ +      }
+ +      rp -= bpp;
+ +      pp -= bpp;
+ +
+ +      vec_ld_unaligned(pp_vec,pp);
+ +      rp_vec = vec_ld(0,rp);
+ +
+ +      rp_part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED1_4);
+ +      pp_part_vec = vec_perm(pp_vec,VSX_CHAR_ZERO,VSX_NOT_SHIFTED1_4);
+ +      avg_vec = vec_avg(rp_part_vec,pp_part_vec);
+ +      avg_vec = vec_sub(avg_vec, vec_and(vec_xor(rp_part_vec,pp_part_vec),vec_splat_u8(1)));
+ +      rp_vec = vec_add(rp_vec,avg_vec);
+ +
+ +      rp_part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED2_4);
+ +      pp_part_vec = vec_perm(pp_vec,VSX_CHAR_ZERO,VSX_NOT_SHIFTED2_4);
+ +      avg_vec = vec_avg(rp_part_vec,pp_part_vec);
+ +      avg_vec = vec_sub(avg_vec, vec_and(vec_xor(rp_part_vec,pp_part_vec),vec_splat_u8(1)));
+ +      rp_vec = vec_add(rp_vec,avg_vec);
+ +
+ +      rp_part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED3_4);
+ +      pp_part_vec = vec_perm(pp_vec,VSX_CHAR_ZERO,VSX_NOT_SHIFTED3_4);
+ +      avg_vec = vec_avg(rp_part_vec,pp_part_vec);
+ +      avg_vec = vec_sub(avg_vec, vec_and(vec_xor(rp_part_vec,pp_part_vec),vec_splat_u8(1)));
+ +      rp_vec = vec_add(rp_vec,avg_vec);
+ +
+ +      vec_st(rp_vec,0,rp);
+ +
+ +      rp += 16;
+ +      pp += 16;
+ +      istop -= 16;
+ +   }
+ +
+ +   if(istop > 0)
+ +      for (i = 0; i < istop % 16; i++)
+ +      {
+ +         *rp = (png_byte)(((int)(*rp) +
+ +            (int)(*pp++ + *(rp-bpp)) / 2 ) & 0xff);
+ +
+ +         rp++;
+ +      }
+ +}
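The vec_sub/vec_xor pair after every vec_avg deserves a note: vec_avg rounds up, computing (x+y+1)>>1, while the PNG Average filter needs the truncating (x+y)>>1, so the code subtracts 1 exactly when x and y have different low bits. An exhaustive standalone check of that identity, not part of the patch:

#include <stdio.h>

/* Verify ((x+y+1)>>1) - ((x^y)&1) == (x+y)>>1 for all byte pairs; this
 * is the correction applied after each vec_avg in the code above.
 */
int main(void)
{
   unsigned int x, y;
   for (x = 0; x < 256; x++)
      for (y = 0; y < 256; y++)
         if ((((x + y + 1) >> 1) - ((x ^ y) & 1)) != ((x + y) >> 1))
         {
            printf("mismatch at %u,%u\n", x, y);
            return 1;
         }
   printf("identity holds for all byte pairs\n");
   return 0;
}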
+ +
+ +void png_read_filter_row_avg3_vsx(png_row_infop row_info, png_bytep row,
+ +                                  png_const_bytep prev_row)
+ +{
+ +   png_byte bpp = 3;
+ +
+ +   vector unsigned char rp_vec;
+ +   vector unsigned char pp_vec;
+ +   vector unsigned char pp_part_vec;
+ +   vector unsigned char rp_part_vec;
+ +   vector unsigned char avg_vec;
+ +
+ +   vsx_declare_common_vars(row_info,row,prev_row,bpp)
+ +   rp -= bpp;
+ +   if(istop >= bpp)
+ +      istop -= bpp;
+ +
+ +   for (i = 0; i < bpp; i++)
+ +   {
+ +      *rp = (png_byte)(((int)(*rp) +
+ +         ((int)(*pp++) / 2 )) & 0xff);
+ +
+ +      rp++;
+ +   }
+ +
+ +   /* Altivec operations require 16-byte aligned data
+ +    * but input can be unaligned. So we calculate
+ +    * unaligned part as usual.
+ +    */
+ +   for (i = 0; i < unaligned_top; i++)
+ +   {
+ +      *rp = (png_byte)(((int)(*rp) +
+ +         (int)(*pp++ + *(rp-bpp)) / 2 ) & 0xff);
+ +
+ +      rp++;
+ +   }
+ +
+ +   /* Using SIMD while we can */
+ +   while( istop >= 16 )
+ +   {
+ +      for(i=0;i < bpp ; i++)
+ +      {
+ +         *rp = (png_byte)(((int)(*rp) +
+ +            (int)(*pp++ + *(rp-bpp)) / 2 ) & 0xff);
+ +
+ +         rp++;
+ +      }
+ +      rp -= bpp;
+ +      pp -= bpp;
+ +
+ +      vec_ld_unaligned(pp_vec,pp);
+ +      rp_vec = vec_ld(0,rp);
+ +
+ +      rp_part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED1_3);
+ +      pp_part_vec = vec_perm(pp_vec,VSX_CHAR_ZERO,VSX_NOT_SHIFTED1_3);
+ +      avg_vec = vec_avg(rp_part_vec,pp_part_vec);
+ +      avg_vec = vec_sub(avg_vec, vec_and(vec_xor(rp_part_vec,pp_part_vec),vec_splat_u8(1)));
+ +      rp_vec = vec_add(rp_vec,avg_vec);
+ +
+ +      rp_part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED2_3);
+ +      pp_part_vec = vec_perm(pp_vec,VSX_CHAR_ZERO,VSX_NOT_SHIFTED2_3);
+ +      avg_vec = vec_avg(rp_part_vec,pp_part_vec);
+ +      avg_vec = vec_sub(avg_vec, vec_and(vec_xor(rp_part_vec,pp_part_vec),vec_splat_u8(1)));
+ +      rp_vec = vec_add(rp_vec,avg_vec);
+ +
+ +      rp_part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED3_3);
+ +      pp_part_vec = vec_perm(pp_vec,VSX_CHAR_ZERO,VSX_NOT_SHIFTED3_3);
+ +      avg_vec = vec_avg(rp_part_vec,pp_part_vec);
+ +      avg_vec = vec_sub(avg_vec, vec_and(vec_xor(rp_part_vec,pp_part_vec),vec_splat_u8(1)));
+ +      rp_vec = vec_add(rp_vec,avg_vec);
+ +
+ +      rp_part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED4_3);
+ +      pp_part_vec = vec_perm(pp_vec,VSX_CHAR_ZERO,VSX_NOT_SHIFTED4_3);
+ +      avg_vec = vec_avg(rp_part_vec,pp_part_vec);
+ +      avg_vec = vec_sub(avg_vec, vec_and(vec_xor(rp_part_vec,pp_part_vec),vec_splat_u8(1)));
+ +      rp_vec = vec_add(rp_vec,avg_vec);
+ +
+ +      vec_st(rp_vec,0,rp);
+ +
+ +      rp += 15;
+ +      pp += 15;
+ +      istop -= 16;
+ +
+ +      /* Since 16 % bpp = 16 % 3 = 1, the last element of the array must
+ +       * be processed manually
+ +       */
+ +      *rp = (png_byte)(((int)(*rp) +
+ +         (int)(*pp++ + *(rp-bpp)) / 2 ) & 0xff);
+ +      rp++;
+ +   }
+ +
+ +   if(istop > 0)
+ +      for (i = 0; i < istop % 16; i++)
+ +      {
+ +         *rp = (png_byte)(((int)(*rp) +
+ +            (int)(*pp++ + *(rp-bpp)) / 2 ) & 0xff);
+ +
+ +         rp++;
+ +      }
+ +}
+ +
+ +/* Bytewise c ? t : e. */
+ +#define if_then_else(c,t,e) vec_sel(e,t,c)
+ +
+ +#define vsx_paeth_process(rp,pp,a,b,c,pa,pb,pc,bpp) {\
+ +      c = *(pp - bpp);\
+ +      a = *(rp - bpp);\
+ +      b = *pp++;\
+ +      p = b - c;\
+ +      pc = a - c;\
+ +      pa = vsx_abs(p);\
+ +      pb = vsx_abs(pc);\
+ +      pc = vsx_abs(p + pc);\
+ +      if (pb < pa) pa = pb, a = b;\
+ +      if (pc < pa) a = c;\
+ +      a += *rp;\
+ +      *rp++ = (png_byte)a;\
+ +   }
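On if_then_else: vec_sel(e,t,c) takes each bit of t where the mask c has a 1 and each bit of e where it has a 0, and vec_cmpeq produces all-ones or all-zeros per lane, so the combination acts as a lanewise conditional in the Paeth code below. A scalar model for one 16-bit lane, not part of the patch:

/* One lane of if_then_else(c,t,e) = vec_sel(e,t,c): mask bits choose
 * between the two sources bit by bit; with c all-ones or all-zeros
 * (the output of vec_cmpeq) this selects t or e wholesale.
 */
static unsigned short sel_model(unsigned short e, unsigned short t,
    unsigned short c)
{
   return (unsigned short)((t & c) | (e & ~c));
}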
+ +
+ +void png_read_filter_row_paeth4_vsx(png_row_infop row_info, png_bytep row,
+ +                                    png_const_bytep prev_row)
+ +{
+ +   png_byte bpp = 4;
+ +
+ +   int a, b, c, pa, pb, pc, p;
+ +   vector unsigned char rp_vec;
+ +   vector unsigned char pp_vec;
+ +   vector unsigned short a_vec,b_vec,c_vec,nearest_vec;
+ +   vector signed short pa_vec,pb_vec,pc_vec,smallest_vec;
+ +
+ +   vsx_declare_common_vars(row_info,row,prev_row,bpp)
+ +   rp -= bpp;
+ +   if(istop >= bpp)
+ +      istop -= bpp;
+ +
+ +   /* Process the first pixel in the row completely (this is the same as 'up'
+ +    * because there is only one candidate predictor for the first row).
+ +    */
+ +   for(i = 0; i < bpp ; i++)
+ +   {
+ +      *rp = (png_byte)( *rp + *pp);
+ +      rp++;
+ +      pp++;
+ +   }
+ +
+ +   for(i = 0; i < unaligned_top ; i++)
+ +   {
+ +      vsx_paeth_process(rp,pp,a,b,c,pa,pb,pc,bpp)
+ +   }
+ +
+ +   while( istop >= 16)
+ +   {
+ +      for(i = 0; i < bpp ; i++)
+ +      {
+ +         vsx_paeth_process(rp,pp,a,b,c,pa,pb,pc,bpp)
+ +      }
+ +
+ +      rp -= bpp;
+ +      pp -= bpp;
+ +      rp_vec = vec_ld(0,rp);
+ +      vec_ld_unaligned(pp_vec,pp);
+ +
+ +      a_vec = vsx_char_to_short(vec_perm(rp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED1_4),1,4);
+ +      b_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_NOT_SHIFTED1_4),1,4);
+ +      c_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED1_4),1,4);
+ +      pa_vec = (vector signed short) vec_sub(b_vec,c_vec);
+ +      pb_vec = (vector signed short) vec_sub(a_vec , c_vec);
+ +      pc_vec = vec_add(pa_vec,pb_vec);
+ +      pa_vec = vec_abs(pa_vec);
+ +      pb_vec = vec_abs(pb_vec);
+ +      pc_vec = vec_abs(pc_vec);
+ +      smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec));
+ +      nearest_vec = if_then_else(
+ +            vec_cmpeq(pa_vec,smallest_vec),
+ +            a_vec,
+ +            if_then_else(
+ +                  vec_cmpeq(pb_vec,smallest_vec),
+ +                  b_vec,
+ +                  c_vec
+ +                  )
+ +            );
+ +      rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,1,4)));
+ +
+ +      a_vec = vsx_char_to_short(vec_perm(rp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED2_4),2,4);
+ +      b_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_NOT_SHIFTED2_4),2,4);
+ +      c_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED2_4),2,4);
+ +      pa_vec = (vector signed short) vec_sub(b_vec,c_vec);
+ +      pb_vec = (vector signed short) vec_sub(a_vec , c_vec);
+ +      pc_vec = vec_add(pa_vec,pb_vec);
+ +      pa_vec = vec_abs(pa_vec);
+ +      pb_vec = vec_abs(pb_vec);
+ +      pc_vec = vec_abs(pc_vec);
+ +      smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec));
+ +      nearest_vec = if_then_else(
+ +            vec_cmpeq(pa_vec,smallest_vec),
+ +            a_vec,
+ +            if_then_else(
+ +                  vec_cmpeq(pb_vec,smallest_vec),
+ +                  b_vec,
+ +                  c_vec
+ +                  )
+ +            );
+ +      rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,2,4)));
+ +
+ +      a_vec = vsx_char_to_short(vec_perm(rp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED3_4),3,4);
+ +      b_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_NOT_SHIFTED3_4),3,4);
+ +      c_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED3_4),3,4);
+ +      pa_vec = (vector signed short) vec_sub(b_vec,c_vec);
+ +      pb_vec = (vector signed short) vec_sub(a_vec , c_vec);
+ +      pc_vec = vec_add(pa_vec,pb_vec);
+ +      pa_vec = vec_abs(pa_vec);
+ +      pb_vec = vec_abs(pb_vec);
+ +      pc_vec = vec_abs(pc_vec);
+ +      smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec));
+ +      nearest_vec = if_then_else(
+ +            vec_cmpeq(pa_vec,smallest_vec),
+ +            a_vec,
+ +            if_then_else(
+ +                  vec_cmpeq(pb_vec,smallest_vec),
+ +                  b_vec,
+ +                  c_vec
+ +                  )
+ +            );
+ +      rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,3,4)));
+ +
+ +      vec_st(rp_vec,0,rp);
+ +
+ +      rp += 16;
+ +      pp += 16;
+ +      istop -= 16;
+ +   }
+ +
+ +   if(istop > 0)
+ +      for (i = 0; i < istop % 16; i++)
+ +      {
+ +         vsx_paeth_process(rp,pp,a,b,c,pa,pb,pc,bpp)
+ +      }
+ +}
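The widening of every lane to 16 bits through vsx_char_to_short is not optional here: the Paeth intermediates b-c and a-c can be negative and reach magnitude 255, so unsigned 8-bit lanes would wrap. A tiny standalone demonstration, not part of the patch:

#include <stdio.h>

int main(void)
{
   unsigned char b = 0, c = 255;
   int wide = (int)b - (int)c;                    /* -255, as intended */
   unsigned char narrow = (unsigned char)(b - c); /* wraps around to 1 */
   printf("wide=%d narrow=%u\n", wide, narrow);
   return 0;
}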
+ +
+ +void png_read_filter_row_paeth3_vsx(png_row_infop row_info, png_bytep row,
+ +                                    png_const_bytep prev_row)
+ +{
+ +   png_byte bpp = 3;
+ +
+ +   int a, b, c, pa, pb, pc, p;
+ +   vector unsigned char rp_vec;
+ +   vector unsigned char pp_vec;
+ +   vector unsigned short a_vec,b_vec,c_vec,nearest_vec;
+ +   vector signed short pa_vec,pb_vec,pc_vec,smallest_vec;
+ +
+ +   vsx_declare_common_vars(row_info,row,prev_row,bpp)
+ +   rp -= bpp;
+ +   if(istop >= bpp)
+ +      istop -= bpp;
+ +
+ +   /* Process the first pixel in the row completely (this is the same as 'up'
+ +    * because there is only one candidate predictor for the first row).
+ +    */
+ +   for(i = 0; i < bpp ; i++)
+ +   {
+ +      *rp = (png_byte)( *rp + *pp);
+ +      rp++;
+ +      pp++;
+ +   }
+ +
+ +   for(i = 0; i < unaligned_top ; i++)
+ +   {
+ +      vsx_paeth_process(rp,pp,a,b,c,pa,pb,pc,bpp)
+ +   }
+ +
+ +   while( istop >= 16)
+ +   {
+ +      for(i = 0; i < bpp ; i++)
+ +      {
+ +         vsx_paeth_process(rp,pp,a,b,c,pa,pb,pc,bpp)
+ +      }
+ +
+ +      rp -= bpp;
+ +      pp -= bpp;
+ +      rp_vec = vec_ld(0,rp);
+ +      vec_ld_unaligned(pp_vec,pp);
+ +
+ +      a_vec = vsx_char_to_short(vec_perm(rp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED1_3),1,3);
+ +      b_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_NOT_SHIFTED1_3),1,3);
+ +      c_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED1_3),1,3);
+ +      pa_vec = (vector signed short) vec_sub(b_vec,c_vec);
+ +      pb_vec = (vector signed short) vec_sub(a_vec , c_vec);
+ +      pc_vec = vec_add(pa_vec,pb_vec);
+ +      pa_vec = vec_abs(pa_vec);
+ +      pb_vec = vec_abs(pb_vec);
+ +      pc_vec = vec_abs(pc_vec);
+ +      smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec));
+ +      nearest_vec = if_then_else(
+ +            vec_cmpeq(pa_vec,smallest_vec),
+ +            a_vec,
+ +            if_then_else(
+ +                  vec_cmpeq(pb_vec,smallest_vec),
+ +                  b_vec,
+ +                  c_vec
+ +                  )
+ +            );
+ +      rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,1,3)));
+ +
+ +      a_vec = vsx_char_to_short(vec_perm(rp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED2_3),2,3);
+ +      b_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_NOT_SHIFTED2_3),2,3);
+ +      c_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED2_3),2,3);
+ +      pa_vec = (vector signed short) vec_sub(b_vec,c_vec);
+ +      pb_vec = (vector signed short) vec_sub(a_vec , c_vec);
+ +      pc_vec = vec_add(pa_vec,pb_vec);
+ +      pa_vec = vec_abs(pa_vec);
+ +      pb_vec = vec_abs(pb_vec);
+ +      pc_vec = vec_abs(pc_vec);
+ +      smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec));
+ +      nearest_vec = if_then_else(
+ +            vec_cmpeq(pa_vec,smallest_vec),
+ +            a_vec,
+ +            if_then_else(
+ +                  vec_cmpeq(pb_vec,smallest_vec),
+ +                  b_vec,
+ +                  c_vec
+ +                  )
+ +            );
+ +      rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,2,3)));
+ +
+ +      a_vec = vsx_char_to_short(vec_perm(rp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED3_3),3,3);
+ +      b_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_NOT_SHIFTED3_3),3,3);
+ +      c_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED3_3),3,3);
+ +      pa_vec = (vector signed short) vec_sub(b_vec,c_vec);
+ +      pb_vec = (vector signed short) vec_sub(a_vec , c_vec);
+ +      pc_vec = vec_add(pa_vec,pb_vec);
+ +      pa_vec = vec_abs(pa_vec);
+ +      pb_vec = vec_abs(pb_vec);
+ +      pc_vec = vec_abs(pc_vec);
+ +      smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec));
+ +      nearest_vec = if_then_else(
+ +            vec_cmpeq(pa_vec,smallest_vec),
+ +            a_vec,
+ +            if_then_else(
+ +                  vec_cmpeq(pb_vec,smallest_vec),
+ +                  b_vec,
+ +                  c_vec
+ +                  )
+ +            );
+ +      rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,3,3)));
+ +
+ +      a_vec = vsx_char_to_short(vec_perm(rp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED4_3),4,3);
+ +      b_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_NOT_SHIFTED4_3),4,3);
+ +      c_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED4_3),4,3);
+ +      pa_vec = (vector signed short) vec_sub(b_vec,c_vec);
+ +      pb_vec = (vector signed short) vec_sub(a_vec , c_vec);
+ +      pc_vec = vec_add(pa_vec,pb_vec);
+ +      pa_vec = vec_abs(pa_vec);
+ +      pb_vec = vec_abs(pb_vec);
+ +      pc_vec = vec_abs(pc_vec);
+ +      smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec));
+ +      nearest_vec = if_then_else(
+ +            vec_cmpeq(pa_vec,smallest_vec),
+ +            a_vec,
+ +            if_then_else(
+ +                  vec_cmpeq(pb_vec,smallest_vec),
+ +                  b_vec,
+ +                  c_vec
+ +                  )
+ +            );
+ +      rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,4,3)));
+ +
+ +      vec_st(rp_vec,0,rp);
+ +
+ +      rp += 15;
+ +      pp += 15;
+ +      istop -= 16;
+ +
+ +      /* Since 16 % bpp = 16 % 3 = 1, the last element of the array must
+ +       * be processed manually
+ +       */
+ +      vsx_paeth_process(rp,pp,a,b,c,pa,pb,pc,bpp)
+ +   }
+ +
+ +   if(istop > 0)
+ +      for (i = 0; i < istop % 16; i++)
+ +      {
+ +         vsx_paeth_process(rp,pp,a,b,c,pa,pb,pc,bpp)
+ +      }
+ +}
+ +
+ +#endif /* PNG_POWERPC_VSX_OPT > 0 */
+ +#endif /* PNG_POWERPC_VSX_IMPLEMENTATION == 1 (intrinsics) */
+ +#endif /* READ */
+ --- /dev/null
+ +++ b/third_party/libpng/powerpc/powerpc_init.c
+ @@ -0,0 +1,126 @@
+ +
+ +/* powerpc_init.c - POWERPC optimised filter functions
+ + *
+ + * Copyright (c) 2018 Cosmin Truta
+ + * Copyright (c) 2017 Glenn Randers-Pehrson
+ + * Written by Vadim Barkov, 2017.
+ + *
+ + * This code is released under the libpng license.
+ + * For conditions of distribution and use, see the disclaimer
+ + * and license in png.h
+ + */
+ +
+ +/* Below, after checking __linux__, various non-C90 POSIX 1003.1 functions are
+ + * called.
+ + */
+ +#define _POSIX_SOURCE 1
+ +
+ +#include <stdio.h>
+ +#include "../pngpriv.h"
+ +
+ +#ifdef PNG_READ_SUPPORTED
+ +
+ +#if PNG_POWERPC_VSX_OPT > 0
+ +#ifdef PNG_POWERPC_VSX_CHECK_SUPPORTED /* Do run-time checks */
+ +/* WARNING: it is strongly recommended that you do not build libpng with
+ + * run-time checks for CPU features if at all possible. In the case of the PowerPC
+ + * VSX instructions there is no processor-specific way of detecting the
+ + * presence of the required support, therefore run-time detection is extremely
+ + * OS specific.
+ + *
+ + * You may set the macro PNG_POWERPC_VSX_FILE to the file name of a file containing
+ + * a fragment of C source code which defines the png_have_vsx function. There
+ + * are a number of implementations in contrib/powerpc-vsx, but the only one that
+ + * has partial support is contrib/powerpc-vsx/linux.c - a generic Linux
+ + * implementation which reads /proc/cpuinfo.
+ + */
+ +#ifndef PNG_POWERPC_VSX_FILE
+ +# ifdef __linux__
+ +#  define PNG_POWERPC_VSX_FILE "contrib/powerpc-vsx/linux_aux.c"
+ +# endif
+ +#endif
+ +
+ +#ifdef PNG_POWERPC_VSX_FILE
+ +
+ +#include <signal.h> /* for sig_atomic_t */
+ +static int png_have_vsx(png_structp png_ptr);
+ +#include PNG_POWERPC_VSX_FILE
+ +
+ +#else /* PNG_POWERPC_VSX_FILE */
+ +# error "PNG_POWERPC_VSX_FILE undefined: no support for run-time POWERPC VSX checks"
+ +#endif /* PNG_POWERPC_VSX_FILE */
+ +#endif /* PNG_POWERPC_VSX_CHECK_SUPPORTED */
+ +
+ +void
+ +png_init_filter_functions_vsx(png_structp pp, unsigned int bpp)
+ +{
+ +   /* The switch statement is compiled in for POWERPC_VSX_API, the call to
+ +    * png_have_vsx is compiled in for POWERPC_VSX_CHECK. If both are defined
+ +    * the check is only performed if the API has not set the PowerPC option on
+ +    * or off explicitly. In this case the check controls what happens.
+ +    */
+ +
+ +#ifdef PNG_POWERPC_VSX_API_SUPPORTED
+ +   switch ((pp->options >> PNG_POWERPC_VSX) & 3)
+ +   {
+ +      case PNG_OPTION_UNSET:
+ +         /* Allow the run-time check to execute if it has been enabled -
+ +          * thus both API and CHECK can be turned on. If it isn't supported
+ +          * this case will fall through to the 'default' below, which just
+ +          * returns.
+ +          */
+ +#endif /* PNG_POWERPC_VSX_API_SUPPORTED */
+ +#ifdef PNG_POWERPC_VSX_CHECK_SUPPORTED
+ +         {
+ +            static volatile sig_atomic_t no_vsx = -1; /* not checked */
+ +
+ +            if (no_vsx < 0)
+ +               no_vsx = !png_have_vsx(pp);
+ +
+ +            if (no_vsx)
+ +               return;
+ +         }
+ +#ifdef PNG_POWERPC_VSX_API_SUPPORTED
+ +         break;
+ +#endif
+ +#endif /* PNG_POWERPC_VSX_CHECK_SUPPORTED */
+ +
+ +#ifdef PNG_POWERPC_VSX_API_SUPPORTED
+ +      default: /* OFF or INVALID */
+ +         return;
+ +
+ +      case PNG_OPTION_ON:
+ +         /* Option turned on */
+ +         break;
+ +   }
+ +#endif
+ +
+ +   /* IMPORTANT: any new internal functions used here must be declared using
+ +    * PNG_INTERNAL_FUNCTION in ../pngpriv.h. This is required so that the
+ +    * 'prefix' option to configure works:
+ +    *
+ +    *    ./configure --with-libpng-prefix=foobar_
+ +    *
+ +    * Verify you have got this right by running the above command, doing a build
+ +    * and examining pngprefix.h; it must contain a #define for every external
+ +    * function you add. (Notice that this happens automatically for the
+ +    * initialization function.)
+ +    */
+ +   pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_vsx;
+ +
+ +   if (bpp == 3)
+ +   {
+ +      pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_vsx;
+ +      pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_vsx;
+ +      pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = png_read_filter_row_paeth3_vsx;
+ +   }
+ +
+ +   else if (bpp == 4)
+ +   {
+ +      pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_vsx;
+ +      pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_vsx;
+ +      pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = png_read_filter_row_paeth4_vsx;
+ +   }
+ +}
+ +#endif /* PNG_POWERPC_VSX_OPT > 0 */
+ +#endif /* READ */
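For reference, the run-time probe selected above, contrib/powerpc-vsx/linux_aux.c, is built around the Linux auxiliary vector. A sketch from memory of the shape of such a png_have_vsx, not the verbatim contrib file, with the HWCAP feature constants coming from the kernel/libc headers:

#include <sys/auxv.h>

/* Sketch only: report VSX availability from the ELF auxiliary vector.
 * Check the real contrib/powerpc-vsx/linux_aux.c before relying on the
 * exact feature bits tested here.
 */
static int png_have_vsx(png_structp png_ptr)
{
   unsigned long hwcap = getauxval(AT_HWCAP);

   PNG_UNUSED(png_ptr)

   return (hwcap & PPC_FEATURE_HAS_VSX) != 0;
}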
+ --- a/third_party/libpng/BUILD.gn
+ +++ b/third_party/libpng/BUILD.gn
+ @@ -89,6 +91,11 @@ source_set("libpng_sources") {
+        "mips/filter_msa_intrinsics.c",
+        "mips/mips_init.c",
+      ]
+ +  } else if (current_cpu == "ppc64") {
+ +    sources += [
+ +      "powerpc/filter_vsx_intrinsics.c",
+ +      "powerpc/powerpc_init.c",
+ +    ]
+    }
+
+    configs -= [ "//build/config/compiler:chromium_code" ]
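One caveat on the build change: the two new sources only do anything if the pngpriv.h vendored in this tree defines the PowerPC VSX macros for ppc64 and hooks the init function. The expected wiring, modelled on libpng's ARM/NEON equivalent and to be verified against the actual pngpriv.h, looks roughly like this:

/* Hypothetical sketch of the pngpriv.h plumbing this patch relies on;
 * verify against the real header before merging.
 */
#ifndef PNG_POWERPC_VSX_OPT
#  if defined(__PPC64__) && defined(__ALTIVEC__) && defined(__VSX__)
#    define PNG_POWERPC_VSX_OPT 2
#  else
#    define PNG_POWERPC_VSX_OPT 0
#  endif
#endif

#if PNG_POWERPC_VSX_OPT > 0
#  define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_vsx
#  ifndef PNG_POWERPC_VSX_IMPLEMENTATION
#    define PNG_POWERPC_VSX_IMPLEMENTATION 1
#  endif
#endif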
DO NOT MERGE! - THIS PULL REQUEST IS STILL IN TESTING!
Changelog
Notes
I do not know how long it will take the CI to build ppc64le, so my intention is to leave this PR open and iron out all the rough edges. Once it is ready, I will let the core team know.