commit 81d9832226d6e3d1ee78ee3133189d7b520e7eea Author: Julian Seward Date: Tue Nov 20 11:36:53 2018 +0100 ppc front end: use new IROps added in 42719898. This pertains to bug 386945. VEX/priv/guest_ppc_toIR.c: gen_POPCOUNT: use Iop_PopCount{32,64} where possible. gen_vpopcntd_mode32: use Iop_PopCount32. for cntlz{w,d}, use Iop_ClzNat{32,64}. dis_modulo_int: use Iop_CtzNat{32,64} instead of Iop_Ctz{32,64}. gen_byterev32: use Iop_Reverse8sIn32_x1 instead of lengthy sequence. verbose_Clz32: remove (was unused anyway). diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c index cb1cae1..8977d4f 100644 --- a/VEX/priv/guest_ppc_toIR.c +++ b/VEX/priv/guest_ppc_toIR.c @@ -1595,7 +1595,8 @@ typedef enum { /* Generate an IR sequence to do a popcount operation on the supplied IRTemp, and return a new IRTemp holding the result. 'ty' may be Ity_I32 or Ity_I64 only. */ -static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src, _popcount_data_type data_type ) +static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src, + _popcount_data_type data_type ) { /* Do count across 2^data_type bits, byte: data_type = 3 @@ -1611,6 +1612,22 @@ static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src, _popcount_data_type data_typ vassert(ty == Ity_I64 || ty == Ity_I32); + // Use a single IROp in cases where we can. + + if (ty == Ity_I64 && data_type == DWORD) { + IRTemp res = newTemp(Ity_I64); + assign(res, unop(Iop_PopCount64, mkexpr(src))); + return res; + } + + if (ty == Ity_I32 && data_type == WORD) { + IRTemp res = newTemp(Ity_I32); + assign(res, unop(Iop_PopCount32, mkexpr(src))); + return res; + } + + // For the rest, we have to do it the slow way. 
+ if (ty == Ity_I32) { for (idx = 0; idx < WORD; idx++) { @@ -1638,7 +1655,7 @@ static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src, _popcount_data_type data_typ return nyu; } -// else, ty == Ity_I64 + // else, ty == Ity_I64 vassert(mode64); for (i = 0; i < DWORD; i++) { @@ -1670,52 +1687,15 @@ static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src, _popcount_data_type data_typ */ static IRTemp gen_vpopcntd_mode32 ( IRTemp src1, IRTemp src2 ) { - Int i, shift[6]; - IRTemp mask[6]; - IRTemp old = IRTemp_INVALID; - IRTemp nyu1 = IRTemp_INVALID; - IRTemp nyu2 = IRTemp_INVALID; IRTemp retval = newTemp(Ity_I64); vassert(!mode64); - for (i = 0; i < WORD; i++) { - mask[i] = newTemp(Ity_I32); - shift[i] = 1 << i; - } - assign(mask[0], mkU32(0x55555555)); - assign(mask[1], mkU32(0x33333333)); - assign(mask[2], mkU32(0x0F0F0F0F)); - assign(mask[3], mkU32(0x00FF00FF)); - assign(mask[4], mkU32(0x0000FFFF)); - old = src1; - for (i = 0; i < WORD; i++) { - nyu1 = newTemp(Ity_I32); - assign(nyu1, - binop(Iop_Add32, - binop(Iop_And32, - mkexpr(old), - mkexpr(mask[i])), - binop(Iop_And32, - binop(Iop_Shr32, mkexpr(old), mkU8(shift[i])), - mkexpr(mask[i])))); - old = nyu1; - } - - old = src2; - for (i = 0; i < WORD; i++) { - nyu2 = newTemp(Ity_I32); - assign(nyu2, - binop(Iop_Add32, - binop(Iop_And32, - mkexpr(old), - mkexpr(mask[i])), - binop(Iop_And32, - binop(Iop_Shr32, mkexpr(old), mkU8(shift[i])), - mkexpr(mask[i])))); - old = nyu2; - } - assign(retval, unop(Iop_32Uto64, binop(Iop_Add32, mkexpr(nyu1), mkexpr(nyu2)))); + assign(retval, + unop(Iop_32Uto64, + binop(Iop_Add32, + unop(Iop_PopCount32, mkexpr(src1)), + unop(Iop_PopCount32, mkexpr(src2))))); return retval; } @@ -5715,7 +5695,7 @@ static Bool dis_modulo_int ( UInt theInstr ) rA_address, rS_address); assign( rS, getIReg( rS_address ) ); - assign( result, unop( Iop_Ctz32, + assign( result, unop( Iop_CtzNat32, unop( Iop_64to32, mkexpr( rS ) ) ) ); assign( rA, binop( Iop_32HLto64, mkU32( 0 ), mkexpr( result ) ) ); @@ -5746,7 
+5726,7 @@ static Bool dis_modulo_int ( UInt theInstr ) rA_address, rS_address); assign( rS, getIReg( rS_address ) ); - assign( rA, unop( Iop_Ctz64, mkexpr( rS ) ) ); + assign( rA, unop( Iop_CtzNat64, mkexpr( rS ) ) ); if ( flag_rC == 1 ) set_CR0( mkexpr( rA ) ); @@ -6307,7 +6287,6 @@ static Bool dis_int_logic ( UInt theInstr ) IRTemp rS = newTemp(ty); IRTemp rA = newTemp(ty); IRTemp rB = newTemp(ty); - IRExpr* irx; Bool do_rc = False; assign( rS, getIReg(rS_addr) ); @@ -6404,26 +6383,16 @@ static Bool dis_int_logic ( UInt theInstr ) break; case 0x01A: { // cntlzw (Count Leading Zeros Word, PPC32 p371) - IRExpr* lo32; if (rB_addr!=0) { vex_printf("dis_int_logic(ppc)(cntlzw,rB_addr)\n"); return False; } - DIP("cntlzw%s r%u,r%u\n", - flag_rC ? ".":"", rA_addr, rS_addr); + DIP("cntlzw%s r%u,r%u\n", flag_rC ? ".":"", rA_addr, rS_addr); // mode64: count in low word only - lo32 = mode64 ? unop(Iop_64to32, mkexpr(rS)) : mkexpr(rS); - - // Iop_Clz32 undefined for arg==0, so deal with that case: - irx = binop(Iop_CmpNE32, lo32, mkU32(0)); - assign(rA, mkWidenFrom32(ty, - IRExpr_ITE( irx, - unop(Iop_Clz32, lo32), - mkU32(32)), - False)); - - // TODO: alternatively: assign(rA, verbose_Clz32(rS)); + IRExpr* lo32 = mode64 ? unop(Iop_64to32, mkexpr(rS)) : mkexpr(rS); + IRExpr* res32 = unop(Iop_ClzNat32, lo32); + assign(rA, mode64 ? unop(Iop_32Uto64, res32) : res32); break; } @@ -6521,14 +6490,8 @@ static Bool dis_int_logic ( UInt theInstr ) vex_printf("dis_int_logic(ppc)(cntlzd,rB_addr)\n"); return False; } - DIP("cntlzd%s r%u,r%u\n", - flag_rC ? ".":"", rA_addr, rS_addr); - // Iop_Clz64 undefined for arg==0, so deal with that case: - irx = binop(Iop_CmpNE64, mkexpr(rS), mkU64(0)); - assign(rA, IRExpr_ITE( irx, - unop(Iop_Clz64, mkexpr(rS)), - mkU64(64) )); - // TODO: alternatively: assign(rA, verbose_Clz64(rS)); + DIP("cntlzd%s r%u,r%u\n", flag_rC ? 
".":"", rA_addr, rS_addr); + assign(rA, unop(Iop_ClzNat64, mkexpr(rS))); break; case 0x1FC: // cmpb (Power6: compare bytes) @@ -6574,8 +6537,9 @@ static Bool dis_int_logic ( UInt theInstr ) putFReg( rS_addr, mkexpr(frA)); return True; } - case 0x1FA: // popcntd (population count doubleword + case 0x1FA: // popcntd (population count doubleword) { + vassert(mode64); DIP("popcntd r%u,r%u\n", rA_addr, rS_addr); IRTemp result = gen_POPCOUNT(ty, rS, DWORD); putIReg( rA_addr, mkexpr(result) ); @@ -9154,18 +9118,7 @@ static Bool dis_int_shift ( UInt theInstr ) static IRExpr* /* :: Ity_I32 */ gen_byterev32 ( IRTemp t ) { vassert(typeOfIRTemp(irsb->tyenv, t) == Ity_I32); - return - binop(Iop_Or32, - binop(Iop_Shl32, mkexpr(t), mkU8(24)), - binop(Iop_Or32, - binop(Iop_And32, binop(Iop_Shl32, mkexpr(t), mkU8(8)), - mkU32(0x00FF0000)), - binop(Iop_Or32, - binop(Iop_And32, binop(Iop_Shr32, mkexpr(t), mkU8(8)), - mkU32(0x0000FF00)), - binop(Iop_And32, binop(Iop_Shr32, mkexpr(t), mkU8(24)), - mkU32(0x000000FF) ) - ))); + return unop(Iop_Reverse8sIn32_x1, mkexpr(t)); } /* Generates code to swap the byte order in the lower half of an Ity_I32, @@ -9225,6 +9178,10 @@ static Bool dis_int_ldst_rev ( UInt theInstr ) case 0x214: // ldbrx (Load Doubleword Byte-Reverse Indexed) { + // JRS FIXME: + // * is the host_endness conditional below actually necessary? + // * can we just do a 64-bit load followed by Iop_Reverse8sIn64_x1? + // That would be a lot more efficient. IRExpr * nextAddr; IRTemp w3 = newTemp( Ity_I32 ); IRTemp w4 = newTemp( Ity_I32 ); @@ -17056,8 +17013,8 @@ dis_av_count_bitTranspose ( UInt theInstr, UInt opc2 ) case 0x7C3: // vpopcntd { if (mode64) { - /* Break vector into 64-bit double words and do the population count - * on each double word. + /* Break vector into 64-bit double words and do the population + count on each double word. 
*/ IRType ty = Ity_I64; IRTemp bits0_63 = newTemp(Ity_I64); @@ -17077,15 +17034,16 @@ dis_av_count_bitTranspose ( UInt theInstr, UInt opc2 ) mkexpr( cnt_bits0_63 ) ) ); } else { /* Break vector into 32-bit words and do the population count - * on each doubleword. + on each 32-bit word. */ IRTemp bits0_31, bits32_63, bits64_95, bits96_127; bits0_31 = bits32_63 = bits64_95 = bits96_127 = IRTemp_INVALID; - IRTemp cnt_bits0_63 = newTemp(Ity_I64); + IRTemp cnt_bits0_63 = newTemp(Ity_I64); IRTemp cnt_bits64_127 = newTemp(Ity_I64); DIP("vpopcntd v%d,v%d\n", vRT_addr, vRB_addr); - breakV128to4x32(mkexpr( vB), &bits96_127, &bits64_95, &bits32_63, &bits0_31 ); + breakV128to4x32(mkexpr( vB), &bits96_127, &bits64_95, + &bits32_63, &bits0_31 ); cnt_bits0_63 = gen_vpopcntd_mode32(bits0_31, bits32_63); cnt_bits64_127 = gen_vpopcntd_mode32(bits64_95, bits96_127); @@ -29103,10 +29061,12 @@ DisResult disInstr_PPC_WRK ( /* Miscellaneous ISA 2.06 instructions */ case 0x1FA: // popcntd + if (!mode64) goto decode_failure; + /* else fallthru */ case 0x17A: // popcntw case 0x7A: // popcntb - if (dis_int_logic( theInstr )) goto decode_success; - goto decode_failure; + if (dis_int_logic( theInstr )) goto decode_success; + goto decode_failure; case 0x0FC: // bpermd if (!mode64) goto decode_failure; @@ -29669,94 +29629,6 @@ DisResult disInstr_PPC ( IRSB* irsb_IN, return dres; } - -/*------------------------------------------------------------*/ -/*--- Unused stuff ---*/ -/*------------------------------------------------------------*/ - -///* A potentially more memcheck-friendly implementation of Clz32, with -// the boundary case Clz32(0) = 32, which is what ppc requires. */ -// -//static IRExpr* /* :: Ity_I32 */ verbose_Clz32 ( IRTemp arg ) -//{ -// /* Welcome ... to SSA R Us. 
*/ -// IRTemp n1 = newTemp(Ity_I32); -// IRTemp n2 = newTemp(Ity_I32); -// IRTemp n3 = newTemp(Ity_I32); -// IRTemp n4 = newTemp(Ity_I32); -// IRTemp n5 = newTemp(Ity_I32); -// IRTemp n6 = newTemp(Ity_I32); -// IRTemp n7 = newTemp(Ity_I32); -// IRTemp n8 = newTemp(Ity_I32); -// IRTemp n9 = newTemp(Ity_I32); -// IRTemp n10 = newTemp(Ity_I32); -// IRTemp n11 = newTemp(Ity_I32); -// IRTemp n12 = newTemp(Ity_I32); -// -// /* First, propagate the most significant 1-bit into all lower -// positions in the word. */ -// /* unsigned int clz ( unsigned int n ) -// { -// n |= (n >> 1); -// n |= (n >> 2); -// n |= (n >> 4); -// n |= (n >> 8); -// n |= (n >> 16); -// return bitcount(~n); -// } -// */ -// assign(n1, mkexpr(arg)); -// assign(n2, binop(Iop_Or32, mkexpr(n1), binop(Iop_Shr32, mkexpr(n1), mkU8(1)))); -// assign(n3, binop(Iop_Or32, mkexpr(n2), binop(Iop_Shr32, mkexpr(n2), mkU8(2)))); -// assign(n4, binop(Iop_Or32, mkexpr(n3), binop(Iop_Shr32, mkexpr(n3), mkU8(4)))); -// assign(n5, binop(Iop_Or32, mkexpr(n4), binop(Iop_Shr32, mkexpr(n4), mkU8(8)))); -// assign(n6, binop(Iop_Or32, mkexpr(n5), binop(Iop_Shr32, mkexpr(n5), mkU8(16)))); -// /* This gives a word of the form 0---01---1. Now invert it, giving -// a word of the form 1---10---0, then do a population-count idiom -// (to count the 1s, which is the number of leading zeroes, or 32 -// if the original word was 0. 
*/ -// assign(n7, unop(Iop_Not32, mkexpr(n6))); -// -// /* unsigned int bitcount ( unsigned int n ) -// { -// n = n - ((n >> 1) & 0x55555555); -// n = (n & 0x33333333) + ((n >> 2) & 0x33333333); -// n = (n + (n >> 4)) & 0x0F0F0F0F; -// n = n + (n >> 8); -// n = (n + (n >> 16)) & 0x3F; -// return n; -// } -// */ -// assign(n8, -// binop(Iop_Sub32, -// mkexpr(n7), -// binop(Iop_And32, -// binop(Iop_Shr32, mkexpr(n7), mkU8(1)), -// mkU32(0x55555555)))); -// assign(n9, -// binop(Iop_Add32, -// binop(Iop_And32, mkexpr(n8), mkU32(0x33333333)), -// binop(Iop_And32, -// binop(Iop_Shr32, mkexpr(n8), mkU8(2)), -// mkU32(0x33333333)))); -// assign(n10, -// binop(Iop_And32, -// binop(Iop_Add32, -// mkexpr(n9), -// binop(Iop_Shr32, mkexpr(n9), mkU8(4))), -// mkU32(0x0F0F0F0F))); -// assign(n11, -// binop(Iop_Add32, -// mkexpr(n10), -// binop(Iop_Shr32, mkexpr(n10), mkU8(8)))); -// assign(n12, -// binop(Iop_Add32, -// mkexpr(n11), -// binop(Iop_Shr32, mkexpr(n11), mkU8(16)))); -// return -// binop(Iop_And32, mkexpr(n12), mkU32(0x3F)); -//} - /*--------------------------------------------------------------------*/ /*--- end guest_ppc_toIR.c ---*/ /*--------------------------------------------------------------------*/