b3eda9b
commit 81d9832226d6e3d1ee78ee3133189d7b520e7eea
b3eda9b
Author: Julian Seward <jseward@acm.org>
b3eda9b
Date:   Tue Nov 20 11:36:53 2018 +0100
b3eda9b
b3eda9b
    ppc front end: use new IROps added in 42719898.
b3eda9b
    
b3eda9b
    This pertains to bug 386945.
b3eda9b
    
b3eda9b
    VEX/priv/guest_ppc_toIR.c:
b3eda9b
    
b3eda9b
    gen_POPCOUNT: use Iop_PopCount{32,64} where possible.
b3eda9b
    
b3eda9b
    gen_vpopcntd_mode32: use Iop_PopCount32.
b3eda9b
    
b3eda9b
    for cntlz{w,d}, use Iop_ClzNat{32,64}; in dis_modulo_int, replace Iop_Ctz{32,64} with Iop_CtzNat{32,64}.
b3eda9b
    
b3eda9b
    gen_byterev32: use Iop_Reverse8sIn32_x1 instead of lengthy sequence.
b3eda9b
    
b3eda9b
    verbose_Clz32: remove (was unused anyway).
b3eda9b
b3eda9b
diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c
b3eda9b
index cb1cae1..8977d4f 100644
b3eda9b
--- a/VEX/priv/guest_ppc_toIR.c
b3eda9b
+++ b/VEX/priv/guest_ppc_toIR.c
b3eda9b
@@ -1595,7 +1595,8 @@ typedef enum {
b3eda9b
 /* Generate an IR sequence to do a popcount operation on the supplied
b3eda9b
    IRTemp, and return a new IRTemp holding the result.  'ty' may be
b3eda9b
    Ity_I32 or Ity_I64 only. */
b3eda9b
-static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src, _popcount_data_type data_type )
b3eda9b
+static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src,
b3eda9b
+                             _popcount_data_type data_type )
b3eda9b
 {
b3eda9b
   /* Do count across 2^data_type bits,
b3eda9b
      byte:        data_type = 3
b3eda9b
@@ -1611,6 +1612,22 @@ static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src, _popcount_data_type data_typ
b3eda9b
 
b3eda9b
    vassert(ty == Ity_I64 || ty == Ity_I32);
b3eda9b
 
b3eda9b
+   // Use a single IROp in cases where we can.
b3eda9b
+
b3eda9b
+   if (ty == Ity_I64 && data_type == DWORD) {
b3eda9b
+      IRTemp res = newTemp(Ity_I64);
b3eda9b
+      assign(res, unop(Iop_PopCount64, mkexpr(src)));
b3eda9b
+      return res;
b3eda9b
+   }
b3eda9b
+
b3eda9b
+   if (ty == Ity_I32 && data_type == WORD) {
b3eda9b
+      IRTemp res = newTemp(Ity_I32);
b3eda9b
+      assign(res, unop(Iop_PopCount32, mkexpr(src)));
b3eda9b
+      return res;
b3eda9b
+   }
b3eda9b
+
b3eda9b
+   // For the rest, we have to do it the slow way.
b3eda9b
+
b3eda9b
    if (ty == Ity_I32) {
b3eda9b
 
b3eda9b
       for (idx = 0; idx < WORD; idx++) {
b3eda9b
@@ -1638,7 +1655,7 @@ static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src, _popcount_data_type data_typ
b3eda9b
       return nyu;
b3eda9b
    }
b3eda9b
 
b3eda9b
-// else, ty == Ity_I64
b3eda9b
+   // else, ty == Ity_I64
b3eda9b
    vassert(mode64);
b3eda9b
 
b3eda9b
    for (i = 0; i < DWORD; i++) {
b3eda9b
@@ -1670,52 +1687,15 @@ static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src, _popcount_data_type data_typ
b3eda9b
  */
b3eda9b
 static IRTemp gen_vpopcntd_mode32 ( IRTemp src1, IRTemp src2 )
b3eda9b
 {
b3eda9b
-   Int i, shift[6];
b3eda9b
-   IRTemp mask[6];
b3eda9b
-   IRTemp old = IRTemp_INVALID;
b3eda9b
-   IRTemp nyu1 = IRTemp_INVALID;
b3eda9b
-   IRTemp nyu2 = IRTemp_INVALID;
b3eda9b
    IRTemp retval = newTemp(Ity_I64);
b3eda9b
 
b3eda9b
    vassert(!mode64);
b3eda9b
 
b3eda9b
-   for (i = 0; i < WORD; i++) {
b3eda9b
-      mask[i]  = newTemp(Ity_I32);
b3eda9b
-      shift[i] = 1 << i;
b3eda9b
-   }
b3eda9b
-   assign(mask[0], mkU32(0x55555555));
b3eda9b
-   assign(mask[1], mkU32(0x33333333));
b3eda9b
-   assign(mask[2], mkU32(0x0F0F0F0F));
b3eda9b
-   assign(mask[3], mkU32(0x00FF00FF));
b3eda9b
-   assign(mask[4], mkU32(0x0000FFFF));
b3eda9b
-   old = src1;
b3eda9b
-   for (i = 0; i < WORD; i++) {
b3eda9b
-      nyu1 = newTemp(Ity_I32);
b3eda9b
-      assign(nyu1,
b3eda9b
-             binop(Iop_Add32,
b3eda9b
-                   binop(Iop_And32,
b3eda9b
-                         mkexpr(old),
b3eda9b
-                         mkexpr(mask[i])),
b3eda9b
-                   binop(Iop_And32,
b3eda9b
-                         binop(Iop_Shr32, mkexpr(old), mkU8(shift[i])),
b3eda9b
-                         mkexpr(mask[i]))));
b3eda9b
-      old = nyu1;
b3eda9b
-   }
b3eda9b
-
b3eda9b
-   old = src2;
b3eda9b
-   for (i = 0; i < WORD; i++) {
b3eda9b
-      nyu2 = newTemp(Ity_I32);
b3eda9b
-      assign(nyu2,
b3eda9b
-             binop(Iop_Add32,
b3eda9b
-                   binop(Iop_And32,
b3eda9b
-                         mkexpr(old),
b3eda9b
-                         mkexpr(mask[i])),
b3eda9b
-                   binop(Iop_And32,
b3eda9b
-                         binop(Iop_Shr32, mkexpr(old), mkU8(shift[i])),
b3eda9b
-                         mkexpr(mask[i]))));
b3eda9b
-      old = nyu2;
b3eda9b
-   }
b3eda9b
-   assign(retval, unop(Iop_32Uto64, binop(Iop_Add32, mkexpr(nyu1), mkexpr(nyu2))));
b3eda9b
+   assign(retval,
b3eda9b
+          unop(Iop_32Uto64,
b3eda9b
+               binop(Iop_Add32,
b3eda9b
+                     unop(Iop_PopCount32, mkexpr(src1)),
b3eda9b
+                     unop(Iop_PopCount32, mkexpr(src2)))));
b3eda9b
    return retval;
b3eda9b
 }
b3eda9b
 
b3eda9b
@@ -5715,7 +5695,7 @@ static Bool dis_modulo_int ( UInt theInstr )
b3eda9b
                 rA_address, rS_address);
b3eda9b
 
b3eda9b
             assign( rS, getIReg( rS_address ) );
b3eda9b
-            assign( result, unop( Iop_Ctz32,
b3eda9b
+            assign( result, unop( Iop_CtzNat32,
b3eda9b
                                   unop( Iop_64to32, mkexpr( rS ) ) ) );
b3eda9b
             assign( rA, binop( Iop_32HLto64, mkU32( 0 ), mkexpr( result ) ) );
b3eda9b
 
b3eda9b
@@ -5746,7 +5726,7 @@ static Bool dis_modulo_int ( UInt theInstr )
b3eda9b
                 rA_address, rS_address);
b3eda9b
 
b3eda9b
             assign( rS, getIReg( rS_address ) );
b3eda9b
-            assign( rA, unop( Iop_Ctz64, mkexpr( rS ) ) );
b3eda9b
+            assign( rA, unop( Iop_CtzNat64, mkexpr( rS ) ) );
b3eda9b
 
b3eda9b
             if ( flag_rC == 1 )
b3eda9b
                set_CR0( mkexpr( rA ) );
b3eda9b
@@ -6307,7 +6287,6 @@ static Bool dis_int_logic ( UInt theInstr )
b3eda9b
    IRTemp rS     = newTemp(ty);
b3eda9b
    IRTemp rA     = newTemp(ty);
b3eda9b
    IRTemp rB     = newTemp(ty);
b3eda9b
-   IRExpr* irx;
b3eda9b
    Bool do_rc    = False;
b3eda9b
 
b3eda9b
    assign( rS, getIReg(rS_addr) );
b3eda9b
@@ -6404,26 +6383,16 @@ static Bool dis_int_logic ( UInt theInstr )
b3eda9b
          break;
b3eda9b
          
b3eda9b
       case 0x01A: { // cntlzw (Count Leading Zeros Word, PPC32 p371)
b3eda9b
-         IRExpr* lo32;
b3eda9b
          if (rB_addr!=0) {
b3eda9b
             vex_printf("dis_int_logic(ppc)(cntlzw,rB_addr)\n");
b3eda9b
             return False;
b3eda9b
          }
b3eda9b
-         DIP("cntlzw%s r%u,r%u\n",
b3eda9b
-             flag_rC ? ".":"", rA_addr, rS_addr);
b3eda9b
+         DIP("cntlzw%s r%u,r%u\n", flag_rC ? ".":"", rA_addr, rS_addr);
b3eda9b
          
b3eda9b
          // mode64: count in low word only
b3eda9b
-         lo32 = mode64 ? unop(Iop_64to32, mkexpr(rS)) : mkexpr(rS);
b3eda9b
-         
b3eda9b
-         // Iop_Clz32 undefined for arg==0, so deal with that case:
b3eda9b
-         irx =  binop(Iop_CmpNE32, lo32, mkU32(0));
b3eda9b
-         assign(rA, mkWidenFrom32(ty,
b3eda9b
-                         IRExpr_ITE( irx,
b3eda9b
-                                     unop(Iop_Clz32, lo32),
b3eda9b
-                                     mkU32(32)),
b3eda9b
-                         False));
b3eda9b
-
b3eda9b
-         // TODO: alternatively: assign(rA, verbose_Clz32(rS));
b3eda9b
+         IRExpr* lo32 = mode64 ? unop(Iop_64to32, mkexpr(rS)) : mkexpr(rS);
b3eda9b
+         IRExpr* res32 = unop(Iop_ClzNat32, lo32);
b3eda9b
+         assign(rA, mode64 ? unop(Iop_32Uto64, res32) : res32);
b3eda9b
          break;
b3eda9b
       }
b3eda9b
          
b3eda9b
@@ -6521,14 +6490,8 @@ static Bool dis_int_logic ( UInt theInstr )
b3eda9b
             vex_printf("dis_int_logic(ppc)(cntlzd,rB_addr)\n");
b3eda9b
             return False;
b3eda9b
          }
b3eda9b
-         DIP("cntlzd%s r%u,r%u\n",
b3eda9b
-             flag_rC ? ".":"", rA_addr, rS_addr);
b3eda9b
-         // Iop_Clz64 undefined for arg==0, so deal with that case:
b3eda9b
-         irx =  binop(Iop_CmpNE64, mkexpr(rS), mkU64(0));
b3eda9b
-         assign(rA, IRExpr_ITE( irx,
b3eda9b
-                                unop(Iop_Clz64, mkexpr(rS)),
b3eda9b
-                                mkU64(64) ));
b3eda9b
-         // TODO: alternatively: assign(rA, verbose_Clz64(rS));
b3eda9b
+         DIP("cntlzd%s r%u,r%u\n", flag_rC ? ".":"", rA_addr, rS_addr);
b3eda9b
+         assign(rA, unop(Iop_ClzNat64, mkexpr(rS)));
b3eda9b
          break;
b3eda9b
 
b3eda9b
       case 0x1FC: // cmpb (Power6: compare bytes)
b3eda9b
@@ -6574,8 +6537,9 @@ static Bool dis_int_logic ( UInt theInstr )
b3eda9b
          putFReg( rS_addr, mkexpr(frA));
b3eda9b
          return True;
b3eda9b
       }
b3eda9b
-      case 0x1FA: // popcntd (population count doubleword
b3eda9b
+      case 0x1FA: // popcntd (population count doubleword)
b3eda9b
       {
b3eda9b
+          vassert(mode64);
b3eda9b
     	  DIP("popcntd r%u,r%u\n", rA_addr, rS_addr);
b3eda9b
     	  IRTemp result = gen_POPCOUNT(ty, rS, DWORD);
b3eda9b
     	  putIReg( rA_addr, mkexpr(result) );
b3eda9b
@@ -9154,18 +9118,7 @@ static Bool dis_int_shift ( UInt theInstr )
b3eda9b
 static IRExpr* /* :: Ity_I32 */ gen_byterev32 ( IRTemp t )
b3eda9b
 {
b3eda9b
    vassert(typeOfIRTemp(irsb->tyenv, t) == Ity_I32);
b3eda9b
-   return
b3eda9b
-      binop(Iop_Or32,
b3eda9b
-         binop(Iop_Shl32, mkexpr(t), mkU8(24)),
b3eda9b
-      binop(Iop_Or32,
b3eda9b
-         binop(Iop_And32, binop(Iop_Shl32, mkexpr(t), mkU8(8)), 
b3eda9b
-                          mkU32(0x00FF0000)),
b3eda9b
-      binop(Iop_Or32,
b3eda9b
-         binop(Iop_And32, binop(Iop_Shr32, mkexpr(t), mkU8(8)),
b3eda9b
-                          mkU32(0x0000FF00)),
b3eda9b
-         binop(Iop_And32, binop(Iop_Shr32, mkexpr(t), mkU8(24)),
b3eda9b
-                          mkU32(0x000000FF) )
b3eda9b
-      )));
b3eda9b
+   return unop(Iop_Reverse8sIn32_x1, mkexpr(t));
b3eda9b
 }
b3eda9b
 
b3eda9b
 /* Generates code to swap the byte order in the lower half of an Ity_I32,
b3eda9b
@@ -9225,6 +9178,10 @@ static Bool dis_int_ldst_rev ( UInt theInstr )
b3eda9b
 
b3eda9b
       case 0x214: // ldbrx (Load Doubleword Byte-Reverse Indexed)
b3eda9b
       {
b3eda9b
+         // JRS FIXME:
b3eda9b
+         // * is the host_endness conditional below actually necessary?
b3eda9b
+         // * can we just do a 64-bit load followed by Iop_Reverse8sIn64_x1?
b3eda9b
+         //   That would be a lot more efficient.
b3eda9b
          IRExpr * nextAddr;
b3eda9b
          IRTemp w3 = newTemp( Ity_I32 );
b3eda9b
          IRTemp w4 = newTemp( Ity_I32 );
b3eda9b
@@ -17056,8 +17013,8 @@ dis_av_count_bitTranspose ( UInt theInstr, UInt opc2 )
b3eda9b
       case 0x7C3:    // vpopcntd
b3eda9b
       {
b3eda9b
          if (mode64) {
b3eda9b
-            /* Break vector into 64-bit double words and do the population count
b3eda9b
-             * on each double word.
b3eda9b
+            /* Break vector into 64-bit double words and do the population
b3eda9b
+               count on each double word.
b3eda9b
              */
b3eda9b
             IRType ty = Ity_I64;
b3eda9b
             IRTemp bits0_63   = newTemp(Ity_I64);
b3eda9b
@@ -17077,15 +17034,16 @@ dis_av_count_bitTranspose ( UInt theInstr, UInt opc2 )
b3eda9b
                                       mkexpr( cnt_bits0_63 ) ) );
b3eda9b
          } else {
b3eda9b
             /* Break vector into 32-bit words and do the population count
b3eda9b
-             * on each doubleword.
b3eda9b
+               on each 32-bit word.
b3eda9b
              */
b3eda9b
             IRTemp bits0_31, bits32_63, bits64_95, bits96_127;
b3eda9b
             bits0_31 = bits32_63 = bits64_95 = bits96_127 = IRTemp_INVALID;
b3eda9b
-            IRTemp cnt_bits0_63   = newTemp(Ity_I64);
b3eda9b
+            IRTemp cnt_bits0_63    = newTemp(Ity_I64);
b3eda9b
             IRTemp cnt_bits64_127  = newTemp(Ity_I64);
b3eda9b
 
b3eda9b
             DIP("vpopcntd v%d,v%d\n", vRT_addr, vRB_addr);
b3eda9b
-            breakV128to4x32(mkexpr( vB), &bits96_127, &bits64_95, &bits32_63, &bits0_31 );
b3eda9b
+            breakV128to4x32(mkexpr( vB), &bits96_127, &bits64_95,
b3eda9b
+                                         &bits32_63, &bits0_31 );
b3eda9b
 
b3eda9b
             cnt_bits0_63   = gen_vpopcntd_mode32(bits0_31, bits32_63);
b3eda9b
             cnt_bits64_127 = gen_vpopcntd_mode32(bits64_95, bits96_127);
b3eda9b
@@ -29103,10 +29061,12 @@ DisResult disInstr_PPC_WRK (
b3eda9b
 
b3eda9b
       /* Miscellaneous ISA 2.06 instructions */
b3eda9b
       case 0x1FA: // popcntd
b3eda9b
+         if (!mode64) goto decode_failure;
b3eda9b
+         /* else fallthru */
b3eda9b
       case 0x17A: // popcntw
b3eda9b
       case 0x7A:  // popcntb
b3eda9b
-	  if (dis_int_logic( theInstr )) goto decode_success;
b3eda9b
-    	  goto decode_failure;
b3eda9b
+         if (dis_int_logic( theInstr )) goto decode_success;
b3eda9b
+         goto decode_failure;
b3eda9b
 
b3eda9b
       case 0x0FC: // bpermd
b3eda9b
          if (!mode64) goto decode_failure;
b3eda9b
@@ -29669,94 +29629,6 @@ DisResult disInstr_PPC ( IRSB*        irsb_IN,
b3eda9b
    return dres;
b3eda9b
 }
b3eda9b
 
b3eda9b
-
b3eda9b
-/*------------------------------------------------------------*/
b3eda9b
-/*--- Unused stuff                                         ---*/
b3eda9b
-/*------------------------------------------------------------*/
b3eda9b
-
b3eda9b
-///* A potentially more memcheck-friendly implementation of Clz32, with
b3eda9b
-//   the boundary case Clz32(0) = 32, which is what ppc requires. */
b3eda9b
-//
b3eda9b
-//static IRExpr* /* :: Ity_I32 */ verbose_Clz32 ( IRTemp arg )
b3eda9b
-//{
b3eda9b
-//   /* Welcome ... to SSA R Us. */
b3eda9b
-//   IRTemp n1  = newTemp(Ity_I32);
b3eda9b
-//   IRTemp n2  = newTemp(Ity_I32);
b3eda9b
-//   IRTemp n3  = newTemp(Ity_I32);
b3eda9b
-//   IRTemp n4  = newTemp(Ity_I32);
b3eda9b
-//   IRTemp n5  = newTemp(Ity_I32);
b3eda9b
-//   IRTemp n6  = newTemp(Ity_I32);
b3eda9b
-//   IRTemp n7  = newTemp(Ity_I32);
b3eda9b
-//   IRTemp n8  = newTemp(Ity_I32);
b3eda9b
-//   IRTemp n9  = newTemp(Ity_I32);
b3eda9b
-//   IRTemp n10 = newTemp(Ity_I32);
b3eda9b
-//   IRTemp n11 = newTemp(Ity_I32);
b3eda9b
-//   IRTemp n12 = newTemp(Ity_I32);
b3eda9b
-//
b3eda9b
-//   /* First, propagate the most significant 1-bit into all lower
b3eda9b
-//      positions in the word. */
b3eda9b
-//   /* unsigned int clz ( unsigned int n )
b3eda9b
-//      {
b3eda9b
-//         n |= (n >> 1);
b3eda9b
-//         n |= (n >> 2);
b3eda9b
-//         n |= (n >> 4);
b3eda9b
-//         n |= (n >> 8);
b3eda9b
-//         n |= (n >> 16);
b3eda9b
-//         return bitcount(~n);
b3eda9b
-//      }
b3eda9b
-//   */
b3eda9b
-//   assign(n1, mkexpr(arg));
b3eda9b
-//   assign(n2, binop(Iop_Or32, mkexpr(n1), binop(Iop_Shr32, mkexpr(n1), mkU8(1))));
b3eda9b
-//   assign(n3, binop(Iop_Or32, mkexpr(n2), binop(Iop_Shr32, mkexpr(n2), mkU8(2))));
b3eda9b
-//   assign(n4, binop(Iop_Or32, mkexpr(n3), binop(Iop_Shr32, mkexpr(n3), mkU8(4))));
b3eda9b
-//   assign(n5, binop(Iop_Or32, mkexpr(n4), binop(Iop_Shr32, mkexpr(n4), mkU8(8))));
b3eda9b
-//   assign(n6, binop(Iop_Or32, mkexpr(n5), binop(Iop_Shr32, mkexpr(n5), mkU8(16))));
b3eda9b
-//   /* This gives a word of the form 0---01---1.  Now invert it, giving
b3eda9b
-//      a word of the form 1---10---0, then do a population-count idiom
b3eda9b
-//      (to count the 1s, which is the number of leading zeroes, or 32
b3eda9b
-//      if the original word was 0. */
b3eda9b
-//   assign(n7, unop(Iop_Not32, mkexpr(n6)));
b3eda9b
-//
b3eda9b
-//   /* unsigned int bitcount ( unsigned int n )
b3eda9b
-//      {
b3eda9b
-//         n = n - ((n >> 1) & 0x55555555);
b3eda9b
-//         n = (n & 0x33333333) + ((n >> 2) & 0x33333333);
b3eda9b
-//         n = (n + (n >> 4)) & 0x0F0F0F0F;
b3eda9b
-//         n = n + (n >> 8);
b3eda9b
-//         n = (n + (n >> 16)) & 0x3F;
b3eda9b
-//         return n;
b3eda9b
-//      }
b3eda9b
-//   */
b3eda9b
-//   assign(n8, 
b3eda9b
-//          binop(Iop_Sub32, 
b3eda9b
-//                mkexpr(n7),  
b3eda9b
-//                binop(Iop_And32, 
b3eda9b
-//                      binop(Iop_Shr32, mkexpr(n7), mkU8(1)),
b3eda9b
-//                      mkU32(0x55555555))));
b3eda9b
-//   assign(n9,
b3eda9b
-//          binop(Iop_Add32,
b3eda9b
-//                binop(Iop_And32, mkexpr(n8), mkU32(0x33333333)),
b3eda9b
-//                binop(Iop_And32,
b3eda9b
-//                      binop(Iop_Shr32, mkexpr(n8), mkU8(2)),
b3eda9b
-//                      mkU32(0x33333333))));
b3eda9b
-//   assign(n10,
b3eda9b
-//          binop(Iop_And32,
b3eda9b
-//                binop(Iop_Add32, 
b3eda9b
-//                      mkexpr(n9), 
b3eda9b
-//                      binop(Iop_Shr32, mkexpr(n9), mkU8(4))),
b3eda9b
-//                mkU32(0x0F0F0F0F)));
b3eda9b
-//   assign(n11,
b3eda9b
-//          binop(Iop_Add32,
b3eda9b
-//                mkexpr(n10),
b3eda9b
-//                binop(Iop_Shr32, mkexpr(n10), mkU8(8))));
b3eda9b
-//   assign(n12,
b3eda9b
-//          binop(Iop_Add32,
b3eda9b
-//                mkexpr(n11),
b3eda9b
-//                binop(Iop_Shr32, mkexpr(n11), mkU8(16))));
b3eda9b
-//   return
b3eda9b
-//      binop(Iop_And32, mkexpr(n12), mkU32(0x3F));
b3eda9b
-//}
b3eda9b
-
b3eda9b
 /*--------------------------------------------------------------------*/
b3eda9b
 /*--- end                                         guest_ppc_toIR.c ---*/
b3eda9b
 /*--------------------------------------------------------------------*/