be4097
commit 7bdd6731f8337fd57bf91772aa1917e44239d7c2
be4097
Author: Mark Wielaard <mark@klomp.org>
be4097
Date:   Fri Dec 7 10:42:22 2018 -0500
be4097
be4097
    Implement ppc64 ldbrx as 64-bit load and Iop_Reverse8sIn64_x1.
be4097
    
be4097
    This makes it possible for memcheck to analyse the new gcc strcmp
be4097
    inlined code correctly even if the ldbrx load is partly beyond an
be4097
    addressable block.
be4097
    
be4097
    Partially resolves bug 386945.
be4097
be4097
diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c
be4097
index 8977d4f..a81dace 100644
be4097
--- a/VEX/priv/guest_ppc_toIR.c
be4097
+++ b/VEX/priv/guest_ppc_toIR.c
be4097
@@ -9178,24 +9178,28 @@ static Bool dis_int_ldst_rev ( UInt theInstr )
be4097
 
be4097
       case 0x214: // ldbrx (Load Doubleword Byte-Reverse Indexed)
be4097
       {
be4097
-         // JRS FIXME:
be4097
-         // * is the host_endness conditional below actually necessary?
be4097
-         // * can we just do a 64-bit load followed by by Iop_Reverse8sIn64_x1?
be4097
-         //   That would be a lot more efficient.
be4097
-         IRExpr * nextAddr;
be4097
-         IRTemp w3 = newTemp( Ity_I32 );
be4097
-         IRTemp w4 = newTemp( Ity_I32 );
be4097
-         DIP("ldbrx r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
be4097
-         assign( w1, load( Ity_I32, mkexpr( EA ) ) );
be4097
-         assign( w2, gen_byterev32( w1 ) );
be4097
-         nextAddr = binop( mkSzOp( ty, Iop_Add8 ), mkexpr( EA ),
be4097
-                           ty == Ity_I64 ? mkU64( 4 ) : mkU32( 4 ) );
be4097
-         assign( w3, load( Ity_I32, nextAddr ) );
be4097
-         assign( w4, gen_byterev32( w3 ) );
be4097
-         if (host_endness == VexEndnessLE)
be4097
-            putIReg( rD_addr, binop( Iop_32HLto64, mkexpr( w2 ), mkexpr( w4 ) ) );
be4097
+         /* Caller makes sure we are only called in mode64. */
be4097
+
be4097
+         /* If we supported swapping LE/BE loads in the backend then we could
be4097
+            just load the value with the bytes reversed by doing a BE load
be4097
+            on an LE machine and an LE load on a BE machine.
be4097
+
be4097
+         IRTemp dw1 = newTemp(Ity_I64);
be4097
+         if (host_endness == VexEndnessBE)
be4097
+            assign( dw1, IRExpr_Load(Iend_LE, Ity_I64, mkexpr(EA)));
be4097
          else
be4097
-            putIReg( rD_addr, binop( Iop_32HLto64, mkexpr( w4 ), mkexpr( w2 ) ) );
be4097
+            assign( dw1, IRExpr_Load(Iend_BE, Ity_I64, mkexpr(EA)));
be4097
+         putIReg( rD_addr, mkexpr(dw1) );
be4097
+
be4097
+         But since we currently don't, we load the value as is and then
be4097
+         switch it around with Iop_Reverse8sIn64_x1. */
be4097
+
be4097
+         IRTemp dw1 = newTemp(Ity_I64);
be4097
+         IRTemp dw2 = newTemp(Ity_I64);
be4097
+         DIP("ldbrx r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
be4097
+         assign( dw1, load(Ity_I64, mkexpr(EA)) );
be4097
+         assign( dw2, unop(Iop_Reverse8sIn64_x1, mkexpr(dw1)) );
be4097
+         putIReg( rD_addr, mkexpr(dw2) );
be4097
          break;
be4097
       }
be4097
 
be4097
diff --git a/VEX/priv/host_ppc_isel.c b/VEX/priv/host_ppc_isel.c
be4097
index 750cf8d..4fc3eb5 100644
be4097
--- a/VEX/priv/host_ppc_isel.c
be4097
+++ b/VEX/priv/host_ppc_isel.c
be4097
@@ -2210,6 +2210,63 @@ static HReg iselWordExpr_R_wrk ( ISelEnv* env, const IRExpr* e,
be4097
          return rr;
be4097
       }
be4097
 
be4097
+      case Iop_Reverse8sIn64_x1: {
be4097
+         /* See Iop_Reverse8sIn32_x1, but extended to 64-bit.
be4097
+            Can only be used in 64bit mode.  */
be4097
+         vassert (mode64);
be4097
+
be4097
+         HReg r_src  = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
be4097
+         HReg rr     = newVRegI(env);
be4097
+         HReg rMask  = newVRegI(env);
be4097
+         HReg rnMask = newVRegI(env);
be4097
+         HReg rtHi   = newVRegI(env);
be4097
+         HReg rtLo   = newVRegI(env);
be4097
+
be4097
+         // Copy r_src since we need to modify it
be4097
+         addInstr(env, mk_iMOVds_RR(rr, r_src));
be4097
+
be4097
+         // r = (r & 0x00FF00FF00FF00FF) << 8 | (r & 0xFF00FF00FF00FF00) >> 8
be4097
+         addInstr(env, PPCInstr_LI(rMask, 0x00FF00FF00FF00FFULL,
be4097
+                                   True/* 64bit imm*/));
be4097
+         addInstr(env, PPCInstr_Unary(Pun_NOT, rnMask, rMask));
be4097
+         addInstr(env, PPCInstr_Alu(Palu_AND, rtHi, rr, PPCRH_Reg(rMask)));
be4097
+         addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64 bit shift*/,
be4097
+                                     rtHi, rtHi,
be4097
+                                     PPCRH_Imm(False/*!signed imm*/, 8)));
be4097
+         addInstr(env, PPCInstr_Alu(Palu_AND, rtLo, rr, PPCRH_Reg(rnMask)));
be4097
+         addInstr(env, PPCInstr_Shft(Pshft_SHR, False/*64 bit shift*/,
be4097
+                                     rtLo, rtLo,
be4097
+                                     PPCRH_Imm(False/*!signed imm*/, 8)));
be4097
+         addInstr(env, PPCInstr_Alu(Palu_OR, rr, rtHi, PPCRH_Reg(rtLo)));
be4097
+
be4097
+         // r = (r & 0x0000FFFF0000FFFF) << 16 | (r & 0xFFFF0000FFFF0000) >> 16
be4097
+         addInstr(env, PPCInstr_LI(rMask, 0x0000FFFF0000FFFFULL,
be4097
+                                   True/* 64bit imm*/));
be4097
+         addInstr(env, PPCInstr_Unary(Pun_NOT, rnMask, rMask));
be4097
+         addInstr(env, PPCInstr_Alu(Palu_AND, rtHi, rr, PPCRH_Reg(rMask)));
be4097
+         addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64 bit shift*/,
be4097
+                                     rtHi, rtHi,
be4097
+                                     PPCRH_Imm(False/*!signed imm*/, 16)));
be4097
+         addInstr(env, PPCInstr_Alu(Palu_AND, rtLo, rr, PPCRH_Reg(rnMask)));
be4097
+         addInstr(env, PPCInstr_Shft(Pshft_SHR, False/*64 bit shift*/,
be4097
+                                     rtLo, rtLo,
be4097
+                                     PPCRH_Imm(False/*!signed imm*/, 16)));
be4097
+         addInstr(env, PPCInstr_Alu(Palu_OR, rr, rtHi, PPCRH_Reg(rtLo)));
be4097
+
be4097
+         // r = (r & 0x00000000FFFFFFFF) << 32 | (r & 0xFFFFFFFF00000000) >> 32
be4097
+         /* We don't need to mask anymore, just two more shifts and an or.  */
be4097
+         addInstr(env, mk_iMOVds_RR(rtLo, rr));
be4097
+         addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64 bit shift*/,
be4097
+                                     rtLo, rtLo,
be4097
+                                     PPCRH_Imm(False/*!signed imm*/, 32)));
be4097
+         addInstr(env, PPCInstr_Shft(Pshft_SHR, False/*64 bit shift*/,
be4097
+                                     rr, rr,
be4097
+                                     PPCRH_Imm(False/*!signed imm*/, 32)));
be4097
+         addInstr(env, PPCInstr_Alu(Palu_OR, rr, rr, PPCRH_Reg(rtLo)));
be4097
+
be4097
+         return rr;
be4097
+      }
be4097
+
be4097
       case Iop_Left8:
be4097
       case Iop_Left16:
be4097
       case Iop_Left32: