b3eda9b
commit 4271989815b5fc933c1e29bc75507c2726dc3738
b3eda9b
Author: Julian Seward <jseward@acm.org>
b3eda9b
Date:   Tue Nov 20 10:52:33 2018 +0100
b3eda9b
b3eda9b
    Add some new IROps to support improved Memcheck analysis of strlen etc.
b3eda9b
    
b3eda9b
    This is part of the fix for bug 386945.  It adds the following IROps, plus
b3eda9b
    their supporting type- and printing- fragments:
b3eda9b
    
b3eda9b
    Iop_Reverse8sIn32_x1: 32-bit byteswap.  A fancy name, but it is consistent
b3eda9b
    with naming for the other swapping IROps that already exist.
b3eda9b
    
b3eda9b
    Iop_PopCount64, Iop_PopCount32: population count
b3eda9b
    
b3eda9b
    Iop_ClzNat64, Iop_ClzNat32, Iop_CtzNat64, Iop_CtzNat32: counting leading and
b3eda9b
    trailing zeroes, with "natural" (Nat) semantics for a zero input, meaning, in
b3eda9b
    the case of zero input, return the number of bits in the word.  These
b3eda9b
    functionally overlap with the existing Iop_Clz64, Iop_Clz32, Iop_Ctz64,
b3eda9b
    Iop_Ctz32.  The existing operations are undefined in case of a zero input.
b3eda9b
    Adding these new variants avoids the complexity of having to change the
b3eda9b
    declared semantics of the existing operations.  Instead, the existing
b3eda9b
    operations are deprecated but still available for use.
b3eda9b
b3eda9b
diff --git a/VEX/priv/ir_defs.c b/VEX/priv/ir_defs.c
b3eda9b
index 823b6be..3221033 100644
b3eda9b
--- a/VEX/priv/ir_defs.c
b3eda9b
+++ b/VEX/priv/ir_defs.c
b3eda9b
@@ -194,6 +194,14 @@ void ppIROp ( IROp op )
b3eda9b
       case Iop_Ctz64:    vex_printf("Ctz64"); return;
b3eda9b
       case Iop_Ctz32:    vex_printf("Ctz32"); return;
b3eda9b
 
b3eda9b
+      case Iop_ClzNat64: vex_printf("ClzNat64"); return;
b3eda9b
+      case Iop_ClzNat32: vex_printf("ClzNat32"); return;
b3eda9b
+      case Iop_CtzNat64: vex_printf("CtzNat64"); return;
b3eda9b
+      case Iop_CtzNat32: vex_printf("CtzNat32"); return;
b3eda9b
+
b3eda9b
+      case Iop_PopCount64: vex_printf("PopCount64"); return;
b3eda9b
+      case Iop_PopCount32: vex_printf("PopCount32"); return;
b3eda9b
+
b3eda9b
       case Iop_CmpLT32S: vex_printf("CmpLT32S"); return;
b3eda9b
       case Iop_CmpLE32S: vex_printf("CmpLE32S"); return;
b3eda9b
       case Iop_CmpLT32U: vex_printf("CmpLT32U"); return;
b3eda9b
@@ -395,6 +403,7 @@ void ppIROp ( IROp op )
b3eda9b
 
b3eda9b
       case Iop_CmpNEZ16x2: vex_printf("CmpNEZ16x2"); return;
b3eda9b
       case Iop_CmpNEZ8x4:  vex_printf("CmpNEZ8x4"); return;
b3eda9b
+      case Iop_Reverse8sIn32_x1: vex_printf("Reverse8sIn32_x1"); return;
b3eda9b
 
b3eda9b
       case Iop_CmpF64:    vex_printf("CmpF64"); return;
b3eda9b
 
b3eda9b
@@ -2719,6 +2728,7 @@ void typeOfPrimop ( IROp op,
b3eda9b
          UNARY(Ity_I16, Ity_I16);
b3eda9b
       case Iop_Not32:
b3eda9b
       case Iop_CmpNEZ16x2: case Iop_CmpNEZ8x4:
b3eda9b
+      case Iop_Reverse8sIn32_x1:
b3eda9b
          UNARY(Ity_I32, Ity_I32);
b3eda9b
 
b3eda9b
       case Iop_Not64:
b3eda9b
@@ -2782,9 +2792,13 @@ void typeOfPrimop ( IROp op,
b3eda9b
          BINARY(Ity_I64,Ity_I64, Ity_I128);
b3eda9b
 
b3eda9b
       case Iop_Clz32: case Iop_Ctz32:
b3eda9b
+      case Iop_ClzNat32: case Iop_CtzNat32:
b3eda9b
+      case Iop_PopCount32:
b3eda9b
          UNARY(Ity_I32, Ity_I32);
b3eda9b
 
b3eda9b
       case Iop_Clz64: case Iop_Ctz64:
b3eda9b
+      case Iop_ClzNat64: case Iop_CtzNat64:
b3eda9b
+      case Iop_PopCount64:
b3eda9b
          UNARY(Ity_I64, Ity_I64);
b3eda9b
 
b3eda9b
       case Iop_DivU32: case Iop_DivS32: case Iop_DivU32E: case Iop_DivS32E:
b3eda9b
diff --git a/VEX/pub/libvex_ir.h b/VEX/pub/libvex_ir.h
b3eda9b
index 17bcb55..93fa5ac 100644
b3eda9b
--- a/VEX/pub/libvex_ir.h
b3eda9b
+++ b/VEX/pub/libvex_ir.h
b3eda9b
@@ -452,12 +452,21 @@ typedef
b3eda9b
       Iop_MullS8, Iop_MullS16, Iop_MullS32, Iop_MullS64,
b3eda9b
       Iop_MullU8, Iop_MullU16, Iop_MullU32, Iop_MullU64,
b3eda9b
 
b3eda9b
-      /* Wierdo integer stuff */
b3eda9b
+      /* Counting bits */
b3eda9b
+      /* Ctz64/Ctz32/Clz64/Clz32 are UNDEFINED when given arguments of zero.
b3eda9b
+         You must ensure they are never given a zero argument.  As of
b3eda9b
+         2018-Nov-14 they are deprecated.  Try to use the Nat variants
b3eda9b
+         immediately below, if you can.
b3eda9b
+      */
b3eda9b
       Iop_Clz64, Iop_Clz32,   /* count leading zeroes */
b3eda9b
       Iop_Ctz64, Iop_Ctz32,   /* count trailing zeros */
b3eda9b
-      /* Ctz64/Ctz32/Clz64/Clz32 are UNDEFINED when given arguments of
b3eda9b
-         zero.  You must ensure they are never given a zero argument.
b3eda9b
-      */
b3eda9b
+      /* Count leading/trailing zeroes, with "natural" semantics for the
b3eda9b
+         case where the input is zero: then the result is the number of bits
b3eda9b
+         in the word. */
b3eda9b
+      Iop_ClzNat64, Iop_ClzNat32,
b3eda9b
+      Iop_CtzNat64, Iop_CtzNat32,
b3eda9b
+      /* Population count -- compute the number of 1 bits in the argument. */
b3eda9b
+      Iop_PopCount64, Iop_PopCount32,
b3eda9b
 
b3eda9b
       /* Standard integer comparisons */
b3eda9b
       Iop_CmpLT32S, Iop_CmpLT64S,
b3eda9b
@@ -831,6 +840,9 @@ typedef
b3eda9b
       /* MISC (vector integer cmp != 0) */
b3eda9b
       Iop_CmpNEZ16x2, Iop_CmpNEZ8x4,
b3eda9b
 
b3eda9b
+      /* Byte swap in a 32-bit word */
b3eda9b
+      Iop_Reverse8sIn32_x1,
b3eda9b
+
b3eda9b
       /* ------------------ 64-bit SIMD FP ------------------------ */
b3eda9b
 
b3eda9b
       /* Convertion to/from int */
b3eda9b
@@ -1034,8 +1046,9 @@ typedef
b3eda9b
       Iop_Slice64,  // (I64, I64, I8) -> I64
b3eda9b
 
b3eda9b
       /* REVERSE the order of chunks in vector lanes.  Chunks must be
b3eda9b
-         smaller than the vector lanes (obviously) and so may be 8-,
b3eda9b
-         16- and 32-bit in size. */
b3eda9b
+         smaller than the vector lanes (obviously) and so may be 8-, 16- and
b3eda9b
+         32-bit in size.  Note that the degenerate case,
b3eda9b
+         Iop_Reverse8sIn64_x1, is simply a vanilla byte-swap. */
b3eda9b
       /* Examples:
b3eda9b
             Reverse8sIn16_x4([a,b,c,d,e,f,g,h]) = [b,a,d,c,f,e,h,g]
b3eda9b
             Reverse8sIn32_x2([a,b,c,d,e,f,g,h]) = [d,c,b,a,h,g,f,e]