carlwgeorge / rpms / qemu

Forked from rpms/qemu a year ago
Clone
5544c1b
From bf408071104de13f79a0c3c8cac892f440462e7c Mon Sep 17 00:00:00 2001
5544c1b
From: Aurelien Jarno <aurelien@aurel32.net>
5544c1b
Date: Tue, 11 Sep 2012 12:31:21 +0200
5544c1b
Subject: [PATCH] tcg/optimize: rework copy propagation
5544c1b
5544c1b
The copy propagation pass tries to keep track of what is a copy of what
5544c1b
and what has a copy of what, and in addition it keeps a circular list
5544c1b
of all the copies. Unfortunately this doesn't fully work: a mov from
5544c1b
a temp which has a state "COPY" changed it into a state "HAS_COPY".
5544c1b
Later when this temp is used again, it is considered as not having a
5544c1b
copy and thus no propagation is done.
5544c1b
5544c1b
This patch fixes that by removing the hierarchy between copies, and thus
5544c1b
only keeping a "COPY" state both meaning "is a copy" and "has a copy".
5544c1b
The decision of which copy to use is deferred to the actual temp
5544c1b
replacement. At this stage there is not one best choice to make, but only
5544c1b
better choices than others. To make the best choice the operation
5544c1b
would have to be parsed in reverse to know if a temp is going to be
5544c1b
used later or not. That is what is done by the liveness analysis. At this
5544c1b
stage it is known that globals will be always live, that local temps
5544c1b
will be dead at the end of the translation block, and that the temps
5544c1b
will be dead at the end of the basic block. This means that this stage
5544c1b
should try to replace temps by local temps or globals and local temps
5544c1b
by globals.
5544c1b
5544c1b
Reviewed-by: Richard Henderson <rth@twiddle.net>
5544c1b
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
5544c1b
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
5544c1b
---
5544c1b
 tcg/optimize.c | 167 +++++++++++++++++++++++++++++++--------------------------
5544c1b
 1 file changed, 92 insertions(+), 75 deletions(-)
5544c1b
5544c1b
diff --git a/tcg/optimize.c b/tcg/optimize.c
5544c1b
index da8dffe..1904b39 100644
5544c1b
--- a/tcg/optimize.c
5544c1b
+++ b/tcg/optimize.c
5544c1b
@@ -39,7 +39,6 @@ typedef enum {
5544c1b
     TCG_TEMP_UNDEF = 0,
5544c1b
     TCG_TEMP_CONST,
5544c1b
     TCG_TEMP_COPY,
5544c1b
-    TCG_TEMP_HAS_COPY
5544c1b
 } tcg_temp_state;
5544c1b
 
5544c1b
 struct tcg_temp_info {
5544c1b
@@ -51,39 +50,19 @@ struct tcg_temp_info {
5544c1b
 
5544c1b
 static struct tcg_temp_info temps[TCG_MAX_TEMPS];
5544c1b
 
5544c1b
-/* Reset TEMP's state to TCG_TEMP_UNDEF.  If TEMP was a representative of some
5544c1b
-   class of equivalent temp's, a new representative should be chosen in this
5544c1b
-   class. */
5544c1b
-static void reset_temp(TCGArg temp, int nb_temps, int nb_globals)
5544c1b
+/* Reset TEMP's state to TCG_TEMP_UNDEF.  If TEMP only had one copy, remove
5544c1b
+   the copy flag from the left temp.  */
5544c1b
+static void reset_temp(TCGArg temp)
5544c1b
 {
5544c1b
-    int i;
5544c1b
-    TCGArg new_base = (TCGArg)-1;
5544c1b
-    if (temps[temp].state == TCG_TEMP_HAS_COPY) {
5544c1b
-        for (i = temps[temp].next_copy; i != temp; i = temps[i].next_copy) {
5544c1b
-            if (i >= nb_globals) {
5544c1b
-                temps[i].state = TCG_TEMP_HAS_COPY;
5544c1b
-                new_base = i;
5544c1b
-                break;
5544c1b
-            }
5544c1b
-        }
5544c1b
-        for (i = temps[temp].next_copy; i != temp; i = temps[i].next_copy) {
5544c1b
-            if (new_base == (TCGArg)-1) {
5544c1b
-                temps[i].state = TCG_TEMP_UNDEF;
5544c1b
-            } else {
5544c1b
-                temps[i].val = new_base;
5544c1b
-            }
5544c1b
+    if (temps[temp].state == TCG_TEMP_COPY) {
5544c1b
+        if (temps[temp].prev_copy == temps[temp].next_copy) {
5544c1b
+            temps[temps[temp].next_copy].state = TCG_TEMP_UNDEF;
5544c1b
+        } else {
5544c1b
+            temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
5544c1b
+            temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
5544c1b
         }
5544c1b
-        temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
5544c1b
-        temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
5544c1b
-    } else if (temps[temp].state == TCG_TEMP_COPY) {
5544c1b
-        temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
5544c1b
-        temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
5544c1b
-        new_base = temps[temp].val;
5544c1b
     }
5544c1b
     temps[temp].state = TCG_TEMP_UNDEF;
5544c1b
-    if (new_base != (TCGArg)-1 && temps[new_base].next_copy == new_base) {
5544c1b
-        temps[new_base].state = TCG_TEMP_UNDEF;
5544c1b
-    }
5544c1b
 }
5544c1b
 
5544c1b
 static int op_bits(TCGOpcode op)
5544c1b
@@ -106,34 +85,83 @@ static TCGOpcode op_to_movi(TCGOpcode op)
5544c1b
     }
5544c1b
 }
5544c1b
 
5544c1b
+static TCGArg find_better_copy(TCGContext *s, TCGArg temp)
5544c1b
+{
5544c1b
+    TCGArg i;
5544c1b
+
5544c1b
+    /* If this is already a global, we can't do better. */
5544c1b
+    if (temp < s->nb_globals) {
5544c1b
+        return temp;
5544c1b
+    }
5544c1b
+
5544c1b
+    /* Search for a global first. */
5544c1b
+    for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
5544c1b
+        if (i < s->nb_globals) {
5544c1b
+            return i;
5544c1b
+        }
5544c1b
+    }
5544c1b
+
5544c1b
+    /* If it is a temp, search for a temp local. */
5544c1b
+    if (!s->temps[temp].temp_local) {
5544c1b
+        for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
5544c1b
+            if (s->temps[i].temp_local) {
5544c1b
+                return i;
5544c1b
+            }
5544c1b
+        }
5544c1b
+    }
5544c1b
+
5544c1b
+    /* Failure to find a better representation, return the same temp. */
5544c1b
+    return temp;
5544c1b
+}
5544c1b
+
5544c1b
+static bool temps_are_copies(TCGArg arg1, TCGArg arg2)
5544c1b
+{
5544c1b
+    TCGArg i;
5544c1b
+
5544c1b
+    if (arg1 == arg2) {
5544c1b
+        return true;
5544c1b
+    }
5544c1b
+
5544c1b
+    if (temps[arg1].state != TCG_TEMP_COPY
5544c1b
+        || temps[arg2].state != TCG_TEMP_COPY) {
5544c1b
+        return false;
5544c1b
+    }
5544c1b
+
5544c1b
+    for (i = temps[arg1].next_copy ; i != arg1 ; i = temps[i].next_copy) {
5544c1b
+        if (i == arg2) {
5544c1b
+            return true;
5544c1b
+        }
5544c1b
+    }
5544c1b
+
5544c1b
+    return false;
5544c1b
+}
5544c1b
+
5544c1b
 static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args,
5544c1b
                             TCGArg dst, TCGArg src)
5544c1b
 {
5544c1b
-        reset_temp(dst, s->nb_temps, s->nb_globals);
5544c1b
-        assert(temps[src].state != TCG_TEMP_COPY);
5544c1b
-        /* Only consider temps with the same type (width) as copies. */
5544c1b
-        if (src >= s->nb_globals && s->temps[dst].type == s->temps[src].type) {
5544c1b
-            assert(temps[src].state != TCG_TEMP_CONST);
5544c1b
-            if (temps[src].state != TCG_TEMP_HAS_COPY) {
5544c1b
-                temps[src].state = TCG_TEMP_HAS_COPY;
5544c1b
+        reset_temp(dst);
5544c1b
+        assert(temps[src].state != TCG_TEMP_CONST);
5544c1b
+
5544c1b
+        if (s->temps[src].type == s->temps[dst].type) {
5544c1b
+            if (temps[src].state != TCG_TEMP_COPY) {
5544c1b
+                temps[src].state = TCG_TEMP_COPY;
5544c1b
                 temps[src].next_copy = src;
5544c1b
                 temps[src].prev_copy = src;
5544c1b
             }
5544c1b
             temps[dst].state = TCG_TEMP_COPY;
5544c1b
-            temps[dst].val = src;
5544c1b
             temps[dst].next_copy = temps[src].next_copy;
5544c1b
             temps[dst].prev_copy = src;
5544c1b
             temps[temps[dst].next_copy].prev_copy = dst;
5544c1b
             temps[src].next_copy = dst;
5544c1b
         }
5544c1b
+
5544c1b
         gen_args[0] = dst;
5544c1b
         gen_args[1] = src;
5544c1b
 }
5544c1b
 
5544c1b
-static void tcg_opt_gen_movi(TCGArg *gen_args, TCGArg dst, TCGArg val,
5544c1b
-                             int nb_temps, int nb_globals)
5544c1b
+static void tcg_opt_gen_movi(TCGArg *gen_args, TCGArg dst, TCGArg val)
5544c1b
 {
5544c1b
-        reset_temp(dst, nb_temps, nb_globals);
5544c1b
+        reset_temp(dst);
5544c1b
         temps[dst].state = TCG_TEMP_CONST;
5544c1b
         temps[dst].val = val;
5544c1b
         gen_args[0] = dst;
5544c1b
@@ -324,7 +352,6 @@ static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
5544c1b
     tcg_abort();
5544c1b
 }
5544c1b
 
5544c1b
-
5544c1b
 /* Propagate constants and copies, fold constant expressions. */
5544c1b
 static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
5544c1b
                                     TCGArg *args, TCGOpDef *tcg_op_defs)
5544c1b
@@ -338,10 +365,8 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
5544c1b
 
5544c1b
     /* Array VALS has an element for each temp.
5544c1b
        If this temp holds a constant then its value is kept in VALS' element.
5544c1b
-       If this temp is a copy of other ones then this equivalence class'
5544c1b
-       representative is kept in VALS' element.
5544c1b
-       If this temp is neither copy nor constant then corresponding VALS'
5544c1b
-       element is unused. */
5544c1b
+       If this temp is a copy of other ones then the other copies are
5544c1b
+       available through the doubly linked circular list. */
5544c1b
 
5544c1b
     nb_temps = s->nb_temps;
5544c1b
     nb_globals = s->nb_globals;
5544c1b
@@ -357,7 +382,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
5544c1b
             assert(op != INDEX_op_call);
5544c1b
             for (i = def->nb_oargs; i < def->nb_oargs + def->nb_iargs; i++) {
5544c1b
                 if (temps[args[i]].state == TCG_TEMP_COPY) {
5544c1b
-                    args[i] = temps[args[i]].val;
5544c1b
+                    args[i] = find_better_copy(s, args[i]);
5544c1b
                 }
5544c1b
             }
5544c1b
         }
5544c1b
@@ -429,7 +454,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
5544c1b
             if (temps[args[1]].state == TCG_TEMP_CONST
5544c1b
                 && temps[args[1]].val == 0) {
5544c1b
                 gen_opc_buf[op_index] = op_to_movi(op);
5544c1b
-                tcg_opt_gen_movi(gen_args, args[0], 0, nb_temps, nb_globals);
5544c1b
+                tcg_opt_gen_movi(gen_args, args[0], 0);
5544c1b
                 args += 3;
5544c1b
                 gen_args += 2;
5544c1b
                 continue;
5544c1b
@@ -456,9 +481,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
5544c1b
             }
5544c1b
             if (temps[args[2]].state == TCG_TEMP_CONST
5544c1b
                 && temps[args[2]].val == 0) {
5544c1b
-                if ((temps[args[0]].state == TCG_TEMP_COPY
5544c1b
-                    && temps[args[0]].val == args[1])
5544c1b
-                    || args[0] == args[1]) {
5544c1b
+                if (temps_are_copies(args[0], args[1])) {
5544c1b
                     gen_opc_buf[op_index] = INDEX_op_nop;
5544c1b
                 } else {
5544c1b
                     gen_opc_buf[op_index] = op_to_mov(op);
5544c1b
@@ -480,7 +503,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
5544c1b
             if ((temps[args[2]].state == TCG_TEMP_CONST
5544c1b
                 && temps[args[2]].val == 0)) {
5544c1b
                 gen_opc_buf[op_index] = op_to_movi(op);
5544c1b
-                tcg_opt_gen_movi(gen_args, args[0], 0, nb_temps, nb_globals);
5544c1b
+                tcg_opt_gen_movi(gen_args, args[0], 0);
5544c1b
                 args += 3;
5544c1b
                 gen_args += 2;
5544c1b
                 continue;
5544c1b
@@ -495,7 +518,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
5544c1b
         CASE_OP_32_64(or):
5544c1b
         CASE_OP_32_64(and):
5544c1b
             if (args[1] == args[2]) {
5544c1b
-                if (args[1] == args[0]) {
5544c1b
+                if (temps_are_copies(args[0], args[1])) {
5544c1b
                     gen_opc_buf[op_index] = INDEX_op_nop;
5544c1b
                 } else {
5544c1b
                     gen_opc_buf[op_index] = op_to_mov(op);
5544c1b
@@ -515,9 +538,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
5544c1b
            allocator where needed and possible.  Also detect copies. */
5544c1b
         switch (op) {
5544c1b
         CASE_OP_32_64(mov):
5544c1b
-            if ((temps[args[1]].state == TCG_TEMP_COPY
5544c1b
-                && temps[args[1]].val == args[0])
5544c1b
-                || args[0] == args[1]) {
5544c1b
+            if (temps_are_copies(args[0], args[1])) {
5544c1b
                 args += 2;
5544c1b
                 gen_opc_buf[op_index] = INDEX_op_nop;
5544c1b
                 break;
5544c1b
@@ -535,7 +556,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
5544c1b
             args[1] = temps[args[1]].val;
5544c1b
             /* fallthrough */
5544c1b
         CASE_OP_32_64(movi):
5544c1b
-            tcg_opt_gen_movi(gen_args, args[0], args[1], nb_temps, nb_globals);
5544c1b
+            tcg_opt_gen_movi(gen_args, args[0], args[1]);
5544c1b
             gen_args += 2;
5544c1b
             args += 2;
5544c1b
             break;
5544c1b
@@ -550,9 +571,9 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
5544c1b
             if (temps[args[1]].state == TCG_TEMP_CONST) {
5544c1b
                 gen_opc_buf[op_index] = op_to_movi(op);
5544c1b
                 tmp = do_constant_folding(op, temps[args[1]].val, 0);
5544c1b
-                tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals);
5544c1b
+                tcg_opt_gen_movi(gen_args, args[0], tmp);
5544c1b
             } else {
5544c1b
-                reset_temp(args[0], nb_temps, nb_globals);
5544c1b
+                reset_temp(args[0]);
5544c1b
                 gen_args[0] = args[0];
5544c1b
                 gen_args[1] = args[1];
5544c1b
             }
5544c1b
@@ -580,10 +601,10 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
5544c1b
                 gen_opc_buf[op_index] = op_to_movi(op);
5544c1b
                 tmp = do_constant_folding(op, temps[args[1]].val,
5544c1b
                                           temps[args[2]].val);
5544c1b
-                tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals);
5544c1b
+                tcg_opt_gen_movi(gen_args, args[0], tmp);
5544c1b
                 gen_args += 2;
5544c1b
             } else {
5544c1b
-                reset_temp(args[0], nb_temps, nb_globals);
5544c1b
+                reset_temp(args[0]);
5544c1b
                 gen_args[0] = args[0];
5544c1b
                 gen_args[1] = args[1];
5544c1b
                 gen_args[2] = args[2];
5544c1b
@@ -597,10 +618,10 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
5544c1b
                 gen_opc_buf[op_index] = op_to_movi(op);
5544c1b
                 tmp = do_constant_folding_cond(op, temps[args[1]].val,
5544c1b
                                                temps[args[2]].val, args[3]);
5544c1b
-                tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals);
5544c1b
+                tcg_opt_gen_movi(gen_args, args[0], tmp);
5544c1b
                 gen_args += 2;
5544c1b
             } else {
5544c1b
-                reset_temp(args[0], nb_temps, nb_globals);
5544c1b
+                reset_temp(args[0]);
5544c1b
                 gen_args[0] = args[0];
5544c1b
                 gen_args[1] = args[1];
5544c1b
                 gen_args[2] = args[2];
5544c1b
@@ -623,7 +644,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
5544c1b
                 }
5544c1b
             } else {
5544c1b
                 memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
5544c1b
-                reset_temp(args[0], nb_temps, nb_globals);
5544c1b
+                reset_temp(args[0]);
5544c1b
                 gen_args[0] = args[0];
5544c1b
                 gen_args[1] = args[1];
5544c1b
                 gen_args[2] = args[2];
5544c1b
@@ -637,23 +658,19 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
5544c1b
                 && temps[args[2]].state == TCG_TEMP_CONST) {
5544c1b
                 tmp = do_constant_folding_cond(op, temps[args[1]].val,
5544c1b
                                                temps[args[2]].val, args[5]);
5544c1b
-                if (args[0] == args[4-tmp]
5544c1b
-                    || (temps[args[4-tmp]].state == TCG_TEMP_COPY
5544c1b
-                        && temps[args[4-tmp]].val == args[0])) {
5544c1b
+                if (temps_are_copies(args[0], args[4-tmp])) {
5544c1b
                     gen_opc_buf[op_index] = INDEX_op_nop;
5544c1b
                 } else if (temps[args[4-tmp]].state == TCG_TEMP_CONST) {
5544c1b
                     gen_opc_buf[op_index] = op_to_movi(op);
5544c1b
-                    tcg_opt_gen_movi(gen_args, args[0], temps[args[4-tmp]].val,
5544c1b
-                                     nb_temps, nb_globals);
5544c1b
+                    tcg_opt_gen_movi(gen_args, args[0], temps[args[4-tmp]].val);
5544c1b
                     gen_args += 2;
5544c1b
                 } else {
5544c1b
                     gen_opc_buf[op_index] = op_to_mov(op);
5544c1b
-                    tcg_opt_gen_mov(gen_args, args[0], args[4-tmp],
5544c1b
-                                    nb_temps, nb_globals);
5544c1b
+                    tcg_opt_gen_mov(s, gen_args, args[0], args[4-tmp]);
5544c1b
                     gen_args += 2;
5544c1b
                 }
5544c1b
             } else {
5544c1b
-                reset_temp(args[0], nb_temps, nb_globals);
5544c1b
+                reset_temp(args[0]);
5544c1b
                 gen_args[0] = args[0];
5544c1b
                 gen_args[1] = args[1];
5544c1b
                 gen_args[2] = args[2];
5544c1b
@@ -668,11 +685,11 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
5544c1b
             nb_call_args = (args[0] >> 16) + (args[0] & 0xffff);
5544c1b
             if (!(args[nb_call_args + 1] & (TCG_CALL_CONST | TCG_CALL_PURE))) {
5544c1b
                 for (i = 0; i < nb_globals; i++) {
5544c1b
-                    reset_temp(i, nb_temps, nb_globals);
5544c1b
+                    reset_temp(i);
5544c1b
                 }
5544c1b
             }
5544c1b
             for (i = 0; i < (args[0] >> 16); i++) {
5544c1b
-                reset_temp(args[i + 1], nb_temps, nb_globals);
5544c1b
+                reset_temp(args[i + 1]);
5544c1b
             }
5544c1b
             i = nb_call_args + 3;
5544c1b
             while (i) {
5544c1b
@@ -691,7 +708,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
5544c1b
                 memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
5544c1b
             } else {
5544c1b
                 for (i = 0; i < def->nb_oargs; i++) {
5544c1b
-                    reset_temp(args[i], nb_temps, nb_globals);
5544c1b
+                    reset_temp(args[i]);
5544c1b
                 }
5544c1b
             }
5544c1b
             for (i = 0; i < def->nb_args; i++) {
5544c1b
-- 
5544c1b
1.7.12.1
5544c1b