2009-04-14  Michael Meissner  <meissner@linux.vnet.ibm.com>

	* config/rs6000/rs6000.c (rs6000_secondary_reload_inner): Handle
	more possible combinations of addresses.

	* config/rs6000/vector.md (vec_reload_and_plus_<mptrsize>): Allow
	register+small constant in addition to register+register, and
	restrict the insn to only match during reload and afterwards.
	(vec_reload_and_reg_<mptrsize>): New insn to handle an AND of a
	register indirect address, so that reload does not generate an
	insn not found message.

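A rough illustration of the vec_reload_and_plus_<mptrsize> change (a
hypothetical example, not part of the patch; the function name is made up).
vec_ld masks its effective address with -16, so a load from a base register
plus a small constant offset is the reg+small-constant-inside-AND case that
reload may now copy into a GPR through the relaxed pattern, whose second
operand accepts a small constant ("I") as well as a register.

#include <altivec.h>

/* Hypothetical test: the address reload sees is (and (plus reg 16) -16).  */
vector float
load_plus_const (float *p)
{
  return vec_ld (16, p);
}
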
--- gcc/config/rs6000/vector.md	(revision 146069)
+++ gcc/config/rs6000/vector.md	(revision 146118)
@@ -129,14 +129,15 @@ (define_expand "reload_<VEC_R:mode>_
 })
 
 ;; Reload sometimes tries to move the address to a GPR, and can generate
-;; invalid RTL for addresses involving AND -16.
+;; invalid RTL for addresses involving AND -16.  Allow addresses involving
+;; reg+reg, reg+small constant, or just reg, all wrapped in an AND -16.
 
 (define_insn_and_split "*vec_reload_and_plus_<mptrsize>"
   [(set (match_operand:P 0 "gpc_reg_operand" "=b")
 	(and:P (plus:P (match_operand:P 1 "gpc_reg_operand" "r")
-		       (match_operand:P 2 "gpc_reg_operand" "r"))
+		       (match_operand:P 2 "reg_or_cint_operand" "rI"))
 	       (const_int -16)))]
-  "TARGET_ALTIVEC || TARGET_VSX"
+  "(TARGET_ALTIVEC || TARGET_VSX) && (reload_in_progress || reload_completed)"
   "#"
   "&& reload_completed"
   [(set (match_dup 0)
@@ -146,6 +147,21 @@ (define_insn_and_split "*vec_reload_and_
 		   (and:P (match_dup 0)
 			  (const_int -16)))
 	      (clobber:CC (scratch:CC))])])
+
+;; The normal ANDSI3/ANDDI3 won't match if reload decides to move an AND -16
+;; address to a register because there is no clobber of a (scratch), so we add
+;; it here.
+(define_insn_and_split "*vec_reload_and_reg_<mptrsize>"
+  [(set (match_operand:P 0 "gpc_reg_operand" "=b")
+	(and:P (match_operand:P 1 "gpc_reg_operand" "r")
+	       (const_int -16)))]
+  "(TARGET_ALTIVEC || TARGET_VSX) && (reload_in_progress || reload_completed)"
+  "#"
+  "&& reload_completed"
+  [(parallel [(set (match_dup 0)
+		   (and:P (match_dup 1)
+			  (const_int -16)))
+	      (clobber:CC (scratch:CC))])])
 
 ;; Generic floating point vector arithmetic support
 (define_expand "add<mode>3"
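
A similar hypothetical sketch for the new *vec_reload_and_reg_<mptrsize>
pattern (again not from the patch).  A register indirect vector load leaves
an address of just (and (reg) (const_int -16)); if reload moves that AND into
a GPR, the generic andsi3/anddi3 patterns will not match a bare set because
they expect a (clobber (scratch:CC)), which is why the new pattern splits
into the parallel shown above.

#include <altivec.h>

/* Hypothetical test: register indirect (offset 0) vector load.  */
vector float
load_indirect (float *p)
{
  return vec_ld (0, p);
}
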
--- gcc/config/rs6000/rs6000.c	(revision 146069)
+++ gcc/config/rs6000/rs6000.c	(revision 146118)
@@ -12574,6 +12574,11 @@ rs6000_secondary_reload_inner (rtx reg, 
   enum reg_class rclass;
   rtx addr;
   rtx and_op2 = NULL_RTX;
+  rtx addr_op1;
+  rtx addr_op2;
+  rtx scratch_or_premodify = scratch;
+  rtx and_rtx;
+  rtx cc_clobber;
 
   if (TARGET_DEBUG_ADDR)
     {
@@ -12595,7 +12600,8 @@ rs6000_secondary_reload_inner (rtx reg, 
 
   switch (rclass)
     {
-      /* Move reg+reg addresses into a scratch register for GPRs.  */
+      /* GPRs can handle reg + small constant, all other addresses need to use
+	 the scratch register.  */
     case GENERAL_REGS:
     case BASE_REGS:
       if (GET_CODE (addr) == AND)
@@ -12603,70 +12609,152 @@ rs6000_secondary_reload_inner (rtx reg, 
 	  and_op2 = XEXP (addr, 1);
 	  addr = XEXP (addr, 0);
 	}
+
+      if (GET_CODE (addr) == PRE_MODIFY)
+	{
+	  scratch_or_premodify = XEXP (addr, 0);
+	  gcc_assert (REG_P (scratch_or_premodify));
+	  gcc_assert (GET_CODE (XEXP (addr, 1)) == PLUS);
+	  addr = XEXP (addr, 1);
+	}
+
       if (GET_CODE (addr) == PLUS
 	  && (!rs6000_legitimate_offset_address_p (TImode, addr, true)
 	      || and_op2 != NULL_RTX))
 	{
-	  if (GET_CODE (addr) == SYMBOL_REF || GET_CODE (addr) == CONST
-	      || GET_CODE (addr) == CONST_INT)
-	    rs6000_emit_move (scratch, addr, GET_MODE (addr));
-	  else
-	    emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
-	  addr = scratch;
+	  addr_op1 = XEXP (addr, 0);
+	  addr_op2 = XEXP (addr, 1);
+	  gcc_assert (legitimate_indirect_address_p (addr_op1, true));
+
+	  if (!REG_P (addr_op2)
+	      && (GET_CODE (addr_op2) != CONST_INT
+		  || !satisfies_constraint_I (addr_op2)))
+	    {
+	      rs6000_emit_move (scratch, addr_op2, Pmode);
+	      addr_op2 = scratch;
+	    }
+
+	  emit_insn (gen_rtx_SET (VOIDmode,
+				  scratch_or_premodify,
+				  gen_rtx_PLUS (Pmode,
+						addr_op1,
+						addr_op2)));
+
+	  addr = scratch_or_premodify;
+	  scratch_or_premodify = scratch;
 	}
-      else if (GET_CODE (addr) == PRE_MODIFY
-	       && REG_P (XEXP (addr, 0))
-	       && GET_CODE (XEXP (addr, 1)) == PLUS)
+      else if (!legitimate_indirect_address_p (addr, true)
+	       && !rs6000_legitimate_offset_address_p (TImode, addr, true))
 	{
-	  emit_insn (gen_rtx_SET (VOIDmode, XEXP (addr, 0), XEXP (addr, 1)));
-	  addr = XEXP (addr, 0);
+	  rs6000_emit_move (scratch_or_premodify, addr, Pmode);
+	  addr = scratch_or_premodify;
+	  scratch_or_premodify = scratch;
 	}
       break;
 
+      /* Float/Altivec registers can only handle reg+reg addressing.  Move
+	 other addresses into a scratch register.  */
+    case FLOAT_REGS:
+    case VSX_REGS:
+    case ALTIVEC_REGS:
+
       /* With float regs, we need to handle the AND ourselves, since we can't
 	 use the Altivec instruction with an implicit AND -16.  Allow scalar
 	 loads to float registers to use reg+offset even if VSX.  */
-    case FLOAT_REGS:
-    case VSX_REGS:
-      if (GET_CODE (addr) == AND)
+      if (GET_CODE (addr) == AND
+	  && (rclass != ALTIVEC_REGS || GET_MODE_SIZE (mode) != 16))
 	{
 	  and_op2 = XEXP (addr, 1);
 	  addr = XEXP (addr, 0);
 	}
-      /* fall through */
 
-      /* Move reg+offset addresses into a scratch register.  */
-    case ALTIVEC_REGS:
-      if (!legitimate_indirect_address_p (addr, true)
-	  && !legitimate_indexed_address_p (addr, true)
-	  && (GET_CODE (addr) != PRE_MODIFY
-	      || !legitimate_indexed_address_p (XEXP (addr, 1), true))
-	  && (rclass != FLOAT_REGS
-	      || GET_MODE_SIZE (mode) != 8
+      /* If we aren't using a VSX load, save the PRE_MODIFY register and use it
+	 as the address later.  */
+      if (GET_CODE (addr) == PRE_MODIFY
+	  && (!VECTOR_MEM_VSX_P (mode)
 	      || and_op2 != NULL_RTX
-	      || !rs6000_legitimate_offset_address_p (mode, addr, true)))
+	      || !legitimate_indexed_address_p (XEXP (addr, 1), true)))
 	{
-	  if (GET_CODE (addr) == SYMBOL_REF || GET_CODE (addr) == CONST
-	      || GET_CODE (addr) == CONST_INT)
-	    rs6000_emit_move (scratch, addr, GET_MODE (addr));
-	  else
-	    emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
-	  addr = scratch;
+	  scratch_or_premodify = XEXP (addr, 0);
+	  gcc_assert (legitimate_indirect_address_p (scratch_or_premodify,
+						     true));
+	  gcc_assert (GET_CODE (XEXP (addr, 1)) == PLUS);
+	  addr = XEXP (addr, 1);
+	}
+
+      if (legitimate_indirect_address_p (addr, true)	/* reg */
+	  || legitimate_indexed_address_p (addr, true)	/* reg+reg */
+	  || GET_CODE (addr) == PRE_MODIFY		/* VSX pre-modify */
+	  || GET_CODE (addr) == AND			/* Altivec memory */
+	  || (rclass == FLOAT_REGS			/* legacy float mem */
+	      && GET_MODE_SIZE (mode) == 8
+	      && and_op2 == NULL_RTX
+	      && scratch_or_premodify == scratch
+	      && rs6000_legitimate_offset_address_p (mode, addr, true)))
+	;
+
+      else if (GET_CODE (addr) == PLUS)
+	{
+	  addr_op1 = XEXP (addr, 0);
+	  addr_op2 = XEXP (addr, 1);
+	  gcc_assert (REG_P (addr_op1));
+
+	  rs6000_emit_move (scratch, addr_op2, Pmode);
+	  emit_insn (gen_rtx_SET (VOIDmode,
+				  scratch_or_premodify,
+				  gen_rtx_PLUS (Pmode,
+						addr_op1,
+						scratch)));
+	  addr = scratch_or_premodify;
+	  scratch_or_premodify = scratch;
 	}
+
+      else if (GET_CODE (addr) == SYMBOL_REF || GET_CODE (addr) == CONST
+	       || GET_CODE (addr) == CONST_INT)
+	{
+	  rs6000_emit_move (scratch_or_premodify, addr, Pmode);
+	  addr = scratch_or_premodify;
+	  scratch_or_premodify = scratch;
+	}
+
+      else
+	gcc_unreachable ();
+
       break;
 
     default:
       gcc_unreachable ();
     }
 
-  /* If the original address involved an AND -16 that is part of the Altivec
-     addresses, recreate the and now.  */
+  /* If the original address involved a pre-modify that we couldn't use the VSX
+     memory instruction with update, and we haven't taken care of already,
+     store the address in the pre-modify register and use that as the
+     address.  */
+  if (scratch_or_premodify != scratch && scratch_or_premodify != addr)
+    {
+      emit_insn (gen_rtx_SET (VOIDmode, scratch_or_premodify, addr));
+      addr = scratch_or_premodify;
+    }
+
+  /* If the original address involved an AND -16 and we couldn't use an ALTIVEC
+     memory instruction, recreate the AND now, including the clobber which is
+     generated by the general ANDSI3/ANDDI3 patterns for the
+     andi. instruction.  */
   if (and_op2 != NULL_RTX)
     {
-      rtx and_rtx = gen_rtx_SET (VOIDmode,
-				 scratch,
-				 gen_rtx_AND (Pmode, addr, and_op2));
-      rtx cc_clobber = gen_rtx_CLOBBER (CCmode, gen_rtx_SCRATCH (CCmode));
+      if (! legitimate_indirect_address_p (addr, true))
+	{
+	  emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
+	  addr = scratch;
+	}
+
+      and_rtx = gen_rtx_SET (VOIDmode,
+			     scratch,
+			     gen_rtx_AND (Pmode,
+					  addr,
+					  and_op2));
+
+      cc_clobber = gen_rtx_CLOBBER (CCmode, gen_rtx_SCRATCH (CCmode));
       emit_insn (gen_rtx_PARALLEL (VOIDmode,
 				   gen_rtvec (2, and_rtx, cc_clobber)));
       addr = scratch;
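
Finally, a hypothetical example (not part of the patch) of an address that
would take the scratch-register path in rs6000_secondary_reload_inner: the
offset below is assumed to be too large for the "I" constraint checked by
satisfies_constraint_I, so the constant would first be moved into the scratch
register and added to the base before the AND -16 is recreated.

#include <altivec.h>

/* Hypothetical test: offset too large for a 16-bit immediate operand.  */
vector float
load_far (float *p)
{
  return vec_ld (0x12340, p);
}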