Blob Blame History Raw
From 8fc4ce1c981967fccd5366ace6add6d14cfcde89 Mon Sep 17 00:00:00 2001
From: Siddhesh Poyarekar <siddhesh@gotplt.org>
Date: Mon, 25 Feb 2019 14:40:39 +0000
Subject: [PATCH 63/72] aarch64: Use the xzr register whenever possible

Using the xzr register for store inputs and the second operand of
arithmetic operations frees up a register for use elsewhere.
---
 src/lj_asm_arm64.h | 31 ++++++++++++++++++++++++++++---
 1 file changed, 28 insertions(+), 3 deletions(-)

diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
index c214e10..a826687 100644
--- a/src/lj_asm_arm64.h
+++ b/src/lj_asm_arm64.h
@@ -1007,10 +1007,30 @@ static void asm_xload(ASMState *as, IRIns *ir)
   asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR);
 }
 
+static int maybe_zero_val(ASMState *as, IRRef ref)
+{
+  IRIns *ir = IR(ref);
+
+  switch(ir->o) {
+  case IR_KNULL:
+    return 1;
+  case IR_KINT:
+    return 0 == ir->i;
+  case IR_KINT64:
+    return 0 == ir_kint64(ir)->u64;
+  }
+
+  return 0;
+}
+
 static void asm_xstore(ASMState *as, IRIns *ir)
 {
   if (ir->r != RID_SINK) {
-    Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
+    Reg src;
+    if (irref_isk(ir->op2) && maybe_zero_val(as, ir->op2))
+      src = RID_ZERO;
+    else
+      src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
     asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
 		 rset_exclude(RSET_GPR, src));
   }
@@ -1198,7 +1218,12 @@ static void asm_cnew(ASMState *as, IRIns *ir)
   /* Initialize immutable cdata object. */
   if (ir->o == IR_CNEWI) {
     int32_t ofs = sizeof(GCcdata);
-    Reg r = ra_alloc1(as, ir->op2, allow);
+    Reg r;
+    if (irref_isk(ir->op2) && maybe_zero_val(as, ir->op2))
+      r = RID_ZERO;
+    else
+      r = ra_alloc1(as, ir->op2, allow);
+
     lua_assert(sz == 4 || sz == 8);
     emit_lso(as, sz == 8 ? A64I_STRx : A64I_STRw, r, RID_RET, ofs);
   } else if (ir->op2 != REF_NIL) {  /* Create VLA/VLS/aligned cdata. */
@@ -1214,7 +1239,7 @@ static void asm_cnew(ASMState *as, IRIns *ir)
 
   /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
   {
-    Reg r = (id < 65536) ? RID_X1 : ra_allock(as, id, allow);
+    Reg r = id == 0 ? RID_ZERO : (id < 65536) ? RID_X1 : ra_allock(as, id, allow);
     emit_lso(as, A64I_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct));
     emit_lso(as, A64I_STRH, r, RID_RET, offsetof(GCcdata, ctypeid));
     emit_d(as, A64I_MOVZw | A64F_U16(~LJ_TCDATA), RID_TMP);
-- 
2.20.1