8002493
From ca5514b85161d480fb711ac26d74fc447e1e9bda Mon Sep 17 00:00:00 2001
8002493
From: Rob Clark <robclark@freedesktop.org>
8002493
Date: Sat, 24 Aug 2013 13:00:07 -0400
8002493
Subject: [PATCH 12/17] freedreno/a3xx/compiler: bit of re-arrange/cleanup
8002493
8002493
It seems there are a number of cases where instructions have limitations
8002493
about reading srcs from the const register file, so make
8002493
get_unconst() a bit easier to use.
8002493
8002493
Signed-off-by: Rob Clark <robclark@freedesktop.org>
8002493
---
8002493
 src/gallium/drivers/freedreno/a3xx/fd3_compiler.c | 132 ++++++++++++----------
8002493
 1 file changed, 71 insertions(+), 61 deletions(-)
8002493
8002493
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
8002493
index e6c5bb7..b5cdda8 100644
8002493
--- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
8002493
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
8002493
@@ -91,6 +91,7 @@ struct fd3_compile_context {
8002493
 
8002493
 	unsigned next_inloc;
8002493
 	unsigned num_internal_temps;
8002493
+	struct tgsi_src_register internal_temps[6];
8002493
 
8002493
 	/* track registers which need to synchronize w/ "complex alu" cat3
8002493
 	 * instruction pipeline:
8002493
@@ -128,7 +129,7 @@ struct fd3_compile_context {
8002493
 	 * up the vector operation
8002493
 	 */
8002493
 	struct tgsi_dst_register tmp_dst;
8002493
-	struct tgsi_src_register tmp_src;
8002493
+	struct tgsi_src_register *tmp_src;
8002493
 };
8002493
 
8002493
 
8002493
@@ -309,11 +310,11 @@ src_from_dst(struct tgsi_src_register *src, struct tgsi_dst_register *dst)
8002493
 /* Get internal-temp src/dst to use for a sequence of instructions
8002493
  * generated by a single TGSI op.
8002493
  */
8002493
-static void
8002493
+static struct tgsi_src_register *
8002493
 get_internal_temp(struct fd3_compile_context *ctx,
8002493
-		struct tgsi_dst_register *tmp_dst,
8002493
-		struct tgsi_src_register *tmp_src)
8002493
+		struct tgsi_dst_register *tmp_dst)
8002493
 {
8002493
+	struct tgsi_src_register *tmp_src;
8002493
 	int n;
8002493
 
8002493
 	tmp_dst->File      = TGSI_FILE_TEMPORARY;
8002493
@@ -323,23 +324,28 @@ get_internal_temp(struct fd3_compile_context *ctx,
8002493
 
8002493
 	/* assign next temporary: */
8002493
 	n = ctx->num_internal_temps++;
8002493
+	compile_assert(ctx, n < ARRAY_SIZE(ctx->internal_temps));
8002493
+	tmp_src = &ctx->internal_temps[n];
8002493
 
8002493
 	tmp_dst->Index = ctx->info.file_max[TGSI_FILE_TEMPORARY] + n + 1;
8002493
 
8002493
 	src_from_dst(tmp_src, tmp_dst);
8002493
+
8002493
+	return tmp_src;
8002493
 }
8002493
 
8002493
 /* same as get_internal_temp, but w/ src.xxxx (for instructions that
8002493
  * replicate their results)
8002493
  */
8002493
-static void
8002493
+static struct tgsi_src_register *
8002493
 get_internal_temp_repl(struct fd3_compile_context *ctx,
8002493
-		struct tgsi_dst_register *tmp_dst,
8002493
-		struct tgsi_src_register *tmp_src)
8002493
+		struct tgsi_dst_register *tmp_dst)
8002493
 {
8002493
-	get_internal_temp(ctx, tmp_dst, tmp_src);
8002493
+	struct tgsi_src_register *tmp_src =
8002493
+			get_internal_temp(ctx, tmp_dst);
8002493
 	tmp_src->SwizzleX = tmp_src->SwizzleY =
8002493
 		tmp_src->SwizzleZ = tmp_src->SwizzleW = TGSI_SWIZZLE_X;
8002493
+	return tmp_src;
8002493
 }
8002493
 
8002493
 static inline bool
8002493
@@ -349,6 +355,22 @@ is_const(struct tgsi_src_register *src)
8002493
 			(src->File == TGSI_FILE_IMMEDIATE);
8002493
 }
8002493
 
8002493
+/* for instructions that cannot take a const register as src, if needed
8002493
+ * generate a move to temporary gpr:
8002493
+ */
8002493
+static struct tgsi_src_register *
8002493
+get_unconst(struct fd3_compile_context *ctx, struct tgsi_src_register *src)
8002493
+{
8002493
+	if (is_const(src)) {
8002493
+		static struct tgsi_dst_register tmp_dst;
8002493
+		struct tgsi_src_register *tmp_src =
8002493
+				get_internal_temp(ctx, &tmp_dst);
8002493
+		create_mov(ctx, &tmp_dst, src);
8002493
+		src = tmp_src;
8002493
+	}
8002493
+	return src;
8002493
+}
8002493
+
8002493
 static void
8002493
 get_immediate(struct fd3_compile_context *ctx,
8002493
 		struct tgsi_src_register *reg, uint32_t val)
8002493
@@ -396,27 +418,16 @@ get_immediate(struct fd3_compile_context *ctx,
8002493
 	reg->SwizzleW  = swiz2tgsi[swiz];
8002493
 }
8002493
 
8002493
-/* for instructions that cannot take a const register as src, if needed
8002493
- * generate a move to temporary gpr:
8002493
- */
8002493
-static struct tgsi_src_register *
8002493
-get_unconst(struct fd3_compile_context *ctx, struct tgsi_src_register *src,
8002493
-		struct tgsi_src_register *tmp_src)
8002493
+static type_t
8002493
+get_ftype(struct fd3_compile_context *ctx)
8002493
 {
8002493
-	static struct tgsi_dst_register tmp_dst;
8002493
-	if ((src->File == TGSI_FILE_CONSTANT) ||
8002493
-			(src->File == TGSI_FILE_IMMEDIATE)) {
8002493
-		get_internal_temp(ctx, &tmp_dst, tmp_src);
8002493
-		create_mov(ctx, &tmp_dst, src);
8002493
-		src = tmp_src;
8002493
-	}
8002493
-	return src;
8002493
+	return ctx->so->half_precision ? TYPE_F16 : TYPE_F32;
8002493
 }
8002493
 
8002493
 static type_t
8002493
-get_type(struct fd3_compile_context *ctx)
8002493
+get_utype(struct fd3_compile_context *ctx)
8002493
 {
8002493
-	return ctx->so->half_precision ? TYPE_F16 : TYPE_F32;
8002493
+	return ctx->so->half_precision ? TYPE_U16 : TYPE_U32;
8002493
 }
8002493
 
8002493
 static unsigned
8002493
@@ -436,7 +447,7 @@ static void
8002493
 create_mov(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst,
8002493
 		struct tgsi_src_register *src)
8002493
 {
8002493
-	type_t type_mov = get_type(ctx);
8002493
+	type_t type_mov = get_ftype(ctx);
8002493
 	unsigned i;
8002493
 
8002493
 	for (i = 0; i < 4; i++) {
8002493
@@ -492,7 +503,7 @@ get_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst)
8002493
 	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
8002493
 		struct tgsi_src_register *src = &inst->Src[i].Register;
8002493
 		if ((src->File == dst->File) && (src->Index == dst->Index)) {
8002493
-			get_internal_temp(ctx, &ctx->tmp_dst, &ctx->tmp_src);
8002493
+			ctx->tmp_src = get_internal_temp(ctx, &ctx->tmp_dst);
8002493
 			ctx->tmp_dst.WriteMask = dst->WriteMask;
8002493
 			dst = &ctx->tmp_dst;
8002493
 			break;
8002493
@@ -507,7 +518,7 @@ put_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst,
8002493
 {
8002493
 	/* if necessary, add mov back into original dst: */
8002493
 	if (dst != &inst->Dst[0].Register) {
8002493
-		create_mov(ctx, &inst->Dst[0].Register, &ctx->tmp_src);
8002493
+		create_mov(ctx, &inst->Dst[0].Register, ctx->tmp_src);
8002493
 	}
8002493
 }
8002493
 
8002493
@@ -580,7 +591,7 @@ trans_dotp(const struct instr_translater *t,
8002493
 {
8002493
 	struct ir3_instruction *instr;
8002493
 	struct tgsi_dst_register tmp_dst;
8002493
-	struct tgsi_src_register tmp_src;
8002493
+	struct tgsi_src_register *tmp_src;
8002493
 	struct tgsi_dst_register *dst  = &inst->Dst[0].Register;
8002493
 	struct tgsi_src_register *src0 = &inst->Src[0].Register;
8002493
 	struct tgsi_src_register *src1 = &inst->Src[1].Register;
8002493
@@ -590,7 +601,7 @@ trans_dotp(const struct instr_translater *t,
8002493
 	unsigned n = t->arg;     /* number of components */
8002493
 	unsigned i;
8002493
 
8002493
-	get_internal_temp_repl(ctx, &tmp_dst, &tmp_src);
8002493
+	tmp_src = get_internal_temp_repl(ctx, &tmp_dst);
8002493
 
8002493
 	/* Blob compiler never seems to use a const in src1 position for
8002493
 	 * mad.*, although there does seem (according to disassembler
8002493
@@ -609,7 +620,7 @@ trans_dotp(const struct instr_translater *t,
8002493
 		 * because after that point we no longer need tmp.x:
8002493
 		 */
8002493
 		create_mov(ctx, &tmp_dst, src1);
8002493
-		src1 = &tmp_src;
8002493
+		src1 = tmp_src;
8002493
 	}
8002493
 
8002493
 	instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F);
8002493
@@ -624,7 +635,7 @@ trans_dotp(const struct instr_translater *t,
8002493
 		add_dst_reg(ctx, instr, &tmp_dst, 0);
8002493
 		add_src_reg(ctx, instr, src0, swiz0[i]);
8002493
 		add_src_reg(ctx, instr, src1, swiz1[i]);
8002493
-		add_src_reg(ctx, instr, &tmp_src, 0);
8002493
+		add_src_reg(ctx, instr, tmp_src, 0);
8002493
 	}
8002493
 
8002493
 	/* DPH(a,b) = (a.x * b.x) + (a.y * b.y) + (a.z * b.z) + b.w */
8002493
@@ -634,7 +645,7 @@ trans_dotp(const struct instr_translater *t,
8002493
 		instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F);
8002493
 		add_dst_reg(ctx, instr, &tmp_dst, 0);
8002493
 		add_src_reg(ctx, instr, src1, swiz1[i]);
8002493
-		add_src_reg(ctx, instr, &tmp_src, 0);
8002493
+		add_src_reg(ctx, instr, tmp_src, 0);
8002493
 
8002493
 		n++;
8002493
 	}
8002493
@@ -646,7 +657,7 @@ trans_dotp(const struct instr_translater *t,
8002493
 		ir3_instr_create(ctx->ir, 0, OPC_NOP);
8002493
 	}
8002493
 
8002493
-	create_mov(ctx, dst, &tmp_src);
8002493
+	create_mov(ctx, dst, tmp_src);
8002493
 }
8002493
 
8002493
 /* LRP(a,b,c) = (a * b) + ((1 - a) * c) */
8002493
@@ -657,11 +668,11 @@ trans_lrp(const struct instr_translater *t,
8002493
 {
8002493
 	struct ir3_instruction *instr;
8002493
 	struct tgsi_dst_register tmp_dst1, tmp_dst2;
8002493
-	struct tgsi_src_register tmp_src1, tmp_src2;
8002493
+	struct tgsi_src_register *tmp_src1, *tmp_src2;
8002493
 	struct tgsi_src_register tmp_const;
8002493
 
8002493
-	get_internal_temp(ctx, &tmp_dst1, &tmp_src1);
8002493
-	get_internal_temp(ctx, &tmp_dst2, &tmp_src2);
8002493
+	tmp_src1 = get_internal_temp(ctx, &tmp_dst1);
8002493
+	tmp_src2 = get_internal_temp(ctx, &tmp_dst2);
8002493
 
8002493
 	get_immediate(ctx, &tmp_const, fui(1.0));
8002493
 
8002493
@@ -680,14 +691,14 @@ trans_lrp(const struct instr_translater *t,
8002493
 	/* tmp2 = tmp2 * c */
8002493
 	instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F);
8002493
 	vectorize(ctx, instr, &tmp_dst2, 2,
8002493
-			&tmp_src2, 0,
8002493
+			tmp_src2, 0,
8002493
 			&inst->Src[2].Register, 0);
8002493
 
8002493
 	/* dst = tmp1 + tmp2 */
8002493
 	instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F);
8002493
 	vectorize(ctx, instr, &inst->Dst[0].Register, 2,
8002493
-			&tmp_src1, 0,
8002493
-			&tmp_src2, 0);
8002493
+			tmp_src1, 0,
8002493
+			tmp_src2, 0);
8002493
 }
8002493
 
8002493
 /* FRC(x) = x - FLOOR(x) */
8002493
@@ -698,9 +709,9 @@ trans_frac(const struct instr_translater *t,
8002493
 {
8002493
 	struct ir3_instruction *instr;
8002493
 	struct tgsi_dst_register tmp_dst;
8002493
-	struct tgsi_src_register tmp_src;
8002493
+	struct tgsi_src_register *tmp_src;
8002493
 
8002493
-	get_internal_temp(ctx, &tmp_dst, &tmp_src);
8002493
+	tmp_src = get_internal_temp(ctx, &tmp_dst);
8002493
 
8002493
 	/* tmp = FLOOR(x) */
8002493
 	instr = ir3_instr_create(ctx->ir, 2, OPC_FLOOR_F);
8002493
@@ -711,7 +722,7 @@ trans_frac(const struct instr_translater *t,
8002493
 	instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F);
8002493
 	vectorize(ctx, instr, &inst->Dst[0].Register, 2,
8002493
 			&inst->Src[0].Register, 0,
8002493
-			&tmp_src, IR3_REG_NEGATE);
8002493
+			tmp_src, IR3_REG_NEGATE);
8002493
 }
8002493
 
8002493
 /* POW(a,b) = EXP2(b * LOG2(a)) */
8002493
@@ -723,12 +734,12 @@ trans_pow(const struct instr_translater *t,
8002493
 	struct ir3_instruction *instr;
8002493
 	struct ir3_register *r;
8002493
 	struct tgsi_dst_register tmp_dst;
8002493
-	struct tgsi_src_register tmp_src;
8002493
+	struct tgsi_src_register *tmp_src;
8002493
 	struct tgsi_dst_register *dst  = &inst->Dst[0].Register;
8002493
 	struct tgsi_src_register *src0 = &inst->Src[0].Register;
8002493
 	struct tgsi_src_register *src1 = &inst->Src[1].Register;
8002493
 
8002493
-	get_internal_temp_repl(ctx, &tmp_dst, &tmp_src);
8002493
+	tmp_src = get_internal_temp_repl(ctx, &tmp_dst);
8002493
 
8002493
 	/* log2 Rtmp, Rsrc0 */
8002493
 	ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 5;
8002493
@@ -740,7 +751,7 @@ trans_pow(const struct instr_translater *t,
8002493
 	/* mul.f Rtmp, Rtmp, Rsrc1 */
8002493
 	instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F);
8002493
 	add_dst_reg(ctx, instr, &tmp_dst, 0);
8002493
-	add_src_reg(ctx, instr, &tmp_src, 0);
8002493
+	add_src_reg(ctx, instr, tmp_src, 0);
8002493
 	add_src_reg(ctx, instr, src1, src1->SwizzleX);
8002493
 
8002493
 	/* blob compiler seems to ensure there are at least 6 instructions
8002493
@@ -752,10 +763,10 @@ trans_pow(const struct instr_translater *t,
8002493
 	/* exp2 Rdst, Rtmp */
8002493
 	instr = ir3_instr_create(ctx->ir, 4, OPC_EXP2);
8002493
 	r = add_dst_reg(ctx, instr, &tmp_dst, 0);
8002493
-	add_src_reg(ctx, instr, &tmp_src, 0);
8002493
+	add_src_reg(ctx, instr, tmp_src, 0);
8002493
 	regmask_set(ctx->needs_ss, r);
8002493
 
8002493
-	create_mov(ctx, dst, &tmp_src);
8002493
+	create_mov(ctx, dst, tmp_src);
8002493
 }
8002493
 
8002493
 /* texture fetch/sample instructions: */
8002493
@@ -766,8 +777,6 @@ trans_samp(const struct instr_translater *t,
8002493
 {
8002493
 	struct ir3_register *r;
8002493
 	struct ir3_instruction *instr;
8002493
-	struct tgsi_dst_register tmp_dst;
8002493
-	struct tgsi_src_register tmp_src;
8002493
 	struct tgsi_src_register *coord = &inst->Src[0].Register;
8002493
 	struct tgsi_src_register *samp  = &inst->Src[1].Register;
8002493
 	unsigned tex = inst->Texture.Texture;
8002493
@@ -802,10 +811,13 @@ trans_samp(const struct instr_translater *t,
8002493
 	 */
8002493
 	for (i = 1; (i < 4) && (order[i] >= 0); i++) {
8002493
 		if (src_swiz(coord, i) != (src_swiz(coord, 0) + order[i])) {
8002493
-			type_t type_mov = get_type(ctx);
8002493
+			struct tgsi_dst_register tmp_dst;
8002493
+			struct tgsi_src_register *tmp_src;
8002493
+
8002493
+			type_t type_mov = get_ftype(ctx);
8002493
 
8002493
 			/* need to move things around: */
8002493
-			get_internal_temp(ctx, &tmp_dst, &tmp_src);
8002493
+			tmp_src = get_internal_temp(ctx, &tmp_dst);
8002493
 
8002493
 			for (j = 0; (j < 4) && (order[j] >= 0); j++) {
8002493
 				instr = ir3_instr_create(ctx->ir, 1, 0);
8002493
@@ -816,7 +828,7 @@ trans_samp(const struct instr_translater *t,
8002493
 						src_swiz(coord, order[j]));
8002493
 			}
8002493
 
8002493
-			coord = &tmp_src;
8002493
+			coord = tmp_src;
8002493
 
8002493
 			if (j < 4)
8002493
 				ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 4 - j - 1;
8002493
@@ -826,7 +838,7 @@ trans_samp(const struct instr_translater *t,
8002493
 	}
8002493
 
8002493
 	instr = ir3_instr_create(ctx->ir, 5, t->opc);
8002493
-	instr->cat5.type = get_type(ctx);
8002493
+	instr->cat5.type = get_ftype(ctx);
8002493
 	instr->cat5.samp = samp->Index;
8002493
 	instr->cat5.tex  = samp->Index;
8002493
 	instr->flags |= flags;
8002493
@@ -847,12 +859,12 @@ trans_cmp(const struct instr_translater *t,
8002493
 {
8002493
 	struct ir3_instruction *instr;
8002493
 	struct tgsi_dst_register tmp_dst;
8002493
-	struct tgsi_src_register tmp_src;
8002493
+	struct tgsi_src_register *tmp_src;
8002493
 	struct tgsi_src_register constval;
8002493
 	/* final instruction uses original src1 and src2, so we need get_dst() */
8002493
 	struct tgsi_dst_register *dst = get_dst(ctx, inst);
8002493
 
8002493
-	get_internal_temp(ctx, &tmp_dst, &tmp_src);
8002493
+	tmp_src = get_internal_temp(ctx, &tmp_dst);
8002493
 
8002493
 	/* cmps.f.ge tmp, src0, 0.0 */
8002493
 	instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F);
8002493
@@ -866,7 +878,7 @@ trans_cmp(const struct instr_translater *t,
8002493
 	instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_S);
8002493
 	instr->repeat = 3;
8002493
 	add_dst_reg(ctx, instr, &tmp_dst, 0);
8002493
-	add_src_reg(ctx, instr, &tmp_src, 0)->flags |= IR3_REG_R;
8002493
+	add_src_reg(ctx, instr, tmp_src, 0)->flags |= IR3_REG_R;
8002493
 	ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = -1;
8002493
 
8002493
 	/* sel.{f32,f16} dst, src2, tmp, src1 */
8002493
@@ -874,7 +886,7 @@ trans_cmp(const struct instr_translater *t,
8002493
 			OPC_SEL_F16 : OPC_SEL_F32);
8002493
 	vectorize(ctx, instr, dst, 3,
8002493
 			&inst->Src[2].Register, 0,
8002493
-			&tmp_src, 0,
8002493
+			tmp_src, 0,
8002493
 			&inst->Src[1].Register, 0);
8002493
 
8002493
 	put_dst(ctx, inst, dst);
8002493
@@ -1066,7 +1078,6 @@ instr_cat3(const struct instr_translater *t,
8002493
 {
8002493
 	struct tgsi_dst_register *dst = get_dst(ctx, inst);
8002493
 	struct tgsi_src_register *src1;
8002493
-	struct tgsi_src_register tmp_src;
8002493
 	struct ir3_instruction *instr;
8002493
 
8002493
 	/* Blob compiler never seems to use a const in src1 position..
8002493
@@ -1075,7 +1086,7 @@ instr_cat3(const struct instr_translater *t,
8002493
 	 * const.  Not sure if this is a hw bug, or simply that the
8002493
 	 * disassembler lies.
8002493
 	 */
8002493
-	src1 = get_unconst(ctx, &inst->Src[1].Register, &tmp_src);
8002493
+	src1 = get_unconst(ctx, &inst->Src[1].Register);
8002493
 
8002493
 	instr = ir3_instr_create(ctx->ir, 3,
8002493
 			ctx->so->half_precision ? t->hopc : t->opc);
8002493
@@ -1093,11 +1104,10 @@ instr_cat4(const struct instr_translater *t,
8002493
 {
8002493
 	struct tgsi_dst_register *dst = get_dst(ctx, inst);
8002493
 	struct tgsi_src_register *src;
8002493
-	struct tgsi_src_register tmp_src;
8002493
 	struct ir3_instruction *instr;
8002493
 
8002493
 	/* seems like blob compiler avoids const as src.. */
8002493
-	src = get_unconst(ctx, &inst->Src[0].Register, &tmp_src);
8002493
+	src = get_unconst(ctx, &inst->Src[0].Register);
8002493
 
8002493
 	ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 5;
8002493
 	instr = ir3_instr_create(ctx->ir, 4, t->opc);
8002493
-- 
8002493
1.8.4.2
8002493