Jeremy Cline dce4dd4
From 90de578c81e983b3d992ca3e1a7e5910c803abba Mon Sep 17 00:00:00 2001
Jeremy Cline dce4dd4
From: Chris Wilson <chris@chris-wilson.co.uk>
Jeremy Cline dce4dd4
Date: Mon, 30 Dec 2019 11:15:30 +0000
Jeremy Cline dce4dd4
Subject: [PATCH] drm/i915/gt: Detect if we miss WaIdleLiteRestore
Jeremy Cline dce4dd4
Jeremy Cline dce4dd4
In order to avoid confusing the HW, we must never submit an empty ring
Jeremy Cline dce4dd4
during lite-restore, that is we should always advance the RING_TAIL
Jeremy Cline dce4dd4
before submitting to stay ahead of the RING_HEAD.
Jeremy Cline dce4dd4
Jeremy Cline dce4dd4
Normally this is prevented by keeping a couple of spare NOPs in the
Jeremy Cline dce4dd4
request->wa_tail so that on resubmission we can advance the tail. This
Jeremy Cline dce4dd4
relies on the request only being resubmitted once, which is the normal
Jeremy Cline dce4dd4
condition as it is seen once for ELSP[1] and then later in ELSP[0]. On
Jeremy Cline dce4dd4
preemption, the requests are unwound and the tail reset back to the
Jeremy Cline dce4dd4
normal end point (as we know the request is incomplete and therefore its
Jeremy Cline dce4dd4
RING_HEAD is even earlier).
Jeremy Cline dce4dd4
Jeremy Cline dce4dd4
However, if this w/a should fail we would try and resubmit the request
Jeremy Cline dce4dd4
with the RING_TAIL already set to the location of this request's wa_tail
Jeremy Cline dce4dd4
potentially causing a GPU hang. We can spot when we do try and
Jeremy Cline dce4dd4
incorrectly resubmit without advancing the RING_TAIL and spare any
Jeremy Cline dce4dd4
embarrassment by forcing the context restore.
Jeremy Cline dce4dd4
Jeremy Cline dce4dd4
In the case of preempt-to-busy, we leave the requests running on the HW
Jeremy Cline dce4dd4
while we unwind. As the ring is still live, we cannot rewind our
Jeremy Cline dce4dd4
rq->tail without forcing a reload so leave it set to rq->wa_tail and
Jeremy Cline dce4dd4
only force a reload if we resubmit after a lite-restore. (Normally, the
Jeremy Cline dce4dd4
forced reload will be a part of the preemption event.)
Jeremy Cline dce4dd4
Jeremy Cline dce4dd4
Fixes: 22b7a426bbe1 ("drm/i915/execlists: Preempt-to-busy")
Jeremy Cline dce4dd4
Closes: https://gitlab.freedesktop.org/drm/intel/issues/673
Jeremy Cline dce4dd4
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Jeremy Cline dce4dd4
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Jeremy Cline dce4dd4
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Jeremy Cline dce4dd4
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Jeremy Cline dce4dd4
Cc: stable@vger.kernel.org
Jeremy Cline dce4dd4
Link: https://patchwork.freedesktop.org/patch/msgid/20191209023215.3519970-1-chris@chris-wilson.co.uk
Jeremy Cline dce4dd4
(cherry picked from commit 82c69bf58650e644c61aa2bf5100b63a1070fd2f)
Jeremy Cline dce4dd4
---
Jeremy Cline dce4dd4
 drivers/gpu/drm/i915/gt/intel_lrc.c | 42 ++++++++++++++---------------
Jeremy Cline dce4dd4
 1 file changed, 20 insertions(+), 22 deletions(-)
Jeremy Cline dce4dd4
Jeremy Cline dce4dd4
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
Jeremy Cline dce4dd4
index d564bfcab6a3..49ce15553e7b 100644
Jeremy Cline dce4dd4
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
Jeremy Cline dce4dd4
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
Jeremy Cline dce4dd4
@@ -471,12 +471,6 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
Jeremy Cline dce4dd4
 	return desc;
Jeremy Cline dce4dd4
 }
Jeremy Cline dce4dd4
 
Jeremy Cline dce4dd4
-static void unwind_wa_tail(struct i915_request *rq)
Jeremy Cline dce4dd4
-{
Jeremy Cline dce4dd4
-	rq->tail = intel_ring_wrap(rq->ring, rq->wa_tail - WA_TAIL_BYTES);
Jeremy Cline dce4dd4
-	assert_ring_tail_valid(rq->ring, rq->tail);
Jeremy Cline dce4dd4
-}
Jeremy Cline dce4dd4
-
Jeremy Cline dce4dd4
 static struct i915_request *
Jeremy Cline dce4dd4
 __unwind_incomplete_requests(struct intel_engine_cs *engine)
Jeremy Cline dce4dd4
 {
Jeremy Cline dce4dd4
@@ -495,7 +489,6 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
Jeremy Cline dce4dd4
 			continue; /* XXX */
Jeremy Cline dce4dd4
 
Jeremy Cline dce4dd4
 		__i915_request_unsubmit(rq);
Jeremy Cline dce4dd4
-		unwind_wa_tail(rq);
Jeremy Cline dce4dd4
 
Jeremy Cline dce4dd4
 		/*
Jeremy Cline dce4dd4
 		 * Push the request back into the queue for later resubmission.
Jeremy Cline dce4dd4
@@ -650,13 +643,29 @@ execlists_schedule_out(struct i915_request *rq)
Jeremy Cline dce4dd4
 	i915_request_put(rq);
Jeremy Cline dce4dd4
 }
Jeremy Cline dce4dd4
 
Jeremy Cline dce4dd4
-static u64 execlists_update_context(const struct i915_request *rq)
Jeremy Cline dce4dd4
+static u64 execlists_update_context(struct i915_request *rq)
Jeremy Cline dce4dd4
 {
Jeremy Cline dce4dd4
 	struct intel_context *ce = rq->hw_context;
Jeremy Cline dce4dd4
-	u64 desc;
Jeremy Cline dce4dd4
+	u64 desc = ce->lrc_desc;
Jeremy Cline dce4dd4
+	u32 tail;
Jeremy Cline dce4dd4
 
Jeremy Cline dce4dd4
-	ce->lrc_reg_state[CTX_RING_TAIL + 1] =
Jeremy Cline dce4dd4
-		intel_ring_set_tail(rq->ring, rq->tail);
Jeremy Cline dce4dd4
+	/*
Jeremy Cline dce4dd4
+	 * WaIdleLiteRestore:bdw,skl
Jeremy Cline dce4dd4
+	 *
Jeremy Cline dce4dd4
+	 * We should never submit the context with the same RING_TAIL twice
Jeremy Cline dce4dd4
+	 * just in case we submit an empty ring, which confuses the HW.
Jeremy Cline dce4dd4
+	 *
Jeremy Cline dce4dd4
+	 * We append a couple of NOOPs (gen8_emit_wa_tail) after the end of
Jeremy Cline dce4dd4
+	 * the normal request to be able to always advance the RING_TAIL on
Jeremy Cline dce4dd4
+	 * subsequent resubmissions (for lite restore). Should that fail us,
Jeremy Cline dce4dd4
+	 * and we try and submit the same tail again, force the context
Jeremy Cline dce4dd4
+	 * reload.
Jeremy Cline dce4dd4
+	 */
Jeremy Cline dce4dd4
+	tail = intel_ring_set_tail(rq->ring, rq->tail);
Jeremy Cline dce4dd4
+	if (unlikely(ce->lrc_reg_state[CTX_RING_TAIL + 1] == tail))
Jeremy Cline dce4dd4
+		desc |= CTX_DESC_FORCE_RESTORE;
Jeremy Cline dce4dd4
+	ce->lrc_reg_state[CTX_RING_TAIL + 1] = tail;
Jeremy Cline dce4dd4
+	rq->tail = rq->wa_tail;
Jeremy Cline dce4dd4
 
Jeremy Cline dce4dd4
 	/*
Jeremy Cline dce4dd4
 	 * Make sure the context image is complete before we submit it to HW.
Jeremy Cline dce4dd4
@@ -675,7 +684,6 @@ static u64 execlists_update_context(const struct i915_request *rq)
Jeremy Cline dce4dd4
 	 */
Jeremy Cline dce4dd4
 	mb();
Jeremy Cline dce4dd4
 
Jeremy Cline dce4dd4
-	desc = ce->lrc_desc;
Jeremy Cline dce4dd4
 	ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE;
Jeremy Cline dce4dd4
 
Jeremy Cline dce4dd4
 	return desc;
Jeremy Cline dce4dd4
@@ -1150,16 +1158,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
Jeremy Cline dce4dd4
 			if (!list_is_last(&last->sched.link,
Jeremy Cline dce4dd4
 					  &engine->active.requests))
Jeremy Cline dce4dd4
 				return;
Jeremy Cline dce4dd4
-
Jeremy Cline dce4dd4
-			/*
Jeremy Cline dce4dd4
-			 * WaIdleLiteRestore:bdw,skl
Jeremy Cline dce4dd4
-			 * Apply the wa NOOPs to prevent
Jeremy Cline dce4dd4
-			 * ring:HEAD == rq:TAIL as we resubmit the
Jeremy Cline dce4dd4
-			 * request. See gen8_emit_fini_breadcrumb() for
Jeremy Cline dce4dd4
-			 * where we prepare the padding after the
Jeremy Cline dce4dd4
-			 * end of the request.
Jeremy Cline dce4dd4
-			 */
Jeremy Cline dce4dd4
-			last->tail = last->wa_tail;
Jeremy Cline dce4dd4
 		}
Jeremy Cline dce4dd4
 	}
Jeremy Cline dce4dd4
 
Jeremy Cline dce4dd4
-- 
Jeremy Cline dce4dd4
2.24.1
Jeremy Cline dce4dd4