15a2072
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
15a2072
From: Peter Jones <pjones@redhat.com>
15a2072
Date: Tue, 7 Nov 2017 17:12:17 -0500
15a2072
Subject: [PATCH] Make pmtimer tsc calibration not take 51 seconds to fail.
15a2072
15a2072
On my laptop running at 2.4GHz, if I run a VM where tsc calibration
15a2072
using pmtimer will fail presuming a broken pmtimer, it takes ~51 seconds
15a2072
to do so (as measured with the stopwatch on my phone), with a tsc delta
15a2072
of 0x1cd1c85300, or around 125 billion cycles.
15a2072
15a2072
If instead of trying to wait for 5-200ms to show up on the pmtimer, we try
15a2072
to wait for 5-200us, it decides it's broken in ~0x2626aa0 TSCs, aka ~2.4
15a2072
million cycles, or more or less instantly.
15a2072
15a2072
Additionally, reading the pmtimer was returning 0xffffffff anyway,
15a2072
and that's obviously an invalid return.  I've added a check for that and
15a2072
0 so we don't bother waiting for the test if what we're seeing is dead
15a2072
pins with no response at all.
15a2072
15a2072
If "debug" includes "pmtimer", you will see one of the following
15a2072
three outcomes.  If pmtimer gives all 0 or all 1 bits, you will see:
15a2072
15a2072
kern/i386/tsc_pmtimer.c:77: pmtimer: 0xffffff bad_reads: 1
15a2072
kern/i386/tsc_pmtimer.c:77: pmtimer: 0xffffff bad_reads: 2
15a2072
kern/i386/tsc_pmtimer.c:77: pmtimer: 0xffffff bad_reads: 3
15a2072
kern/i386/tsc_pmtimer.c:77: pmtimer: 0xffffff bad_reads: 4
15a2072
kern/i386/tsc_pmtimer.c:77: pmtimer: 0xffffff bad_reads: 5
15a2072
kern/i386/tsc_pmtimer.c:77: pmtimer: 0xffffff bad_reads: 6
15a2072
kern/i386/tsc_pmtimer.c:77: pmtimer: 0xffffff bad_reads: 7
15a2072
kern/i386/tsc_pmtimer.c:77: pmtimer: 0xffffff bad_reads: 8
15a2072
kern/i386/tsc_pmtimer.c:77: pmtimer: 0xffffff bad_reads: 9
15a2072
kern/i386/tsc_pmtimer.c:77: pmtimer: 0xffffff bad_reads: 10
15a2072
kern/i386/tsc_pmtimer.c:78: timer is broken; giving up.
15a2072
15a2072
This outcome was tested using qemu+kvm with UEFI (OVMF) firmware and
15a2072
these options: -machine pc-q35-2.10 -cpu Broadwell-noTSX
15a2072
15a2072
If pmtimer gives any other bit patterns but is not actually marching
15a2072
forward fast enough to use for clock calibration, you will see:
15a2072
15a2072
kern/i386/tsc_pmtimer.c:121: pmtimer delta is 0x0 (1904 iterations)
15a2072
kern/i386/tsc_pmtimer.c:124: tsc delta is implausible: 0x2626aa0
15a2072
15a2072
This outcome was tested using grub compiled with GRUB_PMTIMER_IGNORE_BAD_READS
15a2072
defined (so as not to trip the bad read test) using qemu+kvm with UEFI
15a2072
(OVMF) firmware, and these options: -machine pc-q35-2.10 -cpu Broadwell-noTSX
15a2072
15a2072
If pmtimer actually works, you'll see something like:
15a2072
15a2072
kern/i386/tsc_pmtimer.c:105: pmtimer delta is 0xdff
15a2072
kern/i386/tsc_pmtimer.c:106: tsc delta is 0x278f5b
15a2072
15a2072
This outcome was tested using qemu+kvm with UEFI (OVMF) firmware, and
15a2072
these options: -machine pc-i440fx-2.4 -cpu Broadwell-noTSX
15a2072
15a2072
I've also tested this outcome on a real Intel Xeon E3-1275v3 on an Intel
15a2072
Server Board S1200V3RPS using the SDV.RP.B8 "Release" build here:
15a2072
https://firmware.intel.com/sites/default/files/UEFIDevKit_S1200RP_vB8.zip
15a2072
15a2072
Signed-off-by: Peter Jones <pjones@redhat.com>
15a2072
---
15a2072
 grub-core/kern/i386/tsc_pmtimer.c | 109 +++++++++++++++++++++++++++++++-------
15a2072
 1 file changed, 89 insertions(+), 20 deletions(-)
15a2072
15a2072
diff --git a/grub-core/kern/i386/tsc_pmtimer.c b/grub-core/kern/i386/tsc_pmtimer.c
15a2072
index c9c36169978..ca15c3aacd7 100644
15a2072
--- a/grub-core/kern/i386/tsc_pmtimer.c
15a2072
+++ b/grub-core/kern/i386/tsc_pmtimer.c
15a2072
@@ -28,40 +28,101 @@
15a2072
 #include <grub/acpi.h>
15a2072
 #include <grub/cpu/io.h>
15a2072
 
15a2072
+/*
15a2072
+ * Define GRUB_PMTIMER_IGNORE_BAD_READS if you're trying to test a timer that's
15a2072
+ * present but doesn't keep time well.
15a2072
+ */
15a2072
+// #define GRUB_PMTIMER_IGNORE_BAD_READS
15a2072
+
15a2072
 grub_uint64_t
15a2072
 grub_pmtimer_wait_count_tsc (grub_port_t pmtimer,
15a2072
 			     grub_uint16_t num_pm_ticks)
15a2072
 {
15a2072
   grub_uint32_t start;
15a2072
-  grub_uint32_t last;
15a2072
-  grub_uint32_t cur, end;
15a2072
+  grub_uint64_t cur, end;
15a2072
   grub_uint64_t start_tsc;
15a2072
   grub_uint64_t end_tsc;
15a2072
-  int num_iter = 0;
15a2072
+  unsigned int num_iter = 0;
15a2072
+#ifndef GRUB_PMTIMER_IGNORE_BAD_READS
15a2072
+  int bad_reads = 0;
15a2072
+#endif
15a2072
 
15a2072
-  start = grub_inl (pmtimer) & 0xffffff;
15a2072
-  last = start;
15a2072
+  /*
15a2072
+   * Some timers are 24-bit and some are 32-bit, but it doesn't make much
15a2072
+   * difference to us.  Caring which one we have isn't really worth it since
15a2072
+   * the low-order digits will give us enough data to calibrate TSC.  So just
15a2072
+   * mask the top-order byte off.
15a2072
+   */
15a2072
+  cur = start = grub_inl (pmtimer) & 0xffffffUL;
15a2072
   end = start + num_pm_ticks;
15a2072
   start_tsc = grub_get_tsc ();
15a2072
   while (1)
15a2072
     {
15a2072
-      cur = grub_inl (pmtimer) & 0xffffff;
15a2072
-      if (cur < last)
15a2072
-	cur |= 0x1000000;
15a2072
-      num_iter++;
15a2072
+      cur &= 0xffffffffff000000ULL;
15a2072
+      cur |= grub_inl (pmtimer) & 0xffffffUL;
15a2072
+
15a2072
+      end_tsc = grub_get_tsc();
15a2072
+
15a2072
+#ifndef GRUB_PMTIMER_IGNORE_BAD_READS
15a2072
+      /*
15a2072
+       * If we get 10 reads in a row that are obviously dead pins, there's no
15a2072
+       * reason to do this thousands of times.
15a2072
+       */
15a2072
+      if (cur == 0xffffffUL || cur == 0)
15a2072
+	{
15a2072
+	  bad_reads++;
15a2072
+	  grub_dprintf ("pmtimer",
15a2072
+			"pmtimer: 0x%"PRIxGRUB_UINT64_T" bad_reads: %d\n",
15a2072
+			cur, bad_reads);
15a2072
+	  grub_dprintf ("pmtimer", "timer is broken; giving up.\n");
15a2072
+
15a2072
+	  if (bad_reads == 10)
15a2072
+	    return 0;
15a2072
+	}
15a2072
+#endif
15a2072
+
15a2072
+      if (cur < start)
15a2072
+	cur += 0x1000000;
15a2072
+
15a2072
       if (cur >= end)
15a2072
 	{
15a2072
-	  end_tsc = grub_get_tsc ();
15a2072
+	  grub_dprintf ("pmtimer", "pmtimer delta is 0x%"PRIxGRUB_UINT64_T"\n",
15a2072
+			cur - start);
15a2072
+	  grub_dprintf ("pmtimer", "tsc delta is 0x%"PRIxGRUB_UINT64_T"\n",
15a2072
+			end_tsc - start_tsc);
15a2072
 	  return end_tsc - start_tsc;
15a2072
 	}
15a2072
-      /* Check for broken PM timer.
15a2072
-	 50000000 TSCs is between 5 ms (10GHz) and 200 ms (250 MHz)
15a2072
-	 if after this time we still don't have 1 ms on pmtimer, then
15a2072
-	 pmtimer is broken.
15a2072
+
15a2072
+      /*
15a2072
+       * Check for broken PM timer.  1ms at 10GHz should be 1E+7 TSCs; at
15a2072
+       * 250MHz it should be 2.5E+5.  So after 4E+7 TSCs on a 10GHz machine,
15a2072
+       * we should have seen pmtimer show 4ms of change (i.e. cur =~
15a2072
+       * start+14320); on a 250MHz machine that should be 160ms (start+572800).
15a2072
+       * If after this time we still don't have 1ms on pmtimer, then pmtimer
15a2072
+       * is broken.
15a2072
+       *
15a2072
+       * Likewise, if our code is perfectly efficient and introduces no delays
15a2072
+       * whatsoever, on a 10GHz system we should see a TSC delta of 3580 in
15a2072
+       * ~3580 iterations.  On a 250MHz machine that should be ~900 iterations.
15a2072
+       *
15a2072
+       * With those factors in mind, there are two limits here.  There's a hard
15a2072
+       * limit here at 8x our desired pm timer delta, picked as an arbitrarily
15a2072
+       * large value that's still not a lot of time to humans, because if we
15a2072
+       * get that far this is either an implausibly fast machine or the pmtimer
15a2072
+       * is not running.  And there's another limit on 4x our 10GHz tsc delta
15a2072
+       * without seeing cur converge on our target value.
15a2072
        */
15a2072
-      if ((num_iter & 0xffffff) == 0 && grub_get_tsc () - start_tsc > 5000000) {
15a2072
-	return 0;
15a2072
-      }
15a2072
+      if ((++num_iter > (grub_uint32_t)num_pm_ticks << 3UL) ||
15a2072
+	  end_tsc - start_tsc > 40000000)
15a2072
+	{
15a2072
+	  grub_dprintf ("pmtimer",
15a2072
+			"pmtimer delta is 0x%"PRIxGRUB_UINT64_T" (%u iterations)\n",
15a2072
+			cur - start, num_iter);
15a2072
+	  grub_dprintf ("pmtimer",
15a2072
+			"tsc delta is implausible: 0x%"PRIxGRUB_UINT64_T"\n",
15a2072
+			end_tsc - start_tsc);
15a2072
+	  return 0;
15a2072
+	}
15a2072
     }
15a2072
 }
15a2072
 
15a2072
@@ -74,12 +135,20 @@ grub_tsc_calibrate_from_pmtimer (void)
15a2072
 
15a2072
   fadt = grub_acpi_find_fadt ();
15a2072
   if (!fadt)
15a2072
-    return 0;
15a2072
+    {
15a2072
+      grub_dprintf ("pmtimer", "No FADT found; not using pmtimer.\n");
15a2072
+      return 0;
15a2072
+    }
15a2072
   pmtimer = fadt->pmtimer;
15a2072
   if (!pmtimer)
15a2072
-    return 0;
15a2072
+    {
15a2072
+      grub_dprintf ("pmtimer", "FADT does not specify pmtimer; skipping.\n");
15a2072
+      return 0;
15a2072
+    }
15a2072
 
15a2072
-  /* It's 3.579545 MHz clock. Wait 1 ms.  */
15a2072
+  /*
15a2072
+   * It's 3.579545 MHz clock. Wait 1 ms.
15a2072
+   */
15a2072
   tsc_diff = grub_pmtimer_wait_count_tsc (pmtimer, 3580);
15a2072
   if (tsc_diff == 0)
15a2072
     return 0;