3c4dd19
From 0b158c30810db86f549cabd6e6021ab15962370d Mon Sep 17 00:00:00 2001
da63b36
From: Peter Jones <pjones@redhat.com>
da63b36
Date: Tue, 7 Nov 2017 17:12:17 -0500
3c4dd19
Subject: [PATCH 197/229] Make pmtimer tsc calibration not take 51 seconds to
da63b36
 fail.
da63b36
da63b36
On my laptop running at 2.4GHz, if I run a VM where tsc calibration
da63b36
using pmtimer will fail presuming a broken pmtimer, it takes ~51 seconds
da63b36
to do so (as measured with the stopwatch on my phone), with a tsc delta
da63b36
of 0x1cd1c85300, or around 125 billion cycles.
da63b36
da63b36
If instead of trying to wait for 5-200ms to show up on the pmtimer, we try
81987f4
to wait for 5-200us, it decides it's broken in ~0x2626aa0 TSCs, aka ~40
da63b36
million cycles, or more or less instantly.
da63b36
da63b36
Additionally, reading the pmtimer was returning 0xffffffff anyway,
da63b36
and that's obviously an invalid return.  I've added a check for that and
da63b36
0 so we don't bother waiting for the test if what we're seeing is dead
da63b36
pins with no response at all.
da63b36
81987f4
If "debug" includes "pmtimer", you will see one of the following
81987f4
three outcomes.  If pmtimer gives all 0 or all 1 bits, you will see:
81987f4
81987f4
kern/i386/tsc_pmtimer.c:77: pmtimer: 0xffffff bad_reads: 1
81987f4
kern/i386/tsc_pmtimer.c:77: pmtimer: 0xffffff bad_reads: 2
81987f4
kern/i386/tsc_pmtimer.c:77: pmtimer: 0xffffff bad_reads: 3
81987f4
kern/i386/tsc_pmtimer.c:77: pmtimer: 0xffffff bad_reads: 4
81987f4
kern/i386/tsc_pmtimer.c:77: pmtimer: 0xffffff bad_reads: 5
81987f4
kern/i386/tsc_pmtimer.c:77: pmtimer: 0xffffff bad_reads: 6
81987f4
kern/i386/tsc_pmtimer.c:77: pmtimer: 0xffffff bad_reads: 7
81987f4
kern/i386/tsc_pmtimer.c:77: pmtimer: 0xffffff bad_reads: 8
81987f4
kern/i386/tsc_pmtimer.c:77: pmtimer: 0xffffff bad_reads: 9
81987f4
kern/i386/tsc_pmtimer.c:77: pmtimer: 0xffffff bad_reads: 10
81987f4
kern/i386/tsc_pmtimer.c:78: timer is broken; giving up.
81987f4
81987f4
This outcome was tested using qemu+kvm with UEFI (OVMF) firmware and
81987f4
these options: -machine pc-q35-2.10 -cpu Broadwell-noTSX
81987f4
81987f4
If pmtimer gives any other bit patterns but is not actually marching
81987f4
forward fast enough to use for clock calibration, you will see:
81987f4
81987f4
kern/i386/tsc_pmtimer.c:121: pmtimer delta is 0x0 (1904 iterations)
81987f4
kern/i386/tsc_pmtimer.c:124: tsc delta is implausible: 0x2626aa0
81987f4
81987f4
This outcome was tested using grub compiled with GRUB_PMTIMER_IGNORE_BAD_READS
81987f4
defined (so as not to trip the bad read test) using qemu+kvm with UEFI
81987f4
(OVMF) firmware, and these options: -machine pc-q35-2.10 -cpu Broadwell-noTSX
81987f4
81987f4
If pmtimer actually works, you'll see something like:
81987f4
81987f4
kern/i386/tsc_pmtimer.c:121: pmtimer delta is 0x0 (1904 iterations)
81987f4
kern/i386/tsc_pmtimer.c:124: tsc delta is implausible: 0x2626aa0
81987f4
81987f4
This outcome was tested using qemu+kvm with UEFI (OVMF) firmware, and
81987f4
these options: -machine pc-i440fx-2.4 -cpu Broadwell-noTSX
81987f4
81987f4
I've also tested this outcome on a real Intel Xeon E3-1275v3 on an Intel
81987f4
Server Board S1200V3RPS using the SDV.RP.B8 "Release" build here:
81987f4
https://firmware.intel.com/sites/default/files/UEFIDevKit_S1200RP_vB8.zip
81987f4
da63b36
Signed-off-by: Peter Jones <pjones@redhat.com>
da63b36
---
81987f4
 grub-core/kern/i386/tsc_pmtimer.c | 109 +++++++++++++++++++++++++++++++-------
81987f4
 1 file changed, 89 insertions(+), 20 deletions(-)
da63b36
da63b36
diff --git a/grub-core/kern/i386/tsc_pmtimer.c b/grub-core/kern/i386/tsc_pmtimer.c
81987f4
index c9c36169978..ca15c3aacd7 100644
da63b36
--- a/grub-core/kern/i386/tsc_pmtimer.c
da63b36
+++ b/grub-core/kern/i386/tsc_pmtimer.c
81987f4
@@ -28,40 +28,101 @@
81987f4
 #include <grub/acpi.h>
81987f4
 #include <grub/cpu/io.h>
81987f4
 
81987f4
+/*
81987f4
+ * Define GRUB_PMTIMER_IGNORE_BAD_READS if you're trying to test a timer that's
81987f4
+ * present but doesn't keep time well.
81987f4
+ */
81987f4
+// #define GRUB_PMTIMER_IGNORE_BAD_READS
81987f4
+
81987f4
 grub_uint64_t
81987f4
 grub_pmtimer_wait_count_tsc (grub_port_t pmtimer,
81987f4
 			     grub_uint16_t num_pm_ticks)
81987f4
 {
81987f4
   grub_uint32_t start;
81987f4
-  grub_uint32_t last;
81987f4
-  grub_uint32_t cur, end;
81987f4
+  grub_uint64_t cur, end;
da63b36
   grub_uint64_t start_tsc;
da63b36
   grub_uint64_t end_tsc;
81987f4
-  int num_iter = 0;
81987f4
+  unsigned int num_iter = 0;
81987f4
+#ifndef GRUB_PMTIMER_IGNORE_BAD_READS
da63b36
+  int bad_reads = 0;
81987f4
+#endif
da63b36
 
da63b36
-  start = grub_inl (pmtimer) & 0xffffff;
81987f4
-  last = start;
81987f4
+  /*
81987f4
+   * Some timers are 24-bit and some are 32-bit, but it doesn't make much
81987f4
+   * difference to us.  Caring which one we have isn't really worth it since
81987f4
+   * the low-order digits will give us enough data to calibrate TSC.  So just
81987f4
+   * mask the top-order byte off.
81987f4
+   */
81987f4
+  cur = start = grub_inl (pmtimer) & 0xffffffUL;
da63b36
   end = start + num_pm_ticks;
da63b36
   start_tsc = grub_get_tsc ();
da63b36
   while (1)
da63b36
     {
da63b36
-      cur = grub_inl (pmtimer) & 0xffffff;
81987f4
-      if (cur < last)
81987f4
-	cur |= 0x1000000;
81987f4
-      num_iter++;
81987f4
+      cur &= 0xffffffffff000000ULL;
81987f4
+      cur |= grub_inl (pmtimer) & 0xffffffUL;
81987f4
+
81987f4
+      end_tsc = grub_get_tsc();
da63b36
+
81987f4
+#ifndef GRUB_PMTIMER_IGNORE_BAD_READS
81987f4
+      /*
81987f4
+       * If we get 10 reads in a row that are obviously dead pins, there's no
81987f4
+       * reason to do this thousands of times.
da63b36
+       */
81987f4
+      if (cur == 0xffffffUL || cur == 0)
da63b36
+	{
da63b36
+	  bad_reads++;
81987f4
+	  grub_dprintf ("pmtimer",
81987f4
+			"pmtimer: 0x%"PRIxGRUB_UINT64_T" bad_reads: %d\n",
81987f4
+			cur, bad_reads);
81987f4
+	  grub_dprintf ("pmtimer", "timer is broken; giving up.\n");
da63b36
+
da63b36
+	  if (bad_reads == 10)
da63b36
+	    return 0;
da63b36
+	}
81987f4
+#endif
da63b36
+
81987f4
+      if (cur < start)
81987f4
+	cur += 0x1000000;
da63b36
+
da63b36
       if (cur >= end)
da63b36
 	{
81987f4
-	  end_tsc = grub_get_tsc ();
81987f4
+	  grub_dprintf ("pmtimer", "pmtimer delta is 0x%"PRIxGRUB_UINT64_T"\n",
81987f4
+			cur - start);
81987f4
+	  grub_dprintf ("pmtimer", "tsc delta is 0x%"PRIxGRUB_UINT64_T"\n",
da63b36
+			end_tsc - start_tsc);
da63b36
 	  return end_tsc - start_tsc;
da63b36
 	}
da63b36
-      /* Check for broken PM timer.
da63b36
-	 50000000 TSCs is between 5 ms (10GHz) and 200 ms (250 MHz)
da63b36
-	 if after this time we still don't have 1 ms on pmtimer, then
da63b36
-	 pmtimer is broken.
81987f4
+
81987f4
+      /*
81987f4
+       * Check for broken PM timer.  1ms at 10GHz should be 1E+7 TSCs; at
81987f4
+       * 250MHz it should be 2.5E+5.  So after 4E+7 TSCs on a 10GHz machine,
81987f4
+       * we should have seen pmtimer show 4ms of change (i.e. cur =~
81987f4
+       * start+14320); on a 250MHz machine that should be 160ms (start+572800).
81987f4
+       * If after this time we still don't have 1ms on pmtimer, then pmtimer
81987f4
+       * is broken.
81987f4
+       *
81987f4
+       * Likewise, if our code is perfectly efficient and introduces no delays
81987f4
+       * whatsoever, on a 10GHz system we should see a TSC delta of 3580 in
81987f4
+       * ~3580 iterations.  On a 250MHz machine that should be ~900 iterations.
81987f4
+       *
81987f4
+       * With those factors in mind, there are two limits here.  There's a hard
81987f4
+       * limit here at 8x our desired pm timer delta, picked as an arbitrarily
81987f4
+       * large value that's still not a lot of time to humans, because if we
81987f4
+       * get that far this is either an implausibly fast machine or the pmtimer
81987f4
+       * is not running.  And there's another limit on 4x our 10GHz tsc delta
81987f4
+       * without seeing cur converge on our target value.
da63b36
        */
da63b36
-      if ((num_iter & 0xffffff) == 0 && grub_get_tsc () - start_tsc > 5000000) {
da63b36
-	return 0;
da63b36
-      }
81987f4
+      if ((++num_iter > (grub_uint32_t)num_pm_ticks << 3UL) ||
81987f4
+	  end_tsc - start_tsc > 40000000)
da63b36
+	{
81987f4
+	  grub_dprintf ("pmtimer",
81987f4
+			"pmtimer delta is 0x%"PRIxGRUB_UINT64_T" (%u iterations)\n",
81987f4
+			cur - start, num_iter);
81987f4
+	  grub_dprintf ("pmtimer",
81987f4
+			"tsc delta is implausible: 0x%"PRIxGRUB_UINT64_T"\n",
da63b36
+			end_tsc - start_tsc);
da63b36
+	  return 0;
da63b36
+	}
da63b36
     }
da63b36
 }
da63b36
 
81987f4
@@ -74,12 +135,20 @@ grub_tsc_calibrate_from_pmtimer (void)
81987f4
 
81987f4
   fadt = grub_acpi_find_fadt ();
81987f4
   if (!fadt)
81987f4
-    return 0;
81987f4
+    {
81987f4
+      grub_dprintf ("pmtimer", "No FADT found; not using pmtimer.\n");
81987f4
+      return 0;
81987f4
+    }
81987f4
   pmtimer = fadt->pmtimer;
81987f4
   if (!pmtimer)
81987f4
-    return 0;
81987f4
+    {
81987f4
+      grub_dprintf ("pmtimer", "FADT does not specify pmtimer; skipping.\n");
81987f4
+      return 0;
81987f4
+    }
81987f4
 
81987f4
-  /* It's 3.579545 MHz clock. Wait 1 ms.  */
81987f4
+  /*
81987f4
+   * It's 3.579545 MHz clock. Wait 1 ms.
81987f4
+   */
81987f4
   tsc_diff = grub_pmtimer_wait_count_tsc (pmtimer, 3580);
81987f4
   if (tsc_diff == 0)
81987f4
     return 0;
da63b36
-- 
81987f4
2.15.0
da63b36