From: Andrew Cooper <andrew.cooper3@citrix.com>
Subject: x86/spec-ctrl: Mitigate Cross-Thread Return Address Predictions

This is XSA-426 / CVE-2022-27672

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>

diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
index 923910f553c5..a2ff38cdebf2 100644
--- a/docs/misc/xen-command-line.pandoc
+++ b/docs/misc/xen-command-line.pandoc
@@ -2355,7 +2355,7 @@ guests to use.
   on entry and exit.  These blocks are necessary to virtualise support for
   guests and if disabled, guests will be unable to use IBRS/STIBP/SSBD/etc.
 * `rsb=` offers control over whether to overwrite the Return Stack Buffer /
-  Return Address Stack on entry to Xen.
+  Return Address Stack on entry to Xen and on idle.
 * `md-clear=` offers control over whether to use VERW to flush
   microarchitectural buffers on idle and exit from Xen.  *Note: For
   compatibility with development versions of this fix, `mds=` is also accepted
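
For illustration only: `rsb=` is a suboption of the `spec-ctrl=` Xen
command line option, so forcing the overwrite on regardless of
auto-detection would look something like:

    spec-ctrl=rsb=true
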
diff --git a/xen/arch/x86/include/asm/cpufeatures.h b/xen/arch/x86/include/asm/cpufeatures.h
index 865f1109866d..da0593de8542 100644
--- a/xen/arch/x86/include/asm/cpufeatures.h
+++ b/xen/arch/x86/include/asm/cpufeatures.h
@@ -35,7 +35,8 @@ XEN_CPUFEATURE(SC_RSB_HVM,        X86_SYNTH(19)) /* RSB overwrite needed for HVM
 XEN_CPUFEATURE(XEN_SELFSNOOP,     X86_SYNTH(20)) /* SELFSNOOP gets used by Xen itself */
 XEN_CPUFEATURE(SC_MSR_IDLE,       X86_SYNTH(21)) /* Clear MSR_SPEC_CTRL on idle */
 XEN_CPUFEATURE(XEN_LBR,           X86_SYNTH(22)) /* Xen uses MSR_DEBUGCTL.LBR */
-/* Bits 23,24 unused. */
+/* Bit 23 unused. */
+XEN_CPUFEATURE(SC_RSB_IDLE,       X86_SYNTH(24)) /* RSB overwrite needed for idle. */
 XEN_CPUFEATURE(SC_VERW_IDLE,      X86_SYNTH(25)) /* VERW used by Xen for idle */
 XEN_CPUFEATURE(XEN_SHSTK,         X86_SYNTH(26)) /* Xen uses CET Shadow Stacks */
 XEN_CPUFEATURE(XEN_IBT,           X86_SYNTH(27)) /* Xen uses CET Indirect Branch Tracking */
diff --git a/xen/arch/x86/include/asm/spec_ctrl.h b/xen/arch/x86/include/asm/spec_ctrl.h
index 6a77c3937844..391973ef6a28 100644
--- a/xen/arch/x86/include/asm/spec_ctrl.h
+++ b/xen/arch/x86/include/asm/spec_ctrl.h
@@ -159,6 +159,21 @@ static always_inline void spec_ctrl_enter_idle(struct cpu_info *info)
      */
     alternative_input("", "verw %[sel]", X86_FEATURE_SC_VERW_IDLE,
                       [sel] "m" (info->verw_sel));
+
+    /*
+     * Cross-Thread Return Address Predictions:
+     *
+     * On vulnerable systems, the return predictions (RSB/RAS) are statically
+     * partitioned between active threads.  When entering idle, our entries
+     * are re-partitioned to allow the other threads to use them.
+     *
+     * In some cases, we might still have guest entries in the RAS, so flush
+     * them before they can be injected sideways into our sibling thread.
+     *
+     * (ab)use alternative_input() to specify clobbers.
+     */
+    alternative_input("", "DO_OVERWRITE_RSB", X86_FEATURE_SC_RSB_IDLE,
+                      : "rax", "rcx");
 }
 
 /* WARNING! `ret`, `call *`, `jmp *` not safe before this call. */
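
As a rough sketch of the technique DO_OVERWRITE_RSB implements (this is
not Xen's actual macro; the helper name is hypothetical, and red-zone and
CET shadow-stack fixup concerns are ignored here): a chain of CALL
instructions refills every RSB/RAS slot with a benign address whose
architectural successor is a trapping instruction, after which the real
stack pointer is restored.

    /* Sketch only: refill all 32 RSB/RAS entries with benign addresses.
     * Each CALL pushes a return address into the predictor; the INT3
     * after it halts any speculation consuming a stale (guest) entry. */
    static inline void overwrite_rsb_sketch(void)
    {
        unsigned long saved_rsp;

        asm volatile ( "mov %%rsp, %0\n\t"
                       "mov $16, %%ecx\n"     /* 16 loops x 2 calls = 32 */
                       "1: call 2f\n\t"
                       "   int3\n"            /* halt rogue speculation */
                       "2: call 3f\n\t"
                       "   int3\n"
                       "3: sub $1, %%ecx\n\t"
                       "   jnz 1b\n\t"
                       "mov %0, %%rsp"        /* rewind the stack */
                       : "=&r" (saved_rsp) :: "rcx", "memory" );
    }
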
diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
index a320b81947c8..e80e2a5ed1a9 100644
--- a/xen/arch/x86/spec_ctrl.c
+++ b/xen/arch/x86/spec_ctrl.c
@@ -1327,13 +1327,38 @@ void __init init_speculation_mitigations(void)
      * 3) Some CPUs have RSBs which are not full width, which allow the
      *    attacker's entries to alias Xen addresses.
      *
+     * 4) Some CPUs have RSBs which are re-partitioned based on thread
+     *    idleness, which allows an attacker to inject entries into the other
+     *    thread.  We still activate the optimisation in this case, and
+     *    mitigate in the idle path, which has lower overhead.
+     *
      * It is safe to turn off RSB stuffing when Xen is using SMEP itself, and
      * 32bit PV guests are disabled, and when the RSB is full width.
      */
     BUILD_BUG_ON(RO_MPT_VIRT_START != PML4_ADDR(256));
-    if ( opt_rsb_pv == -1 && boot_cpu_has(X86_FEATURE_XEN_SMEP) &&
-         !opt_pv32 && rsb_is_full_width() )
-        opt_rsb_pv = 0;
+    if ( opt_rsb_pv == -1 )
+    {
+        opt_rsb_pv = (opt_pv32 || !boot_cpu_has(X86_FEATURE_XEN_SMEP) ||
+                      !rsb_is_full_width());
+
+        /*
+         * Cross-Thread Return Address Predictions.
+         *
+         * Vulnerable systems are Zen1/Zen2 uarch, which is AMD Fam17 / Hygon
+         * Fam18, when SMT is active.
+         *
+         * To mitigate, we must flush the RSB/RAS/RAP once between entering
+         * Xen and going idle.
+         *
+         * Most cases flush on entry to Xen anyway.  The one case where we
+         * don't is when using the SMEP optimisation for PV guests.  Flushing
+         * before going idle is less overhead than flushing on PV entry.
+         */
+        if ( !opt_rsb_pv && hw_smt_enabled &&
+             (boot_cpu_data.x86_vendor & (X86_VENDOR_AMD|X86_VENDOR_HYGON)) &&
+             (boot_cpu_data.x86 == 0x17 || boot_cpu_data.x86 == 0x18) )
+            setup_force_cpu_cap(X86_FEATURE_SC_RSB_IDLE);
+    }
 
     if ( opt_rsb_pv )
     {
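
Putting the pieces together, the net idle-path behaviour can be pictured
with the following sketch (helper and predicate names are illustrative,
not Xen's; the real dispatch happens via the alternatives mechanism in
spec_ctrl_enter_idle()):

    /* Sketch of the idle-path ordering after this patch.  Guest RAS
     * entries must be flushed before this thread's predictor partition
     * is handed over to the sibling. */
    static void spec_ctrl_enter_idle_sketch(void)
    {
        if ( cpu_has(SC_MSR_IDLE) )
            write_spec_ctrl(0);     /* drop IBRS/STIBP/SSBD while idle */

        if ( cpu_has(SC_VERW_IDLE) )
            do_verw();              /* MDS: scrub uarch buffers */

        if ( cpu_has(SC_RSB_IDLE) )   /* XSA-426 */
            overwrite_rsb_sketch();   /* flush guest RAS entries */
    }

The RSB overwrite runs last, matching where the patch appends its
alternative block, and fires only in the configuration selected in
init_speculation_mitigations() above: vulnerable Zen1/Zen2 parts with SMT
active, where the SMEP optimisation means the PV entry path does not
already flush.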