Blob Blame History Raw
From: Andrew Cooper <andrew.cooper3@citrix.com>
Subject: x86/spec-ctrl: Perform VERW flushing later in exit paths

On parts vulnerable to RFDS, VERW's side effects are extended to scrub all
non-architectural entries in various Physical Register Files.  To remove all
of Xen's values, the VERW must be after popping the GPRs.

Rework SPEC_CTRL_COND_VERW to default to an CPUINFO_error_code %rsp position,
but with overrides for other contexts.  Identify that it clobbers eflags; this
is particularly relevant for the SYSRET path.

For the IST exit return to Xen, have the main SPEC_CTRL_EXIT_TO_XEN put a
shadow copy of spec_ctrl_flags, as GPRs can't be used at the point we want to
issue the VERW.

This is part of XSA-452 / CVE-2023-28746.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
(cherry picked from commit 0a666cf2cd99df6faf3eebc81a1fc286e4eca4c7)

diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h
index 525745a06608..13acebc75dff 100644
--- a/xen/arch/x86/include/asm/spec_ctrl_asm.h
+++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h
@@ -159,16 +159,23 @@
  */
 #define STK_REL(field, top_of_stk) ((field) - (top_of_stk))
 
-.macro DO_SPEC_CTRL_COND_VERW
+.macro SPEC_CTRL_COND_VERW \
+    scf=STK_REL(CPUINFO_spec_ctrl_flags, CPUINFO_error_code), \
+    sel=STK_REL(CPUINFO_verw_sel,        CPUINFO_error_code)
 /*
- * Requires %rsp=cpuinfo
+ * Requires \scf and \sel as %rsp-relative expressions
+ * Clobbers eflags
+ *
+ * VERW needs to run after guest GPRs have been restored, where only %rsp is
+ * good to use.  Default to expecting %rsp pointing at CPUINFO_error_code.
+ * Contexts where this is not true must provide an alternative \scf and \sel.
  *
  * Issue a VERW for its flushing side effect, if indicated.  This is a Spectre
  * v1 gadget, but the IRET/VMEntry is serialising.
  */
-    testb $SCF_verw, CPUINFO_spec_ctrl_flags(%rsp)
+    testb $SCF_verw, \scf(%rsp)
     jz .L\@_verw_skip
-    verw CPUINFO_verw_sel(%rsp)
+    verw \sel(%rsp)
 .L\@_verw_skip:
 .endm
 
@@ -286,8 +293,6 @@
  */
     ALTERNATIVE "", DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_PV
 
-    DO_SPEC_CTRL_COND_VERW
-
     ALTERNATIVE "", DO_SPEC_CTRL_DIV, X86_FEATURE_SC_DIV
 .endm
 
@@ -367,7 +372,7 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise):
  */
 .macro SPEC_CTRL_EXIT_TO_XEN
 /*
- * Requires %r12=ist_exit, %r14=stack_end
+ * Requires %r12=ist_exit, %r14=stack_end, %rsp=regs
  * Clobbers %rax, %rbx, %rcx, %rdx
  */
     movzbl STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14), %ebx
@@ -395,11 +400,18 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise):
     test %r12, %r12
     jz .L\@_skip_ist_exit
 
-    /* Logically DO_SPEC_CTRL_COND_VERW but without the %rsp=cpuinfo dependency */
-    testb $SCF_verw, %bl
-    jz .L\@_skip_verw
-    verw STACK_CPUINFO_FIELD(verw_sel)(%r14)
-.L\@_skip_verw:
+    /*
+     * Stash SCF and verw_sel above eflags in the case of an IST_exit.  The
+     * VERW logic needs to run after guest GPRs have been restored; i.e. where
+     * we cannot use %r12 or %r14 for the purposes they have here.
+     *
+     * When the CPU pushed this exception frame, it zero-extended eflags.
+     * Therefore it is safe for the VERW logic to look at the stashed SCF
+     * outside of the ist_exit condition.  Also, this stashing won't influence
+     * any other restore_all_guest() paths.
+     */
+    or $(__HYPERVISOR_DS32 << 16), %ebx
+    mov %ebx, UREGS_eflags + 4(%rsp) /* EFRAME_shadow_scf/sel */
 
     ALTERNATIVE "", DO_SPEC_CTRL_DIV, X86_FEATURE_SC_DIV
 
diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c
index 0d336788989f..85c7d0c98967 100644
--- a/xen/arch/x86/x86_64/asm-offsets.c
+++ b/xen/arch/x86/x86_64/asm-offsets.c
@@ -55,14 +55,22 @@ void __dummy__(void)
      * EFRAME_* is for the entry/exit logic where %rsp is pointing at
      * UREGS_error_code and GPRs are still/already guest values.
      */
-#define OFFSET_EF(sym, mem)                                             \
+#define OFFSET_EF(sym, mem, ...)                                        \
     DEFINE(sym, offsetof(struct cpu_user_regs, mem) -                   \
-                offsetof(struct cpu_user_regs, error_code))
+                offsetof(struct cpu_user_regs, error_code) __VA_ARGS__)
 
     OFFSET_EF(EFRAME_entry_vector,    entry_vector);
     OFFSET_EF(EFRAME_rip,             rip);
     OFFSET_EF(EFRAME_cs,              cs);
     OFFSET_EF(EFRAME_eflags,          eflags);
+
+    /*
+     * These aren't real fields.  They're spare space, used by the IST
+     * exit-to-xen path.
+     */
+    OFFSET_EF(EFRAME_shadow_scf,      eflags, +4);
+    OFFSET_EF(EFRAME_shadow_sel,      eflags, +6);
+
     OFFSET_EF(EFRAME_rsp,             rsp);
     BLANK();
 
@@ -136,6 +144,7 @@ void __dummy__(void)
 
     OFFSET(CPUINFO_guest_cpu_user_regs, struct cpu_info, guest_cpu_user_regs);
     OFFSET(CPUINFO_error_code, struct cpu_info, guest_cpu_user_regs.error_code);
+    OFFSET(CPUINFO_rip, struct cpu_info, guest_cpu_user_regs.rip);
     OFFSET(CPUINFO_verw_sel, struct cpu_info, verw_sel);
     OFFSET(CPUINFO_current_vcpu, struct cpu_info, current_vcpu);
     OFFSET(CPUINFO_per_cpu_offset, struct cpu_info, per_cpu_offset);
diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S
index cb473f08eebd..3bbe3a79a5b7 100644
--- a/xen/arch/x86/x86_64/compat/entry.S
+++ b/xen/arch/x86/x86_64/compat/entry.S
@@ -161,6 +161,12 @@ ENTRY(compat_restore_all_guest)
         SPEC_CTRL_EXIT_TO_PV    /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */
 
         RESTORE_ALL adj=8 compat=1
+
+        /* Account for ev/ec having already been popped off the stack. */
+        SPEC_CTRL_COND_VERW \
+            scf=STK_REL(CPUINFO_spec_ctrl_flags, CPUINFO_rip), \
+            sel=STK_REL(CPUINFO_verw_sel,        CPUINFO_rip)
+
 .Lft0:  iretq
         _ASM_PRE_EXTABLE(.Lft0, handle_exception)
 
diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
index 968da9d727b1..2c7512130f49 100644
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -214,6 +214,9 @@ restore_all_guest:
 #endif
 
         mov   EFRAME_rip(%rsp), %rcx
+
+        SPEC_CTRL_COND_VERW     /* Req: %rsp=eframe                    Clob: efl */
+
         cmpw  $FLAT_USER_CS32, EFRAME_cs(%rsp)
         mov   EFRAME_rsp(%rsp), %rsp
         je    1f
@@ -227,6 +230,9 @@ restore_all_guest:
 iret_exit_to_guest:
         andl  $~(X86_EFLAGS_IOPL | X86_EFLAGS_VM), EFRAME_eflags(%rsp)
         orl   $X86_EFLAGS_IF, EFRAME_eflags(%rsp)
+
+        SPEC_CTRL_COND_VERW     /* Req: %rsp=eframe                    Clob: efl */
+
         addq  $8,%rsp
 .Lft0:  iretq
         _ASM_PRE_EXTABLE(.Lft0, handle_exception)
@@ -679,9 +685,22 @@ UNLIKELY_START(ne, exit_cr3)
 UNLIKELY_END(exit_cr3)
 
         /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */
-        SPEC_CTRL_EXIT_TO_XEN     /* Req: %r12=ist_exit %r14=end, Clob: abcd */
+        SPEC_CTRL_EXIT_TO_XEN /* Req: %r12=ist_exit %r14=end %rsp=regs, Clob: abcd */
 
         RESTORE_ALL adj=8
+
+        /*
+         * When the CPU pushed this exception frame, it zero-extended eflags.
+         * For an IST exit, SPEC_CTRL_EXIT_TO_XEN stashed shadow copies of
+         * spec_ctrl_flags and ver_sel above eflags, as we can't use any GPRs,
+         * and we're at a random place on the stack, not in a CPUFINFO block.
+         *
+         * Account for ev/ec having already been popped off the stack.
+         */
+        SPEC_CTRL_COND_VERW \
+            scf=STK_REL(EFRAME_shadow_scf, EFRAME_rip), \
+            sel=STK_REL(EFRAME_shadow_sel, EFRAME_rip)
+
         iretq
 
 ENTRY(common_interrupt)