Blob Blame History Raw
From: Andrew Cooper <andrew.cooper3@citrix.com>
Subject: x86/traps: Use an Interrupt Stack Table for #DB

PV guests can use architectural corner cases to cause #DB to be raised after
transitioning into supervisor mode.

Use an interrupt stack table for #DB to prevent the exception being taken with
a guest controlled stack pointer.

This is part of XSA-260 / CVE-2018-8897.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/cpu/common.c
+++ b/xen/arch/x86/cpu/common.c
@@ -681,6 +681,7 @@ void load_system_tables(void)
 	tss->ist[IST_MCE - 1] = stack_top + IST_MCE * PAGE_SIZE;
 	tss->ist[IST_DF  - 1] = stack_top + IST_DF  * PAGE_SIZE;
 	tss->ist[IST_NMI - 1] = stack_top + IST_NMI * PAGE_SIZE;
+	tss->ist[IST_DB  - 1] = stack_top + IST_DB  * PAGE_SIZE;
 
 	_set_tssldt_desc(
 		gdt + TSS_ENTRY,
@@ -701,6 +702,7 @@ void load_system_tables(void)
 	set_ist(&idt_tables[cpu][TRAP_double_fault],  IST_DF);
 	set_ist(&idt_tables[cpu][TRAP_nmi],	      IST_NMI);
 	set_ist(&idt_tables[cpu][TRAP_machine_check], IST_MCE);
+	set_ist(&idt_tables[cpu][TRAP_debug],         IST_DB);
 
 	/*
 	 * Bottom-of-stack must be 16-byte aligned!
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -1042,6 +1042,7 @@ static void svm_ctxt_switch_from(struct
     set_ist(&idt_tables[cpu][TRAP_double_fault],  IST_DF);
     set_ist(&idt_tables[cpu][TRAP_nmi],           IST_NMI);
     set_ist(&idt_tables[cpu][TRAP_machine_check], IST_MCE);
+    set_ist(&idt_tables[cpu][TRAP_debug],         IST_DB);
 }
 
 static void svm_ctxt_switch_to(struct vcpu *v)
@@ -1063,6 +1064,7 @@ static void svm_ctxt_switch_to(struct vc
     set_ist(&idt_tables[cpu][TRAP_double_fault],  IST_NONE);
     set_ist(&idt_tables[cpu][TRAP_nmi],           IST_NONE);
     set_ist(&idt_tables[cpu][TRAP_machine_check], IST_NONE);
+    set_ist(&idt_tables[cpu][TRAP_debug],         IST_NONE);
 
     svm_restore_dr(v);
 
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -960,6 +960,7 @@ static int cpu_smpboot_alloc(unsigned in
     set_ist(&idt_tables[cpu][TRAP_double_fault],  IST_NONE);
     set_ist(&idt_tables[cpu][TRAP_nmi],           IST_NONE);
     set_ist(&idt_tables[cpu][TRAP_machine_check], IST_NONE);
+    set_ist(&idt_tables[cpu][TRAP_debug],         IST_NONE);
 
     for ( stub_page = 0, i = cpu & ~(STUBS_PER_PAGE - 1);
           i < nr_cpu_ids && i <= (cpu | (STUBS_PER_PAGE - 1)); ++i )
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -259,13 +259,13 @@ static void show_guest_stack(struct vcpu
 /*
  * Notes for get_stack_trace_bottom() and get_stack_dump_bottom()
  *
- * Stack pages 0, 1 and 2:
+ * Stack pages 0 - 3:
  *   These are all 1-page IST stacks.  Each of these stacks have an exception
  *   frame and saved register state at the top.  The interesting bound for a
  *   trace is the word adjacent to this, while the bound for a dump is the
  *   very top, including the exception frame.
  *
- * Stack pages 3, 4 and 5:
+ * Stack pages 4 and 5:
  *   None of these are particularly interesting.  With MEMORY_GUARD, page 5 is
  *   explicitly not present, so attempting to dump or trace it is
  *   counterproductive.  Without MEMORY_GUARD, it is possible for a call chain
@@ -286,12 +286,12 @@ unsigned long get_stack_trace_bottom(uns
 {
     switch ( get_stack_page(sp) )
     {
-    case 0 ... 2:
+    case 0 ... 3:
         return ROUNDUP(sp, PAGE_SIZE) -
             offsetof(struct cpu_user_regs, es) - sizeof(unsigned long);
 
 #ifndef MEMORY_GUARD
-    case 3 ... 5:
+    case 4 ... 5:
 #endif
     case 6 ... 7:
         return ROUNDUP(sp, STACK_SIZE) -
@@ -306,11 +306,11 @@ unsigned long get_stack_dump_bottom(unsi
 {
     switch ( get_stack_page(sp) )
     {
-    case 0 ... 2:
+    case 0 ... 3:
         return ROUNDUP(sp, PAGE_SIZE) - sizeof(unsigned long);
 
 #ifndef MEMORY_GUARD
-    case 3 ... 5:
+    case 4 ... 5:
 #endif
     case 6 ... 7:
         return ROUNDUP(sp, STACK_SIZE) - sizeof(unsigned long);
@@ -4219,6 +4219,7 @@ void __init init_idt_traps(void)
     set_ist(&idt_table[TRAP_double_fault],  IST_DF);
     set_ist(&idt_table[TRAP_nmi],           IST_NMI);
     set_ist(&idt_table[TRAP_machine_check], IST_MCE);
+    set_ist(&idt_table[TRAP_debug],         IST_DB);
 
     /* CPU0 uses the master IDT. */
     idt_tables[0] = idt_table;
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -731,7 +731,7 @@ ENTRY(device_not_available)
 ENTRY(debug)
         pushq $0
         movl  $TRAP_debug,4(%rsp)
-        jmp   handle_exception
+        jmp   handle_ist_exception
 
 ENTRY(int3)
         pushq $0
--- a/xen/include/asm-x86/processor.h
+++ b/xen/include/asm-x86/processor.h
@@ -505,7 +505,8 @@ struct __packed __cacheline_aligned tss_
 #define IST_DF   1UL
 #define IST_NMI  2UL
 #define IST_MCE  3UL
-#define IST_MAX  3UL
+#define IST_DB   4UL
+#define IST_MAX  4UL
 
 /* Set the interrupt stack table used by a particular interrupt
  * descriptor table entry. */