From db2bf3b4f8affeee94f7de72d4105ce889c4f893 Mon Sep 17 00:00:00 2001
From: Michael Young <m.a.young@durham.ac.uk>
Date: Jun 27 2018 21:54:20 +0000
Subject: preemption checks bypassed in x86 PV MM handling [XSA-264, CVE-2018-12891]

x86: #DB exception safety check can be triggered by a guest [XSA-265,
        CVE-2018-12893]
libxl fails to honour readonly flag on HVM emulated SCSI disks [XSA-266,
        CVE-2018-12892]

---

diff --git a/xen.spec b/xen.spec
index 5f7b7ec..7a46934 100644
--- a/xen.spec
+++ b/xen.spec
@@ -50,7 +50,7 @@
 Summary: Xen is a virtual machine monitor
 Name:    xen
 Version: 4.9.2
-Release: 5%{?dist}
+Release: 6%{?dist}
 Group:   Development/Libraries
 License: GPLv2+ and LGPLv2+ and BSD
 URL:     http://xen.org/
@@ -151,6 +151,10 @@ Patch113: xsa263-4.9-0013-x86-msr-Virtualise-MSR_SPEC_CTRL.SSBD-for-guests-to-.p
 Patch114: xsa267-4.9-1.patch
 Patch115: xsa267-4.9-2.patch
 Patch116: xen.git-858dbaaeda33b05c1ac80aea0ba9a03924e09005.patch
+Patch117: xsa264-4.10.patch
+Patch118: xsa265.patch
+Patch119: xsa266-4.9-0001-libxl-qemu_disk_scsi_drive_string-Break-out-common-p.patch
+Patch120: xsa266-4.9-0002-libxl-restore-passing-readonly-to-qemu-for-SCSI-disk.patch
 
 
 BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root
@@ -383,6 +387,10 @@ manage Xen virtual machines.
 %patch114 -p1
 %patch115 -p1
 %patch116 -p1
+%patch117 -p1
+%patch118 -p1
+%patch119 -p1
+%patch120 -p1
 
 # qemu-xen-traditional patches
 pushd tools/qemu-xen-traditional
@@ -922,6 +930,13 @@ rm -rf %{buildroot}
 %endif
 
 %changelog
+* Wed Jun 27 2018 Michael Young <m.a.young@durham.ac.uk> - 4.9.2-6
+- preemption checks bypassed in x86 PV MM handling [XSA-264, CVE-2018-12891]
+- x86: #DB exception safety check can be triggered by a guest [XSA-265,
+        CVE-2018-12893]
+- libxl fails to honour readonly flag on HVM emulated SCSI disks [XSA-266,
+        CVE-2018-12892]
+
 * Sat Jun 16 2018 Michael Young <m.a.young@durham.ac.uk> - 4.9.2-5
 - Speculative register leakage from lazy FPU context switching
 	[XSA-267, CVE-2018-3665]
diff --git a/xsa264-4.10.patch b/xsa264-4.10.patch
new file mode 100644
index 0000000..45b417e
--- /dev/null
+++ b/xsa264-4.10.patch
@@ -0,0 +1,52 @@
+From: Jan Beulich <jbeulich@suse.com>
+Subject: x86/mm: don't bypass preemption checks
+
+While unlikely, it is not impossible for a multi-vCPU guest to leverage
+bypasses of preemption checks to drive Xen into an unbounded loop.
+
+This is XSA-264.
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
+
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -2526,7 +2526,7 @@ static int _put_page_type(struct page_in
+                 nx = x & ~(PGT_validated|PGT_partial);
+                 if ( unlikely((y = cmpxchg(&page->u.inuse.type_info,
+                                            x, nx)) != x) )
+-                    continue;
++                    goto maybe_preempt;
+                 /* We cleared the 'valid bit' so we do the clean up. */
+                 rc = _put_final_page_type(page, x, preemptible, ptpg);
+                 ptpg = NULL;
+@@ -2558,12 +2558,13 @@ static int _put_page_type(struct page_in
+              */
+             cpu_relax();
+             y = page->u.inuse.type_info;
+-            continue;
++            goto maybe_preempt;
+         }
+ 
+         if ( likely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) == x) )
+             break;
+ 
++    maybe_preempt:
+         if ( preemptible && hypercall_preempt_check() )
+             return -EINTR;
+     }
+@@ -2676,12 +2677,11 @@ static int __get_page_type(struct page_i
+             if ( !(x & PGT_partial) )
+             {
+                 /* Someone else is updating validation of this page. Wait... */
+-                while ( (y = page->u.inuse.type_info) == x )
+-                {
++                do {
+                     if ( preemptible && hypercall_preempt_check() )
+                         return -EINTR;
+                     cpu_relax();
+-                }
++                } while ( (y = page->u.inuse.type_info) == x );
+                 continue;
+             }
+             /* Type ref count was left at 1 when PGT_partial got set. */
diff --git a/xsa265.patch b/xsa265.patch
new file mode 100644
index 0000000..ea3f40d
--- /dev/null
+++ b/xsa265.patch
@@ -0,0 +1,104 @@
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Subject: x86: Refine checks in #DB handler for faulting conditions
+
+One of the fix for XSA-260 (c/s 75d6828bc2 "x86/traps: Fix handling of #DB
+exceptions in hypervisor context") added some safety checks to help avoid
+livelocks of #DB faults.
+
+While a General Detect #DB exception does have fault semantics, hardware
+clears %dr7.gd on entry to the handler, meaning that it is actually safe to
+return to.  Furthermore, %dr6.gd is guest controlled and sticky (never cleared
+by hardware).  A malicious PV guest can therefore trigger the fatal_trap() and
+crash Xen.
+
+Instruction breakpoints are more tricky.  The breakpoint match bits in %dr6
+are not sticky, but the Intel manual warns that they may be set for
+non-enabled breakpoints, so add a breakpoint enabled check.
+
+Beyond that, because of the restriction on the linear addresses PV guests can
+set, and the fault (rather than trap) nature of instruction breakpoints
+(i.e. can't be deferred by a MovSS shadow), there should be no way to
+encounter an instruction breakpoint in Xen context.  However, for extra
+robustness, deal with this situation by clearing the breakpoint configuration,
+rather than crashing.
+
+This is XSA-265
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+
+diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
+index e79ca88..3e05cf1 100644
+--- a/xen/arch/x86/traps.c
++++ b/xen/arch/x86/traps.c
+@@ -1809,6 +1809,13 @@ void do_debug(struct cpu_user_regs *regs)
+ 
+     if ( !guest_mode(regs) )
+     {
++        /*
++         * !!! WARNING !!!
++         *
++         * %dr6 is mostly guest controlled at this point.  Any decsions base
++         * on its value must be crosschecked with non-guest controlled state.
++         */
++
+         if ( regs->eflags & X86_EFLAGS_TF )
+         {
+             /* In SYSENTER entry path we can't zap TF until EFLAGS is saved. */
+@@ -1830,33 +1837,44 @@ void do_debug(struct cpu_user_regs *regs)
+          * Check for fault conditions.  General Detect, and instruction
+          * breakpoints are faults rather than traps, at which point attempting
+          * to ignore and continue will result in a livelock.
++         *
++         * However, on entering the #DB handler, hardware clears %dr7.gd for
++         * us (as confirmed by the earlier %dr6 accesses succeeding), meaning
++         * that a real General Detect exception is restartable.
++         *
++         * PV guests are not permitted to point %dr{0..3} at Xen linear
++         * addresses, and Instruction Breakpoints (being faults) don't get
++         * delayed by a MovSS shadow, so we should never encounter one in
++         * hypervisor context.
++         *
++         * If however we do, safety measures need to be enacted.  Use a big
++         * hammer and clear all debug settings.
+          */
+-        if ( dr6 & DR_GENERAL_DETECT )
+-        {
+-            printk(XENLOG_ERR "Hit General Detect in Xen context\n");
+-            fatal_trap(regs, 0);
+-        }
+-
+         if ( dr6 & (DR_TRAP3 | DR_TRAP2 | DR_TRAP1 | DR_TRAP0) )
+         {
+-            unsigned int bp, dr7 = read_debugreg(7) >> DR_CONTROL_SHIFT;
++            unsigned int bp, dr7 = read_debugreg(7);
+ 
+             for ( bp = 0; bp < 4; ++bp )
+             {
+                 if ( (dr6 & (1u << bp)) && /* Breakpoint triggered? */
+-                     ((dr7 & (3u << (bp * DR_CONTROL_SIZE))) == 0) /* Insn? */ )
++                     (dr7 & (3u << (bp * DR_ENABLE_SIZE))) && /* Enabled? */
++                     ((dr7 & (3u << ((bp * DR_CONTROL_SIZE) + /* Insn? */
++                                     DR_CONTROL_SHIFT))) == DR_RW_EXECUTE) )
+                 {
++                    ASSERT_UNREACHABLE();
++
+                     printk(XENLOG_ERR
+                            "Hit instruction breakpoint in Xen context\n");
+-                    fatal_trap(regs, 0);
++                    write_debugreg(7, 0);
++                    break;
+                 }
+             }
+         }
+ 
+         /*
+-         * Whatever caused this #DB should be a trap.  Note it and continue.
+-         * Guests can trigger this in certain corner cases, so ensure the
+-         * message is ratelimited.
++         * Whatever caused this #DB should be restartable by this point.  Note
++         * it and continue.  Guests can trigger this in certain corner cases,
++         * so ensure the message is ratelimited.
+          */
+         gprintk(XENLOG_WARNING,
+                 "Hit #DB in Xen context: %04x:%p [%ps], stk %04x:%p, dr6 %lx\n",
diff --git a/xsa266-4.9-0001-libxl-qemu_disk_scsi_drive_string-Break-out-common-p.patch b/xsa266-4.9-0001-libxl-qemu_disk_scsi_drive_string-Break-out-common-p.patch
new file mode 100644
index 0000000..cab9fc9
--- /dev/null
+++ b/xsa266-4.9-0001-libxl-qemu_disk_scsi_drive_string-Break-out-common-p.patch
@@ -0,0 +1,78 @@
+From 3abf0feed50bd0c3450b6b168424693baf0f7039 Mon Sep 17 00:00:00 2001
+From: Ian Jackson <ian.jackson@eu.citrix.com>
+Date: Wed, 13 Jun 2018 15:51:36 +0100
+Subject: [PATCH 1/2] libxl: qemu_disk_scsi_drive_string: Break out common
+ parts of disk config
+
+The generated configurations are identical apart from, in some cases,
+reordering of the id=%s element.  So, overall, no functional change.
+
+This is part of XSA-266.
+
+Reported-by: Andrew Reimers <andrew.reimers@orionvm.com>
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Signed-off-by: Ian Jackson <Ian.Jackson@eu.citrix.com>
+---
+ tools/libxl/libxl_dm.c | 13 +++++++------
+ 1 file changed, 7 insertions(+), 6 deletions(-)
+
+diff --git a/tools/libxl/libxl_dm.c b/tools/libxl/libxl_dm.c
+index 40f9dc7..96d5702 100644
+--- a/tools/libxl/libxl_dm.c
++++ b/tools/libxl/libxl_dm.c
+@@ -773,6 +773,7 @@ static char *qemu_disk_scsi_drive_string(libxl__gc *gc, const char *target_path,
+                                          int colo_mode)
+ {
+     char *drive = NULL;
++    char *common = GCSPRINTF("cache=writeback");
+     const char *exportname = disk->colo_export;
+     const char *active_disk = disk->active_disk;
+     const char *hidden_disk = disk->hidden_disk;
+@@ -780,8 +781,8 @@ static char *qemu_disk_scsi_drive_string(libxl__gc *gc, const char *target_path,
+     switch (colo_mode) {
+     case LIBXL__COLO_NONE:
+         drive = libxl__sprintf
+-            (gc, "file=%s,if=scsi,bus=0,unit=%d,format=%s,cache=writeback",
+-             target_path, unit, format);
++            (gc, "%s,file=%s,if=scsi,bus=0,unit=%d,format=%s",
++             common, target_path, unit, format);
+         break;
+     case LIBXL__COLO_PRIMARY:
+         /*
+@@ -794,13 +795,13 @@ static char *qemu_disk_scsi_drive_string(libxl__gc *gc, const char *target_path,
+          *  vote-threshold=1
+          */
+         drive = GCSPRINTF(
+-            "if=scsi,bus=0,unit=%d,cache=writeback,driver=quorum,"
++            "%s,if=scsi,bus=0,unit=%d,,driver=quorum,"
+             "id=%s,"
+             "children.0.file.filename=%s,"
+             "children.0.driver=%s,"
+             "read-pattern=fifo,"
+             "vote-threshold=1",
+-            unit, exportname, target_path, format);
++            common, unit, exportname, target_path, format);
+         break;
+     case LIBXL__COLO_SECONDARY:
+         /*
+@@ -814,7 +815,7 @@ static char *qemu_disk_scsi_drive_string(libxl__gc *gc, const char *target_path,
+          *  file.backing.backing=exportname,
+          */
+         drive = GCSPRINTF(
+-            "if=scsi,id=top-colo,bus=0,unit=%d,cache=writeback,"
++            "%s,if=scsi,id=top-colo,bus=0,unit=%d,"
+             "driver=replication,"
+             "mode=secondary,"
+             "top-id=top-colo,"
+@@ -823,7 +824,7 @@ static char *qemu_disk_scsi_drive_string(libxl__gc *gc, const char *target_path,
+             "file.backing.driver=qcow2,"
+             "file.backing.file.filename=%s,"
+             "file.backing.backing=%s",
+-            unit, active_disk, hidden_disk, exportname);
++            common, unit, active_disk, hidden_disk, exportname);
+         break;
+     default:
+         abort();
+-- 
+2.1.4
+
diff --git a/xsa266-4.9-0002-libxl-restore-passing-readonly-to-qemu-for-SCSI-disk.patch b/xsa266-4.9-0002-libxl-restore-passing-readonly-to-qemu-for-SCSI-disk.patch
new file mode 100644
index 0000000..3c3d99f
--- /dev/null
+++ b/xsa266-4.9-0002-libxl-restore-passing-readonly-to-qemu-for-SCSI-disk.patch
@@ -0,0 +1,63 @@
+From 44110b046be937fa75f8822a99c21418a299abb3 Mon Sep 17 00:00:00 2001
+From: Ian Jackson <ian.jackson@eu.citrix.com>
+Date: Wed, 13 Jun 2018 15:54:53 +0100
+Subject: [PATCH 2/2] libxl: restore passing "readonly=" to qemu for SCSI disks
+
+A read-only check was introduced for XSA-142, commit ef6cb76026 ("libxl:
+relax readonly check introduced by XSA-142 fix") added the passing of
+the extra setting, but commit dab0539568 ("Introduce COLO mode and
+refactor relevant function") dropped the passing of the setting again,
+quite likely due to improper re-basing.
+
+Restore the readonly= parameter to SCSI disks.  For IDE disks this is
+supposed to be rejected; add an assert.  And there is a bare ad-hoc
+disk drive string in libxl__build_device_model_args_new, which we also
+update.
+
+This is XSA-266.
+
+Reported-by: Andrew Reimers <andrew.reimers@orionvm.com>
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Signed-off-by: Ian Jackson <Ian.Jackson@eu.citrix.com>
+---
+ tools/libxl/libxl_dm.c | 10 +++++++---
+ 1 file changed, 7 insertions(+), 3 deletions(-)
+
+diff --git a/tools/libxl/libxl_dm.c b/tools/libxl/libxl_dm.c
+index 96d5702..b049b44 100644
+--- a/tools/libxl/libxl_dm.c
++++ b/tools/libxl/libxl_dm.c
+@@ -773,7 +773,8 @@ static char *qemu_disk_scsi_drive_string(libxl__gc *gc, const char *target_path,
+                                          int colo_mode)
+ {
+     char *drive = NULL;
+-    char *common = GCSPRINTF("cache=writeback");
++    char *common = GCSPRINTF("cache=writeback,readonly=%s",
++                             disk->readwrite ? "off" : "on");
+     const char *exportname = disk->colo_export;
+     const char *active_disk = disk->active_disk;
+     const char *hidden_disk = disk->hidden_disk;
+@@ -842,6 +843,8 @@ static char *qemu_disk_ide_drive_string(libxl__gc *gc, const char *target_path,
+     const char *exportname = disk->colo_export;
+     const char *active_disk = disk->active_disk;
+     const char *hidden_disk = disk->hidden_disk;
++    
++    assert(disk->readwrite); /* should have been checked earlier */
+ 
+     switch (colo_mode) {
+     case LIBXL__COLO_NONE:
+@@ -1546,8 +1549,9 @@ static int libxl__build_device_model_args_new(libxl__gc *gc,
+                 if (strncmp(disks[i].vdev, "sd", 2) == 0) {
+                     if (colo_mode == LIBXL__COLO_SECONDARY) {
+                         drive = libxl__sprintf
+-                            (gc, "if=none,driver=%s,file=%s,id=%s",
+-                             format, target_path, disks[i].colo_export);
++                            (gc, "if=none,driver=%s,file=%s,id=%s,readonly=%s",
++                             format, target_path, disks[i].colo_export,
++                             disks[i].readwrite ? "off" : "on");
+ 
+                         flexarray_append(dm_args, "-drive");
+                         flexarray_append(dm_args, drive);
+-- 
+2.1.4
+