From e0018c4e82f3d411b0572d3947733ab6adde4353 Mon Sep 17 00:00:00 2001
From: Michael Young <m.a.young@durham.ac.uk>
Date: Jun 08 2021 21:56:27 +0000
Subject: 4 security updates


xen/arm: Boot modules are not scrubbed [XSA-372, CVE-2021-28693]
inappropriate x86 IOMMU timeout detection / handling
	[XSA-373, CVE-2021-28692]
Speculative Code Store Bypass [XSA-375, CVE-2021-0089]
x86: TSX Async Abort protections not restored after S3
	[XSA-377, CVE-2021-28690]

---

diff --git a/xen.spec b/xen.spec
index c4773ac..0ded91e 100644
--- a/xen.spec
+++ b/xen.spec
@@ -58,7 +58,7 @@
 Summary: Xen is a virtual machine monitor
 Name:    xen
 Version: 4.14.2
-Release: 1%{?dist}
+Release: 2%{?dist}
 License: GPLv2+ and LGPLv2+ and BSD
 URL:     http://xen.org/
 Source0: https://downloads.xenproject.org/release/xen/%{version}/xen-%{version}.tar.gz
@@ -118,6 +118,15 @@ Patch47: xen.git-d6627cf1b63ce57a6a7e2c1800dbc50eed742c32.patch
 Patch48: xen.git-d8099d94dfaa3573bd86ebfc457cbc8f70a3ecda.patch
 Patch49: xen.git-8169f82049efb5b2044b33aa482ba3a136b7804d.patch
 Patch50: xsa363.patch
+Patch51: xsa372-4.14-0001-xen-arm-Create-dom0less-domUs-earlier.patch
+Patch52: xsa372-4.14-0002-xen-arm-Boot-modules-should-always-be-scrubbed-if-bo.patch
+Patch53: xsa373-4.14-1.patch
+Patch54: xsa373-4.14-2.patch
+Patch55: xsa373-4.14-3.patch
+Patch56: xsa373-4.14-4.patch
+Patch57: xsa373-4.14-5.patch
+Patch58: xsa375.patch
+Patch59: xsa377.patch
 
 
 %if %build_qemutrad
@@ -332,6 +341,15 @@ manage Xen virtual machines.
 %patch48 -p1
 %patch49 -p1
 %patch50 -p1
+%patch51 -p1
+%patch52 -p1
+%patch53 -p1
+%patch54 -p1
+%patch55 -p1
+%patch56 -p1
+%patch57 -p1
+%patch58 -p1
+%patch59 -p1
 
 # qemu-xen-traditional patches
 pushd tools/qemu-xen-traditional
@@ -925,6 +943,14 @@ fi
 %endif
 
 %changelog
+* Tue Jun 08 2021 Michael Young <m.a.young@durham.ac.uk> - 4.14.2-2
+- xen/arm: Boot modules are not scrubbed [XSA-372, CVE-2021-28693]
+- inappropriate x86 IOMMU timeout detection / handling
+	[XSA-373, CVE-2021-28692]
+- Speculative Code Store Bypass [XSA-375, CVE-2021-0089]
+- x86: TSX Async Abort protections not restored after S3
+	[XSA-377, CVE-2021-28690]
+
 * Tue May 04 2021 Michael Young <m.a.young@durham.ac.uk> - 4.14.2-1
 - update to 4.14.2
   remove or adjust patch content now included or superceded upstream
diff --git a/xsa372-4.14-0001-xen-arm-Create-dom0less-domUs-earlier.patch b/xsa372-4.14-0001-xen-arm-Create-dom0less-domUs-earlier.patch
new file mode 100644
index 0000000..a5289a8
--- /dev/null
+++ b/xsa372-4.14-0001-xen-arm-Create-dom0less-domUs-earlier.patch
@@ -0,0 +1,83 @@
+From f98c20aaaf909be04ada5cb6cb88c14b9bc75e15 Mon Sep 17 00:00:00 2001
+From: Julien Grall <jgrall@amazon.com>
+Date: Mon, 17 May 2021 17:47:13 +0100
+Subject: [PATCH 1/2] xen/arm: Create dom0less domUs earlier
+
+In a follow-up patch we will need to unallocate the boot modules
+before heap_init_late() is called.
+
+The modules will contain the domUs kernel and initramfs. Therefore Xen
+will need to create extra domUs (used by dom0less) before heap_init_late().
+
+This has two consequences on dom0less:
+    1) Domains will not be unpaused as soon as they are created but
+    once all have been created. However, Xen doesn't guarantee an order
+    to unpause, so this is not something one could rely on.
+
+    2) The memory allocated for a domU will not be scrubbed anymore when an
+    admin selects bootscrub=on. This is not something we advertised, but if
+    this is a concern we can introduce either force scrub for all domUs or
+    a per-domain flag in the DT. The behavior for bootscrub=off and
+    bootscrub=idle (default) has not changed.
+
+This is part of XSA-372 / CVE-2021-28693.
+
+Signed-off-by: Julien Grall <jgrall@amazon.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
+Tested-by: Stefano Stabellini <sstabellini@kernel.org>
+---
+ xen/arch/arm/domain_build.c | 2 --
+ xen/arch/arm/setup.c        | 9 +++++----
+ 2 files changed, 5 insertions(+), 6 deletions(-)
+
+diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
+index e824ba34b012..b07461f5d376 100644
+--- a/xen/arch/arm/domain_build.c
++++ b/xen/arch/arm/domain_build.c
+@@ -2515,8 +2515,6 @@ void __init create_domUs(void)
+ 
+         if ( construct_domU(d, node) != 0 )
+             panic("Could not set up domain %s\n", dt_node_name(node));
+-
+-        domain_unpause_by_systemcontroller(d);
+     }
+ }
+ 
+diff --git a/xen/arch/arm/setup.c b/xen/arch/arm/setup.c
+index 7968cee47d05..1f26080b30bf 100644
+--- a/xen/arch/arm/setup.c
++++ b/xen/arch/arm/setup.c
+@@ -779,7 +779,7 @@ void __init start_xen(unsigned long boot_phys_offset,
+     int cpus, i;
+     const char *cmdline;
+     struct bootmodule *xen_bootmodule;
+-    struct domain *dom0;
++    struct domain *dom0, *d;
+     struct xen_domctl_createdomain dom0_cfg = {
+         .flags = XEN_DOMCTL_CDF_hvm | XEN_DOMCTL_CDF_hap,
+         .max_evtchn_port = -1,
+@@ -962,6 +962,8 @@ void __init start_xen(unsigned long boot_phys_offset,
+     if ( construct_dom0(dom0) != 0)
+         panic("Could not set up DOM0 guest OS\n");
+ 
++    create_domUs();
++
+     heap_init_late();
+ 
+     init_trace_bufs();
+@@ -975,9 +977,8 @@ void __init start_xen(unsigned long boot_phys_offset,
+ 
+     system_state = SYS_STATE_active;
+ 
+-    create_domUs();
+-
+-    domain_unpause_by_systemcontroller(dom0);
++    for_each_domain( d )
++        domain_unpause_by_systemcontroller(d);
+ 
+     /* Switch on to the dynamically allocated stack for the idle vcpu
+      * since the static one we're running on is about to be freed. */
+-- 
+2.17.1
+
diff --git a/xsa372-4.14-0002-xen-arm-Boot-modules-should-always-be-scrubbed-if-bo.patch b/xsa372-4.14-0002-xen-arm-Boot-modules-should-always-be-scrubbed-if-bo.patch
new file mode 100644
index 0000000..3ed62f3
--- /dev/null
+++ b/xsa372-4.14-0002-xen-arm-Boot-modules-should-always-be-scrubbed-if-bo.patch
@@ -0,0 +1,58 @@
+From e7e475c1a3dc6b149252413589eebaa4ae138824 Mon Sep 17 00:00:00 2001
+From: Julien Grall <jgrall@amazon.com>
+Date: Sat, 17 Apr 2021 17:38:28 +0100
+Subject: [PATCH 2/2] xen/arm: Boot modules should always be scrubbed if
+ bootscrub={on, idle}
+
+The function to initialize the pages (see init_heap_pages()) will request
+scrub when the admin requests idle bootscrub (default) and state ==
+SYS_STATE_active. When bootscrub=on, Xen will scrub any free pages in
+heap_init_late().
+
+Currently, the boot modules (e.g. kernels, initramfs) will be discarded/
+freed after heap_init_late() is called and system_state switched to
+SYS_STATE_active. This means the pages associated with the boot modules
+will not get scrubbed before getting re-purposed.
+
+If the memory is assigned to an untrusted domU, it may be able to
+retrieve secrets from the modules.
+
+This is part of XSA-372 / CVE-2021-28693.
+
+Fixes: 1774e9b1df27 ("xen/arm: introduce create_domUs")
+Signed-off-by: Julien Grall <jgrall@amazon.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
+Tested-by: Stefano Stabellini <sstabellini@kernel.org>
+---
+ xen/arch/arm/setup.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/xen/arch/arm/setup.c b/xen/arch/arm/setup.c
+index 1f26080b30bf..34b1c1a11ef6 100644
+--- a/xen/arch/arm/setup.c
++++ b/xen/arch/arm/setup.c
+@@ -75,7 +75,6 @@ static __used void init_done(void)
+     /* Must be done past setting system_state. */
+     unregister_init_virtual_region();
+ 
+-    discard_initial_modules();
+     free_init_memory();
+     startup_cpu_idle_loop();
+ }
+@@ -964,6 +963,12 @@ void __init start_xen(unsigned long boot_phys_offset,
+ 
+     create_domUs();
+ 
++    /*
++     * This needs to be called **before** heap_init_late() so modules
++     * will be scrubbed (unless suppressed).
++     */
++    discard_initial_modules();
++
+     heap_init_late();
+ 
+     init_trace_bufs();
+-- 
+2.17.1
+
diff --git a/xsa373-4.14-1.patch b/xsa373-4.14-1.patch
new file mode 100644
index 0000000..ee5229a
--- /dev/null
+++ b/xsa373-4.14-1.patch
@@ -0,0 +1,120 @@
+From: Jan Beulich <jbeulich@suse.com>
+Subject: VT-d: size qinval queue dynamically
+
+With the present synchronous model, we need two slots for every
+operation (the operation itself and a wait descriptor).  There can be
+one such pair of requests pending per CPU. To ensure that under all
+normal circumstances a slot is always available when one is requested,
+size the queue ring according to the number of present CPUs.
+
+This is part of XSA-373 / CVE-2021-28692.
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Paul Durrant <paul@xen.org>
+
+--- a/xen/drivers/passthrough/vtd/iommu.h
++++ b/xen/drivers/passthrough/vtd/iommu.h
+@@ -450,17 +450,9 @@ struct qinval_entry {
+     }q;
+ };
+ 
+-/* Order of queue invalidation pages(max is 8) */
+-#define QINVAL_PAGE_ORDER   2
+-
+-#define QINVAL_ARCH_PAGE_ORDER  (QINVAL_PAGE_ORDER + PAGE_SHIFT_4K - PAGE_SHIFT)
+-#define QINVAL_ARCH_PAGE_NR     ( QINVAL_ARCH_PAGE_ORDER < 0 ?  \
+-                                1 :                             \
+-                                1 << QINVAL_ARCH_PAGE_ORDER )
+-
+ /* Each entry is 16 bytes, so 2^8 entries per page */
+ #define QINVAL_ENTRY_ORDER  ( PAGE_SHIFT - 4 )
+-#define QINVAL_ENTRY_NR     (1 << (QINVAL_PAGE_ORDER + 8))
++#define QINVAL_MAX_ENTRY_NR (1u << (7 + QINVAL_ENTRY_ORDER))
+ 
+ /* Status data flag */
+ #define QINVAL_STAT_INIT  0
+--- a/xen/drivers/passthrough/vtd/qinval.c
++++ b/xen/drivers/passthrough/vtd/qinval.c
+@@ -31,6 +31,9 @@
+ 
+ #define VTD_QI_TIMEOUT	1
+ 
++static unsigned int __read_mostly qi_pg_order;
++static unsigned int __read_mostly qi_entry_nr;
++
+ static int __must_check invalidate_sync(struct vtd_iommu *iommu);
+ 
+ static void print_qi_regs(struct vtd_iommu *iommu)
+@@ -55,7 +58,7 @@ static unsigned int qinval_next_index(st
+     tail >>= QINVAL_INDEX_SHIFT;
+ 
+     /* (tail+1 == head) indicates a full queue, wait for HW */
+-    while ( ( tail + 1 ) % QINVAL_ENTRY_NR ==
++    while ( ((tail + 1) & (qi_entry_nr - 1)) ==
+             ( dmar_readq(iommu->reg, DMAR_IQH_REG) >> QINVAL_INDEX_SHIFT ) )
+         cpu_relax();
+ 
+@@ -68,7 +71,7 @@ static void qinval_update_qtail(struct v
+ 
+     /* Need hold register lock when update tail */
+     ASSERT( spin_is_locked(&iommu->register_lock) );
+-    val = (index + 1) % QINVAL_ENTRY_NR;
++    val = (index + 1) & (qi_entry_nr - 1);
+     dmar_writeq(iommu->reg, DMAR_IQT_REG, (val << QINVAL_INDEX_SHIFT));
+ }
+ 
+@@ -403,8 +406,28 @@ int enable_qinval(struct vtd_iommu *iomm
+ 
+     if ( iommu->qinval_maddr == 0 )
+     {
+-        iommu->qinval_maddr = alloc_pgtable_maddr(QINVAL_ARCH_PAGE_NR,
+-                                                  iommu->node);
++        if ( !qi_entry_nr )
++        {
++            /*
++             * With the present synchronous model, we need two slots for every
++             * operation (the operation itself and a wait descriptor).  There
++             * can be one such pair of requests pending per CPU.  One extra
++             * entry is needed as the ring is considered full when there's
++             * only one entry left.
++             */
++            BUILD_BUG_ON(CONFIG_NR_CPUS * 2 >= QINVAL_MAX_ENTRY_NR);
++            qi_pg_order = get_order_from_bytes((num_present_cpus() * 2 + 1) <<
++                                               (PAGE_SHIFT -
++                                                QINVAL_ENTRY_ORDER));
++            qi_entry_nr = 1u << (qi_pg_order + QINVAL_ENTRY_ORDER);
++
++            dprintk(XENLOG_INFO VTDPREFIX,
++                    "QI: using %u-entry ring(s)\n", qi_entry_nr);
++        }
++
++        iommu->qinval_maddr =
++            alloc_pgtable_maddr(qi_entry_nr >> QINVAL_ENTRY_ORDER,
++                                iommu->node);
+         if ( iommu->qinval_maddr == 0 )
+         {
+             dprintk(XENLOG_WARNING VTDPREFIX,
+@@ -418,15 +441,16 @@ int enable_qinval(struct vtd_iommu *iomm
+ 
+     spin_lock_irqsave(&iommu->register_lock, flags);
+ 
+-    /* Setup Invalidation Queue Address(IQA) register with the
+-     * address of the page we just allocated.  QS field at
+-     * bits[2:0] to indicate size of queue is one 4KB page.
+-     * That's 256 entries.  Queued Head (IQH) and Queue Tail (IQT)
+-     * registers are automatically reset to 0 with write
+-     * to IQA register.
++    /*
++     * Setup Invalidation Queue Address (IQA) register with the address of the
++     * pages we just allocated.  The QS field at bits[2:0] indicates the size
++     * (page order) of the queue.
++     *
++     * Queued Head (IQH) and Queue Tail (IQT) registers are automatically
++     * reset to 0 with write to IQA register.
+      */
+     dmar_writeq(iommu->reg, DMAR_IQA_REG,
+-                iommu->qinval_maddr | QINVAL_PAGE_ORDER);
++                iommu->qinval_maddr | qi_pg_order);
+ 
+     dmar_writeq(iommu->reg, DMAR_IQT_REG, 0);
+ 
diff --git a/xsa373-4.14-2.patch b/xsa373-4.14-2.patch
new file mode 100644
index 0000000..773cbfd
--- /dev/null
+++ b/xsa373-4.14-2.patch
@@ -0,0 +1,102 @@
+From: Jan Beulich <jbeulich@suse.com>
+Subject: AMD/IOMMU: size command buffer dynamically
+
+With the present synchronous model, we need two slots for every
+operation (the operation itself and a wait command).  There can be one
+such pair of commands pending per CPU. To ensure that under all normal
+circumstances a slot is always available when one is requested, size the
+command ring according to the number of present CPUs.
+
+This is part of XSA-373 / CVE-2021-28692.
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Paul Durrant <paul@xen.org>
+
+--- a/xen/drivers/passthrough/amd/iommu-defs.h
++++ b/xen/drivers/passthrough/amd/iommu-defs.h
+@@ -20,9 +20,6 @@
+ #ifndef AMD_IOMMU_DEFS_H
+ #define AMD_IOMMU_DEFS_H
+ 
+-/* IOMMU Command Buffer entries: in power of 2 increments, minimum of 256 */
+-#define IOMMU_CMD_BUFFER_DEFAULT_ENTRIES	512
+-
+ /* IOMMU Event Log entries: in power of 2 increments, minimum of 256 */
+ #define IOMMU_EVENT_LOG_DEFAULT_ENTRIES     512
+ 
+@@ -164,8 +161,8 @@ struct amd_iommu_dte {
+ #define IOMMU_CMD_BUFFER_LENGTH_MASK		0x0F000000
+ #define IOMMU_CMD_BUFFER_LENGTH_SHIFT		24
+ 
+-#define IOMMU_CMD_BUFFER_ENTRY_SIZE			16
+-#define IOMMU_CMD_BUFFER_POWER_OF2_ENTRIES_PER_PAGE	8
++#define IOMMU_CMD_BUFFER_ENTRY_ORDER            4
++#define IOMMU_CMD_BUFFER_MAX_ENTRIES            (1u << 15)
+ 
+ #define IOMMU_CMD_OPCODE_MASK			0xF0000000
+ #define IOMMU_CMD_OPCODE_SHIFT			28
+--- a/xen/drivers/passthrough/amd/iommu_cmd.c
++++ b/xen/drivers/passthrough/amd/iommu_cmd.c
+@@ -24,7 +24,7 @@ static int queue_iommu_command(struct am
+ {
+     uint32_t tail, head;
+ 
+-    tail = iommu->cmd_buffer.tail + IOMMU_CMD_BUFFER_ENTRY_SIZE;
++    tail = iommu->cmd_buffer.tail + sizeof(cmd_entry_t);
+     if ( tail == iommu->cmd_buffer.size )
+         tail = 0;
+ 
+@@ -33,7 +33,7 @@ static int queue_iommu_command(struct am
+     if ( head != tail )
+     {
+         memcpy(iommu->cmd_buffer.buffer + iommu->cmd_buffer.tail,
+-               cmd, IOMMU_CMD_BUFFER_ENTRY_SIZE);
++               cmd, sizeof(cmd_entry_t));
+ 
+         iommu->cmd_buffer.tail = tail;
+         return 1;
+--- a/xen/drivers/passthrough/amd/iommu_init.c
++++ b/xen/drivers/passthrough/amd/iommu_init.c
+@@ -118,7 +118,7 @@ static void register_iommu_cmd_buffer_in
+     writel(entry, iommu->mmio_base + IOMMU_CMD_BUFFER_BASE_LOW_OFFSET);
+ 
+     power_of2_entries = get_order_from_bytes(iommu->cmd_buffer.size) +
+-        IOMMU_CMD_BUFFER_POWER_OF2_ENTRIES_PER_PAGE;
++        PAGE_SHIFT - IOMMU_CMD_BUFFER_ENTRY_ORDER;
+ 
+     entry = 0;
+     iommu_set_addr_hi_to_reg(&entry, addr_hi);
+@@ -1022,9 +1022,31 @@ static void *__init allocate_ring_buffer
+ static void * __init allocate_cmd_buffer(struct amd_iommu *iommu)
+ {
+     /* allocate 'command buffer' in power of 2 increments of 4K */
++    static unsigned int __read_mostly nr_ents;
++
++    if ( !nr_ents )
++    {
++        unsigned int order;
++
++        /*
++         * With the present synchronous model, we need two slots for every
++         * operation (the operation itself and a wait command).  There can be
++         * one such pair of requests pending per CPU.  One extra entry is
++         * needed as the ring is considered full when there's only one entry
++         * left.
++         */
++        BUILD_BUG_ON(CONFIG_NR_CPUS * 2 >= IOMMU_CMD_BUFFER_MAX_ENTRIES);
++        order = get_order_from_bytes((num_present_cpus() * 2 + 1) <<
++                                     IOMMU_CMD_BUFFER_ENTRY_ORDER);
++        nr_ents = 1u << (order + PAGE_SHIFT - IOMMU_CMD_BUFFER_ENTRY_ORDER);
++
++        AMD_IOMMU_DEBUG("using %u-entry cmd ring(s)\n", nr_ents);
++    }
++
++    BUILD_BUG_ON(sizeof(cmd_entry_t) != (1u << IOMMU_CMD_BUFFER_ENTRY_ORDER));
++
+     return allocate_ring_buffer(&iommu->cmd_buffer, sizeof(cmd_entry_t),
+-                                IOMMU_CMD_BUFFER_DEFAULT_ENTRIES,
+-                                "Command Buffer", false);
++                                nr_ents, "Command Buffer", false);
+ }
+ 
+ static void * __init allocate_event_log(struct amd_iommu *iommu)
diff --git a/xsa373-4.14-3.patch b/xsa373-4.14-3.patch
new file mode 100644
index 0000000..fe34546
--- /dev/null
+++ b/xsa373-4.14-3.patch
@@ -0,0 +1,163 @@
+From: Jan Beulich <jbeulich@suse.com>
+Subject: VT-d: eliminate flush related timeouts
+
+Leaving an in-progress operation pending when it appears to take too
+long is problematic: If e.g. a QI command completed later, the write to
+the "poll slot" may instead be understood to signal a subsequently
+started command's completion. Also our accounting of the timeout period
+was actually wrong: We included the time it took for the command to
+actually make it to the front of the queue, which could be heavily
+affected by guests other than the one for which the flush is being
+performed.
+
+Do away with all timeout detection on all flush related code paths.
+Log excessively long processing times (with a progressive threshold) to
+have some indication of problems in this area.
+
+Additionally log (once) if qinval_next_index() didn't immediately find
+an available slot. Together with the earlier change sizing the queue(s)
+dynamically, we should now have a guarantee that with our fully
+synchronous model any demand for slots can actually be satisfied.
+
+This is part of XSA-373 / CVE-2021-28692.
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Paul Durrant <paul@xen.org>
+
+--- a/xen/drivers/passthrough/vtd/dmar.h
++++ b/xen/drivers/passthrough/vtd/dmar.h
+@@ -127,6 +127,34 @@ do {
+     }                                                           \
+ } while (0)
+ 
+#define IOMMU_FLUSH_WAIT(what, iommu, offset, op, cond, sts)       \
+do {                                                               \
+    static unsigned int __read_mostly threshold = 1;               \
+    s_time_t start = NOW();                                        \
+    s_time_t timeout = start + DMAR_OPERATION_TIMEOUT * threshold; \
+    /* Never times out; only logs once past the threshold. */      \
+    for ( ; ; )                                                    \
+    {                                                              \
+        sts = op(iommu->reg, offset);                              \
+        if ( cond )                                                \
+            break;                                                 \
+        if ( timeout && NOW() > timeout )                          \
+        {                                                          \
+            threshold |= threshold << 1;                           \
+            printk(XENLOG_WARNING VTDPREFIX                        \
+                   " IOMMU#%u: %s flush taking too long\n",        \
+                   iommu->index, what);                            \
+            timeout = 0;                                           \
+        }                                                          \
+        cpu_relax();                                               \
+    }                                                              \
+    /* timeout == 0 means the warning above was emitted. */        \
+    if ( !timeout )                                                \
+        printk(XENLOG_WARNING VTDPREFIX                            \
+               " IOMMU#%u: %s flush took %lums\n", iommu->index,   \
+               what, (NOW() - start) / MILLISECS(1));              \
+} while ( false )
++
+ int vtd_hw_check(void);
+ void disable_pmr(struct vtd_iommu *iommu);
+ int is_igd_drhd(struct acpi_drhd_unit *drhd);
+--- a/xen/drivers/passthrough/vtd/iommu.c
++++ b/xen/drivers/passthrough/vtd/iommu.c
+@@ -326,8 +326,8 @@ static void iommu_flush_write_buffer(str
+     dmar_writel(iommu->reg, DMAR_GCMD_REG, val | DMA_GCMD_WBF);
+ 
+     /* Make sure hardware complete it */
+-    IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, dmar_readl,
+-                  !(val & DMA_GSTS_WBFS), val);
++    IOMMU_FLUSH_WAIT("write buffer", iommu, DMAR_GSTS_REG, dmar_readl,
++                     !(val & DMA_GSTS_WBFS), val);
+ 
+     spin_unlock_irqrestore(&iommu->register_lock, flags);
+ }
+@@ -376,8 +376,8 @@ int vtd_flush_context_reg(struct vtd_iom
+     dmar_writeq(iommu->reg, DMAR_CCMD_REG, val);
+ 
+     /* Make sure hardware complete it */
+-    IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG, dmar_readq,
+-                  !(val & DMA_CCMD_ICC), val);
++    IOMMU_FLUSH_WAIT("context", iommu, DMAR_CCMD_REG, dmar_readq,
++                     !(val & DMA_CCMD_ICC), val);
+ 
+     spin_unlock_irqrestore(&iommu->register_lock, flags);
+     /* flush context entry will implicitly flush write buffer */
+@@ -454,8 +454,8 @@ int vtd_flush_iotlb_reg(struct vtd_iommu
+     dmar_writeq(iommu->reg, tlb_offset + 8, val);
+ 
+     /* Make sure hardware complete it */
+-    IOMMU_WAIT_OP(iommu, (tlb_offset + 8), dmar_readq,
+-                  !(val & DMA_TLB_IVT), val);
++    IOMMU_FLUSH_WAIT("iotlb", iommu, (tlb_offset + 8), dmar_readq,
++                     !(val & DMA_TLB_IVT), val);
+     spin_unlock_irqrestore(&iommu->register_lock, flags);
+ 
+     /* check IOTLB invalidation granularity */
+--- a/xen/drivers/passthrough/vtd/qinval.c
++++ b/xen/drivers/passthrough/vtd/qinval.c
+@@ -29,8 +29,6 @@
+ #include "extern.h"
+ #include "../ats.h"
+ 
+-#define VTD_QI_TIMEOUT	1
+-
+ static unsigned int __read_mostly qi_pg_order;
+ static unsigned int __read_mostly qi_entry_nr;
+ 
+@@ -60,7 +58,11 @@ static unsigned int qinval_next_index(st
+     /* (tail+1 == head) indicates a full queue, wait for HW */
+     while ( ((tail + 1) & (qi_entry_nr - 1)) ==
+             ( dmar_readq(iommu->reg, DMAR_IQH_REG) >> QINVAL_INDEX_SHIFT ) )
++    {
++        printk_once(XENLOG_ERR VTDPREFIX " IOMMU#%u: no QI slot available\n",
++                    iommu->index);
+         cpu_relax();
++    }
+ 
+     return tail;
+ }
+@@ -180,23 +182,32 @@ static int __must_check queue_invalidate
+     /* Now we don't support interrupt method */
+     if ( sw )
+     {
+-        s_time_t timeout;
+-
+-        /* In case all wait descriptor writes to same addr with same data */
+-        timeout = NOW() + MILLISECS(flush_dev_iotlb ?
+-                                    iommu_dev_iotlb_timeout : VTD_QI_TIMEOUT);
++        static unsigned int __read_mostly threshold = 1;
++        s_time_t start = NOW();
++        s_time_t timeout = start + (flush_dev_iotlb
++                                    ? iommu_dev_iotlb_timeout
++                                    : 100) * MILLISECS(threshold);
+ 
+         while ( ACCESS_ONCE(*this_poll_slot) != QINVAL_STAT_DONE )
+         {
+-            if ( NOW() > timeout )
++            if ( timeout && NOW() > timeout )
+             {
+-                print_qi_regs(iommu);
++                threshold |= threshold << 1;
+                 printk(XENLOG_WARNING VTDPREFIX
+-                       " Queue invalidate wait descriptor timed out\n");
+-                return -ETIMEDOUT;
++                       " IOMMU#%u: QI%s wait descriptor taking too long\n",
++                       iommu->index, flush_dev_iotlb ? " dev" : "");
++                print_qi_regs(iommu);
++                timeout = 0;
+             }
+             cpu_relax();
+         }
++
++        if ( !timeout )
++            printk(XENLOG_WARNING VTDPREFIX
++                   " IOMMU#%u: QI%s wait descriptor took %lums\n",
++                   iommu->index, flush_dev_iotlb ? " dev" : "",
++                   (NOW() - start) / 10000000);
++
+         return 0;
+     }
+ 
diff --git a/xsa373-4.14-4.patch b/xsa373-4.14-4.patch
new file mode 100644
index 0000000..a1f186b
--- /dev/null
+++ b/xsa373-4.14-4.patch
@@ -0,0 +1,81 @@
+From: Jan Beulich <jbeulich@suse.com>
+Subject: AMD/IOMMU: wait for command slot to be available
+
+No caller cared about send_iommu_command() indicating unavailability of
+a slot. Hence if a sufficient number of prior commands timed out, we did
+blindly assume that the requested command was submitted to the IOMMU
+when really it wasn't. This could mean both a hanging system (waiting
+for a command to complete that was never seen by the IOMMU) or blindly
+propagating success back to callers, making them believe they're fine
+to e.g. free previously unmapped pages.
+
+Fold the three involved functions into one, add spin waiting for an
+available slot along the lines of VT-d's qinval_next_index(), and as a
+consequence drop all error indicator return types/values.
+
+This is part of XSA-373 / CVE-2021-28692.
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Paul Durrant <paul@xen.org>
+
+--- a/xen/drivers/passthrough/amd/iommu_cmd.c
++++ b/xen/drivers/passthrough/amd/iommu_cmd.c
+@@ -20,43 +20,32 @@
+ #include "iommu.h"
+ #include "../ats.h"
+ 
+-static int queue_iommu_command(struct amd_iommu *iommu, u32 cmd[])
++static void send_iommu_command(struct amd_iommu *iommu,
++                               const uint32_t cmd[4])
+ {
+-    uint32_t tail, head;
++    uint32_t tail;
+ 
+     tail = iommu->cmd_buffer.tail + sizeof(cmd_entry_t);
+     if ( tail == iommu->cmd_buffer.size )
+         tail = 0;
+ 
+-    head = readl(iommu->mmio_base +
+-                 IOMMU_CMD_BUFFER_HEAD_OFFSET) & IOMMU_RING_BUFFER_PTR_MASK;
+-    if ( head != tail )
++    while ( tail == (readl(iommu->mmio_base +
++                           IOMMU_CMD_BUFFER_HEAD_OFFSET) &
++                     IOMMU_RING_BUFFER_PTR_MASK) )
+     {
+-        memcpy(iommu->cmd_buffer.buffer + iommu->cmd_buffer.tail,
+-               cmd, sizeof(cmd_entry_t));
+-
+-        iommu->cmd_buffer.tail = tail;
+-        return 1;
++        printk_once(XENLOG_ERR
++                    "AMD IOMMU %04x:%02x:%02x.%u: no cmd slot available\n",
++                    iommu->seg, PCI_BUS(iommu->bdf),
++                    PCI_SLOT(iommu->bdf), PCI_FUNC(iommu->bdf));
++        cpu_relax();
+     }
+ 
+-    return 0;
+-}
+-
+-static void commit_iommu_command_buffer(struct amd_iommu *iommu)
+-{
+-    writel(iommu->cmd_buffer.tail,
+-           iommu->mmio_base + IOMMU_CMD_BUFFER_TAIL_OFFSET);
+-}
++    memcpy(iommu->cmd_buffer.buffer + iommu->cmd_buffer.tail,
++           cmd, sizeof(cmd_entry_t));
+ 
+-static int send_iommu_command(struct amd_iommu *iommu, u32 cmd[])
+-{
+-    if ( queue_iommu_command(iommu, cmd) )
+-    {
+-        commit_iommu_command_buffer(iommu);
+-        return 1;
+-    }
++    iommu->cmd_buffer.tail = tail;
+ 
+-    return 0;
++    writel(tail, iommu->mmio_base + IOMMU_CMD_BUFFER_TAIL_OFFSET);
+ }
+ 
+ static void flush_command_buffer(struct amd_iommu *iommu)
diff --git a/xsa373-4.14-5.patch b/xsa373-4.14-5.patch
new file mode 100644
index 0000000..01556a8
--- /dev/null
+++ b/xsa373-4.14-5.patch
@@ -0,0 +1,143 @@
+From: Jan Beulich <jbeulich@suse.com>
+Subject: AMD/IOMMU: drop command completion timeout
+
+First and foremost - such timeouts were not signaled to callers, making
+them believe they're fine to e.g. free previously unmapped pages.
+
+Mirror VT-d's behavior: A fixed number of loop iterations is not a
+suitable way to detect timeouts in an environment (CPU and bus speeds)
+independent manner anyway. Furthermore, leaving an in-progress operation
+pending when it appears to take too long is problematic: If a command
+completed later, the signaling of its completion may instead be
+understood to signal a subsequently started command's completion.
+
+Log excessively long processing times (with a progressive threshold) to
+have some indication of problems in this area. Allow callers to specify
+a non-default timeout bias for this logging, using the same values as
+VT-d does, which in particular means a (by default) much larger value
+for device IO TLB invalidation.
+
+This is part of XSA-373 / CVE-2021-28692.
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Paul Durrant <paul@xen.org>
+
+--- a/xen/drivers/passthrough/amd/iommu_cmd.c
++++ b/xen/drivers/passthrough/amd/iommu_cmd.c
+@@ -48,10 +48,12 @@ static void send_iommu_command(struct am
+     writel(tail, iommu->mmio_base + IOMMU_CMD_BUFFER_TAIL_OFFSET);
+ }
+ 
+-static void flush_command_buffer(struct amd_iommu *iommu)
++static void flush_command_buffer(struct amd_iommu *iommu,
++                                 unsigned int timeout_base)
+ {
+-    unsigned int cmd[4], status, loop_count;
+-    bool comp_wait;
++    uint32_t cmd[4];
++    s_time_t start, timeout;
++    static unsigned int __read_mostly threshold = 1;
+ 
+     /* RW1C 'ComWaitInt' in status register */
+     writel(IOMMU_STATUS_COMP_WAIT_INT,
+@@ -67,22 +69,31 @@ static void flush_command_buffer(struct
+                          IOMMU_COMP_WAIT_I_FLAG_SHIFT, &cmd[0]);
+     send_iommu_command(iommu, cmd);
+ 
+-    /* Make loop_count long enough for polling completion wait bit */
+-    loop_count = 1000;
+-    do {
+-        status = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+-        comp_wait = status & IOMMU_STATUS_COMP_WAIT_INT;
+-        --loop_count;
+-    } while ( !comp_wait && loop_count );
+-
+-    if ( comp_wait )
++    start = NOW();
++    timeout = start + (timeout_base ?: 100) * MILLISECS(threshold);
++    while ( !(readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET) &
++              IOMMU_STATUS_COMP_WAIT_INT) )
+     {
+-        /* RW1C 'ComWaitInt' in status register */
+-        writel(IOMMU_STATUS_COMP_WAIT_INT,
+-               iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+-        return;
++        if ( timeout && NOW() > timeout )
++        {
++            threshold |= threshold << 1;
++            printk(XENLOG_WARNING
++                   "AMD IOMMU %04x:%02x:%02x.%u: %scompletion wait taking too long\n",
++                   iommu->seg, PCI_BUS(iommu->bdf),
++                   PCI_SLOT(iommu->bdf), PCI_FUNC(iommu->bdf),
++                   timeout_base ? "iotlb " : "");
++            timeout = 0;
++        }
++        cpu_relax();
+     }
+-    AMD_IOMMU_DEBUG("Warning: ComWaitInt bit did not assert!\n");
++
++    if ( !timeout )
++        printk(XENLOG_WARNING
++               "AMD IOMMU %04x:%02x:%02x.%u: %scompletion wait took %lums\n",
++               iommu->seg, PCI_BUS(iommu->bdf),
++               PCI_SLOT(iommu->bdf), PCI_FUNC(iommu->bdf),
++               timeout_base ? "iotlb " : "",
++               (NOW() - start) / 10000000);
+ }
+ 
+ /* Build low level iommu command messages */
+@@ -294,7 +305,7 @@ void amd_iommu_flush_iotlb(u8 devfn, con
+     /* send INVALIDATE_IOTLB_PAGES command */
+     spin_lock_irqsave(&iommu->lock, flags);
+     invalidate_iotlb_pages(iommu, maxpend, 0, queueid, daddr, req_id, order);
+-    flush_command_buffer(iommu);
++    flush_command_buffer(iommu, iommu_dev_iotlb_timeout);
+     spin_unlock_irqrestore(&iommu->lock, flags);
+ }
+ 
+@@ -331,7 +342,7 @@ static void _amd_iommu_flush_pages(struc
+     {
+         spin_lock_irqsave(&iommu->lock, flags);
+         invalidate_iommu_pages(iommu, daddr, dom_id, order);
+-        flush_command_buffer(iommu);
++        flush_command_buffer(iommu, 0);
+         spin_unlock_irqrestore(&iommu->lock, flags);
+     }
+ 
+@@ -355,7 +366,7 @@ void amd_iommu_flush_device(struct amd_i
+     ASSERT( spin_is_locked(&iommu->lock) );
+ 
+     invalidate_dev_table_entry(iommu, bdf);
+-    flush_command_buffer(iommu);
++    flush_command_buffer(iommu, 0);
+ }
+ 
+ void amd_iommu_flush_intremap(struct amd_iommu *iommu, uint16_t bdf)
+@@ -363,7 +374,7 @@ void amd_iommu_flush_intremap(struct amd
+     ASSERT( spin_is_locked(&iommu->lock) );
+ 
+     invalidate_interrupt_table(iommu, bdf);
+-    flush_command_buffer(iommu);
++    flush_command_buffer(iommu, 0);
+ }
+ 
+ void amd_iommu_flush_all_caches(struct amd_iommu *iommu)
+@@ -371,7 +382,7 @@ void amd_iommu_flush_all_caches(struct a
+     ASSERT( spin_is_locked(&iommu->lock) );
+ 
+     invalidate_iommu_all(iommu);
+-    flush_command_buffer(iommu);
++    flush_command_buffer(iommu, 0);
+ }
+ 
+ void amd_iommu_send_guest_cmd(struct amd_iommu *iommu, u32 cmd[])
+@@ -381,7 +392,8 @@ void amd_iommu_send_guest_cmd(struct amd
+     spin_lock_irqsave(&iommu->lock, flags);
+ 
+     send_iommu_command(iommu, cmd);
+-    flush_command_buffer(iommu);
++    /* TBD: Timeout selection may require peeking into cmd[]. */
++    flush_command_buffer(iommu, 0);
+ 
+     spin_unlock_irqrestore(&iommu->lock, flags);
+ }
diff --git a/xsa375.patch b/xsa375.patch
new file mode 100644
index 0000000..aa2e5ad
--- /dev/null
+++ b/xsa375.patch
@@ -0,0 +1,50 @@
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Subject: x86/spec-ctrl: Protect against Speculative Code Store Bypass
+
+Modern x86 processors have far-better-than-architecturally-guaranteed
+self-modifying code detection.  Typically, when a write hits an instruction in
+flight, a Machine Clear occurs to flush stale content in the frontend and
+backend.
+
+For self-modifying code, before a write which hits an instruction in flight
+retires, the frontend can speculatively decode and execute the old instruction
+stream.  Speculation of this form can suffer from type confusion in registers,
+and potentially leak data.
+
+Furthermore, updates are typically byte-wise, rather than atomic.  Depending
+on timing, speculation can race ahead multiple times between individual
+writes, and execute the transiently-malformed instruction stream.
+
+Xen has stubs which are used in certain cases for emulation purposes.  Inhibit
+speculation between updating the stub and executing it.
+
+This is XSA-375 / CVE-2021-0089.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+
+diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c
+index 8889509d2a..11467a1e3a 100644
+--- a/xen/arch/x86/pv/emul-priv-op.c
++++ b/xen/arch/x86/pv/emul-priv-op.c
+@@ -138,6 +138,8 @@ static io_emul_stub_t *io_emul_stub_setup(struct priv_op_ctxt *ctxt, u8 opcode,
+     /* Runtime confirmation that we haven't clobbered an adjacent stub. */
+     BUG_ON(STUB_BUF_SIZE / 2 < (p - ctxt->io_emul_stub));
+ 
++    block_speculation(); /* SCSB */
++
+     /* Handy function-typed pointer to the stub. */
+     return (void *)stub_va;
+ 
+diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c
+index c25d88d0d8..f42ff2a837 100644
+--- a/xen/arch/x86/x86_emulate/x86_emulate.c
++++ b/xen/arch/x86/x86_emulate/x86_emulate.c
+@@ -1257,6 +1257,7 @@ static inline int mkec(uint8_t e, int32_t ec, ...)
+ # define invoke_stub(pre, post, constraints...) do {                    \
+     stub_exn.info = (union stub_exception_token) { .raw = ~0 };         \
+     stub_exn.line = __LINE__; /* Utility outweighs livepatching cost */ \
++    block_speculation(); /* SCSB */                                     \
+     asm volatile ( pre "\n\tINDIRECT_CALL %[stub]\n\t" post "\n"        \
+                    ".Lret%=:\n\t"                                       \
+                    ".pushsection .fixup,\"ax\"\n"                       \
diff --git a/xsa377.patch b/xsa377.patch
new file mode 100644
index 0000000..1a1887b
--- /dev/null
+++ b/xsa377.patch
@@ -0,0 +1,27 @@
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Subject: x86/spec-ctrl: Mitigate TAA after S3 resume
+
+The user chosen setting for MSR_TSX_CTRL needs restoring after S3.
+
+All APs get the correct setting via start_secondary(), but the BSP was missed
+out.
+
+This is XSA-377 / CVE-2021-28690.
+
+Fixes: 8c4330818f6 ("x86/spec-ctrl: Mitigate the TSX Asynchronous Abort sidechannel")
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+
+diff --git a/xen/arch/x86/acpi/power.c b/xen/arch/x86/acpi/power.c
+index 91a8c4d0bd..31a56f02d0 100644
+--- a/xen/arch/x86/acpi/power.c
++++ b/xen/arch/x86/acpi/power.c
+@@ -288,6 +288,8 @@ static int enter_state(u32 state)
+ 
+     microcode_update_one();
+ 
++    tsx_init(); /* Needs microcode.  May change HLE/RTM feature bits. */
++
+     if ( !recheck_cpu_features(0) )
+         panic("Missing previously available feature(s)\n");
+