From e0018c4e82f3d411b0572d3947733ab6adde4353 Mon Sep 17 00:00:00 2001 From: Michael Young Date: Jun 08 2021 21:56:27 +0000 Subject: 4 security updates xen/arm: Boot modules are not scrubbed [XSA-372, CVE-2021-28693] inappropriate x86 IOMMU timeout detection / handling [XSA-373, CVE-2021-28692] Speculative Code Store Bypass [XSA-375, CVE-2021-0089] x86: TSX Async Abort protections not restored after S3 [XSA-377, CVE-2021-28690] --- diff --git a/xen.spec b/xen.spec index c4773ac..0ded91e 100644 --- a/xen.spec +++ b/xen.spec @@ -58,7 +58,7 @@ Summary: Xen is a virtual machine monitor Name: xen Version: 4.14.2 -Release: 1%{?dist} +Release: 2%{?dist} License: GPLv2+ and LGPLv2+ and BSD URL: http://xen.org/ Source0: https://downloads.xenproject.org/release/xen/%{version}/xen-%{version}.tar.gz @@ -118,6 +118,15 @@ Patch47: xen.git-d6627cf1b63ce57a6a7e2c1800dbc50eed742c32.patch Patch48: xen.git-d8099d94dfaa3573bd86ebfc457cbc8f70a3ecda.patch Patch49: xen.git-8169f82049efb5b2044b33aa482ba3a136b7804d.patch Patch50: xsa363.patch +Patch51: xsa372-4.14-0001-xen-arm-Create-dom0less-domUs-earlier.patch +Patch52: xsa372-4.14-0002-xen-arm-Boot-modules-should-always-be-scrubbed-if-bo.patch +Patch53: xsa373-4.14-1.patch +Patch54: xsa373-4.14-2.patch +Patch55: xsa373-4.14-3.patch +Patch56: xsa373-4.14-4.patch +Patch57: xsa373-4.14-5.patch +Patch58: xsa375.patch +Patch59: xsa377.patch %if %build_qemutrad @@ -332,6 +341,15 @@ manage Xen virtual machines. %patch48 -p1 %patch49 -p1 %patch50 -p1 +%patch51 -p1 +%patch52 -p1 +%patch53 -p1 +%patch54 -p1 +%patch55 -p1 +%patch56 -p1 +%patch57 -p1 +%patch58 -p1 +%patch59 -p1 # qemu-xen-traditional patches pushd tools/qemu-xen-traditional @@ -925,6 +943,14 @@ fi %endif %changelog +* Tue Jun 08 2021 Michael Young - 4.14.2-2 +- xen/arm: Boot modules are not scrubbed [XSA-372, CVE-2021-28693] +- inappropriate x86 IOMMU timeout detection / handling + [XSA-373, CVE-2021-28692] +- Speculative Code Store Bypass [XSA-375, CVE-2021-0089] +- x86: TSX Async Abort protections not restored after S3 + [XSA-377, CVE-2021-28690] + * Tue May 04 2021 Michael Young - 4.14.2-1 - update to 4.14.2 remove or adjust patch content now included or superceded upstream diff --git a/xsa372-4.14-0001-xen-arm-Create-dom0less-domUs-earlier.patch b/xsa372-4.14-0001-xen-arm-Create-dom0less-domUs-earlier.patch new file mode 100644 index 0000000..a5289a8 --- /dev/null +++ b/xsa372-4.14-0001-xen-arm-Create-dom0less-domUs-earlier.patch @@ -0,0 +1,83 @@ +From f98c20aaaf909be04ada5cb6cb88c14b9bc75e15 Mon Sep 17 00:00:00 2001 +From: Julien Grall +Date: Mon, 17 May 2021 17:47:13 +0100 +Subject: [PATCH 1/2] xen/arm: Create dom0less domUs earlier + +In a follow-up patch we will need to unallocate the boot modules +before heap_init_late() is called. + +The modules will contain the domUs kernel and initramfs. Therefore Xen +will need to create extra domUs (used by dom0less) before heap_init_late(). + +This has two consequences on dom0less: + 1) Domains will not be unpaused as soon as they are created but + once all have been created. However, Xen doesn't guarantee an order + to unpause, so this is not something one could rely on. + + 2) The memory allocated for a domU will not be scrubbed anymore when an + admin select bootscrub=on. This is not something we advertised, but if + this is a concern we can introduce either force scrub for all domUs or + a per-domain flag in the DT. The behavior for bootscrub=off and + bootscrub=idle (default) has not changed. + +This is part of XSA-372 / CVE-2021-28693. + +Signed-off-by: Julien Grall +Reviewed-by: Jan Beulich +Reviewed-by: Stefano Stabellini +Tested-by: Stefano Stabellini +--- + xen/arch/arm/domain_build.c | 2 -- + xen/arch/arm/setup.c | 9 +++++---- + 2 files changed, 5 insertions(+), 6 deletions(-) + +diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c +index e824ba34b012..b07461f5d376 100644 +--- a/xen/arch/arm/domain_build.c ++++ b/xen/arch/arm/domain_build.c +@@ -2515,8 +2515,6 @@ void __init create_domUs(void) + + if ( construct_domU(d, node) != 0 ) + panic("Could not set up domain %s\n", dt_node_name(node)); +- +- domain_unpause_by_systemcontroller(d); + } + } + +diff --git a/xen/arch/arm/setup.c b/xen/arch/arm/setup.c +index 7968cee47d05..1f26080b30bf 100644 +--- a/xen/arch/arm/setup.c ++++ b/xen/arch/arm/setup.c +@@ -779,7 +779,7 @@ void __init start_xen(unsigned long boot_phys_offset, + int cpus, i; + const char *cmdline; + struct bootmodule *xen_bootmodule; +- struct domain *dom0; ++ struct domain *dom0, *d; + struct xen_domctl_createdomain dom0_cfg = { + .flags = XEN_DOMCTL_CDF_hvm | XEN_DOMCTL_CDF_hap, + .max_evtchn_port = -1, +@@ -962,6 +962,8 @@ void __init start_xen(unsigned long boot_phys_offset, + if ( construct_dom0(dom0) != 0) + panic("Could not set up DOM0 guest OS\n"); + ++ create_domUs(); ++ + heap_init_late(); + + init_trace_bufs(); +@@ -975,9 +977,8 @@ void __init start_xen(unsigned long boot_phys_offset, + + system_state = SYS_STATE_active; + +- create_domUs(); +- +- domain_unpause_by_systemcontroller(dom0); ++ for_each_domain( d ) ++ domain_unpause_by_systemcontroller(d); + + /* Switch on to the dynamically allocated stack for the idle vcpu + * since the static one we're running on is about to be freed. */ +-- +2.17.1 + diff --git a/xsa372-4.14-0002-xen-arm-Boot-modules-should-always-be-scrubbed-if-bo.patch b/xsa372-4.14-0002-xen-arm-Boot-modules-should-always-be-scrubbed-if-bo.patch new file mode 100644 index 0000000..3ed62f3 --- /dev/null +++ b/xsa372-4.14-0002-xen-arm-Boot-modules-should-always-be-scrubbed-if-bo.patch @@ -0,0 +1,58 @@ +From e7e475c1a3dc6b149252413589eebaa4ae138824 Mon Sep 17 00:00:00 2001 +From: Julien Grall +Date: Sat, 17 Apr 2021 17:38:28 +0100 +Subject: [PATCH 2/2] xen/arm: Boot modules should always be scrubbed if + bootscrub={on, idle} + +The function to initialize the pages (see init_heap_pages()) will request +scrub when the admin request idle bootscrub (default) and state == +SYS_STATE_active. When bootscrub=on, Xen will scrub any free pages in +heap_init_late(). + +Currently, the boot modules (e.g. kernels, initramfs) will be discarded/ +freed after heap_init_late() is called and system_state switched to +SYS_STATE_active. This means the pages associated with the boot modules +will not get scrubbed before getting re-purposed. + +If the memory is assigned to an untrusted domU, it may be able to +retrieve secrets from the modules. + +This is part of XSA-372 / CVE-2021-28693. + +Fixes: 1774e9b1df27 ("xen/arm: introduce create_domUs") +Signed-off-by: Julien Grall +Reviewed-by: Jan Beulich +Reviewed-by: Stefano Stabellini +Tested-by: Stefano Stabellini +--- + xen/arch/arm/setup.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/xen/arch/arm/setup.c b/xen/arch/arm/setup.c +index 1f26080b30bf..34b1c1a11ef6 100644 +--- a/xen/arch/arm/setup.c ++++ b/xen/arch/arm/setup.c +@@ -75,7 +75,6 @@ static __used void init_done(void) + /* Must be done past setting system_state. */ + unregister_init_virtual_region(); + +- discard_initial_modules(); + free_init_memory(); + startup_cpu_idle_loop(); + } +@@ -964,6 +963,12 @@ void __init start_xen(unsigned long boot_phys_offset, + + create_domUs(); + ++ /* ++ * This needs to be called **before** heap_init_late() so modules ++ * will be scrubbed (unless suppressed). ++ */ ++ discard_initial_modules(); ++ + heap_init_late(); + + init_trace_bufs(); +-- +2.17.1 + diff --git a/xsa373-4.14-1.patch b/xsa373-4.14-1.patch new file mode 100644 index 0000000..ee5229a --- /dev/null +++ b/xsa373-4.14-1.patch @@ -0,0 +1,120 @@ +From: Jan Beulich +Subject: VT-d: size qinval queue dynamically + +With the present synchronous model, we need two slots for every +operation (the operation itself and a wait descriptor). There can be +one such pair of requests pending per CPU. To ensure that under all +normal circumstances a slot is always available when one is requested, +size the queue ring according to the number of present CPUs. + +This is part of XSA-373 / CVE-2021-28692. + +Signed-off-by: Jan Beulich +Reviewed-by: Paul Durrant + +--- a/xen/drivers/passthrough/vtd/iommu.h ++++ b/xen/drivers/passthrough/vtd/iommu.h +@@ -450,17 +450,9 @@ struct qinval_entry { + }q; + }; + +-/* Order of queue invalidation pages(max is 8) */ +-#define QINVAL_PAGE_ORDER 2 +- +-#define QINVAL_ARCH_PAGE_ORDER (QINVAL_PAGE_ORDER + PAGE_SHIFT_4K - PAGE_SHIFT) +-#define QINVAL_ARCH_PAGE_NR ( QINVAL_ARCH_PAGE_ORDER < 0 ? \ +- 1 : \ +- 1 << QINVAL_ARCH_PAGE_ORDER ) +- + /* Each entry is 16 bytes, so 2^8 entries per page */ + #define QINVAL_ENTRY_ORDER ( PAGE_SHIFT - 4 ) +-#define QINVAL_ENTRY_NR (1 << (QINVAL_PAGE_ORDER + 8)) ++#define QINVAL_MAX_ENTRY_NR (1u << (7 + QINVAL_ENTRY_ORDER)) + + /* Status data flag */ + #define QINVAL_STAT_INIT 0 +--- a/xen/drivers/passthrough/vtd/qinval.c ++++ b/xen/drivers/passthrough/vtd/qinval.c +@@ -31,6 +31,9 @@ + + #define VTD_QI_TIMEOUT 1 + ++static unsigned int __read_mostly qi_pg_order; ++static unsigned int __read_mostly qi_entry_nr; ++ + static int __must_check invalidate_sync(struct vtd_iommu *iommu); + + static void print_qi_regs(struct vtd_iommu *iommu) +@@ -55,7 +58,7 @@ static unsigned int qinval_next_index(st + tail >>= QINVAL_INDEX_SHIFT; + + /* (tail+1 == head) indicates a full queue, wait for HW */ +- while ( ( tail + 1 ) % QINVAL_ENTRY_NR == ++ while ( ((tail + 1) & (qi_entry_nr - 1)) == + ( dmar_readq(iommu->reg, DMAR_IQH_REG) >> QINVAL_INDEX_SHIFT ) ) + cpu_relax(); + +@@ -68,7 +71,7 @@ static void qinval_update_qtail(struct v + + /* Need hold register lock when update tail */ + ASSERT( spin_is_locked(&iommu->register_lock) ); +- val = (index + 1) % QINVAL_ENTRY_NR; ++ val = (index + 1) & (qi_entry_nr - 1); + dmar_writeq(iommu->reg, DMAR_IQT_REG, (val << QINVAL_INDEX_SHIFT)); + } + +@@ -403,8 +406,28 @@ int enable_qinval(struct vtd_iommu *iomm + + if ( iommu->qinval_maddr == 0 ) + { +- iommu->qinval_maddr = alloc_pgtable_maddr(QINVAL_ARCH_PAGE_NR, +- iommu->node); ++ if ( !qi_entry_nr ) ++ { ++ /* ++ * With the present synchronous model, we need two slots for every ++ * operation (the operation itself and a wait descriptor). There ++ * can be one such pair of requests pending per CPU. One extra ++ * entry is needed as the ring is considered full when there's ++ * only one entry left. ++ */ ++ BUILD_BUG_ON(CONFIG_NR_CPUS * 2 >= QINVAL_MAX_ENTRY_NR); ++ qi_pg_order = get_order_from_bytes((num_present_cpus() * 2 + 1) << ++ (PAGE_SHIFT - ++ QINVAL_ENTRY_ORDER)); ++ qi_entry_nr = 1u << (qi_pg_order + QINVAL_ENTRY_ORDER); ++ ++ dprintk(XENLOG_INFO VTDPREFIX, ++ "QI: using %u-entry ring(s)\n", qi_entry_nr); ++ } ++ ++ iommu->qinval_maddr = ++ alloc_pgtable_maddr(qi_entry_nr >> QINVAL_ENTRY_ORDER, ++ iommu->node); + if ( iommu->qinval_maddr == 0 ) + { + dprintk(XENLOG_WARNING VTDPREFIX, +@@ -418,15 +441,16 @@ int enable_qinval(struct vtd_iommu *iomm + + spin_lock_irqsave(&iommu->register_lock, flags); + +- /* Setup Invalidation Queue Address(IQA) register with the +- * address of the page we just allocated. QS field at +- * bits[2:0] to indicate size of queue is one 4KB page. +- * That's 256 entries. Queued Head (IQH) and Queue Tail (IQT) +- * registers are automatically reset to 0 with write +- * to IQA register. ++ /* ++ * Setup Invalidation Queue Address (IQA) register with the address of the ++ * pages we just allocated. The QS field at bits[2:0] indicates the size ++ * (page order) of the queue. ++ * ++ * Queued Head (IQH) and Queue Tail (IQT) registers are automatically ++ * reset to 0 with write to IQA register. + */ + dmar_writeq(iommu->reg, DMAR_IQA_REG, +- iommu->qinval_maddr | QINVAL_PAGE_ORDER); ++ iommu->qinval_maddr | qi_pg_order); + + dmar_writeq(iommu->reg, DMAR_IQT_REG, 0); + diff --git a/xsa373-4.14-2.patch b/xsa373-4.14-2.patch new file mode 100644 index 0000000..773cbfd --- /dev/null +++ b/xsa373-4.14-2.patch @@ -0,0 +1,102 @@ +From: Jan Beulich +Subject: AMD/IOMMU: size command buffer dynamically + +With the present synchronous model, we need two slots for every +operation (the operation itself and a wait command). There can be one +such pair of commands pending per CPU. To ensure that under all normal +circumstances a slot is always available when one is requested, size the +command ring according to the number of present CPUs. + +This is part of XSA-373 / CVE-2021-28692. + +Signed-off-by: Jan Beulich +Reviewed-by: Paul Durrant + +--- a/xen/drivers/passthrough/amd/iommu-defs.h ++++ b/xen/drivers/passthrough/amd/iommu-defs.h +@@ -20,9 +20,6 @@ + #ifndef AMD_IOMMU_DEFS_H + #define AMD_IOMMU_DEFS_H + +-/* IOMMU Command Buffer entries: in power of 2 increments, minimum of 256 */ +-#define IOMMU_CMD_BUFFER_DEFAULT_ENTRIES 512 +- + /* IOMMU Event Log entries: in power of 2 increments, minimum of 256 */ + #define IOMMU_EVENT_LOG_DEFAULT_ENTRIES 512 + +@@ -164,8 +161,8 @@ struct amd_iommu_dte { + #define IOMMU_CMD_BUFFER_LENGTH_MASK 0x0F000000 + #define IOMMU_CMD_BUFFER_LENGTH_SHIFT 24 + +-#define IOMMU_CMD_BUFFER_ENTRY_SIZE 16 +-#define IOMMU_CMD_BUFFER_POWER_OF2_ENTRIES_PER_PAGE 8 ++#define IOMMU_CMD_BUFFER_ENTRY_ORDER 4 ++#define IOMMU_CMD_BUFFER_MAX_ENTRIES (1u << 15) + + #define IOMMU_CMD_OPCODE_MASK 0xF0000000 + #define IOMMU_CMD_OPCODE_SHIFT 28 +--- a/xen/drivers/passthrough/amd/iommu_cmd.c ++++ b/xen/drivers/passthrough/amd/iommu_cmd.c +@@ -24,7 +24,7 @@ static int queue_iommu_command(struct am + { + uint32_t tail, head; + +- tail = iommu->cmd_buffer.tail + IOMMU_CMD_BUFFER_ENTRY_SIZE; ++ tail = iommu->cmd_buffer.tail + sizeof(cmd_entry_t); + if ( tail == iommu->cmd_buffer.size ) + tail = 0; + +@@ -33,7 +33,7 @@ static int queue_iommu_command(struct am + if ( head != tail ) + { + memcpy(iommu->cmd_buffer.buffer + iommu->cmd_buffer.tail, +- cmd, IOMMU_CMD_BUFFER_ENTRY_SIZE); ++ cmd, sizeof(cmd_entry_t)); + + iommu->cmd_buffer.tail = tail; + return 1; +--- a/xen/drivers/passthrough/amd/iommu_init.c ++++ b/xen/drivers/passthrough/amd/iommu_init.c +@@ -118,7 +118,7 @@ static void register_iommu_cmd_buffer_in + writel(entry, iommu->mmio_base + IOMMU_CMD_BUFFER_BASE_LOW_OFFSET); + + power_of2_entries = get_order_from_bytes(iommu->cmd_buffer.size) + +- IOMMU_CMD_BUFFER_POWER_OF2_ENTRIES_PER_PAGE; ++ PAGE_SHIFT - IOMMU_CMD_BUFFER_ENTRY_ORDER; + + entry = 0; + iommu_set_addr_hi_to_reg(&entry, addr_hi); +@@ -1022,9 +1022,31 @@ static void *__init allocate_ring_buffer + static void * __init allocate_cmd_buffer(struct amd_iommu *iommu) + { + /* allocate 'command buffer' in power of 2 increments of 4K */ ++ static unsigned int __read_mostly nr_ents; ++ ++ if ( !nr_ents ) ++ { ++ unsigned int order; ++ ++ /* ++ * With the present synchronous model, we need two slots for every ++ * operation (the operation itself and a wait command). There can be ++ * one such pair of requests pending per CPU. One extra entry is ++ * needed as the ring is considered full when there's only one entry ++ * left. ++ */ ++ BUILD_BUG_ON(CONFIG_NR_CPUS * 2 >= IOMMU_CMD_BUFFER_MAX_ENTRIES); ++ order = get_order_from_bytes((num_present_cpus() * 2 + 1) << ++ IOMMU_CMD_BUFFER_ENTRY_ORDER); ++ nr_ents = 1u << (order + PAGE_SHIFT - IOMMU_CMD_BUFFER_ENTRY_ORDER); ++ ++ AMD_IOMMU_DEBUG("using %u-entry cmd ring(s)\n", nr_ents); ++ } ++ ++ BUILD_BUG_ON(sizeof(cmd_entry_t) != (1u << IOMMU_CMD_BUFFER_ENTRY_ORDER)); ++ + return allocate_ring_buffer(&iommu->cmd_buffer, sizeof(cmd_entry_t), +- IOMMU_CMD_BUFFER_DEFAULT_ENTRIES, +- "Command Buffer", false); ++ nr_ents, "Command Buffer", false); + } + + static void * __init allocate_event_log(struct amd_iommu *iommu) diff --git a/xsa373-4.14-3.patch b/xsa373-4.14-3.patch new file mode 100644 index 0000000..fe34546 --- /dev/null +++ b/xsa373-4.14-3.patch @@ -0,0 +1,163 @@ +From: Jan Beulich +Subject: VT-d: eliminate flush related timeouts + +Leaving an in-progress operation pending when it appears to take too +long is problematic: If e.g. a QI command completed later, the write to +the "poll slot" may instead be understood to signal a subsequently +started command's completion. Also our accounting of the timeout period +was actually wrong: We included the time it took for the command to +actually make it to the front of the queue, which could be heavily +affected by guests other than the one for which the flush is being +performed. + +Do away with all timeout detection on all flush related code paths. +Log excessively long processing times (with a progressive threshold) to +have some indication of problems in this area. + +Additionally log (once) if qinval_next_index() didn't immediately find +an available slot. Together with the earlier change sizing the queue(s) +dynamically, we should now have a guarantee that with our fully +synchronous model any demand for slots can actually be satisfied. + +This is part of XSA-373 / CVE-2021-28692. + +Signed-off-by: Jan Beulich +Reviewed-by: Paul Durrant + +--- a/xen/drivers/passthrough/vtd/dmar.h ++++ b/xen/drivers/passthrough/vtd/dmar.h +@@ -127,6 +127,34 @@ do { + } \ + } while (0) + ++#define IOMMU_FLUSH_WAIT(what, iommu, offset, op, cond, sts) \ ++do { \ ++ static unsigned int __read_mostly threshold = 1; \ ++ s_time_t start = NOW(); \ ++ s_time_t timeout = start + DMAR_OPERATION_TIMEOUT * threshold; \ ++ \ ++ for ( ; ; ) \ ++ { \ ++ sts = op(iommu->reg, offset); \ ++ if ( cond ) \ ++ break; \ ++ if ( timeout && NOW() > timeout ) \ ++ { \ ++ threshold |= threshold << 1; \ ++ printk(XENLOG_WARNING VTDPREFIX \ ++ " IOMMU#%u: %s flush taking too long\n", \ ++ iommu->index, what); \ ++ timeout = 0; \ ++ } \ ++ cpu_relax(); \ ++ } \ ++ \ ++ if ( !timeout ) \ ++ printk(XENLOG_WARNING VTDPREFIX \ ++ " IOMMU#%u: %s flush took %lums\n", \ ++ iommu->index, what, (NOW() - start) / 10000000); \ ++} while ( false ) ++ + int vtd_hw_check(void); + void disable_pmr(struct vtd_iommu *iommu); + int is_igd_drhd(struct acpi_drhd_unit *drhd); +--- a/xen/drivers/passthrough/vtd/iommu.c ++++ b/xen/drivers/passthrough/vtd/iommu.c +@@ -326,8 +326,8 @@ static void iommu_flush_write_buffer(str + dmar_writel(iommu->reg, DMAR_GCMD_REG, val | DMA_GCMD_WBF); + + /* Make sure hardware complete it */ +- IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, dmar_readl, +- !(val & DMA_GSTS_WBFS), val); ++ IOMMU_FLUSH_WAIT("write buffer", iommu, DMAR_GSTS_REG, dmar_readl, ++ !(val & DMA_GSTS_WBFS), val); + + spin_unlock_irqrestore(&iommu->register_lock, flags); + } +@@ -376,8 +376,8 @@ int vtd_flush_context_reg(struct vtd_iom + dmar_writeq(iommu->reg, DMAR_CCMD_REG, val); + + /* Make sure hardware complete it */ +- IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG, dmar_readq, +- !(val & DMA_CCMD_ICC), val); ++ IOMMU_FLUSH_WAIT("context", iommu, DMAR_CCMD_REG, dmar_readq, ++ !(val & DMA_CCMD_ICC), val); + + spin_unlock_irqrestore(&iommu->register_lock, flags); + /* flush context entry will implicitly flush write buffer */ +@@ -454,8 +454,8 @@ int vtd_flush_iotlb_reg(struct vtd_iommu + dmar_writeq(iommu->reg, tlb_offset + 8, val); + + /* Make sure hardware complete it */ +- IOMMU_WAIT_OP(iommu, (tlb_offset + 8), dmar_readq, +- !(val & DMA_TLB_IVT), val); ++ IOMMU_FLUSH_WAIT("iotlb", iommu, (tlb_offset + 8), dmar_readq, ++ !(val & DMA_TLB_IVT), val); + spin_unlock_irqrestore(&iommu->register_lock, flags); + + /* check IOTLB invalidation granularity */ +--- a/xen/drivers/passthrough/vtd/qinval.c ++++ b/xen/drivers/passthrough/vtd/qinval.c +@@ -29,8 +29,6 @@ + #include "extern.h" + #include "../ats.h" + +-#define VTD_QI_TIMEOUT 1 +- + static unsigned int __read_mostly qi_pg_order; + static unsigned int __read_mostly qi_entry_nr; + +@@ -60,7 +58,11 @@ static unsigned int qinval_next_index(st + /* (tail+1 == head) indicates a full queue, wait for HW */ + while ( ((tail + 1) & (qi_entry_nr - 1)) == + ( dmar_readq(iommu->reg, DMAR_IQH_REG) >> QINVAL_INDEX_SHIFT ) ) ++ { ++ printk_once(XENLOG_ERR VTDPREFIX " IOMMU#%u: no QI slot available\n", ++ iommu->index); + cpu_relax(); ++ } + + return tail; + } +@@ -180,23 +182,32 @@ static int __must_check queue_invalidate + /* Now we don't support interrupt method */ + if ( sw ) + { +- s_time_t timeout; +- +- /* In case all wait descriptor writes to same addr with same data */ +- timeout = NOW() + MILLISECS(flush_dev_iotlb ? +- iommu_dev_iotlb_timeout : VTD_QI_TIMEOUT); ++ static unsigned int __read_mostly threshold = 1; ++ s_time_t start = NOW(); ++ s_time_t timeout = start + (flush_dev_iotlb ++ ? iommu_dev_iotlb_timeout ++ : 100) * MILLISECS(threshold); + + while ( ACCESS_ONCE(*this_poll_slot) != QINVAL_STAT_DONE ) + { +- if ( NOW() > timeout ) ++ if ( timeout && NOW() > timeout ) + { +- print_qi_regs(iommu); ++ threshold |= threshold << 1; + printk(XENLOG_WARNING VTDPREFIX +- " Queue invalidate wait descriptor timed out\n"); +- return -ETIMEDOUT; ++ " IOMMU#%u: QI%s wait descriptor taking too long\n", ++ iommu->index, flush_dev_iotlb ? " dev" : ""); ++ print_qi_regs(iommu); ++ timeout = 0; + } + cpu_relax(); + } ++ ++ if ( !timeout ) ++ printk(XENLOG_WARNING VTDPREFIX ++ " IOMMU#%u: QI%s wait descriptor took %lums\n", ++ iommu->index, flush_dev_iotlb ? " dev" : "", ++ (NOW() - start) / 10000000); ++ + return 0; + } + diff --git a/xsa373-4.14-4.patch b/xsa373-4.14-4.patch new file mode 100644 index 0000000..a1f186b --- /dev/null +++ b/xsa373-4.14-4.patch @@ -0,0 +1,81 @@ +From: Jan Beulich +Subject: AMD/IOMMU: wait for command slot to be available + +No caller cared about send_iommu_command() indicating unavailability of +a slot. Hence if a sufficient number prior commands timed out, we did +blindly assume that the requested command was submitted to the IOMMU +when really it wasn't. This could mean both a hanging system (waiting +for a command to complete that was never seen by the IOMMU) or blindly +propagating success back to callers, making them believe they're fine +to e.g. free previously unmapped pages. + +Fold the three involved functions into one, add spin waiting for an +available slot along the lines of VT-d's qinval_next_index(), and as a +consequence drop all error indicator return types/values. + +This is part of XSA-373 / CVE-2021-28692. + +Signed-off-by: Jan Beulich +Reviewed-by: Paul Durrant + +--- a/xen/drivers/passthrough/amd/iommu_cmd.c ++++ b/xen/drivers/passthrough/amd/iommu_cmd.c +@@ -20,43 +20,32 @@ + #include "iommu.h" + #include "../ats.h" + +-static int queue_iommu_command(struct amd_iommu *iommu, u32 cmd[]) ++static void send_iommu_command(struct amd_iommu *iommu, ++ const uint32_t cmd[4]) + { +- uint32_t tail, head; ++ uint32_t tail; + + tail = iommu->cmd_buffer.tail + sizeof(cmd_entry_t); + if ( tail == iommu->cmd_buffer.size ) + tail = 0; + +- head = readl(iommu->mmio_base + +- IOMMU_CMD_BUFFER_HEAD_OFFSET) & IOMMU_RING_BUFFER_PTR_MASK; +- if ( head != tail ) ++ while ( tail == (readl(iommu->mmio_base + ++ IOMMU_CMD_BUFFER_HEAD_OFFSET) & ++ IOMMU_RING_BUFFER_PTR_MASK) ) + { +- memcpy(iommu->cmd_buffer.buffer + iommu->cmd_buffer.tail, +- cmd, sizeof(cmd_entry_t)); +- +- iommu->cmd_buffer.tail = tail; +- return 1; ++ printk_once(XENLOG_ERR ++ "AMD IOMMU %04x:%02x:%02x.%u: no cmd slot available\n", ++ iommu->seg, PCI_BUS(iommu->bdf), ++ PCI_SLOT(iommu->bdf), PCI_FUNC(iommu->bdf)); ++ cpu_relax(); + } + +- return 0; +-} +- +-static void commit_iommu_command_buffer(struct amd_iommu *iommu) +-{ +- writel(iommu->cmd_buffer.tail, +- iommu->mmio_base + IOMMU_CMD_BUFFER_TAIL_OFFSET); +-} ++ memcpy(iommu->cmd_buffer.buffer + iommu->cmd_buffer.tail, ++ cmd, sizeof(cmd_entry_t)); + +-static int send_iommu_command(struct amd_iommu *iommu, u32 cmd[]) +-{ +- if ( queue_iommu_command(iommu, cmd) ) +- { +- commit_iommu_command_buffer(iommu); +- return 1; +- } ++ iommu->cmd_buffer.tail = tail; + +- return 0; ++ writel(tail, iommu->mmio_base + IOMMU_CMD_BUFFER_TAIL_OFFSET); + } + + static void flush_command_buffer(struct amd_iommu *iommu) diff --git a/xsa373-4.14-5.patch b/xsa373-4.14-5.patch new file mode 100644 index 0000000..01556a8 --- /dev/null +++ b/xsa373-4.14-5.patch @@ -0,0 +1,143 @@ +From: Jan Beulich +Subject: AMD/IOMMU: drop command completion timeout + +First and foremost - such timeouts were not signaled to callers, making +them believe they're fine to e.g. free previously unmapped pages. + +Mirror VT-d's behavior: A fixed number of loop iterations is not a +suitable way to detect timeouts in an environment (CPU and bus speeds) +independent manner anyway. Furthermore, leaving an in-progress operation +pending when it appears to take too long is problematic: If a command +completed later, the signaling of its completion may instead be +understood to signal a subsequently started command's completion. + +Log excessively long processing times (with a progressive threshold) to +have some indication of problems in this area. Allow callers to specify +a non-default timeout bias for this logging, using the same values as +VT-d does, which in particular means a (by default) much larger value +for device IO TLB invalidation. + +This is part of XSA-373 / CVE-2021-28692. + +Signed-off-by: Jan Beulich +Reviewed-by: Paul Durrant + +--- a/xen/drivers/passthrough/amd/iommu_cmd.c ++++ b/xen/drivers/passthrough/amd/iommu_cmd.c +@@ -48,10 +48,12 @@ static void send_iommu_command(struct am + writel(tail, iommu->mmio_base + IOMMU_CMD_BUFFER_TAIL_OFFSET); + } + +-static void flush_command_buffer(struct amd_iommu *iommu) ++static void flush_command_buffer(struct amd_iommu *iommu, ++ unsigned int timeout_base) + { +- unsigned int cmd[4], status, loop_count; +- bool comp_wait; ++ uint32_t cmd[4]; ++ s_time_t start, timeout; ++ static unsigned int __read_mostly threshold = 1; + + /* RW1C 'ComWaitInt' in status register */ + writel(IOMMU_STATUS_COMP_WAIT_INT, +@@ -67,22 +69,31 @@ static void flush_command_buffer(struct + IOMMU_COMP_WAIT_I_FLAG_SHIFT, &cmd[0]); + send_iommu_command(iommu, cmd); + +- /* Make loop_count long enough for polling completion wait bit */ +- loop_count = 1000; +- do { +- status = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); +- comp_wait = status & IOMMU_STATUS_COMP_WAIT_INT; +- --loop_count; +- } while ( !comp_wait && loop_count ); +- +- if ( comp_wait ) ++ start = NOW(); ++ timeout = start + (timeout_base ?: 100) * MILLISECS(threshold); ++ while ( !(readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET) & ++ IOMMU_STATUS_COMP_WAIT_INT) ) + { +- /* RW1C 'ComWaitInt' in status register */ +- writel(IOMMU_STATUS_COMP_WAIT_INT, +- iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); +- return; ++ if ( timeout && NOW() > timeout ) ++ { ++ threshold |= threshold << 1; ++ printk(XENLOG_WARNING ++ "AMD IOMMU %04x:%02x:%02x.%u: %scompletion wait taking too long\n", ++ iommu->seg, PCI_BUS(iommu->bdf), ++ PCI_SLOT(iommu->bdf), PCI_FUNC(iommu->bdf), ++ timeout_base ? "iotlb " : ""); ++ timeout = 0; ++ } ++ cpu_relax(); + } +- AMD_IOMMU_DEBUG("Warning: ComWaitInt bit did not assert!\n"); ++ ++ if ( !timeout ) ++ printk(XENLOG_WARNING ++ "AMD IOMMU %04x:%02x:%02x.%u: %scompletion wait took %lums\n", ++ iommu->seg, PCI_BUS(iommu->bdf), ++ PCI_SLOT(iommu->bdf), PCI_FUNC(iommu->bdf), ++ timeout_base ? "iotlb " : "", ++ (NOW() - start) / 10000000); + } + + /* Build low level iommu command messages */ +@@ -294,7 +305,7 @@ void amd_iommu_flush_iotlb(u8 devfn, con + /* send INVALIDATE_IOTLB_PAGES command */ + spin_lock_irqsave(&iommu->lock, flags); + invalidate_iotlb_pages(iommu, maxpend, 0, queueid, daddr, req_id, order); +- flush_command_buffer(iommu); ++ flush_command_buffer(iommu, iommu_dev_iotlb_timeout); + spin_unlock_irqrestore(&iommu->lock, flags); + } + +@@ -331,7 +342,7 @@ static void _amd_iommu_flush_pages(struc + { + spin_lock_irqsave(&iommu->lock, flags); + invalidate_iommu_pages(iommu, daddr, dom_id, order); +- flush_command_buffer(iommu); ++ flush_command_buffer(iommu, 0); + spin_unlock_irqrestore(&iommu->lock, flags); + } + +@@ -355,7 +366,7 @@ void amd_iommu_flush_device(struct amd_i + ASSERT( spin_is_locked(&iommu->lock) ); + + invalidate_dev_table_entry(iommu, bdf); +- flush_command_buffer(iommu); ++ flush_command_buffer(iommu, 0); + } + + void amd_iommu_flush_intremap(struct amd_iommu *iommu, uint16_t bdf) +@@ -363,7 +374,7 @@ void amd_iommu_flush_intremap(struct amd + ASSERT( spin_is_locked(&iommu->lock) ); + + invalidate_interrupt_table(iommu, bdf); +- flush_command_buffer(iommu); ++ flush_command_buffer(iommu, 0); + } + + void amd_iommu_flush_all_caches(struct amd_iommu *iommu) +@@ -371,7 +382,7 @@ void amd_iommu_flush_all_caches(struct a + ASSERT( spin_is_locked(&iommu->lock) ); + + invalidate_iommu_all(iommu); +- flush_command_buffer(iommu); ++ flush_command_buffer(iommu, 0); + } + + void amd_iommu_send_guest_cmd(struct amd_iommu *iommu, u32 cmd[]) +@@ -381,7 +392,8 @@ void amd_iommu_send_guest_cmd(struct amd + spin_lock_irqsave(&iommu->lock, flags); + + send_iommu_command(iommu, cmd); +- flush_command_buffer(iommu); ++ /* TBD: Timeout selection may require peeking into cmd[]. */ ++ flush_command_buffer(iommu, 0); + + spin_unlock_irqrestore(&iommu->lock, flags); + } diff --git a/xsa375.patch b/xsa375.patch new file mode 100644 index 0000000..aa2e5ad --- /dev/null +++ b/xsa375.patch @@ -0,0 +1,50 @@ +From: Andrew Cooper +Subject: x86/spec-ctrl: Protect against Speculative Code Store Bypass + +Modern x86 processors have far-better-than-architecturally-guaranteed self +modifying code detection. Typically, when a write hits an instruction in +flight, a Machine Clear occurs to flush stale content in the frontend and +backend. + +For self modifying code, before a write which hits an instruction in flight +retires, the frontend can speculatively decode and execute the old instruction +stream. Speculation of this form can suffer from type confusion in registers, +and potentially leak data. + +Furthermore, updates are typically byte-wise, rather than atomic. Depending +on timing, speculation can race ahead multiple times between individual +writes, and execute the transiently-malformed instruction stream. + +Xen has stubs which are used in certain cases for emulation purposes. Inhibit +speculation between updating the stub and executing it. + +This is XSA-375 / CVE-2021-0089. + +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich + +diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c +index 8889509d2a..11467a1e3a 100644 +--- a/xen/arch/x86/pv/emul-priv-op.c ++++ b/xen/arch/x86/pv/emul-priv-op.c +@@ -138,6 +138,8 @@ static io_emul_stub_t *io_emul_stub_setup(struct priv_op_ctxt *ctxt, u8 opcode, + /* Runtime confirmation that we haven't clobbered an adjacent stub. */ + BUG_ON(STUB_BUF_SIZE / 2 < (p - ctxt->io_emul_stub)); + ++ block_speculation(); /* SCSB */ ++ + /* Handy function-typed pointer to the stub. */ + return (void *)stub_va; + +diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c +index c25d88d0d8..f42ff2a837 100644 +--- a/xen/arch/x86/x86_emulate/x86_emulate.c ++++ b/xen/arch/x86/x86_emulate/x86_emulate.c +@@ -1257,6 +1257,7 @@ static inline int mkec(uint8_t e, int32_t ec, ...) + # define invoke_stub(pre, post, constraints...) do { \ + stub_exn.info = (union stub_exception_token) { .raw = ~0 }; \ + stub_exn.line = __LINE__; /* Utility outweighs livepatching cost */ \ ++ block_speculation(); /* SCSB */ \ + asm volatile ( pre "\n\tINDIRECT_CALL %[stub]\n\t" post "\n" \ + ".Lret%=:\n\t" \ + ".pushsection .fixup,\"ax\"\n" \ diff --git a/xsa377.patch b/xsa377.patch new file mode 100644 index 0000000..1a1887b --- /dev/null +++ b/xsa377.patch @@ -0,0 +1,27 @@ +From: Andrew Cooper +Subject: x86/spec-ctrl: Mitigate TAA after S3 resume + +The user chosen setting for MSR_TSX_CTRL needs restoring after S3. + +All APs get the correct setting via start_secondary(), but the BSP was missed +out. + +This is XSA-377 / CVE-2021-28690. + +Fixes: 8c4330818f6 ("x86/spec-ctrl: Mitigate the TSX Asynchronous Abort sidechannel") +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich + +diff --git a/xen/arch/x86/acpi/power.c b/xen/arch/x86/acpi/power.c +index 91a8c4d0bd..31a56f02d0 100644 +--- a/xen/arch/x86/acpi/power.c ++++ b/xen/arch/x86/acpi/power.c +@@ -288,6 +288,8 @@ static int enter_state(u32 state) + + microcode_update_one(); + ++ tsx_init(); /* Needs microcode. May change HLE/RTM feature bits. */ ++ + if ( !recheck_cpu_features(0) ) + panic("Missing previously available feature(s)\n"); +