From bf947f8f9369e91f4cc0a1f3bef036338e2eecd2 Mon Sep 17 00:00:00 2001 From: Michael Young Date: Mar 21 2023 17:20:08 +0000 Subject: 3 security issues (#2180425) x86 shadow plus log-dirty mode use-after-free [XSA-427, CVE-2022-42332] x86/HVM pinned cache attributes mis-handling [XSA-428, CVE-2022-42333, CVE-2022-42334] x86: speculative vulnerability in 32bit SYSCALL path [XSA-429, CVE-2022-42331] --- diff --git a/xen.spec b/xen.spec index 60e5db9..8ae2d41 100644 --- a/xen.spec +++ b/xen.spec @@ -55,7 +55,7 @@ Summary: Xen is a virtual machine monitor Name: xen Version: 4.17.0 -Release: 7%{?dist} +Release: 8%{?dist} License: GPLv2+ and LGPLv2+ and BSD URL: http://xen.org/ Source0: https://downloads.xenproject.org/release/xen/%{version}/xen-%{version}.tar.gz @@ -112,6 +112,10 @@ Patch46: xen.efi.build.patch Patch47: xen.gcc13.fixes.patch Patch48: xsa425.patch Patch49: xsa426.patch +Patch50: xsa427.patch +Patch51: xsa428-4.17-1.patch +Patch52: xsa428-4.17-2.patch +Patch53: xsa429.patch %if %build_qemutrad @@ -326,6 +330,10 @@ manage Xen virtual machines. %patch47 -p1 %patch48 -p1 %patch49 -p1 +%patch50 -p1 +%patch51 -p1 +%patch52 -p1 +%patch53 -p1 # qemu-xen-traditional patches pushd tools/qemu-xen-traditional @@ -933,6 +941,14 @@ fi %endif %changelog +* Tue Mar 21 2023 Michael Young - 4.17.0-8 +- 3 security issues (#2180425) + x86 shadow plus log-dirty mode use-after-free [XSA-427, CVE-2022-42332] + x86/HVM pinned cache attributes mis-handling [XSA-428, CVE-2022-42333, + CVE-2022-42334] + x86: speculative vulnerability in 32bit SYSCALL path [XSA-429, + CVE-2022-42331] + * Sat Feb 18 2023 Michael Young - 4.17.0-7 - use OVMF.fd from new edk2-ovmf-xen package as ovmf.bin file built from edk2-ovmf package no longer supports xen (#2170930) diff --git a/xsa427.patch b/xsa427.patch new file mode 100644 index 0000000..861f81d --- /dev/null +++ b/xsa427.patch @@ -0,0 +1,76 @@ +From: Jan Beulich +Subject: x86/shadow: account for log-dirty mode when pre-allocating + +Pre-allocation is intended to ensure that in the course of constructing +or updating shadows there won't be any risk of just made shadows or +shadows being acted upon can disappear under our feet. The amount of +pages pre-allocated then, however, needs to account for all possible +subsequent allocations. While the use in sh_page_fault() accounts for +all shadows which may need making, so far it didn't account for +allocations coming from log-dirty tracking (which piggybacks onto the +P2M allocation functions). + +Since shadow_prealloc() takes a count of shadows (or other data +structures) rather than a count of pages, putting the adjustment at the +call site of this function won't work very well: We simply can't express +the correct count that way in all cases. Instead take care of this in +the function itself, by "snooping" for L1 type requests. (While not +applicable right now, future new request sites of L1 tables would then +also be covered right away.) + +It is relevant to note here that pre-allocations like the one done from +shadow_alloc_p2m_page() are benign when they fall in the "scope" of an +earlier pre-alloc which already included that count: The inner call will +simply find enough pages available then; it'll bail right away. + +This is CVE-2022-42332 / XSA-427. + +Signed-off-by: Jan Beulich +Reviewed-by: Tim Deegan +--- +v2: Entirely different approach. 
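
For illustration (not part of the upstream patch): the pre-allocation adjustment reserves one page per level of the log-dirty tracking tree, which the new paging_logdirty_levels() macro added below computes. A minimal standalone C sketch of that arithmetic, assuming the usual x86-64 values of PADDR_BITS = 52, PAGE_SHIFT = 12 and an 8-byte mfn_t (so ilog2(sizeof(mfn_t)) = 3):

/*
 * Standalone sketch only: reproduces the arithmetic behind
 * paging_logdirty_levels().  One leaf bitmap page tracks
 * 2^(PAGE_SHIFT + 3) PFNs; each higher level holds
 * PAGE_SIZE / sizeof(mfn_t) = 512 entries, i.e. 9 PFN bits per level.
 */
#include <stdio.h>

#define PADDR_BITS         52
#define PAGE_SHIFT         12
#define LOG2_MFN_T_SIZE     3   /* assumed ilog2(sizeof(mfn_t)) on 64-bit */
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
    unsigned int levels =
        DIV_ROUND_UP(PADDR_BITS - PAGE_SHIFT - (PAGE_SHIFT + 3),
                     PAGE_SHIFT - LOG2_MFN_T_SIZE) + 1;

    /* With the values above this prints 4, matching the
       BUILD_BUG_ON(paging_logdirty_levels() != 4) added in paging.c. */
    printf("log-dirty levels: %u\n", levels);
    return 0;
}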
+ +--- a/xen/arch/x86/include/asm/paging.h ++++ b/xen/arch/x86/include/asm/paging.h +@@ -189,6 +189,10 @@ bool paging_mfn_is_dirty(const struct do + #define L4_LOGDIRTY_IDX(pfn) ((pfn_x(pfn) >> (PAGE_SHIFT + 3 + PAGETABLE_ORDER * 2)) & \ + (LOGDIRTY_NODE_ENTRIES-1)) + ++#define paging_logdirty_levels() \ ++ (DIV_ROUND_UP(PADDR_BITS - PAGE_SHIFT - (PAGE_SHIFT + 3), \ ++ PAGE_SHIFT - ilog2(sizeof(mfn_t))) + 1) ++ + #ifdef CONFIG_HVM + /* VRAM dirty tracking support */ + struct sh_dirty_vram { +--- a/xen/arch/x86/mm/paging.c ++++ b/xen/arch/x86/mm/paging.c +@@ -282,6 +282,7 @@ void paging_mark_pfn_dirty(struct domain + if ( unlikely(!VALID_M2P(pfn_x(pfn))) ) + return; + ++ BUILD_BUG_ON(paging_logdirty_levels() != 4); + i1 = L1_LOGDIRTY_IDX(pfn); + i2 = L2_LOGDIRTY_IDX(pfn); + i3 = L3_LOGDIRTY_IDX(pfn); +--- a/xen/arch/x86/mm/shadow/common.c ++++ b/xen/arch/x86/mm/shadow/common.c +@@ -1011,7 +1011,17 @@ bool shadow_prealloc(struct domain *d, u + if ( unlikely(d->is_dying) ) + return false; + +- ret = _shadow_prealloc(d, shadow_size(type) * count); ++ count *= shadow_size(type); ++ /* ++ * Log-dirty handling may result in allocations when populating its ++ * tracking structures. Tie this to the caller requesting space for L1 ++ * shadows. ++ */ ++ if ( paging_mode_log_dirty(d) && ++ ((SHF_L1_ANY | SHF_FL1_ANY) & (1u << type)) ) ++ count += paging_logdirty_levels(); ++ ++ ret = _shadow_prealloc(d, count); + if ( !ret && (!d->is_shutting_down || d->shutdown_code != SHUTDOWN_crash) ) + /* + * Failing to allocate memory required for shadow usage can only result in diff --git a/xsa428-4.17-1.patch b/xsa428-4.17-1.patch new file mode 100644 index 0000000..3852f08 --- /dev/null +++ b/xsa428-4.17-1.patch @@ -0,0 +1,40 @@ +From: Jan Beulich +Subject: x86/HVM: bound number of pinned cache attribute regions + +This is exposed via DMOP, i.e. to potentially not fully privileged +device models. With that we may not permit registration of an (almost) +unbounded amount of such regions. + +This is CVE-2022-42333 / part of XSA-428. + +Reported-by: Andrew Cooper +Fixes: 642123c5123f ("x86/hvm: provide XEN_DMOP_pin_memory_cacheattr") +Signed-off-by: Jan Beulich +Reviewed-by: Andrew Cooper + +--- a/xen/arch/x86/hvm/mtrr.c ++++ b/xen/arch/x86/hvm/mtrr.c +@@ -595,6 +595,7 @@ int hvm_set_mem_pinned_cacheattr(struct + uint64_t gfn_end, uint32_t type) + { + struct hvm_mem_pinned_cacheattr_range *range; ++ unsigned int nr = 0; + int rc = 1; + + if ( !is_hvm_domain(d) ) +@@ -666,11 +667,15 @@ int hvm_set_mem_pinned_cacheattr(struct + rc = -EBUSY; + break; + } ++ ++nr; + } + rcu_read_unlock(&pinned_cacheattr_rcu_lock); + if ( rc <= 0 ) + return rc; + ++ if ( nr >= 64 /* The limit is arbitrary. */ ) ++ return -ENOSPC; ++ + range = xzalloc(struct hvm_mem_pinned_cacheattr_range); + if ( range == NULL ) + return -ENOMEM; diff --git a/xsa428-4.17-2.patch b/xsa428-4.17-2.patch new file mode 100644 index 0000000..087230a --- /dev/null +++ b/xsa428-4.17-2.patch @@ -0,0 +1,114 @@ +From: Jan Beulich +Subject: x86/HVM: serialize pinned cache attribute list manipulation + +While the RCU variants of list insertion and removal allow lockless list +traversal (with RCU just read-locked), insertions and removals still +need serializing amongst themselves. To keep things simple, use the +domain lock for this purpose. + +This is CVE-2022-42334 / part of XSA-428. 
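
For illustration (not Xen code): the reworked insertion path below allocates the new entry up front, performs the duplicate/overlap checks, the 64-entry bound from the first XSA-428 patch, and the list_add_rcu() all under the domain lock, then frees the allocation afterwards if it was not inserted. A minimal userspace sketch of that pattern, with a pthread mutex standing in for the domain lock and a plain singly linked list for the RCU-protected range list (both stand-ins; the overlap checks are omitted):

#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct range {
    unsigned long start, end;
    struct range *next;
};

static struct range *ranges;
static unsigned int nr_ranges;
static pthread_mutex_t ranges_lock = PTHREAD_MUTEX_INITIALIZER;

static int add_range(unsigned long start, unsigned long end)
{
    /* Allocate before taking the lock, as the fix does with xzalloc(). */
    struct range *newr = calloc(1, sizeof(*newr));
    int rc = 0;

    pthread_mutex_lock(&ranges_lock);

    if ( nr_ranges >= 64 )        /* arbitrary bound, as in XSA-428 patch 1 */
        rc = -ENOSPC;
    else if ( !newr )
        rc = -ENOMEM;
    else
    {
        newr->start = start;
        newr->end = end;
        newr->next = ranges;      /* the real code uses list_add_rcu() here */
        ranges = newr;
        ++nr_ranges;
        newr = NULL;              /* ownership passed to the list */
    }

    pthread_mutex_unlock(&ranges_lock);

    free(newr);                   /* no-op if the entry was inserted */
    return rc;
}

int main(void)
{
    printf("add_range: %d\n", add_range(0x1000, 0x1fff));
    return 0;
}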
+ +Fixes: 642123c5123f ("x86/hvm: provide XEN_DMOP_pin_memory_cacheattr") +Signed-off-by: Jan Beulich +Reviewed-by: Julien Grall + +--- a/xen/arch/x86/hvm/mtrr.c ++++ b/xen/arch/x86/hvm/mtrr.c +@@ -594,7 +594,7 @@ static void cf_check free_pinned_cacheat + int hvm_set_mem_pinned_cacheattr(struct domain *d, uint64_t gfn_start, + uint64_t gfn_end, uint32_t type) + { +- struct hvm_mem_pinned_cacheattr_range *range; ++ struct hvm_mem_pinned_cacheattr_range *range, *newr; + unsigned int nr = 0; + int rc = 1; + +@@ -608,14 +608,15 @@ int hvm_set_mem_pinned_cacheattr(struct + { + case XEN_DOMCTL_DELETE_MEM_CACHEATTR: + /* Remove the requested range. */ +- rcu_read_lock(&pinned_cacheattr_rcu_lock); +- list_for_each_entry_rcu ( range, +- &d->arch.hvm.pinned_cacheattr_ranges, +- list ) ++ domain_lock(d); ++ list_for_each_entry ( range, ++ &d->arch.hvm.pinned_cacheattr_ranges, ++ list ) + if ( range->start == gfn_start && range->end == gfn_end ) + { +- rcu_read_unlock(&pinned_cacheattr_rcu_lock); + list_del_rcu(&range->list); ++ domain_unlock(d); ++ + type = range->type; + call_rcu(&range->rcu, free_pinned_cacheattr_entry); + p2m_memory_type_changed(d); +@@ -636,7 +637,7 @@ int hvm_set_mem_pinned_cacheattr(struct + } + return 0; + } +- rcu_read_unlock(&pinned_cacheattr_rcu_lock); ++ domain_unlock(d); + return -ENOENT; + + case PAT_TYPE_UC_MINUS: +@@ -651,7 +652,10 @@ int hvm_set_mem_pinned_cacheattr(struct + return -EINVAL; + } + +- rcu_read_lock(&pinned_cacheattr_rcu_lock); ++ newr = xzalloc(struct hvm_mem_pinned_cacheattr_range); ++ ++ domain_lock(d); ++ + list_for_each_entry_rcu ( range, + &d->arch.hvm.pinned_cacheattr_ranges, + list ) +@@ -669,27 +673,34 @@ int hvm_set_mem_pinned_cacheattr(struct + } + ++nr; + } +- rcu_read_unlock(&pinned_cacheattr_rcu_lock); ++ + if ( rc <= 0 ) +- return rc; ++ /* nothing */; ++ else if ( nr >= 64 /* The limit is arbitrary. */ ) ++ rc = -ENOSPC; ++ else if ( !newr ) ++ rc = -ENOMEM; ++ else ++ { ++ newr->start = gfn_start; ++ newr->end = gfn_end; ++ newr->type = type; + +- if ( nr >= 64 /* The limit is arbitrary. */ ) +- return -ENOSPC; ++ list_add_rcu(&newr->list, &d->arch.hvm.pinned_cacheattr_ranges); ++ ++ newr = NULL; ++ rc = 0; ++ } + +- range = xzalloc(struct hvm_mem_pinned_cacheattr_range); +- if ( range == NULL ) +- return -ENOMEM; ++ domain_unlock(d); + +- range->start = gfn_start; +- range->end = gfn_end; +- range->type = type; ++ xfree(newr); + +- list_add_rcu(&range->list, &d->arch.hvm.pinned_cacheattr_ranges); + p2m_memory_type_changed(d); + if ( type != PAT_TYPE_WRBACK ) + flush_all(FLUSH_CACHE); + +- return 0; ++ return rc; + } + + static int cf_check hvm_save_mtrr_msr(struct vcpu *v, hvm_domain_context_t *h) diff --git a/xsa429.patch b/xsa429.patch new file mode 100644 index 0000000..443869f --- /dev/null +++ b/xsa429.patch @@ -0,0 +1,46 @@ +From: Andrew Cooper +Subject: x86/spec-ctrl: Defer CR4_PV32_RESTORE on the cstar_enter path + +As stated (correctly) by the comment next to SPEC_CTRL_ENTRY_FROM_PV, between +the two hunks visible in the patch, RET's are not safe prior to this point. + +CR4_PV32_RESTORE hides a CALL/RET pair in certain configurations (PV32 +compiled in, SMEP or SMAP active), and the RET can be attacked with one of +several known speculative issues. + +Furthermore, CR4_PV32_RESTORE also hides a reference to the cr4_pv32_mask +global variable, which is not safe when XPTI is active before restoring Xen's +full pagetables. 
+ +This crash has gone unnoticed because it is only AMD CPUs which permit the +SYSCALL instruction in compatibility mode, and these are not vulnerable to +Meltdown so don't activate XPTI by default. + +This is XSA-429 / CVE-2022-42331 + +Fixes: 5e7962901131 ("x86/entry: Organise the use of MSR_SPEC_CTRL at each entry/exit point") +Fixes: 5784de3e2067 ("x86: Meltdown band-aid against malicious 64-bit PV guests") +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich + +diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S +index ae012851819a..7675a59ff057 100644 +--- a/xen/arch/x86/x86_64/entry.S ++++ b/xen/arch/x86/x86_64/entry.S +@@ -288,7 +288,6 @@ ENTRY(cstar_enter) + ALTERNATIVE "", "setssbsy", X86_FEATURE_XEN_SHSTK + #endif + push %rax /* Guest %rsp */ +- CR4_PV32_RESTORE + movq 8(%rsp), %rax /* Restore guest %rax. */ + movq $FLAT_USER_SS32, 8(%rsp) /* Assume a 64bit domain. Compat handled lower. */ + pushq %r11 +@@ -312,6 +311,8 @@ ENTRY(cstar_enter) + .Lcstar_cr3_okay: + sti + ++ CR4_PV32_RESTORE ++ + movq STACK_CPUINFO_FIELD(current_vcpu)(%rbx), %rbx + + #ifdef CONFIG_PV32