From bf947f8f9369e91f4cc0a1f3bef036338e2eecd2 Mon Sep 17 00:00:00 2001 From: Michael Young Date: Mar 21 2023 17:20:08 +0000 Subject: 3 security issues (#2180425) x86 shadow plus log-dirty mode use-after-free [XSA-427, CVE-2022-42332] x86/HVM pinned cache attributes mis-handling [XSA-428, CVE-2022-42333, CVE-2022-42334] x86: speculative vulnerability in 32bit SYSCALL path [XSA-429, CVE-2022-42331] --- diff --git a/xen.spec b/xen.spec index 60e5db9..8ae2d41 100644 --- a/xen.spec +++ b/xen.spec @@ -55,7 +55,7 @@ Summary: Xen is a virtual machine monitor Name: xen Version: 4.17.0 -Release: 7%{?dist} +Release: 8%{?dist} License: GPLv2+ and LGPLv2+ and BSD URL: http://xen.org/ Source0: https://downloads.xenproject.org/release/xen/%{version}/xen-%{version}.tar.gz @@ -112,6 +112,10 @@ Patch46: xen.efi.build.patch Patch47: xen.gcc13.fixes.patch Patch48: xsa425.patch Patch49: xsa426.patch +Patch50: xsa427.patch +Patch51: xsa428-4.17-1.patch +Patch52: xsa428-4.17-2.patch +Patch53: xsa429.patch %if %build_qemutrad @@ -326,6 +330,10 @@ manage Xen virtual machines. %patch47 -p1 %patch48 -p1 %patch49 -p1 +%patch50 -p1 +%patch51 -p1 +%patch52 -p1 +%patch53 -p1 # qemu-xen-traditional patches pushd tools/qemu-xen-traditional @@ -933,6 +941,14 @@ fi %endif %changelog +* Tue Mar 21 2023 Michael Young - 4.17.0-8 +- 3 security issues (#2180425) + x86 shadow plus log-dirty mode use-after-free [XSA-427, CVE-2022-42332] + x86/HVM pinned cache attributes mis-handling [XSA-428, CVE-2022-42333, + CVE-2022-42334] + x86: speculative vulnerability in 32bit SYSCALL path [XSA-429, + CVE-2022-42331] + * Sat Feb 18 2023 Michael Young - 4.17.0-7 - use OVMF.fd from new edk2-ovmf-xen package as ovmf.bin file built from edk2-ovmf package no longer supports xen (#2170930) diff --git a/xsa427.patch b/xsa427.patch new file mode 100644 index 0000000..861f81d --- /dev/null +++ b/xsa427.patch @@ -0,0 +1,76 @@ +From: Jan Beulich +Subject: x86/shadow: account for log-dirty mode when pre-allocating + +Pre-allocation is intended to ensure that in the course of constructing +or updating shadows there won't be any risk of just made shadows or +shadows being acted upon can disappear under our feet. The amount of +pages pre-allocated then, however, needs to account for all possible +subsequent allocations. While the use in sh_page_fault() accounts for +all shadows which may need making, so far it didn't account for +allocations coming from log-dirty tracking (which piggybacks onto the +P2M allocation functions). + +Since shadow_prealloc() takes a count of shadows (or other data +structures) rather than a count of pages, putting the adjustment at the +call site of this function won't work very well: We simply can't express +the correct count that way in all cases. Instead take care of this in +the function itself, by "snooping" for L1 type requests. (While not +applicable right now, future new request sites of L1 tables would then +also be covered right away.) + +It is relevant to note here that pre-allocations like the one done from +shadow_alloc_p2m_page() are benign when they fall in the "scope" of an +earlier pre-alloc which already included that count: The inner call will +simply find enough pages available then; it'll bail right away. + +This is CVE-2022-42332 / XSA-427. + +Signed-off-by: Jan Beulich +Reviewed-by: Tim Deegan +--- +v2: Entirely different approach. 
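
For illustration (not part of the upstream patch): the pre-allocation adjustment reserves one page per level of the log-dirty tracking tree, which the new paging_logdirty_levels() macro added below computes. A minimal standalone C sketch of that arithmetic, assuming the usual x86-64 values of PADDR_BITS = 52, PAGE_SHIFT = 12 and an 8-byte mfn_t (so ilog2(sizeof(mfn_t)) = 3):

/*
 * Standalone sketch only: reproduces the arithmetic behind
 * paging_logdirty_levels().  One leaf bitmap page tracks
 * 2^(PAGE_SHIFT + 3) PFNs; each higher level holds
 * PAGE_SIZE / sizeof(mfn_t) = 512 entries, i.e. 9 PFN bits per level.
 */
#include <stdio.h>

#define PADDR_BITS         52
#define PAGE_SHIFT         12
#define LOG2_MFN_T_SIZE     3   /* assumed ilog2(sizeof(mfn_t)) on 64-bit */
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
    unsigned int levels =
        DIV_ROUND_UP(PADDR_BITS - PAGE_SHIFT - (PAGE_SHIFT + 3),
                     PAGE_SHIFT - LOG2_MFN_T_SIZE) + 1;

    /* With the values above this prints 4, matching the
       BUILD_BUG_ON(paging_logdirty_levels() != 4) added in paging.c. */
    printf("log-dirty levels: %u\n", levels);
    return 0;
}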
+ +--- a/xen/arch/x86/include/asm/paging.h ++++ b/xen/arch/x86/include/asm/paging.h +@@ -189,6 +189,10 @@ bool paging_mfn_is_dirty(const struct do + #define L4_LOGDIRTY_IDX(pfn) ((pfn_x(pfn) >> (PAGE_SHIFT + 3 + PAGETABLE_ORDER * 2)) & \ + (LOGDIRTY_NODE_ENTRIES-1)) + ++#define paging_logdirty_levels() \ ++ (DIV_ROUND_UP(PADDR_BITS - PAGE_SHIFT - (PAGE_SHIFT + 3), \ ++ PAGE_SHIFT - ilog2(sizeof(mfn_t))) + 1) ++ + #ifdef CONFIG_HVM + /* VRAM dirty tracking support */ + struct sh_dirty_vram { +--- a/xen/arch/x86/mm/paging.c ++++ b/xen/arch/x86/mm/paging.c +@@ -282,6 +282,7 @@ void paging_mark_pfn_dirty(struct domain + if ( unlikely(!VALID_M2P(pfn_x(pfn))) ) + return; + ++ BUILD_BUG_ON(paging_logdirty_levels() != 4); + i1 = L1_LOGDIRTY_IDX(pfn); + i2 = L2_LOGDIRTY_IDX(pfn); + i3 = L3_LOGDIRTY_IDX(pfn); +--- a/xen/arch/x86/mm/shadow/common.c ++++ b/xen/arch/x86/mm/shadow/common.c +@@ -1011,7 +1011,17 @@ bool shadow_prealloc(struct domain *d, u + if ( unlikely(d->is_dying) ) + return false; + +- ret = _shadow_prealloc(d, shadow_size(type) * count); ++ count *= shadow_size(type); ++ /* ++ * Log-dirty handling may result in allocations when populating its ++ * tracking structures. Tie this to the caller requesting space for L1 ++ * shadows. ++ */ ++ if ( paging_mode_log_dirty(d) && ++ ((SHF_L1_ANY | SHF_FL1_ANY) & (1u << type)) ) ++ count += paging_logdirty_levels(); ++ ++ ret = _shadow_prealloc(d, count); + if ( !ret && (!d->is_shutting_down || d->shutdown_code != SHUTDOWN_crash) ) + /* + * Failing to allocate memory required for shadow usage can only result in diff --git a/xsa428-4.17-1.patch b/xsa428-4.17-1.patch new file mode 100644 index 0000000..3852f08 --- /dev/null +++ b/xsa428-4.17-1.patch @@ -0,0 +1,40 @@ +From: Jan Beulich +Subject: x86/HVM: bound number of pinned cache attribute regions + +This is exposed via DMOP, i.e. to potentially not fully privileged +device models. With that we may not permit registration of an (almost) +unbounded amount of such regions. + +This is CVE-2022-42333 / part of XSA-428. + +Reported-by: Andrew Cooper +Fixes: 642123c5123f ("x86/hvm: provide XEN_DMOP_pin_memory_cacheattr") +Signed-off-by: Jan Beulich +Reviewed-by: Andrew Cooper + +--- a/xen/arch/x86/hvm/mtrr.c ++++ b/xen/arch/x86/hvm/mtrr.c +@@ -595,6 +595,7 @@ int hvm_set_mem_pinned_cacheattr(struct + uint64_t gfn_end, uint32_t type) + { + struct hvm_mem_pinned_cacheattr_range *range; ++ unsigned int nr = 0; + int rc = 1; + + if ( !is_hvm_domain(d) ) +@@ -666,11 +667,15 @@ int hvm_set_mem_pinned_cacheattr(struct + rc = -EBUSY; + break; + } ++ ++nr; + } + rcu_read_unlock(&pinned_cacheattr_rcu_lock); + if ( rc <= 0 ) + return rc; + ++ if ( nr >= 64 /* The limit is arbitrary. */ ) ++ return -ENOSPC; ++ + range = xzalloc(struct hvm_mem_pinned_cacheattr_range); + if ( range == NULL ) + return -ENOMEM; diff --git a/xsa428-4.17-2.patch b/xsa428-4.17-2.patch new file mode 100644 index 0000000..087230a --- /dev/null +++ b/xsa428-4.17-2.patch @@ -0,0 +1,114 @@ +From: Jan Beulich +Subject: x86/HVM: serialize pinned cache attribute list manipulation + +While the RCU variants of list insertion and removal allow lockless list +traversal (with RCU just read-locked), insertions and removals still +need serializing amongst themselves. To keep things simple, use the +domain lock for this purpose. + +This is CVE-2022-42334 / part of XSA-428. 
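
For illustration (not Xen code): the reworked insertion path below allocates the new entry up front, performs the duplicate/overlap checks, the 64-entry bound from the first XSA-428 patch, and the list_add_rcu() all under the domain lock, then frees the allocation afterwards if it was not inserted. A minimal userspace sketch of that pattern, with a pthread mutex standing in for the domain lock and a plain singly linked list for the RCU-protected range list (both stand-ins; the overlap checks are omitted):

#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct range {
    unsigned long start, end;
    struct range *next;
};

static struct range *ranges;
static unsigned int nr_ranges;
static pthread_mutex_t ranges_lock = PTHREAD_MUTEX_INITIALIZER;

static int add_range(unsigned long start, unsigned long end)
{
    /* Allocate before taking the lock, as the fix does with xzalloc(). */
    struct range *newr = calloc(1, sizeof(*newr));
    int rc = 0;

    pthread_mutex_lock(&ranges_lock);

    if ( nr_ranges >= 64 )        /* arbitrary bound, as in XSA-428 patch 1 */
        rc = -ENOSPC;
    else if ( !newr )
        rc = -ENOMEM;
    else
    {
        newr->start = start;
        newr->end = end;
        newr->next = ranges;      /* the real code uses list_add_rcu() here */
        ranges = newr;
        ++nr_ranges;
        newr = NULL;              /* ownership passed to the list */
    }

    pthread_mutex_unlock(&ranges_lock);

    free(newr);                   /* no-op if the entry was inserted */
    return rc;
}

int main(void)
{
    printf("add_range: %d\n", add_range(0x1000, 0x1fff));
    return 0;
}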
+ +Fixes: 642123c5123f ("x86/hvm: provide XEN_DMOP_pin_memory_cacheattr") +Signed-off-by: Jan Beulich +Reviewed-by: Julien Grall + +--- a/xen/arch/x86/hvm/mtrr.c ++++ b/xen/arch/x86/hvm/mtrr.c +@@ -594,7 +594,7 @@ static void cf_check free_pinned_cacheat + int hvm_set_mem_pinned_cacheattr(struct domain *d, uint64_t gfn_start, + uint64_t gfn_end, uint32_t type) + { +- struct hvm_mem_pinned_cacheattr_range *range; ++ struct hvm_mem_pinned_cacheattr_range *range, *newr; + unsigned int nr = 0; + int rc = 1; + +@@ -608,14 +608,15 @@ int hvm_set_mem_pinned_cacheattr(struct + { + case XEN_DOMCTL_DELETE_MEM_CACHEATTR: + /* Remove the requested range. */ +- rcu_read_lock(&pinned_cacheattr_rcu_lock); +- list_for_each_entry_rcu ( range, +- &d->arch.hvm.pinned_cacheattr_ranges, +- list ) ++ domain_lock(d); ++ list_for_each_entry ( range, ++ &d->arch.hvm.pinned_cacheattr_ranges, ++ list ) + if ( range->start == gfn_start && range->end == gfn_end ) + { +- rcu_read_unlock(&pinned_cacheattr_rcu_lock); + list_del_rcu(&range->list); ++ domain_unlock(d); ++ + type = range->type; + call_rcu(&range->rcu, free_pinned_cacheattr_entry); + p2m_memory_type_changed(d); +@@ -636,7 +637,7 @@ int hvm_set_mem_pinned_cacheattr(struct + } + return 0; + } +- rcu_read_unlock(&pinned_cacheattr_rcu_lock); ++ domain_unlock(d); + return -ENOENT; + + case PAT_TYPE_UC_MINUS: +@@ -651,7 +652,10 @@ int hvm_set_mem_pinned_cacheattr(struct + return -EINVAL; + } + +- rcu_read_lock(&pinned_cacheattr_rcu_lock); ++ newr = xzalloc(struct hvm_mem_pinned_cacheattr_range); ++ ++ domain_lock(d); ++ + list_for_each_entry_rcu ( range, + &d->arch.hvm.pinned_cacheattr_ranges, + list ) +@@ -669,27 +673,34 @@ int hvm_set_mem_pinned_cacheattr(struct + } + ++nr; + } +- rcu_read_unlock(&pinned_cacheattr_rcu_lock); ++ + if ( rc <= 0 ) +- return rc; ++ /* nothing */; ++ else if ( nr >= 64 /* The limit is arbitrary. */ ) ++ rc = -ENOSPC; ++ else if ( !newr ) ++ rc = -ENOMEM; ++ else ++ { ++ newr->start = gfn_start; ++ newr->end = gfn_end; ++ newr->type = type; + +- if ( nr >= 64 /* The limit is arbitrary. */ ) +- return -ENOSPC; ++ list_add_rcu(&newr->list, &d->arch.hvm.pinned_cacheattr_ranges); ++ ++ newr = NULL; ++ rc = 0; ++ } + +- range = xzalloc(struct hvm_mem_pinned_cacheattr_range); +- if ( range == NULL ) +- return -ENOMEM; ++ domain_unlock(d); + +- range->start = gfn_start; +- range->end = gfn_end; +- range->type = type; ++ xfree(newr); + +- list_add_rcu(&range->list, &d->arch.hvm.pinned_cacheattr_ranges); + p2m_memory_type_changed(d); + if ( type != PAT_TYPE_WRBACK ) + flush_all(FLUSH_CACHE); + +- return 0; ++ return rc; + } + + static int cf_check hvm_save_mtrr_msr(struct vcpu *v, hvm_domain_context_t *h) diff --git a/xsa429.patch b/xsa429.patch new file mode 100644 index 0000000..443869f --- /dev/null +++ b/xsa429.patch @@ -0,0 +1,46 @@ +From: Andrew Cooper +Subject: x86/spec-ctrl: Defer CR4_PV32_RESTORE on the cstar_enter path + +As stated (correctly) by the comment next to SPEC_CTRL_ENTRY_FROM_PV, between +the two hunks visible in the patch, RET's are not safe prior to this point. + +CR4_PV32_RESTORE hides a CALL/RET pair in certain configurations (PV32 +compiled in, SMEP or SMAP active), and the RET can be attacked with one of +several known speculative issues. + +Furthermore, CR4_PV32_RESTORE also hides a reference to the cr4_pv32_mask +global variable, which is not safe when XPTI is active before restoring Xen's +full pagetables. 
+ +This crash has gone unnoticed because it is only AMD CPUs which permit the +SYSCALL instruction in compatibility mode, and these are not vulnerable to +Meltdown so don't activate XPTI by default. + +This is XSA-429 / CVE-2022-42331 + +Fixes: 5e7962901131 ("x86/entry: Organise the use of MSR_SPEC_CTRL at each entry/exit point") +Fixes: 5784de3e2067 ("x86: Meltdown band-aid against malicious 64-bit PV guests") +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich + +diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S +index ae012851819a..7675a59ff057 100644 +--- a/xen/arch/x86/x86_64/entry.S ++++ b/xen/arch/x86/x86_64/entry.S +@@ -288,7 +288,6 @@ ENTRY(cstar_enter) + ALTERNATIVE "", "setssbsy", X86_FEATURE_XEN_SHSTK + #endif + push %rax /* Guest %rsp */ +- CR4_PV32_RESTORE + movq 8(%rsp), %rax /* Restore guest %rax. */ + movq $FLAT_USER_SS32, 8(%rsp) /* Assume a 64bit domain. Compat handled lower. */ + pushq %r11 +@@ -312,6 +311,8 @@ ENTRY(cstar_enter) + .Lcstar_cr3_okay: + sti + ++ CR4_PV32_RESTORE ++ + movq STACK_CPUINFO_FIELD(current_vcpu)(%rbx), %rbx + + #ifdef CONFIG_PV32