diff --git a/xen.gcc11.fixes.patch b/xen.gcc11.fixes.patch
new file mode 100644
index 0000000..e3c4058
--- /dev/null
+++ b/xen.gcc11.fixes.patch
@@ -0,0 +1,45 @@
+--- xen-4.14.0/xen/include/crypto/rijndael.h.orig	2020-07-23 16:07:51.000000000 +0100
++++ xen-4.14.0/xen/include/crypto/rijndael.h	2020-10-24 14:59:34.349318594 +0100
+@@ -52,7 +52,7 @@
+ 
+ int rijndaelKeySetupEnc(unsigned int [], const unsigned char [], int);
+ int rijndaelKeySetupDec(unsigned int [], const unsigned char [], int);
+-void rijndaelEncrypt(const unsigned int [], int, const unsigned char [],
+-    unsigned char []);
++void rijndaelEncrypt(const unsigned int [], int, const unsigned char [16],
++    unsigned char [16]);
+ 
+ #endif /* __RIJNDAEL_H */
+--- xen-4.14.0/xen/include/crypto/vmac.h.orig	2020-07-23 16:07:51.000000000 +0100
++++ xen-4.14.0/xen/include/crypto/vmac.h	2020-10-24 15:45:49.246467465 +0100
+@@ -142,7 +142,7 @@
+ 
+ #define vmac_update vhash_update
+ 
+-void vhash_update(unsigned char m[],
++void vhash_update(uint8_t *m,
+                   unsigned int mbytes,
+                   vmac_ctx_t *ctx);
+ 
+--- xen-4.14.0/tools/libs/foreignmemory/linux.c.orig	2020-07-23 16:07:51.000000000 +0100
++++ xen-4.14.0/tools/libs/foreignmemory/linux.c	2020-10-25 21:36:00.982040566 +0000
+@@ -162,7 +162,7 @@
+ void *osdep_xenforeignmemory_map(xenforeignmemory_handle *fmem,
+                                  uint32_t dom, void *addr,
+                                  int prot, int flags, size_t num,
+-                                 const xen_pfn_t arr[/*num*/], int err[/*num*/])
++                                 const xen_pfn_t arr[num], int err[num])
+ {
+     int fd = fmem->fd;
+     privcmd_mmapbatch_v2_t ioctlx;
+--- xen-4.14.0/tools/libs/foreignmemory/minios.c.orig	2020-07-23 16:07:51.000000000 +0100
++++ xen-4.14.0/tools/libs/foreignmemory/minios.c	2020-10-26 22:36:12.423883688 +0000
+@@ -42,7 +42,7 @@
+ void *osdep_xenforeignmemory_map(xenforeignmemory_handle *fmem,
+                                  uint32_t dom, void *addr,
+                                  int prot, int flags, size_t num,
+-                                 const xen_pfn_t arr[/*num*/], int err[/*num*/])
++                                 const xen_pfn_t arr[num], int err[num])
+ {
+     unsigned long pt_prot = 0;
+     if (prot & PROT_READ)
diff --git a/xen.spec b/xen.spec
index fb8aad0..531de5f 100644
--- a/xen.spec
+++ b/xen.spec
@@ -58,7 +58,7 @@
 Summary: Xen is a virtual machine monitor
 Name: xen
 Version: 4.14.0
-Release: 6%{?dist}
+Release: 7%{?dist}
 License: GPLv2+ and LGPLv2+ and BSD
 URL: http://xen.org/
 Source0: https://downloads.xenproject.org/release/xen/%{version}/xen-%{version}.tar.gz
@@ -135,6 +135,13 @@ Patch66: xsa346-2.patch
 Patch67: xsa347-4.14-1.patch
 Patch68: xsa347-4.14-2.patch
 Patch69: xsa347-4.14-3.patch
+Patch70: xen.gcc11.fixes.patch
+Patch71: xsa286-4.14-0001-x86-mm-split-L4-and-L3-parts-of-the-walk-out-of-do_p.patch
+Patch72: xsa286-4.14-0002-x86-mm-check-page-types-in-do_page_walk.patch
+Patch73: xsa286-4.14-0003-x86-mm-avoid-using-linear-page-tables-in-map_guest_l.patch
+Patch74: xsa286-4.14-0004-x86-mm-avoid-using-linear-page-tables-in-guest_get_e.patch
+Patch75: xsa286-4.14-0005-x86-mm-avoid-using-top-level-linear-page-tables-in-u.patch
+Patch76: xsa286-4.14-0006-x86-mm-restrict-use-of-linear-page-tables-to-shadow-.patch
 
 
 %if %build_qemutrad
@@ -359,6 +366,13 @@ manage Xen virtual machines.
 %patch67 -p1
 %patch68 -p1
 %patch69 -p1
+%patch70 -p1
+%patch71 -p1
+%patch72 -p1
+%patch73 -p1
+%patch74 -p1
+%patch75 -p1
+%patch76 -p1
 
 # qemu-xen-traditional patches
 pushd tools/qemu-xen-traditional
@@ -629,56 +643,51 @@ fi
 %if %build_hyp
 %post hypervisor
-if [ $1 == 1 -a -f /sbin/grub2-mkconfig ]; then
-  if [ -f /boot/grub2/grub.cfg ]; then
-    /sbin/grub2-mkconfig -o /boot/grub2/grub.cfg
-    sed -i -e '/insmod module2/d' /boot/grub2/grub.cfg
-  fi
-  if [ -f /boot/efi/EFI/fedora/grub.cfg ]; then
-    /sbin/grub2-mkconfig -o /boot/efi/EFI/fedora/grub.cfg
-    sed -i -e '/insmod module2/d' /boot/efi/EFI/fedora/grub.cfg
-  fi
-fi
-if [ -f /sbin/grub2-mkconfig ]; then
-  if [ -f /boot/grub2/grub.cfg ]; then
-    if [ -d /usr/lib/grub/i386-pc ]; then
-      if [ ! -d /boot/grub2/i386-pc ]; then
-        mkdir /boot/grub2/i386-pc
+do_it() {
+  DIR=$1
+  TARGET=$2
+  if [ -d $DIR ]; then
+    if [ ! -d $TARGET ]; then
+      mkdir $TARGET
      fi
-      if [ -f /usr/lib/grub/i386-pc/relocator.mod -a ! -f /boot/grub2/i386-pc/relocator.mod ]; then
-        cp -p /usr/lib/grub/i386-pc/relocator.mod /boot/grub2/i386-pc/relocator.mod
+    if [ -f $DIR/relocator.mod -a ! -f $TARGET/relocator.mod ]; then
+      cp -p $DIR/relocator.mod $TARGET/relocator.mod
      fi
-      if [ -f /usr/lib/grub/i386-pc/multiboot2.mod -a ! -f /boot/grub2/i386-pc/multiboot2.mod ]; then
-        cp -p /usr/lib/grub/i386-pc/multiboot2.mod /boot/grub2/i386-pc/multiboot2.mod
+    if [ -f $DIR/multiboot2.mod -a ! -f $TARGET/multiboot2.mod ]; then
+      cp -p $DIR/multiboot2.mod $TARGET/multiboot2.mod
      fi
    fi
-  fi
-  if [ -f /boot/efi/EFI/fedora/grub.cfg ]; then
-    if [ -d /usr/lib/grub/x86_64-efi ]; then
-      if [ ! -d /boot/efi/EFI/fedora/x86_64-efi ]; then
-        mkdir /boot/efi/EFI/fedora/x86_64-efi
-      fi
-      if [ -f /usr/lib/grub/x86_64-efi/relocator.mod -a ! -f /boot/efi/EFI/fedora/x86_64-efi/relocator.mod ]; then
-        cp -p /usr/lib/grub/x86_64-efi/relocator.mod /boot/efi/EFI/fedora/x86_64-efi/relocator.mod
-      fi
-      if [ -f /usr/lib/grub/x86_64-efi/multiboot2.mod -a ! -f /boot/efi/EFI/fedora/x86_64-efi/multiboot2.mod ]; then
-        cp -p /usr/lib/grub/x86_64-efi/multiboot2.mod /boot/efi/EFI/fedora/x86_64-efi/multiboot2.mod
-      fi
 fi
+}
+if [ $1 == 1 -a -f /sbin/grub2-mkconfig ]; then
+  for f in /boot/grub2/grub.cfg /boot/efi/EFI/fedora/grub.cfg; do
+    if [ -f $f ]; then
+      /sbin/grub2-mkconfig -o $f
+      sed -i -e '/insmod module2/d' $f
    fi
-  fi
+  done
 fi
-
-%postun hypervisor
 if [ -f /sbin/grub2-mkconfig ]; then
   if [ -f /boot/grub2/grub.cfg ]; then
-    /sbin/grub2-mkconfig -o /boot/grub2/grub.cfg
-    sed -i -e '/insmod module2/d' /boot/grub2/grub.cfg
+    DIR=/usr/lib/grub/i386-pc
+    TARGET=/boot/grub2/i386-pc
+    do_it $DIR $TARGET
   fi
   if [ -f /boot/efi/EFI/fedora/grub.cfg ]; then
-    /sbin/grub2-mkconfig -o /boot/efi/EFI/fedora/grub.cfg
-    sed -i -e '/insmod module2/d' /boot/efi/EFI/fedora/grub.cfg
+    DIR=/usr/lib/grub/x86_64-efi
+    TARGET=/boot/efi/EFI/fedora/x86_64-efi
+    do_it $DIR $TARGET
   fi
 fi
+
+%postun hypervisor
+if [ -f /sbin/grub2-mkconfig ]; then
+  for f in /boot/grub2/grub.cfg /boot/efi/EFI/fedora/grub.cfg; do
+    if [ -f $f ]; then
+      /sbin/grub2-mkconfig -o $f
+      sed -i -e '/insmod module2/d' $f
+    fi
+  done
+fi
 %endif
 
 %if %build_ocaml
@@ -957,10 +966,21 @@ fi
 %endif
 
 %changelog
+* Wed Oct 28 2020 Michael Young <m.a.young@durham.ac.uk> - 4.14.0-7
+- x86 PV guest INVLPG-like flushes may leave stale TLB entries
+  [XSA-286, CVE-2020-27674] (#1891092)
+- simplify grub scripts (patches from Thierry Vignaud <thierry.vignaud@gmail.com>)
+- some fixes for gcc 11
+
+* Thu Oct 22 2020 Michael Young <m.a.young@durham.ac.uk> - 4.14.0-6.1
+
 * Tue Oct 20 2020 Michael Young <m.a.young@durham.ac.uk> - 4.14.0-6
-- x86: Race condition in Xen mapping code [XSA-345]
-- undue deferral of IOMMU TLB flushes [XSA-346]
-- unsafe AMD IOMMU page table updates [XSA-347]
+- x86: Race condition in Xen mapping code [XSA-345, CVE-2020-27672]
+  (#1891097)
+- undue deferral of IOMMU TLB flushes [XSA-346, CVE-2020-27671]
+  (#1891093)
+- unsafe AMD IOMMU page table updates [XSA-347, CVE-2020-27670]
+  (#1891088)
 
 * Tue Sep 22 2020 Michael Young <m.a.young@durham.ac.uk> - 4.14.0-5
 - x86 pv: Crash when handling guest access to MSR_MISC_ENABLE [XSA-333, CVE-2020-25602] (#1881619)
diff --git a/xsa286-4.14-0001-x86-mm-split-L4-and-L3-parts-of-the-walk-out-of-do_p.patch b/xsa286-4.14-0001-x86-mm-split-L4-and-L3-parts-of-the-walk-out-of-do_p.patch
new file mode 100644
index 0000000..6459e1f
--- /dev/null
+++ b/xsa286-4.14-0001-x86-mm-split-L4-and-L3-parts-of-the-walk-out-of-do_p.patch
@@ -0,0 +1,73 @@
+From: Jan Beulich <jbeulich@suse.com>
+Subject: x86/mm: split L4 and L3 parts of the walk out of do_page_walk()
+
+The L3 one at least is going to be re-used by a subsequent patch, and
+splitting the L4 one then as well seems only natural.
+
+This is part of XSA-286.
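
The shape of this refactoring is easier to see outside the hypervisor. The
following stand-alone program is an illustration only — toy 16-entry tables
and a plain pointer with a PRESENT bit standing in for real page table
entries, no locking, none of it Xen code — but it mirrors how each level's
helper returns an empty entry on failure, so the next level down can simply
compose it the way page_walk_get_l3e() builds on page_walk_get_l4e() in the
patch below:

    /* Toy model of the split page walk -- illustration, not Xen code. */
    #include <stdint.h>
    #include <stdio.h>

    #define PRESENT 0x1u
    #define IDX(va, lvl) (((va) >> (4 * (lvl))) & 0xf)  /* 16-entry tables */

    static uint64_t *table_of(uint64_t e)
    {
        return (uint64_t *)(uintptr_t)(e & ~(uint64_t)PRESENT);
    }

    /* Resolve only the top level; an empty (zero) entry signals failure... */
    static uint64_t walk_get_l4e(uint64_t *root, uint64_t va)
    {
        return root[IDX(va, 3)];
    }

    /* ...so the next level down simply composes the helper above. */
    static uint64_t walk_get_l3e(uint64_t *root, uint64_t va)
    {
        uint64_t l4e = walk_get_l4e(root, va);

        return (l4e & PRESENT) ? table_of(l4e)[IDX(va, 2)] : 0;
    }

    int main(void)
    {
        static uint64_t l4[16], l3[16];
        uint64_t va = 0x123;                 /* L4 index 0, L3 index 1 */

        l4[IDX(va, 3)] = (uint64_t)(uintptr_t)l3 | PRESENT;
        l3[IDX(va, 2)] = 0xabc0 | PRESENT;

        printf("l3e = %#llx\n", (unsigned long long)walk_get_l3e(l4, va));
        return 0;
    }

Returning an empty entry rather than NULL keeps one uniform flags test at
every level, which is what lets the follow-up patches add locking and
page-type checks inside each helper without touching any caller.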
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: George Dunlap <george.dunlap@citrix.com>
+Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
+
+diff --git a/xen/arch/x86/x86_64/mm.c b/xen/arch/x86/x86_64/mm.c
+index 48fd60a876..c25eb01e41 100644
+--- a/xen/arch/x86/x86_64/mm.c
++++ b/xen/arch/x86/x86_64/mm.c
+@@ -44,26 +44,47 @@ unsigned int __read_mostly m2p_compat_vstart = __HYPERVISOR_COMPAT_VIRT_START;
+ 
+ l2_pgentry_t *compat_idle_pg_table_l2;
+ 
+-void *do_page_walk(struct vcpu *v, unsigned long addr)
++static l4_pgentry_t page_walk_get_l4e(pagetable_t root, unsigned long addr)
+ {
+-    unsigned long mfn = pagetable_get_pfn(v->arch.guest_table);
+-    l4_pgentry_t l4e, *l4t;
+-    l3_pgentry_t l3e, *l3t;
+-    l2_pgentry_t l2e, *l2t;
+-    l1_pgentry_t l1e, *l1t;
++    unsigned long mfn = pagetable_get_pfn(root);
++    l4_pgentry_t *l4t, l4e;
+ 
+-    if ( !is_pv_vcpu(v) || !is_canonical_address(addr) )
+-        return NULL;
++    if ( !is_canonical_address(addr) )
++        return l4e_empty();
+ 
+     l4t = map_domain_page(_mfn(mfn));
+     l4e = l4t[l4_table_offset(addr)];
+     unmap_domain_page(l4t);
++
++    return l4e;
++}
++
++static l3_pgentry_t page_walk_get_l3e(pagetable_t root, unsigned long addr)
++{
++    l4_pgentry_t l4e = page_walk_get_l4e(root, addr);
++    l3_pgentry_t *l3t, l3e;
++
+     if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) )
+-        return NULL;
++        return l3e_empty();
+ 
+     l3t = map_l3t_from_l4e(l4e);
+     l3e = l3t[l3_table_offset(addr)];
+     unmap_domain_page(l3t);
++
++    return l3e;
++}
++
++void *do_page_walk(struct vcpu *v, unsigned long addr)
++{
++    l3_pgentry_t l3e;
++    l2_pgentry_t l2e, *l2t;
++    l1_pgentry_t l1e, *l1t;
++    unsigned long mfn;
++
++    if ( !is_pv_vcpu(v) )
++        return NULL;
++
++    l3e = page_walk_get_l3e(v->arch.guest_table, addr);
+     mfn = l3e_get_pfn(l3e);
+     if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) || !mfn_valid(_mfn(mfn)) )
+         return NULL;
diff --git a/xsa286-4.14-0002-x86-mm-check-page-types-in-do_page_walk.patch b/xsa286-4.14-0002-x86-mm-check-page-types-in-do_page_walk.patch
new file mode 100644
index 0000000..3a05a74
--- /dev/null
+++ b/xsa286-4.14-0002-x86-mm-check-page-types-in-do_page_walk.patch
@@ -0,0 +1,170 @@
+From: Jan Beulich <jbeulich@suse.com>
+Subject: x86/mm: check page types in do_page_walk()
+
+For page table entries read to be guaranteed valid, transiently locking
+the pages and validating their types is necessary. Note that guest use
+of linear page tables is intentionally not taken into account here, as
+ordinary data (guest stacks) can't possibly live inside page tables.
+
+This is part of XSA-286.
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: George Dunlap <george.dunlap@citrix.com>
+Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
+
+diff --git a/xen/arch/x86/x86_64/mm.c b/xen/arch/x86/x86_64/mm.c
+index c25eb01e41..6305cf6033 100644
+--- a/xen/arch/x86/x86_64/mm.c
++++ b/xen/arch/x86/x86_64/mm.c
+@@ -46,15 +46,29 @@ l2_pgentry_t *compat_idle_pg_table_l2;
+ 
+ static l4_pgentry_t page_walk_get_l4e(pagetable_t root, unsigned long addr)
+ {
+-    unsigned long mfn = pagetable_get_pfn(root);
+-    l4_pgentry_t *l4t, l4e;
++    mfn_t mfn = pagetable_get_mfn(root);
++    /* current's root page table can't disappear under our feet. */
++    bool need_lock = !mfn_eq(mfn, pagetable_get_mfn(current->arch.guest_table));
++    struct page_info *pg;
++    l4_pgentry_t l4e = l4e_empty();
+ 
+     if ( !is_canonical_address(addr) )
+         return l4e_empty();
+ 
+-    l4t = map_domain_page(_mfn(mfn));
+-    l4e = l4t[l4_table_offset(addr)];
+-    unmap_domain_page(l4t);
++    pg = mfn_to_page(mfn);
++    if ( need_lock && !page_lock(pg) )
++        return l4e_empty();
++
++    if ( (pg->u.inuse.type_info & PGT_type_mask) == PGT_l4_page_table )
++    {
++        l4_pgentry_t *l4t = map_domain_page(mfn);
++
++        l4e = l4t[l4_table_offset(addr)];
++        unmap_domain_page(l4t);
++    }
++
++    if ( need_lock )
++        page_unlock(pg);
+ 
+     return l4e;
+ }
+@@ -62,14 +76,26 @@ static l4_pgentry_t page_walk_get_l4e(pagetable_t root, unsigned long addr)
+ static l3_pgentry_t page_walk_get_l3e(pagetable_t root, unsigned long addr)
+ {
+     l4_pgentry_t l4e = page_walk_get_l4e(root, addr);
+-    l3_pgentry_t *l3t, l3e;
++    mfn_t mfn = l4e_get_mfn(l4e);
++    struct page_info *pg;
++    l3_pgentry_t l3e = l3e_empty();
+ 
+     if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) )
+         return l3e_empty();
+ 
+-    l3t = map_l3t_from_l4e(l4e);
+-    l3e = l3t[l3_table_offset(addr)];
+-    unmap_domain_page(l3t);
++    pg = mfn_to_page(mfn);
++    if ( !page_lock(pg) )
++        return l3e_empty();
++
++    if ( (pg->u.inuse.type_info & PGT_type_mask) == PGT_l3_page_table )
++    {
++        l3_pgentry_t *l3t = map_domain_page(mfn);
++
++        l3e = l3t[l3_table_offset(addr)];
++        unmap_domain_page(l3t);
++    }
++
++    page_unlock(pg);
+ 
+     return l3e;
+ }
+@@ -77,44 +103,67 @@ static l3_pgentry_t page_walk_get_l3e(pagetable_t root, unsigned long addr)
+ void *do_page_walk(struct vcpu *v, unsigned long addr)
+ {
+     l3_pgentry_t l3e;
+-    l2_pgentry_t l2e, *l2t;
+-    l1_pgentry_t l1e, *l1t;
+-    unsigned long mfn;
++    l2_pgentry_t l2e = l2e_empty();
++    l1_pgentry_t l1e = l1e_empty();
++    mfn_t mfn;
++    struct page_info *pg;
+ 
+     if ( !is_pv_vcpu(v) )
+         return NULL;
+ 
+     l3e = page_walk_get_l3e(v->arch.guest_table, addr);
+-    mfn = l3e_get_pfn(l3e);
+-    if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) || !mfn_valid(_mfn(mfn)) )
++    mfn = l3e_get_mfn(l3e);
++    if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) || !mfn_valid(mfn) )
+         return NULL;
+     if ( (l3e_get_flags(l3e) & _PAGE_PSE) )
+     {
+-        mfn += PFN_DOWN(addr & ((1UL << L3_PAGETABLE_SHIFT) - 1));
++        mfn = mfn_add(mfn, PFN_DOWN(addr & ((1UL << L3_PAGETABLE_SHIFT) - 1)));
+         goto ret;
+     }
+ 
+-    l2t = map_domain_page(_mfn(mfn));
+-    l2e = l2t[l2_table_offset(addr)];
+-    unmap_domain_page(l2t);
+-    mfn = l2e_get_pfn(l2e);
+-    if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) || !mfn_valid(_mfn(mfn)) )
++    pg = mfn_to_page(mfn);
++    if ( !page_lock(pg) )
++        return NULL;
++
++    if ( (pg->u.inuse.type_info & PGT_type_mask) == PGT_l2_page_table )
++    {
++        const l2_pgentry_t *l2t = map_domain_page(mfn);
++
++        l2e = l2t[l2_table_offset(addr)];
++        unmap_domain_page(l2t);
++    }
++
++    page_unlock(pg);
++
++    mfn = l2e_get_mfn(l2e);
++    if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) || !mfn_valid(mfn) )
+         return NULL;
+     if ( (l2e_get_flags(l2e) & _PAGE_PSE) )
+     {
+-        mfn += PFN_DOWN(addr & ((1UL << L2_PAGETABLE_SHIFT) - 1));
++        mfn = mfn_add(mfn, PFN_DOWN(addr & ((1UL << L2_PAGETABLE_SHIFT) - 1)));
+         goto ret;
+     }
+ 
+-    l1t = map_domain_page(_mfn(mfn));
+-    l1e = l1t[l1_table_offset(addr)];
+-    unmap_domain_page(l1t);
+-    mfn = l1e_get_pfn(l1e);
+-    if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) || !mfn_valid(_mfn(mfn)) )
++    pg = mfn_to_page(mfn);
++    if ( !page_lock(pg) )
++        return NULL;
++
++    if ( (pg->u.inuse.type_info & PGT_type_mask) == PGT_l1_page_table )
++    {
++        const l1_pgentry_t *l1t = map_domain_page(mfn);
++
++        l1e = l1t[l1_table_offset(addr)];
++        unmap_domain_page(l1t);
++    }
++
++    page_unlock(pg);
++
++    mfn = l1e_get_mfn(l1e);
++    if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) || !mfn_valid(mfn) )
+         return NULL;
+ 
+ ret:
+-    return map_domain_page(_mfn(mfn)) + (addr & ~PAGE_MASK);
++    return map_domain_page(mfn) + (addr & ~PAGE_MASK);
+ }
+ 
+ /*
diff --git a/xsa286-4.14-0003-x86-mm-avoid-using-linear-page-tables-in-map_guest_l.patch b/xsa286-4.14-0003-x86-mm-avoid-using-linear-page-tables-in-map_guest_l.patch
new file mode 100644
index 0000000..5a5c251
--- /dev/null
+++ b/xsa286-4.14-0003-x86-mm-avoid-using-linear-page-tables-in-map_guest_l.patch
@@ -0,0 +1,92 @@
+From: Jan Beulich <jbeulich@suse.com>
+Subject: x86/mm: avoid using linear page tables in map_guest_l1e()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Replace the linear L2 table access by an actual page walk.
+
+This is part of XSA-286.
+
+Reported-by: Jann Horn <jannh@google.com>
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: George Dunlap <george.dunlap@citrix.com>
+Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
+
+diff --git a/xen/arch/x86/pv/mm.c b/xen/arch/x86/pv/mm.c
+index 5d4cd00941..7be098f5ef 100644
+--- a/xen/arch/x86/pv/mm.c
++++ b/xen/arch/x86/pv/mm.c
+@@ -40,11 +40,14 @@ l1_pgentry_t *map_guest_l1e(unsigned long linear, mfn_t *gl1mfn)
+     if ( unlikely(!__addr_ok(linear)) )
+         return NULL;
+ 
+-    /* Find this l1e and its enclosing l1mfn in the linear map. */
+-    if ( __copy_from_user(&l2e,
+-                          &__linear_l2_table[l2_linear_offset(linear)],
+-                          sizeof(l2_pgentry_t)) )
++    if ( unlikely(!(current->arch.flags & TF_kernel_mode)) )
++    {
++        ASSERT_UNREACHABLE();
+         return NULL;
++    }
++
++    /* Find this l1e and its enclosing l1mfn. */
++    l2e = page_walk_get_l2e(current->arch.guest_table, linear);
+ 
+     /* Check flags that it will be safe to read the l1e. */
+     if ( (l2e_get_flags(l2e) & (_PAGE_PRESENT | _PAGE_PSE)) != _PAGE_PRESENT )
+diff --git a/xen/arch/x86/x86_64/mm.c b/xen/arch/x86/x86_64/mm.c
+index 6305cf6033..71a8bfc024 100644
+--- a/xen/arch/x86/x86_64/mm.c
++++ b/xen/arch/x86/x86_64/mm.c
+@@ -100,6 +100,34 @@ static l3_pgentry_t page_walk_get_l3e(pagetable_t root, unsigned long addr)
+     return l3e;
+ }
+ 
++l2_pgentry_t page_walk_get_l2e(pagetable_t root, unsigned long addr)
++{
++    l3_pgentry_t l3e = page_walk_get_l3e(root, addr);
++    mfn_t mfn = l3e_get_mfn(l3e);
++    struct page_info *pg;
++    l2_pgentry_t l2e = l2e_empty();
++
++    if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) ||
++         (l3e_get_flags(l3e) & _PAGE_PSE) )
++        return l2e_empty();
++
++    pg = mfn_to_page(mfn);
++    if ( !page_lock(pg) )
++        return l2e_empty();
++
++    if ( (pg->u.inuse.type_info & PGT_type_mask) == PGT_l2_page_table )
++    {
++        l2_pgentry_t *l2t = map_domain_page(mfn);
++
++        l2e = l2t[l2_table_offset(addr)];
++        unmap_domain_page(l2t);
++    }
++
++    page_unlock(pg);
++
++    return l2e;
++}
++
+ void *do_page_walk(struct vcpu *v, unsigned long addr)
+ {
+     l3_pgentry_t l3e;
+diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
+index 7e74996053..12ea812381 100644
+--- a/xen/include/asm-x86/mm.h
++++ b/xen/include/asm-x86/mm.h
+@@ -579,7 +579,9 @@ void audit_domains(void);
+ void make_cr3(struct vcpu *v, mfn_t mfn);
+ void update_cr3(struct vcpu *v);
+ int vcpu_destroy_pagetables(struct vcpu *);
++
+ void *do_page_walk(struct vcpu *v, unsigned long addr);
++l2_pgentry_t page_walk_get_l2e(pagetable_t root, unsigned long addr);
+ 
+ /* Allocator functions for Xen pagetables. */
+ void *alloc_xen_pagetable(void);
diff --git a/xsa286-4.14-0004-x86-mm-avoid-using-linear-page-tables-in-guest_get_e.patch b/xsa286-4.14-0004-x86-mm-avoid-using-linear-page-tables-in-guest_get_e.patch
new file mode 100644
index 0000000..9783f79
--- /dev/null
+++ b/xsa286-4.14-0004-x86-mm-avoid-using-linear-page-tables-in-guest_get_e.patch
@@ -0,0 +1,172 @@
+From: Jan Beulich <jbeulich@suse.com>
+Subject: x86/mm: avoid using linear page tables in guest_get_eff_kern_l1e()
+
+First of all drop guest_get_eff_l1e() entirely - there's no actual user
+of it: pv_ro_page_fault() has a guest_kernel_mode() conditional around
+its only call site.
+
+Then replace the linear L1 table access by an actual page walk.
+
+This is part of XSA-286.
+
+Reported-by: Jann Horn <jannh@google.com>
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: George Dunlap <george.dunlap@citrix.com>
+Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
+
+diff --git a/xen/arch/x86/pv/mm.c b/xen/arch/x86/pv/mm.c
+index 7be098f5ef..5e4081aecd 100644
+--- a/xen/arch/x86/pv/mm.c
++++ b/xen/arch/x86/pv/mm.c
+@@ -59,27 +59,6 @@ l1_pgentry_t *map_guest_l1e(unsigned long linear, mfn_t *gl1mfn)
+ }
+ 
+ /*
+- * Read the guest's l1e that maps this address, from the kernel-mode
+- * page tables.
+- */
+-static l1_pgentry_t guest_get_eff_kern_l1e(unsigned long linear)
+-{
+-    struct vcpu *curr = current;
+-    const bool user_mode = !(curr->arch.flags & TF_kernel_mode);
+-    l1_pgentry_t l1e;
+-
+-    if ( user_mode )
+-        toggle_guest_pt(curr);
+-
+-    l1e = guest_get_eff_l1e(linear);
+-
+-    if ( user_mode )
+-        toggle_guest_pt(curr);
+-
+-    return l1e;
+-}
+-
+-/*
+  * Map a guest's LDT page (covering the byte at @offset from start of the LDT)
+  * into Xen's virtual range. Returns true if the mapping changed, false
+  * otherwise.
+diff --git a/xen/arch/x86/pv/mm.h b/xen/arch/x86/pv/mm.h
+index a1bd473b29..43d33a1fd1 100644
+--- a/xen/arch/x86/pv/mm.h
++++ b/xen/arch/x86/pv/mm.h
+@@ -5,19 +5,19 @@ l1_pgentry_t *map_guest_l1e(unsigned long linear, mfn_t *gl1mfn);
+ 
+ int new_guest_cr3(mfn_t mfn);
+ 
+-/* Read a PV guest's l1e that maps this linear address. */
+-static inline l1_pgentry_t guest_get_eff_l1e(unsigned long linear)
++/*
++ * Read the guest's l1e that maps this address, from the kernel-mode
++ * page tables.
++ */
++static inline l1_pgentry_t guest_get_eff_kern_l1e(unsigned long linear)
+ {
+-    l1_pgentry_t l1e;
++    l1_pgentry_t l1e = l1e_empty();
+ 
+     ASSERT(!paging_mode_translate(current->domain));
+     ASSERT(!paging_mode_external(current->domain));
+ 
+-    if ( unlikely(!__addr_ok(linear)) ||
+-         __copy_from_user(&l1e,
+-                          &__linear_l1_table[l1_linear_offset(linear)],
+-                          sizeof(l1_pgentry_t)) )
+-        l1e = l1e_empty();
++    if ( likely(__addr_ok(linear)) )
++        l1e = page_walk_get_l1e(current->arch.guest_table, linear);
+ 
+     return l1e;
+ }
+diff --git a/xen/arch/x86/pv/ro-page-fault.c b/xen/arch/x86/pv/ro-page-fault.c
+index 0eedb70002..ce31dd401d 100644
+--- a/xen/arch/x86/pv/ro-page-fault.c
++++ b/xen/arch/x86/pv/ro-page-fault.c
+@@ -349,7 +349,7 @@ int pv_ro_page_fault(unsigned long addr, struct cpu_user_regs *regs)
+     bool mmio_ro;
+ 
+     /* Attempt to read the PTE that maps the VA being accessed. */
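
What moving the recursion "two levels down" means can be modelled in plain
C. The program below is an illustration only (toy 4-entry tables, pointers
instead of PTEs, no MMU — none of it is Xen or patch code): an L2-style
table whose last slot points at itself exposes the L1 entries referenced by
all its other slots as one flat array, which is the access pattern the
reworked MAPCACHE_L1ENT(idx) relies on:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    #define N 4                   /* entries per table; 512 on x86-64 */

    typedef uintptr_t entry_t;    /* toy "page table entry": a pointer */

    static entry_t l1[N - 1][N];  /* ordinary L1 tables */
    static entry_t l2[N];        /* the L2 page; last slot is recursive */

    /* Software stand-in for the MMU walking through the window mapped by
     * the recursive last slot. */
    static entry_t read_window(unsigned int idx)
    {
        entry_t *as_l1t = (entry_t *)l2[N - 1];       /* recursive slot */
        entry_t *page = (entry_t *)as_l1t[idx / N];   /* -> L1 table idx/N */

        return page[idx % N];                         /* entry within it */
    }

    int main(void)
    {
        unsigned int i, idx;

        for ( i = 0; i < N - 1; i++ )
            l2[i] = (entry_t)l1[i];
        l2[N - 1] = (entry_t)l2;  /* install the recursive entry */

        for ( idx = 0; idx < (N - 1) * N; idx++ )
            l1[idx / N][idx % N] = 0x1000 + idx;

        /* Every L1 entry is reachable with one flat index. */
        for ( idx = 0; idx < (N - 1) * N; idx++ )
            assert(read_window(idx) == 0x1000 + idx);

        printf("flat window over %u L1 entries\n", (N - 1) * N);
        return 0;
    }

In the hypervisor the indirection in read_window() is performed by the MMU
rather than by code: once mapcache_domain_init() installs the mapcache L2
page into its own last slot, the 2MB window that slot maps shows the L1
tables of the other slots back to back, and keeping the recursion at L2
instead of L4 is what takes the top-level linear mapping out of use.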
+
+Reported-by: Jann Horn <jannh@google.com>
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: George Dunlap <george.dunlap@citrix.com>
+Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
+
+diff --git a/xen/arch/x86/domain_page.c b/xen/arch/x86/domain_page.c
+index b03728e18e..ed6a2bf081 100644
+--- a/xen/arch/x86/domain_page.c
++++ b/xen/arch/x86/domain_page.c
+@@ -65,7 +65,8 @@ void __init mapcache_override_current(struct vcpu *v)
+ #define mapcache_l2_entry(e) ((e) >> PAGETABLE_ORDER)
+ #define MAPCACHE_L2_ENTRIES (mapcache_l2_entry(MAPCACHE_ENTRIES - 1) + 1)
+ #define MAPCACHE_L1ENT(idx) \
+-    __linear_l1_table[l1_linear_offset(MAPCACHE_VIRT_START + pfn_to_paddr(idx))]
++    ((l1_pgentry_t *)(MAPCACHE_VIRT_START | \
++                      ((L2_PAGETABLE_ENTRIES - 1) << L2_PAGETABLE_SHIFT)))[idx]
+ 
+ void *map_domain_page(mfn_t mfn)
+ {
+@@ -235,6 +236,7 @@ int mapcache_domain_init(struct domain *d)
+ {
+     struct mapcache_domain *dcache = &d->arch.pv.mapcache;
+     unsigned int bitmap_pages;
++    int rc;
+ 
+     ASSERT(is_pv_domain(d));
+ 
+@@ -243,8 +245,10 @@ int mapcache_domain_init(struct domain *d)
+         return 0;
+ #endif
+ 
++    BUILD_BUG_ON(MAPCACHE_VIRT_START & ((1 << L3_PAGETABLE_SHIFT) - 1));
+     BUILD_BUG_ON(MAPCACHE_VIRT_END + PAGE_SIZE * (3 +
+-                 2 * PFN_UP(BITS_TO_LONGS(MAPCACHE_ENTRIES) * sizeof(long))) >
++                 2 * PFN_UP(BITS_TO_LONGS(MAPCACHE_ENTRIES) * sizeof(long))) +
++                 (1U << L2_PAGETABLE_SHIFT) >
+                  MAPCACHE_VIRT_START + (PERDOMAIN_SLOT_MBYTES << 20));
+     bitmap_pages = PFN_UP(BITS_TO_LONGS(MAPCACHE_ENTRIES) * sizeof(long));
+     dcache->inuse = (void *)MAPCACHE_VIRT_END + PAGE_SIZE;
+@@ -253,9 +257,25 @@ int mapcache_domain_init(struct domain *d)
+ 
+     spin_lock_init(&dcache->lock);
+ 
+-    return create_perdomain_mapping(d, (unsigned long)dcache->inuse,
+-                                    2 * bitmap_pages + 1,
+-                                    NIL(l1_pgentry_t *), NULL);
++    rc = create_perdomain_mapping(d, (unsigned long)dcache->inuse,
++                                  2 * bitmap_pages + 1,
++                                  NIL(l1_pgentry_t *), NULL);
++    if ( !rc )
++    {
++        /*
++         * Install mapping of our L2 table into its own last slot, for easy
++         * access to the L1 entries via MAPCACHE_L1ENT().
++         */
++        l3_pgentry_t *l3t = __map_domain_page(d->arch.perdomain_l3_pg);
++        l3_pgentry_t l3e = l3t[l3_table_offset(MAPCACHE_VIRT_END)];
++        l2_pgentry_t *l2t = map_l2t_from_l3e(l3e);
++
++        l2e_get_intpte(l2t[L2_PAGETABLE_ENTRIES - 1]) = l3e_get_intpte(l3e);
++        unmap_domain_page(l2t);
++        unmap_domain_page(l3t);
++    }
++
++    return rc;
+ }
+ 
+ int mapcache_vcpu_init(struct vcpu *v)
+@@ -346,7 +366,7 @@ mfn_t domain_page_map_to_mfn(const void *ptr)
+     else
+     {
+         ASSERT(va >= MAPCACHE_VIRT_START && va < MAPCACHE_VIRT_END);
+-        pl1e = &__linear_l1_table[l1_linear_offset(va)];
++        pl1e = &MAPCACHE_L1ENT(PFN_DOWN(va - MAPCACHE_VIRT_START));
+     }
+ 
+     return l1e_get_mfn(*pl1e);
+diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
+index 82bc676553..582ea09725 100644
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -5953,6 +5953,10 @@ void free_perdomain_mappings(struct domain *d)
+         {
+             struct page_info *l1pg = l2e_get_page(l2tab[j]);
+ 
++            /* mapcache_domain_init() installs a recursive entry. */
++            if ( l1pg == l2pg )
++                continue;
++
+             if ( l2e_get_flags(l2tab[j]) & _PAGE_AVAIL0 )
+             {
+                 l1_pgentry_t *l1tab = __map_domain_page(l1pg);
diff --git a/xsa286-4.14-0006-x86-mm-restrict-use-of-linear-page-tables-to-shadow-.patch b/xsa286-4.14-0006-x86-mm-restrict-use-of-linear-page-tables-to-shadow-.patch
new file mode 100644
index 0000000..69ad37c
--- /dev/null
+++ b/xsa286-4.14-0006-x86-mm-restrict-use-of-linear-page-tables-to-shadow-.patch
@@ -0,0 +1,106 @@
+From: Jan Beulich <jbeulich@suse.com>
+Subject: x86/mm: restrict use of linear page tables to shadow mode code
+
+Other code does not require them to be set up anymore, so restrict when
+to populate the respective L4 slot and reduce visibility of the
+accessors.
+
+While with the removal of all uses the vulnerability is actually fixed,
+removing the creation of the linear mapping adds an extra layer of
+protection. Similarly reducing visibility of the accessors mostly
+eliminates the risk of undue re-introduction of uses of the linear
+mappings.
+
+This is (not strictly) part of XSA-286.
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: George Dunlap <george.dunlap@citrix.com>
+Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
+
+diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
+index 582ea09725..57333bb120 100644
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -1682,9 +1682,10 @@ void init_xen_l4_slots(l4_pgentry_t *l4t, mfn_t l4mfn,
+     l4t[l4_table_offset(PCI_MCFG_VIRT_START)] =
+         idle_pg_table[l4_table_offset(PCI_MCFG_VIRT_START)];
+ 
+-    /* Slot 258: Self linear mappings. */
++    /* Slot 258: Self linear mappings (shadow pt only). */
+     ASSERT(!mfn_eq(l4mfn, INVALID_MFN));
+     l4t[l4_table_offset(LINEAR_PT_VIRT_START)] =
++        !shadow_mode_external(d) ? l4e_empty() :
+         l4e_from_mfn(l4mfn, __PAGE_HYPERVISOR_RW);
+ 
+     /* Slot 259: Shadow linear mappings (if applicable) .*/
+diff --git a/xen/arch/x86/mm/shadow/private.h b/xen/arch/x86/mm/shadow/private.h
+index 3fd3f0617a..bb2f50cb6e 100644
+--- a/xen/arch/x86/mm/shadow/private.h
++++ b/xen/arch/x86/mm/shadow/private.h
+@@ -139,6 +139,15 @@ enum {
+ # define GUEST_PTE_SIZE 4
+ #endif
+ 
++/* Where to find each level of the linear mapping */
++#define __linear_l1_table ((l1_pgentry_t *)(LINEAR_PT_VIRT_START))
++#define __linear_l2_table \
++ ((l2_pgentry_t *)(__linear_l1_table + l1_linear_offset(LINEAR_PT_VIRT_START)))
++#define __linear_l3_table \
++ ((l3_pgentry_t *)(__linear_l2_table + l2_linear_offset(LINEAR_PT_VIRT_START)))
++#define __linear_l4_table \
++ ((l4_pgentry_t *)(__linear_l3_table + l3_linear_offset(LINEAR_PT_VIRT_START)))
++
+ /******************************************************************************
+  * Auditing routines
+  */
+diff --git a/xen/arch/x86/x86_64/mm.c b/xen/arch/x86/x86_64/mm.c
+index 9e87a55174..ce03f83f52 100644
+--- a/xen/arch/x86/x86_64/mm.c
++++ b/xen/arch/x86/x86_64/mm.c
+@@ -808,9 +808,6 @@ void __init paging_init(void)
+ 
+     machine_to_phys_mapping_valid = 1;
+ 
+-    /* Set up linear page table mapping. */
+-    l4e_write(&idle_pg_table[l4_table_offset(LINEAR_PT_VIRT_START)],
+-              l4e_from_paddr(__pa(idle_pg_table), __PAGE_HYPERVISOR_RW));
+     return;
+ 
+ nomem:
+diff --git a/xen/include/asm-x86/config.h b/xen/include/asm-x86/config.h
+index 665e9cc31d..17b8ea0cfd 100644
+--- a/xen/include/asm-x86/config.h
++++ b/xen/include/asm-x86/config.h
+@@ -197,7 +197,7 @@ extern unsigned char boot_edid_info[128];
+  */
+ #define PCI_MCFG_VIRT_START     (PML4_ADDR(257))
+ #define PCI_MCFG_VIRT_END       (PCI_MCFG_VIRT_START + PML4_ENTRY_BYTES)
+-/* Slot 258: linear page table (guest table). */
++/* Slot 258: linear page table (monitor table, HVM only). */
+ #define LINEAR_PT_VIRT_START    (PML4_ADDR(258))
+ #define LINEAR_PT_VIRT_END      (LINEAR_PT_VIRT_START + PML4_ENTRY_BYTES)
+ /* Slot 259: linear page table (shadow table). */
+diff --git a/xen/include/asm-x86/page.h b/xen/include/asm-x86/page.h
+index f632affaef..fd2574267c 100644
+--- a/xen/include/asm-x86/page.h
++++ b/xen/include/asm-x86/page.h
+@@ -294,19 +294,6 @@ void copy_page_sse2(void *, const void *);
+ #define vmap_to_mfn(va) _mfn(l1e_get_pfn(*virt_to_xen_l1e((unsigned long)(va))))
+ #define vmap_to_page(va) mfn_to_page(vmap_to_mfn(va))
+ 
+-#endif /* !defined(__ASSEMBLY__) */
+-
+-/* Where to find each level of the linear mapping */
+-#define __linear_l1_table ((l1_pgentry_t *)(LINEAR_PT_VIRT_START))
+-#define __linear_l2_table \
+- ((l2_pgentry_t *)(__linear_l1_table + l1_linear_offset(LINEAR_PT_VIRT_START)))
+-#define __linear_l3_table \
+- ((l3_pgentry_t *)(__linear_l2_table + l2_linear_offset(LINEAR_PT_VIRT_START)))
+-#define __linear_l4_table \
+- ((l4_pgentry_t *)(__linear_l3_table + l3_linear_offset(LINEAR_PT_VIRT_START)))
+-
+-
+-#ifndef __ASSEMBLY__
+ extern root_pgentry_t idle_pg_table[ROOT_PAGETABLE_ENTRIES];
+ extern l2_pgentry_t *compat_idle_pg_table_l2;
+ extern unsigned int m2p_compat_vstart;