Blob Blame History Raw
From: Jan Beulich <jbeulich@suse.com>
Subject: x86/mm: check page types in do_page_walk()

For page table entries read to be guaranteed valid, transiently locking
the pages and validating their types is necessary. Note that guest use
of linear page tables is intentionally not taken into account here, as
ordinary data (guest stacks) can't possibly live inside page tables.

This is part of XSA-286.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: George Dunlap <george.dunlap@citrix.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

diff --git a/xen/arch/x86/x86_64/mm.c b/xen/arch/x86/x86_64/mm.c
index c25eb01e41..6305cf6033 100644
--- a/xen/arch/x86/x86_64/mm.c
+++ b/xen/arch/x86/x86_64/mm.c
@@ -46,15 +46,29 @@ l2_pgentry_t *compat_idle_pg_table_l2;
 
 static l4_pgentry_t page_walk_get_l4e(pagetable_t root, unsigned long addr)
 {
-    unsigned long mfn = pagetable_get_pfn(root);
-    l4_pgentry_t *l4t, l4e;
+    mfn_t mfn = pagetable_get_mfn(root);
+    /* current's root page table can't disappear under our feet. */
+    bool need_lock = !mfn_eq(mfn, pagetable_get_mfn(current->arch.guest_table));
+    struct page_info *pg;
+    l4_pgentry_t l4e = l4e_empty();
 
     if ( !is_canonical_address(addr) )
         return l4e_empty();
 
-    l4t = map_domain_page(_mfn(mfn));
-    l4e = l4t[l4_table_offset(addr)];
-    unmap_domain_page(l4t);
+    pg = mfn_to_page(mfn);
+    if ( need_lock && !page_lock(pg) )
+        return l4e_empty();
+
+    if ( (pg->u.inuse.type_info & PGT_type_mask) == PGT_l4_page_table )
+    {
+        l4_pgentry_t *l4t = map_domain_page(mfn);
+
+        l4e = l4t[l4_table_offset(addr)];
+        unmap_domain_page(l4t);
+    }
+
+    if ( need_lock )
+        page_unlock(pg);
 
     return l4e;
 }
@@ -62,14 +76,26 @@ static l4_pgentry_t page_walk_get_l4e(pagetable_t root, unsigned long addr)
 static l3_pgentry_t page_walk_get_l3e(pagetable_t root, unsigned long addr)
 {
     l4_pgentry_t l4e = page_walk_get_l4e(root, addr);
-    l3_pgentry_t *l3t, l3e;
+    mfn_t mfn = l4e_get_mfn(l4e);
+    struct page_info *pg;
+    l3_pgentry_t l3e = l3e_empty();
 
     if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) )
         return l3e_empty();
 
-    l3t = map_l3t_from_l4e(l4e);
-    l3e = l3t[l3_table_offset(addr)];
-    unmap_domain_page(l3t);
+    pg = mfn_to_page(mfn);
+    if ( !page_lock(pg) )
+        return l3e_empty();
+
+    if ( (pg->u.inuse.type_info & PGT_type_mask) == PGT_l3_page_table )
+    {
+        l3_pgentry_t *l3t = map_domain_page(mfn);
+
+        l3e = l3t[l3_table_offset(addr)];
+        unmap_domain_page(l3t);
+    }
+
+    page_unlock(pg);
 
     return l3e;
 }
@@ -77,44 +103,67 @@ static l3_pgentry_t page_walk_get_l3e(pagetable_t root, unsigned long addr)
 void *do_page_walk(struct vcpu *v, unsigned long addr)
 {
     l3_pgentry_t l3e;
-    l2_pgentry_t l2e, *l2t;
-    l1_pgentry_t l1e, *l1t;
-    unsigned long mfn;
+    l2_pgentry_t l2e = l2e_empty();
+    l1_pgentry_t l1e = l1e_empty();
+    mfn_t mfn;
+    struct page_info *pg;
 
     if ( !is_pv_vcpu(v) )
         return NULL;
 
     l3e = page_walk_get_l3e(v->arch.guest_table, addr);
-    mfn = l3e_get_pfn(l3e);
-    if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) || !mfn_valid(_mfn(mfn)) )
+    mfn = l3e_get_mfn(l3e);
+    if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) || !mfn_valid(mfn) )
         return NULL;
     if ( (l3e_get_flags(l3e) & _PAGE_PSE) )
     {
-        mfn += PFN_DOWN(addr & ((1UL << L3_PAGETABLE_SHIFT) - 1));
+        mfn = mfn_add(mfn, PFN_DOWN(addr & ((1UL << L3_PAGETABLE_SHIFT) - 1)));
         goto ret;
     }
 
-    l2t = map_domain_page(_mfn(mfn));
-    l2e = l2t[l2_table_offset(addr)];
-    unmap_domain_page(l2t);
-    mfn = l2e_get_pfn(l2e);
-    if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) || !mfn_valid(_mfn(mfn)) )
+    pg = mfn_to_page(mfn);
+    if ( !page_lock(pg) )
+        return NULL;
+
+    if ( (pg->u.inuse.type_info & PGT_type_mask) == PGT_l2_page_table )
+    {
+        const l2_pgentry_t *l2t = map_domain_page(mfn);
+
+        l2e = l2t[l2_table_offset(addr)];
+        unmap_domain_page(l2t);
+    }
+
+    page_unlock(pg);
+
+    mfn = l2e_get_mfn(l2e);
+    if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) || !mfn_valid(mfn) )
         return NULL;
     if ( (l2e_get_flags(l2e) & _PAGE_PSE) )
     {
-        mfn += PFN_DOWN(addr & ((1UL << L2_PAGETABLE_SHIFT) - 1));
+        mfn = mfn_add(mfn, PFN_DOWN(addr & ((1UL << L2_PAGETABLE_SHIFT) - 1)));
         goto ret;
     }
 
-    l1t = map_domain_page(_mfn(mfn));
-    l1e = l1t[l1_table_offset(addr)];
-    unmap_domain_page(l1t);
-    mfn = l1e_get_pfn(l1e);
-    if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) || !mfn_valid(_mfn(mfn)) )
+    pg = mfn_to_page(mfn);
+    if ( !page_lock(pg) )
+        return NULL;
+
+    if ( (pg->u.inuse.type_info & PGT_type_mask) == PGT_l1_page_table )
+    {
+        const l1_pgentry_t *l1t = map_domain_page(mfn);
+
+        l1e = l1t[l1_table_offset(addr)];
+        unmap_domain_page(l1t);
+    }
+
+    page_unlock(pg);
+
+    mfn = l1e_get_mfn(l1e);
+    if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) || !mfn_valid(mfn) )
         return NULL;
 
  ret:
-    return map_domain_page(_mfn(mfn)) + (addr & ~PAGE_MASK);
+    return map_domain_page(mfn) + (addr & ~PAGE_MASK);
 }
 
 /*