From: Jan Beulich <jbeulich@suse.com>
Subject: x86/mm: avoid using top level linear page tables in
 {,un}map_domain_page()

Move the page table recursion two levels down. This entails not freeing
the recursive mapping prematurely in free_perdomain_mappings().

This is part of XSA-286.

Reported-by: Jann Horn <jannh@google.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: George Dunlap <george.dunlap@citrix.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
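
The new MAPCACHE_L1ENT() relies on a self-referencing last slot in the
mapcache's L2 table: a page walk through that slot resolves the mapcache's
L1 tables as though they were data pages, so all L1 entries appear as one
flat array of 8-byte slots in the last 2Mb window of the (now 1Gb-aligned)
region. Below is a minimal stand-alone sketch of the resulting address
arithmetic, assuming Xen's usual 4-level paging constants; the
MAPCACHE_VIRT_START value used here is purely hypothetical and not Xen's
actual layout.

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT           12
#define PAGETABLE_ORDER      9
#define L2_PAGETABLE_SHIFT   (PAGE_SHIFT + PAGETABLE_ORDER)   /* 21 */
#define L2_PAGETABLE_ENTRIES (1u << PAGETABLE_ORDER)          /* 512 */

/* Placeholder base for the sketch only, 1Gb-aligned as the new
 * BUILD_BUG_ON() demands. */
#define MAPCACHE_VIRT_START  0xffff820000000000ull

int main(void)
{
    unsigned int idx = 42; /* an arbitrary mapcache slot */

    /*
     * With the L2 table's last slot pointing back at the L2 page itself,
     * a walk through that slot treats the mapcache's L1 tables as data
     * pages, so the L1 entry for slot 'idx' sits idx * 8 bytes into the
     * last 2Mb window of the region.
     */
    uint64_t l1ent_va = (MAPCACHE_VIRT_START |
                         ((uint64_t)(L2_PAGETABLE_ENTRIES - 1)
                          << L2_PAGETABLE_SHIFT)) +
                        (uint64_t)idx * sizeof(uint64_t);

    printf("L1 entry for mapcache index %u is at %#llx\n",
           idx, (unsigned long long)l1ent_va);
    return 0;
}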

diff --git a/xen/arch/x86/domain_page.c b/xen/arch/x86/domain_page.c
index b03728e18e..ed6a2bf081 100644
--- a/xen/arch/x86/domain_page.c
+++ b/xen/arch/x86/domain_page.c
@@ -65,7 +65,8 @@ void __init mapcache_override_current(struct vcpu *v)
 #define mapcache_l2_entry(e) ((e) >> PAGETABLE_ORDER)
 #define MAPCACHE_L2_ENTRIES (mapcache_l2_entry(MAPCACHE_ENTRIES - 1) + 1)
 #define MAPCACHE_L1ENT(idx) \
-    __linear_l1_table[l1_linear_offset(MAPCACHE_VIRT_START + pfn_to_paddr(idx))]
+    ((l1_pgentry_t *)(MAPCACHE_VIRT_START | \
+                      ((L2_PAGETABLE_ENTRIES - 1) << L2_PAGETABLE_SHIFT)))[idx]
 
 void *map_domain_page(mfn_t mfn)
 {
@@ -235,6 +236,7 @@ int mapcache_domain_init(struct domain *d)
 {
     struct mapcache_domain *dcache = &d->arch.pv.mapcache;
     unsigned int bitmap_pages;
+    int rc;
 
     ASSERT(is_pv_domain(d));
 
@@ -243,8 +245,10 @@ int mapcache_domain_init(struct domain *d)
         return 0;
 #endif
 
+    BUILD_BUG_ON(MAPCACHE_VIRT_START & ((1 << L3_PAGETABLE_SHIFT) - 1));
     BUILD_BUG_ON(MAPCACHE_VIRT_END + PAGE_SIZE * (3 +
-                 2 * PFN_UP(BITS_TO_LONGS(MAPCACHE_ENTRIES) * sizeof(long))) >
+                 2 * PFN_UP(BITS_TO_LONGS(MAPCACHE_ENTRIES) * sizeof(long))) +
+                 (1U << L2_PAGETABLE_SHIFT) >
                  MAPCACHE_VIRT_START + (PERDOMAIN_SLOT_MBYTES << 20));
     bitmap_pages = PFN_UP(BITS_TO_LONGS(MAPCACHE_ENTRIES) * sizeof(long));
     dcache->inuse = (void *)MAPCACHE_VIRT_END + PAGE_SIZE;
@@ -253,9 +257,25 @@ int mapcache_domain_init(struct domain *d)
 
     spin_lock_init(&dcache->lock);
 
-    return create_perdomain_mapping(d, (unsigned long)dcache->inuse,
-                                    2 * bitmap_pages + 1,
-                                    NIL(l1_pgentry_t *), NULL);
+    rc = create_perdomain_mapping(d, (unsigned long)dcache->inuse,
+                                  2 * bitmap_pages + 1,
+                                  NIL(l1_pgentry_t *), NULL);
+    if ( !rc )
+    {
+        /*
+         * Install mapping of our L2 table into its own last slot, for easy
+         * access to the L1 entries via MAPCACHE_L1ENT().
+         */
+        l3_pgentry_t *l3t = __map_domain_page(d->arch.perdomain_l3_pg);
+        l3_pgentry_t l3e = l3t[l3_table_offset(MAPCACHE_VIRT_END)];
+        l2_pgentry_t *l2t = map_l2t_from_l3e(l3e);
+
+        l2e_get_intpte(l2t[L2_PAGETABLE_ENTRIES - 1]) = l3e_get_intpte(l3e);
+        unmap_domain_page(l2t);
+        unmap_domain_page(l3t);
+    }
+
+    return rc;
 }
 
 int mapcache_vcpu_init(struct vcpu *v)
@@ -346,7 +366,7 @@ mfn_t domain_page_map_to_mfn(const void *ptr)
     else
     {
         ASSERT(va >= MAPCACHE_VIRT_START && va < MAPCACHE_VIRT_END);
-        pl1e = &__linear_l1_table[l1_linear_offset(va)];
+        pl1e = &MAPCACHE_L1ENT(PFN_DOWN(va - MAPCACHE_VIRT_START));
     }
 
     return l1e_get_mfn(*pl1e);
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 82bc676553..582ea09725 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -5953,6 +5953,10 @@ void free_perdomain_mappings(struct domain *d)
                 {
                     struct page_info *l1pg = l2e_get_page(l2tab[j]);
 
+                    /* mapcache_domain_init() installs a recursive entry. */
+                    if ( l1pg == l2pg )
+                        continue;
+
                     if ( l2e_get_flags(l2tab[j]) & _PAGE_AVAIL0 )
                     {
                         l1_pgentry_t *l1tab = __map_domain_page(l1pg);