|
|
161d5c0 |
From: Andrew Cooper <andrew.cooper3@citrix.com>
|
|
|
161d5c0 |
Subject: x86/pv: Track and flush non-coherent mappings of RAM
|
|
|
161d5c0 |
|
|
|
161d5c0 |
There are legitimate uses of WC mappings of RAM, e.g. for DMA buffers with
|
|
|
161d5c0 |
devices that make non-coherent writes. The Linux sound subsystem makes
|
|
|
161d5c0 |
extensive use of this technique.
|
|
|
161d5c0 |
|
|
|
161d5c0 |
For such use cases, the guest's DMA buffer is mapped and consistently used as
|
|
|
161d5c0 |
WC, and Xen doesn't interact with the buffer.
|
|
|
161d5c0 |
|
|
|
161d5c0 |
However, a mischievous guest can use WC mappings to deliberately create
|
|
|
161d5c0 |
non-coherency between the cache and RAM, and use this to trick Xen into
|
|
|
161d5c0 |
validating a pagetable which isn't actually safe.
|
|
|
161d5c0 |
|
|
|
161d5c0 |
Allocate a new PGT_non_coherent to track the non-coherency of mappings. Set
|
|
|
161d5c0 |
it whenever a non-coherent writeable mapping is created. If the page is used
|
|
|
161d5c0 |
as anything other than PGT_writable_page, force a cache flush before
|
|
|
161d5c0 |
validation. Also force a cache flush before the page is returned to the heap.
|
|
|
161d5c0 |
|
|
|
161d5c0 |
This is CVE-2022-26364, part of XSA-402.
|
|
|
161d5c0 |
|
|
|
161d5c0 |
Reported-by: Jann Horn <jannh@google.com>
|
|
|
161d5c0 |
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
|
|
|
161d5c0 |
Reviewed-by: George Dunlap <george.dunlap@citrix.com>
|
|
|
161d5c0 |
Reviewed-by: Jan Beulich <jbeulich@suse.com>
|
|
|
161d5c0 |
|
|
|
161d5c0 |
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
|
|
|
161d5c0 |
index ab32d13a1a0d..bab9624fabb7 100644
|
|
|
161d5c0 |
--- a/xen/arch/x86/mm.c
|
|
|
161d5c0 |
+++ b/xen/arch/x86/mm.c
|
|
|
161d5c0 |
@@ -997,6 +997,15 @@ get_page_from_l1e(
|
|
|
161d5c0 |
return -EACCES;
|
|
|
161d5c0 |
}
|
|
|
161d5c0 |
|
|
|
161d5c0 |
+ /*
|
|
|
161d5c0 |
+ * Track writeable non-coherent mappings to RAM pages, to trigger a cache
|
|
|
161d5c0 |
+ * flush later if the target is used as anything but a PGT_writable_page.
|
|
|
161d5c0 |
+ * We care about all writeable mappings, including foreign mappings.
|
|
|
161d5c0 |
+ */
|
|
|
161d5c0 |
+ if ( !boot_cpu_has(X86_FEATURE_XEN_SELFSNOOP) &&
|
|
|
161d5c0 |
+ (l1f & (PAGE_CACHE_ATTRS | _PAGE_RW)) == (_PAGE_WC | _PAGE_RW) )
|
|
|
161d5c0 |
+ set_bit(_PGT_non_coherent, &page->u.inuse.type_info);
|
|
|
161d5c0 |
+
|
|
|
161d5c0 |
return 0;
|
|
|
161d5c0 |
|
|
|
161d5c0 |
could_not_pin:
|
|
|
161d5c0 |
@@ -2454,6 +2463,19 @@ static int cleanup_page_mappings(struct page_info *page)
|
|
|
161d5c0 |
}
|
|
|
161d5c0 |
}
|
|
|
161d5c0 |
|
|
|
161d5c0 |
+ /*
|
|
|
161d5c0 |
+ * Flush the cache if there were previously non-coherent writeable
|
|
|
161d5c0 |
+ * mappings of this page. This forces the page to be coherent before it
|
|
|
161d5c0 |
+ * is freed back to the heap.
|
|
|
161d5c0 |
+ */
|
|
|
161d5c0 |
+ if ( __test_and_clear_bit(_PGT_non_coherent, &page->u.inuse.type_info) )
|
|
|
161d5c0 |
+ {
|
|
|
161d5c0 |
+ void *addr = __map_domain_page(page);
|
|
|
161d5c0 |
+
|
|
|
161d5c0 |
+ cache_flush(addr, PAGE_SIZE);
|
|
|
161d5c0 |
+ unmap_domain_page(addr);
|
|
|
161d5c0 |
+ }
|
|
|
161d5c0 |
+
|
|
|
161d5c0 |
return rc;
|
|
|
161d5c0 |
}
|
|
|
161d5c0 |
|
|
|
161d5c0 |
@@ -3028,6 +3050,22 @@ static int _get_page_type(struct page_info *page, unsigned long type,
|
|
|
161d5c0 |
if ( unlikely(!(nx & PGT_validated)) )
|
|
|
161d5c0 |
{
|
|
|
161d5c0 |
/*
|
|
|
161d5c0 |
+ * Flush the cache if there were previously non-coherent mappings of
|
|
|
161d5c0 |
+ * this page, and we're trying to use it as anything other than a
|
|
|
161d5c0 |
+ * writeable page. This forces the page to be coherent before we
|
|
|
161d5c0 |
+ * validate its contents for safety.
|
|
|
161d5c0 |
+ */
|
|
|
161d5c0 |
+ if ( (nx & PGT_non_coherent) && type != PGT_writable_page )
|
|
|
161d5c0 |
+ {
|
|
|
161d5c0 |
+ void *addr = __map_domain_page(page);
|
|
|
161d5c0 |
+
|
|
|
161d5c0 |
+ cache_flush(addr, PAGE_SIZE);
|
|
|
161d5c0 |
+ unmap_domain_page(addr);
|
|
|
161d5c0 |
+
|
|
|
161d5c0 |
+ page->u.inuse.type_info &= ~PGT_non_coherent;
|
|
|
161d5c0 |
+ }
|
|
|
161d5c0 |
+
|
|
|
161d5c0 |
+ /*
|
|
|
161d5c0 |
* No special validation needed for writable or shared pages. Page
|
|
|
161d5c0 |
* tables and GDT/LDT need to have their contents audited.
|
|
|
161d5c0 |
*
|
|
|
161d5c0 |
diff --git a/xen/arch/x86/pv/grant_table.c b/xen/arch/x86/pv/grant_table.c
|
|
|
161d5c0 |
index 0325618c9883..81c72e61ed55 100644
|
|
|
161d5c0 |
--- a/xen/arch/x86/pv/grant_table.c
|
|
|
161d5c0 |
+++ b/xen/arch/x86/pv/grant_table.c
|
|
|
161d5c0 |
@@ -109,7 +109,17 @@ int create_grant_pv_mapping(uint64_t addr, mfn_t frame,
|
|
|
161d5c0 |
|
|
|
161d5c0 |
ol1e = *pl1e;
|
|
|
161d5c0 |
if ( UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr, 0) )
|
|
|
161d5c0 |
+ {
|
|
|
161d5c0 |
+ /*
|
|
|
161d5c0 |
+ * We always create mappings in this path. However, our caller,
|
|
|
161d5c0 |
+ * map_grant_ref(), only passes potentially non-zero cache_flags for
|
|
|
161d5c0 |
+ * MMIO frames, so this path doesn't create non-coherent mappings of
|
|
|
161d5c0 |
+ * RAM frames and there's no need to calculate PGT_non_coherent.
|
|
|
161d5c0 |
+ */
|
|
|
161d5c0 |
+ ASSERT(!cache_flags || is_iomem_page(frame));
|
|
|
161d5c0 |
+
|
|
|
161d5c0 |
rc = GNTST_okay;
|
|
|
161d5c0 |
+ }
|
|
|
161d5c0 |
|
|
|
161d5c0 |
out_unlock:
|
|
|
161d5c0 |
page_unlock(page);
|
|
|
161d5c0 |
@@ -294,7 +304,18 @@ int replace_grant_pv_mapping(uint64_t addr, mfn_t frame,
|
|
|
161d5c0 |
l1e_get_flags(ol1e), addr, grant_pte_flags);
|
|
|
161d5c0 |
|
|
|
161d5c0 |
if ( UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr, 0) )
|
|
|
161d5c0 |
+ {
|
|
|
161d5c0 |
+ /*
|
|
|
161d5c0 |
+ * Generally, replace_grant_pv_mapping() is used to destroy mappings
|
|
|
161d5c0 |
+ * (nl1e = l1e_empty()), but it can be a present mapping on the
|
|
|
161d5c0 |
+ * GNTTABOP_unmap_and_replace path.
|
|
|
161d5c0 |
+ *
|
|
|
161d5c0 |
+ * In such cases, the PTE is fully transplanted from its old location
|
|
|
161d5c0 |
+ * via steal_linear_addr(), so we need not perform PGT_non_coherent
|
|
|
161d5c0 |
+ * checking here.
|
|
|
161d5c0 |
+ */
|
|
|
161d5c0 |
rc = GNTST_okay;
|
|
|
161d5c0 |
+ }
|
|
|
161d5c0 |
|
|
|
161d5c0 |
out_unlock:
|
|
|
161d5c0 |
page_unlock(page);
|
|
|
161d5c0 |
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
|
|
|
161d5c0 |
index 8a9a43bb0a9d..7464167ae192 100644
|
|
|
161d5c0 |
--- a/xen/include/asm-x86/mm.h
|
|
|
161d5c0 |
+++ b/xen/include/asm-x86/mm.h
|
|
|
161d5c0 |
@@ -53,8 +53,12 @@
|
|
|
161d5c0 |
#define _PGT_partial PG_shift(8)
|
|
|
161d5c0 |
#define PGT_partial PG_mask(1, 8)
|
|
|
161d5c0 |
|
|
|
161d5c0 |
+/* Has this page been mapped writeable with a non-coherent memory type? */
|
|
|
161d5c0 |
+#define _PGT_non_coherent PG_shift(9)
|
|
|
161d5c0 |
+#define PGT_non_coherent PG_mask(1, 9)
|
|
|
161d5c0 |
+
|
|
|
161d5c0 |
/* Count of uses of this frame as its current type. */
|
|
|
161d5c0 |
-#define PGT_count_width PG_shift(8)
|
|
|
161d5c0 |
+#define PGT_count_width PG_shift(9)
|
|
|
161d5c0 |
#define PGT_count_mask ((1UL<<PGT_count_width)-1)
|
|
|
161d5c0 |
|
|
|
161d5c0 |
/* Are the 'type mask' bits identical? */
|