Blob Blame History Raw
From: Roger Pau Monne <roger.pau@citrix.com>
Subject: x86/hap: do not switch on log dirty for VRAM tracking

XEN_DMOP_track_dirty_vram possibly calls into paging_log_dirty_enable
when using HAP mode, and it can interact badly with other ongoing
paging domctls, as XEN_DMOP_track_dirty_vram is not holding the domctl
lock.

This was detected as a result of the following assert triggering when
doing repeated migrations of a HAP HVM domain with a stubdom:

Assertion 'd->arch.paging.log_dirty.allocs == 0' failed at paging.c:198
----[ Xen-4.17-unstable  x86_64  debug=y  Not tainted ]----
CPU:    34
RIP:    e008:[<ffff82d040314b3b>] arch/x86/mm/paging.c#paging_free_log_dirty_bitmap+0x606/0x6
RFLAGS: 0000000000010206   CONTEXT: hypervisor (d0v23)
[...]
Xen call trace:
   [<ffff82d040314b3b>] R arch/x86/mm/paging.c#paging_free_log_dirty_bitmap+0x606/0x63a
   [<ffff82d040279f96>] S xsm/flask/hooks.c#domain_has_perm+0x5a/0x67
   [<ffff82d04031577f>] F paging_domctl+0x251/0xd41
   [<ffff82d04031640c>] F paging_domctl_continuation+0x19d/0x202
   [<ffff82d0403202fa>] F pv_hypercall+0x150/0x2a7
   [<ffff82d0403a729d>] F lstar_enter+0x12d/0x140

Such assert triggered because the stubdom used
XEN_DMOP_track_dirty_vram while dom0 was in the middle of executing
XEN_DOMCTL_SHADOW_OP_OFF, and so log dirty become enabled while
retiring the old structures, thus leading to new entries being
populated in already clear slots.

Fix this by not enabling log dirty for VRAM tracking, similar to what
is done when using shadow instead of HAP. Call
p2m_enable_hardware_log_dirty when enabling VRAM tracking in order to
get some hardware assistance if available. As a side effect the memory
pressure on the p2m pool should go down if only VRAM tracking is
enabled, as the dirty bitmap is no longer allocated.

Note that paging_log_dirty_range (used to get the dirty bitmap for
VRAM tracking) doesn't use the log dirty bitmap, and instead relies on
checking whether each gfn on the range has been switched from
p2m_ram_logdirty to p2m_ram_rw in order to account for dirty pages.

This is CVE-2022-26356 / XSA-397.

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/include/asm-x86/paging.h
+++ b/xen/include/asm-x86/paging.h
@@ -160,9 +160,6 @@ void paging_log_dirty_range(struct domai
                             unsigned long nr,
                             uint8_t *dirty_bitmap);
 
-/* enable log dirty */
-int paging_log_dirty_enable(struct domain *d, bool log_global);
-
 /* log dirty initialization */
 void paging_log_dirty_init(struct domain *d, const struct log_dirty_ops *ops);
 
--- a/xen/arch/x86/mm/hap/hap.c
+++ b/xen/arch/x86/mm/hap/hap.c
@@ -69,13 +69,6 @@ int hap_track_dirty_vram(struct domain *
     {
         int size = (nr + BITS_PER_BYTE - 1) / BITS_PER_BYTE;
 
-        if ( !paging_mode_log_dirty(d) )
-        {
-            rc = paging_log_dirty_enable(d, false);
-            if ( rc )
-                goto out;
-        }
-
         rc = -ENOMEM;
         dirty_bitmap = vzalloc(size);
         if ( !dirty_bitmap )
@@ -107,6 +100,10 @@ int hap_track_dirty_vram(struct domain *
 
             paging_unlock(d);
 
+            domain_pause(d);
+            p2m_enable_hardware_log_dirty(d);
+            domain_unpause(d);
+
             if ( oend > ostart )
                 p2m_change_type_range(d, ostart, oend,
                                       p2m_ram_logdirty, p2m_ram_rw);
--- a/xen/arch/x86/mm/paging.c
+++ b/xen/arch/x86/mm/paging.c
@@ -209,7 +209,7 @@ static int paging_free_log_dirty_bitmap(
     return rc;
 }
 
-int paging_log_dirty_enable(struct domain *d, bool log_global)
+static int paging_log_dirty_enable(struct domain *d, bool log_global)
 {
     int ret;