4d1d207
From b3ed8e344c733bc8c2223c1b9e424a9fbcea56d4 Mon Sep 17 00:00:00 2001
4d1d207
From: Peter Xu <peterx@redhat.com>
4d1d207
Date: Mon, 7 Feb 2022 20:30:19 +0800
4d1d207
Subject: [PATCH 6/6] memory: Fix qemu crash on starting dirty log twice with
4d1d207
 stopped VM
4d1d207
4d1d207
RH-Author: Peter Xu <peterx@redhat.com>
4d1d207
RH-MergeRequest: 77: memory: Fix qemu crash on continuous migrations of stopped VM
4d1d207
RH-Commit: [2/2] 98ed2ef6226ec80a1896ebb554015aded0dc0c18 (peterx/qemu-kvm)
4d1d207
RH-Bugzilla: 2044818
4d1d207
RH-Acked-by: Paolo Bonzini <None>
4d1d207
RH-Acked-by: David Hildenbrand <david@redhat.com>
4d1d207
RH-Acked-by: quintela1 <quintela@redhat.com>
4d1d207
4d1d207
QEMU can now easily crash with two continuous migration carried out:
4d1d207
4d1d207
(qemu) migrate -d exec:cat>out
4d1d207
(qemu) migrate_cancel
4d1d207
(qemu) migrate -d exec:cat>out
4d1d207
[crash] ../softmmu/memory.c:2782: memory_global_dirty_log_start: Assertion
4d1d207
`!(global_dirty_tracking & flags)' failed.
4d1d207
4d1d207
It's because memory API provides a way to postpone dirty log stop if the VM is
4d1d207
stopped, and that'll be re-done until the next VM start.  It was added in 2017
4d1d207
with commit 1931076077 ("migration: optimize the downtime", 2017-08-01).
4d1d207
4d1d207
However the recent work on allowing dirty tracking to be bitmask broke it,
4d1d207
which is commit 63b41db4bc ("memory: make global_dirty_tracking a bitmask",
4d1d207
2021-11-01).
4d1d207
4d1d207
The fix proposed in this patch contains two things:
4d1d207
4d1d207
  (1) Instead of passing over the flags to postpone stop dirty track, we add a
4d1d207
      global variable (along with current vmstate_change variable) to record
4d1d207
      what flags to stop dirty tracking.
4d1d207
4d1d207
  (2) When start dirty tracking, instead if remove the vmstate hook directly,
4d1d207
      we also execute the postponed stop process so that we make sure all the
4d1d207
      starts and stops will be paired.
4d1d207
4d1d207
This procedure is overlooked in the bitmask-ify work in 2021.
4d1d207
4d1d207
Cc: Hyman Huang <huangy81@chinatelecom.cn>
4d1d207
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2044818
4d1d207
Fixes: 63b41db4bc ("memory: make global_dirty_tracking a bitmask")
4d1d207
Signed-off-by: Peter Xu <peterx@redhat.com>
4d1d207
Message-Id: <20220207123019.27223-1-peterx@redhat.com>
4d1d207
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
4d1d207
(cherry picked from commit a5c90c61a118027b86155cffdf4fe4e2e9de1020)
4d1d207
Signed-off-by: Peter Xu <peterx@redhat.com>
4d1d207
---
4d1d207
 softmmu/memory.c | 61 +++++++++++++++++++++++++++++++++++-------------
4d1d207
 1 file changed, 45 insertions(+), 16 deletions(-)
4d1d207
4d1d207
diff --git a/softmmu/memory.c b/softmmu/memory.c
4d1d207
index 81d4bf1454..0311e362ee 100644
4d1d207
--- a/softmmu/memory.c
4d1d207
+++ b/softmmu/memory.c
4d1d207
@@ -2769,19 +2769,32 @@ void memory_global_after_dirty_log_sync(void)
4d1d207
     MEMORY_LISTENER_CALL_GLOBAL(log_global_after_sync, Forward);
4d1d207
 }
4d1d207
 
4d1d207
+/*
4d1d207
+ * Dirty track stop flags that are postponed due to VM being stopped.  Should
4d1d207
+ * only be used within vmstate_change hook.
4d1d207
+ */
4d1d207
+static unsigned int postponed_stop_flags;
4d1d207
 static VMChangeStateEntry *vmstate_change;
4d1d207
+static void memory_global_dirty_log_stop_postponed_run(void);
4d1d207
 
4d1d207
 void memory_global_dirty_log_start(unsigned int flags)
4d1d207
 {
4d1d207
-    unsigned int old_flags = global_dirty_tracking;
4d1d207
+    unsigned int old_flags;
4d1d207
+
4d1d207
+    assert(flags && !(flags & (~GLOBAL_DIRTY_MASK)));
4d1d207
 
4d1d207
     if (vmstate_change) {
4d1d207
-        qemu_del_vm_change_state_handler(vmstate_change);
4d1d207
-        vmstate_change = NULL;
4d1d207
+        /* If there is postponed stop(), operate on it first */
4d1d207
+        postponed_stop_flags &= ~flags;
4d1d207
+        memory_global_dirty_log_stop_postponed_run();
4d1d207
     }
4d1d207
 
4d1d207
-    assert(flags && !(flags & (~GLOBAL_DIRTY_MASK)));
4d1d207
-    assert(!(global_dirty_tracking & flags));
4d1d207
+    flags &= ~global_dirty_tracking;
4d1d207
+    if (!flags) {
4d1d207
+        return;
4d1d207
+    }
4d1d207
+
4d1d207
+    old_flags = global_dirty_tracking;
4d1d207
     global_dirty_tracking |= flags;
4d1d207
     trace_global_dirty_changed(global_dirty_tracking);
4d1d207
 
4d1d207
@@ -2809,29 +2822,45 @@ static void memory_global_dirty_log_do_stop(unsigned int flags)
4d1d207
     }
4d1d207
 }
4d1d207
 
4d1d207
+/*
4d1d207
+ * Execute the postponed dirty log stop operations if there is, then reset
4d1d207
+ * everything (including the flags and the vmstate change hook).
4d1d207
+ */
4d1d207
+static void memory_global_dirty_log_stop_postponed_run(void)
4d1d207
+{
4d1d207
+    /* This must be called with the vmstate handler registered */
4d1d207
+    assert(vmstate_change);
4d1d207
+
4d1d207
+    /* Note: postponed_stop_flags can be cleared in log start routine */
4d1d207
+    if (postponed_stop_flags) {
4d1d207
+        memory_global_dirty_log_do_stop(postponed_stop_flags);
4d1d207
+        postponed_stop_flags = 0;
4d1d207
+    }
4d1d207
+
4d1d207
+    qemu_del_vm_change_state_handler(vmstate_change);
4d1d207
+    vmstate_change = NULL;
4d1d207
+}
4d1d207
+
4d1d207
 static void memory_vm_change_state_handler(void *opaque, bool running,
4d1d207
                                            RunState state)
4d1d207
 {
4d1d207
-    unsigned int flags = (unsigned int)(uintptr_t)opaque;
4d1d207
     if (running) {
4d1d207
-        memory_global_dirty_log_do_stop(flags);
4d1d207
-
4d1d207
-        if (vmstate_change) {
4d1d207
-            qemu_del_vm_change_state_handler(vmstate_change);
4d1d207
-            vmstate_change = NULL;
4d1d207
-        }
4d1d207
+        memory_global_dirty_log_stop_postponed_run();
4d1d207
     }
4d1d207
 }
4d1d207
 
4d1d207
 void memory_global_dirty_log_stop(unsigned int flags)
4d1d207
 {
4d1d207
     if (!runstate_is_running()) {
4d1d207
+        /* Postpone the dirty log stop, e.g., to when VM starts again */
4d1d207
         if (vmstate_change) {
4d1d207
-            return;
4d1d207
+            /* Batch with previous postponed flags */
4d1d207
+            postponed_stop_flags |= flags;
4d1d207
+        } else {
4d1d207
+            postponed_stop_flags = flags;
4d1d207
+            vmstate_change = qemu_add_vm_change_state_handler(
4d1d207
+                memory_vm_change_state_handler, NULL);
4d1d207
         }
4d1d207
-        vmstate_change = qemu_add_vm_change_state_handler(
4d1d207
-                                memory_vm_change_state_handler,
4d1d207
-                                (void *)(uintptr_t)flags);
4d1d207
         return;
4d1d207
     }
4d1d207
 
4d1d207
-- 
4d1d207
2.27.0
4d1d207