From 4d2081bbd8b575e50a5d2a61acd0b796ec47757c Mon Sep 17 00:00:00 2001
From: Miroslav Rezanina
Date: Mon, 15 May 2023 14:23:54 +0000
Subject: * Mon May 15 2023 Miroslav Rezanina - 8.0.0-3

- kvm-migration-Handle-block-device-inactivation-failures-.patch [bz#2058982]
- kvm-migration-Minor-control-flow-simplification.patch [bz#2058982]
- Resolves: bz#2058982
  (Qemu core dump if cut off nfs storage during migration)
---
diff --git a/kvm-migration-Handle-block-device-inactivation-failures-.patch b/kvm-migration-Handle-block-device-inactivation-failures-.patch
new file mode 100644
index 0000000..26c8437
--- /dev/null
+++ b/kvm-migration-Handle-block-device-inactivation-failures-.patch
@@ -0,0 +1,133 @@
+From 2aac64623d8d2d06d248c1bcc71aa13572fc843c Mon Sep 17 00:00:00 2001
+From: Eric Blake
+Date: Fri, 14 Apr 2023 10:33:58 -0500
+Subject: [PATCH 1/2] migration: Handle block device inactivation failures
+ better
+
+RH-Author: Eric Blake
+RH-MergeRequest: 161: Avoid migration assertion from failed NFS server.
+RH-Bugzilla: 2058982
+RH-Acked-by: Miroslav Rezanina
+RH-Acked-by: quintela1
+RH-Commit: [1/2] 5ae143c9234f6eee9fc5154944172bcd56975b36 (ebblake/centos-qemu-kvm)
+
+Consider what happens when performing a migration between two host
+machines connected to an NFS server serving multiple block devices to
+the guest, when the NFS server becomes unavailable. The migration
+attempts to inactivate all block devices on the source (a necessary
+step before the destination can take over); but if the NFS server is
+non-responsive, the attempt to inactivate can itself fail. When that
+happens, the destination fails to get the migrated guest (good,
+because the source wasn't able to flush everything properly):
+
+ (qemu) qemu-kvm: load of migration failed: Input/output error
+
+at which point, our only hope for the guest is for the source to take
+back control. With the current code base, the host outputs a message,
+but then appears to resume:
+
+ (qemu) qemu-kvm: qemu_savevm_state_complete_precopy_non_iterable: bdrv_inactivate_all() failed (-1)
+
+ (src qemu) info status
+  VM status: running
+
+but a second migration attempt now asserts:
+
+ (src qemu) qemu-kvm: ../block.c:6738: int bdrv_inactivate_recurse(BlockDriverState *): Assertion `!(bs->open_flags & BDRV_O_INACTIVE)' failed.
+
+Whether the guest is recoverable on the source after the first failure
+is debatable, but what we do not want is to have qemu itself fail due
+to an assertion. It looks like the problem is as follows:
+
+In migration.c:migration_completion(), the source sets 'inactivate' to
+true (since COLO is not enabled), then tries
+savevm.c:qemu_savevm_state_complete_precopy() with a request to
+inactivate block devices. In turn, this calls
+block.c:bdrv_inactivate_all(), which fails when flushing runs up
+against the non-responsive NFS server. With savevm failing, we are
+now left in a state where some, but not all, of the block devices have
+been inactivated; but migration_completion() then jumps to 'fail'
+rather than 'fail_invalidate' and skips an attempt to reclaim those
+disks by calling bdrv_activate_all(). Even if we do attempt to
+reclaim disks, we aren't taking note of failure there, either.
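+
+A condensed sketch of the pre-patch control flow (simplified from
+migration.c:migration_completion(); not a verbatim quote):
+
+    bool inactivate = !migrate_colo_enabled();
+    ...
+    ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false,
+                                             inactivate);  /* ret < 0 here */
+    if (inactivate && ret >= 0) {
+        s->block_inactive = true;    /* skipped, so the flag is never set */
+    }
+    ...
+    if (ret < 0) {
+        goto fail;                   /* not fail_invalidate, so
+                                      * bdrv_activate_all() never runs */
+    }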
+
+Thus, we have reached a state where the migration engine has forgotten
+all state about whether a block device is inactive, because we did not
+set s->block_inactive in enough places; so migration allows the source
+to reach vm_start() and resume execution, violating the block layer
+invariant that the guest CPUs should not be restarted while a device
+is inactive. Note that the code in migration.c:migrate_fd_cancel()
+will also try to reactivate all block devices if s->block_inactive was
+set, but because we failed to set that flag after the first failure,
+the source assumes it has reclaimed all devices, even though it still
+has remaining inactivated devices and does not try again. Normally,
+qmp_cont() will also try to reactivate all disks (or correctly fail if
+the disks are not reclaimable because NFS is not yet back up), but the
+auto-resumption of the source after a migration failure does not go
+through qmp_cont(). And because we have left the block layer in an
+inconsistent state with devices still inactivated, the later migration
+attempt is hitting the assertion failure.
+
+Since it is important to not resume the source with inactive disks,
+this patch marks s->block_inactive before attempting inactivation,
+rather than after succeeding, in order to prevent any vm_start() until
+it has successfully reactivated all devices.
+
+See also https://bugzilla.redhat.com/show_bug.cgi?id=2058982
+
+Signed-off-by: Eric Blake
+Reviewed-by: Juan Quintela
+Acked-by: Lukas Straub
+Tested-by: Lukas Straub
+Signed-off-by: Juan Quintela
+(cherry picked from commit 403d18ae384239876764bbfa111d6cc5dcb673d1)
+---
+ migration/migration.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+diff --git a/migration/migration.c b/migration/migration.c
+index bda4789193..cb0d42c061 100644
+--- a/migration/migration.c
++++ b/migration/migration.c
+@@ -3444,13 +3444,11 @@ static void migration_completion(MigrationState *s)
+                                              MIGRATION_STATUS_DEVICE);
+             }
+             if (ret >= 0) {
++                s->block_inactive = inactivate;
+                 qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX);
+                 ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false,
+                                                          inactivate);
+             }
+-            if (inactivate && ret >= 0) {
+-                s->block_inactive = true;
+-            }
+         }
+         qemu_mutex_unlock_iothread();
+ 
+@@ -3522,6 +3520,7 @@ fail_invalidate:
+         bdrv_activate_all(&local_err);
+         if (local_err) {
+             error_report_err(local_err);
++            s->block_inactive = true;
+         } else {
+             s->block_inactive = false;
+         }
+-- 
+2.39.1
+
diff --git a/kvm-migration-Minor-control-flow-simplification.patch b/kvm-migration-Minor-control-flow-simplification.patch
new file mode 100644
index 0000000..a0dbdd9
--- /dev/null
+++ b/kvm-migration-Minor-control-flow-simplification.patch
@@ -0,0 +1,62 @@
+From c3bc974ea4b5186a76daa433209c1209d94dd0b7 Mon Sep 17 00:00:00 2001
+From: Eric Blake
+Date: Thu, 20 Apr 2023 09:35:51 -0500
+Subject: [PATCH 2/2] migration: Minor control flow simplification
+
+RH-Author: Eric Blake
+RH-MergeRequest: 161: Avoid migration assertion from failed NFS server.
+RH-Bugzilla: 2058982
+RH-Acked-by: Miroslav Rezanina
+RH-Acked-by: quintela1
+RH-Commit: [2/2] 5afd8c25d6f14bdb2a380ecc77bc6c2f2a26df87 (ebblake/centos-qemu-kvm)
+
+No need to declare a temporary variable.
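+
+In other words, rather than computing a temporary "bool inactivate"
+and copying it into s->block_inactive afterwards, the field is set
+directly and then reused as the function argument (a condensed
+sketch of the resulting flow, not a verbatim quote):
+
+    s->block_inactive = !migrate_colo_enabled();
+    ...
+    ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false,
+                                             s->block_inactive);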
+
+Suggested-by: Juan Quintela
+Fixes: 1df36e8c6289 ("migration: Handle block device inactivation failures better")
+Signed-off-by: Eric Blake
+Reviewed-by: Juan Quintela
+Signed-off-by: Juan Quintela
+(cherry picked from commit 5d39f44d7ac5c63f53d4d0900ceba9521bc27e49)
+---
+ migration/migration.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+diff --git a/migration/migration.c b/migration/migration.c
+index cb0d42c061..08007cef4e 100644
+--- a/migration/migration.c
++++ b/migration/migration.c
+@@ -3436,7 +3436,6 @@ static void migration_completion(MigrationState *s)
+         ret = global_state_store();
+ 
+         if (!ret) {
+-            bool inactivate = !migrate_colo_enabled();
+             ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
+             trace_migration_completion_vm_stop(ret);
+             if (ret >= 0) {
+@@ -3444,10 +3443,10 @@ static void migration_completion(MigrationState *s)
+                                              MIGRATION_STATUS_DEVICE);
+             }
+             if (ret >= 0) {
+-                s->block_inactive = inactivate;
++                s->block_inactive = !migrate_colo_enabled();
+                 qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX);
+                 ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false,
+-                                                         inactivate);
++                                                         s->block_inactive);
+             }
+         }
+         qemu_mutex_unlock_iothread();
+-- 
+2.39.1
+
diff --git a/qemu-kvm.spec b/qemu-kvm.spec
index 89ff0fd..4cb63d3 100644
--- a/qemu-kvm.spec
+++ b/qemu-kvm.spec
@@ -148,7 +148,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \
 Summary: QEMU is a machine emulator and virtualizer
 Name: qemu-kvm
 Version: 8.0.0
-Release: 2%{?rcrel}%{?dist}%{?cc_suffix}
+Release: 3%{?rcrel}%{?dist}%{?cc_suffix}
 # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped
 # Epoch 15 used for RHEL 8
 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5)
@@ -195,6 +195,10 @@ Patch20: kvm-acpi-pcihp-allow-repeating-hot-unplug-requests.patch
 # For bz#1934134 - ACPI table limits warning when booting guest with 512 VCPUs
 Patch21: kvm-hw-acpi-limit-warning-on-acpi-table-size-to-pc-machi.patch
 # For bz#1934134 - ACPI table limits warning when booting guest with 512 VCPUs
 Patch22: kvm-hw-acpi-Mark-acpi-blobs-as-resizable-on-RHEL-pc-mach.patch
+# For bz#2058982 - Qemu core dump if cut off nfs storage during migration
+Patch23: kvm-migration-Handle-block-device-inactivation-failures-.patch
+# For bz#2058982 - Qemu core dump if cut off nfs storage during migration
+Patch24: kvm-migration-Minor-control-flow-simplification.patch
 %if %{have_clang}
 BuildRequires: clang
@@ -1217,6 +1221,12 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \
 %endif
 
 %changelog
+* Mon May 15 2023 Miroslav Rezanina - 8.0.0-3
+- kvm-migration-Handle-block-device-inactivation-failures-.patch [bz#2058982]
+- kvm-migration-Minor-control-flow-simplification.patch [bz#2058982]
+- Resolves: bz#2058982
+  (Qemu core dump if cut off nfs storage during migration)
+
 * Mon May 08 2023 Miroslav Rezanina - 8.0.0-2
 - kvm-acpi-pcihp-allow-repeating-hot-unplug-requests.patch [bz#2087047]
 - kvm-hw-acpi-limit-warning-on-acpi-table-size-to-pc-machi.patch [bz#1934134]