qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PULL 10/12] migration/colo.c: Relaunch failover even if there was an er


From: Dr. David Alan Gilbert (git)
Subject: [PULL 10/12] migration/colo.c: Relaunch failover even if there was an error
Date: Mon, 1 Jun 2020 19:40:02 +0100

From: Lukas Straub <lukasstraub2@web.de>

If vmstate_loading is true, secondary_vm_do_failover will set failover
status to FAILOVER_STATUS_RELAUNCH and return success without initiating
failover. However, if there is an error during the vmstate_loading
section, failover isn't relaunched. Instead we then wait for
failover on colo_incoming_sem.

Fix this by relaunching failover even if there was an error. Also,
to make this work properly, set vmstate_loading to false when
returning during the vmstate_loading section.

Signed-off-by: Lukas Straub <lukasstraub2@web.de>
Message-Id: 
<f60b0a8e2fadaaec792e04819dfc46951842d6ba.1589193382.git.lukasstraub2@web.de>
Reviewed-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
---
 migration/colo.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/migration/colo.c b/migration/colo.c
index 4105999634..59639f519f 100644
--- a/migration/colo.c
+++ b/migration/colo.c
@@ -752,6 +752,7 @@ static void 
colo_incoming_process_checkpoint(MigrationIncomingState *mis,
     ret = qemu_load_device_state(fb);
     if (ret < 0) {
         error_setg(errp, "COLO: load device state failed");
+        vmstate_loading = false;
         qemu_mutex_unlock_iothread();
         return;
     }
@@ -760,6 +761,7 @@ static void 
colo_incoming_process_checkpoint(MigrationIncomingState *mis,
     replication_get_error_all(&local_err);
     if (local_err) {
         error_propagate(errp, local_err);
+        vmstate_loading = false;
         qemu_mutex_unlock_iothread();
         return;
     }
@@ -768,6 +770,7 @@ static void 
colo_incoming_process_checkpoint(MigrationIncomingState *mis,
     replication_do_checkpoint_all(&local_err);
     if (local_err) {
         error_propagate(errp, local_err);
+        vmstate_loading = false;
         qemu_mutex_unlock_iothread();
         return;
     }
@@ -779,6 +782,7 @@ static void 
colo_incoming_process_checkpoint(MigrationIncomingState *mis,
 
     if (local_err) {
         error_propagate(errp, local_err);
+        vmstate_loading = false;
         qemu_mutex_unlock_iothread();
         return;
     }
@@ -789,9 +793,6 @@ static void 
colo_incoming_process_checkpoint(MigrationIncomingState *mis,
     qemu_mutex_unlock_iothread();
 
     if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) {
-        failover_set_state(FAILOVER_STATUS_RELAUNCH,
-                        FAILOVER_STATUS_NONE);
-        failover_request_active(NULL);
         return;
     }
 
@@ -890,6 +891,14 @@ void *colo_process_incoming_thread(void *opaque)
             error_report_err(local_err);
             break;
         }
+
+        if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) {
+            failover_set_state(FAILOVER_STATUS_RELAUNCH,
+                            FAILOVER_STATUS_NONE);
+            failover_request_active(NULL);
+            break;
+        }
+
         if (failover_get_state() != FAILOVER_STATUS_NONE) {
             error_report("failover request");
             break;
@@ -897,8 +906,6 @@ void *colo_process_incoming_thread(void *opaque)
     }
 
 out:
-    vmstate_loading = false;
-
     /*
      * There are only two reasons we can get here, some error happened
      * or the user triggered failover.
-- 
2.26.2




reply via email to

[Prev in Thread] Current Thread [Next in Thread]