qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH COLO-Frame v10 24/38] COLO failover: Shutdown related socket fd when do failover


From: zhanghailiang
Subject: [Qemu-devel] [PATCH COLO-Frame v10 24/38] COLO failover: Shutdown related socket fd when do failover
Date: Tue, 3 Nov 2015 19:56:42 +0800

If the network connection between COLO's two sides is broken while the
colo/colo incoming thread is blocked in a 'read'/'write' on the socket fd,
the thread will not detect the error until the connection times out, which
can take a long time.

Here we shut down all the related socket file descriptors in the failover BH
to wake up the blocked operation. Besides, we should close the corresponding
file descriptors only after the failover BH has shut them down, or there will
be an error.

Signed-off-by: zhanghailiang <address@hidden>
Signed-off-by: Li Zhijian <address@hidden>
---
 migration/colo.c | 28 ++++++++++++++++++++++++++--
 1 file changed, 26 insertions(+), 2 deletions(-)

diff --git a/migration/colo.c b/migration/colo.c
index 247b40f..240ccda 100644
--- a/migration/colo.c
+++ b/migration/colo.c
@@ -74,6 +74,13 @@ static void secondary_vm_do_failover(void)
         /* recover runstate to normal migration finish state */
         autostart = true;
     }
+    /* Make sure colo incoming thread not block in recv */
+    if (mis->from_src_file) {
+        qemu_file_shutdown(mis->from_src_file);
+    }
+    if (mis->to_src_file) {
+        qemu_file_shutdown(mis->to_src_file);
+    }
 
     old_state = failover_set_state(FAILOVER_STATUS_HANDLING,
                                    FAILOVER_STATUS_COMPLETED);
@@ -99,6 +106,13 @@ static void primary_vm_do_failover(void)
     }
     migration_end();
 
+    if (s->from_dst_file) { /* Make sure colo thread no block in recv */
+        qemu_file_shutdown(s->from_dst_file);
+    }
+    if (s->to_dst_file) {
+        qemu_file_shutdown(s->to_dst_file);
+    }
+
     vm_start();
 
     old_state = failover_set_state(FAILOVER_STATUS_HANDLING,
@@ -342,7 +356,7 @@ static void colo_process_checkpoint(MigrationState *s)
 
 out:
     current_time = error_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
-    if (ret < 0) {
+    if (ret < 0 || (!ret && !failover_request_is_active())) {
         error_report("%s: %s", __func__, strerror(-ret));
         qapi_event_send_colo_exit(COLO_MODE_PRIMARY, COLO_EXIT_REASON_ERROR,
                                   true, strerror(-ret), NULL);
@@ -371,6 +385,11 @@ out:
     qsb_free(buffer);
     buffer = NULL;
 
+    /* Hope this not to be too long to loop here */
+    while (failover_get_state() != FAILOVER_STATUS_COMPLETED) {
+        ;
+    }
+    /* Must be called after failover BH is completed */
     if (s->from_dst_file) {
         qemu_fclose(s->from_dst_file);
     }
@@ -534,7 +553,7 @@ void *colo_process_incoming_thread(void *opaque)
 
 out:
     current_time = error_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
-    if (ret < 0) {
+    if (ret < 0 || (!ret && !failover_request_is_active())) {
         error_report("colo incoming thread will exit, detect error: %s",
                      strerror(-ret));
         qapi_event_send_colo_exit(COLO_MODE_SECONDARY, COLO_EXIT_REASON_ERROR,
@@ -573,6 +592,11 @@ out:
     */
     colo_release_ram_cache();
 
+    /* Hope this not to be too long to loop here */
+    while (failover_get_state() != FAILOVER_STATUS_COMPLETED) {
+        ;
+    }
+    /* Must be called after failover BH is completed */
     if (mis->to_src_file) {
         qemu_fclose(mis->to_src_file);
     }
-- 
1.8.3.1





reply via email to

[Prev in Thread] Current Thread [Next in Thread]