qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH v5 17/45] Add wrappers and handlers for sending/rece


From: Dr. David Alan Gilbert (git)
Subject: [Qemu-devel] [PATCH v5 17/45] Add wrappers and handlers for sending/receiving the postcopy-ram migration messages.
Date: Wed, 25 Feb 2015 16:51:40 +0000

From: "Dr. David Alan Gilbert" <address@hidden>

The state of the postcopy process is managed via a series of messages;
   * Add wrappers and handlers for sending/receiving these messages
   * Add state variable that track the current state of postcopy

Signed-off-by: Dr. David Alan Gilbert <address@hidden>
---
 include/migration/migration.h |  15 ++
 include/sysemu/sysemu.h       |  23 +++
 migration/migration.c         |  13 ++
 savevm.c                      | 325 ++++++++++++++++++++++++++++++++++++++++++
 trace-events                  |  11 ++
 5 files changed, 387 insertions(+)

diff --git a/include/migration/migration.h b/include/migration/migration.h
index f94af5b..81cd1f2 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -52,6 +52,14 @@ typedef struct MigrationState MigrationState;
 
 typedef QLIST_HEAD(, LoadStateEntry) LoadStateEntry_Head;
 
+typedef enum {
+    POSTCOPY_INCOMING_NONE = 0,  /* Initial state - no postcopy */
+    POSTCOPY_INCOMING_ADVISE,
+    POSTCOPY_INCOMING_LISTENING,
+    POSTCOPY_INCOMING_RUNNING,
+    POSTCOPY_INCOMING_END
+} PostcopyState;
+
 /* State for the incoming migration */
 struct MigrationIncomingState {
     QEMUFile *file;
@@ -59,6 +67,8 @@ struct MigrationIncomingState {
     /* See savevm.c */
     LoadStateEntry_Head loadvm_handlers;
 
+    PostcopyState postcopy_state;
+
     QEMUFile *return_path;
     QemuMutex      rp_mutex;    /* We send replies from multiple threads */
 };
@@ -219,4 +229,9 @@ size_t ram_control_save_page(QEMUFile *f, ram_addr_t 
block_offset,
                              ram_addr_t offset, size_t size,
                              int *bytes_sent);
 
+PostcopyState postcopy_state_get(MigrationIncomingState *mis);
+
+/* Set the state and return the old state */
+PostcopyState postcopy_state_set(MigrationIncomingState *mis,
+                                 PostcopyState new_state);
 #endif
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index 8da879f..d6a6d51 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -87,6 +87,18 @@ enum qemu_vm_cmd {
     MIG_CMD_INVALID = 0,       /* Must be 0 */
     MIG_CMD_OPEN_RETURN_PATH,  /* Tell the dest to open the Return path */
     MIG_CMD_PING,              /* Request a PONG on the RP */
+
+    MIG_CMD_POSTCOPY_ADVISE = 20,  /* Prior to any page transfers, just
+                                      warn we might want to do PC */
+    MIG_CMD_POSTCOPY_LISTEN,       /* Start listening for incoming
+                                      pages as it's running. */
+    MIG_CMD_POSTCOPY_RUN,          /* Start execution */
+    MIG_CMD_POSTCOPY_END,          /* Postcopy is finished. */
+
+    MIG_CMD_POSTCOPY_RAM_DISCARD,  /* A list of pages to discard that
+                                      were previously sent during
+                                      precopy but are dirty. */
+
 };
 
 bool qemu_savevm_state_blocked(Error **errp);
@@ -101,6 +113,17 @@ void qemu_savevm_command_send(QEMUFile *f, enum 
qemu_vm_cmd command,
                               uint16_t len, uint8_t *data);
 void qemu_savevm_send_ping(QEMUFile *f, uint32_t value);
 void qemu_savevm_send_open_return_path(QEMUFile *f);
+void qemu_savevm_send_postcopy_advise(QEMUFile *f);
+void qemu_savevm_send_postcopy_listen(QEMUFile *f);
+void qemu_savevm_send_postcopy_run(QEMUFile *f);
+void qemu_savevm_send_postcopy_end(QEMUFile *f, uint8_t status);
+
+void qemu_savevm_send_postcopy_ram_discard(QEMUFile *f, const char *name,
+                                           uint16_t len, uint8_t offset,
+                                           uint64_t *addrlist,
+                                           uint32_t *masklist);
+
+
 int qemu_loadvm_state(QEMUFile *f);
 
 /* SLIRP */
diff --git a/migration/migration.c b/migration/migration.c
index 434864a..957115a 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -971,3 +971,16 @@ void migrate_fd_connect(MigrationState *s)
     qemu_thread_create(&s->thread, "migration", migration_thread, s,
                        QEMU_THREAD_JOINABLE);
 }
+
+PostcopyState  postcopy_state_get(MigrationIncomingState *mis)
+{
+    return atomic_fetch_add(&mis->postcopy_state, 0);
+}
+
+/* Set the state and return the old state */
+PostcopyState postcopy_state_set(MigrationIncomingState *mis,
+                                 PostcopyState new_state)
+{
+    return atomic_xchg(&mis->postcopy_state, new_state);
+}
+
diff --git a/savevm.c b/savevm.c
index 4b619da..e31ccb0 100644
--- a/savevm.c
+++ b/savevm.c
@@ -39,6 +39,7 @@
 #include "exec/memory.h"
 #include "qmp-commands.h"
 #include "trace.h"
+#include "qemu/bitops.h"
 #include "qemu/iov.h"
 #include "block/snapshot.h"
 #include "block/qapi.h"
@@ -635,6 +636,90 @@ void qemu_savevm_send_open_return_path(QEMUFile *f)
     qemu_savevm_command_send(f, MIG_CMD_OPEN_RETURN_PATH, 0, NULL);
 }
 
+/* Send prior to any postcopy transfer */
+void qemu_savevm_send_postcopy_advise(QEMUFile *f)
+{
+    uint64_t tmp[2];
+    tmp[0] = cpu_to_be64(getpagesize());
+    tmp[1] = cpu_to_be64(1ul << qemu_target_page_bits());
+
+    trace_qemu_savevm_send_postcopy_advise();
+    qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_ADVISE, 16, (uint8_t *)tmp);
+}
+
+/* Prior to running, to cause pages that have been dirtied after precopy
+ * started to be discarded on the destination.
+ * CMD_POSTCOPY_RAM_DISCARD consist of:
+ *  3 byte header (filled in by qemu_savevm_send_postcopy_ram_discard)
+ *      byte   version (0)
+ *      byte   offset to be subtracted from each page address to deal with
+ *             RAMBlocks that don't start on a mask word boundary.
+ *      byte   Length of name field
+ *  n x byte   RAM block name (NOT 0 terminated)
+ *  n x
+ *      be64   Page addresses for start of an invalidation range
+ *      be32   mask of 32 pages, '1' to discard'
+ *
+ *  Hopefully this is pretty sparse so we don't get too many entries,
+ *  and using the mask should deal with most pagesize differences
+ *  just ending up as a single full mask
+ *
+ * The mask is always 32bits irrespective of the long size
+ *
+ *  name:  RAMBlock name that these entries are part of
+ *  len: Number of page entries
+ *  addrlist: 'len' addresses
+ *  masklist: 'len' masks (corresponding to the addresses)
+ */
+void qemu_savevm_send_postcopy_ram_discard(QEMUFile *f, const char *name,
+                                           uint16_t len, uint8_t offset,
+                                           uint64_t *addrlist,
+                                           uint32_t *masklist)
+{
+    uint8_t *buf;
+    uint16_t tmplen;
+    uint16_t t;
+
+    trace_qemu_savevm_send_postcopy_ram_discard();
+    buf = g_malloc0(len*12 + strlen(name) + 3);
+    buf[0] = 0; /* Version */
+    buf[1] = offset;
+    assert(strlen(name) < 256);
+    buf[2] = strlen(name);
+    memcpy(buf+3, name, strlen(name));
+    tmplen = 3+strlen(name);
+
+    for (t = 0; t < len; t++) {
+        cpu_to_be64w((uint64_t *)(buf + tmplen), addrlist[t]);
+        tmplen += 8;
+        cpu_to_be32w((uint32_t *)(buf + tmplen), masklist[t]);
+        tmplen += 4;
+    }
+    qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RAM_DISCARD, tmplen, buf);
+    g_free(buf);
+}
+
+/* Get the destination into a state where it can receive postcopy data. */
+void qemu_savevm_send_postcopy_listen(QEMUFile *f)
+{
+    trace_savevm_send_postcopy_listen();
+    qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_LISTEN, 0, NULL);
+}
+
+/* Kick the destination into running */
+void qemu_savevm_send_postcopy_run(QEMUFile *f)
+{
+    trace_savevm_send_postcopy_run();
+    qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RUN, 0, NULL);
+}
+
+/* End of postcopy - with a status byte; 0 is good, anything else is a fail */
+void qemu_savevm_send_postcopy_end(QEMUFile *f, uint8_t status)
+{
+    trace_savevm_send_postcopy_end();
+    qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_END, 1, &status);
+}
+
 bool qemu_savevm_state_blocked(Error **errp)
 {
     SaveStateEntry *se;
@@ -961,6 +1046,212 @@ enum LoadVMExitCodes {
 
 static int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis);
 
+/* ------ incoming postcopy messages ------ */
+/* 'advise' arrives before any transfers just to tell us that a postcopy
+ * *might* happen - it might be skipped if precopy transferred everything
+ * quickly.
+ */
+static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis,
+                                         uint64_t remote_hps,
+                                         uint64_t remote_tps)
+{
+    PostcopyState ps = postcopy_state_get(mis);
+    trace_loadvm_postcopy_handle_advise();
+    if (ps != POSTCOPY_INCOMING_NONE) {
+        error_report("CMD_POSTCOPY_ADVISE in wrong postcopy state (%d)", ps);
+        return -1;
+    }
+
+    if (remote_hps != getpagesize())  {
+        /*
+         * Some combinations of mismatch are probably possible but it gets
+         * a bit more complicated.  In particular we need to place whole
+         * host pages on the dest at once, and we need to ensure that we
+         * handle dirtying to make sure we never end up sending part of
+         * a hostpage on it's own.
+         */
+        error_report("Postcopy needs matching host page sizes (s=%d d=%d)",
+                     (int)remote_hps, getpagesize());
+        return -1;
+    }
+
+    if (remote_tps != (1ul << qemu_target_page_bits())) {
+        /*
+         * Again, some differences could be dealt with, but for now keep it
+         * simple.
+         */
+        error_report("Postcopy needs matching target page sizes (s=%d d=%d)",
+                     (int)remote_tps, 1 << qemu_target_page_bits());
+        return -1;
+    }
+
+    postcopy_state_set(mis, POSTCOPY_INCOMING_ADVISE);
+
+    return 0;
+}
+
+/* After postcopy we will be told to throw some pages away since they're
+ * dirty and will have to be demand fetched.  Must happen before CPU is
+ * started.
+ * There can be 0..many of these messages, each encoding multiple pages.
+ * Bits set in the message represent a page in the source VMs bitmap, but
+ * since the guest/target page sizes can be different on s/d then we have
+ * to convert.
+ */
+static int loadvm_postcopy_ram_handle_discard(MigrationIncomingState *mis,
+                                              uint16_t len)
+{
+    int tmp;
+    unsigned int first_bit_offset;
+    char ramid[256];
+    PostcopyState ps = postcopy_state_get(mis);
+
+    trace_loadvm_postcopy_ram_handle_discard();
+
+    if (ps != POSTCOPY_INCOMING_ADVISE) {
+        error_report("CMD_POSTCOPY_RAM_DISCARD in wrong postcopy state (%d)",
+                     ps);
+        return -1;
+    }
+    /* We're expecting a
+     *    3 byte header,
+     *    a RAM ID string
+     *    then at least 1 12 byte chunks
+    */
+    if (len < 16) {
+        error_report("CMD_POSTCOPY_RAM_DISCARD invalid length (%d)", len);
+        return -1;
+    }
+
+    tmp = qemu_get_byte(mis->file);
+    if (tmp != 0) {
+        error_report("CMD_POSTCOPY_RAM_DISCARD invalid version (%d)", tmp);
+        return -1;
+    }
+    first_bit_offset = qemu_get_byte(mis->file);
+
+    if (qemu_get_counted_string(mis->file, ramid)) {
+        error_report("CMD_POSTCOPY_RAM_DISCARD Failed to read RAMBlock ID");
+        return -1;
+    }
+
+    len -= 3+strlen(ramid);
+    if (len % 12) {
+        error_report("CMD_POSTCOPY_RAM_DISCARD invalid length (%d)", len);
+        return -1;
+    }
+    while (len) {
+        uint64_t startaddr;
+        uint32_t mask;
+        /*
+         * We now have pairs of address, mask
+         *   Each word of mask is 32 bits, where each bit corresponds to one
+         *   target page.
+         *   RAMBlocks don't necessarily start on word boundaries,
+         *   and the offset in the header indicates the offset into the 1st
+         *   mask word that corresponds to the 1st page of the RAMBlock.
+         */
+        startaddr = qemu_get_be64(mis->file);
+        mask = qemu_get_be32(mis->file);
+
+        len -= 12;
+
+        while (mask) {
+            /* mask= .....?10...0 */
+            /*             ^fs    */
+            int firstset = ctz32(mask);
+
+            /* tmp32=.....?11...1 */
+            /*             ^fs    */
+            uint32_t tmp32 = mask | ((((uint32_t)1)<<firstset)-1);
+
+            /* mask= .?01..10...0 */
+            /*         ^fz ^fs    */
+            int firstzero = cto32(tmp32);
+
+            if ((startaddr == 0) && (firstset < first_bit_offset)) {
+                error_report("CMD_POSTCOPY_RAM_DISCARD bad data; bit set"
+                               " prior to block; block=%s offset=%d"
+                               " firstset=%d\n", ramid, first_bit_offset,
+                               firstzero);
+                return -1;
+            }
+
+            /*
+             * we know there must be at least 1 bit set due to the loop entry
+             * If there is no 0 firstzero will be 32
+             */
+            /* TODO - ram_discard_range gets added in a later patch
+            int ret = ram_discard_range(mis, ramid,
+                                startaddr + firstset - first_bit_offset,
+                                startaddr + (firstzero - 1) - 
first_bit_offset);
+            ret = -1;
+            if (ret) {
+                return ret;
+            }
+            */
+
+            /* mask= .?0000000000 */
+            /*         ^fz ^fs    */
+            if (firstzero != 32) {
+                mask &= (((uint32_t)-1) << firstzero);
+            } else {
+                mask = 0;
+            }
+        }
+    }
+    trace_loadvm_postcopy_ram_handle_discard_end();
+
+    return 0;
+}
+
+/* After this message we must be able to immediately receive postcopy data */
+static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
+{
+    PostcopyState ps = postcopy_state_set(mis, POSTCOPY_INCOMING_LISTENING);
+    trace_loadvm_postcopy_handle_listen();
+    if (ps != POSTCOPY_INCOMING_ADVISE) {
+        error_report("CMD_POSTCOPY_LISTEN in wrong postcopy state (%d)", ps);
+        return -1;
+    }
+
+    /* TODO start up the postcopy listening thread */
+    return 0;
+}
+
+/* After all discards we can start running and asking for pages */
+static int loadvm_postcopy_handle_run(MigrationIncomingState *mis)
+{
+    PostcopyState ps = postcopy_state_set(mis, POSTCOPY_INCOMING_RUNNING);
+    trace_loadvm_postcopy_handle_run();
+    if (ps != POSTCOPY_INCOMING_LISTENING) {
+        error_report("CMD_POSTCOPY_RUN in wrong postcopy state (%d)", ps);
+        return -1;
+    }
+
+    if (autostart) {
+        /* Hold onto your hats, starting the CPU */
+        vm_start();
+    } else {
+        /* leave it paused and let management decide when to start the CPU */
+        runstate_set(RUN_STATE_PAUSED);
+    }
+
+    return 0;
+}
+
+/* The end - with a byte from the source which can tell us to fail. */
+static int loadvm_postcopy_handle_end(MigrationIncomingState *mis)
+{
+    PostcopyState ps = postcopy_state_get(mis);
+    trace_loadvm_postcopy_handle_end();
+    if (ps == POSTCOPY_INCOMING_NONE) {
+        error_report("CMD_POSTCOPY_END in wrong postcopy state (%d)", ps);
+        return -1;
+    }
+    return -1; /* TODO - expecting 1 byte good/fail */
+}
+
 static int loadvm_process_command_simple_lencheck(const char *name,
                                                   unsigned int actual,
                                                   unsigned int expected)
@@ -986,6 +1277,7 @@ static int loadvm_process_command(QEMUFile *f)
     uint16_t com;
     uint16_t len;
     uint32_t tmp32;
+    uint64_t tmp64a, tmp64b;
 
     com = qemu_get_be16(f);
     len = qemu_get_be16(f);
@@ -1023,6 +1315,39 @@ static int loadvm_process_command(QEMUFile *f)
         migrate_send_rp_pong(mis, tmp32);
         break;
 
+    case MIG_CMD_POSTCOPY_ADVISE:
+        if (loadvm_process_command_simple_lencheck("CMD_POSTCOPY_ADVISE",
+                                                   len, 16)) {
+            return -1;
+        }
+        tmp64a = qemu_get_be64(f); /* hps */
+        tmp64b = qemu_get_be64(f); /* tps */
+        return loadvm_postcopy_handle_advise(mis, tmp64a, tmp64b);
+
+    case MIG_CMD_POSTCOPY_LISTEN:
+        if (loadvm_process_command_simple_lencheck("CMD_POSTCOPY_LISTEN",
+                                                   len, 0)) {
+            return -1;
+        }
+        return loadvm_postcopy_handle_listen(mis);
+
+    case MIG_CMD_POSTCOPY_RUN:
+        if (loadvm_process_command_simple_lencheck("CMD_POSTCOPY_RUN",
+                                                   len, 0)) {
+            return -1;
+        }
+        return loadvm_postcopy_handle_run(mis);
+
+    case MIG_CMD_POSTCOPY_END:
+        if (loadvm_process_command_simple_lencheck("CMD_POSTCOPY_END",
+                                                   len, 1)) {
+            return -1;
+        }
+        return loadvm_postcopy_handle_end(mis);
+
+    case MIG_CMD_POSTCOPY_RAM_DISCARD:
+        return loadvm_postcopy_ram_handle_discard(mis, len);
+
     default:
         error_report("VM_COMMAND 0x%x unknown (len 0x%x)", com, len);
         return -1;
diff --git a/trace-events b/trace-events
index 4ff55fe..050f553 100644
--- a/trace-events
+++ b/trace-events
@@ -1171,11 +1171,22 @@ qemu_loadvm_state_main(void) ""
 qemu_loadvm_state_main_quit_parent(void) ""
 qemu_loadvm_state_post_main(int ret) "%d"
 qemu_loadvm_state_section_startfull(uint32_t section_id, const char *idstr, 
uint32_t instance_id, uint32_t version_id) "%u(%s) %u %u"
+loadvm_postcopy_handle_advise(void) ""
+loadvm_postcopy_handle_end(void) ""
+loadvm_postcopy_handle_listen(void) ""
+loadvm_postcopy_handle_run(void) ""
+loadvm_postcopy_ram_handle_discard(void) ""
+loadvm_postcopy_ram_handle_discard_end(void) ""
 loadvm_process_command(uint16_t com, uint16_t len) "com=0x%x len=%d"
 loadvm_process_command_ping(uint32_t val) "%x"
+qemu_savevm_send_postcopy_advise(void) ""
+qemu_savevm_send_postcopy_ram_discard(void) ""
 savevm_section_start(const char *id, unsigned int section_id) "%s, section_id 
%u"
 savevm_section_end(const char *id, unsigned int section_id, int ret) "%s, 
section_id %u -> %d"
 savevm_send_ping(uint32_t val) "%x"
+savevm_send_postcopy_end(void) ""
+savevm_send_postcopy_listen(void) ""
+savevm_send_postcopy_run(void) ""
 savevm_state_begin(void) ""
 savevm_state_header(void) ""
 savevm_state_iterate(void) ""
-- 
2.1.0




reply via email to

[Prev in Thread] Current Thread [Next in Thread]