[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH v5 17/45] Add wrappers and handlers for sending/rece
From: |
Dr. David Alan Gilbert (git) |
Subject: |
[Qemu-devel] [PATCH v5 17/45] Add wrappers and handlers for sending/receiving the postcopy-ram migration messages. |
Date: |
Wed, 25 Feb 2015 16:51:40 +0000 |
From: "Dr. David Alan Gilbert" <address@hidden>
The state of the postcopy process is managed via a series of messages;
* Add wrappers and handlers for sending/receiving these messages
* Add state variable that track the current state of postcopy
Signed-off-by: Dr. David Alan Gilbert <address@hidden>
---
include/migration/migration.h | 15 ++
include/sysemu/sysemu.h | 23 +++
migration/migration.c | 13 ++
savevm.c | 325 ++++++++++++++++++++++++++++++++++++++++++
trace-events | 11 ++
5 files changed, 387 insertions(+)
diff --git a/include/migration/migration.h b/include/migration/migration.h
index f94af5b..81cd1f2 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -52,6 +52,14 @@ typedef struct MigrationState MigrationState;
typedef QLIST_HEAD(, LoadStateEntry) LoadStateEntry_Head;
+typedef enum {
+ POSTCOPY_INCOMING_NONE = 0, /* Initial state - no postcopy */
+ POSTCOPY_INCOMING_ADVISE,
+ POSTCOPY_INCOMING_LISTENING,
+ POSTCOPY_INCOMING_RUNNING,
+ POSTCOPY_INCOMING_END
+} PostcopyState;
+
/* State for the incoming migration */
struct MigrationIncomingState {
QEMUFile *file;
@@ -59,6 +67,8 @@ struct MigrationIncomingState {
/* See savevm.c */
LoadStateEntry_Head loadvm_handlers;
+ PostcopyState postcopy_state;
+
QEMUFile *return_path;
QemuMutex rp_mutex; /* We send replies from multiple threads */
};
@@ -219,4 +229,9 @@ size_t ram_control_save_page(QEMUFile *f, ram_addr_t
block_offset,
ram_addr_t offset, size_t size,
int *bytes_sent);
+PostcopyState postcopy_state_get(MigrationIncomingState *mis);
+
+/* Set the state and return the old state */
+PostcopyState postcopy_state_set(MigrationIncomingState *mis,
+ PostcopyState new_state);
#endif
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index 8da879f..d6a6d51 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -87,6 +87,18 @@ enum qemu_vm_cmd {
MIG_CMD_INVALID = 0, /* Must be 0 */
MIG_CMD_OPEN_RETURN_PATH, /* Tell the dest to open the Return path */
MIG_CMD_PING, /* Request a PONG on the RP */
+
+ MIG_CMD_POSTCOPY_ADVISE = 20, /* Prior to any page transfers, just
+ warn we might want to do PC */
+ MIG_CMD_POSTCOPY_LISTEN, /* Start listening for incoming
+ pages as it's running. */
+ MIG_CMD_POSTCOPY_RUN, /* Start execution */
+ MIG_CMD_POSTCOPY_END, /* Postcopy is finished. */
+
+ MIG_CMD_POSTCOPY_RAM_DISCARD, /* A list of pages to discard that
+ were previously sent during
+ precopy but are dirty. */
+
};
bool qemu_savevm_state_blocked(Error **errp);
@@ -101,6 +113,17 @@ void qemu_savevm_command_send(QEMUFile *f, enum
qemu_vm_cmd command,
uint16_t len, uint8_t *data);
void qemu_savevm_send_ping(QEMUFile *f, uint32_t value);
void qemu_savevm_send_open_return_path(QEMUFile *f);
+void qemu_savevm_send_postcopy_advise(QEMUFile *f);
+void qemu_savevm_send_postcopy_listen(QEMUFile *f);
+void qemu_savevm_send_postcopy_run(QEMUFile *f);
+void qemu_savevm_send_postcopy_end(QEMUFile *f, uint8_t status);
+
+void qemu_savevm_send_postcopy_ram_discard(QEMUFile *f, const char *name,
+ uint16_t len, uint8_t offset,
+ uint64_t *addrlist,
+ uint32_t *masklist);
+
+
int qemu_loadvm_state(QEMUFile *f);
/* SLIRP */
diff --git a/migration/migration.c b/migration/migration.c
index 434864a..957115a 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -971,3 +971,16 @@ void migrate_fd_connect(MigrationState *s)
qemu_thread_create(&s->thread, "migration", migration_thread, s,
QEMU_THREAD_JOINABLE);
}
+
+PostcopyState postcopy_state_get(MigrationIncomingState *mis)
+{
+ return atomic_fetch_add(&mis->postcopy_state, 0);
+}
+
+/* Set the state and return the old state */
+PostcopyState postcopy_state_set(MigrationIncomingState *mis,
+ PostcopyState new_state)
+{
+ return atomic_xchg(&mis->postcopy_state, new_state);
+}
+
diff --git a/savevm.c b/savevm.c
index 4b619da..e31ccb0 100644
--- a/savevm.c
+++ b/savevm.c
@@ -39,6 +39,7 @@
#include "exec/memory.h"
#include "qmp-commands.h"
#include "trace.h"
+#include "qemu/bitops.h"
#include "qemu/iov.h"
#include "block/snapshot.h"
#include "block/qapi.h"
@@ -635,6 +636,90 @@ void qemu_savevm_send_open_return_path(QEMUFile *f)
qemu_savevm_command_send(f, MIG_CMD_OPEN_RETURN_PATH, 0, NULL);
}
+/* Send prior to any postcopy transfer */
+void qemu_savevm_send_postcopy_advise(QEMUFile *f)
+{
+ uint64_t tmp[2];
+ tmp[0] = cpu_to_be64(getpagesize());
+ tmp[1] = cpu_to_be64(1ul << qemu_target_page_bits());
+
+ trace_qemu_savevm_send_postcopy_advise();
+ qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_ADVISE, 16, (uint8_t *)tmp);
+}
+
+/* Prior to running, to cause pages that have been dirtied after precopy
+ * started to be discarded on the destination.
+ * CMD_POSTCOPY_RAM_DISCARD consist of:
+ * 3 byte header (filled in by qemu_savevm_send_postcopy_ram_discard)
+ * byte version (0)
+ * byte offset to be subtracted from each page address to deal with
+ * RAMBlocks that don't start on a mask word boundary.
+ * byte Length of name field
+ * n x byte RAM block name (NOT 0 terminated)
+ * n x
+ * be64 Page addresses for start of an invalidation range
+ * be32 mask of 32 pages, '1' to discard'
+ *
+ * Hopefully this is pretty sparse so we don't get too many entries,
+ * and using the mask should deal with most pagesize differences
+ * just ending up as a single full mask
+ *
+ * The mask is always 32bits irrespective of the long size
+ *
+ * name: RAMBlock name that these entries are part of
+ * len: Number of page entries
+ * addrlist: 'len' addresses
+ * masklist: 'len' masks (corresponding to the addresses)
+ */
+void qemu_savevm_send_postcopy_ram_discard(QEMUFile *f, const char *name,
+ uint16_t len, uint8_t offset,
+ uint64_t *addrlist,
+ uint32_t *masklist)
+{
+ uint8_t *buf;
+ uint16_t tmplen;
+ uint16_t t;
+
+ trace_qemu_savevm_send_postcopy_ram_discard();
+ buf = g_malloc0(len*12 + strlen(name) + 3);
+ buf[0] = 0; /* Version */
+ buf[1] = offset;
+ assert(strlen(name) < 256);
+ buf[2] = strlen(name);
+ memcpy(buf+3, name, strlen(name));
+ tmplen = 3+strlen(name);
+
+ for (t = 0; t < len; t++) {
+ cpu_to_be64w((uint64_t *)(buf + tmplen), addrlist[t]);
+ tmplen += 8;
+ cpu_to_be32w((uint32_t *)(buf + tmplen), masklist[t]);
+ tmplen += 4;
+ }
+ qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RAM_DISCARD, tmplen, buf);
+ g_free(buf);
+}
+
+/* Get the destination into a state where it can receive postcopy data. */
+void qemu_savevm_send_postcopy_listen(QEMUFile *f)
+{
+ trace_savevm_send_postcopy_listen();
+ qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_LISTEN, 0, NULL);
+}
+
+/* Kick the destination into running */
+void qemu_savevm_send_postcopy_run(QEMUFile *f)
+{
+ trace_savevm_send_postcopy_run();
+ qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RUN, 0, NULL);
+}
+
+/* End of postcopy - with a status byte; 0 is good, anything else is a fail */
+void qemu_savevm_send_postcopy_end(QEMUFile *f, uint8_t status)
+{
+ trace_savevm_send_postcopy_end();
+ qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_END, 1, &status);
+}
+
bool qemu_savevm_state_blocked(Error **errp)
{
SaveStateEntry *se;
@@ -961,6 +1046,212 @@ enum LoadVMExitCodes {
static int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis);
+/* ------ incoming postcopy messages ------ */
+/* 'advise' arrives before any transfers just to tell us that a postcopy
+ * *might* happen - it might be skipped if precopy transferred everything
+ * quickly.
+ */
+static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis,
+ uint64_t remote_hps,
+ uint64_t remote_tps)
+{
+ PostcopyState ps = postcopy_state_get(mis);
+ trace_loadvm_postcopy_handle_advise();
+ if (ps != POSTCOPY_INCOMING_NONE) {
+ error_report("CMD_POSTCOPY_ADVISE in wrong postcopy state (%d)", ps);
+ return -1;
+ }
+
+ if (remote_hps != getpagesize()) {
+ /*
+ * Some combinations of mismatch are probably possible but it gets
+ * a bit more complicated. In particular we need to place whole
+ * host pages on the dest at once, and we need to ensure that we
+ * handle dirtying to make sure we never end up sending part of
+ * a hostpage on it's own.
+ */
+ error_report("Postcopy needs matching host page sizes (s=%d d=%d)",
+ (int)remote_hps, getpagesize());
+ return -1;
+ }
+
+ if (remote_tps != (1ul << qemu_target_page_bits())) {
+ /*
+ * Again, some differences could be dealt with, but for now keep it
+ * simple.
+ */
+ error_report("Postcopy needs matching target page sizes (s=%d d=%d)",
+ (int)remote_tps, 1 << qemu_target_page_bits());
+ return -1;
+ }
+
+ postcopy_state_set(mis, POSTCOPY_INCOMING_ADVISE);
+
+ return 0;
+}
+
+/* After postcopy we will be told to throw some pages away since they're
+ * dirty and will have to be demand fetched. Must happen before CPU is
+ * started.
+ * There can be 0..many of these messages, each encoding multiple pages.
+ * Bits set in the message represent a page in the source VMs bitmap, but
+ * since the guest/target page sizes can be different on s/d then we have
+ * to convert.
+ */
+static int loadvm_postcopy_ram_handle_discard(MigrationIncomingState *mis,
+ uint16_t len)
+{
+ int tmp;
+ unsigned int first_bit_offset;
+ char ramid[256];
+ PostcopyState ps = postcopy_state_get(mis);
+
+ trace_loadvm_postcopy_ram_handle_discard();
+
+ if (ps != POSTCOPY_INCOMING_ADVISE) {
+ error_report("CMD_POSTCOPY_RAM_DISCARD in wrong postcopy state (%d)",
+ ps);
+ return -1;
+ }
+ /* We're expecting a
+ * 3 byte header,
+ * a RAM ID string
+ * then at least 1 12 byte chunks
+ */
+ if (len < 16) {
+ error_report("CMD_POSTCOPY_RAM_DISCARD invalid length (%d)", len);
+ return -1;
+ }
+
+ tmp = qemu_get_byte(mis->file);
+ if (tmp != 0) {
+ error_report("CMD_POSTCOPY_RAM_DISCARD invalid version (%d)", tmp);
+ return -1;
+ }
+ first_bit_offset = qemu_get_byte(mis->file);
+
+ if (qemu_get_counted_string(mis->file, ramid)) {
+ error_report("CMD_POSTCOPY_RAM_DISCARD Failed to read RAMBlock ID");
+ return -1;
+ }
+
+ len -= 3+strlen(ramid);
+ if (len % 12) {
+ error_report("CMD_POSTCOPY_RAM_DISCARD invalid length (%d)", len);
+ return -1;
+ }
+ while (len) {
+ uint64_t startaddr;
+ uint32_t mask;
+ /*
+ * We now have pairs of address, mask
+ * Each word of mask is 32 bits, where each bit corresponds to one
+ * target page.
+ * RAMBlocks don't necessarily start on word boundaries,
+ * and the offset in the header indicates the offset into the 1st
+ * mask word that corresponds to the 1st page of the RAMBlock.
+ */
+ startaddr = qemu_get_be64(mis->file);
+ mask = qemu_get_be32(mis->file);
+
+ len -= 12;
+
+ while (mask) {
+ /* mask= .....?10...0 */
+ /* ^fs */
+ int firstset = ctz32(mask);
+
+ /* tmp32=.....?11...1 */
+ /* ^fs */
+ uint32_t tmp32 = mask | ((((uint32_t)1)<<firstset)-1);
+
+ /* mask= .?01..10...0 */
+ /* ^fz ^fs */
+ int firstzero = cto32(tmp32);
+
+ if ((startaddr == 0) && (firstset < first_bit_offset)) {
+ error_report("CMD_POSTCOPY_RAM_DISCARD bad data; bit set"
+ " prior to block; block=%s offset=%d"
+ " firstset=%d\n", ramid, first_bit_offset,
+ firstzero);
+ return -1;
+ }
+
+ /*
+ * we know there must be at least 1 bit set due to the loop entry
+ * If there is no 0 firstzero will be 32
+ */
+ /* TODO - ram_discard_range gets added in a later patch
+ int ret = ram_discard_range(mis, ramid,
+ startaddr + firstset - first_bit_offset,
+ startaddr + (firstzero - 1) -
first_bit_offset);
+ ret = -1;
+ if (ret) {
+ return ret;
+ }
+ */
+
+ /* mask= .?0000000000 */
+ /* ^fz ^fs */
+ if (firstzero != 32) {
+ mask &= (((uint32_t)-1) << firstzero);
+ } else {
+ mask = 0;
+ }
+ }
+ }
+ trace_loadvm_postcopy_ram_handle_discard_end();
+
+ return 0;
+}
+
+/* After this message we must be able to immediately receive postcopy data */
+static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
+{
+ PostcopyState ps = postcopy_state_set(mis, POSTCOPY_INCOMING_LISTENING);
+ trace_loadvm_postcopy_handle_listen();
+ if (ps != POSTCOPY_INCOMING_ADVISE) {
+ error_report("CMD_POSTCOPY_LISTEN in wrong postcopy state (%d)", ps);
+ return -1;
+ }
+
+ /* TODO start up the postcopy listening thread */
+ return 0;
+}
+
+/* After all discards we can start running and asking for pages */
+static int loadvm_postcopy_handle_run(MigrationIncomingState *mis)
+{
+ PostcopyState ps = postcopy_state_set(mis, POSTCOPY_INCOMING_RUNNING);
+ trace_loadvm_postcopy_handle_run();
+ if (ps != POSTCOPY_INCOMING_LISTENING) {
+ error_report("CMD_POSTCOPY_RUN in wrong postcopy state (%d)", ps);
+ return -1;
+ }
+
+ if (autostart) {
+ /* Hold onto your hats, starting the CPU */
+ vm_start();
+ } else {
+ /* leave it paused and let management decide when to start the CPU */
+ runstate_set(RUN_STATE_PAUSED);
+ }
+
+ return 0;
+}
+
+/* The end - with a byte from the source which can tell us to fail. */
+static int loadvm_postcopy_handle_end(MigrationIncomingState *mis)
+{
+ PostcopyState ps = postcopy_state_get(mis);
+ trace_loadvm_postcopy_handle_end();
+ if (ps == POSTCOPY_INCOMING_NONE) {
+ error_report("CMD_POSTCOPY_END in wrong postcopy state (%d)", ps);
+ return -1;
+ }
+ return -1; /* TODO - expecting 1 byte good/fail */
+}
+
static int loadvm_process_command_simple_lencheck(const char *name,
unsigned int actual,
unsigned int expected)
@@ -986,6 +1277,7 @@ static int loadvm_process_command(QEMUFile *f)
uint16_t com;
uint16_t len;
uint32_t tmp32;
+ uint64_t tmp64a, tmp64b;
com = qemu_get_be16(f);
len = qemu_get_be16(f);
@@ -1023,6 +1315,39 @@ static int loadvm_process_command(QEMUFile *f)
migrate_send_rp_pong(mis, tmp32);
break;
+ case MIG_CMD_POSTCOPY_ADVISE:
+ if (loadvm_process_command_simple_lencheck("CMD_POSTCOPY_ADVISE",
+ len, 16)) {
+ return -1;
+ }
+ tmp64a = qemu_get_be64(f); /* hps */
+ tmp64b = qemu_get_be64(f); /* tps */
+ return loadvm_postcopy_handle_advise(mis, tmp64a, tmp64b);
+
+ case MIG_CMD_POSTCOPY_LISTEN:
+ if (loadvm_process_command_simple_lencheck("CMD_POSTCOPY_LISTEN",
+ len, 0)) {
+ return -1;
+ }
+ return loadvm_postcopy_handle_listen(mis);
+
+ case MIG_CMD_POSTCOPY_RUN:
+ if (loadvm_process_command_simple_lencheck("CMD_POSTCOPY_RUN",
+ len, 0)) {
+ return -1;
+ }
+ return loadvm_postcopy_handle_run(mis);
+
+ case MIG_CMD_POSTCOPY_END:
+ if (loadvm_process_command_simple_lencheck("CMD_POSTCOPY_END",
+ len, 1)) {
+ return -1;
+ }
+ return loadvm_postcopy_handle_end(mis);
+
+ case MIG_CMD_POSTCOPY_RAM_DISCARD:
+ return loadvm_postcopy_ram_handle_discard(mis, len);
+
default:
error_report("VM_COMMAND 0x%x unknown (len 0x%x)", com, len);
return -1;
diff --git a/trace-events b/trace-events
index 4ff55fe..050f553 100644
--- a/trace-events
+++ b/trace-events
@@ -1171,11 +1171,22 @@ qemu_loadvm_state_main(void) ""
qemu_loadvm_state_main_quit_parent(void) ""
qemu_loadvm_state_post_main(int ret) "%d"
qemu_loadvm_state_section_startfull(uint32_t section_id, const char *idstr,
uint32_t instance_id, uint32_t version_id) "%u(%s) %u %u"
+loadvm_postcopy_handle_advise(void) ""
+loadvm_postcopy_handle_end(void) ""
+loadvm_postcopy_handle_listen(void) ""
+loadvm_postcopy_handle_run(void) ""
+loadvm_postcopy_ram_handle_discard(void) ""
+loadvm_postcopy_ram_handle_discard_end(void) ""
loadvm_process_command(uint16_t com, uint16_t len) "com=0x%x len=%d"
loadvm_process_command_ping(uint32_t val) "%x"
+qemu_savevm_send_postcopy_advise(void) ""
+qemu_savevm_send_postcopy_ram_discard(void) ""
savevm_section_start(const char *id, unsigned int section_id) "%s, section_id
%u"
savevm_section_end(const char *id, unsigned int section_id, int ret) "%s,
section_id %u -> %d"
savevm_send_ping(uint32_t val) "%x"
+savevm_send_postcopy_end(void) ""
+savevm_send_postcopy_listen(void) ""
+savevm_send_postcopy_run(void) ""
savevm_state_begin(void) ""
savevm_state_header(void) ""
savevm_state_iterate(void) ""
--
2.1.0
- [Qemu-devel] [PATCH v5 10/45] Return path: Control commands, (continued)
- [Qemu-devel] [PATCH v5 10/45] Return path: Control commands, Dr. David Alan Gilbert (git), 2015/02/25
- [Qemu-devel] [PATCH v5 08/45] Return path: socket_writev_buffer: Block even on non-blocking fd's, Dr. David Alan Gilbert (git), 2015/02/25
- [Qemu-devel] [PATCH v5 13/45] ram_debug_dump_bitmap: Dump a migration bitmap as text, Dr. David Alan Gilbert (git), 2015/02/25
- [Qemu-devel] [PATCH v5 11/45] Return path: Send responses from destination to source, Dr. David Alan Gilbert (git), 2015/02/25
- [Qemu-devel] [PATCH v5 12/45] Return path: Source handling of return path, Dr. David Alan Gilbert (git), 2015/02/25
- [Qemu-devel] [PATCH v5 14/45] Move loadvm_handlers into MigrationIncomingState, Dr. David Alan Gilbert (git), 2015/02/25
- [Qemu-devel] [PATCH v5 15/45] Rework loadvm path for subloops, Dr. David Alan Gilbert (git), 2015/02/25
- [Qemu-devel] [PATCH v5 19/45] migrate_init: Call from savevm, Dr. David Alan Gilbert (git), 2015/02/25
- [Qemu-devel] [PATCH v5 18/45] MIG_CMD_PACKAGED: Send a packaged chunk of migration stream, Dr. David Alan Gilbert (git), 2015/02/25
- [Qemu-devel] [PATCH v5 16/45] Add migration-capability boolean for postcopy-ram., Dr. David Alan Gilbert (git), 2015/02/25
- [Qemu-devel] [PATCH v5 17/45] Add wrappers and handlers for sending/receiving the postcopy-ram migration messages.,
Dr. David Alan Gilbert (git) <=
- [Qemu-devel] [PATCH v5 21/45] Add Linux userfaultfd header, Dr. David Alan Gilbert (git), 2015/02/25
- [Qemu-devel] [PATCH v5 20/45] Modify savevm handlers for postcopy, Dr. David Alan Gilbert (git), 2015/02/25
- [Qemu-devel] [PATCH v5 24/45] MIG_STATE_POSTCOPY_ACTIVE: Add new migration state, Dr. David Alan Gilbert (git), 2015/02/25
- [Qemu-devel] [PATCH v5 22/45] postcopy: OS support test, Dr. David Alan Gilbert (git), 2015/02/25
- [Qemu-devel] [PATCH v5 23/45] migrate_start_postcopy: Command to trigger transition to postcopy, Dr. David Alan Gilbert (git), 2015/02/25
- [Qemu-devel] [PATCH v5 25/45] qemu_savevm_state_complete: Postcopy changes, Dr. David Alan Gilbert (git), 2015/02/25
- [Qemu-devel] [PATCH v5 26/45] Postcopy page-map-incoming (PMI) structure, Dr. David Alan Gilbert (git), 2015/02/25
- [Qemu-devel] [PATCH v5 29/45] postcopy: ram_enable_notify to switch on userfault, Dr. David Alan Gilbert (git), 2015/02/25
- [Qemu-devel] [PATCH v5 28/45] postcopy: Incoming initialisation, Dr. David Alan Gilbert (git), 2015/02/25
- [Qemu-devel] [PATCH v5 27/45] Postcopy: Maintain sentmap and calculate discard, Dr. David Alan Gilbert (git), 2015/02/25