From: Dr. David Alan Gilbert (git)
Subject: [Qemu-devel] [PATCH v4 29/47] Postcopy page-map-incoming (PMI) structure
Date: Fri, 3 Oct 2014 18:47:35 +0100

From: "Dr. David Alan Gilbert" <address@hidden>

The PMI holds the state of each page on the incoming side, so that
we can tell whether a page is missing, has already been received,
or has an outstanding request.
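
For reference, the two bitmaps encode the three states like this
(derived from postcopy_pmi_get_state_nolock() below):

  received_map  requested_map  state
       0              0        POSTCOPY_PMI_MISSING
       0              1        POSTCOPY_PMI_REQUESTED
       1              0        POSTCOPY_PMI_RECEIVED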

Signed-off-by: Dr. David Alan Gilbert <address@hidden>
---
 include/migration/migration.h    |  19 ++++
 include/migration/postcopy-ram.h |  12 +++
 include/qemu/typedefs.h          |   1 +
 postcopy-ram.c                   | 220 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 252 insertions(+)

diff --git a/include/migration/migration.h b/include/migration/migration.h
index 2ff9d35..1405a15 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -57,6 +57,24 @@ struct MigrationRetPathState {
 
 typedef struct MigrationState MigrationState;
 
+/* Postcopy page-map-incoming - data about each page on the inbound side */
+
+typedef enum {
+    POSTCOPY_PMI_MISSING,   /* Page hasn't yet been received */
+    POSTCOPY_PMI_REQUESTED, /* Kernel asked for a page, but we've not got it */
+    POSTCOPY_PMI_RECEIVED   /* We've got the page */
+} PostcopyPMIState;
+
+struct PostcopyPMI {
+    QemuMutex      mutex;
+    unsigned long *received_map;  /* Pages that we have received */
+    unsigned long *requested_map; /* Pages that we're sending a request for */
+    unsigned long  host_mask;     /* A mask with enough bits set to cover one
+                                     host page in the PMI */
+    unsigned long  host_bits;     /* The number of bits in the map representing
+                                     one host page */
+};
+
 /* State for the incoming migration */
 struct MigrationIncomingState {
     QEMUFile *file;
@@ -71,6 +89,7 @@ struct MigrationIncomingState {
 
     QEMUFile *return_path;
     QemuMutex      rp_mutex;    /* We send replies from multiple threads */
+    PostcopyPMI    postcopy_pmi;
 };
 
 MigrationIncomingState *migration_incoming_get_current(void);
diff --git a/include/migration/postcopy-ram.h b/include/migration/postcopy-ram.h
index dcd1afa..addb88a 100644
--- a/include/migration/postcopy-ram.h
+++ b/include/migration/postcopy-ram.h
@@ -13,7 +13,19 @@
 #ifndef QEMU_POSTCOPY_RAM_H
 #define QEMU_POSTCOPY_RAM_H
 
+#include "migration/migration.h"
+
 /* Return 0 if the host supports everything we need to do postcopy-ram */
 int postcopy_ram_hosttest(void);
 
+/*
+ * In 'advise' mode record that a page has been received.
+ */
+void postcopy_hook_early_receive(MigrationIncomingState *mis,
+                                 size_t bitmap_index);
+
+void postcopy_pmi_destroy(MigrationIncomingState *mis);
+void postcopy_pmi_discard_range(MigrationIncomingState *mis,
+                                size_t start, size_t npages);
+void postcopy_pmi_dump(MigrationIncomingState *mis);
 #endif
diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
index 8539de6..61b330c 100644
--- a/include/qemu/typedefs.h
+++ b/include/qemu/typedefs.h
@@ -77,6 +77,7 @@ typedef struct QEMUSGList QEMUSGList;
 typedef struct SHPCDevice SHPCDevice;
 typedef struct FWCfgState FWCfgState;
 typedef struct PcGuestInfo PcGuestInfo;
+typedef struct PostcopyPMI PostcopyPMI;
 typedef struct Range Range;
 typedef struct AdapterInfo AdapterInfo;
 
diff --git a/postcopy-ram.c b/postcopy-ram.c
index bba5c71..210585c 100644
--- a/postcopy-ram.c
+++ b/postcopy-ram.c
@@ -23,6 +23,9 @@
 #include "qemu-common.h"
 #include "migration/migration.h"
 #include "migration/postcopy-ram.h"
+#include "sysemu/sysemu.h"
+#include "qemu/bitmap.h"
+#include "qemu/error-report.h"
 
 //#define DEBUG_POSTCOPY
 
@@ -82,6 +85,216 @@
 #if defined(__linux__) && defined(MADV_USERFAULT) && \
                           defined(__NR_remap_anon_pages)
 
+/* ---------------------------------------------------------------------- */
+/* Postcopy page-map-incoming (PMI) - data structures that record the     */
+/* state of each page used by the inbound postcopy.                       */
+/* It's a pair of bitmaps (of the same structure as the migration bitmaps)*/
+/* holding one bit per target page, although all operations work on host  */
+/* pages.                                                                 */
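+/*
+ * For example, with 64k host pages and 4k target pages each host page
+ * covers 16 consecutive bits in the maps; those bits are always set
+ * and cleared together, so they can never disagree about the state of
+ * a host page.
+ */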
+__attribute__ (( unused )) /* Until later in patch series */
+static void postcopy_pmi_init(MigrationIncomingState *mis, size_t ram_pages)
+{
+    unsigned int tpb = qemu_target_page_bits();
+    unsigned long host_bits;
+
+    qemu_mutex_init(&mis->postcopy_pmi.mutex);
+    mis->postcopy_pmi.received_map = bitmap_new(ram_pages);
+    mis->postcopy_pmi.requested_map = bitmap_new(ram_pages);
+    bitmap_clear(mis->postcopy_pmi.received_map, 0, ram_pages);
+    bitmap_clear(mis->postcopy_pmi.requested_map, 0, ram_pages);
+    /*
+     * Each bit in the map represents one 'target page' which is no bigger
+     * than a host page but can be smaller.  It's useful to have some
+     * convenience masks for later
+     */
+
+    /*
+     * The number of bits one host page takes up in the bitmap
+     * e.g. on a 64k host page, 4k Target page, host_bits=64/4=16
+     */
+    host_bits = sysconf(_SC_PAGESIZE) / (1ul << tpb);
+    /* Should be a power of 2 */
+    assert(host_bits && !(host_bits & (host_bits - 1)));
+    /*
+     * If host_bits doesn't divide the number of bits in a long
+     * then the code gets a lot more complex; disallow that for now
+     * (I'm not aware of a system where it happens anyway)
+     */
+    assert(((sizeof(long) * 8) % host_bits) == 0);
+
+    mis->postcopy_pmi.host_bits = host_bits;
+    /* A mask, starting at bit 0, containing host_bits contiguous set bits */
+    mis->postcopy_pmi.host_mask = (1ul << host_bits) - 1;
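+    /* e.g. host_bits = 16 gives host_mask = 0xffff */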
+
+    assert((ram_pages % host_bits) == 0);
+}
+
+void postcopy_pmi_destroy(MigrationIncomingState *mis)
+{
+    if (mis->postcopy_pmi.received_map) {
+        g_free(mis->postcopy_pmi.received_map);
+        mis->postcopy_pmi.received_map = NULL;
+    }
+    if (mis->postcopy_pmi.requested_map) {
+        g_free(mis->postcopy_pmi.requested_map);
+        mis->postcopy_pmi.requested_map = NULL;
+    }
+    qemu_mutex_destroy(&mis->postcopy_pmi.mutex);
+}
+
+/*
+ * Mark a set of pages in the PMI as being clear; this is used by the discard
+ * at the start of postcopy, and before the postcopy stream starts.
+ */
+void postcopy_pmi_discard_range(MigrationIncomingState *mis,
+                                size_t start, size_t npages)
+{
+    bitmap_clear(mis->postcopy_pmi.received_map, start, npages);
+}
+
+/*
+ * Test a host page's worth of bits in the map starting at bitmap_index.
+ * The bits should all be consistent (all set or all clear).
+ */
+static bool test_hpbits(MigrationIncomingState *mis,
+                        size_t bitmap_index, unsigned long *map)
+{
+    long masked;
+
+    assert((bitmap_index & (mis->postcopy_pmi.host_bits - 1)) == 0);
+
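+    /*
+     * e.g. with host_bits = 16 and bitmap_index = 48: take the long
+     * containing bit 48, shift it right by 48 and mask with 0xffff,
+     * leaving just the 16 bits that cover this host page.
+     */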
+    masked = (map[BIT_WORD(bitmap_index)] >>
+               (bitmap_index % BITS_PER_LONG)) &
+             mis->postcopy_pmi.host_mask;
+
+    assert((masked == 0) || (masked == mis->postcopy_pmi.host_mask));
+    return !!masked;
+}
+
+/*
+ * Set a host page's worth of bits in the map starting at bitmap_index.
+ */
+static void set_hpbits(MigrationIncomingState *mis,
+                       size_t bitmap_index, unsigned long *map)
+{
+    assert((bitmap_index & (mis->postcopy_pmi.host_bits - 1)) == 0);
+
+    map[BIT_WORD(bitmap_index)] |= mis->postcopy_pmi.host_mask <<
+                                    (bitmap_index % BITS_PER_LONG);
+}
+
+/*
+ * Clear a host page's worth of bits in the map starting at bitmap_index.
+ */
+static void clear_hpbits(MigrationIncomingState *mis,
+                         size_t bitmap_index, unsigned long *map)
+{
+    assert((bitmap_index & (mis->postcopy_pmi.host_bits - 1)) == 0);
+
+    map[BIT_WORD(bitmap_index)] &= ~(mis->postcopy_pmi.host_mask <<
+                                    (bitmap_index % BITS_PER_LONG));
+}
+
+/*
+ * Retrieve the state of the given page
+ * Note: This version for use by callers already holding the lock
+ */
+static PostcopyPMIState postcopy_pmi_get_state_nolock(
+                            MigrationIncomingState *mis,
+                            size_t bitmap_index)
+{
+    bool received, requested;
+
+    received = test_hpbits(mis, bitmap_index, mis->postcopy_pmi.received_map);
+    requested = test_hpbits(mis, bitmap_index,
+                            mis->postcopy_pmi.requested_map);
+
+    if (received) {
+        assert(!requested);
+        return POSTCOPY_PMI_RECEIVED;
+    } else {
+        return requested ? POSTCOPY_PMI_REQUESTED : POSTCOPY_PMI_MISSING;
+    }
+}
+
+/* Retrieve the state of the given page */
+__attribute__ (( unused )) /* Until later in patch series */
+static PostcopyPMIState postcopy_pmi_get_state(MigrationIncomingState *mis,
+                                               size_t bitmap_index)
+{
+    PostcopyPMIState ret;
+    qemu_mutex_lock(&mis->postcopy_pmi.mutex);
+    ret = postcopy_pmi_get_state_nolock(mis, bitmap_index);
+    qemu_mutex_unlock(&mis->postcopy_pmi.mutex);
+
+    return ret;
+}
+
+/*
+ * Set the page state to the given state if the previous state was as expected
+ * Return the actual previous state.
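+ * The asserts below allow: MISSING -> REQUESTED, MISSING -> RECEIVED
+ * and REQUESTED -> RECEIVED.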
+ */
+__attribute__ (( unused )) /* Until later in patch series */
+static PostcopyPMIState postcopy_pmi_change_state(MigrationIncomingState *mis,
+                                           size_t bitmap_index,
+                                           PostcopyPMIState expected_state,
+                                           PostcopyPMIState new_state)
+{
+    PostcopyPMIState old_state;
+
+    qemu_mutex_lock(&mis->postcopy_pmi.mutex);
+    old_state = postcopy_pmi_get_state_nolock(mis, bitmap_index);
+
+    if (old_state == expected_state) {
+        switch (new_state) {
+        case POSTCOPY_PMI_MISSING:
+            assert(0); /* This shouldn't actually happen - use discard_range */
+            break;
+
+        case POSTCOPY_PMI_REQUESTED:
+            assert(old_state == POSTCOPY_PMI_MISSING);
+            set_hpbits(mis, bitmap_index, mis->postcopy_pmi.requested_map);
+            break;
+
+        case POSTCOPY_PMI_RECEIVED:
+            assert(old_state == POSTCOPY_PMI_MISSING ||
+                   old_state == POSTCOPY_PMI_REQUESTED);
+            set_hpbits(mis, bitmap_index, mis->postcopy_pmi.received_map);
+            clear_hpbits(mis, bitmap_index, mis->postcopy_pmi.requested_map);
+            break;
+        }
+    }
+
+    qemu_mutex_unlock(&mis->postcopy_pmi.mutex);
+    return old_state;
+}
+
+/*
+ * Useful when debugging postcopy, although if the migration failed early
+ * the received map can be quite sparse, and thus large when dumped.
+ */
+void postcopy_pmi_dump(MigrationIncomingState *mis)
+{
+    fprintf(stderr, "postcopy_pmi_dump: requested\n");
+    ram_debug_dump_bitmap(mis->postcopy_pmi.requested_map, false);
+    fprintf(stderr, "postcopy_pmi_dump: received\n");
+    ram_debug_dump_bitmap(mis->postcopy_pmi.received_map, true);
+    fprintf(stderr, "postcopy_pmi_dump: end\n");
+}
+
+/* Called by ram_load prior to mapping the page */
+void postcopy_hook_early_receive(MigrationIncomingState *mis,
+                                 size_t bitmap_index)
+{
+    if (mis->postcopy_ram_state == POSTCOPY_RAM_INCOMING_ADVISE) {
+        /*
+         * If we're in precopy-advise mode we need to track received pages,
+         * even though we don't need to place them atomically yet.
+         * In advise mode there's only a single thread, so we don't need locks.
+         */
+        set_bit(bitmap_index, mis->postcopy_pmi.received_map);
+    }
+}
+
 int postcopy_ram_hosttest(void)
 {
     /* TODO: Needs guarding with CONFIG_ once we have libc's that have the defs
@@ -156,5 +369,12 @@ int postcopy_ram_hosttest(void)
     return -1;
 }
 
+/* Called by ram_load prior to mapping the page */
+void postcopy_hook_early_receive(MigrationIncomingState *mis,
+                                 size_t bitmap_index)
+{
+    /* We don't support postcopy, so we don't care */
+}
+
 #endif
 
-- 
1.9.3



