qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH RFC 3/3] migration: use checkpoint during migration


From: Bohdan Trach
Subject: [Qemu-devel] [PATCH RFC 3/3] migration: use checkpoint during migration
Date: Fri, 17 Apr 2015 14:13:02 +0200

From: Bohdan Trach <address@hidden>

Extend memory page saving and loading functions to utilize information
available in checkpoints to avoid sending full pages over the network.

Signed-off-by: Bohdan Trach <address@hidden>
---
 arch_init.c | 167 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 158 insertions(+), 9 deletions(-)

diff --git a/arch_init.c b/arch_init.c
index eda86d4..fca56f0 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -128,6 +128,8 @@ static uint64_t bitmap_sync_count;
 #define RAM_SAVE_FLAG_CONTINUE 0x20
 #define RAM_SAVE_FLAG_XBZRLE   0x40
 /* 0x80 is reserved in migration.h start with 0x100 next */
+#define RAM_SAVE_FLAG_HASH     0x100
+#define RAM_SAVE_FLAG_PAGE_HASH     0x200
 
 static struct defconfig_file {
     const char *filename;
@@ -790,6 +792,7 @@ static int ram_save_page(QEMUFile *f, RAMBlock* block, 
ram_addr_t offset,
     uint8_t *p;
     int ret;
     bool send_async = true;
+    uint8_t hash[MD5_DIGEST_LENGTH];
 
     p = memory_region_get_ram_ptr(mr) + offset;
 
@@ -841,16 +844,45 @@ static int ram_save_page(QEMUFile *f, RAMBlock* block, 
ram_addr_t offset,
 
     /* XBZRLE overflow or normal page */
     if (pages == -1) {
-        *bytes_transferred += save_page_header(f, block,
-                                               offset | RAM_SAVE_FLAG_PAGE);
-        if (send_async) {
-            qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
-        } else {
-            qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
+        if (is_outgoing_with_checkpoint() &&
+            0 == strncmp(block->mr->name, "pc.ram", strlen("pc.ram"))) {
+            MD5(p, TARGET_PAGE_SIZE, hash);
+
+            if (NULL != bsearch(hash, hashes, hashes_entries,
+                                MD5_DIGEST_LENGTH, uint128_compare)) {
+
+                *bytes_transferred += save_page_header(f, block, offset | 
RAM_SAVE_FLAG_HASH);
+#ifdef DEBUG_HASH
+                qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
+                *bytes_transferred += TARGET_PAGE_SIZE;
+#endif
+                qemu_put_buffer(f, hash, MD5_DIGEST_LENGTH);
+                *bytes_transferred += MD5_DIGEST_LENGTH;
+                pages = 1;
+                DPRINTF("ram_save_page: FLAG_HASH guest_phy_addr=%08lx 
flags=%lx hash=%s\n", offset&TARGET_PAGE_MASK, (offset | RAM_SAVE_FLAG_HASH)& 
~TARGET_PAGE_MASK, md5s(hash));
+            } else {
+                *bytes_transferred += save_page_header(f, block, offset | 
RAM_SAVE_FLAG_PAGE_HASH);
+                qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
+                qemu_put_buffer(f, hash, MD5_DIGEST_LENGTH);
+                *bytes_transferred += TARGET_PAGE_SIZE;
+                *bytes_transferred += MD5_DIGEST_LENGTH;
+                pages = 1;
+                DPRINTF("ram_save_page: FLAG_PAGE_HASH guest_phy_addr=%08lx 
flags=%lx hash=%s\n", offset&TARGET_PAGE_MASK, (offset | 
RAM_SAVE_FLAG_PAGE_HASH)& ~TARGET_PAGE_MASK, md5s(hash));
+            }
+        }
+        if (pages == -1) {
+            *bytes_transferred += save_page_header(f, block,
+                                                   offset | 
RAM_SAVE_FLAG_PAGE);
+            if (send_async) {
+                qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
+            } else {
+                qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
+            }
+            *bytes_transferred += TARGET_PAGE_SIZE;
+            pages = 1;
+            acct_info.norm_pages++;
+            DPRINTF("ram_save_page: FLAG_PAGE guest_phy_addr=%08lx flags=%lx", 
offset&TARGET_PAGE_MASK, (offset | RAM_SAVE_FLAG_PAGE)& ~TARGET_PAGE_MASK);
         }
-        *bytes_transferred += TARGET_PAGE_SIZE;
-        pages = 1;
-        acct_info.norm_pages++;
     }
 
     XBZRLE_cache_unlock();
@@ -963,6 +995,8 @@ void free_xbzrle_decoded_buf(void)
     xbzrle_decoded_buf = NULL;
 }
 
+extern const char *checkpoint_path;
+
 static void migration_end(void)
 {
     if (migration_bitmap) {
@@ -1281,6 +1315,58 @@ void ram_handle_compressed(void *host, uint8_t ch, 
uint64_t size)
     }
 }
 
+/**
+ * If migration source determined we already have the chunk, it only
+ * sends a hash of the page's content. Read it from local storage,
+ * e.g., an old checkpoint.
+ * @param host Address which, after this function, should have a content 
matching the functions 2nd parameter.
+ * @param hash The hash value.
+ * @param size Size of the memory region in bytes. Typically, size is a single 
page, e.g., 4 KiB.
+ * @param fd file descriptor of checkpoint file
+ */
+void ram_handle_hash(void *host, uint64_t guest_phy_addr, uint8_t *hash, 
uint64_t size);
+void ram_handle_hash(void *host, uint64_t guest_phy_addr, uint8_t *hash, 
uint64_t size)
+{
+    assert(fd_checkpoint != -1);
+
+    /* fprintf(stdout, "ram_handle_hash: incoming has %u!\n", hash); */
+    uint8_t local_page_hash[MD5_DIGEST_LENGTH];
+    MD5(host, TARGET_PAGE_SIZE, local_page_hash);
+
+    if (0 != memcmp(local_page_hash, hash, MD5_DIGEST_LENGTH)) {
+        /* Computed hash does not match the hash the migration source
+           sent us for this page. */
+        hash_offset_entry* v = bsearch(hash, hash_offset_array, 
hash_offset_entries,
+                                       sizeof(hash_offset_entry), 
cmp_hash_offset_entry);
+        if (v == NULL) {
+            /* For some reason the source thought the destination
+               already has this block. But it doesn't. Hmmm ... */
+            DPRINTF("ram_handle_hash: unknown hash %s at guest phy addr 
%08lx\n", md5s(hash), guest_phy_addr);
+            assert(0);
+        }
+
+        DPRINTF("ram_handle_hash: guest_phy_addr=%08lx, hash=%s, 
offset=%08lx\n", guest_phy_addr, md5s(hash), v->offset);
+
+        off_t offset_actual = lseek(fd_checkpoint, v->offset, SEEK_SET);
+        assert(offset_actual == v->offset);
+
+        ssize_t read_actual = read(fd_checkpoint, host, TARGET_PAGE_SIZE);
+        assert(read_actual == TARGET_PAGE_SIZE);
+        MD5(host, TARGET_PAGE_SIZE, local_page_hash);
+        if (0 != memcmp(local_page_hash, hash, MD5_DIGEST_LENGTH)) {
+            DPRINTF("ram_handle_hash: local_page_hash=%s\n", 
md5s(local_page_hash));
+            assert(0);
+        }
+    }
+}
+
+static void add_remote_hash(ram_addr_t addr, uint8_t *hash) {
+    uint64_t page_nr = get_page_nr(addr);
+    memcpy(&hashes[page_nr * MD5_DIGEST_LENGTH],
+           hash,
+           MD5_DIGEST_LENGTH);
+}
+
 static int ram_load(QEMUFile *f, void *opaque, int version_id)
 {
     int flags = 0, ret = 0;
@@ -1302,6 +1388,7 @@ static int ram_load(QEMUFile *f, void *opaque, int 
version_id)
         ram_addr_t addr, total_ram_bytes;
         void *host;
         uint8_t ch;
+        uint8_t hash[MD5_DIGEST_LENGTH];
 
         addr = qemu_get_be64(f);
         flags = addr & ~TARGET_PAGE_MASK;
@@ -1354,6 +1441,61 @@ static int ram_load(QEMUFile *f, void *opaque, int 
version_id)
             }
             ch = qemu_get_byte(f);
             ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
+            DPRINTF("ram_load: FLAG_COMPRESS, addr=%08lx ch=%u\n", addr, ch);
+            if (fd_checkpoint != -1) {
+                if (ch != 0) {
+                    MD5(host, TARGET_PAGE_SIZE, hash);
+                    add_remote_hash(addr, hash);
+                } else {
+                    add_remote_hash(addr, all_zeroes_hash);
+                }
+            }
+            break;
+        case RAM_SAVE_FLAG_HASH:
+            host = host_from_stream_offset(f, addr, flags);
+            if (!host) {
+                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
+                ret = -EINVAL;
+                break;
+            }
+
+#ifdef DEBUG_HASH
+            uint8_t src_page[TARGET_PAGE_SIZE];
+            qemu_get_buffer(f, src_page, TARGET_PAGE_SIZE);
+#endif
+            qemu_get_buffer(f, hash, MD5_DIGEST_LENGTH);
+
+            ram_handle_hash(host, addr, hash, TARGET_PAGE_SIZE);
+
+#ifdef DEBUG_HASH
+            uint8_t local_hash[MD5_DIGEST_LENGTH];
+            MD5(host, TARGET_PAGE_SIZE, local_hash);
+            assert(0 == memcmp(local_hash, hash, MD5_DIGEST_LENGTH));
+
+            uint8_t src_page_hash[MD5_DIGEST_LENGTH];
+            MD5(src_page, TARGET_PAGE_SIZE, src_page_hash);
+            assert(0 == memcmp(src_page_hash, hash, MD5_DIGEST_LENGTH));
+            assert(0 == memcmp(src_page, host, TARGET_PAGE_SIZE));
+#endif
+            assert(is_ram_addr(host));
+            add_remote_hash(addr, hash);
+            DPRINTF("ram_load: FLAG_HASH, recv_hash=%s, addr=%08lx\n", 
md5s(hash), addr);
+            break;
+        case RAM_SAVE_FLAG_PAGE_HASH:
+            host = host_from_stream_offset(f, addr, flags);
+            if (!host) {
+                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
+                ret = -EINVAL;
+                break;
+            }
+
+            qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
+
+            qemu_get_buffer(f, hash, MD5_DIGEST_LENGTH);
+
+            assert(is_ram_addr(host));
+            add_remote_hash(addr, hash);
+            DPRINTF("ram_load: FLAG_PAGE_HASH, hash=%s, addr=%08lx\n", 
md5s(hash), addr);
             break;
         case RAM_SAVE_FLAG_PAGE:
             host = host_from_stream_offset(f, addr, flags);
@@ -1363,6 +1505,13 @@ static int ram_load(QEMUFile *f, void *opaque, int 
version_id)
                 break;
             }
             qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
+
+            if (is_ram_addr(host)) {
+                uint8_t hash[MD5_DIGEST_LENGTH];
+                MD5(host, TARGET_PAGE_SIZE, hash);
+                add_remote_hash(addr, hash);
+                DPRINTF("ram_load: FLAG_PAGE, addr=%08lx, hash=%s\n", addr, 
md5s(hash));
+            }
             break;
         case RAM_SAVE_FLAG_XBZRLE:
             host = host_from_stream_offset(f, addr, flags);
-- 
2.0.5




reply via email to

[Prev in Thread] Current Thread [Next in Thread]