[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH RFC 2/3] memory: implement checkpoint handling
From: |
Bohdan Trach |
Subject: |
[Qemu-devel] [PATCH RFC 2/3] memory: implement checkpoint handling |
Date: |
Fri, 17 Apr 2015 14:13:01 +0200 |
From: Bohdan Trach <address@hidden>
This commit adds functions, which are used to work with checkpoint
files. A new command-line option `-checkpoint` is added, which is used
to specify the checkpoint file. Currently, MD5 function from OpenSSL is
used to checkpoint memory.
Signed-off-by: Bohdan Trach <address@hidden>
---
arch_init.c | 149 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
configure | 2 +
qemu-options.hx | 9 ++++
vl.c | 12 +++++
4 files changed, 172 insertions(+)
diff --git a/arch_init.c b/arch_init.c
index b8a4fb1..eda86d4 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -27,6 +27,7 @@
#ifndef _WIN32
#include <sys/types.h>
#include <sys/mman.h>
+#include <openssl/md5.h>
#endif
#include "config.h"
#include "monitor/monitor.h"
@@ -140,6 +141,8 @@ static struct defconfig_file {
static const uint8_t ZERO_TARGET_PAGE[TARGET_PAGE_SIZE];
+int fd_checkpoint = -1;
+
int qemu_read_default_config_files(bool userconfig)
{
int ret;
@@ -184,6 +187,30 @@ static void XBZRLE_cache_lock(void)
qemu_mutex_lock(&XBZRLE.lock);
}
+#ifdef DEBUG_ARCH_INIT
+static char* md5s(const uint8_t *digest) {
+ /* MD5 is 16 bytes, i.e., 32 hexadecimal digits. + 1 for trailing \0. */
+ static const size_t size = 32 + 1;
+ static char hex_digits[32+1];
+
+ /* snprintf(hex_digits, */
+ /* MD5_DIGEST_LENGTH+1, */
+ /* "%016lx", */
+ /* *((uint64_t*)digest)); */
+ /* snprintf(hex_digits+MD5_DIGEST_LENGTH, */
+ /* MD5_DIGEST_LENGTH+1, */
+ /* "%016lx", */
+ /* *((uint64_t*)(digest+sizeof(uint64_t)))); */
+ int digit;
+ for (digit = 0; digit < 32; digit += 2) {
+ snprintf(hex_digits+digit, 3, "%02x", digest[digit/2]);
+ }
+
+ hex_digits[size-1] = '\0';
+ return hex_digits;
+}
+#endif
+
static void XBZRLE_cache_unlock(void)
{
if (migrate_use_xbzrle())
@@ -603,6 +630,126 @@ static void migration_bitmap_sync(void)
}
}
+int uint128_compare(const void*, const void*);
+int uint128_compare(const void* x, const void* y)
+{
+ return memcmp(x, y, MD5_DIGEST_LENGTH);
+}
+
+/* indexed by page number */
+static uint64_t hashes_size = 0;
+static uint64_t hashes_entries = 0;
+static uint8_t *hashes = 0;
+
+static uint32_t get_page_nr(uint64_t addr) {
+ assert((addr % TARGET_PAGE_SIZE) == 0);
+ return (addr / TARGET_PAGE_SIZE);
+}
+
+typedef struct {
+ uint8_t hash[MD5_DIGEST_LENGTH];
+ uint64_t offset;
+} hash_offset_entry;
+
+static uint64_t hash_offset_entries = 0;
+static uint64_t max_hash_offset_entries;
+static hash_offset_entry* hash_offset_array = 0;
+static uint8_t all_zeroes_hash[MD5_DIGEST_LENGTH];
+
+int cmp_hash_offset_entry(const void*, const void*);
+int cmp_hash_offset_entry(const void* a, const void* b) {
+ hash_offset_entry* e = (hash_offset_entry*) a;
+ hash_offset_entry* f = (hash_offset_entry*) b;
+
+ return memcmp(e->hash, f->hash, MD5_DIGEST_LENGTH);
+}
+
+void veecycle_init(void);
+void veecycle_init(void) {
+ RAMBlock *block;
+ block = QLIST_FIRST_RCU(&ram_list.blocks);
+ if (block == NULL)
+ return;
+ assert(0 == strncmp(block->mr->name, "pc.ram", strlen("pc.ram")));
+
+ max_hash_offset_entries = hashes_entries = (ram_size / TARGET_PAGE_SIZE);
+ DPRINTF("pages=%lu\n", hashes_entries);
+ hashes_size = hashes_entries * MD5_DIGEST_LENGTH;
+
+ hashes = g_malloc(hashes_size);
+ assert(0 != hashes);
+ bzero(hashes, hashes_size);
+
+ uint8_t all_zeroes[TARGET_PAGE_SIZE];
+ bzero(all_zeroes, TARGET_PAGE_SIZE);
+ MD5(all_zeroes, TARGET_PAGE_SIZE, all_zeroes_hash);
+
+ hash_offset_array = g_malloc(max_hash_offset_entries *
sizeof(hash_offset_entry));
+ assert(0 != hash_offset_array);
+ bzero(hash_offset_array, max_hash_offset_entries *
sizeof(hash_offset_entry));
+}
+
+void init_checksum_lookup_table(const char *checkpoint_path);
+void init_checksum_lookup_table(const char *checkpoint_path) {
+ int rc;
+ uint8_t* ram;
+ RAMBlock *block;
+
+ DPRINTF("ram_size=%lu\n", ram_size);
+
+ struct stat sb;
+ rc = stat(checkpoint_path, &sb);
+ if (rc == -1 && errno == ENOENT) return;
+ assert(0 == rc);
+
+ block = QLIST_FIRST_RCU(&ram_list.blocks);
+ assert(0 == strncmp(block->mr->name, "pc.ram", strlen("pc.ram")));
+ ram = block->host;
+ assert(block->used_length == ram_size);
+
+ /* Ignore checkpoint file if size is different from VM's current memory
size. */
+ assert(sb.st_size == ram_size);
+
+ fd_checkpoint = open(checkpoint_path, O_RDWR);
+ assert(fd_checkpoint != -1);
+
+ uint64_t idx;
+ for (idx=0; idx<ram_size; idx+=TARGET_PAGE_SIZE) {
+ rc = read(fd_checkpoint, ram+idx, TARGET_PAGE_SIZE);
+ assert(rc == TARGET_PAGE_SIZE);
+ assert(hash_offset_entries < max_hash_offset_entries);
+ MD5((unsigned char*)(ram+idx),
+ TARGET_PAGE_SIZE,
+ (unsigned char*)hash_offset_array[hash_offset_entries].hash);
+
+ hash_offset_array[hash_offset_entries].offset = idx;
+
+ DPRINTF("hash=%s offset=%08lx\n",
+ md5s(hash_offset_array[hash_offset_entries].hash),
+ hash_offset_array[hash_offset_entries].offset);
+
+ hash_offset_entries++;
+ };
+
+ qsort(hash_offset_array, hash_offset_entries, sizeof(hash_offset_entry),
+ cmp_hash_offset_entry);
+}
+
+int is_ram_addr(void* host);
+int is_ram_addr(void* host) {
+ static RAMBlock *block = NULL;
+
+ block = QLIST_FIRST_RCU(&ram_list.blocks);
+ assert(0 == strncmp(block->mr->name, "pc.ram", strlen("pc.ram")));
+
+ return (host >= memory_region_get_ram_ptr(block->mr) &&
+ host < memory_region_get_ram_ptr(block->mr) + block->used_length);
+}
+
+static int is_outgoing_with_checkpoint(void) {
+ return (fd_checkpoint != -1);
+}
+
void qmp_dump_pc_ram(const char *file, Error **errp) {
int rc;
@@ -869,6 +1016,8 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
bitmap_sync_count = 0;
migration_bitmap_sync_init();
+ qsort(hashes, hashes_entries, MD5_DIGEST_LENGTH, uint128_compare);
+
if (migrate_use_xbzrle()) {
XBZRLE_cache_lock();
XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
diff --git a/configure b/configure
index 6969f6f..fd6dd23 100755
--- a/configure
+++ b/configure
@@ -337,6 +337,8 @@ vhdx=""
quorum=""
numa=""
+LIBS="-lcrypto"
+
# parse CC options first
for opt do
optarg=`expr "x$opt" : 'x[^=]*=\(.*\)'`
diff --git a/qemu-options.hx b/qemu-options.hx
index 319d971..ece4758 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -268,6 +268,15 @@ If @var{slots} and @var{maxmem} are not specified, memory
hotplug won't
be enabled and the guest startup RAM will never increase.
ETEXI
+DEF("checkpoint", HAS_ARG, QEMU_OPTION_checkpoint,
+ "-checkpoint file path to checkpoint file\n", QEMU_ARCH_ALL)
+STEXI
address@hidden -checkpoint @var{path}
address@hidden -checkpoint
+Checkpoint file to use during incoming migrations.
+Reduces network traffic and total migration time.
+ETEXI
+
DEF("mem-path", HAS_ARG, QEMU_OPTION_mempath,
"-mem-path FILE provide backing storage for guest RAM\n", QEMU_ARCH_ALL)
STEXI
diff --git a/vl.c b/vl.c
index 74c2681..d423e99 100644
--- a/vl.c
+++ b/vl.c
@@ -134,6 +134,7 @@ int display_opengl;
static int display_remote;
const char* keyboard_layout = NULL;
ram_addr_t ram_size;
+const char *checkpoint_path = NULL;
const char *mem_path = NULL;
int mem_prealloc = 0; /* force preallocation of physical target memory */
bool enable_mlock = false;
@@ -2643,6 +2644,9 @@ out:
return 0;
}
+void init_checksum_lookup_table(const char *checkpoint_path);
+void veecycle_init(void);
+
static void set_memory_options(uint64_t *ram_slots, ram_addr_t *maxram_size)
{
uint64_t sz;
@@ -3116,6 +3120,9 @@ int main(int argc, char **argv, char **envp)
}
break;
#endif
+ case QEMU_OPTION_checkpoint:
+ checkpoint_path = optarg;
+ break;
case QEMU_OPTION_mempath:
mem_path = optarg;
break;
@@ -4331,6 +4338,7 @@ int main(int argc, char **argv, char **envp)
}
}
+ veecycle_init();
qdev_prop_check_globals();
if (vmstate_dump_file) {
/* dump and exit */
@@ -4339,6 +4347,10 @@ int main(int argc, char **argv, char **envp)
}
if (incoming) {
+ if (checkpoint_path) {
+ init_checksum_lookup_table(checkpoint_path);
+ }
+
Error *local_err = NULL;
qemu_start_incoming_migration(incoming, &local_err);
if (local_err) {
--
2.0.5