[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [RFC 13/13] snapshot: Remove page's write-protect and c
From: |
Dr. David Alan Gilbert |
Subject: |
Re: [Qemu-devel] [RFC 13/13] snapshot: Remove page's write-protect and copy the content during setup stage |
Date: |
Wed, 13 Jul 2016 18:52:53 +0100 |
User-agent: |
Mutt/1.6.1 (2016-04-27) |
* zhanghailiang (address@hidden) wrote:
> If we modify VM's RAM (pages) during setup stage after enable write-protect
> notification in snapshot thread, the modification action will get stuck
> because
> we only remove the page's write-protect in savevm process, it blocked by
> itself.
>
> To fix this bug, we will remove page's write-protect in fault thread during
> the setup stage. Besides, we should not try to get global lock after setup
> stage,
> or there maybe an deadlock error.
Hmm this complicates things a bit more doesn't it.
What's the order of:
a) setup
b) savings devices
c) Being able to transmit the pages?
Are these pages that are being modified during setup, being modified
as part of the device state save?
Dave
>
> Signed-off-by: zhanghailiang <address@hidden>
> ---
> include/migration/migration.h | 4 ++--
> migration/migration.c | 2 +-
> migration/postcopy-ram.c | 17 ++++++++++++++++-
> migration/ram.c | 37 +++++++++++++++++++++++++++++++------
> 4 files changed, 50 insertions(+), 10 deletions(-)
>
> diff --git a/include/migration/migration.h b/include/migration/migration.h
> index ef4c071..435de31 100644
> --- a/include/migration/migration.h
> +++ b/include/migration/migration.h
> @@ -127,7 +127,7 @@ struct MigrationSrcPageRequest {
> RAMBlock *rb;
> hwaddr offset;
> hwaddr len;
> -
> + uint8_t *pages_copy_addr;
> QSIMPLEQ_ENTRY(MigrationSrcPageRequest) next_req;
> };
>
> @@ -333,7 +333,7 @@ void global_state_store_running(void);
>
> void flush_page_queue(MigrationState *ms);
> int ram_save_queue_pages(MigrationState *ms, const char *rbname,
> - ram_addr_t start, ram_addr_t len);
> + ram_addr_t start, ram_addr_t len, bool copy_pages);
>
> PostcopyState postcopy_state_get(void);
> /* Set the state and return the old state */
> diff --git a/migration/migration.c b/migration/migration.c
> index 3765c3b..bf4c7a1 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -1248,7 +1248,7 @@ static void migrate_handle_rp_req_pages(MigrationState
> *ms, const char* rbname,
> return;
> }
>
> - if (ram_save_queue_pages(ms, rbname, start, len)) {
> + if (ram_save_queue_pages(ms, rbname, start, len, false)) {
> mark_source_rp_bad(ms);
> }
> }
> diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
> index 61392d3..2cf477d 100644
> --- a/migration/postcopy-ram.c
> +++ b/migration/postcopy-ram.c
> @@ -543,13 +543,28 @@ static void *postcopy_ram_fault_thread(void *opaque)
> MigrationState *ms = container_of(us, MigrationState,
> userfault_state);
> ret = ram_save_queue_pages(ms, qemu_ram_get_idstr(rb), rb_offset,
> - hostpagesize);
> + hostpagesize, true);
>
> if (ret < 0) {
> error_report("%s: Save: %"PRIx64 " failed!",
> __func__, (uint64_t)msg.arg.pagefault.address);
> break;
> }
> +
> + /* Note: In the setup process, snapshot_thread may modify VM's
> + * write-protected pages, we should not block it there, or there
> + * will be an deadlock error.
> + */
> + if (migration_in_setup(ms)) {
> + uint64_t host = msg.arg.pagefault.address;
> +
> + host &= ~(hostpagesize - 1);
> + ret = ram_set_pages_wp(host, getpagesize(), true,
> + us->userfault_fd);
> + if (ret < 0) {
> + error_report("Remove page's write-protect failed");
> + }
> + }
> }
> }
> trace_postcopy_ram_fault_thread_exit();
> diff --git a/migration/ram.c b/migration/ram.c
> index 8656719..747f9aa 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -233,6 +233,7 @@ struct PageSearchStatus {
> ram_addr_t offset;
> /* Set once we wrap around */
> bool complete_round;
> + uint8_t *pages_copy;
> };
> typedef struct PageSearchStatus PageSearchStatus;
>
> @@ -742,7 +743,12 @@ static int ram_save_page(QEMUFile *f, PageSearchStatus
> *pss,
> RAMBlock *block = pss->block;
> ram_addr_t offset = pss->offset;
>
> - p = block->host + offset;
> + /* If we have a copy of this page, use the backup page first */
> + if (pss->pages_copy) {
> + p = pss->pages_copy;
> + } else {
> + p = block->host + offset;
> + }
>
> /* In doubt sent page as normal */
> bytes_xmit = 0;
> @@ -926,7 +932,12 @@ static int ram_save_compressed_page(QEMUFile *f,
> PageSearchStatus *pss,
> RAMBlock *block = pss->block;
> ram_addr_t offset = pss->offset;
>
> - p = block->host + offset;
> + /* If we have a copy of this page, use the backup first */
> + if (pss->pages_copy) {
> + p = pss->pages_copy;
> + } else {
> + p = block->host + offset;
> + }
>
> bytes_xmit = 0;
> ret = ram_control_save_page(f, block->offset,
> @@ -1043,7 +1054,7 @@ static bool find_dirty_block(QEMUFile *f,
> PageSearchStatus *pss,
> * Returns: block (or NULL if none available)
> */
> static RAMBlock *unqueue_page(MigrationState *ms, ram_addr_t *offset,
> - ram_addr_t *ram_addr_abs)
> + ram_addr_t *ram_addr_abs, uint8
> **pages_copy_addr)
> {
> RAMBlock *block = NULL;
>
> @@ -1055,7 +1066,7 @@ static RAMBlock *unqueue_page(MigrationState *ms,
> ram_addr_t *offset,
> *offset = entry->offset;
> *ram_addr_abs = (entry->offset + entry->rb->offset) &
> TARGET_PAGE_MASK;
> -
> + *pages_copy_addr = entry->pages_copy_addr;
> if (entry->len > TARGET_PAGE_SIZE) {
> entry->len -= TARGET_PAGE_SIZE;
> entry->offset += TARGET_PAGE_SIZE;
> @@ -1086,9 +1097,10 @@ static bool get_queued_page(MigrationState *ms,
> PageSearchStatus *pss,
> RAMBlock *block;
> ram_addr_t offset;
> bool dirty;
> + uint8 *pages_backup_addr = NULL;
>
> do {
> - block = unqueue_page(ms, &offset, ram_addr_abs);
> + block = unqueue_page(ms, &offset, ram_addr_abs, &pages_backup_addr);
> /*
> * We're sending this page, and since it's postcopy nothing else
> * will dirty it, and we must make sure it doesn't get sent again
> @@ -1130,6 +1142,7 @@ static bool get_queued_page(MigrationState *ms,
> PageSearchStatus *pss,
> */
> pss->block = block;
> pss->offset = offset;
> + pss->pages_copy = pages_backup_addr;
> }
>
> return !!block;
> @@ -1166,7 +1179,7 @@ void flush_page_queue(MigrationState *ms)
> * Return: 0 on success
> */
> int ram_save_queue_pages(MigrationState *ms, const char *rbname,
> - ram_addr_t start, ram_addr_t len)
> + ram_addr_t start, ram_addr_t len, bool copy_pages)
> {
> RAMBlock *ramblock;
>
> @@ -1206,6 +1219,17 @@ int ram_save_queue_pages(MigrationState *ms, const
> char *rbname,
> new_entry->rb = ramblock;
> new_entry->offset = start;
> new_entry->len = len;
> + if (copy_pages) {
> + /* Fix me: Better to realize a memory pool */
> + new_entry->pages_copy_addr = g_try_malloc0(len);
> +
> + if (!new_entry->pages_copy_addr) {
> + error_report("%s: Failed to alloc memory", __func__);
> + return -1;
> + }
> +
> + memcpy(new_entry->pages_copy_addr, ramblock_ptr(ramblock, start),
> len);
> + }
>
> memory_region_ref(ramblock->mr);
> qemu_mutex_lock(&ms->src_page_req_mutex);
> @@ -1342,6 +1366,7 @@ static int ram_find_and_save_block(QEMUFile *f, bool
> last_stage,
> pss.block = last_seen_block;
> pss.offset = last_offset;
> pss.complete_round = false;
> + pss.pages_copy = NULL;
>
> if (!pss.block) {
> pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
> --
> 1.8.3.1
>
>
--
Dr. David Alan Gilbert / address@hidden / Manchester, UK
- Re: [Qemu-devel] [RFC 13/13] snapshot: Remove page's write-protect and copy the content during setup stage,
Dr. David Alan Gilbert <=