From: Peter Xu
Subject: Re: [PATCH v0 3/4] migration: add background snapshot
Date: Thu, 23 Jul 2020 18:15:44 -0400

On Wed, Jul 22, 2020 at 11:11:32AM +0300, Denis Plotnikov wrote:
> +static void *background_snapshot_thread(void *opaque)
> +{
> +    MigrationState *m = opaque;
> +    QIOChannelBuffer *bioc;
> +    QEMUFile *fb;
> +    int res = 0;
> +
> +    rcu_register_thread();
> +
> +    qemu_file_set_rate_limit(m->to_dst_file, INT64_MAX);
> +
> +    qemu_mutex_lock_iothread();
> +    vm_stop(RUN_STATE_PAUSED);
> +
> +    qemu_savevm_state_header(m->to_dst_file);
> +    qemu_mutex_unlock_iothread();
> +    qemu_savevm_state_setup(m->to_dst_file);

Is it intended to skip the BQL for the setup phase?  IIUC the main thread
could restart the VM before we take the lock again below, since we released
it...
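If the setup phase does need the lock, I'd expect something like the below
(untested sketch; it assumes qemu_savevm_state_setup() is safe to call with
the BQL held):

    qemu_mutex_lock_iothread();
    vm_stop(RUN_STATE_PAUSED);

    qemu_savevm_state_header(m->to_dst_file);
    /* Keep the BQL held across setup so the main thread can't restart
     * the VM in the meantime. */
    qemu_savevm_state_setup(m->to_dst_file);

    migrate_set_state(&m->state, MIGRATION_STATUS_SETUP,
                      MIGRATION_STATUS_ACTIVE);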

> +    qemu_mutex_lock_iothread();
> +
> +    migrate_set_state(&m->state, MIGRATION_STATUS_SETUP,
> +                      MIGRATION_STATUS_ACTIVE);
> +
> +    /*
> +     * We want to save the vm state for the moment when the snapshot saving was
> +     * called, but we also want to write the RAM content with the vm running.
> +     * The RAM content should appear first in the vmstate.
> +     * So we first save the non-ram part of the vmstate to a temporary buffer,
> +     * then write the ram part of the vmstate to the migration stream with vCPUs
> +     * running and, finally, write the non-ram part of the vmstate from the
> +     * buffer to the migration stream.
> +     */
> +    bioc = qio_channel_buffer_new(4096);
> +    qio_channel_set_name(QIO_CHANNEL(bioc), "vmstate-buffer");
> +    fb = qemu_fopen_channel_output(QIO_CHANNEL(bioc));
> +    object_unref(OBJECT(bioc));
> +
> +    if (ram_write_tracking_start()) {
> +        goto failed_resume;
> +    }
> +
> +    if (global_state_store()) {
> +        goto failed_resume;
> +    }

Is this needed?  We should always be in the stopped state here, right?

> +
> +    cpu_synchronize_all_states();
> +
> +    if (qemu_savevm_state_complete_precopy_non_iterable(fb, false, false)) {
> +        goto failed_resume;
> +    }
> +
> +    vm_start();
> +    qemu_mutex_unlock_iothread();
> +
> +    while (!res) {
> +        res = qemu_savevm_state_iterate(m->to_dst_file, false);
> +
> +        if (res < 0 || qemu_file_get_error(m->to_dst_file)) {
> +            goto failed;
> +        }
> +    }
> +
> +    /*
> +     * By this moment we have RAM content saved into the migration stream.
> +     * The next step is to flush the non-ram content (vm devices state)
> +     * right after the ram content. The device state was stored in
> +     * the temporary buffer prior to the ram saving.
> +     */
> +    qemu_put_buffer(m->to_dst_file, bioc->data, bioc->usage);
> +    qemu_fflush(m->to_dst_file);
> +
> +    if (qemu_file_get_error(m->to_dst_file)) {
> +        goto failed;
> +    }
> +
> +    migrate_set_state(&m->state, MIGRATION_STATUS_ACTIVE,
> +                      MIGRATION_STATUS_COMPLETED);
> +    goto exit;
> +
> +failed_resume:
> +    vm_start();
> +    qemu_mutex_unlock_iothread();
> +failed:
> +    migrate_set_state(&m->state, MIGRATION_STATUS_ACTIVE,
> +                      MIGRATION_STATUS_FAILED);
> +exit:
> +    ram_write_tracking_stop();
> +    qemu_fclose(fb);
> +    qemu_mutex_lock_iothread();
> +    qemu_savevm_state_cleanup();
> +    qemu_mutex_unlock_iothread();
> +    rcu_unregister_thread();
> +    return NULL;
> +}
> +
>  void migrate_fd_connect(MigrationState *s, Error *error_in)
>  {
>      Error *local_err = NULL;
> @@ -3599,8 +3694,14 @@ void migrate_fd_connect(MigrationState *s, Error *error_in)
>          migrate_fd_cleanup(s);
>          return;
>      }
> -    qemu_thread_create(&s->thread, "live_migration", migration_thread, s,
> -                       QEMU_THREAD_JOINABLE);
> +    if (migrate_background_snapshot()) {
> +        qemu_thread_create(&s->thread, "bg_snapshot",

Maybe the name "live_snapshot" suits it better (since the other one is
"live_migration")?

> +                           background_snapshot_thread, s,
> +                           QEMU_THREAD_JOINABLE);
> +    } else {
> +        qemu_thread_create(&s->thread, "live_migration", migration_thread, s,
> +                           QEMU_THREAD_JOINABLE);
> +    }
>      s->migration_thread_running = true;
>  }
>  

[...]

> @@ -1151,9 +1188,11 @@ static int save_normal_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
>      ram_counters.transferred += save_page_header(rs, rs->f, block,
>                                                   offset | RAM_SAVE_FLAG_PAGE);
>      if (async) {
> -        qemu_put_buffer_async(rs->f, buf, TARGET_PAGE_SIZE,
> -                              migrate_release_ram() &
> -                              migration_in_postcopy());
> +        bool may_free = migrate_background_snapshot() ||
> +                        (migrate_release_ram() &&
> +                         migration_in_postcopy());

Does background snapshot need to free the memory?  /me confused..
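If it doesn't, I'd expect the condition to keep only the release-ram case,
e.g. (sketch):

        /* Only release-ram during postcopy may free pages after sending */
        bool may_free = migrate_release_ram() &&
                        migration_in_postcopy();

        qemu_put_buffer_async(rs->f, buf, TARGET_PAGE_SIZE, may_free);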

> +
> +        qemu_put_buffer_async(rs->f, buf, TARGET_PAGE_SIZE, may_free);
>      } else {
>          qemu_put_buffer(rs->f, buf, TARGET_PAGE_SIZE);
>      }

[...]

> +void ram_block_list_create(void)
> +{
> +    RAMBlock *block = NULL;
> +    RamBlockList *block_list = ram_bgs_block_list_get();
> +
> +    qemu_mutex_lock_ramlist();
> +    RAMBLOCK_FOREACH_MIGRATABLE(block) {
> +        memory_region_ref(block->mr);
> +        QLIST_INSERT_HEAD(block_list, block, bgs_next);
> +    }
> +    qemu_mutex_unlock_ramlist();

This kind of duplicate with ram_list.blocks itself...
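E.g. whoever consumes the list could presumably walk the migratable blocks
directly instead; a rough sketch:

    RAMBlock *block;

    WITH_RCU_READ_LOCK_GUARD() {
        RAMBLOCK_FOREACH_MIGRATABLE(block) {
            /* Use the block in place; no side list to maintain. */
        }
    }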

> +}
> +
> +static int page_fault_fd;
> +static int thread_quit_fd;
> +static QemuThread page_fault_thread;
> +
> +static int mem_change_wp(void *addr, uint64_t length, bool protect)
> +{
> +    struct uffdio_writeprotect wp = { 0 };
> +
> +    assert(page_fault_fd);
> +
> +    if (protect) {
> +        struct uffdio_register reg = { 0 };
> +
> +        reg.mode = UFFDIO_REGISTER_MODE_WP;
> +        reg.range.start = (uint64_t) addr;
> +        reg.range.len = length;
> +
> +        if (ioctl(page_fault_fd, UFFDIO_REGISTER, &reg)) {
> +            error_report("Can't register memory at %p len: %"PRIu64
> +                         " for page fault interception", addr, length);
> +            return -1;
> +        }

IMHO it's better to move the register out of mem_change_wp().  mem_change_wp()
should work at page granularity, while we should make it clear in the code that
the registration happens per-ramblock.

Btw, is UFFDIO_UNREGISTER missing in the whole process?
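Something along these lines is what I had in mind (untested sketch; the
helper names are made up):

/* Per-ramblock: register/unregister a whole block for uffd-wp faults. */
static int uffd_register_ramblock(void *addr, uint64_t length)
{
    struct uffdio_register reg = { 0 };

    reg.mode = UFFDIO_REGISTER_MODE_WP;
    reg.range.start = (uint64_t) addr;
    reg.range.len = length;

    return ioctl(page_fault_fd, UFFDIO_REGISTER, &reg);
}

static int uffd_unregister_ramblock(void *addr, uint64_t length)
{
    struct uffdio_range range = { (uint64_t) addr, length };

    return ioctl(page_fault_fd, UFFDIO_UNREGISTER, &range);
}

/* Page granularity: only flip the wp bit on already-registered pages. */
static int mem_change_wp(void *addr, uint64_t length, bool protect)
{
    struct uffdio_writeprotect wp = { 0 };

    wp.range.start = (uint64_t) addr;
    wp.range.len = length;
    wp.mode = protect ? UFFDIO_WRITEPROTECT_MODE_WP : 0;

    return ioctl(page_fault_fd, UFFDIO_WRITEPROTECT, &wp);
}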

> +
> +        wp.mode = UFFDIO_WRITEPROTECT_MODE_WP;
> +    }

[...]

> @@ -2338,6 +2881,11 @@ static void ram_list_init_bitmaps(void)
>              bitmap_set(block->bmap, 0, pages);
>              block->clear_bmap_shift = shift;
>              block->clear_bmap = bitmap_new(clear_bmap_size(pages, shift));
> +
> +            if (migrate_background_snapshot()) {
> +                block->touched_map = bitmap_new(pages);
> +                block->copied_map = bitmap_new(pages);
> +            }

We should be able to avoid allocating bmap & clear_bmap for snapshots.  Or
could we directly reuse those two bitmaps?
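I.e. roughly (sketch; whether the snapshot path can reuse bmap/clear_bmap
depends on how it consumes the bits):

            if (migrate_background_snapshot()) {
                /* Snapshot-only tracking; skip the precopy bitmaps. */
                block->touched_map = bitmap_new(pages);
                block->copied_map = bitmap_new(pages);
            } else {
                block->bmap = bitmap_new(pages);
                bitmap_set(block->bmap, 0, pages);
                block->clear_bmap_shift = shift;
                block->clear_bmap = bitmap_new(clear_bmap_size(pages, shift));
            }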

-- 
Peter Xu



