[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH RESEND v3 08/10] migration: Implement dirty-limit convergence
From: |
Markus Armbruster |
Subject: |
Re: [PATCH RESEND v3 08/10] migration: Implement dirty-limit convergence algo |
Date: |
Wed, 11 Jan 2023 15:11:47 +0100 |
User-agent: |
Gnus/5.13 (Gnus v5.13) Emacs/28.2 (gnu/linux) |
huangy81@chinatelecom.cn writes:
> From: Hyman Huang(黄勇) <huangy81@chinatelecom.cn>
>
> Implement dirty-limit convergence algo for live migration,
> which is kind of like auto-converge algo but using dirty-limit
> instead of cpu throttle to make migration convergent.
>
> Enable dirty page limit if dirty_rate_high_cnt greater than 2
> when dirty-limit capability enabled, disable dirty-limit if
> migration be cancled.
>
> Note that "set_vcpu_dirty_limit", "cancel_vcpu_dirty_limit"
> commands are not allowed during dirty-limit live migration.
Only during live migration, or also during migration of a stopped guest?
If the latter, the easy fix is to scratch "live".
> Signed-off-by: Hyman Huang(黄勇) <huangy81@chinatelecom.cn>
> ---
> migration/migration.c | 3 +++
> migration/ram.c | 63
> ++++++++++++++++++++++++++++++++++++++------------
> migration/trace-events | 1 +
> softmmu/dirtylimit.c | 22 ++++++++++++++++++
> 4 files changed, 74 insertions(+), 15 deletions(-)
>
> diff --git a/migration/migration.c b/migration/migration.c
> index 702e7f4..127d0fe 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -240,6 +240,9 @@ void migration_cancel(const Error *error)
> if (error) {
> migrate_set_error(current_migration, error);
> }
> + if (migrate_dirty_limit()) {
> + qmp_cancel_vcpu_dirty_limit(false, -1, NULL);
> + }
> migrate_fd_cancel(current_migration);
> }
>
> diff --git a/migration/ram.c b/migration/ram.c
> index 5e66652..78b9167 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -45,6 +45,7 @@
> #include "qapi/error.h"
> #include "qapi/qapi-types-migration.h"
> #include "qapi/qapi-events-migration.h"
> +#include "qapi/qapi-commands-migration.h"
> #include "qapi/qmp/qerror.h"
> #include "trace.h"
> #include "exec/ram_addr.h"
> @@ -57,6 +58,8 @@
> #include "qemu/iov.h"
> #include "multifd.h"
> #include "sysemu/runstate.h"
> +#include "sysemu/dirtylimit.h"
> +#include "sysemu/kvm.h"
>
> #include "hw/boards.h" /* for machine_dump_guest_core() */
>
> @@ -1139,6 +1142,30 @@ static void migration_update_rates(RAMState *rs,
> int64_t end_time)
> }
> }
>
> +/*
> + * Enable dirty-limit to throttle down the guest
> + */
> +static void migration_dirty_limit_guest(void)
> +{
> + static int64_t quota_dirtyrate;
> + MigrationState *s = migrate_get_current();
> +
> + /*
> + * If dirty limit already enabled and migration parameter
> + * vcpu-dirty-limit untouched.
> + */
> + if (dirtylimit_in_service() &&
> + quota_dirtyrate == s->parameters.vcpu_dirty_limit) {
> + return;
> + }
> +
> + quota_dirtyrate = s->parameters.vcpu_dirty_limit;
> +
> + /* Set or update quota dirty limit */
> + qmp_set_vcpu_dirty_limit(false, -1, quota_dirtyrate, NULL);
> + trace_migration_dirty_limit_guest(quota_dirtyrate);
> +}
> +
> static void migration_trigger_throttle(RAMState *rs)
> {
> MigrationState *s = migrate_get_current();
> @@ -1148,26 +1175,32 @@ static void migration_trigger_throttle(RAMState *rs)
> uint64_t bytes_dirty_period = rs->num_dirty_pages_period *
> TARGET_PAGE_SIZE;
> uint64_t bytes_dirty_threshold = bytes_xfer_period * threshold / 100;
>
> - /* During block migration the auto-converge logic incorrectly detects
> - * that ram migration makes no progress. Avoid this by disabling the
> - * throttling logic during the bulk phase of block migration. */
> - if (blk_mig_bulk_active()) {
> - return;
> - }
> + /*
> + * The following detection logic can be refined later. For now:
> + * Check to see if the ratio between dirtied bytes and the approx.
> + * amount of bytes that just got transferred since the last time
> + * we were in this routine reaches the threshold. If that happens
> + * twice, start or increase throttling.
> + */
>
> - if (migrate_auto_converge()) {
> - /* The following detection logic can be refined later. For now:
> - Check to see if the ratio between dirtied bytes and the approx.
> - amount of bytes that just got transferred since the last time
> - we were in this routine reaches the threshold. If that happens
> - twice, start or increase throttling. */
> + if ((bytes_dirty_period > bytes_dirty_threshold) &&
> + (++rs->dirty_rate_high_cnt >= 2)) {
> + rs->dirty_rate_high_cnt = 0;
> + /*
> + * During block migration the auto-converge logic incorrectly detects
> + * that ram migration makes no progress. Avoid this by disabling the
> + * throttling logic during the bulk phase of block migration
> + */
> + if (blk_mig_bulk_active()) {
> + return;
> + }
>
> - if ((bytes_dirty_period > bytes_dirty_threshold) &&
> - (++rs->dirty_rate_high_cnt >= 2)) {
> + if (migrate_auto_converge()) {
> trace_migration_throttle();
> - rs->dirty_rate_high_cnt = 0;
> mig_throttle_guest_down(bytes_dirty_period,
> bytes_dirty_threshold);
> + } else if (migrate_dirty_limit()) {
> + migration_dirty_limit_guest();
> }
> }
> }
> diff --git a/migration/trace-events b/migration/trace-events
> index 57003ed..33a2666 100644
> --- a/migration/trace-events
> +++ b/migration/trace-events
> @@ -91,6 +91,7 @@ migration_bitmap_sync_start(void) ""
> migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64
> migration_bitmap_clear_dirty(char *str, uint64_t start, uint64_t size,
> unsigned long page) "rb %s start 0x%"PRIx64" size 0x%"PRIx64" page 0x%lx"
> migration_throttle(void) ""
> +migration_dirty_limit_guest(int64_t dirtyrate) "guest dirty page rate limit
> %" PRIi64 " MB/s"
> ram_discard_range(const char *rbname, uint64_t start, size_t len) "%s:
> start: %" PRIx64 " %zx"
> ram_load_loop(const char *rbname, uint64_t addr, int flags, void *host) "%s:
> addr: 0x%" PRIx64 " flags: 0x%x host: %p"
> ram_load_postcopy_loop(int channel, uint64_t addr, int flags) "chan=%d
> addr=0x%" PRIx64 " flags=0x%x"
> diff --git a/softmmu/dirtylimit.c b/softmmu/dirtylimit.c
> index 2a07200..b63032c 100644
> --- a/softmmu/dirtylimit.c
> +++ b/softmmu/dirtylimit.c
> @@ -439,6 +439,8 @@ void qmp_cancel_vcpu_dirty_limit(bool has_cpu_index,
> int64_t cpu_index,
> Error **errp)
> {
> + MigrationState *ms = migrate_get_current();
> +
> if (!kvm_enabled() || !kvm_dirty_ring_enabled()) {
> return;
> }
> @@ -452,6 +454,15 @@ void qmp_cancel_vcpu_dirty_limit(bool has_cpu_index,
> return;
> }
>
> + if (migration_is_running(ms->state) &&
> + (!qemu_thread_is_self(&ms->thread)) &&
> + migrate_dirty_limit() &&
> + dirtylimit_in_service()) {
> + error_setg(errp, "dirty-limit live migration is running, do"
> + " not allow dirty page limit to be canceled manually");
"do not allow" sounds like a request. What about "can't cancel dirty
page limit while migration is running"?
> + return;
> + }
> +
> dirtylimit_state_lock();
>
> if (has_cpu_index) {
> @@ -487,6 +498,8 @@ void qmp_set_vcpu_dirty_limit(bool has_cpu_index,
> uint64_t dirty_rate,
> Error **errp)
> {
> + MigrationState *ms = migrate_get_current();
> +
> if (!kvm_enabled() || !kvm_dirty_ring_enabled()) {
> error_setg(errp, "dirty page limit feature requires KVM with"
> " accelerator property 'dirty-ring-size' set'");
> @@ -503,6 +516,15 @@ void qmp_set_vcpu_dirty_limit(bool has_cpu_index,
> return;
> }
>
> + if (migration_is_running(ms->state) &&
> + (!qemu_thread_is_self(&ms->thread)) &&
> + migrate_dirty_limit() &&
> + dirtylimit_in_service()) {
> + error_setg(errp, "dirty-limit live migration is running, do"
> + " not allow dirty page limit to be configured manually");
Likewise.
> + return;
> + }
> +
> dirtylimit_state_lock();
>
> if (!dirtylimit_in_service()) {
- Re: [PATCH RESEND v3 08/10] migration: Implement dirty-limit convergence algo,
Markus Armbruster <=