[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH v2 2/3] add 'release-ram' migrate capability
From: |
Dr. David Alan Gilbert |
Subject: |
Re: [Qemu-devel] [PATCH v2 2/3] add 'release-ram' migrate capability |
Date: |
Fri, 10 Feb 2017 12:25:23 +0000 |
User-agent: |
Mutt/1.7.1 (2016-10-04) |
* Dr. David Alan Gilbert (address@hidden) wrote:
> * Pavel Butsykin (address@hidden) wrote:
> > This feature frees the migrated memory on the source during postcopy-ram
> > migration. In the second step of postcopy-ram migration, when the source VM
> > is paused, we can free the memory that is no longer needed. In particular,
> > this makes it possible to start relieving memory pressure on the source host
> > in a load-balancing scenario.
> >
> > Signed-off-by: Pavel Butsykin <address@hidden>
>
> Reviewed-by: Dr. David Alan Gilbert <address@hidden>
Actually, note the error from patchew; I think you need to fix up the
error_report() calls that print iov_len so they use %zu rather than %ld
(iov_len is a size_t).
Dave
> > ---
> > include/migration/migration.h | 1 +
> > include/migration/qemu-file.h | 3 ++-
> > migration/migration.c | 9 +++++++
> > migration/qemu-file.c | 59
> > ++++++++++++++++++++++++++++++++++++++-----
> > migration/ram.c | 22 +++++++++++++++-
> > qapi-schema.json | 5 +++-
> > 6 files changed, 89 insertions(+), 10 deletions(-)
> >
> > diff --git a/include/migration/migration.h b/include/migration/migration.h
> > index bd399fc0df..401fbe1f77 100644
> > --- a/include/migration/migration.h
> > +++ b/include/migration/migration.h
> > @@ -307,6 +307,7 @@ int migrate_add_blocker(Error *reason, Error **errp);
> > */
> > void migrate_del_blocker(Error *reason);
> >
> > +bool migrate_release_ram(void);
> > bool migrate_postcopy_ram(void);
> > bool migrate_zero_blocks(void);
> >
> > diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h
> > index abedd466c9..0cd648a733 100644
> > --- a/include/migration/qemu-file.h
> > +++ b/include/migration/qemu-file.h
> > @@ -132,7 +132,8 @@ void qemu_put_byte(QEMUFile *f, int v);
> > * put_buffer without copying the buffer.
> > * The buffer should be available till it is sent asynchronously.
> > */
> > -void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, size_t size);
> > +void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, size_t size,
> > + bool may_free);
> > bool qemu_file_mode_is_not_valid(const char *mode);
> > bool qemu_file_is_writable(QEMUFile *f);
> >
> > diff --git a/migration/migration.c b/migration/migration.c
> > index 1ae68be0c7..8d5a5f8a6e 100644
> > --- a/migration/migration.c
> > +++ b/migration/migration.c
> > @@ -1302,6 +1302,15 @@ void qmp_migrate_set_downtime(double value, Error
> > **errp)
> > qmp_migrate_set_parameters(&p, errp);
> > }
> >
> > +bool migrate_release_ram(void)
> > +{
> > + MigrationState *s;
> > +
> > + s = migrate_get_current();
> > +
> > + return s->enabled_capabilities[MIGRATION_CAPABILITY_RELEASE_RAM];
> > +}
> > +
> > bool migrate_postcopy_ram(void)
> > {
> > MigrationState *s;
> > diff --git a/migration/qemu-file.c b/migration/qemu-file.c
> > index e9fae31158..82dbef3c86 100644
> > --- a/migration/qemu-file.c
> > +++ b/migration/qemu-file.c
> > @@ -49,6 +49,7 @@ struct QEMUFile {
> > int buf_size; /* 0 when writing */
> > uint8_t buf[IO_BUF_SIZE];
> >
> > + DECLARE_BITMAP(may_free, MAX_IOV_SIZE);
> > struct iovec iov[MAX_IOV_SIZE];
> > unsigned int iovcnt;
> >
> > @@ -132,6 +133,41 @@ bool qemu_file_is_writable(QEMUFile *f)
> > return f->ops->writev_buffer;
> > }
> >
> > +static void qemu_iovec_release_ram(QEMUFile *f)
> > +{
> > + struct iovec iov;
> > + unsigned long idx;
> > +
> > + /* Find and release all the contiguous memory ranges marked as
> > may_free. */
> > + idx = find_next_bit(f->may_free, f->iovcnt, 0);
> > + if (idx >= f->iovcnt) {
> > + return;
> > + }
> > + iov = f->iov[idx];
> > +
> > + /* The madvise() in the loop is called for iov within a continuous
> > range and
> > + * then reinitialize the iov. And in the end, madvise() is called for
> > the
> > + * last iov.
> > + */
> > + while ((idx = find_next_bit(f->may_free, f->iovcnt, idx + 1)) <
> > f->iovcnt) {
> > + /* check for adjacent buffer and coalesce them */
> > + if (iov.iov_base + iov.iov_len == f->iov[idx].iov_base) {
> > + iov.iov_len += f->iov[idx].iov_len;
> > + continue;
> > + }
> > + if (qemu_madvise(iov.iov_base, iov.iov_len, QEMU_MADV_DONTNEED) <
> > 0) {
> > + error_report("migrate: madvise DONTNEED failed %p %ld: %s",
> > + iov.iov_base, iov.iov_len, strerror(errno));
> > + }
> > + iov = f->iov[idx];
> > + }
> > + if (qemu_madvise(iov.iov_base, iov.iov_len, QEMU_MADV_DONTNEED) < 0) {
> > + error_report("migrate: madvise DONTNEED failed %p %ld: %s",
> > + iov.iov_base, iov.iov_len, strerror(errno));
> > + }
> > + memset(f->may_free, 0, sizeof(f->may_free));
> > +}
> > +
> > /**
> > * Flushes QEMUFile buffer
> > *
> > @@ -151,6 +187,8 @@ void qemu_fflush(QEMUFile *f)
> > if (f->iovcnt > 0) {
> > expect = iov_size(f->iov, f->iovcnt);
> > ret = f->ops->writev_buffer(f->opaque, f->iov, f->iovcnt, f->pos);
> > +
> > + qemu_iovec_release_ram(f);
> > }
> >
> > if (ret >= 0) {
> > @@ -304,13 +342,19 @@ int qemu_fclose(QEMUFile *f)
> > return ret;
> > }
> >
> > -static void add_to_iovec(QEMUFile *f, const uint8_t *buf, size_t size)
> > +static void add_to_iovec(QEMUFile *f, const uint8_t *buf, size_t size,
> > + bool may_free)
> > {
> > /* check for adjacent buffer and coalesce them */
> > if (f->iovcnt > 0 && buf == f->iov[f->iovcnt - 1].iov_base +
> > - f->iov[f->iovcnt - 1].iov_len) {
> > + f->iov[f->iovcnt - 1].iov_len &&
> > + may_free == test_bit(f->iovcnt - 1, f->may_free))
> > + {
> > f->iov[f->iovcnt - 1].iov_len += size;
> > } else {
> > + if (may_free) {
> > + set_bit(f->iovcnt, f->may_free);
> > + }
> > f->iov[f->iovcnt].iov_base = (uint8_t *)buf;
> > f->iov[f->iovcnt++].iov_len = size;
> > }
> > @@ -320,14 +364,15 @@ static void add_to_iovec(QEMUFile *f, const uint8_t
> > *buf, size_t size)
> > }
> > }
> >
> > -void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, size_t size)
> > +void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, size_t size,
> > + bool may_free)
> > {
> > if (f->last_error) {
> > return;
> > }
> >
> > f->bytes_xfer += size;
> > - add_to_iovec(f, buf, size);
> > + add_to_iovec(f, buf, size, may_free);
> > }
> >
> > void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, size_t size)
> > @@ -345,7 +390,7 @@ void qemu_put_buffer(QEMUFile *f, const uint8_t *buf,
> > size_t size)
> > }
> > memcpy(f->buf + f->buf_index, buf, l);
> > f->bytes_xfer += l;
> > - add_to_iovec(f, f->buf + f->buf_index, l);
> > + add_to_iovec(f, f->buf + f->buf_index, l, false);
> > f->buf_index += l;
> > if (f->buf_index == IO_BUF_SIZE) {
> > qemu_fflush(f);
> > @@ -366,7 +411,7 @@ void qemu_put_byte(QEMUFile *f, int v)
> >
> > f->buf[f->buf_index] = v;
> > f->bytes_xfer++;
> > - add_to_iovec(f, f->buf + f->buf_index, 1);
> > + add_to_iovec(f, f->buf + f->buf_index, 1, false);
> > f->buf_index++;
> > if (f->buf_index == IO_BUF_SIZE) {
> > qemu_fflush(f);
> > @@ -647,7 +692,7 @@ ssize_t qemu_put_compression_data(QEMUFile *f, const
> > uint8_t *p, size_t size,
> > }
> > qemu_put_be32(f, blen);
> > if (f->ops->writev_buffer) {
> > - add_to_iovec(f, f->buf + f->buf_index, blen);
> > + add_to_iovec(f, f->buf + f->buf_index, blen, false);
> > }
> > f->buf_index += blen;
> > if (f->buf_index == IO_BUF_SIZE) {
> > diff --git a/migration/ram.c b/migration/ram.c
> > index d866b6518b..5a43f716d1 100644
> > --- a/migration/ram.c
> > +++ b/migration/ram.c
> > @@ -726,6 +726,16 @@ static int save_zero_page(QEMUFile *f, RAMBlock
> > *block, ram_addr_t offset,
> > return pages;
> > }
> >
> > +static void ram_release_pages(MigrationState *ms, const char *block_name,
> > + uint64_t offset, int pages)
> > +{
> > + if (!migrate_release_ram() || !migration_in_postcopy(ms)) {
> > + return;
> > + }
> > +
> > + ram_discard_range(NULL, block_name, offset, pages << TARGET_PAGE_BITS);
> > +}
> > +
> > /**
> > * ram_save_page: Send the given page to the stream
> > *
> > @@ -786,6 +796,7 @@ static int ram_save_page(MigrationState *ms, QEMUFile
> > *f, PageSearchStatus *pss,
> > * page would be stale
> > */
> > xbzrle_cache_zero_page(current_addr);
> > + ram_release_pages(ms, block->idstr, pss->offset, pages);
> > } else if (!ram_bulk_stage &&
> > !migration_in_postcopy(ms) && migrate_use_xbzrle()) {
> > pages = save_xbzrle_page(f, &p, current_addr, block,
> > @@ -804,7 +815,9 @@ static int ram_save_page(MigrationState *ms, QEMUFile
> > *f, PageSearchStatus *pss,
> > *bytes_transferred += save_page_header(f, block,
> > offset |
> > RAM_SAVE_FLAG_PAGE);
> > if (send_async) {
> > - qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
> > + qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE,
> > + migrate_release_ram() &
> > + migration_in_postcopy(ms));
> > } else {
> > qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
> > }
> > @@ -834,6 +847,8 @@ static int do_compress_ram_page(QEMUFile *f, RAMBlock
> > *block,
> > error_report("compressed data failed!");
> > } else {
> > bytes_sent += blen;
> > + ram_release_pages(migrate_get_current(), block->idstr,
> > + offset & TARGET_PAGE_MASK, 1);
> > }
> >
> > return bytes_sent;
> > @@ -973,12 +988,17 @@ static int ram_save_compressed_page(MigrationState
> > *ms, QEMUFile *f,
> > error_report("compressed data failed!");
> > }
> > }
> > + if (pages > 0) {
> > + ram_release_pages(ms, block->idstr, pss->offset, pages);
> > + }
> > } else {
> > offset |= RAM_SAVE_FLAG_CONTINUE;
> > pages = save_zero_page(f, block, offset, p, bytes_transferred);
> > if (pages == -1) {
> > pages = compress_page_with_multi_thread(f, block, offset,
> > bytes_transferred);
> > + } else {
> > + ram_release_pages(ms, block->idstr, pss->offset, pages);
> > }
> > }
> > }
> > diff --git a/qapi-schema.json b/qapi-schema.json
> > index 82fabc6e24..e58228d083 100644
> > --- a/qapi-schema.json
> > +++ b/qapi-schema.json
> > @@ -865,11 +865,14 @@
> > # side, this process is called COarse-Grain LOck Stepping (COLO) for
> > # Non-stop Service. (since 2.8)
> > #
> > +# @release-ram: if enabled, qemu will free the migrated ram pages on the
> > source
> > +# during postcopy-ram migration. (since 2.9)
> > +#
> > # Since: 1.2
> > ##
> > { 'enum': 'MigrationCapability',
> > 'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks',
> > - 'compress', 'events', 'postcopy-ram', 'x-colo'] }
> > + 'compress', 'events', 'postcopy-ram', 'x-colo', 'release-ram'] }
> >
> > ##
> > # @MigrationCapabilityStatus:
> > --
> > 2.11.0
> >
> >
> --
> Dr. David Alan Gilbert / address@hidden / Manchester, UK
>
--
Dr. David Alan Gilbert / address@hidden / Manchester, UK
[Qemu-devel] [PATCH v2 1/3] migration: add MigrationState arg for ram_save_/compressed_/page(), Pavel Butsykin, 2017/02/03
[Qemu-devel] [PATCH v2 3/3] migration: discard non-dirty ram pages after the start of postcopy, Pavel Butsykin, 2017/02/03
Re: [Qemu-devel] [PATCH v2 0/3] migration capability to discard the migrated ram pages, no-reply, 2017/02/03
Re: [Qemu-devel] [PATCH v2 0/3] migration capability to discard the migrated ram pages, Dr. David Alan Gilbert, 2017/02/10
Re: [Qemu-devel] [PATCH v2 0/3] migration capability to discard the migrated ram pages, Dr. David Alan Gilbert, 2017/02/14