qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH v5 34/45] Page request: Consume pages off the post-copy queue


From: Dr. David Alan Gilbert
Subject: Re: [Qemu-devel] [PATCH v5 34/45] Page request: Consume pages off the post-copy queue
Date: Tue, 16 Jun 2015 11:48:20 +0100
User-agent: Mutt/1.5.23 (2014-03-12)

* David Gibson (address@hidden) wrote:
> On Wed, Feb 25, 2015 at 04:51:57PM +0000, Dr. David Alan Gilbert (git) wrote:
> > From: "Dr. David Alan Gilbert" <address@hidden>
> > 
> > When transmitting RAM pages, consume pages that have been queued by
> > MIG_RPCOMM_REQPAGE commands and send them ahead of normal page scanning.
> > 
> > Note:
> >   a) After a queued page the linear walk carries on from after the
> > unqueued page; there is a reasonable chance that the destination
> > was about to ask for other close-by pages anyway.
> > 
> >   b) We have to be careful of any assumptions that the page walking
> > code makes, in particular it does some short cuts on its first linear
> > walk that break as soon as we do a queued page.
> > 
> > Signed-off-by: Dr. David Alan Gilbert <address@hidden>
> > ---
> >  arch_init.c  | 154 
> > +++++++++++++++++++++++++++++++++++++++++++++++++----------
> >  trace-events |   2 +
> >  2 files changed, 131 insertions(+), 25 deletions(-)
> > 
> > diff --git a/arch_init.c b/arch_init.c
> > index 9d8fc6b..acf65e1 100644
> > --- a/arch_init.c
> > +++ b/arch_init.c
> > @@ -328,6 +328,7 @@ static RAMBlock *last_seen_block;
> >  /* This is the last block from where we have sent data */
> >  static RAMBlock *last_sent_block;
> >  static ram_addr_t last_offset;
> > +static bool last_was_from_queue;
> >  static unsigned long *migration_bitmap;
> >  static uint64_t migration_dirty_pages;
> >  static uint32_t last_version;
> > @@ -461,6 +462,19 @@ static inline bool 
> > migration_bitmap_set_dirty(ram_addr_t addr)
> >      return ret;
> >  }
> >  
> > +static inline bool migration_bitmap_clear_dirty(ram_addr_t addr)
> > +{
> > +    bool ret;
> > +    int nr = addr >> TARGET_PAGE_BITS;
> > +
> > +    ret = test_and_clear_bit(nr, migration_bitmap);
> > +
> > +    if (ret) {
> > +        migration_dirty_pages--;
> > +    }
> > +    return ret;
> > +}
> > +
> >  static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t 
> > length)
> >  {
> >      ram_addr_t addr;
> > @@ -669,6 +683,39 @@ static int ram_save_page(QEMUFile *f, RAMBlock* block, 
> > ram_addr_t offset,
> >  }
> >  
> >  /*
> > + * Unqueue a page from the queue fed by postcopy page requests
> > + *
> > + * Returns:      The RAMBlock* to transmit from (or NULL if the queue is 
> > empty)
> > + *      ms:      MigrationState in
> > + *  offset:      the byte offset within the RAMBlock for the start of the 
> > page
> > + * ram_addr_abs: global offset in the dirty/sent bitmaps
> > + */
> > +static RAMBlock *ram_save_unqueue_page(MigrationState *ms, ram_addr_t 
> > *offset,
> > +                                       ram_addr_t *ram_addr_abs)
> > +{
> > +    RAMBlock *result = NULL;
> > +    qemu_mutex_lock(&ms->src_page_req_mutex);
> > +    if (!QSIMPLEQ_EMPTY(&ms->src_page_requests)) {
> > +        struct MigrationSrcPageRequest *entry =
> > +                                    QSIMPLEQ_FIRST(&ms->src_page_requests);
> > +        result = entry->rb;
> > +        *offset = entry->offset;
> > +        *ram_addr_abs = (entry->offset + entry->rb->offset) & 
> > TARGET_PAGE_MASK;
> > +
> > +        if (entry->len > TARGET_PAGE_SIZE) {
> > +            entry->len -= TARGET_PAGE_SIZE;
> > +            entry->offset += TARGET_PAGE_SIZE;
> > +        } else {
> > +            QSIMPLEQ_REMOVE_HEAD(&ms->src_page_requests, next_req);
> > +            g_free(entry);
> > +        }
> > +    }
> > +    qemu_mutex_unlock(&ms->src_page_req_mutex);
> > +
> > +    return result;
> > +}
> > +
> > +/*
> >   * Queue the pages for transmission, e.g. a request from postcopy 
> > destination
> >   *   ms: MigrationStatus in which the queue is held
> >   *   rbname: The RAMBlock the request is for - may be NULL (to mean reuse 
> > last)
> > @@ -732,46 +779,102 @@ int ram_save_queue_pages(MigrationState *ms, const 
> > char *rbname,
> >  
> >  static int ram_find_and_save_block(QEMUFile *f, bool last_stage)
> >  {
> > +    MigrationState *ms = migrate_get_current();
> >      RAMBlock *block = last_seen_block;
> > +    RAMBlock *tmpblock;
> >      ram_addr_t offset = last_offset;
> > +    ram_addr_t tmpoffset;
> >      bool complete_round = false;
> >      int bytes_sent = 0;
> > -    MemoryRegion *mr;
> >      ram_addr_t dirty_ram_abs; /* Address of the start of the dirty page in
> >                                   ram_addr_t space */
> > +    unsigned long hps = sysconf(_SC_PAGESIZE);
> >  
> > -    if (!block)
> > +    if (!block) {
> >          block = QTAILQ_FIRST(&ram_list.blocks);
> > +        last_was_from_queue = false;
> > +    }
> >  
> > -    while (true) {
> > -        mr = block->mr;
> > -        offset = migration_bitmap_find_and_reset_dirty(mr, offset,
> > -                                                       &dirty_ram_abs);
> > -        if (complete_round && block == last_seen_block &&
> > -            offset >= last_offset) {
> > -            break;
> > +    while (true) { /* Until we send a block or run out of stuff to send */
> > +        tmpblock = NULL;
> > +
> > +        /*
> > +         * Don't break host-page chunks up with queue items
> > +         * so only unqueue if,
> > +         *   a) The last item came from the queue anyway
> > +         *   b) The last sent item was the last target-page in a host page
> > +         */
> > +        if (last_was_from_queue || !last_sent_block ||
> > +            ((last_offset & (hps - 1)) == (hps - TARGET_PAGE_SIZE))) {
> > +            tmpblock = ram_save_unqueue_page(ms, &tmpoffset, 
> > &dirty_ram_abs);
> 
> This test for whether we've completed a host page is pretty awkward.
> I think it would be cleaner to have an inner loop / helper function
> that completes sending an entire host page (whether requested or
> scanned), before allowing the outer loop to even come back to here to
> reconsider the queue.

I've reworked that in the v7 series I've just posted; please see if it's
more to your taste (I've not tested it on a machine with bigger pages
yet though).  (This patch is 32/42 in v7).

Dave

> 
> >          }
> > -        if (offset >= block->used_length) {
> > -            offset = 0;
> > -            block = QTAILQ_NEXT(block, next);
> > -            if (!block) {
> > -                block = QTAILQ_FIRST(&ram_list.blocks);
> > -                complete_round = true;
> > -                ram_bulk_stage = false;
> > +
> > +        if (tmpblock) {
> > +            /* We've got a block from the postcopy queue */
> > +            trace_ram_find_and_save_block_postcopy(tmpblock->idstr,
> > +                                                   (uint64_t)tmpoffset,
> > +                                                   
> > (uint64_t)dirty_ram_abs);
> > +            /*
> > +             * We're sending this page, and since it's postcopy nothing 
> > else
> > +             * will dirty it, and we must make sure it doesn't get sent 
> > again.
> > +             */
> > +            if (!migration_bitmap_clear_dirty(dirty_ram_abs)) {
> > +                trace_ram_find_and_save_block_postcopy_not_dirty(
> > +                    tmpblock->idstr, (uint64_t)tmpoffset,
> > +                    (uint64_t)dirty_ram_abs,
> > +                    test_bit(dirty_ram_abs >> TARGET_PAGE_BITS, 
> > ms->sentmap));
> > +
> > +                continue;
> >              }
> > +            /*
> > +             * As soon as we start servicing pages out of order, then we 
> > have
> > +             * to kill the bulk stage, since the bulk stage assumes
> > +             * in (migration_bitmap_find_and_reset_dirty) that every page 
> > is
> > +             * dirty, that's no longer true.
> > +             */
> > +            ram_bulk_stage = false;
> > +            /*
> > +             * We mustn't change block/offset unless it's to a valid one
> > +             * otherwise we can go down some of the exit cases in the 
> > normal
> > +             * path.
> > +             */
> > +            block = tmpblock;
> > +            offset = tmpoffset;
> > +            last_was_from_queue = true;
> 
> Hrm.  So now block can change during the execution of
> ram_save_block(), which really suggests that splitting by block is no
> longer a sensible subdivision of the loop surrounding ram_save_block.
> I think it would make more sense to replace that entire surrounding
> loop, so that the logic is essentially
> 
>     while (not finished) {
>         if (something is queued) {
>             send that
>         } else {
>             scan for an unsent block and offset
>             send that instead
>         }
>     }
> 
> 
> >          } else {
> > -            bytes_sent = ram_save_page(f, block, offset, last_stage);
> > -
> > -            /* if page is unmodified, continue to the next */
> > -            if (bytes_sent > 0) {
> > -                MigrationState *ms = migrate_get_current();
> > -                if (ms->sentmap) {
> > -                    set_bit(dirty_ram_abs >> TARGET_PAGE_BITS, 
> > ms->sentmap);
> > +            MemoryRegion *mr;
> > +            /* priority queue empty, so just search for something dirty */
> > +            mr = block->mr;
> > +            offset = migration_bitmap_find_and_reset_dirty(mr, offset,
> > +                                                           &dirty_ram_abs);
> > +            if (complete_round && block == last_seen_block &&
> > +                offset >= last_offset) {
> > +                break;
> > +            }
> > +            if (offset >= block->used_length) {
> > +                offset = 0;
> > +                block = QTAILQ_NEXT(block, next);
> > +                if (!block) {
> > +                    block = QTAILQ_FIRST(&ram_list.blocks);
> > +                    complete_round = true;
> > +                    ram_bulk_stage = false;
> >                  }
> > +                continue; /* pick an offset in the new block */
> > +            }
> > +            last_was_from_queue = false;
> > +        }
> >  
> > -                last_sent_block = block;
> > -                break;
> > +        /* We have a page to send, so send it */
> > +        bytes_sent = ram_save_page(f, block, offset, last_stage);
> > +
> > +        /* if page is unmodified, continue to the next */
> > +        if (bytes_sent > 0) {
> > +            if (ms->sentmap) {
> > +                set_bit(dirty_ram_abs >> TARGET_PAGE_BITS, ms->sentmap);
> >              }
> > +
> > +            last_sent_block = block;
> > +            break;
> >          }
> >      }
> >      last_seen_block = block;
> > @@ -865,6 +968,7 @@ static void reset_ram_globals(void)
> >      last_offset = 0;
> >      last_version = ram_list.version;
> >      ram_bulk_stage = true;
> > +    last_was_from_queue = false;
> >  }
> >  
> >  #define MAX_WAIT 50 /* ms, half buffered_file limit */
> > diff --git a/trace-events b/trace-events
> > index 8a0d70d..781cf5c 100644
> > --- a/trace-events
> > +++ b/trace-events
> > @@ -1217,6 +1217,8 @@ qemu_file_fclose(void) ""
> >  migration_bitmap_sync_start(void) ""
> >  migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64""
> >  migration_throttle(void) ""
> > +ram_find_and_save_block_postcopy(const char *block_name, uint64_t 
> > tmp_offset, uint64_t ram_addr) "%s/%" PRIx64 " ram_addr=%" PRIx64
> > +ram_find_and_save_block_postcopy_not_dirty(const char *block_name, 
> > uint64_t tmp_offset, uint64_t ram_addr, int sent) "%s/%" PRIx64 " 
> > ram_addr=%" PRIx64 " (sent=%d)"
> >  ram_postcopy_send_discard_bitmap(void) ""
> >  ram_save_queue_pages(const char *rbname, size_t start, size_t len) "%s: 
> > start: %zx len: %zx"
> >  
> 
> -- 
> David Gibson                  | I'll have my music baroque, and my code
> david AT gibson.dropbear.id.au        | minimalist, thank you.  NOT _the_ 
> _other_
>                               | _way_ _around_!
> http://www.ozlabs.org/~dgibson


--
Dr. David Alan Gilbert / address@hidden / Manchester, UK



reply via email to

[Prev in Thread] Current Thread [Next in Thread]