qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH v4 29/47] Postcopy page-map-incoming (PMI) struc


From: David Gibson
Subject: Re: [Qemu-devel] [PATCH v4 29/47] Postcopy page-map-incoming (PMI) structure
Date: Tue, 4 Nov 2014 14:09:37 +1100
User-agent: Mutt/1.5.23 (2014-03-12)

On Fri, Oct 03, 2014 at 06:47:35PM +0100, Dr. David Alan Gilbert (git) wrote:
> From: "Dr. David Alan Gilbert" <address@hidden>
> 
> The PMI holds the state of each page on the incoming side,
> so that we can tell if the page is missing, already received
> or there is a request outstanding for it.
> 
> Signed-off-by: Dr. David Alan Gilbert <address@hidden>

Reviewed-by: David Gibson <address@hidden>

Though there are a couple of minor comments below:

> ---
>  include/migration/migration.h    |  19 ++++
>  include/migration/postcopy-ram.h |  12 +++
>  include/qemu/typedefs.h          |   1 +
>  postcopy-ram.c                   | 220 
> +++++++++++++++++++++++++++++++++++++++
>  4 files changed, 252 insertions(+)
> 
> diff --git a/include/migration/migration.h b/include/migration/migration.h
> index 2ff9d35..1405a15 100644
> --- a/include/migration/migration.h
> +++ b/include/migration/migration.h
> @@ -57,6 +57,24 @@ struct MigrationRetPathState {
>  
>  typedef struct MigrationState MigrationState;
>  
> +/* Postcopy page-map-incoming - data about each page on the inbound side */
> +
> +typedef enum {
> +   POSTCOPY_PMI_MISSING,   /* page hasn't yet been received */
> +   POSTCOPY_PMI_REQUESTED, /* Kernel asked for a page, but we've not got it 
> */
> +   POSTCOPY_PMI_RECEIVED   /* We've got the page */
> +} PostcopyPMIState;
> +
> +struct PostcopyPMI {
> +    QemuMutex      mutex;
> +    unsigned long *received_map;  /* Pages that we have received */
> +    unsigned long *requested_map; /* Pages that we're sending a request for 
> */
> +    unsigned long  host_mask;     /* A mask with enough bits set to cover one
> +                                     host page in the PMI */
> +    unsigned long  host_bits;     /* The number of bits in the map 
> representing
> +                                     one host page */
> +};
> +
>  /* State for the incoming migration */
>  struct MigrationIncomingState {
>      QEMUFile *file;
> @@ -71,6 +89,7 @@ struct MigrationIncomingState {
>  
>      QEMUFile *return_path;
>      QemuMutex      rp_mutex;    /* We send replies from multiple threads */
> +    PostcopyPMI    postcopy_pmi;
>  };
>  
>  MigrationIncomingState *migration_incoming_get_current(void);
> diff --git a/include/migration/postcopy-ram.h 
> b/include/migration/postcopy-ram.h
> index dcd1afa..addb88a 100644
> --- a/include/migration/postcopy-ram.h
> +++ b/include/migration/postcopy-ram.h
> @@ -13,7 +13,19 @@
>  #ifndef QEMU_POSTCOPY_RAM_H
>  #define QEMU_POSTCOPY_RAM_H
>  
> +#include "migration/migration.h"
> +
>  /* Return 0 if the host supports everything we need to do postcopy-ram */
>  int postcopy_ram_hosttest(void);
>  
> +/*
> + * In 'advise' mode record that a page has been received.
> + */
> +void postcopy_hook_early_receive(MigrationIncomingState *mis,
> +                                 size_t bitmap_index);
> +
> +void postcopy_pmi_destroy(MigrationIncomingState *mis);
> +void postcopy_pmi_discard_range(MigrationIncomingState *mis,
> +                                size_t start, size_t npages);
> +void postcopy_pmi_dump(MigrationIncomingState *mis);
>  #endif
> diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
> index 8539de6..61b330c 100644
> --- a/include/qemu/typedefs.h
> +++ b/include/qemu/typedefs.h
> @@ -77,6 +77,7 @@ typedef struct QEMUSGList QEMUSGList;
>  typedef struct SHPCDevice SHPCDevice;
>  typedef struct FWCfgState FWCfgState;
>  typedef struct PcGuestInfo PcGuestInfo;
> +typedef struct PostcopyPMI PostcopyPMI;
>  typedef struct Range Range;
>  typedef struct AdapterInfo AdapterInfo;
>  
> diff --git a/postcopy-ram.c b/postcopy-ram.c
> index bba5c71..210585c 100644
> --- a/postcopy-ram.c
> +++ b/postcopy-ram.c
> @@ -23,6 +23,9 @@
>  #include "qemu-common.h"
>  #include "migration/migration.h"
>  #include "migration/postcopy-ram.h"
> +#include "sysemu/sysemu.h"
> +#include "qemu/bitmap.h"
> +#include "qemu/error-report.h"
>  
>  //#define DEBUG_POSTCOPY
>  
> @@ -82,6 +85,216 @@
>  #if defined(__linux__) && defined(MADV_USERFAULT) && \
>                            defined(__NR_remap_anon_pages)
>  
> +/* ---------------------------------------------------------------------- */
> +/* Postcopy pagemap-inbound (pmi) - data structures that record the       */
> +/* state of each page used by the inbound postcopy                        */
> +/* It's a pair of bitmaps (of the same structure as the migration bitmaps)*/
> +/* holding one bit per target-page, although all operations work on host  */
> +/* pages.                                                                 */
> +__attribute__ (( unused )) /* Until later in patch series */
> +static void postcopy_pmi_init(MigrationIncomingState *mis, size_t ram_pages)
> +{
> +    unsigned int tpb = qemu_target_page_bits();
> +    unsigned long host_bits;
> +
> +    qemu_mutex_init(&mis->postcopy_pmi.mutex);
> +    mis->postcopy_pmi.received_map = bitmap_new(ram_pages);
> +    mis->postcopy_pmi.requested_map = bitmap_new(ram_pages);
> +    bitmap_clear(mis->postcopy_pmi.received_map, 0, ram_pages);
> +    bitmap_clear(mis->postcopy_pmi.requested_map, 0, ram_pages);
> +    /*
> +     * Each bit in the map represents one 'target page' which is no bigger
> +     * than a host page but can be smaller.  It's useful to have some
> +     * convenience masks for later

So, there's no inherent reason a target page couldn't be bigger than a
host page.  It's fair enough not to handle that case for now, but
something somewhere should probably verify that it's no the case.

> +     */
> +
> +    /*
> +     * The number of bits one host page takes up in the bitmap
> +     * e.g. on a 64k host page, 4k Target page, host_bits=64/4=16
> +     */
> +    host_bits = sysconf(_SC_PAGESIZE) / (1ul << tpb);
> +    /* Should be a power of 2 */
> +    assert(host_bits && !(host_bits & (host_bits - 1)));
> +    /*
> +     * If the host_bits isn't a division of the number of bits in long
> +     * then the code gets a lot more complex; disallow for now
> +     * (I'm not aware of a system where it's true anyway)
> +     */
> +    assert(((sizeof(long) * 8) % host_bits) == 0);
> +
> +    mis->postcopy_pmi.host_bits = host_bits;
> +    /* A mask, starting at bit 0, containing host_bits continuous set bits */
> +    mis->postcopy_pmi.host_mask =  (1ul << host_bits) - 1;
> +
> +    assert((ram_pages % host_bits) == 0);
> +}
> +
> +void postcopy_pmi_destroy(MigrationIncomingState *mis)
> +{
> +    if (mis->postcopy_pmi.received_map) {
> +        g_free(mis->postcopy_pmi.received_map);

g_free() is safe to call on NULL anyway, isn't it?

> +        mis->postcopy_pmi.received_map = NULL;
> +    }
> +    if (mis->postcopy_pmi.requested_map) {
> +        g_free(mis->postcopy_pmi.requested_map);
> +        mis->postcopy_pmi.requested_map = NULL;
> +    }
> +    qemu_mutex_destroy(&mis->postcopy_pmi.mutex);
> +}
> +
> +/*
> + * Mark a set of pages in the PMI as being clear; this is used by the discard
> + * at the start of postcopy, and before the postcopy stream starts.
> + */
> +void postcopy_pmi_discard_range(MigrationIncomingState *mis,
> +                                size_t start, size_t npages)
> +{
> +    bitmap_clear(mis->postcopy_pmi.received_map, start, npages);
> +}
> +
> +/*
> + * Test a host-page worth of bits in the map starting at bitmap_index
> + * The bits should all be consistent
> + */
> +static bool test_hpbits(MigrationIncomingState *mis,
> +                        size_t bitmap_index, unsigned long *map)
> +{
> +    long masked;
> +
> +    assert((bitmap_index & (mis->postcopy_pmi.host_bits-1)) == 0);
> +
> +    masked = (map[BIT_WORD(bitmap_index)] >>
> +               (bitmap_index % BITS_PER_LONG)) &
> +             mis->postcopy_pmi.host_mask;
> +
> +    assert((masked == 0) || (masked == mis->postcopy_pmi.host_mask));
> +    return !!masked;
> +}
> +
> +/*
> + * Set host-page worth of bits in the map starting at bitmap_index
> + */
> +static void set_hpbits(MigrationIncomingState *mis,
> +                       size_t bitmap_index, unsigned long *map)
> +{
> +    assert((bitmap_index & (mis->postcopy_pmi.host_bits-1)) == 0);
> +
> +    map[BIT_WORD(bitmap_index)] |= mis->postcopy_pmi.host_mask <<
> +                                    (bitmap_index % BITS_PER_LONG);
> +}
> +
> +/*
> + * Clear host-page worth of bits in the map starting at bitmap_index
> + */
> +static void clear_hpbits(MigrationIncomingState *mis,
> +                         size_t bitmap_index, unsigned long *map)
> +{
> +    assert((bitmap_index & (mis->postcopy_pmi.host_bits-1)) == 0);
> +
> +    map[BIT_WORD(bitmap_index)] &= ~(mis->postcopy_pmi.host_mask <<
> +                                    (bitmap_index % BITS_PER_LONG));
> +}
> +
> +/*
> + * Retrieve the state of the given page
> + * Note: This version for use by callers already holding the lock
> + */
> +static PostcopyPMIState postcopy_pmi_get_state_nolock(
> +                            MigrationIncomingState *mis,
> +                            size_t bitmap_index)
> +{
> +    bool received, requested;
> +
> +    received = test_hpbits(mis, bitmap_index, 
> mis->postcopy_pmi.received_map);
> +    requested = test_hpbits(mis, bitmap_index, 
> mis->postcopy_pmi.requested_map);
> +
> +    if (received) {
> +        assert(!requested);

Clearing the requested bit when you set the received bit seems a bit
pointless.  (requested && received) isn't meaningfully different from
(!requested && received) but there seems no reason to go to extra
trouble to avoid that state, and having the record might be
interesting for gathering statistics.

> +        return POSTCOPY_PMI_RECEIVED;
> +    } else {
> +        return requested ? POSTCOPY_PMI_REQUESTED : POSTCOPY_PMI_MISSING;
> +    }
> +}
> +
> +/* Retrieve the state of the given page */
> +__attribute__ (( unused )) /* Until later in patch series */
> +static PostcopyPMIState postcopy_pmi_get_state(MigrationIncomingState *mis,
> +                                               size_t bitmap_index)
> +{
> +    PostcopyPMIState ret;
> +    qemu_mutex_lock(&mis->postcopy_pmi.mutex);
> +    ret = postcopy_pmi_get_state_nolock(mis, bitmap_index);
> +    qemu_mutex_unlock(&mis->postcopy_pmi.mutex);
> +
> +    return ret;
> +}
> +
> +/*
> + * Set the page state to the given state if the previous state was as 
> expected
> + * Return the actual previous state.
> + */
> +__attribute__ (( unused )) /* Until later in patch series */
> +static PostcopyPMIState postcopy_pmi_change_state(MigrationIncomingState 
> *mis,
> +                                           size_t bitmap_index,
> +                                           PostcopyPMIState expected_state,
> +                                           PostcopyPMIState new_state)
> +{
> +    PostcopyPMIState old_state;
> +
> +    qemu_mutex_lock(&mis->postcopy_pmi.mutex);
> +    old_state = postcopy_pmi_get_state_nolock(mis, bitmap_index);
> +
> +    if (old_state == expected_state) {
> +        switch (new_state) {
> +        case POSTCOPY_PMI_MISSING:
> +          assert(0); /* This shouldn't actually happen - use discard_range */
> +          break;
> +
> +        case POSTCOPY_PMI_REQUESTED:
> +          assert(old_state == POSTCOPY_PMI_MISSING);
> +          set_hpbits(mis, bitmap_index, mis->postcopy_pmi.requested_map);
> +          break;
> +
> +        case POSTCOPY_PMI_RECEIVED:
> +          assert(old_state == POSTCOPY_PMI_MISSING ||
> +                 old_state == POSTCOPY_PMI_REQUESTED);
> +          set_hpbits(mis, bitmap_index, mis->postcopy_pmi.received_map);
> +          clear_hpbits(mis, bitmap_index, mis->postcopy_pmi.requested_map);
> +          break;
> +        }
> +    }
> +
> +    qemu_mutex_unlock(&mis->postcopy_pmi.mutex);
> +    return old_state;
> +}
> +
> +/*
> + * Useful when debugging postcopy, although if it failed early the
> + * received map can be quite sparse and thus big when dumped.
> + */
> +void postcopy_pmi_dump(MigrationIncomingState *mis)
> +{
> +    fprintf(stderr, "postcopy_pmi_dump: requested\n");
> +    ram_debug_dump_bitmap(mis->postcopy_pmi.requested_map, false);
> +    fprintf(stderr, "postcopy_pmi_dump: received\n");
> +    ram_debug_dump_bitmap(mis->postcopy_pmi.received_map, true);
> +    fprintf(stderr, "postcopy_pmi_dump: end\n");
> +}
> +
> +/* Called by ram_load prior to mapping the page */
> +void postcopy_hook_early_receive(MigrationIncomingState *mis,
> +                                 size_t bitmap_index)
> +{
> +    if (mis->postcopy_ram_state == POSTCOPY_RAM_INCOMING_ADVISE) {

A silent no-op if you're not in the expected migration phase doesn't
seem right.  Should this be an assert() instead?

> +        /*
> +         * If we're in precopy-advise mode we need to track received pages 
> even
> +         * though we don't need to place pages atomically yet.
> +         * In advise mode there's only a single thread, so don't need locks
> +         */
> +        set_bit(bitmap_index, mis->postcopy_pmi.received_map);
> +    }
> +}
> +
>  int postcopy_ram_hosttest(void)
>  {
>      /* TODO: Needs guarding with CONFIG_ once we have libc's that have the 
> defs
> @@ -156,5 +369,12 @@ int postcopy_ram_hosttest(void)
>      return -1;
>  }
>  
> +/* Called by ram_load prior to mapping the page */
> +void postcopy_hook_early_receive(MigrationIncomingState *mis,
> +                                 size_t bitmap_index)
> +{
> +    /* We don't support postcopy so don't care */
> +}
> +
>  #endif
>  

-- 
David Gibson                    | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
                                | _way_ _around_!
http://www.ozlabs.org/~dgibson

Attachment: pgpm51XnAapXd.pgp
Description: PGP signature


reply via email to

[Prev in Thread] Current Thread [Next in Thread]