qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH v7 36/42] Host page!=target page: Cleanup bitmap


From: Juan Quintela
Subject: Re: [Qemu-devel] [PATCH v7 36/42] Host page!=target page: Cleanup bitmaps
Date: Tue, 14 Jul 2015 17:01:38 +0200
User-agent: Gnus/5.13 (Gnus v5.13) Emacs/24.5 (gnu/linux)

"Dr. David Alan Gilbert (git)" <address@hidden> wrote:
> From: "Dr. David Alan Gilbert" <address@hidden>
>
> Prior to the start of postcopy, ensure that everything that will
> be transferred later is a whole host-page in size.
>
> This is accomplished by discarding partially transferred host pages
> and marking any that are partially dirty as fully dirty.
>
> Signed-off-by: Dr. David Alan Gilbert <address@hidden>

>  /*
> + * Helper for postcopy_chunk_hostpages where HPS/TPS >= bits-in-long
> + *
> + * !! Untested !!

You continue in the race for best comment ever O:-)


> + */
> +static int hostpage_big_chunk_helper(const char *block_name, void *host_addr,
> +                                     ram_addr_t offset, ram_addr_t length,
> +                                     void *opaque)
> +{
> +    MigrationState *ms = opaque;
> +    unsigned long long_bits = sizeof(long) * 8;
> +    unsigned int host_len = (qemu_host_page_size / TARGET_PAGE_SIZE) /
> +                            long_bits;
> +    unsigned long first_long, last_long, cur_long, current_hp;
> +    unsigned long first = offset >> TARGET_PAGE_BITS;
> +    unsigned long last = (offset + (length - 1)) >> TARGET_PAGE_BITS;
> +
> +    PostcopyDiscardState *pds = postcopy_discard_send_init(ms,
> +                                                           first,
> +                                                           block_name);

Minor

PostcopyDiscardState *pds =
                     postcopy_discard_send_init(ms, first, block_name);

??
> +    first_long = first / long_bits;
> +    last_long = last / long_bits;
> +
> +    /*
> +     * I'm assuming RAMBlocks must start at the start of host pages,
> +     * but I guess they might not use the whole of the host page
> +     */
> +
> +    /* Work along one host page at a time */
> +    for (current_hp = first_long; current_hp <= last_long;
> +         current_hp += host_len) {
> +        bool discard = 0;
> +        bool redirty = 0;
> +        bool has_some_dirty = false;
> +        bool has_some_undirty = false;
> +        bool has_some_sent = false;
> +        bool has_some_unsent = false;
> +
> +        /*
> +         * Check each long of mask for this hp, and see if anything
> +         * needs updating.
> +         */
> +        for (cur_long = current_hp; cur_long < (current_hp + host_len);
> +             cur_long++) {
> +            /* a chunk of sent pages */
> +            unsigned long sdata = ms->sentmap[cur_long];
> +            /* a chunk of dirty pages */
> +            unsigned long ddata = migration_bitmap[cur_long];
> +
> +            if (sdata) {
> +                has_some_sent = true;
> +            }
> +            if (sdata != ~0ul) {
> +                has_some_unsent = true;
> +            }
> +            if (ddata) {
> +                has_some_dirty = true;
> +            }
> +            if (ddata != ~0ul) {
> +                has_some_undirty = true;
> +            }
> +
> +        }

No need for this:

find_first_bit()
find_first_zero_bit()

You are walking all the words when a single search would be enough.


> +
> +        if (has_some_sent && has_some_unsent) {
> +            /* Partially sent host page */
> +            discard = true;
> +            redirty = true;
> +        }
> +
> +        if (has_some_dirty && has_some_undirty) {
> +            /* Partially dirty host page */
> +            redirty = true;
> +        }
> +
> +        if (!discard && !redirty) {
> +            /* All consistent - next host page */
> +            continue;
> +        }
> +
> +
> +        /* Now walk the chunks again, sending discards etc */
> +        for (cur_long = current_hp; cur_long < (current_hp + host_len);
> +             cur_long++) {
> +            unsigned long cur_bits = cur_long * long_bits;
> +
> +            /* a chunk of sent pages */
> +            unsigned long sdata = ms->sentmap[cur_long];
> +            /* a chunk of dirty pages */
> +            unsigned long ddata = migration_bitmap[cur_long];
> +
> +            if (discard && sdata) {
> +                /* Tell the destination to discard these pages */
> +                postcopy_discard_send_range(ms, pds, cur_bits,
> +                                            cur_bits + long_bits - 1);
> +                /* And clear them in the sent data structure */
> +                ms->sentmap[cur_long] = 0;
> +            }
> +
> +            if (redirty) {
> +                migration_bitmap[cur_long] = ~0ul;
> +                /* Inc the count of dirty pages */
> +                migration_dirty_pages += ctpopl(~ddata);
> +            }
> +        }

Wouldn't creative use of bitmap_zero(), bitmap_fill() and just doing a whole
postcopy_discard_send_range() be better?



> +    }
> +
> +    postcopy_discard_send_finish(ms, pds);
> +
> +    return 0;
> +}
> +
> +/*
> + * When working on long chunks of a bitmap where the only valid section
> + * is between start..end (inclusive), generate a mask with only those
> + * valid bits set for the current long word within that bitmask.
> + */
> +static unsigned long make_long_mask(unsigned long start, unsigned long end,
> +                                    unsigned long cur_long)
> +{
> +    unsigned long long_bits = sizeof(long) * 8;
> +    unsigned long long_bits_mask = long_bits - 1;
> +    unsigned long first_long, last_long;
> +    unsigned long mask = ~(unsigned long)0;
> +    first_long = start / long_bits ;
> +    last_long = end / long_bits;
> +
> +    if ((cur_long == first_long) && (start & long_bits_mask)) {
> +        /* e.g. (start & 31) = 3
> +         *         1 << .    -> 2^3
> +         *         . - 1     -> 2^3 - 1 i.e. mask 2..0
> +         *         ~.        -> mask 31..3
> +         */
> +        mask &= ~((((unsigned long)1) << (start & long_bits_mask)) - 1);

           start = start & long_bit_mask;
           bitmap_set(&mask, start, long_bits - start);

> +    }
> +
> +    if ((cur_long == last_long) && ((end & long_bits_mask) != 
> long_bits_mask)) {
> +        /* e.g. (end & 31) = 3
> +         *            .   +1 -> 4
> +         *         1 << .    -> 2^4
> +         *         . -1      -> 2^4 - 1
> +         *                   = mask set 3..0
> +         */
> +        mask &= (((unsigned long)1) << ((end & long_bits_mask) + 1)) - 1;

           bitmap_set(&mask, 0, end);


Adjust +1/-1 depending on how you do limits?

BTW, when I need inspiration about how to code functions that deal with
bits, I look in bitmap.c.  Sometimes the function already
exists, and otherwise, things like BITS_PER_LONG, etc., are already
defined there.

> +    }
> +
> +    return mask;
> +}
> +
> +/*
> + * Utility for the outgoing postcopy code.
> + *
> + * Discard any partially sent host-page size chunks, mark any partially
> + * dirty host-page size chunks as all dirty.
> + *
> + * Returns: 0 on success
> + */
> +static int postcopy_chunk_hostpages(MigrationState *ms)
> +{
> +    struct RAMBlock *block;
> +    unsigned int host_bits = qemu_host_page_size / TARGET_PAGE_SIZE;
> +    unsigned long long_bits = sizeof(long) * 8;
> +    unsigned long host_mask;
> +
> +    assert(is_power_of_2(host_bits));
> +
> +    if (qemu_host_page_size == TARGET_PAGE_SIZE) {
> +        /* Easy case - TPS==HPS - nothing to be done */
> +        return 0;
> +    }
> +
> +    /* Easiest way to make sure we don't resume in the middle of a host-page 
> */
> +    last_seen_block = NULL;
> +    last_sent_block = NULL;

Best names ever.  And you have to blame me at least for the second one
to appear :p


> +
> +    /*
> +     * The currently worst known ratio is ARM that has 1kB target pages, and
> +     * can have 64kB host pages, which is thus inconveniently larger than a 
> long
> +     * on ARM (32bits), and a long is the underlying element of the migration
> +     * bitmaps.
> +     */
> +    if (host_bits >= long_bits) {
> +        /* Deal with the odd case separately */
> +        return qemu_ram_foreach_block(hostpage_big_chunk_helper, ms);
> +    } else {
> +        host_mask =  (1ul << host_bits) - 1;
> +    }

You can remove the else entirely and just put the code at the top level.

So, we have three cases:

- host_bits == target_bits -> NOP
- host_bits >= long_bits
- host_bits < long_bits

Couldn't we merge the last two?  they are very similar, and having two
code paths looks too much to me?

> @@ -1405,9 +1664,17 @@ int ram_postcopy_send_discard_bitmap(MigrationState 
> *ms)
>      int ret;
>  
>      rcu_read_lock();
> +
Another added blank line that is not needed.



reply via email to

[Prev in Thread] Current Thread [Next in Thread]