[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH 17/19] block/parallels: delay writing to BAT til
From: |
Roman Kagan |
Subject: |
Re: [Qemu-devel] [PATCH 17/19] block/parallels: delay writing to BAT till bdrv_co_flush_to_os |
Date: |
Wed, 14 Jan 2015 16:03:40 +0300 |
User-agent: |
Mutt/1.5.23 (2014-03-12) |
On Tue, Dec 30, 2014 at 01:07:10PM +0300, Denis V. Lunev wrote:
> The idea is that we do not need to immediately sync BAT to the image as
> from the guest point of view there is a possibility that IO is lost
> even in the physical controller until flush command was finished.
> bdrv_co_flush_to_os is exactly the right place for this purpose.
>
> Technically the patch aligns writes into BAT to MAX(bdrv_align, 4096),
> which elliminates read-modify-write transactions on BAT update and
> cache ready-to-write content in a special buffer in BDRVParallelsState.
>
> This buffer possibly contains ParallelsHeader if the first page of the
> image should be modified. The header occupies first 64 bytes of the image
> and the BAT starts immediately after it.
>
> It is also possible that BAT end is not aligned to the cluster size.
> ParallelsHeader->data_off is not specified for this case. We should write
> only part of the cache in that case.
>
> This patch speed ups
> qemu-img create -f parallels -o cluster_size=64k ./1.hds 64G
> qemu-io -f parallels -c "write -P 0x11 0 1024k" 1.hds
> writing from 50-60 Mb/sec to 80-90 Mb/sec on rotational media and
> from 160 Mb/sec to 190 Mb/sec on SSD disk.
>
> Signed-off-by: Denis V. Lunev <address@hidden>
> CC: Kevin Wolf <address@hidden>
> CC: Stefan Hajnoczi <address@hidden>
> ---
> block/parallels.c | 99
> ++++++++++++++++++++++++++++++++++++++++++++++++++++---
> 1 file changed, 94 insertions(+), 5 deletions(-)
>
> diff --git a/block/parallels.c b/block/parallels.c
> index 46cf031..18b9267 100644
> --- a/block/parallels.c
> +++ b/block/parallels.c
> @@ -62,6 +62,11 @@ typedef struct BDRVParallelsState {
> uint32_t *bat;
> unsigned int bat_size;
>
> + uint32_t *bat_cache;
> + unsigned bat_cache_size;
> + int bat_cache_off;
> + int data_off;
> +
> unsigned int tracks;
>
> unsigned int off_multiplier;
> @@ -148,6 +153,17 @@ static int parallels_open(BlockDriverState *bs, QDict
> *options, int flags,
> le32_to_cpus(&s->bat[i]);
>
> qemu_co_mutex_init(&s->lock);
> +
> + s->bat_cache_off = -1;
> + if (bs->open_flags & BDRV_O_RDWR) {
> + s->bat_cache_size = MAX(bdrv_opt_mem_align(bs->file), 4096);
> + s->bat_cache = qemu_blockalign(bs->file, s->bat_cache_size);
> + }
> + s->data_off = le32_to_cpu(s->ph.data_off) * BDRV_SECTOR_SIZE;
> + if (s->data_off == 0) {
> + s->data_off = ROUND_UP(bat_offset(s->bat_size), BDRV_SECTOR_SIZE);
> + }
> +
> return 0;
>
> fail_format:
> @@ -171,10 +187,71 @@ static int64_t seek_to_sector(BDRVParallelsState *s,
> int64_t sector_num)
> return (uint64_t)s->bat[index] * s->off_multiplier + offset;
> }
>
> +static int write_bat_cache(BlockDriverState *bs)
> +{
> + BDRVParallelsState *s = bs->opaque;
> + int size, off;
> +
> + if (s->bat_cache_off == -1) {
> + /* no cached data */
> + return 0;
> + }
> +
> + size = s->bat_cache_size;
> + if (size + s->bat_cache_off > s->data_off) {
> + /* avoid writing to the first data block */
> + size = s->data_off - s->bat_cache_off;
> + }
> +
> + off = s->bat_cache_off;
> + s->bat_cache_off = -1;
> + return bdrv_pwrite(bs->file, off, s->bat_cache, size);
> +}
> +
> +static int cache_bat(BlockDriverState *bs, uint32_t idx, uint32_t
> new_data_off)
> +{
> + int ret, i, off, cache_off;
> + int64_t first_idx, last_idx;
> + BDRVParallelsState *s = bs->opaque;
> + uint32_t *cache = s->bat_cache;
> +
> + off = bat_offset(idx);
> + cache_off = (off / s->bat_cache_size) * s->bat_cache_size;
> +
> + if (s->bat_cache_off != -1 && s->bat_cache_off != cache_off) {
> + ret = write_bat_cache(bs);
> + if (ret < 0) {
> + return ret;
> + }
> + }
> +
> + first_idx = idx - (off - cache_off) / sizeof(uint32_t);
> + last_idx = first_idx + s->bat_cache_size / sizeof(uint32_t);
> + if (first_idx < 0) {
> + memcpy(s->bat_cache, &s->ph, sizeof(s->ph));
> + first_idx = 0;
> + cache = s->bat_cache + sizeof(s->ph) / sizeof(uint32_t);
> + }
> +
> + if (last_idx > s->bat_size) {
> + memset(cache + s->bat_size - first_idx, 0,
> + sizeof(uint32_t) * (last_idx - s->bat_size));
> + }
> +
> + for (i = 0; i < last_idx - first_idx; i++) {
> + cache[i] = cpu_to_le32(s->bat[first_idx + i]);
> + }
You're re-populating the bat_cache on every bat update. Why?
Shouldn't this be done only with empty cache, i.e. under
if (s->bat_cache_off == -1)?
> + cache[idx - first_idx] = cpu_to_le32(new_data_off);
> + s->bat[idx] = new_data_off;
> +
> + s->bat_cache_off = cache_off;
> + return 0;
> +}
> +
> static int64_t allocate_sector(BlockDriverState *bs, int64_t sector_num)
> {
> BDRVParallelsState *s = bs->opaque;
> - uint32_t idx, offset, tmp;
> + uint32_t idx, offset;
> int64_t pos;
> int ret;
>
> @@ -190,17 +267,27 @@ static int64_t allocate_sector(BlockDriverState *bs,
> int64_t sector_num)
>
> pos = bdrv_getlength(bs->file) >> BDRV_SECTOR_BITS;
> bdrv_truncate(bs->file, (pos + s->tracks) << BDRV_SECTOR_BITS);
> - s->bat[idx] = pos / s->off_multiplier;
> -
> - tmp = cpu_to_le32(s->bat[idx]);
>
> - ret = bdrv_pwrite(bs->file, bat_offset(idx), &tmp, sizeof(tmp));
> + ret = cache_bat(bs, idx, pos / s->off_multiplier);
> if (ret < 0) {
> return ret;
> }
> return (uint64_t)s->bat[idx] * s->off_multiplier + offset;
> }
>
> +static coroutine_fn int parallels_co_flush_to_os(BlockDriverState *bs)
> +{
> + BDRVParallelsState *s = bs->opaque;
> + int ret;
> +
> + qemu_co_mutex_lock(&s->lock);
> + ret = write_bat_cache(bs);
> + qemu_co_mutex_unlock(&s->lock);
> +
> + return ret;
> +}
> +
> +
> static int cluster_remainder(BDRVParallelsState *s, int64_t sector_num,
> int nb_sectors)
> {
> @@ -387,6 +474,7 @@ exit:
> static void parallels_close(BlockDriverState *bs)
> {
> BDRVParallelsState *s = bs->opaque;
> + qemu_vfree(s->bat_cache);
Don't you need to flush the bat cache here first?
> g_free(s->bat);
> }
>
> @@ -416,6 +504,7 @@ static BlockDriver bdrv_parallels = {
> .bdrv_open = parallels_open,
> .bdrv_close = parallels_close,
> .bdrv_co_get_block_status = parallels_co_get_block_status,
> + .bdrv_co_flush_to_os = parallels_co_flush_to_os,
> .bdrv_co_readv = parallels_co_readv,
> .bdrv_co_writev = parallels_co_writev,
Roman.
- Re: [Qemu-devel] [PATCH 17/19] block/parallels: delay writing to BAT till bdrv_co_flush_to_os,
Roman Kagan <=