qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [RFC PATCH v2] Support vhd type VHD_DIFFERENCING


From: 21G
Subject: [Qemu-devel] [RFC PATCH v2] Support vhd type VHD_DIFFERENCING
Date: Sat, 6 Sep 2014 02:55:42 +0800

Now qemu only supports vhd type VHD_FIXED and VHD_DYNAMIC,
so qemu can't read snapshot volume of vhd, and can't support
other storage features of vhd file.

This patch add read parent information in function "vpc_open",
read bitmap in "vpc_read", and change bitmap in "vpc_write".

Signed-off-by: Xiaodong Gong <address@hidden>
---
 block/vpc.c | 329 +++++++++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 261 insertions(+), 68 deletions(-)

diff --git a/block/vpc.c b/block/vpc.c
index c024b4c..3ba0d57 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -33,13 +33,18 @@
 /**************************************************************/
 
 #define HEADER_SIZE 512
+#define DYNAMIC_HEADER_SIZE 1024
+#define PARENT_LOCATOR_NUM 8
+#define PARENT_PREFIX_LEN 7 /* such as file:// */
+#define TBBATMAP_HEAD_SIZE 28
+#define MACX 0x5863614d /* big endian */
 
 //#define CACHE
 
 enum vhd_type {
     VHD_FIXED           = 2,
     VHD_DYNAMIC         = 3,
-    VHD_DIFFERENCING    = 4,
+    VHD_DIFF            = 4,
 };
 
 // Seconds since Jan 1, 2000 0:00:00 (UTC)
@@ -138,6 +143,15 @@ typedef struct BDRVVPCState {
     Error *migration_blocker;
 } BDRVVPCState;
 
+typedef struct vhd_tdbatmap_header {
+    char magic[8]; /* always "tdbatmap" */
+
+    uint64_t batmap_offset;
+    uint32_t batmap_size;
+    uint32_t batmap_version;
+    uint32_t checksum;
+} QEMU_PACKED VHDTdBatmapHeader;
+
 static uint32_t vpc_checksum(uint8_t* buf, size_t size)
 {
     uint32_t res = 0;
@@ -153,7 +167,7 @@ static uint32_t vpc_checksum(uint8_t* buf, size_t size)
 static int vpc_probe(const uint8_t *buf, int buf_size, const char *filename)
 {
     if (buf_size >= 8 && !strncmp((char *)buf, "conectix", 8))
- return 100;
+        return 100;
     return 0;
 }
 
@@ -164,11 +178,17 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
     int i;
     VHDFooter *footer;
     VHDDynDiskHeader *dyndisk_header;
-    uint8_t buf[HEADER_SIZE];
+    uint8_t buf[DYNAMIC_HEADER_SIZE];
+    uint8_t tdbatmap_header_buf[TBBATMAP_HEAD_SIZE];
     uint32_t checksum;
     uint64_t computed_size;
-    int disk_type = VHD_DYNAMIC;
+    uint32_t disk_type;
     int ret;
+    VHDTdBatmapHeader *tdbatmap_header;
+    int parent_locator_offset = 0;
+    int64_t data_offset = 0;
+    int data_length = 0;
+    uint32_t platform;
 
     ret = bdrv_pread(bs->file, 0, s->footer_buf, HEADER_SIZE);
     if (ret < 0) {
@@ -176,6 +196,8 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
     }
 
     footer = (VHDFooter *) s->footer_buf;
+    disk_type = be32_to_cpu(footer->type);
+
     if (strncmp(footer->creator, "conectix", 8)) {
         int64_t offset = bdrv_getlength(bs->file);
         if (offset < 0) {
@@ -230,9 +252,9 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
         goto fail;
     }
 
-    if (disk_type == VHD_DYNAMIC) {
+    if (disk_type == VHD_DYNAMIC || disk_type == VHD_DIFF) {
         ret = bdrv_pread(bs->file, be64_to_cpu(footer->data_offset), buf,
-                         HEADER_SIZE);
+                         DYNAMIC_HEADER_SIZE);
         if (ret < 0) {
             goto fail;
         }
@@ -286,6 +308,56 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
         s->free_data_block_offset =
             (s->bat_offset + (s->max_table_entries * 4) + 511) & ~511;
 
+        /* Read tdbatmap header by offset */
+        ret = bdrv_pread(bs->file, s->free_data_block_offset,
+            tdbatmap_header_buf, TBBATMAP_HEAD_SIZE);
+        if (ret < 0) {
+            goto fail;
+        }
+
+        tdbatmap_header = (VHDTdBatmapHeader *) tdbatmap_header_buf;
+        if (!strncmp(tdbatmap_header->magic, "tdbatmap", 8)) {
+            s->free_data_block_offset =
+                be32_to_cpu(tdbatmap_header->batmap_size) * 512
+                + be64_to_cpu(tdbatmap_header->batmap_offset);
+        }
+
+        /* Read backing file location from dyn header table */
+        if (dyndisk_header->parent_name[0] || dyndisk_header->parent_name[1]) {
+            for (i = 0; i < PARENT_LOCATOR_NUM; i++) {
+                data_offset =
+                    be64_to_cpu(dyndisk_header->parent_locator[i].data_offset);
+                data_length =
+                    be32_to_cpu(dyndisk_header->parent_locator[i].data_length);
+                platform = dyndisk_header->parent_locator[i].platform;
+
+                if (MACX == platform) {
+                    if (data_offset + PARENT_PREFIX_LEN >
+                        s->max_table_entries * s->block_size) {
+                            goto fail;
+                    }
+                        if (data_length - PARENT_PREFIX_LEN > 1024) {
+                            goto fail;
+                    }
+                    ret = bdrv_pread(bs->file, data_offset + PARENT_PREFIX_LEN,
+                        bs->backing_file, data_length - PARENT_PREFIX_LEN);
+                    if (ret < 0) {
+                        goto fail;
+                    }
+
+                    bs->backing_file[data_length - PARENT_PREFIX_LEN] = '\0';
+                }
+
+                if (data_offset > parent_locator_offset) {
+                    parent_locator_offset = data_offset;
+                }
+            }
+        }
+
+        if (parent_locator_offset + 512 > s->free_data_block_offset) {
+            s->free_data_block_offset = parent_locator_offset + 512;
+        }
+
         for (i = 0; i < s->max_table_entries; i++) {
             be32_to_cpus(&s->pagetable[i]);
             if (s->pagetable[i] != 0xFFFFFFFF) {
@@ -363,19 +435,6 @@ static inline int64_t get_sector_offset(BlockDriverState *bs,
     bitmap_offset = 512 * (uint64_t) s->pagetable[pagetable_index];
     block_offset = bitmap_offset + s->bitmap_size + (512 * pageentry_index);
 
-    // We must ensure that we don't write to any sectors which are marked as
-    // unused in the bitmap. We get away with setting all bits in the block
-    // bitmap each time we write to a new block. This might cause Virtual PC to
-    // miss sparse read optimization, but it's not a problem in terms of
-    // correctness.
-    if (write && (s->last_bitmap_offset != bitmap_offset)) {
-        uint8_t bitmap[s->bitmap_size];
-
-        s->last_bitmap_offset = bitmap_offset;
-        memset(bitmap, 0xff, s->bitmap_size);
-        bdrv_pwrite_sync(bs->file, bitmap_offset, bitmap, s->bitmap_size);
-    }
-
 //    printf("sector: %" PRIx64 ", index: %x, offset: %x, bioff: %" PRIx64 ", bloff: %" PRIx64 "\n",
 // sector_num, pagetable_index, pageentry_index,
 // bitmap_offset, block_offset);
@@ -412,6 +471,53 @@ static inline int64_t get_sector_offset(BlockDriverState *bs,
 }
 
 /*
+ * Returns the absolute byte offset of the given sector in the differencing
+ * image file.
+ *
+ * If the sector is not allocated, -1 is returned instead. If the sector is
+ * allocated in the backing file, -2 is returned. If the sector is allocated
+ * in current file, the block offset is returned.
+ */
+static inline int64_t get_sector_offset_diff(BlockDriverState *bs,
+    int64_t sector_num)
+{
+    BDRVVPCState *s = bs->opaque;
+    uint64_t offset = sector_num << BDRV_SECTOR_BITS;
+    uint64_t bitmap_offset;
+    uint64_t block_offset;
+    uint32_t pagetable_index, pageentry_index;
+    uint32_t bitmap_index, bitmapentry_index;
+    uint8_t bitmap[s->bitmap_size];
+    int ret;
+
+    pagetable_index = offset / s->block_size;
+    if (pagetable_index >= s->max_table_entries) {
+        return -1;
+    } else if (0xffffffff == s->pagetable[pagetable_index]) {
+        return -2;
+    }
+
+    bitmap_offset = 512 * (uint64_t) s->pagetable[pagetable_index];
+    if (bitmap_offset > s->max_table_entries * s->block_size) {
+        return -1;
+    }
+    ret = bdrv_pread(bs->file, bitmap_offset, bitmap, s->bitmap_size);
+    if (ret < 0) {
+        return -1;
+    }
+
+    pageentry_index = (offset % s->block_size) / 512;
+    bitmap_index = pageentry_index / 8;
+    bitmapentry_index = 7 - pageentry_index % 8;
+    if (bitmap[bitmap_index] & 0x1 << bitmapentry_index) {
+        block_offset = bitmap_offset + s->bitmap_size + (512 * pageentry_index);
+        return block_offset;
+    } else {
+         return -2;
+    }
+}
+
+/*
  * Writes the footer to the end of the image file. This is needed when the
  * file grows as it overwrites the old footer
  *
@@ -437,7 +543,8 @@ static int rewrite_footer(BlockDriverState* bs)
  *
  * Returns the sectors' offset in the image file on success and < 0 on error
  */
-static int64_t alloc_block(BlockDriverState* bs, int64_t sector_num)
+static int64_t alloc_block(BlockDriverState *bs, int64_t sector_num,
+    bool isdiff)
 {
     BDRVVPCState *s = bs->opaque;
     int64_t bat_offset;
@@ -457,7 +564,11 @@ static int64_t alloc_block(BlockDriverState* bs, int64_t sector_num)
     s->pagetable[index] = s->free_data_block_offset / 512;
 
     // Initialize the block's bitmap
-    memset(bitmap, 0xff, s->bitmap_size);
+    if (isdiff) {
+        memset(bitmap, 0x0, s->bitmap_size);
+    } else {
+        memset(bitmap, 0xff, s->bitmap_size);
+    }
     ret = bdrv_pwrite_sync(bs->file, s->free_data_block_offset, bitmap,
         s->bitmap_size);
     if (ret < 0) {
@@ -501,36 +612,62 @@ static int vpc_read(BlockDriverState *bs, int64_t sector_num,
                     uint8_t *buf, int nb_sectors)
 {
     BDRVVPCState *s = bs->opaque;
-    int ret;
-    int64_t offset;
-    int64_t sectors, sectors_per_block;
     VHDFooter *footer = (VHDFooter *) s->footer_buf;
+    int64_t sectors_per_block = s->block_size >> BDRV_SECTOR_BITS;
+    int64_t offset, sectors;
+    int ret;
 
-    if (be32_to_cpu(footer->type) == VHD_FIXED) {
+    switch (be32_to_cpu(footer->type)) {
+    case VHD_FIXED:
         return bdrv_read(bs->file, sector_num, buf, nb_sectors);
-    }
-    while (nb_sectors > 0) {
-        offset = get_sector_offset(bs, sector_num, 0);
-
-        sectors_per_block = s->block_size >> BDRV_SECTOR_BITS;
-        sectors = sectors_per_block - (sector_num % sectors_per_block);
-        if (sectors > nb_sectors) {
-            sectors = nb_sectors;
-        }
+    case VHD_DYNAMIC:
+        while (nb_sectors > 0) {
+            sectors = sectors_per_block - (sector_num % sectors_per_block);
+            if (sectors > nb_sectors) {
+                sectors = nb_sectors;
+            }
 
-        if (offset == -1) {
-            memset(buf, 0, sectors * BDRV_SECTOR_SIZE);
-        } else {
-            ret = bdrv_pread(bs->file, offset, buf,
-                sectors * BDRV_SECTOR_SIZE);
-            if (ret != sectors * BDRV_SECTOR_SIZE) {
-                return -1;
+            offset = get_sector_offset(bs, sector_num, 0);
+            if (-1 == offset) {
+                memset(buf, 0, sectors * BDRV_SECTOR_SIZE);
+            } else {
+                ret = bdrv_pread(bs->file, offset, buf,
+                    sectors * BDRV_SECTOR_SIZE);
+                if (ret != sectors * BDRV_SECTOR_SIZE) {
+                    return -1;
+                }
             }
+
+            nb_sectors -= sectors;
+            sector_num += sectors;
+            buf += sectors * BDRV_SECTOR_SIZE;
         }
+        break;
+    case VHD_DIFF:
+        while (nb_sectors > 0) {
+            offset = get_sector_offset_diff(bs, sector_num);
+            if (-1 == offset) {
+                memset(buf, 0, BDRV_SECTOR_SIZE);
+            } else if (-2 == offset) {
+                ret = bdrv_pread(bs->backing_hd, sector_num << BDRV_SECTOR_BITS
+                    , buf, BDRV_SECTOR_SIZE);
+                if (ret < 0) {
+                    return -1;
+                }
+            } else {
+                ret = bdrv_pread(bs->file, offset, buf, BDRV_SECTOR_SIZE);
+                if (ret != BDRV_SECTOR_SIZE) {
+                    return -1;
+                }
+            }
 
-        nb_sectors -= sectors;
-        sector_num += sectors;
-        buf += sectors * BDRV_SECTOR_SIZE;
+            nb_sectors--;
+            sector_num++;
+            buf += BDRV_SECTOR_SIZE;
+        }
+        break;
+    default:
+        return -1;
     }
     return 0;
 }
@@ -546,43 +683,98 @@ static coroutine_fn int vpc_co_read(BlockDriverState *bs, int64_t sector_num,
     return ret;
 }
 
+static inline int64_t write_bitmap(BlockDriverState *bs, int64_t sector_num,
+    int64_t sectors)
+{
+    BDRVVPCState *s = bs->opaque;
+    uint64_t offset = sector_num << BDRV_SECTOR_BITS;
+    uint64_t bitmap_offset;
+    uint32_t pagetable_index, pageentry_index;
+    uint8_t bitmap[s->bitmap_size];
+    uint32_t bitmap_index, bitmapbit_index;
+    int i;
+    int ret;
+
+    pagetable_index = offset / s->block_size;
+    pageentry_index = (offset % s->block_size) / 512;
+    bitmap_offset = 512 * (uint64_t) s->pagetable[pagetable_index];
+
+    if (bitmap_offset > s->max_table_entries * s->block_size) {
+        return -1;
+    }
+    ret = bdrv_pread(bs->file, bitmap_offset, bitmap, s->bitmap_size);
+    if (ret < 0) {
+        return -1;
+    }
+
+    for (i = 0; i < sectors; i++) {
+        bitmap_index = pageentry_index / 8;
+        bitmapbit_index = 7 - pageentry_index % 8;
+        bitmap[bitmap_index] |= (0x1 << bitmapbit_index);
+        pageentry_index++;
+    }
+    ret = bdrv_pwrite(bs->file, bitmap_offset, bitmap, s->bitmap_size);
+    if (ret < 0) {
+        return -1;
+    }
+
+    return 0;
+}
+
 static int vpc_write(BlockDriverState *bs, int64_t sector_num,
     const uint8_t *buf, int nb_sectors)
 {
     BDRVVPCState *s = bs->opaque;
-    int64_t offset;
-    int64_t sectors, sectors_per_block;
+    VHDFooter *footer = (VHDFooter *) s->footer_buf;
+    int64_t sectors_per_block = s->block_size >> BDRV_SECTOR_BITS;
+    int64_t offset, sectors;
+    bool isdiff = true;
     int ret;
-    VHDFooter *footer =  (VHDFooter *) s->footer_buf;
 
-    if (be32_to_cpu(footer->type) == VHD_FIXED) {
+    switch (be32_to_cpu(footer->type)) {
+    case VHD_FIXED:
         return bdrv_write(bs->file, sector_num, buf, nb_sectors);
-    }
-    while (nb_sectors > 0) {
-        offset = get_sector_offset(bs, sector_num, 1);
-
-        sectors_per_block = s->block_size >> BDRV_SECTOR_BITS;
-        sectors = sectors_per_block - (sector_num % sectors_per_block);
-        if (sectors > nb_sectors) {
-            sectors = nb_sectors;
+    case VHD_DYNAMIC:
+    case VHD_DIFF:
+        if (VHD_DYNAMIC == be32_to_cpu(footer->type)) {
+            isdiff = false;
         }
 
-        if (offset == -1) {
-            offset = alloc_block(bs, sector_num);
-            if (offset < 0)
+        while (nb_sectors > 0) {
+            sectors = sectors_per_block - (sector_num % sectors_per_block);
+            if (sectors > nb_sectors) {
+                sectors = nb_sectors;
+            }
+
+            offset = get_sector_offset(bs, sector_num, 1);
+            if (offset == -1) {
+                offset = alloc_block(bs, sector_num, isdiff);
+                if (offset < 0) {
+                    return -1;
+                }
+            }
+
+            ret = bdrv_pwrite(bs->file, offset, buf,
+                sectors * BDRV_SECTOR_SIZE);
+            if (ret != sectors * BDRV_SECTOR_SIZE) {
                 return -1;
-        }
+            }
 
-        ret = bdrv_pwrite(bs->file, offset, buf, sectors * BDRV_SECTOR_SIZE);
-        if (ret != sectors * BDRV_SECTOR_SIZE) {
-            return -1;
-        }
+            if (true == isdiff) {
+                ret = write_bitmap(bs, sector_num, sectors);
+                if (ret < 0) {
+                    return -1;
+                }
+            }
 
-        nb_sectors -= sectors;
-        sector_num += sectors;
-        buf += sectors * BDRV_SECTOR_SIZE;
+            nb_sectors -= sectors;
+            sector_num += sectors;
+            buf += sectors * BDRV_SECTOR_SIZE;
+        }
+        break;
+    default:
+        return -1;
     }
-
     return 0;
 }
 
@@ -910,6 +1102,7 @@ static BlockDriver bdrv_vpc = {
     .bdrv_close             = vpc_close,
     .bdrv_reopen_prepare    = vpc_reopen_prepare,
     .bdrv_create            = vpc_create,
+    .supports_backing       = true,
 
     .bdrv_read              = vpc_co_read,
     .bdrv_write             = vpc_co_write,
--
1.8.3.1












1.       bdrv_preadv and bdrv_pwritev affer get_sector_offset_* is already checked, others are checked.

2.       MACX is a big-endian number, so there is no need be32_to_cpu, but I added a comment to MACX.

3.       clean the code no used after in vpc_open.

4.       move bdrv_co_readv() to bdrv_preadv in vpc_read.

5.       Added some code to make it easy to understand.

6.       the wrong use of cpu_to_be32 is fixed with another patch.


Last, it is hard to use "git sendmail" to your mailbox through gmail, because of GFW in china, so I only could

send to my mail.qq.com and forward to it. Hope that could run ! '

I am so sorry for the last mail, I must add a fixed subject prefix in my email that out of company, so copy the 

code?? screwed up. 

 


reply via email to

[Prev in Thread] Current Thread [Next in Thread]