qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH 01/12] qcow2: Add new overlap check functions


From: Max Reitz
Subject: [Qemu-devel] [PATCH 01/12] qcow2: Add new overlap check functions
Date: Mon, 3 Nov 2014 18:04:20 +0100

The existing qcow2 metadata overlap detection function used existing
structures to determine the location of the image metadata, from plain
fields such as l1_table_offset and l1_size in the BDRVQcowState, over
image structures in memory such as the L1 table for the L2 tables'
positions, or it even read the required data directly from disk for
every requested check, such as the snapshot L1 tables for the inactive
L2 tables' positions.

These new functions instead keep a dedicated structure for keeping track
of the metadata positions in memory. It consists of two parts: First,
there is one structure which is basically a list of all metadata
structures. Each entry has a bitmask of types (because some metadata
structures may actually overlap, such as active and inactive L2 tables),
a number of clusters occupied and the offset from the previous entry in
clusters. This structure requires relatively few memory, but checking a
certain point may take relatively long. Each entry is called a
"fragment".

Therefore, there is another representation which is a bitmap, or rather
a bytemap, of metadata types. The previously described list is split
into multiple windows with each describing a constant number of clusters
(WINDOW_SIZE). If the list is to be queried or changed, the respective
window is selected in constant time and the bitmap is generated from the
fragments belonging to the window. This bitmap can then be queried in
constant time and easily be changed.

Because the bitmap representation requires more memory, it is only used
as a cache. Whenever a window is removed from the cache, the fragment
list will be rebuilt from the bitmap if the latter has been modified.
Therefore, the fragment list is only used as the background
representation to save memory, whereas the bitmap is used whenever
possible.

Regarding the size of the fragment list in memory: As one refcount block
can handle cluster_size / 2 entries and one L2 table can handle
cluster_size / 8 entries, for a qcow2 image with the standard cluster
size of 64 kB, there is a ratio of data to metadata of about 1/6000
(1/32768 refblocks and 1/8192 L2 tables) if one ignores the fact that
not every cluster requires an L2 table entry. The refcount table and the
L1 table is generally negligible. At the worst, each metadata cluster
requires its own entry in the fragment list; each entry takes up four
bytes, therefore, at the worst, the fragment list should take up (for an
image with 64 kB clusters) (4 B) / (64 kB * 6000) of the image size,
which is about 1.e-8 (i.e., 11 kB for a 1 TB image, or 11 MB for a 1 PB
image).

Signed-off-by: Max Reitz <address@hidden>
---
 block/Makefile.objs   |   3 +-
 block/qcow2-overlap.c | 408 ++++++++++++++++++++++++++++++++++++++++++++++++++
 block/qcow2.h         |  13 ++
 3 files changed, 423 insertions(+), 1 deletion(-)
 create mode 100644 block/qcow2-overlap.c

diff --git a/block/Makefile.objs b/block/Makefile.objs
index 04b0e43..8d9c73d 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -1,5 +1,6 @@
 block-obj-y += raw_bsd.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o 
vvfat.o
-block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o 
qcow2-cache.o
+block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o
+block-obj-y += qcow2-cache.o qcow2-overlap.o
 block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
 block-obj-y += qed-check.o
 block-obj-$(CONFIG_VHDX) += vhdx.o vhdx-endian.o vhdx-log.o
diff --git a/block/qcow2-overlap.c b/block/qcow2-overlap.c
new file mode 100644
index 0000000..21d1d3c
--- /dev/null
+++ b/block/qcow2-overlap.c
@@ -0,0 +1,408 @@
+/*
+ * QCOW2 runtime metadata overlap detection
+ *
+ * Copyright (c) 2014 Max Reitz <address@hidden>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "block/block_int.h"
+#include "qemu-common.h"
+#include "qemu/range.h"
+#include "qcow2.h"
+
+/* Number of clusters which are covered by each metadata window;
+ * note that this may not exceed 2^16 as long as
+ * Qcow2MetadataFragment::relative_start is a uint16_t */
+#define WINDOW_SIZE 4096
+
+/* Describes a fragment of a or a whole metadata range; does not necessarily
+ * describe the whole range because it needs to be split on window boundaries 
*/
+typedef struct Qcow2MetadataFragment {
+    /* Bitmask of QCow2MetadataOverlap values */
+    uint8_t types;
+    uint8_t nb_clusters;
+    /* Number of clusters between the start of the window and this range */
+    uint16_t relative_start;
+} QEMU_PACKED Qcow2MetadataFragment;
+
+typedef struct Qcow2MetadataWindow {
+    Qcow2MetadataFragment *fragments;
+    int nb_fragments, fragments_array_size;
+
+    /* If not NULL, this is an expanded version of the "RLE" version given by
+     * the fragments array; there are WINDOW_SIZE entries */
+    uint8_t *bitmap;
+    bool bitmap_modified;
+
+    /* Time of last access */
+    unsigned age;
+
+    /* Index in Qcow2MetadataList::cached_windows */
+    int cached_windows_index;
+} Qcow2MetadataWindow;
+
+struct Qcow2MetadataList {
+    Qcow2MetadataWindow *windows;
+    uint64_t nb_windows;
+
+    unsigned current_age;
+
+    /* Index into the windows array */
+    int *cached_windows;
+    size_t nb_cached_windows;
+};
+
+/**
+ * Destroys the cached window bitmap. If it has been modified, the fragment 
list
+ * will be rebuilt accordingly.
+ */
+static void destroy_window_bitmap(Qcow2MetadataList *mdl,
+                                  Qcow2MetadataWindow *window)
+{
+    if (!window->bitmap) {
+        return;
+    }
+
+    if (window->bitmap_modified) {
+        int bitmap_i, fragment_i = 0;
+        QCow2MetadataOverlap current_types = 0;
+        int current_nb_clusters = 0, current_relative_start = 0;
+
+        /* Rebuild the fragment list; the case bitmap_i == WINDOW_SIZE is for
+         * entering the last fragment at the bitmap end */
+
+        for (bitmap_i = 0; bitmap_i <= WINDOW_SIZE; bitmap_i++) {
+            /* Qcow2MetadataFragment::nb_clusters is a uint8_t, so
+             * current_nb_clusters may not exceed 255 */
+            if (bitmap_i < WINDOW_SIZE &&
+                current_types == window->bitmap[bitmap_i] &&
+                current_nb_clusters < 255)
+            {
+                current_nb_clusters++;
+            } else {
+                if (current_types && current_nb_clusters) {
+                    if (fragment_i >= window->fragments_array_size) {
+                        window->fragments_array_size =
+                            3 * window->fragments_array_size / 2 + 1;
+
+                        /* new_nb_fragments should be small enough, and there 
is
+                         * nothing we can do on failure anyway, so do not use
+                         * g_try_renew() here */
+                        window->fragments =
+                            g_renew(Qcow2MetadataFragment, window->fragments,
+                                    window->fragments_array_size);
+                    }
+
+                    window->fragments[fragment_i++] = (Qcow2MetadataFragment){
+                        .types          = current_types,
+                        .nb_clusters    = current_nb_clusters,
+                        .relative_start = current_relative_start
+                    };
+
+                    current_relative_start = 0;
+                } else if (!current_types) {
+                    current_relative_start = current_nb_clusters;
+                }
+
+                current_nb_clusters = 0;
+                if (bitmap_i < WINDOW_SIZE) {
+                    current_types = window->bitmap[bitmap_i];
+                }
+            }
+        }
+
+        window->nb_fragments = fragment_i;
+    }
+
+    g_free(window->bitmap);
+    window->bitmap = NULL;
+}
+
+/**
+ * Creates a bitmap from the fragment list.
+ */
+static void build_window_bitmap(Qcow2MetadataList *mdl,
+                                Qcow2MetadataWindow *window)
+{
+    int cache_i, oldest_cache_i = -1, i;
+    unsigned oldest_cache_age = 0;
+
+    for (cache_i = 0; cache_i < mdl->nb_cached_windows; cache_i++) {
+        unsigned age;
+
+        if (mdl->cached_windows[cache_i] < 0) {
+            break;
+        }
+
+        age = mdl->current_age - 
mdl->windows[mdl->cached_windows[cache_i]].age;
+        if (age > oldest_cache_age) {
+            oldest_cache_age = age;
+            oldest_cache_i = cache_i;
+        }
+    }
+
+    if (cache_i >= mdl->nb_cached_windows) {
+        destroy_window_bitmap(mdl,
+            &mdl->windows[mdl->cached_windows[oldest_cache_i]]);
+        cache_i = oldest_cache_i;
+    }
+
+    assert(cache_i >= 0);
+    mdl->cached_windows[cache_i] = window - mdl->windows;
+    window->cached_windows_index = cache_i;
+
+    window->age = mdl->current_age++;
+
+    window->bitmap = g_new0(uint8_t, WINDOW_SIZE);
+
+    for (i = 0; i < window->nb_fragments; i++) {
+        Qcow2MetadataFragment *fragment = &window->fragments[i];
+
+        memset(&window->bitmap[fragment->relative_start], fragment->types,
+               fragment->nb_clusters);
+    }
+
+    window->bitmap_modified = false;
+}
+
+/**
+ * Enters a new range into the metadata list.
+ */
+void qcow2_metadata_list_enter(BlockDriverState *bs, uint64_t offset,
+                               int nb_clusters, QCow2MetadataOverlap types)
+{
+    BDRVQcowState *s = bs->opaque;
+    uint64_t start_cluster = offset >> s->cluster_bits;
+    uint64_t end_cluster = start_cluster + nb_clusters;
+    uint64_t current_cluster = start_cluster;
+
+    types &= s->overlap_check;
+    if (!types) {
+        return;
+    }
+
+    if (offset_into_cluster(s, offset)) {
+        /* Do not enter apparently broken metadata ranges */
+        return;
+    }
+
+    while (current_cluster < end_cluster) {
+        int bitmap_i;
+        int bitmap_i_start = current_cluster % WINDOW_SIZE;
+        int bitmap_i_end = MIN(WINDOW_SIZE,
+                               end_cluster - current_cluster + bitmap_i_start);
+        uint64_t window_i = current_cluster / WINDOW_SIZE;
+        Qcow2MetadataWindow *window;
+
+        if (window_i >= s->metadata_list->nb_windows) {
+            /* This should not be happening too often, so it is fine to resize
+             * the array to exactly the required size */
+            Qcow2MetadataWindow *new_windows;
+
+            new_windows = g_try_renew(Qcow2MetadataWindow,
+                                      s->metadata_list->windows,
+                                      window_i + 1);
+            if (!new_windows) {
+                return;
+            }
+
+            memset(new_windows + s->metadata_list->nb_windows, 0,
+                   (window_i + 1 - s->metadata_list->nb_windows) *
+                   sizeof(Qcow2MetadataWindow));
+
+            s->metadata_list->windows = new_windows;
+            s->metadata_list->nb_windows = window_i + 1;
+        }
+
+        window = &s->metadata_list->windows[window_i];
+        if (!window->bitmap) {
+            build_window_bitmap(s->metadata_list, window);
+        }
+
+        for (bitmap_i = bitmap_i_start; bitmap_i < bitmap_i_end; bitmap_i++) {
+            window->bitmap[bitmap_i] |= types;
+        }
+
+        window->age = s->metadata_list->current_age++;
+        window->bitmap_modified = true;
+
+        /* Go to the next window */
+        current_cluster += WINDOW_SIZE - bitmap_i_start;
+    }
+}
+
+/**
+ * Removes a range of the given types from the metadata list.
+ */
+void qcow2_metadata_list_remove(BlockDriverState *bs, uint64_t offset,
+                                int nb_clusters, QCow2MetadataOverlap types)
+{
+    BDRVQcowState *s = bs->opaque;
+    uint64_t start_cluster = offset >> s->cluster_bits;
+    uint64_t end_cluster = start_cluster + nb_clusters;
+    uint64_t current_cluster = start_cluster;
+
+    types &= s->overlap_check;
+    if (!types) {
+        return;
+    }
+
+    if (offset_into_cluster(s, offset)) {
+        /* Try to remove even broken metadata ranges */
+        end_cluster++;
+    }
+
+    while (current_cluster < end_cluster) {
+        int bitmap_i;
+        int bitmap_i_start = current_cluster % WINDOW_SIZE;
+        int bitmap_i_end = MIN(WINDOW_SIZE,
+                               end_cluster - current_cluster + bitmap_i_start);
+        uint64_t window_i = current_cluster / WINDOW_SIZE;
+        Qcow2MetadataWindow *window;
+
+        /* If the list is too small, there is no metadata structure here;
+         * because window_i will only grow, we can abort here */
+        if (window_i >= s->metadata_list->nb_windows) {
+            return;
+        }
+
+        window = &s->metadata_list->windows[window_i];
+        if (!window->bitmap) {
+            build_window_bitmap(s->metadata_list, window);
+        }
+
+        for (bitmap_i = bitmap_i_start; bitmap_i < bitmap_i_end; bitmap_i++) {
+            window->bitmap[bitmap_i] &= ~types;
+        }
+
+        window->age = s->metadata_list->current_age++;
+        window->bitmap_modified = true;
+
+        /* Go to the next window */
+        current_cluster += WINDOW_SIZE - bitmap_i_start;
+    }
+}
+
+static int single_check_metadata_overlap(Qcow2MetadataList *mdl, int ign,
+                                         uint64_t cluster)
+{
+    int window_i = cluster / WINDOW_SIZE;
+    int bitmap_i = cluster % WINDOW_SIZE;
+    Qcow2MetadataWindow *window;
+
+    if (window_i >= mdl->nb_windows) {
+        return 0;
+    }
+    window = &mdl->windows[window_i];
+
+    if (!window->bitmap) {
+        build_window_bitmap(mdl, window);
+    }
+
+    window->age = mdl->current_age++;
+
+    return window->bitmap[bitmap_i] & ~ign;
+}
+
+/* This will replace qcow2_check_metadata_overlap() */
+static int check_metadata_overlap(BlockDriverState *bs, int ign, int64_t 
offset,
+                                  int64_t size) __attribute__((used));
+
+static int check_metadata_overlap(BlockDriverState *bs, int ign, int64_t 
offset,
+                                  int64_t size)
+{
+    BDRVQcowState *s = bs->opaque;
+    uint64_t start_cluster = offset >> s->cluster_bits;
+    uint64_t end_cluster = DIV_ROUND_UP(offset + size, s->cluster_size);
+    uint64_t current_cluster;
+    int ret = 0;
+
+    for (current_cluster = start_cluster; current_cluster < end_cluster;
+         current_cluster++)
+    {
+        ret |= single_check_metadata_overlap(s->metadata_list, ign,
+                                             current_cluster);
+    }
+
+    return ret;
+}
+
+int qcow2_create_empty_metadata_list(BlockDriverState *bs, size_t cache_size,
+                                     Error **errp)
+{
+    BDRVQcowState *s = bs->opaque;
+    int ret;
+    size_t cache_entries, i;
+
+    s->metadata_list = g_new0(Qcow2MetadataList, 1);
+    s->metadata_list->nb_windows = bs->total_sectors / s->cluster_sectors
+                                                     / WINDOW_SIZE;
+    s->metadata_list->windows = g_try_new0(Qcow2MetadataWindow,
+                                           s->metadata_list->nb_windows);
+    if (s->metadata_list->nb_windows && !s->metadata_list->windows) {
+        error_setg(errp, "Could not allocate metadata overlap check windows");
+        ret = -ENOMEM;
+        goto fail;
+    }
+
+    cache_entries = cache_size
+                  / (WINDOW_SIZE + sizeof(*s->metadata_list->cached_windows));
+    if (!cache_entries) {
+        cache_entries = 1;
+    }
+
+    s->metadata_list->nb_cached_windows = cache_entries;
+    s->metadata_list->cached_windows = g_try_new(int, cache_entries);
+    if (!s->metadata_list->cached_windows) {
+        error_setg(errp, "Could not allocate metadata overlap cache pointers");
+        ret = -ENOMEM;
+        goto fail;
+    }
+    for (i = 0; i < s->metadata_list->nb_cached_windows; i++) {
+        s->metadata_list->cached_windows[i] = -1;
+    }
+
+    return 0;
+
+fail:
+    qcow2_metadata_list_destroy(bs);
+    return ret;
+}
+
+void qcow2_metadata_list_destroy(BlockDriverState *bs)
+{
+    BDRVQcowState *s = bs->opaque;
+    uint64_t i;
+
+    if (s->metadata_list && s->metadata_list->windows) {
+        for (i = 0; i < s->metadata_list->nb_windows; i++) {
+            Qcow2MetadataWindow *window = &s->metadata_list->windows[i];
+
+            destroy_window_bitmap(s->metadata_list, window);
+            g_free(window->fragments);
+        }
+
+        g_free(s->metadata_list->cached_windows);
+        g_free(s->metadata_list->windows);
+    }
+
+    g_free(s->metadata_list);
+    s->metadata_list = NULL;
+}
diff --git a/block/qcow2.h b/block/qcow2.h
index 6e39a1b..8dd5a0a 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -159,6 +159,9 @@ typedef struct QCowSnapshot {
 struct Qcow2Cache;
 typedef struct Qcow2Cache Qcow2Cache;
 
+struct Qcow2MetadataList;
+typedef struct Qcow2MetadataList Qcow2MetadataList;
+
 typedef struct Qcow2UnknownHeaderExtension {
     uint32_t magic;
     uint32_t len;
@@ -261,6 +264,7 @@ typedef struct BDRVQcowState {
 
     bool discard_passthrough[QCOW2_DISCARD_MAX];
 
+    Qcow2MetadataList *metadata_list;
     int overlap_check; /* bitmask of Qcow2MetadataOverlap values */
     bool signaled_corruption;
 
@@ -576,4 +580,13 @@ int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache 
*c, uint64_t offset,
     void **table);
 int qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table);
 
+/* qcow2-overlap.c functions */
+int qcow2_create_empty_metadata_list(BlockDriverState *bs, size_t cache_size,
+                                     Error **errp);
+void qcow2_metadata_list_destroy(BlockDriverState *bs);
+void qcow2_metadata_list_enter(BlockDriverState *bs, uint64_t offset,
+                               int nb_clusters, QCow2MetadataOverlap type);
+void qcow2_metadata_list_remove(BlockDriverState *bs, uint64_t offset,
+                                int nb_clusters, QCow2MetadataOverlap type);
+
 #endif
-- 
1.9.3




reply via email to

[Prev in Thread] Current Thread [Next in Thread]