qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [RFC PATCH 12/14] Add disk buffer for block replication


From: Wen Congyang
Subject: [Qemu-devel] [RFC PATCH 12/14] Add disk buffer for block replication
Date: Thu, 12 Feb 2015 11:07:16 +0800

Signed-off-by: Wen Congyang <address@hidden>
Signed-off-by: zhanghailiang <address@hidden>
Signed-off-by: Gonglei <address@hidden>
---
 block/Makefile.objs    |   1 +
 block/blkcolo-buffer.c | 324 +++++++++++++++++++++++++++++++++++++++++++++++++
 block/blkcolo.h        |  35 ++++++
 3 files changed, 360 insertions(+)
 create mode 100644 block/blkcolo-buffer.c
 create mode 100644 block/blkcolo.h

diff --git a/block/Makefile.objs b/block/Makefile.objs
index db2933e..1b7b458 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -21,6 +21,7 @@ block-obj-$(CONFIG_ARCHIPELAGO) += archipelago.o
 block-obj-$(CONFIG_LIBSSH2) += ssh.o
 block-obj-y += accounting.o
 block-obj-y += write-threshold.o
+block-obj-y += blkcolo-buffer.o
 
 common-obj-y += stream.o
 common-obj-y += commit.o
diff --git a/block/blkcolo-buffer.c b/block/blkcolo-buffer.c
new file mode 100644
index 0000000..1f64542
--- /dev/null
+++ b/block/blkcolo-buffer.c
@@ -0,0 +1,324 @@
+/*
+ * Block driver for COLO
+ *
+ * Copyright Fujitsu, Corp. 2015
+ * Copyright (c) 2015 Intel Corporation
+ * Copyright (c) 2015 HUAWEI TECHNOLOGIES CO.,LTD.
+ *
+ * Authors:
+ *     Wen Congyang <address@hidden>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu-common.h"
+#include "qemu/queue.h"
+#include "block/block.h"
+#include "block/blkcolo.h"
+
+typedef struct buffered_request_state {
+    uint64_t start_sector;  /* first sector covered by this entry */
+    int nb_sectors;         /* number of consecutive sectors covered */
+    void *data;             /* payload, nb_sectors * BDRV_SECTOR_SIZE bytes */
+    QSIMPLEQ_ENTRY(buffered_request_state) entry;   /* queue linkage */
+} buffered_request_state;
+
+/* common functions */
+/*
+ * Allocate the payload for nb_sectors sectors.  The buffered data may eat
+ * too much memory, and glibc cannot work very well in such a case.
+ */
+static void *alloc_buffered_data(int nb_sectors)
+{
+    return g_malloc(nb_sectors * BDRV_SECTOR_SIZE);
+}
+
+static void free_buffered_data(void *data)
+{
+    g_free(data); /* counterpart of alloc_buffered_data() */
+}
+
+typedef struct search_brs_state {
+    uint64_t sector;               /* in: sector to look up */
+    buffered_request_state *prev;  /* out: last entry wholly before sector */
+} search_brs_state;
+
+static buffered_request_state *search_brs(disk_buffer *disk_buffer,
+                                           search_brs_state *sbs)
+{
+    buffered_request_state *cur;
+
+    /* Walk the queue (assumed ordered by start_sector) up to sbs->sector */
+    QSIMPLEQ_FOREACH(cur, &disk_buffer->head, entry) {
+        uint64_t end = cur->start_sector + cur->nb_sectors;
+
+        if (sbs->sector < cur->start_sector) {
+            break;              /* sector is in the gap before cur: miss */
+        }
+        if (sbs->sector < end) {
+            return cur;         /* hit: cur covers the sector */
+        }
+        sbs->prev = cur;        /* cur ends at or before the sector */
+    }
+    return NULL;
+}
+
+static buffered_request_state *get_next_brs(buffered_request_state *brs)
+{
+    return QSIMPLEQ_NEXT(brs, entry); /* entry queued right after brs */
+}
+
+static void add_brs_after(disk_buffer *disk_buffer,
+                          buffered_request_state *new_brs,
+                          buffered_request_state *prev)
+{
+    if (prev) {                 /* a NULL prev means: insert at the head */
+        QSIMPLEQ_INSERT_AFTER(&disk_buffer->head, prev, new_brs, entry);
+        return;
+    }
+    QSIMPLEQ_INSERT_HEAD(&disk_buffer->head, new_brs, entry);
+}
+
+static bool disk_buffer_empty(disk_buffer *disk_buffer)
+{
+    return QSIMPLEQ_EMPTY(&disk_buffer->head); /* no entries queued */
+}
+
+/* Disk buffer */
+static buffered_request_state *create_new_brs(QEMUIOVector *qiov,
+                                              uint64_t iov_sector,
+                                              uint64_t sector, int nb_sectors)
+{
+    buffered_request_state *new_brs = g_slice_new(buffered_request_state);
+
+    /* qiov starts at iov_sector; copy out the part that begins at sector */
+    new_brs->data = alloc_buffered_data(nb_sectors);
+    qemu_iovec_to_buf(qiov, (sector - iov_sector) * BDRV_SECTOR_SIZE,
+                      new_brs->data, nb_sectors * BDRV_SECTOR_SIZE);
+    new_brs->start_sector = sector;
+    new_brs->nb_sectors = nb_sectors;
+
+    return new_brs;
+}
+
+static void free_brs(buffered_request_state *brs)
+{
+    free_buffered_data(brs->data); /* release payload before brs itself */
+    g_slice_free(buffered_request_state, brs);
+}
+
+/*
+ * Check whether some sector of [sector, sector + nb_sectors) is missing from
+ * the buffer: true means that at least one sector of the range is not
+ * buffered, false that the whole range is buffered.  A request with
+ * nb_sectors <= 0 is reported as having no empty range.
+ */
+bool buffer_has_empty_range(disk_buffer *disk_buffer,
+                            uint64_t sector, int nb_sectors)
+{
+    search_brs_state lookup = { .sector = sector, .prev = NULL };
+    uint64_t end = sector + nb_sectors;
+    uint64_t covered = sector;  /* [sector, covered) is known to be buffered */
+    buffered_request_state *cur;
+
+    if (nb_sectors <= 0) {
+        return false;
+    }
+
+    cur = search_brs(disk_buffer, &lookup);
+    if (!cur) {
+        return true;            /* the first sector itself is not buffered */
+    }
+
+    for (; cur && covered < end; cur = get_next_brs(cur)) {
+        uint64_t cur_end = cur->start_sector + cur->nb_sectors;
+
+        if (covered < cur->start_sector) {
+            return true;        /* gap between two buffered entries */
+        }
+        if (cur_end >= end) {
+            return false;       /* this entry reaches the end of the range */
+        }
+        covered = cur_end;
+    }
+
+    return covered < end;
+}
+
+/*
+ * Overlay buffered data on qiov, which describes the nb_sectors sectors
+ * starting at sector.
+ *
+ * Note: only the sectors that exist in the buffer will be overwritten; all
+ * other parts of qiov are left as they are.
+ */
+void qiov_read_from_buffer(disk_buffer *disk_buffer, QEMUIOVector *qiov,
+                           uint64_t sector, int nb_sectors)
+{
+    search_brs_state lookup = { .sector = sector, .prev = NULL };
+    uint64_t end = sector + nb_sectors;
+    uint64_t pos = sector;      /* next sector of the request to examine */
+    buffered_request_state *cur;
+
+    if (disk_buffer_empty(disk_buffer)) {
+        /* The disk buffer is empty */
+        return;
+    }
+
+    cur = search_brs(disk_buffer, &lookup);
+    if (!cur) {
+        /* sector itself is not buffered: start at the first entry after it */
+        cur = lookup.prev ? get_next_brs(lookup.prev)
+                          : QSIMPLEQ_FIRST(&disk_buffer->head);
+    }
+
+    for (; cur && pos < end; cur = get_next_brs(cur)) {
+        uint64_t cur_end = cur->start_sector + cur->nb_sectors;
+        size_t skip = 0;        /* sectors of cur that precede pos */
+        size_t count;           /* sectors copied from cur */
+
+        if (cur->start_sector >= end) {
+            break;
+        }
+
+        if (cur->start_sector < pos) {
+            /* Only the first entry visited can start before the request */
+            skip = pos - cur->start_sector;
+        } else {
+            pos = cur->start_sector;
+        }
+
+        /* Copy up to the end of cur or of the request, whichever is first */
+        count = cur_end >= end ? end - pos : cur->nb_sectors - skip;
+        qemu_iovec_from_buf(qiov, (pos - sector) * BDRV_SECTOR_SIZE,
+                            cur->data + skip * BDRV_SECTOR_SIZE,
+                            count * BDRV_SECTOR_SIZE);
+
+        pos = cur_end;
+    }
+}
+
+/*
+ * Buffer the nb_sectors sectors at sector from qiov (which starts at sector).
+ * Unbuffered sectors get new entries, kept in start_sector order; buffered
+ * ones are rewritten only if overwrite is set.  nb_sectors <= 0 is a no-op.
+ */
+void qiov_write_to_buffer(disk_buffer *disk_buffer, QEMUIOVector *qiov,
+                          uint64_t sector, int nb_sectors, bool overwrite)
+{
+    search_brs_state sbs;
+    buffered_request_state *brs, *new_brs, *prev;
+    uint64_t cur_sector = sector;
+    int cur_nb_sectors, offset;
+
+    if (nb_sectors <= 0) {
+        /* Never queue an empty or negative-sized entry */
+        return;
+    }
+
+    /* Walk from the entry that follows sbs.prev (the head if prev is NULL) */
+    sbs.sector = sector;
+    sbs.prev = NULL;
+    search_brs(disk_buffer, &sbs);
+    prev = sbs.prev;
+    brs = prev ? get_next_brs(prev) : QSIMPLEQ_FIRST(&disk_buffer->head);
+
+    while (brs && cur_sector < sector + nb_sectors) {
+        if (cur_sector < brs->start_sector) {
+            if (sector + nb_sectors <= brs->start_sector) {
+                cur_nb_sectors = sector + nb_sectors - cur_sector;
+            } else {
+                cur_nb_sectors = brs->start_sector - cur_sector;
+            }
+            new_brs = create_new_brs(qiov, sector, cur_sector, cur_nb_sectors);
+            add_brs_after(disk_buffer, new_brs, prev);
+            cur_sector = brs->start_sector;
+        }
+
+        if (cur_sector >= sector + nb_sectors) {
+            break;
+        }
+
+        if (overwrite) {
+            offset = cur_sector - brs->start_sector;
+            if (sector + nb_sectors <= brs->start_sector + brs->nb_sectors) {
+                cur_nb_sectors = sector + nb_sectors - cur_sector;
+            } else {
+                cur_nb_sectors = brs->nb_sectors - offset;
+            }
+            qemu_iovec_to_buf(qiov, (cur_sector - sector) * BDRV_SECTOR_SIZE,
+                              brs->data + offset * BDRV_SECTOR_SIZE,
+                              cur_nb_sectors * BDRV_SECTOR_SIZE);
+        }
+
+        cur_sector = brs->start_sector + brs->nb_sectors;
+
+        prev = brs;
+        brs = get_next_brs(brs);
+    }
+
+    /* The rest (everything, for an empty buffer) goes into a new entry */
+    if (cur_sector < sector + nb_sectors) {
+        new_brs = create_new_brs(qiov, sector, cur_sector,
+                                 sector + nb_sectors - cur_sector);
+        add_brs_after(disk_buffer, new_brs, prev);
+    }
+}
+
+struct flushed_data {
+    QEMUIOVector qiov;            /* one-element vector over brs->data */
+    buffered_request_state *brs;  /* entry in flight; freed on completion */
+};
+
+static void flush_buffered_data_complete(void *opaque, int ret)
+{
+    struct flushed_data *done = opaque;
+
+    /* The guest has already been told that this write succeeded, so a
+     * failure cannot be reported any more; it is treated as fatal. */
+    assert(ret == 0);
+    qemu_iovec_destroy(&done->qiov);
+    free_brs(done->brs);
+    g_free(done);
+}
+
+/*
+ * Submit every buffered entry to bs as an asynchronous write, leaving the
+ * buffer empty, then call bdrv_drain_all().
+ */
+void flush_buffered_data_to_disk(disk_buffer *disk_buffer,
+                                 BlockDriverState *bs)
+{
+    buffered_request_state *brs;
+    struct flushed_data *req;
+
+    while ((brs = QSIMPLEQ_FIRST(&disk_buffer->head)) != NULL) {
+        QSIMPLEQ_REMOVE_HEAD(&disk_buffer->head, entry);
+        req = g_malloc(sizeof(*req));
+        qemu_iovec_init(&req->qiov, 1);
+        qemu_iovec_add(&req->qiov, brs->data,
+                       brs->nb_sectors * BDRV_SECTOR_SIZE);
+        req->brs = brs;         /* freed by flush_buffered_data_complete() */
+        bdrv_aio_writev(bs, brs->start_sector, &req->qiov, brs->nb_sectors,
+                        flush_buffered_data_complete, req);
+    }
+    bdrv_drain_all();
+}
+
+void init_disk_buffer(disk_buffer *disk_buffer)
+{
+    QSIMPLEQ_INIT(&disk_buffer->head); /* start out with an empty queue */
+}
+
+void clear_all_buffered_data(disk_buffer *disk_buffer)
+{
+    buffered_request_state *brs;
+
+    /* Pop entries off the front until the queue is empty, freeing each one */
+    while ((brs = QSIMPLEQ_FIRST(&disk_buffer->head)) != NULL) {
+        QSIMPLEQ_REMOVE_HEAD(&disk_buffer->head, entry);
+        free_brs(brs);
+    }
+}
diff --git a/block/blkcolo.h b/block/blkcolo.h
new file mode 100644
index 0000000..d8e0d9a
--- /dev/null
+++ b/block/blkcolo.h
@@ -0,0 +1,35 @@
+/*
+ * Block driver for COLO
+ *
+ * Copyright Fujitsu, Corp. 2015
+ * Copyright (c) 2015 Intel Corporation
+ * Copyright (c) 2015 HUAWEI TECHNOLOGIES CO.,LTD.
+ *
+ * Authors:
+ *     Wen Congyang <address@hidden>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef BLOCK_BLKCOLO_H
+#define BLOCK_BLKCOLO_H
+
+typedef struct disk_buffer {
+    QSIMPLEQ_HEAD(, buffered_request_state) head; /* ordered by start_sector */
+} disk_buffer;
+
+bool buffer_has_empty_range(disk_buffer *disk_buffer,
+                            uint64_t sector, int nb_sectors);
+void qiov_read_from_buffer(disk_buffer *disk_buffer, QEMUIOVector *qiov,
+                           uint64_t sector, int nb_sectors);
+void qiov_write_to_buffer(disk_buffer *disk_buffer, QEMUIOVector *qiov,
+                          uint64_t sector, int nb_sectors, bool overwrite);
+void flush_buffered_data_to_disk(disk_buffer *disk_buffer,
+                                 BlockDriverState *bs);
+
+void init_disk_buffer(disk_buffer *disk_buffer);
+void clear_all_buffered_data(disk_buffer *disk_buffer);
+
+#endif
-- 
2.1.0




reply via email to

[Prev in Thread] Current Thread [Next in Thread]