[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [RFC PATCH 12/14] Add disk buffer for block replication
From: |
Wen Congyang |
Subject: |
[Qemu-devel] [RFC PATCH 12/14] Add disk buffer for block replication |
Date: |
Thu, 12 Feb 2015 11:07:16 +0800 |
Signed-off-by: Wen Congyang <address@hidden>
Signed-off-by: zhanghailiang <address@hidden>
Signed-off-by: Gonglei <address@hidden>
---
block/Makefile.objs | 1 +
block/blkcolo-buffer.c | 324 +++++++++++++++++++++++++++++++++++++++++++++++++
block/blkcolo.h | 35 ++++++
3 files changed, 360 insertions(+)
create mode 100644 block/blkcolo-buffer.c
create mode 100644 block/blkcolo.h
diff --git a/block/Makefile.objs b/block/Makefile.objs
index db2933e..1b7b458 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -21,6 +21,7 @@ block-obj-$(CONFIG_ARCHIPELAGO) += archipelago.o
block-obj-$(CONFIG_LIBSSH2) += ssh.o
block-obj-y += accounting.o
block-obj-y += write-threshold.o
+block-obj-y += blkcolo-buffer.o
common-obj-y += stream.o
common-obj-y += commit.o
diff --git a/block/blkcolo-buffer.c b/block/blkcolo-buffer.c
new file mode 100644
index 0000000..1f64542
--- /dev/null
+++ b/block/blkcolo-buffer.c
@@ -0,0 +1,324 @@
+/*
+ * Block driver for COLO
+ *
+ * Copyright Fujitsu, Corp. 2015
+ * Copyright (c) 2015 Intel Corporation
+ * Copyright (c) 2015 HUAWEI TECHNOLOGIES CO.,LTD.
+ *
+ * Authors:
+ * Wen Congyang <address@hidden>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu-common.h"
+#include "qemu/queue.h"
+#include "block/block.h"
+#include "block/blkcolo.h"
+
+typedef struct buffered_request_state { /* one buffered run of sectors */
+ uint64_t start_sector; /* first sector covered by this entry */
+ int nb_sectors; /* number of sectors held in data */
+ void *data; /* nb_sectors * BDRV_SECTOR_SIZE bytes of payload */
+ QSIMPLEQ_ENTRY(buffered_request_state) entry; /* link in disk_buffer->head */
+} buffered_request_state;
+
+/* common functions */
+/*
+ * The buffered data may consume a lot of memory, and glibc's allocator may
+ * not handle that well (presumably why allocation is wrapped in a helper).
+ */
+static void *alloc_buffered_data(int nb_sectors) /* length is in sectors */
+{
+ return g_malloc(nb_sectors * BDRV_SECTOR_SIZE); /* converted to bytes */
+}
+
+static void free_buffered_data(void *data) /* pairs with alloc_buffered_data() */
+{
+ g_free(data);
+}
+
+typedef struct search_brs_state { /* in/out argument of search_brs() */
+ uint64_t sector; /* in: sector to look up */
+ buffered_request_state *prev; /* out: last entry passed; callers preset NULL */
+} search_brs_state;
+
+static buffered_request_state *search_brs(disk_buffer *disk_buffer,
+ search_brs_state *sbs)
+{ /* Return the entry containing sbs->sector, or NULL if there is none */
+ buffered_request_state *brs;
+
+ QSIMPLEQ_FOREACH(brs, &disk_buffer->head, entry) {
+ if (sbs->sector < brs->start_sector) {
+ return NULL; /* sector lies before this entry; assumes ascending order */
+ }
+
+ if (sbs->sector < brs->start_sector + brs->nb_sectors) {
+ return brs; /* start_sector <= sector < end of this entry */
+ }
+
+ sbs->prev = brs; /* entry ends at or before sector: remember it */
+ }
+
+ return NULL; /* walked past every entry */
+}
+
+static buffered_request_state *get_next_brs(buffered_request_state *brs)
+{ /* Next entry in the queue; callers treat a NULL result as end of list */
+ return QSIMPLEQ_NEXT(brs, entry);
+}
+
+static void add_brs_after(disk_buffer *disk_buffer,
+ buffered_request_state *new_brs,
+ buffered_request_state *prev)
+{ /* Link new_brs into the queue right after prev */
+ if (!prev) { /* no predecessor: new_brs becomes the first entry */
+ QSIMPLEQ_INSERT_HEAD(&disk_buffer->head, new_brs, entry);
+ } else {
+ QSIMPLEQ_INSERT_AFTER(&disk_buffer->head, prev, new_brs, entry);
+ }
+}
+
+static bool disk_buffer_empty(disk_buffer *disk_buffer)
+{ /* True when no entries are queued */
+ return QSIMPLEQ_EMPTY(&disk_buffer->head);
+}
+
+/* Disk buffer */
+static buffered_request_state *create_new_brs(QEMUIOVector *qiov,
+ uint64_t iov_sector,
+ uint64_t sector, int nb_sectors)
+{ /* Build an entry for [sector, sector + nb_sectors) copied out of qiov */
+ buffered_request_state *brs;
+
+ brs = g_slice_new(buffered_request_state);
+ brs->start_sector = sector;
+ brs->nb_sectors = nb_sectors;
+ brs->data = alloc_buffered_data(nb_sectors);
+ qemu_iovec_to_buf(qiov, (sector - iov_sector) * BDRV_SECTOR_SIZE, /* iov_sector maps to qiov offset 0 */
+ brs->data, nb_sectors * BDRV_SECTOR_SIZE);
+
+ return brs; /* not linked into a queue here; the caller inserts it */
+}
+
+static void free_brs(buffered_request_state *brs)
+{ /* Free the entry and its data; does not unlink brs from the queue */
+ free_buffered_data(brs->data);
+ g_slice_free(buffered_request_state, brs);
+}
+
+bool buffer_has_empty_range(disk_buffer *disk_buffer,
+ uint64_t sector, int nb_sectors)
+{ /* True if any sector of [sector, sector + nb_sectors) is not buffered */
+ buffered_request_state *brs;
+ search_brs_state sbs;
+ uint64_t cur_sector = sector; /* first sector not yet known to be buffered */
+
+ if (nb_sectors <= 0) {
+ return false; /* nothing to check */
+ }
+
+ sbs.sector = sector;
+ sbs.prev = NULL;
+ brs = search_brs(disk_buffer, &sbs);
+ if (!brs) {
+ return true; /* the first sector itself is not buffered */
+ }
+
+ while (brs && cur_sector < sector + nb_sectors) {
+ if (cur_sector < brs->start_sector) {
+ return true; /* hole between cur_sector and this entry */
+ }
+
+ if (brs->start_sector + brs->nb_sectors >= sector + nb_sectors) {
+ return false; /* this entry reaches the end of the range */
+ }
+
+ cur_sector = brs->start_sector + brs->nb_sectors;
+ brs = get_next_brs(brs);
+ }
+
+ if (cur_sector < sector + nb_sectors) {
+ return true; /* ran out of entries before the end of the range */
+ } else {
+ return false;
+ }
+}
+
+/* Note: only the sectors that exist in the buffer will be overwritten */
+void qiov_read_from_buffer(disk_buffer *disk_buffer, QEMUIOVector *qiov,
+ uint64_t sector, int nb_sectors)
+{ /* Copy buffered data for [sector, sector + nb_sectors) into qiov */
+ search_brs_state sbs;
+ buffered_request_state *brs;
+ size_t offset, cur_nb_sectors;
+ uint64_t cur_sector = sector;
+ void *buf;
+
+ if (disk_buffer_empty(disk_buffer)) {
+ /* The disk buffer is empty */
+ return;
+ }
+
+ sbs.sector = sector;
+ sbs.prev = NULL;
+ brs = search_brs(disk_buffer, &sbs);
+ if (!brs) { /* sector itself is not buffered: start at the next entry */
+ if (!sbs.prev) {
+ brs = QSIMPLEQ_FIRST(&disk_buffer->head);
+ } else {
+ brs = get_next_brs(sbs.prev);
+ }
+ }
+
+ while (brs && cur_sector < sector + nb_sectors) {
+ if (brs->start_sector >= sector + nb_sectors) {
+ break; /* this entry starts beyond the requested range */
+ }
+
+ /* In the first loop, brs->start_sector can be less than sector */
+ if (brs->start_sector < cur_sector) {
+ offset = cur_sector - brs->start_sector; /* sectors to skip in entry */
+ buf = brs->data + offset * BDRV_SECTOR_SIZE;
+ } else {
+ cur_sector = brs->start_sector; /* jump over unbuffered sectors */
+ offset = 0;
+ buf = brs->data;
+ }
+ if (brs->start_sector + brs->nb_sectors >= sector + nb_sectors) {
+ cur_nb_sectors = sector + nb_sectors - cur_sector; /* clamp to request */
+ } else {
+ cur_nb_sectors = brs->nb_sectors - offset; /* rest of this entry */
+ }
+ qemu_iovec_from_buf(qiov, (cur_sector - sector) * BDRV_SECTOR_SIZE, /* qiov offset 0 is sector */
+ buf, cur_nb_sectors * BDRV_SECTOR_SIZE);
+
+ cur_sector = brs->start_sector + brs->nb_sectors;
+ brs = get_next_brs(brs);
+ }
+}
+
+void qiov_write_to_buffer(disk_buffer *disk_buffer, QEMUIOVector *qiov,
+ uint64_t sector, int nb_sectors, bool overwrite)
+{ /* Buffer qiov's data for the range; existing sectors change only if overwrite */
+ search_brs_state sbs;
+ buffered_request_state *brs, *new_brs, *prev;
+ uint64_t cur_sector = sector; /* next sector of the request still to handle */
+ int cur_nb_sectors, offset;
+
+ if (disk_buffer_empty(disk_buffer)) {
+ /* The disk buffer is empty */
+ new_brs = create_new_brs(qiov, sector, cur_sector, nb_sectors);
+ add_brs_after(disk_buffer, new_brs, NULL);
+ return;
+ }
+
+ sbs.sector = sector;
+ sbs.prev = NULL;
+ brs = search_brs(disk_buffer, &sbs); /* result is replaced below; only sbs.prev is used */
+ if (!sbs.prev) {
+ prev = NULL;
+ brs = QSIMPLEQ_FIRST(&disk_buffer->head);
+ } else {
+ prev = sbs.prev;
+ brs = get_next_brs(sbs.prev);
+ }
+
+ while (brs && cur_sector < sector + nb_sectors) {
+ if (cur_sector < brs->start_sector) { /* gap in front of brs: fill it */
+ if (sector + nb_sectors <= brs->start_sector) {
+ cur_nb_sectors = sector + nb_sectors - cur_sector; /* request ends in gap */
+ } else {
+ cur_nb_sectors = brs->start_sector - cur_sector; /* fill up to brs */
+ }
+ new_brs = create_new_brs(qiov, sector, cur_sector, cur_nb_sectors);
+ add_brs_after(disk_buffer, new_brs, prev);
+ cur_sector = brs->start_sector;
+ }
+
+ if (cur_sector >= sector + nb_sectors) {
+ break; /* the request did not reach into brs */
+ }
+
+ if (overwrite) { /* replace the part of brs that the request covers */
+ offset = cur_sector - brs->start_sector;
+ if (sector + nb_sectors <= brs->start_sector + brs->nb_sectors) {
+ cur_nb_sectors = sector + nb_sectors - cur_sector;
+ } else {
+ cur_nb_sectors = brs->nb_sectors - offset;
+ }
+ qemu_iovec_to_buf(qiov, (cur_sector - sector) * BDRV_SECTOR_SIZE,
+ brs->data + offset * BDRV_SECTOR_SIZE,
+ cur_nb_sectors * BDRV_SECTOR_SIZE);
+ }
+
+ cur_sector = brs->start_sector + brs->nb_sectors; /* continue after brs */
+
+ prev = brs;
+ brs = get_next_brs(brs);
+ }
+
+ if (cur_sector < sector + nb_sectors) { /* tail beyond the last entry visited */
+ new_brs = create_new_brs(qiov, sector, cur_sector,
+ sector + nb_sectors - cur_sector);
+ add_brs_after(disk_buffer, new_brs, prev);
+ }
+}
+
+struct flushed_data { /* context handed to flush_buffered_data_complete() */
+ QEMUIOVector qiov; /* wraps brs->data for bdrv_aio_writev() */
+ buffered_request_state *brs; /* entry being written; freed on completion */
+};
+
+static void flush_buffered_data_complete(void *opaque, int ret)
+{ /* Callback passed to bdrv_aio_writev(); opaque is a struct flushed_data */
+ struct flushed_data *flushed_data = opaque;
+
+ /* We have already reported to the guest that this write succeeded */
+ assert(ret == 0);
+
+ qemu_iovec_destroy(&flushed_data->qiov);
+ free_brs(flushed_data->brs);
+ g_free(flushed_data);
+}
+
+void flush_buffered_data_to_disk(disk_buffer *disk_buffer,
+ BlockDriverState *bs)
+{ /* Submit one async write to bs per buffered entry, emptying the queue */
+ buffered_request_state *brs, *tmp;
+ struct flushed_data *flushed_data = NULL;
+
+ QSIMPLEQ_FOREACH_SAFE(brs, &disk_buffer->head, entry, tmp) {
+ /* brs is always the head */
+ QSIMPLEQ_REMOVE_HEAD(&disk_buffer->head, entry);
+
+ flushed_data = g_malloc(sizeof(struct flushed_data)); /* freed by the callback */
+ qemu_iovec_init(&flushed_data->qiov, 1);
+ qemu_iovec_add(&flushed_data->qiov, brs->data,
+ brs->nb_sectors * BDRV_SECTOR_SIZE);
+ flushed_data->brs = brs; /* the callback frees the entry as well */
+ bdrv_aio_writev(bs, brs->start_sector, &flushed_data->qiov,
+ brs->nb_sectors, flush_buffered_data_complete,
+ flushed_data);
+ }
+
+ bdrv_drain_all(); /* presumably waits for the writes above -- TODO confirm */
+}
+
+void init_disk_buffer(disk_buffer *disk_buffer)
+{ /* Start with an empty queue of buffered entries */
+ QSIMPLEQ_INIT(&disk_buffer->head);
+}
+
+void clear_all_buffered_data(disk_buffer *disk_buffer)
+{ /* Unlink and free every entry; nothing is written to disk here */
+ buffered_request_state *brs, *tmp;
+
+ QSIMPLEQ_FOREACH_SAFE(brs, &disk_buffer->head, entry, tmp) {
+ /* brs is always the head */
+ QSIMPLEQ_REMOVE_HEAD(&disk_buffer->head, entry);
+ free_brs(brs);
+ }
+}
diff --git a/block/blkcolo.h b/block/blkcolo.h
new file mode 100644
index 0000000..d8e0d9a
--- /dev/null
+++ b/block/blkcolo.h
@@ -0,0 +1,35 @@
+/*
+ * Block driver for COLO
+ *
+ * Copyright Fujitsu, Corp. 2015
+ * Copyright (c) 2015 Intel Corporation
+ * Copyright (c) 2015 HUAWEI TECHNOLOGIES CO.,LTD.
+ *
+ * Authors:
+ * Wen Congyang <address@hidden>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef BLOCK_BLKCOLO_H
+#define BLOCK_BLKCOLO_H
+
+typedef struct disk_buffer { /* this header does not include qemu/queue.h itself */
+ QSIMPLEQ_HEAD(, buffered_request_state) head; /* queue of buffered entries */
+} disk_buffer;
+
+bool buffer_has_empty_range(disk_buffer *disk_buffer,
+ uint64_t sector, int nb_sectors); /* true if part of the range is unbuffered */
+void qiov_read_from_buffer(disk_buffer *disk_buffer, QEMUIOVector *qiov,
+ uint64_t sector, int nb_sectors); /* copy buffered sectors into qiov */
+void qiov_write_to_buffer(disk_buffer *disk_buffer, QEMUIOVector *qiov,
+ uint64_t sector, int nb_sectors, bool overwrite); /* store qiov data */
+void flush_buffered_data_to_disk(disk_buffer *disk_buffer,
+ BlockDriverState *bs); /* write all entries to bs */
+
+void init_disk_buffer(disk_buffer *disk_buffer); /* set up an empty queue */
+void clear_all_buffered_data(disk_buffer *disk_buffer); /* free all entries */
+
+#endif
--
2.1.0
- Re: [Qemu-devel] [RFC PATCH 09/14] block: give backing image its own BlockBackend, (continued)
- [Qemu-devel] [RFC PATCH 07/14] NBD client: implement block driver interfaces for block replication, Wen Congyang, 2015/02/11
- [Qemu-devel] [RFC PATCH 10/14] allow the backing image access the origin BlockDriverState, Wen Congyang, 2015/02/11
- [Qemu-devel] [RFC PATCH 11/14] allow writing to the backing file, Wen Congyang, 2015/02/11
- [Qemu-devel] [RFC PATCH 13/14] COW: move cow interfaces to a seperate file, Wen Congyang, 2015/02/11
- [Qemu-devel] [RFC PATCH 08/14] block: add a new API to create a hidden BlockBackend, Wen Congyang, 2015/02/11
- [Qemu-devel] [RFC PATCH 12/14] Add disk buffer for block replication,
Wen Congyang <=
- [Qemu-devel] [RFC PATCH 14/14] COLO: implement a new block driver, Wen Congyang, 2015/02/11
- Re: [Qemu-devel] [RFC PATCH 00/14] Block replication for continuous checkpoints, Paolo Bonzini, 2015/02/18