[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [4599] Align file accesses with cache=off (O_DIRECT) (Kevin
From: |
Fabrice Bellard |
Subject: |
[Qemu-devel] [4599] Align file accesses with cache=off (O_DIRECT) (Kevin Wolf, Laurent Vivier) |
Date: |
Wed, 28 May 2008 09:51:10 +0000 |
Revision: 4599
http://svn.sv.gnu.org/viewvc/?view=rev&root=qemu&revision=4599
Author: bellard
Date: 2008-05-28 09:51:09 +0000 (Wed, 28 May 2008)
Log Message:
-----------
Align file accesses with cache=off (O_DIRECT) (Kevin Wolf, Laurent Vivier)
Modified Paths:
--------------
trunk/block-raw-posix.c
Modified: trunk/block-raw-posix.c
===================================================================
--- trunk/block-raw-posix.c 2008-05-27 21:18:39 UTC (rev 4598)
+++ trunk/block-raw-posix.c 2008-05-28 09:51:09 UTC (rev 4599)
@@ -70,6 +70,8 @@
#define FTYPE_CD 1
#define FTYPE_FD 2
+#define ALIGNED_BUFFER_SIZE (32 * 512)
+
/* if the FD is not accessed during that time (in ms), we try to
reopen it to see if the disk has been changed */
#define FD_OPEN_TIMEOUT 1000
@@ -86,6 +88,9 @@
int fd_got_error;
int fd_media_changed;
#endif
+#if defined(O_DIRECT) && !defined(QEMU_IMG)
+ uint8_t* aligned_buf;
+#endif
} BDRVRawState;
static int fd_open(BlockDriverState *bs);
@@ -121,6 +126,17 @@
return ret;
}
s->fd = fd;
+#if defined(O_DIRECT) && !defined(QEMU_IMG)
+ s->aligned_buf = NULL;
+ if (flags & BDRV_O_DIRECT) {
+ s->aligned_buf = qemu_memalign(512, ALIGNED_BUFFER_SIZE);
+ if (s->aligned_buf == NULL) {
+ ret = -errno;
+ close(fd);
+ return ret;
+ }
+ }
+#endif
return 0;
}
@@ -141,7 +157,14 @@
#endif
*/
-static int raw_pread(BlockDriverState *bs, int64_t offset,
+/*
+ * offset and count are in bytes, but must be multiples of 512 for files
+ * opened with O_DIRECT. buf must be aligned to 512 bytes then.
+ *
+ * This function may be called without alignment if the caller ensures
+ * that O_DIRECT is not in effect.
+ */
+static int raw_pread_aligned(BlockDriverState *bs, int64_t offset,
uint8_t *buf, int count)
{
BDRVRawState *s = bs->opaque;
@@ -194,7 +217,14 @@
return ret;
}
-static int raw_pwrite(BlockDriverState *bs, int64_t offset,
+/*
+ * offset and count are in bytes, but must be multiples of 512 for files
+ * opened with O_DIRECT. buf must be aligned to 512 bytes then.
+ *
+ * This function may be called without alignment if the caller ensures
+ * that O_DIRECT is not in effect.
+ */
+static int raw_pwrite_aligned(BlockDriverState *bs, int64_t offset,
const uint8_t *buf, int count)
{
BDRVRawState *s = bs->opaque;
@@ -230,6 +260,164 @@
return ret;
}
+
+#if defined(O_DIRECT) && !defined(QEMU_IMG)
+/*
+ * offset and count are in bytes and possibly not aligned. For files opened
+ * with O_DIRECT, necessary alignments are ensured before calling
+ * raw_pread_aligned to do the actual read.
+ */
+static int raw_pread(BlockDriverState *bs, int64_t offset,
+ uint8_t *buf, int count)
+{
+ BDRVRawState *s = bs->opaque;
+ int size, ret, shift, sum;
+
+ sum = 0;
+
+ if (s->aligned_buf != NULL) {
+
+ if (offset & 0x1ff) {
+ /* align offset on a 512 bytes boundary */
+
+ shift = offset & 0x1ff;
+ size = (shift + count + 0x1ff) & ~0x1ff;
+ if (size > ALIGNED_BUFFER_SIZE)
+ size = ALIGNED_BUFFER_SIZE;
+ ret = raw_pread_aligned(bs, offset - shift, s->aligned_buf, size);
+ if (ret < 0)
+ return ret;
+
+ size = 512 - shift;
+ if (size > count)
+ size = count;
+ memcpy(buf, s->aligned_buf + shift, size);
+
+ buf += size;
+ offset += size;
+ count -= size;
+ sum += size;
+
+ if (count == 0)
+ return sum;
+ }
+ if (count & 0x1ff || (uintptr_t) buf & 0x1ff) {
+
+ /* read on aligned buffer */
+
+ while (count) {
+
+ size = (count + 0x1ff) & ~0x1ff;
+ if (size > ALIGNED_BUFFER_SIZE)
+ size = ALIGNED_BUFFER_SIZE;
+
+ ret = raw_pread_aligned(bs, offset, s->aligned_buf, size);
+ if (ret < 0)
+ return ret;
+
+ size = ret;
+ if (size > count)
+ size = count;
+
+ memcpy(buf, s->aligned_buf, size);
+
+ buf += size;
+ offset += size;
+ count -= size;
+ sum += size;
+ }
+
+ return sum;
+ }
+ }
+
+ return raw_pread_aligned(bs, offset, buf, count) + sum;
+}
+
+/*
+ * offset and count are in bytes and possibly not aligned. For files opened
+ * with O_DIRECT, necessary alignments are ensured before calling
+ * raw_pwrite_aligned to do the actual write.
+ */
+static int raw_pwrite(BlockDriverState *bs, int64_t offset,
+ const uint8_t *buf, int count)
+{
+ BDRVRawState *s = bs->opaque;
+ int size, ret, shift, sum;
+
+ sum = 0;
+
+ if (s->aligned_buf != NULL) {
+
+ if (offset & 0x1ff) {
+ /* align offset on a 512 bytes boundary */
+ shift = offset & 0x1ff;
+ ret = raw_pread_aligned(bs, offset - shift, s->aligned_buf, 512);
+ if (ret < 0)
+ return ret;
+
+ size = 512 - shift;
+ if (size > count)
+ size = count;
+ memcpy(s->aligned_buf + shift, buf, size);
+
+ ret = raw_pwrite_aligned(bs, offset - shift, s->aligned_buf, 512);
+ if (ret < 0)
+ return ret;
+
+ buf += size;
+ offset += size;
+ count -= size;
+ sum += size;
+
+ if (count == 0)
+ return sum;
+ }
+ if (count & 0x1ff || (uintptr_t) buf & 0x1ff) {
+
+ while ((size = (count & ~0x1ff)) != 0) {
+
+ if (size > ALIGNED_BUFFER_SIZE)
+ size = ALIGNED_BUFFER_SIZE;
+
+ memcpy(s->aligned_buf, buf, size);
+
+ ret = raw_pwrite_aligned(bs, offset, s->aligned_buf, size);
+ if (ret < 0)
+ return ret;
+
+ buf += ret;
+ offset += ret;
+ count -= ret;
+ sum += ret;
+ }
+ /* here, count < 512 because (count & ~0x1ff) == 0 */
+ if (count) {
+ ret = raw_pread_aligned(bs, offset, s->aligned_buf, 512);
+ if (ret < 0)
+ return ret;
+ memcpy(s->aligned_buf, buf, count);
+
+ ret = raw_pwrite_aligned(bs, offset, s->aligned_buf, 512);
+ if (ret < 0)
+ return ret;
+ if (count < ret)
+ ret = count;
+
+ sum += ret;
+ }
+ return sum;
+ }
+ }
+ return raw_pwrite_aligned(bs, offset, buf, count) + sum;
+}
+
+#else
+#define raw_pread raw_pread_aligned
+#define raw_pwrite raw_pwrite_aligned
+#endif
+
+
/***********************************************************/
/* Unix AIO using POSIX AIO */
@@ -237,6 +425,7 @@
BlockDriverAIOCB common;
struct aiocb aiocb;
struct RawAIOCB *next;
+ int ret;
} RawAIOCB;
static int aio_sig_num = SIGUSR2;
@@ -397,12 +586,38 @@
return acb;
}
+#ifndef QEMU_IMG
+static void raw_aio_em_cb(void* opaque)
+{
+ RawAIOCB *acb = opaque;
+ acb->common.cb(acb->common.opaque, acb->ret);
+ qemu_aio_release(acb);
+}
+#endif
+
static BlockDriverAIOCB *raw_aio_read(BlockDriverState *bs,
int64_t sector_num, uint8_t *buf, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
{
RawAIOCB *acb;
+ /*
+ * If O_DIRECT is used and the buffer is not aligned fall back
+ * to synchronous IO.
+ */
+#if defined(O_DIRECT) && !defined(QEMU_IMG)
+ BDRVRawState *s = bs->opaque;
+
+ if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) {
+ QEMUBH *bh;
+ acb = qemu_aio_get(bs, cb, opaque);
+ acb->ret = raw_pread(bs, 512 * sector_num, buf, 512 * nb_sectors);
+ bh = qemu_bh_new(raw_aio_em_cb, acb);
+ qemu_bh_schedule(bh);
+ return &acb->common;
+ }
+#endif
+
acb = raw_aio_setup(bs, sector_num, buf, nb_sectors, cb, opaque);
if (!acb)
return NULL;
@@ -419,6 +634,23 @@
{
RawAIOCB *acb;
+ /*
+ * If O_DIRECT is used and the buffer is not aligned fall back
+ * to synchronous IO.
+ */
+#if defined(O_DIRECT) && !defined(QEMU_IMG)
+ BDRVRawState *s = bs->opaque;
+
+ if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) {
+ QEMUBH *bh;
+ acb = qemu_aio_get(bs, cb, opaque);
+ acb->ret = raw_pwrite(bs, 512 * sector_num, buf, 512 * nb_sectors);
+ bh = qemu_bh_new(raw_aio_em_cb, acb);
+ qemu_bh_schedule(bh);
+ return &acb->common;
+ }
+#endif
+
acb = raw_aio_setup(bs, sector_num, (uint8_t*)buf, nb_sectors, cb, opaque);
if (!acb)
return NULL;
@@ -462,6 +694,10 @@
if (s->fd >= 0) {
close(s->fd);
s->fd = -1;
+#if defined(O_DIRECT) && !defined(QEMU_IMG)
+ if (s->aligned_buf != NULL)
+ qemu_free(s->aligned_buf);
+#endif
}
}
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [Qemu-devel] [4599] Align file accesses with cache=off (O_DIRECT) (Kevin Wolf, Laurent Vivier),
Fabrice Bellard <=