qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH 2/2] fix bdrv_aio_read API breakage in qcow2


From: Andrea Arcangeli
Subject: [Qemu-devel] [PATCH 2/2] fix bdrv_aio_read API breakage in qcow2
Date: Mon, 1 Sep 2008 12:53:14 +0200

While testing the DMA cancel patch (1/2) I noticed that qemu_aio_flush
was doing nothing at all, and that a flood of cmd_writeb commands, each
leading to a no-op invocation of qemu_aio_flush, was being executed.

I tracked it down and the major bug in this area (not sure if it could
be the one responsible for the fs corruption) is that if the aio callback
is run before bdrv_aio_read returns, the bm->aiocb of ide will be
non-null and set to the already completed aiocb, so after that
cmd_writeb will be mistaken for a dma cancellation.

In short all 'memset;goto redo' places must be fixed to use the bh and
not to call the callback in the context of bdrv_aio_read or the
bdrv_aio_read model falls apart. Reading from qcow2 holes is possible
with physical readahead (similar to breada in the Linux buffer cache).

All the implications of this bug aren't clear due to the amount of code
affected (qcow2 itself with hd_aiocb in qcow_aio_cancel, scsi
etc..). IDE might have been safe by pure luck because of a DMAING
bitflag check before canceling the I/O, otherwise double free would
happen there too. This makes patch 1/2 behave perfectly well (aiocb is
always null after qemu_aio_flush returns).

The same bug exists in qcow of course; it can be fixed later as it's
less urgent.

Signed-off-by: Andrea Arcangeli <address@hidden>

Index: Makefile.target
===================================================================
--- Makefile.target     (revision 5119)
+++ Makefile.target     (working copy)
@@ -474,9 +474,9 @@
 
 OBJS=vl.o osdep.o monitor.o pci.o loader.o isa_mmio.o machine.o net-checksum.o
 ifdef CONFIG_WIN32
-OBJS+=block-raw-win32.o
+OBJS+=block-raw-win32.o block-qcow2.o
 else
-OBJS+=block-raw-posix.o
+OBJS+=block-raw-posix.o block-qcow2.o
 endif
 
 LIBS+=-lz
Index: Makefile
===================================================================
--- Makefile    (revision 5119)
+++ Makefile    (working copy)
@@ -46,7 +46,7 @@
 BLOCK_OBJS=cutils.o qemu-malloc.o
 BLOCK_OBJS+=block-cow.o block-qcow.o aes.o block-vmdk.o block-cloop.o
 BLOCK_OBJS+=block-dmg.o block-bochs.o block-vpc.o block-vvfat.o
-BLOCK_OBJS+=block-qcow2.o block-parallels.o
+BLOCK_OBJS+=block-parallels.o
 ifndef CONFIG_WIN32
 BLOCK_OBJS+=block-nbd.o
 endif
@@ -175,9 +175,9 @@
 
 QEMU_IMG_BLOCK_OBJS = $(BLOCK_OBJS)
 ifdef CONFIG_WIN32
-QEMU_IMG_BLOCK_OBJS += qemu-img-block-raw-win32.o
+QEMU_IMG_BLOCK_OBJS += qemu-img-block-raw-win32.o qemu-img-block-qcow2.o
 else
-QEMU_IMG_BLOCK_OBJS += nbd.o qemu-img-block-raw-posix.o
+QEMU_IMG_BLOCK_OBJS += nbd.o qemu-img-block-raw-posix.o qemu-img-block-qcow2.o
 endif
 
 ######################################################################
@@ -195,7 +195,8 @@
        $(CC) $(CFLAGS) $(CPPFLAGS) -DQEMU_NBD -c -o $@ $<
 
 qemu-nbd$(EXESUF):  qemu-nbd.o qemu-nbd-nbd.o qemu-img-block.o \
-                   osdep.o qemu-nbd-block-raw-posix.o $(BLOCK_OBJS)
+                   osdep.o qemu-nbd-block-raw-posix.o \
+                   qemu-nbd-block-qcow2.o $(BLOCK_OBJS)
        $(CC) $(LDFLAGS) -o $@ $^ -lz $(LIBS)
 
 # dyngen host tool
Index: block-qcow2.c
===================================================================
--- block-qcow2.c       (revision 5119)
+++ block-qcow2.c       (working copy)
@@ -1169,8 +1169,20 @@
     uint64_t cluster_offset;
     uint8_t *cluster_data;
     BlockDriverAIOCB *hd_aiocb;
+    QEMUBH *bh;
 } QCowAIOCB;
 
+#if !defined(QEMU_IMG) && !defined(QEMU_NBD)
+static void qcow_aio_read_cb(void *opaque, int ret);
+static void qcow_aio_read_bh(void *opaque)
+{
+    QCowAIOCB *acb = opaque;
+    qemu_bh_delete(acb->bh);
+    acb->bh = NULL;
+    qcow_aio_read_cb(opaque, 0);
+}
+#endif
+
 static void qcow_aio_read_cb(void *opaque, int ret)
 {
     QCowAIOCB *acb = opaque;
@@ -1186,7 +1198,9 @@
         return;
     }
 
+#if defined(QEMU_IMG) || defined(QEMU_NBD)
  redo:
+#endif
     /* post process the read buffer */
     if (!acb->cluster_offset) {
         /* nothing to do */
@@ -1227,12 +1241,38 @@
                 if (acb->hd_aiocb == NULL)
                     goto fail;
             } else {
+#if defined(QEMU_IMG) || defined(QEMU_NBD)
                 goto redo;
+#else
+               if (acb->bh) {
+                   ret = -EIO;
+                   goto fail;
+               }
+               acb->bh = qemu_bh_new(qcow_aio_read_bh, acb);
+               if (!acb->bh) {
+                   ret = -EIO;
+                   goto fail;
+               }
+               qemu_bh_schedule(acb->bh);
+#endif
             }
         } else {
             /* Note: in this case, no need to wait */
             memset(acb->buf, 0, 512 * acb->n);
+#if defined(QEMU_IMG) || defined(QEMU_NBD)
             goto redo;
+#else
+           if (acb->bh) {
+               ret = -EIO;
+               goto fail;
+           }
+           acb->bh = qemu_bh_new(qcow_aio_read_bh, acb);
+           if (!acb->bh) {
+               ret = -EIO;
+               goto fail;
+           }
+           qemu_bh_schedule(acb->bh);
+#endif
         }
     } else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) {
         /* add AIO support for compressed blocks ? */
@@ -1240,7 +1280,20 @@
             goto fail;
         memcpy(acb->buf,
                s->cluster_cache + index_in_cluster * 512, 512 * acb->n);
+#if defined(QEMU_IMG) || defined(QEMU_NBD)
         goto redo;
+#else
+       if (acb->bh) {
+           ret = -EIO;
+           goto fail;
+       }
+       acb->bh = qemu_bh_new(qcow_aio_read_bh, acb);
+       if (!acb->bh) {
+           ret = -EIO;
+           goto fail;
+       }
+       qemu_bh_schedule(acb->bh);
+#endif
     } else {
         if ((acb->cluster_offset & 511) != 0) {
             ret = -EIO;





reply via email to

[Prev in Thread] Current Thread [Next in Thread]