qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH] Modify qemu-img to mount locally disk image using NBD (v2)


From: Laurent Vivier
Subject: [Qemu-devel] [PATCH] Modify qemu-img to mount locally disk image using NBD (v2)
Date: Mon, 10 Mar 2008 13:24:14 +0100

This patch is a new version of the qemu-img change that uses an NBD device
to mount a QEMU disk image locally.

To avoid hanging on a UP (uniprocessor) system, it needs the following patch:
http://article.gmane.org/gmane.linux.drivers.nbd.general/42
If you want to use the loop device to see partitions, you need this patch:
http://article.gmane.org/gmane.linux.kernel/651269
Otherwise, use kpartx (see the kpartx package of your distro).

This patch implements both the client and the server side of the NBD protocol
in qemu-img. Moreover, to avoid having to specify a port, it creates a UNIX
socket instead of an INET socket.

It adds two actions to qemu-img:
- bind, to bind a disk image to an NBD device,

  qemu-img bind [-d] [-f fmt] device filename

     ('-d' to daemonize)

- unbind, to unbind it.

  qemu-img unbind device

Example:

# qemu-img bind -d /dev/nbd0 fc6.qcow2

[Here you can use any tool you like to see the partitions: kpartx, a patched
loop device (see LKML), or a patched NBD driver (I can provide the patch; it is
what I use in the following example).]

# mount /dev/nbd0p1 /mnt
# ls /mnt
config-2.6.22.4-45.fc6      lost+found                  vmlinuz
config-2.6.22.5-49.fc6      System.map                  vmlinuz-2.6.22.4-45.fc6
grub                        System.map-2.6.22.4-45.fc6  vmlinuz-2.6.22.5
initrd-2.6.22.4-45.fc6.img  System.map-2.6.22.5         vmlinuz-2.6.22.5-49.fc6
initrd-2.6.22.5-49.fc6.img  System.map-2.6.22.5-49.fc6  vmlinuz-2.6.22.5.old
initrd-2.6.22.5.img         System.map-2.6.22.5.old
# umount /mnt
# qemu-img unbind /dev/nbd0
/dev/nbd0 disconnected

Laurent
---
 qemu-img.c |  426 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 426 insertions(+)

Index: qemu/qemu-img.c
===================================================================
--- qemu.orig/qemu-img.c        2008-03-10 09:58:24.000000000 +0100
+++ qemu/qemu-img.c     2008-03-10 11:39:35.000000000 +0100
@@ -25,6 +25,22 @@
 #include "block_int.h"
 #include <assert.h>
 
+#ifdef __linux__
+#define NBD_SERVER
+#endif
+
+#ifdef NBD_SERVER
+#include <sys/wait.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <arpa/inet.h>
+#include <linux/types.h>
+#include <linux/nbd.h>
+#include <sys/ioctl.h>
+#include <sys/mount.h>
+#endif /* NBD_SERVER */
+
 #ifdef _WIN32
 #define WIN32_LEAN_AND_MEAN
 #include <windows.h>
@@ -92,6 +108,10 @@ static void help(void)
            "  commit [-f fmt] filename\n"
            "  convert [-c] [-e] [-6] [-f fmt] [-O output_fmt] filename [filename2 [...]] output_filename\n"
            "  info [-f fmt] filename\n"
+#ifdef NBD_SERVER
+           "  bind [-d] [-f fmt] device filename\n"
+           "  unbind device\n"
+#endif
            "\n"
            "Command parameters:\n"
            "  'filename' is a disk image filename\n"
@@ -105,6 +125,9 @@ static void help(void)
            "  '-c' indicates that target image must be compressed (qcow format only)\n"
            "  '-e' indicates that the target image must be encrypted (qcow format only)\n"
            "  '-6' indicates that the target image must use compatibility level 6 (vmdk format only)\n"
+#ifdef NBD_SERVER
+           "  '-d' daemonize (bind only)\n"
+#endif
            );
     printf("\nSupported format:");
     bdrv_iterate_format(format_print, NULL);
@@ -602,6 +625,403 @@ static int img_convert(int argc, char **
     return 0;
 }
 
+#ifdef NBD_SERVER
+
+//#define DEBUG_SERVER
+
+#ifdef DEBUG_SERVER
+#define DPRINTF(fmt, args...) \
+do { printf("img-bind: " fmt , ##args); } while (0)
+#else
+#define DPRINTF(fmt, args...) do {} while(0)
+#endif
+
+#define BUFSIZE (1024*1024)
+
+static int nbd_receive(int fd, char *buf, size_t len)
+{
+    ssize_t rd;
+
+    while (len > 0) {
+        rd = read(fd, buf, len);
+        if (rd == -1)
+            return -errno;
+        len -= rd;
+        buf += rd;
+    }
+    return 0;
+}
+
+static int nbd_send(int fd, char *buf, size_t len)
+{
+    ssize_t written;
+
+    while (len > 0) {
+        written = write(fd, buf, len);
+        if (written == -1)
+            return -errno;
+        len -= written;
+        buf += written;
+    }
+    return 0;
+}
+
+# if __BYTE_ORDER == __BIG_ENDIAN
+# define htonll(x) (x)
+# define ntohll(x) (x)
+#else
+# define htonll(x) __bswap_64(x)
+# define ntohll(x) __bswap_64(x)
+#endif
+
+static void bdrv_loop(BlockDriverState *drv, int net)
+{
+    struct nbd_request request;
+    struct nbd_reply reply;
+    char *buf;
+    int ret;
+
+    buf = qemu_malloc(BUFSIZE);
+    if (buf == NULL)
+        return;
+
+    while(1) {
+        uint32_t len;
+        uint64_t from;
+
+        ret = nbd_receive(net, (char*)&request, sizeof(request));
+        if (ret < 0) {
+            DPRINTF("read failed %d (%d)\n", ret, errno);
+            break;
+        }
+
+        DPRINTF("request magic %x type %d from %lx len %x\n",
+                ntohl(request.magic), ntohl(request.type),
+                ntohll(request.from), ntohl(request.len));
+
+        if (request.magic != htonl(NBD_REQUEST_MAGIC)) {
+            DPRINTF("Bad Magic\n");
+            break;
+        }
+
+        if (request.type == htonl(NBD_CMD_DISC)) {
+            /* disconnect */
+            DPRINTF("Command Disconnect\n");
+            break;
+        }
+
+        len = ntohl(request.len);
+        if (len > BUFSIZE - sizeof(struct nbd_reply)) {
+            DPRINTF("len too big %d\n", len);
+            break;
+        }
+
+        /* prepare reply */
+
+        reply.magic = htonl(NBD_REPLY_MAGIC);
+        reply.error = 0;
+        memcpy(reply.handle, request.handle, sizeof(reply.handle));
+
+        /* do I/O */
+
+        from = ntohll(request.from);
+
+        switch(ntohl(request.type)) {
+        case NBD_CMD_READ:
+            reply.error = -bdrv_read(drv, from >> 9,
+                                     buf + sizeof(struct nbd_reply), len >> 9);
+            if (reply.error != 0) {
+                DPRINTF("bdrv_read error %d\n", reply.error);
+            }
+            memcpy(buf, &reply, sizeof(struct nbd_reply));
+            reply.error = htonl(reply.error);
+            ret = nbd_send(net, buf, len + sizeof(struct nbd_reply));
+            if (ret < 0) {
+                DPRINTF("NBD_CMD_READ: cannot sent result\n");
+                return;
+            }
+            break;
+
+        case NBD_CMD_WRITE:
+            ret = nbd_receive(net, buf, len);
+            if (ret < 0) {
+                DPRINTF("NBD_CMD_WRITE: cannot receive block %d != %d\n", ret, len);
+                goto out;
+            }
+            reply.error = -bdrv_write(drv, from >> 9, buf, len >> 9);
+
+            if (reply.error != 0) {
+                       DPRINTF("bdrv_write error %d\n", reply.error);
+            }
+            reply.error = htonl(reply.error);
+            ret = nbd_send(net, (char*)&reply, sizeof(reply));
+            if (ret < 0) {
+                DPRINTF("NBD_CMD_WRITE: cannot sent result %d != %d\n", ret, len);
+                goto out;
+            }
+            break;
+        }
+    }
+out:
+    qemu_free(buf);
+    DPRINTF("bdrv_loop: exit\n");
+}
+
+static void show_part(char *device)
+{
+    int nbd;
+    sleep(1);
+    nbd = open(device, O_RDWR);
+    if (nbd == -1)
+        return;
+    ioctl(nbd, BLKRRPART, NULL);
+    close(nbd);
+}
+
+void server_loop(BlockDriverState *drv, char *device)
+{
+    struct sockaddr_un addrin;
+    uint64_t total_sectors;
+    pid_t pid;
+    int sock;
+    int ret;
+    int net;
+    int nbd;
+
+    memset(&addrin, 0, sizeof(addrin));
+    addrin.sun_family = AF_UNIX;
+    sprintf(addrin.sun_path, "/var/lock/qemu-img-%s", basename(device));
+    DPRINTF("socket: %s\n", addrin.sun_path);
+
+    pid = fork();
+    if (pid < 0) {
+        printf("Cannot fork\n");
+        bdrv_delete(drv);
+        return;
+    }
+    if (pid == 0) {
+        socklen_t addrinlen;
+        int yes = 1;
+
+        bdrv_get_geometry(drv, &total_sectors);
+
+        /* child */
+
+        sock = socket(PF_UNIX, SOCK_STREAM, 0);
+        if (sock == -1) {
+            fprintf(stderr, "Cannot create socket\n");
+            goto child_cleanup2;
+        }
+
+        ret = setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
+                         &yes, sizeof(int));
+        if (ret == -1) {
+            fprintf(stderr, "Cannot setsockopt\n");
+            goto child_cleanup1;
+        }
+
+        DPRINTF("bind\n");
+        ret = bind(sock, (struct sockaddr *) &addrin, sizeof(addrin));
+        if (ret == -1) {
+            fprintf(stderr, "Cannot bind socket\n");
+            goto child_cleanup1;
+        }
+
+        ret = listen(sock, 1);
+        if (ret == -1) {
+            fprintf(stderr, "Cannot listen socket\n");
+            goto child_cleanup1;
+        }
+
+        DPRINTF("accept\n");
+
+        printf("Starting qemu image server\n");
+
+        net = accept(sock, &addrin, &addrinlen);
+        if (ret == -1) {
+            fprintf(stderr, "accept failed\n");
+            goto child_cleanup1;
+        }
+
+        ret = write(net, &total_sectors, sizeof(total_sectors));
+        if (ret == sizeof(total_sectors)) {
+            if (fork() == 0) {
+                show_part(device);
+                exit(0);
+            }
+            DPRINTF("bdrv_loop\n");
+            bdrv_loop(drv, net);
+        }
+
+        close(net);
+child_cleanup1:
+        close(sock);
+child_cleanup2:
+        bdrv_delete(drv);
+        unlink(addrin.sun_path);
+
+        return;
+    }
+
+    bdrv_delete(drv);
+
+    /* parent */
+
+    sock = socket(PF_UNIX, SOCK_STREAM, 0);
+    if (sock == -1) {
+        fprintf(stderr, "Cannot create socket\n");
+        goto exit;
+    }
+
+    do {
+        ret = connect(sock, (struct sockaddr *) &addrin, sizeof(addrin));
+        if (ret == -1) {
+           if (errno != ENOENT && errno != ECONNREFUSED) {
+                fprintf(stderr, "Cannot create connect (%d: %s)\n",
+                        errno, strerror(errno));
+                goto exit;
+            }
+            sleep(1);
+        }
+       /* wait children */
+    } while (ret == -1);
+
+    nbd = open(device, O_RDWR);
+    if (nbd == -1) {
+        fprintf(stderr, "Cannot open %s\n", device);
+        goto exit;
+    }
+
+    ret = read(sock, &total_sectors, sizeof(total_sectors));
+    if (ret != sizeof(total_sectors)) {
+        fprintf(stderr, "Cannot read image disk size\n");
+        goto closeall;
+    }
+
+    ret = ioctl(nbd, NBD_SET_BLKSIZE, 512);
+    if (ret == -1) {
+        fprintf(stderr, "Cannot set block size\n");
+        goto closeall;
+    }
+
+    ret = ioctl(nbd, NBD_SET_SIZE_BLOCKS, total_sectors);
+    if (ret == -1) {
+        fprintf(stderr, "Cannot set device size\n");
+        goto closeall;
+    }
+
+    ret = ioctl(nbd, NBD_CLEAR_SOCK);
+    if (ret == -1) {
+        fprintf(stderr, "Cannot clear sock\n");
+        goto closeall;
+    }
+
+    ret = ioctl(nbd, NBD_SET_SOCK, sock);
+    if (ret == -1) {
+        fprintf(stderr, "Cannot set sock\n");
+        goto closeall;
+    }
+
+    printf("Starting NBD interface\n");
+
+    ret = ioctl(nbd, NBD_DO_IT);
+    if (ret == -1)
+        fprintf(stderr, "NBD_DO_IT failed %d\n", errno);
+
+    ioctl(nbd, NBD_CLEAR_QUE);
+
+    ioctl(nbd, NBD_CLEAR_SOCK);
+
+closeall:
+    close(nbd);
+exit:
+    kill(pid, SIGTERM);
+    unlink(addrin.sun_path);
+}
+
+static int img_bind(int argc, char **argv)
+{
+    int c;
+    char *fmt;
+    char *device;
+    char *filename;
+    int daemonize = 0;
+    BlockDriverState *drv;
+
+    fmt = NULL;
+    for(;;) {
+        c = getopt(argc, argv, "f:hd");
+        if (c == -1)
+            break;
+        switch(c) {
+        case 'h':
+            help();
+            break;
+        case 'f':
+            fmt = optarg;
+            break;
+        case 'd':
+            daemonize = 1;
+            break;
+        }
+    }
+
+    if (argc - optind < 2)
+        help();
+
+    device = argv[optind];
+    filename = argv[optind + 1];
+
+    drv = bdrv_new_open(filename, fmt);
+    if (!drv) {
+        fprintf(stderr, "Cannot open %s\n", filename);
+        return 1;
+    }
+
+#ifndef DEBUG_SERVER
+    if (daemonize)
+        daemon(0, 0);
+#endif
+
+    server_loop(drv, device);
+
+    return 0;
+}
+
+static int img_unbind(int argc, char **argv)
+{
+    char *device;
+    int nbd;
+    int ret;
+
+    if (argc - optind < 1)
+        help();
+
+    device = argv[optind];
+
+    nbd = open(device, O_RDWR);
+    if (nbd == -1)
+        error("Cannot open %s", device);
+
+    ret = ioctl(nbd, NBD_CLEAR_QUE);
+    if (ret)
+        error("ioctl(NBD_CLEAR_QUE) failed");
+    ret = ioctl(nbd, NBD_DISCONNECT);
+    if (ret)
+        error("ioctl(NBD_DISCONNECT) failed");
+    ret = ioctl(nbd, NBD_CLEAR_SOCK);
+    if (ret)
+        error("ioctl(NBD_CLEAR_SOCK) failed");
+
+    close(nbd);
+
+    printf("%s disconnected\n", device);
+
+    return 0;
+}
+
+#endif /* NBD_SERVER */
+
 #ifdef _WIN32
 static int64_t get_allocated_file_size(const char *filename)
 {
@@ -746,6 +1166,12 @@ int main(int argc, char **argv)
         img_convert(argc, argv);
     } else if (!strcmp(cmd, "info")) {
         img_info(argc, argv);
+#ifdef NBD_SERVER
+    } else if (!strcmp(cmd, "bind")) {
+        img_bind(argc, argv);
+    } else if (!strcmp(cmd, "unbind")) {
+        img_unbind(argc, argv);
+#endif /* NBD_SERVER */
     } else {
         help();
     }




reply via email to

[Prev in Thread] Current Thread [Next in Thread]