qemu-block
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-block] [PATCH 8/8] nbd: Minimal structured read for client


From: Vladimir Sementsov-Ogievskiy
Subject: [Qemu-block] [PATCH 8/8] nbd: Minimal structured read for client
Date: Mon, 25 Sep 2017 16:58:01 +0300

Minimal implementation: drop most of additional error information.

Signed-off-by: Vladimir Sementsov-Ogievskiy <address@hidden>
---
 block/nbd-client.h  |   2 +
 include/block/nbd.h |  15 ++++-
 block/nbd-client.c  |  97 +++++++++++++++++++++++++-----
 nbd/client.c        | 169 +++++++++++++++++++++++++++++++++++++++++++++++-----
 4 files changed, 249 insertions(+), 34 deletions(-)

diff --git a/block/nbd-client.h b/block/nbd-client.h
index b435754b82..9e178de510 100644
--- a/block/nbd-client.h
+++ b/block/nbd-client.h
@@ -35,6 +35,8 @@ typedef struct NBDClientSession {
     NBDClientRequest requests[MAX_NBD_REQUESTS];
     NBDReply reply;
     bool quit;
+
+    bool structured_reply;
 } NBDClientSession;
 
 NBDClientSession *nbd_get_client_session(BlockDriverState *bs);
diff --git a/include/block/nbd.h b/include/block/nbd.h
index 314f2f9bbc..7604e80c49 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -57,11 +57,17 @@ struct NBDRequest {
 };
 typedef struct NBDRequest NBDRequest;
 
-struct NBDReply {
+typedef struct NBDReply {
+    bool simple;
     uint64_t handle;
     uint32_t error;
-};
-typedef struct NBDReply NBDReply;
+
+    uint16_t flags;
+    uint16_t type;
+    uint32_t tail_length;
+    uint64_t offset;
+    uint32_t hole_size;
+} NBDReply;
 
 typedef struct NBDSimpleReply {
     uint32_t magic;  /* NBD_SIMPLE_REPLY_MAGIC */
@@ -178,12 +184,15 @@ enum {
 
 #define NBD_SREP_TYPE_NONE          0
 #define NBD_SREP_TYPE_OFFSET_DATA   1
+#define NBD_SREP_TYPE_OFFSET_HOLE   2
 #define NBD_SREP_TYPE_ERROR         NBD_SREP_ERR(1)
+#define NBD_SREP_TYPE_ERROR_OFFSET  NBD_SREP_ERR(2)
 
 /* Details collected by NBD_OPT_EXPORT_NAME and NBD_OPT_GO */
 struct NBDExportInfo {
     /* Set by client before nbd_receive_negotiate() */
     bool request_sizes;
+    bool structured_reply;
     /* Set by server results during nbd_receive_negotiate() */
     uint64_t size;
     uint16_t flags;
diff --git a/block/nbd-client.c b/block/nbd-client.c
index e4f0c789f4..bdf9299bb9 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -179,9 +179,10 @@ err:
     return rc;
 }
 
-static int nbd_co_receive_reply(NBDClientSession *s,
-                                uint64_t handle,
-                                QEMUIOVector *qiov)
+static int nbd_co_receive_1_reply_or_chunk(NBDClientSession *s,
+                                           uint64_t handle,
+                                           bool *cont,
+                                           QEMUIOVector *qiov)
 {
     int ret;
     int i = HANDLE_TO_INDEX(s, handle);
@@ -191,29 +192,95 @@ static int nbd_co_receive_reply(NBDClientSession *s,
     qemu_coroutine_yield();
     s->requests[i].receiving = false;
     if (!s->ioc || s->quit) {
-        ret = -EIO;
-    } else {
-        assert(s->reply.handle == handle);
-        ret = -s->reply.error;
-        if (qiov && s->reply.error == 0) {
+        *cont = false;
+        return -EIO;
+    }
+
+    assert(s->reply.handle == handle);
+    *cont = !(s->reply.simple || (s->reply.flags & NBD_SREP_FLAG_DONE));
+    ret = -s->reply.error;
+    if (ret < 0) {
+        goto out;
+    }
+
+    if (s->reply.simple) {
+        if (qiov) {
             if (qio_channel_readv_all(s->ioc, qiov->iov, qiov->niov,
-                                      NULL) < 0) {
-                ret = -EIO;
-                s->quit = true;
+                                      NULL) < 0)
+            {
+                goto fatal;
             }
         }
+        goto out;
+    }
 
-        /* Tell the read handler to read another header.  */
-        s->reply.handle = 0;
+    /* here we deal with successful structured reply */
+    switch (s->reply.type) {
+        QEMUIOVector sub_qiov;
+    case NBD_SREP_TYPE_OFFSET_DATA:
+        if (!qiov || s->reply.offset + s->reply.tail_length > qiov->size) {
+            goto fatal;
+        }
+        qemu_iovec_init(&sub_qiov, qiov->niov);
+        qemu_iovec_concat(&sub_qiov, qiov, s->reply.offset,
+                          s->reply.tail_length);
+        ret = qio_channel_readv_all(s->ioc, sub_qiov.iov, sub_qiov.niov, NULL);
+        qemu_iovec_destroy(&sub_qiov);
+        if (ret < 0) {
+            goto fatal;
+        }
+        assert(ret == 0);
+        break;
+    case NBD_SREP_TYPE_OFFSET_HOLE:
+        if (!qiov || s->reply.offset + s->reply.hole_size > qiov->size) {
+            goto fatal;
+        }
+        qemu_iovec_memset(qiov, s->reply.offset, 0, s->reply.hole_size);
+        break;
+    case NBD_SREP_TYPE_NONE:
+        break;
+    default:
+        goto fatal;
     }
 
-    s->requests[i].coroutine = NULL;
+out:
+    /* For assert at loop start in nbd_read_reply_entry */
+    s->reply.handle = 0;
+
+    if (s->read_reply_co) {
+        aio_co_wake(s->read_reply_co);
+    }
+
+    return ret;
 
-    /* Kick the read_reply_co to get the next reply.  */
+fatal:
+    /* protocol or ioc failure */
+    *cont = false;
+    s->quit = true;
     if (s->read_reply_co) {
         aio_co_wake(s->read_reply_co);
     }
 
+    return -EIO;
+}
+
+static int nbd_co_receive_reply(NBDClientSession *s,
+                                uint64_t handle,
+                                QEMUIOVector *qiov)
+{
+    int ret = 0;
+    int i = HANDLE_TO_INDEX(s, handle);
+    bool cont = true;
+
+    while (cont) {
+        int rc = nbd_co_receive_1_reply_or_chunk(s, handle, &cont, qiov);
+        if (rc < 0 && ret == 0) {
+            ret = rc;
+        }
+    }
+
+    s->requests[i].coroutine = NULL;
+
     qemu_co_mutex_lock(&s->send_mutex);
     s->in_flight--;
     qemu_co_queue_next(&s->free_sema);
diff --git a/nbd/client.c b/nbd/client.c
index 51ae492e92..880eb17b85 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -719,6 +719,13 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char 
*name,
         if (fixedNewStyle) {
             int result;
 
+            result = nbd_request_simple_option(ioc, NBD_OPT_STRUCTURED_REPLY,
+                                               errp);
+            if (result < 0) {
+                goto fail;
+            }
+            info->structured_reply = result > 0;
+
             /* Try NBD_OPT_GO first - if it works, we are done (it
              * also gives us a good message if the server requires
              * TLS).  If it is not available, fall back to
@@ -759,6 +766,12 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char 
*name,
             goto fail;
         }
         be16_to_cpus(&info->flags);
+
+        if (info->structured_reply && !(info->flags & NBD_CMD_FLAG_DF)) {
+            error_setg(errp, "Structured reply is negotiated, "
+                             "but DF flag is not.");
+            goto fail;
+        }
     } else if (magic == NBD_CLIENT_MAGIC) {
         uint32_t oldflags;
 
@@ -942,6 +955,128 @@ int nbd_send_request(QIOChannel *ioc, NBDRequest *request)
     return nbd_write(ioc, buf, sizeof(buf), NULL);
 }
 
+/* nbd_receive_simple_reply
+ * Read simple reply except magic field (which should be already read)
+ */
+static int nbd_receive_simple_reply(QIOChannel *ioc, NBDReply *reply,
+                                    Error **errp)
+{
+    NBDSimpleReply simple_reply;
+    int ret;
+
+    ret = nbd_read(ioc, (uint8_t *)&simple_reply + sizeof(simple_reply.magic),
+                   sizeof(simple_reply) - sizeof(simple_reply.magic), errp);
+    if (ret < 0) {
+        return ret;
+    }
+
+    reply->error = be32_to_cpu(simple_reply.error);
+    reply->handle = be64_to_cpu(simple_reply.handle);
+
+    return 0;
+}
+
+/* nbd_receive_structured_reply_chunk
+ * Read structured reply chunk except magic field (which should be already 
read)
+ * Data for NBD_SREP_TYPE_OFFSET_DATA is not read too.
+ * tail_length field of reply out parameter corresponds to unread part of 
reply.
+ */
+static int nbd_receive_structured_reply_chunk(QIOChannel *ioc, NBDReply *reply,
+                                              Error **errp)
+{
+    NBDStructuredReplyChunk chunk;
+    ssize_t ret;
+    uint16_t message_size;
+
+    ret = nbd_read(ioc, (uint8_t *)&chunk + sizeof(chunk.magic),
+                          sizeof(chunk) - sizeof(chunk.magic), errp);
+    if (ret < 0) {
+        return ret;
+    }
+
+    reply->flags = be16_to_cpu(chunk.flags);
+    reply->type = be16_to_cpu(chunk.type);
+    reply->handle = be64_to_cpu(chunk.handle);
+    reply->tail_length = be32_to_cpu(chunk.length);
+
+    switch (reply->type) {
+    case NBD_SREP_TYPE_NONE:
+        break;
+    case NBD_SREP_TYPE_OFFSET_DATA:
+        if (reply->tail_length < sizeof(reply->offset)) {
+            return -EIO;
+        }
+        ret = nbd_read(ioc, &reply->offset, sizeof(reply->offset), errp);
+        if (ret < 0) {
+            return ret;
+        }
+        be64_to_cpus(&reply->offset);
+        reply->tail_length -= sizeof(reply->offset);
+
+        break;
+    case NBD_SREP_TYPE_OFFSET_HOLE:
+        ret = nbd_read(ioc, &reply->offset, sizeof(reply->offset), errp);
+        if (ret < 0) {
+            return ret;
+        }
+        be64_to_cpus(&reply->offset);
+
+        ret = nbd_read(ioc, &reply->hole_size, sizeof(reply->hole_size), errp);
+        if (ret < 0) {
+            return ret;
+        }
+        be32_to_cpus(&reply->hole_size);
+
+        break;
+    case NBD_SREP_TYPE_ERROR:
+    case NBD_SREP_TYPE_ERROR_OFFSET:
+        ret = nbd_read(ioc, &reply->error, sizeof(reply->error), errp);
+        if (ret < 0) {
+            return ret;
+        }
+        be32_to_cpus(&reply->error);
+
+        ret = nbd_read(ioc, &message_size, sizeof(message_size), errp);
+        if (ret < 0) {
+            return ret;
+        }
+        be16_to_cpus(&message_size);
+
+        if (message_size > 0) {
+            /* TODO: provide error message to user */
+            ret = nbd_drop(ioc, message_size, errp);
+            if (ret < 0) {
+                return ret;
+            }
+        }
+
+        if (reply->type == NBD_SREP_TYPE_ERROR_OFFSET) {
+            /* drop 64bit offset */
+            ret = nbd_drop(ioc, 8, errp);
+            if (ret < 0) {
+                return ret;
+            }
+        }
+        break;
+    default:
+        if (reply->type & (1 << 15)) {
+            /* unknown error */
+            ret = nbd_drop(ioc, reply->tail_length, errp);
+            if (ret < 0) {
+                return ret;
+            }
+
+            reply->error = NBD_EINVAL;
+            reply->tail_length = 0;
+        } else {
+            /* unknown non-error reply type */
+            return -EINVAL;
+        }
+    }
+
+    return 0;
+}
+
 /* nbd_receive_reply
  * Returns 1 on success
  *         0 on eof, when no data was read (errp is not set)
@@ -949,24 +1084,32 @@ int nbd_send_request(QIOChannel *ioc, NBDRequest 
*request)
  */
 int nbd_receive_reply(QIOChannel *ioc, NBDReply *reply, Error **errp)
 {
-    uint8_t buf[NBD_REPLY_SIZE];
     uint32_t magic;
     int ret;
 
-    ret = nbd_read_eof(ioc, buf, sizeof(buf), errp);
+    ret = nbd_read_eof(ioc, &magic, sizeof(magic), errp);
     if (ret <= 0) {
         return ret;
     }
 
-    /* Reply
-       [ 0 ..  3]    magic   (NBD_SIMPLE_REPLY_MAGIC)
-       [ 4 ..  7]    error   (0 == no error)
-       [ 7 .. 15]    handle
-     */
+    be32_to_cpus(&magic);
 
-    magic = ldl_be_p(buf);
-    reply->error  = ldl_be_p(buf + 4);
-    reply->handle = ldq_be_p(buf + 8);
+    switch (magic) {
+    case NBD_SIMPLE_REPLY_MAGIC:
+        reply->simple = true;
+        ret = nbd_receive_simple_reply(ioc, reply, errp);
+        break;
+    case NBD_STRUCTURED_REPLY_MAGIC:
+        reply->simple = false;
+        ret = nbd_receive_structured_reply_chunk(ioc, reply, errp);
+        break;
+    default:
+        error_setg(errp, "invalid magic (got 0x%" PRIx32 ")", magic);
+        return -EINVAL;
+    }
+    if (ret < 0) {
+        return ret;
+    }
 
     reply->error = nbd_errno_to_system_errno(reply->error);
 
@@ -977,11 +1120,5 @@ int nbd_receive_reply(QIOChannel *ioc, NBDReply *reply, 
Error **errp)
     }
     trace_nbd_receive_reply(magic, reply->error, reply->handle);
 
-    if (magic != NBD_SIMPLE_REPLY_MAGIC) {
-        error_setg(errp, "invalid magic (got 0x%" PRIx32 ")", magic);
-        return -EINVAL;
-    }
-
     return 1;
 }
-
-- 
2.11.1




reply via email to

[Prev in Thread] Current Thread [Next in Thread]