[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [RFC PATCH 17/18] nbd: Implement NBD_CMD_WRITE_ZEROES o
From: |
Alex Bligh |
Subject: |
Re: [Qemu-devel] [RFC PATCH 17/18] nbd: Implement NBD_CMD_WRITE_ZEROES on server |
Date: |
Sat, 9 Apr 2016 11:54:51 +0100 |
On 8 Apr 2016, at 23:05, Eric Blake <address@hidden> wrote:
> RFC because there is still discussion on the NBD list about
> adding an NBD_OPT_ to let the client suggest server defaults
> related to scanning for zeroes during NBD_CMD_WRITE, which may
> tweak this patch.
>
> Upstream NBD protocol recently added the ability to efficiently
> write zeroes without having to send the zeroes over the wire,
> along with a flag to control whether the client wants a hole.
>
> Signed-off-by: Eric Blake <address@hidden>
> ---
> include/block/nbd.h | 5 ++++-
> nbd/server.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++---
> 2 files changed, 64 insertions(+), 4 deletions(-)
>
> diff --git a/include/block/nbd.h b/include/block/nbd.h
> index 4c57754..a1d955c 100644
> --- a/include/block/nbd.h
> +++ b/include/block/nbd.h
> @@ -70,6 +70,7 @@ typedef struct nbd_reply nbd_reply;
> #define NBD_FLAG_SEND_FUA (1 << 3) /* Send FUA (Force Unit
> Access) */
> #define NBD_FLAG_ROTATIONAL (1 << 4) /* Use elevator algorithm -
> rotational media */
> #define NBD_FLAG_SEND_TRIM (1 << 5) /* Send TRIM (discard) */
> +#define NBD_FLAG_SEND_WRITE_ZEROES (1 << 6) /* Send WRITE_ZEROES */
> #define NBD_FLAG_SEND_CLOSE (1 << 8) /* Send CLOSE */
>
> /* New-style handshake (global) flags, sent from server to client, and
> @@ -92,7 +93,8 @@ typedef struct nbd_reply nbd_reply;
> #define NBD_REP_ERR_UNKNOWN ((UINT32_C(1) << 31) | 6) /* Export unknown */
>
> /* Request flags, sent from client to server during transmission phase */
> -#define NBD_CMD_FLAG_FUA (1 << 0)
> +#define NBD_CMD_FLAG_FUA (1 << 0) /* 'force unit access' during write
> */
> +#define NBD_CMD_FLAG_NO_HOLE (1 << 1) /* don't punch hole on zero run */
>
> /* Supported request types */
> enum {
> @@ -101,6 +103,7 @@ enum {
> NBD_CMD_DISC = 2,
> NBD_CMD_FLUSH = 3,
> NBD_CMD_TRIM = 4,
> + NBD_CMD_WRITE_ZEROES = 5,
> NBD_CMD_CLOSE = 7,
> };
>
> diff --git a/nbd/server.c b/nbd/server.c
> index 2a6eaf2..09af915 100644
> --- a/nbd/server.c
> +++ b/nbd/server.c
> @@ -625,7 +625,8 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData
> *data)
> int rc;
> const uint16_t myflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM |
> NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA |
> - NBD_FLAG_SEND_CLOSE);
> + NBD_FLAG_SEND_CLOSE |
> + NBD_FLAG_SEND_WRITE_ZEROES);
> bool oldStyle;
> size_t len;
>
> @@ -1088,7 +1089,7 @@ static ssize_t nbd_co_receive_request(NBDRequest *req,
> struct nbd_request *reque
> goto out;
> }
>
> - if (request->flags & ~NBD_CMD_FLAG_FUA) {
> + if (request->flags & ~(NBD_CMD_FLAG_FUA | NBD_CMD_FLAG_NO_HOLE)) {
> LOG("unsupported flags (got 0x%x)", request->flags);
> return -EINVAL;
> }
> @@ -1102,7 +1103,13 @@ static ssize_t nbd_co_receive_request(NBDRequest *req,
> struct nbd_request *reque
> TRACE("Decoding type");
>
> command = request->type;
> - if (command == NBD_CMD_READ || command == NBD_CMD_WRITE) {
> + if (request->flags & NBD_CMD_FLAG_NO_HOLE &&
> + !(command == NBD_CMD_WRITE || command == NBD_CMD_WRITE_ZEROES)) {
> + LOG("NO_HOLE flag valid only with write operation");
> + return -EINVAL;
> + }
> + if (command == NBD_CMD_READ || command == NBD_CMD_WRITE ||
> + command == NBD_CMD_WRITE_ZEROES) {
> if (request->len > NBD_MAX_BUFFER_SIZE) {
> LOG("len (%" PRIu32" ) is larger than max len (%u)",
> request->len, NBD_MAX_BUFFER_SIZE);
> @@ -1143,6 +1150,7 @@ static void nbd_trip(void *opaque)
> struct nbd_reply reply;
> ssize_t ret;
> uint32_t command;
> + int flags;
>
> TRACE("Reading request.");
> if (client->closing) {
> @@ -1221,6 +1229,9 @@ static void nbd_trip(void *opaque)
>
> TRACE("Writing to device");
>
> + /* FIXME: if the client passes NBD_CMD_FLAG_NO_HOLE, can we
> + * make that override a server that is set to look for
> + * holes? */
No, and I reckon that's an error in the spec. There is a 'MUST'
which should be a 'SHOULD'. Good luck with any FS which supports
dedupe for instance!
> ret = blk_write(exp->blk,
> (request.from + exp->dev_offset) / BDRV_SECTOR_SIZE,
> req->data, request.len / BDRV_SECTOR_SIZE);
> @@ -1243,6 +1254,52 @@ static void nbd_trip(void *opaque)
> goto out;
> }
> break;
> + case NBD_CMD_WRITE_ZEROES:
> + TRACE("Request type is WRITE_ZEROES");
> +
> + if (exp->nbdflags & NBD_FLAG_READ_ONLY) {
> + TRACE("Server is read-only, return error");
> + reply.error = EROFS;
> + goto error_reply;
> + }
> +
> + TRACE("Writing to device");
> +
> + flags = 0;
> + if (request.flags & NBD_CMD_FLAG_FUA) {
> + flags |= BDRV_REQ_FUA;
> + }
> + if (!(request.flags & NBD_CMD_FLAG_NO_HOLE)) {
> + /* FIXME: should this depend on whether the server is set to
> + look for holes? */
> + flags |= BDRV_REQ_MAY_UNMAP;
> + }
> + ret = blk_write_zeroes(exp->blk,
> + ((request.from + exp->dev_offset) /
> + BDRV_SECTOR_SIZE),
> + request.len / BDRV_SECTOR_SIZE,
> + flags);
> + if (ret < 0) {
> + LOG("writing to file failed");
> + reply.error = -ret;
> + goto error_reply;
> + }
> +
> + /* FIXME: do we need FUA flush here, if we also passed it to
> + * blk_write_zeroes? */
I don't think so. blk_write_zeroes with BDRV_REQ_FUA should
make *that* request access the media, but FLUSH makes ALL PREVIOUS
requests access the media, so you are doing too much work here
I think.
> + if (request.flags & NBD_CMD_FLAG_FUA) {
> + ret = blk_co_flush(exp->blk);
> + if (ret < 0) {
> + LOG("flush failed");
> + reply.error = -ret;
> + goto error_reply;
> + }
> + }
> +
> + if (nbd_co_send_reply(req, &reply, 0) < 0) {
> + goto out;
> + }
> + break;
> case NBD_CMD_DISC:
> TRACE("Request type is DISCONNECT");
> goto out;
> --
> 2.5.5
>
>
--
Alex Bligh
- Re: [Qemu-devel] [PATCH 12/18] nbd: Less allocation during NBD_OPT_LIST, (continued)
- [Qemu-devel] [PATCH 15/18] nbd: Implement NBD_OPT_GO on server, Eric Blake, 2016/04/08
- [Qemu-devel] [PATCH 13/18] nbd: Support shorter handshake, Eric Blake, 2016/04/08
- [Qemu-devel] [PATCH 07/18] nbd: Treat flags vs. command type as separate fields, Eric Blake, 2016/04/08
- [Qemu-devel] [RFC PATCH 17/18] nbd: Implement NBD_CMD_WRITE_ZEROES on server, Eric Blake, 2016/04/08
- [Qemu-devel] [PATCH 06/18] nbd: Avoid magic number for NBD max name size, Eric Blake, 2016/04/08
- [Qemu-devel] [RFC PATCH 18/18] nbd: Implement NBD_CMD_WRITE_ZEROES on client, Eric Blake, 2016/04/08
- [Qemu-devel] [PATCH 16/18] nbd: Support NBD_CMD_CLOSE, Eric Blake, 2016/04/08