[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH 18/24] DAX/unmap virtiofsd: Parse unmappable elements
From: |
Dr. David Alan Gilbert (git) |
Subject: |
[PATCH 18/24] DAX/unmap virtiofsd: Parse unmappable elements |
Date: |
Tue, 9 Feb 2021 19:02:18 +0000 |
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
For some reads/writes the virtio queue elements are unmappable by
the daemon; these are cases where the data is to be read from or written
to non-RAM. In virtiofs's case this is typically a direct read/write
into an mmap'd DAX file also on virtiofs (possibly on another instance).
When we receive a virtio queue element, check that we have enough
mappable data to handle the headers. Make a note of the number of
unmappable 'in' entries (i.e. for read data sent back to the VMM),
and flag the fuse_bufvec entries for unmappable 'out' elements with
a new flag, FUSE_BUF_PHYS_ADDR.
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
with fix by:
Signed-off-by: Liu Bo <bo.liu@linux.alibaba.com>
---
tools/virtiofsd/buffer.c | 4 +-
tools/virtiofsd/fuse_common.h | 7 ++
tools/virtiofsd/fuse_virtio.c | 191 ++++++++++++++++++++++++----------
3 files changed, 145 insertions(+), 57 deletions(-)
diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c
index 874f01c488..1a050aa441 100644
--- a/tools/virtiofsd/buffer.c
+++ b/tools/virtiofsd/buffer.c
@@ -77,6 +77,7 @@ static ssize_t fuse_buf_write(const struct fuse_buf *dst,
size_t dst_off,
ssize_t res = 0;
size_t copied = 0;
+ assert(!(src->flags & FUSE_BUF_PHYS_ADDR));
while (len) {
if (dst->flags & FUSE_BUF_FD_SEEK) {
res = pwrite(dst->fd, (char *)src->mem + src_off, len,
@@ -272,7 +273,8 @@ ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct
fuse_bufvec *srcv)
* process
*/
for (i = 0; i < srcv->count; i++) {
- if (srcv->buf[i].flags & FUSE_BUF_IS_FD) {
+ if ((srcv->buf[i].flags & FUSE_BUF_PHYS_ADDR) ||
+ (srcv->buf[i].flags & FUSE_BUF_IS_FD)) {
break;
}
}
diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h
index a090040bb2..ed9280de91 100644
--- a/tools/virtiofsd/fuse_common.h
+++ b/tools/virtiofsd/fuse_common.h
@@ -611,6 +611,13 @@ enum fuse_buf_flags {
* detected.
*/
FUSE_BUF_FD_RETRY = (1 << 3),
+
+ /**
+ * The addresses in the iovec represent guest physical addresses
+ * that can't be mapped by the daemon process.
+ * IO must be bounced back to the VMM to do it.
+ */
+ FUSE_BUF_PHYS_ADDR = (1 << 4),
};
/**
diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c
index 8feb3c0261..8fa438525f 100644
--- a/tools/virtiofsd/fuse_virtio.c
+++ b/tools/virtiofsd/fuse_virtio.c
@@ -49,6 +49,10 @@ typedef struct {
VuVirtqElement elem;
struct fuse_chan ch;
+ /* Number of unmappable iovecs */
+ unsigned bad_in_num;
+ unsigned bad_out_num;
+
/* Used to complete requests that involve no reply */
bool reply_sent;
} FVRequest;
@@ -291,8 +295,10 @@ int virtio_send_data_iov(struct fuse_session *se, struct
fuse_chan *ch,
/* The 'in' part of the elem is to qemu */
unsigned int in_num = elem->in_num;
+ unsigned int bad_in_num = req->bad_in_num;
struct iovec *in_sg = elem->in_sg;
size_t in_len = iov_size(in_sg, in_num);
+ size_t in_len_writeable = iov_size(in_sg, in_num - bad_in_num);
fuse_log(FUSE_LOG_DEBUG, "%s: elem %d: with %d in desc of length %zd\n",
__func__, elem->index, in_num, in_len);
@@ -300,7 +306,7 @@ int virtio_send_data_iov(struct fuse_session *se, struct
fuse_chan *ch,
* The elem should have room for a 'fuse_out_header' (out from fuse)
* plus the data based on the len in the header.
*/
- if (in_len < sizeof(struct fuse_out_header)) {
+ if (in_len_writeable < sizeof(struct fuse_out_header)) {
fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for out_header\n",
__func__, elem->index);
ret = E2BIG;
@@ -327,7 +333,7 @@ int virtio_send_data_iov(struct fuse_session *se, struct
fuse_chan *ch,
memcpy(in_sg_cpy, in_sg, sizeof(struct iovec) * in_num);
/* These get updated as we skip */
struct iovec *in_sg_ptr = in_sg_cpy;
- int in_sg_cpy_count = in_num;
+ int in_sg_cpy_count = in_num - bad_in_num;
/* skip over parts of in_sg that contained the header iov */
size_t skip_size = iov_len;
@@ -460,17 +466,21 @@ static void fv_queue_worker(gpointer data, gpointer
user_data)
/* The 'out' part of the elem is from qemu */
unsigned int out_num = elem->out_num;
+ unsigned int out_num_readable = out_num - req->bad_out_num;
struct iovec *out_sg = elem->out_sg;
size_t out_len = iov_size(out_sg, out_num);
+ size_t out_len_readable = iov_size(out_sg, out_num_readable);
fuse_log(FUSE_LOG_DEBUG,
- "%s: elem %d: with %d out desc of length %zd\n",
- __func__, elem->index, out_num, out_len);
+ "%s: elem %d: with %d out desc of length %zd"
+ " bad_in_num=%u bad_out_num=%u\n",
+ __func__, elem->index, out_num, out_len, req->bad_in_num,
+ req->bad_out_num);
/*
* The elem should contain a 'fuse_in_header' (in to fuse)
* plus the data based on the len in the header.
*/
- if (out_len < sizeof(struct fuse_in_header)) {
+ if (out_len_readable < sizeof(struct fuse_in_header)) {
fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for in_header\n",
__func__, elem->index);
assert(0); /* TODO */
@@ -484,63 +494,129 @@ static void fv_queue_worker(gpointer data, gpointer
user_data)
copy_from_iov(&fbuf, 1, out_sg);
pbufv = NULL; /* Compiler thinks an unitialised path */
- if (out_num > 2 &&
- out_sg[0].iov_len == sizeof(struct fuse_in_header) &&
- ((struct fuse_in_header *)fbuf.mem)->opcode == FUSE_WRITE &&
- out_sg[1].iov_len == sizeof(struct fuse_write_in)) {
- /*
- * For a write we don't actually need to copy the
- * data, we can just do it straight out of guest memory
- * but we must still copy the headers in case the guest
- * was nasty and changed them while we were using them.
- */
- fuse_log(FUSE_LOG_DEBUG, "%s: Write special case\n", __func__);
-
- /* copy the fuse_write_in header afte rthe fuse_in_header */
- fbuf.mem += out_sg->iov_len;
- copy_from_iov(&fbuf, 1, out_sg + 1);
- fbuf.mem -= out_sg->iov_len;
- fbuf.size = out_sg[0].iov_len + out_sg[1].iov_len;
-
- /* Allocate the bufv, with space for the rest of the iov */
- pbufv = malloc(sizeof(struct fuse_bufvec) +
- sizeof(struct fuse_buf) * (out_num - 2));
- if (!pbufv) {
- fuse_log(FUSE_LOG_ERR, "%s: pbufv malloc failed\n",
- __func__);
- goto out;
- }
+ if (req->bad_in_num || req->bad_out_num) {
+ bool handled_unmappable = false;
+
+ if (out_num > 2 && out_num_readable >= 2 && !req->bad_in_num &&
+ out_sg[0].iov_len == sizeof(struct fuse_in_header) &&
+ ((struct fuse_in_header *)fbuf.mem)->opcode == FUSE_WRITE &&
+ out_sg[1].iov_len == sizeof(struct fuse_write_in)) {
+ handled_unmappable = true;
+
+ /* copy the fuse_write_in header after fuse_in_header */
+ fbuf.mem += out_sg->iov_len;
+ copy_from_iov(&fbuf, 1, out_sg + 1);
+ fbuf.mem -= out_sg->iov_len;
+ fbuf.size = out_sg[0].iov_len + out_sg[1].iov_len;
+
+ /* Allocate the bufv, with space for the rest of the iov */
+ pbufv = malloc(sizeof(struct fuse_bufvec) +
+ sizeof(struct fuse_buf) * (out_num - 2));
+ if (!pbufv) {
+ fuse_log(FUSE_LOG_ERR, "%s: pbufv malloc failed\n",
+ __func__);
+ goto out;
+ }
- allocated_bufv = true;
- pbufv->count = 1;
- pbufv->buf[0] = fbuf;
+ allocated_bufv = true;
+ pbufv->count = 1;
+ pbufv->buf[0] = fbuf;
+
+ size_t iovindex, pbufvindex;
+ iovindex = 2; /* 2 headers, separate iovs */
+ pbufvindex = 1; /* 2 headers, 1 fusebuf */
+
+ for (; iovindex < out_num; iovindex++, pbufvindex++) {
+ pbufv->count++;
+ pbufv->buf[pbufvindex].pos = ~0; /* Dummy */
+ pbufv->buf[pbufvindex].flags =
+ (iovindex < out_num_readable) ? 0 :
+ FUSE_BUF_PHYS_ADDR;
+ pbufv->buf[pbufvindex].mem = out_sg[iovindex].iov_base;
+ pbufv->buf[pbufvindex].size = out_sg[iovindex].iov_len;
+ }
+ }
- size_t iovindex, pbufvindex;
- iovindex = 2; /* 2 headers, separate iovs */
- pbufvindex = 1; /* 2 headers, 1 fusebuf */
+ if (out_num == 2 && out_num_readable == 2 && req->bad_in_num &&
+ out_sg[0].iov_len == sizeof(struct fuse_in_header) &&
+ ((struct fuse_in_header *)fbuf.mem)->opcode == FUSE_READ &&
+ out_sg[1].iov_len == sizeof(struct fuse_read_in)) {
+ fuse_log(FUSE_LOG_DEBUG,
+ "Unmappable read case "
+ "in_num=%d bad_in_num=%d\n",
+ elem->in_num, req->bad_in_num);
+ handled_unmappable = true;
+ }
- for (; iovindex < out_num; iovindex++, pbufvindex++) {
- pbufv->count++;
- pbufv->buf[pbufvindex].pos = ~0; /* Dummy */
- pbufv->buf[pbufvindex].flags = 0;
- pbufv->buf[pbufvindex].mem = out_sg[iovindex].iov_base;
- pbufv->buf[pbufvindex].size = out_sg[iovindex].iov_len;
+ if (!handled_unmappable) {
+ fuse_log(FUSE_LOG_ERR,
+ "Unhandled unmappable element: out: %d(b:%d) in: "
+ "%d(b:%d)",
+ out_num, req->bad_out_num, elem->in_num, req->bad_in_num);
+ fv_panic(dev, "Unhandled unmappable element");
}
- } else {
- /* Normal (non fast write) path */
+ }
+
+ if (!req->bad_out_num) {
+ if (out_num > 2 &&
+ out_sg[0].iov_len == sizeof(struct fuse_in_header) &&
+ ((struct fuse_in_header *)fbuf.mem)->opcode == FUSE_WRITE &&
+ out_sg[1].iov_len == sizeof(struct fuse_write_in)) {
+ /*
+ * For a write we don't actually need to copy the
+ * data, we can just do it straight out of guest memory
+ * but we must still copy the headers in case the guest
+ * was nasty and changed them while we were using them.
+ */
+ fuse_log(FUSE_LOG_DEBUG, "%s: Write special case\n",
+ __func__);
+
+ /* copy the fuse_write_in header after fuse_in_header */
+ fbuf.mem += out_sg->iov_len;
+ copy_from_iov(&fbuf, 1, out_sg + 1);
+ fbuf.mem -= out_sg->iov_len;
+ fbuf.size = out_sg[0].iov_len + out_sg[1].iov_len;
+
+ /* Allocate the bufv, with space for the rest of the iov */
+ pbufv = malloc(sizeof(struct fuse_bufvec) +
+ sizeof(struct fuse_buf) * (out_num - 2));
+ if (!pbufv) {
+ fuse_log(FUSE_LOG_ERR, "%s: pbufv malloc failed\n",
+ __func__);
+ goto out;
+ }
- /* Copy the rest of the buffer */
- fbuf.mem += out_sg->iov_len;
- copy_from_iov(&fbuf, out_num - 1, out_sg + 1);
- fbuf.mem -= out_sg->iov_len;
- fbuf.size = out_len;
+ allocated_bufv = true;
+ pbufv->count = 1;
+ pbufv->buf[0] = fbuf;
- /* TODO! Endianness of header */
+ size_t iovindex, pbufvindex;
+ iovindex = 2; /* 2 headers, separate iovs */
+ pbufvindex = 1; /* 2 headers, 1 fusebuf */
- /* TODO: Add checks for fuse_session_exited */
- bufv.buf[0] = fbuf;
- bufv.count = 1;
- pbufv = &bufv;
+ for (; iovindex < out_num; iovindex++, pbufvindex++) {
+ pbufv->count++;
+ pbufv->buf[pbufvindex].pos = ~0; /* Dummy */
+ pbufv->buf[pbufvindex].flags = 0;
+ pbufv->buf[pbufvindex].mem = out_sg[iovindex].iov_base;
+ pbufv->buf[pbufvindex].size = out_sg[iovindex].iov_len;
+ }
+ } else {
+ /* Normal (non fast write) path */
+
+ /* Copy the rest of the buffer */
+ fbuf.mem += out_sg->iov_len;
+ copy_from_iov(&fbuf, out_num - 1, out_sg + 1);
+ fbuf.mem -= out_sg->iov_len;
+ fbuf.size = out_len;
+
+ /* TODO! Endianness of header */
+
+ /* TODO: Add checks for fuse_session_exited */
+ bufv.buf[0] = fbuf;
+ bufv.count = 1;
+ pbufv = &bufv;
+ }
}
pbufv->idx = 0;
pbufv->off = 0;
@@ -657,13 +733,16 @@ static void *fv_queue_thread(void *opaque)
__func__, qi->qidx, (size_t)evalue, in_bytes, out_bytes);
while (1) {
+ unsigned int bad_in_num = 0, bad_out_num = 0;
FVRequest *req = vu_queue_pop(dev, q, sizeof(FVRequest),
- NULL, NULL);
+ &bad_in_num, &bad_out_num);
if (!req) {
break;
}
req->reply_sent = false;
+ req->bad_in_num = bad_in_num;
+ req->bad_out_num = bad_out_num;
if (!se->thread_pool_size) {
req_list = g_list_prepend(req_list, req);
--
2.29.2
- Re: [PATCH 12/24] DAX: virtiofsd: Wire up passthrough_ll's lo_setupmapping, (continued)
- [PATCH 11/24] DAX: virtiofsd: Add setup/remove mapping handlers to passthrough_ll, Dr. David Alan Gilbert (git), 2021/02/09
- [PATCH 13/24] DAX: virtiofsd: Make lo_removemapping() work, Dr. David Alan Gilbert (git), 2021/02/09
- [PATCH 14/24] DAX: virtiofsd: route se down to destroy method, Dr. David Alan Gilbert (git), 2021/02/09
- [PATCH 15/24] DAX: virtiofsd: Perform an unmap on destroy, Dr. David Alan Gilbert (git), 2021/02/09
- [PATCH 19/24] DAX/unmap virtiofsd: Route unmappable reads, Dr. David Alan Gilbert (git), 2021/02/09
- [PATCH 18/24] DAX/unmap virtiofsd: Parse unmappable elements,
Dr. David Alan Gilbert (git) <=
- [PATCH 17/24] DAX/unmap virtiofsd: Add wrappers for VHOST_USER_SLAVE_FS_IO, Dr. David Alan Gilbert (git), 2021/02/09
- [PATCH 20/24] DAX/unmap virtiofsd: route unmappable write to slave command, Dr. David Alan Gilbert (git), 2021/02/09
- [PATCH 21/24] DAX:virtiofsd: implement FUSE_INIT map_alignment field, Dr. David Alan Gilbert (git), 2021/02/09
- [PATCH 22/24] vhost-user-fs: Extend VhostUserFSSlaveMsg to pass additional info, Dr. David Alan Gilbert (git), 2021/02/09
- [PATCH 16/24] DAX/unmap: virtiofsd: Add VHOST_USER_SLAVE_FS_IO, Dr. David Alan Gilbert (git), 2021/02/09
- [PATCH 23/24] vhost-user-fs: Implement drop CAP_FSETID functionality, Dr. David Alan Gilbert (git), 2021/02/09