[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH RFC 3/3] xen_disk: add persistent grant support
From: |
Stefano Stabellini |
Subject: |
Re: [Qemu-devel] [PATCH RFC 3/3] xen_disk: add persistent grant support to xen_disk backend |
Date: |
Fri, 4 Jan 2013 18:02:02 +0000 |
User-agent: |
Alpine 2.02 (DEB 1266 2009-07-14) |
On Fri, 4 Jan 2013, Roger Pau Monne wrote:
> On 04/01/13 17:42, Stefano Stabellini wrote:
> > On Mon, 31 Dec 2012, Roger Pau Monne wrote:
> >> This protocol extension reuses the same set of grant pages for all
> >> transactions between the front/back drivers, avoiding expensive tlb
> >> flushes, grant table lock contention and switches between userspace
> >> and kernel space. The full description of the protocol can be found in
> >> the public blkif.h header.
> >
> > it would be great if you could add a link to a webpage too
>
> Sure, thanks for the review.
>
> >
> >> Speed improvement with 15 guests performing I/O is ~450%.
> >>
> >> Signed-off-by: Roger Pau Monné <address@hidden>
> >> Cc: address@hidden
> >> Cc: Stefano Stabellini <address@hidden>
> >> Cc: Anthony PERARD <address@hidden>
> >> ---
> >> Performance comparison with the previous implementation can be seen in
> >> the followign graph:
> >>
> >> http://xenbits.xen.org/people/royger/persistent_read_qemu.png
> >
> > this is what I like to see!
> >
> >
> >> hw/xen_disk.c | 155
> >> ++++++++++++++++++++++++++++++++++++++++++++++++++------
> >> 1 files changed, 138 insertions(+), 17 deletions(-)
> >>
> >> diff --git a/hw/xen_disk.c b/hw/xen_disk.c
> >> index 1eb485a..bafeceb 100644
> >> --- a/hw/xen_disk.c
> >> +++ b/hw/xen_disk.c
> >> @@ -52,6 +52,11 @@ static int max_requests = 32;
> >> #define BLOCK_SIZE 512
> >> #define IOCB_COUNT (BLKIF_MAX_SEGMENTS_PER_REQUEST + 2)
> >>
> >> +struct persistent_gnt {
> >> + void *page;
> >> + struct XenBlkDev *blkdev;
> >> +};
> >> +
> >> struct ioreq {
> >> blkif_request_t req;
> >> int16_t status;
> >> @@ -69,6 +74,7 @@ struct ioreq {
> >> int prot;
> >> void *page[BLKIF_MAX_SEGMENTS_PER_REQUEST];
> >> void *pages;
> >> + int num_unmap;
> >>
> >> /* aio status */
> >> int aio_inflight;
> >> @@ -105,6 +111,12 @@ struct XenBlkDev {
> >> int requests_inflight;
> >> int requests_finished;
> >>
> >> + /* Persistent grants extension */
> >> + gboolean feature_persistent;
> >> + GTree *persistent_gnts;
> >> + unsigned int persistent_gnt_c;
> >
> > can you come up with a better name for this variable?
>
> persistent_gnt_num or persistent_gnt_count?
persistent_gnt_count
> >> @@ -298,41 +333,107 @@ static void ioreq_unmap(struct ioreq *ioreq)
> >> static int ioreq_map(struct ioreq *ioreq)
> >> {
> >> XenGnttab gnt = ioreq->blkdev->xendev.gnttabdev;
> >> - int i;
> >> + uint32_t domids[BLKIF_MAX_SEGMENTS_PER_REQUEST];
> >> + uint32_t refs[BLKIF_MAX_SEGMENTS_PER_REQUEST];
> >> + void *page[BLKIF_MAX_SEGMENTS_PER_REQUEST];
> >> + int i, j, new_maps = 0;
> >> + struct persistent_gnt *grant;
> >>
> >> if (ioreq->v.niov == 0 || ioreq->mapped == 1) {
> >> return 0;
> >> }
> >> - if (batch_maps) {
> >> + if (ioreq->blkdev->feature_persistent) {
> >> + for (i = 0; i < ioreq->v.niov; i++) {
> >> + grant = g_tree_lookup(ioreq->blkdev->persistent_gnts,
> >> + GUINT_TO_POINTER(ioreq->refs[i]));
> >> +
> >> + if (grant != NULL) {
> >> + page[i] = grant->page;
> >> + xen_be_printf(&ioreq->blkdev->xendev, 3,
> >> + "using persistent-grant %" PRIu32 "\n",
> >> + ioreq->refs[i]);
> >> + } else {
> >> + /* Add the grant to the list of grants that
> >> + * should be mapped
> >> + */
> >> + domids[new_maps] = ioreq->domids[i];
> >> + refs[new_maps] = ioreq->refs[i];
> >> + page[i] = NULL;
> >> + new_maps++;
> >> + }
> >> + }
> >> + /* Set the protection to RW, since grants may be reused later
> >> + * with a different protection than the one needed for this
> >> request
> >> + */
> >> + ioreq->prot = PROT_WRITE | PROT_READ;
> >> + } else {
> >> + /* All grants in the request should be mapped */
> >> + memcpy(refs, ioreq->refs, sizeof(refs));
> >> + memcpy(domids, ioreq->domids, sizeof(domids));
> >> + memset(page, 0, sizeof(page));
> >> + new_maps = ioreq->v.niov;
> >> + }
> >> +
> >> + if (batch_maps && new_maps) {
> >> ioreq->pages = xc_gnttab_map_grant_refs
> >> - (gnt, ioreq->v.niov, ioreq->domids, ioreq->refs, ioreq->prot);
> >> + (gnt, new_maps, domids, refs, ioreq->prot);
> >> if (ioreq->pages == NULL) {
> >> xen_be_printf(&ioreq->blkdev->xendev, 0,
> >> "can't map %d grant refs (%s, %d maps)\n",
> >> - ioreq->v.niov, strerror(errno),
> >> ioreq->blkdev->cnt_map);
> >> + new_maps, strerror(errno),
> >> ioreq->blkdev->cnt_map);
> >> return -1;
> >> }
> >> - for (i = 0; i < ioreq->v.niov; i++) {
> >> - ioreq->v.iov[i].iov_base = ioreq->pages + i * XC_PAGE_SIZE +
> >> - (uintptr_t)ioreq->v.iov[i].iov_base;
> >> + for (i = 0, j = 0; i < ioreq->v.niov; i++) {
> >> + if (page[i] == NULL)
> >> + page[i] = ioreq->pages + (j++) * XC_PAGE_SIZE;
> >> }
> >
> > Code style.
> >
> > Is it correct to assume that the refs to map are always the first set?
>
> "refs" only contains grant references that should be mapped. Note that
> refs is not ioreq->refs, it is the subset of references in ioreq->refs
> that are not yet mapped.
Oh right, I missed that