[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH 09/11] QEMU NVMe: Implement discontiguous queues
From: Anthony Liguori
Subject: Re: [Qemu-devel] [PATCH 09/11] QEMU NVMe: Implement discontiguous queues
Date: Tue, 26 Feb 2013 20:16:54 -0600
User-agent: Notmuch/0.13.2+93~ged93d79 (http://notmuchmail.org) Emacs/23.3.1 (x86_64-pc-linux-gnu)
Keith Busch <address@hidden> writes:
> This seems like a silly feature, but the spec allows it, so this adds
> support for queues that are not physically contiguous.
>
> Signed-off-by: Keith Busch <address@hidden>
> ---
> hw/nvme.c | 143 +++++++++++++++++++++++++++++++++++++++++++++++++-----------
> hw/nvme.h | 2 +
> 2 files changed, 118 insertions(+), 27 deletions(-)
>
> diff --git a/hw/nvme.c b/hw/nvme.c
> index 3e5cfc0..2945406 100644
> --- a/hw/nvme.c
> +++ b/hw/nvme.c
> @@ -110,6 +110,42 @@ static void nvme_isr_notify(NvmeCtrl *n, NvmeCQueue *cq)
> }
> }
>
> +static uint64_t *nvme_setup_discontig(NvmeCtrl *n, uint64_t prp_addr,
> + uint16_t queue_depth, uint16_t entry_size)
> +{
> + int i;
> + uint16_t prps_per_page = n->page_size / sizeof(uint64_t);
> + uint64_t prp[prps_per_page];
> + uint16_t total_prps = DIV_ROUND_UP(queue_depth * entry_size,
> n->page_size);
> + uint64_t *prp_list = g_malloc0(total_prps * sizeof(*prp_list));
> +
> + for (i = 0; i < total_prps; i++) {
> + if (i % prps_per_page == 0 && i < total_prps - 1) {
> + if (!prp_addr || prp_addr & (n->page_size - 1)) {
> + g_free(prp_list);
> + return NULL;
> + }
> + pci_dma_read(&n->dev, prp_addr, (void *)&prp, sizeof(prp));
> + prp_addr = prp[prps_per_page - 1];
This doesn't appear to be endian safe.
Regards,
Anthony Liguori
> + }
> + prp_list[i] = prp[i % prps_per_page];
> + if (!prp_list[i] || prp_list[i] & (n->page_size - 1)) {
> + g_free(prp_list);
> + return NULL;
> + }
> + }
> + return prp_list;
> +}
> +
> +static hwaddr nvme_discontig(uint64_t *dma_addr, uint16_t page_size,
> + uint16_t queue_idx, uint16_t entry_size)
> +{
> + uint16_t entries_per_page = page_size / entry_size;
> + uint16_t prp_index = queue_idx / entries_per_page;
> + uint16_t index_in_prp = queue_idx % entries_per_page;
> + return dma_addr[prp_index] + index_in_prp * entry_size;
> +}
> +
> static uint16_t nvme_map_prp(QEMUSGList *qsg, uint64_t prp1, uint64_t prp2,
> uint32_t len, NvmeCtrl *n)
> {
> @@ -212,7 +248,12 @@ static void nvme_post_cqes(void *opaque)
> req->cqe.status |= cq->phase;
> req->cqe.sq_id = sq->id;
> req->cqe.sq_head = sq->head;
> - addr = cq->dma_addr + cq->tail * n->cqe_size;
> + if (cq->phys_contig) {
> + addr = cq->dma_addr + cq->tail * n->cqe_size;
> + } else {
> + addr = nvme_discontig(cq->prp_list, cq->tail, n->page_size,
> + n->cqe_size);
> + }
> nvme_inc_cq_tail(cq);
> pci_dma_write(&n->dev, addr, (void *)&req->cqe, sizeof(req->cqe));
> QTAILQ_INSERT_TAIL(&sq->req_list, req, entry);
> @@ -528,7 +569,9 @@ static void nvme_free_sq(NvmeSQueue *sq, NvmeCtrl *n)
> qemu_del_timer(sq->timer);
> qemu_free_timer(sq->timer);
> g_free(sq->io_req);
> - sq->io_req = NULL;
> + if (sq->prp_list) {
> + g_free(sq->prp_list);
> + }
> if (sq->id)
> g_free(sq);
> }
> @@ -566,20 +609,32 @@ static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeCmd *cmd)
> return NVME_SUCCESS;
> }
>
> -static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr,
> - uint16_t sqid, uint16_t cqid, uint16_t size, enum QueueFlags prio)
> +static uint16_t nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr,
> + uint16_t sqid, uint16_t cqid, uint16_t size, enum QueueFlags prio,
> + int contig)
> {
> int i;
> NvmeCQueue *cq;
>
> sq->ctrl = n;
> - sq->dma_addr = dma_addr;
> sq->id = sqid;
> sq->size = size;
> sq->cqid = cqid;
> sq->head = sq->tail = 0;
> + sq->phys_contig = contig;
> sq->io_req = g_malloc(sq->size * sizeof(*sq->io_req));
>
> + if (sq->phys_contig) {
> + sq->dma_addr = dma_addr;
> + }
> + else {
> + sq->prp_list = nvme_setup_discontig(n, dma_addr, size,
> + n->sqe_size);
> + if (!sq->prp_list) {
> + return NVME_INVALID_FIELD | NVME_DNR;
> + }
> + }
> +
> QTAILQ_INIT(&sq->req_list);
> QTAILQ_INIT(&sq->out_req_list);
> for (i = 0; i < sq->size; i++) {
> @@ -607,6 +662,8 @@ static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n,
> uint64_t dma_addr,
> cq = n->cq[cqid];
> QTAILQ_INSERT_TAIL(&(cq->sq_list), sq, entry);
> n->sq[sqid] = sq;
> +
> + return NVME_SUCCESS;
> }
>
> static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeCmd *cmd)
> @@ -625,12 +682,15 @@ static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeCmd
> *cmd)
> if (!c->prp1 || c->prp1 & (n->page_size - 1)) {
> return NVME_INVALID_FIELD | NVME_DNR;
> }
> - if (!(NVME_SQ_FLAGS_PC(c->sq_flags))) {
> + if (!(NVME_SQ_FLAGS_PC(c->sq_flags)) && NVME_CAP_CQR(n->bar.cap)) {
> return NVME_INVALID_FIELD | NVME_DNR;
> }
> sq = g_malloc0(sizeof(*sq));
> - nvme_init_sq(sq, n, c->prp1, c->sqid, c->cqid, c->qsize + 1,
> - NVME_SQ_FLAGS_QPRIO(c->sq_flags));
> + if (nvme_init_sq(sq, n, c->prp1, c->sqid, c->cqid, c->qsize + 1,
> + NVME_SQ_FLAGS_QPRIO(c->sq_flags),
> NVME_SQ_FLAGS_PC(c->sq_flags))) {
> + g_free(sq);
> + return NVME_INVALID_FIELD | NVME_DNR;
> + }
> return NVME_SUCCESS;
> }
>
> @@ -640,8 +700,12 @@ static void nvme_free_cq(NvmeCQueue *cq, NvmeCtrl *n)
> qemu_del_timer(cq->timer);
> qemu_free_timer(cq->timer);
> msix_vector_unuse(&n->dev, cq->vector);
> - if (cq->id)
> + if (cq->prp_list) {
> + g_free(cq->prp_list);
> + }
> + if (cq->id) {
> g_free(cq);
> + }
>
> }
>
> @@ -662,22 +726,32 @@ static uint16_t nvme_del_cq(NvmeCtrl *n, NvmeCmd *cmd)
> return NVME_SUCCESS;
> }
>
> -static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr,
> - uint16_t cqid, uint16_t vector, uint16_t size)
> +static uint16_t nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr,
> + uint16_t cqid, uint16_t vector, uint16_t size, int contig)
> {
> cq->ctrl = n;
> cq->id = cqid;
> cq->size = size;
> - cq->dma_addr = dma_addr;
> cq->phase = 1;
> cq->irq_enabled = 1;
> cq->vector = vector;
> cq->head = cq->tail = 0;
> + cq->phys_contig = contig;
> + if (cq->phys_contig) {
> + cq->dma_addr = dma_addr;
> + } else {
> + cq->prp_list = nvme_setup_discontig(n, dma_addr, size,
> + n->cqe_size);
> + if (!cq->prp_list) {
> + return NVME_INVALID_FIELD | NVME_DNR;
> + }
> + }
> QTAILQ_INIT(&cq->req_list);
> QTAILQ_INIT(&cq->sq_list);
> msix_vector_use(&n->dev, cq->vector);
> n->cq[cqid] = cq;
> cq->timer = qemu_new_timer_ns(vm_clock, nvme_post_cqes, cq);
> + return NVME_SUCCESS;
> }
>
> static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd *cmd)
> @@ -696,12 +770,16 @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd
> *cmd)
> if (c->irq_vector > n->num_queues) {
> return NVME_INVALID_IRQ_VECTOR;
> }
> - if (!(NVME_CQ_FLAGS_PC(c->cq_flags))) {
> + if (!(NVME_CQ_FLAGS_PC(c->cq_flags)) && NVME_CAP_CQR(n->bar.cap)) {
> return NVME_INVALID_FIELD | NVME_DNR;
> }
>
> cq = g_malloc0(sizeof(*cq));
> - nvme_init_cq(cq, n, c->prp1, c->cqid, c->irq_vector, c->qsize + 1);
> + if (nvme_init_cq(cq, n, c->prp1, c->cqid, c->irq_vector, c->qsize + 1,
> + NVME_CQ_FLAGS_PC(c->cq_flags))) {
> + g_free(cq);
> + return NVME_INVALID_FIELD | NVME_DNR;
> + }
> return NVME_SUCCESS;
> }
>
> @@ -1001,8 +1079,14 @@ static uint16_t nvme_abort_req(NvmeCtrl *n, NvmeCmd
> *cmd, uint32_t *result)
> sq = n->sq[sqid];
> while ((sq->head + index) % sq->size != sq->tail) {
> NvmeCmd abort_cmd;
> - hwaddr addr = sq->dma_addr + ((sq->head + index) % sq->size) *
> - n->sqe_size;
> + hwaddr addr;
> + if (sq->phys_contig) {
> + addr = sq->dma_addr + ((sq->head + index) % sq->size) *
> + n->sqe_size;
> + } else {
> + addr = nvme_discontig(sq->prp_list, (sq->head + index) %
> sq->size,
> + n->page_size, n->sqe_size);
> + }
> pci_dma_read(&n->dev, addr, (void *)&abort_cmd, sizeof(abort_cmd));
> if (abort_cmd.cid == cid) {
> NvmeRequest *req = QTAILQ_FIRST(&sq->req_list);
> @@ -1150,7 +1234,12 @@ static void nvme_sq_process(void *opaque)
>
> while (!(nvme_sq_empty(sq) || QTAILQ_EMPTY(&sq->req_list)) &&
> processed++ < sq->arb_burst) {
> - addr = sq->dma_addr + sq->head * n->sqe_size;
> + if (sq->phys_contig) {
> + addr = sq->dma_addr + sq->head * n->sqe_size;
> + } else {
> + addr = nvme_discontig(sq->prp_list, sq->head, n->page_size,
> + n->sqe_size);
> + }
> pci_dma_read(&n->dev, addr, (void *)&cmd, sizeof(cmd));
> nvme_inc_sq_head(sq);
>
> @@ -1207,7 +1296,6 @@ static int nvme_start_ctrl(NvmeCtrl *n)
> {
> uint32_t page_bits = NVME_CC_MPS(n->bar.cc) + 12;
> uint32_t page_size = 1 << page_bits;
> -
> if (n->cq[0] || n->sq[0]) {
> return -1;
> }
> @@ -1236,20 +1324,21 @@ static int nvme_start_ctrl(NvmeCtrl *n)
> return -1;
> }
>
> - n->page_bits = NVME_CC_MPS(n->bar.cc) + 12;
> + n->page_bits = page_bits;
> n->page_size = 1 << n->page_bits;
> n->max_prp_ents = n->page_size / sizeof(uint64_t);
> n->cqe_size = 1 << NVME_CC_IOCQES(n->bar.cc);
> n->sqe_size = 1 << NVME_CC_IOSQES(n->bar.cc);
> -
> - nvme_init_cq(&n->admin_cq, n, n->bar.acq, 0, 0,
> - NVME_AQA_ACQS(n->bar.aqa) + 1);
> - nvme_init_sq(&n->admin_sq, n, n->bar.asq, 0, 0,
> - NVME_AQA_ASQS(n->bar.aqa) + 1, NVME_Q_PRIO_HIGH);
> -
> + if (nvme_init_cq(&n->admin_cq, n, n->bar.acq, 0, 0,
> + NVME_AQA_ACQS(n->bar.aqa) + 1, 1)) {
> + return -1;
> + }
> + if (nvme_init_sq(&n->admin_sq, n, n->bar.asq, 0, 0,
> + NVME_AQA_ASQS(n->bar.aqa) + 1, NVME_Q_PRIO_HIGH, 1)) {
> + return -1;
> + }
> n->aer_timer = qemu_new_timer_ns(vm_clock, nvme_aer_process_cb, n);
> QSIMPLEQ_INIT(&n->aer_queue);
> -
> return 0;
> }
>
> @@ -1426,7 +1515,7 @@ static int nvme_init(PCIDevice *pci_dev)
> n->max_sqes < NVME_MIN_SQUEUE_ES || n->max_cqes <
> NVME_MIN_CQUEUE_ES) {
> return -1;
> }
> - if (n->cqr != 1) {
> + if (n->cqr > 1) {
> return -1;
> }
> if (n->vwc > 1) {
> diff --git a/hw/nvme.h b/hw/nvme.h
> index b7d6615..926f843 100644
> --- a/hw/nvme.h
> +++ b/hw/nvme.h
> @@ -612,6 +612,7 @@ typedef struct NvmeSQueue {
> uint32_t size;
> uint64_t dma_addr;
> uint64_t completed;
> + uint64_t *prp_list;
> QEMUTimer *timer;
> QTAILQ_ENTRY(NvmeSQueue) entry;
> NvmeRequest *io_req;
> @@ -630,6 +631,7 @@ typedef struct NvmeCQueue {
> uint32_t vector;
> uint32_t size;
> uint64_t dma_addr;
> + uint64_t *prp_list;
> QEMUTimer *timer;
> QTAILQ_HEAD(sq_list, NvmeSQueue) sq_list;
> QTAILQ_HEAD(cq_req_list, NvmeRequest) req_list;
> --
> 1.7.0.4
- [Qemu-devel] [PATCH 00/11] *** SUBJECT HERE ***, Keith Busch, 2013/02/26
- [Qemu-devel] [PATCH 08/11] QEMU NVMe: Enqueue critial temperature event, Keith Busch, 2013/02/26
- [Qemu-devel] [PATCH 03/11] QEMU NVMe: Implement NVMe features, Keith Busch, 2013/02/26
- [Qemu-devel] [PATCH 02/11] QEMU NVMe: Add command line options, Keith Busch, 2013/02/26
- [Qemu-devel] [PATCH 06/11] QEMU NVMe: Implement flush and dsm, Keith Busch, 2013/02/26
- [Qemu-devel] [PATCH 07/11] QEMU NVMe: Set error pages with error data, Keith Busch, 2013/02/26
- [Qemu-devel] [PATCH 05/11] QEMU NVMe: Add DSM command support, Keith Busch, 2013/02/26
- [Qemu-devel] [PATCH 09/11] QEMU NVMe: Implement discontiguous queues, Keith Busch, 2013/02/26
- Re: [Qemu-devel] [PATCH 09/11] QEMU NVMe: Implement discontiguous queues, Anthony Liguori <=
- [Qemu-devel] [PATCH 04/11] QEMU NVMe: Implement additional admin commands, Keith Busch, 2013/02/26
- [Qemu-devel] [PATCH 10/11] QEMU NVMe: Add logging, Keith Busch, 2013/02/26
- [Qemu-devel] [PATCH 11/11] QEMU NVMe: Support NVMe DIF and Meta-data, Keith Busch, 2013/02/26
- [Qemu-devel] [PATCH 01/11] NVMe: Initial commit for NVM Express device, Keith Busch, 2013/02/26