[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH v5 20/26] nvme: handle dma errors
From: |
Klaus Birkelund Jensen |
Subject: |
Re: [PATCH v5 20/26] nvme: handle dma errors |
Date: |
Mon, 16 Mar 2020 00:53:31 -0700 |
On Feb 12 13:52, Maxim Levitsky wrote:
> On Tue, 2020-02-04 at 10:52 +0100, Klaus Jensen wrote:
> > Handling DMA errors gracefully is required for the device to pass the
> > block/011 test ("disable PCI device while doing I/O") in the blktests
> > suite.
> >
> > With this patch the device passes the test by retrying "critical"
> > transfers (posting of completion entries and processing of submission
> > queue entries).
> >
> > If DMA errors occur at any other point in the execution of the command
> > (say, while mapping the PRPs), the command is aborted with a Data
> > Transfer Error status code.
> >
> > Signed-off-by: Klaus Jensen <address@hidden>
> > ---
> > hw/block/nvme.c | 42 +++++++++++++++++++++++++++++++++---------
> > hw/block/trace-events | 2 ++
> > include/block/nvme.h | 2 +-
> > 3 files changed, 36 insertions(+), 10 deletions(-)
> >
> > diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> > index f8c81b9e2202..204ae1d33234 100644
> > --- a/hw/block/nvme.c
> > +++ b/hw/block/nvme.c
> > @@ -73,14 +73,14 @@ static inline bool nvme_addr_is_cmb(NvmeCtrl *n, hwaddr
> > addr)
> > return addr >= low && addr < hi;
> > }
> >
> > -static void nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size)
> > +static int nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size)
> > {
> > if (n->cmbsz && nvme_addr_is_cmb(n, addr)) {
> > memcpy(buf, (void *) &n->cmbuf[addr - n->ctrl_mem.addr], size);
> > - return;
> > + return 0;
> > }
> >
> > - pci_dma_read(&n->parent_obj, addr, buf, size);
> > + return pci_dma_read(&n->parent_obj, addr, buf, size);
> > }
> >
> > static int nvme_check_sqid(NvmeCtrl *n, uint16_t sqid)
> > @@ -168,6 +168,7 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList
> > *qsg, QEMUIOVector *iov,
> > uint16_t status = NVME_SUCCESS;
> > bool is_cmb = false;
> > bool prp_list_in_cmb = false;
> > + int ret;
> >
> > trace_nvme_dev_map_prp(nvme_cid(req), req->cmd.opcode, trans_len, len,
> > prp1, prp2, num_prps);
> > @@ -218,7 +219,12 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList
> > *qsg, QEMUIOVector *iov,
> >
> > nents = (len + n->page_size - 1) >> n->page_bits;
> > prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t);
> > - nvme_addr_read(n, prp2, (void *) prp_list, prp_trans);
> > + ret = nvme_addr_read(n, prp2, (void *) prp_list, prp_trans);
> > + if (ret) {
> > + trace_nvme_dev_err_addr_read(prp2);
> > + status = NVME_DATA_TRANSFER_ERROR;
> > + goto unmap;
> > + }
> > while (len != 0) {
> > uint64_t prp_ent = le64_to_cpu(prp_list[i]);
> >
> > @@ -237,7 +243,13 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList
> > *qsg, QEMUIOVector *iov,
> > i = 0;
> > nents = (len + n->page_size - 1) >> n->page_bits;
> > prp_trans = MIN(n->max_prp_ents, nents) *
> > sizeof(uint64_t);
> > - nvme_addr_read(n, prp_ent, (void *) prp_list,
> > prp_trans);
> > + ret = nvme_addr_read(n, prp_ent, (void *) prp_list,
> > + prp_trans);
> > + if (ret) {
> > + trace_nvme_dev_err_addr_read(prp_ent);
> > + status = NVME_DATA_TRANSFER_ERROR;
> > + goto unmap;
> > + }
> > prp_ent = le64_to_cpu(prp_list[i]);
> > }
> >
> > @@ -443,6 +455,7 @@ static void nvme_post_cqes(void *opaque)
> > NvmeCQueue *cq = opaque;
> > NvmeCtrl *n = cq->ctrl;
> > NvmeRequest *req, *next;
> > + int ret;
> >
> > QTAILQ_FOREACH_SAFE(req, &cq->req_list, entry, next) {
> > NvmeSQueue *sq;
> > @@ -452,15 +465,21 @@ static void nvme_post_cqes(void *opaque)
> > break;
> > }
> >
> > - QTAILQ_REMOVE(&cq->req_list, req, entry);
> > sq = req->sq;
> > req->cqe.status = cpu_to_le16((req->status << 1) | cq->phase);
> > req->cqe.sq_id = cpu_to_le16(sq->sqid);
> > req->cqe.sq_head = cpu_to_le16(sq->head);
> > addr = cq->dma_addr + cq->tail * n->cqe_size;
> > - nvme_inc_cq_tail(cq);
> > - pci_dma_write(&n->parent_obj, addr, (void *)&req->cqe,
> > + ret = pci_dma_write(&n->parent_obj, addr, (void *)&req->cqe,
> > sizeof(req->cqe));
> > + if (ret) {
> > + trace_nvme_dev_err_addr_write(addr);
> > + timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
> > + 100 * SCALE_MS);
> > + break;
> > + }
> > + QTAILQ_REMOVE(&cq->req_list, req, entry);
> > + nvme_inc_cq_tail(cq);
> > nvme_req_clear(req);
> > QTAILQ_INSERT_TAIL(&sq->req_list, req, entry);
> > }
> > @@ -1588,7 +1607,12 @@ static void nvme_process_sq(void *opaque)
> >
> > while (!(nvme_sq_empty(sq) || QTAILQ_EMPTY(&sq->req_list))) {
> > addr = sq->dma_addr + sq->head * n->sqe_size;
> > - nvme_addr_read(n, addr, (void *)&cmd, sizeof(cmd));
> > + if (nvme_addr_read(n, addr, (void *)&cmd, sizeof(cmd))) {
> > + trace_nvme_dev_err_addr_read(addr);
> > + timer_mod(sq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
> > + 100 * SCALE_MS);
> > + break;
> > + }
>
> Note that once the driver is optimized for performance, these timers must go,
> since they run on main thread and also add latency to each request.
> But for now this change is all right.
>
> About user triggering this each 100ms on purpose, I don't think that this is
> such a big issue.
> Maybe up that to 500ms or even one second, since this condition will not
> happen in real life usage of the device anyway.
>
I bumped it to 500ms.
> > nvme_inc_sq_head(sq);
> >
> > req = QTAILQ_FIRST(&sq->req_list);
> > diff --git a/hw/block/trace-events b/hw/block/trace-events
> > index 90a57fb6099a..09bfb3782dd0 100644
> > --- a/hw/block/trace-events
> > +++ b/hw/block/trace-events
> > @@ -83,6 +83,8 @@ nvme_dev_mmio_shutdown_cleared(void) "shutdown bit
> > cleared"
> > nvme_dev_err_mdts(uint16_t cid, size_t mdts, size_t len) "cid %"PRIu16"
> > mdts %"PRIu64" len %"PRIu64""
> > nvme_dev_err_prinfo(uint16_t cid, uint16_t ctrl) "cid %"PRIu16" ctrl
> > %"PRIu16""
> > nvme_dev_err_aio(uint16_t cid, void *aio, const char *blkname, uint64_t
> > offset, const char *opc, void *req, uint16_t status) "cid %"PRIu16" aio %p
> > blk \"%s\" offset %"PRIu64" opc \"%s\" req %p
> > status 0x%"PRIx16""
> > +nvme_dev_err_addr_read(uint64_t addr) "addr 0x%"PRIx64""
> > +nvme_dev_err_addr_write(uint64_t addr) "addr 0x%"PRIx64""
> > nvme_dev_err_invalid_dma(void) "PRP/SGL is too small for transfer size"
> > nvme_dev_err_invalid_prplist_ent(uint64_t prplist) "PRP list entry is null
> > or not page aligned: 0x%"PRIx64""
> > nvme_dev_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not page aligned:
> > 0x%"PRIx64""
> > diff --git a/include/block/nvme.h b/include/block/nvme.h
> > index c1de92179596..a873776d98b8 100644
> > --- a/include/block/nvme.h
> > +++ b/include/block/nvme.h
> > @@ -418,7 +418,7 @@ enum NvmeStatusCodes {
> > NVME_INVALID_OPCODE = 0x0001,
> > NVME_INVALID_FIELD = 0x0002,
> > NVME_CID_CONFLICT = 0x0003,
> > - NVME_DATA_TRAS_ERROR = 0x0004,
> > + NVME_DATA_TRANSFER_ERROR = 0x0004,
> > NVME_POWER_LOSS_ABORT = 0x0005,
> > NVME_INTERNAL_DEV_ERROR = 0x0006,
> > NVME_CMD_ABORT_REQ = 0x0007,
>
>
> Best regards,
> Maxim Levitsky
>
- Re: [PATCH v5 20/26] nvme: handle dma errors,
Klaus Birkelund Jensen <=