[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH 14/16] hw/block/nvme: allow open to close transitions by controller
From: Klaus Jensen
Subject: [PATCH 14/16] hw/block/nvme: allow open to close transitions by controller
Date: Thu, 24 Sep 2020 22:45:14 +0200
From: Klaus Jensen <k.jensen@samsung.com>
Allow the controller to release open resources by transitioning
implicitly opened zones to closed. Explicitly opened zones, and zones
that still have writes in flight, are skipped. Candidates are picked
using a naive "least recently opened" strategy.
Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
---
hw/block/nvme-ns.h | 5 +++
hw/block/nvme-ns.c | 5 +++
hw/block/nvme.c | 102 +++++++++++++++++++++++++++++++++++-------
hw/block/trace-events | 5 +++
4 files changed, 102 insertions(+), 15 deletions(-)
diff --git a/hw/block/nvme-ns.h b/hw/block/nvme-ns.h
index f520ffa89c98..1fdcdf706ff6 100644
--- a/hw/block/nvme-ns.h
+++ b/hw/block/nvme-ns.h
@@ -38,6 +38,8 @@ typedef struct NvmeZone {
uint8_t *zde;
uint64_t wp_staging;
+
+ QTAILQ_ENTRY(NvmeZone) lru_entry;
} NvmeZone;
typedef struct NvmeNamespace {
@@ -77,6 +79,9 @@ typedef struct NvmeNamespace {
struct {
uint32_t open;
uint32_t active;
+
+ QTAILQ_HEAD(, NvmeZone) lru_open;
+ QTAILQ_HEAD(, NvmeZone) lru_active;
} resources;
} zns;
} NvmeNamespace;
diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c
index 588fe7a1f018..547090282660 100644
--- a/hw/block/nvme-ns.c
+++ b/hw/block/nvme-ns.c
@@ -126,6 +126,9 @@ static void nvme_ns_init_zoned(NvmeNamespace *ns)
ns->params.zns.mar + 1 : ns->zns.num_zones;
ns->zns.resources.open = ns->params.zns.mor != 0xffffffff ?
ns->params.zns.mor + 1 : ns->zns.num_zones;
+
+ QTAILQ_INIT(&ns->zns.resources.lru_open);
+ QTAILQ_INIT(&ns->zns.resources.lru_active);
}
static void nvme_ns_init(NvmeNamespace *ns)
@@ -259,6 +262,8 @@ static int nvme_ns_setup_blk_pstate(NvmeNamespace *ns,
Error **errp)
if (ns->zns.resources.active) {
ns->zns.resources.active--;
+ QTAILQ_INSERT_TAIL(&ns->zns.resources.lru_active, zone,
+ lru_entry);
continue;
}
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 79732b8a8574..a43a593ab89e 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -1192,12 +1192,61 @@ static inline void nvme_zone_reset_wp(NvmeZone *zone)
zone->wp_staging = nvme_zslba(zone);
}
-static uint16_t nvme_zrm_transition(NvmeNamespace *ns, NvmeZone *zone,
- NvmeZoneState to)
+static uint16_t nvme_zrm_transition(NvmeCtrl *n, NvmeNamespace *ns,
+ NvmeZone *zone, NvmeZoneState to,
+ NvmeRequest *req);
+
+static uint16_t nvme_zrm_release_open(NvmeCtrl *n, NvmeNamespace *ns,
+ NvmeRequest *req)
+{
+ NvmeZone *candidate;
+ NvmeZoneState zs;
+ uint16_t status;
+
+ trace_pci_nvme_zone_zrm_release_open(nvme_cid(req), ns->params.nsid);
+
+ QTAILQ_FOREACH(candidate, &ns->zns.resources.lru_open, lru_entry) {
+ zs = nvme_zs(candidate);
+
+ trace_pci_nvme_zone_zrm_candidate(nvme_cid(req), ns->params.nsid,
+ nvme_zslba(candidate),
+ nvme_wp(candidate), zs);
+
+ /* skip explicitly opened zones */
+ if (zs == NVME_ZS_ZSEO) {
+ continue;
+ }
+
+ /* the zone cannot be closed if it is currently writing */
+ if (candidate->wp_staging != nvme_wp(candidate)) {
+ continue;
+ }
+
+ status = nvme_zrm_transition(n, ns, candidate, NVME_ZS_ZSC, req);
+ if (status) {
+ return status;
+ }
+
+ if (nvme_zns_commit_zone(ns, candidate) < 0) {
+ return NVME_INTERNAL_DEV_ERROR;
+ }
+
+ return NVME_SUCCESS;
+ }
+
+ return NVME_TOO_MANY_OPEN_ZONES;
+}
+
+static uint16_t nvme_zrm_transition(NvmeCtrl *n, NvmeNamespace *ns,
+ NvmeZone *zone, NvmeZoneState to,
+ NvmeRequest *req)
{
NvmeZoneState from = nvme_zs(zone);
+ uint16_t status;
+
+ trace_pci_nvme_zone_zrm_transition(nvme_cid(req), ns->params.nsid,
+ nvme_zslba(zone), nvme_zs(zone), to);
- /* fast path */
if (from == to) {
return NVME_SUCCESS;
}
@@ -1212,25 +1261,32 @@ static uint16_t nvme_zrm_transition(NvmeNamespace *ns,
NvmeZone *zone,
case NVME_ZS_ZSC:
if (!ns->zns.resources.active) {
+ trace_pci_nvme_err_too_many_active_zones(nvme_cid(req));
return NVME_TOO_MANY_ACTIVE_ZONES;
}
ns->zns.resources.active--;
+ QTAILQ_INSERT_TAIL(&ns->zns.resources.lru_active, zone, lru_entry);
break;
case NVME_ZS_ZSIO:
case NVME_ZS_ZSEO:
if (!ns->zns.resources.active) {
+ trace_pci_nvme_err_too_many_active_zones(nvme_cid(req));
return NVME_TOO_MANY_ACTIVE_ZONES;
}
if (!ns->zns.resources.open) {
- return NVME_TOO_MANY_OPEN_ZONES;
+ status = nvme_zrm_release_open(n, ns, req);
+ if (status) {
+ return status;
+ }
}
ns->zns.resources.active--;
ns->zns.resources.open--;
+ QTAILQ_INSERT_TAIL(&ns->zns.resources.lru_open, zone, lru_entry);
break;
@@ -1259,11 +1315,15 @@ static uint16_t nvme_zrm_transition(NvmeNamespace *ns,
NvmeZone *zone,
case NVME_ZS_ZSF:
case NVME_ZS_ZSRO:
ns->zns.resources.active++;
+ ns->zns.resources.open++;
+ QTAILQ_REMOVE(&ns->zns.resources.lru_open, zone, lru_entry);
- /* fallthrough */
+ break;
case NVME_ZS_ZSC:
ns->zns.resources.open++;
+ QTAILQ_REMOVE(&ns->zns.resources.lru_open, zone, lru_entry);
+ QTAILQ_INSERT_TAIL(&ns->zns.resources.lru_active, zone, lru_entry);
break;
@@ -1288,16 +1348,22 @@ static uint16_t nvme_zrm_transition(NvmeNamespace *ns,
NvmeZone *zone,
case NVME_ZS_ZSF:
case NVME_ZS_ZSRO:
ns->zns.resources.active++;
+ QTAILQ_REMOVE(&ns->zns.resources.lru_active, zone, lru_entry);
break;
case NVME_ZS_ZSIO:
case NVME_ZS_ZSEO:
if (!ns->zns.resources.open) {
- return NVME_TOO_MANY_OPEN_ZONES;
+ status = nvme_zrm_release_open(n, ns, req);
+ if (status) {
+ return status;
+ }
}
ns->zns.resources.open--;
+ QTAILQ_REMOVE(&ns->zns.resources.lru_active, zone, lru_entry);
+ QTAILQ_INSERT_TAIL(&ns->zns.resources.lru_open, zone, lru_entry);
break;
@@ -1321,6 +1387,9 @@ static uint16_t nvme_zrm_transition(NvmeNamespace *ns,
NvmeZone *zone,
case NVME_ZS_ZSF:
switch (to) {
+ case NVME_ZS_ZSF:
+ return NVME_SUCCESS;
+
case NVME_ZS_ZSE:
nvme_zone_reset_wp(zone);
@@ -1359,7 +1428,9 @@ static void nvme_zns_advance_wp(NvmeRequest *req)
wp += nlb;
zone->zd->wp = cpu_to_le64(wp);
if (wp == nvme_zslba(zone) + nvme_zcap(zone)) {
- nvme_zrm_transition(req->ns, zone, NVME_ZS_ZSF);
+ NvmeCtrl *n = nvme_ctrl(req);
+
+ nvme_zrm_transition(n, req->ns, zone, NVME_ZS_ZSF, req);
if (nvme_zns_commit_zone(req->ns, zone) < 0) {
req->status = NVME_INTERNAL_DEV_ERROR;
}
@@ -1416,6 +1487,7 @@ static void nvme_rw_cb(void *opaque, int ret)
uint64_t slba = le64_to_cpu(rw->slba);
NvmeZone *zone = nvme_ns_get_zone(ns, slba);
+ NvmeCtrl *n = nvme_ctrl(req);
/*
* Transition the zone to read-only on write fault and offline
@@ -1424,7 +1496,7 @@ static void nvme_rw_cb(void *opaque, int ret)
NvmeZoneState zs = status == NVME_WRITE_FAULT ?
NVME_ZS_ZSRO : NVME_ZS_ZSO;
- nvme_zrm_transition(ns, zone, zs);
+ nvme_zrm_transition(n, ns, zone, zs, req);
if (nvme_zns_commit_zone(ns, zone) < 0) {
req->status = NVME_INTERNAL_DEV_ERROR;
}
@@ -1518,7 +1590,7 @@ static uint16_t nvme_zone_mgmt_send_close(NvmeCtrl *n,
NvmeRequest *req,
break;
}
- status = nvme_zrm_transition(ns, zone, NVME_ZS_ZSC);
+ status = nvme_zrm_transition(n, ns, zone, NVME_ZS_ZSC, req);
if (status) {
return status;
}
@@ -1543,7 +1615,7 @@ static uint16_t nvme_zone_mgmt_send_finish(NvmeCtrl *n,
NvmeRequest *req,
return NVME_SUCCESS;
}
- status = nvme_zrm_transition(ns, zone, NVME_ZS_ZSF);
+ status = nvme_zrm_transition(n, ns, zone, NVME_ZS_ZSF, req);
if (status) {
return status;
}
@@ -1568,7 +1640,7 @@ static uint16_t nvme_zone_mgmt_send_open(NvmeCtrl *n,
NvmeRequest *req,
return NVME_SUCCESS;
}
- status = nvme_zrm_transition(ns, zone, NVME_ZS_ZSEO);
+ status = nvme_zrm_transition(n, ns, zone, NVME_ZS_ZSEO, req);
if (status) {
return status;
}
@@ -1604,7 +1676,7 @@ static uint16_t nvme_zone_mgmt_send_reset(NvmeCtrl *n,
NvmeRequest *req,
return NVME_INTERNAL_DEV_ERROR;
}
- nvme_zrm_transition(ns, zone, NVME_ZS_ZSE);
+ nvme_zrm_transition(n, ns, zone, NVME_ZS_ZSE, req);
if (nvme_zns_commit_zone(ns, zone) < 0) {
return NVME_INTERNAL_DEV_ERROR;
}
@@ -1635,7 +1707,7 @@ static uint16_t nvme_zone_mgmt_send_offline(NvmeCtrl *n,
NvmeRequest *req,
return NVME_INTERNAL_DEV_ERROR;
}
- nvme_zrm_transition(ns, zone, NVME_ZS_ZSO);
+ nvme_zrm_transition(n, ns, zone, NVME_ZS_ZSO, req);
if (nvme_zns_commit_zone(ns, zone) < 0) {
return NVME_INTERNAL_DEV_ERROR;
}
@@ -1679,7 +1751,7 @@ static uint16_t nvme_zone_mgmt_send_set_zde(NvmeCtrl *n,
NvmeRequest *req,
return status;
}
- status = nvme_zrm_transition(ns, zone, NVME_ZS_ZSC);
+ status = nvme_zrm_transition(n, ns, zone, NVME_ZS_ZSC, req);
if (status) {
return status;
}
@@ -2094,7 +2166,7 @@ static uint16_t nvme_rwz(NvmeCtrl *n, NvmeRequest *req)
case NVME_ZS_ZSEO:
break;
default:
- status = nvme_zrm_transition(ns, zone, NVME_ZS_ZSIO);
+ status = nvme_zrm_transition(n, ns, zone, NVME_ZS_ZSIO, req);
if (status) {
goto invalid;
}
diff --git a/hw/block/trace-events b/hw/block/trace-events
index 929409b79b41..18f7b24ef5e9 100644
--- a/hw/block/trace-events
+++ b/hw/block/trace-events
@@ -88,6 +88,9 @@ pci_nvme_mmio_read(uint64_t addr) "addr 0x%"PRIx64""
pci_nvme_mmio_write(uint64_t addr, uint64_t data) "addr 0x%"PRIx64" data
0x%"PRIx64""
pci_nvme_mmio_doorbell_cq(uint16_t cqid, uint16_t new_head) "cqid %"PRIu16"
new_head %"PRIu16""
pci_nvme_mmio_doorbell_sq(uint16_t sqid, uint16_t new_tail) "sqid %"PRIu16"
new_tail %"PRIu16""
+pci_nvme_zone_zrm_transition(uint16_t cid, uint32_t nsid, uint64_t zslba,
uint8_t from, uint8_t to) "cid %"PRIu16" nsid %"PRIu32" zslba 0x%"PRIx64" from
0x%"PRIx8" to 0x%"PRIx8""
+pci_nvme_zone_zrm_candidate(uint16_t cid, uint32_t nsid, uint64_t zslba,
uint64_t wp, uint8_t zc) "cid %"PRIu16" nsid %"PRIu32" zslba 0x%"PRIx64" wp
0x%"PRIx64" zc 0x%"PRIx8""
+pci_nvme_zone_zrm_release_open(uint16_t cid, uint32_t nsid) "cid %"PRIu16"
nsid %"PRIu32""
pci_nvme_mmio_intm_set(uint64_t data, uint64_t new_mask) "wrote MMIO,
interrupt mask set, data=0x%"PRIx64", new_mask=0x%"PRIx64""
pci_nvme_mmio_intm_clr(uint64_t data, uint64_t new_mask) "wrote MMIO,
interrupt mask clr, data=0x%"PRIx64", new_mask=0x%"PRIx64""
pci_nvme_mmio_cfg(uint64_t data) "wrote MMIO, config controller
config=0x%"PRIx64""
@@ -115,6 +118,8 @@ pci_nvme_err_zone_is_read_only(uint16_t cid, uint64_t slba)
"cid %"PRIu16" lba 0
pci_nvme_err_zone_invalid_write(uint16_t cid, uint64_t slba, uint64_t wp) "cid
%"PRIu16" lba 0x%"PRIx64" wp 0x%"PRIx64""
pci_nvme_err_zone_boundary(uint16_t cid, uint64_t slba, uint32_t nlb, uint64_t
zcap) "cid %"PRIu16" lba 0x%"PRIx64" nlb %"PRIu32" zcap 0x%"PRIx64""
pci_nvme_err_zone_pending_writes(uint16_t cid, uint64_t zslba, uint64_t wp,
uint64_t wp_staging) "cid %"PRIu16" zslba 0x%"PRIx64" wp 0x%"PRIx64" wp_staging
0x%"PRIx64""
+pci_nvme_err_too_many_active_zones(uint16_t cid) "cid %"PRIu16""
+pci_nvme_err_too_many_open_zones(uint16_t cid) "cid %"PRIu16""
pci_nvme_err_invalid_sgld(uint16_t cid, uint8_t typ) "cid %"PRIu16" type
0x%"PRIx8""
pci_nvme_err_invalid_num_sgld(uint16_t cid, uint8_t typ) "cid %"PRIu16" type
0x%"PRIx8""
pci_nvme_err_invalid_sgl_excess_length(uint16_t cid) "cid %"PRIu16""
--
2.28.0
- Re: [PATCH 03/16] hw/block/nvme: make lba data size configurable, (continued)
- [PATCH 05/16] hw/block/nvme: consolidate read, write and write zeroes, Klaus Jensen, 2020/09/24
- [PATCH 07/16] hw/block/nvme: add commands supported and effects log page, Klaus Jensen, 2020/09/24
- [PATCH 08/16] hw/block/nvme: support namespace types, Klaus Jensen, 2020/09/24
- [PATCH 10/16] hw/block/nvme: add the zone management receive command, Klaus Jensen, 2020/09/24
- [PATCH 09/16] hw/block/nvme: add basic read/write for zoned namespaces, Klaus Jensen, 2020/09/24
- [PATCH 11/16] hw/block/nvme: add the zone management send command, Klaus Jensen, 2020/09/24
- [PATCH 13/16] hw/block/nvme: track and enforce zone resources, Klaus Jensen, 2020/09/24
- [PATCH 16/16] hw/block/nvme: support reset/finish recommended limits, Klaus Jensen, 2020/09/24
- [PATCH 12/16] hw/block/nvme: add the zone append command, Klaus Jensen, 2020/09/24
- [PATCH 14/16] hw/block/nvme: allow open to close transitions by controller, Klaus Jensen <=
- [PATCH 15/16] hw/block/nvme: support zone active excursions, Klaus Jensen, 2020/09/24
- Re: [PATCH 00/16] hw/block/nvme: zoned namespace command set, no-reply, 2020/09/24
- Re: [PATCH 00/16] hw/block/nvme: zoned namespace command set, Keith Busch, 2020/09/24
- RE: [PATCH 00/16] hw/block/nvme: zoned namespace command set, Dmitry Fomichev, 2020/09/25