From: Klaus Jensen
Subject: Re: [PATCH v10 09/12] hw/block/nvme: Introduce max active and open zone limits
Date: Thu, 12 Nov 2020 20:40:28 +0100

On Nov  7 08:43, Dmitry Fomichev wrote:
> Add two module properties, "zoned.max_active" and "zoned.max_open"
> to control the maximum number of zones that can be active or open.
> Once these variables are set to non-default values, these limits are
> checked during I/O and Too Many Active or Too Many Open command status
> is returned if they are exceeded.
> 
> Signed-off-by: Hans Holmberg <hans.holmberg@wdc.com>
> Signed-off-by: Dmitry Fomichev <dmitry.fomichev@wdc.com>
> Reviewed-by: Niklas Cassel <Niklas.Cassel@wdc.com>
> ---

For Zone Management Send, if the action is Open Zone and Select All is
set, it must be checked that there are enough resources for all Closed
zones to transition to Explicitly Opened - otherwise no state
transitions "shall" occur.
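
Something along these lines (untested, and the helper name is just for
illustration) could do that check before any per-zone transitions are
done:

static uint16_t nvme_check_open_all(NvmeNamespace *ns)
{
    NvmeZone *zone;
    uint32_t nr_closed = 0;

    /* all Closed zones would transition to Explicitly Opened */
    QTAILQ_FOREACH(zone, &ns->closed_zones, entry) {
        nr_closed++;
    }

    /*
     * Closed zones already hold an active resource, so only the open
     * resources need to be accounted for here.
     */
    return nvme_aor_check(ns, 0, nr_closed);
}

If that fails, the command should return Too Many Open Zones without
changing the state of any zone.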

>  hw/block/nvme-ns.h    | 41 +++++++++++++++++++
>  hw/block/nvme-ns.c    | 30 +++++++++++++-
>  hw/block/nvme.c       | 94 +++++++++++++++++++++++++++++++++++++++++++
>  hw/block/trace-events |  2 +
>  4 files changed, 165 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/block/nvme-ns.h b/hw/block/nvme-ns.h
> index d2631ff5a3..421bab0a57 100644
> --- a/hw/block/nvme-ns.h
> +++ b/hw/block/nvme-ns.h
> @@ -33,6 +33,8 @@ typedef struct NvmeNamespaceParams {
>      bool     cross_zone_read;
>      uint64_t zone_size_bs;
>      uint64_t zone_cap_bs;
> +    uint32_t max_active_zones;
> +    uint32_t max_open_zones;
>  } NvmeNamespaceParams;
>  
>  typedef struct NvmeNamespace {
> @@ -56,6 +58,8 @@ typedef struct NvmeNamespace {
>      uint64_t        zone_capacity;
>      uint64_t        zone_array_size;
>      uint32_t        zone_size_log2;
> +    int32_t         nr_open_zones;
> +    int32_t         nr_active_zones;
>  
>      NvmeNamespaceParams params;
>  } NvmeNamespace;
> @@ -123,6 +127,43 @@ static inline bool nvme_wp_is_valid(NvmeZone *zone)
>             st != NVME_ZONE_STATE_OFFLINE;
>  }
>  
> +static inline void nvme_aor_inc_open(NvmeNamespace *ns)
> +{
> +    assert(ns->nr_open_zones >= 0);
> +    if (ns->params.max_open_zones) {
> +        ns->nr_open_zones++;
> +        assert(ns->nr_open_zones <= ns->params.max_open_zones);
> +    }
> +}
> +
> +static inline void nvme_aor_dec_open(NvmeNamespace *ns)
> +{
> +    if (ns->params.max_open_zones) {
> +        assert(ns->nr_open_zones > 0);
> +        ns->nr_open_zones--;
> +    }
> +    assert(ns->nr_open_zones >= 0);
> +}
> +
> +static inline void nvme_aor_inc_active(NvmeNamespace *ns)
> +{
> +    assert(ns->nr_active_zones >= 0);
> +    if (ns->params.max_active_zones) {
> +        ns->nr_active_zones++;
> +        assert(ns->nr_active_zones <= ns->params.max_active_zones);
> +    }
> +}
> +
> +static inline void nvme_aor_dec_active(NvmeNamespace *ns)
> +{
> +    if (ns->params.max_active_zones) {
> +        assert(ns->nr_active_zones > 0);
> +        ns->nr_active_zones--;
> +        assert(ns->nr_active_zones >= ns->nr_open_zones);
> +    }
> +    assert(ns->nr_active_zones >= 0);
> +}
> +
>  int nvme_ns_setup(NvmeCtrl *n, NvmeNamespace *ns, Error **errp);
>  void nvme_ns_drain(NvmeNamespace *ns);
>  void nvme_ns_flush(NvmeNamespace *ns);
> diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c
> index e6db7f7d3b..2e45838c15 100644
> --- a/hw/block/nvme-ns.c
> +++ b/hw/block/nvme-ns.c
> @@ -119,6 +119,20 @@ static int nvme_calc_zone_geometry(NvmeNamespace *ns, Error **errp)
>          ns->zone_size_log2 = 63 - clz64(ns->zone_size);
>      }
>  
> +    /* Make sure that the values of all ZNS properties are sane */
> +    if (ns->params.max_open_zones > nz) {
> +        error_setg(errp,
> +                   "max_open_zones value %u exceeds the number of zones %u",
> +                   ns->params.max_open_zones, nz);
> +        return -1;
> +    }
> +    if (ns->params.max_active_zones > nz) {
> +        error_setg(errp,
> +                   "max_active_zones value %u exceeds the number of zones %u",
> +                   ns->params.max_active_zones, nz);
> +        return -1;
> +    }
> +
>      return 0;
>  }
>  
> @@ -166,8 +180,8 @@ static int nvme_zoned_init_ns(NvmeCtrl *n, NvmeNamespace *ns, int lba_index,
>      id_ns_z = g_malloc0(sizeof(NvmeIdNsZoned));
>  
>      /* MAR/MOR are zeroes-based, 0xffffffff means no limit */
> -    id_ns_z->mar = 0xffffffff;
> -    id_ns_z->mor = 0xffffffff;
> +    id_ns_z->mar = cpu_to_le32(ns->params.max_active_zones - 1);
> +    id_ns_z->mor = cpu_to_le32(ns->params.max_open_zones - 1);
>      id_ns_z->zoc = 0;
>      id_ns_z->ozcs = ns->params.cross_zone_read ? 0x01 : 0x00;
>  
> @@ -195,6 +209,7 @@ static void nvme_clear_zone(NvmeNamespace *ns, NvmeZone *zone)
>              trace_pci_nvme_clear_ns_close(state, zone->d.zslba);
>              nvme_set_zone_state(zone, NVME_ZONE_STATE_CLOSED);
>          }
> +        nvme_aor_inc_active(ns);
>          QTAILQ_INSERT_HEAD(&ns->closed_zones, zone, entry);
>      } else {
>          trace_pci_nvme_clear_ns_reset(state, zone->d.zslba);
> @@ -211,16 +226,23 @@ static void nvme_zoned_ns_shutdown(NvmeNamespace *ns)
>  
>      QTAILQ_FOREACH_SAFE(zone, &ns->closed_zones, entry, next) {
>          QTAILQ_REMOVE(&ns->closed_zones, zone, entry);
> +        nvme_aor_dec_active(ns);
>          nvme_clear_zone(ns, zone);
>      }
>      QTAILQ_FOREACH_SAFE(zone, &ns->imp_open_zones, entry, next) {
>          QTAILQ_REMOVE(&ns->imp_open_zones, zone, entry);
> +        nvme_aor_dec_open(ns);
> +        nvme_aor_dec_active(ns);
>          nvme_clear_zone(ns, zone);
>      }
>      QTAILQ_FOREACH_SAFE(zone, &ns->exp_open_zones, entry, next) {
>          QTAILQ_REMOVE(&ns->exp_open_zones, zone, entry);
> +        nvme_aor_dec_open(ns);
> +        nvme_aor_dec_active(ns);
>          nvme_clear_zone(ns, zone);
>      }
> +
> +    assert(ns->nr_open_zones == 0);
>  }
>  
>  static int nvme_ns_check_constraints(NvmeNamespace *ns, Error **errp)
> @@ -306,6 +328,10 @@ static Property nvme_ns_props[] = {
>      DEFINE_PROP_SIZE("zoned.zcap", NvmeNamespace, params.zone_cap_bs, 0),
>      DEFINE_PROP_BOOL("zoned.cross_read", NvmeNamespace,
>                       params.cross_zone_read, false),
> +    DEFINE_PROP_UINT32("zoned.max_active", NvmeNamespace,
> +                       params.max_active_zones, 0),
> +    DEFINE_PROP_UINT32("zoned.max_open", NvmeNamespace,
> +                       params.max_open_zones, 0),
>      DEFINE_PROP_END_OF_LIST(),
>  };
>  
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index f5390e6863..c6b3a5dcf7 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -199,6 +199,26 @@ static void nvme_assign_zone_state(NvmeNamespace *ns, NvmeZone *zone,
>      }
>  }
>  
> +/*
> + * Check if we can open a zone without exceeding open/active limits.
> + * AOR stands for "Active and Open Resources" (see TP 4053 section 2.5).
> + */
> +static int nvme_aor_check(NvmeNamespace *ns, uint32_t act, uint32_t opn)
> +{
> +    if (ns->params.max_active_zones != 0 &&
> +        ns->nr_active_zones + act > ns->params.max_active_zones) {
> +        trace_pci_nvme_err_insuff_active_res(ns->params.max_active_zones);
> +        return NVME_ZONE_TOO_MANY_ACTIVE | NVME_DNR;
> +    }
> +    if (ns->params.max_open_zones != 0 &&
> +        ns->nr_open_zones + opn > ns->params.max_open_zones) {
> +        trace_pci_nvme_err_insuff_open_res(ns->params.max_open_zones);
> +        return NVME_ZONE_TOO_MANY_OPEN | NVME_DNR;
> +    }
> +
> +    return NVME_SUCCESS;
> +}
> +
>  static bool nvme_addr_is_cmb(NvmeCtrl *n, hwaddr addr)
>  {
>      hwaddr low = n->ctrl_mem.addr;
> @@ -1193,6 +1213,41 @@ static uint16_t nvme_check_zone_read(NvmeNamespace *ns, uint64_t slba,
>      return status;
>  }
>  
> +static void nvme_auto_transition_zone(NvmeNamespace *ns, bool implicit,
> +                                      bool adding_active)
> +{
> +    NvmeZone *zone;
> +
> +    if (implicit && ns->params.max_open_zones &&
> +        ns->nr_open_zones == ns->params.max_open_zones) {
> +        zone = QTAILQ_FIRST(&ns->imp_open_zones);
> +        if (zone) {
> +            /*
> +             * Automatically close this implicitly open zone.
> +             */
> +            QTAILQ_REMOVE(&ns->imp_open_zones, zone, entry);
> +            nvme_aor_dec_open(ns);
> +            nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_CLOSED);
> +        }
> +    }
> +}

If implicit is false, this function is a no-op. Since you are always
calling it with a literal bool, there is no reason to even make that
call; you could just drop the parameters and call it only from
nvme_auto_open_zone. See the sketch below.
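
Something like this (untested; same body as your version, minus the
parameters):

static void nvme_auto_transition_zone(NvmeNamespace *ns)
{
    NvmeZone *zone;

    if (ns->params.max_open_zones &&
        ns->nr_open_zones == ns->params.max_open_zones) {
        zone = QTAILQ_FIRST(&ns->imp_open_zones);
        if (zone) {
            /* Automatically close this implicitly open zone */
            QTAILQ_REMOVE(&ns->imp_open_zones, zone, entry);
            nvme_aor_dec_open(ns);
            nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_CLOSED);
        }
    }
}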

> +
> +static uint16_t nvme_auto_open_zone(NvmeNamespace *ns, NvmeZone *zone)
> +{
> +    uint16_t status = NVME_SUCCESS;
> +    uint8_t zs = nvme_get_zone_state(zone);
> +
> +    if (zs == NVME_ZONE_STATE_EMPTY) {
> +        nvme_auto_transition_zone(ns, true, true);
> +        status = nvme_aor_check(ns, 1, 1);
> +    } else if (zs == NVME_ZONE_STATE_CLOSED) {
> +        nvme_auto_transition_zone(ns, true, false);
> +        status = nvme_aor_check(ns, 0, 1);
> +    }
> +
> +    return status;
> +}
> +
>  static bool nvme_finalize_zoned_write(NvmeNamespace *ns, NvmeRequest *req,
>                                        bool failed)
>  {
> @@ -1226,7 +1281,11 @@ static bool nvme_finalize_zoned_write(NvmeNamespace *ns, NvmeRequest *req,
>          switch (nvme_get_zone_state(zone)) {
>          case NVME_ZONE_STATE_IMPLICITLY_OPEN:
>          case NVME_ZONE_STATE_EXPLICITLY_OPEN:
> +            nvme_aor_dec_open(ns);
> +            /* fall through */
>          case NVME_ZONE_STATE_CLOSED:
> +            nvme_aor_dec_active(ns);
> +            /* fall through */
>          case NVME_ZONE_STATE_EMPTY:
>              nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_FULL);
>              /* fall through */
> @@ -1255,7 +1314,10 @@ static uint64_t nvme_advance_zone_wp(NvmeNamespace *ns, NvmeZone *zone,
>          zs = nvme_get_zone_state(zone);
>          switch (zs) {
>          case NVME_ZONE_STATE_EMPTY:
> +            nvme_aor_inc_active(ns);
> +            /* fall through */
>          case NVME_ZONE_STATE_CLOSED:
> +            nvme_aor_inc_open(ns);
>              nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_IMPLICITLY_OPEN);
>          }
>      }
> @@ -1449,6 +1511,11 @@ static uint16_t nvme_write(NvmeCtrl *n, NvmeRequest *req, bool append, bool wrz)
>              goto invalid;
>          }
>  
> +        status = nvme_auto_open_zone(ns, zone);
> +        if (status != NVME_SUCCESS) {
> +            goto invalid;
> +        }
> +
>          if (append) {
>              slba = zone->w_ptr;
>          }
> @@ -1529,9 +1596,27 @@ enum NvmeZoneProcessingMask {
>  static uint16_t nvme_open_zone(NvmeNamespace *ns, NvmeZone *zone,
>                                 uint8_t state)
>  {
> +    uint16_t status;
> +
>      switch (state) {
>      case NVME_ZONE_STATE_EMPTY:
> +        nvme_auto_transition_zone(ns, false, true);
> +        status = nvme_aor_check(ns, 1, 0);
> +        if (status != NVME_SUCCESS) {
> +            return status;
> +        }
> +        nvme_aor_inc_active(ns);
> +        /* fall through */
>      case NVME_ZONE_STATE_CLOSED:
> +        status = nvme_aor_check(ns, 0, 1);
> +        if (status != NVME_SUCCESS) {
> +            if (state == NVME_ZONE_STATE_EMPTY) {
> +                nvme_aor_dec_active(ns);
> +            }
> +            return status;
> +        }
> +        nvme_aor_inc_open(ns);
> +        /* fall through */
>      case NVME_ZONE_STATE_IMPLICITLY_OPEN:
>          nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_EXPLICITLY_OPEN);
>          /* fall through */
> @@ -1548,6 +1633,7 @@ static uint16_t nvme_close_zone(NvmeNamespace *ns, NvmeZone *zone,
>      switch (state) {
>      case NVME_ZONE_STATE_EXPLICITLY_OPEN:
>      case NVME_ZONE_STATE_IMPLICITLY_OPEN:
> +        nvme_aor_dec_open(ns);
>          nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_CLOSED);
>          /* fall through */
>      case NVME_ZONE_STATE_CLOSED:
> @@ -1563,7 +1649,11 @@ static uint16_t nvme_finish_zone(NvmeNamespace *ns, NvmeZone *zone,
>      switch (state) {
>      case NVME_ZONE_STATE_EXPLICITLY_OPEN:
>      case NVME_ZONE_STATE_IMPLICITLY_OPEN:
> +        nvme_aor_dec_open(ns);
> +        /* fall through */
>      case NVME_ZONE_STATE_CLOSED:
> +        nvme_aor_dec_active(ns);
> +        /* fall through */
>      case NVME_ZONE_STATE_EMPTY:
>          zone->w_ptr = nvme_zone_wr_boundary(zone);
>          zone->d.wp = zone->w_ptr;
> @@ -1582,7 +1672,11 @@ static uint16_t nvme_reset_zone(NvmeNamespace *ns, NvmeZone *zone,
>      switch (state) {
>      case NVME_ZONE_STATE_EXPLICITLY_OPEN:
>      case NVME_ZONE_STATE_IMPLICITLY_OPEN:
> +        nvme_aor_dec_open(ns);
> +        /* fall through */
>      case NVME_ZONE_STATE_CLOSED:
> +        nvme_aor_dec_active(ns);
> +        /* fall through */
>      case NVME_ZONE_STATE_FULL:
>          zone->w_ptr = zone->d.zslba;
>          zone->d.wp = zone->w_ptr;
> diff --git a/hw/block/trace-events b/hw/block/trace-events
> index 4d910bb942..e674522883 100644
> --- a/hw/block/trace-events
> +++ b/hw/block/trace-events
> @@ -122,6 +122,8 @@ pci_nvme_err_append_not_at_start(uint64_t slba, uint64_t zone) "appending at slb
>  pci_nvme_err_zone_write_not_ok(uint64_t slba, uint32_t nlb, uint32_t status) "slba=%"PRIu64", nlb=%"PRIu32", status=0x%"PRIx16""
>  pci_nvme_err_zone_read_not_ok(uint64_t slba, uint32_t nlb, uint32_t status) "slba=%"PRIu64", nlb=%"PRIu32", status=0x%"PRIx16""
>  pci_nvme_err_append_too_large(uint64_t slba, uint32_t nlb, uint8_t zasl) "slba=%"PRIu64", nlb=%"PRIu32", zasl=%"PRIu8""
> +pci_nvme_err_insuff_active_res(uint32_t max_active) "max_active=%"PRIu32" zone limit exceeded"
> +pci_nvme_err_insuff_open_res(uint32_t max_open) "max_open=%"PRIu32" zone limit exceeded"
>  pci_nvme_err_invalid_iocsci(uint32_t idx) "unsupported command set combination index %"PRIu32""
>  pci_nvme_err_invalid_del_sq(uint16_t qid) "invalid submission queue deletion, sid=%"PRIu16""
>  pci_nvme_err_invalid_create_sq_cqid(uint16_t cqid) "failed creating submission queue, invalid cqid=%"PRIu16""
> -- 
> 2.21.0
> 
> 
