qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH RFC 04/10] intel_iommu: Second Stage Access Dirty bit support


From: Jason Wang
Subject: Re: [PATCH RFC 04/10] intel_iommu: Second Stage Access Dirty bit support
Date: Fri, 29 Apr 2022 10:26:21 +0800

On Fri, Apr 29, 2022 at 5:14 AM Joao Martins <joao.m.martins@oracle.com> wrote:
>
> IOMMU advertises Access/Dirty bits if the extended capability
> DMAR register reports it (ECAP, mnemonic ECAP.SSADS albeit it used
> to be known as SLADS before). The first stage table, though, has no bit for
> advertising Access/Dirty, unless referenced via a scalable-mode PASID Entry.
> Relevant Intel IOMMU SDM ref for first stage table "3.6.2 Accessed, Extended
> Accessed, and Dirty Flags" and second stage table "3.7.2 Accessed and Dirty
> Flags".
>
> To enable it we depend on scalable-mode for the second-stage table,
> so we limit use of dirty-bit to scalable-mode. To use SSADS, we set a bit in
> the scalable-mode PASID Table entry, by setting bit 9 (SSADE). When
> we do so we require flushing the context/pasid-table caches and IOTLB much
> like AMD. Relevant SDM refs:
>
> "3.7.2 Accessed and Dirty Flags"
> "6.5.3.3 Guidance to Software for Invalidations,
>  Table 23. Guidance to Software for Invalidations"
>
> To read out what's dirtied, the same approach as past implementations is
> used. Dirty bit support is located in the same location (bit 9). The IOTLB
> caches some attributes when SSADE is enabled and dirty-ness information,
> so we also need to flush IOTLB to make sure IOMMU attempts to set the
> dirty bit again. Relevant manuals over the hardware translation is
> chapter 6 with some special mention to:
>
> "6.2.3.1 Scalable-Mode PASID-Table Entry Programming Considerations"
> "6.2.4 IOTLB"
>
> The first 12 bits of the PTE are already cached via the SLPTE pointer,
> similar to how it is added in amd-iommu. Use also the previously
> added PASID entry cache in order to fetch whether Dirty bit was
> enabled or not in the SM second stage table.
>
> This is useful for covering and prototyping IOMMU support for access/dirty
> bits.
>
> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
> ---
>  hw/i386/intel_iommu.c          | 85 ++++++++++++++++++++++++++++++----
>  hw/i386/intel_iommu_internal.h |  4 ++
>  hw/i386/trace-events           |  2 +
>  3 files changed, 82 insertions(+), 9 deletions(-)
>
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index 752940fa4c0e..e946f793a968 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -651,6 +651,21 @@ static uint64_t vtd_get_slpte(dma_addr_t base_addr, 
> uint32_t index)
>      return slpte;
>  }
>
> +/* Set the content of an slpte located in @base_addr[@index] */
> +static uint64_t vtd_set_slpte(dma_addr_t base_addr, uint32_t index,
> +                              uint64_t slpte)
> +{
> +
> +    if (dma_memory_write(&address_space_memory,
> +                         base_addr + index * sizeof(slpte), &slpte,
> +                         sizeof(slpte), MEMTXATTRS_UNSPECIFIED)) {
> +        slpte = (uint64_t)-1;
> +        return slpte;
> +    }
> +
> +    return vtd_get_slpte(base_addr, index);
> +}
> +
>  /* Given an iova and the level of paging structure, return the offset
>   * of current level.
>   */
> @@ -720,6 +735,11 @@ static inline bool vtd_pe_present(VTDPASIDEntry *pe)
>      return pe->val[0] & VTD_PASID_ENTRY_P;
>  }
>
> +static inline bool vtd_pe_slad_enabled(VTDPASIDEntry *pe)
> +{
> +    return pe->val[0] & VTD_SM_PASID_ENTRY_SLADE;
> +}
> +
>  static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
>                                            uint32_t pasid,
>                                            dma_addr_t addr,
> @@ -1026,6 +1046,33 @@ static VTDBus 
> *vtd_find_as_from_bus_num(IntelIOMMUState *s, uint8_t bus_num)
>      return NULL;
>  }
>
> +static inline bool vtd_ssad_update(IntelIOMMUState *s, uint16_t pe_flags,
> +                                   uint64_t *slpte, bool is_write,
> +                                   bool reads, bool writes)
> +{
> +    bool dirty, access = reads;
> +
> +    if (!(pe_flags & VTD_SM_PASID_ENTRY_SLADE)) {
> +        return false;
> +    }
> +
> +    dirty = access = false;
> +
> +    if (is_write && writes && !(*slpte & VTD_SL_D)) {
> +        *slpte |= VTD_SL_D;
> +        trace_vtd_dirty_update(*slpte);
> +        dirty = true;
> +    }
> +
> +    if (((!is_write && reads) || dirty) && !(*slpte & VTD_SL_A)) {
> +        *slpte |= VTD_SL_A;
> +        trace_vtd_access_update(*slpte);
> +        access = true;
> +    }
> +
> +    return dirty || access;
> +}
> +
>  /* Given the @iova, get relevant @slptep. @slpte_level will be the last level
>   * of the translation, can be used for deciding the size of large page.
>   */
> @@ -1039,6 +1086,7 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, 
> VTDContextEntry *ce,
>      uint32_t offset;
>      uint64_t slpte;
>      uint64_t access_right_check;
> +    uint16_t pe_flags;
>
>      if (!vtd_iova_range_check(s, iova, ce, aw_bits)) {
>          error_report_once("%s: detected IOVA overflow (iova=0x%" PRIx64 ")",
> @@ -1054,14 +1102,7 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, 
> VTDContextEntry *ce,
>          slpte = vtd_get_slpte(addr, offset);
>
>          if (slpte == (uint64_t)-1) {
> -            error_report_once("%s: detected read error on DMAR slpte "
> -                              "(iova=0x%" PRIx64 ")", __func__, iova);
> -            if (level == vtd_get_iova_level(s, ce)) {
> -                /* Invalid programming of context-entry */
> -                return -VTD_FR_CONTEXT_ENTRY_INV;
> -            } else {
> -                return -VTD_FR_PAGING_ENTRY_INV;
> -            }
> +            goto inv_slpte;
>          }
>          *reads = (*reads) && (slpte & VTD_SL_R);
>          *writes = (*writes) && (slpte & VTD_SL_W);
> @@ -1081,6 +1122,14 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, 
> VTDContextEntry *ce,
>          }
>
>          if (vtd_is_last_slpte(slpte, level)) {
> +            pe_flags = vtd_sm_pasid_entry_flags(s, ce);
> +            if (vtd_ssad_update(s, pe_flags, &slpte, is_write,
> +                                *reads, *writes)) {
> +                slpte = vtd_set_slpte(addr, offset, slpte);
> +                if (slpte == (uint64_t)-1) {
> +                    goto inv_slpte;
> +                }
> +            }
>              *slptep = slpte;
>              *slpte_level = level;
>              return 0;
> @@ -1088,6 +1137,16 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, 
> VTDContextEntry *ce,
>          addr = vtd_get_slpte_addr(slpte, aw_bits);
>          level--;
>      }
> +
> +inv_slpte:
> +    error_report_once("%s: detected read error on DMAR slpte "
> +                      "(iova=0x%" PRIx64 ")", __func__, iova);
> +    if (level == vtd_get_iova_level(s, ce)) {
> +        /* Invalid programming of context-entry */
> +        return -VTD_FR_CONTEXT_ENTRY_INV;
> +    } else {
> +        return -VTD_FR_PAGING_ENTRY_INV;
> +    }
>  }
>
>  typedef int (*vtd_page_walk_hook)(IOMMUTLBEvent *event, void *private);
> @@ -1742,6 +1801,13 @@ static bool vtd_do_iommu_translate(VTDAddressSpace 
> *vtd_as, PCIBus *bus,
>          slpte = iotlb_entry->slpte;
>          access_flags = iotlb_entry->access_flags;
>          page_mask = iotlb_entry->mask;
> +        if (vtd_ssad_update(s, iotlb_entry->sm_pe_flags, &slpte, is_write,
> +                            access_flags & IOMMU_RO, access_flags & 
> IOMMU_WO)) {
> +                uint32_t offset;
> +
> +                offset = vtd_iova_level_offset(addr, vtd_get_iova_level(s, 
> &ce));
> +                vtd_set_slpte(addr, offset, slpte);
> +        }
>          goto out;
>      }
>
> @@ -3693,7 +3759,8 @@ static void vtd_init(IntelIOMMUState *s)
>
>      /* TODO: read cap/ecap from host to decide which cap to be exposed. */
>      if (s->scalable_mode) {
> -        s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_SRS | VTD_ECAP_SLTS;
> +        s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_SRS | VTD_ECAP_SLTS |
> +                   VTD_ECAP_SLADS;
>      }

We probably need a dedicated command line parameter and make it compat
for pre 7.1 machines.

Otherwise we may break migration.

Thanks

>
>      if (s->snoop_control) {
> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
> index 1ff13b40f9bb..c00f6e7b4a72 100644
> --- a/hw/i386/intel_iommu_internal.h
> +++ b/hw/i386/intel_iommu_internal.h
> @@ -192,6 +192,7 @@
>  #define VTD_ECAP_MHMV               (15ULL << 20)
>  #define VTD_ECAP_SRS                (1ULL << 31)
>  #define VTD_ECAP_SMTS               (1ULL << 43)
> +#define VTD_ECAP_SLADS              (1ULL << 45)
>  #define VTD_ECAP_SLTS               (1ULL << 46)
>
>  /* CAP_REG */
> @@ -492,6 +493,7 @@ typedef struct VTDRootEntry VTDRootEntry;
>
>  #define VTD_SM_PASID_ENTRY_AW          7ULL /* Adjusted guest-address-width 
> */
>  #define VTD_SM_PASID_ENTRY_DID(val)    ((val) & VTD_DOMAIN_ID_MASK)
> +#define VTD_SM_PASID_ENTRY_SLADE       (1ULL << 9)
>
>  /* Second Level Page Translation Pointer*/
>  #define VTD_SM_PASID_ENTRY_SLPTPTR     (~0xfffULL)
> @@ -515,5 +517,7 @@ typedef struct VTDRootEntry VTDRootEntry;
>  #define VTD_SL_PT_BASE_ADDR_MASK(aw) (~(VTD_PAGE_SIZE - 1) & 
> VTD_HAW_MASK(aw))
>  #define VTD_SL_IGN_COM              0xbff0000000000000ULL
>  #define VTD_SL_TM                   (1ULL << 62)
> +#define VTD_SL_A                    (1ULL << 8)
> +#define VTD_SL_D                    (1ULL << 9)
>
>  #endif
> diff --git a/hw/i386/trace-events b/hw/i386/trace-events
> index eb5f075873cd..e4122ee8a999 100644
> --- a/hw/i386/trace-events
> +++ b/hw/i386/trace-events
> @@ -66,6 +66,8 @@ vtd_frr_new(int index, uint64_t hi, uint64_t lo) "index %d 
> high 0x%"PRIx64" low
>  vtd_warn_invalid_qi_tail(uint16_t tail) "tail 0x%"PRIx16
>  vtd_warn_ir_vector(uint16_t sid, int index, int vec, int target) "sid 
> 0x%"PRIx16" index %d vec %d (should be: %d)"
>  vtd_warn_ir_trigger(uint16_t sid, int index, int trig, int target) "sid 
> 0x%"PRIx16" index %d trigger %d (should be: %d)"
> +vtd_access_update(uint64_t slpte) "slpte 0x%"PRIx64
> +vtd_dirty_update(uint64_t slpte) "slpte 0x%"PRIx64
>
>  # amd_iommu.c
>  amdvi_evntlog_fail(uint64_t addr, uint32_t head) "error: fail to write at 
> addr 0x%"PRIx64" +  offset 0x%"PRIx32
> --
> 2.17.2
>




reply via email to

[Prev in Thread] Current Thread [Next in Thread]