qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH 2/3] i386: Atomically update PTEs with mttcg


From: Philippe Mathieu-Daudé
Subject: Re: [Qemu-devel] [PATCH 2/3] i386: Atomically update PTEs with mttcg
Date: Fri, 14 Dec 2018 12:11:51 +0100
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:60.0) Gecko/20100101 Thunderbird/60.3.1

On 12/14/18 1:05 AM, Benjamin Herrenschmidt wrote:
> Note to RiscV folks: You may want to adapt your code to do the same as
> this, esp. afaik, what you do today is endian-broken if host and guest
> endian are different.

Cc'ing the RISC-V list.

> Cheers,
> Ben. 
> 
> On Fri, 2018-12-14 at 10:58 +1100, Benjamin Herrenschmidt wrote:
>> Afaik, this isn't well documented (at least it wasn't when I last looked)
>> but OSes such as Linux rely on this behaviour:
>>
>> The HW updates to the page tables need to be done atomically with the
>> checking of the present bit (and other permissions).
>>
>> This is what allows Linux to do simple xchg of PTEs with 0 and assume
>> the value read has "final" stable dirty and accessed bits (the TLB
>> invalidation is deferred).
>>
>> Signed-off-by: Benjamin Herrenschmidt <address@hidden>
>> ---
>>  target/i386/excp_helper.c | 104 +++++++++++++++++++++++++++++---------
>>  1 file changed, 80 insertions(+), 24 deletions(-)
>>
>> diff --git a/target/i386/excp_helper.c b/target/i386/excp_helper.c
>> index 49231f6b69..93fc24c011 100644
>> --- a/target/i386/excp_helper.c
>> +++ b/target/i386/excp_helper.c
>> @@ -157,11 +157,45 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, 
>> int size,
>>  
>>  #else
>>  
>> +static inline uint64_t update_entry(CPUState *cs, target_ulong addr,
>> +                                    uint64_t orig_entry, uint32_t bits)
>> +{
>> +    uint64_t new_entry = orig_entry | bits;
>> +
>> +    /* Write the updated bottom 32-bits */
>> +    if (qemu_tcg_mttcg_enabled()) {
>> +        uint32_t old_le = cpu_to_le32(orig_entry);
>> +        uint32_t new_le = cpu_to_le32(new_entry);
>> +        MemTxResult result;
>> +        uint32_t old_ret;
>> +
>> +        old_ret = address_space_cmpxchgl_notdirty(cs->as, addr,
>> +                                                  old_le, new_le,
>> +                                                  MEMTXATTRS_UNSPECIFIED,
>> +                                                  &result);
>> +        if (result == MEMTX_OK) {
>> +            if (old_ret != old_le && old_ret != new_le) {
>> +                new_entry = 0;
>> +            }
>> +            return new_entry;
>> +        }
>> +
>> +        /* Do we need to support this case where PTEs aren't in RAM ?
>> +         *
>> +         * For now fallback to non-atomic case
>> +         */
>> +    }
>> +
>> +    x86_stl_phys_notdirty(cs, addr, new_entry);
>> +
>> +    return new_entry;
>> +}
>> +
>>  static hwaddr get_hphys(CPUState *cs, hwaddr gphys, MMUAccessType 
>> access_type,
>>                          int *prot)
>>  {
>>      CPUX86State *env = &X86_CPU(cs)->env;
>> -    uint64_t rsvd_mask = PG_HI_RSVD_MASK;
>> +    uint64_t rsvd_mask;
>>      uint64_t ptep, pte;
>>      uint64_t exit_info_1 = 0;
>>      target_ulong pde_addr, pte_addr;
>> @@ -172,6 +206,8 @@ static hwaddr get_hphys(CPUState *cs, hwaddr gphys, 
>> MMUAccessType access_type,
>>          return gphys;
>>      }
>>  
>> + restart:
>> +    rsvd_mask = PG_HI_RSVD_MASK;
>>      if (!(env->nested_pg_mode & SVM_NPT_NXE)) {
>>          rsvd_mask |= PG_NX_MASK;
>>      }
>> @@ -198,8 +234,10 @@ static hwaddr get_hphys(CPUState *cs, hwaddr gphys, 
>> MMUAccessType access_type,
>>                  goto do_fault_rsvd;
>>              }
>>              if (!(pml4e & PG_ACCESSED_MASK)) {
>> -                pml4e |= PG_ACCESSED_MASK;
>> -                x86_stl_phys_notdirty(cs, pml4e_addr, pml4e);
>> +                pml4e = update_entry(cs, pml4e_addr, pml4e, 
>> PG_ACCESSED_MASK);
>> +                if (!pml4e) {
>> +                    goto restart;
>> +                }
>>              }
>>              ptep &= pml4e ^ PG_NX_MASK;
>>              pdpe_addr = (pml4e & PG_ADDRESS_MASK) +
>> @@ -213,8 +251,10 @@ static hwaddr get_hphys(CPUState *cs, hwaddr gphys, 
>> MMUAccessType access_type,
>>              }
>>              ptep &= pdpe ^ PG_NX_MASK;
>>              if (!(pdpe & PG_ACCESSED_MASK)) {
>> -                pdpe |= PG_ACCESSED_MASK;
>> -                x86_stl_phys_notdirty(cs, pdpe_addr, pdpe);
>> +                pdpe = update_entry(cs, pdpe_addr, pdpe, PG_ACCESSED_MASK);
>> +                if (!pdpe) {
>> +                    goto restart;
>> +                }
>>              }
>>              if (pdpe & PG_PSE_MASK) {
>>                  /* 1 GB page */
>> @@ -256,8 +296,10 @@ static hwaddr get_hphys(CPUState *cs, hwaddr gphys, 
>> MMUAccessType access_type,
>>          }
>>          /* 4 KB page */
>>          if (!(pde & PG_ACCESSED_MASK)) {
>> -            pde |= PG_ACCESSED_MASK;
>> -            x86_stl_phys_notdirty(cs, pde_addr, pde);
>> +            pde = update_entry(cs, pde_addr, pde, PG_ACCESSED_MASK);
>> +            if (!pde) {
>> +                goto restart;
>> +            }
>>          }
>>          pte_addr = (pde & PG_ADDRESS_MASK) + (((gphys >> 12) & 0x1ff) << 3);
>>          pte = x86_ldq_phys(cs, pte_addr);
>> @@ -295,8 +337,10 @@ static hwaddr get_hphys(CPUState *cs, hwaddr gphys, 
>> MMUAccessType access_type,
>>          }
>>  
>>          if (!(pde & PG_ACCESSED_MASK)) {
>> -            pde |= PG_ACCESSED_MASK;
>> -            x86_stl_phys_notdirty(cs, pde_addr, pde);
>> +            pde = update_entry(cs, pde_addr, pde, PG_ACCESSED_MASK);
>> +            if (!pde) {
>> +                goto restart;
>> +            }
>>          }
>>  
>>          /* page directory entry */
>> @@ -376,7 +420,7 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, 
>> int size,
>>      int error_code = 0;
>>      int is_dirty, prot, page_size, is_write, is_user;
>>      hwaddr paddr;
>> -    uint64_t rsvd_mask = PG_HI_RSVD_MASK;
>> +    uint64_t rsvd_mask;
>>      uint32_t page_offset;
>>      target_ulong vaddr;
>>  
>> @@ -401,6 +445,8 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, 
>> int size,
>>          goto do_mapping;
>>      }
>>  
>> + restart:
>> +    rsvd_mask = PG_HI_RSVD_MASK;
>>      if (!(env->efer & MSR_EFER_NXE)) {
>>          rsvd_mask |= PG_NX_MASK;
>>      }
>> @@ -436,8 +482,10 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, 
>> int size,
>>                      goto do_fault_rsvd;
>>                  }
>>                  if (!(pml5e & PG_ACCESSED_MASK)) {
>> -                    pml5e |= PG_ACCESSED_MASK;
>> -                    x86_stl_phys_notdirty(cs, pml5e_addr, pml5e);
>> +                    pml5e = update_entry(cs, pml5e_addr, pml5e, 
>> PG_ACCESSED_MASK);
>> +                    if (!pml5e) {
>> +                        goto restart;
>> +                    }
>>                  }
>>                  ptep = pml5e ^ PG_NX_MASK;
>>              } else {
>> @@ -456,8 +504,10 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, 
>> int size,
>>                  goto do_fault_rsvd;
>>              }
>>              if (!(pml4e & PG_ACCESSED_MASK)) {
>> -                pml4e |= PG_ACCESSED_MASK;
>> -                x86_stl_phys_notdirty(cs, pml4e_addr, pml4e);
>> +                pml4e = update_entry(cs, pml4e_addr, pml4e, 
>> PG_ACCESSED_MASK);
>> +                if (!pml4e) {
>> +                    goto restart;
>> +                }
>>              }
>>              ptep &= pml4e ^ PG_NX_MASK;
>>              pdpe_addr = ((pml4e & PG_ADDRESS_MASK) + (((addr >> 30) & 
>> 0x1ff) << 3)) &
>> @@ -472,8 +522,10 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, 
>> int size,
>>              }
>>              ptep &= pdpe ^ PG_NX_MASK;
>>              if (!(pdpe & PG_ACCESSED_MASK)) {
>> -                pdpe |= PG_ACCESSED_MASK;
>> -                x86_stl_phys_notdirty(cs, pdpe_addr, pdpe);
>> +                pdpe = update_entry(cs, pdpe_addr, pdpe, PG_ACCESSED_MASK);
>> +                if (!pdpe) {
>> +                    goto restart;
>> +                }
>>              }
>>              if (pdpe & PG_PSE_MASK) {
>>                  /* 1 GB page */
>> @@ -520,8 +572,10 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, 
>> int size,
>>          }
>>          /* 4 KB page */
>>          if (!(pde & PG_ACCESSED_MASK)) {
>> -            pde |= PG_ACCESSED_MASK;
>> -            x86_stl_phys_notdirty(cs, pde_addr, pde);
>> +            pde = update_entry(cs, pde_addr, pde, PG_ACCESSED_MASK);
>> +            if (!pde) {
>> +                goto restart;
>> +            }
>>          }
>>          pte_addr = ((pde & PG_ADDRESS_MASK) + (((addr >> 12) & 0x1ff) << 
>> 3)) &
>>              a20_mask;
>> @@ -563,8 +617,10 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, 
>> int size,
>>          }
>>  
>>          if (!(pde & PG_ACCESSED_MASK)) {
>> -            pde |= PG_ACCESSED_MASK;
>> -            x86_stl_phys_notdirty(cs, pde_addr, pde);
>> +            pde = update_entry(cs, pde_addr, pde, PG_ACCESSED_MASK);
>> +            if (!pde) {
>> +                goto restart;
>> +            }
>>          }
>>  
>>          /* page directory entry */
>> @@ -634,11 +690,11 @@ do_check_protect_pse36:
>>      /* yes, it can! */
>>      is_dirty = is_write && !(pte & PG_DIRTY_MASK);
>>      if (!(pte & PG_ACCESSED_MASK) || is_dirty) {
>> -        pte |= PG_ACCESSED_MASK;
>> -        if (is_dirty) {
>> -            pte |= PG_DIRTY_MASK;
>> +        pte = update_entry(cs, pte_addr, pte,
>> +                           PG_ACCESSED_MASK | (is_dirty ? PG_DIRTY_MASK : 
>> 0));
>> +        if (!pte) {
>> +            goto restart;
>>          }
>> -        x86_stl_phys_notdirty(cs, pte_addr, pte);
>>      }
>>  
>>      if (!(pte & PG_DIRTY_MASK)) {
> 
> 



reply via email to

[Prev in Thread] Current Thread [Next in Thread]