qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [RFC PATCH 1/2] mem: make phys_section and phys_map_nod


From: Paolo Bonzini
Subject: Re: [Qemu-devel] [RFC PATCH 1/2] mem: make phys_section and phys_map_nodes prepared for RCU
Date: Mon, 13 May 2013 11:20:59 +0200
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:17.0) Gecko/20130311 Thunderbird/17.0.4

Il 13/05/2013 05:21, Liu Ping Fan ha scritto:
> From: Liu Ping Fan <address@hidden>
> 
> Now, each AddressSpaceDispatch has its own radix-tree, and all of them
> lie on phys_section[] and phys_map_nodes[]. When we want lockless
> mmio dispatch, we need something like RCU.
> 
> Acheive this with PhysPageTable which contains all of the info for all
> radix trees. After all address space listeners update (ie. excluding the
> readers) we switch from PhysPageTable *cur_pgtbl to *next_pgtbl.
> (The real RCU style is adopt by listener, see next patch)
> 
> Signed-off-by: Liu Ping Fan <address@hidden>
> ---
>  exec.c                |  197 
> +++++++++++++++++++++++++------------------------
>  include/exec/memory.h |    2 +
>  memory.c              |    2 +
>  3 files changed, 106 insertions(+), 95 deletions(-)
> 
> diff --git a/exec.c b/exec.c
> index c5f8082..bb4e540 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -80,23 +80,34 @@ int use_icount;
>  #if !defined(CONFIG_USER_ONLY)
>  
>  #define SUBSECTION_IDX(addr)      ((addr) & ~TARGET_PAGE_MASK)
> -#define PHYS_SECTION_ID(psection) ((psection) - phys_sections)
> +#define PHYS_SECTION_ID(psection, base) ((psection) - base->phys_sections)
>  
>  typedef struct PhysSection {
>      MemoryRegionSection section;
>      uint16_t *sub_section;
>  } PhysSection;
>  
> -static PhysSection *phys_sections;
> -static unsigned phys_sections_nb, phys_sections_nb_alloc;
> -static uint16_t phys_section_unassigned;
> -static uint16_t phys_section_notdirty;
> -static uint16_t phys_section_rom;
> -static uint16_t phys_section_watch;
> +typedef PhysPageEntry Node[L2_SIZE];
>  
> -/* Simple allocator for PhysPageEntry nodes */
> -static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
> -static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
> +typedef struct PhysPageTable PhysPageTable;
> +
> +struct PhysPageTable {
> +    int ref;
> +    PhysSection *phys_sections;
> +    unsigned phys_sections_nb;
> +    unsigned phys_sections_nb_alloc;
> +    uint16_t phys_section_unassigned;
> +    uint16_t phys_section_notdirty;
> +    uint16_t phys_section_rom;
> +    uint16_t phys_section_watch;

These four should be constants.  Please make them #defines and add
assertions that they have the expected values (in a separate patch).

> +
> +    Node *phys_map_nodes;
> +    unsigned phys_map_nodes_nb;
> +    unsigned phys_map_nodes_nb_alloc;
> +};
> +static PhysPageTable *cur_pgtbl;
> +static PhysPageTable *next_pgtbl;

You shouldn't need cur_pgtbl.  Instead, each AddressSpaceDispatch should
have a pointer to its own cur_pgtbl.  In the commit hook you can then
take a lock, unref the old table, assign cur_pgtbl = next_pgtbl and ref
the new one.

The lock must also be taken in address_space_translate, together with a
relatively expensive ref/unref pair (two atomic operations).  We
probably need real RCU so that we can skip the ref/unref.

>  
>  #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
>  
> @@ -111,13 +122,13 @@ static MemoryRegion io_mem_watch;
>  
>  static void phys_map_node_reserve(unsigned nodes)
>  {
> -    if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
> +    if (next_pgtbl->phys_map_nodes_nb + nodes > 
> next_pgtbl->phys_map_nodes_nb_alloc) {
>          typedef PhysPageEntry Node[L2_SIZE];
> -        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
> -        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
> -                                      phys_map_nodes_nb + nodes);
> -        phys_map_nodes = g_renew(Node, phys_map_nodes,
> -                                 phys_map_nodes_nb_alloc);
> +        next_pgtbl->phys_map_nodes_nb_alloc = 
> MAX(next_pgtbl->phys_map_nodes_nb_alloc * 2, 16);
> +        next_pgtbl->phys_map_nodes_nb_alloc = 
> MAX(next_pgtbl->phys_map_nodes_nb_alloc,
> +                                      next_pgtbl->phys_map_nodes_nb + nodes);
> +        next_pgtbl->phys_map_nodes = g_renew(Node, 
> next_pgtbl->phys_map_nodes,
> +                                 next_pgtbl->phys_map_nodes_nb_alloc);
>      }
>  }
>  
> @@ -126,22 +137,16 @@ static uint16_t phys_map_node_alloc(void)
>      unsigned i;
>      uint16_t ret;
>  
> -    ret = phys_map_nodes_nb++;
> +    ret = next_pgtbl->phys_map_nodes_nb++;
>      assert(ret != PHYS_MAP_NODE_NIL);
> -    assert(ret != phys_map_nodes_nb_alloc);
> +    assert(ret != next_pgtbl->phys_map_nodes_nb_alloc);
>      for (i = 0; i < L2_SIZE; ++i) {
> -        phys_map_nodes[ret][i].is_leaf = 0;
> -        phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
> +        next_pgtbl->phys_map_nodes[ret][i].is_leaf = 0;
> +        next_pgtbl->phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
>      }
>      return ret;
>  }
>  
> -static void phys_map_nodes_reset(void)
> -{
> -    phys_map_nodes_nb = 0;
> -}
> -
> -
>  static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
>                                  hwaddr *nb, uint16_t leaf,
>                                  int level)
> @@ -152,15 +157,15 @@ static void phys_page_set_level(PhysPageEntry *lp, 
> hwaddr *index,
>  
>      if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
>          lp->ptr = phys_map_node_alloc();
> -        p = phys_map_nodes[lp->ptr];
> +        p = next_pgtbl->phys_map_nodes[lp->ptr];
>          if (level == 0) {
>              for (i = 0; i < L2_SIZE; i++) {
>                  p[i].is_leaf = 1;
> -                p[i].ptr = phys_section_unassigned;
> +                p[i].ptr = next_pgtbl->phys_section_unassigned;
>              }
>          }
>      } else {
> -        p = phys_map_nodes[lp->ptr];
> +        p = next_pgtbl->phys_map_nodes[lp->ptr];
>      }
>      lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
>  
> @@ -192,11 +197,13 @@ static PhysSection 
> *phys_section_find(AddressSpaceDispatch *d,
>  {
>      PhysPageEntry lp = d->phys_map;
>      PhysPageEntry *p;
> +    PhysSection *phys_sections = cur_pgtbl->phys_sections;
> +    Node *phys_map_nodes = cur_pgtbl->phys_map_nodes;
>      int i;
>  
>      for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
>          if (lp.ptr == PHYS_MAP_NODE_NIL) {
> -            return &phys_sections[phys_section_unassigned];
> +            return &phys_sections[cur_pgtbl->phys_section_unassigned];
>          }
>          p = phys_map_nodes[lp.ptr];
>          lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
> @@ -209,6 +216,7 @@ static MemoryRegionSection 
> *address_space_lookup_region(AddressSpace *as,
>  {
>      PhysSection *psection;
>      uint16_t idx;
> +    PhysSection *phys_sections = cur_pgtbl->phys_sections;
>  
>      psection = phys_section_find(as->dispatch, addr >> TARGET_PAGE_BITS);
>      if (psection->sub_section) {
> @@ -246,7 +254,7 @@ MemoryRegionSection *address_space_translate(AddressSpace 
> *as, hwaddr addr,
>                  | (addr & iotlb.addr_mask));
>          len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
>          if (!iotlb.perm[is_write]) {
> -            section = &phys_sections[phys_section_unassigned].section;
> +            section = 
> &cur_pgtbl->phys_sections[cur_pgtbl->phys_section_unassigned].section;
>              break;
>          }
>  
> @@ -690,12 +698,12 @@ hwaddr memory_region_section_get_iotlb(CPUArchState 
> *env,
>          iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
>              + xlat;
>          if (!section->readonly) {
> -            iotlb |= phys_section_notdirty;
> +            iotlb |= cur_pgtbl->phys_section_notdirty;
>          } else {
> -            iotlb |= phys_section_rom;
> +            iotlb |= cur_pgtbl->phys_section_rom;
>          }
>      } else {
> -        iotlb = PHYS_SECTION_ID(container_of(section, PhysSection, section));
> +        iotlb = PHYS_SECTION_ID(container_of(section, PhysSection, section), 
> cur_pgtbl);
>          iotlb += xlat;
>      }
>  
> @@ -705,7 +713,7 @@ hwaddr memory_region_section_get_iotlb(CPUArchState *env,
>          if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
>              /* Avoid trapping reads of pages with a write breakpoint. */
>              if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
> -                iotlb = phys_section_watch + paddr;
> +                iotlb = cur_pgtbl->phys_section_watch + paddr;
>                  *address |= TLB_MMIO;
>                  break;
>              }
> @@ -721,59 +729,40 @@ static int subsection_register(PhysSection *psection, 
> uint32_t start,
>                                 uint32_t end, uint16_t section);
>  static void subsections_init(PhysSection *psection);
>  
> -static void destroy_page_desc(uint16_t section_index)
> +/* Call after all listener has been commit.
> +  * we do not walk over tree, just simply drop.
> +  */
> +static void destroy_pagetable(PhysPageTable *pgtbl)
>  {
> -    g_free(phys_sections[section_index].sub_section);
> -}
> -
> -static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
> -{
> -    unsigned i;
> -    PhysPageEntry *p;
> +    int i;
>  
> -    if (lp->ptr == PHYS_MAP_NODE_NIL) {
> -        return;
> -    }
> +    g_free(pgtbl->phys_map_nodes);
>  
> -    p = phys_map_nodes[lp->ptr];
> -    for (i = 0; i < L2_SIZE; ++i) {
> -        if (!p[i].is_leaf) {
> -            destroy_l2_mapping(&p[i], level - 1);
> +    for (i = 0; i < pgtbl->phys_sections_nb_alloc; i++) {
> +        if (pgtbl->phys_sections[i].sub_section) {
> +            g_free(pgtbl->phys_sections[i].sub_section);
>          } else {
> -            destroy_page_desc(p[i].ptr);
> +            memory_region_unref(pgtbl->phys_sections[i].section.mr);
>          }
>      }
> -    lp->is_leaf = 0;
> -    lp->ptr = PHYS_MAP_NODE_NIL;
> -}
> +    g_free(pgtbl->phys_sections);
>  
> -static void destroy_all_mappings(AddressSpaceDispatch *d)
> -{
> -    destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
> -    phys_map_nodes_reset();
> +    g_free(pgtbl);
>  }

This should be a separate patch.

> -static uint16_t phys_section_add(MemoryRegionSection *section)
> +static uint16_t phys_section_add(MemoryRegionSection *section, PhysPageTable 
> *pgtbl)

phys_section_add will always add to next_pgtbl.

>  {
> -    assert(phys_sections_nb < TARGET_PAGE_SIZE);
> +    assert(pgtbl->phys_sections_nb < TARGET_PAGE_SIZE);
>  
> -    if (phys_sections_nb == phys_sections_nb_alloc) {
> -        phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
> -        phys_sections = g_renew(PhysSection, phys_sections,
> -                                phys_sections_nb_alloc);
> +    if (pgtbl->phys_sections_nb == pgtbl->phys_sections_nb_alloc) {
> +        pgtbl->phys_sections_nb_alloc = MAX(pgtbl->phys_sections_nb_alloc * 
> 2, 16);
> +        pgtbl->phys_sections = g_renew(PhysSection, pgtbl->phys_sections,
> +                                pgtbl->phys_sections_nb_alloc);
>      }
> -    phys_sections[phys_sections_nb].section = *section;
> -    phys_sections[phys_sections_nb].sub_section = NULL;
> +    pgtbl->phys_sections[pgtbl->phys_sections_nb].section = *section;
> +    pgtbl->phys_sections[pgtbl->phys_sections_nb].sub_section = NULL;
>      memory_region_ref(section->mr);
> -    return phys_sections_nb++;
> -}
> -
> -static void phys_sections_clear(void)
> -{
> -    while (phys_sections_nb > 0) {
> -        PhysSection *phys_section = &phys_sections[--phys_sections_nb];
> -        memory_region_unref(phys_section->section.mr);
> -    }
> +    return pgtbl->phys_sections_nb++;
>  }
>  
>  static void register_subsection(AddressSpaceDispatch *d,
> @@ -793,18 +782,18 @@ static void register_subsection(AddressSpaceDispatch *d,
>             psection->section.mr == &io_mem_unassigned);
>  
>      if (!psection->sub_section) {
> -        new_section = phys_section_add(&subsection);
> -        psection = &phys_sections[new_section];
> +        new_section = phys_section_add(&subsection, next_pgtbl);
> +        psection = &next_pgtbl->phys_sections[new_section];
>          subsections_init(psection);
>          phys_page_set(d, base >> TARGET_PAGE_BITS, 1, new_section);
>      } else {
> -        new_section = PHYS_SECTION_ID(psection);
> +        new_section = PHYS_SECTION_ID(psection, next_pgtbl);
>      }
>  
> -    new_subsection = phys_section_add(section);
> +    new_subsection = phys_section_add(section, next_pgtbl);
>  
>      /* phys_section_add invalidates psection, reload it  */
> -    psection = &phys_sections[new_section];
> +    psection = &next_pgtbl->phys_sections[new_section];
>      start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
>      end = start + section->size - 1;
>      subsection_register(psection, start, end, new_subsection);
> @@ -816,7 +805,7 @@ static void register_multipage(AddressSpaceDispatch *d, 
> MemoryRegionSection *sec
>      hwaddr start_addr = section->offset_within_address_space;
>      ram_addr_t size = section->size;
>      hwaddr addr;
> -    uint16_t section_index = phys_section_add(section);
> +    uint16_t section_index = phys_section_add(section, next_pgtbl);
>  
>      assert(size);
>  
> @@ -1653,7 +1642,7 @@ static void subsections_init(PhysSection *psection)
>  {
>      psection->sub_section = g_malloc0(sizeof(uint16_t) * TARGET_PAGE_SIZE);
>      subsection_register(psection, 0, TARGET_PAGE_SIZE-1,
> -                        phys_section_unassigned);
> +                        next_pgtbl->phys_section_unassigned);
>  }
>  
>  static uint16_t dummy_section(MemoryRegion *mr)
> @@ -1665,12 +1654,12 @@ static uint16_t dummy_section(MemoryRegion *mr)
>          .size = UINT64_MAX,
>      };
>  
> -    return phys_section_add(&section);
> +    return phys_section_add(&section, next_pgtbl);
>  }
>  
>  MemoryRegion *iotlb_to_region(hwaddr index)
>  {
> -    return phys_sections[index & ~TARGET_PAGE_MASK].section.mr;
> +    return cur_pgtbl->phys_sections[index & ~TARGET_PAGE_MASK].section.mr;
>  }
>  
>  static void io_mem_init(void)
> @@ -1685,21 +1674,40 @@ static void io_mem_init(void)
>                            "watch", UINT64_MAX);
>  }
>  
> +void global_begin(void)
> +{
> +    next_pgtbl = g_new0(PhysPageTable, 1);
> +    next_pgtbl->ref = 1;
> +    next_pgtbl->phys_section_unassigned = dummy_section(&io_mem_unassigned);
> +    next_pgtbl->phys_section_notdirty = dummy_section(&io_mem_notdirty);
> +    next_pgtbl->phys_section_rom = dummy_section(&io_mem_rom);
> +    next_pgtbl->phys_section_watch = dummy_section(&io_mem_watch);
> +}
> +
> +/* other listeners finished */
> +void global_commit(void)
> +{
> +    PhysPageTable *t = cur_pgtbl;
> +
> +    cur_pgtbl = next_pgtbl;
> +    /* Fix me,  currently, we rely on each address space listener agaist its
> +      * reader. So when we come here, no readers will touch old 
> phys_map_node.
> +      * After rcu, should changed to call_rcu()
> +      */
> +    if (__sync_sub_and_fetch(&t->ref, 1) == 0) {
> +        destroy_pagetable(t);
> +    }

global_commit should not be needed, and global_begin should be simply
the new core_begin.  It should unref next_pgtbl and reallocate a new one.


> +}
> +
>  static void mem_begin(MemoryListener *listener)
>  {
>      AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, 
> listener);
>  
> -    destroy_all_mappings(d);
>      d->phys_map.ptr = PHYS_MAP_NODE_NIL;
>  }
>  
>  static void core_begin(MemoryListener *listener)
>  {
> -    phys_sections_clear();
> -    phys_section_unassigned = dummy_section(&io_mem_unassigned);
> -    phys_section_notdirty = dummy_section(&io_mem_notdirty);
> -    phys_section_rom = dummy_section(&io_mem_rom);
> -    phys_section_watch = dummy_section(&io_mem_watch);
>  }
>  
>  static void tcg_commit(MemoryListener *listener)
> @@ -1779,7 +1787,6 @@ void address_space_destroy_dispatch(AddressSpace *as)
>      AddressSpaceDispatch *d = as->dispatch;
>  
>      memory_listener_unregister(&d->listener);
> -    destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
>      g_free(d);
>      as->dispatch = NULL;
>  }
> @@ -2386,7 +2393,7 @@ void stl_phys_notdirty(hwaddr addr, uint32_t val)
>  
>      if (!memory_region_is_ram(section->mr) || section->readonly) {
>          if (memory_region_is_ram(section->mr)) {
> -            section = &phys_sections[phys_section_rom].section;
> +            section = 
> &cur_pgtbl->phys_sections[cur_pgtbl->phys_section_rom].section;
>          }
>          io_mem_write(section->mr, addr, val, 4);
>      } else {
> @@ -2422,7 +2429,7 @@ void stq_phys_notdirty(hwaddr addr, uint64_t val)
>  
>      if (!memory_region_is_ram(section->mr) || section->readonly) {
>          if (memory_region_is_ram(section->mr)) {
> -            section = &phys_sections[phys_section_rom].section;
> +            section = 
> &cur_pgtbl->phys_sections[cur_pgtbl->phys_section_rom].section;
>          }
>  #ifdef TARGET_WORDS_BIGENDIAN
>          io_mem_write(section->mr, addr, val >> 32, 4);
> @@ -2455,7 +2462,7 @@ static inline void stl_phys_internal(hwaddr addr, 
> uint32_t val,
>  
>      if (!memory_region_is_ram(section->mr) || section->readonly) {
>          if (memory_region_is_ram(section->mr)) {
> -            section = &phys_sections[phys_section_rom].section;
> +            section = 
> &cur_pgtbl->phys_sections[cur_pgtbl->phys_section_rom].section;
>          }
>  #if defined(TARGET_WORDS_BIGENDIAN)
>          if (endian == DEVICE_LITTLE_ENDIAN) {
> @@ -2526,7 +2533,7 @@ static inline void stw_phys_internal(hwaddr addr, 
> uint32_t val,
>  
>      if (!memory_region_is_ram(section->mr) || section->readonly) {
>          if (memory_region_is_ram(section->mr)) {
> -            section = &phys_sections[phys_section_rom].section;
> +            section = 
> &cur_pgtbl->phys_sections[cur_pgtbl->phys_section_rom].section;
>          }
>  #if defined(TARGET_WORDS_BIGENDIAN)
>          if (endian == DEVICE_LITTLE_ENDIAN) {
> diff --git a/include/exec/memory.h b/include/exec/memory.h
> index b97ace7..cc654fa 100644
> --- a/include/exec/memory.h
> +++ b/include/exec/memory.h
> @@ -992,6 +992,8 @@ void *address_space_map(AddressSpace *as, hwaddr addr,
>  void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
>                           int is_write, hwaddr access_len);
>  
> +void global_begin(void);
> +void global_commit(void);
>  
>  #endif
>  
> diff --git a/memory.c b/memory.c
> index 1a86607..da06dfd 100644
> --- a/memory.c
> +++ b/memory.c
> @@ -805,6 +805,7 @@ void memory_region_transaction_commit(void)
>      --memory_region_transaction_depth;
>      if (!memory_region_transaction_depth && memory_region_update_pending) {
>          memory_region_update_pending = false;
> +        global_begin();
>          MEMORY_LISTENER_CALL_GLOBAL(begin, Forward);
>  
>          QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) {
> @@ -812,6 +813,7 @@ void memory_region_transaction_commit(void)
>          }
>  
>          MEMORY_LISTENER_CALL_GLOBAL(commit, Forward);
> +        global_commit();
>      }
>  }
>  
> 




reply via email to

[Prev in Thread] Current Thread [Next in Thread]