[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [RFC PATCH 2/2] mem: prepare address_space listener rcu
From: |
liu ping fan |
Subject: |
Re: [Qemu-devel] [RFC PATCH 2/2] mem: prepare address_space listener rcu style |
Date: |
Wed, 15 May 2013 09:29:54 +0800 |
On Tue, May 14, 2013 at 5:34 PM, Paolo Bonzini <address@hidden> wrote:
> Il 14/05/2013 07:47, liu ping fan ha scritto:
>> On Mon, May 13, 2013 at 5:31 PM, Paolo Bonzini <address@hidden> wrote:
>>> Il 13/05/2013 05:21, Liu Ping Fan ha scritto:
>>>> From: Liu Ping Fan <address@hidden>
>>>>
>>>> Each address space listener has PhysPageMap *cur_map, *next_map,
>>>> the switch from cur_map to next_map complete the RCU style. The
>>>> mem_commit() do the switch, and it is against reader but AddressSpace's
>>>> lock or later RCU mechanism (around address_space_translate() ).
>>>>
>>>> Signed-off-by: Liu Ping Fan <address@hidden>
>>>> ---
>>>> exec.c | 36 +++++++++++++++++++++++++++---------
>>>> include/exec/memory-internal.h | 11 ++++++++++-
>>>> 2 files changed, 37 insertions(+), 10 deletions(-)
>>>>
>>>> diff --git a/exec.c b/exec.c
>>>> index bb4e540..e5871d6 100644
>>>> --- a/exec.c
>>>> +++ b/exec.c
>>>> @@ -186,24 +186,26 @@ static void phys_page_set(AddressSpaceDispatch *d,
>>>> hwaddr index, hwaddr nb,
>>>> uint16_t leaf)
>>>> {
>>>> + PhysPageMap *map = d->next_map;
>>>> /* Wildly overreserve - it doesn't matter much. */
>>>> phys_map_node_reserve(3 * P_L2_LEVELS);
>>>>
>>>> - phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
>>>> + phys_page_set_level(&map->root, &index, &nb, leaf, P_L2_LEVELS - 1);
>>>> }
>>>>
>>>> static PhysSection *phys_section_find(AddressSpaceDispatch *d,
>>>> hwaddr index)
>>>> {
>>>> - PhysPageEntry lp = d->phys_map;
>>>> PhysPageEntry *p;
>>>> - PhysSection *phys_sections = cur_pgtbl->phys_sections;
>>>> - Node *phys_map_nodes = cur_pgtbl->phys_map_nodes;
>>>> + PhysPageEntry lp = d->cur_map->root;
>>>> + PhysPageTable *pgtbl = d->cur_map->pgtbl;
>>>> + PhysSection *phys_sections = pgtbl->phys_sections;
>>>> + Node *phys_map_nodes = pgtbl->phys_map_nodes;
>>>> int i;
>>>>
>>>> for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
>>>> if (lp.ptr == PHYS_MAP_NODE_NIL) {
>>>> - return &phys_sections[cur_pgtbl->phys_section_unassigned];
>>>> + return &phys_sections[pgtbl->phys_section_unassigned];
>>>> }
>>>> p = phys_map_nodes[lp.ptr];
>>>> lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
>>>> @@ -234,7 +236,7 @@ MemoryRegionSection
>>>> *address_space_translate(AddressSpace *as, hwaddr addr,
>>>> IOMMUTLBEntry iotlb;
>>>> MemoryRegionSection *section;
>>>> hwaddr len = *plen;
>>>> -
>>>> + PhysPageTable *pgtbl = cur_pgtbl;
>>>
>>> d->cur_map->pgtbl.
>>>
>>>> for (;;) {
>>>> section = address_space_lookup_region(as, addr);
>>>>
>>>> @@ -254,7 +256,7 @@ MemoryRegionSection
>>>> *address_space_translate(AddressSpace *as, hwaddr addr,
>>>> | (addr & iotlb.addr_mask));
>>>> len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
>>>> if (!iotlb.perm[is_write]) {
>>>> - section =
>>>> &cur_pgtbl->phys_sections[cur_pgtbl->phys_section_unassigned].section;
>>>> + section =
>>>> &pgtbl->phys_sections[pgtbl->phys_section_unassigned].section;
>>>> break;
>>>> }
>>>>
>>>> @@ -1703,7 +1705,21 @@ static void mem_begin(MemoryListener *listener)
>>>> {
>>>> AddressSpaceDispatch *d = container_of(listener,
>>>> AddressSpaceDispatch, listener);
>>>>
>>>> - d->phys_map.ptr = PHYS_MAP_NODE_NIL;
>>>> + d->next_map = g_new0(PhysPageMap, 1);
>>>> + d->next_map->pgtbl = next_pgtbl;
>>>> +}
>>>> +
>>>> +static void mem_commit(MemoryListener *listener)
>>>> +{
>>>> + AddressSpaceDispatch *d = container_of(listener,
>>>> AddressSpaceDispatch, listener);
>>>> + PhysPageMap *m = d->cur_map;
>>>> +
>>>> + d->cur_map = d->next_map;
>>>> + /* Fixme, Currently, we rely on biglock or address-space lock against
>>>> + * reader. So here, we can safely drop it.
>>>> + * After RCU, should change to call_rcu()
>>>> + */
>>>> + g_free(m);
>>>> }
>>>>
>>>> static void core_begin(MemoryListener *listener)
>>>> @@ -1771,11 +1787,12 @@ void address_space_init_dispatch(AddressSpace *as)
>>>> {
>>>> AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
>>>>
>>>> - d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf =
>>>> 0 };
>>>> + d->cur_map = g_new0(PhysPageMap, 1);
>>>> d->listener = (MemoryListener) {
>>>> .begin = mem_begin,
>>>> .region_add = mem_add,
>>>> .region_nop = mem_add,
>>>> + .commit = mem_commit,
>>>> .priority = 0,
>>>> };
>>>> as->dispatch = d;
>>>> @@ -1787,6 +1804,7 @@ void address_space_destroy_dispatch(AddressSpace *as)
>>>> AddressSpaceDispatch *d = as->dispatch;
>>>>
>>>> memory_listener_unregister(&d->listener);
>>>> + g_free(d->cur_map);
>>>> g_free(d);
>>>> as->dispatch = NULL;
>>>> }
>>>> diff --git a/include/exec/memory-internal.h
>>>> b/include/exec/memory-internal.h
>>>> index 1b156fd..0dfe260 100644
>>>> --- a/include/exec/memory-internal.h
>>>> +++ b/include/exec/memory-internal.h
>>>> @@ -30,13 +30,22 @@ struct PhysPageEntry {
>>>> uint16_t ptr : 15;
>>>> };
>>>>
>>>> +struct PhysPageTable;
>>>> +typedef struct PhysPageMap PhysPageMap;
>>>> +
>>>> +struct PhysPageMap {
>>>> + PhysPageEntry root;
>>>> + struct PhysPageTable *pgtbl;
>>>
>>> cur_pgtbl should be introduced in patch 1 already.
>>>
>> PhysPageMap is a member of each AddressSpaceDispatch. And we achieve
>> RCU based on PhysPageMap *cur_map=next_map.
>
> But assignment of cur_map is not atomic. That's what I was trying to
> achieve with the do...while loop below.
>
It is atomic. The reason why I introduce PhysPageMap* is to ensure the
atomic assignment in RCU, since root and pgtbl can not be changed
atomically.
>> While cur_pgtbl/next_pgtbl are shared by all AddressSpaceDispatch.
>
> You cannot share them, because the root of some AddressSpaceDispatch
> structures will still refer to the old pagetable. You need to attach
All the AddressSpaceDispatch's updaters are overlapped in
mem_commit(), the shared cur_pgtbl will be reclaimed only after all
the AddressSpaceDispatch have been reclaimed. So at that time, no
"root of some AddressSpaceDispatch structures will still refer to the
old pagetable"
> the root of each ASD to the right pagetable, and the simplest way to do
> it is to put cur_pgtbl/next_pgtbl in the ASD. And this problem exists
> in the first patch already, so that's where you have to put it.
>
I will implement them in each ASD, since it is more coherent with RCU
model (not coalescing updaters ).
>>>> +};
>>>> +
>>>> typedef struct AddressSpaceDispatch AddressSpaceDispatch;
>>>>
>>>> struct AddressSpaceDispatch {
>>>> /* This is a multi-level map on the physical address space.
>>>> * The bottom level has pointers to MemoryRegionSections.
>>>> */
>>>> - PhysPageEntry phys_map;
>>>> + PhysPageMap *cur_map;
>>>> + PhysPageMap *next_map;
>>>
>>> Pointers are quite expensive here. With RCU we can fetch a consistent
>>> root/table pair like this:
>>>
>>> rcu_read_lock();
>>> do {
>>> pgtbl = d->cur_pgtbl;
>>> smp_rmb();
>>> root = d->cur_root;
>>>
>>> /* RCU ensures that d->cur_pgtbl remains alive, thus it cannot
>>> * be recycled while this loop is running. If
>>> * d->cur_pgtbl == pgtbl, the root is the right one for this
>>> * pgtable.
>>> */
>>> smp_rmb();
>>> } while (d->cur_pgtbl == pgtbl);
>
> Ouch, != of course.
>
>>> ...
>>> rcu_read_unlock();
>>>
>> It seems to break the semantic of rcu_dereference() and rcu_assign().
>
> It doesn't. In fact it is even stronger, I'm using a "full" rmb instead
> of read_barrier_depends.
>
rcu_dereference()/rcu_assign() ensure the switch from prev to next
version, based on atomic-ops. I think your method _does_ work based on
read+check+barrier skill, but it is not the standard RCU method, and
export some concept (barrier) outside RCU.
>> If pointers are expensive, how about this:
>> if (unlikely(d->prev_map!=d->cur_map)) {
>> d->root = d->cur_map->root;
>> d->pgtbl = d->cur_map->root;
>> d->prev_map = d->cur_map;
>> }
>> So usually, we use cache value.
>
rcu_read_lock();
map = rcu_derefenrence(d->cur_map)
if (unlikely(d->prev_map!=map) {
d->root = map->root;
d->pgtbl = map->pgtbl;
}
......
rcu_read_unlock();
Then it can avoid ABA problem.
Thanks and regards,
Pingfan
> Doesn't work, it has ABA problem. In my solution above, RCU avoids ABA
> because the read and the check are under the same RCU critical section.
> In your solution, the read and the check are under different RCU
> critical sections, so it doesn't work.
>
> Paolo
>
>> Thanks and regards,
>> Pingfan
>>> Remember to have a matching smp_wmb() in mem_commit, and to write ->root
>>> first:
>>>
>>> old_pgtbl = d->cur_pgtbl;
>>> smp_wmb();
>>> d->cur_root = d->next_root;
>>>
>>> /* Write the root before updating the page table. */
>>> smp_wmb();
>>> d->cur_pgtbl = d->next_pgtbl;
>>>
>>> /* Write cur_pgtbl before possibly destroying the old one. */
>>> smp_mb();
>>> page_table_unref(old_pgtbl); /* uses call_rcu if --refcount == 0 */
>>>
>>> If you are renaming fields, please do it as the first step.
>>>
>>> Paolo
>>>
>>>> MemoryListener listener;
>>>> };
>>>>
>>>>
>>>
>>
>>
>
[Qemu-devel] [RFC PATCH 2/2] mem: prepare address_space listener rcu style, Liu Ping Fan, 2013/05/12
- Re: [Qemu-devel] [RFC PATCH 2/2] mem: prepare address_space listener rcu style, Paolo Bonzini, 2013/05/13
- Re: [Qemu-devel] [RFC PATCH 2/2] mem: prepare address_space listener rcu style, liu ping fan, 2013/05/14
- Re: [Qemu-devel] [RFC PATCH 2/2] mem: prepare address_space listener rcu style, Paolo Bonzini, 2013/05/14
- Re: [Qemu-devel] [RFC PATCH 2/2] mem: prepare address_space listener rcu style,
liu ping fan <=
- Re: [Qemu-devel] [RFC PATCH 2/2] mem: prepare address_space listener rcu style, Paolo Bonzini, 2013/05/15
- Re: [Qemu-devel] [RFC PATCH 2/2] mem: prepare address_space listener rcu style, liu ping fan, 2013/05/15
- Re: [Qemu-devel] [RFC PATCH 2/2] mem: prepare address_space listener rcu style, Paolo Bonzini, 2013/05/15
Re: [Qemu-devel] [RFC PATCH 2/2] mem: prepare address_space listener rcu style, Paolo Bonzini, 2013/05/16