[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [RFC PATCH] Convert ram_list to RCU DQ V4
From: Paolo Bonzini
Subject: Re: [Qemu-devel] [RFC PATCH] Convert ram_list to RCU DQ V4
Date: Wed, 04 Sep 2013 21:58:58 +0200
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:17.0) Gecko/20130805 Thunderbird/17.0.8
Il 04/09/2013 21:41, Mike Day ha scritto:
> * now passes virt-test -t qemu
> * uses call_rcu instead of call_rcu1
> * completely removed the ram_list mutex and its locking functions
> * cleaned up some comments
>
> Changes from V3:
>
> * added rcu reclaim function to free ram blocks
> * reduced the number of comments
> * made rcu locking policy consistent for different caller scenarios
> * rcu updates to ram_block now are protected only by the iothread mutex
>
> Changes from V1:
>
> * Omitted locks or rcu critical sections within some functions that
> read or write the ram_list but are called in a protected context
> (the caller holds the iothread lock, the ram_list mutex, or an rcu
> critical section).
>
> Allow "unlocked" reads of the ram_list by using an RCU-enabled
> DQ. Most readers of the list no longer require holding the list mutex.
>
> The ram_list now uses a QLIST instead of a QTAILQ. The difference is
> minimal.
>
> This patch has been built and make-checked for the x86_64, ppc64,
> s390x, and arm targets. It has been virt-tested using KVM -t qemu and
> passes 15/15 migration tests.
>
> To apply this patch, you must base upon Paolo Bonzini's rcu tree and
> also apply the RCU DQ patch (below).
Looks good, just a couple of comments but nothing I cannot apply in my
tree. I will push the new RCU tree tomorrow.
Would you look at adding memory barriers before all updates of
ram_block.version next?
Paolo
> https://github.com/bonzini/qemu/tree/rcu
> http://article.gmane.org/gmane.comp.emulators.qemu/230159/
>
> Signed-off-by: Mike Day <address@hidden>
> ---
> arch_init.c | 96 +++++++++++++++++-----------
> exec.c | 162
> +++++++++++++++++++++++++----------------------
> include/exec/cpu-all.h | 13 ++--
> include/qemu/rcu_queue.h | 8 +++
> 4 files changed, 160 insertions(+), 119 deletions(-)
>
> diff --git a/arch_init.c b/arch_init.c
> index 68a7ab7..76ef5c9 100644
> --- a/arch_init.c
> +++ b/arch_init.c
> @@ -49,6 +49,7 @@
> #include "trace.h"
> #include "exec/cpu-all.h"
> #include "hw/acpi/acpi.h"
> +#include "qemu/rcu_queue.h"
>
> #ifdef DEBUG_ARCH_INIT
> #define DPRINTF(fmt, ...) \
> @@ -398,7 +399,8 @@ static void migration_bitmap_sync(void)
> trace_migration_bitmap_sync_start();
> address_space_sync_dirty_bitmap(&address_space_memory);
>
> - QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> + rcu_read_lock();
> + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
> for (addr = 0; addr < block->length; addr += TARGET_PAGE_SIZE) {
> if (memory_region_test_and_clear_dirty(block->mr,
> addr, TARGET_PAGE_SIZE,
> @@ -407,6 +409,8 @@ static void migration_bitmap_sync(void)
> }
> }
> }
> + rcu_read_unlock();
> +
> trace_migration_bitmap_sync_end(migration_dirty_pages
> - num_dirty_pages_init);
> num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init;
> @@ -446,6 +450,8 @@ static void migration_bitmap_sync(void)
> *
> * Returns: The number of bytes written.
> * 0 means no dirty pages
> + *
> + * assumes that the caller has rcu-locked the ram_list
> */
>
> static int ram_save_block(QEMUFile *f, bool last_stage)
> @@ -458,7 +464,7 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
> ram_addr_t current_addr;
>
> if (!block)
> - block = QTAILQ_FIRST(&ram_list.blocks);
> + block = QLIST_FIRST_RCU(&ram_list.blocks);
>
> while (true) {
> mr = block->mr;
> @@ -469,9 +475,9 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
> }
> if (offset >= block->length) {
> offset = 0;
> - block = QTAILQ_NEXT(block, next);
> + block = QLIST_NEXT_RCU(block, next);
> if (!block) {
> - block = QTAILQ_FIRST(&ram_list.blocks);
> + block = QLIST_FIRST_RCU(&ram_list.blocks);
> complete_round = true;
> ram_bulk_stage = false;
> }
> @@ -565,10 +571,10 @@ uint64_t ram_bytes_total(void)
> {
> RAMBlock *block;
> uint64_t total = 0;
> -
> - QTAILQ_FOREACH(block, &ram_list.blocks, next)
> + rcu_read_lock();
> + QLIST_FOREACH_RCU(block, &ram_list.blocks, next)
> total += block->length;
> -
> + rcu_read_unlock();
> return total;
> }
>
> @@ -602,10 +608,20 @@ static void reset_ram_globals(void)
> last_offset = 0;
> last_version = ram_list.version;
> ram_bulk_stage = true;
> + smp_wmb();
No need for this here; you need a smp_rmb() instead. You would need
smp_wmb before all increments of ram_list.version. See docs/atomic.txt,
read/write barriers come in pairs and accesses are in the opposite
order. Here we read version before block, with a smp_rmb() in the
middle; elsewhere we write version after block, with a smp_wmb() in the
middle.
I'll remove this smp_wmb() when I commit.
> }
>
> #define MAX_WAIT 50 /* ms, half buffered_file limit */
>
> +
> +/* ram_save_* functions each implement a long-running RCU critical
> + * section. When rcu-reclaims in the code start to become numerous
> + * it will be necessary to reduce the granularity of these critical
> + * sections.
> + *
> + * (ram_save_setup, ram_save_iterate, and ram_save_complete.)
> + */
> +
> static int ram_save_setup(QEMUFile *f, void *opaque)
> {
> RAMBlock *block;
> @@ -631,7 +647,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
> }
>
> qemu_mutex_lock_iothread();
> - qemu_mutex_lock_ramlist();
> + rcu_read_lock();
> bytes_transferred = 0;
> reset_ram_globals();
>
> @@ -641,13 +657,13 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
>
> qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
>
> - QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
> qemu_put_byte(f, strlen(block->idstr));
> qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
> qemu_put_be64(f, block->length);
> }
>
> - qemu_mutex_unlock_ramlist();
> + rcu_read_unlock();
>
> ram_control_before_iterate(f, RAM_CONTROL_SETUP);
> ram_control_after_iterate(f, RAM_CONTROL_SETUP);
> @@ -664,16 +680,14 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
> int64_t t0;
> int total_sent = 0;
>
> - qemu_mutex_lock_ramlist();
> -
> - if (ram_list.version != last_version) {
> + if (atomic_rcu_read(&ram_list.version) != last_version) {
> reset_ram_globals();
> }
>
> ram_control_before_iterate(f, RAM_CONTROL_ROUND);
> -
> t0 = qemu_get_clock_ns(rt_clock);
> i = 0;
> + rcu_read_lock();
> while ((ret = qemu_file_rate_limit(f)) == 0) {
> int bytes_sent;
>
> @@ -700,9 +714,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
> }
> i++;
> }
> -
> - qemu_mutex_unlock_ramlist();
> -
> + rcu_read_unlock();
> /*
> * Must occur before EOS (or any QEMUFile operation)
> * because of RDMA protocol.
> @@ -723,13 +735,14 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
>
> static int ram_save_complete(QEMUFile *f, void *opaque)
> {
> - qemu_mutex_lock_ramlist();
> +
> migration_bitmap_sync();
>
> ram_control_before_iterate(f, RAM_CONTROL_FINISH);
>
> /* try transferring iterative blocks of memory */
>
> + rcu_read_lock();
> /* flush all remaining blocks regardless of rate limiting */
> while (true) {
> int bytes_sent;
> @@ -741,11 +754,10 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
> }
> bytes_transferred += bytes_sent;
> }
> -
> + rcu_read_unlock();
> ram_control_after_iterate(f, RAM_CONTROL_FINISH);
> migration_end();
>
> - qemu_mutex_unlock_ramlist();
> qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
>
> return 0;
> @@ -807,6 +819,9 @@ static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void
> *host)
> return rc;
> }
>
> +/* Must be called from within a rcu critical section.
> + * Returns a pointer from within the RCU-protected ram_list.
> + */
> static inline void *host_from_stream_offset(QEMUFile *f,
> ram_addr_t offset,
> int flags)
> @@ -828,9 +843,10 @@ static inline void *host_from_stream_offset(QEMUFile *f,
> qemu_get_buffer(f, (uint8_t *)id, len);
> id[len] = 0;
>
> - QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> - if (!strncmp(id, block->idstr, sizeof(id)))
> + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
> + if (!strncmp(id, block->idstr, sizeof(id))) {
> return memory_region_get_ram_ptr(block->mr) + offset;
> + }
> }
>
> fprintf(stderr, "Can't find block %s!\n", id);
> @@ -867,7 +883,12 @@ static int ram_load(QEMUFile *f, void *opaque, int
> version_id)
> if (version_id < 4 || version_id > 4) {
> return -EINVAL;
> }
> -
> + /* this implements a long-running RCU critical section.
> + * When rcu reclaims in the code start to become numerous
> + * it will be necessary to reduce the granularity of this critical
> + * section.
> + */
> + rcu_read_lock();
> do {
> addr = qemu_get_be64(f);
>
> @@ -889,21 +910,19 @@ static int ram_load(QEMUFile *f, void *opaque, int
> version_id)
> qemu_get_buffer(f, (uint8_t *)id, len);
> id[len] = 0;
> length = qemu_get_be64(f);
> -
> - QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
> if (!strncmp(id, block->idstr, sizeof(id))) {
> if (block->length != length) {
> fprintf(stderr,
> "Length mismatch: %s: " RAM_ADDR_FMT
> " in != " RAM_ADDR_FMT "\n", id,
> length,
> block->length);
> - ret = -EINVAL;
> + ret = -EINVAL;
> goto done;
> }
> break;
> }
> }
> -
> if (!block) {
> fprintf(stderr, "Unknown ramblock \"%s\", cannot "
> "accept migration\n", id);
> @@ -916,30 +935,30 @@ static int ram_load(QEMUFile *f, void *opaque, int
> version_id)
> }
> }
>
> + /* Call host_from_stream_offset while holding an rcu read lock.
> + * It returns a pointer from within the rcu-protected ram_list.
> + */
> if (flags & RAM_SAVE_FLAG_COMPRESS) {
> - void *host;
> uint8_t ch;
> -
> - host = host_from_stream_offset(f, addr, flags);
> + void *host = host_from_stream_offset(f, addr, flags);
> if (!host) {
> - return -EINVAL;
> + ret = -EINVAL;
> + goto done;
> }
> -
> ch = qemu_get_byte(f);
> ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
> } else if (flags & RAM_SAVE_FLAG_PAGE) {
> - void *host;
> -
> - host = host_from_stream_offset(f, addr, flags);
> + void *host = host_from_stream_offset(f, addr, flags);
> if (!host) {
> - return -EINVAL;
> + ret = -EINVAL;
> + goto done;
> }
> -
> qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
> } else if (flags & RAM_SAVE_FLAG_XBZRLE) {
> void *host = host_from_stream_offset(f, addr, flags);
> if (!host) {
> - return -EINVAL;
> + ret = -EINVAL;
> + goto done;
> }
>
> if (load_xbzrle(f, addr, host) < 0) {
> @@ -957,6 +976,7 @@ static int ram_load(QEMUFile *f, void *opaque, int
> version_id)
> } while (!(flags & RAM_SAVE_FLAG_EOS));
>
> done:
> + rcu_read_unlock();
> DPRINTF("Completed load of VM with exit code %d seq iteration "
> "%" PRIu64 "\n", ret, seq_iter);
> return ret;
> diff --git a/exec.c b/exec.c
> index 5eebcc1..4b7605a 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -46,7 +46,7 @@
> #endif
> #include "exec/cpu-all.h"
> #include "qemu/tls.h"
> -
> +#include "qemu/rcu_queue.h"
> #include "exec/cputlb.h"
> #include "translate-all.h"
>
> @@ -57,7 +57,10 @@
> #if !defined(CONFIG_USER_ONLY)
> static int in_migration;
>
> -RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
> +/* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
> + * are protected by a lock, currently the iothread lock.
> + */
> +RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
>
> static MemoryRegion *system_memory;
> static MemoryRegion *system_io;
> @@ -329,7 +332,6 @@ void cpu_exec_init_all(void)
> {
> tls_alloc_current_cpu_var();
> #if !defined(CONFIG_USER_ONLY)
> - qemu_mutex_init(&ram_list.mutex);
> memory_map_init();
> io_mem_init();
> #endif
> @@ -901,16 +903,6 @@ void qemu_flush_coalesced_mmio_buffer(void)
> kvm_flush_coalesced_mmio_buffer();
> }
>
> -void qemu_mutex_lock_ramlist(void)
> -{
> - qemu_mutex_lock(&ram_list.mutex);
> -}
> -
> -void qemu_mutex_unlock_ramlist(void)
> -{
> - qemu_mutex_unlock(&ram_list.mutex);
> -}
> -
> #if defined(__linux__) && !defined(TARGET_S390X)
>
> #include <sys/vfs.h>
> @@ -1021,17 +1013,24 @@ static ram_addr_t find_ram_offset(ram_addr_t size)
> RAMBlock *block, *next_block;
> ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
>
> + /* ram_list must be protected by a mutex (for writes), or
> + * an rcu critical section (for reads). Currently this code
> + * is called with the iothread lock held. If that changes,
> + * make sure to protect ram_list with an rcu critical section.
> + */
> +
> assert(size != 0); /* it would hand out same offset multiple times */
>
> - if (QTAILQ_EMPTY(&ram_list.blocks))
> + if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
> return 0;
> + }
>
> - QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
> ram_addr_t end, next = RAM_ADDR_MAX;
>
> end = block->offset + block->length;
>
> - QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
> + QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
> if (next_block->offset >= end) {
> next = MIN(next, next_block->offset);
> }
> @@ -1056,9 +1055,11 @@ ram_addr_t last_ram_offset(void)
> RAMBlock *block;
> ram_addr_t last = 0;
>
> - QTAILQ_FOREACH(block, &ram_list.blocks, next)
> + rcu_read_lock();
> + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
> last = MAX(last, block->offset + block->length);
> -
> + }
> + rcu_read_unlock();
> return last;
> }
>
> @@ -1083,7 +1084,12 @@ void qemu_ram_set_idstr(ram_addr_t addr, const char
> *name, DeviceState *dev)
> RAMBlock *new_block, *block;
>
> new_block = NULL;
> - QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> +
> + /* Assumes that the iothread lock is taken ... if that changes,
> + * add an rcu_read_lock()/unlock pair when traversing the
> + * ram list
> + */
> + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
> if (block->offset == addr) {
> new_block = block;
> break;
> @@ -1102,15 +1108,13 @@ void qemu_ram_set_idstr(ram_addr_t addr, const char
> *name, DeviceState *dev)
> pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
>
> /* This assumes the iothread lock is taken here too. */
> - qemu_mutex_lock_ramlist();
> - QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
> if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
> fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
> new_block->idstr);
> abort();
> }
> }
> - qemu_mutex_unlock_ramlist();
> }
>
> static int memory_try_enable_merging(void *addr, size_t len)
> @@ -1123,16 +1127,16 @@ static int memory_try_enable_merging(void *addr,
> size_t len)
> return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
> }
>
> +/* Called with the iothread lock being held */
> +
> ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
> MemoryRegion *mr)
> {
> - RAMBlock *block, *new_block;
> + RAMBlock *block, *new_block, *last_block = 0;
>
> size = TARGET_PAGE_ALIGN(size);
> new_block = g_malloc0(sizeof(*new_block));
>
> - /* This assumes the iothread lock is taken here too. */
> - qemu_mutex_lock_ramlist();
> new_block->mr = mr;
> new_block->offset = find_ram_offset(size);
> if (host) {
> @@ -1165,21 +1169,24 @@ ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size,
> void *host,
> new_block->length = size;
>
> /* Keep the list sorted from biggest to smallest block. */
> - QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
> + last_block = block;
> if (block->length < new_block->length) {
> break;
> }
> }
> if (block) {
> - QTAILQ_INSERT_BEFORE(block, new_block, next);
> + QLIST_INSERT_BEFORE_RCU(block, new_block, next);
> } else {
> - QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
> + if (last_block) {
> + QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
> + } else { /* list is empty */
> + QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
> + }
> }
> ram_list.mru_block = NULL;
>
> ram_list.version++;
> - qemu_mutex_unlock_ramlist();
> -
> ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
> last_ram_offset() >>
> TARGET_PAGE_BITS);
> memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
> @@ -1204,31 +1211,29 @@ void qemu_ram_free_from_ptr(ram_addr_t addr)
> {
> RAMBlock *block;
>
> - /* This assumes the iothread lock is taken here too. */
> - qemu_mutex_lock_ramlist();
> - QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> + /* This assumes the iothread lock is taken here too. */
> + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
> if (addr == block->offset) {
> - QTAILQ_REMOVE(&ram_list.blocks, block, next);
> + QLIST_REMOVE_RCU(block, next);
> ram_list.mru_block = NULL;
> ram_list.version++;
> - g_free(block);
> + call_rcu(block, (void (*)(struct RAMBlock *))g_free, rcu);
Hmm, I'll probably reintroduce reclaim_ram_block, while keeping
call_rcu's well-typedness.
> break;
> }
> }
> - qemu_mutex_unlock_ramlist();
> }
>
> +/* called with the iothread lock held */
> void qemu_ram_free(ram_addr_t addr)
> {
> RAMBlock *block;
>
> - /* This assumes the iothread lock is taken here too. */
> - qemu_mutex_lock_ramlist();
> - QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
> if (addr == block->offset) {
> - QTAILQ_REMOVE(&ram_list.blocks, block, next);
> + QLIST_REMOVE_RCU(block, next);
> ram_list.mru_block = NULL;
> - ram_list.version++;
> + int ram_list_version = atomic_rcu_read(&ram_list.version);
> + atomic_rcu_set(&ram_list.version, ++ram_list_version);
> if (block->flags & RAM_PREALLOC_MASK) {
> ;
> } else if (mem_path) {
> @@ -1249,12 +1254,10 @@ void qemu_ram_free(ram_addr_t addr)
> qemu_anon_ram_free(block->host, block->length);
> }
> }
> - g_free(block);
> + call_rcu(block, (void (*)(struct RAMBlock *))g_free, rcu);
> break;
> }
> }
> - qemu_mutex_unlock_ramlist();
> -
> }
>
> #ifndef _WIN32
> @@ -1265,7 +1268,7 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
> int flags;
> void *area, *vaddr;
>
> - QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
> offset = addr - block->offset;
> if (offset < block->length) {
> vaddr = block->host + offset;
> @@ -1313,7 +1316,6 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
> memory_try_enable_merging(vaddr, length);
> qemu_ram_setup_dump(vaddr, length);
> }
> - return;
> }
> }
> }
> @@ -1323,23 +1325,21 @@ static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
> {
> RAMBlock *block;
>
> - /* The list is protected by the iothread lock here. */
> + /* This assumes the iothread lock is taken here too. */
> +
> block = ram_list.mru_block;
> if (block && addr - block->offset < block->length) {
> - goto found;
> + return block;
> }
> - QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
> if (addr - block->offset < block->length) {
> - goto found;
> + atomic_rcu_set(&ram_list.mru_block, block);
I think this is not needed, block was already published into the block
list. What is important is to order mru_block == NULL so that it
happens before the node is removed. Which we don't do, but we can fix
later.
> + return block;
> }
> }
>
> fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
> abort();
> -
> -found:
> - ram_list.mru_block = block;
> - return block;
> }
>
> /* Return a host pointer to ram allocated with qemu_ram_alloc.
> @@ -1378,8 +1378,8 @@ static void *qemu_safe_ram_ptr(ram_addr_t addr)
> {
> RAMBlock *block;
>
> - /* The list is protected by the iothread lock here. */
> - QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> + /* This assumes the iothread lock is taken here too. */
> + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
> if (addr - block->offset < block->length) {
> if (xen_enabled()) {
> /* We need to check if the requested address is in the RAM
> @@ -1399,14 +1399,19 @@ static void *qemu_safe_ram_ptr(ram_addr_t addr)
>
> fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
> abort();
> -
> return NULL;
> }
>
> /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
> - * but takes a size argument */
> + * but takes a size argument
> + *
> + * The returned pointer is not protected by RCU so the caller
> + * must have other means of protecting the pointer, such as a
> + * reference.
> + * */
> static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
> {
> + void *ptr = NULL;
> if (*size == 0) {
> return NULL;
> }
> @@ -1414,12 +1419,14 @@ static void *qemu_ram_ptr_length(ram_addr_t addr,
> hwaddr *size)
> return xen_map_cache(addr, *size, 1);
> } else {
> RAMBlock *block;
> -
> - QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> + rcu_read_lock();
> + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
> if (addr - block->offset < block->length) {
> if (addr - block->offset + *size > block->length)
> *size = block->length - addr + block->offset;
> - return block->host + (addr - block->offset);
> + ptr = block->host + (addr - block->offset);
> + rcu_read_unlock();
> + return ptr;
> }
> }
>
> @@ -1429,37 +1436,43 @@ static void *qemu_ram_ptr_length(ram_addr_t addr,
> hwaddr *size)
> }
>
> /* Some of the softmmu routines need to translate from a host pointer
> - (typically a TLB entry) back to a ram offset. */
> + * (typically a TLB entry) back to a ram offset.
> + *
> + * Note that the returned MemoryRegion is not RCU-protected.
> + */
> MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
> {
> RAMBlock *block;
> uint8_t *host = ptr;
> + MemoryRegion *mr = NULL;
>
> if (xen_enabled()) {
> *ram_addr = xen_ram_addr_from_mapcache(ptr);
> return qemu_get_ram_block(*ram_addr)->mr;
> }
> -
> - block = ram_list.mru_block;
> + rcu_read_lock();
> + block = atomic_rcu_read(&ram_list.mru_block);
Good catch!
Paolo
> if (block && block->host && host - block->host < block->length) {
> - goto found;
> + *ram_addr = block->offset + (host - block->host);
> + mr = block->mr;
> + goto unlock_out;
> }
>
> - QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
> /* This case append when the block is not mapped. */
> if (block->host == NULL) {
> continue;
> }
> if (host - block->host < block->length) {
> - goto found;
> + *ram_addr = block->offset + (host - block->host);
> + mr = block->mr;
> + goto unlock_out;
> }
> }
>
> - return NULL;
> -
> -found:
> - *ram_addr = block->offset + (host - block->host);
> - return block->mr;
> +unlock_out:
> + rcu_read_unlock();
> + return mr;
> }
>
> static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
> @@ -2709,9 +2722,10 @@ bool cpu_physical_memory_is_io(hwaddr phys_addr)
> void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
> {
> RAMBlock *block;
> -
> - QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> + rcu_read_lock();
> + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
> func(block->host, block->offset, block->length, opaque);
> }
> + rcu_read_unlock();
> }
> #endif
> diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
> index e088089..e68b86b 100644
> --- a/include/exec/cpu-all.h
> +++ b/include/exec/cpu-all.h
> @@ -24,6 +24,7 @@
> #include "qemu/thread.h"
> #include "qom/cpu.h"
> #include "qemu/tls.h"
> +#include "qemu/rcu.h"
>
> /* some important defines:
> *
> @@ -448,28 +449,26 @@ extern ram_addr_t ram_size;
> #define RAM_PREALLOC_MASK (1 << 0)
>
> typedef struct RAMBlock {
> + struct rcu_head rcu;
> struct MemoryRegion *mr;
> uint8_t *host;
> ram_addr_t offset;
> ram_addr_t length;
> uint32_t flags;
> char idstr[256];
> - /* Reads can take either the iothread or the ramlist lock.
> - * Writes must take both locks.
> - */
> - QTAILQ_ENTRY(RAMBlock) next;
> + /* Writes must hold the iothread lock */
> + QLIST_ENTRY(RAMBlock) next;
> #if defined(__linux__) && !defined(TARGET_S390X)
> int fd;
> #endif
> } RAMBlock;
>
> typedef struct RAMList {
> - QemuMutex mutex;
> /* Protected by the iothread lock. */
> uint8_t *phys_dirty;
> RAMBlock *mru_block;
> - /* Protected by the ramlist lock. */
> - QTAILQ_HEAD(, RAMBlock) blocks;
> + /* RCU-enabled, writes protected by the iothread lock. */
> + QLIST_HEAD(, RAMBlock) blocks;
> uint32_t version;
> } RAMList;
> extern RAMList ram_list;
> diff --git a/include/qemu/rcu_queue.h b/include/qemu/rcu_queue.h
> index e2b8ba5..d159850 100644
> --- a/include/qemu/rcu_queue.h
> +++ b/include/qemu/rcu_queue.h
> @@ -37,6 +37,14 @@
> extern "C" {
> #endif
>
> +
> +/*
> + * List access methods.
> + */
> +#define QLIST_EMPTY_RCU(head) (atomic_rcu_read(&(head)->lh_first) == NULL)
> +#define QLIST_FIRST_RCU(head) (atomic_rcu_read(&(head)->lh_first))
> +#define QLIST_NEXT_RCU(elm, field) (atomic_rcu_read(&(elm)->field.le_next))
> +
> /*
> * List functions.
> */
>