[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [RFC v2] translate-all: protect code_gen_buffer with RC
From: |
Alex Bennée |
Subject: |
Re: [Qemu-devel] [RFC v2] translate-all: protect code_gen_buffer with RCU |
Date: |
Mon, 25 Apr 2016 16:19:59 +0100 |
User-agent: |
mu4e 0.9.17; emacs 25.0.93.1 |
Emilio G. Cota <address@hidden> writes:
> [ Applies on top of bennee/mttcg/enable-mttcg-for-armv7-v1 after
> reverting "translate-all: introduces tb_flush_safe". A trivial
> conflict must be solved after applying. ]
>
> This is a first attempt at making tb_flush not have to stop all CPUs.
> There are issues as pointed out below, but this could be a good start.
>
> Context:
> https://lists.gnu.org/archive/html/qemu-devel/2016-03/msg04658.html
> https://lists.gnu.org/archive/html/qemu-devel/2016-03/msg06942.html
>
> Changes from v1:
> - When a static buffer is used, split it in two instead of using
> a second buffer.
>
> Known issues:
> - Fails Alex' unit test with low enough -tb-size, see
> https://lists.gnu.org/archive/html/qemu-devel/2016-04/msg03465.html
> Seems to work in MTTCG, although I've only tested with tb_lock
> always being taken in tb_find_fast.
With --enable-debug-tcg I get it failing pretty quickly:
#4 0x00005555556d332a in tcg_global_alloc (s=0x555556007ba0 <tcg_ctx>)
at /home/alex/lsrc/qemu/qemu.git/tcg/tcg.c:463
463 tcg_debug_assert(s->nb_globals == s->nb_temps);
(gdb) p s->nb_globals
$1 = 24
(gdb) p s->nb_temps
$2 = 31
Seems odd though, the other threads are all waiting on the tb_lock.
> - Windows; not even compile-tested!
>
> Signed-off-by: Emilio G. Cota <address@hidden>
> ---
> translate-all.c | 146
> +++++++++++++++++++++++++++++++++++++++++++++++++++-----
> 1 file changed, 133 insertions(+), 13 deletions(-)
>
> diff --git a/translate-all.c b/translate-all.c
> index 8e70583..6830371 100644
> --- a/translate-all.c
> +++ b/translate-all.c
> @@ -535,6 +535,9 @@ static inline void *split_cross_256mb(void *buf1, size_t
> size1)
> #ifdef USE_STATIC_CODE_GEN_BUFFER
> static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
> __attribute__((aligned(CODE_GEN_ALIGN)));
> +static int static_buf_mask = 1;
> +static void *static_buf1;
> +static void *static_buf2;
>
> # ifdef _WIN32
> static inline void do_protect(void *addr, long size, int prot)
> @@ -577,6 +580,13 @@ static inline void map_none(void *addr, long size)
> }
> # endif /* WIN32 */
>
> +static void map_static_code_gen_buffer(void *buf, size_t size)
> +{
> + map_exec(buf, size);
> + map_none(buf + size, qemu_real_host_page_size);
> + qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE);
> +}
> +
> static inline void *alloc_code_gen_buffer(void)
> {
> void *buf = static_code_gen_buffer;
> @@ -586,28 +596,41 @@ static inline void *alloc_code_gen_buffer(void)
> full_size = (((uintptr_t)buf + sizeof(static_code_gen_buffer))
> & qemu_real_host_page_mask) - (uintptr_t)buf;
>
> - /* Reserve a guard page. */
> - size = full_size - qemu_real_host_page_size;
> + /*
> + * Reserve two guard pages, one after each of the two buffers:
> + * | buf1 |g1| buf2 |g2|
> + */
> + size = full_size - 2 * qemu_real_host_page_size;
>
> /* Honor a command-line option limiting the size of the buffer. */
> if (size > tcg_ctx.code_gen_buffer_size) {
> size = (((uintptr_t)buf + tcg_ctx.code_gen_buffer_size)
> & qemu_real_host_page_mask) - (uintptr_t)buf;
> }
> - tcg_ctx.code_gen_buffer_size = size;
>
> #ifdef __mips__
> - if (cross_256mb(buf, size)) {
> - buf = split_cross_256mb(buf, size);
> - size = tcg_ctx.code_gen_buffer_size;
> + /*
> + * Pass 'size + page_size', since we want 'buf1 | guard1 | buf2' to be
> + * within the boundary.
> + */
> + if (cross_256mb(buf, size + qemu_real_host_page_size)) {
> + buf = split_cross_256mb(buf, size + qemu_real_host_page_size);
> + size = tcg_ctx.code_gen_buffer_size - qemu_real_host_page_size;
> }
> #endif
>
> - map_exec(buf, size);
> - map_none(buf + size, qemu_real_host_page_size);
> - qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE);
> + /* split the buffer in two */
> + size /= 2;
> + tcg_ctx.code_gen_buffer_size = size;
>
> - return buf;
> + static_buf1 = buf;
> + static_buf2 = buf + size + qemu_real_host_page_mask;
> +
> + map_static_code_gen_buffer(static_buf1, size);
> + map_static_code_gen_buffer(static_buf2, size);
> +
> + assert(static_buf_mask == 1);
> + return static_buf1;
> }
> #elif defined(_WIN32)
> static inline void *alloc_code_gen_buffer(void)
> @@ -825,10 +848,100 @@ static void page_flush_tb(void)
> }
> }
>
> +#ifdef USE_STATIC_CODE_GEN_BUFFER
> +
> +struct code_gen_desc {
> + struct rcu_head rcu;
> + int clear_bit;
> +};
> +
> +static void code_gen_buffer_clear(struct rcu_head *rcu)
> +{
> + struct code_gen_desc *desc = container_of(rcu, struct code_gen_desc,
> rcu);
> +
> + tb_lock();
> + static_buf_mask &= ~desc->clear_bit;
> + tb_unlock();
> + g_free(desc);
> +}
> +
> +static void *code_gen_buffer_replace(void)
> +{
> + struct code_gen_desc *desc = g_malloc0(sizeof(*desc));
> +
> + /*
> + * If both bits are set, we're having two concurrent flushes. This
> + * can easily happen if the buffers are heavily undersized.
> + */
> + assert(static_buf_mask == 1 || static_buf_mask == 2);
> +
> + desc->clear_bit = static_buf_mask;
> + call_rcu1(&desc->rcu, code_gen_buffer_clear);
> +
> + if (static_buf_mask == 1) {
> + static_buf_mask |= 2;
> + return static_buf2;
> + }
> + static_buf_mask |= 1;
> + return static_buf1;
> +}
> +
> +#elif defined(_WIN32)
> +
> +struct code_gen_desc {
> + struct rcu_head rcu;
> + void *buf;
> +};
> +
> +static void code_gen_buffer_vfree(struct rcu_head *rcu)
> +{
> + struct code_gen_desc *desc = container_of(rcu, struct code_gen_desc,
> rcu);
> +
> + VirtualFree(desc->buf, 0, MEM_RELEASE);
> + g_free(desc);
> +}
> +
> +static void *code_gen_buffer_replace(void)
> +{
> + struct code_gen_desc *desc;
> +
> + desc = g_malloc0(sizeof(*desc));
> + desc->buf = tcg_ctx.code_gen_buffer;
> + call_rcu1(&desc->rcu, code_gen_buffer_vfree);
> +
> + return alloc_code_gen_buffer();
> +}
> +
> +#else /* UNIX, dynamically-allocated code buffer */
> +
> +struct code_gen_desc {
> + struct rcu_head rcu;
> + void *buf;
> + size_t size;
> +};
> +
> +static void code_gen_buffer_unmap(struct rcu_head *rcu)
> +{
> + struct code_gen_desc *desc = container_of(rcu, struct code_gen_desc,
> rcu);
> +
> + munmap(desc->buf, desc->size + qemu_real_host_page_size);
> + g_free(desc);
> +}
> +
> +static void *code_gen_buffer_replace(void)
> +{
> + struct code_gen_desc *desc;
> +
> + desc = g_malloc0(sizeof(*desc));
> + desc->buf = tcg_ctx.code_gen_buffer;
> + desc->size = tcg_ctx.code_gen_buffer_size;
> + call_rcu1(&desc->rcu, code_gen_buffer_unmap);
> +
> + return alloc_code_gen_buffer();
> +}
> +#endif /* USE_STATIC_CODE_GEN_BUFFER */
> +
> /* flush all the translation blocks */
> -/* XXX: tb_flush is currently not thread safe. System emulation calls it
> only
> - * with tb_lock taken or from safe_work, so no need to take tb_lock here.
> - */
> void tb_flush(CPUState *cpu)
> {
> #if defined(DEBUG_FLUSH)
> @@ -852,10 +965,17 @@ void tb_flush(CPUState *cpu)
> memset(tcg_ctx.tb_ctx.tb_phys_hash, 0,
> sizeof(tcg_ctx.tb_ctx.tb_phys_hash));
> page_flush_tb();
>
> + tcg_ctx.code_gen_buffer = code_gen_buffer_replace();
> tcg_ctx.code_gen_ptr = tcg_ctx.code_gen_buffer;
> + tcg_prologue_init(&tcg_ctx);
> /* XXX: flush processor icache at this point if cache flush is
> expensive */
> tcg_ctx.tb_ctx.tb_flush_count++;
> +
> + /* exit all CPUs so that the old buffer is quickly cleared. */
> + CPU_FOREACH(cpu) {
> + cpu_exit(cpu);
> + }
> }
>
> #ifdef DEBUG_TB_CHECK
--
Alex Bennée
- [Qemu-devel] [RFC] translate-all: protect code_gen_buffer with RCU, Emilio G. Cota, 2016/04/21
- Re: [Qemu-devel] [RFC] translate-all: protect code_gen_buffer with RCU, Alex Bennée, 2016/04/22
- Re: [Qemu-devel] [RFC] translate-all: protect code_gen_buffer with RCU, Richard Henderson, 2016/04/22
- [Qemu-devel] [RFC v2] translate-all: protect code_gen_buffer with RCU, Emilio G. Cota, 2016/04/23
- Re: [Qemu-devel] [RFC v2] translate-all: protect code_gen_buffer with RCU, Richard Henderson, 2016/04/24
- Re: [Qemu-devel] [RFC v2] translate-all: protect code_gen_buffer with RCU,
Alex Bennée <=
- Re: [Qemu-devel] [RFC v2] translate-all: protect code_gen_buffer with RCU, Emilio G. Cota, 2016/04/25
- [Qemu-devel] [RFC v3] translate-all: protect code_gen_buffer with RCU, Emilio G. Cota, 2016/04/25
- Re: [Qemu-devel] [RFC v3] translate-all: protect code_gen_buffer with RCU, Richard Henderson, 2016/04/26
- Re: [Qemu-devel] [RFC v3] translate-all: protect code_gen_buffer with RCU, Alex Bennée, 2016/04/26
- Re: [Qemu-devel] [RFC v3] translate-all: protect code_gen_buffer with RCU, Richard Henderson, 2016/04/26
- Re: [Qemu-devel] [RFC v3] translate-all: protect code_gen_buffer with RCU, Alex Bennée, 2016/04/26
- Re: [Qemu-devel] [RFC v3] translate-all: protect code_gen_buffer with RCU, Emilio G. Cota, 2016/04/29