[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH 18/20] tcg: Save insn data and use it in cpu_res
From: |
Peter Maydell |
Subject: |
Re: [Qemu-devel] [PATCH 18/20] tcg: Save insn data and use it in cpu_restore_state_from_tb |
Date: |
Thu, 10 Sep 2015 14:49:13 +0100 |
On 2 September 2015 at 06:52, Richard Henderson <address@hidden> wrote:
> We can now restore state without retranslation.
>
> Signed-off-by: Richard Henderson <address@hidden>
> ---
> include/exec/exec-all.h | 1 +
> tcg/tcg.c | 11 ++++-
> tcg/tcg.h | 3 +-
> translate-all.c | 129
> +++++++++++++++++++++++++++++++++---------------
> 4 files changed, 100 insertions(+), 44 deletions(-)
>
> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
> index 8c347ba..315f20a 100644
> --- a/include/exec/exec-all.h
> +++ b/include/exec/exec-all.h
> @@ -198,6 +198,7 @@ struct TranslationBlock {
> #define CF_USE_ICOUNT 0x20000
>
> void *tc_ptr; /* pointer to the translated code */
> + uint8_t *tc_search; /* pointer to search data */
> /* next matching tb for physical address. */
> struct TranslationBlock *phys_hash_next;
> /* original tb when cflags has CF_NOCACHE */
> diff --git a/tcg/tcg.c b/tcg/tcg.c
> index d956f0b..3541d4c 100644
> --- a/tcg/tcg.c
> +++ b/tcg/tcg.c
> @@ -2300,7 +2300,7 @@ static inline int tcg_gen_code_common(TCGContext *s,
> tcg_insn_unit *gen_code_buf,
> long search_pc)
> {
> - int i, oi, oi_next;
> + int i, oi, oi_next, num_insns;
>
> #ifdef DEBUG_DISAS
> if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP))) {
> @@ -2344,6 +2344,7 @@ static inline int tcg_gen_code_common(TCGContext *s,
>
> tcg_out_tb_init(s);
>
> + num_insns = -1;
> for (oi = s->gen_first_op_idx; oi >= 0; oi = oi_next) {
> TCGOp * const op = &s->gen_op_buf[oi];
> TCGArg * const args = &s->gen_opparam_buf[op->args];
> @@ -2367,6 +2368,10 @@ static inline int tcg_gen_code_common(TCGContext *s,
> tcg_reg_alloc_movi(s, args, dead_args, sync_args);
> break;
> case INDEX_op_insn_start:
> + if (num_insns >= 0) {
> + s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
> + }
> + num_insns++;
> for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
> target_ulong a;
> #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
> @@ -2374,7 +2379,7 @@ static inline int tcg_gen_code_common(TCGContext *s,
> #else
> a = args[i];
> #endif
> - s->gen_opc_data[i] = a;
> + s->gen_insn_data[num_insns][i] = a;
> }
> break;
> case INDEX_op_discard:
> @@ -2406,6 +2411,8 @@ static inline int tcg_gen_code_common(TCGContext *s,
> check_regs(s);
> #endif
> }
> + tcg_debug_assert(num_insns >= 0);
This is claiming that every TB will have at least one insn_start,
right? I think that most targets will violate that in the breakpoint
case, because the "if we have a bp for this insn then generate a
debug insn and break out of the loop" code is before the call
to tcg_gen_insn_start().
We should probably assert that num_insns < TCG_MAX_INSNS while
we're here.
> + s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
>
> /* Generate TB finalization at the end of block */
> tcg_out_tb_finalize(s);
> diff --git a/tcg/tcg.h b/tcg/tcg.h
> index 794b757..11cc107 100644
> --- a/tcg/tcg.h
> +++ b/tcg/tcg.h
> @@ -581,7 +581,8 @@ struct TCGContext {
> uint16_t gen_opc_icount[OPC_BUF_SIZE];
> uint8_t gen_opc_instr_start[OPC_BUF_SIZE];
>
> - target_ulong gen_opc_data[TARGET_INSN_START_WORDS];
> + uint16_t gen_insn_end_off[TCG_MAX_INSNS];
> + target_ulong gen_insn_data[TCG_MAX_INSNS][TARGET_INSN_START_WORDS];
> };
>
> extern TCGContext tcg_ctx;
> diff --git a/translate-all.c b/translate-all.c
> index 74be98a..a31f839 100644
> --- a/translate-all.c
> +++ b/translate-all.c
> @@ -138,58 +138,65 @@ void cpu_gen_init(void)
> tcg_context_init(&tcg_ctx);
> }
>
> -/* The cpu state corresponding to 'searched_pc' is restored.
> - */
> +static target_long decode_sleb128(uint8_t **pp)
> +{
> + uint8_t *p = *pp;
> + target_long val = 0;
> + int byte, shift = 0;
> +
> + do {
> + byte = *p++;
> + val |= (target_ulong)(byte & 0x7f) << shift;
> + shift += 7;
> + } while (byte & 0x80);
> + if (shift < TARGET_LONG_BITS && (byte & 0x40)) {
> + val |= -(target_ulong)1 << shift;
> + }
> +
> + *pp = p;
> + return val;
> +}
Are the encode/decode sleb128 functions known-good ones
borrowed from somewhere else?
(PS: checkpatch complains about missing braces.)
> +
> +/* The cpu state corresponding to 'searched_pc' is restored. */
> static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
> uintptr_t searched_pc)
> {
> + target_ulong data[TARGET_INSN_START_WORDS] = { };
> + uintptr_t host_pc = (uintptr_t)tb->tc_ptr;
> CPUArchState *env = cpu->env_ptr;
> - TCGContext *s = &tcg_ctx;
> - int j;
> - uintptr_t tc_ptr;
> + uint8_t *p = tb->tc_search;
> + int i, j, num_insns = tb->icount;
> #ifdef CONFIG_PROFILER
> - int64_t ti;
> + int64_t ti = profile_getclock();
> #endif
>
> -#ifdef CONFIG_PROFILER
> - ti = profile_getclock();
> -#endif
> - tcg_func_start(s);
> + if (searched_pc < host_pc) {
> + return -1;
> + }
>
> - gen_intermediate_code_pc(env, tb);
> + /* Reconstruct the stored insn data while looking for the point at
> + which the end of the insn exceeds the searched_pc. */
> + for (i = 0; i < num_insns; ++i) {
> + for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
> + data[j] += decode_sleb128(&p);
> + }
> + host_pc += decode_sleb128(&p);
> + if (host_pc > searched_pc) {
> + goto found;
> + }
> + }
> + return -1;
>
> + found:
> if (tb->cflags & CF_USE_ICOUNT) {
> assert(use_icount);
> /* Reset the cycle counter to the start of the block. */
> - cpu->icount_decr.u16.low += tb->icount;
> + cpu->icount_decr.u16.low += num_insns;
> /* Clear the IO flag. */
> cpu->can_do_io = 0;
> }
> -
> - /* find opc index corresponding to search_pc */
> - tc_ptr = (uintptr_t)tb->tc_ptr;
> - if (searched_pc < tc_ptr)
> - return -1;
> -
> - s->tb_next_offset = tb->tb_next_offset;
> -#ifdef USE_DIRECT_JUMP
> - s->tb_jmp_offset = tb->tb_jmp_offset;
> - s->tb_next = NULL;
> -#else
> - s->tb_jmp_offset = NULL;
> - s->tb_next = tb->tb_next;
> -#endif
> - j = tcg_gen_code_search_pc(s, (tcg_insn_unit *)tc_ptr,
> - searched_pc - tc_ptr);
> - if (j < 0)
> - return -1;
> - /* now find start of instruction before */
> - while (s->gen_opc_instr_start[j] == 0) {
> - j--;
> - }
> - cpu->icount_decr.u16.low -= s->gen_opc_icount[j];
> -
> - restore_state_to_opc(env, tb, s->gen_opc_data);
> + cpu->icount_decr.u16.low -= i;
> + restore_state_to_opc(env, tb, data);
>
> #ifdef CONFIG_PROFILER
> s->restore_time += profile_getclock() - ti;
> @@ -933,6 +940,44 @@ static void build_page_bitmap(PageDesc *p)
> }
> }
>
> +static uint8_t *encode_sleb128(uint8_t *p, target_long val)
> +{
> + int more, byte;
> +
> + do {
> + byte = val & 0x7f;
> + val >>= 7;
> + more = !((val == 0 && (byte & 0x40) == 0)
> + || (val == -1 && (byte & 0x40) != 0));
> + if (more)
> + byte |= 0x80;
> + *p++ = byte;
> + } while (more);
> +
> + return p;
> +}
> +
> +static int encode_search(TranslationBlock *tb, uint8_t *block)
> +{
I think this function would benefit from a brief comment
describing the compressed format we're creating here.
> + uint8_t *p = block;
> + int i, j, n;
> +
> + tb->tc_search = block;
> +
> + for (i = 0, n = tb->icount; i < n; ++i) {
> + target_ulong prev;
> +
> + for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
> + prev = (i == 0 ? 0 : tcg_ctx.gen_insn_data[i - 1][j]);
> + p = encode_sleb128(p, tcg_ctx.gen_insn_data[i][j] - prev);
> + }
> + prev = (i == 0 ? 0 : tcg_ctx.gen_insn_end_off[i - 1]);
> + p = encode_sleb128(p, tcg_ctx.gen_insn_end_off[i] - prev);
> + }
> +
> + return p - block;
> +}
> +
> TranslationBlock *tb_gen_code(CPUState *cpu,
> target_ulong pc, target_ulong cs_base,
> int flags, int cflags)
> @@ -942,7 +987,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
> tb_page_addr_t phys_pc, phys_page2;
> target_ulong virt_page2;
> tcg_insn_unit *gen_code_buf;
> - int gen_code_size;
> + int gen_code_size, search_size;
> #ifdef CONFIG_PROFILER
> int64_t ti;
> #endif
> @@ -998,11 +1043,12 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
> #endif
>
> gen_code_size = tcg_gen_code(&tcg_ctx, gen_code_buf);
> + search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size);
Now we're putting the encoded search info in the codegen buffer,
don't we need to adjust the calculation of code_gen_buffer_max_size
to avoid falling off the end if the last TB in the buffer has a very
large set of generated TCG code and also a big encoded search buffer?
It would also be nice to assert if we do fall off the end of the
buffer somehow.
>
> #ifdef CONFIG_PROFILER
> tcg_ctx.code_time += profile_getclock();
> tcg_ctx.code_in_len += tb->size;
> - tcg_ctx.code_out_len += gen_code_size;
> + tcg_ctx.code_out_len += gen_code_size + search_size;
> #endif
How much extra space does the encoded search typically take (as a
% of the gen_code_size, say)?
>
> #ifdef DEBUG_DISAS
> @@ -1014,8 +1060,9 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
> }
> #endif
>
> - tcg_ctx.code_gen_ptr = (void *)(((uintptr_t)gen_code_buf +
> - gen_code_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
> + tcg_ctx.code_gen_ptr = (void *)
> + (((uintptr_t)gen_code_buf + gen_code_size + search_size
> + + CODE_GEN_ALIGN - 1) & -CODE_GEN_ALIGN);
If we're messing with this line anyway we might as well use ROUND_UP:
tcg_ctx.code_gen_ptr = (void *)
ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size,
CODE_GEN_ALIGN);
>
> /* check next page if needed */
> virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
> --
> 2.4.3
thanks
-- PMM
- [Qemu-devel] [PATCH 08/20] target-sh4: Add flags state to insn_start, (continued)
- [Qemu-devel] [PATCH 08/20] target-sh4: Add flags state to insn_start, Richard Henderson, 2015/09/02
- [Qemu-devel] [PATCH 09/20] target-cris: Mirror gen_opc_pc into insn_start, Richard Henderson, 2015/09/02
- [Qemu-devel] [PATCH 11/20] target-sparc: Split out gen_branch_n, Richard Henderson, 2015/09/02
- [Qemu-devel] [PATCH 10/20] target-sparc: Tidy gen_branch_a interface, Richard Henderson, 2015/09/02
- [Qemu-devel] [PATCH 12/20] target-sparc: Remove gen_opc_jump_pc, Richard Henderson, 2015/09/02
- [Qemu-devel] [PATCH 13/20] target-sparc: Add npc state to insn_start, Richard Henderson, 2015/09/02
- [Qemu-devel] [PATCH 14/20] tcg: Merge cpu_gen_code into tb_gen_code, Richard Henderson, 2015/09/02
- [Qemu-devel] [PATCH 15/20] target-*: Drop cpu_gen_code define, Richard Henderson, 2015/09/02
- [Qemu-devel] [PATCH 16/20] tcg: Add TCG_MAX_INSNS, Richard Henderson, 2015/09/02
- [Qemu-devel] [PATCH 18/20] tcg: Save insn data and use it in cpu_restore_state_from_tb, Richard Henderson, 2015/09/02
- Re: [Qemu-devel] [PATCH 18/20] tcg: Save insn data and use it in cpu_restore_state_from_tb,
Peter Maydell <=
- Re: [Qemu-devel] [PATCH 18/20] tcg: Save insn data and use it in cpu_restore_state_from_tb, Richard Henderson, 2015/09/15
[Qemu-devel] [PATCH 20/20] tcg: Remove tcg_gen_code_search_pc, Richard Henderson, 2015/09/02
[Qemu-devel] [PATCH 17/20] tcg: Pass data argument to restore_state_to_opc, Richard Henderson, 2015/09/02
[Qemu-devel] [PATCH 19/20] tcg: Remove gen_intermediate_code_pc, Richard Henderson, 2015/09/02