[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH v2 01/19] tcg: Enhance flush_icache_range with separate data
From: |
Joelle van Dyne |
Subject: |
Re: [PATCH v2 01/19] tcg: Enhance flush_icache_range with separate data pointer |
Date: |
Sat, 31 Oct 2020 23:54:42 -0700 |
s->code_ptr and s->code_buf are 4 byte pointers on aarch64 so the
cache flush is off by a factor of 4
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 44b923f5fe..2c4b66965b 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -4325,7 +4325,8 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
/* flush instruction cache */
flush_idcache_range((uintptr_t)tcg_mirror_rw_to_rx(s->code_buf),
- (uintptr_t)s->code_buf, s->code_ptr - s->code_buf);
+ (uintptr_t)s->code_buf,
+ (uintptr_t)s->code_ptr - (uintptr_t)s->code_buf);
return tcg_current_code_size(s);
}
With this and the other changes, split JIT works fine on iOS.
-j
On Thu, Oct 29, 2020 at 5:49 PM Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> We are shortly going to have a split rw/rx jit buffer. Depending
> on the host, we need to flush the dcache at the rw data pointer and
> flush the icache at the rx code pointer.
>
> For now, the two passed pointers are identical, so there is no
> effective change in behaviour.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> tcg/aarch64/tcg-target.h | 9 +++++++--
> tcg/arm/tcg-target.h | 8 ++++++--
> tcg/i386/tcg-target.h | 3 ++-
> tcg/mips/tcg-target.h | 8 ++++++--
> tcg/ppc/tcg-target.h | 2 +-
> tcg/riscv/tcg-target.h | 8 ++++++--
> tcg/s390/tcg-target.h | 3 ++-
> tcg/sparc/tcg-target.h | 8 +++++---
> tcg/tci/tcg-target.h | 3 ++-
> softmmu/physmem.c | 9 ++++++++-
> tcg/tcg.c | 5 +++--
> tcg/aarch64/tcg-target.c.inc | 2 +-
> tcg/mips/tcg-target.c.inc | 2 +-
> tcg/ppc/tcg-target.c.inc | 21 +++++++++++----------
> tcg/sparc/tcg-target.c.inc | 4 ++--
> 15 files changed, 63 insertions(+), 32 deletions(-)
>
> diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
> index 663dd0b95e..d0a6a059b7 100644
> --- a/tcg/aarch64/tcg-target.h
> +++ b/tcg/aarch64/tcg-target.h
> @@ -148,9 +148,14 @@ typedef enum {
> #define TCG_TARGET_DEFAULT_MO (0)
> #define TCG_TARGET_HAS_MEMORY_BSWAP 1
>
> -static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
> +/* Flush the dcache at RW, and the icache at RX, as necessary. */
> +static inline void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t
> len)
> {
> - __builtin___clear_cache((char *)start, (char *)stop);
> + /* TODO: Copy this from gcc to avoid 4 loops instead of 2. */
> + if (rw != rx) {
> + __builtin___clear_cache((char *)rw, (char *)(rw + len));
> + }
> + __builtin___clear_cache((char *)rx, (char *)(rx + len));
> }
>
> void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t);
> diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
> index 17e771374d..fa88b24e43 100644
> --- a/tcg/arm/tcg-target.h
> +++ b/tcg/arm/tcg-target.h
> @@ -134,9 +134,13 @@ enum {
> #define TCG_TARGET_DEFAULT_MO (0)
> #define TCG_TARGET_HAS_MEMORY_BSWAP 1
>
> -static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
> +/* Flush the dcache at RW, and the icache at RX, as necessary. */
> +static inline void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t
> len)
> {
> - __builtin___clear_cache((char *) start, (char *) stop);
> + if (rw != rx) {
> + __builtin___clear_cache((char *)rw, (char *)(rw + len));
> + }
> + __builtin___clear_cache((char *)rx, (char *)(rx + len));
> }
>
> /* not defined -- call should be eliminated at compile time */
> diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
> index abd4ac7fc0..8323e72639 100644
> --- a/tcg/i386/tcg-target.h
> +++ b/tcg/i386/tcg-target.h
> @@ -206,7 +206,8 @@ extern bool have_avx2;
> #define TCG_TARGET_extract_i64_valid(ofs, len) \
> (((ofs) == 8 && (len) == 8) || ((ofs) + (len)) == 32)
>
> -static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
> +/* Flush the dcache at RW, and the icache at RX, as necessary. */
> +static inline void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t
> len)
> {
> }
>
> diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
> index c6b091d849..47b1226ee9 100644
> --- a/tcg/mips/tcg-target.h
> +++ b/tcg/mips/tcg-target.h
> @@ -207,9 +207,13 @@ extern bool use_mips32r2_instructions;
> #define TCG_TARGET_DEFAULT_MO (0)
> #define TCG_TARGET_HAS_MEMORY_BSWAP 1
>
> -static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
> +/* Flush the dcache at RW, and the icache at RX, as necessary. */
> +static inline void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t
> len)
> {
> - cacheflush ((void *)start, stop-start, ICACHE);
> + if (rx != rw) {
> + cacheflush((void *)rw, len, DCACHE);
> + }
> + cacheflush((void *)rx, len, ICACHE);
> }
>
> void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t);
> diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
> index be10363956..fbb6dc1b47 100644
> --- a/tcg/ppc/tcg-target.h
> +++ b/tcg/ppc/tcg-target.h
> @@ -175,7 +175,7 @@ extern bool have_vsx;
> #define TCG_TARGET_HAS_bitsel_vec have_vsx
> #define TCG_TARGET_HAS_cmpsel_vec 0
>
> -void flush_icache_range(uintptr_t start, uintptr_t stop);
> +void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len);
> void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t);
>
> #define TCG_TARGET_DEFAULT_MO (0)
> diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h
> index 032439d806..0fa6ae358e 100644
> --- a/tcg/riscv/tcg-target.h
> +++ b/tcg/riscv/tcg-target.h
> @@ -159,9 +159,13 @@ typedef enum {
> #define TCG_TARGET_HAS_mulsh_i64 1
> #endif
>
> -static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
> +/* Flush the dcache at RW, and the icache at RX, as necessary. */
> +static inline void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t
> len)
> {
> - __builtin___clear_cache((char *)start, (char *)stop);
> + if (rx != rw) {
> + __builtin___clear_cache((char *)rw, (char *)(rw + len));
> + }
> + __builtin___clear_cache((char *)rx, (char *)(rx + len));
> }
>
> /* not defined -- call should be eliminated at compile time */
> diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h
> index 63c8797bd3..c3dc2e8938 100644
> --- a/tcg/s390/tcg-target.h
> +++ b/tcg/s390/tcg-target.h
> @@ -145,7 +145,8 @@ enum {
> TCG_AREG0 = TCG_REG_R10,
> };
>
> -static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
> +/* Flush the dcache at RW, and the icache at RX, as necessary. */
> +static inline void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t
> len)
> {
> }
>
> diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
> index 633841ebf2..c27c40231e 100644
> --- a/tcg/sparc/tcg-target.h
> +++ b/tcg/sparc/tcg-target.h
> @@ -168,10 +168,12 @@ extern bool use_vis3_instructions;
> #define TCG_TARGET_DEFAULT_MO (0)
> #define TCG_TARGET_HAS_MEMORY_BSWAP 1
>
> -static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
> +/* Flush the dcache at RW, and the icache at RX, as necessary. */
> +static inline void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t
> len)
> {
> - uintptr_t p;
> - for (p = start & -8; p < ((stop + 7) & -8); p += 8) {
> + /* No additional data flush to the RW virtual address required. */
> + uintptr_t p, end = (rx + len + 7) & -8;
> + for (p = rx & -8; p < end; p += 8) {
> __asm__ __volatile__("flush\t%0" : : "r" (p));
> }
> }
> diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
> index 8c1c1d265d..6460449719 100644
> --- a/tcg/tci/tcg-target.h
> +++ b/tcg/tci/tcg-target.h
> @@ -191,7 +191,8 @@ void tci_disas(uint8_t opc);
>
> #define HAVE_TCG_QEMU_TB_EXEC
>
> -static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
> +/* Flush the dcache at RW, and the icache at RX, as necessary. */
> +static inline void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t
> len)
> {
> }
>
> diff --git a/softmmu/physmem.c b/softmmu/physmem.c
> index a9adedb9f8..b23c1fef54 100644
> --- a/softmmu/physmem.c
> +++ b/softmmu/physmem.c
> @@ -2954,7 +2954,14 @@ static inline MemTxResult
> address_space_write_rom_internal(AddressSpace *as,
> invalidate_and_set_dirty(mr, addr1, l);
> break;
> case FLUSH_CACHE:
> - flush_icache_range((uintptr_t)ram_ptr, (uintptr_t)ram_ptr +
> l);
> + /*
> + * FIXME: This function is currently located in tcg/host/,
> + * but we never come here when tcg is enabled; only for
> + * real hardware acceleration. This can actively fail
> + * when TCI is configured, since that function is a nop.
> + * We should move this to util/ or something.
> + */
> + flush_idcache_range((uintptr_t)ram_ptr, (uintptr_t)ram_ptr,
> l);
> break;
> }
> }
> diff --git a/tcg/tcg.c b/tcg/tcg.c
> index a8c28440e2..3bf36e0cfe 100644
> --- a/tcg/tcg.c
> +++ b/tcg/tcg.c
> @@ -1076,7 +1076,7 @@ void tcg_prologue_init(TCGContext *s)
> #endif
>
> buf1 = s->code_ptr;
> - flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1);
> + flush_idcache_range((uintptr_t)buf0, (uintptr_t)buf0, buf1 - buf0);
>
> /* Deduct the prologue from the buffer. */
> prologue_size = tcg_current_code_size(s);
> @@ -4268,7 +4268,8 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
> }
>
> /* flush instruction cache */
> - flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);
> + flush_idcache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_buf,
> + s->code_ptr - s->code_buf);
>
> return tcg_current_code_size(s);
> }
> diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
> index 26f71cb599..83af3108a4 100644
> --- a/tcg/aarch64/tcg-target.c.inc
> +++ b/tcg/aarch64/tcg-target.c.inc
> @@ -1363,7 +1363,7 @@ void tb_target_set_jmp_target(uintptr_t tc_ptr,
> uintptr_t jmp_addr,
> }
> pair = (uint64_t)i2 << 32 | i1;
> qatomic_set((uint64_t *)jmp_addr, pair);
> - flush_icache_range(jmp_addr, jmp_addr + 8);
> + flush_idcache_range(jmp_addr, jmp_addr, 8);
> }
>
> static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
> diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
> index 41be574e89..c255ecb444 100644
> --- a/tcg/mips/tcg-target.c.inc
> +++ b/tcg/mips/tcg-target.c.inc
> @@ -2660,7 +2660,7 @@ void tb_target_set_jmp_target(uintptr_t tc_ptr,
> uintptr_t jmp_addr,
> uintptr_t addr)
> {
> qatomic_set((uint32_t *)jmp_addr, deposit32(OPC_J, 0, 26, addr >> 2));
> - flush_icache_range(jmp_addr, jmp_addr + 4);
> + flush_idcache_range(jmp_addr, jmp_addr, 4);
> }
>
> typedef struct {
> diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
> index 18ee989f95..a848e98383 100644
> --- a/tcg/ppc/tcg-target.c.inc
> +++ b/tcg/ppc/tcg-target.c.inc
> @@ -1753,12 +1753,12 @@ void tb_target_set_jmp_target(uintptr_t tc_ptr,
> uintptr_t jmp_addr,
> /* As per the enclosing if, this is ppc64. Avoid the _Static_assert
> within qatomic_set that would fail to build a ppc32 host. */
> qatomic_set__nocheck((uint64_t *)jmp_addr, pair);
> - flush_icache_range(jmp_addr, jmp_addr + 8);
> + flush_idcache_range(jmp_addr, jmp_addr, 8);
> } else {
> intptr_t diff = addr - jmp_addr;
> tcg_debug_assert(in_range_b(diff));
> qatomic_set((uint32_t *)jmp_addr, B | (diff & 0x3fffffc));
> - flush_icache_range(jmp_addr, jmp_addr + 4);
> + flush_idcache_range(jmp_addr, jmp_addr, 4);
> }
> }
>
> @@ -3864,22 +3864,23 @@ void tcg_register_jit(void *buf, size_t buf_size)
> }
> #endif /* __ELF__ */
>
> -void flush_icache_range(uintptr_t start, uintptr_t stop)
> +/* Flush the dcache at RW, and the icache at RX, as necessary. */
> +void flush_idcache_range(uintptr_t rx, uintptr_t rw, uintptr_t len)
> {
> - uintptr_t p, start1, stop1;
> + uintptr_t p, start, stop;
> size_t dsize = qemu_dcache_linesize;
> size_t isize = qemu_icache_linesize;
>
> - start1 = start & ~(dsize - 1);
> - stop1 = (stop + dsize - 1) & ~(dsize - 1);
> - for (p = start1; p < stop1; p += dsize) {
> + start = rw & ~(dsize - 1);
> + stop = (rw + len + dsize - 1) & ~(dsize - 1);
> + for (p = start; p < stop; p += dsize) {
> asm volatile ("dcbst 0,%0" : : "r"(p) : "memory");
> }
> asm volatile ("sync" : : : "memory");
>
> - start &= start & ~(isize - 1);
> - stop1 = (stop + isize - 1) & ~(isize - 1);
> - for (p = start1; p < stop1; p += isize) {
> + start = rx & ~(isize - 1);
> + stop = (rx + len + isize - 1) & ~(isize - 1);
> + for (p = start; p < stop; p += isize) {
> asm volatile ("icbi 0,%0" : : "r"(p) : "memory");
> }
> asm volatile ("sync" : : : "memory");
> diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc
> index 6775bd30fc..6e2d755f6a 100644
> --- a/tcg/sparc/tcg-target.c.inc
> +++ b/tcg/sparc/tcg-target.c.inc
> @@ -1836,7 +1836,7 @@ void tb_target_set_jmp_target(uintptr_t tc_ptr,
> uintptr_t jmp_addr,
> if (!USE_REG_TB) {
> qatomic_set((uint32_t *)jmp_addr,
> deposit32(CALL, 0, 30, br_disp >> 2));
> - flush_icache_range(jmp_addr, jmp_addr + 4);
> + flush_idcache_range(jmp_addr, jmp_addr, 4);
> return;
> }
>
> @@ -1860,5 +1860,5 @@ void tb_target_set_jmp_target(uintptr_t tc_ptr,
> uintptr_t jmp_addr,
> }
>
> qatomic_set((uint64_t *)jmp_addr, deposit64(i2, 32, 32, i1));
> - flush_icache_range(jmp_addr, jmp_addr + 8);
> + flush_idcache_range(jmp_addr, jmp_addr, 8);
> }
> --
> 2.25.1
>
- Re: [PATCH v2 01/19] tcg: Enhance flush_icache_range with separate data pointer,
Joelle van Dyne <=