[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH v3 20/41] tcg/aarch64: Implement flush_idcache_range manually
From: |
Richard Henderson |
Subject: |
[PATCH v3 20/41] tcg/aarch64: Implement flush_idcache_range manually |
Date: |
Thu, 5 Nov 2020 19:29:00 -0800 |
Copy the single-pointer implementation from libgcc and modify it to
support the two-pointer (rx, rw) interface we require. This halves the
number of cache operations required when split-wx is enabled.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/aarch64/tcg-target.h | 11 +------
tcg/aarch64/tcg-target.c.inc | 64 ++++++++++++++++++++++++++++++++++++
2 files changed, 65 insertions(+), 10 deletions(-)
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index fa64058d43..e62d38ba55 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -148,16 +148,7 @@ typedef enum {
#define TCG_TARGET_DEFAULT_MO (0)
#define TCG_TARGET_HAS_MEMORY_BSWAP 1
-/* Flush the dcache at RW, and the icache at RX, as necessary. */
-static inline void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
-{
- /* TODO: Copy this from gcc to avoid 4 loops instead of 2. */
- if (rw != rx) {
- __builtin___clear_cache((char *)rw, (char *)(rw + len));
- }
- __builtin___clear_cache((char *)rx, (char *)(rx + len));
-}
-
+void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len);
void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
#ifdef CONFIG_SOFTMMU
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
index bd888bc66d..8aa1fafd91 100644
--- a/tcg/aarch64/tcg-target.c.inc
+++ b/tcg/aarch64/tcg-target.c.inc
@@ -2968,3 +2968,67 @@ void tcg_register_jit(const void *buf, size_t buf_size)
{
tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
+
+/* Flush the dcache over [rw, rw + len) and the icache over [rx, rx + len). */
+#ifdef CONFIG_DARWIN
+/* Apple does not expose CTR_EL0, so we must use system interfaces. */
+extern void sys_icache_invalidate(void *start, size_t len);
+extern void sys_dcache_flush(void *start, size_t len);
+void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
+{
+ sys_dcache_flush((void *)rw, len);
+ sys_icache_invalidate((void *)rx, len);
+}
+#else
+/*
+ * This is a copy of gcc's __aarch64_sync_cache_range, modified
+ * to take separate RX and RW start addresses plus a length.
+ */
+void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
+{
+ const unsigned CTR_IDC = 1u << 28;
+ const unsigned CTR_DIC = 1u << 29;
+ static unsigned int cache_info;
+ uintptr_t icache_lsize, dcache_lsize, p;
+
+ if (!cache_info) {
+ /*
+ * CTR_EL0 [3:0] contains log2 of icache line size in words.
+ * CTR_EL0 [19:16] contains log2 of dcache line size in words.
+ */
+ asm volatile("mrs\t%0, ctr_el0" : "=r"(cache_info));
+ }
+
+ icache_lsize = 4 << extract32(cache_info, 0, 4);
+ dcache_lsize = 4 << extract32(cache_info, 16, 4);
+
+ /*
+ * If CTR_EL0.IDC is set, data cache clean to the Point of Unification
+ * is not required for instruction to data coherence.
+ */
+ if (!(cache_info & CTR_IDC)) {
+ /*
+ * Loop over the RW range, cleaning one dcache line at a time.
+ * The data cache must be cleaned to the Point of Unification first
+ * so that subsequent instruction fetches see the updated data.
+ */
+ for (p = rw & -dcache_lsize; p < rw + len; p += dcache_lsize) {
+ asm volatile("dc\tcvau, %0" : : "r" (p) : "memory");
+ }
+ asm volatile("dsb\tish" : : : "memory");
+ }
+
+ /*
+ * If CTR_EL0.DIC is set, instruction cache invalidation to the Point
+ * of Unification is not required for data to instruction coherence.
+ */
+ if (!(cache_info & CTR_DIC)) {
+ for (p = rx & -icache_lsize; p < rx + len; p += icache_lsize) {
+ asm volatile("ic\tivau, %0" : : "r"(p) : "memory");
+ }
+ asm volatile ("dsb\tish" : : : "memory");
+ }
+
+ asm volatile("isb" : : : "memory");
+}
+#endif /* CONFIG_DARWIN */
--
2.25.1
- [PATCH v3 15/41] accel/tcg: Support split-wx for linux with memfd, (continued)
- [PATCH v3 15/41] accel/tcg: Support split-wx for linux with memfd, Richard Henderson, 2020/11/05
- [PATCH v3 18/41] tcg/i386: Support split-wx code generation, Richard Henderson, 2020/11/05
- [PATCH v3 16/41] accel/tcg: Support split-wx for darwin/iOS with vm_remap, Richard Henderson, 2020/11/05
- [PATCH v3 19/41] tcg/aarch64: Use B not BL for tcg_out_goto_long, Richard Henderson, 2020/11/05
- [PATCH v3 17/41] tcg: Return the TB pointer from the rx region from exit_tb, Richard Henderson, 2020/11/05
- [PATCH v3 20/41] tcg/aarch64: Implement flush_idcache_range manually,
Richard Henderson <=
- [PATCH v3 21/41] tcg/aarch64: Support split-wx code generation, Richard Henderson, 2020/11/05
- [PATCH v3 22/41] disas: Push const down through host disasassembly, Richard Henderson, 2020/11/05
- [PATCH v3 23/41] tcg/tci: Push const down through bytecode reading, Richard Henderson, 2020/11/05
- [PATCH v3 24/41] tcg: Introduce tcg_tbrel_diff, Richard Henderson, 2020/11/05
- [PATCH v3 25/41] tcg/ppc: Use tcg_tbrel_diff, Richard Henderson, 2020/11/05
- [PATCH v3 26/41] tcg/ppc: Use tcg_out_mem_long to reset TCG_REG_TB, Richard Henderson, 2020/11/05
- [PATCH v3 27/41] tcg/ppc: Support split-wx code generation, Richard Henderson, 2020/11/05
- [PATCH v3 29/41] tcg/sparc: Support split-wx code generation, Richard Henderson, 2020/11/05
- [PATCH v3 35/41] accel/tcg: Add mips support to alloc_code_gen_buffer_splitwx_memfd, Richard Henderson, 2020/11/05
- [PATCH v3 32/41] tcg/riscv: Fix branch range checks, Richard Henderson, 2020/11/05