[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH v2 1/1] tcg: add perfmap and jitdump
From: |
Alex Bennée |
Subject: |
Re: [PATCH v2 1/1] tcg: add perfmap and jitdump |
Date: |
Fri, 06 Jan 2023 17:31:08 +0000 |
User-agent: |
mu4e 1.9.11; emacs 29.0.60 |
Ilya Leoshkevich <iii@linux.ibm.com> writes:
> Add ability to dump /tmp/perf-<pid>.map and jit-<pid>.dump.
> The first one allows the perf tool to map samples to each individual
> translation block. The second one adds the ability to resolve symbol
> names, line numbers and inspect JITed code.
>
> Example of use:
>
> perf record qemu-x86_64 -perfmap ./a.out
> perf report
>
> or
>
> perf record -k 1 qemu-x86_64 -jitdump ./a.out
> perf inject -j -i perf.data -o perf.data.jitted
> perf report -i perf.data.jitted
>
> Co-developed-by: Vanderson M. do Rosario <vandersonmr2@gmail.com>
> Co-developed-by: Alex Bennée <alex.bennee@linaro.org>
> Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
> Message-Id: <20221012051846.1432050-2-iii@linux.ibm.com>
> ---
> accel/tcg/debuginfo.c | 99 +++++++++++
> accel/tcg/debuginfo.h | 52 ++++++
> accel/tcg/meson.build | 2 +
> accel/tcg/perf.c | 334 ++++++++++++++++++++++++++++++++++++++
> accel/tcg/perf.h | 28 ++++
> accel/tcg/translate-all.c | 8 +
> docs/devel/tcg.rst | 23 +++
> hw/core/loader.c | 5 +
> include/tcg/tcg.h | 4 +-
> linux-user/elfload.c | 3 +
> linux-user/exit.c | 2 +
> linux-user/main.c | 15 ++
> linux-user/meson.build | 1 +
> meson.build | 8 +
> qemu-options.hx | 20 +++
> softmmu/vl.c | 11 ++
> tcg/region.c | 2 +-
> tcg/tcg.c | 2 +
> 18 files changed, 616 insertions(+), 3 deletions(-)
> create mode 100644 accel/tcg/debuginfo.c
> create mode 100644 accel/tcg/debuginfo.h
> create mode 100644 accel/tcg/perf.c
> create mode 100644 accel/tcg/perf.h
>
> diff --git a/accel/tcg/debuginfo.c b/accel/tcg/debuginfo.c
> new file mode 100644
> index 00000000000..c312db77146
> --- /dev/null
> +++ b/accel/tcg/debuginfo.c
> @@ -0,0 +1,99 @@
> +/*
> + * Debug information support.
> + *
> + * SPDX-License-Identifier: GPL-2.0-or-later
> + */
> +
> +#include "qemu/osdep.h"
> +#include "qemu/lockable.h"
> +
> +#include <elfutils/libdwfl.h>
> +
> +#include "debuginfo.h"
> +
> +static QemuMutex lock;
> +static Dwfl *dwfl;
> +static const Dwfl_Callbacks dwfl_callbacks = {
> + .find_elf = NULL,
> + .find_debuginfo = dwfl_standard_find_debuginfo,
> + .section_address = NULL,
> + .debuginfo_path = NULL,
> +};
> +
> +__attribute__((constructor))
> +static void debuginfo_init(void)
> +{
> + qemu_mutex_init(&lock);
> +}
> +
> +bool debuginfo_report_elf(const char *image_name, int image_fd,
> + unsigned long long load_bias)
> +{
> + QEMU_LOCK_GUARD(&lock);
> +
> + if (dwfl == NULL) {
> + dwfl = dwfl_begin(&dwfl_callbacks);
> + } else {
> + dwfl_report_begin_add(dwfl);
> + }
> +
> + if (dwfl == NULL) {
> + return false;
> + }
> +
> + dwfl_report_elf(dwfl, image_name, image_name, image_fd, load_bias, true);
> + dwfl_report_end(dwfl, NULL, NULL);
> + return true;
> +}
> +
> +bool debuginfo_get_symbol(unsigned long long address,
> + const char **symbol, unsigned long long *offset)
> +{
> + Dwfl_Module *dwfl_module;
> + GElf_Off dwfl_offset;
> + GElf_Sym dwfl_sym;
> +
> + QEMU_LOCK_GUARD(&lock);
> +
> + if (dwfl == NULL) {
> + return false;
> + }
> +
> + dwfl_module = dwfl_addrmodule(dwfl, address);
> + if (dwfl_module == NULL) {
> + return false;
> + }
> +
> + *symbol = dwfl_module_addrinfo(dwfl_module, address, &dwfl_offset,
> + &dwfl_sym, NULL, NULL, NULL);
> + if (*symbol == NULL) {
> + return false;
> + }
> + *offset = dwfl_offset;
> + return true;
> +}
> +
> +bool debuginfo_get_line(unsigned long long address,
> + const char **file, int *line)
> +{
> + Dwfl_Module *dwfl_module;
> + Dwfl_Line *dwfl_line;
> +
> + QEMU_LOCK_GUARD(&lock);
> +
> + if (dwfl == NULL) {
> + return false;
> + }
> +
> + dwfl_module = dwfl_addrmodule(dwfl, address);
> + if (dwfl_module == NULL) {
> + return false;
> + }
> +
> + dwfl_line = dwfl_module_getsrc(dwfl_module, address);
> + if (dwfl_line == NULL) {
> + return false;
> + }
> + *file = dwfl_lineinfo(dwfl_line, NULL, line, 0, NULL, NULL);
> + return true;
> +}
> diff --git a/accel/tcg/debuginfo.h b/accel/tcg/debuginfo.h
> new file mode 100644
> index 00000000000..d41d9d8d9b4
> --- /dev/null
> +++ b/accel/tcg/debuginfo.h
> @@ -0,0 +1,52 @@
> +/*
> + * Debug information support.
> + *
> + * SPDX-License-Identifier: GPL-2.0-or-later
> + */
> +
> +#ifndef ACCEL_TCG_DEBUGINFO_H
> +#define ACCEL_TCG_DEBUGINFO_H
> +
> +#if defined(CONFIG_TCG) && defined(CONFIG_LIBDW)
> +/*
> + * Load debuginfo for the specified guest ELF image.
> + * Return true on success, false on failure.
> + */
> +bool debuginfo_report_elf(const char *image_name, int image_fd,
> + unsigned long long load_bias);
> +
> +/*
> + * Find a symbol name associated with the specified guest PC.
> + * Return true on success, false if there is no associated symbol.
> + */
> +bool debuginfo_get_symbol(unsigned long long address,
> + const char **symbol, unsigned long long *offset);
> +
> +/*
> + * Find a line number associated with the specified guest PC.
> + * Return true on success, false if there is no associated line number.
> + */
> +bool debuginfo_get_line(unsigned long long address,
> + const char **file, int *line);
> +#else
> +static inline bool debuginfo_report_elf(const char *image_name, int image_fd,
> + unsigned long long load_bias)
> +{
> + return false;
> +}
> +
> +static inline bool debuginfo_get_symbol(unsigned long long address,
> + const char **symbol,
> + unsigned long long *offset)
> +{
> + return false;
> +}
> +
> +static inline bool debuginfo_get_line(unsigned long long address,
> + const char **file, int *line)
> +{
> + return false;
> +}
> +#endif
> +
> +#endif
> diff --git a/accel/tcg/meson.build b/accel/tcg/meson.build
> index 75e1dffb4df..f916110f651 100644
> --- a/accel/tcg/meson.build
> +++ b/accel/tcg/meson.build
> @@ -1,5 +1,6 @@
> tcg_ss = ss.source_set()
> tcg_ss.add(files(
> + 'perf.c',
I think this needs to be:
tcg_ss.add(when: 'CONFIG_LINUX', if_true: files('perf.c'))
as otherwise we break non-Linux builds. This might need some ifdef
CONFIG_LINUX checks on the call (or a static inline stub).
See https://gitlab.com/stsquad/qemu/-/jobs/3566180347
> 'tcg-all.c',
> 'cpu-exec-common.c',
> 'cpu-exec.c',
> @@ -12,6 +13,7 @@ tcg_ss.add(files(
> tcg_ss.add(when: 'CONFIG_USER_ONLY', if_true: files('user-exec.c'))
> tcg_ss.add(when: 'CONFIG_SOFTMMU', if_false: files('user-exec-stub.c'))
> tcg_ss.add(when: 'CONFIG_PLUGIN', if_true: [files('plugin-gen.c')])
> +tcg_ss.add(when: libdw, if_true: files('debuginfo.c'))
> specific_ss.add_all(when: 'CONFIG_TCG', if_true: tcg_ss)
>
> specific_ss.add(when: ['CONFIG_SOFTMMU', 'CONFIG_TCG'], if_true: files(
> diff --git a/accel/tcg/perf.c b/accel/tcg/perf.c
> new file mode 100644
> index 00000000000..c7283406162
> --- /dev/null
> +++ b/accel/tcg/perf.c
> @@ -0,0 +1,334 @@
> +/*
> + * Linux perf perf-<pid>.map and jit-<pid>.dump integration.
> + *
> + * The jitdump spec can be found at [1].
> + *
> + * [1]
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/tools/perf/Documentation/jitdump-specification.txt
> + *
> + * SPDX-License-Identifier: GPL-2.0-or-later
> + */
> +
> +#include "qemu/osdep.h"
> +#include "elf.h"
> +#include "qemu/timer.h"
> +#include "tcg/tcg.h"
> +
> +#include "debuginfo.h"
> +#include "perf.h"
> +
> +static FILE *safe_fopen_w(const char *path)
> +{
> + int saved_errno;
> + FILE *f;
> + int fd;
> +
> + /* Delete the old file, if any. */
> + unlink(path);
> +
> + /* Avoid symlink attacks by using O_CREAT | O_EXCL. */
> + fd = open(path, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
> + if (fd == -1) {
> + return NULL;
> + }
> +
> + /* Convert fd to FILE*. */
> + f = fdopen(fd, "w");
> + if (f == NULL) {
> + saved_errno = errno;
> + close(fd);
> + errno = saved_errno;
> + return NULL;
> + }
> +
> + return f;
> +}
> +
> +static FILE *perfmap;
> +
> +void perf_enable_perfmap(void)
> +{
> + char map_file[32];
> +
> + snprintf(map_file, sizeof(map_file), "/tmp/perf-%d.map", getpid());
> + perfmap = safe_fopen_w(map_file);
> + if (perfmap == NULL) {
> + warn_report("Could not open %s: %s, proceeding without perfmap",
> + map_file, strerror(errno));
> + }
> +}
> +
> +static FILE *jitdump;
> +
> +#define JITHEADER_MAGIC 0x4A695444
> +#define JITHEADER_VERSION 1
> +
> +struct jitheader {
> + uint32_t magic;
> + uint32_t version;
> + uint32_t total_size;
> + uint32_t elf_mach;
> + uint32_t pad1;
> + uint32_t pid;
> + uint64_t timestamp;
> + uint64_t flags;
> +};
> +
> +enum jit_record_type {
> + JIT_CODE_LOAD = 0,
> + JIT_CODE_DEBUG_INFO = 2,
> +};
> +
> +struct jr_prefix {
> + uint32_t id;
> + uint32_t total_size;
> + uint64_t timestamp;
> +};
> +
> +struct jr_code_load {
> + struct jr_prefix p;
> +
> + uint32_t pid;
> + uint32_t tid;
> + uint64_t vma;
> + uint64_t code_addr;
> + uint64_t code_size;
> + uint64_t code_index;
> +};
> +
> +struct debug_entry {
> + uint64_t addr;
> + int lineno;
> + int discrim;
> + const char name[];
> +};
> +
> +struct jr_code_debug_info {
> + struct jr_prefix p;
> +
> + uint64_t code_addr;
> + uint64_t nr_entry;
> + struct debug_entry entries[];
> +};
> +
> +static uint32_t get_e_machine(void)
> +{
> + Elf64_Ehdr elf_header;
> + FILE *exe;
> + size_t n;
> +
> + QEMU_BUILD_BUG_ON(offsetof(Elf32_Ehdr, e_machine) !=
> + offsetof(Elf64_Ehdr, e_machine));
> +
> + exe = fopen("/proc/self/exe", "r");
> + if (exe == NULL) {
> + return EM_NONE;
> + }
> +
> + n = fread(&elf_header, sizeof(elf_header), 1, exe);
> + fclose(exe);
> + if (n != 1) {
> + return EM_NONE;
> + }
> +
> + return elf_header.e_machine;
> +}
> +
> +void perf_enable_jitdump(void)
> +{
> + struct jitheader header;
> + char jitdump_file[32];
> +#ifdef CONFIG_LINUX
> + void *perf_marker;
> +#endif
> +
> + if (!use_rt_clock) {
> + warn_report("CLOCK_MONOTONIC is not available, proceeding without
> jitdump");
> + return;
> + }
> +
> + snprintf(jitdump_file, sizeof(jitdump_file), "jit-%d.dump", getpid());
> + jitdump = safe_fopen_w(jitdump_file);
> + if (jitdump == NULL) {
> + warn_report("Could not open %s: %s, proceeding without jitdump",
> + jitdump_file, strerror(errno));
> + return;
> + }
> +
> +#ifdef CONFIG_LINUX
> + /*
> + * `perf inject` will see that the mapped file name in the corresponding
> + * PERF_RECORD_MMAP or PERF_RECORD_MMAP2 event is of the form jit-%d.dump
> + * and will process it as a jitdump file.
> + */
> + perf_marker = mmap(NULL, qemu_real_host_page_size(), PROT_READ |
> PROT_EXEC,
> + MAP_PRIVATE, fileno(jitdump), 0);
> + if (perf_marker == MAP_FAILED) {
> + warn_report("Could not map %s: %s, proceeding without jitdump",
> + jitdump_file, strerror(errno));
> + fclose(jitdump);
> + jitdump = NULL;
> + return;
> + }
> +#endif
> +
> + header.magic = JITHEADER_MAGIC;
> + header.version = JITHEADER_VERSION;
> + header.total_size = sizeof(header);
> + header.elf_mach = get_e_machine();
> + header.pad1 = 0;
> + header.pid = getpid();
> + header.timestamp = get_clock();
> + header.flags = 0;
> + fwrite(&header, sizeof(header), 1, jitdump);
> +}
> +
> +void perf_report_prologue(const void *start, size_t size)
> +{
> + if (perfmap) {
> + fprintf(perfmap, "%"PRIxPTR" %zx tcg-prologue-buffer\n",
> + (uintptr_t)start, size);
> + }
> +}
> +
> +/*
> + * Append a single line mapping to a JIT_CODE_DEBUG_INFO jitdump entry.
> + * Return 1 on success, 0 if there is no line number information for
> guest_pc.
> + */
> +static int append_debug_entry(GArray *raw, const void *host_pc,
> + target_ulong guest_pc)
> +{
> + struct debug_entry ent;
> + const char *file;
> + int line;
> +
> + if (!debuginfo_get_line(guest_pc, &file, &line)) {
> + return 0;
> + }
> +
> + ent.addr = (uint64_t)host_pc;
> + ent.lineno = line;
> + ent.discrim = 0;
> + g_array_append_vals(raw, &ent, sizeof(ent));
> + g_array_append_vals(raw, file, strlen(file) + 1);
> + return 1;
> +}
> +
> +/* Write a JIT_CODE_DEBUG_INFO jitdump entry. */
> +static void write_jr_code_debug_info(const void *start, size_t size,
> + int icount)
> +{
> + GArray *raw = g_array_new(false, false, 1);
> + struct jr_code_debug_info rec;
> + struct debug_entry ent;
> + target_ulong guest_pc;
> + const void *host_pc;
> + int insn;
> +
> + /* Reserve space for the header. */
> + g_array_set_size(raw, sizeof(rec));
> +
> + /* Create debug entries. */
> + rec.nr_entry = 0;
> + for (insn = 0; insn < icount; insn++) {
> + host_pc = start;
> + if (insn != 0) {
> + host_pc += tcg_ctx->gen_insn_end_off[insn - 1];
> + }
> + guest_pc = tcg_ctx->gen_insn_data[insn][0];
> + rec.nr_entry += append_debug_entry(raw, host_pc, guest_pc);
> + }
> +
> + /* Trailing debug_entry. */
> + ent.addr = (uint64_t)start + size;
> + ent.lineno = 0;
> + ent.discrim = 0;
> + g_array_append_vals(raw, &ent, sizeof(ent));
> + g_array_append_vals(raw, "", 1);
> + rec.nr_entry++;
> +
> + /* Create header. */
> + rec.p.id = JIT_CODE_DEBUG_INFO;
> + rec.p.total_size = raw->len;
> + rec.p.timestamp = get_clock();
> + rec.code_addr = (uint64_t)start;
> + memcpy(raw->data, &rec, sizeof(rec));
> +
> + /* Flush. */
> + fwrite(raw->data, raw->len, 1, jitdump);
> + g_array_unref(raw);
> +}
> +
> +/* Write a JIT_CODE_LOAD jitdump entry. */
> +static void write_jr_code_load(const void *start, size_t size,
> + const char *symbol, const char *suffix)
> +{
> + static uint64_t code_index;
> + struct jr_code_load rec;
> + size_t suffix_size;
> + size_t name_size;
> +
> + name_size = strlen(symbol);
> + suffix_size = strlen(suffix) + 1;
> + rec.p.id = JIT_CODE_LOAD;
> + rec.p.total_size = sizeof(rec) + name_size + suffix_size + size;
> + rec.p.timestamp = get_clock();
> + rec.pid = getpid();
> + rec.tid = gettid();
This causes a failure on some of the CI builds:
../accel/tcg/perf.c: In function ‘write_jr_code_load’:
../accel/tcg/perf.c:276:15: error: implicit declaration of function ‘gettid’;
did you mean ‘getgid’? [-Werror=implicit-function-declaration]
rec.tid = gettid();
^~~~~~
getgid
../accel/tcg/perf.c:276:15: error: nested extern declaration of ‘gettid’
[-Werror=nested-externs]
We also have to deal with this in log_thread_id() - see CONFIG_GETTID.
CI Failures:
https://gitlab.com/stsquad/qemu/-/pipelines/740305827/failures
> + rec.vma = (uint64_t)start;
> + rec.code_addr = (uint64_t)start;
> + rec.code_size = size;
> + rec.code_index = code_index++;
> + fwrite(&rec, sizeof(rec), 1, jitdump);
> + fwrite(symbol, name_size, 1, jitdump);
> + fwrite(suffix, suffix_size, 1, jitdump);
> + fwrite(start, size, 1, jitdump);
> +}
> +
> +void perf_report_code(const void *start, size_t size, int icount, uint64_t
> pc)
> +{
> + char suffix[32] = "";
> + char symbol_buf[32];
> + const char *symbol;
> + unsigned long long offset;
> +
> + /* Symbolize guest PC. */
> + if (perfmap || jitdump) {
> + if (!debuginfo_get_symbol(pc, &symbol, &offset)) {
> + snprintf(symbol_buf, sizeof(symbol_buf), "subject-%"PRIx64, pc);
> + symbol = symbol_buf;
> + offset = 0;
> + }
> + if (offset != 0) {
> + snprintf(suffix, sizeof(suffix), "+0x%"PRIx64, (uint64_t)offset);
> + }
> + }
> +
> + /* Emit a perfmap entry if needed. */
> + if (perfmap) {
> + flockfile(perfmap);
> + fprintf(perfmap, "%"PRIxPTR" %zx %s%s\n",
> + (uintptr_t)start, size, symbol, suffix);
> + funlockfile(perfmap);
> + }
> +
> + /* Emit jitdump entries if needed. */
> + if (jitdump) {
> + flockfile(jitdump);
> + write_jr_code_debug_info(start, size, icount);
> + write_jr_code_load(start, size, symbol, suffix);
> + funlockfile(jitdump);
> + }
> +}
> +
> +void perf_exit(void)
> +{
> + if (perfmap) {
> + fclose(perfmap);
> + perfmap = NULL;
> + }
> +
> + if (jitdump) {
> + fclose(jitdump);
> + jitdump = NULL;
> + }
> +}
> diff --git a/accel/tcg/perf.h b/accel/tcg/perf.h
> new file mode 100644
> index 00000000000..68c80ac2fd9
> --- /dev/null
> +++ b/accel/tcg/perf.h
> @@ -0,0 +1,28 @@
> +/*
> + * Linux perf perf-<pid>.map and jit-<pid>.dump integration.
> + *
> + * SPDX-License-Identifier: GPL-2.0-or-later
> + */
> +
> +#ifndef ACCEL_TCG_PERF_H
> +#define ACCEL_TCG_PERF_H
> +
> +#include <stddef.h>
> +#include <stdint.h>
I don't think these are needed here. osdep should have brought them in, so
you can drop them here.
> +
> +/* Start writing perf-<pid>.map. */
> +void perf_enable_perfmap(void);
> +
> +/* Start writing jit-<pid>.dump. */
> +void perf_enable_jitdump(void);
> +
> +/* Add information about TCG prologue to profiler maps. */
> +void perf_report_prologue(const void *start, size_t size);
> +
> +/* Add information about JITted guest code to profiler maps. */
> +void perf_report_code(const void *start, size_t size, int icount, uint64_t
> pc);
> +
> +/* Stop writing perf-<pid>.map and/or jit-<pid>.dump. */
> +void perf_exit(void);
> +
> +#endif
> diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
> index ac3ee3740cb..2e1a6dc004e 100644
> --- a/accel/tcg/translate-all.c
> +++ b/accel/tcg/translate-all.c
> @@ -62,6 +62,7 @@
> #include "tb-hash.h"
> #include "tb-context.h"
> #include "internal.h"
> +#include "perf.h"
>
> /* make various TB consistency checks */
>
> @@ -884,6 +885,13 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
> }
> tb->tc.size = gen_code_size;
>
> + /*
> + * For TARGET_TB_PCREL, attribute all executions of the generated
> + * code to its first mapping.
> + */
> + perf_report_code(tcg_splitwx_to_rx(gen_code_buf), gen_code_size,
> + tb->icount, pc);
> +
> #ifdef CONFIG_PROFILER
> qatomic_set(&prof->code_time, prof->code_time + profile_getclock() - ti);
> qatomic_set(&prof->code_in_len, prof->code_in_len + tb->size);
> diff --git a/docs/devel/tcg.rst b/docs/devel/tcg.rst
> index a65fb7b1c44..fa479414ddc 100644
> --- a/docs/devel/tcg.rst
> +++ b/docs/devel/tcg.rst
> @@ -188,3 +188,26 @@ memory areas instead calls out to C code for device
> emulation.
> Finally, the MMU helps tracking dirty pages and pages pointed to by
> translation blocks.
>
> +Profiling JITted code
> +---------------------
> +
> +The Linux ``perf`` tool will treat all JITted code as a single block as
> +unlike the main code it can't use debug information to link individual
> +program counter samples with larger functions. To overcome this
> +limitation you can use the ``-perfmap`` or the ``-jitdump`` option to
> generate
> +map files. ``-perfmap`` is lightweight and produces only guest-host mappings.
> +``-jitdump`` additionally saves JITed code and guest debug information (if
> +available); its output needs to be integrated with the ``perf.data`` file
> +before the final report can be viewed.
> +
> +.. code::
> +
> + perf record $QEMU -perfmap $REMAINING_ARGS
> + perf report
> +
> + perf record -k 1 $QEMU -jitdump $REMAINING_ARGS
> + perf inject -j -i perf.data -o perf.data.jitted
> + perf report -i perf.data.jitted
> +
> +Note that qemu-system generates mappings only for ``-kernel`` files in ELF
> +format.
> diff --git a/hw/core/loader.c b/hw/core/loader.c
> index 0548830733e..55dbe2e1993 100644
> --- a/hw/core/loader.c
> +++ b/hw/core/loader.c
> @@ -61,6 +61,7 @@
> #include "hw/boards.h"
> #include "qemu/cutils.h"
> #include "sysemu/runstate.h"
> +#include "accel/tcg/debuginfo.h"
>
> #include <zlib.h>
>
> @@ -503,6 +504,10 @@ ssize_t load_elf_ram_sym(const char *filename,
> clear_lsb, data_swab, as, load_rom, sym_cb);
> }
>
> + if (ret != ELF_LOAD_FAILED) {
> + debuginfo_report_elf(filename, fd, 0);
> + }
> +
> fail:
> close(fd);
> return ret;
> diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
> index d84bae6e3f5..cd237cdaeff 100644
> --- a/include/tcg/tcg.h
> +++ b/include/tcg/tcg.h
> @@ -639,10 +639,10 @@ extern TCGv_env cpu_env;
> bool in_code_gen_buffer(const void *p);
>
> #ifdef CONFIG_DEBUG_TCG
> -const void *tcg_splitwx_to_rx(void *rw);
> +const void *tcg_splitwx_to_rx(const void *rw);
> void *tcg_splitwx_to_rw(const void *rx);
> #else
> -static inline const void *tcg_splitwx_to_rx(void *rw)
> +static inline const void *tcg_splitwx_to_rx(const void *rw)
> {
> return rw ? rw + tcg_splitwx_diff : NULL;
> }
> diff --git a/linux-user/elfload.c b/linux-user/elfload.c
> index 20894b633f5..5928c14dfc9 100644
> --- a/linux-user/elfload.c
> +++ b/linux-user/elfload.c
> @@ -19,6 +19,7 @@
> #include "qemu/selfmap.h"
> #include "qapi/error.h"
> #include "target_signal.h"
> +#include "accel/tcg/debuginfo.h"
>
> #ifdef _ARCH_PPC64
> #undef ARCH_DLINFO
> @@ -3261,6 +3262,8 @@ static void load_elf_image(const char *image_name, int
> image_fd,
> load_symbols(ehdr, image_fd, load_bias);
> }
>
> + debuginfo_report_elf(image_name, image_fd, load_bias);
> +
> mmap_unlock();
>
> close(image_fd);
> diff --git a/linux-user/exit.c b/linux-user/exit.c
> index fa6ef0b9b44..607b6da9fc4 100644
> --- a/linux-user/exit.c
> +++ b/linux-user/exit.c
> @@ -17,6 +17,7 @@
> * along with this program; if not, see <http://www.gnu.org/licenses/>.
> */
> #include "qemu/osdep.h"
> +#include "accel/tcg/perf.h"
> #include "exec/gdbstub.h"
> #include "qemu.h"
> #include "user-internals.h"
> @@ -38,4 +39,5 @@ void preexit_cleanup(CPUArchState *env, int code)
> #endif
> gdb_exit(code);
> qemu_plugin_user_exit();
> + perf_exit();
> }
> diff --git a/linux-user/main.c b/linux-user/main.c
> index a17fed045bf..4290651c3cf 100644
> --- a/linux-user/main.c
> +++ b/linux-user/main.c
> @@ -53,6 +53,7 @@
> #include "signal-common.h"
> #include "loader.h"
> #include "user-mmap.h"
> +#include "accel/tcg/perf.h"
>
> #ifdef CONFIG_SEMIHOSTING
> #include "semihosting/semihost.h"
> @@ -423,6 +424,16 @@ static void handle_arg_abi_call0(const char *arg)
> }
> #endif
>
> +static void handle_arg_perfmap(const char *arg)
> +{
> + perf_enable_perfmap();
> +}
> +
> +static void handle_arg_jitdump(const char *arg)
> +{
> + perf_enable_jitdump();
> +}
> +
> static QemuPluginList plugins = QTAILQ_HEAD_INITIALIZER(plugins);
>
> #ifdef CONFIG_PLUGIN
> @@ -493,6 +504,10 @@ static const struct qemu_argument arg_table[] = {
> {"xtensa-abi-call0", "QEMU_XTENSA_ABI_CALL0", false,
> handle_arg_abi_call0,
> "", "assume CALL0 Xtensa ABI"},
> #endif
> + {"perfmap", "QEMU_PERFMAP", false, handle_arg_perfmap,
> + "", "Generate a /tmp/perf-${pid}.map file for perf"},
> + {"jitdump", "QEMU_JITDUMP", false, handle_arg_jitdump,
> + "", "Generate a jit-${pid}.dump file for perf"},
> {NULL, NULL, false, NULL, NULL, NULL}
> };
>
> diff --git a/linux-user/meson.build b/linux-user/meson.build
> index de4320af053..7171dc60be2 100644
> --- a/linux-user/meson.build
> +++ b/linux-user/meson.build
> @@ -22,6 +22,7 @@ linux_user_ss.add(files(
> 'uname.c',
> ))
> linux_user_ss.add(rt)
> +linux_user_ss.add(libdw)
>
> linux_user_ss.add(when: 'TARGET_HAS_BFLT', if_true: files('flatload.c'))
> linux_user_ss.add(when: 'TARGET_I386', if_true: files('vm86.c'))
> diff --git a/meson.build b/meson.build
> index cf3e517e56d..2a736dcf7e6 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -1642,6 +1642,12 @@ if libbpf.found() and not cc.links('''
> endif
> endif
>
> +# libdw
> +libdw = dependency('libdw',
> + method: 'pkg-config',
> + kwargs: static_kwargs,
> + required: false)
> +
> #################
> # config-host.h #
> #################
> @@ -1911,6 +1917,7 @@ config_host_data.set('CONFIG_DBUS_DISPLAY',
> dbus_display)
> config_host_data.set('CONFIG_CFI', get_option('cfi'))
> config_host_data.set('CONFIG_SELINUX', selinux.found())
> config_host_data.set('CONFIG_XEN_BACKEND', xen.found())
> +config_host_data.set('CONFIG_LIBDW', libdw.found())
> if xen.found()
> # protect from xen.version() having less than three components
> xen_version = xen.version().split('.') + ['0', '0']
> @@ -3970,6 +3977,7 @@ summary_info += {'libudev': libudev}
> # Dummy dependency, keep .found()
> summary_info += {'FUSE lseek': fuse_lseek.found()}
> summary_info += {'selinux': selinux}
> +summary_info += {'libdw': libdw}
> summary(summary_info, bool_yn: true, section: 'Dependencies')
>
> if not supported_cpus.contains(cpu)
> diff --git a/qemu-options.hx b/qemu-options.hx
> index 7f99d15b231..99dd7f0e584 100644
> --- a/qemu-options.hx
> +++ b/qemu-options.hx
> @@ -4844,6 +4844,26 @@ SRST
> Enable synchronization profiling.
> ERST
>
> +#ifdef CONFIG_TCG
> +DEF("perfmap", 0, QEMU_OPTION_perfmap,
> + "-perfmap generate a /tmp/perf-${pid}.map file for perf\n",
> + QEMU_ARCH_ALL)
> +SRST
> +``-perfmap``
> + Generate a map file for Linux perf tools that will allow basic profiling
> + information to be broken down into basic blocks.
> +ERST
> +
> +DEF("jitdump", 0, QEMU_OPTION_jitdump,
> + "-jitdump generate a jit-${pid}.dump file for perf\n",
> + QEMU_ARCH_ALL)
> +SRST
> +``-jitdump``
> + Generate a dump file for Linux perf tools that maps basic blocks to
> symbol
> + names, line numbers and JITted code.
> +ERST
> +#endif
> +
> DEFHEADING()
>
> DEFHEADING(Generic object creation:)
> diff --git a/softmmu/vl.c b/softmmu/vl.c
> index 5115221efe4..6ae3d874679 100644
> --- a/softmmu/vl.c
> +++ b/softmmu/vl.c
> @@ -96,6 +96,9 @@
> #include "fsdev/qemu-fsdev.h"
> #endif
> #include "sysemu/qtest.h"
> +#ifdef CONFIG_TCG
> +#include "accel/tcg/perf.h"
> +#endif
>
> #include "disas/disas.h"
>
> @@ -2926,6 +2929,14 @@ void qemu_init(int argc, char **argv)
> case QEMU_OPTION_DFILTER:
> qemu_set_dfilter_ranges(optarg, &error_fatal);
> break;
> +#ifdef CONFIG_TCG
Probably CONFIG_TCG and CONFIG_LINUX
> + case QEMU_OPTION_perfmap:
> + perf_enable_perfmap();
> + break;
> + case QEMU_OPTION_jitdump:
> + perf_enable_jitdump();
> + break;
> +#endif
> case QEMU_OPTION_seed:
> qemu_guest_random_seed_main(optarg, &error_fatal);
> break;
> diff --git a/tcg/region.c b/tcg/region.c
> index 88d6bb273f1..af334c5c61d 100644
> --- a/tcg/region.c
> +++ b/tcg/region.c
> @@ -83,7 +83,7 @@ bool in_code_gen_buffer(const void *p)
> }
>
> #ifdef CONFIG_DEBUG_TCG
> -const void *tcg_splitwx_to_rx(void *rw)
> +const void *tcg_splitwx_to_rx(const void *rw)
> {
This constification should be a separate patch as it's not related.
> /* Pass NULL pointers unchanged. */
> if (rw) {
> diff --git a/tcg/tcg.c b/tcg/tcg.c
> index 436fcf6ebd2..e2151a93001 100644
> --- a/tcg/tcg.c
> +++ b/tcg/tcg.c
> @@ -61,6 +61,7 @@
> #include "exec/log.h"
> #include "tcg/tcg-ldst.h"
> #include "tcg-internal.h"
> +#include "accel/tcg/perf.h"
>
> #ifdef CONFIG_TCG_INTERPRETER
> #include <ffi.h>
> @@ -749,6 +750,7 @@ void tcg_prologue_init(TCGContext *s)
> #endif
>
> prologue_size = tcg_current_code_size(s);
> + perf_report_prologue(s->code_gen_ptr, prologue_size);
>
> #ifndef CONFIG_TCG_INTERPRETER
> flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
--
Alex Bennée
Virtualisation Tech Lead @ Linaro
- Re: [PATCH v2 1/1] tcg: add perfmap and jitdump,
Alex Bennée <=