[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH 1/2] RFC enhance tracing utilities
From: |
Luca Dariz |
Subject: |
[PATCH 1/2] RFC enhance tracing utilities |
Date: |
Sat, 9 Mar 2024 15:03:11 +0100 |
This extends the previous debug utility to trace system calls with
more events and the ability to control the tracing from userspace,
collecting a trace of the whole systems.
This tool was quite useful in porting the rpc format to 64 bit and
handle the 32to64 translation, but also to debug user-space programs.
It's still a draft with some limitations and open points:
- DEBUG_TRACE for 64 bit is missing
- check if we can have meaningful and reliable data removing the
syscall number on syscall return; this would avoid saving it in the
thread structure.
- add more info to each trace entry, e.g. current CPU or timestamps
- trace more events
- add more filtering capabilities
- trace a single subhurd
- trace format is not exported
---
Makefrag.am | 7 +
configfrag.ac | 12 ++
i386/i386/debug.h | 16 +-
i386/i386/debug_i386.c | 45 ++--
i386/i386/locore.S | 33 ++-
i386/i386/pcb.c | 4 +
i386/i386/trap.c | 5 +
include/mach/gnumach.defs | 33 +++
include/mach/trace.h | 61 ++++++
ipc/ipc_kmsg.c | 6 +-
ipc/mach_msg.c | 2 +
kern/exception.c | 1 +
kern/startup.c | 2 +
kern/thread.h | 4 +
kern/trace.c | 432 ++++++++++++++++++++++++++++++++++++++
kern/trace.h | 42 ++++
scripts/gnumach-gdb.py | 63 ++++++
tests/test-trace.c | 58 +++++
tests/user-qemu.mk | 3 +-
x86_64/debug_trace.S | 9 +-
x86_64/interrupt.S | 19 ++
x86_64/locore.S | 84 +++++++-
22 files changed, 887 insertions(+), 54 deletions(-)
create mode 100644 include/mach/trace.h
create mode 100644 kern/trace.c
create mode 100644 kern/trace.h
create mode 100644 scripts/gnumach-gdb.py
create mode 100644 tests/test-trace.c
diff --git a/Makefrag.am b/Makefrag.am
index 82fce628..25d73ec3 100644
--- a/Makefrag.am
+++ b/Makefrag.am
@@ -221,6 +221,12 @@ libkernel_a_SOURCES += \
kern/xpr.h \
kern/elf-load.c \
kern/boot_script.c
+
+if KERNEL_TRACE
+libkernel_a_SOURCES += \
+ kern/trace.c
+endif
+
EXTRA_DIST += \
kern/exc.defs \
kern/mach.srv \
@@ -407,6 +413,7 @@ include_mach_HEADERS = \
include/mach/profilparam.h \
include/mach/std_types.h \
include/mach/syscall_sw.h \
+ include/mach/trace.h \
include/mach/task_info.h \
include/mach/task_special_ports.h \
include/mach/thread_info.h \
diff --git a/configfrag.ac b/configfrag.ac
index b8b41261..f4dbe63d 100644
--- a/configfrag.ac
+++ b/configfrag.ac
@@ -139,6 +139,18 @@ AC_DEFINE([SLAB_VERIFY], [0], [SLAB_VERIFY])
# Enable the CPU pool layer in the slab allocator.
AC_DEFINE([SLAB_USE_CPU_POOLS], [0], [SLAB_USE_CPU_POOLS])
+
+# enable support for tracing various kernel events
+AC_ARG_ENABLE([kernel-trace],
+ AS_HELP_STRING([--enable-kernel-trace], [Enable kernel event tracing]))
+[if [ x"$enable_kernel_trace" = xyes ]; then]
+ AC_DEFINE([KERNEL_TRACE], [1], [Enable kernel event tracing])
+ AM_CONDITIONAL([KERNEL_TRACE], [true])
+[else]
+ AC_DEFINE([KERNEL_TRACE], [0], [Enable kernel event tracing])
+ AM_CONDITIONAL([KERNEL_TRACE], [false])
+[fi]
+
#
# Options.
diff --git a/i386/i386/debug.h b/i386/i386/debug.h
index 84397ba8..eff330c6 100644
--- a/i386/i386/debug.h
+++ b/i386/i386/debug.h
@@ -54,6 +54,7 @@ void debug_trace_dump(void);
#else /* __ASSEMBLER__ */
+#ifndef __x86_64__
#define DEBUG_TRACE \
pushl $__LINE__ ;\
pushl $9f ;\
@@ -62,10 +63,21 @@ void debug_trace_dump(void);
.data ;\
9: .ascii __FILE__"\0" ;\
.text
-
+#else /* __x86_64__ */
+#define DEBUG_TRACE \
+ pushq %rdi ;\
+ pushq %rs1 ;\
+ movq $__LINE__,%rdi ;\
+ movq $9f,%rsi ;\
+ call __debug_trace ;\
+ popq %rsi ;\
+ popq %rdi ;\
+ .data ;\
+9: .ascii __FILE__"\0" ;\
+ .text
+#endif
#endif /* __ASSEMBLER__ */
-
#endif /* DEBUG */
/* XXX #include_next "debug.h" */
diff --git a/i386/i386/debug_i386.c b/i386/i386/debug_i386.c
index 41d032e3..61d73dff 100644
--- a/i386/i386/debug_i386.c
+++ b/i386/i386/debug_i386.c
@@ -22,6 +22,7 @@
*/
#include <kern/printf.h>
+#include <kern/trace.h>
#include "thread.h"
#include "trap.h"
@@ -140,39 +141,23 @@ debug_trace_dump(void)
splx(s);
}
+#endif /* DEBUG */
-#include <kern/syscall_sw.h>
-
-int syscall_trace = 0;
-task_t syscall_trace_task;
+#if KERNEL_TRACE
-int
-syscall_trace_print(int syscallvec, ...)
+#ifndef __x86_64__
+#define SYSCALL_NUM_SHIFT 4
+#else
+#define SYSCALL_NUM_SHIFT 5
+#endif
+long
+syscall_trace_enter(long *sysv)
{
- int syscallnum = syscallvec >> 4;
- int i;
- const mach_trap_t *trap = &mach_trap_table[syscallnum];
-
- if (syscall_trace_task && syscall_trace_task != current_task())
- goto out;
-
- printf("0x%08x:0x%08x:%s(",
- current_task(), current_thread(), trap->mach_trap_name);
- for (i = 0; i < trap->mach_trap_arg_count; i++) {
- unsigned long value = (&syscallvec)[1+i];
- /* Use a crude heuristic to format pointers. */
- if (value > 1024)
- printf("0x%08x", value);
- else
- printf("%d", value);
-
- if (i + 1 < trap->mach_trap_arg_count)
- printf(", ");
- }
- printf(")\n");
+ long sysnum = sysv[0];
- out:
- return syscallvec;
+ syscall_trace_common(sysnum >> SYSCALL_NUM_SHIFT, sysv + 1);
+
+ return sysnum;
}
-#endif /* DEBUG */
+#endif /* KERNEL_TRACE */
diff --git a/i386/i386/locore.S b/i386/i386/locore.S
index 9d0513a1..26c5843c 100644
--- a/i386/i386/locore.S
+++ b/i386/i386/locore.S
@@ -634,6 +634,15 @@ ENTRY(thread_bootstrap_return)
*/
ENTRY(thread_syscall_return)
+#if KERNEL_TRACE
+ testb $0xff,EXT(syscall_trace)
+ jz 1f
+ movl S_ARG0,%eax /* get return value */
+ pushl %eax
+ call syscall_trace_return
+ movl %eax,S_ARG0 /* restore return value */
+1:
+#endif
movl S_ARG0,%eax /* get return value */
movl %esp,%ecx /* get kernel stack */
or $(KERNEL_STACK_SIZE-1),%ecx
@@ -1174,18 +1183,26 @@ syscall_native:
mach_call_call:
-#ifdef DEBUG
+#if KERNEL_TRACE
testb $0xff,EXT(syscall_trace)
jz 0f
- pushl %eax
- call EXT(syscall_trace_print)
- /* will return with syscallofs still (or again) in eax */
- addl $4,%esp
+ pushl %eax /* add syscall num to args array */
+ pushl %esp /* args array is the first argument*/
+ call EXT(syscall_trace_enter)
+ popl %eax
+ popl %eax
0:
-#endif /* DEBUG */
+#endif /* KERNEL_TRACE */
+ call *EXT(mach_trap_table)+4(%eax) /* call procedure */
+#if KERNEL_TRACE
+ testb $0xff,EXT(syscall_trace)
+ jz 1f
+ pushl %eax
+ call syscall_trace_return
+ /* will return with return value still (or again) in eax */
+1:
+#endif /* KERNEL_TRACE */
- call *EXT(mach_trap_table)+4(%eax)
- /* call procedure */
movl %esp,%ecx /* get kernel stack */
or $(KERNEL_STACK_SIZE-1),%ecx
movl -3-IKS_SIZE(%ecx),%esp /* switch back to PCB stack */
diff --git a/i386/i386/pcb.c b/i386/i386/pcb.c
index e8901550..a3ed7d67 100644
--- a/i386/i386/pcb.c
+++ b/i386/i386/pcb.c
@@ -37,6 +37,7 @@
#include <kern/counters.h>
#include <kern/debug.h>
#include <kern/thread.h>
+#include <kern/trace.h>
#include <kern/sched_prim.h>
#include <kern/slab.h>
#include <vm/vm_kern.h>
@@ -308,6 +309,7 @@ void stack_handoff(
/*
* Load the rest of the user state for the new thread
*/
+ context_switch_trace(old, NULL, new);
switch_ktss(new->pcb);
/*
@@ -332,6 +334,7 @@ void stack_handoff(
*/
void load_context(thread_t new)
{
+ context_switch_trace(NULL, NULL, new);
switch_ktss(new->pcb);
Load_context(new);
}
@@ -382,6 +385,7 @@ thread_t switch_context(
/*
* Load the rest of the user state for the new thread
*/
+ context_switch_trace(old, continuation, new);
switch_ktss(new->pcb);
return Switch_context(old, continuation, new);
}
diff --git a/i386/i386/trap.c b/i386/i386/trap.c
index 555d7498..17f7eb3c 100644
--- a/i386/i386/trap.c
+++ b/i386/i386/trap.c
@@ -53,6 +53,7 @@
#include <kern/printf.h>
#include <kern/thread.h>
#include <kern/task.h>
+#include <kern/trace.h>
#include <kern/sched.h>
#include <kern/sched_prim.h>
#include <kern/exception.h>
@@ -162,6 +163,8 @@ void kernel_trap(struct i386_saved_state *regs)
thread_t thread;
extern char _start[], etext[];
+ trap_trace(TRACE_TRAP_KERNEL, regs);
+
type = regs->trapno;
code = regs->err;
thread = current_thread();
@@ -366,6 +369,7 @@ int user_trap(struct i386_saved_state *regs)
unsigned long type;
thread_t thread = current_thread();
+ trap_trace(TRACE_TRAP_USER, regs);
#ifdef __x86_64__
assert(regs == &thread->pcb->iss);
#endif
@@ -592,6 +596,7 @@ int user_trap(struct i386_saved_state *regs)
void
i386_astintr(void)
{
+ trap_trace(TRACE_TRAP_ASTINTR, &(current_thread()->pcb->iss));
(void) splsched(); /* block interrupts to check reasons */
#ifndef MACH_RING1
int mycpu = cpu_number();
diff --git a/include/mach/gnumach.defs b/include/mach/gnumach.defs
index 7ecf74d3..583bdc36 100644
--- a/include/mach/gnumach.defs
+++ b/include/mach/gnumach.defs
@@ -215,3 +215,36 @@ routine vm_pages_phys(
simpleroutine thread_set_name(
thread : thread_t;
name : kernel_debug_name_t);
+
+/*
+ * Start tracing kernel events, using a buffer of BUFSIZE bytes and
+ * tracing only events set in KINDS.
+ */
+simpleroutine ktrace_start(
+ host_priv : host_priv_t;
+ bufsize : rpc_vm_size_t;
+ kinds : rpc_long_integer_t);
+
+/*
+ * Stop tracing kernel events.
+ */
+simpleroutine ktrace_stop(
+ host_priv : host_priv_t);
+
+/*
+ * Free the tracing buffer allocated in ktrace_start().
+ */
+simpleroutine ktrace_free(
+ host_priv : host_priv_t);
+
+/*
+ * Read the content of the tracing buffer in BUF; additionally we get
+ * a pointer to the first free byte after the tracing data in LAST_PTR
+ * and some trace parameters in FLAGS.
+ */
+routine ktrace_collect(
+ host_priv : host_priv_t;
+ out buf : pointer_t;
+ out last_ptr : vm_offset_t;
+ out flags : natural_t);
+
diff --git a/include/mach/trace.h b/include/mach/trace.h
new file mode 100644
index 00000000..3fccbbc7
--- /dev/null
+++ b/include/mach/trace.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2023 Free Software Foundation
+ *
+ * This program is free software ; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation ; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the program ; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _MACH_TRACE_H_
+#define _MACH_TRACE_H_
+
+#include <stdint.h>
+#include <mach/std_types.h>
+
+/* trace types as encoded in the ring buffer */
+#define DEBUG_TRACE_INVALID 0
+#define DEBUG_TRACE_SYSCALL 1
+#define DEBUG_TRACE_SAMPLE 2
+#define DEBUG_TRACE_TRAP 3
+#define DEBUG_TRACE_CSW 4
+#define DEBUG_TRACE_IRQ 5
+#define DEBUG_TRACE_RPC 6
+#define DEBUG_TRACE_DUMMY 15
+
+/* trap events */
+#define TRACE_TRAP_USER 0
+#define TRACE_TRAP_KERNEL 1
+#define TRACE_TRAP_ASTINTR 2
+
+/* trace types bitmask, to be used in trace_start()*/
+#define TRACE_TYPE_SYSCALL 0x01
+#define TRACE_TYPE_TRAP 0x02
+#define TRACE_TYPE_CSW 0x04
+#define TRACE_TYPE_IRQ 0x08
+#define TRACE_TYPE_RPC 0x10
+#define TRACE_TYPE_ALL 0x1F
+
+/* Trace file header definitions */
+#define TH_MAGIC "GMTR"
+#define TH_OVERRUN_MASK 0x0f
+#define TH_KERNEL64 0x10
+#define TH_USER64 0x20
+
+typedef struct {
+ char magic[4];
+ uint32_t flags;
+ uint32_t last;
+ uint32_t ov_first;
+} trace_header_t;
+
+#endif /* _MACH_TRACE_H_ */
diff --git a/ipc/ipc_kmsg.c b/ipc/ipc_kmsg.c
index 8bd645ff..8a153386 100644
--- a/ipc/ipc_kmsg.c
+++ b/ipc/ipc_kmsg.c
@@ -509,7 +509,7 @@ ipc_kmsg_get(
ikm_free(kmsg);
return MACH_SEND_INVALID_DATA;
}
-
+ rpc_trace(0, msg, size);
*kmsgp = kmsg;
return MACH_MSG_SUCCESS;
}
@@ -576,8 +576,10 @@ ipc_kmsg_put(
if (copyoutmsg(&kmsg->ikm_header, msg, size))
mr = MACH_RCV_INVALID_DATA;
- else
+ else {
mr = MACH_MSG_SUCCESS;
+ rpc_trace(1, msg, msg->msgh_size);
+ }
ikm_cache_free(kmsg);
diff --git a/ipc/mach_msg.c b/ipc/mach_msg.c
index ff5e5b09..3a4298ff 100644
--- a/ipc/mach_msg.c
+++ b/ipc/mach_msg.c
@@ -464,6 +464,7 @@ mach_msg_trap(
ikm_free(kmsg);
goto slow_get;
}
+ rpc_trace(0, msg, send_size);
fast_copyin:
/*
@@ -1194,6 +1195,7 @@ mach_msg_trap(
if (!ikm_cache_free_try(kmsg))
goto slow_put;
+ rpc_trace(1, msg, msg->msgh_size);
thread_syscall_return(MACH_MSG_SUCCESS);
/*NOTREACHED*/
return MACH_MSG_SUCCESS; /* help for the compiler */
diff --git a/kern/exception.c b/kern/exception.c
index 15f29705..0e0260f0 100644
--- a/kern/exception.c
+++ b/kern/exception.c
@@ -702,6 +702,7 @@ exception_raise(
/*NOTREACHED*/
}
+ rpc_trace(1, receiver->ith_msg, receiver->ith_msg->msgh_size);
thread_syscall_return(MACH_MSG_SUCCESS);
/*NOTREACHED*/
#ifndef __GNUC__
diff --git a/kern/startup.c b/kern/startup.c
index 61751d35..ce8f0123 100644
--- a/kern/startup.c
+++ b/kern/startup.c
@@ -47,6 +47,7 @@
#include <kern/thread.h>
#include <kern/thread_swap.h>
#include <kern/timer.h>
+#include <kern/trace.h>
#include <kern/xpr.h>
#include <kern/bootstrap.h>
#include <kern/startup.h>
@@ -163,6 +164,7 @@ void setup_main(void)
compute_mach_factor();
gsync_setup ();
+ trace_init();
/*
* Create a kernel thread to start the other kernel
diff --git a/kern/thread.h b/kern/thread.h
index 81d32924..5b5d2235 100644
--- a/kern/thread.h
+++ b/kern/thread.h
@@ -240,6 +240,10 @@ struct thread {
#endif
char name[THREAD_NAME_SIZE];
+
+#if KERNEL_TRACE
+ unsigned int syscall_num; /* for syscall tracing */
+#endif
};
#include <kern/cpu_number.h>
diff --git a/kern/trace.c b/kern/trace.c
new file mode 100644
index 00000000..69e6eaf0
--- /dev/null
+++ b/kern/trace.c
@@ -0,0 +1,432 @@
+/*
+ * Copyright (C) 2024 Free Software Foundation
+ *
+ * This program is free software ; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation ; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the program ; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include "kern/thread.h"
+#include <kern/gnumach.server.h>
+#include <kern/kalloc.h>
+#include <kern/printf.h>
+#include <kern/syscall_sw.h>
+#include <kern/trace.h>
+#include <mach/message.h>
+#include <mach/trace.h>
+#include <machine/locore.h>
+#include <string.h>
+#include "vm/vm_kern.h"
+
+/* Common information for each trace entry */
+struct msg_trace_header {
+ char task_name[TASK_NAME_SIZE];
+ thread_t thread;
+ //int time; // to address thread/task reuse? we should trace also task
creation/destruction...
+ unsigned int type : 8;
+ unsigned int size : 24; // in bytes, can probably be reduced
+ uint8_t data[];
+} __attribute__((packed));
+
+struct syscall_trace_data {
+ unsigned int num : 7; // syscall id
+ unsigned int direction : 1; // entry/exit
+ unsigned int reserved : 24;
+ uint8_t data[]; // args/retcode, depending on the syscall number and
direction
+} __attribute__((packed));
+
+struct rpc_trace_data {
+ unsigned int size : 16; // in bytes
+ unsigned int direction : 1; // send-copyin/recv-copyout
+ unsigned int reserved : 15;
+ uint8_t data[];
+} __attribute__((packed));
+
+struct trap_trace_data {
+ unsigned int origin : 4; // user/kernel
+ unsigned int reserved : 28;
+ struct i386_saved_state regs;
+} __attribute__((packed));
+
+struct csw_trace_data {
+ char old_task_name[TASK_NAME_SIZE];
+ thread_t old_thread;
+ continuation_t continuation;
+ char new_task_name[TASK_NAME_SIZE];
+ thread_t new_thread;
+} __attribute__((packed));
+
+struct irq_trace_data {
+ int irqn;
+} __attribute__((packed));
+
+struct sample_trace_data {
+ unsigned int linenum;
+ char filename[];
+} __attribute__((packed));
+
+static uint8_t *msg_trace_buf = NULL;
+static vm_size_t msg_trace_size = 0;
+static uint8_t *msg_trace_ptr = NULL;
+static int msg_trace_overrun = 0;
+
+int syscall_trace;
+static int trace_rpc_enable;
+static int trace_trap_enable;
+static int trace_csw_enable;
+int interrupt_trace_enable;
+
+task_t syscall_trace_task = 0;
+unsigned int syscall_seqn = 0;
+
+/* Called to initialize kernel tracing */
+void trace_init(void)
+{
+ msg_trace_buf = NULL;
+ msg_trace_ptr = NULL;
+ msg_trace_size = 0;
+ msg_trace_overrun = 0;
+
+ syscall_trace = 0;
+ trace_rpc_enable = 0;
+ trace_trap_enable = 0;
+ trace_csw_enable = 0;
+ interrupt_trace_enable = 0;
+
+ /* Useful for debugging small tests and the bootstrap modules */
+ extern char* kernel_cmdline; // TODO: find a nice header
+ if (strstr(kernel_cmdline, "tracestart") != NULL)
+ {
+ kern_return_t ret = ktrace_start(&realhost, 20*1024*1024,
TRACE_TYPE_ALL);
+ if (ret == KERN_SUCCESS)
+ printf("trace: started from command line\n");
+ else
+ panic("Can't initialize trace buffer: error %d\n", ret);
+ }
+}
+
+kern_return_t ktrace_start(const host_t host_priv, rpc_vm_size_t bufsize,
+ rpc_long_integer_t kinds)
+{
+ if (host_priv == HOST_NULL)
+ return KERN_INVALID_ARGUMENT;
+
+ if (msg_trace_buf != NULL)
+ return KERN_FAILURE;
+
+ int kr = kmem_alloc(kernel_map, (vm_offset_t*) &msg_trace_buf, bufsize);
+ if (kr != KERN_SUCCESS)
+ return KERN_RESOURCE_SHORTAGE;
+
+ if (msg_trace_buf == 0)
+ return KERN_RESOURCE_SHORTAGE;
+
+ msg_trace_ptr = msg_trace_buf;
+ msg_trace_size = bufsize;
+ msg_trace_overrun = 0;
+
+ if (kinds & TRACE_TYPE_SYSCALL)
+ syscall_trace = 1;
+ if (kinds & TRACE_TYPE_RPC)
+ trace_rpc_enable = 1;
+ if (kinds & TRACE_TYPE_TRAP)
+ trace_trap_enable = 1;
+ if (kinds & TRACE_TYPE_CSW)
+ trace_csw_enable = 1;
+ if (kinds & TRACE_TYPE_IRQ)
+ interrupt_trace_enable = 1;
+
+ if (current_thread())
+ current_thread()->syscall_num = 25; // fake mach_msg() entry for better
trace
+
+ return KERN_SUCCESS;
+}
+
+kern_return_t ktrace_stop(const host_t host_priv)
+{
+ if (host_priv == HOST_NULL)
+ return KERN_INVALID_ARGUMENT;
+
+ if (msg_trace_buf == NULL)
+ return KERN_FAILURE;
+
+ syscall_trace = 0;
+ trace_rpc_enable = 0;
+ trace_trap_enable = 0;
+ trace_csw_enable = 0;
+ interrupt_trace_enable = 0;
+ return KERN_SUCCESS;
+}
+
+kern_return_t ktrace_free(const host_t host_priv)
+{
+ if (host_priv == HOST_NULL)
+ return KERN_INVALID_ARGUMENT;
+
+ if (msg_trace_buf == NULL)
+ return KERN_FAILURE;
+
+ kmem_free(kernel_map, (vm_offset_t) msg_trace_buf, msg_trace_size);
+ msg_trace_buf = NULL;
+ msg_trace_ptr = NULL;
+ return KERN_SUCCESS;
+}
+
+kern_return_t ktrace_collect(const host_t host_priv, pointer_t *buf,
+ mach_msg_type_number_t *bufsize,
+ vm_size_t *last_ptr, natural_t *flags)
+{
+ if (host_priv == HOST_NULL)
+ return KERN_INVALID_ARGUMENT;
+
+ if (msg_trace_buf == NULL)
+ return KERN_FAILURE;
+
+ kern_return_t err;
+ vm_map_copy_t copy;
+ err = vm_map_copyin(kernel_map, (vm_offset_t) msg_trace_buf, msg_trace_size,
+ FALSE, ©);
+ if (err == KERN_SUCCESS)
+ {
+ *buf = (pointer_t) copy;
+ *bufsize = msg_trace_size;
+ *last_ptr = msg_trace_ptr - msg_trace_buf;
+ *flags = 0;
+ if (msg_trace_overrun > 0)
+ {
+ if (msg_trace_overrun < TH_OVERRUN_MASK)
+ *flags |= msg_trace_overrun & TH_OVERRUN_MASK;
+ else
+ *flags |= TH_OVERRUN_MASK;
+ }
+#ifdef __LP64__
+ *flags |= TH_KERNEL64;
+#ifndef USER32
+ *flags |= TH_USER64;
+#endif /* USER32 */
+#endif /* __x86_64__ */
+ }
+ return err;
+}
+
+/*
+ * Starts a new trace entry. It pre-allocates a given size and returns a
pointer
+ * to a region that can be filled with the same amount of data.
+ * In this way we can copy the data only once, e.g. when tracing mach_msg()
+ */
+static void* trace_msg(unsigned int type, size_t size)
+{
+ const size_t msg_size = size + sizeof(struct msg_trace_header);
+ int overrun = 0;
+ uint8_t *tptr, *tptr_orig;
+ struct msg_trace_header *cur;
+
+ /*
+ * Handle interrupitons from trap/irq in reserving the trace data
+ * entry.
+ *
+ * TODO: ensure this is SMP-safe and maybe add cpuid to the trace
+ * header.
+ */
+ do {
+ tptr = __atomic_load_n(&msg_trace_ptr, __ATOMIC_RELAXED);
+ tptr_orig = tptr;
+
+ if ((tptr - msg_trace_buf) > (msg_trace_size - msg_size))
+ {
+ // circular buffer, reset to the beginning
+ tptr = msg_trace_buf;
+ overrun = 1;
+ }
+
+ tptr += msg_size;
+
+ } while (! __atomic_compare_exchange_n(&msg_trace_ptr, &tptr_orig, tptr, 0,
+ __ATOMIC_RELAXED, __ATOMIC_RELAXED));
+
+ if (overrun)
+ msg_trace_overrun += 1;
+
+ cur = (struct msg_trace_header*)(tptr - msg_size);
+ memcpy(cur->task_name, current_task()->name, TASK_NAME_SIZE);
+ cur->thread = current_thread();
+ cur->type = type;
+ cur->size = msg_size;
+ return cur->data;
+}
+
+#ifdef SYSCALL_TRACE_RECORD
+static void trace_syscall_entry(int num, long *args)
+{
+ const mach_trap_t *trap = &mach_trap_table[num];
+ size_t argsize = sizeof(long)*trap->mach_trap_arg_count;
+ size_t size = sizeof(struct syscall_trace_data) + argsize;
+ struct syscall_trace_data *data = trace_msg(DEBUG_TRACE_SYSCALL, size);
+ data->direction = 0;
+ data->num = num;
+ memcpy(data->data, args, argsize);
+}
+
+static void trace_syscall_exit(int num, long ret)
+{
+ size_t argsize = sizeof(long);
+ size_t size = sizeof(struct syscall_trace_data) + argsize;
+ struct syscall_trace_data *data = trace_msg(DEBUG_TRACE_SYSCALL, size);
+ data->direction = 1;
+ data->num = num;
+ memcpy(data->data, &ret, argsize);
+}
+#endif /* SYSCALL_TRACE_RECORD */
+
+/* This needs to be called from the machine-specific entry point */
+void syscall_trace_common(long sysnum, long argv[MAX_SYSCALL_ARGS])
+{
+
+ if (syscall_trace == 0)
+ goto out;
+
+ current_thread()->syscall_num = sysnum;
+ if (syscall_trace_task && syscall_trace_task != current_task())
+ goto out;
+
+#ifdef SYSCALL_TRACE_PRINT
+ if (sysnum == 30) // don't track mach_print()
+ goto out;
+
+ syscall_seqn++;
+ const mach_trap_t *trap = &mach_trap_table[sysnum];
+ printf("%08d:%s:0x%p:%s(",
+ syscall_seqn, current_task()->name, current_thread(),
trap->mach_trap_name);
+ for (int i=0; i<MAX_SYSCALL_ARGS; i++)
+ {
+ int value = argv[i];
+ if (trap->mach_trap_arg_count <= i)
+ continue;
+ if ((value > 1024) || (value < -1024))
+ printf("0x%08x", value);
+ else
+ printf("%d", value);
+ if (i + 1 < trap->mach_trap_arg_count)
+ printf(", ");
+ }
+ printf(")\n");
+#else /* SYSCALL_TRACE_PRINT */
+ trace_syscall_entry(sysnum, argv);
+#endif
+ out:
+ return;
+}
+
+/* This needs to be called from the machine-specific entry point */
+int syscall_trace_return(int val)
+{
+ if (syscall_trace == 0)
+ return val;
+ if (syscall_trace_task && syscall_trace_task != current_task())
+ return val;
+#ifdef SYSCALL_TRACE_PRINT
+ if (current_thread()->syscall_num != 30) // don't track mach_print()
+ {
+ const mach_trap_t *trap =
&mach_trap_table[current_thread()->syscall_num];
+ printf("%08d:%s:0x%p: %s return(%08x)\n",
+ syscall_seqn, current_task()->name, current_thread(),
+ trap->mach_trap_name, val);
+ }
+#else /* SYSCALL_TRACE_PRINT */
+ trace_syscall_exit(current_thread()->syscall_num, val);
+#endif
+
+ return val;
+}
+
+void rpc_trace(int direction, void *msg, long msize)
+{
+ if (!trace_rpc_enable)
+ return;
+
+ size_t size = sizeof(struct rpc_trace_data);
+ struct rpc_trace_data *data = trace_msg(DEBUG_TRACE_RPC, size + msize);
+ data->direction = direction;
+ data->size = msize;
+ if (copyin(msg, data->data, msize))
+ memset(data->data, 0, msize);
+}
+
+/* This needs to be called from the machine-specific entry point */
+void trap_trace(int origin, struct i386_saved_state *regs)
+{
+ if (!trace_trap_enable)
+ return;
+
+ size_t size = sizeof(struct trap_trace_data);
+ struct trap_trace_data *data = trace_msg(DEBUG_TRACE_TRAP, size);
+ data->origin = origin;
+ memcpy(&data->regs, regs, sizeof(*regs));
+}
+
+void context_switch_trace(thread_t old, continuation_t continuation, thread_t
new)
+{
+ if (!trace_csw_enable)
+ return;
+
+ size_t size = sizeof(struct csw_trace_data);
+ struct csw_trace_data *data = trace_msg(DEBUG_TRACE_CSW, size);
+ if (old)
+ strncpy(data->old_task_name, old->task->name, TASK_NAME_SIZE);
+ else
+ data->old_task_name[0] = '\0';
+ data->old_thread = old;
+ data->continuation = continuation;
+ strncpy(data->new_task_name, new->task->name, TASK_NAME_SIZE);
+ data->new_thread = new;
+}
+
+void interrupt_trace(int iunit, /* 'unit' number */
+ int old_ipl, /* old interrupt level */
+ const char *ret_addr, /* return address in interrupt
handler */
+ struct i386_interrupt_state *regs, /* saved registers */
+ int irqn)
+{
+ size_t size = sizeof(struct irq_trace_data);
+ struct irq_trace_data *data = trace_msg(DEBUG_TRACE_IRQ, size);
+ data->irqn = irqn;
+}
+
+/* This trace type is useful during debug to create on-the-fly custom
+ traces, e.g. to follow the evolution of a specific value or
+ expression along other trace points */
+void dummy_trace(void)
+{
+ struct dummy_data {
+ int nthreads;
+ };
+ size_t size = sizeof(struct dummy_data);
+ struct dummy_data *data = trace_msg(DEBUG_TRACE_DUMMY, size);
+
+ // as an example, we count the threads
+ task_t task;
+ thread_t thread;
+ processor_set_t pset;
+ data->nthreads = 0;
+ if (queue_first(&all_psets) == 0)
+ return;
+ queue_iterate(&all_psets, pset, processor_set_t, all_psets) {
+ if (queue_first(&pset->tasks) == 0)
+ continue;
+ queue_iterate(&pset->tasks, task, task_t, pset_tasks) {
+ queue_iterate(&task->thread_list, thread, thread_t, thread_list) {
+ data->nthreads++;
+ }
+ }
+ }
+}
diff --git a/kern/trace.h b/kern/trace.h
new file mode 100644
index 00000000..06d77e93
--- /dev/null
+++ b/kern/trace.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2023 Free Software Foundation
+ *
+ * This program is free software ; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation ; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the program ; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#if KERNEL_TRACE
+
+#include <mach/trace.h>
+#include <kern/thread.h>
+
+/* syscall events */
+#define MAX_SYSCALL_ARGS 12
+//#define SYSCALL_TRACE_PRINT // useful for small tests
+#define SYSCALL_TRACE_RECORD
+
+void trace_init(void);
+void rpc_trace(int direction, void *msg, long size);
+void trap_trace(int origin, struct i386_saved_state *regs);
+void syscall_trace_common(long sysnum, long argv[MAX_SYSCALL_ARGS]);
+int syscall_trace_return(int val);
+void context_switch_trace(thread_t old, continuation_t continuation, thread_t
new);
+
+#else /* ! KERNEL_TRACE */
+
+#define trace_init()
+#define trap_trace(origin, regs)
+#define context_switch_trace(old, cont, new)
+
+#endif /* KERNEL_TRACE */
diff --git a/scripts/gnumach-gdb.py b/scripts/gnumach-gdb.py
new file mode 100644
index 00000000..16728c06
--- /dev/null
+++ b/scripts/gnumach-gdb.py
@@ -0,0 +1,63 @@
+
+import logging
+
+import gdb
+
+
+log = logging.getLogger("gnumach-gdb")
+on64bit = gdb.selected_inferior().architecture().name() == "i386:x86-64"
+on64bituser = gdb.lookup_symbol("syscall64")[0] is not None
+
+
+class TraceCmd(gdb.Command):
+ """gnumach-trace [FILE]
+
+Save the content of the kernel trace buffer to a file (default ./trace.bin).
+"""
+
+ def __init__(self):
+ super().__init__("gnumach-trace", gdb.COMMAND_USER)
+
+ def invoke(self, arg_string, from_tty):
+ try:
+ self._invoke(arg_string, from_tty)
+ except Exception:
+ log.exception(arg_string)
+
+ def _invoke(self, arg_string, from_tty):
+ args = gdb.string_to_argv(arg_string)
+ if args in ['-h', '--help']:
+ print(TraceCmd.__doc__)
+ return
+ outfile = "trace.bin"
+ if len(args) >= 1:
+ outfile = args[0]
+
+ tbuf = gdb.lookup_static_symbol("msg_trace_buf")
+ if tbuf is None:
+ log.error("trace buffer not available.")
+ return
+ tbuf_base = gdb.lookup_static_symbol("msg_trace_buf").value()
+ tbuf_top = gdb.lookup_static_symbol("msg_trace_ptr").value()
+ tbuf_overrun = gdb.lookup_static_symbol("msg_trace_overrun").value()
+ tbuf_len = tbuf_top - tbuf_base
+
+ print(f"Writing {tbuf_len} bytes to {outfile} (overrun
{tbuf_overrun})")
+ proc = gdb.selected_inferior()
+ with open(outfile, 'wb') as tf:
+ # trace header (see mach/trace.h)
+ tf.write(b"GMTR")
+ flags = int(tbuf_overrun & 0xF)
+ if on64bit:
+ flags |= 0x10
+ if on64bituser:
+ flags |= 0x20
+ tf.write(flags.to_bytes(4, byteorder='little'))
+ tf.write(int(tbuf_len).to_bytes(4, byteorder='little'))
+ tf.write(int(0).to_bytes(4, byteorder='little'))
+ # trace payload
+ tf.write(proc.read_memory(tbuf_base, tbuf_len))
+
+
+logging.basicConfig(level=logging.INFO)
+TraceCmd()
diff --git a/tests/test-trace.c b/tests/test-trace.c
new file mode 100644
index 00000000..d6bc8051
--- /dev/null
+++ b/tests/test-trace.c
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2024 Free Software Foundation
+ *
+ * This program is free software ; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation ; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the program ; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <syscalls.h>
+#include <testlib.h>
+
+#include <mach/trace.h>
+#include <gnumach.user.h>
+#include <mach.user.h>
+
+void test_trace(void)
+{
+ int err;
+ err = ktrace_start(host_priv(), 40 * 1024 * 1024, TRACE_TYPE_ALL);
+ ASSERT_RET(err, "ktrace_start");
+
+ err = mach_msg(NULL, MACH_MSG_OPTION_NONE, 0, 0,
+ MACH_PORT_NULL, 0, MACH_PORT_NULL);
+ ASSERT_RET(err, "syscall path with thread_syscall_return()");
+
+ err = ktrace_stop(host_priv());
+ ASSERT_RET(err, "ktrace_stop");
+
+ vm_offset_t buf, last_ptr;
+ mach_msg_type_number_t bufsize;
+ int flags;
+ err = ktrace_collect(host_priv(), &buf, &bufsize, &last_ptr, &flags);
+ ASSERT_RET(err, "ktrace_collect");
+ printf("size %lu last %lu flags %x\n", bufsize, last_ptr, flags);
+ ASSERT(bufsize >= last_ptr, "ERROR overrun?");
+
+ err = vm_deallocate(mach_task_self(), buf, bufsize);
+ ASSERT_RET(err, "vm_deallocate");
+
+ err = ktrace_free(host_priv());
+ ASSERT_RET(err, "ktrace_free");
+}
+
+int main(int argc, char *argv[], int envc, char *envp[])
+{
+ test_trace();
+ return 0;
+}
diff --git a/tests/user-qemu.mk b/tests/user-qemu.mk
index fd5ae1ab..c4df73f9 100644
--- a/tests/user-qemu.mk
+++ b/tests/user-qemu.mk
@@ -204,7 +204,8 @@ USER_TESTS := \
tests/test-syscalls \
tests/test-machmsg \
tests/test-task \
- tests/test-threads
+ tests/test-threads \
+ tests/test-trace
USER_TESTS_CLEAN = $(subst tests/,clean-,$(USER_TESTS))
diff --git a/x86_64/debug_trace.S b/x86_64/debug_trace.S
index 7bed5ccc..97e48063 100644
--- a/x86_64/debug_trace.S
+++ b/x86_64/debug_trace.S
@@ -30,19 +30,18 @@
.text
ENTRY(_debug_trace)
+ ud2 // to test, do we need to keep it in asm?
pushf
cli
pushq %rax
pushq %rbx
- .byte 0x36 /* SS: bug in gas? */
movl %ss:EXT(debug_trace_pos),%eax
movq S_ARG0,%rbx
- movq %rbx,%ss:EXT(debug_trace_buf)(,%eax,16)
- movl S_ARG1,%ebx
- movl %ebx,%ss:EXT(debug_trace_buf)+8(,%eax,16)
+ movq %rbx,%ss:EXT(debug_trace_buf)(,%eax,8)
+ movq S_ARG1,%rbx
+ movq %rbx,%ss:EXT(debug_trace_buf)+8(,%eax,8)
incl %eax
andl $DEBUG_TRACE_LEN-1,%eax
- .byte 0x36 /* SS: bug in gas? */
movl %eax,%ss:EXT(debug_trace_pos)
popq %rbx
popq %rax
diff --git a/x86_64/interrupt.S b/x86_64/interrupt.S
index 6fb77727..72a2fd0c 100644
--- a/x86_64/interrupt.S
+++ b/x86_64/interrupt.S
@@ -115,6 +115,25 @@ ENTRY(interrupt)
shll $2,%eax /* irq * 4 */
movl EXT(iunit)(%rax),%edi /* get device unit number as 1st arg */
+#if KERNEL_TRACE
+ testb $0xff,EXT(interrupt_trace_enable)
+ jz 0f
+ movq S_IRQ,S_ARG4
+ pushq %rax
+ // save args for the real handler
+ pushq S_ARG0
+ pushq S_ARG1
+ pushq S_ARG2
+ pushq S_ARG3
+ callq EXT(interrupt_trace)
+ popq S_ARG3
+ popq S_ARG2
+ popq S_ARG1
+ popq S_ARG0
+ popq %rax
+0:
+#endif /* KERNEL_TRACE */
+
shll $1,%eax /* irq * 8 */
call *EXT(ivect)(%rax) /* call interrupt handler */
diff --git a/x86_64/locore.S b/x86_64/locore.S
index 8f39a677..9e9e5833 100644
--- a/x86_64/locore.S
+++ b/x86_64/locore.S
@@ -747,6 +747,15 @@ ENTRY(thread_bootstrap_return)
*/
ENTRY(thread_syscall_return)
+#if KERNEL_TRACE
+ testb $0xff,EXT(syscall_trace)
+ jz 1f
+ /* first arg is already set in rdi, save it */
+ pushq %rdi
+ callq syscall_trace_return
+ popq %rdi
+1:
+#endif
movq S_ARG0,%rax /* get return value */
movq %rsp,%rcx /* get kernel stack */
or $(KERNEL_STACK_SIZE-1),%rcx
@@ -1275,16 +1284,41 @@ syscall_native:
jnz 0b /* loop for all arguments */
mach_call_call:
-
-#ifdef DEBUG
+#if KERNEL_TRACE
testb $0xff,EXT(syscall_trace)
jz 0f
- movq %rax,%rdi
- call EXT(syscall_trace_print)
- /* will return with syscallofs still (or again) in eax */
+ /*
+ * We need to save register arguments, for the actual tracing
+ * and to restore them before calling the real syscall.
+ */
+ pushq %r9
+ pushq %r8
+ pushq %rcx
+ pushq %rdx
+ pushq %rsi
+ pushq %rdi
+ pushq %rax
+ movq %rsp,%rdi
+ callq EXT(syscall_trace_enter)
+ popq %rax
+ popq %rdi
+ popq %rsi
+ popq %rdx
+ popq %rcx
+ popq %r8
+ popq %r9
+ movq %rax,%r12 /* save sysnum for tracing return value */
0:
-#endif /* DEBUG */
+#endif /* KERNEL_TRACE */
call *EXT(mach_trap_table)+8(%rax) /* call procedure */
+#if KERNEL_TRACE
+ testb $0xff,EXT(syscall_trace)
+ jz 1f
+ movq %rax,%rdi
+ callq syscall_trace_return
+ /* will return with return value still (or again) in eax */
+1:
+#endif /* KERNEL_TRACE */
movq %rsp,%rcx /* get kernel stack */
or $(KERNEL_STACK_SIZE-1),%rcx
movq -7-IKS_SIZE(%rcx),%rsp /* switch back to PCB stack */
@@ -1460,7 +1494,45 @@ _syscall64_args_stack:
jnz 0b /* loop for all remaining arguments */
_syscall64_call:
+#if KERNEL_TRACE
+ testb $0xff,EXT(syscall_trace)
+ jz 0f
+ /*
+ * We need to save register arguments, for the actual tracing
+ * and to restore them before calling the real syscall.
+ * We store all arguments and the syscall number in an array
+ * on the stack; fortunately args > 6 are already there, so
+ * push the rest and restore them before actually calling the
+ * handler.
+ */
+ pushq %r9
+ pushq %r8
+ pushq %rcx
+ pushq %rdx
+ pushq %rsi
+ pushq %rdi
+ pushq %rax
+ movq %rsp,%rdi
+ callq EXT(syscall_trace_enter)
+ popq %rax
+ popq %rdi
+ popq %rsi
+ popq %rdx
+ popq %rcx
+ popq %r8
+ popq %r9
+ movq %rax,%r12 /* save sysnum for tracing return value */
+0:
+#endif /* KERNEL_TRACE */
call *EXT(mach_trap_table)+8(%rax) /* call procedure */
+#if KERNEL_TRACE
+ testb $0xff,EXT(syscall_trace)
+ jz 1f
+ movq %rax,%rdi
+ callq syscall_trace_return
+ /* will return with return value still (or again) in eax */
+1:
+#endif /* KERNEL_TRACE */
_syscall64_check_for_ast:
/* Check for ast. */
--
2.39.2
- [PATCH 1/2] RFC enhance tracing utilities,
Luca Dariz <=