[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH gnumach 1/2] yyy sysenter prototype
From: |
Justus Winter |
Subject: |
[PATCH gnumach 1/2] yyy sysenter prototype |
Date: |
Wed, 6 May 2015 13:30:20 +0200 |
---
i386/Makefrag.am | 2 +
i386/i386/gdt.c | 17 ++++
i386/i386/gdt.h | 7 +-
i386/i386/i386asm.sym | 1 +
i386/i386/locore.S | 224 ++++++++++++++++++++++++++++++++++++++++++++++++
i386/i386/pcb.c | 24 +++---
i386/i386/syscall.c | 103 ++++++++++++++++++++++
i386/i386/syscall.h | 7 ++
i386/i386/tss.h | 1 +
i386/i386at/conf.c | 8 ++
i386/i386at/model_dep.c | 2 +
11 files changed, 383 insertions(+), 13 deletions(-)
create mode 100644 i386/i386/syscall.c
create mode 100644 i386/i386/syscall.h
diff --git a/i386/Makefrag.am b/i386/Makefrag.am
index 4dd6a9f..f59ac29 100644
--- a/i386/Makefrag.am
+++ b/i386/Makefrag.am
@@ -147,6 +147,8 @@ libkernel_a_SOURCES += \
i386/i386/setjmp.h \
i386/i386/spl.S \
i386/i386/spl.h \
+ i386/i386/syscall.c \
+ i386/i386/syscall.h \
i386/i386/task.h \
i386/i386/thread.h \
i386/i386/time_stamp.h \
diff --git a/i386/i386/gdt.c b/i386/i386/gdt.c
index c895eb3..0f9d0e3 100644
--- a/i386/i386/gdt.c
+++ b/i386/i386/gdt.c
@@ -57,6 +57,23 @@ gdt_init(void)
LINEAR_MIN_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS,
LINEAR_MAX_KERNEL_ADDRESS -
(LINEAR_MIN_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) - 1,
ACC_PL_K|ACC_DATA_W, SZ_32);
+ fill_gdt_descriptor(KERNEL_ENTER_CS,
+ LINEAR_MIN_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS,
+ LINEAR_MAX_KERNEL_ADDRESS -
(LINEAR_MIN_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) - 1,
+ ACC_PL_K|ACC_CODE_R, SZ_32);
+ fill_gdt_descriptor(KERNEL_ENTER_DS,
+ LINEAR_MIN_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS,
+ LINEAR_MAX_KERNEL_ADDRESS -
(LINEAR_MIN_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) - 1,
+ ACC_PL_K|ACC_DATA_W, SZ_32);
+ fill_gdt_descriptor(USER_EXIT_CS,
+ VM_MIN_ADDRESS,
+ VM_MAX_ADDRESS-VM_MIN_ADDRESS-4096,
+ /* XXX LINEAR_... */
+ ACC_PL_U|ACC_CODE_R, SZ_32);
+ fill_gdt_descriptor(USER_EXIT_DS,
+ VM_MIN_ADDRESS,
+ VM_MAX_ADDRESS-VM_MIN_ADDRESS-4096,
+ ACC_PL_U|ACC_DATA_W, SZ_32);
#ifndef MACH_PV_DESCRIPTORS
fill_gdt_descriptor(LINEAR_DS,
0,
diff --git a/i386/i386/gdt.h b/i386/i386/gdt.h
index d865640..37ca6f5 100644
--- a/i386/i386/gdt.h
+++ b/i386/i386/gdt.h
@@ -55,7 +55,12 @@
#define USER_GDT 0x48 /* user-defined GDT entries */
#define USER_GDT_SLOTS 2
-#define GDTSZ (USER_GDT/8 + USER_GDT_SLOTS)
+#define KERNEL_ENTER_CS (0x58 | SEL_PL_K) /* kernel code; the value syscall_init_cpu writes to the SYSENTER_CS MSR */
+#define KERNEL_ENTER_DS (0x60 | SEL_PL_K) /* kernel data; GDT slot directly after KERNEL_ENTER_CS */
+#define USER_EXIT_CS (0x68 | SEL_PL_U) /* user code; slot after KERNEL_ENTER_DS. These four slots must stay consecutive: per the Intel SDM, SYSENTER/SYSEXIT derive the other three selectors from SYSENTER_CS at +8, +16 and +24 */
+#define USER_EXIT_DS (0x70 | SEL_PL_U) /* user data; last slot in the GDT */
+
+#define GDTSZ (USER_EXIT_DS/8 + 1) /* number of GDT entries: index of the last selector, plus one */
extern struct real_descriptor gdt[GDTSZ];
diff --git a/i386/i386/i386asm.sym b/i386/i386/i386asm.sym
index dd1a2ed..e495d1a 100644
--- a/i386/i386/i386asm.sym
+++ b/i386/i386/i386asm.sym
@@ -70,6 +70,7 @@ size i386_kernel_state iks
size i386_exception_link iel
+size i386_saved_state iss
offset i386_saved_state r cs
offset i386_saved_state r uesp
offset i386_saved_state r eax
diff --git a/i386/i386/locore.S b/i386/i386/locore.S
index cfda86f..d8241a7 100644
--- a/i386/i386/locore.S
+++ b/i386/i386/locore.S
@@ -521,6 +521,9 @@ _return_to_user:
*/
_return_from_kernel:
+ cmpl $0x7fffffff, R_TRAPNO(%esp) /* YYY */
+ je return_from_sysenter
+
_kret_popl_gs:
popl %gs /* restore segment registers */
_kret_popl_fs:
@@ -978,6 +981,18 @@ ttd_from_iret_i: /* on interrupt stack */
#endif /* MACH_TTD */
+/* User stub for calling the kernel using the trap gate.  syscall_init copies the bytes between the start and end labels into the stub page when SYSENTER is not available.  On entry the stack holds the return address, then the syscall number (presumably followed by the arguments). */
+ .globl user_trapgate_stub_start
+user_trapgate_stub_start:
+ popl %ecx /* Pop return address into %ecx. */
+ popl %eax /* Pop syscall number into %eax. */
+ pushl %ecx /* Push back return address, now in the slot that held the syscall number. */
+ lcall $7, $0 /* far call to selector 7, offset 0 (presumably the system call gate) */
+ subl $4, %esp /* magic: undo the net 4-byte pop above, so ret reads the return address from its original slot and the caller gets back the stack depth it set up; relies on that slot being untouched meanwhile -- TODO confirm */
+ ret
+ .globl user_trapgate_stub_end
+user_trapgate_stub_end:
+
/*
* System call enters through a call gate. Flags are not saved -
* we must shuffle stack to look like trap save area.
@@ -1170,6 +1185,215 @@ syscall_addr:
/* set error code - read user space */
jmp _take_trap /* treat as a trap */
+/*
+ * SYSENTER-based system calls.
+ *
+ * Calling convention (register contents when sysenter executes):
+ * %eax - syscall number (holds the return value afterwards)
+ * %ebx - syscall argument 1
+ * %ecx - syscall argument 2 (holds the user stack pointer on return)
+ * %edx - syscall argument 3 (holds the user return address on return)
+ * %esi - syscall argument 4
+ * %edi - userspace return address
+ * %ebp - userspace stack pointer; arguments past the fourth are read from it
+ */
+
+/* User stub for calling the kernel using the sysenter instruction.  syscall_init copies the bytes between the start and end labels into the stub page when the CPU has SEP. */
+ .globl user_sysenter_stub_start
+user_sysenter_stub_start:
+ push %ebp /* Set up a frame; the arguments below are read relative to %ebp. */
+ mov %esp, %ebp
+ pushf /* Flags are saved here and restored by the popf below. */
+ push %ebx /* Store callee-saved registers. */
+ push %esi
+ push %edi
+ mov 8(%ebp), %eax /* Move syscall number into %eax. */
+ mov 12(%ebp), %ebx /* Move first argument into %ebx. */
+ mov 16(%ebp), %ecx /* Move second argument into %ecx. */
+ mov 20(%ebp), %edx /* Move third argument into %edx. */
+ mov 24(%ebp), %esi /* Move fourth argument into %esi. */
+ call get_ip /* push the run-time address of get_ip; the stub is copied to another page, so a link-time address would be wrong */
+get_ip: pop %edi /* load current ip */
+ add $8, %edi /* userspace return address: the instruction after sysenter, assuming pop/add/movl/sysenter assemble to 1+3+2+2 bytes */
+ movl %esp, %ebp /* userspace stack pointer */
+ sysenter
+ pop %edi /* The kernel returns here; restore what was pushed above, in reverse order. */
+ pop %esi
+ pop %ebx
+ popf
+ pop %ebp
+ ret
+ .globl user_sysenter_stub_end
+user_sysenter_stub_end:
+
+/*
+ * SYSENTER entry point.
+ *
+ * Control enters at `sysenter_entry' with %esp pointing to the
+ * sysenter stack.  All registers are saved there first; %ebp then
+ * points at the last value pushed (GS), and SE_* are offsets from it.
+
+ *
+ * sysenter stack layout:
+ *
+ * sysenter stack base -> EAX
+ * ECX
+ * EDX
+ * EBX
+ * ESP
+ * EBP
+ * ESI
+ * EDI
+ * DS
+ * ES
+ * FS
+ * %ebp -> GS
+ */
+/* Offsets from %ebp */
+#define SE_EAX (4 * 11)
+#define SE_ECX (4 * 10)
+#define SE_EDX (4 * 9)
+#define SE_EBX (4 * 8)
+#define SE_ESP (4 * 7) /* the %esp value stored by pusha */
+#define SE_EBP (4 * 6)
+#define SE_ESI (4 * 5)
+#define SE_EDI (4 * 4)
+#define SE_DS (4 * 3)
+#define SE_ES (4 * 2)
+#define SE_FS (4 * 1)
+#define SE_GS (4 * 0)
+#define SE_STACK_POINTER SE_EBP /* the calling convention passes the user stack pointer in %ebp */
+#define SE_RETURN_ADDRESS SE_EDI /* ... and the user return address in %edi */
+
+#define SE_USER_SKIP 20 /* skip past the scratchpad: the 5 words the user stub pushes below its return address (ebp, eflags, ebx, esi, edi) */
+
+ENTRY(sysenter_entry) /* target of the sysenter instruction; address and initial stack are programmed by syscall_init_cpu */
+ pusha /* save all registers */
+ cld /* clear direction flag */
+ pushl %ds /* save the segment registers */
+ pushl %es
+ pushl %fs
+ pushl %gs
+ mov %esp, %ebp /* %ebp = base for the SE_* offsets into the sysenter stack */
+
+ mov %ss,%cx /* switch to kernel data segment */
+ mov %cx,%ds
+ mov %cx,%es
+ mov %cx,%fs
+ mov %cx,%gs
+
+ CPU_NUMBER(%edx)
+ movl CX(EXT(kernel_stack),%edx),%ebx
+ /* get current kernel stack */
+ movl %ebx, %ecx
+ or $(KERNEL_STACK_SIZE-1),%ecx
+ movl -3-IKS_SIZE(%ecx), %esp /* switch to PCB stack: load the pointer stored just below the kernel state at the top of the kernel stack (presumably the saved-state pointer of the exception link) */
+ addl $(ISS_SIZE - 16 /* vm86 */ - 6 * 4 /* unused */), %esp
+ /* point to trap number */
+
+ /* Populate trap save area. */
+ pushl $0x7fffffff /* trap number: the marker _return_from_kernel compares against to take the return_from_sysenter path */
+ pushl %eax /* %eax: unused, overwritten with the return value after the call */
+ pushl SE_STACK_POINTER(%ebp) /* %ecx: for sysexit */
+ pushl SE_RETURN_ADDRESS(%ebp) /* %edx: for sysexit */
+ subl $(5 * 4), %esp /* unused: five slots left unwritten; with the three above they are the eight that popa consumes on return */
+ pushl SE_DS(%ebp) /* copy the segment registers */
+ pushl SE_ES(%ebp)
+ pushl SE_FS(%ebp)
+ pushl SE_GS(%ebp)
+
+ xchgl %ebx, %esp /* switch to kernel stack */
+ /* %ebx points to user registers */
+ negl %eax /* get system call number */
+ jl sysenter_mach_call_range
+ /* out of range if it was positive */
+ cmpl EXT(mach_trap_count),%eax
+ /* check system call table bounds */
+ jg sysenter_mach_call_range
+ /* error if out of range; NOTE(review): jg lets an index equal to mach_trap_count through -- confirm the table has an entry there */
+
+ shll $4,%eax /* manual indexing: scale by 16, presumably the size of a table entry */
+ movl EXT(mach_trap_table)(%eax),%ecx
+ /* get number of arguments */
+
+ cmp $4, %ecx /* dispatch on the argument count to the matching se_args_N label */
+ ja se_args_5plus
+ je se_args_4
+ cmp $2, %ecx
+ ja se_args_3
+ je se_args_2
+ cmp $1, %ecx
+ je se_args_1
+ jmp se_args_0
+
+se_args_5plus:
+
+ sub $4, %ecx /* skip the four first arguments */
+ movl SE_STACK_POINTER(%ebp), %esi
+ /* get user stack pointer */
+ lea (4 /* skip user return address */\
+ +4 /* presumably the syscall number slot; the sum of all terms points past the last argument */\
+ +16 /* skip register arguments */\
+ +SE_USER_SKIP)(%esi,%ecx,4),%esi
+ /* and skip past the userspace
+ local storage */
+
+ movl $USER_DS,%edx /* use user data segment for accesses */
+ mov %dx,%fs
+ movl %esp,%edx /* save kernel ESP for error recovery */
+
+0: subl $4,%esi
+ RECOVER(sysenter_mach_call_addr_push)
+ pushl %fs:(%esi) /* push argument on stack */
+ loop 0b /* loop for all arguments */
+
+se_args_4: /* the labels below fall through, pushing arguments last to first */
+ push SE_ESI(%ebp) /* push fourth argument */
+se_args_3:
+ push SE_EDX(%ebp) /* push third argument */
+se_args_2:
+ push SE_ECX(%ebp) /* push second argument */
+se_args_1:
+ push SE_EBX(%ebp) /* push first argument */
+se_args_0:
+ sti /* xxx: sti/cli where ? */
+ call *EXT(mach_trap_table)+4(%eax)
+ /* call procedure */
+ cli /* xxx: sti/cli where ? */
+ movl %ebx, %esp /* switch to pcb stack; relies on the called procedure preserving %ebx */
+ movl %eax, R_EAX(%esp) /* save return value */
+ jmp _return_from_trap /* check for AST, then... */
+return_from_sysenter: /* return here */
+ popl %gs /* restore segment registers */
+ popl %fs
+ popl %es
+ popl %ds
+ popa /* loads %ecx and %edx with the user stack pointer and return address saved above */
+ sti /* xxx: sti/cli where ? */
+ sysexit
+
+/*
+ * Address out of range while copying stack arguments. Change to page fault.
+ * %esi holds failing address, %edx the kernel ESP saved before the copy loop, %ebx the saved user registers.
+ */
+sysenter_mach_call_addr_push:
+ movl %edx,%esp /* clean parameters from stack */
+ movl %esi,R_CR2(%ebx) /* set fault address */
+ movl $(T_PAGE_FAULT),R_TRAPNO(%ebx)
+ /* set page-fault trap; this replaces the 0x7fffffff marker stored at entry */
+ movl $(T_PF_USER),R_ERR(%ebx)
+ /* set error code - read user space */
+ jmp _take_trap /* treat as a trap */
+
+/*
+ * System call out of range. Treat as invalid-instruction trap.
+ * (? general protection?)  %ebx points to the saved user registers.
+ */
+sysenter_mach_call_range:
+ movl $(T_INVALID_OPCODE),R_TRAPNO(%ebx)
+ /* set invalid-operation trap; this replaces the 0x7fffffff marker stored at entry */
+ movl $0,R_ERR(%ebx) /* clear error code */
+ jmp _take_trap /* treat as a trap */
.data
DATA(cpu_features)
diff --git a/i386/i386/pcb.c b/i386/i386/pcb.c
index e8040c8..2da3804 100644
--- a/i386/i386/pcb.c
+++ b/i386/i386/pcb.c
@@ -391,12 +391,12 @@ void pcb_init(thread_t thread)
* Guarantee that the bootstrapped thread will be in user
* mode.
*/
- pcb->iss.cs = USER_CS;
- pcb->iss.ss = USER_DS;
- pcb->iss.ds = USER_DS;
- pcb->iss.es = USER_DS;
- pcb->iss.fs = USER_DS;
- pcb->iss.gs = USER_DS;
+ pcb->iss.cs = USER_EXIT_CS;
+ pcb->iss.ss = USER_EXIT_DS;
+ pcb->iss.ds = USER_EXIT_DS;
+ pcb->iss.es = USER_EXIT_DS;
+ pcb->iss.fs = USER_EXIT_DS;
+ pcb->iss.gs = USER_EXIT_DS;
pcb->iss.efl = EFL_USER_SET;
thread->pcb = pcb;
@@ -524,12 +524,12 @@ kern_return_t thread_setstatus(
* 386 mode. Set segment registers for flat
* 32-bit address space.
*/
- saved_state->cs = USER_CS;
- saved_state->ss = USER_DS;
- saved_state->ds = USER_DS;
- saved_state->es = USER_DS;
- saved_state->fs = USER_DS;
- saved_state->gs = USER_DS;
+ saved_state->cs = USER_EXIT_CS;
+ saved_state->ss = USER_EXIT_DS;
+ saved_state->ds = USER_EXIT_DS;
+ saved_state->es = USER_EXIT_DS;
+ saved_state->fs = USER_EXIT_DS;
+ saved_state->gs = USER_EXIT_DS;
}
else {
/*
diff --git a/i386/i386/syscall.c b/i386/i386/syscall.c
new file mode 100644
index 0000000..e9b17d0
--- /dev/null
+++ b/i386/i386/syscall.c
@@ -0,0 +1,103 @@
+#include <mach/vm_param.h>
+#include <mach/vm_prot.h>
+#include <vm/pmap.h>
+#include <vm/vm_kern.h>
+#include <string.h>
+#include <kern/debug.h>
+
+#include <machine/tss.h>
+#include <i386/i386/ktss.h>
+#include <i386/i386/gdt.h>
+#include <i386/i386/locore.h>
+
+#include "syscall.h"
+
+#include <kern/printf.h> // xxx
+
+static vm_offset_t msyscall = 0; /* kernel address of the page holding the user stub; allocated by syscall_init, exported by syscall_mmap */
+
+void user_trapgate_stub_start(); /* start and end labels of the trap-gate stub; only their addresses are used */
+void user_trapgate_stub_end();
+
+void user_sysenter_stub_start(); /* start and end labels of the sysenter stub; only their addresses are used */
+void user_sysenter_stub_end();
+
+void
+syscall_init(void) /* Allocate the stub page, copy in the user stub that matches the CPU, then call syscall_init_cpu. Panics if the page cannot be allocated. */
+{
+ kern_return_t kr;
+ vm_offset_t user_stub_start;
+ vm_offset_t user_stub_end;
+
+ kr = kmem_alloc_wired(kernel_map, &msyscall, PAGE_SIZE);
+ if (kr != KERN_SUCCESS)
+ panic("syscall_init");
+
+ memset((void *) msyscall, 0, PAGE_SIZE); /* the bytes after the stub stay zero */
+
+ if (CPU_HAS_FEATURE (CPU_FEATURE_SEP)) { /* same feature test that syscall_init_cpu uses before programming the MSRs */
+ printf ("syscall: using SYSENTER/SYSEXIT\n");
+ user_stub_start = (vm_offset_t) user_sysenter_stub_start;
+ user_stub_end = (vm_offset_t) user_sysenter_stub_end;
+ } else {
+ printf ("syscall: using trap gate\n");
+ user_stub_start = (vm_offset_t) user_trapgate_stub_start;
+ user_stub_end = (vm_offset_t) user_trapgate_stub_end;
+ }
+
+ memcpy((void *) msyscall, (void *) user_stub_start,
+ (size_t) (user_stub_end - user_stub_start)); /* NOTE(review): the stub length is not checked against PAGE_SIZE */
+
+ syscall_init_cpu();
+}
+
+static void
+wrmsr(unsigned int msr, unsigned long long val) /* Execute the wrmsr instruction with msr as the register number and val as the 64-bit value. */
+{
+ __asm__ __volatile__("wrmsr"
+ : /* no Outputs */
+ : "c" (msr), "A" (val)); /* "c" places msr in %ecx; "A" places val in the %edx:%eax pair on 32-bit x86 */
+}
+
+#define MSR_IA32_SYSENTER_CS 0x00000174 /* MSR that receives KERNEL_ENTER_CS */
+#define MSR_IA32_SYSENTER_ESP 0x00000175 /* MSR that receives the end address of the sysenter stack */
+#define MSR_IA32_SYSENTER_EIP 0x00000176 /* MSR that receives the address of sysenter_entry */
+
+extern void sysenter_entry(void); /* assembly entry point; only its address is taken here */
+
+void
+syscall_init_cpu(void) /* Program the three SYSENTER MSRs on the calling CPU; does nothing when the CPU lacks CPU_FEATURE_SEP. */
+{
+ if (! CPU_HAS_FEATURE (CPU_FEATURE_SEP))
+ return;
+
+ //struct task_tss *tss = curr_ktss (cpu_number ());
+ struct task_tss *tss = &ktss; /* NOTE(review): uses the single global ktss, so every CPU would get the same sysenter stack -- confirm before SMP use */
+
+ wrmsr(MSR_IA32_SYSENTER_CS, KERNEL_ENTER_CS);
+ wrmsr(MSR_IA32_SYSENTER_ESP,
+ (unsigned long) tss->sysenter_stack + sizeof tss->sysenter_stack); /* address one past the array's end; sysenter_entry pushes downwards from it */
+ wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) sysenter_entry);
+}
+
+int
+syscall_open(dev_t dev, int flag, io_req_t ior) /* Open routine of the "syscall" device: ignores its arguments and always returns 0. */
+{
+ return 0;
+}
+
+void
+syscall_close(dev_t dev, int flag) /* Close routine of the "syscall" device: does nothing. */
+{
+ return;
+}
+
+int
+syscall_mmap(dev_t dev, vm_offset_t off, vm_prot_t prot) /* Mmap routine of the "syscall" device: refuses writable mappings with -1, otherwise returns the page number of the stub page. */
+{
+ if (prot & VM_PROT_WRITE)
+ return (-1);
+
+ return (i386_btop(pmap_extract(pmap_kernel(),
+ (vm_offset_t) msyscall))); /* presumably the physical page number; NOTE(review): dev and off are ignored, so every offset maps the same page */
+}
diff --git a/i386/i386/syscall.h b/i386/i386/syscall.h
new file mode 100644
index 0000000..de9670c
--- /dev/null
+++ b/i386/i386/syscall.h
@@ -0,0 +1,7 @@
+// XXX prototype header: no include guard, and it relies on the includer for dev_t, io_req_t, vm_offset_t and vm_prot_t
+
+void syscall_init(void); /* allocate and fill the user stub page, then set up the current CPU */
+void syscall_init_cpu(void); /* program the SYSENTER MSRs on the calling CPU, if it supports them */
+int syscall_open(dev_t dev, int flag, io_req_t ior); /* device open routine; always returns 0 */
+void syscall_close(dev_t dev, int flag); /* device close routine; does nothing */
+int syscall_mmap(dev_t dev, vm_offset_t off, vm_prot_t prot); /* device mmap routine; -1 for writable requests, else the stub page number */
diff --git a/i386/i386/tss.h b/i386/i386/tss.h
index ff25f21..8c939c7 100644
--- a/i386/i386/tss.h
+++ b/i386/i386/tss.h
@@ -76,6 +76,7 @@ struct task_tss
struct i386_tss tss;
unsigned char iopb[IOPB_BYTES];
unsigned char barrier;
+ unsigned long sysenter_stack[64]; /* xxx */
};
diff --git a/i386/i386at/conf.c b/i386/i386at/conf.c
index ab4f680..d7f9e6f 100644
--- a/i386/i386at/conf.c
+++ b/i386/i386at/conf.c
@@ -68,6 +68,9 @@
#define hypcnname "hyp"
#endif /* MACH_HYP */
+#include <i386/syscall.h>
+#define syscall_name "syscall"
+
/*
* List of devices - console must be at slot 0
*/
@@ -143,6 +146,11 @@ struct dev_ops dev_name_list[] =
nodev },
#endif /* MACH_HYP */
+ { syscall_name, syscall_open, syscall_close, nulldev_read,
+ nulldev_write, nulldev_getstat, nulldev_setstat,
+ syscall_mmap,
+ nodev, nulldev, nulldev_portdeath, 0,
+ nodev },
};
int dev_name_count = sizeof(dev_name_list)/sizeof(dev_name_list[0]);
diff --git a/i386/i386at/model_dep.c b/i386/i386at/model_dep.c
index bc34c9b..210e54d 100644
--- a/i386/i386at/model_dep.c
+++ b/i386/i386at/model_dep.c
@@ -63,6 +63,7 @@
#include <i386/proc_reg.h>
#include <i386/locore.h>
#include <i386/model_dep.h>
+#include <i386/syscall.h>
#include <i386at/autoconf.h>
#include <i386at/idt.h>
#include <i386at/int_init.h>
@@ -197,6 +198,7 @@ void machine_init(void)
*/
pmap_unmap_page_zero();
#endif
+ syscall_init();
}
/* Conserve power on processor CPU. */
--
2.1.4