bug-hurd

Re: [PATCH 5/5] x86_64: add 64-bit syscall entry point


From: Samuel Thibault
Subject: Re: [PATCH 5/5] x86_64: add 64-bit syscall entry point
Date: Mon, 27 Feb 2023 23:02:13 +0100
User-agent: NeoMutt/20170609 (1.8.3)

Luca Dariz, on Mon, 27 Feb 2023 21:45:01 +0100, wrote:
> diff --git a/i386/i386/ldt.h b/i386/i386/ldt.h
> index b15f11a5..4490f99f 100644
> --- a/i386/i386/ldt.h
> +++ b/i386/i386/ldt.h
> @@ -45,9 +45,14 @@
>  #define      USER_SCALL      0x07            /* system call gate */
>  #ifdef __x86_64__
>  /* Call gate needs two entries */
> -#endif
> +
> +/* The sysret instruction puts some constraints on the user segment indexes */
> +#define      USER_CS         0x1f            /* user code segment */
> +#define      USER_DS         0x17            /* user data segment */

I'd say we'd rather avoid changing them for the x86_64 && USER32 case?
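
Something like this, perhaps (untested sketch, just to illustrate the idea):

    #ifdef __x86_64__
    /* Call gate needs two entries */
    #endif

    #if defined(__x86_64__) && ! defined(USER32)
    /* The sysret instruction puts some constraints on the user segment indexes */
    #define USER_CS         0x1f            /* user code segment */
    #define USER_DS         0x17            /* user data segment */
    #else
    #define USER_CS         0x17            /* user code segment */
    #define USER_DS         0x1f            /* user data segment */
    #endif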

> +#else
>  #define      USER_CS         0x17            /* user code segment */
>  #define      USER_DS         0x1f            /* user data segment */
> +#endif
>  
>  #define      LDTSZ           4
>  
> diff --git a/i386/include/mach/i386/syscall_sw.h b/i386/include/mach/i386/syscall_sw.h
> index 86f6ff2f..20ef7c13 100644
> --- a/i386/include/mach/i386/syscall_sw.h
> +++ b/i386/include/mach/i386/syscall_sw.h
> @@ -29,16 +29,16 @@
>  
>  #include <mach/machine/asm.h>
>  
> -#if BSD_TRAP
> -#define kernel_trap(trap_name,trap_number,number_args) \
> -ENTRY(trap_name) \
> -     movl    $ trap_number,%eax; \
> -     SVC; \
> -     jb LCL(cerror); \
> -     ret; \
> +#if defined(__x86_64__) && ! defined(USER32)
> +#define kernel_trap(trap_name,trap_number,number_args)  \
> +ENTRY(trap_name)                                     \
> +     movq    $ trap_number,%rax;                     \

> +     movq    %rcx,%r10;                              \

What is that for?

> +     syscall;                                        \
> +     ret;                                            \
>  END(trap_name)
>  #else
> -#define kernel_trap(trap_name,trap_number,number_args) \
> +#define kernel_trap(trap_name,trap_number,number_args)  \
>  ENTRY(trap_name) \
>       movl    $ trap_number,%eax; \
>       SVC; \
> diff --git a/x86_64/locore.S b/x86_64/locore.S
> index 47d9085c..fdf7300b 100644
> --- a/x86_64/locore.S
> +++ b/x86_64/locore.S
> @@ -1281,6 +1281,142 @@ DATA(cpu_features_ecx)
>  
>  END(syscall)
>  
> +
> +/* Entry point for 64-bit syscalls.
> + * On entry we're still on the user stack, so better not use it. Instead we
> + * save the thread state immediately in thread->pcb->iss, then try to invoke
> + * the syscall.
> + * TODO:
> +     - for now we assume the return address is canonical, but apparently there
> +       can be cases where it's not (see how Linux handles this). Does it apply
> +       here?
> +     - do we need to check for ast on syscalls? Maybe on interrupts is enough
> +     - check that the case where a task is suspended, and later returns via
> +       iretq from return_from_trap, works fine in all combinations
> +     - emulated syscalls - are they used anywhere?

Not that I know of.

> + */
> +ENTRY(syscall64)
> +     /* RFLAGS[32:63] are reserved, so combine syscall num (32 bit) and
> +      * eflags in RAX to allow using r11 as temporary register */
> +     shlq    $32,%r11
> +     shlq    $32,%rax        /* make sure bits 32:63 of %rax are zero */
> +     shrq    $32,%rax
> +     or      %r11,%rax
> +
> +     /* Save thread state in pcb->iss, as on exception entry.
> +      * Since this is triggered synchronously from userspace, we can
> +      * save only the callee-preserved status according to the C ABI,
> +      * plus RIP and EFLAGS for sysret */
> +     CPU_NUMBER(%r11)
> +     movq    CX(EXT(active_threads),%r11),%r11 /* point to current thread */
> +     movq    TH_PCB(%r11),%r11               /* point to pcb */
> +     addq    $ PCB_ISS,%r11                  /* point to saved state */
> +
> +     mov     %gs,R_GS(%r11)
> +     mov     %fs,R_FS(%r11)
> +     mov     %rsp,R_UESP(%r11)       /* callee-preserved register */
> +     mov     %rcx,R_EIP(%r11)        /* syscall places user RIP in RCX */
> +     mov     %rbx,R_EBX(%r11)        /* callee-preserved register */
> +     mov     %rax,%rbx               /* Now we can unpack eflags again */
> +     shr     $32,%rbx
> +     mov     %rbx,R_EFLAGS(%r11)     /* ... and save them in pcb as well */
> +     mov     %rbp,R_EBP(%r11)        /* callee-preserved register */
> +     mov     %r12,R_R12(%r11)        /* callee-preserved register */
> +     mov     %r13,R_R13(%r11)        /* callee-preserved register */
> +     mov     %r14,R_R14(%r11)        /* callee-preserved register */
> +     mov     %r15,R_R15(%r11)        /* callee-preserved register */
> +     mov     %r11,%rbx               /* prepare for error handling */
> +     mov     %r10,%rcx               /* fix arg3 location according to C ABI */
> +
> +     /* switch to kernel stack */
> +     CPU_NUMBER(%r11)
> +     movq    CX(EXT(kernel_stack),%r11),%rsp
> +
> +     /* Now we have saved state and args 1-6 are in place.
> +      * Before invoking the syscall we do some bound checking and,
> +      * if we have more than 6 arguments, we need to copy the
> +      * remaining ones to the kernel stack, handling page faults when
> +      * accessing the user stack.
> +      */
> +     shlq    $32,%rax                /* make sure bits 32:63 of %rax are zero */
> +     shrq    $32,%rax
> +     negl    %eax                    /* get system call number */
> +     jl      _syscall64_range        /* out of range if it was positive */
> +     cmpl    EXT(mach_trap_count),%eax       /* check system call table bounds */
> +     jg      _syscall64_range        /* error if out of range */
> +     shll    $5,%eax                 /* manual indexing of mach_trap_t */
> +
> +     /* check if we need to place some arguments on the stack */
> +_syscall64_args_stack:
> +     mov     EXT(mach_trap_table)(%rax),%r10 /* get number of arguments */
> +     subq    $6,%r10                 /* the first 6 args are already in place */
> +     jl      _syscall64_call         /* skip argument copy if >6 args */

jle? (With exactly 6 arguments %r10 is 0 here, so the fall-through copy loop below would wrap around.)

> +
> +     movq    R_UESP(%rbx),%r11       /* get user stack pointer */
> +     addq    $8,%r11                 /* Skip user return address */
> +
> +     mov     $USER_DS,%r12           /* use user data segment for accesses */
> +     mov     %r12,%fs
> +
> +     lea     (%r11,%r10,8),%r11      /* point past last argument */

> +     xorq    %r12,%r12

Why clear it?

> +0:   subq    $8,%r11
> +     RECOVER(_syscall64_addr_push)
> +     mov     %fs:(%r11),%r12
> +     pushq   %r12                    /* push argument on stack */
> +     dec     %r10
> +     jnz     0b                      /* loop for all remaining arguments */
> +
> +_syscall64_call:
> +     call    *EXT(mach_trap_table)+8(%rax)  /* call procedure */
> +     // XXX: check ast on exit?
> +
> +     /* avoid leaking information in callee-clobbered registers */
> +     mov     $0,%rdi

Rather xorq?
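
I.e. something like (just a sketch):

        xorq    %rdi,%rdi
        xorq    %rsi,%rsi
        xorq    %rdx,%rdx
        xorq    %r10,%r10
        xorq    %r9,%r9
        xorq    %r8,%r8

(shorter encoding, and recognized as a zeroing idiom)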

> +     mov     $0,%rsi
> +     mov     $0,%rdx
> +     mov     $0,%r10
> +     mov     $0,%r9
> +     mov     $0,%r8
> +
> +     /* restore thread state and return to user using sysret */
> +     CPU_NUMBER(%r11)
> +     movq    CX(EXT(active_threads),%r11),%r11 /* point to current thread */
> +     movq    TH_PCB(%r11),%r11               /* point to pcb */
> +     addq    $ PCB_ISS,%r11                  /* point to saved state */
> +
> +     mov     R_GS(%r11),%gs
> +     mov     R_FS(%r11),%fs
> +     mov     R_UESP(%r11),%rsp       /* callee-preserved register,
> +                                      * switch to user stack */
> +     mov     R_EIP(%r11),%rcx        /* sysret convention */
> +     mov     R_EBX(%r11),%rbx        /* callee-preserved register */
> +     mov     R_EBP(%r11),%rbp        /* callee-preserved register */
> +     mov     R_R12(%r11),%r12        /* callee-preserved register */
> +     mov     R_R13(%r11),%r13        /* callee-preserved register */
> +     mov     R_R14(%r11),%r14        /* callee-preserved register */
> +     mov     R_R15(%r11),%r15        /* callee-preserved register */
> +     mov     R_EFLAGS(%r11),%r11     /* sysret convention */
> +
> +     sysretq         /* fast return to user-space, the thread didn't block */
> +
> +/* Error handling fragments, from here we jump directly to the trap handler */
> +_syscall64_addr_push:
> +     movq    %rbx,%rsp               /* clean parameters from stack */
> +     movq    %r11,R_CR2(%rbx)        /* set fault address */
> +     movq    $(T_PAGE_FAULT),R_TRAPNO(%rbx)  /* set page-fault trap */
> +     movq    $(T_PF_USER),R_ERR(%rbx) /* set error code - read user space */
> +     jmp     _take_trap              /* treat as a trap */
> +
> +_syscall64_range:
> +     movq    $(T_INVALID_OPCODE),R_TRAPNO(%rbx)
> +                                     /* set invalid-operation trap */
> +     movq    $0,R_ERR(%rbx)          /* clear error code */
> +     jmp     _take_trap              /* treat as a trap */
> +
> +END(syscall64)
> +
>  /* Discover what kind of cpu we have; return the family number
>     (3, 4, 5, 6, for 386, 486, 586, 686 respectively).  */
>  ENTRY(discover_x86_cpu_type)
> -- 
> 2.30.2
> 
> 

-- 
Samuel
---
For an independent, transparent and rigorous evaluation!
I support the Commission d'Évaluation de l'Inria.


