dotgnu-pnet
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Dotgnu-pnet] [patch #7620] Added support for the windows stack guard pa


From: Patrick van Beem
Subject: [Dotgnu-pnet] [patch #7620] Added support for the windows stack guard page in libjit.
Date: Wed, 21 Sep 2011 05:12:52 +0000
User-agent: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.220 Safari/535.1

URL:
  <http://savannah.gnu.org/patch/?7620>

                 Summary: Added support for the windows stack guard page in
libjit.
                 Project: DotGNU Portable.NET
            Submitted by: pvbeem
            Submitted on: Wed 21 Sep 2011 05:12:51 AM GMT
                Category: None
                Priority: 5 - Normal
                  Status: None
                 Privacy: Public
             Assigned to: None
        Originator Email: 
             Open/Closed: Open
         Discussion Lock: Any

    _______________________________________________________

Details:

In Windows, the allocation of physical memory for the stack is controlled by a
guard page placed on the last not-yet-allocated page of the stack. Functions
that use more than 4K of stack must access every page of the stack they need,
in the correct order, to ensure that physical memory is allocated for the
complete stack area they use. Failing to do so results in an access violation
exception. Libjit did not do this; the changes below implement it. See also:
http://blogs.technet.com/b/markrussinovich/archive/2009/07/08/3261309.aspx

The fixes are below. I couldn't make a diff, because my previous patches are
already applied in this code, but not in the main trunk...

----

Fix for 32 bit (jit-rules-x86.c); the changes are in and after the #ifdef WIN32
block:


/*
 * Generate the function prolog for 32-bit x86.
 *
 * The prolog is first assembled into a local scratch buffer and then copied
 * into "buf" so that its last byte lands at buf + JIT_PROLOG_SIZE - 1.
 *
 * gen:  code generation state; gen->touched selects the registers to save.
 * func: function being compiled; func->builder->frame_size is the size of
 *       the local variable frame to allocate.
 * buf:  destination area for the prolog.
 *
 * Returns the address inside "buf" at which the copied prolog starts.
 */
void *_jit_gen_prolog(jit_gencode_t gen, jit_function_t func, void *buf)
{
        unsigned char prolog[JIT_PROLOG_SIZE];
        unsigned char *inst = prolog;
        int reg;

        /* Push ebp onto the stack */
        x86_push_reg(inst, X86_EBP);

        /* Initialize EBP for the current frame */
        x86_mov_reg_reg(inst, X86_EBP, X86_ESP, sizeof(void *));

        /* Allocate space for the local variable frame */
        if(func->builder->frame_size > 0)
        {
#ifdef WIN32
                /* For Windows: make sure we hit the guard page on the stack
                 * by calling the probe routine below.
                 * I couldn't get the compiler to omit the prologue on a
                 * separate function, so the routine is embedded in-line in
                 * this function and jumped over. This causes problems when
                 * unwinding the stack if an exception occurs inside the
                 * routine; however, that is very unlikely to happen.
                 */
                if(func->builder->frame_size >= 4096)
                {
                        void *entrypoint;

                        /* The probe routine expects the frame size in EAX.
                         * It touches the stack one 4K page at a time below
                         * the caller's ESP, then sets ESP to the new frame
                         * bottom itself and returns by jumping to the saved
                         * return address; ECX is preserved.
                         * The "%=" suffix gives every instance of this asm
                         * its own label names, so they cannot collide with
                         * compiler-generated labels or with a second copy of
                         * this asm statement.
                         */
                        __asm__ (
                                "jmp     .Ljit_chkstk_skip%=\n"
                                ".Ljit_chkstk%=:\n\t"
                                "push    %%ecx\n\t"
                                "mov     %%esp,%%ecx\n\t"
                                "add     $8,%%ecx\n"
                                ".Ljit_chkstk_page%=:\t"
                                "cmp     $0x1000,%%eax\n\t"
                                "jb      .Ljit_chkstk_tail%=\n\t"
                                "sub     $0x1000,%%ecx\n\t"
                                "orl     $0,(%%ecx)\n\t"
                                "sub     $0x1000,%%eax\n\t"
                                "jmp     .Ljit_chkstk_page%=\n"
                                ".Ljit_chkstk_tail%=:\t"
                                "sub     %%eax,%%ecx\n\t"
                                "orl     $0,(%%ecx)\n\t"
                                "mov     %%esp,%%eax\n\t"
                                "mov     %%ecx,%%esp\n\t"
                                "mov     (%%eax),%%ecx\n\t"
                                "mov     4(%%eax),%%eax\n\t"
                                "jmp     *%%eax\n\t"
                                ".Ljit_chkstk_skip%=:\t"
                                "lea     .Ljit_chkstk%=,%0\n\t"
                                : "=r"(entrypoint)
                        );
                        x86_mov_reg_imm(inst, X86_EAX, func->builder->frame_size);
                        /* We generate in a temporary buffer, so relative
                         * calls can't be used (as is). Use long calls.
                         * ECX is caller-saved, so we can use that one here.
                         */
                        x86_mov_reg_imm(inst, X86_ECX, (jit_nint)entrypoint);
                        x86_call_reg(inst, X86_ECX);
                }
                else
#endif
                {
                        x86_alu_reg_imm(inst, X86_SUB, X86_ESP, (int)(func->builder->frame_size));
                }
        }

        /* Save registers that we need to preserve: every register marked as
           touched that does not carry the JIT_REG_CALL_USED flag */
        for(reg = 0; reg <= 7; ++reg)
        {
                if(jit_reg_is_used(gen->touched, reg) &&
                   (_jit_reg_info[reg].flags & JIT_REG_CALL_USED) == 0)
                {
                        x86_push_reg(inst, _jit_reg_info[reg].cpu_reg);
                }
        }

        /* Copy the prolog into place and return the adjusted entry position.
           "reg" is reused here to hold the prolog length in bytes. */
        reg = (int)(inst - prolog);
        jit_memcpy(((unsigned char *)buf) + JIT_PROLOG_SIZE - reg, prolog, reg);
        return (void *)(((unsigned char *)buf) + JIT_PROLOG_SIZE - reg);
}


----

Fix for 64 bit (jit-rules-x86-64.c), changes are in the block with the inline
assembler within the #ifdef WIN64:


/*
 * Generate the function prolog for x86-64.
 *
 * The prolog is first assembled into a local scratch buffer and then copied
 * into "buf" so that its last byte lands at buf + JIT_PROLOG_SIZE - 1.
 * Under WIN64 a RUNTIME_FUNCTION / UNWIND_INFO pair describing the prolog is
 * additionally written at the current output position (gen->posn.ptr).
 *
 * gen:  code generation state; gen->touched selects the registers to save.
 * func: function being compiled; frame and param area sizes are read from
 *       func->builder.
 * buf:  destination area for the prolog.
 *
 * Returns the address inside "buf" at which the copied prolog starts.
 */
void *
_jit_gen_prolog(jit_gencode_t gen, jit_function_t func, void *buf)
{
        unsigned char prolog[JIT_PROLOG_SIZE];
        unsigned char *inst = prolog;
        int reg;
        int frame_size = 0;
        int regs_to_save = _jit_count_regs_saved(gen);

#ifdef WIN64
        /* Bail out if there is insufficient space for the prolog unwind
           information. */
        #define UNWIND_CODE_COUNT 8
        _jit_cache_check_space(&gen->posn, sizeof(RUNTIME_FUNCTION) +
                sizeof(UNWIND_INFO) + sizeof(UNWIND_CODE) * (UNWIND_CODE_COUNT - 1) + 4 );

        PRUNTIME_FUNCTION win64_runtime_function;
        PUNWIND_INFO win64_unwind_info;
        int unwindslots = 0;

        /* Create the WIN64 unwind information. The RUNTIME_FUNCTION is placed
           at the next 4-byte aligned output position. win64_unwind_info
           starts UNWIND_CODE_COUNT slots past it and is moved down by one
           UNWIND_CODE for every slot registered below, so UnwindCode[0] is
           always the most recently registered slot. */
        func->win64_runtime_function_base = buf;
        win64_runtime_function = (PRUNTIME_FUNCTION)(((jit_nuint)gen->posn.ptr + 3) & ~(jit_nuint)3);
        func->win64_runtime_function = win64_runtime_function;
        win64_runtime_function->EndAddress = (ULONG)((void*)gen->posn.ptr - buf);
        gen->posn.ptr = (unsigned char*)win64_runtime_function + sizeof(RUNTIME_FUNCTION);
        win64_unwind_info = (PUNWIND_INFO)(gen->posn.ptr + sizeof(UNWIND_CODE) * UNWIND_CODE_COUNT);
        gen->posn.ptr += sizeof(UNWIND_INFO) + sizeof(UNWIND_CODE) * (UNWIND_CODE_COUNT - 1);
#endif

        /* Push ebp onto the stack */
        x86_64_push_reg_size(inst, X86_64_RBP, 8);

#ifdef WIN64
        /* Register this operation code in the prolog unwind table. */
        unwindslots++;
        win64_unwind_info = (void*)win64_unwind_info - sizeof(UNWIND_CODE);
        win64_unwind_info->UnwindCode[0].CodeOffset = (ULONG)(inst - prolog);
        win64_unwind_info->UnwindCode[0].UnwindOp = UWOP_PUSH_NONVOL;
        win64_unwind_info->UnwindCode[0].OpInfo = 5;
#endif

        /* Initialize EBP for the current frame */
        x86_64_mov_reg_reg_size(inst, X86_64_RBP, X86_64_RSP, 8);

        /* Allocate space for the local variable frame */
        if(func->builder->frame_size > 0)
        {
                /* Make sure that the framesize is a multiple of 8 bytes */
                frame_size = (func->builder->frame_size + 0x7) & ~0x7;
        }

        /* add the register save area to the initial frame size */
        frame_size += (regs_to_save << 3);

#ifdef JIT_USE_PARAM_AREA
        /* Add the param area to the frame_size if the additional offset
           doesnt cause the offsets in the register saves become 4 bytes */
        if(func->builder->param_area_size > 0 &&
           (func->builder->param_area_size <= 0x50 || regs_to_save == 0))
        {
                frame_size += func->builder->param_area_size;
        }
#endif /* JIT_USE_PARAM_AREA */

        /* Make sure that the framesize is a multiple of 16 bytes */
        /* so that the final RSP will be aligned on a 16byte boundary. */
        frame_size = (frame_size + 0xf) & ~0xf;

        if(frame_size > 0)
        {
#ifdef WIN64
                /* For Windows: make sure we hit the guard page on the stack
                 * by calling the probe routine below.
                 * I couldn't get the compiler to omit the prologue on a
                 * separate function, so the routine is embedded in-line in
                 * this function and jumped over. This causes problems when
                 * unwinding the stack if an exception occurs inside the
                 * routine; however, that is very unlikely to happen.
                 */
                if ( frame_size >= 4096 )
                {
                        void* entrypoint;

                        /* The probe routine expects the frame size in RAX and
                         * only touches the stack, one 4K page at a time,
                         * below the caller's RSP; RAX and RCX are preserved
                         * and RSP itself is adjusted by the generated "sub"
                         * that follows the call.
                         * The "%=" suffix gives every instance of this asm
                         * its own label names, so they cannot collide with
                         * compiler-generated labels or with a second copy of
                         * this asm statement. The routine address is taken
                         * RIP-relative so that no 32-bit absolute address is
                         * needed in 64-bit code.
                         */
                        __asm__ (
                                "jmp     .Ljit_chkstk_skip%=\n"
                                ".Ljit_chkstk%=:\n\t"
                                "pushq   %%rcx\n\t"
                                "pushq   %%rax\n\t"
                                "cmpq    $0x1000, %%rax\n\t"
                                "leaq    0x18(%%rsp), %%rcx\n\t"
                                "jb      .Ljit_chkstk_tail%=\n"
                                ".Ljit_chkstk_page%=:\t"
                                "subq    $0x1000,%%rcx\n\t"
                                "orq     $0,(%%rcx)\n\t"
                                "subq    $0x1000,%%rax\n\t"
                                "cmpq    $0x1000,%%rax\n\t"
                                "ja      .Ljit_chkstk_page%=\n"
                                ".Ljit_chkstk_tail%=:\t"
                                "subq    %%rax, %%rcx\n\t"
                                "orq     $0,(%%rcx)\n\t"
                                "popq    %%rax\n\t"
                                "popq    %%rcx\n\t"
                                "ret\n"
                                ".Ljit_chkstk_skip%=:\t"
                                "leaq    .Ljit_chkstk%=(%%rip),%0\n\t"
                                : "=r"(entrypoint)
                        );
                        x86_64_mov_reg_imm_size(inst, X86_64_RAX, frame_size, 8);
                        /* We generate in a temporary buffer, so relative
                           calls can't be used (as is). Use long calls. */
                        x86_64_mov_reg_imm_size(inst, X86_64_SCRATCH, (jit_nint)entrypoint, 8);
                        x86_64_call_reg(inst, X86_64_SCRATCH);
                }
#endif
                x86_64_sub_reg_imm_size(inst, X86_64_RSP, frame_size, 8);
#ifdef WIN64
                /* Register this operation code in the prolog unwind table. */
                if ( frame_size <= 128 )
                {
                        unwindslots++;
                        win64_unwind_info = (void*)win64_unwind_info - sizeof(UNWIND_CODE);
                        win64_unwind_info->UnwindCode[0].CodeOffset = (ULONG)(inst - prolog);
                        win64_unwind_info->UnwindCode[0].UnwindOp = UWOP_ALLOC_SMALL;
                        win64_unwind_info->UnwindCode[0].OpInfo = (frame_size / 8) - 1;
                }
                else
                {
                        /* Two slots: the size operand first (it ends up
                           after the operation slot in memory), then the
                           operation itself.
                           NOTE(review): with OpInfo 0 the size is stored as
                           frame_size / 8 in a single slot; presumably this
                           limits frame_size to what fits there - confirm
                           against the UWOP_ALLOC_LARGE documentation. */
                        unwindslots++;
                        win64_unwind_info = (void*)win64_unwind_info - sizeof(UNWIND_CODE);
                        win64_unwind_info->UnwindCode[0].FrameOffset = frame_size / 8;
                        unwindslots++;
                        win64_unwind_info = (void*)win64_unwind_info - sizeof(UNWIND_CODE);
                        win64_unwind_info->UnwindCode[0].CodeOffset = (ULONG)(inst - prolog);
                        win64_unwind_info->UnwindCode[0].UnwindOp = UWOP_ALLOC_LARGE;
                        win64_unwind_info->UnwindCode[0].OpInfo = 0;
                }
#endif
        }

        if(regs_to_save > 0)
        {
                int current_offset;
#ifdef JIT_USE_PARAM_AREA
                if(func->builder->param_area_size > 0 &&
                   func->builder->param_area_size <= 0x50)
                {
                        current_offset = func->builder->param_area_size;
                }
                else
#endif /* JIT_USE_PARAM_AREA */
                {
                        current_offset = 0;
                }

                /* Save registers that we need to preserve */
                for(reg = 0; reg <= 14; ++reg)
                {
                        if(jit_reg_is_used(gen->touched, reg) &&
                           (_jit_reg_info[reg].flags & JIT_REG_CALL_USED) == 0)
                        {
                                x86_64_mov_membase_reg_size(inst, X86_64_RSP, current_offset,
                                                            _jit_reg_info[reg].cpu_reg, 8);
                                current_offset += 8;
                        }
                }
        }
#ifdef JIT_USE_PARAM_AREA
        /* A param area larger than 0x50 bytes was not folded into frame_size
           above when registers are saved; allocate it separately here. */
        if(func->builder->param_area_size > 0x50 && regs_to_save > 0)
        {
                x86_64_sub_reg_imm_size(inst, X86_64_RSP, func->builder->param_area_size, 8);
#ifdef WIN64
                /* Register this operation code in the prolog unwind table. */
                unwindslots++;
                win64_unwind_info = (void*)win64_unwind_info - sizeof(UNWIND_CODE);
                win64_unwind_info->UnwindCode[0].FrameOffset = func->builder->param_area_size / 8;
                unwindslots++;
                win64_unwind_info = (void*)win64_unwind_info - sizeof(UNWIND_CODE);
                win64_unwind_info->UnwindCode[0].CodeOffset = (ULONG)(inst - prolog);
                win64_unwind_info->UnwindCode[0].UnwindOp = UWOP_ALLOC_LARGE;
                win64_unwind_info->UnwindCode[0].OpInfo = 0;
#endif // WIN64
        }
#endif /* JIT_USE_PARAM_AREA */

        /* Copy the prolog into place and return the adjusted entry position.
           "reg" is reused here to hold the prolog length in bytes. */
        reg = (int)(inst - prolog);
        jit_memcpy(((unsigned char *)buf) + JIT_PROLOG_SIZE - reg, prolog, reg);

#ifdef WIN64
        /* Register stack unwinding information that is ready to be used by
           RtlAddFunctionTable */
        win64_runtime_function->BeginAddress = JIT_PROLOG_SIZE - reg;
        win64_unwind_info->Version = 1;
        win64_unwind_info->Flags = 0;
        win64_unwind_info->CountOfCodes = unwindslots;
        win64_unwind_info->FrameRegister = 0;
        win64_unwind_info->FrameOffset = 0;
        win64_unwind_info->SizeOfProlog = reg;
        win64_runtime_function->UnwindData = (ULONG)((void*)win64_unwind_info - buf);
#endif

        return (void *)(((unsigned char *)buf) + JIT_PROLOG_SIZE - reg);
}

/*
 * Generate the function epilog for x86-64 at the current output position.
 *
 * Resolves the pending epilog and alloca fixup chains, reloads the saved
 * registers (every register marked as touched that does not carry the
 * JIT_REG_CALL_USED flag), restores RSP and RBP and emits the return.
 *
 * gen:  code generation state; gen->posn.ptr is advanced past the epilog
 *       and both fixup chains are cleared.
 * func: function being compiled; frame and param area sizes are read from
 *       func->builder.
 */
void
_jit_gen_epilog(jit_gencode_t gen, jit_function_t func)
{
        unsigned char *inst;
        int reg;
        int current_offset;
        jit_int *fixup;
        jit_int *next;

        /* Bail out if there is insufficient space for the epilog.
           (Was 48, 76 needed for FAA, 256 to be safe...) */
        _jit_cache_check_space(&gen->posn, 256);

        inst = gen->posn.ptr;

        /* Perform fixups on any blocks that jump to the epilog: each entry
           is overwritten with the 32-bit displacement from the end of the
           fixup slot to the epilog start */
        fixup = (jit_int *)(gen->epilog_fixup);
        while(fixup != 0)
        {
                if(DEBUG_FIXUPS)
                {
                        /* %p rather than %lx: "long" need not be as wide as
                           a pointer (it is not on 64-bit Windows) */
                        fprintf(stderr, "Fixup Address: %p, Value: %x\n",
                                        (void *)fixup, (unsigned int)fixup[0]);
                }
                next = (jit_int *)_JIT_CALC_NEXT_FIXUP(fixup, fixup[0]);
                fixup[0] = (jit_int)(((jit_nint)inst) - ((jit_nint)fixup) - 4);
                fixup = next;
        }
        gen->epilog_fixup = 0;

        /* Perform fixups on any alloca calls: each entry receives the final
           param area size */
        fixup = (jit_int *)(gen->alloca_fixup);
        while (fixup != 0)
        {
                next = (jit_int *)_JIT_CALC_NEXT_FIXUP(fixup, fixup[0]);
                fixup[0] = func->builder->param_area_size;
                if(DEBUG_FIXUPS)
                {
                        fprintf(stderr, "Fixup Param Area Size: %p, Value: %x\n",
                                        (void *)fixup, (unsigned int)fixup[0]);
                }
                fixup = next;
        }
        gen->alloca_fixup = 0;

        /* Restore the used callee saved registers */
        if(gen->stack_changed)
        {
                /* RSP can't be used as base here, so the save area is
                   addressed relative to RBP.
                   NOTE(review): unlike _jit_gen_prolog, this recomputation
                   does not add param_area_size; presumably stack_changed is
                   never set when a param area is in use - confirm. */
                int frame_size = func->builder->frame_size;
                int regs_saved = _jit_count_regs_saved(gen);

                /* add the register save area to the initial frame size */
                frame_size += (regs_saved << 3);

                /* Make sure that the framesize is a multiple of 16 bytes */
                /* so that the final RSP will be aligned on a 16byte boundary. */
                frame_size = (frame_size + 0xf) & ~0xf;

                current_offset = -frame_size;

                for(reg = 0; reg <= 14; ++reg)
                {
                        if(jit_reg_is_used(gen->touched, reg) &&
                           (_jit_reg_info[reg].flags & JIT_REG_CALL_USED) == 0)
                        {
                                x86_64_mov_reg_membase_size(inst, _jit_reg_info[reg].cpu_reg,
                                                            X86_64_RBP, current_offset, 8);
                                current_offset += 8;
                        }
                }
        }
        else
        {
                /* The save area is addressed relative to RSP, above the
                   param area if there is one */
#ifdef JIT_USE_PARAM_AREA
                if(func->builder->param_area_size > 0)
                {
                        current_offset = func->builder->param_area_size;
                }
                else
                {
                        current_offset = 0;
                }
#else /* !JIT_USE_PARAM_AREA */
                current_offset = 0;
#endif /* !JIT_USE_PARAM_AREA */
                for(reg = 0; reg <= 14; ++reg)
                {
                        if(jit_reg_is_used(gen->touched, reg) &&
                           (_jit_reg_info[reg].flags & JIT_REG_CALL_USED) == 0)
                        {
                                x86_64_mov_reg_membase_size(inst, _jit_reg_info[reg].cpu_reg,
                                                            X86_64_RSP, current_offset, 8);
                                current_offset += 8;
                        }
                }
        }

        /* Restore stackpointer and frame register */
        x86_64_mov_reg_reg_size(inst, X86_64_RSP, X86_64_RBP, 8);
        x86_64_pop_reg_size(inst, X86_64_RBP, 8);

        /* and return */
        x86_64_ret(inst);

        gen->posn.ptr = inst;
}





    _______________________________________________________

Reply to this item at:

  <http://savannah.gnu.org/patch/?7620>

_______________________________________________
  Message sent via/by Savannah
  http://savannah.gnu.org/




reply via email to

[Prev in Thread] Current Thread [Next in Thread]