[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH 12/12] i386: Refactor int stacks to be per cpu for SMP
From: Samuel Thibault
Subject: Re: [PATCH 12/12] i386: Refactor int stacks to be per cpu for SMP
Date: Wed, 26 Oct 2022 01:25:59 +0200
User-agent: NeoMutt/20170609 (1.8.3)
Damien Zammit, le mar. 25 oct. 2022 10:56:44 +0000, a écrit:
> diff --git a/i386/i386/cpu_number.h b/i386/i386/cpu_number.h
> index 9aef6370..d56cb602 100644
> --- a/i386/i386/cpu_number.h
> +++ b/i386/i386/cpu_number.h
> @@ -35,14 +35,35 @@
> /* More-specific code must define cpu_number() and CPU_NUMBER. */
> #ifdef __i386__
> #define CX(addr, reg) addr(,reg,4)
> +
> +/* CPU_NUMBER(%ebx) will _not_ work! */
> +#define CPU_NUMBER(reg) \
> + pushfl ;\
> + cli ;\
> + pushl %esi ;\
> + pushl %edi ;\
> + pushl %ebx ;\
> + pushl %eax ;\
> + call EXT(cpu_number) ;\
> + movl %eax, %ebx ;\
> + popl %eax ;\
> + movl %ebx, reg ;\
> + popl %ebx ;\
> + popl %edi ;\
> + popl %esi ;\
> + popfl
The point of the CPU_NUMBER() macro is to make it efficient and safe to
call in tricky contexts. I think we could instead write an assembly
version of (lapic->apic_id.r >> 24) & 0xff, plus the loop that looks up
the apic_id in apic_data.cpu_lapic_list?
> +
> #endif
> #ifdef __x86_64__
> #define CX(addr, reg) addr(,reg,8)
> +#warning Missing CPU_NUMBER() for 64 bit
> +#define CPU_NUMBER(reg)
> #endif
>
> -/* XXX For now */
> -#define CPU_NUMBER(reg) movl $0,reg
> -#define cpu_number() 0
> +#ifndef __ASSEMBLER__
> +#include "kern/cpu_number.h"
> +int cpu_number();
> +#endif
>
> #else /* NCPUS == 1 */
>
> @@ -51,8 +72,4 @@
>
> #endif /* NCPUS == 1 */
>
> -#ifndef __ASSEMBLER__
> -#include "kern/cpu_number.h"
> -#endif
> -
> #endif /* _I386_CPU_NUMBER_H_ */
> diff --git a/i386/i386/cswitch.S b/i386/i386/cswitch.S
> index 718c8aac..46da2300 100644
> --- a/i386/i386/cswitch.S
> +++ b/i386/i386/cswitch.S
> @@ -110,7 +110,7 @@ ENTRY(Thread_continue)
> */
> ENTRY(switch_to_shutdown_context)
> CPU_NUMBER(%edx)
> - movl EXT(active_stacks)(,%edx,4),%ecx /* get old kernel stack
> */
> + movl CX(EXT(active_stacks),%edx),%ecx /* get old kernel stack
> */
> movl %ebx,KSS_EBX(%ecx) /* save registers */
> movl %ebp,KSS_EBP(%ecx)
> movl %edi,KSS_EDI(%ecx)
> @@ -124,7 +124,7 @@ ENTRY(switch_to_shutdown_context)
> movl 4(%esp),%ebx /* get routine to run next */
> movl 8(%esp),%esi /* get its argument */
>
> - movl EXT(interrupt_stack)(,%edx,4),%ecx /* point to its
> interrupt stack */
> + movl CX(EXT(int_stack_base),%edx),%ecx /* point to its
> interrupt stack */
> lea INTSTACK_SIZE(%ecx),%esp /* switch to it (top) */
>
> pushl %eax /* push thread */
> diff --git a/i386/i386/locore.S b/i386/i386/locore.S
> index 8c2f57ea..64152b5b 100644
> --- a/i386/i386/locore.S
> +++ b/i386/i386/locore.S
> @@ -540,21 +540,22 @@ _kret_iret:
> */
> trap_from_kernel:
> #if MACH_KDB || MACH_TTD
> - movl %esp,%ebx /* save current stack */
> + CPU_NUMBER(%ecx) /* get CPU number */
>
> + movl %esp,%ebx /* save current stack */
> movl %esp,%edx /* on an interrupt stack? */
> +
> and $(~(KERNEL_STACK_SIZE-1)),%edx
> - cmpl EXT(int_stack_base),%edx
> + cmpl CX(EXT(int_stack_base),%ecx),%edx
> je 1f /* OK if so */
>
> - CPU_NUMBER(%edx) /* get CPU number */
> - cmpl CX(EXT(kernel_stack),%edx),%esp
> + cmpl CX(EXT(kernel_stack),%ecx),%esp
> /* already on kernel stack? */
> ja 0f
> - cmpl CX(EXT(active_stacks),%edx),%esp
> + cmpl CX(EXT(active_stacks),%ecx),%esp
> ja 1f /* switch if not */
> 0:
> - movl CX(EXT(kernel_stack),%edx),%esp
> + movl CX(EXT(kernel_stack),%ecx),%esp
> 1:
> pushl %ebx /* save old stack */
> pushl %ebx /* pass as parameter */
> @@ -668,24 +669,25 @@ ENTRY(all_intrs)
> pushl %edx
> cld /* clear direction flag */
>
> - movl %esp,%edx /* on an interrupt stack? */
> - and $(~(KERNEL_STACK_SIZE-1)),%edx
> - cmpl %ss:EXT(int_stack_base),%edx
> + CPU_NUMBER(%edx)
> +
> + movl %esp,%ecx /* on an interrupt stack? */
> + and $(~(KERNEL_STACK_SIZE-1)),%ecx
> + cmpl %ss:CX(EXT(int_stack_base),%edx),%ecx
> je int_from_intstack /* if not: */
>
> pushl %ds /* save segment registers */
> pushl %es
> pushl %fs
> pushl %gs
> - mov %ss,%dx /* switch to kernel segments */
> - mov %dx,%ds
> - mov %dx,%es
> - mov %dx,%fs
> - mov %dx,%gs
> -
> - CPU_NUMBER(%edx)
> + mov %ss,%cx /* switch to kernel segments */
> + mov %cx,%ds
> + mov %cx,%es
> + mov %cx,%fs
> + mov %cx,%gs
>
> movl CX(EXT(int_stack_top),%edx),%ecx
> +
> xchgl %ecx,%esp /* switch to interrupt stack */
>
> #if STAT_TIME
> @@ -724,19 +726,19 @@ LEXT(return_to_iret) /* ( label for
> kdb_kintr and hardclock) */
> pop %fs
> pop %es
> pop %ds
> - pop %edx
> - pop %ecx
> - pop %eax
> + popl %edx
> + popl %ecx
> + popl %eax
> iret /* return to caller */
>
> int_from_intstack:
> - cmpl EXT(int_stack_base),%esp /* seemingly looping? */
> + cmpl CX(EXT(int_stack_base),%edx),%esp /* seemingly looping? */
> jb stack_overflowed /* if not: */
> call EXT(interrupt) /* call interrupt routine */
> _return_to_iret_i: /* ( label for kdb_kintr) */
> - pop %edx /* must have been on kernel segs */
> - pop %ecx
> - pop %eax /* no ASTs */
> + popl %edx /* must have been on kernel segs */
> + popl %ecx
> + popl %eax /* no ASTs */
> iret
>
> stack_overflowed:
> diff --git a/i386/i386/mp_desc.c b/i386/i386/mp_desc.c
> index 1e9ea0fc..0c68ec11 100644
> --- a/i386/i386/mp_desc.c
> +++ b/i386/i386/mp_desc.c
> @@ -24,25 +24,36 @@
> * the rights to redistribute these changes.
> */
>
> -#if NCPUS > 1
> -
> -#include <string.h>
> -
> #include <kern/cpu_number.h>
> #include <kern/debug.h>
> #include <kern/printf.h>
> +#include <kern/smp.h>
> +#include <kern/startup.h>
> +#include <kern/kmutex.h>
> #include <mach/machine.h>
> #include <mach/xen.h>
> #include <vm/vm_kern.h>
>
> #include <i386/mp_desc.h>
> #include <i386/lock.h>
> +#include <i386/apic.h>
> +#include <i386/locore.h>
> +#include <i386/gdt.h>
> +#include <i386at/idt.h>
> +#include <i386at/int_init.h>
> +#include <i386/cpu.h>
> +#include <i386/smp.h>
> +
> #include <i386at/model_dep.h>
> #include <machine/ktss.h>
> +#include <machine/smp.h>
> #include <machine/tss.h>
> #include <machine/io_perm.h>
> #include <machine/vm_param.h>
>
> +#include <i386at/acpi_parse_apic.h>
> +#include <string.h>
> +
> /*
> * The i386 needs an interrupt stack to keep the PCB stack from being
> * overrun by interrupts. All interrupt stacks MUST lie at lower addresses
> @@ -52,20 +63,35 @@
> /*
> * Addresses of bottom and top of interrupt stacks.
> */
> -vm_offset_t interrupt_stack[NCPUS];
> vm_offset_t int_stack_top[NCPUS];
> vm_offset_t int_stack_base[NCPUS];
>
> -/*
> - * Barrier address.
> - */
> -vm_offset_t int_stack_high;
> +/* Interrupt stack allocation */
> +uint8_t solid_intstack[NCPUS*INTSTACK_SIZE] __aligned(INTSTACK_SIZE);
>
> +void
> +interrupt_stack_alloc(void)
> +{
> + int i;
> +
> + /*
> + * Set up pointers to the top of the interrupt stack.
> + */
> +
> + for (i = 0; i < NCPUS; i++) {
> + int_stack_base[i] = (vm_offset_t) &solid_intstack[i *
> INTSTACK_SIZE];
> + int_stack_top[i] = (vm_offset_t) &solid_intstack[(i + 1) *
> INTSTACK_SIZE];
> + }
> +}
> +
> +#if NCPUS > 1
> /*
> - * First cpu`s interrupt stack.
> + * Flag to mark SMP init by BSP complete
> */
> -extern char _intstack[]; /* bottom */
> -extern char _eintstack[]; /* top */
> +int bspdone;
> +
> +extern void *apboot, *apbootend;
> +extern volatile ApicLocalUnit* lapic;
>
> /*
> * Multiprocessor i386/i486 systems use a separate copy of the
> @@ -77,7 +103,7 @@ extern char _eintstack[]; /* top */
> */
>
> /*
> - * Allocated descriptor tables.
> + * Descriptor tables.
> */
> struct mp_desc_table *mp_desc_table[NCPUS] = { 0 };
>
> @@ -98,16 +124,121 @@ extern struct real_gate idt[IDTSZ];
> extern struct real_descriptor gdt[GDTSZ];
> extern struct real_descriptor ldt[LDTSZ];
>
> +static void
> +set_control_regs_and_pmap(void)
> +{
> + /* XXX move to intel/pmap.h */
> + extern pt_entry_t *kernel_page_dir;
> + int i;
> +
> + /*
> + * We'll have to temporarily install a direct mapping
> + * between physical memory and low linear memory,
> + * until we start using our new kernel segment descriptors.
> + */
Rather use another kernel_page_dir, otherwise this is messing with the
original kernel_page_dir of the BSP.
Also, factorize this with i386at_init: this is tricky code that we don't
want two copies of — they will inevitably diverge and both become
half-bogus in the long run.
> +#if INIT_VM_MIN_KERNEL_ADDRESS != LINEAR_MIN_KERNEL_ADDRESS
> + vm_offset_t delta = INIT_VM_MIN_KERNEL_ADDRESS -
> LINEAR_MIN_KERNEL_ADDRESS;
> + if ((vm_offset_t)(-delta) < delta)
> + delta = (vm_offset_t)(-delta);
> + int nb_direct = delta >> PDESHIFT;
> + for (i = 0; i < nb_direct; i++)
> + kernel_page_dir[lin2pdenum_cont(INIT_VM_MIN_KERNEL_ADDRESS) +
> i] =
> +
> kernel_page_dir[lin2pdenum_cont(LINEAR_MIN_KERNEL_ADDRESS) + i];
> +#endif
> + /* We need BIOS memory mapped at 0xc0000 & co for BIOS accesses */
> +#if VM_MIN_KERNEL_ADDRESS != 0
> + kernel_page_dir[lin2pdenum_cont(LINEAR_MIN_KERNEL_ADDRESS -
> VM_MIN_KERNEL_ADDRESS)] =
> + kernel_page_dir[lin2pdenum_cont(LINEAR_MIN_KERNEL_ADDRESS)];
> +#endif
[...]
> + unsigned long flags;
> +
> + cpu_intr_save(&flags);
> +
> + int ncpus = smp_get_numcpus();
> +
> + //Copy cpu initialization assembly routine
> + memcpy((void*)phystokv(AP_BOOT_ADDR), (void*) &apboot,
> + (uint32_t)&apbootend - (uint32_t)&apboot);
Do we have something that makes sure that this area is not allocated for
something else?
- [PATCH 08/12] i386: Add AP variants of descriptor tables, (continued)
- [PATCH 08/12] i386: Add AP variants of descriptor tables, Damien Zammit, 2022/10/25
- [PATCH 09/12] i386: Fix lapic and ioapic for smp, Damien Zammit, 2022/10/25
- [PATCH 10/12] Add cpu_number and cpuboot, Damien Zammit, 2022/10/25
- [PATCH 11/12] i386: Fix race in multiprocessor ktss, Damien Zammit, 2022/10/25
- [PATCH 12/12] i386: Refactor int stacks to be per cpu for SMP, Damien Zammit, 2022/10/25
- Re: [PATCH 0/12 - gnumach] SMP almost working!, Almudena Garcia, 2022/10/25
- Re: [PATCH 0/12 - gnumach] SMP almost working!, Almudena Garcia, 2022/10/25