bug-hurd
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[RFC PATCH gnumach] percpu area using gs segment


From: Damien Zammit
Subject: [RFC PATCH gnumach] percpu area using gs segment
Date: Sat, 26 Aug 2023 06:48:52 +0000

This speeds up SMP again by storing the struct processor in a
percpu area, which avoids an expensive cpu_number() call on every
use of current_processor(), and by reading the CPU number from an
offset into the percpu area.  It still needs work for 64-bit and
for replacing the other per-CPU arrays.
---
 i386/Makefrag.am       |  2 ++
 i386/i386/cpu_number.c |  8 ++++-
 i386/i386/cpu_number.h |  1 +
 i386/i386/gdt.c        |  9 +++++-
 i386/i386/gdt.h        | 11 ++++++-
 i386/i386/i386asm.sym  |  7 ----
 i386/i386/locore.S     | 13 +++++---
 i386/i386/mp_desc.c    |  4 ++-
 i386/i386/percpu.c     | 30 ++++++++++++++++++
 i386/i386/percpu.h     | 72 ++++++++++++++++++++++++++++++++++++++++++
 kern/processor.c       |  7 ++--
 kern/processor.h       | 18 ++++-------
 12 files changed, 150 insertions(+), 32 deletions(-)
 create mode 100644 i386/i386/percpu.c
 create mode 100644 i386/i386/percpu.h

diff --git a/i386/Makefrag.am b/i386/Makefrag.am
index 274e8695..c1724cea 100644
--- a/i386/Makefrag.am
+++ b/i386/Makefrag.am
@@ -108,6 +108,8 @@ libkernel_a_SOURCES += \
        i386/i386/irq.c \
        i386/i386/irq.h \
        i386/i386/msr.h \
+       i386/i386/percpu.c \
+       i386/i386/percpu.h \
        i386/i386/pit.c \
        i386/i386/pit.h
 
diff --git a/i386/i386/cpu_number.c b/i386/i386/cpu_number.c
index ef19e11f..241015b5 100644
--- a/i386/i386/cpu_number.c
+++ b/i386/i386/cpu_number.c
@@ -20,11 +20,17 @@
 #include <i386/smp.h>
 #include <i386/cpu.h>
 #include <i386/mp_desc.h>
+#include <i386/percpu.h>
 #include <kern/printf.h>
 
 #if NCPUS > 1
-int cpu_number(void)
+int cpu_number_slow(void)
 {
        return cpu_id_lut[apic_get_current_cpu()];
 }
+
+int cpu_number(void)
+{
+       return *((int *)percpu_ptr(int, cpu_id));
+}
 #endif
diff --git a/i386/i386/cpu_number.h b/i386/i386/cpu_number.h
index 479a847a..098696a3 100644
--- a/i386/i386/cpu_number.h
+++ b/i386/i386/cpu_number.h
@@ -65,6 +65,7 @@
 
 #ifndef __ASSEMBLER__
 #include "kern/cpu_number.h"
+int cpu_number_slow(void);
 int cpu_number(void);
 #endif
 
diff --git a/i386/i386/gdt.c b/i386/i386/gdt.c
index ddda603b..e335de50 100644
--- a/i386/i386/gdt.c
+++ b/i386/i386/gdt.c
@@ -35,6 +35,7 @@
 
 #include <kern/assert.h>
 #include <intel/pmap.h>
+#include <machine/percpu.h>
 
 #include "vm_param.h"
 #include "seg.h"
@@ -73,6 +74,11 @@ gdt_fill(struct real_descriptor *mygdt)
                            0xffffffff,
                            ACC_PL_K|ACC_DATA_W, SZ_32);
 #endif /* MACH_PV_DESCRIPTORS */
+       vm_offset_t thiscpu = kvtolin(&percpu_array[cpu_number_slow()]);
+       _fill_gdt_descriptor(mygdt, PERCPU_DS,
+                           thiscpu,
+                           thiscpu + sizeof(struct percpu) - 1,
+                           ACC_PL_K|ACC_DATA_W, SZ_32);
 #endif
 
 #ifdef MACH_PV_DESCRIPTORS
@@ -119,8 +125,9 @@ reload_segs(void)
                     
                     "movw      %w1,%%ds\n"
                     "movw      %w1,%%es\n"
+                    "movw      %w3,%%gs\n"
                     "movw      %w1,%%ss\n"
-                    : : "i" (KERNEL_CS), "r" (KERNEL_DS), "r" (0));
+                    : : "i" (KERNEL_CS), "r" (KERNEL_DS), "r" (0), "r" (PERCPU_DS));
 #endif
 }
 
diff --git a/i386/i386/gdt.h b/i386/i386/gdt.h
index 5def73cb..d5d78d43 100644
--- a/i386/i386/gdt.h
+++ b/i386/i386/gdt.h
@@ -77,12 +77,20 @@
 
 /*                     0x58               used by user TSS in 64bit mode */
 
+#ifdef __x86_64__
+/*                     XXX */
+#else
+#define PERCPU_DS      0x68            /* per-cpu data mapping */
+#endif
+
 #ifdef __x86_64__
 #define        GDTSZ           sel_idx(0x60)
 #else
-#define        GDTSZ           sel_idx(0x58)
+#define        GDTSZ           sel_idx(0x70)
 #endif
 
+#ifndef __ASSEMBLER__
+
 extern struct real_descriptor gdt[GDTSZ];
 
 /* Fill a segment descriptor in the GDT.  */
@@ -117,4 +125,5 @@ extern struct real_descriptor gdt[GDTSZ];
 extern void gdt_init(void);
 extern void ap_gdt_init(int cpu);
 
+#endif /* __ASSEMBLER__ */
 #endif /* _I386_GDT_ */
diff --git a/i386/i386/i386asm.sym b/i386/i386/i386asm.sym
index 436e296a..832c7041 100644
--- a/i386/i386/i386asm.sym
+++ b/i386/i386/i386asm.sym
@@ -154,17 +154,10 @@ expr      NPTES                                           PTES_PER_PAGE
 expr   INTEL_PTE_VALID|INTEL_PTE_WRITE                 INTEL_PTE_KERNEL
 
 expr   IDTSZ
-expr   GDTSZ
-expr   LDTSZ
 
 expr   KERNEL_RING
-
 expr   KERNEL_CS
 expr   KERNEL_DS
-expr   KERNEL_TSS
-#ifndef        MACH_PV_DESCRIPTORS
-expr   KERNEL_LDT
-#endif /* MACH_PV_DESCRIPTORS */
 
 expr   (VM_MIN_KERNEL_ADDRESS>>PDESHIFT)*sizeof(pt_entry_t)    KERNELBASEPDE
 
diff --git a/i386/i386/locore.S b/i386/i386/locore.S
index 55aa9d60..463cce55 100644
--- a/i386/i386/locore.S
+++ b/i386/i386/locore.S
@@ -33,6 +33,7 @@
 #include <i386/proc_reg.h>
 #include <i386/trap.h>
 #include <i386/seg.h>
+#include <i386/gdt.h>
 #include <i386/ldt.h>
 #include <i386/i386asm.h>
 #include <i386/cpu_number.h>
@@ -468,7 +469,8 @@ trap_push_segs:
        mov     %ax,%ds                 /* (same as kernel stack segment) */
        mov     %ax,%es
        mov     %ax,%fs
-       mov     %ax,%gs
+       mov     $(PERCPU_DS),%ax
+       movw    %ax,%gs
 
 trap_set_segs:
        cld                             /* clear direction flag */
@@ -686,7 +688,8 @@ ENTRY(all_intrs)
        mov     %dx,%ds
        mov     %dx,%es
        mov     %dx,%fs
-       mov     %dx,%gs
+       mov     $(PERCPU_DS),%dx
+       movw    %dx,%gs
 
        CPU_NUMBER(%edx)
 
@@ -792,7 +795,8 @@ ast_from_interrupt:
        mov     %dx,%ds
        mov     %dx,%es
        mov     %dx,%fs
-       mov     %dx,%gs
+       mov     $(PERCPU_DS),%dx
+       movw    %dx,%gs
 
        CPU_NUMBER(%edx)
        TIME_TRAP_UENTRY
@@ -1051,7 +1055,8 @@ syscall_entry_2:
        mov     %dx,%ds
        mov     %dx,%es
        mov     %dx,%fs
-       mov     %dx,%gs
+       mov     $(PERCPU_DS),%dx
+       movw    %dx,%gs
 
 /*
  * Shuffle eflags,eip,cs into proper places
diff --git a/i386/i386/mp_desc.c b/i386/i386/mp_desc.c
index f1a1f989..465ffacc 100644
--- a/i386/i386/mp_desc.c
+++ b/i386/i386/mp_desc.c
@@ -143,6 +143,8 @@ mp_desc_init(int mycpu)
        struct mp_desc_table *mpt;
        vm_offset_t mem;
 
+       init_percpu(mycpu);
+
        if (mycpu == 0) {
                /*
                 * Master CPU uses the tables built at boot time.
@@ -275,7 +277,7 @@ cpu_setup(int cpu)
 void
 cpu_ap_main()
 {
-    int cpu = cpu_number();
+    int cpu = cpu_number_slow();
 
     do {
        cpu_pause();
diff --git a/i386/i386/percpu.c b/i386/i386/percpu.c
new file mode 100644
index 00000000..0bc8b234
--- /dev/null
+++ b/i386/i386/percpu.c
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2023 Free Software Foundation, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <i386/smp.h>
+#include <i386/apic.h>
+#include <i386/percpu.h>
+
+struct percpu percpu_array[NCPUS] __aligned(0x8000);
+
+void init_percpu(int cpu)
+{
+    int apic_id = apic_get_current_cpu();
+
+    percpu_array[cpu].self = &percpu_array[cpu];
+    percpu_array[cpu].apic_id = apic_id;
+    percpu_array[cpu].cpu_id = cpu_id_lut[apic_id];
+}
diff --git a/i386/i386/percpu.h b/i386/i386/percpu.h
new file mode 100644
index 00000000..b22d512c
--- /dev/null
+++ b/i386/i386/percpu.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2023 Free Software Foundation, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _PERCPU_H_
+#define _PERCPU_H_
+
+#include <kern/ast.h>
+#include <kern/processor.h>
+#include <kern/thread.h>
+#include <kern/timer.h>
+#include <i386/mp_desc.h>
+#include <i386/spl.h>
+#include <intel/pmap.h>
+#include <ipc/ipc_kmsg.h>
+
+#define percpu_assign(stm, val)     \
+    asm("mov %0, %%gs:%1"           \
+                 : : "r" (val), "m" (__builtin_offsetof(struct percpu, stm)));
+
+#define percpu_ptr(typ, stm)        \
+MACRO_BEGIN                         \
+    typ *ptr_ = (typ *)__builtin_offsetof(struct percpu, stm); \
+                                    \
+    asm("add %%gs:0, %0"            \
+                 : "+r" (ptr_)      \
+                 : );               \
+                                    \
+    ptr_;                           \
+MACRO_END
+
+struct percpu {
+    struct percpu      *self;
+    struct processor   processor;
+/*
+    struct machine_slot        machine_slot;
+    struct mp_desc_table mp_desc_table;
+    thread_t           active_thread;
+    vm_offset_t                active_stack;
+    vm_offset_t                int_stack_top;
+    vm_offset_t                int_stack_base;
+    ast_t              need_ast;
+    ipc_kmsg_t         ipc_kmsg_cache;
+    pmap_update_list   cpu_update_list;
+    spl_t              saved_ipl;
+    spl_t              curr_ipl;
+    timer_data_t       kernel_timer;
+    timer_t            current_timer;
+    unsigned long      in_interrupt;
+*/
+    int                        apic_id;
+    int                        cpu_id;
+};
+
+extern struct percpu percpu_array[NCPUS];
+
+void init_percpu(int cpu);
+
+#endif /* _PERCPU_H_ */
diff --git a/kern/processor.c b/kern/processor.c
index 2cd6d46c..76735381 100644
--- a/kern/processor.c
+++ b/kern/processor.c
@@ -60,14 +60,12 @@ struct kmem_cache pset_cache;
 int    master_cpu;
 
 struct processor_set default_pset;
-struct processor processor_array[NCPUS];
 
 queue_head_t           all_psets;
 int                    all_psets_count;
 def_simple_lock_data(, all_psets_lock);
 
 processor_t    master_processor;
-processor_t    processor_ptr[NCPUS];
 
 /*
  *     Bootstrap the processor/pset system so the scheduler can run.
@@ -81,10 +79,9 @@ void pset_sys_bootstrap(void)
        for (i = 0; i < NCPUS; i++) {
                /*
                 *      Initialize processor data structures.
-                *      Note that cpu_to_processor(i) is processor_ptr[i].
+                *      Note that cpu_to_processor is processor_ptr.
                 */
-               processor_ptr[i] = &processor_array[i];
-               processor_init(processor_ptr[i], i);
+               processor_init(processor_ptr(i), i);
        }
        master_processor = cpu_to_processor(master_cpu);
        queue_init(&all_psets);
diff --git a/kern/processor.h b/kern/processor.h
index 17b784a3..d83cdf3c 100644
--- a/kern/processor.h
+++ b/kern/processor.h
@@ -112,6 +112,8 @@ struct processor {
 typedef struct processor Processor;
 extern struct processor        processor_array[NCPUS];
 
+#include <machine/percpu.h>
+
 /*
  *     Chain of all processor sets.
  */
@@ -195,23 +197,15 @@ extern processor_t        master_processor;
 #define        PROCESSOR_ASSIGN        4       /* Assignment is changing */
 #define PROCESSOR_SHUTDOWN     5       /* Being shutdown */
 
-/*
- *     Use processor ptr array to find current processor's data structure.
- *     This replaces a multiplication (index into processor_array) with
- *     an array lookup and a memory reference.  It also allows us to save
- *     space if processor numbering gets too sparse.
- */
-
-extern processor_t     processor_ptr[NCPUS];
-
-#define cpu_to_processor(i)    (processor_ptr[i])
+#define processor_ptr(i)       (&percpu_array[i].processor)
+#define cpu_to_processor       processor_ptr
 
-#define current_processor()    (processor_ptr[cpu_number()])
+#define current_processor()    (percpu_ptr(struct processor, processor))
 #define current_processor_set()        (current_processor()->processor_set)
 
 /* Compatibility -- will go away */
 
-#define cpu_state(slot_num)    (processor_ptr[slot_num]->state)
+#define cpu_state(slot_num)    (processor_ptr(slot_num)->state)
 #define cpu_idle(slot_num)     (cpu_state(slot_num) == PROCESSOR_IDLE)
 
 /* Useful lock macros */
-- 
2.40.1





reply via email to

[Prev in Thread] Current Thread [Next in Thread]