qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH] Huge TLB performance improvement


From: Daniel Jacobowitz
Subject: Re: [Qemu-devel] [PATCH] Huge TLB performance improvement
Date: Sun, 12 Nov 2006 11:56:35 -0500
User-agent: Mutt/1.5.13 (2006-08-11)

On Sun, Nov 12, 2006 at 02:29:38PM +0000, Thiemo Seufer wrote:
> JFTR, increasing the TLB size from 16 to 64 entries made no performance
> difference whatsoever.

I suspect that's because we do about as much eviction.  Here's a
different approach.  Whenever an entry is evicted by tlbwr, the guest
can't predict which existing entry will be removed.  So, let's evict
none of them.  This takes the "evicted" entry and swaps it out to
a second set of TLB entries, avoiding the qemu internal TLB flush.

I'm trying for a complete as-if implementation, so tlbp only searches
the "real" entries (I don't know if it should cause a flush of the
shadowed entries, but things seem to work OK without it).  tlbwi and
tlbr both discard the shadowed entries.

This appears to cut single page flushes by 90%.

My best time for boot/runlevel-2/halt yesterday was 73 seconds.  With
this patch the same run takes about 51 seconds, and apt-get update
finishes in a reasonable amount of time.  This is with all of the
patches I've posted to the list applied, including the improved
tb_jmp_cache handling - we still do a non-trivial number of single
page cache flushes, so I think that change is still a good idea.

> The excessive flushing for mips happens because Qemu doesn't properly
> model the hardware's ASID handling.

We still do flushes at ASID switches, by the way, so it might be
possible to get further gains here.  But we're down to under about 15%
of CPU time for soft-mmu routines and tb management routines, which
is very good.  Then there's about 65% executing guest code and the rest
in translation, virtual hardware, and other overhead.

-- 
Daniel Jacobowitz
CodeSourcery

---
 target-mips/cpu.h       |    3 ++-
 target-mips/exec.h      |    1 +
 target-mips/helper.c    |    2 +-
 target-mips/mips-defs.h |    1 +
 target-mips/op_helper.c |   43 +++++++++++++++++++++++++++++++++++++------
 target-mips/translate.c |    1 +
 6 files changed, 43 insertions(+), 8 deletions(-)

Index: qemu/target-mips/cpu.h
===================================================================
--- qemu.orig/target-mips/cpu.h 2006-11-12 11:34:01.000000000 -0500
+++ qemu/target-mips/cpu.h      2006-11-12 11:34:24.000000000 -0500
@@ -94,7 +94,8 @@ struct CPUMIPSState {
                
 #endif
 #if defined(MIPS_USES_R4K_TLB)
-    tlb_t tlb[MIPS_TLB_NB];
+    tlb_t tlb[MIPS_TLB_MAX];
+    uint32_t tlb_in_use;
 #endif
     uint32_t CP0_index;
     uint32_t CP0_random;
Index: qemu/target-mips/exec.h
===================================================================
--- qemu.orig/target-mips/exec.h        2006-11-12 11:34:01.000000000 -0500
+++ qemu/target-mips/exec.h     2006-11-12 11:34:24.000000000 -0500
@@ -115,5 +115,6 @@ uint32_t cpu_mips_get_count (CPUState *e
 void cpu_mips_store_count (CPUState *env, uint32_t value);
 void cpu_mips_store_compare (CPUState *env, uint32_t value);
 void cpu_mips_clock_init (CPUState *env);
+void cpu_mips_tlb_flush (CPUState *env, int flush_global);
 
 #endif /* !defined(__QEMU_MIPS_EXEC_H__) */
Index: qemu/target-mips/helper.c
===================================================================
--- qemu.orig/target-mips/helper.c      2006-11-12 11:34:01.000000000 -0500
+++ qemu/target-mips/helper.c   2006-11-12 11:34:24.000000000 -0500
@@ -46,7 +46,7 @@ static int map_address (CPUState *env, t
     tlb_t *tlb;
     int i, n;
 
-    for (i = 0; i < MIPS_TLB_NB; i++) {
+    for (i = 0; i < env->tlb_in_use; i++) {
         tlb = &env->tlb[i];
         /* Check ASID, virtual page number & size */
         if ((tlb->G == 1 || tlb->ASID == ASID) &&
Index: qemu/target-mips/mips-defs.h
===================================================================
--- qemu.orig/target-mips/mips-defs.h   2006-11-12 11:34:01.000000000 -0500
+++ qemu/target-mips/mips-defs.h        2006-11-12 11:34:24.000000000 -0500
@@ -22,6 +22,7 @@
 /* Uses MIPS R4Kc TLB model */
 #define MIPS_USES_R4K_TLB
 #define MIPS_TLB_NB 16
+#define MIPS_TLB_MAX 128
 /* basic FPU register support */
 #define MIPS_USES_FPU 1
 /* Define a implementation number of 1.
Index: qemu/target-mips/op_helper.c
===================================================================
--- qemu.orig/target-mips/op_helper.c   2006-11-12 11:34:02.000000000 -0500
+++ qemu/target-mips/op_helper.c        2006-11-12 11:42:44.000000000 -0500
@@ -367,7 +367,7 @@ void do_mtc0 (int reg, int sel)
         env->CP0_EntryHi = val;
        /* If the ASID changes, flush qemu's TLB.  */
        if ((old & 0xFF) != (val & 0xFF))
-         tlb_flush (env, 1);
+         cpu_mips_tlb_flush (env, 1);
         rn = "EntryHi";
         break;
     case 11:
@@ -569,7 +569,14 @@ void fpu_handle_exception(void)
 
 /* TLB management */
 #if defined(MIPS_USES_R4K_TLB)
-static void invalidate_tlb (int idx)
+void cpu_mips_tlb_flush (CPUState *env, int flush_global)
+{
+    /* Flush qemu's TLB and discard all shadowed entries.  */
+    tlb_flush (env, flush_global);
+    env->tlb_in_use = MIPS_TLB_NB;
+}
+
+static void invalidate_tlb (int idx, int use_extra)
 {
     tlb_t *tlb;
     target_ulong addr;
@@ -582,6 +589,15 @@ static void invalidate_tlb (int idx)
         return;
     }
 
+    if (use_extra && env->tlb_in_use < MIPS_TLB_MAX) {
+        /* For tlbwr, we can shadow the discarded entry into
+          a new (fake) TLB entry, as long as the guest can not
+          tell that it's there.  */
+        memcpy (&env->tlb[env->tlb_in_use], tlb, sizeof (*tlb));
+        env->tlb_in_use++;
+        return;
+    }
+
     if (tlb->V0) {
         tb_invalidate_page_range(tlb->PFN[0], tlb->end - tlb->VPN);
         addr = tlb->VPN;
@@ -600,6 +616,14 @@ static void invalidate_tlb (int idx)
     }
 }
 
+static void mips_tlb_flush_extra (CPUState *env)
+{
+    tlb_random = 2;
+    while (env->tlb_in_use > MIPS_TLB_NB) {
+        invalidate_tlb(--env->tlb_in_use, 0);
+    }
+}
+
 static void fill_tlb (int idx)
 {
     tlb_t *tlb;
@@ -626,9 +650,14 @@ static void fill_tlb (int idx)
 
 void do_tlbwi (void)
 {
+    /* Discard cached TLB entries.  We could avoid doing this if the
+       tlbwi is just upgrading access permissions on the current entry;
+       that might be a further win.  */
+    mips_tlb_flush_extra (env);
+
     /* Wildly undefined effects for CP0_index containing a too high value and
        MIPS_TLB_NB not being a power of two.  But so does real silicon.  */
-    invalidate_tlb(env->CP0_index & (MIPS_TLB_NB - 1));
+    invalidate_tlb(env->CP0_index & (MIPS_TLB_NB - 1), 0);
     fill_tlb(env->CP0_index & (MIPS_TLB_NB - 1));
 }
 
@@ -636,7 +665,7 @@ void do_tlbwr (void)
 {
     int r = cpu_mips_get_random(env);
 
-    invalidate_tlb(r);
+    invalidate_tlb(r, 1);
     fill_tlb(r);
 }
 
@@ -673,8 +702,10 @@ void do_tlbr (void)
     tlb = &env->tlb[env->CP0_index & (MIPS_TLB_NB - 1)];
 
     /* If this will change the current ASID, flush qemu's TLB.  */
-    if (ASID != tlb->ASID && tlb->G != 1)
-      tlb_flush (env, 1);
+    if (ASID != tlb->ASID)
+        cpu_mips_tlb_flush (env, 1);
+
+    mips_tlb_flush_extra(env);
 
     env->CP0_EntryHi = tlb->VPN | tlb->ASID;
     size = (tlb->end - tlb->VPN) >> 12;
Index: qemu/target-mips/translate.c
===================================================================
--- qemu.orig/target-mips/translate.c   2006-11-12 11:34:01.000000000 -0500
+++ qemu/target-mips/translate.c        2006-11-12 11:34:24.000000000 -0500
@@ -2450,6 +2450,7 @@ void cpu_reset (CPUMIPSState *env)
     env->PC = 0xBFC00000;
 #if defined (MIPS_USES_R4K_TLB)
     env->CP0_random = MIPS_TLB_NB - 1;
+    env->tlb_in_use = MIPS_TLB_NB;
 #endif
     env->CP0_Wired = 0;
     env->CP0_Config0 = MIPS_CONFIG0;




reply via email to

[Prev in Thread] Current Thread [Next in Thread]