[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [RFC] updated e1000 mitigation patch
From: |
Luigi Rizzo |
Subject: |
[Qemu-devel] [RFC] updated e1000 mitigation patch |
Date: |
Thu, 27 Dec 2012 11:06:58 +0100 |
User-agent: |
Mutt/1.4.2.3i |
Before submitting a proper patch, I'd like to hear feedback on the
following proposed change to hw/e1000.c to properly implement
interrupt mitigation.
This is joint work with Vincenzo Maffione and Giuseppe Lettieri (in Cc),
and is a followup to a similar patch i posted in july
https://lists.gnu.org/archive/html/qemu-devel/2012-07/msg03195.html
The patch models three of the five (sic!) e1000 register that control
moderation, and uses qemu timers for that (the minimum delay for
these timers affects the fidelity of the emulation).
Right now there is a static variable that controls whether the
feature is enabled. I would probably like to make it a parameter
accessible from the command line in qemu, possibly extending it to
override the mitigation delay set by the device driver.
Right now we reached transmit speeds in the order of 2-300Kpps
(if matched with a proper guest device driver optimization, see
https://groups.google.com/forum/?fromgroups=#!topic/mailing.freebsd.emulator/xQHR_hleCuc
)
Some performance data using a FreeBSD guest, for udp transmissions:
KVM QEMU
standard KVM, standard driver 20 Kpps 6.3 Kpps
modified KVM, standard driver 37 Kpps 28 Kpps
modified KVM, modified driver 200 Kpps 34 Kpps
As you can see, on kvm this change gives a 5x speedup to the tx path,
which combines nicely with the 2x speedup that comes from supporting
interrupt mitigation alone in the hypervisor. Without kvm (or kqemu ?)
the benefits are much lower, as the guest becomes too slow.
cheers
luigi
diff -urp qemu-1.3.0-orig/hw/e1000.c qemu-1.3.0/hw/e1000.c
--- qemu-1.3.0-orig/hw/e1000.c 2012-12-03 20:37:05.000000000 +0100
+++ qemu-1.3.0/hw/e1000.c 2012-12-27 09:47:16.000000000 +0100
@@ -35,6 +35,8 @@
#include "e1000_hw.h"
+static int mit_on = 1; /* interrupt mitigation enable */
+
#define E1000_DEBUG
#ifdef E1000_DEBUG
@@ -129,6 +131,9 @@ typedef struct E1000State_st {
} eecd_state;
QEMUTimer *autoneg_timer;
+ QEMUTimer *mit_timer; // handle for the timer
+ uint32_t mit_timer_on; // mitigation timer active
+ uint32_t mit_cause; // pending interrupt cause
} E1000State;
#define defreg(x) x = (E1000_##x>>2)
@@ -144,6 +149,7 @@ enum {
defreg(TPR), defreg(TPT), defreg(TXDCTL), defreg(WUFC),
defreg(RA), defreg(MTA), defreg(CRCERRS),defreg(VFTA),
defreg(VET),
+ defreg(RDTR), defreg(RADV), defreg(TADV), defreg(ITR),
};
static void
@@ -639,6 +645,68 @@ static uint64_t tx_desc_base(E1000State
return (bah << 32) + bal;
}
+/* helper function, 0 means the value is not set */
+static inline void
+mit_update_delay(uint32_t *curr, uint32_t value)
+{
+ if (value && (*curr == 0 || value < *curr))
+ *curr = value;
+}
+
+/*
+ * If necessary, rearm the timer and post an interrupt.
+ * Called at the end of tx/rx routines (mit_timer_on == 0),
+ * and when the timer fires (mit_timer_on == 1).
+ * We provide a partial implementation of interrupt mitigation,
+ * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
+ * RADV and TADV, 256ns units for ITR). RDTR is only used to enable RADV;
+ * relative timers based on TIDV and RDTR are not implemented.
+ */
+static void
+mit_rearm_and_int(void *opaque)
+{
+ E1000State *s = opaque;
+ uint32_t mit_delay = 0;
+
+ /*
+ * Clear the flag. It is only set when the callback fires,
+ * and we need to clear it anyways.
+ */
+ s->mit_timer_on = 0;
+ if (s->mit_cause == 0) /* no events pending, we are done */
+ return;
+ /*
+ * Compute the next mitigation delay according to pending interrupts
+ * and the current values of RADV (provided RDTR!=0), TADV and ITR.
+ * Then rearm the timer.
+ */
+ if (s->mit_cause & (E1000_ICR_TXQE | E1000_ICR_TXDW))
+ mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
+ if (s->mac_reg[RDTR] && (s->mit_cause & E1000_ICS_RXT0))
+ mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
+ mit_update_delay(&mit_delay, s->mac_reg[ITR]);
+
+ if (mit_delay) {
+ s->mit_timer_on = 1;
+ qemu_mod_timer(s->mit_timer,
+ qemu_get_clock_ns(vm_clock) + mit_delay * 256);
+ }
+ set_ics(s, 0, s->mit_cause);
+ s->mit_cause = 0;
+}
+
+static void
+mit_set_ics(E1000State *s, uint32_t cause)
+{
+ if (mit_on == 0) {
+ set_ics(s, 0, cause);
+ return;
+ }
+ s->mit_cause |= cause;
+ if (!s->mit_timer_on)
+ mit_rearm_and_int(s);
+}
+
static void
start_xmit(E1000State *s)
{
@@ -676,7 +744,7 @@ start_xmit(E1000State *s)
break;
}
}
- set_ics(s, 0, cause);
+ mit_set_ics(s, cause);
}
static int
@@ -894,7 +962,7 @@ e1000_receive(NetClientState *nc, const
s->rxbuf_min_shift)
n |= E1000_ICS_RXDMT0;
- set_ics(s, 0, n);
+ mit_set_ics(s, n);
return size;
}
@@ -999,6 +1067,7 @@ static uint32_t (*macreg_readops[])(E100
getreg(RDH), getreg(RDT), getreg(VET), getreg(ICS),
getreg(TDBAL), getreg(TDBAH), getreg(RDBAH), getreg(RDBAL),
getreg(TDLEN), getreg(RDLEN),
+ getreg(RDTR), getreg(RADV), getreg(TADV), getreg(ITR),
[TOTH] = mac_read_clr8, [TORH] = mac_read_clr8, [GPRC] = mac_read_clr4,
[GPTC] = mac_read_clr4, [TPR] = mac_read_clr4, [TPT] = mac_read_clr4,
@@ -1015,6 +1084,8 @@ static void (*macreg_writeops[])(E1000St
putreg(PBA), putreg(EERD), putreg(SWSM), putreg(WUFC),
putreg(TDBAL), putreg(TDBAH), putreg(TXDCTL), putreg(RDBAH),
putreg(RDBAL), putreg(LEDCTL), putreg(VET),
+ [RDTR] = set_16bit, [RADV] = set_16bit, [TADV] = set_16bit,
+ [ITR] = set_16bit,
[TDLEN] = set_dlen, [RDLEN] = set_dlen, [TCTL] = set_tctl,
[TDT] = set_tctl, [MDIC] = set_mdic, [ICS] = set_ics,
[TDH] = set_16bit, [RDH] = set_16bit, [RDT] = set_rdt,
@@ -1286,6 +1357,9 @@ static int pci_e1000_init(PCIDevice *pci
add_boot_device_path(d->conf.bootindex, &pci_dev->qdev, "/address@hidden");
d->autoneg_timer = qemu_new_timer_ms(vm_clock, e1000_autoneg_timer, d);
+ d->mit_cause = 0;
+ d->mit_timer_on = 0;
+ d->mit_timer = qemu_new_timer_ns(vm_clock, mit_rearm_and_int, d);
return 0;
}
- [Qemu-devel] [RFC] updated e1000 mitigation patch,
Luigi Rizzo <=