[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] interrupt mitigation for e1000
From: |
Luigi Rizzo |
Subject: |
[Qemu-devel] interrupt mitigation for e1000 |
Date: |
Tue, 24 Jul 2012 18:58:35 +0200 |
User-agent: |
Mutt/1.4.2.3i |
I noticed that the various NIC modules in qemu/kvm do not implement
interrupt mitigation. Mitigation is very beneficial, as it dramatically
reduces exits from the hypervisor.
As a proof of concept I implemented it for the e1000 driver
(patch below), and it brings tx performance from 9 to 56 Kpps on
qemu-softmmu, and from ~20 to 140 Kpps on qemu-kvm.
I am going to measure the rx interrupt mitigation in the next couple
of days.
Is there any interest in having this code merged?
cheers
luigi
diff -ubwrp --exclude '*.[do]' /tmp/qemu-61dc008/hw/e1000.c ./hw/e1000.c
--- /tmp/qemu-61dc008/hw/e1000.c 2012-07-20 01:25:52.000000000 +0200
+++ ./hw/e1000.c 2012-07-24 18:21:39.000000000 +0200
@@ -33,6 +33,8 @@
#include "sysemu.h"
#include "dma.h"
+#define MITIGATION
+
#include "e1000_hw.h"
#define E1000_DEBUG
@@ -127,6 +129,13 @@ typedef struct E1000State_st {
} eecd_state;
QEMUTimer *autoneg_timer;
+
+#ifdef MITIGATION
+ QEMUBH *int_bh; // interrupt mitigation handler
+ int tx_ics_count; // pending tx int requests
+ int rx_ics_count; // pending rx int requests
+ int int_cause; // int cause
+#endif // MITIGATION
} E1000State;
#define defreg(x) x = (E1000_##x>>2)
@@ -638,6 +648,26 @@ start_xmit(E1000State *s)
return;
}
+#ifdef MITIGATION
+ /* we transmit the first few packets, or we do if we are
+ * approaching a full ring. in the latter case, also
+ * send an ics.
+ *
+ */
+{
+ int len, pending;
+ len = s->mac_reg[TDLEN] / sizeof(desc) ;
+ pending = s->mac_reg[TDT] - s->mac_reg[TDH];
+ if (pending < 0)
+ pending += len;
+ /* ignore requests after the first few ones, as long as
+ * we are not approaching a full ring.
+ * Otherwise, deliver packets to the backend.
+ */
+ if (s->tx_ics_count > 4 && s->tx_ics_count + pending < len - 5)
+ return;
+#endif // MITIGATION
+
while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
base = tx_desc_base(s) +
sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
@@ -663,7 +693,21 @@ start_xmit(E1000State *s)
break;
}
}
+#ifdef MITIGATION
+ s->int_cause |= cause; // remember the interrupt cause.
+ s->tx_ics_count += pending;
+ if (s->tx_ics_count >= len - 5) {
+ // if the ring is about to become full, generate an interrupt
+ set_ics(s, 0, s->int_cause);
+ s->tx_ics_count = 0;
+ s->int_cause = 0;
+ } else { // otherwise just schedule it for later.
+ qemu_bh_schedule_idle(s->int_bh);
+ }
+}
+#else /* !MITIGATION */
set_ics(s, 0, cause);
+#endif
}
static int
@@ -875,7 +919,27 @@ e1000_receive(VLANClientState *nc, const
s->rxbuf_min_shift)
n |= E1000_ICS_RXDMT0;
+#ifdef MITIGATION
+#define MIT_RXDMT0_SENT 100000 // large
+ s->int_cause |= n;
+ if (s->rx_ics_count == 0) {
+ /* deliver the first interrupt */
+ set_ics(s, 0, s->int_cause);
+ s->int_cause = 0;
+ s->rx_ics_count++;
+ } else if ( (n & E1000_ICS_RXDMT0) && s->rx_ics_count < MIT_RXDMT0_SENT) {
+ /* also deliver if we are approaching ring full */
+ set_ics(s, 0, s->int_cause);
+ s->int_cause = 0;
+ s->rx_ics_count = MIT_RXDMT0_SENT;
+ } else {
+ /* otherwise schedule for later */
+ s->rx_ics_count++;
+ qemu_bh_schedule_idle(s->int_bh);
+ }
+#else /* !MITIGATION */
set_ics(s, 0, n);
+#endif /* !MITIGATION */
return size;
}
@@ -1214,6 +1281,20 @@ static NetClientInfo net_e1000_info = {
.link_status_changed = e1000_set_link_status,
};
+#ifdef MITIGATION
+static void e1000_int_bh(void *opaque)
+{
+ E1000State *s = opaque;
+ if (s->tx_ics_count < 1 && s->rx_ics_count < 1)
+ return;
+ s->tx_ics_count = 0;
+ s->rx_ics_count = 0;
+ start_xmit(s);
+ set_ics(s, 0, s->int_cause);
+ s->int_cause = 0;
+}
+#endif /* MITIGATION */
+
static int pci_e1000_init(PCIDevice *pci_dev)
{
E1000State *d = DO_UPCAST(E1000State, dev, pci_dev);
@@ -1231,6 +1312,9 @@ static int pci_e1000_init(PCIDevice *pci
e1000_mmio_setup(d);
+#ifdef MITIGATION
+ d->int_bh = qemu_bh_new(e1000_int_bh, d);
+#endif /* MITIGATION */
pci_register_bar(&d->dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);
pci_register_bar(&d->dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);
- [Qemu-devel] interrupt mitigation for e1000,
Luigi Rizzo <=