[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH 5/5] memory: able to pin guest node memory to host node manually
From: |
Wanlong Gao |
Subject: |
Re: [Qemu-devel] [PATCH 5/5] memory: able to pin guest node memory to host node manually |
Date: |
Mon, 27 May 2013 10:57:15 +0800 |
User-agent: |
Mozilla/5.0 (X11; Linux x86_64; rv:17.0) Gecko/20130514 Thunderbird/17.0.6 |
Ping............
> On 05/23/2013 04:47 PM, Wanlong Gao wrote:
>> Use mbind to pin guest numa node memory to host nodes manually.
>>
>> If we are not able to pin memory to a host node, we may hit a
>> cross-node memory access performance regression.
>>
>> With this patch, we can manually pin a guest node to a host node like this:
>> -m 1024 -numa node,cpus=0,nodeid=0,mem=512,pin=0 -numa
>> node,nodeid=1,cpus=1,mem=512,pin=1
>>
>> Also, if PCI passthrough is used, the directly attached device uses DMA
>> transfers between the device and the qemu process. All pages of the guest
>> will be pinned by get_user_pages().
>>
>> KVM_ASSIGN_PCI_DEVICE ioctl
>> kvm_vm_ioctl_assign_device()
>> =>kvm_assign_device()
>> => kvm_iommu_map_memslots()
>> => kvm_iommu_map_pages()
>> => kvm_pin_pages()
>>
>> So, with a directly attached device, the page count of every guest page is
>> incremented by one and page migration will not work. AutoNUMA won't work
>> either, and any direction given by libvirt is *ignored*.
>>
>> Given all of the above, we need to be able to manually pin memory to a
>> host node to avoid such cross-node memory access performance regressions.
>
> Any comments ?
>
> Thanks,
> Wanlong Gao
>
>>
>> Signed-off-by: Wanlong Gao <address@hidden>
>> ---
>> exec.c | 21 +++++++++++++++++++++
>> include/sysemu/sysemu.h | 1 +
>> vl.c | 13 +++++++++++++
>> 3 files changed, 35 insertions(+)
>>
>> diff --git a/exec.c b/exec.c
>> index aec65c5..fe929ef 100644
>> --- a/exec.c
>> +++ b/exec.c
>> @@ -36,6 +36,8 @@
>> #include "qemu/config-file.h"
>> #include "exec/memory.h"
>> #include "sysemu/dma.h"
>> +#include "sysemu/sysemu.h"
>> +#include "qemu/bitops.h"
>> #include "exec/address-spaces.h"
>> #if defined(CONFIG_USER_ONLY)
>> #include <qemu.h>
>> @@ -1081,6 +1083,25 @@ ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size,
>> void *host,
>> memory_try_enable_merging(new_block->host, size);
>> }
>> }
>> +
>> + if (nb_numa_nodes > 0 && !strcmp(mr->name, "pc.ram")) {
>> + int i;
>> + uint64_t nodes_mem = 0;
>> + unsigned long *maskp = g_malloc0(sizeof(*maskp));
>> + for (i = 0; i < nb_numa_nodes; i++) {
>> + *maskp = 0;
>> + if (node_pin[i] != -1) {
>> + set_bit(node_pin[i], maskp);
>> + if (qemu_mbind(new_block->host + nodes_mem, node_mem[i],
>> + QEMU_MPOL_BIND, maskp, MAX_NODES, 0)) {
>> + perror("qemu_mbind");
>> + exit(1);
>> + }
>> + }
>> + nodes_mem += node_mem[i];
>> + }
>> + }
>> +
>> new_block->length = size;
>>
>> /* Keep the list sorted from biggest to smallest block. */
>> diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
>> index 2fb71af..ebf6580 100644
>> --- a/include/sysemu/sysemu.h
>> +++ b/include/sysemu/sysemu.h
>> @@ -131,6 +131,7 @@ extern QEMUClock *rtc_clock;
>> #define MAX_CPUMASK_BITS 255
>> extern int nb_numa_nodes;
>> extern uint64_t node_mem[MAX_NODES];
>> +extern int node_pin[MAX_NODES];
>> extern unsigned long *node_cpumask[MAX_NODES];
>>
>> #define MAX_OPTION_ROMS 16
>> diff --git a/vl.c b/vl.c
>> index 5555b1d..3768002 100644
>> --- a/vl.c
>> +++ b/vl.c
>> @@ -253,6 +253,7 @@ static QTAILQ_HEAD(, FWBootEntry) fw_boot_order =
>>
>> int nb_numa_nodes;
>> uint64_t node_mem[MAX_NODES];
>> +int node_pin[MAX_NODES];
>> unsigned long *node_cpumask[MAX_NODES];
>>
>> uint8_t qemu_uuid[16];
>> @@ -1390,6 +1391,17 @@ static void numa_add(const char *optarg)
>> }
>> node_mem[nodenr] = sval;
>> }
>> +
>> + if (get_param_value(option, 128, "pin", optarg) != 0) {
>> + int unsigned long long pin_node;
>> + if (parse_uint_full(option, &pin_node, 10) < 0) {
>> + fprintf(stderr, "qemu: Invalid pinning nodeid: %s\n",
>> optarg);
>> + exit(1);
>> + } else {
>> + node_pin[nodenr] = pin_node;
>> + }
>> + }
>> +
>> if (get_param_value(option, 128, "cpus", optarg) != 0) {
>> numa_node_parse_cpus(nodenr, option);
>> }
>> @@ -2921,6 +2933,7 @@ int main(int argc, char **argv, char **envp)
>>
>> for (i = 0; i < MAX_NODES; i++) {
>> node_mem[i] = 0;
>> + node_pin[i] = -1;
>> node_cpumask[i] = bitmap_new(MAX_CPUMASK_BITS);
>> }
>>
>>
>
>