qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH v3] NUMA: Enable adding NUMA node implicitly


From: Igor Mammedov
Subject: Re: [Qemu-devel] [PATCH v3] NUMA: Enable adding NUMA node implicitly
Date: Wed, 18 Oct 2017 12:21:24 +0200

On Thu, 21 Sep 2017 17:23:08 +0800
Dou Liyang <address@hidden> wrote:

> Linux and Windows need the ACPI SRAT table to make memory hotplug work
> properly; however, QEMU currently doesn't create an SRAT table if no numa
> options are present on the CLI.
> 
> This breaks both Linux and Windows guests in certain conditions:
>  * Windows: won't enable memory hotplug without SRAT table at all
>  * Linux: if QEMU is started with initial memory all below 4Gb and no SRAT
>    table present, guest kernel will use nommu DMA ops, which breaks 32bit hw
>    drivers when memory is hotplugged and guest tries to use it with those
>    drivers.
> 
> Fix the above issues by automatically creating a numa node when QEMU is
> started with memory hotplug enabled but without '-numa' options on the CLI.
> (PS: auto-create the numa node only for new machine types so as not to break
> migration).
> 
> This provides an SRAT table to guests without explicit -numa options on the
> CLI and allows:
>  * Windows: to enable memory hotplug
>  * Linux: to switch to SWIOTLB DMA ops, to bounce DMA transfers to 32bit
>    allocated buffers that legacy drivers/hw can handle.
> 
> [Rewritten by Igor]
> 
> Reported-by: Thadeu Lima de Souza Cascardo <address@hidden>
> Suggested-by: Igor Mammedov <address@hidden>
> Signed-off-by: Dou Liyang <address@hidden>
> Cc: Paolo Bonzini <address@hidden>
> Cc: Richard Henderson <address@hidden>
> Cc: Eduardo Habkost <address@hidden>
> Cc: "Michael S. Tsirkin" <address@hidden>
> Cc: Marcel Apfelbaum <address@hidden>
> Cc: Igor Mammedov <address@hidden>
> Cc: David Hildenbrand <address@hidden>
> Cc: Thomas Huth <address@hidden>
> Cc: Alistair Francis <address@hidden>
> Cc: address@hidden
> Cc: Takao Indoh <address@hidden>
> Cc: Izumi Taku <address@hidden>
> ---
> changelog V2 --> V3:
>   -Replace the callback function with a boolean parameter suggested by Igor
>   -Use QTAILQ_EMPTY() macro to check the QemuOptsList
> 
>  hw/i386/pc.c          |  1 +
>  hw/i386/pc_piix.c     |  1 +
>  hw/i386/pc_q35.c      |  1 +
>  include/hw/boards.h   |  1 +
>  include/sysemu/numa.h |  2 +-
>  numa.c                | 24 ++++++++++++++++++++++--
>  vl.c                  |  9 +++++----
>  7 files changed, 32 insertions(+), 7 deletions(-)
> 
> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> index 05985d4..f1a44cc 100644
> --- a/hw/i386/pc.c
> +++ b/hw/i386/pc.c
> @@ -2318,6 +2318,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
>      mc->cpu_index_to_instance_props = pc_cpu_index_to_props;
>      mc->get_default_cpu_node_id = pc_get_default_cpu_node_id;
>      mc->possible_cpu_arch_ids = pc_possible_cpu_arch_ids;
> +    mc->auto_enable_numa_with_memhp = true;
>      mc->has_hotpluggable_cpus = true;
>      mc->default_boot_order = "cad";
>      mc->hot_add_cpu = pc_hot_add_cpu;
> diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
> index 9ff79b1..d87a433 100644
> --- a/hw/i386/pc_piix.c
> +++ b/hw/i386/pc_piix.c
> @@ -449,6 +449,7 @@ static void pc_i440fx_2_10_machine_options(MachineClass *m)
>      m->is_default = 0;
>      m->alias = NULL;
>      SET_MACHINE_COMPAT(m, PC_COMPAT_2_10);
> +    m->auto_enable_numa_with_memhp = false;
>  }
>  
>  DEFINE_I440FX_MACHINE(v2_10, "pc-i440fx-2.10", NULL,
> diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
> index 6c4ec4b..68cbfc5 100644
> --- a/hw/i386/pc_q35.c
> +++ b/hw/i386/pc_q35.c
> @@ -319,6 +319,7 @@ static void pc_q35_2_10_machine_options(MachineClass *m)
>      m->alias = NULL;
>      SET_MACHINE_COMPAT(m, PC_COMPAT_2_10);
>      m->numa_auto_assign_ram = numa_legacy_auto_assign_ram;
> +    m->auto_enable_numa_with_memhp = false;
>  }
>  
>  DEFINE_Q35_MACHINE(v2_10, "pc-q35-2.10", NULL,
> diff --git a/include/hw/boards.h b/include/hw/boards.h
> index 156e0a5..0fe2c8f 100644
> --- a/include/hw/boards.h
> +++ b/include/hw/boards.h
> @@ -191,6 +191,7 @@ struct MachineClass {
>      bool has_hotpluggable_cpus;
>      bool ignore_memory_transaction_failures;
>      int numa_mem_align_shift;
> +    bool auto_enable_numa_with_memhp;
>      void (*numa_auto_assign_ram)(MachineClass *mc, NodeInfo *nodes,
>                                   int nb_nodes, ram_addr_t size);
>  
> diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h
> index 5c6df28..31d3ac0 100644
> --- a/include/sysemu/numa.h
> +++ b/include/sysemu/numa.h
> @@ -30,7 +30,7 @@ struct NumaNodeMem {
>  };
>  
>  extern NodeInfo numa_info[MAX_NODES];
> -void parse_numa_opts(MachineState *ms);
> +void parse_numa_opts(MachineState *ms, uint64_t ram_slots);
>  void query_numa_node_mem(NumaNodeMem node_mem[]);
>  extern QemuOptsList qemu_numa_opts;
>  void numa_set_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node);
> diff --git a/numa.c b/numa.c
> index 100a67f..ba8d813 100644
> --- a/numa.c
> +++ b/numa.c
> @@ -423,12 +423,32 @@ void numa_default_auto_assign_ram(MachineClass *mc, NodeInfo *nodes,
>      nodes[i].node_mem = size - usedmem;
>  }
>  
> -void parse_numa_opts(MachineState *ms)
> +void parse_numa_opts(MachineState *ms, uint64_t ram_slots)
>  {
>      int i;
>      MachineClass *mc = MACHINE_GET_CLASS(ms);
> +    QemuOptsList *numa_opts = qemu_find_opts("numa");
>  
> -    if (qemu_opts_foreach(qemu_find_opts("numa"), parse_numa, ms, NULL)) {
> +    /*
> +     * If memory hotplug is enabled (slots > 0) but without '-numa'
> +     * options explicitly on CLI, guests will break.
> +     *
> +     *   Windows: won't enable memory hotplug without SRAT table at all
> +     *
> +     *   Linux: if QEMU is started with initial memory all below 4Gb
> +     *   and no SRAT table present, guest kernel will use nommu DMA ops,
> +     *   which breaks 32bit hw drivers when memory is hotplugged and
> +     *   guest tries to use it with those drivers.
> +     *
> +     * Enable NUMA implicitly by adding a new NUMA node automatically.
> +     */
> +    if (ram_slots > 0 && QTAILQ_EMPTY(&numa_opts->head)) {
> +        if (mc->auto_enable_numa_with_memhp) {
> +            qemu_opts_parse_noisily(numa_opts, "node", true);
> +        }
> +    }
> +
> +    if (qemu_opts_foreach(numa_opts, parse_numa, ms, NULL)) {
>          exit(1);
>      }
>  
> diff --git a/vl.c b/vl.c
> index 9bb5058..d083b4d 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -4665,7 +4665,11 @@ int main(int argc, char **argv, char **envp)
>      default_drive(default_floppy, snapshot, IF_FLOPPY, 0, FD_OPTS);
>      default_drive(default_sdcard, snapshot, IF_SD, 0, SD_OPTS);
>  
> -    parse_numa_opts(current_machine);
> +    current_machine->ram_size = ram_size;
> +    current_machine->maxram_size = maxram_size;
> +    current_machine->ram_slots = ram_slots;
> +
> +    parse_numa_opts(current_machine, ram_slots);
>  
>      if (qemu_opts_foreach(qemu_find_opts("mon"),
>                            mon_init_func, NULL, NULL)) {
> @@ -4710,9 +4714,6 @@ int main(int argc, char **argv, char **envp)
>      replay_checkpoint(CHECKPOINT_INIT);
>      qdev_machine_init();
>  
> -    current_machine->ram_size = ram_size;
> -    current_machine->maxram_size = maxram_size;
> -    current_machine->ram_slots = ram_slots;
>      current_machine->boot_order = boot_order;
>      current_machine->cpu_model = cpu_model;
It should be safe to move parse_numa_opts(current_machine) here.

  




reply via email to

[Prev in Thread] Current Thread [Next in Thread]