[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH] pseries: Add support for new KVM hash table control call
From: |
Benjamin Herrenschmidt |
Subject: |
Re: [Qemu-devel] [PATCH] pseries: Add support for new KVM hash table control call |
Date: |
Wed, 27 Jun 2012 22:12:26 +1000 |
On Wed, 2012-06-27 at 22:10 +1000, Benjamin Herrenschmidt wrote:
> From: David Gibson <address@hidden>
>
> This adds support for the new "reset htab" ioctl which allows qemu
> to properly clean up the MMU hash table when the guest is reset. With
> the corresponding kernel support, reset of a guest now works properly.
Forgot to mention ... this depends on a newer linux kvm.h from Avi's
-next branch, so don't apply this patch to qemu until kvm.h has the
update adding the definitions for KVM_CAP_PPC_ALLOC_HTAB and
KVM_PPC_ALLOCATE_HTAB.
Cheers,
Ben.
> This also paves the way for indicating a different size hash table
> to the kernel and for the kernel to be able to impose limits on
> the requested size.
>
> Signed-off-by: David Gibson <address@hidden>
> Signed-off-by: Benjamin Herrenschmidt <address@hidden>
> ---
> hw/spapr.c | 88
> ++++++++++++++++++++++++++++++++------------------
> hw/spapr.h | 2 +-
> target-ppc/kvm.c | 17 ++++++++++
> target-ppc/kvm_ppc.h | 7 ++++
> 4 files changed, 82 insertions(+), 32 deletions(-)
>
> diff --git a/hw/spapr.c b/hw/spapr.c
> index a6bc5e8..e19dbd8 100644
> --- a/hw/spapr.c
> +++ b/hw/spapr.c
> @@ -83,6 +83,8 @@
>
> #define PHANDLE_XICP 0x00001111
>
> +#define HTAB_SIZE(spapr) (1ULL << ((spapr)->htab_shift))
> +
> sPAPREnvironment *spapr;
> static int spapr_has_graphics;
>
> @@ -111,12 +113,13 @@ qemu_irq spapr_allocate_irq(uint32_t hint, uint32_t
> *irq_num,
> return qirq;
> }
>
> -static int spapr_set_associativity(void *fdt, sPAPREnvironment *spapr)
> +static int spapr_fixup_cpu_dt(void *fdt, sPAPREnvironment *spapr)
> {
> int ret = 0, offset;
> CPUPPCState *env;
> char cpu_model[32];
> int smt = kvmppc_smt_threads();
> + uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)};
>
> assert(spapr->cpu_model);
>
> @@ -140,8 +143,16 @@ static int spapr_set_associativity(void *fdt,
> sPAPREnvironment *spapr)
> return offset;
> }
>
> - ret = fdt_setprop(fdt, offset, "ibm,associativity", associativity,
> - sizeof(associativity));
> + if (nb_numa_nodes > 1) {
> + ret = fdt_setprop(fdt, offset, "ibm,associativity",
> associativity,
> + sizeof(associativity));
> + if (ret < 0) {
> + return ret;
> + }
> + }
> +
> + ret = fdt_setprop(fdt, offset, "ibm,pft-size",
> + pft_size_prop, sizeof(pft_size_prop));
> if (ret < 0) {
> return ret;
> }
> @@ -189,15 +200,13 @@ static void *spapr_create_fdt_skel(const char
> *cpu_model,
> target_phys_addr_t initrd_size,
> target_phys_addr_t kernel_size,
> const char *boot_device,
> - const char *kernel_cmdline,
> - long hash_shift)
> + const char *kernel_cmdline)
> {
> void *fdt;
> CPUPPCState *env;
> uint64_t mem_reg_property[2];
> uint32_t start_prop = cpu_to_be32(initrd_base);
> uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size);
> - uint32_t pft_size_prop[] = {0, cpu_to_be32(hash_shift)};
> char hypertas_prop[] =
> "hcall-pft\0hcall-term\0hcall-dabr\0hcall-interrupt"
> "\0hcall-tce\0hcall-vio\0hcall-splpar\0hcall-bulk";
> char qemu_hypertas_prop[] = "hcall-memop1";
> @@ -366,8 +375,6 @@ static void *spapr_create_fdt_skel(const char *cpu_model,
> _FDT((fdt_property_cell(fdt, "timebase-frequency", tbfreq)));
> _FDT((fdt_property_cell(fdt, "clock-frequency", cpufreq)));
> _FDT((fdt_property_cell(fdt, "ibm,slb-size", env->slb_nr)));
> - _FDT((fdt_property(fdt, "ibm,pft-size",
> - pft_size_prop, sizeof(pft_size_prop))));
> _FDT((fdt_property_string(fdt, "status", "okay")));
> _FDT((fdt_property(fdt, "64-bit", NULL, 0)));
>
> @@ -502,11 +509,9 @@ static void spapr_finalize_fdt(sPAPREnvironment *spapr,
> }
>
> /* Advertise NUMA via ibm,associativity */
> - if (nb_numa_nodes > 1) {
> - ret = spapr_set_associativity(fdt, spapr);
> - if (ret < 0) {
> - fprintf(stderr, "Couldn't set up NUMA device tree properties\n");
> - }
> + ret = spapr_fixup_cpu_dt(fdt, spapr);
> + if (ret < 0) {
> + fprintf(stderr, "Couldn't finalize CPU device tree properties\n");
> }
>
> if (!spapr_has_graphics) {
> @@ -536,12 +541,34 @@ static void emulate_spapr_hypercall(CPUPPCState *env)
> env->gpr[3] = spapr_hypercall(env, env->gpr[3], &env->gpr[4]);
> }
>
> -static void spapr_reset(void *opaque)
> +static void spapr_reset_htab(void *opaque)
> {
> sPAPREnvironment *spapr = (sPAPREnvironment *)opaque;
> + long shift;
> +
> + /* allocate hash page table. For now we always make this 16mb,
> + * later we should probably make it scale to the size of guest
> + * RAM */
> +
> + shift = kvmppc_reset_htab(spapr->htab_shift);
> +
> + if (shift > 0) {
> + /* Kernel handles htab, we don't need to allocate one */
> + spapr->htab_shift = shift;
> + } else {
> + if (!spapr->htab) {
> + /* Allocate an htab if we don't yet have one */
> + spapr->htab = qemu_memalign(HTAB_SIZE(spapr), HTAB_SIZE(spapr));
> + }
> +
> + /* And clear it */
> + memset(spapr->htab, 0, HTAB_SIZE(spapr));
> + }
> +}
>
> - /* flush out the hash table */
> - memset(spapr->htab, 0, spapr->htab_size);
> +static void spapr_reset(void *opaque)
> +{
> + sPAPREnvironment *spapr = (sPAPREnvironment *)opaque;
>
> /* Load the fdt */
> spapr_finalize_fdt(spapr, spapr->fdt_addr, spapr->rtas_addr,
> @@ -558,8 +585,16 @@ static void spapr_reset(void *opaque)
> static void spapr_cpu_reset(void *opaque)
> {
> PowerPCCPU *cpu = opaque;
> + CPUPPCState *env = &cpu->env;
>
> cpu_reset(CPU(cpu));
> +
> + env->external_htab = spapr->htab;
> + env->htab_base = -1;
> + env->htab_mask = HTAB_SIZE(spapr) - 1;
> +
> + env->spr[SPR_SDR1] = (unsigned long)spapr->htab |
> + (spapr->htab_shift - 18);
> }
>
> static int spapr_vga_init(PCIBus *pci_bus)
> @@ -603,7 +638,6 @@ static void ppc_spapr_init(ram_addr_t ram_size,
> uint32_t initrd_base = 0;
> long kernel_size = 0, initrd_size = 0;
> long load_limit, rtas_limit, fw_size;
> - long pteg_shift = 17;
> char *filename;
>
> spapr = g_malloc0(sizeof(*spapr));
> @@ -632,6 +666,11 @@ static void ppc_spapr_init(ram_addr_t ram_size,
> spapr->fdt_addr = spapr->rtas_addr - FDT_MAX_SIZE;
> load_limit = spapr->fdt_addr - FW_OVERHEAD;
>
> + /* For now, always aim for a 16MB hash table */
> + /* FIXME: we should change this default based on RAM size */
> + spapr->htab_shift = 24;
> + qemu_register_reset(spapr_reset_htab, spapr);
> +
> /* init CPUs */
> if (cpu_model == NULL) {
> cpu_model = kvm_enabled() ? "host" : "POWER7";
> @@ -664,20 +703,8 @@ static void ppc_spapr_init(ram_addr_t ram_size,
> memory_region_add_subregion(sysmem, nonrma_base, ram);
> }
>
> - /* allocate hash page table. For now we always make this 16mb,
> - * later we should probably make it scale to the size of guest
> - * RAM */
> - spapr->htab_size = 1ULL << (pteg_shift + 7);
> - spapr->htab = qemu_memalign(spapr->htab_size, spapr->htab_size);
> -
> for (env = first_cpu; env != NULL; env = env->next_cpu) {
> - env->external_htab = spapr->htab;
> - env->htab_base = -1;
> - env->htab_mask = spapr->htab_size - 1;
> -
> /* Tell KVM that we're in PAPR mode */
> - env->spr[SPR_SDR1] = (unsigned long)spapr->htab |
> - ((pteg_shift + 7) - 18);
> env->spr[SPR_HIOR] = 0;
>
> if (kvm_enabled()) {
> @@ -816,8 +843,7 @@ static void ppc_spapr_init(ram_addr_t ram_size,
> spapr->fdt_skel = spapr_create_fdt_skel(cpu_model, rma_size,
> initrd_base, initrd_size,
> kernel_size,
> - boot_device, kernel_cmdline,
> - pteg_shift + 7);
> + boot_device, kernel_cmdline);
> assert(spapr->fdt_skel != NULL);
>
> qemu_register_reset(spapr_reset, spapr);
> diff --git a/hw/spapr.h b/hw/spapr.h
> index 9153f29..7ec4d7c 100644
> --- a/hw/spapr.h
> +++ b/hw/spapr.h
> @@ -15,7 +15,7 @@ typedef struct sPAPREnvironment {
>
> target_phys_addr_t ram_limit;
> void *htab;
> - long htab_size;
> + long htab_shift;
> target_phys_addr_t fdt_addr, rtas_addr;
> long rtas_size;
> void *fdt_skel;
> diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
> index 829e180..12ae0d7 100644
> --- a/target-ppc/kvm.c
> +++ b/target-ppc/kvm.c
> @@ -1101,6 +1101,23 @@ int kvmppc_remove_spapr_tce(void *table, int fd,
> uint32_t window_size)
> return 0;
> }
>
> +int kvmppc_reset_htab(int shift_hint)
> +{
> + uint32_t shift = shift_hint;
> +
> + if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
> + int ret;
> + ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
> + if (ret < 0) {
> + return ret;
> + }
> + return shift;
> + }
> +
> + /* For now.. */
> + return 0;
> +}
> +
> static inline uint32_t mfpvr(void)
> {
> uint32_t pvr;
> diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h
> index e2f8703..b5be657 100644
> --- a/target-ppc/kvm_ppc.h
> +++ b/target-ppc/kvm_ppc.h
> @@ -27,6 +27,7 @@ int kvmppc_smt_threads(void);
> off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem);
> void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int
> *pfd);
> int kvmppc_remove_spapr_tce(void *table, int pfd, uint32_t window_size);
> +int kvmppc_reset_htab(int shift_hint);
> #endif /* !CONFIG_USER_ONLY */
> const ppc_def_t *kvmppc_host_cpu_def(void);
> int kvmppc_fixup_cpu(CPUPPCState *env);
> @@ -94,6 +95,12 @@ static inline int kvmppc_remove_spapr_tce(void *table, int
> pfd,
> {
> return -1;
> }
> +
> +static inline int kvmppc_reset_htab(int shift_hint)
> +{
> + return -1;
> +}
> +
> #endif /* !CONFIG_USER_ONLY */
>
> static inline const ppc_def_t *kvmppc_host_cpu_def(void)
>