qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH 5/5] sclp-s390: Add memory hotplug SCLPs


From: Alexander Graf
Subject: Re: [Qemu-devel] [PATCH 5/5] sclp-s390: Add memory hotplug SCLPs
Date: Mon, 16 Dec 2013 22:42:57 +0100

On 16.12.2013, at 21:51, Matthew Rosato <address@hidden> wrote:

> Add memory information to read SCP info and add handlers for
> Read Storage Element Information, Attach Storage Element,
> Assign Storage and Unassign Storage.
> 
> Signed-off-by: Matthew Rosato <address@hidden>
> ---
> hw/s390x/sclp.c         |  233 +++++++++++++++++++++++++++++++++++++++++++++--
> include/hw/s390x/sclp.h |    2 +
> 2 files changed, 229 insertions(+), 6 deletions(-)
> 
> diff --git a/hw/s390x/sclp.c b/hw/s390x/sclp.c
> index cb53d7e..5d27fc3 100644
> --- a/hw/s390x/sclp.c
> +++ b/hw/s390x/sclp.c
> @@ -15,9 +15,15 @@
> #include "cpu.h"
> #include "sysemu/kvm.h"
> #include "exec/memory.h"
> -
> +#include "exec/address-spaces.h"
> #include "hw/s390x/sclp.h"
> 
> +int shift;
> +ram_addr_t standby_subregion_size;
> +ram_addr_t padded_ram_size;
> +ram_addr_t rzm;
> +char *standby_state_map;

Do you really need all these globals?

> +
> static inline S390SCLPDevice *get_event_facility(void)
> {
>     ObjectProperty *op = object_property_find(qdev_get_machine(),
> @@ -31,16 +37,215 @@ static inline S390SCLPDevice *get_event_facility(void)
> static void read_SCP_info(SCCB *sccb)
> {
>     ReadInfo *read_info = (ReadInfo *) sccb;
> -    int shift = 0;
> +    int rnsize, rnmax;
> +    int max_avail_slots = MAX_AVAIL_SLOTS;
> +#ifdef CONFIG_KVM
> +    max_avail_slots = kvm_check_extension(kvm_state, KVM_CAP_NR_MEMSLOTS);

Please don't call kvm specific code from non-kvm specific code. Check out 
kvm_s390_io_interrupt() as an example how to plumb it in. Please also check 
kvm_enabled(), as CONFIG_KVM doesn't mean that you actually end up using KVM. 
It's probably best to completely abstract this away in a helper.

> +#endif
> +
> +    read_info->facilities = 0;
> +
> +    if (standby_mem_size) {

I don't understand why we have 2 code paths - one for standby and one without. 
Can't we just handle all cases as standby?

> +        /*
> +         * The storage increment size is a multiple of 1M and is a power of 
> 2.
> +         * The number of storage increments must be 1020 or fewer.
> +         */
> +        while ((ram_size >> (20 + shift)) > 1020) {
> +            shift++;
> +        }
> +        while ((standby_mem_size >> (20 + shift)) > 1020) {
> +            shift++;
> +        }
> +
> +        standby_subregion_size = MEM_SECTION_SIZE;
> +        /* Deduct the memory slot already used for core */
> +        while ((standby_subregion_size * (max_avail_slots - 1)
> +               < standby_mem_size)) {
> +            standby_subregion_size = standby_subregion_size << 1;
> +        }
> +        /*
> +         * Initialize mapping of guest standby memory sections indicating 
> which
> +         * are and are not online. Assume all standby memory begins offline.
> +         */
> +        if (standby_mem_size % standby_subregion_size) {
> +            standby_state_map = g_malloc0((standby_mem_size /
> +                                           standby_subregion_size + 1) *
> +                                          (standby_subregion_size /
> +                                           MEM_SECTION_SIZE));
> +        } else {
> +            standby_state_map = g_malloc0(standby_mem_size / 
> MEM_SECTION_SIZE);
> +        }

You don't free this thing when the guest calls read_SCP_info twice.

> +
> +        padded_ram_size = ram_size + pad_size;
> +        rzm = 1 << (20 + shift);
> +
> +    } else {
> +        while ((ram_size >> (20 + shift)) > 65535) {
> +            shift++;
> +        }
> +    }
> 
> -    while ((ram_size >> (20 + shift)) > 65535) {
> -        shift++;
> +    rnsize = 1 << shift;
> +    if (rnsize <= 128) {
> +        read_info->rnsize = rnsize;
> +    } else {
> +        read_info->rnsize = 0;
> +        read_info->rnsize2 = cpu_to_be32(rnsize);
> +    }
> +
> +    rnmax = (ram_size + standby_mem_size + pad_size) >> (20 + shift);
> +    if (rnmax < 0x10000) {
> +        read_info->rnmax = cpu_to_be16(rnmax);
> +    } else {
> +        read_info->rnmax = cpu_to_be16(0);
> +        read_info->rnmax2 = cpu_to_be64(rnmax);
> +    }
> +
> +    if (standby_mem_size) {
> +        read_info->facilities |= 
> cpu_to_be64(SCLP_FC_ASSIGN_ATTACH_READ_STOR);

Any reason to make this conditional?

>     }
> -    read_info->rnmax = cpu_to_be16(ram_size >> (20 + shift));
> -    read_info->rnsize = 1 << shift;
>     sccb->h.response_code = cpu_to_be16(SCLP_RC_NORMAL_READ_COMPLETION);
> }
> 
> +static void read_storage_element0_info(SCCB *sccb)
> +{
> +    int i, assigned;
> +    int subincrement_id = SCLP_STARTING_SUBINCREMENT_ID;
> +    ReadStorageElementInfo *storage_info = (ReadStorageElementInfo *) sccb;
> +
> +    if ((ram_size >> (20 + shift)) >= 0x10000) {
> +        sccb->h.response_code = cpu_to_be16(SCLP_RC_SCCB_BOUNDARY_VIOLATION);
> +        return;
> +    }
> +
> +    storage_info->max_id = cpu_to_be16(standby_mem_size ? 1 : 0);
> +    assigned = ram_size >> (20 + shift);
> +    storage_info->assigned = cpu_to_be16(assigned);
> +
> +    for (i = 0; i < assigned; i++) {
> +        storage_info->entries[i] = cpu_to_be32(subincrement_id);
> +        subincrement_id += SCLP_INCREMENT_UNIT;
> +    }
> +    sccb->h.response_code = cpu_to_be16(SCLP_RC_NORMAL_READ_COMPLETION);
> +}
> +
> +static void read_storage_element1_info(SCCB *sccb)
> +{
> +    ReadStorageElementInfo *storage_info = (ReadStorageElementInfo *) sccb;
> +
> +    if ((standby_mem_size >> (20 + shift)) >= 0x10000) {
> +        sccb->h.response_code = cpu_to_be16(SCLP_RC_SCCB_BOUNDARY_VIOLATION);
> +        return;
> +    }
> +
> +    storage_info->max_id = cpu_to_be16(standby_mem_size ? 1 : 0);
> +    storage_info->assigned = cpu_to_be16(standby_mem_size >> (20 + shift));
> +    storage_info->standby = cpu_to_be16(standby_mem_size >> (20 + shift));
> +    sccb->h.response_code = cpu_to_be16(SCLP_RC_STANDBY_READ_COMPLETION);
> +}
> +
> +static void attach_storage_element(SCCB *sccb, uint16_t element)
> +{
> +    int i, assigned, subincrement_id;
> +    AttachStorageElement *attach_info = (AttachStorageElement *) sccb;
> +
> +    if (element != 1) {
> +        sccb->h.response_code = cpu_to_be16(SCLP_RC_INVALID_SCLP_COMMAND);
> +        return;
> +    }
> +
> +    assigned = standby_mem_size >> (20 + shift);
> +    attach_info->assigned = cpu_to_be16(assigned);
> +    subincrement_id = ((ram_size >> (20 + shift)) << 16)
> +                      + SCLP_STARTING_SUBINCREMENT_ID;
> +    for (i = 0; i < assigned; i++) {
> +        attach_info->entries[i] = cpu_to_be32(subincrement_id);
> +        subincrement_id += SCLP_INCREMENT_UNIT;
> +    }
> +    sccb->h.response_code = cpu_to_be16(SCLP_RC_NORMAL_COMPLETION);
> +}
> +
> +static void assign_storage(SCCB *sccb)
> +{
> +    MemoryRegion *mr = NULL;
> +    int this_subregion_size;
> +    AssignStorage *assign_info = (AssignStorage *) sccb;
> +    ram_addr_t assign_addr = (assign_info->rn - 1) * rzm;
> +    MemoryRegion *sysmem = get_system_memory();
> +
> +    if ((assign_addr % MEM_SECTION_SIZE == 0) &&
> +        (assign_addr >= padded_ram_size)) {
> +        mr = memory_region_find(sysmem, assign_addr, 1).mr;
> +        if (!mr) {
> +
> +            MemoryRegion *standby_ram = g_new(MemoryRegion, 1);
> +
> +            /* offset to align to standby_subregion_size for allocation */
> +            ram_addr_t offset = assign_addr -
> +                                (assign_addr - padded_ram_size)
> +                                % standby_subregion_size;
> +
> +            /* strlen("standby.ram") + 4 (Max of KVM_MEMORY_SLOTS) +  NULL */
> +            char id[16];
> +            snprintf(id, 16, "standby.ram%d", (int)((offset - 
> padded_ram_size)
> +                     / standby_subregion_size) + 1);
> +
> +            /* Allocate a subregion of the calculated standby_subregion_size 
> */
> +            if (offset + standby_subregion_size >
> +                padded_ram_size + standby_mem_size) {
> +                this_subregion_size = padded_ram_size + standby_mem_size
> +                                      - offset;
> +            } else {
> +                this_subregion_size = standby_subregion_size;
> +            }
> +
> +            memory_region_init_ram(standby_ram, NULL, id, 
> this_subregion_size);
> +            vmstate_register_ram_global(standby_ram);
> +            memory_region_add_subregion(sysmem, offset, standby_ram);
> +        }
> +        standby_state_map[(assign_addr - padded_ram_size)
> +                          / MEM_SECTION_SIZE] = 1;
> +    }
> +    sccb->h.response_code = cpu_to_be16(SCLP_RC_NORMAL_COMPLETION);
> +}
> +
> +static void unassign_storage(SCCB *sccb)
> +{
> +    MemoryRegion *mr = NULL;
> +    AssignStorage *assign_info = (AssignStorage *) sccb;
> +    ram_addr_t unassign_addr = (assign_info->rn - 1) * rzm;
> +    MemoryRegion *sysmem = get_system_memory();
> +
> +    /* if the addr is a multiple of 256 MB */
> +    if ((unassign_addr % MEM_SECTION_SIZE == 0) &&
> +        (unassign_addr >= padded_ram_size)) {
> +        standby_state_map[(unassign_addr -
> +                           padded_ram_size) / MEM_SECTION_SIZE] = 0;
> +        mr = memory_region_find(sysmem, unassign_addr, 1).mr;
> +        if (mr) {
> +            int i;
> +            int is_removable = 1;
> +            ram_addr_t map_offset = (unassign_addr - padded_ram_size -
> +                                     (unassign_addr - padded_ram_size)
> +                                     % standby_subregion_size);
> +            for (i = 0;
> +                 i < (standby_subregion_size / MEM_SECTION_SIZE);
> +                 i++) {
> +
> +                if (standby_state_map[i + map_offset / MEM_SECTION_SIZE]) {
> +                    is_removable = 0;
> +                    break;
> +                }
> +            }
> +            if (is_removable) {
> +                memory_region_del_subregion(sysmem, mr);
> +                memory_region_destroy(mr);

no g_free()?

> +            }
> +        }
> +    }
> +    sccb->h.response_code = cpu_to_be16(SCLP_RC_NORMAL_COMPLETION);
> +}
> +
> static void sclp_execute(SCCB *sccb, uint64_t code)
> {
>     S390SCLPDevice *sdev = get_event_facility();
> @@ -50,6 +255,22 @@ static void sclp_execute(SCCB *sccb, uint64_t code)
>     case SCLP_CMDW_READ_SCP_INFO_FORCED:
>         read_SCP_info(sccb);
>         break;
> +    case SCLP_READ_STORAGE_ELEMENT_INFO:
> +        if (code & 0xff00) {
> +            read_storage_element1_info(sccb);
> +        } else {
> +            read_storage_element0_info(sccb);
> +        }
> +        break;
> +    case SCLP_ATTACH_STORAGE_ELEMENT:
> +        attach_storage_element(sccb, (code & 0xff00) >> 8);
> +        break;
> +    case SCLP_ASSIGN_STORAGE:
> +        assign_storage(sccb);
> +        break;
> +    case SCLP_UNASSIGN_STORAGE:
> +        unassign_storage(sccb);
> +        break;

Do you think it'd be possible to model this whole thing as a device that has 
its own state? That's where you could keep the bitmap for example. You'd only 
need some callback mechanism to hook into the SCLP calls, but the PPC guys 
already have something similar with their hypercalls.

Overall, the code could use _a lot_ more comments.


Alex




reply via email to

[Prev in Thread] Current Thread [Next in Thread]