qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] Re: [PATCH v2 1/9] msi: implemented msi.


From: Michael S. Tsirkin
Subject: [Qemu-devel] Re: [PATCH v2 1/9] msi: implemented msi.
Date: Wed, 8 Sep 2010 12:13:44 +0300
User-agent: Mutt/1.5.20 (2009-12-10)

On Wed, Sep 08, 2010 at 04:39:34PM +0900, Isaku Yamahata wrote:
> implemented msi support functions.
> 
> Signed-off-by: Isaku Yamahata <address@hidden>

Good stuff. Some minor corrections mostly to the comments.
It's up to you whether to address or ignore them.

> ---
> Changes v1 -> v2:
> - opencode some oneline helper function/macros for readability
> - use ffs where appropriate
> - rename some functions/variables as suggested.
> - added assert()
> - 1 -> 1U
> - clear INTx# when MSI is enabled
> - clear pending bits for freed vectors.
> - check the requested number of vectors.
> ---
>  Makefile.objs |    2 +-
>  hw/msi.c      |  360 
> +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  hw/msi.h      |   41 +++++++
>  hw/pci.h      |   10 +-
>  4 files changed, 409 insertions(+), 4 deletions(-)
>  create mode 100644 hw/msi.c
>  create mode 100644 hw/msi.h
> 
> diff --git a/Makefile.objs b/Makefile.objs
> index 594894b..5f5a4c5 100644
> --- a/Makefile.objs
> +++ b/Makefile.objs
> @@ -186,7 +186,7 @@ hw-obj-$(CONFIG_PIIX4) += piix4.o
>  # PCI watchdog devices
>  hw-obj-y += wdt_i6300esb.o
>  
> -hw-obj-y += msix.o
> +hw-obj-y += msix.o msi.o
>  
>  # PCI network cards
>  hw-obj-y += ne2000.o
> diff --git a/hw/msi.c b/hw/msi.c
> new file mode 100644
> index 0000000..32dc953
> --- /dev/null
> +++ b/hw/msi.c
> @@ -0,0 +1,360 @@
> +/*
> + * msi.c
> + *
> + * Copyright (c) 2010 Isaku Yamahata <yamahata at valinux co jp>
> + *                    VA Linux Systems Japan K.K.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> +
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> +
> + * You should have received a copy of the GNU General Public License along
> + * with this program; if not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#include "msi.h"
> +
> +/* Eventually those constants should go to Linux pci_regs.h */
> +#define PCI_MSI_PENDING_32      0x10
> +#define PCI_MSI_PENDING_64      0x14
> +
> +/* PCI_MSI_ADDRESS_LO */
> +#define PCI_MSI_ADDRESS_LO_MASK         (~0x3)
> +
> +/* If we get rid of cap allocator, we won't need those. */
> +#define PCI_MSI_32_SIZEOF       0x0a
> +#define PCI_MSI_64_SIZEOF       0x0e
> +#define PCI_MSI_32M_SIZEOF      0x14
> +#define PCI_MSI_64M_SIZEOF      0x18
> +
> +/* If we get rid of cap allocator, we won't need this. */
> +static inline uint8_t msi_cap_sizeof(uint16_t flags)
> +{
> +    switch (flags & (PCI_MSI_FLAGS_MASKBIT | PCI_MSI_FLAGS_64BIT)) {
> +    case PCI_MSI_FLAGS_MASKBIT | PCI_MSI_FLAGS_64BIT:
> +        return PCI_MSI_64M_SIZEOF;
> +    case PCI_MSI_FLAGS_64BIT:
> +        return PCI_MSI_64_SIZEOF;
> +    case PCI_MSI_FLAGS_MASKBIT:
> +        return PCI_MSI_32M_SIZEOF;
> +    case 0:
> +        return PCI_MSI_32_SIZEOF;
> +    default:
> +        abort();
> +        break;
> +    }
> +    return 0;
> +}
> +
> +//#define MSI_DEBUG
> +
> +#ifdef MSI_DEBUG
> +# define MSI_DPRINTF(fmt, ...)                                          \
> +    fprintf(stderr, "%s:%d " fmt, __func__, __LINE__, ## __VA_ARGS__)
> +#else
> +# define MSI_DPRINTF(fmt, ...)  do { } while (0)
> +#endif
> +#define MSI_DEV_PRINTF(dev, fmt, ...)                                   \
> +    MSI_DPRINTF("%s:%x " fmt, (dev)->name, (dev)->devfn, ## __VA_ARGS__)
> +
> +static inline uint8_t msi_nr_vectors(uint16_t flags)
> +{
> +    return 1U <<
> +        ((flags & PCI_MSI_FLAGS_QSIZE) >> (ffs(PCI_MSI_FLAGS_QSIZE) - 1));
> +}
> +
> +static inline uint8_t msi_flags_off(const PCIDevice* dev)
> +{
> +    return dev->msi_cap + PCI_MSI_FLAGS;
> +}
> +
> +static inline uint8_t msi_address_lo_off(const PCIDevice* dev)
> +{
> +    return dev->msi_cap + PCI_MSI_ADDRESS_LO;
> +}
> +
> +static inline uint8_t msi_address_hi_off(const PCIDevice* dev)
> +{
> +    return dev->msi_cap + PCI_MSI_ADDRESS_HI;
> +}
> +
> +static inline uint8_t msi_data_off(const PCIDevice* dev, bool msi64bit)
> +{
> +    return dev->msi_cap + (msi64bit ? PCI_MSI_DATA_64 : PCI_MSI_DATA_32);
> +}
> +
> +static inline uint8_t msi_mask_off(const PCIDevice* dev, bool msi64bit)
> +{
> +    return dev->msi_cap + (msi64bit ? PCI_MSI_MASK_64 : PCI_MSI_MASK_32);
> +}
> +
> +static inline uint8_t msi_pending_off(const PCIDevice* dev, bool msi64bit)
> +{
> +    return dev->msi_cap + (msi64bit ? PCI_MSI_PENDING_64 : 
> PCI_MSI_PENDING_32);
> +}
> +
> +bool msi_enabled(const PCIDevice *dev)
> +{
> +    return msi_present(dev) &&
> +        (pci_get_word(dev->config + msi_flags_off(dev)) &
> +         PCI_MSI_FLAGS_ENABLE);
> +}
> +
> +int msi_init(struct PCIDevice *dev, uint8_t offset,
> +             uint8_t nr_vectors, bool msi64bit, bool msi_per_vector_mask)
> +{
> +    uint8_t vectors_order;
> +    uint16_t flags;
> +    uint8_t cap_size;
> +    int config_offset;
> +    MSI_DEV_PRINTF(dev,
> +                   "init offset: 0x%"PRIx8" vector: %"PRId8
> +                   " 64bit %d mask %d\n",
> +                   offset, nr_vectors, msi64bit, msi_per_vector_mask);
> +
> +    assert(!(nr_vectors & (nr_vectors - 1)));   /* power of 2 */
> +    assert(nr_vectors > 0);
> +    assert(nr_vectors <= 32);   /* the nr of MSI vectors is up to 32 */
> +    vectors_order = ffs(nr_vectors) - 1;
> +
> +    flags = (vectors_order << (ffs(PCI_MSI_FLAGS_QMASK) - 1)) &
> +        PCI_MSI_FLAGS_QMASK;

The & PCI_MSI_FLAGS_QMASK is not needed, is it?

> +    if (msi64bit) {
> +        flags |= PCI_MSI_FLAGS_64BIT;
> +    }
> +    if (msi_per_vector_mask) {
> +        flags |= PCI_MSI_FLAGS_MASKBIT;
> +    }
> +
> +    cap_size = msi_cap_sizeof(flags);
> +    config_offset = pci_add_capability(dev, PCI_CAP_ID_MSI, offset, 
> cap_size);
> +    if (config_offset < 0) {
> +        return config_offset;
> +    }
> +
> +    dev->msi_cap = config_offset;
> +    dev->cap_present |= QEMU_PCI_CAP_MSI;
> +
> +    pci_set_word(dev->config + msi_flags_off(dev), flags);
> +    pci_set_word(dev->wmask + msi_flags_off(dev),
> +                 PCI_MSI_FLAGS_QSIZE | PCI_MSI_FLAGS_ENABLE);
> +    pci_set_long(dev->wmask + msi_address_lo_off(dev),
> +                 PCI_MSI_ADDRESS_LO_MASK);
> +    if (msi64bit) {
> +        pci_set_long(dev->wmask + msi_address_hi_off(dev), 0xffffffff);
> +    }
> +    pci_set_word(dev->wmask + msi_data_off(dev, msi64bit), 0xffff);
> +
> +    if (msi_per_vector_mask) {
> +        pci_set_long(dev->wmask + msi_mask_off(dev, msi64bit),
> +                     (1U << nr_vectors) - 1);
> +    }
> +    return config_offset;
> +}
> +
> +void msi_uninit(struct PCIDevice *dev)
> +{
> +    uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
> +    uint8_t cap_size = msi_cap_sizeof(flags);
> +    pci_del_capability(dev, PCI_CAP_ID_MSIX, cap_size);
> +    MSI_DEV_PRINTF(dev, "uninit\n");
> +}
> +
> +void msi_reset(PCIDevice *dev)
> +{
> +    uint16_t flags;
> +    bool msi64bit;
> +
> +    flags = pci_get_word(dev->config + msi_flags_off(dev));
> +    flags &= ~(PCI_MSI_FLAGS_QSIZE | PCI_MSI_FLAGS_ENABLE);
> +    msi64bit = flags & PCI_MSI_FLAGS_64BIT;
> +
> +    pci_set_word(dev->config + msi_flags_off(dev), flags);
> +    pci_set_long(dev->config + msi_address_lo_off(dev), 0);
> +    if (msi64bit) {
> +        pci_set_long(dev->config + msi_address_hi_off(dev), 0);
> +    }
> +    pci_set_word(dev->config + msi_data_off(dev, msi64bit), 0);
> +    if (flags & PCI_MSI_FLAGS_MASKBIT) {
> +        pci_set_long(dev->config + msi_mask_off(dev, msi64bit), 0);
> +        pci_set_long(dev->config + msi_pending_off(dev, msi64bit), 0);
> +    }
> +    MSI_DEV_PRINTF(dev, "reset\n");
> +}
> +
> +static bool msi_is_masked(const PCIDevice *dev, uint8_t vector)
> +{
> +    uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
> +    uint32_t mask;
> +
> +    if (!(flags & PCI_MSI_FLAGS_MASKBIT)) {
> +        return false;
> +    }
> +
> +    mask = pci_get_long(dev->config +
> +                        msi_mask_off(dev, flags & PCI_MSI_FLAGS_64BIT));
> +    return mask & (1U << vector);
> +}
> +
> +static void msi_set_pending(PCIDevice *dev, uint8_t vector)
> +{
> +    uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
> +    bool msi64bit = flags & PCI_MSI_FLAGS_64BIT;
> +    uint32_t pending;
> +
> +    assert(flags & PCI_MSI_FLAGS_MASKBIT);
> +
> +    pending = pci_get_long(dev->config + msi_pending_off(dev, msi64bit));
> +    pending |= 1U << vector;
> +    pci_set_long(dev->config + msi_pending_off(dev, msi64bit), pending);
> +    MSI_DEV_PRINTF(dev, "pending vector 0x%"PRIx8"\n", vector);
> +}
> +
> +void msi_notify(PCIDevice *dev, uint8_t vector)
> +{
> +    uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
> +    bool msi64bit = flags & PCI_MSI_FLAGS_64BIT;
> +    uint8_t nr_vectors = msi_nr_vectors(flags);
> +    uint64_t address;
> +    uint32_t data;
> +
> +    assert(vector < nr_vectors);
> +    if (msi_is_masked(dev, vector)) {
> +        msi_set_pending(dev, vector);
> +        return;
> +    }
> +
> +    if (msi64bit){
> +        address = pci_get_quad(dev->config + msi_address_lo_off(dev));
> +    } else {
> +        address = pci_get_long(dev->config + msi_address_lo_off(dev));
> +    }
> +
> +    /* upper bit 31:16 is zero */
> +    data = pci_get_word(dev->config + msi_data_off(dev, msi64bit));
> +    if (nr_vectors > 1) {
> +        data &= ~(nr_vectors - 1);
> +        data |= vector;
> +    }
> +
> +    MSI_DEV_PRINTF(dev,
> +                   "notify vector 0x%"PRIx8
> +                   " address: 0x%"PRIx64" data: 0x%"PRIx32"\n",
> +                   vector, address, data);
> +    stl_phys(address, data);
> +}
> +
> +/* call this function after updating configs by pci_default_write_config(). 
> */
> +void msi_write_config(PCIDevice *dev, uint32_t addr, uint32_t val, int len)
> +{
> +    uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
> +    bool msi64bit = flags & PCI_MSI_FLAGS_64BIT;
> +    bool msi_per_vector_mask = flags & PCI_MSI_FLAGS_MASKBIT;
> +    uint8_t nr_vectors;
> +    uint8_t nr_vectors_capable;
> +    uint8_t vector;
> +    uint32_t pending;
> +    int i;
> +
> +#ifdef MSI_DEBUG
> +    if (ranges_overlap(addr, len, dev->msi_cap, msi_cap_sizeof(flags))) {
> +        MSI_DEV_PRINTF(dev, "addr 0x%"PRIx32" val 0x%"PRIx32" len %d\n",
> +                       addr, val, len);
> +        MSI_DEV_PRINTF(dev, "ctrl: 0x%"PRIx16" address: 0x%"PRIx32,
> +                       flags,
> +                       pci_get_long(dev->config + msi_address_lo_off(dev)));
> +        if (msi64bit) {
> +            fprintf(stderr, " addrss-hi: 0x%"PRIx32,
> +                    pci_get_long(dev->config + msi_address_hi_off(dev)));
> +        }
> +        fprintf(stderr, " data: 0x%"PRIx16,
> +                pci_get_word(dev->config + msi_data_off(dev, msi64bit)));
> +        if (flags & PCI_MSI_FLAGS_MASKBIT) {
> +            fprintf(stderr, " mask 0x%"PRIx32" pending 0x%"PRIx32,
> +                    pci_get_long(dev->config + msi_mask_off(dev, msi64bit)),
> +                    pci_get_long(dev->config + msi_pending_off(dev, 
> msi64bit)));
> +        }
> +        fprintf(stderr, "\n");
> +    }
> +#endif
> +
> +    /* do we modified? */

english:
do -> Are


> +    if (!(ranges_overlap(addr, len, msi_flags_off(dev), 2) ||
> +          (msi_per_vector_mask &&
> +           ranges_overlap(addr, len, msi_mask_off(dev, msi64bit), 4)))) {
> +        return;
> +    }
> +
> +    if (!(flags & PCI_MSI_FLAGS_ENABLE)) {
> +        return;
> +    }
> +
> +    /*
> +     * Now MSI is enabled, clear INTx# interrupts.
> +     * Guest is prohibited from enabling msi while interrupt is asserted.
> +     * But it can.

I think the above wording is misleading: the spec says that
the *driver* is prohibited from writing enable bit to mask
a service request. But the guest OS could do this.

>+      * So we just discard the interrupts as moderate fallback.
> +     *
> +     * 6.8.3.3. Enabling Operation
> +     *   While enabled for MSI or MSI-X operation, a function is prohibited
> +     *   from using its INTx# pin (if implemented) to request
> +     *   service (MSI, MSI-X, and INTx# are mutually exclusive).
> +     */
> +    for (i = 0; i < PCI_NUM_PINS; ++i) {
> +        qemu_set_irq(dev->irq[i], 0);
> +    }
> +
> +    /* nr_vectors might be set bigger than capable. So clamp it. */

Maybe add that this is not legal by spec, so we
can do anything we like, just don't crash the host.

> +    nr_vectors = msi_nr_vectors(flags);
> +    nr_vectors_capable = 1U <<
> +        ((flags & PCI_MSI_FLAGS_QMASK) >> (ffs(PCI_MSI_FLAGS_QMASK) - 1));
> +    if (nr_vectors > nr_vectors_capable) {
> +        uint8_t vectors_order;
> +        nr_vectors = nr_vectors_capable;
> +        vectors_order = ffs(nr_vectors) - 1;
> +
> +        flags &= ~PCI_MSI_FLAGS_QSIZE;
> +        flags |= (vectors_order << (ffs(PCI_MSI_FLAGS_QSIZE) - 1)) &
> +            PCI_MSI_FLAGS_QSIZE;
> +        pci_set_word(dev->config + msi_flags_off(dev), flags);
> +    }

We do the shift/ffs dance many times here.  This can be done easier
if we compare the log values:

log_num_vecs = (flags & PCI_MSI_FLAGS_QSIZE) >> (ffs(PCI_MSI_FLAGS_QSIZE) - 1);
log_max_vecs = (flags & PCI_MSI_FLAGS_QMASK) >> (ffs(PCI_MSI_FLAGS_QMASK) - 1);
if (log_num_vecs > log_max_vecs) {
        flags &= ~PCI_MSI_FLAGS_QSIZE;
        flags |= log_max_vecs << (ffs(PCI_MSI_FLAGS_QSIZE) - 1);
        pci_set_word(dev->config + msi_flags_off(dev), flags);
}

> +
> +    if (!msi_per_vector_mask) {
> +        /* if per vector masking isn't supported,
> +           there is no pending interrupt. */
> +        return;
> +    }
> +
> +    /*
> +     * If the number of vectors are reduced, clear pending bit for
> +     * freed vectors.
> +     * Since it is unknown here which interrupt will be used for pending bit
> +     * of freed vector, just discard the interrupt.

The last two lines seem to confuse more than they explain.
Replace with 'This will discard pending interrupts, if any.'?

> +     */
> +    pending = pci_get_long(dev->config + msi_pending_off(dev, msi64bit));
> +    pending &= (1U << pending) - 1;
> +    pci_set_long(dev->config + msi_pending_off(dev, msi64bit), pending);
> +
> +    /* deliver pending interrupts which are unmasked */
> +    for (vector = 0; vector < nr_vectors; ++vector) {
> +        if (msi_is_masked(dev, vector) || !(pending & (1U << vector))) {
> +            continue;
> +        }
> +
> +        pending &= ~(1U << vector);
> +        pci_set_long(dev->config + msi_pending_off(dev, msi64bit),
> +                     pending);
> +        msi_notify(dev, vector);
> +    }
> +}
> +
> +uint8_t msi_nr_vectors_allocated(const PCIDevice *dev)
> +{
> +    uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
> +    return msi_nr_vectors(flags);
> +}
> diff --git a/hw/msi.h b/hw/msi.h
> new file mode 100644
> index 0000000..eac9c78
> --- /dev/null
> +++ b/hw/msi.h
> @@ -0,0 +1,41 @@
> +/*
> + * msi.h
> + *
> + * Copyright (c) 2010 Isaku Yamahata <yamahata at valinux co jp>
> + *                    VA Linux Systems Japan K.K.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> +
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> +
> + * You should have received a copy of the GNU General Public License along
> + * with this program; if not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#ifndef QEMU_MSI_H
> +#define QEMU_MSI_H
> +
> +#include "qemu-common.h"
> +#include "pci.h"
> +
> +bool msi_enabled(const PCIDevice *dev);
> +int msi_init(struct PCIDevice *dev, uint8_t offset,
> +             uint8_t nr_vectors, bool msi64bit, bool msi_per_vector_mask);
> +void msi_uninit(struct PCIDevice *dev);
> +void msi_reset(PCIDevice *dev);
> +void msi_notify(PCIDevice *dev, uint8_t vector);
> +void msi_write_config(PCIDevice *dev, uint32_t addr, uint32_t val, int len);
> +uint8_t msi_nr_vectors_allocated(const PCIDevice *dev);
> +
> +static inline bool msi_present(const PCIDevice *dev)
> +{
> +    return dev->cap_present & QEMU_PCI_CAP_MSI;
> +}
> +
> +#endif /* QEMU_MSI_H */
> diff --git a/hw/pci.h b/hw/pci.h
> index 2b4c318..296c7ba 100644
> --- a/hw/pci.h
> +++ b/hw/pci.h
> @@ -109,11 +109,12 @@ typedef struct PCIIORegion {
>  
>  /* Bits in cap_present field. */
>  enum {
> -    QEMU_PCI_CAP_MSIX = 0x1,
> -    QEMU_PCI_CAP_EXPRESS = 0x2,
> +    QEMU_PCI_CAP_MSI = 0x1,
> +    QEMU_PCI_CAP_MSIX = 0x2,
> +    QEMU_PCI_CAP_EXPRESS = 0x4,
>  
>      /* multifunction capable device */
> -#define QEMU_PCI_CAP_MULTIFUNCTION_BITNR        2
> +#define QEMU_PCI_CAP_MULTIFUNCTION_BITNR        3
>      QEMU_PCI_CAP_MULTIFUNCTION = (1 << QEMU_PCI_CAP_MULTIFUNCTION_BITNR),
>  };
>  
> @@ -168,6 +169,9 @@ struct PCIDevice {
>      /* Version id needed for VMState */
>      int32_t version_id;
>  
> +    /* Offset of MSI capability in config space */
> +    uint8_t msi_cap;
> +
>      /* Location of option rom */
>      char *romfile;
>      ram_addr_t rom_offset;
> -- 
> 1.7.1.1



reply via email to

[Prev in Thread] Current Thread [Next in Thread]