[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
RE: [PATCH v6 4/7] igb: RX payload guest writting refactoring
|
From: |
Sriram Yagnaraman |
|
Subject: |
RE: [PATCH v6 4/7] igb: RX payload guest writting refactoring |
|
Date: |
Mon, 15 May 2023 11:39:25 +0000 |
> -----Original Message-----
> From: Tomasz Dzieciol <t.dzieciol@partner.samsung.com>
> Sent: Friday, 12 May 2023 17:44
> To: qemu-devel@nongnu.org; akihiko.odaki@daynix.com
> Cc: Sriram Yagnaraman <sriram.yagnaraman@est.tech>;
> jasowang@redhat.com; k.kwiecien@samsung.com;
> m.sochacki@samsung.com
> Subject: [PATCH v6 4/7] igb: RX payload guest writting refactoring
>
> Refactoring is done in preparation for support of multiple advanced
> descriptors
> RX modes, especially packet-split modes.
>
> Signed-off-by: Tomasz Dzieciol <t.dzieciol@partner.samsung.com>
> ---
> hw/net/e1000e_core.c | 18 ++--
> hw/net/igb_core.c | 216 +++++++++++++++++++++++++--------------
> tests/qtest/libqos/igb.c | 5 +
> 3 files changed, 153 insertions(+), 86 deletions(-)
>
> diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c index
> b2e54fe802..f9ff31fd70 100644
> --- a/hw/net/e1000e_core.c
> +++ b/hw/net/e1000e_core.c
> @@ -1418,11 +1418,11 @@ e1000e_write_hdr_to_rx_buffers(E1000ECore
> *core, }
>
> static void
> -e1000e_write_to_rx_buffers(E1000ECore *core,
> - hwaddr ba[MAX_PS_BUFFERS],
> - e1000e_ba_state *bastate,
> - const char *data,
> - dma_addr_t data_len)
> +e1000e_write_payload_frag_to_rx_buffers(E1000ECore *core,
> + hwaddr ba[MAX_PS_BUFFERS],
> + e1000e_ba_state *bastate,
> + const char *data,
> + dma_addr_t data_len)
> {
> while (data_len > 0) {
> uint32_t cur_buf_len = core->rxbuf_sizes[bastate->cur_idx];
> @@ -1594,8 +1594,10 @@ e1000e_write_packet_to_guest(E1000ECore
> *core, struct NetRxPkt *pkt,
> while (copy_size) {
> iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
>
> - e1000e_write_to_rx_buffers(core, ba, &bastate,
> - iov->iov_base + iov_ofs,
> iov_copy);
> + e1000e_write_payload_frag_to_rx_buffers(core, ba,
> &bastate,
> + iov->iov_base +
> + iov_ofs,
> + iov_copy);
>
> copy_size -= iov_copy;
> iov_ofs += iov_copy; @@ -1607,7 +1609,7 @@
> e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt,
>
> if (desc_offset + desc_size >= total_size) {
> /* Simulate FCS checksum presence in the last descriptor
> */
> - e1000e_write_to_rx_buffers(core, ba, &bastate,
> + e1000e_write_payload_frag_to_rx_buffers(core, ba,
> + &bastate,
> (const char *) &fcs_pad,
> e1000x_fcs_len(core->mac));
> }
> }
> diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index
> 774b34fc92..0eabe7106e 100644
> --- a/hw/net/igb_core.c
> +++ b/hw/net/igb_core.c
> @@ -941,6 +941,14 @@ igb_has_rxbufs(IGBCore *core, const E1000ERingInfo
> *r, size_t total_size)
> bufsize;
> }
>
> +static uint32_t
> +igb_get_queue_rx_header_buf_size(IGBCore *core, const E1000ERingInfo
> +*r) {
Would be nice to have similar names for igb_rxbufsize and
igb_get_queue_rx_header_buf_size.
If we want to keep igb_rxbufsize due to its similarity with e1000e equivalent,
how about igb_rxhdrbufsize() for this new function?
> + uint32_t srrctl = core->mac[E1000_SRRCTL(r->idx) >> 2];
> + return (srrctl & E1000_SRRCTL_BSIZEHDRSIZE_MASK) >>
> + E1000_SRRCTL_BSIZEHDRSIZE_SHIFT; }
> +
> void
> igb_start_recv(IGBCore *core)
> {
> @@ -1231,6 +1239,21 @@ igb_read_adv_rx_descr(IGBCore *core, union
> e1000_adv_rx_desc *desc,
> *buff_addr = le64_to_cpu(desc->read.pkt_addr); }
>
> +typedef struct IGBPacketRxDMAState {
> + size_t size;
> + size_t total_size;
> + size_t ps_hdr_len;
> + size_t desc_size;
> + size_t desc_offset;
> + uint32_t rx_desc_packet_buf_size;
> + uint32_t rx_desc_header_buf_size;
> + struct iovec *iov;
> + size_t iov_ofs;
> + bool is_first;
> + uint16_t written;
> + hwaddr ba;
> +} IGBPacketRxDMAState;
> +
> static inline void
> igb_read_rx_descr(IGBCore *core, union e1000_rx_desc_union *desc,
> hwaddr *buff_addr)
> @@ -1512,19 +1535,6 @@ igb_pci_dma_write_rx_desc(IGBCore *core,
> PCIDevice *dev, dma_addr_t addr,
> }
> }
>
> -static void
> -igb_write_to_rx_buffers(IGBCore *core,
> - PCIDevice *d,
> - hwaddr ba,
> - uint16_t *written,
> - const char *data,
> - dma_addr_t data_len)
> -{
> - trace_igb_rx_desc_buff_write(ba, *written, data, data_len);
> - pci_dma_write(d, ba + *written, data, data_len);
> - *written += data_len;
> -}
> -
> static void
> igb_update_rx_stats(IGBCore *core, const E1000ERingInfo *rxi,
> size_t pkt_size, size_t pkt_fcs_size) @@ -1550,6
> +1560,93 @@
> igb_rx_descr_threshold_hit(IGBCore *core, const E1000ERingInfo *rxi)
> ((core->mac[E1000_SRRCTL(rxi->idx) >> 2] >> 20) & 31) * 16; }
>
> +static void
> +igb_truncate_to_descriptor_size(IGBPacketRxDMAState *pdma_st, size_t
> +*size) {
> + if (*size > pdma_st->rx_desc_packet_buf_size) {
> + *size = pdma_st->rx_desc_packet_buf_size;
> + }
> +}
> +
> +static void
> +igb_write_payload_frag_to_rx_buffers(IGBCore *core,
> + PCIDevice *d,
> + hwaddr ba,
> + uint16_t *written,
> + uint32_t cur_buf_len,
> + const char *data,
> + dma_addr_t data_len) {
> + trace_igb_rx_desc_buff_write(ba, *written, data, data_len);
> + pci_dma_write(d, ba + *written, data, data_len);
> + *written += data_len;
> +}
> +
> +static void
> +igb_write_payload_to_rx_buffers(IGBCore *core,
> + struct NetRxPkt *pkt,
> + PCIDevice *d,
> + IGBPacketRxDMAState *pdma_st,
> + size_t *copy_size) {
> + static const uint32_t fcs_pad;
> + size_t iov_copy;
> +
> + /* Copy packet payload */
> + while (*copy_size) {
> + iov_copy = MIN(*copy_size, pdma_st->iov->iov_len - pdma_st->iov_ofs);
> + igb_write_payload_frag_to_rx_buffers(core, d,
> + pdma_st->ba,
> + &pdma_st->written,
> +
> pdma_st->rx_desc_packet_buf_size,
> + pdma_st->iov->iov_base +
> + pdma_st->iov_ofs,
> + iov_copy);
> +
> + *copy_size -= iov_copy;
> + pdma_st->iov_ofs += iov_copy;
> + if (pdma_st->iov_ofs == pdma_st->iov->iov_len) {
> + pdma_st->iov++;
> + pdma_st->iov_ofs = 0;
> + }
> + }
> +
> + if (pdma_st->desc_offset + pdma_st->desc_size >= pdma_st->total_size) {
> + /* Simulate FCS checksum presence in the last descriptor */
> + igb_write_payload_frag_to_rx_buffers(core, d,
> + pdma_st->ba,
> + &pdma_st->written,
> +
> pdma_st->rx_desc_packet_buf_size,
> + (const char *) &fcs_pad,
> + e1000x_fcs_len(core->mac));
> + }
> +}
> +
> +static void
> +igb_write_to_rx_buffers(IGBCore *core,
> + struct NetRxPkt *pkt,
> + PCIDevice *d,
> + IGBPacketRxDMAState *pdma_st) {
> + size_t copy_size;
> +
> + if (!pdma_st->ba) {
> + /* as per intel docs; skip descriptors with null buf addr */
> + trace_e1000e_rx_null_descriptor();
> + return;
> + }
> +
> + if (pdma_st->desc_offset >= pdma_st->size) {
> + return;
> + }
> +
> + pdma_st->desc_size = pdma_st->total_size - pdma_st->desc_offset;
> + igb_truncate_to_descriptor_size(pdma_st, &pdma_st->desc_size);
> + copy_size = pdma_st->size - pdma_st->desc_offset;
> + igb_truncate_to_descriptor_size(pdma_st, ©_size);
> + igb_write_payload_to_rx_buffers(core, pkt, d, pdma_st, ©_size);
> +}
> +
> static void
> igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt,
> const E1000E_RxRing *rxr, @@ -1559,91 +1656,54 @@
> igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt,
> PCIDevice *d;
> dma_addr_t base;
> union e1000_rx_desc_union desc;
> - size_t desc_size;
> - size_t desc_offset = 0;
> - size_t iov_ofs = 0;
> -
> - struct iovec *iov = net_rx_pkt_get_iovec(pkt);
> - size_t size = net_rx_pkt_get_total_len(pkt);
> - size_t total_size = size + e1000x_fcs_len(core->mac);
> - const E1000ERingInfo *rxi = rxr->i;
> - size_t bufsize = igb_rxbufsize(core, rxi);
> -
> + const E1000ERingInfo *rxi;
> + size_t rx_desc_len;
> +
> + IGBPacketRxDMAState pdma_st = {0};
> + pdma_st.is_first = true;
> + pdma_st.size = net_rx_pkt_get_total_len(pkt);
> + pdma_st.total_size = pdma_st.size + e1000x_fcs_len(core->mac);
> +
> + rxi = rxr->i;
> + rx_desc_len = core->rx_desc_len;
> + pdma_st.rx_desc_packet_buf_size =
> + igb_rxbufsize(core, rxi);
> + pdma_st.rx_desc_header_buf_size =
> + igb_get_queue_rx_header_buf_size(core, rxi);
> + pdma_st.iov = net_rx_pkt_get_iovec(pkt);
> d = pcie_sriov_get_vf_at_index(core->owner, rxi->idx % 8);
> if (!d) {
> d = core->owner;
> }
>
> do {
> - hwaddr ba;
> - uint16_t written = 0;
> + pdma_st.written = 0;
> bool is_last = false;
>
> - desc_size = total_size - desc_offset;
> -
> - if (desc_size > bufsize) {
> - desc_size = bufsize;
> - }
> -
> if (igb_ring_empty(core, rxi)) {
> return;
> }
>
> base = igb_ring_head_descr(core, rxi);
> + pci_dma_read(d, base, &desc, rx_desc_len);
> + trace_e1000e_rx_descr(rxi->idx, base, rx_desc_len);
>
> - pci_dma_read(d, base, &desc, core->rx_desc_len);
> -
> - trace_e1000e_rx_descr(rxi->idx, base, core->rx_desc_len);
> -
> - igb_read_rx_descr(core, &desc, &ba);
> -
> - if (ba) {
> - if (desc_offset < size) {
> - static const uint32_t fcs_pad;
> - size_t iov_copy;
> - size_t copy_size = size - desc_offset;
> - if (copy_size > bufsize) {
> - copy_size = bufsize;
> - }
> -
> - /* Copy packet payload */
> - while (copy_size) {
> - iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
> -
> - igb_write_to_rx_buffers(core, d, ba, &written,
> - iov->iov_base + iov_ofs,
> iov_copy);
> + igb_read_rx_descr(core, &desc, &pdma_st.ba);
>
> - copy_size -= iov_copy;
> - iov_ofs += iov_copy;
> - if (iov_ofs == iov->iov_len) {
> - iov++;
> - iov_ofs = 0;
> - }
> - }
> -
> - if (desc_offset + desc_size >= total_size) {
> - /* Simulate FCS checksum presence in the last descriptor
> */
> - igb_write_to_rx_buffers(core, d, ba, &written,
> - (const char *) &fcs_pad,
> e1000x_fcs_len(core->mac));
> - }
> - }
> - } else { /* as per intel docs; skip descriptors with null buf addr */
> - trace_e1000e_rx_null_descriptor();
> - }
> - desc_offset += desc_size;
> - if (desc_offset >= total_size) {
> + igb_write_to_rx_buffers(core, pkt, d, &pdma_st);
> + pdma_st.desc_offset += pdma_st.desc_size;
> + if (pdma_st.desc_offset >= pdma_st.total_size) {
> is_last = true;
> }
>
> igb_write_rx_descr(core, &desc, is_last ? core->rx_pkt : NULL,
> - rss_info, etqf, ts, written);
> - igb_pci_dma_write_rx_desc(core, d, base, &desc, core->rx_desc_len);
> -
> - igb_ring_advance(core, rxi, core->rx_desc_len /
> E1000_MIN_RX_DESC_LEN);
> -
> - } while (desc_offset < total_size);
> + rss_info, etqf, ts, pdma_st.written);
> + pci_dma_write(d, base, &desc, rx_desc_len);
> + igb_ring_advance(core, rxi,
> + rx_desc_len / E1000_MIN_RX_DESC_LEN);
> + } while (pdma_st.desc_offset < pdma_st.total_size);
>
> - igb_update_rx_stats(core, rxi, size, total_size);
> + igb_update_rx_stats(core, rxi, pdma_st.size, pdma_st.total_size);
> }
>
> static bool
> diff --git a/tests/qtest/libqos/igb.c b/tests/qtest/libqos/igb.c index
> a603468beb..f40c4ec4cd 100644
> --- a/tests/qtest/libqos/igb.c
> +++ b/tests/qtest/libqos/igb.c
> @@ -109,6 +109,11 @@ static void igb_pci_start_hw(QOSGraphObject *obj)
> E1000_RAH_AV | E1000_RAH_POOL_1 |
> le16_to_cpu(*(uint16_t *)(address + 4)));
>
> + /* Set supported receive descriptor mode */
> + e1000e_macreg_write(&d->e1000e,
> + E1000_SRRCTL(0),
> + E1000_SRRCTL_DESCTYPE_ADV_ONEBUF);
> +
> /* Enable receive */
> e1000e_macreg_write(&d->e1000e, E1000_RFCTL, E1000_RFCTL_EXTEN);
> e1000e_macreg_write(&d->e1000e, E1000_RCTL, E1000_RCTL_EN);
> --
> 2.25.1