qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v4 17/18] migration/rdma: send data for both rdma-pin-all and


From: Dr. David Alan Gilbert
Subject: Re: [PATCH v4 17/18] migration/rdma: send data for both rdma-pin-all and NOT rdma-pin-all mode
Date: Thu, 4 Feb 2021 10:18:36 +0000
User-agent: Mutt/1.14.6 (2020-07-11)

* Chuan Zheng (zhengchuan@huawei.com) wrote:
> Signed-off-by: Zhimin Feng <fengzhimin1@huawei.com>
> Signed-off-by: Chuan Zheng <zhengchuan@huawei.com>
> ---
>  migration/rdma.c | 65 
> ++++++++++++++++++++++++++++++++++++++++++++++++++++----
>  1 file changed, 61 insertions(+), 4 deletions(-)
> 
> diff --git a/migration/rdma.c b/migration/rdma.c
> index 2097839..c19a91f 100644
> --- a/migration/rdma.c
> +++ b/migration/rdma.c
> @@ -2002,6 +2002,20 @@ static int qemu_rdma_write_one(QEMUFile *f, 
> RDMAContext *rdma,
>                                 .repeat = 1,
>                               };
>  
> +    /* use multifd to send data */
> +    if (migrate_use_multifd()) {
> +        int channel = get_multifd_RDMA_channel();
> +        int ret = 0;
> +        MultiFDSendParams *multifd_send_param = NULL;
> +        ret = get_multifd_send_param(channel, &multifd_send_param);
> +        if (ret) {
> +            error_report("rdma: error getting multifd_send_param(%d)", 
> channel);
> +            return -EINVAL;
> +        }
> +        rdma = (RDMAContext *)multifd_send_param->rdma;
> +        block = &(rdma->local_ram_blocks.block[current_index]);
> +    }
> +
>  retry:
>      sge.addr = (uintptr_t)(block->local_host_addr +
>                              (current_addr - block->offset));
> @@ -2197,6 +2211,27 @@ retry:
>      return 0;
>  }
>  
> +static int multifd_rdma_write_flush(void)
> +{
> +    /* The multifd RDMA threads send data */
> +    MultiFDSendParams *multifd_send_param = NULL;
> +    RDMAContext *rdma = NULL;
> +    MigrationState *s = migrate_get_current();
> +    int ret = 0;
> +
> +    ret = get_multifd_send_param(s->rdma_channel,
> +                                 &multifd_send_param);
> +    if (ret) {
> +        error_report("rdma: error getting multifd_send_param(%d)",
> +                     s->rdma_channel);

Do we need these error_report's for get_multifd_send_param calls - how
can they fail in practice?

> +        return ret;
> +    }
> +    rdma = (RDMAContext *)(multifd_send_param->rdma);
> +    rdma->nb_sent++;
> +
> +    return ret;

But this doesn't actually 'flush' anything?

> +}
> +
>  /*
>   * Push out any unwritten RDMA operations.
>   *
> @@ -2219,8 +2254,15 @@ static int qemu_rdma_write_flush(QEMUFile *f, 
> RDMAContext *rdma)
>      }
>  
>      if (ret == 0) {
> -        rdma->nb_sent++;
> -        trace_qemu_rdma_write_flush(rdma->nb_sent);
> +        if (migrate_use_multifd()) {
> +            ret = multifd_rdma_write_flush();
> +            if (ret) {
> +                return ret;
> +            }
> +        } else {
> +            rdma->nb_sent++;
> +            trace_qemu_rdma_write_flush(rdma->nb_sent);
> +        }
>      }
>  
>      rdma->current_length = 0;
> @@ -4062,6 +4104,7 @@ wait_reg_complete:
>              }
>  
>              qemu_sem_post(&multifd_send_param->sem_sync);
> +            qemu_sem_wait(&multifd_send_param->sem);

why?

>          }
>      }
>  
> @@ -4443,6 +4486,7 @@ static void *multifd_rdma_send_thread(void *opaque)
>      Error *local_err = NULL;
>      int ret = 0;
>      RDMAControlHeader head = { .len = 0, .repeat = 1 };
> +    RDMAContext *rdma = p->rdma;
>  
>      trace_multifd_send_thread_start(p->id);
>      if (multifd_send_initial_packet(p, &local_err) < 0) {
> @@ -4451,7 +4495,7 @@ static void *multifd_rdma_send_thread(void *opaque)
>  
>      /* wait for semaphore notification to register memory */
>      qemu_sem_wait(&p->sem_sync);
> -    if (qemu_rdma_registration(p->rdma) < 0) {
> +    if (qemu_rdma_registration(rdma) < 0) {
>          goto out;
>      }
>      /*
> @@ -4466,12 +4510,25 @@ static void *multifd_rdma_send_thread(void *opaque)
>                  break;
>              }
>          }
> +        /* To complete polling(CQE) */
> +        while (rdma->nb_sent) {

Where is nb_sent decremented?

> +            ret = qemu_rdma_block_for_wrid(rdma, RDMA_WRID_RDMA_WRITE, NULL);
> +            if (ret < 0) {
> +                error_report("multifd RDMA migration: "
> +                             "complete polling error!");
> +                return NULL;
> +            }
> +        }
>          /* Send FINISHED to the destination */
>          head.type = RDMA_CONTROL_REGISTER_FINISHED;
> -        ret = qemu_rdma_exchange_send(p->rdma, &head, NULL, NULL, NULL, 
> NULL);
> +        ret = qemu_rdma_exchange_send(rdma, &head, NULL, NULL, NULL, NULL);
>          if (ret < 0) {
> +            error_report("multifd RDMA migration: "
> +                         "sending remote error!");
>              return NULL;
>          }
> +        /* sync main thread */
> +        qemu_sem_post(&p->sem);
>      }
>  
>  out:
> -- 
> 1.8.3.1
> 
-- 
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK




reply via email to

[Prev in Thread] Current Thread [Next in Thread]