[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH] tests/vhost-user-bridge: implement logging of dirty pages
From: |
Michael S. Tsirkin |
Subject: |
Re: [Qemu-devel] [PATCH] tests/vhost-user-bridge: implement logging of dirty pages |
Date: |
Wed, 11 Nov 2015 17:28:00 +0200 |
On Wed, Nov 11, 2015 at 09:03:16AM +0200, Victor Kaplansky wrote:
> During migration devices continue writing to the guest's memory.
> These writes have to be reported to QEMU. The change implements
> minimal support in vhost-user-bridge required for successful
> migration of a guest with virtio-net device.
>
> Signed-off-by: Victor Kaplansky <address@hidden>
> ---
> tests/vhost-user-bridge.c | 168
> ++++++++++++++++++++++++++++++++++++++++++----
> 1 file changed, 156 insertions(+), 12 deletions(-)
>
> diff --git a/tests/vhost-user-bridge.c b/tests/vhost-user-bridge.c
> index fa18ad5..8bab0bb 100644
> --- a/tests/vhost-user-bridge.c
> +++ b/tests/vhost-user-bridge.c
> @@ -173,6 +173,9 @@ typedef struct VubrVirtq {
> #define VHOST_MEMORY_MAX_NREGIONS 8
> #define VHOST_USER_F_PROTOCOL_FEATURES 30
>
> +typedef uint8_t vhost_log_chunk_t;
> +#define VHOST_LOG_PAGE 4096
> +
> enum VhostUserProtocolFeature {
> VHOST_USER_PROTOCOL_F_MQ = 0,
> VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
> @@ -265,8 +268,12 @@ typedef struct VubrDev {
> uint32_t nregions;
> VubrDevRegion regions[VHOST_MEMORY_MAX_NREGIONS];
> VubrVirtq vq[MAX_NR_VIRTQUEUE];
> + int log_call_fd;
> + uint64_t log_size;
> + vhost_log_chunk_t *log_table;
> int backend_udp_sock;
> struct sockaddr_in backend_udp_dest;
> + int ready;
> } VubrDev;
>
> static const char *vubr_request_str[] = {
> @@ -329,6 +336,27 @@ gpa_to_va(VubrDev *dev, uint64_t guest_addr)
> return 0;
> }
>
> +/* Translate our virtual address to guest physical address. */
You should never need this.
Looking at the callers, it is only used when you mark the used ring dirty.
And that does not need a translation: you
actually get the PA base for the used ring.
> +static uint64_t
> +va_to_gpa(VubrDev *dev, void *obj)
> +{
> + int i;
> + uint64_t va = (uint64_t) obj;
> +
> + /* Find matching memory region. */
> + for (i = 0; i < dev->nregions; i++) {
> + VubrDevRegion *r = &dev->regions[i];
> +
> + if ((va >= r->mmap_addr + r->mmap_offset) &&
> + (va < r->mmap_addr + r->mmap_offset + r->size)) {
> + return va - r->mmap_addr - r->mmap_offset + r->gpa;
> + }
> + }
> +
> + assert(!"address not found in regions");
> + return 0;
> +}
> +
> /* Translate qemu virtual address to our virtual address. */
> static uint64_t
> qva_to_va(VubrDev *dev, uint64_t qemu_addr)
> @@ -368,7 +396,12 @@ vubr_message_read(int conn_fd, VhostUserMsg *vmsg)
>
> rc = recvmsg(conn_fd, &msg, 0);
>
> - if (rc <= 0) {
> + if (rc == 0) {
> + vubr_die("recvmsg");
> + fprintf(stderr, "Peer disconnected.\n");
> + exit(1);
> + }
> + if (rc < 0) {
> vubr_die("recvmsg");
> }
>
> @@ -395,7 +428,12 @@ vubr_message_read(int conn_fd, VhostUserMsg *vmsg)
>
> if (vmsg->size) {
> rc = read(conn_fd, &vmsg->payload, vmsg->size);
> - if (rc <= 0) {
> + if (rc == 0) {
> + vubr_die("recvmsg");
> + fprintf(stderr, "Peer disconnected.\n");
> + exit(1);
> + }
> + if (rc < 0) {
> vubr_die("recvmsg");
> }
>
> @@ -465,6 +503,32 @@ vubr_virtqueue_kick(VubrVirtq *vq)
> }
> }
>
> +
> +static void
> +vubr_log_page(uint8_t *log_table, uint64_t page)
> +{
> + DPRINT("Logged dirty guest page: %"PRId64"\n", page);
> + log_table[page / 8] |= 1 << (page % 8);
> +}
> +
> +static void
> +vubr_log_write(VubrDev *dev, uint64_t address, uint64_t length)
> +{
> + uint64_t page;
> +
> + if (!dev->log_table || !length) {
> + return;
> + }
> +
> + assert(dev->log_size >= ((address + length) / VHOST_LOG_PAGE / 8));
> +
> + page = address / VHOST_LOG_PAGE;
> + while (page * VHOST_LOG_PAGE < address + length) {
> + vubr_log_page(dev->log_table, page);
> + page += VHOST_LOG_PAGE;
> + }
> +}
> +
> static void
> vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len)
> {
> @@ -510,6 +574,7 @@ vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t
> *buf, int32_t len)
>
> if (len <= chunk_len) {
> memcpy(chunk_start, buf, len);
> + vubr_log_write(dev, desc[i].addr, len);
> } else {
> fprintf(stderr,
> "Received too long packet from the backend. Dropping...\n");
> @@ -519,11 +584,14 @@ vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t
> *buf, int32_t len)
> /* Add descriptor to the used ring. */
> used->ring[u_index].id = d_index;
> used->ring[u_index].len = len;
> + vubr_log_write(dev, va_to_gpa(dev, &used->ring[u_index]),
> + sizeof(used->ring[u_index]));
>
> vq->last_avail_index++;
> vq->last_used_index++;
>
> atomic_mb_set(&used->idx, vq->last_used_index);
> + vubr_log_write(dev, va_to_gpa(dev, &used->idx), sizeof(used->idx));
>
> /* Kick the guest if necessary. */
> vubr_virtqueue_kick(vq);
> @@ -552,6 +620,8 @@ vubr_process_desc(VubrDev *dev, VubrVirtq *vq)
> void *chunk_start = (void *)gpa_to_va(dev, desc[i].addr);
> uint32_t chunk_len = desc[i].len;
>
> + assert(!(desc[i].flags & VRING_DESC_F_WRITE));
> +
> if (len + chunk_len < buf_size) {
> memcpy(buf + len, chunk_start, chunk_len);
> DPRINT("%d ", chunk_len);
> @@ -577,6 +647,8 @@ vubr_process_desc(VubrDev *dev, VubrVirtq *vq)
> /* Add descriptor to the used ring. */
> used->ring[u_index].id = d_index;
> used->ring[u_index].len = len;
> + vubr_log_write(dev, va_to_gpa(dev, &used->ring[u_index]),
> + sizeof(used->ring[u_index]));
>
> vubr_consume_raw_packet(dev, buf, len);
>
> @@ -596,6 +668,7 @@ vubr_process_avail(VubrDev *dev, VubrVirtq *vq)
> }
>
> atomic_mb_set(&used->idx, vq->last_used_index);
> + vubr_log_write(dev, va_to_gpa(dev, &used->idx), sizeof(used->idx));
> }
>
> static void
> @@ -609,6 +682,10 @@ vubr_backend_recv_cb(int sock, void *ctx)
> int buflen = sizeof(buf);
> int len;
>
> + if (!dev->ready) {
> + return;
> + }
> +
> DPRINT("\n\n *** IN UDP RECEIVE CALLBACK ***\n\n");
>
> uint16_t avail_index = atomic_mb_read(&rx_vq->avail->idx);
> @@ -656,9 +733,9 @@ vubr_get_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
> {
> vmsg->payload.u64 =
> ((1ULL << VIRTIO_NET_F_MRG_RXBUF) |
> - (1ULL << VIRTIO_NET_F_CTRL_VQ) |
> - (1ULL << VIRTIO_NET_F_CTRL_RX) |
> - (1ULL << VHOST_F_LOG_ALL));
> + (1ULL << VHOST_F_LOG_ALL) |
> + (1ULL << VHOST_USER_F_PROTOCOL_FEATURES));
> +
> vmsg->size = sizeof(vmsg->payload.u64);
>
> DPRINT("Sending back to guest u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
> @@ -680,10 +757,27 @@ vubr_set_owner_exec(VubrDev *dev, VhostUserMsg *vmsg)
> return 0;
> }
>
> +static void
> +vubr_close_log(VubrDev *dev)
> +{
> + if (dev->log_table) {
> + if (munmap(dev->log_table, dev->log_size) != 0) {
> + vubr_die("munmap()");
> + }
> +
> + dev->log_table = 0;
> + }
> + if (dev->log_call_fd != -1) {
> + close(dev->log_call_fd);
> + dev->log_call_fd = -1;
> + }
> +}
> +
> static int
> vubr_reset_device_exec(VubrDev *dev, VhostUserMsg *vmsg)
> {
> - DPRINT("Function %s() not implemented yet.\n", __func__);
> + vubr_close_log(dev);
> + dev->ready = 0;
> return 0;
> }
>
> @@ -736,8 +830,39 @@ vubr_set_mem_table_exec(VubrDev *dev, VhostUserMsg *vmsg)
> static int
> vubr_set_log_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
> {
> - DPRINT("Function %s() not implemented yet.\n", __func__);
> - return 0;
> + int fd, i;
> + uint64_t last_addr = 0;
> + uint64_t log_size;
> + void *rc;
> +
> + assert(vmsg->fd_num == 1);
> + fd = vmsg->fds[0];
> +
> + /* Calculate size of vlog_table. Each entry is 32 bits. */
> + for (i = 0; i < dev->nregions; i++) {
> + VubrDevRegion *dev_region = &dev->regions[i];
> +
> + uint64_t last_region_addr = dev_region->gpa + dev_region->size;
> +
> + if (last_addr < last_region_addr) {
> + last_addr = last_region_addr;
> + }
> + }
> +
> + log_size = last_addr / (VHOST_LOG_PAGE * sizeof(vhost_log_chunk_t));
> + DPRINT("Largest guest address: 0x%016"PRIx64"\n", last_addr);
> + DPRINT("Log size: %"PRId64"\n", log_size);
> +
> + rc = mmap(0, log_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
> + if (rc == MAP_FAILED) {
> + vubr_die("mmap");
> + }
> + dev->log_table = (vhost_log_chunk_t *) rc;
> + dev->log_size = log_size;
> +
> + vmsg->size = sizeof(vmsg->payload.u64);
> + /* Reply */
> + return 1;
> }
>
> static int
> @@ -803,8 +928,14 @@ vubr_set_vring_base_exec(VubrDev *dev, VhostUserMsg
> *vmsg)
> static int
> vubr_get_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
> {
> - DPRINT("Function %s() not implemented yet.\n", __func__);
> - return 0;
> + unsigned int index = vmsg->payload.state.index;
> +
> + DPRINT("State.index: %d\n", index);
> + vmsg->payload.state.num = dev->vq[index].last_avail_index;
> + vmsg->size = sizeof(vmsg->payload.state);
> +
> + /* reply */
> + return 1;
> }
>
> static int
> @@ -829,6 +960,10 @@ vubr_set_vring_kick_exec(VubrDev *dev, VhostUserMsg
> *vmsg)
> DPRINT("Waiting for kicks on fd: %d for vq: %d\n",
> dev->vq[index].kick_fd, index);
> }
> + if (dev->vq[0].kick_fd != -1 &&
> + dev->vq[1].kick_fd != -1) {
> + dev->ready = 1;
> + }
> return 0;
> }
>
> @@ -858,9 +993,12 @@ vubr_set_vring_err_exec(VubrDev *dev, VhostUserMsg *vmsg)
> static int
> vubr_get_protocol_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
> {
> - /* FIXME: unimplented */
> + vmsg->payload.u64 = 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD;
> DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
> - return 0;
> + vmsg->size = sizeof(vmsg->payload.u64);
> +
> + /* Reply */
> + return 1;
> }
>
> static int
> @@ -1012,6 +1150,12 @@ vubr_new(const char *path)
> };
> }
>
> + /* Init log */
> + dev->log_call_fd = -1;
> + dev->log_size = 0;
> + dev->log_table = 0;
> + dev->ready = 0;
> +
> /* Get a UNIX socket. */
> dev->sock = socket(AF_UNIX, SOCK_STREAM, 0);
> if (dev->sock == -1) {
> --
> --Victor