From: Jeff Cody
Subject: Re: [Qemu-devel] [PATCH v3] block/vxhs: Add Veritas HyperScale VxHS block device support
Date: Fri, 28 Oct 2016 15:03:50 -0400
User-agent: Mutt/1.5.24 (2015-08-30)

On Fri, Oct 28, 2016 at 12:44:27AM -0700, Ashish Mittal wrote:
> This patch adds support for a new block device type called "vxhs".
> Source code for the qnio library that this code loads can be downloaded from:
> https://github.com/MittalAshish/libqnio.git
>
> Sample command line using the JSON syntax:
> ./qemu-system-x86_64 -name instance-00000008 -S -vnc 0.0.0.0:0 -k en-us
> -vga cirrus -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x5
> -msg timestamp=on
> 'json:{"driver":"vxhs","vdisk_id":"{c3e9095a-a5ee-4dce-afeb-2a59fb387410}",
> "server":{"host":"172.172.17.4","port":"9999"}}'
>
> Sample command line using the URI syntax:
> qemu-img convert -f raw -O raw -n
> /var/lib/nova/instances/_base/0c5eacd5ebea5ed914b6a3e7b18f1ce734c386ad
> vxhs://192.168.0.1:9999/%7Bc6718f6b-0401-441d-a8c3-1f0064d75ee0%7D
>
> Signed-off-by: Ashish Mittal <address@hidden>
> ---
> v3 changelog:
> (1) Added QAPI schema for the VxHS driver.
>
> v2 changelog:
> (1) Changes done in response to v1 comments.
>
> TODO:
> (1) Add qemu-iotest
> (2) We need to be able to free all resources once we close the last vxhs
> drive.
>
> block/Makefile.objs | 2 +
> block/trace-events | 21 ++
> block/vxhs.c | 669 +++++++++++++++++++++++++++++++++++++++++++++++++++
> configure | 41 ++++
> qapi/block-core.json | 20 +-
> 5 files changed, 751 insertions(+), 2 deletions(-)
> create mode 100644 block/vxhs.c
>
> diff --git a/block/Makefile.objs b/block/Makefile.objs
> index 67a036a..58313a2 100644
> --- a/block/Makefile.objs
> +++ b/block/Makefile.objs
> @@ -18,6 +18,7 @@ block-obj-$(CONFIG_LIBNFS) += nfs.o
> block-obj-$(CONFIG_CURL) += curl.o
> block-obj-$(CONFIG_RBD) += rbd.o
> block-obj-$(CONFIG_GLUSTERFS) += gluster.o
> +block-obj-$(CONFIG_VXHS) += vxhs.o
> block-obj-$(CONFIG_ARCHIPELAGO) += archipelago.o
> block-obj-$(CONFIG_LIBSSH2) += ssh.o
> block-obj-y += accounting.o dirty-bitmap.o
> @@ -38,6 +39,7 @@ rbd.o-cflags := $(RBD_CFLAGS)
> rbd.o-libs := $(RBD_LIBS)
> gluster.o-cflags := $(GLUSTERFS_CFLAGS)
> gluster.o-libs := $(GLUSTERFS_LIBS)
> +vxhs.o-libs := $(VXHS_LIBS)
> ssh.o-cflags := $(LIBSSH2_CFLAGS)
> ssh.o-libs := $(LIBSSH2_LIBS)
> archipelago.o-libs := $(ARCHIPELAGO_LIBS)
> diff --git a/block/trace-events b/block/trace-events
> index 05fa13c..94249ee 100644
> --- a/block/trace-events
> +++ b/block/trace-events
> @@ -114,3 +114,24 @@ qed_aio_write_data(void *s, void *acb, int ret, uint64_t offset, size_t len) "s
> qed_aio_write_prefill(void *s, void *acb, uint64_t start, size_t len, uint64_t offset) "s %p acb %p start %"PRIu64" len %zu offset %"PRIu64
> qed_aio_write_postfill(void *s, void *acb, uint64_t start, size_t len, uint64_t offset) "s %p acb %p start %"PRIu64" len %zu offset %"PRIu64
> qed_aio_write_main(void *s, void *acb, int ret, uint64_t offset, size_t len) "s %p acb %p ret %d offset %"PRIu64" len %zu"
> +
> +# block/vxhs.c
> +vxhs_iio_callback(int error, int reason) "ctx is NULL: error %d, reason %d"
> +vxhs_setup_qnio(void *s) "Context to HyperScale IO manager = %p"
> +vxhs_iio_callback_chnfail(int err, int error) "QNIO channel failed, no i/o %d, %d"
> +vxhs_iio_callback_unknwn(int opcode, int err) "unexpected opcode %d, errno %d"
> +vxhs_open_fail(int ret) "Could not open the device. Error = %d"
> +vxhs_open_epipe(int ret) "Could not create a pipe for device. Bailing out. Error=%d"
> +vxhs_aio_rw_invalid(int req) "Invalid I/O request iodir %d"
> +vxhs_aio_rw_ioerr(char *guid, int iodir, uint64_t size, uint64_t off, void *acb, int ret, int err) "IO ERROR (vDisk %s) FOR : Read/Write = %d size = %lu offset = %lu ACB = %p. Error = %d, errno = %d"
> +vxhs_get_vdisk_stat_err(char *guid, int ret, int err) "vDisk (%s) stat ioctl failed, ret = %d, errno = %d"
> +vxhs_get_vdisk_stat(char *vdisk_guid, uint64_t vdisk_size) "vDisk %s stat ioctl returned size %lu"
> +vxhs_qnio_iio_open(const char *ip) "Failed to connect to storage agent on host-ip %s"
> +vxhs_qnio_iio_devopen(const char *fname) "Failed to open vdisk device: %s"
> +vxhs_complete_aio(void *acb, uint64_t ret) "aio failed acb %p ret %ld"
> +vxhs_parse_uri_filename(const char *filename) "URI passed via bdrv_parse_filename %s"
> +vxhs_qemu_init_vdisk(const char *vdisk_id) "vdisk_id from json %s"
> +vxhs_parse_uri_hostinfo(int num, char *host, int port) "Host %d: IP %s, Port %d"
> +vxhs_qemu_init(char *of_vsa_addr, int port) "Adding host %s:%d to BDRVVXHSState"
> +vxhs_qemu_init_filename(const char *filename) "Filename passed as %s"
> +vxhs_close(char *vdisk_guid) "Closing vdisk %s"
> diff --git a/block/vxhs.c b/block/vxhs.c
> new file mode 100644
> index 0000000..08ad681
> --- /dev/null
> +++ b/block/vxhs.c
> @@ -0,0 +1,669 @@
> +/*
> + * QEMU Block driver for Veritas HyperScale (VxHS)
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + *
> + */
> +
> +#include "qemu/osdep.h"
> +#include "block/block_int.h"
> +#include <qnio/qnio_api.h>
> +#include "qapi/qmp/qerror.h"
> +#include "qapi/qmp/qdict.h"
> +#include "qapi/qmp/qstring.h"
> +#include "trace.h"
> +#include "qemu/uri.h"
> +#include "qapi/error.h"
> +#include "qemu/error-report.h"
> +
> +#define VDISK_FD_READ 0
> +#define VDISK_FD_WRITE 1
> +
> +#define VXHS_OPT_FILENAME "filename"
> +#define VXHS_OPT_VDISK_ID "vdisk_id"
> +#define VXHS_OPT_SERVER "server"
> +#define VXHS_OPT_HOST "host"
> +#define VXHS_OPT_PORT "port"
> +
> +/* qnio client ioapi_ctx */
> +static void *global_qnio_ctx;
This is still never freed anywhere, is it? Once all drives are done with
the global context, we need to free the resources allocated in it.
You'll need something like a ref counter to track usage of the global
instance, and then free the resources when the last drive is closed.
Coincidentally, a patch doing something very similar for gluster was posted
on the list recently. If you look at "block/gluster: memory usage: use one
glfs instance per volume", you can see the basic idea I am talking about:
wrap the global ctx in a struct, and track refs and unrefs to it.
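
A very rough sketch of the sort of thing I mean (untested, and the struct
and helper names here are just placeholders, not anything from libqnio or
the gluster patch):

typedef struct VXHSQnioCtx {
    void *ioapi_ctx;    /* context returned by iio_init() */
    int refcnt;         /* number of open vxhs drives using it */
} VXHSQnioCtx;

static VXHSQnioCtx global_qnio_ctx;

/* Take a reference, initializing qnio on first use.  Callers run under
 * the QEMU global mutex (bdrv_open), so no extra locking is needed. */
static void *vxhs_get_qnio_ctx(void)
{
    if (global_qnio_ctx.refcnt == 0) {
        global_qnio_ctx.ioapi_ctx = iio_init(vxhs_iio_callback);
        if (global_qnio_ctx.ioapi_ctx == NULL) {
            return NULL;
        }
    }
    global_qnio_ctx.refcnt++;
    return global_qnio_ctx.ioapi_ctx;
}

/* Drop a reference; tear everything down when the last drive closes. */
static void vxhs_put_qnio_ctx(void)
{
    assert(global_qnio_ctx.refcnt > 0);
    if (--global_qnio_ctx.refcnt == 0) {
        /* whatever libqnio call (if any) frees an ioapi_ctx goes here */
        global_qnio_ctx.ioapi_ctx = NULL;
    }
}

Then vxhs_qemu_init() would call the get helper instead of poking
global_qnio_ctx directly, and vxhs_close() would call the put helper after
vxhs_qnio_iio_close().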
> +
> +/* vdisk prefix to pass to qnio */
> +static const char vdisk_prefix[] = "/dev/of/vdisk";
> +
> +typedef enum {
> + VDISK_AIO_READ,
> + VDISK_AIO_WRITE,
> + VDISK_STAT
> +} VDISKAIOCmd;
> +
> +/*
> + * HyperScale AIO callbacks structure
> + */
> +typedef struct VXHSAIOCB {
> + BlockAIOCB common;
> + int err;
> + int direction; /* IO direction (r/w) */
> + size_t io_offset;
> + size_t size;
> + QEMUIOVector *qiov;
> +} VXHSAIOCB;
> +
> +typedef struct VXHSvDiskHostsInfo {
> + int qnio_cfd; /* Channel FD */
> + int vdisk_rfd; /* vDisk remote FD */
> + char *hostip; /* Host's IP addresses */
> + int port; /* Host's port number */
> +} VXHSvDiskHostsInfo;
> +
> +/*
> + * Structure per vDisk maintained for state
> + */
> +typedef struct BDRVVXHSState {
> + int fds[2];
> + int64_t vdisk_size;
> + int event_reader_pos;
> + VXHSAIOCB *qnio_event_acb;
> + VXHSvDiskHostsInfo vdisk_hostinfo; /* Per host info */
> + char *vdisk_guid;
> +} BDRVVXHSState;
> +
> +static void vxhs_qnio_iio_close(BDRVVXHSState *s)
> +{
> + /*
> + * Close vDisk device
> + */
> + if (s->vdisk_hostinfo.vdisk_rfd >= 0) {
> + iio_devclose(global_qnio_ctx, 0, s->vdisk_hostinfo.vdisk_rfd);
> + s->vdisk_hostinfo.vdisk_rfd = -1;
> + }
> +
> + /*
> + * Close QNIO channel against cached channel-fd
> + */
> + if (s->vdisk_hostinfo.qnio_cfd >= 0) {
> + iio_close(global_qnio_ctx, s->vdisk_hostinfo.qnio_cfd);
> + s->vdisk_hostinfo.qnio_cfd = -1;
> + }
> +}
> +
> +static int vxhs_qnio_iio_open(int *cfd, const char *of_vsa_addr,
> + int *rfd, const char *file_name)
> +{
> + /*
> + * Open qnio channel to storage agent if not opened before.
> + */
> + if (*cfd < 0) {
> + *cfd = iio_open(global_qnio_ctx, of_vsa_addr, 0);
> + if (*cfd < 0) {
> + trace_vxhs_qnio_iio_open(of_vsa_addr);
> + return -ENODEV;
> + }
> + }
> +
> + /*
> + * Open vdisk device
> + */
> + *rfd = iio_devopen(global_qnio_ctx, *cfd, file_name, 0);
> + if (*rfd < 0) {
> + if (*cfd >= 0) {
> + iio_close(global_qnio_ctx, *cfd);
> + *cfd = -1;
> + *rfd = -1;
> + }
> +
> + trace_vxhs_qnio_iio_devopen(file_name);
> + return -ENODEV;
> + }
> +
> + return 0;
> +}
> +
> +static void vxhs_iio_callback(int32_t rfd, uint32_t reason, void *ctx,
> + uint32_t error, uint32_t opcode)
> +{
> + VXHSAIOCB *acb = NULL;
> + BDRVVXHSState *s = NULL;
> + ssize_t ret;
> +
> + switch (opcode) {
> + case IRP_READ_REQUEST:
> + case IRP_WRITE_REQUEST:
> +
> + /*
> + * ctx is VXHSAIOCB*
> + * ctx is NULL if error is QNIOERROR_CHANNEL_HUP or
> + * reason is IIO_REASON_HUP
> + */
> + if (ctx) {
> + acb = ctx;
> + s = acb->common.bs->opaque;
> + } else {
> + trace_vxhs_iio_callback(error, reason);
> + goto out;
> + }
> +
> + if (error) {
> + if (!acb->err) {
> + acb->err = error;
> + }
> + trace_vxhs_iio_callback(error, reason);
> + }
> +
> + ret = qemu_write_full(s->fds[VDISK_FD_WRITE], &acb, sizeof(acb));
> + g_assert(ret == sizeof(acb));
> + break;
> +
> + default:
> + if (error == QNIOERROR_CHANNEL_HUP) {
> + /*
> + * Channel failed, spontaneous notification,
> + * not in response to I/O
> + */
> + trace_vxhs_iio_callback_chnfail(error, errno);
> + } else {
> + trace_vxhs_iio_callback_unknwn(opcode, error);
> + }
> + break;
> + }
> +out:
> + return;
> +}
> +
> +static void vxhs_complete_aio(VXHSAIOCB *acb, BDRVVXHSState *s)
> +{
> + BlockCompletionFunc *cb = acb->common.cb;
> + void *opaque = acb->common.opaque;
> + int ret = 0;
> +
> + if (acb->err != 0) {
> + trace_vxhs_complete_aio(acb, acb->err);
> + /*
> + * We mask all the IO errors generically as EIO for upper layers
> + * Right now our IO Manager uses non standard error codes. Instead
> + * of confusing upper layers with incorrect interpretation we are
> + * doing this workaround.
> + */
> + ret = (-EIO);
> + }
> +
> + qemu_aio_unref(acb);
> + cb(opaque, ret);
> +}
> +
> +/*
> + * This is the HyperScale event handler registered to QEMU.
> + * It is invoked when any IO gets completed and written on pipe
> + * by callback called from QNIO thread context. Then it marks
> + * the AIO as completed, and releases HyperScale AIO callbacks.
> + */
> +static void vxhs_aio_event_reader(void *opaque)
> +{
> + BDRVVXHSState *s = opaque;
> + char *p;
> + ssize_t ret;
> +
> + do {
> + p = (char *)&s->qnio_event_acb;
> + ret = read(s->fds[VDISK_FD_READ], p + s->event_reader_pos,
> + sizeof(s->qnio_event_acb) - s->event_reader_pos);
> + if (ret > 0) {
> + s->event_reader_pos += ret;
> + if (s->event_reader_pos == sizeof(s->qnio_event_acb)) {
> + s->event_reader_pos = 0;
> + vxhs_complete_aio(s->qnio_event_acb, s);
> + }
> + }
> + } while (ret < 0 && errno == EINTR);
> +}
> +
> +/*
> + * Call QNIO operation to create channels to do IO on vDisk.
> + */
> +
> +static void *vxhs_setup_qnio(void)
> +{
> + void *qnio_ctx = NULL;
> +
> + qnio_ctx = iio_init(vxhs_iio_callback);
> + if (qnio_ctx != NULL) {
> + trace_vxhs_setup_qnio(qnio_ctx);
> + }
> + return qnio_ctx;
> +}
> +
> +static QemuOptsList runtime_opts = {
> + .name = "vxhs",
> + .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
> + .desc = {
> + {
> + .name = VXHS_OPT_FILENAME,
> + .type = QEMU_OPT_STRING,
> + .help = "URI to the Veritas HyperScale image",
> + },
> + {
> + .name = VXHS_OPT_VDISK_ID,
> + .type = QEMU_OPT_STRING,
> + .help = "UUID of the VxHS vdisk",
> + },
> + { /* end of list */ }
> + },
> +};
> +
> +static QemuOptsList runtime_tcp_opts = {
> + .name = "vxhs_tcp",
> + .head = QTAILQ_HEAD_INITIALIZER(runtime_tcp_opts.head),
> + .desc = {
> + {
> + .name = VXHS_OPT_HOST,
> + .type = QEMU_OPT_STRING,
> + .help = "host address (ipv4 addresses)",
> + },
> + {
> + .name = VXHS_OPT_PORT,
> + .type = QEMU_OPT_NUMBER,
> + .help = "port number on which VxHSD is listening (default 9999)",
> + .def_value_str = "9999"
> + },
> + { /* end of list */ }
> + },
> +};
> +
> +/*
> + * Parse the incoming URI and populate *options with the host information.
> + * URI syntax has the limitation of supporting only one host info.
> + * To pass multiple host information, use the JSON syntax.
> + */
> +static int vxhs_parse_uri(const char *filename, QDict *options)
> +{
> + URI *uri = NULL;
> + char *hoststr, *portstr;
> + char *port;
> + int ret = 0;
> +
> + trace_vxhs_parse_uri_filename(filename);
> + uri = uri_parse(filename);
> + if (!uri || !uri->server || !uri->path) {
> + uri_free(uri);
> + return -EINVAL;
> + }
> +
> + hoststr = g_strdup(VXHS_OPT_SERVER".host");
> + qdict_put(options, hoststr, qstring_from_str(uri->server));
> + g_free(hoststr);
> +
> + portstr = g_strdup(VXHS_OPT_SERVER".port");
> + if (uri->port) {
> + port = g_strdup_printf("%d", uri->port);
> + qdict_put(options, portstr, qstring_from_str(port));
> + g_free(port);
> + }
> + g_free(portstr);
> +
> + if (strstr(uri->path, "vxhs") == NULL) {
> + qdict_put(options, "vdisk_id", qstring_from_str(uri->path));
> + }
> +
> + trace_vxhs_parse_uri_hostinfo(1, uri->server, uri->port);
> + uri_free(uri);
> +
> + return ret;
> +}
> +
> +static void vxhs_parse_filename(const char *filename, QDict *options,
> + Error **errp)
> +{
> +    if (qdict_haskey(options, "vdisk_id") || qdict_haskey(options, "server")) {
> +        error_setg(errp, "vdisk_id/server and a file name may not be specified "
> +                   "at the same time");
> + return;
> + }
> +
> + if (strstr(filename, "://")) {
> + int ret = vxhs_parse_uri(filename, options);
> + if (ret < 0) {
> + error_setg(errp, "Invalid URI. URI should be of the form "
> + " vxhs://<host_ip>:<port>/{<vdisk_id>}");
> + }
> + }
> +}
> +
> +static int vxhs_qemu_init(QDict *options, BDRVVXHSState *s,
> + int *cfd, int *rfd, Error **errp)
> +{
> + QDict *backing_options = NULL;
> + QemuOpts *opts, *tcp_opts;
> + const char *vxhs_filename;
> + char *of_vsa_addr = NULL;
> + Error *local_err = NULL;
> + const char *vdisk_id_opt;
> + const char *server_host_opt;
> + char *file_name = NULL;
> + char *str = NULL;
> + int ret = 0;
> +
> + opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
> + qemu_opts_absorb_qdict(opts, options, &local_err);
> + if (local_err) {
> + ret = -EINVAL;
> + goto out;
> + }
> +
> + vxhs_filename = qemu_opt_get(opts, VXHS_OPT_FILENAME);
> + if (vxhs_filename) {
> + trace_vxhs_qemu_init_filename(vxhs_filename);
> + }
> +
> + vdisk_id_opt = qemu_opt_get(opts, VXHS_OPT_VDISK_ID);
> + if (!vdisk_id_opt) {
> + error_setg(&local_err, QERR_MISSING_PARAMETER, VXHS_OPT_VDISK_ID);
> + ret = -EINVAL;
> + goto out;
> + }
> + s->vdisk_guid = g_strdup(vdisk_id_opt);
> + trace_vxhs_qemu_init_vdisk(vdisk_id_opt);
> +
> + str = g_strdup_printf(VXHS_OPT_SERVER".");
> + qdict_extract_subqdict(options, &backing_options, str);
> +
> + /* Create opts info from runtime_tcp_opts list */
> + tcp_opts = qemu_opts_create(&runtime_tcp_opts, NULL, 0, &error_abort);
> + qemu_opts_absorb_qdict(tcp_opts, backing_options, &local_err);
> + if (local_err) {
> + qdict_del(backing_options, str);
> + qemu_opts_del(tcp_opts);
> + ret = -EINVAL;
> + goto out;
> + }
> +
> + server_host_opt = qemu_opt_get(tcp_opts, VXHS_OPT_HOST);
> + if (!server_host_opt) {
> + error_setg(&local_err, QERR_MISSING_PARAMETER,
> + VXHS_OPT_SERVER"."VXHS_OPT_HOST);
> + ret = -EINVAL;
> + goto out;
> + }
> +
> + s->vdisk_hostinfo.hostip = g_strdup(server_host_opt);
> +
> + s->vdisk_hostinfo.port = g_ascii_strtoll(qemu_opt_get(tcp_opts,
> + VXHS_OPT_PORT),
> + NULL, 0);
> +
> + s->vdisk_hostinfo.qnio_cfd = -1;
> + s->vdisk_hostinfo.vdisk_rfd = -1;
> + trace_vxhs_qemu_init(s->vdisk_hostinfo.hostip,
> + s->vdisk_hostinfo.port);
> +
> + qdict_del(backing_options, str);
> + qemu_opts_del(tcp_opts);
> +
> + file_name = g_strdup_printf("%s%s", vdisk_prefix, s->vdisk_guid);
> + of_vsa_addr = g_strdup_printf("of://%s:%d",
> + s->vdisk_hostinfo.hostip,
> + s->vdisk_hostinfo.port);
> +
> + /*
> + * .bdrv_open() and .bdrv_create() run under the QEMU global mutex.
> + */
> + if (global_qnio_ctx == NULL) {
> + global_qnio_ctx = vxhs_setup_qnio();
> + if (global_qnio_ctx == NULL) {
> + error_setg(&local_err, "Failed vxhs_setup_qnio");
> + ret = -EINVAL;
> + goto out;
> + }
> + }
> +
> + ret = vxhs_qnio_iio_open(cfd, of_vsa_addr, rfd, file_name);
> + if (ret) {
> + error_setg(&local_err, "Failed qnio_iio_open");
> + ret = -EIO;
> + }
> +
> +out:
> + g_free(str);
> + g_free(file_name);
> + g_free(of_vsa_addr);
> + qemu_opts_del(opts);
> +
> + if (ret < 0) {
> + error_propagate(errp, local_err);
> + g_free(s->vdisk_hostinfo.hostip);
> + g_free(s->vdisk_guid);
> + s->vdisk_guid = NULL;
> + errno = -ret;
> + }
> +
> + return ret;
> +}
> +
> +static int vxhs_open(BlockDriverState *bs, QDict *options,
> + int bdrv_flags, Error **errp)
> +{
> + BDRVVXHSState *s = bs->opaque;
> + AioContext *aio_context;
> + int qemu_qnio_cfd = -1;
> + int qemu_rfd = -1;
> + int ret = 0;
> +
> + ret = vxhs_qemu_init(options, s, &qemu_qnio_cfd, &qemu_rfd, errp);
> + if (ret < 0) {
> + trace_vxhs_open_fail(ret);
> + return ret;
> + }
> +
> + s->vdisk_hostinfo.qnio_cfd = qemu_qnio_cfd;
> + s->vdisk_hostinfo.vdisk_rfd = qemu_rfd;
> + s->vdisk_size = -1;
> +
> + /*
> + * Create a pipe for communicating between two threads in different
> + * context. Set handler for read event, which gets triggered when
> + * IO completion is done by non-QEMU context.
> + */
> + ret = qemu_pipe(s->fds);
> + if (ret < 0) {
> + trace_vxhs_open_epipe(ret);
> + ret = -errno;
> + goto errout;
> + }
> + fcntl(s->fds[VDISK_FD_READ], F_SETFL, O_NONBLOCK);
> +
> + aio_context = bdrv_get_aio_context(bs);
> + aio_set_fd_handler(aio_context, s->fds[VDISK_FD_READ],
> + false, vxhs_aio_event_reader, NULL, s);
> + return 0;
> +
> +errout:
> + /*
> + * Close remote vDisk device if it was opened earlier
> + */
> + vxhs_qnio_iio_close(s);
> + trace_vxhs_open_fail(ret);
> + return ret;
> +}
> +
> +static const AIOCBInfo vxhs_aiocb_info = {
> + .aiocb_size = sizeof(VXHSAIOCB)
> +};
> +
> +/*
> + * This allocates QEMU-VXHS callback for each IO
> + * and is passed to QNIO. When QNIO completes the work,
> + * it will be passed back through the callback.
> + */
> +static BlockAIOCB *vxhs_aio_rw(BlockDriverState *bs, int64_t sector_num,
> + QEMUIOVector *qiov, int nb_sectors,
> +                               BlockCompletionFunc *cb, void *opaque, int iodir)
> +{
> + VXHSAIOCB *acb = NULL;
> + BDRVVXHSState *s = bs->opaque;
> + size_t size;
> + uint64_t offset;
> + int iio_flags = 0;
> + int ret = 0;
> + uint32_t rfd = s->vdisk_hostinfo.vdisk_rfd;
> +
> + offset = sector_num * BDRV_SECTOR_SIZE;
> + size = nb_sectors * BDRV_SECTOR_SIZE;
> + acb = qemu_aio_get(&vxhs_aiocb_info, bs, cb, opaque);
> + /*
> + * Setup or initialize VXHSAIOCB.
> + * Every single field should be initialized since
> + * acb will be picked up from the slab without
> + * initializing with zero.
> + */
> + acb->io_offset = offset;
> + acb->size = size;
> + acb->err = 0;
> + acb->qiov = qiov;
> + acb->direction = iodir;
> +
> + iio_flags = (IIO_FLAG_DONE | IIO_FLAG_ASYNC);
> +
> + switch (iodir) {
> + case VDISK_AIO_WRITE:
> + ret = iio_writev(global_qnio_ctx, rfd, qiov->iov, qiov->niov,
> + offset, (uint64_t)size, (void *)acb, iio_flags);
> + break;
> + case VDISK_AIO_READ:
> + ret = iio_readv(global_qnio_ctx, rfd, qiov->iov, qiov->niov,
> + offset, (uint64_t)size, (void *)acb, iio_flags);
> + break;
> + default:
> + trace_vxhs_aio_rw_invalid(iodir);
> + goto errout;
> + }
> +
> + if (ret != 0) {
> + trace_vxhs_aio_rw_ioerr(s->vdisk_guid, iodir, size, offset,
> + acb, ret, errno);
> + goto errout;
> + }
> + return &acb->common;
> +
> +errout:
> + qemu_aio_unref(acb);
> + return NULL;
> +}
> +
> +static BlockAIOCB *vxhs_aio_readv(BlockDriverState *bs,
> + int64_t sector_num, QEMUIOVector *qiov,
> + int nb_sectors,
> + BlockCompletionFunc *cb, void *opaque)
> +{
> + return vxhs_aio_rw(bs, sector_num, qiov, nb_sectors, cb,
> + opaque, VDISK_AIO_READ);
> +}
> +
> +static BlockAIOCB *vxhs_aio_writev(BlockDriverState *bs,
> + int64_t sector_num, QEMUIOVector *qiov,
> + int nb_sectors,
> + BlockCompletionFunc *cb, void *opaque)
> +{
> + return vxhs_aio_rw(bs, sector_num, qiov, nb_sectors,
> + cb, opaque, VDISK_AIO_WRITE);
> +}
> +
> +static void vxhs_close(BlockDriverState *bs)
> +{
> + BDRVVXHSState *s = bs->opaque;
> +
> + trace_vxhs_close(s->vdisk_guid);
> + close(s->fds[VDISK_FD_READ]);
> + close(s->fds[VDISK_FD_WRITE]);
> +
> + /*
> + * Clearing all the event handlers for oflame registered to QEMU
> + */
> + aio_set_fd_handler(bdrv_get_aio_context(bs), s->fds[VDISK_FD_READ],
> + false, NULL, NULL, NULL);
> + g_free(s->vdisk_guid);
> + s->vdisk_guid = NULL;
> + vxhs_qnio_iio_close(s);
> +
> + /*
> + * Free the dynamically allocated hostip string
> + */
> + g_free(s->vdisk_hostinfo.hostip);
> + s->vdisk_hostinfo.hostip = NULL;
> + s->vdisk_hostinfo.port = 0;
> +}
> +
> +static int64_t vxhs_get_vdisk_stat(BDRVVXHSState *s)
> +{
> + int64_t vdisk_size = -1;
> + int ret = 0;
> + uint32_t rfd = s->vdisk_hostinfo.vdisk_rfd;
> +
> +    ret = iio_ioctl(global_qnio_ctx, rfd, IOR_VDISK_STAT, &vdisk_size, NULL, 0);
> + if (ret < 0) {
> + trace_vxhs_get_vdisk_stat_err(s->vdisk_guid, ret, errno);
> + return -EIO;
> + }
> +
> + trace_vxhs_get_vdisk_stat(s->vdisk_guid, vdisk_size);
> + return vdisk_size;
> +}
> +
> +/*
> + * Returns the size of vDisk in bytes. This is required
> + * by QEMU block upper block layer so that it is visible
> + * to guest.
> + */
> +static int64_t vxhs_getlength(BlockDriverState *bs)
> +{
> + BDRVVXHSState *s = bs->opaque;
> + int64_t vdisk_size;
> +
> + if (s->vdisk_size <= 0) {
> + vdisk_size = vxhs_get_vdisk_stat(s);
> + if (vdisk_size < 0) {
> + return -EIO;
> + }
> + s->vdisk_size = vdisk_size;
> + }
> +
> + return s->vdisk_size;
> +}
> +
> +static void vxhs_detach_aio_context(BlockDriverState *bs)
> +{
> + BDRVVXHSState *s = bs->opaque;
> +
> + aio_set_fd_handler(bdrv_get_aio_context(bs), s->fds[VDISK_FD_READ],
> + false, NULL, NULL, NULL);
> +
> +}
> +
> +static void vxhs_attach_aio_context(BlockDriverState *bs,
> + AioContext *new_context)
> +{
> + BDRVVXHSState *s = bs->opaque;
> +
> + aio_set_fd_handler(new_context, s->fds[VDISK_FD_READ],
> + false, vxhs_aio_event_reader, NULL, s);
> +}
> +
> +static BlockDriver bdrv_vxhs = {
> + .format_name = "vxhs",
> + .protocol_name = "vxhs",
> + .instance_size = sizeof(BDRVVXHSState),
> + .bdrv_file_open = vxhs_open,
> + .bdrv_parse_filename = vxhs_parse_filename,
> + .bdrv_close = vxhs_close,
> + .bdrv_getlength = vxhs_getlength,
> + .bdrv_aio_readv = vxhs_aio_readv,
> + .bdrv_aio_writev = vxhs_aio_writev,
> + .bdrv_detach_aio_context = vxhs_detach_aio_context,
> + .bdrv_attach_aio_context = vxhs_attach_aio_context,
> +};
> +
> +static void bdrv_vxhs_init(void)
> +{
> + bdrv_register(&bdrv_vxhs);
> +}
> +
> +block_init(bdrv_vxhs_init);
> diff --git a/configure b/configure
> index 8e10059..eeef5d6 100755
> --- a/configure
> +++ b/configure
> @@ -321,6 +321,7 @@ numa=""
> tcmalloc="no"
> jemalloc="no"
> replication="yes"
> +vxhs=""
>
> # parse CC options first
> for opt do
> @@ -1162,6 +1163,11 @@ for opt do
> ;;
> --enable-replication) replication="yes"
> ;;
> + --disable-vxhs) vxhs="no"
> + ;;
> + --enable-vxhs) vxhs="yes"
> + ;;
> +
> *)
> echo "ERROR: unknown option $opt"
> echo "Try '$0 --help' for more information"
> @@ -1394,6 +1400,7 @@ disabled with --disable-FEATURE, default is enabled if available:
> tcmalloc tcmalloc support
> jemalloc jemalloc support
> replication replication support
> + vxhs Veritas HyperScale vDisk backend support
>
> NOTE: The object files are built at the place where configure is launched
> EOF
> @@ -4677,6 +4684,33 @@ if do_cc -nostdlib -Wl,-r -Wl,--no-relax -o $TMPMO $TMPO; then
> fi
>
> ##########################################
> +# Veritas HyperScale block driver VxHS
> +# Check if libqnio is installed
> +
> +if test "$vxhs" != "no" ; then
> + cat > $TMPC <<EOF
> +#include <stdint.h>
> +#include <qnio/qnio_api.h>
> +
> +void *vxhs_callback;
> +
> +int main(void) {
> + iio_init(vxhs_callback);
> + return 0;
> +}
> +EOF
> + vxhs_libs="-lqnio"
> + if compile_prog "" "$vxhs_libs" ; then
> + vxhs=yes
> + else
> + if test "$vxhs" = "yes" ; then
> + feature_not_found "vxhs block device" "Install libqnio. See github"
> + fi
> + vxhs=no
> + fi
> +fi
> +
> +##########################################
> # End of CC checks
> # After here, no more $cc or $ld runs
>
> @@ -5042,6 +5076,7 @@ echo "tcmalloc support $tcmalloc"
> echo "jemalloc support $jemalloc"
> echo "avx2 optimization $avx2_opt"
> echo "replication support $replication"
> +echo "VxHS block device $vxhs"
>
> if test "$sdl_too_old" = "yes"; then
> echo "-> Your SDL version is too old - please upgrade to have SDL support"
> @@ -5650,6 +5685,12 @@ if test "$pthread_setname_np" = "yes" ; then
> echo "CONFIG_PTHREAD_SETNAME_NP=y" >> $config_host_mak
> fi
>
> +if test "$vxhs" = "yes" ; then
> + echo "CONFIG_VXHS=y" >> $config_host_mak
> + echo "VXHS_CFLAGS=$vxhs_cflags" >> $config_host_mak
> + echo "VXHS_LIBS=$vxhs_libs" >> $config_host_mak
> +fi
> +
> if test "$tcg_interpreter" = "yes"; then
> QEMU_INCLUDES="-I\$(SRC_PATH)/tcg/tci $QEMU_INCLUDES"
> elif test "$ARCH" = "sparc64" ; then
> diff --git a/qapi/block-core.json b/qapi/block-core.json
> index 97b1205..d5407a4 100644
> --- a/qapi/block-core.json
> +++ b/qapi/block-core.json
> @@ -1708,7 +1708,7 @@
> ##
> { 'enum': 'BlockdevDriver',
> 'data': [ 'archipelago', 'blkdebug', 'blkverify', 'bochs', 'cloop',
> - 'dmg', 'file', 'ftp', 'ftps', 'gluster', 'host_cdrom',
> + 'dmg', 'file', 'ftp', 'ftps', 'gluster', 'host_cdrom', 'vxhs',
> 'host_device', 'http', 'https', 'luks', 'null-aio', 'null-co',
> 'parallels', 'qcow', 'qcow2', 'qed', 'quorum', 'raw',
> 'replication', 'tftp', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }
> @@ -2220,6 +2220,21 @@
> 'data': { 'filename': 'str' } }
>
> ##
> +# @BlockdevOptionsVxHS
> +#
> +# Driver specific block device options for VxHS
> +#
> +# @vdisk_id: UUID of VxHS volume
> +#
> +# @server: vxhs server IP, port
> +#
> +# Since: 2.7
> +##
> +{ 'struct': 'BlockdevOptionsVxHS',
> + 'data': { 'vdisk_id': 'str',
> + 'server': 'InetSocketAddress' } }
> +
> +##
> # @BlockdevOptions
> #
> # Options for creating a block device. Many options are available for all
> @@ -2283,7 +2298,8 @@
> 'vhdx': 'BlockdevOptionsGenericFormat',
> 'vmdk': 'BlockdevOptionsGenericCOWFormat',
> 'vpc': 'BlockdevOptionsGenericFormat',
> - 'vvfat': 'BlockdevOptionsVVFAT'
> + 'vvfat': 'BlockdevOptionsVVFAT',
> + 'vxhs': 'BlockdevOptionsVxHS'
> } }
>
> ##
> --
> 2.5.5
>