qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH COLO-Frame v12 21/38] COLO failover: Introduce a new command to trigger a failover


From: Markus Armbruster
Subject: Re: [Qemu-devel] [PATCH COLO-Frame v12 21/38] COLO failover: Introduce a new command to trigger a failover
Date: Sat, 19 Dec 2015 10:38:27 +0100
User-agent: Gnus/5.13 (Gnus v5.13) Emacs/24.5 (gnu/linux)

zhanghailiang <address@hidden> writes:

> We leave users to choose whatever heartbeat solution they want. If the
> heartbeat is lost, or they detect other errors, they can use the
> experimental command 'x_colo_lost_heartbeat' to tell COLO to do failover,
> and COLO will perform the corresponding operations.
>
> For example, if the command is sent to the PVM, the Primary side will
> exit COLO mode and take over operation. If sent to the Secondary, the
> secondary will run failover work, then take over server operation to
> become the new Primary.
>
> Cc: Luiz Capitulino <address@hidden>
> Cc: Eric Blake <address@hidden>
> Cc: Markus Armbruster <address@hidden>
> Signed-off-by: zhanghailiang <address@hidden>
> Signed-off-by: Li Zhijian <address@hidden>
> ---
> v11:
> - Add more comments for x-colo-lost-heartbeat command (Eric's suggestion)
> - Return 'enum' instead of 'int' for get_colo_mode() (Eric's suggestion)
> v10:
> - Rename command colo_lost_hearbeat to experimental 'x_colo_lost_heartbeat'
>
> Signed-off-by: zhanghailiang <address@hidden>
> ---
>  hmp-commands.hx              | 15 +++++++++++++++
>  hmp.c                        |  8 ++++++++
>  hmp.h                        |  1 +
>  include/migration/colo.h     |  3 +++
>  include/migration/failover.h | 20 ++++++++++++++++++++
>  migration/Makefile.objs      |  2 +-
>  migration/colo-comm.c        | 11 +++++++++++
>  migration/colo-failover.c    | 41 +++++++++++++++++++++++++++++++++++++++++
>  migration/colo.c             |  1 +
>  qapi-schema.json             | 29 +++++++++++++++++++++++++++++
>  qmp-commands.hx              | 19 +++++++++++++++++++
>  stubs/migration-colo.c       |  8 ++++++++
>  12 files changed, 157 insertions(+), 1 deletion(-)
>  create mode 100644 include/migration/failover.h
>  create mode 100644 migration/colo-failover.c
>
> diff --git a/hmp-commands.hx b/hmp-commands.hx
> index bb52e4d..a381b0b 100644
> --- a/hmp-commands.hx
> +++ b/hmp-commands.hx
> @@ -1039,6 +1039,21 @@ migration (or once already in postcopy).
>  ETEXI
>  
>      {
> +        .name       = "x_colo_lost_heartbeat",
> +        .args_type  = "",
> +        .params     = "",
> +        .help       = "Tell COLO that heartbeat is lost,\n\t\t\t"
> +                      "a failover or takeover is needed.",
> +        .mhandler.cmd = hmp_x_colo_lost_heartbeat,
> +    },
> +
> +STEXI
> +@item x_colo_lost_heartbeat
> +@findex x_colo_lost_heartbeat
> +Tell COLO that heartbeat is lost, a failover or takeover is needed.
> +ETEXI
> +
> +    {
>          .name       = "client_migrate_info",
>          .args_type  = "protocol:s,hostname:s,port:i?,tls-port:i?,cert-subject:s?",
>          .params     = "protocol hostname port tls-port cert-subject",
> diff --git a/hmp.c b/hmp.c
> index ee87d38..dc6dc30 100644
> --- a/hmp.c
> +++ b/hmp.c
> @@ -1310,6 +1310,14 @@ void hmp_migrate_start_postcopy(Monitor *mon, const QDict *qdict)
>      hmp_handle_error(mon, &err);
>  }
>  
> +void hmp_x_colo_lost_heartbeat(Monitor *mon, const QDict *qdict)
> +{
> +    Error *err = NULL;
> +
> +    qmp_x_colo_lost_heartbeat(&err);
> +    hmp_handle_error(mon, &err);
> +}
> +
>  void hmp_set_password(Monitor *mon, const QDict *qdict)
>  {
>      const char *protocol  = qdict_get_str(qdict, "protocol");
> diff --git a/hmp.h b/hmp.h
> index a8c5b5a..864a300 100644
> --- a/hmp.h
> +++ b/hmp.h
> @@ -70,6 +70,7 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict);
>  void hmp_migrate_set_cache_size(Monitor *mon, const QDict *qdict);
>  void hmp_client_migrate_info(Monitor *mon, const QDict *qdict);
>  void hmp_migrate_start_postcopy(Monitor *mon, const QDict *qdict);
> +void hmp_x_colo_lost_heartbeat(Monitor *mon, const QDict *qdict);
>  void hmp_set_password(Monitor *mon, const QDict *qdict);
>  void hmp_expire_password(Monitor *mon, const QDict *qdict);
>  void hmp_eject(Monitor *mon, const QDict *qdict);
> diff --git a/include/migration/colo.h b/include/migration/colo.h
> index 2676c4a..ba27719 100644
> --- a/include/migration/colo.h
> +++ b/include/migration/colo.h
> @@ -17,6 +17,7 @@
>  #include "migration/migration.h"
>  #include "qemu/coroutine_int.h"
>  #include "qemu/thread.h"
> +#include "qemu/main-loop.h"
>  
>  bool colo_supported(void);
>  void colo_info_mig_init(void);
> @@ -29,4 +30,6 @@ bool migration_incoming_enable_colo(void);
>  void migration_incoming_exit_colo(void);
>  void *colo_process_incoming_thread(void *opaque);
>  bool migration_incoming_in_colo_state(void);
> +
> +COLOMode get_colo_mode(void);
>  #endif
> diff --git a/include/migration/failover.h b/include/migration/failover.h
> new file mode 100644
> index 0000000..1785b52
> --- /dev/null
> +++ b/include/migration/failover.h
> @@ -0,0 +1,20 @@
> +/*
> + *  COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO)
> + *  (a.k.a. Fault Tolerance or Continuous Replication)
> + *
> + * Copyright (c) 2015 HUAWEI TECHNOLOGIES CO.,LTD.
> + * Copyright (c) 2015 FUJITSU LIMITED
> + * Copyright (c) 2015 Intel Corporation
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or
> + * later.  See the COPYING file in the top-level directory.
> + */
> +
> +#ifndef QEMU_FAILOVER_H
> +#define QEMU_FAILOVER_H
> +
> +#include "qemu-common.h"
> +
> +void failover_request_active(Error **errp);
> +
> +#endif
> diff --git a/migration/Makefile.objs b/migration/Makefile.objs
> index 81b5713..920d1e7 100644
> --- a/migration/Makefile.objs
> +++ b/migration/Makefile.objs
> @@ -1,6 +1,6 @@
>  common-obj-y += migration.o tcp.o
> -common-obj-$(CONFIG_COLO) += colo.o
>  common-obj-y += colo-comm.o
> +common-obj-$(CONFIG_COLO) += colo.o colo-failover.o
>  common-obj-y += vmstate.o
>  common-obj-y += qemu-file.o qemu-file-buf.o qemu-file-unix.o qemu-file-stdio.o
>  common-obj-y += xbzrle.o postcopy-ram.o
> diff --git a/migration/colo-comm.c b/migration/colo-comm.c
> index 30df3d3..58a6488 100644
> --- a/migration/colo-comm.c
> +++ b/migration/colo-comm.c
> @@ -20,6 +20,17 @@ typedef struct {
>  
>  static COLOInfo colo_info;
>  
> +COLOMode get_colo_mode(void)
> +{
> +    if (migration_in_colo_state()) {
> +        return COLO_MODE_PRIMARY;
> +    } else if (migration_incoming_in_colo_state()) {
> +        return COLO_MODE_SECONDARY;
> +    } else {
> +        return COLO_MODE_UNKNOWN;
> +    }
> +}
> +
>  static void colo_info_pre_save(void *opaque)
>  {
>      COLOInfo *s = opaque;
> diff --git a/migration/colo-failover.c b/migration/colo-failover.c
> new file mode 100644
> index 0000000..e3897c6
> --- /dev/null
> +++ b/migration/colo-failover.c
> @@ -0,0 +1,41 @@
> +/*
> + * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO)
> + * (a.k.a. Fault Tolerance or Continuous Replication)
> + *
> + * Copyright (c) 2015 HUAWEI TECHNOLOGIES CO., LTD.
> + * Copyright (c) 2015 FUJITSU LIMITED
> + * Copyright (c) 2015 Intel Corporation
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or
> + * later.  See the COPYING file in the top-level directory.
> + */
> +
> +#include "migration/colo.h"
> +#include "migration/failover.h"
> +#include "qmp-commands.h"
> +#include "qapi/qmp/qerror.h"
> +
> +static QEMUBH *failover_bh;
> +
> +static void colo_failover_bh(void *opaque)
> +{
> +    qemu_bh_delete(failover_bh);
> +    failover_bh = NULL;
> +    /*TODO: Do failover work */
> +}
> +
> +void failover_request_active(Error **errp)
> +{
> +    failover_bh = qemu_bh_new(colo_failover_bh, NULL);
> +    qemu_bh_schedule(failover_bh);
> +}
> +
> +void qmp_x_colo_lost_heartbeat(Error **errp)
> +{
> +    if (get_colo_mode() == COLO_MODE_UNKNOWN) {
> +        error_setg(errp, QERR_FEATURE_DISABLED, "colo");
> +        return;
> +    }
> +
> +    failover_request_active(errp);
> +}
> diff --git a/migration/colo.c b/migration/colo.c
> index ca5df44..7098497 100644
> --- a/migration/colo.c
> +++ b/migration/colo.c
> @@ -17,6 +17,7 @@
>  #include "trace.h"
>  #include "qemu/error-report.h"
>  #include "qemu/sockets.h"
> +#include "migration/failover.h"
>  
>  /* colo buffer */
>  #define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024)
> diff --git a/qapi-schema.json b/qapi-schema.json
> index a5699a7..feb7d53 100644
> --- a/qapi-schema.json
> +++ b/qapi-schema.json
> @@ -761,6 +761,35 @@
>              'vmstate-send', 'vmstate-size','vmstate-received',
>              'vmstate-loaded' ] }
>  
> +##
> +# @COLOMode
> +#
> +# The colo mode

This is rather terse for an ignorant reader like me.

> +#
> +# @unknown: unknown mode

What does "unknown mode" mean, and how can it happen?

> +#
> +# @primary: master side
> +#
> +# @secondary: slave side
> +#
> +# Since: 2.6
> +##
> +{ 'enum': 'COLOMode',
> +  'data': [ 'unknown', 'primary', 'secondary'] }
> +
> +##
> +# @x-colo-lost-heartbeat
> +#
> +# Tell qemu that heartbeat is lost, request it to do takeover procedures.
> +# If this command is sent to the PVM, the Primary side will exit COLO mode.
> +# If sent to the Secondary, the Secondary side will run failover work,
> +# then take over server operation to become the service VM.
> +#
> +# Since: 2.6
> +##
> +{ 'command': 'x-colo-lost-heartbeat' }
> +
> +##
>  # @MouseInfo:
>  #
>  # Information about a mouse device.
> diff --git a/qmp-commands.hx b/qmp-commands.hx
> index 89756c9..76ad208 100644
> --- a/qmp-commands.hx
> +++ b/qmp-commands.hx
> @@ -805,6 +805,25 @@ Example:
>  EQMP
>  
>      {
> +        .name       = "x-colo-lost-heartbeat",
> +        .args_type  = "",
> +        .mhandler.cmd_new = qmp_marshal_x_colo_lost_heartbeat,
> +    },
> +
> +SQMP
> +x-colo-lost-heartbeat
> +--------------------
> +
> +Tell COLO that heartbeat is lost, a failover or takeover is needed.
> +
> +Example:
> +
> +-> { "execute": "x-colo-lost-heartbeat" }
> +<- { "return": {} }
> +
> +EQMP
> +
> +    {
>          .name       = "client_migrate_info",
>          .args_type  = "protocol:s,hostname:s,port:i?,tls-port:i?,cert-subject:s?",
>          .params     = "protocol hostname port tls-port cert-subject",
> diff --git a/stubs/migration-colo.c b/stubs/migration-colo.c
> index c12516e..5028f63 100644
> --- a/stubs/migration-colo.c
> +++ b/stubs/migration-colo.c
> @@ -11,6 +11,7 @@
>   */
>  
>  #include "migration/colo.h"
> +#include "qmp-commands.h"
>  
>  bool colo_supported(void)
>  {
> @@ -35,3 +36,10 @@ void *colo_process_incoming_thread(void *opaque)
>  {
>      return NULL;
>  }
> +
> +void qmp_x_colo_lost_heartbeat(Error **errp)
> +{
> +    error_setg(errp, "COLO is not supported, please rerun configure"
> +                     " with --enable-colo option in order to support"
> +                     " COLO feature");
> +}



reply via email to

[Prev in Thread] Current Thread [Next in Thread]