qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [RFC PATCH 05/13] add rcu library


From: Blue Swirl
Subject: Re: [Qemu-devel] [RFC PATCH 05/13] add rcu library
Date: Wed, 17 Aug 2011 17:04:21 +0000

On Mon, Aug 15, 2011 at 9:08 PM, Paolo Bonzini <address@hidden> wrote:
> This includes a (mangled) copy of the urcu-qsbr code from liburcu.
> The main changes are: 1) removing dependencies on many other header files
> in liburcu; 2) removing for simplicity the tentative busy waiting in
> synchronize_rcu, which has limited performance effects; 3) replacing
> futexes in synchronize_rcu with QemuEvents for Win32 portability.
> The API is the same as liburcu, so it should be possible in the future
> to require liburcu on POSIX systems for example and use our copy only
> on Windows.
>
> Among the various versions available I chose urcu-qsbr, which has the
> fastest rcu_read_{lock,unlock} but requires the program to manually
> annotate quiescent points or intervals.  QEMU threads usually have easily
> identified quiescent periods, so this should not be a problem.
>
> Signed-off-by: Paolo Bonzini <address@hidden>
> ---
>  Makefile.objs |    2 +-
>  qemu-queue.h  |   11 +++
>  rcu-pointer.h |  119 +++++++++++++++++++++++++++++++
>  rcu.c         |  221 
> +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  rcu.h         |  135 +++++++++++++++++++++++++++++++++++
>  5 files changed, 487 insertions(+), 1 deletions(-)
>  create mode 100644 rcu-pointer.h
>  create mode 100644 rcu.c
>  create mode 100644 rcu.h
>
> diff --git a/Makefile.objs b/Makefile.objs
> index 16eef38..902a083 100644
> --- a/Makefile.objs
> +++ b/Makefile.objs
> @@ -6,7 +6,7 @@ qobject-obj-y += qerror.o error.o
>
>  #######################################################################
>  # oslib-obj-y is code depending on the OS (win32 vs posix)
> -oslib-obj-y = osdep.o
> +oslib-obj-y = osdep.o rcu.o
>  oslib-obj-$(CONFIG_WIN32) += oslib-win32.o qemu-thread-win32.o
>  oslib-obj-$(CONFIG_POSIX) += oslib-posix.o qemu-thread-posix.o
>
> diff --git a/qemu-queue.h b/qemu-queue.h
> index 1d07745..cc8e55b 100644
> --- a/qemu-queue.h
> +++ b/qemu-queue.h
> @@ -100,6 +100,17 @@ struct {                                                 
>                \
>         (head)->lh_first = NULL;                                        \
>  } while (/*CONSTCOND*/0)
>
> +#define QLIST_SWAP(dstlist, srclist, field) do {                        \
> +        void *tmplist;                                                  \
> +        tmplist = (srclist)->lh_first;                                  \
> +        (srclist)->lh_first = (dstlist)->lh_first;                      \
> +        if ((srclist)->lh_first != NULL)                                \
> +            (srclist)->lh_first->field.le_prev = &(srclist)->lh_first;  \
> +        (dstlist)->lh_first = tmplist; \
> +        if ((dstlist)->lh_first != NULL)                                \
> +            (dstlist)->lh_first->field.le_prev = &(dstlist)->lh_first;  \
> +} while (/*CONSTCOND*/0)
> +
>  #define QLIST_INSERT_AFTER(listelm, elm, field) do {                    \
>         if (((elm)->field.le_next = (listelm)->field.le_next) != NULL)  \
>                 (listelm)->field.le_next->field.le_prev =               \
> diff --git a/rcu-pointer.h b/rcu-pointer.h
> new file mode 100644
> index 0000000..4381313
> --- /dev/null
> +++ b/rcu-pointer.h
> @@ -0,0 +1,119 @@
> +#ifndef _URCU_POINTER_STATIC_H
> +#define _URCU_POINTER_STATIC_H
> +
> +/*
> + * urcu-pointer-static.h
> + *
> + * Userspace RCU header. Operations on pointers.
> + *
> + * TO BE INCLUDED ONLY IN LGPL-COMPATIBLE CODE. See urcu-pointer.h for
> + * linking dynamically with the userspace rcu library.
> + *
> + * Copyright (c) 2009 Mathieu Desnoyers <address@hidden>
> + * Copyright (c) 2009 Paul E. McKenney, IBM Corporation.
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 
> USA
> + *
> + * IBM's contributions to this file may be relicensed under LGPLv2 or later.
> + */
> +
> +#include "compiler.h"
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +#ifdef __alpha__
> +#define smp_read_barrier_depends()   asm volatile("mb":::"memory")
> +#else
> +#define smp_read_barrier_depends()
> +#endif
> +
> +/**
> + * rcu_dereference - reads (copy) a RCU-protected pointer to a local variable
> + * into a RCU read-side critical section. The pointer can later be safely
> + * dereferenced within the critical section.
> + *
> + * This ensures that the pointer copy is invariant thorough the whole 
> critical
> + * section.
> + *
> + * Inserts memory barriers on architectures that require them (currently only
> + * Alpha) and documents which pointers are protected by RCU.
> + *
> + * The compiler memory barrier in ACCESS_ONCE() ensures that 
> value-speculative
> + * optimizations (e.g. VSS: Value Speculation Scheduling) does not perform 
> the
> + * data read before the pointer read by speculating the value of the pointer.
> + * Correct ordering is ensured because the pointer is read as a volatile 
> access.
> + * This acts as a global side-effect operation, which forbids reordering of
> + * dependent memory operations. Note that such concern about 
> dependency-breaking
> + * optimizations will eventually be taken care of by the 
> "memory_order_consume"
> + * addition to forthcoming C++ standard.
> + *
> + * Should match rcu_assign_pointer() or rcu_xchg_pointer().
> + */
> +
> +#define rcu_dereference(p)     ({                                      \
> +                               typeof(p) _________p1 = ACCESS_ONCE(p); \
> +                               smp_read_barrier_depends();             \
> +                               (_________p1);                          \
> +                               })
> +
> +/**
> + * rcu_cmpxchg_pointer - same as rcu_assign_pointer, but tests if the pointer
> + * is as expected by "old". If succeeds, returns the previous pointer to the
> + * data structure, which can be safely freed after waiting for a quiescent 
> state
> + * using synchronize_rcu(). If fails (unexpected value), returns old (which
> + * should not be freed !).
> + */
> +
> +#define rcu_cmpxchg_pointer(p, old, _new)                              \
> +       ({                                                              \
> +               typeof(*p) _________pold = (old);                       \
> +               typeof(*p) _________pnew = (_new);                      \
> +               if (!__builtin_constant_p(_new) ||                      \
> +                   ((_new) != NULL))                                   \
> +                       __sync_synchronize();                           \
> +               uatomic_cmpxchg(p, _________pold, _________pnew);       \
> +       })
> +
> +#define rcu_set_pointer(p, v)                          \
> +       ({                                              \
> +               typeof(*p) _________pv = (v);           \
> +               if (!__builtin_constant_p(v) ||         \
> +                   ((v) != NULL))                      \
> +                       __sync_synchronize();           \
> +               ACCESS_ONCE(*p) = _________pv;          \
> +       })
> +
> +/**
> + * rcu_assign_pointer - assign (publicize) a pointer to a new data structure
> + * meant to be read by RCU read-side critical sections. Returns the assigned
> + * value.
> + *
> + * Documents which pointers will be dereferenced by RCU read-side critical
> + * sections and adds the required memory barriers on architectures requiring
> + * them. It also makes sure the compiler does not reorder code initializing 
> the
> + * data structure before its publication.
> + *
> + * Should match rcu_dereference_pointer().
> + */
> +
> +#define rcu_assign_pointer(p, v)       rcu_set_pointer(&(p), v)
> +
> +#ifdef __cplusplus
> +}
> +#endif
> +
> +#endif /* _URCU_POINTER_STATIC_H */
> diff --git a/rcu.c b/rcu.c
> new file mode 100644
> index 0000000..e2f347a
> --- /dev/null
> +++ b/rcu.c
> @@ -0,0 +1,221 @@
> +/*
> + * urcu-qsbr.c
> + *
> + * Userspace RCU QSBR library
> + *
> + * Copyright (c) 2009 Mathieu Desnoyers <address@hidden>
> + * Copyright (c) 2009 Paul E. McKenney, IBM Corporation.
> + * Copyright 2011 Red Hat, Inc.
> + *
> + * Ported to QEMU by Paolo Bonzini  <address@hidden>
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 
> USA
> + *
> + * IBM's contributions to this file may be relicensed under LGPLv2 or later.
> + */
> +
> +#include <stdio.h>
> +#include <assert.h>
> +#include <stdlib.h>
> +#include <stdint.h>
> +#include <errno.h>
> +#include "rcu.h"
> +#include "qemu-barrier.h"
> +
> +/*
> + * Global grace period counter.  Bit 0 is one if the thread is online.
> + * Bits 1 and above are defined in synchronize_rcu/update_counter_and_wait.
> + */
> +#define RCU_GP_ONLINE           (1UL << 0)
> +#define RCU_GP_CTR              (1UL << 1)
> +
> +unsigned long rcu_gp_ctr = RCU_GP_ONLINE;
> +
> +QemuEvent rcu_gp_event;
> +QemuMutex rcu_gp_lock;
> +
> +/*
> + * Check whether a quiescent state was crossed between the beginning of
> + * update_counter_and_wait and now.
> + */
> +static inline int rcu_gp_ongoing(unsigned long *ctr)
> +{
> +       unsigned long v;
> +
> +       v = ACCESS_ONCE(*ctr);
> +       return v && (v != rcu_gp_ctr);
> +}
> +
> +/*
> + * Written to only by each individual reader. Read by both the reader and the
> + * writers.
> + */
> +__thread struct rcu_reader rcu_reader;
> +
> +typedef QLIST_HEAD(, rcu_reader) ThreadList;
> +static ThreadList registry = QLIST_HEAD_INITIALIZER(registry);
> +
> +static void update_counter_and_wait(void)
> +{
> +       ThreadList qsreaders = QLIST_HEAD_INITIALIZER(qsreaders);
> +       struct rcu_reader *index, *tmp;
> +
> +       if (sizeof (rcu_gp_ctr) <= 4) {
> +               /* Switch parity: 0 -> 1, 1 -> 0 */
> +               ACCESS_ONCE(rcu_gp_ctr) = rcu_gp_ctr ^ RCU_GP_CTR;
> +       } else {
> +               /* Increment current grace period.  */
> +               ACCESS_ONCE(rcu_gp_ctr) = rcu_gp_ctr + RCU_GP_CTR;
> +       }
> +
> +       barrier();
> +
> +       /*
> +        * Wait for each thread rcu_reader_qs_gp count to become 0.
> +        */
> +       for (;;) {
> +               /*
> +                * We want to be notified of changes made to rcu_gp_ongoing
> +                * while we walk the list.
> +                */
> +               qemu_event_reset(&rcu_gp_event);
> +               QLIST_FOREACH_SAFE(index, &registry, node, tmp) {
> +                       if (!rcu_gp_ongoing(&index->ctr)) {
> +                               QLIST_REMOVE(index, node);
> +                               QLIST_INSERT_HEAD(&qsreaders, index, node);
> +                       }
> +               }
> +
> +               if (QLIST_EMPTY(&registry)) {
> +                       break;
> +               }
> +
> +               /*
> +                * Wait for one thread to report a quiescent state and
> +                * try again.
> +                */
> +               qemu_event_wait(&rcu_gp_event);
> +       }
> +       smp_mb();
> +
> +       /* put back the reader list in the registry */
> +       QLIST_SWAP(&registry, &qsreaders, node);
> +}
> +
> +/*
> + * Using a two-subphases algorithm for architectures with smaller than 64-bit
> + * long-size to ensure we do not encounter an overflow bug.
> + */
> +
> +void synchronize_rcu(void)
> +{
> +       unsigned long was_online;
> +
> +       was_online = rcu_reader.ctr;
> +
> +       /* All threads should read qparity before accessing data structure
> +        * where new ptr points to.
> +        */
> +
> +       /*
> +        * Mark the writer thread offline to make sure we don't wait for
> +        * our own quiescent state. This allows using synchronize_rcu()
> +        * in threads registered as readers.
> +        *
> +        * Also includes a memory barrier.
> +        */
> +       if (was_online) {
> +               rcu_thread_offline();
> +       } else {
> +               smp_mb();
> +       }
> +
> +       qemu_mutex_lock(&rcu_gp_lock);
> +
> +       if (QLIST_EMPTY(&registry))
> +               goto out;
> +
> +       if (sizeof(rcu_gp_ctr) <= 4) {
> +               /*
> +                * Wait for previous parity to be empty of readers.
> +                */
> +               update_counter_and_wait();      /* 0 -> 1, wait readers in 
> parity 0 */
> +
> +               /*
> +                * Must finish waiting for quiescent state for parity 0 before
> +                * committing next rcu_gp_ctr update to memory. Failure to
> +                * do so could result in the writer waiting forever while new
> +                * readers are always accessing data (no progress).  Enforce
> +                * compiler-order of load rcu_reader ctr before store to
> +                * rcu_gp_ctr.
> +                */
> +               barrier();
> +
> +               /*
> +                * Adding a memory barrier which is _not_ formally required,
> +                * but makes the model easier to understand. It does not have 
> a
> +                * big performance impact anyway, given this is the 
> write-side.
> +                */
> +               smp_mb();
> +       }
> +
> +       /*
> +        * Wait for previous parity/grace period to be empty of readers.
> +        */
> +       update_counter_and_wait();      /* 1 -> 0, wait readers in parity 1 */
> +out:
> +       qemu_mutex_unlock(&rcu_gp_lock);
> +
> +       if (was_online) {
> +               /* Also includes a memory barrier.  */
> +               rcu_thread_online();
> +       } else {
> +               /*
> +                * Finish waiting for reader threads before letting the old
> +                * ptr being freed.
> +                */
> +               smp_mb();
> +       }
> +}
> +
> +static void rcu_init(void)
> +{
> +       qemu_mutex_init(&rcu_gp_lock);
> +       qemu_event_init(&rcu_gp_event, true);
> +}
> +
> +void rcu_register_thread(void)
> +{
> +       static QemuOnce init = QEMU_ONCE_INIT;
> +       assert(rcu_reader.ctr == 0);
> +
> +       qemu_once(&init, rcu_init);
> +       qemu_mutex_lock(&rcu_gp_lock);
> +       QLIST_INSERT_HEAD(&registry, &rcu_reader, node);
> +       qemu_mutex_unlock(&rcu_gp_lock);
> +       rcu_thread_online();
> +}
> +
> +void rcu_unregister_thread(void)
> +{
> +       /*
> +        * We have to make the thread offline otherwise we end up dealocking
> +        * with a waiting writer.
> +        */
> +       rcu_thread_offline();
> +       qemu_mutex_lock(&rcu_gp_lock);
> +       QLIST_REMOVE(&rcu_reader, node);
> +       qemu_mutex_unlock(&rcu_gp_lock);
> +}
> diff --git a/rcu.h b/rcu.h
> new file mode 100644
> index 0000000..569f696
> --- /dev/null
> +++ b/rcu.h
> @@ -0,0 +1,135 @@
> +#ifndef _URCU_QSBR_H
> +#define _URCU_QSBR_H
> +
> +/*
> + * urcu-qsbr.h
> + *
> + * Userspace RCU QSBR header.
> + *
> + * LGPL-compatible code should include this header with :
> + *
> + * #define _LGPL_SOURCE
> + * #include <urcu.h>
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 
> USA
> + *
> +c* IBM's contributions to this file may be relicensed under LGPLv2 or later.
> + */
> +
> +#include <stdlib.h>
> +#include <assert.h>
> +#include <limits.h>
> +#include <unistd.h>
> +#include <stdint.h>
> +#include <stdbool.h>
> +
> +#include "compiler.h"
> +#include "rcu-pointer.h"
> +#include "qemu-thread.h"
> +#include "qemu-queue.h"
> +#include "qemu-barrier.h"
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +/*
> + * Important !
> + *
> + * Each thread containing read-side critical sections must be registered
> + * with rcu_register_thread() before calling rcu_read_lock().
> + * rcu_unregister_thread() should be called before the thread exits.
> + */
> +
> +#ifdef DEBUG_RCU
> +#define rcu_assert(args...)    assert(args)
> +#else
> +#define rcu_assert(args...)
> +#endif
> +
> +#define RCU_GP_ONLINE          (1UL << 0)
> +#define RCU_GP_CTR             (1UL << 1)
> +
> +/*
> + * Global quiescent period counter with low-order bits unused.
> + * Using a int rather than a char to eliminate false register dependencies
> + * causing stalls on some architectures.
> + */
> +extern unsigned long rcu_gp_ctr;
> +
> +extern QemuEvent rcu_gp_event;
> +
> +struct rcu_reader {
> +       /* Data used by both reader and synchronize_rcu() */
> +       unsigned long ctr;
> +       /* Data used for registry */
> +       QLIST_ENTRY(rcu_reader) node;
> +};
> +
> +extern __thread struct rcu_reader rcu_reader;

$ cat thread.c
static __thread int sigusr1_wfd;
$ gcc -v
Reading specs from
/usr/bin/../lib/gcc-lib/sparc64-unknown-openbsd4.9/4.2.1/specs
Target: sparc64-unknown-openbsd4.9
Configured with: OpenBSD/sparc64 system compiler
Thread model: posix
gcc version 4.2.1 20070719
$ gcc -c -pthread thread.c
thread.c:1: error: thread-local storage not supported for this target

> +
> +static inline void rcu_read_lock(void)
> +{
> +       rcu_assert(rcu_reader.ctr);
> +}
> +
> +static inline void rcu_read_unlock(void)
> +{
> +}
> +
> +static inline void rcu_quiescent_state(void)
> +{
> +       smp_mb();
> +       ACCESS_ONCE(rcu_reader.ctr) = ACCESS_ONCE(rcu_gp_ctr);
> +#if 0
> +       /* write rcu_reader.ctr before read futex.  Included in
> +        * qemu_event_set. */
> +       smp_mb();
> +#endif
> +       qemu_event_set(&rcu_gp_event);
> +}
> +
> +static inline void rcu_thread_offline(void)
> +{
> +       smp_mb();
> +       ACCESS_ONCE(rcu_reader.ctr) = 0;
> +#if 0
> +       /* write rcu_reader.ctr before read futex.  Included in
> +        * qemu_event_set. */
> +       smp_mb();
> +#endif
> +       qemu_event_set(&rcu_gp_event);
> +}
> +
> +static inline void rcu_thread_online(void)
> +{
> +       smp_mb();       /* Ensure the compiler does not reorder us with mutex 
> */
> +       ACCESS_ONCE(rcu_reader.ctr) = ACCESS_ONCE(rcu_gp_ctr);
> +       smp_mb();
> +}
> +
> +extern void synchronize_rcu(void);
> +
> +/*
> + * Reader thread registration.
> + */
> +extern void rcu_register_thread(void);
> +extern void rcu_unregister_thread(void);
> +
> +#ifdef __cplusplus
> +}
> +#endif
> +
> +#endif /* _URCU_QSBR_H */
> --
> 1.7.6
>
>
>
>



reply via email to

[Prev in Thread] Current Thread [Next in Thread]