[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH v3 25/34] tests: add atomic_add-bench
From: |
Emilio G. Cota |
Subject: |
Re: [Qemu-devel] [PATCH v3 25/34] tests: add atomic_add-bench |
Date: |
Wed, 14 Sep 2016 22:23:47 -0400 |
User-agent: |
Mutt/1.5.23 (2014-03-12) |
On Wed, Sep 14, 2016 at 14:53:14 +0100, Alex Bennée wrote:
> Richard Henderson <address@hidden> writes:
> > From: "Emilio G. Cota" <address@hidden>
> > QEMU_CFLAGS += -I$(SRC_PATH)/tests
> > @@ -465,6 +466,7 @@ tests/test-qdist$(EXESUF): tests/test-qdist.o
> > $(test-util-obj-y)
> > tests/test-qht$(EXESUF): tests/test-qht.o $(test-util-obj-y)
> > tests/test-qht-par$(EXESUF): tests/test-qht-par.o tests/qht-bench$(EXESUF)
> > $(test-util-obj-y)
> > tests/qht-bench$(EXESUF): tests/qht-bench.o $(test-util-obj-y)
> > +tests/atomic_add-bench$(EXESUF): tests/atomic_add-bench.o
> > $(test-util-obj-y)
>
> This probably more properly lives in tests/tcg/generic or some such but
> that needs the tcg/tests being rehabilitated into the build system so at
> least here it gets built.
I didn't know where to put it; tests/ was easy enough :-)
> > tests/test-qdev-global-props$(EXESUF): tests/test-qdev-global-props.o \
> > hw/core/qdev.o hw/core/qdev-properties.o hw/core/hotplug.o\
> > diff --git a/tests/atomic_add-bench.c b/tests/atomic_add-bench.c
> > new file mode 100644
> > index 0000000..5bbecf6
> > --- /dev/null
> > +++ b/tests/atomic_add-bench.c
>
> I wonder if this would be worth making atomic-bench and adding the other
> atomic operations into the benchmark? I know given the current helper
> overhead it's unlikely to show much difference between the ops but if we
> move to backend support for the tcg atomics it would be a useful tool to
> have.
I'd rather add more ops later if necessary, but if you insist I can do it.
(snip)
> > +static void create_threads(void)
> > +{
> > + unsigned int i;
> > +
> > + threads = g_new(QemuThread, n_threads);
> > + th_info = g_new(struct thread_info, n_threads);
> > + counts = qemu_memalign(64, sizeof(*counts) * range);
>
> This fails on my setup as AFAICT qemu_memalign doesn't give you zeroed
> memory. I added a memset after to zero it out.
Yes I fixed this more than a month ago, among other things in this program,
e.g., running for -d seconds instead of -n operations (much easier way to
fairly measure throughput).
Obviously I forgot to tell anyone about it :/ Sorry for making you waste time.
I'm appending the appropriate delta -- just checked it applies cleanly over
rth's atomic-3 branch on github.
Thanks,
Emilio
From f4a1a6fe2ffcf9572353f0b85a21ed27cd1765e1 Mon Sep 17 00:00:00 2001
From: "Emilio G. Cota" <address@hidden>
Date: Tue, 9 Aug 2016 23:14:13 -0400
Subject: [PATCH] tests: fix atomic_add_bench
Signed-off-by: Emilio G. Cota <address@hidden>
---
tests/atomic_add-bench.c | 51 ++++++++++++++++--------------------------------
1 file changed, 17 insertions(+), 34 deletions(-)
diff --git a/tests/atomic_add-bench.c b/tests/atomic_add-bench.c
index 06300ba..dc97441 100644
--- a/tests/atomic_add-bench.c
+++ b/tests/atomic_add-bench.c
@@ -17,14 +17,14 @@ static struct thread_info *th_info;
static unsigned int n_threads = 1;
static unsigned int n_ready_threads;
static struct count *counts;
-static unsigned long n_ops = 10000;
-static double duration;
-static unsigned int range = 1;
+static unsigned int duration = 1;
+static unsigned int range = 1024;
static bool test_start;
+static bool test_stop;
static const char commands_string[] =
" -n = number of threads\n"
- " -o = number of ops per thread\n"
+ " -d = duration in seconds\n"
" -r = range (will be rounded up to pow2)";
static void usage_complete(char *argv[])
@@ -49,14 +49,13 @@ static uint64_t xorshift64star(uint64_t x)
static void *thread_func(void *arg)
{
struct thread_info *info = arg;
- unsigned long i;
atomic_inc(&n_ready_threads);
while (!atomic_mb_read(&test_start)) {
cpu_relax();
}
- for (i = 0; i < n_ops; i++) {
+ while (!atomic_read(&test_stop)) {
unsigned int index;
info->r = xorshift64star(info->r);
@@ -66,32 +65,23 @@ static void *thread_func(void *arg)
return NULL;
}
-static inline
-uint64_t ts_subtract(const struct timespec *a, const struct timespec *b)
-{
- uint64_t ns;
-
- ns = (b->tv_sec - a->tv_sec) * 1000000000ULL;
- ns += (b->tv_nsec - a->tv_nsec);
- return ns;
-}
-
static void run_test(void)
{
+ unsigned int remaining;
unsigned int i;
- struct timespec ts_start, ts_end;
while (atomic_read(&n_ready_threads) != n_threads) {
cpu_relax();
}
atomic_mb_set(&test_start, true);
+ do {
+ remaining = sleep(duration);
+ } while (remaining);
+ atomic_mb_set(&test_stop, true);
- clock_gettime(CLOCK_MONOTONIC, &ts_start);
for (i = 0; i < n_threads; i++) {
qemu_thread_join(&threads[i]);
}
- clock_gettime(CLOCK_MONOTONIC, &ts_end);
- duration = ts_subtract(&ts_start, &ts_end) / 1e9;
}
static void create_threads(void)
@@ -101,6 +91,7 @@ static void create_threads(void)
threads = g_new(QemuThread, n_threads);
th_info = g_new(struct thread_info, n_threads);
counts = qemu_memalign(64, sizeof(*counts) * range);
+ memset(counts, 0, sizeof(*counts) * range);
for (i = 0; i < n_threads; i++) {
struct thread_info *info = &th_info[i];
@@ -115,7 +106,7 @@ static void pr_params(void)
{
printf("Parameters:\n");
printf(" # of threads: %u\n", n_threads);
- printf(" n_ops: %lu\n", n_ops);
+ printf(" duration: %u\n", duration);
printf(" ops' range: %u\n", range);
}
@@ -128,22 +119,20 @@ static void pr_stats(void)
for (i = 0; i < range; i++) {
val += counts[i].val;
}
- assert(val == n_threads * n_ops);
tx = val / duration / 1e6;
printf("Results:\n");
- printf("Duration: %.2f s\n", duration);
+ printf("Duration: %u s\n", duration);
printf(" Throughput: %.2f Mops/s\n", tx);
printf(" Throughput/thread: %.2f Mops/s/thread\n", tx / n_threads);
}
static void parse_args(int argc, char *argv[])
{
- unsigned long long n_ops_ull;
int c;
for (;;) {
- c = getopt(argc, argv, "hn:o:r:");
+ c = getopt(argc, argv, "hd:n:r:");
if (c < 0) {
break;
}
@@ -151,18 +140,12 @@ static void parse_args(int argc, char *argv[])
case 'h':
usage_complete(argv);
exit(0);
+ case 'd':
+ duration = atoi(optarg);
+ break;
case 'n':
n_threads = atoi(optarg);
break;
- case 'o':
- n_ops_ull = atoll(optarg);
- if (n_ops_ull > ULONG_MAX) {
- fprintf(stderr,
- "fatal: -o cannot be greater than %lu\n", ULONG_MAX);
- exit(1);
- }
- n_ops = n_ops_ull;
- break;
case 'r':
range = pow2ceil(atoi(optarg));
break;
--
2.5.0
- Re: [Qemu-devel] [PATCH v3 13/34] tcg: Add atomic helpers, (continued)
[Qemu-devel] [PATCH v3 19/34] target-i386: emulate LOCK'ed NOT using atomic helper, Richard Henderson, 2016/09/03
[Qemu-devel] [PATCH v3 15/34] tcg: Add CONFIG_ATOMIC64, Richard Henderson, 2016/09/03
[Qemu-devel] [PATCH v3 24/34] target-i386: remove helper_lock(), Richard Henderson, 2016/09/03
[Qemu-devel] [PATCH v3 21/34] target-i386: emulate LOCK'ed XADD using atomic helper, Richard Henderson, 2016/09/03
[Qemu-devel] [PATCH v3 25/34] tests: add atomic_add-bench, Richard Henderson, 2016/09/03
[Qemu-devel] [PATCH v3 23/34] target-i386: emulate XCHG using atomic helper, Richard Henderson, 2016/09/03
[Qemu-devel] [PATCH v3 20/34] target-i386: emulate LOCK'ed NEG using cmpxchg helper, Richard Henderson, 2016/09/03
[Qemu-devel] [PATCH v3 22/34] target-i386: emulate LOCK'ed BTX ops using atomic helpers, Richard Henderson, 2016/09/03
[Qemu-devel] [PATCH v3 27/34] target-arm: emulate LL/SC using cmpxchg helpers, Richard Henderson, 2016/09/03
[Qemu-devel] [PATCH v3 26/34] target-arm: Rearrange aa32 load and store functions, Richard Henderson, 2016/09/03
[Qemu-devel] [PATCH v3 32/34] target-arm: remove EXCP_STREX + cpu_exclusive_{test, info}, Richard Henderson, 2016/09/03