[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCHv2 4/9] bitops: use vector algorithm to optimize find_next_bit()
From: |
Peter Lieven |
Subject: |
[Qemu-devel] [PATCHv2 4/9] bitops: use vector algorithm to optimize find_next_bit() |
Date: |
Fri, 15 Mar 2013 16:50:13 +0100 |
This patch uses buffer_find_nonzero_offset() in find_next_bit()
to skip large areas of zeroes.
Compared to the loop unrolling presented in an earlier
patch, this adds another 50% performance benefit when
skipping large areas of zeroes. Loop unrolling alone
added close to 100% speedup.
Signed-off-by: Peter Lieven <address@hidden>
---
util/bitops.c | 26 +++++++++++++++++++++++---
1 file changed, 23 insertions(+), 3 deletions(-)
diff --git a/util/bitops.c b/util/bitops.c
index e72237a..3c301fa 100644
--- a/util/bitops.c
+++ b/util/bitops.c
@@ -42,10 +42,30 @@ unsigned long find_next_bit(const unsigned long *addr,
unsigned long size,
size -= BITS_PER_LONG;
result += BITS_PER_LONG;
}
- while (size & ~(BITS_PER_LONG-1)) {
- if ((tmp = *(p++))) {
- goto found_middle;
+ while (size >= BITS_PER_LONG) {
+ if ((tmp = *p)) {
+ goto found_middle;
+ }
+ if (((uintptr_t) p) % sizeof(VECTYPE) == 0
+ && size >= BITS_PER_BYTE * sizeof(VECTYPE)
+ * BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR) {
+ unsigned long tmp2 =
+ buffer_find_nonzero_offset(p, ((size / BITS_PER_BYTE) &
+ ~(BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR *
+ sizeof(VECTYPE) - 1)));
+ result += tmp2 * BITS_PER_BYTE;
+ size -= tmp2 * BITS_PER_BYTE;
+ p += tmp2 / sizeof(unsigned long);
+ if (!size) {
+ return result;
+ }
+ if (tmp2) {
+ if ((tmp = *p)) {
+ goto found_middle;
+ }
+ }
}
+ p++;
result += BITS_PER_LONG;
size -= BITS_PER_LONG;
}
--
1.7.9.5