qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH 1/2] configure: avx2 and avx512f detection for clang


From: Shu-Chun Weng
Subject: [PATCH 1/2] configure: avx2 and avx512f detection for clang
Date: Wed, 22 Jul 2020 17:27:04 -0700

Since clang does not support "#pragma GCC", the avx2 and avx512f
optimizations are always disabled when building with clang. In this
change, we

 1. wrap "#pragma GCC" inside "#ifndef __clang__",
 2. only retain them around "#include <{e,i,s}mmintrin.h>" to work
    around a gcc bug,
 3. and annotate each function with `__attribute__((target(*)))`, which
    is recognized by both gcc and clang.

Signed-off-by: Shu-Chun Weng <scw@google.com>
---
 configure           | 16 ++++++++++++++--
 util/bufferiszero.c | 33 +++++++++++++++++++++++----------
 2 files changed, 37 insertions(+), 12 deletions(-)

diff --git a/configure b/configure
index 4bd80ed507..d9ce3aa5db 100755
--- a/configure
+++ b/configure
@@ -5808,10 +5808,16 @@ fi
 
 if test "$cpuid_h" = "yes" && test "$avx2_opt" != "no"; then
   cat > $TMPC << EOF
+#include <cpuid.h>
+#ifndef __clang__
 #pragma GCC push_options
 #pragma GCC target("avx2")
-#include <cpuid.h>
+#endif
 #include <immintrin.h>
+#ifndef __clang__
+#pragma GCC pop_options
+#endif
+__attribute__((target("avx2")))
 static int bar(void *a) {
     __m256i x = *(__m256i *)a;
     return _mm256_testz_si256(x, x);
@@ -5835,10 +5841,16 @@ fi
 
 if test "$cpuid_h" = "yes" && test "$avx512f_opt" = "yes"; then
   cat > $TMPC << EOF
+#include <cpuid.h>
+#ifndef __clang__
 #pragma GCC push_options
 #pragma GCC target("avx512f")
-#include <cpuid.h>
+#endif
 #include <immintrin.h>
+#ifndef __clang__
+#pragma GCC pop_options
+#endif
+__attribute__((target("avx512f")))
 static int bar(void *a) {
     __m512i x = *(__m512i *)a;
     return _mm512_test_epi64_mask(x, x);
diff --git a/util/bufferiszero.c b/util/bufferiszero.c
index 695bb4ce28..ca836b6e8c 100644
--- a/util/bufferiszero.c
+++ b/util/bufferiszero.c
@@ -64,17 +64,18 @@ buffer_zero_int(const void *buf, size_t len)
 }
 
 #if defined(CONFIG_AVX512F_OPT) || defined(CONFIG_AVX2_OPT) || defined(__SSE2__)
-/* Do not use push_options pragmas unnecessarily, because clang
- * does not support them.
- */
-#if defined(CONFIG_AVX512F_OPT) || defined(CONFIG_AVX2_OPT)
+#ifndef __clang__
 #pragma GCC push_options
 #pragma GCC target("sse2")
 #endif
 #include <emmintrin.h>
+#ifndef __clang__
+#pragma GCC pop_options
+#endif
 
 /* Note that each of these vectorized functions require len >= 64.  */
 
+__attribute__((target("sse2")))
 static bool
 buffer_zero_sse2(const void *buf, size_t len)
 {
@@ -104,19 +105,22 @@ buffer_zero_sse2(const void *buf, size_t len)
 
     return _mm_movemask_epi8(_mm_cmpeq_epi8(t, zero)) == 0xFFFF;
 }
-#if defined(CONFIG_AVX512F_OPT) || defined(CONFIG_AVX2_OPT)
-#pragma GCC pop_options
-#endif
 
 #ifdef CONFIG_AVX2_OPT
 /* Note that due to restrictions/bugs wrt __builtin functions in gcc <= 4.8,
  * the includes have to be within the corresponding push_options region, and
  * therefore the regions themselves have to be ordered with increasing ISA.
  */
+#ifndef __clang__
 #pragma GCC push_options
 #pragma GCC target("sse4")
+#endif
 #include <smmintrin.h>
+#ifndef __clang__
+#pragma GCC pop_options
+#endif
 
+__attribute__((target("sse4")))
 static bool
 buffer_zero_sse4(const void *buf, size_t len)
 {
@@ -145,11 +149,16 @@ buffer_zero_sse4(const void *buf, size_t len)
     return _mm_testz_si128(t, t);
 }
 
-#pragma GCC pop_options
+#ifndef __clang__
 #pragma GCC push_options
 #pragma GCC target("avx2")
+#endif
 #include <immintrin.h>
+#ifndef __clang__
+#pragma GCC pop_options
+#endif
 
+__attribute__((target("avx2")))
 static bool
 buffer_zero_avx2(const void *buf, size_t len)
 {
@@ -176,14 +185,19 @@ buffer_zero_avx2(const void *buf, size_t len)
 
     return _mm256_testz_si256(t, t);
 }
-#pragma GCC pop_options
 #endif /* CONFIG_AVX2_OPT */
 
 #ifdef CONFIG_AVX512F_OPT
+#ifndef __clang__
 #pragma GCC push_options
 #pragma GCC target("avx512f")
+#endif
 #include <immintrin.h>
+#ifndef __clang__
+#pragma GCC pop_options
+#endif
 
+__attribute__((target("avx512f")))
 static bool
 buffer_zero_avx512(const void *buf, size_t len)
 {
@@ -210,7 +224,6 @@ buffer_zero_avx512(const void *buf, size_t len)
     return !_mm512_test_epi64_mask(t, t);
 
 }
-#pragma GCC pop_options
 #endif
 
 
-- 
2.28.0.rc0.105.gf9edc3c819-goog




reply via email to

[Prev in Thread] Current Thread [Next in Thread]