[PATCH] factor: sync longlong.h adjustments from upstream
From: Pádraig Brady
Subject: [PATCH] factor: sync longlong.h adjustments from upstream
Date: Tue, 25 Feb 2020 11:56:08 +0000
* src/longlong.h: Sync changes from:
https://gmplib.org/repo/gmp/log/tip/longlong.h
mips64: Provide r6 asm code rather than relying on the default expression.
arm32: Define sub_ddmmss separately for non-thumb (no rsc instruction).
powerpc: Add "CLOBBER" descriptions for some registers.
x86: Fix criterion for when to use mulx in umul_ppmm.
---
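
Notes, below the fold so "git am" ignores them: sub_ddmmss and umul_ppmm
are the two longlong.h primitives this sync touches most.  sub_ddmmss
subtracts one double-word value from another, propagating the borrow
between limbs, and umul_ppmm forms the full two-word product of two
single-word operands.  A minimal C sketch of the intended semantics,
assuming a 64-bit UWtype and a compiler with unsigned __int128 (the
ref_* names are illustrative only):

  #include <stdint.h>

  /* (*sh:*sl) = (ah:al) - (bh:bl), with borrow from the low limb.  */
  static void
  ref_sub_ddmmss (uint64_t *sh, uint64_t *sl,
                  uint64_t ah, uint64_t al, uint64_t bh, uint64_t bl)
  {
    *sl = al - bl;
    *sh = ah - bh - (al < bl);
  }

  /* (*w1:*w0) = u * v, the full 128-bit product.  */
  static void
  ref_umul_ppmm (uint64_t *w1, uint64_t *w0, uint64_t u, uint64_t v)
  {
    unsigned __int128 p = (unsigned __int128) u * v;
    *w1 = (uint64_t) (p >> 64);
    *w0 = (uint64_t) p;
  }
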
src/longlong.h | 166 +++++++++++++++++++++++++++++++++++--------------
1 file changed, 121 insertions(+), 45 deletions(-)
diff --git a/src/longlong.h b/src/longlong.h
index 92ab1a6f2..e57ba7821 100644
--- a/src/longlong.h
+++ b/src/longlong.h
@@ -432,11 +432,39 @@ long __MPN(count_leading_zeros) (UDItype);
: "=r" (sh), "=&r" (sl) \
: "r" (ah), "rI" (bh), "%r" (al), "rI" (bl) __CLOBBER_CC); \
} while (0)
-/* FIXME: Extend the immediate range for the low word by using both
- ADDS and SUBS, since they set carry in the same way. */
+/* FIXME: Extend the immediate range for the low word by using both ADDS and
+ SUBS, since they set carry in the same way. Note: We need separate
+ definitions for thumb and non-thumb due to the absence of RSC on thumb. */
+#if defined (__thumb__)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ do { \
+ if (__builtin_constant_p (ah) && __builtin_constant_p (bh) \
+ && (ah) == (bh)) \
+ __asm__ ("subs\t%1, %2, %3\n\tsbc\t%0, %0, %0" \
+ : "=r" (sh), "=r" (sl) \
+ : "r" (al), "rI" (bl) __CLOBBER_CC); \
+    else if (__builtin_constant_p (al)) \
+ __asm__ ("rsbs\t%1, %5, %4\n\tsbc\t%0, %2, %3" \
+ : "=r" (sh), "=&r" (sl) \
+ : "r" (ah), "rI" (bh), "rI" (al), "r" (bl) __CLOBBER_CC); \
+    else if (__builtin_constant_p (bl)) \
+ __asm__ ("subs\t%1, %4, %5\n\tsbc\t%0, %2, %3" \
+ : "=r" (sh), "=&r" (sl) \
+ : "r" (ah), "rI" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \
+ else \
+ __asm__ ("subs\t%1, %4, %5\n\tsbc\t%0, %2, %3" \
+ : "=r" (sh), "=&r" (sl) \
+ : "r" (ah), "rI" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \
+ } while (0)
+#else
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
do { \
- if (__builtin_constant_p (al)) \
+ if (__builtin_constant_p (ah) && __builtin_constant_p (bh) \
+ && (ah) == (bh)) \
+ __asm__ ("subs\t%1, %2, %3\n\tsbc\t%0, %0, %0" \
+ : "=r" (sh), "=r" (sl) \
+ : "r" (al), "rI" (bl) __CLOBBER_CC); \
+  else if (__builtin_constant_p (al)) \
      { \
if (__builtin_constant_p (ah)) \
__asm__ ("rsbs\t%1, %5, %4\n\trsc\t%0, %3, %2" \
@@ -459,21 +487,15 @@ long __MPN(count_leading_zeros) (UDItype);
: "rI" (ah), "r" (bh), "rI" (al), "r" (bl) __CLOBBER_CC); \
      } \
    else if (__builtin_constant_p (bl)) \
-      { \
- if (__builtin_constant_p (bh)) \
- __asm__ ("subs\t%1, %4, %5\n\tsbc\t%0, %2, %3" \
- : "=r" (sh), "=&r" (sl) \
- : "r" (ah), "rI" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \
- else \
- __asm__ ("subs\t%1, %4, %5\n\trsc\t%0, %3, %2" \
- : "=r" (sh), "=&r" (sl) \
- : "rI" (ah), "r" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \
-      } \
+ __asm__ ("subs\t%1, %4, %5\n\tsbc\t%0, %2, %3" \
+ : "=r" (sh), "=&r" (sl) \
+ : "r" (ah), "rI" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \
else /* only bh might be a constant */ \
__asm__ ("subs\t%1, %4, %5\n\tsbc\t%0, %2, %3" \
: "=r" (sh), "=&r" (sl) \
- : "r" (ah), "rI" (bh), "r" (al), "rI" (bl) __CLOBBER_CC);\
+ : "r" (ah), "rI" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \
} while (0)
+#endif
#if defined (__ARM_ARCH_2__) || defined (__ARM_ARCH_2A__) \
|| defined (__ARM_ARCH_3__)
#define umul_ppmm(xh, xl, a, b) \
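
Aside on the arm32 hunk above: classic ARM has rsc, reverse subtract
with carry, which lets the high limbs be subtracted with the operands
in either order; thumb has no rsc, hence the separate definition.  The
subs/sbc borrow chain the asm emits can be sketched in C with the
overflow builtins, assuming GCC >= 5 or a recent clang:

  /* Illustrative only: sub_ddmmss as "subs" then "sbc", spelled in C.  */
  static void
  sub_ddmmss_sketch (unsigned long *sh, unsigned long *sl,
                     unsigned long ah, unsigned long al,
                     unsigned long bh, unsigned long bl)
  {
    unsigned long lo;
    unsigned long borrow = __builtin_usubl_overflow (al, bl, &lo); /* subs */
    *sl = lo;
    *sh = ah - bh - borrow;                                        /* sbc */
  }
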
@@ -1016,14 +1038,16 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
: "=r" (sh), "=&r" (sl) \
: "0" ((UDItype)(ah)), "rme" ((UDItype)(bh)), \
"1" ((UDItype)(al)), "rme" ((UDItype)(bl)))
-#if defined (HAVE_MULX)
+#if X86_ASM_MULX \
+ && (HAVE_HOST_CPU_haswell || HAVE_HOST_CPU_broadwell \
+ || HAVE_HOST_CPU_skylake || HAVE_HOST_CPU_bd4 || HAVE_HOST_CPU_zen)
#define umul_ppmm(w1, w0, u, v) \
- __asm__ ("mulx %3, %0, %1" \
+ __asm__ ("mulx\t%3, %0, %1" \
: "=r" (w0), "=r" (w1) \
: "%d" ((UDItype)(u)), "rm" ((UDItype)(v)))
#else
#define umul_ppmm(w1, w0, u, v) \
- __asm__ ("mulq %3" \
+ __asm__ ("mulq\t%3" \
: "=a" (w0), "=d" (w1) \
: "%0" ((UDItype)(u)), "rm" ((UDItype)(v)))
#endif
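
Aside on the mulx change above: mulx is a BMI2 instruction; it reads
one operand implicitly from %rdx (the "d" constraint), writes the
product to two arbitrary registers, and leaves the flags untouched,
whereas mulq is tied to %rax:%rdx and clobbers the flags, hence the
host-CPU checks added above.  With a BMI2-enabled compiler the same
operation is exposed as an intrinsic; a sketch, assuming x86-64,
<immintrin.h> and -mbmi2:

  #include <immintrin.h>
  #include <stdint.h>

  /* Illustrative only: (*hi : returned value) = u * v via mulx.  */
  static uint64_t
  mulx_sketch (uint64_t u, uint64_t v, uint64_t *hi)
  {
    unsigned long long h;
    uint64_t lo = _mulx_u64 (u, v, &h);
    *hi = h;
    return lo;
  }
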
@@ -1031,21 +1055,44 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
__asm__ ("divq %4" /* stringification in K&R C */ \
: "=a" (q), "=d" (r) \
: "0" ((UDItype)(n0)), "1" ((UDItype)(n1)), "rm" ((UDItype)(dx)))
-/* bsrq destination must be a 64-bit register, hence UDItype for __cbtmp. */
+
+#if HAVE_HOST_CPU_haswell || HAVE_HOST_CPU_broadwell || HAVE_HOST_CPU_skylake \
+ || HAVE_HOST_CPU_k10 || HAVE_HOST_CPU_bd1 || HAVE_HOST_CPU_bd2 \
+ || HAVE_HOST_CPU_bd3 || HAVE_HOST_CPU_bd4 || HAVE_HOST_CPU_zen \
+ || HAVE_HOST_CPU_bobcat || HAVE_HOST_CPU_jaguar
+#define count_leading_zeros(count, x) \
+ do { \
+    /* This is lzcnt, spelled for older assemblers.  Destination and */ \
+    /* source must be 64-bit registers, hence cast and %q.  */ \
+ __asm__ ("rep;bsr\t%1, %q0" : "=r" (count) : "rm" ((UDItype)(x))); \
+ } while (0)
+#define COUNT_LEADING_ZEROS_0 64
+#else
#define count_leading_zeros(count, x) \
do { \
UDItype __cbtmp; \
ASSERT ((x) != 0); \
- __asm__ ("bsrq %1,%0" : "=r" (__cbtmp) : "rm" ((UDItype)(x))); \
+ __asm__ ("bsr\t%1,%0" : "=r" (__cbtmp) : "rm" ((UDItype)(x))); \
(count) = __cbtmp ^ 63; \
} while (0)
-/* bsfq destination must be a 64-bit register, "%q0" forces this in case
- count is only an int. */
+#endif
+
+#if HAVE_HOST_CPU_bd2 || HAVE_HOST_CPU_bd3 || HAVE_HOST_CPU_bd4 \
+ || HAVE_HOST_CPU_zen || HAVE_HOST_CPU_jaguar
+#define count_trailing_zeros(count, x) \
+ do { \
+    /* This is tzcnt, spelled for older assemblers.  Destination and */ \
+    /* source must be 64-bit registers, hence cast and %q.  */ \
+ __asm__ ("rep;bsf\t%1, %q0" : "=r" (count) : "rm" ((UDItype)(x))); \
+ } while (0)
+#define COUNT_TRAILING_ZEROS_0 64
+#else
#define count_trailing_zeros(count, x) \
do { \
ASSERT ((x) != 0); \
- __asm__ ("bsfq %1,%q0" : "=r" (count) : "rm" ((UDItype)(x))); \
+ __asm__ ("bsf\t%1, %q0" : "=r" (count) : "rm" ((UDItype)(x))); \
} while (0)
+#endif
#endif /* __amd64__ */
#if defined (__i860__) && W_TYPE_SIZE == 32
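
Aside on the count_leading_zeros change above: bsr returns the bit
index of the most significant set bit and is undefined for a zero
input, so the fallback asserts x != 0 and converts the index with
count = index ^ 63; lzcnt (written rep;bsr so that older assemblers
accept it) counts leading zeros directly and defines the zero case,
which is what COUNT_LEADING_ZEROS_0 records.  A sketch of the
index-to-count conversion, assuming a GCC-style __builtin_clzll:

  #include <stdint.h>

  /* Illustrative only: turn an MSB index (what bsr yields) into a
     leading-zero count by XORing with 63, as the macro above does.  */
  static int
  clz_from_bsr (uint64_t x)                    /* requires x != 0 */
  {
    int msb_index = 63 - __builtin_clzll (x);  /* what bsr would give */
    return msb_index ^ 63;                     /* leading-zero count */
  }
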
@@ -1239,7 +1286,15 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
#endif /* __mips */
#if (defined (__mips) && __mips >= 3) && W_TYPE_SIZE == 64
-#if __GMP_GNUC_PREREQ (4,4)
+#if defined (_MIPS_ARCH_MIPS64R6)
+#define umul_ppmm(w1, w0, u, v) \
+ do { \
+ UDItype __m0 = (u), __m1 = (v); \
+    (w0) = __m0 * __m1; \
+    __asm__ ("dmuhu\t%0, %1, %2" : "=d" (w1) : "d" (__m0), "d" (__m1)); \
+ } while (0)
+#endif
+#if !defined (umul_ppmm) && __GMP_GNUC_PREREQ (4,4)
#define umul_ppmm(w1, w0, u, v) \
do { \
typedef unsigned int __ll_UTItype __attribute__((mode(TI))); \
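
Aside on the mips64 hunk above: r6 removed the HI/LO accumulator and
its dmultu/mfhi sequence, so dmuhu delivers the high 64 bits of the
product directly into a general register, while the low half is plain
C multiplication; the pre-existing fallback derives the same limbs
from a 128-bit TImode product.  A sketch of the equivalence, assuming
unsigned __int128:

  #include <stdint.h>

  /* Illustrative only: the limb dmuhu produces equals the top half of
     the 128-bit product the TImode fallback computes.  */
  static uint64_t
  high_limb (uint64_t u, uint64_t v)
  {
    unsigned __int128 p = (unsigned __int128) u * v;
    return (uint64_t) (p >> 64);
  }
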
@@ -1324,33 +1379,41 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
do { \
    if (__builtin_constant_p (bh) && (bh) == 0) \
__asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2" \
- : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl)); \
+ : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl) \
+ __CLOBBER_CC); \
    else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \
__asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2" \
- : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl)); \
+ : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl) \
+ __CLOBBER_CC); \
else \
__asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3" \
- : "=r" (sh), "=&r" (sl) \
- : "r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \
+ : "=r" (sh), "=&r" (sl) \
+ : "r" (ah), "r" (bh), "%r" (al), "rI" (bl) \
+ __CLOBBER_CC); \
} while (0)
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
do { \
    if (__builtin_constant_p (ah) && (ah) == 0) \
__asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2" \
- : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
+ : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl) \
+ __CLOBBER_CC); \
    else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0) \
__asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2" \
- : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
+ : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl) \
+ __CLOBBER_CC); \
else if (__builtin_constant_p (bh) && (bh) == 0) \
__asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2" \
- : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
+ : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl) \
+ __CLOBBER_CC); \
    else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \
__asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2" \
- : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
+ : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl) \
+ __CLOBBER_CC); \
else \
__asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2" \
: "=r" (sh), "=&r" (sl) \
- : "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \
+ : "r" (ah), "r" (bh), "rI" (al), "r" (bl) \
+ __CLOBBER_CC); \
} while (0)
#define count_leading_zeros(count, x) \
__asm__ ("cntlzw %0,%1" : "=r" (count) : "r" (x))
@@ -1398,17 +1461,20 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
__asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2" \
: "=r" (sh), "=&r" (sl) \
: "r" ((UDItype)(ah)), \
- "%r" ((UDItype)(al)), "rI" ((UDItype)(bl))); \
+ "%r" ((UDItype)(al)), "rI" ((UDItype)(bl)) \
+ __CLOBBER_CC); \
    else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \
__asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2" \
: "=r" (sh), "=&r" (sl) \
: "r" ((UDItype)(ah)), \
- "%r" ((UDItype)(al)), "rI" ((UDItype)(bl))); \
+ "%r" ((UDItype)(al)), "rI" ((UDItype)(bl)) \
+ __CLOBBER_CC); \
else \
__asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3" \
: "=r" (sh), "=&r" (sl) \
: "r" ((UDItype)(ah)), "r" ((UDItype)(bh)), \
- "%r" ((UDItype)(al)), "rI" ((UDItype)(bl))); \
+ "%r" ((UDItype)(al)), "rI" ((UDItype)(bl)) \
+ __CLOBBER_CC); \
} while (0)
/* We use "*rI" for the constant operand here, since with just "I", gcc barfs.
This might seem strange, but gcc folds away the dead code late. */
@@ -1419,53 +1485,63 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
__asm__ ("addic %1,%3,%4\n\tsubfze %0,%2" \
: "=r" (sh), "=&r" (sl) \
: "r" ((UDItype)(bh)), \
- "rI" ((UDItype)(al)), "*rI" (-((UDItype)(bl)))); \
+ "rI" ((UDItype)(al)), "*rI" (-((UDItype)(bl))) \
+ __CLOBBER_CC); \
else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0) \
__asm__ ("addic %1,%3,%4\n\tsubfme %0,%2" \
: "=r" (sh), "=&r" (sl) \
: "r" ((UDItype)(bh)), \
- "rI" ((UDItype)(al)), "*rI" (-((UDItype)(bl)))); \
+ "rI" ((UDItype)(al)), "*rI" (-((UDItype)(bl))) \
+ __CLOBBER_CC); \
else if (__builtin_constant_p (bh) && (bh) == 0) \
__asm__ ("addic %1,%3,%4\n\taddme %0,%2" \
: "=r" (sh), "=&r" (sl) \
: "r" ((UDItype)(ah)), \
- "rI" ((UDItype)(al)), "*rI" (-((UDItype)(bl)))); \
+ "rI" ((UDItype)(al)), "*rI" (-((UDItype)(bl))) \
+ __CLOBBER_CC); \
else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \
__asm__ ("addic %1,%3,%4\n\taddze %0,%2" \
: "=r" (sh), "=&r" (sl) \
: "r" ((UDItype)(ah)), \
- "rI" ((UDItype)(al)), "*rI" (-((UDItype)(bl)))); \
+ "rI" ((UDItype)(al)), "*rI" (-((UDItype)(bl))) \
+ __CLOBBER_CC); \
else \
__asm__ ("addic %1,%4,%5\n\tsubfe %0,%3,%2" \
: "=r" (sh), "=&r" (sl) \
: "r" ((UDItype)(ah)), "r" ((UDItype)(bh)), \
- "rI" ((UDItype)(al)), "*rI" (-((UDItype)(bl)))); \
+ "rI" ((UDItype)(al)), "*rI" (-((UDItype)(bl))) \
+ __CLOBBER_CC); \
} else { \
if (__builtin_constant_p (ah) && (ah) == 0) \
__asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2" \
: "=r" (sh), "=&r" (sl) \
: "r" ((UDItype)(bh)), \
- "rI" ((UDItype)(al)), "r" ((UDItype)(bl))); \
+ "rI" ((UDItype)(al)), "r" ((UDItype)(bl)) \
+ __CLOBBER_CC); \
else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0) \
__asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2" \
: "=r" (sh), "=&r" (sl) \
: "r" ((UDItype)(bh)), \
- "rI" ((UDItype)(al)), "r" ((UDItype)(bl))); \
+ "rI" ((UDItype)(al)), "r" ((UDItype)(bl)) \
+ __CLOBBER_CC); \
else if (__builtin_constant_p (bh) && (bh) == 0) \
__asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2" \
: "=r" (sh), "=&r" (sl) \
: "r" ((UDItype)(ah)), \
- "rI" ((UDItype)(al)), "r" ((UDItype)(bl))); \
+ "rI" ((UDItype)(al)), "r" ((UDItype)(bl)) \
+ __CLOBBER_CC); \
else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \
__asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2" \
: "=r" (sh), "=&r" (sl) \
: "r" ((UDItype)(ah)), \
- "rI" ((UDItype)(al)), "r" ((UDItype)(bl))); \
+ "rI" ((UDItype)(al)), "r" ((UDItype)(bl)) \
+ __CLOBBER_CC); \
else \
__asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2" \
: "=r" (sh), "=&r" (sl) \
: "r" ((UDItype)(ah)), "r" ((UDItype)(bh)), \
- "rI" ((UDItype)(al)), "r" ((UDItype)(bl))); \
+ "rI" ((UDItype)(al)), "r" ((UDItype)(bl)) \
+ __CLOBBER_CC); \
} \
} while (0)
#endif /* ! _LONG_LONG_LIMB */
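
Aside on the powerpc hunks above: __CLOBBER_CC expands to `: "cc"' in
longlong.h, declaring that the carry-setting addic/adde/subfe
sequences modify the condition/carry state, so GCC must not keep
flag-dependent values live across the asm.  A generic illustration of
the clobber, x86 syntax for brevity:

  #include <stdint.h>

  /* Illustrative only: an asm that changes the flags should declare
     "cc" so the compiler will not reorder flag uses around it.  */
  static uint64_t
  add_clobbers_cc (uint64_t a, uint64_t b)
  {
    uint64_t r;
    __asm__ ("add\t%2, %0"        /* modifies the condition codes */
             : "=r" (r)
             : "0" (a), "rm" (b)
             : "cc");             /* what __CLOBBER_CC supplies */
    return r;
  }
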
--
2.24.1