[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[lmi-commits] [lmi] odd/glibc-expm1-log1p 9c7ff11 1/2: Test glibc's expm
From: |
Greg Chicares |
Subject: |
[lmi-commits] [lmi] odd/glibc-expm1-log1p 9c7ff11 1/2: Test glibc's expm1 and log1p |
Date: |
Tue, 6 Oct 2020 17:10:01 -0400 (EDT) |
branch: odd/glibc-expm1-log1p
commit 9c7ff110c072acd1da70a16ab6af57c6f14d7910
Author: Gregory W. Chicares <gchicares@sbcglobal.net>
Commit: Gregory W. Chicares <gchicares@sbcglobal.net>
Test glibc's expm1 and log1p
Somewhat casually imported glibc's implementation of expm1 and log1p,
and added them to speed and accuracy tests. See:
https://lists.nongnu.org/archive/html/lmi/2020-10/msg00028.html
The glibc code produces the same values as std::expm1 and std::log1p
for double precision (as expected), and unexpectedly runs at about the
same speed, for both these architectures:
i686-w64-mingw32-gcc-8.3-win32
x86_64-pc-linux-gnu
The hypothesis that the glibc implementation is inherently faster is
rejected by this experiment.
These measurements say something interesting about TimeAnAliquot().
A rather empty function such as
{double volatile x; (void)&x;}
appears to run twenty times as fast with pc-linux-gnu. If that's not
empty enough, this implementation:
void mete5() {}
(not committed herewith) runs at the same speed. On this E5-2630 v3
machine (nominally 2.40 GHz), one might hypothesize that there's a
fixed timing overhead of
3.356e-08 * 2.4e+09 = 80 cycles
per iteration for pc-linux-gnu, and perhaps
6.606e-07 * 2.4e+09 = 1600 cycles approximately
for i686-w64-mingw32. Thus, we might have been measuring neither signal
nor noise, but rather a fixed overhead.
i686-w64-mingw32-gcc-8.3-win32
Speed tests:
std::pow 1.515e-06 s mean; 1 us least of 6601 runs
std::expm1 1.290e-06 s mean; 1 us least of 7755 runs
double i365 8.312e-07 s mean; 1 us least of 12033 runs
long double i365 7.678e-07 s mean; 1 us least of 13026 runs
expm1 glibc i365 7.538e-07 s mean; 1 us least of 13267 runs
empty function 6.606e-07 s mean; 1 us least of 15138 runs
Daily rate corresponding to 1% annual interest, by various methods:
000000000111111111122
123456789012345678901
0.0000272615520089941669031 method in production
0.0000272615520089941669031 long double precision, std::expm1 an...
0.0000272615520089941739887 long double precision, std::pow
0.0000272615520089941672124 double precision, std::expm1 and std...
0.0000272615520089392049385 double precision, std::pow
0.0000272615520089941672124 double precision, glibc_expm1 and gl...
pc-linux-gnu gcc-9
Speed tests:
std::pow 5.646e-06 s mean; 5 us least of 1772 runs
std::expm1 1.118e-06 s mean; 1 us least of 8942 runs
double i365 9.143e-08 s mean; 0 us least of 109376 runs
long double i365 2.201e-07 s mean; 0 us least of 45426 runs
expm1 glibc i365 9.042e-08 s mean; 0 us least of 110594 runs
empty function 3.356e-08 s mean; 0 us least of 298007 runs
Daily rate corresponding to 1% annual interest, by various methods:
000000000111111111122
123456789012345678901
0.0000272615520089941669014 method in production
0.0000272615520089941672124 double precision, std::expm1 and std...
0.0000272615520089392049385 double precision, std::pow
0.0000272615520089941672124 double precision, glibc_expm1 and gl...
---
math_functions_test.cpp | 420 ++++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 406 insertions(+), 14 deletions(-)
diff --git a/math_functions_test.cpp b/math_functions_test.cpp
index a03868a..e3afcac 100644
--- a/math_functions_test.cpp
+++ b/math_functions_test.cpp
@@ -35,6 +35,349 @@
#include <limits>
#include <type_traits>
+//
https://sourceware.org/git/?p=glibc.git;a=blob;f=sysdeps/generic/math_private.h;h=9296324d24faea9b2a19d8402a0ea72a0121e56e;hb=HEAD58
+
+typedef union
+{
+ double value;
+ struct
+ {
+ uint32_t lsw;
+ uint32_t msw;
+ } parts;
+ uint64_t word;
+} ieee_double_shape_type;
+
+////#ifndef GET_HIGH_WORD
+#if !defined GET_HIGH_WORD
+# define GET_HIGH_WORD(i,d) \
+do { \
+ ieee_double_shape_type gh_u; \
+ gh_u.value = (d); \
+ (i) = gh_u.parts.msw; \
+} while (0)
+#endif
+
+/* Get the less significant 32 bit int from a double. */
+
+////#ifndef GET_LOW_WORD
+#if !defined GET_LOW_WORD
+# define GET_LOW_WORD(i,d) \
+do { \
+ ieee_double_shape_type gl_u; \
+ gl_u.value = (d); \
+ (i) = gl_u.parts.lsw; \
+} while (0)
+#endif
+
+/* Set the more significant 32 bits of a double from an int. */
+////#ifndef SET_HIGH_WORD
+#if !defined SET_HIGH_WORD
+#define SET_HIGH_WORD(d,v) \
+do { \
+ ieee_double_shape_type sh_u; \
+ sh_u.value = (d); \
+ sh_u.parts.msw = (v); \
+ (d) = sh_u.value; \
+} while (0)
+#endif
+
+//
https://sourceware.org/git/?p=glibc.git;a=blob;f=sysdeps/ieee754/dbl-64/s_expm1.c;h=8f1c95bd04bdb0bc44113c13b62c58c75c106a3f;hb=HEAD
+
+#include <errno.h>
+#include <float.h>
+#include <math.h>
+////#include <math-barriers.h>
+////#include <math_private.h>
+////#include <math-underflow.h>
+////#include <libm-alias-double.h>
+
+#define one Q[0]
+static const double
+ huge = 1.0e+300,
+ tiny = 1.0e-300,
+ o_threshold = 7.09782712893383973096e+02, /* 0x40862E42, 0xFEFA39EF */
+ ln2_hi = 6.93147180369123816490e-01, /* 0x3fe62e42, 0xfee00000 */
+ ln2_lo = 1.90821492927058770002e-10, /* 0x3dea39ef, 0x35793c76 */
+ invln2 = 1.44269504088896338700e+00, /* 0x3ff71547, 0x652b82fe */
+/* scaled coefficients related to expm1 */
+ Q[] = { 1.0, -3.33333333333331316428e-02, /* BFA11111 111110F4 */
+ 1.58730158725481460165e-03, /* 3F5A01A0 19FE5585 */
+ -7.93650757867487942473e-05, /* BF14CE19 9EAADBB7 */
+ 4.00821782732936239552e-06, /* 3ED0CFCA 86E65239 */
+ -2.01099218183624371326e-07 }; /* BE8AFDB7 6E09C32D */
+
+double
+////_ _ expm1 (double x)
+glibc_expm1 (double x)
+{
+ double y, hi, lo, c, t, e, hxs, hfx, r1, h2, h4, R1, R2, R3;
+ int32_t k, xsb;
+ uint32_t hx;
+
+ GET_HIGH_WORD (hx, x);
+ xsb = hx & 0x80000000; /* sign bit of x */
+ if (xsb == 0)
+ y = x;
+ else
+ y = -x; /* y = |x| */
+ hx &= 0x7fffffff; /* high word of |x| */
+
+ /* filter out huge and non-finite argument */
+ if (hx >= 0x4043687A) /* if |x|>=56*ln2 */
+ {
+ if (hx >= 0x40862E42) /* if |x|>=709.78... */
+ {
+ if (hx >= 0x7ff00000)
+ {
+ uint32_t low;
+ GET_LOW_WORD (low, x);
+ if (((hx & 0xfffff) | low) != 0)
+ return x + x; /* NaN */
+ else
+ return (xsb == 0) ? x : -1.0; /* exp(+-inf)={inf,-1} */
+ }
+ if (x > o_threshold)
+ {
+//// _ _ set_errno (ERANGE);
+//// _set_errno (ERANGE); // nonportable msw-ism
+ return huge * huge; /* overflow */
+ }
+ }
+ if (xsb != 0) /* x < -56*ln2, return -1.0 with inexact */
+ {
+//// math_force_eval (x + tiny); /* raise inexact */
+ return tiny - one; /* return -1 */
+ }
+ }
+
+ /* argument reduction */
+ if (hx > 0x3fd62e42) /* if |x| > 0.5 ln2 */
+ {
+ if (hx < 0x3FF0A2B2) /* and |x| < 1.5 ln2 */
+ {
+ if (xsb == 0)
+ {
+ hi = x - ln2_hi; lo = ln2_lo; k = 1;
+ }
+ else
+ {
+ hi = x + ln2_hi; lo = -ln2_lo; k = -1;
+ }
+ }
+ else
+ {
+//// k = invln2 * x + ((xsb == 0) ? 0.5 : -0.5);
+ k = static_cast<int32_t>(invln2 * x + ((xsb == 0) ? 0.5 : -0.5));
+ t = k;
+ hi = x - t * ln2_hi; /* t*ln2_hi is exact here */
+ lo = t * ln2_lo;
+ }
+ x = hi - lo;
+ c = (hi - x) - lo;
+ }
+ else if (hx < 0x3c900000) /* when |x|<2**-54, return x */
+ {
+//// math_check_force_underflow (x);
+ t = huge + x; /* return x with inexact flags when x!=0 */
+ return x - (t - (huge + x));
+ }
+ else
+ k = 0;
+
+ /* x is now in primary range */
+ hfx = 0.5 * x;
+ hxs = x * hfx;
+ R1 = one + hxs * Q[1]; h2 = hxs * hxs;
+ R2 = Q[2] + hxs * Q[3]; h4 = h2 * h2;
+ R3 = Q[4] + hxs * Q[5];
+ r1 = R1 + h2 * R2 + h4 * R3;
+ t = 3.0 - r1 * hfx;
+ e = hxs * ((r1 - t) / (6.0 - x * t));
+ if (k == 0)
+ return x - (x * e - hxs); /* c is 0 */
+ else
+ {
+ e = (x * (e - c) - c);
+ e -= hxs;
+ if (k == -1)
+ return 0.5 * (x - e) - 0.5;
+ if (k == 1)
+ {
+ if (x < -0.25)
+ return -2.0 * (e - (x + 0.5));
+ else
+ return one + 2.0 * (x - e);
+ }
+ if (k <= -2 || k > 56) /* suffice to return exp(x)-1 */
+ {
+ uint32_t high;
+ y = one - (e - x);
+ GET_HIGH_WORD (high, y);
+ SET_HIGH_WORD (y, high + (k << 20)); /* add k to y's exponent */
+ return y - one;
+ }
+ t = one;
+ if (k < 20)
+ {
+ uint32_t high;
+ SET_HIGH_WORD (t, 0x3ff00000 - (0x200000 >> k)); /* t=1-2^-k */
+ y = t - (e - x);
+ GET_HIGH_WORD (high, y);
+ SET_HIGH_WORD (y, high + (k << 20)); /* add k to y's exponent */
+ }
+ else
+ {
+ uint32_t high;
+ SET_HIGH_WORD (t, ((0x3ff - k) << 20)); /* 2^-k */
+ y = x - (e + t);
+ y += one;
+ GET_HIGH_WORD (high, y);
+ SET_HIGH_WORD (y, high + (k << 20)); /* add k to y's exponent */
+ }
+ }
+ return y;
+}
+////libm_alias_double (_ _ expm1, expm1)
+
+//
https://sourceware.org/git/?p=glibc.git;a=blob;f=sysdeps/ieee754/dbl-64/s_log1p.c;h=e6476a826039ea8c8f2ac66133448b8c33c966c4;hb=HEAD
+
+////#include <float.h>
+////#include <math.h>
+////#include <math-barriers.h>
+////#include <math_private.h>
+////#include <math-underflow.h>
+////#include <libc-diag.h>
+
+# define glibc_unlikely(cond) (cond)
+
+static const double
+//// already defined above:
+//// ln2_hi = 6.93147180369123816490e-01, /* 3fe62e42 fee00000 */
+//// ln2_lo = 1.90821492927058770002e-10, /* 3dea39ef 35793c76 */
+ two54 = 1.80143985094819840000e+16, /* 43500000 00000000 */
+ Lp[] = { 0.0, 6.666666666666735130e-01, /* 3FE55555 55555593 */
+ 3.999999999940941908e-01, /* 3FD99999 9997FA04 */
+ 2.857142874366239149e-01, /* 3FD24924 94229359 */
+ 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */
+ 1.818357216161805012e-01, /* 3FC74664 96CB03DE */
+ 1.531383769920937332e-01, /* 3FC39A09 D078C69F */
+ 1.479819860511658591e-01 }; /* 3FC2F112 DF3E5244 */
+
+static const double zero = 0.0;
+
+double
+//// _ _ log1p (double x)
+glibc_log1p (double x)
+{
+ double hfsq, f, c, s, z, R, u, z2, z4, z6, R1, R2, R3, R4;
+ int32_t k, hx, hu, ax;
+
+ GET_HIGH_WORD (hx, x);
+ ax = hx & 0x7fffffff;
+
+ k = 1;
+ if (hx < 0x3FDA827A) /* x < 0.41422 */
+ {
+ //// name had double-underscore prefix
+ if (glibc_unlikely (ax >= 0x3ff00000)) /* x <= -1.0 */
+ {
+ if (x == -1.0)
+ return -two54 / zero; /* log1p(-1)=-inf */
+ else
+ return (x - x) / (x - x); /* log1p(x<-1)=NaN */
+ }
+ //// name had double-underscore prefix
+ if (glibc_unlikely (ax < 0x3e200000)) /* |x| < 2**-29 */
+ {
+//// math_force_eval (two54 + x); /* raise inexact */
+ if (ax < 0x3c900000) /* |x| < 2**-54 */
+ {
+//// math_check_force_underflow (x);
+ return x;
+ }
+ else
+ return x - x * x * 0.5;
+ }
+//// if (hx > 0 || hx <= ((int32_t) 0xbfd2bec3))
+ if (hx > 0 || hx <= (static_cast<int32_t>(0xbfd2bec3)))
+ {
+ k = 0; f = x; hu = 1;
+ } /* -0.2929<x<0.41422 */
+ }
+ //// name had double-underscore prefix
+ else if (glibc_unlikely (hx >= 0x7ff00000))
+ return x + x;
+ if (k != 0)
+ {
+ if (hx < 0x43400000)
+ {
+ u = 1.0 + x;
+ GET_HIGH_WORD (hu, u);
+ k = (hu >> 20) - 1023;
+ c = (k > 0) ? 1.0 - (u - x) : x - (u - 1.0); /* correction term */
+ c /= u;
+ }
+ else
+ {
+ u = x;
+ GET_HIGH_WORD (hu, u);
+ k = (hu >> 20) - 1023;
+ c = 0;
+ }
+ hu &= 0x000fffff;
+ if (hu < 0x6a09e)
+ {
+ SET_HIGH_WORD (u, hu | 0x3ff00000); /* normalize u */
+ }
+ else
+ {
+ k += 1;
+ SET_HIGH_WORD (u, hu | 0x3fe00000); /* normalize u/2 */
+ hu = (0x00100000 - hu) >> 2;
+ }
+ f = u - 1.0;
+ }
+ hfsq = 0.5 * f * f;
+ if (hu == 0) /* |f| < 2**-20 */
+ {
+ if (f == zero)
+ {
+ if (k == 0)
+ return zero;
+ else
+ {
+ c += k * ln2_lo; return k * ln2_hi + c;
+ }
+ }
+ R = hfsq * (1.0 - 0.66666666666666666 * f);
+ if (k == 0)
+ return f - R;
+ else
+ return k * ln2_hi - ((R - (k * ln2_lo + c)) - f);
+ }
+ s = f / (2.0 + f);
+ z = s * s;
+ R1 = z * Lp[1]; z2 = z * z;
+ R2 = Lp[2] + z * Lp[3]; z4 = z2 * z2;
+ R3 = Lp[4] + z * Lp[5]; z6 = z4 * z2;
+ R4 = Lp[6] + z * Lp[7];
+ R = R1 + z2 * R2 + z4 * R3 + z6 * R4;
+ if (k == 0)
+ return f - (hfsq - s * (hfsq + R));
+ else
+ {
+ /* With GCC 7 when compiling with -Os the compiler warns that c
+ might be used uninitialized. This can't be true because k
+ must be 0 for c to be uninitialized and we handled that
+ computation earlier without using c. */
+//// DIAG_PUSH_NEEDS_COMMENT;
+//// DIAG_IGNORE_Os_NEEDS_COMMENT (7, "-Wmaybe-uninitialized");
+ return k * ln2_hi - ((hfsq - (s * (hfsq + R) + (k * ln2_lo + c))) - f);
+//// DIAG_POP_NEEDS_COMMENT;
+ }
+}
+
// Some of these tests may raise hardware exceptions. That means that
// edge cases are tested, not that the code tested is invalid for
// arguments that aren't ill conditioned.
@@ -144,6 +487,19 @@ struct i_upper_n_over_n_from_i_T
return std::expm1(std::log1p(i) * reciprocal_n);
}
};
+
+// This implementation uses glibc's expm1() and log1p().
+
+template<typename T, int n>
+struct i_upper_n_over_n_from_i_glibc
+{
+ static_assert(std::is_floating_point<T>::value);
+ T operator()(T const& i) const
+ {
+ static T const reciprocal_n = T(1) / n;
+ return glibc_expm1(glibc_log1p(i) * reciprocal_n);
+ }
+};
} // Unnamed namespace.
/// This function isn't a unit test per se. Its purpose is to show
@@ -162,28 +518,32 @@ void sample_results()
std::cout.setf(std::ios_base::fixed, std::ios_base::floatfield);
std::cout.precision(25);
std::cout
- << "\n daily rate corresponding to 1% annual interest"
- << ", by various methods\n"
- << " method in production\n "
- << i_upper_n_over_n_from_i <long double,365>()(0.01) << '\n'
+ << "\nDaily rate corresponding to 1% annual interest"
+ << ", by various methods:\n"
+ << " 000000000111111111122\n"
+ << " 123456789012345678901\n"
+ << " " << i_upper_n_over_n_from_i <long double,365>()(0.01)
+ << " method in production\n"
;
#if defined LMI_X87
fenv_precision(fe_ldblprec);
std::cout
- << " long double precision, std::expm1 and std::log1p\n "
- << i_upper_n_over_n_from_i_T <long double,365>()(0.01) << '\n'
- << " long double precision, std::pow\n "
- << i_upper_n_over_n_from_i_naive<long double,365>()(0.01) << '\n'
+ << " " << i_upper_n_over_n_from_i_T <long double,365>()(0.01)
+ << " long double precision, std::expm1 and std::log1p\n"
+ << " " << i_upper_n_over_n_from_i_naive<long double,365>()(0.01)
+ << " long double precision, std::pow\n"
;
fenv_initialize();
fenv_precision(fe_dblprec);
#endif // defined LMI_X87
std::cout
- << " double precision, std::expm1 and std::log1p\n "
- << i_upper_n_over_n_from_i_T <double,365>()(0.01) << '\n'
- << " double precision, std::pow\n "
- << i_upper_n_over_n_from_i_naive<double,365>()(0.01) << '\n'
+ << " " << i_upper_n_over_n_from_i_T <double,365>()(0.01)
+ << " double precision, std::expm1 and std::log1p\n"
+ << " " << i_upper_n_over_n_from_i_naive<double,365>()(0.01)
+ << " double precision, std::pow\n"
+ << " " << i_upper_n_over_n_from_i_glibc<double,365>()(0.01)
+ << " double precision, glibc_expm1 and glibc_log1p\n"
;
fenv_initialize();
@@ -215,10 +575,42 @@ void mete1()
x = net_i_from_gross<double,365>()(0.04, 0.007, 0.003);
}
+void mete2()
+{
+ double volatile x;
+ stifle_warning_for_unused_value(x);
+ x = i_upper_n_over_n_from_i_T<double,365>()(0.01);
+}
+
+void mete3()
+{
+ double volatile x;
+ stifle_warning_for_unused_value(x);
+ x = static_cast<double>(i_upper_n_over_n_from_i_T<long
double,365>()(0.01));
+}
+
+void mete4()
+{
+ double volatile x;
+ stifle_warning_for_unused_value(x);
+ x = i_upper_n_over_n_from_i_glibc<double,365>()(0.01);
+}
+
+void mete5()
+{
+ double volatile x;
+ stifle_warning_for_unused_value(x);
+}
+
void assay_speed()
{
- std::cout << " Speed test: std::pow \n " << TimeAnAliquot(mete0) <<
'\n';
- std::cout << " Speed test: std::expm1\n " << TimeAnAliquot(mete1) <<
'\n';
+ std::cout << "Speed tests:\n";
+ std::cout << " std::pow " << TimeAnAliquot(mete0) << '\n';
+ std::cout << " std::expm1 " << TimeAnAliquot(mete1) << '\n';
+ std::cout << " double i365 " << TimeAnAliquot(mete2) << '\n';
+ std::cout << " long double i365 " << TimeAnAliquot(mete3) << '\n';
+ std::cout << " expm1 glibc i365 " << TimeAnAliquot(mete4) << '\n';
+ std::cout << " empty function " << TimeAnAliquot(mete5) << '\n';
}
int test_main(int, char*[])