From 29b7abfd1a990e1fe4fc10f3d2532eadd079151f Mon Sep 17 00:00:00 2001 From: felix Date: Sun, 7 Nov 2021 13:48:31 +0100 Subject: [PATCH] Add support for fused-multiply-add (suggested by Christian Himpe on chicken-users) --- NEWS | 4 ++++ c-platform.scm | 3 ++- chicken.h | 2 ++ lfa2.scm | 2 ++ library.scm | 6 ++++++ manual/Module (chicken flonum) | 3 ++- types.db | 3 +++ 7 files changed, 21 insertions(+), 2 deletions(-) diff --git a/NEWS b/NEWS index de01c00e..69fe5054 100644 --- a/NEWS +++ b/NEWS @@ -4,6 +4,10 @@ - Default "cc" on BSD systems for building CHICKEN to avoid ABI problems when linking with C++ code. +- Core libraries + - Added "fp*+" (fused multiply-add) to "chicken.flonum" module + (suggested by Christian Himpe). + 5.3.0rc4 - Compiler diff --git a/c-platform.scm b/c-platform.scm index 00960c82..e59b1f1c 100644 --- a/c-platform.scm +++ b/c-platform.scm @@ -149,7 +149,7 @@ (define-constant +flonum-bindings+ (map (lambda (x) (symbol-append 'chicken.flonum# x)) - '(fp/? fp+ fp- fp* fp/ fp> fp< fp= fp>= fp<= fpmin fpmax fpneg fpgcd + '(fp/? fp+ fp- fp* fp/ fp> fp< fp= fp>= fp<= fpmin fpmax fpneg fpgcd fp*+ fpfloor fpceiling fptruncate fpround fpsin fpcos fptan fpasin fpacos fpatan fpatan2 fpexp fpexpt fplog fpsqrt fpabs fpinteger?))) @@ -652,6 +652,7 @@ (rewrite 'chicken.flonum#fp/? 16 2 "C_a_i_flonum_quotient_checked" #f words-per-flonum) (rewrite 'chicken.flonum#fpneg 16 1 "C_a_i_flonum_negate" #f words-per-flonum) (rewrite 'chicken.flonum#fpgcd 16 2 "C_a_i_flonum_gcd" #f words-per-flonum) +(rewrite 'chicken.flonum#fp*+ 16 3 "C_a_i_flonum_multiply_add" #f words-per-flonum) (rewrite 'scheme#zero? 5 "C_eqp" 0 'fixnum) (rewrite 'scheme#zero? 2 1 "C_u_i_zerop2" #f) diff --git a/chicken.h b/chicken.h index 7e51a38f..ba075471 100644 --- a/chicken.h +++ b/chicken.h @@ -1204,6 +1204,7 @@ typedef void (C_ccall *C_proc)(C_word, C_word *) C_noret; #define C_a_i_flonum_plus(ptr, c, n1, n2) C_flonum(ptr, C_flonum_magnitude(n1) + C_flonum_magnitude(n2)) #define C_a_i_flonum_difference(ptr, c, n1, n2) C_flonum(ptr, C_flonum_magnitude(n1) - C_flonum_magnitude(n2)) #define C_a_i_flonum_times(ptr, c, n1, n2) C_flonum(ptr, C_flonum_magnitude(n1) * C_flonum_magnitude(n2)) +#define C_a_i_flonum_multiply_add(ptr, c, n1, n2, n3) C_flonum(ptr, fma(C_flonum_magnitude(n1), C_flonum_magnitude(n2), C_flonum_magnitude(n3))) #define C_a_i_flonum_quotient(ptr, c, n1, n2) C_flonum(ptr, C_flonum_magnitude(n1) / C_flonum_magnitude(n2)) #define C_a_i_flonum_negate(ptr, c, n) C_flonum(ptr, -C_flonum_magnitude(n)) #define C_a_u_i_flonum_signum(ptr, n, x) (C_flonum_magnitude(x) == 0.0 ? (x) : ((C_flonum_magnitude(x) < 0.0) ? C_flonum(ptr, -1.0) : C_flonum(ptr, 1.0))) @@ -1513,6 +1514,7 @@ typedef void (C_ccall *C_proc)(C_word, C_word *) C_noret; #define C_ub_i_flonum_difference(x, y) ((x) - (y)) #define C_ub_i_flonum_times(x, y) ((x) * (y)) #define C_ub_i_flonum_quotient(x, y) ((x) / (y)) +#define C_ub_i_flonum_multiply_add(x, y, z) fma((x), (y), (z)) #define C_ub_i_flonum_equalp(n1, n2) C_mk_bool((n1) == (n2)) #define C_ub_i_flonum_greaterp(n1, n2) C_mk_bool((n1) > (n2)) diff --git a/lfa2.scm b/lfa2.scm index 45057578..e4bd308e 100644 --- a/lfa2.scm +++ b/lfa2.scm @@ -191,6 +191,7 @@ ("C_a_i_flonum_sqrt" float) ("C_a_i_flonum_tan" float) ("C_a_i_flonum_times" float) + ("C_a_i_flonum_multiply_add" float) ("C_a_i_flonum_truncate" float) ("C_a_u_i_f64vector_ref" float) ("C_a_u_i_f32vector_ref" float) @@ -201,6 +202,7 @@ '(("C_a_i_flonum_plus" "C_ub_i_flonum_plus" op) ("C_a_i_flonum_difference" "C_ub_i_flonum_difference" op) ("C_a_i_flonum_times" "C_ub_i_flonum_times" op) + ("C_a_i_flonum_multiply_add" "C_ub_i_flonum_multiply_add" op) ("C_a_i_flonum_quotient" "C_ub_i_flonum_quotient" op) ("C_flonum_equalp" "C_ub_i_flonum_equalp" pred) ("C_flonum_greaterp" "C_ub_i_flonum_greaterp" pred) diff --git a/library.scm b/library.scm index 6c6a6942..45182e84 100644 --- a/library.scm +++ b/library.scm @@ -1590,6 +1590,12 @@ EOF (fp-check-flonums x y 'fp/) (##core#inline_allocate ("C_a_i_flonum_quotient" 4) x y) ) +(define (fp*+ x y z) + (unless (and (flonum? x) (flonum? y) (flonum? z)) + (##sys#error-hook (foreign-value "C_BAD_ARGUMENT_TYPE_NO_FLONUM_ERROR" int) + 'fp*+ x y z) ) + (##core#inline_allocate ("C_a_i_flonum_multiply_add" 4) x y z) ) + (define (fpgcd x y) (fp-check-flonums x y 'fpgcd) (##core#inline_allocate ("C_a_i_flonum_gcd" 4) x y)) diff --git a/manual/Module (chicken flonum) b/manual/Module (chicken flonum) index d780185b..69aab2fc 100644 --- a/manual/Module (chicken flonum) +++ b/manual/Module (chicken flonum) @@ -20,6 +20,7 @@ your code. (fp- X Y) (fp* X Y) (fp/ X Y) +(fp*+ X Y Z) (fpgcd X Y) (fpneg X) (fpmin X Y) @@ -52,7 +53,7 @@ Arithmetic floating-point operations. In safe mode, these procedures throw a type error when given non-float arguments. In unsafe mode, these procedures do not check their arguments. A non-flonum argument in unsafe mode can crash the -application. +application. {{fp*+}} implements fused multiply-add {{(X * Y) + Z}}. Note: {{fpround}} uses the rounding mode that your C library implements, which is usually different from R5RS. diff --git a/types.db b/types.db index 922c07af..0d85d203 100644 --- a/types.db +++ b/types.db @@ -1194,6 +1194,9 @@ (chicken.flonum#fp+ (#(procedure #:clean #:enforce #:foldable) chicken.flonum#fp+ (float float) float) ((float float) (##core#inline_allocate ("C_a_i_flonum_plus" 4) #(1) #(2)) )) +(chicken.flonum#fp*+ (#(procedure #:clean #:enforce #:foldable) chicken.flonum#fp*+ (float float float) float) + ((float float float) (##core#inline_allocate ("C_a_i_flonum_multiply_add" 4) #(1) #(2) #(3)) )) + (chicken.flonum#fp< (#(procedure #:clean #:enforce #:foldable) chicken.flonum#fp< (float float) boolean) ((float float) (##core#inline "C_flonum_lessp" #(1) #(2)) )) -- 2.28.0