[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Gcl-devel] [Maxima] Questions on modular arithmetic functions in ra
From: |
Camm Maguire |
Subject: |
Re: [Gcl-devel] [Maxima] Questions on modular arithmetic functions in rat3a.lisp |
Date: |
Thu, 21 Jun 2012 18:03:49 -0400 |
User-agent: |
Gnus/5.13 (Gnus v5.13) Emacs/23.2 (gnu/linux) |
Greetings!
Richard Fateman <address@hidden> writes:
> On 6/21/2012 11:58 AM, Camm Maguire wrote:
>> question. It looks like a factor of five to ten is available in some
>> routines.
>>
>> bctimes is in fact much slower because it does *not* check modulus :-).
>>
> huh?
> can you explain more?
>
Well, of course all benchmarks depend on the input, etc. But making a
list of 10^6 positive random fixnums and looping over cbexpt setting a,
b, and modulus, replacing bctimes with ctimes speeds things up
significantly. The branch checking modulus pales in comparison to the
possibility to inline the modular multiply, even including the
balancing, which can also be done in registers. The i/o, bignum case
handling, and function call overhead are very dominant, at least on my
system.
These are not optimally tuned yet, but take a look. The overload of mcf
into cmod is dumb and should be removed.
=============================================================================
/tmp/u.l
=============================================================================
(in-package :si)
(defmacro bctimes (&rest l)
`(rem (* ,@l) modulus))
(defun cbexpt (p n)
(do ((n (ash n -1) (ash n -1))
(s (if (oddp n) p 1)))
((zerop n) s)
(setq p (bctimes p p))
(and (oddp n) (setq s (bctimes s p)))))
(defun cbexpt1 (p n s)
(declare (fixnum n))
(if (zerop n) s (cbexpt1 (ctimes p p) (ash n -1) (if (/= 0 (logand n 1))
(ctimes s p) s))))
(defun cbexpt2 (p n &optional (s 1))
(if (zerop n) s (cbexpt2 (ctimes p p) (ash n -1) (if (oddp n) (ctimes s p)
s))))
(defun cbexpt3 (p n)
(do ((n (ash n -1) (ash n -1))
(s (if (oddp n) p 1)))
((zerop n) s)
(setq p (ctimes p p))
(and (oddp n) (setq s (ctimes s p)))))
(defun fr (a) (when a (let ((modulus (pop a))) (cbexpt (pop a) (pop a))) (fr
a)))
(defun fr1 (a) (when a (let ((modulus (pop a))) (cbexpt1 (pop a) (pop a) 1))
(fr1 a)))
(defun fr2 (a) (when a (let ((modulus (pop a))) (cbexpt2 (pop a) (pop a))) (fr2
a)))
(defun fr3 (a) (when a (let ((modulus (pop a))) (cbexpt3 (pop a) (pop a))) (fr3
a)))
(defun doit (lim &aux (a (make-list 1000002)))
(mapl (lambda (x) (setf (car x) (random lim))) a)
(time (fr a))
(time (fr1 a))
(time (fr2 a))
(time (fr3 a)))
=============================================================================
/tmp/u1.l
=============================================================================
(in-package :compiler)
(progn
(push '((fixnum fixnum) fixnum #.(compiler::flags compiler::rfa)
"(fixnum)(((long long)(#0))%((long long)(#1)))") (get 'i%
'compiler::inline-always))
(push '((fixnum fixnum fixnum) fixnum #.(compiler::flags compiler::rfa)
"(fixnum)(((long long)(#0))*((long long)(#1))%((long long)(#2)))")
(get '*% 'compiler::inline-always))
(push '((fixnum fixnum fixnum) fixnum #.(compiler::flags compiler::rfa)
"(fixnum)(((long long)(#0))+((long long)(#1))%((long long)(#2)))")
(get '+% 'compiler::inline-always))
(push '((fixnum fixnum fixnum) fixnum #.(compiler::flags compiler::rfa)
"(fixnum)(((long long)(#0))-((long long)(#1))%((long long)(#2)))")
(get '-% 'compiler::inline-always))
(setf (get 'i% 'compiler::return-type) t)
(setf (get '*% 'compiler::return-type) t)
(setf (get '+% 'compiler::return-type) t)
(setf (get '-% 'compiler::return-type) t)
(defmacro mcf (f a &optional b &aux (z (assoc f '((identity 64 i%)(* 32 *%)(+
63 +%)(- 63 -%))))(tp (cadr z))(ff (caddr z)))
`(if ,(if b `(typep si::modulus ',(if (< (integer-length
most-positive-fixnum) tp) `fixnum `(signed-byte ,tp)))
`(and (typep si::modulus 'fixnum) (typep ,a 'fixnum)))
(let ((x ,a),@(when b `((y ,b)))(z si::modulus))
(declare (fixnum x ,@(when b `(y)) z))
(let ((k (,ff x ,@(when b `(y)) z))(q (ash z -1)))
(declare (fixnum k q))
(if (> k q) (the fixnum (- k z)) (if (< k (the fixnum (- q))) (the
fixnum (+ k z)) k))))
(let ((k ,@(if b `((,f ,a ,b)) `(,a))))
(if si::modulus (let ((q (ash si::modulus -1))(k (mod k si::modulus)))
(if (> k q) (- k si::modulus) (if (< k (- q)) (+ k si::modulus) k))) k))))
(si::define-compiler-macro si::ctimes (a b) `(mcf * ,a ,b))
(si::define-compiler-macro si::cplus (a b) `(mcf + ,a ,b))
(si::define-compiler-macro si::cdifference (a b) `(mcf - ,a ,b))
(si::define-compiler-macro si::cmod (a) `(mcf identity ,a))
)
(in-package :si)
(defmacro bctimes (&rest l)
`(rem (* ,@l) modulus))
(defun cbexpt (p n)
(do ((n (ash n -1) (ash n -1))
(s (if (oddp n) p 1)))
((zerop n) s)
(setq p (bctimes p p))
(and (oddp n) (setq s (bctimes s p)))))
(defun cbexpt1 (p n s)
(declare (fixnum n))
(if (zerop n) s (cbexpt1 (ctimes p p) (ash n -1) (if (/= 0 (logand n 1))
(ctimes s p) s))))
(defun cbexpt2 (p n &optional (s 1))
(if (zerop n) s (cbexpt2 (ctimes p p) (ash n -1) (if (oddp n) (ctimes s p)
s))))
(defun cbexpt3 (p n)
(do ((n (ash n -1) (ash n -1))
(s (if (oddp n) p 1)))
((zerop n) s)
(setq p (ctimes p p))
(and (oddp n) (setq s (ctimes s p)))))
(defun fr (a) (when a (let ((modulus (pop a))) (cbexpt (pop a) (pop a))) (fr
a)))
(defun fr1 (a) (when a (let ((modulus (pop a))) (cbexpt1 (pop a) (pop a) 1))
(fr1 a)))
(defun fr2 (a) (when a (let ((modulus (pop a))) (cbexpt2 (pop a) (pop a))) (fr2
a)))
(defun fr3 (a) (when a (let ((modulus (pop a))) (cbexpt3 (pop a) (pop a))) (fr3
a)))
(defun doit (lim &aux (a (make-list 1000002)))
(mapl (lambda (x) (setf (car x) (random lim))) a)
(time (fr a))
(time (fr1 a))
(time (fr2 a))
(time (fr3 a)))
=============================================================================
GCL 2.6.8pre
=============================================================================
address@hidden:~/debian/gcl/gclcvs/tmp/unixport$
address@hidden:~/debian/gcl/gclcvs/tmp/unixport$ GCL_ANSI=t gcl
GCL (GNU Common Lisp) 2.6.7 ANSI May 5 2012 23:28:56
Source License: LGPL(gcl,gmp), GPL(unexec,bfd,xgcl)
Binary License: GPL due to GPL'ed components: (XGCL READLINE UNEXEC)
Modifications of this banner must retain notice of a compatible license
Dedicated to the memory of W. Schelter
Use (help) to get some basic information on how to use GCL.
Temporary directory for compiler files set to /tmp/
>(in-package :compiler)
#<"COMPILER" package>
COMPILER>*opt-three*
"-O3 -fomit-frame-pointer"
COMPILER>(load (compile-file "/tmp/u.l" :c-file t :data-file t :system-p t
:h-file t))
Compiling /tmp/u.l.
End of Pass 1.
;; Note: Tail-recursive call of CBEXPT1 was replaced by iteration.
;; Note: Tail-recursive call of FR was replaced by iteration.
;; Note: Tail-recursive call of FR1 was replaced by iteration.
;; Note: Tail-recursive call of FR2 was replaced by iteration.
;; Note: Tail-recursive call of FR3 was replaced by iteration.
End of Pass 2.
OPTIMIZE levels: Safety=0 (No runtime error checking), Space=0, Speed=3
Finished compiling /tmp/u.l.
Loading /tmp/u.o
start address -T 0x87d5658 Finished loading /tmp/u.o
7443
;; compile twice to pick up compiler macro defs if any
COMPILER>(load (compile-file "/tmp/u.l" :c-file t :data-file t :system-p t
:h-file t))
Compiling /tmp/u.l.
End of Pass 1.
;; Note: Tail-recursive call of CBEXPT1 was replaced by iteration.
;; Note: Tail-recursive call of FR was replaced by iteration.
;; Note: Tail-recursive call of FR1 was replaced by iteration.
;; Note: Tail-recursive call of FR2 was replaced by iteration.
;; Note: Tail-recursive call of FR3 was replaced by iteration.
End of Pass 2.
OPTIMIZE levels: Safety=0 (No runtime error checking), Space=0, Speed=3
Finished compiling /tmp/u.l.
Loading /tmp/u.o
start address -T 0x87d7658 Finished loading /tmp/u.o
7443
COMPILER>(si::doit most-positive-fixnum)
real time : 9.930 secs
run-gbc time : 7.600 secs
child run time : 0.000 secs
gbc time : 2.170 secs
real time : 2.130 secs
run-gbc time : 1.590 secs
child run time : 0.000 secs
gbc time : 0.540 secs
real time : 2.170 secs
run-gbc time : 1.770 secs
child run time : 0.000 secs
gbc time : 0.370 secs
real time : 2.000 secs
run-gbc time : 1.660 secs
child run time : 0.000 secs
gbc time : 0.340 secs
NIL
COMPILER>(load (compile-file "/tmp/u1.l" :c-file t :data-file t :system-p t
:h-file t))
Compiling /tmp/u1.l.
; (IN-PACKAGE :SI) is being compiled.
;; Warning: The package operation (IN-PACKAGE :SI) was in a bad place.
End of Pass 1.
;; Note: Tail-recursive call of CBEXPT1 was replaced by iteration.
;; Note: Tail-recursive call of FR was replaced by iteration.
;; Note: Tail-recursive call of FR1 was replaced by iteration.
;; Note: Tail-recursive call of FR2 was replaced by iteration.
;; Note: Tail-recursive call of FR3 was replaced by iteration.
End of Pass 2.
OPTIMIZE levels: Safety=0 (No runtime error checking), Space=0, Speed=3
Finished compiling /tmp/u1.l.
Loading /tmp/u1.o
start address -T 0x87df7e8 Finished loading /tmp/u1.o
8979
COMPILER>(load (compile-file "/tmp/u1.l" :c-file t :data-file t :system-p t
:h-file t))
Compiling /tmp/u1.l.
; (IN-PACKAGE :SI) is being compiled.
;; Warning: The package operation (IN-PACKAGE :SI) was in a bad place.
End of Pass 1.
;; Note: Tail-recursive call of CBEXPT1 was replaced by iteration.
;; Note: Tail-recursive call of FR was replaced by iteration.
;; Note: Tail-recursive call of FR1 was replaced by iteration.
;; Note: Tail-recursive call of FR2 was replaced by iteration.
;; Note: Tail-recursive call of FR3 was replaced by iteration.
End of Pass 2.
OPTIMIZE levels: Safety=0 (No runtime error checking), Space=0, Speed=3
Finished compiling /tmp/u1.l.
Loading /tmp/u1.o
start address -T 0x87e3978 Finished loading /tmp/u1.o
11511
COMPILER>(si::doit most-positive-fixnum)
real time : 9.830 secs
run-gbc time : 7.670 secs
child run time : 0.000 secs
gbc time : 2.100 secs
real time : 1.960 secs
run-gbc time : 1.520 secs
child run time : 0.000 secs
gbc time : 0.440 secs
real time : 1.990 secs
run-gbc time : 1.700 secs
child run time : 0.000 secs
gbc time : 0.280 secs
real time : 1.840 secs
run-gbc time : 1.560 secs
child run time : 0.000 secs
gbc time : 0.280 secs
NIL
COMPILER>(si::doit 268435455)
real time : 8.900 secs
run-gbc time : 6.910 secs
child run time : 0.000 secs
gbc time : 1.970 secs
real time : 1.710 secs
run-gbc time : 1.360 secs
child run time : 0.000 secs
gbc time : 0.360 secs
real time : 1.860 secs
run-gbc time : 1.560 secs
child run time : 0.000 secs
gbc time : 0.290 secs
real time : 1.730 secs
run-gbc time : 1.440 secs
child run time : 0.000 secs
gbc time : 0.290 secs
NIL
COMPILER>
=============================================================================
GCL 2.7.0 experimental
=============================================================================
address@hidden:~/debian/gcl/gclcvs/tmp/unixport$
address@hidden:~/debian/gcl/gclcvs/tmp/unixport$ ./saved_ansi_gcl
GCL (GNU Common Lisp) NIL.NIL.NIL CLtL1 May 25 2012 14:58:14
Source License: LGPL(gcl,gmp,pargcl), GPL(unexec,bfd,xgcl)
Binary License: GPL due to GPL'ed components: (XGCL READLINE UNEXEC)
Modifications of this banner must retain notice of a compatible license
Dedicated to the memory of W. Schelter
Use (help) to get some basic information on how to use GCL.
Temporary directory for compiler files set to /tmp/
>(in-package :compiler)
#<"COMPILER" package>
COMPILER>(load (compile-file "/tmp/u.l" :c-file t :data-file t :system-p t
:h-file t))
;; Compiling /tmp/u.l.
;; End of Pass 1.
;; End of Pass 2.
;; OPTIMIZE levels: Safety=0 (No runtime error checking), Space=0, Speed=3,
(Debug quality ignored)
;; Finished compiling /tmp/u.o.
;; Loading /tmp/u.o
start address -T 0xf2b160 ;; Finished loading /tmp/u.o
10583
COMPILER>(load (compile-file "/tmp/u.l" :c-file t :data-file t :system-p t
:h-file t))
;; Compiling /tmp/u.l.
;; End of Pass 1.
;; End of Pass 2.
;; OPTIMIZE levels: Safety=0 (No runtime error checking), Space=0, Speed=3,
(Debug quality ignored)
;; Finished compiling /tmp/u.o.
;; Loading /tmp/u.o
start address -T 0xf2dab8 ;; Finished loading /tmp/u.o
10583
COMPILER>(si::doit most-positive-fixnum)
real time : 12.480 secs
run-gbc time : 8.210 secs
child run time : 0.000 secs
gbc time : 4.120 secs
real time : 1.910 secs
run-gbc time : 1.230 secs
child run time : 0.000 secs
gbc time : 0.670 secs
real time : 1.980 secs
run-gbc time : 1.320 secs
child run time : 0.000 secs
gbc time : 0.650 secs
real time : 1.700 secs
run-gbc time : 1.290 secs
child run time : 0.000 secs
gbc time : 0.410 secs
NIL
COMPILER>(si::doit 268435455)
real time : 9.270 secs
run-gbc time : 6.540 secs
child run time : 0.000 secs
gbc time : 2.690 secs
real time : 0.840 secs
run-gbc time : 0.840 secs
child run time : 0.000 secs
gbc time : 0.000 secs
real time : 0.870 secs
run-gbc time : 0.860 secs
child run time : 0.000 secs
gbc time : 0.000 secs
real time : 0.850 secs
run-gbc time : 0.860 secs
child run time : 0.000 secs
gbc time : 0.000 secs
NIL
COMPILER>(load (compile-file "/tmp/u1.l" :c-file t :data-file t :system-p t
:h-file t))
;; Compiling /tmp/u1.l.
;; End of Pass 1.
;; End of Pass 2.
;; OPTIMIZE levels: Safety=0 (No runtime error checking), Space=0, Speed=3,
(Debug quality ignored)
;; Finished compiling /tmp/u1.o.
;; Loading /tmp/u1.o
start address -T 0xf15928 ;; Finished loading /tmp/u1.o
13663
COMPILER>(load (compile-file "/tmp/u1.l" :c-file t :data-file t :system-p t
:h-file t))
;; Compiling /tmp/u1.l.
;; End of Pass 1.
;; End of Pass 2.
;; OPTIMIZE levels: Safety=0 (No runtime error checking), Space=0, Speed=3,
(Debug quality ignored)
;; Finished compiling /tmp/u1.o.
;; Loading /tmp/u1.o
start address -T 0xf18e88 ;; Finished loading /tmp/u1.o
19107
COMPILER>(si::doit most-positive-fixnum)
real time : 11.750 secs
run-gbc time : 8.240 secs
child run time : 0.000 secs
gbc time : 3.470 secs
real time : 1.330 secs
run-gbc time : 1.000 secs
child run time : 0.000 secs
gbc time : 0.310 secs
real time : 1.390 secs
run-gbc time : 1.080 secs
child run time : 0.000 secs
gbc time : 0.310 secs
real time : 1.370 secs
run-gbc time : 1.060 secs
child run time : 0.000 secs
gbc time : 0.300 secs
NIL
COMPILER>(si::doit 268435455)
real time : 9.340 secs
run-gbc time : 6.570 secs
child run time : 0.000 secs
gbc time : 2.760 secs
real time : 0.550 secs
run-gbc time : 0.550 secs
child run time : 0.000 secs
gbc time : 0.000 secs
real time : 0.560 secs
run-gbc time : 0.560 secs
child run time : 0.000 secs
gbc time : 0.000 secs
real time : 0.590 secs
run-gbc time : 0.580 secs
child run time : 0.000 secs
gbc time : 0.000 secs
NIL
COMPILER>
=============================================================================
>
>
>
--
Camm Maguire address@hidden
==========================================================================
"The earth is but one country, and mankind its citizens." -- Baha'u'llah
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- Re: [Gcl-devel] [Maxima] Questions on modular arithmetic functions in rat3a.lisp,
Camm Maguire <=