getfem-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Getfem-commits] (no subject)


From: Andriy Andreykiv
Subject: [Getfem-commits] (no subject)
Date: Wed, 31 Oct 2018 07:54:22 -0400 (EDT)

branch: optimising_gmm_add_for_small_vectors
commit ab95e5c68c5e598d9a6d67e8402ba9eb3dc247c2
Author: Andriy.Andreykiv <address@hidden>
Date:   Wed Oct 31 12:54:06 2018 +0100

    optimizing gmm::add for small vectors
---
 src/gmm/gmm_blas_interface.h | 45 +++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 42 insertions(+), 3 deletions(-)

diff --git a/src/gmm/gmm_blas_interface.h b/src/gmm/gmm_blas_interface.h
index 8144293..f051c74 100644
--- a/src/gmm/gmm_blas_interface.h
+++ b/src/gmm/gmm_blas_interface.h
@@ -328,13 +328,52 @@ namespace gmm {
   /* ********************************************************************* */
   /* add(x, y).                                                            */
   /* ********************************************************************* */
+  template<size_type N, class V1, class V2>
+  inline void add_fixed(const V1 &x, V2 &y) { /* y[k] += x[k], 0 <= k < N */
+    size_type k = 0;
+    while(k != N) { y[k] += x[k]; ++k; }
+  }
+
+  template<class V1, class V2>  /* y[i] += x[i] for i < n, where 1 <= n <= 24 */
+  inline void add_for_short_vectors(const V1 &x, V2 &y, size_type n)
+  {
+    switch(n)  /* map the run-time length onto a compile-time add_fixed<N> */
+    {
+      case  1: add_fixed<1>(x, y);  break;
+      case  2: add_fixed<2>(x, y);  break;
+      case  3: add_fixed<3>(x, y);  break;
+      case  4: add_fixed<4>(x, y);  break;
+      case  5: add_fixed<5>(x, y);  break;
+      case  6: add_fixed<6>(x, y);  break;
+      case  7: add_fixed<7>(x, y);  break;
+      case  8: add_fixed<8>(x, y);  break;
+      case  9: add_fixed<9>(x, y);  break;
+      case 10: add_fixed<10>(x, y); break;
+      case 11: add_fixed<11>(x, y); break;
+      case 12: add_fixed<12>(x, y); break;
+      case 13: add_fixed<13>(x, y); break;
+      case 14: add_fixed<14>(x, y); break;
+      case 15: add_fixed<15>(x, y); break;
+      case 16: add_fixed<16>(x, y); break;
+      case 17: add_fixed<17>(x, y); break;
+      case 18: add_fixed<18>(x, y); break;
+      case 19: add_fixed<19>(x, y); break;
+      case 20: add_fixed<20>(x, y); break;
+      case 21: add_fixed<21>(x, y); break;
+      case 22: add_fixed<22>(x, y); break;
+      case 23: add_fixed<23>(x, y); break;
+      case 24: add_fixed<24>(x, y); break;
+      default: GMM_ASSERT2(false, "add_for_short_vectors used with unsupported size"); break;
+    }
+  }
 
 # define axpy_interface(param1, trans1, blas_name, base_type)              \
   inline void add(param1(base_type), std::vector<base_type > &y) {         \
     GMMLAPACK_TRACE("axpy_interface");                                     \
-    long inc(1), n(long(vect_size(y))); trans1(base_type);                \
-    if (n == 0) return;                                                           \
-    blas_name(&n, &a, &x[0], &inc, &y[0], &inc);                           \
+    long inc(1), n(long(vect_size(y))); trans1(base_type);                 \
+    if(n == 0) return;                     /* empty y: nothing to add */   \
+    else if(n < 25 && a == base_type(1)) add_for_short_vectors(x, y, n);   \
+    else blas_name(&n, &a, &x[0], &inc, &y[0], &inc); /* y += a*x */       \
   }
 
 # define axpy_p1(base_type) const std::vector<base_type > &x



reply via email to

[Prev in Thread] Current Thread [Next in Thread]