libcvd-members
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Libcvd-members] libcvd/cvd utility.h


From: Ethan Eade
Subject: [Libcvd-members] libcvd/cvd utility.h
Date: Tue, 16 May 2006 13:14:37 +0000

CVSROOT:        /cvsroot/libcvd
Module name:    libcvd
Branch:         
Changes by:     Ethan Eade <address@hidden>     06/05/16 13:14:37

Modified files:
        cvd            : utility.h 

Log message:
        Regularised all the utility functions.  These perform computations over
        many input values, and some functions are specialised for certain data
        types using SIMD.  The implementations of the optimised versions are all
        done using intrinsics for MMX, SSE, and SSE2 (as opposed to straight
        assembly) and should thus compile on more platforms and compilers.
        
        Generic implementations are provided for all functions, and an
        appropriate combination of generic and optimised implementations is
        chosen at runtime depending on the alignment of input data.
        
        Such functions include:
        
        differences: computes a_i - b_i -> c_i
        (specialised for
        [byte,byte]->short,
        [short,short]->short,
        [int,int]->int,
        [float,float]->float,
        [double,double]->double)
        
        add_multiple_of_sum: computes (a_i + b_i) * c and adds it to d_i
        (specialised for
        [float,float,float]->float,
        [double,double,double]->double)
        
        assign_multiple: computes a_i * b -> c_i
        (specialised for
        [float,float]->float,
        [double,double]->double)
        
        inner_product: returns sum(a_i*b_i)
        (specialised for
        [float,float]->double,
        [double,double]->double)
        
        sum_squared_differences: returns sum((a_i-b_i)^2)
        (specialised for
        [byte,byte]->long long,
        [float,float]->double,
        [double,double]->double)

CVSWeb URLs:
http://cvs.savannah.gnu.org/viewcvs/libcvd/libcvd/cvd/utility.h.diff?tr1=1.3&tr2=1.4&r1=text&r2=text

Patches:
Index: libcvd/cvd/utility.h
diff -u libcvd/cvd/utility.h:1.3 libcvd/cvd/utility.h:1.4
--- libcvd/cvd/utility.h:1.3    Thu Feb  9 11:11:27 2006
+++ libcvd/cvd/utility.h        Tue May 16 13:14:37 2006
@@ -1,6 +1,7 @@
 #ifndef CVD_UTILITY_H
 #define CVD_UTILITY_H
 
+#include <cvd/config.h>
 #include <cvd/image.h>
 #include <cvd/internal/is_pod.h>
 #include <cvd/internal/pixel_traits.h>
@@ -51,86 +52,146 @@
     }
   }
   
+  template <class T, bool pod = Internal::is_POD<T>::is_pod> struct ZeroPixel {
+      static void zero(T& t) { 
+         for (unsigned int c=0; c<Pixel::Component<T>::count; c++)
+             Pixel::Component<T>::get(t,c) = 0;
+      }
+  };
+  
+  template <class T> struct ZeroPixel<T,true> {
+      static void zero(T& t) { memset(&t,0,sizeof(T)); }
+  };
+  
   template <class T, bool pod = Internal::is_POD<T>::is_pod> struct ZeroPixels 
{
-    static void zero(T* pixels, int count) {
-      while (count--) {
-       *(pixels++) = T();
+      static void zero(T* pixels, int count) {
+         if (count) {
+             ZeroPixel<T>::zero(*pixels);
+             std::fill(pixels+1, pixels+count, *pixels);
+         }
       }
-    }
   };
 
   template <class T> struct ZeroPixels<T,true> {
-    static void zero(T* pixels, int count) {
-      memset(pixels, 0, sizeof(T)*count);
-    }
+      static void zero(T* pixels, int count) {
+         memset(pixels, 0, sizeof(T)*count);
+      }
   };
+  
 
-  template <class T> void zeroPixel(T& pixel) {
-    ZeroPixels<T>::zero(&pixel, 1);
-  }
-
-  template <class T> void zeroPixels(T* pixels, int count) {
-    ZeroPixels<T>::zero(pixels, count);
-  }
-
+  /// Set a pixel to the default value (typically 0)
+  /// For multi-component pixels, this zeros all components (sets them to 
defaults)
+  template <class T> inline void zeroPixel(T& pixel) { 
ZeroPixel<T>::zero(pixel); }
+
+  /// Set many pixels to the default value (typically 0)
+  /// For multi-component pixels, this zeros all components (sets them to 
defaults)
+  template <class T> inline void zeroPixels(T* pixels, int count) {  
ZeroPixels<T>::zero(pixels, count);  }
+  
+  /// Set the one-pixel border (top, bottom, sides) of an image to zero values
   template <class T> void zeroBorders(BasicImage<T>& I)
   {
     if (I.size().y == 0)
       return;
     zeroPixels(I[0], I.size().x);
     for (int r=0;r<I.size().y-1; r++)
-      zeroPixels(I[r]+I.size().x-1,2);
+       zeroPixels(I[r]+I.size().x-1,2);
     zeroPixels(I[I.size().y-1], I.size().x);
   }
 
-  template <class A, class B> inline void differences(const A* a, const A* b, 
B* diff, unsigned int size) {
-    for (unsigned int j=0; j<size; j++)
-      *(diff++) = *(b++)-*(a++);
+  /// Compute pointwise differences (a_i - b_i) and store in diff_i
+  /// This is accelerated using SIMD for some platforms and data types 
(alignment is checked at runtime)
+  /// Do not specify template parameters explicitly so that overloading can 
choose the right implementation
+  template <class A, class B> inline void differences(const A* a, const A* b, 
B* diff, unsigned int count)
+  {
+      while (count--)
+         *(diff++) = (B)*(a++) - (B)*(b++);
   }
-  
-#if defined(CVD_HAVE_MMXEXT) && defined(CVD_HAVE_CPU_i686)
-  void differences(const unsigned char* a, const unsigned char* b, short* 
diff, unsigned int size);
-  void differences(const short* a, const short* b, short* diff, unsigned int 
size);
-  void differences(const int32_t* a, const int32_t* b, int32_t* diff, unsigned 
int size);
-#endif 
-  
-#if defined(CVD_HAVE_SSE) && defined(CVD_HAVE_CPU_i686)
-  void differences(const float* a, const float* b, float* diff, unsigned int 
size);
-#endif
 
-  template <class T,class S> inline void add_mul_add(const T* a, const T* b, 
const S& f, T* c, size_t count)
+  /// Compute pointwise (a_i + b_i) * c and add to out_i
+  /// This is accelerated using SIMD for some platforms and data types 
(alignment is checked at runtime)
+  /// Do not specify template parameters explicitly so that overloading can 
choose the right implementation
+  template <class A, class B> inline void add_multiple_of_sum(const A* a, 
const A* b, const A& c,  B* out, unsigned int count)
   {
-    for (size_t i=0; i<count; i++)
-      c[i] += f * (a[i] + b[i]);
+      while (count--)
+         *(out++) += (*(a++) + *(b++)) * c;
   }
-
-  template <class T,class S> inline void add_mul_add(const Rgb<T>* a, const 
Rgb<T>* b, const S& f, Rgb<T>* c, size_t count)
+  
+  /// Compute pointwise a_i * c and store in out_i
+  /// This is accelerated using SIMD for some platforms and data types 
(alignment is checked at runtime)
+  /// Do not specify template parameters explicitly so that overloading can 
choose the right implementation
+  template <class A, class B> inline void assign_multiple(const A* a, const A& 
c,  B* out, unsigned int count)
   {
-    add_mul_add(reinterpret_cast<const T*>(a), reinterpret_cast<const T*>(b), 
f, reinterpret_cast<T*>(c), count*3);
+      while (count--)
+         *(out++) = *(a++) * c;
   }
 
-#if defined(CVD_HAVE_SSE) && defined(CVD_HAVE_CPU_i686)
-  void add_mul_add(const float* a, const float* b, const float& f, float* c, 
size_t count);
-#endif
-
-  template <class T,class S> inline void assign_mul(const T* in, const S& f, 
T* out, size_t count)
-  {
-    for (size_t i=0; i<count; i++)
-      out[i] = f * in[i];
+  /// Compute sum(a_i*b_i)
+  /// This is accelerated using SIMD for some platforms and data types 
(alignment is checked at runtime)
+  /// Do not specify template parameters explicitly so that overloading can 
choose the right implementation
+  template <class T> double inner_product(const T* a, const T* b, unsigned int 
count) {
+      double dot = 0;
+      while (count--)
+         dot += *(a++) * *(b++);
+      return dot;
+  }
+
+  template <class R, class D, class T> struct SumSquaredDifferences {
+      static inline R sum_squared_differences(const T* a, const T* b, size_t 
count) {
+         R ssd = 0;
+         while (count--) {
+             D d = *a++ - *b++;
+             ssd += d*d;
+         }
+         return ssd;
+      }
+  };
+ 
+  /// Compute sum of (a_i - b_i)^2 (the SSD)
+  /// This is accelerated using SIMD for some platforms and data types 
(alignment is checked at runtime)
+  /// Do not specify template parameters explicitly so that overloading can 
choose the right implementation
+  template <class T> inline double sum_squared_differences(const T* a, const 
T* b, size_t count) {
+      return 
SumSquaredDifferences<double,double,T>::sum_squared_differences(a,b,count);
   }
   
-#if defined(CVD_HAVE_SSE) && defined(CVD_HAVE_CPU_i686)
-  inline void assign_mul(const float* in, const double& f, float* out, size_t 
count) {
-    assign_mul(in,(float)f,out,count);
-  }
-  void assign_mul(const float* in, const float& f, float* out, size_t count);
-#endif
-
+  /// Check if the pointer is aligned to the specified byte granularity
   template<int bytes> bool is_aligned(const void* ptr);
-
   template<> inline bool is_aligned<8>(const void* ptr) {   return 
((reinterpret_cast<size_t>(ptr)) & 0x7) == 0;   }
   template<> inline bool is_aligned<16>(const void* ptr) {  return 
((reinterpret_cast<size_t>(ptr)) & 0xF) == 0;   }
 
+  /// Compute the number of pointer increments necessary to yield alignment of 
A bytes
+  template<int A, class T> inline size_t steps_to_align(const T* ptr) 
+  {
+      return is_aligned<A>(ptr) ? 0 : (A-((reinterpret_cast<size_t>(ptr)) & 
(A-1)))/sizeof(T); 
+  }
+
+#if defined(CVD_HAVE_MMXEXT) && defined(CVD_HAVE_MMINTRIN)
+  void differences(const byte* a, const byte* b, short* diff, unsigned int 
size);
+  void differences(const short* a, const short* b, short* diff, unsigned int 
size);
+#endif  
+
+
+#if defined(CVD_HAVE_SSE) && defined(CVD_HAVE_XMMINTRIN)
+  void differences(const float* a, const float* b, float* diff, unsigned int 
size);
+  void add_multiple_of_sum(const float* a, const float* b, const float& c,  
float* out, unsigned int count);
+  void assign_multiple(const float* a, const float& c,  float* out, unsigned 
int count);
+  double inner_product(const float* a, const float* b, unsigned int count);
+  double sum_squared_differences(const float* a, const float* b, size_t count);
+#endif
+
+#if defined (CVD_HAVE_SSE2) && defined(CVD_HAVE_EMMINTRIN)
+  void differences(const int32_t* a, const int32_t* b, int32_t* diff, unsigned 
int size);
+  void differences(const double* a, const double* b, double* diff, unsigned 
int size);
+  void add_multiple_of_sum(const double* a, const double* b, const float& c,  
double* out, unsigned int count);
+  void assign_multiple(const double* a, const double& c,  double* out, 
unsigned int count);
+  double inner_product(const double* a, const double* b, unsigned int count);
+  double sum_squared_differences(const double* a, const double* b, size_t 
count);
+  long long sum_squared_differences(const byte* a, const byte* b, size_t 
count);
+#else  
+  inline long long sum_squared_differences(const byte* a, const byte* b, 
size_t count) {
+      return SumSquaredDifferences<long 
long,int,byte>::sum_squared_differences(a,b,count);
+  }
+#endif 
+  
 }
 
 #endif




reply via email to

[Prev in Thread] Current Thread [Next in Thread]