[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Libcvd-members] libcvd/cvd utility.h
From: |
Ethan Eade |
Subject: |
[Libcvd-members] libcvd/cvd utility.h |
Date: |
Tue, 16 May 2006 13:14:37 +0000 |
CVSROOT: /cvsroot/libcvd
Module name: libcvd
Branch:
Changes by: Ethan Eade <address@hidden> 06/05/16 13:14:37
Modified files:
cvd : utility.h
Log message:
Regularised all the utility functions. These perform computations over many
input values, and some functions are specialised for certain data types
using SIMD. The implementations of the optimised versions are all done
using intrinsics for MMX, SSE, and SSE2 (as opposed to straight assembly)
and should thus compile on more platforms and compilers.
Generic implementations are provided for all functions, and an appropriate
combination of generic and optimised implementations is chosen at runtime
depending on the alignment of input data.
Such functions include:
differences: computes a_i - b_i -> c_i
(specialised for
[byte,byte]->short,
[short,short]->short,
[int,int]->int,
[float,float]->float,
[double,double]->double)
add_multiple_of_sum: computes d_i += (a_i + b_i) * c
(specialised for
[float,float,float]->float,
[double,double,double]->double)
assign_multiple: computes a_i * b -> c_i
(specialised for
[float,float]->float,
[double,double]->double)
inner_product: returns sum(a_i*b_i)
(specialised for
[float,float]->double,
[double,double]->double)
sum_squared_differences: returns sum((a_i-b_i)^2)
(specialised for
[byte,byte]->long long,
[float,float]->double,
[double,double]->double)
CVSWeb URLs:
http://cvs.savannah.gnu.org/viewcvs/libcvd/libcvd/cvd/utility.h.diff?tr1=1.3&tr2=1.4&r1=text&r2=text
Patches:
Index: libcvd/cvd/utility.h
diff -u libcvd/cvd/utility.h:1.3 libcvd/cvd/utility.h:1.4
--- libcvd/cvd/utility.h:1.3 Thu Feb 9 11:11:27 2006
+++ libcvd/cvd/utility.h Tue May 16 13:14:37 2006
@@ -1,6 +1,7 @@
#ifndef CVD_UTILITY_H
#define CVD_UTILITY_H
+#include <cvd/config.h>
#include <cvd/image.h>
#include <cvd/internal/is_pod.h>
#include <cvd/internal/pixel_traits.h>
@@ -51,86 +52,146 @@
}
}
+ template <class T, bool pod = Internal::is_POD<T>::is_pod> struct ZeroPixel {
+ static void zero(T& t) {
+ for (unsigned int c=0; c<Pixel::Component<T>::count; c++)
+ Pixel::Component<T>::get(t,c) = 0;
+ }
+ };
+
+ template <class T> struct ZeroPixel<T,true> {
+ static void zero(T& t) { memset(&t,0,sizeof(T)); }
+ };
+
template <class T, bool pod = Internal::is_POD<T>::is_pod> struct ZeroPixels
{
- static void zero(T* pixels, int count) {
- while (count--) {
- *(pixels++) = T();
+ static void zero(T* pixels, int count) {
+ if (count) {
+ ZeroPixel<T>::zero(*pixels);
+ std::fill(pixels+1, pixels+count, *pixels);
+ }
}
- }
};
template <class T> struct ZeroPixels<T,true> {
- static void zero(T* pixels, int count) {
- memset(pixels, 0, sizeof(T)*count);
- }
+ static void zero(T* pixels, int count) {
+ memset(pixels, 0, sizeof(T)*count);
+ }
};
+
- template <class T> void zeroPixel(T& pixel) {
- ZeroPixels<T>::zero(&pixel, 1);
- }
-
- template <class T> void zeroPixels(T* pixels, int count) {
- ZeroPixels<T>::zero(pixels, count);
- }
-
+ /// Set a pixel to the default value (typically 0)
+ /// For multi-component pixels, this zeros all components (sets them to
defaults)
+ template <class T> inline void zeroPixel(T& pixel) {
ZeroPixel<T>::zero(pixel); }
+
+ /// Set many pixels to the default value (typically 0)
+ /// For multi-component pixels, this zeros all components (sets them to
defaults)
+ template <class T> inline void zeroPixels(T* pixels, int count) {
ZeroPixels<T>::zero(pixels, count); }
+
+ /// Set the one-pixel border (top, bottom, sides) of an image to zero values
template <class T> void zeroBorders(BasicImage<T>& I)
{
if (I.size().y == 0)
return;
zeroPixels(I[0], I.size().x);
for (int r=0;r<I.size().y-1; r++)
- zeroPixels(I[r]+I.size().x-1,2);
+ zeroPixels(I[r]+I.size().x-1,2);
zeroPixels(I[I.size().y-1], I.size().x);
}
- template <class A, class B> inline void differences(const A* a, const A* b,
B* diff, unsigned int size) {
- for (unsigned int j=0; j<size; j++)
- *(diff++) = *(b++)-*(a++);
+ /// Compute pointwise differences (a_i - b_i) and store in diff_i
+ /// This is accelerated using SIMD for some platforms and data types
(alignment is checked at runtime)
+ /// Do not specify template parameters explicitly so that overloading can
choose the right implementation
+ template <class A, class B> inline void differences(const A* a, const A* b,
B* diff, unsigned int count)
+ {
+ while (count--)
+ *(diff++) = (B)*(a++) - (B)*(b++);
}
-
-#if defined(CVD_HAVE_MMXEXT) && defined(CVD_HAVE_CPU_i686)
- void differences(const unsigned char* a, const unsigned char* b, short*
diff, unsigned int size);
- void differences(const short* a, const short* b, short* diff, unsigned int
size);
- void differences(const int32_t* a, const int32_t* b, int32_t* diff, unsigned
int size);
-#endif
-
-#if defined(CVD_HAVE_SSE) && defined(CVD_HAVE_CPU_i686)
- void differences(const float* a, const float* b, float* diff, unsigned int
size);
-#endif
- template <class T,class S> inline void add_mul_add(const T* a, const T* b,
const S& f, T* c, size_t count)
+ /// Compute pointwise (a_i + b_i) * c and add to out_i
+ /// This is accelerated using SIMD for some platforms and data types
(alignment is checked at runtime)
+ /// Do not specify template parameters explicitly so that overloading can
choose the right implementation
+ template <class A, class B> inline void add_multiple_of_sum(const A* a,
const A* b, const A& c, B* out, unsigned int count)
{
- for (size_t i=0; i<count; i++)
- c[i] += f * (a[i] + b[i]);
+ while (count--)
+ *(out++) += (*(a++) + *(b++)) * c;
}
-
- template <class T,class S> inline void add_mul_add(const Rgb<T>* a, const
Rgb<T>* b, const S& f, Rgb<T>* c, size_t count)
+
+ /// Compute pointwise a_i * c and store in out_i
+ /// This is accelerated using SIMD for some platforms and data types
(alignment is checked at runtime)
+ /// Do not specify template parameters explicitly so that overloading can
choose the right implementation
+ template <class A, class B> inline void assign_multiple(const A* a, const A&
c, B* out, unsigned int count)
{
- add_mul_add(reinterpret_cast<const T*>(a), reinterpret_cast<const T*>(b),
f, reinterpret_cast<T*>(c), count*3);
+ while (count--)
+ *(out++) = *(a++) * c;
}
-#if defined(CVD_HAVE_SSE) && defined(CVD_HAVE_CPU_i686)
- void add_mul_add(const float* a, const float* b, const float& f, float* c,
size_t count);
-#endif
-
- template <class T,class S> inline void assign_mul(const T* in, const S& f,
T* out, size_t count)
- {
- for (size_t i=0; i<count; i++)
- out[i] = f * in[i];
+ /// Compute sum(a_i*b_i)
+ /// This is accelerated using SIMD for some platforms and data types
(alignment is checked at runtime)
+ /// Do not specify template parameters explicitly so that overloading can
choose the right implementation
+ template <class T> double inner_product(const T* a, const T* b, unsigned int
count) {
+ double dot = 0;
+ while (count--)
+ dot += *(a++) * *(b++);
+ return dot;
+ }
+
+ template <class R, class D, class T> struct SumSquaredDifferences {
+ static inline R sum_squared_differences(const T* a, const T* b, size_t
count) {
+ R ssd = 0;
+ while (count--) {
+ D d = *a++ - *b++;
+ ssd += d*d;
+ }
+ return ssd;
+ }
+ };
+
+ /// Compute sum of (a_i - b_i)^2 (the SSD)
+ /// This is accelerated using SIMD for some platforms and data types
(alignment is checked at runtime)
+ /// Do not specify template parameters explicitly so that overloading can
choose the right implementation
+ template <class T> inline double sum_squared_differences(const T* a, const
T* b, size_t count) {
+ return
SumSquaredDifferences<double,double,T>::sum_squared_differences(a,b,count);
}
-#if defined(CVD_HAVE_SSE) && defined(CVD_HAVE_CPU_i686)
- inline void assign_mul(const float* in, const double& f, float* out, size_t
count) {
- assign_mul(in,(float)f,out,count);
- }
- void assign_mul(const float* in, const float& f, float* out, size_t count);
-#endif
-
+ /// Check if the pointer is aligned to the specified byte granularity
template<int bytes> bool is_aligned(const void* ptr);
-
template<> inline bool is_aligned<8>(const void* ptr) { return
((reinterpret_cast<size_t>(ptr)) & 0x7) == 0; }
template<> inline bool is_aligned<16>(const void* ptr) { return
((reinterpret_cast<size_t>(ptr)) & 0xF) == 0; }
+ /// Compute the number of pointer increments necessary to yield alignment of
A bytes
+ template<int A, class T> inline size_t steps_to_align(const T* ptr)
+ {
+ return is_aligned<A>(ptr) ? 0 : (A-((reinterpret_cast<size_t>(ptr)) &
(A-1)))/sizeof(T);
+ }
+
+#if defined(CVD_HAVE_MMXEXT) && defined(CVD_HAVE_MMINTRIN)
+ void differences(const byte* a, const byte* b, short* diff, unsigned int
size);
+ void differences(const short* a, const short* b, short* diff, unsigned int
size);
+#endif
+
+
+#if defined(CVD_HAVE_SSE) && defined(CVD_HAVE_XMMINTRIN)
+ void differences(const float* a, const float* b, float* diff, unsigned int
size);
+ void add_multiple_of_sum(const float* a, const float* b, const float& c,
float* out, unsigned int count);
+ void assign_multiple(const float* a, const float& c, float* out, unsigned
int count);
+ double inner_product(const float* a, const float* b, unsigned int count);
+ double sum_squared_differences(const float* a, const float* b, size_t count);
+#endif
+
+#if defined (CVD_HAVE_SSE2) && defined(CVD_HAVE_EMMINTRIN)
+ void differences(const int32_t* a, const int32_t* b, int32_t* diff, unsigned
int size);
+ void differences(const double* a, const double* b, double* diff, unsigned
int size);
+ void add_multiple_of_sum(const double* a, const double* b, const float& c,
double* out, unsigned int count);
+ void assign_multiple(const double* a, const double& c, double* out,
unsigned int count);
+ double inner_product(const double* a, const double* b, unsigned int count);
+ double sum_squared_differences(const double* a, const double* b, size_t
count);
+ long long sum_squared_differences(const byte* a, const byte* b, size_t
count);
+#else
+ inline long long sum_squared_differences(const byte* a, const byte* b,
size_t count) {
+ return SumSquaredDifferences<long
long,int,byte>::sum_squared_differences(a,b,count);
+ }
+#endif
+
}
#endif
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [Libcvd-members] libcvd/cvd utility.h,
Ethan Eade <=