commit-gnuradio
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Commit-gnuradio] [gnuradio] 02/22: volk: add neon kernels for 32fc->32f deinterleavers


From: git
Subject: [Commit-gnuradio] [gnuradio] 02/22: volk: add neon kernels for 32fc->32f deinterleavers
Date: Fri, 31 Oct 2014 19:22:30 +0000 (UTC)

This is an automated email from the git hooks/post-receive script.

jcorgan pushed a commit to branch master
in repository gnuradio.

commit 58432bad48831ae2f3f14fa5d3a8472d7c55ffae
Author: Nathan West <address@hidden>
Date:   Fri Oct 17 21:04:41 2014 -0500

    volk: add neon kernels for 32fc->32f deinterleavers
---
 volk/kernels/volk/volk_32fc_deinterleave_32f_x2.h  | 33 ++++++++++++++++++++++
 .../kernels/volk/volk_32fc_deinterleave_imag_32f.h | 29 +++++++++++++++++++
 .../kernels/volk/volk_32fc_deinterleave_real_32f.h | 26 +++++++++++++++++
 3 files changed, 88 insertions(+)

diff --git a/volk/kernels/volk/volk_32fc_deinterleave_32f_x2.h 
b/volk/kernels/volk/volk_32fc_deinterleave_32f_x2.h
index 5b485ec..6874966 100644
--- a/volk/kernels/volk/volk_32fc_deinterleave_32f_x2.h
+++ b/volk/kernels/volk/volk_32fc_deinterleave_32f_x2.h
@@ -122,6 +122,39 @@ static inline void 
volk_32fc_deinterleave_32f_x2_a_sse(float* iBuffer, float* qB
 }
 #endif /* LV_HAVE_SSE */
 
+#ifdef LV_HAVE_NEON
+#include <arm_neon.h>
+/*!
+  \brief Deinterleaves the complex vector into I & Q vector data (NEON kernel)
+  \param complexVector The complex input vector, read here as interleaved floats
+  \param iBuffer The I buffer output data (first float of each complex value)
+  \param qBuffer The Q buffer output data (second float of each complex value)
+  \param num_points The number of complex data values to be deinterleaved
+*/
+static inline void volk_32fc_deinterleave_32f_x2_neon(float* iBuffer, float* 
qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){
+  unsigned int number = 0;
+  unsigned int quarter_points = num_points / 4; /* number of full 4-point vector iterations */
+  const float* complexVectorPtr = (float*)complexVector; /* walk the input as raw floats */
+  float* iBufferPtr = iBuffer;
+  float* qBufferPtr = qBuffer;
+  float32x4x2_t complexInput;
+
+  for(number = 0; number < quarter_points; number++){
+    complexInput = vld2q_f32(complexVectorPtr); /* loads 8 floats: even elements -> val[0], odd -> val[1] */
+    vst1q_f32( iBufferPtr, complexInput.val[0] );
+    vst1q_f32( qBufferPtr, complexInput.val[1] );
+    complexVectorPtr += 8; /* 4 complex values = 8 floats consumed */
+    iBufferPtr += 4;
+    qBufferPtr += 4;
+  }
+
+  for(number = quarter_points*4; number < num_points; number++){ /* scalar tail: remaining 0-3 points */
+    *iBufferPtr++ = *complexVectorPtr++;
+    *qBufferPtr++ = *complexVectorPtr++;
+  }
+}
+#endif /* LV_HAVE_NEON */
+
 #ifdef LV_HAVE_GENERIC
 /*!
   \brief Deinterleaves the complex vector into I & Q vector data
diff --git a/volk/kernels/volk/volk_32fc_deinterleave_imag_32f.h 
b/volk/kernels/volk/volk_32fc_deinterleave_imag_32f.h
index 6fabedd..f80265d 100644
--- a/volk/kernels/volk/volk_32fc_deinterleave_imag_32f.h
+++ b/volk/kernels/volk/volk_32fc_deinterleave_imag_32f.h
@@ -110,6 +110,35 @@ static inline void 
volk_32fc_deinterleave_imag_32f_a_sse(float* qBuffer, const l
 }
 #endif /* LV_HAVE_SSE */
 
+#ifdef LV_HAVE_NEON
+#include <arm_neon.h>
+/*!
+  \brief Deinterleaves the complex vector into Q vector data (NEON kernel)
+  \param complexVector The complex input vector, read here as interleaved floats
+  \param qBuffer The Q buffer output data (second float of each complex value)
+  \param num_points The number of complex data values to be deinterleaved
+*/
+static inline void volk_32fc_deinterleave_imag_32f_neon(float* qBuffer, const 
lv_32fc_t* complexVector, unsigned int num_points){
+  unsigned int number = 0;
+  unsigned int quarter_points = num_points / 4; /* number of full 4-point vector iterations */
+  const float* complexVectorPtr = (float*)complexVector; /* walk the input as raw floats */
+  float* qBufferPtr = qBuffer;
+  float32x4x2_t complexInput;
+
+  for(number = 0; number < quarter_points; number++){
+    complexInput = vld2q_f32(complexVectorPtr); /* loads 8 floats: even elements -> val[0], odd -> val[1] */
+    vst1q_f32( qBufferPtr, complexInput.val[1] ); /* only the odd (second-of-pair) elements are kept */
+    complexVectorPtr += 8; /* 4 complex values = 8 floats consumed */
+    qBufferPtr += 4;
+  }
+
+  for(number = quarter_points*4; number < num_points; number++){ /* scalar tail: remaining 0-3 points */
+    complexVectorPtr++; /* skip the first float of the pair */
+    *qBufferPtr++ = *complexVectorPtr++;
+  }
+}
+#endif /* LV_HAVE_NEON */
+
 #ifdef LV_HAVE_GENERIC
 /*!
   \brief Deinterleaves the complex vector into Q vector data
diff --git a/volk/kernels/volk/volk_32fc_deinterleave_real_32f.h 
b/volk/kernels/volk/volk_32fc_deinterleave_real_32f.h
index 9200206..c0e8d8f 100644
--- a/volk/kernels/volk/volk_32fc_deinterleave_real_32f.h
+++ b/volk/kernels/volk/volk_32fc_deinterleave_real_32f.h
@@ -84,7 +84,33 @@ static inline void 
volk_32fc_deinterleave_real_32f_generic(float* iBuffer, const
 }
 #endif /* LV_HAVE_GENERIC */
 
+#ifdef LV_HAVE_NEON
+#include <arm_neon.h>
+/*!
+  \brief Deinterleaves the complex vector into I vector data (NEON kernel)
+  \param complexVector The complex input vector, read here as interleaved floats
+  \param iBuffer The I buffer output data (first float of each complex value)
+  \param num_points The number of complex data values to be deinterleaved
+*/
+static inline void volk_32fc_deinterleave_real_32f_neon(float* iBuffer, const 
lv_32fc_t* complexVector, unsigned int num_points){
+  unsigned int number = 0;
+  unsigned int quarter_points = num_points / 4; /* number of full 4-point vector iterations */
+  const float* complexVectorPtr = (float*)complexVector; /* walk the input as raw floats */
+  float* iBufferPtr = iBuffer;
+  float32x4x2_t complexInput;
 
+  for(number = 0; number < quarter_points; number++){
+    complexInput = vld2q_f32(complexVectorPtr); /* loads 8 floats: even elements -> val[0], odd -> val[1] */
+    vst1q_f32( iBufferPtr, complexInput.val[0] ); /* only the even (first-of-pair) elements are kept */
+    complexVectorPtr += 8; /* 4 complex values = 8 floats consumed */
+    iBufferPtr += 4;
+  }
 
+  for(number = quarter_points*4; number < num_points; number++){ /* scalar tail: remaining 0-3 points */
+    *iBufferPtr++ = *complexVectorPtr++;
+    complexVectorPtr++; /* skip the second float of the pair */
+  }
+}
+#endif /* LV_HAVE_NEON */
 
 #endif /* INCLUDED_volk_32fc_deinterleave_real_32f_a_H */



reply via email to

[Prev in Thread] Current Thread [Next in Thread]