[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Commit-gnuradio] r9550 - gnuradio/branches/developers/nldudok1/gpgpu-wi
From: |
nldudok1 |
Subject: |
[Commit-gnuradio] r9550 - gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib |
Date: |
Wed, 10 Sep 2008 09:30:21 -0600 (MDT) |
Author: nldudok1
Date: 2008-09-10 09:30:20 -0600 (Wed, 10 Sep 2008)
New Revision: 9550
Modified:
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_ccf_cuda.cc
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_ccf_cuda.h
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_fff_7_kernel.cu
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_fff_cuda.cc
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_fff_cuda.h
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_filter_ccf.cc
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_filter_ccf.h
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_filter_fff.cc
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_filter_fff.h
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_quadrature_demod_cuda_cf.cc
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_quadrature_demod_cuda_cf.h
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_wfm_rcv_cf.cc
Log:
changed gr-cuda blocks to use vmcircbuf_cuda
Modified:
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_ccf_cuda.cc
===================================================================
---
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_ccf_cuda.cc
2008-09-10 01:23:21 UTC (rev 9549)
+++
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_ccf_cuda.cc
2008-09-10 15:30:20 UTC (rev 9550)
@@ -33,85 +33,48 @@
cuda_fir_ccf_cuda::cuda_fir_ccf_cuda()
: cuda_fir_ccf()
{
- d_first=true;
d_verbose=0;
- d_device_output_internal=true;
- d_device_input_internal=true;
+ d_device_taps=NULL;
}
cuda_fir_ccf_cuda::cuda_fir_ccf_cuda(const std::vector<float> &taps)
: cuda_fir_ccf(taps)
{
- d_first=true;
d_verbose=0;
- d_device_output_internal=true;
- d_device_input_internal=true;
+ d_device_taps=NULL;
}
-void
-cuda_fir_ccf_cuda::init_cuda (unsigned max_noutputs,unsigned
max_ntaps,unsigned decimation) {
- //cuda_fir->ntaps=-1;
- d_device_taps=NULL;
+bool
+cuda_fir_ccf_cuda::start()
+{
//unsigned int max_mem_size = sizeof( float) * MAX_NTAPS;
CUDA_SAFE_CALL(cudaMalloc( (void**) &(d_device_taps),
FIR_CCF_TAPS_BUFSIZE_BYTES));
CUDA_SAFE_CALL(cudaMemset( d_device_taps, 0, FIR_CCF_TAPS_BUFSIZE_BYTES)
);
d_device_taps_items=FIR_CCF_TAPS_BUFSIZE_BYTES;
- //max_mem_size=sizeof( gr_complex) * (MAX_NINPUTS);
- if(d_device_input_internal)
- {
- d_device_input=NULL;
- CUDA_SAFE_CALL( cudaMalloc( (void**)
&(d_device_input),FIR_CCF_INPUT_BUFSIZE_BYTES ));
- d_device_input_items=FIR_CCF_INPUT_BUFSIZE_ITEMS;
- } else
- d_device_input_items=0;//unknown external buffer size
- //max_mem_size=sizeof( gr_complex) * MAX_NOUTPUTS;
- if(d_device_input_internal)
- {
- d_device_output=NULL;
- CUDA_SAFE_CALL( cudaMalloc( (void**) &(d_device_output),
FIR_CCF_OUTPUT_BUFSIZE_BYTES ));
- d_device_output_items=FIR_CCF_OUTPUT_BUFSIZE_ITEMS;
- } else
- d_device_output_items=0;//unknown external buffer size
- d_host_taps=NULL;
- set_taps(d_taps);
- d_first=false;
- }
+ set_taps_cuda();
+ return true;
+}
+bool
+cuda_fir_ccf_cuda::stop()
+{
+ cudaThreadSynchronize();
+ CUDA_SAFE_CALL(cudaFree(d_device_taps));
+ d_device_taps=NULL;
+ return true;
+}
cuda_fir_ccf_cuda::~cuda_fir_ccf_cuda()
{
- cudaThreadSynchronize();
- CUDA_SAFE_CALL(cudaFree(d_device_taps));
- //cuda_fir->ntaps=-1;
-
- CUDA_SAFE_CALL( cudaFree( d_device_input));
- CUDA_SAFE_CALL( cudaFree( d_device_output));
- if(d_host_taps)
- free(d_host_taps);//host_taps only filled when requested with
cuda_fir_ccf_get_taps
+ stop();
}
-void
-cuda_fir_ccf_cuda::set_device_output(gr_complex *device_output)
-{
- if(!d_first) CUDA_SAFE_CALL( cudaFree( d_device_output));
- d_device_output=device_output;
- d_device_output_internal=false;
-}
void
-cuda_fir_ccf_cuda::set_device_input(gr_complex *device_input)
+cuda_fir_ccf_cuda::set_taps_cuda ()
{
- if(!d_first) CUDA_SAFE_CALL( cudaFree( d_device_input));
- d_device_input=device_input;
- d_device_input_internal=false;
-}
+ const std::vector<float> new_taps = d_taps;//d_taps is already set to
gr_reverse(inew_taps)
-void
-cuda_fir_ccf_cuda::set_taps (const std::vector<float> &inew_taps)
-{
- cuda_fir_ccf::set_taps (inew_taps); // call superclass which sets d_taps to
gr_reverse(inew_taps)
- const std::vector<float> new_taps = gr_reverse(inew_taps);//d_taps is also
already set to gr_reverse(inew_taps)
-
unsigned len = new_taps.size ();
unsigned int mem_size = sizeof( float) * len;
unsigned int max_mem_size = sizeof( float) * FIR_CCF_MAX_NTAPS;
@@ -143,6 +106,14 @@
free(rev_taps);
}
+void
+cuda_fir_ccf_cuda::set_taps (const std::vector<float> &inew_taps)
+{
+ cuda_fir_ccf::set_taps (inew_taps); // call superclass which sets d_taps to
gr_reverse(inew_taps)
+ //stop ();start(); // initialize cuda device memory for
taps and copy taps to cuda to it
+ set_taps_cuda (); // copy taps to cuda device memory
+}
+
gr_complex
cuda_fir_ccf_cuda::filter (const gr_complex input[])
{
@@ -166,19 +137,10 @@
{
// for (unsigned i = 0; i < n; i++)
// output[i] = filter (&input[i]);
-
- if(d_first) init_cuda(0,0,0);//unsigned max_noutputs,unsigned
max_ntaps,unsigned decimation
-
- // copy device memory to host
- //CUDA_SAFE_CALL(cudaMemset( d_device_input, 0, INPUT_BUFSIZE_BYTES) );
-cudaThreadSynchronize();
- if(input!=NULL) CUDA_SAFE_CALL( cudaMemcpy( d_device_input,input, sizeof(
gr_complex) * (n+ntaps()), cudaMemcpyHostToDevice) );
-
+ cudaThreadSynchronize();
//do the filtering on the device
- cuda_fir_ccf_7_filterN ((float2 *)d_device_output,(float2
*)d_device_input,n, ntaps());//cuda_filterN
-
- if(output!=NULL) CUDA_SAFE_CALL( cudaMemcpy( output, d_device_output,
sizeof( gr_complex) * (n), cudaMemcpyDeviceToHost) );
-
+ //cuda_fir_ccf_7_filterN ((float2 *)d_device_output,(float2
*)d_device_input,n, ntaps());//cuda_filterN
+ cuda_fir_ccf_7_filterN ((float2 *)output,(float2 *)input,n,
ntaps());//cuda_filterN
}
void
@@ -192,17 +154,11 @@
// output[i] = filter (&input[j]);
// j += decimate;
// }
- if(d_first) init_cuda(0,0,0);
-cudaThreadSynchronize();
- // copy device memory to host
- //CUDA_SAFE_CALL(cudaMemset( d_device_input, 0, INPUT_BUFSIZE_BYTES) );
- if(input!=NULL) CUDA_SAFE_CALL( cudaMemcpy( d_device_input,input, sizeof(
gr_complex) * (n*decimate+ntaps()), cudaMemcpyHostToDevice) );
-
- cuda_fir_ccf_7_filterNdec ((float2 *)d_device_output, (float2
*)d_device_input, n, ntaps(), decimate);//cuda_filterNdec
-
- if(output!=NULL) CUDA_SAFE_CALL( cudaMemcpy( output, d_device_output,
sizeof( gr_complex) * (n), cudaMemcpyDeviceToHost) );
-
+ cudaThreadSynchronize();
+ //do the filtering on the device
+ //cuda_fir_ccf_7_filterNdec ((float2 *)d_device_output, (float2
*)d_device_input, n, ntaps(), decimate);//cuda_filterNdec
+ cuda_fir_ccf_7_filterNdec ((float2 *)output, (float2 *)input, n, ntaps(),
decimate);//cuda_filterNdec
}
/*
Modified:
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_ccf_cuda.h
===================================================================
---
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_ccf_cuda.h
2008-09-10 01:23:21 UTC (rev 9549)
+++
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_ccf_cuda.h
2008-09-10 15:30:20 UTC (rev 9550)
@@ -25,7 +25,7 @@
#include <cuda_fir_ccf.h>
/*!
- * \brief Concrete class for generic implementation of FIR with gr_complex
input, gr_complex output and float taps
+ * \brief Concrete class for cuda implementation of FIR with gr_complex input,
gr_complex output and float taps
*
* The trailing suffix has the form _IOT where I codes the input type,
* O codes the output type, and T codes the tap type.
@@ -35,20 +35,14 @@
class cuda_fir_ccf_cuda : public cuda_fir_ccf {
private:
- void init_cuda(unsigned max_noutputs,unsigned max_ntaps,unsigned
decimation); //initialize cuda context, must be done in same thread as
work is run in
- gr_complex *d_device_input;
- gr_complex *d_device_output;
float *d_device_taps;
- float *d_host_taps;
- bool d_first;
int d_verbose;
unsigned d_device_taps_items;
- unsigned d_device_input_items;
- unsigned d_device_output_items;
- bool d_device_output_internal;
- bool d_device_input_internal;
+protected:
+ virtual void set_taps_cuda ();//copy the taps to cuda device memory
+
public:
// CREATORS
@@ -57,12 +51,30 @@
cuda_fir_ccf_cuda (const std::vector<float> &taps);// : cuda_fir_ccf (taps)
{}
~cuda_fir_ccf_cuda (); // public destructor
+ /*!
+ * \brief Called to enable drivers, etc for i/o devices.
+ *
+ * This allows a block to enable an associated driver to begin
+ * transfering data just before we start to execute the scheduler.
+ * The end result is that this reduces latency in the pipeline when
+ * dealing with audio devices, usrps, etc.
+ *
+ * In this case initialize the cuda memory for the fir taps
+ * In cuda all device memory has to be created in the same thread
+ * as where it is used. So we have to initialize and destroy it with
+ * every start and stop.
+ */
+ virtual bool start();
+
+ /*!
+ * \brief Called to disable drivers, etc for i/o devices.
+ * In this case destroy the cuda memory for the fir taps
+ */
+ virtual bool stop();
+
// ACCESSORS
- gr_complex * device_output(){return d_device_output;}
- gr_complex * device_input(){return d_device_input;}
+
// MANIPULATORS
- void set_device_output(gr_complex *device_output);
- void set_device_input(gr_complex *device_input);
/*!
* \brief compute a single output value.
*
Modified:
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_fff_7_kernel.cu
===================================================================
---
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_fff_7_kernel.cu
2008-09-10 01:23:21 UTC (rev 9549)
+++
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_fff_7_kernel.cu
2008-09-10 15:30:20 UTC (rev 9550)
@@ -377,7 +377,7 @@
// }
cuda_fir_fff_kernel_params params;
- const int verbose=0;
+ const int verbose=1;
get_cuda_fir_fff_filter_7_kernel_short_decim_params (ntaps,
(unsigned)n,decimate, ¶ms , verbose);
Modified:
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_fff_cuda.cc
===================================================================
---
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_fff_cuda.cc
2008-09-10 01:23:21 UTC (rev 9549)
+++
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_fff_cuda.cc
2008-09-10 15:30:20 UTC (rev 9550)
@@ -34,85 +34,46 @@
cuda_fir_fff_cuda::cuda_fir_fff_cuda()
: cuda_fir_fff()
{
- d_first=true;
d_verbose=0;
- d_device_output_internal=true;
- d_device_input_internal=true;
+ d_device_taps=NULL;
}
cuda_fir_fff_cuda::cuda_fir_fff_cuda(const std::vector<float> &taps)
: cuda_fir_fff(taps)
{
- d_first=true;
d_verbose=0;
- d_device_output_internal=true;
- d_device_input_internal=true;
+ d_device_taps=NULL;
}
-void
-cuda_fir_fff_cuda::init_cuda (unsigned max_noutputs,unsigned
max_ntaps,unsigned decimation) {
- //cuda_fir->ntaps=-1;
- d_device_taps=NULL;
- //unsigned int max_mem_size = sizeof( float) * MAX_NTAPS;
- CUDA_SAFE_CALL(cudaMalloc( (void**) &(d_device_taps),
FIR_FFF_TAPS_BUFSIZE_BYTES));
- CUDA_SAFE_CALL(cudaMemset( d_device_taps, 0, FIR_FFF_TAPS_BUFSIZE_BYTES)
);
- d_device_taps_items=FIR_FFF_TAPS_BUFSIZE_BYTES;
- //max_mem_size=sizeof( float) * (MAX_NINPUTS);
- if(d_device_input_internal)
- {
- d_device_input=NULL;
- CUDA_SAFE_CALL( cudaMalloc( (void**)
&(d_device_input),FIR_FFF_INPUT_BUFSIZE_BYTES ));
- d_device_input_items=FIR_FFF_INPUT_BUFSIZE_ITEMS;
- } else
- d_device_input_items=0;//unknown external buffer size
- //max_mem_size=sizeof( float) * MAX_NOUTPUTS;
- if(d_device_output_internal)
- {
- d_device_output=NULL;
- CUDA_SAFE_CALL( cudaMalloc( (void**) &(d_device_output),
FIR_FFF_OUTPUT_BUFSIZE_BYTES ));
- d_device_output_items=FIR_FFF_OUTPUT_BUFSIZE_ITEMS;
- } else
- d_device_output_items=0;//unknown external buffer size
- d_host_taps=NULL;
- set_taps(d_taps);
- d_first=false;
+bool
+cuda_fir_fff_cuda::start ()
+{
+ //unsigned int max_mem_size = sizeof( float) * MAX_NTAPS;
+ CUDA_SAFE_CALL(cudaMalloc( (void**) &(d_device_taps),
FIR_FFF_TAPS_BUFSIZE_BYTES));
+ CUDA_SAFE_CALL(cudaMemset( d_device_taps, 0, FIR_FFF_TAPS_BUFSIZE_BYTES) );
+ d_device_taps_items=FIR_FFF_TAPS_BUFSIZE_BYTES;
+ set_taps_cuda();
+ return true;
+}
- }
-
-
-cuda_fir_fff_cuda::~cuda_fir_fff_cuda()
- {
- cudaThreadSynchronize();
- CUDA_SAFE_CALL(cudaFree(d_device_taps));
- //cuda_fir->ntaps=-1;
-
- CUDA_SAFE_CALL( cudaFree( d_device_input));
- CUDA_SAFE_CALL( cudaFree( d_device_output));
- if(d_host_taps)
- free(d_host_taps);//host_taps only filled when requested with
cuda_fir_fff_get_taps
- }
-
-void
-cuda_fir_fff_cuda::set_device_output(float *device_output)
+bool
+cuda_fir_fff_cuda::stop ()
{
- if(!d_first) CUDA_SAFE_CALL( cudaFree( d_device_output));
- d_device_output=device_output;
- d_device_output_internal=false;
+ cudaThreadSynchronize();
+ CUDA_SAFE_CALL(cudaFree(d_device_taps));
+ d_device_taps=NULL;
+ return true;
}
-void
-cuda_fir_fff_cuda::set_device_input(float *device_input)
+cuda_fir_fff_cuda::~cuda_fir_fff_cuda()
{
- if(!d_first) CUDA_SAFE_CALL( cudaFree( d_device_input));
- d_device_input=device_input;
- d_device_input_internal=false;
+ stop();
}
void
-cuda_fir_fff_cuda::set_taps (const std::vector<float> &inew_taps)
+cuda_fir_fff_cuda::set_taps_cuda ( )
{
- cuda_fir_fff::set_taps (inew_taps); // call superclass which sets d_taps to
gr_reverse(inew_taps)
- const std::vector<float> new_taps = gr_reverse(inew_taps);//d_taps is also
already set to gr_reverse(inew_taps)
+ const std::vector<float> new_taps = d_taps;//d_taps is also already set to
gr_reverse(inew_taps)
unsigned len = new_taps.size ();
unsigned int mem_size = sizeof( float) * len;
@@ -145,13 +106,19 @@
free(rev_taps);
}
+void
+cuda_fir_fff_cuda::set_taps (const std::vector<float> &inew_taps)
+{
+ cuda_fir_fff::set_taps (inew_taps); // call superclass which sets d_taps to
gr_reverse(inew_taps)
+ set_taps_cuda();
+}
float
cuda_fir_fff_cuda::filter (const float input[])
{
float result[1];
- filterN(result,input,1,false,false);
+ filterN(result,input,1);
return result[1];
}
@@ -165,52 +132,34 @@
void
cuda_fir_fff_cuda::filterN (float output[],
const float input[],
- unsigned long n,bool output_is_device, bool
input_is_device)
+ unsigned long n)
{
// for (unsigned i = 0; i < n; i++)
// output[i] = filter (&input[i]);
- if(d_first) init_cuda(0,0,0);//unsigned max_noutputs,unsigned
max_ntaps,unsigned decimation
-
- enum cudaMemcpyKind
input_copy_kind=(input_is_device)?cudaMemcpyDeviceToDevice:cudaMemcpyHostToDevice;
- enum cudaMemcpyKind
output_copy_kind=(output_is_device)?cudaMemcpyDeviceToDevice:cudaMemcpyDeviceToHost;
-
- // copy device memory to host
//CUDA_SAFE_CALL(cudaMemset( d_device_input, 0, INPUT_BUFSIZE_BYTES) );
-cudaThreadSynchronize();
- if(input!=NULL) CUDA_SAFE_CALL( cudaMemcpy( d_device_input,input, sizeof(
float) * (n+ntaps()), input_copy_kind) );
+ cudaThreadSynchronize();
//do the filtering on the device
- cuda_fir_fff_7_filterN (d_device_output,d_device_input,n,
ntaps());//cuda_filterN
+ cuda_fir_fff_7_filterN (output,input,n, ntaps());//cuda_filterN
- if(output!=NULL) CUDA_SAFE_CALL( cudaMemcpy( output, d_device_output,
sizeof( float) * (n), output_copy_kind) );
-
}
void
cuda_fir_fff_cuda::filterNdec (float output[],
const float input[],
unsigned long n,
- unsigned decimate,bool output_is_device, bool
input_is_device)
+ unsigned decimate)
{
// unsigned j = 0;
// for (unsigned i = 0; i < n; i++){
// output[i] = filter (&input[j]);
// j += decimate;
// }
- if(d_first) init_cuda(0,0,0);
- enum cudaMemcpyKind
input_copy_kind=(input_is_device)?cudaMemcpyDeviceToDevice:cudaMemcpyHostToDevice;
- enum cudaMemcpyKind
output_copy_kind=(output_is_device)?cudaMemcpyDeviceToDevice:cudaMemcpyDeviceToHost;
-
-cudaThreadSynchronize();
+ cudaThreadSynchronize();
// copy device memory to host
//CUDA_SAFE_CALL(cudaMemset( d_device_input, 0, INPUT_BUFSIZE_BYTES) );
- if(input!=NULL) CUDA_SAFE_CALL( cudaMemcpy( d_device_input,input, sizeof(
float) * (n*decimate+ntaps()), input_copy_kind) );
-
- cuda_fir_fff_7_filterNdec (d_device_output, d_device_input, n, ntaps(),
decimate);//cuda_filterNdec
-
- if(output!=NULL) CUDA_SAFE_CALL( cudaMemcpy( output, d_device_output,
sizeof( float) * (n), output_copy_kind) );
-
+ cuda_fir_fff_7_filterNdec (output, input, n, ntaps(),
decimate);//cuda_filterNdec
}
/*
Modified:
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_fff_cuda.h
===================================================================
---
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_fff_cuda.h
2008-09-10 01:23:21 UTC (rev 9549)
+++
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_fff_cuda.h
2008-09-10 15:30:20 UTC (rev 9550)
@@ -25,7 +25,7 @@
#include <cuda_fir_fff.h>
/*!
- * \brief Concrete class for generic implementation of FIR with float input,
float output and float taps
+ * \brief Concrete class for cuda implementation of FIR with float input,
float output and float taps
*
* The trailing suffix has the form _IOT where I codes the input type,
* O codes the output type, and T codes the tap type.
@@ -35,19 +35,17 @@
class cuda_fir_fff_cuda : public cuda_fir_fff {
private:
- void init_cuda(unsigned max_noutputs,unsigned max_ntaps,unsigned
decimation); //initialize cuda context, must be done in same thread as
work is run in
- float *d_device_input;
- float *d_device_output;
+ //void init_cuda(unsigned max_noutputs,unsigned max_ntaps,unsigned
decimation); //initialize cuda context, must be done in same thread as
work is run in
float *d_device_taps;
- float *d_host_taps;
- bool d_first;
int d_verbose;
unsigned d_device_taps_items;
- unsigned d_device_input_items;
- unsigned d_device_output_items;
- bool d_device_output_internal;
- bool d_device_input_internal;
+protected:
+ /*!
+ * \brief copy \p taps to cuda device memory
+ */
+ virtual void set_taps_cuda ();
+
public:
// CREATORS
@@ -56,13 +54,30 @@
cuda_fir_fff_cuda (const std::vector<float> &taps);// : cuda_fir_fff (taps)
{}
~cuda_fir_fff_cuda (); // public destructor
+ /*!
+ * \brief Called to enable drivers, etc for i/o devices.
+ *
+ * This allows a block to enable an associated driver to begin
+ * transfering data just before we start to execute the scheduler.
+ * The end result is that this reduces latency in the pipeline when
+ * dealing with audio devices, usrps, etc.
+ *
+ * In this case initialize the cuda memory for the fir taps
+ * In cuda all device memory has to be created in the same thread
+ * as where it is used. So we have to initialize and destroy it with
+ * every start and stop.
+ */
+ virtual bool start();
+
+ /*!
+ * \brief Called to disable drivers, etc for i/o devices.
+ * In this case destroy the cuda memory for the fir taps
+ */
+ virtual bool stop();
+
// ACCESSORS
- float * device_output(){return d_device_output;}
- float * device_input(){return d_device_input;}
// MANIPULATORS
- void set_device_output(float *device_output);
- void set_device_input(float *device_input);
/*!
* \brief compute a single output value.
*
@@ -79,11 +94,9 @@
* \p input must have (n - 1 + ntaps()) valid entries.
* input[0] .. input[n - 1 + ntaps() - 1] are referenced to compute the
output values.
*/
- virtual void filterN (float output[], const float input[],
- unsigned long n,bool output_is_device=false, bool
input_is_device=false);
virtual void filterN (float output[], const float input[],
- unsigned long n){filterN(output,input,n,false,false);}
+ unsigned long n);
/*!
* \brief compute an array of N output values, decimating the input
*
@@ -92,9 +105,7 @@
* compute the output values.
*/
virtual void filterNdec (float output[], const float input[],
- unsigned long n, unsigned decimate,bool
output_is_device=false, bool input_is_device=false);
- virtual void filterNdec (float output[], const float input[],
- unsigned long n, unsigned
decimate){filterNdec(output,input,n,decimate,false,false);}
+ unsigned long n, unsigned decimate);
/*!
* \brief install \p new_taps as the current taps.
*/
Modified:
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_filter_ccf.cc
===================================================================
---
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_filter_ccf.cc
2008-09-10 01:23:21 UTC (rev 9549)
+++
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_filter_ccf.cc
2008-09-10 15:30:20 UTC (rev 9550)
@@ -39,8 +39,8 @@
cuda_fir_filter_ccf::cuda_fir_filter_ccf (int decimation, const
std::vector<float> &taps)
: gr_sync_decimator ("fir_filter_ccf",
- gr_make_io_signature (1, 1, sizeof (gr_complex)),
- gr_make_io_signature (1, 1, sizeof (gr_complex)),
+ gr_make_io_signature (1, 1, sizeof
(gr_complex),GR_BUFFER_CUDA),
+ gr_make_io_signature (1, 1, sizeof
(gr_complex),GR_BUFFER_CUDA),
decimation),
d_updated (false)
{
@@ -53,6 +53,18 @@
delete d_fir;
}
+bool
+cuda_fir_filter_ccf::start()
+{
+ d_fir->start();
+}
+
+bool
+cuda_fir_filter_ccf::stop()
+{
+ d_fir->stop();
+}
+
void
cuda_fir_filter_ccf::set_taps (const std::vector<float> &taps)
{
Modified:
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_filter_ccf.h
===================================================================
---
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_filter_ccf.h
2008-09-10 01:23:21 UTC (rev 9549)
+++
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_filter_ccf.h
2008-09-10 15:30:20 UTC (rev 9550)
@@ -30,7 +30,7 @@
typedef boost::shared_ptr<cuda_fir_filter_ccf> cuda_fir_filter_ccf_sptr;
cuda_fir_filter_ccf_sptr cuda_make_fir_filter_ccf (int decimation, const
std::vector<float> &taps);
-class cuda_fir_ccf;
+class cuda_fir_ccf_cuda;
/*!
* \brief FIR filter with gr_complex input, gr_complex output and float taps
@@ -41,7 +41,7 @@
private:
friend cuda_fir_filter_ccf_sptr cuda_make_fir_filter_ccf (int decimation,
const std::vector<float> &taps);
- cuda_fir_ccf *d_fir;
+ cuda_fir_ccf_cuda *d_fir;
std::vector<float> d_new_taps;
bool d_updated;
@@ -52,7 +52,27 @@
public:
~cuda_fir_filter_ccf ();
+ /*!
+ * \brief Called to enable drivers, etc for i/o devices.
+ *
+ * This allows a block to enable an associated driver to begin
+ * transfering data just before we start to execute the scheduler.
+ * The end result is that this reduces latency in the pipeline when
+ * dealing with audio devices, usrps, etc.
+ *
+ * In this case initialize the cuda memory for the fir taps
+ * In cuda all device memory has to be created in the same thread
+ * as where it is used. So we have to initialize and destroy it with
+ * every start and stop.
+ */
+ virtual bool start();
+ /*!
+ * \brief Called to disable drivers, etc for i/o devices.
+ * In this case destroy the cuda memory for the fir taps
+ */
+ virtual bool stop();
+
void set_taps (const std::vector<float> &taps);
int work (int noutput_items,
Modified:
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_filter_fff.cc
===================================================================
---
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_filter_fff.cc
2008-09-10 01:23:21 UTC (rev 9549)
+++
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_filter_fff.cc
2008-09-10 15:30:20 UTC (rev 9550)
@@ -43,8 +43,8 @@
cuda_fir_filter_fff::cuda_fir_filter_fff (int decimation, const
std::vector<float> &taps)
: gr_sync_decimator ("fir_filter_fff",
- gr_make_io_signature (1, 1, sizeof (float)),
- gr_make_io_signature (1, 1, sizeof (float)),
+ gr_make_io_signature (1, 1, sizeof
(float),GR_BUFFER_CUDA),
+ gr_make_io_signature (1, 1, sizeof
(float),GR_BUFFER_CUDA),
decimation),
d_updated (false)
{
@@ -57,6 +57,18 @@
delete d_fir;
}
+bool
+cuda_fir_filter_fff::start()
+{
+ d_fir->start();
+}
+
+bool
+cuda_fir_filter_fff::stop()
+{
+ d_fir->stop();
+}
+
void
cuda_fir_filter_fff::set_taps (const std::vector<float> &taps)
{
Modified:
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_filter_fff.h
===================================================================
---
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_filter_fff.h
2008-09-10 01:23:21 UTC (rev 9549)
+++
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_filter_fff.h
2008-09-10 15:30:20 UTC (rev 9550)
@@ -21,8 +21,9 @@
*/
/*
- * WARNING: This file is automatically generated by
generate_cuda_fir_filter_XXX.py
+ * WARNING: This file should be automatically generated by
generate_cuda_fir_filter_XXX.py
* Any changes made to this file will be overwritten.
+ * //TODO implement generate_cuda_fir_filter_XXX.py
*/
#ifndef INCLUDED_CUDA_FIR_FILTER_FFF_H
@@ -34,7 +35,7 @@
typedef boost::shared_ptr<cuda_fir_filter_fff> cuda_fir_filter_fff_sptr;
cuda_fir_filter_fff_sptr cuda_make_fir_filter_fff (int decimation, const
std::vector<float> &taps);
-class cuda_fir_fff;
+class cuda_fir_fff_cuda;
/*!
* \brief FIR filter with float input, float output and float taps
@@ -45,7 +46,7 @@
private:
friend cuda_fir_filter_fff_sptr cuda_make_fir_filter_fff (int decimation,
const std::vector<float> &taps);
- cuda_fir_fff *d_fir;
+ cuda_fir_fff_cuda *d_fir;
std::vector<float> d_new_taps;
bool d_updated;
@@ -56,7 +57,27 @@
public:
~cuda_fir_filter_fff ();
+ /*!
+ * \brief Called to enable drivers, etc for i/o devices.
+ *
+ * This allows a block to enable an associated driver to begin
+ * transfering data just before we start to execute the scheduler.
+ * The end result is that this reduces latency in the pipeline when
+ * dealing with audio devices, usrps, etc.
+ *
+ * In this case initialize the cuda memory for the fir taps
+ * In cuda all device memory has to be created in the same thread
+ * as where it is used. So we have to initialize and destroy it with
+ * every start and stop.
+ */
+ virtual bool start();
+ /*!
+ * \brief Called to disable drivers, etc for i/o devices.
+ * In this case destroy the cuda memory for the fir taps
+ */
+ virtual bool stop();
+
void set_taps (const std::vector<float> &taps);
int work (int noutput_items,
Modified:
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_quadrature_demod_cuda_cf.cc
===================================================================
---
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_quadrature_demod_cuda_cf.cc
2008-09-10 01:23:21 UTC (rev 9549)
+++
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_quadrature_demod_cuda_cf.cc
2008-09-10 15:30:20 UTC (rev 9550)
@@ -77,9 +77,6 @@
//cuda_wfm_rcv_cf *wfm_rcv = new cuda_wfm_rcv_cf;
set_output_multiple ( 32*1024 ); //511
set_history ( 1 );
- d_first=true;
- d_device_output_internal=true;
- d_device_input_internal=true;
}
/*
@@ -87,31 +84,12 @@
*/
cuda_quadrature_demod_cuda_cf::~cuda_quadrature_demod_cuda_cf ()
{
- //cuda_quadrature_demod_cf_destroy(d_cuda);
- if(d_device_input_internal) CUDA_SAFE_CALL( cudaFree( d_device_input));
- if(d_device_output_internal) CUDA_SAFE_CALL( cudaFree( d_device_output));
- //delete d_cuda;
}
-void
-cuda_quadrature_demod_cuda_cf::set_device_output(float *device_output)
-{
- if(!d_first) CUDA_SAFE_CALL( cudaFree( d_device_output));
- d_device_output=device_output;
- d_device_output_internal=false;
-}
-void
-cuda_quadrature_demod_cuda_cf::set_device_input(gr_complex *device_input)
+bool
+cuda_quadrature_demod_cuda_cf::start()
{
- if(!d_first) CUDA_SAFE_CALL( cudaFree( d_device_input));
- d_device_input=device_input;
- d_device_input_internal=false;
-}
-
-void
-cuda_quadrature_demod_cuda_cf::init_cuda()
-{
CUT_DEVICE_INIT(0, 0);
//d_cuda = new cuda_quadrature_demod_cf;
unsigned int max_num_outputs;
@@ -121,40 +99,29 @@
//d_cuda =cuda_quadrature_demod_cf_init (d_cuda,0,false);
- const unsigned bufsize_items=1024*1024;//QDEMOD_ITEMS;//32768;
+ //const unsigned bufsize_items=1024*1024;//QDEMOD_ITEMS;//32768;
//d_device_input = new gr_complex*;
- if(d_device_input_internal)
- {
- d_device_input=NULL;
- CUDA_SAFE_CALL( cudaMalloc( (void**)
&(d_device_input),sizeof(gr_complex)*bufsize_items ));
- }
+
//cuda_qdemod->device_input_items=bufsize_items;
//max_mem_size=sizeof( gr_complex) * MAX_NOUTPUTS;
//d_device_output = new float*;
- if(d_device_output_internal)
- {
- d_device_output=NULL;
- CUDA_SAFE_CALL( cudaMalloc( (void**) &(d_device_output),sizeof(float)
*bufsize_items ));
- }
- d_first=false;
}
+bool
+cuda_quadrature_demod_cuda_cf::stop()
+{
+}
+
int
cuda_quadrature_demod_cuda_cf::processN(float output[], const gr_complex
input[],
- unsigned long n,bool output_is_device, bool
input_is_device)
+ unsigned long n)
{
- if(d_first) init_cuda();
const gr_complex *in = input;//(const gr_complex *) input_items[0];
float *out = output;//(float *) output_items[0];
if(input!=NULL) in++;//make sure in[-1] is valid;TODO how to do this when
using device buffer directly
- enum cudaMemcpyKind
input_copy_kind=(input_is_device)?cudaMemcpyDeviceToDevice:cudaMemcpyHostToDevice;
- enum cudaMemcpyKind
output_copy_kind=(output_is_device)?cudaMemcpyDeviceToDevice:cudaMemcpyDeviceToHost;
- if(input!=NULL) CUDA_SAFE_CALL( cudaMemcpy( d_device_input,&in[-1], sizeof(
gr_complex) * (n+1), input_copy_kind ));//cudaMemcpyHostToDevice) );
+ cuda_quadrature_demod_cf_work_device (out, in,(unsigned
long)n,(float)d_gain);
- cuda_quadrature_demod_cf_work_device (d_device_output,
d_device_input,(unsigned long)n,(float)d_gain);
-
- if(output!=NULL) CUDA_SAFE_CALL( cudaMemcpy( out, &d_device_output[1],
sizeof( float) * (n), output_copy_kind));//cudaMemcpyDeviceToHost) );
//out[0]=out[1];//bug gives out[0] wrong value;
// Tell how many output items we produced.
@@ -182,7 +149,7 @@
CUDA_SAFE_CALL( cudaMemcpy( out, &d_device_output[1], sizeof( float) *
(noutput_items), cudaMemcpyDeviceToHost) );
//out[0]=out[1];//bug gives out[0] wrong value;
*/
- int noutput_items_produced=processN(out, in,(unsigned long)
noutput_items,true,true);
+ int noutput_items_produced=processN(out, in,(unsigned long) noutput_items);
// Tell runtime system how many input items we consumed on
// each input stream.
consume_each (noutput_items);
Modified:
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_quadrature_demod_cuda_cf.h
===================================================================
---
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_quadrature_demod_cuda_cf.h
2008-09-10 01:23:21 UTC (rev 9549)
+++
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_quadrature_demod_cuda_cf.h
2008-09-10 15:30:20 UTC (rev 9550)
@@ -67,27 +67,19 @@
friend cuda_quadrature_demod_cuda_cf_sptr cuda_make_quadrature_demod_cuda_cf
(float gain);
//cuda_quadrature_demod_cuda_cf (float gain); // private constructor
- void init_cuda(); //initialize cuda context, must be done in same
thread as work is run in
float d_gain;
cuda_quadrature_demod_cf * d_cuda;
- gr_complex *d_device_input;
- float *d_device_output;
- bool d_first;
- bool d_device_output_internal;
- bool d_device_input_internal;
public:
cuda_quadrature_demod_cuda_cf (float gain); // public constructor
~cuda_quadrature_demod_cuda_cf (); // public destructor
+ virtual bool start(); //initialize cuda context, must be done in same
thread as work is run in
+ virtual bool stop(); //destroy cuda context, must be done in same
thread as work is run in
// ACCESSORS
- float * device_output(){return d_device_output;}
- gr_complex * device_input(){return d_device_input;}
// MANIPULATORS
- void set_device_output(float *device_output);
- void set_device_input(gr_complex *device_input);
// Where all the action really happens
int processN(float output[], const gr_complex input[],
- unsigned long n,bool output_is_device=false, bool
input_is_device=false);
+ unsigned long n);
int general_work (int noutput_items,
gr_vector_int &ninput_items,
gr_vector_const_void_star &input_items,
Modified:
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_wfm_rcv_cf.cc
===================================================================
---
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_wfm_rcv_cf.cc
2008-09-10 01:23:21 UTC (rev 9549)
+++
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_wfm_rcv_cf.cc
2008-09-10 15:30:20 UTC (rev 9550)
@@ -83,6 +83,7 @@
gr_vector_const_void_star &input_items,
gr_vector_void_star &output_items)
{
+#if 0
gr_complex *in = (gr_complex *) input_items[0];
gr_complex *stage1;//d_if_filtered_samples;
float *stage2;//d_fm_demodulated_samples;
@@ -124,4 +125,6 @@
d_audio_fir->filterNdec (out, d_qdemod->device_output()+1, block_size,
audio_decimation(),false,true);
}
return noutput_items_produced;
+#endif
+ return 0;
}
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [Commit-gnuradio] r9550 - gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib,
nldudok1 <=