*** ./liboctave/ChangeLog.orig 2004-01-30 15:24:31.000000000 +0100 --- ./liboctave/ChangeLog 2004-01-30 15:25:30.000000000 +0100 *************** *** 1,3 **** --- 1,14 ---- + 2004-01-30 David Bateman + + * oct-fftw.cc: Add support for FFTW 3.x. Include the ability to + use the real to complex transform for fft's of real matrices + * oct-fftw.h: Decls updated for the above + + * dMatrix.cc: fft's use real to complex transforms. 1D fft of a + matrix done as single call rather than loop. Update for FFTW 3.x + * CMatrix.cc: 1D fft of a matrix done as single call rather than + loop. Update for FFTW 3.x + 2004-01-22 John W. Eaton * Array.cc (Array::assign2, Array::assignN): *** ./liboctave/dMatrix.cc.orig 2004-01-28 15:17:03.000000000 +0100 --- ./liboctave/dMatrix.cc 2004-01-30 19:20:07.000000000 +0100 *************** *** 52,58 **** #include "oct-cmplx.h" #include "quit.h" ! #ifdef HAVE_FFTW #include "oct-fftw.h" #endif --- 52,58 ---- #include "oct-cmplx.h" #include "quit.h" ! #if defined (HAVE_FFTW) || defined (HAVE_FFTW3) #include "oct-fftw.h" #endif *************** *** 766,772 **** } } ! #ifdef HAVE_FFTW ComplexMatrix Matrix::fourier (void) const --- 766,772 ---- } } ! #if defined (HAVE_FFTW) || defined (HAVE_FFTW3) ComplexMatrix Matrix::fourier (void) const *************** *** 789,804 **** nsamples = nc; } ! ComplexMatrix tmp (*this); ! Complex *in (tmp.fortran_vec ()); Complex *out (retval.fortran_vec ()); for (size_t i = 0; i < nsamples; i++) { OCTAVE_QUIT; octave_fftw::fft (&in[npts * i], &out[npts * i], npts); } return retval; } --- 789,809 ---- nsamples = nc; } ! const double *in (fortran_vec ()); Complex *out (retval.fortran_vec ()); + // XXX FIXME XXX octave_fftw::fft (in, out, npts, nsamples); + // should be faster for FFTW 2.1.x as well but isn't !!! + #ifdef HAVE_FFTW3 + octave_fftw::fft (in, out, npts, nsamples); + #else for (size_t i = 0; i < nsamples; i++) { OCTAVE_QUIT; octave_fftw::fft (&in[npts * i], &out[npts * i], npts); } + #endif return retval; } *************** *** 828,839 **** --- 833,850 ---- Complex *in (tmp.fortran_vec ()); Complex *out (retval.fortran_vec ()); + // XXX FIXME XXX octave_fftw::ifft (in, out, npts, nsamples); + // should be faster for FFTW 2.1.x as well but isn't !!! + #ifdef HAVE_FFTW3 + octave_fftw::ifft (in, out, npts, nsamples); + #else for (size_t i = 0; i < nsamples; i++) { OCTAVE_QUIT; octave_fftw::ifft (&in[npts * i], &out[npts * i], npts); } + #endif return retval; } *************** *** 844,853 **** int nr = rows (); int nc = cols (); ! ComplexMatrix retval (*this); // Note the order of passing the rows and columns to account for // column-major storage. ! octave_fftw::fft2d (retval.fortran_vec (), nc, nr); return retval; } --- 855,865 ---- int nr = rows (); int nc = cols (); ! const double *in = fortran_vec (); ! ComplexMatrix retval (rows (), cols ()); // Note the order of passing the rows and columns to account for // column-major storage. ! octave_fftw::fft2d (in, retval.fortran_vec (), nc, nr); return retval; } *************** *** 859,867 **** int nc = cols (); ComplexMatrix retval (*this); // Note the order of passing the rows and columns to account for // column-major storage. ! octave_fftw::ifft2d (retval.fortran_vec (), nc, nr); return retval; } --- 871,881 ---- int nc = cols (); ComplexMatrix retval (*this); + Complex *out (retval.fortran_vec ()); + // Note the order of passing the rows and columns to account for // column-major storage. ! octave_fftw::ifft2d (out, out, nc, nr); return retval; } *** ./liboctave/CMatrix.cc.orig 2004-01-28 15:17:09.000000000 +0100 --- ./liboctave/CMatrix.cc 2004-01-30 19:17:52.000000000 +0100 *************** *** 56,62 **** #include "mx-inlines.cc" #include "oct-cmplx.h" ! #ifdef HAVE_FFTW #include "oct-fftw.h" #endif --- 56,62 ---- #include "mx-inlines.cc" #include "oct-cmplx.h" ! #if defined (HAVE_FFTW) || defined (HAVE_FFTW3) #include "oct-fftw.h" #endif *************** *** 1102,1108 **** return retval; } ! #ifdef HAVE_FFTW ComplexMatrix ComplexMatrix::fourier (void) const --- 1102,1108 ---- return retval; } ! #if defined (HAVE_FFTW) || defined (HAVE_FFTW3) ComplexMatrix ComplexMatrix::fourier (void) const *************** *** 1128,1139 **** --- 1128,1145 ---- const Complex *in (data ()); Complex *out (retval.fortran_vec ()); + // XXX FIXME XXX octave_fftw::fft (in, out, npts, nsamples); + // should be faster for FFTW 2.1.x as well but isn't !!! + #ifdef HAVE_FFTW3 + octave_fftw::fft (in, out, npts, nsamples); + #else for (size_t i = 0; i < nsamples; i++) { OCTAVE_QUIT; octave_fftw::fft (&in[npts * i], &out[npts * i], npts); } + #endif return retval; } *************** *** 1162,1173 **** --- 1168,1185 ---- const Complex *in (data ()); Complex *out (retval.fortran_vec ()); + // XXX FIXME XXX octave_fftw::ifft (in, out, npts, nsamples); + // should be faster for FFTW 2.1.x as well but isn't !!! + #ifdef HAVE_FFTW3 + octave_fftw::ifft (in, out, npts, nsamples); + #else for (size_t i = 0; i < nsamples; i++) { OCTAVE_QUIT; octave_fftw::ifft (&in[npts * i], &out[npts * i], npts); } + #endif return retval; } *************** *** 1178,1187 **** int nr = rows (); int nc = cols (); ! ComplexMatrix retval (*this); // Note the order of passing the rows and columns to account for // column-major storage. ! octave_fftw::fft2d (retval.fortran_vec (), nc, nr); return retval; } --- 1190,1201 ---- int nr = rows (); int nc = cols (); ! ComplexMatrix retval (nr,nc); ! const Complex *in (data ()); ! Complex *out (retval.fortran_vec ()); // Note the order of passing the rows and columns to account for // column-major storage. ! octave_fftw::fft2d (in, out, nc, nr); return retval; } *************** *** 1192,1201 **** int nr = rows (); int nc = cols (); ! ComplexMatrix retval (*this); // Note the order of passing the rows and columns to account for // column-major storage. ! octave_fftw::ifft2d (retval.fortran_vec (), nc, nr); return retval; } --- 1206,1218 ---- int nr = rows (); int nc = cols (); ! ComplexMatrix retval (nr,nc); ! const Complex *in (data ()); ! Complex *out (retval.fortran_vec ()); ! // Note the order of passing the rows and columns to account for // column-major storage. ! octave_fftw::ifft2d (in, out, nc, nr); return retval; } *** ./liboctave/oct-fftw.h.orig 2004-01-28 15:17:17.000000000 +0100 --- ./liboctave/oct-fftw.h 2004-01-30 19:16:48.000000000 +0100 *************** *** 23,32 **** #include ! #if defined (HAVE_DFFTW_H) ! #include #else ! #include #endif #include "oct-cmplx.h" --- 23,38 ---- #include ! #if defined (HAVE_FFTW3) ! # include #else ! # if defined (HAVE_DFFTW_H) ! # include ! # include ! # else ! # include ! # include ! # endif #endif #include "oct-cmplx.h" *************** *** 35,45 **** octave_fftw { public: ! static int fft (const Complex*, Complex *, size_t); ! static int ifft (const Complex*, Complex *, size_t); ! static int fft2d (Complex*, size_t, size_t); ! static int ifft2d (Complex*, size_t, size_t); private: octave_fftw (); --- 41,57 ---- octave_fftw { public: ! static int fft (const double *in, Complex *out, size_t npts, ! size_t nsamples = 1); ! static int fft (const Complex *in, Complex *out, size_t npts, ! size_t nsamples = 1); ! static int ifft (const Complex *in, Complex *out, size_t npts, ! size_t nsamples = 1); ! static int fft2d (const double*, Complex*, size_t, size_t); ! ! static int fft2d (const Complex*, Complex*, size_t, size_t); ! static int ifft2d (const Complex*, Complex*, size_t, size_t); private: octave_fftw (); *** ./liboctave/oct-fftw.cc.orig 2004-01-28 15:17:22.000000000 +0100 --- ./liboctave/oct-fftw.cc 2004-01-30 23:30:57.000000000 +0100 *************** *** 22,39 **** #include #endif ! #ifdef HAVE_FFTW #include "oct-fftw.h" #include "lo-error.h" ! // Helper class to create and cache fftw plans for both 1d and 2d. This // implementation uses FFTW_ESTIMATE to create the plans, which in theory ! // is suboptimal, but provides quite reasonable performance. Future ! // enhancement will be to add a dynamically loadable interface ("fftw") ! // to manipulate fftw wisdom so that users may choose the appropriate ! // planner. class octave_fftw_planner --- 22,205 ---- #include #endif ! #if defined (HAVE_FFTW) || defined (HAVE_FFTW3) #include "oct-fftw.h" #include "lo-error.h" ! #include // Helper class to create and cache fftw plans for both 1d and 2d. This // implementation uses FFTW_ESTIMATE to create the plans, which in theory ! // is suboptimal, but provides quite reasonable performance. ! ! // Also note that if FFTW_ESTIMATE is not used the planner in FFTW3 ! // destroys the input and output arrays. So with the form of the ! // current code we definitely want FFTW_ESTIMATE!! However, we use ! // any wsidom that is available, either in a FFTW3 system wide file ! // or as supplied by the user. ! ! // XXX FIXME XXX If we can ensure 16 byte alignment in Array ( *data) ! // the FFTW3 can use SIMD instructions for further acceleration. ! ! // Note that it doesn't appear to be profitable to store FFTW3 plans, and ! // so the interface is simpler ! ! #ifdef HAVE_FFTW3 ! ! #define plan_flags (FFTW_ESTIMATE) ! ! // If we have a system wide wisdom file, import it. Use a stupid hack, ! // of defining a static variable defined as the return value of ! // fftw_import_system_wisdom to only need to do this once. The ! // variable itself is not used. ! static int got_system_wisdom = fftw_import_system_wisdom ( ); ! ! static inline void convert_packcomplex_1d (Complex *out, size_t nr, ! size_t nc) ! { ! // Fill in the missing data ! for (size_t i = 0; i < nr; i++) ! for (size_t j = nc/2+1; j < nc; j++) ! out[j + i*nc] = conj(out[nc - j + i*nc]); ! } ! ! static inline void convert_packcomplex_2d (Complex *out, size_t nr, ! size_t nc) ! { ! Complex *ptr1, *ptr2; ! ! // Create space for the missing elements ! for (size_t i = 0; i < nr; i++) ! { ! ptr1 = out + i * (nc/2 + 1) + nr*((nc-1)/2); ! ptr2 = out + i * nc; ! for (size_t j = 0; j < nc/2+1; j++) ! *ptr2++ = *ptr1++; ! } ! ! // Fill in the missing data ! for (size_t i = 1; i < nr; i++) ! for (size_t j = nc/2+1; j < nc; j++) ! out[j + i*nc] = conj(out[nc - j + (nr-i)*nc]); ! for (size_t j = nc/2+1; j < nc; j++) ! out[j] = conj(out[nc - j]); ! } ! ! int ! octave_fftw::fft (const double *in, Complex *out, size_t npts, size_t nsamples) ! { ! fftw_plan plan ! = fftw_plan_many_dft_r2c (1, reinterpret_cast (&npts), nsamples, ! (const_cast (in)), NULL, 1, npts, ! reinterpret_cast (out), NULL, 1, npts, plan_flags); ! ! fftw_execute (plan); ! ! fftw_destroy_plan (plan); ! ! // Need to create other half of the transform ! convert_packcomplex_1d (out, nsamples, npts); ! ! return 0; ! } ! ! int ! octave_fftw::fft (const Complex *in, Complex *out, size_t npts, ! size_t nsamples) ! { ! fftw_plan plan = fftw_plan_many_dft (1, reinterpret_cast (&npts), ! nsamples, ! reinterpret_cast (const_cast (in)), ! NULL, 1, npts, ! reinterpret_cast (out), NULL, 1, npts, ! FFTW_FORWARD, plan_flags); ! ! fftw_execute (plan); ! ! fftw_destroy_plan (plan); ! ! return 0; ! } ! ! int ! octave_fftw::ifft (const Complex *in, Complex *out, size_t npts, ! size_t nsamples) ! { ! fftw_plan plan = fftw_plan_many_dft (1, reinterpret_cast (&npts), ! nsamples, ! reinterpret_cast (const_cast (in)), ! NULL, 1, npts, ! reinterpret_cast (out), NULL, 1, npts, ! FFTW_BACKWARD, plan_flags); ! ! fftw_execute (plan); ! ! fftw_destroy_plan (plan); ! ! const Complex scale = npts; ! for (size_t i = 0; i < npts*nsamples; i++) ! out[i] /= scale; ! ! return 0; ! } ! ! int ! octave_fftw::fft2d (const double *in, Complex *out, size_t nr, size_t nc) ! { ! // Fool with the position of the start of the matrix, so that creating ! // other half of the matrix won't cause cache problems ! //fftw_plan plan = fftw_plan_dft_r2c_2d (nr, nc, (const_cast(in)), ! //reinterpret_cast (out + nr*((nc-1)/2)), plan_flags); ! fftw_plan plan = fftw_plan_dft_r2c_2d (nr, nc, (const_cast(in)), ! reinterpret_cast (out), plan_flags); ! ! fftw_execute (plan); ! ! fftw_destroy_plan (plan); ! ! // Need to create other half of the transform ! convert_packcomplex_2d (out, nr, nc); ! ! return 0; ! } ! ! int ! octave_fftw::fft2d (const Complex *in, Complex *out, size_t nr, size_t nc) ! { ! fftw_plan plan ! = fftw_plan_dft_2d (nr, nc, ! reinterpret_cast (const_cast (in)), ! reinterpret_cast (out), FFTW_FORWARD, plan_flags); ! ! fftw_execute (plan); ! ! fftw_destroy_plan (plan); ! ! return 0; ! } ! ! int ! octave_fftw::ifft2d (const Complex *in, Complex *out, size_t nr, size_t nc) ! { ! fftw_plan plan ! = fftw_plan_dft_2d (nr, nc, ! reinterpret_cast (const_cast (in)), ! reinterpret_cast (out), FFTW_BACKWARD, ! plan_flags); ! ! fftw_execute (plan); ! ! fftw_destroy_plan (plan); ! ! const size_t npts = nr * nc; ! const Complex scale = npts; ! for (size_t i = 0; i < npts; i++) ! out[i] /= scale; ! ! return 0; ! } ! ! #else // HAVE_FFTW3 class octave_fftw_planner *************** *** 44,68 **** fftw_plan create_plan (fftw_direction, size_t); fftwnd_plan create_plan2d (fftw_direction, size_t, size_t); private: int plan_flags; fftw_plan plan[2]; fftwnd_plan plan2d[2]; size_t n[2]; size_t n2d[2][2]; }; octave_fftw_planner::octave_fftw_planner () { ! plan_flags = FFTW_ESTIMATE; plan[0] = plan[1] = 0; plan2d[0] = plan2d[1] = 0; n[0] = n[1] = 0; n2d[0][0] = n2d[0][1] = n2d[1][0] = n2d[1][1] = 0; } fftw_plan --- 210,250 ---- fftw_plan create_plan (fftw_direction, size_t); fftwnd_plan create_plan2d (fftw_direction, size_t, size_t); + rfftw_plan create_rplan (fftw_direction, size_t); + rfftwnd_plan create_rplan2d (fftw_direction, size_t, size_t); + private: int plan_flags; fftw_plan plan[2]; fftwnd_plan plan2d[2]; + rfftw_plan rplan[2]; + rfftwnd_plan rplan2d[2]; + size_t n[2]; size_t n2d[2][2]; + + size_t rn[2]; + size_t rn2d[2][2]; }; octave_fftw_planner::octave_fftw_planner () { ! plan_flags = FFTW_ESTIMATE | FFTW_USE_WISDOM; plan[0] = plan[1] = 0; plan2d[0] = plan2d[1] = 0; n[0] = n[1] = 0; n2d[0][0] = n2d[0][1] = n2d[1][0] = n2d[1][1] = 0; + + rplan[0] = rplan[1] = 0; + rplan2d[0] = rplan2d[1] = 0; + + rn[0] = rn[1] = 0; + rn2d[0][0] = rn2d[0][1] = rn2d[1][0] = rn2d[1][1] = 0; + } fftw_plan *************** *** 92,106 **** return *cur_plan_p; } fftwnd_plan ! octave_fftw_planner::create_plan2d (fftw_direction dir, ! size_t nrows, size_t ncols) { size_t which = (dir == FFTW_FORWARD) ? 0 : 1; fftwnd_plan *cur_plan_p = &plan2d[which]; bool create_new_plan = false; ! if (plan2d[which] == 0 || n2d[which][0] != nrows || n2d[which][1] != ncols) { create_new_plan = true; --- 274,316 ---- return *cur_plan_p; } + rfftw_plan + octave_fftw_planner::create_rplan (fftw_direction dir, size_t npts) + { + size_t which = (dir == FFTW_FORWARD) ? 0 : 1; + rfftw_plan *cur_plan_p = &rplan[which]; + bool create_new_plan = false; + + if (rplan[which] == 0 || rn[which] != npts) + { + create_new_plan = true; + rn[which] = npts; + } + + if (create_new_plan) + { + if (*cur_plan_p) + rfftw_destroy_plan (*cur_plan_p); + + *cur_plan_p = rfftw_create_plan (npts, dir, plan_flags); + + if (*cur_plan_p == 0) + (*current_liboctave_error_handler) ("Error creating fftw plan"); + } + + return *cur_plan_p; + } + fftwnd_plan ! octave_fftw_planner::create_plan2d (fftw_direction dir, size_t nrows, ! size_t ncols) { size_t which = (dir == FFTW_FORWARD) ? 0 : 1; fftwnd_plan *cur_plan_p = &plan2d[which]; bool create_new_plan = false; ! if (plan2d[which] == 0 || n2d[which][0] != nrows || ! n2d[which][1] != ncols) { create_new_plan = true; *************** *** 113,121 **** if (*cur_plan_p) fftwnd_destroy_plan (*cur_plan_p); ! *cur_plan_p = fftw2d_create_plan (nrows, ncols, dir, ! plan_flags | FFTW_IN_PLACE); if (*cur_plan_p == 0) (*current_liboctave_error_handler) ("Error creating 2d fftw plan"); } --- 323,361 ---- if (*cur_plan_p) fftwnd_destroy_plan (*cur_plan_p); ! *cur_plan_p = fftw2d_create_plan (nrows, ncols, dir, plan_flags); ! ! if (*cur_plan_p == 0) ! (*current_liboctave_error_handler) ("Error creating 2d fftw plan"); ! } + return *cur_plan_p; + } + + rfftwnd_plan + octave_fftw_planner::create_rplan2d (fftw_direction dir, size_t nrows, + size_t ncols) + { + size_t which = (dir == FFTW_FORWARD) ? 0 : 1; + fftwnd_plan *cur_plan_p = &rplan2d[which]; + bool create_new_plan = false; + + if (rplan2d[which] == 0 || rn2d[which][0] != nrows || + rn2d[which][1] != ncols) + { + create_new_plan = true; + + rn2d[which][0] = nrows; + rn2d[which][1] = ncols; + } + + if (create_new_plan) + { + if (*cur_plan_p) + rfftwnd_destroy_plan (*cur_plan_p); + + *cur_plan_p = rfftw2d_create_plan (nrows, ncols, dir, plan_flags); + if (*cur_plan_p == 0) (*current_liboctave_error_handler) ("Error creating 2d fftw plan"); } *************** *** 125,160 **** static octave_fftw_planner fftw_planner; int ! octave_fftw::fft (const Complex *in, Complex *out, size_t npts) { ! fftw_one (fftw_planner.create_plan (FFTW_FORWARD, npts), ! reinterpret_cast (const_cast (in)), ! reinterpret_cast (out)); return 0; } int ! octave_fftw::ifft (const Complex *in, Complex *out, size_t npts) { ! fftw_one (fftw_planner.create_plan (FFTW_BACKWARD, npts), ! reinterpret_cast (const_cast (in)), ! reinterpret_cast (out)); const Complex scale = npts; ! for (size_t i = 0; i < npts; i++) out[i] /= scale; return 0; } int ! octave_fftw::fft2d (Complex *inout, size_t nr, size_t nc) { fftwnd_one (fftw_planner.create_plan2d (FFTW_FORWARD, nr, nc), ! reinterpret_cast (inout), ! 0); return 0; } --- 365,493 ---- static octave_fftw_planner fftw_planner; + static inline void convert_halfcomplex (Complex *in, size_t npts, + size_t nsamples) + { + // This is likely to thrash about in the cache !!! + double *ptr1, *ptr2; + + for (size_t k = 0; k < nsamples; k++) + { + // First move the imaginary parts + ptr1 = (double *)in + 3; + ptr2 = (double *)in + 2 * (npts-1); + for (size_t i = 0; i < (npts-1)/2; i++) + { + *ptr1 = *ptr2; + *(ptr2+1) = - *ptr2; + ptr1 += 2; + ptr2 -= 2; + } + + // Now move the real parts + ptr1 = (double *)in + 2; + ptr2 = (double *)in + 2 * (npts-1); + for (size_t i = 0; i < npts/2; i++) + { + *ptr2 = *ptr1; + ptr1 += 2; + ptr2 -= 2; + } + + // Fix-up complex parts at zero and n/2+1 if n is even + ptr1 = (double *)in; + *(ptr1 + 1) = 0.; + if (!(npts & 1)) + *(ptr1 + npts + 1) = 0.; + + // Advance to the next data + in += npts; + } + } + + static inline void convert_packcomplex (Complex *out, size_t nr, size_t nc) + { + Complex *ptr1, *ptr2; + + // Create space for the missing elements + for (size_t i = 0; i < nr; i++) + { + ptr1 = out + i * (nc/2 + 1) + nr*((nc-1)/2); + ptr2 = out + i * nc; + for (size_t j = 0; j < nc/2+1; j++) + *ptr2++ = *ptr1++; + } + + // Fill in the missing data + for (size_t i = 1; i < nr; i++) + for (size_t j = nc/2+1; j < nc; j++) + out[j + i*nc] = conj(out[nc - j + (nr-i)*nc]); + for (size_t j = nc/2+1; j < nc; j++) + out[j] = conj(out[nc - j]); + } + + int + octave_fftw::fft (const double *in, Complex *out, size_t npts, size_t nsamples) + { + // Fool with the stride to make the conversion to complex easier + rfftw (fftw_planner.create_rplan (FFTW_FORWARD, npts, nsamples, in, out), + nsamples, reinterpret_cast (const_cast (in)), + 1, npts, reinterpret_cast (out), 2, 2*npts); + + // Need to create other half of the transform + convert_halfcomplex (out, npts, nsamples); + + return 0; + } + int ! octave_fftw::fft (const Complex *in, Complex *out, size_t npts, ! size_t nsamples) { ! fftw (fftw_planner.create_plan (FFTW_FORWARD, npts), ! nsamples, reinterpret_cast (const_cast (in)), ! 1, npts, reinterpret_cast (out), 1, npts); return 0; } int ! octave_fftw::ifft (const Complex *in, Complex *out, size_t npts, ! size_t nsamples) { ! fftw (fftw_planner.create_plan (FFTW_BACKWARD, npts), ! nsamples, reinterpret_cast (const_cast (in)), ! 1, npts, reinterpret_cast (out), 1, npts); const Complex scale = npts; ! for (size_t i = 0; i < npts*nsamples; i++) out[i] /= scale; return 0; } int ! octave_fftw::fft2d (const double *in, Complex *out, size_t nr, size_t nc) ! { ! // Fool with the position of the start of the matrix, so that creating ! // other half of the matrix won't cause cache problems ! rfftwnd_one_real_to_complex (fftw_planner.create_rplan2d (FFTW_FORWARD, ! nr, nc, in, out), ! reinterpret_cast (const_cast(in)), ! reinterpret_cast (out + nr*((nc-1)/2))); ! ! // Need to create other half of the transform ! convert_packcomplex (out, nr, nc); ! ! return 0; ! } ! ! int ! octave_fftw::fft2d (const Complex *in, Complex *out, size_t nr, size_t nc) { fftwnd_one (fftw_planner.create_plan2d (FFTW_FORWARD, nr, nc), ! reinterpret_cast (const_cast(in)), ! reinterpret_cast (out)); return 0; } *************** *** 163,170 **** octave_fftw::ifft2d (Complex *inout, size_t nr, size_t nc) { fftwnd_one (fftw_planner.create_plan2d (FFTW_BACKWARD, nr, nc), ! reinterpret_cast (inout), ! 0); const size_t npts = nr * nc; const Complex scale = npts; --- 496,503 ---- octave_fftw::ifft2d (Complex *inout, size_t nr, size_t nc) { fftwnd_one (fftw_planner.create_plan2d (FFTW_BACKWARD, nr, nc), ! reinterpret_cast (const_cast(in)), ! reinterpret_cast (out)); const size_t npts = nr * nc; const Complex scale = npts; *************** *** 174,180 **** return 0; } ! #endif /* ;;; Local Variables: *** --- 507,514 ---- return 0; } ! #endif // HAVE_FFTW3 ! #endif // HAVE_FFTW or HAVE_FFTW3 /* ;;; Local Variables: *** *** ./src/DLD-FUNCTIONS/ifft.cc.orig 2004-01-30 22:46:35.000000000 +0100 --- ./src/DLD-FUNCTIONS/ifft.cc 2004-01-30 23:42:05.000000000 +0100 *************** *** 24,29 **** --- 24,35 ---- #include #endif + #ifdef HAVE_FFTW3 + #include + #include "oct-env.h" + #include "defaults.h" + #endif + #include "lo-mappers.h" #include "defun-dld.h" *************** *** 34,52 **** // This function should be merged with Ffft. DEFUN_DLD (ifft, args, , "-*- texinfo -*-\n\ @deftypefn {Loadable Function} {} ifft (@var{a}, @var{n})\n\ ! Compute the inverse FFT of @var{a} using subroutines from @sc{Fftpack}. If\n\ ! @var{a} is a matrix, @code{fft} computes the inverse FFT for each column\n\ ! of @var{a}.\n\ \n\ If called with two arguments, @var{n} is expected to be an integer\n\ specifying the number of elements of @var{a} to use. If @var{a} is a\n\ matrix, @var{n} specifies the number of rows of @var{a} to use. If\n\ @var{n} is larger than the size of @var{a}, @var{a} is resized and\n\ ! padded with zeros.\n\ ! @end deftypefn") { octave_value retval; --- 40,81 ---- // This function should be merged with Ffft. + #if defined(HAVE_FFTW3) || defined(HAVE_FFTW) + #define FFTSRC "@sc{Fftw}" + #ifdef HAVE_FFTW3 + #define WISDOM_DOC \ + "\n\ + If @var{a} is a string, then it is treated as a file containing FFTW\n\ + wisdom. This file is loaded and will later be used to accelerate all\n\ + FFTs. The system-wide wisdom file will always be used if available.\n" + #else + #define WISDOM_DOC \ + "\n\ + If @var{a} is a string, then it is treated as a file containing FFTW\n\ + wisdom. This file is loaded and will later be used to accelerate all\n\ + FFTs. In general wisdom files should be loaded prior to using this\n\ + function, to avoid stale plans FFTW being cached." + #endif + #else + #define WISDOM_DOC + #define FFTSRC "@sc{Fftpack}" + #endif + DEFUN_DLD (ifft, args, , "-*- texinfo -*-\n\ @deftypefn {Loadable Function} {} ifft (@var{a}, @var{n})\n\ ! Compute the inverse FFT of @var{a} using subroutines from\n" ! FFTSRC ! ". If @var{a} is a matrix, @code{fft} computes the inverse FFT for each\n\ ! column of @var{a}.\n\ \n\ If called with two arguments, @var{n} is expected to be an integer\n\ specifying the number of elements of @var{a} to use. If @var{a} is a\n\ matrix, @var{n} specifies the number of rows of @var{a} to use. If\n\ @var{n} is larger than the size of @var{a}, @var{a} is resized and\n\ ! padded with zeros.\n" ! WISDOM_DOC ! "@end deftypefn") { octave_value retval; *************** *** 59,64 **** --- 88,124 ---- } octave_value arg = args(0); + + #if defined(HAVE_FFTW3) || defined(HAVE_FFTW) + if (arg.is_string ()) + { + if (nargin != 1) + { + print_usage ("ifft"); + return retval; + } + + std::string wisdom = octave_env::make_absolute + (Vload_path_dir_path.find_first_of (arg.string_value ().c_str ()), + octave_env::getcwd ()); + + if (wisdom.empty ()) + error ("ifft: FFTW wisdom file not found"); + else + { + FILE *ifile = fopen (wisdom.c_str (), "r"); + #ifdef HAVE_FFTW3 + if (! fftw_import_wisdom_from_file (ifile)) + #else + if (fftw_import_wisdom_from_file (ifile) == FFTW_FAILURE) + #endif + error ("ifft: can not import wisdom from file"); + fclose (ifile); + } + + return retval; + } + #endif int n_points = arg.rows (); if (n_points == 1) *** ./src/DLD-FUNCTIONS/fft.cc.orig 2004-01-30 22:46:06.000000000 +0100 --- ./src/DLD-FUNCTIONS/fft.cc 2004-01-30 23:42:13.000000000 +0100 *************** *** 24,29 **** --- 24,35 ---- #include #endif + #ifdef HAVE_FFTW3 + #include + #include "oct-env.h" + #include "defaults.h" + #endif + #include "lo-mappers.h" #include "defun-dld.h" *************** *** 34,51 **** // This function should be merged with Fifft. DEFUN_DLD (fft, args, , "-*- texinfo -*-\n\ @deftypefn {Loadable Function} {} fft (@var{a}, @var{n})\n\ ! Compute the FFT of @var{a} using subroutines from @sc{Fftpack}. If @var{a}\n\ ! is a matrix, @code{fft} computes the FFT for each column of @var{a}.\n\ \n\ If called with two arguments, @var{n} is expected to be an integer\n\ specifying the number of elements of @var{a} to use. If @var{a} is a\n\ matrix, @var{n} specifies the number of rows of @var{a} to use. If\n\ @var{n} is larger than the size of @var{a}, @var{a} is resized and\n\ ! padded with zeros.\n\ ! @end deftypefn") { octave_value retval; --- 40,81 ---- // This function should be merged with Fifft. + #if defined(HAVE_FFTW3) || defined(HAVE_FFTW) + #define FFTSRC "@sc{Fftw}" + #ifdef HAVE_FFTW3 + #define WISDOM_DOC \ + "\n\ + If @var{a} is a string, then it is treated as a file containing FFTW\n\ + wisdom. This file is loaded and will later be used to accelerate all\n\ + FFTs. The system-wide wisdom file will always be used if available.\n" + #else + #define WISDOM_DOC \ + "\n\ + If @var{a} is a string, then it is treated as a file containing FFTW\n\ + wisdom. This file is loaded and will later be used to accelerate all\n\ + FFTs. In general wisdom files should be loaded prior to using this\n\ + function, to avoid stale plans FFTW being cached." + #endif + #else + #define WISDOM_DOC + #define FFTSRC "@sc{Fftpack}" + #endif + DEFUN_DLD (fft, args, , "-*- texinfo -*-\n\ @deftypefn {Loadable Function} {} fft (@var{a}, @var{n})\n\ ! Compute the FFT of @var{a} using subroutines from\n" ! FFTSRC ! ". If @var{a} is a matrix, @code{fft} computes the FFT for each column\n\ ! of @var{a}.\n\ \n\ If called with two arguments, @var{n} is expected to be an integer\n\ specifying the number of elements of @var{a} to use. If @var{a} is a\n\ matrix, @var{n} specifies the number of rows of @var{a} to use. If\n\ @var{n} is larger than the size of @var{a}, @var{a} is resized and\n\ ! padded with zeros.\n" ! WISDOM_DOC ! "@end deftypefn") { octave_value retval; *************** *** 59,64 **** --- 89,125 ---- octave_value arg = args(0); + #if defined(HAVE_FFTW3) || defined(HAVE_FFTW) + if (arg.is_string ()) + { + if (nargin != 1) + { + print_usage ("fft"); + return retval; + } + + std::string wisdom = octave_env::make_absolute + (Vload_path_dir_path.find_first_of (arg.string_value ().c_str ()), + octave_env::getcwd ()); + + if (wisdom.empty ()) + error ("fft: FFTW wisdom file not found"); + else + { + FILE *ifile = fopen (wisdom.c_str (), "r"); + #ifdef HAVE_FFTW3 + if (! fftw_import_wisdom_from_file (ifile)) + #else + if (fftw_import_wisdom_from_file (ifile) == FFTW_FAILURE) + #endif + error ("fft: can not import wisdom from file"); + fclose (ifile); + } + + return retval; + } + #endif + int n_points = arg.rows (); if (n_points == 1) n_points = arg.columns (); *** ./src/DLD-FUNCTIONS/fft2.cc.orig 2004-01-30 22:49:01.000000000 +0100 --- ./src/DLD-FUNCTIONS/fft2.cc 2004-01-30 23:41:49.000000000 +0100 *************** *** 24,29 **** --- 24,35 ---- #include #endif + #ifdef HAVE_FFTW3 + #include + #include "oct-env.h" + #include "defaults.h" + #endif + #include "lo-mappers.h" #include "defun-dld.h" *************** *** 34,39 **** --- 40,64 ---- // This function should be merged with Fifft2. + #if defined(HAVE_FFTW3) || defined(HAVE_FFTW) + #ifdef HAVE_FFTW3 + #define WISDOM_DOC \ + "\n\ + If @var{a} is a string, then it is treated as a file containing FFTW\n\ + wisdom. This file is loaded and will later be used to accelerate all\n\ + FFTs. The system-wide wisdom file will always be used if available.\n" + #else + #define WISDOM_DOC \ + "\n\ + If @var{a} is a string, then it is treated as a file containing FFTW\n\ + wisdom. This file is loaded and will later be used to accelerate all\n\ + FFTs. In general wisdom files should be loaded prior to using this\n\ + function, to avoid stale plans FFTW being cached." + #endif + #else + #define WISDOM_DOC + #endif + DEFUN_DLD (fft2, args, , "-*- texinfo -*-\n\ @deftypefn {Loadable Function} {} fft2 (@var{a}, @var{n}, @var{m})\n\ *************** *** 42,49 **** The optional arguments @var{n} and @var{m} may be used specify the\n\ number of rows and columns of @var{a} to use. If either of these is\n\ larger than the size of @var{a}, @var{a} is resized and padded with\n\ ! zeros.\n\ ! @end deftypefn") { octave_value retval; --- 67,75 ---- The optional arguments @var{n} and @var{m} may be used specify the\n\ number of rows and columns of @var{a} to use. If either of these is\n\ larger than the size of @var{a}, @var{a} is resized and padded with\n\ ! zeros.\n" ! WISDOM_DOC ! "@end deftypefn") { octave_value retval; *************** *** 57,62 **** --- 83,119 ---- octave_value arg = args(0); + #if defined(HAVE_FFTW3) || defined(HAVE_FFTW) + if (arg.is_string ()) + { + if (nargin != 1) + { + print_usage ("fft"); + return retval; + } + + std::string wisdom = octave_env::make_absolute + (Vload_path_dir_path.find_first_of (arg.string_value ().c_str ()), + octave_env::getcwd ()); + + if (wisdom.empty ()) + error ("fft: FFTW wisdom file not found"); + else + { + FILE *ifile = fopen (wisdom.c_str (), "r"); + #ifdef HAVE_FFTW3 + if (! fftw_import_wisdom_from_file (ifile)) + #else + if (fftw_import_wisdom_from_file (ifile) == FFTW_FAILURE) + #endif + error ("fft: can not import wisdom from file"); + fclose (ifile); + } + + return retval; + } + #endif + int n_rows = arg.rows (); if (nargin > 1) { *** ./src/DLD-FUNCTIONS/ifft2.cc.orig 2004-01-30 22:50:46.000000000 +0100 --- ./src/DLD-FUNCTIONS/ifft2.cc 2004-01-30 23:41:32.000000000 +0100 *************** *** 24,29 **** --- 24,35 ---- #include #endif + #ifdef HAVE_FFTW3 + #include + #include "oct-env.h" + #include "defaults.h" + #endif + #include "lo-mappers.h" #include "defun-dld.h" *************** *** 34,39 **** --- 40,64 ---- // This function should be merged with Ffft2. + #if defined(HAVE_FFTW3) || defined(HAVE_FFTW) + #ifdef HAVE_FFTW3 + #define WISDOM_DOC \ + "\n\ + If @var{a} is a string, then it is treated as a file containing FFTW\n\ + wisdom. This file is loaded and will later be used to accelerate all\n\ + FFTs. The system-wide wisdom file will always be used if available.\n" + #else + #define WISDOM_DOC \ + "\n\ + If @var{a} is a string, then it is treated as a file containing FFTW\n\ + wisdom. This file is loaded and will later be used to accelerate all\n\ + FFTs. In general wisdom files should be loaded prior to using this\n\ + function, to avoid stale plans FFTW being cached." + #endif + #else + #define WISDOM_DOC + #endif + DEFUN_DLD (ifft2, args, , "-*- texinfo -*-\n\ @deftypefn {Loadable Function} {} ifft2 (@var{a}, @var{n}, @var{m})\n\ *************** *** 42,49 **** The optional arguments @var{n} and @var{m} may be used specify the\n\ number of rows and columns of @var{a} to use. If either of these is\n\ larger than the size of @var{a}, @var{a} is resized and padded with\n\ ! zeros.\n\ ! @end deftypefn") { octave_value retval; --- 67,75 ---- The optional arguments @var{n} and @var{m} may be used specify the\n\ number of rows and columns of @var{a} to use. If either of these is\n\ larger than the size of @var{a}, @var{a} is resized and padded with\n\ ! zeros.\n" ! WISDOM_DOC ! "@end deftypefn") { octave_value retval; *************** *** 57,62 **** --- 83,119 ---- octave_value arg = args(0); + #if defined(HAVE_FFTW3) || defined(HAVE_FFTW) + if (arg.is_string ()) + { + if (nargin != 1) + { + print_usage ("fft"); + return retval; + } + + std::string wisdom = octave_env::make_absolute + (Vload_path_dir_path.find_first_of (arg.string_value ().c_str ()), + octave_env::getcwd ()); + + if (wisdom.empty ()) + error ("fft: FFTW wisdom file not found"); + else + { + FILE *ifile = fopen (wisdom.c_str (), "r"); + #ifdef HAVE_FFTW3 + if (! fftw_import_wisdom_from_file (ifile)) + #else + if (fftw_import_wisdom_from_file (ifile) == FFTW_FAILURE) + #endif + error ("fft: can not import wisdom from file"); + fclose (ifile); + } + + return retval; + } + #endif + int n_rows = arg.rows (); if (nargin > 1) { *** ./src/ChangeLog.orig 2004-01-30 23:14:31.000000000 +0100 --- ./src/ChangeLog 2004-01-30 23:14:18.000000000 +0100 *************** *** 1,3 **** --- 1,11 ---- + 2004-01-30 Dvaid Bateman + + * DLD-FUNCTIONS/fft.cc: Adapt to allow FFTW wisdom to be imported. + Update the documentation to match + * DLD-FUNCTIONS/ifft.cc: Likewise + * DLD-FUNCTIONS/fft2.cc: Likewise + * DLD-FUNCTIONS/ifft2.cc: Likewise + 2004-01-22 John W. Eaton * error.cc (pr_where): New arg, print_code with default value true. *** ./configure.in.orig 2004-01-28 15:16:49.000000000 +0100 --- ./configure.in 2004-01-29 21:30:25.000000000 +0100 *************** *** 407,425 **** with_fftw=$withval, with_fftw=yes) if test "$with_fftw" = "yes"; then ! have_fftw_header=no ! AC_CHECK_HEADERS(dfftw.h fftw.h, [have_fftw_header=yes; break]) ! if test "$have_fftw_header" = yes; then ! AC_CHECK_LIB(dfftw, fftw_create_plan, FFTW_LIBS="-ldfftw", ! [AC_CHECK_LIB(fftw, fftw_create_plan, FFTW_LIBS="-lfftw", with_fftw=no)]) ! else ! with_fftw=no fi fi ! if test "$with_fftw" = yes; then FFT_DIR='' ! AC_DEFINE(HAVE_FFTW, 1, [Define if the FFTW library is available.]) fi WITH_MPI=true --- 407,442 ---- with_fftw=$withval, with_fftw=yes) if test "$with_fftw" = "yes"; then ! have_fftw3_header=no ! with_fftw3=no ! AC_CHECK_HEADER(fftw3.h, [have_fftw3_header=yes; break]) ! if test "$have_fftw3_header" = yes; then ! AC_CHECK_LIB(fftw3, fftw_plan_dft_1d, [FFTW_LIBS="-lfftw3"; with_fftw3=yes]) ! fi ! if test "$with_fftw3" = no; then ! have_fftw_header=no ! have_rfftw_header=no ! AC_CHECK_HEADERS(dfftw.h fftw.h, [have_fftw_header=yes; break]) ! AC_CHECK_HEADERS(drfftw.h rfftw.h, [have_rfftw_header=yes; break]) ! if test "$have_fftw_header" = yes && test "$have_rfftw_header" = yes; then ! AC_CHECK_LIB(dfftw, fftw_create_plan, FFTW_LIBS="-ldfftw", ! [AC_CHECK_LIB(fftw, fftw_create_plan, FFTW_LIBS="-lfftw", with_fftw=no)]) ! AC_CHECK_LIB(drfftw, rfftw_create_plan, FFTW_LIBS="$FFTW_LIBS -ldrfftw", ! [AC_CHECK_LIB(rfftw, rfftw_create_plan, FFTW_LIBS="$FFTW_LIBS -lrfftw", with_fftw=no, $FFTW_LIBS)], $FFTW_LIBS) ! else ! with_fftw=no ! fi fi fi ! if test "$with_fftw" = yes || test "$with_fftw3" = yes; then FFT_DIR='' ! if test "$with_fftw3" = yes; then ! AC_DEFINE(HAVE_FFTW3, 1, [Define if the FFTW3 library is used.]) ! else ! AC_DEFINE(HAVE_FFTW, 1, [Define if the FFTW library is used.]) ! warn_fftw="Old version of FFTW found. This will affect performance!!" ! fi fi WITH_MPI=true *************** *** 1608,1613 **** --- 1625,1635 ---- warn_msg_printed=true fi + if test -n "$warn_fftw"; then + AC_MSG_WARN($warn_fftw) + warn_msg_printed=true + fi + if $warn_msg_printed; then AC_MSG_RESULT([]) fi *** ./ChangeLog.orig 2004-01-30 15:25:15.000000000 +0100 --- ./ChangeLog 2004-01-30 15:26:32.000000000 +0100 *************** *** 1,3 **** --- 1,8 ---- + 2004-01-30 David Bateman + + * configure.in: Test for the presence of FFTW 3.x and use it in + preference to FFTW 2.x. Define HAVE_FFTW3 + 2004-01-22 John W. Eaton * octMakefile.in (maintainer-clean, distclean):