Align Array<T> storage for SIMD FFTW plans.

What the hunks below do:

  liboctave/Array.h
    Adds an ArrayRep::alignedT typedef.  When HAVE_ATTRIB_ALIGN is defined
    it is T with __attribute__ ((aligned(16))), otherwise plain T.  The
    three allocating ArrayRep constructors use "new alignedT [n]" instead
    of "new T [n]".

  liboctave/oct-fftw.cc
    Replaces the per-buffer ialign/oalign (and rialign/roalign) bytes with
    one bool per plan, simd_align[] / rsimd_align, which is true when both
    the input and the output pointer have their low four address bits
    clear.  A cached plan is thrown away for alignment reasons only when
    it was created for aligned buffers and the current buffers are not
    aligned.  When a plan is (re)created, FFTW_UNALIGNED is cleared from
    plan_flags for aligned buffers and set otherwise.

  configure.in
    Adds a compile test that defines HAVE_ATTRIB_ALIGN when
    "typedef double double_aligned __attribute__ ((aligned(16)));" is
    accepted by the compiler.

Review notes:

  CHECK_SIMD_ALIGNMENT now parenthesises the mask before the comparison.
  The earlier form "(addr & 0xF == 0)" parsed as "addr & (0xF == 0)",
  because == binds tighter than &, so it was always zero: ioalign was
  always false and FFTW_UNALIGNED was always set.

  NOTE(review): the template argument of every reinterpret_cast was
  missing from the copy this patch was rebuilt from.  ptrdiff_t is
  assumed here, including on the old-side "!" lines; confirm against
  the real liboctave/oct-fftw.cc before applying.

  NOTE(review): line breaks and leading whitespace were reconstructed
  from the hunk line counts, so tabs versus spaces may not match the
  target files.  Apply with "patch -l" or regenerate the diff.

*** ./liboctave/Array.h.orig  2004-02-20 22:16:53.000000000 +0100
--- ./liboctave/Array.h  2004-02-25 15:04:37.000000000 +0100
***************
*** 62,67 ****
--- 62,73 ----
    {
    public:
  
+ #ifdef HAVE_ATTRIB_ALIGN
+     typedef T alignedT __attribute__ ((aligned(16)));
+ #else
+     typedef T alignedT;
+ #endif
+ 
      T *data;
      int len;
      int count;
***************
*** 70,85 ****
  
      ArrayRep (void) : data (0), len (0), count (1) { }
  
!     explicit ArrayRep (int n) : data (new T [n]), len (n), count (1) { }
  
      explicit ArrayRep (int n, const T& val)
!       : data (new T [n]), len (n), count (1)
        {
          fill (val);
        }
  
      ArrayRep (const ArrayRep& a)
!       : data (new T [a.len]), len (a.len), count (1)
        {
          for (int i = 0; i < len; i++)
            data[i] = a.data[i];
--- 76,92 ----
  
      ArrayRep (void) : data (0), len (0), count (1) { }
  
!     explicit ArrayRep (int n) :
!       data (new alignedT [n]), len (n), count (1) { }
  
      explicit ArrayRep (int n, const T& val)
!       : data (new alignedT [n]), len (n), count (1)
        {
          fill (val);
        }
  
      ArrayRep (const ArrayRep& a)
!       : data (new alignedT [a.len]), len (a.len), count (1)
        {
          for (int i = 0; i < len; i++)
            data[i] = a.data[i];
*** ./liboctave/oct-fftw.cc.orig  2004-02-20 22:16:53.000000000 +0100
--- ./liboctave/oct-fftw.cc  2004-02-27 15:37:09.000000000 +0100
***************
*** 69,76 ****
    int r[2];  // rank
    int h[2];  // howmany
    dim_vector n[2]; // dims
!   char ialign[2];
!   char oalign[2];
  
    // Plan for fft of real values
    fftw_plan rplan;
--- 69,75 ----
    int r[2];  // rank
    int h[2];  // howmany
    dim_vector n[2]; // dims
!   bool simd_align[2];
  
    // Plan for fft of real values
    fftw_plan rplan;
***************
*** 79,86 ****
    int rr;  // rank
    int rh;  // howmany
    dim_vector rn; // dims
!   char rialign;
!   char roalign;
  };
  
  octave_fftw_planner::octave_fftw_planner ()
--- 78,84 ----
    int rr;  // rank
    int rh;  // howmany
    dim_vector rn; // dims
!   bool rsimd_align;
  };
  
  octave_fftw_planner::octave_fftw_planner ()
***************
*** 89,106 ****
  
    plan[0] = plan[1] = 0;
    d[0] = d[1] = s[0] = s[1] = r[0] = r[1] = h[0] = h[1] = 0;
!   ialign[0] = ialign[1] = oalign[0] = oalign[1] = 0;
    n[0] = n[1] = dim_vector();
  
    rplan = 0;
    rd = rs = rr = rh = 0;
!   rialign = roalign = 0;
    rn = dim_vector ();
  
    // If we have a system wide wisdom file, import it
    fftw_import_system_wisdom ( );
  }
  
  fftw_plan
  octave_fftw_planner::create_plan (int dir, const int rank,
                                    const dim_vector dims, int howmany,
--- 87,107 ----
  
    plan[0] = plan[1] = 0;
    d[0] = d[1] = s[0] = s[1] = r[0] = r[1] = h[0] = h[1] = 0;
!   simd_align[0] = simd_align[1] = false;
    n[0] = n[1] = dim_vector();
  
    rplan = 0;
    rd = rs = rr = rh = 0;
!   rsimd_align = false;
    rn = dim_vector ();
  
    // If we have a system wide wisdom file, import it
    fftw_import_system_wisdom ( );
  }
  
+ #define CHECK_SIMD_ALIGNMENT(x) \
+   (((reinterpret_cast<ptrdiff_t> (x)) & 0xF) == 0)
+ 
  fftw_plan
  octave_fftw_planner::create_plan (int dir, const int rank,
                                    const dim_vector dims, int howmany,
***************
*** 110,121 ****
    int which = (dir == FFTW_FORWARD) ? 0 : 1;
    fftw_plan *cur_plan_p = &plan[which];
    bool create_new_plan = false;
!   char in_align = (reinterpret_cast<ptrdiff_t> (in)) & 0xF;
!   char out_align = (reinterpret_cast<ptrdiff_t> (out)) & 0xF;
  
    if (plan[which] == 0 || d[which] != dist || s[which] != stride
        || r[which] != rank || h[which] != howmany
!       || ialign[which] != in_align || oalign[which] != out_align)
      create_new_plan = true;
    else
      // We still might not have the same shape of array
--- 111,125 ----
    int which = (dir == FFTW_FORWARD) ? 0 : 1;
    fftw_plan *cur_plan_p = &plan[which];
    bool create_new_plan = false;
!   bool ioalign = CHECK_SIMD_ALIGNMENT (in) &&
!     CHECK_SIMD_ALIGNMENT (out);
  
+   // Don't create a new plan if we have a non SIMD plan already
+   // but can do SIMD. This prevents endlessly recreating plans
+   // if we change the alignment
    if (plan[which] == 0 || d[which] != dist || s[which] != stride
        || r[which] != rank || h[which] != howmany
!       || ((ioalign != simd_align[which]) ? !ioalign : false))
      create_new_plan = true;
    else
      // We still might not have the same shape of array
***************
*** 132,141 ****
        s[which] = stride;
        r[which] = rank;
        h[which] = howmany;
!       ialign[which] = in_align;
!       oalign[which] = out_align;
        n[which] = dims;
  
        if (*cur_plan_p)
          fftw_destroy_plan (*cur_plan_p);
  
--- 136,149 ----
        s[which] = stride;
        r[which] = rank;
        h[which] = howmany;
!       simd_align[which] = ioalign;
        n[which] = dims;
  
+       if (ioalign)
+         plan_flags &= ~FFTW_UNALIGNED;
+       else
+         plan_flags |= FFTW_UNALIGNED;
+ 
        if (*cur_plan_p)
          fftw_destroy_plan (*cur_plan_p);
  
***************
*** 164,174 ****
  {
    fftw_plan *cur_plan_p = &rplan;
    bool create_new_plan = false;
!   char in_align = (reinterpret_cast<ptrdiff_t> (in)) & 0xF;
!   char out_align = (reinterpret_cast<ptrdiff_t> (out)) & 0xF;
  
    if (rplan == 0 || rd != dist || rs != stride || rr != rank
!       || rh != howmany || rialign != in_align || roalign != out_align)
      create_new_plan = true;
    else
      // We still might not have the same shape of array
--- 172,185 ----
  {
    fftw_plan *cur_plan_p = &rplan;
    bool create_new_plan = false;
!   bool ioalign = CHECK_SIMD_ALIGNMENT (in) &&
!     CHECK_SIMD_ALIGNMENT (out);
  
+   // Don't create a new plan if we have a non SIMD plan already
+   // but can do SIMD. This prevents endlessly recreating plans
+   // if we change the alignment
    if (rplan == 0 || rd != dist || rs != stride || rr != rank
!       || rh != howmany || ((ioalign != rsimd_align) ? !ioalign : false))
      create_new_plan = true;
    else
      // We still might not have the same shape of array
***************
*** 185,194 ****
        rs = stride;
        rr = rank;
        rh = howmany;
!       rialign = in_align;
!       roalign = out_align;
        rn = dims;
  
        if (*cur_plan_p)
          fftw_destroy_plan (*cur_plan_p);
  
--- 196,209 ----
        rs = stride;
        rr = rank;
        rh = howmany;
!       rsimd_align = ioalign;
        rn = dims;
  
+       if (ioalign)
+         plan_flags &= ~FFTW_UNALIGNED;
+       else
+         plan_flags |= FFTW_UNALIGNED;
+ 
        if (*cur_plan_p)
          fftw_destroy_plan (*cur_plan_p);
  
*** ./configure.in.orig  2004-02-20 22:16:52.000000000 +0100
--- ./configure.in  2004-02-27 15:05:33.000000000 +0100
***************
*** 985,990 ****
--- 985,997 ----
    strerror strftime stricmp strncasecmp strnicmp strptime symlink \
    tempnam umask unlink usleep vfprintf vsprintf vsnprintf waitpid)
  
+ AC_MSG_CHECKING(for __attribute__ ((aligned(16))))
+ AC_TRY_COMPILE( ,
+  [typedef double double_aligned __attribute__ ((aligned(16)));],
+  [AC_MSG_RESULT(yes)
+   AC_DEFINE(HAVE_ATTRIB_ALIGN, 1, [Define if have __attribute__ ((aligned(16)))])],
+  AC_MSG_RESULT(no))
+ 
  OCTAVE_SMART_PUTENV
  
  ### Dynamic linking is now enabled only if we are building shared