octave-maintainers
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: Fwd: 'for' loop vectorization


From: David Bateman
Subject: Re: Fwd: 'for' loop vectorization
Date: Wed, 24 Oct 2007 12:01:10 +0200
User-agent: Thunderbird 1.5.0.7 (X11/20060921)

In fact, the triu.cc I sent was really old. I just noticed it used "int"
rather than "octave_idx_type". Here is an updated version that is a bit
more 2.9.15 friendly.

D.

-- 

David Bateman                                address@hidden
Motorola Labs - Paris                        +33 1 69 35 48 04 (Ph) 
Parc Les Algorithmes, Commune de St Aubin    +33 6 72 01 06 33 (Mob) 
91193 Gif-Sur-Yvette FRANCE                  +33 1 69 35 77 01 (Fax) 

The information contained in this communication has been classified as: 

[x] General Business Information 
[ ] Motorola Internal Use Only 
[ ] Motorola Confidential Proprietary

/*

Copyright (C) 2004, 2007 David Bateman

This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with Octave; see the file COPYING.  If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.

*/

// FIXME Uncomment the HAVE_CONFIG_H ifdef if include in Octave
//#ifdef HAVE_CONFIG_H
#include <config.h>
//#endif

#include "dNDArray.h"
#include "CNDArray.h"
#include "Cell.h"
#include "chNDArray.h"
#include "lo-mappers.h"

#include "defun-dld.h"
#include "error.h"
#include "oct-obj.h"

// tril (A, K): zero every element of the 2-D array A that lies above its
// K-th diagonal and return the result.
//
// args(0) is the matrix; the optional args(1) is the diagonal offset K
// (default 0, read with int_value ()).  Three code paths are used:
//   * sparse matrices (real or complex): stored entries above the diagonal
//     are overwritten with zero, then maybe_compress (true) is called;
//   * the dense classes listed explicitly below (intN/uintN, cell, char,
//     double): the column-major data is zeroed in place via fortran_vec ();
//   * anything else: a slow generic path built on octave_value indexing
//     and indexed assignment.
// On any error the default-constructed retval is returned.
DEFUN_DLD (tril, args, nargout,
  "-*- texinfo -*-\n\
@deftypefn {Function File} {} tril (@var{a}, @var{k})\n\
@deftypefnx {Function File} {} triu (@var{a}, @var{k})\n\
Return a new matrix formed by extracting the lower (@code{tril})\n\
or upper (@code{triu}) triangular part of the matrix @var{a}, and\n\
setting all other elements to zero.  The second argument is optional,\n\
and specifies how many diagonals above or below the main diagonal should\n\
also be set to zero.\n\
\n\
The default value of @var{k} is zero, so that @code{triu} and\n\
@code{tril} normally include the main diagonal as part of the result\n\
matrix.\n\
\n\
If the value of @var{k} is negative, additional elements above (for\n\
@code{tril}) or below (for @code{triu}) the main diagonal are also\n\
selected.\n\
\n\
The absolute value of @var{k} must not be greater than the number of\n\
sub- or super-diagonals.\n\
\n\
For example,\n\
\n\
@example\n\
@group\n\
tril (ones (3), -1)\n\
     @result{}  0  0  0\n\
         1  0  0\n\
         1  1  0\n\
@end group\n\
@end example\n\
\n\
@noindent\n\
and\n\
\n\
@example\n\
@group\n\
tril (ones (3), 1)\n\
     @result{}  1  1  0\n\
         1  1  1\n\
         1  1  1\n\
@end group\n\
@end example\n\
@seealso{triu, diag}\n\
@end deftypefn")
{

  octave_value retval;
  int nargin = args.length ();
  octave_idx_type k = 0;

  // Only touch args(1) when it exists; the argument count itself is
  // validated just below.
  if (nargin == 2)
    {
      k = args(1).int_value();
      
      if (error_state)
        return retval;
    }

  if (nargin < 1 || nargin > 2)
    usage ("tril");
  else if (args(0).is_sparse_type ())
    {

// Sparse case.  MT is the sparse matrix class and MV the prefix of the
// octave_value extractor (MV_value).  Walking the column index array
// cidx, a stored entry in column j with row index ridx(i) lies above the
// k-th diagonal when ridx(i) < j - k; such entries are set to zero and
// maybe_compress (true) is then called (presumably to drop the explicit
// zeros -- confirm against the Sparse class).
#define SPARSE_TRIL(MT, MV) \
        { \
          MT m = args(0).MV ## _value(); \
          \
          if (!error_state) \
            { \
              octave_idx_type nr = m.rows(); \
              octave_idx_type nc = m.cols(); \
              if ((k > 0 && k >= nc) || (k < 0 && k <= -nr)) \
                { \
                  error ("tril: requested diagonal out of range"); \
                  return retval; \
                } \
              \
              for (octave_idx_type j = 0; j < nc; j++) \
                for (octave_idx_type i = m.cidx(j); i < m.cidx(j+1); i++) \
                  if (m.ridx(i) < j-k) \
                    m.data(i) = 0.; \
                  \
              m.maybe_compress (true); \
              retval = m; \
            } \
        }

      if (args(0).is_complex_type())
        SPARSE_TRIL (SparseComplexMatrix, sparse_complex_matrix)
      else
        SPARSE_TRIL (SparseMatrix, sparse_matrix)

#undef SPARSE_TRIL
    }
  else
    {
      std::string cname = args(0).class_name ();

// Dense case.  MT is the array class, MV the extractor prefix, ST the
// element type and ZERO the value written into cleared elements.  The
// data is addressed column-major (element (i,j) at i + j*nr); in column
// j the rows 0 .. min (j-k, nr) - 1 are above the k-th diagonal and are
// overwritten with ZERO.  Arrays with more than two dimensions are
// rejected.
#define TRIL(MT, MV, ST, ZERO) \
        { \
          MT m = args(0).MV ## _value(); \
          \
          if (!error_state) \
            { \
              dim_vector dv = m.dims (); \
              if (dv.length () > 2) \
                { \
                  error ("tril: must be 2d"); \
                  return retval; \
                } \
              ST *m_vec = m.fortran_vec(); \
              octave_idx_type nr = dv(0); \
              octave_idx_type nc = dv(1); \
              if ((k > 0 && k >= nc) || (k < 0 && k <= -nr)) \
                { \
                  error ("tril: requested diagonal out of range"); \
                  return retval; \
                } \
              \
              for (octave_idx_type j = 0; j < nc; j++) \
                for (octave_idx_type i = 0; i < (j-k < nr ? j-k : nr); i++) \
                  m_vec[i+j*nr] = ZERO; \
              \
              retval = m; \
            } \
        }

      // Dispatch on the class name so that the result keeps the class of
      // the input.
      if (cname == "uint8")
        TRIL (uint8NDArray, uint8_array, octave_uint8, 0)
      else if (cname == "uint16")
        TRIL (uint16NDArray, uint16_array, octave_uint16, 0)
      else if (cname == "uint32")
        TRIL (uint32NDArray, uint32_array, octave_uint32, 0)
      else if (cname == "uint64")
        TRIL (uint64NDArray, uint64_array, octave_uint64, 0)
      else if (cname == "int8")
        TRIL (int8NDArray, int8_array, octave_int8, 0)
      else if (cname == "int16")
        TRIL (int16NDArray, int16_array, octave_int16, 0)
      else if (cname == "int32")
        TRIL (int32NDArray, int32_array, octave_int32, 0)
      else if (cname == "int64")
        TRIL (int64NDArray, int64_array, octave_int64, 0)
      else if (cname == "cell")
        TRIL (Cell, cell, octave_value, Matrix())
      else if (cname == "char")
        TRIL (charNDArray, char_array, char, 0)
      else if (cname == "double")
        {
          if (args(0).is_complex_type())
            TRIL (ComplexNDArray, complex_array, Complex, 0.)
          else
            TRIL (NDArray, array, double, 0.)
        }
      else
        {
          // Generic code that works on octave-values, that is slow
          // but will also work on arbitrary user types

          octave_value arg = args(0);
          octave_value tmp = arg;
          dim_vector dv = arg.dims ();
          if (dv.length () > 2) 
            {
              error ("tril: must be 2d");
              return retval;
            }

          octave_idx_type nr = dv(0);
          octave_idx_type nc = dv(1);

          if ((k > 0 && k >= nc) || (k < 0 && k <= -nr))
            {
              error ("tril: requested diagonal out of range");
              return retval;
            }

          // The sole purpose of the code below is to force the correct
          // matrix size.  This would not be necessary if the
          // octave_value resize function allowed a fill_value.
          // It also allows odd types such as Galois fields to be
          // handled.  With a fill_value it should be replaced with
          //
          // octave_value_list ov_idx;
          // tmp = tmp.resize(dim_vector (0,0)).resize (dv, fill_value);
          //
          // What it does: empty tmp, assign the last column of arg to
          // column nc+1 of tmp (so that columns 1..nc are created by the
          // type's own indexed assignment), then resize back to dv.

          octave_value_list ov_idx;
          std::list<octave_value_list> idx_tmp;
          ov_idx(1) = static_cast<double> (nc+1);
          ov_idx(0) = Range (1, nr);
          idx_tmp.push_back (ov_idx);
          ov_idx(1) = static_cast<double> (nc);
          tmp = tmp.resize (dim_vector (0,0));
          tmp = tmp.subsasgn("(",idx_tmp, arg.do_index_op (ov_idx));
          tmp = tmp.resize(dv);

          // Do not carry on with a half-built tmp if the indexing or
          // assignment above failed.
          if (error_state)
            return retval;

          // Columns beyond nr + k lie entirely above the k-th diagonal,
          // so only the first st columns (1-based) receive data.
          octave_idx_type st = nc < nr + k ? nc : nr + k;

          for (octave_idx_type j = 1; j <= st; j++)
            {
              // First row (1-based) of column j on or below the k-th
              // diagonal.
              octave_idx_type nr_limit = 1 > j - k ? 1 : j - k;
              // Both elements of the outer ov_idx are overwritten here,
              // so it can be reused on every iteration.
              ov_idx(1) = static_cast<double> (j);
              ov_idx(0) = Range (nr_limit, nr);
              std::list<octave_value_list> idx;
              idx.push_back (ov_idx);

              // Copy rows nr_limit..nr of column j from the original.
              tmp = tmp.subsasgn ("(", idx, arg.do_index_op(ov_idx));

              if (error_state)
                return retval;
            }

          retval = tmp;
        }
#undef TRIL
    }

  return retval;
}

// triu (A, K): zero every element of the 2-D array A that lies below its
// K-th diagonal and return the result.
//
// args(0) is the matrix; the optional args(1) is the diagonal offset K
// (default 0, read with int_value ()).  Three code paths are used:
//   * sparse matrices (real or complex): stored entries below the diagonal
//     are overwritten with zero, then maybe_compress (true) is called;
//   * the dense classes listed explicitly below (intN/uintN, cell, char,
//     double): the column-major data is zeroed in place via fortran_vec ();
//   * anything else: a slow generic path built on octave_value indexing
//     and indexed assignment.
// On any error the default-constructed retval is returned.
DEFUN_DLD (triu, args, nargout,
  "-*- texinfo -*-\n\
@deftypefn {Function File} {} triu (@var{a}, @var{k})\n\
See tril.\n\
@end deftypefn")
{
  octave_value retval;
  int nargin = args.length ();
  octave_idx_type k = 0;

  // Only touch args(1) when it exists; the argument count itself is
  // validated just below.
  if (nargin == 2)
    {
      k = args(1).int_value();
      
      if (error_state)
        return retval;
    }

  if (nargin < 1 || nargin > 2)
    usage ("triu");
  else if (args(0).is_sparse_type ())
    {
// Sparse case.  MT is the sparse matrix class and MV the prefix of the
// octave_value extractor (MV_value).  Walking the column index array
// cidx, a stored entry in column j with row index ridx(i) lies below the
// k-th diagonal when ridx(i) > j - k; such entries are set to zero and
// maybe_compress (true) is then called (presumably to drop the explicit
// zeros -- confirm against the Sparse class).
#define SPARSE_TRIU(MT, MV) \
        { \
          MT m = args(0).MV ## _value(); \
          \
          if (!error_state) \
            { \
              octave_idx_type nr = m.rows(); \
              octave_idx_type nc = m.cols(); \
              if ((k > 0 && k >= nc) || (k < 0 && k <= -nr)) \
                { \
                  error ("triu: requested diagonal out of range"); \
                  return retval; \
                } \
              \
              for (octave_idx_type j = 0; j < nc; j++) \
                for (octave_idx_type i = m.cidx(j); i < m.cidx(j+1); i++) \
                  if (m.ridx(i) > j-k) \
                    m.data(i) = 0.; \
                  \
              m.maybe_compress (true); \
              retval = m; \
            } \
        }

      if (args(0).is_complex_type())
        SPARSE_TRIU (SparseComplexMatrix, sparse_complex_matrix)
      else
        SPARSE_TRIU (SparseMatrix, sparse_matrix)

#undef SPARSE_TRIU
    }
  else
    {
      std::string cname = args(0).class_name ();

// Dense case.  MT is the array class, MV the extractor prefix, ST the
// element type and ZERO the value written into cleared elements.  The
// data is addressed column-major (element (i,j) at i + j*nr); in column
// j the rows max (j-k+1, 0) .. nr-1 are below the k-th diagonal and are
// overwritten with ZERO.  Arrays with more than two dimensions are
// rejected.
#define TRIU(MT, MV, ST, ZERO) \
        { \
          MT m = args(0).MV ## _value(); \
          \
          if (!error_state) \
            { \
              dim_vector dv = m.dims (); \
              if (dv.length () > 2) \
                { \
                  error ("triu: must be 2d"); \
                  return retval; \
                } \
              ST *m_vec = m.fortran_vec(); \
              octave_idx_type nr = dv(0); \
              octave_idx_type nc = dv(1); \
              if ((k > 0 && k >= nc) || (k < 0 && k <= -nr)) \
                { \
                  error ("triu: requested diagonal out of range"); \
                  return retval; \
                } \
              \
              for (octave_idx_type j = 0; j < nc; j++) \
                for (octave_idx_type i = (j-k+1 > 0 ? j-k+1 : 0); i < nr; i++) \
                  m_vec[i+j*nr] = ZERO; \
              \
              retval = m; \
            } \
        }

      // Dispatch on the class name so that the result keeps the class of
      // the input.
      if (cname == "uint8")
        TRIU (uint8NDArray, uint8_array, octave_uint8, 0)
      else if (cname == "uint16")
        TRIU (uint16NDArray, uint16_array, octave_uint16, 0)
      else if (cname == "uint32")
        TRIU (uint32NDArray, uint32_array, octave_uint32, 0)
      else if (cname == "uint64")
        TRIU (uint64NDArray, uint64_array, octave_uint64, 0)
      else if (cname == "int8")
        TRIU (int8NDArray, int8_array, octave_int8, 0)
      else if (cname == "int16")
        TRIU (int16NDArray, int16_array, octave_int16, 0)
      else if (cname == "int32")
        TRIU (int32NDArray, int32_array, octave_int32, 0)
      else if (cname == "int64")
        TRIU (int64NDArray, int64_array, octave_int64, 0)
      else if (cname == "cell")
        TRIU (Cell, cell, octave_value, Matrix())
      else if (cname == "char")
        TRIU (charNDArray, char_array, char, 0)
      else if (cname == "double")
        {
          if (args(0).is_complex_type())
            TRIU (ComplexNDArray, complex_array, Complex, 0.)
          else
            TRIU (NDArray, array, double, 0.)
        }
      else
        {
          // Generic code that works on octave-values, that is slow
          // but will also work on arbitrary user types

          octave_value arg = args(0);
          octave_value tmp = arg;
          dim_vector dv = arg.dims ();
          if (dv.length () > 2) 
            {
              error ("triu: must be 2d");
              return retval;
            }

          octave_idx_type nr = dv(0);
          octave_idx_type nc = dv(1);

          if ((k > 0 && k >= nc) || (k < 0 && k <= -nr))
            {
              error ("triu: requested diagonal out of range");
              return retval;
            }

          // The sole purpose of the code below is to force the correct
          // matrix size.  This would not be necessary if the
          // octave_value resize function allowed a fill_value.
          // It also allows odd types such as Galois fields to be
          // handled.  With a fill_value it should be replaced with
          //
          // octave_value_list ov_idx;
          // tmp = tmp.resize(dim_vector (0,0)).resize (dv, fill_value);
          //
          // What it does: empty tmp, assign the last column of arg to
          // column nc+1 of tmp (so that columns 1..nc are created by the
          // type's own indexed assignment), then resize back to dv.

          octave_value_list ov_idx;
          std::list<octave_value_list> idx_tmp;
          ov_idx(1) = static_cast<double> (nc+1);
          ov_idx(0) = Range (1, nr);
          idx_tmp.push_back (ov_idx);
          ov_idx(1) = static_cast<double> (nc);
          tmp = tmp.resize (dim_vector (0,0));
          tmp = tmp.subsasgn("(",idx_tmp, arg.do_index_op (ov_idx));
          tmp = tmp.resize(dv);

          // Do not carry on with a half-built tmp if the indexing or
          // assignment above failed.
          if (error_state)
            return retval;

          // Columns before k + 1 lie entirely below the k-th diagonal,
          // so copying starts at column st (1-based).
          octave_idx_type st = k + 1 > 1 ? k + 1 : 1;

          for (octave_idx_type j = st; j <= nc; j++)
            {
              // Last row (1-based) of column j on or above the k-th
              // diagonal.
              octave_idx_type nr_limit = nr < j - k ? nr : j - k;
              ov_idx(1) = static_cast<double> (j);
              ov_idx(0) = Range (1, nr_limit);
              std::list<octave_value_list> idx;
              idx.push_back (ov_idx);

              // Copy rows 1..nr_limit of column j from the original.
              tmp = tmp.subsasgn ("(", idx, arg.do_index_op(ov_idx));

              if (error_state)
                return retval;
            }

          retval = tmp;
        }
#undef TRIU
    }

  return retval;
}

/*
;;; Local Variables: ***
;;; mode: C++ ***
;;; End: ***
*/

reply via email to

[Prev in Thread] Current Thread [Next in Thread]