Re: [Changeset} Re: cellfunc vs cell2mat speed?

help-octave

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Changeset} Re: cellfunc vs cell2mat speed?

From:	David Bateman
Subject:	Re: [Changeset} Re: cellfunc vs cell2mat speed?
Date:	Mon, 15 Sep 2008 18:03:40 +0200
User-agent:	Thunderbird 2.0.0.16 (X11/20080725)

John W. Eaton wrote:

On 10-Sep-2008, David Bateman wrote:
| I'd suggest something like teh attached changeset that improves the| speed but keeps teh code compatible.
Thanks, I applied it along with the additional change you sent
off the list.  The combined change is attached below as a single
changeset.

jwe

Thinking about this further we should in fact have single typeconcatenation specializations in Fcat as well in the same manner as inpt-mat.cc. The attached patch does this and then reworks cell2mat againfor even better speed.


Cheers
David


--
David Bateman                                address@hidden

Motorola Labs - Paris +33 1 69 35 48 04 (Ph)Parc Les Algorithmes, Commune de St Aubin +33 6 72 01 06 33 (Mob)91193 Gif-Sur-Yvette FRANCE +33 1 69 35 77 01 (Fax)The information contained in this communication has been classified as:[x] General Business Information[ ] Motorola Internal Use Only[ ] Motorola Confidential Proprietary

# HG changeset patch
# User David Bateman <address@hidden>
# Date 1221494504 -7200
# Node ID 6eb9b1bd1e6b0be4cc718c7fb0fbfcd53afb00e7
# Parent  adeab8d7be8c0d5b7600623be381b94b15b43211
Special case single type conacation in Fcat. Rework cell2mat to take advantage

diff --git a/scripts/ChangeLog b/scripts/ChangeLog
--- a/scripts/ChangeLog
+++ b/scripts/ChangeLog
@@ -1,3 +1,8 @@ 2008-09-09  David Bateman  <address@hidden
+2008-09-15  David Bateman  <address@hidden>
+
+       * general/cell2mat.m: Backout previous change. Special case 2D
+       case for speed.
+
 2008-09-09  David Bateman  <address@hidden>
 
        * general/cell2mat.m: Improve the speed..
diff --git a/scripts/general/cell2mat.m b/scripts/general/cell2mat.m
--- a/scripts/general/cell2mat.m
+++ b/scripts/general/cell2mat.m
@@ -50,44 +50,34 @@ function m = cell2mat (c)
     endif
   elseif (ndims (c) == 2)
     nr = rows (c);
-    c1 = cell (nr, 1);
-    for i = 1 : nr
-      c1{i} = [c{i : nr : end}];
-    endfor
-    ## This is faster than "c = cat(1, c{:})"
-    m = [cellfun(@(x) x.', c1, "UniformOutput", false){:}].';
+    nc = columns (c);
+    if (nc > nr)
+      c1 = cell (nr, 1);
+      for i = 1 : nr
+       c1{i} = [c{i : nr : end}];
+      endfor
+      m = cat (1, c1 {:});
+    else
+      c1 = cell (nc, 1);
+      for i = 1 : nc
+       c1{i} = cat (1, c{(i - 1) * nr  + [1 : nr]});
+      endfor
+      m = [c1{:}];
+    endif
   else
-   nd = ndims (c);
-   for k = nd : -1 : 2
+    ## n dimensions case
+    for k = ndims (c):-1:2,
       sz = size (c);
-      if (k > ndims (c) || sz(end) == 1)
-       continue;
-      endif
       sz(end) = 1;
       c1 = cell (sz);
-      sz = prod (sz);
-      if (k == 2)
-        for i = 1 : sz
-         c1{i} = [c{i : sz : end}];
-        endfor
-      else
-        ## This is faster than
-        ##   for i = 1:sz, c1{i} = cat (k, c{i:(prod (sz)):end}); endfor
-       idx = [1, k, (3 : (k - 1)), 2, ((k + 1): nd)];
-        c = cellfun(@(x) permute (x, idx), c, "UniformOutput", false);
-        for i = 1: sz
-         c1{i} = ipermute ([c{i : sz : end}], idx);
-        endfor
-      endif
+      for i = 1:(prod (sz))
+        c1{i} = cat (k, c{i:(prod (sz)):end});
+      endfor
       c = c1;
     endfor
-    if (numel (c) > 1)
-      idx = [2, 1, 3 : nd];
-      m = ipermute([cellfun(@(x) permute (x, idx), c, "UniformOutput", 
false){:}], idx);
-    else
-      m = c{1};
-    endif
+    m = cat (1, c1{:});
   endif
+
 endfunction
 
 ## Tests
diff --git a/src/ChangeLog b/src/ChangeLog
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,5 +1,14 @@ 2008-09-15  David Bateman  <address@hidden
 2008-09-15  David Bateman  <address@hidden>
 
+       * data.cc (SINGLE_TYPE_CONCAT, DO_SINGLE_TYPE_CONCAT): New macros
+       (do_cat): Special case single type concatenations for speed.
+       * pt.mat.cc (std::string get_concat_class (const std::string&,
+       const std::string&), void maybe_warn_string_concat (bool, bool)):
+       Remove static declaration.
+       * pt-mat.h (std::string get_concat_class (const std::string&,
+       const std::string&), void maybe_warn_string_concat (bool, bool)):
+       Define extern here.
+       
        * DLD-FUNCTIONS/sparse.cc (Fsparse): Clarify the help string.
 
 2008-09-13  David Bateman  <address@hidden>
diff --git a/src/data.cc b/src/data.cc
--- a/src/data.cc
+++ b/src/data.cc
@@ -1718,6 +1718,51 @@ return the product of the elements.\n\
 
  */
 
+#define SINGLE_TYPE_CONCAT(TYPE, EXTRACTOR) \
+  do \
+    { \
+      int dv_len = dv.length (); \
+      Array<octave_idx_type> ra_idx (dv_len > 1 ? dv_len : 2, 0); \
+      \
+      for (int j = 1; j < n_args; j++) \
+       { \
+         OCTAVE_QUIT; \
+         \
+         TYPE ra = args(j).EXTRACTOR ();       \
+         \
+         if (! error_state) \
+           { \
+             result.insert (ra, ra_idx); \
+             \
+             if (error_state) \
+               return retval; \
+             \
+             dim_vector dv_tmp = args (j).dims (); \
+             \
+             if (dim >= dv_len) \
+               { \
+                 if (j > 1) \
+                   error ("%s: indexing error", fname.c_str ()); \
+                 break; \
+               } \
+             else \
+               ra_idx (dim) += (dim < dv_tmp.length () ? dv_tmp (dim) : 1); \
+           } \
+       } \
+    } \
+ while (0)
+
+#define DO_SINGLE_TYPE_CONCAT(TYPE, EXTRACTOR) \
+  do \
+    { \
+      TYPE result (dv); \
+      \
+      SINGLE_TYPE_CONCAT(TYPE, EXTRACTOR); \
+      \
+      retval = result; \
+    } \
+ while (0)
+
 static octave_value
 do_cat (const octave_value_list& args, std::string fname)
 {
@@ -1743,7 +1788,13 @@ do_cat (const octave_value_list& args, s
        {
          
          dim_vector  dv = args(1).dims ();
+         std::string result_type = args(1).class_name ();
          
+         bool all_sq_strings_p = args(1).is_sq_string ();
+         bool all_dq_strings_p = args(1).is_dq_string ();
+         bool all_real_p = args(1).is_real_type ();
+         bool any_sparse_p = args(1).is_sparse_type();
+
          for (int i = 2; i < args.length (); i++)
            {
              // add_dims constructs a dimension vector which holds the
@@ -1755,68 +1806,145 @@ do_cat (const octave_value_list& args, s
                  error ("cat: dimension mismatch");
                  return retval;
                }
-           }
-
-         // The lines below might seem crazy, since we take a copy
-         // of the first argument, resize it to be empty and then resize
-         // it to be full. This is done since it means that there is no
-         // recopying of data, as would happen if we used a single resize.
-         // It should be noted that resize operation is also significantly 
-         // slower than the do_cat_op function, so it makes sense to have an
-         // empty matrix and copy all data.
-         //
-         // We might also start with a empty octave_value using
-         //   tmp = octave_value_typeinfo::lookup_type (args(1).type_name());
-         // and then directly resize. However, for some types there might be
-         // some additional setup needed, and so this should be avoided.
-
-         octave_value tmp = args (1);
-         tmp = tmp.resize (dim_vector (0,0)).resize (dv);
-
-         if (error_state)
-           return retval;
-
-         int dv_len = dv.length ();
-         Array<octave_idx_type> ra_idx (dv_len, 0);
-
-         for (int j = 1; j < n_args; j++)
-           {
-             // Can't fast return here to skip empty matrices as something
-             // like cat(1,[],single([])) must return an empty matrix of
-             // the right type.
-             tmp = do_cat_op (tmp, args (j), ra_idx);
+             
+             result_type = 
+               get_concat_class (result_type, args(i).class_name ());
+
+             if (all_sq_strings_p && ! args(i).is_sq_string ())
+               all_sq_strings_p = false;
+             if (all_dq_strings_p && ! args(i).is_dq_string ())
+               all_dq_strings_p = false;
+             if (all_real_p && ! args(i).is_real_type ())
+               all_real_p = false;
+             if (!any_sparse_p && args(i).is_sparse_type ())
+               any_sparse_p = true;
+           }
+
+         if (result_type == "double")
+           {
+             if (any_sparse_p)
+               {           
+                 if (all_real_p)
+                   DO_SINGLE_TYPE_CONCAT (SparseMatrix, sparse_matrix_value);
+                 else
+                   DO_SINGLE_TYPE_CONCAT (SparseComplexMatrix, 
sparse_complex_matrix_value);
+               }
+             else
+               {
+                 if (all_real_p)
+                   DO_SINGLE_TYPE_CONCAT (NDArray, array_value);
+                 else
+                   DO_SINGLE_TYPE_CONCAT (ComplexNDArray, complex_array_value);
+               }
+           }
+         else if (result_type == "single")
+           {
+             if (all_real_p)
+               DO_SINGLE_TYPE_CONCAT (FloatNDArray, float_array_value);
+             else
+               DO_SINGLE_TYPE_CONCAT (FloatComplexNDArray, 
+                                      float_complex_array_value);
+           }
+         else if (result_type == "char")
+           {
+             char type = all_dq_strings_p ? '"' : '\'';
+
+             maybe_warn_string_concat (all_dq_strings_p, all_sq_strings_p);
+
+             charNDArray result (dv, Vstring_fill_char);
+
+             SINGLE_TYPE_CONCAT (charNDArray, char_array_value);
+
+             retval = octave_value (result, true, type);
+           }
+         else if (result_type == "logical")
+           {
+             if (any_sparse_p)
+               DO_SINGLE_TYPE_CONCAT (SparseBoolMatrix, 
sparse_bool_matrix_value);
+             else
+               DO_SINGLE_TYPE_CONCAT (boolNDArray, bool_array_value);
+           }
+         else if (result_type == "int8")
+           DO_SINGLE_TYPE_CONCAT (int8NDArray, int8_array_value);
+         else if (result_type == "int16")
+           DO_SINGLE_TYPE_CONCAT (int16NDArray, int16_array_value);
+         else if (result_type == "int32")
+           DO_SINGLE_TYPE_CONCAT (int32NDArray, int32_array_value);
+         else if (result_type == "int64")
+           DO_SINGLE_TYPE_CONCAT (int64NDArray, int64_array_value);
+         else if (result_type == "uint8")
+           DO_SINGLE_TYPE_CONCAT (uint8NDArray, uint8_array_value);
+         else if (result_type == "uint16")
+           DO_SINGLE_TYPE_CONCAT (uint16NDArray, uint16_array_value);
+         else if (result_type == "uint32")
+           DO_SINGLE_TYPE_CONCAT (uint32NDArray, uint32_array_value);
+         else if (result_type == "uint64")
+           DO_SINGLE_TYPE_CONCAT (uint64NDArray, uint64_array_value);
+         else
+           {
+             // The lines below might seem crazy, since we take a copy
+             // of the first argument, resize it to be empty and then resize
+             // it to be full. This is done since it means that there is no
+             // recopying of data, as would happen if we used a single resize.
+             // It should be noted that resize operation is also significantly 
+             // slower than the do_cat_op function, so it makes sense to have
+             // an empty matrix and copy all data.
+             //
+             // We might also start with a empty octave_value using
+             //   tmp = octave_value_typeinfo::lookup_type 
+             //                                (args(1).type_name());
+             // and then directly resize. However, for some types there might
+             // be some additional setup needed, and so this should be avoided.
+
+             octave_value tmp = args (1);
+             tmp = tmp.resize (dim_vector (0,0)).resize (dv);
 
              if (error_state)
                return retval;
 
-             dim_vector dv_tmp = args (j).dims ();
-
-             if (dim >= dv_len)
-               {
-                 if (j > 1)
-                   error ("%s: indexing error", fname.c_str ());
-                 break;
-               }
-             else
-               ra_idx (dim) += (dim < dv_tmp.length () ? 
-                                dv_tmp (dim) : 1);
-           }
-
-         // Reshape, chopping trailing singleton dimensions
-         dv.chop_trailing_singletons ();
-         tmp = tmp.reshape (dv);
-
-         retval = tmp;
+             int dv_len = dv.length ();
+             Array<octave_idx_type> ra_idx (dv_len, 0);
+
+             for (int j = 1; j < n_args; j++)
+               {
+                 // Can't fast return here to skip empty matrices as something
+                 // like cat(1,[],single([])) must return an empty matrix of
+                 // the right type.
+                 tmp = do_cat_op (tmp, args (j), ra_idx);
+
+                 if (error_state)
+                   return retval;
+
+                 dim_vector dv_tmp = args (j).dims ();
+
+                 if (dim >= dv_len)
+                   {
+                     if (j > 1)
+                       error ("%s: indexing error", fname.c_str ());
+                     break;
+                   }
+                 else
+                   ra_idx (dim) += (dim < dv_tmp.length () ? 
+                                    dv_tmp (dim) : 1);
+               }
+             retval = tmp;
+           }
+
+         if (! error_state)
+           {
+             // Reshape, chopping trailing singleton dimensions
+             dv.chop_trailing_singletons ();
+             retval = retval.reshape (dv);
+           }
        }
       else
        error ("%s: invalid dimension argument", fname.c_str ());
     }
   else
     print_usage ();
- 
+
   return retval;
 }
-
 
 DEFUN (horzcat, args, ,
        "-*- texinfo -*-\n\
diff --git a/src/pt-mat.cc b/src/pt-mat.cc
--- a/src/pt-mat.cc
+++ b/src/pt-mat.cc
@@ -184,7 +184,7 @@ private:
   tm_row_const_rep *rep;
 };
 
-static std::string
+std::string
 get_concat_class (const std::string& c1, const std::string& c2)
 {
   std::string retval = octave_base_value::static_class_name ();
@@ -699,7 +699,7 @@ tree_matrix::rvalue (int nargout)
   return retval;
 }
 
-static void
+void
 maybe_warn_string_concat (bool all_dq_strings_p, bool all_sq_strings_p)
 {
   if (! (all_dq_strings_p || all_sq_strings_p))
diff --git a/src/pt-mat.h b/src/pt-mat.h
--- a/src/pt-mat.h
+++ b/src/pt-mat.h
@@ -81,6 +81,12 @@ private:
 // The character to fill with when creating string arrays.
 extern char Vstring_fill_char;
 
+extern std::string 
+get_concat_class (const std::string& c1, const std::string& c2);
+
+extern void
+maybe_warn_string_concat (bool all_dq_strings_p, bool all_sq_strings_p);
+
 #endif
 
 /*

[Prev in Thread]

Current Thread

[Next in Thread]

cellfunc vs cell2mat speed?, Levente Torok, 2008/09/09
- Re: cellfunc vs cell2mat speed?, David Bateman, 2008/09/09
  - Re: cellfunc vs cell2mat speed?, Levente Torok, 2008/09/09
    - Re: cellfunc vs cell2mat speed?, David Bateman, 2008/09/09
    - Re: cellfunc vs cell2mat speed?, Levente Torok, 2008/09/09
    - Re: cellfunc vs cell2mat speed?, David Bateman, 2008/09/09
    - [Changeset} Re: cellfunc vs cell2mat speed?, David Bateman, 2008/09/09
    - [Changeset} Re: cellfunc vs cell2mat speed?, John W. Eaton, 2008/09/11
    - Re: [Changeset} Re: cellfunc vs cell2mat speed?, David Bateman <=
    - Re: [Changeset} Re: cellfunc vs cell2mat speed?, John W. Eaton, 2008/09/17
    - Re: [Changeset} Re: cellfunc vs cell2mat speed?, dbateman, 2008/09/17
    - Re: [Changeset} Re: cellfunc vs cell2mat speed?, John W. Eaton, 2008/09/17

Prev by Date: Re: Displaying an animation / "movie"
Next by Date: Octave plot on time/date x-axis
Previous by thread: [Changeset} Re: cellfunc vs cell2mat speed?
Next by thread: Re: [Changeset} Re: cellfunc vs cell2mat speed?
Index(es):
- Date
- Thread