Index: src/SkalarFunction.cc =================================================================== --- src/SkalarFunction.cc (revision 164) +++ src/SkalarFunction.cc (working copy) @@ -18,6 +18,9 @@ along with this program. If not, see . */ +#include +#include + #include "SkalarFunction.hh" #include "Value.hh" #include "Workspace.hh" @@ -57,27 +60,54 @@ Bif_F12_WITHOUT Bif_F12_WITHOUT::fun; //----------------------------------------------------------------------------- + +class ApplyRavel { +public: + ApplyRavel( SkalarFunction &function_in, + Value_P &B_in, + Value_P &Z_in, + prim_f1 &fun_in ) + : function( function_in ), B( B_in ), Z( Z_in ), fun( fun_in ) {} + + void operator()( const tbb::blocked_range &range ) const { + for( ShapeItem c = range.begin() ; c != range.end() ; c++ ) { + const Cell * cell_B = &B->get_ravel(c); + Cell * cell_Z = &Z->get_ravel(c); + if (cell_B->is_pointer_cell()) + { + Token token = function.eval_skalar_B(cell_B->get_pointer_value(), fun); + new (cell_Z) PointerCell(token.get_apl_val()); + } + else + { + (cell_B->*fun)(cell_Z); + } + } + } + +private: + SkalarFunction &function; + Value_P &B; + Value_P &Z; + prim_f1 &fun; +}; + Token SkalarFunction::eval_skalar_B(Value_P B, prim_f1 fun) { const ShapeItem count = B->element_count(); if (count == 0) return eval_fill_B(B); -Value_P Z(new Value(B->get_shape(), LOC)); - loop(c, count) - { - const Cell * cell_B = &B->get_ravel(c); - Cell * cell_Z = &Z->get_ravel(c); - if (cell_B->is_pointer_cell()) - { - Token token = eval_skalar_B(cell_B->get_pointer_value(), fun); - new (cell_Z) PointerCell(token.get_apl_val()); - } - else - { - (cell_B->*fun)(cell_Z); - } - } + Value_P Z; + if( B->is_temp() ) { + Z = B; + } + else { + Z = new Value(B->get_shape(), LOC); + Z->set_temp(); + } +//Value_P Z(new Value(B->get_shape(), LOC)); +// Value_P Z(Zp); if (count == 0) // Z was empty (hence B was empty) { @@ -85,7 +115,36 @@ if (cB.is_pointer_cell()) Z->get_ravel(0).init(cB); else new (&Z->get_ravel(0)) 
IntCell(0); } + else { + struct timespec start_ts, end_ts; + clock_gettime( CLOCK_REALTIME, &start_ts ); +#ifdef PARALLEL_DISABLED + loop(c, count) + { + const Cell * cell_B = &B->get_ravel(c); + Cell * cell_Z = &Z->get_ravel(c); + if (cell_B->is_pointer_cell()) + { + Token token = eval_skalar_B(cell_B->get_pointer_value(), fun); + new (cell_Z) PointerCell(token.get_apl_val()); + } + else + { + (cell_B->*fun)(cell_Z); + } + } +#else + tbb::parallel_for( tbb::blocked_range( 0, count ), ApplyRavel( *this, B, Z, fun ) ); +#endif +#if 0 + clock_gettime( CLOCK_REALTIME, &end_ts ); + long millis = (end_ts.tv_sec * 1000 + end_ts.tv_nsec / 1000000) + - (start_ts.tv_sec * 1000 + start_ts.tv_nsec / 1000000); + cout << "Total time: " << millis << endl; +#endif + } + Z->check_value(LOC); return Token(TOK_APL_VALUE1, Z); } Index: src/Symbol.cc =================================================================== --- src/Symbol.cc (revision 164) +++ src/Symbol.cc (working copy) @@ -126,7 +126,10 @@ switch(vs.name_class) { case NC_UNUSED_USER_NAME: - new_value = new_value->clone(loc); + if( !new_value->is_temp() ) { + new_value = new_value->clone(loc); + } + new_value->clear_temp(); vs.name_class = NC_VARIABLE; vs.apl_val = new_value; @@ -136,7 +139,10 @@ case NC_VARIABLE: if (vs.apl_val == new_value) return; // X←X - new_value = new_value->clone(loc); + if( !new_value->is_temp() ) { + new_value = new_value->clone(loc); + } + new_value->clear_temp(); // un-assign and erase old value // Index: src/Value.cc =================================================================== --- src/Value.cc (revision 164) +++ src/Value.cc (working copy) @@ -18,6 +18,8 @@ along with this program. If not, see . 
*/ +#include + #include "CDR_string.hh" #include "CharCell.hh" #include "Common.hh" @@ -127,6 +129,7 @@ flags(VF_NONE), valid_ravel_items(0) { + set_temp(); ADD_EVENT(this, VHE_Create, 0, loc); init_ravel(); } @@ -136,6 +139,7 @@ flags(VF_forever), valid_ravel_items(0) { + set_temp(); // default shape is skalar // ADD_EVENT(this, VHE_Create, 0, loc); @@ -249,6 +253,7 @@ flags(VF_NONE), valid_ravel_items(0) { + set_temp(); ADD_EVENT(this, VHE_Create, 0, loc); init_ravel(); } @@ -259,6 +264,7 @@ flags(VF_NONE), valid_ravel_items(0) { + set_temp(); ADD_EVENT(this, VHE_Create, 0, loc); init_ravel(); @@ -280,6 +286,7 @@ flags(VF_NONE), valid_ravel_items(0) { + set_temp(); ADD_EVENT(this, VHE_Create, 0, loc); init_ravel(); @@ -301,6 +308,7 @@ flags(VF_NONE), valid_ravel_items(0) { + set_temp(); ADD_EVENT(this, VHE_Create, 0, loc); init_ravel(); @@ -316,6 +324,7 @@ flags(VF_NONE), valid_ravel_items(0) { + set_temp(); ADD_EVENT(this, VHE_Create, 0, loc); init_ravel(); } @@ -325,6 +334,7 @@ flags(VF_NONE), valid_ravel_items(0) { + set_temp(); ADD_EVENT(this, VHE_Create, 0, loc); init_ravel(); @@ -1634,19 +1644,43 @@ } //----------------------------------------------------------------------------- + +class ApplyClone { +public: + ApplyClone( const Cell *src_in, Cell *dst_in ) : src( src_in ), dst( dst_in ) {} + void operator()( const tbb::blocked_range &range ) const { + for( size_t p = range.begin() ; p != range.end() ; p++ ) { + dst[p].init( src[p] ); + } + } + +private: + const Cell *src; + Cell *dst; +}; + Value_P Value::clone(const char * loc) const { -Value_P ret(new Value(get_shape(), loc)); + if( is_temp() ) { + return Value_P( const_cast( this ), LOC ); + } + else { + Value_P ret(new Value(get_shape(), loc)); -const Cell * src = &get_ravel(0); -Cell * dst = &ret->get_ravel(0); -const ShapeItem count = nz_element_count(); + const Cell * src = &get_ravel(0); + Cell * dst = &ret->get_ravel(0); + const ShapeItem count = nz_element_count(); - loop(c, count) 
dst++->init(*src++); +#ifdef PARALLEL_DISABLED + loop(c, count) dst++->init(*src++); +#else + tbb::parallel_for( tbb::blocked_range( 0, count ), ApplyClone( src, dst ) ); +#endif - ret->check_value(LOC); - return ret; + ret->check_value(LOC); + return ret; + } } //----------------------------------------------------------------------------- /// lrp p.138: S←⍴⍴A + NOTCHAR (per column) Index: src/Value.hh =================================================================== --- src/Value.hh (revision 164) +++ src/Value.hh (working copy) @@ -340,6 +340,7 @@ # define clear_forever() CLEAR_forever(_LOC) # define clear_marked() CLEAR_marked(_LOC) +# define clear_temp() CLEAR_temp(_LOC) VF_flag(forever) VF_flag(complete)