freepooma-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Freepooma-devel] [PATCH] Speeeeed up DomainIterator


From: Richard Guenther
Subject: [Freepooma-devel] [PATCH] Speeeeed up DomainIterator
Date: Thu, 7 Apr 2005 17:34:21 +0200 (CEST)

This patch builds on the domain cleanup and yields considerable
code improvements for DomainIterator.  The trick is to simplify
the CFG that is fed into the compiler considerably and to hint
it with branch probabilities to do the right BB ordering.

Also, the previous code was quite stupid in that just reversing
the counting of index_m yields in simpler loop exit tests, and
stripping the first iteration out of the increment loop and
reordering of the if/else bodies yields a much simpler CFG.

Of course we need to be careful about non-unit-stride domains
that do not allow indexing outside their domain with operator(),
like Grid, so we provide a version for unit-stride domains and
one for not-unit-stride domains.


A remaining problem and optimization opportunity is the
redundant iterator data, so that we need two increments and one
decrement per iteration, and what is worse, we're creating two
exit tests for the inner loop, though they are ordered such that
modern processors will predict them correctly.


The goal is to (in the very distant future...) replace all this
int indexing and code replication with Loc<> indexing and
DomainIterators.  But the compilers have a long way to go.


Richard.


Index: DomainIterator.h
===================================================================
RCS file: /home/rguenth/CVS/pooma-cpt/src/Domain/DomainIterator.h,v
retrieving revision 1.1.1.1
diff -c -3 -r1.1.1.1 DomainIterator.h
*** DomainIterator.h    22 Feb 2005 12:32:54 -0000      1.1.1.1
--- DomainIterator.h    7 Apr 2005 15:21:29 -0000
***************
*** 36,41 ****
--- 36,42 ----
  
//-----------------------------------------------------------------------------

  #include "Utilities/PAssert.h"
+ #include "Utilities/WrappedInt.h"
  #include "Domain/DomainTraits.h"

  #include <stddef.h> // ptrdiff_t
***************
*** 85,91 ****
    // iterators to the start. This constructor sets up a "begin" iterator.

    DomainIterator(const Dom &d, int size = 0)
!     : domain_m(d), loc_m(d.firsts()), index_m(size)
      {
        PAssert(index_m >= 0 && index_m <= domain_m.size());
        for (int i=0; i < dimensions; ++i)
--- 86,92 ----
    // iterators to the start. This constructor sets up a "begin" iterator.

    DomainIterator(const Dom &d, int size = 0)
!     : domain_m(d), loc_m(d.firsts()), index_m(domain_m.size()-size)
      {
        PAssert(index_m >= 0 && index_m <= domain_m.size());
        for (int i=0; i < dimensions; ++i)
***************
*** 153,159 ****

    bool done() const
      {
!       return (index_m >= domain_m.size());
      }

    //============================================================
--- 154,160 ----

    bool done() const
      {
!       return (index_m == 0);
      }

    //============================================================
***************
*** 178,184 ****

    This_t &operator++()
      {
!       increment();
        return *this;
      }

--- 179,185 ----

    This_t &operator++()
      {
!       increment(WrappedInt<Domain_t::unitStride>());
        return *this;
      }

***************
*** 188,194 ****
    This_t operator++(int)
      {
        DomainIterator<Dom> save(*this);
!       increment();
        return save;
      }

--- 189,195 ----
    This_t operator++(int)
      {
        DomainIterator<Dom> save(*this);
!       increment(WrappedInt<Domain_t::unitStride>());
        return save;
      }

***************
*** 218,250 ****
    // Implementation functions
    //============================================================

!   // Increment iterator.

!   void increment()
      {
        PAssert(!done());

!       for (int i = 0; i < dimensions; ++i)
        {
!         if (++(current_m[i]) >= domain_m[i].length())
!           {
!             if (i < dimensions-1)
!               {
!                 current_m[i] = 0;
!                 loc_m[i] = domain_m[i].first();
!               }
!           }
!         else
!           {
!             loc_m[i] = domain_m[i](current_m[i]);
!             break;
!           }
        }

!       // increase our total index

!       ++index_m;
      }
  };


--- 219,303 ----
    // Implementation functions
    //============================================================

!   // Increment iterator for unit-stride domains.
!   // To get optimal basic-block ordering, we try to help the
!   // compiler with the branch probabilities.
!
! #if __GNUC__ < 3
! #define __builtin_expect(x, y) x
! #endif

!   void increment(const WrappedInt<true>&)
      {
        PAssert(!done());

!       // increment
!       --index_m;
!       ++(current_m[0]);
!       ++loc_m[0];
!
!       // likely bail-out
!       if (__builtin_expect(current_m[0] < domain_m[0].length(), 1))
!         return;
!
!       // handle remaining dimensions and resetting
!       for (int i = 1; i < dimensions; ++i)
        {
!         // reset overflowed dimension
!         current_m[i-1] = 0;
!         loc_m[i-1] = domain_m[i-1].first();
!
!         // increment
!         ++(current_m[i]);
!         ++loc_m[i];
!
!         // likely bail-out
!         if (__builtin_expect(current_m[i] < domain_m[i].length(), 1))
!           return;
        }
+     }
+
+   // Increment for non-unit-stride domains.  We need to be careful
+   // not to request points outside of the domain.  Domains like Grid
+   // would choke on that.  Unfortunately this results in a more
+   // complex CFG for the compiler to optimize.
+
+   void increment(const WrappedInt<false>&)
+     {
+       PAssert(!done());

!       // increment
!       --index_m;
!       ++(current_m[0]);
!
!       // likely bail-out
!       if (__builtin_expect(current_m[0] < domain_m[0].length(), 1)) {
!         loc_m[0] = domain_m[0](current_m[0]);
!         return;
!       }

!       // handle remaining dimensions and resetting
!       for (int i = 1; i < dimensions; ++i)
!       {
!         // reset overflowed dimension
!         current_m[i-1] = 0;
!         loc_m[i-1] = domain_m[i-1].first();
!
!         // increment
!         ++(current_m[i]);
!
!         // likely bail-out
!         if (__builtin_expect(current_m[i] < domain_m[i].length(), 1)) {
!           loc_m[i] = domain_m[i](current_m[i]);
!           return;
!         }
!       }
      }
+
+ #if __GNUC__ < 3
+ #undef __builtin_expect
+ #endif
+
  };






reply via email to

[Prev in Thread] Current Thread [Next in Thread]