freepooma-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH] Speed up guard update.


From: Jeffrey D. Oldham
Subject: Re: [PATCH] Speed up guard update.
Date: Thu, 15 Jan 2004 19:04:14 -0800
User-agent: Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.4) Gecko/20030624

Richard Guenther wrote:
Hi!

This is a refined (aka shorter) patch which unifies the tracking of
up-to-date faces and the special optimized copy for MPI.

Tested on serial ia32 Linux with gcc 3.4; no regressions.

Ok?

Yes, assuming the user interface did not change. It looks like GCFillInfo's interface changed, but existing code will still run because the added parameter has a default argument.

Richard.


2004Jan14  Richard Guenther <address@hidden>

        * src/Engine/Intersector.h: track used guard faces.
        src/Engine/MultiPatchEngine.h: track up-to-dateness per
        face using a bitmask.
        src/Engine/Stencil.h: track used guard faces.
        src/Field/DiffOps/FieldStencil.h: track used guard faces.
        src/Layout/GridLayout.cpp: record face of guard update.
        src/Layout/LayoutBase.h: add face_m member to guard update
        struct.
        src/Layout/UniformGridLayout.cpp: record face of guard update.
        src/Engine/MultiPatchEngine.cpp: during fillGuards(), update only
        the faces that are both needed and not up-to-date. Do manual
        Send/Receive of the inner guards domain for MPI.

--- cvs/r2/src/Engine/Intersector.h     2004-01-14 20:08:06.000000000 +0100
+++ pooma-mpi3/r2/src/Engine/Intersector.h      2004-01-14 20:13:32.000000000 +0100
@@ -129,7 +129,8 @@
   }

   template<class Engine, int Dim2>
-  bool intersect(const Engine &engine, const GuardLayers<Dim2> &guard)
+  bool intersect(const Engine &engine, const GuardLayers<Dim2> &guard,
+                GuardLayers<Dim2> &usedGuards)
   {
     CTAssert(Engine::dimensions == Dim);

@@ -145,9 +146,7 @@
       // If we've seen this ID before, we're done.

       if (ids_m[i] == layout.ID())
-      {
        return false;
-      }

       // If we've seen the base ID before and the base domain is the same
       // we're done.
@@ -157,10 +156,27 @@
       {
        shared(layout.ID(),ids_m[i]);

-       // In this case we are using the guard cells unless this domain
-       // is exactly the same as one we've seen before.
+       // was: return (!sameBaseDomain(i,layout.baseDomain()));

-       return (!sameBaseDomain(i,layout.baseDomain()));
+        // We should be able to find out the actual shape of the
+       // used internal guards here, rather than just returning bool.
+       // Something like:
+
+       // But what to do if Dim2 > baseDims_m[i]?
+       if (baseDims_m[i] < Dim2)
+         return true;
+
+       bool used = false;
+       for (int j = 0; j < Dim2; j++)
+       {
+         usedGuards.lower(j) = std::max(0, baseDomains_m[i][j].first() - 
layout.baseDomain()[j].first());
+         if (usedGuards.lower(j) != 0)
+           used = true;
+         usedGuards.upper(j) = std::max(0, layout.baseDomain()[j].last() - 
baseDomains_m[i][j].last());
+         if (usedGuards.upper(j) != 0)
+           used = true;
+       }
+       return used;
       }
     }

@@ -437,9 +453,9 @@

   template<class Engine, int Dim2>
   inline
-  bool intersect(const Engine &l, const GuardLayers<Dim2> &guard)
+  bool intersect(const Engine &l, const GuardLayers<Dim2> &guard, 
GuardLayers<Dim2> &usedGuards)
   {
-    return (data()->intersect(l,guard));
+    return (data()->intersect(l,guard,usedGuards));
   }

 private:
--- cvs/r2/src/Engine/MultiPatchEngine.h        2004-01-14 20:11:36.000000000 +0100
+++ pooma-mpi3/r2/src/Engine/MultiPatchEngine.h 2004-01-14 20:13:32.000000000 +0100
@@ -628,13 +628,18 @@
   //---------------------------------------------------------------------------
   /// Fill the internal guard cells.

-  inline void fillGuards() const
+  inline void fillGuards(const GuardLayers<Dim>& g) const
   {
-    fillGuardsHandler(WrappedInt<Layout_t::supportsGuards>());
+    fillGuardsHandler(g, WrappedInt<Layout_t::supportsGuards>());
+  }
+
+  inline void fillGuards() const
+  {
+    fillGuards(layout().internalGuards());
   }

-  inline void fillGuardsHandler(const WrappedInt<false>&) const { };
-  void fillGuardsHandler(const WrappedInt<true>&) const ;
+  inline void fillGuardsHandler(const GuardLayers<Dim>&, const 
WrappedInt<false>&) const { };
+  void fillGuardsHandler(const GuardLayers<Dim>&, const WrappedInt<true>&) 
const ;

   //---------------------------------------------------------------------------
   /// Set the internal guard cells to a particular value.
@@ -650,14 +655,31 @@
   /// Set and get the dirty flag (fillGuards is a no-op unless the
   /// dirty flag is true).

+  inline int dirty() const { return *pDirty_m; }
+
   inline void setDirty() const
   {
-    *pDirty_m = true;
+    *pDirty_m = (1<<(Dim*2))-1;
+  }
+
+  inline void clearDirty(int face = -1) const
+  {
+    if (face == -1)
+      *pDirty_m = 0;
+    else {
+      PAssert(face >= 0 && face <= Dim*2-1);
+      *pDirty_m &= ~(1<<face);
+    }
   }

-  inline bool isDirty() const
+  inline bool isDirty(int face = -1) const
   {
-    return *pDirty_m;
+    if (face == -1)
+      return *pDirty_m != 0;
+    else {
+      PAssert(face >= 0 && face <= Dim*2-1);
+      return *pDirty_m & (1<<face);
+    }
   }

   //============================================================
@@ -874,7 +896,7 @@
   /// must share the same flag. We use the reference count in
   /// data_m to decide whether to clean this up.

-  bool *pDirty_m;
+  int *pDirty_m;
 };


@@ -1193,6 +1215,11 @@
     baseEngine_m.fillGuards();
   }

+  inline void fillGuards(const GuardLayers<Dim2>& g) const
+  {
+    baseEngine_m.fillGuards(g);
+  }
+
   //---------------------------------------------------------------------------
   /// Set the internal guard cells to a particular value (default zero)

@@ -1217,10 +1244,15 @@
   {
     baseEngine_m.setDirty();
   }
+
+  inline void clearDirty(int face=-1) const
+  {
+    baseEngine_m.clearDirty(face);
+  }

-  inline bool isDirty() const
+  inline bool isDirty(int face=-1) const
   {
-    return baseEngine_m.isDirty();
+    return baseEngine_m.isDirty(face);
   }

   //---------------------------------------------------------------------------
@@ -1694,12 +1726,13 @@
   apply(const Engine<Dim,T,MultiPatch<LayoutTag,PatchTag> > &engine,
        const ExpressionApply<IntersectorTag<Intersect> > &tag)
   {
+    GuardLayers<Dim> usedGuards;
     bool useGuards =
       tag.tag().intersector_m.intersect(engine,
-                                 engine.layout().internalGuards());
+                                 engine.layout().internalGuards(), usedGuards);

     if (useGuards)
-      engine.fillGuards();
+      engine.fillGuards(usedGuards);

     return 0;
   }
@@ -1725,13 +1758,14 @@
               const ExpressionApply<IntersectorTag<Intersect> > &tag,
               const WrappedInt<true> &)
   {
+    GuardLayers<BD> usedGuards;
     bool useGuards =
       tag.tag().intersector_m.
       intersect(engine,
-               engine.layout().baseLayout().internalGuards());
+               engine.layout().baseLayout().internalGuards(), usedGuards);

     if (useGuards)
-      engine.fillGuards();
+      engine.fillGuards(usedGuards);

     return 0;
   }
@@ -1741,7 +1775,7 @@
               const ExpressionApply<IntersectorTag<Intersect> > &tag,
               const WrappedInt<false> &)
   {
-    tag.tag().intersector_m.intersect(engine, GuardLayers<Dim>());
+    tag.tag().intersector_m.intersect(engine);
     return 0;
   }
 };
--- cvs/r2/src/Engine/Stencil.h 2004-01-14 20:08:07.000000000 +0100
+++ pooma-mpi3/r2/src/Engine/Stencil.h  2004-01-14 20:13:32.000000000 +0100
@@ -752,11 +752,14 @@

   StencilIntersector(const This_t &model)
     : domain_m(model.domain_m),
+      stencilExtent_m(model.stencilExtent_m),
       intersector_m(model.intersector_m)
   { }

-  StencilIntersector(const Interval<Dim> &domain, const Intersect &intersect)
+  StencilIntersector(const Interval<Dim> &domain, const Intersect &intersect,
+                 const GuardLayers<Dim> &stencilExtent)
     : domain_m(domain),
+      stencilExtent_m(stencilExtent),
       intersector_m(intersect)
   { }

@@ -766,6 +769,7 @@
     {
       intersector_m = model.intersector_m;
       domain_m = model.domain_m;
+      stencilExtent_m = model.stencilExtent_m;
     }
     return *this;
   }
@@ -807,14 +811,19 @@

   template<class Engine, int Dim2>
   inline
-  bool intersect(const Engine &engine, const GuardLayers<Dim2> &)
+  bool intersect(const Engine &engine, const GuardLayers<Dim2> &g,
+                 GuardLayers<Dim> &usedGuards)
   {
     intersect(engine);
+    // FIXME: accumulate used guards from intersect above and
+    // stencil extent? I.e. allow  Stencil<>(a(i-1)+a(i+1))?
+    usedGuards = stencilExtent_m;
     return true;
   }

 private:
   Interval<Dim> domain_m;
+  GuardLayers<Dim> stencilExtent_m;
   Intersect     intersector_m;
 };

@@ -833,8 +842,14 @@
               const ExpressionApply<IntersectorTag<Intersect> > &tag)
   {
     typedef StencilIntersector<D, Intersect> NewIntersector_t;
+    GuardLayers<D> stencilExtent;
+    for (int i=0; i<D; ++i) {
+      stencilExtent.lower(i) = engine.function().lowerExtent(i);
+      stencilExtent.upper(i) = engine.function().upperExtent(i);
+    }
     NewIntersector_t newIntersector(engine.intersectDomain(),
-                                   tag.tag().intersector_m);
+                                   tag.tag().intersector_m,
+                                   stencilExtent);

     expressionApply(engine.expression(),
                    IntersectorTag<NewIntersector_t>(newIntersector));
--- cvs/r2/src/Field/DiffOps/FieldStencil.h     2004-01-14 20:08:09.000000000 +0100
+++ pooma-mpi3/r2/src/Field/DiffOps/FieldStencil.h      2004-01-14 20:13:32.000000000 +0100
@@ -614,11 +617,13 @@
   // Constructors

   FieldStencilIntersector(const This_t &model)
-    : domain_m(model.domain_m), intersector_m(model.intersector_m)
+    : domain_m(model.domain_m), stencilExtent_m(model.stencilExtent_m),
+      intersector_m(model.intersector_m)
   { }

-  FieldStencilIntersector(const Domain_t &dom, const Intersect &intersect)
-    : domain_m(dom), intersector_m(intersect)
+  FieldStencilIntersector(const Domain_t &dom, const Intersect &intersect,
+                 const GuardLayers<Dim> &stencilExtent)
+    : domain_m(dom), stencilExtent_m(stencilExtent), intersector_m(intersect)
   { }

   This_t &operator=(const This_t &model)
@@ -626,6 +631,7 @@
     if (this != &model)
     {
       domain_m = model.domain_m;
+      stencilExtent_m = model.stencilExtent_m;
       intersector_m = model.intersector_m;
     }
     return *this;
@@ -662,9 +668,13 @@
   }

   template<class Engine, int Dim2>
-  inline bool intersect(const Engine &engine, const GuardLayers<Dim2> &)
+  inline bool intersect(const Engine &engine, const GuardLayers<Dim2> &,
+                       GuardLayers<Dim> &usedGuards)
   {
     intersect(engine);
+    // FIXME: accumulate used guards from intersect above and
+    // stencil extent? I.e. allow  Stencil<>(a(i-1)+a(i+1))?
+    usedGuards = stencilExtent_m;
     return true;
   }

@@ -672,6 +682,7 @@


   Interval<Dim> domain_m;
+  GuardLayers<Dim> stencilExtent_m;
   Intersect     intersector_m;
 };

@@ -699,8 +710,14 @@
     // cells results in an error in the multipatch inode view.)

     typedef FieldStencilIntersector<Dim, Intersect> NewIntersector_t;
+    GuardLayers<Dim> stencilExtent;
+    for (int i=0; i<Dim; ++i) {
+      stencilExtent.lower(i) = engine.functor().lowerExtent(i);
+      stencilExtent.upper(i) = engine.functor().upperExtent(i);
+    }
     NewIntersector_t newIntersector(engine.intersectDomain(),
-                                   tag.tag().intersector_m);
+                                   tag.tag().intersector_m,
+                                   stencilExtent);

     expressionApply(engine.field(),
                    IntersectorTag<NewIntersector_t>(newIntersector));
--- cvs/r2/src/Layout/GridLayout.cpp    2004-01-14 20:08:10.000000000 +0100
+++ pooma-mpi3/r2/src/Layout/GridLayout.cpp     2004-01-14 20:13:32.000000000 +0100
@@ -429,7 +436,7 @@

                      // Now, push IDs and source into cache...

-                     this->gcFillList_m.push_back(GCFillInfo_t(gcdom, 
sourceID, destID));
+                     this->gcFillList_m.push_back(GCFillInfo_t(gcdom, 
sourceID, destID, d*2));
                    }
                }
            }
@@ -481,7 +488,7 @@

                      // Now, push IDs and source into cache...

-                     this->gcFillList_m.push_back(GCFillInfo_t(gcdom, 
sourceID, destID));
+                     this->gcFillList_m.push_back(GCFillInfo_t(gcdom, 
sourceID, destID, d*2+1));
                    }
                }
            }
--- cvs/r2/src/Layout/LayoutBase.h      2004-01-14 20:08:12.000000000 +0100
+++ pooma-mpi3/r2/src/Layout/LayoutBase.h       2004-01-14 20:13:32.000000000 +0100
@@ -119,8 +121,8 @@

   struct GCFillInfo
   {
-    GCFillInfo(const Domain_t &dom, int ownedID, int guardID)
-    : domain_m(dom), ownedID_m(ownedID), guardID_m(guardID) { }
+    GCFillInfo(const Domain_t &dom, int ownedID, int guardID, int face=-1)
+    : domain_m(dom), ownedID_m(ownedID), guardID_m(guardID), face_m(face) { }

     // Get a CW warning about this not having a default constructor
     // when we instantiate the vector<GCFillInfo> below. This never
@@ -131,6 +133,7 @@
     Domain_t domain_m;    // guard layer domain
     int      ownedID_m;   // node ID for which domain_m is owned
     int      guardID_m;   // node ID for which domain_m is in the guards
+    int      face_m;      // destination face of the guard layer (or -1, if 
unknown)

     Domain_t & domain() { return domain_m;}
     int & ownedID() { return ownedID_m;}
--- cvs/r2/src/Layout/UniformGridLayout.cpp     2004-01-14 20:08:13.000000000 +0100
+++ pooma-mpi3/r2/src/Layout/UniformGridLayout.cpp      2004-01-14 20:13:32.000000000 +0100
@@ -279,7 +279,7 @@
 //-----------------------------------------------------------------------------
 //
 // template <int Dim>
-// void UniformGridLayout<Dim>::calcGCFillList()
+// void UniformGridLayoutData<Dim>::calcGCFillList()
 //
 // Calculates the cached information needed by MultiPatch Engine to
 // fill the guard cells.
@@ -370,7 +370,7 @@
                    this->all_m[sourceID]->context() == Pooma::context() ||
                    this->all_m[destID]->context() == Pooma::context()
                    )
-                
this->gcFillList_m.push_back(GCFillInfo_t(gcdom,sourceID,destID));
+                
this->gcFillList_m.push_back(GCFillInfo_t(gcdom,sourceID,destID,d*2));
               }
           }

@@ -417,7 +417,7 @@
                    this->all_m[sourceID]->context() == Pooma::context() ||
                    this->all_m[destID]->context() == Pooma::context()
                    )
-                 
this->gcFillList_m.push_back(GCFillInfo_t(gcdom,sourceID,destID));
+                 
this->gcFillList_m.push_back(GCFillInfo_t(gcdom,sourceID,destID,d*2+1));
               }
           }
       }
--- cvs/r2/src/Engine/MultiPatchEngine.cpp      2004-01-14 20:11:34.000000000 +0100
+++ pooma-mpi3/r2/src/Engine/MultiPatchEngine.cpp       2004-01-14 20:23:23.000000000 +0100
@@ -34,6 +34,7 @@
 #include "Engine/CompressedFraction.h"
 #include "Array/Array.h"
 #include "Tulip/ReduceOverContexts.h"
+#include "Tulip/SendReceive.h"
 #include "Threads/PoomaCSem.h"
 #include "Domain/IteratorPairDomain.h"

@@ -77,16 +78,18 @@
 Engine(const Layout_t &layout)
   : layout_m(layout),
     data_m(layout.sizeGlobal()),
-    pDirty_m(new bool(true))
+    pDirty_m(new int)
 {
   typedef typename Layout_t::Value_t Node_t;

+  setDirty();
+
   // check for correct match of PatchTag and the mapper used to make the
   // layout.
   // THIS IS A HACK! we test on the context of the first patch, and if it
   // is -1, we have a Layout made with the LocalMapper.

-#if POOMA_CHEETAH
+#if POOMA_MESSAGING

   if( layout_m.nodeListGlobal().size() > 0)
   {
@@ -247,7 +250,7 @@
   PAssert(data_m.isValid());
   if (data_m.isShared()) {
     data_m.makeOwnCopy();
-    pDirty_m = new bool(*pDirty_m);
+    pDirty_m = new int(*pDirty_m);
   }

   return *this;
@@ -261,45 +264,88 @@
 //
 //-----------------------------------------------------------------------------

+/// Guard layer assign between non-remote engines, just use the
+/// ET mechanisms
+
+template <int Dim, class T, class Tag>
+static inline
+void simpleAssign(const Array<Dim, T, Tag>& lhs,
+                 const Array<Dim, T, Tag>& rhs,
+                 const Interval<Dim>& domain)
+{
+  lhs(domain) = rhs(domain);
+}
+
+/// Guard layer assign between remote engines, use Send/Receive directly
+/// to avoid one extra copy of the data.
+
+template <int Dim, class T, class Tag>
+static inline
+void simpleAssign(const Array<Dim, T, Remote<Tag> >& lhs,
+                 const Array<Dim, T, Remote<Tag> >& rhs,
+                 const Interval<Dim>& domain)
+{
+  if (lhs.engine().owningContext() == rhs.engine().owningContext())
+    lhs(domain) = rhs(domain);
+  else {
+    typedef typename NewEngine<Engine<Dim, T, Tag>, Interval<Dim> >::Type_t 
ViewEngine_t;
+    if (lhs.engine().engineIsLocal())
+      Receive<ViewEngine_t>::receive(ViewEngine_t(lhs.engine().localEngine(), 
domain),
+                                    rhs.engine().owningContext());
+    else if (rhs.engine().engineIsLocal())
+      SendReceive::send(ViewEngine_t(rhs.engine().localEngine(), domain),
+                       lhs.engine().owningContext());
+  }
+}
+
 template <int Dim, class T, class LayoutTag, class PatchTag>
 void Engine<Dim, T, MultiPatch<LayoutTag,PatchTag> >::
-fillGuardsHandler(const WrappedInt<true> &) const
+fillGuardsHandler(const GuardLayers<Dim>& g, const WrappedInt<true> &) const
 {
   if (!isDirty()) return;
-
-#if POOMA_PURIFY
-
-  // This is here to remove spurious UMRs that result when un-initialized
-  // guards are copied in the following loop. All of the unitialized data
-  // is ultimately overwritten with good data, so I don't see why purify
-  // calls these UMRs in stead of unitialized memory copies, but it does.
-  // I don't do this in general since it would be slow and since T(0) is
-  // not generally valid. This does mean that fillGuards() will fail
-  // with purify for types that do not know what to do with T(0).
-
-  setGuards(T(0));
-
-#endif

+  int updated = 0;
   typename Layout_t::FillIterator_t p = layout_m.beginFillList();
-
+
   while (p != layout_m.endFillList())
     {
       int src  = p->ownedID_m;
       int dest = p->guardID_m;

-      // Create patch arrays that see the entire patch:
+      // Skip face, if not dirty.
+
+      if (isDirty(p->face_m)) {
+
+        // Check, if the p->domain_m is a guard which matches the
+        // needed guard g.
+
+       int d = p->face_m/2;
+       int guardSizeNeeded = p->face_m & 1 ? g.upper(d) : g.lower(d);
+        if (!(p->face_m != -1
+             && guardSizeNeeded == 0)) {
+
+          // Create patch arrays that see the entire patch:

-      Array<Dim, T, PatchTag> lhs(data()[dest]), rhs(data()[src]);
+          Array<Dim, T, PatchTag> lhs(data()[dest]), rhs(data()[src]);

-      // Now do assignment from the subdomains.
+          // Now do assignment from the subdomains.
+#if POOMA_MPI
+          simpleAssign(lhs, rhs, p->domain_m);
+#else
+          lhs(p->domain_m) = rhs(p->domain_m);
+#endif
+
+         // Mark up-to-date.
+         updated |= 1<<p->face_m;
+
+       }
+
+      }

-      lhs(p->domain_m) = rhs(p->domain_m);
-
       ++p;
     }
-
-  *pDirty_m = false;
+
+  *pDirty_m &= ~updated;
 }


@@ -331,7 +377,7 @@
       ++p;
     }

-  *pDirty_m = true;
+  setDirty();
 }


@@ -366,7 +412,7 @@
       ++p;
     }

-  *pDirty_m = true;
+  setDirty();
 }




--
Jeffrey D. Oldham
address@hidden

reply via email to

[Prev in Thread] Current Thread [Next in Thread]