gnuastro-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[gnuastro-commits] master 6f7ff61 08/19: Library (match.h): Using the ol


From: Mohammad Akhlaghi
Subject: [gnuastro-commits] master 6f7ff61 08/19: Library (match.h): Using the old infra-structure for double-matches
Date: Sun, 14 Nov 2021 20:40:58 -0500 (EST)

branch: master
commit 6f7ff61e87d3901e1b42a68897a32d91ba06f0ac
Author: Mohammad Akhlaghi <mohammad@akhlaghi.org>
Commit: Mohammad Akhlaghi <mohammad@akhlaghi.org>

    Library (match.h): Using the old infra-structure for double-matches
    
    Until now, we hadn't considered the fact that the main body of work for
    cleaning the final match (removing multiple nearby matches and formatting
    of the output) was already implemented in the sort-based match!
    
    With this commit, instead of trying to debug the steps in the last few
    commits, I just moved everything into a temporary file (deleted them from
    'lib/match.c') and added everything modeled based on sort-based matching
    implementation: using the same 'bina' array. Once that was implemented, it
    was really easy to just use the final functions of the sort-based matching
    and everything seems to work properly now, with no segmentation
    fault.
    
    In those functions of the sort-based matching that are now also used in the
    k-d tree based matching, the '_coordinate_' part is removed and they have
    been moved to a separate (generic) part of 'lib/match.c' to highlight that
    they are low-level and shared between both matching methods.
    
    Generally, in programming, its always good to avoid re-writing a function
    and modularize/reuse previously written functions. So for example if a bug
    is found (or a feature is added) in the sort-based matching, it will
    automatically be implemented in the k-d tree based matching too and no
    extra work is necessary.
    
    Some other minor points:
    
     - The 'inplace' argument to the sort-based matching was for the sorting
       step. It is not used in the k-d tree based matching, so I removed it
       from the arguments.
    
    Things remaining:
    
     - Find a good name for the sort-based matching.
    
     - Complete all the expected features for k-d tree based matching and write
       tests for them during 'make check'.
    
     - Test to see which matching methodology is faster for how many rows. It
       is expected that for very few rows (like 10 or 100) probably the
       sort-based matching is faster, but from a cetain number of rows and
       certain number of threads, the k-d tree based matching will be
       faster. Once we find this, we will be able to implement the 'automatic'
       mode.
---
 bin/match/match.c                    |    4 +-
 during-dev-test-data/match-query.txt |    3 +
 lib/gnuastro/match.h                 |    6 +-
 lib/match.c                          | 1266 ++++++++++++++++------------------
 4 files changed, 605 insertions(+), 674 deletions(-)

diff --git a/bin/match/match.c b/bin/match/match.c
index f8f455e..f421e0a 100644
--- a/bin/match/match.c
+++ b/bin/match/match.c
@@ -524,8 +524,8 @@ match_catalog_kdtree(struct matchparams *p, size_t 
*nummatched)
       kdtree = gal_kdtree_create(p->cols1, &root);
       out = gal_match_kdtree(p->cols1, p->cols2, kdtree, root,
                              p->aperture->array, p->cp.numthreads,
-                             p->cp.minmapsize, nummatched, 1,
-                             p->cp.quietmmap);
+                             p->cp.minmapsize, p->cp.quietmmap,
+                             nummatched);
       gal_list_data_free(kdtree);
       break;
 
diff --git a/during-dev-test-data/match-query.txt 
b/during-dev-test-data/match-query.txt
new file mode 100644
index 0000000..f88c129
--- /dev/null
+++ b/during-dev-test-data/match-query.txt
@@ -0,0 +1,3 @@
+7.2      2.1
+7.1      2.01
+4.2      7.01
diff --git a/lib/gnuastro/match.h b/lib/gnuastro/match.h
index 866b66a..1921464 100644
--- a/lib/gnuastro/match.h
+++ b/lib/gnuastro/match.h
@@ -50,12 +50,14 @@ gal_match_coordinates(gal_data_t *coord1, gal_data_t 
*coord2,
                       int inplace, size_t minmapsize, int quietmmap,
                       size_t *nummatched);
 
-
 gal_data_t *
 gal_match_kdtree(gal_data_t *coord1, gal_data_t *coord2,
                  gal_data_t *coord1_kdtree, size_t kdtree_root,
                  double *aperture, size_t numthreads, size_t minmapsize,
-                 size_t *nummatched, int inplace, int quietmmap);
+                 int quietmmap, size_t *nummatched);
+
+
+
 
 
 __END_C_DECLS    /* From C++ preparations */
diff --git a/lib/match.c b/lib/match.c
index ad5d800..6a0f9fb 100644
--- a/lib/match.c
+++ b/lib/match.c
@@ -49,11 +49,11 @@ along with Gnuastro. If not, see 
<http://www.gnu.org/licenses/>.
 /**********************************************************************/
 /*****************   Coordinate match custom list   *******************/
 /**********************************************************************/
-struct match_coordinate_sfll
+struct match_sfll
 {
   float f;
   size_t v;
-  struct match_coordinate_sfll *next;
+  struct match_sfll *next;
 };
 
 
@@ -61,10 +61,10 @@ struct match_coordinate_sfll
 
 
 static void
-match_coordinate_add_to_sfll(struct match_coordinate_sfll **list,
-                             size_t value, float fvalue)
+match_add_to_sfll(struct match_sfll **list, size_t value,
+                  float fvalue)
 {
-  struct match_coordinate_sfll *newnode;
+  struct match_sfll *newnode;
 
   errno=0;
   newnode=malloc(sizeof *newnode);
@@ -83,10 +83,10 @@ match_coordinate_add_to_sfll(struct match_coordinate_sfll 
**list,
 
 
 static void
-match_coordinate_pop_from_sfll(struct match_coordinate_sfll **list,
+match_pop_from_sfll(struct match_sfll **list,
                                size_t *value, float *fvalue)
 {
-  struct match_coordinate_sfll *tmp;
+  struct match_sfll *tmp;
   tmp=*list;
   *value=tmp->v;
   *fvalue=tmp->f;
@@ -113,252 +113,15 @@ match_coordinate_pop_from_sfll(struct 
match_coordinate_sfll **list,
 
 
 
-/********************************************************************/
-/*************            Coordinate matching           *************/
-/*************      Sanity checks and preparations      *************/
-/********************************************************************/
-/* Since these checks are repetative, its easier to have a separate
-   function for both inputs. */
-static void
-match_coordinate_sanity_check_columns(gal_data_t *coord, char *info,
-                                      int inplace, int *allf64)
-{
-  gal_data_t *tmp;
-
-  for(tmp=coord; tmp!=NULL; tmp=tmp->next)
-    {
-      if(tmp->type!=GAL_TYPE_FLOAT64)
-        {
-          if(inplace)
-            error(EXIT_FAILURE, 0, "%s: when 'inplace' is activated, the "
-                  "input coordinates must have 'float64' type. At least "
-                  "one node of the %s list has type of '%s'", __func__, info,
-                  gal_type_name(tmp->type, 1));
-          else
-            *allf64=0;
-        }
-
-      if(tmp->ndim!=1)
-        error(EXIT_FAILURE, 0, "%s: each input coordinate column must have "
-              "a single dimension (be a single column). Atleast one node of "
-              "the %s list has %zu dimensions", __func__, info, tmp->ndim);
-
-      if(tmp->size!=coord->size)
-        error(EXIT_FAILURE, 0, "%s: the nodes of each list of coordinates "
-              "must have the same number of elements. At least one node of "
-              "the %s list has %zu elements while the first has %zu "
-              "elements", __func__, info, tmp->size, coord->size);
-    }
-}
-
-
-
-
-
-/* To keep the main function clean, we'll do the sanity checks here. */
-static void
-match_coordinaes_sanity_check(gal_data_t *coord1, gal_data_t *coord2,
-                              double *aperture, int inplace, int *allf64)
-{
-  size_t ncoord1=gal_list_data_number(coord1);
-
-  /* Make sure both lists have the same number of datasets. NOTE: they
-     don't need to have the same number of elements. */
-  if( ncoord1!=gal_list_data_number(coord2) )
-    error(EXIT_FAILURE, 0, "%s: the two inputs have different numbers of "
-          "datasets (%zu and %zu respectively)", __func__, ncoord1,
-          gal_list_data_number(coord2));
-
-
-  /* This function currently only works for less than 4 dimensions. */
-  if(ncoord1>3)
-    error(EXIT_FAILURE, 0, "%s: %zu dimension matching requested, this "
-          "function currently only matches datasets with a maximum of 3 "
-          "dimensions", __func__, ncoord1);
-
-  /* Check the column properties. */
-  match_coordinate_sanity_check_columns(coord1, "first", inplace, allf64);
-  match_coordinate_sanity_check_columns(coord2, "second", inplace, allf64);
-
-  /* Check the aperture values. */
-  if(aperture[0]<=0)
-    error(EXIT_FAILURE, 0, "%s: the first value in the aperture (%g) cannot "
-          "be zero or negative", __func__, aperture[0]);
-  switch(ncoord1)
-    {
-    case 1:  /* We don't need any checks in a 1D match. */
-      break;
-
-    case 2:
-      if( (aperture[1]<=0 || aperture[1]>1))
-        error(EXIT_FAILURE, 0, "%s: the second value in the aperture (%g) "
-              "is the axis ratio, so it must be larger than zero and less "
-              "than 1", __func__, aperture[1]);
-      break;
-
-    case 3:
-      if(aperture[1]<=0 || aperture[1]>1 || aperture[2]<=0 || aperture[2]>1)
-        error(EXIT_FAILURE, 0, "%s: at least one of the second or third "
-              "values in the aperture (%g and %g respectively) is smaller "
-              "than zero or larger than one. In a 3D match, these are the "
-              "axis ratios, so they must be larger than zero and less than "
-              "1", __func__, aperture[1], aperture[2]);
-      break;
-
-    default:
-      error(EXIT_FAILURE, 0, "%s: a bug! Please contact us at %s to fix "
-            "the issue. The value %zu not recognized for 'ndim'", __func__,
-            PACKAGE_BUGREPORT, ncoord1);
-    }
-}
-
-
-
-
-
-/* To keep things clean, the sorting of each input array will be done in
-   this function. */
-static size_t *
-match_coordinates_prepare_sort(gal_data_t *coords, size_t minmapsize)
-{
-  size_t i;
-  double *darr;
-  gal_data_t *tmp;
-  size_t *permutation=gal_pointer_allocate(GAL_TYPE_SIZE_T, coords->size, 0,
-                                           __func__, "permutation");
-
-  /* Unfortunately 'gsl_sort_index' doesn't account for NaN elements. So we
-     need to set them to the maximum possible floating point value. */
-  if( gal_blank_present(coords, 1) )
-    {
-      darr=coords->array;
-      for(i=0;i<coords->size;++i)
-        if( isnan(darr[i]) ) darr[i]=FLT_MAX;
-    }
-
-  /* Get the permutation necessary to sort all the columns (based on the
-     first column). */
-  gsl_sort_index(permutation, coords->array, 1, coords->size);
-
-  /* For a check.
-  if(coords->size>1)
-    for(size_t i=0; i<coords->size; ++i) printf("%zu\n", permutation[i]);
-  */
-
-  /* Sort all the coordinates. */
-  for(tmp=coords; tmp!=NULL; tmp=tmp->next)
-    gal_permutation_apply(tmp, permutation);
-
-  /* For a check.
-  if(coords->size>1)
-    {
-      for(i=0;i<coords->size;++i)
-        {
-          for(tmp=coords; tmp!=NULL; tmp=tmp->next)
-            {
-              printf("%f ", ((double *)(tmp->array))[i]);
-            }
-          printf("\n");
-        }
-      exit(0);
-    }
-  */
-
-  /* Return the permutation. */
-  return permutation;
-}
-
-
-
-
-
-/* Do the preparations for matching of coordinates. */
-static void
-match_coordinates_prepare(gal_data_t *coord1, gal_data_t *coord2,
-                          int sorted_by_first, int inplace, int allf64,
-                          gal_data_t **A_out, gal_data_t **B_out,
-                          size_t **A_perm, size_t **B_perm,
-                          size_t minmapsize)
-{
-  gal_data_t *c, *tmp, *A=NULL, *B=NULL;
-
-  /* Sort the datasets if they aren't sorted. If the dataset is already
-     sorted, then 'inplace' is irrelevant. */
-  if(sorted_by_first && allf64)
-    {
-      *A_out=coord1;
-      *B_out=coord2;
-    }
-  else
-    {
-      /* Allocating a new list is only necessary when 'inplace==0' or all
-         the columns are double. */
-      if( inplace && allf64 )
-        {
-          *A_out=coord1;
-          *B_out=coord2;
-        }
-      else
-        {
-          /* Copy the first list. */
-          for(tmp=coord1; tmp!=NULL; tmp=tmp->next)
-            {
-              c=gal_data_copy(tmp);
-              c->next=NULL;
-              gal_list_data_add(&A, c);
-            }
-
-          /* Copy the second list. */
-          for(tmp=coord2; tmp!=NULL; tmp=tmp->next)
-            {
-              c=gal_data_copy(tmp);
-              c->next=NULL;
-              gal_list_data_add(&B, c);
-            }
-
-          /* Reverse both lists: the copying process reversed the order. */
-          gal_list_data_reverse(&A);
-          gal_list_data_reverse(&B);
-
-          /* Set the output pointers. */
-          *A_out=A;
-          *B_out=B;
-        }
-
-      /* Sort each dataset by the first coordinate. */
-      *A_perm = match_coordinates_prepare_sort(*A_out, minmapsize);
-      *B_perm = match_coordinates_prepare_sort(*B_out, minmapsize);
-    }
-}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-/********************************************************************/
-/*************            Coordinate matching           *************/
-/********************************************************************/
-/* Preparations for 'match_coordinates_second_in_first'. */
+/**********************************************************************/
+/********      Generic functions (for any type of matching)    ********/
+/**********************************************************************/
+/* Preparations for the desired matching aperture. */
 static void
-match_coordinates_sif_prepare(gal_data_t *A, gal_data_t *B,
-                              double *aperture, size_t ndim, double **a,
-                              double **b, double *dist, double *c,
-                              double *s, int *iscircle)
+match_aperture_prepare(gal_data_t *A, gal_data_t *B,
+                       double *aperture, size_t ndim,
+                       double **a, double **b, double *dist,
+                       double *c, double *s, int *iscircle)
 {
   double semiaxes[3];
 
@@ -437,8 +200,8 @@ match_coordinates_sif_prepare(gal_data_t *A, gal_data_t *B,
 
 
 static double
-match_coordinates_elliptical_r_2d(double d1, double d2, double *ellipse,
-                                  double c, double s)
+match_elliptical_r_2d(double d1, double d2, double *ellipse,
+                      double c, double s)
 {
   double Xr = d1 * ( c       )     +   d2 * ( s );
   double Yr = d1 * ( -1.0f*s )     +   d2 * ( c );
@@ -450,8 +213,8 @@ match_coordinates_elliptical_r_2d(double d1, double d2, 
double *ellipse,
 
 
 static double
-match_coordinates_elliptical_r_3d(double *delta, double *ellipsoid,
-                                  double *c, double *s)
+match_elliptical_r_3d(double *delta, double *ellipsoid,
+                      double *c, double *s)
 {
   double Xr, Yr, Zr;
   double c1=c[0], s1=s[0];
@@ -471,8 +234,8 @@ match_coordinates_elliptical_r_3d(double *delta, double 
*ellipsoid,
 
 
 static double
-match_coordinates_distance(double *delta, int iscircle, size_t ndim,
-                           double *aperture, double *c, double *s)
+match_distance(double *delta, int iscircle, size_t ndim,
+               double *aperture, double *c, double *s)
 {
   /* For more than one dimension, we'll need to calculate the distance from
      the deltas (differences) in each dimension. */
@@ -484,178 +247,38 @@ match_coordinates_distance(double *delta, int iscircle, 
size_t ndim,
     case 2:
       return ( iscircle
                ? sqrt( delta[0]*delta[0] + delta[1]*delta[1] )
-               : match_coordinates_elliptical_r_2d(delta[0], delta[1],
-                                                   aperture, c[0], s[0]) );
+               : match_elliptical_r_2d(delta[0], delta[1],
+                                       aperture, c[0], s[0]) );
 
     case 3:
       return ( iscircle
                ? sqrt( delta[0]*delta[0]
                        + delta[1]*delta[1]
                        + delta[2]*delta[2] )
-               : match_coordinates_elliptical_r_3d(delta, aperture, c, s) );
-
-    default:
-      error(EXIT_FAILURE, 0, "%s: a bug! Please contact us at %s to fix "
-            "the problem. The value %zu is not recognized for ndim",
-            __func__, PACKAGE_BUGREPORT, ndim);
-    }
-
-  /* Control should not reach this point. */
-  error(EXIT_FAILURE, 0, "%s: a bug! Please contact us at %s to fix the "
-        "problem. Control should not reach the end of this function",
-        __func__, PACKAGE_BUGREPORT);
-  return NAN;
-}
-
-
-
-
-
-/* Go through both catalogs and find which records/rows in the second
-   catalog (catalog b) are within the acceptable distance of each record in
-   the first (a). */
-static void
-match_coordinates_second_in_first(gal_data_t *A, gal_data_t *B,
-                                  double *aperture,
-                                  struct match_coordinate_sfll **bina)
-{
-  /* To keep things easy to read, all variables related to catalog 1 start
-     with an 'a' and things related to catalog 2 are marked with a 'b'. The
-     redundant variables (those that equal a previous value) are only
-     defined to make it easy to read the code.*/
-  int iscircle=0;
-  size_t i, ar=A->size, br=B->size;
-  size_t ai, bi, blow=0, prevblow=0;
-  size_t ndim=gal_list_data_number(A);
-  double r, c[3]={NAN, NAN, NAN}, s[3]={NAN, NAN, NAN};
-  double dist[3]={NAN, NAN, NAN}, delta[3]={NAN, NAN, NAN};
-  double *a[3]={NULL, NULL, NULL}, *b[3]={NULL, NULL, NULL};
-
-  /* Necessary preperations. */
-  match_coordinates_sif_prepare(A, B, aperture,  ndim, a, b, dist, c, s,
-                                &iscircle);
-
-  /* For each row/record of catalog 'a', make a list of the nearest records
-     in catalog b within the maximum distance. Note that both catalogs are
-     sorted by their first axis coordinate.*/
-  for(ai=0;ai<ar;++ai)
-    if( !isnan(a[0][ai]) && blow<br)
-      {
-        /* Initialize 'bina'. */
-        bina[ai]=NULL;
-
-        /* Find the first (lowest first axis value) row/record in catalog
-           'b' that is within the search radius for this record of catalog
-           'a'. 'blow' is the index of the first element to start searching
-           in the catalog 'b' for a match to 'a[][ai]' (the record in
-           catalog a that is currently being searched). 'blow' is only
-           based on the first coordinate, not the second.
-
-           Both catalogs are sorted by their first coordinate, so the
-           'blow' to search for the next record in catalog 'a' will be
-           larger or equal to that of the previous catalog 'a' record. To
-           account for possibly large distances between the records, we do
-           a search here to change 'blow' if necessary before doing further
-           searching.*/
-        for( blow=prevblow; blow<br && b[0][blow] < a[0][ai]-dist[0]; ++blow)
-          { /* This can be blank, the 'for' does all we need :-). */ }
-
-        /* 'blow' is now found for this 'ai' and will be used unchanged to
-           the end of the loop. So keep its value to help the search for
-           the next entry in catalog 'a'. */
-        prevblow=blow;
-
-        /* Go through catalog 'b' (starting at 'blow') with a first axis
-           value smaller than the maximum acceptable range for 'si'. */
-        for( bi=blow; bi<br && b[0][bi] <= a[0][ai] + dist[0]; ++bi )
-          {
-            /* Only consider records with a second axis value in the
-               correct range, note that unlike the first axis, the second
-               axis is no longer sorted. so we have to do both lower and
-               higher limit checks for each item.
-
-               Positions can have an accuracy to a much higher order of
-               magnitude than the search radius. Therefore, it is
-               meaning-less to sort the second axis (after having sorted
-               the first). In other words, very rarely can two first axis
-               coordinates have EXACTLY the same floating point value as
-               each other to easily define an independent sorting in the
-               second axis. */
-            if( ndim<2
-                || (    b[1][bi] >= a[1][ai]-dist[1]
-                     && b[1][bi] <= a[1][ai]+dist[1] ) )
-              {
-                /* Now, 'bi' is within the rectangular range of 'ai'. But
-                   this is not enough to consider the two objects matched
-                   for the following reasons:
-
-                   1) Until now we have avoided calculations other than
-                   larger or smaller on double precision floating point
-                   variables for efficiency. So the 'bi' is within a square
-                   of side 'dist[0]*dist[1]' around 'ai' (not within a
-                   fixed radius).
-
-                   2) Other objects in the 'b' catalog may be closer to
-                   'ai' than this 'bi'.
-
-                   3) The closest 'bi' to 'ai' might be closer to another
-                   catalog 'a' record.
-
-                   To address these problems, we will use a linked list to
-                   keep the indexes of the 'b's near 'ai', along with their
-                   distance. We only add the 'bi's to this list that are
-                   within the acceptable distance.
-
-                   Since we are dealing with much fewer objects at this
-                   stage, it is justified to do complex mathematical
-                   operations like square root and multiplication. This
-                   fixes the first problem.
-
-                   The next two problems will be solved with the list after
-                   parsing of the whole catalog is complete.*/
-                if( ndim<3
-                    || ( b[2][bi] >= a[2][ai]-dist[2]
-                         && b[2][bi] <= a[2][ai]+dist[2] ) )
-                  {
-                    for(i=0;i<ndim;++i) delta[i]=b[i][bi]-a[i][ai];
-                    r=match_coordinates_distance(delta, iscircle, ndim,
-                                                 aperture, c, s);
-                    if(r<aperture[0])
-                      match_coordinate_add_to_sfll(&bina[ai], bi, r);
-                  }
-              }
-          }
-
-        /* If there was no objects within the acceptable distance, then the
-           linked list pointer will be NULL, so go on to the next 'ai'. */
-        if(bina[ai]==NULL)
-          continue;
+               : match_elliptical_r_3d(delta, aperture, c, s) );
 
-        /* For checking the status of affairs uncomment this block
-        {
-          struct match_coordinate_sfll *tmp;
-          printf("\n\nai: %lu:\n", ai);
-          printf("ax: %f (%f -- %f)\n", a[0][ai], a[0][ai]-dist[0],
-                 a[0][ai]+dist[0]);
-          printf("ay: %f (%f -- %f)\n", a[1][ai], a[1][ai]-dist[1],
-                 a[1][ai]+dist[1]);
-          for(tmp=bina[ai];tmp!=NULL;tmp=tmp->next)
-            printf("%lu: %f\n", tmp->v, tmp->f);
-        }
-        */
-      }
+    default:
+      error(EXIT_FAILURE, 0, "%s: a bug! Please contact us at %s to fix "
+            "the problem. The value %zu is not recognized for ndim",
+            __func__, PACKAGE_BUGREPORT, ndim);
+    }
+
+  /* Control should not reach this point. */
+  error(EXIT_FAILURE, 0, "%s: a bug! Please contact us at %s to fix the "
+        "problem. Control should not reach the end of this function",
+        __func__, PACKAGE_BUGREPORT);
+  return NAN;
 }
 
 
 
 
 
-/* In 'match_coordinates_second_in_first', we made an array of lists, here
-   we want to reverse that list to fix the second two issues that were
-   discussed there. */
+/* In the 'match_XXXX_second_in_first' functions, we made an array of
+   lists, here we want to reverse that list to fix the second two issues
+   that were discussed there. */
 void
-match_coordinates_rearrange(gal_data_t *A, gal_data_t *B,
-                            struct match_coordinate_sfll **bina)
+match_rearrange(gal_data_t *A, gal_data_t *B, struct match_sfll **bina)
 {
   size_t bi;
   float *fp, *fpf, r, *ainb;
@@ -680,12 +303,12 @@ match_coordinates_rearrange(gal_data_t *A, gal_data_t *B,
     while( bina[ai] )  /* As long as its not NULL.            */
       {
        /* Pop out a 'bi' and its distance to this 'ai' from 'bina'. */
-       match_coordinate_pop_from_sfll(&bina[ai], &bi, &r);
+       match_pop_from_sfll(&bina[ai], &bi, &r);
 
-       /* If nothing has been put here (the isnan condition below is
-          true), or something exists (the isnan is false, and so it
-          will check the second OR test) with a distance that is
-          larger than this distance then just put this value in. */
+       /* If nothing has been put here (the 'isnan' condition below is
+          true), or something exists (the isnan is false, and so it will
+          check the second OR test) with a distance that is larger than
+          this distance then just put this value in. */
        if( isnan(ainb[bi*2]) || r<ainb[bi*2+1] )
          {
            ainb[bi*2  ] = ai;
@@ -728,7 +351,7 @@ match_coordinates_rearrange(gal_data_t *A, gal_data_t *B,
              }
          }
        else
-          match_coordinate_add_to_sfll(&bina[ai], bi, r);
+          match_add_to_sfll(&bina[ai], bi, r);
       }
 
   /* For checking the status of affairs uncomment this block
@@ -736,7 +359,7 @@ match_coordinates_rearrange(gal_data_t *A, gal_data_t *B,
     size_t bi, counter=0;
     double *a[2]={A->array, A->next->array};
     double *b[2]={B->array, B->next->array};
-    printf("Rearranged bina:\n");
+    printf("\n\nRearranged bina:\n");
     for(ai=0;ai<ar;++ai)
       if(bina[ai])
         {
@@ -761,10 +384,9 @@ match_coordinates_rearrange(gal_data_t *A, gal_data_t *B,
 
 /* The matching has been done, write the output. */
 static gal_data_t *
-gal_match_coordinates_output(gal_data_t *A, gal_data_t *B, size_t *A_perm,
-                             size_t *B_perm,
-                             struct match_coordinate_sfll **bina,
-                             size_t minmapsize, int quietmmap)
+gal_match_output(gal_data_t *A, gal_data_t *B, size_t *A_perm,
+                 size_t *B_perm, struct match_sfll **bina,
+                 size_t minmapsize, int quietmmap)
 {
   float r;
   double *rval;
@@ -810,77 +432,437 @@ gal_match_coordinates_output(gal_data_t *A, gal_data_t 
*B, size_t *A_perm,
   nomatch_i = nummatched;
 
 
-  /* Fill in the output arrays. */
-  aind = out->array;
-  bind = out->next->array;
-  rval = out->next->next->array;
-  for(ai=0;ai<A->size;++ai)
-    {
-      /* A match was found. */
-      if(bina[ai])
-        {
-          /* Note that the permutation keeps the original indexs. */
-          match_coordinate_pop_from_sfll(&bina[ai], &bi, &r);
-          rval[ match_i   ] = r;
-          aind[ match_i   ] = A_perm ? A_perm[ai] : ai;
-          bind[ match_i++ ] = B_perm ? B_perm[bi] : bi;
+  /* Fill in the output arrays. */
+  aind = out->array;
+  bind = out->next->array;
+  rval = out->next->next->array;
+  for(ai=0;ai<A->size;++ai)
+    {
+      /* A match was found. */
+      if(bina[ai])
+        {
+          /* Note that the permutation keeps the original indexs. */
+          match_pop_from_sfll(&bina[ai], &bi, &r);
+          rval[ match_i   ] = r;
+          aind[ match_i   ] = A_perm ? A_perm[ai] : ai;
+          bind[ match_i++ ] = B_perm ? B_perm[bi] : bi;
+
+          /* Set a '1' for this object in the second catalog. This will
+             later be used to find which rows didn't match to fill in the
+             output. */
+          Bmatched[ B_perm ? B_perm[bi] : bi ] = 1;
+        }
+
+      /* No match found. At this stage, we can only fill the indexs of the
+         first input. The second input needs to be matched afterwards.*/
+      else aind[ nomatch_i++ ] = A_perm ? A_perm[ai] : ai;
+    }
+
+
+  /* Complete the second input's permutation. */
+  nomatch_i=nummatched;
+  for(bi=0;bi<B->size;++bi)
+    if( Bmatched[bi] == 0 )
+      bind[ nomatch_i++ ] = bi;
+
+
+  /* For a check
+  printf("\nFirst input's permutation (starred items not matched):\n");
+  for(ai=0;ai<A->size;++ai)
+    printf("%s%zu\n", ai<nummatched?"  ":"* ", aind[ai]+1);
+  printf("\nSecond input's permutation  (starred items not matched):\n");
+  for(bi=0;bi<B->size;++bi)
+    printf("%s%zu\n", bi<nummatched?"  ":"* ", bind[bi]+1);
+  exit(0);
+  */
+
+  /* Clean up and return. */
+  free(Bmatched);
+  return out;
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+/********************************************************************/
+/*************            Coordinate matching           *************/
+/********************************************************************/
+/* Since these checks are repetative, its easier to have a separate
+   function for both inputs. */
+static void
+match_coordinate_sanity_check_columns(gal_data_t *coord, char *info,
+                                      int inplace, int *allf64)
+{
+  gal_data_t *tmp;
+
+  for(tmp=coord; tmp!=NULL; tmp=tmp->next)
+    {
+      if(tmp->type!=GAL_TYPE_FLOAT64)
+        {
+          if(inplace)
+            error(EXIT_FAILURE, 0, "%s: when 'inplace' is activated, "
+                  "the input coordinates must have 'float64' type. At "
+                  "least one node of the %s list has type of '%s'",
+                  __func__, info, gal_type_name(tmp->type, 1));
+          else
+            *allf64=0;
+        }
+
+      if(tmp->ndim!=1)
+        error(EXIT_FAILURE, 0, "%s: each input coordinate column must "
+              "have a single dimension (be a single column). Atleast "
+              "one node of the %s list has %zu dimensions", __func__,
+              info, tmp->ndim);
+
+      if(tmp->size!=coord->size)
+        error(EXIT_FAILURE, 0, "%s: the nodes of each list of "
+              "coordinates must have the same number of elements. "
+              "At least one node of the %s list has %zu elements "
+              "while the first has %zu elements", __func__, info,
+              tmp->size, coord->size);
+    }
+}
+
+
+
+
+
+/* To keep the main function clean, we'll do the sanity checks here. */
+static void
+match_coordinates_sanity_check(gal_data_t *coord1, gal_data_t *coord2,
+                               double *aperture, int inplace, int *allf64)
+{
+  size_t ncoord1=gal_list_data_number(coord1);
+
+  /* Make sure both lists have the same number of datasets. NOTE: they
+     don't need to have the same number of elements. */
+  if( ncoord1!=gal_list_data_number(coord2) )
+    error(EXIT_FAILURE, 0, "%s: the two inputs have different "
+          "numbers of datasets (%zu and %zu respectively)",
+          __func__, ncoord1, gal_list_data_number(coord2));
+
+
+  /* This function currently only works for less than 4 dimensions. */
+  if(ncoord1>3)
+    error(EXIT_FAILURE, 0, "%s: %zu dimension matching requested, "
+          "this function currently only matches datasets with a "
+          "maximum of 3 dimensions", __func__, ncoord1);
+
+  /* Check the column properties. */
+  match_coordinate_sanity_check_columns(coord1, "first", inplace, allf64);
+  match_coordinate_sanity_check_columns(coord2, "second", inplace, allf64);
+
+  /* Check the aperture values. */
+  if(aperture[0]<=0)
+    error(EXIT_FAILURE, 0, "%s: the first value in the aperture (%g) "
+          "cannot be zero or negative", __func__, aperture[0]);
+  switch(ncoord1)
+    {
+    case 1:  /* We don't need any checks in a 1D match. */
+      break;
+
+    case 2:
+      if( (aperture[1]<=0 || aperture[1]>1))
+        error(EXIT_FAILURE, 0, "%s: the second value in the aperture "
+              "(%g) is the axis ratio, so it must be larger than zero "
+              "and less than 1", __func__, aperture[1]);
+      break;
+
+    case 3:
+      if(aperture[1]<=0 || aperture[1]>1 || aperture[2]<=0 || aperture[2]>1)
+        error(EXIT_FAILURE, 0, "%s: at least one of the second or "
+              "third values in the aperture (%g and %g respectively) "
+              "is smaller than zero or larger than one. In a 3D match, "
+              "these are the axis ratios, so they must be larger than "
+              "zero and less than 1", __func__, aperture[1], aperture[2]);
+      break;
+
+    default:
+      error(EXIT_FAILURE, 0, "%s: a bug! Please contact us at %s to "
+            "fix the issue. The value %zu not recognized for 'ndim'",
+            __func__, PACKAGE_BUGREPORT, ncoord1);
+    }
+}
+
+
+
+
+
+/* To keep things clean, the sorting of each input array will be done in
+   this function. */
+static size_t *
+match_coordinates_prepare_sort(gal_data_t *coords, size_t minmapsize)
+{
+  size_t i;
+  double *darr;
+  gal_data_t *tmp;
+  size_t *permutation=gal_pointer_allocate(GAL_TYPE_SIZE_T, coords->size,
+                                           0, __func__, "permutation");
+
+  /* Unfortunately 'gsl_sort_index' doesn't account for NaN elements. So we
+     need to set them to the maximum possible floating point value. */
+  if( gal_blank_present(coords, 1) )
+    {
+      darr=coords->array;
+      for(i=0;i<coords->size;++i)
+        if( isnan(darr[i]) ) darr[i]=FLT_MAX;
+    }
+
+  /* Get the permutation necessary to sort all the columns (based on the
+     first column). */
+  gsl_sort_index(permutation, coords->array, 1, coords->size);
+
+  /* For a check.
+  if(coords->size>1)
+    for(size_t i=0; i<coords->size; ++i) printf("%zu\n", permutation[i]);
+  */
+
+  /* Sort all the coordinates. */
+  for(tmp=coords; tmp!=NULL; tmp=tmp->next)
+    gal_permutation_apply(tmp, permutation);
+
+  /* For a check.
+  if(coords->size>1)
+    {
+      for(i=0;i<coords->size;++i)
+        {
+          for(tmp=coords; tmp!=NULL; tmp=tmp->next)
+            {
+              printf("%f ", ((double *)(tmp->array))[i]);
+            }
+          printf("\n");
+        }
+      exit(0);
+    }
+  */
+
+  /* Return the permutation. */
+  return permutation;
+}
+
+
+
+
+
+/* Do the preparations for matching of coordinates. */
+static void
+match_coordinates_prepare(gal_data_t *coord1, gal_data_t *coord2,
+                          int sorted_by_first, int inplace, int allf64,
+                          gal_data_t **A_out, gal_data_t **B_out,
+                          size_t **A_perm, size_t **B_perm,
+                          size_t minmapsize)
+{
+  gal_data_t *c, *tmp, *A=NULL, *B=NULL;
+
+  /* Sort the datasets if they aren't sorted. If the dataset is already
+     sorted, then 'inplace' is irrelevant. */
+  if(sorted_by_first && allf64)
+    {
+      *A_out=coord1;
+      *B_out=coord2;
+    }
+  else
+    {
+      /* Allocating a new list is only necessary when 'inplace==0' or all
+         the columns are double. */
+      if( inplace && allf64 )
+        {
+          *A_out=coord1;
+          *B_out=coord2;
+        }
+      else
+        {
+          /* Copy the first list. */
+          for(tmp=coord1; tmp!=NULL; tmp=tmp->next)
+            {
+              c=gal_data_copy(tmp);
+              c->next=NULL;
+              gal_list_data_add(&A, c);
+            }
+
+          /* Copy the second list. */
+          for(tmp=coord2; tmp!=NULL; tmp=tmp->next)
+            {
+              c=gal_data_copy(tmp);
+              c->next=NULL;
+              gal_list_data_add(&B, c);
+            }
+
+          /* Reverse both lists: the copying process reversed the order. */
+          gal_list_data_reverse(&A);
+          gal_list_data_reverse(&B);
 
-          /* Set a '1' for this object in the second catalog. This will
-             later be used to find which rows didn't match to fill in the
-             output.. */
-          Bmatched[ B_perm ? B_perm[bi] : bi ] = 1;
+          /* Set the output pointers. */
+          *A_out=A;
+          *B_out=B;
         }
 
-      /* No match found. At this stage, we can only fill the indexs of the
-         first input. The second input needs to be matched afterwards.*/
-      else aind[ nomatch_i++ ] = A_perm ? A_perm[ai] : ai;
+      /* Sort each dataset by the first coordinate. */
+      *A_perm = match_coordinates_prepare_sort(*A_out, minmapsize);
+      *B_perm = match_coordinates_prepare_sort(*B_out, minmapsize);
     }
+}
 
 
-  /* Complete the second input's permutation. */
-  nomatch_i=nummatched;
-  for(bi=0;bi<B->size;++bi)
-    if( Bmatched[bi] == 0 )
-      bind[ nomatch_i++ ] = bi;
 
 
-  /* For a check
-  printf("\nFirst input's permutation:\n");
-  for(ai=0;ai<A->size;++ai)
-    printf("%s%zu\n", ai<nummatched?"  ":"* ", aind[ai]+1);
-  printf("\nSecond input's permutation:\n");
-  for(bi=0;bi<B->size;++bi)
-    printf("%s%zu\n", bi<nummatched?"  ":"* ", bind[bi]+1);
-  exit(0);
-  */
 
-  /* Clean up and return. */
-  free(Bmatched);
-  return out;
-}
+/* Go through both catalogs and find which records/rows in the second
+   catalog (catalog b) are within the acceptable distance of each record in
+   the first (a). */
+static void
+match_coordinates_second_in_first(gal_data_t *A, gal_data_t *B,
+                                  double *aperture,
+                                  struct match_sfll **bina)
+{
+  /* To keep things easy to read, all variables related to catalog 1 start
+     with an 'a' and things related to catalog 2 are marked with a 'b'. The
+     redundant variables (those that equal a previous value) are only
+     defined to make it easy to read the code.*/
+  int iscircle=0;
+  size_t i, ar=A->size, br=B->size;
+  size_t ai, bi, blow=0, prevblow=0;
+  size_t ndim=gal_list_data_number(A);
+  double r, c[3]={NAN, NAN, NAN}, s[3]={NAN, NAN, NAN};
+  double dist[3]={NAN, NAN, NAN}, delta[3]={NAN, NAN, NAN};
+  double *a[3]={NULL, NULL, NULL}, *b[3]={NULL, NULL, NULL};
 
+  /* Necessary preperations. */
+  match_aperture_prepare(A, B, aperture,  ndim, a, b, dist,
+                         c, s, &iscircle);
 
+  /* For each row/record of catalog 'a', make a list of the nearest records
+     in catalog b within the maximum distance. Note that both catalogs are
+     sorted by their first axis coordinate.*/
+  for(ai=0;ai<ar;++ai)
+    if( !isnan(a[0][ai]) && blow<br)
+      {
+        /* Initialize 'bina'. */
+        bina[ai]=NULL;
 
+        /* Find the first (lowest first axis value) row/record in catalog
+           'b' that is within the search radius for this record of catalog
+           'a'. 'blow' is the index of the first element to start searching
+           in the catalog 'b' for a match to 'a[][ai]' (the record in
+           catalog a that is currently being searched). 'blow' is only
+           based on the first coordinate, not the second.
 
+           Both catalogs are sorted by their first coordinate, so the
+           'blow' to search for the next record in catalog 'a' will be
+           larger or equal to that of the previous catalog 'a' record. To
+           account for possibly large distances between the records, we do
+           a search here to change 'blow' if necessary before doing further
+           searching.*/
+        for( blow=prevblow; blow<br && b[0][blow] < a[0][ai]-dist[0];
+             ++blow)
+          { /* This can be blank, the 'for' does all we need :-). */ }
 
+        /* 'blow' is now found for this 'ai' and will be used unchanged to
+           the end of the loop. So keep its value to help the search for
+           the next entry in catalog 'a'. */
+        prevblow=blow;
 
+        /* Go through catalog 'b' (starting at 'blow') with a first axis
+           value smaller than the maximum acceptable range for 'si'. */
+        for( bi=blow; bi<br && b[0][bi] <= a[0][ai] + dist[0]; ++bi )
+          {
+            /* Only consider records with a second axis value in the
+               correct range, note that unlike the first axis, the second
+               axis is no longer sorted. so we have to do both lower and
+               higher limit checks for each item.
 
+               Positions can have an accuracy to a much higher order of
+               magnitude than the search radius. Therefore, it is
+               meaning-less to sort the second axis (after having sorted
+               the first). In other words, very rarely can two first axis
+               coordinates have EXACTLY the same floating point value as
+               each other to easily define an independent sorting in the
+               second axis. */
+            if( ndim<2
+                || (    b[1][bi] >= a[1][ai]-dist[1]
+                     && b[1][bi] <= a[1][ai]+dist[1] ) )
+              {
+                /* Now, 'bi' is within the rectangular range of 'ai'. But
+                   this is not enough to consider the two objects matched
+                   for the following reasons:
 
+                   1) Until now we have avoided calculations other than
+                   larger or smaller on double precision floating point
+                   variables for efficiency. So the 'bi' is within a square
+                   of side 'dist[0]*dist[1]' around 'ai' (not within a
+                   fixed radius).
 
+                   2) Other objects in the 'b' catalog may be closer to
+                   'ai' than this 'bi'.
 
+                   3) The closest 'bi' to 'ai' might be closer to another
+                   catalog 'a' record.
 
+                   To address these problems, we will use a linked list to
+                   keep the indexes of the 'b's near 'ai', along with their
+                   distance. We only add the 'bi's to this list that are
+                   within the acceptable distance.
 
+                   Since we are dealing with much fewer objects at this
+                   stage, it is justified to do complex mathematical
+                   operations like square root and multiplication. This
+                   fixes the first problem.
 
+                   The next two problems will be solved with the list after
+                   parsing of the whole catalog is complete.*/
+                if( ndim<3
+                    || ( b[2][bi] >= a[2][ai]-dist[2]
+                         && b[2][bi] <= a[2][ai]+dist[2] ) )
+                  {
+                    for(i=0;i<ndim;++i) delta[i]=b[i][bi]-a[i][ai];
+                    r=match_distance(delta, iscircle, ndim, aperture,
+                                     c, s);
+                    if(r<aperture[0])
+                      match_add_to_sfll(&bina[ai], bi, r);
+                  }
+              }
+          }
 
+        /* If there was no objects within the acceptable distance, then the
+           linked list pointer will be NULL, so go on to the next 'ai'. */
+        if(bina[ai]==NULL)
+          continue;
 
+        /* For checking the status of affairs uncomment this block
+        {
+          struct match_sfll *tmp;
+          printf("\n\nai: %lu:\n", ai);
+          printf("ax: %f (%f -- %f)\n", a[0][ai], a[0][ai]-dist[0],
+                 a[0][ai]+dist[0]);
+          printf("ay: %f (%f -- %f)\n", a[1][ai], a[1][ai]-dist[1],
+                 a[1][ai]+dist[1]);
+          for(tmp=bina[ai];tmp!=NULL;tmp=tmp->next)
+            printf("%lu: %f\n", tmp->v, tmp->f);
+        }
+        */
+      }
+}
 
 
 
 
 
-/********************************************************************/
-/*************            Coordinate matching           *************/
-/********************************************************************/
 /* Match two positions: the two inputs ('coord1' and 'coord2') should be
    lists of coordinates (each is a list of datasets). To speed up the
    search, this function will sort the inputs by their first column. If
@@ -909,14 +891,15 @@ gal_match_coordinates(gal_data_t *coord1, gal_data_t 
*coord2,
   int allf64=1;
   gal_data_t *A, *B, *out;
   size_t *A_perm=NULL, *B_perm=NULL;
-  struct match_coordinate_sfll **bina;
+  struct match_sfll **bina;
 
   /* Do a small sanity check and make the preparations. After this point,
      we'll call the two arrays 'a' and 'b'.*/
-  match_coordinaes_sanity_check(coord1, coord2, aperture, inplace,
-                                &allf64);
-  match_coordinates_prepare(coord1, coord2, sorted_by_first, inplace, allf64,
-                            &A, &B, &A_perm, &B_perm, minmapsize);
+  match_coordinates_sanity_check(coord1, coord2, aperture, inplace,
+                                 &allf64);
+  match_coordinates_prepare(coord1, coord2, sorted_by_first, inplace,
+                            allf64, &A, &B, &A_perm, &B_perm,
+                            minmapsize);
 
 
   /* Allocate the 'bina' array (an array of lists). Let's call the first
@@ -935,12 +918,12 @@ gal_match_coordinates(gal_data_t *coord1, gal_data_t 
*coord2,
 
 
   /* Two re-arrangings will fix the issue. */
-  match_coordinates_rearrange(A, B, bina);
+  match_rearrange(A, B, bina);
 
 
   /* The match is done, write the output. */
-  out=gal_match_coordinates_output(A, B, A_perm, B_perm, bina, minmapsize,
-                                   quietmmap);
+  out=gal_match_output(A, B, A_perm, B_perm, bina, minmapsize,
+                       quietmmap);
 
 
   /* Clean up. */
@@ -983,29 +966,96 @@ gal_match_coordinates(gal_data_t *coord1, gal_data_t 
*coord2,
 /********************************************************************/
 struct match_kdtree_params
 {
+  /* Input arguments. */
+  gal_data_t             *A;  /* 1st coordinate list of 'gal_data_t's */
+  gal_data_t             *B;  /* 2nd coordinate list of 'gal_data_t's */
   size_t               ndim;  /* The number of dimensions.            */
-  gal_data_t        *coord1;  /* 1st coordinate list of 'gal_data_t's */
-  gal_data_t        *coord2;  /* 2nd coordinate list of 'gal_data_t's */
-  size_t           *c2match;  /* Matched IDs for the second coords.   */
-  double          *distance;  /* The distance between the matches.    */
   double          *aperture;  /* Acceptable aperture for match.       */
   size_t        kdtree_root;  /* Index (counting from 0) of root.     */
-  gal_data_t *coord1_kdtree;  /* k-d tree of first coordinate.        */
-  size_t  *multiple_matches;  /* Multiple match count in 1st coords.  */
+  gal_data_t      *A_kdtree;  /* k-d tree of first coordinate.        */
 
   /* Internal parameters for easy aperture checking. For example there is
-     no need to calculate the fixed 'cos()' and 'sin()' functions. So we
-     calculate them once and just use their values. */
+     no need to calculate the fixed 'cos()' and 'sin()' functions every
+     time. So we calculate them once and store the results here to just use
+     their values for every check. */
   int              iscircle;  /* If the aperture is circular.         */
-  double              *a[3];  /* Direct pointers to column arrays.    */
-  double              *b[3];  /* Direct pointers to column arrays.    */
   double               c[3];  /* Fixed cos(), for elliptical dist.    */
   double               s[3];  /* Fixed sin(), for elliptical dist.    */
+
+  /* Internal items. */
+  double              *a[3];  /* Direct pointers to column arrays.    */
+  double              *b[3];  /* Direct pointers to column arrays.    */
+  struct match_sfll  **bina; /* Second cat. items in first. */
 };
 
 
 
 
+
+static void
+match_kdtree_sanity_check(struct match_kdtree_params *p)
+{
+  gal_data_t *tmp;
+
+  /* Make sure all coordinates and the k-d tree have the same number of
+     rows. */
+  p->ndim=gal_list_data_number(p->A);
+  if( (   p->ndim != gal_list_data_number(p->B))
+      || (p->ndim != gal_list_data_number(p->A_kdtree)) )
+    error(EXIT_FAILURE, 0, "%s: the 'coord1', 'coord2' and "
+          "'coord1_kdtree' arguments should have the same number "
+          "of nodes/columns (elements in a simply linked list). "
+          "But they each respectively have %zu, %zu and %zu "
+          "nodes/columns", __func__, p->ndim,
+          gal_list_data_number(p->B),
+          gal_list_data_number(p->A_kdtree));
+
+  /* Make sure the coordinates have a 'double' type. */
+  for(tmp=p->A; tmp!=NULL; tmp=tmp->next)
+    if( tmp->type!=GAL_TYPE_FLOAT64 )
+      error(EXIT_FAILURE, 0, "%s: the type of all columns in 'coord1' "
+            "should be 'double', but at least one of them is '%s'",
+            __func__, gal_type_name(tmp->type, 1));
+  for(tmp=p->B; tmp!=NULL; tmp=tmp->next)
+    if( tmp->type!=GAL_TYPE_FLOAT64 )
+      error(EXIT_FAILURE, 0, "%s: the type of all columns in 'coord2' "
+            "should be 'double', but at least one of them is '%s'",
+            __func__, gal_type_name(tmp->type, 1));
+  for(tmp=p->A_kdtree; tmp!=NULL; tmp=tmp->next)
+    if( tmp->type!=GAL_TYPE_UINT32 )
+      error(EXIT_FAILURE, 0, "%s: the type of both columns in "
+            "'coord1_kdtree' should be 'uint32', but it is '%s'",
+            __func__, gal_type_name(tmp->type, 1));
+
+  /* Allocate and initialize the 'bina' array (an array of lists). Let's
+     call the first catalog 'a' and the second 'b'. This array has
+     'a->size' elements (pointers) and for each, it keeps a list of 'b'
+     elements that are nearest to it. */
+  errno=0;
+  p->bina=calloc(p->A->size, sizeof *p->bina);
+  if(p->bina==NULL)
+    error(EXIT_FAILURE, errno, "%s: %zu bytes for 'bina'",
+          __func__, p->A->size*sizeof *p->bina);
+
+  /* Pointers to the input column arrays for easy parsing later. */
+  p->a[0]=p->A->array;
+  p->b[0]=p->B->array;
+  if( p->A->next )
+    {
+      p->a[1]=p->A->next->array;
+      if( p->A->next->next ) p->a[2]=p->A->next->next->array;
+    }
+  if( p->B->next )
+    {
+      p->b[1]=p->B->next->array;
+      if( p->B->next->next ) p->b[2]=p->B->next->next->array;
+    }
+}
+
+
+
+
+
 /* Main k-d tree matching function. */
 static void *
 match_kdtree_worker(void *in_prm)
@@ -1016,42 +1066,48 @@ match_kdtree_worker(void *in_prm)
 
   /* High level definitions. */
   gal_data_t *ccol;
-  size_t i, j, index, mindex;
+  double r, delta[3];
+  size_t i, j, ai, bi;
   double *point=NULL, least_dist;
 
   /* Allqocate space for all the matching points (based on the number of
      dimensions). */
-  point=gal_pointer_allocate(GAL_TYPE_FLOAT64, p->ndim, 0, __func__, "point");
+  point=gal_pointer_allocate(GAL_TYPE_FLOAT64, p->ndim, 0, __func__,
+                             "point");
 
-  /* Go over all the actions (rows in second catalog) that were assigned to
-     this thread. */
+  /* Go over all the rows in the second catalog that were assigned to this
+     thread. */
   for(i=0; tprm->indexs[i] != GAL_BLANK_SIZE_T; ++i)
     {
-      /* Fill the 'point' for this thread. */
+      /* Fill the 'point' for this thread (note that this is the index in
+         the second catalog, hence 'bi'). */
       j=0;
-      index = tprm->indexs[i];
-      for(ccol=p->coord2; ccol!=NULL; ccol=ccol->next)
-        point[ j++ ] = ((double *)(ccol->array))[index];
+      bi = tprm->indexs[i];
+      for(ccol=p->B; ccol!=NULL; ccol=ccol->next)
+        point[ j++ ] = ((double *)(ccol->array))[ bi ];
 
-      /* Find the index of the nearest neighbor to this item. */
-      mindex=gal_kdtree_nearest_neighbour(p->coord1, p->coord1_kdtree,
-                                          p->kdtree_root, point, &least_dist);
-
-      /* Store the distance between the matches. */
-      p->distance[index] = least_dist;
+      /* Find the index of the nearest neighbor in the first catalog to
+         this point in the second catalog. */
+      ai = gal_kdtree_nearest_neighbour(p->A, p->A_kdtree,
+                                        p->kdtree_root, point,
+                                        &least_dist);
 
       /* Make sure the matched point is within the given aperture (which
          may be elliptical). If it isn't, then the reported index for this
          element will be 'GAL_BLANK_SIZE_T'. */
-      p->c2match[index] = least_dist<p->aperture[0] ? mindex : 
GAL_BLANK_SIZE_T;
-
-      /* Keep count of the number of matches in the 1st coordinate.
-         Some points in the first catalog can match to multiple points in
-         the second catalog for a given aperture value. To avoid this multiple
-         matching, we keep a track of such points in the first catalog.
-         We use 'multiple_matches' to keep track for the same. */
-      if(p->c2match[index] != GAL_BLANK_SIZE_T)
-        p->multiple_matches[p->c2match[index]]++;
+      for(j=0;j<p->ndim;++j)
+        delta[j]=p->b[j][bi] - p->a[j][ai];
+      r=match_distance(delta, p->iscircle, p->ndim, p->aperture,
+                       p->c, p->s);
+
+      /* If the radial distance is smaller than the radial measure, then
+         add this item to a match with 'ai'. */
+      if(r<p->aperture[0])
+        match_add_to_sfll(&p->bina[ai], bi, r);
+
+      /* For a check:
+      printf("first[%zu] matched with second[%zu]\n", ai, bi);
+      */
     }
 
   /* Clean up. */
@@ -1066,100 +1122,21 @@ match_kdtree_worker(void *in_prm)
 
 
 
-gal_data_t *
-gal_match_kdtree_output(gal_data_t *A, gal_data_t *B, size_t *bina,
-                        double* distance, size_t *multiple_matches,
-                        size_t minmapsize, int quietmmap)
+static void
+match_kdtree_second_in_first(struct match_kdtree_params *p,
+                             size_t numthreads, size_t minmapsize,
+                             int quietmmap)
 {
-  double *rval;
-  gal_data_t *out;
-  uint8_t *Amatched;
-  size_t ai=0, bi=0, nummatched=0;
-  size_t *aind, *bind, match_i, nomatch_i;
-
-  /* Find how many matches there were in total. */
-  for(ai=0;ai<A->size;++ai) if(multiple_matches[ai]) ++nummatched;
-
-  /* If there aren't any matches, return NULL. */
-  if(nummatched==0) return NULL;
-
-
-  /* Allocate the output list. */
-  out=gal_data_alloc(NULL, GAL_TYPE_SIZE_T, 1, &A->size, NULL, 0,
-                     minmapsize, quietmmap, "CAT1_ROW", "counter",
-                     "Row index in first catalog (counting from 0).");
-  out->next=gal_data_alloc(NULL, GAL_TYPE_SIZE_T, 1, &B->size, NULL, 0,
-                           minmapsize, quietmmap, "CAT2_ROW", "counter",
-                           "Row index in second catalog (counting "
-                           "from 0).");
-  out->next->next=gal_data_alloc(NULL, GAL_TYPE_FLOAT64, 1, &nummatched,
-                                 NULL, 0, minmapsize, quietmmap,
-                                 "MATCH_DIST", NULL,
-                                 "Distance between the match.");
-
-
-  /* Allocate the 'Amatched' array which is a flag for which rows of the
-     first catalog were matched. The columns that had a match will get a
-     value of one while we are parsing them below. */
-  Amatched=gal_pointer_allocate(GAL_TYPE_UINT8, A->size, 1, __func__,
-                                "Amatched");
-
-
-  /* Initialize the indexs. We want the first 'nummatched' indexs in both
-     outputs to be the matching rows. The non-matched rows should start to
-     be indexed after the matched ones. So the first non-matched index is
-     at the index 'nummatched'. */
-  match_i   = 0;
-  nomatch_i = nummatched;
-
-
-  /* Fill in the output arrays. */
-  aind = out->array;
-  bind = out->next->array;
-  rval = out->next->next->array;
-  for(bi=0;bi<B->size;++bi)
-    {
-      /* A match was found. */
-      if(bina[bi] != GAL_BLANK_SIZE_T && multiple_matches[bina[bi]])
-        {
-          multiple_matches[bina[bi]] = 0;
-          /* Note that the permutation keeps the original indexs. */
-          rval[ match_i   ] = distance[bi];
-          aind[ match_i   ] = bina[bi];
-          bind[ match_i++ ] = bi;
-
-          /* Set a '1' for this object in the first catalog. This will
-             later be used to find which rows didn't match to fill in the
-             output. */
-          Amatched[ bina[bi] ] = 1;
-        }
-
-      /* No match found. At this stage, we can only fill the indexs of the
-         second input. The first input needs to be matched afterwards.*/
-      else bind[ nomatch_i++ ] = bi;
-    }
-
-
-  /* Complete the first input's permutation. */
-  nomatch_i=nummatched;
-  for(ai=0;ai<A->size;++ai)
-    if( Amatched[ai] == 0 )
-      aind[ nomatch_i++ ] = ai;
-
+  double dist[3]; /* Just a place-holder in 'aperture_prepare'. */
 
-  /* For a check
-  printf("\nNumber of matches: %zu\n", nummatched);
-  printf("\nFirst input's permutation:\n");
-  for(ai=0;ai<A->size;++ai)
-    printf("%s%zu\n", ai<nummatched?"  ":"* ", aind[ai]+1);
-  printf("\nSecond input's permutation:\n");
-  for(bi=0;bi<B->size;++bi)
-    printf("%s%zu\n", bi<nummatched?"  ":"* ", bind[bi]+1);
-  exit(0);
-  */
+  /* Prepare the aperture-related checks. */
+  match_aperture_prepare(p->A, p->B, p->aperture,
+                         p->ndim, p->a, p->b, dist, p->c,
+                         p->s, &p->iscircle);
 
-  /* Return the output. */
-  return out;
+  /* Distribute the jobs in multiple threads. */
+  gal_threads_spin_off(match_kdtree_worker, p, p->B->size,
+                       numthreads, minmapsize, quietmmap);
 }
 
 
@@ -1170,86 +1147,35 @@ gal_data_t *
 gal_match_kdtree(gal_data_t *coord1, gal_data_t *coord2,
                  gal_data_t *coord1_kdtree, size_t kdtree_root,
                  double *aperture, size_t numthreads, size_t minmapsize,
-                 size_t *nummatched, int inplace, int quietmmap)
+                 int quietmmap, size_t *nummatched)
 {
-  gal_data_t *out;
+  gal_data_t *out=NULL;
   struct match_kdtree_params p;
-  gal_data_t *c2match, *distance, *multiple_matches;
-  double dist[3]; /* Just a place-holder in 'sif_prepare'. */
-
 
-  /* Basic sanity checks. */
-  p.ndim=gal_list_data_number(coord1);
-  if( (   p.ndim != gal_list_data_number(coord2))
-      || (p.ndim != gal_list_data_number(coord1_kdtree)) )
-    error(EXIT_FAILURE, 0, "%s: the 'coord1', 'coord2' and 'coord1_kdtree' "
-          "arguments should have the same number of nodes/columns (elements "
-          "in a simply linked list). But they each respectively have %zu, "
-          "%zu and %zu nodes/columns", __func__, p.ndim,
-          gal_list_data_number(coord2), gal_list_data_number(coord1_kdtree));
-  if( coord1->type!=GAL_TYPE_FLOAT64 )
-    error(EXIT_FAILURE, 0, "%s: the type of 'coord1' should be 'double', "
-          "but it is '%s'", __func__, gal_type_name(coord1->type, 1));
-  if( coord2->type!=GAL_TYPE_FLOAT64 )
-    error(EXIT_FAILURE, 0, "%s: the type of 'coord2' should be 'double', "
-          "but it is '%s'", __func__, gal_type_name(coord2->type, 1));
-  if( coord1_kdtree->type!=GAL_TYPE_UINT32 )
-    error(EXIT_FAILURE, 0, "%s: the type of 'coord1_kdtree' should be "
-          "'uint32', but it is '%s'", __func__,
-          gal_type_name(coord1_kdtree->type, 1));
-
-
-  /* Do the preparations. */
-  match_coordinates_sif_prepare(coord1, coord2, aperture, p.ndim,
-                                p.a, p.b, dist, p.c, p.s, &p.iscircle);
-
-
-  c2match=gal_data_alloc(NULL, GAL_TYPE_SIZE_T, 1, &coord2->size, NULL, 0,
-                         minmapsize, quietmmap, NULL, NULL, NULL);
-  distance=gal_data_alloc(NULL, GAL_TYPE_FLOAT64, 1, &coord2->size, NULL, 0,
-                          minmapsize, quietmmap, NULL, NULL, NULL);
-  multiple_matches=gal_data_alloc(NULL, GAL_TYPE_SIZE_T, 1, &coord1->size,
-                                  NULL, 0, minmapsize, quietmmap, NULL,
-                                  NULL, NULL);
-
-
-
-  /* Set the threading parameters and spin-off the threads. */
-  p.coord1=coord1;
-  p.coord2=coord2;
+  /* Write the parameters into the structure. */
+  p.A=coord1;
+  p.B=coord2;
   p.aperture=aperture;
-  p.c2match=c2match->array;
+  p.A_kdtree=coord1_kdtree;
   p.kdtree_root=kdtree_root;
-  p.distance=distance->array;
-  p.coord1_kdtree=coord1_kdtree;
-  p.multiple_matches=multiple_matches->array;
-  gal_threads_spin_off(match_kdtree_worker, &p, coord2->size, numthreads,
-                       minmapsize, quietmmap);
 
+  /* Basic sanity checks. */
+  match_kdtree_sanity_check(&p);
 
-  /* The match is done, write the output. */
-  out=gal_match_kdtree_output(coord1, coord2, p.c2match, p.distance,
-                              p.multiple_matches, minmapsize, quietmmap);
-
-  /* For a check:
-  double *c1x=coord1->array, *c1y=coord1->next->array;
-  double *c2x=coord2->array, *c2y=coord2->next->array;
-  for(size_t i=0; i<coord2->size; ++i)
-    printf("%s: cat2's (%g, %g) matches with (%g, %g) of cat1\n"
-           "with distance %g c2match %zu\n",
-            __func__, c2x[i], c2y[i],
-            p.c2match[i]==GAL_BLANK_SIZE_T ? NAN : c1x[p.c2match[i]],
-            p.c2match[i]==GAL_BLANK_SIZE_T ? NAN : c1y[p.c2match[i]],
-            p.distance[i], p.c2match[i]);
-  */
+  /* Find all of the items in the second catalog items in the first. */
+  match_kdtree_second_in_first(&p, numthreads, minmapsize, quietmmap);
 
-  /* Clean up. */
-  gal_data_free(multiple_matches);
-  gal_data_free(distance);
-  gal_data_free(c2match);
+  /* Find the best match for each item (from possibly multiple matches). */
+  match_rearrange(p.A, p.B, p.bina);
 
+  /* The match is done, write the output. */
+  out=gal_match_output(p.A, p.B, NULL, NULL, p.bina, minmapsize,
+                       quietmmap);
 
   /* Set 'nummatched' and return output. */
   *nummatched = out ?  out->next->next->size : 0;
+
+  /* Clean up and return. */
+  free(p.bina);
   return out;
 }



reply via email to

[Prev in Thread] Current Thread [Next in Thread]