gnuastro-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[gnuastro-commits] master 3365cd5: MakeCatalog: better implementation of


From: Mohammad Akhlaghi
Subject: [gnuastro-commits] master 3365cd5: MakeCatalog: better implementation of ignoring integers not in input
Date: Tue, 5 Nov 2019 15:37:51 -0500 (EST)

branch: master
commit 3365cd5256cd41c02e009b9256495f96422c2999
Author: Mohammad Akhlaghi <address@hidden>
Commit: Mohammad Akhlaghi <address@hidden>

    MakeCatalog: better implementation of ignoring integers not in input
    
    2 commits ago (0ebad2b4ef), this new feature was added to ignore creating a
    final catalog row for integers that don't actually have a pixel in the
    input. But that implementation was not very efficient: it involved defining
    a 1-pixel sized tile to parse over the non-existant integers and also
    allocating large space in memory for the extra output rows for them during
    the processing (they were only removed, to not be printed in the output, in
    the final step).
    
    With this commit, a much better implementation is done where tiles (and
    output rows) are only generated for labels that actually exist in the input
    and no extra rows are created in the middle of the processing. This will
    results in much faster processing and less memory consumption.
---
 NEWS                      |   1 +
 bin/mkcatalog/columns.c   |  52 ++++++----
 bin/mkcatalog/main.h      |   2 +-
 bin/mkcatalog/mkcatalog.c |  50 +--------
 bin/mkcatalog/ui.c        | 254 ++++++++++++++++++++++++++--------------------
 doc/gnuastro.texi         |  33 +++---
 lib/blank.c               |  19 ++++
 lib/gnuastro/blank.h      |   3 +-
 lib/statistics.c          |   9 +-
 9 files changed, 229 insertions(+), 194 deletions(-)

diff --git a/NEWS b/NEWS
index 9224f06..781fee0 100644
--- a/NEWS
+++ b/NEWS
@@ -113,6 +113,7 @@ See the end of the file for license conditions.
 
   Library:
    - gal_binary_connected_indexs: store indexs of connected components.
+   - gal_blank_remove_realloc: Remove blanks and shrink allocated space.
    - gal_box_bound_ellipsoid_extent: Extent of 3D ellipsoid.
    - gal_box_bound_ellipsoid: Bounding box for a 3D ellipsoid.
    - gal_statistics_unique: Return unique (non-blank) elements of the input.
diff --git a/bin/mkcatalog/columns.c b/bin/mkcatalog/columns.c
index 6fac3a7..9bb10e3 100644
--- a/bin/mkcatalog/columns.c
+++ b/bin/mkcatalog/columns.c
@@ -669,6 +669,7 @@ columns_define_alloc(struct mkcatalogparams *p)
           disp_width     = 10;
           disp_precision = 0;
           ciflag[ CCOL_MINX ] = 1;
+          oiflag[ OCOL_NUMALL ] = ciflag[ CCOL_NUMALL ] = 1;
           break;
 
         case UI_KEY_MAXX:
@@ -682,6 +683,7 @@ columns_define_alloc(struct mkcatalogparams *p)
           disp_width     = 10;
           disp_precision = 0;
           ciflag[ CCOL_MAXX ] = 1;
+          oiflag[ OCOL_NUMALL ] = ciflag[ CCOL_NUMALL ] = 1;
           break;
 
         case UI_KEY_MINY:
@@ -695,6 +697,7 @@ columns_define_alloc(struct mkcatalogparams *p)
           disp_width     = 10;
           disp_precision = 0;
           ciflag[ CCOL_MINY ] = 1;
+          oiflag[ OCOL_NUMALL ] = ciflag[ CCOL_NUMALL ] = 1;
           break;
 
         case UI_KEY_MAXY:
@@ -708,6 +711,7 @@ columns_define_alloc(struct mkcatalogparams *p)
           disp_width     = 10;
           disp_precision = 0;
           ciflag[ CCOL_MAXY ] = 1;
+          oiflag[ OCOL_NUMALL ] = ciflag[ CCOL_NUMALL ] = 1;
           break;
 
         case UI_KEY_MINZ:
@@ -721,6 +725,7 @@ columns_define_alloc(struct mkcatalogparams *p)
           disp_width     = 10;
           disp_precision = 0;
           ciflag[ CCOL_MINZ ] = 1;
+          oiflag[ OCOL_NUMALL ] = ciflag[ CCOL_NUMALL ] = 1;
           break;
 
         case UI_KEY_MAXZ:
@@ -734,6 +739,7 @@ columns_define_alloc(struct mkcatalogparams *p)
           disp_width     = 10;
           disp_precision = 0;
           ciflag[ CCOL_MAXZ ] = 1;
+          oiflag[ OCOL_NUMALL ] = ciflag[ CCOL_NUMALL ] = 1;
           break;
 
         case UI_KEY_W1:
@@ -1721,7 +1727,7 @@ columns_clump_brightness(double *ci)
 
 /* Measure the minimum and maximum positions. */
 static uint32_t
-columns_xy_extrema(struct mkcatalog_passparams *pp, size_t *coord, int key)
+columns_xy_extrema(struct mkcatalog_passparams *pp, double *oi, size_t *coord, 
int key)
 {
   size_t ndim=pp->tile->ndim;
   gal_data_t *tile=pp->tile, *block=tile->block;
@@ -1738,19 +1744,23 @@ columns_xy_extrema(struct mkcatalog_passparams *pp, 
size_t *coord, int key)
 
   /* Return the proper value: note that `coord' is in C standard: starting
      from the slowest dimension and counting from zero. */
-  switch(key)
-    {
-    case UI_KEY_MINX: return coord[ndim-1] + 1;                   break;
-    case UI_KEY_MAXX: return coord[ndim-1] + tile->dsize[ndim-1]; break;
-    case UI_KEY_MINY: return coord[ndim-2] + 1;                   break;
-    case UI_KEY_MAXY: return coord[ndim-2] + tile->dsize[ndim-2]; break;
-    case UI_KEY_MINZ: return coord[ndim-3] + 1;                   break;
-    case UI_KEY_MAXZ: return coord[ndim-3] + tile->dsize[ndim-3]; break;
-    default:
-      error(EXIT_FAILURE, 0, "%s: a bug! Please contact us at %s to fix the "
-            "problem. The value %d is not a recognized value", __func__,
-            PACKAGE_BUGREPORT, key);
-    }
+  if(oi[OCOL_NUMALL])
+    switch(key)
+      {
+      case UI_KEY_MINX: return coord[ndim-1] + 1;                   break;
+      case UI_KEY_MAXX: return coord[ndim-1] + tile->dsize[ndim-1]; break;
+      case UI_KEY_MINY: return coord[ndim-2] + 1;                   break;
+      case UI_KEY_MAXY: return coord[ndim-2] + tile->dsize[ndim-2]; break;
+      case UI_KEY_MINZ: return coord[ndim-3] + 1;                   break;
+      case UI_KEY_MAXZ: return coord[ndim-3] + tile->dsize[ndim-3]; break;
+      default:
+        error(EXIT_FAILURE, 0, "%s: a bug! Please contact us at %s to fix the "
+              "problem. The value %d is not a recognized value", __func__,
+              PACKAGE_BUGREPORT, key);
+      }
+  else
+    return 0;
+
 
   /* Control should not reach here. */
   error(EXIT_FAILURE, 0, "%s: a bug! please contact us at %s to fix the "
@@ -1819,10 +1829,18 @@ columns_fill(struct mkcatalog_passparams *pp)
   double *ci, *oi=pp->oi;
   size_t coord[3]={GAL_BLANK_SIZE_T, GAL_BLANK_SIZE_T, GAL_BLANK_SIZE_T};
 
-  size_t sr=pp->clumpstartindex, cind, coind;
-  size_t oind=pp->object-1; /* IDs start from 1, indexs from 0. */
+  size_t i, sr=pp->clumpstartindex, oind, cind, coind;
   double **vo=NULL, **vc=NULL, **go=NULL, **gc=NULL, **vcc=NULL, **gcc=NULL;
 
+  /* Find the object's index in final catalog. */
+  if(p->outlabs)
+    {
+      for(i=0;i<p->numobjects;++i)
+        if(p->outlabs[i]==pp->object)
+          { oind=i; break; }
+    }
+  else oind=pp->object-1;
+
   /* If a WCS column is requested (check will be done inside the function),
      then set the pointers. */
   columns_set_wcs_pointers(p, &vo, &vc, &go, &gc, &vcc, &gcc);
@@ -1936,7 +1954,7 @@ columns_fill(struct mkcatalog_passparams *pp)
         case UI_KEY_MAXY:
         case UI_KEY_MINZ:
         case UI_KEY_MAXZ:
-          ((uint32_t *)colarr)[oind]=columns_xy_extrema(pp, coord, key);
+          ((uint32_t *)colarr)[oind]=columns_xy_extrema(pp, oi, coord, key);
           break;
 
         case UI_KEY_W1:
diff --git a/bin/mkcatalog/main.h b/bin/mkcatalog/main.h
index 2721c25..0aa3f72 100644
--- a/bin/mkcatalog/main.h
+++ b/bin/mkcatalog/main.h
@@ -206,6 +206,7 @@ struct mkcatalogparams
   gal_data_t          *upmask;  /* Upper limit magnitude mask.          */
   float                medstd;  /* Median standard deviation value.     */
   float               cpscorr;  /* Counts-per-second correction.        */
+  int32_t            *outlabs;  /* Labels in output catalog (when necessary) */
   size_t           numobjects;  /* Number of object labels in image.    */
   float               clumpsn;  /* Clump S/N threshold.                 */
   size_t            numclumps;  /* Number of clumps in image.           */
@@ -229,7 +230,6 @@ struct mkcatalogparams
   size_t         *numclumps_c;  /* To sort the clumps table by Obj.ID.  */
   gal_data_t   *specsliceinfo;  /* Slice information for spectra.       */
   gal_data_t         *spectra;  /* Array of datasets containing spectra.*/
-  gal_data_t      *rowsremove;  /* For rows/labels with no pixel.       */
 
   char        *usedvaluesfile;  /* Ptr to final name used for values.   */
   char        *usedclumpsfile;  /* Ptr to final name used for clumps.   */
diff --git a/bin/mkcatalog/mkcatalog.c b/bin/mkcatalog/mkcatalog.c
index dfd24dc..af7a6c2 100644
--- a/bin/mkcatalog/mkcatalog.c
+++ b/bin/mkcatalog/mkcatalog.c
@@ -149,7 +149,9 @@ mkcatalog_single_object(void *in_prm)
       /* For easy reading. Note that the object IDs start from one while
          the array positions start from 0. */
       pp.ci       = NULL;
-      pp.object   = tprm->indexs[i] + 1;
+      pp.object   = ( p->outlabs
+                      ? p->outlabs[tprm->indexs[i]]
+                      : tprm->indexs[i] + 1 );
       pp.tile     = &p->tiles[   tprm->indexs[i] ];
       pp.spectrum = &p->spectra[ tprm->indexs[i] ];
 
@@ -610,47 +612,6 @@ sort_clumps_by_objid(struct mkcatalogparams *p)
 
 
 
-static void
-mkcatalog_remove_labels_with_no_pixel(struct mkcatalogparams *p)
-{
-  size_t i;
-  gal_data_t *tmp, *perm;
-  uint8_t *rarray=p->rowsremove->array;
-  size_t g=0, b=0, numgood=0, *permutation;
-
-  /* Count how many good rows we have. */
-  for(i=0;i<p->numobjects;++i) if(rarray[i]==0) ++numgood;
-
-  /* Define the permutation to keep only the rows that must be shown. */
-  perm=gal_data_alloc(NULL, GAL_TYPE_SIZE_T, 1, &p->numobjects, NULL, 1,
-                      p->cp.minmapsize, p->cp.quietmmap, NULL, NULL, NULL);
-  permutation=perm->array;
-  for(i=0;i<p->numobjects;++i)
-    permutation[i] = rarray[i] ? numgood+b++ : g++;
-
-  /* For a check.
-  for(i=0;i<p->numobjects;++i)
-    printf("%zu: %u (%zu)\n", i+1, rarray[i], permutation[i]);
-  */
-
-  /* Apply the permutation. */
-  for(tmp=p->objectcols;tmp!=NULL;tmp=tmp->next)
-    {
-      /* Apply the permutation. */
-      gal_permutation_apply_inverse(tmp, permutation);
-
-      /* Correct the size. */
-      tmp->size=tmp->dsize[0]=numgood;
-    }
-
-  /* Clean up and return. */
-  gal_data_free(perm);
-}
-
-
-
-
-
 /* Write the produced columns into the output */
 static void
 mkcatalog_write_outputs(struct mkcatalogparams *p)
@@ -660,11 +621,6 @@ mkcatalog_write_outputs(struct mkcatalogparams *p)
   gal_list_str_t *comments;
   int outisfits=gal_fits_name_is_fits(p->objectsout);
 
-  /* If there are object labels without a pixel, then remove them from the
-     objects catalog. */
-  if(p->rowsremove)
-    mkcatalog_remove_labels_with_no_pixel(p);
-
   /* If a catalog is to be generated. */
   if(p->objectcols)
     {
diff --git a/bin/mkcatalog/ui.c b/bin/mkcatalog/ui.c
index 780434c..ebd1743 100644
--- a/bin/mkcatalog/ui.c
+++ b/bin/mkcatalog/ui.c
@@ -612,6 +612,144 @@ ui_num_clumps(struct mkcatalogparams *p)
 
 
 
+/* To make the catalog processing more scalable (and later allow for
+   over-lappping regions), we will define a tile for each object. */
+static void
+ui_one_tile_per_object_correct_numobjects(struct mkcatalogparams *p)
+{
+  size_t ndim=p->objects->ndim;
+
+  uint8_t *rarray=NULL;
+  int32_t *l, *lf, *start;
+  gal_data_t *rowsremove=NULL;
+  size_t i, j, d, no, *min, *max, exists, width=2*ndim;
+  size_t *minmax=gal_pointer_allocate(GAL_TYPE_SIZE_T,
+                                      width*p->numobjects, 0, __func__,
+                                      "minmax");
+  size_t *coord=gal_pointer_allocate(GAL_TYPE_SIZE_T, ndim, 0, __func__,
+                                      "coord");
+
+  /* Initialize the minimum and maximum position for each tile/object. So,
+     we'll initialize the minimum coordinates to the maximum possible
+     `size_t' value (in `GAL_BLANK_SIZE_T') and the maximums to zero. */
+  for(i=0;i<p->numobjects;++i)
+    for(d=0;d<ndim;++d)
+      {
+        minmax[ i * width +        d ] = GAL_BLANK_SIZE_T; /* Minimum. */
+        minmax[ i * width + ndim + d ] = 0;                /* Maximum. */
+      }
+
+  /* Go over the objects label image and correct the minimum and maximum
+     coordinates. */
+  start=p->objects->array;
+  lf=(l=p->objects->array)+p->objects->size;
+  do
+    if(*l>0)
+      {
+        /* Get the coordinates of this pixel. */
+        gal_dimension_index_to_coord(l-start, ndim, p->objects->dsize,
+                                     coord);
+
+        /* Check to see this coordinate is the smallest/largest found so
+           far for this label. Note that labels start from 1, while indexs
+           here start from zero. */
+        min = &minmax[ (*l-1) * width        ];
+        max = &minmax[ (*l-1) * width + ndim ];
+        for(d=0;d<ndim;++d)
+          {
+            if( coord[d] < min[d] ) min[d] = coord[d];
+            if( coord[d] > max[d] ) max[d] = coord[d];
+          }
+      }
+  while(++l<lf);
+
+  /* If a label doesn't exist in the image, then write over it and define
+     the unique labels to use for the next steps. To over-write, we have
+     two counters: `i' (for the position in the input array) and `no' (or
+     `num-objects' for the counter in the output table). In the end, `no'
+     counts the total number of unique labels in the input. */
+  no=0;
+  for(i=0;i<p->numobjects;++i)
+    {
+      /* Make sure a pixel with this label exists in all dimensions. */
+      exists=0;
+      for(d=0;d<ndim;++d)
+        if ( minmax[ i * width + d ] == GAL_BLANK_SIZE_T
+             && minmax[ i * width + ndim + d ] == 0 )
+          {
+            /* When the object doesn't exist, but the user wants a row
+               anyway, make all the minimums and maximums of all
+               coordinates 0, note that the maximum is already zero. */
+            if(p->inbetweenints)
+              minmax[ i * width + d ] = 0;
+          }
+        else
+          {
+            /* Write over the blank elements when necessary
+               (i!=j). When i==j, then these statements are
+               redundant. */
+            minmax[no*width+d]=minmax[i*width+d];
+            minmax[no*width+ndim+d]=minmax[i*width+ndim+d];
+
+            /* Set the checked flag. */
+            exists=1;
+          }
+
+      /* If it does (or if the user wants to keep all integers), then
+         increment the output counter.*/
+      if(p->inbetweenints || exists) ++no;
+      else
+        {
+          /* If `rarray' isn't defined yet, define it. */
+          if(rarray==NULL)
+            {
+              /* Note that by initializing with zeros, all (the possibly
+                 existing) previous rows that shouldn't be removed are
+                 flagged as zero in this array. */
+              rowsremove=gal_data_alloc(NULL, GAL_TYPE_UINT8, 1,
+                                        &p->numobjects, NULL, 1,
+                                        p->cp.minmapsize, p->cp.quietmmap,
+                                        NULL, NULL, NULL);
+              rarray=rowsremove->array;
+            }
+          rarray[i]=1;
+        }
+    }
+
+  /* If `rarray!=NULL', then there are elements to remove and we need to
+     make some modifications/corrections. */
+  if(rarray)
+    {
+      /* Build an array to keep the real ID of each tile. */
+      j=0;
+      p->outlabs=gal_pointer_allocate(GAL_TYPE_INT32, no, 0, __func__,
+                                      "p->outlabs");
+      for(i=0;i<p->numobjects;++i) if(rarray[i]==0) p->outlabs[j++]=i+1;
+
+      /* Correct numobjects and clean up. */
+      p->numobjects=no;
+      gal_data_free(rowsremove);
+    }
+
+  /* For a check.
+  for(i=0;i<p->numobjects;++i)
+    printf("%zu: (%zu, %zu) --> (%zu, %zu)\n",
+           p->outlabs ? p->outlabs[i] : i+1, minmax[i*width],
+           minmax[i*width+1], minmax[i*width+2], minmax[i*width+3]);
+  */
+
+  /* Make the tiles. */
+  p->tiles=gal_tile_series_from_minmax(p->objects, minmax, p->numobjects);
+
+  /* Clean up. */
+  free(coord);
+  free(minmax);
+}
+
+
+
+
+
 /* The only mandatory input is the objects image, so first read that and
    make sure its type is correct. */
 static void
@@ -675,6 +813,11 @@ ui_read_labels(struct mkcatalogparams *p)
   ui_wcs_info(p);
 
 
+  /* Make the tiles that cover each object and also correct the total
+     number of objects based on the parsing of the image. */
+  ui_one_tile_per_object_correct_numobjects(p);
+
+
   /* Read the clumps array if necessary. */
   if(p->clumpscat)
     {
@@ -747,7 +890,6 @@ ui_read_labels(struct mkcatalogparams *p)
         }
     }
 
-
   /* Clean up. */
   keys[0].name=keys[1].name=NULL;
   keys[0].array=keys[1].array=NULL;
@@ -1347,109 +1489,6 @@ ui_preparations_outnames(struct mkcatalogparams *p)
 
 
 
-/* To make the catalog processing more scalable (and later allow for
-   over-lappping regions), we will define a tile for each object. */
-void
-ui_one_tile_per_object(struct mkcatalogparams *p)
-{
-  size_t ndim=p->objects->ndim;
-
-  uint8_t *rarray=NULL;
-  int32_t *l, *lf, *start;
-  size_t i, d, *min, *max, width=2*ndim;
-  size_t *minmax=gal_pointer_allocate(GAL_TYPE_SIZE_T,
-                                      width*p->numobjects, 0, __func__,
-                                      "minmax");
-  size_t *coord=gal_pointer_allocate(GAL_TYPE_SIZE_T, ndim, 0, __func__,
-                                      "coord");
-
-
-  /* Initialize the minimum and maximum position for each tile/object. So,
-     we'll initialize the minimum coordinates to the maximum possible
-     `size_t' value (in `GAL_BLANK_SIZE_T') and the maximums to zero. */
-  for(i=0;i<p->numobjects;++i)
-    for(d=0;d<ndim;++d)
-      {
-        minmax[ i * width +        d ] = GAL_BLANK_SIZE_T; /* Minimum. */
-        minmax[ i * width + ndim + d ] = 0;                /* Maximum. */
-      }
-
-  /* Go over the objects label image and correct the minimum and maximum
-     coordinates. */
-  start=p->objects->array;
-  lf=(l=p->objects->array)+p->objects->size;
-  do
-    if(*l>0)
-      {
-        /* Get the coordinates of this pixel. */
-        gal_dimension_index_to_coord(l-start, ndim, p->objects->dsize,
-                                     coord);
-
-        /* Check to see this coordinate is the smallest/largest found so
-           far for this label. Note that labels start from 1, while indexs
-           here start from zero. */
-        min = &minmax[ (*l-1) * width        ];
-        max = &minmax[ (*l-1) * width + ndim ];
-        for(d=0;d<ndim;++d)
-          {
-            if( coord[d] < min[d] ) min[d] = coord[d];
-            if( coord[d] > max[d] ) max[d] = coord[d];
-          }
-      }
-  while(++l<lf);
-
-  /* If a label doesn't exist in the image, then set the minimum and
-     maximum values to zero. */
-  for(i=0;i<p->numobjects;++i)
-    for(d=0;d<ndim;++d)
-      {
-        if( minmax[ i * width + d ] == GAL_BLANK_SIZE_T
-            && minmax[ i * width + ndim + d ] == 0 )
-          {
-            /* Set the first and last pixel to 0. */
-            minmax[ i * width + d ] = minmax[ i * width + ndim + d ] = 0;
-
-            /* Flag this label as one that must be removed. The ones that
-               must be removed get a value of `1' in `p->rowsremove'. */
-            if(p->inbetweenints==0)
-              {
-                if(p->rowsremove)
-                  rarray[i]=1;
-                else
-                  {
-                    /* Note that by initializing with zeros, all (the
-                       possibly existing) previous rows that shouldn't be
-                       removed are flagged as zero in this array. */
-                    p->rowsremove=gal_data_alloc(NULL, GAL_TYPE_UINT8, 1,
-                                                 &p->numobjects, NULL, 1,
-                                                 p->cp.minmapsize,
-                                                 p->cp.quietmmap,
-                                                 NULL, NULL, NULL);
-                    rarray=p->rowsremove->array;
-                    rarray[i]=1;
-                  }
-              }
-          }
-      }
-
-  /* For a check.
-  for(i=0;i<p->numobjects;++i)
-    printf("%zu: (%zu, %zu) --> (%zu, %zu)\n", i+1, minmax[i*width],
-           minmax[i*width+1], minmax[i*width+2], minmax[i*width+3]);
-  */
-
-  /* Make the tiles. */
-  p->tiles=gal_tile_series_from_minmax(p->objects, minmax, p->numobjects);
-
-  /* Clean up. */
-  free(coord);
-  free(minmax);
-}
-
-
-
-
-
 /* When a spectrum is requested, the slice information (slice number and
    slice WCS) is common to all different spectra. So instead of calculating
    it every time, we'll just make it once here, then copy it for every
@@ -1621,10 +1660,6 @@ ui_preparations(struct mkcatalogparams *p)
   ui_preparations_outnames(p);
 
 
-  /* Make the tiles that cover each object. */
-  ui_one_tile_per_object(p);
-
-
   /* If a spectrum is requested, generate the two WCS columns. */
   if(p->spectrum)
     {
@@ -1861,6 +1896,9 @@ ui_free_report(struct mkcatalogparams *p, struct timeval 
*t1)
   gal_data_free(p->upmask);
   gal_data_free(p->clumps);
   gal_data_free(p->objects);
+  if(p->outlabs) free(p->outlabs);
+  gal_list_data_free(p->clumpcols);
+  gal_list_data_free(p->objectcols);
   gal_list_data_free(p->specsliceinfo);
   if(p->upcheckout) free(p->upcheckout);
   gal_data_array_free(p->tiles, p->numobjects, 0);
diff --git a/doc/gnuastro.texi b/doc/gnuastro.texi
index 48a03b6..3ff0818 100644
--- a/doc/gnuastro.texi
+++ b/doc/gnuastro.texi
@@ -19109,10 +19109,10 @@ If you want to re-check a dataset with the 
blank-value-check flag already
 set (for example if you have made changes to it), then explicitly set the
 @code{GAL_DATA_FLAG_BLANK_CH} bit to zero before calling this
 function. When there are no other flags, you can just set the flags to zero
-(@code{input->flags=0}), otherwise you can use this expression:
+(@code{input->flag=0}), otherwise you can use this expression:
 
 @example
-input->flags &= ~GAL_DATA_FLAG_BLANK_CH;
+input->flag &= ~GAL_DATA_FLAG_BLANK_CH;
 @end example
 @end deftypefun
 
@@ -19139,20 +19139,17 @@ same size as @code{input}.
 
 
 @deftypefun void gal_blank_remove (gal_data_t @code{*input})
-Remove blank elements from a dataset, convert it to a 1D dataset, adjust
-the size properly (the number of non-blank elements), and toggle the
-blank-value-related bit-flags. In practice this function doesn't
-@code{realloc} the input array, it just shifts the blank elements to the
-end and adjusts the size elements of the @code{gal_data_t}, see
-@ref{Generic data container}.
+Remove blank elements from a dataset, convert it to a 1D dataset, adjust the 
size properly (the number of non-blank elements), and toggle the 
blank-value-related bit-flags.
+In practice this function doesn't@code{realloc} the input array (see 
@code{gal_blank_remove_realloc} for shrinking/re-allocating also), it just 
shifts the blank elements to the end and adjusts the size elements of the 
@code{gal_data_t}, see @ref{Generic data container}.
 
-If all the elements were blank, then @code{input->size} will be zero. This
-is thus a good parameter to check after calling this function to see if
-there actually were any non-blank elements in the input or not and take the
-appropriate measure. This check is highly recommended because it will avoid
-strange bugs in later steps.
+If all the elements were blank, then @code{input->size} will be zero.
+This is thus a good parameter to check after calling this function to see if 
there actually were any non-blank elements in the input or not and take the 
appropriate measure.
+This check is highly recommended because it will avoid strange bugs in later 
steps.
 @end deftypefun
 
+@deftypefun void gal_blank_remove_realloc (gal_data_t @code{*input})
+Similar to @code{gal_blank_remove}, but also shrinks/re-allocates the 
dataset's allocated memory.
+@end deftypefun
 
 
 
@@ -23778,10 +23775,10 @@ If you want to re-check a dataset with the 
blank-value-check flag already
 set (for example if you have made changes to it), then explicitly set the
 @code{GAL_DATA_FLAG_SORT_CH} bit to zero before calling this function. When
 there are no other flags, you can simply set the flags to zero (with
-@code{input->flags=0}), otherwise you can use this expression:
+@code{input->flag=0}), otherwise you can use this expression:
 
 @example
-input->flags &= ~GAL_DATA_FLAG_SORT_CH;
+input->flag &= ~GAL_DATA_FLAG_SORT_CH;
 @end example
 @end deftypefun
 
@@ -23803,7 +23800,7 @@ allocate a new copy of the dataset and work on that. 
Therefore if
 @code{inplace==0}, the input dataset will be modified.
 
 This function uses the bit flags of the input, so if you have modified the
-dataset, set @code{input->flags=0} before calling this function. Also note
+dataset, set @code{input->flag=0} before calling this function. Also note
 that @code{inplace} is only for the dataset elements. Therefore even when
 @code{inplace==0}, if the input is already sorted @emph{and} has no blank
 values, then the flags will be updated to show this.
@@ -24368,8 +24365,8 @@ your software), to avoid an extra parsing of the 
dataset and improve
 performance, you can set the two bits manually (see the description of
 @code{flags} in @ref{Generic data container}):
 @example
-input->flags |=  GAL_DATA_FLAG_BLANK_CH; /* Set bit to 1. */
-input->flags &= ~GAL_DATA_FLAG_HASBLANK; /* Set bit to 0. */
+input->flag |=  GAL_DATA_FLAG_BLANK_CH; /* Set bit to 1. */
+input->flag &= ~GAL_DATA_FLAG_HASBLANK; /* Set bit to 0. */
 @end example
 @end deftypefun
 
diff --git a/lib/blank.c b/lib/blank.c
index 77e6969..1d71f7d 100644
--- a/lib/blank.c
+++ b/lib/blank.c
@@ -713,3 +713,22 @@ gal_blank_remove(gal_data_t *input)
   input->flag |= GAL_DATA_FLAG_BLANK_CH;
   input->flag &= ~GAL_DATA_FLAG_HASBLANK;
 }
+
+
+
+
+
+/* Similar to `gal_blank_remove', but also reallocates/frees the extra
+   space. */
+void
+gal_blank_remove_realloc(gal_data_t *input)
+{
+  /* Remove the blanks and fix the size of the dataset. */
+  gal_blank_remove(input);
+
+  /* Run realloc to shrink the allocated space. */
+  input->array=realloc(input->array,
+                       input->size*gal_type_sizeof(input->type));
+  if(input->array==NULL)
+    error(EXIT_FAILURE, 0, "%s: couldn't reallocate memory", __func__);
+}
diff --git a/lib/gnuastro/blank.h b/lib/gnuastro/blank.h
index 0c52522..d9fedb3 100644
--- a/lib/gnuastro/blank.h
+++ b/lib/gnuastro/blank.h
@@ -132,7 +132,8 @@ gal_blank_flag_apply(gal_data_t *input, gal_data_t *flag);
 void
 gal_blank_remove(gal_data_t *data);
 
-
+void
+gal_blank_remove_realloc(gal_data_t *input);
 
 
 __END_C_DECLS    /* From C++ preparations */
diff --git a/lib/statistics.c b/lib/statistics.c
index 47f22a9..d11e86e 100644
--- a/lib/statistics.c
+++ b/lib/statistics.c
@@ -562,7 +562,12 @@ gal_statistics_unique(gal_data_t *input, int inplace)
 {
   gal_data_t *out = inplace ? input : gal_data_copy(input);
 
-  /* Remove the duplicates based on size. */
+  /* Since we are replacing the repeated elements with blank, re-set the
+     blank flags. */
+  out->flag &= ~GAL_DATA_FLAG_BLANK_CH; /* Set bit to 0. */
+  out->flag &= ~GAL_DATA_FLAG_HASBLANK; /* Set bit to 0. */
+
+  /* Set all non-unique elements to blank. */
   switch(out->type)
     {
     case GAL_TYPE_UINT8:   UNIQUE_BYTYPE( uint8_t  ); break;
@@ -582,7 +587,7 @@ gal_statistics_unique(gal_data_t *input, int inplace)
 
   /* Remove all blank elements (note that `gal_blank_remove' also corrects
      the size of the dataset and sets it to 1D). */
-  gal_blank_remove(out);
+  gal_blank_remove_realloc(out);
   return out;
 }
 



reply via email to

[Prev in Thread] Current Thread [Next in Thread]