gnuastro-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[gnuastro-commits] master df23eac: Table: new --equal and --notequal ope


From: Mohammad Akhlaghi
Subject: [gnuastro-commits] master df23eac: Table: new --equal and --notequal operators to select specific rows
Date: Thu, 19 Sep 2019 13:43:50 -0400 (EDT)

branch: master
commit df23eac1219402d7553181d1dde0d3f87e079ef0
Author: Mohammad Akhlaghi <address@hidden>
Commit: Mohammad Akhlaghi <address@hidden>

    Table: new --equal and --notequal operators to select specific rows
    
    Until now, if you wanted to select some special rows by equality of their
    value with something, you had to use the `--range' option and give a value
    just larger than it (but not large enough to include another
    row). Alternatively, there was no way to remove some rows by equality (for
    example to discard some rows of the output based on their IDs). You would
    have to write a relatively complex AWK statement for this job. Both are
    very inconvenient and prone to errors.
    
    With this commit, Table now has two new options `--equal' and
    `--notequal'. They can be used for the two tasks mentioned above: selecting
    rows based on equality with some given criteria. It is therefore very easy
    now to select/discard some objects based on their IDs for example.
    
    Since this is ultimately a "selection" of the rows (like `range'), the
    implementation of `--range' was also modified to become generic, so it can
    easily be used for these two operators as well (only the final step
    changes), and so new options for other operations can easily be added in
    the future.
---
 NEWS              |   7 ++
 bin/table/args.h  |  50 ++++++++-
 bin/table/main.h  |  26 ++++-
 bin/table/table.c | 250 ++++++++++++++++++++++++++++++-----------
 bin/table/ui.c    | 331 +++++++++++++++++++++++++++++-------------------------
 bin/table/ui.h    |  15 ++-
 doc/gnuastro.texi |  25 ++++-
 lib/options.c     |  38 ++++---
 8 files changed, 500 insertions(+), 242 deletions(-)

diff --git a/NEWS b/NEWS
index 7f58e34..14c5897 100644
--- a/NEWS
+++ b/NEWS
@@ -16,6 +16,13 @@ See the end of the file for license conditions.
      the redshift given to CosmicCalculator. You can either use known line
      names, or directly give a number as any emitted line's wavelength.
 
+  Table:
+   --equal: Output only rows that have a value equal to one of the given
+     values in the given column. For example `--equal=ID,2,4,5' will select
+     only rows that have a value of 2, 4 or 5 in the `ID' column.
+   --notequal: Output only rows that have a different value compared to the
+     values given to this option in the given column.
+
 ** Removed features
 
 ** Changed features
diff --git a/bin/table/args.h b/bin/table/args.h
index 144616c..2f2506e 100644
--- a/bin/table/args.h
+++ b/bin/table/args.h
@@ -102,13 +102,24 @@ struct argp_option program_options[] =
       GAL_OPTIONS_NOT_MANDATORY,
       GAL_OPTIONS_NOT_SET
     },
+
+
+
+
+
+    /* Output Rows */
+    {
+      0, 0, 0, 0,
+      "Rows in output:",
+      UI_GROUP_OUTROWS
+    },
     {
       "range",
       UI_KEY_RANGE,
       "STR,FLT:FLT",
       0,
       "Column, and range to limit output.",
-      GAL_OPTIONS_GROUP_OUTPUT,
+      UI_GROUP_OUTROWS,
       &p->range,
       GAL_TYPE_STRING,
       GAL_OPTIONS_RANGE_ANY,
@@ -117,12 +128,40 @@ struct argp_option program_options[] =
       gal_options_parse_name_and_values
     },
     {
+      "equal",
+      UI_KEY_EQUAL,
+      "STR,FLT,FLT",
+      0,
+      "Column, values to keep in output.",
+      UI_GROUP_OUTROWS,
+      &p->equal,
+      GAL_TYPE_STRING,
+      GAL_OPTIONS_RANGE_ANY,
+      GAL_OPTIONS_NOT_MANDATORY,
+      GAL_OPTIONS_NOT_SET,
+      gal_options_parse_name_and_values
+    },
+    {
+      "notequal",
+      UI_KEY_NOTEQUAL,
+      "STR,FLT,FLT",
+      0,
+      "Column, values to remove from output.",
+      UI_GROUP_OUTROWS,
+      &p->notequal,
+      GAL_TYPE_STRING,
+      GAL_OPTIONS_RANGE_ANY,
+      GAL_OPTIONS_NOT_MANDATORY,
+      GAL_OPTIONS_NOT_SET,
+      gal_options_parse_name_and_values
+    },
+    {
       "sort",
       UI_KEY_SORT,
       "STR,INT",
       0,
       "Column name or number for sorting.",
-      GAL_OPTIONS_GROUP_OUTPUT,
+      UI_GROUP_OUTROWS,
       &p->sort,
       GAL_TYPE_STRING,
       GAL_OPTIONS_RANGE_ANY,
@@ -135,7 +174,7 @@ struct argp_option program_options[] =
       0,
       0,
       "Sort in descending order: largets first.",
-      GAL_OPTIONS_GROUP_OUTPUT,
+      UI_GROUP_OUTROWS,
       &p->descending,
       GAL_OPTIONS_NO_ARG_TYPE,
       GAL_OPTIONS_RANGE_0_OR_1,
@@ -148,7 +187,7 @@ struct argp_option program_options[] =
       "INT",
       0,
       "Only output given number of top rows.",
-      GAL_OPTIONS_GROUP_OUTPUT,
+      UI_GROUP_OUTROWS,
       &p->head,
       GAL_TYPE_SIZE_T,
       GAL_OPTIONS_RANGE_GE_0,
@@ -161,7 +200,7 @@ struct argp_option program_options[] =
       "INT",
       0,
       "Only output given number of bottom rows.",
-      GAL_OPTIONS_GROUP_OUTPUT,
+      UI_GROUP_OUTROWS,
       &p->tail,
       GAL_TYPE_SIZE_T,
       GAL_OPTIONS_RANGE_GE_0,
@@ -172,7 +211,6 @@ struct argp_option program_options[] =
 
 
 
-
     /* End. */
     {0}
   };
diff --git a/bin/table/main.h b/bin/table/main.h
index 44694f6..0a55512 100644
--- a/bin/table/main.h
+++ b/bin/table/main.h
@@ -33,14 +33,26 @@ along with Gnuastro. If not, see 
<http://www.gnu.org/licenses/>.
 #define PROGRAM_EXEC   "asttable"      /* Program executable name. */
 #define PROGRAM_STRING PROGRAM_NAME" (" PACKAGE_NAME ") " PACKAGE_VERSION
 
+/* Row selection types. */
+enum select_types
+{
+ /* Different types of row-selection */
+ SELECT_TYPE_RANGE,             /* 0 by C standard */
+ SELECT_TYPE_EQUAL,
+ SELECT_TYPE_NOTEQUAL,
+
+ /* This marks the total number of row-selection criteria. */
+ SELECT_TYPE_NUMBER,
+};
 
 
 
 /* Basic structure. */
-struct list_range
+struct list_select
 {
-  gal_data_t           *v;
-  struct list_range *next;
+  gal_data_t          *col;
+  int                 type;
+  struct list_select *next;
 };
 
 struct arithmetic_token
@@ -77,6 +89,8 @@ struct tableparams
   uint8_t         information;  /* ==1: only print FITS information.    */
   uint8_t     colinfoinstdout;  /* ==1: print column metadata in CL.    */
   gal_data_t           *range;  /* Range to limit output.               */
+  gal_data_t           *equal;  /* Values to keep in output.            */
+  gal_data_t        *notequal;  /* Values to not include in output.     */
   char                  *sort;  /* Column name or number for sorting.   */
   uint8_t          descending;  /* Sort columns in descending order.    */
   size_t                 head;  /* Output only the no. of top rows.     */
@@ -89,9 +103,11 @@ struct tableparams
   int                    nwcs;  /* Number of WCS structures.            */
   gal_data_t      *allcolinfo;  /* Information of all the columns.      */
   gal_data_t         *sortcol;  /* Column to define a sorting.          */
-  struct list_range *rangecol;  /* Column to define a range.            */
+  int               selection;  /* Any row-selection is requested.      */
+  gal_data_t          *select;  /* Select rows for output.              */
+  struct list_select *selectcol; /* Column to define a range.           */
   uint8_t            freesort;  /* If the sort column should be freed.  */
-  uint8_t          *freerange;  /* If the range column should be freed. */
+  uint8_t         *freeselect;  /* If selection columns should be freed.*/
   uint8_t              sortin;  /* If the sort column is in the output. */
   time_t              rawtime;  /* Starting time of the program.        */
   gal_data_t       **colarray;  /* Array of columns, with arithmetic.   */
diff --git a/bin/table/table.c b/bin/table/table.c
index 8640e95..7db80f7 100644
--- a/bin/table/table.c
+++ b/bin/table/table.c
@@ -75,81 +75,207 @@ table_apply_permutation(gal_data_t *table, size_t 
*permutation,
 
 
 
-static void
-table_range(struct tableparams *p)
+static gal_data_t *
+table_selection_range(struct tableparams *p, gal_data_t *col)
 {
-  uint8_t *u;
-  double *rarr;
-  gal_data_t *mask;
-  struct list_range *tmp;
-  gal_data_t *ref, *perm, *range, *blmask;
-  size_t i, g, b, *s, *sf, one=1, ngood=0;
-  gal_data_t *min, *max, *ltmin, *gemax, *sum;
-
+  size_t one=1;
+  double *darr;
   int numok=GAL_ARITHMETIC_NUMOK;
   int inplace=GAL_ARITHMETIC_INPLACE;
+  gal_data_t *min=NULL, *max=NULL, *tmp, *ltmin, *gemax=NULL;
 
-  /* Allocate datasets for the necessary numbers and write them in. */
+  /* First, make sure everything is OK. */
+  if(p->range==NULL)
+    error(EXIT_FAILURE, 0, "%s: a bug! Please contact us to fix the "
+          "problem at %s. `p->range' should not be NULL at this point",
+          __func__, PACKAGE_BUGREPORT);
+
+  /* Allocations. */
   min=gal_data_alloc(NULL, GAL_TYPE_FLOAT64, 1, &one, NULL, 0, -1, 1,
                      NULL, NULL, NULL);
   max=gal_data_alloc(NULL, GAL_TYPE_FLOAT64, 1, &one, NULL, 0, -1, 1,
                      NULL, NULL, NULL);
+
+  /* Read the range of values for this column. */
+  darr=p->range->array;
+  ((double *)(min->array))[0] = darr[0];
+  ((double *)(max->array))[0] = darr[1];
+
+  /* Move `p->range' to the next element in the list and free the current
+     one (we have already read its values and don't need it any more). */
+  tmp=p->range;
+  p->range=p->range->next;
+  gal_data_free(tmp);
+
+  /* Find all the elements outside this range (smaller than the minimum,
+     larger than the maximum or blank) as separate binary flags.. */
+  ltmin=gal_arithmetic(GAL_ARITHMETIC_OP_LT, 1, numok, col, min);
+  gemax=gal_arithmetic(GAL_ARITHMETIC_OP_GE, 1, numok, col, max);
+
+  /* Merge them both into one array. */
+  ltmin=gal_arithmetic(GAL_ARITHMETIC_OP_OR, 1, inplace, ltmin, gemax);
+
+  /* For a check.
+  {
+    size_t i;
+    uint8_t *u=ltmin->array;
+    for(i=0;i<ltmin->size;++i) printf("%zu: %u\n", i, u[i]);
+    exit(0);
+  }
+  */
+
+  /* Clean up and return. */
+  gal_data_free(gemax);
+  gal_data_free(min);
+  gal_data_free(max);
+  return ltmin;
+}
+
+
+
+
+
+static gal_data_t *
+table_selection_equal_or_notequal(struct tableparams *p, gal_data_t *col,
+                                  int e0n1)
+{
+  double *darr;
+  size_t i, one=1;
+  int numok=GAL_ARITHMETIC_NUMOK;
+  int inplace=GAL_ARITHMETIC_INPLACE;
+  gal_data_t *eq, *out=NULL, *value=NULL;
+  gal_data_t *arg = e0n1 ? p->notequal : p->equal;
+
+  /* Note that this operator is used to make the "masked" array, so when
+     `e0n1==0' the operator should be `GAL_ARITHMETIC_OP_NE' and
+     vice-versa.
+
+     For the merging with other elements, when `e0n1==0', we need the
+     `GAL_ARITHMETIC_OP_AND', but for `e0n1==1', it should be `OR'. */
+  int mergeop  = e0n1 ? GAL_ARITHMETIC_OP_OR : GAL_ARITHMETIC_OP_AND;
+  int operator = e0n1 ? GAL_ARITHMETIC_OP_EQ : GAL_ARITHMETIC_OP_NE;
+
+  /* First, make sure everything is OK. */
+  if(arg==NULL)
+    error(EXIT_FAILURE, 0, "%s: a bug! Please contact us to fix the "
+          "problem at %s. `p->range' should not be NULL at this point",
+          __func__, PACKAGE_BUGREPORT);
+
+  /* Allocate space for the value. */
+  value=gal_data_alloc(NULL, GAL_TYPE_FLOAT64, 1, &one, NULL, 0, -1, 1,
+                     NULL, NULL, NULL);
+
+  /* Go through the values given to this call of the option and flag the
+     elements. */
+  for(i=0;i<arg->size;++i)
+    {
+      darr=arg->array;
+      ((double *)(value->array))[0] = darr[i];
+      eq=gal_arithmetic(operator, 1, numok, col, value);
+      if(out)
+        {
+          out=gal_arithmetic(mergeop, 1, inplace, out, eq);
+          gal_data_free(eq);
+        }
+      else
+        out=eq;
+    }
+
+  /* For a check.
+  {
+    uint8_t *u=out->array;
+    for(i=0;i<out->size;++i) printf("%zu: %u\n", i, u[i]);
+    exit(0);
+  }
+  */
+
+  /* Move the main pointer to the next possible call of the given
+     option. With this, we can safely free `arg' at this point. */
+  if(e0n1) p->notequal=p->notequal->next;
+  else     p->equal=p->equal->next;
+
+  /* Clean up and return. */
+  gal_data_free(value);
+  gal_data_free(arg);
+  return out;
+}
+
+
+
+
+
+static void
+table_selection(struct tableparams *p)
+{
+  uint8_t *u;
+  struct list_select *tmp;
+  gal_data_t *mask, *addmask=NULL;
+  gal_data_t *sum, *perm, *blmask;
+  size_t i, g, b, *s, *sf, ngood=0;
+  int inplace=GAL_ARITHMETIC_INPLACE;
+
+  /* Allocate datasets for the necessary numbers and write them in. */
   perm=gal_data_alloc(NULL, GAL_TYPE_SIZE_T, 1, p->table->dsize, NULL, 0,
                       p->cp.minmapsize, p->cp.quietmmap, NULL, NULL, NULL);
   mask=gal_data_alloc(NULL, GAL_TYPE_UINT8, 1, p->table->dsize, NULL, 1,
                       p->cp.minmapsize, p->cp.quietmmap, NULL, NULL, NULL);
 
-  /* Go over all the necessary range options. */
-  range=p->range;
-  for(tmp=p->rangecol;tmp!=NULL;tmp=tmp->next)
+  /* Go over each selection criteria and remove the necessary elements. */
+  for(tmp=p->selectcol;tmp!=NULL;tmp=tmp->next)
     {
-      /* Set the minimum and maximum values. */
-      rarr=range->array;
-      ((double *)(min->array))[0] = rarr[0];
-      ((double *)(max->array))[0] = rarr[1];
-
-      /* Set the reference column to read values from. */
-      ref=tmp->v;
-
-      /* Find all the bad elements (smaller than the minimum, larger than
-         the maximum or blank) so we can flag them. */
-      ltmin=gal_arithmetic(GAL_ARITHMETIC_OP_LT, 1, numok, ref, min);
-      gemax=gal_arithmetic(GAL_ARITHMETIC_OP_GE, 1, numok, ref, max);
-      blmask = ( gal_blank_present(ref, 1)
-                 ? gal_arithmetic(GAL_ARITHMETIC_OP_ISBLANK, 1, 0, ref)
-                 : NULL );
-
-      /* Merge all the flags into one array. */
-      ltmin=gal_arithmetic(GAL_ARITHMETIC_OP_OR, 1, inplace, ltmin, gemax);
-      if(blmask)
-        ltmin=gal_arithmetic(GAL_ARITHMETIC_OP_OR, 1, inplace, ltmin, blmask);
-
-      /* Add these flags to all previous flags. */
-      mask=gal_arithmetic(GAL_ARITHMETIC_OP_OR, 1, inplace, mask, ltmin);
+      switch(tmp->type)
+        {
+        case SELECT_TYPE_RANGE:
+          addmask=table_selection_range(p, tmp->col);
+          break;
+
+        case SELECT_TYPE_EQUAL:
+          addmask=table_selection_equal_or_notequal(p, tmp->col, 0);
+          break;
+
+        case SELECT_TYPE_NOTEQUAL:
+          addmask=table_selection_equal_or_notequal(p, tmp->col, 1);
+          break;
+
+        default:
+          error(EXIT_FAILURE, 0, "%s: a bug! Please contact us at %s "
+                "to fix the problem. The code %d is not a recognized "
+                "range identifier", __func__, PACKAGE_BUGREPORT,
+                tmp->type);
+        }
 
-      /* For a check.
-      {
-        float *f=ref->array;
-        uint8_t *m=mask->array;
-        uint8_t *u=ltmin->array, *uf=u+ltmin->size;
-        printf("\n\nInput column: %s\n", ref->name ? ref->name : "No Name");
-        printf("Range: %g, %g\n", rarr[0], rarr[1]);
-        printf("%-20s%-20s%-20s\n", "Value", "This mask",
-               "Including previous");
-        do printf("%-20f%-20u%-20u\n", *f++, *u++, *m++); while(u<uf);
-        exit(0);
-      }
-      */
+      /* Remove any blank elements. */
+      if(gal_blank_present(tmp->col, 1))
+        {
+          blmask = gal_arithmetic(GAL_ARITHMETIC_OP_ISBLANK, 1, 0, tmp->col);
+          addmask=gal_arithmetic(GAL_ARITHMETIC_OP_OR, 1, inplace,
+                                 addmask, blmask);
+          gal_data_free(blmask);
+        }
 
-      /* Clean up. */
-      gal_data_free(ltmin);
-      gal_data_free(gemax);
+      /* Add this mask array to the cumulative mask array (of all
+         selections). */
+      mask=gal_arithmetic(GAL_ARITHMETIC_OP_OR, 1, inplace, mask, addmask);
 
-      /* Increment pointers. */
-      range=range->next;
+      /* For a check.
+         {
+           float *f=ref->array;
+           uint8_t *m=mask->array;
+           uint8_t *u=addmask->array, *uf=u+addmask->size;
+           printf("\n\nInput column: %s\n", ref->name ? ref->name : "No Name");
+           printf("Range: %g, %g\n", rarr[0], rarr[1]);
+           printf("%-20s%-20s%-20s\n", "Value", "This mask",
+           "Including previous");
+           do printf("%-20f%-20u%-20u\n", *f++, *u++, *m++); while(u<uf);
+           exit(0);
+           }
+        */
+
+      /* Final clean up. */
+      gal_data_free(addmask);
     }
 
-  /* Count the number of bad elements. */
+  /* Find the final number of elements to print. */
   sum=gal_statistics_sum(mask);
   ngood = p->table->size - ((double *)(sum->array))[0];
 
@@ -185,15 +311,13 @@ table_range(struct tableparams *p)
 
   /* Clean up. */
   i=0;
-  for(tmp=p->rangecol;tmp!=NULL;tmp=tmp->next)
-    { if(p->freerange[i]) {gal_data_free(tmp->v); tmp->v=NULL;} ++i; }
-  ui_list_range_free(p->rangecol, 0);
+  for(tmp=p->selectcol;tmp!=NULL;tmp=tmp->next)
+    { if(p->freeselect[i]) {gal_data_free(tmp->col); tmp->col=NULL;} ++i; }
+  ui_list_select_free(p->selectcol, 0);
   gal_data_free(mask);
   gal_data_free(perm);
+  free(p->freeselect);
   gal_data_free(sum);
-  gal_data_free(min);
-  gal_data_free(max);
-  free(p->freerange);
 }
 
 
@@ -349,7 +473,7 @@ void
 table(struct tableparams *p)
 {
   /* Apply a certain range (if required) to the output sample. */
-  if(p->range) table_range(p);
+  if(p->selection) table_selection(p);
 
   /* Sort it (if required). */
   if(p->sort) table_sort(p);
diff --git a/bin/table/ui.c b/bin/table/ui.c
index 4608f93..db30ae9 100644
--- a/bin/table/ui.c
+++ b/bin/table/ui.c
@@ -238,8 +238,9 @@ ui_read_check_only_options(struct tableparams *p)
       {
         /* Range needs two input numbers. */
         if(tmp->size!=2)
-          error(EXIT_FAILURE, 0, "two values (separated by comma) necessary "
-                "for `--range' in this format: `--range=COLUMN,min,max'");
+          error(EXIT_FAILURE, 0, "two values (separated by `:' or `,') are "
+                "necessary for `--range' in this format: "
+                "`--range=COLUMN,min:max'");
 
         /* The first must be smaller than the second. */
         darr=tmp->array;
@@ -248,6 +249,7 @@ ui_read_check_only_options(struct tableparams *p)
                 "be smaller than the second (%g)", darr[0], darr[1]);
       }
 
+
   /* Make sure `--head' and `--tail' aren't given together. */
   if(p->head!=GAL_BLANK_SIZE_T && p->tail!=GAL_BLANK_SIZE_T)
     error(EXIT_FAILURE, 0, "`--head' and `--tail' options cannot be "
@@ -292,19 +294,20 @@ ui_check_options_and_arguments(struct tableparams *p)
 
 
 /**************************************************************/
-/***************   List of range datasets   *******************/
+/************   List of row-selection requests   **************/
 /**************************************************************/
 static void
-ui_list_range_add(struct list_range **list, gal_data_t *dataset)
+ui_list_select_add(struct list_select **list, gal_data_t *col, int type)
 {
-  struct list_range *newnode;
+  struct list_select *newnode;
 
   errno=0;
   newnode=malloc(sizeof *newnode);
   if(newnode==NULL)
     error(EXIT_FAILURE, errno, "%s: allocating new node", __func__);
 
-  newnode->v=dataset;
+  newnode->col=col;
+  newnode->type=type;
   newnode->next=*list;
   *list=newnode;
 }
@@ -314,15 +317,19 @@ ui_list_range_add(struct list_range **list, gal_data_t 
*dataset)
 
 
 static gal_data_t *
-ui_list_range_pop(struct list_range **list)
+ui_list_select_pop(struct list_select **list, int *type)
 {
   gal_data_t *out=NULL;
-  struct list_range *tmp;
+  struct list_select *tmp;
   if(*list)
     {
+      /* Extract all the necessary components of the node. */
       tmp=*list;
-      out=tmp->v;
+      out=tmp->col;
+      *type=tmp->type;
       *list=tmp->next;
+
+      /* Delete the node. */
       free(tmp);
     }
   return out;
@@ -333,18 +340,19 @@ ui_list_range_pop(struct list_range **list)
 
 
 static void
-ui_list_range_reverse(struct list_range **list)
+ui_list_select_reverse(struct list_select **list)
 {
+  int thistype;
   gal_data_t *thisdata;
-  struct list_range *correctorder=NULL;
+  struct list_select *correctorder=NULL;
 
   /* Only do the reversal if there is more than one element. */
   if( *list && (*list)->next )
     {
       while(*list!=NULL)
         {
-          thisdata=ui_list_range_pop(list);
-          ui_list_range_add(&correctorder, thisdata);
+          thisdata=ui_list_select_pop(list, &thistype);
+          ui_list_select_add(&correctorder, thisdata, thistype);
         }
       *list=correctorder;
     }
@@ -355,14 +363,14 @@ ui_list_range_reverse(struct list_range **list)
 
 
 void
-ui_list_range_free(struct list_range *list, int freevalue)
+ui_list_select_free(struct list_select *list, int freevalue)
 {
-  struct list_range *tmp;
+  struct list_select *tmp;
   while(list!=NULL)
     {
       tmp=list->next;
       if(freevalue)
-        gal_data_free(list->v);
+        gal_data_free(list->col);
       free(list);
       list=tmp;
     }
@@ -645,7 +653,7 @@ ui_columns_prepare(struct tableparams *p)
    (starting from 0). So if we can read it as a number, we'll subtract one
    from it. */
 static size_t
-ui_check_range_sort_read_col_ind(char *string)
+ui_check_select_sort_read_col_ind(char *string)
 {
   size_t out;
   void *ptr=&out;
@@ -661,44 +669,58 @@ ui_check_range_sort_read_col_ind(char *string)
 
 
 
-/* See if the `--range' and `--sort' columns should also be added. */
+/* See if row selection or sorting needs any extra columns to be read. */
 static void
-ui_check_range_sort_before(struct tableparams *p, gal_list_str_t *lines,
-                           size_t *nrange, size_t *origoutncols,
-                           size_t *sortindout, size_t **rangeindout_out)
-{
-  size_t *rangeind=NULL;
-  size_t *rangeindout=NULL;
+ui_check_select_sort_before(struct tableparams *p, gal_list_str_t *lines,
+                            size_t *nselect, size_t *origoutncols,
+                            size_t *sortindout, size_t **selectindout_out,
+                            size_t **selecttypeout_out)
+{;
   gal_data_t *dtmp, *allcols;
   size_t sortind=GAL_BLANK_SIZE_T;
-  int tableformat, rangehasname=0;
   gal_list_sizet_t *tmp, *indexll;
   gal_list_str_t *stmp, *add=NULL;
-  size_t i, j, *s, *sf, allncols, numcols, numrows;
+  int tableformat, selecthasname=0;
+  size_t *selectind=NULL, *selecttype;
+  size_t *selectindout=NULL, *selecttypeout;
+  size_t i, j, k, *s, *sf, allncols, numcols, numrows;
+
+  /* Important note: these have to be in the same order as the `enum
+     select_types' in `main.h'. */
+  gal_data_t *select[SELECT_TYPE_NUMBER]={p->range, p->equal, p->notequal};
 
 
   /* Allocate necessary spaces. */
-  if(p->range)
+  if(p->selection)
     {
-      *nrange=gal_list_data_number(p->range);
-      rangeind=gal_pointer_allocate(GAL_TYPE_SIZE_T, *nrange, 0,
-                                    __func__, "rangeind");
-      rangeindout=gal_pointer_allocate(GAL_TYPE_SIZE_T, *nrange, 0,
-                                        __func__, "rangeindout");
-      sf=(s=rangeindout)+*nrange; do *s++=GAL_BLANK_SIZE_T; while(s<sf);
-      *rangeindout_out=rangeindout;
+      *nselect = ( gal_list_data_number(p->range)
+                   + gal_list_data_number(p->equal)
+                   + gal_list_data_number(p->notequal) );
+      selectind=gal_pointer_allocate(GAL_TYPE_SIZE_T, *nselect, 0,
+                                     __func__, "selectind");
+      selecttype=gal_pointer_allocate(GAL_TYPE_SIZE_T, *nselect, 0,
+                                      __func__, "selecttype");
+      selectindout=gal_pointer_allocate(GAL_TYPE_SIZE_T, *nselect, 0,
+                                        __func__, "selectindout");
+      selecttypeout=gal_pointer_allocate(GAL_TYPE_SIZE_T, *nselect, 0,
+                                         __func__, "selecttypeout");
+      sf=(s=selectindout)+*nselect; do *s++=GAL_BLANK_SIZE_T; while(s<sf);
+      *selectindout_out=selectindout;
+      *selecttypeout_out=selecttypeout;
     }
 
 
   /* See if the given columns are numbers or names. */
   i=0;
-  if(p->sort)  sortind  = ui_check_range_sort_read_col_ind(p->sort);
-  if(p->range)
-    for(dtmp=p->range;dtmp!=NULL;dtmp=dtmp->next)
-      {
-        rangeind[i] = ui_check_range_sort_read_col_ind(dtmp->name);
-        ++i;
-      }
+  if(p->sort)  sortind  = ui_check_select_sort_read_col_ind(p->sort);
+  if(p->selection)
+    for(k=0;k<SELECT_TYPE_NUMBER;++k)
+      for(dtmp=select[k];dtmp!=NULL;dtmp=dtmp->next)
+        {
+          selecttype[i] = k;
+          selectind[i] = ui_check_select_sort_read_col_ind(dtmp->name);
+          ++i;
+        }
 
 
   /* Get all the column information. */
@@ -714,21 +736,21 @@ ui_check_range_sort_before(struct tableparams *p, 
gal_list_str_t *lines,
           "number given to  `--sort' (%s)",
           gal_fits_name_save_as_string(p->filename, p->cp.hdu), numcols,
           p->sort);
-  if(p->range)
-    for(i=0;i<*nrange;++i)
-      if(rangeind[i]!=GAL_BLANK_SIZE_T && rangeind[i]>=numcols)
+  if(p->selection)
+    for(i=0;i<*nselect;++i)
+      if(selectind[i]!=GAL_BLANK_SIZE_T && selectind[i]>=numcols)
         error(EXIT_FAILURE, 0, "%s has %zu columns, less than the column "
-              "number given to  `--range' (%zu)",
+              "number given to  `--range', `--equal', or `--sort' (%zu)",
               gal_fits_name_save_as_string(p->filename, p->cp.hdu), numcols,
-              rangeind[i]);
+              selectind[i]);
 
 
   /* If any of the columns isn't specified by an index, go over the table
      information and set the index based on the names. */
-  if(p->range)
-    for(i=0;i<*nrange;++i)
-      if(rangeind[i]==GAL_BLANK_SIZE_T) { rangehasname=1; break; }
-  if( (p->sort && sortind==GAL_BLANK_SIZE_T) || rangehasname )
+  if(p->selection)
+    for(i=0;i<*nselect;++i)
+      if(selectind[i]==GAL_BLANK_SIZE_T) { selecthasname=1; break; }
+  if( (p->sort && sortind==GAL_BLANK_SIZE_T) || selecthasname )
     {
       /* For `--sort', go over all the columns if an index hasn't been set
          yet. If the input columns have a name, see if their names matches
@@ -738,46 +760,48 @@ ui_check_range_sort_before(struct tableparams *p, 
gal_list_str_t *lines,
           if( allcols[i].name && !strcasecmp(allcols[i].name, p->sort) )
             { sortind=i; break; }
 
-      /* Same for `--range'. Just note that here we may have multiple calls
-         to `--range'. It is thus important to loop over the values given
-         to range first, then loop over the column names from the start for
-         each new `--ran */
+      /* Same for the selection. Just note that here we may have multiple
+         calls. It is thus important to loop over the values given to range
+         first, then loop over the column names from the start for each new
+         `--ran */
       i=0;
-      if(p->range)
-        for(dtmp=p->range;dtmp!=NULL;dtmp=dtmp->next)
+      for(k=0;k<SELECT_TYPE_NUMBER;++k)
+        for(dtmp=select[k];dtmp!=NULL;dtmp=dtmp->next)
           {
-           if(rangeind[i]==GAL_BLANK_SIZE_T)
-             for(j=0;j<numcols;++j)
-               if( allcols[j].name
-                   && !strcasecmp(allcols[j].name, dtmp->name) )
-                 { rangeind[i]=j; break; }
-           ++i;
+            if(selectind[i]==GAL_BLANK_SIZE_T)
+              for(j=0;j<numcols;++j)
+                if( allcols[j].name
+                    && !strcasecmp(allcols[j].name, dtmp->name) )
+                  { selecttype[i]=k; selectind[i]=j; break; }
+            ++i;
           }
     }
 
 
-  /* Both columns must be good indexs now, if they aren't the user didn't
+  /* The columns must be good indexs now, if they don't the user didn't
      specify the name properly and the program must abort. */
   if( p->sort && sortind==GAL_BLANK_SIZE_T )
     error(EXIT_FAILURE, 0, "%s: no column named `%s' (value to `--sort') "
           "you can either specify a name or number",
           gal_fits_name_save_as_string(p->filename, p->cp.hdu), p->sort);
-  if(p->range)
+  if(p->selection)
     {
       i=0;
-      for(dtmp=p->range;dtmp!=NULL;dtmp=dtmp->next)
-        {
-          if(rangeind[i]==GAL_BLANK_SIZE_T)
-            error(EXIT_FAILURE, 0, "%s: no column named `%s' (value to "
-                  "`--range') you can either specify a name or number",
-                  gal_fits_name_save_as_string(p->filename, p->cp.hdu),
-                  dtmp->name);
-          ++i;
-        }
+      for(k=0;k<SELECT_TYPE_NUMBER;++k)
+        for(dtmp=select[k];dtmp!=NULL;dtmp=dtmp->next)
+          {
+            if(selectind[i]==GAL_BLANK_SIZE_T)
+              error(EXIT_FAILURE, 0, "%s: no column named `%s' (value to "
+                    "`--%s') you can either specify a name or number",
+                    gal_fits_name_save_as_string(p->filename, p->cp.hdu),
+                    dtmp->name,
+                    ( k==0?"range":( k==1?"equal":"notequal") ));
+            ++i;
+          }
     }
 
 
-  /* See which columns the user has asked for. */
+  /* See which columns the user has asked to output. */
   indexll=gal_table_list_of_indexs(p->columns, allcols, numcols,
                                    p->cp.searchin, p->cp.ignorecase,
                                    p->filename, p->cp.hdu, NULL);
@@ -789,47 +813,53 @@ ui_check_range_sort_before(struct tableparams *p, 
gal_list_str_t *lines,
   i=0;
   for(tmp=indexll; tmp!=NULL; tmp=tmp->next)
     {
-      if(p->sort  && *sortindout==GAL_BLANK_SIZE_T  && tmp->v == sortind)
+      if(p->sort && *sortindout==GAL_BLANK_SIZE_T  && tmp->v == sortind)
         *sortindout=i;
-      if(p->range)
-        for(j=0;j<*nrange;++j)
-          if(rangeindout[j]==GAL_BLANK_SIZE_T && tmp->v==rangeind[j])
-            rangeindout[j]=i;
+      if(p->selection)
+        for(j=0;j<*nselect;++j)
+          if(selectindout[j]==GAL_BLANK_SIZE_T && tmp->v==selectind[j])
+            {
+              selectindout[j]=i;
+              selecttypeout[j]=selecttype[j];
+            }
       ++i;
     }
 
 
-  /* See if any of the necessary columns (for `--sort' and `--range')
-     aren't requested as an output by the user. If there is any, such
-     columns, keep them here. */
-  if( p->sort && *sortindout==GAL_BLANK_SIZE_T )
-    { *sortindout=allncols++;  gal_list_str_add(&add, p->sort, 0); }
-
+  /* See if any of the sorting or selection columns aren't requested as an
+     output by the user. If there is, keep their new label.
 
-  /* Note that the sorting and range may be requested on the same
+     Note that the sorting and range may be requested on the same
      column. In this case, we don't want to read the same column twice. */
-  if(p->range)
+  if( p->sort && *sortindout==GAL_BLANK_SIZE_T )
+    { *sortindout=allncols++;  gal_list_str_add(&add, p->sort, 0); }
+  if(p->selection)
     {
       i=0;
-      for(dtmp=p->range;dtmp!=NULL;dtmp=dtmp->next)
-        {
-          if(*sortindout!=GAL_BLANK_SIZE_T
-             && rangeindout[i]==*sortindout)
-            rangeindout[i]=*sortindout;
-          else
-            {
-              if( rangeindout[i]==GAL_BLANK_SIZE_T )
-                {
-                  rangeindout[i]=allncols++;
-                  gal_list_str_add(&add, dtmp->name, 0);
-                }
-            }
-          ++i;
-        }
+      for(k=0;k<SELECT_TYPE_NUMBER;++k)
+        for(dtmp=select[k];dtmp!=NULL;dtmp=dtmp->next)
+          {
+            if(*sortindout!=GAL_BLANK_SIZE_T && selectindout[i]==*sortindout)
+              {
+                selecttypeout[i]=k;
+                selectindout[i]=*sortindout;
+              }
+            else
+              {
+                if( selectindout[i]==GAL_BLANK_SIZE_T )
+                  {
+                    selecttypeout[i]=k;
+                    selectindout[i]=allncols++;
+                    gal_list_str_add(&add, dtmp->name, 0);
+                  }
+              }
+            ++i;
+          }
     }
 
 
-  /* Add the possibly new set of columns to read. */
+  /* If any new (not requested by the user to output) columns must be read,
+     add them to the list of columns to read from the input file. */
   if(add)
     {
       gal_list_str_reverse(&add);
@@ -839,8 +869,9 @@ ui_check_range_sort_before(struct tableparams *p, 
gal_list_str_t *lines,
 
 
   /* Clean up. */
-  if(rangeind) free(rangeind);
   gal_list_sizet_free(indexll);
+  if(selectind) free(selectind);
+  if(selecttype) free(selecttype);
   gal_data_array_free(allcols, numcols, 0);
 }
 
@@ -849,80 +880,72 @@ ui_check_range_sort_before(struct tableparams *p, 
gal_list_str_t *lines,
 
 
 static void
-ui_check_range_sort_after(struct tableparams *p, size_t nrange,
-                          size_t origoutncols, size_t sortindout,
-                          size_t *rangeindout)
+ui_check_select_sort_after(struct tableparams *p, size_t nselect,
+                           size_t origoutncols, size_t sortindout,
+                           size_t *selectindout, size_t *selecttypeout)
 {
-  struct list_range *rtmp;
-  size_t i, j, *rangein=NULL;
-  gal_data_t *tmp, *last=NULL;
+  size_t i, j;
+  struct list_select *rtmp;
+  gal_data_t *tmp, *origlast=NULL;
 
   /* Allocate the necessary arrays. */
-  if(p->range)
-    {
-      rangein=gal_pointer_allocate(GAL_TYPE_UINT8, nrange, 0,
-                                   __func__, "rangein");
-      p->freerange=gal_pointer_allocate(GAL_TYPE_UINT8, nrange, 1,
-                                        __func__, "p->freerange");
-    }
+  if(p->selection)
+    p->freeselect=gal_pointer_allocate(GAL_TYPE_UINT8, nselect, 1,
+                                       __func__, "p->freeselect");
 
 
-  /* Set the proper pointers. For `rangecol' we'll need to do it separately
-     (because the orders can get confused).*/
+  /* Set some necessary pointers (last pointer of actual output table and
+     pointer to the sort column). */
   i=0;
   for(tmp=p->table; tmp!=NULL; tmp=tmp->next)
     {
-      if(i==origoutncols-1)           last=tmp;
+      if(i==origoutncols-1)        origlast=tmp;
       if(p->sort && i==sortindout) p->sortcol=tmp;
       ++i;
     }
 
 
-  /* Find the range columns. */
-  for(i=0;i<nrange;++i)
+  /* Since we can have several selection columns, we'll treat them
+     differently. */
+  for(i=0;i<nselect;++i)
     {
       j=0;
       for(tmp=p->table; tmp!=NULL; tmp=tmp->next)
         {
-          if(j==rangeindout[i])
+          if(j==selectindout[i])
             {
-              ui_list_range_add(&p->rangecol, tmp);
+              ui_list_select_add(&p->selectcol, tmp, selecttypeout[i]);
               break;
             }
           ++j;
         }
     }
-  ui_list_range_reverse(&p->rangecol);
+  ui_list_select_reverse(&p->selectcol);
 
 
-  /* Terminate the actual table where it should be terminated (by setting
-     `last->next' to NULL. */
-  last->next=NULL;
+  /* Terminate the desired output table where it should be terminated (by
+     setting `origlast->next' to NULL). */
+  origlast->next=NULL;
 
 
   /*  Also, remove any possibly existing `next' pointer for `sortcol' and
-     `rangecol'. */
+     `selectcol'. */
   if(p->sort && sortindout>=origoutncols)
     { p->sortcol->next=NULL;  p->freesort=1; }
   else p->sortin=1;
-  if(p->range)
+  if(p->selection)
     {
       i=0;
-      for(rtmp=p->rangecol;rtmp!=NULL;rtmp=rtmp->next)
+      for(rtmp=p->selectcol;rtmp!=NULL;rtmp=rtmp->next)
         {
-          if(rangeindout[i]>=origoutncols)
+          if(selectindout[i]>=origoutncols)
             {
-              rtmp->v->next=NULL;
-              p->freerange[i] = (rtmp->v==p->sortcol) ? 0 : 1;
+              rtmp->col->next=NULL;
+              p->freeselect[i] = (rtmp->col==p->sortcol) ? 0 : 1;
             }
-          else rangein[i]=1;
           ++i;
         }
     }
-
-
-  /* Clean up. */
-  if(rangein) free(rangein);
 }
 
 
@@ -935,9 +958,10 @@ ui_preparations(struct tableparams *p)
 {
   size_t *colmatch;
   gal_list_str_t *lines;
-  size_t nrange=0, origoutncols=0;
+  size_t nselect=0, origoutncols=0;
+  size_t sortindout=GAL_BLANK_SIZE_T;
   struct gal_options_common_params *cp=&p->cp;
-  size_t sortindout=GAL_BLANK_SIZE_T, *rangeindout=NULL;
+  size_t *selectindout=NULL, *selecttypeout=NULL;
 
   /* If there were no columns specified or the user has asked for
      information on the columns, we want the full set of columns. */
@@ -953,10 +977,14 @@ ui_preparations(struct tableparams *p)
   lines=gal_options_check_stdin(p->filename, p->cp.stdintimeout, "input");
 
 
-  /* If sort or range are given, see if we should read them also. */
-  if(p->range || p->sort)
-    ui_check_range_sort_before(p, lines, &nrange, &origoutncols, &sortindout,
-                               &rangeindout);
+  /* If any kind of row-selection is requested set `p->selection' to 1. */
+  p->selection = p->range || p->equal || p->notequal;
+
+  /* If row sorting or selection are requested, see if we should read any
+     extra columns. */
+  if(p->selection || p->sort)
+    ui_check_select_sort_before(p, lines, &nselect, &origoutncols, &sortindout,
+                                &selectindout, &selecttypeout);
 
 
   /* If we have any arithmetic operations, we need to make sure how many
@@ -976,11 +1004,11 @@ ui_preparations(struct tableparams *p)
   gal_list_str_free(lines, 1);
 
 
-  /* If the range and sort options are requested, keep them as separate
-     datasets. */
-  if(p->range || p->sort)
-    ui_check_range_sort_after(p, nrange, origoutncols, sortindout,
-                              rangeindout);
+  /* If row sorting or selection are requested, keep them as separate
+     datasets.*/
+  if(p->selection || p->sort)
+    ui_check_select_sort_after(p, nselect, origoutncols, sortindout,
+                               selectindout, selecttypeout);
 
 
   /* If there was no actual data in the file, then inform the user and
@@ -1019,7 +1047,8 @@ ui_preparations(struct tableparams *p)
 
   /* Clean up. */
   free(colmatch);
-  if(rangeindout) free(rangeindout);
+  if(selectindout) free(selectindout);
+  if(selecttypeout) free(selecttypeout);
 }
 
 
diff --git a/bin/table/ui.h b/bin/table/ui.h
index 37f61a3..7af1d1c 100644
--- a/bin/table/ui.h
+++ b/bin/table/ui.h
@@ -30,9 +30,18 @@ along with Gnuastro. If not, see 
<http://www.gnu.org/licenses/>.
 
 
 
+/* Option groups particular to this program. */
+enum program_args_groups
+{
+  UI_GROUP_OUTROWS = GAL_OPTIONS_GROUP_AFTER_COMMON,
+};
+
+
+
+
 /* Available letters for short options:
 
-   a b d e f g j k l m n p t u v x y z
+   a b d f g j k l m p t u v x y z
    A B C E G H J L O Q R X Y
 */
 enum option_keys_enum
@@ -44,6 +53,8 @@ enum option_keys_enum
   UI_KEY_INFORMATION     = 'i',
   UI_KEY_COLINFOINSTDOUT = 'O',
   UI_KEY_RANGE           = 'r',
+  UI_KEY_EQUAL           = 'e',
+  UI_KEY_NOTEQUAL        = 'n',
   UI_KEY_SORT            = 's',
   UI_KEY_DESCENDING      = 'd',
   UI_KEY_HEAD            = 'H',
@@ -61,7 +72,7 @@ void
 ui_read_check_inputs_setup(int argc, char *argv[], struct tableparams *p);
 
 void
-ui_list_range_free(struct list_range *list, int freevalue);
+ui_list_select_free(struct list_select *list, int freevalue);
 
 void
 ui_free_report(struct tableparams *p);
diff --git a/doc/gnuastro.texi b/doc/gnuastro.texi
index dfb13f5..48e4e7a 100644
--- a/doc/gnuastro.texi
+++ b/doc/gnuastro.texi
@@ -8914,7 +8914,7 @@ So by default meta-data aren't included.
 
 @item -r STR,FLT:FLT
 @itemx --range=STR,FLT:FLT
-Only print the output rows that have a value within the given range in the 
@code{STR} column (can be a name or counter).
+Only output rows that have a value within the given range in the @code{STR} 
column (can be a name or counter).
 Note that the range is only inclusive in the lower-limit.
 For example with @code{--range=sn,5:20} the output's columns will only contain 
rows that have a value in the @code{sn} column (not case-sensitive) that is 
greater or equal to 5, and less than 20.
 
@@ -8927,6 +8927,29 @@ This is good when you just want to select using one 
column's values, but don't n
 For one example of using this option, see the example under
 @option{--sigclip-median} in @ref{Invoking aststatistics}.
 
+@item -e STR,INT/FLT,...
+@itemx --equal=STR,INT/FLT,...
+Only output rows that are equal to the given number(s) in the given column.
+The first argument is the column identifier (name or number, see 
@ref{Selecting table columns}), after that you can specify any number of values.
+For example @option{--equal=ID,5,6,8} will only print the rows that have a 
value of 5, 6, or 8 in the @code{ID} column.
+This option can also be called multiple times, so @option{--equal=ID,4,5 
--equal=ID,6,7} has the same effect as @option{--equal=ID,4,5,6,7}.
+
+@cartouche
+@noindent
+@strong{Equality and floating point numbers:} Floating point numbers are only 
approximate values (see @ref{Numeric data types}).
+In this context, their equality depends on how the input table was 
originally stored (as a plain text table or as an ASCII/binary FITS table).
+If you want to select floating point numbers, it is strongly recommended to 
use the @option{--range} option and set a very small interval around your 
desired number, instead of using @option{--equal} or @option{--notequal}.
+@end cartouche
+
+@item -n STR,INT/FLT,...
+@itemx --notequal=STR,INT/FLT,...
+Only output rows that are @emph{not} equal to the given number(s) in the given 
column.
+The first argument is the column identifier (name or number, see 
@ref{Selecting table columns}), after that you can specify any number of values.
+For example @option{--notequal=ID,5,6,8} will only print the rows where the 
@code{ID} column doesn't have a value of 5, 6, or 8.
+This option can also be called multiple times, so @option{--notequal=ID,4,5 
--notequal=ID,6,7} has the same effect as @option{--notequal=ID,4,5,6,7}.
+
+Be very careful if you want to use the non-equality with floating point 
numbers, see the special note under @option{--equal} for more.
+
 @item -s STR
 @item --sort=STR
 Sort the output rows based on the values in the @code{STR} column (can be a 
column name or number).
diff --git a/lib/options.c b/lib/options.c
index 06c7c40..48a0a14 100644
--- a/lib/options.c
+++ b/lib/options.c
@@ -1140,24 +1140,34 @@ gal_options_parse_name_and_values(struct argp_option 
*option, char *arg,
 
       /* Read the values and write the name. */
       dataset=gal_options_parse_list_of_numbers(values, filename, lineno);
-      dataset->name=name;
 
-      /* Add the given dataset to the end of an existing dataset. */
-      existing = *(gal_data_t **)(option->value);
-      if(existing)
+      /* If there actually was a string of numbers, then do the rest. */
+      if(dataset)
         {
-          for(tmp=existing;tmp!=NULL;tmp=tmp->next)
-            if(tmp->next==NULL) { tmp->next=dataset; break; }
+          dataset->name=name;
+
+          /* Add the given dataset to the end of an existing dataset. */
+          existing = *(gal_data_t **)(option->value);
+          if(existing)
+            {
+              for(tmp=existing;tmp!=NULL;tmp=tmp->next)
+                if(tmp->next==NULL) { tmp->next=dataset; break; }
+            }
+          else
+            *(gal_data_t **)(option->value) = dataset;
+
+          /* For a check.
+             printf("arg: %s\n", arg);
+             darray=dataset->array;
+             for(i=0;i<dataset->size;++i) printf("%f\n", darray[i]);
+             exit(0);
+          */
         }
       else
-        *(gal_data_t **)(option->value) = dataset;
-
-      /* For a check.
-      printf("arg: %s\n", arg);
-      darray=dataset->array;
-      for(i=0;i<dataset->size;++i) printf("%f\n", darray[i]);
-      exit(0);
-      */
+        error(EXIT_FAILURE, 0, "`--%s' requires a string of numbers "
+              "(separated by `,' or `:') following its first argument, "
+              "please run with `--help' for more information",
+              option->name);
 
       /* Our job is done, return NULL. */
       return NULL;



reply via email to

[Prev in Thread] Current Thread [Next in Thread]