gnuastro-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[gnuastro-commits] master 0a2fd0e 089/125: New implementations for histo


From: Mohammad Akhlaghi
Subject: [gnuastro-commits] master 0a2fd0e 089/125: New implementations for histogram and CFP
Date: Sun, 23 Apr 2017 22:36:45 -0400 (EDT)

branch: master
commit 0a2fd0e9a65a857118cc77791de6d3d7b25f83a1
Author: Mohammad Akhlaghi <address@hidden>
Commit: Mohammad Akhlaghi <address@hidden>

    New implementations for histogram and CFP
    
    The old histogram and cumulative frequency plots needed an input dataset
    that is sorted. So based on some later experience in making a histogram in
    AWK a new implementation is now used which doesn't need sorting and is thus
    much more efficient and robust. For the cumulative frequency plot: it is no
    longer independent of the histogram. In this implementation, the cumulative
    frequency plot is essentially just the sum of all previous histogram bins.
    Also, the Statistics program now also makes an ASCII cumulative frequency
    plot.
    
    Some other minor things were also done with this commit.
    
      - A set of `gal_statistics_' wrapper functions were created for the
        statistics functions in Arithmetic (like `gal_statistics_num', or
        `gal_statistics_mean'). This makes it easier than calling
        `gal_arithmetic'. They are now used in the Statistics program's
        `ui_print_one_row' function.
    
      - In Arithmetic's macros, we previously used `*b' as a pointer to a
        dynamically allocated space as the blank value. But now, we use the
        `gal_blank_write' function to write the blank value in a statically
        allocated place which doesn't need freeing, and is faster.
    
      - The `UNIFUNC_MEDIANVALUE' macro now checks if the input is sorted, and
        if is already sorted, then no sorting is done.
    
      - The new `gal_blank_remove' now removes the blank values in a
        dataset. Afterwards, it sets its dimensionality to 1 and its size to
        the number of non-blank elements).
    
      - The new `gal_statistics_is_sorted' function is a fast way to check if a
        dataset is already sorted or not.
    
      - The new `gal_statistics_sort_increasing' and
        `gal_statistics_sort_decreasing' will sort a dataset of any type.
    
      - The three new `gal_statistics_regular_bins',
        `gal_statistics_histogram', and `gal_statistics_cfp' are now a much
        more robust and faster implementation of these jobs using the new
        features of `gal_data_t'.
---
 bin/statistics/args.h       |  17 +-
 bin/statistics/main.h       |   3 +
 bin/statistics/statistics.c |  95 ++++--
 bin/statistics/ui.c         |  79 +++--
 lib/arithmetic.c            | 290 +++++++++---------
 lib/blank.c                 |  50 ++++
 lib/gnuastro/blank.h        |   2 +
 lib/gnuastro/statistics.h   | 161 +++-------
 lib/qsort.c                 |   2 +-
 lib/statistics.c            | 696 ++++++++++++++++++++++++++++++++------------
 10 files changed, 875 insertions(+), 520 deletions(-)

diff --git a/bin/statistics/args.h b/bin/statistics/args.h
index 7ec8bfd..8b0e848 100644
--- a/bin/statistics/args.h
+++ b/bin/statistics/args.h
@@ -90,7 +90,7 @@ struct argp_option program_options[] =
 
     {
       0, 0, 0, 0,
-      "Values to print in one row",
+      "Values to print in one row (on command-line)",
       ARGS_GROUP_IN_ONE_ROW
     },
     {
@@ -220,7 +220,7 @@ struct argp_option program_options[] =
       ARGS_OPTION_KEY_ASCIIHIST,
       0,
       0,
-      "Print an ASCII histogram",
+      "Print an ASCII histogram.",
       ARGS_GROUP_PARTICULAR_STAT,
       &p->asciihist,
       GAL_OPTIONS_NO_ARG_TYPE,
@@ -229,6 +229,19 @@ struct argp_option program_options[] =
       GAL_OPTIONS_NOT_SET
     },
     {
+      "asciicfp",
+      ARGS_OPTION_KEY_ASCIICFP,
+      0,
+      0,
+      "Print an ASCII cumulative frequency plot.",
+      ARGS_GROUP_PARTICULAR_STAT,
+      &p->asciicfp,
+      GAL_OPTIONS_NO_ARG_TYPE,
+      GAL_OPTIONS_RANGE_0_OR_1,
+      GAL_OPTIONS_NOT_MANDATORY,
+      GAL_OPTIONS_NOT_SET
+    },
+    {
       "histogram",
       ARGS_OPTION_KEY_HISTOGRAM,
       0,
diff --git a/bin/statistics/main.h b/bin/statistics/main.h
index 2f9a796..3e26f67 100644
--- a/bin/statistics/main.h
+++ b/bin/statistics/main.h
@@ -52,6 +52,7 @@ struct statisticsparams
   float      quantilerange;  /* Quantile (Q) range: from Q to 1-Q.       */
 
   uint8_t        asciihist;  /* Print an ASCII histogram.                */
+  uint8_t         asciicfp;  /* Print an ASCII cumulative frequency plot.*/
   uint8_t        histogram;  /* Save histogram in output.                */
   uint8_t       cumulative;  /* Save cumulative distibution in output.   */
   char         *sigclipstr;  /* Multiple of sigma, and tolerance or num. */
@@ -63,6 +64,8 @@ struct statisticsparams
   uint8_t        maxbinone;  /* Set the maximum bin to 1.                */
 
   /* Internal */
+  uint8_t        needssort;  /* If sorting is needed.                    */
+  uint8_t        basicinfo;  /* Print the basic information.             */
   gal_data_t        *input;  /* Input data structure.                    */
   int               isfits;  /* Input is a FITS file.                    */
   int             hdu_type;  /* Type of HDU (image or table).            */
diff --git a/bin/statistics/statistics.c b/bin/statistics/statistics.c
index 83dad99..c08b0a5 100644
--- a/bin/statistics/statistics.c
+++ b/bin/statistics/statistics.c
@@ -97,46 +97,67 @@ reportsimplestats(struct statisticsparams *p)
 
 
 
-void
-ascii_hist(struct statisticsparams *p)
+static void
+print_ascii_plot(gal_data_t *plot, gal_data_t *bins, int h1_c0)
 {
-  size_t j;
-  size_t numbins=60, histheight=10;
-  float *bins, *sorted=p->input->array;
-  float quant=0.0f; /* histmin and histmax were already set before. */
-  int i, binonzero=0, normhist=0, maxhistone=1;
-
-  /* Find the histogram for the ASCII plot: */
-  gal_statistics_set_bins(sorted, p->input->size, numbins, 0, 0,
-                          binonzero, quant, &bins);
-  gal_statistics_histogram(sorted, p->input->size, bins, numbins,
-                           normhist, maxhistone);
-
-  /* It's maximum value is set to one. Multiply that by the desired
-     height in pixels. */
-  for(j=0;j<numbins;++j)
-    bins[j*2+1]*=histheight;
-
-  /* Plot the ASCII histogram: */
-  printf("\nRange of histogram: %g to %g\n\n", sorted[0],
-         sorted[p->input->size-1]);
-  for(i=histheight;i>=0;--i)
+  int i, j;
+  size_t height=10;
+  float *p, *b, *f, *ff, halfbinwidth;
+
+  /* Print the range so the user knows. */
+  b=bins->array;
+  halfbinwidth = (b[1]-b[0])/2;
+  printf("\n%s:\nX: (linear: %g -- %g)\nY: (linear)\n\n",
+         ( h1_c0 ? "Histogram" : "Cumulative frequency plot"),
+         b[0]-halfbinwidth, b[ bins->size - 1 ] + halfbinwidth);
+
+  /* The maximum values are set to one. So, multiply all the pixels by the
+     desired height in character-height-units. */
+  ff=(f=plot->array)+plot->size; do *f++ *= height; while(f<ff);
+
+  /* Print the ASCII plot: */
+  p=plot->array;
+  for(i=height;i>=0;--i)
     {
       printf("    |");
-      for(j=0;j<numbins;++j)
+      for(j=0;j<bins->size;++j)
         {
-          if(bins[j*2+1]>=((float)i-0.5f)
-             && bins[j*2+1]>0.0f) printf("*");
+          if(p[j]>=((float)i-0.5f) && p[j]>0.0f) printf("*");
           else printf(" ");
         }
       printf("\n");
     }
   printf("    |");
-  for(j=0;j<numbins;++j) printf("-");
+  for(j=0;j<bins->size;++j) printf("-");
   printf("\n\n");
+}
+
+
+
+
+static void
+ascii_plots(struct statisticsparams *p)
+{
+  size_t numbins=70;
+  gal_data_t *bins, *hist, *cfp;
+
+  /* Make the bins and the respective plot. */
+  bins=gal_statistics_regular_bins(p->input, NULL, numbins, NAN);
+  hist=gal_statistics_histogram(p->input, bins, 0, 1);
+  if(p->asciicfp)
+    {
+      bins->next=hist;
+      cfp=gal_statistics_cfp(p->input, bins, 1);
+    }
+
+  /* Print the plots. */
+  if(p->asciihist)  print_ascii_plot(hist, bins, 1);
+  if(p->asciicfp)   print_ascii_plot(cfp, bins, 0);
 
   /* Clean up.*/
-  free(bins);
+  gal_data_free(bins);
+  gal_data_free(hist);
+  if(p->asciicfp) gal_data_free(cfp);
 }
 
 
@@ -224,8 +245,22 @@ void
 statistics(struct statisticsparams *p)
 {
 
-  /* Print the ASCII histogram. */
-  if(p->asciihist) ascii_hist(p);
+  /* If none of the processes here are requested, return. */
+  if( p->asciihist==0
+      && p->asciicfp==0
+      && p->histogram==0
+      && p->cumulative==0
+      && p->sigclipstr==NULL
+      && isnan(p->mirrorquant) )
+    return;
+
+  /* For the main body of the program, we will assume the data is in
+     float32. */
+  p->input=gal_data_copy_to_new_type_free(p->input, GAL_DATA_TYPE_FLOAT32);
+
+  /* Print the ASCII histogram if requested. */
+  if(p->asciihist || p->asciicfp) ascii_plots(p);
+
 
 #if 0
   int r;
diff --git a/bin/statistics/ui.c b/bin/statistics/ui.c
index 735bf78..9fe149b 100644
--- a/bin/statistics/ui.c
+++ b/bin/statistics/ui.c
@@ -29,10 +29,11 @@ along with Gnuastro. If not, see 
<http://www.gnu.org/licenses/>.
 
 #include <gnuastro/fits.h>
 #include <gnuastro/qsort.h>
-#include <gnuastro/table.h>
+#include <gnuastro/blank.h>
 #include <gnuastro/table.h>
 #include <gnuastro/arithmetic.h>
 #include <gnuastro/linkedlist.h>
+#include <gnuastro/statistics.h>
 
 #include <timing.h>
 #include <options.h>
@@ -115,6 +116,7 @@ enum option_keys_enum
   ARGS_OPTION_KEY_MINIMUM,
   ARGS_OPTION_KEY_MAXIMUM,
   ARGS_OPTION_KEY_SUM,
+  ARGS_OPTION_KEY_ASCIICFP,
   ARGS_OPTION_KEY_MIRROR,
   ARGS_OPTION_KEY_NUMBINS,
   ARGS_OPTION_KEY_LOWERBIN,
@@ -163,6 +165,7 @@ ui_initialize_options(struct statisticsparams *p,
   /* Program-specific initializers */
   p->lessthan            = NAN;
   p->onebinstart         = NAN;
+  p->mirrorquant         = NAN;
   p->greaterequal        = NAN;
   p->quantilerange       = NAN;
 
@@ -287,6 +290,7 @@ ui_read_check_only_options(struct statisticsparams *p)
      the output. */
   gal_table_check_fits_format(p->cp.output, p->cp.tableformat);
 
+
   /* If less than and greater than are both given, make sure that the value
      to greater than is smaller than the value to less-than. */
   if( !isnan(p->lessthan) && !isnan(p->greaterequal)
@@ -295,12 +299,27 @@ ui_read_check_only_options(struct statisticsparams *p)
           "than the value to `--greaterequal' (%g)", p->lessthan,
           p->greaterequal);
 
+
   /* Less-than and greater-equal cannot be called together with
      quantrange. */
   if( ( !isnan(p->lessthan) || !isnan(p->greaterequal) )
       && !isnan(p->quantilerange) )
     error(EXIT_FAILURE, 0, "`--lessthan' and/or `--greaterequal' cannot "
           "be called together with `--quantrange'");
+
+
+  /* The quantile range only makes sense with value less than 0.5. */
+  if( !isnan(p->quantilerange) && p->quantilerange>=0.5)
+    error(EXIT_FAILURE, 0, "%g>=0.5! The quantile range must be less than "
+          "0.5. Recall the the quantile (Q) range is defined to be: Q to "
+          "1-Q", p->quantilerange);
+
+
+  /* When binned outputs are requested, make sure that `numbins' is set. */
+  if(p->histogram || p->cumulative)
+    error(EXIT_FAILURE, 0, "`--numbins' isn't set. When the histogram or "
+          "cumulative frequency plots are requested, the number of bins "
+          "(`--numbins') is necessary");
 }
 
 
@@ -382,13 +401,9 @@ ui_check_options_and_arguments(struct statisticsparams *p)
 /***************       Preparations         *******************/
 /**************************************************************/
 static void
-ui_print_one_number(gal_data_t *data, int operator)
+ui_print_one_number(gal_data_t *out)
 {
-  char *toprint;
-  gal_data_t *out;
-  int flags=GAL_ARITHMETIC_NUMOK;
-  out=gal_arithmetic(operator, flags, data);
-  toprint=gal_data_write_to_string(out->array, out->type, 0);
+  char *toprint=gal_data_write_to_string(out->array, out->type, 0);
   printf("%s ", toprint);
   gal_data_free(out);
   free(toprint);
@@ -411,19 +426,19 @@ ui_print_one_row(struct statisticsparams *p)
     switch(tmp->v)
       {
       case ARGS_OPTION_KEY_NUMBER:
-        ui_print_one_number(p->input, GAL_ARITHMETIC_OP_NUMVAL); break;
+        ui_print_one_number( gal_statistics_number(p->input) );      break;
       case ARGS_OPTION_KEY_MINIMUM:
-        ui_print_one_number(p->input, GAL_ARITHMETIC_OP_MINVAL); break;
+        ui_print_one_number( gal_statistics_minimum(p->input) );     break;
       case ARGS_OPTION_KEY_MAXIMUM:
-        ui_print_one_number(p->input, GAL_ARITHMETIC_OP_MAXVAL); break;
+        ui_print_one_number( gal_statistics_maximum(p->input) );     break;
       case ARGS_OPTION_KEY_SUM:
-        ui_print_one_number(p->input, GAL_ARITHMETIC_OP_SUMVAL); break;
+        ui_print_one_number( gal_statistics_sum(p->input) );         break;
       case ARGS_OPTION_KEY_MEAN:
-        ui_print_one_number(p->input, GAL_ARITHMETIC_OP_MEANVAL); break;
+        ui_print_one_number( gal_statistics_mean(p->input) );        break;
       case ARGS_OPTION_KEY_STD:
-        ui_print_one_number(p->input, GAL_ARITHMETIC_OP_STDVAL); break;
+        ui_print_one_number( gal_statistics_std(p->input) );         break;
       case ARGS_OPTION_KEY_MEDIAN:
-        ui_print_one_number(p->input, GAL_ARITHMETIC_OP_MEDIANVAL); break;
+        ui_print_one_number( gal_statistics_median(p->input) );      break;
       case ARGS_OPTION_KEY_MODE:
         error(EXIT_FAILURE, 0, "mode isn't implemented yet!");
         break;
@@ -504,8 +519,7 @@ ui_preparations(struct statisticsparams *p)
 {
   gal_data_t *tmp;
   char *errorstring;
-  float *f, *ff, *fo;
-  size_t num, numcolmatches=0;
+  size_t numcolmatches=0;
   struct gal_linkedlist_stll *column=NULL;
 
   /* Read the input: no matter if its an image or a table column. */
@@ -534,33 +548,14 @@ ui_preparations(struct statisticsparams *p)
   /* Set the out-of-range values in the input to blank. */
   ui_out_of_range_to_blank(p);
 
-  /* Print the one-row numbers if the user asked for them. */
-  if(p->toprint) ui_print_one_row(p);
+  /* Only keep the numbers we want. */
+  gal_blank_remove(p->input);
 
-  /* For the main body of the program, we will assume the data is in
-     float32. */
-  p->input=gal_data_copy_to_new_type_free(p->input, GAL_DATA_TYPE_FLOAT32);
-
-  /* Put all the blank elements in the end for easier sorting */
-  ff=(fo=f=p->input->array)+p->input->size;
-  num=0; do if(!isnan(*f)) {*fo++=*f; ++num;} while(++f<ff);
-  f=((float *)(p->input->array))+num; do *f++=NAN; while(f<ff);
-
-  /* Sort the non-blank elements. */
-  qsort(p->input->array, num, sizeof(float), gal_qsort_float32_increasing);
-
-  /* For a check:
-  {
-    size_t i;
-    float *f=p->input->array;
-    for(i=0;i<p->input->size;++i) printf("%f\n", f[i]);
-  }
-  */
-
-  /* Remove the dimensionality of the array and correct its size to only be
-     the non-blank elements. */
-  p->input->ndim=1;
-  p->input->dsize[0]=p->input->size=num;
+  /* Print the one-row numbers if the user asked for them. We want this to
+     be done before converting the array to a float, since these operations
+     can be done on any type and if the user has just asked for these
+     operations, we don't want to waste their time for nothing. */
+  if(p->toprint) ui_print_one_row(p);
 }
 
 
diff --git a/lib/arithmetic.c b/lib/arithmetic.c
index f6f1c19..4490a3f 100644
--- a/lib/arithmetic.c
+++ b/lib/arithmetic.c
@@ -30,6 +30,7 @@ along with Gnuastro. If not, see 
<http://www.gnu.org/licenses/>.
 
 #include <gnuastro/blank.h>
 #include <gnuastro/qsort.h>
+#include <gnuastro/statistics.h>
 #include <gnuastro/arithmetic.h>
 
 #include <arithmetic-binary.h>
@@ -259,34 +260,36 @@ arithmetic_check_float_input(gal_data_t *in, int 
operator, char *numstr)
 /***************             Unary functions              **************/
 /***********************************************************************/
 
-/* Note that for floating point types *b!=*b (by definition of NaN). And in
+/* Note that for floating point types b!=b (by definition of NaN). And in
    such cases, even if there are blank values, the smaller and larger
    condition checked will fail, therefore the final result will be what we
    want (to ignore the blank values). */
 #define UNIFUNC_MINVALUE(IT) {                                          \
     IT *oa=o->array;                                                    \
-    int blankeq = (*b==*b && gal_blank_present(in)) ? 1 : 0;            \
+    int blankeq = (b==b && gal_blank_present(in)) ? 1 : 0;              \
     if(blankeq)                                                         \
-      do if(*ia!=*b) *oa = *ia < *oa ? *ia : *oa; while(++ia<iaf);      \
+      do if(*ia!=b) *oa = *ia < *oa ? *ia : *oa; while(++ia<iaf);       \
     else                                                                \
       do *oa = *ia < *oa ? *ia : *oa; while(++ia<iaf);                  \
   }
 
+
 #define UNIFUNC_MAXVALUE(IT) {                                          \
     IT *oa=o->array;                                                    \
-    int blankeq = (*b==*b && gal_blank_present(in)) ? 1 : 0;            \
+    int blankeq = (b==b && gal_blank_present(in)) ? 1 : 0;              \
     if(blankeq)                                                         \
-      do if(*ia!=*b) *oa = *ia > *oa ? *ia : *oa; while(++ia<iaf);      \
+      do if(*ia!=b) *oa = *ia > *oa ? *ia : *oa; while(++ia<iaf);       \
     else                                                                \
       do *oa = *ia > *oa ? *ia : *oa; while(++ia<iaf);                  \
   }
 
+
 #define UNIFUNC_NUMVALUE {                                              \
     uint64_t *oa=o->array, num=0;                                       \
     if( gal_blank_present(in) )                                         \
       {                                                                 \
-        if(*b==*b) /* Is integer type. */                               \
-          do if(*ia!=*b)       ++num;               while(++ia<iaf);    \
+        if(b==b) /* Is integer type. */                                 \
+          do if(*ia!=b)       ++num;               while(++ia<iaf);     \
         else       /* Is float type with NaN blank.   */                \
           do if(*ia==*ia)      ++num;               while(++ia<iaf);    \
       }                                                                 \
@@ -294,13 +297,14 @@ arithmetic_check_float_input(gal_data_t *in, int 
operator, char *numstr)
     *oa=num;                                                            \
   }
 
+
 #define UNIFUNC_SUMVALUE {                                              \
     double sum=0.0f;                                                    \
     float *oa=o->array;                                                 \
     if( gal_blank_present(in) )                                         \
       {                                                                 \
-        if(*b==*b) /* Is integer type. */                               \
-          do if(*ia!=*b)              sum += *ia;   while(++ia<iaf);    \
+        if(b==b) /* Is integer type. */                                 \
+          do if(*ia!=b)              sum += *ia;   while(++ia<iaf);     \
         else       /* Is float type with NaN blank.   */                \
           do if(*ia==*ia)             sum += *ia;   while(++ia<iaf);    \
       }                                                                 \
@@ -309,14 +313,15 @@ arithmetic_check_float_input(gal_data_t *in, int 
operator, char *numstr)
     *oa=sum;                                                            \
   }
 
+
 #define UNIFUNC_MEANVALUE {                                             \
     int64_t num=0;                                                      \
     double sum=0.0f;                                                    \
     float *oa=o->array;                                                 \
     if( gal_blank_present(in) )                                         \
       {                                                                 \
-        if(*b==*b) /* Is integer type. */                               \
-          do if(*ia!=*b)     { ++num; sum += *ia; } while(++ia<iaf);    \
+        if(b==b) /* Is integer type. */                                 \
+          do if(*ia!=b)     { ++num; sum += *ia; } while(++ia<iaf);     \
         else       /* Is float type with NaN blank.   */                \
           do if(*ia==*ia)    { ++num; sum += *ia; } while(++ia<iaf);    \
       }                                                                 \
@@ -328,14 +333,15 @@ arithmetic_check_float_input(gal_data_t *in, int 
operator, char *numstr)
     *oa=sum/num;                                                        \
   }
 
+
 #define UNIFUNC_STDVALUE {                                              \
     int64_t n=0;                                                        \
     float *oa=o->array;                                                 \
     double s=0.0f, s2=0.0f;                                             \
     if( gal_blank_present(in) )                                         \
       {                                                                 \
-        if(*b==*b) /* Is integer type. */                               \
-          do if(*ia!=*b)                                                \
+        if(b==b) /* Is integer type. */                                 \
+          do if(*ia!=b)                                                 \
                { ++n; s += *ia; s2 += *ia * *ia; } while(++ia<iaf);     \
         else       /* Is float type with NaN blank.   */                \
           do if(*ia==*ia)                                               \
@@ -349,55 +355,61 @@ arithmetic_check_float_input(gal_data_t *in, int 
operator, char *numstr)
     *oa=sqrt( (s2-s*s/n)/n );                                           \
   }
 
-#define UNIFUNC_MEDIANVALUE(IT, QSORT_F) {                              \
+
+#define UNIFUNC_MEDIANVALUE(IT) {                                       \
     size_t n=0;                                                         \
-    IT *a, *noblank, *oa=o->array;                                      \
+    gal_data_t *noblank, *sorted;                                       \
+    IT *u=NULL, *s, *a, *oa=o->array;                                   \
                                                                         \
-    /* Prepare space for a clean (having no blanks) dataset. If the */  \
-    /* input array is to be freed later, then just use its own space */ \
-    if( flags & GAL_ARITHMETIC_FREE ) a=noblank=in->array;              \
-    else                                                                \
+    /* After this, there is no more blanks: only `noblank' is used.*/   \
+    if( gal_blank_present(in) )                                         \
       {                                                                 \
-        errno=0;                                                        \
-        a=noblank=malloc(in->size*sizeof *noblank);                     \
-        if(noblank==NULL)                                               \
-          error(EXIT_FAILURE, 0, "%zu bytes in UNIFUNC_MEDIANVALUE",    \
-                in->size*sizeof *noblank);                              \
+        if( flags & GAL_ARITHMETIC_FREE )                               \
+          {                                                             \
+            gal_blank_remove(in);                                       \
+            noblank=in;                                                 \
+          }                                                             \
+        else                                                            \
+          {                                                             \
+            u=a=gal_data_malloc_array(in->type, in->size);              \
+            if(b==b) do if (*ia!=b)   { *a++=*ia; ++n;} while(++ia<iaf); \
+            else     do if (*ia==*ia) { *a++=*ia; ++n;} while(++ia<iaf); \
+            noblank=gal_data_alloc(u, in->type, 1, &n, NULL, 0,         \
+                                   in->minmapsize, NULL, NULL, NULL);   \
+          }                                                             \
       }                                                                 \
+    else noblank=in;                                                    \
                                                                         \
-    /* Fill it in with the elements. */                                 \
-    if(gal_blank_present(in))                                           \
-      {                                                                 \
-        if(*b==*b) do if (*ia!=*b)  { *a++=*ia; ++n;} while(++ia<iaf);  \
-        else       do if (*ia==*ia) { *a++=*ia; ++n;} while(++ia<iaf);  \
-      }                                                                 \
+    /* After this, the array is sorted, only `sorted' is used. */       \
+    if( gal_statistics_is_sorted(noblank) ) sorted=noblank;             \
     else                                                                \
       {                                                                 \
-        n=in->size;                                                     \
-        if( (flags & GAL_ARITHMETIC_FREE)==0 )                          \
-          do *a++=*ia; while(++ia<iaf);                                 \
+        if( flags & GAL_ARITHMETIC_FREE ) sorted=noblank;               \
+        else                                                            \
+          {                                                             \
+            if(u) sorted=noblank;  /* New space is already allocated.*/ \
+            else  sorted=gal_data_copy(noblank);                        \
+          }                                                             \
+        gal_statistics_sort_increasing(sorted);                         \
       }                                                                 \
                                                                         \
-    /* Sort the array and put the median value in the output. */        \
-    qsort(noblank, n, sizeof *noblank, QSORT_F);                        \
-    *oa = n%2 ? noblank[n/2] : (noblank[n/2] + noblank[n/2-1])/2 ;      \
+    /* Find the median. */                                              \
+    n=sorted->size;                                                     \
+    s=sorted->array;                                                    \
+    *oa = (sorted->size)%2 ? s[n/2] : (s[n/2] + s[n/2-1])/2 ;           \
                                                                         \
-    /* Clean up. */                                                     \
-    if( (flags & GAL_ARITHMETIC_FREE)==0 ) free(noblank);               \
+    /* Clean up. If `sorted' doesn't equal `in', then it was */         \
+    /* allocated in this block and must be freed. */                    \
+    if( sorted!=in ) gal_data_free(sorted);                             \
   }
 
 
 
 
 
-#define UNIFUNC_RUN_FUNCTION_ON_ELEMENT(IT, OP){                        \
-    IT *ia=in->array, *oa=o->array, *iaf=ia + in->size;                 \
-    do *oa++ = OP(*ia++); while(ia<iaf);                                \
-  }
-
-#define UNIFUNC_RUN_FUNCTION_ON_ARRAY(IT, QSORT_F){                     \
-    IT *ia=in->array, *iaf=ia + in->size;                               \
-    IT *b = gal_blank_alloc_write(in->type);                            \
+#define UNIFUNC_RUN_FUNCTION_ON_ARRAY(IT){                              \
+    IT b, *ia=in->array, *iaf=ia + in->size;                            \
+    gal_blank_write(&b, in->type);                                      \
     switch(operator)                                                    \
       {                                                                 \
       case GAL_ARITHMETIC_OP_MINVAL:                                    \
@@ -419,13 +431,21 @@ arithmetic_check_float_input(gal_data_t *in, int 
operator, char *numstr)
         UNIFUNC_STDVALUE;                                               \
         break;                                                          \
       case GAL_ARITHMETIC_OP_MEDIANVAL:                                 \
-        UNIFUNC_MEDIANVALUE(IT, QSORT_F);                               \
+        UNIFUNC_MEDIANVALUE(IT);                                        \
         break;                                                          \
       default:                                                          \
         error(EXIT_FAILURE, 0, "the operator code %d is not "           \
               "recognized in UNIFUNC_RUN_FUNCTION_ON_ARRAY", operator); \
       }                                                                 \
-    free(b);                                                            \
+  }
+
+
+
+
+
+#define UNIFUNC_RUN_FUNCTION_ON_ELEMENT(IT, OP){                        \
+    IT *ia=in->array, *oa=o->array, *iaf=ia + in->size;                 \
+    do *oa++ = OP(*ia++); while(ia<iaf);                                \
   }
 
 
@@ -474,42 +494,42 @@ arithmetic_check_float_input(gal_data_t *in, int 
operator, char *numstr)
 
 
 
-#define UNIARY_FUNCTION_ON_ARRAY                                          \
-  switch(in->type)                                                        \
-    {                                                                     \
-    case GAL_DATA_TYPE_UINT8:                                             \
-      UNIFUNC_RUN_FUNCTION_ON_ARRAY(uint8_t, gal_qsort_uint8_increasing)  \
-      break;                                                              \
-    case GAL_DATA_TYPE_INT8:                                              \
-      UNIFUNC_RUN_FUNCTION_ON_ARRAY(int8_t, gal_qsort_int8_increasing)    \
-      break;                                                              \
-    case GAL_DATA_TYPE_UINT16:                                            \
-      UNIFUNC_RUN_FUNCTION_ON_ARRAY(uint16_t, gal_qsort_uint16_increasing)\
-      break;                                                              \
-    case GAL_DATA_TYPE_INT16:                                             \
-      UNIFUNC_RUN_FUNCTION_ON_ARRAY(int16_t, gal_qsort_int16_increasing)  \
-        break;                                                            \
-    case GAL_DATA_TYPE_UINT32:                                            \
-      UNIFUNC_RUN_FUNCTION_ON_ARRAY(uint32_t, gal_qsort_uint32_increasing)\
-        break;                                                            \
-    case GAL_DATA_TYPE_INT32:                                             \
-      UNIFUNC_RUN_FUNCTION_ON_ARRAY(int32_t, gal_qsort_int32_increasing)  \
-        break;                                                            \
-    case GAL_DATA_TYPE_UINT64:                                            \
-      UNIFUNC_RUN_FUNCTION_ON_ARRAY(uint64_t, gal_qsort_uint64_increasing)\
-        break;                                                            \
-    case GAL_DATA_TYPE_INT64:                                             \
-      UNIFUNC_RUN_FUNCTION_ON_ARRAY(int64_t, gal_qsort_int64_increasing)  \
-        break;                                                            \
-    case GAL_DATA_TYPE_FLOAT32:                                           \
-      UNIFUNC_RUN_FUNCTION_ON_ARRAY(float, gal_qsort_float32_increasing)  \
-      break;                                                              \
-    case GAL_DATA_TYPE_FLOAT64:                                           \
-      UNIFUNC_RUN_FUNCTION_ON_ARRAY(double, gal_qsort_float64_increasing) \
-        break;                                                            \
-    default:                                                              \
-      error(EXIT_FAILURE, 0, "type code %d not recognized in "            \
-            "`UNIFUNC_PER_ELEMENT'", in->type);                           \
+#define UNIARY_FUNCTION_ON_ARRAY                                        \
+  switch(in->type)                                                      \
+    {                                                                   \
+    case GAL_DATA_TYPE_UINT8:                                           \
+      UNIFUNC_RUN_FUNCTION_ON_ARRAY( uint8_t  );                        \
+      break;                                                            \
+    case GAL_DATA_TYPE_INT8:                                            \
+      UNIFUNC_RUN_FUNCTION_ON_ARRAY( int8_t   );                        \
+      break;                                                            \
+    case GAL_DATA_TYPE_UINT16:                                          \
+      UNIFUNC_RUN_FUNCTION_ON_ARRAY( uint16_t );                        \
+      break;                                                            \
+    case GAL_DATA_TYPE_INT16:                                           \
+      UNIFUNC_RUN_FUNCTION_ON_ARRAY( int16_t  );                        \
+      break;                                                            \
+    case GAL_DATA_TYPE_UINT32:                                          \
+      UNIFUNC_RUN_FUNCTION_ON_ARRAY( uint32_t );                        \
+      break;                                                            \
+    case GAL_DATA_TYPE_INT32:                                           \
+      UNIFUNC_RUN_FUNCTION_ON_ARRAY( int32_t );                         \
+      break;                                                            \
+    case GAL_DATA_TYPE_UINT64:                                          \
+      UNIFUNC_RUN_FUNCTION_ON_ARRAY( uint64_t );                        \
+      break;                                                            \
+    case GAL_DATA_TYPE_INT64:                                           \
+      UNIFUNC_RUN_FUNCTION_ON_ARRAY( int64_t  );                        \
+      break;                                                            \
+    case GAL_DATA_TYPE_FLOAT32:                                         \
+      UNIFUNC_RUN_FUNCTION_ON_ARRAY( float    );                        \
+      break;                                                            \
+    case GAL_DATA_TYPE_FLOAT64:                                         \
+      UNIFUNC_RUN_FUNCTION_ON_ARRAY( double   );                        \
+      break;                                                            \
+    default:                                                            \
+      error(EXIT_FAILURE, 0, "type code %d not recognized in "          \
+            "`UNIARY_FUNCTION_ON_ARRAY'", in->type);                    \
     }
 
 
@@ -804,14 +824,13 @@ arithmetic_binary_function_flt(int operator, unsigned 
char flags,
    data. */
 #define DO_WHERE_OPERATION(ITT, OT) {                                \
     ITT *it=iftrue->array;                                           \
-    OT *b, *o=out->array, *of=o+out->size;                           \
+    OT b, *o=out->array, *of=o+out->size;                            \
     if(iftrue->size==1)                                              \
       {                                                              \
         if( gal_blank_present(iftrue) )                              \
           {                                                          \
-            b=gal_blank_alloc_write(out->type);                      \
-            do { *o = *c++ ? *b : *o;        } while(++o<of);        \
-            free(b);                                                 \
+            gal_blank_write(&b, out->type);                          \
+            do { *o = *c++ ? b : *o;        } while(++o<of);         \
           }                                                          \
         else                                                         \
           do   { *o = *c++ ? *it : *o;       } while(++o<of);        \
@@ -959,10 +978,10 @@ arithmetic_where(unsigned char flags, gal_data_t *out, 
gal_data_t *cond,
       {                                                                 \
         p=max;                                                          \
         for(i=0;i<dnum;++i)  /* Loop over each array. */                \
-          {   /* Only for integer types, does *b==*b. */                \
-            if(hasblank[i] && *b==*b)                                   \
-              { if( *a[i] != *b ) p = *a[i] < p ? *a[i] : p;            \
-                else              p = *a[i] < p ? *a[i] : p; }          \
+          {   /* Only for integer types, does b==b. */                  \
+            if(hasblank[i] && b==b)                                     \
+              { if( *a[i] != b ) p = *a[i] < p ? *a[i] : p;             \
+                else             p = *a[i] < p ? *a[i] : p; }           \
             ++a[i];                                                     \
           }                                                             \
         *o++=p;                                                         \
@@ -981,10 +1000,10 @@ arithmetic_where(unsigned char flags, gal_data_t *out, 
gal_data_t *cond,
       {                                                                 \
         p=min;                                                          \
         for(i=0;i<dnum;++i)  /* Loop over each array. */                \
-          {   /* Only for integer types, does *b==*b. */                \
-            if(hasblank[i] && *b==*b)                                   \
-              { if( *a[i] != *b ) p = *a[i] > p ? *a[i] : p;            \
-                else              p = *a[i] > p ? *a[i] : p; }          \
+          {   /* Only for integer types, does b==b. */                  \
+            if(hasblank[i] && b==b)                                     \
+              { if( *a[i] != b ) p = *a[i] > p ? *a[i] : p;             \
+                else             p = *a[i] > p ? *a[i] : p; }           \
             ++a[i];                                                     \
           }                                                             \
         *o++=p;                                                         \
@@ -1001,14 +1020,14 @@ arithmetic_where(unsigned char flags, gal_data_t *out, 
gal_data_t *cond,
     do    /* Loop over each pixel */                                    \
       {                                                                 \
         n=0;                                                            \
-        use=1;                                                          \
         for(i=0;i<dnum;++i)  /* Loop over each array. */                \
           {                                                             \
             /* Only integers and non-NaN floats: v==v is 1. */          \
             if(hasblank[i])                                             \
-              use = ( *b==*b                                            \
-                      ? ( *a[i]!=*b    ? 1 : 0 )          /* Integer */ \
+              use = ( b==b                                              \
+                      ? ( *a[i]!=b     ? 1 : 0 )          /* Integer */ \
                       : ( *a[i]==*a[i] ? 1 : 0 ) );       /* Float   */ \
+            else use=1;                                                 \
                                                                         \
             /* a[i] must be incremented to next pixel in any case. */   \
             if(use) ++n; else ++a[i];                                   \
@@ -1028,20 +1047,20 @@ arithmetic_where(unsigned char flags, gal_data_t *out, 
gal_data_t *cond,
     do    /* Loop over each pixel */                                    \
       {                                                                 \
         n=0;                                                            \
-        use=1;                                                          \
         sum=0.0f;                                                       \
         for(i=0;i<dnum;++i)  /* Loop over each array. */                \
           {                                                             \
             /* Only integers and non-NaN floats: v==v is 1. */          \
             if(hasblank[i])                                             \
-              use = ( *b==*b                                            \
-                      ? ( *a[i]!=*b    ? 1 : 0 )          /* Integer */ \
+              use = ( b==b                                              \
+                      ? ( *a[i]!=b     ? 1 : 0 )          /* Integer */ \
                       : ( *a[i]==*a[i] ? 1 : 0 ) );       /* Float   */ \
+            else use=1;                                                 \
                                                                         \
             /* a[i] must be incremented to next pixel in any case. */   \
             if(use) { sum += *a[i]++; ++n; } else ++a[i];               \
           }                                                             \
-        *o++ = n ? sum : *b;                                            \
+        *o++ = n ? sum : b;                                             \
       }                                                                 \
     while(o<of);                                                        \
   }
@@ -1056,20 +1075,20 @@ arithmetic_where(unsigned char flags, gal_data_t *out, 
gal_data_t *cond,
     do    /* Loop over each pixel */                                    \
       {                                                                 \
         n=0;                                                            \
-        use=1;                                                          \
         sum=0.0f;                                                       \
         for(i=0;i<dnum;++i)  /* Loop over each array. */                \
           {                                                             \
             /* Only integers and non-NaN floats: v==v is 1. */          \
             if(hasblank[i])                                             \
-              use = ( *b==*b                                            \
-                      ? ( *a[i]!=*b    ? 1 : 0 )          /* Integer */ \
+              use = ( b==b                                              \
+                      ? ( *a[i]!=b     ? 1 : 0 )          /* Integer */ \
                       : ( *a[i]==*a[i] ? 1 : 0 ) );       /* Float   */ \
+            else use=1;                                                 \
                                                                         \
             /* a[i] must be incremented to next pixel in any case. */   \
             if(use) { sum += *a[i]++; ++n; } else ++a[i];               \
           }                                                             \
-        *o++ = n ? sum/n : *b;                                          \
+        *o++ = n ? sum/n : b;                                           \
       }                                                                 \
     while(o<of);                                                        \
   }
@@ -1084,15 +1103,15 @@ arithmetic_where(unsigned char flags, gal_data_t *out, 
gal_data_t *cond,
     do    /* Loop over each pixel */                                    \
       {                                                                 \
         n=0;                                                            \
-        use=1;                                                          \
         sum=sum2=0.0f;                                                  \
         for(i=0;i<dnum;++i)  /* Loop over each array. */                \
           {                                                             \
             /* Only integers and non-NaN floats: v==v is 1. */          \
             if(hasblank[i])                                             \
-              use = ( *b==*b                                            \
-                      ? ( *a[i]!=*b    ? 1 : 0 )          /* Integer */ \
+              use = ( b==b                                              \
+                      ? ( *a[i]!=b     ? 1 : 0 )          /* Integer */ \
                       : ( *a[i]==*a[i] ? 1 : 0 ) );       /* Float   */ \
+            else use=1;                                                 \
                                                                         \
             /* a[i] must be incremented to next pixel in any case. */   \
             if(use)                                                     \
@@ -1103,7 +1122,7 @@ arithmetic_where(unsigned char flags, gal_data_t *out, 
gal_data_t *cond,
               }                                                         \
             else ++a[i];                                                \
           }                                                             \
-        *o++ = n ? sqrt( (sum2-sum*sum/n)/n ) : *b;                     \
+        *o++ = n ? sqrt( (sum2-sum*sum/n)/n ) : b;                      \
       }                                                                 \
     while(o<of);                                                        \
   }
@@ -1113,26 +1132,23 @@ arithmetic_where(unsigned char flags, gal_data_t *out, 
gal_data_t *cond,
 
 
 #define MULTIOPERAND_MEDIAN(TYPE, QSORT_F) {                            \
-    TYPE *pixs;                                                         \
     int n, use;                                                         \
+    TYPE *pixs=gal_data_malloc_array(list->type, dnum);                 \
                                                                         \
-    errno=0;                                                            \
-    pixs=malloc(dnum*sizeof *pixs);                                     \
-    if(pixs==NULL)                                                      \
-      error(EXIT_FAILURE, 0, "%zu bytes in MULTIOPERAND_MEDIAN",        \
-            dnum*sizeof *pixs);                                         \
-                                                                        \
-    do    /* Loop over each pixel */                                    \
+    /* Loop over each pixel */                                          \
+    do                                                                  \
       {                                                                 \
         n=0;                                                            \
-        use=1;                                                          \
-        for(i=0;i<dnum;++i)  /* Loop over each array. */                \
+                                                                        \
+        /* Loop over each array. */                                     \
+        for(i=0;i<dnum;++i)                                             \
           {                                                             \
             /* Only integers and non-NaN floats: v==v is 1. */          \
             if(hasblank[i])                                             \
-              use = ( *b==*b                                            \
-                      ? ( *a[i]!=*b    ? 1 : 0 )          /* Integer */ \
-                      : ( *a[i]==*a[i] ? 1 : 0 ) );       /* Float   */ \
+              use = ( b==b                                              \
+                      ? ( *a[i]!=b     ? 1 : 0 )        /* Integer */   \
+                      : ( *a[i]==*a[i] ? 1 : 0 ) );     /* Float   */   \
+            else use=1;                                                 \
                                                                         \
             /* a[i] must be incremented to next pixel in any case. */   \
             if(use) pixs[n++]=*a[i]++; else ++a[i];                     \
@@ -1145,9 +1161,12 @@ arithmetic_where(unsigned char flags, gal_data_t *out, 
gal_data_t *cond,
             *o++ = n%2 ? pixs[n/2] : (pixs[n/2] + pixs[n/2-1])/2 ;      \
           }                                                             \
         else                                                            \
-          *o++=*b;                                                      \
+          *o++=b;                                                       \
       }                                                                 \
     while(o<of);                                                        \
+                                                                        \
+    /* Clean up. */                                                     \
+    free(pixs);                                                         \
   }
 
 
@@ -1155,8 +1174,7 @@ arithmetic_where(unsigned char flags, gal_data_t *out, 
gal_data_t *cond,
 
 
 #define MULTIOPERAND_TYPE_SET(TYPE, QSORT_F) {                          \
-    TYPE *o=out->array, *of=out->array+out->size;                       \
-    TYPE **a, *b=gal_blank_alloc_write(list->type);                     \
+    TYPE b, **a, *o=out->array, *of=out->array+out->size;               \
                                                                         \
     /* Allocate space to keep the pointers to the arrays of each. */    \
     /* Input data structure. The operators will increment these */      \
@@ -1167,7 +1185,8 @@ arithmetic_where(unsigned char flags, gal_data_t *out, 
gal_data_t *cond,
       error(EXIT_FAILURE, 0, "%zu bytes for `arrays' in "               \
             "MULTIOPERAND_TYPE_SET", dnum*sizeof *a);                   \
                                                                         \
-    /* Fill in the array pointers. */                                   \
+    /* Fill in the array pointers and the blank value for this type. */ \
+    gal_blank_write(&b, list->type);                                    \
     for(tmp=list;tmp!=NULL;tmp=tmp->next)                               \
       a[i++]=tmp->array;                                                \
                                                                         \
@@ -1208,7 +1227,6 @@ arithmetic_where(unsigned char flags, gal_data_t *out, 
gal_data_t *cond,
       }                                                                 \
                                                                         \
     /* Clean up. */                                                     \
-    free(b);                                                            \
     free(a);                                                            \
   }
 
@@ -1222,7 +1240,7 @@ arithmetic_where(unsigned char flags, gal_data_t *out, 
gal_data_t *cond,
 static gal_data_t *
 arithmetic_multioperand(int operator, unsigned char flags, gal_data_t *list)
 {
-  int *hasblank;
+  uint8_t *hasblank;
   size_t i=0, dnum=1;
   gal_data_t *out, *tmp, *ttmp;
 
@@ -1261,15 +1279,9 @@ arithmetic_multioperand(int operator, unsigned char 
flags, gal_data_t *list)
                          list->wcs, 0, list->minmapsize, NULL, NULL, NULL);
 
 
-  /* hasblank is used to see if a blank value should be checked or not. */
-  errno=0;
-  hasblank=malloc(dnum*sizeof *hasblank);
-  if(hasblank==NULL)
-    error(EXIT_FAILURE, 0, "%zu bytes for hasblank in "
-          "`arithmetic_multioperand", dnum*sizeof *hasblank);
-
-
-  /* Fill in the hasblank array. */
+  /* hasblank is used to see if a blank value should be checked for each
+     list element or not. */
+  hasblank=gal_data_malloc_array(GAL_DATA_TYPE_UINT8, dnum);
   for(tmp=list;tmp!=NULL;tmp=tmp->next)
     hasblank[i++]=gal_blank_present(tmp);
 
diff --git a/lib/blank.c b/lib/blank.c
index 4474a09..e4a64e6 100644
--- a/lib/blank.c
+++ b/lib/blank.c
@@ -400,3 +400,53 @@ gal_blank_flag(gal_data_t *data)
   /* Return */
   return out;
 }
+
+
+
+
+
+#define BLANK_REMOVE(IT) {                                              \
+    IT b, *a=data->array, *af=a+data->size, *o=data->array;             \
+    if( gal_blank_present(data) )                                       \
+      {                                                                 \
+        gal_blank_write(&b, data->type);                                \
+        if(b==b)          /* Can be checked with equal: isn't NaN */    \
+          do if(*a!=b)  { ++num; *o++=*a; } while(++a<af);              \
+        else /* Blank value is NaN, so equals will fail on blank elements*/ \
+          do if(*a==*a) { ++num; *o++=*a; } while(++a<af);              \
+      }                                                                 \
+    else num=data->size;                                                \
+  }
+
+
+/* Remove blank elements from a dataset, convert it to a 1D dataset and
+   adjust the size properly. In practice this function doesn't `realloc'
+   the input array, all it does is to shift the blank eleemnts to the end
+   and adjust the size elements of the `gal_data_t'. */
+void
+gal_blank_remove(gal_data_t *data)
+{
+  size_t num=0;
+
+  /* Shift all non-blank elements to the start of the array. */
+  switch(data->type)
+    {
+    case GAL_DATA_TYPE_UINT8:    BLANK_REMOVE( uint8_t  );    break;
+    case GAL_DATA_TYPE_INT8:     BLANK_REMOVE( int8_t   );    break;
+    case GAL_DATA_TYPE_UINT16:   BLANK_REMOVE( uint16_t );    break;
+    case GAL_DATA_TYPE_INT16:    BLANK_REMOVE( int16_t  );    break;
+    case GAL_DATA_TYPE_UINT32:   BLANK_REMOVE( uint32_t );    break;
+    case GAL_DATA_TYPE_INT32:    BLANK_REMOVE( int32_t  );    break;
+    case GAL_DATA_TYPE_UINT64:   BLANK_REMOVE( uint64_t );    break;
+    case GAL_DATA_TYPE_INT64:    BLANK_REMOVE( int64_t  );    break;
+    case GAL_DATA_TYPE_FLOAT32:  BLANK_REMOVE( float    );    break;
+    case GAL_DATA_TYPE_FLOAT64:  BLANK_REMOVE( double   );    break;
+    default:
+      error(EXIT_FAILURE, 0, "type code %d not recognized in "
+            "`gal_blank_remove'", data->type);
+    }
+
+  /* Adjust the size elements of the dataset. */
+  data->ndim=1;
+  data->dsize[0]=data->size=num;
+}
diff --git a/lib/gnuastro/blank.h b/lib/gnuastro/blank.h
index 2ee0716..e35110e 100644
--- a/lib/gnuastro/blank.h
+++ b/lib/gnuastro/blank.h
@@ -82,6 +82,8 @@ gal_blank_present(gal_data_t *data);
 gal_data_t *
 gal_blank_flag(gal_data_t *data);
 
+void
+gal_blank_remove(gal_data_t *data);
 
 
 
diff --git a/lib/gnuastro/statistics.h b/lib/gnuastro/statistics.h
index ce798f2..34857f4 100644
--- a/lib/gnuastro/statistics.h
+++ b/lib/gnuastro/statistics.h
@@ -25,7 +25,7 @@ along with Gnuastro. If not, see 
<http://www.gnu.org/licenses/>.
 
 /* Include other headers if necessary here. Note that other header files
    must be included before the C++ preparations below */
-
+#include <gnuastro/data.h>
 
 
 /* C++ Preparations */
@@ -51,139 +51,69 @@ __BEGIN_C_DECLS  /* From C++ preparations */
 #define GAL_STATISTICS_MAX_SIG_CLIP_CONVERGE 50
 
 
-/****************************************************************
- *****************    Mininum and Maximum    ********************
- ****************************************************************/
-void
-gal_statistics_long_non_blank_min(long *in, size_t size, long *min,
-                                  long blank);
-
-void
-gal_statistics_long_non_blank_max(long *in, size_t size, long *max,
-                                  long blank);
-
-void
-gal_statistics_float_min(float *in, size_t size, float *min);
-
-void
-gal_statistics_float_max(float *in, size_t size, float *max);
-
-void
-gal_statistics_double_min(double *in, size_t size, double *min);
-
-double
-gal_statistics_double_min_return(double *in, size_t size);
-
-void
-gal_statistics_double_max(double *in, size_t size, double *max);
-
-double
-gal_statistics_double_max_return(double *in, size_t size);
-
-void
-gal_statistics_float_max_masked(float *in, unsigned char *mask, size_t size,
-                                float *max);
-
-void
-gal_statistics_float_second_max(float *in, size_t size, float *secondmax);
 
-void
-gal_statistics_float_second_min(float *in, size_t size, float *secondmin);
 
-void
-gal_statistics_f_min_max(float *in, size_t size, float *min, float *max);
 
-void
-gal_statistics_d_min_max(double *in, size_t size, double *min, double *max);
-
-void
-gal_statistics_d_max_with_index(double *in, size_t size, double *max,
-                                size_t *index);
-
-void
-gal_statistics_f_max_with_index(float *in, size_t size, float *max,
-                                size_t *index);
-
-void
-gal_statistics_d_min_with_index(double *in, size_t size, double *min,
-                                size_t *index);
+/* Enumerators */
+enum is_sorted_outputs
+{
+  GAL_STATISTICS_SORTED_NOT,             /* ==0 by C standard. */
 
-void
-gal_statistics_f_min_with_index(float *in, size_t size, float *min,
-                                size_t *index);
+  GAL_STATISTICS_SORTED_INCREASING,
+  GAL_STATISTICS_SORTED_DECREASING,
+};
 
 
+enum bin_status
+{
+  GAL_STATISTICS_BINS_INVALID,           /* ==0 by C standard.  */
 
+  GAL_STATISTICS_BINS_REGULAR,
+  GAL_STATISTICS_BINS_IRREGULAR,
+};
 
 
 /****************************************************************
- *****************            Sum            ********************
+ ********               Simple statistics                 *******
+ ****        (wrappers for functions in `arithmetic.h')      ****
  ****************************************************************/
-float
-gal_statistics_float_sum(float *in, size_t size);
+gal_data_t *
+gal_statistics_number(gal_data_t *data);
 
-float
-gal_statistics_float_sum_num(float *in, size_t *size);
+gal_data_t *
+gal_statistics_minimum(gal_data_t *data);
 
-float
-gal_statistics_float_sum_squared(float *in, size_t size);
+gal_data_t *
+gal_statistics_maximum(gal_data_t *data);
 
-float
-gal_statistics_float_sum_mask(float *in, unsigned char *mask, size_t size,
-                              size_t *nsize);
+gal_data_t *
+gal_statistics_sum(gal_data_t *data);
 
-float
-gal_statistics_float_sum_mask_l(float *in, long *mask, size_t size,
-                                size_t *nsize);
+gal_data_t *
+gal_statistics_mean(gal_data_t *data);
 
-float
-gal_statistics_float_sum_squared_mask(float *in, unsigned char *mask,
-                                      size_t size, size_t *nsize);
+gal_data_t *
+gal_statistics_std(gal_data_t *data);
 
-float
-gal_statistics_float_sum_squared_mask_l(float *in, long *mask,
-                                        size_t size, size_t *nsize);
+gal_data_t *
+gal_statistics_median(gal_data_t *data);
 
 
 
 
 
 /****************************************************************
- *****************      Average and          ********************
- ****************    Standard deviation      ********************
+ ********                      Sort                       *******
  ****************************************************************/
-float
-gal_statistics_float_average(float *in, size_t size);
-
-double
-gal_statistics_double_average(double *in, size_t size);
-
-void
-gal_statistics_f_ave(float *in, size_t size, float *ave, unsigned char *mask);
 
-void
-gal_statistics_f_ave_l(float *in, size_t size, float *ave, long *mask);
+int
+gal_statistics_is_sorted(gal_data_t *data);
 
 void
-gal_statistics_f_ave_std(float *in, size_t size, float *ave,
-                         float *std, unsigned char *mask);
+gal_statistics_sort_increasing(gal_data_t *data);
 
 void
-gal_statistics_f_ave_std_l(float *in, size_t size, float *ave,
-                           float *std, long *mask);
-
-
-
-
-
-/****************************************************************
- *****************           Median            ******************
- ****************************************************************/
-float
-gal_statistics_median(float *array, size_t insize);
-
-double
-gal_statistics_median_double_in_place(double *array, size_t insize);
+gal_statistics_sort_decreasing(gal_data_t *data);
 
 
 
@@ -192,22 +122,17 @@ gal_statistics_median_double_in_place(double *array, 
size_t insize);
 /****************************************************************
  ********     Histogram and Cumulative Frequency Plot     *******
  ****************************************************************/
-void
-gal_statistics_set_bins(float *sorted, size_t size, size_t numbins,
-                        float min, float max, float onebinvalue,
-                        float quant, float **obins);
+gal_data_t *
+gal_statistics_regular_bins(gal_data_t *data, gal_data_t *range,
+                            size_t numbins, float onebinstart);
 
-void
-gal_statistics_histogram(float *sorted, size_t size, float *bins,
-                         size_t numbins, int normhist, int maxhistone);
+gal_data_t *
+gal_statistics_histogram(gal_data_t *data, gal_data_t *bins,
+                         int normalize, int maxhistone);
 
-void
-gal_statistics_cumulative_fp(float *sorted, size_t size, float *bins,
-                             size_t numbins, int normcfp);
+gal_data_t *
+gal_statistics_cfp(gal_data_t *data, gal_data_t *bins, int normalize);
 
-void
-gal_statistics_save_hist(float *sorted, size_t size, size_t numbins,
-                         char *filename, char *comment);
 
 
 
diff --git a/lib/qsort.c b/lib/qsort.c
index d02a892..b06981a 100644
--- a/lib/qsort.c
+++ b/lib/qsort.c
@@ -86,7 +86,7 @@ gal_qsort_uint16_increasing(const void *a, const void *b)
 }
 
 int
-gal_qsort_in16_decreasing(const void *a, const void *b)
+gal_qsort_int16_decreasing(const void *a, const void *b)
 {
   return ( *(int16_t *)b - *(int16_t *)a );
 }
diff --git a/lib/statistics.c b/lib/statistics.c
index 4d26261..1c51fe7 100644
--- a/lib/statistics.c
+++ b/lib/statistics.c
@@ -32,8 +32,11 @@ along with Gnuastro. If not, see 
<http://www.gnu.org/licenses/>.
 
 #include <gnuastro/data.h>
 #include <gnuastro/qsort.h>
+#include <gnuastro/arithmetic.h>
 #include <gnuastro/statistics.h>
 
+#include <checkset.h>
+
 #include "mode.h"
 
 
@@ -41,267 +44,584 @@ along with Gnuastro. If not, see 
<http://www.gnu.org/licenses/>.
 
 
 
+
+
+
+
+/****************************************************************
+ ********               Simple statistics                 *******
+ ****        (wrappers for functions in `arithmetic.h')      ****
+ ****************************************************************/
+#define SIMP_FLAGS GAL_ARITHMETIC_NUMOK
+
+gal_data_t *
+gal_statistics_number(gal_data_t *data)
+{
+  return gal_arithmetic(GAL_ARITHMETIC_OP_NUMVAL, SIMP_FLAGS, data);
+}
+
+gal_data_t *
+gal_statistics_minimum(gal_data_t *data)
+{
+  return gal_arithmetic(GAL_ARITHMETIC_OP_MINVAL, SIMP_FLAGS, data);
+}
+
+gal_data_t *
+gal_statistics_maximum(gal_data_t *data)
+{
+  return gal_arithmetic(GAL_ARITHMETIC_OP_MAXVAL, SIMP_FLAGS, data);
+}
+
+gal_data_t *
+gal_statistics_sum(gal_data_t *data)
+{
+  return gal_arithmetic(GAL_ARITHMETIC_OP_SUMVAL, SIMP_FLAGS, data);
+}
+
+gal_data_t *
+gal_statistics_mean(gal_data_t *data)
+{
+  return gal_arithmetic(GAL_ARITHMETIC_OP_MEANVAL, SIMP_FLAGS, data);
+}
+
+gal_data_t *
+gal_statistics_std(gal_data_t *data)
+{
+  return gal_arithmetic(GAL_ARITHMETIC_OP_STDVAL, SIMP_FLAGS, data);
+}
+
+gal_data_t *
+gal_statistics_median(gal_data_t *data)
+{
+  return gal_arithmetic(GAL_ARITHMETIC_OP_MEDIANVAL, SIMP_FLAGS, data);
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
 /****************************************************************
  ********                      Sort                       *******
  ****************************************************************/
+/* Check if the given dataset is sorted. Output values are:
+
+     - 0: Dataset is not sorted.
+     - 1: Dataset is sorted and increasing or equal.
+     - 2: dataset is sorted and decreasing.                  */
+
+#define IS_SORTED(IT) {                                                 \
+    IT *aa=data->array, *a=data->array, *af=a+data->size-1;             \
+    if(a[1]>=a[0]) do if( *(a+1) < *a ) break; while(++a<af);           \
+    else           do if( *(a+1) > *a ) break; while(++a<af);           \
+    return ( a==af                                                      \
+             ? ( aa[1]>=aa[0]                                           \
+                 ? GAL_STATISTICS_SORTED_INCREASING                     \
+                 : GAL_STATISTICS_SORTED_DECREASING )                   \
+             : GAL_STATISTICS_SORTED_NOT );                             \
+  }
+
+int
+gal_statistics_is_sorted(gal_data_t *data)
+{
+  /* A one-element dataset can be considered, sorted, so we'll just return
+     1 (for sorted and increasing). */
+  if(data->size==1) return GAL_STATISTICS_SORTED_INCREASING;
+
+  /* Do the check. */
+  switch(data->type)
+    {
+    case GAL_DATA_TYPE_UINT8:     IS_SORTED( uint8_t  );    break;
+    case GAL_DATA_TYPE_INT8:      IS_SORTED( int8_t   );    break;
+    case GAL_DATA_TYPE_UINT16:    IS_SORTED( uint16_t );    break;
+    case GAL_DATA_TYPE_INT16:     IS_SORTED( int16_t  );    break;
+    case GAL_DATA_TYPE_UINT32:    IS_SORTED( uint32_t );    break;
+    case GAL_DATA_TYPE_INT32:     IS_SORTED( int32_t  );    break;
+    case GAL_DATA_TYPE_UINT64:    IS_SORTED( uint64_t );    break;
+    case GAL_DATA_TYPE_INT64:     IS_SORTED( int64_t  );    break;
+    case GAL_DATA_TYPE_FLOAT32:   IS_SORTED( float    );    break;
+    case GAL_DATA_TYPE_FLOAT64:   IS_SORTED( double   );    break;
+    default:
+      error(EXIT_FAILURE, 0, "type code %d not recognized in "
+            "`gal_statistics_is_sorted'", data->type);
+    }
+
+  /* Control shouldn't reach this point. */
+  error(EXIT_FAILURE, 0, "a bug! Please contact us at %s so we can fix the "
+        "problem. For some reason, control has reached the end of "
+        "`gal_statistics_is_sorted'", PACKAGE_BUGREPORT);
+  return -1;
+}
+
+
+
+
+
+/* This function is ignorant to blank values, if you want to make sure
+   there is no blank values, you can call `gal_blank_remove' first. */
+#define STATISTICS_SORT(QSORT_F) {                                        \
+    qsort(data->array, data->size, gal_data_sizeof(data->type), QSORT_F); \
+  }
 void
-gal_statistics_sort()
+gal_statistics_sort_increasing(gal_data_t *data)
 {
+  switch(data->type)
+    {
+    case GAL_DATA_TYPE_UINT8:
+      STATISTICS_SORT(gal_qsort_uint8_increasing);    break;
+    case GAL_DATA_TYPE_INT8:
+      STATISTICS_SORT(gal_qsort_int8_increasing);     break;
+    case GAL_DATA_TYPE_UINT16:
+      STATISTICS_SORT(gal_qsort_uint16_increasing);   break;
+    case GAL_DATA_TYPE_INT16:
+      STATISTICS_SORT(gal_qsort_int16_increasing);    break;
+    case GAL_DATA_TYPE_UINT32:
+      STATISTICS_SORT(gal_qsort_uint32_increasing);   break;
+    case GAL_DATA_TYPE_INT32:
+      STATISTICS_SORT(gal_qsort_int32_increasing);    break;
+    case GAL_DATA_TYPE_UINT64:
+      STATISTICS_SORT(gal_qsort_uint64_increasing);   break;
+    case GAL_DATA_TYPE_INT64:
+      STATISTICS_SORT(gal_qsort_int64_increasing);    break;
+    case GAL_DATA_TYPE_FLOAT32:
+      STATISTICS_SORT(gal_qsort_float32_increasing);  break;
+    case GAL_DATA_TYPE_FLOAT64:
+      STATISTICS_SORT(gal_qsort_float64_increasing);  break;
+    default:
+      error(EXIT_FAILURE, 0, "type code %d not recognized in "
+            "`gal_statistics_sort_increasing'", data->type);
+    }
+}
 
+
+
+
+
+/* See explanations above `gal_statistics_sort_increasing'. */
+void
+gal_statistics_sort_decreasing(gal_data_t *data)
+{
+  switch(data->type)
+    {
+    case GAL_DATA_TYPE_UINT8:
+      STATISTICS_SORT(gal_qsort_uint8_decreasing);    break;
+    case GAL_DATA_TYPE_INT8:
+      STATISTICS_SORT(gal_qsort_int8_decreasing);     break;
+    case GAL_DATA_TYPE_UINT16:
+      STATISTICS_SORT(gal_qsort_uint16_decreasing);   break;
+    case GAL_DATA_TYPE_INT16:
+      STATISTICS_SORT(gal_qsort_int16_decreasing);    break;
+    case GAL_DATA_TYPE_UINT32:
+      STATISTICS_SORT(gal_qsort_uint32_decreasing);   break;
+    case GAL_DATA_TYPE_INT32:
+      STATISTICS_SORT(gal_qsort_int32_decreasing);    break;
+    case GAL_DATA_TYPE_UINT64:
+      STATISTICS_SORT(gal_qsort_uint64_decreasing);   break;
+    case GAL_DATA_TYPE_INT64:
+      STATISTICS_SORT(gal_qsort_int64_decreasing);    break;
+    case GAL_DATA_TYPE_FLOAT32:
+      STATISTICS_SORT(gal_qsort_float32_decreasing);  break;
+    case GAL_DATA_TYPE_FLOAT64:
+      STATISTICS_SORT(gal_qsort_float64_decreasing);  break;
+    default:
+      error(EXIT_FAILURE, 0, "type code %d not recognized in "
+            "`gal_statistics_sort_decreasing'", data->type);
+    }
 }
 
 
 
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
 /****************************************************************
  ********     Histogram and Cumulative Frequency Plot     *******
  ****************************************************************/
-/* Set the bin lower values for all the bins. If the minimum and
-   maximum are equal, then use the quantile.
+/* Generate an array of regularly spaced elements. For a 1D dataset, the
+   output will be 1D, for 2D, it will be 2D.
 
-   If the value of onebinvalue is NaN, then nothing will happen,
-   however, if it is not a NaN, all the bins will be shifted such that
-   the lower values of one of the bins is placed on this value (if it
-   is in the range of the data).
-*/
-void
-gal_statistics_set_bins(float *sorted, size_t size, size_t numbins,
-                        float min, float max, float onebinvalue,
-                        float quant, float **obins)
-{
-  size_t i;
-  float diff, *bins, binwidth;
+   Input arguments:
 
-  /* Allocate space for the array. The extra bin is only for internal
-     purposes (so the loops for the histogram and CFP can see the end
-     of the last bin). It will never be seen by the user. */
-  errno=0;
-  bins=*obins=calloc((numbins+1)*2, sizeof *bins);
-  if(bins==NULL)
-    error(EXIT_FAILURE, errno, "%zu bytes for bins in gal_statistics_set_bins "
-          "(statistics.c)", (numbins+1)*2*sizeof *bins);
+     * The `data' set you want to apply the bins to. This is only necessary
+       if the range argument is not complete, see below. If `range' has all
+       the necessary information, you can pass a NULL pointer for `data'.
+
+     * The `range' data structure keeps the desired range along each
+       dimension of the input data structure, it has to be in float32
+       type. Note that if
 
-  /* If the range is not defined, find it and set the bin width. */
-  if(min==max)
+         - If you want the full range of the dataset (in any dimensions,
+           then just set `range' to NULL and the range will be specified
+           from the minimum and maximum value of the dataset.
+
+         - If there is one element for each dimension in range, then it is
+           viewed as a quantile (Q), and the range will be: `Q to 1-Q'.
+
+         - If there are two elements for each dimension in range, then they
+           are assumed to be your desired minimum and maximum values. When
+           either of the two are NaN, the minimum and maximum will be
+           calculated for it.
+
+     * The number of bins: must be larger than 0.
+
+     * `onebinstart' A desired value for onebinstart. Note that with this
+        option, the bins won't start and end exactly on the given range
+        values, it will be slightly shifted to accommodate this
+        request.  */
+gal_data_t *
+gal_statistics_regular_bins(gal_data_t *data, gal_data_t *range,
+                            size_t numbins, float onebinstart)
+{
+  size_t i;
+  gal_data_t *bins, *tmp;
+  float *b, *ra, min, max, hbw, diff, binwidth;
+
+
+  /* Some sanity checks. */
+  if(numbins==0)
+    error(EXIT_FAILURE, 0, "`numbins' in `gal_statistics_regular_bins' "
+          "cannot be given a value of 0");
+  if(range && range->type!=GAL_DATA_TYPE_FLOAT32)
+    error(EXIT_FAILURE, 0, "gal_statistics_regular_bins currently on works "
+          "on ranges of type float32");
+  if(data->ndim!=1)
+    error(EXIT_FAILURE, 0, "gal_statistics_regular_bins currently on works "
+          "in 1D data");
+  if(data->type!=GAL_DATA_TYPE_FLOAT32)
+    error(EXIT_FAILURE, 0, "gal_statistics_regular_bins currently on works "
+          "on float32 type data");
+
+
+  /* Set the minimum and maximum values. */
+  if(range && range->size)
     {
-      if(quant!=0.0f)
-        {
-          min=sorted[ gal_statistics_index_from_quantile(size, quant)   ];
-          max=sorted[ gal_statistics_index_from_quantile(size, 1-quant) ];
-        }
+      ra=range->array;
+      if( (range->size)%2 )
+        error(EXIT_FAILURE, 0, "Quantile ranges are not implemented in "
+              "`gal_statistics_regular_bins' yet.");
       else
         {
-          min=sorted[0];
-          max=sorted[size-1];
+          if( isnan(ra[0]) )
+            {
+              tmp=gal_statistics_minimum(data);
+              min=*((float *)(tmp->array));
+              gal_data_free(tmp);
+            }
+          else min=ra[0];
+          if( isnan(ra[1]) )                       /* When the maximum    */
+            {                                      /* isn't set, we'll    */
+              tmp=gal_statistics_maximum(data);    /* Add a very small    */
+              max=*((float *)(tmp->array)) + 1e-5; /* value so all the    */
+              gal_data_free(tmp);                  /* points are counted. */
+            }
+          else max=ra[1];
         }
     }
-  binwidth=(max-min)/numbins;
+  else
+    {
+      tmp=gal_statistics_minimum(data);
+      min=*((float *)(tmp->array));
+      gal_data_free(tmp);
+      tmp=gal_statistics_maximum(data);
+      max=*((float *)(tmp->array));
+      gal_data_free(tmp);
+    }
+
+
+  /* Allocate the space for the bins. */
+  bins=gal_data_alloc(NULL, GAL_DATA_TYPE_FLOAT32, 1, &numbins, NULL,
+                      0, data->minmapsize, "bin_center", data->unit,
+                      "Center value of each bin.");
+
+
+  /* Set central bin values. */
+  b=bins->array;
+  hbw = ( binwidth=(max-min)/numbins )/2;
+  for(i=0;i<numbins;++i) b[i] = min + i*binwidth + hbw;
 
-  /* Set all the bin smaller sides: */
-  for(i=0;i<numbins+1;++i)
-    bins[i*2]=min+i*binwidth;
 
   /* Go over all the bins and stop when the sign of the two sides
      of one bin are different. */
-  if(isnan(onebinvalue)==0)
+  if( !isnan(onebinstart) )
     {
-      for(i=0;i<numbins;++i)
-        if(bins[i*2]<onebinvalue && bins[(i+1)*2]>onebinvalue) break;
-      if(i!=numbins)
+      for(i=0;i<numbins-1;++i)
+        if( (b[i]-hbw) < onebinstart && (b[i+1]-hbw) > onebinstart) break;
+      if( i != numbins-1 )
         {
-          diff=onebinvalue-bins[i*2];
-          for(i=0;i<numbins+1;++i)
-            bins[i*2]+=diff;
+          diff=onebinstart-b[i];
+          for(i=0;i<numbins;++i)
+            b[i]+=diff;
         }
     }
 
   /* For a check
+  printf("min: %g\n", min);
+  printf("max: %g\n", max);
+  printf("binwidth: %g\n", binwidth);
   for(i=0;i<numbins;++i)
-    printf("%zu: %.4f\n", i+1, bins[i*2]);
+    printf("%zu: %.4f\n", i, b[i]);
   */
+
+  /* Set the status of the bins to regular and return. */
+  bins->status=GAL_STATISTICS_BINS_REGULAR;
+  return bins;
 }
 
 
 
 
 
-void
-gal_statistics_histogram(float *sorted, size_t size, float *bins,
-                         size_t numbins, int normhist, int maxhistone)
+/* Make a histogram of all the elements in the given dataset with bin
+   values that are defined in the `bins' structure (see
+   `gal_statistics_regular_bins'). */
+gal_data_t *
+gal_statistics_histogram(gal_data_t *data, gal_data_t *bins,
+                         int normalize, int maxhistone)
 {
-  float max=-FLT_MAX;
-  size_t histrow=0, i;
+  size_t i, *h;
+  double ref=NAN;
+  gal_data_t *hist;
+  float *f, *ff, min, max, binwidth;
 
-  if((long)numbins<=0)
-    error(EXIT_FAILURE, 0, "the number of bins in gal_statistics_histogram "
-          "(statistics.h) must be >0.  You have given asked for %ld",
-          (long)numbins);
 
-  /* Fill the histogram. */
-  for(i=0;i<size;++i)
-    {
-      /* For a check:
-      printf("%f:\n  histrow: %zu, numbins: %zu\n",
-             sorted[i], histrow, numbins);
-      */
-      /* Data has not reached bins yet: */
-      if(sorted[i]<bins[histrow*2]) continue;
-
-      /* Jump bins until sorted[i] is smaller than the larger value of
-         one bin. If we are on the last bin (where
-         histrow==numbins-1), then you don't have to increase histrow
-         any more, as soon as sorted[i] becomes larger than the lagest
-         bin, quit the search. The 1e-6f is to account for floating
-         point error. NOTE: the last interval is closed on both
-         sides.*/
-      if(histrow==numbins-1)
-        { if(sorted[i]>bins[numbins*2]+1e-6f) break; }
-      else
-        while(sorted[i]>=bins[(histrow+1)*2])
-          if(++histrow==numbins-1) break;
+  /* Some basic sanity checks for now (until it is generalized). */
+  if(data->ndim!=1)
+    error(EXIT_FAILURE, 0, "gal_statistics_histogram currently on works "
+          "in 1D data");
+  if(data->type!=GAL_DATA_TYPE_FLOAT32)
+    error(EXIT_FAILURE, 0, "gal_statistics_histogram currently on works "
+          "on float32 type data");
 
-      /* For a check:
-      printf("  histrow: %zu\n", histrow);
-      */
-      ++bins[histrow*2+1];
-    }
 
-  /* In case a normalized histogram is desired: */
-  if(normhist)
-    for(i=0;i<numbins;++i)
-      bins[i*2+1]/=size;
+  /* Check if the bins are regular or not. For irregular bins, we can
+     either use the old implementation, or GSL's histogram
+     functionality. */
+  if(bins->status!=GAL_STATISTICS_BINS_REGULAR)
+    error(EXIT_FAILURE, 0, "the input bins to `gal_statistics_histogram' "
+          "are not regular. Currently it is only implemented for regular "
+          "bins");
 
-  /* In case the maximum value is to become one. */
-  if(maxhistone)
-    {
-      for(i=0;i<numbins;++i)
-        if(bins[i*2+1]>max)
-          max=bins[i*2+1];
-      for(i=0;i<numbins;++i)
-        bins[i*2+1]/=max;
-    }
 
-  /* In case you want to see the histogram:
-  for(i=0;i<numbins;++i)
-    printf("%zu: %.4f %.4F\n", i+1, bins[i*2], bins[i*2+1]);
-  */
-}
+  /* Check if normalize and maxhistone are not called together. */
+  if(normalize && maxhistone)
+    error(EXIT_FAILURE, 0, "only one of `normalize' and `maxhistone' may "
+          "be given to `gal_statistics_histogram'");
 
+  /* Allocate the histogram (note that we are clearning it. */
+  hist=gal_data_alloc(NULL, GAL_DATA_TYPE_SIZE_T, bins->ndim, bins->dsize,
+                      NULL, 1, data->minmapsize, "hist_number", "counts",
+                      "Number of data points within each bin.");
 
 
+  /* Set the minimum and maximum values: */
+  f=bins->array;
+  binwidth=f[1]-f[0];
+  max = f[bins->size - 1] + binwidth/2;
+  min = f[0]              - binwidth/2;
 
 
-void
-gal_statistics_cumulative_fp(float *sorted, size_t size, float *bins,
-                             size_t numbins, int normcfp)
-{
-  float prevind=0;
-  size_t cfprow=0, i, numinds=0;
+  /* Go through all the elements and find out which bin they belong to. */
+  h=hist->array;
+  f=data->array;
+  for(i=0;i<data->size;++i)
+    if(f[i]>=min && f[i]<max)
+      {
+        ++h[ (size_t)((f[i]-min)/binwidth) ];
+        /*printf("%-10.3f%zu\n", f[i], (size_t)((f[i]-min)/binwidth) );*/
+      }
 
 
-  /* Fill the Cumulative frequency plot. The steps are just like the
-     histogram above so we won't explain similar concepts here
-     again. */
-  for(i=0;i<size;++i)
+  /* Find the reference to correct the histogram if necessary. */
+  if(normalize)
+    {
+      /* Set the reference. */
+      ref=0.0f;
+      hist=gal_data_copy_to_new_type_free(hist, GAL_DATA_TYPE_FLOAT32);
+      ff=(f=hist->array)+hist->size; do ref += *f++;   while(f<ff);
+
+      /* Correct the name, units and comments. */
+      free(hist->name); free(hist->unit); free(hist->comment);
+      gal_checkset_allocate_copy("hist_normalized", &hist->name);
+      gal_checkset_allocate_copy("frac", &hist->unit);
+      gal_checkset_allocate_copy("Normalized histogram value for this bin",
+                                 &hist->comment);
+    }
+  if(maxhistone)
     {
-      if(sorted[i]<bins[cfprow*2]) continue;
+      /* Calculate the reference. */
+      ref=-FLT_MAX;
+      hist=gal_data_copy_to_new_type_free(hist, GAL_DATA_TYPE_FLOAT32);
+      ff=(f=hist->array)+hist->size;
+      do ref = *f>ref ? *f : ref; while(++f<ff);
+
+      /* Correct the name, units and comments. */
+      free(hist->name); free(hist->unit); free(hist->comment);
+      gal_checkset_allocate_copy("hist_maxone", &hist->name);
+      gal_checkset_allocate_copy("frac", &hist->unit);
+      gal_checkset_allocate_copy("Fractional histogram value for this bin "
+                                 "when maximum bin value is 1.0",
+                                 &hist->comment);
+    }
 
-      if(cfprow==numbins-1)
-        { if(sorted[i]>bins[numbins*2]+1e-6f) break; }
-      else
-        while(sorted[i]>=bins[(cfprow+1)*2])
-          {
-            /* We have not yet reached the last bin. But we have
-               reached the sorted[i] that is larger than the current
-               bin and we want to switch to the next bin. So we have
-               to finalize the current bin value.  bins[cfprow*2+1]
-               has kept the sum of all the indexs that lie within this
-               bin. Using numinds, we also kept a count of how many
-               indexs there was.
-
-               In case there were no indexs in this bin, then we have
-               to pass on the previous value (since this bin did not
-               contribute any data). We set numinds=0 and start
-               working on the next bin. */
-            if(numinds>0)
-              prevind=bins[cfprow*2+1]/=numinds; /* Divide by num indexs */
-            else
-              bins[cfprow*2+1]=prevind;
-            numinds=0;
-            if(++cfprow==numbins-1) break;
-          }
 
-      /* We can't plot every index, also in every bin, there might be
-         a sharp gradient. So in order to make things smooth, the
-         value given to each bin will be the average of all the indexs
-         that like over that bin. Here we keep the sum and number of
-         indexs so in the end we can find the average. */
-      bins[cfprow*2+1]+=i;
-      ++numinds;
-    }
+  /* Correct the histogram if necessary. */
+  if( !isnan(ref) )
+    { ff=(f=hist->array)+hist->size; do *f++ /= ref;   while(f<ff); }
 
+  /* Return the histogram. */
+  return hist;
+}
 
-  /* The last bin was not finalized. So we will do that here: */
-  bins[cfprow*2+1] = numinds>0 ? bins[cfprow*2+1]/numinds : prevind;
 
 
-  /* For a normalized CFP: */
-  if(normcfp)
-    for(i=0;i<numbins;++i)
-      bins[i*2+1]/=size;
 
 
-  /* In a CFP, all bins that are possibly left behind (there was no
-     data to fill them) should get the same value as the lastly filled
-     value. They should not be zero. Note that this has to be done
-     after the possible normalization. */
-  for(i=cfprow;i<numbins;++i)
-    bins[i*2+1]=bins[(cfprow-1)*2+1];
+/* Make a cumulative frequency plot (CFP) of all the elements in the given
+   dataset with bin values that are defined in the `bins' structure (see
+   `gal_statistics_regular_bins').
 
+   The CFP is built from the histogram: in each bin, the value is the sum
+   of all previous bins in the histogram. Thus, if you have already
+   calculated the histogram before calling this function, you can pass it
+   onto this function as the data structure in `bins->next'. If
+   `bin->next!=NULL', then it is assumed to be the histogram. If it is
+   NULL, then the histogram will be calculated internally and freed after
+   the job is finished.
 
-  /* In case you want to see the CFP:
-  for(i=0;i<numbins;++i)
-    printf("%zu: %.4f %.4F\n", i+1, bins[i*2], bins[i*2+1]);
-  */
-}
+   When a histogram is given and it is normalized, the CFP will also be
+   normalized (even if the normalized flag is not set here): note that a
+   normalized CFP's maximum value is 1.
+*/
+gal_data_t *
+gal_statistics_cfp(gal_data_t *data, gal_data_t *bins, int normalize)
+{
+  double sum;
+  float *f, *ff, *hf;
+  gal_data_t *hist, *cfp;
+  size_t *s, *sf, *hs, sums;
 
 
+  /* Some basic sanity checks for now (until it is generalized). */
+  if(data->ndim!=1)
+    error(EXIT_FAILURE, 0, "gal_statistics_cfp currently on works "
+          "in 1D data");
+  if(data->type!=GAL_DATA_TYPE_FLOAT32)
+    error(EXIT_FAILURE, 0, "gal_statistics_cfp currently on works "
+          "on float32 type data");
 
 
+  /* Check if the bins are regular or not. For irregular bins, we can
+     either use the old implementation, or GSL's histogram
+     functionality. */
+  if(bins->status!=GAL_STATISTICS_BINS_REGULAR)
+    error(EXIT_FAILURE, 0, "the input bins to `gal_statistics_cfp' "
+          "are not regular. Currently it is only implemented for regular "
+          "bins");
 
-void
-gal_statistics_save_hist(float *sorted, size_t size, size_t numbins,
-                         char *filename, char *comment)
-{
-  FILE *fp;
-  size_t i;
-  float onebinvalue=NAN;
-  int normhist=0, maxhistone=0;
-  float d, *bins, min=0.0f, max=0.0f, quant=0.0f;
 
-  /* Set the bin sides: */
-  gal_statistics_set_bins(sorted, size, numbins, min, max,
-                          onebinvalue, quant, &bins);
+  /* Prepare the histogram. */
+  hist = ( bins->next
+           ? bins->next
+           : gal_statistics_histogram(data, bins, 0, 0) );
 
-  /* Set the size of half a bin width:*/
-  d=(bins[2]-bins[0])/2;
 
-  /* Fill the histogram: */
-  gal_statistics_histogram(sorted, size, bins, numbins, normhist, maxhistone);
+  /* If the histogram has float32 type it was given by the user and is
+     either normalized or its maximum was set to 1. We can only use it if
+     it was normalized. If its maximum is 1, then we must ignore it and
+     build the histogram again.*/
+  if(hist->type==GAL_DATA_TYPE_FLOAT32)
+    {
+      sum=0.0f;
+      ff=(f=hist->array)+hist->size; do sum += *f++;   while(f<ff);
+      if(sum!=1.0f)
+        hist=gal_statistics_histogram(data, bins, 0, 0);
+    }
 
-  /* Open the file for writing and save the histogram: */
-  errno=0;
-  fp=fopen(filename, "w");
-  if(fp==NULL)
-    error(EXIT_FAILURE, errno, "couldn't open file %s", filename);
-  fprintf(fp, "%s\n", comment);
-  fprintf(fp, "# The input %zu points binned in %zu bins\n#\n",
-          size, numbins);
-  fprintf(fp, "# Column 0: Value in the middle of this bin.\n");
-  fprintf(fp, "# Column 1: Number of points in this bin.\n");
-  for(i=0;i<numbins;++i)
-    fprintf(fp, "%-15.6f%.0f\n", bins[i*2]+d, bins[i*2+1]);
-  fclose(fp);
-  free(bins);
+  /* Allocate the cumulative frequency plot's necessary space. */
+  cfp=gal_data_alloc( NULL, hist->type, bins->ndim, bins->dsize,
+                      NULL, 1, data->minmapsize,
+                      ( hist->type==GAL_DATA_TYPE_FLOAT32
+                        ? "cfp_normalized" : "cfp_number" ),
+                      ( hist->type==GAL_DATA_TYPE_FLOAT32
+                        ? "frac" : "count" ),
+                      ( hist->type==GAL_DATA_TYPE_FLOAT32
+                        ? "Fraction of data elements from the start to this "
+                        "bin (inclusive)."
+                        : "Number of data elements from the start to this "
+                        "bin (inclusive).") );
+
+
+  /* Fill in the cumulative frequency plot. */
+  switch(hist->type)
+    {
+    case GAL_DATA_TYPE_SIZE_T:
+      sums=0; hs=hist->array; sf=(s=cfp->array)+cfp->size;
+      do sums = (*s += *hs++ + sums); while(++s<sf);
+      break;
+
+    case GAL_DATA_TYPE_FLOAT32:
+      sum=0.0f; hf=hist->array; ff=(f=cfp->array)+cfp->size;
+      do sum = (*f += *hf++ + sum);  while(++f<ff);
+      break;
+
+    default:
+      error(EXIT_FAILURE, 0, "type code %d not recognized in "
+            "`gal_statistics_cfp'", cfp->type);
+    }
+
+
+  /* Normalize the CFP if the user asked for it and it wasn't normalized
+     until now. */
+  if(normalize && cfp->type==GAL_DATA_TYPE_SIZE_T)
+    {
+      /* Find the sum, then divide the plot by it. Note that the sum must
+         come from the histogram, not the CFP!*/
+      sums=0;
+      cfp=gal_data_copy_to_new_type_free(cfp, GAL_DATA_TYPE_FLOAT32);
+      sf=(s=hist->array)+hist->size; do sums += *s++;   while(s<sf);
+      ff=(f=cfp->array)+cfp->size;   do *f++ /= sums;   while(f<ff);
+
+      /* Correct the name, units and comments. */
+      free(cfp->name); free(cfp->unit); free(cfp->comment);
+      gal_checkset_allocate_copy("cfp_normalized", &cfp->name);
+      gal_checkset_allocate_copy("frac", &cfp->unit);
+      gal_checkset_allocate_copy("Fraction of data elements from the start "
+                                 "to this bin (inclusive).", &cfp->comment);
+    }
+
+  /* If the histogram was allocated here, free it. */
+  if(hist!=bins->next) gal_data_free(hist);
+  return cfp;
 }
 
 



reply via email to

[Prev in Thread] Current Thread [Next in Thread]