gnuastro-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[gnuastro-commits] master fc586b6 2/2: NoiseChisel uses outlier rejectio


From: Mohammad Akhlaghi
Subject: [gnuastro-commits] master fc586b6 2/2: NoiseChisel uses outlier rejection algorithm on qthresh
Date: Sun, 19 Aug 2018 19:09:51 -0400 (EDT)

branch: master
commit fc586b6bbe7aee78d5998264d9ec73ec33206299
Author: Mohammad Akhlaghi <address@hidden>
Commit: Mohammad Akhlaghi <address@hidden>

    NoiseChisel uses outlier rejection algorithm on qthresh
    
    Until now, the user had to manually specify a quantile to remove some tiles
    from the quantile threshold interpolation step. But this solution was not
    generic and too ugly (too dependent on each particular input dataset).
    
    A new outlier identification algorithm was re-implemented in Gnuastro's
    statistics library (the library function had been commented out) in the
    previous commit for this purpose. With this commit, it now replaces the
    manual quantile identification of NoiseChisel. To configure this new
    algorithm, three new options have been added to it as described in the book
    and the NEWS file.
    
    This task was found after a great discussion with Pierre-Alain Duc on some
    of his datasets during the Lorentz center workshop on "The Bewildering
    Nature of Ultra-diffuse Galaxies", in Leiden, The Netherlands.
---
 NEWS                                |  16 +++++-
 THANKS                              |   1 +
 bin/noisechisel/args.h              |  37 +++++++++++--
 bin/noisechisel/astnoisechisel.conf |  48 ++++++++--------
 bin/noisechisel/main.h              |   7 ++-
 bin/noisechisel/threshold.c         |  83 +++++++++++++++++++---------
 bin/noisechisel/ui.c                |   5 ++
 bin/noisechisel/ui.h                |   3 +
 doc/announce-acknowledge.txt        |   1 +
 doc/gnuastro.texi                   | 107 ++++++++++++++++++++++--------------
 lib/gnuastro/statistics.h           |   2 +-
 lib/statistics.c                    |  30 ++++++++--
 12 files changed, 236 insertions(+), 104 deletions(-)

diff --git a/NEWS b/NEWS
index 061735f..ad39ab7 100644
--- a/NEWS
+++ b/NEWS
@@ -11,14 +11,26 @@ GNU Astronomy Utilities NEWS                          -*- 
outline -*-
 
   Library:
     -- gal_fits_key_list_reverse: Reverse the given list of FITS keywords.
-    -- gal_fits_key_write_title_in_ptr: Write a two-line title in FITS 
keywords.
+    -- gal_fits_key_write_title_in_ptr: Write a two-line title FITS keyword.
     -- gal_fits_key_write_in_ptr: New name of gal_fits_key_write.
-    -- gal_fits_key_write_version_in_ptr: new name of 
gal_fits_key_write_version.
+    -- gal_fits_key_write_version_in_ptr: old gal_fits_key_write_version.
     -- gal_fits_key_write_config: write key list and version as config header.
     -- gal_statistics_outlier_positive: Find the first positive outlier.
 
+  NoiseChisel:
+    -- New outlier identification algorithm for the quantile threshold
+       step. It is very useful when there are extended and bright sources
+       in the dataset: the tiles containing their footprint (outliers) can
+       be identified and removed with the parameters/options below:
+       --qthreshoutnum: Number of elements to search for outliers.
+       --qthreshoutsigma: Multiple of sigma to define an outlier.
+       --qthreshoutsclip: Sigma-clipping parameters for the process.
+
 ** Removed features
 
+  NoiseChisel:
+    --qthreshtilequant: removed due to new outlier rejection algorithm.
+
 ** Changed features
 
   MakeProfiles:
diff --git a/THANKS b/THANKS
index b4fa288..2f507f6 100644
--- a/THANKS
+++ b/THANKS
@@ -28,6 +28,7 @@ support in Gnuastro. The list is ordered alphabetically (by 
family name).
     Benjamin Clement                     address@hidden
     Nima Dehdilani                       address@hidden
     Antonio Diaz Diaz                    address@hidden
+    Pierre-Alain Duc                     address@hidden
     Thérèse Godefroy                     address@hidden
     Madusha Gunawardhana                 address@hidden
     Stephen Hamer                        address@hidden
diff --git a/bin/noisechisel/args.h b/bin/noisechisel/args.h
index 4d6a3f2..c4eb095 100644
--- a/bin/noisechisel/args.h
+++ b/bin/noisechisel/args.h
@@ -225,19 +225,46 @@ struct argp_option program_options[] =
       GAL_OPTIONS_NOT_SET
     },
     {
-      "qthreshtilequant",
-      UI_KEY_QTHRESHTILEQUANT,
+      "qthreshoutnum",
+      UI_KEY_QTHRESHOUTNUM,
+      "INT",
+      0,
+      "Number of tiles to find outliers in qthresh.",
+      UI_GROUP_DETECTION,
+      &p->qthreshoutnum,
+      GAL_TYPE_SIZE_T,
+      GAL_OPTIONS_RANGE_GE_0,
+      GAL_OPTIONS_MANDATORY,
+      GAL_OPTIONS_NOT_SET
+    },
+    {
+      "qthreshoutsigma",
+      UI_KEY_QTHRESHOUTSIGMA,
       "FLT",
       0,
-      "Remove tiles at higher quantiles.",
+      "Multiple of sigma to define outliers.",
       UI_GROUP_DETECTION,
-      &p->qthreshtilequant,
+      &p->qthreshoutsigma,
       GAL_TYPE_FLOAT32,
-      GAL_OPTIONS_RANGE_GE_0_LE_1,
+      GAL_OPTIONS_RANGE_GE_0,
       GAL_OPTIONS_MANDATORY,
       GAL_OPTIONS_NOT_SET
     },
     {
+      "qthreshoutsclip",
+      UI_KEY_QTHRESHOUTSCLIP,
+      "FLT,FLT",
+      0,
+      "Sigma-clip params for qthresh outliers.",
+      UI_GROUP_DETECTION,
+      &p->qthreshoutsclip,
+      GAL_TYPE_STRING,
+      GAL_OPTIONS_RANGE_ANY,
+      GAL_OPTIONS_MANDATORY,
+      GAL_OPTIONS_NOT_SET,
+      gal_options_read_sigma_clip
+    },
+    {
       "smoothwidth",
       UI_KEY_SMOOTHWIDTH,
       "INT",
diff --git a/bin/noisechisel/astnoisechisel.conf 
b/bin/noisechisel/astnoisechisel.conf
index 5126341..7927151 100644
--- a/bin/noisechisel/astnoisechisel.conf
+++ b/bin/noisechisel/astnoisechisel.conf
@@ -18,32 +18,34 @@
 # warranty.
 
 # Input:
- khdu                 1
- whdu                 1
- chdu                 1
- minskyfrac         0.7
- minnumfalse        100
+ khdu                    1
+ whdu                    1
+ chdu                    1
+ minskyfrac            0.7
+ minnumfalse           100
 
 # Tessellation
- largetilesize  200,200
+ largetilesize     200,200
 
 # Detection:
- mirrordist         1.5
- modmedqdiff       0.01
- qthresh            0.3
- qthreshtilequant   1.0
- smoothwidth          3
- erode                2
- erodengb             4
- noerodequant    0.9331
- opening              1
- openingngb           8
- sigmaclip        3,0.2
- dthresh            0.0
- snminarea           10
- snquant           0.95
- detgrowquant      0.80
- detgrowmaxholesize 100
+ mirrordist            1.5
+ modmedqdiff          0.01
+ qthresh               0.3
+ qthreshoutnum          15
+ qthreshoutsigma         5
+ qthreshoutsclip     3,0.2
+ smoothwidth             3
+ erode                   2
+ erodengb                4
+ noerodequant       0.9331
+ opening                 1
+ openingngb              8
+ sigmaclip           3,0.2
+ dthresh               0.0
+ snminarea              10
+ snquant              0.95
+ detgrowquant         0.80
+ detgrowmaxholesize    100
 
 # Operating mode
- continueaftercheck   0
+ continueaftercheck      0
diff --git a/bin/noisechisel/main.h b/bin/noisechisel/main.h
index a2590bc..d109a6d 100644
--- a/bin/noisechisel/main.h
+++ b/bin/noisechisel/main.h
@@ -60,7 +60,9 @@ struct noisechiselparams
   float           modmedqdiff;  /* Difference between mode and median.    */
   float            minskyfrac;  /* Undetected area min. frac. in tile.    */
   float               qthresh;  /* Quantile threshold on convolved image. */
-  float      qthreshtilequant;  /* Remove tiles with lower quantile.      */
+  size_t        qthreshoutnum;  /* Number of elements to find outliers.   */
+  float       qthreshoutsigma;  /* Multiple of sigma to define outlier.   */
+  double   qthreshoutsclip[2];  /* qthresh outlier Sigma-clipping params. */
   size_t          smoothwidth;  /* Interpolation: flat kernel to smooth.  */
   uint8_t        checkqthresh;  /* Save the quantile threhsold steps.     */
   size_t                erode;  /* Number of erosions after thresholding. */
@@ -95,6 +97,9 @@ struct noisechiselparams
   gal_data_t      *widekernel;  /* Wider kernel.                          */
   gal_data_t            *conv;  /* Convolved wth sharper kernel.          */
   gal_data_t           *wconv;  /* Convolved with wider kernel.           */
+  float           qthresh1out;  /* Outlier value for quantile thresh No.1.*/
+  float           qthresh2out;  /* Outlier value for quantile thresh No.2.*/
+  float           qthresh3out;  /* Outlier value for quantile thresh No.3.*/
   gal_data_t          *binary;  /* For binary operations.                 */
   gal_data_t          *olabel;  /* Labels of objects in the detection.    */
   gal_data_t   *expand_thresh;  /* Quantile threshold to expand per tile. */
diff --git a/bin/noisechisel/threshold.c b/bin/noisechisel/threshold.c
index 9fb44c6..0be1ec2 100644
--- a/bin/noisechisel/threshold.c
+++ b/bin/noisechisel/threshold.c
@@ -517,10 +517,11 @@ threshold_qthresh_clean_work(struct noisechiselparams *p, 
gal_data_t *first,
                              gal_data_t *second, gal_data_t *third,
                              size_t start, size_t number)
 {
-  gal_data_t *quantile;
+  char *msg;
+  gal_data_t *outlier;
   size_t i, osize=first->size;
   float *oa1=NULL, *oa2=NULL, *oa3=NULL;
-  float q, *arr1=NULL, *arr2=NULL, *arr3=NULL;
+  float o, *arr1=NULL, *arr2=NULL, *arr3=NULL;
 
   /* A small sanity check. */
   if(first->type!=GAL_TYPE_FLOAT32)
@@ -554,38 +555,58 @@ threshold_qthresh_clean_work(struct noisechiselparams *p, 
gal_data_t *first,
   /* Find the quantile and remove all tiles that are more than it in the
      first array. */
   arr1=first->array;
-  quantile=gal_statistics_quantile(first, p->qthreshtilequant, 0);
-  q=*((float *)(quantile->array));
-  for(i=0;i<first->size;++i)
-    /* Just note that we have blank (NaN) values, so to avoid doing a
-       NaN check with `isnan', we will check if the value is below the
-       quantile, if it succeeds (isn't NaN and is below the quantile),
-       then we'll put it's actual value, otherwise, a NaN. */
-    arr1[i] = arr1[i]<q ? arr1[i] : NAN;
-  gal_data_free(quantile);
-
-  /* Second quantile threshold. */
+  outlier=gal_statistics_outlier_positive(first, p->qthreshoutnum,
+                                          p->qthreshoutsigma,
+                                          p->qthreshoutsclip[0],
+                                          p->qthreshoutsclip[1], 0, 1);
+  if(outlier)
+    {
+      p->qthresh1out=o=*((float *)(outlier->array));
+      for(i=0;i<first->size;++i)
+        /* Just note that we have blank (NaN) values, so to avoid doing a
+           NaN check with `isnan', we will check if the value is below the
+           quantile, if it succeeds (isn't NaN and is below the quantile),
+           then we'll put it's actual value, otherwise, a NaN. */
+        arr1[i] = arr1[i]<o ? arr1[i] : NAN;
+      gal_data_free(outlier);
+    }
+
+  /* Second quantile threshold. We are finding the outliers independently
+     on each dataset to later remove any tile that is blank in atleast one
+     of them. */
   arr2=second->array;
-  quantile=gal_statistics_quantile(second, p->qthreshtilequant, 0);
-  q=*((float *)(quantile->array));
-  for(i=0;i<second->size;++i)
-    arr2[i] = arr2[i]<q ? arr2[i] : NAN;
-  gal_data_free(quantile);
+  outlier=gal_statistics_outlier_positive(second, p->qthreshoutnum,
+                                          p->qthreshoutsigma,
+                                          p->qthreshoutsclip[0],
+                                          p->qthreshoutsclip[1], 0, 0);
+  if(outlier)
+    {
+      p->qthresh2out=o=*((float *)(outlier->array));
+      for(i=0;i<second->size;++i)
+        arr2[i] = arr2[i]<o ? arr2[i] : NAN;
+      gal_data_free(outlier);
+    }
 
   /* The third (if it exists). */
   if(third)
     {
       arr3=third->array;
-      quantile=gal_statistics_quantile(third, p->qthreshtilequant, 0);
-      q=*((float *)(quantile->array));
-      for(i=0;i<third->size;++i)
-        arr3[i] = arr3[i]<q ? arr3[i] : NAN;
-      gal_data_free(quantile);
+      outlier=gal_statistics_outlier_positive(third, p->qthreshoutnum,
+                                              p->qthreshoutsigma,
+                                              p->qthreshoutsclip[0],
+                                              p->qthreshoutsclip[1], 0, 0);
+      if(outlier)
+        {
+          p->qthresh3out=o=*((float *)(outlier->array));
+          for(i=0;i<third->size;++i)
+            arr3[i] = arr3[i]<o ? arr3[i] : NAN;
+          gal_data_free(outlier);
+        }
     }
 
   /* Make sure all three have the same NaN pixels. */
   for(i=0;i<first->size;++i)
-    if( isnan(arr1[i]) || isnan(arr2[i]) || isnan(arr3[i]) )
+    if( isnan(arr1[i]) || isnan(arr2[i]) || (third && isnan(arr3[i])) )
       {
         arr1[i] = arr2[i] = NAN;
         if(third) arr3[i] = NAN;
@@ -599,6 +620,16 @@ threshold_qthresh_clean_work(struct noisechiselparams *p, 
gal_data_t *first,
       first->size = second->size = osize;
       if(third) { third->array=oa3; third->size=osize; }
     }
+
+  /* Report the values if necessary. */
+  if(!p->cp.quiet)
+    {
+      if( asprintf(&msg, "quantile threshold outlier limit: %f",
+                   p->qthresh1out)<0 )
+        error(EXIT_FAILURE, 0, "%s: asprintf allocation", __func__);
+      gal_timing_report(NULL, msg, 2);
+      free(msg);
+    }
 }
 
 
@@ -738,7 +769,7 @@ threshold_quantile_find_apply(struct noisechiselparams *p)
 
 
   /* Remove higher thresholds if requested. */
-  if(p->qthreshtilequant!=1.0)
+  if(p->qthreshoutnum!=1.0)
     threshold_qthresh_clean(p, qprm.erode_th, qprm.noerode_th,
                             qprm.expand_th ? qprm.expand_th : NULL,
                             p->qthreshname);
@@ -751,7 +782,7 @@ threshold_quantile_find_apply(struct noisechiselparams *p)
   num=gal_statistics_number(qprm.erode_th);
   nval=((size_t *)(num->array))[0];
   if( nval < cp->interpnumngb)
-    error(EXIT_FAILURE, 0, "%zu tiles can be used for interpolation of the "
+    error(EXIT_FAILURE, 0, "%zu tile(s) can be used for interpolation of the "
           "quantile threshold values over the full dataset. This is smaller "
           "than the requested minimum value of %zu (value to the "
           "`--interpnumngb' option).\n\n"
diff --git a/bin/noisechisel/ui.c b/bin/noisechisel/ui.c
index 42a7d6c..7a7d667 100644
--- a/bin/noisechisel/ui.c
+++ b/bin/noisechisel/ui.c
@@ -115,6 +115,11 @@ ui_initialize_options(struct noisechiselparams *p,
   cp->numthreads         = gal_threads_number();
   cp->coptions           = gal_commonopts_options;
 
+  p->qthresh1out         = NAN;
+  p->qthresh2out         = NAN;
+  p->qthresh3out         = NAN;
+
+
   /* Modify common options. */
   for(i=0; !gal_options_is_last(&cp->coptions[i]); ++i)
     {
diff --git a/bin/noisechisel/ui.h b/bin/noisechisel/ui.h
index c3e72b2..2a79cc5 100644
--- a/bin/noisechisel/ui.h
+++ b/bin/noisechisel/ui.h
@@ -83,6 +83,9 @@ enum option_keys_enum
   UI_KEY_MINNUMFALSE,
   UI_KEY_SMOOTHWIDTH,
   UI_KEY_QTHRESHTILEQUANT,
+  UI_KEY_QTHRESHOUTNUM,
+  UI_KEY_QTHRESHOUTSIGMA,
+  UI_KEY_QTHRESHOUTSCLIP,
   UI_KEY_CHECKQTHRESH,
   UI_KEY_ERODENGB,
   UI_KEY_NOERODEQUANT,
diff --git a/doc/announce-acknowledge.txt b/doc/announce-acknowledge.txt
index 36f8ee2..c5824b6 100644
--- a/doc/announce-acknowledge.txt
+++ b/doc/announce-acknowledge.txt
@@ -1,3 +1,4 @@
 Alphabetically ordered list to acknowledge in the next release.
 
+Pierre-Alain Duc
 Michael Stein
diff --git a/doc/gnuastro.texi b/doc/gnuastro.texi
index fa8987a..af55be4 100644
--- a/doc/gnuastro.texi
+++ b/doc/gnuastro.texi
@@ -15470,7 +15470,7 @@ Segmentation has been completely moved to a new 
program: @ref{Segment}.
 @end itemize
 
 @noindent
-Added options:
+Added features/options:
 @itemize
 
 @item
@@ -15499,9 +15499,14 @@ give this option a value of 1 (only the largest valued 
pixel in the input
 will not be eroded).
 
 @item
address@hidden: to manually remove the measured qthresh from
-some tiles. This feature helps in detecting large and extended diffuse
-(almost flat) signal when necessary, see @ref{Detection options}.
+Outlier rejection in quantile thresholds: When there are large galaxies or
+bright stars in the image, their gradient may be on a smaller scale than
+the selected tile size. In such cases, those tiles will be identified as
+tiles with no signal and thus preserved. An outlier identification
+algorithm has been added to NoiseChisel and can be configured with the
+following three options: @option{--qthreshoutnum},
address@hidden and @option{--qthreshoutsclip}. See their
+description in @ref{Detection options} for more.
 
 @item
 @option{--detgrowquant}: is used to grow the final true detections until a
@@ -15835,43 +15840,56 @@ is complete, we will have a binary (two valued) 
image. The pixels above the
 threshold are known as foreground pixels (have a value of 1) while those
 which lie below the threshold are known as background (have a value of 0).
 
address@hidden --qthreshtilequant=FLT
-Only keep tiles which have a q-thresh value above the given quantile of the
-all successful tiles. Hence, when given a value of @code{1}, this option
-will be ignored. When there is more than one channel (and
address@hidden is not called), quantile calculation and application
-will be done on each channel independently.
-
-This option is useful when a large and diffuse (almost flat within each
-tile) signal exists with very small regions of Sky. The flatness of the
-profile will cause it to successfully pass the tests of @ref{Quantifying
-signal in a tile}. As a result, without this option the flat and diffuse
-signal will be interpreted as sky. In such cases, you can see the status
-of the tiles with the @option{--checkqthresh} option (first image extension
-is enough) and select a quantile through this option to ignore the measured
-values in the higher-valued tiles.
-
-This option can also be useful when there are large bright objects in the
-image with large flat wings which can also pass the tests and give outlier
-values. When there is a sky gradient over the image (mainly due to
-post-processing issues like bad flat fielding), this option must be set to
address@hidden so it is completely ignored and the sky gradient is accurately
-measured and subtracted.
-
-To get an estimate of the measured qthresh distribution, you can run the
-following commands. The first will create the qthresh check image with one
-value/pixel per tile (see @ref{Processing options}). Open the image in a
-FITS viewer and inspect it. The second command below will print the basic
-information about the distribution of values and the third will print the
-value at the 0.4 quantile. Recall that Gnuastro's Statistics program
-ignores blank values (in this case: tiles with significant signal), see
address@hidden
-
address@hidden
-$ astnoisechisel image.fits --checkqthresh --oneelempertile
-$ aststatistics image_qthresh.fits
-$ aststatistics image_qthresh.fits --quantile=0.4
address@hidden example
address@hidden ----qthreshoutnum=INT
+Number of tiles (in sorted array) to use as a moving window when estimating
+the outlier tiles. If it is given a value of zero, no outlier rejection
+will take place.
+
+This option is useful when the dataset contains a large and diffuse (almost
+flat within each tile) signal. The flatness of the profile will cause it to
+successfully pass the tests of @ref{Quantifying signal in a tile}. As a
+result, when outlier rejection is disabled, the flat and diffuse signal
+will be interpreted as sky. As a result, the quantile threshold in those
+tiles (and the ones around them) can be strongly over-estimated, causing a
+failure in their detection.
+
+Outlier identification proceeds as follows: all non-blank tile values are
+sorted by flux. A sliding window, with width equal to the value given to
+this option, is parsed over the dataset and is used as a reference to
+identify the first outlier element. The first element where the distance to
+the previous (sorted) element is sigma units (@option{--qthreshoutsigma})
+away from the distribution of distances of the element in the window is
+considered an outlier and all tiles larger than that value are ignored for
+the later step (interpolation).
+
+Formally, Let's take @mymath{v_i} to be the @mymath{i}-th element of the
+sorted input (with no blank values) and @mymath{m} and @mymath{\sigma} as
+the @mymath{\sigma}-clipped median and standard deviation from the
+distances of the previous @mymath{N} elements (@mymath{N} is the value to
+this option). The @option{--qthreshoutsclip} option can be used to
+configure the @mymath{\sigma}-clipping. If the value given to
address@hidden is displayed with @mymath{s}, the returned
+outlier is the first (sorted) elemented where the following condition
+becomes true:
+
address@hidden(v_i−v_{i−1})−m>s\times\sigma}
+
+Note that by this definition, the outlier cannot be any of the first
address@hidden elements, since they constitute the window that is used for the
+first checked element. It is therefore important that while @mymath{N}
+isn't too small, it also isn't larger than (approximately) half of the
+initially measured tiles. A warning will be printed if this condition
+doesn't hold. You can use @option{--checkqthresh} to inspect the steps.
+
address@hidden --qthreshoutsigma=FLT
+Multiple of sigma to define an outlier, see description of
address@hidden for more.
+
address@hidden --qthreshoutsclip=FLT,FLT
+Sigma-clipping parameters for the outlier rejection of the quantile
+threshold. The format of the given values is similar to
address@hidden below. see description of @option{--qthreshoutnum} for
+a description of the outlier rejection algorithm.
 
 @item --smoothwidth=INT
 Width of flat kernel used to smooth the interpolated quantile thresholds,
@@ -27191,7 +27209,7 @@ above.
 @end deftypefun
 
 
address@hidden {gal_data_t *} gal_statistics_outlier_positive (gal_data_t 
@code{*input}, size_t @code{window_size}, float @code{sigma}, float 
@code{sigclip_multip}, float @code{sigclip_param}, int @code{inplace})
address@hidden {gal_data_t *} gal_statistics_outlier_positive (gal_data_t 
@code{*input}, size_t @code{window_size}, float @code{sigma}, float 
@code{sigclip_multip}, float @code{sigclip_param}, int @code{inplace}, int 
@code{quiet})
 Find the first positive outlier in the @code{input} distribution. The
 returned dataset contains a single element: the first positive outlier. It
 is one of the dataset's elements, in the same type as the input. If the
@@ -27227,6 +27245,11 @@ done within the input dataset's allocated space. 
Otherwise, this function
 will internally allocate (and later free) the necessary space to keep the
 intermediate space that this process requires.
 
+If @code{quiet!=0}, this function will report a warning when the moving
+window size is not much smaller than the number of non-blank elements. In
+such cases, the outlier estimation may be wrong: the outlier point may lie
+before the first checked element.
+
 @end deftypefun
 
 
diff --git a/lib/gnuastro/statistics.h b/lib/gnuastro/statistics.h
index f7860d7..ec3ecd2 100644
--- a/lib/gnuastro/statistics.h
+++ b/lib/gnuastro/statistics.h
@@ -176,7 +176,7 @@ gal_statistics_sigma_clip(gal_data_t *input, float multip, 
float param,
 gal_data_t *
 gal_statistics_outlier_positive(gal_data_t *input, size_t window_size,
                                 float sigma, float sigclip_multip,
-                                float sigclip_param, int inplace);
+                                float sigclip_param, int inplace, int quiet);
 
 
 
diff --git a/lib/statistics.c b/lib/statistics.c
index 22ed7af..4734683 100644
--- a/lib/statistics.c
+++ b/lib/statistics.c
@@ -2084,8 +2084,8 @@ gal_statistics_sigma_clip(gal_data_t *input, float 
multip, float param,
                                         sigclip_param, 0, 1);           \
         sarr=sclip->array;                                              \
                                                                         \
-        /* For a check                                   */             \
-        /* printf("%f (%f, %f)\n",                       */             \
+        /* For a check */                                               \
+        /* printf("%f: %f (%f, %f)\n", (float)(arr[i]),  */             \
         /*        ((double)(arr[i]-arr[i-1])) - sarr[1], */             \
         /*        sarr[1], sigma*sarr[3]);               */             \
                                                                         \
@@ -2110,7 +2110,7 @@ gal_statistics_sigma_clip(gal_data_t *input, float 
multip, float param,
 gal_data_t *
 gal_statistics_outlier_positive(gal_data_t *input, size_t window_size,
                                 float sigma, float sigclip_multip,
-                                float sigclip_param, int inplace)
+                                float sigclip_param, int inplace, int quiet)
 {
   float *sarr;
   double *darr;
@@ -2120,9 +2120,29 @@ gal_statistics_outlier_positive(gal_data_t *input, 
size_t window_size,
   /* Remove all blanks and sort the dataset. */
   nbs=gal_statistics_no_blank_sorted(input, inplace);
 
+  /* For a check.
+  if(nbs->type==GAL_TYPE_FLOAT32)
+    {
+      float *n=nbs->array;
+      for(i=0;i<nbs->size;++i)
+        printf("%f\n", n[i]);
+      exit(0);
+    }
+  */
+
   /* A small sanity check. */
   if(window_size<nbs->size && window_size!=0)
     {
+      /* Print a warning if necessary. */
+      if(quiet!=0 && window_size*2 > nbs->size)
+        error(EXIT_SUCCESS, 0, "%s: [WARNING] the number of elements in the "
+              "moving window size (%zu) is not much smaller than the number "
+              "of non-blank elements (%zu). This can cause a wrong outlier "
+              "estimation (the outlier may be at an earlier point in the "
+              "sorted array). For this dataset, it is recommended to "
+              "decrease the number of elements in the moving window",
+              __func__, window_size, nbs->size);
+
       /* Allocate space to keep the distances. */
       dist=gal_data_alloc(NULL, GAL_TYPE_FLOAT64, 1, &window_size, NULL,
                           0, -1, NULL, NULL, NULL);
@@ -2145,10 +2165,12 @@ gal_statistics_outlier_positive(gal_data_t *input, 
size_t window_size,
           error(EXIT_FAILURE, 0, "%s: type code %d not recognized",
                 __func__, input->type);
         }
+
+      /* Clean up. */
+      gal_data_free(dist);
     }
 
   /* Clean up and return. */
-  gal_data_free(dist);
   if(nbs!=input) gal_data_free(nbs);
   return out;
 }



reply via email to

[Prev in Thread] Current Thread [Next in Thread]