/* integration/cquad.c
 * 
 * Copyright (C) 2010 Pedro Gonnet
 * 
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or (at
 * your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */

#include <stdlib.h>
#include <math.h>
#include <float.h>
#include <octave/oct.h>
#include <octave/parse.h>
#include <octave/ov-fcn-handle.h>

#include "cquad.h"
#include "cquad_consts.h"


/* Allocates a workspace for the given maximum number of intervals.
    Note that if the workspace gets filled, the intervals with the
    lowest error estimates are dropped. The maximum number of
    intervals is therefore not the maximum number of intervals
    that will be computed, but merely the size of the buffer.
    */

/* Compute the product of the fx with one of the inverse
    Vandermonde-like matrices. */

void
Vinvfx (const double *fx, double *c, const int d)
{

  int i, j;

  switch (d)
    {
    case 0:
      for (i = 0; i <= 4; i++)
        {
          c[i] = 0.0;
          for (j = 0; j <= 4; j++)
            c[i] += V1inv[i * 5 + j] * fx[j * 8];
        }
      break;
    case 1:
      for (i = 0; i <= 8; i++)
        {
          c[i] = 0.0;
          for (j = 0; j <= 8; j++)
            c[i] += V2inv[i * 9 + j] * fx[j * 4];
        }
      break;
    case 2:
      for (i = 0; i <= 16; i++)
        {
          c[i] = 0.0;
          for (j = 0; j <= 16; j++)
            c[i] += V3inv[i * 17 + j] * fx[j * 2];
        }
      break;
    case 3:
      for (i = 0; i <= 32; i++)
        {
          c[i] = 0.0;
          for (j = 0; j <= 32; j++)
            c[i] += V4inv[i * 33 + j] * fx[j];
        }
      break;
    }

}


/* Downdate the interpolation given by the n coefficients c
    by removing the nodes with indices in nans. */

void
downdate (double *c, int n, int d, int *nans, int nnans)
{

  static const int bidx[4] = { 0, 6, 16, 34 };
  double b_new[34], alpha;
  int i, j;

  for (i = 0; i <= n + 1; i++)
    b_new[i] = bee[bidx[d] + i];
  for (i = 0; i < nnans; i++)
    {
      b_new[n + 1] = b_new[n + 1] / Lalpha[n];
      b_new[n] = (b_new[n] + xi[nans[i]] * b_new[n + 1]) / Lalpha[n - 1];
      for (j = n - 1; j > 0; j--)
        b_new[j] =
          (b_new[j] + xi[nans[i]] * b_new[j + 1] -
           Lgamma[j + 1] * b_new[j + 2]) / Lalpha[j - 1];
      for (j = 0; j <= n; j++)
        b_new[j] = b_new[j + 1];
      alpha = c[n] / b_new[n];
      for (j = 0; j < n; j++)
        c[j] -= alpha * b_new[j];
      c[n] = 0;
      n--;
    }

}


/* The actual integration routine.
    */

DEFUN_DLD (cquad, args, nargout, 
"-*- texinfo -*-\n\
@deftypefn {Function File} address@hidden, @var{err}, @var{nr_points}] =} cquad (@var{f}, @var{a}, @var{b}, @var{tol})\n\
@deftypefnx {Function File} address@hidden, @var{err}, @var{nr_points}] =} cquad (@var{f}, @var{a}, @var{b}, @var{tol}, @var{sing})\n\
Numerically evaluates an integral using the doubly-adaptive\n\
quadrature described by P. Gonnet in @cite{\"Increasing the\n\
Reliability of Adaptive Quadrature Using Explicit Interpolants\",\n\
ACM Transactions on Mathematical Software, in Press, 2010}.\n\
The algorithm uses Clenshaw-Curtis quadrature rules of increasing\n\
degree in each interval and bisects the interval if either the\n\
function does not appear to be smooth or a rule of maximum\n\
degree has been reached. The error estimate is computed from the\n\
L2-norm of the difference between two successive interpolations\n\
of the integrand over the nodes of the respective quadrature rules.\n\
\n\
For example,\n\
\n\
@example\n\
   int = cquad ( f , a , b , 1.0e-6 );\n\
@end example\n\
\n\
@noindent computes the integral of a function @var{f} in the interval\n\
address@hidden,@var{b}] to the relative precision of six\n\
decimal digits.\n\
The integrand @var{f} should accept a vector argument and return a vector\n\
result containing the integrand evaluated at each element of the\n\
argument, for example\n\
\n\
@example\n\
   f = @@(x) x .* sin ( 1 ./ x ) .* sqrt ( abs ( 1 - x ) );\n\
@end example\n\
\n\
If the integrand has known singularieites or discontinuities\n\
in any of its derivatives inside the interval,\n\
as does the above example at x=1, these can be specified in\n\
the additional argument @var{sing} as follows\n\
\n\
@example\n\
   int = cquad ( f , a , b , 1.0e-6 , [ 1 ] );\n\
@end example\n\
\n\
The two additional output variables @var{err} and @var{nr_points}\n\
return an estimate of the absolute integration error and\n\
the number of points at which the integrand was evaluated\n\
respectively.\n\
If the adaptive integration did not converge, the value of\n\
@var{err} will be larger than the requested tolerance. It is\n\
therefore recommended to verify this value for difficult\n\
integrands.\n\
\n\
If either @var{a} or @var{b} are @code{+/-Inf}, @code{cquad}\n\
integrates @var{f} by substituting the variable of integration\n\
with @code{x=tan(pi/2*u)}.\n\
\n\
@code{cquad} is capable of dealing with non-numerical\n\
values of the integrand such as @code{NaN}, @code{Inf}\n\
or @code{-Inf}, as the above example at x=0.\n\
If the integral diverges and @code{cquad} detects this, \n\
a warning is issued and @code{Inf} or @code{-Inf} is returned.\n\
\n\
Note that @code{cquad} is a general purpose quadrature algorithm\n\
and as such may be less efficient for smooth or otherwise\n\
well-behaved integrand than other methods such as\n\
@code{quadgk} or @code{trapz}.\n\
\n\
@seealso{triplequad, dblquad, quadgk, quadl, quadv, trapz}\n\
@end deftypefn")
{

  /* Some constants that we will need. */
  static const int n[4] = { 4, 8, 16, 32 };
  static const int skip[4] = { 8, 4, 2, 1 };
  static const int idx[4] = { 0, 5, 14, 31 };
  static const double w = M_SQRT2 / 2;
  static const int ndiv_max = 20;

  /* The interval heap. */
  cquad_ival ivals[cquad_heapsize];
  int heap[cquad_heapsize];

  /* Arguments left and right */
  int nargin = args.length ();
  octave_function *fcn;
  double a, b, tol, *iivals, *sing;

  /* Variables needed for transforming the integrand. */
  int wrap = 0;
  double xw;

  /* Stuff we will need to call the integrand. */
  octave_value_list fargs, retval;

  /* Actual variables (as opposed to constants above). */
  double m, h, ml, hl, mr, hr, temp;
  double igral, err, igral_final, err_final, err_excess;
  int nivals, neval = 0;
  int i, j, d, split, t;
  int nnans, nans[33];
  cquad_ival *iv, *ivl, *ivr;
  double nc, ncdiff;


  /* Parse the input arguments. */
  if (nargin < 1)
    {
      error
        ("cquad: first argument (integrand) of type function handle required.");
      return octave_value_list ();
    }
  else
    {
      if (args (0).is_function_handle () || args (0).is_inline_function ())
        fcn = args (0).function_value ();
      else
        error
          ("cquad: first argument (integrand) must be a function handle or an inline function.");
    }

  if (nargin < 2 || !args (1).is_real_scalar ())
    {
      error
        ("cquad: second argument (left interval edge) must be a single real scalar.");
      return octave_value_list ();
    }
  else
    a = args (1).double_value ();

  if (nargin < 3 || !args (2).is_real_scalar ())
    {
      error
        ("cquad: third argument (right interval edge) must be a single real scalar.");
      return octave_value_list ();
    }
  else
    b = args (2).double_value ();

  if (nargin < 4)
    tol = 1.0e-6;
  else if (!args (3).is_real_scalar ())
    {
      error
        ("cquad: fourth argument (tolerance) must be a single real scalar.");
      return octave_value_list ();
    }
  else
    tol = args (3).double_value ();

  if (nargin < 5)
    {
      nivals = 1;
      iivals = (double *) alloca (sizeof (double) * (nivals + 1));
      iivals[0] = a;
      iivals[1] = b;
    }
  else if (!(args (4).is_real_scalar () || args (4).is_real_matrix ()))
    {
      error
        ("cquad: fifth argument (singularities) must be a vector of real values.");
      return octave_value_list ();
    }
  else
    {
      nivals = 1 + args (4).length ();
      iivals = (double *) alloca (sizeof (double) * (nivals + 1));
      sing = args (4).array_value ().fortran_vec ();
      iivals[0] = a;
      for (i = 0; i < nivals - 2; i++)
        iivals[i + 1] = sing[i];
      iivals[nivals] = b;
    }

  /* If a or b are +/-Inf, transform the integral. */
  if (std::isinf (a) || std::isinf (b))
    {
      wrap = 1;
      for (i = 0; i <= nivals; i++)
        if (std::isinf (iivals[i]))
          iivals[i] = copysign (1.0, iivals[i]);
        else
          iivals[i] = 2.0 * atan (iivals[i]) / M_PI;
    }


  /* Initialize the heaps. */
  for (i = 0; i < cquad_heapsize; i++)
    heap[i] = i;


  /* Create the first interval(s). */
  igral = 0.0;
  err = 0.0;
  for (j = 0; j < nivals; j++)
    {

      /* Initialize the interval. */
      iv = &(ivals[heap[j]]);
      m = (iivals[j] + iivals[j + 1]) / 2;
      h = (iivals[j + 1] - iivals[j]) / 2;
      nnans = 0;
      ColumnVector ex (33);
      if (wrap)
        {
          for (i = 0; i <= n[3]; i++)
            ex (i) = tan (M_PI / 2 * (m + xi[i] * h));
        }
      else
        {
          for (i = 0; i <= n[3]; i++)
            ex (i) = m + xi[i] * h;
        }
      fargs (0) = ex;
      retval = feval (fcn, fargs, 1);
      if (retval.length () != 1 || !retval (0).is_real_matrix ())
        {
          error
            ("cquad: integrand must return a single, real-valued vector.");
          return octave_value_list ();
        }
      Matrix effex = retval (0).matrix_value ();
      if (effex.length () != ex.length ())
        {
          error
            ("cquad: integrand must return a single, real-valued vector of the same size as the input");
          return octave_value_list ();
        }
      for (i = 0; i <= n[3]; i++)
        {
          iv->fx[i] = effex (i);
          if (wrap)
            {
              xw = ex(i);
              iv->fx[i] *= (1.0 + xw * xw) * M_PI / 2;
            }
          neval++;
          if (!std::isfinite (iv->fx[i]))
            {
              nans[nnans++] = i;
              iv->fx[i] = 0.0;
            }
        }
      Vinvfx (iv->fx, &(iv->c[idx[3]]), 3);
      Vinvfx (iv->fx, &(iv->c[idx[2]]), 2);
      Vinvfx (iv->fx, &(iv->c[0]), 0);
      for (i = 0; i < nnans; i++)
        iv->fx[i] = NAN;
      iv->a = iivals[j];
      iv->b = iivals[j + 1];
      iv->depth = 3;
      iv->rdepth = 1;
      iv->ndiv = 0;
      iv->igral = 2 * h * iv->c[idx[3]] * w;
      nc = 0.0;
      for (i = n[2] + 1; i <= n[3]; i++)
        {
          temp = iv->c[idx[3] + i];
          nc += temp * temp;
        }
      ncdiff = nc;
      for (i = 0; i <= n[2]; i++)
        {
          temp = iv->c[idx[2] + i] - iv->c[idx[3] + i];
          ncdiff += temp * temp;
          nc += iv->c[idx[3] + i] * iv->c[idx[3] + i];
        }
      ncdiff = sqrt (ncdiff);
      nc = sqrt (nc);
      iv->err = ncdiff * 2 * h;
      if (ncdiff / nc > 0.1 && iv->err < 2 * h * nc)
        iv->err = 2 * h * nc;

      /* Tabulate this interval's data. */
      igral += iv->igral;
      err += iv->err;

      /* Sift it up the heap. */
      i = j;
      while (i > 0 && ivals[heap[i / 2]].err < ivals[heap[i]].err)
        {
          temp = heap[i];
          heap[i] = heap[i / 2];
          heap[i / 2] = temp;
          i /= 2;
        }

    }


  /* Initialize some global values. */
  igral_final = 0.0;
  err_final = 0.0;
  err_excess = 0.0;


  /* Main loop. */
  while (nivals > 0 && err > 0.0 && err > fabs (igral) * tol
         && !(err_final > fabs (igral) * tol
              && err - err_final < fabs (igral) * tol))
    {

      /* Allow the user to interrupt. */
      OCTAVE_QUIT;

      /* Put our finger on the interval with the largest error. */
      iv = &(ivals[heap[0]]);
      m = (iv->a + iv->b) / 2;
      h = (iv->b - iv->a) / 2;

/*      printf
        ("cquad: processing ival %i (of %i) with [%e,%e] int=%e, err=%e, depth=%i\n",
         heap[0], nivals, iv->a, iv->b, iv->igral, iv->err, iv->depth);
*/
      /* Should we try to increase the degree? */
      if (iv->depth < 3)
        {

          /* Keep tabs on some variables. */
          d = ++iv->depth;

          /* Get the new (missing) function values */
          {
            ColumnVector ex (n[d] / 2);
            if (wrap)
              {
                for (i = 0; i < n[d] / 2; i++)
                  ex (i) =
                    tan (M_PI / 2 * (m + xi[(2 * i + 1) * skip[d]] * h));
              }
            else
              {
                for (i = 0; i < n[d] / 2; i++)
                  ex (i) = m + xi[(2 * i + 1) * skip[d]] * h;
              }
            fargs (0) = ex;
            retval = feval (fcn, fargs, 1);
            if (retval.length () != 1 || !retval (0).is_real_matrix ())
              {
                error
                  ("cquad: integrand must return a single, real-valued vector.");
                return octave_value_list ();
              }
            Matrix effex = retval (0).matrix_value ();
            if (effex.length () != ex.length ())
              {
                error
                  ("cquad: integrand must return a single, real-valued vector of the same size as the input.");
                return octave_value_list ();
              }
            neval += effex.length ();
            for (i = 0; i < n[d] / 2; i++)
              {
                j = (2 * i + 1) * skip[d];
                iv->fx[j] = effex (i);
                if (wrap)
                  {
                    xw = ex(i);
                    iv->fx[j] *= (1.0 + xw * xw) * M_PI / 2;
                  }
              }
          }
          nnans = 0;
          for (i = 0; i <= 32; i += skip[d])
            {
              if (!std::isfinite (iv->fx[i]))
                {
                  nans[nnans++] = i;
                  iv->fx[i] = 0.0;
                }
            }

          /* Compute the new coefficients. */
          Vinvfx (iv->fx, &(iv->c[idx[d]]), d);
          /* Downdate any NaNs. */
          if (nnans > 0)
            {
              downdate (&(iv->c[idx[d]]), n[d], d, nans, nnans);
              for (i = 0; i < nnans; i++)
                iv->fx[i] = NAN;
            }

          /* Compute the error estimate. */
          nc = 0.0;
          for (i = n[d - 1] + 1; i <= n[d]; i++)
            {
              temp = iv->c[idx[d] + i];
              nc += temp * temp;
            }
          ncdiff = nc;
          for (i = 0; i <= n[d - 1]; i++)
            {
              temp = iv->c[idx[d - 1] + i] - iv->c[idx[d] + i];
              ncdiff += temp * temp;
              nc += iv->c[idx[d] + i] * iv->c[idx[d] + i];
            }
          ncdiff = sqrt (ncdiff);
          nc = sqrt (nc);
          iv->err = ncdiff * 2 * h;
          /* Compute the local integral. */
          iv->igral = 2 * h * w * iv->c[idx[d]];
          /* Split the interval prematurely? */
          split = (nc > 0 && ncdiff / nc > 0.1);
        }

      /* Maximum degree reached, just split. */
      else
        {
          split = 1;
        }


      /* Should we drop this interval? */
      if ((m + h * xi[0]) >= (m + h * xi[1])
          || (m + h * xi[31]) >= (m + h * xi[32])
          || iv->err < fabs (iv->igral) * DBL_EPSILON * 10)
        {

/*          printf
            ("cquad: dropping ival %i (of %i) with [%e,%e] int=%e, err=%e, depth=%i\n",
             heap[0], nivals, iv->a, iv->b, iv->igral, iv->err,
             iv->depth);
*/
          /* Keep this interval's contribution */
          err_final += iv->err;
          igral_final += iv->igral;
          /* Swap with the last element on the heap */
          t = heap[nivals - 1];
          heap[nivals - 1] = heap[0];
          heap[0] = t;
          nivals--;
          /* Fix up the heap */
          i = 0;
          while (2 * i + 1 < nivals)
            {

              /* Get the kids */
              j = 2 * i + 1;
              /* If the j+1st entry exists and is larger than the jth,
                 use it instead. */
              if (j + 1 < nivals
                  && ivals[heap[j + 1]].err >= ivals[heap[j]].err)
                j++;
              /* Do we need to move the ith entry up? */
              if (ivals[heap[j]].err <= ivals[heap[i]].err)
                break;
              else
                {
                  t = heap[j];
                  heap[j] = heap[i];
                  heap[i] = t;
                  i = j;
                }
            }

        }

      /* Do we need to split this interval? */
      else if (split)
        {

          /* Some values we will need often... */
          d = iv->depth;
          /* Generate the interval on the left */
          ivl = &(ivals[heap[nivals++]]);
          ivl->a = iv->a;
          ivl->b = m;
          ml = (ivl->a + ivl->b) / 2;
          hl = h / 2;
          ivl->depth = 0;
          ivl->rdepth = iv->rdepth + 1;
          ivl->fx[0] = iv->fx[0];
          ivl->fx[32] = iv->fx[16];
          {
            ColumnVector ex (n[0] - 1);
            if (wrap)
              {
                for (i = 0; i < n[0] - 1; i++)
                  ex (i) = tan (M_PI / 2 * (ml + xi[(i + 1) * skip[0]] * hl));
              }
            else
              {
                for (i = 0; i < n[0] - 1; i++)
                  ex (i) = ml + xi[(i + 1) * skip[0]] * hl;
              }
            fargs (0) = ex;
            retval = feval (fcn, fargs, 1);
            if (retval.length () != 1 || !retval (0).is_real_matrix ())
              {
                error
                  ("cquad: integrand must return a single, real-valued vector.");
                return octave_value_list ();
              }
            Matrix effex = retval (0).matrix_value ();
            if (effex.length () != ex.length ())
              {
                error
                  ("cquad: integrand must return a single, real-valued vector of the same size as the input.");
                return octave_value_list ();
              }
            neval += effex.length ();
            for (i = 0; i < n[0] - 1; i++)
              {
                j = (i + 1) * skip[0];
                ivl->fx[j] = effex (i);
                if (wrap)
                  {
                    xw = ex(i);
                    ivl->fx[j] *= (1.0 + xw * xw) * M_PI / 2;
                  }
              }
          }
          nnans = 0;
          for (i = 0; i <= 32; i += skip[0])
            {
              if (!std::isfinite (ivl->fx[i]))
                {
                  nans[nnans++] = i;
                  ivl->fx[i] = 0.0;
                }
            }
          Vinvfx (ivl->fx, ivl->c, 0);
          if (nnans > 0)
            {
              downdate (ivl->c, n[0], 0, nans, nnans);
              for (i = 0; i < nnans; i++)
                ivl->fx[i] = NAN;
            }
          for (i = 0; i <= n[d]; i++)
            {
              ivl->c[idx[d] + i] = 0.0;
              for (j = i; j <= n[d]; j++)
                ivl->c[idx[d] + i] += Tleft[i * 33 + j] * iv->c[idx[d] + j];
            }
          ncdiff = 0.0;
          for (i = 0; i <= n[0]; i++)
            {
              temp = ivl->c[i] - ivl->c[idx[d] + i];
              ncdiff += temp * temp;
            }
          for (i = n[0] + 1; i <= n[d]; i++)
            {
              temp = ivl->c[idx[d] + i];
              ncdiff += temp * temp;
            }
          ncdiff = sqrt (ncdiff);
          ivl->err = ncdiff * h;
          /* Check for divergence. */
          ivl->ndiv = iv->ndiv + (fabs (iv->c[0]) > 0
                                  && ivl->c[0] / iv->c[0] > 2);
          if (ivl->ndiv > ndiv_max && 2 * ivl->ndiv > ivl->rdepth)
            {
              igral = copysign (INFINITY, igral);
              warning ("cquad: divergent integral detected.");
              break;
            }

          /* Compute the local integral. */
          ivl->igral = h * w * ivl->c[0];
          
          
          /* Generate the interval on the right */
          ivr = &(ivals[heap[nivals++]]);
          ivr->a = m;
          ivr->b = iv->b;
          mr = (ivr->a + ivr->b) / 2;
          hr = h / 2;
          ivr->depth = 0;
          ivr->rdepth = iv->rdepth + 1;
          ivr->fx[0] = iv->fx[16];
          ivr->fx[32] = iv->fx[32];
          {
            ColumnVector ex (n[0] - 1);
            if (wrap)
              {
                for (i = 0; i < n[0] - 1; i++)
                  ex (i) = tan (M_PI / 2 * (mr + xi[(i + 1) * skip[0]] * hr));
              }
            else
              {
                for (i = 0; i < n[0] - 1; i++)
                  ex (i) = mr + xi[(i + 1) * skip[0]] * hr;
              }
            fargs (0) = ex;
            retval = feval (fcn, fargs, 1);
            if (retval.length () != 1 || !retval (0).is_real_matrix ())
              {
                error
                  ("cquad: integrand must return a single, real-valued vector.");
                return octave_value_list ();
              }
            Matrix effex = retval (0).matrix_value ();
            if (effex.length () != ex.length ())
              {
                error
                  ("cquad: integrand must return a single, real-valued vector of the same size as the input.");
                return octave_value_list ();
              }
            neval += effex.length ();
            for (i = 0; i < n[0] - 1; i++)
              {
                j = (i + 1) * skip[0];
                ivr->fx[j] = effex (i);
                if (wrap)
                  {
                    xw = ex(i);
                    ivr->fx[j] *= (1.0 + xw * xw) * M_PI / 2;
                  }
              }
          }
          nnans = 0;
          for (i = 0; i <= 32; i += skip[0])
            {
              if (!std::isfinite (ivr->fx[i]))
                {
                  nans[nnans++] = i;
                  ivr->fx[i] = 0.0;
                }
            }
          Vinvfx (ivr->fx, ivr->c, 0);
          if (nnans > 0)
            {
              downdate (ivr->c, n[0], 0, nans, nnans);
              for (i = 0; i < nnans; i++)
                ivr->fx[i] = NAN;
            }
          for (i = 0; i <= n[d]; i++)
            {
              ivr->c[idx[d] + i] = 0.0;
              for (j = i; j <= n[d]; j++)
                ivr->c[idx[d] + i] += Tright[i * 33 + j] * iv->c[idx[d] + j];
            }
          ncdiff = 0.0;
          for (i = 0; i <= n[0]; i++)
            {
              temp = ivr->c[i] - ivr->c[idx[d] + i];
              ncdiff += temp * temp;
            }
          for (i = n[0] + 1; i <= n[d]; i++)
            {
              temp = ivr->c[idx[d] + i];
              ncdiff += temp * temp;
            }
          ncdiff = sqrt (ncdiff);
          ivr->err = ncdiff * h;
          /* Check for divergence. */
          ivr->ndiv = iv->ndiv + (fabs (iv->c[0]) > 0
                                  && ivr->c[0] / iv->c[0] > 2);
          if (ivr->ndiv > ndiv_max && 2 * ivr->ndiv > ivr->rdepth)
            {
              igral = copysign (INFINITY, igral);
              warning ("cquad: divergent integral detected.");
              break;
            }

          /* Compute the local integral. */
          ivr->igral = h * w * ivr->c[0];
          
          
          /* Fix-up the heap: we now have one interval on top
             that we don't need any more and two new, unsorted
             ones at the bottom. */
          /* Flip the last interval to the top of the heap and
             sift down. */
          t = heap[nivals - 1];
          heap[nivals - 1] = heap[0];
          heap[0] = t;
          nivals--;
          /* Sift this interval back down the heap. */
          i = 0;
          while (2 * i + 1 < nivals - 1)
            {
              j = 2 * i + 1;
              if (j + 1 < nivals - 1
                  && ivals[heap[j + 1]].err >= ivals[heap[j]].err)
                j++;
              if (ivals[heap[j]].err <= ivals[heap[i]].err)
                break;
              else
                {
                  t = heap[j];
                  heap[j] = heap[i];
                  heap[i] = t;
                  i = j;
                }
            }

          /* Now grab the last interval and sift it up the heap. */
          i = nivals - 1;
          while (i > 0)
            {
              j = (i - 1) / 2;
              if (ivals[heap[j]].err < ivals[heap[i]].err)
                {
                  t = heap[j];
                  heap[j] = heap[i];
                  heap[i] = t;
                  i = j;
                }
              else
                break;
            }


        }

      /* Otherwise, just fix-up the heap. */
      else
        {
          i = 0;
          while (2 * i + 1 < nivals)
            {
              j = 2 * i + 1;
              if (j + 1 < nivals
                  && ivals[heap[j + 1]].err >= ivals[heap[j]].err)
                j++;
              if (ivals[heap[j]].err <= ivals[heap[i]].err)
                break;
              else
                {
                  t = heap[j];
                  heap[j] = heap[i];
                  heap[i] = t;
                  i = j;
                }
            }

        }

      /* If the heap is about to overflow, remove the last two
         intervals. */
      while (nivals > cquad_heapsize - 2)
        {
          iv = &(ivals[heap[nivals - 1]]);
/*          printf
            ("cquad: dropping ival %i (of %i) with [%e,%e] int=%e, err=%e, depth=%i\n",
             heap[0], nivals, iv->a, iv->b, iv->igral, iv->err,
             iv->depth);
*/
          err_final += iv->err;
          igral_final += iv->igral;
          nivals--;
        }

      /* Collect the value of the integral and error. */
      igral = igral_final;
      err = err_final;
      for (i = 0; i < nivals; i++)
        {
          igral += ivals[heap[i]].igral;
          err += ivals[heap[i]].err;
        }

    }

  /* Dump the contents of the heap. */
/*  for (i = 0; i < nivals; i++)
    {
      iv = &(ivals[heap[i]]);
      printf
        ("cquad: ival %i (%i) with [%e,%e], int=%e, err=%e, depth=%i, rdepth=%i, ndiv=%i\n",
         i, heap[i], iv->a, iv->b, iv->igral, iv->err, iv->depth,
         iv->rdepth, iv->ndiv);
    }
*/
  /* Clean up and present the results. */
  retval (0) = igral;
  if (nargout > 1)
    retval (1) = err;
  if (nargout > 2)
    retval (2) = neval;
  /* All is well that ends well. */
  return retval;
}