// rootdoc/html/GoFTest_8cxx_source.html

 // @(#)root/mathcore:$Id$
 // Authors: Bartolomeu Rabacal    05/2010
 /**********************************************************************
  *                                                                    *
  * Copyright (c) 2006 , LCG ROOT MathLib Team                         *
  *                                                                    *
  *                                                                    *
  **********************************************************************/
 // implementation file for GoFTest


 #include <algorithm>
 #include <cassert>
 #include <cmath>
 #include <functional>
 #include <iostream>
 #include <limits>
 #include <map>
 #include <numeric>
 #include <string>
 #include <vector>
 #include <string.h>

 #include "Math/Error.h"
 #include "Math/Math.h"
 #include "Math/IFunction.h"
 #include "Math/IFunctionfwd.h"
 #include "Math/Integrator.h"
 #include "Math/ProbFuncMathCore.h"
 #include "Math/WrappedFunction.h"

 #include "Math/GoFTest.h"

 #include "Fit/BinData.h"

 #include "TStopwatch.h"

 /* Note: The references mentioned here are stated in GoFTest.h */

 namespace ROOT {
 namespace Math {

    struct CDFWrapper : public IGenFunction {
       // wrapper around a cdf funciton to re-scale for the range
       Double_t fXmin; // lower range for x
       Double_t fXmax; // lower range for x
       Double_t fNorm; // normalization
       const IGenFunction* fCDF; // cdf pointer (owned by the class)


       virtual ~CDFWrapper() { if (fCDF) delete fCDF; }

       CDFWrapper(const IGenFunction& cdf, Double_t xmin=0, Double_t xmax=-1) :
          fCDF(cdf.Clone())
       {
          if (xmin >= xmax) {
             fNorm = 1;
             fXmin = -std::numeric_limits<double>::infinity();
             fXmax = std::numeric_limits<double>::infinity();
          }
          else {
             fNorm = cdf(xmax) - cdf(xmin);
             fXmin = xmin;
             fXmax = xmax;
          }
       }

       Double_t DoEval(Double_t x) const {
          if (x <= fXmin) return 0;
          if (x >= fXmax) return 1.0;
          return (*fCDF)(x)/fNorm;
       }

       IGenFunction* Clone() const {
          return new CDFWrapper(*fCDF,fXmin,fXmax);
       }
    };


    class PDFIntegral : public IGenFunction {
       Double_t fXmin; // lower range for x
       Double_t fXmax; // lower range for x
       Double_t fNorm; // normalization
       mutable IntegratorOneDim fIntegral;
       const IGenFunction* fPDF; // pdf pointer (owned by the class)
    public:

       virtual ~PDFIntegral() { if (fPDF) delete fPDF; }

       PDFIntegral(const IGenFunction& pdf, Double_t xmin = 0, Double_t xmax = -1) :
          fXmin(xmin),
          fXmax(xmax),
          fNorm(1),
          fPDF(pdf.Clone())
       {
          // compute normalization
          fIntegral.SetFunction(*fPDF);  // N.B. must be fPDF (the cloned copy) and not pdf which can disappear
          if (fXmin >= fXmax) {
             fXmin = -std::numeric_limits<double>::infinity();
             fXmax = std::numeric_limits<double>::infinity();
          }
          if (fXmin == -std::numeric_limits<double>::infinity() && fXmax == std::numeric_limits<double>::infinity() ) {
             fNorm = fIntegral.Integral();
          }
          else if (fXmin == -std::numeric_limits<double>::infinity() )
             fNorm = fIntegral.IntegralLow(fXmax);
          else if (fXmax == std::numeric_limits<double>::infinity() )
             fNorm = fIntegral.IntegralUp(fXmin);
          else
             fNorm = fIntegral.Integral(fXmin, fXmax);
       }

       Double_t DoEval(Double_t x) const {
          if (x <= fXmin) return 0;
          if (x >= fXmax) return 1.0;
          if (fXmin == -std::numeric_limits<double>::infinity() )
             return fIntegral.IntegralLow(x)/fNorm;
          else
             return fIntegral.Integral(fXmin,x)/fNorm;
       }

       IGenFunction* Clone() const {
          return new PDFIntegral(*fPDF, fXmin, fXmax);
       }
    };

    // Selects one of the built-in distributions (enum values from kGaussian up
    // to kExponential) and rebuilds the matching CDF. Any other value is
    // rejected with an error message and leaves the object untouched.
    void GoFTest::SetDistribution(EDistribution dist) {
       const Bool_t isBuiltIn = (dist >= kGaussian) && (dist <= kExponential);
       if (isBuiltIn) {
          fDist = dist;
          SetCDF();
          return;
       }
       MATH_ERROR_MSG("SetDistribution", "Cannot set distribution type! Distribution type option must be ennabled.");
    }

    // Two-sample constructor: stores both samples, with the distribution left
    // undefined. Each sample must be a non-null pointer with a non-zero size;
    // a violation is reported and then trips an assert.
    GoFTest::GoFTest( UInt_t sample1Size, const Double_t* sample1, UInt_t sample2Size, const Double_t* sample2 )
    : fDist(kUndefined),
      fSamples(std::vector<std::vector<Double_t> >(2)),
      fTestSampleFromH0(kFALSE) {
       const Double_t* const data[2] = { sample1, sample2 };
       const UInt_t sizes[2] = { sample1Size, sample2Size };
       const char* const labels[2] = { "'sample1", "'sample2" };

       // validate the two samples in order
       for (int s = 0; s < 2; ++s) {
          const Bool_t badSampleArg = (data[s] == 0) || (sizes[s] == 0);
          if (!badSampleArg) continue;
          std::string msg(labels[s]);
          msg += (sizes[s] == 0) ? "Size' cannot be zero" : "' cannot be zero-length";
          MATH_ERROR_MSG("GoFTest", msg.c_str());
          assert(!badSampleArg);
       }

       std::vector<const Double_t*> samples(data, data + 2);
       std::vector<UInt_t> samplesSizes(sizes, sizes + 2);
       SetSamples(samples, samplesSizes);
       SetParameters();
    }

    // One-sample constructor: stores the sample, estimates its parameters and
    // builds the CDF for the requested distribution. The sample must be a
    // non-null pointer with a non-zero size; a violation is reported and then
    // trips an assert.
    GoFTest::GoFTest(UInt_t sampleSize, const Double_t* sample, EDistribution dist)
    : fDist(dist),
      fSamples(std::vector<std::vector<Double_t> >(1)),
      fTestSampleFromH0(kTRUE) {
       const Bool_t badSampleArg = !(sample != 0 && sampleSize != 0);
       if (badSampleArg) {
          const char* reason = (sampleSize == 0) ? "Size' cannot be zero" : "' cannot be zero-length";
          const std::string msg = std::string("'sample") + reason;
          MATH_ERROR_MSG("GoFTest", msg.c_str());
          assert(!badSampleArg);
       }
       SetSamples(std::vector<const Double_t*>(1, sample), std::vector<UInt_t>(1, sampleSize));
       SetParameters();
       SetCDF();
    }

    // Destructor: no explicit clean-up is performed here.
    GoFTest::~GoFTest()
    {
    }

    // Copies the given raw samples into fSamples (each one sorted) and builds
    // fCombinedSamples, the sorted pool of all observations.
    //
    // samples      : one pointer per sample
    // samplesSizes : number of values behind each pointer
    //
    // A null sample pointer is treated as an empty sample, so no pointer
    // arithmetic is done on it when the constructor asserts are compiled out.
    // An empty pool is reported as an error. A pool whose values are all
    // identical is reported as degenerate and trips an assert.
    void GoFTest::SetSamples(std::vector<const Double_t*> samples, const std::vector<UInt_t> samplesSizes) {
       fCombinedSamples.clear();
       fCombinedSamples.reserve(std::accumulate(samplesSizes.begin(), samplesSizes.end(), UInt_t(0)));

       for (UInt_t i = 0; i < samples.size(); ++i) {
          if (samples[i] == 0) {
             fSamples[i].clear();
             continue;
          }
          fSamples[i].assign(samples[i], samples[i] + samplesSizes[i]);
          fCombinedSamples.insert(fCombinedSamples.end(), fSamples[i].begin(), fSamples[i].end());
          std::sort(fSamples[i].begin(), fSamples[i].end());
       }
       std::sort(fCombinedSamples.begin(), fCombinedSamples.end());

       // nothing to compare: front()/back() must not be touched on an empty pool
       if (fCombinedSamples.empty()) {
          MATH_ERROR_MSG("SetSamples", "Empty sample! No sampling values were provided.");
          return;
       }

       // the pool is sorted, so equal extremes mean that all values are equal
       Bool_t degenerateSamples = fCombinedSamples.front() == fCombinedSamples.back();
       if (degenerateSamples) {
          std::string msg = "Degenerate sample";
          msg += samplesSizes.size() > 1 ? "s!" : "!";
          msg += " Sampling values all identical.";
          MATH_ERROR_MSG("SetSamples", msg.c_str());
          assert(!degenerateSamples);
       }
    }

    void GoFTest::SetParameters() {
       fMean = std::accumulate(fSamples[0].begin(), fSamples[0].end(), 0.0) / fSamples[0].size();
       fSigma = TMath::Sqrt(1. / (fSamples[0].size() - 1) * (std::inner_product(fSamples[0].begin(), fSamples[0].end(),     fSamples[0].begin(), 0.0) - fSamples[0].size() * TMath::Power(fMean, 2)));
    }

    // Runs the requested test and stores its p-value and test statistic in the
    // output arguments. Any value other than kAD2s, kKS or kKS2s selects the
    // one-sample Anderson-Darling test.
    void GoFTest::operator()(ETestType test, Double_t& pvalue, Double_t& testStat) const {
       if (test == kAD2s) {
          AndersonDarling2SamplesTest(pvalue, testStat);
       } else if (test == kKS) {
          KolmogorovSmirnovTest(pvalue, testStat);
       } else if (test == kKS2s) {
          KolmogorovSmirnov2SamplesTest(pvalue, testStat);
       } else {
          AndersonDarlingTest(pvalue, testStat);
       }
    }

    // Runs the requested test and returns the single number selected by
    // `option`, which is forwarded unchanged to the test. Any value other than
    // kAD2s, kKS or kKS2s selects the one-sample Anderson-Darling test.
    Double_t GoFTest::operator()(ETestType test, const Char_t* option) const {
       if (test == kAD2s) return AndersonDarling2SamplesTest(option);
       if (test == kKS)   return KolmogorovSmirnovTest(option);
       if (test == kKS2s) return KolmogorovSmirnov2SamplesTest(option);
       return AndersonDarlingTest(option);
    }

    void GoFTest::SetCDF() { // Setting parameter-free distributions
       IGenFunction* cdf = 0;
       switch (fDist) {
       case kLogNormal:
          LogSample();
          /* fall through */
       case kGaussian :
          cdf = new ROOT::Math::WrappedMemFunction<GoFTest, Double_t (GoFTest::*)(Double_t) const>(*this, &GoFTest::GaussianCDF);
          break;
       case kExponential:
          cdf = new ROOT::Math::WrappedMemFunction<GoFTest, Double_t (GoFTest::*)(Double_t) const>(*this, &GoFTest::ExponentialCDF);
          break;
       case kUserDefined:
       case kUndefined:
       default:
          break;
       }
       fCDF.reset(cdf);
    }

    void GoFTest::SetDistributionFunction(const IGenFunction& f, Bool_t isPDF, Double_t xmin, Double_t xmax) {
       if (fDist > kUserDefined) {
          MATH_WARN_MSG("SetDistributionFunction","Distribution type is changed to user defined");
       }
       fDist = kUserDefined;
       // function will be cloned inside the wrapper PDFIntegral of CDFWrapper classes
       if (isPDF)
          fCDF.reset(new PDFIntegral(f, xmin, xmax) );
       else
          fCDF.reset(new CDFWrapper(f, xmin, xmax) );
    }

    void GoFTest::Instantiate(const Double_t* sample, UInt_t sampleSize) {
       // initialization function for the template constructors
       Bool_t badSampleArg = sample == 0 || sampleSize == 0;
       if (badSampleArg) {
          std::string msg = "'sample";
          msg += !sampleSize ? "Size' cannot be zero" : "' cannot be zero-length";
          MATH_ERROR_MSG("GoFTest", msg.c_str());
          assert(!badSampleArg);
       }
       fCDF.reset((IGenFunction*)0);
       fDist = kUserDefined;
       fMean = 0;
       fSigma = 0;
       fSamples = std::vector<std::vector<Double_t> >(1);
       fTestSampleFromH0 = kTRUE;
       SetSamples(std::vector<const Double_t*>(1, sample), std::vector<UInt_t>(1, sampleSize));
    }

    // Normal CDF at x, using the stored fSigma and fMean as width and location.
    Double_t GoFTest::GaussianCDF(Double_t x) const {
       const Double_t cumulative = ROOT::Math::normal_cdf(x, fSigma, fMean);
       return cumulative;
    }

    // Exponential CDF at x, with the rate taken as the inverse of the stored fMean.
    Double_t GoFTest::ExponentialCDF(Double_t x) const {
       const Double_t rate = 1.0 / fMean;
       return ROOT::Math::exponential_cdf(x, rate);
    }

    void GoFTest::LogSample() {
       transform(fSamples[0].begin(), fSamples[0].end(), fSamples[0].begin(),
                 std::function<Double_t(Double_t)>(TMath::Log));
       SetParameters();
    }

 /*
   Taken from (1)
 */
    // Standard deviation of the k-sample Anderson-Darling statistic under the
    // null hypothesis (moments from the Scholz-Stephens paper, paragraph 3).
    //
    // ns : sizes of the individual samples
    // N  : size of the pooled sample
    //
    // Returns sqrt( (a N^3 + b N^2 + c N + d) / ((N-1)(N-2)(N-3)) ).
    // The expression is undefined for N < 4: the denominator vanishes and the
    // unsigned loop bounds below would wrap around. In that case an error is
    // reported and NaN is returned.
    Double_t GoFTest::GetSigmaN(const std::vector<UInt_t> & ns, UInt_t N) {
       if (N < 4) {
          MATH_ERROR_MSG("GetSigmaN", "Cannot compute sigmaN: pooled sample size must be at least 4.");
          return std::numeric_limits<Double_t>::quiet_NaN();
       }

       Double_t sigmaN = 0.0, h = 0.0, H = 0.0, g = 0.0, a, b, c, d, k = ns.size();

       // H = sum of the inverse sample sizes
       for (UInt_t i = 0; i < ns.size(); ++i) {
          H += 1.0 /  double( ns[i] );
       }

       // use approximate formulas for large N
       // cache Sum( 1 / i)
       if (N < 2000) {
          std::vector<double> invI(N);
          // h = sum_{i=1}^{N-1} 1/i
          for (UInt_t i = 1; i <= N - 1; ++i) {
             invI[i] = 1.0 / i;
             h += invI[i];
          }
          // g = sum_{i=1}^{N-2} sum_{j=i+1}^{N-1} 1 / ((N-i) j)
          for (UInt_t i = 1; i <= N - 2; ++i) {
             double tmp = invI[N-i];
             for (UInt_t j = i + 1; j <= N - 1; ++j) {
                g += tmp * invI[j];
             }
          }
       }
       else {
          // for N larger than 2000 error difference in g is ~ 5 10^-3 while in h is at the level of 10^-5
          const double emc = 0.5772156649015328606065120900824024; // Euler-Mascheroni constant
          h = std::log(double(N-1) ) + emc;
          g = (M_PI)*(M_PI)/6.0;
       }
       double k2 = std::pow(k,2);
       a = (4 * g - 6) * k + (10 - 6 * g) * H - 4 * g + 6;
       b = (2 * g - 4) * k2 + 8 * h * k + (2 * g - 14 * h - 4) * H - 8 * h + 4 * g - 6;
       c = (6 * h + 2 * g - 2) * k2 + (4 * h - 4 *g + 6) * k + (2 * h - 6) * H + 4 * h;
       d = (2 * h + 6) * k2 - 4 * h * k;
       sigmaN +=  a * std::pow(double(N),3) + b * std::pow(double(N),2) + c * N + d;
       sigmaN /= ( double(N - 1) * double(N - 2) * double(N - 3) );
       sigmaN = TMath::Sqrt(sigmaN);
       return sigmaN;
    }


    // p-value of the standardized k-sample Anderson-Darling statistic tx.
    //
    // nsamples : number of samples compared; only 2 is implemented. For any
    //            other value an error is reported and 0 is returned.
    // tx       : standardized test statistic
    Double_t GoFTest::PValueADKSamples(UInt_t nsamples, Double_t tx)  {

       /*
        Computation of p-values according to
        "K-Sample Anderson-Darling Tests" by F.W. Scholz
        and M.A. Stephens (1987), Journal of the American Statistical Association,
        Vol 82, No. 399, pp 918-924.
        Code from kSamples package from R (author F. Scholz)

        This function uses the upper T_m quantiles as obtained via simulation of
        the Anderson-Darling test statistics (Nsim = 2*10^6) with sample sizes n=500
        for each sample, and after standardization, in order to emulate the Table 1
        values given in the above reference. However, here we estimate p-quantiles
        for p = .00001,.00005,.0001,.0005,.001,.005,.01,.025,.05,.075,
        .1,.2,.3,.4,.5,.6,.7,.8,.9,.925,.95,.975,.99,.9925,.995,.9975,.999,
        .99925,.9995,.99975,.9999,.999925,.99995,.999975,.99999
        First the appropriate p-quantiles are determined from those simulated
        for ms = 1,2,3,4,6,8,10, Inf, interpolating to the given value of m.
        Since we use only m=2 we avoid this interpolation.

        Next, linear interpolation is used to find the observed p value given the
        observed test statistic value.
        We use interpolation in the test statistic -> log((1-p)/p) domain
        and we extrapolate (linearly) beyond p = .00001 and .99999.
       */

       if (nsamples != 2) {
          MATH_ERROR_MSG("PValueADKSamples", "Interpolation not implemented for nsamples not equal to  2");
          return 0;
       }

       // sample values
       // number of tabulated m values: ms = { 1, 2, 3, 4, 6, 8, 10, Inf }
       const int ns = 8;
       // simulated quantiles, stored as nbins rows of ns values (one row per p);
       // static const so that the table is not rebuilt on every call
       static const double ts[ ]   = { -1.1954, -1.5806, -1.8172,
                                  -2.0032, -2.2526, -2.4204, -2.5283, -4.2649, -1.1786, -1.5394,
                                  -1.7728, -1.9426, -2.1685, -2.3288, -2.4374, -3.8906, -1.166,
                                  -1.5193, -1.7462, -1.9067, -2.126, -2.2818, -2.3926, -3.719,
                                  -1.1407, -1.4659, -1.671, -1.8105, -2.0048, -2.1356, -2.2348,
                                  -3.2905, -1.1253, -1.4371, -1.6314, -1.7619, -1.9396, -2.0637,
                                  -2.1521, -3.0902, -1.0777, -1.3503, -1.5102, -1.6177, -1.761,
                                  -1.8537, -1.9178, -2.5758, -1.0489, -1.2984, -1.4415, -1.5355,
                                  -1.6625, -1.738, -1.7936, -2.3263, -0.9978, -1.2098, -1.3251,
                                  -1.4007, -1.4977, -1.5555, -1.5941, -1.96, -0.9417, -1.1187,
                                  -1.209, -1.2671, -1.3382, -1.379, -1.405, -1.6449, -0.8981, -1.0491,
                                  -1.1235, -1.1692, -1.2249, -1.2552, -1.2755, -1.4395, -0.8598,
                                  -0.9904, -1.0513, -1.0879, -1.1317, -1.155, -1.1694, -1.2816,
                                  -0.7258, -0.7938, -0.8188, -0.8312, -0.8435, -0.8471, -0.8496,
                                  -0.8416, -0.5966, -0.617, -0.6177, -0.6139, -0.6073, -0.5987,
                                  -0.5941, -0.5244, -0.4572, -0.4383, -0.419, -0.4033, -0.3834,
                                  -0.3676, -0.3587, -0.2533, -0.2966, -0.2428, -0.2078, -0.1844,
                                  -0.1548, -0.1346, -0.1224, 0, -0.1009, -0.0169, 0.0304, 0.0596,
                                  0.0933, 0.1156, 0.1294, 0.2533, 0.1571, 0.2635, 0.3169, 0.348,
                                  0.3823, 0.4038, 0.4166, 0.5244, 0.5357, 0.6496, 0.6992, 0.7246,
                                  0.7528, 0.7683, 0.7771, 0.8416, 1.2255, 1.2989, 1.3202, 1.3254,
                                  1.3305, 1.3286, 1.3257, 1.2816, 1.5262, 1.5677, 1.5709, 1.5663,
                                  1.5561, 1.5449, 1.5356, 1.4395, 1.9633, 1.943, 1.919, 1.8975,
                                  1.8641, 1.8389, 1.8212, 1.6449, 2.7314, 2.5899, 2.5, 2.4451,
                                  2.3664, 2.3155, 2.2823, 1.96, 3.7825, 3.4425, 3.2582, 3.1423,
                                  3.0036, 2.9101, 2.8579, 2.3263, 4.1241, 3.716, 3.4984, 3.3651,
                                  3.2003, 3.0928, 3.0311, 2.4324, 4.6044, 4.0847, 3.8348, 3.6714,
                                  3.4721, 3.3453, 3.2777, 2.5758, 5.409, 4.7223, 4.4022, 4.1791,
                                  3.9357, 3.7809, 3.6963, 2.807, 6.4954, 5.5823, 5.1456, 4.8657,
                                  4.5506, 4.3275, 4.2228, 3.0902, 6.8279, 5.8282, 5.3658, 5.0749,
                                  4.7318, 4.4923, 4.3642, 3.1747, 7.2755, 6.197, 5.6715, 5.3642,
                                  4.9991, 4.7135, 4.5945, 3.2905, 8.1885, 6.8537, 6.2077, 5.8499,
                                  5.4246, 5.1137, 4.9555, 3.4808, 9.3061, 7.6592, 6.85, 6.4806,
                                  5.9919, 5.6122, 5.5136, 3.719, 9.6132, 7.9234, 7.1025, 6.6731,
                                  6.1549, 5.8217, 5.7345, 3.7911, 10.0989, 8.2395, 7.4326, 6.9567,
                                  6.3908, 6.011, 5.9566, 3.8906, 10.8825, 8.8994, 7.8934, 7.4501,
                                  6.9009, 6.4538, 6.2705, 4.0556, 11.8537, 9.5482, 8.5568, 8.0283,
                                  7.4418, 6.9524, 6.6195, 4.2649 };


       // p values bins
       static const double p[] = { .00001,.00005,.0001,.0005,.001,.005,.01,.025,.05,.075,.1,.2,.3,.4,.5,.6,.7,.8,.9,
                        .925,.95,.975,.99,.9925,.995,.9975,.999,.99925,.9995,.99975,.9999,.999925,.99995,.999975,.99999 };

       const int nbins = 35;

       // get ts values for nsamples = 2
       // corresponding value is for m=nsamples-1
       const int offset = 0;  // for m = 1 (i.e. for nsamples = 2)

       std::vector<double> ts2(nbins); // ts values for nsamples = 2
       std::vector<double> lp(nbins);  // log ( (1-p) / p ) for the tabulated p
       for (int i = 0; i < nbins; ++i)
       {
          ts2[i] = ts[offset+ i * ns];
          const double q = 1.-p[i];
          lp[i] = std::log( q/(1.-q) );
       }
       // do linear interpolation to find right lp value for given observed test statistic value
       int i1 = std::distance(ts2.begin(),  std::lower_bound(ts2.begin(), ts2.end(), tx ) ) - 1;
       int i2 = i1+1;
       // if tx is before min of tabulated data
       if (i1 < 0) {
          i1 = 0;
          i2 = 1;
       }
       // if tx is after max of tabulated data
       if (i2 >= int(ts2.size()) ) {
          i1 = ts2.size()-2;
          i2 = ts2.size()-1;
       }

       assert(i1 < (int) lp.size() && i2 < (int) lp.size() );
       double lp1 = lp[i1];
       double lp2 = lp[i2];
       double tx1 = ts2[i1];
       double tx2 = ts2[i2];

       // find interpolated (or extrapolated) value
       double lp0 = (lp1-lp2) * (tx - tx2)/ ( tx1-tx2) + lp2;

       // logistic transform back to a probability; when exp overflows the
       // ratio would be inf/inf = NaN, while its limit is 1
       const double e = std::exp(lp0);
       if (e == std::numeric_limits<double>::infinity()) return 1.;
       double p0 = e/(1. + e);
       return p0;

    }


 /*
   Taken from (2)
 */
    // Converts the one-sample Anderson-Darling statistic A2 into a p-value,
    // returned as 1 - cdf(A2), using two closed-form approximations of the cdf
    // (one for A2 < 2, one for A2 >= 2).
    // NOTE(review): a non-positive A2 returns 0 while a tiny positive A2 returns
    // a value close to 1 - confirm that this is intended.
    Double_t GoFTest::PValueAD1Sample(Double_t A2) const {
       if (A2 <= 0.0) return 0.0;

       Double_t cdf = 0.0;
       if (A2 < 2.) {
          const Double_t series = (2.00012 + (0.247105 - (0.0649821 - (0.0347962 - (0.011672 - 0.00168691 * A2) * A2) * A2) * A2) * A2);
          cdf = std::pow(A2, -0.5) * std::exp(-1.2337141 / A2) * series;
       } else {
          const Double_t exponent = 1.0776 - (2.30695 - (0.43424 - (.082433 - (0.008056 - 0.0003146 * A2) * A2) * A2) * A2) * A2;
          cdf = std::exp(-1. * std::exp(exponent));
       }
       return 1. - cdf;
    }


 // code from kSamples (R) F. Scholz

 /* computes the k-sample Anderson-Darling test statistics in both original
    and alternative versions for the nonparametric (rank) test described in
    Scholz F.W. and Stephens M.A. (1987), K-sample Anderson-Darling Tests,
    Journal of the American Statistical Association, Vol 82, No. 399,
    pp. 918-924

    Arguments:
    adk: double array with length 2, stores AkN2 and AakN2
    samples: vector holding the k samples being compared (one vector of
       doubles per sample)
    zstar: vector of doubles storing the l distinct ordered observations in the
       pooled sample
    The number of samples k, the sample sizes ns and the length l of zstar
    are derived from these containers.

    Outputs:
    when the computation ends, AkN2 and AakN2 are stored in the given memory
    pointed by adk
 */

 /* Returns how many of the first n entries of dat compare equal to z
    (0 when n is not positive). */
 int getCount(double z, const double *dat, int n) {
    if (n <= 0) {
       return 0;
    }
    return static_cast<int>(std::count(dat, dat + n, z));
 }

 /* Returns the sum of the first n entries of the integer array x
    (0 when n is not positive). */
 int getSum(const int *x, int n) {
    if (n <= 0) {
       return 0;
    }
    return std::accumulate(x, x + n, 0);
 }


 /* Computes the k-sample Anderson-Darling statistics, formulas (6) and (7) of
    the Scholz-Stephens paper.

    adk:     output array of length 2; adk[0] receives AkN2 (original version)
             and adk[1] receives AakN2 (alternative version)
    samples: the k samples being compared
    zstar:   the distinct ordered observations of the pooled sample

    The multiplicities are counted on the containers themselves, so an empty
    sample is never indexed, and the cumulative multiplicities are accumulated
    once instead of being re-summed for every (sample, observation) pair. */
 void adkTestStat(double *adk, const std::vector<std::vector<double> > & samples, const std::vector<double> & zstar) {

    const int k = samples.size();
    const int l = zstar.size();

    /* sample sizes and total sample size nsum = n_1 + ... + n_k */
    std::vector<int> ns(k);
    int nsum = 0;
    for (int i = 0; i < k; i++) {
       ns[i] = samples[i].size();
       nsum += ns[i];
    }

    /* fij[i + j*k]: number of observations in the ith sample equal to zstar[j]
       lvec[j]:      multiplicity of zstar[j] in the pooled sample
       cumul[j]:     lvec[0] + ... + lvec[j] */
    std::vector<int> fij (k*l);
    std::vector<int> lvec(l);
    std::vector<int> cumul(l);
    int running = 0;
    for (int j = 0; j < l; j++) {
       lvec[j] = 0;
       for (int i = 0; i < k; i++) {
          fij[i + j*k] = static_cast<int>(std::count(samples[i].begin(), samples[i].end(), zstar[j]));
          lvec[j] += fij[i + j*k];
       }
       running += lvec[j];
       cumul[j] = running;
    }

    // loop on samples to compute the adk's
    // Formula (6) and (7) of the paper
    adk[0] = adk[1] = 0;
    for (int i = 0; i < k; i++) {
       double mij = 0;       /* observations of sample i up to and including zstar[j] */
       double innerSum = 0;
       double aInnerSum = 0;

       for (int j = 0; j < l; j++) {
          mij += fij[i + j*k];
          const double maij = mij - (double) fij[i + j*k] / 2.0;
          const double bj = cumul[j];
          const double baj = bj - (double) lvec[j] / 2.0;

          /* the last distinct value is skipped in the original version:
             there bj equals nsum and the denominator vanishes */
          if (j < l - 1) {
             const double tmp = (double) nsum * mij - (double) ns[i] * bj;
             innerSum = innerSum + (double) lvec[j] * tmp * tmp /
                        (bj * ((double) nsum - bj));
          }

          const double tmp = (double) nsum * maij - (double) ns[i] * baj;
          aInnerSum = aInnerSum + (double) lvec[j] * tmp * tmp /
                      (baj * (nsum - baj) - nsum * (double) lvec[j] / 4.0);
       }

       adk[0] = adk[0] + innerSum / ns[i]; /* AkN2*/
       adk[1] = adk[1] + aInnerSum / ns[i]; /* AakN2 */
    }

    /* k-sample Anderson-Darling test statistics in both original and
       alternative versions, AkN2 and AakN2, are stored in the given
       double array adk */
    adk[0] = adk[0] / (double) nsum; /* AkN2*/
    adk[1] = (nsum - 1) * adk[1] / ((double) nsum * (double) nsum); /* AakN2 */

 }


 /*
   Taken from (1) -- Named for 2 samples but implemented for K. Restricted to K = 2 by the class's constructors
 */
 // Anderson-Darling test on the stored samples.
 //
 // pvalue   : receives the p-value (-1 on error)
 // testStat : receives the standardized test statistic (-1 on error)
 //
 // Steps: build the distinct pooled observations, compute the statistic with
 // adkTestStat, subtract (number of samples - 1), divide by GetSigmaN and
 // convert to a p-value with PValueADKSamples. An object constructed for a
 // one-sample test is rejected with an error message.
 // The block guarded by USE_OLDIMPL is an older implementation that prints
 // timing and debug information; it is only compiled when that macro is defined.
 void GoFTest::AndersonDarling2SamplesTest(Double_t& pvalue, Double_t& testStat) const {
       pvalue = -1;
       testStat = -1;
       if (fTestSampleFromH0) {
          MATH_ERROR_MSG("AndersonDarling2SamplesTest", "Only 1-sample tests can be issued with a 1-sample constructed GoFTest object!");
          return;
       }
       std::vector<Double_t> z(fCombinedSamples);
       // std::unique removes all consecutive duplicate elements, which is exactly what we want here:
       // for example unique of v={1,2,2,3,1,2,3,3} results in {1,2,3,1,2,3}
       std::vector<Double_t>::iterator endUnique = std::unique(z.begin(), z.end()); //z_j's in (1)
       z.erase(endUnique, z.end() );
       std::vector<UInt_t> h; // h_j's in (1)
       std::vector<Double_t> H; // H_j's in (1)
       UInt_t N = fCombinedSamples.size();
       Double_t A2 = 0.0; // Anderson-Darling A^2 Test Statistic

 #ifdef USE_OLDIMPL

       TStopwatch w; w.Start();

       unsigned int nSamples = fSamples.size();

       // old implementation
       // h_j: multiplicity of z_j in the pooled sample
       // H_j: number of pooled values below z_j plus half of its multiplicity
       for (std::vector<Double_t>::iterator data = z.begin(); data != endUnique; ++data) {
          UInt_t n = std::count(fCombinedSamples.begin(), fCombinedSamples.end(), *data);
          h.push_back(n);
          H.push_back(std::count_if(fCombinedSamples.begin(), fCombinedSamples.end(),
                      std::bind(std::less<Double_t>(), std::placeholders::_1, *data)) + n / 2.);
       }
       std::cout << "time for H";
       w.Print();
       w.Reset(); w.Start();
       // F_ij: the same quantity as H_j, computed within sample i
       std::vector<std::vector<Double_t> > F(nSamples); // F_ij's in (1)
       for (UInt_t i = 0; i < nSamples; ++i) {
          for (std::vector<Double_t>::iterator data = z.begin(); data != endUnique; ++data) {
             UInt_t n = std::count(fSamples[i].begin(), fSamples[i].end(), *data);
             F[i].push_back(std::count_if(fSamples[i].begin(), fSamples[i].end(),
                            std::bind(std::less<Double_t>(), std::placeholders::_1, *data)) + n / 2.);
          }
       }
       std::cout << "time for F";
       w.Print();
       for (UInt_t i = 0; i < nSamples; ++i) {
          Double_t sum_result = 0.0;
          UInt_t j = 0;
          w.Reset(); w.Start();
          for (std::vector<Double_t>::iterator data = z.begin(); data != endUnique; ++data) {
             sum_result += h[j] *  TMath::Power(N * F[i][j]- fSamples[i].size() * H[j], 2) / (H[j] * (N - H[j]) - N * h[j] / 4.0);
             ++j;
          }
          std::cout << "time for sum_resut";
          w.Print();
          std::cout << "sum_result " << sum_result << std::endl;
          A2 += 1.0 / fSamples[i].size() * sum_result;
       }
       A2 *= (N - 1) / (TMath::Power(N, 2)); // A2_akN in (1)

       std::cout << "A2 - old Bartolomeo code " << A2 << std::endl;
 #endif
       // w.Reset();
       // w.Start();

       double adk[2] = {0,0};

       //debug
       // std::cout << "combined samples\n";
       // for (int i = 0; i < fCombinedSamples.size(); ++i)
       //    std::cout << fCombinedSamples[i] << " ,";
       // std::cout << std::endl;
       // std::cout << ns[0] << "  " << ns[1] << std::endl;
       // std::cout << "Z\n";
       // for (int i = 0; i < z.size(); ++i)
       //    std::cout << z[i] << " ,";
       // std::cout << std::endl;

       // use function from kSamples code
       adkTestStat(adk, fSamples, z );
       // w.Print();
       // std::cout << "A2 - new kSamples  code " << adk[0] << "  " << adk[1]  << std::endl;

       // only the first value filled by adkTestStat is used; any value computed
       // by the USE_OLDIMPL block above is overwritten here
       A2 = adk[0];

       // compute the normalized test statistic

       std::vector<UInt_t> ns(fSamples.size());
       for (unsigned int k = 0; k < ns.size(); ++k) ns[k] = fSamples[k].size();
       Double_t sigmaN = GetSigmaN(ns, N);
       A2 -= fSamples.size() - 1;
       A2 /= sigmaN; // standardized test statistic

       // the p-value is always looked up for 2 samples
       pvalue = PValueADKSamples(2,A2);
       testStat = A2;
       return;
    }


 /*
    Compute the Anderson-Darling test for two binned data sets.
    A binned data set can be seen as many identical observations located at the center of the bin.
    In this way it is trivial to apply formula (6) in the paper of F.W. Scholz, M. Stephens, "K-Sample Anderson-Darling Tests"
    to the case of histograms. See also http://arxiv.org/pdf/0804.0380v1.pdf paragraph  3.3.5
    It is important that empty bins are not present
 */
    // Anderson-Darling test for two binned data sets.
    //
    // data1, data2 : one-dimensional binned data sets; the coordinate of each
    //                point is the observation and its value is the number of
    //                observations at that coordinate
    // pvalue       : receives the p-value (-1 on error)
    // testStat     : receives the standardized test statistic (-1 on error)
    //
    // Errors (reported, outputs left at -1): either data set is not
    // one-dimensional, is empty, or has a non-positive total content.
    void GoFTest::AndersonDarling2SamplesTest(const ROOT::Fit::BinData &data1, const ROOT::Fit::BinData & data2, Double_t& pvalue, Double_t& testStat)  {
       pvalue = -1;
       testStat = -1;

       // both data sets must be one-dimensional
       if (data1.NDim() != 1 || data2.NDim() != 1) {
          MATH_ERROR_MSG("AndersonDarling2SamplesTest", "Bin Data set must be one-dimensional ");
          return;
       }
       const unsigned int n1 = data1.Size();
       const unsigned int n2 = data2.Size();
       if (n1 == 0 || n2 == 0) {
          MATH_ERROR_MSG("AndersonDarling2SamplesTest", "Bin Data set is empty");
          return;
       }
       double ntot1 = 0;
       double ntot2 = 0;

       // make a combined data set of the bin coordinates and sum the contents
       std::vector<double> xdata(n1+n2);
       for (unsigned int i = 0; i < n1; ++i) {
          double value = 0;
          const double * x = data1.GetPoint(i, value);
          xdata[i] = *x;
          ntot1 += value;
       }
       for (unsigned int i = 0; i < n2; ++i) {
          double value = 0;
          const double * x = data2.GetPoint(i, value);
          xdata[n1+i] = *x;
          ntot2 += value;
       }
       // the totals are used as divisors below
       if (!(ntot1 > 0) || !(ntot2 > 0)) {
          MATH_ERROR_MSG("AndersonDarling2SamplesTest", "Bin Data set has no entries");
          return;
       }
       const double nall = ntot1+ntot2;

       // sort the combined data (ascending order of the coordinates)
       std::vector<unsigned int> index(n1+n2);
       TMath::Sort(n1+n2, &xdata[0], &index[0], false );

       // now compute the sums for the tests
       double sum1 = 0;
       double sum2 = 0;
       double sumAll = 0;
       double adsum = 0;
       unsigned int j = 0;

       while( j < n1+n2 ) {
          // group together all bins with the same coordinate
          const double x = xdata[ index[j] ];
          unsigned int k = j;
          double t = 0; // total content at this coordinate
          do {
             unsigned int i = index[k];
             double value = 0;
             if (i < n1 ) {
                value = data1.Value(i);
                sum1 += value;
             }
             else {
                // from data2
                i -= n1;
                assert(i < n2);
                value = data2.Value(i);
                sum2 += value;
             }
             sumAll += value;
             t += value;
             k++;
          } while ( k < n1+n2 && xdata[ index[k] ] == x  );

          j = k;
          // skip last point
          if (j < n1+n2) {
             const double tmp1 =  ( nall * sum1 - ntot1 * sumAll );
             const double tmp2 =  ( nall * sum2 - ntot2 * sumAll );
             adsum += t * (tmp1*tmp1/ntot1 + tmp2*tmp2/ntot2) / ( sumAll *  (nall - sumAll) ) ;
          }
       }
       double A2 = adsum / nall;

       // compute the normalized test statistic
       // (the totals are truncated to integers for the sample sizes)
       std::vector<unsigned int> ns(2);
       ns[0] = static_cast<unsigned int>(ntot1);
       ns[1] = static_cast<unsigned int>(ntot2);

       Double_t sigmaN = GetSigmaN(ns, static_cast<UInt_t>(nall));
       A2 -= 1;
       A2 /= sigmaN; // standardized test statistic

       pvalue = PValueADKSamples(2,A2);
       testStat = A2;
       return;
    }


    Double_t GoFTest::AndersonDarling2SamplesTest(const Char_t* option) const {
       // Convenience overload: runs the (pvalue, testStat) overload and returns
       // one of the two numbers, selected by the first character of `option`.
       Double_t pvalue;
       Double_t testStat;
       AndersonDarling2SamplesTest(pvalue, testStat);
       // Only an option starting with 't' selects the test statistic; "p" and
       // any other string (including the empty one) select the p-value.
       if (strncmp(option, "t", 1) == 0) {
          return testStat;
       }
       return pvalue;
    }

 /*
   Taken from (3)

   One-sample Anderson-Darling test of fSamples[0] against the cdf held in fCDF.
   On success pvalue and testStat receive the p-value and the A^2 statistic;
   on any error an error message is issued and both are left at -1.
 */
    void GoFTest::AndersonDarlingTest(Double_t& pvalue, Double_t& testStat) const {
       // Sentinel values reported whenever the test cannot be carried out
       pvalue = -1;
       testStat = -1;

       if (!fTestSampleFromH0) {
          MATH_ERROR_MSG("AndersonDarlingTest", "Only 2-sample tests can be issued with a 2-sample constructed GoFTest object!");
          return;
       }
       if (fDist == kUndefined) {
          MATH_ERROR_MSG("AndersonDarlingTest", "Distribution type is undefined! Please use SetDistribution(GoFTest::EDistribution).");
          return;
       }

       Int_t n = fSamples[0].size();
       Double_t A2 = 0.0;
       for (Int_t i = 0; i < n; ++i) {
          const Int_t rank = i + 1; // 1-based position of the observation
          const Double_t cdfValue = (*fCDF)(fSamples[0][i]);
          // weight (2*rank - 1) on log F(x) and (2*(n - rank) + 1) on log(1 - F(x))
          const Double_t term = (2 * rank - 1) * TMath::Log(cdfValue) + (2 * (n - rank) + 1) * TMath::Log(1 - cdfValue);
          A2 += term;
       }
       // A2 = -sum/n - n
       A2 = A2 / (-n) - n;

       // A NaN statistic (the only value that differs from itself) means the
       // logarithms above could not be evaluated for the sample.
       if (A2 != A2) {
          MATH_ERROR_MSG("AndersonDarlingTest", "Cannot compute p-value: data below or above the distribution's thresholds. Check sample consistency.");
          return;
       }

       pvalue = PValueAD1Sample(A2);
       testStat = A2;
    }

    Double_t GoFTest::AndersonDarlingTest(const Char_t* option) const {
       // Convenience overload: runs the (pvalue, testStat) overload and returns
       // one of the two numbers, selected by the first character of `option`.
       Double_t pvalue;
       Double_t testStat;
       AndersonDarlingTest(pvalue, testStat);
       // Only an option starting with 't' selects the test statistic; "p" and
       // any other string (including the empty one) select the p-value.
       if (strncmp(option, "t", 1) == 0) {
          return testStat;
       }
       return pvalue;
    }

    // Two-sample Kolmogorov-Smirnov test between fSamples[0] and fSamples[1].
    // On success pvalue receives the result of TMath::KolmogorovTest with the
    // default option and testStat the result obtained with option "M".
    // When the object was built for a 1-sample test an error is issued and both
    // outputs are left at -1.
    void GoFTest::KolmogorovSmirnov2SamplesTest(Double_t& pvalue, Double_t& testStat) const {
       pvalue = -1;
       testStat = -1;
       if (fTestSampleFromH0) {
          MATH_ERROR_MSG("KolmogorovSmirnov2SamplesTest", "Only 1-sample tests can be issued with a 1-sample constructed GoFTest object!");
          return;
       }
       // TMath::KolmogorovTest only reads its input arrays (const Double_t*),
       // so the stored samples are passed directly instead of being copied.
       const std::vector<Double_t>& a = fSamples[0];
       const std::vector<Double_t>& b = fSamples[1];
       const UInt_t na = a.size();
       const UInt_t nb = b.size();
       pvalue = TMath::KolmogorovTest(na, a.data(), nb, b.data(), 0);
       testStat = TMath::KolmogorovTest(na, a.data(), nb, b.data(), "M");
    }

    Double_t GoFTest::KolmogorovSmirnov2SamplesTest(const Char_t* option) const {
       // Convenience overload: runs the (pvalue, testStat) overload and returns
       // one of the two numbers, selected by the first character of `option`.
       Double_t pvalue;
       Double_t testStat;
       KolmogorovSmirnov2SamplesTest(pvalue, testStat);
       // Only an option starting with 't' selects the test statistic; "p" and
       // any other string (including the empty one) select the p-value.
       if (strncmp(option, "t", 1) == 0) {
          return testStat;
       }
       return pvalue;
    }

 /*
    Algorithm taken from (3) in page 737

    One-sample Kolmogorov-Smirnov test of fSamples[0] against the cdf held in fCDF.
    On success testStat receives the maximum distance Dn between the empirical
    and the hypothesised cdf and pvalue the corresponding Kolmogorov probability;
    on any error an error message is issued and both are left at -1.
    NOTE(review): the step values (i + 1)/n assume fSamples[0] is sorted in
    ascending order - confirm against the code that fills fSamples.
 */ void GoFTest::KolmogorovSmirnovTest(Double_t& pvalue, Double_t& testStat) const {
       pvalue = -1;
       testStat = -1;
       if (!fTestSampleFromH0) {
          MATH_ERROR_MSG("KolmogorovSmirnovTest", "Only 2-sample tests can be issued with a 2-sample constructed GoFTest object!");
          return;
       }
       if (fDist == kUndefined) {
          MATH_ERROR_MSG("KolmogorovSmirnovTest", "Distribution type is undefined! Please use SetDistribution(GoFTest::EDistribution).");
          return;
       }
       Double_t Fo = 0.0, Dn = 0.0;
       UInt_t n = fSamples[0].size();
       for (UInt_t i = 0; i < n; ++i) {
          Double_t Fn = (i + 1.0) / n;            // empirical cdf just after the step at sample i
          Double_t F = (*fCDF)(fSamples[0][i]);   // hypothesised cdf at sample i
          // The empirical cdf jumps from Fo to Fn at this sample, so the
          // hypothesised cdf must be compared with both sides of the step.
          // (Comparing Fo with Fn, as done previously, always yields 1/n.)
          Double_t result = std::max(TMath::Abs(Fn - F), TMath::Abs(Fo - F));
          if (result > Dn) Dn = result;
          Fo = Fn;
       }
       pvalue = TMath::KolmogorovProb(Dn * (TMath::Sqrt(n) + 0.12 + 0.11 / TMath::Sqrt(n)));
       testStat = Dn;
    }

    Double_t GoFTest::KolmogorovSmirnovTest(const Char_t* option) const {
       // Convenience overload: runs the (pvalue, testStat) overload and returns
       // one of the two numbers, selected by the first character of `option`.
       Double_t pvalue;
       Double_t testStat;
       KolmogorovSmirnovTest(pvalue, testStat);
       // Only an option starting with 't' selects the test statistic; "p" and
       // any other string (including the empty one) select the p-value.
       if (strncmp(option, "t", 1) == 0) {
          return testStat;
       }
       return pvalue;
    }


 } // Math namespace
 } // ROOT namespace

TMath::H
constexpr Double_t H()
Planck's constant in J s.
Definition: TMath.h:189

ROOT::Math::GoFTest::kUserDefined
Definition: GoFTest.h:43

ROOT::Math::GoFTest::SetDistribution
void SetDistribution(EDistribution dist)
Definition: GoFTest.cxx:123

ROOT::Math::GoFTest::SetCDF
void SetCDF()
Definition: GoFTest.cxx:244

ROOT::Math::gv_detail::dist
double dist(Rotation3D const &r1, Rotation3D const &r2)
Definition: 3DDistances.cxx:48

ProbFuncMathCore.h

ROOT::Math::GoFTest::fTestSampleFromH0
Bool_t fTestSampleFromH0
Definition: GoFTest.h:193

ROOT::Math::getCount
int getCount(double z, const double *dat, int n)
Definition: GoFTest.cxx:520

ROOT::Math::GoFTest::fMean
Double_t fMean
Definition: GoFTest.h:186

ROOT::Math::IBaseFunctionOneDim
Interface (abstract class) for generic functions objects of one-dimension Provides a method to evalua...
Definition: IFunction.h:135

ROOT::Math::GoFTest::operator()
void operator()(ETestType test, Double_t &pvalue, Double_t &testStat) const
Definition: GoFTest.cxx:208

ROOT
Namespace for new ROOT classes and functions.
Definition: TFoamSampler.h:19

TMath::Log
Double_t Log(Double_t x)
Definition: TMath.h:759

ROOT::Math::GoFTest::SetSamples
void SetSamples(std::vector< const Double_t *> samples, const std::vector< UInt_t > samplesSizes)
Definition: GoFTest.cxx:180

Math.h

TMath::KolmogorovProb
Double_t KolmogorovProb(Double_t z)
Calculates the Kolmogorov distribution function.
Definition: TMath.cxx:663

ROOT::Math::GoFTest::fCombinedSamples
std::vector< Double_t > fCombinedSamples
Definition: GoFTest.h:189

ROOT::Math::GoFTest::EDistribution
EDistribution
Definition: GoFTest.h:41

ROOT::Math::GoFTest::ETestType
ETestType
Definition: GoFTest.h:54

ROOT::Math::GoFTest::AndersonDarlingTest
void AndersonDarlingTest(Double_t &pvalue, Double_t &testStat) const
Definition: GoFTest.cxx:862

ROOT::Minuit2::inner_product
double inner_product(const LAVector &, const LAVector &)
Definition: LaInnerProduct.cxx:19

ROOT::Math::GoFTest::kAD2s
Definition: GoFTest.h:56

std
STL namespace.

MATH_WARN_MSG
#define MATH_WARN_MSG(loc, str)
Definition: Error.h:47

ROOT::Math::GoFTest::GoFTest
GoFTest()

BinData.h

TMath::Power
LongDouble_t Power(LongDouble_t x, LongDouble_t y)
Definition: TMath.h:734

ROOT::Fit::BinData::GetPoint
const double * GetPoint(unsigned int ipoint, double &value) const
retrieve at the same time a pointer to the coordinate data and the fit value More efficient than call...
Definition: BinData.h:370

ROOT::Math::GoFTest::kUndefined
Definition: GoFTest.h:42

ROOT::Math::GoFTest::kGaussian
Definition: GoFTest.h:44

ROOT::Math::GoFTest::GaussianCDF
Double_t GaussianCDF(Double_t x) const
Definition: GoFTest.cxx:294

ROOT::Math::normal_cdf
double normal_cdf(double x, double sigma=1, double x0=0)
Cumulative distribution function of the normal (Gaussian) distribution (lower tail).
Definition: ProbFuncMathCore.cxx:234

Error.h

ROOT::Math::GoFTest::PValueADKSamples
static Double_t PValueADKSamples(UInt_t nsamples, Double_t A2)
Definition: GoFTest.cxx:353

ROOT::Math::GoFTest::fDist
EDistribution fDist
Definition: GoFTest.h:184

pow
double pow(double, double)

N
#define N
Definition: MixMaxEngineImpl.h:32

TMath::Sort
void Sort(Index n, const Element *a, Index *index, Bool_t down=kTRUE)
Sort the n elements of the array a of generic templated type Element.
Definition: TMath.h:1282

ROOT::Math::GoFTest::GetSigmaN
static Double_t GetSigmaN(const std::vector< UInt_t > &ns, UInt_t N)
Definition: GoFTest.cxx:311

GoFTest.h

ROOT::Math::GoFTest::KolmogorovSmirnovTest
void KolmogorovSmirnovTest(Double_t &pvalue, Double_t &testStat) const
Definition: GoFTest.cxx:921

ROOT::Fit::FitData::Size
unsigned int Size() const
return number of fit points
Definition: FitData.h:303

MATH_ERROR_MSG
#define MATH_ERROR_MSG(loc, str)
Definition: Error.h:50

IFunctionfwd.h

ROOT::Math::getSum
int getSum(const int *x, int n)
Definition: GoFTest.cxx:534

ROOT::Math::GoFTest::kKS
Definition: GoFTest.h:57

ROOT::Math::GoFTest::kAD
Definition: GoFTest.h:55

ROOT::Math::IGenFunction
IBaseFunctionOneDim IGenFunction
Definition: IFunctionfwd.h:39

TMath::KolmogorovTest
Double_t KolmogorovTest(Int_t na, const Double_t *a, Int_t nb, const Double_t *b, Option_t *option)
Statistical test whether two one-dimensional sets of points are compatible with coming from the same ...
Definition: TMath.cxx:789

M_PI
#define M_PI
Definition: Math.h:38

ROOT::Math::GoFTest::kKS2s
Definition: GoFTest.h:58

ROOT::Math::GoFTest::~GoFTest
virtual ~GoFTest()
Definition: GoFTest.cxx:178

ROOT::Fit::BinData
Class describing the binned data sets : vectors of x coordinates, y values and optionally error on y ...
Definition: BinData.h:53

ROOT::Math::GoFTest::fSigma
Double_t fSigma
Definition: GoFTest.h:187

ROOT::Math::GoFTest::kExponential
Definition: GoFTest.h:46

x
* x
Deprecated and error prone model selection interface.
Definition: TRolke.cxx:630

ROOT::Math::GoFTest::fSamples
std::vector< std::vector< Double_t > > fSamples
Definition: GoFTest.h:191

ROOT::Math::GoFTest::SetParameters
void SetParameters()
Definition: GoFTest.cxx:203

ROOT::Math::GoFTest::Instantiate
void Instantiate(const Double_t *sample, UInt_t sampleSize)
Definition: GoFTest.cxx:276

ROOT::Math::GoFTest::kLogNormal
Definition: GoFTest.h:45

ROOT::Fit::BinData::Value
double Value(unsigned int ipoint) const
return the value for the given fit point
Definition: BinData.h:217

Integrator.h

ROOT::Math::GoFTest::KolmogorovSmirnov2SamplesTest
void KolmogorovSmirnov2SamplesTest(Double_t &pvalue, Double_t &testStat) const
Definition: GoFTest.cxx:896

Math
Namespace for new Math classes and functions.

ROOT::Math::GoFTest::PValueAD1Sample
Double_t PValueAD1Sample(Double_t A2) const
Definition: GoFTest.cxx:483

z
you should not use this method at all Int_t Int_t z
Definition: TRolke.cxx:630

ROOT::Math::GoFTest::ExponentialCDF
Double_t ExponentialCDF(Double_t x) const
Definition: GoFTest.cxx:298

b
you should not use this method at all Int_t Int_t Double_t Double_t Double_t Int_t Double_t Double_t Double_t Double_t b
Definition: TRolke.cxx:630

ROOT::Fit::FitData::NDim
unsigned int NDim() const
return coordinate data dimension
Definition: FitData.h:311

TMath::Sqrt
Double_t Sqrt(Double_t x)
Definition: TMath.h:690

ROOT::Math::GoFTest::LogSample
void LogSample()
Definition: GoFTest.cxx:302

exp
double exp(double)

ROOT::Math::GoFTest::SetDistributionFunction
void SetDistributionFunction(const IGenFunction &cdf, Bool_t isPDF, Double_t xmin, Double_t xmax)
Definition: GoFTest.cxx:264

IFunction.h

ROOT::Math::WrappedMemFunction
Template class to wrap any member function of a class taking a double and returning a double in a 1D ...
Definition: WrappedFunction.h:106

log
double log(double)

ROOT::Math::adkTestStat
void adkTestStat(double *adk, const std::vector< std::vector< double > > &samples, const std::vector< double > &zstar)
Definition: GoFTest.cxx:546

WrappedFunction.h

ROOT::Math::exponential_cdf
double exponential_cdf(double x, double lambda, double x0=0)
Cumulative distribution function of the exponential distribution (lower tail).
Definition: ProbFuncMathCore.cxx:161

ROOT::Math::GoFTest::AndersonDarling2SamplesTest
void AndersonDarling2SamplesTest(Double_t &pvalue, Double_t &testStat) const
Definition: GoFTest.cxx:646