32 #include "TStopwatch.h" 47 virtual ~CDFWrapper() {
if (fCDF)
delete fCDF; }
49 CDFWrapper(
const IGenFunction& cdf, Double_t xmin=0, Double_t xmax=-1) :
54 fXmin = -std::numeric_limits<double>::infinity();
55 fXmax = std::numeric_limits<double>::infinity();
58 fNorm = cdf(xmax) - cdf(xmin);
64 Double_t DoEval(Double_t
x)
const {
65 if (x <= fXmin)
return 0;
66 if (x >= fXmax)
return 1.0;
67 return (*fCDF)(
x)/fNorm;
71 return new CDFWrapper(*fCDF,fXmin,fXmax);
80 mutable IntegratorOneDim fIntegral;
84 virtual ~PDFIntegral() {
if (fPDF)
delete fPDF; }
86 PDFIntegral(
const IGenFunction& pdf, Double_t xmin = 0, Double_t xmax = -1) :
93 fIntegral.SetFunction(*fPDF);
95 fXmin = -std::numeric_limits<double>::infinity();
96 fXmax = std::numeric_limits<double>::infinity();
98 if (fXmin == -std::numeric_limits<double>::infinity() && fXmax == std::numeric_limits<double>::infinity() ) {
99 fNorm = fIntegral.Integral();
101 else if (fXmin == -std::numeric_limits<double>::infinity() )
102 fNorm = fIntegral.IntegralLow(fXmax);
103 else if (fXmax == std::numeric_limits<double>::infinity() )
104 fNorm = fIntegral.IntegralUp(fXmin);
106 fNorm = fIntegral.Integral(fXmin, fXmax);
109 Double_t DoEval(Double_t x)
const {
110 if (x <= fXmin)
return 0;
111 if (x >= fXmax)
return 1.0;
112 if (fXmin == -std::numeric_limits<double>::infinity() )
113 return fIntegral.IntegralLow(x)/fNorm;
115 return fIntegral.Integral(fXmin,x)/fNorm;
119 return new PDFIntegral(*fPDF, fXmin, fXmax);
124 if (!(kGaussian <= dist && dist <= kExponential)) {
125 MATH_ERROR_MSG(
"SetDistribution",
"Cannot set distribution type! Distribution type option must be ennabled.");
132 GoFTest::GoFTest( UInt_t sample1Size,
const Double_t* sample1, UInt_t sample2Size,
const Double_t* sample2 )
134 fSamples(
std::vector<
std::vector<Double_t> >(2)),
135 fTestSampleFromH0(kFALSE) {
136 Bool_t badSampleArg = sample1 == 0 || sample1Size == 0;
138 std::string msg =
"'sample1";
139 msg += !sample1Size ?
"Size' cannot be zero" :
"' cannot be zero-length";
141 assert(!badSampleArg);
143 badSampleArg = sample2 == 0 || sample2Size == 0;
145 std::string msg =
"'sample2";
146 msg += !sample2Size ?
"Size' cannot be zero" :
"' cannot be zero-length";
148 assert(!badSampleArg);
150 std::vector<const Double_t*> samples(2);
151 std::vector<UInt_t> samplesSizes(2);
152 samples[0] = sample1;
153 samples[1] = sample2;
154 samplesSizes[0] = sample1Size;
155 samplesSizes[1] = sample2Size;
164 Bool_t badSampleArg = sample == 0 || sampleSize == 0;
166 std::string msg =
"'sample";
167 msg += !sampleSize ?
"Size' cannot be zero" :
"' cannot be zero-length";
169 assert(!badSampleArg);
171 std::vector<const Double_t*> samples(1, sample);
172 std::vector<UInt_t> samplesSizes(1, sampleSize);
181 fCombinedSamples.assign(std::accumulate(samplesSizes.begin(), samplesSizes.end(), 0), 0.0);
182 UInt_t combinedSamplesSize = 0;
183 for (UInt_t i = 0; i < samples.size(); ++i) {
184 fSamples[i].assign(samples[i], samples[i] + samplesSizes[i]);
186 for (UInt_t j = 0; j < samplesSizes[i]; ++j) {
189 combinedSamplesSize += samplesSizes[i];
194 if (degenerateSamples) {
195 std::string msg =
"Degenerate sample";
196 msg += samplesSizes.size() > 1 ?
"s!" :
"!";
197 msg +=
" Sampling values all identical.";
199 assert(!degenerateSamples);
226 Double_t result = 0.0;
266 MATH_WARN_MSG(
"SetDistributionFunction",
"Distribution type is changed to user defined");
271 fCDF.reset(
new PDFIntegral(f, xmin, xmax) );
273 fCDF.reset(
new CDFWrapper(f, xmin, xmax) );
278 Bool_t badSampleArg = sample == 0 || sampleSize == 0;
280 std::string msg =
"'sample";
281 msg += !sampleSize ?
"Size' cannot be zero" :
"' cannot be zero-length";
283 assert(!badSampleArg);
289 fSamples = std::vector<std::vector<Double_t> >(1);
291 SetSamples(std::vector<const Double_t*>(1, sample), std::vector<UInt_t>(1, sampleSize));
304 std::function<Double_t(Double_t)>(
TMath::Log));
314 Double_t sigmaN = 0.0, h = 0.0,
H = 0.0, g = 0.0, a,
b, c, d, k = ns.size();
316 for (UInt_t i = 0; i < ns.size(); ++i) {
317 H += 1.0 / double( ns[i] );
323 std::vector<double> invI(N);
324 for (UInt_t i = 1; i <= N - 1; ++i) {
328 for (UInt_t i = 1; i <= N - 2; ++i) {
329 double tmp = invI[N-i];
330 for (UInt_t j = i + 1; j <= N - 1; ++j) {
337 const double emc = 0.5772156649015328606065120900824024;
342 a = (4 * g - 6) * k + (10 - 6 * g) *
H - 4 * g + 6;
343 b = (2 * g - 4) * k2 + 8 * h * k + (2 * g - 14 * h - 4) *
H - 8 * h + 4 * g - 6;
344 c = (6 * h + 2 * g - 2) * k2 + (4 * h - 4 *g + 6) * k + (2 * h - 6) *
H + 4 * h;
345 d = (2 * h + 6) * k2 - 4 * h * k;
346 sigmaN += a *
std::pow(
double(N),3) + b *
std::pow(
double(N),2) + c * N + d;
347 sigmaN /= ( double(N - 1) * double(N - 2) * double(N - 3) );
382 double ts[ ] = { -1.1954, -1.5806, -1.8172,
383 -2.0032, -2.2526, -2.4204, -2.5283, -4.2649, -1.1786, -1.5394,
384 -1.7728, -1.9426, -2.1685, -2.3288, -2.4374, -3.8906, -1.166,
385 -1.5193, -1.7462, -1.9067, -2.126, -2.2818, -2.3926, -3.719,
386 -1.1407, -1.4659, -1.671, -1.8105, -2.0048, -2.1356, -2.2348,
387 -3.2905, -1.1253, -1.4371, -1.6314, -1.7619, -1.9396, -2.0637,
388 -2.1521, -3.0902, -1.0777, -1.3503, -1.5102, -1.6177, -1.761,
389 -1.8537, -1.9178, -2.5758, -1.0489, -1.2984, -1.4415, -1.5355,
390 -1.6625, -1.738, -1.7936, -2.3263, -0.9978, -1.2098, -1.3251,
391 -1.4007, -1.4977, -1.5555, -1.5941, -1.96, -0.9417, -1.1187,
392 -1.209, -1.2671, -1.3382, -1.379, -1.405, -1.6449, -0.8981, -1.0491,
393 -1.1235, -1.1692, -1.2249, -1.2552, -1.2755, -1.4395, -0.8598,
394 -0.9904, -1.0513, -1.0879, -1.1317, -1.155, -1.1694, -1.2816,
395 -0.7258, -0.7938, -0.8188, -0.8312, -0.8435, -0.8471, -0.8496,
396 -0.8416, -0.5966, -0.617, -0.6177, -0.6139, -0.6073, -0.5987,
397 -0.5941, -0.5244, -0.4572, -0.4383, -0.419, -0.4033, -0.3834,
398 -0.3676, -0.3587, -0.2533, -0.2966, -0.2428, -0.2078, -0.1844,
399 -0.1548, -0.1346, -0.1224, 0, -0.1009, -0.0169, 0.0304, 0.0596,
400 0.0933, 0.1156, 0.1294, 0.2533, 0.1571, 0.2635, 0.3169, 0.348,
401 0.3823, 0.4038, 0.4166, 0.5244, 0.5357, 0.6496, 0.6992, 0.7246,
402 0.7528, 0.7683, 0.7771, 0.8416, 1.2255, 1.2989, 1.3202, 1.3254,
403 1.3305, 1.3286, 1.3257, 1.2816, 1.5262, 1.5677, 1.5709, 1.5663,
404 1.5561, 1.5449, 1.5356, 1.4395, 1.9633, 1.943, 1.919, 1.8975,
405 1.8641, 1.8389, 1.8212, 1.6449, 2.7314, 2.5899, 2.5, 2.4451,
406 2.3664, 2.3155, 2.2823, 1.96, 3.7825, 3.4425, 3.2582, 3.1423,
407 3.0036, 2.9101, 2.8579, 2.3263, 4.1241, 3.716, 3.4984, 3.3651,
408 3.2003, 3.0928, 3.0311, 2.4324, 4.6044, 4.0847, 3.8348, 3.6714,
409 3.4721, 3.3453, 3.2777, 2.5758, 5.409, 4.7223, 4.4022, 4.1791,
410 3.9357, 3.7809, 3.6963, 2.807, 6.4954, 5.5823, 5.1456, 4.8657,
411 4.5506, 4.3275, 4.2228, 3.0902, 6.8279, 5.8282, 5.3658, 5.0749,
412 4.7318, 4.4923, 4.3642, 3.1747, 7.2755, 6.197, 5.6715, 5.3642,
413 4.9991, 4.7135, 4.5945, 3.2905, 8.1885, 6.8537, 6.2077, 5.8499,
414 5.4246, 5.1137, 4.9555, 3.4808, 9.3061, 7.6592, 6.85, 6.4806,
415 5.9919, 5.6122, 5.5136, 3.719, 9.6132, 7.9234, 7.1025, 6.6731,
416 6.1549, 5.8217, 5.7345, 3.7911, 10.0989, 8.2395, 7.4326, 6.9567,
417 6.3908, 6.011, 5.9566, 3.8906, 10.8825, 8.8994, 7.8934, 7.4501,
418 6.9009, 6.4538, 6.2705, 4.0556, 11.8537, 9.5482, 8.5568, 8.0283,
419 7.4418, 6.9524, 6.6195, 4.2649 };
426 double p[] = { .00001,.00005,.0001,.0005,.001,.005,.01,.025,.05,.075,.1,.2,.3,.4,.5,.6,.7,.8,.9,
427 .925,.95,.975,.99,.9925,.995,.9975,.999,.99925,.9995,.99975,.9999,.999925,.99995,.999975,.99999 };
430 const int nbins = 35;
437 MATH_ERROR_MSG(
"InterpolatePValues",
"Interpolation not implemented for nsamples not equal to 2");
440 std::vector<double> ts2(nbins);
441 std::vector<double> lp(nbins);
442 for (
int i = 0; i < nbins; ++i)
444 ts2[i] = ts[offset+ i * ns];
446 lp[i] =
std::log( p[i]/(1.-p[i] ) );
450 int i1 = std::distance(ts2.begin(), std::lower_bound(ts2.begin(), ts2.end(), tx ) ) - 1;
458 if (i2 >=
int(ts2.size()) ) {
464 assert(i1 < (
int) lp.size() && i2 < (int) lp.size() );
467 double tx1 = ts2[i1];
468 double tx2 = ts2[i2];
472 double lp0 = (lp1-lp2) * (tx - tx2)/ ( tx1-tx2) + lp2;
475 double p0 =
exp(lp0)/(1. +
exp(lp0) );
484 Double_t pvalue = 0.0;
487 }
else if (A2 < 2.) {
488 pvalue =
std::pow(A2, -0.5) *
std::exp(-1.2337141 / A2) * (2.00012 + (0.247105 - (0.0649821 - (0.0347962 - (0.011672 - 0.00168691 * A2) * A2) * A2) * A2) * A2);
490 pvalue =
std::exp(-1. *
std::exp(1.0776 - (2.30695 - (0.43424 - (.082433 - (0.008056 - 0.0003146 * A2) * A2) * A2) * A2) * A2));
524 for (i = 0; i < n; i++) {
538 for (i = 0; i < n; i++) {
546 void adkTestStat(
double *adk,
const std::vector<std::vector<double> > & samples,
const std::vector<double> & zstar) {
552 int k = samples.size();
553 int l = zstar.size();
557 std::vector<int> fij (k*l);
561 std::vector<int> lvec(l);
579 std::vector<int> ns(k);
581 for (i = 0; i < k; i++) {
582 ns[i] = samples[i].size();
590 for (j = 0; j < l; j++) {
592 for (i = 0; i < k; i++) {
593 fij[i + j*k] =
getCount(zstar[j], &samples[i][0], ns[i]);
594 lvec[j] += fij[i + j*k];
601 for (i = 0; i < k; i++) {
607 for (j = 0; j < l; j++) {
609 maij = mij - (double) fij[i + j*k] / 2.0;
610 bj =
getSum(&lvec[0], j + 1);
611 baj = bj - (double) lvec[j] / 2.0;
614 tmp = (double) nsum * mij - (
double) ns[i] * bj;
615 innerSum = innerSum + (double) lvec[j] * tmp * tmp /
616 (bj * ((
double) nsum - bj));
619 tmp = (double) nsum * maij - (
double) ns[i] * baj;
620 aInnerSum = aInnerSum + (double) lvec[j] * tmp * tmp /
621 (baj * (nsum - baj) - nsum * (double) lvec[j] / 4.0);
624 adk[0] = adk[0] + innerSum / ns[i];
625 adk[1] = adk[1] + aInnerSum / ns[i];
631 adk[0] = adk[0] / (double) nsum;
632 adk[1] = (nsum - 1) * adk[1] / ((
double) nsum * (double) nsum);
650 MATH_ERROR_MSG(
"AndersonDarling2SamplesTest",
"Only 1-sample tests can be issued with a 1-sample constructed GoFTest object!");
656 std::vector<Double_t>::iterator endUnique = std::unique(z.begin(), z.end());
657 z.erase(endUnique, z.end() );
658 std::vector<UInt_t> h;
659 std::vector<Double_t>
H;
665 TStopwatch w; w.Start();
667 unsigned int nSamples =
fSamples.size();
670 for (std::vector<Double_t>::iterator data = z.begin(); data != endUnique; ++data) {
674 std::bind(std::less<Double_t>(), std::placeholders::_1, *data)) + n / 2.);
676 std::cout <<
"time for H";
678 w.Reset(); w.Start();
679 std::vector<std::vector<Double_t> > F(nSamples);
680 for (UInt_t i = 0; i < nSamples; ++i) {
681 for (std::vector<Double_t>::iterator data = z.begin(); data != endUnique; ++data) {
684 std::bind(std::less<Double_t>(), std::placeholders::_1, *data)) + n / 2.);
687 std::cout <<
"time for F";
689 for (UInt_t i = 0; i < nSamples; ++i) {
690 Double_t sum_result = 0.0;
692 w.Reset(); w.Start();
693 for (std::vector<Double_t>::iterator data = z.begin(); data != endUnique; ++data) {
694 sum_result += h[j] *
TMath::Power(N * F[i][j]-
fSamples[i].size() * H[j], 2) / (H[j] * (N - H[j]) - N * h[j] / 4.0);
697 std::cout <<
"time for sum_resut";
699 std::cout <<
"sum_result " << sum_result << std::endl;
700 A2 += 1.0 /
fSamples[i].size() * sum_result;
704 std::cout <<
"A2 - old Bartolomeo code " << A2 << std::endl;
709 double adk[2] = {0,0};
731 std::vector<UInt_t> ns(
fSamples.size());
732 for (
unsigned int k = 0; k < ns.size(); ++k) ns[k] =
fSamples[k].size();
759 if (data1.
NDim() != 1 && data2.
NDim() != 1) {
760 MATH_ERROR_MSG(
"AndersonDarling2SamplesTest",
"Bin Data set must be one-dimensional ");
763 unsigned int n1 = data1.
Size();
764 unsigned int n2 = data2.
Size();
770 std::vector<double> xdata(n1+n2);
771 for (
unsigned int i = 0; i < n1; ++i) {
773 const double * x = data1.
GetPoint(i, value);
777 for (
unsigned int i = 0; i < n2; ++i) {
779 const double * x = data2.
GetPoint(i, value);
783 double nall = ntot1+ntot2;
785 std::vector<unsigned int> index(n1+n2);
798 double x = xdata[ index[j] ];
803 unsigned int i = index[k];
806 value = data1.
Value(i);
813 value = data2.
Value(i);
820 }
while ( k < n1+n2 && xdata[ index[k] ] == x );
826 double tmp1 = ( nall * sum1 - ntot1 * sumAll );
827 double tmp2 = ( nall * sum2 - ntot2 * sumAll );
828 adsum += t * (tmp1*tmp1/ntot1 + tmp2*tmp2/ntot2) / ( sumAll * (nall - sumAll) ) ;
833 double A2 = adsum / nall;
836 std::vector<unsigned int> ns(2);
855 Double_t pvalue, testStat;
857 return (strncmp(option,
"p", 1) == 0 || strncmp(option,
"t", 1) != 0) ? pvalue : testStat;
866 MATH_ERROR_MSG(
"AndersonDarlingTest",
"Only 2-sample tests can be issued with a 2-sample constructed GoFTest object!");
870 MATH_ERROR_MSG(
"AndersonDarlingTest",
"Distribution type is undefined! Please use SetDistribution(GoFTest::EDistribution).");
875 for (Int_t i = 0; i < n ; ++i) {
877 Double_t w1 = (*fCDF)(x1);
878 Double_t result = (2 * (i + 1) - 1) *
TMath::Log(w1) + (2 * (n - (i + 1)) + 1) *
TMath::Log(1 - w1);
883 MATH_ERROR_MSG(
"AndersonDarlingTest",
"Cannot compute p-value: data below or above the distribution's thresholds. Check sample consistency.");
891 Double_t pvalue, testStat;
893 return (strncmp(option,
"p", 1) == 0 || strncmp(option,
"t", 1) != 0) ? pvalue : testStat;
900 MATH_ERROR_MSG(
"KolmogorovSmirnov2SamplesTest",
"Only 1-sample tests can be issued with a 1-sample constructed GoFTest object!");
903 const UInt_t na =
fSamples[0].size();
904 const UInt_t nb =
fSamples[1].size();
905 std::vector<Double_t> a(na);
906 std::vector<Double_t>
b(nb);
914 Double_t pvalue, testStat;
916 return (strncmp(option,
"p", 1) == 0 || strncmp(option,
"t", 1) != 0) ? pvalue : testStat;
925 MATH_ERROR_MSG(
"KolmogorovSmirnovTest",
"Only 2-sample tests can be issued with a 2-sample constructed GoFTest object!");
929 MATH_ERROR_MSG(
"KolmogorovSmirnovTest",
"Distribution type is undefined! Please use SetDistribution(GoFTest::EDistribution).");
932 Double_t Fo = 0.0, Dn = 0.0;
934 for (UInt_t i = 0; i < n; ++i) {
935 Double_t Fn = (i + 1.0) / n;
936 Double_t F = (*fCDF)(
fSamples[0][i]);
937 Double_t result = std::max(TMath::Abs(Fn - F), TMath::Abs(Fo - Fn));
938 if (result > Dn) Dn = result;
946 Double_t pvalue, testStat;
948 return (strncmp(option,
"p", 1) == 0 || strncmp(option,
"t", 1) != 0) ? pvalue : testStat;
constexpr Double_t H()
Planck's constant in .
void SetDistribution(EDistribution dist)
double dist(Rotation3D const &r1, Rotation3D const &r2)
int getCount(double z, const double *dat, int n)
Interface (abstract class) for generic functions objects of one-dimension Provides a method to evalua...
void operator()(ETestType test, Double_t &pvalue, Double_t &testStat) const
Namespace for new ROOT classes and functions.
void SetSamples(std::vector< const Double_t *> samples, const std::vector< UInt_t > samplesSizes)
Double_t KolmogorovProb(Double_t z)
Calculates the Kolmogorov distribution function,.
std::vector< Double_t > fCombinedSamples
void AndersonDarlingTest(Double_t &pvalue, Double_t &testStat) const
double inner_product(const LAVector &, const LAVector &)
#define MATH_WARN_MSG(loc, str)
LongDouble_t Power(LongDouble_t x, LongDouble_t y)
const double * GetPoint(unsigned int ipoint, double &value) const
retrieve at the same time a pointer to the coordinate data and the fit value More efficient than call...
Double_t GaussianCDF(Double_t x) const
double normal_cdf(double x, double sigma=1, double x0=0)
Cumulative distribution function of the normal (Gaussian) distribution (lower tail).
static Double_t PValueADKSamples(UInt_t nsamples, Double_t A2)
double pow(double, double)
void Sort(Index n, const Element *a, Index *index, Bool_t down=kTRUE)
Sort the n elements of the array a of generic templated type Element.
static Double_t GetSigmaN(const std::vector< UInt_t > &ns, UInt_t N)
void KolmogorovSmirnovTest(Double_t &pvalue, Double_t &testStat) const
unsigned int Size() const
return number of fit points
#define MATH_ERROR_MSG(loc, str)
int getSum(const int *x, int n)
IBaseFunctionOneDim IGenFunction
Double_t KolmogorovTest(Int_t na, const Double_t *a, Int_t nb, const Double_t *b, Option_t *option)
Statistical test whether two one-dimensional sets of points are compatible with coming from the same ...
Class describing the binned data sets : vectors of x coordinates, y values and optionally error on y ...
* x
Deprecated and error prone model selection interface.
std::vector< std::vector< Double_t > > fSamples
void Instantiate(const Double_t *sample, UInt_t sampleSize)
double Value(unsigned int ipoint) const
return the value for the given fit point
void KolmogorovSmirnov2SamplesTest(Double_t &pvalue, Double_t &testStat) const
Namespace for new Math classes and functions.
Double_t PValueAD1Sample(Double_t A2) const
you should not use this method at all Int_t Int_t z
Double_t ExponentialCDF(Double_t x) const
you should not use this method at all Int_t Int_t Double_t Double_t Double_t Int_t Double_t Double_t Double_t Double_t b
unsigned int NDim() const
return coordinate data dimension
Double_t Sqrt(Double_t x)
void SetDistributionFunction(const IGenFunction &cdf, Bool_t isPDF, Double_t xmin, Double_t xmax)
Template class to wrap any member function of a class taking a double and returning a double in a 1D ...
void adkTestStat(double *adk, const std::vector< std::vector< double > > &samples, const std::vector< double > &zstar)
double exponential_cdf(double x, double lambda, double x0=0)
Cumulative distribution function of the exponential distribution (lower tail).
void AndersonDarling2SamplesTest(Double_t &pvalue, Double_t &testStat) const