105 0.454937, 1.38629, 2.36597, 3.35670, 4.35146, 5.34812, 6.34581, 7.34412, 8.34283,
106 9.34182, 10.34, 11.34, 12.34, 13.34, 14.34, 15.34, 16.34, 17.34, 18.34, 19.34,
107 20.34, 21.34, 22.34, 23.34, 24.34, 25.34, 26.34, 27.34, 28.34, 29.34, 30.34,
108 31.34, 32.34, 33.34, 34.34, 35.34, 36.34, 37.34, 38.34, 39.34, 40.34,
109 41.34, 42.34, 43.34, 44.34, 45.34, 46.34, 47.34, 48.34, 49.33};
112 5.02389, 7.3776,9.34840,11.1433,12.8325,
113 14.4494,16.0128,17.5346,19.0228,20.4831,21.920,23.337,
114 24.736,26.119,27.488,28.845,30.191,31.526,32.852,34.170,
115 35.479,36.781,38.076,39.364,40.646,41.923,43.194,44.461,
116 45.722,46.979,48.232,49.481,50.725,51.966,53.203,54.437,
117 55.668,56.896,58.120,59.342,60.561,61.777,62.990,64.201,
118 65.410,66.617,67.821,69.022,70.222,71.420};
139 fData(nvectors, nvariables)
141 if ((nvectors<=1)||(nvariables<=0)){
142 Error(
"TRobustEstimator",
"Not enough vectors or variables");
146 Error(
"TRobustEstimator",
"For the univariate case, use the default constructor and EvaluateUni() function");
154 Warning(
"TRobustEstimator",
"chosen h is too small, default h is taken instead");
181 for (Int_t i=0; i<
fN; i++) {
199 for (Int_t i=0; i<
fNvar; i++)
213 Warning(
"Evaluate",
"Chosen h = #observations, so classic estimates of location and scatter will be calculated");
226 Int_t *index =
new Int_t[
fN];
227 Double_t *ndist =
new Double_t[
fN];
229 Double_t *deti=
new Double_t[nbest];
230 for (i=0; i<nbest; i++)
244 for (k=0; k<k1; k++) {
248 for (i=0; i<
fH; i++) {
249 for(j=0; j<
fNvar; j++)
250 vec(j)=
fData[index[i]][j];
263 det =
CStep(fN, fH, index,
fData, sscp, ndist);
271 det =
CStep(fN, fH, index,
fData, sscp, ndist);
280 if(det<deti[maxind]) {
282 for(ii=0; ii<
fNvar; ii++) {
283 mstock(maxind, ii)=
fMean(ii);
284 for(jj=0; jj<
fNvar; jj++)
293 for (i=0; i<nbest; i++) {
294 for(ii=0; ii<
fNvar; ii++) {
295 fMean(ii)=mstock(i, ii);
296 for (jj=0; jj<
fNvar; jj++)
303 if(TMath::Abs(det-deti[i])<kEps)
308 for(ii=0; ii<
fNvar; ii++) {
309 mstock(i,ii)=
fMean(ii);
310 for (jj=0; jj<
fNvar; jj++)
316 for(ii=0; ii<
fNvar; ii++) {
317 fMean(ii)=mstock(detind,ii);
319 for(jj=0; jj<
fNvar; jj++)
323 if (deti[detind]!=0) {
325 Int_t nout =
RDist(sscp);
331 for (i=0; i<
fN; i++) {
353 for (ii=0; ii<5; ii++)
359 for (ii=0; ii<5; ii++)
361 Int_t *subdat=
new Int_t[sum];
364 for (
int iii = 0; iii < sum; ++iii) subdat[iii] = -999;
365 RDraw(subdat, nsub, indsubdat);
366 for (
int iii = 0; iii < sum; ++iii) {
367 if (subdat[iii] < 0 || subdat[iii] >= fN ) {
368 Error(
"Evaluate",
"subdat index is invalid subdat[%d] = %d",iii, subdat[iii] );
374 Int_t nbestsub=nbest*nsub;
378 for (i=0; i<nbestsub; i++) {
379 for(j=0; j<
fNvar; j++)
382 Double_t *detibig =
new Double_t[nbestsub];
387 Int_t k2=Int_t(k1/nsub);
390 for (Int_t kgroup=0; kgroup<nsub; kgroup++) {
392 Int_t ntemp=indsubdat[kgroup];
394 for (i=0; i<kgroup; i++)
399 for(i=0; i<ntemp; i++) {
400 for (j=0; j<
fNvar; j++) {
401 dattemp(i,j)=
fData[subdat[temp+i]][j];
404 Int_t htemp=Int_t(
fH*ntemp/fN);
406 for (i=0; i<nbest; i++)
409 for(k=0; k<k2; k++) {
412 for (i=0; i<htemp; i++) {
413 for(j=0; j<
fNvar; j++) {
414 vec(j)=dattemp(index[i],j);
421 par =
Exact2(mstockbig, cstockbig, hyperplane, deti, nbest, kgroup, sscp,ndist);
433 det =
CStep(ntemp, htemp, index, dattemp, sscp, ndist);
435 par=
Exact2(mstockbig, cstockbig, hyperplane, deti, nbest, kgroup, sscp, ndist);
447 det=
CStep(ntemp,htemp, index, dattemp, sscp, ndist);
449 par=
Exact2(mstockbig, cstockbig, hyperplane, deti, nbest, kgroup, sscp,ndist);
463 if(det<deti[maxind]) {
465 for(i=0; i<
fNvar; i++) {
466 mstockbig(nbest*kgroup+maxind,i)=
fMean(i);
467 for(j=0; j<
fNvar; j++) {
468 cstockbig(i,nbest*kgroup*fNvar+maxind*fNvar+j)=
fCovariance(i,j);
479 if (deti[maxind]<kEps)
484 for(i=0; i<nbest; i++) {
485 detibig[kgroup*nbest + i]=deti[i];
496 for(i=0; i<sum; i++) {
497 for (j=0; j<
fNvar; j++)
498 datmerged(i,j)=
fData[subdat[i]][j];
501 Int_t hmerged=Int_t(sum*
fH/fN);
504 for(k=0; k<nbestsub; k++) {
506 for(ii=0; ii<
fNvar; ii++) {
507 fMean(ii)=mstockbig(k,ii);
508 for(jj=0; jj<
fNvar; jj++)
512 for(i=0; i<
fNvar; i++)
517 det=
CStep(sum, hmerged, index, datmerged, sscp, ndist);
534 det=
CStep(sum, hmerged, index, datmerged, sscp, ndist);
548 for(i=0; i<
fNvar; i++) {
549 mstockbig(k,i)=
fMean(i);
550 for(j=0; j<
fNvar; j++) {
559 for(i=0; i<
fNvar; i++) {
560 fMean(i)=mstockbig(minind,i);
562 for(j=0; j<
fNvar; j++)
570 if(TMath::Abs(det-detibig[minind])<kEps) {
580 Int_t nout =
RDist(sscp);
586 for (i=0; i<
fN; i++) {
612 Double_t faclts[]={2.6477,2.5092,2.3826,2.2662,2.1587,2.0589,1.9660,1.879,1.7973,1.7203,1.6473};
613 Int_t *index=
new Int_t[nvectors];
617 nquant=TMath::Min(Int_t(Double_t(((hh*1./nvectors)-0.5)*40))+1, 11);
618 Double_t factor=faclts[nquant-1];
620 Double_t *aw=
new Double_t[nvectors];
621 Double_t *aw2=
new Double_t[nvectors];
625 Int_t len=nvectors-hh;
626 Double_t *slutn=
new Double_t[len];
627 for(Int_t i=0; i<len; i++)
629 for(Int_t jint=0; jint<len; jint++) {
631 for (Int_t j=0; j<hh; j++) {
632 aw[jint]+=data[index[j+jint]];
634 sq+=data[index[j]]*data[index[j]];
636 aw2[jint]=aw[jint]*aw[jint]/hh;
641 slutn[ndup]=aw[jint];
644 sq=sq - data[index[jint-1]]*data[index[jint-1]]+
645 data[index[jint+hh]]*data[index[jint+hh]]-
646 aw2[jint]+aw2[jint-1];
650 slutn[ndup]=aw[jint];
655 slutn[ndup]=aw[jint];
661 slutn[0]=slutn[Int_t((ndup)/2)]/hh;
686 if (i < 0 || i >= 50)
return 0;
696 Warning(
"GetCovariance",
"provided matrix is of the wrong size, it will be resized");
708 Warning(
"GetCorrelation",
"provided matrix is of the wrong size, it will be resized");
720 Error(
"GetHyperplane",
"the data doesn't lie on a hyperplane!\n");
733 Error(
"GetHyperplane",
"the data doesn't lie on a hyperplane!\n");
737 Warning(
"GetHyperPlane",
"provided vector is of the wrong size, it will be resized");
749 Warning(
"GetMean",
"provided vector is of the wrong size, it will be resized");
761 Warning(
"GetRDistances",
"provided vector is of the wrong size, it will be resized");
772 return fOut.GetSize();
781 for (j=1; j<
fNvar+1; j++) {
782 sscp(0, j) +=vec(j-1);
783 sscp(j, 0) = sscp(0, j);
785 for (i=1; i<fNvar+1; i++) {
786 for (j=1; j<fNvar+1; j++) {
787 sscp(i, j) += vec(i-1)*vec(j-1);
797 for (Int_t i=0; i<
fNvar+1; i++) {
798 for (Int_t j=0; j<fNvar+1; j++) {
813 for (Int_t i=0; i<
fN; i++) {
814 for (Int_t j=0; j<
fNvar; j++)
830 for (i=0; i<
fNvar; i++) {
832 sd[i]=sscp(i+1, i+1);
833 f=(sd[i]-
m(i)*
m(i)/nvec)/(nvec-1);
838 for (i=0; i<
fNvar; i++) {
839 for (j=0; j<
fNvar; j++) {
840 cov(i, j)=sscp(i+1, j+1)-nvec*
m(i)*
m(j);
852 Double_t *sd=
new Double_t[
fNvar];
853 for(j=0; j<
fNvar; j++)
855 for(i=0; i<
fNvar; i++) {
856 for (j=0; j<
fNvar; j++) {
879 Double_t kEps = 1
e-14;
881 Bool_t repeat=kFALSE;
884 for(i=0; i<ntotal; i++)
887 for (i=0; i<p+1; i++) {
890 for(j=0; j<=i-1; j++) {
908 for (i=0; i<p+1; i++) {
909 for (j=0; j<
fNvar; j++) {
910 vec[j]=data[index[i]][j];
918 while((det<kEps)&&(nindex < htotal)) {
926 for(i=0; i<nindex; i++) {
932 }
while(repeat==kTRUE);
937 for(j=0; j<
fNvar; j++)
938 vec[j]=data[index[nindex-1]][j];
949 for(j=0; j<ntotal; j++) {
951 for(i=0; i<
fNvar; i++)
952 temp[i]=data[j][i] -
fMean(i);
954 for(i=0; i<
fNvar; i++)
955 ndist[j]+=(data[j][i]-
fMean(i))*temp[i];
957 KOrdStat(ntotal, ndist, htotal-1,index);
971 for (i=0; i<nmerged; i++) {
973 for(j=0; j<
fNvar; j++) {
975 ndist[i]=TMath::Abs(ndist[i]);
978 KOrdStat(nmerged, ndist, hmerged-1, index);
980 for (i=0; i<hmerged; i++) {
981 for(j=0; j<
fNvar; j++)
982 vec[j]=dat[index[i]][j];
1009 for(j=0; j<ntotal; j++) {
1011 for(i=0; i<
fNvar; i++)
1012 temp[i]=data[j][i]-
fMean[i];
1014 for(i=0; i<
fNvar; i++)
1015 ndist[j]+=(data[j][i]-
fMean[i])*temp[i];
1019 KOrdStat(ntotal, ndist, htotal-1, index);
1022 for (i=0; i<htotal; i++) {
1023 for (j=0; j<
fNvar; j++)
1024 temp[j]=data[index[i]][j];
1044 for (j=0; j<
fNvar; j++) {
1048 for (i=0; i<
fN; i++) {
1050 for(j=0; j<
fNvar; j++) {
1052 ndist[i]=TMath::Abs(ndist[i]);
1057 for (i=0; i<
fN; i++) {
1058 if(ndist[i] < 1
e-14) nhyp++;
1072 Double_t *deti, Int_t nbest, Int_t kgroup,
1079 Int_t nh=
Exact(ndist);
1084 for (i=0; i<
fN; i++) {
1085 if(ndist[i]<1
e-14) {
1086 for (j=0; j<
fNvar; j++)
1101 for(i=0; i<
fNvar; i++) {
1102 mstockbig(nbest*kgroup+maxind,i)=
fMean(i);
1104 for(j=0; j<
fNvar; j++) {
1105 cstockbig(i,nbest*kgroup*fNvar+maxind*fNvar+j)=
fCovariance(i,j);
1121 if ((
fN>=2*nmini) && (
fN<=(3*nmini-1))) {
1123 indsubdat[0]=Int_t(
fN*0.5);
1124 indsubdat[1]=Int_t(
fN*0.5)+1;
1126 indsubdat[0]=indsubdat[1]=Int_t(
fN/2);
1130 if((
fN>=3*nmini) && (
fN<(4*nmini -1))) {
1132 indsubdat[0]=indsubdat[1]=indsubdat[2]=Int_t(
fN/3);
1134 indsubdat[0]=Int_t(
fN/3);
1135 indsubdat[1]=Int_t(
fN/3)+1;
1136 if (
fN%3==1) indsubdat[2]=Int_t(
fN/3);
1137 else indsubdat[2]=Int_t(
fN/3)+1;
1142 if((
fN>=4*nmini)&&(
fN<=(5*nmini-1))){
1143 if (
fN%4==0) indsubdat[0]=indsubdat[1]=indsubdat[2]=indsubdat[3]=Int_t(
fN/4);
1145 indsubdat[0]=Int_t(
fN/4);
1146 indsubdat[1]=Int_t(
fN/4)+1;
1147 if(
fN%4==1) indsubdat[2]=indsubdat[3]=Int_t(
fN/4);
1149 indsubdat[2]=Int_t(
fN/4)+1;
1150 indsubdat[3]=Int_t(
fN/4);
1152 if(
fN%4==3) indsubdat[2]=indsubdat[3]=Int_t(
fN/4)+1;
1156 for(Int_t i=0; i<5; i++)
1182 for (i=0; i<
fN; i++) {
1184 for(j=0; j<
fNvar; j++) {
1188 for(j=0; j<
fNvar; j++) {
1202 for (i=0; i<
fN; i++) {
1204 for(j=0; j<
fNvar; j++) {
1209 for(j=0; j<
fNvar; j++) {
1217 for(i=0; i<
fN; i++) {
1218 if (
fRd[i]<=cutoff) {
1219 for(j=0; j<
fNvar; j++)
1220 temp[j]=
fData[i][j];
1240 for (k=1; k<=ngroup; k++) {
1241 for (m=1; m<=indsubdat[k-1]; m++) {
1248 subdat[jndex-1]=nrand+jndex-2;
1249 for (i=1; i<=jndex-1; i++) {
1250 if(subdat[i-1] > nrand+i-2) {
1251 for(j=jndex; j>=i+1; j--) {
1252 subdat[j-1]=subdat[j-2];
1255 subdat[i-1]=nrand+i-2;
1268 Bool_t isAllocated = kFALSE;
1269 const Int_t kWorkMax=100;
1270 Int_t i, ir, j, l,
mid;
1273 Int_t workLocal[kWorkMax];
1281 if (ntotal > kWorkMax) {
1282 isAllocated = kTRUE;
1283 ind =
new Int_t[ntotal];
1287 for (Int_t ii=0; ii<ntotal; ii++) {
1295 if (ir == l+1 && a[ind[ir]]<a[ind[l]])
1296 {temp = ind[l]; ind[l]=ind[ir]; ind[ir]=temp;}
1297 Double_t tmp = a[ind[rk]];
1303 {temp = ind[
mid]; ind[
mid]=ind[l+1]; ind[l+1]=temp;}
1304 if (a[ind[l]]>a[ind[ir]])
1305 {temp = ind[l]; ind[l]=ind[ir]; ind[ir]=temp;}
1307 if (a[ind[l+1]]>a[ind[ir]])
1308 {temp=ind[l+1]; ind[l+1]=ind[ir]; ind[ir]=temp;}
1310 if (a[ind[l]]>a[ind[l+1]])
1311 {temp = ind[l]; ind[l]=ind[l+1]; ind[l+1]=temp;}
1317 do i++;
while (a[ind[i]]<a[arr]);
1318 do j--;
while (a[ind[j]]>a[arr]);
1320 {temp=ind[i]; ind[i]=ind[j]; ind[j]=temp;}
1324 if (j>=rk) ir = j-1;
void Classic()
called when h=n.
Int_t GetNOut()
returns the number of outliers
TVectorT< Element > & ResizeTo(Int_t lwb, Int_t upb)
Resize the vector to [lwb:upb] .
virtual TMatrixTBase< Element > & ResizeTo(Int_t nrows, Int_t ncols, Int_t=-1)
Set size of the matrix to nrows x ncols New dynamic elements are created, the overlapping part of the...
void CreateOrtSubset(TMatrixD &dat, Int_t *index, Int_t hmerged, Int_t nmerged, TMatrixD &sscp, Double_t *ndist)
creates a subset of hmerged vectors with smallest orthogonal distances to the hyperplane hyp[1]*(x1-m...
you should not use this method at all Int_t Int_t Double_t Double_t Double_t Int_t mid
Long64_t LocMax(Long64_t n, const T *a)
Return index of array with the maximum element.
void CreateSubset(Int_t ntotal, Int_t htotal, Int_t p, Int_t *index, TMatrixD &data, TMatrixD &sscp, Double_t *ndist)
creates a subset of htotal elements from ntotal elements first, p+1 elements are drawn randomly(witho...
const Double_t kChiMedian[50]
void AddColumn(Double_t *col)
adds a column to the data matrix it is assumed that the column has size fN variable fVarTemp keeps th...
void Evaluate()
Finds the estimate of multivariate mean and variance.
Int_t Exact2(TMatrixD &mstockbig, TMatrixD &cstockbig, TMatrixD &hyperplane, Double_t *deti, Int_t nbest, Int_t kgroup, TMatrixD &sscp, Double_t *ndist)
This function is called if determinant of the covariance matrix of a subset=0.
void ClearSscp(TMatrixD &sscp)
clear the sscp matrix, used for covariance and mean calculation
Double_t CStep(Int_t ntotal, Int_t htotal, Int_t *index, TMatrixD &data, TMatrixD &sscp, Double_t *ndist)
from the input htotal-subset constructs another htotal subset with lower determinant ...
Double_t KOrdStat(Int_t ntotal, Double_t *arr, Int_t k, Int_t *work)
because I need an Int_t work array
void AddRow(Double_t *row)
adds a vector to the data matrix it is supposed that the vector is of size fNvar
const TMatrixDSym * GetCovariance() const
virtual TMatrixTBase< Element > & ResizeTo(Int_t nrows, Int_t ncols, Int_t=-1)
Set size of the matrix to nrows x ncols New dynamic elements are created, the overlapping part of the...
const TVectorD * GetMean() const
TMatrixDSym fInvcovariance
Minimum Covariance Determinant Estimator - a Fast Algorithm invented by Peter J.Rousseeuw and Katrien...
Double_t GetChiQuant(Int_t i) const
returns the chi2 quantiles
const Double_t kChiQuant[50]
void Sort(Index n, const Element *a, Index *index, Bool_t down=kTRUE)
Sort the n elements of the array a of generic templated type Element.
Bool_t Invert(TMatrixDSym &inv)
For a symmetric matrix A(m,m), its inverse A_inv(m,m) is returned .
you should not use this method at all Int_t Int_t Double_t Double_t Double_t Int_t Double_t Double_t Double_t Double_t Int_t m
Element * GetMatrixArray()
Cholesky Decomposition class.
virtual Double_t Determinant() const
void RDraw(Int_t *subdat, Int_t ngroup, Int_t *indsubdat)
Draws ngroup nonoverlapping subdatasets out of a dataset of size n such that the selected case number...
void AddToSscp(TMatrixD &sscp, TVectorD &vec)
update the sscp matrix with vector vec
const TVectorD * GetRDistances() const
Int_t GetNoElements() const
Int_t Exact(Double_t *ndist)
for the exact fit situations returns number of observations on the hyperplane
Int_t Partition(Int_t nmini, Int_t *indsubdat)
divides the elements into approximately equal subgroups number of elements in each subgroup is stored...
R__EXTERN TRandom * gRandom
const TMatrixD & GetEigenVectors() const
const TVectorD * GetHyperplane() const
if the points are on a hyperplane, returns this hyperplane
const TVectorD & GetEigenValues() const
const TMatrixDSym * GetCorrelation() const
Double_t Median(Long64_t n, const T *a, const Double_t *w=0, Long64_t *work=0)
Return the median of the array a where each entry i has weight w[i] .
Int_t RDist(TMatrixD &sscp)
Calculates robust distances.Then the samples with robust distances greater than a cutoff value (0...
void EvaluateUni(Int_t nvectors, Double_t *data, Double_t &mean, Double_t &sigma, Int_t hh=0)
for the univariate case estimates of location and scatter are returned in mean and sigma parameters t...
you should not use this method at all Int_t Int_t Double_t Double_t Double_t e
virtual Double_t Uniform(Double_t x1=1)
Returns a uniform deviate on the interval (0, x1).
Int_t GetBDPoint()
returns the breakdown point of the algorithm
void Correl()
transforms covariance matrix into correlation matrix
void Covar(TMatrixD &sscp, TVectorD &m, TMatrixDSym &cov, TVectorD &sd, Int_t nvec)
calculates mean and covariance
TRobustEstimator()
this constructor should be used in a univariate case: first call this constructor, then - the EvaluateUni(..) function
Double_t Sqrt(Double_t x)
Long64_t LocMin(Long64_t n, const T *a)
Return index of array with the minimum element.