Logo ROOT   6.13/01
Reference Guide
TMLPAnalyzer.cxx
Go to the documentation of this file.
1 // @(#)root/mlp:$Id$
2 // Author: Christophe.Delaere@cern.ch 25/04/04
3 
4 /*************************************************************************
5  * Copyright (C) 1995-2003, Rene Brun and Fons Rademakers. *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12 ///////////////////////////////////////////////////////////////////////////
13 //
14 // TMLPAnalyzer
15 //
16 // This utility class contains a set of tests useful when developing
17 // a neural network.
18 // It allows you to check for unneeded variables, and to control
19 // the network structure.
20 //
21 ///////////////////////////////////////////////////////////////////////////
22 
23 #include "TROOT.h"
24 #include "TSynapse.h"
25 #include "TNeuron.h"
26 #include "TMultiLayerPerceptron.h"
27 #include "TMLPAnalyzer.h"
28 #include "TTree.h"
29 #include "TTreeFormula.h"
30 #include "TEventList.h"
31 #include "TH1D.h"
32 #include "TProfile.h"
33 #include "THStack.h"
34 #include "TLegend.h"
35 #include "TPad.h"
36 #include "TCanvas.h"
37 #include "TGaxis.h"
38 #include "TRegexp.h"
39 #include "TMath.h"
40 #include "Riostream.h"
41 #include <stdlib.h>
42 
43 ClassImp(TMLPAnalyzer);
44 
45 ////////////////////////////////////////////////////////////////////////////////
46 /// Destructor
47 
49 {
50  delete fAnalysisTree;
51  delete fIOTree;
52 }
53 
54 ////////////////////////////////////////////////////////////////////////////////
55 /// Returns the number of layers.
56 
58 {
59  TString fStructure = fNetwork->GetStructure();
60  return fStructure.CountChar(':')+1;
61 }
62 
63 ////////////////////////////////////////////////////////////////////////////////
64 /// Returns the number of neurons in given layer.
65 
66 Int_t TMLPAnalyzer::GetNeurons(Int_t layer)
67 {
68  if(layer==1) {
69  TString fStructure = fNetwork->GetStructure();
70  TString input = TString(fStructure(0, fStructure.First(':')));
71  return input.CountChar(',')+1;
72  }
73  else if(layer==GetLayers()) {
74  TString fStructure = fNetwork->GetStructure();
75  TString output = TString(fStructure(fStructure.Last(':') + 1,
76  fStructure.Length() - fStructure.Last(':')));
77  return output.CountChar(',')+1;
78  }
79  else {
80  Int_t cnt=1;
81  TString fStructure = fNetwork->GetStructure();
82  TString hidden = TString(fStructure(fStructure.First(':') + 1,
83  fStructure.Last(':') - fStructure.First(':') - 1));
84  Int_t beg = 0;
85  Int_t end = hidden.Index(":", beg + 1);
86  Int_t num = 0;
87  while (end != -1) {
88  num = atoi(TString(hidden(beg, end - beg)).Data());
89  cnt++;
90  beg = end + 1;
91  end = hidden.Index(":", beg + 1);
92  if(layer==cnt) return num;
93  }
94  num = atoi(TString(hidden(beg, hidden.Length() - beg)).Data());
95  cnt++;
96  if(layer==cnt) return num;
97  }
98  return -1;
99 }
100 
101 ////////////////////////////////////////////////////////////////////////////////
102 /// Returns the formula used as input for neuron (idx) in
103 /// the first layer.
104 
106 {
107  TString fStructure = fNetwork->GetStructure();
108  TString input = TString(fStructure(0, fStructure.First(':')));
109  Int_t beg = 0;
110  Int_t end = input.Index(",", beg + 1);
111  TString brName;
112  Int_t cnt = 0;
113  while (end != -1) {
114  brName = TString(input(beg, end - beg));
115  if (brName[0]=='@')
116  brName = brName(1,brName.Length()-1);
117  beg = end + 1;
118  end = input.Index(",", beg + 1);
119  if(cnt==idx) return brName;
120  cnt++;
121  }
122  brName = TString(input(beg, input.Length() - beg));
123  if (brName[0]=='@')
124  brName = brName(1,brName.Length()-1);
125  return brName;
126 }
127 
128 ////////////////////////////////////////////////////////////////////////////////
129 /// Returns the name of any neuron from the input layer
130 
132 {
133  TNeuron* neuron=(TNeuron*)fNetwork->fFirstLayer[in];
134  return neuron ? neuron->GetName() : "NO SUCH NEURON";
135 }
136 
137 ////////////////////////////////////////////////////////////////////////////////
138 /// Returns the name of any neuron from the output layer
139 
141 {
142  TNeuron* neuron=(TNeuron*)fNetwork->fLastLayer[out];
143  return neuron ? neuron->GetName() : "NO SUCH NEURON";
144 }
145 
146 ////////////////////////////////////////////////////////////////////////////////
147 /// Gives some information about the network in the terminal.
148 
150 {
151  TString fStructure = fNetwork->GetStructure();
152  std::cout << "Network with structure: " << fStructure.Data() << std::endl;
153  std::cout << "inputs with low values in the differences plot may not be needed" << std::endl;
154  // Checks if some input variable is not needed
155  char var[64], sel[64];
156  for (Int_t i = 0; i < GetNeurons(1); i++) {
157  snprintf(var,64,"diff>>tmp%d",i);
158  snprintf(sel,64,"inNeuron==%d",i);
159  fAnalysisTree->Draw(var, sel, "goff");
160  TH1F* tmp = (TH1F*)gDirectory->Get(Form("tmp%d",i));
161  if (!tmp) continue;
162  std::cout << GetInputNeuronTitle(i)
163  << " -> " << tmp->GetMean()
164  << " +/- " << tmp->GetRMS() << std::endl;
165  }
166 }
167 
168 ////////////////////////////////////////////////////////////////////////////////
169 /// Collect information about what is useful in the network.
170 /// This method has to be called first when analyzing a network.
171 /// Fills the two analysis trees.
172 
174 {
175  Double_t shift = 0.1;
176  TTree* data = fNetwork->fData;
177  TEventList* test = fNetwork->fTest;
178  Int_t nEvents = test->GetN();
179  Int_t nn = GetNeurons(1);
180  Double_t* params = new Double_t[nn];
181  Double_t* rms = new Double_t[nn];
182  TTreeFormula** formulas = new TTreeFormula*[nn];
183  Int_t* index = new Int_t[nn];
184  TString formula;
185  TRegexp re("{[0-9]+}$");
186  Ssiz_t len = formula.Length();
187  Ssiz_t pos = -1;
188  Int_t i(0), j(0), k(0), l(0);
189  for(i=0; i<nn; i++){
190  formula = GetNeuronFormula(i);
191  pos = re.Index(formula,&len);
192  if(pos==-1 || len<3) {
193  formulas[i] = new TTreeFormula(Form("NF%lu",(ULong_t)this),formula,data);
194  index[i] = 0;
195  }
196  else {
197  TString newformula(formula,pos);
198  TString val = formula(pos+1,len-2);
199  formulas[i] = new TTreeFormula(Form("NF%lu",(ULong_t)this),newformula,data);
200  formula = newformula;
201  index[i] = val.Atoi();
202  }
203  TH1D tmp("tmpb", "tmpb", 1, -FLT_MAX, FLT_MAX);
204  data->Draw(Form("%s>>tmpb",formula.Data()),"","goff");
205  rms[i] = tmp.GetRMS();
206  }
207  Int_t inNeuron = 0;
208  Double_t diff = 0.;
209  if(fAnalysisTree) delete fAnalysisTree;
210  fAnalysisTree = new TTree("result","analysis");
211  fAnalysisTree->SetDirectory(0);
212  fAnalysisTree->Branch("inNeuron",&inNeuron,"inNeuron/I");
213  fAnalysisTree->Branch("diff",&diff,"diff/D");
214  Int_t numOutNodes=GetNeurons(GetLayers());
215  Double_t *outVal=new Double_t[numOutNodes];
216  Double_t *trueVal=new Double_t[numOutNodes];
217 
218  delete fIOTree;
219  fIOTree=new TTree("MLP_iotree","MLP_iotree");
220  fIOTree->SetDirectory(0);
221  TString leaflist;
222  for (i=0; i<nn; i++)
223  leaflist+=Form("In%d/D:",i);
224  leaflist.Remove(leaflist.Length()-1);
225  fIOTree->Branch("In", params, leaflist);
226 
227  leaflist="";
228  for (i=0; i<numOutNodes; i++)
229  leaflist+=Form("Out%d/D:",i);
230  leaflist.Remove(leaflist.Length()-1);
231  fIOTree->Branch("Out", outVal, leaflist);
232 
233  leaflist="";
234  for (i=0; i<numOutNodes; i++)
235  leaflist+=Form("True%d/D:",i);
236  leaflist.Remove(leaflist.Length()-1);
237  fIOTree->Branch("True", trueVal, leaflist);
238  Double_t v1 = 0.;
239  Double_t v2 = 0.;
240  // Loop on the events in the test sample
241  for(j=0; j< nEvents; j++) {
242  fNetwork->GetEntry(test->GetEntry(j));
243  // Loop on the neurons to evaluate
244  for(k=0; k<GetNeurons(1); k++) {
245  params[k] = formulas[k]->EvalInstance(index[k]);
246  }
247  for(k=0; k<GetNeurons(GetLayers()); k++) {
248  outVal[k] = fNetwork->Evaluate(k,params);
249  trueVal[k] = ((TNeuron*)fNetwork->fLastLayer[k])->GetBranch();
250  }
251  fIOTree->Fill();
252 
253  // Loop on the input neurons
254  for (i = 0; i < GetNeurons(1); i++) {
255  inNeuron = i;
256  diff = 0;
257  // Loop on the neurons in the output layer
258  for(l=0; l<GetNeurons(GetLayers()); l++){
259  params[i] += shift*rms[i];
260  v1 = fNetwork->Evaluate(l,params);
261  params[i] -= 2*shift*rms[i];
262  v2 = fNetwork->Evaluate(l,params);
263  diff += (v1-v2)*(v1-v2);
264  // reset to original vealue
265  params[i] += shift*rms[i];
266  }
267  diff = TMath::Sqrt(diff);
268  fAnalysisTree->Fill();
269  }
270  }
271  delete[] params;
272  delete[] rms;
273  delete[] outVal;
274  delete[] trueVal;
275  delete[] index;
276  for(i=0; i<GetNeurons(1); i++) delete formulas[i];
277  delete [] formulas;
278  fAnalysisTree->ResetBranchAddresses();
279  fIOTree->ResetBranchAddresses();
280 }
281 
282 ////////////////////////////////////////////////////////////////////////////////
283 /// Draws the distribution (on the test sample) of the
284 /// impact on the network output of a small variation of
285 /// the ith input.
286 
288 {
289  char sel[64];
290  snprintf(sel,64, "inNeuron==%d", i);
291  fAnalysisTree->Draw("diff", sel);
292 }
293 
294 ////////////////////////////////////////////////////////////////////////////////
295 /// Draws the distribution (on the test sample) of the
296 /// impact on the network output of a small variation of
297 /// each input.
298 /// DrawDInputs() draws something that approximates the distribution of the
299 /// derivative of the NN w.r.t. each input. That quantity is recognized as
300 /// one of the measures to determine key quantities in the network.
301 ///
302 /// What is done is to vary one input around its nominal value and to see
303 /// how the NN changes. This is done for each entry in the sample and produces
304 /// a distribution.
305 ///
306 /// What you can learn from that is:
307 /// - is variable a really useful, or is my network insensitive to it ?
308 /// - is there any risk of big systematic ? Is the network extremely sensitive
309 /// to small variations of any of my inputs ?
310 ///
311 /// As you might understand, this is to be considered with care and can serve
312 /// as input for an "educated guess" when optimizing the network.
313 
315 {
316  THStack* stack = new THStack("differences","differences (impact of variables on ANN)");
317  TLegend* legend = new TLegend(0.75,0.75,0.95,0.95);
318  TH1F* tmp = 0;
319  char var[64], sel[64];
320  for(Int_t i = 0; i < GetNeurons(1); i++) {
321  snprintf(var,64, "diff>>tmp%d", i);
322  snprintf(sel,64, "inNeuron==%d", i);
323  fAnalysisTree->Draw(var, sel, "goff");
324  tmp = (TH1F*)gDirectory->Get(Form("tmp%d",i));
325  tmp->SetDirectory(0);
326  tmp->SetLineColor(i+1);
327  stack->Add(tmp);
328  legend->AddEntry(tmp,GetInputNeuronTitle(i),"l");
329  }
330  stack->Draw("nostack");
331  legend->Draw();
332  gPad->SetLogy();
333 }
334 
335 ////////////////////////////////////////////////////////////////////////////////
336 /// Draws the distribution of the neural network (using ith neuron).
337 /// Two distributions are drawn, for events passing respectively the "signal"
338 /// and "background" cuts. Only the test sample is used.
339 
340 void TMLPAnalyzer::DrawNetwork(Int_t neuron, const char* signal, const char* bg)
341 {
342  TTree* data = fNetwork->fData;
343  TEventList* test = fNetwork->fTest;
344  TEventList* current = data->GetEventList();
345  data->SetEventList(test);
346  THStack* stack = new THStack("__NNout_TMLPA",Form("Neural net output (neuron %d)",neuron));
347  TH1F *bgh = new TH1F("__bgh_TMLPA", "NN output", 50, -0.5, 1.5);
348  TH1F *sigh = new TH1F("__sigh_TMLPA", "NN output", 50, -0.5, 1.5);
349  bgh->SetDirectory(0);
350  sigh->SetDirectory(0);
351  Int_t nEvents = 0;
352  Int_t j=0;
353  // build event lists for signal and background
354  TEventList* signal_list = new TEventList("__tmpSig_MLPA");
355  TEventList* bg_list = new TEventList("__tmpBkg_MLPA");
356  data->Draw(">>__tmpSig_MLPA",signal,"goff");
357  data->Draw(">>__tmpBkg_MLPA",bg,"goff");
358 
359  // fill the background
360  nEvents = bg_list->GetN();
361  for(j=0; j< nEvents; j++) {
362  bgh->Fill(fNetwork->Result(bg_list->GetEntry(j),neuron));
363  }
364  // fill the signal
365  nEvents = signal_list->GetN();
366  for(j=0; j< nEvents; j++) {
367  sigh->Fill(fNetwork->Result(signal_list->GetEntry(j),neuron));
368  }
369  // draws the result
370  bgh->SetLineColor(kBlue);
371  bgh->SetFillStyle(3008);
372  bgh->SetFillColor(kBlue);
373  sigh->SetLineColor(kRed);
374  sigh->SetFillStyle(3003);
375  sigh->SetFillColor(kRed);
376  bgh->SetStats(0);
377  sigh->SetStats(0);
378  stack->Add(bgh);
379  stack->Add(sigh);
380  TLegend *legend = new TLegend(.75, .80, .95, .95);
381  legend->AddEntry(bgh, "Background");
382  legend->AddEntry(sigh,"Signal");
383  stack->Draw("nostack");
384  legend->Draw();
385  // restore the default event list
386  data->SetEventList(current);
387  delete signal_list;
388  delete bg_list;
389 }
390 
391 ////////////////////////////////////////////////////////////////////////////////
392 /// Create a profile of the difference of the MLP output minus the
393 /// true value for a given output node outnode, vs the true value for
394 /// outnode, for all test data events. This method is mainly useful
395 /// when doing regression analysis with the MLP (i.e. not classification,
396 /// but continuous truth values).
397 /// The resulting TProfile histogram is returned.
398 /// It is not drawn if option "goff" is specified.
399 /// Options are passed to TProfile::Draw
400 
401 TProfile* TMLPAnalyzer::DrawTruthDeviation(Int_t outnode /*=0*/,
402  Option_t *option /*=""*/)
403 {
404  if (!fIOTree) GatherInformations();
405  TString pipehist=Form("MLP_truthdev_%d",outnode);
406  TString drawline;
407  drawline.Form("Out.Out%d-True.True%d:True.True%d>>",
408  outnode, outnode, outnode);
409  fIOTree->Draw(drawline+pipehist+"(20)", "", "goff prof");
410  TProfile* h=(TProfile*)gDirectory->Get(pipehist);
411  h->SetDirectory(0);
412  const char* title=GetOutputNeuronTitle(outnode);
413  if (title) {
414  h->SetTitle(Form("#Delta(output - truth) vs. truth for %s",
415  title));
416  h->GetXaxis()->SetTitle(title);
417  h->GetYaxis()->SetTitle(Form("#Delta(output - truth) for %s", title));
418  }
419  if (!strstr(option,"goff"))
420  h->Draw();
421  return h;
422 }
423 
424 ////////////////////////////////////////////////////////////////////////////////
425 /// Creates TProfiles of the difference of the MLP output minus the
426 /// true value vs the true value, one for each output, filled with the
427 /// test data events. This method is mainly useful when doing regression
428 /// analysis with the MLP (i.e. not classification, but continuous truth
429 /// values).
430 /// The returned THStack contains all the TProfiles. It is drawn unless
431 /// the option "goff" is specified.
432 /// Options are passed to TProfile::Draw.
433 
434 THStack* TMLPAnalyzer::DrawTruthDeviations(Option_t *option /*=""*/)
435 {
436  THStack *hs=new THStack("MLP_TruthDeviation",
437  "Deviation of MLP output from truth");
438 
439  // leg!=0 means we're drawing
440  TLegend *leg=0;
441  if (!option || !strstr(option,"goff"))
442  leg=new TLegend(.4,.85,.95,.95,"#Delta(output - truth) vs. truth for:");
443 
444  const char* xAxisTitle=0;
445 
446  // create profile for each input neuron,
447  // adding them into the THStack and the TLegend
448  for (Int_t outnode=0; outnode<GetNeurons(GetLayers()); outnode++) {
449  TProfile* h=DrawTruthDeviation(outnode, "goff");
450  h->SetLineColor(1+outnode);
451  hs->Add(h, option);
452  if (leg) leg->AddEntry(h,GetOutputNeuronTitle(outnode));
453  if (!outnode)
454  // Xaxis title is the same for all, extract it from the first one.
455  xAxisTitle=h->GetXaxis()->GetTitle();
456  }
457 
458  if (leg) {
459  hs->Draw("nostack");
460  leg->Draw();
461  // gotta draw before accessing the axes
462  hs->GetXaxis()->SetTitle(xAxisTitle);
463  hs->GetYaxis()->SetTitle("#Delta(output - truth)");
464  }
465 
466  return hs;
467 }
468 
469 ////////////////////////////////////////////////////////////////////////////////
470 /// Creates a profile of the difference of the MLP output outnode minus
471 /// the true value of outnode vs the input value innode, for all test
472 /// data events.
473 /// The resulting TProfile histogram is returned.
474 /// It is not drawn if option "goff" is specified.
475 /// Options are passed to TProfile::Draw
476 
478  Int_t outnode /*=0*/,
479  Option_t *option /*=""*/)
480 {
481  if (!fIOTree) GatherInformations();
482  TString pipehist=Form("MLP_truthdev_i%d_o%d", innode, outnode);
483  TString drawline;
484  drawline.Form("Out.Out%d-True.True%d:In.In%d>>",
485  outnode, outnode, innode);
486  fIOTree->Draw(drawline+pipehist+"(50)", "", "goff prof");
487  TProfile* h=(TProfile*)gROOT->FindObject(pipehist);
488  h->SetDirectory(0);
489  const char* titleInNeuron=GetInputNeuronTitle(innode);
490  const char* titleOutNeuron=GetOutputNeuronTitle(outnode);
491  h->SetTitle(Form("#Delta(output - truth) of %s vs. input %s",
492  titleOutNeuron, titleInNeuron));
493  h->GetXaxis()->SetTitle(Form("%s", titleInNeuron));
494  h->GetYaxis()->SetTitle(Form("#Delta(output - truth) for %s",
495  titleOutNeuron));
496  if (!strstr(option,"goff"))
497  h->Draw(option);
498  return h;
499 }
500 
501 ////////////////////////////////////////////////////////////////////////////////
502 /// Creates a profile of the difference of the MLP output outnode minus the
503 /// true value of outnode vs the input value, stacked for all inputs, for
504 /// all test data events.
505 /// The returned THStack contains all the TProfiles. It is drawn unless
506 /// the option "goff" is specified.
507 /// Options are passed to TProfile::Draw.
508 
509 THStack* TMLPAnalyzer::DrawTruthDeviationInsOut(Int_t outnode /*=0*/,
510  Option_t *option /*=""*/)
511 {
512  TString sName;
513  sName.Form("MLP_TruthDeviationIO_%d", outnode);
514  const char* outputNodeTitle=GetOutputNeuronTitle(outnode);
515  THStack *hs=new THStack(sName,
516  Form("Deviation of MLP output %s from truth",
517  outputNodeTitle));
518 
519  // leg!=0 means we're drawing.
520  TLegend *leg=0;
521  if (!option || !strstr(option,"goff"))
522  leg=new TLegend(.4,.75,.95,.95,
523  Form("#Delta(output - truth) of %s vs. input for:",
524  outputNodeTitle));
525 
526  // create profile for each input neuron,
527  // adding them into the THStack and the TLegend
528  Int_t numInNodes=GetNeurons(1);
529  Int_t innode=0;
530  for (innode=0; innode<numInNodes; innode++) {
531  TProfile* h=DrawTruthDeviationInOut(innode, outnode, "goff");
532  h->SetLineColor(1+innode);
533  hs->Add(h, option);
534  if (leg) leg->AddEntry(h,h->GetXaxis()->GetTitle());
535  }
536 
537  if (leg) {
538  hs->Draw("nostack");
539  leg->Draw();
540  // gotta draw before accessing the axes
541  hs->GetXaxis()->SetTitle("Input value");
542  hs->GetYaxis()->SetTitle(Form("#Delta(output - truth) for %s",
543  outputNodeTitle));
544  }
545 
546  return hs;
547 }
const char * GetOutputNeuronTitle(Int_t out)
Returns the name of any neuron from the output layer.
void CheckNetwork()
Gives some information about the network in the terminal.
void DrawDInput(Int_t i)
Draws the distribution (on the test sample) of the impact on the network output of a small variation ...
Double_t Evaluate(Int_t index, Double_t *params) const
Returns the Neural Net for a given set of input parameters #parameters must equal #input neurons...
void GatherInformations()
Collect information about what is useful in the network.
void DrawDInputs()
Draws the distribution (on the test sample) of the impact on the network output of a small variation ...
Int_t GetLayers()
Returns the number of layers.
Int_t GetNeurons(Int_t layer)
Returns the number of neurons in given layer.
void DrawNetwork(Int_t neuron, const char *signal, const char *bg)
Draws the distribution of the neural network (using ith neuron).
TProfile * DrawTruthDeviationInOut(Int_t innode, Int_t outnode=0, Option_t *option="")
Creates a profile of the difference of the MLP output outnode minus the true value of outnode vs the ...
TMultiLayerPerceptron * fNetwork
Definition: TMLPAnalyzer.h:39
const char * GetInputNeuronTitle(Int_t in)
Returns the name of any neuron from the input layer.
virtual ~TMLPAnalyzer()
Destructor.
TTree * fAnalysisTree
Definition: TMLPAnalyzer.h:40
TEventList * fTest
EventList defining the events in the test dataset.
TString GetNeuronFormula(Int_t idx)
Returns the formula used as input for neuron (idx) in the first layer.
TProfile * DrawTruthDeviation(Int_t outnode=0, Option_t *option="")
Create a profile of the difference of the MLP output minus the true value for a given output node out...
THStack * DrawTruthDeviations(Option_t *option="")
Creates TProfiles of the difference of the MLP output minus the true value vs the true value...
void GetEntry(Int_t) const
Load an entry into the network.
Double_t Sqrt(Double_t x)
Definition: TMath.h:690
Double_t Result(Int_t event, Int_t index=0) const
Computes the output for a given event.
TTree * fIOTree
Definition: TMLPAnalyzer.h:41
THStack * DrawTruthDeviationInsOut(Int_t outnode=0, Option_t *option="")
Creates a profile of the difference of the MLP output outnode minus the true value of outnode vs the ...