All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
compareRootHistos.py
Go to the documentation of this file.
1 from ROOT import TFile
2 import sets
3 import sys
4 
5 histos = ['TH1D', 'TH2D', 'TProfile']
6 ser = 'SERIAL'
7 par = 'PARALL'
8 
9 # =================================================================================================
10 # Method : rec( o, path=None, lst=None )
11 #
12 # @param o : a ROOT object
13 # @param path : a string like a transient store path; ie '/stat/CaloPIDs/ECALPIDE'
14 # @param lst : a list to hold (path, object) tuples
15 #
16 # function : recursively pull apart a ROOT file, making a list of (path, TObject) tuples
17 # This is done by GetListOfKeys method, which lets one work down through directories
18 # until you hit the Histo at the end of the path. The list of tuples is returned
19 #
20 def rec( o, path=None, lst=None ) :
21  if not path : path = '/stat' ; lst = []
22  else : path = path + '/' + o.GetName()
23  lst.append( (path,o) )
24  if 'GetListOfKeys' in dir(o) :
25  keys = o.GetListOfKeys()
26  for k in keys :
27  name = k.GetName()
28  rec( o.Get(name), path, lst )
29  else :
30  pass
31  return lst
32 # =================================================================================================
33 
34 # =================================================================================================
35 # Method : composition( t )
36 #
37 # @param t : a tuple of ( type, d ) where type is either 'SERIAL' or 'PARALL'
38 # and d is a dictionary of ROOT objects, with each key = ROOT path
39 #
40 # function : deduce the composition, (objects/histos) counts
41 #
42 def composition( t ) :
43  typ, d = t
44  hists = 0 ; objs = 0
45  for k in d.keys() :
46  if d[k].__class__.__name__ in histos : hists += 1
47  else : objs += 1
48  return objs, hists
49 # =================================================================================================
50 
51 # =================================================================================================
52 # Method : comparePaths( t1, t2 )
53 #
54 # @param t1, t2 : a tuple of ( type, d ) where type is either 'SERIAL' or 'PARALL'
55 # and d is a dictionary of ROOT objects, with each key = ROOT path
56 #
57 # function : compare the paths between the two histo files. If the files are identical, they
58 # should have the same set of paths. The Parallel file should definitely have the
59 # same paths as the Serial. Perhaps the Serial file will have some more paths due
60 # to extra histos added as part of Application Sequencer finalisation
61 # Arguments t1 and t2 are checked and the parallel/serial auto-detected
62 # Uses sets module for intersections/unions, etc.
63 #
64 def comparePaths( t1, t2 ) :
65  if t1[0] == ser : ds = t1[1] ; dp = t2[1]
66  elif t2[0] == ser : ds = t2[1] ; dp = t1[1]
67  else : print 'Neither tuple is Serial Root file reference?' ; return
68 
69  dsks = ds.keys() ; dpks = dp.keys()
70  dsks.sort() ; dpks.sort()
71 
72  sset = sets.Set( dsks )
73  pset = sets.Set( dpks )
74  os, hs = composition( (ser, ds) )
75  op, hp = composition( (par, dp) )
76  print '\n' + '='*80
77  print 'Comparison of Paths : Serial vs Parallel ROOT files'
78  print '-'*80
79  print 'Number of paths in Serial file : %i (objects, histos) = ( %i, %i )'%( len(dsks), os, hs )
80  print 'Number of paths in Parall file : %i (objects, histos) = ( %i, %i )'%( len(dpks), op, hp )
81  matching = sset.intersection(pset)
82  matchingHistos = 0
83  for n in matching :
84  if ds[n].__class__.__name__ in histos : matchingHistos += 1
85  print '\nMatching paths : %i'%( len(matching) )
86  uSer = sset - pset
87  # work out histos unique to parallel file
88  uniqueSerialHistos = 0
89  for n in uSer :
90  if ds[n].__class__.__name__ in histos : uniqueSerialHistos += 1
91  print 'Paths unique to Serial file : %i ( %i Histos )'%( len(uSer), uniqueSerialHistos )
92  if uSer :
93  for n in uSer : print '\t%s : \t%s'%( ds[n], n )
94  uPar = pset - sset
95  uniqueParallHistos = 0
96  for n in uPar :
97  if dp[n].__class__.__name__ in histos : uniqueParallHistos += 1
98  print 'Paths unique to Parall file : %i ( %i Histos )'%( len(uPar), uniqueParallHistos )
99  if uPar :
100  for n in uPar : print '\t%s : \t%s'%( dp[n], n )
101  print 'Matching Histos to test : %i'%( matchingHistos )
102  print '='*80 + '\n'
103  return ( ((os,hs),(op,hp)), (uSer, uniqueSerialHistos), (uPar, uniqueParallHistos), matchingHistos )
104 # =================================================================================================
105 
106 # =================================================================================================
107 # Method : compareHistos( t1, t2 )
108 #
109 # @param t1, t2 : a tuple of ( type, d ) where type is either 'SERIAL' or 'PARALL'
110 # and d is a dictionary of ROOT objects, with each key = ROOT path
111 #
112 # function : compare the histograms in Serial/Parallel ROOT files. First, go through each
113 # dict to collect the histos (ignore TDirectory objects, etc). Then the histos
114 # in the parallel file (experimental) are compared to their equivalents in the
115 # serial file (definitely correct) using 3 methods.
116 # 1) The entries are checked, they should be equal
117 # 2) If entries are equal, check the Integral(); should be equal
118 # 3) If integrals are equal, check the KolmogorovTest() ; should be 1
119 # Arguments t1 and t2 are checked and the parallel/serial auto-detected
120 #
121 def compareHistos(t1, t2, state) :
122 
123  ( ((serialObjects,serialHistos),(parallObjects, parallHistos)), (uniqueSerPaths,uniqueSerHistos), (uniqueParPaths,uniqueParHistos), mh ) = state
124 
125  # deduce which one is parallel, which serial
126  if t1[0] == ser : ds = t1[1] ; dp = t2[1]
127  elif t2[0] == ser : ds = t2[1] ; dp = t1[1]
128  else : print 'Neither tuple is Serial Root file reference?' ; return
129 
130  # histocount, objectcount for parallel/serial
131  hcp = 0 ; pHistos = []
132  hcs = 0 ; sHistos = []
133 
134  omit = ['/stat/Brunel/MemoryTool/Virtual mem, all entries',
135  '/stat/Brunel/MemoryTool/Virtual mem, downscaled']
136  omit = []
137 
138  # find the histos in the serial file
139  for k in ds.keys() :
140  if k not in omit :
141  if ds[k].__class__.__name__ in histos : hcs += 1 ; sHistos.append( k )
142  # same for parallel
143  for k in dp.keys() :
144  if k not in omit :
145  if dp[k].__class__.__name__ in histos : hcp += 1 ; pHistos.append( k )
146 
147 
148  cEntries = 0 ; xEntries = 0 ; diffEntries = []
149  cIntegrals = 0 ; xIntegrals = 0 ; diffIntegrals = []
150  passedKol = 0 ; failedKol = 0 ; diffKols = [] ; zeroIntegrals = 0
151  kTested = 0
152  notfound = 0 ; integralMatch = 0 ; otherTest = 0 ; zeroIntegralMatch = 0
153  for h in sHistos :
154  if h in pHistos :
155  # matching histos to check
156  cEntries += 1
157  sh = ds[h] ; ph = dp[h]
158  # first check entries
159  if sh.GetEntries() != ph.GetEntries() : diffEntries.append(h) ; xEntries += 1 ; continue
160  # check for (non-zero sum of bin error) && (non-zero integrals) for K-Test
161  sBinError = 0.0 ; pBinError = 0.0
162  for i in xrange(sh.GetNbinsX()) : sBinError += sh.GetBinError(i)
163  for i in xrange(ph.GetNbinsX()) : pBinError += ph.GetBinError(i)
164  sint = sh.Integral() ; pint = ph.Integral()
165  if (bool(sint) and bool(pint)) and ( sBinError>0 and pBinError>0 ) :
166  kTested += 1
167  kTest = sh.KolmogorovTest(ph)
168  if int(kTest) : passedKol += 1
169  else : failedKol += 1 ; diffKols.append(h) # ; print 'KTest result : ', kTest
170  else :
171  # try the integral test?
172  otherTest += 1
173  if all((sint, pint)) and (sint==pint) :
174  integralMatch += 1
175  elif (sint==pint) :
176  zeroIntegralMatch += 1
177  else :
178  diffIntegrals.append( h )
179  xIntegrals += 1
180  else :
181  notfound += 1 ; print 'not found? ', h
182 
183  # report on Failed Entry-Checks
184  print '\n\n'+'-'*80
185  print 'Summary of histos with different Entries'
186  print '-'*80
187  if diffEntries :
188  diffEntries.sort()
189  for e in diffEntries : print '\t\t\t%s:\t%i != %i'%( e, int(ds[e].GetEntries()), int(dp[e].GetEntries()) )
190  print '-'*80
191 
192  # report on Failed Kolmogorov Tests
193  print '\n\n'+'-'*60
194  print 'Summary of histos which failed Kolmogorov Test'
195  print '-'*60
196  if diffKols :
197  diffKols.sort()
198  for e in diffKols :
199  result = ds[e].KolmogorovTest(dp[e])
200  print '%s\t\t%s :\tK-Test Result :\t %5.16f'%( type(ds[e]), e, result )
201  print '-'*60
202 
203  # report on Failed Integral Checks
204  print '\n\n'+'-'*60
205  print 'Summary of histos which failed Integral Check'
206  print '-'*60
207  if diffIntegrals :
208  diffIntegrals.sort()
209  for e in diffIntegrals :
210  diff = dp[e].Integral()-ds[e].Integral()
211  pc = (diff*100)/ds[e].Integral()
212  print '%s\t\t%s:\t Diff = %5.6f\tPercent Diff to Serial : %5.6f '%( type(ds[e]), e, diff, pc )
213  print '-'*60 + '\n'
214  print '='*80 + '\n'
215 
216  print '\n' + '='*80
217  print 'Comparison : Serial/Parallel ROOT Histo files'
218  print '\n\t\tSerial\tParall'
219  print '\tObjects : %i\t%i\t\t( p-s = %i )'%( serialObjects, parallObjects, parallObjects-serialObjects )
220  print '\tHistos : %i\t%i\t\t( p-s = %i )'%( serialHistos, parallHistos, parallHistos-serialHistos )
221  print '\t __________'
222  print '\tTotal : %i\t%i\n'%( serialHistos+serialObjects, parallHistos+parallObjects )
223  print 'Objects/Histos unique to Serial File : %i / %i'%( len(uniqueSerPaths)-uniqueSerHistos, uniqueSerHistos )
224  print 'Objects/Histos unique to Parall File : %i / %i'%( len(uniqueParPaths)-uniqueParHistos, uniqueParHistos )
225  print '\nMatching Histograms valid for Comparison : %i'%( mh )
226  print '\nOmissions : '
227  for entry in omit : print '\t%s'%( entry )
228  print '\nHistograms for Comparison (after Omissions) : %i'%( mh-len(omit) )
229  print '\n\tHISTOGRAM TESTS : '
230  print '\t\tKOLMOGOROV TEST : %i'%( kTested )
231  print '\t\tINTEGRAL TEST : %i'%( otherTest )
232  print '\t\tENTRIES TEST : %i'%( xEntries )
233  print '\t\t ____'
234  print '\t\tTested : %i'%( cEntries )
235 
236  print '\n\tDISCREPANCIES : '
237  print '\t\tK-Test : %i'%( failedKol )
238  print '\t\tIntegrals : %i'%( xIntegrals )
239  print '\t\tEntries : %i'%( xEntries )
240  print '\n'+'='*80
241 
242 # =================================================================================================
243 
244 if __name__ == '__main__' :
245  sys.argv.pop(0) # get rid of script name
246  if len(sys.argv) == 2 :
247  pFile = sys.argv[0]
248  sFile = sys.argv[1]
249  else :
250  print '*'*80
251  print 'Wrong count of arguments? > python compareRootHistos.py someParallelFile.root someSerialFile.root'
252  print '*'*80
253  sys.exit(0)
254  tfs = TFile( sFile, 'REC' ) ; print 'opening Serial File : %s'%( sFile )
255  tfp = TFile( pFile, 'REC' ) ; print 'opening Parall File : %s'%( pFile )
256 
257  # get structure of TFiles in a list of (path, object) tuples
258  lser = rec(tfs) ; lpar = rec(tfp)
259  # make a dictionary of lser and lpar. keys=paths
260  dserial = dict( [(n, o) for n, o in lser] )
261  dparall = dict( [(n, o) for n, o in lpar] )
262  # make a tuple of (type, dict) where type is either 'serial' or 'parallel'
263  ts = ( ser, dserial ) ; tp = ( par, dparall )
264 
265  # check objs/histos in each file
266 # composition( ts ) ; composition( tp )
267 
268  # compare paths from each file
269  state = comparePaths( ts, tp )
270 
271  # compare histos from each file
272  compareHistos( ts, tp, state )
273 
274 # # finished with TFiles
275 # tfs.Close() ; tfp.Close()
276 #
277 
string type
Definition: gaudirun.py:126
GAUDI_API double Integral(const Genfun::AbsFunction &function, const double a, const double b, const GaudiMath::Integration::Type type=GaudiMath::Integration::Adaptive, const GaudiMath::Integration::KronrodRule rule=GaudiMath::Integration::Default, const double epsabs=1.e-10, const double epsrel=1.e-7, const size_t size=1000)
Definition: Integral.cpp:28