The Gaudi Framework  v29r0 (ff2e7097)
compareRootHistos.py
Go to the documentation of this file.
1 #! /usr/bin/env python
2 from optparse import OptionParser
3 import re
4 import sys
5 # Import ROOT with an empty sys.argv so that it cannot see this script's command line options, then restore sys.argv
6 backupArgv = sys.argv[:]
7 sys.argv = []
8 from ROOT import TFile
9 sys.argv = backupArgv
10 
# Tags used as first element of the (type, dict) tuples to tell the two files apart.
ref, test = 'REFERENCE', 'TEST'
# Names of the ROOT classes that are counted and compared as histograms.
histos = list(('TH1D', 'TH1F', 'TH2D', 'TH2F', 'TProfile'))
# Regular expressions (kept as strings) selecting the histogram paths not to check.
gRegexBlackList = list()
16 
17 # =============================================================================
18 # Method : rec( o, path=None, lst=None )
19 #
20 # @param o : a ROOT object
21 # @param path : a string like a transient store path; ie '/stat/CaloPIDs/ECALPIDE'
22 # @param lst : a list to hold (path, object) tuples
23 #
24 # function : recursively pull apart a ROOT file, making a list of (path, TObject) tuples
25 # This is done by GetListOfKeys method, which lets one work down through directories
26 # until you hit the Histo at the end of the path. The list of tuples is returned
27 #
28 
29 
def rec(o, path=None, lst=None):
    """Recursively flatten a ROOT object tree into a list of (path, object) tuples.

    @param o    : a ROOT object; it must provide GetName() when `path` is given,
                  and is descended into when it provides GetListOfKeys()/Get(name)
    @param path : path of the parent of `o`. When empty or None, `o` is taken as
                  the root of the walk and is recorded under '/stat'
    @param lst  : list collecting the tuples during recursion. It is ignored
                  (a fresh list is started) when `path` is empty or None

    @return the list of (path, object) tuples: `o` itself first, then everything
            found below it, in the order given by GetListOfKeys()
    """
    if not path:
        # Top-level call: the root object gets the fixed '/stat' prefix and
        # the result list always starts empty.
        path = '/stat'
        lst = []
    else:
        path = path + '/' + o.GetName()
        if lst is None:
            # A path without a list used to fail with AttributeError on
            # None.append(); start a list instead.
            lst = []
    lst.append((path, o))
    # Only objects exposing their keys (directory-like) can be descended into;
    # anything else is a leaf.
    if hasattr(o, 'GetListOfKeys'):
        for key in o.GetListOfKeys():
            rec(o.Get(key.GetName()), path, lst)
    return lst
45 # =============================================================================
46 
47 # =============================================================================
48 # Method : composition( t )
49 #
50 # @param t : a tuple of ( type, d ) where type is either 'REFERENCE' or 'TEST'
51 # and d is a dictionary of ROOT objects, with each key = ROOT path
52 #
53 # function : deduce the composition, (objects/histos) counts
54 #
55 
56 
def composition(t):
    """Count the plain objects and the histograms held in one file dictionary.

    @param t : a tuple of ( type, d ); only d, the dictionary of ROOT objects
               keyed by path, is used

    @return (objs, hists): the number of entries whose class name is not listed
            in `histos`, and the number of entries whose class name is
    """
    _, objects = t
    nHistos = sum(1 for obj in objects.values()
                  if obj.__class__.__name__ in histos)
    # Whatever is not a histogram is counted as a generic object.
    return len(objects) - nHistos, nHistos
67 # =============================================================================
68 
69 # =============================================================================
70 # Method : comparePaths( t1, t2 )
71 #
72 # @param t1, t2 : a tuple of ( type, d ) where type is either 'REFERENCE' or 'TEST'
73 # and d is a dictionary of ROOT objects, with each key = ROOT path
74 #
75 # function : compare the paths between the two histo files. If the files are identical, they
76 # should have the same set of paths. The Test file should definitely have the
77 # same paths as the Reference. Perhaps the Reference file will have some more paths due
78 # to extra histos added as part of Application Sequencer finalisation
79 # Arguments t1 and t2 are checked and the test/reference auto-detected
80 #
81 
82 
def comparePaths(t1, t2):
    """Compare the sets of paths of the Reference and Test files and print a report.

    @param t1, t2 : tuples of ( type, d ) where type is either 'REFERENCE' or 'TEST'
                    and d is a dictionary of ROOT objects, with each key = ROOT path.
                    The reference is whichever tuple carries the 'REFERENCE' tag.

    @return ( ((refObjects, refHistos), (testObjects, testHistos)),
              (pathsOnlyInReference, histosOnlyInReference),
              (pathsOnlyInTest, histosOnlyInTest),
              matchingHistos )
            where the two "pathsOnly..." items are sets of paths, or None (after
            printing a message) when neither tuple is tagged as reference
    """
    if t1[0] == ref:
        ds, dp = t1[1], t2[1]
    elif t2[0] == ref:
        ds, dp = t2[1], t1[1]
    else:
        print('Neither tuple is Reference Root file reference?')
        return

    def countHistos(paths, objects):
        """Number of `paths` whose object in `objects` is a histogram."""
        return sum(1 for p in paths if objects[p].__class__.__name__ in histos)

    sset = set(ds)
    pset = set(dp)
    refObjs, refHistos = composition((ref, ds))
    testObjs, testHistos = composition((test, dp))

    print('\n' + '=' * 80)
    print('Comparison of Paths : Reference vs Test ROOT files')
    print('-' * 80)
    print('Number of paths in Reference file : %i (objects, histos) = ( %i, %i )' % (len(ds), refObjs, refHistos))
    print('Number of paths in Test file : %i (objects, histos) = ( %i, %i )' % (len(dp), testObjs, testHistos))

    matching = sset & pset
    matchingHistos = countHistos(matching, ds)
    print('\nMatching paths : %i' % (len(matching)))

    # paths (and histos among them) present only in the reference file
    uSer = sset - pset
    uniqueReferenceHistos = countHistos(uSer, ds)
    print('Paths unique to Reference file : %i ( %i Histos )' % (len(uSer), uniqueReferenceHistos))
    # sorted so that the listing is reproducible from one run to the next
    for n in sorted(uSer):
        print('\t%s : \t%s' % (ds[n], n))

    # paths (and histos among them) present only in the test file
    uPar = pset - sset
    uniqueTestHistos = countHistos(uPar, dp)
    print('Paths unique to Test file : %i ( %i Histos )' % (len(uPar), uniqueTestHistos))
    for n in sorted(uPar):
        print('\t%s : \t%s' % (dp[n], n))

    print('Matching Histos to test : %i' % (matchingHistos))
    print('=' * 80 + '\n')
    return (((refObjs, refHistos), (testObjs, testHistos)),
            (uSer, uniqueReferenceHistos), (uPar, uniqueTestHistos), matchingHistos)
136 # =============================================================================
137 
138 # =============================================================================
139 # Method : bin2binIdentity(h1,h2)
140 #
141 # @param h1, h2 : The two histograms to compare
142 # function : Return the number of different bins
143 
144 
def bin2binIdentity(h1, h2):
    """Count the bins whose content differs between two histograms.

    @param h1, h2 : the two histograms to compare; the binning of h1 defines
                    the range of global bin numbers that is scanned

    @return the number of global bins (underflow and overflow included) for
            which h1.GetBinContent(i) != h2.GetBinContent(i)
    """
    def getNbins(h):
        # Total number of cells: every axis in use contributes its bins plus
        # the underflow and overflow ones. ROOT global bin numbers are built
        # with a stride of (nbins + 2) per axis, so a smaller count would stop
        # the scan before the last in-range bins of a multi-dimensional
        # histogram.
        dimension = h.GetDimension()
        ncells = h.GetNbinsX() + 2
        if dimension > 1:
            ncells *= h.GetNbinsY() + 2
        if dimension > 2:
            ncells *= h.GetNbinsZ() + 2
        return ncells

    # range (not xrange) keeps the function usable on both Python 2 and 3
    return sum(1 for ibin in range(getNbins(h1))
               if h1.GetBinContent(ibin) != h2.GetBinContent(ibin))
162 
163 
164 # =============================================================================
165 # Method : compareHistos( t1, t2 )
166 #
167 # @param t1, t2 : a tuple of ( type, d ) where type is either 'REFERENCE' or 'TEST'
168 # and d is a dictionary of ROOT objects, with each key = ROOT path
169 #
170 # function : compare the histograms in Reference/Test ROOT files. First, go through each
171 # dict to collect the histos (ignore TDirectory objects, etc). Then the histos
172 # in the test file (experimental) are compared to their equivalents in the
173 # reference file (definitely correct) using the following checks.
174 # 1) The entries are checked, they should be equal
175 # 2) If entries are equal, check the Integral(); should be equal
176 # 3) If integrals are equal, check the KolmogorovTest() ; should be 1
177 # 4) If identity flag is there and KS test is performed, perform bin2bin identity test
178 # Arguments t1 and t2 are checked and the test/reference auto-detected
179 #
def compareHistos(t1, t2, state, checkBin2BinIdentity):
    """Compare the histograms of the Reference and Test files and print a report.

    @param t1, t2 : tuples of ( type, d ) where type is either 'REFERENCE' or 'TEST'
                    and d is a dictionary of ROOT objects, with each key = ROOT path.
                    The reference is whichever tuple carries the 'REFERENCE' tag.
    @param state  : the tuple returned by comparePaths for the same two files
    @param checkBin2BinIdentity : when true, histograms that go through the
                    Kolmogorov test are also compared bin by bin

    Histograms whose path matches one of the gRegexBlackList patterns are left
    out. For every remaining reference histogram also present in the test file:
     1) the entries must be equal, otherwise the histogram is reported and skipped
     2) if both integrals and both sums of bin errors are non-zero, the
        Kolmogorov test must give 1 (plus the bin-to-bin check if requested)
     3) otherwise the integrals must be equal

    @return the number of discrepancies (entries + Kolmogorov + integrals +
            bin-to-bin), or None (after printing a message) when neither tuple
            is tagged as reference
    """
    (((referenceObjects, referenceHistos), (parallObjects, parallHistos)),
     (uniqueSerPaths, uniqueSerHistos), (uniqueParPaths, uniqueParHistos), mh) = state

    # deduce which one is test, which reference
    if t1[0] == ref:
        ds, dp = t1[1], t2[1]
    elif t2[0] == ref:
        ds, dp = t2[1], t1[1]
    else:
        print('Neither tuple is Reference Root file reference?')
        return

    omit = [re.compile(regex) for regex in gRegexBlackList]

    def selectHistos(objects):
        """Paths of the histograms of `objects` that no omission pattern matches."""
        return [path for path in objects
                if not any(regex.search(path) is not None for regex in omit)
                and objects[path].__class__.__name__ in histos]

    sHistos = selectHistos(ds)
    # a set: only membership is needed for the test file
    pHistos = set(selectHistos(dp))

    cEntries = 0          # histograms found in both files, i.e. actually compared
    xEntries = 0
    diffEntries = []
    xIntegrals = 0
    diffIntegrals = []
    passedKol = 0
    failedKol = 0
    diffKols = []
    passedIdentity = 0
    failedIdentity = 0
    diffIdentity = []
    identityDiffBins = {}
    kTested = 0
    kTestResults = {}
    otherTest = 0

    for h in sHistos:
        if h not in pHistos:
            print('not found?  %s' % (h,))
            continue

        # matching histos to check
        cEntries += 1
        sh = ds[h]
        ph = dp[h]

        # first check entries
        if sh.GetEntries() != ph.GetEntries():
            diffEntries.append(h)
            xEntries += 1
            continue

        # check for (non-zero sum of bin error) && (non-zero integrals) for K-Test
        # NOTE(review): the sums run over bin numbers 0 .. GetNbinsX()-1, i.e.
        # they start at bin 0 and stop before bin GetNbinsX(); kept as is
        # because it decides which test is applied - confirm it is intended.
        sBinError = sum((sh.GetBinError(i) for i in range(sh.GetNbinsX())), 0.0)
        pBinError = sum((ph.GetBinError(i) for i in range(ph.GetNbinsX())), 0.0)
        sint = sh.Integral()
        pint = ph.Integral()
        doKS = bool(sint) and bool(pint) and sBinError > 0 and pBinError > 0

        if doKS:
            if checkBin2BinIdentity:
                diffBins = bin2binIdentity(sh, ph)
                if diffBins == 0:
                    passedIdentity += 1
                else:
                    failedIdentity += 1
                    diffIdentity.append(h)
                    identityDiffBins[h] = diffBins
            kTested += 1
            kTest = sh.KolmogorovTest(ph)
            kTestResults[h] = kTest
            # int() truncates: only a result of exactly 1 counts as passed
            if int(kTest):
                passedKol += 1
            else:
                failedKol += 1
                diffKols.append(h)
        else:
            # fall back on the comparison of the integrals
            otherTest += 1
            if sint != pint:
                diffIntegrals.append(h)
                xIntegrals += 1

    # report on Failed Entry-Checks
    print('\n\n' + '-' * 80)
    print('Summary of histos with different Entries')
    print('-' * 80)
    for e in sorted(diffEntries):
        print('\t\t\t%s:\t%i != %i' % (e, int(ds[e].GetEntries()), int(dp[e].GetEntries())))
    print('-' * 80)

    # report on Failed Kolmogorov Tests
    print('\n\n' + '-' * 60)
    print('Summary of histos which failed Kolmogorov Test')
    print('-' * 60)
    for e in sorted(diffKols):
        print('%s\t\t%s :\tK-Test Result :\t %5.16f' % (ds[e].ClassName(), e, kTestResults[e]))
    print('-' * 60)

    # report on Failed Integral Checks
    print('\n\n' + '-' * 60)
    print('Summary of histos which failed Integral Check')
    print('-' * 60)
    for e in sorted(diffIntegrals):
        refIntegral = ds[e].Integral()
        diff = dp[e].Integral() - refIntegral
        if refIntegral:
            pc = (diff * 100) / refIntegral
        else:
            # A null reference integral has no meaningful relative difference
            # and used to raise ZeroDivisionError here.
            pc = float('inf') if diff > 0 else float('-inf')
        print('%s\t\t%s:\t Diff = %5.6f\tPercent Diff to Reference : %5.6f ' % (ds[e].ClassName(), e, diff, pc))
    print('-' * 60 + '\n')
    print('=' * 80 + '\n')

    # Report on failed bin2bin identity
    if checkBin2BinIdentity:
        print('\n\n' + '-' * 80)
        print('Summary of histos with at least one bin with different Entries')
        print('-' * 80)
        for e in sorted(diffIdentity):
            print('%s\t\t%s: %i different bins' % (ds[e].ClassName(), e, identityDiffBins[e]))
        print('-' * 80)

    print('\n' + '=' * 80)
    print('Comparison : Reference/Test ROOT Histo files')
    print('\n\t\tReference\tTest')
    print('\tObjects : %i\t%i\t\t( p-s = %i )' % (referenceObjects, parallObjects, parallObjects - referenceObjects))
    print('\tHistos : %i\t%i\t\t( p-s = %i )' % (referenceHistos, parallHistos, parallHistos - referenceHistos))
    print('\t __________')
    print('\tTotal : %i\t%i\n' % (referenceHistos + referenceObjects, parallHistos + parallObjects))
    print('Objects/Histos unique to Reference File : %i / %i' % (len(uniqueSerPaths) - uniqueSerHistos, uniqueSerHistos))
    print('Objects/Histos unique to Test File : %i / %i' % (len(uniqueParPaths) - uniqueParHistos, uniqueParHistos))
    print('\nMatching Histograms valid for Comparison : %i' % (mh))
    print("\nOmissions' patterns : ")
    for entry in gRegexBlackList:
        print('\t%s' % (entry))
    # The histograms left after the omissions are the ones actually compared;
    # subtracting the number of patterns from mh was wrong whenever a pattern
    # matched no histogram or more than one.
    print('\nHistograms for Comparison (after Omissions) : %i' % (cEntries))
    print('\n\tHISTOGRAM TESTS : ')
    print('\t\tKOLMOGOROV TEST : %i' % (kTested))
    print('\t\tINTEGRAL TEST : %i' % (otherTest))
    print('\t\tENTRIES TEST : %i' % (xEntries))
    if checkBin2BinIdentity:
        print('\t\tBIN2BIN TEST : %i' % (passedIdentity))
    print('\t\t ____')
    print('\t\tTested : %i' % (cEntries))

    print('\n\tDISCREPANCIES : ')
    print('\t\tK-Test : %i' % (failedKol))
    print('\t\tIntegrals : %i' % (xIntegrals))
    print('\t\tEntries : %i' % (xEntries))
    retval = failedKol + xIntegrals + xEntries + failedIdentity
    if retval != 0:
        print('\nThe two sets of histograms were not identical')
    print('\n' + '=' * 80)
    return retval
369 
370 # =============================================================================
371 
372 
def extractBlacklist(listString):
    """Add the patterns of a comma separated string to gRegexBlackList.

    @param listString : comma separated list of regular expressions, or
                        None / empty string for "nothing to add"

    Empty items (e.g. from a trailing or doubled comma) are skipped: an empty
    regular expression matches every path and would silently exclude all the
    histograms from the comparison.
    When listString is empty the list is left untouched (the original code
    only assigned an unused, misspelled local variable in that case).
    """
    if not listString:
        return
    gRegexBlackList.extend(
        blackRegexp for blackRegexp in listString.split(",") if blackRegexp)
380 
381 # =============================================================================
382 
383 
if __name__ == '__main__':
    # Command line driver: compareRootHistos.py testFile.root referenceFile.root
    # The exit status is the number of discrepancies found (0 = identical),
    # 1 for a usage error or a file that cannot be read.
    usage = "usage: %prog testFile.root referenceFile.root [options]"
    # hand the usage to the parser so that %prog is expanded and -h shows it
    parser = OptionParser(usage=usage)
    parser.add_option("-b", dest="blacklist",
                      help='Comma separated list of regexps matching histograms to skip (for example -b"MemoryTool,ProcTime")')

    parser.add_option("-i",
                      action="store_true", dest="bin2bin", default=False,
                      help="Check for bin to bin identity")
    (options, args) = parser.parse_args()

    if len(args) != 2:
        print("Wrong number of rootfiles. Usage:")
        parser.print_usage()
        sys.exit(1)

    extractBlacklist(options.blacklist)

    testFile, referenceFile = args

    # 'REC' is not a documented TFile option; open explicitly read-only
    testTFile = TFile(testFile, 'READ')
    print('opening Test File : %s' % (testFile))
    referenceTFile = TFile(referenceFile, 'READ')
    print('opening Reference File : %s' % (referenceFile))

    # an unreadable file must not be mistaken for an empty one
    for label, tfile, name in (('Test', testTFile, testFile),
                               ('Reference', referenceTFile, referenceFile)):
        if tfile.IsZombie():
            print('Could not open %s File : %s' % (label, name))
            sys.exit(1)

    # get structure of TFiles in a list of (path, object) tuples
    # (the reference list was previously built from the test file and vice versa)
    lref = rec(referenceTFile)
    ltest = rec(testTFile)
    # make a dictionary of each list. keys=paths
    dref = dict(lref)
    dtest = dict(ltest)
    # make a tuple of (type, dict) where type is either 'reference' or 'test'
    ts = (ref, dref)
    tp = (test, dtest)

    # compare paths from each file
    state = comparePaths(ts, tp)

    # compare histos from each file
    retval = compareHistos(ts, tp, state, checkBin2BinIdentity=options.bin2bin)

    # finished with TFiles
    testTFile.Close()
    referenceTFile.Close()

    # exit statuses are truncated to 8 bits: keep 256 discrepancies from
    # being reported as success
    sys.exit(min(retval, 255))
def rec(o, path=None, lst=None)
def compareHistos(t1, t2, state, checkBin2BinIdentity)
def extractBlacklist(listString)
def comparePaths(t1, t2)
def bin2binIdentity(h1, h2)
GAUDI_API double Integral(const Genfun::AbsFunction &function, const double a, const double b, const GaudiMath::Integration::Type type=GaudiMath::Integration::Adaptive, const GaudiMath::Integration::KronrodRule rule=GaudiMath::Integration::Default, const double epsabs=1.e-10, const double epsrel=1.e-7, const size_t size=1000)
Definition: Integral.cpp:26