Loading [MathJax]/extensions/tex2jax.js
The Gaudi Framework  v31r0 (aeb156f0)
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
compareRootHistos.py
Go to the documentation of this file.
1 #! /usr/bin/env python
2 from optparse import OptionParser
3 import re
4 import sys
5 # Well known trick
6 backupArgv = sys.argv[:]
7 sys.argv = []
8 from ROOT import TFile
9 sys.argv = backupArgv
10 
# Patterns (regular expressions, kept as strings) selecting the histograms
# that must be left out of the comparison.
gRegexBlackList = []
# Names of the ROOT classes that are counted and compared as histograms.
histos = [
    'TH1D',
    'TH1F',
    'TH2D',
    'TH2F',
    'TProfile',
]
# Tags used as first element of the (type, dict) tuples to tell the two
# inputs apart.
ref = 'REFERENCE'
test = 'TEST'
16 
17 # =============================================================================
18 # Method : rec( o, path=None, lst=None )
19 #
20 # @param o : a ROOT object
21 # @param path : a string like a transient store path; ie '/stat/CaloPIDs/ECALPIDE'
22 # @param lst : a list to hold (path, object) tuples
23 #
24 # function : recursively pull apart a ROOT file, making a list of (path, TObject) tuples
25 # This is done by GetListOfKeys method, which lets one work down through directories
26 # until you hit the Histo at the end of the path. The list of tuples is returned
27 #
28 
29 
def rec(o, path=None, lst=None):
    """Flatten a tree of ROOT objects into a list of (path, object) tuples.

    @param o    : object to record; if it exposes GetListOfKeys() every key
                  is fetched with o.Get(<key name>) and recorded recursively
    @param path : path of the parent of `o`. When empty or None, `o` is
                  taken as the root: it is recorded as '/stat' and a fresh
                  list is started (any `lst` passed in is ignored)
    @param lst  : list collecting the tuples while recursing
    @return     : list of (path, object) tuples, each parent before its
                  children
    """
    if not path:
        # Top-level call: the root object itself is stored under '/stat'.
        path = '/stat'
        lst = []
    else:
        path = path + '/' + o.GetName()
        if lst is None:
            # A start path was given without an accumulator; create one
            # instead of failing on None.append below.
            lst = []
    lst.append((path, o))
    if 'GetListOfKeys' in dir(o):
        for key in o.GetListOfKeys():
            # Children append to the shared list, so the return value of the
            # recursive call is not needed.
            rec(o.Get(key.GetName()), path, lst)
    return lst
45 
46 
47 # =============================================================================
48 
49 # =============================================================================
50 # Method : composition( t )
51 #
52 # @param t : a tuple of ( type, d ) where type is either 'REFERENCE' or 'TEST'
53 # and d is a dictionary of ROOT objects, with each key = ROOT path
54 #
55 # function : deduce the composition, (objects/histos) counts
56 #
57 
58 
def composition(t):
    """Count the plain objects and the histograms stored in a dictionary.

    @param t : a ( type, d ) tuple; only d, a dictionary of objects keyed by
               path, is looked at
    @return  : ( objs, hists ), the number of values whose class name is
               absent from / present in the module-level `histos` list
    """
    _, objects = t
    hists = sum(1 for obj in objects.values()
                if obj.__class__.__name__ in histos)
    objs = len(objects) - hists
    return objs, hists
69 
70 
71 # =============================================================================
72 
73 # =============================================================================
74 # Method : comparePaths( t1, t2 )
75 #
76 # @param t1, t2 : a tuple of ( type, d ) where type is either 'REFERENCE' or 'TEST'
77 # and d is a dictionary of ROOT objects, with each key = ROOT path
78 #
79 # function : compare the paths between the two histo files. If the files are identical, they
80 # should have the same set of paths. The Test file should definitely have the
81 # same paths as the Reference. Perhaps the Reference file will have some more paths due
82 # to extra histos added as part of Application Sequencer finalisation
83 # Arguments t1 and t2 are checked and the test/reference auto-detected
84 #
85 
86 
def comparePaths(t1, t2):
    """Compare the sets of paths of the Reference and Test dictionaries.

    Prints a report and returns the figures that compareHistos() expects as
    its `state` argument.

    @param t1, t2 : ( type, d ) tuples where type is `ref` or `test` and d is
                    a dictionary of ROOT objects keyed by path; the tuple
                    tagged `ref` is detected automatically
    @return : ( ((refObjects, refHistos), (testObjects, testHistos)),
                (pathsOnlyInReference, histosOnlyInReference),
                (pathsOnlyInTest, histosOnlyInTest),
                matchingHistos )
              where the two path collections are sets; None is returned when
              neither tuple is tagged as the reference
    """
    if t1[0] == ref:
        ds, dp = t1[1], t2[1]
    elif t2[0] == ref:
        ds, dp = t2[1], t1[1]
    else:
        print('Neither tuple is Reference Root file reference?')
        return

    def countHistos(d, paths):
        # Number of the given paths whose object in d is a histogram.
        return sum(1 for n in paths if d[n].__class__.__name__ in histos)

    sset = set(ds)
    pset = set(dp)
    refObjects, refHistos = composition((ref, ds))
    testObjects, testHistos = composition((test, dp))
    print('\n' + '=' * 80)
    print('Comparison of Paths : Reference vs Test ROOT files')
    print('-' * 80)
    print('Number of paths in Reference file : %i (objects, histos) = ( %i, %i )'
          % (len(ds), refObjects, refHistos))
    print('Number of paths in Test file : %i (objects, histos) = ( %i, %i )'
          % (len(dp), testObjects, testHistos))

    matching = sset.intersection(pset)
    matchingHistos = countHistos(ds, matching)
    print('\nMatching paths : %i' % (len(matching)))

    # paths (and histos among them) present only in the reference file
    uSer = sset - pset
    uniqueReferenceHistos = countHistos(ds, uSer)
    print('Paths unique to Reference file : %i ( %i Histos )' %
          (len(uSer), uniqueReferenceHistos))
    # sorted so that the listing is reproducible from run to run
    for n in sorted(uSer):
        print('\t%s : \t%s' % (ds[n], n))

    # paths (and histos among them) present only in the test file
    uPar = pset - sset
    uniqueTestHistos = countHistos(dp, uPar)
    print('Paths unique to Test file : %i ( %i Histos )' %
          (len(uPar), uniqueTestHistos))
    for n in sorted(uPar):
        print('\t%s : \t%s' % (dp[n], n))

    print('Matching Histos to test : %i' % (matchingHistos))
    print('=' * 80 + '\n')
    return (((refObjects, refHistos), (testObjects, testHistos)),
            (uSer, uniqueReferenceHistos), (uPar, uniqueTestHistos),
            matchingHistos)
145 
146 
147 # =============================================================================
148 
149 # =============================================================================
150 # Method : bin2binIdentity(h1,h2)
151 #
152 # @param h1, h2 : The two histograms to compare
153 # function : Return the number of different bins
154 
155 
def bin2binIdentity(h1, h2):
    """Return the number of cells whose content differs between h1 and h2.

    Every cell of h1 is visited through its global bin number, underflow and
    overflow cells included, and its content is compared with the content h2
    reports for the same global bin number.

    @param h1, h2 : the two histograms to compare
    @return       : number of differing cells (0 when the contents agree)
    """

    def getNbins(h):
        # ROOT numbers the cells of an axis 0 (underflow) .. nbins + 1
        # (overflow), so each axis the histogram really has contributes
        # nbins + 2 cells to the range of global bin numbers.
        dim = h.GetDimension()
        ncells = h.GetNbinsX() + 2
        if dim > 1:
            ncells *= h.GetNbinsY() + 2
        if dim > 2:
            ncells *= h.GetNbinsZ() + 2
        return ncells

    return sum(1 for ibin in range(getNbins(h1))
               if h1.GetBinContent(ibin) != h2.GetBinContent(ibin))
173 
174 
175 # =============================================================================
176 # Method : compareHistos( t1, t2, state, checkBin2BinIdentity )
177 #
178 # @param t1, t2 : a tuple of ( type, d ) where type is either 'REFERENCE' or 'TEST'
179 # and d is a dictionary of ROOT objects, with each key = ROOT path
180 #
181 # function : compare the histograms in Reference/Test ROOT files. First, go through each
182 # dict to collect the histos (ignore TDirectory objects, etc). Then the histos
183 # in the test file (experimental) are compared to their equivalents in the
184 # reference file (definitely correct) using the checks below.
185 # 1) The entries are checked, they should be equal
186 # 2) If entries are equal, check the Integral(); should be equal
187 # 3) If integrals are equal, check the KolmogorovTest() ; should be 1
188 # 4) If identity flag is there and KS test is performed, perform bin2bin identity test
189 # Arguments t1 and t2 are checked and the test/reference auto-detected
190 #
def compareHistos(t1, t2, state, checkBin2BinIdentity):
    """Compare the histograms found in both the Reference and Test files.

    Histograms whose path matches a pattern of `gRegexBlackList` are left
    out. For every remaining reference histogram that also exists in the
    test dictionary:
      1) the number of entries must be equal, otherwise the histogram is
         reported and no further check is made on it;
      2) if both integrals and both summed bin errors are non-zero, the
         Kolmogorov test is run and must give 1; with
         `checkBin2BinIdentity` set, bin2binIdentity() must also find no
         differing cell;
      3) otherwise the two integrals must be equal.

    @param t1, t2 : ( type, d ) tuples where type is `ref` or `test` and d is
                    a dictionary of ROOT objects keyed by path; the tuple
                    tagged `ref` is detected automatically
    @param state  : the value returned by comparePaths(), used for the
                    summary only
    @param checkBin2BinIdentity : enable the bin by bin identity check
    @return : number of discrepancies found (0 when the two sets agree), or
              None when neither tuple is tagged as the reference
    """
    (((referenceObjects, referenceHistos), (parallObjects, parallHistos)),
     (uniqueSerPaths, uniqueSerHistos), (uniqueParPaths, uniqueParHistos),
     mh) = state

    # deduce which one is test, which reference
    if t1[0] == ref:
        ds, dp = t1[1], t2[1]
    elif t2[0] == ref:
        ds, dp = t2[1], t1[1]
    else:
        print('Neither tuple is Reference Root file reference?')
        return

    omit = [re.compile(regex) for regex in gRegexBlackList]

    def selectHistos(d):
        # Paths of the histograms of d that no blacklist pattern matches.
        return [
            k for k in d
            if not any(regex.search(k) is not None for regex in omit)
            and d[k].__class__.__name__ in histos
        ]

    sHistos = selectHistos(ds)
    # a set: it is only used for membership tests
    pHistos = set(selectHistos(dp))

    cEntries = 0  # histograms present on both sides, i.e. actually tested
    kTested = 0  # histograms that went through the Kolmogorov test
    otherTest = 0  # histograms that went through the integral check
    passedIdentity = 0
    diffEntries = []
    diffIntegrals = []
    diffKols = []
    diffIdentity = []
    identityDiffBins = {}
    kTestResults = {}
    for h in sHistos:
        if h not in pHistos:
            print('%s %s' % ('not found? ', h))
            continue
        # matching histos to check
        cEntries += 1
        sh = ds[h]
        ph = dp[h]
        # first check entries
        if sh.GetEntries() != ph.GetEntries():
            diffEntries.append(h)
            continue
        # check for (non-zero sum of bin error) && (non-zero integrals) for K-Test
        # NOTE(review): the sums run over bins 0 .. GetNbinsX() - 1; under
        # ROOT's bin numbering that takes in the underflow bin and leaves out
        # the last regular bin - confirm that this is intended.
        sBinError = sum(sh.GetBinError(i) for i in range(sh.GetNbinsX()))
        pBinError = sum(ph.GetBinError(i) for i in range(ph.GetNbinsX()))
        sint = sh.Integral()
        pint = ph.Integral()
        doKS = (bool(sint) and bool(pint)) and (sBinError > 0
                                                and pBinError > 0)
        if doKS:
            if checkBin2BinIdentity:
                diffBins = bin2binIdentity(sh, ph)
                if diffBins == 0:
                    passedIdentity += 1
                else:
                    diffIdentity.append(h)
                    identityDiffBins[h] = diffBins
            kTested += 1
            kTest = sh.KolmogorovTest(ph)
            kTestResults[h] = kTest
            if not int(kTest):
                diffKols.append(h)
        else:
            # no K-Test possible: fall back on comparing the integrals
            otherTest += 1
            if sint != pint:
                diffIntegrals.append(h)

    xEntries = len(diffEntries)
    xIntegrals = len(diffIntegrals)
    failedKol = len(diffKols)
    failedIdentity = len(diffIdentity)

    # report on Failed Entry-Checks
    print('\n\n' + '-' * 80)
    print('Summary of histos with different Entries')
    print('-' * 80)
    for e in sorted(diffEntries):
        print('\t\t\t%s:\t%i != %i' % (e, int(ds[e].GetEntries()),
                                       int(dp[e].GetEntries())))
    print('-' * 80)

    # report on Failed Kolmogorov Tests
    print('\n\n' + '-' * 60)
    print('Summary of histos which failed Kolmogorov Test')
    print('-' * 60)
    for e in sorted(diffKols):
        print('%s\t\t%s :\tK-Test Result :\t %5.16f' % (ds[e].ClassName(), e,
                                                        kTestResults[e]))
    print('-' * 60)

    # report on Failed Integral Checks
    print('\n\n' + '-' * 60)
    print('Summary of histos which failed Integral Check')
    print('-' * 60)
    for e in sorted(diffIntegrals):
        refIntegral = ds[e].Integral()
        diff = dp[e].Integral() - refIntegral
        if refIntegral:
            pc = (diff * 100) / refIntegral
        else:
            # A histogram lands here with an empty reference whenever only
            # the test side is filled; a percentage relative to zero is
            # undefined, so report NaN instead of dividing by zero.
            pc = float('nan')
        print('%s\t\t%s:\t Diff = %5.6f\tPercent Diff to Reference : %5.6f ' %
              (ds[e].ClassName(), e, diff, pc))
    print('-' * 60 + '\n')
    print('=' * 80 + '\n')

    # Report on failed bin2bin identity
    if checkBin2BinIdentity:
        # report on b2b checks
        print('\n\n' + '-' * 80)
        print('Summary of histos with at least one bin with different Entries')
        print('-' * 80)
        for e in sorted(diffIdentity):
            print('%s\t\t%s: %i different bins' % (ds[e].ClassName(), e,
                                                   identityDiffBins[e]))
        print('-' * 80)

    print('\n' + '=' * 80)
    print('Comparison : Reference/Test ROOT Histo files')
    print('\n\t\tReference\tTest')
    print('\tObjects : %i\t%i\t\t( p-s = %i )' %
          (referenceObjects, parallObjects, parallObjects - referenceObjects))
    print('\tHistos : %i\t%i\t\t( p-s = %i )' %
          (referenceHistos, parallHistos, parallHistos - referenceHistos))
    print('\t __________')
    print('\tTotal : %i\t%i\n' % (referenceHistos + referenceObjects,
                                  parallHistos + parallObjects))
    print('Objects/Histos unique to Reference File : %i / %i' %
          (len(uniqueSerPaths) - uniqueSerHistos, uniqueSerHistos))
    print('Objects/Histos unique to Test File : %i / %i' %
          (len(uniqueParPaths) - uniqueParHistos, uniqueParHistos))
    print('\nMatching Histograms valid for Comparison : %i' % (mh))
    print('\nOmissions\' patterns : ')
    for entry in gRegexBlackList:
        print('\t%s' % (entry))
    # Reference histograms that survive the blacklist and whose path also
    # exists in the test file (the number of patterns says nothing about how
    # many histograms they remove).
    afterOmissions = sum(1 for h in sHistos if h in dp)
    print('\nHistograms for Comparison (after Omissions) : %i' %
          (afterOmissions))
    print('\n\tHISTOGRAM TESTS : ')
    print('\t\tKOLMOGOROV TEST : %i' % (kTested))
    print('\t\tINTEGRAL TEST : %i' % (otherTest))
    print('\t\tENTRIES TEST : %i' % (xEntries))
    if checkBin2BinIdentity:
        print('\t\tBIN2BIN TEST : %i' % (passedIdentity))
    print('\t\t ____')
    print('\t\tTested : %i' % (cEntries))

    print('\n\tDISCREPANCIES : ')
    print('\t\tK-Test : %i' % (failedKol))
    print('\t\tIntegrals : %i' % (xIntegrals))
    print('\t\tEntries : %i' % (xEntries))
    retval = failedKol + xIntegrals + xEntries + failedIdentity
    if retval != 0:
        print('\nThe two sets of histograms were not identical')
    print('\n' + '=' * 80)
    return retval
392 
393 
394 # =============================================================================
395 
396 
def extractBlacklist(listString):
    """Fill the module-level blacklist from a comma separated string.

    @param listString : comma separated regular expressions; each non-empty
                        one is appended to `gRegexBlackList`. When None or
                        empty the blacklist is emptied.
    """
    if listString:
        # Drop empty entries (stray or trailing comma): an empty regular
        # expression matches every path and would exclude all histograms.
        gRegexBlackList.extend(
            blackRegexp for blackRegexp in listString.split(",")
            if blackRegexp)
    else:
        # Empty the list in place so that the module-level object stays the
        # same one.
        del gRegexBlackList[:]
404 
405 
406 # =============================================================================
407 
if __name__ == '__main__':
    # Command line entry point: compare the histograms of a test ROOT file
    # with those of a reference ROOT file and exit with the number of
    # discrepancies found.
    usage = "usage: %prog testFile.root referenceFile.root [options]"
    # Hand the usage text to the parser so that --help shows it as well.
    parser = OptionParser(usage=usage)
    parser.add_option(
        "-b",
        dest="blacklist",
        help='Comma separated list of regexps matching histograms to skip '
        '(for example -b"MemoryTool,ProcTime")')
    parser.add_option(
        "-i",
        action="store_true",
        dest="bin2bin",
        default=False,
        help="Check for bin to bin identity")
    (options, args) = parser.parse_args()

    if len(args) != 2:
        print("Wrong number of rootfiles. Usage:")
        # expand the %prog placeholder before showing the text
        print(usage.replace('%prog', parser.get_prog_name()))
        sys.exit(1)

    extractBlacklist(options.blacklist)

    testFile, referenceFile = args

    # Both files are only read from, so open them explicitly read-only.
    testTFile = TFile(testFile, 'READ')
    print('opening Test File : %s' % (testFile))
    referenceTFile = TFile(referenceFile, 'READ')
    print('opening Reference File : %s' % (referenceFile))

    try:
        # Flatten each file into (path, object) tuples and index them by
        # path. The reference dictionary comes from the reference file and
        # the test dictionary from the test file, so the labels in the
        # reports match the command line.
        dref = dict(rec(referenceTFile))
        dtest = dict(rec(testTFile))
        # make a tuple of (type, dict) where type is either 'reference' or 'test'
        ts = (ref, dref)
        tp = (test, dtest)

        # compare paths from each file
        state = comparePaths(ts, tp)

        # compare histos from each file
        retval = compareHistos(
            ts, tp, state, checkBin2BinIdentity=options.bin2bin)
    finally:
        # finished with TFiles, also when the comparison raised
        testTFile.Close()
        referenceTFile.Close()

    sys.exit(retval)
def rec(o, path=None, lst=None)
def compareHistos(t1, t2, state, checkBin2BinIdentity)
def extractBlacklist(listString)
def comparePaths(t1, t2)
def bin2binIdentity(h1, h2)
GAUDI_API double Integral(const Genfun::AbsFunction &function, const double a, const double b, const GaudiMath::Integration::Type type=GaudiMath::Integration::Adaptive, const GaudiMath::Integration::KronrodRule rule=GaudiMath::Integration::Default, const double epsabs=1.e-10, const double epsrel=1.e-7, const size_t size=1000)
Definition: Integral.cpp:25