The Gaudi Framework  v32r2 (46d42edc)
compareRootHistos.py
Go to the documentation of this file.
1 #! /usr/bin/env python
2 from __future__ import print_function
3 from optparse import OptionParser
4 import re
5 import sys
6 # Well known trick
7 backupArgv = sys.argv[:]
8 sys.argv = []
9 from ROOT import TFile
10 sys.argv = backupArgv
11 
# Regular expressions (kept as plain strings) for histogram paths that must be
# left out of the comparison.
gRegexBlackList = []
# ROOT class names that are counted and compared as histograms.
histos = [
    'TH1D',
    'TH1F',
    'TH2D',
    'TH2F',
    'TProfile',
]
# Tags used as first element of the (type, dict) tuples to tell the reference
# file apart from the test file.
ref = 'REFERENCE'
test = 'TEST'
17 
18 # =============================================================================
19 # Method : rec( o, path=None, lst=None )
20 #
21 # @param o : a ROOT object
22 # @param path : a string like a transient store path; ie '/stat/CaloPIDs/ECALPIDE'
23 # @param lst : a list to hold (path, object) tuples
24 #
25 # function : recursively pull apart a ROOT file, making a list of (path, TObject) tuples
26 # This is done by GetListOfKeys method, which lets one work down through directories
27 # until you hit the Histo at the end of the path. The list of tuples is returned
28 #
29 
30 
def rec(o, path=None, lst=None):
    """Recursively flatten a ROOT object tree into (path, object) tuples.

    @param o    : the object to descend into; anything exposing
                  GetListOfKeys()/Get() is treated as a directory
    @param path : path of the parent of ``o``; when empty, ``o`` is taken as
                  the root, its path is '/stat' and it is not itself recorded
    @param lst  : list receiving the (path, object) tuples; a new list is
                  created when omitted

    @return the list holding one (path, object) tuple per visited descendant
    """
    # Create the accumulator independently of `path`, so that a call with a
    # path but no list works and a caller-supplied list is never discarded.
    if lst is None:
        lst = []
    if not path:
        path = '/stat'
    else:
        path = path + '/' + o.GetName()
        lst.append((path, o))
    # Only directory-like objects have keys to descend into.
    if hasattr(o, 'GetListOfKeys'):
        for k in o.GetListOfKeys():
            rec(o.Get(k.GetName()), path, lst)
    return lst
46 
47 
48 # =============================================================================
49 
50 # =============================================================================
51 # Method : composition( t )
52 #
53 # @param t : a tuple of ( type, d ) where type is either 'REFERENCE' or 'TEST'
54 # and d is a dictionary of ROOT objects, with each key = ROOT path
55 #
56 # function : deduce the composition, (objects/histos) counts
57 #
58 
59 
def composition(t):
    """Count the plain objects and the histograms held in a file dictionary.

    @param t : a (type, d) tuple; only ``d`` is used, a dictionary mapping
               ROOT paths to objects

    @return (objects, histos): the number of entries whose class name is not
            listed in ``histos``, followed by the number whose class name is
    """
    _, objects = t
    nHistos = sum(1 for key in objects.keys()
                  if objects[key].__class__.__name__ in histos)
    return len(objects) - nHistos, nHistos
70 
71 
72 # =============================================================================
73 
74 # =============================================================================
75 # Method : comparePaths( t1, t2 )
76 #
77 # @param t1, t2 : a tuple of ( type, d ) where type is either 'REFERENCE' or 'TEST'
78 # and d is a dictionary of ROOT objects, with each key = ROOT path
79 #
80 # function : compare the paths between the two histo files. If the files are identical, they
81 # should have the same set of paths. The Test file should definitely have the
82 # same paths as the Reference. Perhaps the Reference file will have some more paths due
83 # to extra histos added as part of Application Sequencer finalisation
84 # Arguments t1 and t2 are checked and the test/reference auto-detected
85 #
86 
87 
def comparePaths(t1, t2):
    """Compare the sets of paths of the reference and test files and report.

    @param t1, t2 : (type, d) tuples where type is 'REFERENCE' or 'TEST' and
                    d maps ROOT paths to objects; which one is the reference
                    is detected from the tag

    @return (((refObjects, refHistos), (testObjects, testHistos)),
             (pathsOnlyInReference, histosOnlyInReference),
             (pathsOnlyInTest, histosOnlyInTest),
             matchingHistos)
            where the two ``pathsOnly...`` items are sets; None is returned
            (after printing a message) when neither tuple is tagged as the
            reference
    """
    if t1[0] == ref:
        ds, dp = t1[1], t2[1]
    elif t2[0] == ref:
        ds, dp = t2[1], t1[1]
    else:
        print('Neither tuple is Reference Root file reference?')
        return

    def countHistos(d, paths):
        # number of the given paths whose object is a histogram
        return sum(1 for n in paths if d[n].__class__.__name__ in histos)

    # Build the sets straight from the dictionaries: calling .sort() on
    # dict.keys() fails on Python 3, where keys() returns a view.
    sset = set(ds)
    pset = set(dp)
    refObjs, refHists = composition((ref, ds))
    testObjs, testHists = composition((test, dp))
    print('\n' + '=' * 80)
    print('Comparison of Paths : Reference vs Test ROOT files')
    print('-' * 80)
    print(
        'Number of paths in Reference file : %i (objects, histos) = ( %i, %i )'
        % (len(sset), refObjs, refHists))
    print('Number of paths in Test file : %i (objects, histos) = ( %i, %i )' %
          (len(pset), testObjs, testHists))

    matching = sset.intersection(pset)
    matchingHistos = countHistos(ds, matching)
    print('\nMatching paths : %i' % (len(matching)))

    # paths (and histos) present only in the reference file
    uSer = sset - pset
    uniqueReferenceHistos = countHistos(ds, uSer)
    print('Paths unique to Reference file : %i ( %i Histos )' %
          (len(uSer), uniqueReferenceHistos))
    # sorted only to make the listing reproducible from run to run
    for n in sorted(uSer):
        print('\t%s : \t%s' % (ds[n], n))

    # paths (and histos) present only in the test file
    uPar = pset - sset
    uniqueTestHistos = countHistos(dp, uPar)
    print('Paths unique to Test file : %i ( %i Histos )' % (len(uPar),
                                                            uniqueTestHistos))
    for n in sorted(uPar):
        print('\t%s : \t%s' % (dp[n], n))

    print('Matching Histos to test : %i' % (matchingHistos))
    print('=' * 80 + '\n')
    return (((refObjs, refHists), (testObjs, testHists)),
            (uSer, uniqueReferenceHistos), (uPar, uniqueTestHistos),
            matchingHistos)
147 
148 
149 # =============================================================================
150 
151 # =============================================================================
152 # Method : bin2binIdentity(h1,h2)
153 #
154 # @param h1, h2 : The two histograms to compare
155 # function : Return the number of different bins
156 
157 
def bin2binIdentity(h1, h2):
    """Return the number of bins whose content differs between h1 and h2.

    @param h1, h2 : the two histograms to compare; the bin range is taken
                    from h1 and h2 is read at the same global bin numbers

    Every cell of h1 is compared, underflow and overflow included. ROOT
    numbers the cells of an axis with N bins from 0 (underflow) to N+1
    (overflow), so each axis of the histogram contributes N+2 cells.
    """

    def getNcells(h):
        # number of global bins, under/overflow included, per dimension
        cells = h.GetNbinsX() + 2
        dim = h.GetDimension()
        if dim > 1:
            cells *= h.GetNbinsY() + 2
        if dim > 2:
            cells *= h.GetNbinsZ() + 2
        return cells

    return sum(1 for ibin in range(getNcells(h1))
               if h1.GetBinContent(ibin) != h2.GetBinContent(ibin))
175 
176 
177 # =============================================================================
178 # Method : compareHistos( t1, t2 )
179 #
180 # @param t1, t2 : a tuple of ( type, d ) where type is either 'REFERENCE' or 'TEST'
181 # and d is a dictionary of ROOT objects, with each key = ROOT path
182 #
183 # function : compare the histograms in Reference/Test ROOT files. First, go through each
184 # dict to collect the histos (ignore TDirectory objects, etc). Then the histos
185 # in the test file (experimental) are compared to their equivalents in the
186 # reference file (definitely correct) using the checks listed below.
187 # 1) The entries are checked, they should be equal
188 # 2) If entries are equal, check the Integral(); should be equal
189 # 3) If integrals are equal, check the KolmogorovTest() ; should be 1
190 # 4) If identity flag is there and KS test is performed, perform bin2bin identity test
191 # Arguments t1 and t2 are checked and the test/reference auto-detected
192 #
def compareHistos(t1, t2, state, checkBin2BinIdentity):
    """Compare the histograms of the reference and test files and report.

    @param t1, t2 : (type, d) tuples where type is 'REFERENCE' or 'TEST' and
                    d maps ROOT paths to objects; which one is the reference
                    is detected from the tag
    @param state  : the tuple returned by comparePaths(), used for the
                    summary counts
    @param checkBin2BinIdentity : when true, histograms that undergo the
                    Kolmogorov test are also compared bin by bin

    Histograms whose path matches one of the gRegexBlackList patterns are
    skipped. For each remaining histogram present in both files:
      1) the number of entries must be equal, otherwise nothing else is tried
      2) if both integrals and both bin-error sums are non-zero, the
         Kolmogorov test is run (and the bin-by-bin check, if requested)
      3) otherwise the integrals must be equal

    @return the number of discrepancies (failed Kolmogorov tests + different
            integrals + different entries + failed bin-by-bin checks); None
            (after printing a message) when neither tuple is tagged as the
            reference
    """
    (((referenceObjects, referenceHistos), (parallObjects, parallHistos)),
     (uniqueSerPaths, uniqueSerHistos), (uniqueParPaths, uniqueParHistos),
     mh) = state

    # deduce which one is test, which reference
    if t1[0] == ref:
        ds, dp = t1[1], t2[1]
    elif t2[0] == ref:
        ds, dp = t2[1], t1[1]
    else:
        print('Neither tuple is Reference Root file reference?')
        return

    omit = [re.compile(regex) for regex in gRegexBlackList]

    def selectHistos(d):
        # paths of the histograms that are not black-listed
        return [
            k for k in d.keys()
            if d[k].__class__.__name__ in histos
            and not any(regex.search(k) is not None for regex in omit)
        ]

    sHistos = selectHistos(ds)
    # a set, because it is only used for membership tests in the loop below
    pHistos = set(selectHistos(dp))

    cEntries = 0
    xEntries = 0
    diffEntries = []
    xIntegrals = 0
    diffIntegrals = []
    failedKol = 0
    diffKols = []
    passedIdentity = 0
    failedIdentity = 0
    diffIdentity = []
    identityDiffBins = {}
    kTested = 0
    kTestResults = {}
    otherTest = 0
    for h in sHistos:
        if h not in pHistos:
            print('not found? ', h)
            continue
        # matching histos to check
        cEntries += 1
        sh = ds[h]
        ph = dp[h]
        # first check entries
        if sh.GetEntries() != ph.GetEntries():
            diffEntries.append(h)
            xEntries += 1
            continue
        # check for (non-zero sum of bin error) && (non-zero integrals) for
        # K-Test
        # NOTE(review): range(GetNbinsX()) visits bins 0..N-1, i.e. the
        # underflow bin but not bin N; kept as found, confirm the intent.
        sBinError = sum(sh.GetBinError(i) for i in range(sh.GetNbinsX()))
        pBinError = sum(ph.GetBinError(i) for i in range(ph.GetNbinsX()))
        sint = sh.Integral()
        pint = ph.Integral()
        doKS = (bool(sint) and bool(pint) and sBinError > 0
                and pBinError > 0)
        if checkBin2BinIdentity and doKS:
            diffBins = bin2binIdentity(sh, ph)
            if diffBins == 0:
                passedIdentity += 1
            else:
                failedIdentity += 1
                diffIdentity.append(h)
                identityDiffBins[h] = diffBins
        if doKS:
            kTested += 1
            kTest = sh.KolmogorovTest(ph)
            kTestResults[h] = kTest
            # int() truncates: only a result of at least 1 counts as passed
            if not int(kTest):
                failedKol += 1
                diffKols.append(h)
        else:
            # fall back to comparing the integrals
            otherTest += 1
            if sint != pint:
                diffIntegrals.append(h)
                xIntegrals += 1

    # report on Failed Entry-Checks
    print('\n\n' + '-' * 80)
    print('Summary of histos with different Entries')
    print('-' * 80)
    if diffEntries:
        diffEntries.sort()
        for e in diffEntries:
            print('\t\t\t%s:\t%i != %i' % (e, int(ds[e].GetEntries()),
                                           int(dp[e].GetEntries())))
    print('-' * 80)

    # report on Failed Kolmogorov Tests
    print('\n\n' + '-' * 60)
    print('Summary of histos which failed Kolmogorov Test')
    print('-' * 60)
    if diffKols:
        diffKols.sort()
        for e in diffKols:
            print('%s\t\t%s :\tK-Test Result :\t %5.16f' %
                  (ds[e].ClassName(), e, kTestResults[e]))
    print('-' * 60)

    # report on Failed Integral Checks
    print('\n\n' + '-' * 60)
    print('Summary of histos which failed Integral Check')
    print('-' * 60)
    if diffIntegrals:
        diffIntegrals.sort()
        for e in diffIntegrals:
            refIntegral = ds[e].Integral()
            diff = dp[e].Integral() - refIntegral
            if refIntegral:
                pc = (diff * 100) / refIntegral
            else:
                # A relative difference to a zero reference is unbounded;
                # report it as infinite instead of dividing by zero.
                pc = float('inf') if diff > 0 else float('-inf')
            print(
                '%s\t\t%s:\t Diff = %5.6f\tPercent Diff to Reference : %5.6f '
                % (ds[e].ClassName(), e, diff, pc))
    print('-' * 60 + '\n')
    print('=' * 80 + '\n')

    # Report on failed bin2bin identity
    if checkBin2BinIdentity:
        # report on b2b checks
        print('\n\n' + '-' * 80)
        print('Summary of histos with at least one bin with different Entries')
        print('-' * 80)
        if diffIdentity:
            diffIdentity.sort()
            for e in diffIdentity:
                print('%s\t\t%s: %i different bins' % (ds[e].ClassName(), e,
                                                       identityDiffBins[e]))
        print('-' * 80)

    print('\n' + '=' * 80)
    print('Comparison : Reference/Test ROOT Histo files')
    print('\n\t\tReference\tTest')
    print('\tObjects : %i\t%i\t\t( p-s = %i )' %
          (referenceObjects, parallObjects, parallObjects - referenceObjects))
    print('\tHistos : %i\t%i\t\t( p-s = %i )' %
          (referenceHistos, parallHistos, parallHistos - referenceHistos))
    print('\t __________')
    print('\tTotal : %i\t%i\n' % (referenceHistos + referenceObjects,
                                  parallHistos + parallObjects))
    print('Objects/Histos unique to Reference File : %i / %i' %
          (len(uniqueSerPaths) - uniqueSerHistos, uniqueSerHistos))
    print('Objects/Histos unique to Test File : %i / %i' %
          (len(uniqueParPaths) - uniqueParHistos, uniqueParHistos))
    print('\nMatching Histograms valid for Comparison : %i' % (mh))
    print('\nOmissions\' patterns : ')
    for entry in gRegexBlackList:
        print('\t%s' % (entry))
    # cEntries is the number of histograms found in both files once the
    # black-listed ones are dropped; the number of patterns says nothing about
    # how many histograms they removed.
    print('\nHistograms for Comparison (after Omissions) : %i' % (cEntries))
    print('\n\tHISTOGRAM TESTS : ')
    print('\t\tKOLMOGOROV TEST : %i' % (kTested))
    print('\t\tINTEGRAL TEST : %i' % (otherTest))
    print('\t\tENTRIES TEST : %i' % (xEntries))
    if checkBin2BinIdentity:
        print('\t\tBIN2BIN TEST : %i' % (passedIdentity))
    print('\t\t ____')
    print('\t\tTested : %i' % (cEntries))

    print('\n\tDISCREPANCIES : ')
    print('\t\tK-Test : %i' % (failedKol))
    print('\t\tIntegrals : %i' % (xIntegrals))
    print('\t\tEntries : %i' % (xEntries))
    retval = failedKol + xIntegrals + xEntries + failedIdentity
    if retval != 0:
        print('\nThe two sets of histograms were not identical')
    print('\n' + '=' * 80)
    return retval
395 
396 
397 # =============================================================================
398 
399 
def extractBlacklist(listString):
    """Append the patterns of a comma separated string to gRegexBlackList.

    @param listString : comma separated regular expressions, or None/'' to
                        leave the list untouched

    Empty items (as produced by 'a,,b' or a trailing comma) are ignored: an
    empty regular expression matches every path and would exclude all
    histograms from the comparison.
    """
    if not listString:
        return
    gRegexBlackList.extend(
        blackRegexp for blackRegexp in listString.split(",") if blackRegexp)
407 
408 
409 # =============================================================================
410 
if __name__ == '__main__':
    # Command line driver: compareRootHistos.py testFile.root referenceFile.root
    # The exit code is the number of discrepancies found by compareHistos().
    usage = "usage: %prog testFile.root referenceFile.root [options]"
    # hand the usage string to the parser so that %prog gets expanded
    parser = OptionParser(usage=usage)
    parser.add_option(
        "-b",
        dest="blacklist",
        help=
        'Comma separated list of regexps matching histograms to skip (for example -b"MemoryTool,ProcTime")'
    )

    parser.add_option(
        "-i",
        action="store_true",
        dest="bin2bin",
        default=False,
        help="Check for bin to bin identity")
    (options, args) = parser.parse_args()

    if len(args) != 2:
        print("Wrong number of rootfiles. Usage:")
        parser.print_usage()
        sys.exit(1)

    extractBlacklist(options.blacklist)

    testFile, referenceFile = args

    # Open read-only, with the documented spelling of the option.
    testTFile = TFile(testFile, 'READ')
    print('opening Test File : %s' % (testFile))
    refTFile = TFile(referenceFile, 'READ')
    print('opening Reference File : %s' % (referenceFile))

    # An unreadable file would otherwise look like an empty one and the
    # comparison would report success.
    for fileName, tfile in ((testFile, testTFile), (referenceFile, refTFile)):
        if not tfile or tfile.IsZombie():
            print('Cannot open ROOT file : %s' % (fileName))
            sys.exit(1)

    try:
        # flatten each TFile into a {path: object} dictionary and tag it;
        # the second command line argument is the reference
        ts = (ref, dict(rec(refTFile)))
        tp = (test, dict(rec(testTFile)))

        # compare paths from each file
        state = comparePaths(ts, tp)

        # compare histos from each file
        retval = compareHistos(
            ts, tp, state, checkBin2BinIdentity=options.bin2bin)
    finally:
        # finished with TFiles
        testTFile.Close()
        refTFile.Close()

    sys.exit(retval)
def rec(o, path=None, lst=None)
def compareHistos(t1, t2, state, checkBin2BinIdentity)
def extractBlacklist(listString)
def comparePaths(t1, t2)
def bin2binIdentity(h1, h2)
decltype(auto) range(Args &&... args)
Zips multiple containers together to form a single range.