The Gaudi Framework  v36r1 (3e2fb5a8)
compareRootHistos.py
Go to the documentation of this file.
1 #! /usr/bin/env python
2 
12 from __future__ import print_function
13 from optparse import OptionParser
14 import re
15 import sys
16 # Well known trick
17 backupArgv = sys.argv[:]
18 sys.argv = []
19 from ROOT import TFile
20 sys.argv = backupArgv
21 
# Regular expressions (kept as strings) selecting the histograms to skip
gRegexBlackList = []
# Names of the ROOT classes which are treated as histograms
histos = [
    'TH1D',
    'TH1F',
    'TH2D',
    'TH2F',
    'TProfile',
]
# Tags used as first element of the ( type, d ) tuples of the two files
ref, test = 'REFERENCE', 'TEST'
27 
28 # =============================================================================
29 # Method : rec( o, path=None, lst=None )
30 #
31 # @param o : a ROOT object
32 # @param path : a string like a transient store path; ie '/stat/CaloPIDs/ECALPIDE'
33 # @param lst : a list to hold (path, object) tuples
34 #
35 # function : recursively pull apart a ROOT file, making a list of (path, TObject) tuples
36 # This is done by GetListOfKeys method, which lets one work down through directories
37 # until you hit the Histo at the end of the path. The list of tuples is returned
38 #
39 
40 
def rec(o, path=None, lst=None):
    """Recursively flatten a ROOT file into a list of (path, object) tuples.

    Objects offering ``GetListOfKeys()`` are descended into; every object
    found below the starting one is recorded under a transient-store like
    path, ie '/stat/CaloPIDs/ECALPIDE'.

    @param o    : a ROOT object (the opened file on the first call)
    @param path : path of the parent of ``o``. When empty, ``o`` is taken as
                  the root '/stat' and is not recorded itself
    @param lst  : the list collecting the (path, object) tuples
    @return the list of (path, object) tuples
    """
    if not path:
        # top-level call: always start from a fresh list
        path = '/stat'
        lst = []
    else:
        if lst is None:
            # a path without a list used to fail on None.append()
            lst = []
        path = path + '/' + o.GetName()
        lst.append((path, o))
    if hasattr(o, 'GetListOfKeys'):
        for key in o.GetListOfKeys():
            rec(o.Get(key.GetName()), path, lst)
    return lst
56 
57 
58 # =============================================================================
59 
60 # =============================================================================
61 # Method : composition( t )
62 #
63 # @param t : a tuple of ( type, d ) where type is either 'REFERENCE' or 'TEST'
64 # and d is a dictionary of ROOT objects, with each key = ROOT path
65 #
66 # function : deduce the composition, (objects/histos) counts
67 #
68 
69 
def composition(t):
    """Count the plain objects and the histograms held by a file dictionary.

    @param t : a tuple of ( type, d ) where d is a dictionary of ROOT objects
               with each key = ROOT path; the type tag is not used here
    @return ( objs, hists ) : number of values whose class name is not / is
            listed in ``histos``
    """
    _, contents = t
    nHistos = sum(
        1 for obj in contents.values() if obj.__class__.__name__ in histos)
    nObjects = len(contents) - nHistos
    return nObjects, nHistos
80 
81 
82 # =============================================================================
83 
84 # =============================================================================
85 # Method : comparePaths( t1, t2 )
86 #
87 # @param t1, t2 : a tuple of ( type, d ) where type is either 'REFERENCE' or 'TEST'
88 # and d is a dictionary of ROOT objects, with each key = ROOT path
89 #
90 # function : compare the paths between the two histo files. If the files are identical, they
91 # should have the same set of paths. The Test file should definitely have the
92 # same paths as the Reference. Perhaps the Reference file will have some more paths due
93 # to extra histos added as part of Application Sequencer finalisation
94 # Arguments t1 and t2 are checked and the test/reference auto-detected
95 #
96 
97 
def comparePaths(t1, t2):
    """Compare the paths stored in the Reference and in the Test ROOT file.

    Prints how many paths are common to both files and which ones exist in
    only one of them. The Reference/Test role of each argument is detected
    from the type tag of its tuple.

    @param t1, t2 : tuples of ( type, d ) where type is either 'REFERENCE' or
                    'TEST' and d is a dictionary of ROOT objects, with each
                    key = ROOT path
    @return ( ((refObjects, refHistos), (testObjects, testHistos)),
              (pathsUniqueToReference, nHistosUniqueToReference),
              (pathsUniqueToTest, nHistosUniqueToTest),
              nMatchingHistos )
            or None when neither tuple is tagged as Reference
    """
    if t1[0] == ref:
        ds, dp = t1[1], t2[1]
    elif t2[0] == ref:
        ds, dp = t2[1], t1[1]
    else:
        print('Neither tuple is Reference Root file reference?')
        return

    def countHistos(d, paths):
        # number of the given paths which hold a histogram in d
        return sum(1 for n in paths if d[n].__class__.__name__ in histos)

    # Only the sets of paths are needed. The former keys().sort() calls
    # fail on Python 3, where keys() is a view without a sort() method.
    sset = set(ds)
    pset = set(dp)
    refObjs, refHistos = composition((ref, ds))
    testObjs, testHistos = composition((test, dp))
    print('\n' + '=' * 80)
    print('Comparison of Paths : Reference vs Test ROOT files')
    print('-' * 80)
    print(
        'Number of paths in Reference file : %i (objects, histos) = ( %i, %i )'
        % (len(ds), refObjs, refHistos))
    print('Number of paths in Test file : %i (objects, histos) = ( %i, %i )' %
          (len(dp), testObjs, testHistos))

    matching = sset & pset
    matchingHistos = countHistos(ds, matching)
    print('\nMatching paths : %i' % (len(matching)))

    # paths (and histos among them) found only in the reference file
    uSer = sset - pset
    uniqueReferenceHistos = countHistos(ds, uSer)
    print('Paths unique to Reference file : %i ( %i Histos )' %
          (len(uSer), uniqueReferenceHistos))
    # sorted so that the listing does not depend on the set ordering
    for n in sorted(uSer):
        print('\t%s : \t%s' % (ds[n], n))

    # paths (and histos among them) found only in the test file
    uPar = pset - sset
    uniqueTestHistos = countHistos(dp, uPar)
    print('Paths unique to Test file : %i ( %i Histos )' % (len(uPar),
                                                            uniqueTestHistos))
    for n in sorted(uPar):
        print('\t%s : \t%s' % (dp[n], n))

    print('Matching Histos to test : %i' % (matchingHistos))
    print('=' * 80 + '\n')
    return (((refObjs, refHistos), (testObjs, testHistos)),
            (uSer, uniqueReferenceHistos), (uPar, uniqueTestHistos),
            matchingHistos)
157 
158 
159 # =============================================================================
160 
161 # =============================================================================
162 # Method : bin2binIdentity(h1,h2)
163 #
164 # @param h1, h2 : The two histograms to compare
165 # function : Return the number of different bins
166 
167 
def bin2binIdentity(h1, h2):
    """Return the number of bins whose content differs between h1 and h2.

    Every cell of ``h1`` is visited through its global bin number, underflow
    and overflow cells included, and its content is compared with the content
    of the cell having the same number in ``h2``.

    @param h1, h2 : the two histograms to compare
    @return the number of cells with different content
    """

    def getNbins(h):
        # ROOT numbers the cells of a histogram globally and gives every
        # axis it really has an underflow and an overflow cell. The former
        # (nx + 1) * ... count stopped short of the last cells.
        dim = h.GetDimension()
        ncells = h.GetNbinsX() + 2
        if dim > 1:
            ncells *= h.GetNbinsY() + 2
        if dim > 2:
            ncells *= h.GetNbinsZ() + 2
        return ncells

    return sum(1 for ibin in range(getNbins(h1))
               if h1.GetBinContent(ibin) != h2.GetBinContent(ibin))
185 
186 
187 # =============================================================================
188 # Method : compareHistos( t1, t2 )
189 #
190 # @param t1, t2 : a tuple of ( type, d ) where type is either 'REFERENCE' or 'TEST'
191 # and d is a dictionary of ROOT objects, with each key = ROOT path
192 #
193 # function : compare the histograms in Reference/Test ROOT files. First, go through each
194 # dict to collect the histos (ignore TDirectory objects, etc). Then the histos
195 # in the test file (experimental) are compared to their equivalents in the
196 # reference file (definitely correct) using the following checks.
197 # 1) The entries are checked, they should be equal; if not, no other check is made
198 # 2) If integrals and summed bin errors are non-zero, check the KolmogorovTest() ; should be 1
199 # 3) Otherwise, check the Integral(); should be equal
200 # 4) If identity flag is there and KS test is performed, perform bin2bin identity test
201 # Arguments t1 and t2 are checked and the test/reference auto-detected
202 #
def compareHistos(t1, t2, state, checkBin2BinIdentity):
    """Compare the histograms of the Reference and Test ROOT files.

    Histograms whose path matches one of the regexps in ``gRegexBlackList``
    are skipped. Every remaining reference histogram which also exists in the
    test file goes through:
      1) the entries check; if the entries differ no other check is made
      2) the KolmogorovTest(), when both integrals and both sums of bin
         errors are non-zero; the result must be 1
      3) the comparison of the integrals, when 2) is not applicable
      4) the bin to bin identity check, when requested and 2) is applicable
    A report is printed for each check, followed by a global summary.

    @param t1, t2 : tuples of ( type, d ) where type is either 'REFERENCE' or
                    'TEST' and d is a dictionary of ROOT objects, with each
                    key = ROOT path; the test/reference role is auto-detected
    @param state : the value returned by comparePaths for the same tuples
    @param checkBin2BinIdentity : enable the bin to bin identity check
    @return the number of discrepancies found (0 if the histograms agree),
            or None when neither tuple is tagged as Reference
    """
    (((referenceObjects, referenceHistos), (parallObjects, parallHistos)),
     (uniqueSerPaths, uniqueSerHistos), (uniqueParPaths, uniqueParHistos),
     mh) = state

    # deduce which one is test, which reference
    if t1[0] == ref:
        ds, dp = t1[1], t2[1]
    elif t2[0] == ref:
        ds, dp = t2[1], t1[1]
    else:
        print('Neither tuple is Reference Root file reference?')
        return

    omit = [re.compile(regex) for regex in gRegexBlackList]

    def selectHistos(d):
        # paths of the histograms of d which are not matched by an omission
        return [
            k for k in d
            if not any(regex.search(k) is not None for regex in omit)
            and d[k].__class__.__name__ in histos
        ]

    sHistos = selectHistos(ds)
    # only used for membership tests, hence a set
    pHistos = set(selectHistos(dp))

    cEntries = 0
    xEntries = 0
    diffEntries = []
    xIntegrals = 0
    diffIntegrals = []
    failedKol = 0
    diffKols = []
    passedIdentity = 0
    failedIdentity = 0
    diffIdentity = []
    identityDiffBins = {}
    kTested = 0
    kTestResults = {}
    otherTest = 0
    for h in sHistos:
        if h not in pHistos:
            print('not found? ', h)
            continue
        # matching histos to check
        cEntries += 1
        sh = ds[h]
        ph = dp[h]
        # first check entries
        if sh.GetEntries() != ph.GetEntries():
            diffEntries.append(h)
            xEntries += 1
            continue
        # check for (non-zero sum of bin error) && (non-zero integrals) for K-Test
        sBinError = sum((sh.GetBinError(i) for i in range(sh.GetNbinsX())),
                        0.0)
        pBinError = sum((ph.GetBinError(i) for i in range(ph.GetNbinsX())),
                        0.0)
        sint = sh.Integral()
        pint = ph.Integral()
        doKS = (bool(sint) and bool(pint)) and (sBinError > 0
                                                and pBinError > 0)
        if checkBin2BinIdentity and doKS:
            diffBins = bin2binIdentity(sh, ph)
            if diffBins == 0:
                passedIdentity += 1
            else:
                failedIdentity += 1
                diffIdentity.append(h)
                identityDiffBins[h] = diffBins
        if doKS:
            kTested += 1
            kTest = sh.KolmogorovTest(ph)
            kTestResults[h] = kTest
            # passes only when the result truncates to a non-zero integer
            if not int(kTest):
                failedKol += 1
                diffKols.append(h)
        else:
            # K-Test not applicable: compare the integrals instead
            otherTest += 1
            if sint != pint:
                diffIntegrals.append(h)
                xIntegrals += 1

    # report on Failed Entry-Checks
    print('\n\n' + '-' * 80)
    print('Summary of histos with different Entries')
    print('-' * 80)
    if diffEntries:
        diffEntries.sort()
        for e in diffEntries:
            print('\t\t\t%s:\t%i != %i' % (e, int(ds[e].GetEntries()),
                                           int(dp[e].GetEntries())))
    print('-' * 80)

    # report on Failed Kolmogorov Tests
    print('\n\n' + '-' * 60)
    print('Summary of histos which failed Kolmogorov Test')
    print('-' * 60)
    if diffKols:
        diffKols.sort()
        for e in diffKols:
            print('%s\t\t%s :\tK-Test Result :\t %5.16f' %
                  (ds[e].ClassName(), e, kTestResults[e]))
    print('-' * 60)

    # report on Failed Integral Checks
    print('\n\n' + '-' * 60)
    print('Summary of histos which failed Integral Check')
    print('-' * 60)
    if diffIntegrals:
        diffIntegrals.sort()
        for e in diffIntegrals:
            refIntegral = ds[e].Integral()
            diff = dp[e].Integral() - refIntegral
            if refIntegral:
                pc = (diff * 100) / refIntegral
            else:
                # an empty reference against a filled test histogram used to
                # end in a ZeroDivisionError here
                pc = float('inf') if diff > 0 else float('-inf')
            print(
                '%s\t\t%s:\t Diff = %5.6f\tPercent Diff to Reference : %5.6f '
                % (ds[e].ClassName(), e, diff, pc))
    print('-' * 60 + '\n')
    print('=' * 80 + '\n')

    # Report on failed bin2bin identity
    if checkBin2BinIdentity:
        # report on b2b checks
        print('\n\n' + '-' * 80)
        print('Summary of histos with at least one bin with different Entries')
        print('-' * 80)
        if diffIdentity:
            diffIdentity.sort()
            for e in diffIdentity:
                print('%s\t\t%s: %i different bins' % (ds[e].ClassName(), e,
                                                       identityDiffBins[e]))
        print('-' * 80)

    print('\n' + '=' * 80)
    print('Comparison : Reference/Test ROOT Histo files')
    print('\n\t\tReference\tTest')
    print('\tObjects : %i\t%i\t\t( p-s = %i )' %
          (referenceObjects, parallObjects, parallObjects - referenceObjects))
    print('\tHistos : %i\t%i\t\t( p-s = %i )' %
          (referenceHistos, parallHistos, parallHistos - referenceHistos))
    print('\t __________')
    print('\tTotal : %i\t%i\n' % (referenceHistos + referenceObjects,
                                  parallHistos + parallObjects))
    print('Objects/Histos unique to Reference File : %i / %i' %
          (len(uniqueSerPaths) - uniqueSerHistos, uniqueSerHistos))
    print('Objects/Histos unique to Test File : %i / %i' %
          (len(uniqueParPaths) - uniqueParHistos, uniqueParHistos))
    print('\nMatching Histograms valid for Comparison : %i' % (mh))
    print('\nOmissions\' patterns : ')
    for entry in gRegexBlackList:
        print('\t%s' % (entry))
    # cEntries is the number of histograms really compared; subtracting the
    # number of patterns from mh was only right when each pattern omitted
    # exactly one matching histogram
    print('\nHistograms for Comparison (after Omissions) : %i' % (cEntries))
    print('\n\tHISTOGRAM TESTS : ')
    print('\t\tKOLMOGOROV TEST : %i' % (kTested))
    print('\t\tINTEGRAL TEST : %i' % (otherTest))
    print('\t\tENTRIES TEST : %i' % (xEntries))
    if checkBin2BinIdentity:
        print('\t\tBIN2BIN TEST : %i' % (passedIdentity))
    print('\t\t ____')
    print('\t\tTested : %i' % (cEntries))

    print('\n\tDISCREPANCIES : ')
    print('\t\tK-Test : %i' % (failedKol))
    print('\t\tIntegrals : %i' % (xIntegrals))
    print('\t\tEntries : %i' % (xEntries))
    retval = failedKol + xIntegrals + xEntries + failedIdentity
    if retval != 0:
        print('\nThe two sets of histograms were not identical')
    print('\n' + '=' * 80)
    return retval
405 
406 
407 # =============================================================================
408 
409 
def extractBlacklist(listString):
    """Fill the global list of omission regexps from a command line value.

    @param listString : comma separated list of regexps, or None/'' to reset
                        the list to empty
    """
    global gRegexBlackList
    if listString:
        # Skip empty items ("a,,b" or a trailing comma): an empty regexp
        # matches every path and would silently omit all the histograms.
        gRegexBlackList.extend(
            blackRegexp for blackRegexp in listString.split(",")
            if blackRegexp)
    else:
        # the reset used to go to a misspelt local name (gBlackList)
        gRegexBlackList = []
417 
418 
419 # =============================================================================
420 
if __name__ == '__main__':
    # Command line entry point: compare the histograms of a test ROOT file
    # with those of a reference ROOT file and exit with the number of
    # discrepancies found.
    usage = "usage: %prog testFile.root referenceFile.root [options]"
    # hand the usage text to the parser so that -h shows the positional
    # arguments and %prog gets expanded
    parser = OptionParser(usage=usage)
    parser.add_option(
        "-b",
        dest="blacklist",
        help=
        'Comma separated list of regexps matching histograms to skip (for example -b"MemoryTool,ProcTime")'
    )

    parser.add_option(
        "-i",
        action="store_true",
        dest="bin2bin",
        default=False,
        help="Check for bin to bin identity")
    (options, args) = parser.parse_args()

    if len(args) != 2:
        print("Wrong number of rootfiles.")
        parser.print_usage()
        sys.exit(1)

    extractBlacklist(options.blacklist)

    testFile, referenceFile = args

    # 'READ' is the mode ROOT falls back to for the former 'REC' option
    testTFile = TFile(testFile, 'READ')
    print('opening Test File : %s' % (testFile))
    referenceTFile = TFile(referenceFile, 'READ')
    print('opening Reference File : %s' % (referenceFile))

    try:
        # Turn the structure of each TFile into a {path: object} dictionary.
        # The contents of the two files used to be swapped at this point, so
        # that the test file was reported as the reference and vice versa.
        dref = dict(rec(referenceTFile))
        dtest = dict(rec(testTFile))
        # make a tuple of (type, dict) where type is either 'reference' or 'test'
        ts = (ref, dref)
        tp = (test, dtest)

        # compare paths from each file
        state = comparePaths(ts, tp)

        # compare histos from each file
        retval = compareHistos(
            ts, tp, state, checkBin2BinIdentity=options.bin2bin)
    finally:
        # finished with TFiles, also when the comparison failed
        testTFile.Close()
        referenceTFile.Close()

    sys.exit(retval)
AlgSequencer.all
all
Definition: AlgSequencer.py:54
compareRootHistos.composition
def composition(t)
Definition: compareRootHistos.py:70
compareRootHistos.compareHistos
def compareHistos(t1, t2, state, checkBin2BinIdentity)
Definition: compareRootHistos.py:203
compareRootHistos.rec
def rec(o, path=None, lst=None)
Definition: compareRootHistos.py:41
compareRootHistos.comparePaths
def comparePaths(t1, t2)
Definition: compareRootHistos.py:98
compareRootHistos.extractBlacklist
def extractBlacklist(listString)
Definition: compareRootHistos.py:410
compareRootHistos.bin2binIdentity
def bin2binIdentity(h1, h2)
Definition: compareRootHistos.py:168
Gaudi::Functional::details::zip::range
decltype(auto) range(Args &&... args)
Zips multiple containers together to form a single range.
Definition: FunctionalDetails.h:97