The Gaudi Framework  v37r1 (a7f61348)
compareRootHistos.py
Go to the documentation of this file.
1 #! /usr/bin/env python3
2 
12 from __future__ import print_function
13 
14 import re
15 import sys
16 from optparse import OptionParser
17 
18 # Well known trick
19 backupArgv = sys.argv[:]
20 sys.argv = []
21 from ROOT import TFile
22 
23 sys.argv = backupArgv
24 
# The list of histograms not to check. Expressed as regexps
# (filled by extractBlacklist() and compiled inside compareHistos()).
gRegexBlackList = []
# ROOT class names (as reported by obj.__class__.__name__) that are counted
# and compared as histograms; every other class is counted as a plain object.
histos = ["TH1D", "TH1F", "TH2D", "TH2F", "TProfile"]
# Tags placed in the first slot of the (type, dict) tuples so that the
# comparison functions can tell the reference dictionary from the test one.
ref = "REFERENCE"
test = "TEST"
30 
31 # =============================================================================
32 # Method : rec( o, path=None, lst=None )
33 #
34 # @param o : a ROOT object
35 # @param path : a string like a transient store path; ie '/stat/CaloPIDs/ECALPIDE'
36 # @param lst : a list to hold (path, object) tuples
37 #
38 # function : recursively pull apart a ROOT file, making a list of (path, TObject) tuples
39 # This is done by GetListOfKeys method, which lets one work down through directories
40 # until you hit the Histo at the end of the path. The list of tuples is returned
41 #
42 
43 
def rec(o, path=None, lst=None):
    """Flatten a ROOT object tree into a list of ``(path, object)`` tuples.

    @param o    : object to visit; it must provide ``GetName()`` whenever
                  ``path`` is given, and is descended into when it exposes
                  ``GetListOfKeys()`` (children are fetched with ``Get(name)``)
    @param path : path of the parent object; a false value marks the
                  top-level call
    @param lst  : accumulator shared by the recursive calls

    On the top-level call the path is fixed to "/stat" and a fresh list is
    started (any ``lst`` passed in is ignored).  Returns the accumulator.
    """
    if path:
        path = "/".join((path, o.GetName()))
    else:
        # Top-level call: conventional root path and a brand-new accumulator.
        path, lst = "/stat", []
    lst.append((path, o))

    # Objects without keys are leaves: nothing to descend into.
    if "GetListOfKeys" not in dir(o):
        return lst

    for key in o.GetListOfKeys():
        rec(o.Get(key.GetName()), path, lst)
    return lst
59 
60 
61 # =============================================================================
62 
63 # =============================================================================
64 # Method : composition( t )
65 #
66 # @param t : a tuple of ( type, d ) where type is either 'REFERENCE' or 'TEST'
67 # and d is a dictionary of ROOT objects, with each key = ROOT path
68 #
69 # function : deduce the composition, (objects/histos) counts
70 #
71 
72 
def composition(t):
    """Count the plain objects and the histograms held in a (type, dict) tuple.

    @param t : a tuple ( type, d ); only the dictionary ``d`` (ROOT path ->
               object) is used

    An entry is a histogram when its class name is listed in ``histos``.
    Returns the tuple ``(objects, histograms)``.
    """
    _, d = t
    hists = sum(1 for obj in d.values() if obj.__class__.__name__ in histos)
    # Everything that is not a histogram counts as a plain object.
    return len(d) - hists, hists
83 
84 
85 # =============================================================================
86 
87 # =============================================================================
88 # Method : comparePaths( t1, t2 )
89 #
90 # @param t1, t2 : a tuple of ( type, d ) where type is either 'REFERENCE' or 'TEST'
91 # and d is a dictionary of ROOT objects, with each key = ROOT path
92 #
93 # function : compare the paths between the two histo files. If the files are identical, they
94 # should have the same set of paths. The Test file should definitely have the
95 # same paths as the Reference. Perhaps the Reference file will have some more paths due
96 # to extra histos added as part of Application Sequencer finalisation
97 # Arguments t1 and t2 are checked and the test/reference auto-detected
98 #
99 
100 
def comparePaths(t1, t2):
    """Compare the paths of the Reference and Test files and print a report.

    @param t1, t2 : tuples ( type, d ) where type is 'REFERENCE' or 'TEST' and
                    d is a dictionary of ROOT objects keyed by ROOT path; the
                    reference one is auto-detected, in either position

    Returns ``None`` (after printing a message) when neither tuple is tagged
    as reference, otherwise the tuple::

        ( ((refObjects, refHistos), (testObjects, testHistos)),
          (pathsOnlyInReference, histosOnlyInReference),
          (pathsOnlyInTest, histosOnlyInTest),
          matchingHistos )

    where the two ``pathsOnlyIn...`` items are sets of paths.
    """
    # deduce which one is test, which reference
    if t1[0] == ref:
        ds, dp = t1[1], t2[1]
    elif t2[0] == ref:
        ds, dp = t2[1], t1[1]
    else:
        print("Neither tuple is Reference Root file reference?")
        return

    def countHistos(d, paths):
        # number of the given paths whose object in d is a histogram
        return sum(1 for n in paths if d[n].__class__.__name__ in histos)

    # Sets are built straight from the dictionaries: dict views have no
    # .sort() in Python 3 and only the sizes of the key lists were ever used.
    sset = set(ds)
    pset = set(dp)
    refObjects, refHistos = composition((ref, ds))
    testObjects, testHistos = composition((test, dp))
    print("\n" + "=" * 80)
    print("Comparison of Paths : Reference vs Test ROOT files")
    print("-" * 80)
    print(
        "Number of paths in Reference file : %i (objects, histos) = ( %i, %i )"
        % (len(sset), refObjects, refHistos)
    )
    print(
        "Number of paths in Test file : %i (objects, histos) = ( %i, %i )"
        % (len(pset), testObjects, testHistos)
    )

    matching = sset & pset
    matchingHistos = countHistos(ds, matching)
    print("\nMatching paths : %i" % (len(matching)))

    # paths (and histos among them) present only in the reference file
    uSer = sset - pset
    uniqueReferenceHistos = countHistos(ds, uSer)
    print(
        "Paths unique to Reference file : %i ( %i Histos )"
        % (len(uSer), uniqueReferenceHistos)
    )
    # sorted so that the report is reproducible from run to run
    for n in sorted(uSer):
        print("\t%s : \t%s" % (ds[n], n))

    # paths (and histos among them) present only in the test file
    uPar = pset - sset
    uniqueTestHistos = countHistos(dp, uPar)
    print(
        "Paths unique to Test file : %i ( %i Histos )" % (len(uPar), uniqueTestHistos)
    )
    for n in sorted(uPar):
        print("\t%s : \t%s" % (dp[n], n))

    print("Matching Histos to test : %i" % (matchingHistos))
    print("=" * 80 + "\n")
    return (
        ((refObjects, refHistos), (testObjects, testHistos)),
        (uSer, uniqueReferenceHistos),
        (uPar, uniqueTestHistos),
        matchingHistos,
    )
170 
171 
172 # =============================================================================
173 
174 # =============================================================================
175 # Method : bin2binIdentity(h1,h2)
176 #
177 # @param h1, h2 : The two histograms to compare
178 # function : Return the number of different bins
179 
180 
def bin2binIdentity(h1, h2):
    """Return the number of cells whose content differs between two histograms.

    @param h1, h2 : the two histograms to compare; both must provide
                    GetBinContent(globalBin), and h1 must also provide
                    GetDimension() and GetNbinsX/Y/Z()

    Every cell of h1 is compared, underflow and overflow included.  ROOT
    numbers the cells of a histogram with a single global index running over
    (nbins + 2) slots per axis, so the scan has to cover that many cells;
    scanning only (nbins + 1) per axis would skip the last overflow bin of a
    1D histogram and some in-range bins of 2D/3D histograms.
    """

    def getNcells(h):
        # total number of cells, under/overflow included, for the axes in use
        dim = h.GetDimension()
        ncells = h.GetNbinsX() + 2
        if dim > 1:
            ncells *= h.GetNbinsY() + 2
        if dim > 2:
            ncells *= h.GetNbinsZ() + 2
        return ncells

    return sum(
        1
        for ibin in range(getNcells(h1))
        if h1.GetBinContent(ibin) != h2.GetBinContent(ibin)
    )
198 
199 
200 # =============================================================================
201 # Method : compareHistos( t1, t2, state, checkBin2BinIdentity )
202 #
203 # @param t1, t2 : a tuple of ( type, d ) where type is either 'REFERENCE' or 'TEST'
204 # and d is a dictionary of ROOT objects, with each key = ROOT path
205 #
206 # function : compare the histograms in Reference/Test ROOT files. First, go through each
207 # dict to collect the histos (ignore TDirectory objects, etc). Then the histos
208 # in the test file (experimental) are compared to their equivalents in the
209 # reference file (definitely correct) using the following checks.
210 # 1) The entries are checked, they should be equal
211 # 2) If entries are equal, check the Integral(); should be equal
212 # 3) If integrals are equal, check the KolmogorovTest() ; should be 1
213 # 4) If identity flag is there and KS test is performed, perform bin2bin identity test
214 # Arguments t1 and t2 are checked and the test/reference auto-detected
215 #
def _histoPaths(d, omit):
    """Return the paths of d that hold a histogram and match none of the omit regexps."""
    return [
        k
        for k in d
        if not any(regex.search(k) is not None for regex in omit)
        and d[k].__class__.__name__ in histos
    ]


def compareHistos(t1, t2, state, checkBin2BinIdentity):
    """Compare the histograms of the Reference and Test files and print a report.

    @param t1, t2 : tuples ( type, d ) where type is 'REFERENCE' or 'TEST' and
                    d is a dictionary of ROOT objects keyed by ROOT path; the
                    reference one is auto-detected, in either position
    @param state  : the tuple returned by comparePaths(); only used for the
                    final summary
    @param checkBin2BinIdentity : when true, histograms that qualify for the
                    Kolmogorov test are also compared bin by bin

    Histograms whose path matches one of the gRegexBlackList regexps are
    skipped.  For every remaining reference histogram also found in the test
    file:
      1) the number of entries must be equal, otherwise nothing else is tried
      2) if both integrals and both bin-error sums are non-zero, the
         KolmogorovTest() result must truncate to 1 (and, on request, the
         bin-to-bin identity check must find no differing bin)
      3) otherwise the integrals must be equal

    Returns the number of discrepancies (0 when the two sets are identical),
    or None, after printing a message, when neither tuple is the reference.
    """
    (
        ((referenceObjects, referenceHistos), (parallObjects, parallHistos)),
        (uniqueSerPaths, uniqueSerHistos),
        (uniqueParPaths, uniqueParHistos),
        mh,
    ) = state

    # deduce which one is test, which reference
    if t1[0] == ref:
        ds, dp = t1[1], t2[1]
    elif t2[0] == ref:
        ds, dp = t2[1], t1[1]
    else:
        print("Neither tuple is Reference Root file reference?")
        return

    omit = [re.compile(regex) for regex in gRegexBlackList]

    # histograms to consider in each file; the test side is only used for
    # membership tests, hence the set
    sHistos = _histoPaths(ds, omit)
    pHistos = set(_histoPaths(dp, omit))

    cEntries = 0
    xEntries = 0
    diffEntries = []
    xIntegrals = 0
    diffIntegrals = []
    failedKol = 0
    diffKols = []
    passedIdentity = 0
    failedIdentity = 0
    diffIdentity = []
    identityDiffBins = {}
    kTested = 0
    kTestResults = {}
    otherTest = 0
    for h in sHistos:
        if h not in pHistos:
            print("not found? ", h)
            continue

        # matching histos to check
        cEntries += 1
        sh = ds[h]
        ph = dp[h]
        # first check entries
        if sh.GetEntries() != ph.GetEntries():
            diffEntries.append(h)
            xEntries += 1
            continue

        # check for (non-zero sum of bin error) && (non-zero integrals) for K-Test
        # NOTE(review): the sums run over bins 0 .. GetNbinsX()-1, i.e. they
        # start at bin 0 and stop one short of GetNbinsX(); kept as is because
        # it decides which test is applied - confirm that this is intended.
        sBinError = sum((sh.GetBinError(i) for i in range(sh.GetNbinsX())), 0.0)
        pBinError = sum((ph.GetBinError(i) for i in range(ph.GetNbinsX())), 0.0)
        sint = sh.Integral()
        pint = ph.Integral()
        doKS = bool(sint) and bool(pint) and sBinError > 0 and pBinError > 0

        if checkBin2BinIdentity and doKS:
            diffBins = bin2binIdentity(sh, ph)
            if diffBins == 0:
                passedIdentity += 1
            else:
                failedIdentity += 1
                diffIdentity.append(h)
                identityDiffBins[h] = diffBins

        if doKS:
            kTested += 1
            kTest = sh.KolmogorovTest(ph)
            kTestResults[h] = kTest
            # only a result that truncates to a non-zero integer is a pass
            if not int(kTest):
                failedKol += 1
                diffKols.append(h)
        else:
            # fall back on the integral test
            otherTest += 1
            if sint != pint:
                diffIntegrals.append(h)
                xIntegrals += 1

    # report on Failed Entry-Checks
    print("\n\n" + "-" * 80)
    print("Summary of histos with different Entries")
    print("-" * 80)
    for e in sorted(diffEntries):
        print(
            "\t\t\t%s:\t%i != %i"
            % (e, int(ds[e].GetEntries()), int(dp[e].GetEntries()))
        )
    print("-" * 80)

    # report on Failed Kolmogorov Tests
    print("\n\n" + "-" * 60)
    print("Summary of histos which failed Kolmogorov Test")
    print("-" * 60)
    for e in sorted(diffKols):
        print(
            "%s\t\t%s :\tK-Test Result :\t %5.16f"
            % (ds[e].ClassName(), e, kTestResults[e])
        )
    print("-" * 60)

    # report on Failed Integral Checks
    print("\n\n" + "-" * 60)
    print("Summary of histos which failed Integral Check")
    print("-" * 60)
    for e in sorted(diffIntegrals):
        refIntegral = ds[e].Integral()
        diff = dp[e].Integral() - refIntegral
        if refIntegral:
            pc = (diff * 100) / refIntegral
        else:
            # An empty reference with a non-empty test histogram lands here:
            # the relative difference is unbounded, report it as +/- inf
            # instead of dying on a division by zero.
            pc = float("inf") if diff > 0 else float("-inf")
        print(
            "%s\t\t%s:\t Diff = %5.6f\tPercent Diff to Reference : %5.6f "
            % (ds[e].ClassName(), e, diff, pc)
        )
    print("-" * 60 + "\n")
    print("=" * 80 + "\n")

    # Report on failed bin2bin identity
    if checkBin2BinIdentity:
        print("\n\n" + "-" * 80)
        print("Summary of histos with at least one bin with different Entries")
        print("-" * 80)
        for e in sorted(diffIdentity):
            print(
                "%s\t\t%s: %i different bins"
                % (ds[e].ClassName(), e, identityDiffBins[e])
            )
        print("-" * 80)

    print("\n" + "=" * 80)
    print("Comparison : Reference/Test ROOT Histo files")
    print("\n\t\tReference\tTest")
    print(
        "\tObjects : %i\t%i\t\t( p-s = %i )"
        % (referenceObjects, parallObjects, parallObjects - referenceObjects)
    )
    print(
        "\tHistos : %i\t%i\t\t( p-s = %i )"
        % (referenceHistos, parallHistos, parallHistos - referenceHistos)
    )
    print("\t __________")
    print(
        "\tTotal : %i\t%i\n"
        % (referenceHistos + referenceObjects, parallHistos + parallObjects)
    )
    print(
        "Objects/Histos unique to Reference File : %i / %i"
        % (len(uniqueSerPaths) - uniqueSerHistos, uniqueSerHistos)
    )
    print(
        "Objects/Histos unique to Test File : %i / %i"
        % (len(uniqueParPaths) - uniqueParHistos, uniqueParHistos)
    )
    print("\nMatching Histograms valid for Comparison : %i" % (mh))
    print("\nOmissions' patterns : ")
    for entry in gRegexBlackList:
        print("\t%s" % (entry))
    # cEntries is the number of histograms that survived the omissions and
    # were found in both files (the number of patterns says nothing about
    # how many histograms they removed).
    print("\nHistograms for Comparison (after Omissions) : %i" % (cEntries))
    print("\n\tHISTOGRAM TESTS : ")
    print("\t\tKOLMOGOROV TEST : %i" % (kTested))
    print("\t\tINTEGRAL TEST : %i" % (otherTest))
    print("\t\tENTRIES TEST : %i" % (xEntries))
    if checkBin2BinIdentity:
        print("\t\tBIN2BIN TEST : %i" % (passedIdentity))
    print("\t\t ____")
    print("\t\tTested : %i" % (cEntries))

    print("\n\tDISCREPANCIES : ")
    print("\t\tK-Test : %i" % (failedKol))
    print("\t\tIntegrals : %i" % (xIntegrals))
    print("\t\tEntries : %i" % (xEntries))
    retval = failedKol + xIntegrals + xEntries + failedIdentity
    if retval != 0:
        print("\nThe two sets of histograms were not identical")
    print("\n" + "=" * 80)
    return retval
434 
435 
436 # =============================================================================
437 
438 
def extractBlacklist(listString):
    """Fill gRegexBlackList from a comma separated list of regexps.

    @param listString : comma separated regexps, or a false value (None, "")

    A non-empty string has its patterns appended to the current list; a false
    value resets the list to empty.  Empty patterns (from a leading, trailing
    or doubled comma) are dropped: an empty regexp matches every path and
    would silently remove all histograms from the comparison.
    """
    global gRegexBlackList
    if listString:
        gRegexBlackList.extend(
            blackRegexp for blackRegexp in listString.split(",") if blackRegexp
        )
    else:
        gRegexBlackList = []
446 
447 
448 # =============================================================================
449 
if __name__ == "__main__":
    # Command line: testFile.root referenceFile.root [-b regexps] [-i]
    # The exit status is the number of discrepancies found by compareHistos()
    # (0 when the two files agree), or 1 on a wrong number of arguments.
    usage = "usage: %prog testFile.root referenceFile.root [options]"
    # the usage text is handed to the parser so that %prog gets expanded
    parser = OptionParser(usage=usage)
    parser.add_option(
        "-b",
        dest="blacklist",
        help='Comma separated list of regexps matching histograms to skip (for example -b"MemoryTool,ProcTime")',
    )

    parser.add_option(
        "-i",
        action="store_true",
        dest="bin2bin",
        default=False,
        help="Check for bin to bin identity",
    )
    (options, args) = parser.parse_args()

    if len(args) != 2:
        print("Wrong number of rootfiles. Usage:")
        parser.print_usage()
        sys.exit(1)

    extractBlacklist(options.blacklist)

    testFile, referenceFile = args

    # Both files are only read.
    print("opening Test File : %s" % (testFile))
    testTFile = TFile(testFile, "READ")
    print("opening Reference File : %s" % (referenceFile))
    referenceTFile = TFile(referenceFile, "READ")

    try:
        # Structure of each TFile as a dictionary path -> object.  The
        # reference dictionary must come from the reference file: the report
        # labels and the percent differences are relative to it.
        dref = dict(rec(referenceTFile))
        dtest = dict(rec(testTFile))
        # make a tuple of (type, dict) where type is either 'reference' or 'test'
        ts = (ref, dref)
        tp = (test, dtest)

        # compare paths from each file
        state = comparePaths(ts, tp)

        # compare histos from each file
        retval = compareHistos(ts, tp, state, checkBin2BinIdentity=options.bin2bin)
    finally:
        # finished with TFiles
        testTFile.Close()
        referenceTFile.Close()

    sys.exit(retval)
AlgSequencer.all
all
Definition: AlgSequencer.py:56
compareRootHistos.composition
def composition(t)
Definition: compareRootHistos.py:73
compareRootHistos.compareHistos
def compareHistos(t1, t2, state, checkBin2BinIdentity)
Definition: compareRootHistos.py:216
compareRootHistos.rec
def rec(o, path=None, lst=None)
Definition: compareRootHistos.py:44
compareRootHistos.comparePaths
def comparePaths(t1, t2)
Definition: compareRootHistos.py:101
compareRootHistos.extractBlacklist
def extractBlacklist(listString)
Definition: compareRootHistos.py:439
compareRootHistos.bin2binIdentity
def bin2binIdentity(h1, h2)
Definition: compareRootHistos.py:181
Gaudi::Functional::details::zip::range
decltype(auto) range(Args &&... args)
Zips multiple containers together to form a single range.
Definition: details.h:98