The Gaudi Framework  v36r13 (995e4364)
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
compareRootHistos.py
Go to the documentation of this file.
1 #! /usr/bin/env python3
2 
12 from __future__ import print_function
13 
14 import re
15 import sys
16 from optparse import OptionParser
17 
# Well known trick: hide the command line while ROOT is imported, then restore
# it (presumably so that ROOT does not consume this script's own options --
# TODO confirm against the PyROOT documentation)
backupArgv = sys.argv[:]
sys.argv = []
from ROOT import TFile

sys.argv = backupArgv

# The list of histograms not to check. Expressed as regexps
gRegexBlackList = []
# Class names (matched against obj.__class__.__name__) that count as histograms
histos = ["TH1D", "TH1F", "TH2D", "TH2F", "TProfile"]
# Labels tagging the (type, dict) tuples handed to the comparison functions
ref = "REFERENCE"
test = "TEST"
30 
31 # =============================================================================
32 # Method : rec( o, path=None, lst=None )
33 #
34 # @param o : a ROOT object
35 # @param path : a string like a transient store path; ie '/stat/CaloPIDs/ECALPIDE'
36 # @param lst : a list to hold (path, object) tuples
37 #
38 # function : recursively pull apart a ROOT file, making a list of (path, TObject) tuples
39 # This is done by GetListOfKeys method, which lets one work down through directories
40 # until you hit the Histo at the end of the path. The list of tuples is returned
41 #
42 
43 
def rec(o, path=None, lst=None):
    """Recursively flatten a ROOT object tree into (path, object) tuples.

    @param o    : a ROOT object (anything exposing GetName(); containers also
                  expose GetListOfKeys() and Get(name))
    @param path : path of the parent of ``o``; when empty or None, ``o`` is
                  treated as the root, gets the path '/stat' and is itself
                  not recorded
    @param lst  : list collecting the (path, object) tuples; a new list is
                  created when None
    @return the list of (path, object) tuples
    """
    # Handle the accumulator independently of the path, so that a call with an
    # explicit path but no list does not fail on None.append and a list passed
    # by the caller is always the one that gets filled.
    if lst is None:
        lst = []
    if not path:
        path = "/stat"
    else:
        path = path + "/" + o.GetName()
        lst.append((path, o))
    # Only containers can be descended into; anything else is a leaf
    if hasattr(o, "GetListOfKeys"):
        for key in o.GetListOfKeys():
            rec(o.Get(key.GetName()), path, lst)
    return lst
59 
60 
61 # =============================================================================
62 
63 # =============================================================================
64 # Method : composition( t )
65 #
66 # @param t : a tuple of ( type, d ) where type is either 'REFERENCE' or 'TEST'
67 # and d is a dictionary of ROOT objects, with each key = ROOT path
68 #
69 # function : deduce the composition, (objects/histos) counts
70 #
71 
72 
def composition(t):
    """Count the plain objects and the histograms stored in a (type, dict) tuple.

    @param t : a tuple of ( type, d ); only d is used, a dictionary of objects
               keyed by path
    @return ( objects, histos ) : number of entries whose class name is not /
            is listed in the module-level ``histos``
    """
    _label, objects = t
    nHistos = sum(
        1 for path in objects if objects[path].__class__.__name__ in histos
    )
    # whatever is not a histogram counts as a plain object
    return len(objects) - nHistos, nHistos
83 
84 
85 # =============================================================================
86 
87 # =============================================================================
88 # Method : comparePaths( t1, t2 )
89 #
90 # @param t1, t2 : a tuple of ( type, d ) where type is either 'REFERENCE' or 'TEST'
91 # and d is a dictionary of ROOT objects, with each key = ROOT path
92 #
93 # function : compare the paths between the two histo files. If the files are identical, they
94 # should have the same set of paths. The Test file should definitely have the
95 # same paths as the Reference. Perhaps the Reference file will have some more paths due
96 # to extra histos added as part of Application Sequencer finalisation
97 # Arguments t1 and t2 are checked and the test/reference auto-detected
98 #
99 
100 
def comparePaths(t1, t2):
    """Compare the sets of paths found in the Reference and Test files and print a report.

    @param t1, t2 : tuples of ( type, d ) where type is either 'REFERENCE' or
                    'TEST' and d is a dictionary of ROOT objects keyed by path;
                    which one is the reference is auto-detected
    @return a tuple
              ( ((refObjects, refHistos), (testObjects, testHistos)),
                (pathsUniqueToReference, histosUniqueToReference),
                (pathsUniqueToTest, histosUniqueToTest),
                matchingHistos )
            or None when neither tuple is labelled as the reference
    """
    if t1[0] == ref:
        ds = t1[1]
        dp = t2[1]
    elif t2[0] == ref:
        ds = t2[1]
        dp = t1[1]
    else:
        print("Neither tuple is Reference Root file reference?")
        return

    # dict.keys() is a view under Python 3 and has no sort(); only the sets of
    # paths and their sizes are needed here anyway.
    sset = set(ds)
    pset = set(dp)
    refObjs, refHistos = composition((ref, ds))
    testObjs, testHistos = composition((test, dp))
    print("\n" + "=" * 80)
    print("Comparison of Paths : Reference vs Test ROOT files")
    print("-" * 80)
    print(
        "Number of paths in Reference file : %i (objects, histos) = ( %i, %i )"
        % (len(ds), refObjs, refHistos)
    )
    print(
        "Number of paths in Test file : %i (objects, histos) = ( %i, %i )"
        % (len(dp), testObjs, testHistos)
    )
    matching = sset.intersection(pset)
    matchingHistos = sum(1 for n in matching if ds[n].__class__.__name__ in histos)
    print("\nMatching paths : %i" % (len(matching)))
    # work out histos unique to reference file
    uSer = sset - pset
    uniqueReferenceHistos = sum(
        1 for n in uSer if ds[n].__class__.__name__ in histos
    )
    print(
        "Paths unique to Reference file : %i ( %i Histos )"
        % (len(uSer), uniqueReferenceHistos)
    )
    # sorted so that the report does not depend on set iteration order
    for n in sorted(uSer):
        print("\t%s : \t%s" % (ds[n], n))
    # same for the test file
    uPar = pset - sset
    uniqueTestHistos = sum(1 for n in uPar if dp[n].__class__.__name__ in histos)
    print(
        "Paths unique to Test file : %i ( %i Histos )" % (len(uPar), uniqueTestHistos)
    )
    for n in sorted(uPar):
        print("\t%s : \t%s" % (dp[n], n))
    print("Matching Histos to test : %i" % (matchingHistos))
    print("=" * 80 + "\n")
    return (
        ((refObjs, refHistos), (testObjs, testHistos)),
        (uSer, uniqueReferenceHistos),
        (uPar, uniqueTestHistos),
        matchingHistos,
    )
170 
171 
172 # =============================================================================
173 
174 # =============================================================================
175 # Method : bin2binIdentity(h1,h2)
176 #
177 # @param h1, h2 : The two histograms to compare
178 # function : Return the number of different bins
179 
180 
def bin2binIdentity(h1, h2):
    """Return the number of bins whose content differs between two histograms.

    @param h1, h2 : the two histograms to compare; h1 defines the range of
                    bins that is scanned
    @return the number of global bin indices at which
            h1.GetBinContent(i) != h2.GetBinContent(i)
    """
    # GetNcells() is the size of ROOT's linearised bin array (under/overflow
    # cells of every axis included), i.e. exactly the range of global bin
    # indices accepted by GetBinContent(). Deriving the count by hand from
    # GetNbinsX/Y/Z left the trailing cells (e.g. the 1D overflow bin)
    # uncompared.
    ncells = h1.GetNcells()
    return sum(
        1
        for ibin in range(ncells)
        if h1.GetBinContent(ibin) != h2.GetBinContent(ibin)
    )
198 
199 
200 # =============================================================================
201 # Method : compareHistos( t1, t2, state, checkBin2BinIdentity )
202 #
203 # @param t1, t2 : a tuple of ( type, d ) where type is either 'REFERENCE' or 'TEST'
204 # and d is a dictionary of ROOT objects, with each key = ROOT path
205 #
206 # function : compare the histograms in Reference/Test ROOT files. First, go through each
207 # dict to collect the histos (ignore TDirectory objects, etc). Then the histos
208 # in the test file (experimental) are compared to their equivalents in the
209 # reference file (definitely correct) using the following checks.
210 # 1) The entries are checked, they should be equal
211 # 2) If entries are equal and both histograms have non-zero integral and summed bin errors, check the KolmogorovTest() ; should be 1
212 # 3) Otherwise, check the Integral(); should be equal
213 # 4) If identity flag is there and KS test is performed, perform bin2bin identity test
214 # Arguments t1 and t2 are checked and the test/reference auto-detected
215 #
def compareHistos(t1, t2, state, checkBin2BinIdentity):
    """Compare the histograms of the Reference and Test files and print a report.

    Histograms whose path matches one of the patterns in ``gRegexBlackList``
    are skipped. For every remaining histogram present in both files:
      1) the number of entries must be equal, otherwise no further check is done;
      2) if both integrals and both summed bin errors are non-zero, the
         Kolmogorov test is run and must return 1 (preceded by the bin-to-bin
         identity check when ``checkBin2BinIdentity`` is set);
      3) otherwise the integrals must be equal.

    @param t1, t2 : tuples of ( type, d ) where type is either 'REFERENCE' or
                    'TEST' and d is a dictionary of ROOT objects keyed by path;
                    which one is the reference is auto-detected
    @param state  : the tuple returned by comparePaths( t1, t2 )
    @param checkBin2BinIdentity : also require bin-by-bin identical contents
    @return the total number of discrepancies (entries + integrals +
            Kolmogorov + bin-to-bin), or None when neither tuple is labelled
            as the reference
    """
    (
        ((referenceObjects, referenceHistos), (parallObjects, parallHistos)),
        (uniqueSerPaths, uniqueSerHistos),
        (uniqueParPaths, uniqueParHistos),
        mh,
    ) = state

    # deduce which one is test, which reference
    if t1[0] == ref:
        ds = t1[1]
        dp = t2[1]
    elif t2[0] == ref:
        ds = t2[1]
        dp = t1[1]
    else:
        print("Neither tuple is Reference Root file reference?")
        return

    omit = [re.compile(regex) for regex in gRegexBlackList]

    def selectHistos(d):
        """Paths of d holding a histogram that no omission pattern matches."""
        return [
            k
            for k in d
            if d[k].__class__.__name__ in histos
            and not any(regex.search(k) for regex in omit)
        ]

    sHistos = selectHistos(ds)
    # a set: it is only used for membership tests, once per reference histo
    pHistos = set(selectHistos(dp))

    cEntries = 0
    diffEntries = []
    diffIntegrals = []
    diffKols = []
    passedIdentity = 0
    diffIdentity = []
    identityDiffBins = {}
    kTested = 0
    kTestResults = {}
    otherTest = 0
    for h in sHistos:
        if h not in pHistos:
            print("not found? ", h)
            continue
        # matching histos to check
        cEntries += 1
        sh = ds[h]
        ph = dp[h]
        # first check entries
        if sh.GetEntries() != ph.GetEntries():
            diffEntries.append(h)
            continue
        # check for (non-zero sum of bin error) && (non-zero integrals) for K-Test
        # NOTE(review): range(GetNbinsX()) scans bins 0..N-1, i.e. it includes
        # bin 0 and stops before bin N -- confirm that this is intended.
        sBinError = sum(sh.GetBinError(i) for i in range(sh.GetNbinsX()))
        pBinError = sum(ph.GetBinError(i) for i in range(ph.GetNbinsX()))
        sint = sh.Integral()
        pint = ph.Integral()
        doKS = (bool(sint) and bool(pint)) and (sBinError > 0 and pBinError > 0)
        if doKS:
            if checkBin2BinIdentity:
                diffBins = bin2binIdentity(sh, ph)
                if diffBins == 0:
                    passedIdentity += 1
                else:
                    diffIdentity.append(h)
                    identityDiffBins[h] = diffBins
            kTested += 1
            kTest = sh.KolmogorovTest(ph)
            kTestResults[h] = kTest
            # only a result of (at least) 1 counts as a pass
            if not int(kTest):
                diffKols.append(h)
        else:
            # fall back to the integral test
            otherTest += 1
            if sint != pint:
                diffIntegrals.append(h)

    xEntries = len(diffEntries)
    xIntegrals = len(diffIntegrals)
    failedKol = len(diffKols)
    failedIdentity = len(diffIdentity)

    # report on Failed Entry-Checks
    print("\n\n" + "-" * 80)
    print("Summary of histos with different Entries")
    print("-" * 80)
    for e in sorted(diffEntries):
        print(
            "\t\t\t%s:\t%i != %i"
            % (e, int(ds[e].GetEntries()), int(dp[e].GetEntries()))
        )
    print("-" * 80)

    # report on Failed Kolmogorov Tests
    print("\n\n" + "-" * 60)
    print("Summary of histos which failed Kolmogorov Test")
    print("-" * 60)
    for e in sorted(diffKols):
        print(
            "%s\t\t%s :\tK-Test Result :\t %5.16f"
            % (ds[e].ClassName(), e, kTestResults[e])
        )
    print("-" * 60)

    # report on Failed Integral Checks
    print("\n\n" + "-" * 60)
    print("Summary of histos which failed Integral Check")
    print("-" * 60)
    for e in sorted(diffIntegrals):
        refIntegral = ds[e].Integral()
        diff = dp[e].Integral() - refIntegral
        if refIntegral:
            pc = (diff * 100) / refIntegral
        else:
            # an empty reference against a filled test histogram ends up here:
            # there is no finite percentage, and dividing would raise
            pc = float("inf") if diff > 0 else float("-inf")
        print(
            "%s\t\t%s:\t Diff = %5.6f\tPercent Diff to Reference : %5.6f "
            % (ds[e].ClassName(), e, diff, pc)
        )
    print("-" * 60 + "\n")
    print("=" * 80 + "\n")

    # Report on failed bin2bin identity
    if checkBin2BinIdentity:
        print("\n\n" + "-" * 80)
        print("Summary of histos with at least one bin with different Entries")
        print("-" * 80)
        for e in sorted(diffIdentity):
            print(
                "%s\t\t%s: %i different bins"
                % (ds[e].ClassName(), e, identityDiffBins[e])
            )
        print("-" * 80)

    print("\n" + "=" * 80)
    print("Comparison : Reference/Test ROOT Histo files")
    print("\n\t\tReference\tTest")
    print(
        "\tObjects : %i\t%i\t\t( p-s = %i )"
        % (referenceObjects, parallObjects, parallObjects - referenceObjects)
    )
    print(
        "\tHistos : %i\t%i\t\t( p-s = %i )"
        % (referenceHistos, parallHistos, parallHistos - referenceHistos)
    )
    print("\t __________")
    print(
        "\tTotal : %i\t%i\n"
        % (referenceHistos + referenceObjects, parallHistos + parallObjects)
    )
    print(
        "Objects/Histos unique to Reference File : %i / %i"
        % (len(uniqueSerPaths) - uniqueSerHistos, uniqueSerHistos)
    )
    print(
        "Objects/Histos unique to Test File : %i / %i"
        % (len(uniqueParPaths) - uniqueParHistos, uniqueParHistos)
    )
    print("\nMatching Histograms valid for Comparison : %i" % (mh))
    print("\nOmissions' patterns : ")
    for entry in gRegexBlackList:
        print("\t%s" % (entry))
    # the histograms left after omissions are exactly those compared above;
    # subtracting the number of patterns from mh has no relation to that count
    print("\nHistograms for Comparison (after Omissions) : %i" % (cEntries))
    print("\n\tHISTOGRAM TESTS : ")
    print("\t\tKOLMOGOROV TEST : %i" % (kTested))
    print("\t\tINTEGRAL TEST : %i" % (otherTest))
    print("\t\tENTRIES TEST : %i" % (xEntries))
    if checkBin2BinIdentity:
        print("\t\tBIN2BIN TEST : %i" % (passedIdentity))
    print("\t\t ____")
    print("\t\tTested : %i" % (cEntries))

    print("\n\tDISCREPANCIES : ")
    print("\t\tK-Test : %i" % (failedKol))
    print("\t\tIntegrals : %i" % (xIntegrals))
    print("\t\tEntries : %i" % (xEntries))
    if checkBin2BinIdentity:
        # these failures are part of the return value, so show them as well
        print("\t\tBin2Bin : %i" % (failedIdentity))
    retval = failedKol + xIntegrals + xEntries + failedIdentity
    if retval != 0:
        print("\nThe two sets of histograms were not identical")
    print("\n" + "=" * 80)
    return retval
435 
436 
437 # =============================================================================
438 
439 
def extractBlacklist(listString):
    """Fill the module-level ``gRegexBlackList`` from a comma separated string.

    @param listString : comma separated regular expressions matching the paths
                        of the histograms to skip; when empty or None the
                        black list is reset to an empty list
    """
    global gRegexBlackList
    if listString:
        # Drop empty entries (",," or a trailing comma): an empty pattern
        # matches every path and would silently exclude all histograms.
        gRegexBlackList.extend(
            blackRegexp for blackRegexp in listString.split(",") if blackRegexp
        )
    else:
        gRegexBlackList = []
447 
448 
449 # =============================================================================
450 
if __name__ == "__main__":
    # Command line driver: compareRootHistos.py testFile.root referenceFile.root
    # The exit code is the number of discrepancies found (0 when identical).
    usage = "usage: %prog testFile.root referenceFile.root [options]"
    parser = OptionParser(usage=usage)
    parser.add_option(
        "-b",
        dest="blacklist",
        help='Comma separated list of regexps matching histograms to skip (for example -b"MemoryTool,ProcTime")',
    )

    parser.add_option(
        "-i",
        action="store_true",
        dest="bin2bin",
        default=False,
        help="Check for bin to bin identity",
    )
    (options, args) = parser.parse_args()

    if len(args) != 2:
        print("Wrong number of rootfiles. Usage:")
        parser.print_usage()
        sys.exit(1)

    extractBlacklist(options.blacklist)

    testFile, referenceFile = args

    # "READ" states the intent explicitly: the files are only inspected
    print("opening Test File : %s" % (testFile))
    testTFile = TFile(testFile, "READ")
    print("opening Reference File : %s" % (referenceFile))
    refTFile = TFile(referenceFile, "READ")

    try:
        # A file that could not be opened would otherwise be compared as an
        # empty one and reported as a success.
        for fileName, tfile in ((testFile, testTFile), (referenceFile, refTFile)):
            if tfile.IsZombie():
                print("Cannot open ROOT file : %s" % (fileName))
                sys.exit(1)

        # get structure of TFiles as dictionaries of objects keyed by path;
        # the reference dictionary is built from the file given as reference
        # (second argument), the test one from the file given as test
        dref = dict(rec(refTFile))
        dtest = dict(rec(testTFile))
        # make a tuple of (type, dict) where type is either 'reference' or 'test'
        ts = (ref, dref)
        tp = (test, dtest)

        # compare paths from each file
        state = comparePaths(ts, tp)

        # compare histos from each file
        retval = compareHistos(ts, tp, state, checkBin2BinIdentity=options.bin2bin)
    finally:
        # finished with TFiles
        testTFile.Close()
        refTFile.Close()

    sys.exit(retval)
AlgSequencer.all
all
Definition: AlgSequencer.py:61
compareRootHistos.composition
def composition(t)
Definition: compareRootHistos.py:73
compareRootHistos.compareHistos
def compareHistos(t1, t2, state, checkBin2BinIdentity)
Definition: compareRootHistos.py:216
compareRootHistos.rec
def rec(o, path=None, lst=None)
Definition: compareRootHistos.py:44
compareRootHistos.comparePaths
def comparePaths(t1, t2)
Definition: compareRootHistos.py:101
compareRootHistos.extractBlacklist
def extractBlacklist(listString)
Definition: compareRootHistos.py:440
compareRootHistos.bin2binIdentity
def bin2binIdentity(h1, h2)
Definition: compareRootHistos.py:181
Gaudi::Functional::details::zip::range
decltype(auto) range(Args &&... args)
Zips multiple containers together to form a single range.
Definition: FunctionalDetails.h:102