The Gaudi Framework  master (37c0b60a)
compareRootHistos.py
Go to the documentation of this file.
1 #! /usr/bin/env python3
2 
12 
13 import re
14 import sys
15 from optparse import OptionParser
16 
# Well known trick: the command line is hidden while ROOT is imported and
# restored right afterwards.
# NOTE(review): presumably this keeps ROOT from consuming this script's own
# command-line options at import time -- confirm against the PyROOT docs.
backupArgv = sys.argv[:]
sys.argv = []
from ROOT import TFile

sys.argv = backupArgv

# The list of histograms not to check. Expressed as regexps
gRegexBlackList = []
# ROOT class names that the comparison functions count as histograms
histos = ["TH1D", "TH1F", "TH2D", "TH2F", "TProfile"]
# Tags used as first element of the (type, dict) tuples to tell the
# reference file apart from the test file
ref = "REFERENCE"
test = "TEST"
29 
30 # =============================================================================
31 # Method : rec( o, path=None, lst=None )
32 #
33 # @param o : a ROOT object
34 # @param path : a string like a transient store path; ie '/stat/CaloPIDs/ECALPIDE'
35 # @param lst : a list to hold (path, object) tuples
36 #
37 # function : recursively pull apart a ROOT file, making a list of (path, TObject) tuples
38 # This is done by GetListOfKeys method, which lets one work down through directories
39 # until you hit the Histo at the end of the path. The list of tuples is returned
40 #
41 
42 
def rec(o, path=None, lst=None):
    """Recursively flatten a ROOT object tree into (path, object) tuples.

    @param o    : a ROOT object (anything exposing GetName, and optionally
                  GetListOfKeys/Get for directory-like objects)
    @param path : path of the parent; when empty or None the call is treated
                  as the top-level one, the path starts at '/stat' and a new
                  result list is created (``o`` itself is not recorded)
    @param lst  : list collecting the (path, object) tuples on recursion

    @return the list of (path, object) tuples
    """
    if path:
        # nested call: record this object below its parent path
        path = path + "/" + o.GetName()
        lst.append((path, o))
    else:
        # top-level call: start a fresh walk
        path = "/stat"
        lst = []

    # only directory-like objects expose GetListOfKeys; leaves stop here
    if "GetListOfKeys" in dir(o):
        for key in o.GetListOfKeys():
            rec(o.Get(key.GetName()), path, lst)
    return lst
58 
59 
60 # =============================================================================
61 
62 # =============================================================================
63 # Method : composition( t )
64 #
65 # @param t : a tuple of ( type, d ) where type is either 'REFERENCE' or 'TEST'
66 # and d is a dictionary of ROOT objects, with each key = ROOT path
67 #
68 # function : deduce the composition, (objects/histos) counts
69 #
70 
71 
def composition(t):
    """Count plain objects and histograms in one file's path dictionary.

    @param t : a tuple of ( type, d ) where d maps ROOT path -> ROOT object;
               the type tag is not used here

    @return (objects, histos): an entry counts as a histogram when its class
            name is listed in the module-level ``histos``, otherwise as an
            object
    """
    _, objectsByPath = t
    nHistos = sum(
        1 for obj in objectsByPath.values() if obj.__class__.__name__ in histos
    )
    return len(objectsByPath) - nHistos, nHistos
82 
83 
84 # =============================================================================
85 
86 # =============================================================================
87 # Method : comparePaths( t1, t2 )
88 #
89 # @param t1, t2 : a tuple of ( type, d ) where type is either 'REFERENCE' or 'TEST'
90 # and d is a dictionary of ROOT objects, with each key = ROOT path
91 #
92 # function : compare the paths between the two histo files. If the files are identical, they
93 # should have the same set of paths. The Test file should definitely have the
94 # same paths as the Reference. Perhaps the Reference file will have some more paths due
95 # to extra histos added as part of Application Sequencer finalisation
96 # Arguments t1 and t2 are checked and the test/reference auto-detected
97 #
98 
99 
def comparePaths(t1, t2):
    """Compare the sets of paths found in the Reference and Test files.

    Prints a report (path counts, matching paths, paths unique to each file)
    and returns the numbers needed later by compareHistos.

    @param t1, t2 : tuples of ( type, d ) where type is either 'REFERENCE' or
                    'TEST' and d maps ROOT path -> ROOT object. The order of
                    the two arguments does not matter, the reference is
                    auto-detected from the type tag.

    @return ( ((refObjects, refHistos), (testObjects, testHistos)),
              (pathsUniqueToReference, histosUniqueToReference),
              (pathsUniqueToTest, histosUniqueToTest),
              matchingHistos )
            or None (after printing a message) if neither tuple is tagged as
            the reference.
    """
    if t1[0] == ref:
        ds = t1[1]
        dp = t2[1]
    elif t2[0] == ref:
        ds = t2[1]
        dp = t1[1]
    else:
        print("Neither tuple is Reference Root file reference?")
        return

    # dict.keys() is a view in Python 3 and has no sort() method
    dsks = sorted(ds)
    dpks = sorted(dp)

    sset = set(dsks)
    pset = set(dpks)
    refObjs, refHists = composition((ref, ds))
    testObjs, testHists = composition((test, dp))
    print("\n" + "=" * 80)
    print("Comparison of Paths : Reference vs Test ROOT files")
    print("-" * 80)
    print(
        "Number of paths in Reference file : %i (objects, histos) = ( %i, %i )"
        % (len(dsks), refObjs, refHists)
    )
    print(
        "Number of paths in Test file : %i (objects, histos) = ( %i, %i )"
        % (len(dpks), testObjs, testHists)
    )

    matching = sset & pset
    matchingHistos = sum(1 for n in matching if ds[n].__class__.__name__ in histos)
    print("\nMatching paths : %i" % (len(matching)))

    # work out histos unique to reference file
    uSer = sset - pset
    uniqueReferenceHistos = sum(
        1 for n in uSer if ds[n].__class__.__name__ in histos
    )
    print(
        "Paths unique to Reference file : %i ( %i Histos )"
        % (len(uSer), uniqueReferenceHistos)
    )
    # sorted so that the report is reproducible from run to run
    for n in sorted(uSer):
        print("\t%s : \t%s" % (ds[n], n))

    # work out histos unique to test file
    uPar = pset - sset
    uniqueTestHistos = sum(1 for n in uPar if dp[n].__class__.__name__ in histos)
    print(
        "Paths unique to Test file : %i ( %i Histos )" % (len(uPar), uniqueTestHistos)
    )
    for n in sorted(uPar):
        print("\t%s : \t%s" % (dp[n], n))

    print("Matching Histos to test : %i" % (matchingHistos))
    print("=" * 80 + "\n")
    return (
        ((refObjs, refHists), (testObjs, testHists)),
        (uSer, uniqueReferenceHistos),
        (uPar, uniqueTestHistos),
        matchingHistos,
    )
169 
170 
171 # =============================================================================
172 
173 # =============================================================================
174 # Method : bin2binIdentity(h1,h2)
175 #
176 # @param h1, h2 : The two histograms to compare
177 # function : Return the number of different bins
178 
179 
def bin2binIdentity(h1, h2):
    """Return the number of cells whose content differs between h1 and h2.

    @param h1, h2 : the two histograms to compare; h1 defines the number of
                    cells that are visited

    The comparison runs over every global cell index of h1 as reported by
    GetNcells(), i.e. all axes with their underflow and overflow cells.
    A hand-computed (nx+1)*(ny+1)*(nz+1) count is smaller than the global
    index range, so it would skip the last overflow cell of 1D histograms
    and the top in-range rows of 2D/3D ones.

    @return number of cells for which GetBinContent differs
    """
    ncells = h1.GetNcells()
    return sum(
        1
        for cell in range(ncells)
        if h1.GetBinContent(cell) != h2.GetBinContent(cell)
    )
197 
198 
199 # =============================================================================
200 # Method : compareHistos( t1, t2 )
201 #
202 # @param t1, t2 : a tuple of ( type, d ) where type is either 'REFERENCE' or 'TEST'
203 # and d is a dictionary of ROOT objects, with each key = ROOT path
204 #
205 # function : compare the histograms in Reference/Test ROOT files. First, go through each
206 # dict to collect the histos (ignore TDirectory objects, etc). Then the histos
207 # in the test file (experimental) are compared to their equivalents in the
208 # reference file (definitely correct) using the following checks.
209 # 1) The entries are checked, they should be equal
210 # 2) If entries are equal, check the Integral(); should be equal
211 # 3) If integrals are equal, check the KolmogorovTest() ; should be 1
212 # 4) If identity flag is there and KS test is performed, perform bin2bin identity test
213 # Arguments t1 and t2 are checked and the test/reference auto-detected
214 #
def compareHistos(t1, t2, state, checkBin2BinIdentity):
    """Compare the histograms of the Reference and Test files and print a report.

    @param t1, t2 : tuples of ( type, d ) where type is either 'REFERENCE' or
                    'TEST' and d maps ROOT path -> ROOT object; the reference
                    is auto-detected from the type tag
    @param state  : the tuple returned by comparePaths for the same files
    @param checkBin2BinIdentity : when true, histograms that qualify for the
                    Kolmogorov test are also compared cell by cell

    Histograms whose path matches one of the regexps in gRegexBlackList are
    skipped. Every remaining reference histogram that also exists in the test
    file goes through:
      1) entries check, a mismatch is recorded and the histogram is done
      2) if both integrals and both bin-error sums are non-zero:
         optional bin2bin identity check, then KolmogorovTest (must be 1)
      3) otherwise the integrals must be equal

    @return the number of discrepancies (failed K-tests + integral mismatches
            + entries mismatches + failed identity checks), or None (after
            printing a message) if neither tuple is tagged as the reference
    """
    (
        ((referenceObjects, referenceHistos), (parallObjects, parallHistos)),
        (uniqueSerPaths, uniqueSerHistos),
        (uniqueParPaths, uniqueParHistos),
        mh,
    ) = state

    # deduce which one is test, which reference
    if t1[0] == ref:
        ds = t1[1]
        dp = t2[1]
    elif t2[0] == ref:
        ds = t2[1]
        dp = t1[1]
    else:
        print("Neither tuple is Reference Root file reference?")
        return

    omit = [re.compile(regex) for regex in gRegexBlackList]

    def _selectedHistos(d):
        # paths of the histograms in d that no omission pattern matches
        return [
            k
            for k in d
            if d[k].__class__.__name__ in histos
            and not any(regex.search(k) is not None for regex in omit)
        ]

    sHistos = _selectedHistos(ds)
    # a set, so that the membership test in the loop below is O(1)
    pHistos = set(_selectedHistos(dp))

    cEntries = 0
    xEntries = 0
    diffEntries = []
    xIntegrals = 0
    diffIntegrals = []
    failedKol = 0
    diffKols = []
    passedIdentity = 0
    failedIdentity = 0
    diffIdentity = []
    identityDiffBins = {}
    kTested = 0
    kTestResults = {}
    otherTest = 0
    for h in sHistos:
        if h not in pHistos:
            print("not found? ", h)
            continue

        # matching histos to check
        cEntries += 1
        sh = ds[h]
        ph = dp[h]
        # first check entries
        if sh.GetEntries() != ph.GetEntries():
            diffEntries.append(h)
            xEntries += 1
            continue

        # check for (non-zero sum of bin error) && (non-zero integrals) for K-Test
        # NOTE(review): the sums run over bin indices 0 .. GetNbinsX()-1, which
        # looks like underflow plus all but the last X bin -- confirm whether
        # 1 .. GetNbinsX() was intended before changing it.
        sBinError = sum((sh.GetBinError(i) for i in range(sh.GetNbinsX())), 0.0)
        pBinError = sum((ph.GetBinError(i) for i in range(ph.GetNbinsX())), 0.0)
        sint = sh.Integral()
        pint = ph.Integral()
        doKS = (bool(sint) and bool(pint)) and (sBinError > 0 and pBinError > 0)

        if checkBin2BinIdentity and doKS:
            diffBins = bin2binIdentity(sh, ph)
            if diffBins == 0:
                passedIdentity += 1
            else:
                failedIdentity += 1
                diffIdentity.append(h)
                identityDiffBins[h] = diffBins

        if doKS:
            kTested += 1
            kTest = sh.KolmogorovTest(ph)
            kTestResults[h] = kTest
            # only a result that truncates to a non-zero integer is a pass
            if not int(kTest):
                failedKol += 1
                diffKols.append(h)
        else:
            # fall back to the integral test
            otherTest += 1
            if sint != pint:
                diffIntegrals.append(h)
                xIntegrals += 1

    # report on Failed Entry-Checks
    print("\n\n" + "-" * 80)
    print("Summary of histos with different Entries")
    print("-" * 80)
    if diffEntries:
        diffEntries.sort()
        for e in diffEntries:
            print(
                "\t\t\t%s:\t%i != %i"
                % (e, int(ds[e].GetEntries()), int(dp[e].GetEntries()))
            )
    print("-" * 80)

    # report on Failed Kolmogorov Tests
    print("\n\n" + "-" * 60)
    print("Summary of histos which failed Kolmogorov Test")
    print("-" * 60)
    if diffKols:
        diffKols.sort()
        for e in diffKols:
            # reuse the value stored during the comparison loop
            result = kTestResults[e]
            print(
                "%s\t\t%s :\tK-Test Result :\t %5.16f" % (ds[e].ClassName(), e, result)
            )
    print("-" * 60)

    # report on Failed Integral Checks
    print("\n\n" + "-" * 60)
    print("Summary of histos which failed Integral Check")
    print("-" * 60)
    if diffIntegrals:
        diffIntegrals.sort()
        for e in diffIntegrals:
            refIntegral = ds[e].Integral()
            diff = dp[e].Integral() - refIntegral
            if refIntegral:
                pc = (diff * 100) / refIntegral
            else:
                # an empty reference against a filled test histogram lands
                # here; the relative difference is unbounded, do not divide
                pc = float("inf") if diff > 0 else float("-inf")
            print(
                "%s\t\t%s:\t Diff = %5.6f\tPercent Diff to Reference : %5.6f "
                % (ds[e].ClassName(), e, diff, pc)
            )
    print("-" * 60 + "\n")
    print("=" * 80 + "\n")

    # Report on failed bin2bin identity
    if checkBin2BinIdentity:
        # report on b2b checks
        print("\n\n" + "-" * 80)
        print("Summary of histos with at least one bin with different Entries")
        print("-" * 80)
        if diffIdentity:
            diffIdentity.sort()
            for e in diffIdentity:
                print(
                    "%s\t\t%s: %i different bins"
                    % (ds[e].ClassName(), e, identityDiffBins[e])
                )
        print("-" * 80)

    print("\n" + "=" * 80)
    print("Comparison : Reference/Test ROOT Histo files")
    print("\n\t\tReference\tTest")
    print(
        "\tObjects : %i\t%i\t\t( p-s = %i )"
        % (referenceObjects, parallObjects, parallObjects - referenceObjects)
    )
    print(
        "\tHistos : %i\t%i\t\t( p-s = %i )"
        % (referenceHistos, parallHistos, parallHistos - referenceHistos)
    )
    print("\t __________")
    print(
        "\tTotal : %i\t%i\n"
        % (referenceHistos + referenceObjects, parallHistos + parallObjects)
    )
    print(
        "Objects/Histos unique to Reference File : %i / %i"
        % (len(uniqueSerPaths) - uniqueSerHistos, uniqueSerHistos)
    )
    print(
        "Objects/Histos unique to Test File : %i / %i"
        % (len(uniqueParPaths) - uniqueParHistos, uniqueParHistos)
    )
    print("\nMatching Histograms valid for Comparison : %i" % (mh))
    print("\nOmissions' patterns : ")
    for entry in gRegexBlackList:
        print("\t%s" % (entry))
    # cEntries is the number of matching histograms that survived the
    # omission patterns; the number of patterns says nothing about how many
    # histograms they removed
    print("\nHistograms for Comparison (after Omissions) : %i" % (cEntries))
    print("\n\tHISTOGRAM TESTS : ")
    print("\t\tKOLMOGOROV TEST : %i" % (kTested))
    print("\t\tINTEGRAL TEST : %i" % (otherTest))
    print("\t\tENTRIES TEST : %i" % (xEntries))
    if checkBin2BinIdentity:
        print("\t\tBIN2BIN TEST : %i" % (passedIdentity))
    print("\t\t ____")
    print("\t\tTested : %i" % (cEntries))

    print("\n\tDISCREPANCIES : ")
    print("\t\tK-Test : %i" % (failedKol))
    print("\t\tIntegrals : %i" % (xIntegrals))
    print("\t\tEntries : %i" % (xEntries))
    retval = failedKol + xIntegrals + xEntries + failedIdentity
    if retval != 0:
        print("\nThe two sets of histograms were not identical")
    print("\n" + "=" * 80)
    return retval
433 
434 
435 # =============================================================================
436 
437 
def extractBlacklist(listString):
    """Fill the module-level gRegexBlackList from a comma separated string.

    @param listString : comma separated regexps; each piece is appended to
                        the current list. An empty string or None resets the
                        list to empty instead.
    """
    global gRegexBlackList
    if not listString:
        gRegexBlackList = []
        return
    gRegexBlackList.extend(listString.split(","))
445 
446 
447 # =============================================================================
448 
if __name__ == "__main__":
    # Command line driver: compareRootHistos.py testFile.root referenceFile.root
    # The exit status is the number of discrepancies found (0 = identical).
    usage = "usage: %prog testFile.root referenceFile.root [options]"
    # hand the usage string to the parser so that -h shows it as well
    parser = OptionParser(usage=usage)
    parser.add_option(
        "-b",
        dest="blacklist",
        help='Comma separated list of regexps matching histograms to skip (for example -b"MemoryTool,ProcTime")',
    )

    parser.add_option(
        "-i",
        action="store_true",
        dest="bin2bin",
        default=False,
        help="Check for bin to bin identity",
    )
    (options, args) = parser.parse_args()

    if len(args) != 2:
        print("Wrong number of rootfiles.")
        parser.print_usage()
        sys.exit(1)

    extractBlacklist(options.blacklist)

    testFile, referenceFile = args

    # open both files read-only
    print("opening Test File : %s" % (testFile))
    testTFile = TFile(testFile, "READ")
    print("opening Reference File : %s" % (referenceFile))
    referenceTFile = TFile(referenceFile, "READ")
    for fileName, tfile in ((testFile, testTFile), (referenceFile, referenceTFile)):
        # a TFile that could not be opened is flagged as zombie
        if tfile.IsZombie():
            print("Cannot open ROOT file : %s" % (fileName))
            sys.exit(1)

    # get structure of TFiles in a list of (path, object) tuples and turn it
    # into a dictionary with keys=paths; the first argument is the test file
    # and the second one the reference, as stated in the usage string
    dtest = dict(rec(testTFile))
    dref = dict(rec(referenceTFile))
    # make a tuple of (type, dict) where type is either 'reference' or 'test'
    ts = (ref, dref)
    tp = (test, dtest)

    # compare paths from each file
    state = comparePaths(ts, tp)

    # compare histos from each file
    retval = compareHistos(ts, tp, state, checkBin2BinIdentity=options.bin2bin)

    # finished with TFiles
    testTFile.Close()
    referenceTFile.Close()

    # exit statuses are truncated to 8 bits on POSIX: cap the count so that
    # e.g. 256 discrepancies cannot wrap around to a successful status 0
    sys.exit(min(retval, 255))
compareRootHistos.composition
def composition(t)
Definition: compareRootHistos.py:72
compareRootHistos.compareHistos
def compareHistos(t1, t2, state, checkBin2BinIdentity)
Definition: compareRootHistos.py:215
compareRootHistos.rec
def rec(o, path=None, lst=None)
Definition: compareRootHistos.py:43
GaudiPartProp.decorators.all
all
decorate service
Definition: decorators.py:53
compareRootHistos.comparePaths
def comparePaths(t1, t2)
Definition: compareRootHistos.py:100
compareRootHistos.extractBlacklist
def extractBlacklist(listString)
Definition: compareRootHistos.py:438
compareRootHistos.bin2binIdentity
def bin2binIdentity(h1, h2)
Definition: compareRootHistos.py:180
Gaudi::Functional::details::zip::range
decltype(auto) range(Args &&... args)
Zips multiple containers together to form a single range.
Definition: details.h:97