The Gaudi Framework  v33r0 (d5ea422b)
precedence.py
Go to the documentation of this file.
1 
11 from __future__ import print_function
12 import os
13 import sys
14 import random
15 import string
16 import json
17 import networkx as nx
18 
19 from Gaudi.Configuration import INFO
20 from Configurables import GaudiSequencer, CPUCruncher
21 
22 
def _buildFilePath(filePath):
    """Resolve filePath to the location of an existing file.

    A path that already exists is handed back untouched. Otherwise the path
    is looked up under the 'data' directory of $GAUDIHIVEROOT (an unset
    variable is treated as an empty string) and the real path of that
    candidate is returned. If the candidate does not exist either, an error
    message is printed and the interpreter is terminated with exit code 1.
    """

    # Fast path: the caller supplied something that exists as-is.
    if os.path.exists(filePath):
        return filePath

    # Fall back to the data area of the GaudiHive package.
    dataDir = os.path.join(os.environ.get('GAUDIHIVEROOT', ''), "data")
    candidate = os.path.realpath(os.path.join(dataDir, filePath))

    if not os.path.exists(candidate):
        print(
            "\nERROR: invalid file path '%s'. It must be either absolute, or relative to '$GAUDIHIVEROOT/data/'."
            % filePath)
        sys.exit(1)

    return candidate
38 
39 
class UniformTimeValue(object):
    """Timing provider that reports the same runtime for every algorithm."""

    def __init__(self, avgRuntime, varRuntime=0):
        """
        Keyword arguments:
        avgRuntime -- average runtime reported for any algorithm
        varRuntime -- runtime variance reported for any algorithm (0 by default)
        """

        self.avgRuntime, self.varRuntime = avgRuntime, varRuntime

    def get(self, algoName=''):
        """Return the (avgRuntime, varRuntime) tuple; algoName is accepted but not used."""

        timing = (self.avgRuntime, self.varRuntime)
        return timing
52 
53 
class RealTimeValue(object):
    """A class to manage real algorithm timing

    Timings are read once, at construction, from a JSON file that is indexed
    by algorithm name. The variance reported by get() is always 0.
    """

    def __init__(self, path, defaultTime, factor=1):
        """
        Keyword arguments:
        path -- path to the JSON timing library; it is resolved with _buildFilePath
        defaultTime -- run time, assigned to an algorithm if no time is found in provided timing library
                       (and it will also be scaled by the 'factor' argument)
        factor -- multiplier applied to every time returned by get()
        """

        self.path = os.path.realpath(_buildFilePath(path))
        self.factor = factor
        self.defaultTime = defaultTime  # typically 0.05s
        self.varRuntime = 0

        # Load the whole library up front and release the file handle
        # immediately instead of keeping it open for the object's lifetime.
        with open(self.path) as timingFile:
            self.timings = json.load(timingFile)

        # The attribute is retained for backward compatibility only; the
        # handle it refers to is already closed at this point.
        self.file = timingFile

    def get(self, algoName=''):
        """Get time and its variance (in a tuple) for a given algorithm name

        The name is looked up as given and, failing that, with its first
        letter capitalized. If neither is present, defaultTime is used and a
        warning is printed. The result is always multiplied by 'factor'.
        """

        if algoName in self.timings:
            time = float(self.timings[algoName])
        else:
            # Slicing (rather than indexing) keeps an empty name, which is
            # the default argument, from raising IndexError.
            capAlgoName = algoName[:1].upper() + algoName[1:]

            if capAlgoName in self.timings:
                time = float(self.timings[capAlgoName])
            else:
                time = self.defaultTime
                print(
                    "WARNING: TimiNg for %s (or %s) not found in the provided library, using default one: %s"
                    % (algoName, capAlgoName, time))

        time = time * self.factor

        return time, self.varRuntime
90 
91 
class UniformBooleanValue(object):
    """Value provider that hands out the same stored value on every request."""

    def __init__(self, value):
        """Remember the value that get() will keep returning."""
        self.value = value

    def get(self):
        """Return the value supplied at construction time."""
        stored = self.value
        return stored
100 
101 
class RndBiasedBooleanValue(object):
    """Provides randomly ordered set of boolean values with requested proportion of True and False.

    The values are handed out one at a time by get(); once every element of
    the pattern has been consumed, the next call to get() raises
    StopIteration.
    """

    def __init__(self, pattern, seed=None):
        """
        Keyword arguments:
        pattern -- either a dictionary describing proportion of True and False (e.g., {True:5,False:15}), or
                   a list/tuple containing a pattern to be used as-is (e.g., [False,True,True,False])
        seed -- an int, long or other hashable object to initialize random number generator
                (passed to random.seed as-is; only used for a dictionary pattern)

        Raises:
        ValueError -- if a dictionary pattern requests a non-positive total number of values
        TypeError -- if the pattern is neither a dictionary nor a list/tuple
        """

        if isinstance(pattern, dict):
            proportion = pattern

            length = proportion[True] + proportion[False]
            if length <= 0:
                # A plain string cannot be raised; use a real exception type.
                raise ValueError(
                    "ERROR: Wrong set length requested: %i " % length)

            self.pattern = ([False] * proportion[False] +
                            [True] * proportion[True])

            # Seeding affects the module-level generator of 'random', which
            # is also the one used by random.shuffle below.
            if seed is not None:
                random.seed(seed)

            random.shuffle(self.pattern)

        elif isinstance(pattern, (list, tuple)):
            self.pattern = pattern
        else:
            raise TypeError("ERROR: unknown pattern type")

        # get() draws from this generator, so it has to exist before the
        # first call.
        self.generator = self._create_generator(self.pattern)

    def _create_generator(self, pattern):
        """Yield the elements of the given pattern one by one, in order."""

        for b in pattern:
            yield b

    def get(self):
        """Return the next value of the pattern (StopIteration when exhausted)."""

        return next(self.generator)

    def get_pattern(self):
        """Return the complete pattern the values are drawn from."""

        return self.pattern
147 
148 
class CruncherSequence(object):
    """Constructs the sequence tree of CPUCrunchers with provided control flow and data flow precedence rules.

    The control flow graph is walked recursively starting from the top
    sequencer: sequencer-like nodes become GaudiSequencer instances, every
    other node becomes a CPUCruncher whose data inputs and outputs are taken
    from the data flow graph. The assembled top sequencer is available
    through get().
    """

    # Bookkeeping used for the optional statistics printout.
    # NOTE(review): these containers are class attributes that are mutated in
    # place, so they are shared by all CruncherSequence instances -- confirm
    # that accumulating across instances is intended.
    unique_sequencers = []  # control flow nodes seen as sequencers
    dupl_seqs = {}  # sequencer node -> number of times it was encountered (>= 2)
    OR_sequencers = []  # sequencer nodes with attribute ModeOR == 'True'
    unique_algos = []  # control flow nodes seen as algorithms
    dupl_algos = {}  # algorithm node -> number of times it was encountered (>= 2)

    unique_data_objects = []  # data flow nodes used as algorithm inputs/outputs

    def __init__(self,
                 timeValue,
                 IOboolValue,
                 sleepFraction,
                 cfgPath,
                 dfgPath,
                 topSequencer,
                 showStat=False,
                 timeline=False,
                 outputLevel=INFO):
        """
        Keyword arguments:
        timeValue -- timeValue object to set algorithm execution time
        IOboolValue -- *BooleanValue object to set whether an algorithm has to experience IO-bound execution
        sleepFraction -- value given to CPUCruncher.SleepFraction for algorithms selected by IOboolValue
                         (the others get 0.)
        cfgPath -- relative to $GAUDIHIVEROOT/data path to GRAPHML file with control flow dependencies
        dfgPath -- relative to $GAUDIHIVEROOT/data path to GRAPHML file with data flow dependencies
        topSequencer -- name of the control flow graph node to start from; also the name of the top GaudiSequencer
        showStat -- print out statistics on precedence graph
        timeline -- value given to the Timeline property of every CPUCruncher
        outputLevel -- value given to the OutputLevel property of every CPUCruncher
        """

        self.timeValue = timeValue
        self.IOboolValue = IOboolValue
        self.sleepFraction = sleepFraction

        # Both paths are resolved by _buildFilePath before being read.
        self.cfg = nx.read_graphml(_buildFilePath(cfgPath))
        self.dfg = nx.read_graphml(_buildFilePath(dfgPath))

        self.enableTimeline = timeline

        self.outputLevel = outputLevel

        # Generate control flow part
        self.sequencer = self._generate_sequence(topSequencer)

        if showStat:
            # NOTE(review): pprint is only referenced by the commented-out
            # debug dumps below.
            import pprint

            print("\n===== Statistics on Algorithms =====")
            # Every entry of dupl_algos counts all occurrences of a node, one
            # of which is already included in unique_algos, hence the "- 1".
            print("Total number of algorithm nodes: ", len(self.unique_algos) +
                  sum([self.dupl_algos[i] - 1 for i in self.dupl_algos]))
            print("Number of unique algorithms: ", len(self.unique_algos))
            print(" -->", len(self.dupl_algos),
                  "of them being re-used with the following distribution: ",
                  [self.dupl_algos[i] for i in self.dupl_algos])
            # pprint.pprint(dupl_algos)

            print("\n===== Statistics on Sequencers =====")
            print("Total number of sequencers: ", len(self.unique_sequencers) +
                  sum([self.dupl_seqs[i] - 1 for i in self.dupl_seqs]))
            print("Number of unique sequencers: ", len(self.unique_sequencers))
            print(" -->", len(self.dupl_seqs),
                  "of them being re-used with the following distribution: ",
                  [self.dupl_seqs[i] for i in self.dupl_seqs])
            # pprint.pprint(dupl_seqs)
            print("Number of OR-sequencers: ", len(self.OR_sequencers))

            print("\n===== Statistics on DataObjects =====")
            print("Number of unique DataObjects: ",
                  len(self.unique_data_objects))
            # pprint.pprint(self.unique_data_objects)
            print()

    def get(self):
        """Return the top sequencer assembled during construction."""

        return self.sequencer

    def _declare_data_deps(self, algo_name, algo):
        """ Declare data inputs and outputs for a given algorithm.

        Keyword arguments:
        algo_name -- name of the algorithm's node in the data flow graph
        algo -- algorithm object whose inpKeys and outKeys lists are extended
        """

        # Declare data inputs
        # (the source node of every edge arriving at the algorithm)
        for inNode, outNode in self.dfg.in_edges(algo_name):
            dataName = inNode
            if dataName not in self.unique_data_objects:
                self.unique_data_objects.append(dataName)

            if dataName not in algo.inpKeys:
                algo.inpKeys.append(dataName)

        # Declare data outputs
        # (the target node of every edge leaving the algorithm)
        for inNode, outNode in self.dfg.out_edges(algo_name):
            dataName = outNode
            if dataName not in self.unique_data_objects:
                self.unique_data_objects.append(dataName)

            if dataName not in algo.outKeys:
                algo.outKeys.append(dataName)

    def _generate_sequence(self, name, seq=None):
        """ Assemble the tree of sequencers.

        Keyword arguments:
        name -- control flow graph node whose neighbours become the members of the sequencer
        seq -- sequencer to fill; when it is not given (or is falsy) a new
               GaudiSequencer called 'name' is created with ShortCircuit disabled

        Returns the filled sequencer.
        """

        if not seq:
            seq = GaudiSequencer(name, ShortCircuit=False)

        for n in self.cfg[name]:
            # extract entity name and type
            # (node ids may have the form '<type>/<name>')
            algo_name = n.split('/')[1] if '/' in n else n

            if 'type' in self.cfg.node[n]:
                # first rely on explicit type, if given
                algo_type = self.cfg.node[n].get('type')
            else:
                # if the type is not given explicitly, try to extract it from entity name,
                # and, if unsuccessful, assume it is an algorithm
                algo_type = n.split('/')[0] if '/' in n else 'Algorithm'

            if algo_type in ['GaudiSequencer', 'AthSequencer', 'ProcessPhase']:
                # These sequencers are left out of the tree altogether.
                if algo_name in ['RecoITSeq', 'RecoOTSeq', 'RecoTTSeq']:
                    continue

                # Statistics: first encounter goes to the unique list, any
                # further one is counted in dupl_seqs (starting at 2).
                if n not in self.unique_sequencers:
                    self.unique_sequencers.append(n)
                else:
                    if n not in self.dupl_seqs:
                        self.dupl_seqs[n] = 2
                    else:
                        self.dupl_seqs[n] += 1

                # NOTE(review): OutputLevel is hard-coded to INFO here, while
                # the CPUCrunchers below use self.outputLevel -- confirm.
                seq_daughter = GaudiSequencer(algo_name, OutputLevel=INFO)
                # Graph attributes are compared as strings.
                if self.cfg.node[n].get('ModeOR') == 'True':
                    self.OR_sequencers.append(n)
                    seq_daughter.ModeOR = True
                # if self.cfg.node[n].get('Lazy') == 'False':
                # print "Non-Lazy - ", n
                seq_daughter.ShortCircuit = False
                if seq_daughter not in seq.Members:
                    seq.Members += [seq_daughter]
                    # iterate deeper
                    self._generate_sequence(n, seq_daughter)
            else:
                #rndname = ''.join(random.choice(string.lowercase) for i in range(5))
                #if algo_name in unique_algos: algo_name = algo_name + "-" + rndname
                # Statistics: same bookkeeping scheme as for the sequencers.
                if n not in self.unique_algos:
                    self.unique_algos.append(n)
                else:
                    if n not in self.dupl_algos:
                        self.dupl_algos[n] = 2
                    else:
                        self.dupl_algos[n] += 1

                avgRuntime, varRuntime = self.timeValue.get(algo_name)

                # One value is drawn from IOboolValue per algorithm node; a
                # true value assigns the configured sleep fraction, otherwise 0.
                algo_daughter = CPUCruncher(
                    algo_name,
                    OutputLevel=self.outputLevel,
                    varRuntime=varRuntime,
                    avgRuntime=avgRuntime,
                    SleepFraction=self.sleepFraction
                    if self.IOboolValue.get() else 0.,
                    Timeline=self.enableTimeline)

                self._declare_data_deps(algo_name, algo_daughter)

                if algo_daughter not in seq.Members:
                    seq.Members += [algo_daughter]

        return seq
def __init__(self, path, defaultTime, factor=1)
Definition: precedence.py:57
def _generate_sequence(self, name, seq=None)
Definition: precedence.py:246
def __init__(self, avgRuntime, varRuntime=0)
Definition: precedence.py:43
def get(self, algoName='')
Definition: precedence.py:71
def get(self, algoName='')
Definition: precedence.py:48
def __init__(self, timeValue, IOboolValue, sleepFraction, cfgPath, dfgPath, topSequencer, showStat=False, timeline=False, outputLevel=INFO)
Definition: precedence.py:160
def _declare_data_deps(self, algo_name, algo)
Definition: precedence.py:225
def _buildFilePath(filePath)
Definition: precedence.py:23
decltype(auto) range(Args &&... args)
Zips multiple containers together to form a single range.
def __init__(self, pattern, seed=None)
Definition: precedence.py:105