The Gaudi Framework  v32r0 (3325bb39)
precedence.py
Go to the documentation of this file.
1 import os
2 import sys
3 import random
4 import string
5 import json
6 import networkx as nx
7 
8 from Gaudi.Configuration import INFO
9 from Configurables import GaudiSequencer, CPUCruncher
10 
11 
def _buildFilePath(filePath):
    """Resolve ``filePath`` to an existing file, or terminate the program.

    The path is returned unchanged when it exists as given. Otherwise it is
    looked up below ``$GAUDIHIVEROOT/data/`` (an unset variable is treated as
    an empty string) and the resolved real path of that candidate is returned.

    If neither location exists, an error message is printed and the process
    is terminated through ``sys.exit(1)``.
    """
    if os.path.exists(filePath):
        return filePath

    fullFilePath = os.path.realpath(
        os.path.join(os.environ.get('GAUDIHIVEROOT', ''), "data", filePath))

    if not os.path.exists(fullFilePath):
        # A single-argument call prints the same text whether ``print`` is the
        # Python 2 statement or the Python 3 function.
        print(
            "\nERROR: invalid file path '%s'. It must be either absolute, or relative to '$GAUDIHIVEROOT/data/'."
            % filePath)
        sys.exit(1)

    return fullFilePath
25 
26 
class UniformTimeValue(object):
    """Timing provider that hands out one and the same runtime pair."""

    def __init__(self, avgRuntime, varRuntime=0):
        """Store the average runtime and its variance for later retrieval."""
        self.varRuntime = varRuntime
        self.avgRuntime = avgRuntime

    def get(self, algoName=''):
        """Return the stored ``(avgRuntime, varRuntime)`` tuple.

        ``algoName`` is accepted but not consulted: the same pair is
        returned whatever name is passed.
        """
        timing = (self.avgRuntime, self.varRuntime)
        return timing
39 
40 
class RealTimeValue(object):
    """A class to manage real algorithm timing"""

    def __init__(self, path, defaultTime, factor=1):
        """
        Keyword arguments:
        path -- JSON timing library; resolved through _buildFilePath, so it is
                either usable as given or relative to $GAUDIHIVEROOT/data/
        defaultTime -- run time, assigned to an algorithm if no time is found in provided timing library
                       (and it will also be scaled by the 'factor' argument); typically 0.05s
        factor -- multiplier applied to every run time returned by get()
        """

        self.path = os.path.realpath(_buildFilePath(path))
        self.factor = factor
        self.defaultTime = defaultTime
        self.varRuntime = 0

        # Load the library and release the file handle right away instead of
        # keeping it open for the lifetime of the object.
        with open(self.path) as timingFile:
            self.timings = json.load(timingFile)
        # The attribute is retained for backward compatibility only; the
        # handle it refers to is already closed.
        self.file = timingFile

    def get(self, algoName=''):
        """Get time and its variance (in a tuple) for a given algorithm name.

        The name is looked up as given and then with its first character
        upper-cased. If neither is present in the library, a warning is
        printed and 'defaultTime' is used. The result is always multiplied
        by 'factor'.
        """

        if algoName in self.timings:
            runtime = float(self.timings[algoName])
        else:
            # Slicing (rather than indexing) keeps an empty name, which is the
            # default argument, from raising IndexError.
            capAlgoName = algoName[:1].upper() + algoName[1:]

            if capAlgoName in self.timings:
                runtime = float(self.timings[capAlgoName])
            else:
                runtime = self.defaultTime
                print(
                    "WARNING: TimiNg for %s (or %s) not found in the provided library, using default one: %s"
                    % (algoName, capAlgoName, runtime))

        runtime = runtime * self.factor

        return runtime, self.varRuntime
76 
77 
class UniformBooleanValue(object):
    """Value provider that returns the same stored value on every request."""

    def __init__(self, value):
        """Remember ``value`` so that ``get`` can hand it back."""
        self.value = value

    def get(self):
        """Return the value given at construction time, unmodified."""
        stored = self.value
        return stored
86 
87 
class RndBiasedBooleanValue(object):
    """Provides randomly ordered set of boolean values with requested proportion of True and False."""

    def __init__(self, pattern, seed=None):
        """
        Keyword arguments:
        pattern -- either a dictionary describing proportion of True and False (e.g., {True:5,False:15}), or
                   a list/tuple containing a pattern to be used as-is (e.g., [False,True,True,False]).
                   A key missing from the dictionary counts as zero.
        seed -- an int, long or other hashable object to initialize random number generator
                (passed to random.seed as-is, i.e. it re-seeds the module-level generator;
                None leaves the generator state untouched). Only used for dictionary patterns.

        Raises:
        ValueError -- the dictionary requests a set of non-positive length
        TypeError -- pattern is neither a dict nor a list/tuple
        """

        if isinstance(pattern, dict):
            nTrue = pattern.get(True, 0)
            nFalse = pattern.get(False, 0)

            length = nTrue + nFalse
            if length <= 0:
                # Raising a bare string is itself an error on Python >= 2.6,
                # which would hide the intended message.
                raise ValueError(
                    "ERROR: Wrong set length requested: %i " % length)

            self.pattern = [False] * nFalse + [True] * nTrue

            if seed is not None:
                random.seed(seed)

            random.shuffle(self.pattern)

        elif isinstance(pattern, (list, tuple)):
            self.pattern = pattern
        else:
            raise TypeError("ERROR: unknown pattern type")

        # get() draws from this generator; without it get() cannot work.
        self.generator = self._create_generator(self.pattern)

    def _create_generator(self, pattern):
        """Yield the elements of 'pattern' one by one, in order."""

        for b in pattern:
            yield b

    def get(self):
        """Return the next value of the pattern.

        Raises StopIteration once every value of the pattern has been consumed.
        """

        return next(self.generator)

    def get_pattern(self):
        """Return the complete pattern (shuffled, if it was built from a dictionary)."""

        return self.pattern
133 
134 
class CruncherSequence(object):
    """Constructs the sequence tree of CPUCrunchers with provided control flow and data flow precedence rules."""

    # These containers are class attributes, so every instance appends to the
    # same objects and the statistics accumulate across instances.
    unique_sequencers = []
    dupl_seqs = {}
    OR_sequencers = []
    unique_algos = []
    dupl_algos = {}

    unique_data_objects = []

    def __init__(self,
                 timeValue,
                 IOboolValue,
                 sleepFraction,
                 cfgPath,
                 dfgPath,
                 topSequencer,
                 showStat=False,
                 timeline=False,
                 outputLevel=INFO):
        """
        Keyword arguments:
        timeValue -- timeValue object to set algorithm execution time
        IOboolValue -- *BooleanValue object to set whether an algorithm has to experience IO-bound execution
        sleepFraction -- value passed as SleepFraction to the algorithms selected by IOboolValue (others get 0.)
        cfgPath -- relative to $GAUDIHIVEROOT/data path to GRAPHML file with control flow dependencies
        dfgPath -- relative to $GAUDIHIVEROOT/data path to GRAPHML file with data flow dependencies
        topSequencer -- name of the control flow graph node the sequence tree is built from
        showStat -- print out statistics on precedence graph
        timeline -- value passed as Timeline to every created CPUCruncher
        outputLevel -- value passed as OutputLevel to every created CPUCruncher
        """

        self.timeValue = timeValue
        self.IOboolValue = IOboolValue
        self.sleepFraction = sleepFraction

        self.cfg = nx.read_graphml(_buildFilePath(cfgPath))
        self.dfg = nx.read_graphml(_buildFilePath(dfgPath))

        self.enableTimeline = timeline

        self.outputLevel = outputLevel

        # Generate control flow part
        self.sequencer = self._generate_sequence(topSequencer)

        if showStat:
            self._print_statistics()

    def _print_statistics(self):
        """Print the collected statistics on algorithms, sequencers and data objects."""

        # Every print below takes a single pre-formatted string so that the
        # output is the same under the Python 2 statement and the Python 3
        # function.
        print("\n===== Statistics on Algorithms =====")
        print("Total number of algorithm nodes:  %s" %
              (len(self.unique_algos) + sum(
                  count - 1 for count in self.dupl_algos.values())))
        print("Number of unique algorithms:  %s" % len(self.unique_algos))
        print(" --> %s of them being re-used with the following distribution:  %s"
              % (len(self.dupl_algos), list(self.dupl_algos.values())))

        print("\n===== Statistics on Sequencers =====")
        print("Total number of sequencers:  %s" %
              (len(self.unique_sequencers) + sum(
                  count - 1 for count in self.dupl_seqs.values())))
        print("Number of unique sequencers:  %s" % len(
            self.unique_sequencers))
        print(" --> %s of them being re-used with the following distribution:  %s"
              % (len(self.dupl_seqs), list(self.dupl_seqs.values())))
        print("Number of OR-sequencers:  %s" % len(self.OR_sequencers))

        print("\n===== Statistics on DataObjects =====")
        print("Number of unique DataObjects:  %s" % len(
            self.unique_data_objects))
        print("")

    def get(self):
        """Return the top sequencer assembled at construction time."""

        return self.sequencer

    @staticmethod
    def _count_occurrence(name, unique, duplicates):
        """Register one more occurrence of 'name'.

        The first occurrence is appended to 'unique'; every later one is
        counted in 'duplicates', whose count starts at 2.
        """

        if name not in unique:
            unique.append(name)
        else:
            duplicates[name] = duplicates.get(name, 1) + 1

    def _declare_data_deps(self, algo_name, algo):
        """ Declare data inputs and outputs for a given algorithm. """

        # Declare data inputs: sources of the data flow edges entering the algorithm
        for dataName, _ in self.dfg.in_edges(algo_name):
            if dataName not in self.unique_data_objects:
                self.unique_data_objects.append(dataName)

            if dataName not in algo.inpKeys:
                algo.inpKeys.append(dataName)

        # Declare data outputs: targets of the data flow edges leaving the algorithm
        for _, dataName in self.dfg.out_edges(algo_name):
            if dataName not in self.unique_data_objects:
                self.unique_data_objects.append(dataName)

            if dataName not in algo.outKeys:
                algo.outKeys.append(dataName)

    def _generate_sequence(self, name, seq=None):
        """ Assemble the tree of sequencers.

        Keyword arguments:
        name -- control flow graph node whose successors are turned into members
        seq -- sequencer to fill; a new GaudiSequencer called 'name' is created when omitted

        Returns the filled sequencer.
        """

        # Explicit None test: does not depend on how a configurable evaluates
        # in a boolean context.
        if seq is None:
            seq = GaudiSequencer(name, ShortCircuit=False)

        for n in self.cfg[name]:
            # extract entity name and type
            algo_name = n.split('/')[1] if '/' in n else n

            # NOTE(review): 'Graph.node' is the networkx 1.x/early 2.x
            # accessor - confirm against the networkx version in use.
            attributes = self.cfg.node[n]

            if 'type' in attributes:
                # first rely on explicit type, if given
                algo_type = attributes.get('type')
            else:
                # if the type is not given explicitly, try to extract it from entity name,
                # and, if unsuccessful, assume it is an algorithm
                algo_type = n.split('/')[0] if '/' in n else 'Algorithm'

            if algo_type in ['GaudiSequencer', 'AthSequencer', 'ProcessPhase']:
                if algo_name in ['RecoITSeq', 'RecoOTSeq', 'RecoTTSeq']:
                    continue

                self._count_occurrence(n, self.unique_sequencers,
                                       self.dupl_seqs)

                # NOTE(review): daughters get OutputLevel=INFO rather than
                # self.outputLevel - confirm this is intended.
                seq_daughter = GaudiSequencer(algo_name, OutputLevel=INFO)
                if attributes.get('ModeOR') == 'True':
                    self.OR_sequencers.append(n)
                    seq_daughter.ModeOR = True
                seq_daughter.ShortCircuit = False
                if seq_daughter not in seq.Members:
                    seq.Members += [seq_daughter]
                    # iterate deeper
                    # NOTE(review): the recursion is placed inside the
                    # membership guard - confirm against the upstream file,
                    # whose indentation is not visible here.
                    self._generate_sequence(n, seq_daughter)
            else:
                self._count_occurrence(n, self.unique_algos, self.dupl_algos)

                avgRuntime, varRuntime = self.timeValue.get(algo_name)

                algo_daughter = CPUCruncher(
                    algo_name,
                    OutputLevel=self.outputLevel,
                    varRuntime=varRuntime,
                    avgRuntime=avgRuntime,
                    SleepFraction=self.sleepFraction
                    if self.IOboolValue.get() else 0.,
                    Timeline=self.enableTimeline)

                self._declare_data_deps(algo_name, algo_daughter)

                if algo_daughter not in seq.Members:
                    seq.Members += [algo_daughter]

        return seq
def __init__(self, path, defaultTime, factor=1)
Definition: precedence.py:44
def _generate_sequence(self, name, seq=None)
Definition: precedence.py:238
def __init__(self, avgRuntime, varRuntime=0)
Definition: precedence.py:30
def get(self, algoName='')
Definition: precedence.py:58
def get(self, algoName='')
Definition: precedence.py:35
def __init__(self, timeValue, IOboolValue, sleepFraction, cfgPath, dfgPath, topSequencer, showStat=False, timeline=False, outputLevel=INFO)
Definition: precedence.py:155
decltype(auto) range(Args &&...args)
Zips multiple containers together to form a single range.
def _declare_data_deps(self, algo_name, algo)
Definition: precedence.py:217
def _buildFilePath(filePath)
Definition: precedence.py:12
def __init__(self, pattern, seed=None)
Definition: precedence.py:91