The Gaudi Framework  v33r2 (a6f0ec87)
precedence.py
Go to the documentation of this file.
1 
11 from __future__ import print_function
12 import os
13 import sys
14 import random
15 import string
16 import json
17 import networkx as nx
18 
19 from Gaudi.Configuration import INFO
20 from Configurables import GaudiSequencer, CPUCruncher
21 
22 
def _buildFilePath(filePath):
    """Resolve ``filePath`` to an existing file, or terminate the program.

    The path is returned unchanged when it already exists as given.
    Otherwise it is looked up below ``$GAUDIHIVEROOT/data/`` (an unset
    variable is treated as an empty string) and the canonical form of that
    location is returned.

    If neither location exists, an error message is printed and the process
    exits with status 1.
    """
    # Fast path: the caller supplied something that exists as-is.
    if os.path.exists(filePath):
        return filePath

    # Fall back to the data directory of the GaudiHive installation.
    hive_root = os.environ.get('GAUDIHIVEROOT', '')
    candidate = os.path.realpath(os.path.join(hive_root, "data", filePath))
    if os.path.exists(candidate):
        return candidate

    print(
        "\nERROR: invalid file path '%s'. It must be either absolute, or relative to '$GAUDIHIVEROOT/data/'."
        % filePath)
    sys.exit(1)
38 
39 
class UniformTimeValue(object):
    """Timing provider that hands out the same runtime for every algorithm."""

    def __init__(self, avgRuntime, varRuntime=0):
        """Store the average runtime and its variance shared by all algorithms."""
        self.avgRuntime, self.varRuntime = avgRuntime, varRuntime

    def get(self, algoName=''):
        """Return the ``(avgRuntime, varRuntime)`` tuple.

        ``algoName`` is accepted for interface parity with other timing
        providers in this module but is not used.
        """
        timing = (self.avgRuntime, self.varRuntime)
        return timing
52 
53 
class RealTimeValue(object):
    """A class to manage real algorithm timing, read from a JSON timing library."""

    def __init__(self, path, defaultTime, factor=1):
        """
        Keyword arguments:
        path -- JSON file mapping algorithm names to run times (resolved with _buildFilePath)
        defaultTime -- run time, assigned to an algorithm if no time is found in provided timing library
                       (and it will also be scaled by the 'factor' argument)
        factor -- multiplier applied to every run time returned by get()
        """

        self.path = os.path.realpath(_buildFilePath(path))
        self.factor = factor
        self.defaultTime = defaultTime  # typically 0.05s
        self.varRuntime = 0

        # The library is loaded eagerly, so the handle is released as soon as
        # parsing is done instead of staying open for the object's lifetime.
        with open(self.path) as timingFile:
            self.timings = json.load(timingFile)
        # Attribute kept for backward compatibility; the handle is closed.
        self.file = timingFile

    def get(self, algoName=''):
        """Get time for a given algorithm name.

        The name is looked up as given and then with its first character
        upper-cased. If neither is present, ``defaultTime`` is used and a
        warning is printed. The result is scaled by ``factor``.

        Returns a ``(time, varRuntime)`` tuple.
        """

        if algoName in self.timings:
            runtime = float(self.timings[algoName])
        else:
            # Slicing (rather than indexing) keeps an empty name, which is the
            # default argument, from raising IndexError.
            capAlgoName = algoName[:1].upper() + algoName[1:]

            if capAlgoName in self.timings:
                runtime = float(self.timings[capAlgoName])
            else:
                runtime = self.defaultTime
                print(
                    "WARNING: TimiNg for %s (or %s) not found in the provided library, using default one: %s"
                    % (algoName, capAlgoName, runtime))

        runtime = runtime * self.factor

        return runtime, self.varRuntime
90 
91 
class UniformBooleanValue(object):
    """Boolean provider that returns the same stored value on every request."""

    def __init__(self, value):
        """Remember ``value``, the constant handed out by get()."""
        self.value = value

    def get(self):
        """Return the value given at construction time."""
        constant = self.value
        return constant
100 
101 
class RndBiasedBooleanValue(object):
    """Provides randomly ordered set of boolean values with requested proportion of True and False."""

    def __init__(self, pattern, seed=None):
        """
        Keyword arguments:
        pattern -- either a dictionary describing proportion of True and False (e.g., {True:5,False:15}), or
                   a list/tuple containing a pattern to be used as-is (e.g., [False,True,True,False])
        seed -- an int, long or other hashable object to initialize random number generator
                (passed to random.seed as-is; only used with a dictionary pattern)

        Raises:
        ValueError -- if a dictionary pattern requests a non-positive total number of values
        TypeError -- if the pattern is neither a dictionary nor a list/tuple
        """

        if isinstance(pattern, dict):
            proportion = pattern

            length = proportion[True] + proportion[False]
            if length <= 0:
                # A string is not a valid exception object; raise a real one
                # so the message actually reaches the caller.
                raise ValueError(
                    "ERROR: Wrong set length requested: %i " % length)

            self.pattern = [False] * proportion[False] + [True] * proportion[True]

            # Seeding the module-level generator makes the shuffle below
            # reproducible for a given seed.
            if seed is not None:
                random.seed(seed)

            random.shuffle(self.pattern)

        elif isinstance(pattern, (list, tuple)):
            self.pattern = pattern
        else:
            raise TypeError("ERROR: unknown pattern type")

        # get() draws from this generator; without it every get() call would
        # fail with AttributeError.
        self.generator = self._create_generator(self.pattern)

    def _create_generator(self, pattern):
        """Yield the values of ``pattern`` one by one, in order."""

        for b in pattern:
            yield b

    def get(self):
        """Return the next value of the pattern.

        Raises StopIteration once the pattern is exhausted.
        """

        return next(self.generator)

    def get_pattern(self):
        """Return the complete pattern (shuffled, if built from a dictionary)."""

        return self.pattern
147 
148 
class CruncherSequence(object):
    """Constructs the sequence tree of CPUCrunchers with provided control flow and data flow precedence rules.

    The control flow graph is walked recursively starting from the top
    sequencer node. Sequencer nodes become GaudiSequencer instances and every
    other node becomes a CPUCruncher whose data inputs/outputs are taken from
    the data flow graph. The assembled top sequencer is available via get().
    """

    # NOTE(review): the containers below are class attributes, so they are
    # shared by all CruncherSequence instances and the statistics accumulate
    # across instances -- confirm this is intended.
    unique_sequencers = []  # sequencer node names met so far
    dupl_seqs = {}  # sequencer node name -> number of occurrences (first repeat stored as 2)
    OR_sequencers = []  # sequencer node names whose 'ModeOR' attribute is 'True'
    unique_algos = []  # algorithm node names met so far
    dupl_algos = {}  # algorithm node name -> number of occurrences (first repeat stored as 2)

    unique_data_objects = []  # data object names met while declaring data dependencies

    def __init__(self,
                 timeValue,
                 BlockingBoolValue,
                 sleepFraction,
                 cfgPath,
                 dfgPath,
                 topSequencer,
                 showStat=False,
                 timeline=False,
                 outputLevel=INFO):
        """
        Keyword arguments:
        timeValue -- timeValue object to set algorithm execution time
        BlockingBoolValue -- *BooleanValue object to set whether an algorithm has to experience CPU-blocking execution
        sleepFraction -- value passed to a CPUCruncher as SleepFraction when BlockingBoolValue.get() is true
                         (0. is passed otherwise)
        cfgPath -- relative to $GAUDIHIVEROOT/data path to GRAPHML file with control flow dependencies
        dfgPath -- relative to $GAUDIHIVEROOT/data path to GRAPHML file with data flow dependencies
        topSequencer -- name of the control flow graph node to start from; also used as the name of the top sequencer
        showStat -- print out statistics on precedence graph
        timeline -- value passed to every CPUCruncher as Timeline
        outputLevel -- value passed to every CPUCruncher as OutputLevel
        """

        self.timeValue = timeValue
        self.BlockingBoolValue = BlockingBoolValue
        self.sleepFraction = sleepFraction
        self.cfg = nx.read_graphml(_buildFilePath(cfgPath))
        self.dfg = nx.read_graphml(_buildFilePath(dfgPath))

        self.enableTimeline = timeline

        self.outputLevel = outputLevel

        # Generate control flow part
        self.sequencer = self._generate_sequence(topSequencer)

        if showStat:
            # only needed by the commented-out pprint calls below
            import pprint

            print("\n===== Statistics on Algorithms =====")
            # every duplicated entry counts its extra occurrences on top of
            # the single entry already present in the unique list
            print(
                "Total number of algorithm nodes: ",
                len(self.unique_algos) + sum(
                    [self.dupl_algos[i] - 1 for i in self.dupl_algos]))
            print("Number of unique algorithms: ", len(self.unique_algos))
            print(" -->", len(self.dupl_algos),
                  "of them being re-used with the following distribution: ",
                  [self.dupl_algos[i] for i in self.dupl_algos])
            # pprint.pprint(dupl_algos)

            print("\n===== Statistics on Sequencers =====")
            print(
                "Total number of sequencers: ",
                len(self.unique_sequencers) + sum(
                    [self.dupl_seqs[i] - 1 for i in self.dupl_seqs]))
            print("Number of unique sequencers: ", len(self.unique_sequencers))
            print(" -->", len(self.dupl_seqs),
                  "of them being re-used with the following distribution: ",
                  [self.dupl_seqs[i] for i in self.dupl_seqs])
            # pprint.pprint(dupl_seqs)
            print("Number of OR-sequencers: ", len(self.OR_sequencers))

            print("\n===== Statistics on DataObjects =====")
            print("Number of unique DataObjects: ",
                  len(self.unique_data_objects))
            # pprint.pprint(self.unique_data_objects)
            print()

    def get(self):
        """Return the top sequencer assembled in the constructor."""

        return self.sequencer

    def _declare_data_deps(self, algo_name, algo):
        """ Declare data inputs and outputs for a given algorithm.

        algo_name -- node name looked up in the data flow graph
        algo -- object whose 'inpKeys' and 'outKeys' lists are extended in place
        """

        # Declare data inputs
        # (the source node of every incoming edge is taken as an input data name)
        for inNode, outNode in self.dfg.in_edges(algo_name):
            dataName = inNode
            if dataName not in self.unique_data_objects:
                self.unique_data_objects.append(dataName)

            if dataName not in algo.inpKeys:
                algo.inpKeys.append(dataName)

        # Declare data outputs
        # (the target node of every outgoing edge is taken as an output data name)
        for inNode, outNode in self.dfg.out_edges(algo_name):
            dataName = outNode
            if dataName not in self.unique_data_objects:
                self.unique_data_objects.append(dataName)

            if dataName not in algo.outKeys:
                algo.outKeys.append(dataName)

    def _generate_sequence(self, name, seq=None):
        """ Assemble the tree of sequencers.

        name -- control flow graph node whose neighbours are added to 'seq'
        seq -- sequencer to fill; a new GaudiSequencer named 'name' is created when none is given

        Returns the filled sequencer.
        """

        # NOTE(review): this tests truthiness, not 'is None' -- confirm a
        # sequencer passed in by the recursive call can never evaluate false.
        if not seq:
            seq = GaudiSequencer(name, ShortCircuit=False)

        for n in self.cfg[name]:
            # extract entity name and type
            algo_name = n.split('/')[1] if '/' in n else n

            # NOTE(review): 'Graph.node' is the networkx 1.x/2.0-2.3 accessor;
            # newer networkx releases only provide 'Graph.nodes' -- confirm the
            # networkx version this is run against.
            if 'type' in self.cfg.node[n]:
                # first rely on explicit type, if given
                algo_type = self.cfg.node[n].get('type')
            else:
                # if the type is not given explicitly, try to extract it from entity name,
                # and, if unsuccessful, assume it is an algorithm
                algo_type = n.split('/')[0] if '/' in n else 'Algorithm'

            if algo_type in ['GaudiSequencer', 'AthSequencer', 'ProcessPhase']:
                # these three sequencers are deliberately left out of the tree
                if algo_name in ['RecoITSeq', 'RecoOTSeq', 'RecoTTSeq']:
                    continue

                # bookkeeping for the statistics printed by the constructor
                if n not in self.unique_sequencers:
                    self.unique_sequencers.append(n)
                else:
                    if n not in self.dupl_seqs:
                        self.dupl_seqs[n] = 2
                    else:
                        self.dupl_seqs[n] += 1

                # NOTE(review): sequencers always get INFO here, not
                # self.outputLevel -- confirm this is intended.
                seq_daughter = GaudiSequencer(algo_name, OutputLevel=INFO)
                # the graph attribute is compared as the string 'True'
                if self.cfg.node[n].get('ModeOR') == 'True':
                    self.OR_sequencers.append(n)
                    seq_daughter.ModeOR = True
                # if self.cfg.node[n].get('Lazy') == 'False':
                # print "Non-Lazy - ", n
                seq_daughter.ShortCircuit = False
                # NOTE(review): nesting reconstructed from a flattened listing;
                # the recursion is assumed to happen only when the daughter is
                # newly added to this sequencer -- confirm against the original.
                if seq_daughter not in seq.Members:
                    seq.Members += [seq_daughter]
                    # iterate deeper
                    self._generate_sequence(n, seq_daughter)
            else:
                #rndname = ''.join(random.choice(string.lowercase) for i in range(5))
                #if algo_name in unique_algos: algo_name = algo_name + "-" + rndname
                # bookkeeping for the statistics printed by the constructor
                if n not in self.unique_algos:
                    self.unique_algos.append(n)
                else:
                    if n not in self.dupl_algos:
                        self.dupl_algos[n] = 2
                    else:
                        self.dupl_algos[n] += 1

                avgRuntime, varRuntime = self.timeValue.get(algo_name)

                # SleepFraction is non-zero only when the blocking provider
                # returns a true value for this algorithm
                algo_daughter = CPUCruncher(
                    algo_name,
                    OutputLevel=self.outputLevel,
                    varRuntime=varRuntime,
                    avgRuntime=avgRuntime,
                    SleepFraction=self.sleepFraction
                    if self.BlockingBoolValue.get() else 0.,
                    Timeline=self.enableTimeline)

                self._declare_data_deps(algo_name, algo_daughter)

                if algo_daughter not in seq.Members:
                    seq.Members += [algo_daughter]

        return seq
def __init__(self, path, defaultTime, factor=1)
Definition: precedence.py:57
def _generate_sequence(self, name, seq=None)
Definition: precedence.py:250
def __init__(self, avgRuntime, varRuntime=0)
Definition: precedence.py:43
def get(self, algoName='')
Definition: precedence.py:71
def get(self, algoName='')
Definition: precedence.py:48
def _declare_data_deps(self, algo_name, algo)
Definition: precedence.py:229
def __init__(self, timeValue, BlockingBoolValue, sleepFraction, cfgPath, dfgPath, topSequencer, showStat=False, timeline=False, outputLevel=INFO)
Definition: precedence.py:160
def _buildFilePath(filePath)
Definition: precedence.py:23
decltype(auto) range(Args &&... args)
Zips multiple containers together to form a single range.
def __init__(self, pattern, seed=None)
Definition: precedence.py:105