The Gaudi Framework  v32r2 (46d42edc)
precedence.py
Go to the documentation of this file.
1 from __future__ import print_function
2 import os
3 import sys
4 import random
5 import string
6 import json
7 import networkx as nx
8 
9 from Gaudi.Configuration import INFO
10 from Configurables import GaudiSequencer, CPUCruncher
11 
12 
def _buildFilePath(filePath):
    """Resolve ``filePath`` to an existing file and return the result.

    The path is returned unchanged when it already exists.  Otherwise it is
    looked up below ``$GAUDIHIVEROOT/data`` (an unset variable is treated as
    an empty string) and the canonical form of that candidate is returned.
    When neither location exists, an error message is printed and the
    interpreter exits with status 1.
    """
    # Fast path: the caller handed over something that can be used directly.
    if os.path.exists(filePath):
        return filePath

    data_dir = os.path.join(os.environ.get('GAUDIHIVEROOT', ''), "data")
    resolved = os.path.realpath(os.path.join(data_dir, filePath))

    if not os.path.exists(resolved):
        print(
            "\nERROR: invalid file path '%s'. It must be either absolute, or relative to '$GAUDIHIVEROOT/data/'."
            % filePath)
        sys.exit(1)

    return resolved
28 
29 
class UniformTimeValue(object):
    """Timing provider that hands out the same runtime to every algorithm."""

    def __init__(self, avgRuntime, varRuntime=0):
        """Store the average runtime and its variance."""
        self.avgRuntime, self.varRuntime = avgRuntime, varRuntime

    def get(self, algoName=''):
        """Return the ``(avgRuntime, varRuntime)`` tuple; ``algoName`` is not used."""
        timing = (self.avgRuntime, self.varRuntime)
        return timing
42 
43 
class RealTimeValue(object):
    """A class to manage real algorithm timing"""

    def __init__(self, path, defaultTime, factor=1):
        """
        Load the timing library from a JSON file.

        Keyword arguments:
        path -- JSON file with the timings; it is resolved through _buildFilePath
        defaultTime -- run time, assigned to an algorithm if no time is found in provided timing library
                       (and it will also be scaled by the 'factor' argument)
        factor -- multiplier applied to every run time returned by get()
        """

        self.path = os.path.realpath(_buildFilePath(path))
        self.factor = factor
        self.defaultTime = defaultTime  # typically 0.05s
        self.varRuntime = 0

        # Read the library inside a context manager so the file descriptor is
        # released as soon as the JSON has been parsed.
        with open(self.path) as timingFile:
            self.timings = json.load(timingFile)

        # The attribute is kept so existing code that inspects it keeps
        # working; the underlying handle is already closed at this point.
        self.file = timingFile

    def get(self, algoName=''):
        """Get time and its variance (in a tuple) for a given algorithm name

        The name is looked up as given first and then with its first letter
        upper-cased.  If neither is present in the library, defaultTime is
        used and a warning is printed.  The returned time is always multiplied
        by 'factor'.
        """

        # Slicing instead of indexing keeps an empty name from raising
        # IndexError; it simply falls through to the default time.
        capAlgoName = algoName[:1].upper() + algoName[1:]

        if algoName in self.timings:
            runtime = float(self.timings[algoName])
        elif capAlgoName in self.timings:
            runtime = float(self.timings[capAlgoName])
        else:
            runtime = self.defaultTime
            print(
                "WARNING: TimiNg for %s (or %s) not found in the provided library, using default one: %s"
                % (algoName, capAlgoName, runtime))

        return runtime * self.factor, self.varRuntime
80 
81 
class UniformBooleanValue(object):
    """Boolean provider that returns the same value on every request."""

    def __init__(self, value):
        """Remember the value that get() hands out."""
        self.value = value

    def get(self):
        """Return the value given at construction time."""
        return self.value
90 
91 
class RndBiasedBooleanValue(object):
    """Provides randomly ordered set of boolean values with requested proportion of True and False."""

    def __init__(self, pattern, seed=None):
        """
        Keyword arguments:
        pattern -- either a dictionary describing proportion of True and False (e.g., {True:5,False:15}), or
                   a list/tuple containing a pattern to be used as-is (e.g., [False,True,True,False]).
                   A key missing from the dictionary counts as 0.
        seed -- an int, long or other hashable object to initialize random number generator
                (passed to random.seed as-is; ignored for list/tuple patterns)

        Raises:
        ValueError -- if the dictionary requests a set with no elements
        TypeError -- if pattern is neither a dictionary nor a list/tuple
        """

        if isinstance(pattern, dict):
            nFalse = pattern.get(False, 0)
            nTrue = pattern.get(True, 0)

            length = nTrue + nFalse
            if length <= 0:
                raise ValueError(
                    "ERROR: Wrong set length requested: %i " % length)

            self.pattern = [False] * nFalse + [True] * nTrue

            # Seeding acts on the module-level generator of 'random', so it
            # also affects any other user of that generator.
            if seed is not None:
                random.seed(seed)

            random.shuffle(self.pattern)

        elif isinstance(pattern, (list, tuple)):
            self.pattern = pattern
        else:
            raise TypeError("ERROR: unknown pattern type")

        # get() draws from this generator, so it has to exist before first use
        self.generator = self._create_generator(self.pattern)

    def _create_generator(self, pattern):
        """Yield the elements of the pattern one by one, in order."""

        for b in pattern:
            yield b

    def get(self):
        """Return the next value of the pattern.

        StopIteration propagates once every element has been handed out.
        """

        return next(self.generator)

    def get_pattern(self):
        """Return the complete pattern (shuffled when built from a dictionary)."""

        return self.pattern
137 
138 
class CruncherSequence(object):
    """Constructs the sequence tree of CPUCrunchers with provided control flow and data flow precedence rules."""

    # Bookkeeping used for the statistics printout.  These are class
    # attributes, so they are shared by every CruncherSequence instance and
    # keep accumulating across instances.
    # NOTE(review): confirm that sharing across instances is intended.
    unique_sequencers = []
    dupl_seqs = {}
    OR_sequencers = []
    unique_algos = []
    dupl_algos = {}

    unique_data_objects = []

    def __init__(self,
                 timeValue,
                 IOboolValue,
                 sleepFraction,
                 cfgPath,
                 dfgPath,
                 topSequencer,
                 showStat=False,
                 timeline=False,
                 outputLevel=INFO):
        """
        Keyword arguments:
        timeValue -- timeValue object to set algorithm execution time
        IOboolValue -- *BooleanValue object to set whether an algorithm has to experience IO-bound execution
        sleepFraction -- value given to the SleepFraction property of a CPUCruncher
                         when IOboolValue.get() is true for it (0. is used otherwise)
        cfgPath -- path to GRAPHML file with control flow dependencies (existing path,
                   or relative to $GAUDIHIVEROOT/data)
        dfgPath -- path to GRAPHML file with data flow dependencies (existing path,
                   or relative to $GAUDIHIVEROOT/data)
        topSequencer -- name of the control flow graph node the sequence tree is built from
        showStat -- print out statistics on precedence graph
        timeline -- value given to the Timeline property of every CPUCruncher
        outputLevel -- value given to the OutputLevel property of every CPUCruncher
        """

        self.timeValue = timeValue
        self.IOboolValue = IOboolValue
        self.sleepFraction = sleepFraction

        self.cfg = nx.read_graphml(_buildFilePath(cfgPath))
        self.dfg = nx.read_graphml(_buildFilePath(dfgPath))

        self.enableTimeline = timeline

        self.outputLevel = outputLevel

        # Generate control flow part
        self.sequencer = self._generate_sequence(topSequencer)

        if showStat:
            self._print_statistics()

    def _print_statistics(self):
        """Print the algorithm, sequencer and data object counts gathered so far."""

        algo_counts = list(self.dupl_algos.values())
        seq_counts = list(self.dupl_seqs.values())

        print("\n===== Statistics on Algorithms =====")
        # every duplicated entry is already counted once among the unique ones
        print("Total number of algorithm nodes: ",
              len(self.unique_algos) + sum(c - 1 for c in algo_counts))
        print("Number of unique algorithms: ", len(self.unique_algos))
        print(" -->", len(self.dupl_algos),
              "of them being re-used with the following distribution: ",
              algo_counts)

        print("\n===== Statistics on Sequencers =====")
        print("Total number of sequencers: ",
              len(self.unique_sequencers) + sum(c - 1 for c in seq_counts))
        print("Number of unique sequencers: ", len(self.unique_sequencers))
        print(" -->", len(self.dupl_seqs),
              "of them being re-used with the following distribution: ",
              seq_counts)
        print("Number of OR-sequencers: ", len(self.OR_sequencers))

        print("\n===== Statistics on DataObjects =====")
        print("Number of unique DataObjects: ",
              len(self.unique_data_objects))
        print()

    def get(self):
        """Return the top sequencer assembled by the constructor."""

        return self.sequencer

    @staticmethod
    def _count_occurrence(node, unique, duplicates):
        """Record one more sighting of node: the first goes to unique, later ones are counted in duplicates."""

        if node not in unique:
            unique.append(node)
        else:
            # the second sighting starts the counter at 2
            duplicates[node] = duplicates.get(node, 1) + 1

    def _declare_data_deps(self, algo_name, algo):
        """ Declare data inputs and outputs for a given algorithm.

        The source node of every edge of the data flow graph arriving at
        algo_name is added to algo.inpKeys, and the target node of every edge
        leaving it is added to algo.outKeys.  Each name is also recorded in
        unique_data_objects.
        """

        # Declare data inputs
        for inNode, outNode in self.dfg.in_edges(algo_name):
            dataName = inNode
            if dataName not in self.unique_data_objects:
                self.unique_data_objects.append(dataName)

            if dataName not in algo.inpKeys:
                algo.inpKeys.append(dataName)

        # Declare data outputs
        for inNode, outNode in self.dfg.out_edges(algo_name):
            dataName = outNode
            if dataName not in self.unique_data_objects:
                self.unique_data_objects.append(dataName)

            if dataName not in algo.outKeys:
                algo.outKeys.append(dataName)

    def _generate_sequence(self, name, seq=None):
        """ Assemble the tree of sequencers.

        name -- control flow graph node whose neighbours become members of seq
        seq -- sequencer to fill; a GaudiSequencer called 'name' is created when omitted

        Returns the filled sequencer.
        """

        # Explicit None test: a sequencer handed in by the caller is always
        # used, whatever it evaluates to in a boolean context.
        if seq is None:
            seq = GaudiSequencer(name, ShortCircuit=False)

        for n in self.cfg[name]:
            # NOTE(review): 'Graph.node' was removed in networkx 2.4 in favour
            # of 'Graph.nodes'; left as-is because the networkx version in use
            # is not visible from here.
            node_attrs = self.cfg.node[n]

            # extract entity name and type
            algo_name = n.split('/')[1] if '/' in n else n

            if 'type' in node_attrs:
                # first rely on explicit type, if given
                algo_type = node_attrs.get('type')
            else:
                # if the type is not given explicitly, try to extract it from entity name,
                # and, if unsuccessful, assume it is an algorithm
                algo_type = n.split('/')[0] if '/' in n else 'Algorithm'

            if algo_type in ['GaudiSequencer', 'AthSequencer', 'ProcessPhase']:
                # these sequencers are left out of the tree altogether
                if algo_name in ['RecoITSeq', 'RecoOTSeq', 'RecoTTSeq']:
                    continue

                self._count_occurrence(n, self.unique_sequencers,
                                       self.dupl_seqs)

                # NOTE(review): daughters get the module-level INFO rather
                # than self.outputLevel - confirm whether that is intended.
                seq_daughter = GaudiSequencer(algo_name, OutputLevel=INFO)
                if node_attrs.get('ModeOR') == 'True':
                    self.OR_sequencers.append(n)
                    seq_daughter.ModeOR = True
                seq_daughter.ShortCircuit = False
                if seq_daughter not in seq.Members:
                    seq.Members += [seq_daughter]
                # iterate deeper
                self._generate_sequence(n, seq_daughter)
            else:
                self._count_occurrence(n, self.unique_algos, self.dupl_algos)

                avgRuntime, varRuntime = self.timeValue.get(algo_name)

                algo_daughter = CPUCruncher(
                    algo_name,
                    OutputLevel=self.outputLevel,
                    varRuntime=varRuntime,
                    avgRuntime=avgRuntime,
                    SleepFraction=self.sleepFraction
                    if self.IOboolValue.get() else 0.,
                    Timeline=self.enableTimeline)

                self._declare_data_deps(algo_name, algo_daughter)

                if algo_daughter not in seq.Members:
                    seq.Members += [algo_daughter]

        return seq
def __init__(self, path, defaultTime, factor=1)
Definition: precedence.py:47
def _generate_sequence(self, name, seq=None)
Definition: precedence.py:236
def __init__(self, avgRuntime, varRuntime=0)
Definition: precedence.py:33
def get(self, algoName='')
Definition: precedence.py:61
def get(self, algoName='')
Definition: precedence.py:38
def __init__(self, timeValue, IOboolValue, sleepFraction, cfgPath, dfgPath, topSequencer, showStat=False, timeline=False, outputLevel=INFO)
Definition: precedence.py:150
def _declare_data_deps(self, algo_name, algo)
Definition: precedence.py:215
def _buildFilePath(filePath)
Definition: precedence.py:13
decltype(auto) range(Args &&... args)
Zips multiple containers together to form a single range.
def __init__(self, pattern, seed=None)
Definition: precedence.py:95