The Gaudi Framework  v37r1 (a7f61348)
precedence.py
Go to the documentation of this file.
1 
11 import json
12 import os
13 import random
14 import sys
15 
16 # FIXME: workaround for the old version of networkx in LCG 100
17 import warnings
18 
19 warnings.filterwarnings("ignore", message='"is" with a literal', category=SyntaxWarning)
20 
21 import networkx as nx
22 from Configurables import CPUCruncher, Gaudi__Sequencer
23 from Gaudi.Configuration import INFO
24 
25 
def _buildFilePath(filePath):
    """Resolve ``filePath`` to the location of an existing file.

    A path that already exists is returned unchanged. Otherwise the path is
    looked up, in this order, under ``$ENV_PROJECT_SOURCE_DIR/GaudiHive/data``
    and ``$ENV_PROJECT_SOURCE_DIR/Gaudi/GaudiHive/data`` (an unset variable is
    treated as an empty string) and the first existing candidate is returned
    as a real path.

    If no candidate exists, an error message is printed and the interpreter
    is terminated through ``sys.exit(1)``.
    """
    if os.path.exists(filePath):
        return filePath

    sourceDir = os.environ.get("ENV_PROJECT_SOURCE_DIR", "")
    dataLocations = (
        ("GaudiHive", "data"),
        ("Gaudi", "GaudiHive", "data"),
    )
    for location in dataLocations:
        candidate = os.path.realpath(os.path.join(sourceDir, *location, filePath))
        if os.path.exists(candidate):
            return candidate

    # Nothing matched: report and abort, there is no fallback.
    print(
        "\nERROR: invalid file path '%s'. "
        "It must be either absolute, or relative to "
        "'$ENV_PROJECT_SOURCE_DIR/GaudiHive/data/' or to "
        "'$ENV_PROJECT_SOURCE_DIR/Gaudi/GaudiHive/data/'." % filePath
    )
    sys.exit(1)
58 
59 
class UniformTimeValue(object):
    """Timing provider that reports the same runtime figures for every algorithm."""

    def __init__(self, avgRuntime, varRuntime=0):
        """
        Keyword arguments:
        avgRuntime -- average run time handed out for any algorithm
        varRuntime -- run time variance handed out for any algorithm
        """
        self.avgRuntime, self.varRuntime = avgRuntime, varRuntime

    def get(self, algoName=""):
        """Return the (average run time, variance) tuple; ``algoName`` does not influence the result."""
        timing = (self.avgRuntime, self.varRuntime)
        return timing
71 
72 
class RealTimeValue(object):
    """A class to manage real algorithm timing, read from a JSON timing library."""

    def __init__(self, path, defaultTime, factor=1):
        """
        Keyword arguments:
        path -- path to the JSON timing library; it is resolved with _buildFilePath and
                is expected to hold an object keyed by algorithm name
        defaultTime -- run time, assigned to an algorithm if no time is found in provided timing library
                       (and it will also be scaled by the 'factor' argument)
        factor -- multiplier applied to every run time returned by get()
        """

        self.path = os.path.realpath(_buildFilePath(path))
        self.factor = factor
        self.defaultTime = defaultTime  # typically 0.05s
        self.varRuntime = 0

        # Read the library once and release the file handle right away
        # instead of leaving it open for the lifetime of the object.
        with open(self.path) as timingFile:
            self.timings = json.load(timingFile)
        # The attribute is kept so that code referring to it keeps working;
        # the handle it holds is already closed.
        self.file = timingFile

    def get(self, algoName=""):
        """Get time for a given algorithm name.

        The name is looked up as given and, failing that, with its first
        letter capitalised. If neither is present, a warning is printed and
        the default time is used. The result is scaled by ``factor``.

        Returns the tuple (scaled run time, run time variance).
        """

        if algoName in self.timings:
            runtime = float(self.timings[algoName])
        else:
            # Slicing (rather than indexing) keeps an empty name from raising
            # IndexError, so it simply falls through to the default time.
            capAlgoName = algoName[:1].upper() + algoName[1:]

            if capAlgoName in self.timings:
                runtime = float(self.timings[capAlgoName])
            else:
                runtime = self.defaultTime
                print(
                    "WARNING: Timing for %s (or %s) not found in the provided library, using default one: %s"
                    % (algoName, capAlgoName, runtime)
                )

        runtime = runtime * self.factor

        return runtime, self.varRuntime
110 
111 
class UniformBooleanValue(object):
    """Boolean provider that hands out one fixed value on every request."""

    def __init__(self, value):
        """Remember ``value``; it is returned as-is by every call to get()."""
        self.value = value

    def get(self):
        """Return the value given at construction."""
        stored = self.value
        return stored
118 
119 
class RndBiasedBooleanValue(object):
    """Provides randomly ordered set of boolean values with requested proportion of True and False."""

    def __init__(self, pattern, seed=None):
        """
        Keyword arguments:
        pattern -- either a dictionary describing proportion of True and False (e.g., {True:5,False:15}), or
                   a list/tuple containing a pattern to be used as-is (e.g., [False,True,True,False])
        seed -- an int, long or other hashable object to initialize random number generator
                (passed to random.seed as-is, i.e. it re-seeds the module-level generator);
                only used for dictionary patterns

        Raises:
        ValueError -- the dictionary pattern requests a set of zero or negative length
        TypeError -- the pattern is neither a dictionary nor a list/tuple
        """

        if isinstance(pattern, dict):
            proportion = pattern

            length = proportion[True] + proportion[False]
            if length <= 0:
                # A bare string cannot be raised in Python 3; use a real exception.
                raise ValueError("ERROR: Wrong set length requested: %i " % length)

            self.pattern = [False] * proportion[False] + [True] * proportion[True]

            if seed is not None:
                random.seed(seed)

            random.shuffle(self.pattern)

        elif isinstance(pattern, (list, tuple)):
            self.pattern = pattern
        else:
            raise TypeError("ERROR: unknown pattern type")

        # get() draws from this generator, so it has to exist for both
        # pattern flavours.
        self.generator = self._create_generator(self.pattern)

    def _create_generator(self, pattern):
        """Yield the elements of ``pattern`` one by one, in order."""
        yield from pattern

    def get(self):
        """Return the next value of the pattern.

        Raises StopIteration once every value of the pattern has been handed out.
        """
        return next(self.generator)

    def get_pattern(self):
        """Return the complete pattern the values are drawn from."""
        return self.pattern
163 
164 
class CruncherSequence(object):
    """Builds a tree of sequencers filled with CPUCrunchers from control flow and data flow precedence graphs."""

    # Bookkeeping behind the optional statistics printout.
    # NOTE(review): these containers are class attributes, so all instances
    # append to the same lists and dictionaries - confirm this is intended.
    unique_sequencers = []
    dupl_seqs = {}
    OR_sequencers = []
    unique_algos = []
    dupl_algos = {}

    unique_data_objects = []

    def __init__(
        self,
        timeValue,
        BlockingBoolValue,
        sleepFraction,
        cfgPath,
        dfgPath,
        topSequencer,
        showStat=False,
        timeline=False,
        outputLevel=INFO,
        cardinality=1,
    ):
        """
        Keyword arguments:
        timeValue -- object whose get(name) returns the (average run time, variance) pair of an algorithm
        BlockingBoolValue -- *BooleanValue object; when its get() is true the algorithm receives
                             'sleepFraction' as SleepFraction, otherwise 0.0
        sleepFraction -- SleepFraction given to the algorithms selected by BlockingBoolValue
        cfgPath -- path (resolved with _buildFilePath) to GRAPHML file with control flow dependencies
        dfgPath -- path (resolved with _buildFilePath) to GRAPHML file with data flow dependencies
        topSequencer -- name of the control flow node the tree starts from; also names the top sequencer
        showStat -- print out statistics on precedence graph
        timeline -- forwarded to every CPUCruncher as Timeline
        outputLevel -- forwarded to every CPUCruncher as OutputLevel
        cardinality -- forwarded to every CPUCruncher as Cardinality
        """

        # Settings handed to the CPUCrunchers
        self.timeValue = timeValue
        self.BlockingBoolValue = BlockingBoolValue
        self.sleepFraction = sleepFraction
        self.cardinality = cardinality
        self.enableTimeline = timeline
        self.outputLevel = outputLevel

        # Precedence graphs
        self.cfg = nx.read_graphml(_buildFilePath(cfgPath))
        self.dfg = nx.read_graphml(_buildFilePath(dfgPath))

        # Generate control flow part
        self.sequencer = self._generate_sequence(topSequencer)

        if showStat:
            self._print_statistics()

    def _print_statistics(self):
        """Print the counters collected while the sequence tree was assembled."""

        algoReuse = list(self.dupl_algos.values())
        seqReuse = list(self.dupl_seqs.values())

        print("\n===== Statistics on Algorithms =====")
        print(
            "Total number of algorithm nodes: ",
            len(self.unique_algos) + sum(count - 1 for count in algoReuse),
        )
        print("Number of unique algorithms: ", len(self.unique_algos))
        print(
            " -->",
            len(self.dupl_algos),
            "of them being re-used with the following distribution: ",
            algoReuse,
        )

        print("\n===== Statistics on Sequencers =====")
        print(
            "Total number of sequencers: ",
            len(self.unique_sequencers) + sum(count - 1 for count in seqReuse),
        )
        print("Number of unique sequencers: ", len(self.unique_sequencers))
        print(
            " -->",
            len(self.dupl_seqs),
            "of them being re-used with the following distribution: ",
            seqReuse,
        )
        print("Number of OR-sequencers: ", len(self.OR_sequencers))

        print("\n===== Statistics on DataObjects =====")
        print("Number of unique DataObjects: ", len(self.unique_data_objects))
        print()

    def get(self):
        """Return the top sequencer assembled at construction."""
        return self.sequencer

    def _declare_data_deps(self, algo_name, algo):
        """Attach the data inputs and outputs found in the data flow graph to ``algo``."""

        # Inputs: source end of every edge arriving at the algorithm
        for dataName, _ in self.dfg.in_edges(algo_name):
            if dataName not in self.unique_data_objects:
                self.unique_data_objects.append(dataName)

            if dataName not in algo.inpKeys:
                algo.inpKeys.append(dataName)

        # Outputs: target end of every edge leaving the algorithm
        for _, dataName in self.dfg.out_edges(algo_name):
            if dataName not in self.unique_data_objects:
                self.unique_data_objects.append(dataName)

            if dataName not in algo.outKeys:
                algo.outKeys.append(dataName)

    def _generate_sequence(self, name, seq=None):
        """Recursively assemble the tree of sequencers below the control flow node ``name``.

        When ``seq`` is not given, a new sequencer called ``name`` is created
        and used as the root. The (possibly new) sequencer is returned.
        """

        if not seq:
            seq = Gaudi__Sequencer(name, ShortCircuit=False)

        for n in self.cfg[name]:
            nodeAttrs = self.cfg.nodes[n]
            isQualified = "/" in n

            # Node labels may look like "<type>/<name>"
            entityName = n.split("/")[1] if isQualified else n

            # An explicit 'type' attribute wins; otherwise fall back to the
            # label prefix and, without one, assume an algorithm.
            if "type" in nodeAttrs:
                entityType = nodeAttrs.get("type")
            elif isQualified:
                entityType = n.split("/")[0]
            else:
                entityType = "Algorithm"

            if entityType in ("GaudiSequencer", "AthSequencer", "ProcessPhase"):
                if entityName in ("RecoITSeq", "RecoOTSeq", "RecoTTSeq"):
                    continue

                if n in self.unique_sequencers:
                    # second sighting starts the counter at 2
                    self.dupl_seqs[n] = self.dupl_seqs.get(n, 1) + 1
                else:
                    self.unique_sequencers.append(n)

                seq_daughter = Gaudi__Sequencer(entityName, OutputLevel=INFO)
                if nodeAttrs.get("ModeOR") == "True":
                    self.OR_sequencers.append(n)
                    seq_daughter.ModeOR = True
                seq_daughter.ShortCircuit = False

                # NOTE(review): descending only for newly attached sequencers is
                # reconstructed from a listing without indentation - confirm.
                if seq_daughter not in seq.Members:
                    seq.Members += [seq_daughter]
                    # iterate deeper
                    self._generate_sequence(n, seq_daughter)
                continue

            # Everything else is treated as an algorithm
            if n in self.unique_algos:
                self.dupl_algos[n] = self.dupl_algos.get(n, 1) + 1
            else:
                self.unique_algos.append(n)

            avgRuntime, varRuntime = self.timeValue.get(entityName)
            sleepFraction = self.sleepFraction if self.BlockingBoolValue.get() else 0.0

            algo_daughter = CPUCruncher(
                entityName,
                Cardinality=self.cardinality,
                OutputLevel=self.outputLevel,
                varRuntime=varRuntime,
                avgRuntime=avgRuntime,
                SleepFraction=sleepFraction,
                Timeline=self.enableTimeline,
            )

            self._declare_data_deps(entityName, algo_daughter)

            if algo_daughter not in seq.Members:
                seq.Members += [algo_daughter]

        return seq
GaudiHive.precedence._buildFilePath
def _buildFilePath(filePath)
Definition: precedence.py:26
GaudiHive.precedence.CruncherSequence.__init__
def __init__(self, timeValue, BlockingBoolValue, sleepFraction, cfgPath, dfgPath, topSequencer, showStat=False, timeline=False, outputLevel=INFO, cardinality=1)
Definition: precedence.py:176
GaudiHive.precedence.RndBiasedBooleanValue.get_pattern
def get_pattern(self)
Definition: precedence.py:161
GaudiHive.precedence.CruncherSequence.enableTimeline
enableTimeline
Definition: precedence.py:194
GaudiHive.precedence.RealTimeValue.defaultTime
defaultTime
Definition: precedence.py:84
GaudiHive.precedence.RealTimeValue.timings
timings
Definition: precedence.py:88
GaudiHive.precedence.UniformBooleanValue.__init__
def __init__(self, value)
Definition: precedence.py:113
GaudiHive.precedence.RealTimeValue.get
def get(self, algoName="")
Definition: precedence.py:90
GaudiHive.precedence.CruncherSequence.unique_sequencers
unique_sequencers
Definition: precedence.py:168
GaudiHive.precedence.CruncherSequence.timeValue
timeValue
Definition: precedence.py:187
GaudiHive.precedence.CruncherSequence.sleepFraction
sleepFraction
Definition: precedence.py:189
GaudiHive.precedence.CruncherSequence.dupl_algos
dupl_algos
Definition: precedence.py:172
GaudiHive.precedence.RealTimeValue.factor
factor
Definition: precedence.py:83
GaudiHive.precedence.UniformBooleanValue
Definition: precedence.py:112
GaudiHive.precedence.RealTimeValue.file
file
Definition: precedence.py:87
GaudiHive.precedence.UniformTimeValue.avgRuntime
avgRuntime
Definition: precedence.py:64
GaudiHive.precedence.CruncherSequence.unique_algos
unique_algos
Definition: precedence.py:171
GaudiHive.precedence.RealTimeValue.__init__
def __init__(self, path, defaultTime, factor=1)
Definition: precedence.py:76
GaudiHive.precedence.RndBiasedBooleanValue
Definition: precedence.py:120
GaudiHive.precedence.CruncherSequence.get
def get(self)
Definition: precedence.py:250
GaudiHive.precedence.CruncherSequence.BlockingBoolValue
BlockingBoolValue
Definition: precedence.py:188
GaudiHive.precedence.CruncherSequence.sequencer
sequencer
Definition: precedence.py:199
Gaudi.Configuration
Definition: Configuration.py:1
GaudiHive.precedence.CruncherSequence.cfg
cfg
Definition: precedence.py:191
GaudiHive.precedence.CruncherSequence.dfg
dfg
Definition: precedence.py:192
GaudiHive.precedence.UniformTimeValue
Definition: precedence.py:60
GaudiHive.precedence.RndBiasedBooleanValue.get
def get(self)
Definition: precedence.py:158
GaudiHive.precedence.RndBiasedBooleanValue.generator
generator
Definition: precedence.py:152
GaudiHive.precedence.UniformBooleanValue.value
value
Definition: precedence.py:114
GaudiHive.precedence.RealTimeValue.path
path
Definition: precedence.py:82
GaudiHive.precedence.UniformTimeValue.varRuntime
varRuntime
Definition: precedence.py:65
GaudiHive.precedence.CruncherSequence.OR_sequencers
OR_sequencers
Definition: precedence.py:170
GaudiHive.precedence.UniformBooleanValue.get
def get(self)
Definition: precedence.py:116
GaudiHive.precedence.RndBiasedBooleanValue.pattern
pattern
Definition: precedence.py:138
GaudiHive.precedence.RealTimeValue
Definition: precedence.py:73
GaudiHive.precedence.CruncherSequence._generate_sequence
def _generate_sequence(self, name, seq=None)
Definition: precedence.py:274
GaudiHive.precedence.RndBiasedBooleanValue._create_generator
def _create_generator(self, pattern)
Definition: precedence.py:154
GaudiHive.precedence.CruncherSequence.outputLevel
outputLevel
Definition: precedence.py:196
GaudiHive.precedence.CruncherSequence
Definition: precedence.py:165
GaudiHive.precedence.CruncherSequence.unique_data_objects
unique_data_objects
Definition: precedence.py:174
GaudiHive.precedence.RealTimeValue.varRuntime
varRuntime
Definition: precedence.py:85
GaudiHive.precedence.CruncherSequence._declare_data_deps
def _declare_data_deps(self, algo_name, algo)
Definition: precedence.py:253
GaudiHive.precedence.RndBiasedBooleanValue.__init__
def __init__(self, pattern, seed=None)
Definition: precedence.py:123
GaudiHive.precedence.CruncherSequence.dupl_seqs
dupl_seqs
Definition: precedence.py:169
GaudiHive.precedence.UniformTimeValue.__init__
def __init__(self, avgRuntime, varRuntime=0)
Definition: precedence.py:63
Gaudi::Functional::details::zip::range
decltype(auto) range(Args &&... args)
Zips multiple containers together to form a single range.
Definition: details.h:98
GaudiHive.precedence.CruncherSequence.cardinality
cardinality
Definition: precedence.py:186
GaudiHive.precedence.UniformTimeValue.get
def get(self, algoName="")
Definition: precedence.py:67