The Gaudi Framework  master (37c0b60a)
precedence.py
Go to the documentation of this file.
1 
11 import json
12 import os
13 import random
14 import sys
15 
16 # FIXME: workaround for the old version of networkx in LCG 100
17 import warnings
18 
19 warnings.filterwarnings("ignore", message='"is" with a literal', category=SyntaxWarning)
20 
21 import networkx as nx
22 from Configurables import CPUCruncher, Gaudi__Sequencer
23 
24 from Gaudi.Configuration import INFO
25 
26 
def _buildFilePath(filePath):
    """Resolve ``filePath`` to an existing file and return the resolved path.

    A path that already exists is returned unchanged.  Otherwise it is looked
    up, in this order, under ``$ENV_PROJECT_SOURCE_DIR/GaudiHive/data`` and
    ``$ENV_PROJECT_SOURCE_DIR/Gaudi/GaudiHive/data`` (an unset variable is
    treated as an empty string); the first existing candidate is returned as
    a real path.  When nothing matches, an error is printed and the process
    is terminated through ``sys.exit(1)``.
    """
    # Fast path: the caller already gave us something usable
    if os.path.exists(filePath):
        return filePath

    sourceDir = os.environ.get("ENV_PROJECT_SOURCE_DIR", "")
    dataDirs = (
        ("GaudiHive", "data"),
        ("Gaudi", "GaudiHive", "data"),
    )

    for subdirs in dataDirs:
        candidate = os.path.realpath(os.path.join(sourceDir, *subdirs, filePath))
        if os.path.exists(candidate):
            return candidate

    print(
        "\nERROR: invalid file path '%s'. "
        "It must be either absolute, or relative to "
        "'$ENV_PROJECT_SOURCE_DIR/GaudiHive/data/' or to "
        "'$ENV_PROJECT_SOURCE_DIR/Gaudi/GaudiHive/data/'." % filePath
    )
    sys.exit(1)
59 
60 
class UniformTimeValue(object):
    """Timing provider that hands the same runtime to every algorithm."""

    def __init__(self, avgRuntime, varRuntime=0):
        """Store the average runtime and its variance shared by all algorithms."""
        self.avgRuntime, self.varRuntime = avgRuntime, varRuntime

    def get(self, algoName=""):
        """Return the ``(avgRuntime, varRuntime)`` tuple; ``algoName`` is ignored."""
        timing = (self.avgRuntime, self.varRuntime)
        return timing
72 
73 
class RealTimeValue(object):
    """A class to manage real algorithm timing, looked up in a JSON timing library."""

    def __init__(self, path, defaultTime, factor=1):
        """
        Keyword arguments:
        path -- path to the JSON timing library, resolved through _buildFilePath
        defaultTime -- run time, assigned to an algorithm if no time is found in provided timing library
                       (and it will also be scaled by the 'factor' argument)
        factor -- multiplier applied to every run time returned by get()
        """

        self.path = os.path.realpath(_buildFilePath(path))
        self.factor = factor
        self.defaultTime = defaultTime  # typically 0.05s
        self.varRuntime = 0

        # Parse the library and release the handle right away instead of
        # keeping the file open for the lifetime of the object.
        with open(self.path) as timingFile:
            self.timings = json.load(timingFile)
        # Attribute kept for backward compatibility; it refers to a closed file.
        self.file = timingFile

    def get(self, algoName=""):
        """Get time and its variance (in a tuple) for a given algorithm name.

        The name is looked up as given and then with its first letter
        capitalised; if neither is found, the default time is used and a
        warning is printed.  The returned time is scaled by ``factor``.
        """

        if algoName in self.timings:
            runtime = float(self.timings[algoName])
        else:
            # Slicing instead of indexing: an empty name must fall through to
            # the default time rather than raise IndexError.
            capAlgoName = algoName[:1].upper() + algoName[1:]

            if capAlgoName in self.timings:
                runtime = float(self.timings[capAlgoName])
            else:
                runtime = self.defaultTime
                print(
                    "WARNING: Timing for %s (or %s) not found in the provided library, using default one: %s"
                    % (algoName, capAlgoName, runtime)
                )

        return runtime * self.factor, self.varRuntime
111 
112 
class UniformBooleanValue(object):
    """Boolean provider that returns the same stored value on every request."""

    def __init__(self, value):
        """Remember ``value``; every ``get()`` call hands it back."""
        self.value = value

    def get(self):
        """Return the value supplied at construction time."""
        stored = self.value
        return stored
119 
120 
class RndBiasedBooleanValue(object):
    """Provides randomly ordered set of boolean values with requested proportion of True and False."""

    def __init__(self, pattern, seed=None):
        """
        Keyword arguments:
        pattern -- either a dictionary describing proportion of True and False (e.g., {True:5,False:15}), or
                   a list/tuple containing a pattern to be used as-is (e.g., [False,True,True,False]).
                   A dictionary must provide both the True and the False key.
        seed -- an int, long or other hashable object to initialize random number generator
                (passed to random.seed as-is, i.e. it re-seeds the module-level generator);
                only used when the pattern is given as a dictionary

        Raises:
        ValueError -- if the dictionary requests a pattern of non-positive length
        TypeError -- if the pattern is neither a dictionary nor a list/tuple
        """

        if isinstance(pattern, dict):
            proportion = pattern

            length = proportion[True] + proportion[False]
            if length <= 0:
                # Raising a plain string is itself a TypeError in Python 3
                raise ValueError("ERROR: Wrong set length requested: %i " % length)

            self.pattern = [False] * proportion[False] + [True] * proportion[True]

            if seed is not None:
                random.seed(seed)

            random.shuffle(self.pattern)

        elif isinstance(pattern, (list, tuple)):
            self.pattern = pattern
        else:
            raise TypeError("ERROR: unknown pattern type")

        # get() consumes this generator; without it every get() call would fail
        self.generator = self._create_generator(self.pattern)

    def _create_generator(self, pattern):
        """Yield the entries of ``pattern`` one by one, in order."""
        for b in pattern:
            yield b

    def get(self):
        """Return the next value of the pattern (StopIteration once it is exhausted)."""
        return next(self.generator)

    def get_pattern(self):
        """Return the complete pattern the values are drawn from."""
        return self.pattern
164 
165 
class CruncherSequence(object):
    """Constructs the sequence tree of CPUCrunchers with provided control flow and data flow precedence rules."""

    # Class-level defaults, kept so the attributes stay readable on the class
    # itself; __init__ rebinds every one of them per instance.
    unique_sequencers = []
    dupl_seqs = {}
    OR_sequencers = []
    unique_algos = []
    dupl_algos = {}

    unique_data_objects = []

    def __init__(
        self,
        timeValue,
        BlockingBoolValue,
        sleepFraction,
        cfgPath,
        dfgPath,
        topSequencer,
        showStat=False,
        timeline=False,
        outputLevel=INFO,
        cardinality=1,
    ):
        """
        Keyword arguments:
        timeValue -- timeValue object to set algorithm execution time
        BlockingBoolValue -- *BooleanValue object to set whether an algorithm has to experience CPU-blocking execution
        sleepFraction -- value given to a CPUCruncher as SleepFraction when BlockingBoolValue.get() is true
                         (0.0 is used otherwise)
        cfgPath -- relative to $ENV_PROJECT_SOURCE_DIR/GaudiHive/data path to GRAPHML file with control flow dependencies
        dfgPath -- relative to $ENV_PROJECT_SOURCE_DIR/GaudiHive/data path to GRAPHML file with data flow dependencies
        topSequencer -- name of the control flow graph node the tree is built from; also the name of the top sequencer
        showStat -- print out statistics on precedence graph
        timeline -- value given to every CPUCruncher as Timeline
        outputLevel -- value given to every CPUCruncher as OutputLevel (daughter sequencers are created with INFO)
        cardinality -- value given to every CPUCruncher as Cardinality
        """

        self.cardinality = cardinality
        self.timeValue = timeValue
        self.BlockingBoolValue = BlockingBoolValue
        self.sleepFraction = sleepFraction

        self.cfg = nx.read_graphml(_buildFilePath(cfgPath))
        self.dfg = nx.read_graphml(_buildFilePath(dfgPath))

        self.enableTimeline = timeline

        self.outputLevel = outputLevel

        # Per-instance bookkeeping: the mutable class attributes would be
        # shared by all instances, so the statistics of one sequence would
        # leak into the next one.
        self.unique_sequencers = []
        self.dupl_seqs = {}
        self.OR_sequencers = []
        self.unique_algos = []
        self.dupl_algos = {}
        self.unique_data_objects = []

        # Generate control flow part
        self.sequencer = self._generate_sequence(topSequencer)

        if showStat:
            self._print_statistics()

    def _print_statistics(self):
        """Print the node counts collected while the sequence tree was built."""

        print("\n===== Statistics on Algorithms =====")
        print(
            "Total number of algorithm nodes: ",
            len(self.unique_algos)
            + sum(count - 1 for count in self.dupl_algos.values()),
        )
        print("Number of unique algorithms: ", len(self.unique_algos))
        print(
            "  -->",
            len(self.dupl_algos),
            "of them being re-used with the following distribution: ",
            list(self.dupl_algos.values()),
        )

        print("\n===== Statistics on Sequencers =====")
        print(
            "Total number of sequencers: ",
            len(self.unique_sequencers)
            + sum(count - 1 for count in self.dupl_seqs.values()),
        )
        print("Number of unique sequencers: ", len(self.unique_sequencers))
        print(
            "  -->",
            len(self.dupl_seqs),
            "of them being re-used with the following distribution: ",
            list(self.dupl_seqs.values()),
        )
        print("Number of OR-sequencers: ", len(self.OR_sequencers))

        print("\n===== Statistics on DataObjects =====")
        print("Number of unique DataObjects: ", len(self.unique_data_objects))
        print()

    def get(self):
        """Return the top sequencer of the generated tree."""
        return self.sequencer

    def _declare_data_deps(self, algo_name, algo):
        """Declare data inputs and outputs for a given algorithm.

        Sources of the data flow edges entering ``algo_name`` are added to
        ``algo.inpKeys`` and targets of the edges leaving it to
        ``algo.outKeys``; each data name is added at most once and is also
        recorded in ``unique_data_objects``.
        """

        # Declare data inputs
        for dataName, _ in self.dfg.in_edges(algo_name):
            if dataName not in self.unique_data_objects:
                self.unique_data_objects.append(dataName)

            if dataName not in algo.inpKeys:
                algo.inpKeys.append(dataName)

        # Declare data outputs
        for _, dataName in self.dfg.out_edges(algo_name):
            if dataName not in self.unique_data_objects:
                self.unique_data_objects.append(dataName)

            if dataName not in algo.outKeys:
                algo.outKeys.append(dataName)

    def _generate_sequence(self, name, seq=None):
        """Assemble the tree of sequencers.

        Walks the control flow graph nodes listed under ``name``, creating a
        daughter sequencer (and recursing into it) for sequencer-like nodes
        and a CPUCruncher for everything else.  Returns ``seq``, which is
        created as a Gaudi__Sequencer called ``name`` when not supplied.
        """

        if seq is None:
            seq = Gaudi__Sequencer(name, ShortCircuit=False)

        for n in self.cfg[name]:
            attrs = self.cfg.nodes[n]

            # extract entity name and type
            algo_name = n.split("/")[1] if "/" in n else n

            if "type" in attrs:
                # first rely on explicit type, if given
                algo_type = attrs.get("type")
            else:
                # if the type is not given explicitly, try to extract it from entity name,
                # and, if unsuccessful, assume it is an algorithm
                algo_type = n.split("/")[0] if "/" in n else "Algorithm"

            if algo_type in ("GaudiSequencer", "AthSequencer", "ProcessPhase"):
                if algo_name in ("RecoITSeq", "RecoOTSeq", "RecoTTSeq"):
                    continue

                if n not in self.unique_sequencers:
                    self.unique_sequencers.append(n)
                else:
                    # second sighting starts the count at 2
                    self.dupl_seqs[n] = self.dupl_seqs.get(n, 1) + 1

                seq_daughter = Gaudi__Sequencer(algo_name, OutputLevel=INFO)
                if attrs.get("ModeOR") == "True":
                    self.OR_sequencers.append(n)
                    seq_daughter.ModeOR = True
                seq_daughter.ShortCircuit = False
                # NOTE(review): nesting reconstructed from a flattened listing -
                # confirm the recursion belongs inside this membership guard.
                if seq_daughter not in seq.Members:
                    seq.Members += [seq_daughter]
                    # iterate deeper
                    self._generate_sequence(n, seq_daughter)
            else:
                if n not in self.unique_algos:
                    self.unique_algos.append(n)
                else:
                    # second sighting starts the count at 2
                    self.dupl_algos[n] = self.dupl_algos.get(n, 1) + 1

                avgRuntime, varRuntime = self.timeValue.get(algo_name)

                algo_daughter = CPUCruncher(
                    algo_name,
                    Cardinality=self.cardinality,
                    OutputLevel=self.outputLevel,
                    varRuntime=varRuntime,
                    avgRuntime=avgRuntime,
                    SleepFraction=self.sleepFraction
                    if self.BlockingBoolValue.get()
                    else 0.0,
                    Timeline=self.enableTimeline,
                )

                self._declare_data_deps(algo_name, algo_daughter)

                if algo_daughter not in seq.Members:
                    seq.Members += [algo_daughter]

        return seq
precedence.UniformTimeValue.varRuntime
varRuntime
Definition: precedence.py:66
precedence.RealTimeValue.path
path
Definition: precedence.py:83
precedence.UniformTimeValue
Definition: precedence.py:61
precedence.CruncherSequence.cfg
cfg
Definition: precedence.py:192
precedence.CruncherSequence.__init__
def __init__(self, timeValue, BlockingBoolValue, sleepFraction, cfgPath, dfgPath, topSequencer, showStat=False, timeline=False, outputLevel=INFO, cardinality=1)
Definition: precedence.py:177
precedence.CruncherSequence.dupl_seqs
dupl_seqs
Definition: precedence.py:170
precedence.RealTimeValue.get
def get(self, algoName="")
Definition: precedence.py:91
precedence.RealTimeValue.factor
factor
Definition: precedence.py:84
precedence.CruncherSequence.unique_sequencers
unique_sequencers
Definition: precedence.py:169
precedence.UniformBooleanValue.get
def get(self)
Definition: precedence.py:117
precedence.CruncherSequence.unique_data_objects
unique_data_objects
Definition: precedence.py:175
precedence.CruncherSequence.dupl_algos
dupl_algos
Definition: precedence.py:173
precedence.RndBiasedBooleanValue.get
def get(self)
Definition: precedence.py:159
precedence.UniformBooleanValue.value
value
Definition: precedence.py:115
precedence.RealTimeValue
Definition: precedence.py:74
precedence.RndBiasedBooleanValue.generator
generator
Definition: precedence.py:153
precedence.CruncherSequence.timeValue
timeValue
Definition: precedence.py:188
precedence.CruncherSequence.sleepFraction
sleepFraction
Definition: precedence.py:190
precedence.CruncherSequence.cardinality
cardinality
Definition: precedence.py:187
Gaudi.Configuration
Definition: Configuration.py:1
precedence.RndBiasedBooleanValue.pattern
pattern
Definition: precedence.py:139
precedence.UniformTimeValue.avgRuntime
avgRuntime
Definition: precedence.py:65
precedence.RealTimeValue.__init__
def __init__(self, path, defaultTime, factor=1)
Definition: precedence.py:77
precedence.RealTimeValue.timings
timings
Definition: precedence.py:89
precedence.CruncherSequence._generate_sequence
def _generate_sequence(self, name, seq=None)
Definition: precedence.py:275
precedence.RndBiasedBooleanValue
Definition: precedence.py:121
precedence.CruncherSequence
Definition: precedence.py:166
precedence.UniformTimeValue.get
def get(self, algoName="")
Definition: precedence.py:68
precedence.CruncherSequence.dfg
dfg
Definition: precedence.py:193
precedence.CruncherSequence.outputLevel
outputLevel
Definition: precedence.py:197
precedence.RndBiasedBooleanValue.get_pattern
def get_pattern(self)
Definition: precedence.py:162
precedence.RndBiasedBooleanValue._create_generator
def _create_generator(self, pattern)
Definition: precedence.py:155
precedence.RealTimeValue.defaultTime
defaultTime
Definition: precedence.py:85
precedence._buildFilePath
def _buildFilePath(filePath)
Definition: precedence.py:27
precedence.CruncherSequence.unique_algos
unique_algos
Definition: precedence.py:172
precedence.UniformTimeValue.__init__
def __init__(self, avgRuntime, varRuntime=0)
Definition: precedence.py:64
precedence.CruncherSequence._declare_data_deps
def _declare_data_deps(self, algo_name, algo)
Definition: precedence.py:254
precedence.RndBiasedBooleanValue.__init__
def __init__(self, pattern, seed=None)
Definition: precedence.py:124
precedence.CruncherSequence.get
def get(self)
Definition: precedence.py:251
precedence.RealTimeValue.file
file
Definition: precedence.py:88
precedence.CruncherSequence.enableTimeline
enableTimeline
Definition: precedence.py:195
precedence.CruncherSequence.BlockingBoolValue
BlockingBoolValue
Definition: precedence.py:189
precedence.UniformBooleanValue
Definition: precedence.py:113
precedence.RealTimeValue.varRuntime
varRuntime
Definition: precedence.py:86
precedence.CruncherSequence.sequencer
sequencer
Definition: precedence.py:200
precedence.CruncherSequence.OR_sequencers
OR_sequencers
Definition: precedence.py:171
Gaudi::Functional::details::zip::range
decltype(auto) range(Args &&... args)
Zips multiple containers together to form a single range.
Definition: details.h:97
precedence.UniformBooleanValue.__init__
def __init__(self, value)
Definition: precedence.py:114