The Gaudi Framework  v36r1 (3e2fb5a8)
precedence.py
Go to the documentation of this file.
1 
11 from __future__ import print_function
12 import os
13 import sys
14 import random
15 import string
16 import json
17 
18 # FIXME: workaround for the old version of networkx in LCG 100
19 import warnings
20 warnings.filterwarnings(
21  "ignore", message='"is" with a literal', category=SyntaxWarning)
22 
23 import networkx as nx
24 
25 from Gaudi.Configuration import INFO
26 from Configurables import GaudiSequencer, CPUCruncher
27 
28 
def _buildFilePath(filePath):
    """Resolve 'filePath' to an existing file system location.

    The path is returned unchanged if it already exists. Otherwise it is
    looked up (in this order) relative to
    '$ENV_CMAKE_SOURCE_DIR/GaudiHive/data/' and
    '$ENV_CMAKE_SOURCE_DIR/Gaudi/GaudiHive/data/', and the real path of the
    first existing candidate is returned. An unset ENV_CMAKE_SOURCE_DIR is
    treated as an empty string.

    If no candidate exists, an error message is printed and the process is
    terminated through sys.exit(1).
    """

    # Guard clause: the path is usable as given
    if os.path.exists(filePath):
        return filePath

    sourceDir = os.environ.get('ENV_CMAKE_SOURCE_DIR', '')
    dataLocations = (
        ("GaudiHive", "data"),
        ("Gaudi", "GaudiHive", "data"),
    )

    for location in dataLocations:
        pieces = (sourceDir, ) + location + (filePath, )
        candidate = os.path.realpath(os.path.join(*pieces))
        if os.path.exists(candidate):
            return candidate

    print("\nERROR: invalid file path '%s'. "
          "It must be either absolute, or relative to "
          "'$ENV_CMAKE_SOURCE_DIR/GaudiHive/data/' or to "
          "'$ENV_CMAKE_SOURCE_DIR/Gaudi/GaudiHive/data/'." % filePath)
    sys.exit(1)

    # Only reachable if sys.exit() has been replaced by something that
    # returns; mirrors the value the lookup ended with.
    return candidate
52 
53 
class UniformTimeValue(object):
    """Timing provider handing out the same run time to every algorithm."""

    def __init__(self, avgRuntime, varRuntime=0):
        """Remember the average run time and its variance (0 by default)."""

        self.avgRuntime, self.varRuntime = avgRuntime, varRuntime

    def get(self, algoName=''):
        """Return the (average run time, variance) tuple.

        The algorithm name is accepted for interface compatibility with the
        other timing providers, but it does not influence the result.
        """

        timing = (self.avgRuntime, self.varRuntime)
        return timing
66 
67 
class RealTimeValue(object):
    """A class to manage real algorithm timing"""

    def __init__(self, path, defaultTime, factor=1):
        """
        Keyword arguments:
        path -- path to a JSON file mapping algorithm names to run times; it is
                resolved with _buildFilePath
        defaultTime -- run time, assigned to an algorithm if no time is found in provided timing library
                       (and it will also be scaled by the 'factor' argument)
        factor -- multiplier applied to every run time returned by get()
        """

        self.path = os.path.realpath(_buildFilePath(path))
        self.factor = factor
        self.defaultTime = defaultTime  # typically 0.05s
        self.varRuntime = 0

        # Read the timing library and release the file handle right away
        # (it used to stay open for the lifetime of the object).
        with open(self.path) as timingFile:
            self.timings = json.load(timingFile)
        # The attribute is kept for backward compatibility only; the handle
        # it refers to is already closed.
        self.file = timingFile

    def get(self, algoName=''):
        """Get time and its variance (in a tuple) for a given algorithm name.

        The name is looked up as given and, failing that, with its first
        character upper-cased. If neither is present in the timing library,
        'defaultTime' is used and a warning is printed. The resulting time is
        always multiplied by 'factor'.
        """

        if algoName in self.timings:
            runtime = float(self.timings[algoName])
        else:
            # Slicing (rather than indexing) keeps an empty name from raising
            # IndexError, so it simply falls through to the default time.
            capAlgoName = algoName[:1].upper() + algoName[1:]

            if capAlgoName in self.timings:
                runtime = float(self.timings[capAlgoName])
            else:
                runtime = self.defaultTime
                print(
                    "WARNING: Timing for %s (or %s) not found in the provided library, using default one: %s"
                    % (algoName, capAlgoName, runtime))

        runtime = runtime * self.factor

        return runtime, self.varRuntime
104 
105 
class UniformBooleanValue(object):
    """Boolean provider that answers every request with the same value."""

    def __init__(self, value):
        """Remember the value to be handed out by get()."""

        self.value = value

    def get(self):
        """Return the value given at construction time."""

        stored = self.value
        return stored
114 
115 
class RndBiasedBooleanValue(object):
    """Provides randomly ordered set of boolean values with requested proportion of True and False."""

    def __init__(self, pattern, seed=None):
        """
        Keyword arguments:
        pattern -- either a dictionary describing proportion of True and False (e.g., {True:5,False:15}), or
                   a list/tuple containing a pattern to be used as-is (e.g., [False,True,True,False])
        seed -- an int, long or other hashable object to initialize random number generator
                (passed to random.seed as-is; only used for dictionary patterns, ignored when None)

        Raises:
        ValueError -- if the dictionary pattern requests a non-positive total number of values
        TypeError -- if the pattern is neither a dictionary nor a list/tuple
        """

        if isinstance(pattern, dict):
            proportion = pattern

            length = proportion[True] + proportion[False]
            if length <= 0:
                # Raising a plain string is itself a TypeError, so use a
                # proper exception carrying the same message.
                raise ValueError(
                    "ERROR: Wrong set length requested: %i " % length)

            self.pattern = ([False] * proportion[False] +
                            [True] * proportion[True])

            # NOTE: this seeds the module-level random generator
            if seed is not None:
                random.seed(seed)

            random.shuffle(self.pattern)

        elif isinstance(pattern, (list, tuple)):
            self.pattern = pattern
        else:
            raise TypeError("ERROR: unknown pattern type")

        # get() draws its values from this generator; without it every call
        # to get() would fail with AttributeError.
        self.generator = self._create_generator(self.pattern)

    def _create_generator(self, pattern):
        """Yield the elements of 'pattern' one by one, in order."""

        for b in pattern:
            yield b

    def get(self):
        """Return the next value of the pattern.

        Raises StopIteration once all values of the pattern were handed out.
        """

        return next(self.generator)

    def get_pattern(self):
        """Return the complete pattern the values are taken from."""

        return self.pattern
161 
162 
class CruncherSequence(object):
    """Constructs the sequence tree of CPUCrunchers with provided control flow and data flow precedence rules."""

    # NOTE: the bookkeeping containers below are class attributes, i.e. they
    # are shared by all CruncherSequence instances and keep accumulating
    # entries across them.
    unique_sequencers = []
    dupl_seqs = {}
    OR_sequencers = []
    unique_algos = []
    dupl_algos = {}

    unique_data_objects = []

    def __init__(self,
                 timeValue,
                 BlockingBoolValue,
                 sleepFraction,
                 cfgPath,
                 dfgPath,
                 topSequencer,
                 showStat=False,
                 timeline=False,
                 outputLevel=INFO,
                 cardinality=1):
        """
        Keyword arguments:
        timeValue -- timeValue object to set algorithm execution time
        BlockingBoolValue -- *BooleanValue object to set whether an algorithm has to experience CPU-blocking execution
        sleepFraction -- value given to CPUCruncher.SleepFraction for algorithms BlockingBoolValue selects
                         (0. is used for the others)
        cfgPath -- relative to $ENV_CMAKE_SOURCE_DIR/GaudiHive/data path to GRAPHML file with control flow dependencies
        dfgPath -- relative to $ENV_CMAKE_SOURCE_DIR/GaudiHive/data path to GRAPHML file with data flow dependencies
        topSequencer -- name of the control flow graph node to start from; also the name of the top GaudiSequencer
        showStat -- print out statistics on precedence graph
        timeline -- value given to CPUCruncher.Timeline
        outputLevel -- value given to CPUCruncher.OutputLevel
        cardinality -- value given to CPUCruncher.Cardinality
        """

        self.cardinality = cardinality
        self.timeValue = timeValue
        self.BlockingBoolValue = BlockingBoolValue
        self.sleepFraction = sleepFraction

        self.cfg = nx.read_graphml(_buildFilePath(cfgPath))
        self.dfg = nx.read_graphml(_buildFilePath(dfgPath))

        self.enableTimeline = timeline

        self.outputLevel = outputLevel

        # Generate control flow part
        self.sequencer = self._generate_sequence(topSequencer)

        if showStat:
            self._print_statistics()

    def _print_statistics(self):
        """ Print the numbers of algorithms, sequencers and data objects met so far. """

        print("\n===== Statistics on Algorithms =====")
        print(
            "Total number of algorithm nodes: ",
            len(self.unique_algos) + sum(
                count - 1 for count in self.dupl_algos.values()))
        print("Number of unique algorithms: ", len(self.unique_algos))
        print(" -->", len(self.dupl_algos),
              "of them being re-used with the following distribution: ",
              list(self.dupl_algos.values()))

        print("\n===== Statistics on Sequencers =====")
        print(
            "Total number of sequencers: ",
            len(self.unique_sequencers) + sum(
                count - 1 for count in self.dupl_seqs.values()))
        print("Number of unique sequencers: ", len(self.unique_sequencers))
        print(" -->", len(self.dupl_seqs),
              "of them being re-used with the following distribution: ",
              list(self.dupl_seqs.values()))
        print("Number of OR-sequencers: ", len(self.OR_sequencers))

        print("\n===== Statistics on DataObjects =====")
        print("Number of unique DataObjects: ",
              len(self.unique_data_objects))
        print()

    def get(self):
        """ Return the top sequencer assembled at construction time. """

        return self.sequencer

    def _node_attributes(self, node):
        """ Return the attribute dictionary of a control flow graph node. """

        try:
            # networkx >= 2.0 (Graph.node does not exist any more since 2.4)
            return self.cfg.nodes[node]
        except TypeError:
            # networkx 1.x: Graph.nodes is a plain method there, the
            # attributes are only reachable through Graph.node
            return self.cfg.node[node]

    def _declare_data_deps(self, algo_name, algo):
        """ Declare data inputs and outputs for a given algorithm. """

        # Declare data inputs: sources of the edges pointing to the algorithm
        for dataName, _ in self.dfg.in_edges(algo_name):
            if dataName not in self.unique_data_objects:
                self.unique_data_objects.append(dataName)

            if dataName not in algo.inpKeys:
                algo.inpKeys.append(dataName)

        # Declare data outputs: targets of the edges leaving the algorithm
        for _, dataName in self.dfg.out_edges(algo_name):
            if dataName not in self.unique_data_objects:
                self.unique_data_objects.append(dataName)

            if dataName not in algo.outKeys:
                algo.outKeys.append(dataName)

    def _generate_sequence(self, name, seq=None):
        """ Assemble the tree of sequencers.

        name -- control flow graph node whose successors are to be attached
        seq -- sequencer to attach them to; a new GaudiSequencer called 'name'
               is created if none is given

        Returns the sequencer the successors were attached to.
        """

        # Test for None explicitly, so that a sequencer passed in by the
        # caller is used regardless of its truth value.
        if seq is None:
            seq = GaudiSequencer(name, ShortCircuit=False)

        for n in self.cfg[name]:
            node_attrs = self._node_attributes(n)

            # extract entity name and type
            algo_name = n.split('/')[1] if '/' in n else n

            if 'type' in node_attrs:
                # first rely on explicit type, if given
                algo_type = node_attrs.get('type')
            else:
                # if the type is not given explicitly, try to extract it from entity name,
                # and, if unsuccessful, assume it is an algorithm
                algo_type = n.split('/')[0] if '/' in n else 'Algorithm'

            if algo_type in ['GaudiSequencer', 'AthSequencer', 'ProcessPhase']:
                if algo_name in ['RecoITSeq', 'RecoOTSeq', 'RecoTTSeq']:
                    continue

                if n not in self.unique_sequencers:
                    self.unique_sequencers.append(n)
                else:
                    if n not in self.dupl_seqs:
                        self.dupl_seqs[n] = 2
                    else:
                        self.dupl_seqs[n] += 1

                # NOTE(review): daughter sequencers are created with INFO and
                # not with self.outputLevel - confirm this is intended
                seq_daughter = GaudiSequencer(algo_name, OutputLevel=INFO)
                if node_attrs.get('ModeOR') == 'True':
                    self.OR_sequencers.append(n)
                    seq_daughter.ModeOR = True
                seq_daughter.ShortCircuit = False
                if seq_daughter not in seq.Members:
                    seq.Members += [seq_daughter]
                    # iterate deeper
                    self._generate_sequence(n, seq_daughter)
            else:
                if n not in self.unique_algos:
                    self.unique_algos.append(n)
                else:
                    if n not in self.dupl_algos:
                        self.dupl_algos[n] = 2
                    else:
                        self.dupl_algos[n] += 1

                avgRuntime, varRuntime = self.timeValue.get(algo_name)

                algo_daughter = CPUCruncher(
                    algo_name,
                    Cardinality=self.cardinality,
                    OutputLevel=self.outputLevel,
                    varRuntime=varRuntime,
                    avgRuntime=avgRuntime,
                    SleepFraction=self.sleepFraction
                    if self.BlockingBoolValue.get() else 0.,
                    Timeline=self.enableTimeline)

                self._declare_data_deps(algo_name, algo_daughter)

                if algo_daughter not in seq.Members:
                    seq.Members += [algo_daughter]

        return seq
GaudiHive.precedence._buildFilePath
def _buildFilePath(filePath)
Definition: precedence.py:29
GaudiHive.precedence.CruncherSequence.__init__
def __init__(self, timeValue, BlockingBoolValue, sleepFraction, cfgPath, dfgPath, topSequencer, showStat=False, timeline=False, outputLevel=INFO, cardinality=1)
Definition: precedence.py:174
GaudiHive.precedence.RndBiasedBooleanValue.get_pattern
def get_pattern(self)
Definition: precedence.py:158
GaudiHive.precedence.CruncherSequence.enableTimeline
enableTimeline
Definition: precedence.py:192
GaudiHive.precedence.RealTimeValue.defaultTime
defaultTime
Definition: precedence.py:79
GaudiHive.precedence.RealTimeValue.timings
timings
Definition: precedence.py:83
GaudiHive.precedence.UniformBooleanValue.__init__
def __init__(self, value)
Definition: precedence.py:107
GaudiHive.precedence.CruncherSequence.timeValue
timeValue
Definition: precedence.py:185
GaudiHive.precedence.CruncherSequence.sleepFraction
sleepFraction
Definition: precedence.py:187
GaudiHive.precedence.RealTimeValue.factor
factor
Definition: precedence.py:78
GaudiHive.precedence.UniformBooleanValue
Definition: precedence.py:106
GaudiHive.precedence.CruncherSequence.unique_sequencers
list unique_sequencers
Definition: precedence.py:166
GaudiHive.precedence.RealTimeValue.file
file
Definition: precedence.py:82
GaudiHive.precedence.UniformTimeValue.avgRuntime
avgRuntime
Definition: precedence.py:59
GaudiHive.precedence.RealTimeValue.__init__
def __init__(self, path, defaultTime, factor=1)
Definition: precedence.py:71
GaudiHive.precedence.RndBiasedBooleanValue
Definition: precedence.py:116
GaudiHive.precedence.CruncherSequence.get
def get(self)
Definition: precedence.py:241
GaudiHive.precedence.CruncherSequence.BlockingBoolValue
BlockingBoolValue
Definition: precedence.py:186
GaudiHive.precedence.CruncherSequence.sequencer
sequencer
Definition: precedence.py:197
GaudiHive.precedence.CruncherSequence.dupl_algos
dictionary dupl_algos
Definition: precedence.py:170
GaudiHive.precedence.CruncherSequence.unique_algos
list unique_algos
Definition: precedence.py:169
GaudiHive.precedence.CruncherSequence.unique_data_objects
list unique_data_objects
Definition: precedence.py:172
Gaudi.Configuration
Definition: Configuration.py:1
GaudiHive.precedence.CruncherSequence.cfg
cfg
Definition: precedence.py:189
GaudiHive.precedence.CruncherSequence.dfg
dfg
Definition: precedence.py:190
GaudiHive.precedence.UniformTimeValue
Definition: precedence.py:54
GaudiHive.precedence.RndBiasedBooleanValue.get
def get(self)
Definition: precedence.py:154
GaudiHive.precedence.RealTimeValue.get
def get(self, algoName='')
Definition: precedence.py:85
GaudiHive.precedence.RndBiasedBooleanValue.generator
generator
Definition: precedence.py:147
GaudiHive.precedence.UniformBooleanValue.value
value
Definition: precedence.py:109
GaudiHive.precedence.RealTimeValue.path
path
Definition: precedence.py:77
GaudiHive.precedence.UniformTimeValue.varRuntime
varRuntime
Definition: precedence.py:60
GaudiHive.precedence.UniformTimeValue.get
def get(self, algoName='')
Definition: precedence.py:62
GaudiHive.precedence.UniformBooleanValue.get
def get(self)
Definition: precedence.py:111
GaudiHive.precedence.RndBiasedBooleanValue.pattern
pattern
Definition: precedence.py:134
GaudiHive.precedence.CruncherSequence.dupl_seqs
dictionary dupl_seqs
Definition: precedence.py:167
GaudiHive.precedence.RealTimeValue
Definition: precedence.py:68
GaudiHive.precedence.CruncherSequence._generate_sequence
def _generate_sequence(self, name, seq=None)
Definition: precedence.py:266
GaudiHive.precedence.CruncherSequence.OR_sequencers
list OR_sequencers
Definition: precedence.py:168
GaudiHive.precedence.RndBiasedBooleanValue._create_generator
def _create_generator(self, pattern)
Definition: precedence.py:149
GaudiHive.precedence.CruncherSequence.outputLevel
outputLevel
Definition: precedence.py:194
GaudiHive.precedence.CruncherSequence
Definition: precedence.py:163
GaudiHive.precedence.RealTimeValue.varRuntime
varRuntime
Definition: precedence.py:80
GaudiHive.precedence.CruncherSequence._declare_data_deps
def _declare_data_deps(self, algo_name, algo)
Definition: precedence.py:245
GaudiHive.precedence.RndBiasedBooleanValue.__init__
def __init__(self, pattern, seed=None)
Definition: precedence.py:119
GaudiHive.precedence.UniformTimeValue.__init__
def __init__(self, avgRuntime, varRuntime=0)
Definition: precedence.py:57
Gaudi::Functional::details::zip::range
decltype(auto) range(Args &&... args)
Zips multiple containers together to form a single range.
Definition: FunctionalDetails.h:97
GaudiHive.precedence.CruncherSequence.cardinality
cardinality
Definition: precedence.py:184