Loading [MathJax]/extensions/tex2jax.js
The Gaudi Framework  v31r0 (aeb156f0)
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
precedence.py
Go to the documentation of this file.
1 import os
2 import sys
3 import random
4 import string
5 import json
6 import networkx as nx
7 
8 from Gaudi.Configuration import INFO
9 from Configurables import GaudiSequencer, CPUCruncher
10 
11 
def _buildFilePath(filePath):
    """Resolve ``filePath`` to an existing file, or terminate the process.

    The path is returned unchanged when it already exists.  Otherwise it is
    looked up under ``$GAUDIHIVEROOT/data`` (an unset ``GAUDIHIVEROOT`` is
    treated as an empty string) and the resolved real path is returned.

    If neither location exists, an error message is printed and the
    interpreter is terminated with exit status 1 through ``sys.exit``.
    """

    if os.path.exists(filePath):
        return filePath

    fallbackPath = os.path.realpath(
        os.path.join(os.environ.get('GAUDIHIVEROOT', ''), "data", filePath))

    if not os.path.exists(fallbackPath):
        # A single parenthesised argument prints the same text with the
        # Python 2 print statement and with the Python 3 print function.
        print(
            "\nERROR: invalid file path '%s'. It must be either absolute, or relative to '$GAUDIHIVEROOT/data/'."
            % filePath)
        sys.exit(1)

    return fallbackPath
25 
26 
class UniformTimeValue(object):
    """Timing source that reports one fixed runtime for every algorithm."""

    def __init__(self, avgRuntime, varRuntime=0):
        """Store the average runtime and its variance (variance defaults to 0)."""

        self.varRuntime = varRuntime
        self.avgRuntime = avgRuntime

    def get(self, algoName=''):
        """Return the ``(avgRuntime, varRuntime)`` tuple; ``algoName`` is not used."""

        timing = (self.avgRuntime, self.varRuntime)
        return timing
39 
40 
class RealTimeValue(object):
    """A class to manage real algorithm timing"""

    def __init__(self, path, defaultTime, factor=1):
        """
        path -- JSON timing library; resolved with _buildFilePath, so it may be
                an existing path or one relative to '$GAUDIHIVEROOT/data/'
        defaultTime -- run time, assigned to an algorithm if no time is found in provided timing library
                       (and it will also be scaled by the 'factor' argument)
        factor -- multiplier applied to every time returned by get()
        """

        self.path = os.path.realpath(_buildFilePath(path))
        self.factor = factor
        self.defaultTime = defaultTime  # typically 0.05s
        self.varRuntime = 0

        # Close the file as soon as the timings are loaded instead of leaking
        # the handle for the lifetime of the object.
        with open(self.path) as timingFile:
            self.timings = json.load(timingFile)

        # Attribute kept only for backward compatibility; the handle is closed.
        self.file = timingFile

    def get(self, algoName=''):
        """Get time for a given algorithm name.

        The name is looked up as given, then with its first character
        upper-cased.  When neither is present a warning is printed and
        ``defaultTime`` is used.  The chosen time is multiplied by ``factor``.

        Returns a ``(time, varRuntime)`` tuple.
        """

        if algoName in self.timings:
            runtime = float(self.timings[algoName])
        else:
            # Slicing (rather than indexing [0]) keeps the default empty name
            # from raising IndexError.
            capAlgoName = algoName[:1].upper() + algoName[1:]

            if capAlgoName in self.timings:
                runtime = float(self.timings[capAlgoName])
            else:
                runtime = self.defaultTime
                print(
                    "WARNING: TimiNg for %s (or %s) not found in the provided library, using default one: %s"
                    % (algoName, capAlgoName, runtime))

        runtime = runtime * self.factor

        return runtime, self.varRuntime
76 
77 
class UniformBooleanValue(object):
    """Boolean source that hands back the same stored value on every request."""

    def __init__(self, value):
        """Remember ``value`` so that get() can return it."""

        self.value = value

    def get(self):
        """Return the value supplied at construction time."""

        stored = self.value
        return stored
86 
87 
class RndBiasedBooleanValue(object):
    """Provides randomly ordered set of boolean values with requested proportion of True and False."""

    def __init__(self, pattern, seed=None):
        """
        Keyword arguments:
        pattern -- either a dictionary describing proportion of True and False (e.g., {True:5,False:15}), or
                   a list/tuple containing a pattern to be used as-is (e.g., [False,True,True,False])
        seed -- an int, long or other hashable object to initialize random number generator
                (passed to random.seed as-is; only used for dictionary patterns and only when not None)

        Raises:
        ValueError -- the dictionary requests a total of zero or fewer values
        TypeError -- pattern is neither a dict nor a list/tuple
        """

        if isinstance(pattern, dict):
            proportion = pattern

            length = proportion[True] + proportion[False]
            if length <= 0:
                # String exceptions are not valid; raise a real exception type.
                raise ValueError(
                    "ERROR: Wrong set length requested: %i " % length)

            self.pattern = [False] * proportion[False] + [True] * proportion[True]

            # NOTE: this reseeds the module-level random generator.
            if seed is not None:
                random.seed(seed)

            random.shuffle(self.pattern)

        elif isinstance(pattern, (list, tuple)):
            self.pattern = pattern
        else:
            raise TypeError("ERROR: unknown pattern type")

        # get() consumes this generator; without it get() cannot work.
        self.generator = self._create_generator(self.pattern)

    def _create_generator(self, pattern):
        """Yield the elements of ``pattern`` one by one, in order."""

        for b in pattern:
            yield b

    def get(self):
        """Return the next value of the pattern.

        Raises StopIteration once every value of the pattern has been handed out.
        """

        return next(self.generator)

    def get_pattern(self):
        """Return the complete pattern (shuffled list, or the sequence passed in)."""

        return self.pattern
133 
134 
class CruncherSequence(object):
    """Constructs the sequence tree of CPUCrunchers with provided control flow and data flow precedence rules."""

    # NOTE: these containers are class attributes, so they are shared by all
    # CruncherSequence instances and the statistics accumulate across them.
    unique_sequencers = []
    dupl_seqs = {}
    OR_sequencers = []
    unique_algos = []
    dupl_algos = {}

    unique_data_objects = []

    def __init__(self,
                 timeValue,
                 IOboolValue,
                 sleepFraction,
                 cfgPath,
                 dfgPath,
                 topSequencer,
                 showStat=False,
                 timeline=False,
                 outputLevel=INFO):
        """
        Keyword arguments:
        timeValue -- timeValue object to set algorithm execution time
        IOboolValue -- *BooleanValue object to set whether an algorithm has to experience IO-bound execution
        sleepFraction -- value given to a CPUCruncher as SleepFraction when IOboolValue.get() is true
                         (0. is used otherwise)
        cfgPath -- relative to $GAUDIHIVEROOT/data path to GRAPHML file with control flow dependencies
        dfgPath -- relative to $GAUDIHIVEROOT/data path to GRAPHML file with data flow dependencies
        topSequencer -- name of the control flow graph node the sequence tree is built from
                        (also used as the name of the top GaudiSequencer)
        showStat -- print out statistics on precedence graph
        timeline -- value given to every CPUCruncher as Timeline
        outputLevel -- value given to every CPUCruncher as OutputLevel
        """

        self.timeValue = timeValue
        self.IOboolValue = IOboolValue
        self.sleepFraction = sleepFraction

        self.cfg = nx.read_graphml(_buildFilePath(cfgPath))
        self.dfg = nx.read_graphml(_buildFilePath(dfgPath))

        self.enableTimeline = timeline

        self.outputLevel = outputLevel

        # Generate control flow part
        self.sequencer = self._generate_sequence(topSequencer)

        if showStat:
            self._print_statistics()

    @staticmethod
    def _print_items(*items):
        """Print the items on one line, separated by single spaces.

        Produces the same text as the Python 2 statement ``print a, b, ...``
        while also being valid on Python 3.
        """

        print(" ".join(str(item) for item in items))

    def _print_statistics(self):
        """Print the counters collected while the sequence tree was built."""

        show = self._print_items

        algo_reuse = list(self.dupl_algos.values())
        show("\n===== Statistics on Algorithms =====")
        # A name seen k times is counted once in unique_algos and k in dupl_algos.
        show("Total number of algorithm nodes: ",
             len(self.unique_algos) + sum(count - 1 for count in algo_reuse))
        show("Number of unique algorithms: ", len(self.unique_algos))
        show(" -->", len(self.dupl_algos),
             "of them being re-used with the following distribution: ",
             algo_reuse)

        seq_reuse = list(self.dupl_seqs.values())
        show("\n===== Statistics on Sequencers =====")
        show("Total number of sequencers: ",
             len(self.unique_sequencers) + sum(count - 1 for count in seq_reuse))
        show("Number of unique sequencers: ", len(self.unique_sequencers))
        show(" -->", len(self.dupl_seqs),
             "of them being re-used with the following distribution: ",
             seq_reuse)
        show("Number of OR-sequencers: ", len(self.OR_sequencers))

        show("\n===== Statistics on DataObjects =====")
        show("Number of unique DataObjects: ", len(self.unique_data_objects))
        show("")

    def get(self):
        """Return the top sequencer built by the constructor."""

        return self.sequencer

    def _declare_data_deps(self, algo_name, algo):
        """ Declare data inputs and outputs for a given algorithm. """

        # Declare data inputs: the source node of every edge entering algo_name
        for dataName, _ in self.dfg.in_edges(algo_name):
            if dataName not in self.unique_data_objects:
                self.unique_data_objects.append(dataName)

            if dataName not in algo.inpKeys:
                algo.inpKeys.append(dataName)

        # Declare data outputs: the target node of every edge leaving algo_name
        for _, dataName in self.dfg.out_edges(algo_name):
            if dataName not in self.unique_data_objects:
                self.unique_data_objects.append(dataName)

            if dataName not in algo.outKeys:
                algo.outKeys.append(dataName)

    def _generate_sequence(self, name, seq=None):
        """ Assemble the tree of sequencers.

        name -- control flow graph node whose neighbours become members of 'seq'
        seq -- sequencer to fill; a GaudiSequencer called 'name' is created when omitted

        Returns the filled sequencer.
        """

        if seq is None:
            seq = GaudiSequencer(name, ShortCircuit=False)

        for n in self.cfg[name]:
            # Node names are either 'Type/Name' or a bare name
            if '/' in n:
                algo_type, algo_name = n.split('/')
            else:
                algo_type = 'GaudiAlgorithm'
                algo_name = n

            if algo_type in ['GaudiSequencer', 'AthSequencer', 'ProcessPhase']:
                # NOTE(review): these three sequencers are skipped entirely;
                # the reason is not visible here -- confirm before changing.
                if algo_name in ['RecoITSeq', 'RecoOTSeq', 'RecoTTSeq']:
                    continue

                if n not in self.unique_sequencers:
                    self.unique_sequencers.append(n)
                else:
                    # First repetition records 2 occurrences, later ones add 1
                    self.dupl_seqs[n] = self.dupl_seqs.get(n, 1) + 1

                seq_daughter = GaudiSequencer(algo_name, OutputLevel=INFO)
                # Graph attributes are compared as strings
                if self.cfg.node[n].get('ModeOR') == 'True':
                    self.OR_sequencers.append(n)
                    seq_daughter.ModeOR = True
                seq_daughter.ShortCircuit = False
                if seq_daughter not in seq.Members:
                    seq.Members += [seq_daughter]
                # iterate deeper
                self._generate_sequence(n, seq_daughter)
            else:
                if n not in self.unique_algos:
                    self.unique_algos.append(n)
                else:
                    # First repetition records 2 occurrences, later ones add 1
                    self.dupl_algos[n] = self.dupl_algos.get(n, 1) + 1

                avgRuntime, varRuntime = self.timeValue.get(algo_name)
                algo_daughter = CPUCruncher(
                    algo_name,
                    OutputLevel=self.outputLevel,
                    varRuntime=varRuntime,
                    avgRuntime=avgRuntime,
                    SleepFraction=self.sleepFraction
                    if self.IOboolValue.get() else 0.,
                    Timeline=self.enableTimeline)

                self._declare_data_deps(algo_name, algo_daughter)

                if algo_daughter not in seq.Members:
                    seq.Members += [algo_daughter]

        return seq
def __init__(self, path, defaultTime, factor=1)
Definition: precedence.py:44
def _generate_sequence(self, name, seq=None)
Definition: precedence.py:238
def __init__(self, avgRuntime, varRuntime=0)
Definition: precedence.py:30
double sum(double x, double y, double z)
def get(self, algoName='')
Definition: precedence.py:58
def get(self, algoName='')
Definition: precedence.py:35
def __init__(self, timeValue, IOboolValue, sleepFraction, cfgPath, dfgPath, topSequencer, showStat=False, timeline=False, outputLevel=INFO)
Definition: precedence.py:155
decltype(auto) range(Args &&...args)
Zips multiple containers together to form a single range.
def _declare_data_deps(self, algo_name, algo)
Definition: precedence.py:217
def _buildFilePath(filePath)
Definition: precedence.py:12
def __init__(self, pattern, seed=None)
Definition: precedence.py:91