precedence.py
Go to the documentation of this file.
1 import os, sys, random, string, json
2 import networkx as nx
3 
4 from Gaudi.Configuration import INFO, DEBUG
5 from Configurables import GaudiSequencer, CPUCruncher
6 
7 
8 def _buildFilePath(filePath):
9 
10  if not os.path.exists(filePath):
11  __fullFilePath__ = os.path.realpath(os.path.join(os.environ.get('GAUDIHIVEROOT',''), "data", filePath))
12  if not os.path.exists(__fullFilePath__):
13  print "\nERROR: invalid file path '%s'. It must be either absolute, or relative to '$GAUDIHIVEROOT/data/'." %filePath
14  sys.exit(1)
15  else:
16  __fullFilePath__ = filePath
17 
18  return __fullFilePath__
19 
class UniformTimeValue(object):
    """Timing provider that hands the same runtime to every algorithm."""

    def __init__(self, avgRuntime, varRuntime=0):
        """Store the average runtime and its variance."""

        self.avgRuntime, self.varRuntime = avgRuntime, varRuntime

    def get(self, algoName=''):
        """Return the (avgRuntime, varRuntime) tuple; algoName is not consulted."""

        timing = (self.avgRuntime, self.varRuntime)
        return timing
32 
class RealTimeValue(object):
    """A class to manage real algorithm timing, loaded from a JSON timing library"""

    def __init__(self, path, defaultTime, factor=1):
        """
        path -- JSON file with the timing library; resolved through _buildFilePath
        defaultTime -- run time, assigned to an algorithm if no time is found in provided timing library
                       (and it will also be scaled by the 'factor' argument)
        factor -- multiplier applied to every time returned by get()
        """

        self.path = os.path.realpath(_buildFilePath(path))
        self.factor = factor
        self.defaultTime = defaultTime  # typically 0.05s
        self.varRuntime = 0

        # Load inside a context manager so the handle is released even when
        # the JSON is malformed (it used to stay open for the object's life).
        with open(self.path) as timingFile:
            self.timings = json.load(timingFile)

        # Attribute kept for backward compatibility; the handle is closed.
        self.file = timingFile

    def get(self, algoName=''):
        """Get time (scaled by 'factor') and its variance, in a tuple, for a given algorithm name

        The name is looked up as given first, then with its first character
        upper-cased; when both lookups fail a warning is printed and
        'defaultTime' is used.
        """

        if algoName in self.timings:
            runtime = float(self.timings[algoName])
        else:
            # Slicing, unlike algoName[0], tolerates the empty default name.
            capAlgoName = algoName[:1].upper() + algoName[1:]

            if capAlgoName in self.timings:
                runtime = float(self.timings[capAlgoName])
            else:
                runtime = self.defaultTime
                # Single parenthesised argument: valid and identical output
                # on both Python 2 and Python 3.
                print("WARNING: TimiNg for %s (or %s) not found in the provided library, using default one: %s" % (algoName, capAlgoName, runtime))

        runtime = runtime * self.factor

        return runtime, self.varRuntime
68 
class UniformBooleanValue(object):
    """Boolean provider that answers every query with one fixed value."""

    def __init__(self, value):
        """Remember the value that get() hands back."""

        self.value = value

    def get(self):
        """Return the stored value."""

        stored = self.value
        return stored
78 
80  """
81  Provides randomly distributed boolean value with True taking only 10%.
82  The distribution has only 276 values and is reproducible, if no pattern re-generation is requested.
83  """
84 
85  # 276 values, biased as 90% to 10%
86  builtinPattern = [True, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, True, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, True, False, True, False, False, False, False, False, False, False, False, True, False, True, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, True, False, False, False, False, True, True, False, False, False, False, False, False, False, False, False, False, True, True, False, True, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, True, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, True, False, True]
87  pattern = []
88 
89  def __init__(self, useBuiltinPattern=True):
90 
91  if useBuiltinPattern:
92  self.pattern = self.builtinPattern
93  else:
94  # 278 values, biased approximately as 90% to 10% (276 precedence graph algorithms, plus two algorithms added manually - DstWriter and Framework)
95  self.pattern = [False for i in range(249)] + [True for i in range(29)]
96  random.shuffle(self.pattern)
97 
99 
100  def _create_generator(self, pattern):
101 
102  for b in pattern:
103  yield b
104 
105  def get(self):
106 
107  return next(self.generator)
108 
109  def get_pattern(self):
110 
111  return self.pattern
112 
113 
class CruncherSequence(object):
    """Constructs the sequence tree of CPUCrunchers with provided control flow and data flow precedence rules."""

    # NOTE(review): these containers are class attributes, so every instance
    # appends to the same objects and the showStat figures accumulate across
    # instances -- confirm that this sharing is intended.
    unique_sequencers = []
    dupl_seqs = {}
    OR_sequencers = []
    unique_algos = []
    dupl_algos = {}

    unique_data_objects = []

    def __init__(self, timeValue, IOboolValue, sleepFraction, cfgPath, dfgPath, topSequencer, showStat=False, algoDebug=False):
        """
        Keyword arguments:
        timeValue -- timeValue object to set algorithm execution time
        IOboolValue -- *BooleanValue object to set whether an algorithm has to experience IO-bound execution
        sleepFraction -- SleepFraction given to algorithms for which IOboolValue.get() is true (0. otherwise)
        cfgPath -- relative to $GAUDIHIVEROOT/data path to GRAPHML file with control flow dependencies
        dfgPath -- relative to $GAUDIHIVEROOT/data path to GRAPHML file with data flow dependencies
        topSequencer -- name of the control flow graph node the sequence tree is built from
        showStat -- print out statistics on precedence graph
        algoDebug -- give algorithms OutputLevel DEBUG instead of INFO
        """

        self.timeValue = timeValue
        self.IOboolValue = IOboolValue
        self.sleepFraction = sleepFraction

        self.cfg = nx.read_graphml(_buildFilePath(cfgPath))
        self.dfg = nx.read_graphml(_buildFilePath(dfgPath))

        self.algoDebug = algoDebug

        # Generate control flow part
        self.sequencer = self._generate_sequence(topSequencer)

        if showStat:
            self._print_statistics()

    def _print_statistics(self):
        """Print the algorithm, sequencer and data object counters."""

        def show(*parts):
            # One pre-joined argument prints the same text on Python 2 and 3
            # as the former "print a, b, c" statements did.
            print(" ".join(str(part) for part in parts))

        def total(unique, duplicates):
            # Every duplicate entry counts all occurrences of a node, one of
            # which is already included in the unique list.
            return len(unique) + sum(count - 1 for count in duplicates.values())

        show("\n===== Statistics on Algorithms =====")
        show("Total number of algorithm nodes: ", total(self.unique_algos, self.dupl_algos))
        show("Number of unique algorithms: ", len(self.unique_algos))
        show(" -->", len(self.dupl_algos), "of them being re-used with the following distribution: ", list(self.dupl_algos.values()))

        show("\n===== Statistics on Sequencers =====")
        show("Total number of sequencers: ", total(self.unique_sequencers, self.dupl_seqs))
        show("Number of unique sequencers: ", len(self.unique_sequencers))
        show(" -->", len(self.dupl_seqs), "of them being re-used with the following distribution: ", list(self.dupl_seqs.values()))
        show("Number of OR-sequencers: ", len(self.OR_sequencers))

        show("\n===== Statistics on DataObjects =====")
        show("Number of unique DataObjects: ", len(self.unique_data_objects))
        show("")

    def get(self):
        """Return the top sequencer built by the constructor."""

        return self.sequencer

    @staticmethod
    def _count_occurrence(node, unique, duplicates):
        """Record one occurrence of 'node': the first goes to 'unique', repeats are tallied in 'duplicates' (starting at 2)."""

        if node not in unique:
            unique.append(node)
        else:
            duplicates[node] = duplicates.get(node, 1) + 1

    def _declare_data_deps(self, algo_name, algo):
        """ Declare data inputs and outputs for a given algorithm. """

        # Declare data inputs: the source node of every incoming edge
        for dataName, _ in self.dfg.in_edges(algo_name):
            if dataName not in self.unique_data_objects:
                self.unique_data_objects.append(dataName)

            algo.inpKeys.append(dataName)

        # Declare data outputs: the target node of every outgoing edge
        for _, dataName in self.dfg.out_edges(algo_name):
            if dataName not in self.unique_data_objects:
                self.unique_data_objects.append(dataName)

            algo.outKeys.append(dataName)

    def _generate_sequence(self, name, seq=None):
        """ Assemble the tree of sequencers.

        name -- control flow graph node whose successors are added to 'seq'
        seq -- sequencer to fill; a GaudiSequencer called 'name' is created when it is falsy

        Returns the filled sequencer.
        """

        # NOTE(review): this is a truthiness test, not "seq is None"; it is
        # kept as-is because how a sequencer object evaluates in boolean
        # context cannot be determined from this file -- confirm.
        if not seq:
            seq = GaudiSequencer(name, ShortCircuit=False)

        for n in self.cfg[name]:
            # Node names are either "type/name" or a bare algorithm name
            if '/' in n:
                algo_type, algo_name = n.split('/')
            else:
                algo_type = 'GaudiAlgorithm'
                algo_name = n

            if algo_type in ['GaudiSequencer', 'AthSequencer', 'ProcessPhase']:
                if algo_name in ['RecoITSeq', 'RecoOTSeq', 'RecoTTSeq']:
                    continue

                self._count_occurrence(n, self.unique_sequencers, self.dupl_seqs)

                seq_daughter = GaudiSequencer(algo_name, OutputLevel=INFO)
                # Graph attributes are compared as the string 'True'
                if self.cfg.node[n].get('ModeOR') == 'True':
                    self.OR_sequencers.append(n)
                    seq_daughter.ModeOR = True
                seq_daughter.ShortCircuit = False
                if seq_daughter not in seq.Members:
                    seq.Members += [seq_daughter]
                    # iterate deeper
                    self._generate_sequence(n, seq_daughter)
            else:
                self._count_occurrence(n, self.unique_algos, self.dupl_algos)

                avgRuntime, varRuntime = self.timeValue.get(algo_name)
                algo_daughter = CPUCruncher(algo_name,
                                            OutputLevel=DEBUG if self.algoDebug else INFO,
                                            shortCalib=True,
                                            varRuntime=varRuntime,
                                            avgRuntime=avgRuntime,
                                            SleepFraction=self.sleepFraction if self.IOboolValue.get() else 0.)

                self._declare_data_deps(algo_name, algo_daughter)

                if algo_daughter not in seq.Members:
                    seq.Members += [algo_daughter]

        return seq
def __init__(self, timeValue, IOboolValue, sleepFraction, cfgPath, dfgPath, topSequencer, showStat=False, algoDebug=False)
Definition: precedence.py:125
def __init__(self, path, defaultTime, factor=1)
Definition: precedence.py:36
def _generate_sequence(self, name, seq=None)
Definition: precedence.py:191
def __init__(self, avgRuntime, varRuntime=0)
Definition: precedence.py:23
double sum(double x, double y, double z)
def get(self, algoName='')
Definition: precedence.py:51
def get(self, algoName='')
Definition: precedence.py:28
NamedRange_< CONTAINER > range(const CONTAINER &cnt, std::string name)
simple function to create the named range form arbitrary container
Definition: NamedRange.h:130
def __init__(self, useBuiltinPattern=True)
Definition: precedence.py:89
def _declare_data_deps(self, algo_name, algo)
Definition: precedence.py:172
def _buildFilePath(filePath)
Definition: precedence.py:8