The Gaudi Framework  v29r0 (ff2e7097)
graphanalysis.py
Go to the documentation of this file.
1 from pygraph.classes.digraph import digraph
2 from pygraph.algorithms.cycles import find_cycle
3 from pygraph.algorithms.accessibility import connected_components
4 from pygraph.algorithms.critical import critical_path
5 
6 
7 def read_graph_from_json(filename):
8  data = open(filename).read()
9  workflow = eval(data)
10  gr = digraph()
11  # create all nodes
12  for algo in workflow["algorithms"]:
13  # to differentiate algos from equally named products
14  name = algo["name"] + "_algo"
15  gr.add_nodes([name])
16  for input in algo["inputs"]:
17  if input == "":
18  input = "dummy"
19  if not gr.has_node(input):
20  gr.add_nodes([input])
21  if not gr.has_edge((input, name)):
22  gr.add_edge((input, name), wt=0)
23  for output in algo["outputs"]:
24  if output == "":
25  output = "dummy"
26  if not gr.has_node(output):
27  gr.add_nodes([output])
28  if not gr.has_edge((name, output)):
29  # the output edge gets the weigth corresponding to the runtime
30  gr.add_edge((name, output), wt=algo["runtimes_wall"][0] * 100)
31  return gr
32 
33 
35  has_loop = True
36  n_cycles = 0
37  while has_loop:
38  cycle = find_cycle(gr)
39  if len(cycle) == 0:
40  has_loop = False
41  continue
42  n_cycles += 1
43  print cycle
44  print "Removed loop by deleting edge (%s,%s)" % (cycle[-1], cycle[0])
45  gr.del_edge((cycle[-1], cycle[0]))
46  print "\nIN TOTAL %i CYCLES\n" % (n_cycles)
47  return n_cycles > 0 # whether it needed to fix cycles
48 
49 
51  cc = connected_components(gr)
52  cc_size = {}
53  for i in cc.values():
54  cc_size[i] = 0
55  for k, v in cc.iteritems():
56  cc_size[v] = cc_size[v] + 1
57  print "Connected components have the following size:"
58 # for k,v in cc_size.iteritems():
59 # print "%i : %i" %(k,v)
60  print "NUMBER OF CONNECTED COMPONENTS: %i" % (len(cc_size.keys()))
61 
62 
64  total_time = 0
65  critical_time = 0
66  cp = critical_path(gr)
67  # calculate total time
68  for edge in gr.edges():
69  total_time += gr.edge_weight(edge)
70 
71  # calculate time of critical path
72  edges = [tuple(cp[i:i + 2]) for i in range(0, len(cp))]
73  for edge in edges:
74  critical_time += gr.edge_weight(edge)
75 
76  print "Total time : %s" % total_time
77  print "Critical path: %s" % critical_time
78  print "POSSIBLE SPEEDUP: %s" % (total_time / critical_time)
79 
80 
81 def print_graph_to_json(gr, filename):
82  algorithms = {} # still make it known to workflow
83  known_names = set()
84  for edge in gr.edges():
85  if edge[0].endswith("_algo"):
86  algoname = edge[0].rstrip("_algo")
87  product = edge[1]
88  reading = False
89  else:
90  algoname = edge[1].rstrip("_algo")
91  product = edge[0]
92  reading = True
93 
94  if algoname not in known_names:
95  algorithms[algoname] = {"name": algoname,
96  "inputs": [],
97  "outputs": [],
98  "runtimes": [1000], # TODO dummy
99  "runtimes_wall": [1000] # TODO dummy
100  }
101  known_names.add(algoname)
102  if reading:
103  algorithms[algoname]["inputs"].append(product)
104  else:
105  algorithms[algoname]["outputs"].append(product)
106  algorithms[algoname]["runtimes_wall"] = [
107  gr.edge_weight(edge) / 100, ]
108  out = open(filename, "w")
109  algorithm_list = [item for item in algorithms.values()]
110  workflow = {"algorithms": algorithm_list}
111  out.write(workflow.__repr__())
112  out.close()
113 
114 
115 ##########################
116 if __name__ == "__main__":
117  filename = "Athena.json"
118  gr = read_graph_from_json(filename)
119 
120  # let's analysis and fix for loops:
121  had_to_fix_cycles = analyze_and_fix_cycles(gr)
122  # write file with fixed graph
123  if had_to_fix_cycles:
124  print_graph_to_json(gr, filename.replace(".json", "_loopfixed.json"))
125 
126  # see how many disconnected components are there
128 
129  # let's check the minimum time required
130  # and print achievable speedup
def analyze_connected_componets(gr)
def print_graph_to_json(gr, filename)
def analyze_critical_path(gr)
def read(f, regex='.*', skipevents=0)
Definition: hivetimeline.py:22
def analyze_and_fix_cycles(gr)
decltype(auto) range(Args &&...args)
Zips multiple containers together to form a single range.
def read_graph_from_json(filename)
Definition: graphanalysis.py:7