forked from JCMoure/RVCAT
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathexec_graph.py
More file actions
207 lines (161 loc) · 6.92 KB
/
Copy pathexec_graph.py
File metadata and controls
207 lines (161 loc) · 6.92 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
from math import inf
def old_priority(isps: dict[int,list]) -> dict[tuple[int,int]]:
_ais = [inf] # assigned instructions
_aps = [-1] # assigned ports
nis = len(isps) # number of instructions
isps_is = sorted(isps.keys()) # instructions indexes
def dfs(n=0, ais=[], aps=[]):
if n == nis:
nonlocal _ais, _aps
if len(ais) >= len(_ais) and sum(ais) < sum(_ais):
_ais = ais
_aps = aps
return
i = isps_is[n]
assigned = False
for p in isps[i]:
if p not in aps:
assigned = True
dfs(n+1, ais+[i], aps+[p])
if not assigned:
dfs(n+1, ais, aps)
dfs()
return dict(zip(_ais, _aps))
def generate_execution_graph (program, N, window_size, DepEdges):
ExecGraph = []
static_instructions = program.n
# First Node contains a fake arch on dispatch node and retire node,
# that will be removed after inserting values
for i in range(0,N): # each dynamic instruction executed, except first one
# Insert node corresponding to dispatch stage of current instruction
DispatchEdges = [ [(i-1)*3 , 0] ] # Dispatch node depends on dispatch of previous instruction
if i >= window_size : # Dependence on Retire node corresponding to finite ROB (Window) size
DispatchEdges.append( [(i-window_size)*3+2 , 0] )
ExecGraph.append( DispatchEdges) # Add Dispatch node
# Insert node corresponding to execution stage of current instruction
ExecuteEdges = [ [i*3, 1] ] # depends on dispatch of current instruction, with latency = 1
for dep_offsets in DepEdges[ i % static_instructions]:
j = i - dep_offsets
if j >= 0: # dependence on dynamic instruction
ExecuteEdges.append( [j*3+1, 0] ) # depends on execution stage of prev. instr. (latency annotated later)
ExecGraph.append( ExecuteEdges ) # Add Execution node
# Insert node corresponding to retire stage of current instruction
RetireEdges = [ [i*3+1, 1] ] # depends on execution stage of current instr., lat=1 + measured execution latency (later)
RetireEdges.append([(i-1)*3+2, 0]) # depends on retire of previous instruction
ExecGraph.append( RetireEdges ) # Add Retire node
return ExecGraph
def get_iteration_idx( program, index ):
instr_idx = index // 3
static_idx= instr_idx % program.n
iteration = instr_idx // program.n
return iteration, static_idx
def get_node_arch ( program, index, cost ):
iteration, idx = get_iteration_idx(program, index)
name = f" --({cost})--> [{iteration},{idx}]"
if index%3 == 0:
name += "Disp"
elif index%3 == 1:
name += "eXec"
else:
name += "Retr"
return name
def longest_path (ExecGraph):
N = len(ExecGraph)
dist = [-10**9 for i in range(N)] # initialize distances to all vertices as infinite
path = [ [] for i in range(N)] # initialize longest path to all vertices as EMPTY
dist[N-1] = 0
path[N-1] = [[N-1, 1]]
for u in range(N-1,0,-1):
for node in ExecGraph[u]:
if dist[node[0]] < dist[u] + node[1]:
dist[node[0]] = dist[u] + node[1]
new_path = path[u] + [node]
path[node[0]] = new_path
return path[0]
def get_list_of_edges (list):
str = ""
for node in list:
str += get_node_arch(node[0], node[1]) + " ; "
return str
def print_path (path):
path_str = f"CRITICAL Path:\n "
count = 5
for node in path:
path_str += get_node_arch(node[0], node[1]) + " "
count -= 1
if count == 0:
count = 5
path_str += "\n "
print(path_str+"\n")
def critical_path_statistics (program, path) -> str:
N = program.n
total_lat = 0
histogram = [0 for i in range(N)]
decode_lat= 0
retire_lat= 0
for node in path:
stage = node[0] % 3
iteration, idx = get_iteration_idx(program, node[0])
if stage == 1:
histogram[idx] += node[1]
elif stage == 0:
decode_lat += node[1]
else:
retire_lat += node[1]
total_lat += node[1]
out = ""
for i in range(N):
out += f" Instr. {i:2}: {100*histogram[i]/total_lat:0.2f}%\n"
out += f" DISPATCH : {100*decode_lat/total_lat:0.2f}%\n"
out += f" RETIRE : {100*retire_lat/total_lat:0.2f}%\n"
return out
def critical_path_statistics_json (program, path) -> str:
N = program.n
total_lat = 0
histogram = [0 for i in range(N)]
decode_lat= 0
retire_lat= 0
for node in path:
stage = node[0] % 3
iteration, idx = get_iteration_idx(program, node[0])
if stage == 1:
histogram[idx] += node[1]
elif stage == 0:
decode_lat += node[1]
else:
retire_lat += node[1]
total_lat += node[1]
out = {'instructions': []}
for i in range(N):
out['instructions'].append({'id': i,
'instruction': program.instructions[i][1].HLdescrp,
'percentage': 100*histogram[i]/total_lat})
out['dispatch'] = 100*decode_lat/total_lat
out['retire'] = 100*retire_lat/total_lat
return out
def print_graph ( program, ExecGraph ):
n_times_3 = len( ExecGraph )
n = n_times_3 // 3
for i in range(n):
static_idx= i % program.n
iteration = i // program.n
out = f"[{iteration},{static_idx}]:\n"
out += f" Dispatch= {get_list_of_edges(ExecGraph[i*3])}\n"
out += f" Execute = {get_list_of_edges(ExecGraph[i*3+1])}\n"
out += f" Retire = {get_list_of_edges(ExecGraph[i*3+2])}\n"
print(out)
def exec_graph_update(ExecGraph, dynamic_idx, disp_latency, exec_latency, ret_latency):
# IMPORTANT: in execution graph, dispatch and retire latencies are set to a maximum of 1 cycle
# since we do not want this to be an explanation of performance
if disp_latency > 1:
disp_latency = 1
if ret_latency > 1:
ret_latency = 1
ExecGraph[dynamic_idx*3+0][0][1] = disp_latency # update dispatch latency on node
for dep_node in ExecGraph[dynamic_idx*3+1][1:]: # update execution latencies of dependent instructions
dep_node[1] = ExecGraph[dep_node[0]+1][0][1] - 1
ExecGraph[dynamic_idx*3+2][0][1] += exec_latency # update execution latency of retire node
ExecGraph[dynamic_idx*3+2][1][1] = ret_latency # update retire latency on node
if dynamic_idx == 0:
ExecGraph[0].pop(-1) # remove last arch from first dispatch node
ExecGraph[2].pop(-1) # remove last arch from first retire node