blob: ae38b4168c8315e49a80e2b94fe5670ebff300ad (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
|
# Build a per-PE integer offset from the hardware ID registers and
# accumulate it into $s128v:
#   offset = l1bid x 16 + (peid & 0b111) x 2 + (mabid & 0b1110) x 64
# Each partial term is computed with $nowrite as its destination and is
# consumed by the following instruction through $aluf.
# NOTE(review): $s128v is only added to in this snippet, never initialised;
# presumably it is zero on entry -- confirm against the preceding code.
# NOTE(review): the /1000 suffix looks like a per-cycle write mask -- confirm.
#
# Constants: $s0 = shift amount 4 (x 16), $s1 = peid mask,
#            $s2 = mabid mask,            $s3 = shift amount 6 (x 64)
imm i"4" $s0/1000
imm i"0b111" $s1/1000
imm i"0b1110" $s2/1000
imm i"6" $s3/1000
# + l1bid x 16
ilsl $l1bid $s0 $nowrite
iadd $aluf $s128v $s128v
# + (peid & 0b111) x 2
iand $peid $s1 $nowrite
# NOTE(review): per the comment above, this step supplies the x 2 scaling;
# the exact semantics of ipackbit and the contents of $n0 are not visible
# in this file -- confirm.
ipackbit $aluf $n0 $nowrite
iadd $aluf $s128v $s128v
# + (mabid & 0b1110) x 64
iand $mabid $s2 $nowrite
ilsl $aluf $s3 $nowrite
iadd $aluf $s128v $s128v
# Copy the value forwarded from the last iadd into $s132v as well, so that
# $s128v and $s132v both hold the offset.
ipassa $aluf $s132v
# $ls128v = offset (i32 x 8)
# NOTE(review): this assumes $ls128v aliases $s128v + $s132v -- confirm.
# Fill eight $lr vectors ($lr0v, $lr8v, ... $lr56v) with the values that
# are transferred out below.
# $ls256v holds the constant step 4096 that separates consecutive vectors.
imm i"4096" $ls256v
# Seed values for $lr0v: the pairs (1,2), (1025,1026), (2049,2050),
# (3073,3074), i.e. consecutive pairs spaced 1024 apart.
imm i"1" $r0/1000
imm i"2" $r1/1000
imm i"1025" $r2/1000
imm i"1026" $r3/1000
imm i"2049" $r4/1000
imm i"2050" $r5/1000
imm i"3073" $r6/1000
imm i"3074" $r7/1000
# (4096/256/2).times { |i| puts "iadd $aluf $ls256v $lr#{8*i}v" }
# The generator line above produces the i = 1..7 instructions only. The
# first instruction is hand-written: it adds the per-PE offset in $ls128v
# to the seed values in $lr0v. Every following iadd takes the previous
# result from $aluf and adds 4096, so
#   $lr{8*i}v = seed + offset + 4096 * i
iadd $lr0v $ls128v $lr0v
iadd $aluf $ls256v $lr8v
iadd $aluf $ls256v $lr16v
iadd $aluf $ls256v $lr24v
iadd $aluf $ls256v $lr32v
iadd $aluf $ls256v $lr40v
iadd $aluf $ls256v $lr48v
iadd $aluf $ls256v $lr56v
# Transfer the eight $lr vectors prepared above to $lb addresses
# 0, 256, ... 1792 (one l1bmd per vector, destination stride 256).
# NOTE(review): presumably l1bmd moves PE register data into the L1B
# memory ($lb) -- confirm direction and addressing against the ISA manual.
# Throughput figures noted by the author:
# 1 LW / pe / cycle
# 64 LW / cycle
# 256 LW / step
# (4096/256/2).times { |i| puts "l1bmd $lr#{8*i}v $lb#{256*i}" }
l1bmd $lr0v $lb0
l1bmd $lr8v $lb256
l1bmd $lr16v $lb512
l1bmd $lr24v $lb768
l1bmd $lr32v $lb1024
l1bmd $lr40v $lb1280
l1bmd $lr48v $lb1536
l1bmd $lr56v $lb1792
# Transfer the $lb range 0 .. 2016 (step 32) to the $lc range
# 0 .. 16128 (step 256): 64 l2bmd instructions, unrolled by the generator
# line below.
# NOTE(review): presumably l2bmd moves data from L1B memory ($lb) into
# L2B memory ($lc) -- confirm direction and addressing against the ISA manual.
# Throughput figures noted by the author:
# 8 LW / l1b / cycle
# 64 LW / cycle
# 256 LW / step
# (32768/256/2).times { |i| puts "l2bmd $lb#{32*i} $lc#{256*i}" }
l2bmd $lb0 $lc0
l2bmd $lb32 $lc256
l2bmd $lb64 $lc512
l2bmd $lb96 $lc768
l2bmd $lb128 $lc1024
l2bmd $lb160 $lc1280
l2bmd $lb192 $lc1536
l2bmd $lb224 $lc1792
l2bmd $lb256 $lc2048
l2bmd $lb288 $lc2304
l2bmd $lb320 $lc2560
l2bmd $lb352 $lc2816
l2bmd $lb384 $lc3072
l2bmd $lb416 $lc3328
l2bmd $lb448 $lc3584
l2bmd $lb480 $lc3840
l2bmd $lb512 $lc4096
l2bmd $lb544 $lc4352
l2bmd $lb576 $lc4608
l2bmd $lb608 $lc4864
l2bmd $lb640 $lc5120
l2bmd $lb672 $lc5376
l2bmd $lb704 $lc5632
l2bmd $lb736 $lc5888
l2bmd $lb768 $lc6144
l2bmd $lb800 $lc6400
l2bmd $lb832 $lc6656
l2bmd $lb864 $lc6912
l2bmd $lb896 $lc7168
l2bmd $lb928 $lc7424
l2bmd $lb960 $lc7680
l2bmd $lb992 $lc7936
l2bmd $lb1024 $lc8192
l2bmd $lb1056 $lc8448
l2bmd $lb1088 $lc8704
l2bmd $lb1120 $lc8960
l2bmd $lb1152 $lc9216
l2bmd $lb1184 $lc9472
l2bmd $lb1216 $lc9728
l2bmd $lb1248 $lc9984
l2bmd $lb1280 $lc10240
l2bmd $lb1312 $lc10496
l2bmd $lb1344 $lc10752
l2bmd $lb1376 $lc11008
l2bmd $lb1408 $lc11264
l2bmd $lb1440 $lc11520
l2bmd $lb1472 $lc11776
l2bmd $lb1504 $lc12032
l2bmd $lb1536 $lc12288
l2bmd $lb1568 $lc12544
l2bmd $lb1600 $lc12800
l2bmd $lb1632 $lc13056
l2bmd $lb1664 $lc13312
l2bmd $lb1696 $lc13568
l2bmd $lb1728 $lc13824
l2bmd $lb1760 $lc14080
l2bmd $lb1792 $lc14336
l2bmd $lb1824 $lc14592
l2bmd $lb1856 $lc14848
l2bmd $lb1888 $lc15104
l2bmd $lb1920 $lc15360
l2bmd $lb1952 $lc15616
l2bmd $lb1984 $lc15872
l2bmd $lb2016 $lc16128
# NOTE(review): presumably a one-step gap so the last l2bmd has completed
# before the mvp that follows reads $lc -- confirm it is required.
nop
# Final transfer: move 16384 (= 2^14) long words starting at $lc0 to $d0
# in a single mvp.
# NOTE(review): presumably $lc0@.0 is the source and $d0 the destination,
# in line with the source-then-destination order used above; the meaning
# of the @.0 qualifier is not visible in this file -- confirm.
# Throughput figures noted by the author:
# 16 LW / cycle
# 64 LW / step
# 2^15 W = 2^14 LW
mvp/n16384 $lc0@.0 $d0
# L1B 0 = 1 ... 16 129 ... 144 257 ... 272 385 ... 400
# L1B 1 = 17 ... 32 145 ... 160 273 ... 288 401 ... 416
# L1B 2 = 33 ... 48 161 ... 176 289 ... 304 417 ... 432
# L1B 3 = 49 ... 64 177 ... 192 305 ... 320 433 ... 448
# L1B 4 = 65 ... 80 193 ... 208 321 ... 336 449 ... 464
# L1B 5 = 81 ... 96 209 ... 224 337 ... 352 465 ... 480
# L1B 6 = 97 ... 112 225 ... 240 353 ... 368 481 ... 496
# L1B 7 = 113 ... 128 241 ... 256 369 ... 384 497 ... 512
#
# L1B 0
# PE 0.0 (0) = 1 2 | 1 + ($peid & 0b111)<<1
# PE 0.1 (1) = 3 4 |
# PE 0.2 (2) = 5 6 |
# PE 0.3 (3) = 7 8 |
# PE 1.0 (4) = 9 10 |
# PE 1.1 (5) = 11 12 |
# PE 1.2 (6) = 13 14 |
# PE 1.3 (7) = 15 16 |
# PE 2.0 (0) = 129 130 | 1 + ($peid & 0b111)<<1 + ($mabid & 0b1110)<<6
# PE 2.1 (1) = 131 132 |
# PE 2.2 (2) = 133 134 |
# PE 2.3 (3) = 135 136 |
# PE 3.0 (4) = 137 138 |
# PE 3.1 (5) = 139 140 |
# PE 3.2 (6) = 141 142 |
# PE 3.3 (7) = 143 144 |
# PE 4.0 (0) = 257 258 | 1 + ($peid & 0b111)<<1 + ($mabid & 0b1110)<<6
# ...
# PE 5.3 (7) = 271 272 |
# PE 6.0 (0) = 385 386 | 1 + ($peid & 0b111)<<1 + ($mabid & 0b1110)<<6
# ...
# PE 7.3 (7) = 399 400 |
|