aboutsummaryrefslogtreecommitdiffhomepage
path: root/problems/19-Inversion/03.vsm
blob: 3b9c805f4312794b0adedfc3f824802d63279341 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
# j = $l2bid x 8 + $l1bid
# $n128 = j
# $n256 = A[j]
# $s256 = 1
# $s272 = 0
# $ls[0:128] = k (from 0 to 63)
# $ls[128:256] = j >= k
# $lr[0:128] = A[k]
# $lr[0:128] = A[j] < A[k]
# $lr[0:128] = (j >= k) and (A[j] < A[k])

# Fill the long-word $ls slots with the index table k described in the register
# plan above ("$ls[0:128] = k (from 0 to 63)").
# The Ruby one-liner and the k = 0 line directly below are commented-out
# references only; the active code seeds k = 1..4 with immediates and derives
# the remaining entries by addition.
# NOTE(review): k = 0 is never written here, so $ls0 is presumably expected to
# already hold 0 -- confirm.
# 64.times { |i| puts "imm i\"#{i}\" $s#{2*i}v/1100 # $s#{2*i} = $s#{2*i+1} = #{i}" }
# imm i"0" $s0v/1100 # $s0 = $s1 = 0
imm i"1" $ls2/1000 # $s2 = $s3 = 1
imm i"2" $ls4/1000 # $s4 = $s5 = 2
imm i"3" $ls6/1000 # $s6 = $s7 = 3
imm i"4" $ls8/1000 # $s8 = $s9 = 4
# Extend the table: each destination starts 8 short addresses after the previous one.
# The first iadd takes the vector $ls2v as its second operand; every later one
# adds the scalar $ls8 (loaded with 4 above) to $aluf.
# NOTE(review): $aluf is presumably the forwarded result of the preceding ALU
# instruction, which would make each step "previous four entries + 4" -- confirm.
iadd $aluf $ls2v $ls10v
iadd $aluf $ls8 $ls18v
iadd $aluf $ls8 $ls26v
iadd $aluf $ls8 $ls34v
iadd $aluf $ls8 $ls42v
iadd $aluf $ls8 $ls50v
iadd $aluf $ls8 $ls58v
iadd $aluf $ls8 $ls66v
iadd $aluf $ls8 $ls74v
iadd $aluf $ls8 $ls82v
iadd $aluf $ls8 $ls90v
iadd $aluf $ls8 $ls98v
iadd $aluf $ls8 $ls106v
iadd $aluf $ls8 $ls114v
# The /1110 mask on the last step presumably drops the final vector element so
# that the table ends at $ls126 -- TODO confirm.
iadd $aluf $ls8 $ls122v/1110

# d getf $ls0n0c0b0m0p0 64
# quit

# Compute j, the index named in the register plan ("j = $l2bid x 8 + $l1bid",
# "$n128 = j").
# The shift amount $s6 was loaded with 3 by the imm sequence above, so the
# shift corresponds to the "x 8" in the plan; the shifted value is not stored
# ($nowrite) and is picked up by the following iadd through $aluf.
ilsl $l2bid $s6 $nowrite
iadd $l1bid $aluf $n128v

# Copy $llm0v..$llm112v into $llr0v..$llr112v in eight steps of 16 addresses,
# as produced by the generator line below. The register plan at the top of the
# file describes the result as "$lr[0:128] = A[k]".
# Two further instructions are placed between the copies:
#   lpackbit $ln128 $ls272 $t  -- writes $t from $ln128 (j) and $ls272 (plan: "$s272 = 0")
#   iinc $lmt0 $ln256          -- writes $ln256 from $lmt0
# NOTE(review): $lmt0 looks like a $t-indexed read of $lm, which would match the
# plan's "$n256 = A[j]". Whether iinc also adds 1 to the value read is not
# visible in this file -- confirm against the ISA reference.
# (128/16).times { |i| puts "lpassa $llm#{i*16}v $llr#{i*16}v" }
lpassa $llm0v $llr0v
lpassa $llm16v $llr16v
lpassa $llm32v $llr32v
lpassa $llm48v $llr48v
lpackbit $ln128 $ls272 $t
lpassa $llm64v $llr64v
iinc $lmt0 $ln256
lpassa $llm80v $llr80v
lpassa $llm96v $llr96v
lpassa $llm112v $llr112v

# Main comparison pass, unrolled 16 times (group i covers short addresses
# 8*i .. 8*i+7 of $ls and $lr). The three generator lines below show the
# un-interleaved form; the emitted code packs up to three instructions per
# line, separated by ';'.
#
# First slot, three instructions per group:
#   isub $ln128 $ls<8i>v $omr1          -- j minus k; only mask register 1 is written
#   isub $lr<8i>v $ln256 $omr1/$imr1    -- $lr (A[k]) minus $n256, executed under mask 1
#                                          and writing mask 1 again
#   ipassa/$imr1 $ls2 $lr<8i>v          -- under mask 1, store $ls2 (set to 1 by the imm
#                                          sequence) into $lr
# The register plan states the intended final content as
# "$lr[0:128] = (j >= k) and (A[j] < A[k])".
# NOTE(review): the mask polarity (which sign sets the bit) and the value left
# in unmasked lanes are not visible in this file -- confirm against the ISA.
#
# Second slot: "l1bmd $aluf $lb<256i>" is issued together with the first isub
# of the NEXT group. It reads $aluf rather than $lr<8i>v as in the generator
# line, i.e. it takes the result of the ipassa that immediately precedes it.
# The last group's l1bmd is the stand-alone instruction that closes this section.
#
# Third slot: "l2bmriiadd $lb<64n> $lc<64n>" starts at the third group and
# advances by 64 per line, so it trails the l1bmd writes; the remaining chunks
# are issued after this section.
# (128/8).times { |i| puts "isub $ln128 $ls#{i*8}v $omr1"; puts "isub $lr#{i*8}v $ln256 $omr1/$imr1"; puts "ipassa/$imr1 $ls2 $lr#{i*8}v" }
# (128/8).times { |i| puts "l1bmd $lr#{i*8}v $lb#{i*256}" }
# (4096/64).times { |i| puts "l2bmriiadd $lb#{i*64} $lc#{i*64}" }
isub $ln128 $ls0v $omr1
isub $lr0v $ln256 $omr1/$imr1
ipassa/$imr1 $ls2 $lr0v
isub $ln128 $ls8v $omr1;        l1bmd $aluf $lb0
isub $lr8v $ln256 $omr1/$imr1
ipassa/$imr1 $ls2 $lr8v
isub $ln128 $ls16v $omr1;        l1bmd $aluf $lb256;    l2bmriiadd $lb0 $lc0
isub $lr16v $ln256 $omr1/$imr1;                         l2bmriiadd $lb64 $lc64
ipassa/$imr1 $ls2 $lr16v;                               l2bmriiadd $lb128 $lc128
isub $ln128 $ls24v $omr1;        l1bmd $aluf $lb512;    l2bmriiadd $lb192 $lc192
isub $lr24v $ln256 $omr1/$imr1;                         l2bmriiadd $lb256 $lc256
ipassa/$imr1 $ls2 $lr24v;                               l2bmriiadd $lb320 $lc320
isub $ln128 $ls32v $omr1;        l1bmd $aluf $lb768;    l2bmriiadd $lb384 $lc384
isub $lr32v $ln256 $omr1/$imr1;                         l2bmriiadd $lb448 $lc448
ipassa/$imr1 $ls2 $lr32v;                               l2bmriiadd $lb512 $lc512
isub $ln128 $ls40v $omr1;        l1bmd $aluf $lb1024;   l2bmriiadd $lb576 $lc576
isub $lr40v $ln256 $omr1/$imr1;                         l2bmriiadd $lb640 $lc640
ipassa/$imr1 $ls2 $lr40v;                               l2bmriiadd $lb704 $lc704
isub $ln128 $ls48v $omr1;        l1bmd $aluf $lb1280;   l2bmriiadd $lb768 $lc768
isub $lr48v $ln256 $omr1/$imr1;                         l2bmriiadd $lb832 $lc832
ipassa/$imr1 $ls2 $lr48v;                               l2bmriiadd $lb896 $lc896
isub $ln128 $ls56v $omr1;        l1bmd $aluf $lb1536;   l2bmriiadd $lb960 $lc960
isub $lr56v $ln256 $omr1/$imr1;                         l2bmriiadd $lb1024 $lc1024
ipassa/$imr1 $ls2 $lr56v;                               l2bmriiadd $lb1088 $lc1088
isub $ln128 $ls64v $omr1;        l1bmd $aluf $lb1792;   l2bmriiadd $lb1152 $lc1152
isub $lr64v $ln256 $omr1/$imr1;                         l2bmriiadd $lb1216 $lc1216
ipassa/$imr1 $ls2 $lr64v;                               l2bmriiadd $lb1280 $lc1280
isub $ln128 $ls72v $omr1;        l1bmd $aluf $lb2048;   l2bmriiadd $lb1344 $lc1344
isub $lr72v $ln256 $omr1/$imr1;                         l2bmriiadd $lb1408 $lc1408
ipassa/$imr1 $ls2 $lr72v;                               l2bmriiadd $lb1472 $lc1472
isub $ln128 $ls80v $omr1;        l1bmd $aluf $lb2304;   l2bmriiadd $lb1536 $lc1536
isub $lr80v $ln256 $omr1/$imr1;                         l2bmriiadd $lb1600 $lc1600
ipassa/$imr1 $ls2 $lr80v;                               l2bmriiadd $lb1664 $lc1664
isub $ln128 $ls88v $omr1;        l1bmd $aluf $lb2560;   l2bmriiadd $lb1728 $lc1728
isub $lr88v $ln256 $omr1/$imr1;                         l2bmriiadd $lb1792 $lc1792
ipassa/$imr1 $ls2 $lr88v;                               l2bmriiadd $lb1856 $lc1856
isub $ln128 $ls96v $omr1;        l1bmd $aluf $lb2816;   l2bmriiadd $lb1920 $lc1920
isub $lr96v $ln256 $omr1/$imr1;                         l2bmriiadd $lb1984 $lc1984
ipassa/$imr1 $ls2 $lr96v;                               l2bmriiadd $lb2048 $lc2048
isub $ln128 $ls104v $omr1;        l1bmd $aluf $lb3072;  l2bmriiadd $lb2112 $lc2112
isub $lr104v $ln256 $omr1/$imr1;                        l2bmriiadd $lb2176 $lc2176
ipassa/$imr1 $ls2 $lr104v;                              l2bmriiadd $lb2240 $lc2240
isub $ln128 $ls112v $omr1;        l1bmd $aluf $lb3328;  l2bmriiadd $lb2304 $lc2304
isub $lr112v $ln256 $omr1/$imr1;                        l2bmriiadd $lb2368 $lc2368
ipassa/$imr1 $ls2 $lr112v;                              l2bmriiadd $lb2432 $lc2432
isub $ln128 $ls120v $omr1;        l1bmd $aluf $lb3584;  l2bmriiadd $lb2496 $lc2496
isub $lr120v $ln256 $omr1/$imr1;                        l2bmriiadd $lb2560 $lc2560
ipassa/$imr1 $ls2 $lr120v;                              l2bmriiadd $lb2624 $lc2624
l1bmd $aluf $lb3840;                                    l2bmriiadd $lb2688 $lc2688

# Remaining "l2bmriiadd $lb<64n> $lc<64n>" steps, from chunk 2688 up to the
# last chunk 4032, issued one per line now that the ALU slot has no more work.
# NOTE(review): the chunk at 2688 is also issued on the final line of the
# preceding interleaved section, so it appears twice in the program while the
# generator sketch emits each chunk once. Presumably harmless (same source and
# destination), but confirm whether the repeat is intentional padding or a
# copy/paste leftover before removing it.
l2bmriiadd $lb2688 $lc2688
l2bmriiadd $lb2752 $lc2752
l2bmriiadd $lb2816 $lc2816
l2bmriiadd $lb2880 $lc2880
l2bmriiadd $lb2944 $lc2944
l2bmriiadd $lb3008 $lc3008
l2bmriiadd $lb3072 $lc3072
l2bmriiadd $lb3136 $lc3136
l2bmriiadd $lb3200 $lc3200
l2bmriiadd $lb3264 $lc3264
l2bmriiadd $lb3328 $lc3328
l2bmriiadd $lb3392 $lc3392
l2bmriiadd $lb3456 $lc3456
l2bmriiadd $lb3520 $lc3520
l2bmriiadd $lb3584 $lc3584
l2bmriiadd $lb3648 $lc3648
l2bmriiadd $lb3712 $lc3712
l2bmriiadd $lb3776 $lc3776
l2bmriiadd $lb3840 $lc3840
l2bmriiadd $lb3904 $lc3904
l2bmriiadd $lb3968 $lc3968
l2bmriiadd $lb4032 $lc4032

# Top-level step over the 4096 words starting at $lc0: mvriiadd moves them to
# $p0@0 and mvb moves $p0@0 back into $lc0.
# NOTE(review): by analogy with the l2bmriiadd / l2bmb pair used elsewhere in this
# file, these are presumably "reduce with integer add" followed by "broadcast"
# -- confirm against the ISA reference.
# The nop before and the two nops after are presumably latency padding; do not
# remove them without checking the timing rules.
nop

mvriiadd/n4096 $lc0 $p0@0
mvb/n4096 $p0@0 $lc0

nop
nop

# Return path: move every 64-word chunk from $lc back to $lb with l2bmb, then
# move each 256-word group from $lb into $lm<8i>v with l1bmd (generator lines
# below).
# Scheduling visible in the code: the first l1bmd is paired with the sixth
# l2bmb, and from then on one l1bmd is issued every fourth l2bmb, so each
# l1bmd reads an $lb group whose four chunks were written earlier. The final
# l1bmd follows a single nop after the last l2bmb.
# The destination $lm0v..$lm120v has the same register name and address range
# as the $llm source of the earlier lpassa copy.
# NOTE(review): this presumably overwrites the input held there -- confirm that
# $lm and $llm alias the same memory.
# (4096/64).times { |i| puts "l2bmb $lc#{i*64} $lb#{i*64}" }
# (128/8).times { |i| puts "l1bmd $lb#{i*256} $lm#{i*8}v" }
l2bmb $lc0 $lb0
l2bmb $lc64 $lb64
l2bmb $lc128 $lb128
l2bmb $lc192 $lb192
l2bmb $lc256 $lb256
l2bmb $lc320 $lb320; l1bmd $lb0 $lm0v
l2bmb $lc384 $lb384
l2bmb $lc448 $lb448
l2bmb $lc512 $lb512
l2bmb $lc576 $lb576; l1bmd $lb256 $lm8v
l2bmb $lc640 $lb640
l2bmb $lc704 $lb704
l2bmb $lc768 $lb768
l2bmb $lc832 $lb832; l1bmd $lb512 $lm16v
l2bmb $lc896 $lb896
l2bmb $lc960 $lb960
l2bmb $lc1024 $lb1024
l2bmb $lc1088 $lb1088; l1bmd $lb768 $lm24v
l2bmb $lc1152 $lb1152
l2bmb $lc1216 $lb1216
l2bmb $lc1280 $lb1280
l2bmb $lc1344 $lb1344; l1bmd $lb1024 $lm32v
l2bmb $lc1408 $lb1408
l2bmb $lc1472 $lb1472
l2bmb $lc1536 $lb1536
l2bmb $lc1600 $lb1600; l1bmd $lb1280 $lm40v
l2bmb $lc1664 $lb1664
l2bmb $lc1728 $lb1728
l2bmb $lc1792 $lb1792
l2bmb $lc1856 $lb1856; l1bmd $lb1536 $lm48v
l2bmb $lc1920 $lb1920
l2bmb $lc1984 $lb1984
l2bmb $lc2048 $lb2048
l2bmb $lc2112 $lb2112; l1bmd $lb1792 $lm56v
l2bmb $lc2176 $lb2176
l2bmb $lc2240 $lb2240
l2bmb $lc2304 $lb2304
l2bmb $lc2368 $lb2368; l1bmd $lb2048 $lm64v
l2bmb $lc2432 $lb2432
l2bmb $lc2496 $lb2496
l2bmb $lc2560 $lb2560
l2bmb $lc2624 $lb2624; l1bmd $lb2304 $lm72v
l2bmb $lc2688 $lb2688
l2bmb $lc2752 $lb2752
l2bmb $lc2816 $lb2816
l2bmb $lc2880 $lb2880; l1bmd $lb2560 $lm80v
l2bmb $lc2944 $lb2944
l2bmb $lc3008 $lb3008
l2bmb $lc3072 $lb3072
l2bmb $lc3136 $lb3136; l1bmd $lb2816 $lm88v
l2bmb $lc3200 $lb3200
l2bmb $lc3264 $lb3264
l2bmb $lc3328 $lb3328
l2bmb $lc3392 $lb3392; l1bmd $lb3072 $lm96v
l2bmb $lc3456 $lb3456
l2bmb $lc3520 $lb3520
l2bmb $lc3584 $lb3584
l2bmb $lc3648 $lb3648; l1bmd $lb3328 $lm104v
l2bmb $lc3712 $lb3712
l2bmb $lc3776 $lb3776
l2bmb $lc3840 $lb3840
l2bmb $lc3904 $lb3904; l1bmd $lb3584 $lm112v
l2bmb $lc3968 $lb3968
l2bmb $lc4032 $lb4032
nop
l1bmd $lb3840 $lm120v

# Final stage: one more pass through the same hierarchy, this time for a
# single value per element, ending with the result in $ln0v.
#   l1bmd $lmt0 $lb0            -- source is $lmt0, the same operand the earlier
#                                  iinc read from (presumably $lm indexed by $t,
#                                  i.e. entry j of the table just written -- confirm)
#   l2bmriiadd / mvriiadd/n64   -- same instruction pair as the 4096-word pass,
#                                  here over 64 words starting at $lc0
#   mvb/n64, l2bmb, l1bmd       -- return path back down into $ln0v
# The nop runs between the steps are presumably required latency padding; keep
# their counts unchanged unless the timing rules are re-checked.
nop
nop

l1bmd $lmt0 $lb0

nop
nop

l2bmriiadd $lb0 $lc0
mvriiadd/n64 $lc0 $p0@0
mvb/n64 $p0@0 $lc0

nop
nop
nop

l2bmb $lc0 $lb0

nop
nop

l1bmd $lb0 $ln0v