about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author: nsfisis <nsfisis@gmail.com> 2024-09-14 23:52:40 +0900
committer: nsfisis <nsfisis@gmail.com> 2024-09-15 00:02:09 +0900
commit: 396377f7d8a15a85524fce4937acc83236236636 (patch)
tree: 8844fcc690693512b7e50a8a53109f6d5cf196f7
parent: 0cf9106fdfc1c1ea32a72b5eb7a6fc58cff22fe9 (diff)
download: mncore-challenge-396377f7d8a15a85524fce4937acc83236236636.tar.gz
mncore-challenge-396377f7d8a15a85524fce4937acc83236236636.tar.zst
mncore-challenge-396377f7d8a15a85524fce4937acc83236236636.zip
11-Square-Sum
-rw-r--r-- NOTE.md 25
-rw-r--r-- problems/11-Square-Sum/05.vsm 4
-rw-r--r-- problems/11-Square-Sum/06.vsm 12
-rw-r--r-- problems/11-Square-Sum/07.vsm 11
4 files changed, 37 insertions, 15 deletions
diff --git a/NOTE.md b/NOTE.md
index 49126bb..7571e91 100644
--- a/NOTE.md
+++ b/NOTE.md
@@ -7,22 +7,22 @@ M = 自分の点
| 問題 | m | M | m-M |
|-----------------|-----|-----|-----|
-| Mul 7 | 80 | 80 | 0 |
-| Abs | 94 | 68 | 26 |
-| FAM 8 | 90 | 90 | 0 |
-| FMul 2 | 100 | 85 | 15 |
+| Mul 7 | 88 | 70 | 18 |
+| Abs | 100 | 68 | 32 |
+| FAM 8 | 85 | 85 | 0 |
+| FMul 2 | 100 | 100 | 0 |
| Lesseq | 100 | 100 | 0 |
| Transpose | 85 | 67 | 18 |
| Gather | 100 | 100 | 0 |
-| Square Sum | 100 | 93 | 7 |
+| Square Sum | 100 | 100 | 0 |
| Convert Endian | 100 | 100 | 0 |
-| Mod 3 | 70 | 48 | 22 |
+| Mod 3 | 85 | 48 | 37 |
| Matrix Square | 100 | 92 | 8 |
-| Contains | 25 | 17 | 8 |
-| Count Up | 64 | 33 | 31 |
-| Transpose MAB | 69 | 51 | 18 |
-| Inversion Small | 55 | 28 | 27 |
-| Inversion | 52 | 19 | 33 |
+| Contains | 82 | 17 | 65 |
+| Count Up | 81 | 33 | 48 |
+| Transpose MAB | 57 | 49 | 8 |
+| Inversion Small | 67 | 27 | 40 |
+| Inversion | 81 | 19 | 62 |
# 短縮テクニックメモ
@@ -32,7 +32,10 @@ M = 自分の点
* 特に、ALU と MAU を同時に動かす
* 許される限り精度を落とす
* flat アドレス指定を使う
+ * データ競合回避のためにアクセスを遅らせる (`$lr[128,128,128,0]` のようなパターン。必要なのは `$lr0` だけ)
* PE に broadcast されている場合
* `j` 指定を使う
* `$t` を間接参照以外にも使う
* 2倍は `packbit`
+* 入力と出力のスループットは絶対
+* MAB/PE 等の出し分けに `$t` を使わずマスクを使う
diff --git a/problems/11-Square-Sum/05.vsm b/problems/11-Square-Sum/05.vsm
index 4da9e64..7d05005 100644
--- a/problems/11-Square-Sum/05.vsm
+++ b/problems/11-Square-Sum/05.vsm
@@ -1,16 +1,12 @@
lpackbit $mabid $ln0 $t # t = 2 x mab
nop
-
dvmulu $lmt0v32 $lmt0v32 $nowrite
dvfmad $lmt0v32 $lmt0v32 $mauf $nowrite
dvmulu $lmt128v32 $lmt128v32 $nowrite; l1bmrdfadd $mauf $lbi
dvfmad $lmt128v32 $lmt128v32 $mauf $nowrite
-
l1bmm $lbi $lm256v; l1bmrdfadd $mauf $lbi
l1bmm $lbi $lm264v
-
nop
nop
-
l1bmrdfadd $lmt256v $lbi
l1bmm $lbi $ln0v
diff --git a/problems/11-Square-Sum/06.vsm b/problems/11-Square-Sum/06.vsm
new file mode 100644
index 0000000..cef4384
--- /dev/null
+++ b/problems/11-Square-Sum/06.vsm
@@ -0,0 +1,12 @@
+lpackbit $mabid $ln0 $t # t = 2 x mab
+nop
+dvmulu $lmt0v32 $lmt0v32 $nowrite
+dvfmad $lmt0v32 $lmt0v32 $mauf $nowrite
+dvfmau $lmt128v32 $lmt128v32 $mauf $nowrite
+dvfmad $lmt128v32 $lmt128v32 $mauf $nowrite
+l1bmrdfadd $mauf $lbi
+l1bmm $lbi $lm256v
+nop
+nop
+l1bmrdfadd $lmt256v $lbi
+l1bmm $lbi $ln0v
diff --git a/problems/11-Square-Sum/07.vsm b/problems/11-Square-Sum/07.vsm
new file mode 100644
index 0000000..1cc0719
--- /dev/null
+++ b/problems/11-Square-Sum/07.vsm
@@ -0,0 +1,11 @@
+lpackbit $mabid $ln0 $t
+imm i"1" $s3v2
+dvmulu $lmt0v32 $lmt0v32 $nowrite ; iinc $aluf $s5v2
+dvfmad $lmt0v32 $lmt0v32 $mauf $nowrite ; iinc $aluf $s7v2
+dvfmau $lmt128v32 $lmt128v32 $mauf $nowrite
+dvfmad $lmt128v32 $lmt128v32 $mauf $nowrite; lxor $mabid $ls0v $omr1
+l1bmrdfadd $mauf $lbi
+l1bmm $lbi $lr0/$imr1
+nop
+l1bmrdfadd $lr[256,256,256,0] $lbi
+l1bmm $lbi $ln0