diff options
| author | nsfisis <nsfisis@gmail.com> | 2024-09-14 23:52:40 +0900 |
|---|---|---|
| committer | nsfisis <nsfisis@gmail.com> | 2024-09-15 00:02:09 +0900 |
| commit | 396377f7d8a15a85524fce4937acc83236236636 (patch) | |
| tree | 8844fcc690693512b7e50a8a53109f6d5cf196f7 | |
| parent | 0cf9106fdfc1c1ea32a72b5eb7a6fc58cff22fe9 (diff) | |
| download | mncore-challenge-396377f7d8a15a85524fce4937acc83236236636.tar.gz mncore-challenge-396377f7d8a15a85524fce4937acc83236236636.tar.zst mncore-challenge-396377f7d8a15a85524fce4937acc83236236636.zip | |
11-Square-Sum
| -rw-r--r-- | NOTE.md | 25 | ||||
| -rw-r--r-- | problems/11-Square-Sum/05.vsm | 4 | ||||
| -rw-r--r-- | problems/11-Square-Sum/06.vsm | 12 | ||||
| -rw-r--r-- | problems/11-Square-Sum/07.vsm | 11 |
4 files changed, 37 insertions, 15 deletions
(Note: the diff below was re-flowed from a collapsed extraction; leading indentation of nested list items in NOTE.md was lost and is only restored where the diff markers show it.)

```diff
@@ -7,22 +7,22 @@ M = 自分の点
 
 | 問題 | m | M | m-M |
 |-----------------|-----|-----|-----|
-| Mul 7 | 80 | 80 | 0 |
-| Abs | 94 | 68 | 26 |
-| FAM 8 | 90 | 90 | 0 |
-| FMul 2 | 100 | 85 | 15 |
+| Mul 7 | 88 | 70 | 18 |
+| Abs | 100 | 68 | 32 |
+| FAM 8 | 85 | 85 | 0 |
+| FMul 2 | 100 | 100 | 0 |
 | Lesseq | 100 | 100 | 0 |
 | Transpose | 85 | 67 | 18 |
 | Gather | 100 | 100 | 0 |
-| Square Sum | 100 | 93 | 7 |
+| Square Sum | 100 | 100 | 0 |
 | Convert Endian | 100 | 100 | 0 |
-| Mod 3 | 70 | 48 | 22 |
+| Mod 3 | 85 | 48 | 37 |
 | Matrix Square | 100 | 92 | 8 |
-| Contains | 25 | 17 | 8 |
-| Count Up | 64 | 33 | 31 |
-| Transpose MAB | 69 | 51 | 18 |
-| Inversion Small | 55 | 28 | 27 |
-| Inversion | 52 | 19 | 33 |
+| Contains | 82 | 17 | 65 |
+| Count Up | 81 | 33 | 48 |
+| Transpose MAB | 57 | 49 | 8 |
+| Inversion Small | 67 | 27 | 40 |
+| Inversion | 81 | 19 | 62 |
 
 # 短縮テクニックメモ
 
@@ -32,7 +32,10 @@ M = 自分の点
 * 特に、ALU と MAU を同時に動かす
 * 許される限り精度を落とす
 * flat アドレス指定を使う
+    * データ競合回避のためにアクセスを遅らせる (`$lr[128,128,128,0]` のようなパターン。必要なのは `$lr0` だけ)
 * PE に broadcast されている場合
 * `j` 指定を使う
 * `$t` を間接参照以外にも使う
 * 2倍は `packbit`
+* 入力と出力のスループットは絶対
+* MAB/PE 等の出し分けに `$t` を使わずマスクを使う
diff --git a/problems/11-Square-Sum/05.vsm b/problems/11-Square-Sum/05.vsm
index 4da9e64..7d05005 100644
--- a/problems/11-Square-Sum/05.vsm
+++ b/problems/11-Square-Sum/05.vsm
@@ -1,16 +1,12 @@
 lpackbit $mabid $ln0 $t # t = 2 x mab
 nop
-
 dvmulu $lmt0v32 $lmt0v32 $nowrite
 dvfmad $lmt0v32 $lmt0v32 $mauf $nowrite
 dvmulu $lmt128v32 $lmt128v32 $nowrite; l1bmrdfadd $mauf $lbi
 dvfmad $lmt128v32 $lmt128v32 $mauf $nowrite
-
 l1bmm $lbi $lm256v; l1bmrdfadd $mauf $lbi
 l1bmm $lbi $lm264v
-
 nop
 nop
-
 l1bmrdfadd $lmt256v $lbi
 l1bmm $lbi $ln0v
diff --git a/problems/11-Square-Sum/06.vsm b/problems/11-Square-Sum/06.vsm
new file mode 100644
index 0000000..cef4384
--- /dev/null
+++ b/problems/11-Square-Sum/06.vsm
@@ -0,0 +1,12 @@
+lpackbit $mabid $ln0 $t # t = 2 x mab
+nop
+dvmulu $lmt0v32 $lmt0v32 $nowrite
+dvfmad $lmt0v32 $lmt0v32 $mauf $nowrite
+dvfmau $lmt128v32 $lmt128v32 $mauf $nowrite
+dvfmad $lmt128v32 $lmt128v32 $mauf $nowrite
+l1bmrdfadd $mauf $lbi
+l1bmm $lbi $lm256v
+nop
+nop
+l1bmrdfadd $lmt256v $lbi
+l1bmm $lbi $ln0v
diff --git a/problems/11-Square-Sum/07.vsm b/problems/11-Square-Sum/07.vsm
new file mode 100644
index 0000000..1cc0719
--- /dev/null
+++ b/problems/11-Square-Sum/07.vsm
@@ -0,0 +1,11 @@
+lpackbit $mabid $ln0 $t
+imm i"1" $s3v2
+dvmulu $lmt0v32 $lmt0v32 $nowrite ; iinc $aluf $s5v2
+dvfmad $lmt0v32 $lmt0v32 $mauf $nowrite ; iinc $aluf $s7v2
+dvfmau $lmt128v32 $lmt128v32 $mauf $nowrite
+dvfmad $lmt128v32 $lmt128v32 $mauf $nowrite; lxor $mabid $ls0v $omr1
+l1bmrdfadd $mauf $lbi
+l1bmm $lbi $lr0/$imr1
+nop
+l1bmrdfadd $lr[256,256,256,0] $lbi
+l1bmm $lbi $ln0
```
