From c50662e1e81289c44b4da4107944221328bb5f1c Mon Sep 17 00:00:00 2001 From: nsfisis Date: Sun, 13 Jul 2025 15:37:47 +0900 Subject: fix: utf-8 decoding --- BUGS | 9 +-------- phpunit.xml | 4 ++++ src/WebAssembly/BinaryFormat/Decoder.php | 31 ++----------------------------- 3 files changed, 7 insertions(+), 37 deletions(-) diff --git a/BUGS b/BUGS index 3e88d14..ff37515 100644 --- a/BUGS +++ b/BUGS @@ -2,14 +2,7 @@ ## Validation +* AlignTest * ImportsTest * LinkingTest - -## Misc. - -* AlignTest -* CustomTest * SkipStackGuardPageTest -* Utf8CustomSectionIdTest -* Utf8ImportFieldTest -* Utf8ImportModuleTest diff --git a/phpunit.xml b/phpunit.xml index a6c6ac1..2e62c6f 100644 --- a/phpunit.xml +++ b/phpunit.xml @@ -29,6 +29,7 @@ tests/src/SpecTestsuites/Core/CallTest.php tests/src/SpecTestsuites/Core/ConstTest.php tests/src/SpecTestsuites/Core/ConversionsTest.php + tests/src/SpecTestsuites/Core/CustomTest.php tests/src/SpecTestsuites/Core/DataTest.php tests/src/SpecTestsuites/Core/ElemTest.php tests/src/SpecTestsuites/Core/EndiannessTest.php @@ -97,6 +98,9 @@ tests/src/SpecTestsuites/Core/UnreachedInvalidTest.php tests/src/SpecTestsuites/Core/UnreachedValidTest.php tests/src/SpecTestsuites/Core/UnwindTest.php + tests/src/SpecTestsuites/Core/Utf8CustomSectionIdTest.php + tests/src/SpecTestsuites/Core/Utf8ImportFieldTest.php + tests/src/SpecTestsuites/Core/Utf8ImportModuleTest.php tests/src/SpecTestsuites/Core/Utf8InvalidEncodingTest.php diff --git a/src/WebAssembly/BinaryFormat/Decoder.php b/src/WebAssembly/BinaryFormat/Decoder.php index f0a5c29..ff23a5d 100644 --- a/src/WebAssembly/BinaryFormat/Decoder.php +++ b/src/WebAssembly/BinaryFormat/Decoder.php @@ -1119,35 +1119,8 @@ final class Decoder */ private function implodeUtf8BytesToString(array $bytes): ?string { - $s = ''; - $count = count($bytes); - for ($i = 0; $i < $count; $i++) { - if (($bytes[$i] & 0x80) === 0) { - $code = $bytes[$i]; - } elseif (($bytes[$i] & 0xE0) === 0xC0) { - if ($count <= $i + 1) { - return null; - } - $code = (($bytes[$i] & 0x1F) << 6) | ($bytes[$i + 1] & 0x3F); - $i++; - } elseif (($bytes[$i] & 0xF0) === 0xE0) { - if ($count <= $i + 2) { - return null; - } - $code = (($bytes[$i] & 0x0F) << 12) | (($bytes[$i + 1] & 0x3F) << 6) | ($bytes[$i + 2] & 0x3F); - $i += 2; - } elseif (($bytes[$i] & 0xF8) === 0xF0) { - if ($count <= $i + 3) { - return null; - } - $code = (($bytes[$i] & 0x07) << 18) | (($bytes[$i + 1] & 0x3F) << 12) | (($bytes[$i + 2] & 0x3F) << 6) | ($bytes[$i + 3] & 0x3F); - $i += 3; - } else { - return null; - } - $s .= mb_chr($code, 'UTF-8'); - } - return $s; + $s = pack('C*', ...$bytes); + return mb_check_encoding($s, 'UTF-8') ? $s : null; } /** -- cgit v1.2.3-70-g09d2