aboutsummaryrefslogtreecommitdiffhomepage
path: root/crates/shirabe-class-map-generator/src
diff options
context:
space:
mode:
authornsfisis <nsfisis@gmail.com>2026-05-17 01:52:38 +0900
committernsfisis <nsfisis@gmail.com>2026-05-17 01:52:38 +0900
commit5b2974ac652e816b950b706cb86455bafb1f50b2 (patch)
treeb8d7089b5e8d857a23a2299ea649c8dc077c1339 /crates/shirabe-class-map-generator/src
parente012ab29e69350ff1c4f5e81d3578be951c1a4eb (diff)
downloadphp-shirabe-5b2974ac652e816b950b706cb86455bafb1f50b2.tar.gz
php-shirabe-5b2974ac652e816b950b706cb86455bafb1f50b2.tar.zst
php-shirabe-5b2974ac652e816b950b706cb86455bafb1f50b2.zip
feat(port): port PhpFileCleaner.php
Diffstat (limited to 'crates/shirabe-class-map-generator/src')
-rw-r--r--crates/shirabe-class-map-generator/src/php_file_cleaner.rs265
1 files changed, 265 insertions, 0 deletions
diff --git a/crates/shirabe-class-map-generator/src/php_file_cleaner.rs b/crates/shirabe-class-map-generator/src/php_file_cleaner.rs
new file mode 100644
index 0000000..9f7d781
--- /dev/null
+++ b/crates/shirabe-class-map-generator/src/php_file_cleaner.rs
@@ -0,0 +1,265 @@
+//! ref: composer/vendor/composer/class-map-generator/src/PhpFileCleaner.php
+
+use std::sync::Mutex;
+use indexmap::IndexMap;
+use shirabe_php_shim::preg_quote;
+use shirabe_external_packages::composer::pcre::preg::Preg;
+
+#[derive(Debug, Clone)]
+struct TypeConfigEntry {
+ name: String,
+ length: usize,
+ pattern: String,
+}
+
+static TYPE_CONFIG: Mutex<Option<IndexMap<char, TypeConfigEntry>>> = Mutex::new(None);
+static REST_PATTERN: Mutex<Option<String>> = Mutex::new(None);
+
+#[derive(Debug)]
+pub struct PhpFileCleaner {
+ contents: String,
+ len: usize,
+ max_matches: usize,
+ index: usize,
+}
+
+impl PhpFileCleaner {
+ pub fn set_type_config(types: Vec<String>) {
+ let mut type_config: IndexMap<char, TypeConfigEntry> = IndexMap::new();
+
+ for r#type in &types {
+ let first_char = r#type.chars().next().unwrap();
+ type_config.insert(
+ first_char,
+ TypeConfigEntry {
+ name: r#type.clone(),
+ length: r#type.len(),
+ pattern: format!(
+ "{{.\\b(?<![$:>]){}\\s++[a-zA-Z_\\x7f-\\xff:][a-zA-Z0-9_\\x7f-\\xff:\\-]*+}}Ais",
+ r#type
+ ),
+ },
+ );
+ }
+
+ let keys: String = type_config.keys().collect();
+ let rest_pattern = format!("{{[^?\"'</{}]+}}A", keys);
+
+ *REST_PATTERN.lock().unwrap() = Some(rest_pattern);
+ *TYPE_CONFIG.lock().unwrap() = Some(type_config);
+ }
+
+ pub fn new(contents: String, max_matches: usize) -> Self {
+ let len = contents.len();
+ PhpFileCleaner {
+ contents,
+ len,
+ max_matches,
+ index: 0,
+ }
+ }
+
+ pub fn clean(&mut self) -> String {
+ let mut clean = String::new();
+
+ 'outer: while self.index < self.len {
+ self.skip_to_php();
+ clean.push_str("<?");
+
+ while self.index < self.len {
+ let char = self.contents.as_bytes()[self.index] as char;
+
+ if char == '?' && self.peek('>') {
+ clean.push_str("?>");
+ self.index += 2;
+ continue 'outer;
+ }
+
+ if char == '"' {
+ self.skip_string('"');
+ clean.push_str("null");
+ continue;
+ }
+
+ if char == '\'' {
+ self.skip_string('\'');
+ clean.push_str("null");
+ continue;
+ }
+
+ if char == '<' && self.peek('<') {
+ let mut r#match: Vec<String> = vec![];
+ if self.r#match(
+ r"{<<<[ \t]*+(['\"]?)([a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*+)\1(?:\r\n|\n|\r)}A",
+ Some(&mut r#match),
+ ) {
+ self.index += r#match[0].len();
+ let delimiter = r#match[2].clone();
+ self.skip_heredoc(&delimiter);
+ clean.push_str("null");
+ continue;
+ }
+ }
+
+ if char == '/' {
+ if self.peek('/') {
+ self.skip_to_newline();
+ continue;
+ }
+
+ if self.peek('*') {
+ self.skip_comment();
+ continue;
+ }
+ }
+
+ if self.max_matches == 1 {
+ let type_entry = {
+ let guard = TYPE_CONFIG.lock().unwrap();
+ guard.as_ref().and_then(|tc| tc.get(&char)).cloned()
+ };
+ if let Some(entry) = type_entry {
+ let end = self.index + entry.length;
+ if end <= self.len
+ && &self.contents[self.index..end] == entry.name
+ {
+ let offset = if self.index > 0 { self.index - 1 } else { 0 };
+ let mut r#match: Vec<String> = vec![];
+ if Preg::is_match_at(
+ &entry.pattern,
+ &self.contents,
+ &mut r#match,
+ 0,
+ offset,
+ ) {
+ return clean + &r#match[0];
+ }
+ }
+ }
+ }
+
+ self.index += 1;
+ let rest_pattern = REST_PATTERN.lock().unwrap().clone();
+ if let Some(rest_pattern) = rest_pattern {
+ let mut r#match: Vec<String> = vec![];
+ if self.r#match(&rest_pattern, Some(&mut r#match)) {
+ clean.push(char);
+ clean.push_str(&r#match[0]);
+ self.index += r#match[0].len();
+ } else {
+ clean.push(char);
+ }
+ } else {
+ clean.push(char);
+ }
+ }
+ }
+
+ clean
+ }
+
+ fn skip_to_php(&mut self) {
+ while self.index < self.len {
+ if self.contents.as_bytes()[self.index] as char == '<' && self.peek('?') {
+ self.index += 2;
+ break;
+ }
+
+ self.index += 1;
+ }
+ }
+
+ fn skip_string(&mut self, delimiter: char) {
+ self.index += 1;
+ while self.index < self.len {
+ let c = self.contents.as_bytes()[self.index] as char;
+ if c == '\\' && (self.peek('\\') || self.peek(delimiter)) {
+ self.index += 2;
+ continue;
+ }
+
+ if c == delimiter {
+ self.index += 1;
+ break;
+ }
+
+ self.index += 1;
+ }
+ }
+
+ fn skip_comment(&mut self) {
+ self.index += 2;
+ while self.index < self.len {
+ if self.contents.as_bytes()[self.index] as char == '*' && self.peek('/') {
+ self.index += 2;
+ break;
+ }
+
+ self.index += 1;
+ }
+ }
+
+ fn skip_to_newline(&mut self) {
+ while self.index < self.len {
+ let c = self.contents.as_bytes()[self.index] as char;
+ if c == '\r' || c == '\n' {
+ return;
+ }
+
+ self.index += 1;
+ }
+ }
+
+ fn skip_heredoc(&mut self, delimiter: &str) {
+ let first_delimiter_char = delimiter.chars().next().unwrap();
+ let delimiter_length = delimiter.len();
+ let delimiter_pattern = format!(
+ "{{{}(?![a-zA-Z0-9_\\x80-\\xff])}}A",
+ preg_quote(delimiter, None)
+ );
+
+ while self.index < self.len {
+ let c = self.contents.as_bytes()[self.index] as char;
+
+ // check if we find the delimiter after some spaces/tabs
+ match c {
+ '\t' | ' ' => {
+ self.index += 1;
+ continue;
+ }
+ _ if c == first_delimiter_char => {
+ let end = self.index + delimiter_length;
+ if end <= self.len
+ && &self.contents[self.index..end] == delimiter
+ && self.r#match(&delimiter_pattern, None)
+ {
+ self.index += delimiter_length;
+ return;
+ }
+ }
+ _ => {}
+ }
+
+ // skip the rest of the line
+ self.skip_to_newline();
+
+ // skip newlines
+ while self.index < self.len {
+ let c = self.contents.as_bytes()[self.index] as char;
+ if c == '\r' || c == '\n' {
+ self.index += 1;
+ } else {
+ break;
+ }
+ }
+ }
+ }
+
+ fn peek(&self, char: char) -> bool {
+ self.index + 1 < self.len && self.contents.as_bytes()[self.index + 1] as char == char
+ }
+
+ fn r#match(&self, regex: &str, r#match: Option<&mut Vec<String>>) -> bool {
+ Preg::is_match_strict_groups_at(regex, &self.contents, r#match, 0, self.index)
+ }
+}