use anyhow::Result;
use regex::Regex;
use std::path::Path;

/// File extensions considered PHP source files for class scanning.
const PHP_EXTENSIONS: &[&str] = &["php", "inc", "hh"];

/// Check if a file path has a PHP-like extension.
fn is_php_file(path: &Path) -> bool {
    is_php_ext(path)
}

/// Public version of the PHP extension check, used by the autoload scanner.
///
/// The extension is compared ASCII case-insensitively against
/// [`PHP_EXTENSIONS`]; paths without an extension (or with a non-UTF-8
/// extension) yield `false`.
pub fn is_php_ext(path: &Path) -> bool {
    path.extension()
        .and_then(|e| e.to_str())
        .map(|ext| PHP_EXTENSIONS.iter().any(|&e| ext.eq_ignore_ascii_case(e)))
        .unwrap_or(false)
}

/// Scan a PHP file and return the list of fully-qualified class names declared in it.
///
/// Returns an empty vec if the file has no relevant extension or no class declarations.
///
/// # Errors
///
/// Propagates the I/O error from [`std::fs::read_to_string`], which also fails
/// when the file is not valid UTF-8.
pub fn find_classes(path: &Path) -> Result<Vec<String>> {
    if !is_php_file(path) {
        return Ok(Vec::new());
    }

    let contents = std::fs::read_to_string(path)?;

    // Quick check: does the file even contain a class-like keyword?
    // Avoids running the byte-level cleaner on files that cannot declare anything.
    let quick_re = Regex::new(r"(?i)\b(?:class|interface|trait|enum)\s")
        .expect("static keyword regex is valid");
    if !quick_re.is_match(&contents) {
        return Ok(Vec::new());
    }

    let cleaned = clean_php_content(&contents);
    Ok(extract_declarations(&cleaned))
}

/// Returns `true` for bytes that may appear in a PHP label (identifier):
/// ASCII letters, digits, `_`, and any byte `>= 0x80`.
fn is_label_byte(b: u8) -> bool {
    b.is_ascii_alphanumeric() || b == b'_' || b >= 0x80
}

/// Returns the index of the next `\n` at or after `from`, or `bytes.len()` if none.
fn skip_to_eol(bytes: &[u8], from: usize) -> usize {
    bytes[from..]
        .iter()
        .position(|&b| b == b'\n')
        .map_or(bytes.len(), |off| from + off)
}

/// Skips a quoted literal whose opening `quote` byte is at `start`.
///
/// The opening and (if present) closing quote bytes are written to `out`; the
/// literal's content is left blank. A backslash skips the byte that follows it.
/// Returns the index just past the closing quote, or `bytes.len()` when the
/// literal is unterminated.
fn skip_quoted(bytes: &[u8], out: &mut [u8], start: usize, quote: u8) -> usize {
    let len = bytes.len();
    out[start] = quote;
    let mut i = start + 1;
    while i < len {
        match bytes[i] {
            // Escaped character: blank both the backslash and what follows.
            b'\\' if i + 1 < len => i += 2,
            b if b == quote => {
                out[i] = quote;
                return i + 1;
            }
            _ => i += 1,
        }
    }
    len
}

/// Skips a heredoc/nowdoc whose `<<<` starts at `start`.
///
/// Returns the index just past the closing label, or `bytes.len()` when no
/// closing label is found. When no label follows `<<<`, only the remainder of
/// that line is skipped.
fn skip_heredoc(bytes: &[u8], start: usize) -> usize {
    let len = bytes.len();
    let is_blank = |b: u8| b == b' ' || b == b'\t';

    let mut i = start + 3;
    while i < len && is_blank(bytes[i]) {
        i += 1;
    }
    // Optional opening quote: single for nowdoc, double for heredoc.
    if matches!(bytes.get(i).copied(), Some(b'\'' | b'"')) {
        i += 1;
    }

    let label_start = i;
    while i < len && is_label_byte(bytes[i]) {
        i += 1;
    }
    let label = &bytes[label_start..i];

    // Nothing else on the opening line matters (closing quote of the label included).
    i = skip_to_eol(bytes, i);
    if i < len {
        i += 1; // consume newline
    }
    if label.is_empty() {
        return i;
    }

    while i < len {
        // The closing label may be indented (PHP 7.3+).
        let mut cursor = i;
        while cursor < len && is_blank(bytes[cursor]) {
            cursor += 1;
        }
        if bytes[cursor..].starts_with(label) {
            let after = cursor + label.len();
            // The label closes the heredoc as soon as it is not followed by
            // another label byte, so `EOT;`, `EOT,` and `EOT)` all terminate it.
            if after >= len || !is_label_byte(bytes[after]) {
                return after;
            }
        }
        // Not a terminator line: move to the start of the next line.
        i = skip_to_eol(bytes, i);
        if i < len {
            i += 1;
        }
    }
    len
}

/// State machine that strips strings, comments, and heredocs/nowdocs from PHP code.
///
/// Returns a string of equal byte length where non-PHP content is replaced with spaces
/// so that regex offsets are preserved. Only PHP mode content is kept; everything else
/// is blanked out. The delimiters of quoted literals (`'`, `"`, `` ` ``) are kept,
/// their content is not.
fn clean_php_content(contents: &str) -> String {
    let bytes = contents.as_bytes();
    let len = bytes.len();
    let mut out = vec![b' '; len];
    let mut i = 0;
    let mut in_php = false;

    while i < len {
        let b = bytes[i];
        let next = bytes.get(i + 1).copied();

        if !in_php {
            // Look for an opening tag: `<?php`, `<?=` or the short tag `<?`.
            if b == b'<' && next == Some(b'?') {
                in_php = true;
                i += 2;
                // Blank the rest of the tag so it is not mistaken for code.
                if matches!(bytes.get(i..i + 3), Some(tag) if tag.eq_ignore_ascii_case(b"php")) {
                    i += 3;
                } else if bytes.get(i).copied() == Some(b'=') {
                    i += 1;
                }
            } else {
                i += 1;
            }
            continue;
        }

        match (b, next) {
            // Closing tag `?>`: back to inline (non-PHP) content.
            (b'?', Some(b'>')) => {
                in_php = false;
                i += 2;
            }
            // Line comment: `//` or `#`. Stops at the newline, which is kept.
            (b'/', Some(b'/')) | (b'#', _) => {
                i = skip_to_eol(bytes, i);
            }
            // Block comment: `/* ... */`. An unterminated comment runs to EOF.
            (b'/', Some(b'*')) => {
                let body_start = i + 2;
                i = bytes[body_start..]
                    .windows(2)
                    .position(|w| w == b"*/")
                    .map_or(len, |off| body_start + off + 2);
            }
            // Single-quoted, double-quoted and backtick (shell exec) strings.
            (b'\'' | b'"' | b'`', _) => {
                i = skip_quoted(bytes, &mut out, i, b);
            }
            // Heredoc / Nowdoc: `<<<`.
            (b'<', Some(b'<')) if bytes.get(i + 2) == Some(&b'<') => {
                i = skip_heredoc(bytes, i);
            }
            // Keep normal PHP content.
            _ => {
                out[i] = b;
                i += 1;
            }
        }
    }

    // Blanking starts and stops on ASCII bytes, so `out` is normally still valid
    // UTF-8; the lossy conversion is only a safeguard.
    String::from_utf8_lossy(&out).into_owned()
}

/// Extract fully-qualified class names from cleaned PHP content.
/// /// Tracks the current namespace and finds class/interface/trait/enum declarations. fn extract_declarations(cleaned: &str) -> Vec { let mut results = Vec::new(); // Regex for namespace declarations: // namespace Foo\Bar; — simple // namespace Foo\Bar { — block // namespace { — global block let ns_re = Regex::new( r"(?x) \bnamespace\s+ ((?:[a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*\\)*[a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*) \s*[;{] | \bnamespace\s*\{ ", ) .unwrap(); // Regex for class/interface/trait/enum declarations. // We need to capture the name; anonymous classes (new class ...) are excluded. let decl_re = Regex::new( r"(?x) \b(?:abstract\s+|final\s+|readonly\s+)* (?Pclass|interface|trait|enum)\s+ (?P[a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*) ", ) .unwrap(); let mut current_ns = String::new(); // We process namespace changes as we walk through the file. // Build a list of all namespace and declaration positions. #[derive(Debug)] enum Event { Namespace(usize, String), // position, namespace Declaration(usize, String), // position, simple name } let mut events: Vec = Vec::new(); // Find namespace declarations for cap in ns_re.captures_iter(cleaned) { let pos = cap.get(0).unwrap().start(); let ns_name = cap .get(1) .map(|m| m.as_str().to_string()) .unwrap_or_default(); events.push(Event::Namespace(pos, ns_name)); } // Find class/interface/trait/enum declarations for cap in decl_re.captures_iter(cleaned) { let pos = cap.get(0).unwrap().start(); let name = cap.name("name").unwrap().as_str().to_string(); // Skip anonymous classes: check if "new" precedes "class" on the same "expression". // A reliable check: look back for "new " before this match. let before = &cleaned[..pos]; let kind = cap.name("kind").unwrap().as_str(); if kind == "class" { // Check if "new" appears right before (with possible whitespace/modifiers). // Simple heuristic: scan backwards for non-whitespace token. 
let trimmed = before.trim_end(); if trimmed.ends_with("new") { continue; } } events.push(Event::Declaration(pos, name)); } // Sort all events by position events.sort_by_key(|e| match e { Event::Namespace(pos, _) => *pos, Event::Declaration(pos, _) => *pos, }); // Process events in order for event in events { match event { Event::Namespace(_, ns) => { current_ns = ns; } Event::Declaration(_, name) => { let fqn = if current_ns.is_empty() { name } else { format!("{}\\{}", current_ns, name) }; results.push(fqn); } } } results } /// Validate that a class file is correctly placed according to PSR-4. /// /// - `class`: fully-qualified class name (e.g. `Foo\Bar\Baz`) /// - `base_namespace`: the PSR-4 namespace prefix (e.g. `Foo\Bar\`) /// - `file_path`: absolute path to the PHP file /// - `base_path`: the directory mapped to `base_namespace` (absolute) /// /// Returns `true` if the file path matches the PSR-4 mapping. pub fn validate_psr4_class( class: &str, base_namespace: &str, file_path: &str, base_path: &str, ) -> bool { // Normalize the base namespace: ensure it ends with `\` let base_ns = if base_namespace.is_empty() || base_namespace.ends_with('\\') { base_namespace.to_string() } else { format!("{base_namespace}\\") }; // Class must start with the base namespace if !class.starts_with(&*base_ns) { return false; } // The relative class name after the base namespace let relative_class = &class[base_ns.len()..]; // Convert relative class to a relative file path: replace `\` with `/` let expected_relative = relative_class.replace('\\', "/"); let expected_file = format!( "{}/{}.php", base_path.trim_end_matches('/'), expected_relative ); // Normalize both paths for comparison (simplistic: just compare strings) Path::new(file_path) == Path::new(&expected_file) } /// Validate that a class file is correctly placed according to PSR-0. /// /// - `class`: fully-qualified class name (e.g. 
`Foo_Bar_Baz` or `Foo\Bar`) /// - `file_path`: absolute path to the PHP file /// - `base_path`: the base directory for PSR-0 lookup /// /// Returns `true` if the file path matches the PSR-0 mapping. pub fn validate_psr0_class(class: &str, file_path: &str, base_path: &str) -> bool { // PSR-0: namespace separators AND underscores (in class part) map to directory separators. // Split on `\` first; the last segment may contain underscores that also become `/`. let parts: Vec<&str> = class.split('\\').collect(); let relative = if parts.len() == 1 { // No namespace: underscores in class name become dir separators parts[0].replace('_', "/") } else { let ns_part = parts[..parts.len() - 1].join("/"); let class_part = parts[parts.len() - 1].replace('_', "/"); format!("{}/{}", ns_part, class_part) }; let expected_file = format!("{}/{}.php", base_path.trim_end_matches('/'), relative); Path::new(file_path) == Path::new(&expected_file) } // ───────────────────────────────────────────────────────────────────────────── // Tests // ───────────────────────────────────────────────────────────────────────────── #[cfg(test)] mod tests { use super::*; use std::io::Write; use tempfile::NamedTempFile; fn write_php(content: &str) -> NamedTempFile { let mut f = NamedTempFile::with_suffix(".php").unwrap(); f.write_all(content.as_bytes()).unwrap(); f } // ------------------------------------------------------------------------- // find_classes tests // ------------------------------------------------------------------------- #[test] fn test_find_classes_simple_class() { let f = write_php("