aboutsummaryrefslogtreecommitdiffhomepage
path: root/crates/mozart-core/src/vcs/util
diff options
context:
space:
mode:
author nsfisis <nsfisis@gmail.com> 2026-05-10 00:32:08 +0900
committer nsfisis <nsfisis@gmail.com> 2026-05-10 00:32:08 +0900
commit 8cc1ba8a02c0318b65658f1634de378c780392b9 (patch)
tree fdd5cb61e488018891a486b25991b87c84220bb8 /crates/mozart-core/src/vcs/util
parent 72b2e877c01e67ba7edd37e34ac2eadb7a1c62c4 (diff)
download php-mozart-8cc1ba8a02c0318b65658f1634de378c780392b9.tar.gz
php-mozart-8cc1ba8a02c0318b65658f1634de378c780392b9.tar.zst
php-mozart-8cc1ba8a02c0318b65658f1634de378c780392b9.zip
refactor(workspace): consolidate crates into mozart-core
Merged mozart-archiver, mozart-autoload, mozart-registry, mozart-sat-resolver, and mozart-vcs into mozart-core to align the source layout with Composer's structure. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Diffstat (limited to 'crates/mozart-core/src/vcs/util')
-rw-r--r-- crates/mozart-core/src/vcs/util/git.rs 312
-rw-r--r-- crates/mozart-core/src/vcs/util/hg.rs 28
-rw-r--r-- crates/mozart-core/src/vcs/util/mod.rs 3
-rw-r--r-- crates/mozart-core/src/vcs/util/svn.rs 89
4 files changed, 432 insertions, 0 deletions
diff --git a/crates/mozart-core/src/vcs/util/git.rs b/crates/mozart-core/src/vcs/util/git.rs
new file mode 100644
index 0000000..15bfa09
--- /dev/null
+++ b/crates/mozart-core/src/vcs/util/git.rs
@@ -0,0 +1,312 @@
+use super::super::process::{ProcessExecutor, ProcessOutput};
+use anyhow::{Result, bail};
+use regex::Regex;
+use std::path::{Path, PathBuf};
+use std::sync::LazyLock;
+
+/// GitHub token pattern: a hex string of 12 or more characters, a `gh?_…`
+/// prefixed token (e.g. `ghp_…`), or a `github_pat_…` token.
+///
+/// The pattern is anchored at both ends, so it matches only when the whole
+/// input is a token.
+///
+/// Mirrors `Composer\Util\GitHub::GITHUB_TOKEN_REGEX`.
+static GITHUB_TOKEN_RE: LazyLock<Regex> = LazyLock::new(|| {
+ // The pattern is a fixed literal; `unwrap` can only fail if the literal itself is invalid.
+ Regex::new(r"^([a-fA-F0-9]{12,}|gh[a-zA-Z]_[a-zA-Z0-9_]+|github_pat_[a-zA-Z0-9_]+)$").unwrap()
+});
+
+/// Matches an `access_token=…` query parameter introduced by `?` or `&`.
+///
+/// Capture group 1 holds the `?access_token=` / `&access_token=` prefix; the
+/// remainder of the match is the token value, which runs up to the next `&`.
+static ACCESS_TOKEN_RE: LazyLock<Regex> =
+ LazyLock::new(|| Regex::new(r"([&?]access_token=)[^&]+").unwrap());
+
+/// Matches a `user:password@` credential block, optionally preceded by a
+/// `<scheme>://` prefix.
+///
+/// Matching is case-insensitive and not anchored, so the block may be found
+/// anywhere in the input. Named groups: `prefix` (optional), `user`, `password`.
+static CREDENTIALS_RE: LazyLock<Regex> = LazyLock::new(|| {
+ Regex::new(r"(?i)(?P<prefix>[a-z0-9]+://)?(?P<user>[^:/\s@]+):(?P<password>[^@\s/]+)@").unwrap()
+});
+
+/// Git utility for mirror management and protocol fallback.
+///
+/// Corresponds to Composer's `Util\Git`.
+pub struct GitUtil {
+ // Executor used by `fetch_ref`, `get_default_branch` and `get_version`.
+ process: ProcessExecutor,
+ // Base directory; `mirror_path` joins a sanitized URL onto it.
+ cache_dir: PathBuf,
+}
+
+impl GitUtil {
+ /// Creates a `GitUtil` from a process executor and the directory that
+ /// holds the mirrors.
+ pub fn new(process: ProcessExecutor, cache_dir: PathBuf) -> Self {
+ Self { process, cache_dir }
+ }
+
+    /// Environment overrides applied before invoking git.
+    ///
+    /// Each entry is `(variable, value)`: `None` means the variable is to be
+    /// removed, `Some(v)` means it is to be set to `v`. `GIT_DIR`,
+    /// `GIT_WORK_TREE` and `GIT_INDEX_FILE` are removed to avoid interference
+    /// from the calling process's Git context, and `GIT_TERMINAL_PROMPT` is set
+    /// to `0`.
+    pub fn clean_env() -> Vec<(&'static str, Option<&'static str>)> {
+        const UNSET: [&str; 3] = ["GIT_DIR", "GIT_WORK_TREE", "GIT_INDEX_FILE"];
+
+        let mut overrides: Vec<(&'static str, Option<&'static str>)> =
+            UNSET.iter().map(|&name| (name, None)).collect();
+        overrides.push(("GIT_TERMINAL_PROMPT", Some("0")));
+        overrides
+    }
+
+    /// Synchronize a bare mirror of `url` in the cache directory.
+    ///
+    /// If the mirror directory already contains a `HEAD` file it is treated as
+    /// an existing mirror: its `origin` URL is reset to `url` and the remote is
+    /// updated with pruning. Otherwise the directory is created and
+    /// `git clone --mirror` is run into it.
+    ///
+    /// Returns the path to the mirror directory.
+    ///
+    /// # Errors
+    ///
+    /// Fails if the mirror path is not valid UTF-8 (it cannot be passed to git
+    /// as a `&str` argument), if the directory cannot be created, or if a git
+    /// command fails even after protocol fallback.
+    pub fn sync_mirror(&self, url: &str) -> Result<PathBuf> {
+        let mirror_dir = self.mirror_path(url);
+
+        if mirror_dir.join("HEAD").exists() {
+            // Update existing mirror
+            self.run_command(
+                &["git", "remote", "set-url", "origin", "--", url],
+                url,
+                Some(&mirror_dir),
+            )?;
+            self.run_command(
+                &["git", "remote", "update", "--prune", "origin"],
+                url,
+                Some(&mirror_dir),
+            )?;
+        } else {
+            // Create new mirror. Refuse to continue rather than hand git an
+            // empty string in place of the target directory, which is never
+            // the intended destination.
+            let Some(target) = mirror_dir.to_str() else {
+                bail!(
+                    "Mirror path `{}` is not valid UTF-8 and cannot be passed to git",
+                    mirror_dir.display(),
+                );
+            };
+            std::fs::create_dir_all(&mirror_dir)?;
+            self.run_command(
+                &["git", "clone", "--mirror", "--", url, target],
+                url,
+                None,
+            )?;
+        }
+
+        Ok(mirror_dir)
+    }
+
+    /// Runs `git fetch origin <refspec>` inside `mirror_dir`.
+    ///
+    /// Returns `Ok(true)` when git exits with status 0 and `Ok(false)` for any
+    /// other exit status. An error from the executor itself is propagated.
+    pub fn fetch_ref(&self, mirror_dir: &Path, refspec: &str) -> Result<bool> {
+        let command = ["git", "fetch", "origin", refspec];
+        let fetched = self.process.execute(&command, Some(mirror_dir))?;
+        Ok(fetched.status == 0)
+    }
+
+    /// Determines the default branch reported by `git remote show origin`.
+    ///
+    /// Scans the command's stdout for the first `HEAD branch:` line whose
+    /// value is not `(unknown)` and returns that value. Returns `Ok(None)` when
+    /// git exits with a non-zero status or no such line is present.
+    pub fn get_default_branch(&self, mirror_dir: &Path) -> Result<Option<String>> {
+        let shown = self
+            .process
+            .execute(&["git", "remote", "show", "origin"], Some(mirror_dir))?;
+        if shown.status != 0 {
+            return Ok(None);
+        }
+
+        let branch = shown
+            .stdout
+            .lines()
+            .filter_map(|line| line.trim().strip_prefix("HEAD branch:"))
+            .map(str::trim)
+            .find(|name| *name != "(unknown)")
+            .map(String::from);
+        Ok(branch)
+    }
+
+    /// Execute a git command with protocol fallback.
+    ///
+    /// The command is first run as given. If it exits with a non-zero status,
+    /// it is retried once for every URL produced by `get_fallback_urls(url)`,
+    /// with each argument equal to `url` replaced by the fallback URL. The
+    /// first successful output is returned.
+    ///
+    /// # Errors
+    ///
+    /// Propagates executor errors. If every attempt exits with a non-zero
+    /// status, fails with a message describing the *original* attempt. The URL,
+    /// stdout and stderr in that message are passed through the credential
+    /// redaction so passwords and tokens do not end up in logs.
+    pub fn run_command(
+        &self,
+        args: &[&str],
+        url: &str,
+        cwd: Option<&Path>,
+    ) -> Result<ProcessOutput> {
+        // NOTE(review): a fresh executor is built here instead of reusing
+        // `self.process` — presumably so the env overrides stay local; confirm.
+        let mut executor = ProcessExecutor::new();
+        for (key, value) in Self::clean_env() {
+            match value {
+                Some(v) => executor.set_env(key, v),
+                None => executor.remove_env(key),
+            }
+        }
+
+        // Try the command as-is first
+        let first = executor.execute(args, cwd)?;
+        if first.status == 0 {
+            return Ok(first);
+        }
+
+        // Try protocol fallback for remote URLs
+        for fallback_url in Self::get_fallback_urls(url) {
+            let retry_args: Vec<&str> = args
+                .iter()
+                .map(|&a| if a == url { fallback_url.as_str() } else { a })
+                .collect();
+            let retried = executor.execute(&retry_args, cwd)?;
+            if retried.status == 0 {
+                return Ok(retried);
+            }
+        }
+
+        // Report the original failure, with credentials redacted.
+        let redacted_url = sanitize_url_credentials(url);
+        let shown_args: Vec<&str> = args
+            .iter()
+            .map(|&a| if a == url { redacted_url.as_str() } else { a })
+            .collect();
+        bail!(
+            "Git command `{}` failed with exit code {}\nstdout: {}\nstderr: {}",
+            shown_args.join(" "),
+            first.status,
+            sanitize_url_credentials(first.stdout.trim()),
+            sanitize_url_credentials(first.stderr.trim()),
+        );
+    }
+
+    /// Returns the installed Git version, e.g. `"2.39.2"`.
+    ///
+    /// Runs `git --version` and strips the leading `git version ` from the
+    /// trimmed output. Returns `None` if the command cannot be executed, exits
+    /// with a non-zero status, or prints something without that prefix.
+    pub fn get_version(&self) -> Option<String> {
+        match self.process.execute(&["git", "--version"], None) {
+            Ok(reported) if reported.status == 0 => reported
+                .stdout
+                .trim()
+                .strip_prefix("git version ")
+                .map(String::from),
+            _ => None,
+        }
+    }
+
+    /// Turn a URL into a string usable as a cache directory name.
+    ///
+    /// Mirrors Composer's `Preg::replace('{[^a-z0-9.]}i', '-', Url::sanitize($url))`
+    /// pattern (see `GitDriver::initialize` and `GitDownloader`). Credentials
+    /// and access tokens are redacted first; then every character other than
+    /// an ASCII letter, an ASCII digit or `.` is replaced with `-`. Redacting
+    /// first keeps cache keys stable across URLs that differ only in their
+    /// embedded token.
+    pub fn sanitize_url(url: &str) -> String {
+        let redacted = sanitize_url_credentials(url);
+        let mut key = String::with_capacity(redacted.len());
+        for ch in redacted.chars() {
+            let keep = matches!(ch, 'a'..='z' | 'A'..='Z' | '0'..='9' | '.');
+            key.push(if keep { ch } else { '-' });
+        }
+        key
+    }
+
+ /// Get the cache mirror path for a URL.
+ ///
+ /// The path is the cache directory joined with `sanitize_url(url)`; nothing
+ /// is created or checked on disk here.
+ pub fn mirror_path(&self, url: &str) -> PathBuf {
+ self.cache_dir.join(Self::sanitize_url(url))
+ }
+
+    /// Build the list of alternative URLs to try when `url` fails.
+    ///
+    /// At most one rewrite applies per prefix, in this order:
+    /// - `git@host:path` → `https://host/path` (only the first `:` is replaced)
+    /// - `git://rest` → `https://rest`
+    /// - `https://rest` → `git://rest`
+    ///
+    /// URLs with none of these prefixes yield an empty list.
+    fn get_fallback_urls(url: &str) -> Vec<String> {
+        let from_scp_style = url
+            .strip_prefix("git@")
+            .map(|rest| format!("https://{}", rest.replacen(':', "/", 1)));
+        let from_git_protocol = url
+            .strip_prefix("git://")
+            .map(|rest| format!("https://{rest}"));
+        let from_https = url
+            .strip_prefix("https://")
+            .map(|rest| format!("git://{rest}"));
+
+        [from_scp_style, from_git_protocol, from_https]
+            .into_iter()
+            .flatten()
+            .collect()
+    }
+}
+
+/// Redact credentials and access tokens from `url`.
+///
+/// Mirrors Composer's `Util\Url::sanitize`. Two passes are applied in order:
+/// 1. the value of every `[?&]access_token=…` query parameter becomes `***`;
+/// 2. every `user:password@` block (with its optional `<scheme>://` prefix
+///    kept) has the password replaced by `***`, and the user as well when the
+///    user looks like a GitHub token.
+fn sanitize_url_credentials(url: &str) -> String {
+    let without_token = ACCESS_TOKEN_RE.replace_all(url, "${1}***");
+    let redacted = CREDENTIALS_RE.replace_all(&without_token, |caps: &regex::Captures<'_>| {
+        let prefix = caps.name("prefix").map_or("", |m| m.as_str());
+        // A token in the user position is itself a secret, so hide it too.
+        let user = if GITHUB_TOKEN_RE.is_match(&caps["user"]) {
+            "***"
+        } else {
+            &caps["user"]
+        };
+        format!("{prefix}{user}:***@")
+    });
+    redacted.into_owned()
+}
+
+// Unit tests for `GitUtil::sanitize_url`: character replacement, credential
+// and token redaction, and cache-key stability across access tokens.
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn sanitize_url_replaces_special_chars_with_dash() {
+ assert_eq!(
+ GitUtil::sanitize_url("https://github.com/owner/repo.git"),
+ "https---github.com-owner-repo.git"
+ );
+ }
+
+ #[test]
+ fn sanitize_url_preserves_dot() {
+ // Dot must survive — it appears in hostnames and ".git" suffixes.
+ let key = GitUtil::sanitize_url("git://example.org/foo.bar/baz.git");
+ assert!(key.contains(".org"));
+ assert!(key.ends_with(".git"));
+ }
+
+ #[test]
+ fn sanitize_url_redacts_password_in_credentials() {
+ let key = GitUtil::sanitize_url("https://alice:s3cret@example.com/repo.git");
+ // Password is replaced with ***, then non-alphanumerics become '-'.
+ assert!(key.contains("alice"));
+ assert!(!key.contains("s3cret"));
+ }
+
+ #[test]
+ fn sanitize_url_redacts_user_when_looks_like_github_token() {
+ // 40-hex token in the user position triggers full redaction.
+ let token = "abcdef0123456789abcdef0123456789abcdef01";
+ let key = GitUtil::sanitize_url(&format!("https://{token}:x-oauth-basic@github.com/o/r"));
+ assert!(!key.contains("abcdef"));
+ }
+
+ #[test]
+ fn sanitize_url_redacts_modern_github_pat() {
+ // ghp_xxx and github_pat_xxx forms.
+ let key1 = GitUtil::sanitize_url("https://ghp_abc123XYZ:x@github.com/o/r");
+ assert!(!key1.contains("ghp_"));
+ let key2 = GitUtil::sanitize_url("https://github_pat_abc123:x@github.com/o/r");
+ assert!(!key2.contains("github_pat_"));
+ }
+
+ #[test]
+ fn sanitize_url_strips_access_token_query() {
+ let key = GitUtil::sanitize_url("https://api.github.com/x?access_token=secrettoken");
+ assert!(!key.contains("secrettoken"));
+ }
+
+ #[test]
+ fn sanitize_url_token_variants_share_cache_key() {
+ // Two pulls of the same repo with different access tokens should land
+ // in the same cache subdirectory.
+ let a = GitUtil::sanitize_url("https://api.github.com/repo?access_token=tokenA");
+ let b = GitUtil::sanitize_url("https://api.github.com/repo?access_token=tokenB");
+ assert_eq!(a, b);
+ }
+}
diff --git a/crates/mozart-core/src/vcs/util/hg.rs b/crates/mozart-core/src/vcs/util/hg.rs
new file mode 100644
index 0000000..73051b7
--- /dev/null
+++ b/crates/mozart-core/src/vcs/util/hg.rs
@@ -0,0 +1,28 @@
+use super::super::process::{ProcessExecutor, ProcessOutput};
+use anyhow::Result;
+use std::path::Path;
+
+/// Mercurial utility for command execution.
+///
+/// Prepends the `hg` program name to caller-supplied arguments and delegates
+/// to the wrapped executor.
+pub struct HgUtil {
+ // Executor that actually runs the `hg` commands.
+ process: ProcessExecutor,
+}
+
+impl HgUtil {
+ /// Creates an `HgUtil` that runs commands through `process`.
+ pub fn new(process: ProcessExecutor) -> Self {
+ Self { process }
+ }
+
+    /// Runs `hg <args…>` in `cwd` through the executor's `execute_checked`.
+    ///
+    /// NOTE(review): `execute_checked` presumably fails on a non-zero exit
+    /// status (compare `execute_unchecked`) — confirm against `ProcessExecutor`.
+    pub fn execute(&self, args: &[&str], cwd: Option<&Path>) -> Result<ProcessOutput> {
+        let command: Vec<&str> = std::iter::once("hg")
+            .chain(args.iter().copied())
+            .collect();
+        self.process.execute_checked(&command, cwd)
+    }
+
+    /// Runs `hg <args…>` in `cwd` without treating a non-zero exit status as
+    /// an error; the caller inspects the returned output instead.
+    pub fn execute_unchecked(&self, args: &[&str], cwd: Option<&Path>) -> Result<ProcessOutput> {
+        let command: Vec<&str> = std::iter::once("hg")
+            .chain(args.iter().copied())
+            .collect();
+        self.process.execute(&command, cwd)
+    }
+}
diff --git a/crates/mozart-core/src/vcs/util/mod.rs b/crates/mozart-core/src/vcs/util/mod.rs
new file mode 100644
index 0000000..b2c35fc
--- /dev/null
+++ b/crates/mozart-core/src/vcs/util/mod.rs
@@ -0,0 +1,3 @@
+//! Command-line helpers, one submodule per version control system.
+pub mod git;
+pub mod hg;
+pub mod svn;
diff --git a/crates/mozart-core/src/vcs/util/svn.rs b/crates/mozart-core/src/vcs/util/svn.rs
new file mode 100644
index 0000000..d989fc8
--- /dev/null
+++ b/crates/mozart-core/src/vcs/util/svn.rs
@@ -0,0 +1,89 @@
+use super::super::process::{ProcessExecutor, ProcessOutput};
+use anyhow::Result;
+use std::path::Path;
+
+/// SVN credentials for authenticated operations.
+///
+/// NOTE(review): `Debug` is derived, so formatting this value with `{:?}`
+/// prints the password in plain text — avoid logging it.
+#[derive(Debug, Clone)]
+pub struct SvnCredentials {
+ // Passed to `svn` as the value of `--username`.
+ pub username: String,
+ // Passed to `svn` as the value of `--password`.
+ pub password: String,
+}
+
+/// SVN utility for command execution with credential handling.
+///
+/// Every command is run with `--non-interactive` appended.
+pub struct SvnUtil {
+ // Executor that actually runs the `svn` commands.
+ process: ProcessExecutor,
+}
+
+impl SvnUtil {
+ /// Creates an `SvnUtil` that runs commands through `process`.
+ pub fn new(process: ProcessExecutor) -> Self {
+ Self { process }
+ }
+
+    /// Runs `svn <args…> --non-interactive` in `cwd` through the executor's
+    /// `execute_checked`.
+    ///
+    /// NOTE(review): `execute_checked` presumably fails on a non-zero exit
+    /// status — confirm against `ProcessExecutor`.
+    pub fn execute(&self, args: &[&str], cwd: Option<&Path>) -> Result<ProcessOutput> {
+        let command: Vec<&str> = std::iter::once("svn")
+            .chain(args.iter().copied())
+            .chain(std::iter::once("--non-interactive"))
+            .collect();
+        self.process.execute_checked(&command, cwd)
+    }
+
+    /// Execute an SVN command with optional credentials, retrying on auth failure.
+    ///
+    /// The command line is `svn <args…> --non-interactive`, followed by
+    /// `--username <u> --password <p>` when `creds` is given. The same command
+    /// is run up to five times: a zero exit status returns immediately, a
+    /// failure whose stderr does not look like an authentication problem stops
+    /// the retries, and an authentication failure is retried.
+    ///
+    /// # Errors
+    ///
+    /// Propagates executor errors. If the command never succeeds, fails with
+    /// the exit code and stderr of the last attempt. The password is replaced
+    /// by `***` in that message so it cannot leak into logs.
+    pub fn execute_with_credentials(
+        &self,
+        args: &[&str],
+        creds: Option<&SvnCredentials>,
+        cwd: Option<&Path>,
+    ) -> Result<ProcessOutput> {
+        const MAX_ATTEMPTS: usize = 5;
+        // stderr fragments that identify an authentication failure.
+        const AUTH_FAILURE_HINTS: [&str; 3] = [
+            "authorization failed",
+            "Could not authenticate",
+            "Authentication failed",
+        ];
+
+        let mut full_args: Vec<&str> = Vec::with_capacity(args.len() + 6);
+        full_args.push("svn");
+        full_args.extend_from_slice(args);
+        full_args.push("--non-interactive");
+        if let Some(c) = creds {
+            full_args.extend([
+                "--username",
+                c.username.as_str(),
+                "--password",
+                c.password.as_str(),
+            ]);
+        }
+        // When credentials are present the password is always the last argument.
+        let password_index = creds.map(|_| full_args.len() - 1);
+
+        let mut attempts_left = MAX_ATTEMPTS;
+        let failed = loop {
+            let output = self.process.execute(&full_args, cwd)?;
+            if output.status == 0 {
+                return Ok(output);
+            }
+            attempts_left -= 1;
+            let auth_failure = AUTH_FAILURE_HINTS
+                .iter()
+                .any(|&hint| output.stderr.contains(hint));
+            // Only authentication failures are worth another attempt.
+            if !auth_failure || attempts_left == 0 {
+                break output;
+            }
+        };
+
+        let shown_args: Vec<&str> = full_args
+            .iter()
+            .enumerate()
+            .map(|(i, &arg)| if Some(i) == password_index { "***" } else { arg })
+            .collect();
+        anyhow::bail!(
+            "SVN command `{}` failed with exit code {}\nstderr: {}",
+            shown_args.join(" "),
+            failed.status,
+            failed.stderr.trim(),
+        );
+    }
+}