diff options
| author | nsfisis <nsfisis@gmail.com> | 2026-02-21 10:46:26 +0900 |
|---|---|---|
| committer | nsfisis <nsfisis@gmail.com> | 2026-02-21 10:46:26 +0900 |
| commit | a03ad0152ec28cdd1cc05f96a9823807dbb2b818 (patch) | |
| tree | 83b4e4aadb98bf53c89237b58845ba1f41fbef68 /crates/mozart/src/downloader.rs | |
| parent | 0b5d333083f1317391338d3aa67b1290e93922cc (diff) | |
| download | php-mozart-a03ad0152ec28cdd1cc05f96a9823807dbb2b818.tar.gz php-mozart-a03ad0152ec28cdd1cc05f96a9823807dbb2b818.tar.zst php-mozart-a03ad0152ec28cdd1cc05f96a9823807dbb2b818.zip | |
feat(core): add version constraint, lockfile, installed registry, and downloader modules
Phase 1 infrastructure for the install command:
- constraint: Composer-compatible version parsing and constraint matching
(caret, tilde, wildcard, hyphen range, OR/AND combinators)
- lockfile: composer.lock read/write with content-hash computation
- installed: vendor/composer/installed.json registry (Composer 2.x format)
- downloader: dist archive download with SHA-1 verification and
zip/tar.gz extraction with top-level directory stripping
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Diffstat (limited to 'crates/mozart/src/downloader.rs')
| -rw-r--r-- | crates/mozart/src/downloader.rs | 380 |
1 files changed, 380 insertions, 0 deletions
diff --git a/crates/mozart/src/downloader.rs b/crates/mozart/src/downloader.rs new file mode 100644 index 0000000..b477ab4 --- /dev/null +++ b/crates/mozart/src/downloader.rs @@ -0,0 +1,380 @@ +use sha1::{Digest, Sha1}; +use std::collections::HashSet; +use std::fs; +use std::io::{Cursor, Read}; +use std::path::Path; + +/// Download a dist archive from a URL. +/// Returns the raw bytes of the downloaded archive. +/// If `expected_shasum` is provided and non-empty, verifies SHA-1 of the downloaded bytes. +pub fn download_dist(url: &str, expected_shasum: Option<&str>) -> anyhow::Result<Vec<u8>> { + let response = reqwest::blocking::get(url)?; + + if !response.status().is_success() { + anyhow::bail!( + "Failed to download dist archive from {} (HTTP {})", + url, + response.status() + ); + } + + let bytes = response.bytes()?.to_vec(); + + // Verify SHA-1 checksum if provided + if let Some(shasum) = expected_shasum + && !shasum.is_empty() + { + let mut hasher = Sha1::new(); + hasher.update(&bytes); + let result = hasher.finalize(); + let computed = format!("{result:x}"); + + if computed != shasum { + anyhow::bail!( + "SHA-1 checksum mismatch for {url}: expected {shasum}, got {computed}" + ); + } + } + + Ok(bytes) +} + +/// Find the common top-level directory prefix shared by all entries. +/// Returns `Some(prefix)` if all entries share a single top-level directory. +fn find_top_level_dir(entries: &[String]) -> Option<String> { + if entries.is_empty() { + return None; + } + + let mut prefixes: HashSet<String> = HashSet::new(); + for entry in entries { + if let Some(slash_pos) = entry.find('/') { + prefixes.insert(entry[..slash_pos + 1].to_string()); + } else { + // Entry at root level — no common prefix to strip + return None; + } + } + + if prefixes.len() == 1 { + prefixes.into_iter().next() + } else { + None + } +} + +/// Extract a zip archive to the target directory. +/// Strips a common top-level directory if all entries share one (Packagist pattern). +pub fn extract_zip(data: &[u8], target_dir: &Path) -> anyhow::Result<()> { + let cursor = Cursor::new(data); + let mut archive = zip::ZipArchive::new(cursor)?; + + // Collect all entry names to detect common prefix + let entry_names: Vec<String> = (0..archive.len()) + .map(|i| archive.by_index(i).map(|e| e.name().to_string())) + .collect::<Result<_, _>>()?; + + let prefix = find_top_level_dir(&entry_names); + + for i in 0..archive.len() { + let mut entry = archive.by_index(i)?; + let raw_name = entry.name().to_string(); + + // Strip common prefix + let relative = if let Some(ref pfx) = prefix { + if raw_name.starts_with(pfx.as_str()) { + &raw_name[pfx.len()..] + } else { + &raw_name + } + } else { + &raw_name + }; + + // Skip the directory entry itself (empty name after stripping) + if relative.is_empty() { + continue; + } + + let target_path = target_dir.join(relative); + + if raw_name.ends_with('/') { + // Directory entry + fs::create_dir_all(&target_path)?; + } else { + // File entry + if let Some(parent) = target_path.parent() { + fs::create_dir_all(parent)?; + } + + let mut buf = Vec::new(); + entry.read_to_end(&mut buf)?; + fs::write(&target_path, &buf)?; + + // Set permissions on Unix + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + if let Some(mode) = entry.unix_mode() { + fs::set_permissions(&target_path, fs::Permissions::from_mode(mode))?; + } + } + } + } + + Ok(()) +} + +/// Extract a tar.gz archive to the target directory. +/// Strips a common top-level directory if all entries share one (Packagist pattern). +pub fn extract_tar_gz(data: &[u8], target_dir: &Path) -> anyhow::Result<()> { + let cursor = Cursor::new(data); + let decoder = flate2::read::GzDecoder::new(cursor); + let mut archive = tar::Archive::new(decoder); + + // We need to process in two passes: first collect names, then extract. + // Use a buffered approach: collect entries into memory. + let cursor2 = Cursor::new(data); + let decoder2 = flate2::read::GzDecoder::new(cursor2); + let mut archive2 = tar::Archive::new(decoder2); + + let entry_names: Vec<String> = archive2 + .entries()? + .filter_map(|e| e.ok()) + .filter_map(|e| e.path().ok().map(|p| p.to_string_lossy().to_string())) + .collect(); + + let prefix = find_top_level_dir(&entry_names); + + for entry in archive.entries()? { + let mut entry = entry?; + let raw_path = entry.path()?.to_string_lossy().to_string(); + + // Strip common prefix + let relative = if let Some(ref pfx) = prefix { + if raw_path.starts_with(pfx.as_str()) { + raw_path[pfx.len()..].to_string() + } else { + raw_path.clone() + } + } else { + raw_path.clone() + }; + + // Skip empty (top-level dir itself) + if relative.is_empty() { + continue; + } + + let target_path = target_dir.join(&relative); + + let entry_type = entry.header().entry_type(); + if entry_type.is_dir() { + fs::create_dir_all(&target_path)?; + } else if entry_type.is_file() { + if let Some(parent) = target_path.parent() { + fs::create_dir_all(parent)?; + } + let mut buf = Vec::new(); + entry.read_to_end(&mut buf)?; + fs::write(&target_path, &buf)?; + + // Set permissions on Unix + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + if let Ok(mode) = entry.header().mode() { + fs::set_permissions(&target_path, fs::Permissions::from_mode(mode))?; + } + } + } + // Symlinks and other types are skipped for now + } + + Ok(()) +} + +/// Download and install a package to the vendor directory. +/// +/// - `dist_url`: the download URL (from `LockedPackage.dist.url`) +/// - `dist_type`: `"zip"` or `"tar"` (from `LockedPackage.dist.dist_type`) +/// - `dist_shasum`: optional SHA-1 checksum +/// - `vendor_dir`: path to `vendor/` directory +/// - `package_name`: e.g. `"monolog/monolog"` +pub fn install_package( + dist_url: &str, + dist_type: &str, + dist_shasum: Option<&str>, + vendor_dir: &Path, + package_name: &str, +) -> anyhow::Result<()> { + let target = vendor_dir.join(package_name); + + // Remove existing installation for a clean reinstall + if target.exists() { + fs::remove_dir_all(&target)?; + } + fs::create_dir_all(&target)?; + + let bytes = download_dist(dist_url, dist_shasum)?; + + match dist_type { + "zip" => extract_zip(&bytes, &target)?, + "tar" | "tar.gz" | "tgz" => extract_tar_gz(&bytes, &target)?, + other => anyhow::bail!("Unsupported dist type: {other}"), + } + + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::Write as IoWrite; + use tempfile::tempdir; + + /// Build a minimal zip archive in memory. + fn make_zip(files: &[(&str, &[u8])]) -> Vec<u8> { + let buf = Vec::new(); + let cursor = Cursor::new(buf); + let mut writer = zip::ZipWriter::new(cursor); + let options = zip::write::FileOptions::<()>::default() + .compression_method(zip::CompressionMethod::Stored); + + for (name, content) in files { + writer.start_file(*name, options).unwrap(); + writer.write_all(content).unwrap(); + } + + writer.finish().unwrap().into_inner() + } + + /// Build a minimal tar.gz archive in memory. + fn make_tar_gz(files: &[(&str, &[u8])]) -> Vec<u8> { + let buf = Vec::new(); + let enc = flate2::write::GzEncoder::new(buf, flate2::Compression::default()); + let mut builder = tar::Builder::new(enc); + + for (name, content) in files { + let mut header = tar::Header::new_gnu(); + header.set_size(content.len() as u64); + header.set_mode(0o644); + header.set_cksum(); + builder + .append_data(&mut header, name, Cursor::new(content)) + .unwrap(); + } + + builder.into_inner().unwrap().finish().unwrap() + } + + #[test] + fn test_extract_zip_flat() { + let zip_data = make_zip(&[("file1.txt", b"hello"), ("subdir/file2.txt", b"world")]); + + let dir = tempdir().unwrap(); + extract_zip(&zip_data, dir.path()).unwrap(); + + assert_eq!( + fs::read_to_string(dir.path().join("file1.txt")).unwrap(), + "hello" + ); + assert_eq!( + fs::read_to_string(dir.path().join("subdir/file2.txt")).unwrap(), + "world" + ); + } + + #[test] + fn test_extract_zip_with_top_level_dir() { + // Packagist pattern: all files under vendor-package-abc123/ + let zip_data = make_zip(&[ + ("vendor-pkg-abc/", &[]), + ("vendor-pkg-abc/file1.txt", b"hello"), + ("vendor-pkg-abc/src/Foo.php", b"<?php"), + ]); + + let dir = tempdir().unwrap(); + extract_zip(&zip_data, dir.path()).unwrap(); + + // Top-level dir should be stripped + assert!(dir.path().join("file1.txt").exists()); + assert!(dir.path().join("src/Foo.php").exists()); + assert_eq!( + fs::read_to_string(dir.path().join("file1.txt")).unwrap(), + "hello" + ); + } + + #[test] + fn test_extract_tar_gz_flat() { + let tar_data = make_tar_gz(&[("file1.txt", b"hello"), ("subdir/file2.txt", b"world")]); + + let dir = tempdir().unwrap(); + extract_tar_gz(&tar_data, dir.path()).unwrap(); + + assert_eq!( + fs::read_to_string(dir.path().join("file1.txt")).unwrap(), + "hello" + ); + assert_eq!( + fs::read_to_string(dir.path().join("subdir/file2.txt")).unwrap(), + "world" + ); + } + + #[test] + fn test_extract_tar_gz_with_top_level_dir() { + let tar_data = make_tar_gz(&[ + ("vendor-pkg-abc/file1.txt", b"hello"), + ("vendor-pkg-abc/src/Foo.php", b"<?php"), + ]); + + let dir = tempdir().unwrap(); + extract_tar_gz(&tar_data, dir.path()).unwrap(); + + assert!(dir.path().join("file1.txt").exists()); + assert!(dir.path().join("src/Foo.php").exists()); + } + + #[test] + fn test_sha1_verification() { + use sha1::{Digest, Sha1}; + + let data = b"test content"; + let mut hasher = Sha1::new(); + hasher.update(data); + let expected = format!("{:x}", hasher.finalize()); + + // We can't test download_dist without a server, but we can verify the + // SHA-1 logic: same data should produce same hash + let mut hasher2 = Sha1::new(); + hasher2.update(data); + let computed = format!("{:x}", hasher2.finalize()); + + assert_eq!(expected, computed); + assert!(!expected.is_empty()); + } + + #[test] + fn test_find_top_level_dir_common() { + let entries = vec![ + "pkg-1.0/".to_string(), + "pkg-1.0/README.md".to_string(), + "pkg-1.0/src/Foo.php".to_string(), + ]; + assert_eq!(find_top_level_dir(&entries), Some("pkg-1.0/".to_string())); + } + + #[test] + fn test_find_top_level_dir_none_when_mixed() { + let entries = vec!["pkg-1.0/file.txt".to_string(), "other/file.txt".to_string()]; + assert_eq!(find_top_level_dir(&entries), None); + } + + #[test] + fn test_find_top_level_dir_none_when_root_file() { + let entries = vec!["file.txt".to_string(), "pkg/other.txt".to_string()]; + assert_eq!(find_top_level_dir(&entries), None); + } +} |
