diff options
| author | nsfisis <nsfisis@gmail.com> | 2026-05-10 00:32:08 +0900 |
|---|---|---|
| committer | nsfisis <nsfisis@gmail.com> | 2026-05-10 00:32:08 +0900 |
| commit | 8cc1ba8a02c0318b65658f1634de378c780392b9 (patch) | |
| tree | fdd5cb61e488018891a486b25991b87c84220bb8 /crates/mozart-core/src/repository/downloader.rs | |
| parent | 72b2e877c01e67ba7edd37e34ac2eadb7a1c62c4 (diff) | |
| download | php-mozart-8cc1ba8a02c0318b65658f1634de378c780392b9.tar.gz php-mozart-8cc1ba8a02c0318b65658f1634de378c780392b9.tar.zst php-mozart-8cc1ba8a02c0318b65658f1634de378c780392b9.zip | |
refactor(workspace): consolidate crates into mozart-core
Merged mozart-archiver, mozart-autoload, mozart-registry,
mozart-sat-resolver, and mozart-vcs into mozart-core to align
the source layout with Composer's structure.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Diffstat (limited to 'crates/mozart-core/src/repository/downloader.rs')
| -rw-r--r-- | crates/mozart-core/src/repository/downloader.rs | 500 |
1 files changed, 500 insertions, 0 deletions
diff --git a/crates/mozart-core/src/repository/downloader.rs b/crates/mozart-core/src/repository/downloader.rs new file mode 100644 index 0000000..b0d2a6a --- /dev/null +++ b/crates/mozart-core/src/repository/downloader.rs @@ -0,0 +1,500 @@ +use super::cache::Cache; +use indexmap::IndexSet; +use sha1::{Digest, Sha1}; +use std::fs; +use std::io::{Cursor, Read, Write}; +use std::path::Path; + +/// A simple download progress tracker that writes to stderr. +/// +/// When `show` is false, all methods are no-ops. This lets callers toggle +/// progress display without branching on every call. +pub struct DownloadProgress { + show: bool, + total: u64, + downloaded: u64, + label: String, +} + +impl DownloadProgress { + /// Create a new progress tracker. + /// + /// - `show`: whether to actually display anything. + /// - `label`: a human-readable label (e.g. "psr/log (3.0.2)"). + pub fn new(show: bool, label: impl Into<String>) -> Self { + Self { + show, + total: 0, + downloaded: 0, + label: label.into(), + } + } + + /// Set the total expected bytes from a `Content-Length` header. + pub fn set_total(&mut self, total: u64) { + self.total = total; + } + + /// Advance the downloaded byte count and redraw the line. + pub fn inc(&mut self, n: u64) { + if !self.show { + return; + } + self.downloaded += n; + let stderr = std::io::stderr(); + let mut out = stderr.lock(); + if let Some(pct) = (self.downloaded * 100).checked_div(self.total) { + let _ = write!( + out, + "\r Downloading {} ({}/{} bytes, {}%)", + self.label, self.downloaded, self.total, pct + ); + } else { + let _ = write!( + out, + "\r Downloading {} ({} bytes)", + self.label, self.downloaded + ); + } + let _ = out.flush(); + } + + /// Clear the progress line from the terminal. + pub fn finish(&self) { + if !self.show { + return; + } + let stderr = std::io::stderr(); + let mut out = stderr.lock(); + // Clear the line with spaces then return to start + let _ = write!(out, "\r{}\r", " ".repeat(80)); + let _ = out.flush(); + } +} + +/// Download a dist archive from a URL. +/// Returns the raw bytes of the downloaded archive. +/// If `expected_shasum` is provided and non-empty, verifies SHA-1 of the downloaded bytes. +/// If `progress` is provided, increments it as bytes are received and sets the total from +/// the `Content-Length` response header. +/// Downloaded bytes are cached by URL in `files_cache`; cache hits skip the network request +/// entirely. +#[tracing::instrument(skip(expected_shasum, progress, files_cache))] +pub async fn download_dist( + url: &str, + expected_shasum: Option<&str>, + progress: Option<&mut DownloadProgress>, + files_cache: &Cache, +) -> anyhow::Result<Vec<u8>> { + // Build a cache key from the URL + let cache_key = Cache::sanitize_key(url); + + // Check cache first + if let Some(cached_bytes) = files_cache.read_bytes(&cache_key) { + // Verify checksum against cache hit if provided + if let Some(shasum) = expected_shasum + && !shasum.is_empty() + { + let mut hasher = Sha1::new(); + hasher.update(&cached_bytes); + let computed = format!("{:x}", hasher.finalize()); + if computed == shasum { + tracing::debug!("cache hit"); + return Ok(cached_bytes); + } + // Checksum mismatch — discard cache, re-download + } else { + tracing::debug!("cache hit"); + return Ok(cached_bytes); + } + } + + let client = crate::http::client_builder().build()?; + let response = client.get(url).send().await?; + tracing::debug!(status = %response.status(), "received response"); + + if !response.status().is_success() { + anyhow::bail!( + "Failed to download dist archive from {} (HTTP {})", + url, + response.status() + ); + } + + // Stream the response body, updating progress as bytes arrive + let bytes = if let Some(pb) = progress { + if let Some(content_length) = response.content_length() { + pb.set_total(content_length); + } + let mut buf = Vec::new(); + let mut stream = response; + while let Some(chunk) = stream.chunk().await? { + buf.extend_from_slice(&chunk); + pb.inc(chunk.len() as u64); + } + buf + } else { + response.bytes().await?.to_vec() + }; + + tracing::debug!(size = bytes.len(), "download complete"); + + // Verify SHA-1 checksum if provided + if let Some(shasum) = expected_shasum + && !shasum.is_empty() + { + let mut hasher = Sha1::new(); + hasher.update(&bytes); + let result = hasher.finalize(); + let computed = format!("{result:x}"); + + if computed != shasum { + anyhow::bail!("SHA-1 checksum mismatch for {url}: expected {shasum}, got {computed}"); + } + } + + // Write to cache + let _ = files_cache.write_bytes(&cache_key, &bytes); + + Ok(bytes) +} + +/// Find the common top-level directory prefix shared by all entries. +/// Returns `Some(prefix)` if all entries share a single top-level directory. +fn find_top_level_dir(entries: &[String]) -> Option<String> { + if entries.is_empty() { + return None; + } + + let mut prefixes: IndexSet<String> = IndexSet::new(); + for entry in entries { + let slash_pos = entry.find('/')?; + prefixes.insert(entry[..slash_pos + 1].to_string()); + } + + if prefixes.len() == 1 { + prefixes.into_iter().next() + } else { + None + } +} + +/// Extract a zip archive to the target directory. +/// Strips a common top-level directory if all entries share one (Packagist pattern). +pub fn extract_zip(data: &[u8], target_dir: &Path) -> anyhow::Result<()> { + let cursor = Cursor::new(data); + let mut archive = zip::ZipArchive::new(cursor)?; + + // Collect all entry names to detect common prefix + let entry_names: Vec<String> = (0..archive.len()) + .map(|i| archive.by_index(i).map(|e| e.name().to_string())) + .collect::<Result<_, _>>()?; + + let prefix = find_top_level_dir(&entry_names); + + for i in 0..archive.len() { + let mut entry = archive.by_index(i)?; + let raw_name = entry.name().to_string(); + + // Strip common prefix + let relative = if let Some(ref pfx) = prefix { + if raw_name.starts_with(pfx.as_str()) { + &raw_name[pfx.len()..] + } else { + &raw_name + } + } else { + &raw_name + }; + + // Skip the directory entry itself (empty name after stripping) + if relative.is_empty() { + continue; + } + + let target_path = target_dir.join(relative); + + if raw_name.ends_with('/') { + // Directory entry + fs::create_dir_all(&target_path)?; + } else { + // File entry + if let Some(parent) = target_path.parent() { + fs::create_dir_all(parent)?; + } + + let mut buf = Vec::new(); + entry.read_to_end(&mut buf)?; + fs::write(&target_path, &buf)?; + + // Set permissions on Unix + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + if let Some(mode) = entry.unix_mode() { + fs::set_permissions(&target_path, fs::Permissions::from_mode(mode))?; + } + } + } + } + + Ok(()) +} + +/// Extract a tar.gz archive to the target directory. +/// Strips a common top-level directory if all entries share one (Packagist pattern). +pub fn extract_tar_gz(data: &[u8], target_dir: &Path) -> anyhow::Result<()> { + let cursor = Cursor::new(data); + let decoder = flate2::read::GzDecoder::new(cursor); + let mut archive = tar::Archive::new(decoder); + + // We need to process in two passes: first collect names, then extract. + // Use a buffered approach: collect entries into memory. + let cursor2 = Cursor::new(data); + let decoder2 = flate2::read::GzDecoder::new(cursor2); + let mut archive2 = tar::Archive::new(decoder2); + + let entry_names: Vec<String> = archive2 + .entries()? + .filter_map(|e| e.ok()) + .filter_map(|e| e.path().ok().map(|p| p.to_string_lossy().to_string())) + .collect(); + + let prefix = find_top_level_dir(&entry_names); + + for entry in archive.entries()? { + let mut entry = entry?; + let raw_path = entry.path()?.to_string_lossy().to_string(); + + // Strip common prefix + let relative = if let Some(ref pfx) = prefix { + if raw_path.starts_with(pfx.as_str()) { + raw_path[pfx.len()..].to_string() + } else { + raw_path.clone() + } + } else { + raw_path.clone() + }; + + // Skip empty (top-level dir itself) + if relative.is_empty() { + continue; + } + + let target_path = target_dir.join(&relative); + + let entry_type = entry.header().entry_type(); + if entry_type.is_dir() { + fs::create_dir_all(&target_path)?; + } else if entry_type.is_file() { + if let Some(parent) = target_path.parent() { + fs::create_dir_all(parent)?; + } + let mut buf = Vec::new(); + entry.read_to_end(&mut buf)?; + fs::write(&target_path, &buf)?; + + // Set permissions on Unix + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + if let Ok(mode) = entry.header().mode() { + fs::set_permissions(&target_path, fs::Permissions::from_mode(mode))?; + } + } + } + // Symlinks and other types are skipped for now + } + + Ok(()) +} + +/// Download and install a package to the vendor directory. +/// +/// - `dist_url`: the download URL (from `LockedPackage.dist.url`) +/// - `dist_type`: `"zip"` or `"tar"` (from `LockedPackage.dist.dist_type`) +/// - `dist_shasum`: optional SHA-1 checksum +/// - `vendor_dir`: path to `vendor/` directory +/// - `package_name`: e.g. `"monolog/monolog"` +/// - `progress`: optional mutable progress tracker to update during download +/// - `files_cache`: files cache; archive bytes are cached by URL +pub async fn install_package( + dist_url: &str, + dist_type: &str, + dist_shasum: Option<&str>, + vendor_dir: &Path, + package_name: &str, + progress: Option<&mut DownloadProgress>, + files_cache: &Cache, +) -> anyhow::Result<()> { + let target = vendor_dir.join(package_name); + + // Remove existing installation for a clean reinstall + if target.exists() { + fs::remove_dir_all(&target)?; + } + fs::create_dir_all(&target)?; + + let bytes = download_dist(dist_url, dist_shasum, progress, files_cache).await?; + + match dist_type { + "zip" => extract_zip(&bytes, &target)?, + "tar" | "tar.gz" | "tgz" => extract_tar_gz(&bytes, &target)?, + other => anyhow::bail!("Unsupported dist type: {other}"), + } + + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::Write as IoWrite; + use tempfile::tempdir; + + /// Build a minimal zip archive in memory. + fn make_zip(files: &[(&str, &[u8])]) -> Vec<u8> { + let buf = Vec::new(); + let cursor = Cursor::new(buf); + let mut writer = zip::ZipWriter::new(cursor); + let options = zip::write::FileOptions::<()>::default() + .compression_method(zip::CompressionMethod::Stored); + + for (name, content) in files { + writer.start_file(*name, options).unwrap(); + writer.write_all(content).unwrap(); + } + + writer.finish().unwrap().into_inner() + } + + /// Build a minimal tar.gz archive in memory. + fn make_tar_gz(files: &[(&str, &[u8])]) -> Vec<u8> { + let buf = Vec::new(); + let enc = flate2::write::GzEncoder::new(buf, flate2::Compression::default()); + let mut builder = tar::Builder::new(enc); + + for (name, content) in files { + let mut header = tar::Header::new_gnu(); + header.set_size(content.len() as u64); + header.set_mode(0o644); + header.set_cksum(); + builder + .append_data(&mut header, name, Cursor::new(content)) + .unwrap(); + } + + builder.into_inner().unwrap().finish().unwrap() + } + + #[test] + fn test_extract_zip_flat() { + let zip_data = make_zip(&[("file1.txt", b"hello"), ("subdir/file2.txt", b"world")]); + + let dir = tempdir().unwrap(); + extract_zip(&zip_data, dir.path()).unwrap(); + + assert_eq!( + fs::read_to_string(dir.path().join("file1.txt")).unwrap(), + "hello" + ); + assert_eq!( + fs::read_to_string(dir.path().join("subdir/file2.txt")).unwrap(), + "world" + ); + } + + #[test] + fn test_extract_zip_with_top_level_dir() { + // Packagist pattern: all files under vendor-package-abc123/ + let zip_data = make_zip(&[ + ("vendor-pkg-abc/", &[]), + ("vendor-pkg-abc/file1.txt", b"hello"), + ("vendor-pkg-abc/src/Foo.php", b"<?php"), + ]); + + let dir = tempdir().unwrap(); + extract_zip(&zip_data, dir.path()).unwrap(); + + // Top-level dir should be stripped + assert!(dir.path().join("file1.txt").exists()); + assert!(dir.path().join("src/Foo.php").exists()); + assert_eq!( + fs::read_to_string(dir.path().join("file1.txt")).unwrap(), + "hello" + ); + } + + #[test] + fn test_extract_tar_gz_flat() { + let tar_data = make_tar_gz(&[("file1.txt", b"hello"), ("subdir/file2.txt", b"world")]); + + let dir = tempdir().unwrap(); + extract_tar_gz(&tar_data, dir.path()).unwrap(); + + assert_eq!( + fs::read_to_string(dir.path().join("file1.txt")).unwrap(), + "hello" + ); + assert_eq!( + fs::read_to_string(dir.path().join("subdir/file2.txt")).unwrap(), + "world" + ); + } + + #[test] + fn test_extract_tar_gz_with_top_level_dir() { + let tar_data = make_tar_gz(&[ + ("vendor-pkg-abc/file1.txt", b"hello"), + ("vendor-pkg-abc/src/Foo.php", b"<?php"), + ]); + + let dir = tempdir().unwrap(); + extract_tar_gz(&tar_data, dir.path()).unwrap(); + + assert!(dir.path().join("file1.txt").exists()); + assert!(dir.path().join("src/Foo.php").exists()); + } + + #[test] + fn test_sha1_verification() { + use sha1::{Digest, Sha1}; + + let data = b"test content"; + let mut hasher = Sha1::new(); + hasher.update(data); + let expected = format!("{:x}", hasher.finalize()); + + // We can't test download_dist without a server, but we can verify the + // SHA-1 logic: same data should produce same hash + let mut hasher2 = Sha1::new(); + hasher2.update(data); + let computed = format!("{:x}", hasher2.finalize()); + + assert_eq!(expected, computed); + assert!(!expected.is_empty()); + } + + #[test] + fn test_find_top_level_dir_common() { + let entries = vec![ + "pkg-1.0/".to_string(), + "pkg-1.0/README.md".to_string(), + "pkg-1.0/src/Foo.php".to_string(), + ]; + assert_eq!(find_top_level_dir(&entries), Some("pkg-1.0/".to_string())); + } + + #[test] + fn test_find_top_level_dir_none_when_mixed() { + let entries = vec!["pkg-1.0/file.txt".to_string(), "other/file.txt".to_string()]; + assert_eq!(find_top_level_dir(&entries), None); + } + + #[test] + fn test_find_top_level_dir_none_when_root_file() { + let entries = vec!["file.txt".to_string(), "pkg/other.txt".to_string()]; + assert_eq!(find_top_level_dir(&entries), None); + } +} |
