about | summary | refs | log | tree | commit | diff | homepage
path: root/crates/mozart-registry/src/downloader.rs
diff options
context:
space:
mode:
Diffstat (limited to 'crates/mozart-registry/src/downloader.rs')
-rw-r--r-- crates/mozart-registry/src/downloader.rs 500
1 files changed, 0 insertions, 500 deletions
diff --git a/crates/mozart-registry/src/downloader.rs b/crates/mozart-registry/src/downloader.rs
deleted file mode 100644
index 3cb991b..0000000
--- a/crates/mozart-registry/src/downloader.rs
+++ /dev/null
@@ -1,500 +0,0 @@
use crate::cache::Cache;
use indexmap::IndexSet;
use sha1::{Digest, Sha1};
use std::fs;
use std::io::{Cursor, Read, Write};
use std::path::{Component, Path};
-
/// Reports download progress on a single, repeatedly redrawn stderr line.
///
/// A tracker built with `show == false` is inert: `inc` and `finish` return
/// immediately, so callers can hold one unconditionally instead of branching
/// on whether output is wanted.
pub struct DownloadProgress {
    /// Whether anything is written to stderr at all.
    show: bool,
    /// Expected size in bytes; `0` means "unknown".
    total: u64,
    /// Bytes counted so far (only advanced while `show` is true).
    downloaded: u64,
    /// Text printed after "Downloading".
    label: String,
}

impl DownloadProgress {
    /// Build a tracker with both byte counters at zero.
    ///
    /// - `show`: when `false`, nothing is ever written.
    /// - `label`: human-readable name of the download (e.g. "psr/log (3.0.2)").
    pub fn new(show: bool, label: impl Into<String>) -> Self {
        let label = label.into();
        Self { show, total: 0, downloaded: 0, label }
    }

    /// Record the expected size in bytes (typically the `Content-Length` value).
    pub fn set_total(&mut self, total: u64) {
        self.total = total;
    }

    /// Count `n` more received bytes and repaint the progress line.
    ///
    /// While the total is still `0` the percentage cannot be computed, so only
    /// the running byte count is shown. Write errors on stderr are ignored.
    pub fn inc(&mut self, n: u64) {
        if self.show {
            self.downloaded += n;

            // `checked_div` yields `None` for an unknown (zero) total.
            let line = match (self.downloaded * 100).checked_div(self.total) {
                Some(pct) => format!(
                    "\r Downloading {} ({}/{} bytes, {}%)",
                    self.label, self.downloaded, self.total, pct
                ),
                None => format!(
                    "\r Downloading {} ({} bytes)",
                    self.label, self.downloaded
                ),
            };

            let mut err = std::io::stderr().lock();
            let _ = err.write_all(line.as_bytes());
            let _ = err.flush();
        }
    }

    /// Erase the progress line by overwriting it with 80 spaces.
    pub fn finish(&self) {
        if self.show {
            let blank = " ".repeat(80);
            let mut err = std::io::stderr().lock();
            // Carriage return, blank out the line, carriage return again.
            let _ = write!(err, "\r{blank}\r");
            let _ = err.flush();
        }
    }
}
-
-/// Download a dist archive from a URL.
-/// Returns the raw bytes of the downloaded archive.
-/// If `expected_shasum` is provided and non-empty, verifies SHA-1 of the downloaded bytes.
-/// If `progress` is provided, increments it as bytes are received and sets the total from
-/// the `Content-Length` response header.
-/// Downloaded bytes are cached by URL in `files_cache`; cache hits skip the network request
-/// entirely.
-#[tracing::instrument(skip(expected_shasum, progress, files_cache))]
-pub async fn download_dist(
- url: &str,
- expected_shasum: Option<&str>,
- progress: Option<&mut DownloadProgress>,
- files_cache: &Cache,
-) -> anyhow::Result<Vec<u8>> {
- // Build a cache key from the URL
- let cache_key = Cache::sanitize_key(url);
-
- // Check cache first
- if let Some(cached_bytes) = files_cache.read_bytes(&cache_key) {
- // Verify checksum against cache hit if provided
- if let Some(shasum) = expected_shasum
- && !shasum.is_empty()
- {
- let mut hasher = Sha1::new();
- hasher.update(&cached_bytes);
- let computed = format!("{:x}", hasher.finalize());
- if computed == shasum {
- tracing::debug!("cache hit");
- return Ok(cached_bytes);
- }
- // Checksum mismatch — discard cache, re-download
- } else {
- tracing::debug!("cache hit");
- return Ok(cached_bytes);
- }
- }
-
- let client = mozart_core::http::client_builder().build()?;
- let response = client.get(url).send().await?;
- tracing::debug!(status = %response.status(), "received response");
-
- if !response.status().is_success() {
- anyhow::bail!(
- "Failed to download dist archive from {} (HTTP {})",
- url,
- response.status()
- );
- }
-
- // Stream the response body, updating progress as bytes arrive
- let bytes = if let Some(pb) = progress {
- if let Some(content_length) = response.content_length() {
- pb.set_total(content_length);
- }
- let mut buf = Vec::new();
- let mut stream = response;
- while let Some(chunk) = stream.chunk().await? {
- buf.extend_from_slice(&chunk);
- pb.inc(chunk.len() as u64);
- }
- buf
- } else {
- response.bytes().await?.to_vec()
- };
-
- tracing::debug!(size = bytes.len(), "download complete");
-
- // Verify SHA-1 checksum if provided
- if let Some(shasum) = expected_shasum
- && !shasum.is_empty()
- {
- let mut hasher = Sha1::new();
- hasher.update(&bytes);
- let result = hasher.finalize();
- let computed = format!("{result:x}");
-
- if computed != shasum {
- anyhow::bail!("SHA-1 checksum mismatch for {url}: expected {shasum}, got {computed}");
- }
- }
-
- // Write to cache
- let _ = files_cache.write_bytes(&cache_key, &bytes);
-
- Ok(bytes)
-}
-
-/// Find the common top-level directory prefix shared by all entries.
-/// Returns `Some(prefix)` if all entries share a single top-level directory.
-fn find_top_level_dir(entries: &[String]) -> Option<String> {
- if entries.is_empty() {
- return None;
- }
-
- let mut prefixes: IndexSet<String> = IndexSet::new();
- for entry in entries {
- let slash_pos = entry.find('/')?;
- prefixes.insert(entry[..slash_pos + 1].to_string());
- }
-
- if prefixes.len() == 1 {
- prefixes.into_iter().next()
- } else {
- None
- }
-}
-
-/// Extract a zip archive to the target directory.
-/// Strips a common top-level directory if all entries share one (Packagist pattern).
-pub fn extract_zip(data: &[u8], target_dir: &Path) -> anyhow::Result<()> {
- let cursor = Cursor::new(data);
- let mut archive = zip::ZipArchive::new(cursor)?;
-
- // Collect all entry names to detect common prefix
- let entry_names: Vec<String> = (0..archive.len())
- .map(|i| archive.by_index(i).map(|e| e.name().to_string()))
- .collect::<Result<_, _>>()?;
-
- let prefix = find_top_level_dir(&entry_names);
-
- for i in 0..archive.len() {
- let mut entry = archive.by_index(i)?;
- let raw_name = entry.name().to_string();
-
- // Strip common prefix
- let relative = if let Some(ref pfx) = prefix {
- if raw_name.starts_with(pfx.as_str()) {
- &raw_name[pfx.len()..]
- } else {
- &raw_name
- }
- } else {
- &raw_name
- };
-
- // Skip the directory entry itself (empty name after stripping)
- if relative.is_empty() {
- continue;
- }
-
- let target_path = target_dir.join(relative);
-
- if raw_name.ends_with('/') {
- // Directory entry
- fs::create_dir_all(&target_path)?;
- } else {
- // File entry
- if let Some(parent) = target_path.parent() {
- fs::create_dir_all(parent)?;
- }
-
- let mut buf = Vec::new();
- entry.read_to_end(&mut buf)?;
- fs::write(&target_path, &buf)?;
-
- // Set permissions on Unix
- #[cfg(unix)]
- {
- use std::os::unix::fs::PermissionsExt;
- if let Some(mode) = entry.unix_mode() {
- fs::set_permissions(&target_path, fs::Permissions::from_mode(mode))?;
- }
- }
- }
- }
-
- Ok(())
-}
-
-/// Extract a tar.gz archive to the target directory.
-/// Strips a common top-level directory if all entries share one (Packagist pattern).
-pub fn extract_tar_gz(data: &[u8], target_dir: &Path) -> anyhow::Result<()> {
- let cursor = Cursor::new(data);
- let decoder = flate2::read::GzDecoder::new(cursor);
- let mut archive = tar::Archive::new(decoder);
-
- // We need to process in two passes: first collect names, then extract.
- // Use a buffered approach: collect entries into memory.
- let cursor2 = Cursor::new(data);
- let decoder2 = flate2::read::GzDecoder::new(cursor2);
- let mut archive2 = tar::Archive::new(decoder2);
-
- let entry_names: Vec<String> = archive2
- .entries()?
- .filter_map(|e| e.ok())
- .filter_map(|e| e.path().ok().map(|p| p.to_string_lossy().to_string()))
- .collect();
-
- let prefix = find_top_level_dir(&entry_names);
-
- for entry in archive.entries()? {
- let mut entry = entry?;
- let raw_path = entry.path()?.to_string_lossy().to_string();
-
- // Strip common prefix
- let relative = if let Some(ref pfx) = prefix {
- if raw_path.starts_with(pfx.as_str()) {
- raw_path[pfx.len()..].to_string()
- } else {
- raw_path.clone()
- }
- } else {
- raw_path.clone()
- };
-
- // Skip empty (top-level dir itself)
- if relative.is_empty() {
- continue;
- }
-
- let target_path = target_dir.join(&relative);
-
- let entry_type = entry.header().entry_type();
- if entry_type.is_dir() {
- fs::create_dir_all(&target_path)?;
- } else if entry_type.is_file() {
- if let Some(parent) = target_path.parent() {
- fs::create_dir_all(parent)?;
- }
- let mut buf = Vec::new();
- entry.read_to_end(&mut buf)?;
- fs::write(&target_path, &buf)?;
-
- // Set permissions on Unix
- #[cfg(unix)]
- {
- use std::os::unix::fs::PermissionsExt;
- if let Ok(mode) = entry.header().mode() {
- fs::set_permissions(&target_path, fs::Permissions::from_mode(mode))?;
- }
- }
- }
- // Symlinks and other types are skipped for now
- }
-
- Ok(())
-}
-
-/// Download and install a package to the vendor directory.
-///
-/// - `dist_url`: the download URL (from `LockedPackage.dist.url`)
-/// - `dist_type`: `"zip"` or `"tar"` (from `LockedPackage.dist.dist_type`)
-/// - `dist_shasum`: optional SHA-1 checksum
-/// - `vendor_dir`: path to `vendor/` directory
-/// - `package_name`: e.g. `"monolog/monolog"`
-/// - `progress`: optional mutable progress tracker to update during download
-/// - `files_cache`: files cache; archive bytes are cached by URL
-pub async fn install_package(
- dist_url: &str,
- dist_type: &str,
- dist_shasum: Option<&str>,
- vendor_dir: &Path,
- package_name: &str,
- progress: Option<&mut DownloadProgress>,
- files_cache: &Cache,
-) -> anyhow::Result<()> {
- let target = vendor_dir.join(package_name);
-
- // Remove existing installation for a clean reinstall
- if target.exists() {
- fs::remove_dir_all(&target)?;
- }
- fs::create_dir_all(&target)?;
-
- let bytes = download_dist(dist_url, dist_shasum, progress, files_cache).await?;
-
- match dist_type {
- "zip" => extract_zip(&bytes, &target)?,
- "tar" | "tar.gz" | "tgz" => extract_tar_gz(&bytes, &target)?,
- other => anyhow::bail!("Unsupported dist type: {other}"),
- }
-
- Ok(())
-}
-
/// Unit tests for archive extraction, top-level directory detection and the
/// SHA-1 hex encoding used for checksum verification. Archives are built in
/// memory and extracted into temporary directories.
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write as IoWrite;
    use tempfile::tempdir;

    /// Build an uncompressed (stored) zip archive in memory from
    /// `(entry name, content)` pairs.
    fn make_zip(files: &[(&str, &[u8])]) -> Vec<u8> {
        let mut writer = zip::ZipWriter::new(Cursor::new(Vec::new()));
        let options = zip::write::FileOptions::<()>::default()
            .compression_method(zip::CompressionMethod::Stored);

        for (name, content) in files {
            writer.start_file(*name, options).unwrap();
            writer.write_all(content).unwrap();
        }

        writer.finish().unwrap().into_inner()
    }

    /// Build a gzip-compressed tar archive in memory from
    /// `(entry name, content)` pairs; every entry gets mode 0644.
    fn make_tar_gz(files: &[(&str, &[u8])]) -> Vec<u8> {
        let encoder = flate2::write::GzEncoder::new(Vec::new(), flate2::Compression::default());
        let mut builder = tar::Builder::new(encoder);

        for (name, content) in files {
            let mut header = tar::Header::new_gnu();
            header.set_size(content.len() as u64);
            header.set_mode(0o644);
            header.set_cksum();
            builder
                .append_data(&mut header, name, Cursor::new(content))
                .unwrap();
        }

        builder.into_inner().unwrap().finish().unwrap()
    }

    /// Read an extracted file, addressed relative to `root`, as UTF-8 text.
    fn read_text(root: &Path, relative: &str) -> String {
        fs::read_to_string(root.join(relative)).unwrap()
    }

    #[test]
    fn test_extract_zip_flat() {
        let zip_data = make_zip(&[("file1.txt", b"hello"), ("subdir/file2.txt", b"world")]);

        let dir = tempdir().unwrap();
        extract_zip(&zip_data, dir.path()).unwrap();

        assert_eq!(read_text(dir.path(), "file1.txt"), "hello");
        assert_eq!(read_text(dir.path(), "subdir/file2.txt"), "world");
    }

    #[test]
    fn test_extract_zip_with_top_level_dir() {
        // Packagist pattern: all files under vendor-package-abc123/
        let zip_data = make_zip(&[
            ("vendor-pkg-abc/", &[]),
            ("vendor-pkg-abc/file1.txt", b"hello"),
            ("vendor-pkg-abc/src/Foo.php", b"<?php"),
        ]);

        let dir = tempdir().unwrap();
        extract_zip(&zip_data, dir.path()).unwrap();

        // Top-level dir should be stripped
        assert!(dir.path().join("file1.txt").exists());
        assert!(dir.path().join("src/Foo.php").exists());
        assert_eq!(read_text(dir.path(), "file1.txt"), "hello");
    }

    #[test]
    fn test_extract_tar_gz_flat() {
        let tar_data = make_tar_gz(&[("file1.txt", b"hello"), ("subdir/file2.txt", b"world")]);

        let dir = tempdir().unwrap();
        extract_tar_gz(&tar_data, dir.path()).unwrap();

        assert_eq!(read_text(dir.path(), "file1.txt"), "hello");
        assert_eq!(read_text(dir.path(), "subdir/file2.txt"), "world");
    }

    #[test]
    fn test_extract_tar_gz_with_top_level_dir() {
        let tar_data = make_tar_gz(&[
            ("vendor-pkg-abc/file1.txt", b"hello"),
            ("vendor-pkg-abc/src/Foo.php", b"<?php"),
        ]);

        let dir = tempdir().unwrap();
        extract_tar_gz(&tar_data, dir.path()).unwrap();

        assert!(dir.path().join("file1.txt").exists());
        assert!(dir.path().join("src/Foo.php").exists());
    }

    #[test]
    fn test_sha1_verification() {
        use sha1::{Digest, Sha1};

        // `download_dist` cannot be exercised without a server, so pin the
        // digest encoding it relies on against published SHA-1 test vectors.
        let hex = |data: &[u8]| {
            let mut hasher = Sha1::new();
            hasher.update(data);
            format!("{:x}", hasher.finalize())
        };

        assert_eq!(hex(b"abc"), "a9993e364706816aba3e25717850c26c9cd0d89d");
        assert_eq!(hex(b""), "da39a3ee5e6b4b0d3255bfef95601890afd80709");
    }

    #[test]
    fn test_find_top_level_dir_common() {
        let entries = vec![
            "pkg-1.0/".to_string(),
            "pkg-1.0/README.md".to_string(),
            "pkg-1.0/src/Foo.php".to_string(),
        ];
        assert_eq!(find_top_level_dir(&entries), Some("pkg-1.0/".to_string()));
    }

    #[test]
    fn test_find_top_level_dir_none_when_mixed() {
        let entries = vec!["pkg-1.0/file.txt".to_string(), "other/file.txt".to_string()];
        assert_eq!(find_top_level_dir(&entries), None);
    }

    #[test]
    fn test_find_top_level_dir_none_when_root_file() {
        let entries = vec!["file.txt".to_string(), "pkg/other.txt".to_string()];
        assert_eq!(find_top_level_dir(&entries), None);
    }
}