diff options
Diffstat (limited to 'crates/mozart-core/src/repository/cache.rs')
| -rw-r--r-- | crates/mozart-core/src/repository/cache.rs | 575 |
1 files changed, 575 insertions, 0 deletions
diff --git a/crates/mozart-core/src/repository/cache.rs b/crates/mozart-core/src/repository/cache.rs new file mode 100644 index 0000000..39e3e8d --- /dev/null +++ b/crates/mozart-core/src/repository/cache.rs @@ -0,0 +1,575 @@ +//! Filesystem-backed cache system with TTL expiration and size-limited GC. +//! +//! Cache directory structure: +//! ```text +//! ~/.cache/mozart/ (or $COMPOSER_CACHE_DIR) +//! files/ dist archives (key: vendor~package~reference.ext) +//! repo/ API responses (key: provider-vendor~package.json) +//! vcs/ VCS mirrors (one subdir per sanitized URL) +//! ``` + +use std::fs; +use std::path::{Path, PathBuf}; +use std::time::{SystemTime, UNIX_EPOCH}; + +/// Configuration for the Mozart cache system. +pub struct CacheConfig { + /// Root cache directory (e.g. `~/.cache/mozart`). + pub cache_dir: PathBuf, + /// Directory for dist archives. + pub cache_files_dir: PathBuf, + /// Directory for API responses. + pub cache_repo_dir: PathBuf, + /// Directory for VCS mirrors (one subdirectory per sanitized URL). + pub cache_vcs_dir: PathBuf, + /// TTL in seconds for repo entries (default: 15,552,000 = 6 months). + pub cache_ttl: u64, + /// TTL in seconds for files entries (falls back to `cache_ttl`). + pub cache_files_ttl: u64, + /// Maximum size of the files cache in bytes (default: 300 MiB). + pub cache_files_maxsize: u64, + /// Whether the cache is read-only (no writes). + pub read_only: bool, +} + +impl CacheConfig { + /// Default TTL: 6 months in seconds. + pub const DEFAULT_TTL: u64 = 15_552_000; + /// Default max files cache size: 300 MiB. + pub const DEFAULT_FILES_MAXSIZE: u64 = 300 * 1024 * 1024; +} + +/// Build a `CacheConfig` from CLI flags and environment variables. +/// +/// Respects `$COMPOSER_CACHE_DIR` for the base directory, and +/// `$COMPOSER_NO_CACHE` / `COMPOSER_CACHE_READ_ONLY` env vars. +/// +/// When no-cache mode is active (via `cli_no_cache` or `$COMPOSER_NO_CACHE`), +/// all cache directories are set to a null device, mirroring Composer's +/// `Application::doRun()` which calls `putenv('COMPOSER_CACHE_DIR', '/dev/null')`. +pub fn build_cache_config(cli_no_cache: bool) -> CacheConfig { + let no_cache = std::env::var("COMPOSER_NO_CACHE").is_ok() || cli_no_cache; + + let read_only = std::env::var("COMPOSER_CACHE_READ_ONLY") + .map(|v| v == "1" || v.eq_ignore_ascii_case("true")) + .unwrap_or(false); + + let cache_dir = if no_cache { + // Mirrors Composer: --no-cache redirects all cache paths to a null device so + // that Cache::is_usable() returns false and caching is transparently disabled. + #[cfg(windows)] + { + PathBuf::from("nul") + } + #[cfg(not(windows))] + { + PathBuf::from("/dev/null") + } + } else if let Ok(dir) = std::env::var("COMPOSER_CACHE_DIR") { + PathBuf::from(dir) + } else { + dirs_cache_dir().join("mozart") + }; + + let cache_files_dir = cache_dir.join("files"); + let cache_repo_dir = cache_dir.join("repo"); + let cache_vcs_dir = std::env::var("COMPOSER_CACHE_VCS_DIR") + .map(PathBuf::from) + .unwrap_or_else(|_| cache_dir.join("vcs")); + + CacheConfig { + cache_files_dir, + cache_repo_dir, + cache_vcs_dir, + cache_ttl: CacheConfig::DEFAULT_TTL, + cache_files_ttl: CacheConfig::DEFAULT_TTL, + cache_files_maxsize: CacheConfig::DEFAULT_FILES_MAXSIZE, + cache_dir, + read_only, + } +} + +/// Return the platform cache directory (XDG_CACHE_HOME or ~/.cache). +fn dirs_cache_dir() -> PathBuf { + if let Ok(xdg) = std::env::var("XDG_CACHE_HOME") { + return PathBuf::from(xdg); + } + if let Ok(home) = std::env::var("HOME") { + return PathBuf::from(home).join(".cache"); + } + PathBuf::from("/tmp") +} + +/// A single cache bucket (a directory on disk). +#[derive(Clone)] +pub struct Cache { + root: PathBuf, + enabled: bool, + readonly: bool, +} + +impl Cache { + /// Create a new cache rooted at `root`. + /// + /// Mirrors Composer's `Cache::__construct` + `Cache::isEnabled()`: + /// - If the path is a null device (`/dev/null`, `nul`, etc.), the cache is disabled. + /// - If `readonly` is true, the cache is always enabled (no writability check). + /// - Otherwise, tries to create the directory and checks that it is writable; + /// disables the cache with a warning if not. + pub fn new(root: PathBuf, readonly: bool) -> Self { + let enabled = if !Self::is_usable(&root) { + false + } else if readonly { + true + } else { + if fs::create_dir_all(&root).is_err() { + false + } else { + fs::metadata(&root) + .map(|m| !m.permissions().readonly()) + .unwrap_or(false) + } + }; + Self { + root, + enabled, + readonly, + } + } + + /// Returns `false` for null-device paths that should never be used as a real cache. + /// + /// Mirrors Composer's `Cache::isUsable()`. + fn is_usable(path: &Path) -> bool { + let s = path.to_string_lossy(); + if cfg!(windows) { + // On Windows, "nul" and "$null" (any case) are null devices. + !s.split(['/', '\\']) + .any(|c| c.eq_ignore_ascii_case("nul") || c == "$null") + } else { + // On Unix, /dev/null and any path under it are unusable. + s != "/dev/null" && !s.starts_with("/dev/null/") + } + } + + /// Shorthand: create the repo cache from a `CacheConfig`. + pub fn repo(config: &CacheConfig) -> Self { + Self::new(config.cache_repo_dir.clone(), config.read_only) + } + + /// Shorthand: create the files cache from a `CacheConfig`. + pub fn files(config: &CacheConfig) -> Self { + Self::new(config.cache_files_dir.clone(), config.read_only) + } + + /// Whether caching is enabled for this bucket. + pub fn is_enabled(&self) -> bool { + self.enabled + } + + /// Sanitize a cache key for use as a filename. + /// + /// Replaces `/` with `~` and strips characters that are unsafe in + /// filenames (anything except alphanumerics, `-`, `_`, `.`, `~`). + pub fn sanitize_key(key: &str) -> String { + key.replace('/', "~") + .chars() + .filter(|c| c.is_alphanumeric() || matches!(c, '-' | '_' | '.' | '~')) + .collect() + } + + /// Return the full path for a cache entry. + fn path_for(&self, key: &str) -> PathBuf { + self.root.join(Self::sanitize_key(key)) + } + + /// Read a cached string entry, or `None` if absent or cache disabled. + pub fn read(&self, key: &str) -> Option<String> { + if !self.enabled { + return None; + } + fs::read_to_string(self.path_for(key)).ok() + } + + /// Write a string entry atomically (write to temp file, then rename). + pub fn write(&self, key: &str, contents: &str) -> anyhow::Result<()> { + if !self.enabled || self.readonly { + return Ok(()); + } + self.write_bytes(key, contents.as_bytes()) + } + + /// Read a cached binary entry, or `None` if absent or cache disabled. + pub fn read_bytes(&self, key: &str) -> Option<Vec<u8>> { + if !self.enabled { + return None; + } + fs::read(self.path_for(key)).ok() + } + + /// Write a binary entry atomically (write to temp file, then rename). + pub fn write_bytes(&self, key: &str, data: &[u8]) -> anyhow::Result<()> { + if !self.enabled || self.readonly { + return Ok(()); + } + let dest = self.path_for(key); + // Ensure parent directory exists + if let Some(parent) = dest.parent() { + fs::create_dir_all(parent)?; + } + // Write to a temp file next to the destination + let tmp = dest.with_extension("tmp"); + fs::write(&tmp, data)?; + fs::rename(&tmp, &dest)?; + Ok(()) + } + + /// Delete all cached entries in this bucket. + pub fn clear(&self) -> anyhow::Result<()> { + if !self.enabled || self.readonly { + return Ok(()); + } + if !self.root.exists() { + return Ok(()); + } + for entry in fs::read_dir(&self.root)? { + let entry = entry?; + let path = entry.path(); + if path.is_file() { + fs::remove_file(&path)?; + } else if path.is_dir() { + fs::remove_dir_all(&path)?; + } + } + Ok(()) + } + + /// Run garbage collection on this cache bucket. + /// + /// 1. Deletes files with mtime older than `ttl_seconds`. + /// 2. If total remaining size > `max_size_bytes`, deletes the oldest files + /// (by mtime) until the total is under the limit. + pub fn gc(&self, ttl_seconds: u64, max_size_bytes: u64) -> anyhow::Result<()> { + if !self.enabled || self.readonly || !self.root.exists() { + return Ok(()); + } + + let now = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_secs(); + + // Collect (path, mtime, size) for all files + let mut files: Vec<(PathBuf, u64, u64)> = Vec::new(); + collect_files(&self.root, &mut files)?; + + // Phase 1: delete TTL-expired files + let mut remaining: Vec<(PathBuf, u64, u64)> = Vec::new(); + for (path, mtime, size) in files { + let age = now.saturating_sub(mtime); + if age > ttl_seconds { + let _ = fs::remove_file(&path); + } else { + remaining.push((path, mtime, size)); + } + } + + // Phase 2: enforce size limit by deleting oldest first + let total_size: u64 = remaining.iter().map(|(_, _, sz)| sz).sum(); + if total_size > max_size_bytes { + // Sort by mtime ascending (oldest first) + remaining.sort_by_key(|(_, mtime, _)| *mtime); + let mut current_size = total_size; + for (path, _, size) in &remaining { + if current_size <= max_size_bytes { + break; + } + if fs::remove_file(path).is_ok() { + current_size = current_size.saturating_sub(*size); + } + } + } + + Ok(()) + } + + /// Run garbage collection on a VCS cache bucket. + /// + /// Each top-level subdirectory is one bare mirror keyed by sanitized URL. + /// Deletes entire subdirectories whose mtime is older than `ttl_seconds`. + /// Mirrors Composer's `Cache::gcVcsCache`. + pub fn gc_vcs_cache(&self, ttl_seconds: u64) -> anyhow::Result<()> { + if !self.enabled || !self.root.exists() { + return Ok(()); + } + + let now = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_secs(); + + for entry in fs::read_dir(&self.root)? { + let entry = entry?; + let path = entry.path(); + let metadata = entry.metadata()?; + if !metadata.is_dir() { + continue; + } + let mtime = metadata + .modified() + .ok() + .and_then(|t| t.duration_since(UNIX_EPOCH).ok()) + .map(|d| d.as_secs()) + .unwrap_or(0); + if now.saturating_sub(mtime) > ttl_seconds { + let _ = fs::remove_dir_all(&path); + } + } + + Ok(()) + } + + /// Return the age in seconds of a cached entry based on its mtime, + /// or `None` if the entry doesn't exist or mtime can't be read. + pub fn age(&self, key: &str) -> Option<u64> { + if !self.enabled { + return None; + } + let path = self.path_for(key); + let metadata = fs::metadata(&path).ok()?; + let mtime = metadata.modified().ok()?; + let now = SystemTime::now(); + now.duration_since(mtime).ok().map(|d| d.as_secs()) + } +} + +/// Recursively collect all files under `dir` as `(path, mtime_secs, size_bytes)`. +fn collect_files(dir: &Path, out: &mut Vec<(PathBuf, u64, u64)>) -> anyhow::Result<()> { + if !dir.exists() { + return Ok(()); + } + for entry in fs::read_dir(dir)? { + let entry = entry?; + let path = entry.path(); + let metadata = entry.metadata()?; + if metadata.is_dir() { + collect_files(&path, out)?; + } else if metadata.is_file() { + let mtime = metadata + .modified() + .ok() + .and_then(|t| t.duration_since(UNIX_EPOCH).ok()) + .map(|d| d.as_secs()) + .unwrap_or(0); + let size = metadata.len(); + out.push((path, mtime, size)); + } + } + Ok(()) +} + +/// Return `true` with a probability of 1 in 50 (based on system time nanos). +/// +/// Used to decide whether to run GC after an install/update operation. +pub fn gc_is_necessary() -> bool { + let nanos = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .subsec_nanos(); + nanos.is_multiple_of(50) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::time::Duration; + use tempfile::tempdir; + + #[test] + fn test_sanitize_key_replaces_slash() { + assert_eq!(Cache::sanitize_key("vendor/package"), "vendor~package"); + } + + #[test] + fn test_sanitize_key_strips_unsafe_chars() { + // Colons and spaces should be stripped + assert_eq!(Cache::sanitize_key("foo:bar baz"), "foobarbaz"); + } + + #[test] + fn test_sanitize_key_preserves_safe_chars() { + let key = "provider-vendor~package.json"; + assert_eq!(Cache::sanitize_key(key), key); + } + + #[test] + fn test_sanitize_key_full_example() { + assert_eq!( + Cache::sanitize_key("provider-monolog/monolog.json"), + "provider-monolog~monolog.json" + ); + } + + #[test] + fn test_write_read_roundtrip_string() { + let dir = tempdir().unwrap(); + let cache = Cache::new(dir.path().to_path_buf(), false); + + cache.write("test-key", "hello world").unwrap(); + let result = cache.read("test-key"); + assert_eq!(result.as_deref(), Some("hello world")); + } + + #[test] + fn test_write_read_roundtrip_bytes() { + let dir = tempdir().unwrap(); + let cache = Cache::new(dir.path().to_path_buf(), false); + + let data = vec![0u8, 1, 2, 3, 255]; + cache.write_bytes("bin-key", &data).unwrap(); + let result = cache.read_bytes("bin-key"); + assert_eq!(result, Some(data)); + } + + #[test] + fn test_clear_removes_all_entries() { + let dir = tempdir().unwrap(); + let cache = Cache::new(dir.path().to_path_buf(), false); + + cache.write("key1", "value1").unwrap(); + cache.write("key2", "value2").unwrap(); + assert!(cache.read("key1").is_some()); + assert!(cache.read("key2").is_some()); + + cache.clear().unwrap(); + + assert!(cache.read("key1").is_none()); + assert!(cache.read("key2").is_none()); + } + + #[test] + fn test_disabled_cache_returns_none() { + // Point cache at /dev/null — is_usable() returns false → cache disabled. + let cache = Cache::new(PathBuf::from("/dev/null/files"), false); + + // Write should silently succeed (no-op) + cache.write("key", "value").unwrap(); + + // Read should return None even if we wrote + assert!(cache.read("key").is_none()); + assert!(cache.read_bytes("key").is_none()); + } + + #[test] + fn test_gc_ttl_expiration() { + let dir = tempdir().unwrap(); + let cache = Cache::new(dir.path().to_path_buf(), false); + + // Write a file, then manually set its mtime to the past + cache.write("old-key", "old content").unwrap(); + let old_path = dir.path().join(Cache::sanitize_key("old-key")); + + // Write a fresh file + cache.write("new-key", "new content").unwrap(); + + // Set the old file's mtime to 2 hours ago + let two_hours_ago = SystemTime::now() - Duration::from_secs(7200); + filetime::set_file_mtime( + &old_path, + filetime::FileTime::from_system_time(two_hours_ago), + ) + .unwrap(); + + // GC with TTL of 1 hour (3600 seconds) + cache.gc(3600, u64::MAX).unwrap(); + + // Old file should be deleted, new file should remain + assert!( + cache.read("old-key").is_none(), + "expired file should be deleted" + ); + assert!(cache.read("new-key").is_some(), "fresh file should remain"); + } + + #[test] + fn test_gc_size_limit() { + let dir = tempdir().unwrap(); + let cache = Cache::new(dir.path().to_path_buf(), false); + + // Write two files; the first one should be older + cache.write("old-file", "aaaaaaaaaa").unwrap(); // 10 bytes + let old_path = dir.path().join(Cache::sanitize_key("old-file")); + + // Add a small delay before writing second file via mtime manipulation + cache.write("new-file", "bbbbbbbbbb").unwrap(); // 10 bytes + + // Set old-file's mtime to 1 second ago so it's older + let one_second_ago = SystemTime::now() - Duration::from_secs(1); + filetime::set_file_mtime( + &old_path, + filetime::FileTime::from_system_time(one_second_ago), + ) + .unwrap(); + + // GC with a max size of 12 bytes (can only fit one 10-byte file) + // TTL is very long so no TTL expiration + cache.gc(u64::MAX / 2, 12).unwrap(); + + // The older file should be removed to get under the size limit + assert!( + cache.read("old-file").is_none() || cache.read("new-file").is_none(), + "at least one file should be removed to enforce size limit" + ); + } + + #[test] + fn test_gc_vcs_removes_old_subdirs() { + let dir = tempdir().unwrap(); + let cache = Cache::new(dir.path().to_path_buf(), false); + + let old_mirror = dir.path().join("old-mirror"); + let new_mirror = dir.path().join("new-mirror"); + fs::create_dir_all(&old_mirror).unwrap(); + fs::write(old_mirror.join("HEAD"), "ref: refs/heads/main\n").unwrap(); + fs::create_dir_all(&new_mirror).unwrap(); + fs::write(new_mirror.join("HEAD"), "ref: refs/heads/main\n").unwrap(); + + let two_hours_ago = SystemTime::now() - Duration::from_secs(7200); + filetime::set_file_mtime( + &old_mirror, + filetime::FileTime::from_system_time(two_hours_ago), + ) + .unwrap(); + + cache.gc_vcs_cache(3600).unwrap(); + + assert!(!old_mirror.exists(), "expired mirror should be removed"); + assert!(new_mirror.exists(), "fresh mirror should remain"); + } + + #[test] + fn test_age_existing_entry() { + let dir = tempdir().unwrap(); + let cache = Cache::new(dir.path().to_path_buf(), false); + + cache.write("fresh-key", "content").unwrap(); + let age = cache.age("fresh-key"); + + // Should be very recent (< 5 seconds) + assert!(age.is_some()); + assert!(age.unwrap() < 5); + } + + #[test] + fn test_age_missing_entry() { + let dir = tempdir().unwrap(); + let cache = Cache::new(dir.path().to_path_buf(), false); + assert!(cache.age("nonexistent-key").is_none()); + } + + #[test] + fn test_age_disabled_cache() { + let cache = Cache::new(PathBuf::from("/dev/null/files"), false); + assert!(cache.age("any-key").is_none()); + } +} |
