From e6f35b567564665c6cb741a06e4c4afcdc5ab317 Mon Sep 17 00:00:00 2001
From: nsfisis
Date: Fri, 8 May 2026 03:12:05 +0900
Subject: fix(search): align with Composer's RepositoryInterface::search

Replace the HTTP-only post-filtered implementation with a
Repository::search trait dispatch that mirrors ComposerRepository::search
semantics for all three modes (FULLTEXT/NAME/VENDOR). --only-name now does
an OR-of-tokens regex match against the full Packagist list.json index
instead of a substring match against a fulltext page, so e.g.
`mozart search --only-name mono log` matches `monolog/monolog` like
Composer does.

Other parity fixes: regex::escape on non-fulltext queries, format check
before mutex check, 4-space JSON indent, OSC 8 terminal hyperlink emission
when a result has a url, ! Abandoned ! styling on abandoned rows, and the
Mozart-only "No packages found" warning is dropped to match Composer's
silent empty-result behavior.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 crates/mozart-registry/src/packagist.rs            | 72 ++++++++++++++++++++++
 crates/mozart-registry/src/repository/mod.rs       | 53 +++++++++++++++-
 .../src/repository/packagist_repo.rs               | 66 +++++++++++++++++++-
 3 files changed, 189 insertions(+), 2 deletions(-)

(limited to 'crates/mozart-registry')

diff --git a/crates/mozart-registry/src/packagist.rs b/crates/mozart-registry/src/packagist.rs
index 6f9b24a..5c99b07 100644
--- a/crates/mozart-registry/src/packagist.rs
+++ b/crates/mozart-registry/src/packagist.rs
@@ -367,6 +367,78 @@ pub async fn search_packages(
     Ok((all_results, total))
 }
 
+/// Response shape of `https://packagist.org/packages/list.json[?type=...]`.
+#[derive(Debug, Deserialize)]
+struct ListResponse {
+    #[serde(rename = "packageNames")]
+    package_names: Vec<String>,
+}
+
+/// Fetch the full list of Packagist package names, optionally filtered by type.
+///
+/// Backs Composer's `ComposerRepository::getPackageNames()` for the
+/// `SEARCH_NAME` and `SEARCH_VENDOR` search modes.
+/// Cached on disk under `list-packages~{type}.json` (or
+/// `list-packages~all.json` when no type filter is given).
+#[tracing::instrument(skip(repo_cache))]
+pub async fn fetch_package_names(
+    package_type: Option<&str>,
+    repo_cache: &Cache,
+) -> anyhow::Result<Vec<String>> {
+    let cache_key = match package_type {
+        Some(t) => format!("list-packages~{t}.json"),
+        None => "list-packages~all.json".to_string(),
+    };
+
+    if let Some(cached) = repo_cache.read(&cache_key) {
+        tracing::debug!("cache hit");
+        let parsed: ListResponse = serde_json::from_str(&cached)?;
+        return Ok(parsed.package_names);
+    }
+
+    let mut url = "https://packagist.org/packages/list.json".to_string();
+    if let Some(t) = package_type {
+        url.push_str("?type=");
+        url.push_str(&url_encode(t));
+    }
+    tracing::debug!(%url, "fetching package list");
+    let client = mozart_core::http::client_builder().build()?;
+    let response = client.get(&url).send().await?;
+    tracing::debug!(status = %response.status(), "received response");
+
+    if !response.status().is_success() {
+        anyhow::bail!(
+            "Failed to fetch package list from Packagist (HTTP {})",
+            response.status()
+        );
+    }
+
+    let body = response.text().await?;
+    let _ = repo_cache.write(&cache_key, &body);
+
+    let parsed: ListResponse = serde_json::from_str(&body)?;
+    Ok(parsed.package_names)
+}
+
+/// Fetch the deduplicated list of Packagist vendor names.
+///
+/// Mirrors Composer's `ComposerRepository::getVendorNames()` which derives
+/// vendors from `getPackageNames()` (regardless of type) by stripping the
+/// `/...` suffix and de-duplicating in insertion order.
+#[tracing::instrument(skip(repo_cache))]
+pub async fn fetch_vendor_names(repo_cache: &Cache) -> anyhow::Result<Vec<String>> {
+    let names = fetch_package_names(None, repo_cache).await?;
+    let mut seen: indexmap::IndexSet<String> = indexmap::IndexSet::new();
+    for name in names {
+        let vendor = match name.split_once('/') {
+            Some((v, _)) => v.to_string(),
+            None => name,
+        };
+        seen.insert(vendor);
+    }
+    Ok(seen.into_iter().collect())
+}
+
 /// A single security advisory from the Packagist API.
 #[derive(Debug, Clone, Deserialize, Serialize)]
 pub struct SecurityAdvisory {
diff --git a/crates/mozart-registry/src/repository/mod.rs b/crates/mozart-registry/src/repository/mod.rs
index 21752b9..6642638 100644
--- a/crates/mozart-registry/src/repository/mod.rs
+++ b/crates/mozart-registry/src/repository/mod.rs
@@ -10,12 +10,29 @@
 //! the live Packagist HTTP repo, [`inline_package_repo`] for `type: package`
 //! entries embedded in `composer.json`, and [`vcs_repo`] for VCS repositories.
 
-use crate::packagist::PackagistVersion;
+use crate::packagist::{PackagistVersion, SearchResult};
 
 pub mod inline_package_repo;
 pub mod packagist_repo;
 pub mod vcs_repo;
 
+/// Search modes for [`Repository::search`].
+///
+/// Mirrors Composer's `RepositoryInterface::SEARCH_FULLTEXT|SEARCH_NAME|SEARCH_VENDOR`
+/// constants (`composer/src/Composer/Repository/RepositoryInterface.php`).
+#[derive(Copy, Clone, Eq, PartialEq, Debug)]
+pub enum SearchMode {
+    /// Full-text search over name, description, and keywords (Packagist's
+    /// `search.json` API).
+    Fulltext,
+    /// Match the regex against package names. Tokens are split on whitespace
+    /// and joined as `(?:t1|t2|...)`; callers must pre-quote regex metachars.
+    Name,
+    /// Match the regex against vendor names. Result rows have only `name`
+    /// populated (the vendor part).
+    Vendor,
+}
+
 /// One name-keyed lookup against a repository.
 ///
 /// Matches the `$packageNameMap` argument of Composer's `loadPackages`. The
@@ -65,6 +82,22 @@ pub trait Repository: Send + Sync {
 
     /// Look up every version of every queried name this repo knows about.
     async fn load_packages(&self, queries: &[PackageQuery<'_>]) -> anyhow::Result<LoadResult>;
+
+    /// Search this repository.
+    ///
+    /// The default returns an empty result so repositories that don't
+    /// participate in search (e.g. inline / VCS repos that only resolve
+    /// known names) can opt out. Mirrors Composer's
+    /// `RepositoryInterface::search` whose default behavior on
+    /// `ArrayRepository` walks the in-memory list.
+    async fn search(
+        &self,
+        _query: &str,
+        _mode: SearchMode,
+        _package_type: Option<&str>,
+    ) -> anyhow::Result<Vec<SearchResult>> {
+        Ok(Vec::new())
+    }
 }
 
 /// Ordered list of repositories. Mirrors `Composer\Repository\RepositoryManager`.
@@ -140,4 +173,22 @@ impl RepositorySet {
 
         Ok(packages)
     }
+
+    /// Fan-out search across every repository, concatenating results in
+    /// priority order. Mirrors Composer's
+    /// `CompositeRepository::search` which `array_merge`s per-repo results
+    /// without de-duplication.
+    pub async fn search(
+        &self,
+        query: &str,
+        mode: SearchMode,
+        package_type: Option<&str>,
+    ) -> anyhow::Result<Vec<SearchResult>> {
+        let mut all = Vec::new();
+        for repo in &self.repos {
+            let mut hits = repo.search(query, mode, package_type).await?;
+            all.append(&mut hits);
+        }
+        Ok(all)
+    }
 }
diff --git a/crates/mozart-registry/src/repository/packagist_repo.rs b/crates/mozart-registry/src/repository/packagist_repo.rs
index 6f9b687..fa656b7 100644
--- a/crates/mozart-registry/src/repository/packagist_repo.rs
+++ b/crates/mozart-registry/src/repository/packagist_repo.rs
@@ -5,9 +5,10 @@
 //! direct call. Construction takes ownership of the [`Cache`] handle so
 //! callers no longer thread it through `ResolveRequest` / `LockFileGenerationRequest`.
 
-use super::{LoadResult, NamedPackagistVersion, PackageQuery, Repository};
+use super::{LoadResult, NamedPackagistVersion, PackageQuery, Repository, SearchMode};
 use crate::cache::Cache;
 use crate::packagist;
+use crate::packagist::SearchResult;
 
 pub struct PackagistRepository {
     id: String,
@@ -54,4 +55,67 @@ impl Repository for PackagistRepository {
         }
         Ok(result)
     }
+
+    async fn search(
+        &self,
+        query: &str,
+        mode: SearchMode,
+        package_type: Option<&str>,
+    ) -> anyhow::Result<Vec<SearchResult>> {
+        match mode {
+            SearchMode::Fulltext => {
+                let (results, _total) = packagist::search_packages(query, package_type).await?;
+                Ok(results)
+            }
+            SearchMode::Name => {
+                let pattern = build_name_regex(query)?;
+                let names = packagist::fetch_package_names(package_type, &self.cache).await?;
+                Ok(names
+                    .into_iter()
+                    .filter(|name| pattern.is_match(name))
+                    .map(empty_search_result)
+                    .collect())
+            }
+            SearchMode::Vendor => {
+                let pattern = build_name_regex(query)?;
+                let vendors = packagist::fetch_vendor_names(&self.cache).await?;
+                Ok(vendors
+                    .into_iter()
+                    .filter(|name| pattern.is_match(name))
+                    .map(empty_search_result)
+                    .collect())
+            }
+        }
+    }
+}
+
+/// Build the case-insensitive `(?:t1|t2|...)` regex from whitespace-split
+/// tokens, mirroring Composer's `'{(?:'.implode('|', $matches).')}i'`.
+///
+/// Tokens are joined as-is — callers are expected to have already escaped
+/// regex metacharacters (`SearchCommand` calls `preg_quote`; Mozart calls
+/// `regex::escape` before reaching this point).
+fn build_name_regex(query: &str) -> anyhow::Result<regex::Regex> {
+    let tokens: Vec<&str> = query.split_whitespace().collect();
+    let body = if tokens.is_empty() {
+        String::new()
+    } else {
+        tokens.join("|")
+    };
+    Ok(regex::Regex::new(&format!("(?i)(?:{body})"))?)
+}
+
+/// Build a [`SearchResult`] with only `name` populated, mirroring the shape
+/// Composer returns for `SEARCH_NAME` / `SEARCH_VENDOR` modes
+/// (`['name' => $name]`, all other fields `null`).
+fn empty_search_result(name: String) -> SearchResult {
+    SearchResult {
+        name,
+        description: String::new(),
+        url: String::new(),
+        repository: None,
+        downloads: 0,
+        favers: 0,
+        abandoned: None,
+    }
 }
-- 
cgit v1.3.1