//! ref: composer/src/Composer/Util/Url.php use crate::config::Config; use crate::util::GitHub; use indexmap::IndexMap; use shirabe_external_packages::composer::pcre::{CaptureKey, Preg}; use shirabe_php_shim::{PHP_URL_HOST, PHP_URL_PORT, PhpMixed, in_array, parse_url}; pub struct Url; impl Url { pub fn update_dist_reference(config: &Config, mut url: String, r#ref: &str) -> String { let host = parse_url(&url, PHP_URL_HOST) .as_string() .map(|s| s.to_string()) .unwrap_or_default(); if host == "api.github.com" || host == "github.com" || host == "www.github.com" { let mut m: IndexMap = IndexMap::new(); if Preg::match3( r"(?i)^https?://(?:www\.)?github\.com/([^/]+)/([^/]+)/(zip|tar)ball/(.+)$", &url, Some(&mut m), ) .unwrap_or(false) { url = format!( "https://api.github.com/repos/{}/{}/{}ball/{}", m.get(&CaptureKey::ByIndex(1)).cloned().unwrap_or_default(), m.get(&CaptureKey::ByIndex(2)).cloned().unwrap_or_default(), m.get(&CaptureKey::ByIndex(3)).cloned().unwrap_or_default(), r#ref ); } else if Preg::match3( r"(?i)^https?://(?:www\.)?github\.com/([^/]+)/([^/]+)/archive/.+\.(zip|tar)(?:\.gz)?$", &url, Some(&mut m), ) .unwrap_or(false) { url = format!( "https://api.github.com/repos/{}/{}/{}ball/{}", m.get(&CaptureKey::ByIndex(1)).cloned().unwrap_or_default(), m.get(&CaptureKey::ByIndex(2)).cloned().unwrap_or_default(), m.get(&CaptureKey::ByIndex(3)).cloned().unwrap_or_default(), r#ref ); } else if Preg::match3( r"(?i)^https?://api\.github\.com/repos/([^/]+)/([^/]+)/(zip|tar)ball(?:/.+)?$", &url, Some(&mut m), ) .unwrap_or(false) { url = format!( "https://api.github.com/repos/{}/{}/{}ball/{}", m.get(&CaptureKey::ByIndex(1)).cloned().unwrap_or_default(), m.get(&CaptureKey::ByIndex(2)).cloned().unwrap_or_default(), m.get(&CaptureKey::ByIndex(3)).cloned().unwrap_or_default(), r#ref ); } } else if host == "bitbucket.org" || host == "www.bitbucket.org" { let mut m: IndexMap = IndexMap::new(); if Preg::match3( r"(?i)^https?://(?:www\.)?bitbucket\.org/([^/]+)/([^/]+)/get/(.+)\.(zip|tar\.gz|tar\.bz2)$", &url, Some(&mut m), ) .unwrap_or(false) { url = format!( "https://bitbucket.org/{}/{}/get/{}.{}", m.get(&CaptureKey::ByIndex(1)).cloned().unwrap_or_default(), m.get(&CaptureKey::ByIndex(2)).cloned().unwrap_or_default(), r#ref, m.get(&CaptureKey::ByIndex(4)).cloned().unwrap_or_default() ); } } else if host == "gitlab.com" || host == "www.gitlab.com" { let mut m: IndexMap = IndexMap::new(); if Preg::match3( r"(?i)^https?://(?:www\.)?gitlab\.com/api/v[34]/projects/([^/]+)/repository/archive\.(zip|tar\.gz|tar\.bz2|tar)\?sha=.+$", &url, Some(&mut m), ) .unwrap_or(false) { url = format!( "https://gitlab.com/api/v4/projects/{}/repository/archive.{}?sha={}", m.get(&CaptureKey::ByIndex(1)).cloned().unwrap_or_default(), m.get(&CaptureKey::ByIndex(2)).cloned().unwrap_or_default(), r#ref ); } } else if in_array( PhpMixed::String(host.clone()), &config.get("github-domains"), true, ) { url = Preg::replace( r"(?i)(/repos/[^/]+/[^/]+/(zip|tar)ball)(?:/.+)?$", &format!("$1/{}", r#ref), &url, ) .unwrap_or(url); } else if in_array( PhpMixed::String(host.clone()), &config.get("gitlab-domains"), true, ) { url = Preg::replace( r"(?i)(/api/v[34]/projects/[^/]+/repository/archive\.(?:zip|tar\.gz|tar\.bz2|tar)\?sha=).+$", &format!("${{1}}{}", r#ref), &url, ) .unwrap_or(url); } assert!(!url.is_empty()); url } pub fn get_origin(config: &Config, url: &str) -> String { if url.starts_with("file://") { return url.to_string(); } let mut origin = parse_url(url, PHP_URL_HOST) .as_string() .map(|s| s.to_string()) .unwrap_or_default(); if let Some(port) = parse_url(url, PHP_URL_PORT).as_int() { origin = format!("{}:{}", origin, port); } if origin.ends_with(".github.com") && origin != "codeload.github.com" { return "github.com".to_string(); } if origin == "repo.packagist.org" { return "packagist.org".to_string(); } if origin.is_empty() { origin = url.to_string(); } // Gitlab can be installed in a non-root context (i.e. gitlab.com/foo). When downloading archives the originUrl // is the host without the path, so we look for the registered gitlab-domains matching the host here if !origin.contains('/') && !in_array( PhpMixed::String(origin.clone()), &config.get("gitlab-domains"), true, ) { let gitlab_domains: Vec = match config.get("gitlab-domains") { PhpMixed::List(list) => list .iter() .filter_map(|v| v.as_string().map(|s| s.to_string())) .collect(), _ => vec![], }; for gitlab_domain in gitlab_domains { if !gitlab_domain.is_empty() && gitlab_domain.starts_with(&origin) { return gitlab_domain; } } } origin } pub fn sanitize(url: String) -> String { // GitHub repository rename result in redirect locations containing the access_token as GET parameter // e.g. https://api.github.com/repositories/9999999999?access_token=github_token let url = Preg::replace(r"([&?]access_token=)[^&]+", "$1***", &url).unwrap_or(url); let url = Preg::replace_callback( r"(?i)^(?P[a-z0-9]+://)?(?P[^:/\s@]+):(?P[^@\s/]+)@", |m| { let user = m .get(&CaptureKey::ByName("user".to_string())) .cloned() .unwrap_or_default(); let prefix = m .get(&CaptureKey::ByName("prefix".to_string())) .cloned() .unwrap_or_default(); // if the username looks like a long (12char+) hex string, or a modern github token (e.g. ghp_xxx, github_pat_xxx) we obfuscate that if Preg::is_match(GitHub::GITHUB_TOKEN_REGEX, &user).unwrap_or(false) { format!("{}***:***@", prefix) } else { format!("{}{}:***@", prefix, user) } }, &url, ) .unwrap_or(url); url } }