diff options
| -rw-r--r-- | .gitmodules | 3 | ||||
| -rw-r--r-- | Cargo.lock | 9 | ||||
| -rw-r--r-- | Cargo.toml | 1 | ||||
| -rw-r--r-- | crates/mozart-core/Cargo.toml | 1 | ||||
| -rw-r--r-- | crates/mozart-core/src/validation.rs | 3 | ||||
| -rw-r--r-- | crates/mozart-spdx-licenses/Cargo.toml | 10 | ||||
| -rw-r--r-- | crates/mozart-spdx-licenses/build.rs | 54 | ||||
| m--------- | crates/mozart-spdx-licenses/composer-spdx-licenses | 0 | ||||
| -rw-r--r-- | crates/mozart-spdx-licenses/src/lib.rs | 467 |
9 files changed, 546 insertions, 2 deletions
diff --git a/.gitmodules b/.gitmodules index 44f77f5..975a18d 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "composer"] path = composer url = https://github.com/composer/composer +[submodule "crates/mozart-spdx-licenses/composer-spdx-licenses"] + path = crates/mozart-spdx-licenses/composer-spdx-licenses + url = https://github.com/composer/spdx-licenses @@ -1131,6 +1131,7 @@ dependencies = [ "anyhow", "colored", "dialoguer", + "mozart-spdx-licenses", "regex", "serde", "serde_json", @@ -1167,6 +1168,14 @@ dependencies = [ ] [[package]] +name = "mozart-spdx-licenses" +version = "0.1.0" +dependencies = [ + "serde", + "serde_json", +] + +[[package]] name = "normalize-line-endings" version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -11,6 +11,7 @@ mozart-constraint = { path = "crates/mozart-constraint" } mozart-core = { path = "crates/mozart-core" } mozart-archiver = { path = "crates/mozart-archiver" } mozart-metadata-minifier = { path = "crates/mozart-metadata-minifier" } +mozart-spdx-licenses = { path = "crates/mozart-spdx-licenses" } mozart-registry = { path = "crates/mozart-registry" } mozart-autoload = { path = "crates/mozart-autoload" } anyhow = "1.0.102" diff --git a/crates/mozart-core/Cargo.toml b/crates/mozart-core/Cargo.toml index 25210be..885fdf4 100644 --- a/crates/mozart-core/Cargo.toml +++ b/crates/mozart-core/Cargo.toml @@ -7,6 +7,7 @@ edition.workspace = true anyhow.workspace = true colored.workspace = true dialoguer.workspace = true +mozart-spdx-licenses.workspace = true regex.workspace = true serde.workspace = true serde_json.workspace = true diff --git a/crates/mozart-core/src/validation.rs b/crates/mozart-core/src/validation.rs index 7f946ae..24f1705 100644 --- a/crates/mozart-core/src/validation.rs +++ b/crates/mozart-core/src/validation.rs @@ -52,8 +52,7 @@ pub fn validate_stability(s: &str) -> bool { } pub fn validate_license(s: &str) -> bool { - // TODO: check SPDX Identifier - !s.is_empty() + mozart_spdx_licenses::spdx().validate(s) } pub fn validate_autoload_path(s: &str) -> bool { diff --git a/crates/mozart-spdx-licenses/Cargo.toml b/crates/mozart-spdx-licenses/Cargo.toml new file mode 100644 index 0000000..fc31b9f --- /dev/null +++ b/crates/mozart-spdx-licenses/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "mozart-spdx-licenses" +version.workspace = true +edition.workspace = true + +[dependencies] + +[build-dependencies] +serde.workspace = true +serde_json.workspace = true diff --git a/crates/mozart-spdx-licenses/build.rs b/crates/mozart-spdx-licenses/build.rs new file mode 100644 index 0000000..27d4ed6 --- /dev/null +++ b/crates/mozart-spdx-licenses/build.rs @@ -0,0 +1,54 @@ +use serde_json::Value; +use std::collections::BTreeMap; +use std::env; +use std::fs; +use std::path::Path; + +fn main() { + let manifest_dir = env::var("CARGO_MANIFEST_DIR").unwrap(); + let res_dir = Path::new(&manifest_dir).join("composer-spdx-licenses/res"); + + let licenses_path = res_dir.join("spdx-licenses.json"); + let exceptions_path = res_dir.join("spdx-exceptions.json"); + + println!("cargo:rerun-if-changed={}", licenses_path.display()); + println!("cargo:rerun-if-changed={}", exceptions_path.display()); + + let licenses_json: BTreeMap<String, Value> = + serde_json::from_str(&fs::read_to_string(&licenses_path).unwrap()).unwrap(); + + let exceptions_json: BTreeMap<String, Value> = + serde_json::from_str(&fs::read_to_string(&exceptions_path).unwrap()).unwrap(); + + let out_dir = env::var("OUT_DIR").unwrap(); + let out_path = Path::new(&out_dir).join("spdx_data.rs"); + + let mut code = String::new(); + + // Generate licenses array + code.push_str("const LICENSES: &[(&str, &str, &str, bool, bool)] = &[\n"); + for (id, val) in &licenses_json { + let arr = val.as_array().unwrap(); + let full_name = arr[0].as_str().unwrap(); + let osi_approved = arr[1].as_bool().unwrap(); + let deprecated = arr[2].as_bool().unwrap(); + let lower = id.to_lowercase(); + code.push_str(&format!( + " ({:?}, {:?}, {:?}, {}, {}),\n", + lower, id, full_name, osi_approved, deprecated + )); + } + code.push_str("];\n\n"); + + // Generate exceptions array + code.push_str("const EXCEPTIONS: &[(&str, &str, &str)] = &[\n"); + for (id, val) in &exceptions_json { + let arr = val.as_array().unwrap(); + let full_name = arr[0].as_str().unwrap(); + let lower = id.to_lowercase(); + code.push_str(&format!(" ({:?}, {:?}, {:?}),\n", lower, id, full_name)); + } + code.push_str("];\n"); + + fs::write(out_path, code).unwrap(); +} diff --git a/crates/mozart-spdx-licenses/composer-spdx-licenses b/crates/mozart-spdx-licenses/composer-spdx-licenses new file mode 160000 +Subproject ce0b20d268fbbee37a1aea57006ad90c6bc8d51 diff --git a/crates/mozart-spdx-licenses/src/lib.rs b/crates/mozart-spdx-licenses/src/lib.rs new file mode 100644 index 0000000..04dce27 --- /dev/null +++ b/crates/mozart-spdx-licenses/src/lib.rs @@ -0,0 +1,467 @@ +use std::collections::HashMap; +use std::sync::LazyLock; + +include!(concat!(env!("OUT_DIR"), "/spdx_data.rs")); + +/// Information about an SPDX license. +#[derive(Debug, Clone)] +pub struct LicenseInfo { + pub identifier: &'static str, + pub full_name: &'static str, + pub osi_approved: bool, + pub deprecated: bool, +} + +/// Information about an SPDX license exception. +#[derive(Debug, Clone)] +pub struct ExceptionInfo { + pub identifier: &'static str, + pub full_name: &'static str, +} + +/// SPDX license database with expression validation. +pub struct SpdxLicenses { + licenses: HashMap<&'static str, LicenseInfo>, + exceptions: HashMap<&'static str, ExceptionInfo>, + name_to_id: HashMap<&'static str, &'static str>, +} + +impl SpdxLicenses { + /// Build the license database from generated data. + pub fn new() -> Self { + let mut licenses = HashMap::with_capacity(LICENSES.len()); + let mut name_to_id = HashMap::with_capacity(LICENSES.len()); + for &(lower, id, full_name, osi, deprecated) in LICENSES { + licenses.insert( + lower, + LicenseInfo { + identifier: id, + full_name, + osi_approved: osi, + deprecated, + }, + ); + name_to_id.insert(full_name, id); + } + + let mut exceptions = HashMap::with_capacity(EXCEPTIONS.len()); + for &(lower, id, full_name) in EXCEPTIONS { + exceptions.insert( + lower, + ExceptionInfo { + identifier: id, + full_name, + }, + ); + } + + Self { + licenses, + exceptions, + name_to_id, + } + } + + /// Look up a license by its SPDX identifier (case-insensitive). + pub fn get_license_by_identifier(&self, id: &str) -> Option<&LicenseInfo> { + self.licenses.get(id.to_lowercase().as_str()) + } + + /// Look up an exception by its SPDX identifier (case-insensitive). + pub fn get_exception_by_identifier(&self, id: &str) -> Option<&ExceptionInfo> { + self.exceptions.get(id.to_lowercase().as_str()) + } + + /// Look up a license identifier by its full name. + pub fn get_identifier_by_name(&self, name: &str) -> Option<&str> { + self.name_to_id.get(name).copied() + } + + /// Check if a license is OSI-approved. + pub fn is_osi_approved(&self, id: &str) -> bool { + self.get_license_by_identifier(id) + .is_some_and(|l| l.osi_approved) + } + + /// Check if a license is deprecated. + pub fn is_deprecated(&self, id: &str) -> bool { + self.get_license_by_identifier(id) + .is_some_and(|l| l.deprecated) + } + + /// Validate an SPDX license expression. + /// + /// Supports compound expressions with AND/OR, the WITH operator for + /// exceptions, the `+` (or-later) operator, LicenseRef, and the special + /// values `NONE` and `NOASSERTION`. + pub fn validate(&self, license: &str) -> bool { + let license = license.trim(); + if license.is_empty() { + return false; + } + + // Special values + if license.eq_ignore_ascii_case("NONE") || license.eq_ignore_ascii_case("NOASSERTION") { + return true; + } + + let mut parser = Parser::new(license, self); + parser.parse_expression() && parser.is_at_end() + } + + /// Validate a list of SPDX license identifiers (joined with OR). + pub fn validate_list(&self, licenses: &[&str]) -> bool { + if licenses.is_empty() { + return false; + } + let expr = licenses.join(" OR "); + self.validate(&expr) + } + + fn is_valid_license_id(&self, id: &str) -> bool { + self.licenses.contains_key(id.to_lowercase().as_str()) + } + + fn is_valid_exception_id(&self, id: &str) -> bool { + self.exceptions.contains_key(id.to_lowercase().as_str()) + } +} + +impl Default for SpdxLicenses { + fn default() -> Self { + Self::new() + } +} + +/// Global static SPDX license database. +static SPDX: LazyLock<SpdxLicenses> = LazyLock::new(SpdxLicenses::new); + +/// Get a reference to the global SPDX license database. +pub fn spdx() -> &'static SpdxLicenses { + &SPDX +} + +// --------------------------------------------------------------------------- +// SPDX expression parser (recursive descent) +// --------------------------------------------------------------------------- +// +// Grammar: +// expression = compound_expr +// compound_expr = head_expr (("AND" | "OR") compound_expr)? +// head_expr = simple_expr ("WITH" exception_id)? +// | "(" compound_expr ")" +// simple_expr = license_id "+"? +// | license_ref +// license_ref = ("DocumentRef-" idstring ":")? "LicenseRef-" idstring +// idstring = [a-zA-Z0-9-.]+ + +struct Parser<'a> { + tokens: Vec<&'a str>, + pos: usize, + db: &'a SpdxLicenses, +} + +impl<'a> Parser<'a> { + fn new(input: &'a str, db: &'a SpdxLicenses) -> Self { + let tokens = Self::tokenize(input); + Self { + tokens, + pos: 0, + db, + } + } + + fn tokenize(input: &str) -> Vec<&str> { + let mut tokens = Vec::new(); + let mut chars = input.char_indices().peekable(); + + while let Some(&(i, c)) = chars.peek() { + if c.is_whitespace() { + chars.next(); + continue; + } + if c == '(' || c == ')' || c == '+' { + tokens.push(&input[i..i + 1]); + chars.next(); + continue; + } + // Identifier or keyword: consume until whitespace or special char + let start = i; + loop { + chars.next(); + match chars.peek() { + Some(&(_, ch)) if !ch.is_whitespace() && ch != '(' && ch != ')' => { + // '+' only breaks if it's right after an identifier + if ch == '+' { + break; + } + } + _ => break, + } + } + let end = chars.peek().map_or(input.len(), |&(j, _)| j); + tokens.push(&input[start..end]); + } + + tokens + } + + fn peek(&self) -> Option<&'a str> { + self.tokens.get(self.pos).copied() + } + + fn advance(&mut self) -> Option<&'a str> { + let tok = self.tokens.get(self.pos).copied(); + if tok.is_some() { + self.pos += 1; + } + tok + } + + fn is_at_end(&self) -> bool { + self.pos >= self.tokens.len() + } + + fn expect(&mut self, expected: &str) -> bool { + if self.peek() == Some(expected) { + self.advance(); + true + } else { + false + } + } + + /// Parse the top-level expression. + fn parse_expression(&mut self) -> bool { + self.parse_compound_expr() + } + + /// compound_expr = head_expr (("AND" | "OR") compound_expr)? + fn parse_compound_expr(&mut self) -> bool { + if !self.parse_head_expr() { + return false; + } + + if let Some(tok) = self.peek() + && (tok == "AND" || tok == "OR") + { + self.advance(); + return self.parse_compound_expr(); + } + + true + } + + /// head_expr = "(" compound_expr ")" | simple_expr ("WITH" exception_id)? + fn parse_head_expr(&mut self) -> bool { + if self.expect("(") { + if !self.parse_compound_expr() { + return false; + } + return self.expect(")"); + } + + if !self.parse_simple_expr() { + return false; + } + + // Optional WITH clause + if self.peek() == Some("WITH") { + self.advance(); + return self.parse_exception_id(); + } + + true + } + + /// simple_expr = license_ref | license_id "+"? + fn parse_simple_expr(&mut self) -> bool { + let tok = match self.peek() { + Some(t) => t, + None => return false, + }; + + // LicenseRef / DocumentRef + if tok.starts_with("LicenseRef-") || tok.starts_with("DocumentRef-") { + return self.parse_license_ref(); + } + + // Regular license identifier — could be multi-token with "-" + // We just consume the current token and check + self.advance(); + + // Handle '+' (or-later) operator + if self.peek() == Some("+") { + self.advance(); + } + + self.db.is_valid_license_id(tok) + } + + /// license_ref = ("DocumentRef-" idstring ":")? "LicenseRef-" idstring + fn parse_license_ref(&mut self) -> bool { + let tok = match self.advance() { + Some(t) => t, + None => return false, + }; + + if let Some(rest) = tok.strip_prefix("DocumentRef-") { + // Must contain ":LicenseRef-" within + if let Some(colon_pos) = rest.find(":LicenseRef-") { + let doc_id = &rest[..colon_pos]; + let license_ref_id = &rest[colon_pos + ":LicenseRef-".len()..]; + return is_valid_idstring(doc_id) && is_valid_idstring(license_ref_id); + } + return false; + } + + if let Some(id) = tok.strip_prefix("LicenseRef-") { + return is_valid_idstring(id); + } + + false + } + + fn parse_exception_id(&mut self) -> bool { + match self.advance() { + Some(id) => self.db.is_valid_exception_id(id), + None => false, + } + } +} + +/// Check that a string matches `[a-zA-Z0-9.-]+`. +fn is_valid_idstring(s: &str) -> bool { + !s.is_empty() && s.bytes().all(|b| b.is_ascii_alphanumeric() || b == b'.' || b == b'-') +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn valid_identifiers() { + let db = spdx(); + assert!(db.validate("MIT")); + assert!(db.validate("Apache-2.0")); + assert!(db.validate("GPL-3.0-only")); + assert!(db.validate("0BSD")); + } + + #[test] + fn case_insensitive() { + let db = spdx(); + assert!(db.validate("mit")); + assert!(db.validate("apache-2.0")); + assert!(db.validate("Mit")); + } + + #[test] + fn or_expression() { + let db = spdx(); + assert!(db.validate("MIT OR Apache-2.0")); + } + + #[test] + fn and_expression() { + let db = spdx(); + assert!(db.validate("MIT AND Apache-2.0")); + } + + #[test] + fn with_exception() { + let db = spdx(); + assert!(db.validate("GPL-2.0-only WITH Classpath-exception-2.0")); + } + + #[test] + fn complex_expression() { + let db = spdx(); + assert!(db.validate("(MIT AND Apache-2.0) OR GPL-3.0-only")); + assert!(db.validate("(MIT OR Apache-2.0) AND (GPL-2.0-only OR BSD-2-Clause)")); + } + + #[test] + fn special_values() { + let db = spdx(); + assert!(db.validate("NONE")); + assert!(db.validate("NOASSERTION")); + assert!(db.validate("none")); + assert!(db.validate("noassertion")); + } + + #[test] + fn or_later_operator() { + let db = spdx(); + assert!(db.validate("Apache-2.0+")); + assert!(db.validate("GPL-2.0-only+")); + } + + #[test] + fn license_ref() { + let db = spdx(); + assert!(db.validate("LicenseRef-custom")); + assert!(db.validate("LicenseRef-my-license.1")); + assert!(db.validate("DocumentRef-spdx-tool-1.2:LicenseRef-MIT-Style-2")); + } + + #[test] + fn invalid_expressions() { + let db = spdx(); + assert!(!db.validate("")); + assert!(!db.validate("totally-not-a-license")); + assert!(!db.validate("MIT AND")); + assert!(!db.validate("AND MIT")); + assert!(!db.validate("MIT OR")); + assert!(!db.validate("(MIT")); + assert!(!db.validate("MIT)")); + assert!(!db.validate("MIT WITH")); + assert!(!db.validate("MIT WITH not-an-exception")); + } + + #[test] + fn validate_list() { + let db = spdx(); + assert!(db.validate_list(&["MIT", "Apache-2.0"])); + assert!(!db.validate_list(&[])); + assert!(!db.validate_list(&["not-valid"])); + } + + #[test] + fn license_lookup() { + let db = spdx(); + let mit = db.get_license_by_identifier("MIT").unwrap(); + assert_eq!(mit.identifier, "MIT"); + assert!(mit.osi_approved); + assert!(!mit.deprecated); + + assert!(db.get_license_by_identifier("mit").is_some()); + assert!(db.get_license_by_identifier("nonexistent").is_none()); + } + + #[test] + fn exception_lookup() { + let db = spdx(); + let exc = db + .get_exception_by_identifier("Classpath-exception-2.0") + .unwrap(); + assert_eq!(exc.identifier, "Classpath-exception-2.0"); + } + + #[test] + fn name_lookup() { + let db = spdx(); + assert_eq!( + db.get_identifier_by_name("MIT License"), + Some("MIT") + ); + } + + #[test] + fn osi_and_deprecated() { + let db = spdx(); + assert!(db.is_osi_approved("MIT")); + assert!(!db.is_osi_approved("nonexistent")); + assert!(!db.is_deprecated("MIT")); + } +} |
