diff options
| author | nsfisis <nsfisis@gmail.com> | 2026-02-22 12:27:20 +0900 |
|---|---|---|
| committer | nsfisis <nsfisis@gmail.com> | 2026-02-22 12:37:41 +0900 |
| commit | 1ef1ebdcf50ae1358ec06e3c6a2fb797a8461617 (patch) | |
| tree | 97cc47fc216dc7e64697ffa74b6a2bb70f395abe /crates/mozart-spdx-licenses/src | |
| parent | 92fdff257d2c64f94600ba70bf17e429d46474b2 (diff) | |
| download | php-mozart-1ef1ebdcf50ae1358ec06e3c6a2fb797a8461617.tar.gz php-mozart-1ef1ebdcf50ae1358ec06e3c6a2fb797a8461617.tar.zst php-mozart-1ef1ebdcf50ae1358ec06e3c6a2fb797a8461617.zip | |
feat(spdx): add mozart-spdx-licenses crate for SPDX license validation
Add new workspace crate that validates SPDX license expressions using
data from composer/spdx-licenses (git submodule). Includes build.rs
codegen from JSON, recursive descent expression parser supporting
AND/OR/WITH/LicenseRef, and integrates into mozart-core's
validate_license function.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Diffstat (limited to 'crates/mozart-spdx-licenses/src')
| -rw-r--r-- | crates/mozart-spdx-licenses/src/lib.rs | 467 |
1 files changed, 467 insertions, 0 deletions
diff --git a/crates/mozart-spdx-licenses/src/lib.rs b/crates/mozart-spdx-licenses/src/lib.rs new file mode 100644 index 0000000..04dce27 --- /dev/null +++ b/crates/mozart-spdx-licenses/src/lib.rs @@ -0,0 +1,467 @@ +use std::collections::HashMap; +use std::sync::LazyLock; + +include!(concat!(env!("OUT_DIR"), "/spdx_data.rs")); + +/// Information about an SPDX license. +#[derive(Debug, Clone)] +pub struct LicenseInfo { + pub identifier: &'static str, + pub full_name: &'static str, + pub osi_approved: bool, + pub deprecated: bool, +} + +/// Information about an SPDX license exception. +#[derive(Debug, Clone)] +pub struct ExceptionInfo { + pub identifier: &'static str, + pub full_name: &'static str, +} + +/// SPDX license database with expression validation. +pub struct SpdxLicenses { + licenses: HashMap<&'static str, LicenseInfo>, + exceptions: HashMap<&'static str, ExceptionInfo>, + name_to_id: HashMap<&'static str, &'static str>, +} + +impl SpdxLicenses { + /// Build the license database from generated data. + pub fn new() -> Self { + let mut licenses = HashMap::with_capacity(LICENSES.len()); + let mut name_to_id = HashMap::with_capacity(LICENSES.len()); + for &(lower, id, full_name, osi, deprecated) in LICENSES { + licenses.insert( + lower, + LicenseInfo { + identifier: id, + full_name, + osi_approved: osi, + deprecated, + }, + ); + name_to_id.insert(full_name, id); + } + + let mut exceptions = HashMap::with_capacity(EXCEPTIONS.len()); + for &(lower, id, full_name) in EXCEPTIONS { + exceptions.insert( + lower, + ExceptionInfo { + identifier: id, + full_name, + }, + ); + } + + Self { + licenses, + exceptions, + name_to_id, + } + } + + /// Look up a license by its SPDX identifier (case-insensitive). + pub fn get_license_by_identifier(&self, id: &str) -> Option<&LicenseInfo> { + self.licenses.get(id.to_lowercase().as_str()) + } + + /// Look up an exception by its SPDX identifier (case-insensitive). + pub fn get_exception_by_identifier(&self, id: &str) -> Option<&ExceptionInfo> { + self.exceptions.get(id.to_lowercase().as_str()) + } + + /// Look up a license identifier by its full name. + pub fn get_identifier_by_name(&self, name: &str) -> Option<&str> { + self.name_to_id.get(name).copied() + } + + /// Check if a license is OSI-approved. + pub fn is_osi_approved(&self, id: &str) -> bool { + self.get_license_by_identifier(id) + .is_some_and(|l| l.osi_approved) + } + + /// Check if a license is deprecated. + pub fn is_deprecated(&self, id: &str) -> bool { + self.get_license_by_identifier(id) + .is_some_and(|l| l.deprecated) + } + + /// Validate an SPDX license expression. + /// + /// Supports compound expressions with AND/OR, the WITH operator for + /// exceptions, the `+` (or-later) operator, LicenseRef, and the special + /// values `NONE` and `NOASSERTION`. + pub fn validate(&self, license: &str) -> bool { + let license = license.trim(); + if license.is_empty() { + return false; + } + + // Special values + if license.eq_ignore_ascii_case("NONE") || license.eq_ignore_ascii_case("NOASSERTION") { + return true; + } + + let mut parser = Parser::new(license, self); + parser.parse_expression() && parser.is_at_end() + } + + /// Validate a list of SPDX license identifiers (joined with OR). + pub fn validate_list(&self, licenses: &[&str]) -> bool { + if licenses.is_empty() { + return false; + } + let expr = licenses.join(" OR "); + self.validate(&expr) + } + + fn is_valid_license_id(&self, id: &str) -> bool { + self.licenses.contains_key(id.to_lowercase().as_str()) + } + + fn is_valid_exception_id(&self, id: &str) -> bool { + self.exceptions.contains_key(id.to_lowercase().as_str()) + } +} + +impl Default for SpdxLicenses { + fn default() -> Self { + Self::new() + } +} + +/// Global static SPDX license database. +static SPDX: LazyLock<SpdxLicenses> = LazyLock::new(SpdxLicenses::new); + +/// Get a reference to the global SPDX license database. +pub fn spdx() -> &'static SpdxLicenses { + &SPDX +} + +// --------------------------------------------------------------------------- +// SPDX expression parser (recursive descent) +// --------------------------------------------------------------------------- +// +// Grammar: +// expression = compound_expr +// compound_expr = head_expr (("AND" | "OR") compound_expr)? +// head_expr = simple_expr ("WITH" exception_id)? +// | "(" compound_expr ")" +// simple_expr = license_id "+"? +// | license_ref +// license_ref = ("DocumentRef-" idstring ":")? "LicenseRef-" idstring +// idstring = [a-zA-Z0-9-.]+ + +struct Parser<'a> { + tokens: Vec<&'a str>, + pos: usize, + db: &'a SpdxLicenses, +} + +impl<'a> Parser<'a> { + fn new(input: &'a str, db: &'a SpdxLicenses) -> Self { + let tokens = Self::tokenize(input); + Self { + tokens, + pos: 0, + db, + } + } + + fn tokenize(input: &str) -> Vec<&str> { + let mut tokens = Vec::new(); + let mut chars = input.char_indices().peekable(); + + while let Some(&(i, c)) = chars.peek() { + if c.is_whitespace() { + chars.next(); + continue; + } + if c == '(' || c == ')' || c == '+' { + tokens.push(&input[i..i + 1]); + chars.next(); + continue; + } + // Identifier or keyword: consume until whitespace or special char + let start = i; + loop { + chars.next(); + match chars.peek() { + Some(&(_, ch)) if !ch.is_whitespace() && ch != '(' && ch != ')' => { + // '+' only breaks if it's right after an identifier + if ch == '+' { + break; + } + } + _ => break, + } + } + let end = chars.peek().map_or(input.len(), |&(j, _)| j); + tokens.push(&input[start..end]); + } + + tokens + } + + fn peek(&self) -> Option<&'a str> { + self.tokens.get(self.pos).copied() + } + + fn advance(&mut self) -> Option<&'a str> { + let tok = self.tokens.get(self.pos).copied(); + if tok.is_some() { + self.pos += 1; + } + tok + } + + fn is_at_end(&self) -> bool { + self.pos >= self.tokens.len() + } + + fn expect(&mut self, expected: &str) -> bool { + if self.peek() == Some(expected) { + self.advance(); + true + } else { + false + } + } + + /// Parse the top-level expression. + fn parse_expression(&mut self) -> bool { + self.parse_compound_expr() + } + + /// compound_expr = head_expr (("AND" | "OR") compound_expr)? + fn parse_compound_expr(&mut self) -> bool { + if !self.parse_head_expr() { + return false; + } + + if let Some(tok) = self.peek() + && (tok == "AND" || tok == "OR") + { + self.advance(); + return self.parse_compound_expr(); + } + + true + } + + /// head_expr = "(" compound_expr ")" | simple_expr ("WITH" exception_id)? + fn parse_head_expr(&mut self) -> bool { + if self.expect("(") { + if !self.parse_compound_expr() { + return false; + } + return self.expect(")"); + } + + if !self.parse_simple_expr() { + return false; + } + + // Optional WITH clause + if self.peek() == Some("WITH") { + self.advance(); + return self.parse_exception_id(); + } + + true + } + + /// simple_expr = license_ref | license_id "+"? + fn parse_simple_expr(&mut self) -> bool { + let tok = match self.peek() { + Some(t) => t, + None => return false, + }; + + // LicenseRef / DocumentRef + if tok.starts_with("LicenseRef-") || tok.starts_with("DocumentRef-") { + return self.parse_license_ref(); + } + + // Regular license identifier — could be multi-token with "-" + // We just consume the current token and check + self.advance(); + + // Handle '+' (or-later) operator + if self.peek() == Some("+") { + self.advance(); + } + + self.db.is_valid_license_id(tok) + } + + /// license_ref = ("DocumentRef-" idstring ":")? "LicenseRef-" idstring + fn parse_license_ref(&mut self) -> bool { + let tok = match self.advance() { + Some(t) => t, + None => return false, + }; + + if let Some(rest) = tok.strip_prefix("DocumentRef-") { + // Must contain ":LicenseRef-" within + if let Some(colon_pos) = rest.find(":LicenseRef-") { + let doc_id = &rest[..colon_pos]; + let license_ref_id = &rest[colon_pos + ":LicenseRef-".len()..]; + return is_valid_idstring(doc_id) && is_valid_idstring(license_ref_id); + } + return false; + } + + if let Some(id) = tok.strip_prefix("LicenseRef-") { + return is_valid_idstring(id); + } + + false + } + + fn parse_exception_id(&mut self) -> bool { + match self.advance() { + Some(id) => self.db.is_valid_exception_id(id), + None => false, + } + } +} + +/// Check that a string matches `[a-zA-Z0-9.-]+`. +fn is_valid_idstring(s: &str) -> bool { + !s.is_empty() && s.bytes().all(|b| b.is_ascii_alphanumeric() || b == b'.' || b == b'-') +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn valid_identifiers() { + let db = spdx(); + assert!(db.validate("MIT")); + assert!(db.validate("Apache-2.0")); + assert!(db.validate("GPL-3.0-only")); + assert!(db.validate("0BSD")); + } + + #[test] + fn case_insensitive() { + let db = spdx(); + assert!(db.validate("mit")); + assert!(db.validate("apache-2.0")); + assert!(db.validate("Mit")); + } + + #[test] + fn or_expression() { + let db = spdx(); + assert!(db.validate("MIT OR Apache-2.0")); + } + + #[test] + fn and_expression() { + let db = spdx(); + assert!(db.validate("MIT AND Apache-2.0")); + } + + #[test] + fn with_exception() { + let db = spdx(); + assert!(db.validate("GPL-2.0-only WITH Classpath-exception-2.0")); + } + + #[test] + fn complex_expression() { + let db = spdx(); + assert!(db.validate("(MIT AND Apache-2.0) OR GPL-3.0-only")); + assert!(db.validate("(MIT OR Apache-2.0) AND (GPL-2.0-only OR BSD-2-Clause)")); + } + + #[test] + fn special_values() { + let db = spdx(); + assert!(db.validate("NONE")); + assert!(db.validate("NOASSERTION")); + assert!(db.validate("none")); + assert!(db.validate("noassertion")); + } + + #[test] + fn or_later_operator() { + let db = spdx(); + assert!(db.validate("Apache-2.0+")); + assert!(db.validate("GPL-2.0-only+")); + } + + #[test] + fn license_ref() { + let db = spdx(); + assert!(db.validate("LicenseRef-custom")); + assert!(db.validate("LicenseRef-my-license.1")); + assert!(db.validate("DocumentRef-spdx-tool-1.2:LicenseRef-MIT-Style-2")); + } + + #[test] + fn invalid_expressions() { + let db = spdx(); + assert!(!db.validate("")); + assert!(!db.validate("totally-not-a-license")); + assert!(!db.validate("MIT AND")); + assert!(!db.validate("AND MIT")); + assert!(!db.validate("MIT OR")); + assert!(!db.validate("(MIT")); + assert!(!db.validate("MIT)")); + assert!(!db.validate("MIT WITH")); + assert!(!db.validate("MIT WITH not-an-exception")); + } + + #[test] + fn validate_list() { + let db = spdx(); + assert!(db.validate_list(&["MIT", "Apache-2.0"])); + assert!(!db.validate_list(&[])); + assert!(!db.validate_list(&["not-valid"])); + } + + #[test] + fn license_lookup() { + let db = spdx(); + let mit = db.get_license_by_identifier("MIT").unwrap(); + assert_eq!(mit.identifier, "MIT"); + assert!(mit.osi_approved); + assert!(!mit.deprecated); + + assert!(db.get_license_by_identifier("mit").is_some()); + assert!(db.get_license_by_identifier("nonexistent").is_none()); + } + + #[test] + fn exception_lookup() { + let db = spdx(); + let exc = db + .get_exception_by_identifier("Classpath-exception-2.0") + .unwrap(); + assert_eq!(exc.identifier, "Classpath-exception-2.0"); + } + + #[test] + fn name_lookup() { + let db = spdx(); + assert_eq!( + db.get_identifier_by_name("MIT License"), + Some("MIT") + ); + } + + #[test] + fn osi_and_deprecated() { + let db = spdx(); + assert!(db.is_osi_approved("MIT")); + assert!(!db.is_osi_approved("nonexistent")); + assert!(!db.is_deprecated("MIT")); + } +} |
