aboutsummaryrefslogtreecommitdiffhomepage
path: root/crates/mozart-spdx-licenses
diff options
context:
space:
mode:
authornsfisis <nsfisis@gmail.com>2026-02-22 12:27:20 +0900
committernsfisis <nsfisis@gmail.com>2026-02-22 12:37:41 +0900
commit1ef1ebdcf50ae1358ec06e3c6a2fb797a8461617 (patch)
tree97cc47fc216dc7e64697ffa74b6a2bb70f395abe /crates/mozart-spdx-licenses
parent92fdff257d2c64f94600ba70bf17e429d46474b2 (diff)
downloadphp-mozart-1ef1ebdcf50ae1358ec06e3c6a2fb797a8461617.tar.gz
php-mozart-1ef1ebdcf50ae1358ec06e3c6a2fb797a8461617.tar.zst
php-mozart-1ef1ebdcf50ae1358ec06e3c6a2fb797a8461617.zip
feat(spdx): add mozart-spdx-licenses crate for SPDX license validation
Add a new workspace crate that validates SPDX license expressions using data from composer/spdx-licenses (git submodule). It includes build.rs codegen from JSON and a recursive descent expression parser supporting AND/OR/WITH/LicenseRef, and it is integrated into mozart-core's validate_license function. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Diffstat (limited to 'crates/mozart-spdx-licenses')
-rw-r--r--crates/mozart-spdx-licenses/Cargo.toml10
-rw-r--r--crates/mozart-spdx-licenses/build.rs54
m---------crates/mozart-spdx-licenses/composer-spdx-licenses0
-rw-r--r--crates/mozart-spdx-licenses/src/lib.rs467
4 files changed, 531 insertions, 0 deletions
diff --git a/crates/mozart-spdx-licenses/Cargo.toml b/crates/mozart-spdx-licenses/Cargo.toml
new file mode 100644
index 0000000..fc31b9f
--- /dev/null
+++ b/crates/mozart-spdx-licenses/Cargo.toml
@@ -0,0 +1,10 @@
+[package]
+name = "mozart-spdx-licenses"
+version.workspace = true
+edition.workspace = true
+
+[dependencies]
+
+[build-dependencies]
+serde.workspace = true
+serde_json.workspace = true
diff --git a/crates/mozart-spdx-licenses/build.rs b/crates/mozart-spdx-licenses/build.rs
new file mode 100644
index 0000000..27d4ed6
--- /dev/null
+++ b/crates/mozart-spdx-licenses/build.rs
@@ -0,0 +1,54 @@
+use serde_json::Value;
+use std::collections::BTreeMap;
+use std::env;
+use std::fs;
+use std::path::Path;
+
+/// Read and parse one of the composer/spdx-licenses JSON tables.
+///
+/// Panics with the offending path in the message: the most likely cause of a
+/// failure here is that the `composer-spdx-licenses` git submodule has not
+/// been checked out, and a bare `unwrap()` would hide that.
+fn load_table(path: &Path) -> BTreeMap<String, Value> {
+    let text = fs::read_to_string(path).unwrap_or_else(|err| {
+        panic!(
+            "failed to read {}: {err} (is the composer-spdx-licenses submodule checked out?)",
+            path.display()
+        )
+    });
+    serde_json::from_str(&text)
+        .unwrap_or_else(|err| panic!("failed to parse {}: {err}", path.display()))
+}
+
+/// Build script entry point: generate `$OUT_DIR/spdx_data.rs`.
+///
+/// Reads `spdx-licenses.json` and `spdx-exceptions.json` from
+/// `composer-spdx-licenses/res` and emits two constant tables:
+///
+/// * `LICENSES`: `(lowercase id, id, full name, osi_approved, deprecated)`
+/// * `EXCEPTIONS`: `(lowercase id, id, full name)`
+///
+/// Rows are emitted in `BTreeMap` key order, so the output is deterministic.
+///
+/// # Panics
+///
+/// Panics (failing the build) if either JSON file is missing or malformed,
+/// if a row does not have the expected shape, or if the output file cannot
+/// be written.
+fn main() {
+    let manifest_dir =
+        env::var("CARGO_MANIFEST_DIR").expect("cargo sets CARGO_MANIFEST_DIR for build scripts");
+    let res_dir = Path::new(&manifest_dir).join("composer-spdx-licenses/res");
+
+    let licenses_path = res_dir.join("spdx-licenses.json");
+    let exceptions_path = res_dir.join("spdx-exceptions.json");
+
+    // Only regenerate when the upstream data files change.
+    println!("cargo:rerun-if-changed={}", licenses_path.display());
+    println!("cargo:rerun-if-changed={}", exceptions_path.display());
+
+    let licenses_json = load_table(&licenses_path);
+    let exceptions_json = load_table(&exceptions_path);
+
+    let out_dir = env::var("OUT_DIR").expect("cargo sets OUT_DIR for build scripts");
+    let out_path = Path::new(&out_dir).join("spdx_data.rs");
+
+    let mut code = String::new();
+
+    // Generate licenses array
+    code.push_str("const LICENSES: &[(&str, &str, &str, bool, bool)] = &[\n");
+    for (id, val) in &licenses_json {
+        // Each value is read as the array `[full_name, osi_approved, deprecated]`.
+        let (Some(full_name), Some(osi_approved), Some(deprecated)) = (
+            val.get(0).and_then(Value::as_str),
+            val.get(1).and_then(Value::as_bool),
+            val.get(2).and_then(Value::as_bool),
+        ) else {
+            panic!(
+                "license {id:?} in {}: expected [full_name, osi_approved, deprecated]",
+                licenses_path.display()
+            );
+        };
+        let lower = id.to_lowercase();
+        // `{:?}` renders the strings as escaped Rust string literals.
+        code.push_str(&format!(
+            " ({:?}, {:?}, {:?}, {}, {}),\n",
+            lower, id, full_name, osi_approved, deprecated
+        ));
+    }
+    code.push_str("];\n\n");
+
+    // Generate exceptions array
+    code.push_str("const EXCEPTIONS: &[(&str, &str, &str)] = &[\n");
+    for (id, val) in &exceptions_json {
+        // Only the first array element (the full name) is used.
+        let Some(full_name) = val.get(0).and_then(Value::as_str) else {
+            panic!(
+                "exception {id:?} in {}: expected [full_name, ...]",
+                exceptions_path.display()
+            );
+        };
+        let lower = id.to_lowercase();
+        code.push_str(&format!(" ({:?}, {:?}, {:?}),\n", lower, id, full_name));
+    }
+    code.push_str("];\n");
+
+    fs::write(&out_path, code)
+        .unwrap_or_else(|err| panic!("failed to write {}: {err}", out_path.display()));
+}
diff --git a/crates/mozart-spdx-licenses/composer-spdx-licenses b/crates/mozart-spdx-licenses/composer-spdx-licenses
new file mode 160000
+Subproject ce0b20d268fbbee37a1aea57006ad90c6bc8d51
diff --git a/crates/mozart-spdx-licenses/src/lib.rs b/crates/mozart-spdx-licenses/src/lib.rs
new file mode 100644
index 0000000..04dce27
--- /dev/null
+++ b/crates/mozart-spdx-licenses/src/lib.rs
@@ -0,0 +1,467 @@
+use std::collections::HashMap;
+use std::sync::LazyLock;
+
+include!(concat!(env!("OUT_DIR"), "/spdx_data.rs"));
+
+/// Information about an SPDX license.
+///
+/// All string fields borrow from the static table generated at build time.
+#[derive(Debug, Clone)]
+pub struct LicenseInfo {
+ /// License identifier as spelled in the source data (original casing).
+ pub identifier: &'static str,
+ /// Full license name from the source data.
+ pub full_name: &'static str,
+ /// OSI-approved flag from the source data.
+ pub osi_approved: bool,
+ /// Deprecated flag from the source data.
+ pub deprecated: bool,
+}
+
+/// Information about an SPDX license exception.
+///
+/// All string fields borrow from the static table generated at build time.
+#[derive(Debug, Clone)]
+pub struct ExceptionInfo {
+ /// Exception identifier as spelled in the source data (original casing).
+ pub identifier: &'static str,
+ /// Full exception name from the source data.
+ pub full_name: &'static str,
+}
+
+/// SPDX license database with expression validation.
+pub struct SpdxLicenses {
+ // Licenses keyed by their lowercase identifier.
+ licenses: HashMap<&'static str, LicenseInfo>,
+ // Exceptions keyed by their lowercase identifier.
+ exceptions: HashMap<&'static str, ExceptionInfo>,
+ // Full license name -> identifier (original casing).
+ name_to_id: HashMap<&'static str, &'static str>,
+}
+
+impl SpdxLicenses {
+    /// Build the license database from the tables generated by the build script.
+    ///
+    /// Each `LICENSES` row is `(lowercase id, id, full name, OSI-approved,
+    /// deprecated)` and each `EXCEPTIONS` row is `(lowercase id, id, full
+    /// name)`. Both maps are keyed by the lowercase identifier so that
+    /// lookups can be case-insensitive.
+    pub fn new() -> Self {
+        let mut licenses = HashMap::with_capacity(LICENSES.len());
+        let mut name_to_id = HashMap::with_capacity(LICENSES.len());
+        for &(lower, id, full_name, osi_approved, deprecated) in LICENSES {
+            licenses.insert(
+                lower,
+                LicenseInfo {
+                    identifier: id,
+                    full_name,
+                    osi_approved,
+                    deprecated,
+                },
+            );
+            // If two licenses share a full name, the later row wins.
+            name_to_id.insert(full_name, id);
+        }
+
+        let exceptions = EXCEPTIONS
+            .iter()
+            .map(|&(lower, id, full_name)| {
+                (
+                    lower,
+                    ExceptionInfo {
+                        identifier: id,
+                        full_name,
+                    },
+                )
+            })
+            .collect();
+
+        Self {
+            licenses,
+            exceptions,
+            name_to_id,
+        }
+    }
+
+    /// Look up a license by its SPDX identifier (ASCII case-insensitive).
+    ///
+    /// Only ASCII letters are case-folded. Full Unicode lowercasing would
+    /// let look-alike code points (for example U+212A KELVIN SIGN, which
+    /// lowercases to `k`) match a genuine ASCII identifier.
+    pub fn get_license_by_identifier(&self, id: &str) -> Option<&LicenseInfo> {
+        self.licenses.get(id.to_ascii_lowercase().as_str())
+    }
+
+    /// Look up an exception by its SPDX identifier (ASCII case-insensitive).
+    ///
+    /// Case folding is ASCII-only for the same reason as
+    /// [`get_license_by_identifier`](Self::get_license_by_identifier).
+    pub fn get_exception_by_identifier(&self, id: &str) -> Option<&ExceptionInfo> {
+        self.exceptions.get(id.to_ascii_lowercase().as_str())
+    }
+
+    /// Look up a license identifier by its full name.
+    ///
+    /// The name must match exactly, including case.
+    pub fn get_identifier_by_name(&self, name: &str) -> Option<&str> {
+        self.name_to_id.get(name).copied()
+    }
+
+    /// Check if a license is OSI-approved.
+    ///
+    /// Returns `false` for unknown identifiers.
+    pub fn is_osi_approved(&self, id: &str) -> bool {
+        self.get_license_by_identifier(id)
+            .is_some_and(|license| license.osi_approved)
+    }
+
+    /// Check if a license is deprecated.
+    ///
+    /// Returns `false` for unknown identifiers.
+    pub fn is_deprecated(&self, id: &str) -> bool {
+        self.get_license_by_identifier(id)
+            .is_some_and(|license| license.deprecated)
+    }
+
+    /// Validate an SPDX license expression.
+    ///
+    /// Supports compound expressions with AND/OR, the WITH operator for
+    /// exceptions, the `+` (or-later) operator, LicenseRef, and the special
+    /// values `NONE` and `NOASSERTION`.
+    ///
+    /// Leading and trailing whitespace is ignored. An empty (or
+    /// whitespace-only) string is invalid. The special values are accepted
+    /// in any ASCII case, but only as the entire expression.
+    pub fn validate(&self, license: &str) -> bool {
+        let license = license.trim();
+        if license.is_empty() {
+            return false;
+        }
+
+        // Special values
+        if license.eq_ignore_ascii_case("NONE") || license.eq_ignore_ascii_case("NOASSERTION") {
+            return true;
+        }
+
+        // The whole input must be consumed; trailing tokens make it invalid.
+        let mut parser = Parser::new(license, self);
+        parser.parse_expression() && parser.is_at_end()
+    }
+
+    /// Validate a list of SPDX license identifiers (joined with OR).
+    ///
+    /// An empty list is invalid. The entries are joined with `" OR "` and
+    /// the resulting string is validated as a single expression.
+    pub fn validate_list(&self, licenses: &[&str]) -> bool {
+        if licenses.is_empty() {
+            return false;
+        }
+        self.validate(&licenses.join(" OR "))
+    }
+
+    /// Whether `id` names a known license (ASCII case-insensitive).
+    fn is_valid_license_id(&self, id: &str) -> bool {
+        self.get_license_by_identifier(id).is_some()
+    }
+
+    /// Whether `id` names a known exception (ASCII case-insensitive).
+    fn is_valid_exception_id(&self, id: &str) -> bool {
+        self.get_exception_by_identifier(id).is_some()
+    }
+}
+
+// `Default` simply delegates to `SpdxLicenses::new`.
+impl Default for SpdxLicenses {
+ fn default() -> Self {
+ Self::new()
+ }
+}
+
+/// Global static SPDX license database.
+///
+/// Wrapped in a `LazyLock`, so `SpdxLicenses::new` runs on first access
+/// rather than at program start.
+static SPDX: LazyLock<SpdxLicenses> = LazyLock::new(SpdxLicenses::new);
+
+/// Get a reference to the global SPDX license database.
+///
+/// Dereferencing the `LazyLock` initializes the database on the first call;
+/// later calls return the same instance.
+pub fn spdx() -> &'static SpdxLicenses {
+ &SPDX
+}
+
+// ---------------------------------------------------------------------------
+// SPDX expression parser (recursive descent)
+// ---------------------------------------------------------------------------
+//
+// Grammar:
+// expression = compound_expr
+// compound_expr = head_expr (("AND" | "OR") compound_expr)?
+// head_expr = simple_expr ("WITH" exception_id)?
+// | "(" compound_expr ")"
+// simple_expr = license_id "+"?
+// | license_ref
+// license_ref = ("DocumentRef-" idstring ":")? "LicenseRef-" idstring
+// idstring = [a-zA-Z0-9-.]+
+
+// Recursive descent parser state for one license expression.
+struct Parser<'a> {
+ // Tokens of the expression, each a slice of the input string.
+ tokens: Vec<&'a str>,
+ // Index into `tokens` of the next token to be consumed.
+ pos: usize,
+ // Database used to check license and exception identifiers.
+ db: &'a SpdxLicenses,
+}
+
+impl<'a> Parser<'a> {
+    /// Tokenize `input` and position the parser at the first token.
+    fn new(input: &'a str, db: &'a SpdxLicenses) -> Self {
+        Self {
+            tokens: Self::tokenize(input),
+            pos: 0,
+            db,
+        }
+    }
+
+    /// Split `input` into tokens that borrow from it.
+    ///
+    /// Whitespace separates tokens and is discarded. `(` and `)` are always
+    /// single-character tokens. Every other maximal run of characters forms
+    /// one token.
+    ///
+    /// A `+` is deliberately left inside the run it appears in. The
+    /// or-later suffix is only meaningful when it is glued to a license
+    /// identifier (`GPL-2.0+`); if it were split into its own token the
+    /// parser could no longer tell `MIT+` from the invalid `MIT +`.
+    fn tokenize(input: &str) -> Vec<&str> {
+        fn is_boundary(c: char) -> bool {
+            c.is_whitespace() || c == '(' || c == ')'
+        }
+
+        let mut tokens = Vec::new();
+        let mut rest = input.trim_start();
+        while let Some(first) = rest.chars().next() {
+            let len = if first == '(' || first == ')' {
+                1
+            } else {
+                // `first` is not a boundary, so this is always > 0.
+                rest.find(is_boundary).unwrap_or(rest.len())
+            };
+            let (token, tail) = rest.split_at(len);
+            tokens.push(token);
+            rest = tail.trim_start();
+        }
+
+        tokens
+    }
+
+    /// Return the next token without consuming it.
+    fn peek(&self) -> Option<&'a str> {
+        self.tokens.get(self.pos).copied()
+    }
+
+    /// Consume and return the next token, or `None` at end of input.
+    fn advance(&mut self) -> Option<&'a str> {
+        let tok = self.peek()?;
+        self.pos += 1;
+        Some(tok)
+    }
+
+    /// Whether every token has been consumed.
+    fn is_at_end(&self) -> bool {
+        self.pos >= self.tokens.len()
+    }
+
+    /// Consume the next token if it equals `expected`; report whether it did.
+    fn expect(&mut self, expected: &str) -> bool {
+        let matched = self.peek() == Some(expected);
+        if matched {
+            self.pos += 1;
+        }
+        matched
+    }
+
+    /// Parse the top-level expression.
+    fn parse_expression(&mut self) -> bool {
+        self.parse_compound_expr()
+    }
+
+    /// compound_expr = head_expr (("AND" | "OR") compound_expr)?
+    ///
+    /// Implemented as a loop rather than by recursion so that a long
+    /// `A OR B OR C ...` chain does not deepen the call stack. The operators
+    /// are matched exactly (uppercase only).
+    fn parse_compound_expr(&mut self) -> bool {
+        loop {
+            if !self.parse_head_expr() {
+                return false;
+            }
+            match self.peek() {
+                Some("AND" | "OR") => {
+                    self.advance();
+                }
+                _ => return true,
+            }
+        }
+    }
+
+    /// head_expr = "(" compound_expr ")" | simple_expr ("WITH" exception_id)?
+    fn parse_head_expr(&mut self) -> bool {
+        if self.expect("(") {
+            return self.parse_compound_expr() && self.expect(")");
+        }
+
+        if !self.parse_simple_expr() {
+            return false;
+        }
+
+        // Optional WITH clause
+        if self.expect("WITH") {
+            return self.parse_exception_id();
+        }
+
+        true
+    }
+
+    /// simple_expr = license_ref | license_id "+"?
+    ///
+    /// The or-later `+` must be attached directly to the identifier; a
+    /// single trailing `+` is stripped before the identifier is looked up.
+    fn parse_simple_expr(&mut self) -> bool {
+        let Some(tok) = self.peek() else {
+            return false;
+        };
+
+        // LicenseRef / DocumentRef
+        if tok.starts_with("LicenseRef-") || tok.starts_with("DocumentRef-") {
+            return self.parse_license_ref();
+        }
+
+        // The token is consumed even when it turns out not to be a license.
+        self.advance();
+
+        let id = tok.strip_suffix('+').unwrap_or(tok);
+        self.db.is_valid_license_id(id)
+    }
+
+    /// license_ref = ("DocumentRef-" idstring ":")? "LicenseRef-" idstring
+    ///
+    /// The whole reference is expected to be a single token.
+    fn parse_license_ref(&mut self) -> bool {
+        let Some(tok) = self.advance() else {
+            return false;
+        };
+
+        if let Some(rest) = tok.strip_prefix("DocumentRef-") {
+            // Must contain ":LicenseRef-" within
+            return match rest.split_once(":LicenseRef-") {
+                Some((doc_id, license_ref_id)) => {
+                    is_valid_idstring(doc_id) && is_valid_idstring(license_ref_id)
+                }
+                None => false,
+            };
+        }
+
+        tok.strip_prefix("LicenseRef-").is_some_and(is_valid_idstring)
+    }
+
+    /// Consume one token and check that it is a known exception identifier.
+    fn parse_exception_id(&mut self) -> bool {
+        match self.advance() {
+            Some(id) => self.db.is_valid_exception_id(id),
+            None => false,
+        }
+    }
+}
+
+/// Return `true` when `s` is non-empty and consists solely of ASCII letters,
+/// ASCII digits, `.` and `-`, i.e. it matches `[a-zA-Z0-9.-]+`.
+fn is_valid_idstring(s: &str) -> bool {
+    if s.is_empty() {
+        return false;
+    }
+    s.bytes()
+        .all(|byte| matches!(byte, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'.' | b'-'))
+}
+
+// Unit tests; all of them run against the shared global database from `spdx()`.
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ // Plain identifiers present in the generated table are accepted.
+ #[test]
+ fn valid_identifiers() {
+ let db = spdx();
+ assert!(db.validate("MIT"));
+ assert!(db.validate("Apache-2.0"));
+ assert!(db.validate("GPL-3.0-only"));
+ assert!(db.validate("0BSD"));
+ }
+
+ // Identifier matching ignores case.
+ #[test]
+ fn case_insensitive() {
+ let db = spdx();
+ assert!(db.validate("mit"));
+ assert!(db.validate("apache-2.0"));
+ assert!(db.validate("Mit"));
+ }
+
+ // Two licenses joined with OR.
+ #[test]
+ fn or_expression() {
+ let db = spdx();
+ assert!(db.validate("MIT OR Apache-2.0"));
+ }
+
+ // Two licenses joined with AND.
+ #[test]
+ fn and_expression() {
+ let db = spdx();
+ assert!(db.validate("MIT AND Apache-2.0"));
+ }
+
+ // A license followed by WITH and a known exception identifier.
+ #[test]
+ fn with_exception() {
+ let db = spdx();
+ assert!(db.validate("GPL-2.0-only WITH Classpath-exception-2.0"));
+ }
+
+ // Parenthesised groups combined with AND/OR.
+ #[test]
+ fn complex_expression() {
+ let db = spdx();
+ assert!(db.validate("(MIT AND Apache-2.0) OR GPL-3.0-only"));
+ assert!(db.validate("(MIT OR Apache-2.0) AND (GPL-2.0-only OR BSD-2-Clause)"));
+ }
+
+ // NONE and NOASSERTION are accepted in upper and lower case.
+ #[test]
+ fn special_values() {
+ let db = spdx();
+ assert!(db.validate("NONE"));
+ assert!(db.validate("NOASSERTION"));
+ assert!(db.validate("none"));
+ assert!(db.validate("noassertion"));
+ }
+
+ // A `+` directly after a known identifier is accepted.
+ #[test]
+ fn or_later_operator() {
+ let db = spdx();
+ assert!(db.validate("Apache-2.0+"));
+ assert!(db.validate("GPL-2.0-only+"));
+ }
+
+ // LicenseRef-* with and without a DocumentRef-*: prefix.
+ #[test]
+ fn license_ref() {
+ let db = spdx();
+ assert!(db.validate("LicenseRef-custom"));
+ assert!(db.validate("LicenseRef-my-license.1"));
+ assert!(db.validate("DocumentRef-spdx-tool-1.2:LicenseRef-MIT-Style-2"));
+ }
+
+ // Empty input, unknown ids, dangling operators and unbalanced
+ // parentheses are all rejected.
+ #[test]
+ fn invalid_expressions() {
+ let db = spdx();
+ assert!(!db.validate(""));
+ assert!(!db.validate("totally-not-a-license"));
+ assert!(!db.validate("MIT AND"));
+ assert!(!db.validate("AND MIT"));
+ assert!(!db.validate("MIT OR"));
+ assert!(!db.validate("(MIT"));
+ assert!(!db.validate("MIT)"));
+ assert!(!db.validate("MIT WITH"));
+ assert!(!db.validate("MIT WITH not-an-exception"));
+ }
+
+ // List validation: valid list, empty list, list with an unknown id.
+ #[test]
+ fn validate_list() {
+ let db = spdx();
+ assert!(db.validate_list(&["MIT", "Apache-2.0"]));
+ assert!(!db.validate_list(&[]));
+ assert!(!db.validate_list(&["not-valid"]));
+ }
+
+ // Lookup returns the stored metadata and ignores the case of the query.
+ #[test]
+ fn license_lookup() {
+ let db = spdx();
+ let mit = db.get_license_by_identifier("MIT").unwrap();
+ assert_eq!(mit.identifier, "MIT");
+ assert!(mit.osi_approved);
+ assert!(!mit.deprecated);
+
+ assert!(db.get_license_by_identifier("mit").is_some());
+ assert!(db.get_license_by_identifier("nonexistent").is_none());
+ }
+
+ // Exception lookup returns the identifier in its original casing.
+ #[test]
+ fn exception_lookup() {
+ let db = spdx();
+ let exc = db
+ .get_exception_by_identifier("Classpath-exception-2.0")
+ .unwrap();
+ assert_eq!(exc.identifier, "Classpath-exception-2.0");
+ }
+
+ // Full name -> identifier lookup.
+ #[test]
+ fn name_lookup() {
+ let db = spdx();
+ assert_eq!(
+ db.get_identifier_by_name("MIT License"),
+ Some("MIT")
+ );
+ }
+
+ // The flag helpers return false for unknown identifiers.
+ #[test]
+ fn osi_and_deprecated() {
+ let db = spdx();
+ assert!(db.is_osi_approved("MIT"));
+ assert!(!db.is_osi_approved("nonexistent"));
+ assert!(!db.is_deprecated("MIT"));
+ }
+}