aboutsummaryrefslogtreecommitdiffhomepage
path: root/crates/mozart-php-serialize/src/lib.rs
blob: b4c687302bd679adb59945ded0ae663a38caba7a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
//! Byte-compatible port of PHP's `serialize()` function.
//!
//! Mirrors `php_var_serialize` in PHP's source: each value is rendered to a
//! tagged form like `b:1;`, `i:42;`, `s:3:"foo";`, `a:N:{...}` so the output
//! can be SHA-1'd and compared against PHP-side hashes (e.g. Composer's
//! `PathRepository` reference, which is `sha1($json . serialize($options))`).
//!
//! Only the value forms Mozart needs today are implemented. Floats, objects,
//! and references are deliberately omitted — extend the [`Value`] enum and
//! [`serialize`] writer when a new shape is required, and add a focused test
//! for it (the file_get_contents → hash flow downstream is unforgiving).
//!
//! Lengths are byte counts, not character counts. Array keys are written in
//! insertion order (PHP arrays preserve insertion order). Integer-coercible
//! string keys (e.g. `"1"`) are NOT auto-converted to integers — PHP itself
//! does that during array construction, not at serialization time, so callers
//! that care must construct [`Value::Int`] keys directly.

use std::fmt::Write as _;

/// One PHP value, suitable for `serialize()`.
///
/// Add variants here as the need arises (e.g. `Float(f64)` → `d:<repr>;`).
/// Keep the variants minimal — every variant we add is a new compatibility
/// surface that has to match PHP byte-for-byte.
///
/// Values compare structurally via `==`: two arrays are equal only when they
/// hold equal entries in the same order. Only `PartialEq` is derived (not
/// `Eq`/`Hash`) so that a future `Float(f64)` variant remains possible.
#[derive(Debug, Clone, PartialEq)]
pub enum Value {
    /// PHP `null`.
    Null,
    /// PHP `true` / `false`.
    Bool(bool),
    /// PHP integer, held as a 64-bit signed value.
    Int(i64),
    /// UTF-8 string. Length prefix is the byte length, matching PHP.
    String(String),
    /// Associative or indexed array as `(key, value)` pairs. Order is
    /// preserved verbatim — the writer does not normalize integer-coercible
    /// keys or sort entries.
    Array(Vec<(Value, Value)>),
}

/// Serialize `value` into the text PHP's `serialize()` produces for it.
///
/// The result is a `String` rather than a byte buffer: the framing emitted
/// by the writer is ASCII, and the only other content is copied from
/// [`Value::String`] payloads, which are already UTF-8 — so the output is
/// always valid UTF-8.
pub fn serialize(value: &Value) -> String {
    let mut rendered = String::new();
    write_value(&mut rendered, value);
    rendered
}

/// Append the serialized form of `value` to `out`.
///
/// Arrays recurse: each entry's key is written first, then its value, in
/// the order the entries are stored.
fn write_value(out: &mut String, value: &Value) {
    // Shared message for the `write!` results we unwrap below.
    const INFALLIBLE: &str = "writing to String never fails";

    match value {
        Value::Null => out.push_str("N;"),
        Value::Bool(true) => out.push_str("b:1;"),
        Value::Bool(false) => out.push_str("b:0;"),
        Value::Int(number) => write!(out, "i:{number};").expect(INFALLIBLE),
        Value::String(text) => {
            // `str::len` is the UTF-8 byte length, which is what the
            // length prefix must carry.
            let byte_len = text.len();
            write!(out, "s:{byte_len}:\"{text}\";").expect(INFALLIBLE);
        }
        Value::Array(entries) => {
            let count = entries.len();
            write!(out, "a:{count}:{{").expect(INFALLIBLE);
            entries.iter().for_each(|(key, item)| {
                write_value(out, key);
                write_value(out, item);
            });
            out.push('}');
        }
    }
}

#[cfg(test)]
mod tests {
    use super::{serialize, Value};

    // Every expected string below was captured from real PHP output
    // (`php -r 'echo serialize(...);'`), so these tests pin Mozart to what
    // PHP actually emits rather than to our reading of the format.

    /// Shorthand for a [`Value::String`] built from a literal.
    fn text(s: &str) -> Value {
        Value::String(s.to_owned())
    }

    /// Assert that `value` serializes to exactly `expected`.
    #[track_caller]
    fn check(value: &Value, expected: &str) {
        assert_eq!(serialize(value), expected);
    }

    #[test]
    fn null() {
        check(&Value::Null, "N;");
    }

    #[test]
    fn bool_true() {
        check(&Value::Bool(true), "b:1;");
    }

    #[test]
    fn bool_false() {
        check(&Value::Bool(false), "b:0;");
    }

    #[test]
    fn int_positive() {
        check(&Value::Int(42), "i:42;");
    }

    #[test]
    fn int_zero() {
        check(&Value::Int(0), "i:0;");
    }

    #[test]
    fn int_negative() {
        check(&Value::Int(-7), "i:-7;");
    }

    #[test]
    fn string_ascii() {
        check(&text("hi"), "s:2:\"hi\";");
    }

    #[test]
    fn string_empty() {
        check(&text(""), "s:0:\"\";");
    }

    #[test]
    fn string_length_is_bytes_not_chars() {
        // 「日本」 is two characters but six UTF-8 bytes (three per kanji);
        // PHP's length prefix counts bytes, so ours must too.
        check(&text("日本"), "s:6:\"日本\";");
    }

    #[test]
    fn array_empty() {
        check(&Value::Array(Vec::new()), "a:0:{}");
    }

    #[test]
    fn array_assoc_single() {
        let entries = vec![(text("relative"), Value::Bool(true))];
        check(&Value::Array(entries), "a:1:{s:8:\"relative\";b:1;}");
    }

    #[test]
    fn array_assoc_multi_preserves_order() {
        let entries = vec![(text("a"), Value::Int(1)), (text("b"), Value::Int(2))];
        check(&Value::Array(entries), "a:2:{s:1:\"a\";i:1;s:1:\"b\";i:2;}");
    }

    #[test]
    fn array_indexed() {
        // PHP `serialize([10, 20])` assigns the integer keys 0 and 1.
        let entries = vec![
            (Value::Int(0), Value::Int(10)),
            (Value::Int(1), Value::Int(20)),
        ];
        check(&Value::Array(entries), "a:2:{i:0;i:10;i:1;i:20;}");
    }

    #[test]
    fn array_nested() {
        // PHP: serialize(['outer' => ['inner' => true]])
        let inner = Value::Array(vec![(text("inner"), Value::Bool(true))]);
        let outer = Value::Array(vec![(text("outer"), inner)]);
        check(&outer, "a:1:{s:5:\"outer\";a:1:{s:5:\"inner\";b:1;}}");
    }
}