1
use thiserror::Error;
2

            
3
#[derive(Debug, Error, PartialEq, Eq)]
4
pub enum FrameError {
5
    #[error("frame contains invalid UTF-8")]
6
    InvalidUtf8,
7
    #[error("malformed frame: {0}")]
8
    Malformed(String),
9
}
10

            
11
#[derive(Debug, Default)]
12
pub struct FrameDecoder {
13
    buf: String,
14
}
15

            
16
impl FrameDecoder {
17
    #[must_use]
18
236
    pub fn new() -> Self {
19
236
        Self::default()
20
236
    }
21

            
22
380
    pub fn feed(&mut self, bytes: &[u8]) -> Result<(), FrameError> {
23
380
        let s = std::str::from_utf8(bytes).map_err(|_| FrameError::InvalidUtf8)?;
24
361
        self.buf.push_str(s);
25
361
        Ok(())
26
380
    }
27

            
28
580
    pub fn next_frame(&mut self) -> Option<Result<String, FrameError>> {
29
580
        let trimmed_offset = self.leading_skip_len();
30
580
        if trimmed_offset >= self.buf.len() {
31
235
            self.buf.drain(..trimmed_offset);
32
235
            return None;
33
345
        }
34
345
        match scan_form_end(&self.buf[trimmed_offset..]) {
35
200
            Scan::Complete(rel_end) => {
36
200
                let abs_end = trimmed_offset + rel_end;
37
200
                let frame: String = self.buf[trimmed_offset..abs_end].to_string();
38
200
                self.buf.drain(..abs_end);
39
200
                Some(Ok(frame))
40
            }
41
            Scan::Incomplete => {
42
145
                self.buf.drain(..trimmed_offset);
43
145
                None
44
            }
45
            Scan::Invalid(msg) => {
46
                self.buf.clear();
47
                Some(Err(FrameError::Malformed(msg)))
48
            }
49
        }
50
580
    }
51

            
52
580
    fn leading_skip_len(&self) -> usize {
53
580
        let bytes = self.buf.as_bytes();
54
580
        let mut i = 0;
55
723
        while i < bytes.len() {
56
488
            let c = bytes[i];
57
488
            if c == b' ' || c == b'\t' || c == b'\n' || c == b'\r' {
58
141
                i += 1;
59
347
            } else if c == b';' {
60
20
                while i < bytes.len() && bytes[i] != b'\n' {
61
18
                    i += 1;
62
18
                }
63
            } else {
64
345
                break;
65
            }
66
        }
67
580
        i
68
580
    }
69
}
70

            
71
/// Outcome of scanning text for the end of a single form.
enum Scan {
    /// A whole form was found; the payload is the byte offset, within the
    /// scanned string, just past the form's last byte.
    Complete(usize),
    /// The text ended before the form did.
    Incomplete,
    /// The text cannot be scanned as a form; the payload says why.
    Invalid(String),
}
76

            
77
347
fn scan_form_end(s: &str) -> Scan {
78
347
    let bytes = s.as_bytes();
79
347
    if bytes.is_empty() {
80
        return Scan::Incomplete;
81
347
    }
82
347
    match bytes[0] {
83
341
        b'(' => scan_balanced_list(s, 0),
84
2
        b'\'' | b'`' => scan_after_prefix(s, 1),
85
        b',' if bytes.get(1) == Some(&b'@') => scan_after_prefix(s, 2),
86
        b',' => scan_after_prefix(s, 1),
87
1
        b'"' => scan_string(s, 0),
88
2
        b'#' if bytes.get(1) == Some(&b'"') => scan_hash_string(s, 0),
89
1
        b'#' if bytes.get(1) == Some(&b'u')
90
1
            && bytes.get(2) == Some(&b'8')
91
1
            && bytes.get(3) == Some(&b'(') =>
92
        {
93
1
            scan_balanced_list(s, 3)
94
        }
95
1
        _ => scan_atom(s, 0),
96
    }
97
347
}
98

            
99
2
fn scan_after_prefix(s: &str, start: usize) -> Scan {
100
2
    if start >= s.len() {
101
        return Scan::Incomplete;
102
2
    }
103
2
    match scan_form_end(&s[start..]) {
104
2
        Scan::Complete(rel) => Scan::Complete(start + rel),
105
        other => other,
106
    }
107
2
}
108

            
109
342
fn scan_balanced_list(s: &str, start: usize) -> Scan {
110
342
    let bytes = s.as_bytes();
111
342
    let mut i = start;
112
342
    if bytes.get(i) != Some(&b'(') {
113
        return Scan::Invalid("expected '(' at list start".into());
114
342
    }
115
342
    let mut depth: usize = 1;
116
342
    i += 1;
117
6985
    while i < bytes.len() {
118
6840
        match bytes[i] {
119
273
            b'(' => depth += 1,
120
            b')' => {
121
434
                depth -= 1;
122
434
                if depth == 0 {
123
197
                    return Scan::Complete(i + 1);
124
237
                }
125
            }
126
3
            b'"' => match scan_string(s, i) {
127
3
                Scan::Complete(end) => {
128
3
                    i = end;
129
3
                    continue;
130
                }
131
                Scan::Incomplete => return Scan::Incomplete,
132
                Scan::Invalid(m) => return Scan::Invalid(m),
133
            },
134
2
            b'#' if bytes.get(i + 1) == Some(&b'"') => match scan_hash_string(s, i) {
135
1
                Scan::Complete(end) => {
136
1
                    i = end;
137
1
                    continue;
138
                }
139
                Scan::Incomplete => return Scan::Incomplete,
140
                Scan::Invalid(m) => return Scan::Invalid(m),
141
            },
142
            b';' => {
143
143
                while i < bytes.len() && bytes[i] != b'\n' {
144
124
                    i += 1;
145
124
                }
146
19
                continue;
147
            }
148
6110
            _ => {}
149
        }
150
6620
        i += 1;
151
    }
152
145
    Scan::Incomplete
153
342
}
154

            
155
4
fn scan_string(s: &str, start: usize) -> Scan {
156
4
    let bytes = s.as_bytes();
157
4
    if bytes.get(start) != Some(&b'"') {
158
        return Scan::Invalid("expected '\"' at string start".into());
159
4
    }
160
4
    if bytes.get(start + 1) == Some(&b'"') && bytes.get(start + 2) == Some(&b'"') {
161
1
        return scan_triple_string(s, start);
162
3
    }
163
3
    let mut i = start + 1;
164
19
    while i < bytes.len() {
165
19
        match bytes[i] {
166
            b'\\' => {
167
1
                if i + 1 >= bytes.len() {
168
                    return Scan::Incomplete;
169
1
                }
170
1
                i += 2;
171
            }
172
3
            b'"' => return Scan::Complete(i + 1),
173
15
            _ => i += 1,
174
        }
175
    }
176
    Scan::Incomplete
177
4
}
178

            
179
1
fn scan_triple_string(s: &str, start: usize) -> Scan {
180
1
    let bytes = s.as_bytes();
181
1
    let mut i = start + 3;
182
21
    while i + 2 < bytes.len() {
183
21
        if bytes[i] == b'"' && bytes[i + 1] == b'"' && bytes[i + 2] == b'"' {
184
1
            return Scan::Complete(i + 3);
185
20
        }
186
20
        i += 1;
187
    }
188
    Scan::Incomplete
189
1
}
190

            
191
2
fn scan_hash_string(s: &str, start: usize) -> Scan {
192
2
    let bytes = s.as_bytes();
193
2
    if bytes.get(start) != Some(&b'#') || bytes.get(start + 1) != Some(&b'"') {
194
        return Scan::Invalid("expected '#\"' at hash-string start".into());
195
2
    }
196
2
    let mut i = start + 2;
197
15
    while i < bytes.len() {
198
15
        if bytes[i] == b'"' {
199
2
            return Scan::Complete(i + 1);
200
13
        }
201
13
        i += 1;
202
    }
203
    Scan::Incomplete
204
2
}
205

            
206
1
fn scan_atom(s: &str, start: usize) -> Scan {
207
1
    let bytes = s.as_bytes();
208
1
    let mut i = start;
209
3
    while i < bytes.len() {
210
3
        let c = bytes[i];
211
3
        if c == b' '
212
2
            || c == b'\t'
213
2
            || c == b'\n'
214
2
            || c == b'\r'
215
2
            || c == b'('
216
2
            || c == b')'
217
2
            || c == b';'
218
        {
219
1
            break;
220
2
        }
221
2
        i += 1;
222
    }
223
1
    if i == start {
224
        Scan::Incomplete
225
    } else {
226
1
        Scan::Complete(i)
227
    }
228
1
}
229

            
230
#[cfg(test)]
mod tests {
    use super::*;

    /// Pulls frames until the decoder reports none left, panicking on any
    /// frame error.
    fn drain(decoder: &mut FrameDecoder) -> Vec<String> {
        std::iter::from_fn(|| decoder.next_frame())
            .map(|outcome| outcome.unwrap_or_else(|e| panic!("unexpected frame error: {e}")))
            .collect()
    }

    /// Feeds `input` to a fresh decoder and returns every frame it yields.
    fn decode(input: &[u8]) -> Vec<String> {
        let mut decoder = FrameDecoder::new();
        decoder.feed(input).unwrap();
        drain(&mut decoder)
    }

    #[test]
    fn returns_none_when_buffer_empty() {
        assert!(FrameDecoder::new().next_frame().is_none());
    }

    #[test]
    fn returns_none_when_buffer_only_whitespace() {
        let mut decoder = FrameDecoder::new();
        decoder.feed(b"   \n\t  ").unwrap();
        assert!(decoder.next_frame().is_none());
    }

    #[test]
    fn yields_atom_frame() {
        assert_eq!(decode(b"42 "), ["42"]);
    }

    #[test]
    fn yields_simple_list_frame() {
        assert_eq!(decode(b"(foo bar)"), ["(foo bar)"]);
    }

    #[test]
    fn yields_multiple_frames_streamed() {
        assert_eq!(decode(b"(a)\n(b)\n(c)\n"), ["(a)", "(b)", "(c)"]);
    }

    #[test]
    fn defers_when_form_incomplete() {
        let mut decoder = FrameDecoder::new();
        decoder.feed(b"(foo ").unwrap();
        assert!(decoder.next_frame().is_none());
        decoder.feed(b"bar)").unwrap();
        assert_eq!(drain(&mut decoder), ["(foo bar)"]);
    }

    #[test]
    fn handles_nested_lists() {
        assert_eq!(decode(b"(a (b (c d) e) f)"), ["(a (b (c d) e) f)"]);
    }

    #[test]
    fn parens_inside_string_do_not_affect_depth() {
        assert_eq!(decode(b"(foo \"a)b(c\" bar)"), ["(foo \"a)b(c\" bar)"]);
    }

    #[test]
    fn handles_escaped_quote_in_string() {
        assert_eq!(decode(b"(say \"he\\\"llo\")"), ["(say \"he\\\"llo\")"]);
    }

    #[test]
    fn handles_triple_quoted_string_across_lines() {
        assert_eq!(
            decode(b"(doc \"\"\"line one\n).\nline two\"\"\")"),
            ["(doc \"\"\"line one\n).\nline two\"\"\")"]
        );
    }

    #[test]
    fn handles_base64_literal_with_parens_inside() {
        assert_eq!(decode(b"(blob #\"abc())def\")"), ["(blob #\"abc())def\")"]);
    }

    #[test]
    fn handles_byte_vector_literal_with_inner_parens_handled_by_balance() {
        assert_eq!(decode(b"(blob #u8(1 2 3))"), ["(blob #u8(1 2 3))"]);
    }

    #[test]
    fn handles_quote_prefix() {
        assert_eq!(decode(b"'(a b)"), ["'(a b)"]);
    }

    #[test]
    fn handles_quasiquote_with_unquote_inside() {
        assert_eq!(decode(b"`(a ,b ,@c)"), ["`(a ,b ,@c)"]);
    }

    #[test]
    fn skips_top_level_comments_between_frames() {
        assert_eq!(
            decode(b"; comment\n(foo)\n; another\n(bar)\n"),
            ["(foo)", "(bar)"]
        );
    }

    #[test]
    fn comment_inside_list_does_not_split_frame() {
        assert_eq!(
            decode(b"(foo ; comment with )\n  bar)"),
            ["(foo ; comment with )\n  bar)"]
        );
    }

    #[test]
    fn invalid_utf8_returns_error() {
        let mut decoder = FrameDecoder::new();
        let err = decoder.feed(&[0xFF, 0xFE, 0xFD]).unwrap_err();
        assert_eq!(err, FrameError::InvalidUtf8);
    }

    #[test]
    fn standalone_string_is_a_frame() {
        assert_eq!(decode(b"\"hello\""), ["\"hello\""]);
    }

    #[test]
    fn standalone_byte_vector_is_a_frame() {
        assert_eq!(decode(b"#u8(1 2 3)"), ["#u8(1 2 3)"]);
    }

    #[test]
    fn standalone_base64_is_a_frame() {
        assert_eq!(decode(b"#\"abcd\""), ["#\"abcd\""]);
    }
}