Lines
92.76 %
Functions
70.37 %
Branches
100 %
use thiserror::Error;
/// Errors produced by [`FrameDecoder`].
#[derive(Debug, Error, PartialEq, Eq)]
pub enum FrameError {
/// The bytes handed to `FrameDecoder::feed` were not valid UTF-8.
#[error("frame contains invalid UTF-8")]
InvalidUtf8,
/// The scanner rejected the buffered text; the payload is the scanner's message.
#[error("malformed frame: {0}")]
Malformed(String),
}
/// Streaming decoder: text is appended with `feed` and taken back out one
/// frame at a time with `next_frame`.
#[derive(Debug, Default)]
pub struct FrameDecoder {
/// Text accepted by `feed` that `next_frame` has not yet consumed.
buf: String,
impl FrameDecoder {
/// Creates a decoder via `Self::default()`, i.e. with an empty buffer.
#[must_use]
pub fn new() -> Self {
Self::default()
/// Appends one chunk of input to the internal buffer.
///
/// The chunk is validated as UTF-8 on its own before anything is appended, so
/// a multi-byte character split across two calls is rejected rather than joined.
///
/// # Errors
///
/// Returns [`FrameError::InvalidUtf8`] when `bytes` is not valid UTF-8; the
/// buffer is left unchanged in that case because the append happens afterwards.
pub fn feed(&mut self, bytes: &[u8]) -> Result<(), FrameError> {
let s = std::str::from_utf8(bytes).map_err(|_| FrameError::InvalidUtf8)?;
self.buf.push_str(s);
Ok(())
/// Tries to take the next complete frame off the front of the buffer.
///
/// Returns `None` when the buffer holds only skippable text or the scanner
/// reports the leading form as incomplete, `Some(Ok(frame))` with the text of a
/// complete form (without the skipped prefix), or
/// `Some(Err(FrameError::Malformed(..)))` when the scanner reports invalid input.
pub fn next_frame(&mut self) -> Option<Result<String, FrameError>> {
let trimmed_offset = self.leading_skip_len();
// Only skippable bytes are buffered: discard them and report "no frame".
if trimmed_offset >= self.buf.len() {
self.buf.drain(..trimmed_offset);
return None;
match scan_form_end(&self.buf[trimmed_offset..]) {
Scan::Complete(rel_end) => {
// `rel_end` is relative to the slice given to the scanner; turn it into a buffer offset.
let abs_end = trimmed_offset + rel_end;
let frame: String = self.buf[trimmed_offset..abs_end].to_string();
// Removes the skipped prefix together with the frame itself.
self.buf.drain(..abs_end);
Some(Ok(frame))
Scan::Incomplete => {
// The buffer is left untouched here.
None
Scan::Invalid(msg) => {
// The whole buffer is discarded, not only the offending form.
self.buf.clear();
Some(Err(FrameError::Malformed(msg)))
/// Returns how many leading bytes of the buffer `next_frame` skips before
/// scanning: spaces, tabs, `\n`, `\r`, and text introduced by `;`.
fn leading_skip_len(&self) -> usize {
let bytes = self.buf.as_bytes();
let mut i = 0;
while i < bytes.len() {
let c = bytes[i];
if c == b' ' || c == b'\t' || c == b'\n' || c == b'\r' {
i += 1;
} else if c == b';' {
// The loop condition holds until a `\n` or the end of the buffer is reached.
// NOTE(review): the loop body is not visible in this view; presumably it advances `i` — confirm.
while i < bytes.len() && bytes[i] != b'\n' {
} else {
// First byte that is neither whitespace nor `;`: stop counting.
break;
i
/// Outcome of scanning one form.
enum Scan {
/// A whole form was found; the payload is the byte offset just past its end,
/// measured from the start of the scanned string.
Complete(usize),
/// No complete form could be found in the input available so far.
Incomplete,
/// The input was rejected; the payload is a message that `next_frame`
/// wraps in `FrameError::Malformed`.
Invalid(String),
/// Scans the single form that starts at byte 0 of `s`, choosing a scanner from
/// the leading byte(s).
///
/// Empty input yields `Scan::Incomplete`. Anything that matches none of the
/// specific prefixes below is treated as an atom.
fn scan_form_end(s: &str) -> Scan {
let bytes = s.as_bytes();
if bytes.is_empty() {
return Scan::Incomplete;
match bytes[0] {
// Parenthesised list.
b'(' => scan_balanced_list(s, 0),
// One-byte prefix (`'` or a backtick) followed by another form.
b'\'' | b'`' => scan_after_prefix(s, 1),
// `,@` must be tested before the bare `,` so both prefix bytes are consumed.
b',' if bytes.get(1) == Some(&b'@') => scan_after_prefix(s, 2),
b',' => scan_after_prefix(s, 1),
// Double-quoted string.
b'"' => scan_string(s, 0),
// `#"` literal.
b'#' if bytes.get(1) == Some(&b'"') => scan_hash_string(s, 0),
// `#u8(` literal: the list scanner is started at index 3, the `(`.
b'#' if bytes.get(1) == Some(&b'u')
&& bytes.get(2) == Some(&b'8')
&& bytes.get(3) == Some(&b'(') =>
{
scan_balanced_list(s, 3)
// Everything else is scanned as an atom.
_ => scan_atom(s, 0),
/// Scans the form that follows a prefix occupying the first `start` bytes of `s`.
///
/// A `Scan::Complete` offset from the inner scan is shifted by `start` so it is
/// relative to `s` again; any other result is passed through unchanged.
fn scan_after_prefix(s: &str, start: usize) -> Scan {
// Nothing follows the prefix.
// NOTE(review): the body of this guard is not visible in this view; confirm what it returns.
if start >= s.len() {
match scan_form_end(&s[start..]) {
Scan::Complete(rel) => Scan::Complete(start + rel),
other => other,
/// Scans a parenthesised list in `s` whose opening `(` is expected at byte `start`.
///
/// Yields `Scan::Invalid` when `start` is not a `(`, `Scan::Complete` with the
/// offset just past the `)` that brings the depth back to zero, and
/// `Scan::Incomplete` as the final fall-through.
// NOTE(review): the `bytes` binding, the loop header and the index advances are
// not visible in this view; the comments below cover the visible arms only.
fn scan_balanced_list(s: &str, start: usize) -> Scan {
let mut i = start;
if bytes.get(i) != Some(&b'(') {
return Scan::Invalid("expected '(' at list start".into());
// Depth counter: incremented on `(`, decremented on `)`; the list ends when it reaches 0.
let mut depth: usize = 1;
match bytes[i] {
b'(' => depth += 1,
b')' => {
depth -= 1;
if depth == 0 {
// `i` indexes the matching `)`; the reported end offset is exclusive.
return Scan::Complete(i + 1);
// A string is skipped as a unit, so parentheses inside it never touch `depth`.
b'"' => match scan_string(s, i) {
Scan::Complete(end) => {
// Resume right after the string.
i = end;
continue;
// An unfinished or invalid string makes the whole list unfinished or invalid.
Scan::Incomplete => return Scan::Incomplete,
Scan::Invalid(m) => return Scan::Invalid(m),
},
// `#"` literals are delegated to `scan_hash_string`.
// NOTE(review): the arms of this inner match are not visible in this view.
b'#' if bytes.get(i + 1) == Some(&b'"') => match scan_hash_string(s, i) {
// NOTE(review): the body of the `;` arm is not visible in this view.
b';' => {
_ => {}
Scan::Incomplete
/// Scans a double-quoted string in `s` whose opening `"` is expected at byte `start`.
///
/// Yields `Scan::Invalid` when `start` is not a `"`. A start of three consecutive
/// quotes is handed to `scan_triple_string`. Otherwise scanning begins after the
/// opening quote and a `"` reached by the scan ends the string.
// NOTE(review): the `bytes` binding, the loop header and the function tail are
// not visible in this view.
fn scan_string(s: &str, start: usize) -> Scan {
if bytes.get(start) != Some(&b'"') {
return Scan::Invalid("expected '\"' at string start".into());
// `"""` opens a triple-quoted string, which is scanned separately.
if bytes.get(start + 1) == Some(&b'"') && bytes.get(start + 2) == Some(&b'"') {
return scan_triple_string(s, start);
let mut i = start + 1;
b'\\' => {
// The backslash is the last byte available.
// NOTE(review): the body of this check is not visible in this view; confirm what it returns.
if i + 1 >= bytes.len() {
// Step over the backslash and the byte after it.
i += 2;
// Closing quote: the reported end offset is exclusive.
b'"' => return Scan::Complete(i + 1),
_ => i += 1,
/// Scans a triple-quoted string in `s`; `start` is the index of its first opening quote.
///
/// Scanning begins at `start + 3`, and three consecutive `"` bytes found there or
/// later end the string, yielding `Scan::Complete` just past them.
// NOTE(review): the `bytes` binding, the index advance and the function tail are
// not visible in this view.
fn scan_triple_string(s: &str, start: usize) -> Scan {
let mut i = start + 3;
// The condition guarantees that `i`, `i + 1` and `i + 2` are all in bounds.
while i + 2 < bytes.len() {
if bytes[i] == b'"' && bytes[i + 1] == b'"' && bytes[i + 2] == b'"' {
return Scan::Complete(i + 3);
/// Scans a `#"` literal in `s` whose `#` is expected at byte `start`.
///
/// Yields `Scan::Invalid` unless `start` and `start + 1` hold `#` and `"`.
/// Scanning of the contents begins at `start + 2` and tests for a `"` byte.
// NOTE(review): the `bytes` binding, the loop around the quote test and the
// function tail are not visible in this view.
fn scan_hash_string(s: &str, start: usize) -> Scan {
if bytes.get(start) != Some(&b'#') || bytes.get(start + 1) != Some(&b'"') {
return Scan::Invalid("expected '#\"' at hash-string start".into());
let mut i = start + 2;
if bytes[i] == b'"' {
/// Scans a bare atom in `s` beginning at byte `start`.
///
/// The visible delimiter test recognises space, tab, `\n`, `\r`, `(`, `)` and `;`.
/// The function's tail expression is `Scan::Complete(i)`.
// NOTE(review): the `bytes`/`i`/`c` bindings, the loop header and the body of
// the `i == start` check are not visible in this view.
fn scan_atom(s: &str, start: usize) -> Scan {
// Delimiter test.
if c == b' '
|| c == b'\t'
|| c == b'\n'
|| c == b'\r'
|| c == b'('
|| c == b')'
|| c == b';'
// No bytes were consumed.
if i == start {
Scan::Complete(i)
// Unit tests for `FrameDecoder`.
// NOTE(review): most `#[test]` attributes, decoder set-up lines and assertions
// are not visible in this view; each comment describes only the visible input.
#[cfg(test)]
mod tests {
use super::*;
// Pulls frames from `decoder` until `next_frame` returns `None`, panicking on the first error.
fn drain(decoder: &mut FrameDecoder) -> Vec<String> {
let mut frames = Vec::new();
while let Some(result) = decoder.next_frame() {
match result {
Ok(f) => frames.push(f),
Err(e) => panic!("unexpected frame error: {e}"),
frames
// A freshly created decoder yields nothing.
#[test]
fn returns_none_when_buffer_empty() {
let mut d = FrameDecoder::new();
assert!(d.next_frame().is_none());
// Input made only of whitespace bytes.
fn returns_none_when_buffer_only_whitespace() {
d.feed(b" \n\t ").unwrap();
// An atom followed by a space.
fn yields_atom_frame() {
d.feed(b"42 ").unwrap();
let frames = drain(&mut d);
assert_eq!(frames, vec!["42".to_string()]);
// A flat list with no trailing whitespace.
fn yields_simple_list_frame() {
d.feed(b"(foo bar)").unwrap();
assert_eq!(frames, vec!["(foo bar)".to_string()]);
// Three newline-separated lists fed in one chunk come out as three frames.
fn yields_multiple_frames_streamed() {
d.feed(b"(a)\n(b)\n(c)\n").unwrap();
assert_eq!(
frames,
vec!["(a)".to_string(), "(b)".to_string(), "(c)".to_string()]
);
// One list split across two `feed` calls.
fn defers_when_form_incomplete() {
d.feed(b"(foo ").unwrap();
d.feed(b"bar)").unwrap();
// Nested parentheses stay within a single frame.
fn handles_nested_lists() {
d.feed(b"(a (b (c d) e) f)").unwrap();
assert_eq!(frames, vec!["(a (b (c d) e) f)".to_string()]);
// Parentheses inside a double-quoted string.
fn parens_inside_string_do_not_affect_depth() {
d.feed(b"(foo \"a)b(c\" bar)").unwrap();
assert_eq!(frames, vec!["(foo \"a)b(c\" bar)".to_string()]);
// A backslash-escaped quote inside a string.
fn handles_escaped_quote_in_string() {
d.feed(b"(say \"he\\\"llo\")").unwrap();
assert_eq!(frames, vec!["(say \"he\\\"llo\")".to_string()]);
// A triple-quoted string containing newlines and a `)`.
fn handles_triple_quoted_string_across_lines() {
d.feed(b"(doc \"\"\"line one\n).\nline two\"\"\")").unwrap();
vec!["(doc \"\"\"line one\n).\nline two\"\"\")".to_string()]
// A `#"` literal containing parentheses.
fn handles_base64_literal_with_parens_inside() {
d.feed(b"(blob #\"abc())def\")").unwrap();
assert_eq!(frames, vec!["(blob #\"abc())def\")".to_string()]);
// A `#u8(...)` literal nested inside a list.
fn handles_byte_vector_literal_with_inner_parens_handled_by_balance() {
d.feed(b"(blob #u8(1 2 3))").unwrap();
assert_eq!(frames, vec!["(blob #u8(1 2 3))".to_string()]);
// A `'` prefix is kept as part of the frame.
fn handles_quote_prefix() {
d.feed(b"'(a b)").unwrap();
assert_eq!(frames, vec!["'(a b)".to_string()]);
// A backtick prefix with `,` and `,@` inside the list.
fn handles_quasiquote_with_unquote_inside() {
d.feed(b"`(a ,b ,@c)").unwrap();
assert_eq!(frames, vec!["`(a ,b ,@c)".to_string()]);
// `;` comment lines between forms are absent from the frames.
fn skips_top_level_comments_between_frames() {
d.feed(b"; comment\n(foo)\n; another\n(bar)\n").unwrap();
assert_eq!(frames, vec!["(foo)".to_string(), "(bar)".to_string()]);
// A `;` comment containing `)` inside a list; the comment text stays in the frame.
fn comment_inside_list_does_not_split_frame() {
d.feed(b"(foo ; comment with )\n bar)").unwrap();
assert_eq!(frames, vec!["(foo ; comment with )\n bar)".to_string()]);
// Bytes that are not valid UTF-8 are rejected by `feed`.
fn invalid_utf8_returns_error() {
let err = d.feed(&[0xFF, 0xFE, 0xFD]).unwrap_err();
assert_eq!(err, FrameError::InvalidUtf8);
// A top-level double-quoted string.
fn standalone_string_is_a_frame() {
d.feed(b"\"hello\"").unwrap();
assert_eq!(frames, vec!["\"hello\"".to_string()]);
// A top-level `#u8(...)` literal.
fn standalone_byte_vector_is_a_frame() {
d.feed(b"#u8(1 2 3)").unwrap();
assert_eq!(frames, vec!["#u8(1 2 3)".to_string()]);
// A top-level `#"` literal.
fn standalone_base64_is_a_frame() {
d.feed(b"#\"abcd\"").unwrap();
assert_eq!(frames, vec!["#\"abcd\"".to_string()]);