1use winnow::ascii::{digit1, line_ending, space0, till_line_ending};
2use winnow::combinator::{alt, cut_err, opt, preceded};
3use winnow::error::{AddContext, ContextError, ErrMode, ModalResult, StrContext, StrContextValue};
4use winnow::prelude::*;
5use winnow::token::{any, none_of, take_till, take_while};
6
7use tracing::debug;
8
9use crate::ast::{Annotation, Expr, Fraction, Program};
10use crate::error::{Error, Result};
11
12pub struct Reader;
13
14impl Reader {
15 pub fn parse(input: &str) -> Result<Program> {
16 debug!(input_len = input.len(), "parse start");
17 let mut remaining = input;
18 let (exprs, annotations) = parse_program(&mut remaining).map_err(|e| {
19 let offset = input.len() - remaining.len();
20 Error::parse(format_parse_error(e), input, offset..offset + 1)
21 })?;
22 debug!(
23 expr_count = exprs.len(),
24 annotation_count = annotations.len(),
25 "parse complete"
26 );
27 Ok(Program::with_annotations(exprs, annotations))
28 }
29
30 #[must_use]
31 pub fn is_incomplete(input: &str) -> bool {
32 let mut remaining = input;
33 parse_program(&mut remaining).is_err() && remaining.is_empty()
34 }
35
36 pub fn parse_expr(input: &str) -> Result<Expr> {
37 let mut remaining = input;
38 parse_expr(&mut remaining).map_err(|e| {
39 let offset = input.len() - remaining.len();
40 Error::parse(format_parse_error(e), input, offset..offset + 1)
41 })
42 }
43}
44
45fn format_parse_error(err: ErrMode<ContextError>) -> String {
46 match err {
47 ErrMode::Backtrack(ctx) | ErrMode::Cut(ctx) => format_context_error(&ctx),
48 ErrMode::Incomplete(_) => "unexpected end of input".to_string(),
49 }
50}
51
52fn format_context_error(ctx: &ContextError) -> String {
53 let mut parts = Vec::new();
54 for c in ctx.context() {
55 match c {
56 StrContext::Label(label) => parts.push(format!("in {label}")),
57 StrContext::Expected(StrContextValue::Description(desc)) => {
58 parts.push(format!("expected {desc}"));
59 }
60 StrContext::Expected(StrContextValue::CharLiteral(c)) => {
61 parts.push(format!("expected '{c}'"));
62 }
63 StrContext::Expected(StrContextValue::StringLiteral(s)) => {
64 parts.push(format!("expected \"{s}\""));
65 }
66 _ => {}
67 }
68 }
69 if parts.is_empty() {
70 "parse error".to_string()
71 } else {
72 parts.join(", ")
73 }
74}
75
76fn skip_ws_and_comments(input: &mut &str, annotations: &mut Vec<Annotation>) -> ModalResult<()> {
77 loop {
78 let _ = space0.parse_next(input)?;
79 if input.starts_with("; @") {
80 let _ = "; @".parse_next(input)?;
81 let name: &str = take_while(1.., |c: char| c.is_alphanumeric() || c == '-' || c == '_')
82 .parse_next(input)?;
83 let _ = space0.parse_next(input)?;
84
85 let value = parse_expr(input).unwrap_or(Expr::Nil);
86 annotations.push(Annotation {
87 name: name.to_string(),
88 value,
89 });
90 let _ = till_line_ending.parse_next(input)?;
91 let _ = opt(line_ending).parse_next(input)?;
92 } else if input.starts_with(';') {
93 let _ = till_line_ending.parse_next(input)?;
94 let _ = opt(line_ending).parse_next(input)?;
95 } else if input.starts_with('\n') || input.starts_with('\r') {
96 let _ = line_ending.parse_next(input)?;
97 } else {
98 break;
99 }
100 }
101 Ok(())
102}
103
104fn skip_ws_and_comments_no_annotations(input: &mut &str) -> ModalResult<()> {
105 let mut dummy = Vec::new();
106 skip_ws_and_comments(input, &mut dummy)
107}
108
109fn parse_program(input: &mut &str) -> ModalResult<(Vec<Expr>, Vec<Annotation>)> {
110 let mut annotations = Vec::new();
111 skip_ws_and_comments(input, &mut annotations)?;
112
113 let mut exprs = Vec::new();
114 loop {
115 if input.is_empty() {
116 break;
117 }
118 let expr = parse_expr(input)?;
119 exprs.push(expr);
120 skip_ws_and_comments(input, &mut annotations)?;
121 }
122
123 Ok((exprs, annotations))
124}
125
126fn parse_expr(input: &mut &str) -> ModalResult<Expr> {
127 let expr = alt((
128 alt((
129 parse_nil,
130 parse_bool,
131 parse_number,
132 parse_string,
133 parse_quote,
134 )),
135 alt((
136 parse_quasiquote,
137 parse_unquote,
138 parse_list,
139 parse_keyword,
140 parse_symbol,
141 )),
142 ))
143 .parse_next(input)?;
144 debug!(expr = ?expr, "parsed expression");
145 Ok(expr)
146}
147
148fn parse_nil(input: &mut &str) -> ModalResult<Expr> {
149 alt(("nil", "NIL", "Nil"))
150 .value(Expr::Nil)
151 .parse_next(input)
152}
153
154fn parse_bool(input: &mut &str) -> ModalResult<Expr> {
155 alt(("#t", "#T", "#f", "#F"))
156 .map(|s: &str| match s {
157 "#t" | "#T" => Expr::Bool(true),
158 _ => Expr::Nil,
159 })
160 .parse_next(input)
161}
162
/// Returns `true` for characters that end a token: any whitespace, or one of
/// `(`, `)`, `"`, `'`, `` ` ``, `,` and `;`.
fn is_delimiter(c: char) -> bool {
    matches!(c, '(' | ')' | '"' | '\'' | '`' | ',' | ';') || c.is_whitespace()
}
166
167fn parse_number(input: &mut &str) -> ModalResult<Expr> {
168 let sign = opt(alt(("-".value(-1i64), "+".value(1i64))))
169 .map(|s| s.unwrap_or(1))
170 .parse_next(input)?;
171
172 let int_part: &str = digit1.parse_next(input)?;
173 let int_val: i64 = int_part
174 .parse()
175 .map_err(|_| ErrMode::Cut(ContextError::new()))?;
176
177 let frac_part = opt(preceded('.', digit1)).parse_next(input)?;
178
179 if input.starts_with(|c: char| !is_delimiter(c)) {
180 return Err(ErrMode::Backtrack(ContextError::new()));
181 }
182
183 let fraction = if let Some(decimals) = frac_part {
184 let decimal_places = decimals.len() as u32;
185 let decimal_val: i64 = decimals
186 .parse()
187 .map_err(|_| ErrMode::Cut(ContextError::new()))?;
188 let denom = 10i64.pow(decimal_places);
189 Fraction::new(sign * (int_val * denom + decimal_val), denom)
190 } else {
191 Fraction::from_integer(sign * int_val)
192 };
193
194 Ok(Expr::Number(fraction))
195}
196
197fn parse_string(input: &mut &str) -> ModalResult<Expr> {
198 alt((parse_triple_quoted_string, parse_double_quoted_string)).parse_next(input)
199}
200
201fn parse_double_quoted_string(input: &mut &str) -> ModalResult<Expr> {
202 let _ = '"'.parse_next(input)?;
203 let mut result = String::new();
204
205 loop {
206 let chunk: &str = take_till(0.., |c| c == '"' || c == '\\').parse_next(input)?;
207 result.push_str(chunk);
208
209 match cut_err(any)
210 .context(StrContext::Label("string"))
211 .context(StrContext::Expected(StrContextValue::Description(
212 "closing quote",
213 )))
214 .parse_next(input)?
215 {
216 '"' => return Ok(Expr::String(result)),
217 '\\' => {
218 let escaped = any.parse_next(input)?;
219 match escaped {
220 'n' => result.push('\n'),
221 't' => result.push('\t'),
222 'r' => result.push('\r'),
223 '\\' => result.push('\\'),
224 '"' => result.push('"'),
225 c => {
226 result.push('\\');
227 result.push(c);
228 }
229 }
230 }
231 _ => unreachable!(),
232 }
233 }
234}
235
236fn parse_triple_quoted_string(input: &mut &str) -> ModalResult<Expr> {
237 let _ = "\"\"\"".parse_next(input)?;
238 let mut content = String::new();
239
240 loop {
241 if input.starts_with("\"\"\"") {
242 let _ = "\"\"\"".parse_next(input)?;
243 return Ok(Expr::String(content));
244 }
245 if input.is_empty() {
246 return Err(ErrMode::Cut(
247 ContextError::new()
248 .add_context(input, &input.checkpoint(), StrContext::Label("string"))
249 .add_context(
250 input,
251 &input.checkpoint(),
252 StrContext::Expected(StrContextValue::Description("closing \"\"\"")),
253 ),
254 ));
255 }
256 content.push(any.parse_next(input)?);
257 }
258}
259
260fn parse_quote(input: &mut &str) -> ModalResult<Expr> {
261 let _ = '\''.parse_next(input)?;
262 let expr = parse_expr.parse_next(input)?;
263 Ok(Expr::Quote(Box::new(expr)))
264}
265
266fn parse_quasiquote(input: &mut &str) -> ModalResult<Expr> {
267 let _ = '`'.parse_next(input)?;
268 let expr = parse_expr.parse_next(input)?;
269 Ok(Expr::Quasiquote(Box::new(expr)))
270}
271
272fn parse_unquote(input: &mut &str) -> ModalResult<Expr> {
273 let _ = ','.parse_next(input)?;
274 if input.starts_with('@') {
275 let _ = '@'.parse_next(input)?;
276 let expr = parse_expr.parse_next(input)?;
277 return Ok(Expr::UnquoteSplicing(Box::new(expr)));
278 }
279 let expr = parse_expr.parse_next(input)?;
280 Ok(Expr::Unquote(Box::new(expr)))
281}
282
283fn parse_list(input: &mut &str) -> ModalResult<Expr> {
284 let _ = '('.parse_next(input)?;
285 skip_ws_and_comments_no_annotations(input)?;
286
287 let mut items = Vec::new();
288 let mut dotted_cdr: Option<Expr> = None;
289
290 loop {
291 if input.starts_with(')') {
292 break;
293 }
294 if input.is_empty() {
295 return Err(ErrMode::Cut(
296 ContextError::new()
297 .add_context(input, &input.checkpoint(), StrContext::Label("list"))
298 .add_context(
299 input,
300 &input.checkpoint(),
301 StrContext::Expected(StrContextValue::Description("closing paren")),
302 ),
303 ));
304 }
305 if input.starts_with('.') && input.chars().nth(1).is_some_and(char::is_whitespace) {
306 let _ = '.'.parse_next(input)?;
307 skip_ws_and_comments_no_annotations(input)?;
308 dotted_cdr = Some(
309 cut_err(parse_expr)
310 .context(StrContext::Label("cdr expression"))
311 .parse_next(input)?,
312 );
313 skip_ws_and_comments_no_annotations(input)?;
314 cut_err(')')
315 .context(StrContext::Label("closing paren"))
316 .context(StrContext::Expected(StrContextValue::Description(
317 "only one expression after dot in dotted pair",
318 )))
319 .parse_next(input)?;
320 break;
321 }
322 let expr = parse_expr(input)?;
323 items.push(expr);
324 skip_ws_and_comments_no_annotations(input)?;
325 }
326
327 if dotted_cdr.is_none() {
328 cut_err(')')
329 .context(StrContext::Label("list"))
330 .context(StrContext::Expected(StrContextValue::Description(
331 "closing paren",
332 )))
333 .parse_next(input)?;
334 }
335
336 if let Some(cdr) = dotted_cdr {
337 let mut result = cdr;
338 for item in items.into_iter().rev() {
339 result = Expr::cons(item, result);
340 }
341 Ok(result)
342 } else {
343 Ok(Expr::List(items))
344 }
345}
346
347fn parse_keyword(input: &mut &str) -> ModalResult<Expr> {
348 let _ = ':'.parse_next(input)?;
349 let first: char =
350 none_of(|c: char| c.is_whitespace() || "()\"'`,".contains(c)).parse_next(input)?;
351
352 let rest: &str = take_while(0.., |c: char| !c.is_whitespace() && !"()\"'`,".contains(c))
353 .parse_next(input)?;
354
355 let mut name = String::with_capacity(1 + rest.len());
356 name.push(first);
357 name.push_str(rest);
358 name.make_ascii_uppercase();
359 Ok(Expr::Keyword(name))
360}
361
362fn parse_symbol(input: &mut &str) -> ModalResult<Expr> {
363 let first: char =
364 none_of(|c: char| c.is_whitespace() || "()\"'`,".contains(c)).parse_next(input)?;
365
366 let rest: &str = take_while(0.., |c: char| !c.is_whitespace() && !"()\"'`,".contains(c))
367 .parse_next(input)?;
368
369 let mut name = String::with_capacity(1 + rest.len());
370 name.push(first);
371 name.push_str(rest);
372 name.make_ascii_uppercase();
373 if name == "NIL" {
374 return Ok(Expr::Nil);
375 }
376 if name == "T" {
377 return Ok(Expr::Bool(true));
378 }
379 Ok(Expr::Symbol(name))
380}
381
// Unit tests for the reader: each test feeds source text to `Reader::parse`
// and checks the resulting expressions and/or annotations.
#[cfg(test)]
mod tests {
    use super::*;

    // ---- Atoms: nil and booleans ----

    #[test]
    fn test_parse_nil() {
        let program = Reader::parse("nil").unwrap();
        assert_eq!(program.exprs, vec![Expr::Nil]);
    }

    // Both the `#t` literal and the bare symbol `t` read as true, in either case.
    #[test]
    fn test_parse_bool() {
        let program = Reader::parse("#t #T t T").unwrap();
        assert_eq!(
            program.exprs,
            vec![
                Expr::Bool(true),
                Expr::Bool(true),
                Expr::Bool(true),
                Expr::Bool(true),
            ]
        );
    }

    // False has no dedicated variant here: `#f` reads as Nil.
    #[test]
    fn test_parse_false_is_nil() {
        let program = Reader::parse("#f #F").unwrap();
        assert_eq!(program.exprs, vec![Expr::Nil, Expr::Nil]);
    }

    #[test]
    fn test_parse_nil_case_insensitive() {
        assert_eq!(Reader::parse("nil").unwrap().exprs, vec![Expr::Nil]);
        assert_eq!(Reader::parse("NIL").unwrap().exprs, vec![Expr::Nil]);
        assert_eq!(Reader::parse("Nil").unwrap().exprs, vec![Expr::Nil]);
    }

    // ---- Atoms: numbers ----

    #[test]
    fn test_parse_integer() {
        let program = Reader::parse("42").unwrap();
        assert_eq!(
            program.exprs,
            vec![Expr::Number(Fraction::from_integer(42))]
        );
    }

    #[test]
    fn test_parse_negative_number() {
        let program = Reader::parse("-17").unwrap();
        assert_eq!(
            program.exprs,
            vec![Expr::Number(Fraction::from_integer(-17))]
        );
    }

    // Decimals are read as exact fractions rather than floats.
    #[test]
    fn test_parse_decimal() {
        let program = Reader::parse("0.1").unwrap();
        assert_eq!(program.exprs, vec![Expr::Number(Fraction::new(1, 10))]);
    }

    // ---- Atoms: symbols and keywords (names are upper-cased) ----

    #[test]
    fn test_parse_symbol() {
        let program = Reader::parse("foo").unwrap();
        assert_eq!(program.exprs, vec![Expr::Symbol("FOO".into())]);
    }

    #[test]
    fn test_parse_symbol_case_insensitive() {
        assert_eq!(
            Reader::parse("foo").unwrap().exprs,
            Reader::parse("FOO").unwrap().exprs
        );
        assert_eq!(
            Reader::parse("Sum").unwrap().exprs,
            Reader::parse("SUM").unwrap().exprs
        );
    }

    #[test]
    fn test_parse_keyword() {
        let program = Reader::parse(":foo").unwrap();
        assert_eq!(program.exprs, vec![Expr::Keyword("FOO".into())]);
    }

    #[test]
    fn test_parse_keyword_case_insensitive() {
        assert_eq!(
            Reader::parse(":foo").unwrap().exprs,
            Reader::parse(":FOO").unwrap().exprs
        );
        assert_eq!(
            Reader::parse(":Name").unwrap().exprs,
            Reader::parse(":NAME").unwrap().exprs
        );
    }

    // ---- Atoms: strings ----

    #[test]
    fn test_parse_string() {
        let program = Reader::parse(r#""hello""#).unwrap();
        assert_eq!(program.exprs, vec![Expr::String("hello".into())]);
    }

    // The raw source contains a backslash followed by `n`; the parsed string
    // contains a real newline.
    #[test]
    fn test_parse_string_with_escapes() {
        let program = Reader::parse(r#""hello\nworld""#).unwrap();
        assert_eq!(program.exprs, vec![Expr::String("hello\nworld".into())]);
    }

    // ---- Lists and quoting ----

    #[test]
    fn test_parse_list() {
        let program = Reader::parse("(+ 1 2)").unwrap();
        assert_eq!(
            program.exprs,
            vec![Expr::List(vec![
                Expr::Symbol("+".into()),
                Expr::Number(Fraction::from_integer(1)),
                Expr::Number(Fraction::from_integer(2)),
            ])]
        );
    }

    #[test]
    fn test_parse_nested_list() {
        let program = Reader::parse("(define (square x) (* x x))").unwrap();
        assert_eq!(
            program.exprs,
            vec![Expr::List(vec![
                Expr::Symbol("DEFINE".into()),
                Expr::List(vec![
                    Expr::Symbol("SQUARE".into()),
                    Expr::Symbol("X".into()),
                ]),
                Expr::List(vec![
                    Expr::Symbol("*".into()),
                    Expr::Symbol("X".into()),
                    Expr::Symbol("X".into()),
                ]),
            ])]
        );
    }

    #[test]
    fn test_parse_quote() {
        let program = Reader::parse("'foo").unwrap();
        assert_eq!(
            program.exprs,
            vec![Expr::Quote(Box::new(Expr::Symbol("FOO".into())))]
        );
    }

    #[test]
    fn test_parse_quoted_list() {
        let program = Reader::parse("'(1 2 3)").unwrap();
        assert_eq!(
            program.exprs,
            vec![Expr::Quote(Box::new(Expr::List(vec![
                Expr::Number(Fraction::from_integer(1)),
                Expr::Number(Fraction::from_integer(2)),
                Expr::Number(Fraction::from_integer(3)),
            ])))]
        );
    }

    #[test]
    fn test_parse_multiple_exprs() {
        let program = Reader::parse("(DEFINE x 10) (+ x 5)").unwrap();
        assert_eq!(program.exprs.len(), 2);
    }

    // ---- Comments ----

    #[test]
    fn test_parse_line_comment() {
        let program = Reader::parse("; this is a comment\n42").unwrap();
        assert_eq!(
            program.exprs,
            vec![Expr::Number(Fraction::from_integer(42))]
        );
    }

    #[test]
    fn test_parse_inline_comment() {
        let program = Reader::parse("42 ; inline comment").unwrap();
        assert_eq!(
            program.exprs,
            vec![Expr::Number(Fraction::from_integer(42))]
        );
    }

    // A comment between list elements runs to the end of its line only.
    #[test]
    fn test_parse_comment_in_list() {
        let program = Reader::parse("(+ 1 ; add one\n 2)").unwrap();
        assert_eq!(
            program.exprs,
            vec![Expr::List(vec![
                Expr::Symbol("+".into()),
                Expr::Number(Fraction::from_integer(1)),
                Expr::Number(Fraction::from_integer(2)),
            ])]
        );
    }

    // The string literal below deliberately spans several source lines; its
    // continuation lines start at column zero so no indentation leaks into it.
    #[test]
    fn test_parse_multiple_comments() {
        let code = "; comment 1
; comment 2
42
; comment 3";
        let program = Reader::parse(code).unwrap();
        assert_eq!(
            program.exprs,
            vec![Expr::Number(Fraction::from_integer(42))]
        );
    }

    #[test]
    fn test_parse_only_comments() {
        let program = Reader::parse("; just a comment").unwrap();
        assert!(program.exprs.is_empty());
    }

    // ---- Annotations (`; @name value` comments) ----

    #[test]
    fn test_parse_annotation() {
        let code = "; @test (= (count 'defun) 5)\n42";
        let program = Reader::parse(code).unwrap();
        assert_eq!(program.exprs.len(), 1);
        assert_eq!(program.annotations.len(), 1);
        assert_eq!(program.annotations[0].name, "test");
        assert!(matches!(&program.annotations[0].value, Expr::List(_)));
    }

    #[test]
    fn test_parse_multiple_annotations() {
        let code = "; @test (= (count 'defun) 5)\n; @test (= (count 'defvar) 2)\n42";
        let program = Reader::parse(code).unwrap();
        assert_eq!(program.annotations.len(), 2);
    }

    #[test]
    fn test_annotation_with_simple_expr() {
        let code = "; @version 1\n42";
        let program = Reader::parse(code).unwrap();
        assert_eq!(program.annotations.len(), 1);
        assert_eq!(program.annotations[0].name, "version");
        assert_eq!(
            program.annotations[0].value,
            Expr::Number(Fraction::from_integer(1))
        );
    }

    // An ordinary comment must not be mistaken for an annotation.
    #[test]
    fn test_regular_comment_no_annotation() {
        let code = "; just a comment\n42";
        let program = Reader::parse(code).unwrap();
        assert!(program.annotations.is_empty());
    }

    // ---- Dotted pairs ----

    #[test]
    fn test_parse_cons() {
        let program = Reader::parse("(a . b)").unwrap();
        assert_eq!(
            program.exprs,
            vec![Expr::cons(
                Expr::Symbol("A".into()),
                Expr::Symbol("B".into())
            )]
        );
    }

    #[test]
    fn test_parse_cons_proper_list() {
        let program = Reader::parse("(a . (b . nil))").unwrap();
        assert_eq!(
            program.exprs,
            vec![Expr::cons(
                Expr::Symbol("A".into()),
                Expr::cons(Expr::Symbol("B".into()), Expr::Nil)
            )]
        );
    }

    // Elements before the dot are consed onto the tail from right to left.
    #[test]
    fn test_parse_improper_list() {
        let program = Reader::parse("(1 2 . 3)").unwrap();
        assert_eq!(
            program.exprs,
            vec![Expr::cons(
                Expr::Number(Fraction::from_integer(1)),
                Expr::cons(
                    Expr::Number(Fraction::from_integer(2)),
                    Expr::Number(Fraction::from_integer(3))
                )
            )]
        );
    }

    // Only one expression may follow the dot; a second one is a parse error.
    #[test]
    fn test_invalid_dotted_pair_multiple_exprs_after_dot() {
        let result = Reader::parse("(1 . (2 . 3) (3 . nil))");
        assert!(result.is_err());
        let err = result.unwrap_err();
        let msg = err.to_string();
        assert!(
            msg.contains("closing paren") || msg.contains("one expression after dot"),
            "Error should mention closing paren or one expression after dot, got: {msg}"
        );
    }

    // ---- Quasiquote, unquote and unquote-splicing ----

    #[test]
    fn test_parse_quasiquote() {
        let program = Reader::parse("`foo").unwrap();
        assert_eq!(
            program.exprs,
            vec![Expr::Quasiquote(Box::new(Expr::Symbol("FOO".into())))]
        );
    }

    #[test]
    fn test_parse_unquote() {
        let program = Reader::parse(",foo").unwrap();
        assert_eq!(
            program.exprs,
            vec![Expr::Unquote(Box::new(Expr::Symbol("FOO".into())))]
        );
    }

    #[test]
    fn test_parse_unquote_splicing() {
        let program = Reader::parse(",@foo").unwrap();
        assert_eq!(
            program.exprs,
            vec![Expr::UnquoteSplicing(Box::new(Expr::Symbol("FOO".into())))]
        );
    }

    #[test]
    fn test_parse_quasiquoted_list() {
        let program = Reader::parse("`(if ,test ,body)").unwrap();
        assert_eq!(
            program.exprs,
            vec![Expr::Quasiquote(Box::new(Expr::List(vec![
                Expr::Symbol("IF".into()),
                Expr::Unquote(Box::new(Expr::Symbol("TEST".into()))),
                Expr::Unquote(Box::new(Expr::Symbol("BODY".into()))),
            ])))]
        );
    }
}