Skip to main content

nomiscript/
reader.rs

1use winnow::ascii::{digit1, line_ending, space0, till_line_ending};
2use winnow::combinator::{alt, cut_err, opt, preceded};
3use winnow::error::{AddContext, ContextError, ErrMode, ModalResult, StrContext, StrContextValue};
4use winnow::prelude::*;
5use winnow::token::{any, none_of, take_till, take_while};
6
7use tracing::debug;
8
9use crate::ast::{Annotation, Expr, Fraction, Program};
10use crate::error::{Error, Result};
11
12pub struct Reader;
13
14impl Reader {
15    pub fn parse(input: &str) -> Result<Program> {
16        debug!(input_len = input.len(), "parse start");
17        let mut remaining = input;
18        let (exprs, annotations) = parse_program(&mut remaining).map_err(|e| {
19            let offset = input.len() - remaining.len();
20            Error::parse(format_parse_error(e), input, offset..offset + 1)
21        })?;
22        debug!(
23            expr_count = exprs.len(),
24            annotation_count = annotations.len(),
25            "parse complete"
26        );
27        Ok(Program::with_annotations(exprs, annotations))
28    }
29
30    #[must_use]
31    pub fn is_incomplete(input: &str) -> bool {
32        let mut remaining = input;
33        parse_program(&mut remaining).is_err() && remaining.is_empty()
34    }
35
36    pub fn parse_expr(input: &str) -> Result<Expr> {
37        let mut remaining = input;
38        parse_expr(&mut remaining).map_err(|e| {
39            let offset = input.len() - remaining.len();
40            Error::parse(format_parse_error(e), input, offset..offset + 1)
41        })
42    }
43}
44
45fn format_parse_error(err: ErrMode<ContextError>) -> String {
46    match err {
47        ErrMode::Backtrack(ctx) | ErrMode::Cut(ctx) => format_context_error(&ctx),
48        ErrMode::Incomplete(_) => "unexpected end of input".to_string(),
49    }
50}
51
52fn format_context_error(ctx: &ContextError) -> String {
53    let mut parts = Vec::new();
54    for c in ctx.context() {
55        match c {
56            StrContext::Label(label) => parts.push(format!("in {label}")),
57            StrContext::Expected(StrContextValue::Description(desc)) => {
58                parts.push(format!("expected {desc}"));
59            }
60            StrContext::Expected(StrContextValue::CharLiteral(c)) => {
61                parts.push(format!("expected '{c}'"));
62            }
63            StrContext::Expected(StrContextValue::StringLiteral(s)) => {
64                parts.push(format!("expected \"{s}\""));
65            }
66            _ => {}
67        }
68    }
69    if parts.is_empty() {
70        "parse error".to_string()
71    } else {
72        parts.join(", ")
73    }
74}
75
76fn skip_ws_and_comments(input: &mut &str, annotations: &mut Vec<Annotation>) -> ModalResult<()> {
77    loop {
78        let _ = space0.parse_next(input)?;
79        if input.starts_with("; @") {
80            let _ = "; @".parse_next(input)?;
81            let name: &str = take_while(1.., |c: char| c.is_alphanumeric() || c == '-' || c == '_')
82                .parse_next(input)?;
83            let _ = space0.parse_next(input)?;
84
85            let value = parse_expr(input).unwrap_or(Expr::Nil);
86            annotations.push(Annotation {
87                name: name.to_string(),
88                value,
89            });
90            let _ = till_line_ending.parse_next(input)?;
91            let _ = opt(line_ending).parse_next(input)?;
92        } else if input.starts_with(';') {
93            let _ = till_line_ending.parse_next(input)?;
94            let _ = opt(line_ending).parse_next(input)?;
95        } else if input.starts_with('\n') || input.starts_with('\r') {
96            let _ = line_ending.parse_next(input)?;
97        } else {
98            break;
99        }
100    }
101    Ok(())
102}
103
104fn skip_ws_and_comments_no_annotations(input: &mut &str) -> ModalResult<()> {
105    let mut dummy = Vec::new();
106    skip_ws_and_comments(input, &mut dummy)
107}
108
109fn parse_program(input: &mut &str) -> ModalResult<(Vec<Expr>, Vec<Annotation>)> {
110    let mut annotations = Vec::new();
111    skip_ws_and_comments(input, &mut annotations)?;
112
113    let mut exprs = Vec::new();
114    loop {
115        if input.is_empty() {
116            break;
117        }
118        let expr = parse_expr(input)?;
119        exprs.push(expr);
120        skip_ws_and_comments(input, &mut annotations)?;
121    }
122
123    Ok((exprs, annotations))
124}
125
126fn parse_expr(input: &mut &str) -> ModalResult<Expr> {
127    let expr = alt((
128        alt((
129            parse_nil,
130            parse_bool,
131            parse_number,
132            parse_string,
133            parse_quote,
134        )),
135        alt((
136            parse_quasiquote,
137            parse_unquote,
138            parse_list,
139            parse_keyword,
140            parse_symbol,
141        )),
142    ))
143    .parse_next(input)?;
144    debug!(expr = ?expr, "parsed expression");
145    Ok(expr)
146}
147
148fn parse_nil(input: &mut &str) -> ModalResult<Expr> {
149    alt(("nil", "NIL", "Nil"))
150        .value(Expr::Nil)
151        .parse_next(input)
152}
153
154fn parse_bool(input: &mut &str) -> ModalResult<Expr> {
155    alt(("#t", "#T", "#f", "#F"))
156        .map(|s: &str| match s {
157            "#t" | "#T" => Expr::Bool(true),
158            _ => Expr::Nil,
159        })
160        .parse_next(input)
161}
162
/// Returns `true` for characters that end a token: any whitespace, or one of
/// `(`, `)`, `"`, `'`, `` ` ``, `,`, `;`.
fn is_delimiter(c: char) -> bool {
    matches!(c, '(' | ')' | '"' | '\'' | '`' | ',' | ';') || c.is_whitespace()
}
166
167fn parse_number(input: &mut &str) -> ModalResult<Expr> {
168    let sign = opt(alt(("-".value(-1i64), "+".value(1i64))))
169        .map(|s| s.unwrap_or(1))
170        .parse_next(input)?;
171
172    let int_part: &str = digit1.parse_next(input)?;
173    let int_val: i64 = int_part
174        .parse()
175        .map_err(|_| ErrMode::Cut(ContextError::new()))?;
176
177    let frac_part = opt(preceded('.', digit1)).parse_next(input)?;
178
179    if input.starts_with(|c: char| !is_delimiter(c)) {
180        return Err(ErrMode::Backtrack(ContextError::new()));
181    }
182
183    let fraction = if let Some(decimals) = frac_part {
184        let decimal_places = decimals.len() as u32;
185        let decimal_val: i64 = decimals
186            .parse()
187            .map_err(|_| ErrMode::Cut(ContextError::new()))?;
188        let denom = 10i64.pow(decimal_places);
189        Fraction::new(sign * (int_val * denom + decimal_val), denom)
190    } else {
191        Fraction::from_integer(sign * int_val)
192    };
193
194    Ok(Expr::Number(fraction))
195}
196
197fn parse_string(input: &mut &str) -> ModalResult<Expr> {
198    alt((parse_triple_quoted_string, parse_double_quoted_string)).parse_next(input)
199}
200
201fn parse_double_quoted_string(input: &mut &str) -> ModalResult<Expr> {
202    let _ = '"'.parse_next(input)?;
203    let mut result = String::new();
204
205    loop {
206        let chunk: &str = take_till(0.., |c| c == '"' || c == '\\').parse_next(input)?;
207        result.push_str(chunk);
208
209        match cut_err(any)
210            .context(StrContext::Label("string"))
211            .context(StrContext::Expected(StrContextValue::Description(
212                "closing quote",
213            )))
214            .parse_next(input)?
215        {
216            '"' => return Ok(Expr::String(result)),
217            '\\' => {
218                let escaped = any.parse_next(input)?;
219                match escaped {
220                    'n' => result.push('\n'),
221                    't' => result.push('\t'),
222                    'r' => result.push('\r'),
223                    '\\' => result.push('\\'),
224                    '"' => result.push('"'),
225                    c => {
226                        result.push('\\');
227                        result.push(c);
228                    }
229                }
230            }
231            _ => unreachable!(),
232        }
233    }
234}
235
236fn parse_triple_quoted_string(input: &mut &str) -> ModalResult<Expr> {
237    let _ = "\"\"\"".parse_next(input)?;
238    let mut content = String::new();
239
240    loop {
241        if input.starts_with("\"\"\"") {
242            let _ = "\"\"\"".parse_next(input)?;
243            return Ok(Expr::String(content));
244        }
245        if input.is_empty() {
246            return Err(ErrMode::Cut(
247                ContextError::new()
248                    .add_context(input, &input.checkpoint(), StrContext::Label("string"))
249                    .add_context(
250                        input,
251                        &input.checkpoint(),
252                        StrContext::Expected(StrContextValue::Description("closing \"\"\"")),
253                    ),
254            ));
255        }
256        content.push(any.parse_next(input)?);
257    }
258}
259
260fn parse_quote(input: &mut &str) -> ModalResult<Expr> {
261    let _ = '\''.parse_next(input)?;
262    let expr = parse_expr.parse_next(input)?;
263    Ok(Expr::Quote(Box::new(expr)))
264}
265
266fn parse_quasiquote(input: &mut &str) -> ModalResult<Expr> {
267    let _ = '`'.parse_next(input)?;
268    let expr = parse_expr.parse_next(input)?;
269    Ok(Expr::Quasiquote(Box::new(expr)))
270}
271
272fn parse_unquote(input: &mut &str) -> ModalResult<Expr> {
273    let _ = ','.parse_next(input)?;
274    if input.starts_with('@') {
275        let _ = '@'.parse_next(input)?;
276        let expr = parse_expr.parse_next(input)?;
277        return Ok(Expr::UnquoteSplicing(Box::new(expr)));
278    }
279    let expr = parse_expr.parse_next(input)?;
280    Ok(Expr::Unquote(Box::new(expr)))
281}
282
283fn parse_list(input: &mut &str) -> ModalResult<Expr> {
284    let _ = '('.parse_next(input)?;
285    skip_ws_and_comments_no_annotations(input)?;
286
287    let mut items = Vec::new();
288    let mut dotted_cdr: Option<Expr> = None;
289
290    loop {
291        if input.starts_with(')') {
292            break;
293        }
294        if input.is_empty() {
295            return Err(ErrMode::Cut(
296                ContextError::new()
297                    .add_context(input, &input.checkpoint(), StrContext::Label("list"))
298                    .add_context(
299                        input,
300                        &input.checkpoint(),
301                        StrContext::Expected(StrContextValue::Description("closing paren")),
302                    ),
303            ));
304        }
305        if input.starts_with('.') && input.chars().nth(1).is_some_and(char::is_whitespace) {
306            let _ = '.'.parse_next(input)?;
307            skip_ws_and_comments_no_annotations(input)?;
308            dotted_cdr = Some(
309                cut_err(parse_expr)
310                    .context(StrContext::Label("cdr expression"))
311                    .parse_next(input)?,
312            );
313            skip_ws_and_comments_no_annotations(input)?;
314            cut_err(')')
315                .context(StrContext::Label("closing paren"))
316                .context(StrContext::Expected(StrContextValue::Description(
317                    "only one expression after dot in dotted pair",
318                )))
319                .parse_next(input)?;
320            break;
321        }
322        let expr = parse_expr(input)?;
323        items.push(expr);
324        skip_ws_and_comments_no_annotations(input)?;
325    }
326
327    if dotted_cdr.is_none() {
328        cut_err(')')
329            .context(StrContext::Label("list"))
330            .context(StrContext::Expected(StrContextValue::Description(
331                "closing paren",
332            )))
333            .parse_next(input)?;
334    }
335
336    if let Some(cdr) = dotted_cdr {
337        let mut result = cdr;
338        for item in items.into_iter().rev() {
339            result = Expr::cons(item, result);
340        }
341        Ok(result)
342    } else {
343        Ok(Expr::List(items))
344    }
345}
346
347fn parse_keyword(input: &mut &str) -> ModalResult<Expr> {
348    let _ = ':'.parse_next(input)?;
349    let first: char =
350        none_of(|c: char| c.is_whitespace() || "()\"'`,".contains(c)).parse_next(input)?;
351
352    let rest: &str = take_while(0.., |c: char| !c.is_whitespace() && !"()\"'`,".contains(c))
353        .parse_next(input)?;
354
355    let mut name = String::with_capacity(1 + rest.len());
356    name.push(first);
357    name.push_str(rest);
358    name.make_ascii_uppercase();
359    Ok(Expr::Keyword(name))
360}
361
362fn parse_symbol(input: &mut &str) -> ModalResult<Expr> {
363    let first: char =
364        none_of(|c: char| c.is_whitespace() || "()\"'`,".contains(c)).parse_next(input)?;
365
366    let rest: &str = take_while(0.., |c: char| !c.is_whitespace() && !"()\"'`,".contains(c))
367        .parse_next(input)?;
368
369    let mut name = String::with_capacity(1 + rest.len());
370    name.push(first);
371    name.push_str(rest);
372    name.make_ascii_uppercase();
373    if name == "NIL" {
374        return Ok(Expr::Nil);
375    }
376    if name == "T" {
377        return Ok(Expr::Bool(true));
378    }
379    Ok(Expr::Symbol(name))
380}
381
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `src` and returns its top-level expressions, panicking on error.
    fn read(src: &str) -> Vec<Expr> {
        Reader::parse(src).unwrap().exprs
    }

    /// Shorthand for an integer-valued number expression.
    fn int(n: i64) -> Expr {
        Expr::Number(Fraction::from_integer(n))
    }

    /// Shorthand for a symbol expression with the given (already uppercased) name.
    fn sym(name: &str) -> Expr {
        Expr::Symbol(name.into())
    }

    #[test]
    fn test_parse_nil() {
        assert_eq!(read("nil"), vec![Expr::Nil]);
    }

    #[test]
    fn test_parse_bool() {
        assert_eq!(
            read("#t #T t T"),
            vec![
                Expr::Bool(true),
                Expr::Bool(true),
                Expr::Bool(true),
                Expr::Bool(true),
            ]
        );
    }

    #[test]
    fn test_parse_false_is_nil() {
        assert_eq!(read("#f #F"), vec![Expr::Nil, Expr::Nil]);
    }

    #[test]
    fn test_parse_nil_case_insensitive() {
        for spelling in ["nil", "NIL", "Nil"] {
            assert_eq!(read(spelling), vec![Expr::Nil]);
        }
    }

    #[test]
    fn test_parse_integer() {
        assert_eq!(read("42"), vec![int(42)]);
    }

    #[test]
    fn test_parse_negative_number() {
        assert_eq!(read("-17"), vec![int(-17)]);
    }

    #[test]
    fn test_parse_decimal() {
        assert_eq!(read("0.1"), vec![Expr::Number(Fraction::new(1, 10))]);
    }

    #[test]
    fn test_parse_symbol() {
        assert_eq!(read("foo"), vec![sym("FOO")]);
    }

    #[test]
    fn test_parse_symbol_case_insensitive() {
        for (lhs, rhs) in [("foo", "FOO"), ("Sum", "SUM")] {
            assert_eq!(read(lhs), read(rhs));
        }
    }

    #[test]
    fn test_parse_keyword() {
        assert_eq!(read(":foo"), vec![Expr::Keyword("FOO".into())]);
    }

    #[test]
    fn test_parse_keyword_case_insensitive() {
        for (lhs, rhs) in [(":foo", ":FOO"), (":Name", ":NAME")] {
            assert_eq!(read(lhs), read(rhs));
        }
    }

    #[test]
    fn test_parse_string() {
        assert_eq!(read(r#""hello""#), vec![Expr::String("hello".into())]);
    }

    #[test]
    fn test_parse_string_with_escapes() {
        assert_eq!(
            read(r#""hello\nworld""#),
            vec![Expr::String("hello\nworld".into())]
        );
    }

    #[test]
    fn test_parse_list() {
        assert_eq!(
            read("(+ 1 2)"),
            vec![Expr::List(vec![sym("+"), int(1), int(2)])]
        );
    }

    #[test]
    fn test_parse_nested_list() {
        let expected = Expr::List(vec![
            sym("DEFINE"),
            Expr::List(vec![sym("SQUARE"), sym("X")]),
            Expr::List(vec![sym("*"), sym("X"), sym("X")]),
        ]);
        assert_eq!(read("(define (square x) (* x x))"), vec![expected]);
    }

    #[test]
    fn test_parse_quote() {
        assert_eq!(read("'foo"), vec![Expr::Quote(Box::new(sym("FOO")))]);
    }

    #[test]
    fn test_parse_quoted_list() {
        let inner = Expr::List(vec![int(1), int(2), int(3)]);
        assert_eq!(read("'(1 2 3)"), vec![Expr::Quote(Box::new(inner))]);
    }

    #[test]
    fn test_parse_multiple_exprs() {
        assert_eq!(read("(DEFINE x 10) (+ x 5)").len(), 2);
    }

    #[test]
    fn test_parse_line_comment() {
        assert_eq!(read("; this is a comment\n42"), vec![int(42)]);
    }

    #[test]
    fn test_parse_inline_comment() {
        assert_eq!(read("42 ; inline comment"), vec![int(42)]);
    }

    #[test]
    fn test_parse_comment_in_list() {
        assert_eq!(
            read("(+ 1 ; add one\n   2)"),
            vec![Expr::List(vec![sym("+"), int(1), int(2)])]
        );
    }

    #[test]
    fn test_parse_multiple_comments() {
        let code = "; comment 1\n; comment 2\n42\n; comment 3";
        assert_eq!(read(code), vec![int(42)]);
    }

    #[test]
    fn test_parse_only_comments() {
        assert!(read("; just a comment").is_empty());
    }

    #[test]
    fn test_parse_annotation() {
        let program = Reader::parse("; @test (= (count 'defun) 5)\n42").unwrap();
        assert_eq!(program.exprs.len(), 1);
        assert_eq!(program.annotations.len(), 1);

        let annotation = &program.annotations[0];
        assert_eq!(annotation.name, "test");
        assert!(matches!(&annotation.value, Expr::List(_)));
    }

    #[test]
    fn test_parse_multiple_annotations() {
        let code = "; @test (= (count 'defun) 5)\n; @test (= (count 'defvar) 2)\n42";
        let program = Reader::parse(code).unwrap();
        assert_eq!(program.annotations.len(), 2);
    }

    #[test]
    fn test_annotation_with_simple_expr() {
        let program = Reader::parse("; @version 1\n42").unwrap();
        assert_eq!(program.annotations.len(), 1);
        assert_eq!(program.annotations[0].name, "version");
        assert_eq!(program.annotations[0].value, int(1));
    }

    #[test]
    fn test_regular_comment_no_annotation() {
        let program = Reader::parse("; just a comment\n42").unwrap();
        assert!(program.annotations.is_empty());
    }

    #[test]
    fn test_parse_cons() {
        assert_eq!(read("(a . b)"), vec![Expr::cons(sym("A"), sym("B"))]);
    }

    #[test]
    fn test_parse_cons_proper_list() {
        let expected = Expr::cons(sym("A"), Expr::cons(sym("B"), Expr::Nil));
        assert_eq!(read("(a . (b . nil))"), vec![expected]);
    }

    #[test]
    fn test_parse_improper_list() {
        let expected = Expr::cons(int(1), Expr::cons(int(2), int(3)));
        assert_eq!(read("(1 2 . 3)"), vec![expected]);
    }

    #[test]
    fn test_invalid_dotted_pair_multiple_exprs_after_dot() {
        let result = Reader::parse("(1 . (2 . 3) (3 . nil))");
        assert!(result.is_err());

        let msg = result.unwrap_err().to_string();
        assert!(
            msg.contains("closing paren") || msg.contains("one expression after dot"),
            "Error should mention closing paren or one expression after dot, got: {msg}"
        );
    }

    #[test]
    fn test_parse_quasiquote() {
        assert_eq!(read("`foo"), vec![Expr::Quasiquote(Box::new(sym("FOO")))]);
    }

    #[test]
    fn test_parse_unquote() {
        assert_eq!(read(",foo"), vec![Expr::Unquote(Box::new(sym("FOO")))]);
    }

    #[test]
    fn test_parse_unquote_splicing() {
        assert_eq!(
            read(",@foo"),
            vec![Expr::UnquoteSplicing(Box::new(sym("FOO")))]
        );
    }

    #[test]
    fn test_parse_quasiquoted_list() {
        let template = Expr::List(vec![
            sym("IF"),
            Expr::Unquote(Box::new(sym("TEST"))),
            Expr::Unquote(Box::new(sym("BODY"))),
        ]);
        assert_eq!(
            read("`(if ,test ,body)"),
            vec![Expr::Quasiquote(Box::new(template))]
        );
    }
}