Skip to main content

r/parser/
diagnostics.rs

1use std::fmt;
2
3use pest::error::InputLocation;
4
5use super::Rule;
6
/// A parse failure bundled with everything needed to print a friendly report:
/// the message, the position, the offending source line and an optional hint.
#[derive(Clone, Debug)]
pub struct ParseError {
    /// Human-readable description of what went wrong.
    pub message: String,
    /// Line the error is reported on (1-based in the values this module produces).
    pub line: usize,
    /// Column the error is reported on; the `Display` impl places the caret
    /// `col - 1` characters into `source_line`.
    pub col: usize,
    /// Text of the source line printed above the caret.
    pub source_line: String,
    /// File name printed in the report header when present. This module always
    /// leaves it as `None`; presumably callers fill it in.
    pub filename: Option<String>,
    /// Optional "help" text suggesting how to fix the problem.
    pub suggestion: Option<String>,
    /// The complete source text that was being parsed. Populated by
    /// `convert_pest_error` and `parse_program`, and consumed by the miette
    /// diagnostic renderer. Boxed so that `ParseError` stays small in the
    /// common `Result::Ok` path.
    pub source_code: Option<Box<String>>,
    /// Byte offset into `source_code` at which the error occurred.
    pub byte_offset: usize,
    /// Size of the highlighted span in bytes (0 = a point, >0 = a range).
    pub span_length: usize,
}
25
// Marker impl so `ParseError` can be used wherever a `std::error::Error` is
// expected. The body is empty, so every trait method keeps its default; the
// required `Debug` and `Display` impls come from the derive on the struct and
// the `fmt::Display` impl in this file.
impl std::error::Error for ParseError {}
27
28impl fmt::Display for ParseError {
29    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
30        // Header: error message
31        if let Some(ref filename) = self.filename {
32            writeln!(
33                f,
34                "Error in parse: {}:{}:{}: {}",
35                filename, self.line, self.col, self.message
36            )?;
37        } else {
38            writeln!(f, "Error: {}", self.message)?;
39        }
40
41        // Source line with caret
42        let line_num = format!("{}", self.line);
43        let gutter_width = line_num.len();
44        writeln!(f, "{} |", " ".repeat(gutter_width))?;
45        writeln!(f, "{} | {}", line_num, self.source_line)?;
46        let caret_offset = self.col.saturating_sub(1);
47        write!(
48            f,
49            "{} | {}^",
50            " ".repeat(gutter_width),
51            " ".repeat(caret_offset)
52        )?;
53
54        // Suggestion
55        if let Some(ref suggestion) = self.suggestion {
56            write!(f, "\n{} |", " ".repeat(gutter_width))?;
57            write!(f, "\n{} = help: {}", " ".repeat(gutter_width), suggestion)?;
58        }
59
60        Ok(())
61    }
62}
63
64// region: miette Diagnostic implementation
65
#[cfg(feature = "diagnostics")]
impl miette::Diagnostic for ParseError {
    /// Fixed diagnostic code shared by every parse error.
    fn code<'a>(&'a self) -> Option<Box<dyn fmt::Display + 'a>> {
        let code: &'static str = "parse::error";
        Some(Box::new(code))
    }

    /// The suggestion, if any, is surfaced as miette's help text.
    fn help<'a>(&'a self) -> Option<Box<dyn fmt::Display + 'a>> {
        let hint = self.suggestion.as_deref()?;
        Some(Box::new(hint))
    }

    /// Exposes the stored source text so labels can be rendered against it.
    fn source_code(&self) -> Option<&dyn miette::SourceCode> {
        match &self.source_code {
            Some(boxed) => {
                let text: &String = boxed;
                Some(text as &dyn miette::SourceCode)
            }
            None => None,
        }
    }

    /// A single label carrying the error message, covering at least one byte
    /// starting at `byte_offset`. No labels are produced without source text,
    /// since there would be nothing to render them against.
    fn labels(&self) -> Option<Box<dyn Iterator<Item = miette::LabeledSpan> + '_>> {
        self.source_code.as_ref()?;

        let start = self.byte_offset;
        let end = start + self.span_length.max(1);
        let label = miette::LabeledSpan::at(start..end, &self.message);
        Some(Box::new(std::iter::once(label)))
    }
}
97
#[cfg(feature = "diagnostics")]
impl ParseError {
    /// Produce a graphical report via miette's `GraphicalReportHandler`.
    /// If the handler reports a rendering error, the plain `Display` output
    /// is returned instead.
    pub fn render(&self) -> String {
        let mut rendered = String::new();
        let outcome = miette::GraphicalReportHandler::new().render_report(&mut rendered, self);
        if outcome.is_ok() {
            rendered
        } else {
            self.to_string()
        }
    }
}
111
112#[cfg(not(feature = "diagnostics"))]
113impl ParseError {
114    /// Render this error. Without the `diagnostics` feature, this is just Display.
115    pub fn render(&self) -> String {
116        format!("{}", self)
117    }
118}
119
120// endregion
121
/// Compute the byte offset into `source` for a 1-based `line` and 1-based `col`.
///
/// `col` is interpreted as a byte column within the line. It is clamped to the
/// end of the line's content (excluding the line terminator) and then moved
/// back to the nearest char boundary, so the returned offset is always safe to
/// slice `source` at. A `col` of 0 is treated like 1.
///
/// Both `\n` and `\r\n` terminators are accounted for with their real byte
/// length, so offsets stay correct for sources with Windows line endings.
///
/// Returns `source.len()` when `line` is past the last line (or is 0).
fn line_col_to_byte_offset(source: &str, line: usize, col: usize) -> usize {
    let mut line_start = 0;
    for (index, raw) in source.split_inclusive('\n').enumerate() {
        if index + 1 == line {
            // Drop the terminator the same way `str::lines` does: a trailing
            // `\n`, plus a `\r` only when it directly precedes that `\n`.
            let content = match raw.strip_suffix('\n') {
                Some(body) => body.strip_suffix('\r').unwrap_or(body),
                None => raw,
            };
            let mut within = col.saturating_sub(1).min(content.len());
            // Index 0 is always a boundary, so this terminates.
            while !content.is_char_boundary(within) {
                within -= 1;
            }
            return line_start + within;
        }
        // `raw` still includes its terminator, whatever its length.
        line_start += raw.len();
    }
    // Past end of source
    source.len()
}
135
136/// Convert a pest error into a human-friendly ParseError.
137pub(super) fn convert_pest_error(e: pest::error::Error<Rule>, source: &str) -> ParseError {
138    let (line, col) = match e.line_col {
139        pest::error::LineColLocation::Pos((l, c)) => (l, c),
140        pest::error::LineColLocation::Span((l, c), _) => (l, c),
141    };
142
143    let source_line = source.lines().nth(line - 1).unwrap_or("").to_string();
144
145    // Get byte offset for token classification
146    let byte_offset = match e.location {
147        InputLocation::Pos(p) => p,
148        InputLocation::Span((s, _)) => s,
149    };
150
151    // Compute span length from the token at the error position
152    let span_length = token_length_at(source, byte_offset);
153
154    // Try common-mistake detection first
155    if let Some(mut err) = detect_common_mistakes(source, &source_line, line, col) {
156        err.source_code = Some(Box::new(source.to_string()));
157        if err.byte_offset == 0 && (err.line > 1 || err.col > 1) {
158            err.byte_offset = line_col_to_byte_offset(source, err.line, err.col);
159        }
160        return err;
161    }
162
163    // Classify what was found at the error position
164    let found_token = classify_token(source, byte_offset);
165
166    // Build R-style "unexpected <token> in <context>" message
167    let context = build_context(&source_line, col);
168    let message = if context.is_empty() {
169        format!("unexpected {}", found_token)
170    } else {
171        format!("unexpected {} in \"{}\"", found_token, context)
172    };
173
174    // Try to generate a suggestion from what was expected
175    let suggestion = suggest_from_expected(&e, &found_token, &source_line, col);
176
177    ParseError {
178        message,
179        line,
180        col,
181        source_line,
182        filename: None,
183        suggestion,
184        source_code: Some(Box::new(source.to_string())),
185        byte_offset,
186        span_length,
187    }
188}
189
/// Compute the byte length of the token starting at `offset`, for span highlighting.
///
/// `offset` is clamped to the end of `source` and moved back to the nearest
/// char boundary, so an out-of-range or mid-character offset never panics.
///
/// Returns:
/// - `0` at (or past) the end of the source,
/// - `1` for a string literal (only the opening quote is highlighted),
/// - the length of the numeric literal, identifier/keyword, or multi-character
///   operator found there,
/// - otherwise the UTF-8 length of the single character at that position.
fn token_length_at(source: &str, offset: usize) -> usize {
    const OPERATORS: [&str; 13] = [
        "<<-", "<-", "->>", "->", "|>", "||", "&&", "==", "!=", ">=", "<=", "%%", "**",
    ];

    let mut start = offset.min(source.len());
    // Index 0 is always a boundary, so this terminates.
    while !source.is_char_boundary(start) {
        start -= 1;
    }
    let remaining = &source[start..];

    let ch = match remaining.chars().next() {
        Some(first) => first,
        None => return 0,
    };

    // String literal — highlight the opening quote
    if ch == '"' || ch == '\'' {
        return 1;
    }

    // Number (a leading `.` counts only when a digit follows it)
    let dot_then_digit =
        ch == '.' && remaining.as_bytes().get(1).map_or(false, |b| b.is_ascii_digit());
    if ch.is_ascii_digit() || dot_then_digit {
        return remaining
            .find(|c: char| !c.is_ascii_digit() && c != '.' && c != 'e' && c != 'E' && c != 'L')
            .unwrap_or(remaining.len());
    }

    // Keyword or identifier
    if ch.is_ascii_alphabetic() || ch == '.' || ch == '_' {
        return remaining
            .find(|c: char| !c.is_ascii_alphanumeric() && c != '.' && c != '_')
            .unwrap_or(remaining.len());
    }

    // Multi-char operators; longer spellings come first so `<<-` wins over `<-`.
    for op in &OPERATORS {
        if remaining.starts_with(op) {
            return op.len();
        }
    }

    // Single char
    ch.len_utf8()
}
235
236/// Map a pest grammar rule to a human-readable description.
237fn humanize_rule(rule: &Rule) -> &'static str {
238    match rule {
239        Rule::expr | Rule::unary_expr | Rule::primary_expr => "an expression",
240        Rule::ident | Rule::plain_ident | Rule::dotted_ident => "a variable name",
241        Rule::number | Rule::decimal_number | Rule::hex_number => "a number",
242        Rule::string => "a string",
243        Rule::block => "a block `{ ... }`",
244        Rule::paren_expr => "a parenthesized expression",
245        Rule::if_expr => "an if-expression",
246        Rule::for_expr => "a for-loop",
247        Rule::while_expr => "a while-loop",
248        Rule::function_def => "a function definition",
249        Rule::param_list => "function parameters",
250        Rule::arg_list => "function arguments",
251        Rule::eq_assign_op => "'='",
252        Rule::left_assign_op => "'<-'",
253        Rule::right_assign_op => "'->'",
254        Rule::or_op => "'|' or '||'",
255        Rule::and_op => "'&' or '&&'",
256        Rule::compare_op => "a comparison operator",
257        Rule::add_op => "'+' or '-'",
258        Rule::mul_op => "'*' or '/'",
259        Rule::special_op => "a special operator (%%,  %in%, etc.)",
260        Rule::pipe_op => "'|>'",
261        Rule::power_op => "'^'",
262        Rule::EOI => "end of input",
263        _ => "an expression",
264    }
265}
266
/// Classify the token found at a byte offset in the source, R-style.
///
/// `offset` is clamped to the end of `source` and moved back to the nearest
/// char boundary, so an out-of-range or mid-character offset never panics.
///
/// Returns one of:
/// - `"end of input"` at (or past) the end of the source,
/// - `"string constant"` for a quote character,
/// - `"numeric constant <text>"` for a numeric literal,
/// - `"'<word>'"` for reserved words and built-in constants,
/// - `"symbol '<word>'"` for any other identifier,
/// - `"'<op>'"` for a known multi-character operator, otherwise the single
///   character at that position in quotes.
fn classify_token(source: &str, offset: usize) -> String {
    const OPERATORS: [&str; 13] = [
        "<<-", "<-", "->>", "->", "|>", "||", "&&", "==", "!=", ">=", "<=", "%%", "**",
    ];

    let mut start = offset.min(source.len());
    // Index 0 is always a boundary, so this terminates.
    while !source.is_char_boundary(start) {
        start -= 1;
    }
    let remaining = &source[start..];

    let ch = match remaining.chars().next() {
        Some(first) => first,
        None => return "end of input".to_string(),
    };

    // String literal
    if ch == '"' || ch == '\'' {
        return "string constant".to_string();
    }

    // Number (a leading `.` counts only when a digit follows it)
    let dot_then_digit =
        ch == '.' && remaining.as_bytes().get(1).map_or(false, |b| b.is_ascii_digit());
    if ch.is_ascii_digit() || dot_then_digit {
        let end = remaining
            .find(|c: char| !c.is_ascii_digit() && c != '.' && c != 'e' && c != 'E' && c != 'L')
            .unwrap_or(remaining.len());
        return format!("numeric constant {}", &remaining[..end]);
    }

    // Keyword or identifier
    if ch.is_ascii_alphabetic() || ch == '.' || ch == '_' {
        let end = remaining
            .find(|c: char| !c.is_ascii_alphanumeric() && c != '.' && c != '_')
            .unwrap_or(remaining.len());
        let word = &remaining[..end];
        return match word {
            "if" | "else" | "for" | "in" | "while" | "repeat" | "function" | "return" | "break"
            | "next" | "TRUE" | "FALSE" | "NULL" | "NA" | "Inf" | "NaN" => format!("'{}'", word),
            _ => format!("symbol '{}'", word),
        };
    }

    // Multi-char operators; longer spellings come first so `<<-` wins over `<-`.
    for op in &OPERATORS {
        if remaining.starts_with(op) {
            return format!("'{}'", op);
        }
    }

    // Any other single character (punctuation, brackets, ...)
    format!("'{}'", ch)
}
352
353/// Build context string showing input up to the error, truncated to ~40 chars.
354fn build_context(source_line: &str, col: usize) -> String {
355    // col is a byte offset — clamp to the nearest char boundary
356    let end = floor_char_boundary(source_line, col.min(source_line.len()));
357    let context = &source_line[..end];
358    if context.len() > 40 {
359        let start = ceil_char_boundary(context, context.len() - 37);
360        format!("...{}", &context[start..])
361    } else {
362        context.to_string()
363    }
364}
365
/// Greatest char-boundary index in `s` that is `<= pos` (with `pos` clamped
/// to `s.len()`).
fn floor_char_boundary(s: &str, pos: usize) -> usize {
    let upper = pos.min(s.len());
    // Scan downwards; index 0 is always a boundary so the search cannot fail.
    (0..=upper)
        .rev()
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(0)
}
375
/// Least char-boundary index in `s` that is `>= pos` (with `pos` clamped to
/// `s.len()`).
fn ceil_char_boundary(s: &str, pos: usize) -> usize {
    let len = s.len();
    // Scan upwards; `len` itself is always a boundary so the search cannot fail.
    (pos.min(len)..=len)
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(len)
}
384
385/// Try to suggest a fix based on what was expected.
386fn suggest_from_expected(
387    e: &pest::error::Error<Rule>,
388    found: &str,
389    source_line: &str,
390    col: usize,
391) -> Option<String> {
392    let expected_rules: Vec<Rule> = match &e.variant {
393        pest::error::ErrorVariant::ParsingError { positives, .. } => positives.clone(),
394        _ => vec![],
395    };
396
397    // If we found a closing bracket where an expression was expected
398    if (found.contains("')'") || found.contains("'}'") || found.contains("']'"))
399        && expected_rules
400            .iter()
401            .any(|r| matches!(r, Rule::expr | Rule::unary_expr | Rule::primary_expr))
402    {
403        return Some("remove the extra bracket, or add an expression before it".to_string());
404    }
405
406    // If end of input where expression expected
407    if found == "end of input"
408        && expected_rules
409            .iter()
410            .any(|r| matches!(r, Rule::expr | Rule::unary_expr | Rule::primary_expr))
411    {
412        return Some("the expression is incomplete — add the missing part".to_string());
413    }
414
415    // If a value (number, string, symbol) appears where an operator or comma was expected,
416    // this likely means a missing comma inside a function call or vector
417    if (found.starts_with("numeric constant")
418        || found.starts_with("string constant")
419        || found.starts_with("symbol"))
420        && is_inside_call_or_vector(source_line, col)
421    {
422        return Some("did you forget a comma between arguments?".to_string());
423    }
424
425    // Describe what was expected using human-friendly names
426    if !expected_rules.is_empty() {
427        let unique: Vec<&str> = expected_rules
428            .iter()
429            .map(humanize_rule)
430            .collect::<std::collections::HashSet<_>>()
431            .into_iter()
432            .collect();
433        if unique.len() == 1 {
434            return Some(format!("expected {}", unique[0]));
435        }
436        if unique.len() <= 3 {
437            return Some(format!("expected one of: {}", unique.join(", ")));
438        }
439    }
440
441    None
442}
443
/// Heuristic: is the error position inside a function call or `c()` vector?
///
/// Counts `(` and `)` in the part of `source_line` before `col` and returns
/// `true` when more were opened than closed. Parentheses inside string
/// literals are counted too; this is only a hint for suggestion text.
///
/// `col` is used as a byte index; it is clamped to the line length and moved
/// back to a char boundary so lines with multi-byte characters cannot panic.
fn is_inside_call_or_vector(source_line: &str, col: usize) -> bool {
    let mut end = col.min(source_line.len());
    // Index 0 is always a boundary, so this terminates.
    while !source_line.is_char_boundary(end) {
        end -= 1;
    }

    // Single pass: +1 for every `(`, -1 for every `)`.
    let depth: i64 = source_line[..end]
        .chars()
        .map(|c| match c {
            '(' => 1,
            ')' => -1,
            _ => 0,
        })
        .sum();
    depth > 0
}
452
453/// Detect common R mistakes and return a tailored ParseError.
454fn detect_common_mistakes(
455    source: &str,
456    source_line: &str,
457    line: usize,
458    col: usize,
459) -> Option<ParseError> {
460    let trimmed = source_line.trim();
461
462    // --- Unterminated string ---
463    // Check if there's an unclosed string in the source (before bracket checks,
464    // since unclosed strings make bracket counting wrong)
465    if let Some(err) = detect_unterminated_string(source) {
466        return Some(err);
467    }
468
469    // --- Missing parentheses around control flow conditions ---
470
471    // `if x > 0` without parentheses
472    if let Some(rest) = trimmed.strip_prefix("if ") {
473        if !rest.starts_with('(') {
474            let err_col = source_line.find("if ").unwrap_or(0) + 4;
475            return Some(ParseError {
476                message: "missing parentheses around `if` condition".to_string(),
477                line,
478                col: err_col,
479                source_line: source_line.to_string(),
480                filename: None,
481                suggestion: Some("R requires parentheses: `if (condition) ...`".to_string()),
482                source_code: None,
483                byte_offset: line_col_to_byte_offset(source, line, err_col),
484                span_length: 1,
485            });
486        }
487    }
488
489    // `while x > 0` without parentheses
490    if let Some(rest) = trimmed.strip_prefix("while ") {
491        if !rest.starts_with('(') {
492            let err_col = source_line.find("while ").unwrap_or(0) + 7;
493            return Some(ParseError {
494                message: "missing parentheses around `while` condition".to_string(),
495                line,
496                col: err_col,
497                source_line: source_line.to_string(),
498                filename: None,
499                suggestion: Some("R requires parentheses: `while (condition) ...`".to_string()),
500                source_code: None,
501                byte_offset: line_col_to_byte_offset(source, line, err_col),
502                span_length: 1,
503            });
504        }
505    }
506
507    // `for i in 1:10` without parentheses
508    if let Some(rest) = trimmed.strip_prefix("for ") {
509        if !rest.starts_with('(') {
510            let err_col = source_line.find("for ").unwrap_or(0) + 5;
511            return Some(ParseError {
512                message: "missing parentheses around `for` clause".to_string(),
513                line,
514                col: err_col,
515                source_line: source_line.to_string(),
516                filename: None,
517                suggestion: Some("R requires parentheses: `for (var in sequence) ...`".to_string()),
518                source_code: None,
519                byte_offset: line_col_to_byte_offset(source, line, err_col),
520                span_length: 1,
521            });
522        }
523    }
524
525    // --- `function` without parameter list ---
526    // `function { ... }` or `function x + 1`
527    if let Some(rest) = trimmed.strip_prefix("function") {
528        let rest = rest.trim_start();
529        if !rest.starts_with('(') && !rest.is_empty() {
530            let err_col = source_line.find("function").unwrap_or(0) + 1;
531            return Some(ParseError {
532                message: "`function` requires a parameter list".to_string(),
533                line,
534                col: err_col,
535                source_line: source_line.to_string(),
536                filename: None,
537                suggestion: Some(
538                    "use `function(...) body` — even with no parameters, the parentheses are required: `function() ...`"
539                        .to_string(),
540                ),
541                source_code: None,
542                byte_offset: line_col_to_byte_offset(source, line, err_col),
543                span_length: "function".len(),
544            });
545        }
546    }
547
548    // --- `for (i 1:10)` — missing `in` keyword ---
549    if let Some(for_content) = extract_for_parens(trimmed) {
550        // Check if the content after the variable name has `in`
551        let parts: Vec<&str> = for_content.splitn(2, char::is_whitespace).collect();
552        if parts.len() >= 2 {
553            let after_var = parts[1].trim_start();
554            if !after_var.starts_with("in") {
555                let err_col = source_line.find("for").unwrap_or(0) + 1;
556                return Some(ParseError {
557                    message: "missing `in` keyword in `for` loop".to_string(),
558                    line,
559                    col: err_col,
560                    source_line: source_line.to_string(),
561                    filename: None,
562                    suggestion: Some(format!("use `for ({} in {}) ...`", parts[0], after_var)),
563                    source_code: None,
564                    byte_offset: line_col_to_byte_offset(source, line, err_col),
565                    span_length: "for".len(),
566                });
567            }
568        }
569    }
570
571    // --- Unmatched brackets ---
572    let (opens, closes) = count_brackets(source);
573
574    // More closes than opens
575    if closes.0 > opens.0 {
576        return Some(ParseError {
577            message: "unexpected `)` without matching `(`".to_string(),
578            line,
579            col,
580            source_line: source_line.to_string(),
581            filename: None,
582            suggestion: Some("remove the extra `)` or add a matching `(`".to_string()),
583            source_code: None,
584            byte_offset: line_col_to_byte_offset(source, line, col),
585            span_length: 1,
586        });
587    }
588    if closes.1 > opens.1 {
589        return Some(ParseError {
590            message: "unexpected `}` without matching `{`".to_string(),
591            line,
592            col,
593            source_line: source_line.to_string(),
594            filename: None,
595            suggestion: Some("remove the extra `}` or add a matching `{`".to_string()),
596            source_code: None,
597            byte_offset: line_col_to_byte_offset(source, line, col),
598            span_length: 1,
599        });
600    }
601    if closes.2 > opens.2 {
602        return Some(ParseError {
603            message: "unexpected `]` without matching `[`".to_string(),
604            line,
605            col,
606            source_line: source_line.to_string(),
607            filename: None,
608            suggestion: Some("remove the extra `]` or add a matching `[`".to_string()),
609            source_code: None,
610            byte_offset: line_col_to_byte_offset(source, line, col),
611            span_length: 1,
612        });
613    }
614
615    // More opens than closes — find where the unmatched bracket is
616    if opens.0 > closes.0 {
617        let (bl, bc) = find_unmatched_open(source, '(', ')');
618        let bline = source.lines().nth(bl - 1).unwrap_or("").to_string();
619        return Some(ParseError {
620            message: "unmatched `(` — expected a closing `)`".to_string(),
621            line: bl,
622            col: bc,
623            source_line: bline,
624            filename: None,
625            suggestion: Some("add a closing `)` to match this opening `(`".to_string()),
626            source_code: None,
627            byte_offset: line_col_to_byte_offset(source, bl, bc),
628            span_length: 1,
629        });
630    }
631    if opens.1 > closes.1 {
632        let (bl, bc) = find_unmatched_open(source, '{', '}');
633        let bline = source.lines().nth(bl - 1).unwrap_or("").to_string();
634        return Some(ParseError {
635            message: "unmatched `{` — expected a closing `}`".to_string(),
636            line: bl,
637            col: bc,
638            source_line: bline,
639            filename: None,
640            suggestion: Some("add a closing `}` to match this opening `{`".to_string()),
641            source_code: None,
642            byte_offset: line_col_to_byte_offset(source, bl, bc),
643            span_length: 1,
644        });
645    }
646    if opens.2 > closes.2 {
647        // Check for `[[` without `]]`
648        let has_double_bracket = source.contains("[[");
649        let (bl, bc) = find_unmatched_open(source, '[', ']');
650        let bline = source.lines().nth(bl - 1).unwrap_or("").to_string();
651        let msg = if has_double_bracket {
652            "unmatched `[[` — expected a closing `]]`"
653        } else {
654            "unmatched `[` — expected a closing `]`"
655        };
656        let suggestion = if has_double_bracket {
657            "use `]]` to close double-bracket indexing (not just `]`)"
658        } else {
659            "add a closing `]` to match this opening `[`"
660        };
661        let span_len = if has_double_bracket { 2 } else { 1 };
662        return Some(ParseError {
663            message: msg.to_string(),
664            line: bl,
665            col: bc,
666            source_line: bline,
667            filename: None,
668            suggestion: Some(suggestion.to_string()),
669            source_code: None,
670            byte_offset: line_col_to_byte_offset(source, bl, bc),
671            span_length: span_len,
672        });
673    }
674
675    None
676}
677
678/// Detect unterminated strings in the source.
679fn detect_unterminated_string(source: &str) -> Option<ParseError> {
680    let mut in_string = false;
681    let mut string_char = ' ';
682    let mut string_start_line = 0;
683    let mut string_start_col = 0;
684    let mut string_start_byte = 0;
685    let mut prev = ' ';
686    let mut cur_line = 1usize;
687    let mut cur_col = 1usize;
688    let mut cur_byte = 0usize;
689    let mut in_comment = false;
690
691    for ch in source.chars() {
692        if in_comment {
693            if ch == '\n' {
694                in_comment = false;
695                cur_line += 1;
696                cur_col = 1;
697            } else {
698                cur_col += 1;
699            }
700            cur_byte += ch.len_utf8();
701            prev = ch;
702            continue;
703        }
704        if in_string {
705            if ch == '\n' {
706                // Strings in R can span lines, but only raw strings
707                // Regular strings can't contain unescaped newlines
708                // This is an unterminated string
709                let source_line = source
710                    .lines()
711                    .nth(string_start_line - 1)
712                    .unwrap_or("")
713                    .to_string();
714                return Some(ParseError {
715                    message: "unterminated string".to_string(),
716                    line: string_start_line,
717                    col: string_start_col,
718                    source_line,
719                    filename: None,
720                    suggestion: Some(format!(
721                        "add a closing `{}` to complete the string",
722                        string_char
723                    )),
724                    source_code: None,
725                    byte_offset: string_start_byte,
726                    span_length: cur_byte - string_start_byte,
727                });
728            }
729            if ch == string_char && prev != '\\' {
730                in_string = false;
731            }
732            cur_col += 1;
733            cur_byte += ch.len_utf8();
734            prev = ch;
735            continue;
736        }
737        match ch {
738            '#' => in_comment = true,
739            '"' | '\'' => {
740                in_string = true;
741                string_char = ch;
742                string_start_line = cur_line;
743                string_start_col = cur_col;
744                string_start_byte = cur_byte;
745            }
746            '\n' => {
747                cur_line += 1;
748                cur_col = 0; // will be incremented below
749            }
750            _ => {}
751        }
752        cur_col += 1;
753        cur_byte += ch.len_utf8();
754        prev = ch;
755    }
756
757    // String still open at EOF
758    if in_string {
759        let source_line = source
760            .lines()
761            .nth(string_start_line - 1)
762            .unwrap_or("")
763            .to_string();
764        return Some(ParseError {
765            message: "unterminated string".to_string(),
766            line: string_start_line,
767            col: string_start_col,
768            source_line,
769            filename: None,
770            suggestion: Some(format!(
771                "add a closing `{}` to complete the string",
772                string_char
773            )),
774            source_code: None,
775            byte_offset: string_start_byte,
776            span_length: source.len() - string_start_byte,
777        });
778    }
779
780    None
781}
782
/// Find the 1-based line and column of the first unmatched opening bracket.
///
/// Brackets inside `#` comments and `"`/`'` string literals are ignored. Each
/// `close` pops the most recent `open`; a `close` with nothing to match is
/// ignored. Of the openers left over at the end, the earliest one is
/// returned. Returns `(1, 1)` when every opener is matched.
///
/// Backslash escapes inside strings are tracked as a state, so a string that
/// ends in an escaped backslash (`"a\\"`) is correctly seen as closed.
fn find_unmatched_open(source: &str, open: char, close: char) -> (usize, usize) {
    let mut stack: Vec<(usize, usize)> = Vec::new();
    // Quote character of the string currently being skipped, if any.
    let mut quote: Option<char> = None;
    let mut escaped = false;
    let mut in_comment = false;
    let mut cur_line = 1usize;
    let mut cur_col = 1usize;

    for ch in source.chars() {
        if in_comment {
            // Comments run to the end of the line.
            if ch == '\n' {
                in_comment = false;
            }
        } else if let Some(q) = quote {
            if escaped {
                // This character is consumed by the preceding backslash.
                escaped = false;
            } else if ch == '\\' {
                escaped = true;
            } else if ch == q {
                quote = None;
            }
        } else {
            match ch {
                '#' => in_comment = true,
                '"' | '\'' => quote = Some(ch),
                c if c == open => stack.push((cur_line, cur_col)),
                c if c == close => {
                    stack.pop();
                }
                _ => {}
            }
        }

        // Position of the next character.
        if ch == '\n' {
            cur_line += 1;
            cur_col = 1;
        } else {
            cur_col += 1;
        }
    }

    // The first remaining item in the stack is the unmatched open
    stack.first().copied().unwrap_or((1, 1))
}
841
/// Return the text between `for (` and the first `)` that follows, or `None`
/// when `trimmed` does not start with `for` followed by `(` or has no `)`.
///
/// Deliberately simple: nested parentheses are not balanced, so the content
/// ends at the first `)` encountered.
fn extract_for_parens(trimmed: &str) -> Option<&str> {
    trimmed
        .strip_prefix("for")
        .and_then(|after_keyword| after_keyword.trim_start().strip_prefix('('))
        .and_then(|inside| inside.find(')').map(|close_at| &inside[..close_at]))
}
851
/// Count opening and closing brackets in `source`, ignoring anything inside
/// `#` comments and `"`/`'` string literals.
///
/// Returns `(opens, closes)`, each a `(parens, braces, brackets)` triple.
///
/// Backslash escapes inside strings are tracked as a state, so a string that
/// ends in an escaped backslash (`"a\\"`) is correctly seen as closed and the
/// brackets after it are counted.
fn count_brackets(source: &str) -> ((i32, i32, i32), (i32, i32, i32)) {
    let mut opens = (0i32, 0i32, 0i32);
    let mut closes = (0i32, 0i32, 0i32);
    // Quote character of the string currently being skipped, if any.
    let mut quote: Option<char> = None;
    let mut escaped = false;
    let mut in_comment = false;

    for ch in source.chars() {
        if in_comment {
            // Comments run to the end of the line.
            if ch == '\n' {
                in_comment = false;
            }
            continue;
        }
        if let Some(q) = quote {
            if escaped {
                // This character is consumed by the preceding backslash.
                escaped = false;
            } else if ch == '\\' {
                escaped = true;
            } else if ch == q {
                quote = None;
            }
            continue;
        }
        match ch {
            '#' => in_comment = true,
            '"' | '\'' => quote = Some(ch),
            '(' => opens.0 += 1,
            ')' => closes.0 += 1,
            '{' => opens.1 += 1,
            '}' => closes.1 += 1,
            '[' => opens.2 += 1,
            ']' => closes.2 += 1,
            _ => {}
        }
    }
    (opens, closes)
}
894}