Skip to main content

r/repl/
highlighter.rs

1//! R syntax highlighting for the REPL.
2//!
3//! Colors R keywords, strings, numbers, comments, and operators as the user
4//! types, providing immediate visual feedback about syntax structure.
5
6use nu_ansi_term::{Color, Style};
7use reedline::{Highlighter, StyledText};
8
/// Syntax highlighter for R source code typed at the REPL prompt.
///
/// The type carries no state; all work happens in its [`Highlighter`]
/// implementation, which tokenizes the current input and assigns a style to
/// each token.
#[derive(Debug, Clone, Copy, Default)]
pub struct RHighlighter;
10
11// region: token types
12
13#[derive(Debug, Clone, Copy, PartialEq, Eq)]
14enum TokenKind {
15    Keyword,
16    Literal, // TRUE, FALSE, NULL, NA variants, Inf, NaN
17    String,
18    Number,
19    Comment,
20    Operator,
21    Bracket,
22    Normal,
23}
24
25impl TokenKind {
26    fn style(self) -> Style {
27        match self {
28            TokenKind::Keyword => Style::new().bold().fg(Color::Magenta),
29            TokenKind::Literal => Style::new().fg(Color::Yellow),
30            TokenKind::String => Style::new().fg(Color::Green),
31            TokenKind::Number => Style::new().fg(Color::Cyan),
32            TokenKind::Comment => Style::new().italic().fg(Color::DarkGray),
33            TokenKind::Operator => Style::new().fg(Color::Red),
34            TokenKind::Bracket => Style::new().bold(),
35            TokenKind::Normal => Style::new(),
36        }
37    }
38}
39
40// endregion
41
42// region: keyword classification
43
44fn classify_word(word: &str) -> TokenKind {
45    match word {
46        // R keywords
47        "if" | "else" | "for" | "while" | "repeat" | "function" | "return" | "next" | "break"
48        | "in" | "library" | "require" => TokenKind::Keyword,
49
50        // Literal constants
51        "TRUE" | "FALSE" | "NULL" | "NA" | "NA_integer_" | "NA_real_" | "NA_complex_"
52        | "NA_character_" | "Inf" | "NaN" | "T" | "F" => TokenKind::Literal,
53
54        _ => TokenKind::Normal,
55    }
56}
57
58// endregion
59
60// region: raw string detection
61
/// Recognise the opening of an R 4.0+ raw string (`r"(...)"`, `R"[...]"`,
/// `r'(...)'`, `R'{...}'`, ...) at index `i` of `chars`.
///
/// The prefix must be exactly three characters: `r` or `R`, a single or
/// double quote, then one of `(`, `[`, `{`. On a match the result is the
/// bracket that closes the literal together with the prefix length (always
/// 3); the full terminator is that bracket followed by the same quote
/// character. Returns `None` when `i` is out of range, fewer than three
/// characters remain, or the characters do not form such a prefix.
fn raw_string_prefix(chars: &[char], i: usize) -> Option<(char, usize)> {
    let closer = match chars.get(i..)? {
        ['r' | 'R', '"' | '\'', '(', ..] => ')',
        ['r' | 'R', '"' | '\'', '[', ..] => ']',
        ['r' | 'R', '"' | '\'', '{', ..] => '}',
        _ => return None,
    };
    Some((closer, 3))
}
91
92// endregion
93
94// region: tokenizer
95
96impl Highlighter for RHighlighter {
97    fn highlight(&self, line: &str, _cursor: usize) -> StyledText {
98        let mut styled = StyledText::new();
99        let chars: Vec<char> = line.chars().collect();
100        let len = chars.len();
101        let mut i = 0;
102
103        while i < len {
104            let c = chars[i];
105
106            // Comment: # to end of line
107            if c == '#' {
108                let start = i;
109                while i < len && chars[i] != '\n' {
110                    i += 1;
111                }
112                let text: String = chars[start..i].iter().collect();
113                styled.push((TokenKind::Comment.style(), text));
114                continue;
115            }
116
117            // R 4.0+ raw strings: r"(...)", R"[...]", r'{...}', etc.
118            // Must be checked before regular identifiers since 'r' and 'R' are valid
119            // identifier starts.
120            if let Some((close_bracket, prefix_len)) = raw_string_prefix(&chars, i) {
121                let start = i;
122                let quote = chars[i + 1];
123                i += prefix_len; // skip r"( or R"[ etc.
124                                 // Scan for close_bracket followed by matching quote
125                while i < len {
126                    if chars[i] == close_bracket && i + 1 < len && chars[i + 1] == quote {
127                        i += 2; // skip )' or }" etc.
128                        break;
129                    }
130                    i += 1;
131                }
132                let text: String = chars[start..i].iter().collect();
133                styled.push((TokenKind::String.style(), text));
134                continue;
135            }
136
137            // Strings: "..." or '...'
138            if c == '"' || c == '\'' {
139                let quote = c;
140                let start = i;
141                i += 1;
142                while i < len {
143                    if chars[i] == '\\' && i + 1 < len {
144                        i += 2; // skip escaped char
145                    } else if chars[i] == quote {
146                        i += 1;
147                        break;
148                    } else {
149                        i += 1;
150                    }
151                }
152                let text: String = chars[start..i].iter().collect();
153                styled.push((TokenKind::String.style(), text));
154                continue;
155            }
156
157            // Numbers: digits, hex (0x...), with optional L/i suffix
158            if c.is_ascii_digit() || (c == '.' && i + 1 < len && chars[i + 1].is_ascii_digit()) {
159                let start = i;
160                if c == '0' && i + 1 < len && (chars[i + 1] == 'x' || chars[i + 1] == 'X') {
161                    i += 2; // skip 0x
162                    while i < len && chars[i].is_ascii_hexdigit() {
163                        i += 1;
164                    }
165                } else {
166                    while i < len && (chars[i].is_ascii_digit() || chars[i] == '.') {
167                        i += 1;
168                    }
169                    // Exponent
170                    if i < len && (chars[i] == 'e' || chars[i] == 'E') {
171                        i += 1;
172                        if i < len && (chars[i] == '+' || chars[i] == '-') {
173                            i += 1;
174                        }
175                        while i < len && chars[i].is_ascii_digit() {
176                            i += 1;
177                        }
178                    }
179                }
180                // L (integer) or i (complex) suffix
181                if i < len && (chars[i] == 'L' || chars[i] == 'i') {
182                    i += 1;
183                }
184                let text: String = chars[start..i].iter().collect();
185                styled.push((TokenKind::Number.style(), text));
186                continue;
187            }
188
189            // Identifiers and keywords
190            if c.is_alphabetic() || c == '.' || c == '_' {
191                let start = i;
192                while i < len && (chars[i].is_alphanumeric() || chars[i] == '.' || chars[i] == '_')
193                {
194                    i += 1;
195                }
196                let word: String = chars[start..i].iter().collect();
197                let kind = classify_word(&word);
198                styled.push((kind.style(), word));
199                continue;
200            }
201
202            // Lambda shorthand: \(x) x + 1
203            if c == '\\' && i + 1 < len && chars[i + 1] == '(' {
204                styled.push((TokenKind::Keyword.style(), "\\".to_string()));
205                i += 1;
206                continue;
207            }
208
209            // Backtick-quoted identifiers
210            if c == '`' {
211                let start = i;
212                i += 1;
213                while i < len && chars[i] != '`' {
214                    i += 1;
215                }
216                if i < len {
217                    i += 1; // closing backtick
218                }
219                let text: String = chars[start..i].iter().collect();
220                styled.push((TokenKind::String.style(), text));
221                continue;
222            }
223
224            // Multi-character operators
225            if i + 1 < len {
226                let two: String = chars[i..i + 2].iter().collect();
227                match two.as_str() {
228                    "<-" | "<<" | "->" | ">>" | "|>" | "||" | "&&" | "!=" | "==" | "<=" | ">="
229                    | "%%" | "::" => {
230                        // Check for <<- and ->> and :::
231                        if i + 2 < len {
232                            let three: String = chars[i..i + 3].iter().collect();
233                            if three == "<<-" || three == "->>" || three == ":::" {
234                                styled.push((TokenKind::Operator.style(), three));
235                                i += 3;
236                                continue;
237                            }
238                        }
239                        styled.push((TokenKind::Operator.style(), two));
240                        i += 2;
241                        continue;
242                    }
243                    _ => {}
244                }
245
246                // %any% operators
247                if c == '%' {
248                    let start = i;
249                    i += 1;
250                    while i < len && chars[i] != '%' {
251                        i += 1;
252                    }
253                    if i < len {
254                        i += 1; // closing %
255                    }
256                    let text: String = chars[start..i].iter().collect();
257                    styled.push((TokenKind::Operator.style(), text));
258                    continue;
259                }
260            }
261
262            // Single-character operators
263            if matches!(
264                c,
265                '+' | '-'
266                    | '*'
267                    | '/'
268                    | '^'
269                    | '~'
270                    | '!'
271                    | '<'
272                    | '>'
273                    | '='
274                    | '&'
275                    | '|'
276                    | ':'
277                    | '$'
278                    | '@'
279                    | '?'
280            ) {
281                styled.push((TokenKind::Operator.style(), c.to_string()));
282                i += 1;
283                continue;
284            }
285
286            // Brackets and parentheses — bold for visibility
287            if matches!(c, '(' | ')' | '[' | ']' | '{' | '}') {
288                styled.push((TokenKind::Bracket.style(), c.to_string()));
289                i += 1;
290                continue;
291            }
292
293            // Everything else (whitespace, commas, semicolons, etc.)
294            styled.push((Style::new(), c.to_string()));
295            i += 1;
296        }
297
298        styled
299    }
300}
301
302// endregion