Skip to main content

r/parser/
literals.rs

1//! Literal parsing helpers — numbers, strings, complex values.
2
3use pest::iterators::Pair;
4
5use super::ast::Expr;
6use super::Rule;
7
8// region: Complex numbers
9
10pub(super) fn parse_complex(pair: Pair<Rule>) -> Expr {
11    let s = pair.as_str();
12    // Remove trailing 'i'
13    let num_str = &s[..s.len() - 1];
14    let val = num_str.parse::<f64>().unwrap_or(0.0);
15    Expr::Complex(val)
16}
17
18// endregion
19
20// region: Numeric literals
21
22pub(super) fn parse_number(pair: Pair<Rule>) -> Expr {
23    let s = pair.as_str();
24    // Integer literal (ends with L)
25    if let Some(num_str) = s.strip_suffix('L') {
26        if num_str.starts_with("0x") || num_str.starts_with("0X") {
27            return parse_hex_int(num_str);
28        }
29        if let Ok(val) = num_str.parse::<i64>() {
30            return Expr::Integer(val);
31        }
32        if let Ok(val) = num_str.parse::<f64>() {
33            // Intentional truncation: R `as.integer()` semantics for e.g. 1e5L
34            return Expr::Integer(crate::interpreter::coerce::f64_to_i64(val).unwrap_or(0));
35        }
36    }
37    // Hex (without L)
38    if s.starts_with("0x") || s.starts_with("0X") {
39        return parse_hex_float(s);
40    }
41    // Float / bare integer
42    if let Ok(val) = s.parse::<f64>() {
43        // In R, bare integers are still doubles unless suffixed with L
44        return Expr::Double(val);
45    }
46    Expr::Double(0.0)
47}
48
49fn parse_hex_int(num_str: &str) -> Expr {
50    let hex_part = &num_str[2..];
51    // Check for hex float with '.' or 'p'
52    if hex_part.contains('.') || hex_part.contains('p') || hex_part.contains('P') {
53        let val = parse_hex_float_value(num_str);
54        // Intentional truncation: hex float -> integer literal (e.g. 0x1.0p4L)
55        return Expr::Integer(crate::interpreter::coerce::f64_to_i64(val).unwrap_or(0));
56    }
57    let val = i64::from_str_radix(hex_part, 16).unwrap_or(0);
58    Expr::Integer(val)
59}
60
61fn parse_hex_float(s: &str) -> Expr {
62    let val = parse_hex_float_value(s);
63    Expr::Double(val)
64}
65
66fn parse_hex_float_value(s: &str) -> f64 {
67    let s = s.strip_suffix('L').unwrap_or(s);
68    let hex_part = &s[2..]; // skip 0x/0X
69
70    if let Some(p_pos) = hex_part.find(['p', 'P']) {
71        let mantissa_str = &hex_part[..p_pos];
72        let exp_str = &hex_part[p_pos + 1..];
73
74        let mantissa = if let Some(dot_pos) = mantissa_str.find('.') {
75            let int_part = &mantissa_str[..dot_pos];
76            let frac_part = &mantissa_str[dot_pos + 1..];
77            let int_val = if int_part.is_empty() {
78                0u64
79            } else {
80                u64::from_str_radix(int_part, 16).unwrap_or(0)
81            };
82            let frac_val = if frac_part.is_empty() {
83                0.0
84            } else {
85                let frac_int = u64::from_str_radix(frac_part, 16).unwrap_or(0);
86                // u64 -> f64 may lose precision for values > 2^53, acceptable for hex literals
87                let frac_digits = i32::try_from(frac_part.len()).unwrap_or(0);
88                crate::interpreter::coerce::u64_to_f64(frac_int) / 16f64.powi(frac_digits)
89            };
90            crate::interpreter::coerce::u64_to_f64(int_val) + frac_val
91        } else {
92            crate::interpreter::coerce::u64_to_f64(
93                u64::from_str_radix(mantissa_str, 16).unwrap_or(0),
94            )
95        };
96
97        let exp: i32 = exp_str.parse().unwrap_or(0);
98        mantissa * 2f64.powi(exp)
99    } else if let Some(dot_pos) = hex_part.find('.') {
100        // Hex with dot but no exponent
101        let int_part = &hex_part[..dot_pos];
102        let frac_part = &hex_part[dot_pos + 1..];
103        let int_val = if int_part.is_empty() {
104            0u64
105        } else {
106            u64::from_str_radix(int_part, 16).unwrap_or(0)
107        };
108        let frac_val = if frac_part.is_empty() {
109            0.0
110        } else {
111            let frac_int = u64::from_str_radix(frac_part, 16).unwrap_or(0);
112            let frac_digits = i32::try_from(frac_part.len()).unwrap_or(0);
113            crate::interpreter::coerce::u64_to_f64(frac_int) / 16f64.powi(frac_digits)
114        };
115        crate::interpreter::coerce::u64_to_f64(int_val) + frac_val
116    } else {
117        crate::interpreter::coerce::i64_to_f64(i64::from_str_radix(hex_part, 16).unwrap_or(0))
118    }
119}
120
121// endregion
122
123// region: String literals
124
125pub(super) fn parse_raw_string(pair: Pair<Rule>) -> Expr {
126    let s = pair.as_str();
127    // r"(...)" or R'(...)' etc — find the body between outer quotes
128    // Also handles dash delimiters: r"---(text)---"
129    let quote_pos = s
130        .find('"')
131        .or_else(|| s.find('\''))
132        .expect("raw string literal must contain a quote character");
133    let inner = &s[quote_pos + 1..s.len() - 1]; // between outer quotes
134
135    // Strip leading dashes, then the open delimiter, then trailing close + dashes
136    let inner = inner.trim_start_matches('-');
137    let (open, close) = if inner.starts_with('(') {
138        ('(', ')')
139    } else if inner.starts_with('[') {
140        ('[', ']')
141    } else if inner.starts_with('{') {
142        ('{', '}')
143    } else {
144        return Expr::String(inner.to_string());
145    };
146    // Strip open delimiter from start
147    let inner = &inner[1..];
148    // Find the matching close delimiter (last occurrence of close + dashes)
149    let content = inner.trim_end_matches('-');
150    let content = if content.ends_with(close) {
151        &content[..content.len() - 1]
152    } else {
153        content
154    };
155    let _ = open; // suppress unused warning
156    Expr::String(content.to_string())
157}
158
159pub(super) fn parse_string_value(pair: Pair<Rule>) -> String {
160    let s = pair.as_str();
161    let inner = &s[1..s.len() - 1];
162    unescape_string(inner)
163}
164
165pub(super) fn parse_string(pair: Pair<Rule>) -> Expr {
166    Expr::String(parse_string_value(pair))
167}
168
169pub(super) fn unescape_string(s: &str) -> String {
170    let mut result = String::new();
171    let mut chars = s.chars();
172    while let Some(c) = chars.next() {
173        if c == '\\' {
174            match chars.next() {
175                Some('n') => result.push('\n'),
176                Some('t') => result.push('\t'),
177                Some('r') => result.push('\r'),
178                Some('\\') => result.push('\\'),
179                Some('"') => result.push('"'),
180                Some('\'') => result.push('\''),
181                Some('0') => result.push('\0'),
182                Some('a') => result.push('\x07'),
183                Some('b') => result.push('\x08'),
184                Some('f') => result.push('\x0C'),
185                Some('v') => result.push('\x0B'),
186                Some('x') => {
187                    let hex: String = chars.clone().take(2).collect();
188                    if let Ok(val) = u8::from_str_radix(&hex, 16) {
189                        result.push(val as char);
190                        chars.nth(1);
191                    }
192                }
193                Some(other) => {
194                    result.push('\\');
195                    result.push(other);
196                }
197                None => result.push('\\'),
198            }
199        } else {
200            result.push(c);
201        }
202    }
203    result
204}
205
206// endregion