Skip to main content

r/interpreter/builtins/
interp.rs

1//! Interpreter-level builtins — functions that receive `BuiltinContext` so
2//! they can call back into the active interpreter without direct TLS lookups.
3//! Each is auto-registered via `#[interpreter_builtin]`.
4
5use super::CallArgs;
6use crate::interpreter::environment::Environment;
7use crate::interpreter::value::*;
8use crate::interpreter::BuiltinContext;
9use crate::parser::ast::{Arg, BinaryOp, Expr, Param, UnaryOp};
10use minir_macros::interpreter_builtin;
11
12/// Extract `fail_fast` from named args and return the remaining named args.
13/// Default is `false` (collect all errors).
14fn extract_fail_fast(named: &[(String, RValue)]) -> (bool, Vec<(String, RValue)>) {
15    let mut fail_fast = false;
16    let mut remaining = Vec::with_capacity(named.len());
17    for (name, val) in named {
18        if name == "fail_fast" {
19            fail_fast = val
20                .as_vector()
21                .and_then(|v| v.as_logical_scalar())
22                .unwrap_or(false);
23        } else {
24            remaining.push((name.clone(), val.clone()));
25        }
26    }
27    (fail_fast, remaining)
28}
29
30/// Resolve a function specification: accepts an RValue::Function directly,
31/// or a string naming a function to look up in the environment.
32/// Equivalent to R's match.fun().
33fn match_fun(f: &RValue, env: &Environment) -> Result<RValue, RError> {
34    match f {
35        RValue::Function(_) => Ok(f.clone()),
36        RValue::Vector(rv) => match &rv.inner {
37            Vector::Character(s) => {
38                let name = s.first().and_then(|x| x.as_ref()).ok_or_else(|| {
39                    RError::new(
40                        RErrorKind::Argument,
41                        "not a valid function name".to_string(),
42                    )
43                })?;
44                env.get_function(name)
45                    .ok_or_else(|| RError::other(format!("could not find function '{}'", name)))
46            }
47            _ => Err(RError::new(
48                RErrorKind::Argument,
49                "FUN is not a function and not a string naming a function".to_string(),
50            )),
51        },
52        _ => Err(RError::new(
53            RErrorKind::Argument,
54            "FUN is not a function and not a string naming a function".to_string(),
55        )),
56    }
57}
58
59fn optional_frame_index(positional: &[RValue], default: i64) -> Result<i64, RError> {
60    match positional.first() {
61        None => Ok(default),
62        Some(value) => value
63            .as_vector()
64            .and_then(|v| v.as_integer_scalar())
65            .ok_or_else(|| {
66                RError::new(
67                    RErrorKind::Argument,
68                    "frame index must be an integer".to_string(),
69                )
70            }),
71    }
72}
73
74fn language_or_null(expr: Option<crate::parser::ast::Expr>) -> RValue {
75    match expr {
76        Some(expr) => RValue::Language(Language::new(expr)),
77        None => RValue::Null,
78    }
79}
80
81// region: S3-dispatching generics (print, format)
82
83/// Get explicit class attributes from an RValue.
84/// Returns an empty vec for objects without a class attribute.
85fn explicit_classes(val: &RValue) -> Vec<String> {
86    match val {
87        RValue::Vector(rv) => rv.class().unwrap_or_default(),
88        RValue::List(list) => {
89            if let Some(RValue::Vector(rv)) = list.get_attr("class") {
90                if let Vector::Character(classes) = &rv.inner {
91                    classes.iter().filter_map(|c| c.clone()).collect()
92                } else {
93                    vec![]
94                }
95            } else {
96                vec![]
97            }
98        }
99        RValue::Language(lang) => lang.class().unwrap_or_default(),
100        _ => vec![],
101    }
102}
103
104/// Try S3 dispatch for a generic function. Returns `Ok(Some(result))` if a
105/// method was found and called, `Ok(None)` if no method exists (caller should
106/// fall through to default behavior).
107fn try_s3_dispatch(
108    generic: &str,
109    args: &[RValue],
110    named: &[(String, RValue)],
111    context: &BuiltinContext,
112) -> Result<Option<RValue>, RError> {
113    let Some(val) = args.first() else {
114        return Ok(None);
115    };
116    let classes = explicit_classes(val);
117    if classes.is_empty() {
118        return Ok(None);
119    }
120    let env = context.env();
121    let interp = context.interpreter();
122
123    // First pass: look up generic.class in the environment chain
124    for class in &classes {
125        let method_name = format!("{generic}.{class}");
126        if let Some(method) = env.get(&method_name) {
127            let result = interp.call_function(&method, args, named, env)?;
128            return Ok(Some(result));
129        }
130    }
131
132    // Second pass: check the per-interpreter S3 method registry
133    for class in &classes {
134        if let Some(method) = interp.lookup_s3_method(generic, class) {
135            let result = interp.call_function(&method, args, named, env)?;
136            return Ok(Some(result));
137        }
138    }
139
140    Ok(None)
141}
142
143/// Print a value to stdout (S3 generic).
144///
145/// @param x the value to print
146/// @return x, invisibly
147#[interpreter_builtin(min_args = 1)]
148fn interp_print(
149    args: &[RValue],
150    named: &[(String, RValue)],
151    context: &BuiltinContext,
152) -> Result<RValue, RError> {
153    // Try S3 dispatch (print.Date, print.POSIXct, print.data.frame, etc.)
154    if let Some(result) = try_s3_dispatch("print", args, named, context)? {
155        return Ok(result);
156    }
157    // Default print
158    if let Some(val) = args.first() {
159        context.write(&format!("{}\n", val));
160    }
161    // print() returns invisibly in R
162    context.interpreter().set_invisible();
163    Ok(args.first().cloned().unwrap_or(RValue::Null))
164}
165
166/// Return a value invisibly (suppresses auto-printing).
167///
168/// Sets the interpreter's visibility flag so that the REPL/eval loop
169/// knows not to auto-print the result.
170///
171/// @param x value to return (default: NULL)
172/// @return x (invisibly)
173#[interpreter_builtin]
174fn interp_invisible(
175    args: &[RValue],
176    _named: &[(String, RValue)],
177    context: &BuiltinContext,
178) -> Result<RValue, RError> {
179    context.interpreter().set_invisible();
180    Ok(args.first().cloned().unwrap_or(RValue::Null))
181}
182
183/// Format a value as a character string (S3 generic).
184///
185/// Supports named parameters: `nsmall` (minimum decimal places for doubles),
186/// `width` (minimum field width, right-justified), `big.mark` (thousands
187/// separator), and `scientific` (force scientific notation when TRUE, suppress
188/// when FALSE).
189///
190/// @param x the value to format
191/// @param nsmall minimum number of digits to the right of the decimal point
192/// @param width minimum field width (right-justified with spaces)
193/// @param big.mark character to insert as thousands separator
194/// @param scientific logical; TRUE forces scientific notation, FALSE suppresses it
195/// @return character vector representation
196#[interpreter_builtin(min_args = 1)]
197fn interp_format(
198    args: &[RValue],
199    named: &[(String, RValue)],
200    context: &BuiltinContext,
201) -> Result<RValue, RError> {
202    // Try S3 dispatch (format.Date, format.POSIXct, etc.)
203    if let Some(result) = try_s3_dispatch("format", args, named, context)? {
204        return Ok(result);
205    }
206
207    // Extract named parameters
208    let nsmall: Option<usize> = named
209        .iter()
210        .find(|(k, _)| k == "nsmall")
211        .and_then(|(_, v)| v.as_vector()?.as_integer_scalar())
212        .and_then(|i| usize::try_from(i).ok());
213    let digits: Option<usize> = named
214        .iter()
215        .find(|(k, _)| k == "digits")
216        .and_then(|(_, v)| v.as_vector()?.as_integer_scalar())
217        .and_then(|i| usize::try_from(i).ok());
218    let width: Option<usize> = named
219        .iter()
220        .find(|(k, _)| k == "width")
221        .and_then(|(_, v)| v.as_vector()?.as_integer_scalar())
222        .and_then(|i| usize::try_from(i).ok());
223    let big_mark: Option<String> = named
224        .iter()
225        .find(|(k, _)| k == "big.mark")
226        .and_then(|(_, v)| v.as_vector()?.as_character_scalar());
227    let scientific: Option<bool> = named
228        .iter()
229        .find(|(k, _)| k == "scientific")
230        .and_then(|(_, v)| v.as_vector()?.as_logical_scalar());
231
232    let has_format_opts = nsmall.is_some()
233        || digits.is_some()
234        || width.is_some()
235        || big_mark.is_some()
236        || scientific.is_some();
237
238    match args.first() {
239        Some(RValue::Vector(rv)) if has_format_opts => {
240            let formatted: Vec<Option<String>> = match &rv.inner {
241                Vector::Double(vals) => vals
242                    .iter_opt()
243                    .map(|x| {
244                        x.map(|f| {
245                            format_double_with_opts(
246                                f,
247                                nsmall,
248                                digits,
249                                big_mark.as_deref(),
250                                scientific,
251                            )
252                        })
253                    })
254                    .collect(),
255                Vector::Integer(vals) => vals
256                    .iter_opt()
257                    .map(|x| x.map(|i| format_integer_with_opts(i, big_mark.as_deref())))
258                    .collect(),
259                other => other.to_characters(),
260            };
261            // Apply width padding if requested
262            let formatted = if let Some(w) = width {
263                formatted
264                    .into_iter()
265                    .map(|s| s.map(|s| format!("{:>width$}", s, width = w)))
266                    .collect()
267            } else {
268                formatted
269            };
270            Ok(RValue::vec(Vector::Character(formatted.into())))
271        }
272        Some(RValue::Vector(rv)) => {
273            // No special formatting options — element-wise default format
274            let chars = rv.inner.to_characters();
275            let chars = if let Some(w) = width {
276                chars
277                    .into_iter()
278                    .map(|s| s.map(|s| format!("{:>width$}", s, width = w)))
279                    .collect()
280            } else {
281                chars
282            };
283            Ok(RValue::vec(Vector::Character(chars.into())))
284        }
285        Some(val) => Ok(RValue::vec(Vector::Character(
286            vec![Some(format!("{}", val))].into(),
287        ))),
288        None => Ok(RValue::vec(Vector::Character(
289            vec![Some(String::new())].into(),
290        ))),
291    }
292}
293
294/// Format a double value with nsmall, digits, big.mark, and scientific options.
295fn format_double_with_opts(
296    f: f64,
297    nsmall: Option<usize>,
298    digits: Option<usize>,
299    big_mark: Option<&str>,
300    scientific: Option<bool>,
301) -> String {
302    if f.is_nan() {
303        return "NaN".to_string();
304    }
305    if f.is_infinite() {
306        return if f > 0.0 {
307            "Inf".to_string()
308        } else {
309            "-Inf".to_string()
310        };
311    }
312
313    let s = match scientific {
314        Some(true) => {
315            if let Some(d) = digits {
316                // digits controls significant digits; in scientific notation
317                // that means d-1 decimal places after the leading digit
318                format!("{:.prec$e}", f, prec = d.saturating_sub(1))
319            } else {
320                format!("{:e}", f)
321            }
322        }
323        Some(false) => {
324            // Suppress scientific notation
325            if let Some(ns) = nsmall {
326                format!("{:.prec$}", f, prec = ns)
327            } else if let Some(d) = digits {
328                format_significant_digits(f, d)
329            } else if f == f.floor() && f.abs() < 1e15 {
330                use crate::interpreter::coerce;
331                format!("{}", coerce::f64_to_i64(f).unwrap_or(0))
332            } else {
333                format!("{}", f)
334            }
335        }
336        None => {
337            if let Some(ns) = nsmall {
338                format!("{:.prec$}", f, prec = ns)
339            } else if let Some(d) = digits {
340                format_significant_digits(f, d)
341            } else {
342                use crate::interpreter::value::vector::format_r_double;
343                format_r_double(f)
344            }
345        }
346    };
347
348    match big_mark {
349        Some(mark) if !mark.is_empty() => insert_thousands_sep(&s, mark),
350        _ => s,
351    }
352}
353
/// Format a double value to a specified number of significant digits.
///
/// The result is in fixed (non-exponent) notation with as many decimals as
/// are needed to show `digits` significant digits; values with more integer
/// digits than `digits` are printed with no decimals. Zero is printed with
/// `digits - 1` decimals.
///
/// The decimal exponent is taken from the value *after* rounding to `digits`
/// significant digits, so a value that rounds up into the next power of ten
/// (e.g. `9.99` at two digits) yields `"10"` rather than `"10.0"`.
fn format_significant_digits(f: f64, digits: usize) -> String {
    let mantissa_decimals = digits.saturating_sub(1);
    if f == 0.0 {
        return format!("{:.prec$}", 0.0, prec = mantissa_decimals);
    }

    // Let the formatter do the rounding, then read the exponent it settled
    // on. Non-finite inputs have no exponent part and fall back to 0.
    let scientific = format!("{:.prec$e}", f, prec = mantissa_decimals);
    let exponent = scientific
        .rsplit_once('e')
        .and_then(|(_, exp)| exp.parse::<i64>().ok())
        .unwrap_or(0);

    let wanted = i64::try_from(digits).unwrap_or(i64::MAX);
    // A negative count means the integer part alone already exceeds `digits`.
    let decimal_places =
        usize::try_from(wanted.saturating_sub(1).saturating_sub(exponent)).unwrap_or(0);
    format!("{:.prec$}", f, prec = decimal_places)
}
364
365/// Format an integer value with big.mark option.
366fn format_integer_with_opts(i: i64, big_mark: Option<&str>) -> String {
367    let s = i.to_string();
368    match big_mark {
369        Some(mark) if !mark.is_empty() => insert_thousands_sep(&s, mark),
370        _ => s,
371    }
372}
373
/// Insert a thousands separator into the integer part of a numeric string.
///
/// The integer part is the run of ASCII digits that follows an optional
/// leading `-`. Whatever comes after it (a decimal fraction, an exponent
/// such as `e10`, or any other suffix) is copied through unchanged, so
/// scientific-notation strings are never split inside their exponent.
fn insert_thousands_sep(s: &str, sep: &str) -> String {
    let (sign, unsigned) = match s.strip_prefix('-') {
        Some(rest) => ("-", rest),
        None => ("", s),
    };

    // Only the leading digits are grouped; they are ASCII, so byte offsets
    // below are always valid char boundaries.
    let int_len = unsigned.bytes().take_while(u8::is_ascii_digit).count();
    let (int_part, tail) = unsigned.split_at(int_len);

    let mut out = String::with_capacity(s.len() + (int_len / 3) * sep.len());
    out.push_str(sign);

    // The first group holds the 1-3 digits left over once the rest has been
    // split into full groups of three.
    let leading = int_len % 3;
    let mut start = 0;
    let mut end = if leading == 0 { int_len.min(3) } else { leading };
    while start < int_len {
        if start > 0 {
            out.push_str(sep);
        }
        out.push_str(&int_part[start..end]);
        start = end;
        end += 3;
    }

    out.push_str(tail);
    out
}
406
/// Print a data.frame with aligned columns using TabWriter.
///
/// Non-list arguments are printed with their default display. A data frame
/// without columns or without rows prints a one-line summary instead of a
/// table. Otherwise a header line of column names is followed by one line per
/// row, starting with the row name (taken from the `row.names` attribute when
/// it is a character or integer vector, `1..=nrow` otherwise).
///
/// Every successful path marks the result invisible, matching `print()`,
/// `print.matrix` and `print.factor`.
///
/// @param x a data.frame to print
/// @return x, invisibly
#[cfg(feature = "tables")]
#[interpreter_builtin(name = "print.data.frame", min_args = 1)]
fn interp_print_data_frame(
    args: &[RValue],
    _named: &[(String, RValue)],
    context: &BuiltinContext,
) -> Result<RValue, RError> {
    use std::io::Write;
    use tabwriter::TabWriter;

    let val = &args[0];

    // Shared tail of every successful path: hand `x` back invisibly.
    let finish = || -> Result<RValue, RError> {
        context.interpreter().set_invisible();
        Ok(val.clone())
    };

    let list = match val {
        RValue::List(l) => l,
        _ => {
            context.write(&format!("{}\n", val));
            return finish();
        }
    };

    if list.values.is_empty() {
        context.write("data frame with 0 columns and 0 rows\n");
        return finish();
    }

    // Column names; unnamed columns get positional names V1, V2, ...
    let col_names: Vec<String> = list
        .values
        .iter()
        .enumerate()
        .map(|(i, (name, _))| name.clone().unwrap_or_else(|| format!("V{}", i + 1)))
        .collect();

    // Number of rows: from row.names attribute or first column length
    let nrow = list
        .get_attr("row.names")
        .map(|v| v.length())
        .unwrap_or_else(|| list.values.first().map(|(_, v)| v.length()).unwrap_or(0));

    if nrow == 0 {
        // Print header only for 0-row data frames
        context.write(&format!(
            "data frame with 0 rows and {} columns: {}\n",
            col_names.len(),
            col_names.join(", ")
        ));
        return finish();
    }

    // Row names
    let row_names: Vec<String> = match list.get_attr("row.names") {
        Some(RValue::Vector(rv)) => match &rv.inner {
            Vector::Character(chars) => chars
                .iter()
                .map(|c| c.clone().unwrap_or_else(|| "NA".to_string()))
                .collect(),
            Vector::Integer(ints) => ints
                .iter()
                .map(|i| match i {
                    Some(v) => v.to_string(),
                    None => "NA".to_string(),
                })
                .collect(),
            _ => (1..=nrow).map(|i| i.to_string()).collect(),
        },
        _ => (1..=nrow).map(|i| i.to_string()).collect(),
    };

    // Format each column's elements; non-vector columns repeat their display
    // on every row.
    let formatted_cols: Vec<Vec<String>> = list
        .values
        .iter()
        .map(|(_, value)| match value {
            RValue::Vector(rv) => format_column_elements(&rv.inner, nrow),
            RValue::Null => vec!["NULL".to_string(); nrow],
            other => vec![format!("{}", other); nrow],
        })
        .collect();

    // Build tab-separated output and let tabwriter align it
    let mut tw = TabWriter::new(Vec::new());

    // Header row: blank for row-names column, then column names
    let header_parts: Vec<&str> = std::iter::once("")
        .chain(col_names.iter().map(|s| s.as_str()))
        .collect();
    writeln!(tw, "{}", header_parts.join("\t"))
        .map_err(|e| RError::other(format!("write error: {}", e)))?;

    // Data rows
    for row in 0..nrow {
        let row_name = row_names.get(row).map(|s| s.as_str()).unwrap_or("");
        let mut parts = vec![row_name.to_string()];
        for col in &formatted_cols {
            parts.push(col.get(row).cloned().unwrap_or_else(|| "NA".to_string()));
        }
        writeln!(tw, "{}", parts.join("\t"))
            .map_err(|e| RError::other(format!("write error: {}", e)))?;
    }

    tw.flush()
        .map_err(|e| RError::other(format!("flush error: {}", e)))?;
    // The writer was flushed above; should taking the buffer still fail, an
    // empty table is written rather than an error raised.
    let output = String::from_utf8(tw.into_inner().unwrap_or_default())
        .map_err(|e| RError::other(format!("utf8 error: {}", e)))?;

    // Every line, including the last, was written with `writeln!`, so the
    // table already ends with a newline.
    context.write(&output);

    finish()
}
520
/// Render the first `nrow` elements of a data frame column as strings.
///
/// Rows beyond the end of the vector, and missing values, become `"NA"`.
/// Raw bytes print as two lowercase hex digits; character values print
/// without quotes.
#[cfg(feature = "tables")]
fn format_column_elements(v: &Vector, nrow: usize) -> Vec<String> {
    let available = v.len();
    let na = || "NA".to_string();

    let mut cells = Vec::with_capacity(nrow);
    for i in 0..nrow {
        let cell = if i < available {
            match v {
                Vector::Raw(vals) => format!("{:02x}", vals[i]),
                Vector::Logical(vals) => match vals[i] {
                    Some(true) => "TRUE".to_string(),
                    Some(false) => "FALSE".to_string(),
                    None => na(),
                },
                Vector::Integer(vals) => match vals.get_opt(i) {
                    Some(n) => n.to_string(),
                    None => na(),
                },
                Vector::Double(vals) => match vals.get_opt(i) {
                    Some(f) => format_r_double(f),
                    None => na(),
                },
                Vector::Complex(vals) => match vals[i] {
                    Some(c) => format_r_complex(c),
                    None => na(),
                },
                Vector::Character(vals) => match &vals[i] {
                    Some(s) => s.clone(),
                    None => na(),
                },
            }
        } else {
            // Column shorter than the frame: pad with NA.
            na()
        };
        cells.push(cell);
    }
    cells
}
557
558/// Print a matrix with row/column labels.
559///
560/// Formats the vector data as a 2D grid using the `dim` attribute for layout
561/// and `dimnames` for row/column labels.
562///
563/// @param x a matrix to print
564/// @return x, invisibly
565#[interpreter_builtin(name = "print.matrix", min_args = 1)]
566fn interp_print_matrix(
567    args: &[RValue],
568    _named: &[(String, RValue)],
569    context: &BuiltinContext,
570) -> Result<RValue, RError> {
571    let val = &args[0];
572    let rv = match val {
573        RValue::Vector(rv) => rv,
574        _ => {
575            context.write(&format!("{}\n", val));
576            return Ok(val.clone());
577        }
578    };
579
580    // Extract dim attribute
581    let (nrow, ncol) = match rv.get_attr("dim") {
582        Some(RValue::Vector(dim_rv)) => {
583            let dims = dim_rv.to_integers();
584            if dims.len() == 2 {
585                let nr = dims[0].unwrap_or(0);
586                let nc = dims[1].unwrap_or(0);
587                (
588                    usize::try_from(nr).unwrap_or(0),
589                    usize::try_from(nc).unwrap_or(0),
590                )
591            } else {
592                context.write(&format!("{}\n", val));
593                return Ok(val.clone());
594            }
595        }
596        _ => {
597            context.write(&format!("{}\n", val));
598            return Ok(val.clone());
599        }
600    };
601
602    // Extract dimnames
603    let (row_names, col_names) = extract_dimnames(rv);
604
605    // Format each element
606    let elements = format_matrix_elements(&rv.inner, nrow, ncol);
607
608    // Compute column widths (including headers)
609    let col_widths: Vec<usize> = (0..ncol)
610        .map(|j| {
611            let header_w = col_names
612                .as_ref()
613                .and_then(|cn| cn.get(j))
614                .map(|s| s.len())
615                .unwrap_or_else(|| format!("[,{}]", j + 1).len());
616            let max_elem_w = (0..nrow)
617                .map(|i| elements[i * ncol + j].len())
618                .max()
619                .unwrap_or(0);
620            header_w.max(max_elem_w)
621        })
622        .collect();
623
624    // Row label width
625    let row_label_width = (0..nrow)
626        .map(|i| {
627            row_names
628                .as_ref()
629                .and_then(|rn| rn.get(i))
630                .map(|s| s.len())
631                .unwrap_or_else(|| format!("[{},]", i + 1).len())
632        })
633        .max()
634        .unwrap_or(0);
635
636    // Also account for the blank space above row labels in the header line
637    let mut output = String::new();
638
639    // Header line
640    output.push_str(&" ".repeat(row_label_width));
641    for (j, &cw) in col_widths.iter().enumerate() {
642        let header = col_names
643            .as_ref()
644            .and_then(|cn| cn.get(j).cloned())
645            .unwrap_or_else(|| format!("[,{}]", j + 1));
646        output.push(' ');
647        output.push_str(&format!("{:>width$}", header, width = cw));
648    }
649    output.push('\n');
650
651    // Data rows
652    for i in 0..nrow {
653        let row_label = row_names
654            .as_ref()
655            .and_then(|rn| rn.get(i).cloned())
656            .unwrap_or_else(|| format!("[{},]", i + 1));
657        output.push_str(&format!("{:>width$}", row_label, width = row_label_width));
658        for j in 0..ncol {
659            output.push(' ');
660            output.push_str(&format!(
661                "{:>width$}",
662                elements[i * ncol + j],
663                width = col_widths[j]
664            ));
665        }
666        output.push('\n');
667    }
668
669    context.write(&output);
670    context.interpreter().set_invisible();
671    Ok(val.clone())
672}
673
674/// Extract dimnames from a matrix's attributes.
675/// Returns (row_names, col_names) as optional Vec<String>.
676fn extract_dimnames(rv: &RVector) -> (Option<Vec<String>>, Option<Vec<String>>) {
677    match rv.get_attr("dimnames") {
678        Some(RValue::List(list)) => {
679            let row_names = list.values.first().and_then(|(_, v)| match v {
680                RValue::Vector(rv) => {
681                    if let Vector::Character(chars) = &rv.inner {
682                        Some(
683                            chars
684                                .iter()
685                                .map(|c| c.clone().unwrap_or_else(|| "NA".to_string()))
686                                .collect(),
687                        )
688                    } else {
689                        None
690                    }
691                }
692                _ => None,
693            });
694            let col_names = list.values.get(1).and_then(|(_, v)| match v {
695                RValue::Vector(rv) => {
696                    if let Vector::Character(chars) = &rv.inner {
697                        Some(
698                            chars
699                                .iter()
700                                .map(|c| c.clone().unwrap_or_else(|| "NA".to_string()))
701                                .collect(),
702                        )
703                    } else {
704                        None
705                    }
706                }
707                _ => None,
708            });
709            (row_names, col_names)
710        }
711        _ => (None, None),
712    }
713}
714
715/// Format matrix elements as strings, stored in row-major order.
716fn format_matrix_elements(v: &Vector, nrow: usize, ncol: usize) -> Vec<String> {
717    let len = v.len();
718    // R stores matrices in column-major order: element [i,j] is at index i + j*nrow
719    (0..nrow * ncol)
720        .map(|idx| {
721            let i = idx / ncol; // row
722            let j = idx % ncol; // col
723            let flat_idx = i + j * nrow; // column-major index
724            if flat_idx >= len {
725                return "NA".to_string();
726            }
727            match v {
728                Vector::Raw(vals) => format!("{:02x}", vals[flat_idx]),
729                Vector::Logical(vals) => match vals[flat_idx] {
730                    Some(true) => "TRUE".to_string(),
731                    Some(false) => "FALSE".to_string(),
732                    None => "NA".to_string(),
733                },
734                Vector::Integer(vals) => match vals.get_opt(flat_idx) {
735                    Some(n) => n.to_string(),
736                    None => "NA".to_string(),
737                },
738                Vector::Double(vals) => match vals.get_opt(flat_idx) {
739                    Some(f) => format_r_double(f),
740                    None => "NA".to_string(),
741                },
742                Vector::Complex(vals) => match vals[flat_idx] {
743                    Some(c) => format_r_complex(c),
744                    None => "NA".to_string(),
745                },
746                Vector::Character(vals) => match &vals[flat_idx] {
747                    Some(s) => format!("\"{}\"", s),
748                    None => "NA".to_string(),
749                },
750            }
751        })
752        .collect()
753}
754
755/// Print a factor showing level labels instead of integer codes.
756///
757/// @param x a factor to print
758/// @return x, invisibly
759#[interpreter_builtin(name = "print.factor", min_args = 1)]
760fn interp_print_factor(
761    args: &[RValue],
762    _named: &[(String, RValue)],
763    context: &BuiltinContext,
764) -> Result<RValue, RError> {
765    let val = &args[0];
766    let rv = match val {
767        RValue::Vector(rv) => rv,
768        _ => {
769            context.write(&format!("{}\n", val));
770            return Ok(val.clone());
771        }
772    };
773
774    // Get levels
775    let levels: Vec<String> = match rv.get_attr("levels") {
776        Some(RValue::Vector(lv)) => match &lv.inner {
777            Vector::Character(chars) => chars
778                .iter()
779                .map(|c| c.clone().unwrap_or_else(|| "NA".to_string()))
780                .collect(),
781            _ => vec![],
782        },
783        _ => vec![],
784    };
785
786    // Map integer codes to level labels
787    let labels: Vec<String> = rv
788        .to_integers()
789        .iter()
790        .map(|code| match code {
791            Some(i) => {
792                let idx = usize::try_from(*i).ok().and_then(|i| i.checked_sub(1));
793                match idx {
794                    Some(idx) if idx < levels.len() => levels[idx].clone(),
795                    _ => "NA".to_string(),
796                }
797            }
798            None => "NA".to_string(),
799        })
800        .collect();
801
802    // Format like a character vector with [1] prefix
803    let formatted = format_factor_labels(&labels);
804    context.write(&formatted);
805    context.write(&format!("Levels: {}\n", levels.join(" ")));
806
807    context.interpreter().set_invisible();
808    Ok(val.clone())
809}
810
/// Lay out factor labels as R-style lines prefixed with the index of their
/// first element (`[1] a b c`).
///
/// Labels are separated by single spaces and wrapped so that a line stays
/// within 80 bytes where possible; a line always holds at least one label,
/// however long. An empty input yields `factor(0)`. The result always ends
/// with a newline.
fn format_factor_labels(labels: &[String]) -> String {
    const LINE_LIMIT: usize = 80;

    if labels.is_empty() {
        return String::from("factor(0)\n");
    }

    let total = labels.len();
    let mut lines: Vec<String> = Vec::new();
    let mut next = 0;

    while next < total {
        let first_on_line = next;
        let mut row = format!("[{}] ", next + 1);
        let mut used = row.len();

        while let Some(text) = labels.get(next) {
            // Each label is budgeted together with its separating space.
            let cost = text.len() + 1;
            if next > first_on_line && used + cost > LINE_LIMIT {
                break;
            }
            row.push_str(text);
            next += 1;
            // Every label except the very last one is followed by a space,
            // even when it ends up last on its line.
            if next < total {
                row.push(' ');
            }
            used += cost;
        }

        lines.push(row);
    }

    let mut out = lines.join("\n");
    out.push('\n');
    out
}
851
852// endregion
853
854/// Apply a function over a vector or list, simplifying the result.
855///
856/// @param X vector or list to iterate over
857/// @param FUN function to apply to each element
858/// @return simplified vector or list of results
859#[interpreter_builtin(name = "sapply", min_args = 2)]
860fn interp_sapply(
861    args: &[RValue],
862    named: &[(String, RValue)],
863    context: &BuiltinContext,
864) -> Result<RValue, RError> {
865    eval_apply(args, named, true, context)
866}
867
868/// Apply a function over a vector or list, returning a list.
869///
870/// @param X vector or list to iterate over
871/// @param FUN function to apply to each element
872/// @param ... additional arguments passed to FUN
873/// @return list of results
874#[interpreter_builtin(name = "lapply", min_args = 2)]
875fn interp_lapply(
876    args: &[RValue],
877    named: &[(String, RValue)],
878    context: &BuiltinContext,
879) -> Result<RValue, RError> {
880    eval_apply(args, named, false, context)
881}
882
883/// Apply a function over a vector or list with a type-checked return template.
884///
885/// vapply is similar to sapply, but requires a FUN.VALUE template that specifies
886/// the expected return type and length. Each result is checked against the template,
887/// and an error is raised if there is a mismatch.
888///
889/// @param X vector or list to iterate over
890/// @param FUN function to apply to each element
891/// @param FUN.VALUE template value specifying the expected return type and length
892/// @param ... additional arguments passed to FUN
893/// @return simplified vector matching FUN.VALUE type
894#[interpreter_builtin(name = "vapply", min_args = 3)]
895fn interp_vapply(
896    positional: &[RValue],
897    named: &[(String, RValue)],
898    context: &BuiltinContext,
899) -> Result<RValue, RError> {
900    let ca = CallArgs::new(positional, named);
901    let env = context.env();
902
903    let x = ca
904        .value("X", 0)
905        .ok_or_else(|| {
906            RError::new(
907                RErrorKind::Argument,
908                "vapply requires at least 3 arguments: X, FUN, and FUN.VALUE",
909            )
910        })?
911        .clone();
912    let f_val = ca
913        .value("FUN", 1)
914        .ok_or_else(|| {
915            RError::new(
916                RErrorKind::Argument,
917                "vapply requires at least 3 arguments: X, FUN, and FUN.VALUE",
918            )
919        })?
920        .clone();
921    let f = match_fun(&f_val, env)?;
922    let fun_value = ca
923        .value("FUN.VALUE", 2)
924        .ok_or_else(|| {
925            RError::new(
926                RErrorKind::Argument,
927                "vapply requires at least 3 arguments: X, FUN, and FUN.VALUE",
928            )
929        })?
930        .clone();
931
932    // Filter out vapply's own named args before passing the rest to FUN
933    let vapply_params = ["X", "FUN", "FUN.VALUE", "USE.NAMES", "fail_fast"];
934    let (fail_fast, _) = extract_fail_fast(named);
935    let extra_named: Vec<(String, RValue)> = named
936        .iter()
937        .filter(|(n, _)| !vapply_params.contains(&n.as_str()))
938        .cloned()
939        .collect();
940
941    // Determine expected type and length from FUN.VALUE
942    let expected_len = fun_value.length();
943    let expected_type = fun_value.type_name().to_string();
944
945    let items: Vec<RValue> = rvalue_to_items(&x);
946
947    // Extra positional args beyond X, FUN, FUN.VALUE are passed to FUN
948    let extra_args: Vec<RValue> = positional.iter().skip(3).cloned().collect();
949    context.with_interpreter(|interp| {
950        let mut results: Vec<RValue> = Vec::with_capacity(items.len());
951        for (i, item) in items.iter().enumerate() {
952            let mut call_args = vec![item.clone()];
953            call_args.extend(extra_args.iter().cloned());
954            let result = if fail_fast {
955                interp.call_function(&f, &call_args, &extra_named, env)?
956            } else {
957                interp
958                    .call_function(&f, &call_args, &extra_named, env)
959                    .unwrap_or(RValue::Null)
960            };
961
962            // Validate result matches FUN.VALUE template
963            let result_len = result.length();
964            let result_type = result.type_name().to_string();
965            if result_len != expected_len {
966                return Err(RError::new(
967                    RErrorKind::Type,
968                    format!(
969                        "values must be length {} (FUN.VALUE), but FUN(X[[{}]]) result is length {}",
970                        expected_len,
971                        i + 1,
972                        result_len
973                    ),
974                ));
975            }
976            if result_type != expected_type {
977                return Err(RError::new(
978                    RErrorKind::Type,
979                    format!(
980                        "values must be type '{}' (FUN.VALUE), but FUN(X[[{}]]) result is type '{}'",
981                        expected_type,
982                        i + 1,
983                        result_type
984                    ),
985                ));
986            }
987            results.push(result);
988        }
989
990        // Simplify results: vapply always simplifies since we've validated types
991        if results.is_empty() {
992            // Return an empty vector of the expected type
993            return match expected_type.as_str() {
994                "double" => Ok(RValue::vec(Vector::Double(vec![].into()))),
995                "integer" => Ok(RValue::vec(Vector::Integer(vec![].into()))),
996                "character" => Ok(RValue::vec(Vector::Character(vec![].into()))),
997                "logical" => Ok(RValue::vec(Vector::Logical(vec![].into()))),
998                _ => Ok(RValue::List(RList::new(vec![]))),
999            };
1000        }
1001
1002        if expected_len == 1 {
1003            // Scalar results: simplify to a typed vector
1004            match expected_type.as_str() {
1005                "double" => {
1006                    let vals: Vec<Option<f64>> = results
1007                        .iter()
1008                        .filter_map(|r| {
1009                            r.as_vector()
1010                                .map(|v| v.to_doubles().into_iter().next().unwrap_or(None))
1011                        })
1012                        .collect();
1013                    Ok(RValue::vec(Vector::Double(vals.into())))
1014                }
1015                "integer" => {
1016                    let vals: Vec<Option<i64>> = results
1017                        .iter()
1018                        .filter_map(|r| {
1019                            r.as_vector()
1020                                .map(|v| v.to_integers().into_iter().next().unwrap_or(None))
1021                        })
1022                        .collect();
1023                    Ok(RValue::vec(Vector::Integer(vals.into())))
1024                }
1025                "character" => {
1026                    let vals: Vec<Option<String>> = results
1027                        .iter()
1028                        .filter_map(|r| {
1029                            r.as_vector()
1030                                .map(|v| v.to_characters().into_iter().next().unwrap_or(None))
1031                        })
1032                        .collect();
1033                    Ok(RValue::vec(Vector::Character(vals.into())))
1034                }
1035                "logical" => {
1036                    let vals: Vec<Option<bool>> = results
1037                        .iter()
1038                        .filter_map(|r| {
1039                            r.as_vector()
1040                                .map(|v| v.to_logicals().into_iter().next().unwrap_or(None))
1041                        })
1042                        .collect();
1043                    Ok(RValue::vec(Vector::Logical(vals.into())))
1044                }
1045                _ => {
1046                    let values: Vec<(Option<String>, RValue)> =
1047                        results.into_iter().map(|v| (None, v)).collect();
1048                    Ok(RValue::List(RList::new(values)))
1049                }
1050            }
1051        } else {
1052            // Multi-value results: build a matrix (each result becomes a column)
1053            simplify_apply_results(results)
1054        }
1055    })
1056}
1057
1058fn eval_apply(
1059    positional: &[RValue],
1060    named: &[(String, RValue)],
1061    simplify: bool,
1062    context: &BuiltinContext,
1063) -> Result<RValue, RError> {
1064    if positional.len() < 2 {
1065        return Err(RError::new(
1066            RErrorKind::Argument,
1067            "need at least 2 arguments for apply".to_string(),
1068        ));
1069    }
1070    let env = context.env();
1071    let (fail_fast, extra_named) = extract_fail_fast(named);
1072    let x = &positional[0];
1073    let f = match_fun(&positional[1], env)?;
1074
1075    let items: Vec<RValue> = rvalue_to_items(x);
1076
1077    // Extra positional args beyond X and FUN are passed to FUN in each call
1078    let extra_args: Vec<RValue> = positional.iter().skip(2).cloned().collect();
1079
1080    let env = context.env();
1081    context.with_interpreter(|interp| {
1082        let mut results: Vec<RValue> = Vec::new();
1083        for item in &items {
1084            let mut call_args = vec![item.clone()];
1085            call_args.extend(extra_args.iter().cloned());
1086            if fail_fast {
1087                let result = interp.call_function(&f, &call_args, &extra_named, env)?;
1088                results.push(result);
1089            } else {
1090                match interp.call_function(&f, &call_args, &extra_named, env) {
1091                    Ok(result) => results.push(result),
1092                    Err(_) => results.push(RValue::Null),
1093                }
1094            }
1095        }
1096
1097        if simplify {
1098            let all_scalar = results.iter().all(|r| r.length() == 1);
1099            if all_scalar && !results.is_empty() {
1100                let first_type = results[0].type_name();
1101                let all_same = results.iter().all(|r| r.type_name() == first_type);
1102                if all_same {
1103                    match first_type {
1104                        "double" => {
1105                            let vals: Vec<Option<f64>> = results
1106                                .iter()
1107                                .filter_map(|r| {
1108                                    r.as_vector()
1109                                        .map(|v| v.to_doubles().into_iter().next().unwrap_or(None))
1110                                })
1111                                .collect();
1112                            return Ok(RValue::vec(Vector::Double(vals.into())));
1113                        }
1114                        "integer" => {
1115                            let vals: Vec<Option<i64>> = results
1116                                .iter()
1117                                .filter_map(|r| {
1118                                    r.as_vector()
1119                                        .map(|v| v.to_integers().into_iter().next().unwrap_or(None))
1120                                })
1121                                .collect();
1122                            return Ok(RValue::vec(Vector::Integer(vals.into())));
1123                        }
1124                        "character" => {
1125                            let vals: Vec<Option<String>> = results
1126                                .iter()
1127                                .filter_map(|r| {
1128                                    r.as_vector().map(|v| {
1129                                        v.to_characters().into_iter().next().unwrap_or(None)
1130                                    })
1131                                })
1132                                .collect();
1133                            return Ok(RValue::vec(Vector::Character(vals.into())));
1134                        }
1135                        "logical" => {
1136                            let vals: Vec<Option<bool>> = results
1137                                .iter()
1138                                .filter_map(|r| {
1139                                    r.as_vector()
1140                                        .map(|v| v.to_logicals().into_iter().next().unwrap_or(None))
1141                                })
1142                                .collect();
1143                            return Ok(RValue::vec(Vector::Logical(vals.into())));
1144                        }
1145                        _ => {}
1146                    }
1147                }
1148            }
1149        }
1150
1151        let values: Vec<(Option<String>, RValue)> =
1152            results.into_iter().map(|v| (None, v)).collect();
1153        Ok(RValue::List(RList::new(values)))
1154    })
1155}
1156
1157/// Call a function with arguments supplied as a list.
1158///
1159/// Named elements in the list are passed as named arguments to the function.
1160/// Unnamed elements are passed as positional arguments.
1161///
1162/// @param what function or character string naming the function
1163/// @param args list of arguments to pass to the function
1164/// @param quote if TRUE, do not evaluate the arguments (default FALSE)
1165/// @return the result of the function call
1166#[interpreter_builtin(name = "do.call", min_args = 2)]
1167fn interp_do_call(
1168    positional: &[RValue],
1169    named: &[(String, RValue)],
1170    context: &BuiltinContext,
1171) -> Result<RValue, RError> {
1172    let env = context.env();
1173    if positional.len() >= 2 {
1174        let f = match_fun(&positional[0], env)?;
1175
1176        // Get the target environment from envir= named arg (defaults to calling env)
1177        let target_env = named
1178            .iter()
1179            .find(|(n, _)| n == "envir")
1180            .and_then(|(_, v)| {
1181                if let RValue::Environment(e) = v {
1182                    Some(e.clone())
1183                } else {
1184                    None
1185                }
1186            })
1187            .unwrap_or_else(|| env.clone());
1188
1189        // Filter out the envir= arg before forwarding to the function
1190        let forwarded_named: Vec<(String, RValue)> = named
1191            .iter()
1192            .filter(|(n, _)| n != "envir")
1193            .cloned()
1194            .collect();
1195
1196        // Handle pre-eval builtins that can't go through the normal dispatch path
1197        if let RValue::Function(crate::interpreter::value::RFunction::Builtin {
1198            name: builtin_name,
1199            ..
1200        }) = &f
1201        {
1202            if builtin_name == "on.exit" {
1203                return do_call_on_exit(&positional[1], &forwarded_named, &target_env);
1204            }
1205            // delayedAssign via do.call: the value arg is already evaluated,
1206            // so we create the promise directly instead of going through pre_eval.
1207            if builtin_name == "delayedAssign" {
1208                return do_call_delayed_assign(&positional[1], &target_env);
1209            }
1210        }
1211
1212        return context.with_interpreter(|interp| match &positional[1] {
1213            RValue::List(l) => {
1214                // Split list elements into positional and named args
1215                let mut pos_args: Vec<RValue> = Vec::new();
1216                let mut named_args: Vec<(String, RValue)> = forwarded_named;
1217                for (name, value) in &l.values {
1218                    match name {
1219                        Some(n) if !n.is_empty() => {
1220                            named_args.push((n.clone(), value.clone()));
1221                        }
1222                        _ => {
1223                            pos_args.push(value.clone());
1224                        }
1225                    }
1226                }
1227                interp
1228                    .call_function(&f, &pos_args, &named_args, &target_env)
1229                    .map_err(RError::from)
1230            }
1231            _ => interp
1232                .call_function(&f, &positional[1..], &forwarded_named, &target_env)
1233                .map_err(RError::from),
1234        });
1235    }
1236    Err(RError::new(
1237        RErrorKind::Argument,
1238        "do.call requires at least 2 arguments".to_string(),
1239    ))
1240}
1241
1242/// Handle `do.call(on.exit, list(expr, add), envir=env)`.
1243///
1244/// `on.exit` is a pre-eval builtin that stores unevaluated expressions.
1245/// When called via `do.call`, the expression is already an RValue (typically
1246/// a Language/call object). We convert it back to an Expr for storage.
1247fn do_call_on_exit(
1248    args_val: &RValue,
1249    named: &[(String, RValue)],
1250    target_env: &crate::interpreter::environment::Environment,
1251) -> Result<RValue, RError> {
1252    use crate::parser::ast::Expr;
1253
1254    let list = match args_val {
1255        RValue::List(l) => l,
1256        _ => return Ok(RValue::Null),
1257    };
1258
1259    // First element is the expression to run on exit
1260    let expr = list.values.first().map(|(_, v)| v);
1261    // Second element or named "add" is whether to add (vs replace)
1262    let add = list
1263        .values
1264        .iter()
1265        .find(|(k, _)| k.as_deref() == Some("add"))
1266        .or_else(|| list.values.get(1).filter(|(k, _)| k.is_none()))
1267        .and_then(|(_, v)| v.as_vector()?.as_logical_scalar())
1268        .or_else(|| {
1269            named
1270                .iter()
1271                .find(|(n, _)| n == "add")
1272                .and_then(|(_, v)| v.as_vector()?.as_logical_scalar())
1273        })
1274        .unwrap_or(false);
1275
1276    if let Some(expr_val) = expr {
1277        // Convert the RValue (Language/call) back to an Expr
1278        let expr_ast = match expr_val {
1279            RValue::Language(lang) => (*lang.inner).clone(),
1280            _ => Expr::Null,
1281        };
1282        target_env.push_on_exit(expr_ast, add, true);
1283    } else {
1284        target_env.take_on_exit();
1285    }
1286
1287    Ok(RValue::Null)
1288}
1289
1290/// `do.call(delayedAssign, list(name, expr, eval.env, assign.env))` handler.
1291fn do_call_delayed_assign(
1292    args_val: &RValue,
1293    target_env: &crate::interpreter::environment::Environment,
1294) -> Result<RValue, RError> {
1295    let list = match args_val {
1296        RValue::List(l) => l,
1297        _ => return Err(RError::new(RErrorKind::Argument, "args must be a list")),
1298    };
1299
1300    // Resolve args by name first, then position.
1301    // delayedAssign(x, value, eval.env, assign.env)
1302    let get_arg = |name_key: &str, pos: usize| -> Option<&RValue> {
1303        list.values
1304            .iter()
1305            .find(|(n, _)| n.as_deref() == Some(name_key))
1306            .map(|(_, v)| v)
1307            .or_else(|| list.values.get(pos).map(|(_, v)| v))
1308    };
1309
1310    let name = get_arg("x", 0)
1311        .and_then(|v| v.as_vector()?.as_character_scalar())
1312        .ok_or_else(|| RError::new(RErrorKind::Argument, "first arg must be a character name"))?;
1313
1314    let value_expr = match get_arg("value", 1) {
1315        Some(RValue::Language(lang)) => (*lang.inner).clone(),
1316        Some(other) => crate::interpreter::value::rvalue_to_expr(other),
1317        None => crate::parser::ast::Expr::Null,
1318    };
1319
1320    let eval_env = get_arg("eval.env", 2)
1321        .and_then(|v| match v {
1322            RValue::Environment(e) => Some(e.clone()),
1323            _ => None,
1324        })
1325        .unwrap_or_else(|| target_env.clone());
1326
1327    let assign_env = get_arg("assign.env", 3)
1328        .and_then(|v| match v {
1329            RValue::Environment(e) => Some(e.clone()),
1330            _ => None,
1331        })
1332        .unwrap_or_else(|| target_env.clone());
1333
1334    let promise = RValue::Promise(std::rc::Rc::new(std::cell::RefCell::new(RPromise::new(
1335        value_expr, eval_env,
1336    ))));
1337    assign_env.set(name, promise);
1338    Ok(RValue::Null)
1339}
1340
1341/// Create a vectorized version of a function.
1342///
1343/// Returns a new closure that calls `mapply(FUN, ...)` under the hood,
1344/// so scalar user-defined functions work element-wise on vector inputs.
1345///
1346/// @param FUN function to vectorize
1347/// @param vectorize.args character vector of argument names to vectorize over (default: all formals)
1348/// @param SIMPLIFY if TRUE (default), simplify the result
1349/// @return a new function that applies FUN element-wise
1350#[interpreter_builtin(name = "Vectorize", min_args = 1)]
1351fn interp_vectorize(
1352    positional: &[RValue],
1353    named: &[(String, RValue)],
1354    context: &BuiltinContext,
1355) -> Result<RValue, RError> {
1356    let env = context.env();
1357    let fun = match_fun(
1358        positional.first().ok_or_else(|| {
1359            RError::new(
1360                RErrorKind::Argument,
1361                "argument 'FUN' is missing".to_string(),
1362            )
1363        })?,
1364        env,
1365    )?;
1366
1367    let simplify = named
1368        .iter()
1369        .find(|(n, _)| n == "SIMPLIFY")
1370        .and_then(|(_, v)| v.as_vector()?.as_logical_scalar())
1371        .unwrap_or(true);
1372
1373    // Build a closure: function(...) mapply(FUN, ..., SIMPLIFY = <simplify>)
1374    // The FUN value is captured in the closure's environment.
1375    let closure_env = Environment::new_child(env);
1376    closure_env.set(".VEC_FUN".to_string(), fun);
1377    closure_env.set(
1378        ".VEC_SIMPLIFY".to_string(),
1379        RValue::vec(Vector::Logical(vec![Some(simplify)].into())),
1380    );
1381
1382    let body = Expr::Call {
1383        func: Box::new(Expr::Symbol("mapply".to_string())),
1384        span: None,
1385        args: vec![
1386            // FUN as first positional arg (mapply takes FUN as positional[0])
1387            Arg {
1388                name: None,
1389                value: Some(Expr::Symbol(".VEC_FUN".to_string())),
1390            },
1391            Arg {
1392                name: None,
1393                value: Some(Expr::Dots),
1394            },
1395            Arg {
1396                name: Some("SIMPLIFY".to_string()),
1397                value: Some(Expr::Symbol(".VEC_SIMPLIFY".to_string())),
1398            },
1399        ],
1400    };
1401
1402    let params = vec![Param {
1403        name: "...".to_string(),
1404        default: None,
1405        is_dots: true,
1406    }];
1407
1408    Ok(RValue::Function(RFunction::Closure {
1409        params,
1410        body,
1411        env: closure_env,
1412    }))
1413}
1414
1415/// Reduce a vector or list to a single value by applying a binary function.
1416///
1417/// @param f binary function taking two arguments
1418/// @param x vector or list to reduce
1419/// @param init optional initial value for the accumulator
1420/// @param accumulate if TRUE, return all intermediate results
1421/// @return the final accumulated value, or a list of intermediate values if accumulate=TRUE
1422#[interpreter_builtin(name = "Reduce", min_args = 2)]
1423fn interp_reduce(
1424    positional: &[RValue],
1425    named: &[(String, RValue)],
1426    context: &BuiltinContext,
1427) -> Result<RValue, RError> {
1428    if positional.len() < 2 {
1429        return Err(RError::new(
1430            RErrorKind::Argument,
1431            "Reduce requires at least 2 arguments".to_string(),
1432        ));
1433    }
1434    let env = context.env();
1435    let (_fail_fast, _extra_named) = extract_fail_fast(named);
1436    let f = match_fun(&positional[0], env)?;
1437    let x = &positional[1];
1438    let init = positional
1439        .get(2)
1440        .or_else(|| named.iter().find(|(n, _)| n == "init").map(|(_, v)| v));
1441    let accumulate = named
1442        .iter()
1443        .find(|(n, _)| n == "accumulate")
1444        .and_then(|(_, v)| v.as_vector()?.as_logical_scalar())
1445        .unwrap_or(false);
1446
1447    let items: Vec<RValue> = rvalue_to_items(x);
1448
1449    if items.is_empty() {
1450        return Ok(init.cloned().unwrap_or(RValue::Null));
1451    }
1452
1453    let (mut acc, start) = match init {
1454        Some(v) => (v.clone(), 0),
1455        None => (items[0].clone(), 1),
1456    };
1457
1458    let mut accum_results = if accumulate {
1459        vec![acc.clone()]
1460    } else {
1461        vec![]
1462    };
1463
1464    // Reduce is inherently sequential — each step depends on the previous.
1465    // fail_fast has no meaningful "collect errors" behavior here; errors always propagate.
1466    let env = context.env();
1467    context.with_interpreter(|interp| {
1468        for item in items.iter().skip(start) {
1469            acc = interp.call_function(&f, &[acc, item.clone()], &[], env)?;
1470            if accumulate {
1471                accum_results.push(acc.clone());
1472            }
1473        }
1474
1475        if accumulate {
1476            let values: Vec<(Option<String>, RValue)> =
1477                accum_results.into_iter().map(|v| (None, v)).collect();
1478            Ok(RValue::List(RList::new(values)))
1479        } else {
1480            Ok(acc)
1481        }
1482    })
1483}
1484
1485/// Select elements of a vector or list for which a predicate returns TRUE.
1486///
1487/// @param f predicate function returning a logical scalar
1488/// @param x vector or list to filter
1489/// @return elements of x for which f returns TRUE
1490#[interpreter_builtin(name = "Filter", min_args = 2)]
1491fn interp_filter(
1492    positional: &[RValue],
1493    named: &[(String, RValue)],
1494    context: &BuiltinContext,
1495) -> Result<RValue, RError> {
1496    if positional.len() < 2 {
1497        return Err(RError::new(
1498            RErrorKind::Argument,
1499            "Filter requires 2 arguments".to_string(),
1500        ));
1501    }
1502    let env = context.env();
1503    let (fail_fast, _extra_named) = extract_fail_fast(named);
1504    let f = match_fun(&positional[0], env)?;
1505    let x = &positional[1];
1506
1507    let items: Vec<RValue> = rvalue_to_items(x);
1508
1509    let mut results = Vec::new();
1510    context.with_interpreter(|interp| {
1511        for item in &items {
1512            if fail_fast {
1513                let keep = interp.call_function(&f, std::slice::from_ref(item), &[], env)?;
1514                if keep
1515                    .as_vector()
1516                    .and_then(|v| v.as_logical_scalar())
1517                    .unwrap_or(false)
1518                {
1519                    results.push(item.clone());
1520                }
1521            } else if let Ok(keep) = interp.call_function(&f, std::slice::from_ref(item), &[], env)
1522            {
1523                if keep
1524                    .as_vector()
1525                    .and_then(|v| v.as_logical_scalar())
1526                    .unwrap_or(false)
1527                {
1528                    results.push(item.clone());
1529                }
1530                // Errors are silently skipped (element excluded from results)
1531            }
1532        }
1533        Ok::<(), RError>(())
1534    })?;
1535
1536    match x {
1537        RValue::List(_) => {
1538            let values: Vec<(Option<String>, RValue)> =
1539                results.into_iter().map(|v| (None, v)).collect();
1540            Ok(RValue::List(RList::new(values)))
1541        }
1542        _ => {
1543            if results.is_empty() {
1544                Ok(RValue::Null)
1545            } else {
1546                crate::interpreter::builtins::builtin_c(&results, &[])
1547            }
1548        }
1549    }
1550}
1551
1552/// Apply a function to corresponding elements of multiple vectors or lists.
1553///
1554/// @param f function to apply
1555/// @param ... vectors or lists to map over in parallel
1556/// @return list of results
1557#[interpreter_builtin(name = "Map", min_args = 2)]
1558fn interp_map(
1559    positional: &[RValue],
1560    named: &[(String, RValue)],
1561    context: &BuiltinContext,
1562) -> Result<RValue, RError> {
1563    if positional.len() < 2 {
1564        return Err(RError::new(
1565            RErrorKind::Argument,
1566            "Map requires at least 2 arguments".to_string(),
1567        ));
1568    }
1569    let env = context.env();
1570    let (fail_fast, _extra_named) = extract_fail_fast(named);
1571    let f = match_fun(&positional[0], env)?;
1572
1573    let seqs: Vec<Vec<RValue>> = positional[1..].iter().map(rvalue_to_items).collect();
1574
1575    let max_len = seqs.iter().map(|s| s.len()).max().unwrap_or(0);
1576    let mut results = Vec::new();
1577
1578    context.with_interpreter(|interp| {
1579        for i in 0..max_len {
1580            let call_args: Vec<RValue> = seqs
1581                .iter()
1582                .map(|s| {
1583                    if s.is_empty() {
1584                        RValue::Null
1585                    } else {
1586                        s[i % s.len()].clone()
1587                    }
1588                })
1589                .collect();
1590            let result = if fail_fast {
1591                interp.call_function(&f, &call_args, &[], env)?
1592            } else {
1593                interp
1594                    .call_function(&f, &call_args, &[], env)
1595                    .unwrap_or(RValue::Null)
1596            };
1597            results.push((None, result));
1598        }
1599        Ok::<(), RError>(())
1600    })?;
1601
1602    Ok(RValue::List(RList::new(results)))
1603}
1604
1605// switch() moved to pre_eval.rs — must not eagerly evaluate all branches
1606
1607/// Look up a variable by name in an environment.
1608///
1609/// @param x character string giving the variable name
1610/// @param envir environment in which to look up the variable (default: calling environment)
1611/// @return the value bound to the name
1612#[interpreter_builtin(name = "get", min_args = 1)]
1613fn interp_get(
1614    positional: &[RValue],
1615    named: &[(String, RValue)],
1616    context: &BuiltinContext,
1617) -> Result<RValue, RError> {
1618    let env = context.env();
1619    let call_args = CallArgs::new(positional, named);
1620    let name = call_args.string("x", 0)?;
1621    let target_env = call_args.environment_or("envir", usize::MAX, env)?;
1622
1623    // Check for active bindings — these re-evaluate a function on every access
1624    if let Some(fun) = target_env.get_active_binding(&name) {
1625        return context
1626            .with_interpreter(|interp| interp.call_function(&fun, &[], &[], &target_env))
1627            .map_err(|flow| match flow {
1628                RFlow::Error(e) => e,
1629                other => RError::other(format!("{:?}", other)),
1630            });
1631    }
1632
1633    target_env
1634        .get(&name)
1635        .ok_or_else(|| RError::other(format!("object '{}' not found", name)))
1636}
1637
1638/// Like `get()` but returns a default value instead of erroring when not found.
1639///
1640/// @param x character string giving the variable name
1641/// @param envir environment to look in (default: calling environment)
1642/// @param ifnotfound value to return if `x` is not found (default: NULL)
1643/// @return the value of the variable, or `ifnotfound` if not present
1644#[interpreter_builtin(name = "get0", min_args = 1)]
1645fn interp_get0(
1646    positional: &[RValue],
1647    named: &[(String, RValue)],
1648    context: &BuiltinContext,
1649) -> Result<RValue, RError> {
1650    let env = context.env();
1651    let call_args = CallArgs::new(positional, named);
1652    let name = call_args.string("x", 0)?;
1653    let target_env = call_args.environment_or("envir", usize::MAX, env)?;
1654    let ifnotfound = call_args.value("ifnotfound", usize::MAX).cloned();
1655
1656    if let Some(fun) = target_env.get_active_binding(&name) {
1657        return context
1658            .with_interpreter(|interp| interp.call_function(&fun, &[], &[], &target_env))
1659            .map_err(|flow| match flow {
1660                RFlow::Error(e) => e,
1661                other => RError::other(format!("{:?}", other)),
1662            });
1663    }
1664
1665    Ok(target_env
1666        .get(&name)
1667        .unwrap_or_else(|| ifnotfound.unwrap_or(RValue::Null)))
1668}
1669
1670/// Assign a value to a variable name in an environment.
1671///
1672/// @param x character string giving the variable name
1673/// @param value the value to assign
1674/// @param envir environment in which to make the assignment (default: calling environment)
1675/// @return the assigned value, invisibly
1676#[interpreter_builtin(name = "assign", min_args = 2)]
1677fn interp_assign(
1678    positional: &[RValue],
1679    named: &[(String, RValue)],
1680    context: &BuiltinContext,
1681) -> Result<RValue, RError> {
1682    let env = context.env();
1683    let call_args = CallArgs::new(positional, named);
1684    let name = call_args.string("x", 0)?;
1685    let value = call_args.value("value", 1).cloned().unwrap_or(RValue::Null);
1686    let target_env = call_args.environment_or("envir", usize::MAX, env)?;
1687    target_env.set(name, value.clone());
1688    Ok(value)
1689}
1690
1691/// Test whether a variable exists in an environment.
1692///
1693/// @param x character string giving the variable name
1694/// @param envir environment to search in (default: calling environment)
1695/// @return TRUE if the variable exists, FALSE otherwise
1696#[interpreter_builtin(name = "exists", min_args = 1)]
1697fn interp_exists(
1698    positional: &[RValue],
1699    _named: &[(String, RValue)],
1700    context: &BuiltinContext,
1701) -> Result<RValue, RError> {
1702    let env = context.env();
1703    let call_args = CallArgs::new(positional, _named);
1704    let name = call_args.optional_string("x", 0).unwrap_or_default();
1705    let found = call_args
1706        .environment_or("envir", usize::MAX, env)?
1707        .get(&name)
1708        .is_some();
1709    Ok(RValue::vec(Vector::Logical(vec![Some(found)].into())))
1710}
1711
1712/// Read and evaluate an R source file.
1713///
1714/// @param file path to the R source file
1715/// @return the result of evaluating the last expression in the file
1716#[interpreter_builtin(name = "source", min_args = 1)]
1717fn interp_source(
1718    positional: &[RValue],
1719    _named: &[(String, RValue)],
1720    context: &BuiltinContext,
1721) -> Result<RValue, RError> {
1722    let path = positional
1723        .first()
1724        .and_then(|v| v.as_vector()?.as_character_scalar())
1725        .ok_or_else(|| RError::new(RErrorKind::Argument, "invalid 'file' argument".to_string()))?;
1726    let resolved_path = context.interpreter().resolve_path(&path);
1727    let display_path = resolved_path.to_string_lossy().to_string();
1728    let source = match std::fs::read_to_string(&resolved_path) {
1729        Ok(s) => s,
1730        Err(e) if e.kind() == std::io::ErrorKind::InvalidData => {
1731            let bytes = std::fs::read(&resolved_path).map_err(|e2| {
1732                RError::other(format!("cannot open file '{}': {}", display_path, e2))
1733            })?;
1734            String::from_utf8_lossy(&bytes).into_owned()
1735        }
1736        Err(e) => {
1737            return Err(RError::other(format!(
1738                "cannot open file '{}': {}",
1739                display_path, e
1740            )))
1741        }
1742    };
1743    let ast = crate::parser::parse_program(&source)
1744        .map_err(|e| RError::other(format!("parse error in '{}': {}", display_path, e)))?;
1745    context.with_interpreter(|interp| {
1746        interp
1747            .source_stack
1748            .borrow_mut()
1749            .push((display_path.clone(), source));
1750        let result = interp.eval(&ast).map_err(RError::from);
1751        interp.source_stack.borrow_mut().pop();
1752        result
1753    })
1754}
1755
1756/// Read and evaluate an R source file in a specified environment.
1757///
1758/// Like `source()`, but evaluates the expressions in the given environment
1759/// rather than the global environment. This is useful for loading code into
1760/// a specific namespace or local environment.
1761///
1762/// @param file path to the R source file
1763/// @param envir environment in which to evaluate (default: base environment)
1764/// @return the result of evaluating the last expression in the file (invisibly)
1765#[interpreter_builtin(name = "sys.source", min_args = 1)]
1766fn interp_sys_source(
1767    positional: &[RValue],
1768    named: &[(String, RValue)],
1769    context: &BuiltinContext,
1770) -> Result<RValue, RError> {
1771    let path = positional
1772        .first()
1773        .and_then(|v| v.as_vector()?.as_character_scalar())
1774        .ok_or_else(|| RError::new(RErrorKind::Argument, "invalid 'file' argument".to_string()))?;
1775    let resolved_path = context.interpreter().resolve_path(&path);
1776    let display_path = resolved_path.to_string_lossy().to_string();
1777
1778    // Get environment from named 'envir' argument or second positional
1779    let env = named
1780        .iter()
1781        .find(|(n, _)| n == "envir")
1782        .map(|(_, v)| v)
1783        .or_else(|| positional.get(1));
1784
1785    let source = match std::fs::read_to_string(&resolved_path) {
1786        Ok(s) => s,
1787        Err(e) if e.kind() == std::io::ErrorKind::InvalidData => {
1788            let bytes = std::fs::read(&resolved_path).map_err(|e2| {
1789                RError::other(format!("cannot open file '{}': {}", display_path, e2))
1790            })?;
1791            String::from_utf8_lossy(&bytes).into_owned()
1792        }
1793        Err(e) => {
1794            return Err(RError::other(format!(
1795                "cannot open file '{}': {}",
1796                display_path, e
1797            )))
1798        }
1799    };
1800
1801    let ast = crate::parser::parse_program(&source)
1802        .map_err(|e| RError::other(format!("parse error in '{}': {}", display_path, e)))?;
1803
1804    context.with_interpreter(|interp| {
1805        interp
1806            .source_stack
1807            .borrow_mut()
1808            .push((display_path.clone(), source));
1809        let result = match env {
1810            Some(RValue::Environment(target_env)) => {
1811                interp.eval_in(&ast, target_env).map_err(RError::from)
1812            }
1813            _ => interp.eval(&ast).map_err(RError::from),
1814        };
1815        interp.source_stack.borrow_mut().pop();
1816        result
1817    })
1818}
1819
1820// system.time() is in pre_eval.rs — it must time unevaluated expressions
1821
1822// --- Operator builtins: R operators as first-class functions ---
1823// These allow `Reduce("+", 1:10)`, `sapply(x, "-")`, `do.call("*", list(3,4))`, etc.
1824
1825fn eval_binop(op: BinaryOp, args: &[RValue], context: &BuiltinContext) -> Result<RValue, RError> {
1826    let left = args.first().cloned().unwrap_or(RValue::Null);
1827    let right = args.get(1).cloned().unwrap_or(RValue::Null);
1828    context
1829        .with_interpreter(|interp| interp.eval_binary(op, &left, &right, context.env()))
1830        .map_err(RError::from)
1831}
1832
1833/// Addition operator as a function (unary positive or binary addition).
1834///
1835/// @param e1 first operand (or sole operand for unary +)
1836/// @param e2 second operand (optional)
1837/// @return sum of e1 and e2, or e1 unchanged for unary +
1838#[interpreter_builtin(name = "+", min_args = 1)]
1839fn interp_op_add(
1840    args: &[RValue],
1841    _named: &[(String, RValue)],
1842    context: &BuiltinContext,
1843) -> Result<RValue, RError> {
1844    if args.len() == 1 {
1845        context
1846            .with_interpreter(|interp| interp.eval_unary(UnaryOp::Pos, &args[0]))
1847            .map_err(RError::from)
1848    } else {
1849        eval_binop(BinaryOp::Add, args, context)
1850    }
1851}
1852
1853/// Subtraction operator as a function (unary negation or binary subtraction).
1854///
1855/// @param e1 first operand (or sole operand for unary -)
1856/// @param e2 second operand (optional)
1857/// @return difference of e1 and e2, or negation of e1 for unary -
1858#[interpreter_builtin(name = "-", min_args = 1)]
1859fn interp_op_sub(
1860    args: &[RValue],
1861    _named: &[(String, RValue)],
1862    context: &BuiltinContext,
1863) -> Result<RValue, RError> {
1864    if args.len() == 1 {
1865        context
1866            .with_interpreter(|interp| interp.eval_unary(UnaryOp::Neg, &args[0]))
1867            .map_err(RError::from)
1868    } else {
1869        eval_binop(BinaryOp::Sub, args, context)
1870    }
1871}
1872
1873/// Multiplication operator as a function.
1874///
1875/// @param e1 first operand
1876/// @param e2 second operand
1877/// @return product of e1 and e2
1878#[interpreter_builtin(name = "*", min_args = 2)]
1879fn interp_op_mul(
1880    args: &[RValue],
1881    _named: &[(String, RValue)],
1882    context: &BuiltinContext,
1883) -> Result<RValue, RError> {
1884    eval_binop(BinaryOp::Mul, args, context)
1885}
1886
1887/// Division operator as a function.
1888///
1889/// @param e1 numerator
1890/// @param e2 denominator
1891/// @return quotient of e1 and e2
1892#[interpreter_builtin(name = "/", min_args = 2)]
1893fn interp_op_div(
1894    args: &[RValue],
1895    _named: &[(String, RValue)],
1896    context: &BuiltinContext,
1897) -> Result<RValue, RError> {
1898    eval_binop(BinaryOp::Div, args, context)
1899}
1900
1901/// Exponentiation operator as a function.
1902///
1903/// @param e1 base
1904/// @param e2 exponent
1905/// @return e1 raised to the power of e2
1906#[interpreter_builtin(name = "^", min_args = 2)]
1907fn interp_op_pow(
1908    args: &[RValue],
1909    _named: &[(String, RValue)],
1910    context: &BuiltinContext,
1911) -> Result<RValue, RError> {
1912    eval_binop(BinaryOp::Pow, args, context)
1913}
1914
1915/// Modulo operator as a function.
1916///
1917/// @param e1 dividend
1918/// @param e2 divisor
1919/// @return remainder of e1 divided by e2
1920#[interpreter_builtin(name = "%%", min_args = 2)]
1921fn interp_op_mod(
1922    args: &[RValue],
1923    _named: &[(String, RValue)],
1924    context: &BuiltinContext,
1925) -> Result<RValue, RError> {
1926    eval_binop(BinaryOp::Mod, args, context)
1927}
1928
1929/// Integer division operator as a function.
1930///
1931/// @param e1 dividend
1932/// @param e2 divisor
1933/// @return integer quotient of e1 divided by e2
1934#[interpreter_builtin(name = "%/%", min_args = 2)]
1935fn interp_op_intdiv(
1936    args: &[RValue],
1937    _named: &[(String, RValue)],
1938    context: &BuiltinContext,
1939) -> Result<RValue, RError> {
1940    eval_binop(BinaryOp::IntDiv, args, context)
1941}
1942
1943/// Equality comparison operator as a function.
1944///
1945/// @param e1 first operand
1946/// @param e2 second operand
1947/// @return logical vector indicating element-wise equality
1948#[interpreter_builtin(name = "==", min_args = 2)]
1949fn interp_op_eq(
1950    args: &[RValue],
1951    _named: &[(String, RValue)],
1952    context: &BuiltinContext,
1953) -> Result<RValue, RError> {
1954    eval_binop(BinaryOp::Eq, args, context)
1955}
1956
1957/// Inequality comparison operator as a function.
1958///
1959/// @param e1 first operand
1960/// @param e2 second operand
1961/// @return logical vector indicating element-wise inequality
1962#[interpreter_builtin(name = "!=", min_args = 2)]
1963fn interp_op_ne(
1964    args: &[RValue],
1965    _named: &[(String, RValue)],
1966    context: &BuiltinContext,
1967) -> Result<RValue, RError> {
1968    eval_binop(BinaryOp::Ne, args, context)
1969}
1970
1971/// Less-than comparison operator as a function.
1972///
1973/// @param e1 first operand
1974/// @param e2 second operand
1975/// @return logical vector indicating element-wise less-than
1976#[interpreter_builtin(name = "<", min_args = 2)]
1977fn interp_op_lt(
1978    args: &[RValue],
1979    _named: &[(String, RValue)],
1980    context: &BuiltinContext,
1981) -> Result<RValue, RError> {
1982    eval_binop(BinaryOp::Lt, args, context)
1983}
1984
1985/// Greater-than comparison operator as a function.
1986///
1987/// @param e1 first operand
1988/// @param e2 second operand
1989/// @return logical vector indicating element-wise greater-than
1990#[interpreter_builtin(name = ">", min_args = 2)]
1991fn interp_op_gt(
1992    args: &[RValue],
1993    _named: &[(String, RValue)],
1994    context: &BuiltinContext,
1995) -> Result<RValue, RError> {
1996    eval_binop(BinaryOp::Gt, args, context)
1997}
1998
1999/// Less-than-or-equal comparison operator as a function.
2000///
2001/// @param e1 first operand
2002/// @param e2 second operand
2003/// @return logical vector indicating element-wise less-than-or-equal
2004#[interpreter_builtin(name = "<=", min_args = 2)]
2005fn interp_op_le(
2006    args: &[RValue],
2007    _named: &[(String, RValue)],
2008    context: &BuiltinContext,
2009) -> Result<RValue, RError> {
2010    eval_binop(BinaryOp::Le, args, context)
2011}
2012
2013/// Greater-than-or-equal comparison operator as a function.
2014///
2015/// @param e1 first operand
2016/// @param e2 second operand
2017/// @return logical vector indicating element-wise greater-than-or-equal
2018#[interpreter_builtin(name = ">=", min_args = 2)]
2019fn interp_op_ge(
2020    args: &[RValue],
2021    _named: &[(String, RValue)],
2022    context: &BuiltinContext,
2023) -> Result<RValue, RError> {
2024    eval_binop(BinaryOp::Ge, args, context)
2025}
2026
2027/// Element-wise logical AND operator as a function.
2028///
2029/// @param e1 first logical operand
2030/// @param e2 second logical operand
2031/// @return logical vector of element-wise AND results
2032#[interpreter_builtin(name = "&", min_args = 2)]
2033fn interp_op_and(
2034    args: &[RValue],
2035    _named: &[(String, RValue)],
2036    context: &BuiltinContext,
2037) -> Result<RValue, RError> {
2038    eval_binop(BinaryOp::And, args, context)
2039}
2040
2041/// Element-wise logical OR operator as a function.
2042///
2043/// @param e1 first logical operand
2044/// @param e2 second logical operand
2045/// @return logical vector of element-wise OR results
2046#[interpreter_builtin(name = "|", min_args = 2)]
2047fn interp_op_or(
2048    args: &[RValue],
2049    _named: &[(String, RValue)],
2050    context: &BuiltinContext,
2051) -> Result<RValue, RError> {
2052    eval_binop(BinaryOp::Or, args, context)
2053}
2054
2055/// Logical NOT operator as a function.
2056///
2057/// @param x logical operand
2058/// @return logical vector of negated values
2059#[interpreter_builtin(name = "!", min_args = 1)]
2060fn interp_op_not(
2061    args: &[RValue],
2062    _named: &[(String, RValue)],
2063    context: &BuiltinContext,
2064) -> Result<RValue, RError> {
2065    context
2066        .with_interpreter(|interp| interp.eval_unary(UnaryOp::Not, &args[0]))
2067        .map_err(RError::from)
2068}
2069
2070/// Convert an RValue to a Vec of individual items (for apply/map/filter/reduce).
2071fn rvalue_to_items(x: &RValue) -> Vec<RValue> {
2072    match x {
2073        RValue::Vector(v) => match &v.inner {
2074            Vector::Raw(vals) => vals
2075                .iter()
2076                .map(|&x| RValue::vec(Vector::Raw(vec![x])))
2077                .collect(),
2078            Vector::Double(vals) => vals
2079                .iter_opt()
2080                .map(|x| RValue::vec(Vector::Double(vec![x].into())))
2081                .collect(),
2082            Vector::Integer(vals) => vals
2083                .iter_opt()
2084                .map(|x| RValue::vec(Vector::Integer(vec![x].into())))
2085                .collect(),
2086            Vector::Complex(vals) => vals
2087                .iter()
2088                .map(|x| RValue::vec(Vector::Complex(vec![*x].into())))
2089                .collect(),
2090            Vector::Character(vals) => vals
2091                .iter()
2092                .map(|x| RValue::vec(Vector::Character(vec![x.clone()].into())))
2093                .collect(),
2094            Vector::Logical(vals) => vals
2095                .iter()
2096                .map(|x| RValue::vec(Vector::Logical(vec![*x].into())))
2097                .collect(),
2098        },
2099        RValue::List(l) => l.values.iter().map(|(_, v)| v.clone()).collect(),
2100        _ => vec![x.clone()],
2101    }
2102}
2103
2104/// Invoke the next method in an S3 method dispatch chain.
2105///
2106/// @param generic character string naming the generic (optional, inferred from context)
2107/// @param object the object being dispatched on (optional, inferred from context)
2108/// @return the result of calling the next method
2109#[interpreter_builtin(name = "NextMethod")]
2110fn interp_next_method(
2111    positional: &[RValue],
2112    named: &[(String, RValue)],
2113    context: &BuiltinContext,
2114) -> Result<RValue, RError> {
2115    let env = context.env();
2116    context
2117        .with_interpreter(|interp| interp.dispatch_next_method(positional, named, env))
2118        .map_err(RError::from)
2119}
2120
2121/// Get or query the environment of a function.
2122///
2123/// @param fun function whose environment to return (optional; returns calling env if omitted)
2124/// @return the environment of fun, or the calling environment if no argument given
2125#[interpreter_builtin(name = "environment")]
2126fn interp_environment(
2127    positional: &[RValue],
2128    _named: &[(String, RValue)],
2129    context: &BuiltinContext,
2130) -> Result<RValue, RError> {
2131    match positional.first() {
2132        Some(RValue::Function(RFunction::Closure { env, .. })) => {
2133            Ok(RValue::Environment(env.clone()))
2134        }
2135        Some(_) => Ok(RValue::Null),
2136        // No args: return the current (calling) environment
2137        None => Ok(RValue::Environment(context.env().clone())),
2138    }
2139}
2140
2141/// Coerce a value to an environment.
2142///
2143/// @param x integer (search path position), string (environment name), or environment
2144/// @return the corresponding environment
2145#[interpreter_builtin(name = "as.environment", min_args = 1)]
2146fn interp_as_environment(
2147    positional: &[RValue],
2148    _named: &[(String, RValue)],
2149    context: &BuiltinContext,
2150) -> Result<RValue, RError> {
2151    let x = positional
2152        .first()
2153        .ok_or_else(|| RError::new(RErrorKind::Argument, "argument 'x' is missing".to_string()))?;
2154
2155    match x {
2156        RValue::Environment(_) => Ok(x.clone()),
2157        RValue::Vector(rv) => {
2158            if let Some(n) = rv.as_integer_scalar() {
2159                return context.with_interpreter(|interp| {
2160                    match n {
2161                    1 => Ok(RValue::Environment(interp.global_env.clone())),
2162                    -1 => {
2163                        let base = interp
2164                            .global_env
2165                            .parent()
2166                            .unwrap_or_else(|| interp.global_env.clone());
2167                        Ok(RValue::Environment(base))
2168                    }
2169                    _ => Err(RError::new(RErrorKind::Argument, format!(
2170                        "as.environment({}): only search path positions 1 (global) and -1 (base) are currently supported",
2171                        n
2172                    ))),
2173                }
2174                });
2175            }
2176            if let Some(s) = rv.as_character_scalar() {
2177                return context.with_interpreter(|interp| match s.as_str() {
2178                    ".GlobalEnv" | "R_GlobalEnv" => {
2179                        Ok(RValue::Environment(interp.global_env.clone()))
2180                    }
2181                    "package:base" => {
2182                        let base = interp
2183                            .global_env
2184                            .parent()
2185                            .unwrap_or_else(|| interp.global_env.clone());
2186                        Ok(RValue::Environment(base))
2187                    }
2188                    _ => Err(RError::new(
2189                        RErrorKind::Argument,
2190                        format!(
2191                        "no environment called '{}' was found. Use '.GlobalEnv' or 'package:base'",
2192                        s
2193                    ),
2194                    )),
2195                });
2196            }
2197            Err(RError::new(
2198                RErrorKind::Argument,
2199                format!(
2200                "cannot coerce {} to an environment — expected a number, string, or environment",
2201                x.type_name()
2202            ),
2203            ))
2204        }
2205        _ => Err(RError::new(
2206            RErrorKind::Argument,
2207            format!(
2208                "cannot coerce {} to an environment — expected a number, string, or environment",
2209                x.type_name()
2210            ),
2211        )),
2212    }
2213}
2214
2215/// Return the global environment.
2216///
2217/// @return the global environment
2218#[interpreter_builtin(name = "globalenv", max_args = 0)]
2219fn interp_globalenv(
2220    _positional: &[RValue],
2221    _named: &[(String, RValue)],
2222    context: &BuiltinContext,
2223) -> Result<RValue, RError> {
2224    context.with_interpreter(|interp| Ok(RValue::Environment(interp.global_env.clone())))
2225}
2226
2227/// Return the base environment.
2228///
2229/// @return the base environment (parent of the global environment)
2230#[interpreter_builtin(name = "baseenv", max_args = 0)]
2231fn interp_baseenv(
2232    _positional: &[RValue],
2233    _named: &[(String, RValue)],
2234    context: &BuiltinContext,
2235) -> Result<RValue, RError> {
2236    context.with_interpreter(|interp| {
2237        Ok(RValue::Environment(
2238            interp
2239                .global_env
2240                .parent()
2241                .unwrap_or_else(|| interp.global_env.clone()),
2242        ))
2243    })
2244}
2245
2246/// Return the empty environment (has no parent and no bindings).
2247///
2248/// @return the empty environment
2249#[interpreter_builtin(name = "emptyenv", max_args = 0)]
2250fn interp_emptyenv(
2251    _positional: &[RValue],
2252    _named: &[(String, RValue)],
2253    _context: &BuiltinContext,
2254) -> Result<RValue, RError> {
2255    Ok(RValue::Environment(Environment::new_empty()))
2256}
2257
2258/// Get the call expression of a frame on the call stack.
2259///
2260/// @param which frame number (0 = current, positive = counting from bottom)
2261/// @return the call as a language object, or NULL
2262#[interpreter_builtin(name = "sys.call", max_args = 1)]
2263fn interp_sys_call(
2264    positional: &[RValue],
2265    _named: &[(String, RValue)],
2266    context: &BuiltinContext,
2267) -> Result<RValue, RError> {
2268    let which = optional_frame_index(positional, 0)?;
2269    context.with_interpreter(|interp| {
2270        if which == 0 {
2271            return Ok(language_or_null(interp.current_call_expr()));
2272        }
2273
2274        if which < 0 {
2275            return Err(RError::other(
2276                "negative frame indices are not yet supported",
2277            ));
2278        }
2279
2280        let which = usize::try_from(which).map_err(RError::from)?;
2281        let frame = interp
2282            .call_frame(which)
2283            .ok_or_else(|| RError::other("not that many frames on the stack"))?;
2284        Ok(language_or_null(frame.call))
2285    })
2286}
2287
2288/// Get the function of a frame on the call stack.
2289///
2290/// @param which frame number (0 = current, positive = counting from bottom)
2291/// @return the function object for the given frame
2292#[interpreter_builtin(name = "sys.function", max_args = 1)]
2293fn interp_sys_function(
2294    positional: &[RValue],
2295    _named: &[(String, RValue)],
2296    context: &BuiltinContext,
2297) -> Result<RValue, RError> {
2298    let which = optional_frame_index(positional, 0)?;
2299    context.with_interpreter(|interp| {
2300        if which == 0 {
2301            return interp
2302                .current_call_frame()
2303                .map(|frame| frame.function)
2304                .ok_or_else(|| RError::other("not that many frames on the stack"));
2305        }
2306
2307        if which < 0 {
2308            return Err(RError::other(
2309                "negative frame indices are not yet supported",
2310            ));
2311        }
2312
2313        let which = usize::try_from(which).map_err(RError::from)?;
2314        interp
2315            .call_frame(which)
2316            .map(|frame| frame.function)
2317            .ok_or_else(|| RError::other("not that many frames on the stack"))
2318    })
2319}
2320
2321/// Get the environment of a frame on the call stack.
2322///
2323/// @param which frame number (0 = global env, positive = counting from bottom)
2324/// @return the environment for the given frame
2325#[interpreter_builtin(name = "sys.frame", max_args = 1)]
2326fn interp_sys_frame(
2327    positional: &[RValue],
2328    _named: &[(String, RValue)],
2329    context: &BuiltinContext,
2330) -> Result<RValue, RError> {
2331    let which = optional_frame_index(positional, 0)?;
2332    context.with_interpreter(|interp| {
2333        if which == 0 {
2334            return Ok(RValue::Environment(interp.global_env.clone()));
2335        }
2336
2337        if which < 0 {
2338            return Err(RError::other(
2339                "negative frame indices are not yet supported",
2340            ));
2341        }
2342
2343        let which = usize::try_from(which).map_err(RError::from)?;
2344        interp
2345            .call_frame(which)
2346            .map(|frame| RValue::Environment(frame.env))
2347            .ok_or_else(|| RError::other("not that many frames on the stack"))
2348    })
2349}
2350
2351/// Get the list of all calls on the call stack.
2352///
2353/// @return list of call language objects for all active frames
2354#[interpreter_builtin(name = "sys.calls", max_args = 0)]
2355fn interp_sys_calls(
2356    _positional: &[RValue],
2357    _named: &[(String, RValue)],
2358    context: &BuiltinContext,
2359) -> Result<RValue, RError> {
2360    context.with_interpreter(|interp| {
2361        let values = interp
2362            .call_frames()
2363            .into_iter()
2364            .map(|frame| (None, language_or_null(frame.call)))
2365            .collect();
2366        Ok(RValue::List(RList::new(values)))
2367    })
2368}
2369
2370/// Get the list of all environments on the call stack.
2371///
2372/// @return list of environments for all active frames
2373#[interpreter_builtin(name = "sys.frames", max_args = 0)]
2374fn interp_sys_frames(
2375    _positional: &[RValue],
2376    _named: &[(String, RValue)],
2377    context: &BuiltinContext,
2378) -> Result<RValue, RError> {
2379    context.with_interpreter(|interp| {
2380        let values = interp
2381            .call_frames()
2382            .into_iter()
2383            .map(|frame| (None, RValue::Environment(frame.env)))
2384            .collect();
2385        Ok(RValue::List(RList::new(values)))
2386    })
2387}
2388
2389/// Get the parent frame indices for all frames on the call stack.
2390///
2391/// @return integer vector of parent frame indices
2392#[interpreter_builtin(name = "sys.parents", max_args = 0)]
2393fn interp_sys_parents(
2394    _positional: &[RValue],
2395    _named: &[(String, RValue)],
2396    context: &BuiltinContext,
2397) -> Result<RValue, RError> {
2398    context.with_interpreter(|interp| {
2399        let len = interp.call_frames().len();
2400        let parents: Vec<Option<i64>> = (0..len)
2401            .map(|i| i64::try_from(i).map(Some))
2402            .collect::<Result<_, _>>()
2403            .map_err(RError::from)?;
2404        Ok(RValue::vec(Vector::Integer(parents.into())))
2405    })
2406}
2407
2408/// Get the on.exit expression for the current frame.
2409///
2410/// @return the on.exit expression as a language object, or NULL if none
2411#[interpreter_builtin(name = "sys.on.exit", max_args = 0)]
2412fn interp_sys_on_exit(
2413    _positional: &[RValue],
2414    _named: &[(String, RValue)],
2415    context: &BuiltinContext,
2416) -> Result<RValue, RError> {
2417    context.with_interpreter(|interp| {
2418        let frame = match interp.current_call_frame() {
2419            Some(frame) => frame,
2420            None => return Ok(RValue::Null),
2421        };
2422
2423        let exprs = frame.env.peek_on_exit();
2424        match exprs.len() {
2425            0 => Ok(RValue::Null),
2426            1 => Ok(RValue::Language(Language::new(exprs[0].clone()))),
2427            _ => Ok(RValue::Language(Language::new(
2428                crate::parser::ast::Expr::Block(exprs),
2429            ))),
2430        }
2431    })
2432}
2433
2434/// Get the number of frames on the call stack.
2435///
2436/// @return integer giving the current stack depth
2437#[interpreter_builtin(name = "sys.nframe", max_args = 0)]
2438fn interp_sys_nframe(
2439    _positional: &[RValue],
2440    _named: &[(String, RValue)],
2441    context: &BuiltinContext,
2442) -> Result<RValue, RError> {
2443    context.with_interpreter(|interp| {
2444        let len = i64::try_from(interp.call_frames().len()).map_err(RError::from)?;
2445        Ok(RValue::vec(Vector::Integer(vec![Some(len)].into())))
2446    })
2447}
2448
2449/// Get the number of arguments supplied to the current function call.
2450///
2451/// @return integer giving the number of supplied arguments
2452#[interpreter_builtin(name = "nargs", max_args = 0)]
2453fn interp_nargs(
2454    _positional: &[RValue],
2455    _named: &[(String, RValue)],
2456    context: &BuiltinContext,
2457) -> Result<RValue, RError> {
2458    context.with_interpreter(|interp| {
2459        let count = interp
2460            .current_call_frame()
2461            .map(|frame| frame.supplied_arg_count)
2462            .unwrap_or(0);
2463        Ok(RValue::vec(Vector::Integer(
2464            vec![Some(i64::try_from(count).map_err(RError::from)?)].into(),
2465        )))
2466    })
2467}
2468
2469/// Recursively call the current function with new arguments.
2470///
2471/// `Recall(...)` finds the currently executing user function from the call stack
2472/// and calls it again with the supplied arguments. This is useful for anonymous
2473/// recursive functions that don't have a name to call themselves by.
2474///
2475/// @param ... arguments to pass to the recursive call
2476/// @return the result of calling the current function with the new arguments
2477#[interpreter_builtin(name = "Recall")]
2478fn interp_recall(
2479    positional: &[RValue],
2480    named: &[(String, RValue)],
2481    context: &BuiltinContext,
2482) -> Result<RValue, RError> {
2483    let env = context.env();
2484    context.with_interpreter(|interp| {
2485        let frame = interp.current_call_frame().ok_or_else(|| {
2486            RError::other(
2487                "Recall() called from outside a function. \
2488                 Recall() can only be used inside a function body to recursively \
2489                 call the current function.",
2490            )
2491        })?;
2492        interp
2493            .call_function(&frame.function, positional, named, env)
2494            .map_err(RError::from)
2495    })
2496}
2497
2498/// Get the environment of the parent (calling) frame.
2499///
2500/// @param n number of generations to go back (default 1)
2501/// @return the environment of the n-th parent frame
2502#[interpreter_builtin(name = "parent.frame", max_args = 1)]
2503fn interp_parent_frame(
2504    positional: &[RValue],
2505    _named: &[(String, RValue)],
2506    context: &BuiltinContext,
2507) -> Result<RValue, RError> {
2508    let n = optional_frame_index(positional, 1)?;
2509    if n <= 0 {
2510        return Err(RError::new(
2511            RErrorKind::Argument,
2512            "invalid 'n' value".to_string(),
2513        ));
2514    }
2515
2516    context.with_interpreter(|interp| {
2517        let depth = interp.call_frames().len();
2518        let n = usize::try_from(n).map_err(RError::from)?;
2519        if n >= depth {
2520            return Ok(RValue::Environment(interp.global_env.clone()));
2521        }
2522
2523        let target = depth - n;
2524        interp
2525            .call_frame(target)
2526            .map(|frame| RValue::Environment(frame.env))
2527            .ok_or_else(|| RError::other("not that many frames on the stack"))
2528    })
2529}
2530
2531/// List the names of objects in an environment.
2532///
2533/// @param envir environment to list (default: calling environment)
2534/// @return character vector of variable names
2535#[interpreter_builtin(name = "ls", names = ["objects"])]
2536fn interp_ls(
2537    positional: &[RValue],
2538    named: &[(String, RValue)],
2539    context: &BuiltinContext,
2540) -> Result<RValue, RError> {
2541    let env = context.env();
2542    let target_env = CallArgs::new(positional, named).environment_or("envir", 0, env)?;
2543
2544    let names = target_env.ls();
2545    let chars: Vec<Option<String>> = names.into_iter().map(Some).collect();
2546    Ok(RValue::vec(Vector::Character(chars.into())))
2547}
2548
2549// rm() / remove() is implemented as a pre_eval builtin in pre_eval.rs
2550// to support NSE (bare symbol names like `rm(x)` instead of `rm("x")`)
2551
2552/// Lock an environment so no new bindings can be added.
2553///
2554/// @param env environment to lock
2555/// @param bindings if TRUE, also lock all existing bindings (default FALSE)
2556/// @return NULL (invisibly)
2557#[interpreter_builtin(name = "lockEnvironment", min_args = 1)]
2558fn interp_lock_environment(
2559    positional: &[RValue],
2560    named: &[(String, RValue)],
2561    _context: &BuiltinContext,
2562) -> Result<RValue, RError> {
2563    let env = match positional.first() {
2564        Some(RValue::Environment(e)) => e.clone(),
2565        _ => {
2566            return Err(RError::new(
2567                RErrorKind::Argument,
2568                "not an environment".to_string(),
2569            ))
2570        }
2571    };
2572
2573    let call_args = CallArgs::new(positional, named);
2574    let bindings = call_args.logical_flag("bindings", 1, false);
2575    env.lock(bindings);
2576    Ok(RValue::Null)
2577}
2578
2579/// Check whether an environment is locked.
2580///
2581/// @param env environment to query
2582/// @return logical scalar: TRUE if locked, FALSE otherwise
2583#[interpreter_builtin(name = "environmentIsLocked", min_args = 1)]
2584fn interp_environment_is_locked(
2585    positional: &[RValue],
2586    _named: &[(String, RValue)],
2587    _context: &BuiltinContext,
2588) -> Result<RValue, RError> {
2589    let locked = match positional.first() {
2590        Some(RValue::Environment(e)) => e.is_locked(),
2591        _ => {
2592            return Err(RError::new(
2593                RErrorKind::Argument,
2594                "not an environment".to_string(),
2595            ))
2596        }
2597    };
2598    Ok(RValue::vec(Vector::Logical(vec![Some(locked)].into())))
2599}
2600
2601/// Lock a specific binding in an environment.
2602///
2603/// @param sym name of the binding to lock (character string)
2604/// @param env environment containing the binding
2605/// @return NULL (invisibly)
2606#[interpreter_builtin(name = "lockBinding", min_args = 2)]
2607fn interp_lock_binding(
2608    positional: &[RValue],
2609    _named: &[(String, RValue)],
2610    _context: &BuiltinContext,
2611) -> Result<RValue, RError> {
2612    let sym = positional
2613        .first()
2614        .and_then(|v| v.as_vector()?.as_character_scalar())
2615        .ok_or_else(|| RError::new(RErrorKind::Argument, "not a valid symbol name".to_string()))?;
2616    let env = match positional.get(1) {
2617        Some(RValue::Environment(e)) => e.clone(),
2618        _ => {
2619            return Err(RError::new(
2620                RErrorKind::Argument,
2621                "not an environment".to_string(),
2622            ))
2623        }
2624    };
2625    env.lock_binding(&sym);
2626    Ok(RValue::Null)
2627}
2628
2629/// Check whether a binding is locked in an environment.
2630///
2631/// @param sym name of the binding to check (character string)
2632/// @param env environment containing the binding
2633/// @return logical scalar: TRUE if the binding is locked, FALSE otherwise
2634#[interpreter_builtin(name = "bindingIsLocked", min_args = 2)]
2635fn interp_binding_is_locked(
2636    positional: &[RValue],
2637    _named: &[(String, RValue)],
2638    _context: &BuiltinContext,
2639) -> Result<RValue, RError> {
2640    let sym = positional
2641        .first()
2642        .and_then(|v| v.as_vector()?.as_character_scalar())
2643        .ok_or_else(|| RError::new(RErrorKind::Argument, "not a valid symbol name".to_string()))?;
2644    let locked = match positional.get(1) {
2645        Some(RValue::Environment(e)) => e.binding_is_locked(&sym),
2646        _ => {
2647            return Err(RError::new(
2648                RErrorKind::Argument,
2649                "not an environment".to_string(),
2650            ))
2651        }
2652    };
2653    Ok(RValue::vec(Vector::Logical(vec![Some(locked)].into())))
2654}
2655
2656/// Create an active binding.
2657///
2658/// Active bindings call a function every time they are accessed.
2659/// The function `fun` is stored in the environment and re-evaluated
2660/// on every read of `sym`.
2661///
2662/// @param sym name for the binding (character string)
2663/// @param fun zero-argument function to call on access
2664/// @param env environment in which to create the binding
2665/// @return NULL (invisibly)
2666#[interpreter_builtin(name = "makeActiveBinding", min_args = 3)]
2667fn interp_make_active_binding(
2668    positional: &[RValue],
2669    _named: &[(String, RValue)],
2670    _context: &BuiltinContext,
2671) -> Result<RValue, RError> {
2672    let sym = positional
2673        .first()
2674        .and_then(|v| v.as_vector()?.as_character_scalar())
2675        .ok_or_else(|| RError::new(RErrorKind::Argument, "not a valid symbol name".to_string()))?;
2676    let fun = positional.get(1).ok_or_else(|| {
2677        RError::new(
2678            RErrorKind::Argument,
2679            "'fun' argument is missing".to_string(),
2680        )
2681    })?;
2682    let env = match positional.get(2) {
2683        Some(RValue::Environment(e)) => e.clone(),
2684        _ => {
2685            return Err(RError::new(
2686                RErrorKind::Argument,
2687                "not an environment".to_string(),
2688            ))
2689        }
2690    };
2691
2692    env.set_active_binding(sym, fun.clone());
2693    Ok(RValue::Null)
2694}
2695
2696/// Check whether a binding is an active binding.
2697///
2698/// @param sym name of the binding (character string)
2699/// @param env environment in which to check
2700/// @return logical scalar
2701#[interpreter_builtin(name = "isActiveBinding", min_args = 2)]
2702fn interp_is_active_binding(
2703    positional: &[RValue],
2704    _named: &[(String, RValue)],
2705    _context: &BuiltinContext,
2706) -> Result<RValue, RError> {
2707    let sym = positional
2708        .first()
2709        .and_then(|v| v.as_vector()?.as_character_scalar())
2710        .ok_or_else(|| RError::new(RErrorKind::Argument, "not a valid symbol name".to_string()))?;
2711    let env = match positional.get(1) {
2712        Some(RValue::Environment(e)) => e.clone(),
2713        _ => {
2714            return Err(RError::new(
2715                RErrorKind::Argument,
2716                "not an environment".to_string(),
2717            ))
2718        }
2719    };
2720
2721    let is_active = env.is_local_active_binding(&sym);
2722    Ok(RValue::vec(Vector::Logical(vec![Some(is_active)].into())))
2723}
2724
2725/// Evaluate an expression in a specified environment.
2726///
2727/// @param expr expression to evaluate (language object or character string)
2728/// @param envir environment in which to evaluate (default: calling environment)
2729/// @return the result of evaluating expr
2730#[interpreter_builtin(name = "eval", min_args = 1)]
2731fn interp_eval(
2732    positional: &[RValue],
2733    named: &[(String, RValue)],
2734    context: &BuiltinContext,
2735) -> Result<RValue, RError> {
2736    let env = context.env();
2737    let call_args = CallArgs::new(positional, named);
2738    let expr = positional.first().ok_or_else(|| {
2739        RError::new(
2740            RErrorKind::Argument,
2741            "argument 'expr' is missing".to_string(),
2742        )
2743    })?;
2744
2745    let eval_env = call_args.environment_or("envir", 1, env)?;
2746
2747    match expr {
2748        // Language object: evaluate the AST
2749        RValue::Language(ast) => context
2750            .with_interpreter(|interp| interp.eval_in(ast, &eval_env))
2751            .map_err(RError::from),
2752        // Character string: parse then eval
2753        RValue::Vector(rv) if matches!(rv.inner, Vector::Character(_)) => {
2754            let text = rv.as_character_scalar().unwrap_or_default();
2755            let parsed = crate::parser::parse_program(&text)
2756                .map_err(|e| RError::new(RErrorKind::Parse, format!("{}", e)))?;
2757            context
2758                .with_interpreter(|interp| interp.eval_in(&parsed, &eval_env))
2759                .map_err(RError::from)
2760        }
2761        // Already evaluated value: return as-is
2762        _ => Ok(expr.clone()),
2763    }
2764}
2765
2766/// Parse R source text into a language object.
2767///
2768/// @param text character string containing R code to parse
2769/// @return a language object representing the parsed expression
2770#[interpreter_builtin(name = "parse", min_args = 0)]
2771fn interp_parse(
2772    positional: &[RValue],
2773    named: &[(String, RValue)],
2774    _context: &BuiltinContext,
2775) -> Result<RValue, RError> {
2776    let text = named
2777        .iter()
2778        .find(|(n, _)| n == "text")
2779        .and_then(|(_, v)| v.as_vector()?.as_character_scalar())
2780        .or_else(|| {
2781            positional
2782                .first()
2783                .and_then(|v| v.as_vector()?.as_character_scalar())
2784        })
2785        .ok_or_else(|| {
2786            RError::new(
2787                RErrorKind::Argument,
2788                "argument 'text' is missing".to_string(),
2789            )
2790        })?;
2791
2792    let parsed = crate::parser::parse_program(&text)
2793        .map_err(|e| RError::new(RErrorKind::Parse, format!("{}", e)))?;
2794    Ok(RValue::Language(Language::new(parsed)))
2795}
2796
2797// --- apply family: apply, mapply, tapply, by ---
2798
2799/// Apply a function over rows or columns of a matrix.
2800///
2801/// @param X matrix or array
2802/// @param MARGIN 1 for rows, 2 for columns
2803/// @param FUN function to apply
2804/// @param ... additional arguments passed to FUN
2805/// @return vector, matrix, or list of results
2806#[interpreter_builtin(name = "apply", min_args = 3)]
2807fn interp_apply(
2808    positional: &[RValue],
2809    named: &[(String, RValue)],
2810    context: &BuiltinContext,
2811) -> Result<RValue, RError> {
2812    let env = context.env();
2813    let (fail_fast, extra_named) = extract_fail_fast(named);
2814    let x = positional
2815        .first()
2816        .ok_or_else(|| RError::new(RErrorKind::Argument, "argument 'X' is missing".to_string()))?;
2817    let margin_val = positional.get(1).ok_or_else(|| {
2818        RError::new(
2819            RErrorKind::Argument,
2820            "argument 'MARGIN' is missing".to_string(),
2821        )
2822    })?;
2823    let fun = match_fun(
2824        positional.get(2).ok_or_else(|| {
2825            RError::new(
2826                RErrorKind::Argument,
2827                "argument 'FUN' is missing".to_string(),
2828            )
2829        })?,
2830        env,
2831    )?;
2832
2833    let margin = margin_val
2834        .as_vector()
2835        .and_then(|v| v.as_integer_scalar())
2836        .ok_or_else(|| {
2837            RError::new(
2838                RErrorKind::Argument,
2839                "MARGIN must be 1 (rows) or 2 (columns) — got a non-integer value".to_string(),
2840            )
2841        })?;
2842
2843    // Extract dim attribute — X must be a matrix
2844    let (nrow, ncol, vec_inner) = match x {
2845        RValue::Vector(rv) => {
2846            let dims = super::get_dim_ints(rv.get_attr("dim")).ok_or_else(|| {
2847                RError::new(
2848                    RErrorKind::Argument,
2849                    "X must have a 'dim' attribute (i.e. be a matrix or array). \
2850                     Use matrix() to create one."
2851                        .to_string(),
2852                )
2853            })?;
2854            if dims.len() < 2 {
2855                return Err(RError::new(
2856                    RErrorKind::Argument,
2857                    "X must be a 2D matrix for apply() — got an array with fewer than 2 dimensions"
2858                        .to_string(),
2859                ));
2860            }
2861            let nr = usize::try_from(dims[0].unwrap_or(0))?;
2862            let nc = usize::try_from(dims[1].unwrap_or(0))?;
2863            (nr, nc, &rv.inner)
2864        }
2865        _ => {
2866            return Err(RError::new(
2867                RErrorKind::Argument,
2868                "apply() requires a matrix (vector with dim attribute) as the first argument"
2869                    .to_string(),
2870            ))
2871        }
2872    };
2873
2874    // Extra args to pass to FUN (positional args beyond the first 3)
2875    let extra_args: Vec<RValue> = positional.iter().skip(3).cloned().collect();
2876
2877    match margin {
2878        1 => {
2879            // Apply FUN to each row — extract row indices preserving original type
2880            let mut results: Vec<RValue> = Vec::with_capacity(nrow);
2881            context.with_interpreter(|interp| {
2882                for i in 0..nrow {
2883                    // Column-major: element (i, j) is at index i + j * nrow
2884                    let indices: Vec<usize> = (0..ncol).map(|j| i + j * nrow).collect();
2885                    let row_vec = vec_inner.select_indices(&indices);
2886                    let row_val = RValue::vec(row_vec);
2887                    let mut call_args = vec![row_val];
2888                    call_args.extend(extra_args.iter().cloned());
2889                    if fail_fast {
2890                        let result = interp.call_function(&fun, &call_args, &extra_named, env)?;
2891                        results.push(result);
2892                    } else {
2893                        match interp.call_function(&fun, &call_args, &extra_named, env) {
2894                            Ok(result) => results.push(result),
2895                            Err(_) => results.push(RValue::Null),
2896                        }
2897                    }
2898                }
2899                Ok::<(), RError>(())
2900            })?;
2901            simplify_apply_results(results)
2902        }
2903        2 => {
2904            // Apply FUN to each column — extract column indices preserving original type
2905            let mut results: Vec<RValue> = Vec::with_capacity(ncol);
2906            context.with_interpreter(|interp| {
2907                for j in 0..ncol {
2908                    // Column-major: column j starts at j * nrow
2909                    let indices: Vec<usize> = (0..nrow).map(|i| i + j * nrow).collect();
2910                    let col_vec = vec_inner.select_indices(&indices);
2911                    let col_val = RValue::vec(col_vec);
2912                    let mut call_args = vec![col_val];
2913                    call_args.extend(extra_args.iter().cloned());
2914                    if fail_fast {
2915                        let result = interp.call_function(&fun, &call_args, &extra_named, env)?;
2916                        results.push(result);
2917                    } else {
2918                        match interp.call_function(&fun, &call_args, &extra_named, env) {
2919                            Ok(result) => results.push(result),
2920                            Err(_) => results.push(RValue::Null),
2921                        }
2922                    }
2923                }
2924                Ok::<(), RError>(())
2925            })?;
2926            simplify_apply_results(results)
2927        }
2928        _ => Err(RError::new(
2929            RErrorKind::Argument,
2930            format!(
2931                "MARGIN must be 1 (rows) or 2 (columns) — got {}. \
2932             Higher-dimensional margins are not yet supported.",
2933                margin
2934            ),
2935        )),
2936    }
2937}
2938
2939/// Simplify apply() results: if all results are scalars, return a vector;
2940/// if all are equal-length vectors, return a matrix; otherwise return a list.
2941fn simplify_apply_results(results: Vec<RValue>) -> Result<RValue, RError> {
2942    if results.is_empty() {
2943        return Ok(RValue::List(RList::new(vec![])));
2944    }
2945
2946    // Check if all results are scalar
2947    let all_scalar = results.iter().all(|r| r.length() == 1);
2948    if all_scalar {
2949        let first_type = results[0].type_name();
2950        let all_same = results.iter().all(|r| r.type_name() == first_type);
2951        if all_same {
2952            match first_type {
2953                "double" => {
2954                    let vals: Vec<Option<f64>> = results
2955                        .iter()
2956                        .filter_map(|r| {
2957                            r.as_vector()
2958                                .map(|v| v.to_doubles().into_iter().next().unwrap_or(None))
2959                        })
2960                        .collect();
2961                    return Ok(RValue::vec(Vector::Double(vals.into())));
2962                }
2963                "integer" => {
2964                    let vals: Vec<Option<i64>> = results
2965                        .iter()
2966                        .filter_map(|r| {
2967                            r.as_vector()
2968                                .map(|v| v.to_integers().into_iter().next().unwrap_or(None))
2969                        })
2970                        .collect();
2971                    return Ok(RValue::vec(Vector::Integer(vals.into())));
2972                }
2973                "character" => {
2974                    let vals: Vec<Option<String>> = results
2975                        .iter()
2976                        .filter_map(|r| {
2977                            r.as_vector()
2978                                .map(|v| v.to_characters().into_iter().next().unwrap_or(None))
2979                        })
2980                        .collect();
2981                    return Ok(RValue::vec(Vector::Character(vals.into())));
2982                }
2983                "logical" => {
2984                    let vals: Vec<Option<bool>> = results
2985                        .iter()
2986                        .filter_map(|r| {
2987                            r.as_vector()
2988                                .map(|v| v.to_logicals().into_iter().next().unwrap_or(None))
2989                        })
2990                        .collect();
2991                    return Ok(RValue::vec(Vector::Logical(vals.into())));
2992                }
2993                _ => {}
2994            }
2995        }
2996    }
2997
2998    // Check if all results are equal-length vectors — return a matrix
2999    let first_len = results[0].length();
3000    let all_same_len = first_len > 1 && results.iter().all(|r| r.length() == first_len);
3001    if all_same_len {
3002        // Build a matrix: each result becomes a column (R's apply convention)
3003        let ncol = results.len();
3004        let nrow = first_len;
3005        let mut mat_data: Vec<Option<f64>> = Vec::with_capacity(nrow * ncol);
3006        for result in &results {
3007            if let Some(v) = result.as_vector() {
3008                mat_data.extend(v.to_doubles());
3009            }
3010        }
3011        let mut rv = RVector::from(Vector::Double(mat_data.into()));
3012        rv.set_attr(
3013            "class".to_string(),
3014            RValue::vec(Vector::Character(
3015                vec![Some("matrix".to_string()), Some("array".to_string())].into(),
3016            )),
3017        );
3018        rv.set_attr(
3019            "dim".to_string(),
3020            RValue::vec(Vector::Integer(
3021                vec![Some(i64::try_from(nrow)?), Some(i64::try_from(ncol)?)].into(),
3022            )),
3023        );
3024        return Ok(RValue::Vector(rv));
3025    }
3026
3027    // Fall back to a list
3028    let values: Vec<(Option<String>, RValue)> = results.into_iter().map(|v| (None, v)).collect();
3029    Ok(RValue::List(RList::new(values)))
3030}
3031
3032/// Apply a function to corresponding elements of multiple vectors.
3033///
3034/// @param FUN function to apply
3035/// @param ... vectors to iterate over in parallel
3036/// @param MoreArgs list of additional arguments passed to FUN in every call
3037/// @param SIMPLIFY if TRUE, simplify the result to a vector or matrix
3038/// @return simplified vector or list of results
3039#[interpreter_builtin(name = "mapply", min_args = 2)]
3040fn interp_mapply(
3041    positional: &[RValue],
3042    named: &[(String, RValue)],
3043    context: &BuiltinContext,
3044) -> Result<RValue, RError> {
3045    let env = context.env();
3046    // mapply(FUN, ..., MoreArgs = NULL, SIMPLIFY = TRUE, USE.NAMES = TRUE)
3047    let (fail_fast, extra_named) = extract_fail_fast(named);
3048    let fun = match_fun(
3049        positional.first().ok_or_else(|| {
3050            RError::new(
3051                RErrorKind::Argument,
3052                "argument 'FUN' is missing".to_string(),
3053            )
3054        })?,
3055        env,
3056    )?;
3057
3058    let simplify = extra_named
3059        .iter()
3060        .find(|(n, _)| n == "SIMPLIFY")
3061        .and_then(|(_, v)| v.as_vector()?.as_logical_scalar())
3062        .unwrap_or(true);
3063
3064    // Extract MoreArgs: a list of additional arguments to pass to FUN in every call.
3065    // Named elements become named args; unnamed elements become positional args.
3066    let (more_positional, more_named): (Vec<RValue>, Vec<(String, RValue)>) = extra_named
3067        .iter()
3068        .find(|(n, _)| n == "MoreArgs")
3069        .map(|(_, v)| match v {
3070            RValue::List(l) => {
3071                let mut pos = Vec::new();
3072                let mut named = Vec::new();
3073                for (name, val) in &l.values {
3074                    match name {
3075                        Some(n) if !n.is_empty() => named.push((n.clone(), val.clone())),
3076                        _ => pos.push(val.clone()),
3077                    }
3078                }
3079                (pos, named)
3080            }
3081            _ => (Vec::new(), Vec::new()),
3082        })
3083        .unwrap_or_default();
3084
3085    // Collect the input sequences (all positional args after FUN, excluding named)
3086    let seqs: Vec<Vec<RValue>> = positional[1..].iter().map(rvalue_to_items).collect();
3087
3088    if seqs.is_empty() {
3089        return Ok(RValue::List(RList::new(vec![])));
3090    }
3091
3092    // Find the longest sequence for recycling
3093    let max_len = seqs.iter().map(|s| s.len()).max().unwrap_or(0);
3094
3095    let mut results: Vec<RValue> = Vec::with_capacity(max_len);
3096
3097    context.with_interpreter(|interp| {
3098        for i in 0..max_len {
3099            let mut call_args: Vec<RValue> = seqs
3100                .iter()
3101                .map(|s| {
3102                    if s.is_empty() {
3103                        RValue::Null
3104                    } else {
3105                        s[i % s.len()].clone()
3106                    }
3107                })
3108                .collect();
3109            // Append MoreArgs positional values
3110            call_args.extend(more_positional.iter().cloned());
3111            let result = if fail_fast {
3112                interp.call_function(&fun, &call_args, &more_named, env)?
3113            } else {
3114                interp
3115                    .call_function(&fun, &call_args, &more_named, env)
3116                    .unwrap_or(RValue::Null)
3117            };
3118            results.push(result);
3119        }
3120        Ok::<(), RError>(())
3121    })?;
3122
3123    if simplify {
3124        let all_scalar = results.iter().all(|r| r.length() == 1);
3125        if all_scalar && !results.is_empty() {
3126            let first_type = results[0].type_name();
3127            let all_same = results.iter().all(|r| r.type_name() == first_type);
3128            if all_same {
3129                match first_type {
3130                    "double" => {
3131                        let vals: Vec<Option<f64>> = results
3132                            .iter()
3133                            .filter_map(|r| {
3134                                r.as_vector()
3135                                    .map(|v| v.to_doubles().into_iter().next().unwrap_or(None))
3136                            })
3137                            .collect();
3138                        return Ok(RValue::vec(Vector::Double(vals.into())));
3139                    }
3140                    "integer" => {
3141                        let vals: Vec<Option<i64>> = results
3142                            .iter()
3143                            .filter_map(|r| {
3144                                r.as_vector()
3145                                    .map(|v| v.to_integers().into_iter().next().unwrap_or(None))
3146                            })
3147                            .collect();
3148                        return Ok(RValue::vec(Vector::Integer(vals.into())));
3149                    }
3150                    "character" => {
3151                        let vals: Vec<Option<String>> = results
3152                            .iter()
3153                            .filter_map(|r| {
3154                                r.as_vector()
3155                                    .map(|v| v.to_characters().into_iter().next().unwrap_or(None))
3156                            })
3157                            .collect();
3158                        return Ok(RValue::vec(Vector::Character(vals.into())));
3159                    }
3160                    "logical" => {
3161                        let vals: Vec<Option<bool>> = results
3162                            .iter()
3163                            .filter_map(|r| {
3164                                r.as_vector()
3165                                    .map(|v| v.to_logicals().into_iter().next().unwrap_or(None))
3166                            })
3167                            .collect();
3168                        return Ok(RValue::vec(Vector::Logical(vals.into())));
3169                    }
3170                    _ => {}
3171                }
3172            }
3173        }
3174    }
3175
3176    let values: Vec<(Option<String>, RValue)> = results.into_iter().map(|v| (None, v)).collect();
3177    Ok(RValue::List(RList::new(values)))
3178}
3179
3180/// Apply a function to groups of values defined by a factor/index.
3181///
3182/// @param X vector of values to split into groups
3183/// @param INDEX factor or vector defining the groups
3184/// @param FUN function to apply to each group
3185/// @return named vector or list of per-group results
3186#[interpreter_builtin(name = "tapply", min_args = 3)]
3187fn interp_tapply(
3188    positional: &[RValue],
3189    named: &[(String, RValue)],
3190    context: &BuiltinContext,
3191) -> Result<RValue, RError> {
3192    let env = context.env();
3193    // tapply(X, INDEX, FUN)
3194    let (fail_fast, extra_named) = extract_fail_fast(named);
3195    let x = positional
3196        .first()
3197        .ok_or_else(|| RError::new(RErrorKind::Argument, "argument 'X' is missing".to_string()))?;
3198    let index = positional.get(1).ok_or_else(|| {
3199        RError::new(
3200            RErrorKind::Argument,
3201            "argument 'INDEX' is missing".to_string(),
3202        )
3203    })?;
3204    let fun = match_fun(
3205        positional.get(2).ok_or_else(|| {
3206            RError::new(
3207                RErrorKind::Argument,
3208                "argument 'FUN' is missing".to_string(),
3209            )
3210        })?,
3211        env,
3212    )?;
3213
3214    let x_items = rvalue_to_items(x);
3215    let index_items = rvalue_to_items(index);
3216
3217    if x_items.len() != index_items.len() {
3218        return Err(RError::new(
3219            RErrorKind::Argument,
3220            format!(
3221                "arguments 'X' (length {}) and 'INDEX' (length {}) must have the same length",
3222                x_items.len(),
3223                index_items.len()
3224            ),
3225        ));
3226    }
3227
3228    // Convert index values to string keys for grouping
3229    let index_keys: Vec<String> = index_items
3230        .iter()
3231        .map(|v| match v {
3232            RValue::Vector(rv) => rv
3233                .inner
3234                .as_character_scalar()
3235                .unwrap_or_else(|| format!("{}", v)),
3236            _ => format!("{}", v),
3237        })
3238        .collect();
3239
3240    // Collect unique group names preserving first-seen order
3241    let mut group_names: Vec<String> = Vec::new();
3242    let mut seen = std::collections::HashSet::new();
3243    for key in &index_keys {
3244        if seen.insert(key.clone()) {
3245            group_names.push(key.clone());
3246        }
3247    }
3248
3249    // Group X values by INDEX
3250    let mut groups: std::collections::HashMap<String, Vec<RValue>> =
3251        std::collections::HashMap::new();
3252    for (item, key) in x_items.into_iter().zip(index_keys.iter()) {
3253        groups.entry(key.clone()).or_default().push(item);
3254    }
3255
3256    // Apply FUN to each group
3257    let mut result_entries: Vec<(Option<String>, RValue)> = Vec::with_capacity(group_names.len());
3258
3259    context.with_interpreter(|interp| {
3260        for name in &group_names {
3261            let group = groups.remove(name).unwrap_or_default();
3262            let group_vec = combine_items_to_vector(&group);
3263            if fail_fast {
3264                let result = interp.call_function(&fun, &[group_vec], &extra_named, env)?;
3265                result_entries.push((Some(name.clone()), result));
3266            } else {
3267                match interp.call_function(&fun, &[group_vec], &extra_named, env) {
3268                    Ok(result) => result_entries.push((Some(name.clone()), result)),
3269                    Err(_) => result_entries.push((Some(name.clone()), RValue::Null)),
3270                }
3271            }
3272        }
3273        Ok::<(), RError>(())
3274    })?;
3275
3276    // Try to simplify to a named vector if all results are scalar
3277    let all_scalar = result_entries.iter().all(|(_, v)| v.length() == 1);
3278    if all_scalar && !result_entries.is_empty() {
3279        let first_type = result_entries[0].1.type_name();
3280        let all_same = result_entries
3281            .iter()
3282            .all(|(_, v)| v.type_name() == first_type);
3283        if all_same {
3284            let names: Vec<Option<String>> =
3285                result_entries.iter().map(|(n, _)| n.clone()).collect();
3286            match first_type {
3287                "double" => {
3288                    let vals: Vec<Option<f64>> = result_entries
3289                        .iter()
3290                        .filter_map(|(_, r)| {
3291                            r.as_vector()
3292                                .map(|v| v.to_doubles().into_iter().next().unwrap_or(None))
3293                        })
3294                        .collect();
3295                    let mut rv = RVector::from(Vector::Double(vals.into()));
3296                    rv.set_attr(
3297                        "names".to_string(),
3298                        RValue::vec(Vector::Character(names.into())),
3299                    );
3300                    return Ok(RValue::Vector(rv));
3301                }
3302                "integer" => {
3303                    let vals: Vec<Option<i64>> = result_entries
3304                        .iter()
3305                        .filter_map(|(_, r)| {
3306                            r.as_vector()
3307                                .map(|v| v.to_integers().into_iter().next().unwrap_or(None))
3308                        })
3309                        .collect();
3310                    let mut rv = RVector::from(Vector::Integer(vals.into()));
3311                    rv.set_attr(
3312                        "names".to_string(),
3313                        RValue::vec(Vector::Character(names.into())),
3314                    );
3315                    return Ok(RValue::Vector(rv));
3316                }
3317                "character" => {
3318                    let vals: Vec<Option<String>> = result_entries
3319                        .iter()
3320                        .filter_map(|(_, r)| {
3321                            r.as_vector()
3322                                .map(|v| v.to_characters().into_iter().next().unwrap_or(None))
3323                        })
3324                        .collect();
3325                    let mut rv = RVector::from(Vector::Character(vals.into()));
3326                    rv.set_attr(
3327                        "names".to_string(),
3328                        RValue::vec(Vector::Character(names.into())),
3329                    );
3330                    return Ok(RValue::Vector(rv));
3331                }
3332                "logical" => {
3333                    let vals: Vec<Option<bool>> = result_entries
3334                        .iter()
3335                        .filter_map(|(_, r)| {
3336                            r.as_vector()
3337                                .map(|v| v.to_logicals().into_iter().next().unwrap_or(None))
3338                        })
3339                        .collect();
3340                    let mut rv = RVector::from(Vector::Logical(vals.into()));
3341                    rv.set_attr(
3342                        "names".to_string(),
3343                        RValue::vec(Vector::Character(names.into())),
3344                    );
3345                    return Ok(RValue::Vector(rv));
3346                }
3347                _ => {}
3348            }
3349        }
3350    }
3351
3352    Ok(RValue::List(RList::new(result_entries)))
3353}
3354
3355/// Combine a list of scalar RValues back into a single vector RValue.
3356fn combine_items_to_vector(items: &[RValue]) -> RValue {
3357    if items.is_empty() {
3358        return RValue::Null;
3359    }
3360
3361    // Determine the type from the first element
3362    let first_type = items[0].type_name();
3363    let all_same = items.iter().all(|v| v.type_name() == first_type);
3364
3365    if all_same {
3366        match first_type {
3367            "double" => {
3368                let vals: Vec<Option<f64>> = items
3369                    .iter()
3370                    .flat_map(|r| {
3371                        r.as_vector()
3372                            .map(|v| v.to_doubles())
3373                            .unwrap_or_else(|| vec![None])
3374                    })
3375                    .collect();
3376                RValue::vec(Vector::Double(vals.into()))
3377            }
3378            "integer" => {
3379                let vals: Vec<Option<i64>> = items
3380                    .iter()
3381                    .flat_map(|r| {
3382                        r.as_vector()
3383                            .map(|v| v.to_integers())
3384                            .unwrap_or_else(|| vec![None])
3385                    })
3386                    .collect();
3387                RValue::vec(Vector::Integer(vals.into()))
3388            }
3389            "character" => {
3390                let vals: Vec<Option<String>> = items
3391                    .iter()
3392                    .flat_map(|r| {
3393                        r.as_vector()
3394                            .map(|v| v.to_characters())
3395                            .unwrap_or_else(|| vec![None])
3396                    })
3397                    .collect();
3398                RValue::vec(Vector::Character(vals.into()))
3399            }
3400            "logical" => {
3401                let vals: Vec<Option<bool>> = items
3402                    .iter()
3403                    .flat_map(|r| {
3404                        r.as_vector()
3405                            .map(|v| v.to_logicals())
3406                            .unwrap_or_else(|| vec![None])
3407                    })
3408                    .collect();
3409                RValue::vec(Vector::Logical(vals.into()))
3410            }
3411            _ => {
3412                // Fall back to coercing to doubles
3413                let vals: Vec<Option<f64>> = items
3414                    .iter()
3415                    .flat_map(|r| {
3416                        r.as_vector()
3417                            .map(|v| v.to_doubles())
3418                            .unwrap_or_else(|| vec![None])
3419                    })
3420                    .collect();
3421                RValue::vec(Vector::Double(vals.into()))
3422            }
3423        }
3424    } else {
3425        // Mixed types: coerce to doubles (R's coercion hierarchy)
3426        let vals: Vec<Option<f64>> = items
3427            .iter()
3428            .flat_map(|r| {
3429                r.as_vector()
3430                    .map(|v| v.to_doubles())
3431                    .unwrap_or_else(|| vec![None])
3432            })
3433            .collect();
3434        RValue::vec(Vector::Double(vals.into()))
3435    }
3436}
3437
3438/// Apply a function to subsets of a data frame or vector split by a grouping factor.
3439///
3440/// @param data data frame or vector to split
3441/// @param INDICES factor or vector defining the groups
3442/// @param FUN function to apply to each subset
3443/// @return list of per-group results
3444#[interpreter_builtin(name = "by", min_args = 3)]
3445fn interp_by(
3446    positional: &[RValue],
3447    named: &[(String, RValue)],
3448    context: &BuiltinContext,
3449) -> Result<RValue, RError> {
3450    let env = context.env();
3451    // by(data, INDICES, FUN) — similar to tapply but for data-frame-like objects.
3452    let (fail_fast, extra_named) = extract_fail_fast(named);
3453    // For vectors, delegate to tapply-like behavior.
3454    // For lists/data frames, split rows by INDICES and apply FUN to each subset.
3455    let data = positional.first().ok_or_else(|| {
3456        RError::new(
3457            RErrorKind::Argument,
3458            "argument 'data' is missing".to_string(),
3459        )
3460    })?;
3461    let indices = positional.get(1).ok_or_else(|| {
3462        RError::new(
3463            RErrorKind::Argument,
3464            "argument 'INDICES' is missing".to_string(),
3465        )
3466    })?;
3467    let fun = match_fun(
3468        positional.get(2).ok_or_else(|| {
3469            RError::new(
3470                RErrorKind::Argument,
3471                "argument 'FUN' is missing".to_string(),
3472            )
3473        })?,
3474        env,
3475    )?;
3476
3477    // For atomic vectors, treat like tapply
3478    if matches!(data, RValue::Vector(_)) {
3479        let x_items = rvalue_to_items(data);
3480        let index_items = rvalue_to_items(indices);
3481
3482        if x_items.len() != index_items.len() {
3483            return Err(RError::new(
3484                RErrorKind::Argument,
3485                format!(
3486                "arguments 'data' (length {}) and 'INDICES' (length {}) must have the same length",
3487                x_items.len(),
3488                index_items.len()
3489            ),
3490            ));
3491        }
3492
3493        let index_keys: Vec<String> = index_items
3494            .iter()
3495            .map(|v| match v {
3496                RValue::Vector(rv) => rv
3497                    .inner
3498                    .as_character_scalar()
3499                    .unwrap_or_else(|| format!("{}", v)),
3500                _ => format!("{}", v),
3501            })
3502            .collect();
3503
3504        let mut group_names: Vec<String> = Vec::new();
3505        let mut seen = std::collections::HashSet::new();
3506        for key in &index_keys {
3507            if seen.insert(key.clone()) {
3508                group_names.push(key.clone());
3509            }
3510        }
3511
3512        let mut groups: std::collections::HashMap<String, Vec<RValue>> =
3513            std::collections::HashMap::new();
3514        for (item, key) in x_items.into_iter().zip(index_keys.iter()) {
3515            groups.entry(key.clone()).or_default().push(item);
3516        }
3517
3518        let mut result_entries: Vec<(Option<String>, RValue)> =
3519            Vec::with_capacity(group_names.len());
3520
3521        context.with_interpreter(|interp| {
3522            for name in &group_names {
3523                let group = groups.remove(name).unwrap_or_default();
3524                let group_vec = combine_items_to_vector(&group);
3525                if fail_fast {
3526                    let result = interp.call_function(&fun, &[group_vec], &extra_named, env)?;
3527                    result_entries.push((Some(name.clone()), result));
3528                } else {
3529                    match interp.call_function(&fun, &[group_vec], &extra_named, env) {
3530                        Ok(result) => result_entries.push((Some(name.clone()), result)),
3531                        Err(_) => result_entries.push((Some(name.clone()), RValue::Null)),
3532                    }
3533                }
3534            }
3535            Ok::<(), RError>(())
3536        })?;
3537
3538        return Ok(RValue::List(RList::new(result_entries)));
3539    }
3540
3541    // For lists (including data frames), split by INDICES and apply FUN
3542    if let RValue::List(list) = data {
3543        let index_items = rvalue_to_items(indices);
3544
3545        // For a data frame, determine nrow from the first column
3546        let nrow = list.values.first().map(|(_, v)| v.length()).unwrap_or(0);
3547
3548        if index_items.len() != nrow {
3549            return Err(RError::new(
3550                RErrorKind::Argument,
3551                format!(
3552                "arguments 'data' ({} rows) and 'INDICES' (length {}) must have the same length",
3553                nrow,
3554                index_items.len()
3555            ),
3556            ));
3557        }
3558
3559        let index_keys: Vec<String> = index_items
3560            .iter()
3561            .map(|v| match v {
3562                RValue::Vector(rv) => rv
3563                    .inner
3564                    .as_character_scalar()
3565                    .unwrap_or_else(|| format!("{}", v)),
3566                _ => format!("{}", v),
3567            })
3568            .collect();
3569
3570        let mut group_names: Vec<String> = Vec::new();
3571        let mut seen = std::collections::HashSet::new();
3572        for key in &index_keys {
3573            if seen.insert(key.clone()) {
3574                group_names.push(key.clone());
3575            }
3576        }
3577
3578        // For each group, build a subset data frame and call FUN
3579        let mut result_entries: Vec<(Option<String>, RValue)> =
3580            Vec::with_capacity(group_names.len());
3581
3582        context.with_interpreter(|interp| {
3583            for name in &group_names {
3584                // Find row indices belonging to this group
3585                let row_indices: Vec<usize> = index_keys
3586                    .iter()
3587                    .enumerate()
3588                    .filter(|(_, k)| k.as_str() == name)
3589                    .map(|(i, _)| i)
3590                    .collect();
3591
3592                // Build a subset list (data frame) with only these rows
3593                let mut subset_cols: Vec<(Option<String>, RValue)> = Vec::new();
3594                for (col_name, col_val) in &list.values {
3595                    let col_items = rvalue_to_items(col_val);
3596                    let subset: Vec<RValue> = row_indices
3597                        .iter()
3598                        .filter_map(|&i| col_items.get(i).cloned())
3599                        .collect();
3600                    let subset_vec = combine_items_to_vector(&subset);
3601                    subset_cols.push((col_name.clone(), subset_vec));
3602                }
3603
3604                let mut subset_list = RList::new(subset_cols);
3605                // Preserve data.frame class if the original had it
3606                if let Some(cls) = list.get_attr("class") {
3607                    subset_list.set_attr("class".to_string(), cls.clone());
3608                }
3609                // Set row.names for the subset
3610                let row_names: Vec<Option<i64>> =
3611                    (1..=i64::try_from(row_indices.len())?).map(Some).collect();
3612                subset_list.set_attr(
3613                    "row.names".to_string(),
3614                    RValue::vec(Vector::Integer(row_names.into())),
3615                );
3616                // Set names attribute
3617                if let Some(names) = list.get_attr("names") {
3618                    subset_list.set_attr("names".to_string(), names.clone());
3619                }
3620
3621                let subset_val = RValue::List(subset_list);
3622                if fail_fast {
3623                    let result = interp.call_function(&fun, &[subset_val], &extra_named, env)?;
3624                    result_entries.push((Some(name.clone()), result));
3625                } else {
3626                    match interp.call_function(&fun, &[subset_val], &extra_named, env) {
3627                        Ok(result) => result_entries.push((Some(name.clone()), result)),
3628                        Err(_) => result_entries.push((Some(name.clone()), RValue::Null)),
3629                    }
3630                }
3631            }
3632            Ok::<(), RError>(())
3633        })?;
3634
3635        return Ok(RValue::List(RList::new(result_entries)));
3636    }
3637
3638    Err(RError::new(
3639        RErrorKind::Argument,
3640        "by() requires a vector, list, or data frame as 'data'".to_string(),
3641    ))
3642}
3643
3644// region: split / unsplit / aggregate
3645
3646/// Split a vector or data frame into groups defined by a factor.
3647///
3648/// @param x vector or data frame to split
3649/// @param f factor or vector defining the groups (same length as x, or nrow(x) for data frames)
3650/// @param drop if TRUE, drop unused factor levels (currently ignored)
3651/// @return named list of subsets
3652#[interpreter_builtin(min_args = 2)]
3653fn interp_split(
3654    positional: &[RValue],
3655    _named: &[(String, RValue)],
3656    _context: &BuiltinContext,
3657) -> Result<RValue, RError> {
3658    let x = positional
3659        .first()
3660        .ok_or_else(|| RError::new(RErrorKind::Argument, "argument 'x' is missing".to_string()))?;
3661    let f = positional
3662        .get(1)
3663        .ok_or_else(|| RError::new(RErrorKind::Argument, "argument 'f' is missing".to_string()))?;
3664
3665    split_impl(x, f)
3666}
3667
3668/// Internal split implementation shared by split() and aggregate().
3669fn split_impl(x: &RValue, f: &RValue) -> Result<RValue, RError> {
3670    let f_items = rvalue_to_items(f);
3671
3672    // Convert factor values to string keys
3673    let f_keys: Vec<String> = f_items
3674        .iter()
3675        .map(|v| match v {
3676            RValue::Vector(rv) => rv
3677                .inner
3678                .as_character_scalar()
3679                .unwrap_or_else(|| format!("{}", v)),
3680            _ => format!("{}", v),
3681        })
3682        .collect();
3683
3684    // Collect unique group names preserving first-seen order
3685    let mut group_names: Vec<String> = Vec::new();
3686    let mut seen = std::collections::HashSet::new();
3687    for key in &f_keys {
3688        if seen.insert(key.clone()) {
3689            group_names.push(key.clone());
3690        }
3691    }
3692
3693    match x {
3694        RValue::Vector(_) => {
3695            let x_items = rvalue_to_items(x);
3696            if x_items.len() != f_keys.len() {
3697                return Err(RError::new(
3698                    RErrorKind::Argument,
3699                    format!(
3700                        "'x' (length {}) and 'f' (length {}) must have the same length",
3701                        x_items.len(),
3702                        f_keys.len()
3703                    ),
3704                ));
3705            }
3706
3707            let mut groups: std::collections::HashMap<String, Vec<RValue>> =
3708                std::collections::HashMap::new();
3709            for (item, key) in x_items.into_iter().zip(f_keys.iter()) {
3710                groups.entry(key.clone()).or_default().push(item);
3711            }
3712
3713            let entries: Vec<(Option<String>, RValue)> = group_names
3714                .into_iter()
3715                .map(|name| {
3716                    let items = groups.remove(&name).unwrap_or_default();
3717                    let vec = combine_items_to_vector(&items);
3718                    (Some(name), vec)
3719                })
3720                .collect();
3721
3722            Ok(RValue::List(RList::new(entries)))
3723        }
3724        RValue::List(list) => {
3725            // Data frame: split rows by f
3726            let nrow = list.values.first().map(|(_, v)| v.length()).unwrap_or(0);
3727            if f_keys.len() != nrow {
3728                return Err(RError::new(
3729                    RErrorKind::Argument,
3730                    format!(
3731                        "data frame has {} rows but 'f' has length {}",
3732                        nrow,
3733                        f_keys.len()
3734                    ),
3735                ));
3736            }
3737
3738            let entries: Vec<(Option<String>, RValue)> = group_names
3739                .into_iter()
3740                .map(|name| {
3741                    let row_indices: Vec<usize> = f_keys
3742                        .iter()
3743                        .enumerate()
3744                        .filter(|(_, k)| k.as_str() == name)
3745                        .map(|(i, _)| i)
3746                        .collect();
3747
3748                    let mut subset_cols: Vec<(Option<String>, RValue)> = Vec::new();
3749                    for (col_name, col_val) in &list.values {
3750                        let col_items = rvalue_to_items(col_val);
3751                        let subset: Vec<RValue> = row_indices
3752                            .iter()
3753                            .filter_map(|&i| col_items.get(i).cloned())
3754                            .collect();
3755                        let subset_vec = combine_items_to_vector(&subset);
3756                        subset_cols.push((col_name.clone(), subset_vec));
3757                    }
3758
3759                    let mut subset_list = RList::new(subset_cols);
3760                    if let Some(cls) = list.get_attr("class") {
3761                        subset_list.set_attr("class".to_string(), cls.clone());
3762                    }
3763                    if let Some(names) = list.get_attr("names") {
3764                        subset_list.set_attr("names".to_string(), names.clone());
3765                    }
3766                    // row_indices.len() is bounded by original data frame row count
3767                    let n_rows = i64::try_from(row_indices.len()).unwrap_or(0);
3768                    let row_names: Vec<Option<i64>> = (1..=n_rows).map(Some).collect();
3769                    subset_list.set_attr(
3770                        "row.names".to_string(),
3771                        RValue::vec(Vector::Integer(row_names.into())),
3772                    );
3773
3774                    (Some(name), RValue::List(subset_list))
3775                })
3776                .collect();
3777
3778            Ok(RValue::List(RList::new(entries)))
3779        }
3780        _ => Err(RError::new(
3781            RErrorKind::Argument,
3782            "split() requires a vector, list, or data frame as 'x'".to_string(),
3783        )),
3784    }
3785}
3786
3787/// Reverse of split: reassemble a vector from a split list.
3788///
3789/// @param value list of vectors (as produced by split())
3790/// @param f factor or vector defining the groups (same length as the original vector)
3791/// @return vector with elements placed back at their original positions
3792#[interpreter_builtin(min_args = 2)]
3793fn interp_unsplit(
3794    positional: &[RValue],
3795    _named: &[(String, RValue)],
3796    _context: &BuiltinContext,
3797) -> Result<RValue, RError> {
3798    let value = positional.first().ok_or_else(|| {
3799        RError::new(
3800            RErrorKind::Argument,
3801            "argument 'value' is missing".to_string(),
3802        )
3803    })?;
3804    let f = positional
3805        .get(1)
3806        .ok_or_else(|| RError::new(RErrorKind::Argument, "argument 'f' is missing".to_string()))?;
3807
3808    let f_items = rvalue_to_items(f);
3809    let n = f_items.len();
3810
3811    // Convert factor values to string keys
3812    let f_keys: Vec<String> = f_items
3813        .iter()
3814        .map(|v| match v {
3815            RValue::Vector(rv) => rv
3816                .inner
3817                .as_character_scalar()
3818                .unwrap_or_else(|| format!("{}", v)),
3819            _ => format!("{}", v),
3820        })
3821        .collect();
3822
3823    // value must be a named list
3824    let list = match value {
3825        RValue::List(l) => l,
3826        _ => {
3827            return Err(RError::new(
3828                RErrorKind::Argument,
3829                "unsplit() requires a list as 'value'".to_string(),
3830            ))
3831        }
3832    };
3833
3834    // Build a map from group name to items iterator
3835    let mut group_items: std::collections::HashMap<String, Vec<RValue>> =
3836        std::collections::HashMap::new();
3837    for (name, val) in &list.values {
3838        if let Some(name) = name {
3839            group_items.insert(name.clone(), rvalue_to_items(val));
3840        }
3841    }
3842
3843    // Track how many items we've consumed from each group
3844    let mut group_cursors: std::collections::HashMap<String, usize> =
3845        std::collections::HashMap::new();
3846
3847    let mut result: Vec<RValue> = Vec::with_capacity(n);
3848    for key in &f_keys {
3849        let cursor = group_cursors.entry(key.clone()).or_insert(0);
3850        let item = group_items
3851            .get(key)
3852            .and_then(|items| items.get(*cursor))
3853            .cloned()
3854            .unwrap_or(RValue::Null);
3855        *cursor += 1;
3856        result.push(item);
3857    }
3858
3859    Ok(combine_items_to_vector(&result))
3860}
3861
3862/// Aggregate data by groups, applying a function to each group.
3863///
3864/// Supports two calling conventions:
3865///   aggregate(x, by, FUN) — x is a vector/matrix, by is a list of grouping vectors
3866///   aggregate(formula, data, FUN) — formula interface (y ~ x, data=df, FUN=mean)
3867///
3868/// @param x numeric vector or data frame column to aggregate, or a formula
3869/// @param by list of grouping vectors (each same length as x), or data frame (formula interface)
3870/// @param FUN function to apply to each group
3871/// @param data data frame (named argument, formula interface)
3872/// @return data frame with grouping columns and aggregated value columns
3873#[interpreter_builtin(min_args = 2)]
3874fn interp_aggregate(
3875    positional: &[RValue],
3876    named: &[(String, RValue)],
3877    context: &BuiltinContext,
3878) -> Result<RValue, RError> {
3879    let env = context.env();
3880    let (fail_fast, extra_named) = extract_fail_fast(named);
3881
3882    let first = positional
3883        .first()
3884        .ok_or_else(|| RError::new(RErrorKind::Argument, "argument 'x' is missing".to_string()))?;
3885
3886    // Check if first argument is a formula (Language with class "formula")
3887    let is_formula = match first {
3888        RValue::Language(lang) => lang
3889            .get_attr("class")
3890            .and_then(|v| v.as_vector()?.as_character_scalar())
3891            .is_some_and(|c| c == "formula"),
3892        _ => false,
3893    };
3894
3895    if is_formula {
3896        return aggregate_formula(
3897            first,
3898            positional,
3899            named,
3900            &extra_named,
3901            fail_fast,
3902            env,
3903            context,
3904        );
3905    }
3906
3907    // Standard interface: aggregate(x, by, FUN)
3908    let by = positional
3909        .get(1)
3910        .or_else(|| named.iter().find(|(n, _)| n == "by").map(|(_, v)| v))
3911        .ok_or_else(|| RError::new(RErrorKind::Argument, "argument 'by' is missing".to_string()))?;
3912    let fun_val = positional
3913        .get(2)
3914        .or_else(|| named.iter().find(|(n, _)| n == "FUN").map(|(_, v)| v))
3915        .ok_or_else(|| {
3916            RError::new(
3917                RErrorKind::Argument,
3918                "argument 'FUN' is missing".to_string(),
3919            )
3920        })?;
3921    let fun = match_fun(fun_val, env)?;
3922
3923    aggregate_standard(first, by, &fun, &extra_named, fail_fast, env, context)
3924}
3925
3926/// Extract column names from a formula expression (e.g., y ~ x parses to lhs=y, rhs=x).
3927fn extract_formula_vars(expr: &Expr) -> (Vec<String>, Vec<String>) {
3928    match expr {
3929        Expr::Formula { lhs, rhs } => {
3930            let lhs_vars = lhs
3931                .as_ref()
3932                .map(|e| collect_symbol_names(e))
3933                .unwrap_or_default();
3934            let rhs_vars = rhs
3935                .as_ref()
3936                .map(|e| collect_symbol_names(e))
3937                .unwrap_or_default();
3938            (lhs_vars, rhs_vars)
3939        }
3940        _ => (Vec::new(), Vec::new()),
3941    }
3942}
3943
3944/// Collect all symbol names from an expression (handles +, ., and bare symbols).
3945fn collect_symbol_names(expr: &Expr) -> Vec<String> {
3946    match expr {
3947        Expr::Symbol(name) if name == "." => {
3948            // "." means all other columns — handled by the caller
3949            vec![".".to_string()]
3950        }
3951        Expr::Symbol(name) => vec![name.clone()],
3952        Expr::BinaryOp {
3953            op: BinaryOp::Add,
3954            lhs,
3955            rhs,
3956        } => {
3957            let mut names = collect_symbol_names(lhs);
3958            names.extend(collect_symbol_names(rhs));
3959            names
3960        }
3961        _ => Vec::new(),
3962    }
3963}
3964
3965/// Get a column from a data frame by name.
3966fn df_get_column<'a>(df: &'a RList, name: &str) -> Option<&'a RValue> {
3967    // First try by named values
3968    for (col_name, val) in &df.values {
3969        if col_name.as_deref() == Some(name) {
3970            return Some(val);
3971        }
3972    }
3973    // Try the "names" attribute
3974    if let Some(names_val) = df.get_attr("names") {
3975        if let Some(Vector::Character(names)) = names_val.as_vector() {
3976            for (i, n) in names.iter().enumerate() {
3977                if n.as_deref() == Some(name) {
3978                    if let Some((_, val)) = df.values.get(i) {
3979                        return Some(val);
3980                    }
3981                }
3982            }
3983        }
3984    }
3985    None
3986}
3987
3988/// Get all column names from a data frame.
3989fn df_column_names(df: &RList) -> Vec<String> {
3990    if let Some(names_val) = df.get_attr("names") {
3991        if let Some(Vector::Character(names)) = names_val.as_vector() {
3992            return names.iter().filter_map(|n| n.clone()).collect();
3993        }
3994    }
3995    df.values
3996        .iter()
3997        .enumerate()
3998        .map(|(i, (name, _))| name.clone().unwrap_or_else(|| format!("V{}", i + 1)))
3999        .collect()
4000}
4001
4002/// Formula interface for aggregate: aggregate(y ~ x, data=df, FUN=mean)
4003fn aggregate_formula(
4004    formula_val: &RValue,
4005    positional: &[RValue],
4006    named: &[(String, RValue)],
4007    extra_named: &[(String, RValue)],
4008    fail_fast: bool,
4009    env: &Environment,
4010    context: &BuiltinContext,
4011) -> Result<RValue, RError> {
4012    let formula_expr = match formula_val {
4013        RValue::Language(lang) => &*lang.inner,
4014        _ => {
4015            return Err(RError::new(
4016                RErrorKind::Argument,
4017                "first argument must be a formula".to_string(),
4018            ))
4019        }
4020    };
4021
4022    // Extract response and grouping variable names from the formula
4023    let (response_vars, grouping_vars) = extract_formula_vars(formula_expr);
4024
4025    // Get the data argument (second positional or named "data")
4026    let data = positional
4027        .get(1)
4028        .or_else(|| named.iter().find(|(n, _)| n == "data").map(|(_, v)| v))
4029        .ok_or_else(|| {
4030            RError::new(
4031                RErrorKind::Argument,
4032                "argument 'data' is missing for formula interface".to_string(),
4033            )
4034        })?;
4035
4036    let df = match data {
4037        RValue::List(l) => l,
4038        _ => {
4039            return Err(RError::new(
4040                RErrorKind::Argument,
4041                "'data' must be a data frame".to_string(),
4042            ))
4043        }
4044    };
4045
4046    // Get FUN argument (third positional or named "FUN")
4047    let fun_val = positional
4048        .get(2)
4049        .or_else(|| named.iter().find(|(n, _)| n == "FUN").map(|(_, v)| v))
4050        .ok_or_else(|| {
4051            RError::new(
4052                RErrorKind::Argument,
4053                "argument 'FUN' is missing".to_string(),
4054            )
4055        })?;
4056    let fun = match_fun(fun_val, env)?;
4057
4058    let all_col_names = df_column_names(df);
4059
4060    // Resolve "." in grouping vars (means all columns not in response)
4061    let resolved_grouping: Vec<String> = if grouping_vars.iter().any(|v| v == ".") {
4062        all_col_names
4063            .iter()
4064            .filter(|n| !response_vars.contains(n))
4065            .cloned()
4066            .collect()
4067    } else {
4068        grouping_vars.clone()
4069    };
4070
4071    // Resolve "." in response vars (means all columns not in grouping)
4072    let resolved_response: Vec<String> = if response_vars.iter().any(|v| v == ".") {
4073        all_col_names
4074            .iter()
4075            .filter(|n| !resolved_grouping.contains(n))
4076            .cloned()
4077            .collect()
4078    } else {
4079        response_vars
4080    };
4081
4082    // Extract grouping columns from the data frame
4083    let mut by_vectors: Vec<(Option<String>, Vec<RValue>)> = Vec::new();
4084    for gv_name in &resolved_grouping {
4085        let col = df_get_column(df, gv_name).ok_or_else(|| {
4086            RError::new(
4087                RErrorKind::Argument,
4088                format!("column '{}' not found in data frame", gv_name),
4089            )
4090        })?;
4091        by_vectors.push((Some(gv_name.clone()), rvalue_to_items(col)));
4092    }
4093
4094    // For each response variable, run the standard aggregation
4095    let mut all_result_cols: Vec<(Option<String>, RValue)> = Vec::new();
4096    let mut group_cols_built = false;
4097    let mut n_groups = 0usize;
4098
4099    for resp_name in &resolved_response {
4100        let resp_col = df_get_column(df, resp_name).ok_or_else(|| {
4101            RError::new(
4102                RErrorKind::Argument,
4103                format!("column '{}' not found in data frame", resp_name),
4104            )
4105        })?;
4106
4107        let x_items = rvalue_to_items(resp_col);
4108        let n = x_items.len();
4109
4110        // Validate grouping vectors match response length
4111        for (i, (_, gv)) in by_vectors.iter().enumerate() {
4112            if gv.len() != n {
4113                return Err(RError::new(
4114                    RErrorKind::Argument,
4115                    format!(
4116                        "grouping vector {} has length {} but response '{}' has length {}",
4117                        i + 1,
4118                        gv.len(),
4119                        resp_name,
4120                        n
4121                    ),
4122                ));
4123            }
4124        }
4125
4126        // Build composite group keys
4127        let mut group_keys: Vec<Vec<String>> = Vec::with_capacity(n);
4128        for i in 0..n {
4129            let key: Vec<String> = by_vectors
4130                .iter()
4131                .map(|(_, gv)| match &gv[i] {
4132                    RValue::Vector(rv) => rv
4133                        .inner
4134                        .as_character_scalar()
4135                        .unwrap_or_else(|| format!("{}", gv[i])),
4136                    other => format!("{}", other),
4137                })
4138                .collect();
4139            group_keys.push(key);
4140        }
4141
4142        // Collect unique keys preserving first-seen order
4143        let mut unique_keys: Vec<Vec<String>> = Vec::new();
4144        let mut seen: std::collections::HashSet<Vec<String>> = std::collections::HashSet::new();
4145        for key in &group_keys {
4146            if seen.insert(key.clone()) {
4147                unique_keys.push(key.clone());
4148            }
4149        }
4150
4151        // Group items by composite key
4152        let mut groups: std::collections::HashMap<Vec<String>, Vec<RValue>> =
4153            std::collections::HashMap::new();
4154        for (item, key) in x_items.into_iter().zip(group_keys.iter()) {
4155            groups.entry(key.clone()).or_default().push(item);
4156        }
4157
4158        n_groups = unique_keys.len();
4159
4160        // Build grouping columns (only for the first response variable)
4161        if !group_cols_built {
4162            for (gi, _) in by_vectors.iter().enumerate() {
4163                let col_vals: Vec<Option<String>> = unique_keys
4164                    .iter()
4165                    .map(|key| Some(key[gi].clone()))
4166                    .collect();
4167                let col_name = by_vectors
4168                    .get(gi)
4169                    .and_then(|(n, _)| n.clone())
4170                    .unwrap_or_else(|| format!("Group.{}", gi + 1));
4171                all_result_cols.push((
4172                    Some(col_name),
4173                    RValue::vec(Vector::Character(col_vals.into())),
4174                ));
4175            }
4176            group_cols_built = true;
4177        }
4178
4179        // Apply FUN to each group
4180        let mut result_vals: Vec<RValue> = Vec::with_capacity(n_groups);
4181        context.with_interpreter(|interp| {
4182            for key in &unique_keys {
4183                let items = groups.remove(key).unwrap_or_default();
4184                let group_vec = combine_items_to_vector(&items);
4185                if fail_fast {
4186                    let result = interp.call_function(&fun, &[group_vec], extra_named, env)?;
4187                    result_vals.push(result);
4188                } else {
4189                    match interp.call_function(&fun, &[group_vec], extra_named, env) {
4190                        Ok(result) => result_vals.push(result),
4191                        Err(_) => result_vals.push(RValue::Null),
4192                    }
4193                }
4194            }
4195            Ok::<(), RError>(())
4196        })?;
4197
4198        // Add result column
4199        let all_scalar = result_vals.iter().all(|r| r.length() == 1);
4200        if all_scalar && !result_vals.is_empty() {
4201            let simplified = combine_items_to_vector(&result_vals);
4202            all_result_cols.push((Some(resp_name.clone()), simplified));
4203        } else {
4204            let entries: Vec<(Option<String>, RValue)> =
4205                result_vals.into_iter().map(|v| (None, v)).collect();
4206            all_result_cols.push((Some(resp_name.clone()), RValue::List(RList::new(entries))));
4207        }
4208    }
4209
4210    let mut result = RList::new(all_result_cols);
4211    result.set_attr(
4212        "class".to_string(),
4213        RValue::vec(Vector::Character(
4214            vec![Some("data.frame".to_string())].into(),
4215        )),
4216    );
4217    let row_names: Vec<Option<i64>> = (1..=i64::try_from(n_groups)?).map(Some).collect();
4218    result.set_attr(
4219        "row.names".to_string(),
4220        RValue::vec(Vector::Integer(row_names.into())),
4221    );
4222    let col_names: Vec<Option<String>> = result.values.iter().map(|(n, _)| n.clone()).collect();
4223    result.set_attr(
4224        "names".to_string(),
4225        RValue::vec(Vector::Character(col_names.into())),
4226    );
4227
4228    Ok(RValue::List(result))
4229}
4230
4231/// Standard (non-formula) aggregate: aggregate(x, by, FUN)
4232fn aggregate_standard(
4233    x: &RValue,
4234    by: &RValue,
4235    fun: &RValue,
4236    extra_named: &[(String, RValue)],
4237    fail_fast: bool,
4238    env: &Environment,
4239    context: &BuiltinContext,
4240) -> Result<RValue, RError> {
4241    // by must be a list of grouping vectors
4242    let by_vectors: Vec<(Option<String>, Vec<RValue>)> = match by {
4243        RValue::List(l) => l
4244            .values
4245            .iter()
4246            .map(|(name, v)| (name.clone(), rvalue_to_items(v)))
4247            .collect(),
4248        _ => {
4249            // Single grouping vector — wrap in a list
4250            vec![(None, rvalue_to_items(by))]
4251        }
4252    };
4253
4254    let x_items = rvalue_to_items(x);
4255    let n = x_items.len();
4256
4257    // Validate all grouping vectors have the same length as x
4258    for (i, (_, gv)) in by_vectors.iter().enumerate() {
4259        if gv.len() != n {
4260            return Err(RError::new(
4261                RErrorKind::Argument,
4262                format!(
4263                    "grouping vector {} has length {} but 'x' has length {}",
4264                    i + 1,
4265                    gv.len(),
4266                    n
4267                ),
4268            ));
4269        }
4270    }
4271
4272    // Build composite group keys from all grouping vectors
4273    let mut group_keys: Vec<Vec<String>> = Vec::with_capacity(n);
4274    for i in 0..n {
4275        let key: Vec<String> = by_vectors
4276            .iter()
4277            .map(|(_, gv)| match &gv[i] {
4278                RValue::Vector(rv) => rv
4279                    .inner
4280                    .as_character_scalar()
4281                    .unwrap_or_else(|| format!("{}", gv[i])),
4282                other => format!("{}", other),
4283            })
4284            .collect();
4285        group_keys.push(key);
4286    }
4287
4288    // Collect unique composite keys preserving first-seen order
4289    let mut unique_keys: Vec<Vec<String>> = Vec::new();
4290    let mut seen: std::collections::HashSet<Vec<String>> = std::collections::HashSet::new();
4291    for key in &group_keys {
4292        if seen.insert(key.clone()) {
4293            unique_keys.push(key.clone());
4294        }
4295    }
4296
4297    // Group x items by composite key
4298    let mut groups: std::collections::HashMap<Vec<String>, Vec<RValue>> =
4299        std::collections::HashMap::new();
4300    for (item, key) in x_items.into_iter().zip(group_keys.iter()) {
4301        groups.entry(key.clone()).or_default().push(item);
4302    }
4303
4304    // Apply FUN to each group and build result columns
4305    let n_groups = unique_keys.len();
4306    let n_by = by_vectors.len();
4307
4308    // Group columns (Group.1, Group.2, ...)
4309    let mut group_cols: Vec<Vec<Option<String>>> = vec![Vec::with_capacity(n_groups); n_by];
4310    let mut result_vals: Vec<RValue> = Vec::with_capacity(n_groups);
4311
4312    context.with_interpreter(|interp| {
4313        for key in &unique_keys {
4314            for (col_idx, k) in key.iter().enumerate() {
4315                group_cols[col_idx].push(Some(k.clone()));
4316            }
4317            let items = groups.remove(key).unwrap_or_default();
4318            let group_vec = combine_items_to_vector(&items);
4319            if fail_fast {
4320                let result = interp.call_function(fun, &[group_vec], extra_named, env)?;
4321                result_vals.push(result);
4322            } else {
4323                match interp.call_function(fun, &[group_vec], extra_named, env) {
4324                    Ok(result) => result_vals.push(result),
4325                    Err(_) => result_vals.push(RValue::Null),
4326                }
4327            }
4328        }
4329        Ok::<(), RError>(())
4330    })?;
4331
4332    // Build the result data frame
4333    let mut df_cols: Vec<(Option<String>, RValue)> = Vec::new();
4334
4335    // Add grouping columns
4336    for (i, col) in group_cols.into_iter().enumerate() {
4337        let col_name = by_vectors
4338            .get(i)
4339            .and_then(|(n, _)| n.clone())
4340            .unwrap_or_else(|| format!("Group.{}", i + 1));
4341        df_cols.push((Some(col_name), RValue::vec(Vector::Character(col.into()))));
4342    }
4343
4344    // Add the result column — try to simplify scalar results to a vector
4345    let all_scalar = result_vals.iter().all(|r| r.length() == 1);
4346    if all_scalar && !result_vals.is_empty() {
4347        let simplified = combine_items_to_vector(&result_vals);
4348        df_cols.push((Some("x".to_string()), simplified));
4349    } else {
4350        let entries: Vec<(Option<String>, RValue)> =
4351            result_vals.into_iter().map(|v| (None, v)).collect();
4352        df_cols.push((Some("x".to_string()), RValue::List(RList::new(entries))));
4353    }
4354
4355    let mut result = RList::new(df_cols);
4356    result.set_attr(
4357        "class".to_string(),
4358        RValue::vec(Vector::Character(
4359            vec![Some("data.frame".to_string())].into(),
4360        )),
4361    );
4362    let row_names: Vec<Option<i64>> = (1..=i64::try_from(n_groups)?).map(Some).collect();
4363    result.set_attr(
4364        "row.names".to_string(),
4365        RValue::vec(Vector::Integer(row_names.into())),
4366    );
4367    // Set names attribute
4368    let col_names: Vec<Option<String>> = result.values.iter().map(|(n, _)| n.clone()).collect();
4369    result.set_attr(
4370        "names".to_string(),
4371        RValue::vec(Vector::Character(col_names.into())),
4372    );
4373
4374    Ok(RValue::List(result))
4375}
4376
4377// endregion
4378
4379// region: outer
4380
4381/// Outer product of two vectors, applying FUN to each pair of elements.
4382///
4383/// @param X first vector
4384/// @param Y second vector
4385/// @param FUN function to apply (default: "*")
4386/// @return matrix with dim = c(length(X), length(Y))
4387#[interpreter_builtin(min_args = 2)]
4388fn interp_outer(
4389    positional: &[RValue],
4390    named: &[(String, RValue)],
4391    context: &BuiltinContext,
4392) -> Result<RValue, RError> {
4393    let env = context.env();
4394
4395    let x = positional
4396        .first()
4397        .ok_or_else(|| RError::new(RErrorKind::Argument, "argument 'X' is missing".to_string()))?;
4398    let y = positional
4399        .get(1)
4400        .ok_or_else(|| RError::new(RErrorKind::Argument, "argument 'Y' is missing".to_string()))?;
4401
4402    // FUN can be positional arg #3 or named
4403    let fun_val = named
4404        .iter()
4405        .find(|(n, _)| n == "FUN")
4406        .map(|(_, v)| v.clone())
4407        .or_else(|| positional.get(2).cloned());
4408
4409    let x_items = rvalue_to_items(x);
4410    let y_items = rvalue_to_items(y);
4411    let nx = x_items.len();
4412    let ny = y_items.len();
4413
4414    // Try to resolve as a known arithmetic operator for fast path
4415    let use_fast_path = match &fun_val {
4416        Some(RValue::Vector(rv)) => rv.inner.as_character_scalar().is_some(),
4417        None => true, // default is "*"
4418        _ => false,
4419    };
4420
4421    if use_fast_path {
4422        let fun_str = fun_val
4423            .as_ref()
4424            .and_then(|v| v.as_vector()?.as_character_scalar())
4425            .unwrap_or_else(|| "*".to_string());
4426
4427        let op: Option<fn(f64, f64) -> f64> = match fun_str.as_str() {
4428            "*" => Some(|a, b| a * b),
4429            "+" => Some(|a, b| a + b),
4430            "-" => Some(|a, b| a - b),
4431            "/" => Some(|a, b| a / b),
4432            "^" | "**" => Some(|a: f64, b: f64| a.powf(b)),
4433            "%%" => Some(|a, b| a % b),
4434            "%/%" => Some(|a: f64, b: f64| (a / b).floor()),
4435            _ => None, // Fall through to general path
4436        };
4437
4438        if let Some(op) = op {
4439            let x_vec = match x {
4440                RValue::Vector(rv) => rv.to_doubles(),
4441                _ => {
4442                    return Err(RError::new(
4443                        RErrorKind::Argument,
4444                        "outer() requires vectors for X and Y".to_string(),
4445                    ))
4446                }
4447            };
4448            let y_vec = match y {
4449                RValue::Vector(rv) => rv.to_doubles(),
4450                _ => {
4451                    return Err(RError::new(
4452                        RErrorKind::Argument,
4453                        "outer() requires vectors for X and Y".to_string(),
4454                    ))
4455                }
4456            };
4457
4458            // R stores matrices column-major: iterate columns (Y) then rows (X)
4459            let mut result = Vec::with_capacity(nx * ny);
4460            for y_val in &y_vec {
4461                for x_val in &x_vec {
4462                    let val = match (x_val, y_val) {
4463                        (Some(xv), Some(yv)) => Some(op(*xv, *yv)),
4464                        _ => None,
4465                    };
4466                    result.push(val);
4467                }
4468            }
4469
4470            return build_outer_matrix(Vector::Double(result.into()), nx, ny, x, y);
4471        }
4472
4473        // If it's a string naming a function, look it up
4474        let fun_rv = match_fun(
4475            &RValue::vec(Vector::Character(vec![Some(fun_str)].into())),
4476            env,
4477        )?;
4478        return outer_general(&fun_rv, &x_items, &y_items, nx, ny, x, y, context, env);
4479    }
4480
4481    // General path: FUN is a closure or other callable
4482    let fun = match_fun(fun_val.as_ref().unwrap_or(&RValue::Null), env)?;
4483    outer_general(&fun, &x_items, &y_items, nx, ny, x, y, context, env)
4484}
4485
4486/// Extract the "names" attribute from an RValue as a list of optional strings.
4487fn outer_names(val: &RValue) -> Option<Vec<Option<String>>> {
4488    match val {
4489        RValue::Vector(rv) => rv.get_attr("names").and_then(|nv| {
4490            if let RValue::Vector(nrv) = nv {
4491                Some(nrv.inner.to_characters())
4492            } else {
4493                None
4494            }
4495        }),
4496        _ => None,
4497    }
4498}
4499
4500/// Set dimnames on an outer product result matrix if X or Y had names.
4501fn set_outer_dimnames(rv: &mut RVector, x: &RValue, y: &RValue) {
4502    let x_names = outer_names(x);
4503    let y_names = outer_names(y);
4504    if x_names.is_some() || y_names.is_some() {
4505        let row_names = x_names
4506            .map(|n| RValue::vec(Vector::Character(n.into())))
4507            .unwrap_or(RValue::Null);
4508        let col_names = y_names
4509            .map(|n| RValue::vec(Vector::Character(n.into())))
4510            .unwrap_or(RValue::Null);
4511        rv.set_attr(
4512            "dimnames".to_string(),
4513            RValue::List(RList::new(vec![(None, row_names), (None, col_names)])),
4514        );
4515    }
4516}
4517
4518/// Build an RVector matrix with class, dim, and optional dimnames.
4519fn build_outer_matrix(
4520    inner: Vector,
4521    nx: usize,
4522    ny: usize,
4523    x_orig: &RValue,
4524    y_orig: &RValue,
4525) -> Result<RValue, RError> {
4526    let mut rv = RVector::from(inner);
4527    rv.set_attr(
4528        "class".to_string(),
4529        RValue::vec(Vector::Character(
4530            vec![Some("matrix".to_string()), Some("array".to_string())].into(),
4531        )),
4532    );
4533    rv.set_attr(
4534        "dim".to_string(),
4535        RValue::vec(Vector::Integer(
4536            vec![Some(i64::try_from(nx)?), Some(i64::try_from(ny)?)].into(),
4537        )),
4538    );
4539    set_outer_dimnames(&mut rv, x_orig, y_orig);
4540    Ok(RValue::Vector(rv))
4541}
4542
4543/// General outer product: call FUN(x_i, y_j) for each pair and collect into a matrix.
4544#[allow(clippy::too_many_arguments)]
4545fn outer_general(
4546    fun: &RValue,
4547    x_items: &[RValue],
4548    y_items: &[RValue],
4549    nx: usize,
4550    ny: usize,
4551    x_orig: &RValue,
4552    y_orig: &RValue,
4553    context: &BuiltinContext,
4554    env: &Environment,
4555) -> Result<RValue, RError> {
4556    let mut results: Vec<RValue> = Vec::with_capacity(nx * ny);
4557
4558    context.with_interpreter(|interp| {
4559        // Column-major order: iterate Y (columns) then X (rows)
4560        for yv in y_items {
4561            for xv in x_items {
4562                let result = interp.call_function(fun, &[xv.clone(), yv.clone()], &[], env)?;
4563                results.push(result);
4564            }
4565        }
4566        Ok::<(), RError>(())
4567    })?;
4568
4569    // Try to simplify: if all results are scalar, combine into a typed vector
4570    let all_scalar = results.iter().all(|r| r.length() == 1);
4571    if all_scalar && !results.is_empty() {
4572        let first_type = results[0].type_name();
4573        let all_same = results.iter().all(|r| r.type_name() == first_type);
4574        if all_same {
4575            match first_type {
4576                "double" => {
4577                    let vals: Vec<Option<f64>> = results
4578                        .iter()
4579                        .filter_map(|r| {
4580                            r.as_vector()
4581                                .map(|v| v.to_doubles().into_iter().next().unwrap_or(None))
4582                        })
4583                        .collect();
4584                    return build_outer_matrix(Vector::Double(vals.into()), nx, ny, x_orig, y_orig);
4585                }
4586                "integer" => {
4587                    let vals: Vec<Option<i64>> = results
4588                        .iter()
4589                        .filter_map(|r| {
4590                            r.as_vector()
4591                                .map(|v| v.to_integers().into_iter().next().unwrap_or(None))
4592                        })
4593                        .collect();
4594                    return build_outer_matrix(
4595                        Vector::Integer(vals.into()),
4596                        nx,
4597                        ny,
4598                        x_orig,
4599                        y_orig,
4600                    );
4601                }
4602                "character" => {
4603                    let vals: Vec<Option<String>> = results
4604                        .iter()
4605                        .filter_map(|r| {
4606                            r.as_vector()
4607                                .map(|v| v.to_characters().into_iter().next().unwrap_or(None))
4608                        })
4609                        .collect();
4610                    return build_outer_matrix(
4611                        Vector::Character(vals.into()),
4612                        nx,
4613                        ny,
4614                        x_orig,
4615                        y_orig,
4616                    );
4617                }
4618                _ => {}
4619            }
4620        }
4621    }
4622
4623    // Fall back: collect all results into doubles
4624    let vals: Vec<Option<f64>> = results
4625        .iter()
4626        .filter_map(|r| {
4627            r.as_vector()
4628                .map(|v| v.to_doubles().into_iter().next().unwrap_or(None))
4629        })
4630        .collect();
4631    build_outer_matrix(Vector::Double(vals.into()), nx, ny, x_orig, y_orig)
4632}
4633
4634// endregion
4635
4636/// Summarize an object (S3 generic).
4637///
4638/// Dispatches to summary.lm, summary.data.frame, etc. when a method exists.
4639/// Falls back to printing the object's structure.
4640///
4641/// @param object the object to summarize
4642/// @return a summary of the object
4643#[interpreter_builtin(min_args = 1)]
4644fn interp_summary(
4645    args: &[RValue],
4646    named: &[(String, RValue)],
4647    context: &BuiltinContext,
4648) -> Result<RValue, RError> {
4649    // Try S3 dispatch (summary.lm, summary.data.frame, etc.)
4650    if let Some(result) = try_s3_dispatch("summary", args, named, context)? {
4651        return Ok(result);
4652    }
4653    // Default: for vectors, compute basic summary statistics
4654    match args.first() {
4655        Some(RValue::Vector(rv)) => {
4656            let doubles = rv.to_doubles();
4657            let vals: Vec<f64> = doubles.into_iter().flatten().collect();
4658            if vals.is_empty() {
4659                return Ok(RValue::Null);
4660            }
4661            let min = vals.iter().copied().fold(f64::INFINITY, f64::min);
4662            let max = vals.iter().copied().fold(f64::NEG_INFINITY, f64::max);
4663            let sum: f64 = vals.iter().sum();
4664            let mean = sum / vals.len() as f64;
4665            let mut sorted = vals;
4666            sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
4667            let median = quantile_type7(&sorted, 0.5);
4668            let q1 = quantile_type7(&sorted, 0.25);
4669            let q3 = quantile_type7(&sorted, 0.75);
4670
4671            let mut result_rv = RVector::from(Vector::Double(
4672                vec![
4673                    Some(min),
4674                    Some(q1),
4675                    Some(median),
4676                    Some(mean),
4677                    Some(q3),
4678                    Some(max),
4679                ]
4680                .into(),
4681            ));
4682            result_rv.set_attr(
4683                "names".to_string(),
4684                RValue::vec(Vector::Character(
4685                    vec![
4686                        Some("Min.".to_string()),
4687                        Some("1st Qu.".to_string()),
4688                        Some("Median".to_string()),
4689                        Some("Mean".to_string()),
4690                        Some("3rd Qu.".to_string()),
4691                        Some("Max.".to_string()),
4692                    ]
4693                    .into(),
4694                )),
4695            );
4696            Ok(RValue::Vector(result_rv))
4697        }
4698        Some(other) => Ok(other.clone()),
4699        None => Ok(RValue::Null),
4700    }
4701}
4702
/// Compute a quantile using R's type 7 algorithm (the default).
///
/// For sorted data of length n and probability p, with h = (n - 1) * p and
/// g = h - floor(h):
/// result = (1 - g) * x[floor(h)] + g * x[ceil(h)]
///
/// Interpolation is skipped when g is zero or both neighbours are equal, so
/// infinite values do not turn into NaN through `Inf - Inf` or `0 * Inf`.
///
/// `sorted` must already be in ascending order. Returns NaN for empty input
/// and for `p` outside `[0, 1]` (including NaN) instead of panicking on an
/// out-of-range index.
fn quantile_type7(sorted: &[f64], p: f64) -> f64 {
    // `contains` is false for NaN, so this also rejects a NaN probability.
    if sorted.is_empty() || !(0.0..=1.0).contains(&p) {
        return f64::NAN;
    }

    let last = sorted.len() - 1;
    let h = last as f64 * p;
    let base = h.floor();
    let frac = h - base;

    // With p in [0, 1], h lies in [0, last]; the `min` is a belt-and-braces
    // guard against float rounding pushing an index past the end.
    let lo = (base as usize).min(last);
    let hi = (h.ceil() as usize).min(last);
    let (below, above) = (sorted[lo], sorted[hi]);

    if frac == 0.0 || below == above {
        return below;
    }
    (1.0 - frac) * below + frac * above
}
4721
4722// region: reg.finalizer
4723
4724/// Register a function to be called when an environment is garbage collected,
4725/// or at interpreter exit if `onexit = TRUE`.
4726///
4727/// Since miniR uses Rc-based environments (no tracing GC), finalizers with
4728/// `onexit = FALSE` are accepted silently but will never fire. When
4729/// `onexit = TRUE`, the finalizer is stored on the Interpreter and executed
4730/// during its Drop.
4731///
4732/// @param e an environment to attach the finalizer to
4733/// @param f a function of one argument (the environment) to call
4734/// @param onexit logical; if TRUE, run the finalizer at interpreter exit
4735/// @return NULL, invisibly
4736#[interpreter_builtin(name = "reg.finalizer", min_args = 2, max_args = 3)]
4737fn interp_reg_finalizer(
4738    args: &[RValue],
4739    named: &[(String, RValue)],
4740    context: &BuiltinContext,
4741) -> Result<RValue, RError> {
4742    let call_args = CallArgs::new(args, named);
4743
4744    // e — must be an environment
4745    let e = call_args.value("e", 0).ok_or_else(|| {
4746        RError::new(
4747            RErrorKind::Argument,
4748            "reg.finalizer() requires an environment as its first argument".to_string(),
4749        )
4750    })?;
4751    if !matches!(e, RValue::Environment(_)) {
4752        return Err(RError::new(
4753            RErrorKind::Argument,
4754            "reg.finalizer() requires an environment as its first argument".to_string(),
4755        ));
4756    }
4757
4758    // f — must be a function
4759    let f = call_args.value("f", 1).ok_or_else(|| {
4760        RError::new(
4761            RErrorKind::Argument,
4762            "reg.finalizer() requires a function as its second argument".to_string(),
4763        )
4764    })?;
4765    let f = match_fun(f, context.env())?;
4766
4767    // onexit — logical, default FALSE
4768    let onexit = call_args.logical_flag("onexit", 2, false);
4769
4770    if onexit {
4771        context.with_interpreter(|interp| {
4772            interp.finalizers.borrow_mut().push(f);
4773        });
4774    }
4775    // When onexit is FALSE, we accept silently — no GC means it won't fire,
4776    // but it shouldn't error either.
4777
4778    Ok(RValue::Null)
4779}
4780
4781// endregion
4782
4783// region: options
4784
4785/// Get or set global options.
4786///
4787/// With no arguments, returns all current options as a named list.
4788/// With character arguments, returns the named options.
4789/// With name=value pairs, sets those options and returns the previous values.
4790///
4791/// @param ... option names to query, or name=value pairs to set
4792/// @return list of (previous) option values
4793#[interpreter_builtin]
4794fn interp_options(
4795    positional: &[RValue],
4796    named: &[(String, RValue)],
4797    context: &BuiltinContext,
4798) -> Result<RValue, RError> {
4799    context.with_interpreter(|interp| {
4800        let mut result: Vec<(Option<String>, RValue)> = Vec::new();
4801
4802        // If no arguments, return all options
4803        if positional.is_empty() && named.is_empty() {
4804            let opts = interp.options.borrow();
4805            let mut entries: Vec<_> = opts.iter().collect();
4806            entries.sort_by_key(|(k, _)| (*k).clone());
4807            for (k, v) in entries {
4808                result.push((Some(k.clone()), v.clone()));
4809            }
4810            return Ok(RValue::List(RList::new(result)));
4811        }
4812
4813        // Process positional args — character strings are queries
4814        for arg in positional {
4815            if let Some(name) = arg.as_vector().and_then(|v| v.as_character_scalar()) {
4816                let val = interp
4817                    .options
4818                    .borrow()
4819                    .get(&name)
4820                    .cloned()
4821                    .unwrap_or(RValue::Null);
4822                result.push((Some(name), val));
4823            } else if let RValue::List(list) = arg {
4824                // Setting options from a list (e.g. options(old_opts))
4825                for (opt_name, val) in &list.values {
4826                    if let Some(opt_name) = opt_name {
4827                        let prev = interp
4828                            .options
4829                            .borrow()
4830                            .get(opt_name.as_str())
4831                            .cloned()
4832                            .unwrap_or(RValue::Null);
4833                        interp
4834                            .options
4835                            .borrow_mut()
4836                            .insert(opt_name.clone(), val.clone());
4837                        result.push((Some(opt_name.clone()), prev));
4838                    }
4839                }
4840            }
4841        }
4842
4843        // Process named args — these are set operations
4844        for (name, val) in named {
4845            let prev = interp
4846                .options
4847                .borrow()
4848                .get(name)
4849                .cloned()
4850                .unwrap_or(RValue::Null);
4851            interp
4852                .options
4853                .borrow_mut()
4854                .insert(name.clone(), val.clone());
4855            result.push((Some(name.clone()), prev));
4856        }
4857
4858        Ok(RValue::List(RList::new(result)))
4859    })
4860}
4861
4862/// Get the value of a named global option.
4863///
4864/// @param name character string — the option name
4865/// @param default value to return if the option is not set (default NULL)
4866/// @return the option value, or default if not set
4867#[interpreter_builtin(name = "getOption", min_args = 1)]
4868fn interp_get_option(
4869    positional: &[RValue],
4870    _named: &[(String, RValue)],
4871    context: &BuiltinContext,
4872) -> Result<RValue, RError> {
4873    let name = positional
4874        .first()
4875        .and_then(|v| v.as_vector()?.as_character_scalar())
4876        .ok_or_else(|| {
4877            RError::new(
4878                RErrorKind::Argument,
4879                "getOption() requires a character string as its first argument".to_string(),
4880            )
4881        })?;
4882    let default = positional.get(1).cloned().unwrap_or(RValue::Null);
4883
4884    context.with_interpreter(|interp| {
4885        Ok(interp
4886            .options
4887            .borrow()
4888            .get(&name)
4889            .cloned()
4890            .unwrap_or(default))
4891    })
4892}
4893
4894// endregion
4895
4896// region: match.call, Find, Position, Negate, rapply
4897
4898/// Return the call expression with arguments matched to formal parameters.
4899///
4900/// Reconstructs the call as if all arguments were named according to the
4901/// function's formal parameter list. Useful for programming on the language.
4902///
4903/// @param definition the function whose formals to match against (default: parent function)
4904/// @param call the call to match (default: parent's call)
4905/// @return language object with matched arguments
4906#[interpreter_builtin(name = "match.call")]
4907fn interp_match_call(
4908    _positional: &[RValue],
4909    _named: &[(String, RValue)],
4910    context: &BuiltinContext,
4911) -> Result<RValue, RError> {
4912    context.with_interpreter(|interp| {
4913        let frame = interp
4914            .current_call_frame()
4915            .ok_or_else(|| RError::other("match.call() must be called from within a function"))?;
4916
4917        // Get the formals from the function
4918        let params: Vec<Param> = match &frame.function {
4919            RValue::Function(RFunction::Closure { params, .. }) => params.clone(),
4920            _ => Vec::new(),
4921        };
4922
4923        // Get the original call expression
4924        let call_expr = frame
4925            .call
4926            .ok_or_else(|| RError::other("match.call() requires a call expression on the stack"))?;
4927
4928        // Extract the function name from the call
4929        let func_expr = match &call_expr {
4930            Expr::Call { func, .. } => (**func).clone(),
4931            _ => return Ok(RValue::Language(Language::new(call_expr))),
4932        };
4933
4934        // Reconstruct with matched argument names
4935        let positional = &frame.supplied_positional;
4936        let named = &frame.supplied_named;
4937
4938        // Simplified 3-pass matching to figure out which positional maps to which formal
4939        let formal_names: Vec<&str> = params
4940            .iter()
4941            .filter(|p| !p.is_dots)
4942            .map(|p| p.name.as_str())
4943            .collect();
4944
4945        let mut named_to_formal: std::collections::HashMap<usize, &str> =
4946            std::collections::HashMap::new();
4947        let mut matched_formals: std::collections::HashSet<&str> = std::collections::HashSet::new();
4948
4949        // Pass 1: exact name match
4950        for (i, (arg_name, _)) in named.iter().enumerate() {
4951            if let Some(&formal) = formal_names.iter().find(|&&f| f == arg_name) {
4952                if !matched_formals.contains(formal) {
4953                    matched_formals.insert(formal);
4954                    named_to_formal.insert(i, formal);
4955                }
4956            }
4957        }
4958
4959        // Pass 2: partial match
4960        for (i, (arg_name, _)) in named.iter().enumerate() {
4961            if named_to_formal.contains_key(&i) {
4962                continue;
4963            }
4964            let candidates: Vec<&str> = formal_names
4965                .iter()
4966                .filter(|&&f| !matched_formals.contains(f) && f.starts_with(arg_name.as_str()))
4967                .copied()
4968                .collect();
4969            if candidates.len() == 1 {
4970                matched_formals.insert(candidates[0]);
4971                named_to_formal.insert(i, candidates[0]);
4972            }
4973        }
4974
4975        // Build reverse map
4976        let formal_to_named: std::collections::HashMap<&str, usize> = named_to_formal
4977            .iter()
4978            .map(|(&idx, &formal)| (formal, idx))
4979            .collect();
4980
4981        // Reconstruct args in formal order
4982        let mut result_args: Vec<Arg> = Vec::new();
4983        let mut pos_idx = 0usize;
4984
4985        for param in &params {
4986            if param.is_dots {
4987                // Collect remaining positional
4988                while pos_idx < positional.len() {
4989                    result_args.push(Arg {
4990                        name: None,
4991                        value: Some(rvalue_to_expr(&positional[pos_idx])),
4992                    });
4993                    pos_idx += 1;
4994                }
4995                // Collect unmatched named
4996                for (i, (name, val)) in named.iter().enumerate() {
4997                    if !named_to_formal.contains_key(&i) {
4998                        result_args.push(Arg {
4999                            name: Some(name.clone()),
5000                            value: Some(rvalue_to_expr(val)),
5001                        });
5002                    }
5003                }
5004                continue;
5005            }
5006
5007            if let Some(&named_idx) = formal_to_named.get(param.name.as_str()) {
5008                result_args.push(Arg {
5009                    name: Some(param.name.clone()),
5010                    value: Some(rvalue_to_expr(&named[named_idx].1)),
5011                });
5012            } else if pos_idx < positional.len() {
5013                result_args.push(Arg {
5014                    name: Some(param.name.clone()),
5015                    value: Some(rvalue_to_expr(&positional[pos_idx])),
5016                });
5017                pos_idx += 1;
5018            }
5019            // Skip unmatched formals with defaults
5020        }
5021
5022        let matched_call = Expr::Call {
5023            func: Box::new(func_expr),
5024            args: result_args,
5025            span: None,
5026        };
5027        Ok(RValue::Language(Language::new(matched_call)))
5028    })
5029}
5030
5031/// Convert an RValue to an Expr for use in match.call() reconstructed calls.
5032fn rvalue_to_expr(val: &RValue) -> Expr {
5033    match val {
5034        RValue::Null => Expr::Null,
5035        RValue::Vector(rv) => match &rv.inner {
5036            Vector::Double(d) if d.len() == 1 => match d.get_opt(0) {
5037                Some(v) if v.is_infinite() && v > 0.0 => Expr::Inf,
5038                Some(v) if v.is_nan() => Expr::NaN,
5039                Some(v) => Expr::Double(v),
5040                None => Expr::Na(crate::parser::ast::NaType::Real),
5041            },
5042            Vector::Integer(i) if i.len() == 1 => match i.get_opt(0) {
5043                Some(v) => Expr::Integer(v),
5044                None => Expr::Na(crate::parser::ast::NaType::Integer),
5045            },
5046            Vector::Logical(l) if l.len() == 1 => match l[0] {
5047                Some(v) => Expr::Bool(v),
5048                None => Expr::Na(crate::parser::ast::NaType::Logical),
5049            },
5050            Vector::Character(c) if c.len() == 1 => match &c[0] {
5051                Some(v) => Expr::String(v.clone()),
5052                None => Expr::Na(crate::parser::ast::NaType::Character),
5053            },
5054            _ => Expr::Symbol(format!("{}", val)),
5055        },
5056        RValue::Language(lang) => (*lang.inner).clone(),
5057        _ => Expr::Symbol(format!("{}", val)),
5058    }
5059}
5060
5061/// Find the first element of a vector for which a predicate returns TRUE.
5062///
5063/// @param f predicate function returning a logical scalar
5064/// @param x vector or list to search
5065/// @param right if TRUE, search from right to left
5066/// @return the first matching element, or NULL if none found
5067#[interpreter_builtin(name = "Find", min_args = 2)]
5068fn interp_find(
5069    positional: &[RValue],
5070    named: &[(String, RValue)],
5071    context: &BuiltinContext,
5072) -> Result<RValue, RError> {
5073    if positional.len() < 2 {
5074        return Err(RError::new(
5075            RErrorKind::Argument,
5076            "Find requires 2 arguments: f and x".to_string(),
5077        ));
5078    }
5079    let env = context.env();
5080    let f = match_fun(&positional[0], env)?;
5081    let x = &positional[1];
5082
5083    let right = named
5084        .iter()
5085        .find(|(n, _)| n == "right")
5086        .and_then(|(_, v)| v.as_vector()?.as_logical_scalar())
5087        .unwrap_or(false);
5088
5089    let items: Vec<RValue> = rvalue_to_items(x);
5090
5091    context.with_interpreter(|interp| {
5092        let iter: Box<dyn Iterator<Item = &RValue>> = if right {
5093            Box::new(items.iter().rev())
5094        } else {
5095            Box::new(items.iter())
5096        };
5097
5098        for item in iter {
5099            let result = interp.call_function(&f, std::slice::from_ref(item), &[], env)?;
5100            if result
5101                .as_vector()
5102                .and_then(|v| v.as_logical_scalar())
5103                .unwrap_or(false)
5104            {
5105                return Ok(item.clone());
5106            }
5107        }
5108        Ok(RValue::Null)
5109    })
5110}
5111
5112/// Find the position (1-based index) of the first element where a predicate is TRUE.
5113///
5114/// @param f predicate function returning a logical scalar
5115/// @param x vector or list to search
5116/// @param right if TRUE, search from right to left
5117/// @return scalar integer position, or NULL if none found
5118#[interpreter_builtin(name = "Position", min_args = 2)]
5119fn interp_position(
5120    positional: &[RValue],
5121    named: &[(String, RValue)],
5122    context: &BuiltinContext,
5123) -> Result<RValue, RError> {
5124    if positional.len() < 2 {
5125        return Err(RError::new(
5126            RErrorKind::Argument,
5127            "Position requires 2 arguments: f and x".to_string(),
5128        ));
5129    }
5130    let env = context.env();
5131    let f = match_fun(&positional[0], env)?;
5132    let x = &positional[1];
5133
5134    let right = named
5135        .iter()
5136        .find(|(n, _)| n == "right")
5137        .and_then(|(_, v)| v.as_vector()?.as_logical_scalar())
5138        .unwrap_or(false);
5139
5140    let items: Vec<RValue> = rvalue_to_items(x);
5141
5142    context.with_interpreter(|interp| {
5143        let indices: Box<dyn Iterator<Item = usize>> = if right {
5144            Box::new((0..items.len()).rev())
5145        } else {
5146            Box::new(0..items.len())
5147        };
5148
5149        for i in indices {
5150            let result = interp.call_function(&f, std::slice::from_ref(&items[i]), &[], env)?;
5151            if result
5152                .as_vector()
5153                .and_then(|v| v.as_logical_scalar())
5154                .unwrap_or(false)
5155            {
5156                let pos = i64::try_from(i + 1).map_err(RError::from)?;
5157                return Ok(RValue::vec(Vector::Integer(vec![Some(pos)].into())));
5158            }
5159        }
5160        Ok(RValue::Null)
5161    })
5162}
5163
5164/// Negate a predicate function, returning a new function that returns the
5165/// logical complement of the original.
5166///
5167/// @param f predicate function
5168/// @return a new closure that calls f and negates the result
5169#[interpreter_builtin(name = "Negate", min_args = 1)]
5170fn interp_negate(
5171    positional: &[RValue],
5172    _named: &[(String, RValue)],
5173    context: &BuiltinContext,
5174) -> Result<RValue, RError> {
5175    let env = context.env();
5176    let f = match_fun(&positional[0], env)?;
5177
5178    // Create an environment that captures the original function
5179    let closure_env = Environment::new_child(env);
5180    closure_env.set(".negate_f".to_string(), f);
5181
5182    // Build: function(...) !.negate_f(...)
5183    let body = Expr::UnaryOp {
5184        op: UnaryOp::Not,
5185        operand: Box::new(Expr::Call {
5186            func: Box::new(Expr::Symbol(".negate_f".to_string())),
5187            span: None,
5188            args: vec![Arg {
5189                name: None,
5190                value: Some(Expr::Dots),
5191            }],
5192        }),
5193    };
5194
5195    Ok(RValue::Function(RFunction::Closure {
5196        params: vec![Param {
5197            name: "...".to_string(),
5198            default: None,
5199            is_dots: true,
5200        }],
5201        body,
5202        env: closure_env,
5203    }))
5204}
5205
5206/// Recursively apply a function to elements of a (nested) list.
5207///
5208/// When `classes` is specified, only leaf elements whose type matches one of the
5209/// given class names are transformed by `f`. Non-matching leaves are replaced
5210/// by `deflt` (or left unchanged in "replace" mode).
5211///
5212/// @param object a list (possibly nested)
5213/// @param f function to apply to matching leaf elements
5214/// @param classes character vector of class names to match (default: "ANY" matches all)
5215/// @param deflt default value for non-matching leaves (used with "unlist"/"list" modes)
5216/// @param how one of "unlist" (default), "replace", or "list"
5217/// @return depends on `how`: "unlist" returns a flat vector, "replace" returns a list
5218///   with the same structure, "list" returns a flat list of results
5219#[interpreter_builtin(name = "rapply", min_args = 2)]
5220fn interp_rapply(
5221    positional: &[RValue],
5222    named: &[(String, RValue)],
5223    context: &BuiltinContext,
5224) -> Result<RValue, RError> {
5225    if positional.len() < 2 {
5226        return Err(RError::new(
5227            RErrorKind::Argument,
5228            "rapply requires at least 2 arguments: object and f".to_string(),
5229        ));
5230    }
5231    let env = context.env();
5232    let object = &positional[0];
5233    let f = match_fun(&positional[1], env)?;
5234
5235    let how = named
5236        .iter()
5237        .find(|(n, _)| n == "how")
5238        .and_then(|(_, v)| v.as_vector()?.as_character_scalar())
5239        .or_else(|| {
5240            positional
5241                .get(2)
5242                .and_then(|v| v.as_vector()?.as_character_scalar())
5243        })
5244        .unwrap_or_else(|| "unlist".to_string());
5245
5246    // Extract classes parameter: a character vector of type names to match
5247    let classes: Option<Vec<String>> =
5248        named
5249            .iter()
5250            .find(|(n, _)| n == "classes")
5251            .and_then(|(_, v)| match v.as_vector() {
5252                Some(rv) => {
5253                    let chars = rv.to_characters();
5254                    let strs: Vec<String> = chars.into_iter().flatten().collect();
5255                    if strs.len() == 1 && strs[0] == "ANY" {
5256                        None // "ANY" means match everything (same as no filter)
5257                    } else {
5258                        Some(strs)
5259                    }
5260                }
5261                None => None,
5262            });
5263
5264    // Extract deflt parameter: default value for non-matching leaves
5265    let deflt = named
5266        .iter()
5267        .find(|(n, _)| n == "deflt")
5268        .map(|(_, v)| v.clone());
5269
5270    context.with_interpreter(|interp| match how.as_str() {
5271        "replace" => rapply_replace(interp, object, &f, env, classes.as_deref()),
5272        "list" => {
5273            let mut results = Vec::new();
5274            rapply_collect(
5275                interp,
5276                object,
5277                &f,
5278                env,
5279                &mut results,
5280                classes.as_deref(),
5281                deflt.as_ref(),
5282            )?;
5283            Ok(RValue::List(RList::new(
5284                results.into_iter().map(|v| (None, v)).collect(),
5285            )))
5286        }
5287        _ => {
5288            // "unlist" (default)
5289            let mut results = Vec::new();
5290            rapply_collect(
5291                interp,
5292                object,
5293                &f,
5294                env,
5295                &mut results,
5296                classes.as_deref(),
5297                deflt.as_ref(),
5298            )?;
5299            if results.is_empty() {
5300                return Ok(RValue::Null);
5301            }
5302            // Try to simplify to a vector via c()
5303            crate::interpreter::builtins::builtin_c(&results, &[])
5304        }
5305    })
5306}
5307
5308/// Check if an RValue's type name matches one of the given class names.
5309fn rapply_matches_class(x: &RValue, classes: Option<&[String]>) -> bool {
5310    match classes {
5311        None => true, // No filter — match everything
5312        Some(cls) => {
5313            let type_name = x.type_name();
5314            // Map R type names: "double" -> "numeric", etc.
5315            cls.iter().any(|c| {
5316                c == type_name
5317                    || (c == "numeric" && (type_name == "double" || type_name == "integer"))
5318                    || (c == "character" && type_name == "character")
5319                    || (c == "logical" && type_name == "logical")
5320                    || (c == "complex" && type_name == "complex")
5321                    || (c == "integer" && type_name == "integer")
5322                    || (c == "double" && type_name == "double")
5323            })
5324        }
5325    }
5326}
5327
5328/// Helper: collect results of applying f to matching leaf (non-list) elements.
5329fn rapply_collect(
5330    interp: &crate::interpreter::Interpreter,
5331    x: &RValue,
5332    f: &RValue,
5333    env: &Environment,
5334    out: &mut Vec<RValue>,
5335    classes: Option<&[String]>,
5336    deflt: Option<&RValue>,
5337) -> Result<(), RError> {
5338    match x {
5339        RValue::List(list) => {
5340            for (_, val) in &list.values {
5341                rapply_collect(interp, val, f, env, out, classes, deflt)?;
5342            }
5343        }
5344        _ => {
5345            if rapply_matches_class(x, classes) {
5346                let result = interp
5347                    .call_function(f, std::slice::from_ref(x), &[], env)
5348                    .map_err(RError::from)?;
5349                out.push(result);
5350            } else if let Some(d) = deflt {
5351                out.push(d.clone());
5352            }
5353            // If no deflt and class doesn't match, skip the element
5354        }
5355    }
5356    Ok(())
5357}
5358
5359/// Helper: recursively apply f, preserving list structure ("replace" mode).
5360fn rapply_replace(
5361    interp: &crate::interpreter::Interpreter,
5362    x: &RValue,
5363    f: &RValue,
5364    env: &Environment,
5365    classes: Option<&[String]>,
5366) -> Result<RValue, RError> {
5367    match x {
5368        RValue::List(list) => {
5369            let new_vals: Vec<(Option<String>, RValue)> = list
5370                .values
5371                .iter()
5372                .map(|(name, val)| {
5373                    let new_val = rapply_replace(interp, val, f, env, classes)?;
5374                    Ok((name.clone(), new_val))
5375                })
5376                .collect::<Result<Vec<_>, RError>>()?;
5377            Ok(RValue::List(RList::new(new_vals)))
5378        }
5379        _ => {
5380            if rapply_matches_class(x, classes) {
5381                Ok(interp
5382                    .call_function(f, std::slice::from_ref(x), &[], env)
5383                    .map_err(RError::from)?)
5384            } else {
5385                // Non-matching leaf: keep as-is in "replace" mode
5386                Ok(x.clone())
5387            }
5388        }
5389    }
5390}
5391
5392// endregion
5393
5394// region: Introspection — search path, namespace exploration, function lookup
5395
5396/// Return the search path as a character vector.
5397///
5398/// The search path represents the order in which environments are searched
5399/// for names: .GlobalEnv -> attached packages -> package:base.
5400///
5401/// @return character vector of environment names on the search path
5402#[interpreter_builtin(name = "search")]
5403fn interp_search(
5404    _args: &[RValue],
5405    _named: &[(String, RValue)],
5406    context: &BuiltinContext,
5407) -> Result<RValue, RError> {
5408    let path = context.with_interpreter(|interp| interp.get_search_path());
5409    Ok(RValue::vec(Vector::Character(
5410        path.into_iter().map(Some).collect::<Vec<_>>().into(),
5411    )))
5412}
5413
5414/// List all loaded namespace names.
5415///
5416/// Returns names of all namespaces that have been loaded (via library(),
5417/// loadNamespace(), etc.), plus builtin namespaces.
5418///
5419/// @return character vector of namespace names
5420#[interpreter_builtin(name = "loadedNamespaces")]
5421fn interp_loaded_namespaces(
5422    _args: &[RValue],
5423    _named: &[(String, RValue)],
5424    context: &BuiltinContext,
5425) -> Result<RValue, RError> {
5426    let mut namespaces: Vec<String> = super::BUILTIN_REGISTRY
5427        .iter()
5428        .map(|d| d.namespace.to_string())
5429        .filter(|ns| !ns.is_empty())
5430        .collect();
5431
5432    // Add loaded package namespaces
5433    context.with_interpreter(|interp| {
5434        for name in interp.loaded_namespaces.borrow().keys() {
5435            namespaces.push(name.clone());
5436        }
5437    });
5438
5439    namespaces.sort();
5440    namespaces.dedup();
5441    Ok(RValue::vec(Vector::Character(
5442        namespaces.into_iter().map(Some).collect::<Vec<_>>().into(),
5443    )))
5444}
5445
5446/// Get exports from a namespace (list functions in a package).
5447///
5448/// @param ns character scalar: namespace name (e.g. "base", "stats", "utils")
5449/// @return character vector of function names in that namespace
5450/// @namespace base
5451#[interpreter_builtin(name = "getNamespaceExports", min_args = 1)]
5452fn interp_get_namespace_exports(
5453    args: &[RValue],
5454    _named: &[(String, RValue)],
5455    context: &BuiltinContext,
5456) -> Result<RValue, RError> {
5457    let ns = args
5458        .first()
5459        .and_then(|v| v.as_vector()?.as_character_scalar())
5460        .ok_or_else(|| RError::new(RErrorKind::Argument, "invalid namespace name".to_string()))?;
5461
5462    // Check loaded packages first
5463    let loaded_exports = context.with_interpreter(|interp| {
5464        interp
5465            .loaded_namespaces
5466            .borrow()
5467            .get(&ns)
5468            .map(|loaded| loaded.exports_env.ls())
5469    });
5470
5471    if let Some(mut names) = loaded_exports {
5472        names.sort();
5473        return Ok(RValue::vec(Vector::Character(
5474            names.into_iter().map(Some).collect::<Vec<_>>().into(),
5475        )));
5476    }
5477
5478    // Fall back to builtin registry
5479    let mut names: Vec<String> = super::BUILTIN_REGISTRY
5480        .iter()
5481        .filter(|d| d.namespace == ns)
5482        .map(|d| d.name.to_string())
5483        .collect();
5484    names.sort();
5485    Ok(RValue::vec(Vector::Character(
5486        names.into_iter().map(Some).collect::<Vec<_>>().into(),
5487    )))
5488}
5489
5490/// Find which namespace a function belongs to.
5491///
5492/// @param what character scalar: function name to look up
5493/// @return character vector of namespace names where the function is registered
5494/// @namespace utils
5495#[interpreter_builtin(name = "find", min_args = 1, namespace = "utils")]
5496fn interp_find_on_search_path(
5497    args: &[RValue],
5498    _named: &[(String, RValue)],
5499    context: &BuiltinContext,
5500) -> Result<RValue, RError> {
5501    let name = args
5502        .first()
5503        .and_then(|v| v.as_vector()?.as_character_scalar())
5504        .ok_or_else(|| RError::new(RErrorKind::Argument, "invalid 'what' argument".to_string()))?;
5505
5506    let mut found = Vec::new();
5507
5508    // Check global env
5509    if context.env().get(&name).is_some() {
5510        found.push(".GlobalEnv".to_string());
5511    }
5512
5513    // Check loaded packages on search path
5514    context.with_interpreter(|interp| {
5515        for entry in interp.search_path.borrow().iter() {
5516            if entry.env.has_local(&name) {
5517                found.push(entry.name.clone());
5518            }
5519        }
5520    });
5521
5522    // Check builtin registry
5523    for d in super::BUILTIN_REGISTRY.iter() {
5524        if d.name == name {
5525            found.push("package:base".to_string());
5526            break;
5527        }
5528    }
5529
5530    found.dedup();
5531    Ok(RValue::vec(Vector::Character(
5532        found.into_iter().map(Some).collect::<Vec<_>>().into(),
5533    )))
5534}
5535
5536/// Get a namespace environment by name.
5537///
5538/// Returns the namespace environment for a loaded package. If the namespace
5539/// is not yet loaded, attempts to load it (like GNU R's getNamespace).
5540/// Falls back to the base environment for builtin namespaces like "base".
5541///
5542/// @param ns character scalar: namespace name
5543/// @return environment
5544/// @namespace base
5545#[interpreter_builtin(name = "getNamespace", min_args = 1)]
5546fn interp_get_namespace(
5547    args: &[RValue],
5548    _named: &[(String, RValue)],
5549    context: &BuiltinContext,
5550) -> Result<RValue, RError> {
5551    let ns = args
5552        .first()
5553        .and_then(|v| v.as_vector()?.as_character_scalar())
5554        .ok_or_else(|| RError::new(RErrorKind::Argument, "invalid namespace name".to_string()))?;
5555
5556    // Check loaded packages first
5557    let loaded_ns = context.with_interpreter(|interp| {
5558        interp
5559            .loaded_namespaces
5560            .borrow()
5561            .get(&ns)
5562            .map(|loaded| loaded.namespace_env.clone())
5563    });
5564
5565    if let Some(env) = loaded_ns {
5566        return Ok(RValue::Environment(env));
5567    }
5568
5569    // Try to load the namespace if it's not already loaded
5570    let loaded_env = context.with_interpreter(|interp| interp.load_namespace(&ns).ok());
5571
5572    if let Some(env) = loaded_env {
5573        return Ok(RValue::Environment(env));
5574    }
5575
5576    // Fall back to base env for builtin namespaces (base, utils, stats, etc.)
5577    let env = context.with_interpreter(|interp| interp.base_env());
5578    Ok(RValue::Environment(env))
5579}
5580
5581/// Check if a namespace is loaded.
5582///
5583/// @param ns character scalar: namespace name
5584/// @return logical scalar
5585/// @namespace base
5586#[interpreter_builtin(name = "isNamespaceLoaded", min_args = 1)]
5587fn interp_is_namespace_loaded(
5588    args: &[RValue],
5589    _named: &[(String, RValue)],
5590    context: &BuiltinContext,
5591) -> Result<RValue, RError> {
5592    let ns = args
5593        .first()
5594        .and_then(|v| v.as_vector()?.as_character_scalar())
5595        .ok_or_else(|| RError::new(RErrorKind::Argument, "invalid namespace name".to_string()))?;
5596
5597    // Check loaded packages
5598    let loaded =
5599        context.with_interpreter(|interp| interp.loaded_namespaces.borrow().contains_key(&ns));
5600
5601    if loaded {
5602        return Ok(RValue::vec(Vector::Logical(vec![Some(true)].into())));
5603    }
5604
5605    // Fall back to builtin registry
5606    let exists = super::BUILTIN_REGISTRY.iter().any(|d| d.namespace == ns);
5607    Ok(RValue::vec(Vector::Logical(vec![Some(exists)].into())))
5608}
5609
5610/// Get the version of an installed package from its DESCRIPTION file.
5611///
5612/// @param pkg character scalar: the package name
5613/// @param lib.loc character vector: library paths to search (defaults to .libPaths())
5614/// @return character scalar: the version string (e.g. "1.4.2")
5615/// @namespace utils
5616#[interpreter_builtin(name = "packageVersion", min_args = 1, namespace = "utils")]
5617fn interp_package_version(
5618    args: &[RValue],
5619    named: &[(String, RValue)],
5620    context: &BuiltinContext,
5621) -> Result<RValue, RError> {
5622    let pkg = args
5623        .first()
5624        .and_then(|v| v.as_vector()?.as_character_scalar())
5625        .ok_or_else(|| {
5626            RError::new(
5627                RErrorKind::Argument,
5628                "'pkg' must be a character string".to_string(),
5629            )
5630        })?;
5631
5632    // Optional lib.loc argument
5633    let lib_loc: Option<Vec<String>> =
5634        named
5635            .iter()
5636            .find(|(n, _)| n == "lib.loc")
5637            .and_then(|(_, v)| {
5638                let vec = v.as_vector()?;
5639                Some(
5640                    vec.to_characters()
5641                        .into_iter()
5642                        .flatten()
5643                        .collect::<Vec<String>>(),
5644                )
5645            });
5646
5647    let version = context.with_interpreter(|interp| {
5648        // Check loaded namespaces first — avoids re-reading DESCRIPTION from disk
5649        if let Some(ns) = interp.loaded_namespaces.borrow().get(&pkg) {
5650            return Some(ns.description.version.clone());
5651        }
5652
5653        // Search on disk
5654        let lib_paths = lib_loc.unwrap_or_else(|| interp.get_lib_paths());
5655        for lib_path in &lib_paths {
5656            let desc_path = std::path::Path::new(lib_path)
5657                .join(&pkg)
5658                .join("DESCRIPTION");
5659            if let Ok(text) = std::fs::read_to_string(&desc_path) {
5660                if let Ok(desc) = crate::interpreter::packages::PackageDescription::parse(&text) {
5661                    return Some(desc.version);
5662                }
5663            }
5664        }
5665        None
5666    });
5667
5668    match version {
5669        Some(v) => Ok(RValue::vec(Vector::Character(vec![Some(v)].into()))),
5670        None => Err(RError::new(
5671            RErrorKind::Other,
5672            format!(
5673                "package '{}' not found\n  \
5674                 Hint: check that the package is installed in one of the library paths \
5675                 returned by .libPaths()",
5676                pkg
5677            ),
5678        )),
5679    }
5680}
5681
5682/// Read the DESCRIPTION file for a package, returning all fields as a named list.
5683///
5684/// @param pkg character scalar: package name
5685/// @param lib.loc character vector of library paths (default: .libPaths())
5686/// @param fields character vector of fields to return (default: all)
5687/// @return a named list (with class "packageDescription") of DESCRIPTION fields
5688/// @namespace utils
5689#[interpreter_builtin(name = "packageDescription", min_args = 1, namespace = "utils")]
5690fn interp_package_description(
5691    args: &[RValue],
5692    named: &[(String, RValue)],
5693    context: &BuiltinContext,
5694) -> Result<RValue, RError> {
5695    let pkg = args
5696        .first()
5697        .and_then(|v| v.as_vector()?.as_character_scalar())
5698        .ok_or_else(|| {
5699            RError::new(
5700                RErrorKind::Argument,
5701                "'pkg' must be a character string".to_string(),
5702            )
5703        })?;
5704
5705    let lib_loc: Option<Vec<String>> =
5706        named
5707            .iter()
5708            .find(|(n, _)| n == "lib.loc")
5709            .and_then(|(_, v)| {
5710                let vec = v.as_vector()?;
5711                Some(
5712                    vec.to_characters()
5713                        .into_iter()
5714                        .flatten()
5715                        .collect::<Vec<String>>(),
5716                )
5717            });
5718
5719    let fields_filter: Option<Vec<String>> =
5720        named
5721            .iter()
5722            .find(|(n, _)| n == "fields")
5723            .and_then(|(_, v)| {
5724                let vec = v.as_vector()?;
5725                Some(
5726                    vec.to_characters()
5727                        .into_iter()
5728                        .flatten()
5729                        .collect::<Vec<String>>(),
5730                )
5731            });
5732
5733    let desc_fields = context.with_interpreter(|interp| {
5734        // Synthetic DESCRIPTION for base packages
5735        if crate::interpreter::Interpreter::is_base_package(&pkg) {
5736            let mut fields = std::collections::HashMap::new();
5737            fields.insert("Package".to_string(), pkg.clone());
5738            fields.insert("Version".to_string(), "4.4.0".to_string());
5739            fields.insert("Priority".to_string(), "base".to_string());
5740            fields.insert("Title".to_string(), format!("The R {pkg} Package"));
5741            return Some(fields);
5742        }
5743
5744        // Check loaded namespaces first
5745        if let Some(ns) = interp.loaded_namespaces.borrow().get(&pkg) {
5746            return Some(ns.description.fields.clone());
5747        }
5748
5749        // Search on disk
5750        let lib_paths = lib_loc.unwrap_or_else(|| interp.get_lib_paths());
5751        for lib_path in &lib_paths {
5752            let desc_path = std::path::Path::new(lib_path)
5753                .join(&pkg)
5754                .join("DESCRIPTION");
5755            if let Ok(text) = std::fs::read_to_string(&desc_path) {
5756                if let Ok(desc) = crate::interpreter::packages::PackageDescription::parse(&text) {
5757                    return Some(desc.fields);
5758                }
5759            }
5760        }
5761        None
5762    });
5763
5764    match desc_fields {
5765        Some(fields) => {
5766            let mut values: Vec<(Option<String>, RValue)> = Vec::new();
5767            for (key, val) in &fields {
5768                if let Some(ref filter) = fields_filter {
5769                    if !filter.iter().any(|f| f == key) {
5770                        continue;
5771                    }
5772                }
5773                values.push((
5774                    Some(key.clone()),
5775                    RValue::vec(Vector::Character(vec![Some(val.clone())].into())),
5776                ));
5777            }
5778            let mut list = RList::new(values);
5779            let mut attrs = indexmap::IndexMap::new();
5780            attrs.insert(
5781                "class".to_string(),
5782                RValue::vec(Vector::Character(
5783                    vec![Some("packageDescription".to_string())].into(),
5784                )),
5785            );
5786            list.attrs = Some(Box::new(attrs));
5787            Ok(RValue::List(list))
5788        }
5789        None => Err(RError::new(
5790            RErrorKind::Other,
5791            format!("package '{}' not found", pkg),
5792        )),
5793    }
5794}
5795
5796/// Return the number of builtins registered, optionally filtered by namespace.
5797///
5798/// This is a miniR extension — not in GNU R. Useful for debugging.
5799///
5800/// @param ns optional character scalar: namespace to filter by
5801/// @return integer scalar
5802/// @namespace base
5803#[interpreter_builtin(name = ".builtinCount")]
5804fn interp_builtin_count(
5805    args: &[RValue],
5806    _named: &[(String, RValue)],
5807    _context: &BuiltinContext,
5808) -> Result<RValue, RError> {
5809    let ns_filter = args
5810        .first()
5811        .and_then(|v| v.as_vector()?.as_character_scalar());
5812
5813    let count = match ns_filter {
5814        Some(ns) => super::BUILTIN_REGISTRY
5815            .iter()
5816            .filter(|d| d.namespace == ns)
5817            .count(),
5818        None => super::BUILTIN_REGISTRY.len(),
5819    };
5820
5821    Ok(RValue::vec(Vector::Integer(
5822        vec![Some(i64::try_from(count).unwrap_or(0))].into(),
5823    )))
5824}
5825
5826// endregion
5827
5828// region: .Primitive
5829
5830/// `.Primitive(name)` — look up a primitive/builtin function by name.
5831///
5832/// In GNU R, returns a primitive function object. In miniR, returns the
5833/// builtin function from the base environment via the interpreter.
5834///
5835/// @param name character string naming the function
5836/// @return the function
5837/// @namespace base
5838#[interpreter_builtin(name = ".Primitive", min_args = 1)]
5839fn interp_dot_primitive(
5840    args: &[RValue],
5841    _named: &[(String, RValue)],
5842    context: &BuiltinContext,
5843) -> Result<RValue, RError> {
5844    let name = args
5845        .first()
5846        .and_then(|v| v.as_vector()?.as_character_scalar())
5847        .ok_or_else(|| {
5848            RError::new(
5849                RErrorKind::Argument,
5850                ".Primitive() requires a character string argument",
5851            )
5852        })?;
5853    context.with_interpreter(|interp| {
5854        interp
5855            .base_env()
5856            .get(&name)
5857            .ok_or_else(|| RError::new(RErrorKind::Other, format!("no such primitive: {name}")))
5858    })
5859}
5860
5861// endregion
5862
5863// region: namespace info
5864
5865/// `getNamespaceInfo(ns, which)` — get metadata about a namespace.
5866///
5867/// @param ns a namespace or package name
5868/// @param which character: "exports", "imports", "path", "spec"
5869/// @return the requested info
5870/// @namespace base
5871#[interpreter_builtin(name = "getNamespaceInfo", min_args = 2)]
5872fn interp_get_namespace_info(
5873    args: &[RValue],
5874    _named: &[(String, RValue)],
5875    context: &BuiltinContext,
5876) -> Result<RValue, RError> {
5877    get_namespace_info_impl(args, context)
5878}
5879
5880/// `.getNamespaceInfo(ns, which)` — internal version.
5881#[interpreter_builtin(name = ".getNamespaceInfo", min_args = 2)]
5882fn interp_dot_get_namespace_info(
5883    args: &[RValue],
5884    _named: &[(String, RValue)],
5885    context: &BuiltinContext,
5886) -> Result<RValue, RError> {
5887    get_namespace_info_impl(args, context)
5888}
5889
5890fn get_namespace_info_impl(args: &[RValue], context: &BuiltinContext) -> Result<RValue, RError> {
5891    let which = args
5892        .get(1)
5893        .and_then(|v| v.as_vector()?.as_character_scalar())
5894        .unwrap_or_default();
5895
5896    // Resolve the namespace: accept environment or string name
5897    let ns_env = match &args[0] {
5898        RValue::Environment(env) => env.clone(),
5899        RValue::Vector(rv) => {
5900            let name = rv
5901                .inner
5902                .as_character_scalar()
5903                .ok_or_else(|| RError::new(RErrorKind::Argument, "invalid namespace"))?;
5904            context.with_interpreter(|interp| interp.load_namespace(&name))?
5905        }
5906        _ => return Err(RError::new(RErrorKind::Argument, "invalid namespace")),
5907    };
5908
5909    match which.as_str() {
5910        "exports" => {
5911            let names: Vec<Option<String>> = ns_env.ls().into_iter().map(Some).collect();
5912            Ok(RValue::vec(Vector::Character(names.into())))
5913        }
5914        "path" | "spec" => {
5915            let name = ns_env.name().unwrap_or_default();
5916            let name = name.strip_prefix("namespace:").unwrap_or(&name);
5917            Ok(RValue::vec(Vector::Character(
5918                vec![Some(name.to_string())].into(),
5919            )))
5920        }
5921        _ => Ok(RValue::Null),
5922    }
5923}
5924
5925// endregion