Skip to main content

r/interpreter/builtins/
tables.rs

1//! Table and tabulate builtins — contingency tables and integer bin counting.
2
3use crate::interpreter::coerce::f64_to_i64;
4use crate::interpreter::value::*;
5use itertools::Itertools;
6use minir_macros::builtin;
7
8use super::factors::rvalue_to_char_vec;
9
10/// `tabulate(bin, nbins)` — count occurrences of each integer value 1..nbins.
11#[builtin(min_args = 1)]
12fn builtin_tabulate(args: &[RValue], _: &[(String, RValue)]) -> Result<RValue, RError> {
13    let bins: Vec<Option<i64>> = match &args[0] {
14        RValue::Vector(rv) => match &rv.inner {
15            Vector::Integer(v) => v.iter_opt().collect(),
16            Vector::Double(v) => v
17                .iter_opt()
18                .map(|d| d.map(f64_to_i64).transpose())
19                .collect::<Result<Vec<_>, _>>()?,
20            _ => {
21                return Err(RError::new(
22                    RErrorKind::Type,
23                    "tabulate() requires an integer or double vector".to_string(),
24                ))
25            }
26        },
27        _ => {
28            return Err(RError::new(
29                RErrorKind::Type,
30                "tabulate() requires a numeric vector".to_string(),
31            ))
32        }
33    };
34
35    let max_bin = bins.iter().filter_map(|b| *b).max().unwrap_or(0);
36    let nbins = match args.get(1) {
37        Some(RValue::Vector(rv)) => {
38            usize::try_from(rv.inner.as_integer_scalar().unwrap_or(max_bin).max(0))?
39        }
40        _ => usize::try_from(max_bin.max(0))?,
41    };
42
43    let mut counts = vec![0i64; nbins];
44    for v in bins.iter().flatten() {
45        let idx = *v - 1;
46        if idx >= 0 {
47            if let Ok(uidx) = usize::try_from(idx) {
48                if uidx < nbins {
49                    counts[uidx] += 1;
50                }
51            }
52        }
53    }
54
55    Ok(RValue::vec(Vector::Integer(
56        counts.into_iter().map(Some).collect::<Vec<_>>().into(),
57    )))
58}
59
60/// `table(...)` — contingency table (one-way for now).
61///
62/// For a single vector, counts occurrences of each unique value.
63/// Returns a named integer vector with class "table".
64#[builtin]
65fn builtin_table(args: &[RValue], _named: &[(String, RValue)]) -> Result<RValue, RError> {
66    if args.is_empty() {
67        return Ok(RValue::Null);
68    }
69
70    let vals = rvalue_to_char_vec(&args[0])?;
71
72    // Count unique values, sorted
73    let counts = vals.iter().flatten().counts();
74    let order: Vec<&String> = counts.keys().sorted().copied().collect();
75
76    let names: Vec<Option<String>> = order.iter().map(|s| Some((*s).clone())).collect();
77    let values: Vec<Option<i64>> = order
78        .iter()
79        .map(|s| Some(i64::try_from(counts[*s]).unwrap_or(0)))
80        .collect();
81
82    let mut rv = RVector::from(Vector::Integer(values.into()));
83    rv.set_attr(
84        "names".to_string(),
85        RValue::vec(Vector::Character(names.into())),
86    );
87    rv.set_attr(
88        "class".to_string(),
89        RValue::vec(Vector::Character(vec![Some("table".to_string())].into())),
90    );
91    Ok(RValue::Vector(rv))
92}