Skip to main content

r/interpreter/builtins/
factors.rs

1//! Factor builtins — factor construction, levels, and nlevels.
2//!
3//! A factor is an integer vector with a "levels" attribute (character) and
4//! class "factor" (or c("ordered", "factor") if ordered).
5
6use crate::interpreter::value::*;
7use itertools::Itertools;
8use minir_macros::builtin;
9
10/// Coerce an RValue to a character vector for level matching.
11pub(super) fn rvalue_to_char_vec(x: &RValue) -> Result<Vec<Option<String>>, RError> {
12    match x {
13        RValue::Vector(rv) => match &rv.inner {
14            Vector::Character(c) => Ok(c.to_vec()),
15            Vector::Integer(v) => Ok(v.iter().map(|i| i.map(|n| n.to_string())).collect()),
16            Vector::Double(v) => Ok(v.iter().map(|d| d.map(|n| n.to_string())).collect()),
17            Vector::Complex(v) => Ok(v.iter().map(|c| c.map(format_r_complex)).collect()),
18            Vector::Raw(v) => Ok(v.iter().map(|b| Some(format!("{:02x}", b))).collect()),
19            Vector::Logical(v) => Ok(v
20                .iter()
21                .map(|b| {
22                    b.map(|b| {
23                        if b {
24                            "TRUE".to_string()
25                        } else {
26                            "FALSE".to_string()
27                        }
28                    })
29                })
30                .collect()),
31        },
32        RValue::Null => Ok(vec![]),
33        _ => Err(RError::new(
34            RErrorKind::Type,
35            "expected an atomic vector".to_string(),
36        )),
37    }
38}
39
40/// Encode a vector as a factor (categorical variable).
41///
42/// @param x vector to encode
43/// @param levels character vector of allowed levels (default: sorted unique values)
44/// @param labels character vector of display labels for the levels (default: same as levels)
45/// @param ordered if TRUE, create an ordered factor
46/// @return an integer vector with "levels" and "class" attributes
47#[builtin]
48fn builtin_factor(args: &[RValue], named: &[(String, RValue)]) -> Result<RValue, RError> {
49    let x = args.first().cloned().unwrap_or(RValue::Null);
50    let char_vals = rvalue_to_char_vec(&x)?;
51
52    // Get levels: explicit or unique values in order of appearance
53    let explicit_levels = named.iter().find(|(n, _)| n == "levels").map(|(_, v)| v);
54    let levels: Vec<String> = if let Some(lv) = explicit_levels {
55        match lv {
56            RValue::Vector(rv) => match &rv.inner {
57                Vector::Character(c) => c.iter().filter_map(|s| s.clone()).collect(),
58                _ => {
59                    return Err(RError::new(
60                        RErrorKind::Argument,
61                        "levels must be a character vector".to_string(),
62                    ))
63                }
64            },
65            RValue::Null => {
66                let mut seen = Vec::new();
67                for s in char_vals.iter().flatten() {
68                    if !seen.contains(s) {
69                        seen.push(s.clone());
70                    }
71                }
72                seen
73            }
74            _ => {
75                return Err(RError::new(
76                    RErrorKind::Argument,
77                    "levels must be a character vector or NULL".to_string(),
78                ))
79            }
80        }
81    } else {
82        char_vals
83            .iter()
84            .flatten()
85            .unique()
86            .sorted()
87            .cloned()
88            .collect()
89    };
90
91    // Get labels (default = levels themselves)
92    let labels: Vec<String> = if let Some((_, lbl_val)) = named.iter().find(|(n, _)| n == "labels")
93    {
94        match lbl_val {
95            RValue::Vector(rv) => match &rv.inner {
96                Vector::Character(c) => c.iter().filter_map(|s| s.clone()).collect(),
97                _ => levels.clone(),
98            },
99            _ => levels.clone(),
100        }
101    } else {
102        levels.clone()
103    };
104
105    let ordered = named
106        .iter()
107        .find(|(n, _)| n == "ordered")
108        .and_then(|(_, v)| match v {
109            RValue::Vector(rv) => rv.inner.as_logical_scalar(),
110            _ => None,
111        })
112        .unwrap_or(false);
113
114    // Map each value to its 1-based level index (NA if not in levels)
115    let codes: Vec<Option<i64>> = char_vals
116        .iter()
117        .map(|v| match v {
118            Some(s) => levels
119                .iter()
120                .position(|l| l == s)
121                .map(|i| i64::try_from(i + 1))
122                .transpose(),
123            None => Ok(None),
124        })
125        .collect::<Result<Vec<_>, _>>()?;
126
127    let mut rv = RVector::from(Vector::Integer(codes.into()));
128    rv.set_attr(
129        "levels".to_string(),
130        RValue::vec(Vector::Character(
131            labels
132                .iter()
133                .map(|s| Some(s.clone()))
134                .collect::<Vec<_>>()
135                .into(),
136        )),
137    );
138    let class = if ordered {
139        RValue::vec(Vector::Character(
140            vec![Some("ordered".to_string()), Some("factor".to_string())].into(),
141        ))
142    } else {
143        RValue::vec(Vector::Character(vec![Some("factor".to_string())].into()))
144    };
145    rv.set_attr("class".to_string(), class);
146
147    Ok(RValue::Vector(rv))
148}
149
150/// Construct an ordered factor.
151///
152/// Delegates to `factor()` with `ordered = TRUE`. Ordered factors represent
153/// ordinal data where the levels have a meaningful order.
154///
155/// @param x vector to encode (default: character())
156/// @param levels character vector of allowed levels (default: sorted unique values)
157/// @param labels character vector of display labels for the levels
158/// @return an ordered factor (integer vector with "levels" and "class" attributes)
159#[builtin]
160fn builtin_ordered(args: &[RValue], named: &[(String, RValue)]) -> Result<RValue, RError> {
161    // Build a new named args list with ordered=TRUE forced
162    let mut new_named: Vec<(String, RValue)> = named
163        .iter()
164        .filter(|(n, _)| n != "ordered")
165        .cloned()
166        .collect();
167    new_named.push((
168        "ordered".to_string(),
169        RValue::vec(Vector::Logical(vec![Some(true)].into())),
170    ));
171    builtin_factor(args, &new_named)
172}
173
174/// `levels(x)` — get the levels of a factor.
175#[builtin(min_args = 1)]
176fn builtin_levels(args: &[RValue], _: &[(String, RValue)]) -> Result<RValue, RError> {
177    match args.first() {
178        Some(RValue::Vector(rv)) => Ok(rv.get_attr("levels").cloned().unwrap_or(RValue::Null)),
179        _ => Ok(RValue::Null),
180    }
181}
182
183/// `nlevels(x)` — number of levels of a factor.
184#[builtin(min_args = 1)]
185fn builtin_nlevels(args: &[RValue], _: &[(String, RValue)]) -> Result<RValue, RError> {
186    match args.first() {
187        Some(RValue::Vector(rv)) => match rv.get_attr("levels") {
188            Some(RValue::Vector(lv)) => Ok(RValue::vec(Vector::Integer(
189                vec![Some(i64::try_from(lv.inner.len())?)].into(),
190            ))),
191            _ => Ok(RValue::vec(Vector::Integer(vec![Some(0i64)].into()))),
192        },
193        _ => Ok(RValue::vec(Vector::Integer(vec![Some(0i64)].into()))),
194    }
195}