Skip to main content

r/interpreter/builtins/
json.rs

1//! JSON builtins — `fromJSON()` and `toJSON()` providing jsonlite-compatible
2//! conversion between R values and JSON strings.
3
4use std::collections::HashMap;
5
6use super::CallArgs;
7use crate::interpreter::value::*;
8use minir_macros::builtin;
9
10// region: fromJSON
11
12/// Parse a JSON string into an R value.
13///
14/// Conversion rules:
15/// - JSON object -> named list
16/// - JSON array of objects (same keys) -> data.frame
17/// - JSON array of scalars -> vector
18/// - JSON null -> NULL
19/// - JSON true/false -> logical
20/// - JSON number -> double (or integer if representable)
21/// - JSON string -> character
22///
23/// @param txt character scalar: JSON string to parse
24/// @return R value corresponding to the JSON structure
25#[builtin(name = "fromJSON", min_args = 1, names = ["jsonlite::fromJSON"], namespace = "jsonlite")]
26fn builtin_from_json(args: &[RValue], named: &[(String, RValue)]) -> Result<RValue, RError> {
27    let call_args = CallArgs::new(args, named);
28    let txt = call_args.string("txt", 0)?;
29
30    let json_value: serde_json::Value = serde_json::from_str(&txt)
31        .map_err(|e| RError::new(RErrorKind::Other, format!("JSON parse error: {e}")))?;
32
33    json_to_rvalue(&json_value)
34}
35
36/// Convert a `serde_json::Value` to an `RValue`.
37fn json_to_rvalue(value: &serde_json::Value) -> Result<RValue, RError> {
38    match value {
39        serde_json::Value::Null => Ok(RValue::Null),
40        serde_json::Value::Bool(b) => Ok(RValue::vec(Vector::Logical(vec![Some(*b)].into()))),
41        serde_json::Value::Number(n) => {
42            if let Some(i) = n.as_i64() {
43                // Check if it fits in i64 (it always does here) and has no fractional part
44                Ok(RValue::vec(Vector::Integer(vec![Some(i)].into())))
45            } else if let Some(f) = n.as_f64() {
46                Ok(RValue::vec(Vector::Double(vec![Some(f)].into())))
47            } else {
48                Ok(RValue::vec(Vector::Double(vec![None].into())))
49            }
50        }
51        serde_json::Value::String(s) => {
52            Ok(RValue::vec(Vector::Character(vec![Some(s.clone())].into())))
53        }
54        serde_json::Value::Array(arr) => json_array_to_rvalue(arr),
55        serde_json::Value::Object(obj) => json_object_to_rvalue(obj),
56    }
57}
58
59/// Convert a JSON array to an R value.
60///
61/// If all elements are scalars of the same type, produce an atomic vector.
62/// If all elements are objects with the same keys, produce a data.frame.
63/// Otherwise, produce a list.
64fn json_array_to_rvalue(arr: &[serde_json::Value]) -> Result<RValue, RError> {
65    if arr.is_empty() {
66        return Ok(RValue::List(RList::new(vec![])));
67    }
68
69    // Check if all elements are objects with the same keys -> data.frame
70    if let Some(df) = try_array_as_dataframe(arr)? {
71        return Ok(df);
72    }
73
74    // Check if all elements are homogeneous scalars -> atomic vector
75    if let Some(vec) = try_array_as_vector(arr) {
76        return Ok(vec);
77    }
78
79    // Fallback: heterogeneous list
80    let elements: Result<Vec<(Option<String>, RValue)>, RError> =
81        arr.iter().map(|v| Ok((None, json_to_rvalue(v)?))).collect();
82    Ok(RValue::List(RList::new(elements?)))
83}
84
85/// Try to convert a JSON array of objects into a data.frame.
86/// Returns `None` if the array elements are not all objects with the same keys.
87fn try_array_as_dataframe(arr: &[serde_json::Value]) -> Result<Option<RValue>, RError> {
88    // Collect keys from each object element
89    let mut all_objects = true;
90    let mut key_sets: Vec<Vec<String>> = Vec::new();
91
92    for item in arr {
93        if let serde_json::Value::Object(obj) = item {
94            let keys: Vec<String> = obj.keys().cloned().collect();
95            key_sets.push(keys);
96        } else {
97            all_objects = false;
98            break;
99        }
100    }
101
102    if !all_objects || key_sets.is_empty() {
103        return Ok(None);
104    }
105
106    // Check all objects have the same keys (order-independent)
107    let first_keys: std::collections::HashSet<&str> =
108        key_sets[0].iter().map(|s| s.as_str()).collect();
109    for ks in &key_sets[1..] {
110        let this_keys: std::collections::HashSet<&str> = ks.iter().map(|s| s.as_str()).collect();
111        if this_keys != first_keys {
112            return Ok(None);
113        }
114    }
115
116    // Build column-oriented data: collect all values for each key
117    let col_names: Vec<String> = key_sets[0].clone();
118    let nrows = arr.len();
119    let mut columns: HashMap<&str, Vec<&serde_json::Value>> = HashMap::new();
120    for key in &col_names {
121        columns.insert(key.as_str(), Vec::with_capacity(nrows));
122    }
123
124    for item in arr {
125        if let serde_json::Value::Object(obj) = item {
126            for key in &col_names {
127                let val = obj.get(key.as_str()).unwrap_or(&serde_json::Value::Null);
128                columns
129                    .get_mut(key.as_str())
130                    .expect("key was inserted into columns map above")
131                    .push(val);
132            }
133        }
134    }
135
136    // Build each column as an R vector, coercing scalar types
137    let mut list_cols: Vec<(Option<String>, RValue)> = Vec::new();
138    for key in &col_names {
139        let vals = &columns[key.as_str()];
140        let col_value = coerce_json_column(vals)?;
141        list_cols.push((Some(key.clone()), col_value));
142    }
143
144    let mut list = RList::new(list_cols);
145    list.set_attr(
146        "class".to_string(),
147        RValue::vec(Vector::Character(
148            vec![Some("data.frame".to_string())].into(),
149        )),
150    );
151    list.set_attr(
152        "names".to_string(),
153        RValue::vec(Vector::Character(
154            col_names.into_iter().map(Some).collect::<Vec<_>>().into(),
155        )),
156    );
157    let row_names: Vec<Option<i64>> = (1..=i64::try_from(nrows)?).map(Some).collect();
158    list.set_attr(
159        "row.names".to_string(),
160        RValue::vec(Vector::Integer(row_names.into())),
161    );
162
163    Ok(Some(RValue::List(list)))
164}
165
166/// Coerce a column of JSON values to an R vector.
167/// Tries integer -> double -> character, with null becoming NA.
168fn coerce_json_column(vals: &[&serde_json::Value]) -> Result<RValue, RError> {
169    // Check what types are present
170    let mut has_null = false;
171    let mut has_bool = false;
172    let mut has_int = false;
173    let mut has_float = false;
174    let mut has_string = false;
175    let mut has_complex = false; // arrays/objects
176
177    for v in vals {
178        match v {
179            serde_json::Value::Null => has_null = true,
180            serde_json::Value::Bool(_) => has_bool = true,
181            serde_json::Value::Number(n) => {
182                if n.is_i64() {
183                    has_int = true;
184                } else {
185                    has_float = true;
186                }
187            }
188            serde_json::Value::String(_) => has_string = true,
189            _ => has_complex = true,
190        }
191    }
192    let _ = has_null; // null is always compatible (becomes NA)
193
194    // If complex values present, fall back to list column
195    if has_complex {
196        let elements: Result<Vec<(Option<String>, RValue)>, RError> = vals
197            .iter()
198            .map(|v| Ok((None, json_to_rvalue(v)?)))
199            .collect();
200        return Ok(RValue::List(RList::new(elements?)));
201    }
202
203    // If strings present, everything becomes character
204    if has_string {
205        let result: Vec<Option<String>> = vals
206            .iter()
207            .map(|v| match v {
208                serde_json::Value::Null => None,
209                serde_json::Value::String(s) => Some(s.clone()),
210                serde_json::Value::Bool(b) => Some(if *b { "TRUE" } else { "FALSE" }.to_string()),
211                serde_json::Value::Number(n) => Some(n.to_string()),
212                _ => None,
213            })
214            .collect();
215        return Ok(RValue::vec(Vector::Character(result.into())));
216    }
217
218    // If only booleans (and nulls), produce logical
219    if has_bool && !has_int && !has_float {
220        let result: Vec<Option<bool>> = vals
221            .iter()
222            .map(|v| match v {
223                serde_json::Value::Bool(b) => Some(*b),
224                _ => None,
225            })
226            .collect();
227        return Ok(RValue::vec(Vector::Logical(result.into())));
228    }
229
230    // If floats present, everything numeric becomes double
231    if has_float {
232        let result: Vec<Option<f64>> = vals
233            .iter()
234            .map(|v| match v {
235                serde_json::Value::Number(n) => n.as_f64(),
236                serde_json::Value::Bool(b) => Some(if *b { 1.0 } else { 0.0 }),
237                _ => None,
238            })
239            .collect();
240        return Ok(RValue::vec(Vector::Double(result.into())));
241    }
242
243    // Pure integer (and nulls)
244    if has_int {
245        let result: Vec<Option<i64>> = vals
246            .iter()
247            .map(|v| match v {
248                serde_json::Value::Number(n) => n.as_i64(),
249                serde_json::Value::Bool(b) => Some(i64::from(*b)),
250                _ => None,
251            })
252            .collect();
253        return Ok(RValue::vec(Vector::Integer(result.into())));
254    }
255
256    // If booleans mixed with integers
257    if has_bool {
258        let result: Vec<Option<i64>> = vals
259            .iter()
260            .map(|v| match v {
261                serde_json::Value::Bool(b) => Some(i64::from(*b)),
262                serde_json::Value::Number(n) => n.as_i64(),
263                _ => None,
264            })
265            .collect();
266        return Ok(RValue::vec(Vector::Integer(result.into())));
267    }
268
269    // All null
270    let result: Vec<Option<bool>> = vals.iter().map(|_| None).collect();
271    Ok(RValue::vec(Vector::Logical(result.into())))
272}
273
274/// Try to convert a JSON array of scalars into an atomic vector.
275/// Returns `None` if the array contains non-scalar values.
276fn try_array_as_vector(arr: &[serde_json::Value]) -> Option<RValue> {
277    // Check what types of scalars we have
278    let mut has_null = false;
279    let mut has_bool = false;
280    let mut has_int = false;
281    let mut has_float = false;
282    let mut has_string = false;
283
284    for item in arr {
285        match item {
286            serde_json::Value::Null => has_null = true,
287            serde_json::Value::Bool(_) => has_bool = true,
288            serde_json::Value::Number(n) => {
289                if n.is_i64() {
290                    has_int = true;
291                } else {
292                    has_float = true;
293                }
294            }
295            serde_json::Value::String(_) => has_string = true,
296            // Non-scalar: not a homogeneous scalar array
297            serde_json::Value::Array(_) | serde_json::Value::Object(_) => return None,
298        }
299    }
300    let _ = has_null; // null is always compatible
301
302    // Strings dominate
303    if has_string {
304        let result: Vec<Option<String>> = arr
305            .iter()
306            .map(|v| match v {
307                serde_json::Value::String(s) => Some(s.clone()),
308                serde_json::Value::Null => None,
309                serde_json::Value::Bool(b) => Some(if *b { "TRUE" } else { "FALSE" }.to_string()),
310                serde_json::Value::Number(n) => Some(n.to_string()),
311                _ => None,
312            })
313            .collect();
314        return Some(RValue::vec(Vector::Character(result.into())));
315    }
316
317    // Pure booleans (no numbers)
318    if has_bool && !has_int && !has_float {
319        let result: Vec<Option<bool>> = arr
320            .iter()
321            .map(|v| match v {
322                serde_json::Value::Bool(b) => Some(*b),
323                _ => None,
324            })
325            .collect();
326        return Some(RValue::vec(Vector::Logical(result.into())));
327    }
328
329    // Has floats -> all numeric becomes double
330    if has_float {
331        let result: Vec<Option<f64>> = arr
332            .iter()
333            .map(|v| match v {
334                serde_json::Value::Number(n) => n.as_f64(),
335                serde_json::Value::Bool(b) => Some(if *b { 1.0 } else { 0.0 }),
336                _ => None,
337            })
338            .collect();
339        return Some(RValue::vec(Vector::Double(result.into())));
340    }
341
342    // Pure integers
343    if has_int {
344        let result: Vec<Option<i64>> = arr
345            .iter()
346            .map(|v| match v {
347                serde_json::Value::Number(n) => n.as_i64(),
348                serde_json::Value::Bool(b) => Some(i64::from(*b)),
349                _ => None,
350            })
351            .collect();
352        return Some(RValue::vec(Vector::Integer(result.into())));
353    }
354
355    // Booleans mixed with numbers -> integer
356    if has_bool {
357        let result: Vec<Option<i64>> = arr
358            .iter()
359            .map(|v| match v {
360                serde_json::Value::Bool(b) => Some(i64::from(*b)),
361                _ => None,
362            })
363            .collect();
364        return Some(RValue::vec(Vector::Integer(result.into())));
365    }
366
367    // All null
368    let result: Vec<Option<bool>> = arr.iter().map(|_| None).collect();
369    Some(RValue::vec(Vector::Logical(result.into())))
370}
371
372/// Convert a JSON object to a named list.
373fn json_object_to_rvalue(
374    obj: &serde_json::Map<String, serde_json::Value>,
375) -> Result<RValue, RError> {
376    let mut entries: Vec<(Option<String>, RValue)> = Vec::with_capacity(obj.len());
377    for (key, value) in obj {
378        entries.push((Some(key.clone()), json_to_rvalue(value)?));
379    }
380    Ok(RValue::List(RList::new(entries)))
381}
382
383// endregion
384
385// region: toJSON
386
387/// Convert an R value to a JSON string.
388///
389/// Conversion rules:
390/// - Named list -> JSON object
391/// - Unnamed list -> JSON array
392/// - Vector of length 1 -> JSON scalar
393/// - Vector of length > 1 -> JSON array
394/// - NULL -> null
395///
396/// @param x R value to convert
397/// @return character scalar containing the JSON string
398#[builtin(name = "toJSON", min_args = 1, names = ["jsonlite::toJSON"], namespace = "jsonlite")]
399fn builtin_to_json(args: &[RValue], _named: &[(String, RValue)]) -> Result<RValue, RError> {
400    let value = args
401        .first()
402        .ok_or_else(|| RError::new(RErrorKind::Argument, "argument 'x' is missing".to_string()))?;
403
404    let json = rvalue_to_json(value)?;
405    let json_str = serde_json::to_string(&json)
406        .map_err(|e| RError::new(RErrorKind::Other, format!("JSON serialization error: {e}")))?;
407
408    Ok(RValue::vec(Vector::Character(vec![Some(json_str)].into())))
409}
410
411/// Convert an `RValue` to a `serde_json::Value`.
412fn rvalue_to_json(value: &RValue) -> Result<serde_json::Value, RError> {
413    match value {
414        RValue::Null => Ok(serde_json::Value::Null),
415        RValue::Vector(rv) => vector_to_json(&rv.inner),
416        RValue::List(list) => list_to_json(list),
417        RValue::Function(_) => Err(RError::new(
418            RErrorKind::Type,
419            "cannot convert function to JSON".to_string(),
420        )),
421        RValue::Environment(_) => Err(RError::new(
422            RErrorKind::Type,
423            "cannot convert environment to JSON".to_string(),
424        )),
425        RValue::Language(_) => Err(RError::new(
426            RErrorKind::Type,
427            "cannot convert language object to JSON".to_string(),
428        )),
429        RValue::Promise(_) => Err(RError::new(
430            RErrorKind::Type,
431            "cannot convert promise to JSON (force it first)".to_string(),
432        )),
433    }
434}
435
436/// Convert an atomic vector to JSON.
437/// Scalars (length 1) become JSON scalars; longer vectors become JSON arrays.
438fn vector_to_json(vec: &Vector) -> Result<serde_json::Value, RError> {
439    match vec {
440        Vector::Logical(v) => {
441            if v.len() == 1 {
442                match v[0] {
443                    Some(b) => Ok(serde_json::Value::Bool(b)),
444                    None => Ok(serde_json::Value::Null),
445                }
446            } else {
447                let arr: Vec<serde_json::Value> = v
448                    .iter()
449                    .map(|x| match x {
450                        Some(b) => serde_json::Value::Bool(*b),
451                        None => serde_json::Value::Null,
452                    })
453                    .collect();
454                Ok(serde_json::Value::Array(arr))
455            }
456        }
457        Vector::Integer(v) => {
458            if v.len() == 1 {
459                match v.get_opt(0) {
460                    Some(i) => Ok(serde_json::json!(i)),
461                    None => Ok(serde_json::Value::Null),
462                }
463            } else {
464                let arr: Vec<serde_json::Value> = v
465                    .iter_opt()
466                    .map(|x| match x {
467                        Some(i) => serde_json::json!(i),
468                        None => serde_json::Value::Null,
469                    })
470                    .collect();
471                Ok(serde_json::Value::Array(arr))
472            }
473        }
474        Vector::Double(v) => {
475            if v.len() == 1 {
476                match v.get_opt(0) {
477                    Some(f) => double_to_json(f),
478                    None => Ok(serde_json::Value::Null),
479                }
480            } else {
481                let arr: Result<Vec<serde_json::Value>, RError> = v
482                    .iter_opt()
483                    .map(|x| match x {
484                        Some(f) => double_to_json(f),
485                        None => Ok(serde_json::Value::Null),
486                    })
487                    .collect();
488                Ok(serde_json::Value::Array(arr?))
489            }
490        }
491        Vector::Character(v) => {
492            if v.len() == 1 {
493                match &v[0] {
494                    Some(s) => Ok(serde_json::Value::String(s.clone())),
495                    None => Ok(serde_json::Value::Null),
496                }
497            } else {
498                let arr: Vec<serde_json::Value> = v
499                    .iter()
500                    .map(|x| match x {
501                        Some(s) => serde_json::Value::String(s.clone()),
502                        None => serde_json::Value::Null,
503                    })
504                    .collect();
505                Ok(serde_json::Value::Array(arr))
506            }
507        }
508        Vector::Complex(v) => {
509            // Represent complex as string "re+imi"
510            let arr: Vec<serde_json::Value> = v
511                .iter()
512                .map(|x| match x {
513                    Some(c) => serde_json::Value::String(format!("{}+{}i", c.re, c.im)),
514                    None => serde_json::Value::Null,
515                })
516                .collect();
517            if arr.len() == 1 {
518                Ok(arr.into_iter().next().expect("vec has exactly one element"))
519            } else {
520                Ok(serde_json::Value::Array(arr))
521            }
522        }
523        Vector::Raw(v) => {
524            let arr: Vec<serde_json::Value> = v.iter().map(|b| serde_json::json!(*b)).collect();
525            if arr.len() == 1 {
526                Ok(arr.into_iter().next().expect("vec has exactly one element"))
527            } else {
528                Ok(serde_json::Value::Array(arr))
529            }
530        }
531    }
532}
533
534/// Convert an f64 to a JSON number, handling special values.
535fn double_to_json(f: f64) -> Result<serde_json::Value, RError> {
536    if f.is_nan() || f.is_infinite() {
537        // JSON has no NaN/Inf, represent as null (matches jsonlite behavior)
538        Ok(serde_json::Value::Null)
539    } else {
540        Ok(serde_json::json!(f))
541    }
542}
543
544/// Convert an R list to JSON.
545/// Named lists become objects; unnamed lists become arrays.
546fn list_to_json(list: &RList) -> Result<serde_json::Value, RError> {
547    let all_named = !list.values.is_empty() && list.values.iter().all(|(name, _)| name.is_some());
548
549    if all_named {
550        let mut map = serde_json::Map::new();
551        for (name, value) in &list.values {
552            let key = name
553                .as_ref()
554                .expect("all_named check guarantees name is Some")
555                .clone();
556            map.insert(key, rvalue_to_json(value)?);
557        }
558        Ok(serde_json::Value::Object(map))
559    } else {
560        let arr: Result<Vec<serde_json::Value>, RError> = list
561            .values
562            .iter()
563            .map(|(_, value)| rvalue_to_json(value))
564            .collect();
565        Ok(serde_json::Value::Array(arr?))
566    }
567}
568
569// endregion