1use std::collections::HashSet;
5
6use super::CallArgs;
7use crate::interpreter::environment::Environment;
8use crate::interpreter::value::*;
9use crate::interpreter::BuiltinContext;
10use crate::interpreter::Interpreter;
11use crate::parser::ast::{Arg, Expr};
12use crate::parser::parse_program;
13use derive_more::{Display, Error};
14use itertools::Itertools;
15use minir_macros::{builtin, interpreter_builtin, pre_eval_builtin};
16
17const MINIR_RDS_HEADER: &str = "miniRDS1\n";
18const MINIR_WORKSPACE_CLASS: &str = "miniR.workspace";
19
20#[derive(Debug, Display, Error)]
24pub enum IoError {
25 #[display("cannot open file '{}': {}", path, source)]
26 CannotOpen {
27 path: String,
28 source: std::io::Error,
29 },
30 #[display("cannot write to file '{}': {}", path, source)]
31 WriteFailed {
32 path: String,
33 source: std::io::Error,
34 },
35 #[display("error reading CSV {}: {}", context, source)]
36 CsvRead { context: String, source: csv::Error },
37 #[display("error writing CSV: {}", source)]
38 CsvWrite {
39 #[error(source)]
40 source: csv::Error,
41 },
42 #[display("cannot open connection: {}", source)]
43 Connection {
44 #[error(source)]
45 source: std::io::Error,
46 },
47 #[display("unsupported value in saveRDS(): {}", details)]
48 UnsupportedSerialization { details: String },
49}
50
51impl From<IoError> for RError {
52 fn from(e: IoError) -> Self {
53 RError::from_source(RErrorKind::Other, e)
54 }
55}
56
57fn resolved_path_string(interp: &Interpreter, path: &str) -> String {
60 interp.resolve_path(path).to_string_lossy().to_string()
61}
62
/// Escapes `value` so it can be placed between double quotes in R source.
///
/// Backslash, double quote, newline, carriage return and tab are replaced by
/// their two-character escape sequences; every other character is copied
/// unchanged.
fn escape_r_string(value: &str) -> String {
    let mut out = String::with_capacity(value.len());
    for ch in value.chars() {
        let replacement = match ch {
            '\\' => Some("\\\\"),
            '"' => Some("\\\""),
            '\n' => Some("\\n"),
            '\r' => Some("\\r"),
            '\t' => Some("\\t"),
            _ => None,
        };
        match replacement {
            Some(sequence) => out.push_str(sequence),
            None => out.push(ch),
        }
    }
    out
}
77
/// Reports whether `name` can be written unquoted as an argument name in R
/// source, e.g. `structure(x, name = value)`.
///
/// A name qualifies when it:
/// - is non-empty and made only of ASCII letters, digits, `.` and `_`;
/// - starts with a letter, or with a `.` that is not followed by a digit
///   (`.2way` would otherwise be read as the number `.2` followed by `way`);
/// - is not one of R's reserved words, nor `...` / `..1`, `..2`, ...
///
/// Returning `false` makes the caller refuse to serialize the attribute
/// instead of writing text that cannot be parsed back.
fn syntactic_attr_name(name: &str) -> bool {
    // Reserved words as listed by R's `?Reserved`.
    const RESERVED: [&str; 19] = [
        "if",
        "else",
        "repeat",
        "while",
        "function",
        "for",
        "in",
        "next",
        "break",
        "TRUE",
        "FALSE",
        "NULL",
        "Inf",
        "NaN",
        "NA",
        "NA_integer_",
        "NA_real_",
        "NA_complex_",
        "NA_character_",
    ];

    if RESERVED.contains(&name) || name == "..." {
        return false;
    }
    // `..1`, `..2`, ... refer to elements of `...` and are reserved as well.
    let is_dots_index = name
        .strip_prefix("..")
        .is_some_and(|rest| !rest.is_empty() && rest.bytes().all(|b| b.is_ascii_digit()));
    if is_dots_index {
        return false;
    }

    let mut chars = name.chars();
    match chars.next() {
        Some(first) if first.is_ascii_alphabetic() => {}
        Some('.') => {
            // A leading dot followed by a digit lexes as a numeric literal.
            if chars.clone().next().is_some_and(|second| second.is_ascii_digit()) {
                return false;
            }
        }
        _ => return false,
    }
    chars.all(|ch| ch.is_ascii_alphanumeric() || ch == '.' || ch == '_')
}
88
89fn serialize_complex(value: num_complex::Complex64) -> String {
90 if value.im < 0.0 {
91 format!(
92 "{}{}i",
93 format_r_double(value.re),
94 format_r_double(value.im)
95 )
96 } else {
97 format!(
98 "{}+{}i",
99 format_r_double(value.re),
100 format_r_double(value.im)
101 )
102 }
103}
104
105fn serialize_vector(value: &Vector) -> String {
106 match value {
107 Vector::Raw(values) if values.is_empty() => "raw(0)".to_string(),
108 Vector::Raw(values) => format!(
109 "as.raw(c({}))",
110 values.iter().map(|value| value.to_string()).join(", ")
111 ),
112 Vector::Logical(values) if values.is_empty() => "logical(0)".to_string(),
113 Vector::Logical(values) => format!(
114 "c({})",
115 values
116 .iter()
117 .map(|value| match value {
118 Some(true) => "TRUE".to_string(),
119 Some(false) => "FALSE".to_string(),
120 None => "NA".to_string(),
121 })
122 .join(", ")
123 ),
124 Vector::Integer(values) if values.is_empty() => "integer(0)".to_string(),
125 Vector::Integer(values) => format!(
126 "c({})",
127 values
128 .iter()
129 .map(|value| match value {
130 Some(value) => format!("{value}L"),
131 None => "NA_integer_".to_string(),
132 })
133 .join(", ")
134 ),
135 Vector::Double(values) if values.is_empty() => "numeric(0)".to_string(),
136 Vector::Double(values) => format!(
137 "c({})",
138 values
139 .iter()
140 .map(|value| match value {
141 Some(value) => format_r_double(value),
142 None => "NA_real_".to_string(),
143 })
144 .join(", ")
145 ),
146 Vector::Complex(values) if values.is_empty() => "complex(0)".to_string(),
147 Vector::Complex(values) => format!(
148 "c({})",
149 values
150 .iter()
151 .map(|value| match value {
152 Some(value) => serialize_complex(*value),
153 None => "NA_complex_".to_string(),
154 })
155 .join(", ")
156 ),
157 Vector::Character(values) if values.is_empty() => "character(0)".to_string(),
158 Vector::Character(values) => format!(
159 "c({})",
160 values
161 .iter()
162 .map(|value| match value {
163 Some(value) => format!("\"{}\"", escape_r_string(value)),
164 None => "NA_character_".to_string(),
165 })
166 .join(", ")
167 ),
168 }
169}
170
171fn serialize_attr_pairs(
172 attrs: Option<&indexmap::IndexMap<String, RValue>>,
173 synthetic_names: Option<Vec<Option<String>>>,
174) -> Result<Vec<(String, String)>, RError> {
175 let mut pairs = Vec::new();
176 let mut seen = HashSet::new();
177
178 if let Some(names) = synthetic_names {
179 if names.iter().any(|name| name.is_some()) {
180 pairs.push((
181 "names".to_string(),
182 serialize_rvalue(&RValue::vec(Vector::Character(names.into())))?,
183 ));
184 seen.insert("names".to_string());
185 }
186 }
187
188 if let Some(attrs) = attrs {
189 for key in attrs.keys().sorted() {
190 if seen.contains(key) {
191 continue;
192 }
193 if !syntactic_attr_name(key) {
194 return Err(IoError::UnsupportedSerialization {
195 details: format!("attribute '{}' is not yet serializable", key),
196 }
197 .into());
198 }
199 pairs.push((key.clone(), serialize_rvalue(&attrs[key])?));
200 }
201 }
202
203 Ok(pairs)
204}
205
/// Wraps `base` in a `structure(...)` call carrying the given attributes.
///
/// Each pair is written as `name = value` in the order given. With no
/// attributes, `base` is returned unchanged.
fn serialize_with_attrs(base: String, attrs: Vec<(String, String)>) -> String {
    if attrs.is_empty() {
        return base;
    }

    let mut out = format!("structure({base}");
    for (name, value) in attrs {
        out.push_str(", ");
        out.push_str(&name);
        out.push_str(" = ");
        out.push_str(&value);
    }
    out.push(')');
    out
}
216
217fn serialize_rvalue(value: &RValue) -> Result<String, RError> {
218 match value {
219 RValue::Null => Ok("NULL".to_string()),
220 RValue::Vector(rv) => Ok(serialize_with_attrs(
221 serialize_vector(&rv.inner),
222 serialize_attr_pairs(rv.attrs.as_deref(), None)?,
223 )),
224 RValue::List(list) => {
225 let base = format!(
226 "list({})",
227 list.values
228 .iter()
229 .map(|(_, value)| serialize_rvalue(value))
230 .collect::<Result<Vec<_>, _>>()?
231 .join(", ")
232 );
233 let synthetic_names = if list.get_attr("names").is_none() {
234 Some(
235 list.values
236 .iter()
237 .map(|(name, _)| name.clone())
238 .collect::<Vec<_>>(),
239 )
240 } else {
241 None
242 };
243 Ok(serialize_with_attrs(
244 base,
245 serialize_attr_pairs(list.attrs.as_deref(), synthetic_names)?,
246 ))
247 }
248 RValue::Language(expr) => {
249 let base = format!("quote({})", deparse_expr(expr));
250 Ok(serialize_with_attrs(
251 base,
252 serialize_attr_pairs(expr.attrs.as_deref(), None)?,
253 ))
254 }
255 RValue::Function(_) => Err(IoError::UnsupportedSerialization {
256 details: "functions are not yet serializable".to_string(),
257 }
258 .into()),
259 RValue::Environment(_) => Err(IoError::UnsupportedSerialization {
260 details: "environments are not yet serializable".to_string(),
261 }
262 .into()),
263 RValue::Promise(_) => Err(IoError::UnsupportedSerialization {
264 details: "promises are not yet serializable".to_string(),
265 }
266 .into()),
267 }
268}
269
270fn write_minirds(path: &str, value: &RValue) -> Result<(), RError> {
273 let serialized = serialize_rvalue(value)?;
274 std::fs::write(path, format!("{MINIR_RDS_HEADER}{serialized}\n")).map_err(|source| {
275 IoError::WriteFailed {
276 path: path.to_string(),
277 source,
278 }
279 .into()
280 })
281}
282
283fn read_minirds(
284 path: &str,
285 reader_name: &str,
286 writer_name: &str,
287 interp: &Interpreter,
288) -> Result<RValue, RError> {
289 let content = std::fs::read_to_string(path).map_err(|source| IoError::CannotOpen {
290 path: path.to_string(),
291 source,
292 })?;
293
294 let body = content.strip_prefix(MINIR_RDS_HEADER).ok_or_else(|| {
295 RError::new(
296 RErrorKind::Argument,
297 format!(
298 "unsupported {reader_name}() format in '{}': miniR currently reads only miniRDS text files written by {writer_name}()",
299 path,
300 ),
301 )
302 })?;
303
304 let ast =
305 parse_program(body).map_err(|err| RError::new(RErrorKind::Parse, format!("{err}")))?;
306
307 let base = interp
308 .global_env
309 .parent()
310 .unwrap_or_else(|| interp.global_env.clone());
311 let eval_env = Environment::new_child(&base);
312 interp.eval_in(&ast, &eval_env).map_err(RError::from)
313}
314
315fn workspace_class_value() -> RValue {
320 RValue::vec(Vector::Character(
321 vec![Some(MINIR_WORKSPACE_CLASS.to_string())].into(),
322 ))
323}
324
325fn is_workspace_value(value: &RValue) -> bool {
326 let RValue::List(list) = value else {
327 return false;
328 };
329
330 list.get_attr("class")
331 .and_then(|value| value.as_vector())
332 .map(|values| {
333 values
334 .to_characters()
335 .iter()
336 .flatten()
337 .any(|class_name| class_name == MINIR_WORKSPACE_CLASS)
338 })
339 .unwrap_or(false)
340}
341
342fn workspace_binding_names(list: &RList) -> Result<Vec<String>, RError> {
343 if let Some(names_attr) = list.get_attr("names") {
344 let values = names_attr
345 .as_vector()
346 .ok_or_else(|| {
347 RError::new(
348 RErrorKind::Argument,
349 "invalid workspace file: 'names' attribute is not a character vector"
350 .to_string(),
351 )
352 })?
353 .to_characters();
354
355 if values.len() != list.values.len() {
356 return Err(RError::new(
357 RErrorKind::Argument,
358 "invalid workspace file: binding names do not match saved values".to_string(),
359 ));
360 }
361
362 return values
363 .into_iter()
364 .map(|name| {
365 name.ok_or_else(|| {
366 RError::new(
367 RErrorKind::Argument,
368 "invalid workspace file: every saved object needs a name".to_string(),
369 )
370 })
371 })
372 .collect();
373 }
374
375 list.values
376 .iter()
377 .map(|(name, _)| {
378 name.clone().ok_or_else(|| {
379 RError::new(
380 RErrorKind::Argument,
381 "invalid workspace file: every saved object needs a name".to_string(),
382 )
383 })
384 })
385 .collect()
386}
387
388fn eval_arg_value(arg: &Arg, env: &Environment, interp: &Interpreter) -> Result<RValue, RError> {
389 let expr = arg.value.as_ref().ok_or_else(|| {
390 RError::new(
391 RErrorKind::Argument,
392 "argument is missing a value".to_string(),
393 )
394 })?;
395 interp.eval_in(expr, env).map_err(RError::from)
396}
397
398fn push_save_name(
399 names: &mut Vec<String>,
400 seen: &mut HashSet<String>,
401 name: String,
402) -> Result<(), RError> {
403 if !seen.insert(name.clone()) {
404 return Err(RError::new(
405 RErrorKind::Argument,
406 format!("duplicate object name '{}' in save()", name),
407 ));
408 }
409
410 names.push(name);
411 Ok(())
412}
413
414fn workspace_file_arg(
415 args: &[Arg],
416 env: &Environment,
417 interp: &Interpreter,
418) -> Result<String, RError> {
419 args.iter()
420 .find(|arg| arg.name.as_deref() == Some("file"))
421 .ok_or_else(|| {
422 RError::new(
423 RErrorKind::Argument,
424 "save() requires a named 'file' argument".to_string(),
425 )
426 })
427 .and_then(|arg| {
428 eval_arg_value(arg, env, interp)?
429 .as_vector()
430 .and_then(|value| value.as_character_scalar())
431 .ok_or_else(|| {
432 RError::new(RErrorKind::Argument, "invalid 'file' argument".to_string())
433 })
434 })
435 .map(|path| resolved_path_string(interp, &path))
436}
437
438fn workspace_target_env(
439 args: &[Arg],
440 env: &Environment,
441 interp: &Interpreter,
442) -> Result<Environment, RError> {
443 match args.iter().find(|arg| arg.name.as_deref() == Some("envir")) {
444 Some(arg) => match eval_arg_value(arg, env, interp)? {
445 RValue::Environment(target_env) => Ok(target_env),
446 _ => Err(RError::new(
447 RErrorKind::Argument,
448 "invalid 'envir' argument".to_string(),
449 )),
450 },
451 None => Ok(env.clone()),
452 }
453}
454
455fn workspace_bool_arg(
456 args: &[Arg],
457 env: &Environment,
458 interp: &Interpreter,
459 name: &str,
460 default: bool,
461) -> Result<bool, RError> {
462 match args.iter().find(|arg| arg.name.as_deref() == Some(name)) {
463 Some(arg) => {
464 let val = eval_arg_value(arg, env, interp)?;
465 val.as_vector()
466 .and_then(|v| v.as_logical_scalar())
467 .ok_or_else(|| {
468 RError::new(
469 RErrorKind::Argument,
470 format!("invalid '{}' argument: expected TRUE or FALSE", name),
471 )
472 })
473 }
474 None => Ok(default),
475 }
476}
477
478fn workspace_requested_names(
479 args: &[Arg],
480 env: &Environment,
481 interp: &Interpreter,
482) -> Result<Vec<String>, RError> {
483 let mut names = Vec::new();
484 let mut seen = HashSet::new();
485
486 for arg in args {
487 match arg.name.as_deref() {
488 None => match arg.value.as_ref() {
489 Some(Expr::Symbol(name)) | Some(Expr::String(name)) => {
490 push_save_name(&mut names, &mut seen, name.clone())?;
491 }
492 Some(_) => {
493 return Err(RError::new(
494 RErrorKind::Argument,
495 "save() positional arguments must be bare names; use list = c(...) for computed names".to_string(),
496 ));
497 }
498 None => {
499 return Err(RError::new(
500 RErrorKind::Argument,
501 "save() received an empty argument".to_string(),
502 ));
503 }
504 },
505 Some("file" | "envir") => {}
506 Some("list") => {
507 let listed = eval_arg_value(arg, env, interp)?;
508 if listed.is_null() {
509 continue;
510 }
511
512 let listed_names = listed
513 .as_vector()
514 .ok_or_else(|| {
515 RError::new(
516 RErrorKind::Argument,
517 "invalid 'list' argument in save(): expected a character vector of object names"
518 .to_string(),
519 )
520 })?
521 .to_characters();
522
523 for name in listed_names {
524 push_save_name(
525 &mut names,
526 &mut seen,
527 name.ok_or_else(|| {
528 RError::new(
529 RErrorKind::Argument,
530 "invalid 'list' argument in save(): object names cannot be NA"
531 .to_string(),
532 )
533 })?,
534 )?;
535 }
536 }
537 Some("ascii" | "compress" | "version" | "precheck" | "eval.promises" | "safe") => {}
538 Some(name) => {
539 return Err(RError::new(
540 RErrorKind::Argument,
541 format!("unsupported argument '{}' in save()", name),
542 ));
543 }
544 }
545 }
546
547 if names.is_empty() {
548 return Err(RError::new(
549 RErrorKind::Argument,
550 "save() needs at least one object name".to_string(),
551 ));
552 }
553
554 Ok(names)
555}
556
557fn read_rds_path(args: &[RValue], named: &[(String, RValue)]) -> Result<String, RError> {
560 CallArgs::new(args, named).string("file", 0)
561}
562
563#[interpreter_builtin(name = "readRDS", min_args = 1)]
572fn interp_read_rds(
573 args: &[RValue],
574 named: &[(String, RValue)],
575 context: &BuiltinContext,
576) -> Result<RValue, RError> {
577 let path = resolved_path_string(context.interpreter(), &read_rds_path(args, named)?);
578
579 let raw_bytes = std::fs::read(&path).map_err(|source| IoError::CannotOpen {
581 path: path.clone(),
582 source,
583 })?;
584
585 if super::serialize::is_binary_rds(&raw_bytes) {
586 return super::serialize::unserialize_rds(&raw_bytes);
587 }
588
589 read_minirds(&path, "readRDS", "saveRDS", context.interpreter())
591}
592
593#[interpreter_builtin(name = "saveRDS", min_args = 2)]
606fn builtin_save_rds(
607 args: &[RValue],
608 named: &[(String, RValue)],
609 context: &BuiltinContext,
610) -> Result<RValue, RError> {
611 let call_args = CallArgs::new(args, named);
612 let object = call_args.value("object", 0).ok_or_else(|| {
613 RError::new(
614 RErrorKind::Argument,
615 "argument 'object' is missing".to_string(),
616 )
617 })?;
618 let path = resolved_path_string(context.interpreter(), &call_args.string("file", 1)?);
619
620 let ascii = call_args
622 .value("ascii", 2)
623 .and_then(|v| v.as_vector().and_then(|vec| vec.as_logical_scalar()))
624 .unwrap_or(false);
625
626 let compress = call_args
628 .value("compress", 3)
629 .and_then(|v| v.as_vector().and_then(|vec| vec.as_logical_scalar()))
630 .unwrap_or(true);
631
632 let bytes = super::serialize::serialize_rds(object, compress, ascii);
633 std::fs::write(&path, bytes).map_err(|source| IoError::WriteFailed {
634 path: path.clone(),
635 source,
636 })?;
637 Ok(RValue::Null)
638}
639
640#[interpreter_builtin(name = "load", min_args = 1)]
646fn interp_load(
647 positional: &[RValue],
648 named: &[(String, RValue)],
649 context: &BuiltinContext,
650) -> Result<RValue, RError> {
651 let path = resolved_path_string(context.interpreter(), &read_rds_path(positional, named)?);
652 let env = context.env();
653 let target_env = named
654 .iter()
655 .find(|(name, _)| name == "envir")
656 .map(|(_, value)| value)
657 .or_else(|| positional.get(1))
658 .map(|value| match value {
659 RValue::Environment(target_env) => Ok(target_env.clone()),
660 _ => Err(RError::new(
661 RErrorKind::Argument,
662 "invalid 'envir' argument".to_string(),
663 )),
664 })
665 .transpose()?
666 .unwrap_or_else(|| env.clone());
667
668 let raw_bytes = std::fs::read(&path).map_err(|source| IoError::CannotOpen {
670 path: path.clone(),
671 source,
672 })?;
673
674 if let Some(names) = try_load_binary_rdata(&raw_bytes, &target_env)? {
675 return Ok(RValue::vec(Vector::Character(names.into())));
676 }
677
678 if super::serialize::is_binary_rds(&raw_bytes) {
681 return Err(RError::new(
682 RErrorKind::Argument,
683 format!(
684 "unsupported load() format in '{}': not a recognized workspace file \
685 (this looks like an RDS file — use readRDS() instead of load())",
686 path
687 ),
688 ));
689 }
690
691 let value = read_minirds(&path, "load", "save", context.interpreter())?;
693 if !is_workspace_value(&value) {
694 return Err(RError::new(
695 RErrorKind::Argument,
696 format!(
697 "unsupported load() format in '{}': not a recognized workspace file",
698 path
699 ),
700 ));
701 }
702
703 let RValue::List(list) = value else {
704 unreachable!();
705 };
706 let names = workspace_binding_names(&list)?;
707 let loaded_names: Vec<Option<String>> = names.iter().cloned().map(Some).collect();
708
709 for (name, (_, saved_value)) in names.into_iter().zip(list.values.into_iter()) {
710 target_env.set(name, saved_value);
711 }
712
713 Ok(RValue::vec(Vector::Character(loaded_names.into())))
714}
715
716pub fn try_load_binary_rdata(
721 data: &[u8],
722 target_env: &Environment,
723) -> Result<Option<Vec<Option<String>>>, RError> {
724 let working_data = if data.starts_with(b"RDX2\n") {
727 data.to_vec()
728 } else if super::serialize::is_gzip_data(data) {
729 #[cfg(feature = "compression")]
730 {
731 use flate2::read::GzDecoder;
732 use std::io::Read;
733 let mut decoder = GzDecoder::new(data);
734 let mut buf = Vec::new();
735 decoder.read_to_end(&mut buf).map_err(|e| {
736 RError::new(
737 RErrorKind::Other,
738 format!("failed to decompress .RData file: {}", e),
739 )
740 })?;
741 if buf.starts_with(b"RDX2\n") {
742 buf
743 } else {
744 return Ok(None);
745 }
746 }
747 #[cfg(not(feature = "compression"))]
748 {
749 return Ok(None);
750 }
751 } else if super::serialize::is_bzip2_data(data) {
752 #[cfg(feature = "compression")]
753 {
754 use bzip2::read::BzDecoder;
755 use std::io::Read;
756 let mut decoder = BzDecoder::new(data);
757 let mut buf = Vec::new();
758 decoder.read_to_end(&mut buf).map_err(|e| {
759 RError::new(
760 RErrorKind::Other,
761 format!("failed to decompress bzip2 .RData file: {}", e),
762 )
763 })?;
764 if buf.starts_with(b"RDX2\n") {
765 buf
766 } else {
767 return Ok(None);
768 }
769 }
770 #[cfg(not(feature = "compression"))]
771 {
772 return Ok(None);
773 }
774 } else {
775 return Ok(None);
776 };
777 let payload = &working_data[5..];
778
779 let value = super::serialize::unserialize_xdr(payload)?;
781
782 let loaded_names = match value {
785 RValue::List(list) => {
786 let mut names = Vec::new();
787 for (name, val) in list.values {
788 if let Some(n) = name {
789 target_env.set(n.clone(), val);
790 names.push(Some(n));
791 }
792 }
793 names
794 }
795 RValue::Null => Vec::new(),
796 _ => {
797 target_env.set(".Data".to_string(), value);
799 vec![Some(".Data".to_string())]
800 }
801 };
802
803 Ok(Some(loaded_names))
804}
805
806#[pre_eval_builtin(name = "save")]
819fn pre_eval_save(
820 args: &[Arg],
821 env: &Environment,
822 context: &BuiltinContext,
823) -> Result<RValue, RError> {
824 let interp = context.interpreter();
825 let path = workspace_file_arg(args, env, interp)?;
826 let target_env = workspace_target_env(args, env, interp)?;
827 let requested_names = workspace_requested_names(args, env, interp)?;
828 let ascii = workspace_bool_arg(args, env, interp, "ascii", false)?;
829 let compress = workspace_bool_arg(args, env, interp, "compress", true)?;
830
831 let mut bindings = Vec::with_capacity(requested_names.len());
832 for name in requested_names {
833 let value = target_env.get(&name).ok_or_else(|| {
834 RError::new(
835 RErrorKind::Name,
836 format!("object '{}' not found in save()", name),
837 )
838 })?;
839 bindings.push((name, value));
840 }
841
842 if ascii {
843 let values = bindings
845 .into_iter()
846 .map(|(name, value)| (Some(name), value))
847 .collect();
848 let mut workspace = RList::new(values);
849 workspace.set_attr("class".to_string(), workspace_class_value());
850 write_minirds(&path, &RValue::List(workspace))?;
851 } else {
852 let bytes = super::serialize::serialize_rdata(&bindings, compress);
854 std::fs::write(&path, bytes).map_err(|source| IoError::WriteFailed {
855 path: path.clone(),
856 source,
857 })?;
858 }
859 Ok(RValue::Null)
860}
861
862#[pre_eval_builtin(name = "save.image")]
869fn pre_eval_save_image(
870 args: &[Arg],
871 env: &Environment,
872 context: &BuiltinContext,
873) -> Result<RValue, RError> {
874 let interp = context.interpreter();
875
876 let path = args
878 .iter()
879 .find(|arg| arg.name.as_deref() == Some("file"))
880 .or_else(|| {
881 args.iter()
882 .find(|arg| arg.name.is_none() && arg.value.is_some())
883 })
884 .map(|arg| eval_arg_value(arg, env, interp))
885 .transpose()?
886 .and_then(|v| v.as_vector().and_then(|vec| vec.as_character_scalar()))
887 .unwrap_or_else(|| ".RData".to_string());
888 let path = resolved_path_string(interp, &path);
889
890 let ascii = workspace_bool_arg(args, env, interp, "ascii", false)?;
891 let compress = workspace_bool_arg(args, env, interp, "compress", true)?;
892
893 let global_env = &interp.global_env;
895 let all_names = global_env.ls();
896 let mut bindings = Vec::with_capacity(all_names.len());
897 for name in all_names {
898 if let Some(value) = global_env.get(&name) {
899 bindings.push((name, value));
900 }
901 }
902
903 if ascii {
904 let values = bindings
905 .into_iter()
906 .map(|(name, value)| (Some(name), value))
907 .collect();
908 let mut workspace = RList::new(values);
909 workspace.set_attr("class".to_string(), workspace_class_value());
910 write_minirds(&path, &RValue::List(workspace))?;
911 } else {
912 let bytes = super::serialize::serialize_rdata(&bindings, compress);
913 std::fs::write(&path, bytes).map_err(|source| IoError::WriteFailed {
914 path: path.clone(),
915 source,
916 })?;
917 }
918 Ok(RValue::Null)
919}
920
921#[builtin]
927fn builtin_file_path(args: &[RValue], named: &[(String, RValue)]) -> Result<RValue, RError> {
928 let sep = CallArgs::new(args, named)
929 .named_string("fsep")
930 .unwrap_or_else(|| "/".to_string());
931
932 let parts: Vec<String> = args
933 .iter()
934 .filter_map(|v| v.as_vector()?.as_character_scalar())
935 .collect();
936 Ok(RValue::vec(Vector::Character(
937 vec![Some(parts.join(&sep))].into(),
938 )))
939}
940
941#[interpreter_builtin(name = "file.exists", min_args = 1)]
946fn builtin_file_exists(
947 args: &[RValue],
948 _: &[(String, RValue)],
949 context: &BuiltinContext,
950) -> Result<RValue, RError> {
951 let results: Vec<Option<bool>> = args
952 .iter()
953 .map(|arg| {
954 let path = arg
955 .as_vector()
956 .and_then(|v| v.as_character_scalar())
957 .unwrap_or_default();
958 let path = resolved_path_string(context.interpreter(), &path);
959 Some(std::path::Path::new(&path).exists())
960 })
961 .collect();
962 Ok(RValue::vec(Vector::Logical(results.into())))
963}
964
965#[interpreter_builtin(name = "read.csv", min_args = 1, namespace = "utils")]
972fn builtin_read_csv(
973 args: &[RValue],
974 named: &[(String, RValue)],
975 context: &BuiltinContext,
976) -> Result<RValue, RError> {
977 let call_args = CallArgs::new(args, named);
978 let path = resolved_path_string(context.interpreter(), &call_args.string("file", 0)?);
979
980 let header = call_args.logical_flag("header", usize::MAX, true);
981
982 let sep = call_args
983 .named_string("sep")
984 .and_then(|s| s.bytes().next())
985 .unwrap_or(b',');
986
987 let mut rdr = csv::ReaderBuilder::new()
988 .has_headers(header)
989 .delimiter(sep)
990 .from_path(&path)
991 .map_err(|source| IoError::CsvRead {
992 context: format!("opening '{}'", path),
993 source,
994 })?;
995
996 let col_names: Vec<String> = if header {
997 rdr.headers()
998 .map_err(|source| IoError::CsvRead {
999 context: "headers".to_string(),
1000 source,
1001 })?
1002 .iter()
1003 .map(|s| s.to_string())
1004 .collect()
1005 } else {
1006 let ncols = rdr
1008 .records()
1009 .next()
1010 .and_then(|r| r.ok())
1011 .map(|r| r.len())
1012 .unwrap_or(0);
1013 (1..=ncols).map(|i| format!("V{}", i)).collect()
1014 };
1015
1016 let ncols = col_names.len();
1017 let mut columns: Vec<Vec<Option<String>>> = vec![vec![]; ncols];
1018 let mut nrows = 0usize;
1019
1020 for result in rdr.records() {
1021 let record = result.map_err(|source| IoError::CsvRead {
1022 context: "record".to_string(),
1023 source,
1024 })?;
1025 for (i, field) in record.iter().enumerate() {
1026 if i < ncols {
1027 if field == "NA" || field.is_empty() {
1028 columns[i].push(None);
1029 } else {
1030 columns[i].push(Some(field.to_string()));
1031 }
1032 }
1033 }
1034 nrows += 1;
1035 }
1036
1037 let mut list_cols: Vec<(Option<String>, RValue)> = Vec::new();
1039 for (i, col_data) in columns.into_iter().enumerate() {
1040 let name = col_names.get(i).cloned();
1041 let all_numeric = col_data.iter().all(|v| match v {
1043 None => true,
1044 Some(s) => s.parse::<f64>().is_ok(),
1045 });
1046 if all_numeric {
1047 let all_int = col_data.iter().all(|v| match v {
1049 None => true,
1050 Some(s) => s.parse::<i64>().is_ok(),
1051 });
1052 if all_int {
1053 let vals: Vec<Option<i64>> =
1054 col_data.iter().map(|v| v.as_ref()?.parse().ok()).collect();
1055 list_cols.push((name, RValue::vec(Vector::Integer(vals.into()))));
1056 } else {
1057 let vals: Vec<Option<f64>> =
1058 col_data.iter().map(|v| v.as_ref()?.parse().ok()).collect();
1059 list_cols.push((name, RValue::vec(Vector::Double(vals.into()))));
1060 }
1061 } else {
1062 list_cols.push((name, RValue::vec(Vector::Character(col_data.into()))));
1063 }
1064 }
1065
1066 let mut list = RList::new(list_cols);
1067 list.set_attr(
1068 "class".to_string(),
1069 RValue::vec(Vector::Character(
1070 vec![Some("data.frame".to_string())].into(),
1071 )),
1072 );
1073 list.set_attr(
1074 "names".to_string(),
1075 RValue::vec(Vector::Character(
1076 col_names.into_iter().map(Some).collect::<Vec<_>>().into(),
1077 )),
1078 );
1079 let row_names: Vec<Option<i64>> = (1..=i64::try_from(nrows)?).map(Some).collect();
1080 list.set_attr(
1081 "row.names".to_string(),
1082 RValue::vec(Vector::Integer(row_names.into())),
1083 );
1084 Ok(RValue::List(list))
1085}
1086
1087#[interpreter_builtin(name = "write.csv", min_args = 1, namespace = "utils")]
1094fn builtin_write_csv(
1095 args: &[RValue],
1096 named: &[(String, RValue)],
1097 context: &BuiltinContext,
1098) -> Result<RValue, RError> {
1099 let data = args
1100 .first()
1101 .ok_or_else(|| RError::new(RErrorKind::Argument, "argument 'x' is missing".to_string()))?;
1102 let file = args
1103 .get(1)
1104 .or_else(|| named.iter().find(|(n, _)| n == "file").map(|(_, v)| v))
1105 .and_then(|v| v.as_vector()?.as_character_scalar())
1106 .ok_or_else(|| {
1107 RError::new(
1108 RErrorKind::Argument,
1109 "argument 'file' is missing".to_string(),
1110 )
1111 })?;
1112 let file = resolved_path_string(context.interpreter(), &file);
1113
1114 let row_names = named
1115 .iter()
1116 .find(|(n, _)| n == "row.names")
1117 .and_then(|(_, v)| v.as_vector()?.as_logical_scalar())
1118 .unwrap_or(true);
1119
1120 let RValue::List(list) = data else {
1121 return Err(RError::new(
1122 RErrorKind::Argument,
1123 "write.csv requires a data frame or list".to_string(),
1124 ));
1125 };
1126
1127 let mut wtr = csv::Writer::from_path(&file).map_err(|source| IoError::CsvRead {
1128 context: format!("opening '{}'", file),
1129 source,
1130 })?;
1131
1132 let col_names: Vec<String> = list
1134 .values
1135 .iter()
1136 .map(|(n, _)| n.clone().unwrap_or_default())
1137 .collect();
1138
1139 if row_names {
1140 let mut header = vec!["".to_string()];
1141 header.extend(col_names.clone());
1142 wtr.write_record(&header)
1143 .map_err(|source| IoError::CsvWrite { source })?;
1144 } else {
1145 wtr.write_record(&col_names)
1146 .map_err(|source| IoError::CsvWrite { source })?;
1147 }
1148
1149 let nrows = list.values.first().map(|(_, v)| v.length()).unwrap_or(0);
1151
1152 for row in 0..nrows {
1154 let mut record: Vec<String> = Vec::new();
1155 if row_names {
1156 record.push((row + 1).to_string());
1157 }
1158 for (_, col_val) in &list.values {
1159 if let RValue::Vector(rv) = col_val {
1160 let chars = rv.to_characters();
1161 record.push(
1162 chars
1163 .get(row)
1164 .and_then(|v| v.clone())
1165 .unwrap_or_else(|| "NA".to_string()),
1166 );
1167 } else {
1168 record.push("NA".to_string());
1169 }
1170 }
1171 wtr.write_record(&record)
1172 .map_err(|source| IoError::CsvWrite { source })?;
1173 }
1174
1175 wtr.flush().map_err(|source| IoError::CsvWrite {
1176 source: csv::Error::from(source),
1177 })?;
1178 Ok(RValue::Null)
1179}
1180
1181#[interpreter_builtin(name = "scan")]
1188fn builtin_scan(
1189 args: &[RValue],
1190 named: &[(String, RValue)],
1191 context: &BuiltinContext,
1192) -> Result<RValue, RError> {
1193 let file = args
1194 .first()
1195 .and_then(|v| match v {
1196 RValue::Vector(rv) => rv.inner.as_character_scalar(),
1197 _ => None,
1198 })
1199 .unwrap_or_default();
1200
1201 if file.is_empty() {
1202 return Err(RError::new(
1203 RErrorKind::Argument,
1204 "scan() requires a file path — reading from stdin is not yet supported".to_string(),
1205 ));
1206 }
1207
1208 let file = resolved_path_string(context.interpreter(), &file);
1209
1210 let content = std::fs::read_to_string(&file).map_err(|source| IoError::CannotOpen {
1211 path: file.clone(),
1212 source,
1213 })?;
1214
1215 let sep = named
1217 .iter()
1218 .find(|(n, _)| n == "sep")
1219 .and_then(|(_, v)| match v {
1220 RValue::Vector(rv) => rv.inner.as_character_scalar(),
1221 _ => None,
1222 });
1223
1224 let tokens: Vec<&str> = match &sep {
1225 Some(s) if !s.is_empty() => content.split(s.as_str()).collect(),
1226 _ => content.split_whitespace().collect(),
1227 };
1228
1229 let what = args
1231 .get(1)
1232 .or_else(|| named.iter().find(|(n, _)| n == "what").map(|(_, v)| v));
1233
1234 match what {
1235 Some(RValue::Vector(rv)) => match &rv.inner {
1236 Vector::Double(_) => {
1237 let vals: Vec<Option<f64>> = tokens.iter().map(|t| t.parse::<f64>().ok()).collect();
1238 Ok(RValue::vec(Vector::Double(vals.into())))
1239 }
1240 Vector::Integer(_) => {
1241 let vals: Vec<Option<i64>> = tokens.iter().map(|t| t.parse::<i64>().ok()).collect();
1242 Ok(RValue::vec(Vector::Integer(vals.into())))
1243 }
1244 Vector::Logical(_) => {
1245 let vals: Vec<Option<bool>> = tokens
1246 .iter()
1247 .map(|t| match *t {
1248 "TRUE" | "T" => Some(true),
1249 "FALSE" | "F" => Some(false),
1250 _ => None,
1251 })
1252 .collect();
1253 Ok(RValue::vec(Vector::Logical(vals.into())))
1254 }
1255 _ => {
1256 let vals: Vec<Option<String>> =
1257 tokens.iter().map(|t| Some(t.to_string())).collect();
1258 Ok(RValue::vec(Vector::Character(vals.into())))
1259 }
1260 },
1261 _ => {
1262 let vals: Vec<Option<String>> = tokens.iter().map(|t| Some(t.to_string())).collect();
1263 Ok(RValue::vec(Vector::Character(vals.into())))
1264 }
1265 }
1266}
1267
1268#[interpreter_builtin(name = "read.table", min_args = 1, namespace = "utils")]
1275fn builtin_read_table(
1276 args: &[RValue],
1277 named: &[(String, RValue)],
1278 context: &BuiltinContext,
1279) -> Result<RValue, RError> {
1280 let file = match &args[0] {
1281 RValue::Vector(rv) => rv.inner.as_character_scalar().ok_or_else(|| {
1282 RError::new(
1283 RErrorKind::Argument,
1284 "read.table() requires a file path string".to_string(),
1285 )
1286 })?,
1287 _ => {
1288 return Err(RError::new(
1289 RErrorKind::Argument,
1290 "read.table() requires a file path string".to_string(),
1291 ))
1292 }
1293 };
1294
1295 let header = named
1296 .iter()
1297 .find(|(n, _)| n == "header")
1298 .and_then(|(_, v)| match v {
1299 RValue::Vector(rv) => rv.inner.as_logical_scalar(),
1300 _ => None,
1301 })
1302 .unwrap_or(false);
1303
1304 let sep = named
1305 .iter()
1306 .find(|(n, _)| n == "sep")
1307 .and_then(|(_, v)| match v {
1308 RValue::Vector(rv) => rv.inner.as_character_scalar(),
1309 _ => None,
1310 })
1311 .unwrap_or_else(|| "".to_string()); let file = resolved_path_string(context.interpreter(), &file);
1314
1315 let content = std::fs::read_to_string(&file).map_err(|source| IoError::CannotOpen {
1316 path: file.clone(),
1317 source,
1318 })?;
1319
1320 let mut lines: Vec<&str> = content.lines().collect();
1321 if lines.is_empty() {
1322 return Ok(RValue::List(RList::new(vec![])));
1323 }
1324
1325 let col_names: Vec<String> = if header {
1327 let header_line = lines.remove(0);
1328 split_line(header_line, &sep)
1329 .iter()
1330 .map(|s| s.to_string())
1331 .collect()
1332 } else {
1333 vec![]
1334 };
1335
1336 let rows: Vec<Vec<String>> = lines
1338 .iter()
1339 .filter(|l| !l.trim().is_empty())
1340 .map(|l| split_line(l, &sep).iter().map(|s| s.to_string()).collect())
1341 .collect();
1342
1343 if rows.is_empty() {
1344 return Ok(RValue::List(RList::new(vec![])));
1345 }
1346
1347 let ncols = rows.iter().map(|r| r.len()).max().unwrap_or(0);
1348 let mut columns: Vec<(Option<String>, RValue)> = Vec::with_capacity(ncols);
1349
1350 for col_idx in 0..ncols {
1351 let col_data: Vec<Option<String>> = rows.iter().map(|r| r.get(col_idx).cloned()).collect();
1352
1353 let all_numeric = col_data.iter().all(|v| {
1355 v.as_ref()
1356 .is_none_or(|s| s.is_empty() || s == "NA" || s.parse::<f64>().is_ok())
1357 });
1358
1359 let col_val = if all_numeric {
1360 let vals: Vec<Option<f64>> = col_data
1361 .iter()
1362 .map(|v| {
1363 v.as_ref().and_then(|s| {
1364 if s == "NA" || s.is_empty() {
1365 None
1366 } else {
1367 s.parse().ok()
1368 }
1369 })
1370 })
1371 .collect();
1372 RValue::vec(Vector::Double(vals.into()))
1373 } else {
1374 RValue::vec(Vector::Character(col_data.into()))
1375 };
1376
1377 let name = col_names
1378 .get(col_idx)
1379 .cloned()
1380 .or_else(|| Some(format!("V{}", col_idx + 1)));
1381 columns.push((name, col_val));
1382 }
1383
1384 Ok(RValue::List(RList::new(columns)))
1385}
1386
1387#[interpreter_builtin(name = "write.table", min_args = 2, namespace = "utils")]
1396fn builtin_write_table(
1397 args: &[RValue],
1398 named: &[(String, RValue)],
1399 context: &BuiltinContext,
1400) -> Result<RValue, RError> {
1401 let file = match &args[1] {
1402 RValue::Vector(rv) => rv.inner.as_character_scalar().ok_or_else(|| {
1403 RError::new(
1404 RErrorKind::Argument,
1405 "write.table() requires a file path".to_string(),
1406 )
1407 })?,
1408 _ => {
1409 return Err(RError::new(
1410 RErrorKind::Argument,
1411 "write.table() requires a file path as second argument".to_string(),
1412 ))
1413 }
1414 };
1415 let file = resolved_path_string(context.interpreter(), &file);
1416
1417 let sep = named
1418 .iter()
1419 .find(|(n, _)| n == "sep")
1420 .and_then(|(_, v)| match v {
1421 RValue::Vector(rv) => rv.inner.as_character_scalar(),
1422 _ => None,
1423 })
1424 .unwrap_or_else(|| " ".to_string());
1425
1426 let col_names = named
1427 .iter()
1428 .find(|(n, _)| n == "col.names")
1429 .and_then(|(_, v)| match v {
1430 RValue::Vector(rv) => rv.inner.as_logical_scalar(),
1431 _ => None,
1432 })
1433 .unwrap_or(true);
1434
1435 let quote = named
1436 .iter()
1437 .find(|(n, _)| n == "quote")
1438 .and_then(|(_, v)| match v {
1439 RValue::Vector(rv) => rv.inner.as_logical_scalar(),
1440 _ => None,
1441 })
1442 .unwrap_or(true);
1443
1444 let mut output = String::new();
1445
1446 match &args[0] {
1447 RValue::List(list) => {
1448 let ncols = list.values.len();
1449 let nrows = list.values.first().map(|(_, v)| v.length()).unwrap_or(0);
1450
1451 if col_names {
1453 let names: Vec<String> = list
1454 .values
1455 .iter()
1456 .enumerate()
1457 .map(|(i, (name, _))| {
1458 let n = name.clone().unwrap_or_else(|| format!("V{}", i + 1));
1459 if quote {
1460 format!("\"{}\"", n)
1461 } else {
1462 n
1463 }
1464 })
1465 .collect();
1466 output.push_str(&names.join(&sep));
1467 output.push('\n');
1468 }
1469
1470 for row_idx in 0..nrows {
1472 let cells: Vec<String> = (0..ncols)
1473 .map(|col_idx| {
1474 let (_, val) = &list.values[col_idx];
1475 format_cell(val, row_idx, quote)
1476 })
1477 .collect();
1478 output.push_str(&cells.join(&sep));
1479 output.push('\n');
1480 }
1481 }
1482 RValue::Vector(rv) => {
1483 let dim = rv.get_attr("dim");
1485 match dim {
1486 Some(RValue::Vector(dim_rv)) => {
1487 if let Vector::Integer(d) = &dim_rv.inner {
1488 if d.len() >= 2 {
1489 let nrow = usize::try_from(d.get_opt(0).unwrap_or(0))?;
1490 let ncol = usize::try_from(d.get_opt(1).unwrap_or(0))?;
1491 for r in 0..nrow {
1492 let cells: Vec<String> = (0..ncol)
1493 .map(|c| {
1494 let idx = c * nrow + r;
1495 format_cell(&args[0], idx, quote)
1496 })
1497 .collect();
1498 output.push_str(&cells.join(&sep));
1499 output.push('\n');
1500 }
1501 }
1502 }
1503 }
1504 _ => {
1505 for i in 0..rv.inner.len() {
1507 output.push_str(&format_cell(&args[0], i, quote));
1508 output.push('\n');
1509 }
1510 }
1511 }
1512 }
1513 _ => {
1514 return Err(RError::new(
1515 RErrorKind::Type,
1516 "write.table() requires a list or matrix".to_string(),
1517 ))
1518 }
1519 }
1520
1521 std::fs::write(&file, output).map_err(|source| IoError::WriteFailed {
1522 path: file.clone(),
1523 source,
1524 })?;
1525
1526 Ok(RValue::Null)
1527}
1528
/// Break one line of input into its fields.
///
/// An empty `sep` selects whitespace splitting: runs of whitespace act as a
/// single delimiter and leading/trailing whitespace yields no fields. Any
/// other `sep` is matched literally, and empty fields between adjacent
/// separators are kept.
fn split_line<'a>(line: &'a str, sep: &str) -> Vec<&'a str> {
    match sep {
        "" => line.split_whitespace().collect(),
        delimiter => line.split(delimiter).collect(),
    }
}
1537
1538fn format_cell(val: &RValue, idx: usize, quote: bool) -> String {
1540 match val {
1541 RValue::Vector(rv) => match &rv.inner {
1542 Vector::Raw(v) => v
1543 .get(idx)
1544 .map_or("00".to_string(), |b| format!("{:02x}", b)),
1545 Vector::Double(v) => v
1546 .get_opt(idx)
1547 .map_or("NA".to_string(), |f| format!("{}", f)),
1548 Vector::Integer(v) => v
1549 .get_opt(idx)
1550 .map_or("NA".to_string(), |i| format!("{}", i)),
1551 Vector::Logical(v) => v.get(idx).and_then(|x| *x).map_or("NA".to_string(), |b| {
1552 if b { "TRUE" } else { "FALSE" }.to_string()
1553 }),
1554 Vector::Complex(v) => v
1555 .get(idx)
1556 .and_then(|x| *x)
1557 .map_or("NA".to_string(), format_r_complex),
1558 Vector::Character(v) => {
1559 v.get(idx)
1560 .and_then(|x| x.as_ref())
1561 .map_or("NA".to_string(), |s| {
1562 if quote {
1563 format!("\"{}\"", s)
1564 } else {
1565 s.clone()
1566 }
1567 })
1568 }
1569 },
1570 _ => "NA".to_string(),
1571 }
1572}