1use crate::interpreter::environment::Environment;
8use crate::interpreter::value::*;
9use crate::parser::ast::{Expr, Param};
10use indexmap::IndexMap;
11use std::fmt::Write as FmtWrite;
12
/// Raw 32-bit value that the readers decode as a missing (`NA`) integer element.
const R_NA_INTEGER: i32 = i32::MIN;

/// Raw 32-bit value that the readers decode as a missing (`NA`) logical element.
const R_NA_LOGICAL: i32 = i32::MIN;

/// Bit pattern of the double that stands for a missing (`NA`) real value:
/// an all-ones exponent with 0x7A2 in the low bits.
const R_NA_REAL_BITS: u64 = 0x7FF00000000007A2;
23
// SEXPTYPE codes stored in the low byte of an item's flags word. The readers
// dispatch on these values in `read_item_inner`.

/// Null object.
const NILSXP: u8 = 0;
/// Symbol; its payload is a single CHARSXP.
const SYMSXP: u8 = 1;
/// Pairlist cell.
const LISTSXP: u8 = 2;
/// Closure (environment, formals, body).
const CLOSXP: u8 = 3;
/// Environment.
const ENVSXP: u8 = 4;
/// Promise.
const PROMSXP: u8 = 5;
/// Language object (call), stored like a pairlist.
const LANGSXP: u8 = 6;
/// Special primitive, stored by name.
const SPECIALSXP: u8 = 7;
/// Builtin primitive, stored by name.
const BUILTINSXP: u8 = 8;
/// Single string (length-prefixed bytes, length -1 meaning NA).
const CHARSXP: u8 = 9;
/// Logical vector.
const LGLSXP: u8 = 10;
/// Integer vector.
const INTSXP: u8 = 13;
/// Double vector.
const REALSXP: u8 = 14;
/// Complex vector.
const CPLXSXP: u8 = 15;
/// Character vector (a sequence of CHARSXP items).
const STRSXP: u8 = 16;
/// Generic vector (list).
const VECSXP: u8 = 19;
/// Expression vector; decoded the same way as VECSXP.
const EXPRSXP: u8 = 20;
/// Raw byte vector.
const RAWSXP: u8 = 24;
43
// Pseudo-SEXPTYPE codes: markers that stand for well-known singleton objects
// or stream-level constructs rather than ordinary values. The numeric values
// must match the ones GNU R's `serialize.c` writes, otherwise files produced
// by R fail with "unsupported SEXPTYPE".

/// Back-reference into the reader's reference table.
const REFSXP: u8 = 255;
/// The `NULL` object.
const NILVALUE_SXP: u8 = 254;
/// The global environment.
const GLOBALENV_SXP: u8 = 253;
/// The "unbound value" marker (e.g. the value slot of an unforced promise).
#[allow(dead_code)]
const UNBOUNDVALUE_SXP: u8 = 252;
/// The "missing argument" marker (a formal without a default).
const MISSINGARG_SXP: u8 = 251;
/// The base namespace.
const BASENAMESPACE_SXP: u8 = 250;
/// A namespace environment, written as a reference by name.
const NAMESPACESXP: u8 = 249;
/// The empty environment.
const EMPTYENV_SXP: u8 = 242;
/// The base environment.
const BASEENV_SXP: u8 = 241;
55
/// Bit of the flags word that is set when the item carries attributes.
const HAS_ATTR_MASK: u32 = 1 << 9;
/// Bit of the flags word that is set when a pairlist-like item carries a tag.
const HAS_TAG_MASK: u32 = 1 << 10;
59
/// Cursor over a big-endian (XDR) serialized byte buffer.
struct XdrReader<'a> {
    /// The complete input buffer.
    data: &'a [u8],
    /// Offset of the next unread byte in `data`.
    pos: usize,
    /// Values registered so far for back-references; REFSXP indices are 1-based into this table.
    ref_table: Vec<RValue>,
}
71
72impl<'a> XdrReader<'a> {
73 fn new(data: &'a [u8]) -> Self {
74 XdrReader {
75 data,
76 pos: 0,
77 ref_table: Vec::new(),
78 }
79 }
80
81 fn remaining(&self) -> usize {
82 self.data.len().saturating_sub(self.pos)
83 }
84
85 fn read_bytes(&mut self, n: usize) -> Result<&'a [u8], RError> {
86 if self.pos + n > self.data.len() {
87 return Err(RError::new(
88 RErrorKind::Other,
89 format!(
90 "unexpected end of RDS data: need {} bytes at offset {}, have {}",
91 n,
92 self.pos,
93 self.remaining()
94 ),
95 ));
96 }
97 let slice = &self.data[self.pos..self.pos + n];
98 self.pos += n;
99 Ok(slice)
100 }
101
102 fn read_int(&mut self) -> Result<i32, RError> {
104 let bytes = self.read_bytes(4)?;
105 Ok(i32::from_be_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]))
106 }
107
108 fn read_double(&mut self) -> Result<f64, RError> {
110 let bytes = self.read_bytes(8)?;
111 Ok(f64::from_be_bytes([
112 bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7],
113 ]))
114 }
115
116 fn read_length(&mut self) -> Result<usize, RError> {
118 let len = self.read_int()?;
119 if len >= 0 {
120 Ok(len as usize)
121 } else if len == -1 {
122 let upper = self.read_int()? as u32;
124 let lower = self.read_int()? as u32;
125 let long_len = (u64::from(upper) << 32) | u64::from(lower);
126 usize::try_from(long_len).map_err(|_| {
127 RError::new(
128 RErrorKind::Other,
129 format!("vector length {} too large for this platform", long_len),
130 )
131 })
132 } else {
133 Err(RError::new(
134 RErrorKind::Other,
135 format!("invalid vector length: {}", len),
136 ))
137 }
138 }
139
140 fn read_charsxp(&mut self) -> Result<Option<String>, RError> {
142 let len = self.read_int()?;
143 if len == -1 {
144 return Ok(None); }
146 let n = usize::try_from(len).map_err(|_| {
147 RError::new(
148 RErrorKind::Other,
149 format!("invalid CHARSXP length: {}", len),
150 )
151 })?;
152 let bytes = self.read_bytes(n)?;
153 match std::str::from_utf8(bytes) {
156 Ok(s) => Ok(Some(s.to_string())),
157 Err(_) => {
158 let s: String = bytes.iter().map(|&b| b as char).collect();
160 Ok(Some(s))
161 }
162 }
163 }
164
165 fn ref_add(&mut self, value: RValue) -> RValue {
167 self.ref_table.push(value.clone());
168 value
169 }
170
171 fn ref_get(&self, index: usize) -> Result<RValue, RError> {
173 if index == 0 || index > self.ref_table.len() {
174 return Err(RError::new(
175 RErrorKind::Other,
176 format!(
177 "invalid reference index {} (table has {} entries)",
178 index,
179 self.ref_table.len()
180 ),
181 ));
182 }
183 Ok(self.ref_table[index - 1].clone())
184 }
185
186 fn read_flags(&mut self) -> Result<(u8, bool, bool, u32), RError> {
188 let flags = self.read_int()? as u32;
189 let sxp_type = (flags & 0xFF) as u8;
190 let has_attr = flags & HAS_ATTR_MASK != 0;
191 let has_tag = flags & HAS_TAG_MASK != 0;
192 Ok((sxp_type, has_attr, has_tag, flags))
193 }
194
195 fn read_attributes(&mut self) -> Result<Attributes, RError> {
197 let mut attrs = IndexMap::new();
198 loop {
201 let (sxp_type, _has_attr, has_tag, _flags) = self.read_flags()?;
202 match sxp_type {
203 LISTSXP => {
204 let tag_name = if has_tag {
205 self.read_item_as_symbol()?
206 } else {
207 String::new()
208 };
209 let value = self.read_item()?;
210 if !tag_name.is_empty() {
211 attrs.insert(tag_name, value);
212 }
213 }
222 NILVALUE_SXP => break,
223 _ => {
224 return Err(RError::new(
225 RErrorKind::Other,
226 format!(
227 "unexpected SEXPTYPE {} in attribute pairlist (expected LISTSXP or NILVALUE)",
228 sxp_type
229 ),
230 ));
231 }
232 }
233 }
234 Ok(attrs)
235 }
236
237 fn read_item(&mut self) -> Result<RValue, RError> {
239 let (sxp_type, has_attr, has_tag, flags) = self.read_flags()?;
240 self.read_item_inner(sxp_type, has_attr, has_tag, flags)
241 }
242
243 fn read_item_inner(
244 &mut self,
245 sxp_type: u8,
246 has_attr: bool,
247 has_tag: bool,
248 flags: u32,
249 ) -> Result<RValue, RError> {
250 match sxp_type {
251 NILVALUE_SXP => Ok(RValue::Null),
252 NILSXP => Ok(RValue::Null),
253
254 EMPTYENV_SXP => {
255 let env = Environment::new_empty();
256 let val = RValue::Environment(env);
257 Ok(self.ref_add(val))
258 }
259
260 BASEENV_SXP | BASENAMESPACE_SXP => {
261 let env = Environment::new_empty();
264 env.set_name("base".to_string());
265 let val = RValue::Environment(env);
266 Ok(self.ref_add(val))
267 }
268
269 GLOBALENV_SXP => {
270 let env = Environment::new_global();
271 let val = RValue::Environment(env);
272 Ok(self.ref_add(val))
273 }
274
275 MISSINGARG_SXP => Ok(RValue::Null),
276
277 NAMESPACESXP => {
278 let _info = self.read_item()?;
281 let val = RValue::Null;
282 Ok(self.ref_add(val))
283 }
284
285 REFSXP => {
286 let ref_index = (flags >> 8) as usize;
289 if ref_index == 0 {
290 let idx = self.read_int()? as usize;
291 self.ref_get(idx)
292 } else {
293 self.ref_get(ref_index)
294 }
295 }
296
297 SYMSXP => {
298 let (inner_type, _ia, _it, inner_flags) = self.read_flags()?;
300 let name = if inner_type == CHARSXP {
301 self.read_charsxp_with_flags(inner_flags)?
302 .unwrap_or_default()
303 } else {
304 return Err(RError::new(
306 RErrorKind::Other,
307 format!("expected CHARSXP inside SYMSXP, got type {}", inner_type),
308 ));
309 };
310 let val = RValue::vec(Vector::Character(vec![Some(name)].into()));
311 Ok(self.ref_add(val))
312 }
313
314 CHARSXP => {
315 let s = self.read_charsxp_with_flags(flags)?;
316 Ok(match s {
317 Some(s) => RValue::vec(Vector::Character(vec![Some(s)].into())),
318 None => RValue::Null,
319 })
320 }
321
322 LGLSXP => {
323 let len = self.read_length()?;
324 let mut values = Vec::with_capacity(len);
325 for _ in 0..len {
326 let raw = self.read_int()?;
327 if raw == R_NA_LOGICAL {
328 values.push(None);
329 } else {
330 values.push(Some(raw != 0));
331 }
332 }
333 let mut rv = RVector::from(Vector::Logical(values.into()));
334 if has_attr {
335 let attrs = self.read_attributes()?;
336 rv.attrs = Some(Box::new(attrs));
337 }
338 Ok(RValue::Vector(rv))
339 }
340
341 INTSXP => {
342 let len = self.read_length()?;
343 let mut values: Vec<Option<i64>> = Vec::with_capacity(len);
344 for _ in 0..len {
345 let raw = self.read_int()?;
346 if raw == R_NA_INTEGER {
347 values.push(None);
348 } else {
349 values.push(Some(i64::from(raw)));
350 }
351 }
352 let mut rv = RVector::from(Vector::Integer(values.into()));
353 if has_attr {
354 let attrs = self.read_attributes()?;
355 rv.attrs = Some(Box::new(attrs));
356 }
357 Ok(RValue::Vector(rv))
358 }
359
360 REALSXP => {
361 let len = self.read_length()?;
362 let mut values: Vec<Option<f64>> = Vec::with_capacity(len);
363 for _ in 0..len {
364 let val = self.read_double()?;
365 if val.to_bits() == R_NA_REAL_BITS {
366 values.push(None);
367 } else {
368 values.push(Some(val));
369 }
370 }
371 let mut rv = RVector::from(Vector::Double(values.into()));
372 if has_attr {
373 let attrs = self.read_attributes()?;
374 rv.attrs = Some(Box::new(attrs));
375 }
376 Ok(RValue::Vector(rv))
377 }
378
379 CPLXSXP => {
380 let len = self.read_length()?;
381 let mut values: Vec<Option<num_complex::Complex64>> = Vec::with_capacity(len);
382 for _ in 0..len {
383 let re = self.read_double()?;
384 let im = self.read_double()?;
385 if re.to_bits() == R_NA_REAL_BITS || im.to_bits() == R_NA_REAL_BITS {
386 values.push(None);
387 } else {
388 values.push(Some(num_complex::Complex64::new(re, im)));
389 }
390 }
391 let mut rv = RVector::from(Vector::Complex(values.into()));
392 if has_attr {
393 let attrs = self.read_attributes()?;
394 rv.attrs = Some(Box::new(attrs));
395 }
396 Ok(RValue::Vector(rv))
397 }
398
399 STRSXP => {
400 let len = self.read_length()?;
401 let mut values: Vec<Option<String>> = Vec::with_capacity(len);
402 for _ in 0..len {
403 let (inner_type, _ia, _it, inner_flags) = self.read_flags()?;
405 if inner_type == CHARSXP {
406 values.push(self.read_charsxp_with_flags(inner_flags)?);
407 } else if inner_type == NILVALUE_SXP {
408 values.push(None);
409 } else {
410 return Err(RError::new(
411 RErrorKind::Other,
412 format!(
413 "expected CHARSXP in STRSXP element, got type {}",
414 inner_type
415 ),
416 ));
417 }
418 }
419 let mut rv = RVector::from(Vector::Character(values.into()));
420 if has_attr {
421 let attrs = self.read_attributes()?;
422 rv.attrs = Some(Box::new(attrs));
423 }
424 Ok(RValue::Vector(rv))
425 }
426
427 RAWSXP => {
428 let len = self.read_length()?;
429 let bytes = self.read_bytes(len)?.to_vec();
430 let mut rv = RVector::from(Vector::Raw(bytes));
431 if has_attr {
432 let attrs = self.read_attributes()?;
433 rv.attrs = Some(Box::new(attrs));
434 }
435 Ok(RValue::Vector(rv))
436 }
437
438 VECSXP | EXPRSXP => {
439 let len = self.read_length()?;
440 let mut elements = Vec::with_capacity(len);
441 for _ in 0..len {
442 let val = self.read_item()?;
443 elements.push((None, val));
444 }
445 let mut list = RList::new(elements);
446 if has_attr {
447 let attrs = self.read_attributes()?;
448 if let Some(names_val) = attrs.get("names") {
450 if let Some(names_vec) = names_val.as_vector() {
451 let names = names_vec.to_characters();
452 for (i, name) in names.iter().enumerate() {
453 if i < list.values.len() {
454 list.values[i].0 = name.clone();
455 }
456 }
457 }
458 }
459 let mut remaining: Attributes =
461 attrs.into_iter().filter(|(k, _)| k != "names").collect();
462 if !remaining.is_empty() {
463 if let Some(first_name) = list.values.first() {
465 if first_name.0.is_some() {
466 let names: Vec<Option<String>> =
467 list.values.iter().map(|(n, _)| n.clone()).collect();
468 remaining.insert(
469 "names".to_string(),
470 RValue::vec(Vector::Character(names.into())),
471 );
472 }
473 }
474 list.attrs = Some(Box::new(remaining));
475 }
476 }
477 Ok(RValue::List(list))
478 }
479
480 LISTSXP => {
481 self.read_pairlist_as_list(has_attr, has_tag, flags)
484 }
485
486 CLOSXP => {
487 let env_val = self.read_item()?;
489 let formals_val = self.read_item()?;
490 let body_val = self.read_item()?;
491
492 let env = match env_val {
494 RValue::Environment(e) => e,
495 _ => Environment::new_global(),
496 };
497
498 let params = self.pairlist_to_params(&formals_val);
500
501 let body = self.rvalue_to_body(&body_val);
506
507 if has_attr {
508 let _attrs = self.read_attributes()?;
509 }
510
511 Ok(RValue::Function(RFunction::Closure { params, body, env }))
512 }
513
514 ENVSXP => {
515 let env = Environment::new_empty();
518 let val = RValue::Environment(env.clone());
519 let val = self.ref_add(val);
520
521 let locked = self.read_int()?;
522 let _enclos = self.read_item()?; let frame = self.read_item()?; let _hashtab = self.read_item()?; if has_attr {
527 let _attrs = self.read_attributes()?;
528 }
529
530 if locked != 0 {
531 env.lock(false);
532 }
533
534 if let RValue::Environment(parent) = &_enclos {
536 env.set_parent(Some(parent.clone()));
537 }
538
539 if let RValue::List(list) = &frame {
541 for (name, value) in &list.values {
542 if let Some(n) = name {
543 env.set(n.clone(), value.clone());
544 }
545 }
546 }
547
548 Ok(val)
549 }
550
551 PROMSXP => {
552 let _env = self.read_item()?;
555 let value = self.read_item()?;
556 let expr = self.read_item()?;
557 if has_attr {
558 let _attrs = self.read_attributes()?;
559 }
560 if value.is_null() {
562 Ok(expr)
563 } else {
564 Ok(value)
565 }
566 }
567
568 SPECIALSXP | BUILTINSXP => {
569 let len = self.read_length()?;
571 let name_bytes = self.read_bytes(len)?;
572 let name = String::from_utf8_lossy(name_bytes).to_string();
573 Ok(RValue::vec(Vector::Character(
576 vec![Some(format!(".Primitive(\"{}\")", name))].into(),
577 )))
578 }
579
580 LANGSXP => {
581 let list_val = self.read_pairlist_as_list(has_attr, has_tag, flags)?;
584 if let Some(expr) = self.list_to_call_expr(&list_val) {
586 Ok(RValue::Language(Language::new(expr)))
587 } else {
588 Ok(list_val)
590 }
591 }
592
593 25 => {
595 let attrs = if has_attr {
597 self.read_attributes()?
598 } else {
599 IndexMap::new()
600 };
601 let mut list = RList::new(Vec::new());
602 if !attrs.is_empty() {
603 list.attrs = Some(Box::new(attrs));
604 }
605 Ok(RValue::List(list))
606 }
607
608 _ => Err(RError::new(
609 RErrorKind::Other,
610 format!(
611 "unsupported SEXPTYPE {} at offset {} in RDS data",
612 sxp_type,
613 self.pos - 4
614 ),
615 )),
616 }
617 }
618
619 fn read_charsxp_with_flags(&mut self, _flags: u32) -> Result<Option<String>, RError> {
621 self.read_charsxp()
622 }
623
624 fn read_item_as_symbol(&mut self) -> Result<String, RError> {
626 let val = self.read_item()?;
627 match &val {
628 RValue::Vector(rv) => match &rv.inner {
629 Vector::Character(c) => Ok(c.first().and_then(|s| s.clone()).unwrap_or_default()),
630 _ => Ok(String::new()),
631 },
632 _ => Ok(String::new()),
633 }
634 }
635
636 fn read_pairlist_as_list(
638 &mut self,
639 has_attr: bool,
640 has_tag: bool,
641 _flags: u32,
642 ) -> Result<RValue, RError> {
643 let mut elements = Vec::new();
644
645 let tag = if has_tag {
647 Some(self.read_item_as_symbol()?)
648 } else {
649 None
650 };
651 let car = self.read_item()?;
652 elements.push((tag, car));
653
654 loop {
656 let (sxp_type, _has_attr_cdr, has_tag_cdr, _cdr_flags) = self.read_flags()?;
657 match sxp_type {
658 LISTSXP => {
659 let tag = if has_tag_cdr {
660 Some(self.read_item_as_symbol()?)
661 } else {
662 None
663 };
664 let car = self.read_item()?;
665 elements.push((tag, car));
666 }
667 NILVALUE_SXP => break,
668 _ => {
669 let val =
672 self.read_item_inner(sxp_type, _has_attr_cdr, has_tag_cdr, _cdr_flags)?;
673 elements.push((None, val));
674 break;
675 }
676 }
677 }
678
679 let mut list = RList::new(elements);
680 if has_attr {
681 let attrs = self.read_attributes()?;
682 list.attrs = Some(Box::new(attrs));
683 }
684 Ok(RValue::List(list))
685 }
686
687 fn pairlist_to_params(&self, val: &RValue) -> Vec<Param> {
692 match val {
693 RValue::Null => Vec::new(),
694 RValue::List(list) => list
695 .values
696 .iter()
697 .map(|(name, default_val)| {
698 let param_name = name.clone().unwrap_or_default();
699 let is_dots = param_name == "...";
700 let default = if default_val.is_null() {
701 None
702 } else {
703 Some(self.rvalue_to_body(default_val))
705 };
706 Param {
707 name: param_name,
708 default,
709 is_dots,
710 }
711 })
712 .collect(),
713 _ => Vec::new(),
714 }
715 }
716
    /// Converts a decoded value into an AST expression, for use as a function
    /// body, a default argument or a call argument.
    ///
    /// * Null and language objects map directly to their expression.
    /// * A character vector carrying a `miniR.source` attribute is parsed as
    ///   source text; if parsing fails it is handled like any other vector.
    /// * Length-one logical, integer, double and character vectors become the
    ///   matching literal (or the matching NA).
    /// * Closures become function expressions.
    /// * Everything else is rendered with `Display` and wrapped in a symbol.
    fn rvalue_to_body(&self, val: &RValue) -> Expr {
        match val {
            RValue::Null => Expr::Null,
            RValue::Language(lang) => (*lang.inner).clone(),
            RValue::Vector(rv) => {
                // Source text stored in the first element takes precedence.
                if rv.get_attr("miniR.source").is_some() {
                    if let Vector::Character(vals) = &rv.inner {
                        if let Some(Some(source)) = vals.first() {
                            if let Ok(parsed) = crate::parser::parse_program(source) {
                                return match parsed {
                                    // A single-statement program is unwrapped.
                                    Expr::Program(mut exprs) if exprs.len() == 1 => exprs.remove(0),
                                    Expr::Program(exprs) => Expr::Block(exprs),
                                    other => other,
                                };
                            }
                        }
                    }
                }
                match &rv.inner {
                    Vector::Logical(vals) if vals.len() == 1 => match vals.first() {
                        Some(Some(b)) => Expr::Bool(*b),
                        _ => Expr::Na(crate::parser::ast::NaType::Logical),
                    },
                    Vector::Integer(vals) if vals.len() == 1 => match vals.first_opt() {
                        Some(i) => Expr::Integer(i),
                        _ => Expr::Na(crate::parser::ast::NaType::Integer),
                    },
                    Vector::Double(vals) if vals.len() == 1 => match vals.first_opt() {
                        Some(d) => {
                            // Only positive infinity has a dedicated node;
                            // negative infinity stays a plain double.
                            if d == f64::INFINITY {
                                Expr::Inf
                            } else if d.is_nan() {
                                Expr::NaN
                            } else {
                                Expr::Double(d)
                            }
                        }
                        _ => Expr::Na(crate::parser::ast::NaType::Real),
                    },
                    Vector::Character(vals) if vals.len() == 1 => match vals.first() {
                        Some(Some(s)) => Expr::String(s.clone()),
                        _ => Expr::Na(crate::parser::ast::NaType::Character),
                    },
                    // Vectors of any other length or type: fall back to the
                    // printed form wrapped in a symbol.
                    _ => {
                        let deparsed = format!("{}", val);
                        Expr::Symbol(deparsed)
                    }
                }
            }
            RValue::Function(RFunction::Closure { params, body, .. }) => Expr::Function {
                params: params.clone(),
                body: Box::new(body.clone()),
            },
            _ => {
                let deparsed = format!("{}", val);
                Expr::Symbol(deparsed)
            }
        }
    }
786
787 fn list_to_call_expr(&self, val: &RValue) -> Option<Expr> {
791 let list = match val {
792 RValue::List(l) => l,
793 _ => return None,
794 };
795 if list.values.is_empty() {
796 return None;
797 }
798
799 let (_, func_val) = &list.values[0];
800 let func_expr = match func_val {
801 RValue::Vector(rv) => match &rv.inner {
802 Vector::Character(c) => {
803 let name = c.first().and_then(|s| s.clone()).unwrap_or_default();
804 Expr::Symbol(name)
805 }
806 _ => return None,
807 },
808 RValue::Language(lang) => (*lang.inner).clone(),
809 _ => return None,
810 };
811
812 let args: Vec<crate::parser::ast::Arg> = list.values[1..]
813 .iter()
814 .map(|(name, val)| crate::parser::ast::Arg {
815 name: name.clone(),
816 value: Some(self.rvalue_to_body(val)),
817 })
818 .collect();
819
820 Some(Expr::Call {
821 func: Box::new(func_expr),
822 args,
823 span: None,
824 })
825 }
826}
827
/// Cursor over a line-oriented (ASCII) serialized byte buffer.
struct AsciiReader<'a> {
    /// The complete input buffer.
    data: &'a [u8],
    /// Offset of the next unread byte in `data`.
    pos: usize,
    /// Values registered so far for back-references; REFSXP indices are 1-based into this table.
    ref_table: Vec<RValue>,
}
845
846impl<'a> AsciiReader<'a> {
847 fn new(data: &'a [u8]) -> Self {
848 AsciiReader {
849 data,
850 pos: 0,
851 ref_table: Vec::new(),
852 }
853 }
854
855 fn read_line(&mut self) -> Result<&'a str, RError> {
858 let start = self.pos;
859 while self.pos < self.data.len() && self.data[self.pos] != b'\n' {
860 self.pos += 1;
861 }
862 let line_bytes = &self.data[start..self.pos];
863 if self.pos < self.data.len() {
865 self.pos += 1;
866 }
867 std::str::from_utf8(line_bytes).map_err(|e| {
868 RError::new(
869 RErrorKind::Other,
870 format!("invalid UTF-8 in ASCII RDS at offset {}: {}", start, e),
871 )
872 })
873 }
874
875 fn read_int(&mut self) -> Result<i32, RError> {
877 let line = self.read_line()?.trim();
878 line.parse::<i32>().map_err(|e| {
879 RError::new(
880 RErrorKind::Other,
881 format!("expected integer in ASCII RDS, got '{}': {}", line, e),
882 )
883 })
884 }
885
886 fn read_double(&mut self) -> Result<f64, RError> {
889 let line = self.read_line()?.trim().to_string();
890 parse_ascii_double(&line)
891 }
892
893 fn read_length(&mut self) -> Result<usize, RError> {
895 let len = self.read_int()?;
896 if len >= 0 {
897 Ok(len as usize)
898 } else if len == -1 {
899 let upper = self.read_int()? as u32;
901 let lower = self.read_int()? as u32;
902 let long_len = (u64::from(upper) << 32) | u64::from(lower);
903 usize::try_from(long_len).map_err(|_| {
904 RError::new(
905 RErrorKind::Other,
906 format!("vector length {} too large for this platform", long_len),
907 )
908 })
909 } else {
910 Err(RError::new(
911 RErrorKind::Other,
912 format!("invalid vector length: {}", len),
913 ))
914 }
915 }
916
917 fn read_charsxp(&mut self) -> Result<Option<String>, RError> {
920 let len = self.read_int()?;
921 if len == -1 {
922 return Ok(None); }
924 let n = usize::try_from(len).map_err(|_| {
925 RError::new(
926 RErrorKind::Other,
927 format!("invalid CHARSXP length: {}", len),
928 )
929 })?;
930 if self.pos + n > self.data.len() {
932 return Err(RError::new(
933 RErrorKind::Other,
934 format!(
935 "unexpected end of ASCII RDS data: need {} bytes at offset {}, have {}",
936 n,
937 self.pos,
938 self.data.len() - self.pos,
939 ),
940 ));
941 }
942 let bytes = &self.data[self.pos..self.pos + n];
943 self.pos += n;
944 if self.pos < self.data.len() && self.data[self.pos] == b'\n' {
946 self.pos += 1;
947 }
948 match std::str::from_utf8(bytes) {
949 Ok(s) => Ok(Some(s.to_string())),
950 Err(_) => {
951 let s: String = bytes.iter().map(|&b| b as char).collect();
953 Ok(Some(s))
954 }
955 }
956 }
957
958 fn ref_add(&mut self, value: RValue) -> RValue {
960 self.ref_table.push(value.clone());
961 value
962 }
963
964 fn ref_get(&self, index: usize) -> Result<RValue, RError> {
966 if index == 0 || index > self.ref_table.len() {
967 return Err(RError::new(
968 RErrorKind::Other,
969 format!(
970 "invalid reference index {} (table has {} entries)",
971 index,
972 self.ref_table.len()
973 ),
974 ));
975 }
976 Ok(self.ref_table[index - 1].clone())
977 }
978
979 fn read_flags(&mut self) -> Result<(u8, bool, bool, u32), RError> {
981 let flags = self.read_int()? as u32;
982 let sxp_type = (flags & 0xFF) as u8;
983 let has_attr = flags & HAS_ATTR_MASK != 0;
984 let has_tag = flags & HAS_TAG_MASK != 0;
985 Ok((sxp_type, has_attr, has_tag, flags))
986 }
987
988 fn read_attributes(&mut self) -> Result<Attributes, RError> {
990 let mut attrs = IndexMap::new();
991 loop {
992 let (sxp_type, _has_attr, has_tag, _flags) = self.read_flags()?;
993 match sxp_type {
994 LISTSXP => {
995 let tag_name = if has_tag {
996 self.read_item_as_symbol()?
997 } else {
998 String::new()
999 };
1000 let value = self.read_item()?;
1001 if !tag_name.is_empty() {
1002 attrs.insert(tag_name, value);
1003 }
1004 }
1005 NILVALUE_SXP => break,
1006 _ => {
1007 return Err(RError::new(
1008 RErrorKind::Other,
1009 format!(
1010 "unexpected SEXPTYPE {} in attribute pairlist (expected LISTSXP or NILVALUE)",
1011 sxp_type
1012 ),
1013 ));
1014 }
1015 }
1016 }
1017 Ok(attrs)
1018 }
1019
1020 fn read_item(&mut self) -> Result<RValue, RError> {
1022 let (sxp_type, has_attr, has_tag, flags) = self.read_flags()?;
1023 self.read_item_inner(sxp_type, has_attr, has_tag, flags)
1024 }
1025
1026 fn read_item_inner(
1027 &mut self,
1028 sxp_type: u8,
1029 has_attr: bool,
1030 has_tag: bool,
1031 flags: u32,
1032 ) -> Result<RValue, RError> {
1033 match sxp_type {
1034 NILVALUE_SXP | NILSXP => Ok(RValue::Null),
1035
1036 EMPTYENV_SXP | BASEENV_SXP | GLOBALENV_SXP | BASENAMESPACE_SXP => {
1037 let val = RValue::Null;
1038 Ok(self.ref_add(val))
1039 }
1040
1041 MISSINGARG_SXP => Ok(RValue::Null),
1042
1043 NAMESPACESXP => {
1044 let _info = self.read_item()?;
1045 let val = RValue::Null;
1046 Ok(self.ref_add(val))
1047 }
1048
1049 REFSXP => {
1050 let ref_index = (flags >> 8) as usize;
1051 if ref_index == 0 {
1052 let idx = self.read_int()? as usize;
1053 self.ref_get(idx)
1054 } else {
1055 self.ref_get(ref_index)
1056 }
1057 }
1058
1059 SYMSXP => {
1060 let (inner_type, _ia, _it, _inner_flags) = self.read_flags()?;
1061 let name = if inner_type == CHARSXP {
1062 self.read_charsxp()?.unwrap_or_default()
1063 } else {
1064 return Err(RError::new(
1065 RErrorKind::Other,
1066 format!("expected CHARSXP inside SYMSXP, got type {}", inner_type),
1067 ));
1068 };
1069 let val = RValue::vec(Vector::Character(vec![Some(name)].into()));
1070 Ok(self.ref_add(val))
1071 }
1072
1073 CHARSXP => {
1074 let s = self.read_charsxp()?;
1075 Ok(match s {
1076 Some(s) => RValue::vec(Vector::Character(vec![Some(s)].into())),
1077 None => RValue::Null,
1078 })
1079 }
1080
1081 LGLSXP => {
1082 let len = self.read_length()?;
1083 let mut values = Vec::with_capacity(len);
1084 for _ in 0..len {
1085 let raw = self.read_int()?;
1086 if raw == R_NA_LOGICAL {
1087 values.push(None);
1088 } else {
1089 values.push(Some(raw != 0));
1090 }
1091 }
1092 let mut rv = RVector::from(Vector::Logical(values.into()));
1093 if has_attr {
1094 let attrs = self.read_attributes()?;
1095 rv.attrs = Some(Box::new(attrs));
1096 }
1097 Ok(RValue::Vector(rv))
1098 }
1099
1100 INTSXP => {
1101 let len = self.read_length()?;
1102 let mut values: Vec<Option<i64>> = Vec::with_capacity(len);
1103 for _ in 0..len {
1104 let raw = self.read_int()?;
1105 if raw == R_NA_INTEGER {
1106 values.push(None);
1107 } else {
1108 values.push(Some(i64::from(raw)));
1109 }
1110 }
1111 let mut rv = RVector::from(Vector::Integer(values.into()));
1112 if has_attr {
1113 let attrs = self.read_attributes()?;
1114 rv.attrs = Some(Box::new(attrs));
1115 }
1116 Ok(RValue::Vector(rv))
1117 }
1118
1119 REALSXP => {
1120 let len = self.read_length()?;
1121 let mut values: Vec<Option<f64>> = Vec::with_capacity(len);
1122 for _ in 0..len {
1123 let val = self.read_double()?;
1124 if val.to_bits() == R_NA_REAL_BITS {
1125 values.push(None);
1126 } else {
1127 values.push(Some(val));
1128 }
1129 }
1130 let mut rv = RVector::from(Vector::Double(values.into()));
1131 if has_attr {
1132 let attrs = self.read_attributes()?;
1133 rv.attrs = Some(Box::new(attrs));
1134 }
1135 Ok(RValue::Vector(rv))
1136 }
1137
1138 CPLXSXP => {
1139 let len = self.read_length()?;
1140 let mut values: Vec<Option<num_complex::Complex64>> = Vec::with_capacity(len);
1141 for _ in 0..len {
1142 let re = self.read_double()?;
1143 let im = self.read_double()?;
1144 if re.to_bits() == R_NA_REAL_BITS || im.to_bits() == R_NA_REAL_BITS {
1145 values.push(None);
1146 } else {
1147 values.push(Some(num_complex::Complex64::new(re, im)));
1148 }
1149 }
1150 let mut rv = RVector::from(Vector::Complex(values.into()));
1151 if has_attr {
1152 let attrs = self.read_attributes()?;
1153 rv.attrs = Some(Box::new(attrs));
1154 }
1155 Ok(RValue::Vector(rv))
1156 }
1157
1158 STRSXP => {
1159 let len = self.read_length()?;
1160 let mut values: Vec<Option<String>> = Vec::with_capacity(len);
1161 for _ in 0..len {
1162 let (inner_type, _ia, _it, _inner_flags) = self.read_flags()?;
1163 if inner_type == CHARSXP {
1164 values.push(self.read_charsxp()?);
1165 } else if inner_type == NILVALUE_SXP {
1166 values.push(None);
1167 } else {
1168 return Err(RError::new(
1169 RErrorKind::Other,
1170 format!(
1171 "expected CHARSXP in STRSXP element, got type {}",
1172 inner_type
1173 ),
1174 ));
1175 }
1176 }
1177 let mut rv = RVector::from(Vector::Character(values.into()));
1178 if has_attr {
1179 let attrs = self.read_attributes()?;
1180 rv.attrs = Some(Box::new(attrs));
1181 }
1182 Ok(RValue::Vector(rv))
1183 }
1184
1185 RAWSXP => {
1186 let len = self.read_length()?;
1187 let hex_line = self.read_line()?;
1189 let hex = hex_line.trim();
1190 let mut bytes = Vec::with_capacity(len);
1191 let mut i = 0;
1192 while i + 1 < hex.len() && bytes.len() < len {
1193 let byte = u8::from_str_radix(&hex[i..i + 2], 16).map_err(|e| {
1194 RError::new(
1195 RErrorKind::Other,
1196 format!("invalid hex byte '{}' in RAWSXP: {}", &hex[i..i + 2], e),
1197 )
1198 })?;
1199 bytes.push(byte);
1200 i += 2;
1201 }
1202 let mut rv = RVector::from(Vector::Raw(bytes));
1203 if has_attr {
1204 let attrs = self.read_attributes()?;
1205 rv.attrs = Some(Box::new(attrs));
1206 }
1207 Ok(RValue::Vector(rv))
1208 }
1209
1210 VECSXP | EXPRSXP => {
1211 let len = self.read_length()?;
1212 let mut elements = Vec::with_capacity(len);
1213 for _ in 0..len {
1214 let val = self.read_item()?;
1215 elements.push((None, val));
1216 }
1217 let mut list = RList::new(elements);
1218 if has_attr {
1219 let attrs = self.read_attributes()?;
1220 if let Some(names_val) = attrs.get("names") {
1221 if let Some(names_vec) = names_val.as_vector() {
1222 let names = names_vec.to_characters();
1223 for (i, name) in names.iter().enumerate() {
1224 if i < list.values.len() {
1225 list.values[i].0 = name.clone();
1226 }
1227 }
1228 }
1229 }
1230 let mut remaining: Attributes =
1231 attrs.into_iter().filter(|(k, _)| k != "names").collect();
1232 if !remaining.is_empty() {
1233 if let Some(first_name) = list.values.first() {
1234 if first_name.0.is_some() {
1235 let names: Vec<Option<String>> =
1236 list.values.iter().map(|(n, _)| n.clone()).collect();
1237 remaining.insert(
1238 "names".to_string(),
1239 RValue::vec(Vector::Character(names.into())),
1240 );
1241 }
1242 }
1243 list.attrs = Some(Box::new(remaining));
1244 }
1245 }
1246 Ok(RValue::List(list))
1247 }
1248
1249 LISTSXP => self.read_pairlist_as_list(has_attr, has_tag, flags),
1250
1251 CLOSXP => {
1252 let _env = self.read_item()?;
1253 let _formals = self.read_item()?;
1254 let _body = self.read_item()?;
1255 let val = RValue::Null;
1256 if has_attr {
1257 let _attrs = self.read_attributes()?;
1258 }
1259 Ok(val)
1260 }
1261
1262 LANGSXP => self.read_pairlist_as_list(has_attr, has_tag, flags),
1263
1264 25 => {
1266 let attrs = if has_attr {
1267 self.read_attributes()?
1268 } else {
1269 IndexMap::new()
1270 };
1271 let mut list = RList::new(Vec::new());
1272 if !attrs.is_empty() {
1273 list.attrs = Some(Box::new(attrs));
1274 }
1275 Ok(RValue::List(list))
1276 }
1277
1278 _ => Err(RError::new(
1279 RErrorKind::Other,
1280 format!(
1281 "unsupported SEXPTYPE {} at offset {} in ASCII RDS data",
1282 sxp_type, self.pos
1283 ),
1284 )),
1285 }
1286 }
1287
1288 fn read_item_as_symbol(&mut self) -> Result<String, RError> {
1290 let val = self.read_item()?;
1291 match &val {
1292 RValue::Vector(rv) => match &rv.inner {
1293 Vector::Character(c) => Ok(c.first().and_then(|s| s.clone()).unwrap_or_default()),
1294 _ => Ok(String::new()),
1295 },
1296 _ => Ok(String::new()),
1297 }
1298 }
1299
1300 fn read_pairlist_as_list(
1302 &mut self,
1303 has_attr: bool,
1304 has_tag: bool,
1305 _flags: u32,
1306 ) -> Result<RValue, RError> {
1307 let mut elements = Vec::new();
1308
1309 let tag = if has_tag {
1310 Some(self.read_item_as_symbol()?)
1311 } else {
1312 None
1313 };
1314 let car = self.read_item()?;
1315 elements.push((tag, car));
1316
1317 loop {
1318 let (sxp_type, _has_attr_cdr, has_tag_cdr, _cdr_flags) = self.read_flags()?;
1319 match sxp_type {
1320 LISTSXP => {
1321 let tag = if has_tag_cdr {
1322 Some(self.read_item_as_symbol()?)
1323 } else {
1324 None
1325 };
1326 let car = self.read_item()?;
1327 elements.push((tag, car));
1328 }
1329 NILVALUE_SXP => break,
1330 _ => {
1331 let val =
1332 self.read_item_inner(sxp_type, _has_attr_cdr, has_tag_cdr, _cdr_flags)?;
1333 elements.push((None, val));
1334 break;
1335 }
1336 }
1337 }
1338
1339 let mut list = RList::new(elements);
1340 if has_attr {
1341 let attrs = self.read_attributes()?;
1342 list.attrs = Some(Box::new(attrs));
1343 }
1344 Ok(RValue::List(list))
1345 }
1346}
1347
1348fn parse_ascii_double(s: &str) -> Result<f64, RError> {
1353 match s {
1354 "NA" => Ok(f64::from_bits(R_NA_REAL_BITS)),
1355 "Inf" => Ok(f64::INFINITY),
1356 "-Inf" => Ok(f64::NEG_INFINITY),
1357 "NaN" => Ok(f64::NAN),
1358 _ if s.starts_with("0x") || s.starts_with("-0x") => parse_hex_float(s),
1359 _ => s.parse::<f64>().map_err(|e| {
1360 RError::new(
1361 RErrorKind::Other,
1362 format!("failed to parse double '{}': {}", s, e),
1363 )
1364 }),
1365 }
1366}
1367
1368fn parse_hex_float(s: &str) -> Result<f64, RError> {
1372 let make_err = || RError::new(RErrorKind::Other, format!("invalid hex float: '{}'", s));
1373
1374 let (negative, rest) = if let Some(r) = s.strip_prefix('-') {
1375 (true, r)
1376 } else if let Some(r) = s.strip_prefix('+') {
1377 (false, r)
1378 } else {
1379 (false, s)
1380 };
1381
1382 let rest = rest
1383 .strip_prefix("0x")
1384 .or_else(|| rest.strip_prefix("0X"))
1385 .ok_or_else(make_err)?;
1386
1387 let (mantissa_str, exp_str) = if let Some(idx) = rest.find(['p', 'P']) {
1389 (&rest[..idx], &rest[idx + 1..])
1390 } else {
1391 (rest, "0")
1393 };
1394
1395 let (int_part, frac_part) = if let Some(dot_idx) = mantissa_str.find('.') {
1397 (&mantissa_str[..dot_idx], &mantissa_str[dot_idx + 1..])
1398 } else {
1399 (mantissa_str, "")
1400 };
1401
1402 let int_val = if int_part.is_empty() {
1404 0u64
1405 } else {
1406 u64::from_str_radix(int_part, 16).map_err(|_| make_err())?
1407 };
1408
1409 let mut frac_val: f64 = 0.0;
1411 let mut frac_scale: f64 = 1.0 / 16.0;
1412 for ch in frac_part.chars() {
1413 let digit = ch.to_digit(16).ok_or_else(make_err)?;
1414 frac_val += f64::from(digit) * frac_scale;
1415 frac_scale /= 16.0;
1416 }
1417
1418 let mantissa = int_val as f64 + frac_val;
1419
1420 let exp: i32 = if exp_str.is_empty() {
1422 0
1423 } else {
1424 exp_str.parse().map_err(|_| make_err())?
1425 };
1426
1427 let result = mantissa * (2.0f64).powi(exp);
1428 if negative {
1429 Ok(-result)
1430 } else {
1431 Ok(result)
1432 }
1433}
1434
/// Writer that builds the ASCII serialization of a value as text.
struct AsciiWriter {
    /// Output accumulated so far; converted to bytes by `finish`.
    buf: String,
}
1443
1444impl AsciiWriter {
1445 fn new() -> Self {
1446 AsciiWriter { buf: String::new() }
1447 }
1448
1449 fn write_int(&mut self, val: i32) {
1451 writeln!(self.buf, "{}", val).expect("Vec<u8> write");
1452 }
1453
1454 fn write_double(&mut self, val: f64) {
1457 if val.to_bits() == R_NA_REAL_BITS {
1458 writeln!(self.buf, "NA").expect("Vec<u8> write");
1459 } else if val.is_infinite() {
1460 if val > 0.0 {
1461 writeln!(self.buf, "Inf").expect("Vec<u8> write");
1462 } else {
1463 writeln!(self.buf, "-Inf").expect("Vec<u8> write");
1464 }
1465 } else if val.is_nan() {
1466 writeln!(self.buf, "NaN").expect("Vec<u8> write");
1467 } else {
1468 writeln!(self.buf, "{}", format_hex_float(val)).expect("Vec<u8> write");
1469 }
1470 }
1471
1472 fn write_flags(&mut self, sxp_type: u8, has_attr: bool, has_tag: bool) {
1474 let mut flags: u32 = u32::from(sxp_type);
1475 if has_attr {
1476 flags |= HAS_ATTR_MASK;
1477 }
1478 if has_tag {
1479 flags |= HAS_TAG_MASK;
1480 }
1481 self.write_int(flags as i32);
1482 }
1483
1484 fn write_charsxp(&mut self, s: Option<&str>) {
1486 match s {
1487 Some(text) => {
1488 let flags: u32 = u32::from(CHARSXP) | (1 << 12);
1490 self.write_int(flags as i32);
1491 let bytes = text.as_bytes();
1492 self.write_int(i32::try_from(bytes.len()).unwrap_or(i32::MAX));
1493 self.buf.push_str(text);
1495 self.buf.push('\n');
1496 }
1497 None => {
1498 let flags: u32 = u32::from(CHARSXP);
1500 self.write_int(flags as i32);
1501 self.write_int(-1);
1502 }
1503 }
1504 }
1505
1506 fn write_nilvalue(&mut self) {
1508 self.write_flags(NILVALUE_SXP, false, false);
1509 }
1510
1511 fn write_length(&mut self, len: usize) {
1513 if let Ok(n) = i32::try_from(len) {
1514 self.write_int(n);
1515 } else {
1516 self.write_int(-1);
1517 let long_len = len as u64;
1518 self.write_int((long_len >> 32) as i32);
1519 self.write_int(long_len as i32);
1520 }
1521 }
1522
1523 fn write_attributes(&mut self, attrs: &Attributes) {
1525 for (name, value) in attrs {
1526 self.write_flags(LISTSXP, false, true);
1527 self.write_flags(SYMSXP, false, false);
1529 self.write_charsxp(Some(name));
1530 self.write_item(value);
1532 }
1533 self.write_nilvalue();
1534 }
1535
1536 fn write_item(&mut self, value: &RValue) {
1538 match value {
1539 RValue::Null => {
1540 self.write_flags(NILVALUE_SXP, false, false);
1541 }
1542 RValue::Vector(rv) => {
1543 let has_attr = rv.attrs.as_ref().is_some_and(|a| !a.is_empty());
1544 match &rv.inner {
1545 Vector::Logical(vals) => {
1546 self.write_flags(LGLSXP, has_attr, false);
1547 self.write_length(vals.len());
1548 for v in vals.iter() {
1549 match v {
1550 Some(true) => self.write_int(1),
1551 Some(false) => self.write_int(0),
1552 None => self.write_int(R_NA_LOGICAL),
1553 }
1554 }
1555 }
1556 Vector::Integer(vals) => {
1557 self.write_flags(INTSXP, has_attr, false);
1558 self.write_length(vals.len());
1559 for v in vals.iter() {
1560 match v {
1561 Some(i) => {
1562 let clamped = i32::try_from(i).unwrap_or_else(|_| {
1563 if i > i64::from(i32::MAX) {
1564 i32::MAX
1565 } else {
1566 i32::MIN + 1
1567 }
1568 });
1569 if clamped == R_NA_INTEGER {
1570 self.write_int(R_NA_INTEGER + 1);
1571 } else {
1572 self.write_int(clamped);
1573 }
1574 }
1575 None => self.write_int(R_NA_INTEGER),
1576 }
1577 }
1578 }
1579 Vector::Double(vals) => {
1580 self.write_flags(REALSXP, has_attr, false);
1581 self.write_length(vals.len());
1582 for v in vals.iter() {
1583 match v {
1584 Some(d) => self.write_double(d),
1585 None => writeln!(self.buf, "NA").expect("Vec<u8> write"),
1586 }
1587 }
1588 }
1589 Vector::Complex(vals) => {
1590 self.write_flags(CPLXSXP, has_attr, false);
1591 self.write_length(vals.len());
1592 for v in vals.iter() {
1593 match v {
1594 Some(c) => {
1595 self.write_double(c.re);
1596 self.write_double(c.im);
1597 }
1598 None => {
1599 writeln!(self.buf, "NA").expect("Vec<u8> write");
1600 writeln!(self.buf, "NA").expect("Vec<u8> write");
1601 }
1602 }
1603 }
1604 }
1605 Vector::Character(vals) => {
1606 self.write_flags(STRSXP, has_attr, false);
1607 self.write_length(vals.len());
1608 for v in vals.iter() {
1609 self.write_charsxp(v.as_deref());
1610 }
1611 }
1612 Vector::Raw(bytes) => {
1613 self.write_flags(RAWSXP, has_attr, false);
1614 self.write_length(bytes.len());
1615 for byte in bytes {
1617 write!(self.buf, "{:02x}", byte).expect("Vec<u8> write");
1618 }
1619 self.buf.push('\n');
1620 }
1621 }
1622 if has_attr {
1623 if let Some(attrs) = rv.attrs.as_ref() {
1624 self.write_attributes(attrs)
1625 };
1626 }
1627 }
1628 RValue::List(list) => {
1629 let has_names = list.values.iter().any(|(name, _)| name.is_some());
1630 let mut effective_attrs: Attributes = list
1631 .attrs
1632 .as_ref()
1633 .map(|a| a.as_ref().clone())
1634 .unwrap_or_default();
1635 if has_names && !effective_attrs.contains_key("names") {
1636 let names: Vec<Option<String>> =
1637 list.values.iter().map(|(n, _)| n.clone()).collect();
1638 effective_attrs.insert(
1639 "names".to_string(),
1640 RValue::vec(Vector::Character(names.into())),
1641 );
1642 }
1643 let has_attr = !effective_attrs.is_empty();
1644
1645 self.write_flags(VECSXP, has_attr, false);
1646 self.write_length(list.values.len());
1647 for (_, val) in &list.values {
1648 self.write_item(val);
1649 }
1650 if has_attr {
1651 self.write_attributes(&effective_attrs);
1652 }
1653 }
1654 RValue::Function(_)
1655 | RValue::Environment(_)
1656 | RValue::Language(_)
1657 | RValue::Promise(_) => {
1658 self.write_flags(NILVALUE_SXP, false, false);
1659 }
1660 }
1661 }
1662
1663 fn finish(self) -> Vec<u8> {
1664 self.buf.into_bytes()
1665 }
1666}
1667
/// Formats a double as a hexadecimal floating-point literal.
///
/// Zero is written as `0x0p+0` (with a leading `-` for negative zero). Values
/// with a zero exponent field are written as `0x0.<hex>p-1022`; all others as
/// `0x1.<hex>p<signed exponent>`. Trailing zero digits of the 13-digit
/// fraction are dropped, keeping a single `0` when the fraction is zero.
fn format_hex_float(val: f64) -> String {
    if val == 0.0 {
        let zero = if val.is_sign_negative() {
            "-0x0p+0"
        } else {
            "0x0p+0"
        };
        return zero.to_string();
    }

    let raw = val.to_bits();
    let sign = if raw >> 63 != 0 { "-" } else { "" };
    let exp_field = ((raw >> 52) & 0x7FF) as i32;
    let fraction = raw & 0x000F_FFFF_FFFF_FFFF;

    // Exponent field 0 selects the "0x0." form with the fixed exponent -1022.
    let (lead_digit, exponent_text) = if exp_field == 0 {
        ('0', String::from("-1022"))
    } else {
        ('1', format!("{:+}", exp_field - 1023))
    };

    let all_digits = format!("{:013x}", fraction);
    let mut digits = all_digits.trim_end_matches('0');
    if digits.is_empty() {
        digits = "0";
    }

    format!("{}0x{}.{}p{}", sign, lead_digit, digits, exponent_text)
}
1717
1718pub fn unserialize_xdr(data: &[u8]) -> Result<RValue, RError> {
1726 if data.len() < 2 {
1727 return Err(RError::new(
1728 RErrorKind::Other,
1729 "RDS data too short".to_string(),
1730 ));
1731 }
1732
1733 let format_byte = data[0];
1735 if data[1] != b'\n' {
1736 return Err(RError::new(
1737 RErrorKind::Other,
1738 format!("expected newline after format byte, got 0x{:02x}", data[1]),
1739 ));
1740 }
1741
1742 match format_byte {
1743 b'X' => {
1744 let mut reader = XdrReader::new(&data[2..]);
1746 let _version = reader.read_int()?;
1747 let _r_version_wrote = reader.read_int()?;
1748 let _r_version_min = reader.read_int()?;
1749 if _version == 3 {
1750 let _native_encoding = reader.read_item()?;
1751 }
1752 reader.read_item()
1753 }
1754 b'A' => {
1755 let mut reader = AsciiReader::new(&data[2..]);
1757 let _version = reader.read_int()?;
1758 let _r_version_wrote = reader.read_int()?;
1759 let _r_version_min = reader.read_int()?;
1760 if _version == 3 {
1761 let _native_encoding = reader.read_item()?;
1762 }
1763 reader.read_item()
1764 }
1765 b'B' => Err(RError::new(
1766 RErrorKind::Other,
1767 "native binary serialization format is not yet supported; \
1768 only XDR binary (format 'X') and ASCII (format 'A') are implemented"
1769 .to_string(),
1770 )),
1771 _ => Err(RError::new(
1772 RErrorKind::Other,
1773 format!("unknown serialization format byte: 0x{:02x}", format_byte),
1774 )),
1775 }
1776}
1777
/// Reports whether `data` looks like serialized R data rather than plain text.
///
/// Returns `true` when the data starts with a format byte (`X`, `A` or `B`)
/// followed by a newline, or when it carries a gzip or bzip2 signature.
/// bzip2 is included because `unserialize_rds` handles bzip2 input as well;
/// previously only gzip was detected here.
pub fn is_binary_rds(data: &[u8]) -> bool {
    let has_format_header = matches!(data, [b'X' | b'A' | b'B', b'\n', ..]);
    has_format_header || is_gzip_data(data) || is_bzip2_data(data)
}

/// Reports whether `data` starts with the gzip magic bytes `1f 8b`.
pub fn is_gzip_data(data: &[u8]) -> bool {
    data.starts_with(&[0x1f, 0x8b])
}

/// Reports whether `data` starts with the bzip2 signature `BZh`.
pub fn is_bzip2_data(data: &[u8]) -> bool {
    data.starts_with(b"BZh")
}
1802
/// Deserializes RDS data, transparently decompressing gzip or bzip2 input.
///
/// Data without a gzip or bzip2 signature is passed to `unserialize_xdr`
/// unchanged.
///
/// # Errors
/// Returns an `RError` when decompression fails or when `unserialize_xdr`
/// rejects the (decompressed) stream.
#[cfg(feature = "compression")]
pub fn unserialize_rds(data: &[u8]) -> Result<RValue, RError> {
    use std::io::Read;

    let mut decompressed = Vec::new();
    let (codec, outcome) = if is_gzip_data(data) {
        (
            "gzip",
            flate2::read::GzDecoder::new(data).read_to_end(&mut decompressed),
        )
    } else if is_bzip2_data(data) {
        (
            "bzip2",
            bzip2::read::BzDecoder::new(data).read_to_end(&mut decompressed),
        )
    } else {
        return unserialize_xdr(data);
    };

    outcome.map_err(|e| {
        RError::new(
            RErrorKind::Other,
            format!("failed to decompress {} RDS data: {}", codec, e),
        )
    })?;
    unserialize_xdr(&decompressed)
}
1836
1837#[cfg(not(feature = "compression"))]
1839pub fn unserialize_rds(data: &[u8]) -> Result<RValue, RError> {
1840 if is_gzip_data(data) || is_bzip2_data(data) {
1841 Err(RError::new(
1842 RErrorKind::Other,
1843 "RDS file is compressed but miniR was built without the 'compression' feature; \
1844 rebuild with `--features compression` to read compressed RDS files"
1845 .to_string(),
1846 ))
1847 } else {
1848 unserialize_xdr(data)
1849 }
1850}
1851
/// Writer that builds the XDR (big-endian binary) serialization of a value.
struct XdrWriter {
    /// Output bytes accumulated so far; returned by `finish`.
    buf: Vec<u8>,
}
1860
1861impl XdrWriter {
1862 fn new() -> Self {
1863 XdrWriter { buf: Vec::new() }
1864 }
1865
1866 fn write_int(&mut self, val: i32) {
1868 self.buf.extend_from_slice(&val.to_be_bytes());
1869 }
1870
1871 fn write_double(&mut self, val: f64) {
1873 self.buf.extend_from_slice(&val.to_be_bytes());
1874 }
1875
1876 fn write_flags(&mut self, sxp_type: u8, has_attr: bool, has_tag: bool) {
1878 let mut flags: u32 = u32::from(sxp_type);
1879 if has_attr {
1880 flags |= HAS_ATTR_MASK;
1881 }
1882 if has_tag {
1883 flags |= HAS_TAG_MASK;
1884 }
1885 self.write_int(flags as i32);
1886 }
1887
1888 fn write_charsxp(&mut self, s: Option<&str>) {
1890 match s {
1891 Some(text) => {
1892 let flags: u32 = u32::from(CHARSXP) | (1 << 12);
1894 self.write_int(flags as i32);
1895 let bytes = text.as_bytes();
1896 self.write_int(i32::try_from(bytes.len()).unwrap_or(i32::MAX));
1897 self.buf.extend_from_slice(bytes);
1898 }
1899 None => {
1900 let flags: u32 = u32::from(CHARSXP);
1902 self.write_int(flags as i32);
1903 self.write_int(-1);
1904 }
1905 }
1906 }
1907
1908 fn write_nilvalue(&mut self) {
1910 self.write_flags(NILVALUE_SXP, false, false);
1911 }
1912
1913 fn write_length(&mut self, len: usize) {
1915 if let Ok(n) = i32::try_from(len) {
1916 self.write_int(n);
1917 } else {
1918 self.write_int(-1);
1920 let long_len = len as u64;
1921 self.write_int((long_len >> 32) as i32);
1922 self.write_int(long_len as i32);
1923 }
1924 }
1925
1926 fn write_attributes(&mut self, attrs: &Attributes) {
1929 for (name, value) in attrs {
1930 self.write_flags(LISTSXP, false, true); self.write_flags(SYMSXP, false, false);
1933 self.write_charsxp(Some(name));
1934 self.write_item(value);
1936 }
1937 self.write_nilvalue();
1938 }
1939
/// Writes one value in XDR form.
///
/// * `Null` and promises are written as the nil marker.
/// * Atomic vectors are written as flags, length, the elements, then
///   attributes if the vector has a non-empty attribute set.
/// * Lists are written as VECSXP; element names are emitted through a
///   synthesized `names` attribute when the list has no explicit one.
/// * Closures are written as CLOSXP followed by environment, formals, body;
///   builtins as BUILTINSXP followed by the name length and name bytes.
/// * Environments and language objects are delegated to
///   `write_environment` and `write_langsxp_expr`.
fn write_item(&mut self, value: &RValue) {
    match value {
        RValue::Null => {
            self.write_flags(NILVALUE_SXP, false, false);
        }
        RValue::Vector(rv) => {
            // The attribute flag is only set for a present, non-empty attribute set.
            let has_attr = rv.attrs.as_ref().is_some_and(|a| !a.is_empty());
            match &rv.inner {
                Vector::Logical(vals) => {
                    self.write_flags(LGLSXP, has_attr, false);
                    self.write_length(vals.len());
                    for v in vals.iter() {
                        match v {
                            Some(true) => self.write_int(1),
                            Some(false) => self.write_int(0),
                            None => self.write_int(R_NA_LOGICAL),
                        }
                    }
                }
                Vector::Integer(vals) => {
                    self.write_flags(INTSXP, has_attr, false);
                    self.write_length(vals.len());
                    for v in vals.iter() {
                        match v {
                            Some(i) => {
                                // Saturate values outside the i32 range.
                                let clamped = i32::try_from(i).unwrap_or_else(|_| {
                                    if i > i64::from(i32::MAX) {
                                        i32::MAX
                                    } else {
                                        i32::MIN + 1
                                    }
                                });
                                // i32::MIN is the NA marker, so a real value equal
                                // to it is nudged up by one to stay non-NA.
                                if clamped == R_NA_INTEGER {
                                    self.write_int(R_NA_INTEGER + 1);
                                } else {
                                    self.write_int(clamped);
                                }
                            }
                            None => self.write_int(R_NA_INTEGER),
                        }
                    }
                }
                Vector::Double(vals) => {
                    self.write_flags(REALSXP, has_attr, false);
                    self.write_length(vals.len());
                    for v in vals.iter() {
                        match v {
                            Some(d) => self.write_double(d),
                            // Missing doubles are written as the NA bit pattern.
                            None => self.buf.extend_from_slice(&R_NA_REAL_BITS.to_be_bytes()),
                        }
                    }
                }
                Vector::Complex(vals) => {
                    self.write_flags(CPLXSXP, has_attr, false);
                    self.write_length(vals.len());
                    for v in vals.iter() {
                        match v {
                            Some(c) => {
                                self.write_double(c.re);
                                self.write_double(c.im);
                            }
                            // A missing complex is NA in both components.
                            None => {
                                self.buf.extend_from_slice(&R_NA_REAL_BITS.to_be_bytes());
                                self.buf.extend_from_slice(&R_NA_REAL_BITS.to_be_bytes());
                            }
                        }
                    }
                }
                Vector::Character(vals) => {
                    self.write_flags(STRSXP, has_attr, false);
                    self.write_length(vals.len());
                    for v in vals.iter() {
                        self.write_charsxp(v.as_deref());
                    }
                }
                Vector::Raw(bytes) => {
                    self.write_flags(RAWSXP, has_attr, false);
                    self.write_length(bytes.len());
                    self.buf.extend_from_slice(bytes);
                }
            }
            if has_attr {
                if let Some(attrs) = rv.attrs.as_ref() {
                    self.write_attributes(attrs)
                };
            }
        }
        RValue::List(list) => {
            let has_names = list.values.iter().any(|(name, _)| name.is_some());
            let mut effective_attrs: Attributes = list
                .attrs
                .as_ref()
                .map(|a| a.as_ref().clone())
                .unwrap_or_default();
            // Element names live on the list entries; surface them as a `names`
            // attribute unless one is already present.
            if has_names && !effective_attrs.contains_key("names") {
                let names: Vec<Option<String>> =
                    list.values.iter().map(|(n, _)| n.clone()).collect();
                effective_attrs.insert(
                    "names".to_string(),
                    RValue::vec(Vector::Character(names.into())),
                );
            }
            let has_attr = !effective_attrs.is_empty();

            self.write_flags(VECSXP, has_attr, false);
            self.write_length(list.values.len());
            for (_, val) in &list.values {
                self.write_item(val);
            }
            if has_attr {
                self.write_attributes(&effective_attrs);
            }
        }
        RValue::Function(func) => match func {
            RFunction::Closure { params, body, env } => {
                // Closure layout as written here: environment, formals, body.
                self.write_flags(CLOSXP, false, false);
                self.write_environment(env);
                self.write_formals(params);
                self.write_body_expr(body);
            }
            RFunction::Builtin { name, .. } => {
                // Builtins are identified by name only: length, then the bytes.
                let name_bytes = name.as_bytes();
                self.write_flags(BUILTINSXP, false, false);
                self.write_length(name_bytes.len());
                self.buf.extend_from_slice(name_bytes);
            }
        },
        RValue::Environment(env) => {
            self.write_environment(env);
        }
        RValue::Language(lang) => {
            self.write_langsxp_expr(&lang.inner);
        }
        // Promises are not serialized; the nil marker is written instead.
        RValue::Promise(_) => {
            self.write_flags(NILVALUE_SXP, false, false);
        }
    }
}
2092
2093 fn write_symbol(&mut self, name: &str) {
2095 self.write_flags(SYMSXP, false, false);
2096 self.write_charsxp(Some(name));
2097 }
2098
2099 fn write_pairlist(&mut self, bindings: &[(String, RValue)]) {
2103 for (name, value) in bindings {
2104 self.write_flags(LISTSXP, false, true);
2106 self.write_symbol(name);
2108 self.write_item(value);
2110 }
2111 self.write_nilvalue();
2113 }
2114
2115 fn write_environment(&mut self, env: &Environment) {
2119 match env.name().as_deref() {
2120 Some("R_GlobalEnv") => {
2121 self.write_flags(GLOBALENV_SXP, false, false);
2122 }
2123 Some("R_EmptyEnv") => {
2124 self.write_flags(EMPTYENV_SXP, false, false);
2125 }
2126 Some("base") => {
2127 self.write_flags(BASEENV_SXP, false, false);
2128 }
2129 _ => {
2130 let bindings = env.local_bindings();
2132 self.write_flags(ENVSXP, false, false);
2133 self.write_int(i32::from(env.is_locked()));
2135 if let Some(parent) = env.parent() {
2137 self.write_environment(&parent);
2138 } else {
2139 self.write_flags(EMPTYENV_SXP, false, false);
2140 }
2141 if bindings.is_empty() {
2143 self.write_nilvalue();
2144 } else {
2145 self.write_pairlist(&bindings);
2146 }
2147 self.write_nilvalue();
2149 }
2150 }
2151 }
2152
2153 fn write_formals(&mut self, params: &[Param]) {
2158 if params.is_empty() {
2159 self.write_nilvalue();
2160 return;
2161 }
2162 for param in params {
2163 self.write_flags(LISTSXP, false, true); self.write_symbol(if param.is_dots { "..." } else { ¶m.name });
2166 match ¶m.default {
2168 Some(default_expr) => {
2169 self.write_body_expr(default_expr);
2170 }
2171 None => {
2172 self.write_flags(MISSINGARG_SXP, false, false);
2173 }
2174 }
2175 }
2176 self.write_nilvalue();
2177 }
2178
2179 fn write_body_expr(&mut self, expr: &Expr) {
2185 match expr {
2186 Expr::Null => self.write_nilvalue(),
2187 Expr::Bool(b) => {
2188 self.write_flags(LGLSXP, false, false);
2189 self.write_length(1);
2190 self.write_int(i32::from(*b));
2191 }
2192 Expr::Integer(i) => {
2193 self.write_flags(INTSXP, false, false);
2194 self.write_length(1);
2195 let clamped =
2196 i32::try_from(*i).unwrap_or(if *i > 0 { i32::MAX } else { i32::MIN + 1 });
2197 self.write_int(clamped);
2198 }
2199 Expr::Double(d) => {
2200 self.write_flags(REALSXP, false, false);
2201 self.write_length(1);
2202 self.write_double(*d);
2203 }
2204 Expr::String(s) => {
2205 self.write_flags(STRSXP, false, false);
2206 self.write_length(1);
2207 self.write_charsxp(Some(s));
2208 }
2209 Expr::Na(na_type) => {
2210 use crate::parser::ast::NaType;
2211 match na_type {
2212 NaType::Logical => {
2213 self.write_flags(LGLSXP, false, false);
2214 self.write_length(1);
2215 self.write_int(R_NA_LOGICAL);
2216 }
2217 NaType::Integer => {
2218 self.write_flags(INTSXP, false, false);
2219 self.write_length(1);
2220 self.write_int(R_NA_INTEGER);
2221 }
2222 NaType::Real => {
2223 self.write_flags(REALSXP, false, false);
2224 self.write_length(1);
2225 self.buf.extend_from_slice(&R_NA_REAL_BITS.to_be_bytes());
2226 }
2227 NaType::Character => {
2228 self.write_flags(STRSXP, false, false);
2229 self.write_length(1);
2230 self.write_charsxp(None);
2231 }
2232 NaType::Complex => {
2233 self.write_flags(CPLXSXP, false, false);
2234 self.write_length(1);
2235 self.buf.extend_from_slice(&R_NA_REAL_BITS.to_be_bytes());
2236 self.buf.extend_from_slice(&R_NA_REAL_BITS.to_be_bytes());
2237 }
2238 }
2239 }
2240 Expr::Inf => {
2241 self.write_flags(REALSXP, false, false);
2242 self.write_length(1);
2243 self.write_double(f64::INFINITY);
2244 }
2245 Expr::NaN => {
2246 self.write_flags(REALSXP, false, false);
2247 self.write_length(1);
2248 self.write_double(f64::NAN);
2249 }
2250 _ => {
2253 let deparsed = deparse_expr(expr);
2254 self.write_flags(STRSXP, true, false); self.write_length(1);
2257 self.write_charsxp(Some(&deparsed));
2258 let mut attrs: Attributes = IndexMap::new();
2260 attrs.insert(
2261 "miniR.source".to_string(),
2262 RValue::vec(Vector::Logical(vec![Some(true)].into())),
2263 );
2264 self.write_attributes(&attrs);
2265 }
2266 }
2267 }
2268
2269 fn write_langsxp_expr(&mut self, expr: &Expr) {
2274 match expr {
2275 Expr::Call { func, args, .. } => {
2276 let has_named_args = args.iter().any(|a| a.name.is_some());
2278 self.write_flags(LANGSXP, false, has_named_args);
2279 if !has_named_args {
2281 self.write_body_expr(func);
2282 } else {
2283 self.write_body_expr(func);
2285 }
2286 for arg in args {
2288 let has_tag = arg.name.is_some();
2289 self.write_flags(LISTSXP, false, has_tag);
2290 if let Some(name) = &arg.name {
2291 self.write_symbol(name);
2292 }
2293 match &arg.value {
2294 Some(val_expr) => self.write_body_expr(val_expr),
2295 None => self.write_flags(MISSINGARG_SXP, false, false),
2296 }
2297 }
2298 self.write_nilvalue();
2299 }
2300 _ => {
2301 self.write_body_expr(expr);
2303 }
2304 }
2305 }
2306
2307 fn finish(self) -> Vec<u8> {
2308 self.buf
2309 }
2310}
2311
2312pub fn serialize_xdr(value: &RValue) -> Vec<u8> {
2322 let mut w = XdrWriter::new();
2323
2324 w.buf.extend_from_slice(b"X\n");
2326
2327 w.write_int(2);
2329 w.write_int(0x00040300);
2331 w.write_int(0x00020300);
2333
2334 w.write_item(value);
2336
2337 w.finish()
2338}
2339
2340pub fn serialize_ascii(value: &RValue) -> Vec<u8> {
2346 let mut w = AsciiWriter::new();
2347
2348 w.buf.push_str("A\n");
2350
2351 w.write_int(2);
2353 w.write_int(0x00040300);
2355 w.write_int(0x00020300);
2357
2358 w.write_item(value);
2360
2361 w.finish()
2362}
2363
/// Serializes `value` as RDS data.
///
/// With `ascii` set, the ASCII stream is returned as is and `compress` is
/// ignored. Otherwise the XDR stream is returned, gzip-compressed when
/// `compress` is set.
///
/// # Panics
/// Panics if gzip encoding into the in-memory buffer fails.
#[cfg(feature = "compression")]
pub fn serialize_rds(value: &RValue, compress: bool, ascii: bool) -> Vec<u8> {
    use flate2::write::GzEncoder;
    use flate2::Compression;
    use std::io::Write;

    if ascii {
        return serialize_ascii(value);
    }
    let raw = serialize_xdr(value);
    if !compress {
        return raw;
    }

    let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
    encoder.write_all(&raw).expect("gzip encoding failed");
    encoder.finish().expect("gzip finish failed")
}
2389
2390#[cfg(not(feature = "compression"))]
2393pub fn serialize_rds(value: &RValue, _compress: bool, ascii: bool) -> Vec<u8> {
2394 if ascii {
2395 serialize_ascii(value)
2396 } else {
2397 serialize_xdr(value)
2398 }
2399}
2400
/// Serializes named bindings as RData.
///
/// The output is the `RDX2\n` magic, then `X\n`, three header integers
/// (format version 2, then `0x00040300` and `0x00020300`), and the bindings as
/// a tagged pairlist. The whole buffer is gzip-compressed when `compress` is
/// set.
///
/// # Panics
/// Panics if gzip encoding into the in-memory buffer fails.
#[cfg(feature = "compression")]
pub fn serialize_rdata(bindings: &[(String, RValue)], compress: bool) -> Vec<u8> {
    use flate2::write::GzEncoder;
    use flate2::Compression;
    use std::io::Write;

    let mut writer = XdrWriter::new();
    writer.buf.extend_from_slice(b"RDX2\nX\n");
    for header_word in [2, 0x0004_0300, 0x0002_0300] {
        writer.write_int(header_word);
    }
    writer.write_pairlist(bindings);
    let raw = writer.finish();

    if !compress {
        return raw;
    }
    let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
    encoder.write_all(&raw).expect("gzip encoding failed");
    encoder.finish().expect("gzip finish failed")
}
2442
2443#[cfg(not(feature = "compression"))]
2445pub fn serialize_rdata(bindings: &[(String, RValue)], _compress: bool) -> Vec<u8> {
2446 let mut w = XdrWriter::new();
2447
2448 w.buf.extend_from_slice(b"RDX2\n");
2450
2451 w.buf.extend_from_slice(b"X\n");
2453
2454 w.write_int(2);
2456 w.write_int(0x00040300);
2458 w.write_int(0x00020300);
2460
2461 w.write_pairlist(bindings);
2463
2464 w.finish()
2465}
2466
2467#[cfg(test)]
2470mod tests {
2471 use super::*;
2472
/// Builds the fixed stream prefix used by the hand-assembled fixtures:
/// `X\n` followed by the three big-endian header integers.
fn build_rds_header() -> Vec<u8> {
    let header_words: [i32; 3] = [2, 0x0004_0300, 0x0002_0300];
    let mut buf = b"X\n".to_vec();
    for word in header_words {
        buf.extend_from_slice(&word.to_be_bytes());
    }
    buf
}
2481
/// Appends a big-endian flags word to `buf`: the type code, with bit 9 set
/// when `has_attr` is true and bit 10 set when `has_tag` is true.
fn write_flags(buf: &mut Vec<u8>, sxp_type: u8, has_attr: bool, has_tag: bool) {
    let attr_bit: u32 = if has_attr { 1 << 9 } else { 0 };
    let tag_bit: u32 = if has_tag { 1 << 10 } else { 0 };
    let packed = u32::from(sxp_type) | attr_bit | tag_bit;
    buf.extend_from_slice(&(packed as i32).to_be_bytes());
}
2492
/// Appends a CHARSXP fixture to `buf`: the flags word `9 | (1 << 12)`, the
/// byte length, then the bytes of `s`.
fn write_charsxp(buf: &mut Vec<u8>, s: &str) {
    let flags_word = (9u32 | (1 << 12)) as i32;
    let byte_len = s.len() as i32;
    for word in [flags_word, byte_len] {
        buf.extend_from_slice(&word.to_be_bytes());
    }
    buf.extend_from_slice(s.as_bytes());
}
2499
2500 fn write_nilvalue(buf: &mut Vec<u8>) {
2501 write_flags(buf, 254, false, false);
2502 }
2503
/// A hand-assembled integer vector with a `names` attribute must decode to an
/// integer vector that still carries the attribute.
#[test]
fn unit_test_named_int_vec() {
    let mut buf = build_rds_header();

    // Type 13 with the attribute bit: length 3, values 10, 20, 30.
    write_flags(&mut buf, 13, true, false);
    for word in [3i32, 10, 20, 30] {
        buf.extend_from_slice(&word.to_be_bytes());
    }

    // Attribute pairlist: tagged cell, symbol "names", then a 3-element
    // character vector, closed by the nil marker.
    write_flags(&mut buf, 2, false, true);
    write_flags(&mut buf, 1, false, false);
    write_charsxp(&mut buf, "names");
    write_flags(&mut buf, 16, false, false);
    buf.extend_from_slice(&3i32.to_be_bytes());
    for label in ["a", "b", "c"] {
        write_charsxp(&mut buf, label);
    }
    write_nilvalue(&mut buf);

    let result = unserialize_xdr(&buf).unwrap();
    let RValue::Vector(rv) = &result else {
        panic!("expected Vector, got {:?}", result)
    };
    assert!(
        matches!(&rv.inner, Vector::Integer(_)),
        "expected integer vector, got {:?}",
        rv.inner
    );
    let names = rv.get_attr("names");
    assert!(
        names.is_some(),
        "expected names attribute, attrs: {:?}",
        rv.attrs
    );
}
2547
/// A one-parameter closure survives an XDR round trip with its parameter
/// list and its body (compared through `deparse_expr`).
#[test]
fn unit_test_closure_round_trip() {
    use crate::interpreter::environment::Environment;
    use crate::parser::ast::{BinaryOp, Expr, Param};

    let param_x = Param {
        name: "x".to_string(),
        default: None,
        is_dots: false,
    };
    let body = Expr::BinaryOp {
        op: BinaryOp::Add,
        lhs: Box::new(Expr::Symbol("x".to_string())),
        rhs: Box::new(Expr::Integer(1)),
    };
    let closure = RValue::Function(RFunction::Closure {
        params: vec![param_x],
        body,
        env: Environment::new_global(),
    });

    let bytes = serialize_xdr(&closure);
    let result = unserialize_xdr(&bytes).unwrap();

    let RValue::Function(RFunction::Closure { params, body, .. }) = &result else {
        panic!("expected Function(Closure), got {:?}", result)
    };
    assert_eq!(params.len(), 1);
    assert_eq!(params[0].name, "x");
    assert!(params[0].default.is_none());
    let deparsed = deparse_expr(body);
    assert_eq!(deparsed, "x + 1L");
}
2583
/// A closure with one plain parameter and one defaulted parameter survives
/// an XDR round trip, including the presence of the default.
#[test]
fn unit_test_closure_with_defaults_round_trip() {
    use crate::interpreter::environment::Environment;
    use crate::parser::ast::{BinaryOp, Expr, Param};

    let param_x = Param {
        name: "x".to_string(),
        default: None,
        is_dots: false,
    };
    let param_y = Param {
        name: "y".to_string(),
        default: Some(Expr::Integer(10)),
        is_dots: false,
    };
    let body = Expr::BinaryOp {
        op: BinaryOp::Add,
        lhs: Box::new(Expr::Symbol("x".to_string())),
        rhs: Box::new(Expr::Symbol("y".to_string())),
    };
    let closure = RValue::Function(RFunction::Closure {
        params: vec![param_x, param_y],
        body,
        env: Environment::new_global(),
    });

    let bytes = serialize_xdr(&closure);
    let result = unserialize_xdr(&bytes).unwrap();

    let RValue::Function(RFunction::Closure { params, body, .. }) = &result else {
        panic!("expected Function(Closure), got {:?}", result)
    };
    assert_eq!(params.len(), 2);
    assert_eq!(params[0].name, "x");
    assert!(params[0].default.is_none());
    assert_eq!(params[1].name, "y");
    assert!(params[1].default.is_some());
    let deparsed = deparse_expr(body);
    assert_eq!(deparsed, "x + y");
}
2628
/// The text `x + 1L` must parse to a binary operation.
#[test]
fn unit_test_parse_program_deparsed() {
    let expr = match crate::parser::parse_program("x + 1L") {
        Ok(expr) => expr,
        Err(e) => panic!("parse failed: {:?}", e),
    };
    assert!(
        matches!(&expr, Expr::BinaryOp { .. }),
        "expected BinaryOp, got {:?}",
        expr
    );
}
2645
/// After a round trip the closure body must deparse to code, not to a quoted
/// string literal.
#[test]
fn unit_test_closure_body_debug() {
    use crate::interpreter::environment::Environment;
    use crate::parser::ast::{BinaryOp, Expr, Param};

    let param_x = Param {
        name: "x".to_string(),
        default: None,
        is_dots: false,
    };
    let body = Expr::BinaryOp {
        op: BinaryOp::Add,
        lhs: Box::new(Expr::Symbol("x".to_string())),
        rhs: Box::new(Expr::Integer(1)),
    };
    let closure = RValue::Function(RFunction::Closure {
        params: vec![param_x],
        body,
        env: Environment::new_global(),
    });

    let bytes = serialize_xdr(&closure);
    let result = unserialize_xdr(&bytes).unwrap();
    let RValue::Function(RFunction::Closure { body, .. }) = &result else {
        panic!("expected Function(Closure), got {:?}", result)
    };
    let deparsed = deparse_expr(body);
    assert!(
        !deparsed.starts_with('"'),
        "body was stored as string literal instead of being re-parsed: {}",
        deparsed
    );
}
2683
/// A character vector written with a `miniR.source` attribute must still
/// carry that attribute after being read back.
#[test]
fn unit_test_strsxp_with_minir_source_attr() {
    let mut writer = super::XdrWriter::new();
    writer.buf.extend_from_slice(b"X\n");
    for header_word in [2, 0x0004_0300, 0x0002_0300] {
        writer.write_int(header_word);
    }

    // Length-1 character vector with the attribute bit set.
    writer.write_flags(STRSXP, true, false);
    writer.write_length(1);
    writer.write_charsxp(Some("x + 1L"));

    let mut attrs: Attributes = IndexMap::new();
    attrs.insert(
        "miniR.source".to_string(),
        RValue::vec(Vector::Logical(vec![Some(true)].into())),
    );
    writer.write_attributes(&attrs);

    let bytes = writer.finish();
    let result = unserialize_xdr(&bytes).unwrap();
    let RValue::Vector(rv) = &result else {
        panic!("expected Vector, got {:?}", result)
    };
    assert!(
        rv.get_attr("miniR.source").is_some(),
        "miniR.source attribute missing; attrs: {:?}",
        rv.attrs
    );
}
2719
/// The global and empty environments round-trip to environments with the
/// same names.
#[test]
fn unit_test_env_singleton_round_trip() {
    use crate::interpreter::environment::Environment;

    // Global environment first, then the empty environment, as two
    // independent round trips.
    let global = RValue::Environment(Environment::new_global());
    let global_bytes = serialize_xdr(&global);
    let global_result = unserialize_xdr(&global_bytes).unwrap();
    let RValue::Environment(global_env) = &global_result else {
        panic!("expected Environment, got {:?}", global_result)
    };
    assert_eq!(global_env.name().as_deref(), Some("R_GlobalEnv"));

    let empty = RValue::Environment(Environment::new_empty());
    let empty_bytes = serialize_xdr(&empty);
    let empty_result = unserialize_xdr(&empty_bytes).unwrap();
    let RValue::Environment(empty_env) = &empty_result else {
        panic!("expected Environment, got {:?}", empty_result)
    };
    assert_eq!(empty_env.name().as_deref(), Some("R_EmptyEnv"));
}
2744
/// A hand-assembled integer vector without attributes decodes to the
/// integers 1, 2, 3.
#[test]
fn unit_test_simple_int_vec() {
    let mut buf = build_rds_header();

    // Type 13, no attributes: length 3, then the three values.
    write_flags(&mut buf, 13, false, false);
    for word in [3i32, 1, 2, 3] {
        buf.extend_from_slice(&word.to_be_bytes());
    }

    let result = unserialize_xdr(&buf).unwrap();
    let RValue::Vector(rv) = &result else {
        panic!("expected Vector, got {:?}", result)
    };
    assert!(matches!(&rv.inner, Vector::Integer(_)));
    if let Vector::Integer(ints) = &rv.inner {
        assert_eq!(ints.len(), 3);
        assert_eq!(ints.get_opt(0), Some(1));
        assert_eq!(ints.get_opt(1), Some(2));
        assert_eq!(ints.get_opt(2), Some(3));
    }
}
2769
/// Formats a selection of finite doubles with `format_hex_float`, parses the
/// text back with `parse_hex_float`, and requires a bit-exact match.
#[test]
fn hex_float_roundtrip_normal_values() {
    // Covers both zeros, simple values, a non-terminating binary fraction
    // (0.1), very small and very large magnitudes, and the f64 limits
    // EPSILON / MIN_POSITIVE.
    let samples = [
        0.0,
        -0.0,
        1.0,
        -1.0,
        0.1,
        0.5,
        2.0,
        std::f64::consts::PI,
        1e-300,
        1e300,
        f64::EPSILON,
        f64::MIN_POSITIVE,
    ];

    for v in samples.iter().copied() {
        let hex = format_hex_float(v);
        let parsed = match parse_hex_float(&hex) {
            Ok(value) => value,
            Err(e) => panic!("failed to parse hex float '{}' (from {}): {}", hex, v, e),
        };
        // Compare raw bits so that 0.0 and -0.0 are told apart.
        assert_eq!(
            v.to_bits(),
            parsed.to_bits(),
            "hex float roundtrip failed for {}: '{}' parsed to {}",
            v,
            hex,
            parsed
        );
    }
}
2803
/// Negative zero must keep its sign in both directions: the formatted text
/// starts with `-`, and parsing that text yields a negative-signed value.
#[test]
fn hex_float_negative_zero() {
    let encoded = format_hex_float(-0.0);
    assert!(
        encoded.starts_with('-'),
        "negative zero should have minus sign"
    );

    let decoded = parse_hex_float(&encoded).unwrap();
    assert!(
        decoded.is_sign_negative(),
        "parsed -0.0 should be negative"
    );
}
2811
/// Checks the textual spellings of the non-finite doubles accepted by
/// `parse_ascii_double`: `Inf`, `-Inf`, `NaN`, and `NA`.
#[test]
fn parse_ascii_double_special_values() {
    // Infinities compare equal to the matching f64 constants.
    for &(text, expected) in &[("Inf", f64::INFINITY), ("-Inf", f64::NEG_INFINITY)] {
        assert_eq!(parse_ascii_double(text).unwrap(), expected);
    }

    // NaN never compares equal to itself, so test the predicate instead.
    assert!(parse_ascii_double("NaN").unwrap().is_nan());

    // NA must carry the exact bit pattern stored in R_NA_REAL_BITS.
    let na_bits = parse_ascii_double("NA").unwrap().to_bits();
    assert_eq!(na_bits, R_NA_REAL_BITS);
}
2819
/// Serializes an integer vector containing an NA (`None`) with
/// `serialize_ascii` and checks that reading it back preserves every slot.
#[test]
fn ascii_roundtrip_integer_vector() {
    let val = RValue::vec(Vector::Integer(
        vec![Some(1), Some(2), None, Some(4)].into(),
    ));

    // NOTE(review): the ASCII bytes are read back through `unserialize_xdr`,
    // which presumably detects the format itself -- confirm.
    let bytes = serialize_ascii(&val);
    let result = unserialize_xdr(&bytes).unwrap();

    let rv = match &result {
        RValue::Vector(rv) => rv,
        other => panic!("expected Vector, got {:?}", other),
    };
    let ints = match &rv.inner {
        Vector::Integer(ints) => ints,
        _ => panic!("expected Integer vector"),
    };

    assert_eq!(ints.len(), 4);
    assert_eq!(ints.get_opt(0), Some(1));
    assert_eq!(ints.get_opt(1), Some(2));
    // The NA slot must come back as None.
    assert_eq!(ints.get_opt(2), None);
    assert_eq!(ints.get_opt(3), Some(4));
}
2842
/// Serializes a double vector holding an ordinary value, an infinity, an NA
/// (`None`) and negative zero with `serialize_ascii`, then checks that each
/// element survives the round trip.
#[test]
fn ascii_roundtrip_double_vector() {
    let val = RValue::vec(Vector::Double(
        vec![Some(0.1), Some(f64::INFINITY), None, Some(-0.0)].into(),
    ));

    // NOTE(review): the ASCII bytes are read back through `unserialize_xdr`,
    // which presumably detects the format itself -- confirm.
    let bytes = serialize_ascii(&val);
    let result = unserialize_xdr(&bytes).unwrap();

    let rv = match &result {
        RValue::Vector(rv) => rv,
        other => panic!("expected Vector, got {:?}", other),
    };
    let dbls = match &rv.inner {
        Vector::Double(dbls) => dbls,
        _ => panic!("expected Double vector"),
    };

    assert_eq!(dbls.len(), 4);
    assert_eq!(dbls.get_opt(0), Some(0.1));
    assert_eq!(dbls.get_opt(1), Some(f64::INFINITY));
    assert_eq!(dbls.get_opt(2), None);

    // `-0.0 == 0.0` under float comparison, so compare raw bits to make sure
    // the sign of zero was preserved.
    let last_bits = dbls.get_opt(3).expect("-0.0 should not be NA").to_bits();
    assert_eq!(last_bits, (-0.0f64).to_bits());
}
2869
/// Serializes a character vector with an NA (`None`) in the middle using
/// `serialize_ascii` and checks that the strings and the NA are restored.
#[test]
fn ascii_roundtrip_character_vector() {
    let val = RValue::vec(Vector::Character(
        vec![Some("hello".to_string()), None, Some("world".to_string())].into(),
    ));

    // NOTE(review): the ASCII bytes are read back through `unserialize_xdr`,
    // which presumably detects the format itself -- confirm.
    let bytes = serialize_ascii(&val);
    let result = unserialize_xdr(&bytes).unwrap();

    let rv = match &result {
        RValue::Vector(rv) => rv,
        other => panic!("expected Vector, got {:?}", other),
    };
    let chars = match &rv.inner {
        Vector::Character(chars) => chars,
        _ => panic!("expected Character vector"),
    };

    assert_eq!(chars.len(), 3);
    assert_eq!(chars[0], Some("hello".to_string()));
    // The NA string must come back as None, not as an empty string.
    assert_eq!(chars[1], None);
    assert_eq!(chars[2], Some("world".to_string()));
}
2891
/// `RValue::Null` written by `serialize_ascii` must be read back as
/// `RValue::Null`.
#[test]
fn ascii_roundtrip_null() {
    let bytes = serialize_ascii(&RValue::Null);
    let result = unserialize_xdr(&bytes).unwrap();
    assert!(matches!(result, RValue::Null));
}
2899
2900 }