Skip to main content

r/interpreter/builtins/
serialize.rs

1//! GNU R serialization — reading and writing RDS files.
2//!
3//! Implements both the XDR binary format (format 'X') and the ASCII text format
4//! (format 'A') used by `readRDS()`/`saveRDS()`.
5//! See R-ints.texi "Serialization Formats" for the spec.
6
7use crate::interpreter::environment::Environment;
8use crate::interpreter::value::*;
9use crate::parser::ast::{Expr, Param};
10use indexmap::IndexMap;
11use std::fmt::Write as FmtWrite;
12
13// region: constants
14
/// Serialized encoding of R's integer NA: the smallest `i32`.
const R_NA_INTEGER: i32 = i32::MIN;

/// Serialized encoding of R's logical NA; identical to the integer NA encoding.
const R_NA_LOGICAL: i32 = i32::MIN;

/// Bit pattern of R's `NA_real_`: a specific NaN whose low 32 bits are 0x7A2.
const R_NA_REAL_BITS: u64 = 0x7FF0_0000_0000_07A2;

// SEXPTYPE codes. These are wire-format values and must stay in sync with
// GNU R's Rinternals.h.
const NILSXP: u8 = 0;
const SYMSXP: u8 = 1;
const LISTSXP: u8 = 2;
const CLOSXP: u8 = 3;
const ENVSXP: u8 = 4;
const PROMSXP: u8 = 5;
const LANGSXP: u8 = 6;
const SPECIALSXP: u8 = 7;
const BUILTINSXP: u8 = 8;
const CHARSXP: u8 = 9;
const LGLSXP: u8 = 10;
const INTSXP: u8 = 13;
const REALSXP: u8 = 14;
const CPLXSXP: u8 = 15;
const STRSXP: u8 = 16;
const VECSXP: u8 = 19;
const EXPRSXP: u8 = 20;
const RAWSXP: u8 = 24;

// Pseudo-SEXPTYPE codes: values that appear in the type byte of a flags word
// but stand for back-references and well-known singleton objects.
const REFSXP: u8 = 255;
const NILVALUE_SXP: u8 = 254;
const NAMESPACESXP: u8 = 249;
const BASENAMESPACE_SXP: u8 = 247;
const MISSINGARG_SXP: u8 = 246;
#[allow(dead_code)]
const UNBOUNDVALUE_SXP: u8 = 245;
const GLOBALENV_SXP: u8 = 244;
const BASEENV_SXP: u8 = 243;
const EMPTYENV_SXP: u8 = 242;

// Bits of the flags word (above the low type byte).
/// Set when the item carries an attribute pairlist.
const HAS_ATTR_MASK: u32 = 0x200;
/// Set when a pairlist-like node carries a tag.
const HAS_TAG_MASK: u32 = 0x400;
59
60// endregion
61
62// region: XdrReader
63
64/// Cursor-based reader for big-endian (XDR) binary data.
65struct XdrReader<'a> {
66    data: &'a [u8],
67    pos: usize,
68    /// Reference table for back-references (pseudo-SEXPTYPE 255).
69    ref_table: Vec<RValue>,
70}
71
72impl<'a> XdrReader<'a> {
73    fn new(data: &'a [u8]) -> Self {
74        XdrReader {
75            data,
76            pos: 0,
77            ref_table: Vec::new(),
78        }
79    }
80
81    fn remaining(&self) -> usize {
82        self.data.len().saturating_sub(self.pos)
83    }
84
85    fn read_bytes(&mut self, n: usize) -> Result<&'a [u8], RError> {
86        if self.pos + n > self.data.len() {
87            return Err(RError::new(
88                RErrorKind::Other,
89                format!(
90                    "unexpected end of RDS data: need {} bytes at offset {}, have {}",
91                    n,
92                    self.pos,
93                    self.remaining()
94                ),
95            ));
96        }
97        let slice = &self.data[self.pos..self.pos + n];
98        self.pos += n;
99        Ok(slice)
100    }
101
102    /// Read a big-endian i32.
103    fn read_int(&mut self) -> Result<i32, RError> {
104        let bytes = self.read_bytes(4)?;
105        Ok(i32::from_be_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]))
106    }
107
108    /// Read a big-endian f64.
109    fn read_double(&mut self) -> Result<f64, RError> {
110        let bytes = self.read_bytes(8)?;
111        Ok(f64::from_be_bytes([
112            bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7],
113        ]))
114    }
115
116    /// Read a length value, handling long vectors (length == -1 means 64-bit length).
117    fn read_length(&mut self) -> Result<usize, RError> {
118        let len = self.read_int()?;
119        if len >= 0 {
120            Ok(len as usize)
121        } else if len == -1 {
122            // Long vector: next two i32s form a 64-bit length (upper, lower).
123            let upper = self.read_int()? as u32;
124            let lower = self.read_int()? as u32;
125            let long_len = (u64::from(upper) << 32) | u64::from(lower);
126            usize::try_from(long_len).map_err(|_| {
127                RError::new(
128                    RErrorKind::Other,
129                    format!("vector length {} too large for this platform", long_len),
130                )
131            })
132        } else {
133            Err(RError::new(
134                RErrorKind::Other,
135                format!("invalid vector length: {}", len),
136            ))
137        }
138    }
139
140    /// Read a CHARSXP: length + raw bytes. Length -1 means NA_STRING.
141    fn read_charsxp(&mut self) -> Result<Option<String>, RError> {
142        let len = self.read_int()?;
143        if len == -1 {
144            return Ok(None); // NA_STRING
145        }
146        let n = usize::try_from(len).map_err(|_| {
147            RError::new(
148                RErrorKind::Other,
149                format!("invalid CHARSXP length: {}", len),
150            )
151        })?;
152        let bytes = self.read_bytes(n)?;
153        // R strings can be in various encodings; assume UTF-8 or Latin-1.
154        // Try UTF-8 first, fall back to lossy conversion.
155        match std::str::from_utf8(bytes) {
156            Ok(s) => Ok(Some(s.to_string())),
157            Err(_) => {
158                // Latin-1 fallback: each byte maps to its Unicode codepoint
159                let s: String = bytes.iter().map(|&b| b as char).collect();
160                Ok(Some(s))
161            }
162        }
163    }
164
165    /// Register a value in the reference table and return it.
166    fn ref_add(&mut self, value: RValue) -> RValue {
167        self.ref_table.push(value.clone());
168        value
169    }
170
171    /// Look up a reference by 1-based index.
172    fn ref_get(&self, index: usize) -> Result<RValue, RError> {
173        if index == 0 || index > self.ref_table.len() {
174            return Err(RError::new(
175                RErrorKind::Other,
176                format!(
177                    "invalid reference index {} (table has {} entries)",
178                    index,
179                    self.ref_table.len()
180                ),
181            ));
182        }
183        Ok(self.ref_table[index - 1].clone())
184    }
185
186    /// Read flags integer and extract SEXPTYPE, has-attr, has-tag bits.
187    fn read_flags(&mut self) -> Result<(u8, bool, bool, u32), RError> {
188        let flags = self.read_int()? as u32;
189        let sxp_type = (flags & 0xFF) as u8;
190        let has_attr = flags & HAS_ATTR_MASK != 0;
191        let has_tag = flags & HAS_TAG_MASK != 0;
192        Ok((sxp_type, has_attr, has_tag, flags))
193    }
194
195    /// Read attributes stored as a pairlist, returning an Attributes map.
196    fn read_attributes(&mut self) -> Result<Attributes, RError> {
197        let mut attrs = IndexMap::new();
198        // Attributes are stored as a pairlist (LISTSXP chain).
199        // Each node has: flags, tag (symbol), car (value), then cdr (next node or NILVALUE).
200        loop {
201            let (sxp_type, _has_attr, has_tag, _flags) = self.read_flags()?;
202            match sxp_type {
203                LISTSXP => {
204                    let tag_name = if has_tag {
205                        self.read_item_as_symbol()?
206                    } else {
207                        String::new()
208                    };
209                    let value = self.read_item()?;
210                    if !tag_name.is_empty() {
211                        attrs.insert(tag_name, value);
212                    }
213                    // CDR is the next node — continue the loop via recursion,
214                    // but we handle it iteratively by reading the next flags.
215                    // Actually, the CDR is explicitly written. We need to check
216                    // if the next item is NILVALUE_SXP to stop.
217                    // Peek at what we just consumed for CDR... Actually the pairlist
218                    // structure means after reading TAG+CAR, we need to read CDR.
219                    // But CDR is the next pairlist node, which is the next iteration.
220                    // This doesn't work right - we need to read the full pairlist recursively.
221                }
222                NILVALUE_SXP => break,
223                _ => {
224                    return Err(RError::new(
225                        RErrorKind::Other,
226                        format!(
227                            "unexpected SEXPTYPE {} in attribute pairlist (expected LISTSXP or NILVALUE)",
228                            sxp_type
229                        ),
230                    ));
231                }
232            }
233        }
234        Ok(attrs)
235    }
236
237    /// Read one serialized item recursively.
238    fn read_item(&mut self) -> Result<RValue, RError> {
239        let (sxp_type, has_attr, has_tag, flags) = self.read_flags()?;
240        self.read_item_inner(sxp_type, has_attr, has_tag, flags)
241    }
242
243    fn read_item_inner(
244        &mut self,
245        sxp_type: u8,
246        has_attr: bool,
247        has_tag: bool,
248        flags: u32,
249    ) -> Result<RValue, RError> {
250        match sxp_type {
251            NILVALUE_SXP => Ok(RValue::Null),
252            NILSXP => Ok(RValue::Null),
253
254            EMPTYENV_SXP => {
255                let env = Environment::new_empty();
256                let val = RValue::Environment(env);
257                Ok(self.ref_add(val))
258            }
259
260            BASEENV_SXP | BASENAMESPACE_SXP => {
261                // Base env and base namespace are both represented as a named
262                // environment. We use a simple named env as a stand-in.
263                let env = Environment::new_empty();
264                env.set_name("base".to_string());
265                let val = RValue::Environment(env);
266                Ok(self.ref_add(val))
267            }
268
269            GLOBALENV_SXP => {
270                let env = Environment::new_global();
271                let val = RValue::Environment(env);
272                Ok(self.ref_add(val))
273            }
274
275            MISSINGARG_SXP => Ok(RValue::Null),
276
277            NAMESPACESXP => {
278                // Namespace: read a STRSXP with the namespace info,
279                // register as ref, return Null placeholder.
280                let _info = self.read_item()?;
281                let val = RValue::Null;
282                Ok(self.ref_add(val))
283            }
284
285            REFSXP => {
286                // Reference: the flags contain the packed index.
287                // If the index in the flags field is 0, read an explicit integer.
288                let ref_index = (flags >> 8) as usize;
289                if ref_index == 0 {
290                    let idx = self.read_int()? as usize;
291                    self.ref_get(idx)
292                } else {
293                    self.ref_get(ref_index)
294                }
295            }
296
297            SYMSXP => {
298                // Symbol: read a CHARSXP for the name.
299                let (inner_type, _ia, _it, inner_flags) = self.read_flags()?;
300                let name = if inner_type == CHARSXP {
301                    self.read_charsxp_with_flags(inner_flags)?
302                        .unwrap_or_default()
303                } else {
304                    // Unexpected — try to read as item and convert
305                    return Err(RError::new(
306                        RErrorKind::Other,
307                        format!("expected CHARSXP inside SYMSXP, got type {}", inner_type),
308                    ));
309                };
310                let val = RValue::vec(Vector::Character(vec![Some(name)].into()));
311                Ok(self.ref_add(val))
312            }
313
314            CHARSXP => {
315                let s = self.read_charsxp_with_flags(flags)?;
316                Ok(match s {
317                    Some(s) => RValue::vec(Vector::Character(vec![Some(s)].into())),
318                    None => RValue::Null,
319                })
320            }
321
322            LGLSXP => {
323                let len = self.read_length()?;
324                let mut values = Vec::with_capacity(len);
325                for _ in 0..len {
326                    let raw = self.read_int()?;
327                    if raw == R_NA_LOGICAL {
328                        values.push(None);
329                    } else {
330                        values.push(Some(raw != 0));
331                    }
332                }
333                let mut rv = RVector::from(Vector::Logical(values.into()));
334                if has_attr {
335                    let attrs = self.read_attributes()?;
336                    rv.attrs = Some(Box::new(attrs));
337                }
338                Ok(RValue::Vector(rv))
339            }
340
341            INTSXP => {
342                let len = self.read_length()?;
343                let mut values: Vec<Option<i64>> = Vec::with_capacity(len);
344                for _ in 0..len {
345                    let raw = self.read_int()?;
346                    if raw == R_NA_INTEGER {
347                        values.push(None);
348                    } else {
349                        values.push(Some(i64::from(raw)));
350                    }
351                }
352                let mut rv = RVector::from(Vector::Integer(values.into()));
353                if has_attr {
354                    let attrs = self.read_attributes()?;
355                    rv.attrs = Some(Box::new(attrs));
356                }
357                Ok(RValue::Vector(rv))
358            }
359
360            REALSXP => {
361                let len = self.read_length()?;
362                let mut values: Vec<Option<f64>> = Vec::with_capacity(len);
363                for _ in 0..len {
364                    let val = self.read_double()?;
365                    if val.to_bits() == R_NA_REAL_BITS {
366                        values.push(None);
367                    } else {
368                        values.push(Some(val));
369                    }
370                }
371                let mut rv = RVector::from(Vector::Double(values.into()));
372                if has_attr {
373                    let attrs = self.read_attributes()?;
374                    rv.attrs = Some(Box::new(attrs));
375                }
376                Ok(RValue::Vector(rv))
377            }
378
379            CPLXSXP => {
380                let len = self.read_length()?;
381                let mut values: Vec<Option<num_complex::Complex64>> = Vec::with_capacity(len);
382                for _ in 0..len {
383                    let re = self.read_double()?;
384                    let im = self.read_double()?;
385                    if re.to_bits() == R_NA_REAL_BITS || im.to_bits() == R_NA_REAL_BITS {
386                        values.push(None);
387                    } else {
388                        values.push(Some(num_complex::Complex64::new(re, im)));
389                    }
390                }
391                let mut rv = RVector::from(Vector::Complex(values.into()));
392                if has_attr {
393                    let attrs = self.read_attributes()?;
394                    rv.attrs = Some(Box::new(attrs));
395                }
396                Ok(RValue::Vector(rv))
397            }
398
399            STRSXP => {
400                let len = self.read_length()?;
401                let mut values: Vec<Option<String>> = Vec::with_capacity(len);
402                for _ in 0..len {
403                    // Each element is a CHARSXP.
404                    let (inner_type, _ia, _it, inner_flags) = self.read_flags()?;
405                    if inner_type == CHARSXP {
406                        values.push(self.read_charsxp_with_flags(inner_flags)?);
407                    } else if inner_type == NILVALUE_SXP {
408                        values.push(None);
409                    } else {
410                        return Err(RError::new(
411                            RErrorKind::Other,
412                            format!(
413                                "expected CHARSXP in STRSXP element, got type {}",
414                                inner_type
415                            ),
416                        ));
417                    }
418                }
419                let mut rv = RVector::from(Vector::Character(values.into()));
420                if has_attr {
421                    let attrs = self.read_attributes()?;
422                    rv.attrs = Some(Box::new(attrs));
423                }
424                Ok(RValue::Vector(rv))
425            }
426
427            RAWSXP => {
428                let len = self.read_length()?;
429                let bytes = self.read_bytes(len)?.to_vec();
430                let mut rv = RVector::from(Vector::Raw(bytes));
431                if has_attr {
432                    let attrs = self.read_attributes()?;
433                    rv.attrs = Some(Box::new(attrs));
434                }
435                Ok(RValue::Vector(rv))
436            }
437
438            VECSXP | EXPRSXP => {
439                let len = self.read_length()?;
440                let mut elements = Vec::with_capacity(len);
441                for _ in 0..len {
442                    let val = self.read_item()?;
443                    elements.push((None, val));
444                }
445                let mut list = RList::new(elements);
446                if has_attr {
447                    let attrs = self.read_attributes()?;
448                    // Extract "names" attribute and apply to list elements.
449                    if let Some(names_val) = attrs.get("names") {
450                        if let Some(names_vec) = names_val.as_vector() {
451                            let names = names_vec.to_characters();
452                            for (i, name) in names.iter().enumerate() {
453                                if i < list.values.len() {
454                                    list.values[i].0 = name.clone();
455                                }
456                            }
457                        }
458                    }
459                    // Store remaining attributes (excluding names, which we consumed).
460                    let mut remaining: Attributes =
461                        attrs.into_iter().filter(|(k, _)| k != "names").collect();
462                    if !remaining.is_empty() {
463                        // Re-add names to attrs too — R keeps them there
464                        if let Some(first_name) = list.values.first() {
465                            if first_name.0.is_some() {
466                                let names: Vec<Option<String>> =
467                                    list.values.iter().map(|(n, _)| n.clone()).collect();
468                                remaining.insert(
469                                    "names".to_string(),
470                                    RValue::vec(Vector::Character(names.into())),
471                                );
472                            }
473                        }
474                        list.attrs = Some(Box::new(remaining));
475                    }
476                }
477                Ok(RValue::List(list))
478            }
479
480            LISTSXP => {
481                // Pairlist: TAG (optional) + CAR + CDR chain.
482                // Convert to a named list.
483                self.read_pairlist_as_list(has_attr, has_tag, flags)
484            }
485
486            CLOSXP => {
487                // Closure: environment + formals (pairlist) + body.
488                let env_val = self.read_item()?;
489                let formals_val = self.read_item()?;
490                let body_val = self.read_item()?;
491
492                // Extract environment (fall back to an empty env if not available).
493                let env = match env_val {
494                    RValue::Environment(e) => e,
495                    _ => Environment::new_global(),
496                };
497
498                // Convert formals pairlist to Vec<Param>.
499                let params = self.pairlist_to_params(&formals_val);
500
501                // Convert body to an Expr.
502                // If the body was serialized as a LANGSXP, it was read as a list;
503                // we try to reconstruct the AST from a deparsed string attribute
504                // or fall back to a deparsed string body.
505                let body = self.rvalue_to_body(&body_val);
506
507                if has_attr {
508                    let _attrs = self.read_attributes()?;
509                }
510
511                Ok(RValue::Function(RFunction::Closure { params, body, env }))
512            }
513
514            ENVSXP => {
515                // Non-singleton environment: locked flag + enclos + frame + hashtab + attrs.
516                // Register a placeholder first so back-references to this env work.
517                let env = Environment::new_empty();
518                let val = RValue::Environment(env.clone());
519                let val = self.ref_add(val);
520
521                let locked = self.read_int()?;
522                let _enclos = self.read_item()?; // enclosing env
523                let frame = self.read_item()?; // frame (pairlist of bindings)
524                let _hashtab = self.read_item()?; // hash table (VECSXP or NULL)
525
526                if has_attr {
527                    let _attrs = self.read_attributes()?;
528                }
529
530                if locked != 0 {
531                    env.lock(false);
532                }
533
534                // Set enclosing environment if available.
535                if let RValue::Environment(parent) = &_enclos {
536                    env.set_parent(Some(parent.clone()));
537                }
538
539                // Populate bindings from the frame pairlist.
540                if let RValue::List(list) = &frame {
541                    for (name, value) in &list.values {
542                        if let Some(n) = name {
543                            env.set(n.clone(), value.clone());
544                        }
545                    }
546                }
547
548                Ok(val)
549            }
550
551            PROMSXP => {
552                // Promise: environment + value + expr.
553                // We skip the promise wrapper and return the value (or expr if unforced).
554                let _env = self.read_item()?;
555                let value = self.read_item()?;
556                let expr = self.read_item()?;
557                if has_attr {
558                    let _attrs = self.read_attributes()?;
559                }
560                // If the value is UNBOUNDVALUE_SXP (read as Null), use the expression.
561                if value.is_null() {
562                    Ok(expr)
563                } else {
564                    Ok(value)
565                }
566            }
567
568            SPECIALSXP | BUILTINSXP => {
569                // Builtin/special functions: stored as a length + name string.
570                let len = self.read_length()?;
571                let name_bytes = self.read_bytes(len)?;
572                let name = String::from_utf8_lossy(name_bytes).to_string();
573                // We can't reconstruct builtin function pointers, so return a
574                // placeholder symbol indicating the builtin name.
575                Ok(RValue::vec(Vector::Character(
576                    vec![Some(format!(".Primitive(\"{}\")", name))].into(),
577                )))
578            }
579
580            LANGSXP => {
581                // Language object: pairlist where CAR is function, CDR is args.
582                // Read as a pairlist and then try to reconstruct a Language/Expr.
583                let list_val = self.read_pairlist_as_list(has_attr, has_tag, flags)?;
584                // Try to convert the pairlist representation to an Expr.
585                if let Some(expr) = self.list_to_call_expr(&list_val) {
586                    Ok(RValue::Language(Language::new(expr)))
587                } else {
588                    // Fall back to keeping as a list if we can't reconstruct.
589                    Ok(list_val)
590                }
591            }
592
593            // S4 object (type 25)
594            25 => {
595                // OBJSXP / S4: read attributes only.
596                let attrs = if has_attr {
597                    self.read_attributes()?
598                } else {
599                    IndexMap::new()
600                };
601                let mut list = RList::new(Vec::new());
602                if !attrs.is_empty() {
603                    list.attrs = Some(Box::new(attrs));
604                }
605                Ok(RValue::List(list))
606            }
607
608            _ => Err(RError::new(
609                RErrorKind::Other,
610                format!(
611                    "unsupported SEXPTYPE {} at offset {} in RDS data",
612                    sxp_type,
613                    self.pos - 4
614                ),
615            )),
616        }
617    }
618
619    /// Read a CHARSXP given that the flags have already been read.
620    fn read_charsxp_with_flags(&mut self, _flags: u32) -> Result<Option<String>, RError> {
621        self.read_charsxp()
622    }
623
624    /// Read an item and extract it as a symbol name (string).
625    fn read_item_as_symbol(&mut self) -> Result<String, RError> {
626        let val = self.read_item()?;
627        match &val {
628            RValue::Vector(rv) => match &rv.inner {
629                Vector::Character(c) => Ok(c.first().and_then(|s| s.clone()).unwrap_or_default()),
630                _ => Ok(String::new()),
631            },
632            _ => Ok(String::new()),
633        }
634    }
635
636    /// Read a pairlist (LISTSXP chain) and convert to RList.
637    fn read_pairlist_as_list(
638        &mut self,
639        has_attr: bool,
640        has_tag: bool,
641        _flags: u32,
642    ) -> Result<RValue, RError> {
643        let mut elements = Vec::new();
644
645        // Read the first node's tag + car.
646        let tag = if has_tag {
647            Some(self.read_item_as_symbol()?)
648        } else {
649            None
650        };
651        let car = self.read_item()?;
652        elements.push((tag, car));
653
654        // Read CDR chain.
655        loop {
656            let (sxp_type, _has_attr_cdr, has_tag_cdr, _cdr_flags) = self.read_flags()?;
657            match sxp_type {
658                LISTSXP => {
659                    let tag = if has_tag_cdr {
660                        Some(self.read_item_as_symbol()?)
661                    } else {
662                        None
663                    };
664                    let car = self.read_item()?;
665                    elements.push((tag, car));
666                }
667                NILVALUE_SXP => break,
668                _ => {
669                    // CDR is a non-pairlist value (unusual but valid).
670                    // Read it and store as unnamed.
671                    let val =
672                        self.read_item_inner(sxp_type, _has_attr_cdr, has_tag_cdr, _cdr_flags)?;
673                    elements.push((None, val));
674                    break;
675                }
676            }
677        }
678
679        let mut list = RList::new(elements);
680        if has_attr {
681            let attrs = self.read_attributes()?;
682            list.attrs = Some(Box::new(attrs));
683        }
684        Ok(RValue::List(list))
685    }
686
687    /// Convert a pairlist (read as RList) into function parameters.
688    ///
689    /// Each pairlist node has TAG = param name and CAR = default value.
690    /// MISSINGARG_SXP becomes a parameter with no default.
691    fn pairlist_to_params(&self, val: &RValue) -> Vec<Param> {
692        match val {
693            RValue::Null => Vec::new(),
694            RValue::List(list) => list
695                .values
696                .iter()
697                .map(|(name, default_val)| {
698                    let param_name = name.clone().unwrap_or_default();
699                    let is_dots = param_name == "...";
700                    let default = if default_val.is_null() {
701                        None
702                    } else {
703                        // Try to recover the default expression from the value.
704                        Some(self.rvalue_to_body(default_val))
705                    };
706                    Param {
707                        name: param_name,
708                        default,
709                        is_dots,
710                    }
711                })
712                .collect(),
713            _ => Vec::new(),
714        }
715    }
716
717    /// Best-effort conversion of an RValue to an Expr for use as a function body
718    /// or default value.
719    ///
720    /// If the value is a Language object, use its inner Expr directly.
721    /// If it's a scalar literal, convert to the corresponding Expr literal.
722    /// If it has a "miniR.source" attribute, re-parse the deparsed source string.
723    /// Otherwise deparse to a string and wrap as a symbol (identifier).
724    fn rvalue_to_body(&self, val: &RValue) -> Expr {
725        match val {
726            RValue::Null => Expr::Null,
727            RValue::Language(lang) => (*lang.inner).clone(),
728            RValue::Vector(rv) => {
729                // Check for miniR.source attribute — indicates a deparsed expr string.
730                if rv.get_attr("miniR.source").is_some() {
731                    if let Vector::Character(vals) = &rv.inner {
732                        if let Some(Some(source)) = vals.first() {
733                            if let Ok(parsed) = crate::parser::parse_program(source) {
734                                return match parsed {
735                                    Expr::Program(mut exprs) if exprs.len() == 1 => exprs.remove(0),
736                                    Expr::Program(exprs) => Expr::Block(exprs),
737                                    other => other,
738                                };
739                            }
740                            // Parse failed — fall through to string literal.
741                        }
742                    }
743                }
744                match &rv.inner {
745                    Vector::Logical(vals) if vals.len() == 1 => match vals.first() {
746                        Some(Some(b)) => Expr::Bool(*b),
747                        _ => Expr::Na(crate::parser::ast::NaType::Logical),
748                    },
749                    Vector::Integer(vals) if vals.len() == 1 => match vals.first_opt() {
750                        Some(i) => Expr::Integer(i),
751                        _ => Expr::Na(crate::parser::ast::NaType::Integer),
752                    },
753                    Vector::Double(vals) if vals.len() == 1 => match vals.first_opt() {
754                        Some(d) => {
755                            if d == f64::INFINITY {
756                                Expr::Inf
757                            } else if d.is_nan() {
758                                Expr::NaN
759                            } else {
760                                Expr::Double(d)
761                            }
762                        }
763                        _ => Expr::Na(crate::parser::ast::NaType::Real),
764                    },
765                    Vector::Character(vals) if vals.len() == 1 => match vals.first() {
766                        Some(Some(s)) => Expr::String(s.clone()),
767                        _ => Expr::Na(crate::parser::ast::NaType::Character),
768                    },
769                    _ => {
770                        // Multi-element vectors: deparse as a symbol reference.
771                        let deparsed = format!("{}", val);
772                        Expr::Symbol(deparsed)
773                    }
774                }
775            }
776            RValue::Function(RFunction::Closure { params, body, .. }) => Expr::Function {
777                params: params.clone(),
778                body: Box::new(body.clone()),
779            },
780            _ => {
781                let deparsed = format!("{}", val);
782                Expr::Symbol(deparsed)
783            }
784        }
785    }
786
787    /// Try to convert a LANGSXP pairlist (read as an RList) into a Call Expr.
788    ///
789    /// The first element is the function (usually a symbol), the rest are arguments.
790    fn list_to_call_expr(&self, val: &RValue) -> Option<Expr> {
791        let list = match val {
792            RValue::List(l) => l,
793            _ => return None,
794        };
795        if list.values.is_empty() {
796            return None;
797        }
798
799        let (_, func_val) = &list.values[0];
800        let func_expr = match func_val {
801            RValue::Vector(rv) => match &rv.inner {
802                Vector::Character(c) => {
803                    let name = c.first().and_then(|s| s.clone()).unwrap_or_default();
804                    Expr::Symbol(name)
805                }
806                _ => return None,
807            },
808            RValue::Language(lang) => (*lang.inner).clone(),
809            _ => return None,
810        };
811
812        let args: Vec<crate::parser::ast::Arg> = list.values[1..]
813            .iter()
814            .map(|(name, val)| crate::parser::ast::Arg {
815                name: name.clone(),
816                value: Some(self.rvalue_to_body(val)),
817            })
818            .collect();
819
820        Some(Expr::Call {
821            func: Box::new(func_expr),
822            args,
823            span: None,
824        })
825    }
826}
827
828// endregion
829
830// region: AsciiReader
831
832/// Line-oriented reader for the ASCII serialization format (format 'A').
833///
834/// In R's ASCII format, every primitive is written as a line of text:
835/// - Integers: decimal on their own line
836/// - Doubles: hex float (%a) or "NA", "Inf", "-Inf", "NaN"
837/// - Strings (CHARSXP): length on one line, then the raw bytes
838/// - Flags/types: decimal integer on one line
839struct AsciiReader<'a> {
840    data: &'a [u8],
841    pos: usize,
842    /// Reference table for back-references (pseudo-SEXPTYPE 255).
843    ref_table: Vec<RValue>,
844}
845
846impl<'a> AsciiReader<'a> {
847    fn new(data: &'a [u8]) -> Self {
848        AsciiReader {
849            data,
850            pos: 0,
851            ref_table: Vec::new(),
852        }
853    }
854
855    /// Read bytes until the next newline (or end of data), returning the line
856    /// content without the trailing newline.
857    fn read_line(&mut self) -> Result<&'a str, RError> {
858        let start = self.pos;
859        while self.pos < self.data.len() && self.data[self.pos] != b'\n' {
860            self.pos += 1;
861        }
862        let line_bytes = &self.data[start..self.pos];
863        // Skip the newline
864        if self.pos < self.data.len() {
865            self.pos += 1;
866        }
867        std::str::from_utf8(line_bytes).map_err(|e| {
868            RError::new(
869                RErrorKind::Other,
870                format!("invalid UTF-8 in ASCII RDS at offset {}: {}", start, e),
871            )
872        })
873    }
874
875    /// Read a line and parse it as an i32.
876    fn read_int(&mut self) -> Result<i32, RError> {
877        let line = self.read_line()?.trim();
878        line.parse::<i32>().map_err(|e| {
879            RError::new(
880                RErrorKind::Other,
881                format!("expected integer in ASCII RDS, got '{}': {}", line, e),
882            )
883        })
884    }
885
886    /// Read a double from a line. R writes doubles using %a (hex float) format,
887    /// or special values "NA", "Inf", "-Inf", "NaN".
888    fn read_double(&mut self) -> Result<f64, RError> {
889        let line = self.read_line()?.trim().to_string();
890        parse_ascii_double(&line)
891    }
892
893    /// Read a length value (same format as int, but interpreted as usize).
894    fn read_length(&mut self) -> Result<usize, RError> {
895        let len = self.read_int()?;
896        if len >= 0 {
897            Ok(len as usize)
898        } else if len == -1 {
899            // Long vector: two more ints forming a 64-bit length
900            let upper = self.read_int()? as u32;
901            let lower = self.read_int()? as u32;
902            let long_len = (u64::from(upper) << 32) | u64::from(lower);
903            usize::try_from(long_len).map_err(|_| {
904                RError::new(
905                    RErrorKind::Other,
906                    format!("vector length {} too large for this platform", long_len),
907                )
908            })
909        } else {
910            Err(RError::new(
911                RErrorKind::Other,
912                format!("invalid vector length: {}", len),
913            ))
914        }
915    }
916
917    /// Read a CHARSXP in ASCII format: length line, then that many bytes of content.
918    /// Length -1 means NA_STRING.
919    fn read_charsxp(&mut self) -> Result<Option<String>, RError> {
920        let len = self.read_int()?;
921        if len == -1 {
922            return Ok(None); // NA_STRING
923        }
924        let n = usize::try_from(len).map_err(|_| {
925            RError::new(
926                RErrorKind::Other,
927                format!("invalid CHARSXP length: {}", len),
928            )
929        })?;
930        // Read exactly n bytes, then skip the trailing newline
931        if self.pos + n > self.data.len() {
932            return Err(RError::new(
933                RErrorKind::Other,
934                format!(
935                    "unexpected end of ASCII RDS data: need {} bytes at offset {}, have {}",
936                    n,
937                    self.pos,
938                    self.data.len() - self.pos,
939                ),
940            ));
941        }
942        let bytes = &self.data[self.pos..self.pos + n];
943        self.pos += n;
944        // Skip trailing newline after the string bytes
945        if self.pos < self.data.len() && self.data[self.pos] == b'\n' {
946            self.pos += 1;
947        }
948        match std::str::from_utf8(bytes) {
949            Ok(s) => Ok(Some(s.to_string())),
950            Err(_) => {
951                // Latin-1 fallback
952                let s: String = bytes.iter().map(|&b| b as char).collect();
953                Ok(Some(s))
954            }
955        }
956    }
957
958    /// Register a value in the reference table and return it.
959    fn ref_add(&mut self, value: RValue) -> RValue {
960        self.ref_table.push(value.clone());
961        value
962    }
963
964    /// Look up a reference by 1-based index.
965    fn ref_get(&self, index: usize) -> Result<RValue, RError> {
966        if index == 0 || index > self.ref_table.len() {
967            return Err(RError::new(
968                RErrorKind::Other,
969                format!(
970                    "invalid reference index {} (table has {} entries)",
971                    index,
972                    self.ref_table.len()
973                ),
974            ));
975        }
976        Ok(self.ref_table[index - 1].clone())
977    }
978
979    /// Read flags integer and extract SEXPTYPE, has-attr, has-tag bits.
980    fn read_flags(&mut self) -> Result<(u8, bool, bool, u32), RError> {
981        let flags = self.read_int()? as u32;
982        let sxp_type = (flags & 0xFF) as u8;
983        let has_attr = flags & HAS_ATTR_MASK != 0;
984        let has_tag = flags & HAS_TAG_MASK != 0;
985        Ok((sxp_type, has_attr, has_tag, flags))
986    }
987
988    /// Read attributes stored as a pairlist.
989    fn read_attributes(&mut self) -> Result<Attributes, RError> {
990        let mut attrs = IndexMap::new();
991        loop {
992            let (sxp_type, _has_attr, has_tag, _flags) = self.read_flags()?;
993            match sxp_type {
994                LISTSXP => {
995                    let tag_name = if has_tag {
996                        self.read_item_as_symbol()?
997                    } else {
998                        String::new()
999                    };
1000                    let value = self.read_item()?;
1001                    if !tag_name.is_empty() {
1002                        attrs.insert(tag_name, value);
1003                    }
1004                }
1005                NILVALUE_SXP => break,
1006                _ => {
1007                    return Err(RError::new(
1008                        RErrorKind::Other,
1009                        format!(
1010                            "unexpected SEXPTYPE {} in attribute pairlist (expected LISTSXP or NILVALUE)",
1011                            sxp_type
1012                        ),
1013                    ));
1014                }
1015            }
1016        }
1017        Ok(attrs)
1018    }
1019
1020    /// Read one serialized item recursively.
1021    fn read_item(&mut self) -> Result<RValue, RError> {
1022        let (sxp_type, has_attr, has_tag, flags) = self.read_flags()?;
1023        self.read_item_inner(sxp_type, has_attr, has_tag, flags)
1024    }
1025
1026    fn read_item_inner(
1027        &mut self,
1028        sxp_type: u8,
1029        has_attr: bool,
1030        has_tag: bool,
1031        flags: u32,
1032    ) -> Result<RValue, RError> {
1033        match sxp_type {
1034            NILVALUE_SXP | NILSXP => Ok(RValue::Null),
1035
1036            EMPTYENV_SXP | BASEENV_SXP | GLOBALENV_SXP | BASENAMESPACE_SXP => {
1037                let val = RValue::Null;
1038                Ok(self.ref_add(val))
1039            }
1040
1041            MISSINGARG_SXP => Ok(RValue::Null),
1042
1043            NAMESPACESXP => {
1044                let _info = self.read_item()?;
1045                let val = RValue::Null;
1046                Ok(self.ref_add(val))
1047            }
1048
1049            REFSXP => {
1050                let ref_index = (flags >> 8) as usize;
1051                if ref_index == 0 {
1052                    let idx = self.read_int()? as usize;
1053                    self.ref_get(idx)
1054                } else {
1055                    self.ref_get(ref_index)
1056                }
1057            }
1058
1059            SYMSXP => {
1060                let (inner_type, _ia, _it, _inner_flags) = self.read_flags()?;
1061                let name = if inner_type == CHARSXP {
1062                    self.read_charsxp()?.unwrap_or_default()
1063                } else {
1064                    return Err(RError::new(
1065                        RErrorKind::Other,
1066                        format!("expected CHARSXP inside SYMSXP, got type {}", inner_type),
1067                    ));
1068                };
1069                let val = RValue::vec(Vector::Character(vec![Some(name)].into()));
1070                Ok(self.ref_add(val))
1071            }
1072
1073            CHARSXP => {
1074                let s = self.read_charsxp()?;
1075                Ok(match s {
1076                    Some(s) => RValue::vec(Vector::Character(vec![Some(s)].into())),
1077                    None => RValue::Null,
1078                })
1079            }
1080
1081            LGLSXP => {
1082                let len = self.read_length()?;
1083                let mut values = Vec::with_capacity(len);
1084                for _ in 0..len {
1085                    let raw = self.read_int()?;
1086                    if raw == R_NA_LOGICAL {
1087                        values.push(None);
1088                    } else {
1089                        values.push(Some(raw != 0));
1090                    }
1091                }
1092                let mut rv = RVector::from(Vector::Logical(values.into()));
1093                if has_attr {
1094                    let attrs = self.read_attributes()?;
1095                    rv.attrs = Some(Box::new(attrs));
1096                }
1097                Ok(RValue::Vector(rv))
1098            }
1099
1100            INTSXP => {
1101                let len = self.read_length()?;
1102                let mut values: Vec<Option<i64>> = Vec::with_capacity(len);
1103                for _ in 0..len {
1104                    let raw = self.read_int()?;
1105                    if raw == R_NA_INTEGER {
1106                        values.push(None);
1107                    } else {
1108                        values.push(Some(i64::from(raw)));
1109                    }
1110                }
1111                let mut rv = RVector::from(Vector::Integer(values.into()));
1112                if has_attr {
1113                    let attrs = self.read_attributes()?;
1114                    rv.attrs = Some(Box::new(attrs));
1115                }
1116                Ok(RValue::Vector(rv))
1117            }
1118
1119            REALSXP => {
1120                let len = self.read_length()?;
1121                let mut values: Vec<Option<f64>> = Vec::with_capacity(len);
1122                for _ in 0..len {
1123                    let val = self.read_double()?;
1124                    if val.to_bits() == R_NA_REAL_BITS {
1125                        values.push(None);
1126                    } else {
1127                        values.push(Some(val));
1128                    }
1129                }
1130                let mut rv = RVector::from(Vector::Double(values.into()));
1131                if has_attr {
1132                    let attrs = self.read_attributes()?;
1133                    rv.attrs = Some(Box::new(attrs));
1134                }
1135                Ok(RValue::Vector(rv))
1136            }
1137
1138            CPLXSXP => {
1139                let len = self.read_length()?;
1140                let mut values: Vec<Option<num_complex::Complex64>> = Vec::with_capacity(len);
1141                for _ in 0..len {
1142                    let re = self.read_double()?;
1143                    let im = self.read_double()?;
1144                    if re.to_bits() == R_NA_REAL_BITS || im.to_bits() == R_NA_REAL_BITS {
1145                        values.push(None);
1146                    } else {
1147                        values.push(Some(num_complex::Complex64::new(re, im)));
1148                    }
1149                }
1150                let mut rv = RVector::from(Vector::Complex(values.into()));
1151                if has_attr {
1152                    let attrs = self.read_attributes()?;
1153                    rv.attrs = Some(Box::new(attrs));
1154                }
1155                Ok(RValue::Vector(rv))
1156            }
1157
1158            STRSXP => {
1159                let len = self.read_length()?;
1160                let mut values: Vec<Option<String>> = Vec::with_capacity(len);
1161                for _ in 0..len {
1162                    let (inner_type, _ia, _it, _inner_flags) = self.read_flags()?;
1163                    if inner_type == CHARSXP {
1164                        values.push(self.read_charsxp()?);
1165                    } else if inner_type == NILVALUE_SXP {
1166                        values.push(None);
1167                    } else {
1168                        return Err(RError::new(
1169                            RErrorKind::Other,
1170                            format!(
1171                                "expected CHARSXP in STRSXP element, got type {}",
1172                                inner_type
1173                            ),
1174                        ));
1175                    }
1176                }
1177                let mut rv = RVector::from(Vector::Character(values.into()));
1178                if has_attr {
1179                    let attrs = self.read_attributes()?;
1180                    rv.attrs = Some(Box::new(attrs));
1181                }
1182                Ok(RValue::Vector(rv))
1183            }
1184
1185            RAWSXP => {
1186                let len = self.read_length()?;
1187                // In ASCII format, raw bytes are written as hex pairs, two chars per byte
1188                let hex_line = self.read_line()?;
1189                let hex = hex_line.trim();
1190                let mut bytes = Vec::with_capacity(len);
1191                let mut i = 0;
1192                while i + 1 < hex.len() && bytes.len() < len {
1193                    let byte = u8::from_str_radix(&hex[i..i + 2], 16).map_err(|e| {
1194                        RError::new(
1195                            RErrorKind::Other,
1196                            format!("invalid hex byte '{}' in RAWSXP: {}", &hex[i..i + 2], e),
1197                        )
1198                    })?;
1199                    bytes.push(byte);
1200                    i += 2;
1201                }
1202                let mut rv = RVector::from(Vector::Raw(bytes));
1203                if has_attr {
1204                    let attrs = self.read_attributes()?;
1205                    rv.attrs = Some(Box::new(attrs));
1206                }
1207                Ok(RValue::Vector(rv))
1208            }
1209
1210            VECSXP | EXPRSXP => {
1211                let len = self.read_length()?;
1212                let mut elements = Vec::with_capacity(len);
1213                for _ in 0..len {
1214                    let val = self.read_item()?;
1215                    elements.push((None, val));
1216                }
1217                let mut list = RList::new(elements);
1218                if has_attr {
1219                    let attrs = self.read_attributes()?;
1220                    if let Some(names_val) = attrs.get("names") {
1221                        if let Some(names_vec) = names_val.as_vector() {
1222                            let names = names_vec.to_characters();
1223                            for (i, name) in names.iter().enumerate() {
1224                                if i < list.values.len() {
1225                                    list.values[i].0 = name.clone();
1226                                }
1227                            }
1228                        }
1229                    }
1230                    let mut remaining: Attributes =
1231                        attrs.into_iter().filter(|(k, _)| k != "names").collect();
1232                    if !remaining.is_empty() {
1233                        if let Some(first_name) = list.values.first() {
1234                            if first_name.0.is_some() {
1235                                let names: Vec<Option<String>> =
1236                                    list.values.iter().map(|(n, _)| n.clone()).collect();
1237                                remaining.insert(
1238                                    "names".to_string(),
1239                                    RValue::vec(Vector::Character(names.into())),
1240                                );
1241                            }
1242                        }
1243                        list.attrs = Some(Box::new(remaining));
1244                    }
1245                }
1246                Ok(RValue::List(list))
1247            }
1248
1249            LISTSXP => self.read_pairlist_as_list(has_attr, has_tag, flags),
1250
1251            CLOSXP => {
1252                let _env = self.read_item()?;
1253                let _formals = self.read_item()?;
1254                let _body = self.read_item()?;
1255                let val = RValue::Null;
1256                if has_attr {
1257                    let _attrs = self.read_attributes()?;
1258                }
1259                Ok(val)
1260            }
1261
1262            LANGSXP => self.read_pairlist_as_list(has_attr, has_tag, flags),
1263
1264            // S4 object (type 25)
1265            25 => {
1266                let attrs = if has_attr {
1267                    self.read_attributes()?
1268                } else {
1269                    IndexMap::new()
1270                };
1271                let mut list = RList::new(Vec::new());
1272                if !attrs.is_empty() {
1273                    list.attrs = Some(Box::new(attrs));
1274                }
1275                Ok(RValue::List(list))
1276            }
1277
1278            _ => Err(RError::new(
1279                RErrorKind::Other,
1280                format!(
1281                    "unsupported SEXPTYPE {} at offset {} in ASCII RDS data",
1282                    sxp_type, self.pos
1283                ),
1284            )),
1285        }
1286    }
1287
1288    /// Read an item and extract it as a symbol name (string).
1289    fn read_item_as_symbol(&mut self) -> Result<String, RError> {
1290        let val = self.read_item()?;
1291        match &val {
1292            RValue::Vector(rv) => match &rv.inner {
1293                Vector::Character(c) => Ok(c.first().and_then(|s| s.clone()).unwrap_or_default()),
1294                _ => Ok(String::new()),
1295            },
1296            _ => Ok(String::new()),
1297        }
1298    }
1299
1300    /// Read a pairlist (LISTSXP chain) and convert to RList.
1301    fn read_pairlist_as_list(
1302        &mut self,
1303        has_attr: bool,
1304        has_tag: bool,
1305        _flags: u32,
1306    ) -> Result<RValue, RError> {
1307        let mut elements = Vec::new();
1308
1309        let tag = if has_tag {
1310            Some(self.read_item_as_symbol()?)
1311        } else {
1312            None
1313        };
1314        let car = self.read_item()?;
1315        elements.push((tag, car));
1316
1317        loop {
1318            let (sxp_type, _has_attr_cdr, has_tag_cdr, _cdr_flags) = self.read_flags()?;
1319            match sxp_type {
1320                LISTSXP => {
1321                    let tag = if has_tag_cdr {
1322                        Some(self.read_item_as_symbol()?)
1323                    } else {
1324                        None
1325                    };
1326                    let car = self.read_item()?;
1327                    elements.push((tag, car));
1328                }
1329                NILVALUE_SXP => break,
1330                _ => {
1331                    let val =
1332                        self.read_item_inner(sxp_type, _has_attr_cdr, has_tag_cdr, _cdr_flags)?;
1333                    elements.push((None, val));
1334                    break;
1335                }
1336            }
1337        }
1338
1339        let mut list = RList::new(elements);
1340        if has_attr {
1341            let attrs = self.read_attributes()?;
1342            list.attrs = Some(Box::new(attrs));
1343        }
1344        Ok(RValue::List(list))
1345    }
1346}
1347
1348/// Parse a double from R's ASCII serialization format.
1349///
1350/// R writes doubles using the C `%a` hex-float format (e.g. `0x1.5p+5`),
1351/// or special strings "NA", "Inf", "-Inf", "NaN".
1352fn parse_ascii_double(s: &str) -> Result<f64, RError> {
1353    match s {
1354        "NA" => Ok(f64::from_bits(R_NA_REAL_BITS)),
1355        "Inf" => Ok(f64::INFINITY),
1356        "-Inf" => Ok(f64::NEG_INFINITY),
1357        "NaN" => Ok(f64::NAN),
1358        _ if s.starts_with("0x") || s.starts_with("-0x") => parse_hex_float(s),
1359        _ => s.parse::<f64>().map_err(|e| {
1360            RError::new(
1361                RErrorKind::Other,
1362                format!("failed to parse double '{}': {}", s, e),
1363            )
1364        }),
1365    }
1366}
1367
1368/// Parse a C-style hex float string like "0x1.999999999999ap-4" into f64.
1369///
1370/// Format: [+-]0x<hex_mantissa>p[+-]<decimal_exponent>
1371fn parse_hex_float(s: &str) -> Result<f64, RError> {
1372    let make_err = || RError::new(RErrorKind::Other, format!("invalid hex float: '{}'", s));
1373
1374    let (negative, rest) = if let Some(r) = s.strip_prefix('-') {
1375        (true, r)
1376    } else if let Some(r) = s.strip_prefix('+') {
1377        (false, r)
1378    } else {
1379        (false, s)
1380    };
1381
1382    let rest = rest
1383        .strip_prefix("0x")
1384        .or_else(|| rest.strip_prefix("0X"))
1385        .ok_or_else(make_err)?;
1386
1387    // Split at 'p' or 'P' for the exponent
1388    let (mantissa_str, exp_str) = if let Some(idx) = rest.find(['p', 'P']) {
1389        (&rest[..idx], &rest[idx + 1..])
1390    } else {
1391        // No exponent — treat as 0
1392        (rest, "0")
1393    };
1394
1395    // Parse the hex mantissa (may have a decimal point)
1396    let (int_part, frac_part) = if let Some(dot_idx) = mantissa_str.find('.') {
1397        (&mantissa_str[..dot_idx], &mantissa_str[dot_idx + 1..])
1398    } else {
1399        (mantissa_str, "")
1400    };
1401
1402    // Parse integer part of mantissa as hex
1403    let int_val = if int_part.is_empty() {
1404        0u64
1405    } else {
1406        u64::from_str_radix(int_part, 16).map_err(|_| make_err())?
1407    };
1408
1409    // Parse fractional part: each hex digit contributes 4 bits
1410    let mut frac_val: f64 = 0.0;
1411    let mut frac_scale: f64 = 1.0 / 16.0;
1412    for ch in frac_part.chars() {
1413        let digit = ch.to_digit(16).ok_or_else(make_err)?;
1414        frac_val += f64::from(digit) * frac_scale;
1415        frac_scale /= 16.0;
1416    }
1417
1418    let mantissa = int_val as f64 + frac_val;
1419
1420    // Parse the binary exponent
1421    let exp: i32 = if exp_str.is_empty() {
1422        0
1423    } else {
1424        exp_str.parse().map_err(|_| make_err())?
1425    };
1426
1427    let result = mantissa * (2.0f64).powi(exp);
1428    if negative {
1429        Ok(-result)
1430    } else {
1431        Ok(result)
1432    }
1433}
1434
1435// endregion
1436
1437// region: AsciiWriter
1438
/// Builder for ASCII-format (format 'A') serialized output.
///
/// Every write appends text to an in-memory buffer.
struct AsciiWriter {
    /// The output accumulated so far.
    buf: String,
}
1443
1444impl AsciiWriter {
1445    fn new() -> Self {
1446        AsciiWriter { buf: String::new() }
1447    }
1448
1449    /// Write a decimal integer on its own line.
1450    fn write_int(&mut self, val: i32) {
1451        writeln!(self.buf, "{}", val).expect("Vec<u8> write");
1452    }
1453
1454    /// Write a double in hex-float format (%a) on its own line.
1455    /// Special values: "NA", "Inf", "-Inf", "NaN".
1456    fn write_double(&mut self, val: f64) {
1457        if val.to_bits() == R_NA_REAL_BITS {
1458            writeln!(self.buf, "NA").expect("Vec<u8> write");
1459        } else if val.is_infinite() {
1460            if val > 0.0 {
1461                writeln!(self.buf, "Inf").expect("Vec<u8> write");
1462            } else {
1463                writeln!(self.buf, "-Inf").expect("Vec<u8> write");
1464            }
1465        } else if val.is_nan() {
1466            writeln!(self.buf, "NaN").expect("Vec<u8> write");
1467        } else {
1468            writeln!(self.buf, "{}", format_hex_float(val)).expect("Vec<u8> write");
1469        }
1470    }
1471
1472    /// Write flags for an object.
1473    fn write_flags(&mut self, sxp_type: u8, has_attr: bool, has_tag: bool) {
1474        let mut flags: u32 = u32::from(sxp_type);
1475        if has_attr {
1476            flags |= HAS_ATTR_MASK;
1477        }
1478        if has_tag {
1479            flags |= HAS_TAG_MASK;
1480        }
1481        self.write_int(flags as i32);
1482    }
1483
1484    /// Write a CHARSXP: flags line, then length line, then the raw bytes + newline.
1485    fn write_charsxp(&mut self, s: Option<&str>) {
1486        match s {
1487            Some(text) => {
1488                // CHARSXP flags: type 9, UTF-8 encoding (bit 12)
1489                let flags: u32 = u32::from(CHARSXP) | (1 << 12);
1490                self.write_int(flags as i32);
1491                let bytes = text.as_bytes();
1492                self.write_int(i32::try_from(bytes.len()).unwrap_or(i32::MAX));
1493                // Write the raw bytes followed by a newline
1494                self.buf.push_str(text);
1495                self.buf.push('\n');
1496            }
1497            None => {
1498                // NA_STRING: CHARSXP with length -1
1499                let flags: u32 = u32::from(CHARSXP);
1500                self.write_int(flags as i32);
1501                self.write_int(-1);
1502            }
1503        }
1504    }
1505
1506    /// Write NILVALUE_SXP sentinel.
1507    fn write_nilvalue(&mut self) {
1508        self.write_flags(NILVALUE_SXP, false, false);
1509    }
1510
1511    /// Write a length value.
1512    fn write_length(&mut self, len: usize) {
1513        if let Ok(n) = i32::try_from(len) {
1514            self.write_int(n);
1515        } else {
1516            self.write_int(-1);
1517            let long_len = len as u64;
1518            self.write_int((long_len >> 32) as i32);
1519            self.write_int(long_len as i32);
1520        }
1521    }
1522
1523    /// Write attributes as a pairlist.
1524    fn write_attributes(&mut self, attrs: &Attributes) {
1525        for (name, value) in attrs {
1526            self.write_flags(LISTSXP, false, true);
1527            // Tag: SYMSXP containing a CHARSXP
1528            self.write_flags(SYMSXP, false, false);
1529            self.write_charsxp(Some(name));
1530            // Value
1531            self.write_item(value);
1532        }
1533        self.write_nilvalue();
1534    }
1535
1536    /// Write a single R value recursively.
1537    fn write_item(&mut self, value: &RValue) {
1538        match value {
1539            RValue::Null => {
1540                self.write_flags(NILVALUE_SXP, false, false);
1541            }
1542            RValue::Vector(rv) => {
1543                let has_attr = rv.attrs.as_ref().is_some_and(|a| !a.is_empty());
1544                match &rv.inner {
1545                    Vector::Logical(vals) => {
1546                        self.write_flags(LGLSXP, has_attr, false);
1547                        self.write_length(vals.len());
1548                        for v in vals.iter() {
1549                            match v {
1550                                Some(true) => self.write_int(1),
1551                                Some(false) => self.write_int(0),
1552                                None => self.write_int(R_NA_LOGICAL),
1553                            }
1554                        }
1555                    }
1556                    Vector::Integer(vals) => {
1557                        self.write_flags(INTSXP, has_attr, false);
1558                        self.write_length(vals.len());
1559                        for v in vals.iter() {
1560                            match v {
1561                                Some(i) => {
1562                                    let clamped = i32::try_from(i).unwrap_or_else(|_| {
1563                                        if i > i64::from(i32::MAX) {
1564                                            i32::MAX
1565                                        } else {
1566                                            i32::MIN + 1
1567                                        }
1568                                    });
1569                                    if clamped == R_NA_INTEGER {
1570                                        self.write_int(R_NA_INTEGER + 1);
1571                                    } else {
1572                                        self.write_int(clamped);
1573                                    }
1574                                }
1575                                None => self.write_int(R_NA_INTEGER),
1576                            }
1577                        }
1578                    }
1579                    Vector::Double(vals) => {
1580                        self.write_flags(REALSXP, has_attr, false);
1581                        self.write_length(vals.len());
1582                        for v in vals.iter() {
1583                            match v {
1584                                Some(d) => self.write_double(d),
1585                                None => writeln!(self.buf, "NA").expect("Vec<u8> write"),
1586                            }
1587                        }
1588                    }
1589                    Vector::Complex(vals) => {
1590                        self.write_flags(CPLXSXP, has_attr, false);
1591                        self.write_length(vals.len());
1592                        for v in vals.iter() {
1593                            match v {
1594                                Some(c) => {
1595                                    self.write_double(c.re);
1596                                    self.write_double(c.im);
1597                                }
1598                                None => {
1599                                    writeln!(self.buf, "NA").expect("Vec<u8> write");
1600                                    writeln!(self.buf, "NA").expect("Vec<u8> write");
1601                                }
1602                            }
1603                        }
1604                    }
1605                    Vector::Character(vals) => {
1606                        self.write_flags(STRSXP, has_attr, false);
1607                        self.write_length(vals.len());
1608                        for v in vals.iter() {
1609                            self.write_charsxp(v.as_deref());
1610                        }
1611                    }
1612                    Vector::Raw(bytes) => {
1613                        self.write_flags(RAWSXP, has_attr, false);
1614                        self.write_length(bytes.len());
1615                        // Write as hex string
1616                        for byte in bytes {
1617                            write!(self.buf, "{:02x}", byte).expect("Vec<u8> write");
1618                        }
1619                        self.buf.push('\n');
1620                    }
1621                }
1622                if has_attr {
1623                    if let Some(attrs) = rv.attrs.as_ref() {
1624                        self.write_attributes(attrs)
1625                    };
1626                }
1627            }
1628            RValue::List(list) => {
1629                let has_names = list.values.iter().any(|(name, _)| name.is_some());
1630                let mut effective_attrs: Attributes = list
1631                    .attrs
1632                    .as_ref()
1633                    .map(|a| a.as_ref().clone())
1634                    .unwrap_or_default();
1635                if has_names && !effective_attrs.contains_key("names") {
1636                    let names: Vec<Option<String>> =
1637                        list.values.iter().map(|(n, _)| n.clone()).collect();
1638                    effective_attrs.insert(
1639                        "names".to_string(),
1640                        RValue::vec(Vector::Character(names.into())),
1641                    );
1642                }
1643                let has_attr = !effective_attrs.is_empty();
1644
1645                self.write_flags(VECSXP, has_attr, false);
1646                self.write_length(list.values.len());
1647                for (_, val) in &list.values {
1648                    self.write_item(val);
1649                }
1650                if has_attr {
1651                    self.write_attributes(&effective_attrs);
1652                }
1653            }
1654            RValue::Function(_)
1655            | RValue::Environment(_)
1656            | RValue::Language(_)
1657            | RValue::Promise(_) => {
1658                self.write_flags(NILVALUE_SXP, false, false);
1659            }
1660        }
1661    }
1662
1663    fn finish(self) -> Vec<u8> {
1664        self.buf.into_bytes()
1665    }
1666}
1667
/// Format an `f64` as a C-style hexadecimal float string (in the spirit of `%a`).
///
/// Finite values are rendered as `[-]0x<lead>.<hex fraction>p<signed exponent>`:
///
/// * normal numbers use a leading `1` and the unbiased exponent, e.g.
///   `0x1.999999999999ap-4` for `0.1`;
/// * subnormal numbers use a leading `0` and the fixed exponent `-1022`;
/// * trailing zeros of the fraction are trimmed, but at least one digit is
///   kept (`1.0` becomes `0x1.0p+0`);
/// * positive and negative zero become `0x0p+0` and `-0x0p+0`.
///
/// Non-finite values have no hexadecimal form, so they are rendered as the
/// textual tokens `Inf`, `-Inf` and `NaN` instead of being pushed through the
/// finite-number path (which would yield a meaningless `p+1024` exponent).
fn format_hex_float(val: f64) -> String {
    if val.is_nan() {
        return "NaN".to_string();
    }
    if val.is_infinite() {
        return if val.is_sign_negative() { "-Inf" } else { "Inf" }.to_string();
    }
    if val == 0.0 {
        // `-0.0 == 0.0`, so the sign bit is the only way to tell them apart.
        return if val.is_sign_negative() { "-0x0p+0" } else { "0x0p+0" }.to_string();
    }

    let bits = val.to_bits();
    let negative = (bits >> 63) != 0;
    let biased_exp = ((bits >> 52) & 0x7FF) as i32;
    let mantissa = bits & 0x000F_FFFF_FFFF_FFFF;

    // A zero biased exponent marks a subnormal: no implicit leading 1 and a
    // fixed exponent of -1022. Everything else is a normal number.
    let (lead, exponent) = if biased_exp == 0 {
        ('0', -1022)
    } else {
        ('1', biased_exp - 1023)
    };

    // The 52 mantissa bits are exactly 13 hex digits; drop trailing zeros but
    // always keep one digit after the point.
    let digits = format!("{:013x}", mantissa);
    let trimmed = digits.trim_end_matches('0');
    let fraction = if trimmed.is_empty() { "0" } else { trimmed };

    let sign = if negative { "-" } else { "" };
    format!("{}0x{}.{}p{:+}", sign, lead, fraction, exponent)
}
1717
1718// endregion
1719
1720// region: public API
1721
1722/// Deserialize an R object from either XDR binary or ASCII format bytes.
1723///
1724/// The `data` should start at the format byte ('X', 'A', or 'B').
1725pub fn unserialize_xdr(data: &[u8]) -> Result<RValue, RError> {
1726    if data.len() < 2 {
1727        return Err(RError::new(
1728            RErrorKind::Other,
1729            "RDS data too short".to_string(),
1730        ));
1731    }
1732
1733    // Parse format header: format byte + newline
1734    let format_byte = data[0];
1735    if data[1] != b'\n' {
1736        return Err(RError::new(
1737            RErrorKind::Other,
1738            format!("expected newline after format byte, got 0x{:02x}", data[1]),
1739        ));
1740    }
1741
1742    match format_byte {
1743        b'X' => {
1744            // XDR binary format
1745            let mut reader = XdrReader::new(&data[2..]);
1746            let _version = reader.read_int()?;
1747            let _r_version_wrote = reader.read_int()?;
1748            let _r_version_min = reader.read_int()?;
1749            if _version == 3 {
1750                let _native_encoding = reader.read_item()?;
1751            }
1752            reader.read_item()
1753        }
1754        b'A' => {
1755            // ASCII text format
1756            let mut reader = AsciiReader::new(&data[2..]);
1757            let _version = reader.read_int()?;
1758            let _r_version_wrote = reader.read_int()?;
1759            let _r_version_min = reader.read_int()?;
1760            if _version == 3 {
1761                let _native_encoding = reader.read_item()?;
1762            }
1763            reader.read_item()
1764        }
1765        b'B' => Err(RError::new(
1766            RErrorKind::Other,
1767            "native binary serialization format is not yet supported; \
1768                 only XDR binary (format 'X') and ASCII (format 'A') are implemented"
1769                .to_string(),
1770        )),
1771        _ => Err(RError::new(
1772            RErrorKind::Other,
1773            format!("unknown serialization format byte: 0x{:02x}", format_byte),
1774        )),
1775    }
1776}
1777
1778/// Check if bytes look like a GNU R binary RDS file.
1779///
1780/// Returns true if the data starts with 'X\n', 'A\n', 'B\n', or a gzip header.
1781pub fn is_binary_rds(data: &[u8]) -> bool {
1782    if data.len() < 2 {
1783        return false;
1784    }
1785    // Direct format headers
1786    if data[1] == b'\n' && matches!(data[0], b'X' | b'A' | b'B') {
1787        return true;
1788    }
1789    // Gzip magic number
1790    is_gzip_data(data)
1791}
1792
/// Report whether `data` begins with the two-byte gzip magic number
/// (`0x1f 0x8b`).
pub fn is_gzip_data(data: &[u8]) -> bool {
    matches!(data, [0x1f, 0x8b, ..])
}
1797
/// Report whether `data` begins with the three-byte bzip2 signature `"BZh"`.
pub fn is_bzip2_data(data: &[u8]) -> bool {
    data.starts_with(b"BZh")
}
1802
/// Deserialize an RDS byte stream, transparently decompressing it first.
///
/// Input that starts with the gzip or bzip2 signature is fully decompressed
/// in memory and the result is handed to [`unserialize_xdr`]; anything else is
/// passed to [`unserialize_xdr`] unchanged.
///
/// # Errors
///
/// Returns an `RError` when decompression fails or when the (decompressed)
/// stream cannot be deserialized.
#[cfg(feature = "compression")]
pub fn unserialize_rds(data: &[u8]) -> Result<RValue, RError> {
    use std::io::Read;

    let mut decompressed = Vec::new();

    if is_gzip_data(data) {
        flate2::read::GzDecoder::new(data)
            .read_to_end(&mut decompressed)
            .map_err(|e| {
                RError::new(
                    RErrorKind::Other,
                    format!("failed to decompress gzip RDS data: {}", e),
                )
            })?;
    } else if is_bzip2_data(data) {
        bzip2::read::BzDecoder::new(data)
            .read_to_end(&mut decompressed)
            .map_err(|e| {
                RError::new(
                    RErrorKind::Other,
                    format!("failed to decompress bzip2 RDS data: {}", e),
                )
            })?;
    } else {
        // No known compression signature: treat the input as a plain stream.
        return unserialize_xdr(data);
    }

    unserialize_xdr(&decompressed)
}
1836
1837/// Decompress and then deserialize (no-compression fallback).
1838#[cfg(not(feature = "compression"))]
1839pub fn unserialize_rds(data: &[u8]) -> Result<RValue, RError> {
1840    if is_gzip_data(data) || is_bzip2_data(data) {
1841        Err(RError::new(
1842            RErrorKind::Other,
1843            "RDS file is compressed but miniR was built without the 'compression' feature; \
1844             rebuild with `--features compression` to read compressed RDS files"
1845                .to_string(),
1846        ))
1847    } else {
1848        unserialize_xdr(data)
1849    }
1850}
1851
1852// endregion
1853
1854// region: XdrWriter
1855
/// Append-only writer that accumulates big-endian (XDR) binary output.
struct XdrWriter {
    /// Bytes emitted so far, in stream order.
    buf: Vec<u8>,
}
1860
1861impl XdrWriter {
1862    fn new() -> Self {
1863        XdrWriter { buf: Vec::new() }
1864    }
1865
1866    /// Write a big-endian i32.
1867    fn write_int(&mut self, val: i32) {
1868        self.buf.extend_from_slice(&val.to_be_bytes());
1869    }
1870
1871    /// Write a big-endian f64.
1872    fn write_double(&mut self, val: f64) {
1873        self.buf.extend_from_slice(&val.to_be_bytes());
1874    }
1875
1876    /// Write flags for an object: SEXPTYPE in bits 0:7, has-attr in bit 9, has-tag in bit 10.
1877    fn write_flags(&mut self, sxp_type: u8, has_attr: bool, has_tag: bool) {
1878        let mut flags: u32 = u32::from(sxp_type);
1879        if has_attr {
1880            flags |= HAS_ATTR_MASK;
1881        }
1882        if has_tag {
1883            flags |= HAS_TAG_MASK;
1884        }
1885        self.write_int(flags as i32);
1886    }
1887
1888    /// Write a CHARSXP: flags + length + raw bytes. Pass `None` for NA_STRING.
1889    fn write_charsxp(&mut self, s: Option<&str>) {
1890        match s {
1891            Some(text) => {
1892                // CHARSXP flags: type 9, UTF-8 encoding (bit 12)
1893                let flags: u32 = u32::from(CHARSXP) | (1 << 12);
1894                self.write_int(flags as i32);
1895                let bytes = text.as_bytes();
1896                self.write_int(i32::try_from(bytes.len()).unwrap_or(i32::MAX));
1897                self.buf.extend_from_slice(bytes);
1898            }
1899            None => {
1900                // NA_STRING: CHARSXP with length -1
1901                let flags: u32 = u32::from(CHARSXP);
1902                self.write_int(flags as i32);
1903                self.write_int(-1);
1904            }
1905        }
1906    }
1907
1908    /// Write NILVALUE_SXP sentinel (end of pairlists, etc.)
1909    fn write_nilvalue(&mut self) {
1910        self.write_flags(NILVALUE_SXP, false, false);
1911    }
1912
1913    /// Write a length value. Uses the standard i32 encoding for lengths < 2^31.
1914    fn write_length(&mut self, len: usize) {
1915        if let Ok(n) = i32::try_from(len) {
1916            self.write_int(n);
1917        } else {
1918            // Long vector: write -1 sentinel, then upper/lower 32-bit halves.
1919            self.write_int(-1);
1920            let long_len = len as u64;
1921            self.write_int((long_len >> 32) as i32);
1922            self.write_int(long_len as i32);
1923        }
1924    }
1925
1926    /// Write attributes as a pairlist. Each entry becomes a LISTSXP node with a
1927    /// SYMSXP tag and the value as CAR. The chain terminates with NILVALUE_SXP.
1928    fn write_attributes(&mut self, attrs: &Attributes) {
1929        for (name, value) in attrs {
1930            self.write_flags(LISTSXP, false, true); // has_tag = true
1931                                                    // Tag: SYMSXP containing a CHARSXP
1932            self.write_flags(SYMSXP, false, false);
1933            self.write_charsxp(Some(name));
1934            // Value: the attribute value
1935            self.write_item(value);
1936        }
1937        self.write_nilvalue();
1938    }
1939
1940    /// Write a single R value recursively.
1941    fn write_item(&mut self, value: &RValue) {
1942        match value {
1943            RValue::Null => {
1944                self.write_flags(NILVALUE_SXP, false, false);
1945            }
1946            RValue::Vector(rv) => {
1947                let has_attr = rv.attrs.as_ref().is_some_and(|a| !a.is_empty());
1948                match &rv.inner {
1949                    Vector::Logical(vals) => {
1950                        self.write_flags(LGLSXP, has_attr, false);
1951                        self.write_length(vals.len());
1952                        for v in vals.iter() {
1953                            match v {
1954                                Some(true) => self.write_int(1),
1955                                Some(false) => self.write_int(0),
1956                                None => self.write_int(R_NA_LOGICAL),
1957                            }
1958                        }
1959                    }
1960                    Vector::Integer(vals) => {
1961                        self.write_flags(INTSXP, has_attr, false);
1962                        self.write_length(vals.len());
1963                        for v in vals.iter() {
1964                            match v {
1965                                Some(i) => {
1966                                    // R integers are i32; clamp to i32 range.
1967                                    let clamped = i32::try_from(i).unwrap_or_else(|_| {
1968                                        if i > i64::from(i32::MAX) {
1969                                            i32::MAX
1970                                        } else {
1971                                            // i32::MIN is NA, so use MIN + 1
1972                                            i32::MIN + 1
1973                                        }
1974                                    });
1975                                    // Guard against accidentally writing NA_INTEGER
1976                                    // for a non-NA value.
1977                                    if clamped == R_NA_INTEGER {
1978                                        self.write_int(R_NA_INTEGER + 1);
1979                                    } else {
1980                                        self.write_int(clamped);
1981                                    }
1982                                }
1983                                None => self.write_int(R_NA_INTEGER),
1984                            }
1985                        }
1986                    }
1987                    Vector::Double(vals) => {
1988                        self.write_flags(REALSXP, has_attr, false);
1989                        self.write_length(vals.len());
1990                        for v in vals.iter() {
1991                            match v {
1992                                Some(d) => self.write_double(d),
1993                                None => self.buf.extend_from_slice(&R_NA_REAL_BITS.to_be_bytes()),
1994                            }
1995                        }
1996                    }
1997                    Vector::Complex(vals) => {
1998                        self.write_flags(CPLXSXP, has_attr, false);
1999                        self.write_length(vals.len());
2000                        for v in vals.iter() {
2001                            match v {
2002                                Some(c) => {
2003                                    self.write_double(c.re);
2004                                    self.write_double(c.im);
2005                                }
2006                                None => {
2007                                    self.buf.extend_from_slice(&R_NA_REAL_BITS.to_be_bytes());
2008                                    self.buf.extend_from_slice(&R_NA_REAL_BITS.to_be_bytes());
2009                                }
2010                            }
2011                        }
2012                    }
2013                    Vector::Character(vals) => {
2014                        self.write_flags(STRSXP, has_attr, false);
2015                        self.write_length(vals.len());
2016                        for v in vals.iter() {
2017                            self.write_charsxp(v.as_deref());
2018                        }
2019                    }
2020                    Vector::Raw(bytes) => {
2021                        self.write_flags(RAWSXP, has_attr, false);
2022                        self.write_length(bytes.len());
2023                        self.buf.extend_from_slice(bytes);
2024                    }
2025                }
2026                if has_attr {
2027                    if let Some(attrs) = rv.attrs.as_ref() {
2028                        self.write_attributes(attrs)
2029                    };
2030                }
2031            }
2032            RValue::List(list) => {
2033                // Build the effective attributes: merge list names into attrs.
2034                let has_names = list.values.iter().any(|(name, _)| name.is_some());
2035                let mut effective_attrs: Attributes = list
2036                    .attrs
2037                    .as_ref()
2038                    .map(|a| a.as_ref().clone())
2039                    .unwrap_or_default();
2040                if has_names && !effective_attrs.contains_key("names") {
2041                    let names: Vec<Option<String>> =
2042                        list.values.iter().map(|(n, _)| n.clone()).collect();
2043                    effective_attrs.insert(
2044                        "names".to_string(),
2045                        RValue::vec(Vector::Character(names.into())),
2046                    );
2047                }
2048                let has_attr = !effective_attrs.is_empty();
2049
2050                self.write_flags(VECSXP, has_attr, false);
2051                self.write_length(list.values.len());
2052                for (_, val) in &list.values {
2053                    self.write_item(val);
2054                }
2055                if has_attr {
2056                    self.write_attributes(&effective_attrs);
2057                }
2058            }
2059            RValue::Function(func) => match func {
2060                RFunction::Closure { params, body, env } => {
2061                    self.write_flags(CLOSXP, false, false);
2062                    // Environment
2063                    self.write_environment(env);
2064                    // Formals: pairlist of parameters
2065                    self.write_formals(params);
2066                    // Body: serialize the AST as a LANGSXP.
2067                    // We deparse the body to a string and store it as a STRSXP
2068                    // so it can be re-parsed on deserialization. This ensures
2069                    // round-tripping even for complex bodies.
2070                    self.write_body_expr(body);
2071                }
2072                RFunction::Builtin { name, .. } => {
2073                    // Builtins are serialized as BUILTINSXP with the name.
2074                    let name_bytes = name.as_bytes();
2075                    self.write_flags(BUILTINSXP, false, false);
2076                    self.write_length(name_bytes.len());
2077                    self.buf.extend_from_slice(name_bytes);
2078                }
2079            },
2080            RValue::Environment(env) => {
2081                self.write_environment(env);
2082            }
2083            RValue::Language(lang) => {
2084                self.write_langsxp_expr(&lang.inner);
2085            }
2086            RValue::Promise(_) => {
2087                // Promises should be forced before serialization; write NULL.
2088                self.write_flags(NILVALUE_SXP, false, false);
2089            }
2090        }
2091    }
2092
2093    /// Write a SYMSXP (symbol): flags + CHARSXP for the name.
2094    fn write_symbol(&mut self, name: &str) {
2095        self.write_flags(SYMSXP, false, false);
2096        self.write_charsxp(Some(name));
2097    }
2098
2099    /// Write a pairlist: a chain of LISTSXP nodes, each with a TAG (symbol) and
2100    /// CAR (value), terminated by NILVALUE_SXP. This is the format used by GNU R's
2101    /// `save()` for writing workspace files (RDX2 format).
2102    fn write_pairlist(&mut self, bindings: &[(String, RValue)]) {
2103        for (name, value) in bindings {
2104            // Each node: LISTSXP with has_tag = true
2105            self.write_flags(LISTSXP, false, true);
2106            // TAG: symbol with the binding name
2107            self.write_symbol(name);
2108            // CAR: the value
2109            self.write_item(value);
2110        }
2111        // Terminate with NILVALUE_SXP
2112        self.write_nilvalue();
2113    }
2114
2115    /// Write an environment. Singleton environments (global, base, empty)
2116    /// are written as their pseudo-SEXPTYPE codes. Other environments are
2117    /// written as ENVSXP with their bindings.
2118    fn write_environment(&mut self, env: &Environment) {
2119        match env.name().as_deref() {
2120            Some("R_GlobalEnv") => {
2121                self.write_flags(GLOBALENV_SXP, false, false);
2122            }
2123            Some("R_EmptyEnv") => {
2124                self.write_flags(EMPTYENV_SXP, false, false);
2125            }
2126            Some("base") => {
2127                self.write_flags(BASEENV_SXP, false, false);
2128            }
2129            _ => {
2130                // Non-singleton: write as ENVSXP.
2131                let bindings = env.local_bindings();
2132                self.write_flags(ENVSXP, false, false);
2133                // locked flag
2134                self.write_int(i32::from(env.is_locked()));
2135                // Enclosing environment
2136                if let Some(parent) = env.parent() {
2137                    self.write_environment(&parent);
2138                } else {
2139                    self.write_flags(EMPTYENV_SXP, false, false);
2140                }
2141                // Frame: pairlist of bindings
2142                if bindings.is_empty() {
2143                    self.write_nilvalue();
2144                } else {
2145                    self.write_pairlist(&bindings);
2146                }
2147                // Hash table: write NULL (we don't use R's hash table structure)
2148                self.write_nilvalue();
2149            }
2150        }
2151    }
2152
2153    /// Write function formals as a pairlist.
2154    ///
2155    /// Each parameter becomes a LISTSXP node: TAG = param name (SYMSXP),
2156    /// CAR = default value (or MISSINGARG_SXP if no default).
2157    fn write_formals(&mut self, params: &[Param]) {
2158        if params.is_empty() {
2159            self.write_nilvalue();
2160            return;
2161        }
2162        for param in params {
2163            self.write_flags(LISTSXP, false, true); // has_tag = true
2164                                                    // TAG: parameter name
2165            self.write_symbol(if param.is_dots { "..." } else { &param.name });
2166            // CAR: default value or MISSINGARG_SXP
2167            match &param.default {
2168                Some(default_expr) => {
2169                    self.write_body_expr(default_expr);
2170                }
2171                None => {
2172                    self.write_flags(MISSINGARG_SXP, false, false);
2173                }
2174            }
2175        }
2176        self.write_nilvalue();
2177    }
2178
2179    /// Write an AST expression as a LANGSXP (language object).
2180    ///
2181    /// Simple literals are written directly as their R serialized form.
2182    /// Complex expressions (calls, blocks, etc.) are deparsed to a string and
2183    /// written as a STRSXP with a "miniR.source" attribute, enabling re-parsing.
2184    fn write_body_expr(&mut self, expr: &Expr) {
2185        match expr {
2186            Expr::Null => self.write_nilvalue(),
2187            Expr::Bool(b) => {
2188                self.write_flags(LGLSXP, false, false);
2189                self.write_length(1);
2190                self.write_int(i32::from(*b));
2191            }
2192            Expr::Integer(i) => {
2193                self.write_flags(INTSXP, false, false);
2194                self.write_length(1);
2195                let clamped =
2196                    i32::try_from(*i).unwrap_or(if *i > 0 { i32::MAX } else { i32::MIN + 1 });
2197                self.write_int(clamped);
2198            }
2199            Expr::Double(d) => {
2200                self.write_flags(REALSXP, false, false);
2201                self.write_length(1);
2202                self.write_double(*d);
2203            }
2204            Expr::String(s) => {
2205                self.write_flags(STRSXP, false, false);
2206                self.write_length(1);
2207                self.write_charsxp(Some(s));
2208            }
2209            Expr::Na(na_type) => {
2210                use crate::parser::ast::NaType;
2211                match na_type {
2212                    NaType::Logical => {
2213                        self.write_flags(LGLSXP, false, false);
2214                        self.write_length(1);
2215                        self.write_int(R_NA_LOGICAL);
2216                    }
2217                    NaType::Integer => {
2218                        self.write_flags(INTSXP, false, false);
2219                        self.write_length(1);
2220                        self.write_int(R_NA_INTEGER);
2221                    }
2222                    NaType::Real => {
2223                        self.write_flags(REALSXP, false, false);
2224                        self.write_length(1);
2225                        self.buf.extend_from_slice(&R_NA_REAL_BITS.to_be_bytes());
2226                    }
2227                    NaType::Character => {
2228                        self.write_flags(STRSXP, false, false);
2229                        self.write_length(1);
2230                        self.write_charsxp(None);
2231                    }
2232                    NaType::Complex => {
2233                        self.write_flags(CPLXSXP, false, false);
2234                        self.write_length(1);
2235                        self.buf.extend_from_slice(&R_NA_REAL_BITS.to_be_bytes());
2236                        self.buf.extend_from_slice(&R_NA_REAL_BITS.to_be_bytes());
2237                    }
2238                }
2239            }
2240            Expr::Inf => {
2241                self.write_flags(REALSXP, false, false);
2242                self.write_length(1);
2243                self.write_double(f64::INFINITY);
2244            }
2245            Expr::NaN => {
2246                self.write_flags(REALSXP, false, false);
2247                self.write_length(1);
2248                self.write_double(f64::NAN);
2249            }
2250            // For complex expressions (calls, blocks, etc.), deparse and store
2251            // as a STRSXP with a "miniR.source" attribute so it can be re-parsed.
2252            _ => {
2253                let deparsed = deparse_expr(expr);
2254                // Write as STRSXP with a "miniR.source" attr to mark it as deparsed.
2255                self.write_flags(STRSXP, true, false); // has_attr = true
2256                self.write_length(1);
2257                self.write_charsxp(Some(&deparsed));
2258                // Attribute: "miniR.source" = TRUE (marker)
2259                let mut attrs: Attributes = IndexMap::new();
2260                attrs.insert(
2261                    "miniR.source".to_string(),
2262                    RValue::vec(Vector::Logical(vec![Some(true)].into())),
2263                );
2264                self.write_attributes(&attrs);
2265            }
2266        }
2267    }
2268
2269    /// Write a Language (LANGSXP) from an Expr.
2270    ///
2271    /// Call expressions are written as proper LANGSXP pairlists.
2272    /// Other expressions are deparsed and written as STRSXP with the source marker.
2273    fn write_langsxp_expr(&mut self, expr: &Expr) {
2274        match expr {
2275            Expr::Call { func, args, .. } => {
2276                // LANGSXP: CAR = func symbol, CDR = args pairlist
2277                let has_named_args = args.iter().any(|a| a.name.is_some());
2278                self.write_flags(LANGSXP, false, has_named_args);
2279                // First element (CAR): the function
2280                if !has_named_args {
2281                    self.write_body_expr(func);
2282                } else {
2283                    // If any arg is named, the first node has no tag
2284                    self.write_body_expr(func);
2285                }
2286                // CDR: argument chain as pairlist nodes
2287                for arg in args {
2288                    let has_tag = arg.name.is_some();
2289                    self.write_flags(LISTSXP, false, has_tag);
2290                    if let Some(name) = &arg.name {
2291                        self.write_symbol(name);
2292                    }
2293                    match &arg.value {
2294                        Some(val_expr) => self.write_body_expr(val_expr),
2295                        None => self.write_flags(MISSINGARG_SXP, false, false),
2296                    }
2297                }
2298                self.write_nilvalue();
2299            }
2300            _ => {
2301                // Non-call language object: deparse as body expression.
2302                self.write_body_expr(expr);
2303            }
2304        }
2305    }
2306
2307    fn finish(self) -> Vec<u8> {
2308        self.buf
2309    }
2310}
2311
2312// endregion
2313
2314// region: serialize public API
2315
2316/// Serialize an R value to XDR binary format (version 2).
2317///
2318/// Returns the complete byte stream including the "X\n" header, version triple,
2319/// and the recursively serialized object. The output is compatible with GNU R's
2320/// `readRDS()` / `unserialize()`.
2321pub fn serialize_xdr(value: &RValue) -> Vec<u8> {
2322    let mut w = XdrWriter::new();
2323
2324    // Format header: "X\n"
2325    w.buf.extend_from_slice(b"X\n");
2326
2327    // Version 2
2328    w.write_int(2);
2329    // R version that wrote: encode as 4.3.0 (0x00040300)
2330    w.write_int(0x00040300);
2331    // Minimum R version to read: 2.3.0 (0x00020300)
2332    w.write_int(0x00020300);
2333
2334    // The actual object
2335    w.write_item(value);
2336
2337    w.finish()
2338}
2339
2340/// Serialize an R value to ASCII text format (version 2).
2341///
2342/// Returns the complete byte stream including the "A\n" header, version triple,
2343/// and the recursively serialized object. The output is human-readable and
2344/// compatible with GNU R's `readRDS()` / `unserialize()`.
2345pub fn serialize_ascii(value: &RValue) -> Vec<u8> {
2346    let mut w = AsciiWriter::new();
2347
2348    // Format header: "A\n"
2349    w.buf.push_str("A\n");
2350
2351    // Version 2
2352    w.write_int(2);
2353    // R version that wrote: encode as 4.3.0 (0x00040300)
2354    w.write_int(0x00040300);
2355    // Minimum R version to read: 2.3.0 (0x00020300)
2356    w.write_int(0x00020300);
2357
2358    // The actual object
2359    w.write_item(value);
2360
2361    w.finish()
2362}
2363
/// Serialize an R value to an RDS byte stream, gzip-compressing XDR output on request.
///
/// * `ascii == true`  - the ASCII text format is produced and returned as-is;
///   `compress` has no effect on it.
/// * `ascii == false` - the XDR binary format is produced, and gzip-compressed
///   when `compress` is true.
///
/// Only compiled when the `compression` feature is enabled.
#[cfg(feature = "compression")]
pub fn serialize_rds(value: &RValue, compress: bool, ascii: bool) -> Vec<u8> {
    // ASCII streams are never compressed by this function.
    if ascii {
        return serialize_ascii(value);
    }

    let payload = serialize_xdr(value);
    if !compress {
        return payload;
    }

    // The encoder writes into an in-memory Vec, so an I/O failure here is not expected.
    let mut gz = flate2::write::GzEncoder::new(Vec::new(), flate2::Compression::default());
    std::io::Write::write_all(&mut gz, &payload).expect("gzip encoding failed");
    gz.finish().expect("gzip finish failed")
}
2389
2390/// Serialize an R value to an RDS byte stream (no-compression fallback).
2391/// When `ascii` is true, uses the ASCII text format instead of XDR binary.
2392#[cfg(not(feature = "compression"))]
2393pub fn serialize_rds(value: &RValue, _compress: bool, ascii: bool) -> Vec<u8> {
2394    if ascii {
2395        serialize_ascii(value)
2396    } else {
2397        serialize_xdr(value)
2398    }
2399}
2400
/// Serialize named bindings to the binary `.RData` layout (RDX2), optionally gzipped.
///
/// The output is the `RDX2\n` magic, the `X\n` XDR stream marker, the three
/// header integers (format version 2, writer version 4.3.0, minimum reader
/// version 2.3.0) and finally the bindings written by `write_pairlist`. When
/// `compress` is true the whole byte stream, magic included, is gzip-compressed.
///
/// The layout is intended to be loadable by GNU R's `load()`. Only compiled
/// when the `compression` feature is enabled.
#[cfg(feature = "compression")]
pub fn serialize_rdata(bindings: &[(String, RValue)], compress: bool) -> Vec<u8> {
    let mut writer = XdrWriter::new();

    // File magic, immediately followed by the XDR stream marker.
    writer.buf.extend_from_slice(b"RDX2\n");
    writer.buf.extend_from_slice(b"X\n");

    // Format version, writer R version (4.3.0), minimum reader R version (2.3.0).
    for header_word in [2, 0x00040300, 0x00020300] {
        writer.write_int(header_word);
    }

    // The bindings themselves, as a pairlist.
    writer.write_pairlist(bindings);
    let payload = writer.finish();

    if !compress {
        return payload;
    }

    let mut gz = flate2::write::GzEncoder::new(Vec::new(), flate2::Compression::default());
    std::io::Write::write_all(&mut gz, &payload).expect("gzip encoding failed");
    gz.finish().expect("gzip finish failed")
}
2442
2443/// Serialize named bindings to GNU R binary .RData format (no-compression fallback).
2444#[cfg(not(feature = "compression"))]
2445pub fn serialize_rdata(bindings: &[(String, RValue)], _compress: bool) -> Vec<u8> {
2446    let mut w = XdrWriter::new();
2447
2448    // RDX2 header
2449    w.buf.extend_from_slice(b"RDX2\n");
2450
2451    // XDR format header: "X\n"
2452    w.buf.extend_from_slice(b"X\n");
2453
2454    // Version 2
2455    w.write_int(2);
2456    // R version that wrote: 4.3.0 (0x00040300)
2457    w.write_int(0x00040300);
2458    // Minimum R version to read: 2.3.0 (0x00020300)
2459    w.write_int(0x00020300);
2460
2461    // Write the pairlist of bindings
2462    w.write_pairlist(bindings);
2463
2464    w.finish()
2465}
2466
2467// endregion
2468
2469#[cfg(test)]
2470mod tests {
2471    use super::*;
2472
2473    fn build_rds_header() -> Vec<u8> {
2474        let mut buf = Vec::new();
2475        buf.extend_from_slice(b"X\n");
2476        buf.extend_from_slice(&2i32.to_be_bytes());
2477        buf.extend_from_slice(&0x00040300i32.to_be_bytes());
2478        buf.extend_from_slice(&0x00020300i32.to_be_bytes());
2479        buf
2480    }
2481
2482    fn write_flags(buf: &mut Vec<u8>, sxp_type: u8, has_attr: bool, has_tag: bool) {
2483        let mut flags: u32 = u32::from(sxp_type);
2484        if has_attr {
2485            flags |= 1 << 9;
2486        }
2487        if has_tag {
2488            flags |= 1 << 10;
2489        }
2490        buf.extend_from_slice(&(flags as i32).to_be_bytes());
2491    }
2492
2493    fn write_charsxp(buf: &mut Vec<u8>, s: &str) {
2494        let flags: u32 = 9 | (1 << 12);
2495        buf.extend_from_slice(&(flags as i32).to_be_bytes());
2496        buf.extend_from_slice(&(s.len() as i32).to_be_bytes());
2497        buf.extend_from_slice(s.as_bytes());
2498    }
2499
2500    fn write_nilvalue(buf: &mut Vec<u8>) {
2501        write_flags(buf, 254, false, false);
2502    }
2503
2504    #[test]
2505    fn unit_test_named_int_vec() {
2506        let mut buf = build_rds_header();
2507
2508        // INTSXP (13) with has_attr
2509        write_flags(&mut buf, 13, true, false);
2510        buf.extend_from_slice(&3i32.to_be_bytes());
2511        buf.extend_from_slice(&10i32.to_be_bytes());
2512        buf.extend_from_slice(&20i32.to_be_bytes());
2513        buf.extend_from_slice(&30i32.to_be_bytes());
2514
2515        // Attributes pairlist: LISTSXP with has_tag
2516        write_flags(&mut buf, 2, false, true);
2517        // Tag: SYMSXP
2518        write_flags(&mut buf, 1, false, false);
2519        write_charsxp(&mut buf, "names");
2520        // Value: STRSXP c("a", "b", "c")
2521        write_flags(&mut buf, 16, false, false);
2522        buf.extend_from_slice(&3i32.to_be_bytes());
2523        write_charsxp(&mut buf, "a");
2524        write_charsxp(&mut buf, "b");
2525        write_charsxp(&mut buf, "c");
2526        // NILVALUE
2527        write_nilvalue(&mut buf);
2528
2529        let result = unserialize_xdr(&buf).unwrap();
2530        match &result {
2531            RValue::Vector(rv) => {
2532                assert!(
2533                    matches!(&rv.inner, Vector::Integer(_)),
2534                    "expected integer vector, got {:?}",
2535                    rv.inner
2536                );
2537                let names = rv.get_attr("names");
2538                assert!(
2539                    names.is_some(),
2540                    "expected names attribute, attrs: {:?}",
2541                    rv.attrs
2542                );
2543            }
2544            other => panic!("expected Vector, got {:?}", other),
2545        }
2546    }
2547
2548    #[test]
2549    fn unit_test_closure_round_trip() {
2550        use crate::interpreter::environment::Environment;
2551        use crate::parser::ast::{BinaryOp, Expr, Param};
2552
2553        // Build a simple closure: function(x) x + 1
2554        let closure = RValue::Function(RFunction::Closure {
2555            params: vec![Param {
2556                name: "x".to_string(),
2557                default: None,
2558                is_dots: false,
2559            }],
2560            body: Expr::BinaryOp {
2561                op: BinaryOp::Add,
2562                lhs: Box::new(Expr::Symbol("x".to_string())),
2563                rhs: Box::new(Expr::Integer(1)),
2564            },
2565            env: Environment::new_global(),
2566        });
2567
2568        let bytes = serialize_xdr(&closure);
2569        let result = unserialize_xdr(&bytes).unwrap();
2570
2571        match &result {
2572            RValue::Function(RFunction::Closure { params, body, .. }) => {
2573                assert_eq!(params.len(), 1);
2574                assert_eq!(params[0].name, "x");
2575                assert!(params[0].default.is_none());
2576                // The body should be reconstructed from the deparsed string.
2577                let deparsed = deparse_expr(body);
2578                assert_eq!(deparsed, "x + 1L");
2579            }
2580            other => panic!("expected Function(Closure), got {:?}", other),
2581        }
2582    }
2583
2584    #[test]
2585    fn unit_test_closure_with_defaults_round_trip() {
2586        use crate::interpreter::environment::Environment;
2587        use crate::parser::ast::{BinaryOp, Expr, Param};
2588
2589        // function(x, y = 10) x + y
2590        let closure = RValue::Function(RFunction::Closure {
2591            params: vec![
2592                Param {
2593                    name: "x".to_string(),
2594                    default: None,
2595                    is_dots: false,
2596                },
2597                Param {
2598                    name: "y".to_string(),
2599                    default: Some(Expr::Integer(10)),
2600                    is_dots: false,
2601                },
2602            ],
2603            body: Expr::BinaryOp {
2604                op: BinaryOp::Add,
2605                lhs: Box::new(Expr::Symbol("x".to_string())),
2606                rhs: Box::new(Expr::Symbol("y".to_string())),
2607            },
2608            env: Environment::new_global(),
2609        });
2610
2611        let bytes = serialize_xdr(&closure);
2612        let result = unserialize_xdr(&bytes).unwrap();
2613
2614        match &result {
2615            RValue::Function(RFunction::Closure { params, body, .. }) => {
2616                assert_eq!(params.len(), 2);
2617                assert_eq!(params[0].name, "x");
2618                assert!(params[0].default.is_none());
2619                assert_eq!(params[1].name, "y");
2620                assert!(params[1].default.is_some());
2621                // Body should be reconstructed
2622                let deparsed = deparse_expr(body);
2623                assert_eq!(deparsed, "x + y");
2624            }
2625            other => panic!("expected Function(Closure), got {:?}", other),
2626        }
2627    }
2628
2629    #[test]
2630    fn unit_test_parse_program_deparsed() {
2631        // Verify parse_program works on deparsed R expressions.
2632        // Note: parse_program may return a single expression directly (not always Program).
2633        let result = crate::parser::parse_program("x + 1L");
2634        match result {
2635            Ok(expr) => {
2636                assert!(
2637                    matches!(&expr, Expr::BinaryOp { .. }),
2638                    "expected BinaryOp, got {:?}",
2639                    expr
2640                );
2641            }
2642            Err(e) => panic!("parse failed: {:?}", e),
2643        }
2644    }
2645
2646    #[test]
2647    fn unit_test_closure_body_debug() {
2648        use crate::interpreter::environment::Environment;
2649        use crate::parser::ast::{BinaryOp, Expr, Param};
2650
2651        // Build a simple closure: function(x) x + 1
2652        let closure = RValue::Function(RFunction::Closure {
2653            params: vec![Param {
2654                name: "x".to_string(),
2655                default: None,
2656                is_dots: false,
2657            }],
2658            body: Expr::BinaryOp {
2659                op: BinaryOp::Add,
2660                lhs: Box::new(Expr::Symbol("x".to_string())),
2661                rhs: Box::new(Expr::Integer(1)),
2662            },
2663            env: Environment::new_global(),
2664        });
2665
2666        let bytes = serialize_xdr(&closure);
2667        // Read back and inspect the raw body value
2668        let result = unserialize_xdr(&bytes).unwrap();
2669        match &result {
2670            RValue::Function(RFunction::Closure { body, .. }) => {
2671                let deparsed = deparse_expr(body);
2672                // If the body is Expr::String("x + 1L"), deparsed would be "\"x + 1L\""
2673                // If correctly parsed, deparsed would be "x + 1L"
2674                assert!(
2675                    !deparsed.starts_with('"'),
2676                    "body was stored as string literal instead of being re-parsed: {}",
2677                    deparsed
2678                );
2679            }
2680            other => panic!("expected Function(Closure), got {:?}", other),
2681        }
2682    }
2683
2684    #[test]
2685    fn unit_test_strsxp_with_minir_source_attr() {
2686        // Manually build an STRSXP with "miniR.source" attribute and check
2687        // that the reader preserves the attribute.
2688        let mut w = super::XdrWriter::new();
2689        w.buf.extend_from_slice(b"X\n");
2690        w.write_int(2);
2691        w.write_int(0x00040300);
2692        w.write_int(0x00020300);
2693
2694        // Write STRSXP with has_attr = true
2695        w.write_flags(STRSXP, true, false);
2696        w.write_length(1);
2697        w.write_charsxp(Some("x + 1L"));
2698        // Attribute pairlist
2699        let mut attrs: Attributes = IndexMap::new();
2700        attrs.insert(
2701            "miniR.source".to_string(),
2702            RValue::vec(Vector::Logical(vec![Some(true)].into())),
2703        );
2704        w.write_attributes(&attrs);
2705
2706        let bytes = w.finish();
2707        let result = unserialize_xdr(&bytes).unwrap();
2708        match &result {
2709            RValue::Vector(rv) => {
2710                assert!(
2711                    rv.get_attr("miniR.source").is_some(),
2712                    "miniR.source attribute missing; attrs: {:?}",
2713                    rv.attrs
2714                );
2715            }
2716            other => panic!("expected Vector, got {:?}", other),
2717        }
2718    }
2719
2720    #[test]
2721    fn unit_test_env_singleton_round_trip() {
2722        use crate::interpreter::environment::Environment;
2723
2724        let global = RValue::Environment(Environment::new_global());
2725        let bytes = serialize_xdr(&global);
2726        let result = unserialize_xdr(&bytes).unwrap();
2727        match &result {
2728            RValue::Environment(env) => {
2729                assert_eq!(env.name().as_deref(), Some("R_GlobalEnv"));
2730            }
2731            other => panic!("expected Environment, got {:?}", other),
2732        }
2733
2734        let empty = RValue::Environment(Environment::new_empty());
2735        let bytes = serialize_xdr(&empty);
2736        let result = unserialize_xdr(&bytes).unwrap();
2737        match &result {
2738            RValue::Environment(env) => {
2739                assert_eq!(env.name().as_deref(), Some("R_EmptyEnv"));
2740            }
2741            other => panic!("expected Environment, got {:?}", other),
2742        }
2743    }
2744
2745    #[test]
2746    fn unit_test_simple_int_vec() {
2747        let mut buf = build_rds_header();
2748
2749        write_flags(&mut buf, 13, false, false);
2750        buf.extend_from_slice(&3i32.to_be_bytes());
2751        buf.extend_from_slice(&1i32.to_be_bytes());
2752        buf.extend_from_slice(&2i32.to_be_bytes());
2753        buf.extend_from_slice(&3i32.to_be_bytes());
2754
2755        let result = unserialize_xdr(&buf).unwrap();
2756        match &result {
2757            RValue::Vector(rv) => {
2758                assert!(matches!(&rv.inner, Vector::Integer(_)));
2759                if let Vector::Integer(ints) = &rv.inner {
2760                    assert_eq!(ints.len(), 3);
2761                    assert_eq!(ints.get_opt(0), Some(1));
2762                    assert_eq!(ints.get_opt(1), Some(2));
2763                    assert_eq!(ints.get_opt(2), Some(3));
2764                }
2765            }
2766            other => panic!("expected Vector, got {:?}", other),
2767        }
2768    }
2769
2770    // region: hex float tests
2771
2772    #[test]
2773    fn hex_float_roundtrip_normal_values() {
2774        let values = [
2775            0.0,
2776            -0.0,
2777            1.0,
2778            -1.0,
2779            0.1,
2780            0.5,
2781            2.0,
2782            std::f64::consts::PI,
2783            1e-300,
2784            1e300,
2785            f64::EPSILON,
2786            f64::MIN_POSITIVE,
2787        ];
2788        for &v in &values {
2789            let hex = format_hex_float(v);
2790            let parsed = parse_hex_float(&hex).unwrap_or_else(|e| {
2791                panic!("failed to parse hex float '{}' (from {}): {}", hex, v, e)
2792            });
2793            assert_eq!(
2794                v.to_bits(),
2795                parsed.to_bits(),
2796                "hex float roundtrip failed for {}: '{}' parsed to {}",
2797                v,
2798                hex,
2799                parsed
2800            );
2801        }
2802    }
2803
2804    #[test]
2805    fn hex_float_negative_zero() {
2806        let hex = format_hex_float(-0.0);
2807        assert!(hex.starts_with('-'), "negative zero should have minus sign");
2808        let parsed = parse_hex_float(&hex).unwrap();
2809        assert!(parsed.is_sign_negative(), "parsed -0.0 should be negative");
2810    }
2811
2812    #[test]
2813    fn parse_ascii_double_special_values() {
2814        assert_eq!(parse_ascii_double("Inf").unwrap(), f64::INFINITY);
2815        assert_eq!(parse_ascii_double("-Inf").unwrap(), f64::NEG_INFINITY);
2816        assert!(parse_ascii_double("NaN").unwrap().is_nan());
2817        assert_eq!(parse_ascii_double("NA").unwrap().to_bits(), R_NA_REAL_BITS);
2818    }
2819
2820    #[test]
2821    fn ascii_roundtrip_integer_vector() {
2822        let val = RValue::vec(Vector::Integer(
2823            vec![Some(1), Some(2), None, Some(4)].into(),
2824        ));
2825        let bytes = serialize_ascii(&val);
2826        let result = unserialize_xdr(&bytes).unwrap();
2827        match &result {
2828            RValue::Vector(rv) => {
2829                if let Vector::Integer(ints) = &rv.inner {
2830                    assert_eq!(ints.len(), 4);
2831                    assert_eq!(ints.get_opt(0), Some(1));
2832                    assert_eq!(ints.get_opt(1), Some(2));
2833                    assert_eq!(ints.get_opt(2), None);
2834                    assert_eq!(ints.get_opt(3), Some(4));
2835                } else {
2836                    panic!("expected Integer vector");
2837                }
2838            }
2839            other => panic!("expected Vector, got {:?}", other),
2840        }
2841    }
2842
2843    #[test]
2844    fn ascii_roundtrip_double_vector() {
2845        let val = RValue::vec(Vector::Double(
2846            vec![Some(0.1), Some(f64::INFINITY), None, Some(-0.0)].into(),
2847        ));
2848        let bytes = serialize_ascii(&val);
2849        let result = unserialize_xdr(&bytes).unwrap();
2850        match &result {
2851            RValue::Vector(rv) => {
2852                if let Vector::Double(dbls) = &rv.inner {
2853                    assert_eq!(dbls.len(), 4);
2854                    assert_eq!(dbls.get_opt(0), Some(0.1));
2855                    assert_eq!(dbls.get_opt(1), Some(f64::INFINITY));
2856                    assert_eq!(dbls.get_opt(2), None);
2857                    // Check -0.0 bit pattern
2858                    assert_eq!(
2859                        dbls.get_opt(3).expect("-0.0 should not be NA").to_bits(),
2860                        (-0.0f64).to_bits()
2861                    );
2862                } else {
2863                    panic!("expected Double vector");
2864                }
2865            }
2866            other => panic!("expected Vector, got {:?}", other),
2867        }
2868    }
2869
2870    #[test]
2871    fn ascii_roundtrip_character_vector() {
2872        let val = RValue::vec(Vector::Character(
2873            vec![Some("hello".to_string()), None, Some("world".to_string())].into(),
2874        ));
2875        let bytes = serialize_ascii(&val);
2876        let result = unserialize_xdr(&bytes).unwrap();
2877        match &result {
2878            RValue::Vector(rv) => {
2879                if let Vector::Character(chars) = &rv.inner {
2880                    assert_eq!(chars.len(), 3);
2881                    assert_eq!(chars[0], Some("hello".to_string()));
2882                    assert_eq!(chars[1], None);
2883                    assert_eq!(chars[2], Some("world".to_string()));
2884                } else {
2885                    panic!("expected Character vector");
2886                }
2887            }
2888            other => panic!("expected Vector, got {:?}", other),
2889        }
2890    }
2891
2892    #[test]
2893    fn ascii_roundtrip_null() {
2894        let val = RValue::Null;
2895        let bytes = serialize_ascii(&val);
2896        let result = unserialize_xdr(&bytes).unwrap();
2897        assert!(matches!(result, RValue::Null));
2898    }
2899
2900    // endregion
2901}