Skip to main content

r/interpreter/native/
sexp.rs

1//! SEXP type definitions — the C-compatible memory layout for R values.
2//!
3//! This defines miniR's own SEXP ABI. Package C code compiled against our
4//! `Rinternals.h` header uses this layout to access R values.
5//!
6//! All allocations use the C allocator (`calloc`/`malloc`/`free`) so that
7//! SEXPs created by Rust and SEXPs created by C code are interchangeable
8//! and can be freed by either side.
9
10use std::ffi::CStr;
11use std::os::raw::c_char;
12
13// region: C allocator FFI
14
extern "C" {
    /// C `calloc`: allocates `count * size` bytes of zero-initialised memory,
    /// or returns NULL on failure. Declared as returning `*mut u8` (instead of
    /// `void *`) so callers can do byte arithmetic without an extra cast.
    fn calloc(count: usize, size: usize) -> *mut u8;

    /// C `malloc`: allocates `size` bytes of uninitialised memory, or returns
    /// NULL on failure.
    fn malloc(size: usize) -> *mut u8;

    /// C `free`: releases memory obtained from `calloc`/`malloc`. Passing NULL
    /// is a no-op.
    fn free(ptr: *mut u8);
}
20
21// endregion
22
23// region: SEXPTYPE constants (matching GNU R)
24
/// `NULL`.
pub const NILSXP: u8 = 0;
/// Symbol.
pub const SYMSXP: u8 = 1;
/// Pairlist node.
pub const LISTSXP: u8 = 2;
/// Environment.
pub const ENVSXP: u8 = 4;
/// Language object (call).
pub const LANGSXP: u8 = 6;
/// Internal character string (element of a `STRSXP`).
pub const CHARSXP: u8 = 9;
/// Logical vector.
pub const LGLSXP: u8 = 10;
/// Integer vector.
pub const INTSXP: u8 = 13;
/// Double-precision numeric vector.
pub const REALSXP: u8 = 14;
/// Complex vector.
pub const CPLXSXP: u8 = 15;
/// Character vector.
pub const STRSXP: u8 = 16;
/// Generic vector (list).
pub const VECSXP: u8 = 19;
/// Raw byte vector.
pub const RAWSXP: u8 = 24;
38
39// endregion
40
41// region: NA sentinel values
42
/// R's NA_REAL — a specific NaN with payload 1954 (0x7FF00000000007A2).
pub const NA_REAL: f64 = f64::from_bits(0x7FF00000000007A2);
/// R's NA_INTEGER — i32::MIN.
pub const NA_INTEGER: i32 = i32::MIN;
/// R's NA_LOGICAL — same as NA_INTEGER.
pub const NA_LOGICAL: i32 = i32::MIN;

/// Check if a f64 is R's NA_REAL (not just any NaN).
///
/// A value counts as NA when it is a NaN whose low 32 bits hold the payload
/// 1954, which is the same test GNU R's `R_IsNA` performs. Only the low word
/// is inspected because `NA_REAL` is a signalling NaN: hardware may set the
/// quiet bit (or flip the sign) when the value passes through floating-point
/// operations, which alters the high word but leaves the payload intact. An
/// exact 64-bit comparison would stop recognising such values as NA.
pub fn is_na_real(x: f64) -> bool {
    // Payload that marks NA inside the NaN's low word.
    const NA_PAYLOAD: u32 = 1954;
    // Truncating cast deliberately keeps only the low 32 bits.
    x.is_nan() && (x.to_bits() as u32) == NA_PAYLOAD
}
54
55// endregion
56
57// region: SEXPREC struct
58
/// Header of a heap-allocated R value, laid out for direct use from C.
///
/// Both sides of the FFI boundary touch this struct: Rust when converting
/// values, and C through the accessor macros in `Rinternals.h`. Field order,
/// types and `#[repr(C)]` therefore must stay in lock-step with the C
/// declaration.
#[repr(C)]
pub struct SexpRec {
    /// SEXPTYPE tag (e.g. `REALSXP` = 14, `INTSXP` = 13).
    pub stype: u8,
    /// Flag bits. Not used yet; reserved for GC marks and the NAMED count.
    pub flags: u8,
    /// Explicit padding so that `length` sits on a 4-byte boundary.
    pub padding: u16,
    /// Number of elements in the vector.
    pub length: i32,
    /// Payload buffer; how it is interpreted depends on `stype`.
    pub data: *mut u8,
    /// Attribute object, or NULL when there are none. Not used yet.
    pub attrib: *mut SexpRec,
}

/// Pointer to a [`SexpRec`] — the Rust spelling of C's `SEXP`.
pub type Sexp = *mut SexpRec;
82
83/// Pairlist node data — matches `minir_pairlist_data` in Rinternals.h.
84/// Used by LISTSXP/LANGSXP nodes. Stored at `data` pointer of the SexpRec.
85#[repr(C)]
86pub struct PairlistData {
87    pub car: Sexp,
88    pub cdr: Sexp,
89    pub tag: Sexp,
90}
91
92/// Null SEXP sentinel.
93pub const R_NIL_VALUE: Sexp = std::ptr::null_mut();
94
95// endregion
96
97// region: Allocation (using C allocator for compatibility with Rinternals.h)
98
99/// Allocate a SEXPREC with a typed data buffer using the C allocator.
100pub fn alloc_vector(stype: u8, length: i32) -> Sexp {
101    unsafe {
102        let rec = calloc(1, std::mem::size_of::<SexpRec>()) as Sexp;
103        if rec.is_null() {
104            return R_NIL_VALUE;
105        }
106        (*rec).stype = stype;
107        (*rec).length = length;
108        (*rec).attrib = R_NIL_VALUE;
109
110        if length > 0 {
111            let len = length as usize;
112            (*rec).data = match stype {
113                REALSXP => calloc(len, std::mem::size_of::<f64>()),
114                INTSXP | LGLSXP => calloc(len, std::mem::size_of::<i32>()),
115                // Rcomplex = { double r, i } = 16 bytes
116                CPLXSXP => calloc(len, 2 * std::mem::size_of::<f64>()),
117                STRSXP | VECSXP => calloc(len, std::mem::size_of::<Sexp>()),
118                RAWSXP => calloc(len, 1),
119                _ => std::ptr::null_mut(),
120            };
121        }
122
123        rec
124    }
125}
126
127/// Allocate a CHARSXP from a Rust string.
128pub fn mk_char(s: &str) -> Sexp {
129    unsafe {
130        let rec = calloc(1, std::mem::size_of::<SexpRec>()) as Sexp;
131        if rec.is_null() {
132            return R_NIL_VALUE;
133        }
134        (*rec).stype = CHARSXP;
135        (*rec).length = s.len() as i32;
136
137        let buf = malloc(s.len() + 1);
138        if !buf.is_null() {
139            std::ptr::copy_nonoverlapping(s.as_ptr(), buf, s.len());
140            *buf.add(s.len()) = 0; // null terminator
141        }
142        (*rec).data = buf;
143        (*rec).attrib = R_NIL_VALUE;
144
145        rec
146    }
147}
148
149/// Allocate a scalar REALSXP.
150pub fn scalar_real(x: f64) -> Sexp {
151    let s = alloc_vector(REALSXP, 1);
152    if !s.is_null() {
153        unsafe { *((*s).data as *mut f64) = x };
154    }
155    s
156}
157
158/// Allocate a scalar INTSXP.
159pub fn scalar_integer(x: i32) -> Sexp {
160    let s = alloc_vector(INTSXP, 1);
161    if !s.is_null() {
162        unsafe { *((*s).data as *mut i32) = x };
163    }
164    s
165}
166
167/// Allocate a scalar LGLSXP.
168pub fn scalar_logical(x: i32) -> Sexp {
169    let s = alloc_vector(LGLSXP, 1);
170    if !s.is_null() {
171        unsafe { *((*s).data as *mut i32) = x };
172    }
173    s
174}
175
176/// Allocate a length-1 STRSXP from a Rust string.
177pub fn mk_string(s: &str) -> Sexp {
178    let strsxp = alloc_vector(STRSXP, 1);
179    let charsxp = mk_char(s);
180    if !strsxp.is_null() {
181        unsafe {
182            let elts = (*strsxp).data as *mut Sexp;
183            *elts = charsxp;
184        }
185    }
186    strsxp
187}
188
189/// Allocate a NILSXP.
190pub fn mk_null() -> Sexp {
191    unsafe {
192        let rec = calloc(1, std::mem::size_of::<SexpRec>()) as Sexp;
193        if rec.is_null() {
194            return R_NIL_VALUE;
195        }
196        (*rec).stype = NILSXP;
197        rec
198    }
199}
200
201// endregion
202
203// region: Accessors (Rust side)
204
205/// Read the CHARSXP data as a Rust &str.
206///
207/// # Safety
208/// `s` must be a valid CHARSXP pointer.
209pub unsafe fn char_data(s: Sexp) -> &'static str {
210    if s.is_null() || (*s).data.is_null() {
211        return "";
212    }
213    let cstr = CStr::from_ptr((*s).data as *const c_char);
214    cstr.to_str().unwrap_or("")
215}
216
217/// Read REAL data pointer.
218///
219/// # Safety
220/// `s` must be a valid REALSXP pointer.
221pub unsafe fn real_ptr(s: Sexp) -> *mut f64 {
222    (*s).data as *mut f64
223}
224
225/// Read INTEGER data pointer.
226///
227/// # Safety
228/// `s` must be a valid INTSXP pointer.
229pub unsafe fn integer_ptr(s: Sexp) -> *mut i32 {
230    (*s).data as *mut i32
231}
232
233/// Read LOGICAL data pointer.
234///
235/// # Safety
236/// `s` must be a valid LGLSXP pointer.
237pub unsafe fn logical_ptr(s: Sexp) -> *mut i32 {
238    (*s).data as *mut i32
239}
240
241/// Read STRING_ELT.
242///
243/// # Safety
244/// `s` must be a valid STRSXP pointer, `i` must be in bounds.
245pub unsafe fn string_elt(s: Sexp, i: usize) -> Sexp {
246    let elts = (*s).data as *const Sexp;
247    *elts.add(i)
248}
249
250/// Read VECTOR_ELT.
251///
252/// # Safety
253/// `s` must be a valid VECSXP pointer, `i` must be in bounds.
254pub unsafe fn vector_elt(s: Sexp, i: usize) -> Sexp {
255    let elts = (*s).data as *const Sexp;
256    *elts.add(i)
257}
258
259// endregion
260
261// region: Deallocation
262
263/// Free a SEXP and its data buffer using the C allocator.
264///
265/// # Safety
266/// `s` must have been allocated by `alloc_vector`, `mk_char`, `mk_null`,
267/// or by the C runtime in `Rinternals.h` (which also uses calloc/malloc).
268/// Must not be called twice on the same pointer.
269pub unsafe fn free_sexp(s: Sexp) {
270    if s.is_null() {
271        return;
272    }
273    let rec = &*s;
274    if !rec.data.is_null() {
275        let len = rec.length.max(0) as usize;
276        match rec.stype {
277            STRSXP => {
278                // Free each CHARSXP element first
279                let elts = rec.data as *mut Sexp;
280                for i in 0..len {
281                    let elt = *elts.add(i);
282                    if !elt.is_null() {
283                        free_sexp(elt);
284                    }
285                }
286                free(rec.data);
287            }
288            VECSXP => {
289                // Free each list element first
290                let elts = rec.data as *mut Sexp;
291                for i in 0..len {
292                    let elt = *elts.add(i);
293                    if !elt.is_null() {
294                        free_sexp(elt);
295                    }
296                }
297                free(rec.data);
298            }
299            ENVSXP => {
300                // ENVSXP data is a Box<Environment> allocated by Rust — drop it properly
301                let env_ptr = rec.data as *mut crate::interpreter::environment::Environment;
302                drop(Box::from_raw(env_ptr));
303            }
304            _ => {
305                // REALSXP, INTSXP, LGLSXP, RAWSXP, CHARSXP — simple data buffer
306                free(rec.data);
307            }
308        }
309    }
310    // Free the SexpRec itself
311    free(s as *mut u8);
312}
313
314// endregion