Skip to main content

miniextendr_api/
altrep_sexp.rs

1//! `AltrepSexp` — a `!Send + !Sync` wrapper for ALTREP vectors.
2//!
3//! R uses ALTREP (Alternative Representations) for common idioms like `1:N`,
4//! `seq_len(N)`, and `as.character(1:N)`. These vectors are lazily materialized:
5//! calling `DATAPTR_RO` triggers allocation, GC, and C callbacks inside R's
6//! runtime. This must only happen on the R main thread.
7//!
8//! This module provides two complementary tools:
9//!
10//! - **[`AltrepSexp`]** — a `!Send + !Sync` wrapper that holds an ALTREP SEXP
11//!   and prevents it from crossing thread boundaries at compile time.
12//! - **[`ensure_materialized`]** — a function that forces materialization if
13//!   the SEXP is ALTREP, returning a SEXP with a stable data pointer.
14//!
15//! Plain (non-ALTREP) SEXPs are `Send + Sync` and are unaffected by either.
16//!
17//! # How ALTREP flows through miniextendr
18//!
19//! | Parameter type | ALTREP handling |
20//! |---|---|
21//! | Typed (`Vec<i32>`, `&[f64]`) | Auto-materialized via `DATAPTR_RO` in `TryFromSexp` |
22//! | `SEXP` | Auto-materialized via [`ensure_materialized`] in `TryFromSexp` |
23//! | [`AltrepSexp`] | Wrapped without materializing. `!Send + !Sync`. |
24//! | `extern "C-unwind"` raw SEXP | No conversion — receives raw SEXP as-is |
25//!
26//! # Usage
27//!
28//! ```ignore
29//! use miniextendr_api::AltrepSexp;
30//!
31//! // As a #[miniextendr] parameter — accepts only ALTREP vectors:
32//! #[miniextendr]
33//! pub fn altrep_length(x: AltrepSexp) -> usize {
34//!     x.len()
35//! }
36//!
37//! // Manual wrapping:
38//! if let Some(altrep) = AltrepSexp::try_wrap(sexp) {
39//!     // Must materialize on R main thread before accessing data
40//!     let materialized: SEXP = unsafe { altrep.materialize() };
41//! }
42//!
43//! // Or use the convenience helper on any SEXP:
44//! let safe_sexp = unsafe { ensure_materialized(sexp) };
45//! ```
46//!
47//! See also: `docs/ALTREP_SEXP.md` for the full guide on receiving ALTREP
48//! vectors from R.
49
50use crate::ffi::{self, Rcomplex, SEXP, SEXPTYPE, SexpExt};
51use crate::from_r::r_slice;
52use std::marker::PhantomData;
53use std::rc::Rc;
54
55/// A SEXP known to be ALTREP. `!Send + !Sync` — must be materialized on the
56/// R main thread before data can be accessed or sent to other threads.
57///
58/// This type prevents ALTREP vectors from being accidentally sent to rayon
59/// or other worker threads where `DATAPTR_RO` would invoke R internals
60/// (undefined behavior).
61///
62/// # As a `#[miniextendr]` parameter
63///
64/// `AltrepSexp` implements [`TryFromSexp`](crate::from_r::TryFromSexp), so it
65/// can be used directly as a function parameter. It **only accepts ALTREP
66/// vectors** — non-ALTREP input produces an error.
67///
68/// ```ignore
69/// #[miniextendr]
70/// pub fn altrep_info(x: AltrepSexp) -> String {
71///     format!("{:?}, len={}", x.sexptype(), x.len())
72/// }
73/// ```
74///
75/// ```r
76/// altrep_info(1:10)          # OK — 1:10 is ALTREP
77/// altrep_info(c(1L, 2L, 3L)) # Error: "expected an ALTREP vector"
78/// ```
79///
80/// # Construction
81///
82/// - [`AltrepSexp::try_wrap`] — runtime check, returns `None` if not ALTREP
83/// - [`AltrepSexp::from_raw`] — unsafe, caller asserts `ALTREP(sexp) != 0`
84///
85/// # Materialization
86///
87/// All materialization methods must be called on the R main thread.
88///
89/// - [`AltrepSexp::materialize`] — forces R to materialize, returns plain SEXP
90/// - [`AltrepSexp::materialize_integer`] — materialize INTSXP and return `&[i32]`
91/// - [`AltrepSexp::materialize_real`] — materialize REALSXP and return `&[f64]`
92/// - [`AltrepSexp::materialize_logical`] — materialize LGLSXP and return `&[i32]`
93/// - [`AltrepSexp::materialize_raw`] — materialize RAWSXP and return `&[u8]`
94/// - [`AltrepSexp::materialize_complex`] — materialize CPLXSXP and return `&[Rcomplex]`
95/// - [`AltrepSexp::materialize_strings`] — materialize STRSXP to `Vec<Option<String>>`
96///
97/// # Thread safety
98///
99/// `AltrepSexp` is `!Send + !Sync` (via `PhantomData<Rc<()>>`). This is a
100/// compile-time guarantee: you cannot send an un-materialized ALTREP vector
101/// to another thread. Call one of the `materialize_*` methods first to get
102/// a `Send + Sync` slice or SEXP.
103pub struct AltrepSexp {
104    sexp: SEXP,
105    /// PhantomData<Rc<()>> makes this type !Send + !Sync.
106    _not_send: PhantomData<Rc<()>>,
107}
108
109impl AltrepSexp {
110    /// Wrap a SEXP that is known to be ALTREP.
111    ///
112    /// # Safety
113    ///
114    /// Caller must ensure `ALTREP(sexp)` is true (non-zero).
115    #[inline]
116    pub unsafe fn from_raw(sexp: SEXP) -> Self {
117        debug_assert!(sexp.is_altrep());
118        Self {
119            sexp,
120            _not_send: PhantomData,
121        }
122    }
123
124    /// Check a SEXP and wrap if ALTREP. Returns `None` if not ALTREP.
125    #[inline]
126    pub fn try_wrap(sexp: SEXP) -> Option<Self> {
127        if sexp.is_altrep() {
128            Some(Self {
129                sexp,
130                _not_send: PhantomData,
131            })
132        } else {
133            None
134        }
135    }
136
137    /// Force materialization and return the (now materialized) SEXP.
138    ///
139    /// For contiguous types (INTSXP, REALSXP, LGLSXP, RAWSXP, CPLXSXP),
140    /// calls `DATAPTR_RO` to trigger ALTREP materialization.
141    /// For STRSXP, iterates `STRING_ELT` to force element materialization.
142    ///
143    /// After this call, the SEXP's data pointer is stable and can be safely
144    /// accessed from any thread (the SEXP itself is still `Send + Sync`).
145    ///
146    /// # Safety
147    ///
148    /// Must be called on the R main thread.
149    pub unsafe fn materialize(self) -> SEXP {
150        let typ = self.sexp.type_of();
151        match typ {
152            SEXPTYPE::STRSXP => {
153                let n = unsafe { ffi::Rf_xlength(self.sexp) };
154                for i in 0..n {
155                    let _ = self.sexp.string_elt(i);
156                }
157            }
158            SEXPTYPE::INTSXP
159            | SEXPTYPE::REALSXP
160            | SEXPTYPE::LGLSXP
161            | SEXPTYPE::RAWSXP
162            | SEXPTYPE::CPLXSXP => {
163                let _ = unsafe { ffi::DATAPTR_RO(self.sexp) };
164            }
165            _ => {} // non-vector types, nothing to materialize
166        }
167        self.sexp
168    }
169
170    /// Materialize and return a typed slice of `f64` (REALSXP).
171    ///
172    /// # Safety
173    ///
174    /// Must be called on the R main thread. The SEXP must be REALSXP.
175    pub unsafe fn materialize_real(&self) -> &[f64] {
176        let ptr = unsafe { ffi::DATAPTR_RO(self.sexp) } as *const f64;
177        let len = unsafe { ffi::Rf_xlength(self.sexp) } as usize;
178        unsafe { r_slice(ptr, len) }
179    }
180
181    /// Materialize and return a typed slice of `i32` (INTSXP).
182    ///
183    /// # Safety
184    ///
185    /// Must be called on the R main thread. The SEXP must be INTSXP.
186    pub unsafe fn materialize_integer(&self) -> &[i32] {
187        let ptr = unsafe { ffi::DATAPTR_RO(self.sexp) } as *const i32;
188        let len = unsafe { ffi::Rf_xlength(self.sexp) } as usize;
189        unsafe { r_slice(ptr, len) }
190    }
191
192    /// Materialize and return a typed slice of `i32` (LGLSXP, R's internal logical storage).
193    ///
194    /// # Safety
195    ///
196    /// Must be called on the R main thread. The SEXP must be LGLSXP.
197    pub unsafe fn materialize_logical(&self) -> &[i32] {
198        let ptr = unsafe { ffi::DATAPTR_RO(self.sexp) } as *const i32;
199        let len = unsafe { ffi::Rf_xlength(self.sexp) } as usize;
200        unsafe { r_slice(ptr, len) }
201    }
202
203    /// Materialize and return a typed slice of `u8` (RAWSXP).
204    ///
205    /// # Safety
206    ///
207    /// Must be called on the R main thread. The SEXP must be RAWSXP.
208    pub unsafe fn materialize_raw(&self) -> &[u8] {
209        let ptr = unsafe { ffi::DATAPTR_RO(self.sexp) } as *const u8;
210        let len = unsafe { ffi::Rf_xlength(self.sexp) } as usize;
211        unsafe { r_slice(ptr, len) }
212    }
213
214    /// Materialize and return a typed slice of `Rcomplex` (CPLXSXP).
215    ///
216    /// # Safety
217    ///
218    /// Must be called on the R main thread. The SEXP must be CPLXSXP.
219    pub unsafe fn materialize_complex(&self) -> &[Rcomplex] {
220        let ptr = unsafe { ffi::DATAPTR_RO(self.sexp) } as *const Rcomplex;
221        let len = unsafe { ffi::Rf_xlength(self.sexp) } as usize;
222        unsafe { r_slice(ptr, len) }
223    }
224
225    /// Materialize strings into owned Rust data.
226    ///
227    /// Each element is `None` for `NA_character_`, or `Some(String)` otherwise.
228    ///
229    /// # Safety
230    ///
231    /// Must be called on the R main thread. The SEXP must be STRSXP.
232    pub unsafe fn materialize_strings(&self) -> Vec<Option<String>> {
233        let n = unsafe { ffi::Rf_xlength(self.sexp) } as usize;
234        let mut out = Vec::with_capacity(n);
235        for i in 0..n {
236            let elt = self.sexp.string_elt(i as ffi::R_xlen_t);
237            if elt == SEXP::na_string() {
238                out.push(None);
239            } else {
240                let cstr = unsafe { ffi::Rf_translateCharUTF8(elt) };
241                let s = unsafe { std::ffi::CStr::from_ptr(cstr) }
242                    .to_string_lossy()
243                    .into_owned();
244                out.push(Some(s));
245            }
246        }
247        out
248    }
249
250    /// Get the inner SEXP without materializing.
251    ///
252    /// # Safety
253    ///
254    /// The returned SEXP is still ALTREP. Do not call `DATAPTR_RO` on it
255    /// from a non-R thread.
256    #[inline]
257    pub unsafe fn as_raw(&self) -> SEXP {
258        self.sexp
259    }
260
261    /// Get the SEXPTYPE of the underlying vector.
262    #[inline]
263    pub fn sexptype(&self) -> SEXPTYPE {
264        self.sexp.type_of()
265    }
266
267    /// Get the length of the underlying vector.
268    #[inline]
269    pub fn len(&self) -> usize {
270        (unsafe { ffi::Rf_xlength(self.sexp) }) as usize
271    }
272
273    /// Check if the underlying vector is empty.
274    #[inline]
275    pub fn is_empty(&self) -> bool {
276        self.len() == 0
277    }
278}
279
280/// Conversion from R SEXP to `AltrepSexp`.
281///
282/// Only succeeds if the input is an ALTREP vector (`ALTREP(sexp) != 0`).
283/// Non-ALTREP input produces `SexpError::InvalidValue`.
284///
285/// This is the inverse of [`TryFromSexp for SEXP`](crate::from_r::TryFromSexp),
286/// which accepts any SEXP but auto-materializes ALTREP.
287impl crate::from_r::TryFromSexp for AltrepSexp {
288    type Error = crate::from_r::SexpError;
289
290    #[inline]
291    fn try_from_sexp(sexp: SEXP) -> Result<Self, Self::Error> {
292        AltrepSexp::try_wrap(sexp).ok_or_else(|| {
293            crate::from_r::SexpError::InvalidValue(
294                "expected an ALTREP vector but got a non-ALTREP SEXP".to_string(),
295            )
296        })
297    }
298
299    #[inline]
300    unsafe fn try_from_sexp_unchecked(sexp: SEXP) -> Result<Self, Self::Error> {
301        Self::try_from_sexp(sexp)
302    }
303}
304
305impl std::fmt::Debug for AltrepSexp {
306    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
307        f.debug_struct("AltrepSexp")
308            .field("sexptype", &self.sexptype())
309            .field("len", &self.len())
310            .finish()
311    }
312}
313
314/// If `sexp` is ALTREP, force materialization and return the SEXP.
315/// If not ALTREP, return as-is (no-op).
316///
317/// This is the main entry point for ensuring a SEXP is safe to access
318/// from non-R threads. After materialization, the data pointer is stable
319/// and the SEXP can be freely sent across threads.
320///
321/// Called automatically by `TryFromSexp for SEXP` — you only need to call
322/// this directly in `extern "C-unwind"` functions that receive raw SEXPs.
323///
324/// For contiguous types (INTSXP, REALSXP, LGLSXP, RAWSXP, CPLXSXP),
325/// calls `DATAPTR_RO` to trigger materialization. For STRSXP, iterates
326/// `STRING_ELT` to force each element to materialize.
327///
328/// # Safety
329///
330/// Must be called on the R main thread (materialization invokes R internals).
331#[inline]
332pub unsafe fn ensure_materialized(sexp: SEXP) -> SEXP {
333    if sexp.is_altrep() {
334        unsafe { AltrepSexp::from_raw(sexp).materialize() }
335    } else {
336        sexp
337    }
338}
339
340// Compile-time assertions: SEXP must remain Send + Sync.
341const _: () = {
342    fn _assert_send<T: Send>() {}
343    fn _assert_sync<T: Sync>() {}
344
345    fn _sexp_is_send_sync() {
346        _assert_send::<SEXP>();
347        _assert_sync::<SEXP>();
348    }
349};
350
#[cfg(test)]
mod tests {
    use super::*;

    /// Compile-time check of the thread-safety markers: `AltrepSexp` must be
    /// `!Send` and `!Sync`, while `SEXP` must be `Send + Sync`.
    ///
    /// The function is never called; type-checking its body is the test.
    fn _assert_send_sync_properties() {
        fn requires_send<T: Send>() {}
        fn requires_sync<T: Sync>() {}

        // Negative assertions via deliberately ambiguous impls: every type
        // implements `AmbiguousIf*<()>`, and types with the auto trait also
        // implement `AmbiguousIf*<u8>`. Inferring the marker (`_`) therefore
        // succeeds only while exactly one impl applies, i.e. while
        // `AltrepSexp` does NOT implement the auto trait. If it ever becomes
        // `Send` or `Sync`, the lines below stop compiling.
        trait AmbiguousIfSend<Marker> {
            fn check() {}
        }
        impl<T: ?Sized> AmbiguousIfSend<()> for T {}
        impl<T: ?Sized + Send> AmbiguousIfSend<u8> for T {}

        trait AmbiguousIfSync<Marker> {
            fn check() {}
        }
        impl<T: ?Sized> AmbiguousIfSync<()> for T {}
        impl<T: ?Sized + Sync> AmbiguousIfSync<u8> for T {}

        let _ = <AltrepSexp as AmbiguousIfSend<_>>::check;
        let _ = <AltrepSexp as AmbiguousIfSync<_>>::check;

        // SEXP IS Send + Sync:
        requires_send::<SEXP>();
        requires_sync::<SEXP>();
    }
}