Skip to main content

miniextendr_api/altrep_data/iter/
state.rs

//! Core iterator-backed ALTREP infrastructure.
//!
//! Provides `IterState<I, T>` (the shared lazy-caching state machine) and
//! wrapper types for each ALTREP family: `IterIntData`, `IterRealData`,
//! `IterLogicalData`, `IterRawData`, `IterStringData`, `IterListData`,
//! `IterComplexData`.
7
8use std::cell::RefCell;
9use std::sync::OnceLock;
10
11use crate::altrep_data::{
12    AltIntegerData, AltLogicalData, AltRawData, AltRealData, AltrepLen, InferBase, Logical,
13    fill_region,
14};
15
/// Core state for iterator-backed ALTREP vectors.
///
/// Provides lazy element generation with caching for random-access semantics.
/// Iterator elements are cached as they're accessed, enabling repeatable reads.
///
/// # Type Parameters
///
/// - `I`: The iterator type (must be `ExactSizeIterator` or provide explicit length)
/// - `T`: The element type produced by the iterator
///
/// # Design
///
/// - **Lazy:** Elements are generated on demand via [`IterState::get_element`].
/// - **Cached:** Once generated, an element stays in the cache, so repeated
///   reads of the same index always agree.
/// - **Materializable:** [`IterState::materialize_all`] drains the iterator so
///   the data can be exposed as a contiguous slice.
/// - **Single-threaded interior mutability:** the iterator and cache live in
///   `RefCell`s, so this type is not `Sync`, and a re-entrant call made from
///   inside the wrapped iterator's `next()` would panic on the double borrow.
pub struct IterState<I, T> {
    /// Declared vector length (from `ExactSizeIterator::len()` or explicit).
    len: usize,
    /// Remaining iterator; `None` once it has been drained, exhausted or dropped.
    iter: RefCell<Option<I>>,
    /// Elements generated so far (always a prefix of the vector).
    cache: RefCell<Vec<T>>,
    /// Final storage; set once, after which `iter` and `cache` are no longer used.
    materialized: OnceLock<Vec<T>>,
}

impl<I, T> IterState<I, T>
where
    I: Iterator<Item = T>,
{
    /// Create a new iterator state with an explicit length.
    ///
    /// # Arguments
    ///
    /// - `iter`: The iterator to wrap
    /// - `len`: The expected number of elements
    ///
    /// # Length Mismatch
    ///
    /// If the iterator produces a different number of elements than `len`:
    /// - Fewer elements: missing indices read as `None` from
    ///   [`IterState::get_element`].
    /// - More elements: extra elements are ignored (truncated to `len`).
    ///
    /// A warning is printed to stderr when a mismatch is detected.
    pub fn new(iter: I, len: usize) -> Self {
        Self {
            len,
            iter: RefCell::new(Some(iter)),
            // Cap the initial reservation so a large declared length does not
            // allocate memory before any element has been produced.
            cache: RefCell::new(Vec::with_capacity(len.min(1024))),
            materialized: OnceLock::new(),
        }
    }

    /// Report on stderr that the iterator ended after `produced` elements,
    /// i.e. before reaching the declared length.
    fn warn_short(&self, produced: usize) {
        eprintln!(
            "[miniextendr warning] iterator ALTREP: iterator produced {} elements, expected {} - accessing missing indices will return NA/default",
            produced, self.len
        );
    }

    /// Ensure the element at index `i` is in the cache and return it by value.
    ///
    /// Advances the iterator as needed. Only works for `Copy` types.
    ///
    /// If the iterator runs out before index `i`, it is dropped and a warning
    /// is printed once. The iterator is never polled again after it has
    /// returned `None`, so an index that was reported missing stays missing
    /// even when the wrapped iterator is not fused.
    ///
    /// # Returns
    ///
    /// - `Some(T)` if element exists and has been generated
    /// - `None` if index is out of bounds or iterator exhausted before reaching index `i`
    pub fn get_element(&self, i: usize) -> Option<T>
    where
        T: Copy,
    {
        if i >= self.len {
            return None;
        }

        // Once materialized, the cache is empty and the final vector is the
        // only source of truth.
        if let Some(vec) = self.materialized.get() {
            return vec.get(i).copied();
        }

        let mut cache = self.cache.borrow_mut();
        if let Some(hit) = cache.get(i).copied() {
            return Some(hit);
        }

        let mut iter_slot = self.iter.borrow_mut();
        let exhausted = {
            // `None` here means the iterator already ended early on a previous call.
            let iter = iter_slot.as_mut()?;
            let mut exhausted = false;
            while cache.len() <= i {
                match iter.next() {
                    Some(elem) => cache.push(elem),
                    None => {
                        exhausted = true;
                        break;
                    }
                }
            }
            exhausted
        };

        if exhausted {
            // Calling `next()` again after `None` is unspecified for non-fused
            // iterators; dropping the iterator keeps reads repeatable.
            iter_slot.take();
            self.warn_short(cache.len());
            return None;
        }

        let value = cache[i];

        // If random access has generated the full vector, promote the cache
        // to the materialized storage so `as_materialized()` can expose it.
        if cache.len() == self.len {
            iter_slot.take();

            let full = std::mem::take(&mut *cache);
            drop(cache);
            drop(iter_slot);

            // `set` can only fail if the storage was filled already; the
            // early return above rules that out on this single-threaded path.
            let _ = self.materialized.set(full);
        }

        Some(value)
    }

    /// Materialize all remaining elements from the iterator.
    ///
    /// After this call the iterator has been consumed and
    /// `as_materialized()` returns `Some`.
    ///
    /// # Length Mismatch Handling
    ///
    /// - If the iterator produces fewer elements than the declared `len`, the
    ///   returned slice is **shorter than `len()`**; it is not padded. Callers
    ///   that need `len()` elements must check the slice length.
    /// - If the iterator produces more elements than declared, the extras are
    ///   ignored (truncated to `len`).
    ///
    /// A warning is printed to stderr if a length mismatch is detected.
    pub fn materialize_all(&self) -> &[T] {
        if let Some(vec) = self.materialized.get() {
            return vec;
        }

        let mut cache = self.cache.borrow_mut();
        let mut iter_slot = self.iter.borrow_mut();

        if let Some(mut iter) = iter_slot.take() {
            // Pull at most the number of elements still missing, so an
            // over-long (or infinite) iterator cannot grow the cache past `len`.
            let missing = self.len.saturating_sub(cache.len());
            cache.extend(iter.by_ref().take(missing));

            if cache.len() < self.len {
                self.warn_short(cache.len());
            } else if iter.next().is_some() {
                // One extra poll, only to detect an iterator that over-produces.
                eprintln!(
                    "[miniextendr warning] iterator ALTREP: iterator produced more elements than declared length ({}), truncating",
                    self.len
                );
            }
        }

        // Move the cache into the final storage, releasing both borrows first.
        let vec = std::mem::take(&mut *cache);
        drop(cache);
        drop(iter_slot);

        self.materialized.get_or_init(|| vec)
    }

    /// Get the materialized vector if one has been stored.
    ///
    /// Returns `None` until either `get_element` has generated every element
    /// or `materialize_all` has been called. After `materialize_all` on an
    /// iterator that ended early, the slice is shorter than `len()`.
    pub fn as_materialized(&self) -> Option<&[T]> {
        self.materialized.get().map(|v| v.as_slice())
    }

    /// Get the declared length of the vector.
    pub fn len(&self) -> usize {
        self.len
    }

    /// Check whether the declared length is zero.
    pub fn is_empty(&self) -> bool {
        self.len == 0
    }
}

impl<I, T> IterState<I, T>
where
    I: ExactSizeIterator<Item = T>,
{
    /// Create a new iterator state from an `ExactSizeIterator`.
    ///
    /// The length is taken from `iter.len()`.
    pub fn from_exact_size(iter: I) -> Self {
        let len = iter.len();
        Self::new(iter, len)
    }
}
219
220/// Iterator-backed integer vector data.
221///
222/// Wraps an iterator producing `i32` values and exposes it as an ALTREP integer vector.
223///
224/// # Example
225///
226/// ```ignore
227/// use miniextendr_api::altrep_data::IterIntData;
228///
229/// // Create from an iterator
230/// let data = IterIntData::from_iter((1..=10).map(|x| x * 2), 10);
231/// ```
232pub struct IterIntData<I: Iterator<Item = i32>> {
233    state: IterState<I, i32>,
234}
235
236impl<I: Iterator<Item = i32>> IterIntData<I> {
237    /// Create from an iterator with explicit length.
238    pub fn from_iter(iter: I, len: usize) -> Self {
239        Self {
240            state: IterState::new(iter, len),
241        }
242    }
243}
244
245impl<I: ExactSizeIterator<Item = i32>> IterIntData<I> {
246    /// Create from an ExactSizeIterator (length auto-detected).
247    pub fn from_exact_iter(iter: I) -> Self {
248        Self {
249            state: IterState::from_exact_size(iter),
250        }
251    }
252}
253
254impl<I: Iterator<Item = i32>> AltrepLen for IterIntData<I> {
255    fn len(&self) -> usize {
256        self.state.len()
257    }
258}
259
260impl<I: Iterator<Item = i32>> AltIntegerData for IterIntData<I> {
261    fn elt(&self, i: usize) -> i32 {
262        self.state
263            .get_element(i)
264            .unwrap_or(crate::altrep_traits::NA_INTEGER)
265    }
266
267    fn as_slice(&self) -> Option<&[i32]> {
268        self.state.as_materialized()
269    }
270
271    fn get_region(&self, start: usize, len: usize, buf: &mut [i32]) -> usize {
272        fill_region(start, len, self.len(), buf, |idx| self.elt(idx))
273    }
274}
275
276impl<I: Iterator<Item = i32> + 'static> crate::externalptr::TypedExternal for IterIntData<I> {
277    const TYPE_NAME: &'static str = "IterIntData";
278    const TYPE_NAME_CSTR: &'static [u8] = b"IterIntData\0";
279    const TYPE_ID_CSTR: &'static [u8] = b"miniextendr_api::altrep::IterIntData\0";
280}
281
282impl<I: Iterator<Item = i32> + 'static> InferBase for IterIntData<I> {
283    const BASE: crate::altrep::RBase = crate::altrep::RBase::Int;
284
285    unsafe fn make_class(
286        class_name: *const i8,
287        pkg_name: *const i8,
288    ) -> crate::ffi::altrep::R_altrep_class_t {
289        unsafe {
290            crate::ffi::altrep::R_make_altinteger_class(class_name, pkg_name, core::ptr::null_mut())
291        }
292    }
293
294    unsafe fn install_methods(cls: crate::ffi::altrep::R_altrep_class_t) {
295        unsafe { crate::altrep_bridge::install_base::<Self>(cls) };
296        unsafe { crate::altrep_bridge::install_vec::<Self>(cls) };
297        unsafe { crate::altrep_bridge::install_int::<Self>(cls) };
298    }
299}
300
301impl<I: Iterator<Item = i32> + 'static> crate::altrep_traits::Altrep for IterIntData<I> {
302    fn length(x: crate::ffi::SEXP) -> crate::ffi::R_xlen_t {
303        let data = unsafe { <Self as crate::altrep_data::AltrepExtract>::altrep_extract_ref(x) };
304        data.len() as crate::ffi::R_xlen_t
305    }
306}
307
308impl<I: Iterator<Item = i32> + 'static> crate::altrep_traits::AltVec for IterIntData<I> {}
309
310impl<I: Iterator<Item = i32> + 'static> crate::altrep_traits::AltInteger for IterIntData<I> {
311    const HAS_ELT: bool = true;
312
313    fn elt(x: crate::ffi::SEXP, i: crate::ffi::R_xlen_t) -> i32 {
314        let data = unsafe { <Self as crate::altrep_data::AltrepExtract>::altrep_extract_ref(x) };
315        AltIntegerData::elt(data, i as usize)
316    }
317
318    const HAS_GET_REGION: bool = true;
319
320    fn get_region(
321        x: crate::ffi::SEXP,
322        start: crate::ffi::R_xlen_t,
323        len: crate::ffi::R_xlen_t,
324        buf: &mut [i32],
325    ) -> crate::ffi::R_xlen_t {
326        let data = unsafe { <Self as crate::altrep_data::AltrepExtract>::altrep_extract_ref(x) };
327        AltIntegerData::get_region(data, start as usize, len as usize, buf) as crate::ffi::R_xlen_t
328    }
329}
330
331/// Iterator-backed real (f64) vector data.
332///
333/// Wraps an iterator producing `f64` values and exposes it as an ALTREP real vector.
334pub struct IterRealData<I: Iterator<Item = f64>> {
335    state: IterState<I, f64>,
336}
337
338impl<I: Iterator<Item = f64>> IterRealData<I> {
339    /// Create from an iterator with explicit length.
340    pub fn from_iter(iter: I, len: usize) -> Self {
341        Self {
342            state: IterState::new(iter, len),
343        }
344    }
345}
346
347impl<I: ExactSizeIterator<Item = f64>> IterRealData<I> {
348    /// Create from an ExactSizeIterator (length auto-detected).
349    pub fn from_exact_iter(iter: I) -> Self {
350        Self {
351            state: IterState::from_exact_size(iter),
352        }
353    }
354}
355
356impl<I: Iterator<Item = f64>> AltrepLen for IterRealData<I> {
357    fn len(&self) -> usize {
358        self.state.len()
359    }
360}
361
362impl<I: Iterator<Item = f64>> AltRealData for IterRealData<I> {
363    fn elt(&self, i: usize) -> f64 {
364        self.state.get_element(i).unwrap_or(f64::NAN)
365    }
366
367    fn as_slice(&self) -> Option<&[f64]> {
368        self.state.as_materialized()
369    }
370
371    fn get_region(&self, start: usize, len: usize, buf: &mut [f64]) -> usize {
372        fill_region(start, len, self.len(), buf, |idx| self.elt(idx))
373    }
374}
375
376impl<I: Iterator<Item = f64> + 'static> crate::externalptr::TypedExternal for IterRealData<I> {
377    const TYPE_NAME: &'static str = "IterRealData";
378    const TYPE_NAME_CSTR: &'static [u8] = b"IterRealData\0";
379    const TYPE_ID_CSTR: &'static [u8] = b"miniextendr_api::altrep::IterRealData\0";
380}
381
382impl<I: Iterator<Item = f64> + 'static> InferBase for IterRealData<I> {
383    const BASE: crate::altrep::RBase = crate::altrep::RBase::Real;
384
385    unsafe fn make_class(
386        class_name: *const i8,
387        pkg_name: *const i8,
388    ) -> crate::ffi::altrep::R_altrep_class_t {
389        unsafe {
390            crate::ffi::altrep::R_make_altreal_class(class_name, pkg_name, core::ptr::null_mut())
391        }
392    }
393
394    unsafe fn install_methods(cls: crate::ffi::altrep::R_altrep_class_t) {
395        unsafe { crate::altrep_bridge::install_base::<Self>(cls) };
396        unsafe { crate::altrep_bridge::install_vec::<Self>(cls) };
397        unsafe { crate::altrep_bridge::install_real::<Self>(cls) };
398    }
399}
400
401impl<I: Iterator<Item = f64> + 'static> crate::altrep_traits::Altrep for IterRealData<I> {
402    fn length(x: crate::ffi::SEXP) -> crate::ffi::R_xlen_t {
403        let data = unsafe { <Self as crate::altrep_data::AltrepExtract>::altrep_extract_ref(x) };
404        data.len() as crate::ffi::R_xlen_t
405    }
406}
407
408impl<I: Iterator<Item = f64> + 'static> crate::altrep_traits::AltVec for IterRealData<I> {}
409
410impl<I: Iterator<Item = f64> + 'static> crate::altrep_traits::AltReal for IterRealData<I> {
411    const HAS_ELT: bool = true;
412
413    fn elt(x: crate::ffi::SEXP, i: crate::ffi::R_xlen_t) -> f64 {
414        let data = unsafe { <Self as crate::altrep_data::AltrepExtract>::altrep_extract_ref(x) };
415        AltRealData::elt(data, i as usize)
416    }
417
418    const HAS_GET_REGION: bool = true;
419
420    fn get_region(
421        x: crate::ffi::SEXP,
422        start: crate::ffi::R_xlen_t,
423        len: crate::ffi::R_xlen_t,
424        buf: &mut [f64],
425    ) -> crate::ffi::R_xlen_t {
426        let data = unsafe { <Self as crate::altrep_data::AltrepExtract>::altrep_extract_ref(x) };
427        AltRealData::get_region(data, start as usize, len as usize, buf) as crate::ffi::R_xlen_t
428    }
429}
430
431/// Iterator-backed logical vector data.
432///
433/// Wraps an iterator producing `bool` values and exposes it as an ALTREP logical vector.
434pub struct IterLogicalData<I: Iterator<Item = bool>> {
435    state: IterState<I, bool>,
436}
437
438impl<I: Iterator<Item = bool>> IterLogicalData<I> {
439    /// Create from an iterator with explicit length.
440    pub fn from_iter(iter: I, len: usize) -> Self {
441        Self {
442            state: IterState::new(iter, len),
443        }
444    }
445}
446
447impl<I: ExactSizeIterator<Item = bool>> IterLogicalData<I> {
448    /// Create from an ExactSizeIterator (length auto-detected).
449    pub fn from_exact_iter(iter: I) -> Self {
450        Self {
451            state: IterState::from_exact_size(iter),
452        }
453    }
454}
455
456impl<I: Iterator<Item = bool>> AltrepLen for IterLogicalData<I> {
457    fn len(&self) -> usize {
458        self.state.len()
459    }
460}
461
462impl<I: Iterator<Item = bool>> AltLogicalData for IterLogicalData<I> {
463    fn elt(&self, i: usize) -> Logical {
464        self.state
465            .get_element(i)
466            .map(Logical::from_bool)
467            .unwrap_or(Logical::Na)
468    }
469
470    fn get_region(&self, start: usize, len: usize, buf: &mut [i32]) -> usize {
471        fill_region(start, len, self.len(), buf, |idx| self.elt(idx).to_r_int())
472    }
473}
474
475impl<I: Iterator<Item = bool> + 'static> crate::externalptr::TypedExternal for IterLogicalData<I> {
476    const TYPE_NAME: &'static str = "IterLogicalData";
477    const TYPE_NAME_CSTR: &'static [u8] = b"IterLogicalData\0";
478    const TYPE_ID_CSTR: &'static [u8] = b"miniextendr_api::altrep::IterLogicalData\0";
479}
480
481impl<I: Iterator<Item = bool> + 'static> InferBase for IterLogicalData<I> {
482    const BASE: crate::altrep::RBase = crate::altrep::RBase::Logical;
483
484    unsafe fn make_class(
485        class_name: *const i8,
486        pkg_name: *const i8,
487    ) -> crate::ffi::altrep::R_altrep_class_t {
488        unsafe {
489            crate::ffi::altrep::R_make_altlogical_class(class_name, pkg_name, core::ptr::null_mut())
490        }
491    }
492
493    unsafe fn install_methods(cls: crate::ffi::altrep::R_altrep_class_t) {
494        unsafe { crate::altrep_bridge::install_base::<Self>(cls) };
495        unsafe { crate::altrep_bridge::install_vec::<Self>(cls) };
496        unsafe { crate::altrep_bridge::install_lgl::<Self>(cls) };
497    }
498}
499
500impl<I: Iterator<Item = bool> + 'static> crate::altrep_traits::Altrep for IterLogicalData<I> {
501    fn length(x: crate::ffi::SEXP) -> crate::ffi::R_xlen_t {
502        let data = unsafe { <Self as crate::altrep_data::AltrepExtract>::altrep_extract_ref(x) };
503        data.len() as crate::ffi::R_xlen_t
504    }
505}
506
507impl<I: Iterator<Item = bool> + 'static> crate::altrep_traits::AltVec for IterLogicalData<I> {}
508
509impl<I: Iterator<Item = bool> + 'static> crate::altrep_traits::AltLogical for IterLogicalData<I> {
510    const HAS_ELT: bool = true;
511
512    fn elt(x: crate::ffi::SEXP, i: crate::ffi::R_xlen_t) -> i32 {
513        let data = unsafe { <Self as crate::altrep_data::AltrepExtract>::altrep_extract_ref(x) };
514        AltLogicalData::elt(data, i as usize).to_r_int()
515    }
516
517    const HAS_GET_REGION: bool = true;
518
519    fn get_region(
520        x: crate::ffi::SEXP,
521        start: crate::ffi::R_xlen_t,
522        len: crate::ffi::R_xlen_t,
523        buf: &mut [i32],
524    ) -> crate::ffi::R_xlen_t {
525        let data = unsafe { <Self as crate::altrep_data::AltrepExtract>::altrep_extract_ref(x) };
526        AltLogicalData::get_region(data, start as usize, len as usize, buf) as crate::ffi::R_xlen_t
527    }
528}
529
530/// Iterator-backed raw (u8) vector data.
531///
532/// Wraps an iterator producing `u8` values and exposes it as an ALTREP raw vector.
533pub struct IterRawData<I: Iterator<Item = u8>> {
534    state: IterState<I, u8>,
535}
536
537impl<I: Iterator<Item = u8>> IterRawData<I> {
538    /// Create from an iterator with explicit length.
539    pub fn from_iter(iter: I, len: usize) -> Self {
540        Self {
541            state: IterState::new(iter, len),
542        }
543    }
544}
545
546impl<I: ExactSizeIterator<Item = u8>> IterRawData<I> {
547    /// Create from an ExactSizeIterator (length auto-detected).
548    pub fn from_exact_iter(iter: I) -> Self {
549        Self {
550            state: IterState::from_exact_size(iter),
551        }
552    }
553}
554
555impl<I: Iterator<Item = u8>> AltrepLen for IterRawData<I> {
556    fn len(&self) -> usize {
557        self.state.len()
558    }
559}
560
561impl<I: Iterator<Item = u8>> AltRawData for IterRawData<I> {
562    fn elt(&self, i: usize) -> u8 {
563        self.state.get_element(i).unwrap_or(0)
564    }
565
566    fn as_slice(&self) -> Option<&[u8]> {
567        self.state.as_materialized()
568    }
569
570    fn get_region(&self, start: usize, len: usize, buf: &mut [u8]) -> usize {
571        fill_region(start, len, self.len(), buf, |idx| self.elt(idx))
572    }
573}
574
575impl<I: Iterator<Item = u8> + 'static> crate::externalptr::TypedExternal for IterRawData<I> {
576    const TYPE_NAME: &'static str = "IterRawData";
577    const TYPE_NAME_CSTR: &'static [u8] = b"IterRawData\0";
578    const TYPE_ID_CSTR: &'static [u8] = b"miniextendr_api::altrep::IterRawData\0";
579}
580
581impl<I: Iterator<Item = u8> + 'static> InferBase for IterRawData<I> {
582    const BASE: crate::altrep::RBase = crate::altrep::RBase::Raw;
583
584    unsafe fn make_class(
585        class_name: *const i8,
586        pkg_name: *const i8,
587    ) -> crate::ffi::altrep::R_altrep_class_t {
588        unsafe {
589            crate::ffi::altrep::R_make_altraw_class(class_name, pkg_name, core::ptr::null_mut())
590        }
591    }
592
593    unsafe fn install_methods(cls: crate::ffi::altrep::R_altrep_class_t) {
594        unsafe { crate::altrep_bridge::install_base::<Self>(cls) };
595        unsafe { crate::altrep_bridge::install_vec::<Self>(cls) };
596        unsafe { crate::altrep_bridge::install_raw::<Self>(cls) };
597    }
598}
599
600impl<I: Iterator<Item = u8> + 'static> crate::altrep_traits::Altrep for IterRawData<I> {
601    fn length(x: crate::ffi::SEXP) -> crate::ffi::R_xlen_t {
602        let data = unsafe { <Self as crate::altrep_data::AltrepExtract>::altrep_extract_ref(x) };
603        data.len() as crate::ffi::R_xlen_t
604    }
605}
606
607impl<I: Iterator<Item = u8> + 'static> crate::altrep_traits::AltVec for IterRawData<I> {}
608
609impl<I: Iterator<Item = u8> + 'static> crate::altrep_traits::AltRaw for IterRawData<I> {
610    const HAS_ELT: bool = true;
611
612    fn elt(x: crate::ffi::SEXP, i: crate::ffi::R_xlen_t) -> u8 {
613        let data = unsafe { <Self as crate::altrep_data::AltrepExtract>::altrep_extract_ref(x) };
614        AltRawData::elt(data, i as usize)
615    }
616
617    const HAS_GET_REGION: bool = true;
618
619    fn get_region(
620        x: crate::ffi::SEXP,
621        start: crate::ffi::R_xlen_t,
622        len: crate::ffi::R_xlen_t,
623        buf: &mut [u8],
624    ) -> crate::ffi::R_xlen_t {
625        let data = unsafe { <Self as crate::altrep_data::AltrepExtract>::altrep_extract_ref(x) };
626        AltRawData::get_region(data, start as usize, len as usize, buf) as crate::ffi::R_xlen_t
627    }
628}
629// endregion