Skip to main content

miniextendr_api/
strvec.rs

1//! Thin wrapper around R character vector (`STRSXP`).
2//!
3//! Provides safe construction and element insertion for string vectors.
4
5use std::borrow::Cow;
6
7use crate::ffi::SEXPTYPE::STRSXP;
8use crate::ffi::{self, SEXP, SexpExt};
9use crate::from_r::{SexpError, SexpTypeError, TryFromSexp, charsxp_to_cow, charsxp_to_str};
10use crate::gc_protect::{OwnedProtect, ProtectScope};
11use crate::into_r::IntoR;
12
13/// Owned handle to an R character vector (`STRSXP`).
14///
15/// This wrapper provides safe methods for building character vectors
16/// element-by-element with proper GC protection.
17#[derive(Clone, Copy, Debug)]
18pub struct StrVec(SEXP);
19
20impl StrVec {
21    /// Wrap an existing `STRSXP` without additional checks.
22    ///
23    /// # Safety
24    ///
25    /// Caller must ensure `sexp` is a valid character vector (`STRSXP`)
26    /// whose lifetime remains managed by R.
27    #[inline]
28    pub const unsafe fn from_raw(sexp: SEXP) -> Self {
29        StrVec(sexp)
30    }
31
32    /// Get the underlying `SEXP`.
33    #[inline]
34    pub const fn as_sexp(self) -> SEXP {
35        self.0
36    }
37
38    /// Length of the character vector (number of elements).
39    #[inline]
40    pub fn len(self) -> isize {
41        unsafe { ffi::Rf_xlength(self.0) }
42    }
43
44    /// Returns true if the vector is empty.
45    #[inline]
46    pub fn is_empty(self) -> bool {
47        self.len() == 0
48    }
49
50    /// Get the CHARSXP at the given index.
51    ///
52    /// Returns `None` if out of bounds.
53    #[inline]
54    pub fn get_charsxp(self, idx: isize) -> Option<SEXP> {
55        if idx < 0 || idx >= self.len() {
56            return None;
57        }
58        Some(self.0.string_elt(idx))
59    }
60
61    /// Get the string at the given index (zero-copy).
62    ///
63    /// Returns `None` if out of bounds or if the element is `NA_character_`.
64    /// Panics if the CHARSXP is not valid UTF-8 (should not happen in a UTF-8 locale).
65    #[inline]
66    pub fn get_str(self, idx: isize) -> Option<&'static str> {
67        let charsxp = self.get_charsxp(idx)?;
68        unsafe {
69            if charsxp == SEXP::na_string() {
70                return None;
71            }
72            Some(charsxp_to_str(charsxp))
73        }
74    }
75
76    /// Get the string at the given index as `Cow<str>` (encoding-safe).
77    ///
78    /// Returns `Cow::Borrowed` for UTF-8 strings (zero-copy), `Cow::Owned` for
79    /// non-UTF-8 strings (translated via `Rf_translateCharUTF8`).
80    /// Returns `None` if out of bounds or `NA_character_`.
81    #[inline]
82    pub fn get_cow(self, idx: isize) -> Option<Cow<'static, str>> {
83        let charsxp = self.get_charsxp(idx)?;
84        unsafe {
85            if charsxp == SEXP::na_string() {
86                return None;
87            }
88            Some(charsxp_to_cow(charsxp))
89        }
90    }
91
92    /// Iterate over elements as `Option<&str>`.
93    ///
94    /// `NA_character_` elements yield `None`, valid strings yield `Some(&str)`.
95    /// Zero-copy — each `&str` borrows directly from R's CHARSXP.
96    #[inline]
97    pub fn iter(self) -> StrVecIter {
98        StrVecIter {
99            vec: self,
100            idx: 0,
101            len: self.len(),
102        }
103    }
104
105    /// Iterate over elements as `Option<Cow<str>>` (encoding-safe).
106    ///
107    /// Like [`iter`](Self::iter) but handles non-UTF-8 CHARSXPs gracefully.
108    #[inline]
109    pub fn iter_cow(self) -> StrVecCowIter {
110        StrVecCowIter {
111            vec: self,
112            idx: 0,
113            len: self.len(),
114        }
115    }
116
117    // region: Safe element insertion
118
119    /// Set a CHARSXP at the given index, protecting it during insertion.
120    ///
121    /// This is the safe way to insert a freshly allocated CHARSXP into a string vector.
122    ///
123    /// # Safety
124    ///
125    /// - Must be called from the R main thread
126    /// - `charsxp` must be a valid CHARSXP (from `Rf_mkChar*` or `STRING_ELT`)
127    /// - `self` must be a valid, protected STRSXP
128    ///
129    /// # Panics
130    ///
131    /// Panics if `idx` is out of bounds.
132    #[inline]
133    pub unsafe fn set_charsxp(self, idx: isize, charsxp: SEXP) {
134        assert!(idx >= 0 && idx < self.len(), "index out of bounds");
135        // SAFETY: caller guarantees R main thread and valid SEXPs
136        unsafe {
137            // Protect CHARSXP during SET_STRING_ELT.
138            // Note: Rf_mkCharLenCE returns a CHARSXP that may be from the global
139            // CHARSXP cache, but protection is still needed for newly allocated ones.
140            let _guard = OwnedProtect::new(charsxp);
141            self.0.set_string_elt(idx, charsxp);
142        }
143    }
144
145    /// Set a CHARSXP without protecting it.
146    ///
147    /// # Safety
148    ///
149    /// In addition to the safety requirements of [`set_charsxp`](Self::set_charsxp):
150    /// - The caller must ensure `charsxp` is already protected or from the
151    ///   global CHARSXP cache.
152    #[inline]
153    pub unsafe fn set_charsxp_unchecked(self, idx: isize, charsxp: SEXP) {
154        debug_assert!(idx >= 0 && idx < self.len(), "index out of bounds");
155        // SAFETY: caller guarantees charsxp is protected/cached
156        self.0.set_string_elt(idx, charsxp);
157    }
158
159    /// Set an element from a Rust string.
160    ///
161    /// Creates a CHARSXP from the string and inserts it safely.
162    ///
163    /// # Safety
164    ///
165    /// - Must be called from the R main thread
166    /// - `self` must be a valid, protected STRSXP
167    ///
168    /// # Panics
169    ///
170    /// Panics if `idx` is out of bounds.
171    #[inline]
172    pub unsafe fn set_str(self, idx: isize, s: &str) {
173        assert!(idx >= 0 && idx < self.len(), "index out of bounds");
174        // SAFETY: caller guarantees R main thread
175        unsafe {
176            let charsxp = SEXP::charsxp(s);
177            // CHARSXP may be cached, but protect anyway for safety
178            let _guard = OwnedProtect::new(charsxp);
179            self.0.set_string_elt(idx, charsxp);
180        }
181    }
182
183    /// Set an element to `NA_character_`.
184    ///
185    /// # Safety
186    ///
187    /// - Must be called from the R main thread
188    /// - `self` must be a valid, protected STRSXP
189    ///
190    /// # Panics
191    ///
192    /// Panics if `idx` is out of bounds.
193    #[inline]
194    pub unsafe fn set_na(self, idx: isize) {
195        assert!(idx >= 0 && idx < self.len(), "index out of bounds");
196        // R_NaString is a global constant, no protection needed
197        self.0.set_string_elt(idx, SEXP::na_string());
198    }
199
200    /// Set an element from an optional string.
201    ///
202    /// `None` becomes `NA_character_`.
203    ///
204    /// # Safety
205    ///
206    /// - Must be called from the R main thread
207    /// - `self` must be a valid, protected STRSXP
208    ///
209    /// # Panics
210    ///
211    /// Panics if `idx` is out of bounds.
212    #[inline]
213    pub unsafe fn set_opt_str(self, idx: isize, s: Option<&str>) {
214        match s {
215            Some(s) => unsafe { self.set_str(idx, s) },
216            None => unsafe { self.set_na(idx) },
217        }
218    }
219    // endregion
220}
221
222// region: StrVec iterators
223
224/// Iterator over `StrVec` elements as `Option<&str>`.
225///
226/// Yields `None` for `NA_character_`, `Some(&str)` for valid strings.
227/// Zero-copy — each `&str` borrows directly from R's CHARSXP.
228pub struct StrVecIter {
229    vec: StrVec,
230    idx: isize,
231    len: isize,
232}
233
234impl Iterator for StrVecIter {
235    type Item = Option<&'static str>;
236
237    #[inline]
238    fn next(&mut self) -> Option<Self::Item> {
239        if self.idx >= self.len {
240            return None;
241        }
242        let charsxp = self.vec.0.string_elt(self.idx);
243        self.idx += 1;
244        if charsxp == SEXP::na_string() {
245            Some(None)
246        } else {
247            Some(Some(unsafe { charsxp_to_str(charsxp) }))
248        }
249    }
250
251    #[inline]
252    fn size_hint(&self) -> (usize, Option<usize>) {
253        let remaining = (self.len - self.idx) as usize;
254        (remaining, Some(remaining))
255    }
256}
257
258impl ExactSizeIterator for StrVecIter {}
259
260/// Iterator over `StrVec` elements as `Option<Cow<'static, str>>`.
261///
262/// Like [`StrVecIter`] but handles non-UTF-8 CHARSXPs via `Rf_translateCharUTF8`.
263pub struct StrVecCowIter {
264    vec: StrVec,
265    idx: isize,
266    len: isize,
267}
268
269impl Iterator for StrVecCowIter {
270    type Item = Option<Cow<'static, str>>;
271
272    #[inline]
273    fn next(&mut self) -> Option<Self::Item> {
274        if self.idx >= self.len {
275            return None;
276        }
277        let charsxp = self.vec.0.string_elt(self.idx);
278        self.idx += 1;
279        if charsxp == SEXP::na_string() {
280            Some(None)
281        } else {
282            Some(Some(unsafe { charsxp_to_cow(charsxp) }))
283        }
284    }
285
286    #[inline]
287    fn size_hint(&self) -> (usize, Option<usize>) {
288        let remaining = (self.len - self.idx) as usize;
289        (remaining, Some(remaining))
290    }
291}
292
293impl ExactSizeIterator for StrVecCowIter {}
294
295impl IntoIterator for StrVec {
296    type Item = Option<&'static str>;
297    type IntoIter = StrVecIter;
298
299    #[inline]
300    fn into_iter(self) -> Self::IntoIter {
301        self.iter()
302    }
303}
304
305// endregion
306
307// region: StrVecBuilder - efficient batch string vector construction
308
309/// Builder for constructing string vectors with efficient protection management.
310///
311/// # Example
312///
313/// ```ignore
314/// unsafe fn build_strvec(strings: &[&str]) -> SEXP {
315///     let scope = ProtectScope::new();
316///     let builder = StrVecBuilder::new(&scope, strings.len() as isize);
317///
318///     for (i, s) in strings.iter().enumerate() {
319///         builder.set_str(i as isize, s);
320///     }
321///
322///     builder.into_sexp()
323/// }
324/// ```
325pub struct StrVecBuilder<'a> {
326    vec: SEXP,
327    _scope: &'a ProtectScope,
328}
329
330impl<'a> StrVecBuilder<'a> {
331    /// Create a new string vector builder with the given length.
332    ///
333    /// # Safety
334    ///
335    /// Must be called from the R main thread.
336    #[inline]
337    pub unsafe fn new(scope: &'a ProtectScope, len: usize) -> Self {
338        // SAFETY: caller guarantees R main thread
339        let vec = unsafe { scope.alloc_character(len).into_raw() };
340        Self { vec, _scope: scope }
341    }
342
343    /// Set an element from a Rust string.
344    ///
345    /// # Safety
346    ///
347    /// Must be called from the R main thread.
348    #[inline]
349    pub unsafe fn set_str(&self, idx: isize, s: &str) {
350        debug_assert!(idx >= 0 && idx < unsafe { ffi::Rf_xlength(self.vec) });
351        let charsxp = SEXP::charsxp(s);
352        self.vec.set_string_elt(idx, charsxp);
353    }
354
355    /// Set an element to `NA_character_`.
356    ///
357    /// # Safety
358    ///
359    /// Must be called from the R main thread.
360    #[inline]
361    pub unsafe fn set_na(&self, idx: isize) {
362        debug_assert!(idx >= 0 && idx < unsafe { ffi::Rf_xlength(self.vec) });
363        self.vec.set_string_elt(idx, SEXP::na_string());
364    }
365
366    /// Set an element from an optional string.
367    ///
368    /// # Safety
369    ///
370    /// Must be called from the R main thread.
371    #[inline]
372    pub unsafe fn set_opt_str(&self, idx: isize, s: Option<&str>) {
373        match s {
374            // SAFETY: caller guarantees R main thread
375            Some(s) => unsafe { self.set_str(idx, s) },
376            None => unsafe { self.set_na(idx) },
377        }
378    }
379
380    /// Get the underlying SEXP.
381    #[inline]
382    pub fn as_sexp(&self) -> SEXP {
383        self.vec
384    }
385
386    /// Convert to a `StrVec` wrapper.
387    #[inline]
388    pub fn into_strvec(self) -> StrVec {
389        StrVec(self.vec)
390    }
391
392    /// Convert to the underlying SEXP.
393    #[inline]
394    pub fn into_sexp(self) -> SEXP {
395        self.vec
396    }
397
398    /// Get the length.
399    #[inline]
400    pub fn len(&self) -> isize {
401        unsafe { ffi::Rf_xlength(self.vec) }
402    }
403
404    /// Check if empty.
405    #[inline]
406    pub fn is_empty(&self) -> bool {
407        self.len() == 0
408    }
409}
410// endregion
411
412// region: Trait implementations
413
414impl IntoR for StrVec {
415    type Error = std::convert::Infallible;
416    fn try_into_sexp(self) -> Result<SEXP, Self::Error> {
417        Ok(self.into_sexp())
418    }
419    unsafe fn try_into_sexp_unchecked(self) -> Result<SEXP, Self::Error> {
420        self.try_into_sexp()
421    }
422    #[inline]
423    fn into_sexp(self) -> SEXP {
424        self.0
425    }
426}
427
428impl TryFromSexp for StrVec {
429    type Error = SexpError;
430
431    fn try_from_sexp(sexp: SEXP) -> Result<Self, Self::Error> {
432        let actual = sexp.type_of();
433        if actual != STRSXP {
434            return Err(SexpTypeError {
435                expected: STRSXP,
436                actual,
437            }
438            .into());
439        }
440        Ok(StrVec(sexp))
441    }
442}
443// endregion
444
445// region: ProtectedStrVec — GC-protected string vector with proper lifetimes
446
447/// GC-protected view over an R character vector (`STRSXP`).
448///
449/// Unlike [`StrVec`] (which is `Copy` and trusts the caller for GC protection),
450/// `ProtectedStrVec` owns an [`OwnedProtect`] guard that keeps the STRSXP alive.
451/// All borrowed data (`&str`, iterators) has its lifetime tied to `&self`,
452/// not `'static` — preventing use-after-GC bugs at compile time.
453///
454/// # When to use
455///
456/// - **`StrVec`**: for SEXP arguments to `.Call` (R protects them), or when you
457///   manage protection yourself. Lightweight, `Copy`.
458/// - **`ProtectedStrVec`**: when you allocate or receive an STRSXP and need to
459///   keep it alive beyond the immediate scope. Not `Copy`.
460///
461/// # Example
462///
463/// ```ignore
464/// #[miniextendr]
465/// pub fn count_unique(strings: ProtectedStrVec) -> i32 {
466///     let unique: HashSet<&str> = strings.iter()
467///         .filter_map(|s| s)
468///         .collect();
469///     unique.len() as i32
470/// }
471/// ```
472pub struct ProtectedStrVec {
473    inner: StrVec,
474    len: isize,
475    _protect: Option<OwnedProtect>,
476}
477
478impl ProtectedStrVec {
479    /// Create a protected view over an STRSXP.
480    ///
481    /// Calls `Rf_protect` on the SEXP. Use [`from_sexp_trusted`](Self::from_sexp_trusted)
482    /// when the SEXP is already protected (e.g., `.Call` arguments) to avoid
483    /// double-protecting.
484    ///
485    /// # Safety
486    ///
487    /// - `sexp` must be a valid STRSXP.
488    /// - Must be called from the R main thread.
489    #[inline]
490    pub unsafe fn new(sexp: SEXP) -> Self {
491        let guard = unsafe { OwnedProtect::new(sexp) };
492        let inner = unsafe { StrVec::from_raw(guard.get()) };
493        let len = inner.len();
494        Self {
495            inner,
496            len,
497            _protect: Some(guard),
498        }
499    }
500
501    /// Create a view without adding GC protection.
502    ///
503    /// Use this when the SEXP is already protected by R (e.g., a `.Call`
504    /// argument, or in a `ProtectScope`). Avoids the redundant
505    /// `Rf_protect`/`Rf_unprotect` pair.
506    ///
507    /// The lifetime-bound `&str` borrows are still enforced — this only
508    /// skips the protect stack push, not the safety guarantees.
509    ///
510    /// # Safety
511    ///
512    /// - `sexp` must be a valid STRSXP.
513    /// - `sexp` must remain GC-protected for the lifetime of this struct.
514    /// - Must be called from the R main thread.
515    #[inline]
516    pub unsafe fn from_sexp_trusted(sexp: SEXP) -> Self {
517        let inner = unsafe { StrVec::from_raw(sexp) };
518        let len = inner.len();
519        Self {
520            inner,
521            len,
522            _protect: None,
523        }
524    }
525
526    /// Number of elements.
527    #[inline]
528    pub fn len(&self) -> isize {
529        self.len
530    }
531
532    /// Whether the vector is empty.
533    #[inline]
534    pub fn is_empty(&self) -> bool {
535        self.len == 0
536    }
537
538    /// Get the string at index (zero-copy, lifetime tied to `&self`).
539    ///
540    /// Returns `None` for out-of-bounds or `NA_character_`.
541    #[inline]
542    pub fn get_str(&self, idx: isize) -> Option<&str> {
543        // charsxp_to_str returns &'static str, but lifetime elision
544        // restricts it to &'_ (tied to &self) — correct: data lives
545        // as long as OwnedProtect keeps the STRSXP alive.
546        self.inner.get_str(idx)
547    }
548
549    /// Get the string at index as `Cow<str>` (encoding-safe, lifetime tied to `&self`).
550    #[inline]
551    pub fn get_cow(&self, idx: isize) -> Option<Cow<'_, str>> {
552        self.inner.get_cow(idx)
553    }
554
555    /// Iterate over elements as `Option<&str>` (lifetime tied to `&self`).
556    #[inline]
557    pub fn iter(&self) -> ProtectedStrVecIter<'_> {
558        ProtectedStrVecIter {
559            vec: self,
560            idx: 0,
561            len: self.len,
562        }
563    }
564
565    /// Iterate over elements as `Option<Cow<str>>` (encoding-safe).
566    #[inline]
567    pub fn iter_cow(&self) -> ProtectedStrVecCowIter<'_> {
568        ProtectedStrVecCowIter {
569            vec: self,
570            idx: 0,
571            len: self.len,
572        }
573    }
574
575    /// Get the underlying SEXP (still protected by this handle).
576    #[inline]
577    pub fn as_sexp(&self) -> SEXP {
578        self.inner.as_sexp()
579    }
580
581    /// Get the inner `StrVec` (unprotected copy — caller assumes protection responsibility).
582    #[inline]
583    pub fn as_strvec(&self) -> StrVec {
584        self.inner
585    }
586}
587
588/// Iterator over `ProtectedStrVec` with lifetime tied to the protection guard.
589pub struct ProtectedStrVecIter<'a> {
590    vec: &'a ProtectedStrVec,
591    idx: isize,
592    len: isize,
593}
594
595impl<'a> Iterator for ProtectedStrVecIter<'a> {
596    type Item = Option<&'a str>;
597
598    #[inline]
599    fn next(&mut self) -> Option<Self::Item> {
600        if self.idx >= self.len {
601            return None;
602        }
603        let result = self.vec.get_str(self.idx);
604        self.idx += 1;
605        // get_str returns None for NA; we need to distinguish "end of iter" from "NA element"
606        // Wrap: Some(None) = NA, Some(Some(&str)) = value, None = end
607        Some(result)
608    }
609
610    #[inline]
611    fn size_hint(&self) -> (usize, Option<usize>) {
612        let remaining = (self.len - self.idx) as usize;
613        (remaining, Some(remaining))
614    }
615}
616
617impl ExactSizeIterator for ProtectedStrVecIter<'_> {}
618
619/// Encoding-safe iterator over `ProtectedStrVec`.
620pub struct ProtectedStrVecCowIter<'a> {
621    vec: &'a ProtectedStrVec,
622    idx: isize,
623    len: isize,
624}
625
626impl<'a> Iterator for ProtectedStrVecCowIter<'a> {
627    type Item = Option<Cow<'a, str>>;
628
629    #[inline]
630    fn next(&mut self) -> Option<Self::Item> {
631        if self.idx >= self.len {
632            return None;
633        }
634        let result = self.vec.get_cow(self.idx);
635        self.idx += 1;
636        Some(result)
637    }
638
639    #[inline]
640    fn size_hint(&self) -> (usize, Option<usize>) {
641        let remaining = (self.len - self.idx) as usize;
642        (remaining, Some(remaining))
643    }
644}
645
646impl ExactSizeIterator for ProtectedStrVecCowIter<'_> {}
647
648impl<'a> IntoIterator for &'a ProtectedStrVec {
649    type Item = Option<&'a str>;
650    type IntoIter = ProtectedStrVecIter<'a>;
651
652    #[inline]
653    fn into_iter(self) -> Self::IntoIter {
654        self.iter()
655    }
656}
657
658impl IntoR for ProtectedStrVec {
659    type Error = std::convert::Infallible;
660    fn try_into_sexp(self) -> Result<SEXP, Self::Error> {
661        Ok(self.as_sexp())
662    }
663    unsafe fn try_into_sexp_unchecked(self) -> Result<SEXP, Self::Error> {
664        Ok(self.as_sexp())
665    }
666    #[inline]
667    fn into_sexp(self) -> SEXP {
668        self.as_sexp()
669    }
670}
671
672impl TryFromSexp for ProtectedStrVec {
673    type Error = SexpError;
674
675    fn try_from_sexp(sexp: SEXP) -> Result<Self, Self::Error> {
676        let actual = sexp.type_of();
677        if actual != STRSXP {
678            return Err(SexpTypeError {
679                expected: STRSXP,
680                actual,
681            }
682            .into());
683        }
684        // Use from_sexp_trusted: TryFromSexp is called from generated .Call
685        // wrappers where R already protects the argument. No need to double-protect.
686        Ok(unsafe { ProtectedStrVec::from_sexp_trusted(sexp) })
687    }
688}
689
690impl std::fmt::Debug for ProtectedStrVec {
691    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
692        f.debug_struct("ProtectedStrVec")
693            .field("len", &self.len)
694            .finish()
695    }
696}
697// endregion