miniextendr_api/strvec.rs
1//! Thin wrapper around R character vector (`STRSXP`).
2//!
3//! Provides safe construction and element insertion for string vectors.
4
5use std::borrow::Cow;
6
7use crate::ffi::SEXPTYPE::STRSXP;
8use crate::ffi::{self, SEXP, SexpExt};
9use crate::from_r::{SexpError, SexpTypeError, TryFromSexp, charsxp_to_cow, charsxp_to_str};
10use crate::gc_protect::{OwnedProtect, ProtectScope};
11use crate::into_r::IntoR;
12
13/// Owned handle to an R character vector (`STRSXP`).
14///
15/// This wrapper provides safe methods for building character vectors
16/// element-by-element with proper GC protection.
17#[derive(Clone, Copy, Debug)]
18pub struct StrVec(SEXP);
19
20impl StrVec {
21 /// Wrap an existing `STRSXP` without additional checks.
22 ///
23 /// # Safety
24 ///
25 /// Caller must ensure `sexp` is a valid character vector (`STRSXP`)
26 /// whose lifetime remains managed by R.
27 #[inline]
28 pub const unsafe fn from_raw(sexp: SEXP) -> Self {
29 StrVec(sexp)
30 }
31
32 /// Get the underlying `SEXP`.
33 #[inline]
34 pub const fn as_sexp(self) -> SEXP {
35 self.0
36 }
37
38 /// Length of the character vector (number of elements).
39 #[inline]
40 pub fn len(self) -> isize {
41 unsafe { ffi::Rf_xlength(self.0) }
42 }
43
44 /// Returns true if the vector is empty.
45 #[inline]
46 pub fn is_empty(self) -> bool {
47 self.len() == 0
48 }
49
50 /// Get the CHARSXP at the given index.
51 ///
52 /// Returns `None` if out of bounds.
53 #[inline]
54 pub fn get_charsxp(self, idx: isize) -> Option<SEXP> {
55 if idx < 0 || idx >= self.len() {
56 return None;
57 }
58 Some(self.0.string_elt(idx))
59 }
60
61 /// Get the string at the given index (zero-copy).
62 ///
63 /// Returns `None` if out of bounds or if the element is `NA_character_`.
64 /// Panics if the CHARSXP is not valid UTF-8 (should not happen in a UTF-8 locale).
65 #[inline]
66 pub fn get_str(self, idx: isize) -> Option<&'static str> {
67 let charsxp = self.get_charsxp(idx)?;
68 unsafe {
69 if charsxp == SEXP::na_string() {
70 return None;
71 }
72 Some(charsxp_to_str(charsxp))
73 }
74 }
75
76 /// Get the string at the given index as `Cow<str>` (encoding-safe).
77 ///
78 /// Returns `Cow::Borrowed` for UTF-8 strings (zero-copy), `Cow::Owned` for
79 /// non-UTF-8 strings (translated via `Rf_translateCharUTF8`).
80 /// Returns `None` if out of bounds or `NA_character_`.
81 #[inline]
82 pub fn get_cow(self, idx: isize) -> Option<Cow<'static, str>> {
83 let charsxp = self.get_charsxp(idx)?;
84 unsafe {
85 if charsxp == SEXP::na_string() {
86 return None;
87 }
88 Some(charsxp_to_cow(charsxp))
89 }
90 }
91
92 /// Iterate over elements as `Option<&str>`.
93 ///
94 /// `NA_character_` elements yield `None`, valid strings yield `Some(&str)`.
95 /// Zero-copy — each `&str` borrows directly from R's CHARSXP.
96 #[inline]
97 pub fn iter(self) -> StrVecIter {
98 StrVecIter {
99 vec: self,
100 idx: 0,
101 len: self.len(),
102 }
103 }
104
105 /// Iterate over elements as `Option<Cow<str>>` (encoding-safe).
106 ///
107 /// Like [`iter`](Self::iter) but handles non-UTF-8 CHARSXPs gracefully.
108 #[inline]
109 pub fn iter_cow(self) -> StrVecCowIter {
110 StrVecCowIter {
111 vec: self,
112 idx: 0,
113 len: self.len(),
114 }
115 }
116
117 // region: Safe element insertion
118
119 /// Set a CHARSXP at the given index, protecting it during insertion.
120 ///
121 /// This is the safe way to insert a freshly allocated CHARSXP into a string vector.
122 ///
123 /// # Safety
124 ///
125 /// - Must be called from the R main thread
126 /// - `charsxp` must be a valid CHARSXP (from `Rf_mkChar*` or `STRING_ELT`)
127 /// - `self` must be a valid, protected STRSXP
128 ///
129 /// # Panics
130 ///
131 /// Panics if `idx` is out of bounds.
132 #[inline]
133 pub unsafe fn set_charsxp(self, idx: isize, charsxp: SEXP) {
134 assert!(idx >= 0 && idx < self.len(), "index out of bounds");
135 // SAFETY: caller guarantees R main thread and valid SEXPs
136 unsafe {
137 // Protect CHARSXP during SET_STRING_ELT.
138 // Note: Rf_mkCharLenCE returns a CHARSXP that may be from the global
139 // CHARSXP cache, but protection is still needed for newly allocated ones.
140 let _guard = OwnedProtect::new(charsxp);
141 self.0.set_string_elt(idx, charsxp);
142 }
143 }
144
145 /// Set a CHARSXP without protecting it.
146 ///
147 /// # Safety
148 ///
149 /// In addition to the safety requirements of [`set_charsxp`](Self::set_charsxp):
150 /// - The caller must ensure `charsxp` is already protected or from the
151 /// global CHARSXP cache.
152 #[inline]
153 pub unsafe fn set_charsxp_unchecked(self, idx: isize, charsxp: SEXP) {
154 debug_assert!(idx >= 0 && idx < self.len(), "index out of bounds");
155 // SAFETY: caller guarantees charsxp is protected/cached
156 self.0.set_string_elt(idx, charsxp);
157 }
158
159 /// Set an element from a Rust string.
160 ///
161 /// Creates a CHARSXP from the string and inserts it safely.
162 ///
163 /// # Safety
164 ///
165 /// - Must be called from the R main thread
166 /// - `self` must be a valid, protected STRSXP
167 ///
168 /// # Panics
169 ///
170 /// Panics if `idx` is out of bounds.
171 #[inline]
172 pub unsafe fn set_str(self, idx: isize, s: &str) {
173 assert!(idx >= 0 && idx < self.len(), "index out of bounds");
174 // SAFETY: caller guarantees R main thread
175 unsafe {
176 let charsxp = SEXP::charsxp(s);
177 // CHARSXP may be cached, but protect anyway for safety
178 let _guard = OwnedProtect::new(charsxp);
179 self.0.set_string_elt(idx, charsxp);
180 }
181 }
182
183 /// Set an element to `NA_character_`.
184 ///
185 /// # Safety
186 ///
187 /// - Must be called from the R main thread
188 /// - `self` must be a valid, protected STRSXP
189 ///
190 /// # Panics
191 ///
192 /// Panics if `idx` is out of bounds.
193 #[inline]
194 pub unsafe fn set_na(self, idx: isize) {
195 assert!(idx >= 0 && idx < self.len(), "index out of bounds");
196 // R_NaString is a global constant, no protection needed
197 self.0.set_string_elt(idx, SEXP::na_string());
198 }
199
200 /// Set an element from an optional string.
201 ///
202 /// `None` becomes `NA_character_`.
203 ///
204 /// # Safety
205 ///
206 /// - Must be called from the R main thread
207 /// - `self` must be a valid, protected STRSXP
208 ///
209 /// # Panics
210 ///
211 /// Panics if `idx` is out of bounds.
212 #[inline]
213 pub unsafe fn set_opt_str(self, idx: isize, s: Option<&str>) {
214 match s {
215 Some(s) => unsafe { self.set_str(idx, s) },
216 None => unsafe { self.set_na(idx) },
217 }
218 }
219 // endregion
220}
221
222// region: StrVec iterators
223
224/// Iterator over `StrVec` elements as `Option<&str>`.
225///
226/// Yields `None` for `NA_character_`, `Some(&str)` for valid strings.
227/// Zero-copy — each `&str` borrows directly from R's CHARSXP.
228pub struct StrVecIter {
229 vec: StrVec,
230 idx: isize,
231 len: isize,
232}
233
234impl Iterator for StrVecIter {
235 type Item = Option<&'static str>;
236
237 #[inline]
238 fn next(&mut self) -> Option<Self::Item> {
239 if self.idx >= self.len {
240 return None;
241 }
242 let charsxp = self.vec.0.string_elt(self.idx);
243 self.idx += 1;
244 if charsxp == SEXP::na_string() {
245 Some(None)
246 } else {
247 Some(Some(unsafe { charsxp_to_str(charsxp) }))
248 }
249 }
250
251 #[inline]
252 fn size_hint(&self) -> (usize, Option<usize>) {
253 let remaining = (self.len - self.idx) as usize;
254 (remaining, Some(remaining))
255 }
256}
257
258impl ExactSizeIterator for StrVecIter {}
259
260/// Iterator over `StrVec` elements as `Option<Cow<'static, str>>`.
261///
262/// Like [`StrVecIter`] but handles non-UTF-8 CHARSXPs via `Rf_translateCharUTF8`.
263pub struct StrVecCowIter {
264 vec: StrVec,
265 idx: isize,
266 len: isize,
267}
268
269impl Iterator for StrVecCowIter {
270 type Item = Option<Cow<'static, str>>;
271
272 #[inline]
273 fn next(&mut self) -> Option<Self::Item> {
274 if self.idx >= self.len {
275 return None;
276 }
277 let charsxp = self.vec.0.string_elt(self.idx);
278 self.idx += 1;
279 if charsxp == SEXP::na_string() {
280 Some(None)
281 } else {
282 Some(Some(unsafe { charsxp_to_cow(charsxp) }))
283 }
284 }
285
286 #[inline]
287 fn size_hint(&self) -> (usize, Option<usize>) {
288 let remaining = (self.len - self.idx) as usize;
289 (remaining, Some(remaining))
290 }
291}
292
293impl ExactSizeIterator for StrVecCowIter {}
294
295impl IntoIterator for StrVec {
296 type Item = Option<&'static str>;
297 type IntoIter = StrVecIter;
298
299 #[inline]
300 fn into_iter(self) -> Self::IntoIter {
301 self.iter()
302 }
303}
304
305// endregion
306
307// region: StrVecBuilder - efficient batch string vector construction
308
309/// Builder for constructing string vectors with efficient protection management.
310///
311/// # Example
312///
313/// ```ignore
314/// unsafe fn build_strvec(strings: &[&str]) -> SEXP {
315/// let scope = ProtectScope::new();
316/// let builder = StrVecBuilder::new(&scope, strings.len() as isize);
317///
318/// for (i, s) in strings.iter().enumerate() {
319/// builder.set_str(i as isize, s);
320/// }
321///
322/// builder.into_sexp()
323/// }
324/// ```
325pub struct StrVecBuilder<'a> {
326 vec: SEXP,
327 _scope: &'a ProtectScope,
328}
329
330impl<'a> StrVecBuilder<'a> {
331 /// Create a new string vector builder with the given length.
332 ///
333 /// # Safety
334 ///
335 /// Must be called from the R main thread.
336 #[inline]
337 pub unsafe fn new(scope: &'a ProtectScope, len: usize) -> Self {
338 // SAFETY: caller guarantees R main thread
339 let vec = unsafe { scope.alloc_character(len).into_raw() };
340 Self { vec, _scope: scope }
341 }
342
343 /// Set an element from a Rust string.
344 ///
345 /// # Safety
346 ///
347 /// Must be called from the R main thread.
348 #[inline]
349 pub unsafe fn set_str(&self, idx: isize, s: &str) {
350 debug_assert!(idx >= 0 && idx < unsafe { ffi::Rf_xlength(self.vec) });
351 let charsxp = SEXP::charsxp(s);
352 self.vec.set_string_elt(idx, charsxp);
353 }
354
355 /// Set an element to `NA_character_`.
356 ///
357 /// # Safety
358 ///
359 /// Must be called from the R main thread.
360 #[inline]
361 pub unsafe fn set_na(&self, idx: isize) {
362 debug_assert!(idx >= 0 && idx < unsafe { ffi::Rf_xlength(self.vec) });
363 self.vec.set_string_elt(idx, SEXP::na_string());
364 }
365
366 /// Set an element from an optional string.
367 ///
368 /// # Safety
369 ///
370 /// Must be called from the R main thread.
371 #[inline]
372 pub unsafe fn set_opt_str(&self, idx: isize, s: Option<&str>) {
373 match s {
374 // SAFETY: caller guarantees R main thread
375 Some(s) => unsafe { self.set_str(idx, s) },
376 None => unsafe { self.set_na(idx) },
377 }
378 }
379
380 /// Get the underlying SEXP.
381 #[inline]
382 pub fn as_sexp(&self) -> SEXP {
383 self.vec
384 }
385
386 /// Convert to a `StrVec` wrapper.
387 #[inline]
388 pub fn into_strvec(self) -> StrVec {
389 StrVec(self.vec)
390 }
391
392 /// Convert to the underlying SEXP.
393 #[inline]
394 pub fn into_sexp(self) -> SEXP {
395 self.vec
396 }
397
398 /// Get the length.
399 #[inline]
400 pub fn len(&self) -> isize {
401 unsafe { ffi::Rf_xlength(self.vec) }
402 }
403
404 /// Check if empty.
405 #[inline]
406 pub fn is_empty(&self) -> bool {
407 self.len() == 0
408 }
409}
410// endregion
411
412// region: Trait implementations
413
414impl IntoR for StrVec {
415 type Error = std::convert::Infallible;
416 fn try_into_sexp(self) -> Result<SEXP, Self::Error> {
417 Ok(self.into_sexp())
418 }
419 unsafe fn try_into_sexp_unchecked(self) -> Result<SEXP, Self::Error> {
420 self.try_into_sexp()
421 }
422 #[inline]
423 fn into_sexp(self) -> SEXP {
424 self.0
425 }
426}
427
428impl TryFromSexp for StrVec {
429 type Error = SexpError;
430
431 fn try_from_sexp(sexp: SEXP) -> Result<Self, Self::Error> {
432 let actual = sexp.type_of();
433 if actual != STRSXP {
434 return Err(SexpTypeError {
435 expected: STRSXP,
436 actual,
437 }
438 .into());
439 }
440 Ok(StrVec(sexp))
441 }
442}
443// endregion
444
445// region: ProtectedStrVec — GC-protected string vector with proper lifetimes
446
447/// GC-protected view over an R character vector (`STRSXP`).
448///
449/// Unlike [`StrVec`] (which is `Copy` and trusts the caller for GC protection),
450/// `ProtectedStrVec` owns an [`OwnedProtect`] guard that keeps the STRSXP alive.
451/// All borrowed data (`&str`, iterators) has its lifetime tied to `&self`,
452/// not `'static` — preventing use-after-GC bugs at compile time.
453///
454/// # When to use
455///
456/// - **`StrVec`**: for SEXP arguments to `.Call` (R protects them), or when you
457/// manage protection yourself. Lightweight, `Copy`.
458/// - **`ProtectedStrVec`**: when you allocate or receive an STRSXP and need to
459/// keep it alive beyond the immediate scope. Not `Copy`.
460///
461/// # Example
462///
463/// ```ignore
464/// #[miniextendr]
465/// pub fn count_unique(strings: ProtectedStrVec) -> i32 {
466/// let unique: HashSet<&str> = strings.iter()
467/// .filter_map(|s| s)
468/// .collect();
469/// unique.len() as i32
470/// }
471/// ```
472pub struct ProtectedStrVec {
473 inner: StrVec,
474 len: isize,
475 _protect: Option<OwnedProtect>,
476}
477
478impl ProtectedStrVec {
479 /// Create a protected view over an STRSXP.
480 ///
481 /// Calls `Rf_protect` on the SEXP. Use [`from_sexp_trusted`](Self::from_sexp_trusted)
482 /// when the SEXP is already protected (e.g., `.Call` arguments) to avoid
483 /// double-protecting.
484 ///
485 /// # Safety
486 ///
487 /// - `sexp` must be a valid STRSXP.
488 /// - Must be called from the R main thread.
489 #[inline]
490 pub unsafe fn new(sexp: SEXP) -> Self {
491 let guard = unsafe { OwnedProtect::new(sexp) };
492 let inner = unsafe { StrVec::from_raw(guard.get()) };
493 let len = inner.len();
494 Self {
495 inner,
496 len,
497 _protect: Some(guard),
498 }
499 }
500
501 /// Create a view without adding GC protection.
502 ///
503 /// Use this when the SEXP is already protected by R (e.g., a `.Call`
504 /// argument, or in a `ProtectScope`). Avoids the redundant
505 /// `Rf_protect`/`Rf_unprotect` pair.
506 ///
507 /// The lifetime-bound `&str` borrows are still enforced — this only
508 /// skips the protect stack push, not the safety guarantees.
509 ///
510 /// # Safety
511 ///
512 /// - `sexp` must be a valid STRSXP.
513 /// - `sexp` must remain GC-protected for the lifetime of this struct.
514 /// - Must be called from the R main thread.
515 #[inline]
516 pub unsafe fn from_sexp_trusted(sexp: SEXP) -> Self {
517 let inner = unsafe { StrVec::from_raw(sexp) };
518 let len = inner.len();
519 Self {
520 inner,
521 len,
522 _protect: None,
523 }
524 }
525
526 /// Number of elements.
527 #[inline]
528 pub fn len(&self) -> isize {
529 self.len
530 }
531
532 /// Whether the vector is empty.
533 #[inline]
534 pub fn is_empty(&self) -> bool {
535 self.len == 0
536 }
537
538 /// Get the string at index (zero-copy, lifetime tied to `&self`).
539 ///
540 /// Returns `None` for out-of-bounds or `NA_character_`.
541 #[inline]
542 pub fn get_str(&self, idx: isize) -> Option<&str> {
543 // charsxp_to_str returns &'static str, but lifetime elision
544 // restricts it to &'_ (tied to &self) — correct: data lives
545 // as long as OwnedProtect keeps the STRSXP alive.
546 self.inner.get_str(idx)
547 }
548
549 /// Get the string at index as `Cow<str>` (encoding-safe, lifetime tied to `&self`).
550 #[inline]
551 pub fn get_cow(&self, idx: isize) -> Option<Cow<'_, str>> {
552 self.inner.get_cow(idx)
553 }
554
555 /// Iterate over elements as `Option<&str>` (lifetime tied to `&self`).
556 #[inline]
557 pub fn iter(&self) -> ProtectedStrVecIter<'_> {
558 ProtectedStrVecIter {
559 vec: self,
560 idx: 0,
561 len: self.len,
562 }
563 }
564
565 /// Iterate over elements as `Option<Cow<str>>` (encoding-safe).
566 #[inline]
567 pub fn iter_cow(&self) -> ProtectedStrVecCowIter<'_> {
568 ProtectedStrVecCowIter {
569 vec: self,
570 idx: 0,
571 len: self.len,
572 }
573 }
574
575 /// Get the underlying SEXP (still protected by this handle).
576 #[inline]
577 pub fn as_sexp(&self) -> SEXP {
578 self.inner.as_sexp()
579 }
580
581 /// Get the inner `StrVec` (unprotected copy — caller assumes protection responsibility).
582 #[inline]
583 pub fn as_strvec(&self) -> StrVec {
584 self.inner
585 }
586}
587
588/// Iterator over `ProtectedStrVec` with lifetime tied to the protection guard.
589pub struct ProtectedStrVecIter<'a> {
590 vec: &'a ProtectedStrVec,
591 idx: isize,
592 len: isize,
593}
594
595impl<'a> Iterator for ProtectedStrVecIter<'a> {
596 type Item = Option<&'a str>;
597
598 #[inline]
599 fn next(&mut self) -> Option<Self::Item> {
600 if self.idx >= self.len {
601 return None;
602 }
603 let result = self.vec.get_str(self.idx);
604 self.idx += 1;
605 // get_str returns None for NA; we need to distinguish "end of iter" from "NA element"
606 // Wrap: Some(None) = NA, Some(Some(&str)) = value, None = end
607 Some(result)
608 }
609
610 #[inline]
611 fn size_hint(&self) -> (usize, Option<usize>) {
612 let remaining = (self.len - self.idx) as usize;
613 (remaining, Some(remaining))
614 }
615}
616
617impl ExactSizeIterator for ProtectedStrVecIter<'_> {}
618
619/// Encoding-safe iterator over `ProtectedStrVec`.
620pub struct ProtectedStrVecCowIter<'a> {
621 vec: &'a ProtectedStrVec,
622 idx: isize,
623 len: isize,
624}
625
626impl<'a> Iterator for ProtectedStrVecCowIter<'a> {
627 type Item = Option<Cow<'a, str>>;
628
629 #[inline]
630 fn next(&mut self) -> Option<Self::Item> {
631 if self.idx >= self.len {
632 return None;
633 }
634 let result = self.vec.get_cow(self.idx);
635 self.idx += 1;
636 Some(result)
637 }
638
639 #[inline]
640 fn size_hint(&self) -> (usize, Option<usize>) {
641 let remaining = (self.len - self.idx) as usize;
642 (remaining, Some(remaining))
643 }
644}
645
646impl ExactSizeIterator for ProtectedStrVecCowIter<'_> {}
647
648impl<'a> IntoIterator for &'a ProtectedStrVec {
649 type Item = Option<&'a str>;
650 type IntoIter = ProtectedStrVecIter<'a>;
651
652 #[inline]
653 fn into_iter(self) -> Self::IntoIter {
654 self.iter()
655 }
656}
657
658impl IntoR for ProtectedStrVec {
659 type Error = std::convert::Infallible;
660 fn try_into_sexp(self) -> Result<SEXP, Self::Error> {
661 Ok(self.as_sexp())
662 }
663 unsafe fn try_into_sexp_unchecked(self) -> Result<SEXP, Self::Error> {
664 Ok(self.as_sexp())
665 }
666 #[inline]
667 fn into_sexp(self) -> SEXP {
668 self.as_sexp()
669 }
670}
671
672impl TryFromSexp for ProtectedStrVec {
673 type Error = SexpError;
674
675 fn try_from_sexp(sexp: SEXP) -> Result<Self, Self::Error> {
676 let actual = sexp.type_of();
677 if actual != STRSXP {
678 return Err(SexpTypeError {
679 expected: STRSXP,
680 actual,
681 }
682 .into());
683 }
684 // Use from_sexp_trusted: TryFromSexp is called from generated .Call
685 // wrappers where R already protects the argument. No need to double-protect.
686 Ok(unsafe { ProtectedStrVec::from_sexp_trusted(sexp) })
687 }
688}
689
690impl std::fmt::Debug for ProtectedStrVec {
691 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
692 f.debug_struct("ProtectedStrVec")
693 .field("len", &self.len)
694 .finish()
695 }
696}
697// endregion