Skip to main content

miniextendr_api/from_r/
strings.rs

1//! String conversions — STRSXP requires special handling via `STRING_ELT`.
2//!
3//! R stores strings as STRSXP (vector of CHARSXP). Each element requires
4//! `STRING_ELT` + `R_CHAR` to extract, unlike numeric vectors which expose
5//! a contiguous data pointer.
6//!
7//! Covers: `&str`, `String`, `char`, `Option<&str>`, `Option<String>`,
8//! `Vec<String>`, `Vec<&str>`, `Box<[String]>`.
9
10use crate::ffi::{SEXP, SEXPTYPE, SexpExt};
11use crate::from_r::{
12    SexpError, SexpLengthError, SexpTypeError, TryFromSexp, charsxp_to_str,
13    charsxp_to_str_unchecked,
14};
15
16/// Convert R character vector (STRSXP) to Rust &str.
17///
18/// Extracts the first element of the character vector and returns it as a UTF-8 string.
19/// The returned string has static lifetime because it points to R's internal string pool.
20///
21/// # NA Handling
22///
23/// **Warning:** `NA_character_` is converted to empty string `""`. This is lossy!
24/// If you need to distinguish between NA and empty strings, use `Option<String>` instead:
25///
26/// ```ignore
27/// let maybe_str: Option<String> = sexp.try_into()?;
28/// ```
29///
30/// # Safety
31/// The returned &str is only valid as long as R doesn't garbage collect the CHARSXP.
32/// In practice, this is safe within a single .Call invocation.
33impl TryFromSexp for &'static str {
34    type Error = SexpError;
35
36    #[inline]
37    fn try_from_sexp(sexp: SEXP) -> Result<Self, Self::Error> {
38        let actual = sexp.type_of();
39        if actual != SEXPTYPE::STRSXP {
40            return Err(SexpTypeError {
41                expected: SEXPTYPE::STRSXP,
42                actual,
43            }
44            .into());
45        }
46
47        let len = sexp.len();
48        if len != 1 {
49            return Err(SexpLengthError {
50                expected: 1,
51                actual: len,
52            }
53            .into());
54        }
55
56        // Get the CHARSXP at index 0
57        let charsxp = sexp.string_elt(0);
58
59        // Check for NA_STRING or R_BlankString
60        if charsxp == SEXP::na_string() {
61            return Ok("");
62        }
63        if charsxp == SEXP::blank_string() {
64            return Ok("");
65        }
66
67        // Use LENGTH-based conversion (O(1)) instead of CStr::from_ptr (O(n) strlen)
68        Ok(unsafe { charsxp_to_str(charsxp) })
69    }
70
71    #[inline]
72    unsafe fn try_from_sexp_unchecked(sexp: SEXP) -> Result<Self, Self::Error> {
73        let actual = sexp.type_of();
74        if actual != SEXPTYPE::STRSXP {
75            return Err(SexpTypeError {
76                expected: SEXPTYPE::STRSXP,
77                actual,
78            }
79            .into());
80        }
81
82        let len = unsafe { sexp.len_unchecked() };
83        if len != 1 {
84            return Err(SexpLengthError {
85                expected: 1,
86                actual: len,
87            }
88            .into());
89        }
90
91        // Get the CHARSXP at index 0
92        let charsxp = unsafe { sexp.string_elt_unchecked(0) };
93
94        // Check for NA_STRING or R_BlankString
95        if charsxp == SEXP::na_string() {
96            return Ok("");
97        }
98        if charsxp == SEXP::blank_string() {
99            return Ok("");
100        }
101
102        // Use LENGTH-based conversion (O(1)) instead of CStr::from_ptr (O(n) strlen)
103        Ok(unsafe { charsxp_to_str_unchecked(charsxp) })
104    }
105}
106
107impl TryFromSexp for Option<&'static str> {
108    type Error = SexpError;
109
110    #[inline]
111    fn try_from_sexp(sexp: SEXP) -> Result<Self, Self::Error> {
112        if sexp.type_of() == SEXPTYPE::NILSXP {
113            return Ok(None);
114        }
115
116        let actual = sexp.type_of();
117        if actual != SEXPTYPE::STRSXP {
118            return Err(SexpTypeError {
119                expected: SEXPTYPE::STRSXP,
120                actual,
121            }
122            .into());
123        }
124
125        let len = sexp.len();
126        if len != 1 {
127            return Err(SexpLengthError {
128                expected: 1,
129                actual: len,
130            }
131            .into());
132        }
133
134        let charsxp = sexp.string_elt(0);
135        if charsxp == SEXP::na_string() {
136            return Ok(None);
137        }
138        if charsxp == SEXP::blank_string() {
139            return Ok(Some(""));
140        }
141
142        Ok(Some(unsafe { charsxp_to_str(charsxp) }))
143    }
144
145    #[inline]
146    unsafe fn try_from_sexp_unchecked(sexp: SEXP) -> Result<Self, Self::Error> {
147        if sexp.type_of() == SEXPTYPE::NILSXP {
148            return Ok(None);
149        }
150
151        let actual = sexp.type_of();
152        if actual != SEXPTYPE::STRSXP {
153            return Err(SexpTypeError {
154                expected: SEXPTYPE::STRSXP,
155                actual,
156            }
157            .into());
158        }
159
160        let len = unsafe { sexp.len_unchecked() };
161        if len != 1 {
162            return Err(SexpLengthError {
163                expected: 1,
164                actual: len,
165            }
166            .into());
167        }
168
169        let charsxp = unsafe { sexp.string_elt_unchecked(0) };
170        if charsxp == SEXP::na_string() {
171            return Ok(None);
172        }
173        if charsxp == SEXP::blank_string() {
174            return Ok(Some(""));
175        }
176
177        Ok(Some(unsafe { charsxp_to_str_unchecked(charsxp) }))
178    }
179}
180
181/// Convert R character vector (STRSXP) to Rust char.
182///
183/// Extracts the first character of the first element of the character vector.
184/// Returns an error if the string is empty, NA, or has more than one character.
185impl TryFromSexp for char {
186    type Error = SexpError;
187
188    #[inline]
189    fn try_from_sexp(sexp: SEXP) -> Result<Self, Self::Error> {
190        let s: &str = TryFromSexp::try_from_sexp(sexp)?;
191        let mut chars = s.chars();
192        match (chars.next(), chars.next()) {
193            (Some(c), None) => Ok(c),
194            (None, _) => Err(SexpError::InvalidValue(
195                "empty string cannot be converted to char".to_string(),
196            )),
197            (Some(_), Some(_)) => Err(SexpError::InvalidValue(
198                "string has more than one character".to_string(),
199            )),
200        }
201    }
202
203    #[inline]
204    unsafe fn try_from_sexp_unchecked(sexp: SEXP) -> Result<Self, Self::Error> {
205        let s: &str = unsafe { TryFromSexp::try_from_sexp_unchecked(sexp)? };
206        let mut chars = s.chars();
207        match (chars.next(), chars.next()) {
208            (Some(c), None) => Ok(c),
209            (None, _) => Err(SexpError::InvalidValue(
210                "empty string cannot be converted to char".to_string(),
211            )),
212            (Some(_), Some(_)) => Err(SexpError::InvalidValue(
213                "string has more than one character".to_string(),
214            )),
215        }
216    }
217}
218
219/// Convert R character vector (STRSXP) to owned Rust String.
220///
221/// Extracts the first element and creates an owned copy.
222///
223/// # NA Handling
224///
225/// **Warning:** `NA_character_` is converted to empty string `""`. This is lossy!
226/// If you need to distinguish between NA and empty strings, use `Option<String>` instead:
227///
228/// ```ignore
229/// let maybe_str: Option<String> = sexp.try_into()?;
230/// ```
231impl TryFromSexp for String {
232    type Error = SexpError;
233
234    #[inline]
235    fn try_from_sexp(sexp: SEXP) -> Result<Self, Self::Error> {
236        use crate::ffi::Rf_translateCharUTF8;
237
238        let actual = sexp.type_of();
239        if actual != SEXPTYPE::STRSXP {
240            return Err(SexpTypeError {
241                expected: SEXPTYPE::STRSXP,
242                actual,
243            }
244            .into());
245        }
246
247        let len = sexp.len();
248        if len != 1 {
249            return Err(SexpLengthError {
250                expected: 1,
251                actual: len,
252            }
253            .into());
254        }
255
256        // Get the CHARSXP at index 0
257        let charsxp = sexp.string_elt(0);
258
259        // Check for NA_STRING
260        if charsxp == SEXP::na_string() {
261            return Ok(String::new());
262        }
263
264        // Translate to UTF-8 in an R-managed buffer, then copy to an owned Rust String.
265        let c_str = unsafe { Rf_translateCharUTF8(charsxp) };
266        if c_str.is_null() {
267            return Ok(String::new());
268        }
269
270        let rust_str = unsafe { std::ffi::CStr::from_ptr(c_str) };
271        rust_str.to_str().map(|s| s.to_owned()).map_err(|_| {
272            SexpTypeError {
273                expected: SEXPTYPE::STRSXP,
274                actual: SEXPTYPE::STRSXP,
275            }
276            .into()
277        })
278    }
279
280    #[inline]
281    unsafe fn try_from_sexp_unchecked(sexp: SEXP) -> Result<Self, Self::Error> {
282        use crate::ffi::Rf_translateCharUTF8_unchecked;
283
284        let actual = sexp.type_of();
285        if actual != SEXPTYPE::STRSXP {
286            return Err(SexpTypeError {
287                expected: SEXPTYPE::STRSXP,
288                actual,
289            }
290            .into());
291        }
292
293        let len = unsafe { sexp.len_unchecked() };
294        if len != 1 {
295            return Err(SexpLengthError {
296                expected: 1,
297                actual: len,
298            }
299            .into());
300        }
301
302        // Get the CHARSXP at index 0
303        let charsxp = unsafe { sexp.string_elt_unchecked(0) };
304
305        // Check for NA_STRING
306        if charsxp == SEXP::na_string() {
307            return Ok(String::new());
308        }
309
310        // Translate to UTF-8 in an R-managed buffer, then copy to an owned Rust String.
311        let c_str = unsafe { Rf_translateCharUTF8_unchecked(charsxp) };
312        if c_str.is_null() {
313            return Ok(String::new());
314        }
315
316        let rust_str = unsafe { std::ffi::CStr::from_ptr(c_str) };
317        rust_str.to_str().map(|s| s.to_owned()).map_err(|_| {
318            SexpTypeError {
319                expected: SEXPTYPE::STRSXP,
320                actual: SEXPTYPE::STRSXP,
321            }
322            .into()
323        })
324    }
325}
326
327/// NA-aware string conversion: returns `None` for `NA_character_`.
328///
329/// Use this when you need to distinguish between NA and empty strings:
330/// ```ignore
331/// let maybe_str: Option<String> = sexp.try_into()?;
332/// match maybe_str {
333///     Some(s) => println!("Got string: {}", s),
334///     None => println!("Got NA"),
335/// }
336/// ```
337impl TryFromSexp for Option<String> {
338    type Error = SexpError;
339
340    #[inline]
341    fn try_from_sexp(sexp: SEXP) -> Result<Self, Self::Error> {
342        use crate::ffi::Rf_translateCharUTF8;
343
344        let actual = sexp.type_of();
345        // NULL -> None
346        if actual == SEXPTYPE::NILSXP {
347            return Ok(None);
348        }
349        if actual != SEXPTYPE::STRSXP {
350            return Err(SexpTypeError {
351                expected: SEXPTYPE::STRSXP,
352                actual,
353            }
354            .into());
355        }
356
357        let len = sexp.len();
358        if len != 1 {
359            return Err(SexpLengthError {
360                expected: 1,
361                actual: len,
362            }
363            .into());
364        }
365
366        let charsxp = sexp.string_elt(0);
367
368        // Return None for NA_STRING
369        if charsxp == SEXP::na_string() {
370            return Ok(None);
371        }
372
373        let c_str = unsafe { Rf_translateCharUTF8(charsxp) };
374        if c_str.is_null() {
375            return Ok(Some(String::new()));
376        }
377
378        let rust_str = unsafe { std::ffi::CStr::from_ptr(c_str) };
379        rust_str.to_str().map(|s| Some(s.to_owned())).map_err(|_| {
380            SexpTypeError {
381                expected: SEXPTYPE::STRSXP,
382                actual: SEXPTYPE::STRSXP,
383            }
384            .into()
385        })
386    }
387
388    #[inline]
389    unsafe fn try_from_sexp_unchecked(sexp: SEXP) -> Result<Self, Self::Error> {
390        // For Option<String>, unchecked is same as checked (NA check is semantic, not safety)
391        Self::try_from_sexp(sexp)
392    }
393}
394// endregion