1use std::collections::HashMap;
2
3use aho_corasick::AhoCorasick;
4use bstr::ByteSlice;
5use memchr::memmem;
6use unicode_width::UnicodeWidthStr;
7
8use super::CallArgs;
9use crate::interpreter::value::*;
10use crate::interpreter::BuiltinContext;
11use derive_more::{Display, Error};
12use minir_macros::{builtin, interpreter_builtin};
13use regex::Regex;
14
15use crate::interpreter::value::deparse_expr;
16
17#[derive(Debug, Display, Error)]
21pub enum StringError {
22 #[display("invalid regular expression: {}", source)]
23 InvalidRegex {
24 #[error(source)]
25 source: regex::Error,
26 },
27}
28
29impl From<StringError> for RError {
30 fn from(e: StringError) -> Self {
31 RError::from_source(RErrorKind::Argument, e)
32 }
33}
34
35fn get_regex_opts(named: &[(String, RValue)]) -> (bool, bool) {
39 let fixed = named
40 .iter()
41 .find(|(n, _)| n == "fixed")
42 .and_then(|(_, v)| v.as_vector()?.as_logical_scalar())
43 .unwrap_or(false);
44 let ignore_case = named
45 .iter()
46 .find(|(n, _)| n == "ignore.case")
47 .and_then(|(_, v)| v.as_vector()?.as_logical_scalar())
48 .unwrap_or(false);
49 (fixed, ignore_case)
50}
51
/// Rewrites bracket expressions in `pattern` for the Rust `regex` crate.
///
/// The pattern is copied through unchanged except inside a `[...]` class,
/// where a backslash followed by `]` is emitted as a bare `]`. Any other
/// backslash pair inside a class is copied as-is. A `^` and/or `]` directly
/// after the opening `[` are copied as part of the class header and do not
/// close the class.
fn translate_pcre_to_rust(pattern: &str) -> String {
    let mut out = String::with_capacity(pattern.len());
    let mut rest = pattern.chars().peekable();
    let mut inside_class = false;

    while let Some(ch) = rest.next() {
        if !inside_class {
            out.push(ch);
            if ch == '[' {
                inside_class = true;
                // Class header: optional negation, then an optional literal `]`.
                if rest.next_if_eq(&'^').is_some() {
                    out.push('^');
                }
                if rest.next_if_eq(&']').is_some() {
                    out.push(']');
                }
            }
            continue;
        }

        match ch {
            ']' => {
                inside_class = false;
                out.push(']');
            }
            '\\' => match rest.next() {
                // `\]` inside a class is emitted without its backslash.
                Some(']') => out.push(']'),
                // Every other escape pair (including `\\`) is kept verbatim.
                Some(escaped) => {
                    out.push('\\');
                    out.push(escaped);
                }
                // A trailing lone backslash is copied through.
                None => out.push('\\'),
            },
            other => out.push(other),
        }
    }
    out
}
105
106fn build_regex(pattern: &str, fixed: bool, ignore_case: bool) -> Result<Regex, RError> {
107 let pat = if fixed {
108 regex::escape(pattern)
109 } else {
110 translate_pcre_to_rust(pattern)
111 };
112 let pat = if ignore_case {
113 format!("(?i){}", pat)
114 } else {
115 pat
116 };
117 Regex::new(&pat).map_err(|source| -> RError { StringError::InvalidRegex { source }.into() })
118}
119
/// Converts an R-style replacement string into the `regex` crate's syntax.
///
/// * A backslash followed by an ASCII digit (`\1`) becomes a brace-delimited
///   group reference (`${1}`). The braces keep text that follows the digit
///   from being read as part of the group name (`\1st` must not become the
///   group `1st`).
/// * A literal `$` is escaped as `$$`, because the `regex` crate would
///   otherwise treat it as the start of a capture reference.
/// * Everything else, including a backslash not followed by a digit, is
///   copied through unchanged.
fn convert_replacement(repl: &str) -> String {
    let mut out = String::with_capacity(repl.len() + 4);
    let mut chars = repl.chars().peekable();
    while let Some(ch) = chars.next() {
        match ch {
            '\\' => match chars.next_if(|c| c.is_ascii_digit()) {
                Some(digit) => {
                    out.push_str("${");
                    out.push(digit);
                    out.push('}');
                }
                // Not a back-reference: keep the backslash; the next character
                // is handled by the following loop iteration.
                None => out.push('\\'),
            },
            '$' => out.push_str("$$"),
            other => out.push(other),
        }
    }
    out
}
137
138fn build_fixed_searcher(pattern: &str, ignore_case: bool) -> Result<AhoCorasick, RError> {
143 let mut builder = aho_corasick::AhoCorasickBuilder::new();
144 builder
145 .ascii_case_insensitive(ignore_case)
146 .kind(Some(aho_corasick::AhoCorasickKind::DFA));
147 builder.build([pattern]).map_err(|e| {
148 RError::new(
149 RErrorKind::Argument,
150 format!("failed to build fixed-pattern searcher: {e}"),
151 )
152 })
153}
154
155fn fixed_gsub(haystack: &str, needle: &str, replacement: &str) -> String {
157 if needle.is_empty() {
158 let mut result =
160 String::with_capacity(haystack.len() + replacement.len() * (haystack.len() + 1));
161 for ch in haystack.chars() {
162 result.push_str(replacement);
163 result.push(ch);
164 }
165 result.push_str(replacement);
166 return result;
167 }
168 let finder = memmem::Finder::new(needle.as_bytes());
169 let mut result = String::with_capacity(haystack.len());
170 let mut last_end = 0;
171 for pos in finder.find_iter(haystack.as_bytes()) {
172 result.push_str(&haystack[last_end..pos]);
173 result.push_str(replacement);
174 last_end = pos + needle.len();
175 }
176 result.push_str(&haystack[last_end..]);
177 result
178}
179
180fn fixed_split(haystack: &str, needle: &str) -> Vec<Option<String>> {
182 if needle.is_empty() {
183 return haystack.chars().map(|c| Some(c.to_string())).collect();
185 }
186 let finder = memmem::Finder::new(needle.as_bytes());
187 let mut parts = Vec::new();
188 let mut last_end = 0;
189 for pos in finder.find_iter(haystack.as_bytes()) {
190 parts.push(Some(haystack[last_end..pos].to_string()));
191 last_end = pos + needle.len();
192 }
193 parts.push(Some(haystack[last_end..].to_string()));
194 parts
195}
196
/// Computes the Levenshtein edit distance between `a` and `b`, counted in
/// `char`s, with unit cost for insertion, deletion and substitution.
///
/// Only two rows of the dynamic-programming table are kept; the shorter input
/// is laid along the row so each row has `min(len) + 1` cells.
fn levenshtein_distance(a: &str, b: &str) -> usize {
    let first: Vec<char> = a.chars().collect();
    let second: Vec<char> = b.chars().collect();

    if first.is_empty() {
        return second.len();
    }
    if second.is_empty() {
        return first.len();
    }

    // `outer` drives the rows (longer input), `inner` the columns (shorter).
    let (outer, inner) = if first.len() <= second.len() {
        (&second, &first)
    } else {
        (&first, &second)
    };
    let width = inner.len();

    let mut above: Vec<usize> = (0..=width).collect();
    let mut current: Vec<usize> = vec![0; width + 1];

    for (row, &outer_ch) in outer.iter().enumerate() {
        current[0] = row + 1;
        for (col, &inner_ch) in inner.iter().enumerate() {
            let deletion = above[col + 1] + 1;
            let insertion = current[col] + 1;
            let substitution = above[col] + usize::from(outer_ch != inner_ch);
            current[col + 1] = deletion.min(insertion).min(substitution);
        }
        std::mem::swap(&mut above, &mut current);
    }

    above[width]
}
245
246fn approximate_contains(haystack: &str, needle: &str, max_dist: usize) -> bool {
250 let needle_chars: Vec<char> = needle.chars().collect();
251 let haystack_chars: Vec<char> = haystack.chars().collect();
252 let n_len = needle_chars.len();
253 let h_len = haystack_chars.len();
254
255 if n_len == 0 {
256 return true;
257 }
258
259 let min_window = n_len.saturating_sub(max_dist);
261 let max_window = (n_len + max_dist).min(h_len);
262
263 for window_size in min_window..=max_window {
264 if window_size > h_len {
265 break;
266 }
267 for start in 0..=(h_len - window_size) {
268 let window: String = haystack_chars[start..start + window_size].iter().collect();
269 let needle_str: String = needle_chars.iter().collect();
270 if levenshtein_distance(&window, &needle_str) <= max_dist {
271 return true;
272 }
273 }
274 }
275
276 false
277}
278
279fn parse_max_distance(named: &[(String, RValue)], pattern_len: usize) -> usize {
283 let dist = named
284 .iter()
285 .find(|(n, _)| n == "max.distance")
286 .and_then(|(_, v)| v.as_vector()?.as_double_scalar());
287
288 match dist {
289 Some(d) if d < 1.0 => {
290 let computed = (d * pattern_len as f64).floor() as usize;
292 computed.max(0)
293 }
294 Some(d) => {
295 d.floor() as usize
297 }
298 None => {
299 let computed = (0.1 * pattern_len as f64).floor() as usize;
301 computed.max(0)
302 }
303 }
304}
305
306#[builtin(min_args = 3, names = ["substring"])]
317fn builtin_substr(args: &[RValue], _: &[(String, RValue)]) -> Result<RValue, RError> {
318 let x_vec = args
319 .first()
320 .and_then(|v| v.as_vector())
321 .map(|v| v.to_characters())
322 .unwrap_or_default();
323 let start_vec = args
324 .get(1)
325 .and_then(|v| v.as_vector())
326 .map(|v| v.to_integers())
327 .unwrap_or_else(|| vec![Some(1)]);
328 let stop_vec = args
329 .get(2)
330 .and_then(|v| v.as_vector())
331 .map(|v| v.to_integers())
332 .unwrap_or_default();
333
334 if x_vec.is_empty() {
335 return Ok(RValue::vec(Vector::Character(vec![].into())));
336 }
337
338 let n = x_vec.len().max(start_vec.len()).max(stop_vec.len());
339 let result: Vec<Option<String>> = (0..n)
340 .map(|i| {
341 let s_opt = &x_vec[i % x_vec.len()];
342 let start_opt = start_vec[i % start_vec.len()];
343 let stop_opt = stop_vec[i % stop_vec.len()];
344 match (s_opt, start_opt, stop_opt) {
345 (Some(s), Some(start), Some(stop)) => {
346 let start = usize::try_from(start).unwrap_or(0);
347 let stop = usize::try_from(stop).unwrap_or(0);
348 let start = start.saturating_sub(1); if start < s.len() {
350 Some(s[start..stop.min(s.len())].to_string())
351 } else {
352 Some(String::new())
353 }
354 }
355 (None, _, _) | (_, None, _) | (_, _, None) => None,
356 }
357 })
358 .collect();
359 Ok(RValue::vec(Vector::Character(result.into())))
360}
361
362#[builtin(min_args = 1)]
367fn builtin_toupper(args: &[RValue], _: &[(String, RValue)]) -> Result<RValue, RError> {
368 let vals = match args.first() {
369 Some(RValue::Vector(rv)) => rv.to_characters(),
370 Some(RValue::Null) => return Ok(RValue::vec(Vector::Character(vec![].into()))),
371 _ => {
372 return Err(RError::new(
373 RErrorKind::Argument,
374 "argument is not character".to_string(),
375 ))
376 }
377 };
378 let result: Vec<Option<String>> = vals
379 .iter()
380 .map(|s| s.as_ref().map(|s| s.to_uppercase()))
381 .collect();
382 Ok(RValue::vec(Vector::Character(result.into())))
383}
384
385#[builtin(min_args = 1)]
390fn builtin_tolower(args: &[RValue], _: &[(String, RValue)]) -> Result<RValue, RError> {
391 let vals = match args.first() {
392 Some(RValue::Vector(rv)) => rv.to_characters(),
393 Some(RValue::Null) => return Ok(RValue::vec(Vector::Character(vec![].into()))),
394 _ => {
395 return Err(RError::new(
396 RErrorKind::Argument,
397 "argument is not character".to_string(),
398 ))
399 }
400 };
401 let result: Vec<Option<String>> = vals
402 .iter()
403 .map(|s| s.as_ref().map(|s| s.to_lowercase()))
404 .collect();
405 Ok(RValue::vec(Vector::Character(result.into())))
406}
407
408#[builtin(min_args = 1)]
416fn builtin_trimws(args: &[RValue], named: &[(String, RValue)]) -> Result<RValue, RError> {
417 let which = named
418 .iter()
419 .find(|(n, _)| n == "which")
420 .and_then(|(_, v)| v.as_vector()?.as_character_scalar())
421 .or_else(|| {
422 args.get(1)
423 .and_then(|v| v.as_vector()?.as_character_scalar())
424 })
425 .unwrap_or_else(|| "both".to_string());
426
427 let whitespace_pat = named
428 .iter()
429 .find(|(n, _)| n == "whitespace")
430 .and_then(|(_, v)| v.as_vector()?.as_character_scalar())
431 .or_else(|| {
432 args.get(2)
433 .and_then(|v| v.as_vector()?.as_character_scalar())
434 })
435 .unwrap_or_else(|| "[ \\t\\r\\n]".to_string());
436
437 let use_regex = whitespace_pat != "[ \\t\\r\\n]";
439
440 if use_regex {
441 let left_re = Regex::new(&format!("^({whitespace_pat})+")).map_err(|e| {
443 RError::new(
444 RErrorKind::Argument,
445 format!("invalid 'whitespace' regex pattern: {e}"),
446 )
447 })?;
448 let right_re = Regex::new(&format!("({whitespace_pat})+$")).map_err(|e| {
449 RError::new(
450 RErrorKind::Argument,
451 format!("invalid 'whitespace' regex pattern: {e}"),
452 )
453 })?;
454
455 match args.first() {
456 Some(RValue::Vector(rv)) if matches!(rv.inner, Vector::Character(_)) => {
457 let Vector::Character(vals) = &rv.inner else {
458 unreachable!()
459 };
460 let result: Vec<Option<String>> = vals
461 .iter()
462 .map(|s| {
463 s.as_ref().map(|s| {
464 let s = match which.as_str() {
465 "both" | "left" => left_re.replace(s, "").into_owned(),
466 _ => s.to_string(),
467 };
468 match which.as_str() {
469 "both" | "right" => right_re.replace(&s, "").into_owned(),
470 _ => s,
471 }
472 })
473 })
474 .collect();
475 Ok(RValue::vec(Vector::Character(result.into())))
476 }
477 _ => Err(RError::new(
478 RErrorKind::Argument,
479 "argument is not character".to_string(),
480 )),
481 }
482 } else {
483 let trim_fn: fn(&str) -> &str = match which.as_str() {
484 "both" => str::trim,
485 "left" => str::trim_start,
486 "right" => str::trim_end,
487 _ => {
488 return Err(RError::new(
489 RErrorKind::Argument,
490 format!(
491 "invalid 'which' argument: {:?} — must be \"both\", \"left\", or \"right\"",
492 which
493 ),
494 ))
495 }
496 };
497 match args.first() {
498 Some(RValue::Vector(rv)) if matches!(rv.inner, Vector::Character(_)) => {
499 let Vector::Character(vals) = &rv.inner else {
500 unreachable!()
501 };
502 let result: Vec<Option<String>> = vals
503 .iter()
504 .map(|s| s.as_ref().map(|s| trim_fn(s).to_string()))
505 .collect();
506 Ok(RValue::vec(Vector::Character(result.into())))
507 }
508 _ => Err(RError::new(
509 RErrorKind::Argument,
510 "argument is not character".to_string(),
511 )),
512 }
513 }
514}
515
516#[builtin(min_args = 3)]
525fn builtin_gsub(args: &[RValue], named: &[(String, RValue)]) -> Result<RValue, RError> {
526 if matches!(args.get(2), Some(RValue::Null) | None) {
528 return Ok(RValue::vec(Vector::Character(vec![].into())));
529 }
530 let pattern = args
531 .first()
532 .and_then(|v| v.as_vector()?.as_character_scalar())
533 .unwrap_or_default();
534 let replacement = args
535 .get(1)
536 .and_then(|v| v.as_vector()?.as_character_scalar())
537 .unwrap_or_default();
538 let (fixed, ignore_case) = get_regex_opts(named);
539
540 if fixed {
542 if pattern.is_empty() && !ignore_case {
544 return match args.get(2) {
545 Some(RValue::Vector(rv)) if matches!(rv.inner, Vector::Character(_)) => {
546 let Vector::Character(vals) = &rv.inner else {
547 unreachable!()
548 };
549 let result: Vec<Option<String>> = vals
550 .iter()
551 .map(|s| s.as_ref().map(|s| fixed_gsub(s, &pattern, &replacement)))
552 .collect();
553 Ok(RValue::vec(Vector::Character(result.into())))
554 }
555 _ => Err(RError::new(
556 RErrorKind::Argument,
557 "argument is not character".to_string(),
558 )),
559 };
560 }
561 let ac = build_fixed_searcher(&pattern, ignore_case)?;
562 return match args.get(2) {
563 Some(RValue::Vector(rv)) if matches!(rv.inner, Vector::Character(_)) => {
564 let Vector::Character(vals) = &rv.inner else {
565 unreachable!()
566 };
567 let result: Vec<Option<String>> = vals
568 .iter()
569 .map(|s| {
570 s.as_ref()
571 .map(|s| ac.replace_all(s, &[replacement.as_str()]))
572 })
573 .collect();
574 Ok(RValue::vec(Vector::Character(result.into())))
575 }
576 _ => Err(RError::new(
577 RErrorKind::Argument,
578 "argument is not character".to_string(),
579 )),
580 };
581 }
582
583 let re = build_regex(&pattern, fixed, ignore_case)?;
584 let repl = convert_replacement(&replacement);
585 match args.get(2) {
586 Some(RValue::Vector(rv)) if matches!(rv.inner, Vector::Character(_)) => {
587 let Vector::Character(vals) = &rv.inner else {
588 unreachable!()
589 };
590 let result: Vec<Option<String>> = vals
591 .iter()
592 .map(|s| {
593 s.as_ref()
594 .map(|s| re.replace_all(s, repl.as_str()).into_owned())
595 })
596 .collect();
597 Ok(RValue::vec(Vector::Character(result.into())))
598 }
599 _ => Err(RError::new(
600 RErrorKind::Argument,
601 "argument is not character".to_string(),
602 )),
603 }
604}
605
606#[builtin(min_args = 3)]
615fn builtin_sub(args: &[RValue], named: &[(String, RValue)]) -> Result<RValue, RError> {
616 if matches!(args.get(2), Some(RValue::Null) | None) {
618 return Ok(RValue::vec(Vector::Character(vec![].into())));
619 }
620 let pattern = args
621 .first()
622 .and_then(|v| v.as_vector()?.as_character_scalar())
623 .unwrap_or_default();
624 let replacement = args
625 .get(1)
626 .and_then(|v| v.as_vector()?.as_character_scalar())
627 .unwrap_or_default();
628 let (fixed, ignore_case) = get_regex_opts(named);
629
630 if fixed {
632 let ac = build_fixed_searcher(&pattern, ignore_case)?;
633 return match args.get(2) {
634 Some(RValue::Vector(rv)) if matches!(rv.inner, Vector::Character(_)) => {
635 let Vector::Character(vals) = &rv.inner else {
636 unreachable!()
637 };
638 let result: Vec<Option<String>> = vals
639 .iter()
640 .map(|s| {
641 s.as_ref().map(|s| {
642 match ac.find(s) {
644 Some(m) => {
645 let mut out = String::with_capacity(
646 s.len() - m.len() + replacement.len(),
647 );
648 out.push_str(&s[..m.start()]);
649 out.push_str(&replacement);
650 out.push_str(&s[m.end()..]);
651 out
652 }
653 None => s.to_string(),
654 }
655 })
656 })
657 .collect();
658 Ok(RValue::vec(Vector::Character(result.into())))
659 }
660 _ => Err(RError::new(
661 RErrorKind::Argument,
662 "argument is not character".to_string(),
663 )),
664 };
665 }
666
667 let re = build_regex(&pattern, fixed, ignore_case)?;
668 let repl = convert_replacement(&replacement);
669 match args.get(2) {
670 Some(RValue::Vector(rv)) if matches!(rv.inner, Vector::Character(_)) => {
671 let Vector::Character(vals) = &rv.inner else {
672 unreachable!()
673 };
674 let result: Vec<Option<String>> = vals
675 .iter()
676 .map(|s| {
677 s.as_ref()
678 .map(|s| re.replace(s, repl.as_str()).into_owned())
679 })
680 .collect();
681 Ok(RValue::vec(Vector::Character(result.into())))
682 }
683 _ => Err(RError::new(
684 RErrorKind::Argument,
685 "argument is not character".to_string(),
686 )),
687 }
688}
689
690#[builtin(min_args = 2)]
698fn builtin_grepl(args: &[RValue], named: &[(String, RValue)]) -> Result<RValue, RError> {
699 if matches!(args.get(1), Some(RValue::Null) | None) {
700 return Ok(RValue::vec(Vector::Logical(vec![].into())));
701 }
702 let pattern = args
703 .first()
704 .and_then(|v| v.as_vector()?.as_character_scalar())
705 .unwrap_or_default();
706 let (fixed, ignore_case) = get_regex_opts(named);
707
708 if fixed {
710 let ac = build_fixed_searcher(&pattern, ignore_case)?;
711 return match args.get(1) {
712 Some(RValue::Vector(rv)) if matches!(rv.inner, Vector::Character(_)) => {
713 let Vector::Character(vals) = &rv.inner else {
714 unreachable!()
715 };
716 let result: Vec<Option<bool>> = vals
717 .iter()
718 .map(|s| s.as_ref().map(|s| ac.is_match(s)))
719 .collect();
720 Ok(RValue::vec(Vector::Logical(result.into())))
721 }
722 _ => Err(RError::new(
723 RErrorKind::Argument,
724 "argument is not character".to_string(),
725 )),
726 };
727 }
728
729 let re = build_regex(&pattern, fixed, ignore_case)?;
730 match args.get(1) {
731 Some(RValue::Vector(rv)) if matches!(rv.inner, Vector::Character(_)) => {
732 let Vector::Character(vals) = &rv.inner else {
733 unreachable!()
734 };
735 let result: Vec<Option<bool>> = vals
736 .iter()
737 .map(|s| s.as_ref().map(|s| re.is_match(s)))
738 .collect();
739 Ok(RValue::vec(Vector::Logical(result.into())))
740 }
741 _ => Err(RError::new(
742 RErrorKind::Argument,
743 "argument is not character".to_string(),
744 )),
745 }
746}
747
748#[builtin(min_args = 2)]
757fn builtin_grep(args: &[RValue], named: &[(String, RValue)]) -> Result<RValue, RError> {
758 if matches!(args.get(1), Some(RValue::Null) | None) {
759 return Ok(RValue::vec(Vector::Integer(vec![].into())));
760 }
761 let pattern = args
762 .first()
763 .and_then(|v| v.as_vector()?.as_character_scalar())
764 .unwrap_or_default();
765 let value = named
766 .iter()
767 .find(|(n, _)| n == "value")
768 .and_then(|(_, v)| v.as_vector()?.as_logical_scalar())
769 .unwrap_or(false);
770 let (fixed, ignore_case) = get_regex_opts(named);
771
772 if fixed {
774 let ac = build_fixed_searcher(&pattern, ignore_case)?;
775 return match args.get(1) {
776 Some(RValue::Vector(rv)) if matches!(rv.inner, Vector::Character(_)) => {
777 let Vector::Character(vals) = &rv.inner else {
778 unreachable!()
779 };
780 if value {
781 let result: Vec<Option<String>> = vals
782 .iter()
783 .filter(|s| s.as_ref().map(|s| ac.is_match(s)).unwrap_or(false))
784 .cloned()
785 .collect();
786 Ok(RValue::vec(Vector::Character(result.into())))
787 } else {
788 let result: Result<Vec<Option<i64>>, RError> = vals
789 .iter()
790 .enumerate()
791 .filter(|(_, s)| s.as_ref().map(|s| ac.is_match(s)).unwrap_or(false))
792 .map(|(i, _)| Ok(Some(i64::try_from(i)? + 1)))
793 .collect();
794 Ok(RValue::vec(Vector::Integer(result?.into())))
795 }
796 }
797 _ => Err(RError::new(
798 RErrorKind::Argument,
799 "argument is not character".to_string(),
800 )),
801 };
802 }
803
804 let re = build_regex(&pattern, fixed, ignore_case)?;
805
806 match args.get(1) {
807 Some(RValue::Vector(rv)) if matches!(rv.inner, Vector::Character(_)) => {
808 let Vector::Character(vals) = &rv.inner else {
809 unreachable!()
810 };
811 if value {
812 let result: Vec<Option<String>> = vals
813 .iter()
814 .filter(|s| s.as_ref().map(|s| re.is_match(s)).unwrap_or(false))
815 .cloned()
816 .collect();
817 Ok(RValue::vec(Vector::Character(result.into())))
818 } else {
819 let result: Result<Vec<Option<i64>>, RError> = vals
820 .iter()
821 .enumerate()
822 .filter(|(_, s)| s.as_ref().map(|s| re.is_match(s)).unwrap_or(false))
823 .map(|(i, _)| Ok(Some(i64::try_from(i)? + 1)))
824 .collect();
825 Ok(RValue::vec(Vector::Integer(result?.into())))
826 }
827 }
828 _ => Err(RError::new(
829 RErrorKind::Argument,
830 "argument is not character".to_string(),
831 )),
832 }
833}
834
835#[builtin(name = "agrep", min_args = 2)]
847fn builtin_agrep(args: &[RValue], named: &[(String, RValue)]) -> Result<RValue, RError> {
848 let pattern = args
849 .first()
850 .and_then(|v| v.as_vector()?.as_character_scalar())
851 .unwrap_or_default();
852 let ignore_case = named
853 .iter()
854 .find(|(n, _)| n == "ignore.case")
855 .and_then(|(_, v)| v.as_vector()?.as_logical_scalar())
856 .unwrap_or(false);
857 let value = named
858 .iter()
859 .find(|(n, _)| n == "value")
860 .and_then(|(_, v)| v.as_vector()?.as_logical_scalar())
861 .unwrap_or(false);
862 let max_dist = parse_max_distance(named, pattern.chars().count());
863
864 let match_pattern = if ignore_case {
865 pattern.to_lowercase()
866 } else {
867 pattern.clone()
868 };
869
870 match args.get(1) {
871 Some(RValue::Vector(rv)) if matches!(rv.inner, Vector::Character(_)) => {
872 let Vector::Character(vals) = &rv.inner else {
873 unreachable!()
874 };
875 if value {
876 let result: Vec<Option<String>> = vals
877 .iter()
878 .filter(|s| {
879 s.as_ref()
880 .map(|s| {
881 let hay = if ignore_case {
882 s.to_lowercase()
883 } else {
884 s.to_string()
885 };
886 approximate_contains(&hay, &match_pattern, max_dist)
887 })
888 .unwrap_or(false)
889 })
890 .cloned()
891 .collect();
892 Ok(RValue::vec(Vector::Character(result.into())))
893 } else {
894 let result: Result<Vec<Option<i64>>, RError> = vals
895 .iter()
896 .enumerate()
897 .filter(|(_, s)| {
898 s.as_ref()
899 .map(|s| {
900 let hay = if ignore_case {
901 s.to_lowercase()
902 } else {
903 s.to_string()
904 };
905 approximate_contains(&hay, &match_pattern, max_dist)
906 })
907 .unwrap_or(false)
908 })
909 .map(|(i, _)| Ok(Some(i64::try_from(i)? + 1)))
910 .collect();
911 Ok(RValue::vec(Vector::Integer(result?.into())))
912 }
913 }
914 _ => Err(RError::new(
915 RErrorKind::Argument,
916 "argument is not character".to_string(),
917 )),
918 }
919}
920
921#[builtin(name = "agrepl", min_args = 2)]
931fn builtin_agrepl(args: &[RValue], named: &[(String, RValue)]) -> Result<RValue, RError> {
932 let pattern = args
933 .first()
934 .and_then(|v| v.as_vector()?.as_character_scalar())
935 .unwrap_or_default();
936 let ignore_case = named
937 .iter()
938 .find(|(n, _)| n == "ignore.case")
939 .and_then(|(_, v)| v.as_vector()?.as_logical_scalar())
940 .unwrap_or(false);
941 let max_dist = parse_max_distance(named, pattern.chars().count());
942
943 let match_pattern = if ignore_case {
944 pattern.to_lowercase()
945 } else {
946 pattern.clone()
947 };
948
949 match args.get(1) {
950 Some(RValue::Vector(rv)) if matches!(rv.inner, Vector::Character(_)) => {
951 let Vector::Character(vals) = &rv.inner else {
952 unreachable!()
953 };
954 let result: Vec<Option<bool>> = vals
955 .iter()
956 .map(|s| {
957 s.as_ref().map(|s| {
958 let hay = if ignore_case {
959 s.to_lowercase()
960 } else {
961 s.to_string()
962 };
963 approximate_contains(&hay, &match_pattern, max_dist)
964 })
965 })
966 .collect();
967 Ok(RValue::vec(Vector::Logical(result.into())))
968 }
969 _ => Err(RError::new(
970 RErrorKind::Argument,
971 "argument is not character".to_string(),
972 )),
973 }
974}
975
976#[builtin(min_args = 2)]
984fn builtin_regexpr(args: &[RValue], named: &[(String, RValue)]) -> Result<RValue, RError> {
985 let pattern = args
986 .first()
987 .and_then(|v| v.as_vector()?.as_character_scalar())
988 .unwrap_or_default();
989 let (fixed, ignore_case) = get_regex_opts(named);
990 let re = build_regex(&pattern, fixed, ignore_case)?;
991 match args.get(1) {
992 Some(RValue::Vector(rv)) if matches!(rv.inner, Vector::Character(_)) => {
993 let Vector::Character(vals) = &rv.inner else {
994 unreachable!()
995 };
996 let mut positions = Vec::new();
997 let mut lengths = Vec::new();
998 for s in vals.iter() {
999 match s.as_ref().and_then(|s| re.find(s)) {
1000 Some(m) => {
1001 positions.push(Some(i64::try_from(m.start())? + 1)); lengths.push(Some(i64::try_from(m.len())?));
1003 }
1004 None => {
1005 positions.push(Some(-1));
1006 lengths.push(Some(-1));
1007 }
1008 }
1009 }
1010 let mut rv = RVector::from(Vector::Integer(positions.into()));
1011 rv.set_attr(
1012 "match.length".to_string(),
1013 RValue::vec(Vector::Integer(lengths.into())),
1014 );
1015 Ok(RValue::Vector(rv))
1016 }
1017 _ => Err(RError::new(
1018 RErrorKind::Argument,
1019 "argument is not character".to_string(),
1020 )),
1021 }
1022}
1023
1024#[builtin(min_args = 2)]
1032fn builtin_gregexpr(args: &[RValue], named: &[(String, RValue)]) -> Result<RValue, RError> {
1033 let pattern = args
1034 .first()
1035 .and_then(|v| v.as_vector()?.as_character_scalar())
1036 .unwrap_or_default();
1037 let (fixed, ignore_case) = get_regex_opts(named);
1038 let re = build_regex(&pattern, fixed, ignore_case)?;
1039 match args.get(1) {
1040 Some(RValue::Vector(rv)) if matches!(rv.inner, Vector::Character(_)) => {
1041 let Vector::Character(vals) = &rv.inner else {
1042 unreachable!()
1043 };
1044 let mut list_items = Vec::new();
1045 for s in vals.iter() {
1046 let (positions, lengths): (Vec<Option<i64>>, Vec<Option<i64>>) = match s.as_ref() {
1047 Some(s) => {
1048 let matches: Vec<_> = re.find_iter(s).collect();
1049 if matches.is_empty() {
1050 (vec![Some(-1)], vec![Some(-1)])
1051 } else {
1052 let (positions, lengths): (Vec<_>, Vec<_>) = matches
1053 .iter()
1054 .map(|m| -> Result<_, RError> {
1055 Ok((
1056 Some(i64::try_from(m.start())? + 1),
1057 Some(i64::try_from(m.len())?),
1058 ))
1059 })
1060 .collect::<Result<Vec<_>, _>>()?
1061 .into_iter()
1062 .unzip();
1063 (positions, lengths)
1064 }
1065 }
1066 None => (vec![Some(-1)], vec![Some(-1)]),
1067 };
1068 let mut match_rv = RVector::from(Vector::Integer(positions.into()));
1069 match_rv.set_attr(
1070 "match.length".to_string(),
1071 RValue::vec(Vector::Integer(lengths.into())),
1072 );
1073 list_items.push((None, RValue::Vector(match_rv)));
1074 }
1075 Ok(RValue::List(RList::new(list_items)))
1076 }
1077 _ => Err(RError::new(
1078 RErrorKind::Argument,
1079 "argument is not character".to_string(),
1080 )),
1081 }
1082}
1083
1084#[builtin(min_args = 2)]
1090fn builtin_regmatches(args: &[RValue], _: &[(String, RValue)]) -> Result<RValue, RError> {
1091 let x = match args.first() {
1092 Some(RValue::Vector(rv)) if matches!(rv.inner, Vector::Character(_)) => {
1093 let Vector::Character(vals) = &rv.inner else {
1094 unreachable!()
1095 };
1096 vals.clone()
1097 }
1098 _ => {
1099 return Err(RError::new(
1100 RErrorKind::Argument,
1101 "argument is not character".to_string(),
1102 ))
1103 }
1104 };
1105
1106 match args.get(1) {
1108 Some(RValue::Vector(rv)) if matches!(rv.inner, Vector::Integer(_)) => {
1110 let Vector::Integer(positions) = &rv.inner else {
1111 unreachable!()
1112 };
1113 let lengths = match rv.get_attr("match.length") {
1114 Some(RValue::Vector(lv)) => match &lv.inner {
1115 Vector::Integer(l) => l.clone(),
1116 _ => {
1117 return Err(RError::new(
1118 RErrorKind::Argument,
1119 "invalid match data".to_string(),
1120 ))
1121 }
1122 },
1123 _ => {
1124 return Err(RError::new(
1125 RErrorKind::Argument,
1126 "invalid match data".to_string(),
1127 ))
1128 }
1129 };
1130 let mut result = Vec::new();
1131 for (i, pos) in positions.iter_opt().enumerate() {
1132 let p = pos.unwrap_or(-1);
1133 let l = lengths.get_opt(i).unwrap_or(-1);
1134 if p > 0 && l > 0 {
1135 if let Some(Some(s)) = x.get(i) {
1136 let start = usize::try_from(p - 1)?;
1137 let end = start + usize::try_from(l)?;
1138 if end <= s.len() {
1139 result.push(Some(s[start..end].to_string()));
1140 } else {
1141 result.push(None);
1142 }
1143 } else {
1144 result.push(None);
1145 }
1146 }
1147 }
1149 Ok(RValue::vec(Vector::Character(result.into())))
1150 }
1151 Some(RValue::List(list)) => {
1153 let mut list_items = Vec::new();
1154 for (i, (_, match_val)) in list.values.iter().enumerate() {
1155 let RValue::Vector(rv) = match_val else {
1156 list_items.push((
1157 None,
1158 RValue::vec(Vector::Character(Vec::<Option<String>>::new().into())),
1159 ));
1160 continue;
1161 };
1162 let Vector::Integer(positions) = &rv.inner else {
1163 list_items.push((
1164 None,
1165 RValue::vec(Vector::Character(Vec::<Option<String>>::new().into())),
1166 ));
1167 continue;
1168 };
1169 let lengths: Integer = match rv.get_attr("match.length") {
1170 Some(RValue::Vector(lv)) => match &lv.inner {
1171 Vector::Integer(l) => l.clone(),
1172 _ => Integer::from_values(vec![]),
1173 },
1174 _ => Integer::from_values(vec![]),
1175 };
1176 let s = x.get(i).and_then(|s| s.as_ref());
1177 let mut matches = Vec::new();
1178 for (j, pos) in positions.iter_opt().enumerate() {
1179 let p = pos.unwrap_or(-1);
1180 let l = lengths.get_opt(j).unwrap_or(-1);
1181 if p > 0 && l > 0 {
1182 if let Some(s) = s {
1183 let start = usize::try_from(p - 1)?;
1184 let end = start + usize::try_from(l)?;
1185 if end <= s.len() {
1186 matches.push(Some(s[start..end].to_string()));
1187 }
1188 }
1189 }
1190 }
1191 list_items.push((None, RValue::vec(Vector::Character(matches.into()))));
1192 }
1193 Ok(RValue::List(RList::new(list_items)))
1194 }
1195 _ => Err(RError::new(
1196 RErrorKind::Argument,
1197 "invalid match data".to_string(),
1198 )),
1199 }
1200}
1201
1202#[builtin(name = "regmatches<-", min_args = 3)]
1214fn builtin_regmatches_assign(args: &[RValue], _: &[(String, RValue)]) -> Result<RValue, RError> {
1215 let x = match args.first() {
1216 Some(RValue::Vector(rv)) if matches!(rv.inner, Vector::Character(_)) => {
1217 let Vector::Character(vals) = &rv.inner else {
1218 unreachable!()
1219 };
1220 vals.clone()
1221 }
1222 _ => {
1223 return Err(RError::new(
1224 RErrorKind::Argument,
1225 "argument 'x' is not character".to_string(),
1226 ))
1227 }
1228 };
1229
1230 let match_data = args.get(1).ok_or_else(|| {
1231 RError::new(
1232 RErrorKind::Argument,
1233 "missing match data argument".to_string(),
1234 )
1235 })?;
1236
1237 let value = args.get(2).ok_or_else(|| {
1238 RError::new(
1239 RErrorKind::Argument,
1240 "missing replacement value".to_string(),
1241 )
1242 })?;
1243
1244 match match_data {
1245 RValue::Vector(rv) if matches!(rv.inner, Vector::Integer(_)) => {
1247 let Vector::Integer(positions) = &rv.inner else {
1248 unreachable!()
1249 };
1250 let lengths: Integer = match rv.get_attr("match.length") {
1251 Some(RValue::Vector(lv)) => match &lv.inner {
1252 Vector::Integer(l) => l.clone(),
1253 _ => {
1254 return Err(RError::new(
1255 RErrorKind::Argument,
1256 "invalid match data: match.length attribute is not integer".to_string(),
1257 ))
1258 }
1259 },
1260 _ => {
1261 return Err(RError::new(
1262 RErrorKind::Argument,
1263 "invalid match data: missing match.length attribute".to_string(),
1264 ))
1265 }
1266 };
1267 let replacements = match value {
1268 RValue::Vector(rv) => rv.to_characters(),
1269 _ => {
1270 return Err(RError::new(
1271 RErrorKind::Argument,
1272 "replacement value must be a character vector".to_string(),
1273 ))
1274 }
1275 };
1276
1277 let mut result: Vec<Option<String>> = x.to_vec();
1278 for (i, pos) in positions.iter_opt().enumerate() {
1279 let p = pos.unwrap_or(-1);
1280 let l = lengths.get_opt(i).unwrap_or(-1);
1281 if p > 0 && l >= 0 {
1282 if let Some(Some(s)) = result.get(i) {
1283 let start = usize::try_from(p - 1)?;
1284 let end = start + usize::try_from(l)?;
1285 if end <= s.len() {
1286 let repl = replacements
1287 .get(i)
1288 .and_then(|r| r.as_ref())
1289 .map(|r| r.as_str())
1290 .unwrap_or("");
1291 let new_s = format!("{}{}{}", &s[..start], repl, &s[end..]);
1292 result[i] = Some(new_s);
1293 }
1294 }
1295 }
1296 }
1297 Ok(RValue::vec(Vector::Character(result.into())))
1298 }
1299 RValue::List(match_list) => {
1301 let repl_list = match value {
1302 RValue::List(l) => l,
1303 _ => {
1304 return Err(RError::new(
1305 RErrorKind::Argument,
1306 "replacement value must be a list for gregexpr match data".to_string(),
1307 ))
1308 }
1309 };
1310
1311 let mut result: Vec<Option<String>> = x.to_vec();
1312 for (i, (_, match_val)) in match_list.values.iter().enumerate() {
1313 let RValue::Vector(rv) = match_val else {
1314 continue;
1315 };
1316 let Vector::Integer(positions) = &rv.inner else {
1317 continue;
1318 };
1319 let lengths: Integer = match rv.get_attr("match.length") {
1320 Some(RValue::Vector(lv)) => match &lv.inner {
1321 Vector::Integer(l) => l.clone(),
1322 _ => Integer::from_values(vec![]),
1323 },
1324 _ => Integer::from_values(vec![]),
1325 };
1326
1327 let repls: Vec<Option<String>> = match repl_list.values.get(i) {
1328 Some((_, RValue::Vector(rv))) => rv.to_characters().to_vec(),
1329 _ => vec![],
1330 };
1331
1332 if let Some(Some(s)) = result.get(i) {
1333 let mut edits: Vec<(usize, usize, &str)> = Vec::new();
1336 for (j, pos) in positions.iter_opt().enumerate() {
1337 let p = pos.unwrap_or(-1);
1338 let l = lengths.get_opt(j).unwrap_or(-1);
1339 if p > 0 && l >= 0 {
1340 let start = usize::try_from(p - 1)?;
1341 let end = start + usize::try_from(l)?;
1342 if end <= s.len() {
1343 let repl = repls
1344 .get(j)
1345 .and_then(|r| r.as_ref())
1346 .map(|r| r.as_str())
1347 .unwrap_or("");
1348 edits.push((start, end, repl));
1349 }
1350 }
1351 }
1352 edits.sort_by(|a, b| b.0.cmp(&a.0));
1354 let mut new_s = s.clone();
1355 for (start, end, repl) in edits {
1356 new_s = format!("{}{}{}", &new_s[..start], repl, &new_s[end..]);
1357 }
1358 result[i] = Some(new_s);
1359 }
1360 }
1361 Ok(RValue::vec(Vector::Character(result.into())))
1362 }
1363 _ => Err(RError::new(
1364 RErrorKind::Argument,
1365 "invalid match data".to_string(),
1366 )),
1367 }
1368}
1369
/// One parsed `sprintf` conversion specification, i.e. the text that follows a `%`.
struct FmtSpec {
    /// Conversion character that ends the specification (for example `d`, `f` or `s`).
    specifier: char,
    /// Flag characters exactly as written in the format string.
    flags: String,
    /// Minimum field width, if the specification carried one.
    width: Option<usize>,
    /// Precision (the digits after `.`), if the specification carried one.
    precision: Option<usize>,
}
1377
1378impl FmtSpec {
1379 fn pad(&self, formatted: &str) -> String {
1384 let width = match self.width {
1385 Some(w) => w,
1386 None => return formatted.to_string(),
1387 };
1388 let display_width = UnicodeWidthStr::width(formatted);
1389 if display_width >= width {
1390 return formatted.to_string();
1391 }
1392 let pad_char = if self.flags.contains('0') && !self.flags.contains('-') {
1393 '0'
1394 } else {
1395 ' '
1396 };
1397 let pad_len = width.saturating_sub(display_width);
1398 if self.flags.contains('-') {
1399 format!("{}{}", formatted, " ".repeat(pad_len))
1401 } else if pad_char == '0' {
1402 if let Some(rest) = formatted.strip_prefix('-') {
1404 let pad_len = width.saturating_sub(UnicodeWidthStr::width(rest) + 1);
1405 format!("-{}{}", "0".repeat(pad_len), rest)
1406 } else {
1407 format!("{}{}", "0".repeat(pad_len), formatted)
1408 }
1409 } else {
1410 format!("{}{}", " ".repeat(pad_len), formatted)
1412 }
1413 }
1414
1415 fn format_int(&self, v: i64) -> String {
1417 let raw = if self.flags.contains('+') && v >= 0 {
1418 format!("+{}", v)
1419 } else {
1420 v.to_string()
1421 };
1422 self.pad(&raw)
1423 }
1424
1425 fn format_float(&self, v: f64) -> String {
1427 let prec = self.precision.unwrap_or(6);
1428 let raw = match self.specifier {
1429 'f' => {
1430 let s = format!("{:.prec$}", v, prec = prec);
1431 if self.flags.contains('+') && v >= 0.0 && !v.is_nan() {
1432 format!("+{}", s)
1433 } else {
1434 s
1435 }
1436 }
1437 'e' | 'E' => {
1438 let s = format_scientific(v, prec, self.specifier == 'E');
1439 if self.flags.contains('+') && v >= 0.0 && !v.is_nan() {
1440 format!("+{}", s)
1441 } else {
1442 s
1443 }
1444 }
1445 'g' | 'G' => {
1446 let s = format_g(v, prec, self.specifier == 'G');
1447 if self.flags.contains('+') && v >= 0.0 && !v.is_nan() {
1448 format!("+{}", s)
1449 } else {
1450 s
1451 }
1452 }
1453 _ => format!("{}", v),
1454 };
1455 self.pad(&raw)
1456 }
1457
1458 fn format_str(&self, v: &str) -> String {
1460 let truncated = match self.precision {
1461 Some(prec) => &v[..v.len().min(prec)],
1462 None => v,
1463 };
1464 self.pad(truncated)
1465 }
1466}
1467
/// Renders `v` in C-style scientific notation: a mantissa with `prec` digits
/// after the decimal point, `e` (or `E` when `upper`), and a signed exponent of
/// at least two digits (e.g. `1.50e+03`).
///
/// NaN and the infinities are spelled `NaN`, `Inf` and `-Inf`. Zero (of either
/// sign) is rendered without a sign and with exponent `+00`.
///
/// The mantissa and exponent come from the standard library's exponent
/// formatting, which rounds first and normalises afterwards; a value such as
/// 9.9999999 at precision 2 therefore yields `1.00e+01` rather than `10.00e+00`.
fn format_scientific(v: f64, prec: usize, upper: bool) -> String {
    if v.is_nan() {
        return "NaN".to_string();
    }
    if v.is_infinite() {
        return String::from(if v > 0.0 { "Inf" } else { "-Inf" });
    }
    let e_char = if upper { 'E' } else { 'e' };
    if v == 0.0 {
        return format!("{:.prec$}{}{}", 0.0, e_char, "+00", prec = prec);
    }
    // `{:e}` produces e.g. "1.235e4"; split it so the exponent can be re-rendered
    // with an explicit sign and zero padding.
    let rendered = format!("{:.prec$e}", v, prec = prec);
    let (mantissa, exponent) = rendered
        .split_once('e')
        .expect("exponent formatting of a finite float always contains 'e'");
    let exponent: i32 = exponent
        .parse()
        .expect("exponent produced by float formatting is a decimal integer");
    format!("{}{}{:+03}", mantissa, e_char, exponent)
}
1489
/// Renders `v` in C-style `%g` notation with `prec` significant digits
/// (a precision of 0 is treated as 1).
///
/// The decimal exponent `X` is taken from the value *after* rounding to `prec`
/// significant digits. Scientific notation is used when `X < -4` or
/// `X >= prec`; otherwise fixed notation with `prec - 1 - X` decimals is used.
/// Trailing fractional zeros (and a dangling decimal point) are removed in both
/// forms. NaN and the infinities are spelled `NaN`, `Inf` and `-Inf`; zero is `0`.
fn format_g(v: f64, prec: usize, upper: bool) -> String {
    // Drops trailing zeros of a fractional part, plus the '.' if nothing remains after it.
    fn strip_trailing_zeros(digits: &str) -> &str {
        if digits.contains('.') {
            digits.trim_end_matches('0').trim_end_matches('.')
        } else {
            digits
        }
    }

    if v.is_nan() {
        return "NaN".to_string();
    }
    if v.is_infinite() {
        return String::from(if v > 0.0 { "Inf" } else { "-Inf" });
    }
    let prec = prec.max(1);
    if v == 0.0 {
        return "0".to_string();
    }

    // Let the formatter round to `prec` significant digits first, then read the
    // exponent back. Deriving it from log10 of the unrounded value picks the
    // wrong notation when rounding carries into the next power of ten.
    let sci = format!("{:.digits$e}", v, digits = prec - 1);
    let (mantissa, exponent) = sci
        .split_once('e')
        .expect("exponent formatting of a finite float always contains 'e'");
    let exp: i32 = exponent
        .parse()
        .expect("exponent produced by float formatting is a decimal integer");

    if exp < -4 || exp >= i32::try_from(prec).unwrap_or(i32::MAX) {
        let e_char = if upper { 'E' } else { 'e' };
        format!("{}{}{:+03}", strip_trailing_zeros(mantissa), e_char, exp)
    } else {
        let decimal_places = if exp >= 0 {
            prec.saturating_sub(usize::try_from(exp).unwrap_or(0) + 1)
        } else {
            prec.saturating_add(usize::try_from(-exp - 1).unwrap_or(0))
        };
        let fixed = format!("{:.places$}", v, places = decimal_places);
        strip_trailing_zeros(&fixed).to_string()
    }
}
1536
1537fn parse_fmt_spec(chars: &[char]) -> Option<(FmtSpec, usize)> {
1539 let mut i = 0;
1540
1541 let mut flags = String::new();
1543 while i < chars.len() && "-+ 0#".contains(chars[i]) {
1544 flags.push(chars[i]);
1545 i += 1;
1546 }
1547
1548 let mut width = None;
1550 let width_start = i;
1551 while i < chars.len() && chars[i].is_ascii_digit() {
1552 i += 1;
1553 }
1554 if i > width_start {
1555 width = chars[width_start..i]
1556 .iter()
1557 .collect::<String>()
1558 .parse()
1559 .ok();
1560 }
1561
1562 let mut precision = None;
1564 if i < chars.len() && chars[i] == '.' {
1565 i += 1;
1566 let prec_start = i;
1567 while i < chars.len() && chars[i].is_ascii_digit() {
1568 i += 1;
1569 }
1570 precision = Some(
1571 chars[prec_start..i]
1572 .iter()
1573 .collect::<String>()
1574 .parse()
1575 .unwrap_or(0),
1576 );
1577 }
1578
1579 if i < chars.len() {
1581 let specifier = chars[i];
1582 i += 1;
1583 Some((
1584 FmtSpec {
1585 flags,
1586 width,
1587 precision,
1588 specifier,
1589 },
1590 i,
1591 ))
1592 } else {
1593 None
1594 }
1595}
1596
1597fn collect_fmt_specs(fmt: &str) -> Vec<(FmtSpec, usize)> {
1600 let chars: Vec<char> = fmt.chars().collect();
1601 let mut specs = Vec::new();
1602 let mut i = 0;
1603 let mut arg_idx: usize = 0;
1604 while i < chars.len() {
1605 if chars[i] == '%' && i + 1 < chars.len() {
1606 i += 1;
1607 if chars[i] == '%' {
1608 i += 1;
1609 continue;
1610 }
1611 if let Some((spec, consumed)) = parse_fmt_spec(&chars[i..]) {
1612 i += consumed;
1613 match spec.specifier {
1614 'd' | 'i' | 'f' | 'e' | 'E' | 'g' | 'G' | 's' => {
1615 specs.push((spec, arg_idx));
1616 arg_idx += 1;
1617 }
1618 _ => {}
1619 }
1620 }
1621 } else {
1622 i += 1;
1623 }
1624 }
1625 specs
1626}
1627
1628fn sprintf_one(fmt: &str, data_args: &[&Vector], elem_idx: usize) -> Result<String, RError> {
1631 let chars: Vec<char> = fmt.chars().collect();
1632 let mut output = String::new();
1633 let mut i = 0;
1634 let mut arg_idx: usize = 0;
1635
1636 while i < chars.len() {
1637 if chars[i] == '%' && i + 1 < chars.len() {
1638 i += 1;
1639 if chars[i] == '%' {
1640 output.push('%');
1641 i += 1;
1642 continue;
1643 }
1644 if let Some((spec, consumed)) = parse_fmt_spec(&chars[i..]) {
1645 i += consumed;
1646 match spec.specifier {
1647 'd' | 'i' => {
1648 let vec = data_args.get(arg_idx).ok_or_else(|| {
1649 RError::new(
1650 RErrorKind::Argument,
1651 format!(
1652 "too few arguments for sprintf format: \
1653 need argument {} but only {} supplied",
1654 arg_idx + 1,
1655 data_args.len()
1656 ),
1657 )
1658 })?;
1659 let ints = vec.to_integers();
1660 let v = if ints.is_empty() {
1661 0
1662 } else {
1663 ints[elem_idx % ints.len()].unwrap_or(0)
1664 };
1665 output.push_str(&spec.format_int(v));
1666 arg_idx += 1;
1667 }
1668 'f' | 'e' | 'E' | 'g' | 'G' => {
1669 let vec = data_args.get(arg_idx).ok_or_else(|| {
1670 RError::new(
1671 RErrorKind::Argument,
1672 format!(
1673 "too few arguments for sprintf format: \
1674 need argument {} but only {} supplied",
1675 arg_idx + 1,
1676 data_args.len()
1677 ),
1678 )
1679 })?;
1680 let doubles = vec.to_doubles();
1681 let v = if doubles.is_empty() {
1682 0.0
1683 } else {
1684 doubles[elem_idx % doubles.len()].unwrap_or(0.0)
1685 };
1686 output.push_str(&spec.format_float(v));
1687 arg_idx += 1;
1688 }
1689 's' => {
1690 let vec = data_args.get(arg_idx).ok_or_else(|| {
1691 RError::new(
1692 RErrorKind::Argument,
1693 format!(
1694 "too few arguments for sprintf format: \
1695 need argument {} but only {} supplied",
1696 arg_idx + 1,
1697 data_args.len()
1698 ),
1699 )
1700 })?;
1701 let chars_vec = vec.to_characters();
1702 let v = if chars_vec.is_empty() {
1703 String::new()
1704 } else {
1705 chars_vec[elem_idx % chars_vec.len()]
1706 .clone()
1707 .unwrap_or_default()
1708 };
1709 output.push_str(&spec.format_str(&v));
1710 arg_idx += 1;
1711 }
1712 _ => {
1713 output.push('%');
1714 output.push(spec.specifier);
1715 }
1716 }
1717 }
1718 } else {
1719 output.push(chars[i]);
1720 i += 1;
1721 }
1722 }
1723 Ok(output)
1724}
1725
1726#[builtin(min_args = 1)]
1736fn builtin_sprintf(args: &[RValue], _: &[(String, RValue)]) -> Result<RValue, RError> {
1737 let fmt_vec = args.first().and_then(|v| v.as_vector()).ok_or_else(|| {
1738 RError::new(
1739 RErrorKind::Argument,
1740 "sprintf requires a character format string".to_string(),
1741 )
1742 })?;
1743
1744 let fmt_chars = fmt_vec.to_characters();
1745
1746 if fmt_chars.is_empty() {
1748 return Ok(RValue::vec(Vector::Character(
1749 Vec::<Option<String>>::new().into(),
1750 )));
1751 }
1752
1753 let fmt = fmt_chars[0].clone().unwrap_or_default();
1754
1755 let data_vecs: Vec<&Vector> = args[1..].iter().filter_map(|a| a.as_vector()).collect();
1757
1758 if data_vecs.iter().any(|v| v.is_empty()) {
1760 return Ok(RValue::vec(Vector::Character(
1761 Vec::<Option<String>>::new().into(),
1762 )));
1763 }
1764
1765 let max_len = data_vecs.iter().map(|v| v.len()).max().unwrap_or(1);
1767
1768 let specs = collect_fmt_specs(&fmt);
1771 let output_len = if specs.is_empty() || data_vecs.is_empty() {
1772 if data_vecs.is_empty() {
1773 1
1774 } else {
1775 max_len
1776 }
1777 } else {
1778 max_len
1779 };
1780
1781 let mut results: Vec<Option<String>> = Vec::with_capacity(output_len);
1782 for elem_idx in 0..output_len {
1783 results.push(Some(sprintf_one(&fmt, &data_vecs, elem_idx)?));
1784 }
1785
1786 Ok(RValue::vec(Vector::Character(results.into())))
1787}
1788
1789#[builtin(min_args = 2)]
1801fn builtin_strsplit(args: &[RValue], named: &[(String, RValue)]) -> Result<RValue, RError> {
1802 let x_vec = args
1803 .first()
1804 .and_then(|v| v.as_vector())
1805 .map(|v| v.to_characters())
1806 .unwrap_or_default();
1807 let split = args
1808 .get(1)
1809 .and_then(|v| v.as_vector()?.as_character_scalar())
1810 .unwrap_or_default();
1811 let (fixed, ignore_case) = get_regex_opts(named);
1812
1813 let re = if !split.is_empty() && (!fixed || ignore_case) {
1815 Some(build_regex(&split, fixed, ignore_case)?)
1816 } else {
1817 None
1818 };
1819
1820 let parts: Vec<(Option<String>, RValue)> = x_vec
1821 .into_iter()
1822 .map(|s_opt| {
1823 let elem = match s_opt {
1824 None => RValue::vec(Vector::Character(vec![None].into())),
1825 Some(s) => {
1826 if split.is_empty() {
1827 let chars: Vec<Option<String>> =
1829 s.chars().map(|c| Some(c.to_string())).collect();
1830 RValue::vec(Vector::Character(chars.into()))
1831 } else if fixed && !ignore_case {
1832 let pieces = fixed_split(&s, &split);
1834 RValue::vec(Vector::Character(pieces.into()))
1835 } else {
1836 let pieces: Vec<Option<String>> = re
1838 .as_ref()
1839 .expect("regex compiled successfully above")
1840 .split(&s)
1841 .map(|p| Some(p.to_string()))
1842 .collect();
1843 RValue::vec(Vector::Character(pieces.into()))
1844 }
1845 }
1846 };
1847 (None, elem)
1848 })
1849 .collect();
1850 Ok(RValue::List(RList::new(parts)))
1851}
1852
1853#[builtin(name = "startsWith", min_args = 2)]
1861fn builtin_starts_with(args: &[RValue], _: &[(String, RValue)]) -> Result<RValue, RError> {
1862 let x_vec = match args.first() {
1863 Some(RValue::Vector(rv)) if matches!(rv.inner, Vector::Character(_)) => {
1864 let Vector::Character(vals) = &rv.inner else {
1865 unreachable!()
1866 };
1867 vals.clone()
1868 }
1869 _ => {
1870 return Err(RError::new(
1871 RErrorKind::Argument,
1872 "argument 'x' is not character".to_string(),
1873 ))
1874 }
1875 };
1876 let prefix_vec = args
1877 .get(1)
1878 .and_then(|v| v.as_vector())
1879 .map(|v| v.to_characters())
1880 .unwrap_or_default();
1881
1882 if x_vec.is_empty() || prefix_vec.is_empty() {
1883 return Ok(RValue::vec(Vector::Logical(
1884 Vec::<Option<bool>>::new().into(),
1885 )));
1886 }
1887
1888 let out_len = x_vec.len().max(prefix_vec.len());
1889 let result: Vec<Option<bool>> = (0..out_len)
1890 .map(|i| {
1891 let x_opt = &x_vec[i % x_vec.len()];
1892 let p_opt = &prefix_vec[i % prefix_vec.len()];
1893 match (x_opt, p_opt) {
1894 (Some(x), Some(p)) => Some(x.starts_with(p.as_str())),
1895 _ => None,
1896 }
1897 })
1898 .collect();
1899 Ok(RValue::vec(Vector::Logical(result.into())))
1900}
1901
1902#[builtin(name = "endsWith", min_args = 2)]
1910fn builtin_ends_with(args: &[RValue], _: &[(String, RValue)]) -> Result<RValue, RError> {
1911 let x_vec = match args.first() {
1912 Some(RValue::Vector(rv)) if matches!(rv.inner, Vector::Character(_)) => {
1913 let Vector::Character(vals) = &rv.inner else {
1914 unreachable!()
1915 };
1916 vals.clone()
1917 }
1918 _ => {
1919 return Err(RError::new(
1920 RErrorKind::Argument,
1921 "argument 'x' is not character".to_string(),
1922 ))
1923 }
1924 };
1925 let suffix_vec = args
1926 .get(1)
1927 .and_then(|v| v.as_vector())
1928 .map(|v| v.to_characters())
1929 .unwrap_or_default();
1930
1931 if x_vec.is_empty() || suffix_vec.is_empty() {
1932 return Ok(RValue::vec(Vector::Logical(
1933 Vec::<Option<bool>>::new().into(),
1934 )));
1935 }
1936
1937 let out_len = x_vec.len().max(suffix_vec.len());
1938 let result: Vec<Option<bool>> = (0..out_len)
1939 .map(|i| {
1940 let x_opt = &x_vec[i % x_vec.len()];
1941 let s_opt = &suffix_vec[i % suffix_vec.len()];
1942 match (x_opt, s_opt) {
1943 (Some(x), Some(s)) => Some(x.ends_with(s.as_str())),
1944 _ => None,
1945 }
1946 })
1947 .collect();
1948 Ok(RValue::vec(Vector::Logical(result.into())))
1949}
1950
1951#[builtin(min_args = 3)]
1960fn builtin_chartr(args: &[RValue], _: &[(String, RValue)]) -> Result<RValue, RError> {
1961 let old = args
1962 .first()
1963 .and_then(|v| v.as_vector()?.as_character_scalar())
1964 .unwrap_or_default();
1965 let new = args
1966 .get(1)
1967 .and_then(|v| v.as_vector()?.as_character_scalar())
1968 .unwrap_or_default();
1969 let x_vec = args
1970 .get(2)
1971 .and_then(|v| v.as_vector())
1972 .map(|v| v.to_characters())
1973 .unwrap_or_default();
1974 let old_chars: Vec<char> = old.chars().collect();
1975 let new_chars: Vec<char> = new.chars().collect();
1976 let result: Vec<Option<String>> = x_vec
1977 .into_iter()
1978 .map(|s_opt| {
1979 s_opt.map(|s| {
1980 s.chars()
1981 .map(|c| {
1982 if let Some(pos) = old_chars.iter().position(|&oc| oc == c) {
1983 new_chars.get(pos).copied().unwrap_or(c)
1984 } else {
1985 c
1986 }
1987 })
1988 .collect()
1989 })
1990 })
1991 .collect();
1992 Ok(RValue::vec(Vector::Character(result.into())))
1993}
1994
1995#[builtin(min_args = 1)]
2000fn builtin_make_names(args: &[RValue], _: &[(String, RValue)]) -> Result<RValue, RError> {
2001 match args.first() {
2002 Some(RValue::Vector(rv)) if matches!(rv.inner, Vector::Character(_)) => {
2003 let Vector::Character(vals) = &rv.inner else {
2004 unreachable!()
2005 };
2006 let result: Vec<Option<String>> = vals
2007 .iter()
2008 .map(|s| {
2009 s.as_ref().map(|s| {
2010 let mut name = String::new();
2011 for (i, c) in s.chars().enumerate() {
2012 if i == 0 && c.is_ascii_digit() {
2013 name.push('X');
2014 }
2015 if c.is_alphanumeric() || c == '.' || c == '_' {
2016 name.push(c);
2017 } else {
2018 name.push('.');
2019 }
2020 }
2021 if name.is_empty() {
2022 name = "X".to_string();
2023 }
2024 name
2025 })
2026 })
2027 .collect();
2028 Ok(RValue::vec(Vector::Character(result.into())))
2029 }
2030 _ => Ok(args.first().cloned().unwrap_or(RValue::Null)),
2031 }
2032}
2033
2034#[builtin(min_args = 1)]
2039fn builtin_make_unique(args: &[RValue], _: &[(String, RValue)]) -> Result<RValue, RError> {
2040 match args.first() {
2041 Some(RValue::Vector(rv)) if matches!(rv.inner, Vector::Character(_)) => {
2042 let Vector::Character(vals) = &rv.inner else {
2043 unreachable!()
2044 };
2045 let mut result = Vec::new();
2046 let mut counts: HashMap<String, usize> = HashMap::new();
2047 for v in vals.iter() {
2048 if let Some(s) = v {
2049 let count = counts.entry(s.clone()).or_insert(0);
2050 if *count > 0 {
2051 result.push(Some(format!("{}.{}", s, count)));
2052 } else {
2053 result.push(Some(s.clone()));
2054 }
2055 *count += 1;
2056 } else {
2057 result.push(None);
2058 }
2059 }
2060 Ok(RValue::vec(Vector::Character(result.into())))
2061 }
2062 _ => Ok(args.first().cloned().unwrap_or(RValue::Null)),
2063 }
2064}
2065
2066#[builtin(min_args = 1)]
2073fn builtin_basename(args: &[RValue], _: &[(String, RValue)]) -> Result<RValue, RError> {
2074 let path_vec = args
2075 .first()
2076 .and_then(|v| v.as_vector())
2077 .map(|v| v.to_characters())
2078 .unwrap_or_default();
2079 let result: Vec<Option<String>> = path_vec
2080 .into_iter()
2081 .map(|p_opt| {
2082 p_opt.map(|p| {
2083 std::path::Path::new(&p)
2084 .file_name()
2085 .map(|n| n.to_string_lossy().to_string())
2086 .unwrap_or(p)
2087 })
2088 })
2089 .collect();
2090 Ok(RValue::vec(Vector::Character(result.into())))
2091}
2092
2093#[builtin(min_args = 1)]
2100fn builtin_dirname(args: &[RValue], _: &[(String, RValue)]) -> Result<RValue, RError> {
2101 let path_vec = args
2102 .first()
2103 .and_then(|v| v.as_vector())
2104 .map(|v| v.to_characters())
2105 .unwrap_or_default();
2106 let result: Vec<Option<String>> = path_vec
2107 .into_iter()
2108 .map(|p_opt| {
2109 p_opt.map(|p| {
2110 std::path::Path::new(&p)
2111 .parent()
2112 .map(|par| par.to_string_lossy().to_string())
2113 .unwrap_or_else(|| ".".to_string())
2114 })
2115 })
2116 .collect();
2117 Ok(RValue::vec(Vector::Character(result.into())))
2118}
2119
2120#[builtin(min_args = 1)]
2125fn builtin_deparse(args: &[RValue], _: &[(String, RValue)]) -> Result<RValue, RError> {
2126 let s = match args.first() {
2127 Some(RValue::Language(expr)) => deparse_expr(expr),
2128 Some(v) => format!("{}", v),
2129 None => "NULL".to_string(),
2130 };
2131 Ok(RValue::vec(Vector::Character(vec![Some(s)].into())))
2132}
2133
2134#[builtin(name = "deparse1", min_args = 1)]
2140fn builtin_deparse1(args: &[RValue], named: &[(String, RValue)]) -> Result<RValue, RError> {
2141 let collapse = named
2142 .iter()
2143 .find(|(n, _)| n == "collapse")
2144 .and_then(|(_, v)| v.as_vector()?.as_character_scalar())
2145 .unwrap_or_else(|| " ".to_string());
2146
2147 let s = match args.first() {
2148 Some(RValue::Language(expr)) => deparse_expr(expr),
2149 Some(v) => format!("{v}"),
2150 None => "NULL".to_string(),
2151 };
2152 let collapsed = s.lines().collect::<Vec<_>>().join(&collapse);
2153 Ok(RValue::vec(Vector::Character(vec![Some(collapsed)].into())))
2154}
2155
2156#[builtin(name = "intToUtf8", min_args = 1)]
2161fn builtin_int_to_utf8(args: &[RValue], _: &[(String, RValue)]) -> Result<RValue, RError> {
2162 let ints = match args.first() {
2163 Some(RValue::Vector(rv)) => rv.to_integers(),
2164 _ => {
2165 return Err(RError::new(
2166 RErrorKind::Argument,
2167 "argument must be an integer vector".to_string(),
2168 ))
2169 }
2170 };
2171 let mut result = String::new();
2172 for val in &ints {
2173 match val {
2174 Some(code) if *code >= 0 => match char::from_u32(u32::try_from(*code)?) {
2175 Some(c) => result.push(c),
2176 None => {
2177 return Err(RError::new(
2178 RErrorKind::Argument,
2179 format!("invalid Unicode code point: {}", code),
2180 ))
2181 }
2182 },
2183 Some(code) => {
2184 return Err(RError::new(
2185 RErrorKind::Argument,
2186 format!("invalid Unicode code point: {}", code),
2187 ))
2188 }
2189 None => result.push('\u{FFFD}'), }
2191 }
2192 Ok(RValue::vec(Vector::Character(vec![Some(result)].into())))
2193}
2194
2195#[builtin(name = "utf8ToInt", min_args = 1)]
2200fn builtin_utf8_to_int(args: &[RValue], _: &[(String, RValue)]) -> Result<RValue, RError> {
2201 let s = args
2202 .first()
2203 .and_then(|v| v.as_vector()?.as_character_scalar())
2204 .ok_or_else(|| {
2205 RError::new(
2206 RErrorKind::Argument,
2207 "argument must be a single string".to_string(),
2208 )
2209 })?;
2210 let result: Vec<Option<i64>> = s.chars().map(|c| Some(i64::from(u32::from(c)))).collect();
2211 Ok(RValue::vec(Vector::Integer(result.into())))
2212}
2213
2214#[builtin(name = "charToRaw", min_args = 1)]
2219fn builtin_char_to_raw(args: &[RValue], _: &[(String, RValue)]) -> Result<RValue, RError> {
2220 let s = args
2221 .first()
2222 .and_then(|v| v.as_vector()?.as_character_scalar())
2223 .ok_or_else(|| RError::new(RErrorKind::Argument, "argument must be a single string"))?;
2224 let result: Vec<u8> = s.bytes().collect();
2225 Ok(RValue::vec(Vector::Raw(result)))
2226}
2227
2228#[builtin(name = "rawToChar", min_args = 1)]
2240fn builtin_raw_to_char(args: &[RValue], named: &[(String, RValue)]) -> Result<RValue, RError> {
2241 let bytes = match args.first() {
2242 Some(RValue::Vector(rv)) => rv.inner.to_raw(),
2243 _ => {
2244 return Err(RError::new(
2245 RErrorKind::Argument,
2246 "argument must be a raw or integer vector",
2247 ))
2248 }
2249 };
2250
2251 let multiple = named
2252 .iter()
2253 .find(|(k, _)| k == "multiple")
2254 .and_then(|(_, v)| v.as_vector()?.as_logical_scalar())
2255 .or_else(|| args.get(1).and_then(|v| v.as_vector()?.as_logical_scalar()))
2256 .unwrap_or(false);
2257
2258 if multiple {
2259 let result: Vec<Option<String>> = bytes
2261 .iter()
2262 .map(|&b| {
2263 if b == 0 {
2265 Some(String::new())
2266 } else {
2267 Some(std::slice::from_ref(&b).to_str_lossy().into_owned())
2268 }
2269 })
2270 .collect();
2271 Ok(RValue::vec(Vector::Character(result.into())))
2272 } else {
2273 let filtered: Vec<u8> = bytes.into_iter().filter(|&b| b != 0).collect();
2275 let s = filtered.as_bstr().to_str_lossy().into_owned();
2276 Ok(RValue::vec(Vector::Character(vec![Some(s)].into())))
2277 }
2278}
2279
2280#[builtin]
2284fn builtin_raw(args: &[RValue], _: &[(String, RValue)]) -> Result<RValue, RError> {
2285 let n = args
2286 .first()
2287 .and_then(|v| v.as_vector()?.as_integer_scalar())
2288 .unwrap_or(0);
2289 if n < 0 {
2290 return Err(RError::new(
2291 RErrorKind::Argument,
2292 format!("invalid 'length' argument: {}", n),
2293 ));
2294 }
2295 let len = usize::try_from(n)?;
2296 Ok(RValue::vec(Vector::Raw(vec![0u8; len])))
2297}
2298
2299#[builtin(name = "rawShift", min_args = 2)]
2302fn builtin_raw_shift(args: &[RValue], _: &[(String, RValue)]) -> Result<RValue, RError> {
2303 let bytes = match args.first() {
2304 Some(RValue::Vector(rv)) => rv.inner.to_raw(),
2305 _ => {
2306 return Err(RError::new(
2307 RErrorKind::Argument,
2308 "argument 'x' must be a raw vector",
2309 ))
2310 }
2311 };
2312 let shift = args
2313 .get(1)
2314 .and_then(|v| v.as_vector()?.as_integer_scalar())
2315 .ok_or_else(|| {
2316 RError::new(
2317 RErrorKind::Argument,
2318 "argument 'n' must be a single integer",
2319 )
2320 })?;
2321 if !(-8..=8).contains(&shift) {
2322 return Err(RError::new(
2323 RErrorKind::Argument,
2324 format!("shift amount must be between -8 and 8, got {}", shift),
2325 ));
2326 }
2327
2328 let result: Vec<u8> = bytes
2329 .iter()
2330 .map(|&byte| {
2331 if shift >= 0 {
2332 byte.wrapping_shl(u32::try_from(shift).unwrap_or(0))
2333 } else {
2334 byte.wrapping_shr(u32::try_from(-shift).unwrap_or(0))
2335 }
2336 })
2337 .collect();
2338 Ok(RValue::vec(Vector::Raw(result)))
2339}
2340
2341#[builtin(name = "as.raw", min_args = 1)]
2343fn builtin_as_raw(args: &[RValue], _: &[(String, RValue)]) -> Result<RValue, RError> {
2344 match args.first() {
2345 Some(RValue::Vector(rv)) => Ok(RValue::vec(Vector::Raw(rv.inner.to_raw()))),
2346 _ => Err(RError::new(
2347 RErrorKind::Argument,
2348 "argument must be a vector",
2349 )),
2350 }
2351}
2352
2353#[builtin(name = "is.raw", min_args = 1)]
2355fn builtin_is_raw(args: &[RValue], _: &[(String, RValue)]) -> Result<RValue, RError> {
2356 let is_raw =
2357 matches!(args.first(), Some(RValue::Vector(rv)) if matches!(rv.inner, Vector::Raw(_)));
2358 Ok(RValue::vec(Vector::Logical(vec![Some(is_raw)].into())))
2359}
2360
2361#[builtin(name = "glob2rx", min_args = 1)]
2366fn builtin_glob2rx(args: &[RValue], _: &[(String, RValue)]) -> Result<RValue, RError> {
2367 let pattern = args
2368 .first()
2369 .and_then(|v| v.as_vector()?.as_character_scalar())
2370 .ok_or_else(|| {
2371 RError::new(
2372 RErrorKind::Argument,
2373 "argument must be a character string".to_string(),
2374 )
2375 })?;
2376 let mut result = String::from("^");
2377 for c in pattern.chars() {
2378 match c {
2379 '*' => result.push_str(".*"),
2380 '?' => result.push('.'),
2381 '.' | '(' | ')' | '+' | '|' | '{' | '}' | '[' | ']' | '^' | '$' | '\\' => {
2382 result.push('\\');
2383 result.push(c);
2384 }
2385 _ => result.push(c),
2386 }
2387 }
2388 result.push('$');
2389 Ok(RValue::vec(Vector::Character(vec![Some(result)].into())))
2390}
2391
2392#[builtin(min_args = 2)]
2400fn builtin_regexec(args: &[RValue], named: &[(String, RValue)]) -> Result<RValue, RError> {
2401 let pattern = args
2402 .first()
2403 .and_then(|v| v.as_vector()?.as_character_scalar())
2404 .unwrap_or_default();
2405 let (fixed, ignore_case) = get_regex_opts(named);
2406 let re = build_regex(&pattern, fixed, ignore_case)?;
2407 match args.get(1) {
2408 Some(RValue::Vector(rv)) if matches!(rv.inner, Vector::Character(_)) => {
2409 let Vector::Character(vals) = &rv.inner else {
2410 unreachable!()
2411 };
2412 let mut list_items = Vec::new();
2413 for s in vals.iter() {
2414 match s.as_ref().and_then(|s| re.captures(s)) {
2415 Some(caps) => {
2416 let mut positions = Vec::new();
2417 let mut lengths = Vec::new();
2418 for i in 0..caps.len() {
2419 match caps.get(i) {
2420 Some(m) => {
2421 positions.push(Some(i64::try_from(m.start())? + 1));
2422 lengths.push(Some(i64::try_from(m.len())?));
2423 }
2424 None => {
2425 positions.push(Some(-1));
2426 lengths.push(Some(-1));
2427 }
2428 }
2429 }
2430 let mut match_rv = RVector::from(Vector::Integer(positions.into()));
2431 match_rv.set_attr(
2432 "match.length".to_string(),
2433 RValue::vec(Vector::Integer(lengths.into())),
2434 );
2435 list_items.push((None, RValue::Vector(match_rv)));
2436 }
2437 None => {
2438 let mut match_rv = RVector::from(Vector::Integer(vec![Some(-1)].into()));
2439 match_rv.set_attr(
2440 "match.length".to_string(),
2441 RValue::vec(Vector::Integer(vec![Some(-1)].into())),
2442 );
2443 list_items.push((None, RValue::Vector(match_rv)));
2444 }
2445 }
2446 }
2447 Ok(RValue::List(RList::new(list_items)))
2448 }
2449 _ => Err(RError::new(
2450 RErrorKind::Argument,
2451 "argument is not character".to_string(),
2452 )),
2453 }
2454}
2455
2456#[interpreter_builtin(min_args = 1)]
2461fn interp_dput(
2462 args: &[RValue],
2463 _named: &[(String, RValue)],
2464 context: &BuiltinContext,
2465) -> Result<RValue, RError> {
2466 let s = match args.first() {
2467 Some(RValue::Language(expr)) => deparse_expr(expr),
2468 Some(v) => format!("{}", v),
2469 None => "NULL".to_string(),
2470 };
2471 context.write(&format!("{}\n", s));
2472 Ok(RValue::Null)
2473}
2474
2475#[builtin(min_args = 1)]
2483fn builtin_strtoi(args: &[RValue], named: &[(String, RValue)]) -> Result<RValue, RError> {
2484 let x_vec = args
2485 .first()
2486 .and_then(|v| v.as_vector())
2487 .map(|v| v.to_characters())
2488 .unwrap_or_default();
2489 let base = named
2490 .iter()
2491 .find(|(n, _)| n == "base")
2492 .and_then(|(_, v)| v.as_vector()?.as_integer_scalar())
2493 .or_else(|| args.get(1).and_then(|v| v.as_vector()?.as_integer_scalar()))
2494 .unwrap_or(10);
2495 let base = u32::try_from(base)?;
2496 let result: Vec<Option<i64>> = x_vec
2497 .into_iter()
2498 .map(|s_opt| match s_opt {
2499 None => None,
2500 Some(s) => i64::from_str_radix(s.trim(), base).ok(),
2501 })
2502 .collect();
2503 Ok(RValue::vec(Vector::Integer(result.into())))
2504}
2505
2506#[builtin(min_args = 1)]
2511fn builtin_nzchar(args: &[RValue], _: &[(String, RValue)]) -> Result<RValue, RError> {
2512 match args.first() {
2513 Some(RValue::Vector(rv)) if matches!(rv.inner, Vector::Character(_)) => {
2514 let Vector::Character(vals) = &rv.inner else {
2515 unreachable!()
2516 };
2517 let result: Vec<Option<bool>> = vals
2518 .iter()
2519 .map(|s| match s {
2520 Some(s) => Some(!s.is_empty()),
2521 None => Some(true),
2522 })
2523 .collect();
2524 Ok(RValue::vec(Vector::Logical(result.into())))
2525 }
2526 Some(val) => {
2527 let s = val
2528 .as_vector()
2529 .and_then(|v| v.as_character_scalar())
2530 .unwrap_or_default();
2531 Ok(RValue::vec(Vector::Logical(
2532 vec![Some(!s.is_empty())].into(),
2533 )))
2534 }
2535 None => Err(RError::new(
2536 RErrorKind::Argument,
2537 "argument is missing".to_string(),
2538 )),
2539 }
2540}
2541
2542#[builtin(name = "sQuote", min_args = 1)]
2547fn builtin_squote(args: &[RValue], _: &[(String, RValue)]) -> Result<RValue, RError> {
2548 match args.first() {
2549 Some(RValue::Vector(rv)) if matches!(rv.inner, Vector::Character(_)) => {
2550 let Vector::Character(vals) = &rv.inner else {
2551 unreachable!()
2552 };
2553 let result: Vec<Option<String>> = vals
2554 .iter()
2555 .map(|s| s.as_ref().map(|s| format!("\u{2018}{}\u{2019}", s)))
2556 .collect();
2557 Ok(RValue::vec(Vector::Character(result.into())))
2558 }
2559 Some(val) => {
2560 let s = val
2561 .as_vector()
2562 .and_then(|v| v.as_character_scalar())
2563 .unwrap_or_default();
2564 Ok(RValue::vec(Vector::Character(
2565 vec![Some(format!("\u{2018}{}\u{2019}", s))].into(),
2566 )))
2567 }
2568 None => Ok(RValue::vec(Vector::Character(vec![None].into()))),
2569 }
2570}
2571
2572#[builtin(name = "dQuote", min_args = 1)]
2577fn builtin_dquote(args: &[RValue], _: &[(String, RValue)]) -> Result<RValue, RError> {
2578 match args.first() {
2579 Some(RValue::Vector(rv)) if matches!(rv.inner, Vector::Character(_)) => {
2580 let Vector::Character(vals) = &rv.inner else {
2581 unreachable!()
2582 };
2583 let result: Vec<Option<String>> = vals
2584 .iter()
2585 .map(|s| s.as_ref().map(|s| format!("\u{201C}{}\u{201D}", s)))
2586 .collect();
2587 Ok(RValue::vec(Vector::Character(result.into())))
2588 }
2589 Some(val) => {
2590 let s = val
2591 .as_vector()
2592 .and_then(|v| v.as_character_scalar())
2593 .unwrap_or_default();
2594 Ok(RValue::vec(Vector::Character(
2595 vec![Some(format!("\u{201C}{}\u{201D}", s))].into(),
2596 )))
2597 }
2598 _ => Ok(RValue::vec(Vector::Character(vec![None].into()))),
2599 }
2600}
2601
2602#[builtin(min_args = 2)]
2610fn builtin_strrep(args: &[RValue], _: &[(String, RValue)]) -> Result<RValue, RError> {
2611 let x = args.first().and_then(|v| v.as_vector()).ok_or_else(|| {
2612 RError::new(
2613 RErrorKind::Argument,
2614 "strrep() requires a character vector as first argument".to_string(),
2615 )
2616 })?;
2617 let times = args.get(1).and_then(|v| v.as_vector()).ok_or_else(|| {
2618 RError::new(
2619 RErrorKind::Argument,
2620 "strrep() requires an integer 'times' argument".to_string(),
2621 )
2622 })?;
2623
2624 let chars = x.to_characters();
2625 let ints = times.to_integers();
2626 let max_len = chars.len().max(ints.len());
2627
2628 let result: Vec<Option<String>> = (0..max_len)
2629 .map(|i| {
2630 let s = &chars[i % chars.len()];
2631 let n = ints[i % ints.len()];
2632 match (s, n) {
2633 (Some(s), Some(n)) => {
2634 if n < 0 {
2635 None } else {
2637 Some(s.repeat(usize::try_from(n).unwrap_or(0)))
2638 }
2639 }
2640 _ => None,
2641 }
2642 })
2643 .collect();
2644
2645 Ok(RValue::vec(Vector::Character(result.into())))
2646}
2647
2648#[builtin(name = "formatC", min_args = 1)]
2664fn builtin_format_c(args: &[RValue], named: &[(String, RValue)]) -> Result<RValue, RError> {
2665 let x = args.first().and_then(|v| v.as_vector()).ok_or_else(|| {
2666 RError::new(
2667 RErrorKind::Argument,
2668 "formatC() requires a vector as first argument".to_string(),
2669 )
2670 })?;
2671
2672 let width: usize = named
2674 .iter()
2675 .find(|(k, _)| k == "width")
2676 .and_then(|(_, v)| v.as_vector()?.as_integer_scalar())
2677 .and_then(|i| usize::try_from(i).ok())
2678 .or_else(|| {
2679 args.get(1)
2680 .and_then(|v| v.as_vector()?.as_integer_scalar())
2681 .and_then(|i| usize::try_from(i).ok())
2682 })
2683 .unwrap_or(0);
2684
2685 let default_format = match x {
2686 Vector::Character(_) => "s",
2687 _ => "g",
2688 };
2689 let format = named
2690 .iter()
2691 .find(|(k, _)| k == "format")
2692 .and_then(|(_, v)| v.as_vector()?.as_character_scalar())
2693 .unwrap_or_else(|| default_format.to_string());
2694
2695 let flag = named
2696 .iter()
2697 .find(|(k, _)| k == "flag")
2698 .and_then(|(_, v)| v.as_vector()?.as_character_scalar())
2699 .unwrap_or_default();
2700
2701 let digits: usize = named
2702 .iter()
2703 .find(|(k, _)| k == "digits")
2704 .and_then(|(_, v)| v.as_vector()?.as_integer_scalar())
2705 .and_then(|i| usize::try_from(i).ok())
2706 .unwrap_or(6);
2707
2708 let spec = FmtSpec {
2709 flags: flag,
2710 width: if width > 0 { Some(width) } else { None },
2711 precision: Some(digits),
2712 specifier: format.chars().next().unwrap_or('g'),
2713 };
2714
2715 let result: Vec<Option<String>> = match &format[..] {
2716 "d" => {
2717 let ints = x.to_integers();
2718 ints.iter().map(|v| v.map(|i| spec.format_int(i))).collect()
2719 }
2720 "f" | "e" | "E" | "g" | "G" => {
2721 let doubles = x.to_doubles();
2722 doubles
2723 .iter()
2724 .map(|v| v.map(|f| spec.format_float(f)))
2725 .collect()
2726 }
2727 "s" => {
2728 let chars = x.to_characters();
2729 chars
2730 .iter()
2731 .map(|v| v.as_ref().map(|s| spec.format_str(s)))
2732 .collect()
2733 }
2734 _ => {
2735 return Err(RError::new(
2736 RErrorKind::Argument,
2737 format!(
2738 "formatC(): invalid 'format' argument '{}'. \
2739 Use one of: \"d\", \"f\", \"e\", \"g\", \"s\"",
2740 format
2741 ),
2742 ));
2743 }
2744 };
2745
2746 Ok(RValue::vec(Vector::Character(result.into())))
2747}
2748
2749#[builtin(name = "format.pval", min_args = 1)]
2761fn builtin_format_pval(args: &[RValue], named: &[(String, RValue)]) -> Result<RValue, RError> {
2762 let x = args.first().and_then(|v| v.as_vector()).ok_or_else(|| {
2763 RError::new(
2764 RErrorKind::Argument,
2765 "format.pval() requires a numeric vector".to_string(),
2766 )
2767 })?;
2768
2769 let digits: usize = named
2770 .iter()
2771 .find(|(k, _)| k == "digits")
2772 .and_then(|(_, v)| v.as_vector()?.as_integer_scalar())
2773 .and_then(|i| usize::try_from(i).ok())
2774 .unwrap_or(3);
2775
2776 let eps: f64 = named
2777 .iter()
2778 .find(|(k, _)| k == "eps")
2779 .and_then(|(_, v)| v.as_vector()?.as_double_scalar())
2780 .unwrap_or(f64::EPSILON);
2781
2782 let doubles = x.to_doubles();
2783 let result: Vec<Option<String>> = doubles
2784 .iter()
2785 .map(|v| {
2786 v.map(|pv| {
2787 if pv.is_nan() {
2788 "NaN".to_string()
2789 } else if pv < eps {
2790 format!("< {:.e_digits$e}", eps, e_digits = digits.saturating_sub(1))
2791 } else if pv > 1.0 - eps {
2792 format!("{:.prec$}", pv, prec = digits)
2794 } else {
2795 format_g(pv, digits, false)
2796 }
2797 })
2798 })
2799 .collect();
2800
2801 Ok(RValue::vec(Vector::Character(result.into())))
2802}
2803
2804#[builtin(name = "prettyNum", min_args = 1)]
2817fn builtin_pretty_num(args: &[RValue], named: &[(String, RValue)]) -> Result<RValue, RError> {
2818 let x = args.first().and_then(|v| v.as_vector()).ok_or_else(|| {
2819 RError::new(
2820 RErrorKind::Argument,
2821 "prettyNum() requires a vector as first argument".to_string(),
2822 )
2823 })?;
2824
2825 let big_mark = named
2826 .iter()
2827 .find(|(k, _)| k == "big.mark")
2828 .and_then(|(_, v)| v.as_vector()?.as_character_scalar())
2829 .unwrap_or_default();
2830
2831 let small_mark = named
2832 .iter()
2833 .find(|(k, _)| k == "small.mark")
2834 .and_then(|(_, v)| v.as_vector()?.as_character_scalar())
2835 .unwrap_or_default();
2836
2837 let chars = x.to_characters();
2838 let result: Vec<Option<String>> = chars
2839 .iter()
2840 .map(|v| v.as_ref().map(|s| insert_marks(s, &big_mark, &small_mark)))
2841 .collect();
2842
2843 Ok(RValue::vec(Vector::Character(result.into())))
2844}
2845
/// Insert grouping marks into the textual form of a number.
///
/// * `big_mark` is inserted every three digits, counted from the right, in
///   the integer part.
/// * `small_mark` is inserted every three digits, counted from the left, in
///   the fractional part.
///
/// A leading `-`/`+` is preserved and surrounding whitespace of the rest is
/// trimmed. Only the leading run of ASCII digits of each part is grouped;
/// anything after it (an exponent such as `e+10`, a unit suffix) and any
/// non-digit prefix is copied through unchanged, so text without digits is
/// never altered by a mark.
///
/// NOTE(review): R's `prettyNum` defaults `small.interval` to 5; this helper
/// uses 3 for both parts - confirm which is intended.
fn insert_marks(s: &str, big_mark: &str, small_mark: &str) -> String {
    // Split `text` into its leading run of ASCII digits and the remainder.
    fn leading_digits(text: &str) -> (&str, &str) {
        let end = text
            .find(|c: char| !c.is_ascii_digit())
            .unwrap_or(text.len());
        text.split_at(end)
    }

    if big_mark.is_empty() && small_mark.is_empty() {
        return s.to_string();
    }

    let (sign, rest) = if let Some(stripped) = s.strip_prefix('-') {
        ("-", stripped)
    } else if let Some(stripped) = s.strip_prefix('+') {
        ("+", stripped)
    } else {
        ("", s)
    };
    let rest = rest.trim();

    let (int_part, frac_part) = match rest.split_once('.') {
        Some((int_part, frac)) => (int_part, Some(frac)),
        None => (rest, None),
    };

    let mut out = String::with_capacity(s.len() + 10);
    out.push_str(sign);

    // Integer part: <non-digit prefix><digit run><tail>.
    let first_digit = int_part
        .find(|c: char| c.is_ascii_digit())
        .unwrap_or(int_part.len());
    let (lead, numeric) = int_part.split_at(first_digit);
    let (digits, tail) = leading_digits(numeric);
    out.push_str(lead);
    if !big_mark.is_empty() && digits.len() > 3 {
        let len = digits.len();
        for (i, ch) in digits.chars().enumerate() {
            out.push(ch);
            let remaining = len - 1 - i;
            if remaining > 0 && remaining % 3 == 0 {
                out.push_str(big_mark);
            }
        }
    } else {
        out.push_str(digits);
    }
    out.push_str(tail);

    // Fractional part: <digit run><tail>.
    if let Some(frac) = frac_part {
        out.push('.');
        let (digits, tail) = leading_digits(frac);
        if !small_mark.is_empty() && digits.len() > 3 {
            for (i, ch) in digits.chars().enumerate() {
                out.push(ch);
                let pos = i + 1;
                if pos < digits.len() && pos % 3 == 0 {
                    out.push_str(small_mark);
                }
            }
        } else {
            out.push_str(digits);
        }
        out.push_str(tail);
    }

    out
}
2909
2910#[builtin(name = "Encoding", min_args = 1)]
2922fn builtin_encoding(args: &[RValue], _: &[(String, RValue)]) -> Result<RValue, RError> {
2923 let chars = match args.first() {
2924 Some(RValue::Vector(rv)) => rv.to_characters(),
2925 _ => {
2926 return Err(RError::new(
2927 RErrorKind::Argument,
2928 "argument is not a character vector".to_string(),
2929 ))
2930 }
2931 };
2932 let result: Vec<Option<String>> = chars
2933 .iter()
2934 .map(|s| match s {
2935 Some(s) => {
2936 if s.is_ascii() {
2937 Some("unknown".to_string())
2938 } else {
2939 Some("UTF-8".to_string())
2940 }
2941 }
2942 None => Some("unknown".to_string()),
2943 })
2944 .collect();
2945 Ok(RValue::vec(Vector::Character(result.into())))
2946}
2947
2948#[builtin(min_args = 1)]
2963fn builtin_iconv(args: &[RValue], named: &[(String, RValue)]) -> Result<RValue, RError> {
2964 let chars = match args.first() {
2965 Some(RValue::Vector(rv)) => rv.to_characters(),
2966 _ => {
2967 return Err(RError::new(
2968 RErrorKind::Argument,
2969 "argument is not a character vector".to_string(),
2970 ))
2971 }
2972 };
2973
2974 let call_args = super::CallArgs::new(args, named);
2975 let from_enc = call_args
2976 .optional_string("from", 1)
2977 .unwrap_or_default()
2978 .to_uppercase();
2979 let to_enc = call_args
2980 .optional_string("to", 2)
2981 .unwrap_or_default()
2982 .to_uppercase();
2983 let sub = call_args.optional_string("sub", 3);
2984
2985 let from_enc = normalize_encoding_name(&from_enc);
2987 let to_enc = normalize_encoding_name(&to_enc);
2988
2989 let result: Vec<Option<String>> = chars
2990 .iter()
2991 .map(|s| {
2992 s.as_ref()
2993 .map(|s| iconv_one(s, &from_enc, &to_enc, sub.as_deref()))
2994 })
2995 .collect();
2996
2997 Ok(RValue::vec(Vector::Character(result.into())))
2998}
2999
/// Map an encoding name to its canonical spelling.
///
/// The name is upper-cased first. The empty string and the native aliases
/// become `"UTF-8"`, the Latin-1 aliases become `"LATIN-1"`, and the ASCII
/// aliases become `"ASCII"`. Any other name (including `"BYTES"`) is
/// returned upper-cased and otherwise unchanged.
fn normalize_encoding_name(name: &str) -> String {
    let upper = name.to_uppercase();
    let canonical = match upper.as_str() {
        "" | "NATIVE" | "NATIVE.ENC" | "UTF8" | "UTF-8" => "UTF-8",
        "LATIN1" | "LATIN-1" | "ISO-8859-1" | "ISO8859-1" | "ISO88591" => "LATIN-1",
        "ASCII" | "US-ASCII" => "ASCII",
        _ => return upper,
    };
    canonical.to_string()
}
3012
/// Convert one string from encoding `from` to encoding `to`.
///
/// Both names are expected in the canonical spelling produced by
/// `normalize_encoding_name`.
///
/// * Identical source and target: the string is returned unchanged.
/// * `"ASCII"`: non-ASCII characters are substituted.
/// * `"LATIN-1"`: characters above U+00FF are substituted.
/// * `"BYTES"`: every UTF-8 byte is written as a `\xNN` escape.
/// * Any other target (including `"UTF-8"`): returned unchanged.
///
/// Substitution follows `sub`: `Some("byte")` writes each UTF-8 byte of the
/// character as `<NN>`, any other `Some(text)` writes `text`, and `None`
/// drops the character.
fn iconv_one(s: &str, from: &str, to: &str, sub: Option<&str>) -> String {
    if from == to {
        return s.to_string();
    }

    // Text emitted in place of a character the target cannot represent.
    let substitute = |c: char| -> String {
        match sub {
            Some("byte") => {
                let mut buf = [0u8; 4];
                c.encode_utf8(&mut buf)
                    .bytes()
                    .map(|b| format!("<{b:02x}>"))
                    .collect()
            }
            Some(replacement) => replacement.to_string(),
            None => String::new(),
        }
    };

    // Keep characters accepted by `fits`, substitute all others.
    let restrict = |fits: fn(char) -> bool| -> String {
        s.chars()
            .map(|c| if fits(c) { c.to_string() } else { substitute(c) })
            .collect()
    };

    match to {
        "ASCII" => restrict(|c: char| c.is_ascii()),
        "LATIN-1" => restrict(|c: char| u32::from(c) <= 255),
        "BYTES" => s.bytes().map(|b| format!("\\x{b:02x}")).collect(),
        _ => s.to_string(),
    }
}
3081
3082#[builtin(min_args = 1)]
3089fn builtin_enc2utf8(args: &[RValue], _: &[(String, RValue)]) -> Result<RValue, RError> {
3090 match args.first() {
3091 Some(v @ RValue::Vector(_)) => Ok(v.clone()),
3092 _ => Err(RError::new(
3093 RErrorKind::Argument,
3094 "argument is not a character vector".to_string(),
3095 )),
3096 }
3097}
3098
3099#[builtin(min_args = 1)]
3106fn builtin_enc2native(args: &[RValue], _: &[(String, RValue)]) -> Result<RValue, RError> {
3107 match args.first() {
3108 Some(v @ RValue::Vector(_)) => Ok(v.clone()),
3109 _ => Err(RError::new(
3110 RErrorKind::Argument,
3111 "argument is not a character vector".to_string(),
3112 )),
3113 }
3114}
3115
3116#[builtin(min_args = 2)]
3129fn builtin_strtrim(args: &[RValue], _: &[(String, RValue)]) -> Result<RValue, RError> {
3130 let chars = match args.first() {
3131 Some(RValue::Vector(rv)) => rv.to_characters(),
3132 _ => {
3133 return Err(RError::new(
3134 RErrorKind::Argument,
3135 "non-character argument".to_string(),
3136 ))
3137 }
3138 };
3139 let widths = match args.get(1) {
3140 Some(RValue::Vector(rv)) => rv.to_doubles(),
3141 _ => {
3142 return Err(RError::new(
3143 RErrorKind::Argument,
3144 "'width' must be numeric".to_string(),
3145 ))
3146 }
3147 };
3148 if widths.is_empty() {
3149 return Err(RError::new(
3150 RErrorKind::Argument,
3151 "invalid 'width' argument — must be a positive number".to_string(),
3152 ));
3153 }
3154
3155 let result: Vec<Option<String>> = chars
3156 .iter()
3157 .enumerate()
3158 .map(|(i, s)| {
3159 let w = widths[i % widths.len()];
3160 match (s, w) {
3161 (Some(s), Some(w)) => {
3162 let max_w = w.max(0.0) as usize;
3163 Some(trim_to_width(s, max_w))
3164 }
3165 (None, _) => None,
3166 (_, None) => None,
3167 }
3168 })
3169 .collect();
3170 Ok(RValue::vec(Vector::Character(result.into())))
3171}
3172
3173fn trim_to_width(s: &str, max_width: usize) -> String {
3175 use unicode_width::UnicodeWidthChar;
3176 let mut result = String::new();
3177 let mut current_width = 0;
3178 for ch in s.chars() {
3179 let ch_width = UnicodeWidthChar::width(ch).unwrap_or(0);
3180 if current_width + ch_width > max_width {
3181 break;
3182 }
3183 result.push(ch);
3184 current_width += ch_width;
3185 }
3186 result
3187}
3188
3189#[builtin(name = "URLencode", min_args = 1, namespace = "utils")]
3201fn builtin_urlencode(args: &[RValue], named: &[(String, RValue)]) -> Result<RValue, RError> {
3202 let url_vec = args
3203 .first()
3204 .and_then(|v| v.as_vector())
3205 .map(|v| v.to_characters())
3206 .unwrap_or_default();
3207 let reserved = named
3208 .iter()
3209 .find(|(k, _)| k == "reserved")
3210 .and_then(|(_, v)| v.as_vector()?.as_logical_scalar())
3211 .unwrap_or(true);
3212
3213 let unreserved = |b: u8| -> bool {
3214 b.is_ascii_alphanumeric() || b == b'-' || b == b'_' || b == b'.' || b == b'~'
3215 };
3216 let is_reserved = |b: u8| -> bool {
3217 matches!(
3218 b,
3219 b':' | b'/'
3220 | b'?'
3221 | b'#'
3222 | b'['
3223 | b']'
3224 | b'@'
3225 | b'!'
3226 | b'$'
3227 | b'&'
3228 | b'\''
3229 | b'('
3230 | b')'
3231 | b'*'
3232 | b'+'
3233 | b','
3234 | b';'
3235 | b'='
3236 )
3237 };
3238
3239 let result: Vec<Option<String>> = url_vec
3240 .into_iter()
3241 .map(|s_opt| {
3242 s_opt.map(|s| {
3243 let mut encoded = String::new();
3244 for &b in s.as_bytes() {
3245 if unreserved(b) || (!reserved && is_reserved(b)) {
3246 encoded.push(char::from(b));
3247 } else {
3248 encoded.push_str(&format!("%{:02X}", b));
3249 }
3250 }
3251 encoded
3252 })
3253 })
3254 .collect();
3255 Ok(RValue::vec(Vector::Character(result.into())))
3256}
3257
3258#[builtin(name = "URLdecode", min_args = 1, namespace = "utils")]
3265fn builtin_urldecode(args: &[RValue], _named: &[(String, RValue)]) -> Result<RValue, RError> {
3266 let url_vec = args
3267 .first()
3268 .and_then(|v| v.as_vector())
3269 .map(|v| v.to_characters())
3270 .unwrap_or_default();
3271
3272 let result: Vec<Option<String>> = url_vec
3273 .into_iter()
3274 .map(|s_opt| {
3275 s_opt.map(|s| {
3276 let mut bytes = Vec::new();
3277 let mut chars = s.bytes();
3278 while let Some(b) = chars.next() {
3279 if b == b'%' {
3280 let hi = chars.next().unwrap_or(b'0');
3281 let lo = chars.next().unwrap_or(b'0');
3282 let hex = [hi, lo];
3283 if let Ok(val) =
3284 u8::from_str_radix(std::str::from_utf8(&hex).unwrap_or("00"), 16)
3285 {
3286 bytes.push(val);
3287 }
3288 } else if b == b'+' {
3289 bytes.push(b' ');
3290 } else {
3291 bytes.push(b);
3292 }
3293 }
3294 String::from_utf8_lossy(&bytes).into_owned()
3295 })
3296 })
3297 .collect();
3298 Ok(RValue::vec(Vector::Character(result.into())))
3299}
3300
3301#[builtin(min_args = 1)]
3307fn builtin_casefold(args: &[RValue], named: &[(String, RValue)]) -> Result<RValue, RError> {
3308 let upper = named
3309 .iter()
3310 .find(|(k, _)| k == "upper")
3311 .and_then(|(_, v)| v.as_vector()?.as_logical_scalar())
3312 .unwrap_or(false);
3313
3314 match args.first() {
3315 Some(RValue::Vector(rv)) => {
3316 let result: Vec<Option<String>> = rv
3317 .to_characters()
3318 .into_iter()
3319 .map(|opt| {
3320 opt.map(|s| {
3321 if upper {
3322 s.to_uppercase()
3323 } else {
3324 s.to_lowercase()
3325 }
3326 })
3327 })
3328 .collect();
3329 Ok(RValue::vec(Vector::Character(result.into())))
3330 }
3331 _ => Ok(RValue::Null),
3332 }
3333}
3334
3335#[builtin(name = "encodeString", min_args = 1)]
3346fn builtin_encode_string(args: &[RValue], named: &[(String, RValue)]) -> Result<RValue, RError> {
3347 let width: Option<usize> = named
3348 .iter()
3349 .find(|(k, _)| k == "width")
3350 .and_then(|(_, v)| v.as_vector()?.as_integer_scalar())
3351 .and_then(|i| usize::try_from(i).ok())
3352 .or_else(|| {
3353 args.get(1)
3354 .and_then(|v| v.as_vector()?.as_integer_scalar())
3355 .and_then(|i| usize::try_from(i).ok())
3356 });
3357
3358 let quote = named
3359 .iter()
3360 .find(|(k, _)| k == "quote")
3361 .and_then(|(_, v)| v.as_vector()?.as_character_scalar())
3362 .or_else(|| {
3363 args.get(2)
3364 .and_then(|v| v.as_vector()?.as_character_scalar())
3365 })
3366 .unwrap_or_default();
3367
3368 let na_encode = named
3369 .iter()
3370 .find(|(k, _)| k == "na.encode")
3371 .and_then(|(_, v)| v.as_vector()?.as_logical_scalar())
3372 .unwrap_or(true);
3373
3374 let justify = named
3375 .iter()
3376 .find(|(k, _)| k == "justify")
3377 .and_then(|(_, v)| v.as_vector()?.as_character_scalar())
3378 .or_else(|| {
3379 args.get(3)
3380 .and_then(|v| v.as_vector()?.as_character_scalar())
3381 })
3382 .unwrap_or_else(|| {
3383 if width.is_some() {
3384 "left".to_string()
3385 } else {
3386 "none".to_string()
3387 }
3388 });
3389
3390 match args.first() {
3391 Some(RValue::Vector(rv)) => {
3392 let chars = rv.to_characters();
3394 let mut encoded: Vec<Option<String>> = chars
3395 .into_iter()
3396 .map(|opt| match opt {
3397 Some(s) => {
3398 let escaped = s
3399 .replace('\\', "\\\\")
3400 .replace('\n', "\\n")
3401 .replace('\r', "\\r")
3402 .replace('\t', "\\t");
3403 let escaped = if !quote.is_empty() {
3404 escaped.replace("e, &format!("\\{quote}"))
3405 } else {
3406 escaped
3407 };
3408 if quote.is_empty() {
3409 Some(escaped)
3410 } else {
3411 Some(format!("{quote}{escaped}{quote}"))
3412 }
3413 }
3414 None => {
3415 if na_encode {
3416 Some("NA".to_string())
3417 } else {
3418 None
3419 }
3420 }
3421 })
3422 .collect();
3423
3424 if let Some(w) = width {
3426 let effective_width = if w == 0 {
3428 encoded
3429 .iter()
3430 .filter_map(|s| s.as_ref())
3431 .map(|s| UnicodeWidthStr::width(s.as_str()))
3432 .max()
3433 .unwrap_or(0)
3434 } else {
3435 w
3436 };
3437
3438 if effective_width > 0 && justify != "none" {
3439 encoded = encoded
3440 .into_iter()
3441 .map(|opt| {
3442 opt.map(|s| {
3443 let display_w = UnicodeWidthStr::width(s.as_str());
3444 if display_w >= effective_width {
3445 s
3446 } else {
3447 let pad = effective_width - display_w;
3448 match justify.as_str() {
3449 "left" => format!("{}{}", s, " ".repeat(pad)),
3450 "right" => format!("{}{}", " ".repeat(pad), s),
3451 "centre" | "center" => {
3452 let left_pad = pad / 2;
3453 let right_pad = pad - left_pad;
3454 format!(
3455 "{}{}{}",
3456 " ".repeat(left_pad),
3457 s,
3458 " ".repeat(right_pad)
3459 )
3460 }
3461 _ => s, }
3463 }
3464 })
3465 })
3466 .collect();
3467 }
3468 }
3469
3470 Ok(RValue::vec(Vector::Character(encoded.into())))
3471 }
3472 _ => Ok(RValue::Null),
3473 }
3474}
3475
3476#[builtin(name = "substr<-", min_args = 4)]
3486fn builtin_substr_assign(args: &[RValue], _named: &[(String, RValue)]) -> Result<RValue, RError> {
3487 let x_vec = args
3488 .first()
3489 .and_then(|v| v.as_vector())
3490 .map(|v| v.to_characters())
3491 .unwrap_or_default();
3492 let start_vec = args
3493 .get(1)
3494 .and_then(|v| v.as_vector())
3495 .map(|v| v.to_integers())
3496 .unwrap_or_else(|| vec![Some(1)]);
3497 let stop_vec = args
3498 .get(2)
3499 .and_then(|v| v.as_vector())
3500 .map(|v| v.to_integers())
3501 .unwrap_or_else(|| vec![Some(1)]);
3502 let value_vec = args
3503 .get(3)
3504 .and_then(|v| v.as_vector())
3505 .map(|v| v.to_characters())
3506 .unwrap_or_default();
3507
3508 if x_vec.is_empty() {
3509 return Ok(RValue::vec(Vector::Character(vec![].into())));
3510 }
3511
3512 let n = x_vec.len();
3513 let result: Vec<Option<String>> = (0..n)
3514 .map(|i| {
3515 let x_opt = &x_vec[i];
3516 let start_opt = if start_vec.is_empty() {
3517 Some(1)
3518 } else {
3519 start_vec[i % start_vec.len()]
3520 };
3521 let stop_opt = if stop_vec.is_empty() {
3522 Some(1)
3523 } else {
3524 stop_vec[i % stop_vec.len()]
3525 };
3526 let value_opt = if value_vec.is_empty() {
3527 None
3528 } else {
3529 value_vec[i % value_vec.len()].clone()
3530 };
3531
3532 match (x_opt, start_opt, stop_opt, value_opt) {
3533 (Some(x), Some(start_i), Some(stop_i), Some(value)) => {
3534 let start = usize::try_from(start_i).unwrap_or(0);
3535 let stop = usize::try_from(stop_i).unwrap_or(0);
3536 let chars: Vec<char> = x.chars().collect();
3537 let start = start.saturating_sub(1).min(chars.len());
3538 let stop = stop.min(chars.len());
3539 let range_len = stop.saturating_sub(start);
3540 let repl_chars: Vec<char> = value.chars().take(range_len).collect();
3541
3542 let mut result: Vec<char> = chars[..start].to_vec();
3543 result.extend(&repl_chars);
3544 if repl_chars.len() < range_len {
3545 result.extend(&chars[start + repl_chars.len()..stop]);
3546 }
3547 result.extend(&chars[stop..]);
3548
3549 Some(result.into_iter().collect())
3550 }
3551 (None, _, _, _) | (_, None, _, _) | (_, _, None, _) | (_, _, _, None) => None,
3552 }
3553 })
3554 .collect();
3555
3556 Ok(RValue::vec(Vector::Character(result.into())))
3557}
3558
3559#[builtin(min_args = 1)]
3577fn builtin_strwrap(args: &[RValue], named: &[(String, RValue)]) -> Result<RValue, RError> {
3578 let call_args = CallArgs::new(args, named);
3579
3580 let x = match call_args.value("x", 0) {
3581 Some(v) => match v.as_vector() {
3582 Some(v) => v.to_characters(),
3583 None => {
3584 return Err(RError::new(
3585 RErrorKind::Argument,
3586 "invalid 'x' argument".to_string(),
3587 ))
3588 }
3589 },
3590 None => {
3591 return Err(RError::new(
3592 RErrorKind::Argument,
3593 "argument 'x' is missing".to_string(),
3594 ))
3595 }
3596 };
3597
3598 let width = call_args.integer_or("width", 1, 80) as usize;
3599 let indent = call_args.integer_or("indent", 2, 0) as usize;
3600 let exdent = call_args.integer_or("exdent", 3, 0) as usize;
3601 let prefix = call_args.optional_string("prefix", 4).unwrap_or_default();
3602 let initial = call_args
3603 .optional_string("initial", 5)
3604 .unwrap_or_else(|| prefix.clone());
3605 let simplify = call_args.logical_flag("simplify", 6, true);
3606
3607 let indent_str = " ".repeat(indent);
3608 let exdent_str = " ".repeat(exdent);
3609
3610 let mut all_lines: Vec<Vec<Option<String>>> = Vec::new();
3611 for s_opt in &x {
3612 match s_opt {
3613 None => all_lines.push(vec![None]),
3614 Some(s) => {
3615 let effective_width = width.saturating_sub(initial.len() + indent);
3616 if effective_width == 0 {
3617 all_lines.push(vec![Some(format!("{initial}{indent_str}{s}"))]);
3618 continue;
3619 }
3620 let wrapped = textwrap::wrap(s, effective_width);
3621 let mut lines = Vec::new();
3622 for (i, line) in wrapped.iter().enumerate() {
3623 if i == 0 {
3624 lines.push(Some(format!("{initial}{indent_str}{line}")));
3625 } else {
3626 let ew = width.saturating_sub(prefix.len() + exdent);
3627 let rewrapped = if ew > 0 && line.len() > ew {
3628 textwrap::wrap(line, ew)
3629 } else {
3630 vec![std::borrow::Cow::Borrowed(line.as_ref())]
3631 };
3632 for subline in rewrapped {
3633 lines.push(Some(format!("{prefix}{exdent_str}{subline}")));
3634 }
3635 }
3636 }
3637 if lines.is_empty() {
3638 lines.push(Some(format!("{initial}{indent_str}")));
3639 }
3640 all_lines.push(lines);
3641 }
3642 }
3643 }
3644
3645 if simplify {
3646 let flat: Vec<Option<String>> = all_lines.into_iter().flatten().collect();
3647 Ok(RValue::vec(Vector::Character(flat.into())))
3648 } else {
3649 let list_vals: Vec<(Option<String>, RValue)> = all_lines
3650 .into_iter()
3651 .map(|lines| (None, RValue::vec(Vector::Character(lines.into()))))
3652 .collect();
3653 Ok(RValue::List(RList::new(list_vals)))
3654 }
3655}
3656
3657#[builtin(name = "toString", min_args = 1)]
3668fn builtin_to_string(args: &[RValue], named: &[(String, RValue)]) -> Result<RValue, RError> {
3669 let call_args = CallArgs::new(args, named);
3670 let sep = call_args
3671 .optional_string("sep", 1)
3672 .unwrap_or_else(|| ", ".to_string());
3673
3674 let chars = match args.first() {
3675 Some(RValue::Vector(rv)) => rv.to_characters(),
3676 Some(RValue::Null) => Vec::new(),
3677 Some(RValue::List(l)) => l
3678 .values
3679 .iter()
3680 .map(|(_, v)| v.as_vector().and_then(|vec| vec.as_character_scalar()))
3681 .collect(),
3682 _ => vec![],
3683 };
3684
3685 let parts: Vec<String> = chars.into_iter().flatten().collect();
3686 Ok(RValue::vec(Vector::Character(
3687 vec![Some(parts.join(&sep))].into(),
3688 )))
3689}
3690
3691#[builtin(min_args = 1)]
3698fn builtin_gettext(args: &[RValue], _named: &[(String, RValue)]) -> Result<RValue, RError> {
3699 match args.first() {
3701 Some(v) => Ok(v.clone()),
3702 None => Ok(RValue::vec(Vector::Character(
3703 vec![Some(String::new())].into(),
3704 ))),
3705 }
3706}
3707
3708#[builtin(min_args = 3)]
3716fn builtin_ngettext(args: &[RValue], _named: &[(String, RValue)]) -> Result<RValue, RError> {
3717 let n = args
3718 .first()
3719 .and_then(|v| v.as_vector()?.as_integer_scalar())
3720 .unwrap_or(1);
3721 let msg = if n == 1 { args.get(1) } else { args.get(2) };
3722 match msg {
3723 Some(v) => Ok(v.clone()),
3724 None => Ok(RValue::vec(Vector::Character(
3725 vec![Some(String::new())].into(),
3726 ))),
3727 }
3728}
3729
3730#[builtin(min_args = 1)]
3737fn builtin_gettextf(args: &[RValue], named: &[(String, RValue)]) -> Result<RValue, RError> {
3738 builtin_sprintf(args, named)
3740}
3741
3742#[builtin(name = "type.convert", min_args = 1)]
3749fn builtin_type_convert(args: &[RValue], _named: &[(String, RValue)]) -> Result<RValue, RError> {
3750 let chars = match args.first() {
3751 Some(RValue::Vector(rv)) => rv.to_characters(),
3752 _ => return Ok(args.first().cloned().unwrap_or(RValue::Null)),
3753 };
3754
3755 let all_logical = chars
3757 .iter()
3758 .all(|c| matches!(c.as_deref(), Some("TRUE" | "FALSE" | "T" | "F") | None));
3759 if all_logical && !chars.is_empty() {
3760 let vals: Vec<Option<bool>> = chars
3761 .iter()
3762 .map(|c| match c.as_deref() {
3763 Some("TRUE") | Some("T") => Some(true),
3764 Some("FALSE") | Some("F") => Some(false),
3765 _ => None,
3766 })
3767 .collect();
3768 return Ok(RValue::vec(Vector::Logical(vals.into())));
3769 }
3770
3771 let all_int = chars.iter().all(|c| match c {
3773 None => true,
3774 Some(s) => s.parse::<i64>().is_ok(),
3775 });
3776 if all_int && !chars.is_empty() {
3777 let vals: Vec<Option<i64>> = chars
3778 .iter()
3779 .map(|c| match c {
3780 None => None,
3781 Some(s) => s.parse::<i64>().ok(),
3782 })
3783 .collect();
3784 return Ok(RValue::vec(Vector::Integer(vals.into())));
3785 }
3786
3787 let all_double = chars.iter().all(|c| match c {
3789 None => true,
3790 Some(s) => s.parse::<f64>().is_ok() || s == "NA" || s == "NaN" || s == "Inf" || s == "-Inf",
3791 });
3792 if all_double && !chars.is_empty() {
3793 let vals: Vec<Option<f64>> = chars
3794 .iter()
3795 .map(|c| match c {
3796 None => None,
3797 Some(s) => match s.as_str() {
3798 "NA" => None,
3799 "NaN" => Some(f64::NAN),
3800 "Inf" => Some(f64::INFINITY),
3801 "-Inf" => Some(f64::NEG_INFINITY),
3802 _ => s.parse::<f64>().ok(),
3803 },
3804 })
3805 .collect();
3806 return Ok(RValue::vec(Vector::Double(vals.into())));
3807 }
3808
3809 Ok(args.first().cloned().unwrap_or(RValue::Null))
3811}
3812
3813#[builtin(name = "Sys.getlocale", min_args = 0)]
3819fn builtin_sys_getlocale(_args: &[RValue], _named: &[(String, RValue)]) -> Result<RValue, RError> {
3820 Ok(RValue::vec(Vector::Character(
3821 vec![Some("C".to_string())].into(),
3822 )))
3823}
3824
3825#[builtin(name = "Sys.setlocale", min_args = 0)]
3832fn builtin_sys_setlocale(_args: &[RValue], _named: &[(String, RValue)]) -> Result<RValue, RError> {
3833 Ok(RValue::vec(Vector::Character(
3834 vec![Some("C".to_string())].into(),
3835 )))
3836}
3837
3838