r/interpreter/packages/
rd.rs

1//! Parser for R documentation (.Rd) files.
2//!
3//! Rd is a LaTeX-like documentation format used by R packages. Each `.Rd` file
4//! in a package's `man/` directory documents one or more R objects (functions,
5//! datasets, classes, etc.).
6//!
7//! This module implements a metadata-first parser: it reliably extracts the
8//! structural sections needed for `help()` lookup, topic resolution, and
9//! example extraction, without attempting full GNU-R-compatible Rd rendering.
10//!
11//! The parser is a hand-written stateful lexer that handles:
12//! - Rd commands (`\name{}`, `\title{}`, etc.)
13//! - Nested brace groups
14//! - Escaped characters (`\%`, `\{`, `\}`, `\\`)
15//! - Comment lines (starting with `%`)
16//! - `#ifdef` / `#ifndef` / `#endif` preprocessor directives (treated as text)
17//! - `\dontrun{}`, `\donttest{}`, `\dontshow{}` blocks in examples
18
19use std::collections::HashMap;
20use std::path::Path;
21
22// region: Data types
23
/// A parsed R documentation file.
///
/// Contains the extracted metadata and section content from an `.Rd` file.
/// Text content has markup stripped to produce plain-text representations
/// suitable for terminal display.
///
/// Single-occurrence sections are `Option<String>` (`None` when absent);
/// repeatable entries (`\alias{}`, `\keyword{}`, `\section{}{}`, argument
/// items) are stored as vectors.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct RdDoc {
    /// The documented object's name (`\name{}`).
    pub name: Option<String>,
    /// Topic aliases that resolve to this doc page (`\alias{}`), one entry per
    /// `\alias{}` command.
    pub aliases: Vec<String>,
    /// Short one-line title (`\title{}`).
    pub title: Option<String>,
    /// Description section (`\description{}`).
    pub description: Option<String>,
    /// Usage section showing how the documented object is called (`\usage{}`).
    pub usage: Option<String>,
    /// Argument descriptions (`\arguments{}`) as ordered
    /// `(parameter name, description)` pairs. This is a list, not a map, so
    /// order is kept and duplicate names are not merged.
    pub arguments: Vec<(String, String)>,
    /// Return value description (`\value{}`).
    pub value: Option<String>,
    /// Runnable examples (`\examples{}`).
    pub examples: Option<String>,
    /// See-also cross-references (`\seealso{}`).
    pub seealso: Option<String>,
    /// Document type (`\docType{}`), e.g. "package", "data", "class".
    pub doc_type: Option<String>,
    /// Keywords (`\keyword{}`), one entry per `\keyword{}` command.
    pub keywords: Vec<String>,
    /// Author information (`\author{}`).
    pub author: Option<String>,
    /// Details section (`\details{}`).
    pub details: Option<String>,
    /// Note section (`\note{}`).
    pub note: Option<String>,
    /// References section (`\references{}`).
    pub references: Option<String>,
    /// Named custom sections (`\section{Name}{...}`) as ordered
    /// `(section name, content)` pairs.
    pub sections: Vec<(String, String)>,
}
64
/// Failures reported while reading or parsing an Rd file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum RdError {
    /// A brace group was opened but never closed; `line` is the line on which
    /// the offending group was being read.
    UnbalancedBraces { line: usize },
    /// Reading the file failed; carries the underlying error's message.
    IoError(String),
}

impl std::fmt::Display for RdError {
    /// Render the error as a single human-readable line.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let rendered = match self {
            Self::UnbalancedBraces { line } => {
                format!("Rd parse error: unbalanced braces near line {line}")
            }
            Self::IoError(msg) => format!("Rd I/O error: {msg}"),
        };
        f.write_str(&rendered)
    }
}

impl std::error::Error for RdError {}
86
87// endregion
88
89// region: Parser
90
91impl RdDoc {
92    /// Parse an Rd document from its text content.
93    pub fn parse(input: &str) -> Result<Self, RdError> {
94        let mut doc = RdDoc::default();
95        let mut parser = RdParser::new(input);
96        parser.parse_toplevel(&mut doc)?;
97        Ok(doc)
98    }
99
100    /// Parse an Rd file from disk.
101    pub fn parse_file(path: &Path) -> Result<Self, RdError> {
102        let content = std::fs::read_to_string(path).map_err(|e| RdError::IoError(e.to_string()))?;
103        Self::parse(&content)
104    }
105
106    /// Format the document as plain text for terminal display.
107    pub fn format_text(&self) -> String {
108        let mut out = String::new();
109
110        // Header
111        if let Some(name) = &self.name {
112            out.push_str(name);
113            if let Some(pkg) = self.keywords.first() {
114                out.push_str(&format!(" ({pkg})"));
115            }
116            out.push('\n');
117            out.push_str(&"\u{2500}".repeat(name.len().max(20)));
118            out.push('\n');
119        }
120
121        // Title
122        if let Some(title) = &self.title {
123            out.push('\n');
124            out.push_str(title);
125            out.push_str("\n\n");
126        }
127
128        // Description
129        if let Some(desc) = &self.description {
130            out.push_str("Description:\n");
131            for line in desc.lines() {
132                out.push_str("  ");
133                out.push_str(line);
134                out.push('\n');
135            }
136            out.push('\n');
137        }
138
139        // Usage
140        if let Some(usage) = &self.usage {
141            out.push_str("Usage:\n");
142            for line in usage.lines() {
143                out.push_str("  ");
144                out.push_str(line);
145                out.push('\n');
146            }
147            out.push('\n');
148        }
149
150        // Arguments
151        if !self.arguments.is_empty() {
152            out.push_str("Arguments:\n");
153            for (name, desc) in &self.arguments {
154                out.push_str(&format!("  {:<12} {}\n", name, desc));
155            }
156            out.push('\n');
157        }
158
159        // Details
160        if let Some(details) = &self.details {
161            out.push_str("Details:\n");
162            for line in details.lines() {
163                out.push_str("  ");
164                out.push_str(line);
165                out.push('\n');
166            }
167            out.push('\n');
168        }
169
170        // Value
171        if let Some(value) = &self.value {
172            out.push_str("Value:\n");
173            for line in value.lines() {
174                out.push_str("  ");
175                out.push_str(line);
176                out.push('\n');
177            }
178            out.push('\n');
179        }
180
181        // Custom sections
182        for (name, content) in &self.sections {
183            out.push_str(name);
184            out.push_str(":\n");
185            for line in content.lines() {
186                out.push_str("  ");
187                out.push_str(line);
188                out.push('\n');
189            }
190            out.push('\n');
191        }
192
193        // Note
194        if let Some(note) = &self.note {
195            out.push_str("Note:\n");
196            for line in note.lines() {
197                out.push_str("  ");
198                out.push_str(line);
199                out.push('\n');
200            }
201            out.push('\n');
202        }
203
204        // Author
205        if let Some(author) = &self.author {
206            out.push_str("Author(s):\n");
207            out.push_str("  ");
208            out.push_str(author);
209            out.push_str("\n\n");
210        }
211
212        // References
213        if let Some(refs) = &self.references {
214            out.push_str("References:\n");
215            for line in refs.lines() {
216                out.push_str("  ");
217                out.push_str(line);
218                out.push('\n');
219            }
220            out.push('\n');
221        }
222
223        // See Also
224        if let Some(seealso) = &self.seealso {
225            out.push_str("See Also:\n");
226            out.push_str("  ");
227            out.push_str(seealso);
228            out.push_str("\n\n");
229        }
230
231        // Examples
232        if let Some(examples) = &self.examples {
233            out.push_str("Examples:\n");
234            for line in examples.lines() {
235                out.push_str("  ");
236                out.push_str(line);
237                out.push('\n');
238            }
239            out.push('\n');
240        }
241
242        // Aliases (if different from name)
243        let non_name_aliases: Vec<&String> = self
244            .aliases
245            .iter()
246            .filter(|a| self.name.as_ref() != Some(a))
247            .collect();
248        if !non_name_aliases.is_empty() {
249            out.push_str("Aliases: ");
250            out.push_str(
251                &non_name_aliases
252                    .iter()
253                    .map(|a| a.as_str())
254                    .collect::<Vec<_>>()
255                    .join(", "),
256            );
257            out.push('\n');
258        }
259
260        out
261    }
262
263    /// Extract the examples section as runnable R code.
264    ///
265    /// Returns `None` if no examples section exists. The returned code has
266    /// `\dontrun{}` blocks removed and `\donttest{}` / `\dontshow{}` blocks
267    /// unwrapped (their content is included).
268    pub fn examples_code(&self) -> Option<&str> {
269        self.examples.as_deref()
270    }
271
272    /// Serialize this document to R's `.Rd` format.
273    ///
274    /// Produces a well-formed Rd file that can be placed in a package's `man/`
275    /// directory. Text content is escaped so that `%`, `{`, `}`, and `\` in
276    /// user-facing strings don't break Rd parsing, while Rd structural commands
277    /// are emitted verbatim.
278    pub fn to_rd(&self) -> String {
279        let mut out = String::new();
280
281        // \name{}
282        if let Some(name) = &self.name {
283            out.push_str(&format!("\\name{{{}}}\n", escape_rd(name)));
284        }
285
286        // \alias{} — one per alias
287        for alias in &self.aliases {
288            out.push_str(&format!("\\alias{{{}}}\n", escape_rd(alias)));
289        }
290
291        // \title{}
292        if let Some(title) = &self.title {
293            out.push_str(&format!("\\title{{{}}}\n", escape_rd(title)));
294        }
295
296        // \description{}
297        if let Some(desc) = &self.description {
298            out.push_str("\\description{\n");
299            out.push_str(&escape_rd(desc));
300            out.push('\n');
301            out.push_str("}\n");
302        }
303
304        // \usage{}
305        if let Some(usage) = &self.usage {
306            out.push_str("\\usage{\n");
307            // Usage is R code — escape only %, not braces (braces are valid R syntax
308            // and Rd parsers expect them in usage blocks).
309            out.push_str(&escape_rd_usage(usage));
310            out.push('\n');
311            out.push_str("}\n");
312        }
313
314        // \arguments{}
315        if !self.arguments.is_empty() {
316            out.push_str("\\arguments{\n");
317            for (param, desc) in &self.arguments {
318                out.push_str(&format!(
319                    "  \\item{{{}}}{{{}}}",
320                    escape_rd(param),
321                    escape_rd(desc)
322                ));
323                out.push('\n');
324            }
325            out.push_str("}\n");
326        }
327
328        // \details{}
329        if let Some(details) = &self.details {
330            out.push_str("\\details{\n");
331            out.push_str(&escape_rd(details));
332            out.push('\n');
333            out.push_str("}\n");
334        }
335
336        // \value{}
337        if let Some(value) = &self.value {
338            out.push_str("\\value{\n");
339            out.push_str(&escape_rd(value));
340            out.push('\n');
341            out.push_str("}\n");
342        }
343
344        // \note{}
345        if let Some(note) = &self.note {
346            out.push_str("\\note{\n");
347            out.push_str(&escape_rd(note));
348            out.push('\n');
349            out.push_str("}\n");
350        }
351
352        // \author{}
353        if let Some(author) = &self.author {
354            out.push_str(&format!("\\author{{{}}}\n", escape_rd(author)));
355        }
356
357        // \references{}
358        if let Some(refs) = &self.references {
359            out.push_str("\\references{\n");
360            out.push_str(&escape_rd(refs));
361            out.push('\n');
362            out.push_str("}\n");
363        }
364
365        // \seealso{}
366        if let Some(seealso) = &self.seealso {
367            out.push_str("\\seealso{\n");
368            out.push_str(&escape_rd(seealso));
369            out.push('\n');
370            out.push_str("}\n");
371        }
372
373        // \section{Name}{...} — custom sections
374        for (sec_name, sec_content) in &self.sections {
375            out.push_str(&format!("\\section{{{}}}{{", escape_rd(sec_name)));
376            out.push('\n');
377            out.push_str(&escape_rd(sec_content));
378            out.push('\n');
379            out.push_str("}\n");
380        }
381
382        // \examples{}
383        if let Some(examples) = &self.examples {
384            out.push_str("\\examples{\n");
385            // Examples are R code — wrap in \dontrun{} since these are
386            // synthesized from doc comments, not tested runnable examples.
387            out.push_str("\\dontrun{\n");
388            out.push_str(&escape_rd_usage(examples));
389            out.push('\n');
390            out.push_str("}\n");
391            out.push_str("}\n");
392        }
393
394        // \keyword{}
395        for kw in &self.keywords {
396            out.push_str(&format!("\\keyword{{{}}}\n", escape_rd(kw)));
397        }
398
399        // \docType{}
400        if let Some(doc_type) = &self.doc_type {
401            out.push_str(&format!("\\docType{{{}}}\n", escape_rd(doc_type)));
402        }
403
404        out
405    }
406}
407
/// Escape plain text so it can be embedded in Rd markup.
///
/// Rd gives four characters special meaning: `%` opens a comment, `{` / `}`
/// delimit arguments, and `\` starts a command. Each occurrence is prefixed
/// with a backslash; every other character is copied through unchanged.
fn escape_rd(text: &str) -> String {
    text.chars()
        .fold(String::with_capacity(text.len()), |mut escaped, ch| {
            if matches!(ch, '\\' | '%' | '{' | '}') {
                escaped.push('\\');
            }
            escaped.push(ch);
            escaped
        })
}
425
/// Escape R code destined for Rd usage/examples blocks.
///
/// Only `%` is prefixed with a backslash (it still starts an Rd comment in
/// code sections). Braces and backslashes are valid R syntax and are copied
/// through untouched so the code stays valid.
fn escape_rd_usage(text: &str) -> String {
    let mut escaped = String::with_capacity(text.len());
    for ch in text.chars() {
        if ch == '%' {
            escaped.push('\\');
        }
        escaped.push(ch);
    }
    escaped
}
434
/// Stateful parser for Rd files.
///
/// The parser processes input character by character, tracking brace depth
/// and recognizing Rd commands, comments, and escape sequences.
struct RdParser<'a> {
    /// The complete Rd source text being parsed.
    input: &'a str,
    /// Current read position as a BYTE offset into `input` (advanced by each
    /// consumed character's UTF-8 length).
    pos: usize,
    /// Current line number; starts at 1 and is incremented each time a `\n`
    /// is consumed. Used for error reporting.
    line: usize,
}
444
445impl<'a> RdParser<'a> {
446    fn new(input: &'a str) -> Self {
447        Self {
448            input,
449            pos: 0,
450            line: 1,
451        }
452    }
453
454    /// Peek at the current character without advancing.
455    fn peek(&self) -> Option<char> {
456        self.input[self.pos..].chars().next()
457    }
458
459    /// Peek at the next n characters as a string slice (char-aware, not byte-aware).
460    fn peek_str(&self, n: usize) -> &'a str {
461        let remaining = &self.input[self.pos..];
462        let end = remaining
463            .char_indices()
464            .nth(n)
465            .map(|(i, _)| i)
466            .unwrap_or(remaining.len());
467        &remaining[..end]
468    }
469
470    /// Advance by one character and return it.
471    fn advance(&mut self) -> Option<char> {
472        let ch = self.input[self.pos..].chars().next()?;
473        self.pos += ch.len_utf8();
474        if ch == '\n' {
475            self.line += 1;
476        }
477        Some(ch)
478    }
479
480    /// Skip whitespace (but not newlines).
481    fn skip_spaces(&mut self) {
482        while let Some(ch) = self.peek() {
483            if ch == ' ' || ch == '\t' {
484                self.advance();
485            } else {
486                break;
487            }
488        }
489    }
490
491    /// Check if we're at end of input.
492    fn at_end(&self) -> bool {
493        self.pos >= self.input.len()
494    }
495
496    /// Read a command name after `\`. Returns the command name (e.g., "name", "title").
497    fn read_command_name(&mut self) -> String {
498        let mut name = String::new();
499        while let Some(ch) = self.peek() {
500            if ch.is_ascii_alphanumeric() || ch == '.' || ch == '_' {
501                name.push(ch);
502                self.advance();
503            } else {
504                break;
505            }
506        }
507        name
508    }
509
510    /// Read a brace-delimited argument `{...}`, handling nesting and Rd markup.
511    /// Returns the content with markup stripped to plain text.
512    fn read_brace_arg(&mut self) -> Result<String, RdError> {
513        self.read_brace_arg_inner(false)
514    }
515
516    /// Read a brace-delimited argument in verbatim mode.
517    fn read_brace_arg_verbatim(&mut self) -> Result<String, RdError> {
518        self.read_brace_arg_ex(true, false)
519    }
520
521    /// Read a brace-delimited argument in R-code mode.
522    ///
523    /// Like normal mode (Rd commands still expanded via `\` + alpha → break),
524    /// but tracks R string literals so `{` inside `"..."` or `'...'` doesn't
525    /// affect brace depth. `%` still acts as Rd comment. `#` comments still
526    /// count braces (matching GNU R's gramRd.y behavior — `## END}` closes).
527    ///
528    /// Used for `\examples{}` (RSECTIONHEADER) and `\code{}` (RCODEMACRO).
529    fn read_brace_arg_rcode(&mut self) -> Result<String, RdError> {
530        self.read_brace_arg_ex(false, true)
531    }
532
533    fn read_brace_arg_inner(&mut self, verbatim: bool) -> Result<String, RdError> {
534        self.read_brace_arg_ex(verbatim, false)
535    }
536
537    /// Core brace-delimited argument reader.
538    ///
539    /// `verbatim`: no Rd command expansion, `%` is literal.
540    /// `rlike`: track R string literals (don't count braces inside strings).
541    fn read_brace_arg_ex(&mut self, verbatim: bool, rlike: bool) -> Result<String, RdError> {
542        // Expect opening brace
543        if self.peek() != Some('{') {
544            return Ok(String::new());
545        }
546        self.advance(); // consume '{'
547
548        let mut depth: usize = 1;
549        let mut text = String::new();
550        let start_line = self.line;
551        // R string tracking (RLIKE mode from GNU R's gramRd.y).
552        // When inside "...", '...', or `...`, braces don't affect depth.
553        let mut in_r_string: Option<char> = None;
554
555        while !self.at_end() && depth > 0 {
556            let ch = self
557                .peek()
558                .expect("not at end (checked by while condition)");
559
560            // Inside an R string: consume chars, don't count braces.
561            // Only the matching unescaped close quote exits string mode.
562            // Backslash inside string: if followed by the quote char or
563            // another backslash, it's an escape — consume both chars.
564            if rlike {
565                if let Some(quote) = in_r_string {
566                    self.advance();
567                    text.push(ch);
568                    if ch == '\\' {
569                        // Could be R escape (\") or Rd escape (\\).
570                        // In both cases, consume the next char to avoid
571                        // mistaking it for a closing quote.
572                        if let Some(next) = self.peek() {
573                            self.advance();
574                            text.push(next);
575                        }
576                    } else if ch == quote {
577                        in_r_string = None;
578                    }
579                    continue;
580                }
581                // Outside string: enter string mode on quote characters.
582                if ch == '"' || ch == '\'' || ch == '`' {
583                    in_r_string = Some(ch);
584                    self.advance();
585                    text.push(ch);
586                    continue;
587                }
588                // R comment (#): consume to end of line, but STILL count
589                // braces (matching GNU R). Don't enter string mode for '
590                // inside comments like "# it's a test".
591                if ch == '#' {
592                    while !self.at_end() {
593                        let c = self
594                            .peek()
595                            .expect("not at end (checked by while condition)");
596                        self.advance();
597                        text.push(c);
598                        if c == '\\' {
599                            // Rd escape in comment — check for \{ \} \\
600                            if let Some(next) = self.peek() {
601                                if next == '{' || next == '}' || next == '\\' || next == '%' {
602                                    self.advance();
603                                    text.push(next);
604                                    continue;
605                                }
606                            }
607                        }
608                        if c == '{' {
609                            depth += 1;
610                        } else if c == '}' {
611                            depth -= 1;
612                            if depth == 0 {
613                                // Brace that closes the section — put it back
614                                // so the outer loop handles it.
615                                text.pop(); // remove the }
616                                self.pos -= 1; // un-advance
617                                break;
618                            }
619                        }
620                        if c == '\n' {
621                            break;
622                        }
623                    }
624                    continue;
625                }
626            }
627
628            match ch {
629                '{' => {
630                    depth += 1;
631                    self.advance();
632                    text.push('{');
633                }
634                '}' => {
635                    depth -= 1;
636                    self.advance();
637                    if depth > 0 {
638                        text.push('}');
639                    }
640                }
641                '\\' => {
642                    self.advance();
643                    if let Some(next) = self.peek() {
644                        match next {
645                            // Escaped specials — always recognized, even in verbatim mode
646                            '%' | '{' | '}' | '\\' => {
647                                text.push(next);
648                                self.advance();
649                            }
650                            _ if verbatim => {
651                                // In verbatim mode, backslash + non-special is literal
652                                text.push('\\');
653                                text.push(next);
654                                self.advance();
655                            }
656                            _ => {
657                                // This is a command inside the brace group
658                                let cmd = self.read_command_name();
659                                if cmd.is_empty() {
660                                    // Just a backslash followed by non-alpha
661                                    // (e.g. `\n` in preformatted or `\ `)
662                                    text.push(next);
663                                    self.advance();
664                                } else {
665                                    self.handle_inline_command(&cmd, &mut text)?;
666                                }
667                            }
668                        }
669                    }
670                }
671                '%' if !verbatim => {
672                    // Comment — skip to end of line
673                    self.skip_comment_line();
674                }
675                '#' if !verbatim => {
676                    // Preprocessor directives (#ifdef, #ifndef, #endif) are only
677                    // recognized at the start of a line. Mid-line '#' is just text.
678                    if self.is_at_line_start(&text) && self.is_preprocessor_directive() {
679                        self.skip_to_eol();
680                    } else {
681                        text.push(ch);
682                        self.advance();
683                    }
684                }
685                '\n' => {
686                    text.push('\n');
687                    self.advance();
688                }
689                _ => {
690                    text.push(ch);
691                    self.advance();
692                }
693            }
694        }
695
696        if depth > 0 {
697            return Err(RdError::UnbalancedBraces { line: start_line });
698        }
699
700        Ok(text)
701    }
702
703    /// Handle an inline Rd command encountered inside a brace group.
704    /// Strips the markup and appends plain-text content to `out`.
705    fn handle_inline_command(&mut self, cmd: &str, out: &mut String) -> Result<(), RdError> {
706        match cmd {
707            // Commands whose content is included as-is
708            // NOTE: \code{} uses normal mode, not RLIKE, because \code{}
709            // appears in text sections where ' is an apostrophe, not a string.
710            // RLIKE is only safe for top-level sections (\examples, \usage).
711            "code" | "bold" | "strong" | "emph" | "samp" | "file" | "pkg" | "var" | "env"
712            | "option" | "command" | "dfn" | "cite" | "acronym" | "sQuote" | "dQuote" => {
713                if self.peek() == Some('{') {
714                    let content = self.read_brace_arg()?;
715                    out.push_str(&content);
716                }
717            }
718            // Verbatim commands — `%` is literal, commands not expanded
719            "preformatted" | "verb" => {
720                if self.peek() == Some('{') {
721                    let content = self.read_brace_arg_verbatim()?;
722                    out.push_str(&content);
723                }
724            }
725            // \link[pkg]{text} or \link{text} — extract the display text
726            "link" | "linkS4class" => {
727                // Skip optional [...] argument
728                if self.peek() == Some('[') {
729                    self.skip_bracket_arg();
730                }
731                if self.peek() == Some('{') {
732                    let content = self.read_brace_arg()?;
733                    out.push_str(&content);
734                }
735            }
736            // \href{url}{text} — show the text
737            "href" => {
738                if self.peek() == Some('{') {
739                    let _url = self.read_brace_arg()?;
740                }
741                if self.peek() == Some('{') {
742                    let text = self.read_brace_arg()?;
743                    out.push_str(&text);
744                }
745            }
746            // \url{...} — show the URL
747            "url" => {
748                if self.peek() == Some('{') {
749                    let url = self.read_brace_arg()?;
750                    out.push_str(&url);
751                }
752            }
753            // \email{...} — show the email
754            "email" => {
755                if self.peek() == Some('{') {
756                    let email = self.read_brace_arg()?;
757                    out.push_str(&email);
758                }
759            }
760            // \eqn{latex}{text} or \eqn{latex} — prefer text alt if present
761            "eqn" => {
762                if self.peek() == Some('{') {
763                    let first = self.read_brace_arg()?;
764                    if self.peek() == Some('{') {
765                        let text_alt = self.read_brace_arg()?;
766                        out.push_str(&text_alt);
767                    } else {
768                        out.push_str(&first);
769                    }
770                }
771            }
772            // \deqn{latex}{text} — centered equation
773            "deqn" => {
774                if self.peek() == Some('{') {
775                    let first = self.read_brace_arg()?;
776                    if self.peek() == Some('{') {
777                        let text_alt = self.read_brace_arg()?;
778                        out.push('\n');
779                        out.push_str(text_alt.trim());
780                        out.push('\n');
781                    } else {
782                        out.push('\n');
783                        out.push_str(first.trim());
784                        out.push('\n');
785                    }
786                }
787            }
788            // \item{name}{desc} — used in \arguments and \describe
789            "item" => {
790                if self.peek() == Some('{') {
791                    let name = self.read_brace_arg()?;
792                    out.push_str(&name);
793                    if self.peek() == Some('{') {
794                        let desc = self.read_brace_arg()?;
795                        out.push_str(": ");
796                        out.push_str(&desc);
797                    }
798                }
799            }
800            // \dots, \ldots — ellipsis
801            "dots" | "ldots" => {
802                out.push_str("...");
803            }
804            // \R — the R name
805            "R" => {
806                out.push('R');
807                // Consume optional empty braces
808                if self.peek() == Some('{') {
809                    self.read_brace_arg()?;
810                }
811            }
812            // \cr — line break
813            "cr" => {
814                out.push('\n');
815            }
816            // \tab — tab separator (in \tabular)
817            "tab" => {
818                out.push('\t');
819            }
820            // \Sexpr[...]{...} — skip (cannot evaluate R code)
821            "Sexpr" => {
822                if self.peek() == Some('[') {
823                    self.skip_bracket_arg();
824                }
825                if self.peek() == Some('{') {
826                    self.read_brace_arg()?;
827                }
828            }
829            // \if{format}{text}\else{text} — skip conditionals
830            "ifelse" | "if" => {
831                if self.peek() == Some('{') {
832                    self.read_brace_arg()?;
833                }
834                if self.peek() == Some('{') {
835                    self.read_brace_arg()?;
836                }
837                // Check for \else
838                self.skip_spaces();
839                if self.peek_str(5) == "\\else" {
840                    for _ in 0..5 {
841                        self.advance();
842                    }
843                    if self.peek() == Some('{') {
844                        let text = self.read_brace_arg()?;
845                        out.push_str(&text);
846                    }
847                }
848            }
849            // \tabular{fmt}{rows} — extract content
850            "tabular" => {
851                if self.peek() == Some('{') {
852                    self.read_brace_arg()?; // format spec
853                }
854                if self.peek() == Some('{') {
855                    let content = self.read_brace_arg()?;
856                    out.push_str(&content);
857                }
858            }
859            // \describe{...} and \enumerate{...} and \itemize{...}
860            "describe" | "enumerate" | "itemize" | "value" => {
861                if self.peek() == Some('{') {
862                    let content = self.read_brace_arg()?;
863                    out.push_str(&content);
864                }
865            }
866            // \dontrun{...} — skip content
867            "dontrun" => {
868                if self.peek() == Some('{') {
869                    self.read_brace_arg()?;
870                }
871            }
872            // \donttest{...} and \dontshow{...} — include content
873            "donttest" | "dontshow" | "testonly" => {
874                if self.peek() == Some('{') {
875                    let content = self.read_brace_arg()?;
876                    out.push_str(&content);
877                }
878            }
879            // \newcommand, \renewcommand — skip
880            "newcommand" | "renewcommand" => {
881                if self.peek() == Some('{') {
882                    self.read_brace_arg()?;
883                }
884                if self.peek() == Some('{') {
885                    self.read_brace_arg()?;
886                }
887            }
888            // Package macros — consume braces, output nothing meaningful
889            "packageDESCRIPTION" | "packageIndices" | "packageAuthor" | "packageMaintainer"
890            | "packageTitle" => {
891                if self.peek() == Some('{') {
892                    let pkg = self.read_brace_arg()?;
893                    out.push_str(&format!("[{cmd}: {pkg}]"));
894                }
895            }
896            // \method{generic}{class} — format as generic.class
897            "method" | "S3method" | "S4method" => {
898                if self.peek() == Some('{') {
899                    let generic = self.read_brace_arg()?;
900                    if self.peek() == Some('{') {
901                        let class = self.read_brace_arg()?;
902                        out.push_str(&format!("{generic}.{class}"));
903                    } else {
904                        out.push_str(&generic);
905                    }
906                }
907            }
908            // \Rdversion{...}, \RdOpts{...}, \encoding{...} — skip
909            "Rdversion" | "RdOpts" | "encoding" | "concept" | "source" => {
910                if self.peek() == Some('{') {
911                    self.read_brace_arg()?;
912                }
913            }
914            // \out{...} — raw output (HTML/LaTeX), just include as text
915            "out" => {
916                if self.peek() == Some('{') {
917                    let content = self.read_brace_arg_verbatim()?;
918                    // Strip HTML tags for plain text display
919                    out.push_str(&content);
920                }
921            }
922            // \subsection{title}{body} — nested section
923            "subsection" => {
924                if self.peek() == Some('{') {
925                    let title = self.read_brace_arg()?;
926                    out.push('\n');
927                    out.push_str(&title);
928                    out.push('\n');
929                }
930                if self.peek() == Some('{') {
931                    let body = self.read_brace_arg()?;
932                    out.push_str(&body);
933                    out.push('\n');
934                }
935            }
936            // \special{...}
937            "special" => {
938                if self.peek() == Some('{') {
939                    let content = self.read_brace_arg()?;
940                    out.push_str(&content);
941                }
942            }
943            // Unknown command — try to extract brace content
944            _ => {
945                if self.peek() == Some('{') {
946                    let content = self.read_brace_arg()?;
947                    out.push_str(&content);
948                }
949            }
950        }
951        Ok(())
952    }
953
954    /// Skip a [...] optional argument.
955    fn skip_bracket_arg(&mut self) {
956        if self.peek() != Some('[') {
957            return;
958        }
959        self.advance(); // consume '['
960        let mut depth = 1;
961        while !self.at_end() && depth > 0 {
962            match self.peek() {
963                Some('[') => {
964                    depth += 1;
965                    self.advance();
966                }
967                Some(']') => {
968                    depth -= 1;
969                    self.advance();
970                }
971                Some('\\') => {
972                    self.advance();
973                    self.advance(); // skip escaped char
974                }
975                _ => {
976                    self.advance();
977                }
978            }
979        }
980    }
981
982    /// Skip a `%` comment line (from `%` to end of line).
983    fn skip_comment_line(&mut self) {
984        while let Some(ch) = self.advance() {
985            if ch == '\n' {
986                break;
987            }
988        }
989    }
990
991    /// Skip to end of line (for preprocessor directives).
992    fn skip_to_eol(&mut self) {
993        while let Some(ch) = self.peek() {
994            if ch == '\n' {
995                self.advance();
996                break;
997            }
998            self.advance();
999        }
1000    }
1001
1002    /// Check if we're at the start of a line (nothing but whitespace before
1003    /// the current position on this line).
1004    fn is_at_line_start(&self, text_so_far: &str) -> bool {
1005        // Check if the last line in text_so_far is all whitespace
1006        match text_so_far.rfind('\n') {
1007            Some(pos) => text_so_far[pos + 1..]
1008                .chars()
1009                .all(|c| c == ' ' || c == '\t'),
1010            None => text_so_far.chars().all(|c| c == ' ' || c == '\t'),
1011        }
1012    }
1013
1014    /// Check if the current position starts a preprocessor directive
1015    /// (#ifdef, #ifndef, #endif).
1016    fn is_preprocessor_directive(&self) -> bool {
1017        let remaining = &self.input[self.pos..];
1018        remaining.starts_with("#ifdef")
1019            || remaining.starts_with("#ifndef")
1020            || remaining.starts_with("#endif")
1021    }
1022
1023    /// Parse the top-level structure of an Rd file.
1024    fn parse_toplevel(&mut self, doc: &mut RdDoc) -> Result<(), RdError> {
1025        while !self.at_end() {
1026            let ch = self
1027                .peek()
1028                .expect("not at end (checked by while condition)");
1029
1030            match ch {
1031                '%' => {
1032                    self.skip_comment_line();
1033                }
1034                '#' if self.is_preprocessor_directive() => {
1035                    self.skip_to_eol();
1036                }
1037                '\\' => {
1038                    self.advance(); // consume '\'
1039                    let cmd = self.read_command_name();
1040                    if cmd.is_empty() {
1041                        // Escaped character at top level
1042                        self.advance();
1043                        continue;
1044                    }
1045                    self.parse_toplevel_command(&cmd, doc)?;
1046                }
1047                '\n' | ' ' | '\t' | '\r' => {
1048                    self.advance();
1049                }
1050                _ => {
1051                    // Stray text at top level — skip
1052                    self.advance();
1053                }
1054            }
1055        }
1056        Ok(())
1057    }
1058
1059    /// Parse a top-level Rd command and store its content in the doc.
1060    fn parse_toplevel_command(&mut self, cmd: &str, doc: &mut RdDoc) -> Result<(), RdError> {
1061        match cmd {
1062            "name" => {
1063                let content = self.read_brace_arg()?;
1064                doc.name = Some(content.trim().to_string());
1065            }
1066            "alias" => {
1067                let content = self.read_brace_arg()?;
1068                let alias = content.trim().to_string();
1069                if !alias.is_empty() {
1070                    doc.aliases.push(alias);
1071                }
1072            }
1073            "title" => {
1074                let content = self.read_brace_arg()?;
1075                doc.title = Some(normalize_whitespace(&content));
1076            }
1077            "description" => {
1078                let content = self.read_brace_arg()?;
1079                doc.description = Some(clean_text(&content));
1080            }
1081            "usage" => {
1082                // Normal mode (not RLIKE) because \usage{} mixes R code with
1083                // Rd markup like \method{generic}{class} and \dots.
1084                let content = self.read_brace_arg()?;
1085                doc.usage = Some(clean_usage(&content));
1086            }
1087            "arguments" => {
1088                self.parse_arguments(doc)?;
1089            }
1090            "value" => {
1091                let content = self.read_brace_arg()?;
1092                doc.value = Some(clean_text(&content));
1093            }
1094            "examples" => {
1095                let content = self.read_brace_arg_rcode()?;
1096                doc.examples = Some(clean_examples(&content));
1097            }
1098            "seealso" => {
1099                let content = self.read_brace_arg()?;
1100                doc.seealso = Some(clean_text(&content));
1101            }
1102            "details" => {
1103                let content = self.read_brace_arg()?;
1104                doc.details = Some(clean_text(&content));
1105            }
1106            "note" => {
1107                let content = self.read_brace_arg()?;
1108                doc.note = Some(clean_text(&content));
1109            }
1110            "references" => {
1111                let content = self.read_brace_arg()?;
1112                doc.references = Some(clean_text(&content));
1113            }
1114            "author" => {
1115                let content = self.read_brace_arg()?;
1116                doc.author = Some(normalize_whitespace(&content));
1117            }
1118            "keyword" => {
1119                let content = self.read_brace_arg()?;
1120                let kw = content.trim().to_string();
1121                if !kw.is_empty() {
1122                    doc.keywords.push(kw);
1123                }
1124            }
1125            "docType" => {
1126                let content = self.read_brace_arg()?;
1127                doc.doc_type = Some(content.trim().to_string());
1128            }
1129            "section" => {
1130                // \section{Title}{Content}
1131                let title = self.read_brace_arg()?;
1132                let content = self.read_brace_arg()?;
1133                doc.sections
1134                    .push((normalize_whitespace(&title), clean_text(&content)));
1135            }
1136            // Top-level commands we want to skip entirely (single arg)
1137            "Rdversion" | "RdOpts" | "encoding" | "concept" | "source" => {
1138                if self.peek() == Some('{') {
1139                    self.read_brace_arg()?;
1140                }
1141            }
1142            // Macro definitions have two brace args
1143            "newcommand" | "renewcommand" => {
1144                if self.peek() == Some('{') {
1145                    self.read_brace_arg()?;
1146                }
1147                if self.peek() == Some('{') {
1148                    self.read_brace_arg()?;
1149                }
1150            }
1151            // Unknown top-level command — consume its brace argument if present
1152            _ => {
1153                if self.peek() == Some('{') {
1154                    self.read_brace_arg()?;
1155                }
1156            }
1157        }
1158        Ok(())
1159    }
1160
1161    /// Parse the `\arguments{...}` section, extracting `\item{name}{desc}` pairs.
1162    fn parse_arguments(&mut self, doc: &mut RdDoc) -> Result<(), RdError> {
1163        if self.peek() != Some('{') {
1164            return Ok(());
1165        }
1166        self.advance(); // consume '{'
1167
1168        let mut depth: usize = 1;
1169        let start_line = self.line;
1170
1171        while !self.at_end() && depth > 0 {
1172            let ch = self
1173                .peek()
1174                .expect("not at end (checked by while condition)");
1175
1176            match ch {
1177                '{' => {
1178                    depth += 1;
1179                    self.advance();
1180                }
1181                '}' => {
1182                    depth -= 1;
1183                    self.advance();
1184                }
1185                '%' => {
1186                    self.skip_comment_line();
1187                }
1188                '#' if self.is_preprocessor_directive() => {
1189                    self.skip_to_eol();
1190                }
1191                '\\' => {
1192                    self.advance();
1193                    let cmd = self.read_command_name();
1194                    if cmd == "item" {
1195                        let name_raw = self.read_brace_arg()?;
1196                        let desc_raw = self.read_brace_arg()?;
1197                        let name = normalize_whitespace(&name_raw);
1198                        let desc = clean_text(&desc_raw);
1199                        if !name.is_empty() {
1200                            doc.arguments.push((name, desc));
1201                        }
1202                    } else if !cmd.is_empty() {
1203                        // Some other command inside arguments — skip its args
1204                        if self.peek() == Some('{') {
1205                            self.read_brace_arg()?;
1206                        }
1207                    }
1208                }
1209                _ => {
1210                    self.advance();
1211                }
1212            }
1213        }
1214
1215        if depth > 0 {
1216            return Err(RdError::UnbalancedBraces { line: start_line });
1217        }
1218
1219        Ok(())
1220    }
1221}
1222
1223// endregion
1224
1225// region: Text cleaning utilities
1226
/// Collapse every run of whitespace (newlines included) into one space and
/// drop leading and trailing whitespace.
fn normalize_whitespace(s: &str) -> String {
    let mut joined = String::with_capacity(s.len());
    for word in s.split_whitespace() {
        // Words are never empty, so `joined` is empty only before the first.
        if !joined.is_empty() {
            joined.push(' ');
        }
        joined.push_str(word);
    }
    joined
}
1232
/// Clean section text for display.
///
/// Each line is trimmed of surrounding whitespace, and blank lines at the
/// very beginning and end are removed. Interior lines — including blank
/// ones that separate paragraphs — are kept in order and joined with `\n`.
/// Input with no non-blank line yields an empty string.
fn clean_text(s: &str) -> String {
    let trimmed: Vec<&str> = s.lines().map(str::trim).collect();

    let first = match trimmed.iter().position(|line| !line.is_empty()) {
        Some(idx) => idx,
        None => return String::new(),
    };
    // A non-blank line exists, so the reverse search finds one at or after
    // `first`; the fallback is never taken.
    let last = trimmed
        .iter()
        .rposition(|line| !line.is_empty())
        .unwrap_or(first);

    trimmed[first..=last].join("\n")
}
1257
/// Clean usage text: trim every line, drop blank lines at the start and
/// end, and keep the remaining line structure joined with `\n`.
fn clean_usage(s: &str) -> String {
    // Trimming first lets blank-line detection be a plain emptiness check.
    let mut kept: Vec<&str> = s
        .lines()
        .map(str::trim)
        .skip_while(|line| line.is_empty())
        .collect();

    while matches!(kept.last(), Some(line) if line.is_empty()) {
        kept.pop();
    }

    kept.join("\n")
}
1278
/// Clean examples text.
///
/// Lines whose first non-whitespace character is `%` are dropped, then
/// blank lines at the start and end are removed. Surviving lines keep their
/// original indentation and are joined with `\n`.
fn clean_examples(s: &str) -> String {
    let mut kept: Vec<&str> = Vec::new();

    for line in s.lines() {
        if line.trim_start().starts_with('%') {
            continue; // comment-only line
        }
        if kept.is_empty() && line.trim().is_empty() {
            continue; // still inside the leading blank run
        }
        kept.push(line);
    }

    // Drop the trailing blank run.
    while matches!(kept.last(), Some(line) if line.trim().is_empty()) {
        kept.pop();
    }

    kept.join("\n")
}
1300
1301// endregion
1302
1303// region: Help index
1304
1305/// An index of Rd documentation, mapping topic aliases to parsed docs.
1306///
1307/// Used by `help()` to look up documentation by topic name. The index scans
1308/// package `man/` directories and builds an alias-to-doc mapping.
1309#[derive(Debug, Default)]
1310pub struct RdHelpIndex {
1311    /// Maps topic alias -> (package name, parsed doc).
1312    entries: HashMap<String, Vec<RdIndexEntry>>,
1313}
1314
1315/// A single entry in the help index.
1316#[derive(Debug, Clone)]
1317pub struct RdIndexEntry {
1318    /// Package this doc belongs to.
1319    pub package: String,
1320    /// Path to the source .Rd file.
1321    pub file_path: String,
1322    /// The parsed documentation.
1323    pub doc: RdDoc,
1324}
1325
1326impl RdHelpIndex {
1327    /// Create a new empty help index.
1328    pub fn new() -> Self {
1329        Self::default()
1330    }
1331
1332    /// Index all `.Rd` files in a package's `man/` directory.
1333    ///
1334    /// Parses each file and registers all its aliases in the index.
1335    /// Files that fail to parse are silently skipped.
1336    pub fn index_package_dir(&mut self, package_name: &str, man_dir: &Path) {
1337        let entries = match std::fs::read_dir(man_dir) {
1338            Ok(entries) => entries,
1339            Err(_) => return,
1340        };
1341
1342        for entry in entries.flatten() {
1343            let path = entry.path();
1344            if path.extension().and_then(|e| e.to_str()) != Some("Rd") {
1345                continue;
1346            }
1347
1348            let doc = match RdDoc::parse_file(&path) {
1349                Ok(doc) => doc,
1350                Err(_) => continue,
1351            };
1352
1353            let file_path = path.to_string_lossy().to_string();
1354            let index_entry = RdIndexEntry {
1355                package: package_name.to_string(),
1356                file_path,
1357                doc,
1358            };
1359
1360            // Register under each alias
1361            for alias in &index_entry.doc.aliases {
1362                self.entries
1363                    .entry(alias.clone())
1364                    .or_default()
1365                    .push(index_entry.clone());
1366            }
1367
1368            // Also register under the \name if not already an alias
1369            if let Some(name) = &index_entry.doc.name {
1370                if !index_entry.doc.aliases.contains(name) {
1371                    self.entries
1372                        .entry(name.clone())
1373                        .or_default()
1374                        .push(index_entry.clone());
1375                }
1376            }
1377        }
1378    }
1379
1380    /// Register a single entry under a topic name.
1381    pub fn register_entry(&mut self, topic: &str, entry: RdIndexEntry) {
1382        self.entries
1383            .entry(topic.to_string())
1384            .or_default()
1385            .push(entry);
1386    }
1387
1388    /// Look up documentation for a topic.
1389    ///
1390    /// Returns all matching entries (there may be multiple from different packages).
1391    pub fn lookup(&self, topic: &str) -> Vec<&RdIndexEntry> {
1392        self.entries
1393            .get(topic)
1394            .map(|entries| entries.iter().collect())
1395            .unwrap_or_default()
1396    }
1397
1398    /// Look up documentation for a topic in a specific package.
1399    pub fn lookup_in_package(&self, topic: &str, package: &str) -> Option<&RdIndexEntry> {
1400        self.entries
1401            .get(topic)
1402            .and_then(|entries| entries.iter().find(|e| e.package == package))
1403    }
1404
1405    /// Get all indexed topics.
1406    pub fn topics(&self) -> Vec<&str> {
1407        self.entries.keys().map(|s| s.as_str()).collect()
1408    }
1409
1410    /// Get all indexed topics for a specific package.
1411    pub fn package_topics(&self, package: &str) -> Vec<&str> {
1412        self.entries
1413            .iter()
1414            .filter(|(_, entries)| entries.iter().any(|e| e.package == package))
1415            .map(|(topic, _)| topic.as_str())
1416            .collect()
1417    }
1418}
1419
1420// endregion
1421
1422#[cfg(test)]
1423mod tests {
1424    use super::*;
1425
1426    #[test]
1427    fn parse_simple_rd() {
1428        let input = r#"\name{f}
1429\alias{f}
1430\title{Function f -- a Test}
1431\description{ An Rd test only. }
1432\usage{
1433f(a)
1434}
1435\arguments{
1436  \item{a}{a number.}
1437}
1438\value{a number.}
1439\examples{
1440f(42)
1441}
1442"#;
1443        let doc = RdDoc::parse(input).unwrap();
1444        assert_eq!(doc.name.as_deref(), Some("f"));
1445        assert_eq!(doc.aliases, vec!["f"]);
1446        assert_eq!(doc.title.as_deref(), Some("Function f -- a Test"));
1447        assert_eq!(doc.description.as_deref(), Some("An Rd test only."));
1448        assert_eq!(doc.usage.as_deref(), Some("f(a)"));
1449        assert_eq!(doc.arguments.len(), 1);
1450        assert_eq!(doc.arguments[0].0, "a");
1451        assert_eq!(doc.arguments[0].1, "a number.");
1452        assert_eq!(doc.value.as_deref(), Some("a number."));
1453        assert_eq!(doc.examples.as_deref(), Some("f(42)"));
1454    }
1455
1456    #[test]
1457    fn parse_multiple_aliases() {
1458        let input = r#"\name{PkgC-package}
1459\alias{PkgC-package}
1460\alias{PkgC}
1461\docType{package}
1462\title{Base R Regression Testing Dummy Package - C}
1463\keyword{ package }
1464"#;
1465        let doc = RdDoc::parse(input).unwrap();
1466        assert_eq!(doc.name.as_deref(), Some("PkgC-package"));
1467        assert_eq!(doc.aliases, vec!["PkgC-package", "PkgC"]);
1468        assert_eq!(doc.doc_type.as_deref(), Some("package"));
1469        assert_eq!(
1470            doc.title.as_deref(),
1471            Some("Base R Regression Testing Dummy Package - C")
1472        );
1473        assert_eq!(doc.keywords, vec!["package"]);
1474    }
1475
1476    #[test]
1477    fn parse_escaped_chars() {
1478        let input = r#"\name{test}
1479\title{Test escaping}
1480\description{
1481  Use \code{\%} for percent and \code{\{} for brace.
1482}
1483"#;
1484        let doc = RdDoc::parse(input).unwrap();
1485        let desc = doc.description.as_deref().unwrap();
1486        assert!(desc.contains('%'));
1487        assert!(desc.contains('{'));
1488    }
1489
1490    #[test]
1491    fn parse_comments_stripped() {
1492        let input = "% This is a comment\n\\name{test}\n% Another comment\n\\title{Test}\n";
1493        let doc = RdDoc::parse(input).unwrap();
1494        assert_eq!(doc.name.as_deref(), Some("test"));
1495        assert_eq!(doc.title.as_deref(), Some("Test"));
1496    }
1497
1498    #[test]
1499    fn parse_examples_strips_dontrun() {
1500        let input = r#"\name{test}
1501\examples{
1502x <- 1
1503\dontrun{stop("fail")}
1504y <- 2
1505}
1506"#;
1507        let doc = RdDoc::parse(input).unwrap();
1508        let ex = doc.examples.as_deref().unwrap();
1509        assert!(ex.contains("x <- 1"));
1510        assert!(ex.contains("y <- 2"));
1511        assert!(!ex.contains("stop"));
1512    }
1513
1514    #[test]
1515    fn parse_examples_includes_donttest() {
1516        let input = r#"\name{test}
1517\examples{
1518x <- 1
1519\donttest{y <- 2}
1520z <- 3
1521}
1522"#;
1523        let doc = RdDoc::parse(input).unwrap();
1524        let ex = doc.examples.as_deref().unwrap();
1525        assert!(ex.contains("x <- 1"));
1526        assert!(ex.contains("y <- 2"));
1527        assert!(ex.contains("z <- 3"));
1528    }
1529
1530    #[test]
1531    fn parse_nested_markup() {
1532        let input = r#"\name{test}
1533\description{
1534  See \code{\link[stats]{weighted.mean}} for details.
1535}
1536"#;
1537        let doc = RdDoc::parse(input).unwrap();
1538        let desc = doc.description.as_deref().unwrap();
1539        assert!(desc.contains("weighted.mean"));
1540    }
1541
1542    #[test]
1543    fn parse_link_with_display_text() {
1544        let input = r#"\name{test}
1545\seealso{
1546  \link[=Paren]{\{} and \link[stats:weighted.mean]{ditto}
1547}
1548"#;
1549        let doc = RdDoc::parse(input).unwrap();
1550        let sa = doc.seealso.as_deref().unwrap();
1551        assert!(sa.contains('{'));
1552        assert!(sa.contains("ditto"));
1553    }
1554
1555    #[test]
1556    fn parse_section_custom() {
1557        let input = r#"\name{test}
1558\section{Warning}{
1559  Do not use in production.
1560}
1561"#;
1562        let doc = RdDoc::parse(input).unwrap();
1563        assert_eq!(doc.sections.len(), 1);
1564        assert_eq!(doc.sections[0].0, "Warning");
1565        assert!(doc.sections[0].1.contains("Do not use in production."));
1566    }
1567
1568    #[test]
1569    fn parse_multi_item_arguments() {
1570        let input = r#"\name{test}
1571\arguments{
1572  \item{x}{the input value}
1573  \item{y}{the output value}
1574  \item{...}{additional arguments}
1575}
1576"#;
1577        let doc = RdDoc::parse(input).unwrap();
1578        assert_eq!(doc.arguments.len(), 3);
1579        assert_eq!(doc.arguments[0].0, "x");
1580        assert_eq!(doc.arguments[0].1, "the input value");
1581        assert_eq!(doc.arguments[1].0, "y");
1582        assert_eq!(doc.arguments[1].1, "the output value");
1583        assert_eq!(doc.arguments[2].0, "...");
1584        assert_eq!(doc.arguments[2].1, "additional arguments");
1585    }
1586
1587    #[test]
1588    fn parse_combined_item_args() {
1589        // R allows \item{x, y}{description} for combined arguments
1590        let input = r#"\name{test}
1591\arguments{
1592  \item{
1593    x,
1594    y
1595  }{
1596    combined arguments
1597  }
1598}
1599"#;
1600        let doc = RdDoc::parse(input).unwrap();
1601        assert_eq!(doc.arguments.len(), 1);
1602        assert_eq!(doc.arguments[0].0, "x, y");
1603        assert!(doc.arguments[0].1.contains("combined arguments"));
1604    }
1605
1606    #[test]
1607    fn parse_eqn_with_text_alt() {
1608        let input = r#"\name{test}
1609\description{
1610  The formula \eqn{\alpha}{alpha} is important.
1611}
1612"#;
1613        let doc = RdDoc::parse(input).unwrap();
1614        let desc = doc.description.as_deref().unwrap();
1615        assert!(desc.contains("alpha"));
1616    }
1617
1618    #[test]
1619    fn parse_href() {
1620        let input = r#"\name{test}
1621\description{
1622  See \href{https://example.org}{the website} for info.
1623}
1624"#;
1625        let doc = RdDoc::parse(input).unwrap();
1626        let desc = doc.description.as_deref().unwrap();
1627        assert!(desc.contains("the website"));
1628    }
1629
1630    #[test]
1631    fn parse_dots_expansion() {
1632        let input = r#"\name{test}
1633\description{
1634  Pass \dots to the function. Also \ldots works.
1635}
1636"#;
1637        let doc = RdDoc::parse(input).unwrap();
1638        let desc = doc.description.as_deref().unwrap();
1639        // \dots and \ldots should both become "..."
1640        let dot_count = desc.matches("...").count();
1641        assert_eq!(dot_count, 2);
1642    }
1643
1644    #[test]
1645    fn format_text_output() {
1646        let input = r#"\name{myFunc}
1647\alias{myFunc}
1648\title{My Function Title}
1649\description{This function does something useful.}
1650\usage{myFunc(x, y = 1)}
1651\arguments{
1652  \item{x}{the input}
1653  \item{y}{optional parameter}
1654}
1655\value{A numeric value.}
1656\examples{
1657myFunc(1, 2)
1658}
1659"#;
1660        let doc = RdDoc::parse(input).unwrap();
1661        let text = doc.format_text();
1662        assert!(text.contains("myFunc"));
1663        assert!(text.contains("My Function Title"));
1664        assert!(text.contains("This function does something useful."));
1665        assert!(text.contains("myFunc(x, y = 1)"));
1666        assert!(text.contains("the input"));
1667        assert!(text.contains("A numeric value."));
1668        assert!(text.contains("myFunc(1, 2)"));
1669    }
1670
1671    #[test]
1672    fn help_index_lookup() {
1673        let mut index = RdHelpIndex::new();
1674
1675        let doc = RdDoc {
1676            name: Some("myFunc".to_string()),
1677            aliases: vec!["myFunc".to_string(), "mf".to_string()],
1678            title: Some("My Function".to_string()),
1679            ..Default::default()
1680        };
1681
1682        let entry = RdIndexEntry {
1683            package: "testPkg".to_string(),
1684            file_path: "/path/to/myFunc.Rd".to_string(),
1685            doc,
1686        };
1687
1688        // Manually register
1689        for alias in &entry.doc.aliases {
1690            index
1691                .entries
1692                .entry(alias.clone())
1693                .or_default()
1694                .push(entry.clone());
1695        }
1696
1697        assert_eq!(index.lookup("myFunc").len(), 1);
1698        assert_eq!(index.lookup("mf").len(), 1);
1699        assert_eq!(index.lookup("nonexistent").len(), 0);
1700
1701        let result = index.lookup("myFunc")[0];
1702        assert_eq!(result.package, "testPkg");
1703        assert_eq!(result.doc.title.as_deref(), Some("My Function"));
1704    }
1705
1706    #[test]
1707    fn help_index_package_filter() {
1708        let mut index = RdHelpIndex::new();
1709
1710        let doc1 = RdDoc {
1711            name: Some("func".to_string()),
1712            aliases: vec!["func".to_string()],
1713            title: Some("From pkg A".to_string()),
1714            ..Default::default()
1715        };
1716
1717        let doc2 = RdDoc {
1718            name: Some("func".to_string()),
1719            aliases: vec!["func".to_string()],
1720            title: Some("From pkg B".to_string()),
1721            ..Default::default()
1722        };
1723
1724        index
1725            .entries
1726            .entry("func".to_string())
1727            .or_default()
1728            .push(RdIndexEntry {
1729                package: "pkgA".to_string(),
1730                file_path: "a.Rd".to_string(),
1731                doc: doc1,
1732            });
1733        index
1734            .entries
1735            .entry("func".to_string())
1736            .or_default()
1737            .push(RdIndexEntry {
1738                package: "pkgB".to_string(),
1739                file_path: "b.Rd".to_string(),
1740                doc: doc2,
1741            });
1742
1743        // Unfiltered: both results
1744        assert_eq!(index.lookup("func").len(), 2);
1745
1746        // Package-filtered
1747        let a = index.lookup_in_package("func", "pkgA").unwrap();
1748        assert_eq!(a.doc.title.as_deref(), Some("From pkg A"));
1749
1750        let b = index.lookup_in_package("func", "pkgB").unwrap();
1751        assert_eq!(b.doc.title.as_deref(), Some("From pkg B"));
1752
1753        assert!(index.lookup_in_package("func", "pkgC").is_none());
1754    }
1755
1756    #[test]
1757    fn parse_real_testit_rd() {
1758        // Test against the real testit.Rd file from the R test suite
1759        let input = r#"% A regression test example of Rd conversion
1760\name{testit}
1761\title{An Rd Regression Test}
1762\alias{\{}
1763\usage{
1764\\x \\y \%\{\}
1765
1766foo(\var{x}, \var{y}, ...)
1767}
1768\arguments{
1769  \item{
1770    x,
1771    y
1772  }{
1773    combined arguments, in multiple Rd lines
1774
1775    paragraph
1776  }
1777  \item{...}{description of \dots: \ldots}
1778}
1779\value{
1780  [NULL]\cr\cr\dots
1781}
1782\examples{
1783\\x
1784\%\{\}
1785
1786\dontrun{stop("doomed to fail")}
1787
1788foo(\var{x},
1789% pure comment lines should be dropped
1790    \var{y})
1791}
1792"#;
1793        let doc = RdDoc::parse(input).unwrap();
1794        assert_eq!(doc.name.as_deref(), Some("testit"));
1795        assert_eq!(doc.title.as_deref(), Some("An Rd Regression Test"));
1796        assert!(doc.aliases.contains(&"{".to_string()));
1797        assert_eq!(doc.arguments.len(), 2);
1798        assert_eq!(doc.arguments[0].0, "x, y");
1799        assert!(doc.arguments[1].1.contains("..."));
1800
1801        // Examples should not contain \dontrun content
1802        let ex = doc.examples.as_deref().unwrap();
1803        assert!(!ex.contains("doomed to fail"));
1804        // Comment lines should be stripped from examples
1805        assert!(!ex.contains("pure comment lines"));
1806    }
1807
1808    #[test]
1809    fn parse_rd_with_author_keyword() {
1810        let input = r#"\name{pkg}
1811\alias{pkg}
1812\title{A Package}
1813\author{Jane Doe}
1814\keyword{package}
1815\keyword{utilities}
1816"#;
1817        let doc = RdDoc::parse(input).unwrap();
1818        assert_eq!(doc.author.as_deref(), Some("Jane Doe"));
1819        assert_eq!(doc.keywords, vec!["package", "utilities"]);
1820    }
1821
1822    #[test]
1823    fn examples_code_extraction() {
1824        let input = r#"\name{test}
1825\examples{
1826x <- 1 + 2
1827stopifnot(x == 3)
1828}
1829"#;
1830        let doc = RdDoc::parse(input).unwrap();
1831        let code = doc.examples_code().unwrap();
1832        assert!(code.contains("x <- 1 + 2"));
1833        assert!(code.contains("stopifnot(x == 3)"));
1834    }
1835
1836    #[test]
1837    fn parse_r_command() {
1838        let input = r#"\name{test}
1839\description{This uses \R for statistics.}
1840"#;
1841        let doc = RdDoc::parse(input).unwrap();
1842        let desc = doc.description.as_deref().unwrap();
1843        assert!(desc.contains("R"));
1844        assert!(desc.contains("statistics"));
1845    }
1846
1847    #[test]
1848    fn parse_sexpr_skipped() {
1849        let input = r#"\name{foo}
1850\alias{foo}
1851\title{Foo Title}
1852\description{
1853  Does nothing.  Here is pi:  \Sexpr{pi}.
1854}
1855"#;
1856        let doc = RdDoc::parse(input).unwrap();
1857        assert_eq!(doc.name.as_deref(), Some("foo"));
1858        assert_eq!(doc.title.as_deref(), Some("Foo Title"));
1859        // \Sexpr content is skipped since we can't evaluate R
1860        let desc = doc.description.as_deref().unwrap();
1861        assert!(desc.contains("Does nothing."));
1862    }
1863
1864    #[test]
1865    fn parse_ver20_rd() {
1866        let input = r#"\name{ver20}
1867\Rdversion{1.1}
1868\title{Johnson & Johnson,  $ _  ### ^ ~}
1869\arguments{
1870    \item{foo}{item 1}
1871    \item{bar}{space, the item 2}
1872    \item{bah}{newline, then item 3}
1873}
1874\description{
1875  This is the description
1876}
1877\examples{
1878\\x
1879}
1880"#;
1881        let doc = RdDoc::parse(input).unwrap();
1882        assert_eq!(doc.name.as_deref(), Some("ver20"));
1883        assert!(doc.title.as_deref().unwrap().contains("Johnson & Johnson"));
1884        assert_eq!(doc.arguments.len(), 3);
1885        assert_eq!(doc.arguments[0].0, "foo");
1886        assert_eq!(doc.arguments[1].0, "bar");
1887        assert_eq!(doc.arguments[2].0, "bah");
1888        assert!(doc.description.is_some());
1889    }
1890
1891    #[test]
1892    fn parse_empty_input() {
1893        let doc = RdDoc::parse("").unwrap();
1894        assert_eq!(doc.name, None);
1895        assert!(doc.aliases.is_empty());
1896    }
1897
1898    #[test]
1899    fn parse_only_comments() {
1900        let doc = RdDoc::parse("% just a comment\n% another one\n").unwrap();
1901        assert_eq!(doc.name, None);
1902    }
1903
1904    #[test]
1905    fn index_from_directory() {
1906        // Test indexing with a synthetic directory structure
1907        let dir = std::env::temp_dir().join(format!("minir-rd-test-{}", std::process::id()));
1908        let _ = std::fs::remove_dir_all(&dir);
1909        std::fs::create_dir_all(&dir).unwrap();
1910        let man_dir = dir.join("man");
1911        std::fs::create_dir(&man_dir).unwrap();
1912
1913        std::fs::write(
1914            man_dir.join("add.Rd"),
1915            r#"\name{add}
1916\alias{add}
1917\alias{plus}
1918\title{Add two numbers}
1919\description{Adds two numbers together.}
1920\usage{add(x, y)}
1921\arguments{
1922  \item{x}{first number}
1923  \item{y}{second number}
1924}
1925\value{The sum of x and y.}
1926\examples{
1927add(1, 2)
1928}
1929"#,
1930        )
1931        .unwrap();
1932
1933        std::fs::write(
1934            man_dir.join("sub.Rd"),
1935            r#"\name{sub}
1936\alias{sub}
1937\alias{minus}
1938\title{Subtract two numbers}
1939\description{Subtracts y from x.}
1940"#,
1941        )
1942        .unwrap();
1943
1944        let mut index = RdHelpIndex::new();
1945        index.index_package_dir("mathPkg", &man_dir);
1946
1947        // Lookup by name
1948        assert_eq!(index.lookup("add").len(), 1);
1949        assert_eq!(index.lookup("sub").len(), 1);
1950
1951        // Lookup by alias
1952        assert_eq!(index.lookup("plus").len(), 1);
1953        assert_eq!(index.lookup("minus").len(), 1);
1954
1955        // Package topics
1956        let topics = index.package_topics("mathPkg");
1957        assert!(topics.contains(&"add"));
1958        assert!(topics.contains(&"sub"));
1959        assert!(topics.contains(&"plus"));
1960        assert!(topics.contains(&"minus"));
1961
1962        // Verify doc content
1963        let add_doc = &index.lookup("add")[0].doc;
1964        assert_eq!(add_doc.title.as_deref(), Some("Add two numbers"));
1965        assert_eq!(add_doc.arguments.len(), 2);
1966        assert!(add_doc.examples.is_some());
1967
1968        // Cleanup
1969        let _ = std::fs::remove_dir_all(&dir);
1970    }
1971
1972    #[test]
1973    fn parse_real_rd_files() {
1974        // Test against the real .Rd files in the repository
1975        let test_dir = std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("tests");
1976
1977        // tests/Pkgs/pkgD/man/f.Rd
1978        let f_rd = test_dir.join("Pkgs/pkgD/man/f.Rd");
1979        if f_rd.exists() {
1980            let doc = RdDoc::parse_file(&f_rd).unwrap();
1981            assert_eq!(doc.name.as_deref(), Some("f"));
1982            assert_eq!(doc.aliases, vec!["f"]);
1983            assert_eq!(doc.title.as_deref(), Some("Function f -- a Test"));
1984            assert!(doc.description.is_some());
1985            assert!(doc.usage.is_some());
1986            assert_eq!(doc.arguments.len(), 1);
1987            assert_eq!(doc.arguments[0].0, "a");
1988            assert!(doc.examples.is_some());
1989        }
1990
1991        // tests/Pkgs/pkgC/man/PkgC-package.Rd
1992        let pkgc_rd = test_dir.join("Pkgs/pkgC/man/PkgC-package.Rd");
1993        if pkgc_rd.exists() {
1994            let doc = RdDoc::parse_file(&pkgc_rd).unwrap();
1995            assert_eq!(doc.name.as_deref(), Some("PkgC-package"));
1996            assert!(doc.aliases.contains(&"PkgC-package".to_string()));
1997            assert!(doc.aliases.contains(&"PkgC".to_string()));
1998            assert_eq!(doc.doc_type.as_deref(), Some("package"));
1999        }
2000
2001        // tests/ver20.Rd
2002        let ver20_rd = test_dir.join("ver20.Rd");
2003        if ver20_rd.exists() {
2004            let doc = RdDoc::parse_file(&ver20_rd).unwrap();
2005            assert_eq!(doc.name.as_deref(), Some("ver20"));
2006            assert_eq!(doc.arguments.len(), 3);
2007        }
2008
2009        // tests/testit.Rd
2010        let testit_rd = test_dir.join("testit.Rd");
2011        if testit_rd.exists() {
2012            let doc = RdDoc::parse_file(&testit_rd).unwrap();
2013            assert_eq!(doc.name.as_deref(), Some("testit"));
2014            assert!(doc.title.is_some());
2015            assert!(doc.arguments.len() >= 2);
2016        }
2017    }
2018}
r/interpreter/packages/rd.rs

r/interpreter/packages/
rd.rs