// r/interpreter/packages/description.rs

//! Parser for R package DESCRIPTION files (Debian Control File format).
//!
//! The DCF format is simple:
//! - Lines of the form `Field: Value` introduce a new field
//! - Continuation lines start with whitespace and append to the current field
//! - Blank lines separate stanzas (DESCRIPTION files have exactly one stanza)
//! - `#` comments are NOT supported in DCF (unlike NAMESPACE)
//!
//! Dependency fields (`Depends`, `Imports`, `Suggests`, `LinkingTo`) contain
//! comma-separated package names with optional version constraints like
//! `Matrix (>= 1.2-0)`.

use std::collections::HashMap;

15/// A parsed R package DESCRIPTION file.
16#[derive(Debug, Clone, PartialEq, Eq)]
17pub struct PackageDescription {
18    /// The package name (`Package` field). Always present in valid DESCRIPTION files.
19    pub package: String,
20    /// The package version (`Version` field).
21    pub version: String,
22    /// The package title (`Title` field).
23    pub title: Option<String>,
24    /// Packages listed in `Depends` (attached when this package loads).
25    pub depends: Vec<Dependency>,
26    /// Packages listed in `Imports` (loaded but not attached).
27    pub imports: Vec<Dependency>,
28    /// Packages listed in `Suggests` (optional, for tests/examples).
29    pub suggests: Vec<Dependency>,
30    /// Packages listed in `LinkingTo` (C/C++ headers at compile time).
31    pub linking_to: Vec<Dependency>,
32    /// All raw fields from the DCF file, for access to fields we don't
33    /// explicitly model (e.g. `License`, `Author`, `Description`).
34    pub fields: HashMap<String, String>,
35}
36
/// A single dependency entry, e.g. `Matrix (>= 1.2-0)`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Dependency {
    /// Package name, e.g. `Matrix`.
    pub package: String,
    /// Optional version constraint without the surrounding parentheses,
    /// e.g. `>= 1.2-0`. `None` when the entry names only a package.
    pub version_constraint: Option<String>,
}

/// Errors that can occur when parsing a DESCRIPTION file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DescriptionError {
    /// The `Package` field is missing.
    MissingPackage,
    /// The `Version` field is missing.
    MissingVersion,
    /// A line could not be parsed (not a field and not a continuation).
    MalformedLine {
        /// 1-based number of the offending line within the input.
        line_number: usize,
        /// The offending line, verbatim.
        line: String,
    },
}

57impl std::fmt::Display for DescriptionError {
58    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
59        match self {
60            DescriptionError::MissingPackage => {
61                write!(f, "DESCRIPTION is missing required 'Package' field")
62            }
63            DescriptionError::MissingVersion => {
64                write!(f, "DESCRIPTION is missing required 'Version' field")
65            }
66            DescriptionError::MalformedLine { line_number, line } => {
67                write!(
68                    f,
69                    "DESCRIPTION line {line_number}: malformed line (not a field or continuation): {line:?}"
70                )
71            }
72        }
73    }
74}
75
76impl std::error::Error for DescriptionError {}
77
78impl PackageDescription {
79    /// Parse a DESCRIPTION file from its text content.
80    pub fn parse(input: &str) -> Result<Self, DescriptionError> {
81        let fields = parse_dcf(input)?;
82
83        let package = fields
84            .get("Package")
85            .cloned()
86            .ok_or(DescriptionError::MissingPackage)?;
87        let version = fields
88            .get("Version")
89            .cloned()
90            .ok_or(DescriptionError::MissingVersion)?;
91        let title = fields.get("Title").cloned();
92
93        let depends = fields
94            .get("Depends")
95            .map(|s| parse_dependency_list(s))
96            .unwrap_or_default();
97        let imports = fields
98            .get("Imports")
99            .map(|s| parse_dependency_list(s))
100            .unwrap_or_default();
101        let suggests = fields
102            .get("Suggests")
103            .map(|s| parse_dependency_list(s))
104            .unwrap_or_default();
105        let linking_to = fields
106            .get("LinkingTo")
107            .map(|s| parse_dependency_list(s))
108            .unwrap_or_default();
109
110        Ok(PackageDescription {
111            package,
112            version,
113            title,
114            depends,
115            imports,
116            suggests,
117            linking_to,
118            fields,
119        })
120    }
121}
122
123/// Parse DCF (Debian Control File) format into a field map.
124///
125/// Returns fields in their original casing. Continuation lines are joined
126/// with a single space (leading whitespace on continuation lines is stripped,
127/// but the continuation itself is space-separated from the previous line).
128fn parse_dcf(input: &str) -> Result<HashMap<String, String>, DescriptionError> {
129    let mut fields = HashMap::new();
130    let mut current_field: Option<String> = None;
131    let mut current_value = String::new();
132
133    for (line_number, line) in input.lines().enumerate() {
134        let line_number = line_number + 1; // 1-indexed for error messages
135
136        // Blank lines end the current stanza. DESCRIPTION has one stanza,
137        // so we stop collecting after the first blank line.
138        if line.trim().is_empty() {
139            if let Some(field) = current_field.take() {
140                fields.insert(field, current_value.trim().to_string());
141                current_value.clear();
142            }
143            continue;
144        }
145
146        // Continuation line: starts with whitespace
147        if line.starts_with(' ') || line.starts_with('\t') {
148            if current_field.is_some() {
149                // Preserve the continuation with a newline for multiline fields
150                // like Description, but for dependency fields the newlines
151                // will be handled by the dependency parser.
152                current_value.push('\n');
153                current_value.push_str(line.trim());
154            } else {
155                return Err(DescriptionError::MalformedLine {
156                    line_number,
157                    line: line.to_string(),
158                });
159            }
160            continue;
161        }
162
163        // New field: `Field: Value`
164        if let Some(colon_pos) = line.find(':') {
165            let field_name = line[..colon_pos].trim();
166            // Field names must not contain whitespace
167            if field_name.contains(' ') || field_name.contains('\t') {
168                return Err(DescriptionError::MalformedLine {
169                    line_number,
170                    line: line.to_string(),
171                });
172            }
173
174            // Save the previous field
175            if let Some(prev_field) = current_field.take() {
176                fields.insert(prev_field, current_value.trim().to_string());
177                current_value.clear();
178            }
179
180            current_field = Some(field_name.to_string());
181            let value_part = &line[colon_pos + 1..];
182            current_value.push_str(value_part.trim());
183            continue;
184        }
185
186        // Line is not blank, not a continuation, not a field — error
187        return Err(DescriptionError::MalformedLine {
188            line_number,
189            line: line.to_string(),
190        });
191    }
192
193    // Don't forget the last field
194    if let Some(field) = current_field.take() {
195        fields.insert(field, current_value.trim().to_string());
196    }
197
198    Ok(fields)
199}
200
201/// Parse a comma-separated dependency list like `R (>= 3.5.0), dplyr, Matrix (>= 1.2-0)`.
202fn parse_dependency_list(input: &str) -> Vec<Dependency> {
203    // Dependencies are comma-separated, possibly spanning multiple lines
204    // (already joined by parse_dcf with newlines, which we treat as whitespace).
205    input
206        .split(',')
207        .filter_map(|entry| {
208            let entry = entry.replace(['\n', '\r'], " ");
209            let entry = entry.trim().to_string();
210            if entry.is_empty() {
211                return None;
212            }
213            Some(parse_single_dependency(&entry))
214        })
215        .collect()
216}
217
218/// Parse a single dependency entry like `Matrix (>= 1.2-0)` or just `dplyr`.
219fn parse_single_dependency(entry: &str) -> Dependency {
220    let entry = entry.trim();
221    if let Some(paren_start) = entry.find('(') {
222        let package = entry[..paren_start].trim().to_string();
223        let constraint = if let Some(paren_end) = entry.find(')') {
224            entry[paren_start + 1..paren_end].trim().to_string()
225        } else {
226            // Unclosed paren — take everything after '(' as the constraint
227            entry[paren_start + 1..].trim().to_string()
228        };
229        Dependency {
230            package,
231            version_constraint: if constraint.is_empty() {
232                None
233            } else {
234                Some(constraint)
235            },
236        }
237    } else {
238        Dependency {
239            package: entry.to_string(),
240            version_constraint: None,
241        }
242    }
243}
244
#[cfg(test)]
mod tests {
    use super::*;

    // Single-line fields, with and without version constraints.
    #[test]
    fn parse_simple_description() {
        let input = "\
Package: myPkg
Version: 1.0.0
Title: A Test Package
Depends: R (>= 3.5.0), methods
Imports: dplyr, Matrix (>= 1.2-0)
License: MIT
";
        let desc = PackageDescription::parse(input).unwrap();
        assert_eq!(desc.package, "myPkg");
        assert_eq!(desc.version, "1.0.0");
        assert_eq!(desc.title.as_deref(), Some("A Test Package"));
        assert_eq!(desc.depends.len(), 2);
        assert_eq!(desc.depends[0].package, "R");
        assert_eq!(
            desc.depends[0].version_constraint.as_deref(),
            Some(">= 3.5.0")
        );
        assert_eq!(desc.depends[1].package, "methods");
        assert_eq!(desc.depends[1].version_constraint, None);
        assert_eq!(desc.imports.len(), 2);
        assert_eq!(desc.imports[0].package, "dplyr");
        assert_eq!(desc.imports[1].package, "Matrix");
        assert_eq!(
            desc.imports[1].version_constraint.as_deref(),
            Some(">= 1.2-0")
        );
        assert!(desc.suggests.is_empty());
        assert!(desc.linking_to.is_empty());
    }

    // Fields spread over indented continuation lines.
    #[test]
    fn parse_continuation_lines() {
        let input = "\
Package: bigPkg
Version: 2.3.1
Title: A Package with
    a Multi-Line Title
Description: This is a long description
    that spans multiple lines and explains
    what the package does.
Depends: R (>= 4.0.0),
    rlang (>= 0.4.0),
    vctrs
Imports: lifecycle,
    pillar (>= 1.5.0)
";
        let desc = PackageDescription::parse(input).unwrap();
        assert_eq!(desc.package, "bigPkg");
        assert_eq!(desc.version, "2.3.1");
        // Title continuation is joined
        assert!(desc.title.as_deref().unwrap().contains("Multi-Line Title"));
        assert_eq!(desc.depends.len(), 3);
        assert_eq!(desc.depends[0].package, "R");
        assert_eq!(desc.depends[1].package, "rlang");
        assert_eq!(
            desc.depends[1].version_constraint.as_deref(),
            Some(">= 0.4.0")
        );
        assert_eq!(desc.depends[2].package, "vctrs");
        assert_eq!(desc.imports.len(), 2);
        assert_eq!(desc.imports[0].package, "lifecycle");
        assert_eq!(desc.imports[1].package, "pillar");
        assert_eq!(
            desc.imports[1].version_constraint.as_deref(),
            Some(">= 1.5.0")
        );
    }

    #[test]
    fn parse_missing_package_field() {
        let input = "\
Version: 1.0
Title: No package name
";
        let err = PackageDescription::parse(input).unwrap_err();
        assert_eq!(err, DescriptionError::MissingPackage);
    }

    #[test]
    fn parse_missing_version_field() {
        let input = "\
Package: oops
Title: No version
";
        let err = PackageDescription::parse(input).unwrap_err();
        assert_eq!(err, DescriptionError::MissingVersion);
    }

    #[test]
    fn parse_pkgb_description() {
        // Real DESCRIPTION from tests/Pkgs/pkgB
        let input = "\
Package: pkgB
Title: Simple Package with NameSpace and S4 Methods and Classes
Type: Package
Imports: methods, graphics, pkgA
Version: 1.0
Date: 2019-01-21
Author: Yohan Chalabi and R-core
Maintainer: R Core <R-core@almost.r-project.org>
Description: Example package with a namespace and imports of S4, but empty R/ ....
 used for regression testing the correct working of tools::codoc(), undoc()
 etc, but also S4 in connection with other packages.
License: GPL (>= 2)
";
        let desc = PackageDescription::parse(input).unwrap();
        assert_eq!(desc.package, "pkgB");
        assert_eq!(desc.version, "1.0");
        assert_eq!(desc.imports.len(), 3);
        assert_eq!(desc.imports[0].package, "methods");
        assert_eq!(desc.imports[1].package, "graphics");
        assert_eq!(desc.imports[2].package, "pkgA");
        assert!(desc.depends.is_empty());
    }

    #[test]
    fn parse_pkgd_description() {
        // Real DESCRIPTION from tests/Pkgs/pkgD — has versioned Depends and Imports
        let input = "\
Package: pkgD
Title: Simple Package with NameSpace and S4 Methods and Classes
Version: 1.2.0
Date: 2015-10-10
Type: Package
Depends: R (>= 2.14.0), R (>= r56550), methods
Imports: Matrix (>= 1.2-0), Matrix (<= 99.9-9)
LazyData: true
Author: Yohan Chalabi and R-core
Maintainer: R Core <R-core@almost.r-project.org>
Description: Example package with a namespace, and S4 method for \"plot\".
 used for regression testing the correct working of tools::codoc(), undoc()
 etc, but also S4 in connection with other packages.
License: GPL (>= 2)
";
        let desc = PackageDescription::parse(input).unwrap();
        assert_eq!(desc.package, "pkgD");
        assert_eq!(desc.version, "1.2.0");
        assert_eq!(desc.depends.len(), 3);
        assert_eq!(desc.depends[0].package, "R");
        assert_eq!(
            desc.depends[0].version_constraint.as_deref(),
            Some(">= 2.14.0")
        );
        assert_eq!(desc.depends[1].package, "R");
        assert_eq!(
            desc.depends[1].version_constraint.as_deref(),
            Some(">= r56550")
        );
        assert_eq!(desc.depends[2].package, "methods");
        assert_eq!(desc.imports.len(), 2);
        assert_eq!(desc.imports[0].package, "Matrix");
        assert_eq!(
            desc.imports[0].version_constraint.as_deref(),
            Some(">= 1.2-0")
        );
        assert_eq!(desc.imports[1].package, "Matrix");
        assert_eq!(
            desc.imports[1].version_constraint.as_deref(),
            Some("<= 99.9-9")
        );
    }

    #[test]
    fn parse_suggests_and_linking_to() {
        let input = "\
Package: testPkg
Version: 0.1.0
Suggests: testthat (>= 3.0.0), knitr
LinkingTo: Rcpp, RcppArmadillo (>= 0.9)
";
        let desc = PackageDescription::parse(input).unwrap();
        assert_eq!(desc.suggests.len(), 2);
        assert_eq!(desc.suggests[0].package, "testthat");
        assert_eq!(
            desc.suggests[0].version_constraint.as_deref(),
            Some(">= 3.0.0")
        );
        assert_eq!(desc.suggests[1].package, "knitr");
        assert_eq!(desc.linking_to.len(), 2);
        assert_eq!(desc.linking_to[0].package, "Rcpp");
        assert_eq!(desc.linking_to[1].package, "RcppArmadillo");
        assert_eq!(
            desc.linking_to[1].version_constraint.as_deref(),
            Some(">= 0.9")
        );
    }

    // Fields without a typed member remain reachable through `fields`.
    #[test]
    fn raw_fields_accessible() {
        let input = "\
Package: myPkg
Version: 1.0
License: MIT
NeedsCompilation: no
";
        let desc = PackageDescription::parse(input).unwrap();
        assert_eq!(desc.fields.get("License").unwrap(), "MIT");
        assert_eq!(desc.fields.get("NeedsCompilation").unwrap(), "no");
    }

    #[test]
    fn empty_dependency_fields() {
        let input = "\
Package: minimal
Version: 0.0.1
";
        let desc = PackageDescription::parse(input).unwrap();
        assert!(desc.depends.is_empty());
        assert!(desc.imports.is_empty());
        assert!(desc.suggests.is_empty());
        assert!(desc.linking_to.is_empty());
    }

    #[test]
    fn trailing_comma_in_deps() {
        // Some packages have trailing commas
        let input = "\
Package: messy
Version: 1.0
Imports: dplyr, tidyr,
";
        let desc = PackageDescription::parse(input).unwrap();
        assert_eq!(desc.imports.len(), 2);
        assert_eq!(desc.imports[0].package, "dplyr");
        assert_eq!(desc.imports[1].package, "tidyr");
    }
}