Skip to main content

miniextendr_api/
registry.rs

1//! Automatic registration for miniextendr.
2//!
3//! Every `#[miniextendr]` item self-registers at link time. `package_init()`
4//! (generated by `miniextendr_init!`) calls [`miniextendr_register_routines`](crate::registry::miniextendr_register_routines)
5//! during `R_init_*` to finalize registration with R. Users never interact
6//! with this module.
7
8use crate::abi::{mx_erased, mx_tag};
9use crate::ffi::{DllInfo, R_CallMethodDef};
10use linkme::distributed_slice;
11use std::os::raw::c_void;
12
13// region: Distributed Slices
14
/// R `.Call` method registrations (function + method C wrappers).
///
/// Each `#[miniextendr]` function or method emits an entry here.
/// `miniextendr_register_routines` copies every entry into a `Vec`, appends
/// its own extra entry plus a null sentinel, and hands the result to R.
#[distributed_slice]
pub static MX_CALL_DEFS: [R_CallMethodDef];

/// R wrapper code fragments with priority for ordering.
///
/// Each `#[miniextendr]` function, impl block, or trait impl emits an entry.
/// Priorities ensure correct evaluation order when R sources the wrapper file
/// (sidecar helpers must be defined before class definitions that reference them).
/// Read by `collect_r_wrappers`, which sorts and deduplicates the fragments.
#[distributed_slice]
pub static MX_R_WRAPPERS: [RWrapperEntry];

/// ALTREP class registration functions, called once at package init.
///
/// Each ALTREP struct or trait impl emits an entry.
/// `miniextendr_register_routines` invokes every function in this slice,
/// unless the `MINIEXTENDR_CDYLIB_WRAPPERS` environment variable is set.
#[distributed_slice]
pub static MX_ALTREP_REGISTRATIONS: [fn()];

/// Trait dispatch entries for [`universal_query`].
///
/// Each `#[miniextendr] impl Trait for Type` emits an entry mapping
/// `(concrete_tag, trait_tag)` to the trait's vtable pointer.
/// Lookup is a linear scan; the first matching entry wins.
#[distributed_slice]
pub static MX_TRAIT_DISPATCH: [TraitDispatchEntry];
41// endregion
42
43// region: Entry Types
44
/// Ordering priority for R wrapper code fragments.
///
/// Variant declaration order = output order (the derived `Ord` compares
/// variants by declaration position). The order matters because R evaluates
/// the wrapper file top-to-bottom, so dependencies must come first:
/// sidecar accessors before class definitions, classes before functions, etc.
///
/// `Debug` is derived so priorities can be printed in diagnostics and used
/// with `assert_eq!`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum RWrapperPriority {
    /// `#[r_data]` getters/setters — must come before class definitions.
    Sidecar,
    /// Class definitions (impl blocks: env/R6/S3/S4/S7).
    Class,
    /// Standalone `#[miniextendr]` functions.
    Function,
    /// Trait impl wrappers (`impl Trait for Type`).
    TraitImpl,
    /// Vctrs S3 method wrappers (`#[derive(Vctrs)]`).
    Vctrs,
}
63
64/// R wrapper code with priority for ordering.
65pub struct RWrapperEntry {
66    /// Ordering priority (lower = earlier in output file).
67    pub priority: RWrapperPriority,
68    /// R source code fragment.
69    pub content: &'static str,
70    /// Source file path (from `file!()`). Used to derive a default `@rdname`
71    /// for standalone functions that don't have an explicit one, so that all
72    /// functions from the same source file share a single .Rd page.
73    pub source_file: &'static str,
74}
75
76// SAFETY: All fields are immutable and valid for 'static lifetime.
77unsafe impl Sync for RWrapperEntry {}
78
/// Trait dispatch entry mapping (concrete_tag, trait_tag) → vtable.
///
/// Entries are collected in `MX_TRAIT_DISPATCH` and matched by
/// [`universal_query`], which compares both tags for equality and returns
/// `vtable` from the first entry that matches.
///
/// The struct is `#[repr(C)]`, so its fields are laid out in declaration order.
#[repr(C)]
pub struct TraitDispatchEntry {
    /// Tag identifying the concrete type.
    pub concrete_tag: mx_tag,
    /// Tag identifying the trait interface.
    pub trait_tag: mx_tag,
    /// Pointer to the trait's vtable (cast from `&'static SomeVTable`).
    pub vtable: *const c_void,
}

// The raw `vtable` pointer stops the compiler from deriving `Send`/`Sync`
// automatically, hence the manual impls below.
//
// SAFETY: vtable points to a static vtable valid for program lifetime.
// Tags are Copy values. All fields are safe to read from any thread.
unsafe impl Sync for TraitDispatchEntry {}
// SAFETY: same reasoning as for `Sync` above — the entry carries no
// thread-affine state, only tag values and a pointer to static data.
unsafe impl Send for TraitDispatchEntry {}
94// endregion
95
96// region: Universal Query
97
98/// Universal query function for trait dispatch.
99///
100/// Scans [`MX_TRAIT_DISPATCH`] for a matching `(concrete_tag, trait_tag)` pair.
101/// Returns the vtable pointer, or null if the trait is not implemented.
102///
103/// This replaces per-type query functions — a single function handles all types
104/// by reading from the global dispatch table.
105///
106/// # Safety
107///
108/// - `ptr` must point to a valid `mx_erased` with a valid base vtable.
109/// - Must be called on R's main thread.
110pub unsafe extern "C" fn universal_query(ptr: *mut mx_erased, trait_tag: mx_tag) -> *const c_void {
111    let concrete_tag = unsafe { (*(*ptr).base).concrete_tag };
112    for entry in MX_TRAIT_DISPATCH.iter() {
113        if entry.concrete_tag == concrete_tag && entry.trait_tag == trait_tag {
114            return entry.vtable;
115        }
116    }
117    std::ptr::null()
118}
119// endregion
120
121// region: Initialization
122
/// Register all `#[miniextendr]` routines and ALTREP classes with R.
///
/// Called from `package_init()` during `R_init_*` (via `miniextendr_init!`).
/// Everything else is automatic.
///
/// Steps, in order:
/// 1. Unless the `MINIEXTENDR_CDYLIB_WRAPPERS` environment variable is set,
///    run every ALTREP registration function, register the built-in ALTREP
///    classes, and assert that ALTREP class names are unique.
/// 2. Copy `MX_CALL_DEFS` into a `Vec`, append an entry for
///    `miniextendr_write_wrappers`, and terminate it with a null sentinel.
/// 3. Leak that `Vec` and pass it to `R_registerRoutines_unchecked` as the
///    `.Call` table; the three other table arguments are null.
///
/// # Safety
///
/// Must be called from R's main thread during `R_init_*`.
/// `dll` must be a valid pointer provided by R.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn miniextendr_register_routines(dll: *mut DllInfo) {
    // 1. Register ALTREP classes (skip during cdylib wrapper generation)
    //
    // During wrapper-gen, the cdylib is loaded temporarily via dyn.load() then
    // unloaded via dyn.unload(). ALTREP class registration creates R-global entries
    // with method pointers into the cdylib code. After dyn.unload(), those pointers
    // become dangling. When the staticlib later re-registers, R may still have the
    // stale entries, leading to heap corruption (e.g., "malloc(): unsorted double
    // linked list corrupted" on Linux).
    //
    // Only the variable's presence is tested; its value (even empty) is ignored.
    let wrapper_gen = std::env::var_os("MINIEXTENDR_CDYLIB_WRAPPERS").is_some();
    if !wrapper_gen {
        // User-defined ALTREP classes (from #[miniextendr] structs)
        for reg_fn in MX_ALTREP_REGISTRATIONS.iter() {
            reg_fn();
        }
        // Built-in ALTREP classes (Vec, Box, Range, Arrow) — must be
        // registered eagerly so readRDS can find them in fresh sessions.
        crate::altrep_impl::register_builtin_altrep_classes();
        #[cfg(feature = "arrow")]
        crate::altrep_impl::register_arrow_altrep_classes();

        // Verify no two ALTREP types registered the same class name.
        // Duplicates cause silent overwrites in R — the wrong type gets
        // reconstructed on readRDS, leading to memory corruption.
        crate::altrep::assert_altrep_class_uniqueness();
    }

    // 2. Build call method defs with null sentinel
    let mut call_defs: Vec<R_CallMethodDef> = MX_CALL_DEFS.iter().copied().collect();
    // Always register miniextendr_write_wrappers so it's visible via
    // getNativeSymbolInfo even when R_forceSymbols(TRUE) is set.
    // SAFETY: the `fun` field is typed as
    // `Option<unsafe extern "C-unwind" fn() -> *mut c_void>` — R's standard
    // erased function pointer. The actual signature (SEXP -> SEXP) is
    // ABI-compatible; R dispatches based on numArgs.
    // NOTE(review): `miniextendr_write_wrappers` is declared `extern "C"`, not
    // `extern "C-unwind"` as in the pointer type used here — confirm this
    // mismatch is intended.
    call_defs.push(R_CallMethodDef {
        name: c"miniextendr_write_wrappers".as_ptr(),
        fun: unsafe {
            std::mem::transmute::<*const (), Option<unsafe extern "C-unwind" fn() -> *mut c_void>>(
                miniextendr_write_wrappers as *const (),
            )
        },
        numArgs: 1,
    });
    // Null-name entry marks the end of the table.
    call_defs.push(R_CallMethodDef {
        name: std::ptr::null(),
        fun: None,
        numArgs: 0,
    });

    // 3. Register routines
    // Leak the Vec — init runs once at package load, so this is fine.
    // Only the `.Call` table (third argument) is supplied.
    unsafe {
        crate::ffi::R_registerRoutines_unchecked(
            dll,
            std::ptr::null(),
            call_defs.leak().as_ptr(),
            std::ptr::null(),
            std::ptr::null(),
        );
    }
}
194
195/// Collect all R wrapper entries, sorted by priority and deduplicated.
196///
197/// Within each priority group, S7 class definitions are topologically sorted
198/// so parents are defined before children (S7 `parent = X` requires X to exist).
199pub fn collect_r_wrappers() -> Vec<std::borrow::Cow<'static, str>> {
200    let mut entries: Vec<&RWrapperEntry> = MX_R_WRAPPERS.iter().collect();
201    entries.sort_by_key(|e| e.priority);
202
203    let mut seen = std::collections::HashSet::<&str>::new();
204    let mut result: Vec<std::borrow::Cow<'static, str>> = Vec::with_capacity(entries.len());
205    for entry in entries {
206        let trimmed = entry.content.trim();
207        if !trimmed.is_empty() && seen.insert(trimmed) {
208            // For standalone functions without explicit @rdname, inject one
209            // derived from the source file stem so same-file functions share
210            // a single .Rd page.
211            if entry.priority == RWrapperPriority::Function
212                && !has_rdname_tag(trimmed)
213                && !has_no_rd_tag(trimmed)
214            {
215                if let Some(rdname) = rdname_from_source_file(entry.source_file) {
216                    result.push(std::borrow::Cow::Owned(inject_rdname(trimmed, &rdname)));
217                    continue;
218                }
219            }
220            result.push(std::borrow::Cow::Borrowed(trimmed));
221        }
222    }
223
224    // Topological sort for S7 inheritance ordering
225    sort_s7_classes(&mut result);
226
227    result
228}
229
/// Report whether any line of the fragment, once trimmed, begins with
/// `#' @rdname ` (tag followed by a space).
fn has_rdname_tag(content: &str) -> bool {
    content
        .lines()
        .map(str::trim)
        .any(|line| line.starts_with("#' @rdname "))
}
237
/// Report whether any line of the fragment, once trimmed, is exactly
/// `#' @noRd`.
fn has_no_rd_tag(content: &str) -> bool {
    for raw in content.lines() {
        if raw.trim() == "#' @noRd" {
            return true;
        }
    }
    false
}
245
/// Derive an `@rdname` value from a source file path.
///
/// Takes the final path component (split on `/` or `\`) and drops a trailing
/// `.rs` if present. Returns `None` when the resulting stem is empty, `lib`,
/// or `mod`.
///
/// `"src/rust/zero_copy_tests.rs"` → `Some("zero_copy_tests")`
/// `"lib.rs"` → `None`
fn rdname_from_source_file(path: &str) -> Option<String> {
    // Both separators are single-byte, so `cut + 1` is a valid char boundary.
    let base = match path.rfind(['/', '\\']) {
        Some(cut) => &path[cut + 1..],
        None => path,
    };
    match base.strip_suffix(".rs").unwrap_or(base) {
        "" | "lib" | "mod" => None,
        stem => Some(stem.to_owned()),
    }
}
258
/// Inject `#' @rdname <value>` (and `#' @title …` if the fragment has no
/// `@title` tag) into an R wrapper fragment.
///
/// Insertion point, in order of preference:
/// 1. immediately before the first `@export`/`@keywords`/`@source` line;
/// 2. immediately after the last roxygen (`#'`) line;
/// 3. at the very top, when the fragment has no roxygen lines at all, so the
///    tags sit above the function definition rather than inside or after it.
///
/// The generated title is `rdname` with underscores replaced by spaces.
/// Lines are re-joined with `\n`; a trailing newline in `content` is not kept.
fn inject_rdname(content: &str, rdname: &str) -> String {
    /// True for the roxygen tags the new lines should be placed in front of.
    fn is_anchor(line: &str) -> bool {
        let tag = line.trim();
        tag.starts_with("#' @export")
            || tag.starts_with("#' @keywords")
            || tag.starts_with("#' @source")
    }

    let rdname_line = format!("#' @rdname {rdname}");
    // Functions with no doc comments need a title so the @rdname page has an anchor
    let has_title = content.lines().any(|l| l.trim().starts_with("#' @title "));
    let title_line = (!has_title).then(|| format!("#' @title {}", rdname.replace('_', " ")));

    let lines: Vec<&str> = content.lines().collect();
    let insert_at = lines
        .iter()
        .position(|&l| is_anchor(l))
        .or_else(|| {
            lines
                .iter()
                .rposition(|l| l.trim().starts_with("#'"))
                .map(|last_roxy| last_roxy + 1)
        })
        // No roxygen block at all: put the tags before the first line.
        .unwrap_or(0);

    let mut result: Vec<&str> = Vec::with_capacity(lines.len() + 2);
    result.extend_from_slice(&lines[..insert_at]);
    result.extend(title_line.as_deref());
    result.push(rdname_line.as_str());
    result.extend_from_slice(&lines[insert_at..]);

    result.join("\n")
}
310
/// Reorder S7 class definitions so that each parent precedes its children.
///
/// An entry counts as an S7 class when it contains `S7::new_class(` and a
/// class name can be read from the `NAME <-` assignment in front of it. The
/// parent is taken from the first `parent = …` after the constructor call, up
/// to the next `,`, `)` or newline. Classes are permuted only among the slots
/// that S7 classes already occupied, so non-S7 entries never move. Classes
/// caught in a dependency cycle keep their original relative order and are
/// placed after all sortable classes.
fn sort_s7_classes(entries: &mut [std::borrow::Cow<'static, str>]) {
    use std::collections::HashMap;

    const CTOR: &str = "S7::new_class(";
    const PARENT_KEY: &str = "parent = ";

    // (index into `entries`, class name, optional parent name)
    let classes: Vec<(usize, String, Option<String>)> = entries
        .iter()
        .enumerate()
        .filter_map(|(index, fragment)| {
            let text: &str = fragment;
            let ctor_at = text.find(CTOR)?;

            // Class name: last whitespace-separated token next to the `<-`.
            let head = text[..ctor_at].trim_end();
            let assigned = match head.strip_suffix("<-") {
                Some(lhs) => lhs,
                None => head.rsplit_once("<-")?.1,
            };
            let name = assigned.split_whitespace().last()?;

            // Parent: text following `parent = ` up to a delimiter.
            let tail = &text[ctor_at..];
            let parent = tail.find(PARENT_KEY).and_then(|at| {
                let value = &tail[at + PARENT_KEY.len()..];
                let stop = value.find([',', ')', '\n']).unwrap_or(value.len());
                let value = value[..stop].trim();
                (!value.is_empty()).then(|| value.to_owned())
            });

            Some((index, name.to_owned(), parent))
        })
        .collect();

    let total = classes.len();
    if total < 2 {
        return;
    }

    // Class name → position within `classes` (a later duplicate name wins).
    let mut position_of: HashMap<&str, usize> = HashMap::with_capacity(total);
    for (pos, (_, name, _)) in classes.iter().enumerate() {
        position_of.insert(name.as_str(), pos);
    }

    // Sweep repeatedly, emitting every class whose parent is absent, external,
    // or already emitted; stop once a full sweep emits nothing new.
    let mut placed = vec![false; total];
    let mut order: Vec<usize> = Vec::with_capacity(total);
    loop {
        let emitted_before = order.len();
        for (pos, (_, _, parent)) in classes.iter().enumerate() {
            if placed[pos] {
                continue;
            }
            let local_parent = parent
                .as_deref()
                .and_then(|pname| position_of.get(pname).copied());
            let ready = local_parent.map_or(true, |pp| placed[pp]);
            if ready {
                placed[pos] = true;
                order.push(pos);
            }
        }
        if order.len() == emitted_before {
            break;
        }
    }

    // Whatever is left is part of a cycle: append in original order.
    order.extend((0..total).filter(|&pos| !placed[pos]));

    // Lift the S7 fragments out, then drop them back into the same set of
    // slots following the computed order.
    let mut lifted: Vec<std::borrow::Cow<'static, str>> = classes
        .iter()
        .map(|(index, _, _)| std::mem::take(&mut entries[*index]))
        .collect();
    for (slot, &src) in order.iter().enumerate() {
        entries[classes[slot].0] = std::mem::take(&mut lifted[src]);
    }
}
401// endregion
402
403// region: R Wrapper File Generation
404
405/// Write all R wrapper entries to a file.
406///
407/// Called from [`miniextendr_write_wrappers`] (via cdylib `dyn.load`/`.Call`).
408/// All distributed_slice entries from `#[miniextendr]` items are available
409/// because the cdylib includes all symbols by design.
410pub fn write_r_wrappers_to_file(path: &str) {
411    // Build the new content in memory
412    let mut content = String::from(
413        "# ---- AUTO-GENERATED FILE - DO NOT EDIT ----
414# This file is generated by the miniextendr proc-macro during package build.
415# Any manual changes will be overwritten.
416#
417# To regenerate: rebuild the package (R CMD INSTALL or devtools::install).
418# nolint start
419# nocov start
420
421",
422    );
423
424    for fragment in collect_r_wrappers() {
425        content.push_str(fragment.as_ref());
426        content.push_str("\n\n");
427    }
428
429    content.push_str("# nocov end\n# nolint end\n");
430
431    // Only write if content changed (avoids unnecessary NAMESPACE/man regeneration)
432    let existing = std::fs::read_to_string(path).unwrap_or_default();
433    if existing == content {
434        return;
435    }
436
437    std::fs::write(path, content.as_bytes())
438        .unwrap_or_else(|e| panic!("failed to write {path}: {e}"));
439
440    if !existing.is_empty() {
441        let filename = std::path::Path::new(path)
442            .file_name()
443            .and_then(|f| f.to_str())
444            .unwrap_or("wrappers.R");
445        eprintln!();
446        eprintln!("NOTE: {filename} changed — run devtools::document() to update NAMESPACE.");
447        eprintln!();
448    }
449}
450// endregion
451
452// region: C-Callable Entry Points (cdylib)
453
454/// C-callable entry point for R wrapper generation via cdylib.
455///
456/// Called from Makevars via Rscript: loads the cdylib with `dyn.load()`,
457/// then `.Call("miniextendr_write_wrappers", path)` to write
458/// `R/miniextendr-wrappers.R`. NAMESPACE generation is left to roxygen2
459/// (`devtools::document()`).
460///
461/// # Safety
462///
463/// `path_sexp` must be a valid STRSXP of length >= 1.
464#[unsafe(no_mangle)]
465pub unsafe extern "C" fn miniextendr_write_wrappers(
466    path_sexp: crate::ffi::SEXP,
467) -> crate::ffi::SEXP {
468    unsafe {
469        use crate::ffi::{SEXP, SexpExt};
470
471        let char_sexp = path_sexp.string_elt_unchecked(0);
472        let c_str = std::ffi::CStr::from_ptr(char_sexp.r_char_unchecked());
473        let path = c_str
474            .to_str()
475            .unwrap_or_else(|e| panic!("invalid UTF-8 in path: {e}"));
476
477        write_r_wrappers_to_file(path);
478
479        SEXP::nil()
480    }
481}
482// endregion