miniextendr_api/registry.rs
1//! Automatic registration for miniextendr.
2//!
3//! Every `#[miniextendr]` item self-registers at link time. `package_init()`
4//! (generated by `miniextendr_init!`) calls [`miniextendr_register_routines`](crate::registry::miniextendr_register_routines)
5//! during `R_init_*` to finalize registration with R. Users never interact
6//! with this module.
7
8use crate::abi::{mx_erased, mx_tag};
9use crate::ffi::{DllInfo, R_CallMethodDef};
10use linkme::distributed_slice;
11use std::os::raw::c_void;
12
13// region: Distributed Slices
14
/// R `.Call` method registrations (function + method C wrappers).
///
/// Each `#[miniextendr]` function or method emits an entry here.
///
/// [`miniextendr_register_routines`] copies every entry into a fresh table,
/// appends the `miniextendr_write_wrappers` entry and a null sentinel, and
/// hands that table to R.
#[distributed_slice]
pub static MX_CALL_DEFS: [R_CallMethodDef];
20
/// R wrapper code fragments with priority for ordering.
///
/// Each `#[miniextendr]` function, impl block, or trait impl emits an entry.
/// Priorities ensure correct evaluation order when R sources the wrapper file
/// (sidecar helpers must be defined before class definitions that reference them).
///
/// The slice itself is unordered; [`collect_r_wrappers`] is the function that
/// sorts the entries by [`RWrapperPriority`] and removes duplicates.
#[distributed_slice]
pub static MX_R_WRAPPERS: [RWrapperEntry];
28
/// ALTREP class registration functions, called once at package init.
///
/// Each ALTREP struct or trait impl emits an entry.
///
/// [`miniextendr_register_routines`] invokes every function in this slice,
/// unless the `MINIEXTENDR_CDYLIB_WRAPPERS` environment variable is set
/// (wrapper-generation mode), in which case ALTREP registration is skipped.
#[distributed_slice]
pub static MX_ALTREP_REGISTRATIONS: [fn()];
34
/// Trait dispatch entries for [`universal_query`].
///
/// Each `#[miniextendr] impl Trait for Type` emits an entry mapping
/// `(concrete_tag, trait_tag)` to the trait's vtable pointer.
///
/// Lookups are a linear scan over this slice; the first entry whose two tags
/// both match wins.
#[distributed_slice]
pub static MX_TRAIT_DISPATCH: [TraitDispatchEntry];
41// endregion
42
43// region: Entry Types
44
/// Ordering priority for R wrapper code fragments.
///
/// Variant declaration order = output order: the derived `Ord` compares
/// variants by their position in this list, and [`collect_r_wrappers`] sorts
/// fragments by this value. The order matters because R evaluates the wrapper
/// file top-to-bottom, so dependencies must come first: sidecar accessors
/// before class definitions, classes before functions, etc.
///
/// Do not reorder the variants without intending to change the layout of the
/// generated wrapper file.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum RWrapperPriority {
    /// `#[r_data]` getters/setters — must come before class definitions.
    Sidecar,
    /// Class definitions (impl blocks: env/R6/S3/S4/S7).
    Class,
    /// Standalone `#[miniextendr]` functions.
    Function,
    /// Trait impl wrappers (`impl Trait for Type`).
    TraitImpl,
    /// Vctrs S3 method wrappers (`#[derive(Vctrs)]`).
    Vctrs,
}
63
64/// R wrapper code with priority for ordering.
65pub struct RWrapperEntry {
66 /// Ordering priority (lower = earlier in output file).
67 pub priority: RWrapperPriority,
68 /// R source code fragment.
69 pub content: &'static str,
70 /// Source file path (from `file!()`). Used to derive a default `@rdname`
71 /// for standalone functions that don't have an explicit one, so that all
72 /// functions from the same source file share a single .Rd page.
73 pub source_file: &'static str,
74}
75
76// SAFETY: All fields are immutable and valid for 'static lifetime.
77unsafe impl Sync for RWrapperEntry {}
78
/// Trait dispatch entry mapping (concrete_tag, trait_tag) → vtable.
///
/// Stored in [`MX_TRAIT_DISPATCH`] and matched by [`universal_query`], which
/// compares both tags with `==` and returns `vtable` unchanged on a hit.
/// The layout is `#[repr(C)]`, so field order is part of the type's contract.
#[repr(C)]
pub struct TraitDispatchEntry {
    /// Tag identifying the concrete type.
    pub concrete_tag: mx_tag,
    /// Tag identifying the trait interface.
    pub trait_tag: mx_tag,
    /// Pointer to the trait's vtable (cast from `&'static SomeVTable`).
    pub vtable: *const c_void,
}

// SAFETY: vtable points to a static vtable valid for program lifetime.
// Tags are Copy values. All fields are safe to read from any thread.
// The raw-pointer field is what prevents the compiler from deriving these
// auto traits, hence the manual impls.
unsafe impl Sync for TraitDispatchEntry {}
unsafe impl Send for TraitDispatchEntry {}
94// endregion
95
96// region: Universal Query
97
98/// Universal query function for trait dispatch.
99///
100/// Scans [`MX_TRAIT_DISPATCH`] for a matching `(concrete_tag, trait_tag)` pair.
101/// Returns the vtable pointer, or null if the trait is not implemented.
102///
103/// This replaces per-type query functions — a single function handles all types
104/// by reading from the global dispatch table.
105///
106/// # Safety
107///
108/// - `ptr` must point to a valid `mx_erased` with a valid base vtable.
109/// - Must be called on R's main thread.
110pub unsafe extern "C" fn universal_query(ptr: *mut mx_erased, trait_tag: mx_tag) -> *const c_void {
111 let concrete_tag = unsafe { (*(*ptr).base).concrete_tag };
112 for entry in MX_TRAIT_DISPATCH.iter() {
113 if entry.concrete_tag == concrete_tag && entry.trait_tag == trait_tag {
114 return entry.vtable;
115 }
116 }
117 std::ptr::null()
118}
119// endregion
120
121// region: Initialization
122
123/// Register all `#[miniextendr]` routines and ALTREP classes with R.
124///
125/// Called from `package_init()` during `R_init_*` (via `miniextendr_init!`).
126/// Everything else is automatic.
127///
128/// # Safety
129///
130/// Must be called from R's main thread during `R_init_*`.
131/// `dll` must be a valid pointer provided by R.
132#[unsafe(no_mangle)]
133pub unsafe extern "C" fn miniextendr_register_routines(dll: *mut DllInfo) {
134 // 1. Register ALTREP classes (skip during cdylib wrapper generation)
135 //
136 // During wrapper-gen, the cdylib is loaded temporarily via dyn.load() then
137 // unloaded via dyn.unload(). ALTREP class registration creates R-global entries
138 // with method pointers into the cdylib code. After dyn.unload(), those pointers
139 // become dangling. When the staticlib later re-registers, R may still have the
140 // stale entries, leading to heap corruption (e.g., "malloc(): unsorted double
141 // linked list corrupted" on Linux).
142 let wrapper_gen = std::env::var_os("MINIEXTENDR_CDYLIB_WRAPPERS").is_some();
143 if !wrapper_gen {
144 // User-defined ALTREP classes (from #[miniextendr] structs)
145 for reg_fn in MX_ALTREP_REGISTRATIONS.iter() {
146 reg_fn();
147 }
148 // Built-in ALTREP classes (Vec, Box, Range, Arrow) — must be
149 // registered eagerly so readRDS can find them in fresh sessions.
150 crate::altrep_impl::register_builtin_altrep_classes();
151 #[cfg(feature = "arrow")]
152 crate::altrep_impl::register_arrow_altrep_classes();
153
154 // Verify no two ALTREP types registered the same class name.
155 // Duplicates cause silent overwrites in R — the wrong type gets
156 // reconstructed on readRDS, leading to memory corruption.
157 crate::altrep::assert_altrep_class_uniqueness();
158 }
159
160 // 2. Build call method defs with null sentinel
161 let mut call_defs: Vec<R_CallMethodDef> = MX_CALL_DEFS.iter().copied().collect();
162 // Always register miniextendr_write_wrappers so it's visible via
163 // getNativeSymbolInfo even when R_forceSymbols(TRUE) is set.
164 // SAFETY: DL_FUNC is Option<extern "C-unwind" fn() -> *mut c_void> — R's
165 // standard erased function pointer. The actual signature (SEXP -> SEXP) is
166 // ABI-compatible; R dispatches based on numArgs.
167 call_defs.push(R_CallMethodDef {
168 name: c"miniextendr_write_wrappers".as_ptr(),
169 fun: unsafe {
170 std::mem::transmute::<*const (), Option<unsafe extern "C-unwind" fn() -> *mut c_void>>(
171 miniextendr_write_wrappers as *const (),
172 )
173 },
174 numArgs: 1,
175 });
176 call_defs.push(R_CallMethodDef {
177 name: std::ptr::null(),
178 fun: None,
179 numArgs: 0,
180 });
181
182 // 3. Register routines
183 // Leak the Vec — init runs once at package load, so this is fine.
184 unsafe {
185 crate::ffi::R_registerRoutines_unchecked(
186 dll,
187 std::ptr::null(),
188 call_defs.leak().as_ptr(),
189 std::ptr::null(),
190 std::ptr::null(),
191 );
192 }
193}
194
195/// Collect all R wrapper entries, sorted by priority and deduplicated.
196///
197/// Within each priority group, S7 class definitions are topologically sorted
198/// so parents are defined before children (S7 `parent = X` requires X to exist).
199pub fn collect_r_wrappers() -> Vec<std::borrow::Cow<'static, str>> {
200 let mut entries: Vec<&RWrapperEntry> = MX_R_WRAPPERS.iter().collect();
201 entries.sort_by_key(|e| e.priority);
202
203 let mut seen = std::collections::HashSet::<&str>::new();
204 let mut result: Vec<std::borrow::Cow<'static, str>> = Vec::with_capacity(entries.len());
205 for entry in entries {
206 let trimmed = entry.content.trim();
207 if !trimmed.is_empty() && seen.insert(trimmed) {
208 // For standalone functions without explicit @rdname, inject one
209 // derived from the source file stem so same-file functions share
210 // a single .Rd page.
211 if entry.priority == RWrapperPriority::Function
212 && !has_rdname_tag(trimmed)
213 && !has_no_rd_tag(trimmed)
214 {
215 if let Some(rdname) = rdname_from_source_file(entry.source_file) {
216 result.push(std::borrow::Cow::Owned(inject_rdname(trimmed, &rdname)));
217 continue;
218 }
219 }
220 result.push(std::borrow::Cow::Borrowed(trimmed));
221 }
222 }
223
224 // Topological sort for S7 inheritance ordering
225 sort_s7_classes(&mut result);
226
227 result
228}
229
/// Report whether any line of an R wrapper fragment is an `@rdname` tag.
///
/// A line counts when, after trimming surrounding whitespace, it begins with
/// `#' @rdname ` (including the trailing space, so a bare `#' @rdname` with no
/// value does not count).
fn has_rdname_tag(content: &str) -> bool {
    content
        .lines()
        .map(str::trim)
        .any(|line| line.starts_with("#' @rdname "))
}
237
/// Report whether an R wrapper fragment opts out of documentation via `@noRd`.
///
/// A line counts only when its whitespace-trimmed text is exactly `#' @noRd`;
/// anything following the tag on the same line makes it not match.
fn has_no_rd_tag(content: &str) -> bool {
    content
        .lines()
        .any(|raw| matches!(raw.trim(), "#' @noRd"))
}
245
/// Derive an `@rdname` value from a source file path.
///
/// Takes the final path component (splitting on either `/` or `\`) and strips
/// a trailing `.rs` if present.
///
/// Returns `None` when the resulting stem is empty, `lib`, or `mod`; those
/// names say nothing about the file's contents, so no default is derived.
///
/// `"src/rust/zero_copy_tests.rs"` → `Some("zero_copy_tests")`
/// `"lib.rs"` → `None`
/// `"foo/mod.rs"` → `None`
fn rdname_from_source_file(path: &str) -> Option<String> {
    // Both separators are single-byte, so `sep + 1` is a valid char boundary.
    let basename = path
        .rfind(['/', '\\'])
        .map_or(path, |sep| &path[sep + 1..]);
    let stem = basename.strip_suffix(".rs").unwrap_or(basename);

    match stem {
        "" | "lib" | "mod" => None,
        meaningful => Some(meaningful.to_string()),
    }
}
258
/// Inject `#' @rdname <value>` (and `#' @title …` if missing) into an R
/// wrapper fragment.
///
/// The new line(s) are placed, in order of preference:
///
/// 1. immediately before the first `@export` / `@keywords` / `@source` line;
/// 2. otherwise immediately after the last roxygen (`#'`) line;
/// 3. otherwise — the fragment has no roxygen lines at all — at the very top,
///    so the tags precede the function definition instead of landing inside
///    its body.
///
/// A `@title` is generated from `rdname` (underscores become spaces) only when
/// the fragment has no `#' @title ` line of its own; functions without doc
/// comments need one so the shared `@rdname` page has an anchor.
///
/// The result is joined with `\n` and carries no trailing newline.
fn inject_rdname(content: &str, rdname: &str) -> String {
    let rdname_line = format!("#' @rdname {rdname}");
    let has_title = content.lines().any(|l| l.trim().starts_with("#' @title "));
    let title_line = (!has_title).then(|| format!("#' @title {}", rdname.replace('_', " ")));

    let lines: Vec<&str> = content.lines().collect();

    // Tags that the injected lines must come before.
    let is_anchor = |line: &str| {
        let trimmed = line.trim();
        trimmed.starts_with("#' @export")
            || trimmed.starts_with("#' @keywords")
            || trimmed.starts_with("#' @source")
    };

    // Resolve the insertion index once. The final `unwrap_or(0)` covers
    // fragments without any roxygen line (and empty input): insert at the top
    // rather than after the first code line.
    let insert_at = lines
        .iter()
        .position(|&line| is_anchor(line))
        .or_else(|| {
            lines
                .iter()
                .rposition(|line| line.trim().starts_with("#'"))
                .map(|last_roxy| last_roxy + 1)
        })
        .unwrap_or(0);

    let mut result: Vec<&str> = Vec::with_capacity(lines.len() + 2);
    result.extend_from_slice(&lines[..insert_at]);
    result.extend(title_line.as_deref());
    result.push(rdname_line.as_str());
    result.extend_from_slice(&lines[insert_at..]);

    result.join("\n")
}
310
/// Reorder S7 class definitions so that every parent precedes its children.
///
/// An entry is treated as an S7 class definition when it contains
/// `S7::new_class(`. The class name is the last whitespace-separated token
/// before the `<-` that precedes that call; the parent is the text following
/// the first `parent = ` after the call, up to the next `,`, `)` or newline.
///
/// Only the positions already occupied by S7 definitions are rewritten, so all
/// other entries stay exactly where they were. Parents that are not defined in
/// `entries` are treated as external and impose no constraint. Classes caught
/// in an inheritance cycle are appended in their original relative order.
fn sort_s7_classes(entries: &mut [std::borrow::Cow<'static, str>]) {
    use std::collections::HashMap;

    const NEW_CLASS_CALL: &str = "S7::new_class(";
    const PARENT_ARG: &str = "parent = ";

    /// One parsed S7 definition: where it sits in `entries` and what it needs.
    struct S7Class {
        slot: usize,
        name: String,
        parent: Option<String>,
    }

    // Parse every S7 definition. Names are copied out so `entries` can be
    // mutated afterwards.
    let classes: Vec<S7Class> = entries
        .iter()
        .enumerate()
        .filter_map(|(slot, text)| {
            let call_at = text.find(NEW_CLASS_CALL)?;

            // "NAME <- S7::new_class(" — take the token left of the arrow.
            let head = text[..call_at].trim_end();
            let left_of_arrow = match head.strip_suffix("<-") {
                Some(prefix) => prefix,
                None => head.rsplit_once("<-")?.1,
            };
            let name = left_of_arrow.split_whitespace().last()?.to_string();

            // "parent = ParentName," — optional.
            let tail = &text[call_at..];
            let parent = tail.find(PARENT_ARG).and_then(|at| {
                let value = &tail[at + PARENT_ARG.len()..];
                let value = value
                    .split([',', ')', '\n'])
                    .next()
                    .unwrap_or(value)
                    .trim();
                (!value.is_empty()).then(|| value.to_string())
            });

            Some(S7Class { slot, name, parent })
        })
        .collect();

    // Zero or one class: nothing can be out of order.
    let total = classes.len();
    if total < 2 {
        return;
    }

    // Class name → index into `classes` (a later duplicate name wins).
    let index_of: HashMap<&str, usize> = classes
        .iter()
        .enumerate()
        .map(|(idx, class)| (class.name.as_str(), idx))
        .collect();

    // Repeated sweeps: emit each class once its parent is emitted (or is
    // absent / external). `total` sweeps always suffice for acyclic input.
    let mut emitted = vec![false; total];
    let mut order: Vec<usize> = Vec::with_capacity(total);
    for _ in 0..total {
        if order.len() == total {
            break;
        }
        for (idx, class) in classes.iter().enumerate() {
            if emitted[idx] {
                continue;
            }
            let parent_ready = class
                .parent
                .as_deref()
                .and_then(|parent_name| index_of.get(parent_name))
                .map_or(true, |&parent_idx| emitted[parent_idx]);
            if parent_ready {
                order.push(idx);
                emitted[idx] = true;
            }
        }
    }

    // Anything left is part of a cycle; keep its original order.
    order.extend((0..total).filter(|&idx| !emitted[idx]));

    // Snapshot the fragments in their new order before overwriting any slot,
    // then write them back into the original S7 positions.
    let reordered: Vec<std::borrow::Cow<'static, str>> = order
        .iter()
        .map(|&src| entries[classes[src].slot].clone())
        .collect();
    for (class, fragment) in classes.iter().zip(reordered) {
        entries[class.slot] = fragment;
    }
}
401// endregion
402
403// region: R Wrapper File Generation
404
405/// Write all R wrapper entries to a file.
406///
407/// Called from [`miniextendr_write_wrappers`] (via cdylib `dyn.load`/`.Call`).
408/// All distributed_slice entries from `#[miniextendr]` items are available
409/// because the cdylib includes all symbols by design.
410pub fn write_r_wrappers_to_file(path: &str) {
411 // Build the new content in memory
412 let mut content = String::from(
413 "# ---- AUTO-GENERATED FILE - DO NOT EDIT ----
414# This file is generated by the miniextendr proc-macro during package build.
415# Any manual changes will be overwritten.
416#
417# To regenerate: rebuild the package (R CMD INSTALL or devtools::install).
418# nolint start
419# nocov start
420
421",
422 );
423
424 for fragment in collect_r_wrappers() {
425 content.push_str(fragment.as_ref());
426 content.push_str("\n\n");
427 }
428
429 content.push_str("# nocov end\n# nolint end\n");
430
431 // Only write if content changed (avoids unnecessary NAMESPACE/man regeneration)
432 let existing = std::fs::read_to_string(path).unwrap_or_default();
433 if existing == content {
434 return;
435 }
436
437 std::fs::write(path, content.as_bytes())
438 .unwrap_or_else(|e| panic!("failed to write {path}: {e}"));
439
440 if !existing.is_empty() {
441 let filename = std::path::Path::new(path)
442 .file_name()
443 .and_then(|f| f.to_str())
444 .unwrap_or("wrappers.R");
445 eprintln!();
446 eprintln!("NOTE: {filename} changed — run devtools::document() to update NAMESPACE.");
447 eprintln!();
448 }
449}
450// endregion
451
452// region: C-Callable Entry Points (cdylib)
453
454/// C-callable entry point for R wrapper generation via cdylib.
455///
456/// Called from Makevars via Rscript: loads the cdylib with `dyn.load()`,
457/// then `.Call("miniextendr_write_wrappers", path)` to write
458/// `R/miniextendr-wrappers.R`. NAMESPACE generation is left to roxygen2
459/// (`devtools::document()`).
460///
461/// # Safety
462///
463/// `path_sexp` must be a valid STRSXP of length >= 1.
464#[unsafe(no_mangle)]
465pub unsafe extern "C" fn miniextendr_write_wrappers(
466 path_sexp: crate::ffi::SEXP,
467) -> crate::ffi::SEXP {
468 unsafe {
469 use crate::ffi::{SEXP, SexpExt};
470
471 let char_sexp = path_sexp.string_elt_unchecked(0);
472 let c_str = std::ffi::CStr::from_ptr(char_sexp.r_char_unchecked());
473 let path = c_str
474 .to_str()
475 .unwrap_or_else(|e| panic!("invalid UTF-8 in path: {e}"));
476
477 write_r_wrappers_to_file(path);
478
479 SEXP::nil()
480 }
481}
482// endregion