Skip to main content

r/interpreter/builtins/
digest.rs

//! Digest and encoding builtins (SHA-256, SHA-512, BLAKE3, CRC32, Base64).
//!
//! Provides `digest(x, algo)` for hashing character strings and raw vectors,
//! `md5(x)` as an error stub directing users to SHA-256, and BLAKE3 builtins
//! for fast hashing of strings, raw vectors, and files. CRC32 (a checksum,
//! not a cryptographic hash) is available via `digest(x, algo="crc32")` using
//! the `crc32fast` crate. `base64enc(x)` and `base64dec(x)` convert to and
//! from standard Base64.
7
8#[cfg(feature = "digest")]
9use sha2::{Digest, Sha256, Sha512};
10
11use crate::interpreter::value::*;
12use minir_macros::builtin;
13
14// region: helpers
15
16/// Extract a character or raw input from the first argument.
17///
18/// Returns `Ok(bytes)` for the input data, or the appropriate error/NA result.
19fn extract_input_bytes(args: &[RValue]) -> Result<Result<Vec<u8>, RValue>, RError> {
20    match args.first() {
21        Some(RValue::Vector(rv)) => match &**rv {
22            Vector::Raw(bytes) => Ok(Ok(bytes.clone())),
23            Vector::Character(chars) => match chars.first() {
24                Some(Some(s)) => Ok(Ok(s.as_bytes().to_vec())),
25                Some(None) => Ok(Err(RValue::vec(Vector::Character(vec![None].into())))),
26                None => Err(RError::new(
27                    RErrorKind::Argument,
28                    "argument must be a non-empty character vector".to_string(),
29                )),
30            },
31            _ => match rv.to_characters().first() {
32                Some(Some(s)) => Ok(Ok(s.as_bytes().to_vec())),
33                Some(None) => Ok(Err(RValue::vec(Vector::Character(vec![None].into())))),
34                None => Err(RError::new(
35                    RErrorKind::Argument,
36                    "argument must be a non-empty character or raw vector".to_string(),
37                )),
38            },
39        },
40        _ => Err(RError::new(
41            RErrorKind::Argument,
42            "argument must be a character or raw vector".to_string(),
43        )),
44    }
45}
46
47// endregion
48
49// region: digest
50
/// Compute a hex digest of a character string or raw vector.
///
/// Supports SHA-256 (default), SHA-512, CRC32, and BLAKE3 (when the blake3
/// feature is enabled). The result is a lowercase hex string; the CRC32
/// checksum is zero-padded to 8 hex digits.
///
/// The bytes to hash come from `extract_input_bytes`. When that helper reports
/// an NA input, its NA value is returned as-is and `algo` is not inspected.
///
/// `algo` may be passed by name or as the second positional argument; the
/// named form takes precedence. If `algo` is supplied but is not a character
/// scalar, an argument error is raised rather than silently falling back to
/// SHA-256.
///
/// @param x character scalar or raw vector to hash
/// @param algo algorithm name: "sha256" (default), "sha512", "crc32", or "blake3"
/// @return character scalar containing the hex digest
#[cfg(feature = "digest")]
#[builtin(min_args = 1, namespace = "digest")]
fn builtin_digest(args: &[RValue], named: &[(String, RValue)]) -> Result<RValue, RError> {
    let input = match extract_input_bytes(args)? {
        Ok(bytes) => bytes,
        Err(na) => return Ok(na),
    };

    // Human-readable list of accepted algorithm names, used in error messages.
    #[cfg(feature = "blake3")]
    let supported = "\"sha256\", \"sha512\", \"crc32\", or \"blake3\"";
    #[cfg(not(feature = "blake3"))]
    let supported = "\"sha256\", \"sha512\", or \"crc32\"";

    // Locate the algorithm argument: named `algo` first, then second positional.
    let algo_arg = named
        .iter()
        .find(|(n, _)| n == "algo")
        .map(|(_, v)| v)
        .or_else(|| args.get(1));

    // Only default to SHA-256 when no algorithm was given at all; a value of
    // the wrong type is reported instead of being ignored.
    let algo = match algo_arg {
        Some(value) => value
            .as_vector()
            .and_then(|v| v.as_character_scalar())
            .ok_or_else(|| {
                RError::new(
                    RErrorKind::Argument,
                    format!(
                        "'algo' must be a character scalar \u{2014} use {}",
                        supported
                    ),
                )
            })?,
        None => "sha256".to_string(),
    };

    let hex = match algo.as_str() {
        "sha256" => {
            let result = Sha256::digest(&input);
            format!("{:x}", result)
        }
        "sha512" => {
            let result = Sha512::digest(&input);
            format!("{:x}", result)
        }
        "crc32" => {
            let checksum = crc32fast::hash(&input);
            format!("{:08x}", checksum)
        }
        #[cfg(feature = "blake3")]
        "blake3" => {
            let result = blake3::hash(&input);
            result.to_hex().to_string()
        }
        other => {
            return Err(RError::new(
                RErrorKind::Argument,
                format!(
                    "unsupported algorithm {:?} \u{2014} use {}",
                    other, supported
                ),
            ));
        }
    };

    Ok(RValue::vec(Vector::Character(vec![Some(hex)].into())))
}
113
/// Error stub for `md5()`: MD5 is intentionally not provided.
///
/// Every call fails with a message explaining that MD5 is cryptographically
/// broken and pointing the user at `digest(x, algo="sha256")` instead.
/// Arguments are accepted but never examined.
///
/// @param x ignored
/// @return always errors
#[cfg(feature = "digest")]
#[builtin(min_args = 0, namespace = "digest")]
fn builtin_md5(_args: &[RValue], _: &[(String, RValue)]) -> Result<RValue, RError> {
    let message = "md5() is not available \u{2014} MD5 is cryptographically broken. \
                   Use digest(x, algo=\"sha256\") for secure hashing.";
    Err(RError::new(RErrorKind::Other, String::from(message)))
}
131
132// endregion
133
134// region: blake3
135
/// Hash a character string or raw vector with BLAKE3 and return hex text.
///
/// The input bytes are obtained via `extract_input_bytes`; if that helper
/// reports an NA input, its NA value is returned unchanged. Otherwise the
/// digest is rendered as a 64-character lowercase hex string.
///
/// @param x character scalar or raw vector to hash
/// @return character scalar containing the 64-char hex digest
#[cfg(feature = "blake3")]
#[builtin(min_args = 1)]
fn builtin_blake3(args: &[RValue], _named: &[(String, RValue)]) -> Result<RValue, RError> {
    let bytes = match extract_input_bytes(args)? {
        Err(na) => return Ok(na),
        Ok(bytes) => bytes,
    };
    let hex_digest = blake3::hash(&bytes).to_hex().to_string();
    Ok(RValue::vec(Vector::Character(vec![Some(hex_digest)].into())))
}
154
/// Hash a character string or raw vector with BLAKE3 and return the raw digest.
///
/// The input bytes are obtained via `extract_input_bytes`. Note that when the
/// helper reports an NA input, its NA value is returned unchanged instead of a
/// raw vector.
///
/// @param x character scalar or raw vector to hash
/// @return raw vector of 32 bytes (the BLAKE3 digest)
#[cfg(feature = "blake3")]
#[builtin(name = "blake3_raw", min_args = 1)]
fn builtin_blake3_raw(args: &[RValue], _named: &[(String, RValue)]) -> Result<RValue, RError> {
    let bytes = match extract_input_bytes(args)? {
        Err(na) => return Ok(na),
        Ok(bytes) => bytes,
    };
    let digest = blake3::hash(&bytes);
    let raw_digest = digest.as_bytes().to_vec();
    Ok(RValue::vec(Vector::Raw(raw_digest)))
}
169
/// Hash the contents of a file with BLAKE3 and return hex text.
///
/// The file is opened, wrapped in a `BufReader`, and streamed into the hasher
/// with `std::io::copy`, so the whole file is never held in memory at once.
///
/// Errors are raised when the first argument is not a character scalar, when
/// the file cannot be opened, or when reading it fails.
///
/// @param path character scalar: path to the file to hash
/// @return character scalar containing the 64-char hex digest
#[cfg(feature = "blake3")]
#[builtin(name = "blake3_file", min_args = 1)]
fn builtin_blake3_file(args: &[RValue], _named: &[(String, RValue)]) -> Result<RValue, RError> {
    // Same error for "not a vector" and "not a character scalar".
    let bad_path = || {
        RError::new(
            RErrorKind::Argument,
            "blake3_file() requires a character scalar path".to_string(),
        )
    };
    let path = match args.first() {
        Some(RValue::Vector(rv)) => rv.as_character_scalar().ok_or_else(bad_path)?,
        _ => return Err(bad_path()),
    };

    let handle = match std::fs::File::open(&path) {
        Ok(handle) => handle,
        Err(e) => {
            return Err(RError::new(
                RErrorKind::Other,
                format!("cannot open file {:?}: {}", path, e),
            ))
        }
    };

    // Stream the file through the hasher.
    let mut source = std::io::BufReader::new(handle);
    let mut state = blake3::Hasher::new();
    if let Err(e) = std::io::copy(&mut source, &mut state) {
        return Err(RError::new(
            RErrorKind::Other,
            format!("error reading file {:?}: {}", path, e),
        ));
    }

    let hex_digest = state.finalize().to_hex().to_string();
    Ok(RValue::vec(Vector::Character(vec![Some(hex_digest)].into())))
}
216
217// endregion
218
219// region: Base64 encoding/decoding
220
221/// Encode a character string or raw vector to base64.
222///
223/// @param x character string or raw vector to encode
224/// @return character string with base64 encoding
225#[builtin(name = "base64enc", min_args = 1, namespace = "base")]
226fn builtin_base64enc(args: &[RValue], _named: &[(String, RValue)]) -> Result<RValue, RError> {
227    use base64::Engine;
228    let input = &args[0];
229    let bytes = match input {
230        RValue::Vector(rv) => match &rv.inner {
231            Vector::Character(vals) => vals
232                .first()
233                .and_then(|s| s.as_ref())
234                .map(|s| s.as_bytes().to_vec())
235                .unwrap_or_default(),
236            Vector::Raw(vals) => vals.to_vec(),
237            _ => {
238                return Err(RError::new(
239                    RErrorKind::Argument,
240                    "base64enc() requires a character string or raw vector".to_string(),
241                ))
242            }
243        },
244        _ => {
245            return Err(RError::new(
246                RErrorKind::Argument,
247                "base64enc() requires a character string or raw vector".to_string(),
248            ))
249        }
250    };
251    let encoded = base64::engine::general_purpose::STANDARD.encode(&bytes);
252    Ok(RValue::vec(Vector::Character(vec![Some(encoded)].into())))
253}
254
255/// Decode a base64-encoded string to a character string.
256///
257/// @param x base64-encoded character string
258/// @return decoded character string
259#[builtin(name = "base64dec", min_args = 1, namespace = "base")]
260fn builtin_base64dec(args: &[RValue], _named: &[(String, RValue)]) -> Result<RValue, RError> {
261    use base64::Engine;
262    let input = args
263        .first()
264        .and_then(|v| v.as_vector())
265        .and_then(|v| v.as_character_scalar())
266        .ok_or_else(|| {
267            RError::new(
268                RErrorKind::Argument,
269                "base64dec() requires a character string".to_string(),
270            )
271        })?;
272    let bytes = base64::engine::general_purpose::STANDARD
273        .decode(&input)
274        .map_err(|e| RError::new(RErrorKind::Other, format!("base64 decode error: {e}")))?;
275    let decoded = String::from_utf8(bytes).map_err(|e| {
276        RError::new(
277            RErrorKind::Other,
278            format!("base64 decode: invalid UTF-8: {e}"),
279        )
280    })?;
281    Ok(RValue::vec(Vector::Character(vec![Some(decoded)].into())))
282}
283
284// endregion