Skip to main content

r/interpreter/builtins/
net.rs

1//! HTTP/HTTPS networking builtins — `download.file()` and `url()`.
2//!
3//! Provides TLS support via `rustls` with system certificate trust
4//! (`rustls-native-certs`) and Mozilla root certificates (`webpki-roots`).
5//!
6//! `download.file(url, destfile)` performs a one-shot HTTP/HTTPS GET request
7//! and writes the response body to a local file.
8//!
9//! `url(description)` creates a URL connection object that can be read
10//! with `readLines()`.
11
12use std::io::{Read, Write};
13use std::net::TcpStream;
14use std::sync::Arc;
15
16use rustls::pki_types::ServerName;
17
18use super::CallArgs;
19use crate::interpreter::builtins::connections::ConnectionInfo;
20use crate::interpreter::value::*;
21use crate::interpreter::BuiltinContext;
22use minir_macros::interpreter_builtin;
23
24// region: URL parsing
25
/// Parsed URL components for HTTP/HTTPS.
///
/// Built by `parse_url`. The scheme, host and port are what the connection
/// code needs to open a socket; the host and path are what the request
/// builder needs for the request line and `Host` header.
#[derive(Debug, Clone, PartialEq, Eq)]
struct ParsedUrl {
    /// Transport selected by the URL prefix.
    scheme: Scheme,
    /// Host portion of the authority, without the port.
    host: String,
    /// Explicit port from the URL, or the scheme default (80 for HTTP, 443 for HTTPS).
    port: u16,
    /// Request target starting with `/`; `/` when the URL names no path.
    path: String,
}

/// URL schemes understood by the networking builtins.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Scheme {
    /// Plain-text HTTP (`http://`).
    Http,
    /// HTTP over TLS (`https://`).
    Https,
}
39
40/// Parse a URL into its components. Only supports http:// and https://.
41fn parse_url(url: &str) -> Result<ParsedUrl, RError> {
42    let (scheme, rest) = if let Some(rest) = url.strip_prefix("https://") {
43        (Scheme::Https, rest)
44    } else if let Some(rest) = url.strip_prefix("http://") {
45        (Scheme::Http, rest)
46    } else {
47        return Err(RError::new(
48            RErrorKind::Argument,
49            format!(
50                "unsupported URL scheme in '{}' — only http:// and https:// are supported",
51                url
52            ),
53        ));
54    };
55
56    // Split host from path
57    let (host_port, path) = match rest.find('/') {
58        Some(pos) => (&rest[..pos], &rest[pos..]),
59        None => (rest, "/"),
60    };
61
62    // Split host from port
63    let (host, port) = if let Some(colon_pos) = host_port.rfind(':') {
64        let port_str = &host_port[colon_pos + 1..];
65        let port: u16 = port_str.parse().map_err(|_| {
66            RError::new(
67                RErrorKind::Argument,
68                format!("invalid port number '{}' in URL '{}'", port_str, url),
69            )
70        })?;
71        (host_port[..colon_pos].to_string(), port)
72    } else {
73        let default_port = match scheme {
74            Scheme::Http => 80,
75            Scheme::Https => 443,
76        };
77        (host_port.to_string(), default_port)
78    };
79
80    if host.is_empty() {
81        return Err(RError::new(
82            RErrorKind::Argument,
83            format!("empty host in URL '{}'", url),
84        ));
85    }
86
87    Ok(ParsedUrl {
88        scheme,
89        host,
90        port,
91        path: path.to_string(),
92    })
93}
94
95// endregion
96
97// region: TLS client configuration
98
99/// Build a rustls `ClientConfig` using system certificates (via rustls-native-certs)
100/// with Mozilla roots as fallback (via webpki-roots).
101fn tls_client_config() -> Result<Arc<rustls::ClientConfig>, RError> {
102    let mut root_store = rustls::RootCertStore::empty();
103
104    // Try loading system certificates first.
105    let cert_result = rustls_native_certs::load_native_certs();
106    for cert in cert_result.certs {
107        // Ignore individual cert errors — some system certs may be
108        // unparseable but that shouldn't block the whole store.
109        drop(root_store.add(cert));
110    }
111
112    // If no system certs loaded, use Mozilla roots as fallback.
113    if root_store.is_empty() {
114        root_store.extend(webpki_roots::TLS_SERVER_ROOTS.iter().cloned());
115    }
116
117    let config = rustls::ClientConfig::builder()
118        .with_root_certificates(root_store)
119        .with_no_client_auth();
120
121    Ok(Arc::new(config))
122}
123
124/// Open a TCP connection and optionally wrap it in TLS.
125/// Returns a boxed Read+Write stream.
126fn connect_stream(parsed: &ParsedUrl) -> Result<Box<dyn ReadWriteStream>, RError> {
127    let tcp = TcpStream::connect((parsed.host.as_str(), parsed.port)).map_err(|e| {
128        RError::new(
129            RErrorKind::Other,
130            format!(
131                "cannot connect to {}:{} — {}. \
132                 Check that the host is reachable and the port is open.",
133                parsed.host, parsed.port, e
134            ),
135        )
136    })?;
137
138    match parsed.scheme {
139        Scheme::Http => Ok(Box::new(tcp)),
140        Scheme::Https => {
141            let config = tls_client_config()?;
142            let server_name = ServerName::try_from(parsed.host.clone()).map_err(|e| {
143                RError::new(
144                    RErrorKind::Argument,
145                    format!("invalid server name '{}': {}", parsed.host, e),
146                )
147            })?;
148            let conn = rustls::ClientConnection::new(config, server_name).map_err(|e| {
149                RError::new(
150                    RErrorKind::Other,
151                    format!("TLS handshake failed for '{}': {}", parsed.host, e),
152                )
153            })?;
154            let tls_stream = rustls::StreamOwned::new(conn, tcp);
155            Ok(Box::new(tls_stream))
156        }
157    }
158}
159
/// Marker trait standing in for "`Read` and `Write` at the same time".
///
/// A trait object cannot name two non-auto traits (`dyn Read + Write` is
/// rejected by the compiler), so plain and TLS-wrapped streams are exposed
/// through `dyn ReadWriteStream` instead.
trait ReadWriteStream: Read + Write {}

// Blanket implementation: every type that is `Read + Write` qualifies.
impl<S> ReadWriteStream for S where S: Read + Write {}
163
164// endregion
165
166// region: HTTP helpers
167
168/// Perform an HTTP GET request on the given stream, returning the response body as bytes.
169fn http_get(stream: &mut dyn ReadWriteStream, host: &str, path: &str) -> Result<Vec<u8>, RError> {
170    // Send HTTP/1.1 GET request
171    let request = format!(
172        "GET {} HTTP/1.1\r\nHost: {}\r\nConnection: close\r\nUser-Agent: miniR/0.1\r\nAccept: */*\r\n\r\n",
173        path, host
174    );
175    stream.write_all(request.as_bytes()).map_err(|e| {
176        RError::new(
177            RErrorKind::Other,
178            format!("failed to send HTTP request: {}", e),
179        )
180    })?;
181
182    // Read entire response
183    let mut response = Vec::new();
184    stream.read_to_end(&mut response).map_err(|e| {
185        RError::new(
186            RErrorKind::Other,
187            format!("failed to read HTTP response: {}", e),
188        )
189    })?;
190
191    // Parse response: find end of headers (blank line)
192    let header_end = find_header_end(&response).ok_or_else(|| {
193        RError::new(
194            RErrorKind::Other,
195            "malformed HTTP response — could not find end of headers".to_string(),
196        )
197    })?;
198
199    let header_bytes = &response[..header_end];
200    let header_str = String::from_utf8_lossy(header_bytes);
201
202    // Check status line
203    let status_line = header_str.lines().next().unwrap_or("");
204    let status_code = parse_status_code(status_line);
205
206    // Handle redirects (301, 302, 303, 307, 308)
207    if matches!(status_code, Some(301 | 302 | 303 | 307 | 308)) {
208        // Find Location header
209        let location = header_str.lines().find_map(|line| {
210            if line.to_ascii_lowercase().starts_with("location:") {
211                Some(line[9..].trim().to_string())
212            } else {
213                None
214            }
215        });
216
217        if let Some(redirect_url) = location {
218            return Err(RError::new(
219                RErrorKind::Other,
220                format!(
221                    "HTTP redirect to '{}' — redirect following is not yet supported. \
222                     Try using the redirected URL directly.",
223                    redirect_url
224                ),
225            ));
226        }
227    }
228
229    // Check for error status
230    if let Some(code) = status_code {
231        if code >= 400 {
232            return Err(RError::new(
233                RErrorKind::Other,
234                format!("HTTP request failed with status {} — {}", code, status_line),
235            ));
236        }
237    }
238
239    // Return body (everything after headers + \r\n\r\n)
240    let body_start = header_end + 4; // skip \r\n\r\n
241    if body_start <= response.len() {
242        Ok(response[body_start..].to_vec())
243    } else {
244        Ok(Vec::new())
245    }
246}
247
/// Locate the blank line that terminates an HTTP header block.
///
/// Returns the byte offset at which the first `\r\n\r\n` sequence starts, or
/// `None` when `data` contains no such sequence (which includes every input
/// shorter than four bytes).
fn find_header_end(data: &[u8]) -> Option<usize> {
    const TERMINATOR: &[u8] = b"\r\n\r\n";

    data.windows(TERMINATOR.len())
        .enumerate()
        .find_map(|(offset, window)| {
            if window == TERMINATOR {
                Some(offset)
            } else {
                None
            }
        })
}
252
/// Extract the numeric status code from a status line such as `"HTTP/1.1 200 OK"`.
///
/// The code is the second space-separated field. Returns `None` when the line
/// has no second field or when that field does not parse as a `u16`.
fn parse_status_code(status_line: &str) -> Option<u16> {
    let mut fields = status_line.split(' ');
    // First field is the protocol version; it only has to be present.
    fields.next()?;
    fields.next().and_then(|code| code.parse().ok())
}
262
263// endregion
264
265// region: download.file builtin
266
267/// Download a file from a URL.
268///
269/// Performs an HTTP or HTTPS GET request and writes the response body
270/// to the specified local file. Returns 0 (success) or non-zero (failure),
271/// matching R's `download.file()` return convention.
272///
273/// @param url character scalar: the URL to download from
274/// @param destfile character scalar: the local file path to write to
275/// @param method character scalar: download method (ignored, always "internal")
276/// @param quiet logical scalar: if TRUE, suppress progress messages (default FALSE)
277/// @return integer scalar: 0 for success
278#[interpreter_builtin(name = "download.file", min_args = 2, namespace = "net")]
279fn interp_download_file(
280    args: &[RValue],
281    named: &[(String, RValue)],
282    context: &BuiltinContext,
283) -> Result<RValue, RError> {
284    let call_args = CallArgs::new(args, named);
285    let url_str = call_args.string("url", 0)?;
286    let destfile = call_args.string("destfile", 1)?;
287    let quiet = call_args.logical_flag("quiet", 3, false);
288
289    let parsed = parse_url(&url_str)?;
290
291    if !quiet {
292        // R prints a message about the download
293        context.write_err(&format!("trying URL '{}'\n", url_str));
294    }
295
296    let mut stream = connect_stream(&parsed)?;
297    let body = http_get(stream.as_mut(), &parsed.host, &parsed.path)?;
298
299    // Resolve destfile relative to interpreter working directory
300    let interp = context.interpreter();
301    let dest_path = if std::path::Path::new(&destfile).is_absolute() {
302        std::path::PathBuf::from(&destfile)
303    } else {
304        let wd = interp.get_working_dir();
305        wd.join(&destfile)
306    };
307
308    std::fs::write(&dest_path, &body).map_err(|e| {
309        RError::new(
310            RErrorKind::Other,
311            format!("cannot write to '{}': {}", dest_path.display(), e),
312        )
313    })?;
314
315    if !quiet {
316        context.write_err(&format!("Content length {} bytes\n", body.len()));
317        context.write_err(&format!("downloaded {} bytes\n", body.len()));
318    }
319
320    // Return 0 for success (R convention)
321    Ok(RValue::vec(Vector::Integer(vec![Some(0)].into())))
322}
323
324// endregion
325
326// region: url connection builtin
327
328/// Create a URL connection.
329///
330/// Opens an HTTP or HTTPS connection to the specified URL. The connection
331/// can be read with `readLines()`. Only "r" (read) mode is supported.
332///
333/// When opened, the connection performs an HTTP GET request and buffers
334/// the response body for subsequent reads.
335///
336/// @param description character scalar: the URL to connect to
337/// @param open character scalar: open mode ("" or "r")
338/// @return integer scalar with class "connection"
339#[interpreter_builtin(name = "url", min_args = 1)]
340fn interp_url(
341    args: &[RValue],
342    named: &[(String, RValue)],
343    context: &BuiltinContext,
344) -> Result<RValue, RError> {
345    let call_args = CallArgs::new(args, named);
346    let url_str = call_args.string("description", 0)?;
347    let open_mode = call_args.optional_string("open", 1).unwrap_or_default();
348
349    // Validate the URL scheme
350    let parsed = parse_url(&url_str)?;
351
352    let interp = context.interpreter();
353
354    // Create a URL connection
355    let mut info = ConnectionInfo::url_connection(url_str.clone());
356
357    if !open_mode.is_empty() {
358        if open_mode != "r" && open_mode != "rt" && open_mode != "rb" {
359            return Err(RError::new(
360                RErrorKind::Argument,
361                format!(
362                    "url() connections only support read mode ('r', 'rt', 'rb'), got '{}'",
363                    open_mode
364                ),
365            ));
366        }
367        info.mode = open_mode;
368        info.is_open = true;
369
370        // Eagerly fetch the content when opening
371        let mut stream = connect_stream(&parsed)?;
372        let body = http_get(stream.as_mut(), &parsed.host, &parsed.path)?;
373
374        let id = interp.add_connection(info);
375        interp.store_url_body(id, body);
376        Ok(connection_value(id))
377    } else {
378        // Create connection but don't open it yet
379        let id = interp.add_connection(info);
380        Ok(connection_value(id))
381    }
382}
383
384// endregion
385
386// region: Helper — build a connection RValue
387
388/// Build an integer scalar with class `"connection"` representing connection `id`.
389fn connection_value(id: usize) -> RValue {
390    let mut rv = RVector::from(Vector::Integer(
391        vec![Some(i64::try_from(id).unwrap_or(0))].into(),
392    ));
393    rv.set_attr(
394        "class".to_string(),
395        RValue::vec(Vector::Character(
396            vec![Some("connection".to_string())].into(),
397        )),
398    );
399    RValue::Vector(rv)
400}
401
402// endregion