Skip to main content

r/interpreter/value/
integer.rs

1//! R integer vectors backed by Apache Arrow `Int64Array`.
2//!
3//! The Arrow array provides: contiguous i64 buffer + validity bitmap for NA tracking.
4//! Memory layout matches the old `NullableBuffer<i64>`: 8 bytes per i64 + 1 bit per element.
5
6use std::fmt;
7
8use arrow_array::builder::PrimitiveBuilder;
9use arrow_array::types::Int64Type;
10use arrow_array::{Array, Int64Array};
11
/// Newtype for R integer vectors backed by Arrow `Int64Array`.
///
/// Wraps `Int64Array`, where NA tracking uses a validity bitmap
/// instead of an `Option<i64>` per element (halving memory for dense integers).
///
/// The inner array is public, so callers may read or replace it directly;
/// `Clone` is derived from the wrapped `Int64Array`.
#[derive(Clone)]
pub struct Integer(pub Int64Array);
18
19impl Integer {
20    /// Number of elements.
21    #[inline]
22    pub fn len(&self) -> usize {
23        self.0.len()
24    }
25
26    /// True if the buffer is empty.
27    #[inline]
28    pub fn is_empty(&self) -> bool {
29        self.0.is_empty()
30    }
31
32    /// Get element `i` as `Option<i64>` by value.
33    ///
34    /// # Panics
35    /// Panics if `i >= len`.
36    #[inline]
37    pub fn get_opt(&self, i: usize) -> Option<i64> {
38        if self.0.is_null(i) {
39            None
40        } else {
41            Some(self.0.value(i))
42        }
43    }
44
45    /// Get the first element as `Option<i64>`.
46    pub fn first_opt(&self) -> Option<i64> {
47        if self.is_empty() {
48            None
49        } else {
50            self.get_opt(0)
51        }
52    }
53
54    /// Iterate yielding `Option<i64>` by value.
55    pub fn iter_opt(&self) -> impl Iterator<Item = Option<i64>> + Clone + '_ {
56        self.0.iter()
57    }
58
59    /// Iterate yielding `Option<i64>` by value (alias for `iter_opt`).
60    ///
61    /// Matches the iteration signature of the old `NullableBuffer::iter()`.
62    pub fn iter(&self) -> impl Iterator<Item = Option<i64>> + Clone + '_ {
63        self.0.iter()
64    }
65
66    /// True if element `i` is NA.
67    ///
68    /// # Panics
69    /// Panics if `i >= len`.
70    #[inline]
71    pub fn is_na(&self, i: usize) -> bool {
72        self.0.is_null(i)
73    }
74
75    /// Number of NA values.
76    pub fn na_count(&self) -> usize {
77        self.0.null_count()
78    }
79
80    /// True if there are any NAs.
81    pub fn has_na(&self) -> bool {
82        self.0.null_count() > 0
83    }
84
85    /// Convert to `Vec<Option<i64>>`.
86    pub fn into_vec(self) -> Vec<Option<i64>> {
87        self.0.iter().collect()
88    }
89
90    /// Borrow as `Vec<Option<i64>>` (allocates).
91    pub fn to_option_vec(&self) -> Vec<Option<i64>> {
92        self.0.iter().collect()
93    }
94
95    /// Raw values slice. NA positions hold arbitrary values.
96    #[inline]
97    pub fn values_slice(&self) -> &[i64] {
98        self.0.values().as_ref()
99    }
100
101    /// Create a buffer from values with no NAs.
102    pub fn from_values(values: Vec<i64>) -> Self {
103        Integer(Int64Array::from(values))
104    }
105
106    /// Collect indices, producing a new `Integer`. Out-of-bounds indices become NA.
107    pub fn select_indices(&self, indices: &[usize]) -> Integer {
108        let mut builder = PrimitiveBuilder::<Int64Type>::with_capacity(indices.len());
109        for &i in indices {
110            if i < self.len() {
111                builder.append_option(self.get_opt(i));
112            } else {
113                builder.append_null();
114            }
115        }
116        Integer(builder.finish())
117    }
118
119    /// Set element `i`.
120    ///
121    /// Since Arrow arrays are immutable, this rebuilds the array.
122    ///
123    /// # Panics
124    /// Panics if `i >= len`.
125    pub fn set(&mut self, i: usize, val: Option<i64>) {
126        assert!(
127            i < self.len(),
128            "Integer::set: index {i} out of bounds (len {})",
129            self.len()
130        );
131        let mut builder = PrimitiveBuilder::<Int64Type>::with_capacity(self.len());
132        for j in 0..self.len() {
133            if j == i {
134                builder.append_option(val);
135            } else {
136                builder.append_option(self.get_opt(j));
137            }
138        }
139        self.0 = builder.finish();
140    }
141
142    /// Push an element onto the end.
143    pub fn push(&mut self, val: Option<i64>) {
144        let new_len = self.len() + 1;
145        let mut builder = PrimitiveBuilder::<Int64Type>::with_capacity(new_len);
146        for j in 0..self.len() {
147            builder.append_option(self.get_opt(j));
148        }
149        builder.append_option(val);
150        self.0 = builder.finish();
151    }
152
153    /// Extend this buffer with elements from another.
154    pub fn extend(&mut self, other: &Integer) {
155        let new_len = self.len() + other.len();
156        let mut builder = PrimitiveBuilder::<Int64Type>::with_capacity(new_len);
157        for j in 0..self.len() {
158            builder.append_option(self.get_opt(j));
159        }
160        for j in 0..other.len() {
161            builder.append_option(other.get_opt(j));
162        }
163        self.0 = builder.finish();
164    }
165
166    /// Truncate the buffer to `len` elements.
167    pub fn truncate(&mut self, len: usize) {
168        if len >= self.len() {
169            return;
170        }
171        self.0 = self.0.slice(0, len);
172    }
173
174    /// Reverse the buffer in-place.
175    pub fn reverse(&mut self) {
176        let len = self.len();
177        let mut builder = PrimitiveBuilder::<Int64Type>::with_capacity(len);
178        for i in (0..len).rev() {
179            builder.append_option(self.get_opt(i));
180        }
181        self.0 = builder.finish();
182    }
183
184    /// Extract a sub-range as a new `Integer`.
185    pub fn slice(&self, offset: usize, length: usize) -> Integer {
186        Integer(self.0.slice(offset, length))
187    }
188
189    /// Access the underlying Arrow array.
190    #[inline]
191    pub fn arrow_array(&self) -> &Int64Array {
192        &self.0
193    }
194
195    /// Create a new `Integer` of `len` elements, all NA.
196    pub fn new_na(len: usize) -> Self {
197        let mut builder = PrimitiveBuilder::<Int64Type>::with_capacity(len);
198        for _ in 0..len {
199            builder.append_null();
200        }
201        Integer(builder.finish())
202    }
203}
204
205// region: Display / Debug / PartialEq
206
207impl fmt::Debug for Integer {
208    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
209        write!(f, "Integer(")?;
210        f.debug_list().entries(self.0.iter()).finish()?;
211        write!(f, ")")
212    }
213}
214
215impl PartialEq for Integer {
216    fn eq(&self, other: &Self) -> bool {
217        if self.len() != other.len() {
218            return false;
219        }
220        for i in 0..self.len() {
221            let a = self.get_opt(i);
222            let b = other.get_opt(i);
223            if a != b {
224                return false;
225            }
226        }
227        true
228    }
229}
230
231// endregion
232
233// region: Conversions
234
235impl From<Vec<Option<i64>>> for Integer {
236    fn from(v: Vec<Option<i64>>) -> Self {
237        Integer(Int64Array::from(v))
238    }
239}
240
241impl From<Integer> for Vec<Option<i64>> {
242    fn from(i: Integer) -> Self {
243        i.into_vec()
244    }
245}
246
247impl From<Int64Array> for Integer {
248    fn from(arr: Int64Array) -> Self {
249        Integer(arr)
250    }
251}
252
253impl FromIterator<Option<i64>> for Integer {
254    fn from_iter<I: IntoIterator<Item = Option<i64>>>(iter: I) -> Self {
255        let arr: Int64Array = iter.into_iter().collect();
256        Integer(arr)
257    }
258}
259
260// endregion