1use std::ops::RangeBounds;
7
8#[derive(Clone, Debug)]
15pub struct BitVec {
16 words: Vec<u64>,
17 len: usize,
18}
19
20impl BitVec {
21 pub fn all_valid(len: usize) -> Self {
23 let n_words = len.div_ceil(64);
24 let mut words = vec![u64::MAX; n_words];
25 let excess = n_words * 64 - len;
27 if excess > 0 && !words.is_empty() {
28 let last = words.len() - 1;
29 words[last] = u64::MAX >> excess;
30 }
31 BitVec { words, len }
32 }
33
34 pub fn all_na(len: usize) -> Self {
36 let n_words = len.div_ceil(64);
37 BitVec {
38 words: vec![0; n_words],
39 len,
40 }
41 }
42
43 #[inline]
45 pub fn len(&self) -> usize {
46 self.len
47 }
48
49 #[inline]
51 pub fn is_empty(&self) -> bool {
52 self.len == 0
53 }
54
55 #[inline]
60 pub fn get(&self, i: usize) -> bool {
61 assert!(
62 i < self.len,
63 "BitVec::get: index {i} out of bounds (len {})",
64 self.len
65 );
66 let word = self.words[i / 64];
67 (word >> (i % 64)) & 1 == 1
68 }
69
70 #[inline]
75 pub fn set(&mut self, i: usize, val: bool) {
76 assert!(
77 i < self.len,
78 "BitVec::set: index {i} out of bounds (len {})",
79 self.len
80 );
81 if val {
82 self.words[i / 64] |= 1u64 << (i % 64);
83 } else {
84 self.words[i / 64] &= !(1u64 << (i % 64));
85 }
86 }
87
88 pub fn count_zeros(&self) -> usize {
90 self.len - self.count_ones()
91 }
92
93 pub fn count_ones(&self) -> usize {
95 self.words.iter().map(|w| w.count_ones() as usize).sum()
96 }
97
98 pub fn all_set(&self) -> bool {
100 self.count_ones() == self.len
101 }
102
103 pub fn push(&mut self, val: bool) {
105 let bit_idx = self.len;
106 self.len += 1;
107 let word_idx = bit_idx / 64;
108 if word_idx >= self.words.len() {
109 self.words.push(0);
110 }
111 if val {
112 self.words[word_idx] |= 1u64 << (bit_idx % 64);
113 }
114 }
115
116 pub fn slice(&self, start: usize, end: usize) -> Self {
118 assert!(start <= end && end <= self.len);
119 let new_len = end - start;
120 let mut result = BitVec::all_na(new_len);
121 for i in 0..new_len {
122 if self.get(start + i) {
123 result.set(i, true);
124 }
125 }
126 result
127 }
128}
129
130impl PartialEq for BitVec {
131 fn eq(&self, other: &Self) -> bool {
132 if self.len != other.len {
133 return false;
134 }
135 self.words == other.words
137 }
138}
139
140impl Eq for BitVec {}
141
142#[derive(Clone, Debug)]
151pub struct NullableBuffer<T: Clone + Default> {
152 values: Vec<T>,
154 validity: Option<BitVec>,
157}
158
159impl<T: Clone + Default> NullableBuffer<T> {
160 pub fn new(len: usize) -> Self {
162 NullableBuffer {
163 values: vec![T::default(); len],
164 validity: Some(BitVec::all_na(len)),
165 }
166 }
167
168 pub fn from_values(values: Vec<T>) -> Self {
170 NullableBuffer {
171 values,
172 validity: None,
173 }
174 }
175
176 #[inline]
178 pub fn len(&self) -> usize {
179 self.values.len()
180 }
181
182 #[inline]
184 pub fn is_empty(&self) -> bool {
185 self.values.len() == 0
186 }
187
188 #[inline]
193 pub fn get(&self, i: usize) -> Option<&T> {
194 if self.is_na(i) {
195 None
196 } else {
197 Some(&self.values[i])
198 }
199 }
200
201 pub fn set(&mut self, i: usize, val: Option<T>) {
206 match val {
207 Some(v) => {
208 self.values[i] = v;
209 if let Some(ref mut bv) = self.validity {
210 bv.set(i, true);
211 }
212 }
214 None => {
215 self.values[i] = T::default();
216 self.ensure_validity();
217 if let Some(ref mut bv) = self.validity {
218 bv.set(i, false);
219 }
220 }
221 }
222 }
223
224 #[inline]
229 pub fn is_na(&self, i: usize) -> bool {
230 match &self.validity {
231 None => false,
232 Some(bv) => !bv.get(i),
233 }
234 }
235
236 pub fn na_count(&self) -> usize {
238 match &self.validity {
239 None => 0,
240 Some(bv) => bv.count_zeros(),
241 }
242 }
243
244 pub fn has_na(&self) -> bool {
246 match &self.validity {
247 None => false,
248 Some(bv) => !bv.all_set(),
249 }
250 }
251
252 pub fn iter(&self) -> NullableIter<'_, T> {
254 NullableIter { buf: self, pos: 0 }
255 }
256
257 pub fn into_vec(self) -> Vec<Option<T>> {
259 match self.validity {
260 None => self.values.into_iter().map(Some).collect(),
261 Some(bv) => self
262 .values
263 .into_iter()
264 .enumerate()
265 .map(|(i, v)| if bv.get(i) { Some(v) } else { None })
266 .collect(),
267 }
268 }
269
270 pub fn to_option_vec(&self) -> Vec<Option<T>> {
272 match &self.validity {
273 None => self.values.iter().cloned().map(Some).collect(),
274 Some(bv) => self
275 .values
276 .iter()
277 .enumerate()
278 .map(|(i, v)| if bv.get(i) { Some(v.clone()) } else { None })
279 .collect(),
280 }
281 }
282
283 #[inline]
285 pub fn values_slice(&self) -> &[T] {
286 &self.values
287 }
288
289 #[inline]
291 pub fn values_slice_mut(&mut self) -> &mut [T] {
292 &mut self.values
293 }
294
295 #[inline]
297 pub fn validity(&self) -> Option<&BitVec> {
298 self.validity.as_ref()
299 }
300
301 pub fn slice<R: RangeBounds<usize>>(&self, range: R) -> NullableBuffer<T> {
303 let start = match range.start_bound() {
304 std::ops::Bound::Included(&s) => s,
305 std::ops::Bound::Excluded(&s) => s + 1,
306 std::ops::Bound::Unbounded => 0,
307 };
308 let end = match range.end_bound() {
309 std::ops::Bound::Included(&e) => e + 1,
310 std::ops::Bound::Excluded(&e) => e,
311 std::ops::Bound::Unbounded => self.len(),
312 };
313 assert!(start <= end && end <= self.len());
314 let values = self.values[start..end].to_vec();
315 let validity = self.validity.as_ref().map(|bv| bv.slice(start, end));
316 let validity = match validity {
318 Some(bv) if bv.all_set() => None,
319 other => other,
320 };
321 NullableBuffer { values, validity }
322 }
323
324 fn ensure_validity(&mut self) {
326 if self.validity.is_none() {
327 self.validity = Some(BitVec::all_valid(self.len()));
328 }
329 }
330
331 pub fn reverse(&mut self) {
333 self.values.reverse();
334 if let Some(ref bv) = self.validity {
335 let len = bv.len();
337 let mut new_bv = BitVec::all_na(len);
338 for i in 0..len {
339 if bv.get(i) {
340 new_bv.set(len - 1 - i, true);
341 }
342 }
343 self.validity = Some(new_bv);
344 }
345 }
346
347 pub fn extend(&mut self, other: &NullableBuffer<T>) {
349 for i in 0..other.len() {
350 if other.is_na(i) {
351 self.push(None);
352 } else {
353 self.push(Some(other.values[i].clone()));
354 }
355 }
356 }
357
358 pub fn truncate(&mut self, len: usize) {
360 if len >= self.len() {
361 return;
362 }
363 self.values.truncate(len);
364 if let Some(ref mut bv) = self.validity {
365 let new_bv = bv.slice(0, len);
367 *bv = new_bv;
368 if bv.all_set() {
370 self.validity = None;
371 }
372 }
373 }
374
375 pub fn push(&mut self, val: Option<T>) {
377 match val {
378 Some(v) => {
379 self.values.push(v);
380 if let Some(ref mut bv) = self.validity {
381 bv.push(true);
382 }
383 }
384 None => {
385 self.ensure_validity();
387 self.values.push(T::default());
388 if let Some(ref mut bv) = self.validity {
389 bv.push(false);
390 }
391 }
392 }
393 }
394}
395
396impl<T: Clone + Default + Copy> NullableBuffer<T> {
397 #[inline]
402 pub fn get_opt(&self, i: usize) -> Option<T> {
403 if self.is_na(i) {
404 None
405 } else {
406 Some(self.values[i])
407 }
408 }
409
410 pub fn first_opt(&self) -> Option<T> {
412 if self.is_empty() {
413 None
414 } else {
415 self.get_opt(0)
416 }
417 }
418
419 pub fn iter_opt(&self) -> impl Iterator<Item = Option<T>> + Clone + '_ {
422 (0..self.len()).map(move |i| self.get_opt(i))
423 }
424
425 pub fn select_indices(&self, indices: &[usize]) -> NullableBuffer<T> {
427 let mut values = Vec::with_capacity(indices.len());
428 let mut has_na = false;
429 for &i in indices {
430 if i < self.len() {
431 if let Some(v) = self.get_opt(i) {
432 values.push(Some(v));
433 } else {
434 values.push(None);
435 has_na = true;
436 }
437 } else {
438 values.push(None);
439 has_na = true;
440 }
441 }
442 if has_na {
443 NullableBuffer::from(values)
444 } else {
445 NullableBuffer::from_values(values.into_iter().map(|v| v.unwrap_or_default()).collect())
446 }
447 }
448}
449
450#[derive(Clone)]
456pub struct NullableIter<'a, T: Clone + Default> {
457 buf: &'a NullableBuffer<T>,
458 pos: usize,
459}
460
461impl<'a, T: Clone + Default> Iterator for NullableIter<'a, T> {
462 type Item = Option<&'a T>;
463
464 fn next(&mut self) -> Option<Self::Item> {
465 if self.pos >= self.buf.len() {
466 None
467 } else {
468 let i = self.pos;
469 self.pos += 1;
470 Some(self.buf.get(i))
471 }
472 }
473
474 fn size_hint(&self) -> (usize, Option<usize>) {
475 let remaining = self.buf.len() - self.pos;
476 (remaining, Some(remaining))
477 }
478}
479
480impl<T: Clone + Default> ExactSizeIterator for NullableIter<'_, T> {}
481
482impl<T: Clone + Default> From<Vec<Option<T>>> for NullableBuffer<T> {
487 fn from(v: Vec<Option<T>>) -> Self {
488 let len = v.len();
489 let mut values = Vec::with_capacity(len);
490 let mut has_na = false;
491
492 for item in &v {
493 match item {
494 Some(val) => values.push(val.clone()),
495 None => {
496 values.push(T::default());
497 has_na = true;
498 }
499 }
500 }
501
502 let validity = if has_na {
503 let mut bv = BitVec::all_valid(len);
504 for (i, item) in v.iter().enumerate() {
505 if item.is_none() {
506 bv.set(i, false);
507 }
508 }
509 Some(bv)
510 } else {
511 None
512 };
513
514 NullableBuffer { values, validity }
515 }
516}
517
518impl<T: Clone + Default> From<NullableBuffer<T>> for Vec<Option<T>> {
519 fn from(buf: NullableBuffer<T>) -> Self {
520 buf.into_vec()
521 }
522}
523
524impl<T: Clone + Default + PartialEq> PartialEq for NullableBuffer<T> {
525 fn eq(&self, other: &Self) -> bool {
526 if self.len() != other.len() {
527 return false;
528 }
529 for i in 0..self.len() {
530 let a_na = self.is_na(i);
531 let b_na = other.is_na(i);
532 if a_na != b_na {
533 return false;
534 }
535 if !a_na && self.values[i] != other.values[i] {
536 return false;
537 }
538 }
539 true
540 }
541}
542
543pub struct NullableIntoIter<T: Clone + Default> {
549 values: std::vec::IntoIter<T>,
550 validity: Option<BitVec>,
551 pos: usize,
552}
553
554impl<T: Clone + Default> Iterator for NullableIntoIter<T> {
555 type Item = Option<T>;
556
557 fn next(&mut self) -> Option<Self::Item> {
558 let val = self.values.next()?;
559 let i = self.pos;
560 self.pos += 1;
561 match &self.validity {
562 None => Some(Some(val)),
563 Some(bv) => {
564 if bv.get(i) {
565 Some(Some(val))
566 } else {
567 Some(None)
568 }
569 }
570 }
571 }
572
573 fn size_hint(&self) -> (usize, Option<usize>) {
574 self.values.size_hint()
575 }
576}
577
578impl<T: Clone + Default> ExactSizeIterator for NullableIntoIter<T> {}
579
580impl<T: Clone + Default> IntoIterator for NullableBuffer<T> {
581 type Item = Option<T>;
582 type IntoIter = NullableIntoIter<T>;
583
584 fn into_iter(self) -> Self::IntoIter {
585 NullableIntoIter {
586 values: self.values.into_iter(),
587 validity: self.validity,
588 pos: 0,
589 }
590 }
591}
592
593impl<T: Clone + Default> FromIterator<Option<T>> for NullableBuffer<T> {
594 fn from_iter<I: IntoIterator<Item = Option<T>>>(iter: I) -> Self {
595 let v: Vec<Option<T>> = iter.into_iter().collect();
596 NullableBuffer::from(v)
597 }
598}
599
600#[cfg(test)]
603mod tests {
604 use super::*;
605
606 #[test]
609 fn bitvec_all_valid() {
610 let bv = BitVec::all_valid(100);
611 assert_eq!(bv.len(), 100);
612 assert_eq!(bv.count_ones(), 100);
613 assert_eq!(bv.count_zeros(), 0);
614 assert!(bv.all_set());
615 for i in 0..100 {
616 assert!(bv.get(i));
617 }
618 }
619
620 #[test]
621 fn bitvec_all_na() {
622 let bv = BitVec::all_na(100);
623 assert_eq!(bv.len(), 100);
624 assert_eq!(bv.count_ones(), 0);
625 assert_eq!(bv.count_zeros(), 100);
626 assert!(!bv.all_set());
627 for i in 0..100 {
628 assert!(!bv.get(i));
629 }
630 }
631
632 #[test]
633 fn bitvec_set_and_get() {
634 let mut bv = BitVec::all_na(10);
635 bv.set(0, true);
636 bv.set(5, true);
637 bv.set(9, true);
638 assert!(bv.get(0));
639 assert!(!bv.get(1));
640 assert!(bv.get(5));
641 assert!(!bv.get(6));
642 assert!(bv.get(9));
643 assert_eq!(bv.count_ones(), 3);
644 assert_eq!(bv.count_zeros(), 7);
645 }
646
647 #[test]
648 fn bitvec_push() {
649 let mut bv = BitVec::all_valid(0);
650 bv.push(true);
651 bv.push(false);
652 bv.push(true);
653 assert_eq!(bv.len(), 3);
654 assert!(bv.get(0));
655 assert!(!bv.get(1));
656 assert!(bv.get(2));
657 }
658
659 #[test]
660 fn bitvec_equality() {
661 let a = BitVec::all_valid(64);
662 let b = BitVec::all_valid(64);
663 assert_eq!(a, b);
664 let mut c = BitVec::all_valid(64);
665 c.set(0, false);
666 assert_ne!(a, c);
667 }
668
669 #[test]
670 fn bitvec_slice() {
671 let mut bv = BitVec::all_valid(10);
672 bv.set(3, false);
673 bv.set(7, false);
674 let sliced = bv.slice(2, 8);
675 assert_eq!(sliced.len(), 6);
676 assert!(sliced.get(0)); assert!(!sliced.get(1)); assert!(sliced.get(2)); assert!(!sliced.get(5)); }
681
682 #[test]
687 fn buffer_from_values_no_na() {
688 let buf = NullableBuffer::from_values(vec![1.0, 2.0, 3.0]);
689 assert_eq!(buf.len(), 3);
690 assert!(!buf.has_na());
691 assert_eq!(buf.na_count(), 0);
692 assert_eq!(buf.get(0), Some(&1.0));
693 assert_eq!(buf.get(1), Some(&2.0));
694 assert_eq!(buf.get(2), Some(&3.0));
695 }
696
697 #[test]
698 fn buffer_from_option_vec() {
699 let buf: NullableBuffer<f64> = vec![Some(1.0), None, Some(3.0)].into();
700 assert_eq!(buf.len(), 3);
701 assert!(buf.has_na());
702 assert_eq!(buf.na_count(), 1);
703 assert_eq!(buf.get(0), Some(&1.0));
704 assert_eq!(buf.get(1), None);
705 assert_eq!(buf.get(2), Some(&3.0));
706 }
707
708 #[test]
709 fn buffer_all_na() {
710 let buf: NullableBuffer<i64> = NullableBuffer::new(3);
711 assert_eq!(buf.len(), 3);
712 assert!(buf.has_na());
713 assert_eq!(buf.na_count(), 3);
714 assert_eq!(buf.get(0), None);
715 assert_eq!(buf.get(1), None);
716 assert_eq!(buf.get(2), None);
717 }
718
719 #[test]
720 fn buffer_set() {
721 let mut buf: NullableBuffer<f64> = NullableBuffer::from_values(vec![1.0, 2.0, 3.0]);
722 buf.set(1, None);
723 assert!(buf.is_na(1));
724 assert_eq!(buf.get(1), None);
725 buf.set(1, Some(42.0));
726 assert!(!buf.is_na(1));
727 assert_eq!(buf.get(1), Some(&42.0));
728 }
729
730 #[test]
731 fn buffer_iter() {
732 let buf: NullableBuffer<i64> = vec![Some(10), None, Some(30)].into();
733 let collected: Vec<Option<&i64>> = buf.iter().collect();
734 assert_eq!(collected, vec![Some(&10), None, Some(&30)]);
735 }
736
737 #[test]
738 fn buffer_into_vec() {
739 let buf: NullableBuffer<f64> = vec![Some(1.0), None, Some(3.0)].into();
740 let v: Vec<Option<f64>> = buf.into_vec();
741 assert_eq!(v, vec![Some(1.0), None, Some(3.0)]);
742 }
743
744 #[test]
745 fn buffer_into_vec_no_na() {
746 let buf = NullableBuffer::from_values(vec![1.0, 2.0]);
747 let v: Vec<Option<f64>> = buf.into_vec();
748 assert_eq!(v, vec![Some(1.0), Some(2.0)]);
749 }
750
751 #[test]
752 fn buffer_get_opt() {
753 let buf: NullableBuffer<i64> = vec![Some(10), None, Some(30)].into();
754 assert_eq!(buf.get_opt(0), Some(10));
755 assert_eq!(buf.get_opt(1), None);
756 assert_eq!(buf.get_opt(2), Some(30));
757 }
758
759 #[test]
760 fn buffer_first_opt() {
761 let buf: NullableBuffer<f64> = vec![Some(1.0), None].into();
762 assert_eq!(buf.first_opt(), Some(1.0));
763
764 let buf2: NullableBuffer<f64> = vec![None, Some(2.0)].into();
765 assert_eq!(buf2.first_opt(), None);
766
767 let buf3: NullableBuffer<f64> = NullableBuffer::from_values(vec![]);
768 assert_eq!(buf3.first_opt(), None);
769 }
770
771 #[test]
772 fn buffer_slice() {
773 let buf: NullableBuffer<i64> = vec![Some(1), Some(2), None, Some(4), Some(5)].into();
774 let sub = buf.slice(1..4);
775 assert_eq!(sub.len(), 3);
776 assert_eq!(sub.get_opt(0), Some(2));
777 assert_eq!(sub.get_opt(1), None);
778 assert_eq!(sub.get_opt(2), Some(4));
779 }
780
781 #[test]
782 fn buffer_push() {
783 let mut buf = NullableBuffer::<i64>::from_values(vec![1, 2]);
784 buf.push(Some(3));
785 buf.push(None);
786 assert_eq!(buf.len(), 4);
787 assert_eq!(buf.get_opt(2), Some(3));
788 assert_eq!(buf.get_opt(3), None);
789 }
790
791 #[test]
792 fn buffer_equality() {
793 let a: NullableBuffer<f64> = vec![Some(1.0), None, Some(3.0)].into();
794 let b: NullableBuffer<f64> = vec![Some(1.0), None, Some(3.0)].into();
795 assert_eq!(a, b);
796
797 let c = NullableBuffer::from_values(vec![1.0, 2.0]);
798 let d = NullableBuffer::from_values(vec![1.0, 2.0]);
799 assert_eq!(c, d);
800
801 assert_ne!(a, c);
802 }
803
804 #[test]
805 fn buffer_into_iter() {
806 let buf: NullableBuffer<i64> = vec![Some(1), None, Some(3)].into();
807 let collected: Vec<Option<i64>> = buf.into_iter().collect();
808 assert_eq!(collected, vec![Some(1), None, Some(3)]);
809 }
810
811 #[test]
812 fn buffer_from_iter() {
813 let buf: NullableBuffer<f64> = vec![Some(1.0), None].into_iter().collect();
814 assert_eq!(buf.len(), 2);
815 assert_eq!(buf.get_opt(0), Some(1.0));
816 assert_eq!(buf.get_opt(1), None);
817 }
818
819 #[test]
820 fn buffer_select_indices() {
821 let buf: NullableBuffer<i64> = vec![Some(10), Some(20), None, Some(40)].into();
822 let selected = buf.select_indices(&[3, 0, 2, 99]);
823 assert_eq!(selected.len(), 4);
824 assert_eq!(selected.get_opt(0), Some(40));
825 assert_eq!(selected.get_opt(1), Some(10));
826 assert_eq!(selected.get_opt(2), None); assert_eq!(selected.get_opt(3), None); }
829
830 }