Struct datafusion::common::arrow::array::GenericByteViewArray
source · pub struct GenericByteViewArray<T>where
T: ByteViewType + ?Sized,{ /* private fields */ }
Expand description
Variable-size Binary View Layout: An array of variable length bytes view arrays.
It differs from crate::GenericByteArray in that each view stores both an offset and a length,
meaning that take / filter operations can be implemented without copying the underlying data.
See StringViewArray
for storing utf8 encoded string data and
BinaryViewArray
for storing bytes.
A GenericByteViewArray
stores variable length byte strings. An array of
N
elements is stored as N
fixed length “views” and a variable number
of variable length “buffers”.
Each view is a u128 value whose layout differs depending on the
length of the string stored at that location:
                        ┌──────┬────────────────────────┐
                        │length│      string value      │
   Strings (len <= 12)  │      │    (padded with 0)     │
                        └──────┴────────────────────────┘
                         0    31                      127

                        ┌───────┬───────┬───────┬───────┐
                        │length │prefix │  buf  │offset │
   Strings (len > 12)   │       │       │ index │       │
                        └───────┴───────┴───────┴───────┘
                         0    31       63      95    127
-
Strings with length <= 12 are stored directly in the view.
-
Strings with length > 12: The first four bytes are stored inline in the view and the entire string is stored in one of the buffers.
Unlike GenericByteArray, there are no constraints on the offsets other
than that they must point into a valid buffer. In particular, they can be out of order,
non-contiguous and overlapping.
For example, in the following diagram, the strings “FishWasInTownTodayYay” and “CrumpleFacedFish” are both longer than 12 bytes and thus are stored in a separate buffer, while the string “LavaMonster” is stored inline in the view. In this case, the same bytes for “Fish” are used to store both strings.
                                                                           ┌───┐
                        ┌──────┬──────┬──────┬──────┐             offset   │...│
"FishWasInTownTodayYay" │  21  │ Fish │  0   │ 115  │─ ─             103   │Mr.│
                        └──────┴──────┴──────┴──────┘   │      ┌ ─ ─ ─ ─ ▶ │Cru│
                        ┌──────┬──────┬──────┬──────┐                      │mpl│
"CrumpleFacedFish"      │  16  │ Crum │  0   │ 103  │─ ─│─ ─ ─ ┘           │eFa│
                        └──────┴──────┴──────┴──────┘                      │ced│
                        ┌──────┬────────────────────┐   └ ─ ─ ─ ─ ─ ─ ─ ─ ▶│Fis│
"LavaMonster"           │  11  │   LavaMonster\0    │                      │hWa│
                        └──────┴────────────────────┘             offset   │sIn│
                                                                     115   │Tow│
                                                                           │nTo│
                                                                           │day│
                                u128 "views"                               │Yay│
                                                                buffer 0   │...│
                                                                           └───┘
Implementations§
source§impl<T> GenericByteViewArray<T>where
T: ByteViewType + ?Sized,
impl<T> GenericByteViewArray<T>where
T: ByteViewType + ?Sized,
sourcepub fn new(
views: ScalarBuffer<u128>,
buffers: Vec<Buffer>,
nulls: Option<NullBuffer>,
) -> GenericByteViewArray<T>
pub fn new( views: ScalarBuffer<u128>, buffers: Vec<Buffer>, nulls: Option<NullBuffer>, ) -> GenericByteViewArray<T>
Create a new GenericByteViewArray
from the provided parts, panicking on failure
§Panics
Panics if GenericByteViewArray::try_new
returns an error
sourcepub fn try_new(
views: ScalarBuffer<u128>,
buffers: Vec<Buffer>,
nulls: Option<NullBuffer>,
) -> Result<GenericByteViewArray<T>, ArrowError>
pub fn try_new( views: ScalarBuffer<u128>, buffers: Vec<Buffer>, nulls: Option<NullBuffer>, ) -> Result<GenericByteViewArray<T>, ArrowError>
Create a new GenericByteViewArray
from the provided parts, returning an error on failure
§Errors
- views.len() != nulls.len()
- ByteViewType::validate fails
sourcepub unsafe fn new_unchecked(
views: ScalarBuffer<u128>,
buffers: Vec<Buffer>,
nulls: Option<NullBuffer>,
) -> GenericByteViewArray<T>
pub unsafe fn new_unchecked( views: ScalarBuffer<u128>, buffers: Vec<Buffer>, nulls: Option<NullBuffer>, ) -> GenericByteViewArray<T>
Create a new GenericByteViewArray
from the provided parts, without validation
§Safety
Safe if Self::try_new
would not error
sourcepub fn new_null(len: usize) -> GenericByteViewArray<T>
pub fn new_null(len: usize) -> GenericByteViewArray<T>
Create a new GenericByteViewArray
of length len
where all values are null
sourcepub fn new_scalar(
value: impl AsRef<<T as ByteViewType>::Native>,
) -> Scalar<GenericByteViewArray<T>>
pub fn new_scalar( value: impl AsRef<<T as ByteViewType>::Native>, ) -> Scalar<GenericByteViewArray<T>>
Create a new Scalar
from value
sourcepub fn from_iter_values<Ptr, I>(iter: I) -> GenericByteViewArray<T>
pub fn from_iter_values<Ptr, I>(iter: I) -> GenericByteViewArray<T>
Creates a GenericByteViewArray
based on an iterator of values without nulls
sourcepub fn into_parts(self) -> (ScalarBuffer<u128>, Vec<Buffer>, Option<NullBuffer>)
pub fn into_parts(self) -> (ScalarBuffer<u128>, Vec<Buffer>, Option<NullBuffer>)
Deconstruct this array into its constituent parts
sourcepub fn views(&self) -> &ScalarBuffer<u128>
pub fn views(&self) -> &ScalarBuffer<u128>
Returns the views buffer
sourcepub fn data_buffers(&self) -> &[Buffer]
pub fn data_buffers(&self) -> &[Buffer]
Returns the buffers storing string data
sourcepub fn value(&self, i: usize) -> &<T as ByteViewType>::Native
pub fn value(&self, i: usize) -> &<T as ByteViewType>::Native
sourcepub unsafe fn value_unchecked(&self, idx: usize) -> &<T as ByteViewType>::Native
pub unsafe fn value_unchecked(&self, idx: usize) -> &<T as ByteViewType>::Native
Returns the element at index idx without bounds checking
§Safety
Caller is responsible for ensuring that the index is within the bounds of the array
sourcepub unsafe fn inline_value(view: &u128, len: usize) -> &[u8] ⓘ
pub unsafe fn inline_value(view: &u128, len: usize) -> &[u8] ⓘ
Returns the inline value of the view.
§Safety
- The view must be a valid element from Self::views() that adheres to the view layout.
- The len must be the length of the inlined value. It should never be larger than 12.
sourcepub fn iter(&self) -> ArrayIter<&GenericByteViewArray<T>> ⓘ
pub fn iter(&self) -> ArrayIter<&GenericByteViewArray<T>> ⓘ
Constructs a new iterator
sourcepub fn slice(&self, offset: usize, length: usize) -> GenericByteViewArray<T>
pub fn slice(&self, offset: usize, length: usize) -> GenericByteViewArray<T>
Returns a zero-copy slice of this array with the indicated offset and length.
sourcepub fn gc(&self) -> GenericByteViewArray<T>
pub fn gc(&self) -> GenericByteViewArray<T>
Returns a “compacted” version of this array.
The original array will not be modified.
§Garbage Collection
Before GC:
                                       ┌──────┐
                                       │......│
                                       │......│
┌────────────────────┐       ┌ ─ ─ ─ ▶ │Data1 │   Large buffer
│       View 1       │─ ─ ─ ─          │......│  with data that
├────────────────────┤                 │......│  is not referred
│       View 2       │─ ─ ─ ─ ─ ─ ─ ─▶ │Data2 │ to by View 1 or
└────────────────────┘                 │......│      View 2
                                       │......│
   2 views, refer to                   │......│
  small portions of a                  └──────┘
     large buffer
After GC:
┌────────────────────┐                 ┌─────┐    After gc, only
│       View 1       │─ ─ ─ ─ ─ ─ ─ ─▶ │Data1│     data that is
├────────────────────┤       ┌ ─ ─ ─ ▶ │Data2│    pointed to by
│       View 2       │─ ─ ─ ─          └─────┘     the views is
└────────────────────┘                                 left

        2 views
This method will compact the data buffers by recreating the view array and only include the data that is pointed to by the views.
Note that it will copy the array regardless of whether the original array is compact. Use with caution, as this can be an expensive operation: only use it when you are sure that the view array is significantly smaller than when it was originally created, e.g., after filtering or slicing.
source§impl GenericByteViewArray<BinaryViewType>
impl GenericByteViewArray<BinaryViewType>
sourcepub fn to_string_view(
self,
) -> Result<GenericByteViewArray<StringViewType>, ArrowError>
pub fn to_string_view( self, ) -> Result<GenericByteViewArray<StringViewType>, ArrowError>
Convert the BinaryViewArray
to StringViewArray
If the items are not valid UTF-8 data, validation will fail and an error is returned.
sourcepub unsafe fn to_string_view_unchecked(
self,
) -> GenericByteViewArray<StringViewType>
pub unsafe fn to_string_view_unchecked( self, ) -> GenericByteViewArray<StringViewType>
Convert the BinaryViewArray
to StringViewArray
§Safety
The caller is responsible for ensuring that all items in the array are valid UTF-8 data.
source§impl GenericByteViewArray<StringViewType>
impl GenericByteViewArray<StringViewType>
sourcepub fn to_binary_view(self) -> GenericByteViewArray<BinaryViewType>
pub fn to_binary_view(self) -> GenericByteViewArray<BinaryViewType>
Convert the StringViewArray
to BinaryViewArray
Trait Implementations§
source§impl<T> Array for GenericByteViewArray<T>where
T: ByteViewType + ?Sized,
impl<T> Array for GenericByteViewArray<T>where
T: ByteViewType + ?Sized,
source§fn slice(&self, offset: usize, length: usize) -> Arc<dyn Array>
fn slice(&self, offset: usize, length: usize) -> Arc<dyn Array>
source§fn offset(&self) -> usize
fn offset(&self) -> usize
0
. Read moresource§fn nulls(&self) -> Option<&NullBuffer>
fn nulls(&self) -> Option<&NullBuffer>
source§fn get_buffer_memory_size(&self) -> usize
fn get_buffer_memory_size(&self) -> usize
source§fn get_array_memory_size(&self) -> usize
fn get_array_memory_size(&self) -> usize
get_buffer_memory_size()
and
includes the overhead of the data structures that contain the pointers to the various buffers.source§fn logical_nulls(&self) -> Option<NullBuffer>
fn logical_nulls(&self) -> Option<NullBuffer>
NullBuffer
that represents the logical
null values of this array, if any. Read moresource§fn null_count(&self) -> usize
fn null_count(&self) -> usize
source§fn is_nullable(&self) -> bool
fn is_nullable(&self) -> bool
false
if the array is guaranteed to not contain any logical nulls Read moresource§impl<'a, T> ArrayAccessor for &'a GenericByteViewArray<T>where
T: ByteViewType + ?Sized,
impl<'a, T> ArrayAccessor for &'a GenericByteViewArray<T>where
T: ByteViewType + ?Sized,
§type Item = &'a <T as ByteViewType>::Native
type Item = &'a <T as ByteViewType>::Native
source§fn value(
&self,
index: usize,
) -> <&'a GenericByteViewArray<T> as ArrayAccessor>::Item
fn value( &self, index: usize, ) -> <&'a GenericByteViewArray<T> as ArrayAccessor>::Item
i
Read moresource§unsafe fn value_unchecked(
&self,
index: usize,
) -> <&'a GenericByteViewArray<T> as ArrayAccessor>::Item
unsafe fn value_unchecked( &self, index: usize, ) -> <&'a GenericByteViewArray<T> as ArrayAccessor>::Item
i
Read moresource§impl<T> Clone for GenericByteViewArray<T>where
T: ByteViewType + ?Sized,
impl<T> Clone for GenericByteViewArray<T>where
T: ByteViewType + ?Sized,
source§fn clone(&self) -> GenericByteViewArray<T>
fn clone(&self) -> GenericByteViewArray<T>
1.0.0 · source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
source
. Read moresource§impl<T> Debug for GenericByteViewArray<T>where
T: ByteViewType + ?Sized,
impl<T> Debug for GenericByteViewArray<T>where
T: ByteViewType + ?Sized,
source§impl<FROM, V> From<&GenericByteArray<FROM>> for GenericByteViewArray<V>where
FROM: ByteArrayType,
<FROM as ByteArrayType>::Offset: OffsetSizeTrait + ToPrimitive,
V: ByteViewType<Native = <FROM as ByteArrayType>::Native>,
impl<FROM, V> From<&GenericByteArray<FROM>> for GenericByteViewArray<V>where
FROM: ByteArrayType,
<FROM as ByteArrayType>::Offset: OffsetSizeTrait + ToPrimitive,
V: ByteViewType<Native = <FROM as ByteArrayType>::Native>,
Convert a GenericByteArray
to a GenericByteViewArray
but in a smart way:
If the offsets are all less than u32::MAX, then we directly build the view array on top of the existing buffer.
source§fn from(byte_array: &GenericByteArray<FROM>) -> GenericByteViewArray<V>
fn from(byte_array: &GenericByteArray<FROM>) -> GenericByteViewArray<V>
source§impl<T> From<ArrayData> for GenericByteViewArray<T>where
T: ByteViewType + ?Sized,
impl<T> From<ArrayData> for GenericByteViewArray<T>where
T: ByteViewType + ?Sized,
source§fn from(value: ArrayData) -> GenericByteViewArray<T>
fn from(value: ArrayData) -> GenericByteViewArray<T>
source§impl<T> From<GenericByteViewArray<T>> for ArrayDatawhere
T: ByteViewType + ?Sized,
impl<T> From<GenericByteViewArray<T>> for ArrayDatawhere
T: ByteViewType + ?Sized,
source§fn from(array: GenericByteViewArray<T>) -> ArrayData
fn from(array: GenericByteViewArray<T>) -> ArrayData
source§impl From<Vec<&[u8]>> for GenericByteViewArray<BinaryViewType>
impl From<Vec<&[u8]>> for GenericByteViewArray<BinaryViewType>
source§fn from(v: Vec<&[u8]>) -> GenericByteViewArray<BinaryViewType>
fn from(v: Vec<&[u8]>) -> GenericByteViewArray<BinaryViewType>
source§impl From<Vec<&str>> for GenericByteViewArray<StringViewType>
impl From<Vec<&str>> for GenericByteViewArray<StringViewType>
source§fn from(v: Vec<&str>) -> GenericByteViewArray<StringViewType>
fn from(v: Vec<&str>) -> GenericByteViewArray<StringViewType>
source§impl From<Vec<Option<&[u8]>>> for GenericByteViewArray<BinaryViewType>
impl From<Vec<Option<&[u8]>>> for GenericByteViewArray<BinaryViewType>
source§fn from(v: Vec<Option<&[u8]>>) -> GenericByteViewArray<BinaryViewType>
fn from(v: Vec<Option<&[u8]>>) -> GenericByteViewArray<BinaryViewType>
source§impl From<Vec<Option<&str>>> for GenericByteViewArray<StringViewType>
impl From<Vec<Option<&str>>> for GenericByteViewArray<StringViewType>
source§fn from(v: Vec<Option<&str>>) -> GenericByteViewArray<StringViewType>
fn from(v: Vec<Option<&str>>) -> GenericByteViewArray<StringViewType>
source§impl From<Vec<Option<String>>> for GenericByteViewArray<StringViewType>
impl From<Vec<Option<String>>> for GenericByteViewArray<StringViewType>
source§fn from(v: Vec<Option<String>>) -> GenericByteViewArray<StringViewType>
fn from(v: Vec<Option<String>>) -> GenericByteViewArray<StringViewType>
source§impl From<Vec<String>> for GenericByteViewArray<StringViewType>
impl From<Vec<String>> for GenericByteViewArray<StringViewType>
source§fn from(v: Vec<String>) -> GenericByteViewArray<StringViewType>
fn from(v: Vec<String>) -> GenericByteViewArray<StringViewType>
source§impl<'a, Ptr, T> FromIterator<&'a Option<Ptr>> for GenericByteViewArray<T>
impl<'a, Ptr, T> FromIterator<&'a Option<Ptr>> for GenericByteViewArray<T>
source§fn from_iter<I>(iter: I) -> GenericByteViewArray<T>where
I: IntoIterator<Item = &'a Option<Ptr>>,
fn from_iter<I>(iter: I) -> GenericByteViewArray<T>where
I: IntoIterator<Item = &'a Option<Ptr>>,
source§impl<Ptr, T> FromIterator<Option<Ptr>> for GenericByteViewArray<T>
impl<Ptr, T> FromIterator<Option<Ptr>> for GenericByteViewArray<T>
source§fn from_iter<I>(iter: I) -> GenericByteViewArray<T>where
I: IntoIterator<Item = Option<Ptr>>,
fn from_iter<I>(iter: I) -> GenericByteViewArray<T>where
I: IntoIterator<Item = Option<Ptr>>,
source§impl<'a, T> IntoIterator for &'a GenericByteViewArray<T>where
T: ByteViewType + ?Sized,
impl<'a, T> IntoIterator for &'a GenericByteViewArray<T>where
T: ByteViewType + ?Sized,
§type Item = Option<&'a <T as ByteViewType>::Native>
type Item = Option<&'a <T as ByteViewType>::Native>
§type IntoIter = ArrayIter<&'a GenericByteViewArray<T>>
type IntoIter = ArrayIter<&'a GenericByteViewArray<T>>
source§fn into_iter(self) -> <&'a GenericByteViewArray<T> as IntoIterator>::IntoIter
fn into_iter(self) -> <&'a GenericByteViewArray<T> as IntoIterator>::IntoIter
Auto Trait Implementations§
impl<T> Freeze for GenericByteViewArray<T>where
T: ?Sized,
impl<T> RefUnwindSafe for GenericByteViewArray<T>where
T: RefUnwindSafe + ?Sized,
impl<T> Send for GenericByteViewArray<T>where
T: ?Sized,
impl<T> Sync for GenericByteViewArray<T>where
T: ?Sized,
impl<T> Unpin for GenericByteViewArray<T>
impl<T> UnwindSafe for GenericByteViewArray<T>where
T: UnwindSafe + ?Sized,
Blanket Implementations§
source§impl<T> BorrowMut<T> for Twhere
T: ?Sized,
impl<T> BorrowMut<T> for Twhere
T: ?Sized,
source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
source§impl<T> CloneToUninit for Twhere
T: Clone,
impl<T> CloneToUninit for Twhere
T: Clone,
source§default unsafe fn clone_to_uninit(&self, dst: *mut T)
default unsafe fn clone_to_uninit(&self, dst: *mut T)
clone_to_uninit
)source§impl<T> IntoEither for T
impl<T> IntoEither for T
source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
self
into a Left
variant of Either<Self, Self>
if into_left
is true
.
Converts self
into a Right
variant of Either<Self, Self>
otherwise. Read moresource§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
self
into a Left
variant of Either<Self, Self>
if into_left(&self)
returns true
.
Converts self
into a Right
variant of Either<Self, Self>
otherwise. Read more