use crate::schema_null_free;
use arrow::datatypes::{DataType, Schema};
use arrow::util::bit_util::{ceil, round_upto_power_of_2};
/// Number of bytes `estimate_row_width` adds to its estimate for each `Utf8` field.
const UTF8_DEFAULT_SIZE: usize = 20;
/// Number of bytes `estimate_row_width` adds to its estimate for each `Binary` field.
const BINARY_DEFAULT_SIZE: usize = 100;
/// Selects which row layout flavor a `RowLayout` describes.
#[derive(Copy, Clone, Debug)]
pub enum RowType {
/// Each field takes its type-specific width (see `compact_type_width`);
/// `Utf8` and `Binary` fields are accepted in this layout.
Compact,
/// Every field takes an 8-byte slot (see `word_aligned_offsets`);
/// `Utf8` and `Binary` fields are not accepted in this layout.
WordAligned,
}
/// Byte layout of the fixed part of a row for a given schema and `RowType`.
#[derive(Debug, Clone)]
pub struct RowLayout {
// Layout flavor this description was built for.
row_type: RowType,
/// Value returned by `schema_null_free` for the schema; when `true`,
/// `null_width` is 0.
pub(crate) null_free: bool,
/// Width in bytes reserved in front of the field values for null tracking:
/// 0 when `null_free`, otherwise `ceil(field_count, 8)` (additionally passed
/// through `round_upto_power_of_2(.., 8)` for `RowType::WordAligned`).
pub(crate) null_width: usize,
/// Combined width in bytes of all field slots, excluding `null_width`.
pub(crate) values_width: usize,
/// Number of fields in the schema.
pub(crate) field_count: usize,
/// Start offset in bytes of each field's slot, in schema order. Offsets
/// include `null_width`, i.e. the first field starts at `null_width`.
pub(crate) field_offsets: Vec<usize>,
}
impl RowLayout {
pub fn new(schema: &Schema, row_type: RowType) -> Self {
assert!(
row_supported(schema, row_type),
"{:?}Row with {:?} not supported yet.",
row_type,
schema,
);
let null_free = schema_null_free(schema);
let field_count = schema.fields().len();
let null_width = if null_free {
0
} else {
match row_type {
RowType::Compact => ceil(field_count, 8),
RowType::WordAligned => round_upto_power_of_2(ceil(field_count, 8), 8),
}
};
let (field_offsets, values_width) = match row_type {
RowType::Compact => compact_offsets(null_width, schema),
RowType::WordAligned => word_aligned_offsets(null_width, schema),
};
Self {
row_type,
null_free,
null_width,
values_width,
field_count,
field_offsets,
}
}
#[inline(always)]
pub fn fixed_part_width(&self) -> usize {
self.null_width + self.values_width
}
}
/// Computes the field offsets for the compact layout.
///
/// Returns `(offsets, values_width)`: `offsets[i]` is the byte offset of field
/// `i`, with the first field placed at `null_width`, and `values_width` is the
/// sum of all field widths as reported by `compact_type_width`.
fn compact_offsets(null_width: usize, schema: &Schema) -> (Vec<usize>, usize) {
    // Running end position; starts right after the null-tracking bytes.
    let mut cursor = null_width;
    let offsets: Vec<usize> = schema
        .fields()
        .iter()
        .map(|field| {
            let start = cursor;
            cursor += compact_type_width(field.data_type());
            start
        })
        .collect();
    (offsets, cursor - null_width)
}
fn var_length(dt: &DataType) -> bool {
use DataType::*;
matches!(dt, Utf8 | Binary)
}
/// Returns the number of bytes a field of type `dt` occupies in the fixed part
/// of a compact row.
///
/// Types for which `var_length` is `true` take `size_of::<u64>()` bytes; the
/// remaining handled types take 1, 2, 4 or 8 bytes.
///
/// # Panics
///
/// Hits `unreachable!()` for any data type not listed below.
fn compact_type_width(dt: &DataType) -> usize {
    use DataType::*;
    match dt {
        // Variable-length types get a fixed u64-sized slot in the fixed part.
        var if var_length(var) => std::mem::size_of::<u64>(),
        Boolean | UInt8 | Int8 => 1,
        UInt16 | Int16 => 2,
        UInt32 | Int32 | Float32 | Date32 => 4,
        UInt64 | Int64 | Float64 | Date64 => 8,
        _ => unreachable!(),
    }
}
/// Computes the field offsets for the word-aligned layout, where every field
/// takes an 8-byte slot.
///
/// Returns `(offsets, values_width)`: `offsets[i]` is `null_width + 8 * i` and
/// `values_width` is `8 * number_of_fields`.
///
/// # Panics
///
/// Panics if any field has a `Decimal128` data type.
fn word_aligned_offsets(null_width: usize, schema: &Schema) -> (Vec<usize>, usize) {
    const SLOT_WIDTH: usize = 8;
    let fields = schema.fields();
    let offsets: Vec<usize> = fields
        .iter()
        .enumerate()
        .map(|(index, f)| {
            assert!(!matches!(f.data_type(), DataType::Decimal128(_, _)));
            null_width + index * SLOT_WIDTH
        })
        .collect();
    (offsets, fields.len() * SLOT_WIDTH)
}
/// Estimates the width in bytes of a row for `schema` laid out as `layout`.
///
/// For a word-aligned layout the estimate is exactly
/// `layout.fixed_part_width()`. For a compact layout, `UTF8_DEFAULT_SIZE` is
/// added per `Utf8` field and `BINARY_DEFAULT_SIZE` per `Binary` field, and the
/// total is passed through `round_upto_power_of_2(.., 8)`.
pub(crate) fn estimate_row_width(schema: &Schema, layout: &RowLayout) -> usize {
    let fixed = layout.fixed_part_width();
    match layout.row_type {
        RowType::WordAligned => fixed,
        RowType::Compact => {
            // Default-size guess for the variable-length payload of each field.
            let var_part: usize = schema
                .fields()
                .iter()
                .map(|field| match field.data_type() {
                    DataType::Utf8 => UTF8_DEFAULT_SIZE,
                    DataType::Binary => BINARY_DEFAULT_SIZE,
                    _ => 0,
                })
                .sum();
            round_upto_power_of_2(fixed + var_part, 8)
        }
    }
}
/// Returns `true` when every field of `schema` has a data type that
/// `supported_type` accepts for `row_type`. An empty schema yields `true`.
pub fn row_supported(schema: &Schema, row_type: RowType) -> bool {
    for field in schema.fields() {
        if !supported_type(field.data_type(), row_type) {
            return false;
        }
    }
    true
}
/// Returns whether a field of type `dt` can be stored in a row of `row_type`.
///
/// Both layouts accept booleans, the 8/16/32/64-bit signed and unsigned
/// integers, `Float32`, `Float64`, `Date32` and `Date64`. The compact layout
/// additionally accepts `Utf8` and `Binary`.
fn supported_type(dt: &DataType, row_type: RowType) -> bool {
    use DataType::*;
    // Types accepted by both layouts.
    let fixed_width = matches!(
        dt,
        Boolean
            | UInt8
            | UInt16
            | UInt32
            | UInt64
            | Int8
            | Int16
            | Int32
            | Int64
            | Float32
            | Float64
            | Date32
            | Date64
    );
    match row_type {
        RowType::Compact => fixed_width || matches!(dt, Utf8 | Binary),
        RowType::WordAligned => fixed_width,
    }
}