use std::fmt::Display;
use arrow::datatypes::DataType;
use crate::ScalarValue;
/// Optional size figures (row count, byte size, per-column statistics)
/// together with an exactness flag.
///
/// The `Display` implementation for this type renders `num_rows`,
/// `total_byte_size` and `is_exact`; `column_statistics` is not printed.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Statistics {
/// Row count, or `None` when it is not known.
pub num_rows: Option<usize>,
/// Total size in bytes, or `None` when it is not known.
pub total_byte_size: Option<usize>,
/// Per-column statistics, or `None` when they are not available.
/// NOTE(review): presumably one entry per column in schema order — confirm.
pub column_statistics: Option<Vec<ColumnStatistics>>,
/// Rendered as `exact=` by `Display`.
/// NOTE(review): presumably `true` means the figures above are exact
/// rather than estimates — confirm with the code that produces them.
pub is_exact: bool,
}
impl Display for Statistics {
    /// Writes `rows=<n>, bytes=<n>, exact=<bool>`, using the literal text
    /// `None` for a missing count.
    ///
    /// Nothing at all is written when both counts are `None` and
    /// `is_exact` is `false`, i.e. when there is no information to show.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Decimal text for a known count, the word "None" otherwise.
        let render = |count: Option<usize>| match count {
            Some(value) => value.to_string(),
            None => String::from("None"),
        };

        let has_something_to_show =
            self.num_rows.is_some() || self.total_byte_size.is_some() || self.is_exact;

        if has_something_to_show {
            write!(
                f,
                "rows={}, bytes={}, exact={}",
                render(self.num_rows),
                render(self.total_byte_size),
                self.is_exact
            )
        } else {
            Ok(())
        }
    }
}
/// Optional statistics describing the values of one column.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct ColumnStatistics {
/// Count of null values, or `None` when it is not known.
pub null_count: Option<usize>,
/// Largest value, or `None` when it is not known.
/// NOTE(review): `new_with_unbounded_column` stores the result of
/// `ScalarValue::try_from(DataType)` here, presumably a typed null that
/// stands for "no bound" — confirm.
pub max_value: Option<ScalarValue>,
/// Smallest value, or `None` when it is not known.
/// NOTE(review): same "typed null" convention as `max_value`, presumably.
pub min_value: Option<ScalarValue>,
/// Count of distinct values, or `None` when it is not known.
pub distinct_count: Option<usize>,
}
impl ColumnStatistics {
    /// Reports whether the column is known to hold exactly one value.
    ///
    /// Returns `true` only when `min_value` and `max_value` are both
    /// present, neither reports `is_null()`, and the two compare equal.
    /// Every other combination yields `false`.
    pub fn is_singleton(&self) -> bool {
        if let (Some(lower), Some(upper)) = (&self.min_value, &self.max_value) {
            // A null bound carries no value, so it can never pin the column
            // to a single value.
            if lower.is_null() || upper.is_null() {
                return false;
            }
            return lower == upper;
        }
        false
    }

    /// Builds statistics for a column of type `dt` with no known counts.
    ///
    /// `min_value` and `max_value` both receive the result of
    /// `ScalarValue::try_from(dt.clone())`, or `None` if that conversion
    /// fails. `null_count` and `distinct_count` are `None`.
    ///
    /// NOTE(review): the conversion presumably yields a typed null used to
    /// mean "unbounded" — confirm against `ScalarValue`.
    pub fn new_with_unbounded_column(dt: &DataType) -> ColumnStatistics {
        let unbounded = ScalarValue::try_from(dt.clone()).ok();
        Self {
            min_value: unbounded.clone(),
            max_value: unbounded,
            null_count: None,
            distinct_count: None,
        }
    }
}