use std::rc::Rc;
use std::sync::Arc;
use arrow::csv;
use arrow::datatypes::Schema;
use arrow::record_batch::RecordBatch;
use super::error::Result;
/// A source of Arrow record batches that exposes its schema.
///
/// Implementors hand out batches one at a time through [`DataSource::next`].
pub trait DataSource {
    /// Returns a reference to the shared schema held by this source.
    fn schema(&self) -> &Arc<Schema>;

    /// Attempts to produce the next [`RecordBatch`].
    ///
    /// # Errors
    ///
    /// Returns this module's error type when the batch cannot be produced.
    ///
    /// NOTE(review): `Ok(None)` presumably signals that the source is
    /// exhausted -- confirm against the implementations.
    fn next(&mut self) -> Result<Option<RecordBatch>>;
}
/// A [`DataSource`] implementation that pairs an Arrow CSV reader with the
/// schema it is reported under.
pub struct CsvDataSource {
    /// Schema returned by `DataSource::schema`.
    schema: Arc<Schema>,
    /// Underlying Arrow CSV reader that batches are pulled from.
    reader: csv::Reader,
}
impl CsvDataSource {
pub fn new(schema: Arc<Schema>, reader: csv::Reader) -> Self {
Self { schema, reader }
}
}
impl DataSource for CsvDataSource {
    /// Returns the schema supplied when this source was constructed.
    fn schema(&self) -> &Arc<Schema> {
        let schema = &self.schema;
        schema
    }

    /// Delegates to the wrapped CSV reader's `next` and forwards its result.
    ///
    /// # Errors
    ///
    /// Any error reported by the reader is converted into this module's
    /// error type by the `?` operator and returned to the caller.
    fn next(&mut self) -> Result<Option<RecordBatch>> {
        let batch = self.reader.next()?;
        Ok(batch)
    }
}
/// Serializable description of a file-backed data source.
///
/// NOTE(review): the schemas here are held in `Rc`, whereas the `DataSource`
/// trait in this file exposes `Arc<Schema>`. `Rc` is not `Send`, so values of
/// this enum cannot cross thread boundaries -- confirm this is intended.
#[derive(Serialize, Deserialize, Clone)]
pub enum DataSourceMeta {
    /// Metadata for a CSV file.
    CsvFile {
        /// Name of the CSV file (presumably a filesystem path -- confirm).
        filename: String,
        /// Schema associated with the file.
        schema: Rc<Schema>,
        /// Presumably whether the first row of the file is a header row.
        has_header: bool,
        /// Optional projection; presumably column indices to read, with
        /// `None` meaning all columns -- verify against the consumer.
        projection: Option<Vec<usize>>,
    },
    /// Metadata for a Parquet file.
    ParquetFile {
        /// Name of the Parquet file (presumably a filesystem path -- confirm).
        filename: String,
        /// Schema associated with the file.
        schema: Rc<Schema>,
        /// Optional projection; presumably column indices to read, with
        /// `None` meaning all columns -- verify against the consumer.
        projection: Option<Vec<usize>>,
    },
}