use std::fs;
use std::fs::metadata;
use std::sync::{Arc, Mutex};
use crate::error::{ExecutionError, Result};
use crate::execution::physical_plan::BatchIterator;
use arrow::datatypes::Schema;
use arrow::record_batch::RecordBatch;
/// Batch iterator over a list of record batches that are already held in memory.
///
/// The iterator keeps a cursor into `batches` and hands out one batch per call to
/// `next`, in the order in which the batches are stored.
pub struct RecordBatchIterator {
/// Schema handed back by `schema()`.
schema: Arc<Schema>,
/// The batches to yield, in iteration order.
batches: Vec<Arc<RecordBatch>>,
/// Position of the next batch to yield; starts at 0 and grows by one per yielded batch.
index: usize,
}
impl RecordBatchIterator {
pub fn new(schema: Arc<Schema>, batches: Vec<Arc<RecordBatch>>) -> Self {
RecordBatchIterator {
schema,
index: 0,
batches,
}
}
}
impl BatchIterator for RecordBatchIterator {
    /// Returns another handle to the schema this iterator was created with.
    fn schema(&self) -> Arc<Schema> {
        Arc::clone(&self.schema)
    }

    /// Yields a clone of the batch under the cursor and advances the cursor.
    ///
    /// Returns `Ok(None)` once every stored batch has been yielded; further calls
    /// keep returning `Ok(None)` and leave the cursor where it is. This
    /// implementation never returns `Err`.
    fn next(&mut self) -> Result<Option<RecordBatch>> {
        match self.batches.get(self.index) {
            Some(batch) => {
                // Only move the cursor when there really was a batch to hand out.
                self.index += 1;
                Ok(Some(batch.as_ref().clone()))
            }
            None => Ok(None),
        }
    }
}
/// Drains a shared batch iterator into a vector.
///
/// The mutex is locked once and held for the whole drain. Batches are appended in
/// the order the iterator produces them, and draining stops at the first
/// `Ok(None)`.
///
/// # Errors
///
/// Returns the first error produced by the iterator; batches gathered before
/// that point are discarded.
///
/// # Panics
///
/// Panics if the mutex is poisoned.
pub fn collect(it: Arc<Mutex<dyn BatchIterator>>) -> Result<Vec<RecordBatch>> {
    let mut guard = it.lock().unwrap();
    let mut results: Vec<RecordBatch> = Vec::new();
    while let Some(batch) = guard.next()? {
        results.push(batch);
    }
    Ok(results)
}
/// Appends to `filenames` every file at or below `dir` whose path ends with `ext`.
///
/// `dir` may name a single file, in which case only that path is considered.
/// Otherwise it is listed and every sub-directory is visited recursively. The
/// extension test is a plain string suffix comparison on the whole path, so
/// `ext` is matched exactly as given. Entries are appended in the order
/// `fs::read_dir` yields them.
///
/// # Errors
///
/// Returns an error if the metadata of `dir` cannot be read, if a directory
/// cannot be listed or one of its entries cannot be read, or if an entry's path
/// is not valid UTF-8 (`ExecutionError::General("Invalid path")`). Paths pushed
/// before the failure remain in `filenames`.
pub fn build_file_list(dir: &str, filenames: &mut Vec<String>, ext: &str) -> Result<()> {
    // A plain file is handled on its own: keep it only if the suffix matches.
    if metadata(dir)?.is_file() {
        if dir.ends_with(ext) {
            filenames.push(dir.to_string());
        }
        return Ok(());
    }

    for dir_entry in fs::read_dir(dir)? {
        let child = dir_entry?.path();
        // The result list holds `String`s, so non-UTF-8 paths cannot be represented.
        let child_name = match child.to_str() {
            Some(name) => name,
            None => return Err(ExecutionError::General("Invalid path".to_string())),
        };
        if child.is_dir() {
            build_file_list(child_name, filenames, ext)?;
        } else if child_name.ends_with(ext) {
            filenames.push(child_name.to_string());
        }
    }
    Ok(())
}