extern crate criterion;
use arrow::array::{ArrayRef, Int64Array, OffsetSizeTrait};
use arrow::datatypes::{DataType, Field};
use arrow::util::bench_util::{
create_string_array_with_len, create_string_view_array_with_len,
};
use criterion::{black_box, criterion_group, criterion_main, Criterion, SamplingMode};
use datafusion_common::DataFusionError;
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
use datafusion_functions::string;
use std::sync::Arc;
use std::time::Duration;
/// Builds the two-argument input for the `repeat` benchmark: a string column
/// followed by an `Int64` column holding the repeat count.
///
/// * `size` - number of rows in both columns.
/// * `str_len` - forwarded to the arrow string generators as the string length.
/// * `repeat_times` - value stored in every slot of the count column.
/// * `force_view_types` - when `true` the string column is produced by
///   `create_string_view_array_with_len`; otherwise by
///   `create_string_array_with_len::<O>`. `O` is only used in the latter case.
///
/// The literal `0.1` is the second argument of both generators
/// (NOTE(review): null density per arrow's `bench_util` -- confirm).
fn create_args<O: OffsetSizeTrait>(
    size: usize,
    str_len: usize,
    repeat_times: i64,
    force_view_types: bool,
) -> Vec<ColumnarValue> {
    // Every row asks for the same number of repetitions.
    let counts: ArrayRef = Arc::new(Int64Array::from(vec![repeat_times; size]));

    let strings: ArrayRef = if force_view_types {
        Arc::new(create_string_view_array_with_len(size, 0.1, str_len, false))
    } else {
        Arc::new(create_string_array_with_len::<O>(size, 0.1, str_len))
    };

    vec![ColumnarValue::Array(strings), ColumnarValue::Array(counts)]
}
fn invoke_repeat_with_args(
args: Vec<ColumnarValue>,
repeat_times: i64,
) -> Result<ColumnarValue, DataFusionError> {
let arg_fields = args
.iter()
.enumerate()
.map(|(idx, arg)| Field::new(format!("arg_{idx}"), arg.data_type(), true).into())
.collect::<Vec<_>>();
string::repeat().invoke_with_args(ScalarFunctionArgs {
args,
arg_fields,
number_rows: repeat_times as usize,
return_field: Field::new("f", DataType::Utf8, true).into(),
})
}
fn criterion_benchmark(c: &mut Criterion) {
for size in [1024, 4096] {
let repeat_times = 3;
let mut group = c.benchmark_group(format!("repeat {repeat_times} times"));
group.sampling_mode(SamplingMode::Flat);
group.sample_size(10);
group.measurement_time(Duration::from_secs(10));
let args = create_args::<i32>(size, 32, repeat_times, true);
group.bench_function(
format!("repeat_string_view [size={size}, repeat_times={repeat_times}]"),
|b| {
b.iter(|| {
let args_cloned = args.clone();
black_box(invoke_repeat_with_args(args_cloned, repeat_times))
})
},
);
let args = create_args::<i32>(size, 32, repeat_times, false);
group.bench_function(
format!("repeat_string [size={size}, repeat_times={repeat_times}]"),
|b| {
b.iter(|| {
let args_cloned = args.clone();
black_box(invoke_repeat_with_args(args_cloned, repeat_times))
})
},
);
let args = create_args::<i64>(size, 32, repeat_times, false);
group.bench_function(
format!("repeat_large_string [size={size}, repeat_times={repeat_times}]"),
|b| {
b.iter(|| {
let args_cloned = args.clone();
black_box(invoke_repeat_with_args(args_cloned, repeat_times))
})
},
);
group.finish();
let repeat_times = 30;
let mut group = c.benchmark_group(format!("repeat {repeat_times} times"));
group.sampling_mode(SamplingMode::Flat);
group.sample_size(10);
group.measurement_time(Duration::from_secs(10));
let args = create_args::<i32>(size, 32, repeat_times, true);
group.bench_function(
format!("repeat_string_view [size={size}, repeat_times={repeat_times}]"),
|b| {
b.iter(|| {
let args_cloned = args.clone();
black_box(invoke_repeat_with_args(args_cloned, repeat_times))
})
},
);
let args = create_args::<i32>(size, 32, repeat_times, false);
group.bench_function(
format!("repeat_string [size={size}, repeat_times={repeat_times}]"),
|b| {
b.iter(|| {
let args_cloned = args.clone();
black_box(invoke_repeat_with_args(args_cloned, repeat_times))
})
},
);
let args = create_args::<i64>(size, 32, repeat_times, false);
group.bench_function(
format!("repeat_large_string [size={size}, repeat_times={repeat_times}]"),
|b| {
b.iter(|| {
let args_cloned = args.clone();
black_box(invoke_repeat_with_args(args_cloned, repeat_times))
})
},
);
group.finish();
let repeat_times = 1073741824;
let mut group = c.benchmark_group(format!("repeat {repeat_times} times"));
group.sampling_mode(SamplingMode::Flat);
group.sample_size(10);
group.measurement_time(Duration::from_secs(10));
let args = create_args::<i32>(size, 2, repeat_times, false);
group.bench_function(
format!("repeat_string overflow [size={size}, repeat_times={repeat_times}]"),
|b| {
b.iter(|| {
let args_cloned = args.clone();
black_box(invoke_repeat_with_args(args_cloned, repeat_times))
})
},
);
group.finish();
}
}
// Collect `criterion_benchmark` into a Criterion group named `benches`.
criterion_group!(benches, criterion_benchmark);
// Hand the `benches` group to Criterion's generated benchmark entry point.
criterion_main!(benches);