extern crate criterion;
use arrow::array::builder::StringBuilder;
use arrow::array::{ArrayRef, AsArray, StringArray};
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use datafusion_functions::regex::regexplike::regexp_like;
use datafusion_functions::regex::regexpmatch::regexp_match;
use datafusion_functions::regex::regexpreplace::regexp_replace;
use rand::distributions::Alphanumeric;
use rand::rngs::ThreadRng;
use rand::seq::SliceRandom;
use rand::Rng;
use std::iter;
use std::sync::Arc;
/// Builds the input column for the benchmarks: a `StringArray` holding 1000
/// random strings, each made of 7 alphanumeric characters drawn from `rng`.
fn data(rng: &mut ThreadRng) -> StringArray {
    let values: Vec<String> = (0..1000)
        .map(|_| {
            // Each row pulls exactly seven samples from the shared generator.
            rng.sample_iter(&Alphanumeric)
                .take(7)
                .map(char::from)
                .collect::<String>()
        })
        .collect();
    StringArray::from(values)
}
/// Builds the pattern column for the benchmarks: a `StringArray` of 1000
/// regular expressions, each picked at random from a fixed set of five samples.
fn regex(rng: &mut ThreadRng) -> StringArray {
    let patterns: [String; 5] = [
        String::from(".*([A-Z]{1}).*"),
        String::from("^(A).*"),
        String::from(r#"[\p{Letter}-]+"#),
        String::from(r#"[\p{L}-]+"#),
        String::from("[a-zA-Z]_[a-zA-Z]{2}"),
    ];
    let picked: Vec<String> = (0..1000)
        // `patterns` is never empty, so `choose` always returns `Some`.
        .map(|_| patterns.choose(rng).unwrap().clone())
        .collect();
    StringArray::from(picked)
}
fn flags(rng: &mut ThreadRng) -> StringArray {
let samples = [Some("i".to_string()), Some("im".to_string()), None];
let mut sb = StringBuilder::new();
for _ in 0..1000 {
let sample = samples.choose(rng).unwrap();
if sample.is_some() {
sb.append_value(sample.clone().unwrap());
} else {
sb.append_null();
}
}
sb.finish()
}
/// Registers the regex benchmarks with Criterion.
///
/// Three benchmarks are defined, each over freshly generated 1000-row inputs:
/// `regexp_like_1000`, `regexp_match_1000` and `regexp_replace_1000`. Input
/// generation happens once per benchmark, outside `b.iter`, so only the regex
/// function call itself is inside the measured closure.
fn criterion_benchmark(c: &mut Criterion) {
    c.bench_function("regexp_like_1000", |b| {
        let mut rng = rand::thread_rng();
        let data = Arc::new(data(&mut rng)) as ArrayRef;
        let regex = Arc::new(regex(&mut rng)) as ArrayRef;
        let flags = Arc::new(flags(&mut rng)) as ArrayRef;

        b.iter(|| {
            // `Arc::clone` only bumps a reference count; the arrays are not copied.
            black_box(
                regexp_like::<i32>(&[
                    Arc::clone(&data),
                    Arc::clone(&regex),
                    Arc::clone(&flags),
                ])
                .expect("regexp_like should work on valid values"),
            )
        })
    });

    c.bench_function("regexp_match_1000", |b| {
        let mut rng = rand::thread_rng();
        let data = Arc::new(data(&mut rng)) as ArrayRef;
        let regex = Arc::new(regex(&mut rng)) as ArrayRef;
        let flags = Arc::new(flags(&mut rng)) as ArrayRef;

        b.iter(|| {
            black_box(
                regexp_match::<i32>(&[
                    Arc::clone(&data),
                    Arc::clone(&regex),
                    Arc::clone(&flags),
                ])
                .expect("regexp_match should work on valid values"),
            )
        })
    });

    c.bench_function("regexp_replace_1000", |b| {
        let mut rng = rand::thread_rng();
        let data = Arc::new(data(&mut rng)) as ArrayRef;
        let regex = Arc::new(regex(&mut rng)) as ArrayRef;
        let flags = Arc::new(flags(&mut rng)) as ArrayRef;
        // Every row is replaced with the same constant string.
        let replacement =
            Arc::new(StringArray::from_iter_values(iter::repeat("XX").take(1000)))
                as ArrayRef;

        b.iter(|| {
            black_box(
                regexp_replace::<i32, _, _>(
                    data.as_string::<i32>(),
                    regex.as_string::<i32>(),
                    replacement.as_string::<i32>(),
                    Some(&flags),
                )
                .expect("regexp_replace should work on valid values"),
            )
        })
    });
}
// Register `criterion_benchmark` in the `benches` group and pass that group to
// `criterion_main!`, which supplies the benchmark binary's entry point.
criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);