criterion/benchmark_group.rs
use crate::analysis;
use crate::benchmark::PartialBenchmarkConfig;
use crate::connection::OutgoingMessage;
use crate::measurement::Measurement;
use crate::report::BenchmarkId as InternalBenchmarkId;
use crate::report::Report;
use crate::report::ReportContext;
use crate::routine::{Function, Routine};
use crate::{Bencher, Criterion, Mode, PlotConfiguration, SamplingMode, Throughput};
use std::time::Duration;

/// Structure used to group together a set of related benchmarks, along with custom per-group
/// configuration settings. All benchmarks performed using a benchmark group will be
/// grouped together in the final report.
///
/// # Examples
///
/// ```no_run
/// use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
/// use std::time::Duration;
///
/// fn bench_simple(c: &mut Criterion) {
///     let mut group = c.benchmark_group("My Group");
///
///     // Now we can perform benchmarks with this group
///     group.bench_function("Bench 1", |b| b.iter(|| 1));
///     group.bench_function("Bench 2", |b| b.iter(|| 2));
///
///     // It's recommended to call group.finish() explicitly at the end, but if you don't it will
///     // be called automatically when the group is dropped.
///     group.finish();
/// }
///
/// fn bench_nested(c: &mut Criterion) {
///     let mut group = c.benchmark_group("My Second Group");
///     // We can override the configuration on a per-group level
///     group.measurement_time(Duration::from_secs(1));
///
///     // We can also use loops to define multiple benchmarks, even over multiple dimensions.
///     for x in 0..3 {
///         for y in 0..3 {
///             let point = (x, y);
///             let parameter_string = format!("{} * {}", x, y);
///             group.bench_with_input(BenchmarkId::new("Multiply", parameter_string), &point,
///                 |b, (p_x, p_y)| b.iter(|| p_x * p_y));
///         }
///     }
///
///     group.finish();
/// }
///
/// fn bench_throughput(c: &mut Criterion) {
///     let mut group = c.benchmark_group("Summation");
///
///     for size in [1024, 2048, 4096].iter() {
///         // Generate input of an appropriate size...
///         let input = vec![1u64; *size];
///
///         // We can use the throughput function to tell Criterion.rs how large the input is
///         // so it can calculate the overall throughput of the function. If we wanted, we could
///         // even change the benchmark configuration for different inputs (e.g. to reduce the
///         // number of samples for extremely large and slow inputs) or even different functions.
///         group.throughput(Throughput::Elements(*size as u64));
///
///         group.bench_with_input(BenchmarkId::new("sum", *size), &input,
///             |b, i| b.iter(|| i.iter().sum::<u64>()));
///         group.bench_with_input(BenchmarkId::new("fold", *size), &input,
///             |b, i| b.iter(|| i.iter().fold(0u64, |a, b| a + b)));
///     }
///
///     group.finish();
/// }
///
/// criterion_group!(benches, bench_simple, bench_nested, bench_throughput);
/// criterion_main!(benches);
/// ```
pub struct BenchmarkGroup<'a, M: Measurement> {
    criterion: &'a mut Criterion<M>,
    group_name: String,
    all_ids: Vec<InternalBenchmarkId>,
    any_matched: bool,
    partial_config: PartialBenchmarkConfig,
    throughput: Option<Throughput>,
}
impl<'a, M: Measurement> BenchmarkGroup<'a, M> {
    /// Changes the size of the sample for this benchmark group.
    ///
    /// A bigger sample should yield more accurate results if paired with a sufficiently large
    /// measurement time.
    ///
    /// Sample size must be at least 10.
    ///
    /// # Panics
    ///
    /// Panics if n < 10.
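    ///
    /// # Examples
    ///
    /// A minimal sketch; the sample count of 200 is an arbitrary illustrative value.
    ///
    /// ```no_run
    /// # use criterion::Criterion;
    /// let mut criterion = Criterion::default();
    /// let mut group = criterion.benchmark_group("sample-size-example");
    /// // Collect 200 samples per benchmark in this group (illustrative value).
    /// group.sample_size(200);
    /// group.bench_function("addition", |b| b.iter(|| 2 + 2));
    /// group.finish();
    /// ```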
    pub fn sample_size(&mut self, n: usize) -> &mut Self {
        assert!(n >= 10);

        self.partial_config.sample_size = Some(n);
        self
    }

    /// Changes the warm-up time for this benchmark group.
    ///
    /// # Panics
    ///
    /// Panics if the input duration is zero
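    ///
    /// # Examples
    ///
    /// A minimal sketch; the one-second warm-up below is an arbitrary illustrative value.
    ///
    /// ```no_run
    /// # use criterion::Criterion;
    /// use std::time::Duration;
    ///
    /// let mut criterion = Criterion::default();
    /// let mut group = criterion.benchmark_group("warm-up-example");
    /// // Warm up for one second before measuring each benchmark (illustrative value).
    /// group.warm_up_time(Duration::from_secs(1));
    /// group.bench_function("addition", |b| b.iter(|| 2 + 2));
    /// group.finish();
    /// ```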
    pub fn warm_up_time(&mut self, dur: Duration) -> &mut Self {
        assert!(dur.as_nanos() > 0);

        self.partial_config.warm_up_time = Some(dur);
        self
    }

    /// Changes the target measurement time for this benchmark group.
    ///
    /// Criterion will attempt to spend approximately this amount of time measuring each
    /// benchmark on a best-effort basis. If it is not possible to perform the measurement in
    /// the requested time (e.g. because each iteration of the benchmark is long) then Criterion
    /// will spend as long as is needed to collect the desired number of samples. With a longer
    /// time, the measurement will become more resilient to interference from other programs.
    ///
    /// # Panics
    ///
    /// Panics if the input duration is zero
    pub fn measurement_time(&mut self, dur: Duration) -> &mut Self {
        assert!(dur.as_nanos() > 0);

        self.partial_config.measurement_time = Some(dur);
        self
    }

    /// Changes the number of resamples used for the
    /// [bootstrap](https://en.wikipedia.org/wiki/Bootstrapping_(statistics)#Case_resampling)
    /// in this benchmark group.
    ///
    /// A larger number of resamples reduces the random sampling errors which are inherent to the
    /// bootstrap method, but also increases the analysis time.
    ///
    /// # Panics
    ///
    /// Panics if the number of resamples is set to zero
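    ///
    /// # Examples
    ///
    /// A minimal sketch; 200,000 resamples is an arbitrary illustrative value.
    ///
    /// ```no_run
    /// # use criterion::Criterion;
    /// let mut criterion = Criterion::default();
    /// let mut group = criterion.benchmark_group("nresamples-example");
    /// // Trade longer analysis time for smaller bootstrap sampling error (illustrative value).
    /// group.nresamples(200_000);
    /// group.bench_function("addition", |b| b.iter(|| 2 + 2));
    /// group.finish();
    /// ```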
    pub fn nresamples(&mut self, n: usize) -> &mut Self {
        assert!(n > 0);
        if n <= 1000 {
            eprintln!("\nWarning: It is not recommended to reduce nresamples below 1000.");
        }

        self.partial_config.nresamples = Some(n);
        self
    }

    /// Changes the noise threshold for benchmarks in this group. The noise threshold
    /// is used to filter out small changes in performance from one run to the next, even if they
    /// are statistically significant. Sometimes benchmarking the same code twice will result in
    /// small but statistically significant differences solely because of noise. This provides a way
    /// to filter out some of these false positives at the cost of making it harder to detect small
    /// changes to the true performance of the benchmark.
    ///
    /// The default is 0.01, meaning that changes smaller than 1% will be ignored.
    ///
    /// # Panics
    ///
    /// Panics if the threshold is set to a negative value
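    ///
    /// # Examples
    ///
    /// A minimal sketch; the 5% threshold is an arbitrary illustrative value.
    ///
    /// ```no_run
    /// # use criterion::Criterion;
    /// let mut criterion = Criterion::default();
    /// let mut group = criterion.benchmark_group("noise-threshold-example");
    /// // Ignore changes smaller than 5% relative to the previous run (illustrative value).
    /// group.noise_threshold(0.05);
    /// group.bench_function("addition", |b| b.iter(|| 2 + 2));
    /// group.finish();
    /// ```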
    pub fn noise_threshold(&mut self, threshold: f64) -> &mut Self {
        assert!(threshold >= 0.0);

        self.partial_config.noise_threshold = Some(threshold);
        self
    }

    /// Changes the confidence level for benchmarks in this group. The confidence
    /// level is the desired probability that the true runtime lies within the estimated
    /// [confidence interval](https://en.wikipedia.org/wiki/Confidence_interval). The default is
    /// 0.95, meaning that the confidence interval should capture the true value 95% of the time.
    ///
    /// # Panics
    ///
    /// Panics if the confidence level is set to a value outside the `(0, 1)` range
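    ///
    /// # Examples
    ///
    /// A minimal sketch; 0.99 is an arbitrary illustrative value.
    ///
    /// ```no_run
    /// # use criterion::Criterion;
    /// let mut criterion = Criterion::default();
    /// let mut group = criterion.benchmark_group("confidence-level-example");
    /// // Request wider, more conservative confidence intervals (illustrative value).
    /// group.confidence_level(0.99);
    /// group.bench_function("addition", |b| b.iter(|| 2 + 2));
    /// group.finish();
    /// ```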
    pub fn confidence_level(&mut self, cl: f64) -> &mut Self {
        assert!(cl > 0.0 && cl < 1.0);
        if cl < 0.5 {
            eprintln!("\nWarning: It is not recommended to reduce confidence level below 0.5.");
        }

        self.partial_config.confidence_level = Some(cl);
        self
    }

    /// Changes the [significance level](https://en.wikipedia.org/wiki/Statistical_significance)
    /// for benchmarks in this group. This is used to perform a
    /// [hypothesis test](https://en.wikipedia.org/wiki/Statistical_hypothesis_testing) to see if
    /// the measurements from this run are different from the measured performance of the last run.
    /// The significance level is the desired probability that two measurements of identical code
    /// will be considered 'different' due to noise in the measurements. The default value is 0.05,
    /// meaning that approximately 5% of identical benchmarks will register as different due to
    /// noise.
    ///
    /// This presents a trade-off. By setting the significance level closer to 0.0, you can increase
    /// the statistical robustness against noise, but it also weakens Criterion.rs' ability to
    /// detect small but real changes in the performance. By setting the significance level
    /// closer to 1.0, Criterion.rs will be better able to detect small true changes, but will also
    /// report more spurious differences.
    ///
    /// See also the noise threshold setting.
    ///
    /// # Panics
    ///
    /// Panics if the significance level is set to a value outside the `(0, 1)` range
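    ///
    /// # Examples
    ///
    /// A minimal sketch; 0.01 is an arbitrary illustrative value.
    ///
    /// ```no_run
    /// # use criterion::Criterion;
    /// let mut criterion = Criterion::default();
    /// let mut group = criterion.benchmark_group("significance-level-example");
    /// // Require stronger evidence before reporting a change, reducing false positives
    /// // at the cost of missing some small real changes (illustrative value).
    /// group.significance_level(0.01);
    /// group.bench_function("addition", |b| b.iter(|| 2 + 2));
    /// group.finish();
    /// ```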
    pub fn significance_level(&mut self, sl: f64) -> &mut Self {
        assert!(sl > 0.0 && sl < 1.0);

        self.partial_config.significance_level = Some(sl);
        self
    }

    /// Changes the plot configuration for this benchmark group.
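    ///
    /// # Examples
    ///
    /// A minimal sketch using a logarithmic scale for the summary plots, which suits inputs
    /// whose sizes grow geometrically.
    ///
    /// ```no_run
    /// # use criterion::{AxisScale, Criterion, PlotConfiguration};
    /// let mut criterion = Criterion::default();
    /// let mut group = criterion.benchmark_group("plot-config-example");
    /// let plot_config = PlotConfiguration::default().summary_scale(AxisScale::Logarithmic);
    /// group.plot_config(plot_config);
    /// group.bench_function("addition", |b| b.iter(|| 2 + 2));
    /// group.finish();
    /// ```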
    pub fn plot_config(&mut self, new_config: PlotConfiguration) -> &mut Self {
        self.partial_config.plot_config = new_config;
        self
    }

    /// Set the input size for this benchmark group. Used for reporting the
    /// throughput.
    pub fn throughput(&mut self, throughput: Throughput) -> &mut Self {
        self.throughput = Some(throughput);
        self
    }

    /// Set the sampling mode for this benchmark group.
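    ///
    /// # Examples
    ///
    /// A minimal sketch; flat sampling is intended for long-running benchmarks where the
    /// default mode would require too many iterations.
    ///
    /// ```no_run
    /// # use criterion::{Criterion, SamplingMode};
    /// use std::time::Duration;
    ///
    /// let mut criterion = Criterion::default();
    /// let mut group = criterion.benchmark_group("sampling-mode-example");
    /// group.sampling_mode(SamplingMode::Flat);
    /// group.bench_function("sleep", |b| {
    ///     b.iter(|| std::thread::sleep(Duration::from_millis(10)))
    /// });
    /// group.finish();
    /// ```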
    pub fn sampling_mode(&mut self, new_mode: SamplingMode) -> &mut Self {
        self.partial_config.sampling_mode = Some(new_mode);
        self
    }

    pub(crate) fn new(criterion: &mut Criterion<M>, group_name: String) -> BenchmarkGroup<'_, M> {
        BenchmarkGroup {
            criterion,
            group_name,
            all_ids: vec![],
            any_matched: false,
            partial_config: PartialBenchmarkConfig::default(),
            throughput: None,
        }
    }

    /// Benchmark the given parameterless function inside this benchmark group.
    pub fn bench_function<ID: IntoBenchmarkId, F>(&mut self, id: ID, mut f: F) -> &mut Self
    where
        F: FnMut(&mut Bencher<'_, M>),
    {
        self.run_bench(id.into_benchmark_id(), &(), |b, _| f(b));
        self
    }

    /// Benchmark the given parameterized function inside this benchmark group.
    pub fn bench_with_input<ID: IntoBenchmarkId, F, I>(
        &mut self,
        id: ID,
        input: &I,
        f: F,
    ) -> &mut Self
    where
        F: FnMut(&mut Bencher<'_, M>, &I),
        I: ?Sized,
    {
        self.run_bench(id.into_benchmark_id(), input, f);
        self
    }

    fn run_bench<F, I>(&mut self, id: BenchmarkId, input: &I, f: F)
    where
        F: FnMut(&mut Bencher<'_, M>, &I),
        I: ?Sized,
    {
        let config = self.partial_config.to_complete(&self.criterion.config);
        let report_context = ReportContext {
            output_directory: self.criterion.output_directory.clone(),
            plot_config: self.partial_config.plot_config.clone(),
        };

        let mut id = InternalBenchmarkId::new(
            self.group_name.clone(),
            id.function_name,
            id.parameter,
            self.throughput.clone(),
        );

        assert!(
            !self.all_ids.contains(&id),
            "Benchmark IDs must be unique within a group. Encountered duplicated benchmark ID {}",
            &id
        );

        id.ensure_directory_name_unique(&self.criterion.all_directories);
        self.criterion
            .all_directories
            .insert(id.as_directory_name().to_owned());
        id.ensure_title_unique(&self.criterion.all_titles);
        self.criterion.all_titles.insert(id.as_title().to_owned());

        let do_run = self.criterion.filter_matches(id.id());
        self.any_matched |= do_run;
        let mut func = Function::new(f);

        match &self.criterion.mode {
            Mode::Benchmark => {
                if let Some(conn) = &self.criterion.connection {
                    if do_run {
                        conn.send(&OutgoingMessage::BeginningBenchmark { id: (&id).into() })
                            .unwrap();
                    } else {
                        conn.send(&OutgoingMessage::SkippingBenchmark { id: (&id).into() })
                            .unwrap();
                    }
                }
                if do_run {
                    analysis::common(
                        &id,
                        &mut func,
                        &config,
                        self.criterion,
                        &report_context,
                        input,
                        self.throughput.clone(),
                    );
                }
            }
            Mode::List(_) => {
                if do_run {
                    println!("{}: benchmark", id);
                }
            }
            Mode::Test => {
                if do_run {
                    // In test mode, run the benchmark exactly once, then exit.
                    self.criterion.report.test_start(&id, &report_context);
                    func.test(&self.criterion.measurement, input);
                    self.criterion.report.test_pass(&id, &report_context);
                }
            }
            &Mode::Profile(duration) => {
                if do_run {
                    func.profile(
                        &self.criterion.measurement,
                        &id,
                        self.criterion,
                        &report_context,
                        duration,
                        input,
                    );
                }
            }
        }

        self.all_ids.push(id);
    }

    /// Consume the benchmark group and generate the summary reports for the group.
    ///
    /// It is recommended to call this explicitly, but if you forget it will be called when the
    /// group is dropped.
    pub fn finish(self) {
        ::std::mem::drop(self);
    }
}
impl<'a, M: Measurement> Drop for BenchmarkGroup<'a, M> {
    fn drop(&mut self) {
        // I don't really like having a bunch of non-trivial code in drop, but this is the only way
        // to really write linear types like this in Rust...
        if let Some(conn) = &mut self.criterion.connection {
            conn.send(&OutgoingMessage::FinishedBenchmarkGroup {
                group: &self.group_name,
            })
            .unwrap();

            conn.serve_value_formatter(self.criterion.measurement.formatter())
                .unwrap();
        }

        if self.all_ids.len() > 1 && self.any_matched && self.criterion.mode.is_benchmark() {
            let report_context = ReportContext {
                output_directory: self.criterion.output_directory.clone(),
                plot_config: self.partial_config.plot_config.clone(),
            };

            self.criterion.report.summarize(
                &report_context,
                &self.all_ids,
                self.criterion.measurement.formatter(),
            );
        }
        if self.any_matched && !self.criterion.mode.is_terse() {
            self.criterion.report.group_separator();
        }
    }
}

/// Simple structure representing an ID for a benchmark. The ID must be unique within a benchmark
/// group.
#[derive(Clone, Eq, PartialEq, Hash)]
pub struct BenchmarkId {
    pub(crate) function_name: Option<String>,
    pub(crate) parameter: Option<String>,
}
impl BenchmarkId {
    /// Construct a new benchmark ID from a string function name and a parameter value.
    ///
    /// Note that the parameter value need not be the same as the parameter passed to your
    /// actual benchmark. For instance, you might have a benchmark that takes a 1MB string as
    /// input. It would be impractical to embed the whole string in the benchmark ID, so instead
    /// your parameter value might be a descriptive string like "1MB Alphanumeric".
    ///
    /// # Examples
    ///
    /// ```
    /// # use criterion::{BenchmarkId, Criterion};
    /// // A basic benchmark ID is typically constructed from a constant string and a simple
    /// // parameter
    /// let basic_id = BenchmarkId::new("my_id", 5);
    ///
    /// // The function name can be a string
    /// let function_name = "test_string".to_string();
    /// let string_id = BenchmarkId::new(function_name, 12);
    ///
    /// // Benchmark IDs are passed to benchmark groups:
    /// let mut criterion = Criterion::default();
    /// let mut group = criterion.benchmark_group("My Group");
    /// // Generate a very large input
    /// let input: String = ::std::iter::repeat("X").take(1024 * 1024).collect();
    ///
    /// // Note that we don't have to use the input as the parameter in the ID
    /// group.bench_with_input(BenchmarkId::new("Test long string", "1MB X's"), &input, |b, i| {
    ///     b.iter(|| i.len())
    /// });
    /// ```
    pub fn new<S: Into<String>, P: ::std::fmt::Display>(
        function_name: S,
        parameter: P,
    ) -> BenchmarkId {
        BenchmarkId {
            function_name: Some(function_name.into()),
            parameter: Some(format!("{}", parameter)),
        }
    }

    /// Construct a new benchmark ID from just a parameter value. Use this when benchmarking a
    /// single function with a variety of different inputs.
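    ///
    /// # Examples
    ///
    /// A minimal sketch benchmarking one function over several input sizes.
    ///
    /// ```no_run
    /// # use criterion::{BenchmarkId, Criterion};
    /// let mut criterion = Criterion::default();
    /// let mut group = criterion.benchmark_group("from-parameter-example");
    /// for size in [16, 64, 256].iter() {
    ///     let input = vec![0u8; *size];
    ///     group.bench_with_input(BenchmarkId::from_parameter(size), &input, |b, i| {
    ///         b.iter(|| i.len())
    ///     });
    /// }
    /// group.finish();
    /// ```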
    pub fn from_parameter<P: ::std::fmt::Display>(parameter: P) -> BenchmarkId {
        BenchmarkId {
            function_name: None,
            parameter: Some(format!("{}", parameter)),
        }
    }

    pub(crate) fn no_function() -> BenchmarkId {
        BenchmarkId {
            function_name: None,
            parameter: None,
        }
    }

    pub(crate) fn no_function_with_input<P: ::std::fmt::Display>(parameter: P) -> BenchmarkId {
        BenchmarkId {
            function_name: None,
            parameter: Some(format!("{}", parameter)),
        }
    }
}

mod private {
    pub trait Sealed {}
    impl Sealed for super::BenchmarkId {}
    impl<S: Into<String>> Sealed for S {}
}

/// Sealed trait which allows users to automatically convert strings to benchmark IDs.
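///
/// # Examples
///
/// A minimal sketch; any string (or anything convertible into one) works where an
/// `IntoBenchmarkId` is expected, as does an explicit `BenchmarkId`.
///
/// ```no_run
/// # use criterion::{BenchmarkId, Criterion};
/// let mut criterion = Criterion::default();
/// let mut group = criterion.benchmark_group("into-benchmark-id-example");
/// // A plain &str converts automatically...
/// group.bench_function("from string", |b| b.iter(|| 2 + 2));
/// // ...and so does an explicit BenchmarkId.
/// group.bench_function(BenchmarkId::new("explicit", 1), |b| b.iter(|| 2 + 2));
/// group.finish();
/// ```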
pub trait IntoBenchmarkId: private::Sealed {
    fn into_benchmark_id(self) -> BenchmarkId;
}
impl IntoBenchmarkId for BenchmarkId {
    fn into_benchmark_id(self) -> BenchmarkId {
        self
    }
}
impl<S: Into<String>> IntoBenchmarkId for S {
    fn into_benchmark_id(self) -> BenchmarkId {
        let function_name = self.into();
        assert!(
            !function_name.is_empty(),
            "Function name must not be empty."
        );

        BenchmarkId {
            function_name: Some(function_name),
            parameter: None,
        }
    }
}
499}