criterion/bencher.rs
1use std::hint::black_box;
2use std::time::Duration;
3use std::time::Instant;
4
5use crate::measurement::{Measurement, WallTime};
6use crate::BatchSize;
7
8#[cfg(feature = "async")]
9use std::future::Future;
10
11#[cfg(feature = "async")]
12use crate::async_executor::AsyncExecutor;
13
14// ================================== MAINTENANCE NOTE =============================================
15// Any changes made to either Bencher or AsyncBencher will have to be replicated to the other!
16// ================================== MAINTENANCE NOTE =============================================
17
18/// Timer struct used to iterate a benchmarked function and measure the runtime.
19///
20/// This struct provides different timing loops as methods. Each timing loop provides a different
21/// way to time a routine and each has advantages and disadvantages.
22///
23/// * If you want to do the iteration and measurement yourself (eg. passing the iteration count
24/// to a separate process), use [`iter_custom`].
25/// * If your routine requires no per-iteration setup and returns a value with an expensive `drop`
26/// method, use [`iter_with_large_drop`].
27/// * If your routine requires some per-iteration setup that shouldn't be timed, use [`iter_batched`]
28/// or [`iter_batched_ref`]. See [`BatchSize`] for a discussion of batch sizes.
29/// If the setup value implements `Drop` and you don't want to include the `drop` time in the
30/// measurement, use [`iter_batched_ref`], otherwise use [`iter_batched`]. These methods are also
31/// suitable for benchmarking routines which return a value with an expensive `drop` method,
32/// but are more complex than [`iter_with_large_drop`].
33/// * Otherwise, use [`iter`].
34///
35/// [`iter`]: Bencher::iter
36/// [`iter_custom`]: Bencher::iter_custom
37/// [`iter_with_large_drop`]: Bencher::iter_with_large_drop
38/// [`iter_batched`]: Bencher::iter_batched
39/// [`iter_batched_ref`]: Bencher::iter_batched_ref
pub struct Bencher<'a, M: Measurement = WallTime> {
    pub(crate) iterated: bool, // Have we iterated this benchmark? Checked by assert_iterated().
    pub(crate) iters: u64, // Number of times to iterate this benchmark; chosen by the runner, not the user.
    pub(crate) value: M::Value, // The measured value accumulated by the last timing loop.
    pub(crate) measurement: &'a M, // Reference to the measurement object (e.g. WallTime) that starts/ends/adds samples.
    pub(crate) elapsed_time: Duration, // Wall-clock time the whole iteration took (including setup). Used for the warmup period.
}
47impl<'a, M: Measurement> Bencher<'a, M> {
48 /// Times a `routine` by executing it many times and timing the total elapsed time.
49 ///
50 /// Prefer this timing loop when `routine` returns a value that doesn't have a destructor.
51 ///
52 /// # Timing model
53 ///
54 /// Note that the `Bencher` also times the time required to destroy the output of `routine()`.
55 /// Therefore prefer this timing loop when the runtime of `mem::drop(O)` is negligible compared
56 /// to the runtime of the `routine`.
57 ///
58 /// ```text
59 /// elapsed = Instant::now + iters * (routine + mem::drop(O) + Range::next)
60 /// ```
61 ///
62 /// # Example
63 ///
64 /// ```rust
65 /// use criterion::{criterion_group, criterion_main, Criterion};
66 ///
67 /// // The function to benchmark
68 /// fn foo() {
69 /// // ...
70 /// }
71 ///
72 /// fn bench(c: &mut Criterion) {
73 /// c.bench_function("iter", move |b| {
74 /// b.iter(|| foo())
75 /// });
76 /// }
77 ///
78 /// criterion_group!(benches, bench);
79 /// criterion_main!(benches);
80 /// ```
81 ///
82 #[inline(never)]
83 pub fn iter<O, R>(&mut self, mut routine: R)
84 where
85 R: FnMut() -> O,
86 {
87 self.iterated = true;
88 let time_start = Instant::now();
89 let start = self.measurement.start();
90 for _ in 0..self.iters {
91 black_box(routine());
92 }
93 self.value = self.measurement.end(start);
94 self.elapsed_time = time_start.elapsed();
95 }
96
97 /// Times a `routine` by executing it many times and relying on `routine` to measure its own execution time.
98 ///
99 /// Prefer this timing loop in cases where `routine` has to do its own measurements to
100 /// get accurate timing information (for example in multi-threaded scenarios where you spawn
101 /// and coordinate with multiple threads).
102 ///
103 /// # Timing model
104 /// Custom, the timing model is whatever is returned as the [`Duration`] from `routine`.
105 ///
106 /// # Example
107 /// ```rust
108 /// use criterion::{criterion_group, criterion_main, Criterion};
109 /// use std::time::Instant;
110 ///
111 /// fn foo() {
112 /// // ...
113 /// }
114 ///
115 /// fn bench(c: &mut Criterion) {
116 /// c.bench_function("iter", move |b| {
117 /// b.iter_custom(|iters| {
118 /// let start = Instant::now();
119 /// for _i in 0..iters {
120 /// std::hint::black_box(foo());
121 /// }
122 /// start.elapsed()
123 /// })
124 /// });
125 /// }
126 ///
127 /// criterion_group!(benches, bench);
128 /// criterion_main!(benches);
129 /// ```
130 ///
131 #[inline(never)]
132 pub fn iter_custom<R>(&mut self, mut routine: R)
133 where
134 R: FnMut(u64) -> M::Value,
135 {
136 self.iterated = true;
137 let time_start = Instant::now();
138 self.value = routine(self.iters);
139 self.elapsed_time = time_start.elapsed();
140 }
141
142 #[doc(hidden)]
143 pub fn iter_with_setup<I, O, S, R>(&mut self, setup: S, routine: R)
144 where
145 S: FnMut() -> I,
146 R: FnMut(I) -> O,
147 {
148 self.iter_batched(setup, routine, BatchSize::PerIteration);
149 }
150
151 /// Times a `routine` by collecting its output on each iteration. This avoids timing the
152 /// destructor of the value returned by `routine`.
153 ///
154 /// WARNING: This requires `O(iters * mem::size_of::<O>())` of memory, and `iters` is not
155 /// under the control of the caller. If this causes out-of-memory errors, use
156 /// [`iter_batched`](Self::iter_batched) instead.
157 ///
158 /// # Timing model
159 ///
160 /// ``` text
161 /// elapsed = Instant::now + iters * (routine) + Iterator::collect::<Vec<_>>
162 /// ```
163 ///
164 /// # Example
165 ///
166 /// ```rust
167 /// use criterion::{criterion_group, criterion_main, Criterion};
168 ///
169 /// fn create_vector() -> Vec<u64> {
170 /// # vec![]
171 /// // ...
172 /// }
173 ///
174 /// fn bench(c: &mut Criterion) {
175 /// c.bench_function("with_drop", move |b| {
176 /// // This will avoid timing the Vec::drop.
177 /// b.iter_with_large_drop(|| create_vector())
178 /// });
179 /// }
180 ///
181 /// criterion_group!(benches, bench);
182 /// criterion_main!(benches);
183 /// ```
184 ///
185 pub fn iter_with_large_drop<O, R>(&mut self, mut routine: R)
186 where
187 R: FnMut() -> O,
188 {
189 self.iter_batched(|| (), |_| routine(), BatchSize::SmallInput);
190 }
191
192 /// Times a `routine` that requires some input by generating a batch of input, then timing the
193 /// iteration of the benchmark over the input. See [`BatchSize`] for
194 /// details on choosing the batch size. Use this when the routine must consume its input.
195 ///
196 /// For example, use this loop to benchmark sorting algorithms, because they require unsorted
197 /// data on each iteration.
198 ///
199 /// # Timing model
200 ///
201 /// ```text
202 /// elapsed = (Instant::now * num_batches) + (iters * (routine + O::drop)) + Vec::extend
203 /// ```
204 ///
205 /// # Example
206 ///
207 /// ```rust
208 /// use criterion::{criterion_group, criterion_main, BatchSize, Criterion};
209 ///
210 /// fn create_scrambled_data() -> Vec<u64> {
211 /// # vec![]
212 /// // ...
213 /// }
214 ///
215 /// // The sorting algorithm to test
216 /// fn sort(data: &mut [u64]) {
217 /// // ...
218 /// }
219 ///
220 /// fn bench(c: &mut Criterion) {
221 /// let data = create_scrambled_data();
222 ///
223 /// c.bench_function("with_setup", move |b| {
224 /// // This will avoid timing the clone call.
225 /// b.iter_batched(|| data.clone(), |mut data| sort(&mut data), BatchSize::SmallInput)
226 /// });
227 /// }
228 ///
229 /// criterion_group!(benches, bench);
230 /// criterion_main!(benches);
231 /// ```
232 ///
233 #[inline(never)]
234 pub fn iter_batched<I, O, S, R>(&mut self, mut setup: S, mut routine: R, size: BatchSize)
235 where
236 S: FnMut() -> I,
237 R: FnMut(I) -> O,
238 {
239 self.iterated = true;
240 let batch_size = size.iters_per_batch(self.iters);
241 assert!(batch_size != 0, "Batch size must not be zero.");
242 let time_start = Instant::now();
243 self.value = self.measurement.zero();
244
245 if batch_size == 1 {
246 for _ in 0..self.iters {
247 let input = black_box(setup());
248
249 let start = self.measurement.start();
250 let output = routine(input);
251 let end = self.measurement.end(start);
252 self.value = self.measurement.add(&self.value, &end);
253
254 drop(black_box(output));
255 }
256 } else {
257 let mut iteration_counter = 0;
258
259 while iteration_counter < self.iters {
260 let batch_size = ::std::cmp::min(batch_size, self.iters - iteration_counter);
261
262 let inputs = black_box((0..batch_size).map(|_| setup()).collect::<Vec<_>>());
263 let mut outputs = Vec::with_capacity(batch_size as usize);
264
265 let start = self.measurement.start();
266 outputs.extend(inputs.into_iter().map(&mut routine));
267 let end = self.measurement.end(start);
268 self.value = self.measurement.add(&self.value, &end);
269
270 black_box(outputs);
271
272 iteration_counter += batch_size;
273 }
274 }
275
276 self.elapsed_time = time_start.elapsed();
277 }
278
279 /// Times a `routine` that requires some input by generating a batch of input, then timing the
280 /// iteration of the benchmark over the input. See [`BatchSize`] for
281 /// details on choosing the batch size. Use this when the routine should accept the input by
282 /// mutable reference.
283 ///
284 /// For example, use this loop to benchmark sorting algorithms, because they require unsorted
285 /// data on each iteration.
286 ///
287 /// # Timing model
288 ///
289 /// ```text
290 /// elapsed = (Instant::now * num_batches) + (iters * routine) + Vec::extend
291 /// ```
292 ///
293 /// # Example
294 ///
295 /// ```rust
296 /// use criterion::{criterion_group, criterion_main, BatchSize, Criterion};
297 ///
298 /// fn create_scrambled_data() -> Vec<u64> {
299 /// # vec![]
300 /// // ...
301 /// }
302 ///
303 /// // The sorting algorithm to test
304 /// fn sort(data: &mut [u64]) {
305 /// // ...
306 /// }
307 ///
308 /// fn bench(c: &mut Criterion) {
309 /// let data = create_scrambled_data();
310 ///
311 /// c.bench_function("with_setup", move |b| {
312 /// // This will avoid timing the clone call.
313 /// b.iter_batched_ref(|| data.clone(), |mut data| sort(&mut data), BatchSize::SmallInput)
314 /// });
315 /// }
316 ///
317 /// criterion_group!(benches, bench);
318 /// criterion_main!(benches);
319 /// ```
320 ///
321 #[inline(never)]
322 pub fn iter_batched_ref<I, O, S, R>(&mut self, mut setup: S, mut routine: R, size: BatchSize)
323 where
324 S: FnMut() -> I,
325 R: FnMut(&mut I) -> O,
326 {
327 self.iterated = true;
328 let batch_size = size.iters_per_batch(self.iters);
329 assert!(batch_size != 0, "Batch size must not be zero.");
330 let time_start = Instant::now();
331 self.value = self.measurement.zero();
332
333 if batch_size == 1 {
334 for _ in 0..self.iters {
335 let mut input = black_box(setup());
336
337 let start = self.measurement.start();
338 let output = routine(&mut input);
339 let end = self.measurement.end(start);
340 self.value = self.measurement.add(&self.value, &end);
341
342 drop(black_box(output));
343 drop(black_box(input));
344 }
345 } else {
346 let mut iteration_counter = 0;
347
348 while iteration_counter < self.iters {
349 let batch_size = ::std::cmp::min(batch_size, self.iters - iteration_counter);
350
351 let mut inputs = black_box((0..batch_size).map(|_| setup()).collect::<Vec<_>>());
352 let mut outputs = Vec::with_capacity(batch_size as usize);
353
354 let start = self.measurement.start();
355 outputs.extend(inputs.iter_mut().map(&mut routine));
356 let end = self.measurement.end(start);
357 self.value = self.measurement.add(&self.value, &end);
358
359 black_box(outputs);
360
361 iteration_counter += batch_size;
362 }
363 }
364 self.elapsed_time = time_start.elapsed();
365 }
366
367 // Benchmarks must actually call one of the iter methods. This causes benchmarks to fail loudly
368 // if they don't.
369 pub(crate) fn assert_iterated(&mut self) {
370 assert!(
371 self.iterated,
372 "Benchmark function must call Bencher::iter or related method."
373 );
374 self.iterated = false;
375 }
376
377 /// Convert this bencher into an [`AsyncBencher`], which enables async/await support.
378 #[cfg(feature = "async")]
379 pub fn to_async<'b, A: AsyncExecutor>(&'b mut self, runner: A) -> AsyncBencher<'a, 'b, A, M> {
380 AsyncBencher { b: self, runner }
381 }
382}
383
/// Async/await variant of [`Bencher`].
#[cfg(feature = "async")]
pub struct AsyncBencher<'a, 'b, A: AsyncExecutor, M: Measurement = WallTime> {
    b: &'b mut Bencher<'a, M>, // Underlying sync bencher that stores iters/value/elapsed_time.
    runner: A,                 // Executor used to block_on the async timing loops.
}
#[cfg(feature = "async")]
impl<'a, 'b, A: AsyncExecutor, M: Measurement> AsyncBencher<'a, 'b, A, M> {
    /// Times a `routine` by executing it many times and timing the total elapsed time.
    ///
    /// Prefer this timing loop when `routine` returns a value that doesn't have a destructor.
    ///
    /// # Timing model
    ///
    /// Note that the `AsyncBencher` also times the time required to destroy the output of `routine()`.
    /// Therefore prefer this timing loop when the runtime of `mem::drop(O)` is negligible compared
    /// to the runtime of the `routine`.
    ///
    /// ```text
    /// elapsed = Instant::now + iters * (routine + mem::drop(O) + Range::next)
    /// ```
    ///
    /// # Example
    ///
    /// ```rust
    /// use criterion::{criterion_group, criterion_main, Criterion};
    /// use criterion::async_executor::FuturesExecutor;
    ///
    /// // The function to benchmark
    /// async fn foo() {
    ///     // ...
    /// }
    ///
    /// fn bench(c: &mut Criterion) {
    ///     c.bench_function("iter", move |b| {
    ///         b.to_async(FuturesExecutor).iter(|| async { foo().await } )
    ///     });
    /// }
    ///
    /// criterion_group!(benches, bench);
    /// criterion_main!(benches);
    /// ```
    ///
    #[inline(never)]
    pub fn iter<O, R, F>(&mut self, mut routine: R)
    where
        R: FnMut() -> F,
        F: Future<Output = O>,
    {
        // Destructure so the async block can borrow the bencher state while the
        // executor drives it to completion.
        let AsyncBencher { b, runner } = self;
        runner.block_on(async {
            b.iterated = true;
            let time_start = Instant::now();
            let start = b.measurement.start();
            for _ in 0..b.iters {
                // black_box keeps the optimizer from discarding the awaited result.
                black_box(routine().await);
            }
            b.value = b.measurement.end(start);
            b.elapsed_time = time_start.elapsed();
        });
    }

    /// Times a `routine` by executing it many times and relying on `routine` to measure its own execution time.
    ///
    /// Prefer this timing loop in cases where `routine` has to do its own measurements to
    /// get accurate timing information (for example in multi-threaded scenarios where you spawn
    /// and coordinate with multiple threads).
    ///
    /// # Timing model
    /// Custom, the timing model is whatever is returned as the [`Duration`] from `routine`.
    ///
    /// # Example
    /// ```rust
    /// use criterion::{criterion_group, criterion_main, Criterion};
    /// use criterion::async_executor::FuturesExecutor;
    /// use std::time::Instant;
    ///
    /// async fn foo() {
    ///     // ...
    /// }
    ///
    /// fn bench(c: &mut Criterion) {
    ///     c.bench_function("iter", move |b| {
    ///         b.to_async(FuturesExecutor).iter_custom(|iters| {
    ///             async move {
    ///                 let start = Instant::now();
    ///                 for _i in 0..iters {
    ///                     std::hint::black_box(foo().await);
    ///                 }
    ///                 start.elapsed()
    ///             }
    ///         })
    ///     });
    /// }
    ///
    /// criterion_group!(benches, bench);
    /// criterion_main!(benches);
    /// ```
    ///
    #[inline(never)]
    pub fn iter_custom<R, F>(&mut self, mut routine: R)
    where
        R: FnMut(u64) -> F,
        F: Future<Output = M::Value>,
    {
        let AsyncBencher { b, runner } = self;
        runner.block_on(async {
            b.iterated = true;
            let time_start = Instant::now();
            // The routine's future produces the measurement itself; we only track
            // wall-clock time for the warmup bookkeeping.
            b.value = routine(b.iters).await;
            b.elapsed_time = time_start.elapsed();
        })
    }

    #[doc(hidden)]
    pub fn iter_with_setup<I, O, S, R, F>(&mut self, setup: S, routine: R)
    where
        S: FnMut() -> I,
        R: FnMut(I) -> F,
        F: Future<Output = O>,
    {
        self.iter_batched(setup, routine, BatchSize::PerIteration);
    }

    /// Times a `routine` by collecting its output on each iteration. This avoids timing the
    /// destructor of the value returned by `routine`.
    ///
    /// WARNING: This requires `O(iters * mem::size_of::<O>())` of memory, and `iters`
    /// is not under the control of the caller. If this causes out-of-memory errors, use
    /// [`iter_batched`](Self::iter_batched) instead.
    ///
    /// # Timing model
    ///
    /// ``` text
    /// elapsed = Instant::now + iters * (routine) + Iterator::collect::<Vec<_>>
    /// ```
    ///
    /// # Example
    ///
    /// ```rust
    /// use criterion::{criterion_group, criterion_main, Criterion};
    /// use criterion::async_executor::FuturesExecutor;
    ///
    /// async fn create_vector() -> Vec<u64> {
    ///     # vec![]
    ///     // ...
    /// }
    ///
    /// fn bench(c: &mut Criterion) {
    ///     c.bench_function("with_drop", move |b| {
    ///         // This will avoid timing the Vec::drop.
    ///         b.to_async(FuturesExecutor).iter_with_large_drop(|| async { create_vector().await })
    ///     });
    /// }
    ///
    /// criterion_group!(benches, bench);
    /// criterion_main!(benches);
    /// ```
    ///
    pub fn iter_with_large_drop<O, R, F>(&mut self, mut routine: R)
    where
        R: FnMut() -> F,
        F: Future<Output = O>,
    {
        self.iter_batched(|| (), |_| routine(), BatchSize::SmallInput);
    }

    #[doc(hidden)]
    pub fn iter_with_large_setup<I, O, S, R, F>(&mut self, setup: S, routine: R)
    where
        S: FnMut() -> I,
        R: FnMut(I) -> F,
        F: Future<Output = O>,
    {
        // One batch holds all inputs, so setup cost is paid once and kept out of the timing.
        self.iter_batched(setup, routine, BatchSize::NumBatches(1));
    }

    /// Times a `routine` that requires some input by generating a batch of input, then timing the
    /// iteration of the benchmark over the input. See [`BatchSize`] for
    /// details on choosing the batch size. Use this when the routine must consume its input.
    ///
    /// For example, use this loop to benchmark sorting algorithms, because they require unsorted
    /// data on each iteration.
    ///
    /// # Timing model
    ///
    /// ```text
    /// elapsed = (Instant::now * num_batches) + (iters * (routine + O::drop)) + Vec::extend
    /// ```
    ///
    /// # Example
    ///
    /// ```rust
    /// use criterion::{criterion_group, criterion_main, BatchSize, Criterion};
    /// use criterion::async_executor::FuturesExecutor;
    ///
    /// fn create_scrambled_data() -> Vec<u64> {
    ///     # vec![]
    ///     // ...
    /// }
    ///
    /// // The sorting algorithm to test
    /// async fn sort(data: &mut [u64]) {
    ///     // ...
    /// }
    ///
    /// fn bench(c: &mut Criterion) {
    ///     let data = create_scrambled_data();
    ///
    ///     c.bench_function("with_setup", move |b| {
    ///         // This will avoid timing the clone call.
    ///         b.iter_batched(|| data.clone(), |mut data| async move { sort(&mut data).await }, BatchSize::SmallInput)
    ///     });
    /// }
    ///
    /// criterion_group!(benches, bench);
    /// criterion_main!(benches);
    /// ```
    ///
    #[inline(never)]
    pub fn iter_batched<I, O, S, R, F>(&mut self, mut setup: S, mut routine: R, size: BatchSize)
    where
        S: FnMut() -> I,
        R: FnMut(I) -> F,
        F: Future<Output = O>,
    {
        let AsyncBencher { b, runner } = self;
        runner.block_on(async {
            b.iterated = true;
            let batch_size = size.iters_per_batch(b.iters);
            assert!(batch_size != 0, "Batch size must not be zero.");
            let time_start = Instant::now();
            b.value = b.measurement.zero();

            if batch_size == 1 {
                // Special case: time each call individually so setup and drop stay
                // entirely outside the measured region.
                for _ in 0..b.iters {
                    let input = black_box(setup());

                    let start = b.measurement.start();
                    let output = routine(input).await;
                    let end = b.measurement.end(start);
                    b.value = b.measurement.add(&b.value, &end);

                    drop(black_box(output));
                }
            } else {
                let mut iteration_counter = 0;

                while iteration_counter < b.iters {
                    // The final batch may be smaller so we land exactly on b.iters.
                    let batch_size = ::std::cmp::min(batch_size, b.iters - iteration_counter);

                    // Inputs are built before the clock starts; outputs are kept until
                    // after it stops so their drops aren't timed.
                    let inputs = black_box((0..batch_size).map(|_| setup()).collect::<Vec<_>>());
                    let mut outputs = Vec::with_capacity(batch_size as usize);

                    let start = b.measurement.start();
                    // Can't use .extend here like the sync version does
                    for input in inputs {
                        outputs.push(routine(input).await);
                    }
                    let end = b.measurement.end(start);
                    b.value = b.measurement.add(&b.value, &end);

                    black_box(outputs);

                    iteration_counter += batch_size;
                }
            }

            b.elapsed_time = time_start.elapsed();
        })
    }

    /// Times a `routine` that requires some input by generating a batch of input, then timing the
    /// iteration of the benchmark over the input. See [`BatchSize`] for
    /// details on choosing the batch size. Use this when the routine should accept the input by
    /// mutable reference.
    ///
    /// For example, use this loop to benchmark sorting algorithms, because they require unsorted
    /// data on each iteration.
    ///
    /// # Timing model
    ///
    /// ```text
    /// elapsed = (Instant::now * num_batches) + (iters * routine) + Vec::extend
    /// ```
    ///
    /// # Example
    ///
    /// ```rust
    /// use criterion::{criterion_group, criterion_main, BatchSize, Criterion};
    /// use criterion::async_executor::FuturesExecutor;
    ///
    /// fn create_scrambled_data() -> Vec<u64> {
    ///     # vec![]
    ///     // ...
    /// }
    ///
    /// // The sorting algorithm to test
    /// async fn sort(data: &mut [u64]) {
    ///     // ...
    /// }
    ///
    /// fn bench(c: &mut Criterion) {
    ///     let data = create_scrambled_data();
    ///
    ///     c.bench_function("with_setup", move |b| {
    ///         // This will avoid timing the clone call.
    ///         b.iter_batched(|| data.clone(), |mut data| async move { sort(&mut data).await }, BatchSize::SmallInput)
    ///     });
    /// }
    ///
    /// criterion_group!(benches, bench);
    /// criterion_main!(benches);
    /// ```
    ///
    #[inline(never)]
    pub fn iter_batched_ref<I, O, S, R, F>(&mut self, mut setup: S, mut routine: R, size: BatchSize)
    where
        S: FnMut() -> I,
        R: FnMut(&mut I) -> F,
        F: Future<Output = O>,
    {
        let AsyncBencher { b, runner } = self;
        runner.block_on(async {
            b.iterated = true;
            let batch_size = size.iters_per_batch(b.iters);
            assert!(batch_size != 0, "Batch size must not be zero.");
            let time_start = Instant::now();
            b.value = b.measurement.zero();

            if batch_size == 1 {
                // Special case: time each call individually; the input is dropped after
                // the clock stops, so (unlike iter_batched) its drop is untimed.
                for _ in 0..b.iters {
                    let mut input = black_box(setup());

                    let start = b.measurement.start();
                    let output = routine(&mut input).await;
                    let end = b.measurement.end(start);
                    b.value = b.measurement.add(&b.value, &end);

                    drop(black_box(output));
                    drop(black_box(input));
                }
            } else {
                let mut iteration_counter = 0;

                while iteration_counter < b.iters {
                    // The final batch may be smaller so we land exactly on b.iters.
                    let batch_size = ::std::cmp::min(batch_size, b.iters - iteration_counter);

                    let inputs = black_box((0..batch_size).map(|_| setup()).collect::<Vec<_>>());
                    let mut outputs = Vec::with_capacity(batch_size as usize);

                    let start = b.measurement.start();
                    // Can't use .extend here like the sync version does
                    // NOTE(review): each input is moved into the loop body here, so (unlike the
                    // sync iter_batched_ref) inputs are dropped inside the timed region.
                    for mut input in inputs {
                        outputs.push(routine(&mut input).await);
                    }
                    let end = b.measurement.end(start);
                    b.value = b.measurement.add(&b.value, &end);

                    black_box(outputs);

                    iteration_counter += batch_size;
                }
            }
            b.elapsed_time = time_start.elapsed();
        });
    }
}
751}