//! Timing loops used by Criterion's [`Bencher`] and [`AsyncBencher`].
use std::hint::black_box;
use std::time::{Duration, Instant};

use crate::measurement::{Measurement, WallTime};
use crate::BatchSize;

#[cfg(feature = "async")]
use std::future::Future;

#[cfg(feature = "async")]
use crate::async_executor::AsyncExecutor;
13
// ================================== MAINTENANCE NOTE =============================================
// Any changes made to either Bencher or AsyncBencher will have to be replicated to the other!
// ================================== MAINTENANCE NOTE =============================================
17
18/// Timer struct used to iterate a benchmarked function and measure the runtime.
19///
20/// This struct provides different timing loops as methods. Each timing loop provides a different
21/// way to time a routine and each has advantages and disadvantages.
22///
23/// * If you want to do the iteration and measurement yourself (eg. passing the iteration count
24///   to a separate process), use [`iter_custom`].
25/// * If your routine requires no per-iteration setup and returns a value with an expensive `drop`
26///   method, use [`iter_with_large_drop`].
27/// * If your routine requires some per-iteration setup that shouldn't be timed, use [`iter_batched`]
28///   or [`iter_batched_ref`]. See [`BatchSize`] for a discussion of batch sizes.
29///   If the setup value implements `Drop` and you don't want to include the `drop` time in the
30///   measurement, use [`iter_batched_ref`], otherwise use [`iter_batched`]. These methods are also
31///   suitable for benchmarking routines which return a value with an expensive `drop` method,
32///   but are more complex than [`iter_with_large_drop`].
33/// * Otherwise, use [`iter`].
34///
35/// [`iter`]: Bencher::iter
36/// [`iter_custom`]: Bencher::iter_custom
37/// [`iter_with_large_drop`]: Bencher::iter_with_large_drop
38/// [`iter_batched`]: Bencher::iter_batched
39/// [`iter_batched_ref`]: Bencher::iter_batched_ref
40pub struct Bencher<'a, M: Measurement = WallTime> {
41    pub(crate) iterated: bool,         // Have we iterated this benchmark?
42    pub(crate) iters: u64,             // Number of times to iterate this benchmark
43    pub(crate) value: M::Value,        // The measured value
44    pub(crate) measurement: &'a M,     // Reference to the measurement object
45    pub(crate) elapsed_time: Duration, // How much time did it take to perform the iteration? Used for the warmup period.
46}
47impl<'a, M: Measurement> Bencher<'a, M> {
48    /// Times a `routine` by executing it many times and timing the total elapsed time.
49    ///
50    /// Prefer this timing loop when `routine` returns a value that doesn't have a destructor.
51    ///
52    /// # Timing model
53    ///
54    /// Note that the `Bencher` also times the time required to destroy the output of `routine()`.
55    /// Therefore prefer this timing loop when the runtime of `mem::drop(O)` is negligible compared
56    /// to the runtime of the `routine`.
57    ///
58    /// ```text
59    /// elapsed = Instant::now + iters * (routine + mem::drop(O) + Range::next)
60    /// ```
61    ///
62    /// # Example
63    ///
64    /// ```rust
65    /// use criterion::{criterion_group, criterion_main, Criterion};
66    ///
67    /// // The function to benchmark
68    /// fn foo() {
69    ///     // ...
70    /// }
71    ///
72    /// fn bench(c: &mut Criterion) {
73    ///     c.bench_function("iter", move |b| {
74    ///         b.iter(|| foo())
75    ///     });
76    /// }
77    ///
78    /// criterion_group!(benches, bench);
79    /// criterion_main!(benches);
80    /// ```
81    ///
82    #[inline(never)]
83    pub fn iter<O, R>(&mut self, mut routine: R)
84    where
85        R: FnMut() -> O,
86    {
87        self.iterated = true;
88        let time_start = Instant::now();
89        let start = self.measurement.start();
90        for _ in 0..self.iters {
91            black_box(routine());
92        }
93        self.value = self.measurement.end(start);
94        self.elapsed_time = time_start.elapsed();
95    }
96
97    /// Times a `routine` by executing it many times and relying on `routine` to measure its own execution time.
98    ///
99    /// Prefer this timing loop in cases where `routine` has to do its own measurements to
100    /// get accurate timing information (for example in multi-threaded scenarios where you spawn
101    /// and coordinate with multiple threads).
102    ///
103    /// # Timing model
104    /// Custom, the timing model is whatever is returned as the [`Duration`] from `routine`.
105    ///
106    /// # Example
107    /// ```rust
108    /// use criterion::{criterion_group, criterion_main, Criterion};
109    /// use std::time::Instant;
110    ///
111    /// fn foo() {
112    ///     // ...
113    /// }
114    ///
115    /// fn bench(c: &mut Criterion) {
116    ///     c.bench_function("iter", move |b| {
117    ///         b.iter_custom(|iters| {
118    ///             let start = Instant::now();
119    ///             for _i in 0..iters {
120    ///                 std::hint::black_box(foo());
121    ///             }
122    ///             start.elapsed()
123    ///         })
124    ///     });
125    /// }
126    ///
127    /// criterion_group!(benches, bench);
128    /// criterion_main!(benches);
129    /// ```
130    ///
131    #[inline(never)]
132    pub fn iter_custom<R>(&mut self, mut routine: R)
133    where
134        R: FnMut(u64) -> M::Value,
135    {
136        self.iterated = true;
137        let time_start = Instant::now();
138        self.value = routine(self.iters);
139        self.elapsed_time = time_start.elapsed();
140    }
141
142    #[doc(hidden)]
143    pub fn iter_with_setup<I, O, S, R>(&mut self, setup: S, routine: R)
144    where
145        S: FnMut() -> I,
146        R: FnMut(I) -> O,
147    {
148        self.iter_batched(setup, routine, BatchSize::PerIteration);
149    }
150
151    /// Times a `routine` by collecting its output on each iteration. This avoids timing the
152    /// destructor of the value returned by `routine`.
153    ///
154    /// WARNING: This requires `O(iters * mem::size_of::<O>())` of memory, and `iters` is not
155    /// under the control of the caller. If this causes out-of-memory errors, use
156    /// [`iter_batched`](Self::iter_batched) instead.
157    ///
158    /// # Timing model
159    ///
160    /// ``` text
161    /// elapsed = Instant::now + iters * (routine) + Iterator::collect::<Vec<_>>
162    /// ```
163    ///
164    /// # Example
165    ///
166    /// ```rust
167    /// use criterion::{criterion_group, criterion_main, Criterion};
168    ///
169    /// fn create_vector() -> Vec<u64> {
170    ///     # vec![]
171    ///     // ...
172    /// }
173    ///
174    /// fn bench(c: &mut Criterion) {
175    ///     c.bench_function("with_drop", move |b| {
176    ///         // This will avoid timing the Vec::drop.
177    ///         b.iter_with_large_drop(|| create_vector())
178    ///     });
179    /// }
180    ///
181    /// criterion_group!(benches, bench);
182    /// criterion_main!(benches);
183    /// ```
184    ///
185    pub fn iter_with_large_drop<O, R>(&mut self, mut routine: R)
186    where
187        R: FnMut() -> O,
188    {
189        self.iter_batched(|| (), |_| routine(), BatchSize::SmallInput);
190    }
191
192    /// Times a `routine` that requires some input by generating a batch of input, then timing the
193    /// iteration of the benchmark over the input. See [`BatchSize`] for
194    /// details on choosing the batch size. Use this when the routine must consume its input.
195    ///
196    /// For example, use this loop to benchmark sorting algorithms, because they require unsorted
197    /// data on each iteration.
198    ///
199    /// # Timing model
200    ///
201    /// ```text
202    /// elapsed = (Instant::now * num_batches) + (iters * (routine + O::drop)) + Vec::extend
203    /// ```
204    ///
205    /// # Example
206    ///
207    /// ```rust
208    /// use criterion::{criterion_group, criterion_main, BatchSize, Criterion};
209    ///
210    /// fn create_scrambled_data() -> Vec<u64> {
211    ///     # vec![]
212    ///     // ...
213    /// }
214    ///
215    /// // The sorting algorithm to test
216    /// fn sort(data: &mut [u64]) {
217    ///     // ...
218    /// }
219    ///
220    /// fn bench(c: &mut Criterion) {
221    ///     let data = create_scrambled_data();
222    ///
223    ///     c.bench_function("with_setup", move |b| {
224    ///         // This will avoid timing the clone call.
225    ///         b.iter_batched(|| data.clone(), |mut data| sort(&mut data), BatchSize::SmallInput)
226    ///     });
227    /// }
228    ///
229    /// criterion_group!(benches, bench);
230    /// criterion_main!(benches);
231    /// ```
232    ///
233    #[inline(never)]
234    pub fn iter_batched<I, O, S, R>(&mut self, mut setup: S, mut routine: R, size: BatchSize)
235    where
236        S: FnMut() -> I,
237        R: FnMut(I) -> O,
238    {
239        self.iterated = true;
240        let batch_size = size.iters_per_batch(self.iters);
241        assert!(batch_size != 0, "Batch size must not be zero.");
242        let time_start = Instant::now();
243        self.value = self.measurement.zero();
244
245        if batch_size == 1 {
246            for _ in 0..self.iters {
247                let input = black_box(setup());
248
249                let start = self.measurement.start();
250                let output = routine(input);
251                let end = self.measurement.end(start);
252                self.value = self.measurement.add(&self.value, &end);
253
254                drop(black_box(output));
255            }
256        } else {
257            let mut iteration_counter = 0;
258
259            while iteration_counter < self.iters {
260                let batch_size = ::std::cmp::min(batch_size, self.iters - iteration_counter);
261
262                let inputs = black_box((0..batch_size).map(|_| setup()).collect::<Vec<_>>());
263                let mut outputs = Vec::with_capacity(batch_size as usize);
264
265                let start = self.measurement.start();
266                outputs.extend(inputs.into_iter().map(&mut routine));
267                let end = self.measurement.end(start);
268                self.value = self.measurement.add(&self.value, &end);
269
270                black_box(outputs);
271
272                iteration_counter += batch_size;
273            }
274        }
275
276        self.elapsed_time = time_start.elapsed();
277    }
278
279    /// Times a `routine` that requires some input by generating a batch of input, then timing the
280    /// iteration of the benchmark over the input. See [`BatchSize`] for
281    /// details on choosing the batch size. Use this when the routine should accept the input by
282    /// mutable reference.
283    ///
284    /// For example, use this loop to benchmark sorting algorithms, because they require unsorted
285    /// data on each iteration.
286    ///
287    /// # Timing model
288    ///
289    /// ```text
290    /// elapsed = (Instant::now * num_batches) + (iters * routine) + Vec::extend
291    /// ```
292    ///
293    /// # Example
294    ///
295    /// ```rust
296    /// use criterion::{criterion_group, criterion_main, BatchSize, Criterion};
297    ///
298    /// fn create_scrambled_data() -> Vec<u64> {
299    ///     # vec![]
300    ///     // ...
301    /// }
302    ///
303    /// // The sorting algorithm to test
304    /// fn sort(data: &mut [u64]) {
305    ///     // ...
306    /// }
307    ///
308    /// fn bench(c: &mut Criterion) {
309    ///     let data = create_scrambled_data();
310    ///
311    ///     c.bench_function("with_setup", move |b| {
312    ///         // This will avoid timing the clone call.
313    ///         b.iter_batched_ref(|| data.clone(), |mut data| sort(&mut data), BatchSize::SmallInput)
314    ///     });
315    /// }
316    ///
317    /// criterion_group!(benches, bench);
318    /// criterion_main!(benches);
319    /// ```
320    ///
321    #[inline(never)]
322    pub fn iter_batched_ref<I, O, S, R>(&mut self, mut setup: S, mut routine: R, size: BatchSize)
323    where
324        S: FnMut() -> I,
325        R: FnMut(&mut I) -> O,
326    {
327        self.iterated = true;
328        let batch_size = size.iters_per_batch(self.iters);
329        assert!(batch_size != 0, "Batch size must not be zero.");
330        let time_start = Instant::now();
331        self.value = self.measurement.zero();
332
333        if batch_size == 1 {
334            for _ in 0..self.iters {
335                let mut input = black_box(setup());
336
337                let start = self.measurement.start();
338                let output = routine(&mut input);
339                let end = self.measurement.end(start);
340                self.value = self.measurement.add(&self.value, &end);
341
342                drop(black_box(output));
343                drop(black_box(input));
344            }
345        } else {
346            let mut iteration_counter = 0;
347
348            while iteration_counter < self.iters {
349                let batch_size = ::std::cmp::min(batch_size, self.iters - iteration_counter);
350
351                let mut inputs = black_box((0..batch_size).map(|_| setup()).collect::<Vec<_>>());
352                let mut outputs = Vec::with_capacity(batch_size as usize);
353
354                let start = self.measurement.start();
355                outputs.extend(inputs.iter_mut().map(&mut routine));
356                let end = self.measurement.end(start);
357                self.value = self.measurement.add(&self.value, &end);
358
359                black_box(outputs);
360
361                iteration_counter += batch_size;
362            }
363        }
364        self.elapsed_time = time_start.elapsed();
365    }
366
367    // Benchmarks must actually call one of the iter methods. This causes benchmarks to fail loudly
368    // if they don't.
369    pub(crate) fn assert_iterated(&mut self) {
370        assert!(
371            self.iterated,
372            "Benchmark function must call Bencher::iter or related method."
373        );
374        self.iterated = false;
375    }
376
377    /// Convert this bencher into an [`AsyncBencher`], which enables async/await support.
378    #[cfg(feature = "async")]
379    pub fn to_async<'b, A: AsyncExecutor>(&'b mut self, runner: A) -> AsyncBencher<'a, 'b, A, M> {
380        AsyncBencher { b: self, runner }
381    }
382}
383
/// Async/await variant of [`Bencher`].
#[cfg(feature = "async")]
pub struct AsyncBencher<'a, 'b, A: AsyncExecutor, M: Measurement = WallTime> {
    b: &'b mut Bencher<'a, M>, // The underlying synchronous bencher that records the results
    runner: A,                 // Executor used to block on the benchmarked futures
}
#[cfg(feature = "async")]
impl<'a, 'b, A: AsyncExecutor, M: Measurement> AsyncBencher<'a, 'b, A, M> {
    /// Times a `routine` by executing it many times and timing the total elapsed time.
    ///
    /// Prefer this timing loop when `routine` returns a value that doesn't have a destructor.
    ///
    /// # Timing model
    ///
    /// Note that the `AsyncBencher` also times the time required to destroy the output of `routine()`.
    /// Therefore prefer this timing loop when the runtime of `mem::drop(O)` is negligible compared
    /// to the runtime of the `routine`.
    ///
    /// ```text
    /// elapsed = Instant::now + iters * (routine + mem::drop(O) + Range::next)
    /// ```
    ///
    /// # Example
    ///
    /// ```rust
    /// use criterion::{criterion_group, criterion_main, Criterion};
    /// use criterion::async_executor::FuturesExecutor;
    ///
    /// // The function to benchmark
    /// async fn foo() {
    ///     // ...
    /// }
    ///
    /// fn bench(c: &mut Criterion) {
    ///     c.bench_function("iter", move |b| {
    ///         b.to_async(FuturesExecutor).iter(|| async { foo().await } )
    ///     });
    /// }
    ///
    /// criterion_group!(benches, bench);
    /// criterion_main!(benches);
    /// ```
    ///
    #[inline(never)]
    pub fn iter<O, R, F>(&mut self, mut routine: R)
    where
        R: FnMut() -> F,
        F: Future<Output = O>,
    {
        let AsyncBencher { b, runner } = self;
        runner.block_on(async {
            b.iterated = true;
            let time_start = Instant::now();
            let start = b.measurement.start();
            for _ in 0..b.iters {
                // black_box prevents the optimizer from discarding the routine's result.
                black_box(routine().await);
            }
            b.value = b.measurement.end(start);
            b.elapsed_time = time_start.elapsed();
        });
    }

    /// Times a `routine` by executing it many times and relying on `routine` to measure its own execution time.
    ///
    /// Prefer this timing loop in cases where `routine` has to do its own measurements to
    /// get accurate timing information (for example in multi-threaded scenarios where you spawn
    /// and coordinate with multiple threads).
    ///
    /// # Timing model
    /// Custom, the timing model is whatever is returned as the [`Duration`] from `routine`.
    ///
    /// # Example
    /// ```rust
    /// use criterion::{criterion_group, criterion_main, Criterion};
    /// use criterion::async_executor::FuturesExecutor;
    /// use std::time::Instant;
    ///
    /// async fn foo() {
    ///     // ...
    /// }
    ///
    /// fn bench(c: &mut Criterion) {
    ///     c.bench_function("iter", move |b| {
    ///         b.to_async(FuturesExecutor).iter_custom(|iters| {
    ///             async move {
    ///                 let start = Instant::now();
    ///                 for _i in 0..iters {
    ///                     std::hint::black_box(foo().await);
    ///                 }
    ///                 start.elapsed()
    ///             }
    ///         })
    ///     });
    /// }
    ///
    /// criterion_group!(benches, bench);
    /// criterion_main!(benches);
    /// ```
    ///
    #[inline(never)]
    pub fn iter_custom<R, F>(&mut self, mut routine: R)
    where
        R: FnMut(u64) -> F,
        F: Future<Output = M::Value>,
    {
        let AsyncBencher { b, runner } = self;
        runner.block_on(async {
            b.iterated = true;
            let time_start = Instant::now();
            // The routine is responsible for iterating `iters` times and measuring itself.
            b.value = routine(b.iters).await;
            b.elapsed_time = time_start.elapsed();
        })
    }

    #[doc(hidden)]
    pub fn iter_with_setup<I, O, S, R, F>(&mut self, setup: S, routine: R)
    where
        S: FnMut() -> I,
        R: FnMut(I) -> F,
        F: Future<Output = O>,
    {
        self.iter_batched(setup, routine, BatchSize::PerIteration);
    }

    /// Times a `routine` by collecting its output on each iteration. This avoids timing the
    /// destructor of the value returned by `routine`.
    ///
    /// WARNING: This requires `O(iters * mem::size_of::<O>())` of memory, and `iters`
    /// is not under the control of the caller. If this causes out-of-memory errors, use
    /// [`iter_batched`](Self::iter_batched) instead.
    ///
    /// # Timing model
    ///
    /// ``` text
    /// elapsed = Instant::now + iters * (routine) + Iterator::collect::<Vec<_>>
    /// ```
    ///
    /// # Example
    ///
    /// ```rust
    /// use criterion::{criterion_group, criterion_main, Criterion};
    /// use criterion::async_executor::FuturesExecutor;
    ///
    /// async fn create_vector() -> Vec<u64> {
    ///     # vec![]
    ///     // ...
    /// }
    ///
    /// fn bench(c: &mut Criterion) {
    ///     c.bench_function("with_drop", move |b| {
    ///         // This will avoid timing the Vec::drop.
    ///         b.to_async(FuturesExecutor).iter_with_large_drop(|| async { create_vector().await })
    ///     });
    /// }
    ///
    /// criterion_group!(benches, bench);
    /// criterion_main!(benches);
    /// ```
    ///
    pub fn iter_with_large_drop<O, R, F>(&mut self, mut routine: R)
    where
        R: FnMut() -> F,
        F: Future<Output = O>,
    {
        self.iter_batched(|| (), |_| routine(), BatchSize::SmallInput);
    }

    #[doc(hidden)]
    pub fn iter_with_large_setup<I, O, S, R, F>(&mut self, setup: S, routine: R)
    where
        S: FnMut() -> I,
        R: FnMut(I) -> F,
        F: Future<Output = O>,
    {
        self.iter_batched(setup, routine, BatchSize::NumBatches(1));
    }

    /// Times a `routine` that requires some input by generating a batch of input, then timing the
    /// iteration of the benchmark over the input. See [`BatchSize`] for
    /// details on choosing the batch size. Use this when the routine must consume its input.
    ///
    /// For example, use this loop to benchmark sorting algorithms, because they require unsorted
    /// data on each iteration.
    ///
    /// # Timing model
    ///
    /// ```text
    /// elapsed = (Instant::now * num_batches) + (iters * (routine + O::drop)) + Vec::extend
    /// ```
    ///
    /// # Example
    ///
    /// ```rust
    /// use criterion::{criterion_group, criterion_main, BatchSize, Criterion};
    /// use criterion::async_executor::FuturesExecutor;
    ///
    /// fn create_scrambled_data() -> Vec<u64> {
    ///     # vec![]
    ///     // ...
    /// }
    ///
    /// // The sorting algorithm to test
    /// async fn sort(data: &mut [u64]) {
    ///     // ...
    /// }
    ///
    /// fn bench(c: &mut Criterion) {
    ///     let data = create_scrambled_data();
    ///
    ///     c.bench_function("with_setup", move |b| {
    ///         // This will avoid timing the clone call.
    ///         b.to_async(FuturesExecutor).iter_batched(|| data.clone(), |mut data| async move { sort(&mut data).await }, BatchSize::SmallInput)
    ///     });
    /// }
    ///
    /// criterion_group!(benches, bench);
    /// criterion_main!(benches);
    /// ```
    ///
    #[inline(never)]
    pub fn iter_batched<I, O, S, R, F>(&mut self, mut setup: S, mut routine: R, size: BatchSize)
    where
        S: FnMut() -> I,
        R: FnMut(I) -> F,
        F: Future<Output = O>,
    {
        let AsyncBencher { b, runner } = self;
        runner.block_on(async {
            b.iterated = true;
            let batch_size = size.iters_per_batch(b.iters);
            assert!(batch_size != 0, "Batch size must not be zero.");
            let time_start = Instant::now();
            b.value = b.measurement.zero();

            if batch_size == 1 {
                // Fast path: no batching needed, measure each call individually.
                for _ in 0..b.iters {
                    let input = black_box(setup());

                    let start = b.measurement.start();
                    let output = routine(input).await;
                    let end = b.measurement.end(start);
                    b.value = b.measurement.add(&b.value, &end);

                    drop(black_box(output));
                }
            } else {
                let mut iteration_counter = 0;

                while iteration_counter < b.iters {
                    // The last batch may be smaller than the configured batch size.
                    let batch_size = ::std::cmp::min(batch_size, b.iters - iteration_counter);

                    let inputs = black_box((0..batch_size).map(|_| setup()).collect::<Vec<_>>());
                    let mut outputs = Vec::with_capacity(batch_size as usize);

                    let start = b.measurement.start();
                    // Can't use .extend here like the sync version does
                    for input in inputs {
                        outputs.push(routine(input).await);
                    }
                    let end = b.measurement.end(start);
                    b.value = b.measurement.add(&b.value, &end);

                    black_box(outputs);

                    iteration_counter += batch_size;
                }
            }

            b.elapsed_time = time_start.elapsed();
        })
    }

    /// Times a `routine` that requires some input by generating a batch of input, then timing the
    /// iteration of the benchmark over the input. See [`BatchSize`] for
    /// details on choosing the batch size. Use this when the routine should accept the input by
    /// mutable reference.
    ///
    /// For example, use this loop to benchmark sorting algorithms, because they require unsorted
    /// data on each iteration.
    ///
    /// Note that the future returned by `routine` may not borrow the `&mut I` argument; do any
    /// work that needs the mutable reference before constructing the future.
    ///
    /// # Timing model
    ///
    /// ```text
    /// elapsed = (Instant::now * num_batches) + (iters * routine) + Vec::extend
    /// ```
    ///
    /// # Example
    ///
    /// ```rust
    /// use criterion::{criterion_group, criterion_main, BatchSize, Criterion};
    /// use criterion::async_executor::FuturesExecutor;
    ///
    /// fn create_scrambled_data() -> Vec<u64> {
    ///     # vec![]
    ///     // ...
    /// }
    ///
    /// // The sorting algorithm to test
    /// fn sort(data: &mut [u64]) {
    ///     // ...
    /// }
    ///
    /// fn bench(c: &mut Criterion) {
    ///     let data = create_scrambled_data();
    ///
    ///     c.bench_function("with_setup", move |b| {
    ///         // This will avoid timing the clone call.
    ///         b.to_async(FuturesExecutor).iter_batched_ref(
    ///             || data.clone(),
    ///             |data| {
    ///                 // The returned future may not borrow `data`, so mutate first.
    ///                 sort(data);
    ///                 async {}
    ///             },
    ///             BatchSize::SmallInput,
    ///         )
    ///     });
    /// }
    ///
    /// criterion_group!(benches, bench);
    /// criterion_main!(benches);
    /// ```
    ///
    #[inline(never)]
    pub fn iter_batched_ref<I, O, S, R, F>(&mut self, mut setup: S, mut routine: R, size: BatchSize)
    where
        S: FnMut() -> I,
        R: FnMut(&mut I) -> F,
        F: Future<Output = O>,
    {
        let AsyncBencher { b, runner } = self;
        runner.block_on(async {
            b.iterated = true;
            let batch_size = size.iters_per_batch(b.iters);
            assert!(batch_size != 0, "Batch size must not be zero.");
            let time_start = Instant::now();
            b.value = b.measurement.zero();

            if batch_size == 1 {
                // Fast path: no batching needed, measure each call individually.
                for _ in 0..b.iters {
                    let mut input = black_box(setup());

                    let start = b.measurement.start();
                    let output = routine(&mut input).await;
                    let end = b.measurement.end(start);
                    b.value = b.measurement.add(&b.value, &end);

                    // Drop both output and input outside the measured region.
                    drop(black_box(output));
                    drop(black_box(input));
                }
            } else {
                let mut iteration_counter = 0;

                while iteration_counter < b.iters {
                    // The last batch may be smaller than the configured batch size.
                    let batch_size = ::std::cmp::min(batch_size, b.iters - iteration_counter);

                    let inputs = black_box((0..batch_size).map(|_| setup()).collect::<Vec<_>>());
                    let mut outputs = Vec::with_capacity(batch_size as usize);

                    let start = b.measurement.start();
                    // Can't use .extend here like the sync version does
                    for mut input in inputs {
                        outputs.push(routine(&mut input).await);
                    }
                    let end = b.measurement.end(start);
                    b.value = b.measurement.add(&b.value, &end);

                    black_box(outputs);

                    iteration_counter += batch_size;
                }
            }
            b.elapsed_time = time_start.elapsed();
        });
    }
}