From 1545735a027a478aea4e9614974bac78d257689b Mon Sep 17 00:00:00 2001 From: not-matthias Date: Mon, 2 Feb 2026 18:45:55 +0100 Subject: [PATCH 1/2] feat(divan): run multiple rounds of analysis mode --- crates/codspeed/src/instrument_hooks/mod.rs | 11 +++- crates/codspeed/src/measurement.rs | 5 ++ crates/codspeed/src/request/mod.rs | 1 + crates/divan_compat/src/compat/bench/mod.rs | 63 +++++++++++++++++---- 4 files changed, 69 insertions(+), 11 deletions(-) diff --git a/crates/codspeed/src/instrument_hooks/mod.rs b/crates/codspeed/src/instrument_hooks/mod.rs index ff2edd93..52a9237f 100644 --- a/crates/codspeed/src/instrument_hooks/mod.rs +++ b/crates/codspeed/src/instrument_hooks/mod.rs @@ -3,8 +3,8 @@ mod ffi; #[cfg(use_instrument_hooks)] mod linux_impl { - use super::ffi; + use crate::measurement; use std::ffi::CString; use std::sync::OnceLock; @@ -44,6 +44,12 @@ mod linux_impl { unsafe { ffi::instrument_hooks_is_instrumented(self.0) } } + // FIXME: Should we move this to instrument-hooks native library? + #[inline(always)] + pub fn toggle_collect() { + measurement::toggle_collect(); + } + #[inline(always)] pub fn start_benchmark(&self) -> Result<(), u8> { let result = unsafe { ffi::instrument_hooks_start_benchmark(self.0) }; @@ -165,6 +171,9 @@ mod other_impl { false } + #[inline(always)] + pub fn toggle_collect() {} + pub fn start_benchmark(&self) -> Result<(), u8> { Ok(()) } diff --git a/crates/codspeed/src/measurement.rs b/crates/codspeed/src/measurement.rs index 9780e667..af4ac704 100644 --- a/crates/codspeed/src/measurement.rs +++ b/crates/codspeed/src/measurement.rs @@ -35,6 +35,11 @@ pub fn set_metadata() { } } +#[inline(always)] +pub fn toggle_collect() { + unsafe { send_client_request(0, &[ClientRequest::ToggleCollect as Value, 0, 0, 0, 0, 0]) }; +} + #[inline(always)] pub fn start() { unsafe { diff --git a/crates/codspeed/src/request/mod.rs b/crates/codspeed/src/request/mod.rs index a59fd322..a7be8698 100644 --- a/crates/codspeed/src/request/mod.rs +++ b/crates/codspeed/src/request/mod.rs @@ -5,6 +5,7 @@ const CG_BASE: u32 = ((b'C' as u32) << 24) + ((b'T' as u32) << 16); pub enum ClientRequest { RunningOnValgrind = 0x1001, ZeroStatistics = CG_BASE + 1, + ToggleCollect = CG_BASE + 2, DumpStatisticsAt = CG_BASE + 3, StartInstrumentation = CG_BASE + 4, StopInstrumentation = CG_BASE + 5, diff --git a/crates/divan_compat/src/compat/bench/mod.rs b/crates/divan_compat/src/compat/bench/mod.rs index 5ec824b9..5b19e7a0 100644 --- a/crates/divan_compat/src/compat/bench/mod.rs +++ b/crates/divan_compat/src/compat/bench/mod.rs @@ -10,7 +10,8 @@ pub use self::{ options::BenchOptions, }; -use codspeed::codspeed::CodSpeed; +use ::codspeed::codspeed::CodSpeed; +use ::codspeed::instrument_hooks::InstrumentHooks; use std::cell::RefCell; /// Using this in place of `()` for `GenI` prevents `Bencher::with_inputs` from @@ -136,11 +137,15 @@ where { let mut codspeed = self.codspeed.borrow_mut(); let mut gen_input = self.config.gen_input.borrow_mut(); - let input = gen_input(); - codspeed.start_benchmark(self.uri.as_str()); - let output = benched(input); - codspeed.end_benchmark(); - divan::black_box(output); + + codspeed::run_rounds(&mut codspeed, self.uri.as_str(), || { + // FIXME: We could also run multiple rounds here + let input = gen_input(); + InstrumentHooks::toggle_collect(); + let output = benched(divan::black_box(input)); + InstrumentHooks::toggle_collect(); + divan::black_box(output); + }); } pub fn bench_local_refs(self, mut benched: B) @@ -149,11 +154,49 @@ where { let mut codspeed = self.codspeed.borrow_mut(); let mut gen_input = self.config.gen_input.borrow_mut(); - let mut input = gen_input(); - codspeed.start_benchmark(self.uri.as_str()); - let output = benched(&mut input); + codspeed::run_rounds(&mut codspeed, self.uri.as_str(), || { + let mut input = gen_input(); + InstrumentHooks::toggle_collect(); + let output = benched(&mut input); + InstrumentHooks::toggle_collect(); + divan::black_box(input); + divan::black_box(output); + }); + } +} + +mod codspeed { + use super::*; + use std::time::{Duration, Instant}; + + pub fn run_rounds(codspeed: &mut CodSpeed, uri: &str, mut run_iteration: impl FnMut()) { + // FIXME: Maybe move this to codspeed + let (max_rounds, max_duration) = match std::env::var("CODSPEED_RUNNER_MODE").as_deref() { + Ok("simulation") | Ok("instrumentation") => (None, Some(Duration::from_millis(100))), + Ok("memory") => (Some(1), None), + Ok(m) => unreachable!("Invalid runner mode: {m}"), + Err(err) => panic!("Failed to get runner mode: {err}"), + }; + let mut rounds = 0; + let rounds_start_time = Instant::now(); + + codspeed.start_benchmark(uri); + InstrumentHooks::toggle_collect(); // Pause collection + + loop { + rounds += 1; + + run_iteration(); + + let within_rounds = max_rounds.map_or(true, |max| rounds < max); + let within_duration = + max_duration.map_or(true, |max| rounds_start_time.elapsed() < max); + if !(within_rounds && within_duration) { + break; + } + } + codspeed.end_benchmark(); - divan::black_box(output); } } From 71187bd5cd8cb0d254cd887afe9bb61500fc6749 Mon Sep 17 00:00:00 2001 From: not-matthias Date: Mon, 2 Feb 2026 19:23:03 +0100 Subject: [PATCH 2/2] wip: add criterion multiple rounds poc --- crates/codspeed/src/compat_utils.rs | 49 ++++ crates/codspeed/src/lib.rs | 1 + crates/criterion_compat/src/compat/bencher.rs | 222 ++++++++++++++---- crates/divan_compat/src/compat/bench/mod.rs | 40 +--- 4 files changed, 223 insertions(+), 89 deletions(-) create mode 100644 crates/codspeed/src/compat_utils.rs diff --git a/crates/codspeed/src/compat_utils.rs b/crates/codspeed/src/compat_utils.rs new file mode 100644 index 00000000..89794c4d --- /dev/null +++ b/crates/codspeed/src/compat_utils.rs @@ -0,0 +1,49 @@ +use crate::codspeed::CodSpeed; +use crate::instrument_hooks::InstrumentHooks; +use std::time::{Duration, Instant}; + +/// Runs multiple rounds of a benchmark based on CODSPEED_RUNNER_MODE. +/// +/// # Important +/// `start_benchmark()` and `end_benchmark()` are called on the OUTSIDE because they +/// clear CPU caches. This is expensive and should only happen once per benchmark. +/// Inside the loop, we use `toggle_collect()` to pause/resume data collection +/// between iterations without clearing caches. +/// +/// # Arguments +/// * `codspeed` - The CodSpeed instance to use for benchmarking +/// * `uri` - The benchmark identifier/URI +/// * `run_iteration` - Closure that runs a single benchmark iteration. +/// Should call `toggle_collect()` to resume/pause collection +/// around the measured code. +pub fn run_rounds(codspeed: &mut CodSpeed, uri: &str, mut run_iteration: impl FnMut()) { + let (max_rounds, max_duration) = match std::env::var("CODSPEED_RUNNER_MODE").as_deref() { + Ok("simulation") | Ok("instrumentation") => (None, Some(Duration::from_millis(100))), + Ok("memory") => (Some(1), None), + Ok(m) => unreachable!("Invalid runner mode: {m}"), + Err(err) => panic!("Failed to get runner mode: {err}"), + }; + + let mut rounds = 0; + let rounds_start_time = Instant::now(); + + // Start benchmark ONCE - this clears CPU caches + codspeed.start_benchmark(uri); + InstrumentHooks::toggle_collect(); // Pause collection before first iteration + + loop { + rounds += 1; + + run_iteration(); + + let within_rounds = max_rounds.map_or(true, |max| rounds < max); + let within_duration = max_duration.map_or(true, |max| rounds_start_time.elapsed() < max); + + if !(within_rounds && within_duration) { + break; + } + } + + // End benchmark ONCE + codspeed.end_benchmark(); +} diff --git a/crates/codspeed/src/lib.rs b/crates/codspeed/src/lib.rs index 929c3662..82708469 100644 --- a/crates/codspeed/src/lib.rs +++ b/crates/codspeed/src/lib.rs @@ -1,4 +1,5 @@ pub mod codspeed; +pub mod compat_utils; pub mod instrument_hooks; diff --git a/crates/criterion_compat/src/compat/bencher.rs b/crates/criterion_compat/src/compat/bencher.rs index 73310e4f..627c35ab 100644 --- a/crates/criterion_compat/src/compat/bencher.rs +++ b/crates/criterion_compat/src/compat/bencher.rs @@ -1,4 +1,6 @@ use codspeed::codspeed::{black_box, CodSpeed}; +use codspeed::compat_utils; +use codspeed::instrument_hooks::InstrumentHooks; use colored::Colorize; use criterion::BatchSize; @@ -25,15 +27,19 @@ impl<'a> Bencher<'a> { { // NOTE: this structure hardens our benchmark against dead code elimination // https://godbolt.org/z/KnYeKMd1o - for i in 0..codspeed::codspeed::WARMUP_RUNS + 1 { - if i < codspeed::codspeed::WARMUP_RUNS { - black_box(routine()); - } else { - self.codspeed.start_benchmark(self.uri.as_str()); - black_box(routine()); - self.codspeed.end_benchmark(); - } + + // Warmup runs + for _ in 0..codspeed::codspeed::WARMUP_RUNS { + black_box(routine()); } + + // Multiple measured rounds + compat_utils::run_rounds(self.codspeed, self.uri.as_str(), || { + InstrumentHooks::toggle_collect(); // Resume collection + let output = routine(); + InstrumentHooks::toggle_collect(); // Pause collection + black_box(output); + }); } #[inline(never)] @@ -54,19 +60,21 @@ impl<'a> Bencher<'a> { S: FnMut() -> I, R: FnMut(I) -> O, { - for i in 0..codspeed::codspeed::WARMUP_RUNS + 1 { + // Warmup runs + for _ in 0..codspeed::codspeed::WARMUP_RUNS { let input = black_box(setup()); - let output = if i < codspeed::codspeed::WARMUP_RUNS { - routine(input) - } else { - let input = black_box(setup()); - self.codspeed.start_benchmark(self.uri.as_str()); - let output = routine(input); - self.codspeed.end_benchmark(); - output - }; + let output = routine(input); drop(black_box(output)); } + + // Multiple measured rounds + compat_utils::run_rounds(self.codspeed, self.uri.as_str(), || { + let input = setup(); // Setup runs while collection is paused + InstrumentHooks::toggle_collect(); // Resume collection + let output = routine(input); + InstrumentHooks::toggle_collect(); // Pause collection + black_box(output); + }); } pub fn iter_with_setup(&mut self, setup: S, routine: R) @@ -98,19 +106,23 @@ impl<'a> Bencher<'a> { S: FnMut() -> I, R: FnMut(&mut I) -> O, { - for i in 0..codspeed::codspeed::WARMUP_RUNS + 1 { + // Warmup runs + for _ in 0..codspeed::codspeed::WARMUP_RUNS { let mut input = black_box(setup()); - let output = if i < codspeed::codspeed::WARMUP_RUNS { - black_box(routine(&mut input)) - } else { - self.codspeed.start_benchmark(self.uri.as_str()); - let output = black_box(routine(&mut input)); - self.codspeed.end_benchmark(); - output - }; + let output = black_box(routine(&mut input)); drop(black_box(output)); drop(black_box(input)); } + + // Multiple measured rounds + compat_utils::run_rounds(self.codspeed, self.uri.as_str(), || { + let mut input = setup(); // Setup runs while collection is paused + InstrumentHooks::toggle_collect(); // Resume collection + let output = routine(&mut input); + InstrumentHooks::toggle_collect(); // Pause collection + black_box(input); + black_box(output); + }); } #[cfg(feature = "async")] @@ -135,17 +147,52 @@ impl<'a, 'b, A: AsyncExecutor> AsyncBencher<'a, 'b, A> { R: FnMut() -> F, F: Future, { + use std::time::{Duration, Instant}; + let AsyncBencher { b, runner } = self; runner.block_on(async { - for i in 0..codspeed::codspeed::WARMUP_RUNS + 1 { - if i < codspeed::codspeed::WARMUP_RUNS { - black_box(routine().await); - } else { - b.codspeed.start_benchmark(b.uri.as_str()); - black_box(routine().await); - b.codspeed.end_benchmark(); + // Warmup runs + for _ in 0..codspeed::codspeed::WARMUP_RUNS { + black_box(routine().await); + } + + // Multiple measured rounds + let (max_rounds, max_duration) = match std::env::var("CODSPEED_RUNNER_MODE").as_deref() + { + Ok("simulation") | Ok("instrumentation") => { + (None, Some(Duration::from_millis(100))) + } + Ok("memory") => (Some(1), None), + Ok(m) => unreachable!("Invalid runner mode: {m}"), + Err(err) => panic!("Failed to get runner mode: {err}"), + }; + + let mut rounds = 0; + let rounds_start_time = Instant::now(); + + // Start benchmark ONCE - this clears CPU caches + b.codspeed.start_benchmark(b.uri.as_str()); + InstrumentHooks::toggle_collect(); // Pause collection before first iteration + + loop { + rounds += 1; + + InstrumentHooks::toggle_collect(); // Resume collection + let output = routine().await; + InstrumentHooks::toggle_collect(); // Pause collection + black_box(output); + + let within_rounds = max_rounds.map_or(true, |max| rounds < max); + let within_duration = + max_duration.map_or(true, |max| rounds_start_time.elapsed() < max); + + if !(within_rounds && within_duration) { + break; } } + + // End benchmark ONCE + b.codspeed.end_benchmark(); }); } @@ -199,20 +246,55 @@ impl<'a, 'b, A: AsyncExecutor> AsyncBencher<'a, 'b, A> { R: FnMut(I) -> F, F: Future, { + use std::time::{Duration, Instant}; + let AsyncBencher { b, runner } = self; runner.block_on(async { - for i in 0..codspeed::codspeed::WARMUP_RUNS + 1 { + // Warmup runs + for _ in 0..codspeed::codspeed::WARMUP_RUNS { let input = black_box(setup()); - let output = if i < codspeed::codspeed::WARMUP_RUNS { - routine(input).await - } else { - b.codspeed.start_benchmark(b.uri.as_str()); - let output = routine(input).await; - b.codspeed.end_benchmark(); - output - }; + let output = routine(input).await; drop(black_box(output)); } + + // Multiple measured rounds + let (max_rounds, max_duration) = match std::env::var("CODSPEED_RUNNER_MODE").as_deref() + { + Ok("simulation") | Ok("instrumentation") => { + (None, Some(Duration::from_millis(100))) + } + Ok("memory") => (Some(1), None), + Ok(m) => unreachable!("Invalid runner mode: {m}"), + Err(err) => panic!("Failed to get runner mode: {err}"), + }; + + let mut rounds = 0; + let rounds_start_time = Instant::now(); + + // Start benchmark ONCE - this clears CPU caches + b.codspeed.start_benchmark(b.uri.as_str()); + InstrumentHooks::toggle_collect(); // Pause collection before first iteration + + loop { + rounds += 1; + + let input = setup(); // Setup runs while collection is paused + InstrumentHooks::toggle_collect(); // Resume collection + let output = routine(input).await; + InstrumentHooks::toggle_collect(); // Pause collection + black_box(output); + + let within_rounds = max_rounds.map_or(true, |max| rounds < max); + let within_duration = + max_duration.map_or(true, |max| rounds_start_time.elapsed() < max); + + if !(within_rounds && within_duration) { + break; + } + } + + // End benchmark ONCE + b.codspeed.end_benchmark(); }) } @@ -228,21 +310,57 @@ impl<'a, 'b, A: AsyncExecutor> AsyncBencher<'a, 'b, A> { R: FnMut(&mut I) -> F, F: Future, { + use std::time::{Duration, Instant}; + let AsyncBencher { b, runner } = self; runner.block_on(async { - for i in 0..codspeed::codspeed::WARMUP_RUNS + 1 { + // Warmup runs + for _ in 0..codspeed::codspeed::WARMUP_RUNS { let mut input = black_box(setup()); - let output = if i < codspeed::codspeed::WARMUP_RUNS { - black_box(routine(&mut input).await) - } else { - b.codspeed.start_benchmark(b.uri.as_str()); - let output = black_box(routine(&mut input).await); - b.codspeed.end_benchmark(); - output - }; + let output = black_box(routine(&mut input).await); drop(black_box(output)); drop(black_box(input)); } + + // Multiple measured rounds + let (max_rounds, max_duration) = match std::env::var("CODSPEED_RUNNER_MODE").as_deref() + { + Ok("simulation") | Ok("instrumentation") => { + (None, Some(Duration::from_millis(100))) + } + Ok("memory") => (Some(1), None), + Ok(m) => unreachable!("Invalid runner mode: {m}"), + Err(err) => panic!("Failed to get runner mode: {err}"), + }; + + let mut rounds = 0; + let rounds_start_time = Instant::now(); + + // Start benchmark ONCE - this clears CPU caches + b.codspeed.start_benchmark(b.uri.as_str()); + InstrumentHooks::toggle_collect(); // Pause collection before first iteration + + loop { + rounds += 1; + + let mut input = setup(); // Setup runs while collection is paused + InstrumentHooks::toggle_collect(); // Resume collection + let output = routine(&mut input).await; + InstrumentHooks::toggle_collect(); // Pause collection + black_box(input); + black_box(output); + + let within_rounds = max_rounds.map_or(true, |max| rounds < max); + let within_duration = + max_duration.map_or(true, |max| rounds_start_time.elapsed() < max); + + if !(within_rounds && within_duration) { + break; + } + } + + // End benchmark ONCE + b.codspeed.end_benchmark(); }); } } diff --git a/crates/divan_compat/src/compat/bench/mod.rs b/crates/divan_compat/src/compat/bench/mod.rs index 5b19e7a0..90d44a43 100644 --- a/crates/divan_compat/src/compat/bench/mod.rs +++ b/crates/divan_compat/src/compat/bench/mod.rs @@ -11,6 +11,7 @@ pub use self::{ }; use ::codspeed::codspeed::CodSpeed; +use ::codspeed::compat_utils; use ::codspeed::instrument_hooks::InstrumentHooks; use std::cell::RefCell; @@ -138,7 +139,7 @@ where let mut codspeed = self.codspeed.borrow_mut(); let mut gen_input = self.config.gen_input.borrow_mut(); - codspeed::run_rounds(&mut codspeed, self.uri.as_str(), || { + compat_utils::run_rounds(&mut codspeed, self.uri.as_str(), || { // FIXME: We could also run multiple rounds here let input = gen_input(); InstrumentHooks::toggle_collect(); @@ -155,7 +156,7 @@ where let mut codspeed = self.codspeed.borrow_mut(); let mut gen_input = self.config.gen_input.borrow_mut(); - codspeed::run_rounds(&mut codspeed, self.uri.as_str(), || { + compat_utils::run_rounds(&mut codspeed, self.uri.as_str(), || { let mut input = gen_input(); InstrumentHooks::toggle_collect(); let output = benched(&mut input); @@ -165,38 +166,3 @@ where }); } } - -mod codspeed { - use super::*; - use std::time::{Duration, Instant}; - - pub fn run_rounds(codspeed: &mut CodSpeed, uri: &str, mut run_iteration: impl FnMut()) { - // FIXME: Maybe move this to codspeed - let (max_rounds, max_duration) = match std::env::var("CODSPEED_RUNNER_MODE").as_deref() { - Ok("simulation") | Ok("instrumentation") => (None, Some(Duration::from_millis(100))), - Ok("memory") => (Some(1), None), - Ok(m) => unreachable!("Invalid runner mode: {m}"), - Err(err) => panic!("Failed to get runner mode: {err}"), - }; - let mut rounds = 0; - let rounds_start_time = Instant::now(); - - codspeed.start_benchmark(uri); - InstrumentHooks::toggle_collect(); // Pause collection - - loop { - rounds += 1; - - run_iteration(); - - let within_rounds = max_rounds.map_or(true, |max| rounds < max); - let within_duration = - max_duration.map_or(true, |max| rounds_start_time.elapsed() < max); - if !(within_rounds && within_duration) { - break; - } - } - - codspeed.end_benchmark(); - } -}