From 5a236697473e554d7b27b29d25f7b188163d3b49 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Fri, 3 Apr 2026 08:27:29 -1000 Subject: [PATCH] [scheduler] Fix unrealistic scheduler benchmarks missing periodic drain (#15396) --- tests/benchmarks/core/bench_scheduler.cpp | 124 +++++++++++++++++++--- 1 file changed, 107 insertions(+), 17 deletions(-) diff --git a/tests/benchmarks/core/bench_scheduler.cpp b/tests/benchmarks/core/bench_scheduler.cpp index 9357734cc8..214fe0e4b8 100644 --- a/tests/benchmarks/core/bench_scheduler.cpp +++ b/tests/benchmarks/core/bench_scheduler.cpp @@ -8,7 +8,24 @@ namespace esphome::benchmarks { // Inner iteration count to amortize CodSpeed instrumentation overhead. // Without this, the ~60ns per-iteration valgrind start/stop cost dominates // sub-microsecond benchmarks. -static constexpr int kInnerIterations = 2000; +// Must be divisible by all batch sizes used below (3, 10) to avoid +// pool imbalance at iteration boundaries that causes spurious malloc. +static constexpr int kInnerIterations = 2100; + +// Warm the scheduler pool by registering and replacing items twice. +// The first batch allocates fresh items; the second batch cancels them and +// populates the recycling pool with the cancelled items from the first batch. +static void warm_pool(Scheduler &scheduler, Component *component, int batch_size, uint32_t delay) { + uint32_t now = millis(); + for (int i = 0; i < batch_size; i++) { + scheduler.set_timeout(component, static_cast(i), delay, []() {}); + } + scheduler.call(++now); + for (int i = 0; i < batch_size; i++) { + scheduler.set_timeout(component, static_cast(i), delay, []() {}); + } + scheduler.call(++now); +} // --- Scheduler fast path: no work to do --- @@ -83,11 +100,21 @@ static void Scheduler_SetTimeout(benchmark::State &state) { Scheduler scheduler; Component dummy_component; + // Register 3 timeouts then call() — realistic worst case where multiple + // components schedule in the same loop iteration. Keeps item count within + // the recycling pool (MAX_POOL_SIZE=5) to avoid spurious malloc/free. + static constexpr int kBatchSize = 3; + static_assert(kInnerIterations % kBatchSize == 0, "kInnerIterations must be divisible by kBatchSize"); + warm_pool(scheduler, &dummy_component, kBatchSize, 1000); for (auto _ : state) { + uint32_t now = millis(); for (int i = 0; i < kInnerIterations; i++) { - scheduler.set_timeout(&dummy_component, static_cast(i % 5), 1000, []() {}); + scheduler.set_timeout(&dummy_component, static_cast(i % kBatchSize), 1000, []() {}); + if ((i + 1) % kBatchSize == 0) { + scheduler.call(++now); + } } - scheduler.process_to_add(); + scheduler.call(++now); benchmark::DoNotOptimize(scheduler); } state.SetItemsProcessed(state.iterations() * kInnerIterations); @@ -99,22 +126,22 @@ BENCHMARK(Scheduler_SetTimeout); static void Scheduler_SetInterval(benchmark::State &state) { Scheduler scheduler; Component dummy_component; - // Number of distinct interval keys; controls how many unique timers exist - // simultaneously and the drain cadence for process_to_add(). - static constexpr int kKeyCount = 5; + // Register 3 intervals then call() — realistic worst case where multiple + // components schedule in the same loop iteration. Keeps item count within + // the recycling pool (MAX_POOL_SIZE=5) to avoid spurious malloc/free. + static constexpr int kBatchSize = 3; + static_assert(kInnerIterations % kBatchSize == 0, "kInnerIterations must be divisible by kBatchSize"); + warm_pool(scheduler, &dummy_component, kBatchSize, 1000); for (auto _ : state) { + uint32_t now = millis(); for (int i = 0; i < kInnerIterations; i++) { - scheduler.set_interval(&dummy_component, static_cast(i % kKeyCount), 1000, []() {}); - // Drain to_add_ periodically to reflect production behavior where - // process_to_add() runs each main loop iteration. Without this, - // cancelled items accumulate in to_add_ causing O(n²) scan cost. - if ((i + 1) % kKeyCount == 0) { - scheduler.process_to_add(); + scheduler.set_interval(&dummy_component, static_cast(i % kBatchSize), 1000, []() {}); + if ((i + 1) % kBatchSize == 0) { + scheduler.call(++now); } } - // Final drain in case kInnerIterations is not a multiple of 5 - scheduler.process_to_add(); + scheduler.call(++now); benchmark::DoNotOptimize(scheduler); } state.SetItemsProcessed(state.iterations() * kInnerIterations); @@ -128,16 +155,79 @@ static void Scheduler_Defer(benchmark::State &state) { Component dummy_component; // defer() is Component::defer which calls set_timeout(delay=0). - // Call set_timeout directly since defer() is protected. + // Component::defer(func) passes nullptr as the name, which skips + // cancel_item_locked_ entirely — matching production behavior where + // defers are anonymous fire-and-forget callbacks. + static constexpr int kBatchSize = 3; + static_assert(kInnerIterations % kBatchSize == 0, "kInnerIterations must be divisible by kBatchSize"); + warm_pool(scheduler, &dummy_component, kBatchSize, 0); for (auto _ : state) { + uint32_t now = millis(); for (int i = 0; i < kInnerIterations; i++) { - scheduler.set_timeout(&dummy_component, static_cast(i % 5), 0, []() {}); + scheduler.set_timeout(&dummy_component, static_cast(nullptr), 0, []() {}); + if ((i + 1) % kBatchSize == 0) { + scheduler.call(++now); + } } - scheduler.process_to_add(); + scheduler.call(++now); benchmark::DoNotOptimize(scheduler); } state.SetItemsProcessed(state.iterations() * kInnerIterations); } BENCHMARK(Scheduler_Defer); +// --- Scheduler: defer with same ID (cancel-and-replace pattern) --- + +static void Scheduler_Defer_SameID(benchmark::State &state) { + Scheduler scheduler; + Component dummy_component; + + // Measures defer with a fixed numeric ID — each call cancels the previous + // pending defer before adding the new one. This is the pattern used by + // components that defer work but want to coalesce rapid updates. + static constexpr int kBatchSize = 3; + static_assert(kInnerIterations % kBatchSize == 0, "kInnerIterations must be divisible by kBatchSize"); + warm_pool(scheduler, &dummy_component, kBatchSize, 0); + for (auto _ : state) { + uint32_t now = millis(); + for (int i = 0; i < kInnerIterations; i++) { + scheduler.set_timeout(&dummy_component, static_cast(0), 0, []() {}); + if ((i + 1) % kBatchSize == 0) { + scheduler.call(++now); + } + } + scheduler.call(++now); + benchmark::DoNotOptimize(scheduler); + } + state.SetItemsProcessed(state.iterations() * kInnerIterations); +} +BENCHMARK(Scheduler_Defer_SameID); + +// --- Scheduler: set_timeout with batch size exceeding pool (cliff test) --- + +static void Scheduler_SetTimeout_ExceedPool(benchmark::State &state) { + Scheduler scheduler; + Component dummy_component; + + // Register 10 timeouts then call() — exceeds MAX_POOL_SIZE=5 to measure + // the performance cliff when the recycling pool is exhausted and items + // must be malloc'd/freed. + static constexpr int kBatchSize = 10; + static_assert(kInnerIterations % kBatchSize == 0, "kInnerIterations must be divisible by kBatchSize"); + warm_pool(scheduler, &dummy_component, kBatchSize, 1000); + for (auto _ : state) { + uint32_t now = millis(); + for (int i = 0; i < kInnerIterations; i++) { + scheduler.set_timeout(&dummy_component, static_cast(i % kBatchSize), 1000, []() {}); + if ((i + 1) % kBatchSize == 0) { + scheduler.call(++now); + } + } + scheduler.call(++now); + benchmark::DoNotOptimize(scheduler); + } + state.SetItemsProcessed(state.iterations() * kInnerIterations); +} +BENCHMARK(Scheduler_SetTimeout_ExceedPool); + } // namespace esphome::benchmarks