[core] Add CodSpeed C++ benchmarks for protobuf, main loop, and helpers (#14878)

2026-06-24 12:17:23 +00:00 · 2026-03-17 12:29:38 -10:00
parent 1adf05e2d5
commit 1670f04a87
17 changed files with 1541 additions and 0 deletions
--- a/tests/benchmarks/components/.gitignore
+++ b/tests/benchmarks/components/.gitignore
@@ -0,0 +1,2 @@
+/.esphome/
+/secrets.yaml
--- a/tests/benchmarks/components/api/bench_proto_decode.cpp
+++ b/tests/benchmarks/components/api/bench_proto_decode.cpp
@@ -0,0 +1,93 @@
+#include <benchmark/benchmark.h>
+
+#include "esphome/components/api/api_pb2.h"
+#include "esphome/components/api/api_buffer.h"
+
+namespace esphome::api::benchmarks {
+
+// Inner iteration count to amortize CodSpeed instrumentation overhead: without
+// it, the ~60ns per-iteration valgrind start/stop cost dominates sub-microsecond
+// benchmarks. Other bench_*.cpp files define the same constant; keep them in sync.
+static constexpr int kInnerIterations = 2000;  // inner-loop repetitions per `for (auto _ : state)` pass
+
+// Helper: encode `msg` into an APIBuffer sized via calculate_size() and return it.
+// Benchmarks encode once in setup, then decode the resulting bytes in a loop.
+// This keeps decode benchmarks in sync with the actual protobuf schema —
+// hand-encoded byte arrays would silently break when fields change.
+template<typename T> static APIBuffer encode_message(const T &msg) {
+  APIBuffer buffer;
+  uint32_t size = msg.calculate_size();
+  buffer.resize(size);  // size the buffer before encode() writes through the writer
+  ProtoWriteBuffer writer(&buffer, 0);  // NOTE(review): 0 is presumably the starting write offset — confirm
+  msg.encode(writer);
+  return buffer;
+}
+
+// --- HelloRequest decode (string + varint fields) ---
+
+static void Decode_HelloRequest(benchmark::State &state) {
+  HelloRequest source;
+  source.client_info = StringRef::from_lit("aioesphomeapi");
+  source.api_version_major = 1;
+  source.api_version_minor = 10;
+  auto encoded = encode_message(source);  // wire bytes built once, outside the measured loop
+
+  for (auto _ : state) {
+    HelloRequest msg;  // one target per outer pass; the inner loop decodes into it repeatedly
+    for (int i = 0; i < kInnerIterations; i++) {
+      msg.decode(encoded.data(), encoded.size());
+    }
+    benchmark::DoNotOptimize(msg.api_version_major);  // keep a decoded field observable
+  }
+  state.SetItemsProcessed(state.iterations() * kInnerIterations);  // one item per decode() call
+}
+BENCHMARK(Decode_HelloRequest);
+
+// --- SwitchCommandRequest decode (simple command) ---
+
+static void Decode_SwitchCommandRequest(benchmark::State &state) {
+  SwitchCommandRequest source;
+  source.key = 0x12345678;
+  source.state = true;
+  auto encoded = encode_message(source);  // wire bytes built once, outside the measured loop
+
+  for (auto _ : state) {
+    SwitchCommandRequest msg;  // one target per outer pass; the inner loop decodes into it repeatedly
+    for (int i = 0; i < kInnerIterations; i++) {
+      msg.decode(encoded.data(), encoded.size());
+    }
+    benchmark::DoNotOptimize(msg.state);  // keep a decoded field observable
+  }
+  state.SetItemsProcessed(state.iterations() * kInnerIterations);  // one item per decode() call
+}
+BENCHMARK(Decode_SwitchCommandRequest);
+
+// --- LightCommandRequest decode (complex command with many fields) ---
+
+static void Decode_LightCommandRequest(benchmark::State &state) {
+  LightCommandRequest source;  // only key, state, brightness, rgb and effect are populated below
+  source.key = 0x11223344;
+  source.has_state = true;
+  source.state = true;
+  source.has_brightness = true;
+  source.brightness = 0.8f;
+  source.has_rgb = true;
+  source.red = 1.0f;
+  source.green = 0.5f;
+  source.blue = 0.2f;
+  source.has_effect = true;
+  source.effect = StringRef::from_lit("rainbow");
+  auto encoded = encode_message(source);  // wire bytes built once, outside the measured loop
+
+  for (auto _ : state) {
+    LightCommandRequest msg;  // one target per outer pass; the inner loop decodes into it repeatedly
+    for (int i = 0; i < kInnerIterations; i++) {
+      msg.decode(encoded.data(), encoded.size());
+    }
+    benchmark::DoNotOptimize(msg.brightness);  // keep a decoded field observable
+  }
+  state.SetItemsProcessed(state.iterations() * kInnerIterations);  // one item per decode() call
+}
+BENCHMARK(Decode_LightCommandRequest);
+
+}  // namespace esphome::api::benchmarks
--- a/tests/benchmarks/components/api/bench_proto_encode.cpp
+++ b/tests/benchmarks/components/api/bench_proto_encode.cpp
@@ -0,0 +1,298 @@
+#include <benchmark/benchmark.h>
+
+#include "esphome/components/api/api_pb2.h"
+#include "esphome/components/api/api_buffer.h"
+
+namespace esphome::api::benchmarks {
+
+// Inner iteration count to amortize CodSpeed instrumentation overhead: without
+// it, the ~60ns per-iteration valgrind start/stop cost dominates sub-microsecond
+// benchmarks. Other bench_*.cpp files define the same constant; keep them in sync.
+static constexpr int kInnerIterations = 2000;  // inner-loop repetitions per `for (auto _ : state)` pass
+
+// --- SensorStateResponse (highest frequency message) ---
+
+static void Encode_SensorStateResponse(benchmark::State &state) {
+  APIBuffer buffer;
+  SensorStateResponse msg;
+  msg.key = 0x12345678;
+  msg.state = 23.5f;
+  msg.missing_state = false;
+  uint32_t size = msg.calculate_size();
+  buffer.resize(size);  // sized once up front; the measured loop does no resizing
+
+  for (auto _ : state) {
+    for (int i = 0; i < kInnerIterations; i++) {
+      ProtoWriteBuffer writer(&buffer, 0);  // rebuilt per pass, so its construction is measured too
+      msg.encode(writer);
+    }
+    benchmark::DoNotOptimize(buffer.data());  // keep the encoded bytes observable
+  }
+  state.SetItemsProcessed(state.iterations() * kInnerIterations);  // one item per encode() call
+}
+BENCHMARK(Encode_SensorStateResponse);
+
+static void CalculateSize_SensorStateResponse(benchmark::State &state) {
+  SensorStateResponse msg;
+  msg.key = 0x12345678;
+  msg.state = 23.5f;
+  msg.missing_state = false;
+
+  for (auto _ : state) {
+    uint32_t result = 0;  // running sum, reset on every outer pass
+    for (int i = 0; i < kInnerIterations; i++) {
+      result += msg.calculate_size();  // accumulate so every call's result feeds DoNotOptimize
+    }
+    benchmark::DoNotOptimize(result);
+  }
+  state.SetItemsProcessed(state.iterations() * kInnerIterations);  // one item per calculate_size() call
+}
+BENCHMARK(CalculateSize_SensorStateResponse);
+
+// Steady state: one buffer is reused, so it is already allocated after the first pass
+static void CalcAndEncode_SensorStateResponse(benchmark::State &state) {
+  APIBuffer buffer;  // declared outside both loops and reused by every pass
+  SensorStateResponse msg;
+  msg.key = 0x12345678;
+  msg.state = 23.5f;
+  msg.missing_state = false;
+
+  for (auto _ : state) {
+    for (int i = 0; i < kInnerIterations; i++) {
+      uint32_t size = msg.calculate_size();
+      buffer.resize(size);  // msg never changes, so the requested size is the same on every pass
+      ProtoWriteBuffer writer(&buffer, 0);
+      msg.encode(writer);
+    }
+    benchmark::DoNotOptimize(buffer.data());  // keep the encoded bytes observable
+  }
+  state.SetItemsProcessed(state.iterations() * kInnerIterations);  // one item per calc+resize+encode
+}
+BENCHMARK(CalcAndEncode_SensorStateResponse);
+
+// Cold path: fresh buffer each iteration (measures heap allocation cost).
+// Inner loop still needed to amortize CodSpeed instrumentation overhead.
+// Each inner iteration creates a fresh buffer, so this measures
+// alloc+calc+encode per item.
+static void CalcAndEncode_SensorStateResponse_Fresh(benchmark::State &state) {
+  SensorStateResponse msg;
+  msg.key = 0x12345678;
+  msg.state = 23.5f;
+  msg.missing_state = false;
+
+  for (auto _ : state) {
+    for (int i = 0; i < kInnerIterations; i++) {
+      APIBuffer buffer;  // constructed and destroyed on every inner pass
+      uint32_t size = msg.calculate_size();
+      buffer.resize(size);
+      ProtoWriteBuffer writer(&buffer, 0);
+      msg.encode(writer);
+      benchmark::DoNotOptimize(buffer.data());  // inside the loop: the buffer dies at the end of each pass
+    }
+  }
+  state.SetItemsProcessed(state.iterations() * kInnerIterations);  // one item per fresh-buffer encode
+}
+BENCHMARK(CalcAndEncode_SensorStateResponse_Fresh);
+
+// --- BinarySensorStateResponse ---
+
+static void Encode_BinarySensorStateResponse(benchmark::State &state) {
+  APIBuffer buffer;
+  BinarySensorStateResponse msg;
+  msg.key = 0xAABBCCDD;
+  msg.state = true;
+  msg.missing_state = false;
+  uint32_t size = msg.calculate_size();
+  buffer.resize(size);  // sized once up front; the measured loop does no resizing
+
+  for (auto _ : state) {
+    for (int i = 0; i < kInnerIterations; i++) {
+      ProtoWriteBuffer writer(&buffer, 0);  // rebuilt per pass, so its construction is measured too
+      msg.encode(writer);
+    }
+    benchmark::DoNotOptimize(buffer.data());  // keep the encoded bytes observable
+  }
+  state.SetItemsProcessed(state.iterations() * kInnerIterations);  // one item per encode() call
+}
+BENCHMARK(Encode_BinarySensorStateResponse);
+
+// --- HelloResponse (string fields) ---
+
+static void Encode_HelloResponse(benchmark::State &state) {
+  APIBuffer buffer;
+  HelloResponse msg;
+  msg.api_version_major = 1;
+  msg.api_version_minor = 10;
+  msg.server_info = StringRef::from_lit("esphome v2026.3.0");  // 17-character string field
+  msg.name = StringRef::from_lit("living-room-sensor");  // 18-character string field
+  uint32_t size = msg.calculate_size();
+  buffer.resize(size);  // sized once up front; the measured loop does no resizing
+
+  for (auto _ : state) {
+    for (int i = 0; i < kInnerIterations; i++) {
+      ProtoWriteBuffer writer(&buffer, 0);  // rebuilt per pass, so its construction is measured too
+      msg.encode(writer);
+    }
+    benchmark::DoNotOptimize(buffer.data());  // keep the encoded bytes observable
+  }
+  state.SetItemsProcessed(state.iterations() * kInnerIterations);  // one item per encode() call
+}
+BENCHMARK(Encode_HelloResponse);
+
+// --- LightStateResponse (complex multi-field message) ---
+
+static void Encode_LightStateResponse(benchmark::State &state) {
+  APIBuffer buffer;
+  LightStateResponse msg;  // same field values as CalculateSize_LightStateResponse
+  msg.key = 0x11223344;
+  msg.state = true;
+  msg.brightness = 0.8f;
+  msg.color_mode = enums::COLOR_MODE_RGB_WHITE;
+  msg.color_brightness = 1.0f;
+  msg.red = 1.0f;
+  msg.green = 0.5f;
+  msg.blue = 0.2f;
+  msg.white = 0.0f;
+  msg.color_temperature = 4000.0f;
+  msg.cold_white = 0.0f;
+  msg.warm_white = 0.0f;
+  msg.effect = StringRef::from_lit("rainbow");
+  uint32_t size = msg.calculate_size();
+  buffer.resize(size);  // sized once up front; the measured loop does no resizing
+
+  for (auto _ : state) {
+    for (int i = 0; i < kInnerIterations; i++) {
+      ProtoWriteBuffer writer(&buffer, 0);  // rebuilt per pass, so its construction is measured too
+      msg.encode(writer);
+    }
+    benchmark::DoNotOptimize(buffer.data());  // keep the encoded bytes observable
+  }
+  state.SetItemsProcessed(state.iterations() * kInnerIterations);  // one item per encode() call
+}
+BENCHMARK(Encode_LightStateResponse);
+
+static void CalculateSize_LightStateResponse(benchmark::State &state) {
+  LightStateResponse msg;  // same field values as Encode_LightStateResponse
+  msg.key = 0x11223344;
+  msg.state = true;
+  msg.brightness = 0.8f;
+  msg.color_mode = enums::COLOR_MODE_RGB_WHITE;
+  msg.color_brightness = 1.0f;
+  msg.red = 1.0f;
+  msg.green = 0.5f;
+  msg.blue = 0.2f;
+  msg.white = 0.0f;
+  msg.color_temperature = 4000.0f;
+  msg.cold_white = 0.0f;
+  msg.warm_white = 0.0f;
+  msg.effect = StringRef::from_lit("rainbow");
+
+  for (auto _ : state) {
+    uint32_t result = 0;  // running sum, reset on every outer pass
+    for (int i = 0; i < kInnerIterations; i++) {
+      result += msg.calculate_size();  // accumulate so every call's result feeds DoNotOptimize
+    }
+    benchmark::DoNotOptimize(result);
+  }
+  state.SetItemsProcessed(state.iterations() * kInnerIterations);  // one item per calculate_size() call
+}
+BENCHMARK(CalculateSize_LightStateResponse);
+
+// --- DeviceInfoResponse (nested submessages: up to 20 devices + 20 areas) ---
+
+static DeviceInfoResponse make_device_info_response() {
+  DeviceInfoResponse msg;  // shared fixture for all DeviceInfoResponse benchmarks in this file
+  msg.name = StringRef::from_lit("living-room-sensor");
+  msg.mac_address = StringRef::from_lit("AA:BB:CC:DD:EE:FF");
+  msg.esphome_version = StringRef::from_lit("2026.3.0");
+  msg.compilation_time = StringRef::from_lit("Mar 16 2026, 12:00:00");
+  msg.model = StringRef::from_lit("esp32-poe-iso");
+  msg.manufacturer = StringRef::from_lit("Olimex");
+  msg.friendly_name = StringRef::from_lit("Living Room Sensor");
+#ifdef USE_DEVICES
+  for (uint32_t i = 0; i < ESPHOME_DEVICE_COUNT && i < 20; i++) {  // fill min(ESPHOME_DEVICE_COUNT, 20) entries
+    msg.devices[i].device_id = i + 1;
+    msg.devices[i].name = StringRef::from_lit("device");
+    msg.devices[i].area_id = (i % 20) + 1;  // i < 20 here, so this is just i + 1
+  }
+#endif
+#ifdef USE_AREAS
+  for (uint32_t i = 0; i < ESPHOME_AREA_COUNT && i < 20; i++) {  // fill min(ESPHOME_AREA_COUNT, 20) entries
+    msg.areas[i].area_id = i + 1;
+    msg.areas[i].name = StringRef::from_lit("area");
+  }
+#endif
+  return msg;
+}
+
+static void CalculateSize_DeviceInfoResponse(benchmark::State &state) {
+  auto msg = make_device_info_response();  // fixture built once, outside the measured loop
+
+  for (auto _ : state) {
+    uint32_t result = 0;  // running sum, reset on every outer pass
+    for (int i = 0; i < kInnerIterations; i++) {
+      result += msg.calculate_size();  // accumulate so every call's result feeds DoNotOptimize
+    }
+    benchmark::DoNotOptimize(result);
+  }
+  state.SetItemsProcessed(state.iterations() * kInnerIterations);  // one item per calculate_size() call
+}
+BENCHMARK(CalculateSize_DeviceInfoResponse);
+
+static void Encode_DeviceInfoResponse(benchmark::State &state) {
+  auto msg = make_device_info_response();  // fixture built once, outside the measured loop
+  APIBuffer buffer;
+  uint32_t total_size = msg.calculate_size();
+  buffer.resize(total_size);  // sized once up front; the measured loop does no resizing
+
+  for (auto _ : state) {
+    for (int i = 0; i < kInnerIterations; i++) {
+      ProtoWriteBuffer writer(&buffer, 0);  // rebuilt per pass, so its construction is measured too
+      msg.encode(writer);
+    }
+    benchmark::DoNotOptimize(buffer.data());  // keep the encoded bytes observable
+  }
+  state.SetItemsProcessed(state.iterations() * kInnerIterations);  // one item per encode() call
+}
+BENCHMARK(Encode_DeviceInfoResponse);
+
+// Steady state: one buffer is reused, so it is already allocated after the first pass
+static void CalcAndEncode_DeviceInfoResponse(benchmark::State &state) {
+  auto msg = make_device_info_response();  // fixture built once, outside the measured loop
+  APIBuffer buffer;  // declared outside both loops and reused by every pass
+
+  for (auto _ : state) {
+    for (int i = 0; i < kInnerIterations; i++) {
+      uint32_t size = msg.calculate_size();
+      buffer.resize(size);  // msg never changes, so the requested size is the same on every pass
+      ProtoWriteBuffer writer(&buffer, 0);
+      msg.encode(writer);
+    }
+    benchmark::DoNotOptimize(buffer.data());  // keep the encoded bytes observable
+  }
+  state.SetItemsProcessed(state.iterations() * kInnerIterations);  // one item per calc+resize+encode
+}
+BENCHMARK(CalcAndEncode_DeviceInfoResponse);
+
+// Cold path: fresh buffer each iteration (measures heap allocation cost).
+// Inner loop still needed to amortize CodSpeed instrumentation overhead.
+// Each inner iteration creates a fresh buffer, so this measures
+// alloc+calc+encode per item.
+static void CalcAndEncode_DeviceInfoResponse_Fresh(benchmark::State &state) {
+  auto msg = make_device_info_response();  // fixture built once, outside the measured loop
+
+  for (auto _ : state) {
+    for (int i = 0; i < kInnerIterations; i++) {
+      APIBuffer buffer;  // constructed and destroyed on every inner pass
+      uint32_t size = msg.calculate_size();
+      buffer.resize(size);
+      ProtoWriteBuffer writer(&buffer, 0);
+      msg.encode(writer);
+      benchmark::DoNotOptimize(buffer.data());  // inside the loop: the buffer dies at the end of each pass
+    }
+  }
+  state.SetItemsProcessed(state.iterations() * kInnerIterations);  // one item per fresh-buffer encode
+}
+BENCHMARK(CalcAndEncode_DeviceInfoResponse_Fresh);
+
+}  // namespace esphome::api::benchmarks
--- a/tests/benchmarks/components/api/bench_proto_varint.cpp
+++ b/tests/benchmarks/components/api/bench_proto_varint.cpp
@@ -0,0 +1,133 @@
+#include <benchmark/benchmark.h>
+
+#include "esphome/components/api/proto.h"
+#include "esphome/components/api/api_buffer.h"
+
+namespace esphome::api::benchmarks {
+
+// Inner iteration count to amortize CodSpeed instrumentation overhead: without
+// it, the ~60ns per-iteration valgrind start/stop cost dominates sub-microsecond
+// benchmarks. Other bench_*.cpp files define the same constant; keep them in sync.
+static constexpr int kInnerIterations = 2000;  // inner-loop repetitions per `for (auto _ : state)` pass
+
+// --- ProtoVarInt::parse() benchmarks ---
+
+static void ProtoVarInt_Parse_SingleByte(benchmark::State &state) {
+  uint8_t buf[] = {0x42};  // value = 66
+
+  for (auto _ : state) {
+    ProtoVarIntResult result{};  // overwritten by each parse; only the last one reaches DoNotOptimize
+    for (int i = 0; i < kInnerIterations; i++) {
+      result = ProtoVarInt::parse(buf, sizeof(buf));  // NOTE(review): input is loop-invariant; check it is not hoisted
+    }
+    benchmark::DoNotOptimize(result);
+  }
+  state.SetItemsProcessed(state.iterations() * kInnerIterations);  // one item per parse() call
+}
+BENCHMARK(ProtoVarInt_Parse_SingleByte);
+
+static void ProtoVarInt_Parse_TwoByte(benchmark::State &state) {
+  uint8_t buf[] = {0x80, 0x01};  // value = 128
+
+  for (auto _ : state) {
+    ProtoVarIntResult result{};  // overwritten by each parse; only the last one reaches DoNotOptimize
+    for (int i = 0; i < kInnerIterations; i++) {
+      result = ProtoVarInt::parse(buf, sizeof(buf));  // NOTE(review): input is loop-invariant; check it is not hoisted
+    }
+    benchmark::DoNotOptimize(result);
+  }
+  state.SetItemsProcessed(state.iterations() * kInnerIterations);  // one item per parse() call
+}
+BENCHMARK(ProtoVarInt_Parse_TwoByte);
+
+static void ProtoVarInt_Parse_FiveByte(benchmark::State &state) {
+  uint8_t buf[] = {0xFF, 0xFF, 0xFF, 0xFF, 0x0F};  // value = 0xFFFFFFFF (max uint32)
+
+  for (auto _ : state) {
+    ProtoVarIntResult result{};  // overwritten by each parse; only the last one reaches DoNotOptimize
+    for (int i = 0; i < kInnerIterations; i++) {
+      result = ProtoVarInt::parse(buf, sizeof(buf));  // NOTE(review): input is loop-invariant; check it is not hoisted
+    }
+    benchmark::DoNotOptimize(result);
+  }
+  state.SetItemsProcessed(state.iterations() * kInnerIterations);  // one item per parse() call
+}
+BENCHMARK(ProtoVarInt_Parse_FiveByte);
+
+// --- Varint encoding benchmarks ---
+
+static void Encode_Varint_Small(benchmark::State &state) {
+  APIBuffer buffer;
+  buffer.resize(16);  // fixed scratch space, allocated once before the measured loop
+
+  for (auto _ : state) {
+    for (int i = 0; i < kInnerIterations; i++) {
+      ProtoWriteBuffer writer(&buffer, 0);  // rebuilt per pass, so its construction is measured too
+      writer.encode_varint_raw(42);  // 42 < 128: one byte in standard varint encoding
+    }
+    benchmark::DoNotOptimize(buffer.data());  // keep the written bytes observable
+  }
+  state.SetItemsProcessed(state.iterations() * kInnerIterations);  // one item per encode_varint_raw() call
+}
+BENCHMARK(Encode_Varint_Small);
+
+static void Encode_Varint_Large(benchmark::State &state) {
+  APIBuffer buffer;
+  buffer.resize(16);  // fixed scratch space, allocated once before the measured loop
+
+  for (auto _ : state) {
+    for (int i = 0; i < kInnerIterations; i++) {
+      ProtoWriteBuffer writer(&buffer, 0);  // rebuilt per pass, so its construction is measured too
+      writer.encode_varint_raw(300);  // 128 <= 300 < 16384: two bytes in standard varint encoding
+    }
+    benchmark::DoNotOptimize(buffer.data());  // keep the written bytes observable
+  }
+  state.SetItemsProcessed(state.iterations() * kInnerIterations);  // one item per encode_varint_raw() call
+}
+BENCHMARK(Encode_Varint_Large);
+
+static void Encode_Varint_MaxUint32(benchmark::State &state) {
+  APIBuffer buffer;
+  buffer.resize(16);  // fixed scratch space, allocated once before the measured loop
+
+  for (auto _ : state) {
+    for (int i = 0; i < kInnerIterations; i++) {
+      ProtoWriteBuffer writer(&buffer, 0);  // rebuilt per pass, so its construction is measured too
+      writer.encode_varint_raw(0xFFFFFFFF);  // max uint32: five bytes in standard varint encoding
+    }
+    benchmark::DoNotOptimize(buffer.data());  // keep the written bytes observable
+  }
+  state.SetItemsProcessed(state.iterations() * kInnerIterations);  // one item per encode_varint_raw() call
+}
+BENCHMARK(Encode_Varint_MaxUint32);
+
+// --- ProtoSize::varint() benchmarks ---
+
+static void ProtoSize_Varint_Small(benchmark::State &state) {
+  // Use varying input to prevent constant folding.
+  // Values 0-127 all take 1 byte; NOTE(review): confirm the compiler really cannot prove that.
+  for (auto _ : state) {
+    uint32_t result = 0;  // running sum, reset on every outer pass
+    for (int i = 0; i < kInnerIterations; i++) {
+      result += ProtoSize::varint(static_cast<uint32_t>(i) & 0x7F);  // mask keeps the input in 0..127
+    }
+    benchmark::DoNotOptimize(result);
+  }
+  state.SetItemsProcessed(state.iterations() * kInnerIterations);  // one item per varint() call
+}
+BENCHMARK(ProtoSize_Varint_Small);
+
+static void ProtoSize_Varint_Large(benchmark::State &state) {
+  // Use varying input to prevent constant folding.
+  for (auto _ : state) {
+    uint32_t result = 0;  // running sum, reset on every outer pass
+    for (int i = 0; i < kInnerIterations; i++) {
+      result += ProtoSize::varint(0xFFFF0000 | static_cast<uint32_t>(i));  // top 16 bits set: 5-byte range
+    }
+    benchmark::DoNotOptimize(result);
+  }
+  state.SetItemsProcessed(state.iterations() * kInnerIterations);  // one item per varint() call
+}
+BENCHMARK(ProtoSize_Varint_Large);
+
+}  // namespace esphome::api::benchmarks
--- a/tests/benchmarks/components/api/benchmark.yaml
+++ b/tests/benchmarks/components/api/benchmark.yaml
@@ -0,0 +1,114 @@
+# Components needed for API protobuf benchmarks.
+# Merged into the base config before validation so all
+# dependencies get proper defaults.
+#
+# esphome: sub-keys are merged into the base config.
+esphome:
+  areas:
+    - id: area_1
+      name: "Area 1"
+    - id: area_2
+      name: "Area 2"
+    - id: area_3
+      name: "Area 3"
+    - id: area_4
+      name: "Area 4"
+    - id: area_5
+      name: "Area 5"
+    - id: area_6
+      name: "Area 6"
+    - id: area_7
+      name: "Area 7"
+    - id: area_8
+      name: "Area 8"
+    - id: area_9
+      name: "Area 9"
+    - id: area_10
+      name: "Area 10"
+    - id: area_11
+      name: "Area 11"
+    - id: area_12
+      name: "Area 12"
+    - id: area_13
+      name: "Area 13"
+    - id: area_14
+      name: "Area 14"
+    - id: area_15
+      name: "Area 15"
+    - id: area_16
+      name: "Area 16"
+    - id: area_17
+      name: "Area 17"
+    - id: area_18
+      name: "Area 18"
+    - id: area_19
+      name: "Area 19"
+    - id: area_20
+      name: "Area 20"
+  devices:
+    - id: device_1
+      name: "Device 1"
+      area_id: area_1
+    - id: device_2
+      name: "Device 2"
+      area_id: area_2
+    - id: device_3
+      name: "Device 3"
+      area_id: area_3
+    - id: device_4
+      name: "Device 4"
+      area_id: area_4
+    - id: device_5
+      name: "Device 5"
+      area_id: area_5
+    - id: device_6
+      name: "Device 6"
+      area_id: area_6
+    - id: device_7
+      name: "Device 7"
+      area_id: area_7
+    - id: device_8
+      name: "Device 8"
+      area_id: area_8
+    - id: device_9
+      name: "Device 9"
+      area_id: area_9
+    - id: device_10
+      name: "Device 10"
+      area_id: area_10
+    - id: device_11
+      name: "Device 11"
+      area_id: area_11
+    - id: device_12
+      name: "Device 12"
+      area_id: area_12
+    - id: device_13
+      name: "Device 13"
+      area_id: area_13
+    - id: device_14
+      name: "Device 14"
+      area_id: area_14
+    - id: device_15
+      name: "Device 15"
+      area_id: area_15
+    - id: device_16
+      name: "Device 16"
+      area_id: area_16
+    - id: device_17
+      name: "Device 17"
+      area_id: area_17
+    - id: device_18
+      name: "Device 18"
+      area_id: area_18
+    - id: device_19
+      name: "Device 19"
+      area_id: area_19
+    - id: device_20
+      name: "Device 20"
+      area_id: area_20
+
+api:
+sensor:
+binary_sensor:
+light:
+switch:
--- a/tests/benchmarks/components/main.cpp
+++ b/tests/benchmarks/components/main.cpp
@@ -0,0 +1,42 @@
+#include <benchmark/benchmark.h>
+
+#include "esphome/components/logger/logger.h"
+
+/*
+This special main.cpp provides the entry point for Google Benchmark.
+It replaces the default ESPHome main with a benchmark runner.
+
+*/
+
+// Auto generated code by esphome
+// ========== AUTO GENERATED INCLUDE BLOCK BEGIN ===========
+// ========== AUTO GENERATED INCLUDE BLOCK END ===========
+
+void original_setup() {  // called first thing by setup() below
+  // Code-generated App initialization (pre_setup, area/device registration, etc.)
+
+  // ========== AUTO GENERATED CODE BEGIN ===========
+  // =========== AUTO GENERATED CODE END ============
+}
+
+void setup() {
+  // Run auto-generated initialization (App.pre_setup, area/device registration,
+  // looping_components_.init, etc.) so benchmarks that use App work correctly.
+  original_setup();
+
+  // Log functions call global_logger->log_vprintf_() without a null check,
+  // so we must set up a Logger before any test that triggers logging.
+  static esphome::logger::Logger test_logger(0);  // NOTE(review): 0 is presumably the baud rate — confirm
+  test_logger.set_log_level(ESPHOME_LOG_LEVEL);
+  test_logger.pre_setup();
+
+  int argc = 1;  // synthetic command line: program name only, so no benchmark flags are passed
+  char arg0[] = "benchmark";  // mutable array because argv is `char *`, not `const char *`
+  char *argv[] = {arg0, nullptr};  // null-terminated like a real argv
+  ::benchmark::Initialize(&argc, argv);
+  ::benchmark::RunSpecifiedBenchmarks();
+  ::benchmark::Shutdown();
+  exit(0);  // end the process once the benchmarks have run instead of returning to the caller
+}
+
+void loop() {}  // no-op: setup() calls exit(0) after the benchmark run
--- a/tests/benchmarks/core/bench_application_loop.cpp
+++ b/tests/benchmarks/core/bench_application_loop.cpp
@@ -0,0 +1,22 @@
+#include <benchmark/benchmark.h>
+
+#include "esphome/core/application.h"
+
+namespace esphome::benchmarks {
+
+// Benchmark Application::loop() with no registered components.
+// App is initialized by original_setup() in main.cpp (code-generated
+// pre_setup, area/device registration, looping_components_.init).
+// This measures the baseline overhead of the main loop: scheduler,
+// timing, before/after loop tasks, and yield_with_select_.
+static void ApplicationLoop_Empty(benchmark::State &state) {
+  // Set loop interval to 0 so yield_with_select_ returns immediately
+  // instead of sleeping. This benchmarks the loop overhead, not the sleep.
+  App.set_loop_interval(0);  // NOTE(review): never restored; if App is process-global, later benchmarks see 0 too
+  for (auto _ : state) {
+    App.loop();  // no inner loop here: one App.loop() call per benchmark iteration
+  }
+}
+BENCHMARK(ApplicationLoop_Empty);
+
+}  // namespace esphome::benchmarks
--- a/tests/benchmarks/core/bench_helpers.cpp
+++ b/tests/benchmarks/core/bench_helpers.cpp
@@ -0,0 +1,41 @@
+#include <benchmark/benchmark.h>
+
+#include "esphome/core/helpers.h"
+
+namespace esphome::benchmarks {
+
+// Inner iteration count to amortize CodSpeed instrumentation overhead: without
+// it, the ~60ns per-iteration valgrind start/stop cost dominates sub-microsecond
+// benchmarks. Other bench_*.cpp files define the same constant; keep them in sync.
+static constexpr int kInnerIterations = 2000;  // inner-loop repetitions per `for (auto _ : state)` pass
+
+// --- random_float() ---
+// Ported from ol.yaml:148 "Random Float Benchmark"
+
+static void RandomFloat(benchmark::State &state) {
+  for (auto _ : state) {
+    float result = 0.0f;  // running sum, reset on every outer pass
+    for (int i = 0; i < kInnerIterations; i++) {
+      result += random_float();  // accumulate so every call's result feeds DoNotOptimize
+    }
+    benchmark::DoNotOptimize(result);
+  }
+  state.SetItemsProcessed(state.iterations() * kInnerIterations);  // one item per random_float() call
+}
+BENCHMARK(RandomFloat);
+
+// --- random_uint32() ---
+
+static void RandomUint32(benchmark::State &state) {
+  for (auto _ : state) {
+    uint32_t result = 0;  // running sum, reset on every outer pass; unsigned wrap-around is harmless
+    for (int i = 0; i < kInnerIterations; i++) {
+      result += random_uint32();  // accumulate so every call's result feeds DoNotOptimize
+    }
+    benchmark::DoNotOptimize(result);
+  }
+  state.SetItemsProcessed(state.iterations() * kInnerIterations);  // one item per random_uint32() call
+}
+BENCHMARK(RandomUint32);
+
+}  // namespace esphome::benchmarks
--- a/tests/benchmarks/core/bench_logger.cpp
+++ b/tests/benchmarks/core/bench_logger.cpp
@@ -0,0 +1,54 @@
+#include <benchmark/benchmark.h>
+
+#include "esphome/core/log.h"
+
+namespace esphome::benchmarks {
+
+// Inner iteration count to amortize CodSpeed instrumentation overhead: without
+// it, the ~60ns per-iteration valgrind start/stop cost dominates sub-microsecond
+// benchmarks. Other bench_*.cpp files define the same constant; keep them in sync.
+static constexpr int kInnerIterations = 2000;  // inner-loop repetitions per `for (auto _ : state)` pass
+
+static const char *const TAG = "bench";  // tag passed to every ESP_LOGW call in this file
+
+// --- Log a message with no format specifiers (fastest path) ---
+
+static void Logger_NoFormat(benchmark::State &state) {
+  for (auto _ : state) {
+    for (int i = 0; i < kInnerIterations; i++) {
+      ESP_LOGW(TAG, "Something happened");  // NOTE(review): presumably compiled out if WARN is disabled — confirm
+    }
+  }
+  state.SetItemsProcessed(state.iterations() * kInnerIterations);  // one item per log call
+}
+BENCHMARK(Logger_NoFormat);
+
+// --- Log a message with 3 uint32_t format specifiers ---
+
+static void Logger_3Uint32(benchmark::State &state) {
+  uint32_t a = 12345, b = 67890, c = 99999;  // five-digit values, so the formatted length is fixed
+  for (auto _ : state) {
+    for (int i = 0; i < kInnerIterations; i++) {
+      ESP_LOGW(TAG, "Values: %" PRIu32 " %" PRIu32 " %" PRIu32, a, b, c);
+    }
+    benchmark::DoNotOptimize(a);  // keeps `a` opaque to the optimizer; b and c are not covered
+  }
+  state.SetItemsProcessed(state.iterations() * kInnerIterations);  // one item per log call
+}
+BENCHMARK(Logger_3Uint32);
+
+// --- Log a message with 3 floats (common for sensor values) ---
+
+static void Logger_3Float(benchmark::State &state) {
+  float temp = 23.456f, humidity = 67.89f, pressure = 1013.25f;  // fixed sample readings
+  for (auto _ : state) {
+    for (int i = 0; i < kInnerIterations; i++) {
+      ESP_LOGW(TAG, "Sensor: %.2f %.1f %.2f", temp, humidity, pressure);
+    }
+    benchmark::DoNotOptimize(temp);  // keeps `temp` opaque to the optimizer; the other two are not covered
+  }
+  state.SetItemsProcessed(state.iterations() * kInnerIterations);  // one item per log call
+}
+BENCHMARK(Logger_3Float);
+
+}  // namespace esphome::benchmarks
--- a/tests/benchmarks/core/bench_scheduler.cpp
+++ b/tests/benchmarks/core/bench_scheduler.cpp
@@ -0,0 +1,133 @@
+#include <benchmark/benchmark.h>
+
+#include "esphome/core/scheduler.h"
+#include "esphome/core/hal.h"
+
+namespace esphome::benchmarks {
+
+// Inner iteration count to amortize CodSpeed instrumentation overhead: without
+// it, the ~60ns per-iteration valgrind start/stop cost dominates sub-microsecond
+// benchmarks. Other bench_*.cpp files define the same constant; keep them in sync.
+static constexpr int kInnerIterations = 2000;  // inner-loop repetitions per `for (auto _ : state)` pass
+
+// --- Scheduler fast path: no work to do ---
+
+static void Scheduler_Call_NoWork(benchmark::State &state) {
+  Scheduler scheduler;  // local instance with nothing registered on it
+  uint32_t now = millis();  // sampled once; every call() below sees the same timestamp
+
+  for (auto _ : state) {
+    for (int i = 0; i < kInnerIterations; i++) {
+      scheduler.call(now);
+    }
+    benchmark::DoNotOptimize(now);
+  }
+  state.SetItemsProcessed(state.iterations() * kInnerIterations);  // one item per call()
+}
+BENCHMARK(Scheduler_Call_NoWork);
+
+// --- Scheduler with timers: call() when timers exist but aren't due ---
+
+static void Scheduler_Call_TimersNotDue(benchmark::State &state) {
+  Scheduler scheduler;
+  Component dummy_component;  // owner handed to set_timeout(); it does nothing else here
+
+  // Add some timeouts far in the future (10 timers, ids 0..9, no-op callbacks)
+  for (int i = 0; i < 10; i++) {
+    scheduler.set_timeout(&dummy_component, static_cast<uint32_t>(i), 1000000, []() {});
+  }
+  scheduler.process_to_add();  // NOTE(review): presumably moves the queued timers into the active set — confirm
+
+  uint32_t now = millis();  // sampled once; every call() below sees the same timestamp
+
+  for (auto _ : state) {
+    for (int i = 0; i < kInnerIterations; i++) {
+      scheduler.call(now);
+    }
+    benchmark::DoNotOptimize(now);
+  }
+  state.SetItemsProcessed(state.iterations() * kInnerIterations);  // one item per call()
+}
+BENCHMARK(Scheduler_Call_TimersNotDue);
+
+// --- Scheduler with 5 intervals firing every call ---
+
+static void Scheduler_Call_5IntervalsFiring(benchmark::State &state) {
+  Scheduler scheduler;
+  Component dummy_component;  // owner handed to set_interval(); it does nothing else here
+  int fire_count = 0;  // incremented by every interval callback
+
+  // Benchmarks the heap-based scheduler dispatch with 5 callbacks firing.
+  // Uses monotonically increasing fake time so intervals reliably fire every call.
+  // USE_BENCHMARK ifdef in component.h disables WarnIfComponentBlockingGuard
+  // (fake now > real millis() would cause underflow in finish()).
+  // interval=0 would cause an infinite loop (reschedules at same now).
+  for (int i = 0; i < 5; i++) {
+    scheduler.set_interval(&dummy_component, static_cast<uint32_t>(i), 1, [&fire_count]() { fire_count++; });
+  }
+  scheduler.process_to_add();
+
+  uint32_t now = millis() + 100;  // start the fake clock ahead of the real one
+
+  for (auto _ : state) {
+    scheduler.call(now);  // no inner loop and no SetItemsProcessed: one call() per benchmark iteration
+    now++;  // advance fake time by 1 per call, matching the interval of 1
+    benchmark::DoNotOptimize(fire_count);
+  }
+}
+BENCHMARK(Scheduler_Call_5IntervalsFiring);
+
+// --- Scheduler: set_timeout registration ---
+
+static void Scheduler_SetTimeout(benchmark::State &state) {
+  Scheduler scheduler;
+  Component dummy_component;  // owner handed to set_timeout(); it does nothing else here
+
+  for (auto _ : state) {
+    for (int i = 0; i < kInnerIterations; i++) {
+      scheduler.set_timeout(&dummy_component, static_cast<uint32_t>(i % 5), 1000, []() {});  // ids cycle 0..4
+    }
+    scheduler.process_to_add();  // once per kInnerIterations registrations, inside the timed region
+    benchmark::DoNotOptimize(scheduler);
+  }
+  state.SetItemsProcessed(state.iterations() * kInnerIterations);  // one item per set_timeout() call
+}
+BENCHMARK(Scheduler_SetTimeout);
+
+// --- Scheduler: set_interval registration ---
+
+static void Scheduler_SetInterval(benchmark::State &state) {
+  Scheduler scheduler;
+  Component dummy_component;  // owner handed to set_interval(); it does nothing else here
+
+  for (auto _ : state) {
+    for (int i = 0; i < kInnerIterations; i++) {
+      scheduler.set_interval(&dummy_component, static_cast<uint32_t>(i % 5), 1000, []() {});  // ids cycle 0..4
+    }
+    scheduler.process_to_add();  // once per kInnerIterations registrations, inside the timed region
+    benchmark::DoNotOptimize(scheduler);
+  }
+  state.SetItemsProcessed(state.iterations() * kInnerIterations);  // one item per set_interval() call
+}
+BENCHMARK(Scheduler_SetInterval);
+
+// --- Scheduler: defer registration (set_timeout with delay=0) ---
+
+static void Scheduler_Defer(benchmark::State &state) {
+  Scheduler scheduler;
+  Component dummy_component;  // owner handed to set_timeout(); it does nothing else here
+
+  // defer() is Component::defer which calls set_timeout(delay=0).
+  // Call set_timeout directly since defer() is protected.
+  for (auto _ : state) {
+    for (int i = 0; i < kInnerIterations; i++) {
+      scheduler.set_timeout(&dummy_component, static_cast<uint32_t>(i % 5), 0, []() {});  // ids cycle 0..4
+    }
+    scheduler.process_to_add();  // once per kInnerIterations registrations, inside the timed region
+    benchmark::DoNotOptimize(scheduler);
+  }
+  state.SetItemsProcessed(state.iterations() * kInnerIterations);  // one item per set_timeout() call
+}
+BENCHMARK(Scheduler_Defer);
+
+}  // namespace esphome::benchmarks