[core] Add CodSpeed C++ benchmarks for protobuf, main loop, and helpers (#14878)

This commit is contained in:
J. Nick Koston
2026-03-17 12:29:38 -10:00
committed by GitHub
parent 1adf05e2d5
commit 1670f04a87
17 changed files with 1541 additions and 0 deletions

View File

@@ -0,0 +1,2 @@
/.esphome/
/secrets.yaml

View File

@@ -0,0 +1,93 @@
#include <benchmark/benchmark.h>
#include "esphome/components/api/api_pb2.h"
#include "esphome/components/api/api_buffer.h"
namespace esphome::api::benchmarks {
// Inner iteration count to amortize CodSpeed instrumentation overhead.
// Without this, the ~60ns per-iteration valgrind start/stop cost dominates
// sub-microsecond benchmarks.
// Each benchmark in this file repeats its measured call this many times per
// benchmark iteration and multiplies SetItemsProcessed() by the same factor.
static constexpr int kInnerIterations = 2000;
// Serializes `msg` into a new APIBuffer and returns it.
// The decode benchmarks call this once during setup and then decode the
// produced bytes in their measured loop. Deriving the bytes from the message
// type itself keeps the benchmark input in step with the protobuf schema;
// hand-written byte arrays would silently go stale when fields change.
template<typename T> static APIBuffer encode_message(const T &msg) {
  // Size the buffer to exactly what the message reports before writing.
  const uint32_t encoded_size = msg.calculate_size();
  APIBuffer out;
  out.resize(encoded_size);
  // Write from offset 0 of the freshly sized buffer.
  ProtoWriteBuffer writer(&out, 0);
  msg.encode(writer);
  return out;
}
// --- HelloRequest decode (string + varint fields) ---
// Encodes a populated HelloRequest once, then measures decoding those bytes
// kInnerIterations times per benchmark iteration.
static void Decode_HelloRequest(benchmark::State &state) {
  HelloRequest request;
  request.client_info = StringRef::from_lit("aioesphomeapi");
  request.api_version_major = 1;
  request.api_version_minor = 10;
  auto wire = encode_message(request);

  for (auto _ : state) {
    HelloRequest decoded;
    for (int pass = 0; pass < kInnerIterations; ++pass) {
      decoded.decode(wire.data(), wire.size());
    }
    // Observe a decoded field so the decode work is not discarded.
    benchmark::DoNotOptimize(decoded.api_version_major);
  }

  // Report throughput per decode call, not per outer benchmark iteration.
  const auto items = state.iterations() * kInnerIterations;
  state.SetItemsProcessed(items);
}
BENCHMARK(Decode_HelloRequest);
// --- SwitchCommandRequest decode (simple command) ---
// Encodes a SwitchCommandRequest (key + state) once, then measures decoding
// those bytes kInnerIterations times per benchmark iteration.
static void Decode_SwitchCommandRequest(benchmark::State &state) {
  SwitchCommandRequest request;
  request.key = 0x12345678;
  request.state = true;
  auto wire = encode_message(request);

  for (auto _ : state) {
    SwitchCommandRequest decoded;
    for (int pass = 0; pass < kInnerIterations; ++pass) {
      decoded.decode(wire.data(), wire.size());
    }
    // Observe a decoded field so the decode work is not discarded.
    benchmark::DoNotOptimize(decoded.state);
  }

  // Report throughput per decode call, not per outer benchmark iteration.
  const auto items = state.iterations() * kInnerIterations;
  state.SetItemsProcessed(items);
}
BENCHMARK(Decode_SwitchCommandRequest);
// --- LightCommandRequest decode (complex command with many fields) ---
// Encodes a LightCommandRequest carrying state, brightness, RGB and effect
// once, then measures decoding those bytes kInnerIterations times per
// benchmark iteration.
static void Decode_LightCommandRequest(benchmark::State &state) {
  LightCommandRequest request;
  request.key = 0x11223344;
  // On/off state
  request.has_state = true;
  request.state = true;
  // Brightness
  request.has_brightness = true;
  request.brightness = 0.8f;
  // RGB color
  request.has_rgb = true;
  request.red = 1.0f;
  request.green = 0.5f;
  request.blue = 0.2f;
  // Effect name
  request.has_effect = true;
  request.effect = StringRef::from_lit("rainbow");
  auto wire = encode_message(request);

  for (auto _ : state) {
    LightCommandRequest decoded;
    for (int pass = 0; pass < kInnerIterations; ++pass) {
      decoded.decode(wire.data(), wire.size());
    }
    // Observe a decoded field so the decode work is not discarded.
    benchmark::DoNotOptimize(decoded.brightness);
  }

  // Report throughput per decode call, not per outer benchmark iteration.
  const auto items = state.iterations() * kInnerIterations;
  state.SetItemsProcessed(items);
}
BENCHMARK(Decode_LightCommandRequest);
} // namespace esphome::api::benchmarks

View File

@@ -0,0 +1,298 @@
#include <benchmark/benchmark.h>
#include "esphome/components/api/api_pb2.h"
#include "esphome/components/api/api_buffer.h"
namespace esphome::api::benchmarks {
// Inner iteration count to amortize CodSpeed instrumentation overhead.
// Without this, the ~60ns per-iteration valgrind start/stop cost dominates
// sub-microsecond benchmarks.
// Each benchmark in this file repeats its measured work this many times per
// benchmark iteration and multiplies SetItemsProcessed() by the same factor.
static constexpr int kInnerIterations = 2000;
// --- SensorStateResponse (highest frequency message) ---
// Measures encode() alone: the buffer is sized once up front and every inner
// iteration rewrites it from offset 0.
static void Encode_SensorStateResponse(benchmark::State &state) {
  SensorStateResponse response;
  response.key = 0x12345678;
  response.state = 23.5f;
  response.missing_state = false;

  APIBuffer out;
  const uint32_t encoded_size = response.calculate_size();
  out.resize(encoded_size);

  for (auto _ : state) {
    for (int pass = 0; pass < kInnerIterations; ++pass) {
      ProtoWriteBuffer writer(&out, 0);
      response.encode(writer);
    }
    benchmark::DoNotOptimize(out.data());
  }

  // Report throughput per encode call, not per outer benchmark iteration.
  const auto items = state.iterations() * kInnerIterations;
  state.SetItemsProcessed(items);
}
BENCHMARK(Encode_SensorStateResponse);
// Measures calculate_size() alone for a SensorStateResponse. The sizes are
// summed so that every call contributes to the value handed to DoNotOptimize.
static void CalculateSize_SensorStateResponse(benchmark::State &state) {
  SensorStateResponse response;
  response.key = 0x12345678;
  response.state = 23.5f;
  response.missing_state = false;

  for (auto _ : state) {
    uint32_t total = 0;
    for (int pass = 0; pass < kInnerIterations; ++pass) {
      total += response.calculate_size();
    }
    benchmark::DoNotOptimize(total);
  }

  // Report throughput per calculate_size() call.
  const auto items = state.iterations() * kInnerIterations;
  state.SetItemsProcessed(items);
}
BENCHMARK(CalculateSize_SensorStateResponse);
// Steady state: buffer already allocated from previous iteration.
// Measures calculate_size() + resize() + encode() together while reusing a
// single APIBuffer across all iterations.
static void CalcAndEncode_SensorStateResponse(benchmark::State &state) {
  SensorStateResponse response;
  response.key = 0x12345678;
  response.state = 23.5f;
  response.missing_state = false;

  APIBuffer out;
  for (auto _ : state) {
    for (int pass = 0; pass < kInnerIterations; ++pass) {
      const uint32_t encoded_size = response.calculate_size();
      out.resize(encoded_size);
      ProtoWriteBuffer writer(&out, 0);
      response.encode(writer);
    }
    benchmark::DoNotOptimize(out.data());
  }

  // Report throughput per calc+encode pass.
  const auto items = state.iterations() * kInnerIterations;
  state.SetItemsProcessed(items);
}
BENCHMARK(CalcAndEncode_SensorStateResponse);
// Cold path: a brand-new APIBuffer is constructed for every item, so the
// measurement covers allocation as well as calculate_size() and encode().
// The inner loop is still required to amortize CodSpeed instrumentation
// overhead; because the buffer lives inside it, each inner pass pays the
// full alloc+calc+encode cost.
static void CalcAndEncode_SensorStateResponse_Fresh(benchmark::State &state) {
  SensorStateResponse response;
  response.key = 0x12345678;
  response.state = 23.5f;
  response.missing_state = false;

  for (auto _ : state) {
    for (int pass = 0; pass < kInnerIterations; ++pass) {
      APIBuffer out;
      const uint32_t encoded_size = response.calculate_size();
      out.resize(encoded_size);
      ProtoWriteBuffer writer(&out, 0);
      response.encode(writer);
      // Observe the buffer before it is destroyed at the end of this pass.
      benchmark::DoNotOptimize(out.data());
    }
  }

  // Report throughput per alloc+calc+encode pass.
  const auto items = state.iterations() * kInnerIterations;
  state.SetItemsProcessed(items);
}
BENCHMARK(CalcAndEncode_SensorStateResponse_Fresh);
// --- BinarySensorStateResponse ---
// Measures encode() alone: the buffer is sized once up front and every inner
// iteration rewrites it from offset 0.
static void Encode_BinarySensorStateResponse(benchmark::State &state) {
  BinarySensorStateResponse response;
  response.key = 0xAABBCCDD;
  response.state = true;
  response.missing_state = false;

  APIBuffer out;
  const uint32_t encoded_size = response.calculate_size();
  out.resize(encoded_size);

  for (auto _ : state) {
    for (int pass = 0; pass < kInnerIterations; ++pass) {
      ProtoWriteBuffer writer(&out, 0);
      response.encode(writer);
    }
    benchmark::DoNotOptimize(out.data());
  }

  // Report throughput per encode call.
  const auto items = state.iterations() * kInnerIterations;
  state.SetItemsProcessed(items);
}
BENCHMARK(Encode_BinarySensorStateResponse);
// --- HelloResponse (string fields) ---
// Measures encode() alone for a message with two varint fields and two
// string fields; the buffer is sized once before the timed loop.
static void Encode_HelloResponse(benchmark::State &state) {
  HelloResponse response;
  response.api_version_major = 1;
  response.api_version_minor = 10;
  response.server_info = StringRef::from_lit("esphome v2026.3.0");
  response.name = StringRef::from_lit("living-room-sensor");

  APIBuffer out;
  const uint32_t encoded_size = response.calculate_size();
  out.resize(encoded_size);

  for (auto _ : state) {
    for (int pass = 0; pass < kInnerIterations; ++pass) {
      ProtoWriteBuffer writer(&out, 0);
      response.encode(writer);
    }
    benchmark::DoNotOptimize(out.data());
  }

  // Report throughput per encode call.
  const auto items = state.iterations() * kInnerIterations;
  state.SetItemsProcessed(items);
}
BENCHMARK(Encode_HelloResponse);
// --- LightStateResponse (complex multi-field message) ---
// Measures encode() alone for a message with many float fields, an enum and
// a string; the buffer is sized once before the timed loop.
static void Encode_LightStateResponse(benchmark::State &state) {
  LightStateResponse light;
  light.key = 0x11223344;
  light.state = true;
  light.brightness = 0.8f;
  light.color_mode = enums::COLOR_MODE_RGB_WHITE;
  light.color_brightness = 1.0f;
  light.red = 1.0f;
  light.green = 0.5f;
  light.blue = 0.2f;
  light.white = 0.0f;
  light.color_temperature = 4000.0f;
  light.cold_white = 0.0f;
  light.warm_white = 0.0f;
  light.effect = StringRef::from_lit("rainbow");

  APIBuffer out;
  const uint32_t encoded_size = light.calculate_size();
  out.resize(encoded_size);

  for (auto _ : state) {
    for (int pass = 0; pass < kInnerIterations; ++pass) {
      ProtoWriteBuffer writer(&out, 0);
      light.encode(writer);
    }
    benchmark::DoNotOptimize(out.data());
  }

  // Report throughput per encode call.
  const auto items = state.iterations() * kInnerIterations;
  state.SetItemsProcessed(items);
}
BENCHMARK(Encode_LightStateResponse);
// Measures calculate_size() alone for a fully populated LightStateResponse.
// The sizes are summed so that every call contributes to the value handed to
// DoNotOptimize.
static void CalculateSize_LightStateResponse(benchmark::State &state) {
  LightStateResponse light;
  light.key = 0x11223344;
  light.state = true;
  light.brightness = 0.8f;
  light.color_mode = enums::COLOR_MODE_RGB_WHITE;
  light.color_brightness = 1.0f;
  light.red = 1.0f;
  light.green = 0.5f;
  light.blue = 0.2f;
  light.white = 0.0f;
  light.color_temperature = 4000.0f;
  light.cold_white = 0.0f;
  light.warm_white = 0.0f;
  light.effect = StringRef::from_lit("rainbow");

  for (auto _ : state) {
    uint32_t total = 0;
    for (int pass = 0; pass < kInnerIterations; ++pass) {
      total += light.calculate_size();
    }
    benchmark::DoNotOptimize(total);
  }

  // Report throughput per calculate_size() call.
  const auto items = state.iterations() * kInnerIterations;
  state.SetItemsProcessed(items);
}
BENCHMARK(CalculateSize_LightStateResponse);
// --- DeviceInfoResponse (nested submessages: 20 devices + 20 areas) ---
// Builds the DeviceInfoResponse shared by the DeviceInfoResponse benchmarks.
// Sub-device and area entries are only filled in when USE_DEVICES / USE_AREAS
// are defined, and at most 20 of each (or fewer if the configured
// ESPHOME_DEVICE_COUNT / ESPHOME_AREA_COUNT is smaller).
static DeviceInfoResponse make_device_info_response() {
  DeviceInfoResponse info;
  info.name = StringRef::from_lit("living-room-sensor");
  info.mac_address = StringRef::from_lit("AA:BB:CC:DD:EE:FF");
  info.esphome_version = StringRef::from_lit("2026.3.0");
  info.compilation_time = StringRef::from_lit("Mar 16 2026, 12:00:00");
  info.model = StringRef::from_lit("esp32-poe-iso");
  info.manufacturer = StringRef::from_lit("Olimex");
  info.friendly_name = StringRef::from_lit("Living Room Sensor");
#ifdef USE_DEVICES
  for (uint32_t idx = 0; idx < ESPHOME_DEVICE_COUNT && idx < 20; ++idx) {
    auto &device = info.devices[idx];
    // IDs are 1-based; each device is assigned to one of area IDs 1..20.
    device.device_id = idx + 1;
    device.name = StringRef::from_lit("device");
    device.area_id = (idx % 20) + 1;
  }
#endif
#ifdef USE_AREAS
  for (uint32_t idx = 0; idx < ESPHOME_AREA_COUNT && idx < 20; ++idx) {
    auto &area = info.areas[idx];
    area.area_id = idx + 1;
    area.name = StringRef::from_lit("area");
  }
#endif
  return info;
}
// Measures calculate_size() alone for the DeviceInfoResponse produced by
// make_device_info_response(). The sizes are summed so that every call
// contributes to the value handed to DoNotOptimize.
static void CalculateSize_DeviceInfoResponse(benchmark::State &state) {
  auto info = make_device_info_response();

  for (auto _ : state) {
    uint32_t total = 0;
    for (int pass = 0; pass < kInnerIterations; ++pass) {
      total += info.calculate_size();
    }
    benchmark::DoNotOptimize(total);
  }

  // Report throughput per calculate_size() call.
  const auto items = state.iterations() * kInnerIterations;
  state.SetItemsProcessed(items);
}
BENCHMARK(CalculateSize_DeviceInfoResponse);
// Measures encode() alone for the DeviceInfoResponse produced by
// make_device_info_response(); the buffer is sized once before the timed loop
// and rewritten from offset 0 on every pass.
static void Encode_DeviceInfoResponse(benchmark::State &state) {
  auto info = make_device_info_response();

  APIBuffer out;
  const uint32_t encoded_size = info.calculate_size();
  out.resize(encoded_size);

  for (auto _ : state) {
    for (int pass = 0; pass < kInnerIterations; ++pass) {
      ProtoWriteBuffer writer(&out, 0);
      info.encode(writer);
    }
    benchmark::DoNotOptimize(out.data());
  }

  // Report throughput per encode call.
  const auto items = state.iterations() * kInnerIterations;
  state.SetItemsProcessed(items);
}
BENCHMARK(Encode_DeviceInfoResponse);
// Steady state: buffer already allocated from previous iteration.
// Measures calculate_size() + resize() + encode() together while reusing a
// single APIBuffer across all iterations.
static void CalcAndEncode_DeviceInfoResponse(benchmark::State &state) {
  auto info = make_device_info_response();

  APIBuffer out;
  for (auto _ : state) {
    for (int pass = 0; pass < kInnerIterations; ++pass) {
      const uint32_t encoded_size = info.calculate_size();
      out.resize(encoded_size);
      ProtoWriteBuffer writer(&out, 0);
      info.encode(writer);
    }
    benchmark::DoNotOptimize(out.data());
  }

  // Report throughput per calc+encode pass.
  const auto items = state.iterations() * kInnerIterations;
  state.SetItemsProcessed(items);
}
BENCHMARK(CalcAndEncode_DeviceInfoResponse);
// Cold path: a brand-new APIBuffer is constructed for every item, so the
// measurement covers allocation as well as calculate_size() and encode().
// The inner loop is still required to amortize CodSpeed instrumentation
// overhead; because the buffer lives inside it, each inner pass pays the
// full alloc+calc+encode cost.
static void CalcAndEncode_DeviceInfoResponse_Fresh(benchmark::State &state) {
  auto info = make_device_info_response();

  for (auto _ : state) {
    for (int pass = 0; pass < kInnerIterations; ++pass) {
      APIBuffer out;
      const uint32_t encoded_size = info.calculate_size();
      out.resize(encoded_size);
      ProtoWriteBuffer writer(&out, 0);
      info.encode(writer);
      // Observe the buffer before it is destroyed at the end of this pass.
      benchmark::DoNotOptimize(out.data());
    }
  }

  // Report throughput per alloc+calc+encode pass.
  const auto items = state.iterations() * kInnerIterations;
  state.SetItemsProcessed(items);
}
BENCHMARK(CalcAndEncode_DeviceInfoResponse_Fresh);
} // namespace esphome::api::benchmarks

View File

@@ -0,0 +1,133 @@
#include <benchmark/benchmark.h>
#include "esphome/components/api/proto.h"
#include "esphome/components/api/api_buffer.h"
namespace esphome::api::benchmarks {
// Inner iteration count to amortize CodSpeed instrumentation overhead.
// Without this, the ~60ns per-iteration valgrind start/stop cost dominates
// sub-microsecond benchmarks.
// Each benchmark in this file repeats its measured call this many times per
// benchmark iteration and multiplies SetItemsProcessed() by the same factor.
static constexpr int kInnerIterations = 2000;
// --- ProtoVarInt::parse() benchmarks ---
// Measures parsing a 1-byte varint.
// The input buffer never changes and each result overwrites the previous one,
// so if ProtoVarInt::parse() is inlined the optimizer could hoist the call
// out of the inner loop or drop all but the final parse. To keep every
// iteration live, the input pointer is passed through DoNotOptimize() before
// each call and the result is observed after each call.
static void ProtoVarInt_Parse_SingleByte(benchmark::State &state) {
  uint8_t buf[] = {0x42};  // value = 66
  for (auto _ : state) {
    ProtoVarIntResult result{};
    for (int i = 0; i < kInnerIterations; i++) {
      // Make the pointer (and the memory behind it) opaque to the compiler.
      uint8_t *input = buf;
      benchmark::DoNotOptimize(input);
      result = ProtoVarInt::parse(input, sizeof(buf));
      // Force this iteration's result to be materialized.
      benchmark::DoNotOptimize(result);
    }
  }
  state.SetItemsProcessed(state.iterations() * kInnerIterations);
}
BENCHMARK(ProtoVarInt_Parse_SingleByte);
// Measures parsing a 2-byte varint.
// The input buffer never changes and each result overwrites the previous one,
// so if ProtoVarInt::parse() is inlined the optimizer could hoist the call
// out of the inner loop or drop all but the final parse. To keep every
// iteration live, the input pointer is passed through DoNotOptimize() before
// each call and the result is observed after each call.
static void ProtoVarInt_Parse_TwoByte(benchmark::State &state) {
  uint8_t buf[] = {0x80, 0x01};  // value = 128
  for (auto _ : state) {
    ProtoVarIntResult result{};
    for (int i = 0; i < kInnerIterations; i++) {
      // Make the pointer (and the memory behind it) opaque to the compiler.
      uint8_t *input = buf;
      benchmark::DoNotOptimize(input);
      result = ProtoVarInt::parse(input, sizeof(buf));
      // Force this iteration's result to be materialized.
      benchmark::DoNotOptimize(result);
    }
  }
  state.SetItemsProcessed(state.iterations() * kInnerIterations);
}
BENCHMARK(ProtoVarInt_Parse_TwoByte);
// Measures parsing a 5-byte varint (the longest encoding of a 32-bit value).
// The input buffer never changes and each result overwrites the previous one,
// so if ProtoVarInt::parse() is inlined the optimizer could hoist the call
// out of the inner loop or drop all but the final parse. To keep every
// iteration live, the input pointer is passed through DoNotOptimize() before
// each call and the result is observed after each call.
static void ProtoVarInt_Parse_FiveByte(benchmark::State &state) {
  uint8_t buf[] = {0xFF, 0xFF, 0xFF, 0xFF, 0x0F};  // value = 0xFFFFFFFF
  for (auto _ : state) {
    ProtoVarIntResult result{};
    for (int i = 0; i < kInnerIterations; i++) {
      // Make the pointer (and the memory behind it) opaque to the compiler.
      uint8_t *input = buf;
      benchmark::DoNotOptimize(input);
      result = ProtoVarInt::parse(input, sizeof(buf));
      // Force this iteration's result to be materialized.
      benchmark::DoNotOptimize(result);
    }
  }
  state.SetItemsProcessed(state.iterations() * kInnerIterations);
}
BENCHMARK(ProtoVarInt_Parse_FiveByte);
// --- Varint encoding benchmarks ---
// Measures encoding a value that fits in a single varint byte (42).
// Writing a compile-time constant to the same buffer offset on every pass
// would let the optimizer fold the encoding and drop the repeated stores if
// encode_varint_raw() is inlined. The value is therefore passed through
// DoNotOptimize() before each call, and ClobberMemory() after each call
// forces the encoded bytes to actually be written.
static void Encode_Varint_Small(benchmark::State &state) {
  APIBuffer buffer;
  buffer.resize(16);
  for (auto _ : state) {
    for (int i = 0; i < kInnerIterations; i++) {
      uint32_t value = 42;
      benchmark::DoNotOptimize(value);  // hide the constant from the optimizer
      ProtoWriteBuffer writer(&buffer, 0);
      writer.encode_varint_raw(value);
      benchmark::ClobberMemory();  // treat the stores as observed
    }
    benchmark::DoNotOptimize(buffer.data());
  }
  state.SetItemsProcessed(state.iterations() * kInnerIterations);
}
BENCHMARK(Encode_Varint_Small);
// Measures encoding a value that needs two varint bytes (300).
// Writing a compile-time constant to the same buffer offset on every pass
// would let the optimizer fold the encoding and drop the repeated stores if
// encode_varint_raw() is inlined. The value is therefore passed through
// DoNotOptimize() before each call, and ClobberMemory() after each call
// forces the encoded bytes to actually be written.
static void Encode_Varint_Large(benchmark::State &state) {
  APIBuffer buffer;
  buffer.resize(16);
  for (auto _ : state) {
    for (int i = 0; i < kInnerIterations; i++) {
      uint32_t value = 300;
      benchmark::DoNotOptimize(value);  // hide the constant from the optimizer
      ProtoWriteBuffer writer(&buffer, 0);
      writer.encode_varint_raw(value);
      benchmark::ClobberMemory();  // treat the stores as observed
    }
    benchmark::DoNotOptimize(buffer.data());
  }
  state.SetItemsProcessed(state.iterations() * kInnerIterations);
}
BENCHMARK(Encode_Varint_Large);
// Measures encoding the largest 32-bit value (0xFFFFFFFF, five varint bytes).
// Writing a compile-time constant to the same buffer offset on every pass
// would let the optimizer fold the encoding and drop the repeated stores if
// encode_varint_raw() is inlined. The value is therefore passed through
// DoNotOptimize() before each call, and ClobberMemory() after each call
// forces the encoded bytes to actually be written.
static void Encode_Varint_MaxUint32(benchmark::State &state) {
  APIBuffer buffer;
  buffer.resize(16);
  for (auto _ : state) {
    for (int i = 0; i < kInnerIterations; i++) {
      uint32_t value = 0xFFFFFFFF;
      benchmark::DoNotOptimize(value);  // hide the constant from the optimizer
      ProtoWriteBuffer writer(&buffer, 0);
      writer.encode_varint_raw(value);
      benchmark::ClobberMemory();  // treat the stores as observed
    }
    benchmark::DoNotOptimize(buffer.data());
  }
  state.SetItemsProcessed(state.iterations() * kInnerIterations);
}
BENCHMARK(Encode_Varint_MaxUint32);
// --- ProtoSize::varint() benchmarks ---
// Measures ProtoSize::varint() on inputs in the range 0-127.
// Varying the input with the loop counter is not sufficient on its own: the
// `& 0x7F` mask gives the optimizer a provable upper bound, so if
// ProtoSize::varint() is inlined the whole sum may be folded to a constant.
// Each input is therefore passed through DoNotOptimize() so its value is
// opaque at the call site.
static void ProtoSize_Varint_Small(benchmark::State &state) {
  for (auto _ : state) {
    uint32_t result = 0;
    for (int i = 0; i < kInnerIterations; i++) {
      uint32_t value = static_cast<uint32_t>(i) & 0x7F;
      benchmark::DoNotOptimize(value);  // hide the known value range
      result += ProtoSize::varint(value);
    }
    benchmark::DoNotOptimize(result);
  }
  state.SetItemsProcessed(state.iterations() * kInnerIterations);
}
BENCHMARK(ProtoSize_Varint_Small);
// Measures ProtoSize::varint() on inputs with the top 16 bits set.
// Varying the low bits with the loop counter is not sufficient on its own:
// the `0xFFFF0000 |` term gives the optimizer a provable lower bound, so if
// ProtoSize::varint() is inlined the whole sum may be folded to a constant.
// Each input is therefore passed through DoNotOptimize() so its value is
// opaque at the call site.
static void ProtoSize_Varint_Large(benchmark::State &state) {
  for (auto _ : state) {
    uint32_t result = 0;
    for (int i = 0; i < kInnerIterations; i++) {
      uint32_t value = 0xFFFF0000 | static_cast<uint32_t>(i);
      benchmark::DoNotOptimize(value);  // hide the known high bits
      result += ProtoSize::varint(value);
    }
    benchmark::DoNotOptimize(result);
  }
  state.SetItemsProcessed(state.iterations() * kInnerIterations);
}
BENCHMARK(ProtoSize_Varint_Large);
} // namespace esphome::api::benchmarks

View File

@@ -0,0 +1,114 @@
# Components needed for API protobuf benchmarks.
# Merged into the base config before validation so all
# dependencies get proper defaults.
#
# esphome: sub-keys are merged into the base config.
esphome:
areas:
- id: area_1
name: "Area 1"
- id: area_2
name: "Area 2"
- id: area_3
name: "Area 3"
- id: area_4
name: "Area 4"
- id: area_5
name: "Area 5"
- id: area_6
name: "Area 6"
- id: area_7
name: "Area 7"
- id: area_8
name: "Area 8"
- id: area_9
name: "Area 9"
- id: area_10
name: "Area 10"
- id: area_11
name: "Area 11"
- id: area_12
name: "Area 12"
- id: area_13
name: "Area 13"
- id: area_14
name: "Area 14"
- id: area_15
name: "Area 15"
- id: area_16
name: "Area 16"
- id: area_17
name: "Area 17"
- id: area_18
name: "Area 18"
- id: area_19
name: "Area 19"
- id: area_20
name: "Area 20"
devices:
- id: device_1
name: "Device 1"
area_id: area_1
- id: device_2
name: "Device 2"
area_id: area_2
- id: device_3
name: "Device 3"
area_id: area_3
- id: device_4
name: "Device 4"
area_id: area_4
- id: device_5
name: "Device 5"
area_id: area_5
- id: device_6
name: "Device 6"
area_id: area_6
- id: device_7
name: "Device 7"
area_id: area_7
- id: device_8
name: "Device 8"
area_id: area_8
- id: device_9
name: "Device 9"
area_id: area_9
- id: device_10
name: "Device 10"
area_id: area_10
- id: device_11
name: "Device 11"
area_id: area_11
- id: device_12
name: "Device 12"
area_id: area_12
- id: device_13
name: "Device 13"
area_id: area_13
- id: device_14
name: "Device 14"
area_id: area_14
- id: device_15
name: "Device 15"
area_id: area_15
- id: device_16
name: "Device 16"
area_id: area_16
- id: device_17
name: "Device 17"
area_id: area_17
- id: device_18
name: "Device 18"
area_id: area_18
- id: device_19
name: "Device 19"
area_id: area_19
- id: device_20
name: "Device 20"
area_id: area_20
api:
sensor:
binary_sensor:
light:
switch:

View File

@@ -0,0 +1,42 @@
#include <benchmark/benchmark.h>
#include "esphome/components/logger/logger.h"
/*
This special main.cpp provides the entry point for Google Benchmark.
It replaces the default ESPHome main with a benchmark runner.
*/
// Auto generated code by esphome
// ========== AUTO GENERATED INCLUDE BLOCK BEGIN ===========
// ========== AUTO GENERATED INCLUDE BLOCK END ===========
// Placeholder for the code-generated App initialization; setup() calls this
// before configuring the logger and running the benchmarks.
// NOTE(review): the body is empty in this template — presumably the ESPHome
// code generator inserts code between the AUTO GENERATED CODE markers, so
// those marker lines should be left untouched; confirm against the generator.
void original_setup() {
// Code-generated App initialization (pre_setup, area/device registration, etc.)
// ========== AUTO GENERATED CODE BEGIN ===========
// =========== AUTO GENERATED CODE END ============
}
void setup() {
// Run auto-generated initialization (App.pre_setup, area/device registration,
// looping_components_.init, etc.) so benchmarks that use App work correctly.
original_setup();
// Log functions call global_logger->log_vprintf_() without a null check,
// so we must set up a Logger before any test that triggers logging.
static esphome::logger::Logger test_logger(0);
test_logger.set_log_level(ESPHOME_LOG_LEVEL);
test_logger.pre_setup();
int argc = 1;
char arg0[] = "benchmark";
char *argv[] = {arg0, nullptr};
::benchmark::Initialize(&argc, argv);
::benchmark::RunSpecifiedBenchmarks();
::benchmark::Shutdown();
exit(0);
}
// Intentionally empty: setup() runs the benchmarks and then calls exit(0),
// so this function has no work to perform.
void loop() {
}