Compare commits

..

3 Commits

Author SHA1 Message Date
J. Nick Koston cb2e8320f0 [api] Pass pos by value through encode_varint_raw_loop 2026-04-09 22:13:14 -10:00
J. Nick Koston 49ef5f9ad6 [api] Force inline encode_varint_raw_loop so pos stays in a register 2026-04-09 22:07:15 -10:00
J. Nick Koston 707f3f4749 [api] Unroll varint encode loop for compile-time bounded types 2026-04-09 21:59:35 -10:00
27 changed files with 77 additions and 324 deletions
+2 -2
View File
@@ -47,7 +47,7 @@ runs:
- name: Build and push to ghcr by digest
id: build-ghcr
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
uses: docker/build-push-action@d08e5c354a6adb9ed34480a06d141179aa583294 # v7.0.0
env:
DOCKER_BUILD_SUMMARY: false
DOCKER_BUILD_RECORD_UPLOAD: false
@@ -73,7 +73,7 @@ runs:
- name: Build and push to dockerhub by digest
id: build-dockerhub
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
uses: docker/build-push-action@d08e5c354a6adb9ed34480a06d141179aa583294 # v7.0.0
env:
DOCKER_BUILD_SUMMARY: false
DOCKER_BUILD_RECORD_UPLOAD: false
+1 -1
View File
@@ -33,7 +33,7 @@ jobs:
private-key: ${{ secrets.ESPHOME_GITHUB_APP_PRIVATE_KEY }}
- name: Auto Label PR
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
with:
github-token: ${{ steps.generate-token.outputs.token }}
script: |
+3 -3
View File
@@ -47,7 +47,7 @@ jobs:
fi
- if: failure()
name: Review PR
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
with:
script: |
await github.rest.pulls.createReview({
@@ -62,7 +62,7 @@ jobs:
run: git diff
- if: failure()
name: Archive artifacts
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
with:
name: generated-proto-files
path: |
@@ -70,7 +70,7 @@ jobs:
esphome/components/api/api_pb2_service.*
- if: success()
name: Dismiss review
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
with:
script: |
let reviews = await github.rest.pulls.listReviews({
+2 -2
View File
@@ -42,7 +42,7 @@ jobs:
- if: failure() && github.event.pull_request.head.repo.full_name == github.repository
name: Request changes
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
with:
script: |
await github.rest.pulls.createReview({
@@ -55,7 +55,7 @@ jobs:
- if: success() && github.event.pull_request.head.repo.full_name == github.repository
name: Dismiss review
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
with:
script: |
let reviews = await github.rest.pulls.listReviews({
+2 -2
View File
@@ -904,7 +904,7 @@ jobs:
fi
- name: Upload memory analysis JSON
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
with:
name: memory-analysis-target
path: memory-analysis-target.json
@@ -969,7 +969,7 @@ jobs:
--platform "$platform"
- name: Upload memory analysis JSON
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
with:
name: memory-analysis-pr
path: memory-analysis-pr.json
@@ -34,7 +34,7 @@ jobs:
CODEOWNERS
- name: Check codeowner approval and update label
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
with:
@@ -33,7 +33,7 @@ jobs:
ref: ${{ github.event.pull_request.base.sha }}
- name: Request reviews from component codeowners
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
with:
script: |
const { loadCodeowners, getEffectiveOwners } = require('./.github/scripts/codeowners.js');
+1 -1
View File
@@ -15,7 +15,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Add external component comment
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |
+1 -1
View File
@@ -19,7 +19,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Notify codeowners for component issues
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
with:
script: |
const owner = context.repo.owner;
+1 -1
View File
@@ -18,7 +18,7 @@ jobs:
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
- uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
with:
script: |
const {
+4 -4
View File
@@ -138,7 +138,7 @@ jobs:
# version: ${{ needs.init.outputs.tag }}
- name: Upload digests
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
with:
name: digests-${{ matrix.platform.arch }}
path: /tmp/digests
@@ -229,7 +229,7 @@ jobs:
repositories: home-assistant-addon
- name: Trigger Workflow
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
with:
github-token: ${{ steps.generate-token.outputs.token }}
script: |
@@ -264,7 +264,7 @@ jobs:
repositories: esphome-schema
- name: Trigger Workflow
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
with:
github-token: ${{ steps.generate-token.outputs.token }}
script: |
@@ -295,7 +295,7 @@ jobs:
repositories: version-notifier
- name: Trigger Workflow
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
with:
github-token: ${{ steps.generate-token.outputs.token }}
script: |
+1 -1
View File
@@ -14,7 +14,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Check for blocking labels
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
with:
script: |
const blockingLabels = ['needs-docs', 'merge-after-release', 'chained-pr'];
+1 -1
View File
@@ -41,7 +41,7 @@ jobs:
python script/run-in-env.py pre-commit run --all-files
- name: Commit changes
uses: peter-evans/create-pull-request@5f6978faf089d4d20b00c7766989d076bb2fc7f1 # v8.1.1
uses: peter-evans/create-pull-request@c0f553fe549906ede9cf27b5156039d195d2ece0 # v8.1.0
with:
commit-message: "Synchronise Device Classes from Home Assistant"
committer: esphomebot <esphome@openhomefoundation.org>
+26 -8
View File
@@ -298,15 +298,33 @@ constexpr uint32_t VARINT_MAX_2_BYTE = 1 << 14; // 16384
class ProtoEncode {
public:
/// Write a multi-byte varint directly through a pos pointer.
/// Unrolled based on the compile-time max varint length for T
/// (5 bytes for uint32_t, 10 bytes for uint64_t). The explicit unroll gives
/// the compiler straight-line code with early exits instead of a
/// data-dependent back-edge branch, which measurably speeds up BLE raw
/// advertisement MAC encoding (always 7-byte varints) among other hot paths.
///
/// Takes/returns pos by value so this function can remain out-of-line without
/// forcing the caller to spill pos to memory on every byte write. Callers
/// (which are ALWAYS_INLINE) should assign the returned pos back to their
/// local. Code size win vs. inlining the whole unroll at every call site.
template<typename T>
static inline void encode_varint_raw_loop(uint8_t *__restrict__ &pos PROTO_ENCODE_DEBUG_PARAM, T value) {
do {
static uint8_t *encode_varint_raw_loop(uint8_t *__restrict__ pos PROTO_ENCODE_DEBUG_PARAM, T value) {
constexpr int MAX_VARINT_BYTES = (sizeof(T) * 8 + 6) / 7; // 5 for u32, 10 for u64
#pragma GCC unroll 10
for (int i = 0; i < MAX_VARINT_BYTES - 1; i++) {
PROTO_ENCODE_CHECK_BOUNDS(pos, 1);
*pos++ = static_cast<uint8_t>(value | 0x80);
*pos++ = static_cast<uint8_t>(value) | 0x80;
value >>= 7;
} while (value > 0x7F);
if (value <= 0x7F) {
PROTO_ENCODE_CHECK_BOUNDS(pos, 1);
*pos++ = static_cast<uint8_t>(value);
return pos;
}
}
PROTO_ENCODE_CHECK_BOUNDS(pos, 1);
*pos++ = static_cast<uint8_t>(value);
return pos;
}
static inline void ESPHOME_ALWAYS_INLINE encode_varint_raw(uint8_t *__restrict__ &pos PROTO_ENCODE_DEBUG_PARAM,
uint32_t value) {
@@ -315,7 +333,7 @@ class ProtoEncode {
*pos++ = static_cast<uint8_t>(value);
return;
}
encode_varint_raw_loop(pos PROTO_ENCODE_DEBUG_ARG, value);
pos = encode_varint_raw_loop(pos PROTO_ENCODE_DEBUG_ARG, value);
}
/// Encode a varint that is expected to be 1-2 bytes (e.g. zigzag RSSI, small lengths).
static inline void ESPHOME_ALWAYS_INLINE encode_varint_raw_short(uint8_t *__restrict__ &pos PROTO_ENCODE_DEBUG_PARAM,
@@ -331,7 +349,7 @@ class ProtoEncode {
*pos++ = static_cast<uint8_t>(value >> 7);
return;
}
encode_varint_raw_loop(pos PROTO_ENCODE_DEBUG_ARG, value);
pos = encode_varint_raw_loop(pos PROTO_ENCODE_DEBUG_ARG, value);
}
static inline void ESPHOME_ALWAYS_INLINE encode_varint_raw_64(uint8_t *__restrict__ &pos PROTO_ENCODE_DEBUG_PARAM,
uint64_t value) {
@@ -340,7 +358,7 @@ class ProtoEncode {
*pos++ = static_cast<uint8_t>(value);
return;
}
encode_varint_raw_loop(pos PROTO_ENCODE_DEBUG_ARG, value);
pos = encode_varint_raw_loop(pos PROTO_ENCODE_DEBUG_ARG, value);
}
static inline void ESPHOME_ALWAYS_INLINE encode_field_raw(uint8_t *__restrict__ &pos PROTO_ENCODE_DEBUG_PARAM,
uint32_t field_id, uint32_t type) {
@@ -402,7 +420,7 @@ class ProtoEncode {
PROTO_ENCODE_CHECK_BOUNDS(pos, 1 + len);
*pos++ = static_cast<uint8_t>(len);
} else {
encode_varint_raw_loop(pos PROTO_ENCODE_DEBUG_ARG, len);
pos = encode_varint_raw_loop(pos PROTO_ENCODE_DEBUG_ARG, len);
PROTO_ENCODE_CHECK_BOUNDS(pos, len);
}
std::memcpy(pos, string, len);
+9 -16
View File
@@ -102,8 +102,6 @@ CC1101Component::CC1101Component() {
memset(this->pa_table_, 0, sizeof(this->pa_table_));
}
void IRAM_ATTR CC1101Component::gpio_intr(CC1101Component *arg) { arg->enable_loop_soon_any_context(); }
void CC1101Component::setup() {
this->spi_setup();
this->cs_->digital_write(true);
@@ -150,12 +148,11 @@ void CC1101Component::setup() {
// Defer pin mode setup until after all components have completed setup()
// This handles the case where remote_transmitter runs after CC1101 and changes pin mode
if (this->gdo0_pin_ != nullptr) {
this->defer([this]() {
this->gdo0_pin_->pin_mode(gpio::FLAG_INPUT);
if (this->state_.PKT_FORMAT == static_cast<uint8_t>(PacketFormat::PACKET_FORMAT_FIFO)) {
this->gdo0_pin_->attach_interrupt(&CC1101Component::gpio_intr, this, gpio::INTERRUPT_RISING_EDGE);
}
});
this->defer([this]() { this->gdo0_pin_->pin_mode(gpio::FLAG_INPUT); });
}
if (this->state_.PKT_FORMAT != static_cast<uint8_t>(PacketFormat::PACKET_FORMAT_FIFO)) {
this->disable_loop();
}
}
@@ -167,7 +164,6 @@ void CC1101Component::call_listeners_(const std::vector<uint8_t> &packet, float
}
void CC1101Component::loop() {
this->disable_loop();
if (this->state_.PKT_FORMAT != static_cast<uint8_t>(PacketFormat::PACKET_FORMAT_FIFO) || this->gdo0_pin_ == nullptr ||
!this->gdo0_pin_->digital_read()) {
return;
@@ -248,7 +244,6 @@ void CC1101Component::begin_tx() {
this->write_(Register::PKTCTRL0, 0x32);
ESP_LOGV(TAG, "Beginning TX sequence");
if (this->gdo0_pin_ != nullptr) {
this->gdo0_pin_->detach_interrupt();
this->gdo0_pin_->pin_mode(gpio::FLAG_OUTPUT);
}
// Transition through IDLE to bypass CCA (Clear Channel Assessment) which can
@@ -678,12 +673,10 @@ void CC1101Component::set_packet_mode(bool value) {
this->state_.GDO0_CFG = 0x0D;
}
if (this->initialized_) {
if (this->gdo0_pin_ != nullptr) {
if (value) {
this->gdo0_pin_->attach_interrupt(&CC1101Component::gpio_intr, this, gpio::INTERRUPT_RISING_EDGE);
} else {
this->gdo0_pin_->detach_interrupt();
}
if (value) {
this->enable_loop();
} else {
this->disable_loop();
}
this->write_(Register::PKTCTRL0);
this->write_(Register::PKTCTRL1);
-1
View File
@@ -93,7 +93,6 @@ class CC1101Component : public Component,
// GDO pin for packet reception
InternalGPIOPin *gdo0_pin_{nullptr};
static void IRAM_ATTR gpio_intr(CC1101Component *arg);
// Packet handling
void call_listeners_(const std::vector<uint8_t> &packet, float freq_offset, float rssi, uint8_t lqi);
@@ -451,8 +451,6 @@ async def to_code(config):
ota.request_ota_state_listeners()
esp32.add_idf_component(name="espressif/esp-tflite-micro", ref="1.3.3~1")
# Pin esp-nn for stable future builds (esp-tflite-micro depends on esp-nn)
esp32.add_idf_component(name="espressif/esp-nn", ref="1.2.1")
cg.add_build_flag("-DTF_LITE_STATIC_MEMORY")
cg.add_build_flag("-DTF_LITE_DISABLE_X86_NEON")
@@ -29,6 +29,14 @@ void VADModel::log_model_config() {
bool StreamingModel::load_model_() {
RAMAllocator<uint8_t> arena_allocator;
if (this->tensor_arena_ == nullptr) {
this->tensor_arena_ = arena_allocator.allocate(this->tensor_arena_size_);
if (this->tensor_arena_ == nullptr) {
ESP_LOGE(TAG, "Could not allocate the streaming model's tensor arena.");
return false;
}
}
if (this->var_arena_ == nullptr) {
this->var_arena_ = arena_allocator.allocate(STREAMING_MODEL_VARIABLE_ARENA_SIZE);
if (this->var_arena_ == nullptr) {
@@ -45,26 +53,6 @@ bool StreamingModel::load_model_() {
return false;
}
// Probe for the actual required tensor arena size if not yet determined
if (!this->tensor_arena_size_probed_) {
size_t probed_size = this->probe_arena_size_();
if (probed_size > 0) {
ESP_LOGD(TAG, "Probed tensor arena size: %zu bytes", probed_size);
this->tensor_arena_size_ = probed_size;
} else {
ESP_LOGW(TAG, "Arena size probe failed, using manifest size: %zu bytes", this->tensor_arena_size_);
}
this->tensor_arena_size_probed_ = true;
}
if (this->tensor_arena_ == nullptr) {
this->tensor_arena_ = arena_allocator.allocate(this->tensor_arena_size_);
if (this->tensor_arena_ == nullptr) {
ESP_LOGE(TAG, "Could not allocate the streaming model's tensor arena.");
return false;
}
}
if (this->interpreter_ == nullptr) {
this->interpreter_ =
make_unique<tflite::MicroInterpreter>(tflite::GetModel(this->model_start_), this->streaming_op_resolver_,
@@ -106,70 +94,6 @@ bool StreamingModel::load_model_() {
return true;
}
size_t StreamingModel::probe_arena_size_() {
RAMAllocator<uint8_t> arena_allocator;
// Try with the manifest size first, then escalates to 1.5, then 2x if it fails. Different platforms and different
// versions of the esp-nn library require different amounts of memory, so the manifest size may not always be correct,
// and probing allows us to find the actual required size for the current build and platform. Aligns test sizes to 16
// bytes.
size_t attempt_sizes[] = {(this->tensor_arena_size_ + 15) & ~15, (this->tensor_arena_size_ * 3 / 2 + 15) & ~15,
(this->tensor_arena_size_ * 2 + 15) & ~15};
for (size_t attempt_size : attempt_sizes) {
uint8_t *probe_arena = arena_allocator.allocate(attempt_size);
if (probe_arena == nullptr) {
continue;
}
// Verify the model works at all with this arena size
auto probe_interpreter = make_unique<tflite::MicroInterpreter>(
tflite::GetModel(this->model_start_), this->streaming_op_resolver_, probe_arena, attempt_size, this->mrv_);
if (probe_interpreter->AllocateTensors() != kTfLiteOk) {
probe_interpreter.reset();
arena_allocator.deallocate(probe_arena, attempt_size);
this->ma_ = tflite::MicroAllocator::Create(this->var_arena_, STREAMING_MODEL_VARIABLE_ARENA_SIZE);
this->mrv_ = tflite::MicroResourceVariables::Create(this->ma_, 20);
continue;
}
// Try to shrink the arena. Start with arena_used_bytes() + 16 (rounded to 16-byte alignment).
// If that works, use it. Otherwise, try midpoints between that and the full size until one succeeds.
size_t lower = (probe_interpreter->arena_used_bytes() + 16 + 15) & ~15;
probe_interpreter.reset();
this->ma_ = tflite::MicroAllocator::Create(this->var_arena_, STREAMING_MODEL_VARIABLE_ARENA_SIZE);
this->mrv_ = tflite::MicroResourceVariables::Create(this->ma_, 20);
size_t upper = attempt_size;
while (lower < upper) {
auto test_interpreter = make_unique<tflite::MicroInterpreter>(
tflite::GetModel(this->model_start_), this->streaming_op_resolver_, probe_arena, lower, this->mrv_);
bool ok = test_interpreter->AllocateTensors() == kTfLiteOk;
test_interpreter.reset();
this->ma_ = tflite::MicroAllocator::Create(this->var_arena_, STREAMING_MODEL_VARIABLE_ARENA_SIZE);
this->mrv_ = tflite::MicroResourceVariables::Create(this->ma_, 20);
if (ok) {
// Found a working size smaller than the full arena
upper = lower + 16; // Pad by 16 bytes to be safe for future allocations
break;
}
// Try the midpoint between current attempt and full size
lower = ((lower + upper) / 2 + 15) & ~15;
}
arena_allocator.deallocate(probe_arena, attempt_size);
return upper;
}
return 0;
}
void StreamingModel::unload_model() {
this->interpreter_.reset();
@@ -63,10 +63,6 @@ class StreamingModel {
/// @brief Allocates tensor and variable arenas and sets up the model interpreter
/// @return True if successful, false otherwise
bool load_model_();
/// @brief Probes the actual required tensor arena size by trial allocation.
/// Tries the manifest size first, then 2x if that fails.
/// @return The required arena size rounded up to 16-byte alignment, or 0 on failure.
size_t probe_arena_size_();
/// @brief Returns true if successfully registered the streaming model's TensorFlow operations
bool register_streaming_ops_(tflite::MicroMutableOpResolver<20> &op_resolver);
@@ -74,7 +70,6 @@ class StreamingModel {
bool loaded_{false};
bool enabled_{true};
bool tensor_arena_size_probed_{false};
bool unprocessed_probability_status_{false};
uint8_t current_stride_step_{0};
int16_t ignore_windows_{-MIN_SLICES_BEFORE_DETECTION};
+6 -25
View File
@@ -9,7 +9,7 @@
#include <WiFi.h>
#include <pico/cyw43_arch.h> // For cyw43_arch_lwip_begin/end (LwIPLock)
#elif defined(USE_ETHERNET)
#include <lwip_wrap.h> // For LWIPMutex — LwIPLock mirrors its semantics (see below)
#include <LwipEthernet.h> // For ethernet_arch_lwip_begin/end (LwIPLock)
#include "esphome/components/ethernet/ethernet_component.h"
#endif
#include <hardware/structs/rosc.h>
@@ -43,18 +43,9 @@ IRAM_ATTR InterruptLock::~InterruptLock() { restore_interrupts(state_); }
// main loop, corrupting the shared rx_buf_ pbuf chain (use-after-free, pbuf_cat
// assertion failures). See esphome#10681.
//
// WiFi uses cyw43_arch_lwip_begin/end.
//
// For wired Ethernet, taking only the async_context lock is NOT enough. The
// W5500 GPIO IRQ path (LwipIntfDev::_irq) checks arduino-pico's `__inLWIP`
// counter to decide whether to defer packet processing. If we hold the
// async_context lock without bumping `__inLWIP`, an interrupt-driven packet
// arrival re-enters lwIP from IRQ context and corrupts pbufs (the `pbuf_cat`
// assertion crash on wiznet-w5500-evb-pico). We mirror arduino-pico's
// LWIPMutex (cores/rp2040/lwip_wrap.h) exactly: bump `__inLWIP`, take the
// lock, and on release re-unmask any GPIO IRQs that were deferred while we
// held it. We can't `using LwIPLock = LWIPMutex;` in helpers.h because
// pulling lwip_wrap.h there poisons many TUs with lwIP types.
// WiFi uses cyw43_arch_lwip_begin/end; Ethernet uses ethernet_arch_lwip_begin/end.
// Both acquire the async_context recursive mutex to prevent IRQ callbacks from
// firing during critical sections.
//
// When neither WiFi nor Ethernet is configured, this is a no-op since
// there's no network stack and no lwip callbacks to race with.
@@ -62,18 +53,8 @@ IRAM_ATTR InterruptLock::~InterruptLock() { restore_interrupts(state_); }
LwIPLock::LwIPLock() { cyw43_arch_lwip_begin(); }
LwIPLock::~LwIPLock() { cyw43_arch_lwip_end(); }
#elif defined(USE_ETHERNET)
LwIPLock::LwIPLock() {
__inLWIP++;
ethernet_arch_lwip_begin();
}
LwIPLock::~LwIPLock() {
ethernet_arch_lwip_end();
__inLWIP--;
if (__needsIRQEN && !__inLWIP) {
__needsIRQEN = false;
ethernet_arch_lwip_gpio_unmask();
}
}
LwIPLock::LwIPLock() { ethernet_arch_lwip_begin(); }
LwIPLock::~LwIPLock() { ethernet_arch_lwip_end(); }
#else
LwIPLock::LwIPLock() {}
LwIPLock::~LwIPLock() {}
-9
View File
@@ -104,17 +104,11 @@ void SX126x::write_register_(uint16_t reg, uint8_t *data, uint8_t size) {
delayMicroseconds(SWITCHING_DELAY_US);
}
void IRAM_ATTR SX126x::gpio_intr(SX126x *arg) { arg->enable_loop_soon_any_context(); }
void SX126x::setup() {
// setup pins
this->busy_pin_->setup();
this->rst_pin_->setup();
this->dio1_pin_->setup();
if (this->dio1_pin_->is_internal()) {
static_cast<InternalGPIOPin *>(this->dio1_pin_)
->attach_interrupt(&SX126x::gpio_intr, this, gpio::INTERRUPT_RISING_EDGE);
}
// start spi
this->spi_setup();
@@ -354,9 +348,6 @@ void SX126x::call_listeners_(const std::vector<uint8_t> &packet, float rssi, flo
}
void SX126x::loop() {
if (this->dio1_pin_->is_internal()) {
this->disable_loop();
}
if (!this->dio1_pin_->digital_read()) {
return;
}
-2
View File
@@ -3,7 +3,6 @@
#include "esphome/components/spi/spi.h"
#include "esphome/core/automation.h"
#include "esphome/core/component.h"
#include "esphome/core/hal.h"
#include "sx126x_reg.h"
#include <utility>
#include <vector>
@@ -101,7 +100,6 @@ class SX126x : public Component,
Trigger<std::vector<uint8_t>, float, float> *get_packet_trigger() { return &this->packet_trigger_; }
protected:
static void IRAM_ATTR gpio_intr(SX126x *arg);
void configure_fsk_ook_();
void configure_lora_();
void set_packet_params_(uint8_t payload_length);
+5 -4
View File
@@ -53,8 +53,6 @@ void SX127x::write_fifo_(const std::vector<uint8_t> &packet) {
this->disable();
}
void IRAM_ATTR SX127x::gpio_intr(SX127x *arg) { arg->enable_loop_soon_any_context(); }
void SX127x::setup() {
// setup reset
this->rst_pin_->setup();
@@ -62,7 +60,6 @@ void SX127x::setup() {
// setup dio0
if (this->dio0_pin_) {
this->dio0_pin_->setup();
this->dio0_pin_->attach_interrupt(&SX127x::gpio_intr, this, gpio::INTERRUPT_RISING_EDGE);
}
// start spi
@@ -316,7 +313,6 @@ void SX127x::call_listeners_(const std::vector<uint8_t> &packet, float rssi, flo
}
void SX127x::loop() {
this->disable_loop();
if (this->dio0_pin_ == nullptr || !this->dio0_pin_->digital_read()) {
return;
}
@@ -390,6 +386,11 @@ void SX127x::set_mode_(uint8_t modulation, uint8_t mode) {
return;
}
}
if (mode == MODE_RX && (modulation == MOD_LORA || this->packet_mode_)) {
this->enable_loop();
} else {
this->disable_loop();
}
}
void SX127x::set_mode_rx() {
-2
View File
@@ -4,7 +4,6 @@
#include "esphome/components/spi/spi.h"
#include "esphome/core/automation.h"
#include "esphome/core/component.h"
#include "esphome/core/hal.h"
#include <vector>
namespace esphome {
@@ -87,7 +86,6 @@ class SX127x : public Component,
Trigger<std::vector<uint8_t>, float, float> *get_packet_trigger() { return &this->packet_trigger_; }
protected:
static void IRAM_ATTR gpio_intr(SX127x *arg);
void configure_fsk_ook_();
void configure_lora_();
void set_mode_(uint8_t modulation, uint8_t mode);
-93
View File
@@ -450,99 +450,6 @@ void Application::enable_pending_loops_() {
}
#ifdef USE_LWIP_FAST_SELECT
std::atomic<uint32_t> Application::fast_select_scan_total_{0};
std::atomic<uint32_t> Application::fast_select_scan_found_data_{0};
std::atomic<uint32_t> Application::fast_select_scan_load_bearing_{0};
std::atomic<uint32_t> Application::fast_select_scan_load_bearing_race_{0};
std::atomic<uint32_t> Application::fast_select_scan_load_bearing_micro_{0};
std::atomic<uint32_t> Application::fast_select_scan_load_bearing_stall_{0};
void Application::log_fast_select_scan_stats_() {
uint32_t total = fast_select_scan_total_.load(std::memory_order_relaxed);
uint32_t found = fast_select_scan_found_data_.load(std::memory_order_relaxed);
uint32_t load_bearing = fast_select_scan_load_bearing_.load(std::memory_order_relaxed);
uint32_t lb_race = fast_select_scan_load_bearing_race_.load(std::memory_order_relaxed);
uint32_t lb_micro = fast_select_scan_load_bearing_micro_.load(std::memory_order_relaxed);
uint32_t lb_stall = fast_select_scan_load_bearing_stall_.load(std::memory_order_relaxed);
ESP_LOGD(TAG,
"fast_select scan: total=%" PRIu32 " found_data=%" PRIu32 " load_bearing=%" PRIu32 " (race<10us=%" PRIu32
" micro<100us=%" PRIu32 " stall>100us=%" PRIu32 ")",
total, found, load_bearing, lb_race, lb_micro, lb_stall);
}
void Application::note_fast_select_load_bearing_(struct lwip_sock *sock, uint32_t delay_ms) {
uint32_t load_bearing = fast_select_scan_load_bearing_.fetch_add(1, std::memory_order_relaxed) + 1;
// Spin-poll the task notification value for a short bounded window to measure how long
// the counterfactual ulTaskNotifyTake would actually have blocked. This distinguishes
// three cases:
// race (<10µs) — notification arrived within ~10µs of scan start: callback-ordering
// race between the lwip event_callback writing rcvevent and calling
// xTaskNotifyGive a few instructions later. Scan is noise.
// micro (<100µs) — notification arrived within 100µs: still noise at loop_interval scale.
// stall (≥100µs) — notification did not arrive within our polling window. This is the
// only case where the scan could be rescuing a real latency spike.
// Cap the spin at 100µs so that if we're wrong and this IS a real stall, we only add
// 100µs of extra work to that one unlucky loop iteration.
uint32_t t_start = micros();
uint32_t gap_us = UINT32_MAX;
while (true) {
if (ulTaskNotifyValueClear(nullptr, 0) != 0) {
gap_us = micros() - t_start;
break;
}
uint32_t elapsed = micros() - t_start;
if (elapsed >= 100) {
break;
}
}
const char *bucket;
if (gap_us == UINT32_MAX) {
fast_select_scan_load_bearing_stall_.fetch_add(1, std::memory_order_relaxed);
bucket = "STALL";
} else if (gap_us < 10) {
fast_select_scan_load_bearing_race_.fetch_add(1, std::memory_order_relaxed);
bucket = "race";
} else {
fast_select_scan_load_bearing_micro_.fetch_add(1, std::memory_order_relaxed);
bucket = "micro";
}
// Find the socket's index in monitored_sockets_ for easier correlation with registration order.
int index = -1;
for (size_t i = 0; i < this->monitored_sockets_.size(); i++) {
if (this->monitored_sockets_[i] == sock) {
index = static_cast<int>(i);
break;
}
}
// Read the rcvevent value directly. This is the same offset-based read used by
// esphome_lwip_socket_has_data(); value > 0 means unread data is queued.
int16_t rcvevent =
*reinterpret_cast<volatile int16_t *>(reinterpret_cast<char *>(sock) + ESPHOME_LWIP_SOCK_RCVEVENT_OFFSET);
// Count how many other sockets also had data at this scan (could reveal whether it's always
// the same socket or a burst across multiple).
size_t sockets_with_data = 0;
for (struct lwip_sock *s : this->monitored_sockets_) {
if (esphome_lwip_socket_has_data(s))
sockets_with_data++;
}
if (gap_us == UINT32_MAX) {
ESP_LOGW(TAG,
"fast_select LOAD-BEARING #%" PRIu32 " [%s]: sock=%p idx=%d/%u rcvevent=%d delay_ms=%" PRIu32
" sockets_with_data=%u gap_us=>100",
load_bearing, bucket, sock, index, static_cast<unsigned>(this->monitored_sockets_.size()), rcvevent,
delay_ms, static_cast<unsigned>(sockets_with_data));
} else {
ESP_LOGW(TAG,
"fast_select LOAD-BEARING #%" PRIu32 " [%s]: sock=%p idx=%d/%u rcvevent=%d delay_ms=%" PRIu32
" sockets_with_data=%u gap_us=%" PRIu32,
load_bearing, bucket, sock, index, static_cast<unsigned>(this->monitored_sockets_.size()), rcvevent,
delay_ms, static_cast<unsigned>(sockets_with_data), gap_us);
}
}
bool Application::register_socket(struct lwip_sock *sock) {
// It modifies monitored_sockets_ without locking — must only be called from the main loop.
if (sock == nullptr)
-50
View File
@@ -1,7 +1,6 @@
#pragma once
#include <algorithm>
#include <atomic>
#include <ctime>
#include <limits>
#include <span>
@@ -656,25 +655,6 @@ class Application {
FixedVector<Component *> looping_components_{};
#ifdef USE_LWIP_FAST_SELECT
std::vector<struct lwip_sock *> monitored_sockets_; // Cached lwip_sock pointers for direct rcvevent read
// Stats to verify whether the pre-sleep socket scan in yield_with_select_() is ever load-bearing.
// If fast_select_scan_load_bearing_ stays 0 under real workloads, the scan can be removed.
// These are static because yield_with_select_() is inlined at every call site.
static std::atomic<uint32_t> fast_select_scan_total_;
static std::atomic<uint32_t> fast_select_scan_found_data_;
// Umbrella counter: pre-scan notify peek was 0 and scan found data.
// Broken down into three buckets based on the post-scan spin-poll result:
// _race_ — notify arrived in < 10µs (callback-ordering race, scan is noise)
// _micro_ — notify arrived in 10..100µs (still noise at loop_interval scale)
// _stall_ — notify did not arrive within 100µs (the only case that could be a real stall)
// If _stall_ stays 0, the scan is provably irrelevant under this workload.
static std::atomic<uint32_t> fast_select_scan_load_bearing_;
static std::atomic<uint32_t> fast_select_scan_load_bearing_race_;
static std::atomic<uint32_t> fast_select_scan_load_bearing_micro_;
static std::atomic<uint32_t> fast_select_scan_load_bearing_stall_;
uint32_t fast_select_scan_stats_last_log_{0};
void log_fast_select_scan_stats_();
// Non-inline, called only on the rare load-bearing event so the hot path stays unchanged.
void note_fast_select_load_bearing_(struct lwip_sock *sock, uint32_t delay_ms);
#elif defined(USE_HOST)
std::vector<int> socket_fds_; // Vector of all monitored socket file descriptors
#endif
@@ -909,14 +889,6 @@ inline void ESPHOME_ALWAYS_INLINE Application::loop() {
this->yield_with_select_(delay_time);
this->last_loop_ = last_op_end_time;
#ifdef USE_LWIP_FAST_SELECT
// Periodic fast-select scan stats (debug). Remove once the scan is proven unneeded.
if (last_op_end_time - this->fast_select_scan_stats_last_log_ >= 30000) {
this->fast_select_scan_stats_last_log_ = last_op_end_time;
this->log_fast_select_scan_stats_();
}
#endif
if (this->dump_config_at_ < this->components_.size()) {
this->process_dump_config_();
}
@@ -937,30 +909,8 @@ inline void ESPHOME_ALWAYS_INLINE Application::yield_with_select_(uint32_t delay
// If a socket still has unread data (rcvevent > 0) but the task notification was already
// consumed, ulTaskNotifyTake would block until timeout — adding up to delay_ms latency.
// This scan preserves select() semantics: return immediately when any fd is ready.
//
// Debug stats: peek the task notification value BEFORE scanning. This answers the
// counterfactual "if the scan did not exist and we called ulTaskNotifyTake right now,
// would it stall?". ulTaskNotifyValueClear(nullptr, 0) is a pure read — it returns the
// current value and clears zero bits, leaving the notification state untouched. Reading
// before the loop (rather than after finding data) makes the answer TOCTOU-free: the
// value we compare against is the value at the moment Take would have been called.
// LibreTiny's FreeRTOS port predates ulTaskNotifyValueClear (added in FreeRTOS 10.4.0),
// so we fall back to a pessimistic 0, which makes load_bearing an upper bound == found_data
// on that platform. Zero there is still a valid proof that the scan is unused.
#ifdef USE_ESP32
uint32_t fast_select_notify_value_before_scan = ulTaskNotifyValueClear(nullptr, 0);
#else
uint32_t fast_select_notify_value_before_scan = 0;
#endif
fast_select_scan_total_.fetch_add(1, std::memory_order_relaxed);
for (struct lwip_sock *sock : this->monitored_sockets_) {
if (esphome_lwip_socket_has_data(sock)) {
fast_select_scan_found_data_.fetch_add(1, std::memory_order_relaxed);
if (fast_select_notify_value_before_scan == 0) {
// Scan was load-bearing: no notification pending, so Take would have stalled.
// Delegate to a non-inline helper so the hot path stays the same size.
this->note_fast_select_load_bearing_(sock, delay_ms);
}
yield();
return;
}
+2 -2
View File
@@ -12,14 +12,14 @@ platformio==6.1.19
esptool==5.2.0
click==8.3.2
esphome-dashboard==20260408.1
aioesphomeapi==44.13.2
aioesphomeapi==44.13.1
zeroconf==0.148.0
puremagic==1.30
ruamel.yaml==0.19.1 # dashboard_import
ruamel.yaml.clib==0.2.15 # dashboard_import
esphome-glyphsets==0.2.0
pillow==12.2.0
resvg-py==0.3.0
resvg-py==0.2.6
freetype-py==2.5.1
jinja2==3.1.6
bleak==2.1.1