[api] Pass pos by value through encode_varint_raw_loop

[api] Force inline encode_varint_raw_loop so pos stays in a register
[api] Unroll varint encode loop for compile-time bounded types
2026-07-03 05:43:16 +00:00 · 2026-04-09 22:13:14 -10:00 · 2026-04-09 22:07:15 -10:00 · 2026-04-09 21:59:35 -10:00
27 changed files with 77 additions and 324 deletions
@@ -47,7 +47,7 @@ runs:

    - name: Build and push to ghcr by digest
      id: build-ghcr
-      uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
+      uses: docker/build-push-action@d08e5c354a6adb9ed34480a06d141179aa583294 # v7.0.0
      env:
        DOCKER_BUILD_SUMMARY: false
        DOCKER_BUILD_RECORD_UPLOAD: false
@@ -73,7 +73,7 @@ runs:

    - name: Build and push to dockerhub by digest
      id: build-dockerhub
-      uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
+      uses: docker/build-push-action@d08e5c354a6adb9ed34480a06d141179aa583294 # v7.0.0
      env:
        DOCKER_BUILD_SUMMARY: false
        DOCKER_BUILD_RECORD_UPLOAD: false
@@ -33,7 +33,7 @@ jobs:
          private-key: ${{ secrets.ESPHOME_GITHUB_APP_PRIVATE_KEY }}

      - name: Auto Label PR
-        uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
+        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
        with:
          github-token: ${{ steps.generate-token.outputs.token }}
          script: |
@@ -47,7 +47,7 @@ jobs:
          fi
      - if: failure()
        name: Review PR
-        uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
+        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
        with:
          script: |
            await github.rest.pulls.createReview({
@@ -62,7 +62,7 @@ jobs:
        run: git diff
      - if: failure()
        name: Archive artifacts
-        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
+        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
        with:
          name: generated-proto-files
          path: |
@@ -70,7 +70,7 @@ jobs:
            esphome/components/api/api_pb2_service.*
      - if: success()
        name: Dismiss review
-        uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
+        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
        with:
          script: |
            let reviews = await github.rest.pulls.listReviews({
@@ -42,7 +42,7 @@ jobs:

      - if: failure() && github.event.pull_request.head.repo.full_name == github.repository
        name: Request changes
-        uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
+        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
        with:
          script: |
            await github.rest.pulls.createReview({
@@ -55,7 +55,7 @@ jobs:

      - if: success() && github.event.pull_request.head.repo.full_name == github.repository
        name: Dismiss review
-        uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
+        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
        with:
          script: |
            let reviews = await github.rest.pulls.listReviews({
@@ -904,7 +904,7 @@ jobs:
          fi

      - name: Upload memory analysis JSON
-        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
+        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
        with:
          name: memory-analysis-target
          path: memory-analysis-target.json
@@ -969,7 +969,7 @@ jobs:
            --platform "$platform"

      - name: Upload memory analysis JSON
-        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
+        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
        with:
          name: memory-analysis-pr
          path: memory-analysis-pr.json
@@ -34,7 +34,7 @@ jobs:
            CODEOWNERS

      - name: Check codeowner approval and update label
-        uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
+        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
        env:
          PR_NUMBER: ${{ github.event.pull_request.number }}
        with:
@@ -33,7 +33,7 @@ jobs:
          ref: ${{ github.event.pull_request.base.sha }}

      - name: Request reviews from component codeowners
-        uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
+        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
        with:
          script: |
            const { loadCodeowners, getEffectiveOwners } = require('./.github/scripts/codeowners.js');
@@ -15,7 +15,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Add external component comment
-        uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
+        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
@@ -19,7 +19,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Notify codeowners for component issues
-        uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
+        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
        with:
          script: |
            const owner = context.repo.owner;
@@ -18,7 +18,7 @@ jobs:
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

-      - uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
+      - uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
        with:
          script: |
            const {
@@ -138,7 +138,7 @@ jobs:
      #     version: ${{ needs.init.outputs.tag }}

      - name: Upload digests
-        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
+        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
        with:
          name: digests-${{ matrix.platform.arch }}
          path: /tmp/digests
@@ -229,7 +229,7 @@ jobs:
          repositories: home-assistant-addon

      - name: Trigger Workflow
-        uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
+        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
        with:
          github-token: ${{ steps.generate-token.outputs.token }}
          script: |
@@ -264,7 +264,7 @@ jobs:
          repositories: esphome-schema

      - name: Trigger Workflow
-        uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
+        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
        with:
          github-token: ${{ steps.generate-token.outputs.token }}
          script: |
@@ -295,7 +295,7 @@ jobs:
          repositories: version-notifier

      - name: Trigger Workflow
-        uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
+        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
        with:
          github-token: ${{ steps.generate-token.outputs.token }}
          script: |
@@ -14,7 +14,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Check for blocking labels
-        uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
+        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
        with:
          script: |
            const blockingLabels = ['needs-docs', 'merge-after-release', 'chained-pr'];
@@ -41,7 +41,7 @@ jobs:
          python script/run-in-env.py pre-commit run --all-files

      - name: Commit changes
-        uses: peter-evans/create-pull-request@5f6978faf089d4d20b00c7766989d076bb2fc7f1 # v8.1.1
+        uses: peter-evans/create-pull-request@c0f553fe549906ede9cf27b5156039d195d2ece0 # v8.1.0
        with:
          commit-message: "Synchronise Device Classes from Home Assistant"
          committer: esphomebot <esphome@openhomefoundation.org>
@@ -298,15 +298,33 @@ constexpr uint32_t VARINT_MAX_2_BYTE = 1 << 14;  // 16384
 class ProtoEncode {
 public:
  /// Write a multi-byte varint directly through a pos pointer.
+  /// Unrolled based on the compile-time max varint length for T
+  /// (5 bytes for uint32_t, 10 bytes for uint64_t). The explicit unroll gives
+  /// the compiler straight-line code with early exits instead of a
+  /// data-dependent back-edge branch, which measurably speeds up BLE raw
+  /// advertisement MAC encoding (typically 7-byte varints) among other hot paths.
+  ///
+  /// Takes/returns pos by value so this function can remain out-of-line without
+  /// forcing the caller to spill pos to memory on every byte write. Callers
+  /// (which are ALWAYS_INLINE) must assign the returned pos back to their local.
+  /// Staying out-of-line saves code size vs. inlining the unroll at every call site.
  template<typename T>
-  static inline void encode_varint_raw_loop(uint8_t *__restrict__ &pos PROTO_ENCODE_DEBUG_PARAM, T value) {
-    do {
+  static uint8_t *encode_varint_raw_loop(uint8_t *__restrict__ pos PROTO_ENCODE_DEBUG_PARAM, T value) {
+    constexpr int MAX_VARINT_BYTES = (sizeof(T) * 8 + 6) / 7;  // 5 for u32, 10 for u64
+#pragma GCC unroll 10
+    for (int i = 0; i < MAX_VARINT_BYTES - 1; i++) {
      PROTO_ENCODE_CHECK_BOUNDS(pos, 1);
-      *pos++ = static_cast<uint8_t>(value | 0x80);
+      *pos++ = static_cast<uint8_t>(value) | 0x80;
      value >>= 7;
-    } while (value > 0x7F);
+      if (value <= 0x7F) {
+        PROTO_ENCODE_CHECK_BOUNDS(pos, 1);
+        *pos++ = static_cast<uint8_t>(value);
+        return pos;
+      }
+    }
    PROTO_ENCODE_CHECK_BOUNDS(pos, 1);
    *pos++ = static_cast<uint8_t>(value);
+    return pos;
  }
  static inline void ESPHOME_ALWAYS_INLINE encode_varint_raw(uint8_t *__restrict__ &pos PROTO_ENCODE_DEBUG_PARAM,
                                                             uint32_t value) {
@@ -315,7 +333,7 @@ class ProtoEncode {
      *pos++ = static_cast<uint8_t>(value);
      return;
    }
-    encode_varint_raw_loop(pos PROTO_ENCODE_DEBUG_ARG, value);
+    pos = encode_varint_raw_loop(pos PROTO_ENCODE_DEBUG_ARG, value);
  }
  /// Encode a varint that is expected to be 1-2 bytes (e.g. zigzag RSSI, small lengths).
  static inline void ESPHOME_ALWAYS_INLINE encode_varint_raw_short(uint8_t *__restrict__ &pos PROTO_ENCODE_DEBUG_PARAM,
@@ -331,7 +349,7 @@ class ProtoEncode {
      *pos++ = static_cast<uint8_t>(value >> 7);
      return;
    }
-    encode_varint_raw_loop(pos PROTO_ENCODE_DEBUG_ARG, value);
+    pos = encode_varint_raw_loop(pos PROTO_ENCODE_DEBUG_ARG, value);
  }
  static inline void ESPHOME_ALWAYS_INLINE encode_varint_raw_64(uint8_t *__restrict__ &pos PROTO_ENCODE_DEBUG_PARAM,
                                                                uint64_t value) {
@@ -340,7 +358,7 @@ class ProtoEncode {
      *pos++ = static_cast<uint8_t>(value);
      return;
    }
-    encode_varint_raw_loop(pos PROTO_ENCODE_DEBUG_ARG, value);
+    pos = encode_varint_raw_loop(pos PROTO_ENCODE_DEBUG_ARG, value);
  }
  static inline void ESPHOME_ALWAYS_INLINE encode_field_raw(uint8_t *__restrict__ &pos PROTO_ENCODE_DEBUG_PARAM,
                                                            uint32_t field_id, uint32_t type) {
@@ -402,7 +420,7 @@ class ProtoEncode {
      PROTO_ENCODE_CHECK_BOUNDS(pos, 1 + len);
      *pos++ = static_cast<uint8_t>(len);
    } else {
-      encode_varint_raw_loop(pos PROTO_ENCODE_DEBUG_ARG, len);
+      pos = encode_varint_raw_loop(pos PROTO_ENCODE_DEBUG_ARG, len);
      PROTO_ENCODE_CHECK_BOUNDS(pos, len);
    }
    std::memcpy(pos, string, len);
@@ -102,8 +102,6 @@ CC1101Component::CC1101Component() {
  memset(this->pa_table_, 0, sizeof(this->pa_table_));
 }

-void IRAM_ATTR CC1101Component::gpio_intr(CC1101Component *arg) { arg->enable_loop_soon_any_context(); }
-
 void CC1101Component::setup() {
  this->spi_setup();
  this->cs_->digital_write(true);
@@ -150,12 +148,11 @@ void CC1101Component::setup() {
  // Defer pin mode setup until after all components have completed setup()
  // This handles the case where remote_transmitter runs after CC1101 and changes pin mode
  if (this->gdo0_pin_ != nullptr) {
-    this->defer([this]() {
-      this->gdo0_pin_->pin_mode(gpio::FLAG_INPUT);
-      if (this->state_.PKT_FORMAT == static_cast<uint8_t>(PacketFormat::PACKET_FORMAT_FIFO)) {
-        this->gdo0_pin_->attach_interrupt(&CC1101Component::gpio_intr, this, gpio::INTERRUPT_RISING_EDGE);
-      }
-    });
+    this->defer([this]() { this->gdo0_pin_->pin_mode(gpio::FLAG_INPUT); });
+  }
+
+  if (this->state_.PKT_FORMAT != static_cast<uint8_t>(PacketFormat::PACKET_FORMAT_FIFO)) {
+    this->disable_loop();
  }
 }

@@ -167,7 +164,6 @@ void CC1101Component::call_listeners_(const std::vector<uint8_t> &packet, float
 }

 void CC1101Component::loop() {
-  this->disable_loop();
  if (this->state_.PKT_FORMAT != static_cast<uint8_t>(PacketFormat::PACKET_FORMAT_FIFO) || this->gdo0_pin_ == nullptr ||
      !this->gdo0_pin_->digital_read()) {
    return;
@@ -248,7 +244,6 @@ void CC1101Component::begin_tx() {
  this->write_(Register::PKTCTRL0, 0x32);
  ESP_LOGV(TAG, "Beginning TX sequence");
  if (this->gdo0_pin_ != nullptr) {
-    this->gdo0_pin_->detach_interrupt();
    this->gdo0_pin_->pin_mode(gpio::FLAG_OUTPUT);
  }
  // Transition through IDLE to bypass CCA (Clear Channel Assessment) which can
@@ -678,12 +673,10 @@ void CC1101Component::set_packet_mode(bool value) {
    this->state_.GDO0_CFG = 0x0D;
  }
  if (this->initialized_) {
-    if (this->gdo0_pin_ != nullptr) {
-      if (value) {
-        this->gdo0_pin_->attach_interrupt(&CC1101Component::gpio_intr, this, gpio::INTERRUPT_RISING_EDGE);
-      } else {
-        this->gdo0_pin_->detach_interrupt();
-      }
+    if (value) {
+      this->enable_loop();
+    } else {
+      this->disable_loop();
    }
    this->write_(Register::PKTCTRL0);
    this->write_(Register::PKTCTRL1);
@@ -93,7 +93,6 @@ class CC1101Component : public Component,

  // GDO pin for packet reception
  InternalGPIOPin *gdo0_pin_{nullptr};
-  static void IRAM_ATTR gpio_intr(CC1101Component *arg);

  // Packet handling
  void call_listeners_(const std::vector<uint8_t> &packet, float freq_offset, float rssi, uint8_t lqi);
@@ -451,8 +451,6 @@ async def to_code(config):
    ota.request_ota_state_listeners()

    esp32.add_idf_component(name="espressif/esp-tflite-micro", ref="1.3.3~1")
-    # Pin esp-nn for stable future builds (esp-tflite-micro depends on esp-nn)
-    esp32.add_idf_component(name="espressif/esp-nn", ref="1.2.1")

    cg.add_build_flag("-DTF_LITE_STATIC_MEMORY")
    cg.add_build_flag("-DTF_LITE_DISABLE_X86_NEON")
@@ -29,6 +29,14 @@ void VADModel::log_model_config() {
 bool StreamingModel::load_model_() {
  RAMAllocator<uint8_t> arena_allocator;

+  if (this->tensor_arena_ == nullptr) {
+    this->tensor_arena_ = arena_allocator.allocate(this->tensor_arena_size_);
+    if (this->tensor_arena_ == nullptr) {
+      ESP_LOGE(TAG, "Could not allocate the streaming model's tensor arena.");
+      return false;
+    }
+  }
+
  if (this->var_arena_ == nullptr) {
    this->var_arena_ = arena_allocator.allocate(STREAMING_MODEL_VARIABLE_ARENA_SIZE);
    if (this->var_arena_ == nullptr) {
@@ -45,26 +53,6 @@ bool StreamingModel::load_model_() {
    return false;
  }

-  // Probe for the actual required tensor arena size if not yet determined
-  if (!this->tensor_arena_size_probed_) {
-    size_t probed_size = this->probe_arena_size_();
-    if (probed_size > 0) {
-      ESP_LOGD(TAG, "Probed tensor arena size: %zu bytes", probed_size);
-      this->tensor_arena_size_ = probed_size;
-    } else {
-      ESP_LOGW(TAG, "Arena size probe failed, using manifest size: %zu bytes", this->tensor_arena_size_);
-    }
-    this->tensor_arena_size_probed_ = true;
-  }
-
-  if (this->tensor_arena_ == nullptr) {
-    this->tensor_arena_ = arena_allocator.allocate(this->tensor_arena_size_);
-    if (this->tensor_arena_ == nullptr) {
-      ESP_LOGE(TAG, "Could not allocate the streaming model's tensor arena.");
-      return false;
-    }
-  }
-
  if (this->interpreter_ == nullptr) {
    this->interpreter_ =
        make_unique<tflite::MicroInterpreter>(tflite::GetModel(this->model_start_), this->streaming_op_resolver_,
@@ -106,70 +94,6 @@ bool StreamingModel::load_model_() {
  return true;
 }

-size_t StreamingModel::probe_arena_size_() {
-  RAMAllocator<uint8_t> arena_allocator;
-
-  // Try with the manifest size first, then escalates to 1.5, then 2x if it fails. Different platforms and different
-  // versions of the esp-nn library require different amounts of memory, so the manifest size may not always be correct,
-  // and probing allows us to find the actual required size for the current build and platform. Aligns test sizes to 16
-  // bytes.
-  size_t attempt_sizes[] = {(this->tensor_arena_size_ + 15) & ~15, (this->tensor_arena_size_ * 3 / 2 + 15) & ~15,
-                            (this->tensor_arena_size_ * 2 + 15) & ~15};
-
-  for (size_t attempt_size : attempt_sizes) {
-    uint8_t *probe_arena = arena_allocator.allocate(attempt_size);
-    if (probe_arena == nullptr) {
-      continue;
-    }
-
-    // Verify the model works at all with this arena size
-    auto probe_interpreter = make_unique<tflite::MicroInterpreter>(
-        tflite::GetModel(this->model_start_), this->streaming_op_resolver_, probe_arena, attempt_size, this->mrv_);
-
-    if (probe_interpreter->AllocateTensors() != kTfLiteOk) {
-      probe_interpreter.reset();
-      arena_allocator.deallocate(probe_arena, attempt_size);
-      this->ma_ = tflite::MicroAllocator::Create(this->var_arena_, STREAMING_MODEL_VARIABLE_ARENA_SIZE);
-      this->mrv_ = tflite::MicroResourceVariables::Create(this->ma_, 20);
-      continue;
-    }
-
-    // Try to shrink the arena. Start with arena_used_bytes() + 16 (rounded to 16-byte alignment).
-    // If that works, use it. Otherwise, try midpoints between that and the full size until one succeeds.
-    size_t lower = (probe_interpreter->arena_used_bytes() + 16 + 15) & ~15;
-    probe_interpreter.reset();
-    this->ma_ = tflite::MicroAllocator::Create(this->var_arena_, STREAMING_MODEL_VARIABLE_ARENA_SIZE);
-    this->mrv_ = tflite::MicroResourceVariables::Create(this->ma_, 20);
-
-    size_t upper = attempt_size;
-
-    while (lower < upper) {
-      auto test_interpreter = make_unique<tflite::MicroInterpreter>(
-          tflite::GetModel(this->model_start_), this->streaming_op_resolver_, probe_arena, lower, this->mrv_);
-
-      bool ok = test_interpreter->AllocateTensors() == kTfLiteOk;
-
-      test_interpreter.reset();
-      this->ma_ = tflite::MicroAllocator::Create(this->var_arena_, STREAMING_MODEL_VARIABLE_ARENA_SIZE);
-      this->mrv_ = tflite::MicroResourceVariables::Create(this->ma_, 20);
-
-      if (ok) {
-        // Found a working size smaller than the full arena
-        upper = lower + 16;  // Pad by 16 bytes to be safe for future allocations
-        break;
-      }
-
-      // Try the midpoint between current attempt and full size
-      lower = ((lower + upper) / 2 + 15) & ~15;
-    }
-
-    arena_allocator.deallocate(probe_arena, attempt_size);
-    return upper;
-  }
-
-  return 0;
-}
-
 void StreamingModel::unload_model() {
  this->interpreter_.reset();

@@ -63,10 +63,6 @@ class StreamingModel {
  /// @brief Allocates tensor and variable arenas and sets up the model interpreter
  /// @return True if successful, false otherwise
  bool load_model_();
-  /// @brief Probes the actual required tensor arena size by trial allocation.
-  /// Tries the manifest size first, then 2x if that fails.
-  /// @return The required arena size rounded up to 16-byte alignment, or 0 on failure.
-  size_t probe_arena_size_();
  /// @brief Returns true if successfully registered the streaming model's TensorFlow operations
  bool register_streaming_ops_(tflite::MicroMutableOpResolver<20> &op_resolver);

@@ -74,7 +70,6 @@ class StreamingModel {

  bool loaded_{false};
  bool enabled_{true};
-  bool tensor_arena_size_probed_{false};
  bool unprocessed_probability_status_{false};
  uint8_t current_stride_step_{0};
  int16_t ignore_windows_{-MIN_SLICES_BEFORE_DETECTION};
@@ -9,7 +9,7 @@
 #include <WiFi.h>
 #include <pico/cyw43_arch.h>  // For cyw43_arch_lwip_begin/end (LwIPLock)
 #elif defined(USE_ETHERNET)
-#include <lwip_wrap.h>  // For LWIPMutex — LwIPLock mirrors its semantics (see below)
+#include <LwipEthernet.h>  // For ethernet_arch_lwip_begin/end (LwIPLock)
 #include "esphome/components/ethernet/ethernet_component.h"
 #endif
 #include <hardware/structs/rosc.h>
@@ -43,18 +43,9 @@ IRAM_ATTR InterruptLock::~InterruptLock() { restore_interrupts(state_); }
 // main loop, corrupting the shared rx_buf_ pbuf chain (use-after-free, pbuf_cat
 // assertion failures). See esphome#10681.
 //
-// WiFi uses cyw43_arch_lwip_begin/end.
-//
-// For wired Ethernet, taking only the async_context lock is NOT enough. The
-// W5500 GPIO IRQ path (LwipIntfDev::_irq) checks arduino-pico's `__inLWIP`
-// counter to decide whether to defer packet processing. If we hold the
-// async_context lock without bumping `__inLWIP`, an interrupt-driven packet
-// arrival re-enters lwIP from IRQ context and corrupts pbufs (the `pbuf_cat`
-// assertion crash on wiznet-w5500-evb-pico). We mirror arduino-pico's
-// LWIPMutex (cores/rp2040/lwip_wrap.h) exactly: bump `__inLWIP`, take the
-// lock, and on release re-unmask any GPIO IRQs that were deferred while we
-// held it. We can't `using LwIPLock = LWIPMutex;` in helpers.h because
-// pulling lwip_wrap.h there poisons many TUs with lwIP types.
+// WiFi uses cyw43_arch_lwip_begin/end; Ethernet uses ethernet_arch_lwip_begin/end.
+// Both acquire the async_context recursive mutex to prevent IRQ callbacks from
+// firing during critical sections.
 //
 // When neither WiFi nor Ethernet is configured, this is a no-op since
 // there's no network stack and no lwip callbacks to race with.
@@ -62,18 +53,8 @@ IRAM_ATTR InterruptLock::~InterruptLock() { restore_interrupts(state_); }
 LwIPLock::LwIPLock() { cyw43_arch_lwip_begin(); }
 LwIPLock::~LwIPLock() { cyw43_arch_lwip_end(); }
 #elif defined(USE_ETHERNET)
-LwIPLock::LwIPLock() {
-  __inLWIP++;
-  ethernet_arch_lwip_begin();
-}
-LwIPLock::~LwIPLock() {
-  ethernet_arch_lwip_end();
-  __inLWIP--;
-  if (__needsIRQEN && !__inLWIP) {
-    __needsIRQEN = false;
-    ethernet_arch_lwip_gpio_unmask();
-  }
-}
+LwIPLock::LwIPLock() { ethernet_arch_lwip_begin(); }
+LwIPLock::~LwIPLock() { ethernet_arch_lwip_end(); }
 #else
 LwIPLock::LwIPLock() {}
 LwIPLock::~LwIPLock() {}
@@ -104,17 +104,11 @@ void SX126x::write_register_(uint16_t reg, uint8_t *data, uint8_t size) {
  delayMicroseconds(SWITCHING_DELAY_US);
 }

-void IRAM_ATTR SX126x::gpio_intr(SX126x *arg) { arg->enable_loop_soon_any_context(); }
-
 void SX126x::setup() {
  // setup pins
  this->busy_pin_->setup();
  this->rst_pin_->setup();
  this->dio1_pin_->setup();
-  if (this->dio1_pin_->is_internal()) {
-    static_cast<InternalGPIOPin *>(this->dio1_pin_)
-        ->attach_interrupt(&SX126x::gpio_intr, this, gpio::INTERRUPT_RISING_EDGE);
-  }

  // start spi
  this->spi_setup();
@@ -354,9 +348,6 @@ void SX126x::call_listeners_(const std::vector<uint8_t> &packet, float rssi, flo
 }

 void SX126x::loop() {
-  if (this->dio1_pin_->is_internal()) {
-    this->disable_loop();
-  }
  if (!this->dio1_pin_->digital_read()) {
    return;
  }
@@ -3,7 +3,6 @@
 #include "esphome/components/spi/spi.h"
 #include "esphome/core/automation.h"
 #include "esphome/core/component.h"
-#include "esphome/core/hal.h"
 #include "sx126x_reg.h"
 #include <utility>
 #include <vector>
@@ -101,7 +100,6 @@ class SX126x : public Component,
  Trigger<std::vector<uint8_t>, float, float> *get_packet_trigger() { return &this->packet_trigger_; }

 protected:
-  static void IRAM_ATTR gpio_intr(SX126x *arg);
  void configure_fsk_ook_();
  void configure_lora_();
  void set_packet_params_(uint8_t payload_length);
@@ -53,8 +53,6 @@ void SX127x::write_fifo_(const std::vector<uint8_t> &packet) {
  this->disable();
 }

-void IRAM_ATTR SX127x::gpio_intr(SX127x *arg) { arg->enable_loop_soon_any_context(); }
-
 void SX127x::setup() {
  // setup reset
  this->rst_pin_->setup();
@@ -62,7 +60,6 @@ void SX127x::setup() {
  // setup dio0
  if (this->dio0_pin_) {
    this->dio0_pin_->setup();
-    this->dio0_pin_->attach_interrupt(&SX127x::gpio_intr, this, gpio::INTERRUPT_RISING_EDGE);
  }

  // start spi
@@ -316,7 +313,6 @@ void SX127x::call_listeners_(const std::vector<uint8_t> &packet, float rssi, flo
 }

 void SX127x::loop() {
-  this->disable_loop();
  if (this->dio0_pin_ == nullptr || !this->dio0_pin_->digital_read()) {
    return;
  }
@@ -390,6 +386,11 @@ void SX127x::set_mode_(uint8_t modulation, uint8_t mode) {
      return;
    }
  }
+  if (mode == MODE_RX && (modulation == MOD_LORA || this->packet_mode_)) {
+    this->enable_loop();
+  } else {
+    this->disable_loop();
+  }
 }

 void SX127x::set_mode_rx() {
@@ -4,7 +4,6 @@
 #include "esphome/components/spi/spi.h"
 #include "esphome/core/automation.h"
 #include "esphome/core/component.h"
-#include "esphome/core/hal.h"
 #include <vector>

 namespace esphome {
@@ -87,7 +86,6 @@ class SX127x : public Component,
  Trigger<std::vector<uint8_t>, float, float> *get_packet_trigger() { return &this->packet_trigger_; }

 protected:
-  static void IRAM_ATTR gpio_intr(SX127x *arg);
  void configure_fsk_ook_();
  void configure_lora_();
  void set_mode_(uint8_t modulation, uint8_t mode);
@@ -450,99 +450,6 @@ void Application::enable_pending_loops_() {
 }

 #ifdef USE_LWIP_FAST_SELECT
-std::atomic<uint32_t> Application::fast_select_scan_total_{0};
-std::atomic<uint32_t> Application::fast_select_scan_found_data_{0};
-std::atomic<uint32_t> Application::fast_select_scan_load_bearing_{0};
-std::atomic<uint32_t> Application::fast_select_scan_load_bearing_race_{0};
-std::atomic<uint32_t> Application::fast_select_scan_load_bearing_micro_{0};
-std::atomic<uint32_t> Application::fast_select_scan_load_bearing_stall_{0};
-
-void Application::log_fast_select_scan_stats_() {
-  uint32_t total = fast_select_scan_total_.load(std::memory_order_relaxed);
-  uint32_t found = fast_select_scan_found_data_.load(std::memory_order_relaxed);
-  uint32_t load_bearing = fast_select_scan_load_bearing_.load(std::memory_order_relaxed);
-  uint32_t lb_race = fast_select_scan_load_bearing_race_.load(std::memory_order_relaxed);
-  uint32_t lb_micro = fast_select_scan_load_bearing_micro_.load(std::memory_order_relaxed);
-  uint32_t lb_stall = fast_select_scan_load_bearing_stall_.load(std::memory_order_relaxed);
-  ESP_LOGD(TAG,
-           "fast_select scan: total=%" PRIu32 " found_data=%" PRIu32 " load_bearing=%" PRIu32 " (race<10us=%" PRIu32
-           " micro<100us=%" PRIu32 " stall>100us=%" PRIu32 ")",
-           total, found, load_bearing, lb_race, lb_micro, lb_stall);
-}
-
-void Application::note_fast_select_load_bearing_(struct lwip_sock *sock, uint32_t delay_ms) {
-  uint32_t load_bearing = fast_select_scan_load_bearing_.fetch_add(1, std::memory_order_relaxed) + 1;
-
-  // Spin-poll the task notification value for a short bounded window to measure how long
-  // the counterfactual ulTaskNotifyTake would actually have blocked. This distinguishes
-  // three cases:
-  //   race  (<10µs)   — notification arrived within ~10µs of scan start: callback-ordering
-  //                     race between the lwip event_callback writing rcvevent and calling
-  //                     xTaskNotifyGive a few instructions later. Scan is noise.
-  //   micro (<100µs)  — notification arrived within 100µs: still noise at loop_interval scale.
-  //   stall (≥100µs)  — notification did not arrive within our polling window. This is the
-  //                     only case where the scan could be rescuing a real latency spike.
-  // Cap the spin at 100µs so that if we're wrong and this IS a real stall, we only add
-  // 100µs of extra work to that one unlucky loop iteration.
-  uint32_t t_start = micros();
-  uint32_t gap_us = UINT32_MAX;
-  while (true) {
-    if (ulTaskNotifyValueClear(nullptr, 0) != 0) {
-      gap_us = micros() - t_start;
-      break;
-    }
-    uint32_t elapsed = micros() - t_start;
-    if (elapsed >= 100) {
-      break;
-    }
-  }
-
-  const char *bucket;
-  if (gap_us == UINT32_MAX) {
-    fast_select_scan_load_bearing_stall_.fetch_add(1, std::memory_order_relaxed);
-    bucket = "STALL";
-  } else if (gap_us < 10) {
-    fast_select_scan_load_bearing_race_.fetch_add(1, std::memory_order_relaxed);
-    bucket = "race";
-  } else {
-    fast_select_scan_load_bearing_micro_.fetch_add(1, std::memory_order_relaxed);
-    bucket = "micro";
-  }
-
-  // Find the socket's index in monitored_sockets_ for easier correlation with registration order.
-  int index = -1;
-  for (size_t i = 0; i < this->monitored_sockets_.size(); i++) {
-    if (this->monitored_sockets_[i] == sock) {
-      index = static_cast<int>(i);
-      break;
-    }
-  }
-  // Read the rcvevent value directly. This is the same offset-based read used by
-  // esphome_lwip_socket_has_data(); value > 0 means unread data is queued.
-  int16_t rcvevent =
-      *reinterpret_cast<volatile int16_t *>(reinterpret_cast<char *>(sock) + ESPHOME_LWIP_SOCK_RCVEVENT_OFFSET);
-  // Count how many other sockets also had data at this scan (could reveal whether it's always
-  // the same socket or a burst across multiple).
-  size_t sockets_with_data = 0;
-  for (struct lwip_sock *s : this->monitored_sockets_) {
-    if (esphome_lwip_socket_has_data(s))
-      sockets_with_data++;
-  }
-  if (gap_us == UINT32_MAX) {
-    ESP_LOGW(TAG,
-             "fast_select LOAD-BEARING #%" PRIu32 " [%s]: sock=%p idx=%d/%u rcvevent=%d delay_ms=%" PRIu32
-             " sockets_with_data=%u gap_us=>100",
-             load_bearing, bucket, sock, index, static_cast<unsigned>(this->monitored_sockets_.size()), rcvevent,
-             delay_ms, static_cast<unsigned>(sockets_with_data));
-  } else {
-    ESP_LOGW(TAG,
-             "fast_select LOAD-BEARING #%" PRIu32 " [%s]: sock=%p idx=%d/%u rcvevent=%d delay_ms=%" PRIu32
-             " sockets_with_data=%u gap_us=%" PRIu32,
-             load_bearing, bucket, sock, index, static_cast<unsigned>(this->monitored_sockets_.size()), rcvevent,
-             delay_ms, static_cast<unsigned>(sockets_with_data), gap_us);
-  }
-}
-
 bool Application::register_socket(struct lwip_sock *sock) {
  // It modifies monitored_sockets_ without locking — must only be called from the main loop.
  if (sock == nullptr)
@@ -1,7 +1,6 @@
 #pragma once

 #include <algorithm>
-#include <atomic>
 #include <ctime>
 #include <limits>
 #include <span>
@@ -656,25 +655,6 @@ class Application {
  FixedVector<Component *> looping_components_{};
 #ifdef USE_LWIP_FAST_SELECT
  std::vector<struct lwip_sock *> monitored_sockets_;  // Cached lwip_sock pointers for direct rcvevent read
-  // Stats to verify whether the pre-sleep socket scan in yield_with_select_() is ever load-bearing.
-  // If fast_select_scan_load_bearing_ stays 0 under real workloads, the scan can be removed.
-  // These are static because yield_with_select_() is inlined at every call site.
-  static std::atomic<uint32_t> fast_select_scan_total_;
-  static std::atomic<uint32_t> fast_select_scan_found_data_;
-  // Umbrella counter: pre-scan notify peek was 0 and scan found data.
-  // Broken down into three buckets based on the post-scan spin-poll result:
-  //   _race_  — notify arrived in < 10µs   (callback-ordering race, scan is noise)
-  //   _micro_ — notify arrived in 10..100µs (still noise at loop_interval scale)
-  //   _stall_ — notify did not arrive within 100µs (the only case that could be a real stall)
-  // If _stall_ stays 0, the scan is provably irrelevant under this workload.
-  static std::atomic<uint32_t> fast_select_scan_load_bearing_;
-  static std::atomic<uint32_t> fast_select_scan_load_bearing_race_;
-  static std::atomic<uint32_t> fast_select_scan_load_bearing_micro_;
-  static std::atomic<uint32_t> fast_select_scan_load_bearing_stall_;
-  uint32_t fast_select_scan_stats_last_log_{0};
-  void log_fast_select_scan_stats_();
-  // Non-inline, called only on the rare load-bearing event so the hot path stays unchanged.
-  void note_fast_select_load_bearing_(struct lwip_sock *sock, uint32_t delay_ms);
 #elif defined(USE_HOST)
  std::vector<int> socket_fds_;  // Vector of all monitored socket file descriptors
 #endif
@@ -909,14 +889,6 @@ inline void ESPHOME_ALWAYS_INLINE Application::loop() {
  this->yield_with_select_(delay_time);
  this->last_loop_ = last_op_end_time;

-#ifdef USE_LWIP_FAST_SELECT
-  // Periodic fast-select scan stats (debug). Remove once the scan is proven unneeded.
-  if (last_op_end_time - this->fast_select_scan_stats_last_log_ >= 30000) {
-    this->fast_select_scan_stats_last_log_ = last_op_end_time;
-    this->log_fast_select_scan_stats_();
-  }
-#endif
-
  if (this->dump_config_at_ < this->components_.size()) {
    this->process_dump_config_();
  }
@@ -937,30 +909,8 @@ inline void ESPHOME_ALWAYS_INLINE Application::yield_with_select_(uint32_t delay
  // If a socket still has unread data (rcvevent > 0) but the task notification was already
  // consumed, ulTaskNotifyTake would block until timeout — adding up to delay_ms latency.
  // This scan preserves select() semantics: return immediately when any fd is ready.
-  //
-  // Debug stats: peek the task notification value BEFORE scanning. This answers the
-  // counterfactual "if the scan did not exist and we called ulTaskNotifyTake right now,
-  // would it stall?". ulTaskNotifyValueClear(nullptr, 0) is a pure read — it returns the
-  // current value and clears zero bits, leaving the notification state untouched. Reading
-  // before the loop (rather than after finding data) makes the answer TOCTOU-free: the
-  // value we compare against is the value at the moment Take would have been called.
-  // LibreTiny's FreeRTOS port predates ulTaskNotifyValueClear (added in FreeRTOS 10.4.0),
-  // so we fall back to a pessimistic 0, which makes load_bearing an upper bound == found_data
-  // on that platform. Zero there is still a valid proof that the scan is unused.
-#ifdef USE_ESP32
-  uint32_t fast_select_notify_value_before_scan = ulTaskNotifyValueClear(nullptr, 0);
-#else
-  uint32_t fast_select_notify_value_before_scan = 0;
-#endif
-  fast_select_scan_total_.fetch_add(1, std::memory_order_relaxed);
  for (struct lwip_sock *sock : this->monitored_sockets_) {
    if (esphome_lwip_socket_has_data(sock)) {
-      fast_select_scan_found_data_.fetch_add(1, std::memory_order_relaxed);
-      if (fast_select_notify_value_before_scan == 0) {
-        // Scan was load-bearing: no notification pending, so Take would have stalled.
-        // Delegate to a non-inline helper so the hot path stays the same size.
-        this->note_fast_select_load_bearing_(sock, delay_ms);
-      }
      yield();
      return;
    }
@@ -12,14 +12,14 @@ platformio==6.1.19
 esptool==5.2.0
 click==8.3.2
 esphome-dashboard==20260408.1
-aioesphomeapi==44.13.2
+aioesphomeapi==44.13.1
 zeroconf==0.148.0
 puremagic==1.30
 ruamel.yaml==0.19.1 # dashboard_import
 ruamel.yaml.clib==0.2.15 # dashboard_import
 esphome-glyphsets==0.2.0
 pillow==12.2.0
-resvg-py==0.3.0
+resvg-py==0.2.6
 freetype-py==2.5.1
 jinja2==3.1.6
 bleak==2.1.1
Author	SHA1	Message	Date
J. Nick Koston	cb2e8320f0	[api] Pass pos by value through encode_varint_raw_loop	2026-04-09 22:13:14 -10:00
J. Nick Koston	49ef5f9ad6	[api] Force inline encode_varint_raw_loop so pos stays in a register	2026-04-09 22:07:15 -10:00
J. Nick Koston	707f3f4749	[api] Unroll varint encode loop for compile-time bounded types	2026-04-09 21:59:35 -10:00