From 4b8568e94824341dd49aa8ee05d5f5927f65af9c Mon Sep 17 00:00:00 2001 From: rwrozelle Date: Wed, 17 Jun 2026 21:54:29 -0400 Subject: [PATCH] [socket] bugfix Set wake-request gate flag on LwIP socket receive event (#17010) Co-authored-by: Claude Sonnet 4.6 --- esphome/core/lwip_fast_select.c | 7 +- esphome/core/wake/wake_freertos.cpp | 5 ++ esphome/core/wake/wake_host.cpp | 8 ++ .../fixtures/socket_wake_gate_tcp.yaml | 27 +++++++ .../integration/test_socket_wake_gate_tcp.py | 75 +++++++++++++++++++ 5 files changed, 118 insertions(+), 4 deletions(-) create mode 100644 tests/integration/fixtures/socket_wake_gate_tcp.yaml create mode 100644 tests/integration/test_socket_wake_gate_tcp.py diff --git a/esphome/core/lwip_fast_select.c b/esphome/core/lwip_fast_select.c index 36000d4e77..2042c43804 100644 --- a/esphome/core/lwip_fast_select.c +++ b/esphome/core/lwip_fast_select.c @@ -157,6 +157,8 @@ _Static_assert(offsetof(struct lwip_sock, rcvevent) == ESPHOME_LWIP_SOCK_RCVEVEN // Saved original event_callback pointer — written once in first hook_socket(), read from TCP/IP task. static netconn_callback s_original_callback = NULL; +extern void esphome_wake_loop_threadsafe(void); + #ifdef USE_OTA_PLATFORM_ESPHOME static struct netconn *s_ota_listener_conn = NULL; extern void esphome_wake_ota_component_any_context(void); @@ -189,10 +191,7 @@ static void esphome_socket_event_callback(struct netconn *conn, enum netconn_evt esphome_wake_ota_component_any_context(); } #endif - TaskHandle_t task = esphome_main_task_handle; - if (task != NULL) { - xTaskNotifyGive(task); - } + esphome_wake_loop_threadsafe(); } } diff --git a/esphome/core/wake/wake_freertos.cpp b/esphome/core/wake/wake_freertos.cpp index 0bf700daa8..458ef51f89 100644 --- a/esphome/core/wake/wake_freertos.cpp +++ b/esphome/core/wake/wake_freertos.cpp @@ -30,4 +30,9 @@ void IRAM_ATTR wake_loop_any_context() { wake_main_task_any_context(); } } // namespace esphome +extern "C" void esphome_wake_loop_threadsafe() { + esphome::wake_request_set(); + esphome_main_task_notify(); +} + #endif // USE_ESP32 || USE_LIBRETINY diff --git a/esphome/core/wake/wake_host.cpp b/esphome/core/wake/wake_host.cpp index 9d2a650ca2..8cb382a77e 100644 --- a/esphome/core/wake/wake_host.cpp +++ b/esphome/core/wake/wake_host.cpp @@ -123,6 +123,14 @@ void wakeable_delay(uint32_t ms) { if (ms == 0) [[unlikely]] { yield(); } + // A socket woke select() early — open the component-phase gate so the + // owning component's loop() drains the data on this tick rather than + // waiting up to loop_interval_ ms. Idempotent if wake_loop_threadsafe() + // already set the flag (wake socket fired); required when an application + // socket fired and nothing else set the flag. + if (ret > 0) { + wake_request_set(); + } return; } // ret < 0: error (EINTR is normal, anything else is unexpected). diff --git a/tests/integration/fixtures/socket_wake_gate_tcp.yaml b/tests/integration/fixtures/socket_wake_gate_tcp.yaml new file mode 100644 index 0000000000..4dbf89cbf0 --- /dev/null +++ b/tests/integration/fixtures/socket_wake_gate_tcp.yaml @@ -0,0 +1,27 @@ +esphome: + name: socket-wake-gate-tcp + on_boot: + priority: -100 + then: + - lambda: |- + // Raise loop_interval_ to 2000ms. Without wake_request_set() being + // called when select() returns due to socket data, the component + // phase would be gated for up to 2000ms after a TCP request arrives. + App.set_loop_interval(2000); + # Let boot transients and API handshake settle. + - delay: 500ms + - lambda: |- + ESP_LOGI("test", "BOOT_DONE"); + +host: + +api: + actions: + - action: ping + then: + - logger.log: + format: "PONG" + level: INFO + +logger: + level: INFO diff --git a/tests/integration/test_socket_wake_gate_tcp.py b/tests/integration/test_socket_wake_gate_tcp.py new file mode 100644 index 0000000000..2955d2803a --- /dev/null +++ b/tests/integration/test_socket_wake_gate_tcp.py @@ -0,0 +1,75 @@ +"""Test that a TCP socket receive opens the component-phase gate immediately. + +Regression test for the wake-request flag not being set when select() returns +due to socket data on the host platform (wake_host.cpp wakeable_delay fix). + +The API server's accepted connection sockets use accept_loop_monitored(), so +they are registered with the host select() loop. A service call from the Python +client arrives on that socket. Without the fix, select() returning early did not +set g_wake_requested, so Application::loop()'s Phase B gate stayed closed until +loop_interval_ expired. With the fix, the gate opens immediately. +""" + +from __future__ import annotations + +import asyncio +import time + +import pytest + +from .types import APIClientConnectedFactory, RunCompiledFunction + + +@pytest.mark.asyncio +async def test_socket_wake_gate_tcp( + yaml_config: str, + run_compiled: RunCompiledFunction, + api_client_connected: APIClientConnectedFactory, +) -> None: + """TCP socket receive must open the component-phase gate immediately, + even with loop_interval_ raised to 2000ms.""" + loop = asyncio.get_running_loop() + boot_done: asyncio.Future[None] = loop.create_future() + pong: asyncio.Future[None] = loop.create_future() + + def on_log_line(line: str) -> None: + if "BOOT_DONE" in line and not boot_done.done(): + boot_done.set_result(None) + if "PONG" in line and not pong.done(): + pong.set_result(None) + + async with ( + run_compiled(yaml_config, line_callback=on_log_line), + api_client_connected() as client, + ): + device_info = await client.device_info() + assert device_info is not None + assert device_info.name == "socket-wake-gate-tcp" + + try: + await asyncio.wait_for(boot_done, timeout=15.0) + except TimeoutError: + pytest.fail("BOOT_DONE never appeared — device did not complete boot") + + _, services = await client.list_entities_services() + ping_service = next((s for s in services if s.name == "ping"), None) + assert ping_service is not None, "ping service not found" + + # Execute the service and time how long until PONG appears in logs. + # The request bytes arrive on an accept_loop_monitored() TCP socket, + # which is registered with the host select() loop. + t_send = time.monotonic() + await client.execute_service(ping_service, {}) + + try: + await asyncio.wait_for(pong, timeout=5.0) + except TimeoutError: + pytest.fail("PONG never appeared — service did not execute") + + elapsed_ms = (time.monotonic() - t_send) * 1000 + # Without the fix the gate stays closed for up to loop_interval_=2000ms. + # With the fix the gate opens on the next tick; 500ms gives ample CI headroom. + assert elapsed_ms < 500, ( + f"Service response took {elapsed_ms:.0f}ms with loop_interval_=2000ms — " + f"expected < 500ms; without the wake-request fix this would take up to 2000ms" + )