[socket] bugfix Set wake-request gate flag on LwIP socket receive event (#17010)

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
rwrozelle
2026-06-17 21:54:29 -04:00
committed by GitHub
parent ac6a0f34ec
commit 4b8568e948
5 changed files with 118 additions and 4 deletions

View File

@@ -157,6 +157,8 @@ _Static_assert(offsetof(struct lwip_sock, rcvevent) == ESPHOME_LWIP_SOCK_RCVEVEN
// Saved original event_callback pointer — written once in first hook_socket(), read from TCP/IP task.
static netconn_callback s_original_callback = NULL;
extern void esphome_wake_loop_threadsafe(void);
#ifdef USE_OTA_PLATFORM_ESPHOME
static struct netconn *s_ota_listener_conn = NULL;
extern void esphome_wake_ota_component_any_context(void);
@@ -189,10 +191,7 @@ static void esphome_socket_event_callback(struct netconn *conn, enum netconn_evt
esphome_wake_ota_component_any_context();
}
#endif
TaskHandle_t task = esphome_main_task_handle;
if (task != NULL) {
xTaskNotifyGive(task);
}
esphome_wake_loop_threadsafe();
}
}

View File

@@ -30,4 +30,9 @@ void IRAM_ATTR wake_loop_any_context() { wake_main_task_any_context(); }
} // namespace esphome
// C-linkage wake entry point: records a wake request and then notifies the
// main task, in that order.
// NOTE(review): presumably the flag is set before the notify so that the main
// task already observes it when it wakes — confirm against the loop's gate check.
extern "C" void esphome_wake_loop_threadsafe() {
  // Set the wake-request flag.
  esphome::wake_request_set();
  // Notify the main task.
  esphome_main_task_notify();
}
#endif // USE_ESP32 || USE_LIBRETINY

View File

@@ -123,6 +123,14 @@ void wakeable_delay(uint32_t ms) {
if (ms == 0) [[unlikely]] {
yield();
}
// A socket woke select() early — open the component-phase gate so the
// owning component's loop() drains the data on this tick rather than
// waiting up to loop_interval_ ms. Idempotent if wake_loop_threadsafe()
// already set the flag (wake socket fired); required when an application
// socket fired and nothing else set the flag.
if (ret > 0) {
wake_request_set();
}
return;
}
// ret < 0: error (EINTR is normal, anything else is unexpected).

View File

@@ -0,0 +1,27 @@
# Device configuration for the socket wake-gate TCP test: raises the loop
# interval at boot and exposes a "ping" API action that logs "PONG".
esphome:
  name: socket-wake-gate-tcp
  on_boot:
    priority: -100
    then:
      - lambda: |-
          // Raise loop_interval_ to 2000ms. Without wake_request_set() being
          // called when select() returns due to socket data, the component
          // phase would be gated for up to 2000ms after a TCP request arrives.
          App.set_loop_interval(2000);
      # Let boot transients and API handshake settle.
      - delay: 500ms
      # Log the BOOT_DONE marker once the settle delay has elapsed.
      - lambda: |-
          ESP_LOGI("test", "BOOT_DONE");
host:
api:
  actions:
    # "ping" action: logs "PONG" at INFO level each time it is executed.
    - action: ping
      then:
        - logger.log:
            format: "PONG"
            level: INFO
# INFO level so the BOOT_DONE and PONG lines above are emitted.
logger:
  level: INFO

View File

@@ -0,0 +1,75 @@
"""Test that a TCP socket receive opens the component-phase gate immediately.
Regression test for the wake-request flag not being set when select() returns
due to socket data on the host platform (wake_host.cpp wakeable_delay fix).
The API server's accepted connection sockets use accept_loop_monitored(), so
they are registered with the host select() loop. A service call from the Python
client arrives on that socket. Without the fix, select() returning early did not
set g_wake_requested, so Application::loop()'s Phase B gate stayed closed until
loop_interval_ expired. With the fix, the gate opens immediately.
"""
from __future__ import annotations
import asyncio
import time
import pytest
from .types import APIClientConnectedFactory, RunCompiledFunction
@pytest.mark.asyncio
async def test_socket_wake_gate_tcp(
    yaml_config: str,
    run_compiled: RunCompiledFunction,
    api_client_connected: APIClientConnectedFactory,
) -> None:
    """Check that a "ping" service call is handled well inside loop_interval_.

    The device configuration raises loop_interval_ to 2000ms, so a request
    that is only processed once the interval expires would far exceed the
    500ms budget asserted at the end of this test.
    """
    event_loop = asyncio.get_running_loop()
    boot_done: asyncio.Future[None] = event_loop.create_future()
    pong: asyncio.Future[None] = event_loop.create_future()
    # Each log marker is paired with the future it resolves; checked in order.
    markers = (("BOOT_DONE", boot_done), ("PONG", pong))

    def on_log_line(line: str) -> None:
        # Resolve every still-pending future whose marker appears in the line.
        for marker, waiter in markers:
            if marker in line and not waiter.done():
                waiter.set_result(None)

    async with run_compiled(yaml_config, line_callback=on_log_line):
        async with api_client_connected() as client:
            info = await client.device_info()
            assert info is not None
            assert info.name == "socket-wake-gate-tcp"

            # Do not start timing until the device reports it finished booting.
            try:
                await asyncio.wait_for(boot_done, timeout=15.0)
            except TimeoutError:
                pytest.fail("BOOT_DONE never appeared — device did not complete boot")

            _, services = await client.list_entities_services()
            ping_service = None
            for candidate in services:
                if candidate.name == "ping":
                    ping_service = candidate
                    break
            assert ping_service is not None, "ping service not found"

            # Time the round trip from sending the service call until the
            # PONG log line is observed.
            started = time.monotonic()
            await client.execute_service(ping_service, {})
            try:
                await asyncio.wait_for(pong, timeout=5.0)
            except TimeoutError:
                pytest.fail("PONG never appeared — service did not execute")
            finished = time.monotonic()
            elapsed_ms = (finished - started) * 1000

            # A gate that stays closed costs up to loop_interval_=2000ms;
            # 500ms leaves generous headroom for slow CI machines.
            assert elapsed_ms < 500, (
                f"Service response took {elapsed_ms:.0f}ms with loop_interval_=2000ms — "
                f"expected < 500ms; without the wake-request fix this would take up to 2000ms"
            )