diff --git a/esphome/core/application.h b/esphome/core/application.h index 04e0f1138e..4a18714d0d 100644 --- a/esphome/core/application.h +++ b/esphome/core/application.h @@ -9,6 +9,10 @@ #include #include "esphome/core/component.h" #include "esphome/core/defines.h" + +#if defined(USE_LWIP_FAST_SELECT) && defined(ESPHOME_THREAD_MULTI_ATOMICS) +#include // for std::atomic_thread_fence in Application::loop() +#endif #include "esphome/core/hal.h" #include "esphome/core/helpers.h" #include "esphome/core/preferences.h" @@ -580,6 +584,15 @@ inline ESPHOME_ALWAYS_INLINE Application::ComponentPhaseGuard::ComponentPhaseGua } inline void ESPHOME_ALWAYS_INLINE Application::loop() { +#if defined(USE_LWIP_FAST_SELECT) && defined(ESPHOME_THREAD_MULTI_ATOMICS) + // Pairs with the TCP/IP thread's SYS_ARCH_UNPROTECT release on rcvevent so + // subsequent Socket::ready() checks in this iter observe the published state + // without a per-call memw. Wake is independent (xTaskNotifyGive/ + // ulTaskNotifyTake), so non-losing. Skipped on MULTI_NO_ATOMICS (e.g. + // BK72xx) — that path keeps `volatile` in esphome_lwip_socket_has_data() + // instead. + std::atomic_thread_fence(std::memory_order_acquire); +#endif #ifdef USE_RUNTIME_STATS // Capture the start of the active (non-sleeping) portion of this iteration. // Used to derive main-loop overhead = active time − Σ(component time) − diff --git a/esphome/core/lwip_fast_select.h b/esphome/core/lwip_fast_select.h index 3b5e449148..4ba2606d76 100644 --- a/esphome/core/lwip_fast_select.h +++ b/esphome/core/lwip_fast_select.h @@ -26,25 +26,23 @@ extern "C" { struct lwip_sock *esphome_lwip_get_sock(int fd); /// Check if a cached LwIP socket has data ready via unlocked hint read of rcvevent. -/// This avoids lwIP core lock contention between the main loop (CPU0) and -/// streaming/networking work (CPU1). Correctness is preserved because callers -/// already handle EWOULDBLOCK on nonblocking sockets — a stale hint simply causes -/// a harmless retry on the next loop iteration. In practice, stale reads have not -/// been observed across multi-day testing, but the design does not depend on that. -/// -/// The sock pointer must have been obtained from esphome_lwip_get_sock() and must -/// remain valid (caller owns socket lifetime — no concurrent close). -/// Hot path: inlined volatile 16-bit load — no function call overhead. -/// Uses offset-based access because lwip/priv/sockets_priv.h conflicts with C++. +/// On ESPHOME_THREAD_MULTI_ATOMICS builds, the caller must run on the main +/// loop task after Application::loop's per-iter std::atomic_thread_fence +/// (memory_order_acquire); that fence pairs with the TCP/IP thread's +/// SYS_ARCH_UNPROTECT release, so a plain load suffices and avoids the +/// per-call `memw` that volatile would emit on Xtensa under default +/// -mserialize-volatile. Without atomics (e.g. BK72xx), the fence is skipped +/// and the volatile load provides ordering on its own. +/// Stale reads are harmless either way: the hooked event_callback +/// xTaskNotifyGives on RCVPLUS, so the next iteration re-snapshots and +/// ulTaskNotifyTake never loses a wake. /// The offset and size are verified at compile time in lwip_fast_select.c. static inline bool esphome_lwip_socket_has_data(struct lwip_sock *sock) { - // Unlocked hint read — no lwIP core lock needed. - // volatile prevents the compiler from caching/reordering this cross-thread read. - // The write side (TCP/IP thread) commits via SYS_ARCH_UNPROTECT which releases a - // FreeRTOS mutex (ESP32) or resumes the scheduler (LibreTiny), ensuring the value - // is visible. Aligned 16-bit reads are single-instruction loads (L16SI/LH/LDRH) on - // Xtensa/RISC-V/ARM and cannot produce torn values. +#ifdef ESPHOME_THREAD_MULTI_ATOMICS + return *(int16_t *) ((char *) sock + (int) ESPHOME_LWIP_SOCK_RCVEVENT_OFFSET) > 0; +#else return *(volatile int16_t *) ((char *) sock + (int) ESPHOME_LWIP_SOCK_RCVEVENT_OFFSET) > 0; +#endif } /// Hook a socket's netconn callback to notify the main loop task on receive events.