mirror of
https://github.com/esphome/esphome.git
synced 2026-06-24 12:35:25 +00:00
[core] Fix delay on failed component being dropped; DRY the is_failed check
The is_failed() skip exists in two execution paths: the heap loop in call() and should_skip_item_() (defer queue / delay:0). The previous commit only exempted SELF_POINTER items from the heap-path check, so on multi-threaded platforms a delay:0 continuation whose host component had failed would still be silently dropped. Extract a single is_item_failed_() helper (with the SELF_POINTER exemption) and use it from both paths so they cannot drift again. Add an integration test that schedules a delay from a component that marks itself failed and asserts the continuation still fires (verified to fail without the exemption).
This commit is contained in:
@@ -642,10 +642,8 @@ uint32_t HOT Scheduler::call(uint32_t now) {
|
||||
// Not reached timeout yet, done for this call
|
||||
break;
|
||||
}
|
||||
// Don't run on failed components.
|
||||
// SELF_POINTER items (e.g. DelayAction) store the component for log attribution only and
|
||||
// must always fire regardless of that component's failed state, so skip the check for them.
|
||||
if (item->component != nullptr && item->get_name_type() != NameType::SELF_POINTER && item->component->is_failed()) {
|
||||
// Don't run on failed components (see is_item_failed_ for the SELF_POINTER exception).
|
||||
if (this->is_item_failed_(item)) {
|
||||
LockGuard guard{this->lock_};
|
||||
this->recycle_item_main_loop_(this->pop_raw_locked_());
|
||||
continue;
|
||||
|
||||
@@ -429,11 +429,17 @@ class Scheduler {
|
||||
// Helper to execute a scheduler item
|
||||
uint32_t execute_item_(SchedulerItem *item, uint32_t now);
|
||||
|
||||
// Helper to check if item should be skipped
|
||||
bool should_skip_item_(SchedulerItem *item) const {
|
||||
return is_item_removed_(item) || (item->component != nullptr && item->component->is_failed());
|
||||
// True if the item belongs to a failed component and should therefore not run.
|
||||
// SELF_POINTER items (e.g. DelayAction) store the component for log attribution only, so they
|
||||
// must always fire regardless of that component's failed state and are never skipped here.
|
||||
bool is_item_failed_(SchedulerItem *item) const {
|
||||
return item->component != nullptr && item->get_name_type() != NameType::SELF_POINTER &&
|
||||
item->component->is_failed();
|
||||
}
|
||||
|
||||
// Helper to check if item should be skipped
|
||||
bool should_skip_item_(SchedulerItem *item) const { return is_item_removed_(item) || this->is_item_failed_(item); }
|
||||
|
||||
// Helper to recycle a SchedulerItem back to the pool.
|
||||
// Takes a raw pointer — caller transfers ownership. The item is either added to the
|
||||
// pool or deleted if the pool is full.
|
||||
|
||||
@@ -0,0 +1,32 @@
|
||||
esphome:
|
||||
name: scheduler-delay-failed
|
||||
|
||||
host:
|
||||
api:
|
||||
logger:
|
||||
level: DEBUG
|
||||
|
||||
globals:
|
||||
- id: started
|
||||
type: bool
|
||||
restore_value: false
|
||||
initial_value: "false"
|
||||
|
||||
# The interval fires with itself as the current component, schedules a delay (which
|
||||
# captures that component for log attribution), then marks itself failed. The delay
|
||||
# continuation must still fire: a failed component must not drop an already-scheduled
|
||||
# delay, because the SELF_POINTER scheduler item stores the component for attribution
|
||||
# only.
|
||||
interval:
|
||||
- interval: 100ms
|
||||
id: host_interval
|
||||
then:
|
||||
- if:
|
||||
condition:
|
||||
lambda: "return !id(started);"
|
||||
then:
|
||||
- lambda: |-
|
||||
id(started) = true;
|
||||
id(host_interval)->mark_failed();
|
||||
- delay: 200ms
|
||||
- logger.log: "DELAY_FIRED_AFTER_FAIL"
|
||||
@@ -60,3 +60,32 @@ async def test_scheduler_blocking_warning(
|
||||
match = WARN_PATTERN.search(warning_line)
|
||||
assert match is not None
|
||||
assert match.group(2) == "50", f"Expected 'max is 50 ms', got: {warning_line}"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_scheduler_delay_runs_on_failed_component(
|
||||
yaml_config: str,
|
||||
run_compiled: RunCompiledFunction,
|
||||
api_client_connected: APIClientConnectedFactory,
|
||||
) -> None:
|
||||
"""A delay must still fire even when its host component is marked failed.
|
||||
|
||||
DelayAction records the current component on its scheduler item purely for log
|
||||
attribution. That component must not gate execution: the scheduler skips items
|
||||
belonging to failed components, but SELF_POINTER items (delays) are exempt. This
|
||||
guards the is_item_failed_() exception on both the heap and defer-queue paths.
|
||||
"""
|
||||
loop = asyncio.get_running_loop()
|
||||
fired: asyncio.Future[bool] = loop.create_future()
|
||||
|
||||
def check_output(line: str) -> None:
|
||||
if "DELAY_FIRED_AFTER_FAIL" in line and not fired.done():
|
||||
fired.set_result(True)
|
||||
|
||||
async with (
|
||||
run_compiled(yaml_config, line_callback=check_output),
|
||||
api_client_connected() as client,
|
||||
):
|
||||
assert await client.device_info() is not None
|
||||
# If the failed host component wrongly dropped the delay, this times out.
|
||||
await asyncio.wait_for(fired, timeout=10.0)
|
||||
|
||||
Reference in New Issue
Block a user