mirror of
https://github.com/esphome/esphome.git
synced 2026-06-24 18:06:34 +00:00
Compare commits
2 Commits
web-server
...
core-block
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3a4f67def8 | ||
|
|
5b728f19c3 |
@@ -198,7 +198,12 @@ template<typename... Ts> class DelayAction : public Action<Ts...> {
|
||||
// to avoid overhead from capturing arguments by value
|
||||
if constexpr (sizeof...(Ts) == 0) {
|
||||
App.scheduler.set_timer_common_(
|
||||
/* component= */ nullptr, Scheduler::SchedulerItem::TIMEOUT, Scheduler::NameType::SELF_POINTER,
|
||||
// The component is stored for blocking-warning log attribution only (SELF_POINTER items
|
||||
// match by `this`, so it does not affect cancellation). Capturing the current component
|
||||
// lets the warning name the source instead of "<null>"; it propagates across chained
|
||||
// delays because the scheduler restores it as the current component before each callback.
|
||||
/* component= */ App.get_current_component(), Scheduler::SchedulerItem::TIMEOUT,
|
||||
Scheduler::NameType::SELF_POINTER,
|
||||
/* static_name= */ reinterpret_cast<const char *>(this), /* hash_or_id= */ 0, this->delay_.value(),
|
||||
[this]() { this->play_next_(); },
|
||||
/* is_retry= */ false, /* skip_cancel= */ this->num_running_ > 1);
|
||||
@@ -209,7 +214,9 @@ template<typename... Ts> class DelayAction : public Action<Ts...> {
|
||||
// are passed as non-const lvalues to play_next_(const Ts&...) where Ts may be `T&`
|
||||
auto f = [this, x...]() mutable { this->play_next_(x...); };
|
||||
App.scheduler.set_timer_common_(
|
||||
/* component= */ nullptr, Scheduler::SchedulerItem::TIMEOUT, Scheduler::NameType::SELF_POINTER,
|
||||
// See the no-argument branch above: component is captured for log attribution only.
|
||||
/* component= */ App.get_current_component(), Scheduler::SchedulerItem::TIMEOUT,
|
||||
Scheduler::NameType::SELF_POINTER,
|
||||
/* static_name= */ reinterpret_cast<const char *>(this), /* hash_or_id= */ 0, this->delay_.value(x...),
|
||||
std::move(f),
|
||||
/* is_retry= */ false, /* skip_cancel= */ this->num_running_ > 1);
|
||||
|
||||
@@ -258,9 +258,11 @@ void Component::call() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
bool Component::should_warn_of_blocking(uint32_t blocking_time) {
|
||||
bool Component::should_warn_of_blocking(uint32_t blocking_time, uint32_t &threshold_ms_out) {
|
||||
// Convert centisecond threshold to milliseconds for comparison
|
||||
uint32_t threshold_ms = static_cast<uint32_t>(this->warn_if_blocking_over_) * 10U;
|
||||
// Report the threshold that was exceeded (before any ratcheting below) so the warning is accurate.
|
||||
threshold_ms_out = threshold_ms;
|
||||
if (blocking_time > threshold_ms) {
|
||||
// Set new threshold: blocking_time + increment, converted back to centiseconds
|
||||
uint32_t new_threshold_ms = blocking_time + WARN_IF_BLOCKING_INCREMENT_MS;
|
||||
@@ -493,17 +495,17 @@ uint64_t ComponentRuntimeStats::global_recorded_us = 0; // NOLINT(cppcoreguidel
|
||||
|
||||
void __attribute__((noinline, cold))
|
||||
WarnIfComponentBlockingGuard::warn_blocking(Component *component, uint32_t blocking_time) {
|
||||
bool should_warn;
|
||||
if (component != nullptr) {
|
||||
should_warn = component->should_warn_of_blocking(blocking_time);
|
||||
} else {
|
||||
should_warn = true; // Already checked > WARN_IF_BLOCKING_OVER_MS in caller
|
||||
}
|
||||
if (should_warn) {
|
||||
ESP_LOGW(TAG, "%s took a long time for an operation (%" PRIu32 " ms), max is 30 ms",
|
||||
component == nullptr ? LOG_STR_LITERAL("<null>") : LOG_STR_ARG(component->get_component_log_str()),
|
||||
blocking_time);
|
||||
// Default threshold for the null path (no component to consult); the caller already checked
|
||||
// blocking_time > WARN_IF_BLOCKING_OVER_MS, so always warn in that case.
|
||||
uint32_t threshold_ms = WARN_IF_BLOCKING_OVER_MS;
|
||||
if (component != nullptr && !component->should_warn_of_blocking(blocking_time, threshold_ms)) {
|
||||
return; // Component's (possibly ratcheted) threshold not exceeded yet
|
||||
}
|
||||
// A null component means the work ran in a scheduler continuation with no associated component
|
||||
// (e.g. a delay inside a script/automation), so report it as a scheduled task rather than "<null>".
|
||||
ESP_LOGW(TAG, "%s took a long time for an operation (%" PRIu32 " ms), max is %" PRIu32 " ms",
|
||||
component == nullptr ? LOG_STR_LITERAL("a scheduled task") : LOG_STR_ARG(component->get_component_log_str()),
|
||||
blocking_time, threshold_ms);
|
||||
}
|
||||
|
||||
#ifdef USE_SETUP_PRIORITY_OVERRIDE
|
||||
|
||||
@@ -326,7 +326,7 @@ class Component {
|
||||
return component_source_lookup(this->component_source_index_);
|
||||
}
|
||||
|
||||
bool should_warn_of_blocking(uint32_t blocking_time);
|
||||
bool should_warn_of_blocking(uint32_t blocking_time, uint32_t &threshold_ms_out);
|
||||
|
||||
protected:
|
||||
friend class Application;
|
||||
|
||||
@@ -642,8 +642,8 @@ uint32_t HOT Scheduler::call(uint32_t now) {
|
||||
// Not reached timeout yet, done for this call
|
||||
break;
|
||||
}
|
||||
// Don't run on failed components
|
||||
if (item->component != nullptr && item->component->is_failed()) {
|
||||
// Don't run on failed components (see is_item_failed_ for the SELF_POINTER exception).
|
||||
if (this->is_item_failed_(item)) {
|
||||
LockGuard guard{this->lock_};
|
||||
this->recycle_item_main_loop_(this->pop_raw_locked_());
|
||||
continue;
|
||||
|
||||
@@ -402,7 +402,7 @@ class Scheduler {
|
||||
// Fixes: https://github.com/esphome/esphome/issues/11940
|
||||
if (item == nullptr)
|
||||
return false;
|
||||
if (item->component != component || item->type != type || (skip_removed && this->is_item_removed_locked_(item)) ||
|
||||
if (item->type != type || (skip_removed && this->is_item_removed_locked_(item)) ||
|
||||
(match_retry && !item->is_retry)) {
|
||||
return false;
|
||||
}
|
||||
@@ -411,23 +411,35 @@ class Scheduler {
|
||||
return false;
|
||||
// STATIC_STRING: compare string content. SELF_POINTER: raw pointer equality (no strcmp).
|
||||
// Other types: compare hash/ID value.
|
||||
if (name_type == NameType::SELF_POINTER) {
|
||||
// SELF_POINTER keys are globally unique (the caller's `this`), so the stored component is
|
||||
// log-attribution only (e.g. DelayAction records the current component for blocking
|
||||
// warnings) and must NOT participate in matching. Match by pointer equality alone.
|
||||
return item->name_.static_name == static_name;
|
||||
}
|
||||
// All other name types must also match on component identity.
|
||||
if (item->component != component)
|
||||
return false;
|
||||
if (name_type == NameType::STATIC_STRING) {
|
||||
return this->names_match_static_(item->get_name(), static_name);
|
||||
}
|
||||
if (name_type == NameType::SELF_POINTER) {
|
||||
return item->name_.static_name == static_name;
|
||||
}
|
||||
return item->get_name_hash_or_id() == hash_or_id;
|
||||
}
|
||||
|
||||
// Helper to execute a scheduler item
|
||||
uint32_t execute_item_(SchedulerItem *item, uint32_t now);
|
||||
|
||||
// Helper to check if item should be skipped
|
||||
bool should_skip_item_(SchedulerItem *item) const {
|
||||
return is_item_removed_(item) || (item->component != nullptr && item->component->is_failed());
|
||||
// True if the item belongs to a failed component and should therefore not run.
|
||||
// SELF_POINTER items (e.g. DelayAction) store the component for log attribution only, so they
|
||||
// must always fire regardless of that component's failed state and are never skipped here.
|
||||
bool is_item_failed_(SchedulerItem *item) const {
|
||||
return item->component != nullptr && item->get_name_type() != NameType::SELF_POINTER &&
|
||||
item->component->is_failed();
|
||||
}
|
||||
|
||||
// Helper to check if item should be skipped
|
||||
bool should_skip_item_(SchedulerItem *item) const { return is_item_removed_(item) || this->is_item_failed_(item); }
|
||||
|
||||
// Helper to recycle a SchedulerItem back to the pool.
|
||||
// Takes a raw pointer — caller transfers ownership. The item is either added to the
|
||||
// pool or deleted if the pool is full.
|
||||
|
||||
21
tests/integration/fixtures/scheduler_blocking_warning.yaml
Normal file
21
tests/integration/fixtures/scheduler_blocking_warning.yaml
Normal file
@@ -0,0 +1,21 @@
|
||||
esphome:
|
||||
name: scheduler-blocking-warning
|
||||
|
||||
host:
|
||||
api:
|
||||
logger:
|
||||
level: DEBUG
|
||||
|
||||
# An interval fires via the scheduler (so the current component is the interval),
|
||||
# defers through a delay, then busy-blocks well over the 50 ms warn threshold inside
|
||||
# the deferred continuation. The blocking warning must be attributed to the interval
|
||||
# component (captured at schedule time) instead of "<null>".
|
||||
interval:
|
||||
- interval: 500ms
|
||||
then:
|
||||
- delay: 10ms
|
||||
- lambda: |-
|
||||
const uint32_t start = millis();
|
||||
// Spin for longer than WARN_IF_BLOCKING_OVER_MS (50 ms) to trip the warning.
|
||||
while (millis() - start < 80) {
|
||||
}
|
||||
@@ -0,0 +1,32 @@
|
||||
esphome:
|
||||
name: scheduler-delay-failed
|
||||
|
||||
host:
|
||||
api:
|
||||
logger:
|
||||
level: DEBUG
|
||||
|
||||
globals:
|
||||
- id: started
|
||||
type: bool
|
||||
restore_value: false
|
||||
initial_value: "false"
|
||||
|
||||
# The interval fires with itself as the current component, schedules a delay (which
|
||||
# captures that component for log attribution), then marks itself failed. The delay
|
||||
# continuation must still fire: a failed component must not drop an already-scheduled
|
||||
# delay, because the SELF_POINTER scheduler item stores the component for attribution
|
||||
# only.
|
||||
interval:
|
||||
- interval: 100ms
|
||||
id: host_interval
|
||||
then:
|
||||
- if:
|
||||
condition:
|
||||
lambda: "return !id(started);"
|
||||
then:
|
||||
- lambda: |-
|
||||
id(started) = true;
|
||||
id(host_interval)->mark_failed();
|
||||
- delay: 200ms
|
||||
- logger.log: "DELAY_FIRED_AFTER_FAIL"
|
||||
91
tests/integration/test_scheduler_blocking_warning.py
Normal file
91
tests/integration/test_scheduler_blocking_warning.py
Normal file
@@ -0,0 +1,91 @@
|
||||
"""Integration test for blocking-warning source attribution.
|
||||
|
||||
A blocking operation that runs inside a deferred scheduler continuation (e.g. after
|
||||
a ``delay`` in a script/automation) used to be reported as
|
||||
``<null> took a long time for an operation (NN ms), max is 30 ms`` because the
|
||||
continuation carries no component. The warning should instead name the component that
|
||||
was current when the delay was scheduled and report the real threshold (50 ms).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import re
|
||||
|
||||
import pytest
|
||||
|
||||
from .types import APIClientConnectedFactory, RunCompiledFunction
|
||||
|
||||
# Matches: "<source> took a long time for an operation (NN ms), max is NN ms"
|
||||
WARN_PATTERN = re.compile(
|
||||
r"took a long time for an operation \((\d+) ms\), max is (\d+) ms"
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_scheduler_blocking_warning(
|
||||
yaml_config: str,
|
||||
run_compiled: RunCompiledFunction,
|
||||
api_client_connected: APIClientConnectedFactory,
|
||||
) -> None:
|
||||
"""Deferred blocking work is attributed to a real component, not "<null>"."""
|
||||
loop = asyncio.get_running_loop()
|
||||
warning_future: asyncio.Future[str] = loop.create_future()
|
||||
|
||||
def check_output(line: str) -> None:
|
||||
if WARN_PATTERN.search(line) and not warning_future.done():
|
||||
warning_future.set_result(line)
|
||||
|
||||
async with (
|
||||
run_compiled(yaml_config, line_callback=check_output),
|
||||
api_client_connected() as client,
|
||||
):
|
||||
device_info = await client.device_info()
|
||||
assert device_info is not None
|
||||
|
||||
# The interval fires, defers via delay, then busy-blocks > 50 ms in the
|
||||
# continuation, which should trip the blocking warning.
|
||||
warning_line = await asyncio.wait_for(warning_future, timeout=10.0)
|
||||
|
||||
# The deferred block must be attributed to a real component, not "<null>".
|
||||
assert "<null>" not in warning_line, (
|
||||
f"Warning should name a component, got: {warning_line}"
|
||||
)
|
||||
# The delay was scheduled from a known component (the interval), so the warning
|
||||
# must name it rather than falling back to the generic scheduled-task label.
|
||||
assert "a scheduled task" not in warning_line, (
|
||||
f"Warning should name the interval component, got: {warning_line}"
|
||||
)
|
||||
# The reported threshold must be the real default (50 ms), not the stale "30 ms".
|
||||
match = WARN_PATTERN.search(warning_line)
|
||||
assert match is not None
|
||||
assert match.group(2) == "50", f"Expected 'max is 50 ms', got: {warning_line}"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_scheduler_delay_runs_on_failed_component(
|
||||
yaml_config: str,
|
||||
run_compiled: RunCompiledFunction,
|
||||
api_client_connected: APIClientConnectedFactory,
|
||||
) -> None:
|
||||
"""A delay must still fire even when its host component is marked failed.
|
||||
|
||||
DelayAction records the current component on its scheduler item purely for log
|
||||
attribution. That component must not gate execution: the scheduler skips items
|
||||
belonging to failed components, but SELF_POINTER items (delays) are exempt. This
|
||||
guards the is_item_failed_() exception on both the heap and defer-queue paths.
|
||||
"""
|
||||
loop = asyncio.get_running_loop()
|
||||
fired: asyncio.Future[bool] = loop.create_future()
|
||||
|
||||
def check_output(line: str) -> None:
|
||||
if "DELAY_FIRED_AFTER_FAIL" in line and not fired.done():
|
||||
fired.set_result(True)
|
||||
|
||||
async with (
|
||||
run_compiled(yaml_config, line_callback=check_output),
|
||||
api_client_connected() as client,
|
||||
):
|
||||
assert await client.device_info() is not None
|
||||
# If the failed host component wrongly dropped the delay, this times out.
|
||||
await asyncio.wait_for(fired, timeout=10.0)
|
||||
Reference in New Issue
Block a user