mirror of
https://github.com/esphome/esphome.git
synced 2026-06-24 11:07:33 +00:00
[core] Attribute "took a long time" blocking warning to the owning script (#16768)
This commit is contained in:
committed by
Jesse Hills
parent
33ace9d698
commit
9bf35ab8fb
@@ -47,7 +47,7 @@ class RuntimeStatsCollector {
|
||||
// overhead between Phase A and stats belongs to "residual").
|
||||
// Residual overhead at log time = active − Σ(component) − before − tail,
|
||||
// which captures per-iteration inter-component bookkeeping (set_current_component,
|
||||
// WarnIfComponentBlockingGuard construction/destruction, feed_wdt_with_time calls,
|
||||
// LoopBlockingGuard construction/destruction, feed_wdt_with_time calls,
|
||||
// the for-loop itself).
|
||||
void record_loop_active(uint32_t active_us, uint32_t before_us, uint32_t tail_us) {
|
||||
this->period_active_count_++;
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
#include <list>
|
||||
#include <memory>
|
||||
#include <tuple>
|
||||
#include "esphome/core/application.h"
|
||||
#include "esphome/core/automation.h"
|
||||
#include "esphome/core/component.h"
|
||||
#include "esphome/core/helpers.h"
|
||||
@@ -57,6 +58,14 @@ template<typename... Ts> class Script : public ScriptLogger, public Trigger<Ts..
|
||||
this->execute(std::get<S>(tuple)...);
|
||||
}
|
||||
|
||||
// Run the action chain with this script's name published as the current source (RAII save/restore,
|
||||
// so nesting composes), so deferred work inside the script is attributed to it in blocking
|
||||
// warnings. Force-inlined to fold into the always-inlined trigger chain (no extra stack frame).
|
||||
inline void run_actions_(const Ts &...x) ESPHOME_ALWAYS_INLINE {
|
||||
ScopedSourceGuard source_guard{this->name_};
|
||||
this->trigger(x...);
|
||||
}
|
||||
|
||||
const LogString *name_{nullptr};
|
||||
};
|
||||
|
||||
@@ -74,7 +83,7 @@ template<typename... Ts> class SingleScript : public Script<Ts...> {
|
||||
return;
|
||||
}
|
||||
|
||||
this->trigger(x...);
|
||||
this->run_actions_(x...);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -91,7 +100,7 @@ template<typename... Ts> class RestartScript : public Script<Ts...> {
|
||||
this->stop_action();
|
||||
}
|
||||
|
||||
this->trigger(x...);
|
||||
this->run_actions_(x...);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -136,7 +145,7 @@ template<typename... Ts> class QueueingScript : public Script<Ts...>, public Com
|
||||
return;
|
||||
}
|
||||
|
||||
this->trigger(x...);
|
||||
this->run_actions_(x...);
|
||||
// Check if the trigger was immediate and we can continue right away.
|
||||
this->loop();
|
||||
}
|
||||
@@ -175,7 +184,7 @@ template<typename... Ts> class QueueingScript : public Script<Ts...>, public Com
|
||||
}
|
||||
|
||||
template<size_t... S> void trigger_tuple_(const std::tuple<Ts...> &tuple, std::index_sequence<S...> /*unused*/) {
|
||||
this->trigger(std::get<S>(tuple)...);
|
||||
this->run_actions_(std::get<S>(tuple)...);
|
||||
}
|
||||
|
||||
int num_queued_ = 0; // Number of queued instances (not including currently running)
|
||||
@@ -197,7 +206,7 @@ template<typename... Ts> class ParallelScript : public Script<Ts...> {
|
||||
LOG_STR_ARG(this->name_));
|
||||
return;
|
||||
}
|
||||
this->trigger(x...);
|
||||
this->run_actions_(x...);
|
||||
}
|
||||
void set_max_runs(int max_runs) { max_runs_ = max_runs; }
|
||||
|
||||
|
||||
@@ -104,9 +104,13 @@ class Application {
|
||||
void register_area(Area *area) { this->areas_.push_back(area); }
|
||||
#endif
|
||||
|
||||
void set_current_component(Component *component) { this->current_component_ = component; }
|
||||
Component *get_current_component() { return this->current_component_; }
|
||||
|
||||
// Owning script of the action chain currently executing (nullptr when none); used to attribute
|
||||
// blocking warnings for deferred work to the script that scheduled it.
|
||||
void set_current_source(const LogString *source) { this->current_source_ = source; }
|
||||
const LogString *get_current_source() { return this->current_source_; }
|
||||
|
||||
// Entity register methods (generated from entity_types.h).
|
||||
// Each entity type gets two overloads:
|
||||
// - register_<entity>(obj) — bare push_back
|
||||
@@ -393,6 +397,7 @@ class Application {
|
||||
protected:
|
||||
friend Component;
|
||||
friend class Scheduler;
|
||||
friend class LoopBlockingGuard;
|
||||
#ifdef USE_RUNTIME_STATS
|
||||
friend class runtime_stats::RuntimeStatsCollector;
|
||||
#endif
|
||||
@@ -402,6 +407,14 @@ class Application {
|
||||
/// Freshen the cached loop component start time. Called by Scheduler before each dispatch.
|
||||
void set_loop_component_start_time_(uint32_t now) { this->loop_component_start_time_ = now; }
|
||||
|
||||
// Publish the running unit's identity (component + source) and dispatch time together, so a
|
||||
// dispatch site can't set one without the others. Friend-only (Scheduler).
|
||||
void set_current_execution_context_(Component *component, const LogString *source, uint32_t now) {
|
||||
this->current_component_ = component;
|
||||
this->current_source_ = source;
|
||||
this->set_loop_component_start_time_(now);
|
||||
}
|
||||
|
||||
/// Walk all registered components looking for any whose component_state_
|
||||
/// has the given flag set. Used by Component::status_clear_*_slow_path_()
|
||||
/// (which is a friend) to decide whether to clear the corresponding bit on
|
||||
@@ -482,6 +495,7 @@ class Application {
|
||||
|
||||
// Pointer-sized members first
|
||||
Component *current_component_{nullptr};
|
||||
const LogString *current_source_{nullptr};
|
||||
|
||||
// std::vector (3 pointers each: begin, end, capacity)
|
||||
// Partitioned vector design for looping components
|
||||
@@ -554,6 +568,76 @@ class Application {
|
||||
/// Global storage of Application pointer - only one Application can exist.
|
||||
extern Application App; // NOLINT(cppcoreguidelines-avoid-non-const-global-variables)
|
||||
|
||||
/// RAII guard that publishes a current source (e.g. a script name) for a scope and restores the
|
||||
/// previous value on exit, attributing deferred work scheduled inside to that source.
|
||||
class ScopedSourceGuard {
|
||||
public:
|
||||
explicit ScopedSourceGuard(const LogString *source) : prev_(App.get_current_source()) {
|
||||
App.set_current_source(source);
|
||||
}
|
||||
~ScopedSourceGuard() { App.set_current_source(this->prev_); }
|
||||
ScopedSourceGuard(const ScopedSourceGuard &) = delete;
|
||||
ScopedSourceGuard &operator=(const ScopedSourceGuard &) = delete;
|
||||
|
||||
private:
|
||||
const LogString *prev_;
|
||||
};
|
||||
|
||||
// Times one unit of work (a component loop() or a scheduled callback) and warns if it blocks the
|
||||
// main loop too long. The constructor publishes the unit's identity + dispatch time to App;
|
||||
// finish()/the cold warning path read them back, so the guard stores no copy.
|
||||
//
|
||||
// Guards must not nest: the constructor publishes to App but never restores on destruction, so a
|
||||
// nested guard would clobber the outer's context. Safe because the two dispatch sites (component
|
||||
// loop phase, execute_item_) run strictly sequentially and aren't re-entered from a timed callback.
|
||||
class LoopBlockingGuard {
|
||||
public:
|
||||
// Publish the unit's identity + dispatch time, then start timing. The millis start lives in App,
|
||||
// so only the runtime-stats micros stamp is kept here.
|
||||
LoopBlockingGuard(Component *component, const LogString *source, uint32_t now) {
|
||||
App.set_current_execution_context_(component, source, now);
|
||||
#ifdef USE_RUNTIME_STATS
|
||||
this->started_us_ = micros();
|
||||
#endif
|
||||
}
|
||||
|
||||
// Finish the timing operation and return the current time (millis)
|
||||
// Inlined: the fast path is just millis() + subtract + compare
|
||||
inline uint32_t HOT finish() {
|
||||
#ifdef USE_RUNTIME_STATS
|
||||
uint32_t elapsed_us = micros() - this->started_us_;
|
||||
// Delays have no component; accumulate into the global counter so loop() can subtract them.
|
||||
Component *component = App.get_current_component();
|
||||
if (component != nullptr) {
|
||||
component->runtime_stats_.record_time(elapsed_us);
|
||||
} else {
|
||||
ComponentRuntimeStats::global_recorded_us += elapsed_us;
|
||||
}
|
||||
#endif
|
||||
uint32_t curr_time = MillisInternal::get();
|
||||
#ifndef USE_BENCHMARK
|
||||
// Fast path: compare against constant threshold in ms (computed at compile time from centiseconds)
|
||||
static constexpr uint32_t WARN_IF_BLOCKING_OVER_MS = static_cast<uint32_t>(WARN_IF_BLOCKING_OVER_CS) * 10U;
|
||||
uint32_t blocking_time = curr_time - App.get_loop_component_start_time();
|
||||
if (blocking_time > WARN_IF_BLOCKING_OVER_MS) [[unlikely]] {
|
||||
warn_blocking(blocking_time);
|
||||
}
|
||||
#endif
|
||||
return curr_time;
|
||||
}
|
||||
|
||||
~LoopBlockingGuard() = default;
|
||||
|
||||
#ifdef USE_RUNTIME_STATS
|
||||
protected:
|
||||
uint32_t started_us_;
|
||||
#endif
|
||||
|
||||
private:
|
||||
// Cold path; defined in component.cpp. Reads the current component/source from App to name the culprit.
|
||||
static void __attribute__((noinline, cold)) warn_blocking(uint32_t blocking_time);
|
||||
};
|
||||
|
||||
// Phase A: drain wake notifications and run the scheduler. Invoked on every
|
||||
// Application::loop() tick regardless of whether a component phase runs, so
|
||||
// scheduler items fire at their requested cadence even when the caller has
|
||||
@@ -607,7 +691,7 @@ inline void ESPHOME_ALWAYS_INLINE Application::loop() {
|
||||
// before/tail splits recorded below.
|
||||
uint32_t loop_active_start_us = micros();
|
||||
// Snapshot the cumulative component-recorded time so we can subtract the
|
||||
// slice that the scheduler spends inside its own WarnIfComponentBlockingGuard
|
||||
// slice that the scheduler spends inside its own LoopBlockingGuard
|
||||
// (scheduler.cpp) — that time is already counted in per-component stats,
|
||||
// so charging it again to "before" would double-count.
|
||||
uint64_t loop_recorded_snap = ComponentRuntimeStats::global_recorded_us;
|
||||
@@ -660,12 +744,9 @@ inline void ESPHOME_ALWAYS_INLINE Application::loop() {
|
||||
this->current_loop_index_++) {
|
||||
Component *component = this->looping_components_[this->current_loop_index_];
|
||||
|
||||
// Update the cached time before each component runs
|
||||
this->loop_component_start_time_ = last_op_end_time;
|
||||
|
||||
{
|
||||
this->set_current_component(component);
|
||||
WarnIfComponentBlockingGuard guard{component, last_op_end_time};
|
||||
// Guard publishes this component (no script source) + dispatch time, then times loop().
|
||||
LoopBlockingGuard guard{component, nullptr, last_op_end_time};
|
||||
component->loop();
|
||||
// Use the finish method to get the current time as the end time
|
||||
last_op_end_time = guard.finish();
|
||||
|
||||
@@ -201,7 +201,10 @@ template<typename... Ts> class DelayAction : public Action<Ts...> {
|
||||
/* component= */ nullptr, Scheduler::SchedulerItem::TIMEOUT, Scheduler::NameType::SELF_POINTER,
|
||||
/* static_name= */ reinterpret_cast<const char *>(this), /* hash_or_id= */ 0, this->delay_.value(),
|
||||
[this]() { this->play_next_(); },
|
||||
/* is_retry= */ false, /* skip_cancel= */ this->num_running_ > 1);
|
||||
/* is_retry= */ false, /* skip_cancel= */ this->num_running_ > 1,
|
||||
// Record the owning script (if any) so the blocking warning can name it; propagates across
|
||||
// chained delays via the scheduler.
|
||||
/* source= */ App.get_current_source());
|
||||
} else {
|
||||
// For delays with arguments, capture by value to preserve argument values
|
||||
// Arguments must be copied because original references may be invalid after delay
|
||||
@@ -212,7 +215,9 @@ template<typename... Ts> class DelayAction : public Action<Ts...> {
|
||||
/* component= */ nullptr, Scheduler::SchedulerItem::TIMEOUT, Scheduler::NameType::SELF_POINTER,
|
||||
/* static_name= */ reinterpret_cast<const char *>(this), /* hash_or_id= */ 0, this->delay_.value(x...),
|
||||
std::move(f),
|
||||
/* is_retry= */ false, /* skip_cancel= */ this->num_running_ > 1);
|
||||
/* is_retry= */ false, /* skip_cancel= */ this->num_running_ > 1,
|
||||
// See the no-argument branch above: record the owning script for log attribution.
|
||||
/* source= */ App.get_current_source());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -258,9 +258,11 @@ void Component::call() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
bool Component::should_warn_of_blocking(uint32_t blocking_time) {
|
||||
bool Component::should_warn_of_blocking(uint32_t blocking_time, uint32_t &threshold_ms_out) {
|
||||
// Convert centisecond threshold to milliseconds for comparison
|
||||
uint32_t threshold_ms = static_cast<uint32_t>(this->warn_if_blocking_over_) * 10U;
|
||||
// Report the threshold that was exceeded (before any ratcheting below) so the warning is accurate.
|
||||
threshold_ms_out = threshold_ms;
|
||||
if (blocking_time > threshold_ms) {
|
||||
// Set new threshold: blocking_time + increment, converted back to centiseconds
|
||||
uint32_t new_threshold_ms = blocking_time + WARN_IF_BLOCKING_INCREMENT_MS;
|
||||
@@ -491,19 +493,25 @@ uint32_t PollingComponent::get_update_interval() const { return this->update_int
|
||||
uint64_t ComponentRuntimeStats::global_recorded_us = 0; // NOLINT(cppcoreguidelines-avoid-non-const-global-variables)
|
||||
#endif
|
||||
|
||||
void __attribute__((noinline, cold))
|
||||
WarnIfComponentBlockingGuard::warn_blocking(Component *component, uint32_t blocking_time) {
|
||||
bool should_warn;
|
||||
void __attribute__((noinline, cold)) LoopBlockingGuard::warn_blocking(uint32_t blocking_time) {
|
||||
// Identity is published on App by the caller before the guard is built; read it back here.
|
||||
Component *component = App.get_current_component();
|
||||
// Component-less path always warns (the caller already checked the constant threshold).
|
||||
uint32_t threshold_ms = WARN_IF_BLOCKING_OVER_MS;
|
||||
if (component != nullptr && !component->should_warn_of_blocking(blocking_time, threshold_ms)) {
|
||||
return; // Component's (possibly ratcheted) threshold not exceeded yet
|
||||
}
|
||||
// Component name if any, else the published source (owning script), else a generic label.
|
||||
const LogString *name;
|
||||
if (component != nullptr) {
|
||||
should_warn = component->should_warn_of_blocking(blocking_time);
|
||||
name = component->get_component_log_str();
|
||||
} else {
|
||||
should_warn = true; // Already checked > WARN_IF_BLOCKING_OVER_MS in caller
|
||||
}
|
||||
if (should_warn) {
|
||||
ESP_LOGW(TAG, "%s took a long time for an operation (%" PRIu32 " ms), max is 30 ms",
|
||||
component == nullptr ? LOG_STR_LITERAL("<null>") : LOG_STR_ARG(component->get_component_log_str()),
|
||||
blocking_time);
|
||||
name = App.get_current_source();
|
||||
if (name == nullptr)
|
||||
name = LOG_STR("a scheduled task");
|
||||
}
|
||||
ESP_LOGW(TAG, "%s took a long time for an operation (%" PRIu32 " ms), max is %" PRIu32 " ms", LOG_STR_ARG(name),
|
||||
blocking_time, threshold_ms);
|
||||
}
|
||||
|
||||
#ifdef USE_SETUP_PRIORITY_OVERRIDE
|
||||
|
||||
@@ -118,7 +118,7 @@ struct ComponentRuntimeStats {
|
||||
|
||||
// Cumulative sum of every record_time() duration since boot, across all
|
||||
// components. Used by Application::loop() to snapshot time spent inside
|
||||
// WarnIfComponentBlockingGuard (including guards constructed by the
|
||||
// LoopBlockingGuard (including guards constructed by the
|
||||
// scheduler at scheduler.cpp) so main-loop overhead accounting can
|
||||
// subtract scheduled-callback time from the before_loop_tasks_ wall time.
|
||||
static uint64_t global_recorded_us; // NOLINT(cppcoreguidelines-avoid-non-const-global-variables)
|
||||
@@ -326,7 +326,7 @@ class Component {
|
||||
return component_source_lookup(this->component_source_index_);
|
||||
}
|
||||
|
||||
bool should_warn_of_blocking(uint32_t blocking_time);
|
||||
bool should_warn_of_blocking(uint32_t blocking_time, uint32_t &threshold_ms_out);
|
||||
|
||||
protected:
|
||||
friend class Application;
|
||||
@@ -571,7 +571,7 @@ class Component {
|
||||
volatile bool pending_enable_loop_{false}; ///< ISR-safe flag for enable_loop_soon_any_context
|
||||
#ifdef USE_RUNTIME_STATS
|
||||
friend class runtime_stats::RuntimeStatsCollector;
|
||||
friend class WarnIfComponentBlockingGuard;
|
||||
friend class LoopBlockingGuard;
|
||||
ComponentRuntimeStats runtime_stats_;
|
||||
#endif
|
||||
};
|
||||
@@ -619,59 +619,7 @@ class PollingComponent : public Component {
|
||||
uint32_t update_interval_;
|
||||
};
|
||||
|
||||
// millis() and micros() are available via hal.h
|
||||
|
||||
class WarnIfComponentBlockingGuard {
|
||||
public:
|
||||
WarnIfComponentBlockingGuard(Component *component, uint32_t start_time)
|
||||
: started_(start_time),
|
||||
component_(component)
|
||||
#ifdef USE_RUNTIME_STATS
|
||||
,
|
||||
started_us_(micros())
|
||||
#endif
|
||||
{
|
||||
}
|
||||
|
||||
// Finish the timing operation and return the current time (millis)
|
||||
// Inlined: the fast path is just millis() + subtract + compare
|
||||
inline uint32_t HOT finish() {
|
||||
#ifdef USE_RUNTIME_STATS
|
||||
uint32_t elapsed_us = micros() - this->started_us_;
|
||||
// component_ is nullptr for self-keyed scheduler items (set_timeout/set_interval(self, ...))
|
||||
if (this->component_ != nullptr) {
|
||||
this->component_->runtime_stats_.record_time(elapsed_us);
|
||||
} else {
|
||||
// Still accumulate into the global counter so Application::loop() can subtract
|
||||
// this time from before_loop_tasks_ wall time.
|
||||
ComponentRuntimeStats::global_recorded_us += elapsed_us;
|
||||
}
|
||||
#endif
|
||||
uint32_t curr_time = MillisInternal::get();
|
||||
#ifndef USE_BENCHMARK
|
||||
// Fast path: compare against constant threshold in ms (computed at compile time from centiseconds)
|
||||
static constexpr uint32_t WARN_IF_BLOCKING_OVER_MS = static_cast<uint32_t>(WARN_IF_BLOCKING_OVER_CS) * 10U;
|
||||
uint32_t blocking_time = curr_time - this->started_;
|
||||
if (blocking_time > WARN_IF_BLOCKING_OVER_MS) [[unlikely]] {
|
||||
warn_blocking(this->component_, blocking_time);
|
||||
}
|
||||
#endif
|
||||
return curr_time;
|
||||
}
|
||||
|
||||
~WarnIfComponentBlockingGuard() = default;
|
||||
|
||||
protected:
|
||||
uint32_t started_;
|
||||
Component *component_;
|
||||
#ifdef USE_RUNTIME_STATS
|
||||
uint32_t started_us_;
|
||||
#endif
|
||||
|
||||
private:
|
||||
// Cold path for blocking warning - defined in component.cpp
|
||||
static void __attribute__((noinline, cold)) warn_blocking(Component *component, uint32_t blocking_time);
|
||||
};
|
||||
// LoopBlockingGuard lives in application.h because it reads its state from App.
|
||||
|
||||
// Function to clear setup priority overrides after all components are set up
|
||||
// Only has an implementation when USE_SETUP_PRIORITY_OVERRIDE is defined
|
||||
|
||||
@@ -16,7 +16,7 @@ namespace esphome {
|
||||
|
||||
// Friend-gated accessor for a fast millis() variant intended only for
|
||||
// known task-context callers on the main loop hot path (Application::loop()
|
||||
// and WarnIfComponentBlockingGuard::finish()). It skips the ISR-context
|
||||
// and LoopBlockingGuard::finish()). It skips the ISR-context
|
||||
// dispatch that the public esphome::millis() pays on ESP32 and libretiny.
|
||||
//
|
||||
// MUST NOT be called from ISR context: on ESP32 and libretiny it calls the
|
||||
@@ -50,7 +50,7 @@ class MillisInternal {
|
||||
#endif
|
||||
}
|
||||
friend class Application;
|
||||
friend class WarnIfComponentBlockingGuard;
|
||||
friend class LoopBlockingGuard;
|
||||
};
|
||||
|
||||
} // namespace esphome
|
||||
|
||||
@@ -131,7 +131,8 @@ bool Scheduler::is_retry_cancelled_locked_(Component *component, NameType name_t
|
||||
// name_type determines storage type: STATIC_STRING uses static_name, others use hash_or_id
|
||||
void HOT Scheduler::set_timer_common_(Component *component, SchedulerItem::Type type, NameType name_type,
|
||||
const char *static_name, uint32_t hash_or_id, uint32_t delay,
|
||||
std::function<void()> &&func, bool is_retry, bool skip_cancel) {
|
||||
std::function<void()> &&func, bool is_retry, bool skip_cancel,
|
||||
const LogString *source) {
|
||||
if (delay == SCHEDULER_DONT_RUN) {
|
||||
// Still need to cancel existing timer if we have a name/id
|
||||
if (!skip_cancel) {
|
||||
@@ -174,7 +175,12 @@ void HOT Scheduler::set_timer_common_(Component *component, SchedulerItem::Type
|
||||
|
||||
// Create and populate the scheduler item
|
||||
SchedulerItem *item = this->get_item_from_pool_locked_();
|
||||
item->component = component;
|
||||
// SELF_POINTER items store the source name (owning script) in the union slot instead of a component.
|
||||
if (name_type == NameType::SELF_POINTER) {
|
||||
item->source_name = source;
|
||||
} else {
|
||||
item->component = component;
|
||||
}
|
||||
item->set_name(name_type, static_name, hash_or_id);
|
||||
item->type = type;
|
||||
// Use destroy + placement-new instead of move-assignment.
|
||||
@@ -642,8 +648,8 @@ uint32_t HOT Scheduler::call(uint32_t now) {
|
||||
// Not reached timeout yet, done for this call
|
||||
break;
|
||||
}
|
||||
// Don't run on failed components
|
||||
if (item->component != nullptr && item->component->is_failed()) {
|
||||
// Don't run on failed components (is_item_failed_ exempts SELF_POINTER delays).
|
||||
if (this->is_item_failed_(item)) {
|
||||
LockGuard guard{this->lock_};
|
||||
this->recycle_item_main_loop_(this->pop_raw_locked_());
|
||||
continue;
|
||||
@@ -790,10 +796,21 @@ Scheduler::SchedulerItem *HOT Scheduler::pop_raw_locked_() {
|
||||
|
||||
// Helper to execute a scheduler item
|
||||
uint32_t HOT Scheduler::execute_item_(SchedulerItem *item, uint32_t now) {
|
||||
App.set_current_component(item->component);
|
||||
// Freshen so callbacks reading App.get_loop_component_start_time() see this item's dispatch time.
|
||||
App.set_loop_component_start_time_(now);
|
||||
WarnIfComponentBlockingGuard guard{item->component, now};
|
||||
// Resolve the component and (for SELF_POINTER/deferred items) the source name from the shared
|
||||
// union slot with a single name-type check. Self-keyed items have no owning component; their slot
|
||||
// holds the source name (e.g. the owning script), published so deferred work chained inside the
|
||||
// callback re-captures it and the blocking warning can name the script instead of "<null>".
|
||||
Component *component;
|
||||
const LogString *source;
|
||||
if (item->get_name_type() == NameType::SELF_POINTER) {
|
||||
component = nullptr;
|
||||
source = item->source_name;
|
||||
} else {
|
||||
component = item->component;
|
||||
source = nullptr;
|
||||
}
|
||||
// Guard publishes the item's identity + dispatch time, then times the callback.
|
||||
LoopBlockingGuard guard{component, source, now};
|
||||
item->callback();
|
||||
uint32_t end = guard.finish();
|
||||
// Feed the watchdog after each scheduled item (both main heap and defer
|
||||
|
||||
@@ -183,11 +183,12 @@ class Scheduler {
|
||||
|
||||
protected:
|
||||
struct SchedulerItem {
|
||||
// Ordered by size to minimize padding.
|
||||
// `component` while live; `next_free` while in scheduler_item_pool_head_ (mutually exclusive).
|
||||
// Ordered by size to minimize padding. Mutually exclusive by state; read the component via
|
||||
// get_component() so SELF_POINTER items read as component-less.
|
||||
union {
|
||||
Component *component;
|
||||
SchedulerItem *next_free;
|
||||
Component *component; // live, non-SELF_POINTER: owning component
|
||||
const LogString *source_name; // live SELF_POINTER: owning script name (log attribution)
|
||||
SchedulerItem *next_free; // while pooled
|
||||
};
|
||||
// Optimized name storage using tagged union - zero heap allocation
|
||||
union {
|
||||
@@ -302,14 +303,23 @@ class Scheduler {
|
||||
next_execution_high_ = static_cast<uint16_t>(value >> 32);
|
||||
}
|
||||
constexpr const char *get_type_str() const { return (type == TIMEOUT) ? "timeout" : "interval"; }
|
||||
const LogString *get_source() const { return component ? component->get_component_log_str() : LOG_STR("unknown"); }
|
||||
// The owning component, or nullptr for SELF_POINTER items (whose slot holds source_name instead).
|
||||
// All component access goes through this so SELF_POINTER items read as component-less.
|
||||
Component *get_component() const { return name_type_ == NameType::SELF_POINTER ? nullptr : component; }
|
||||
const LogString *get_source() const {
|
||||
// Same no-source label as warn_blocking, for consistent log vocabulary.
|
||||
if (name_type_ == NameType::SELF_POINTER)
|
||||
return source_name != nullptr ? source_name : LOG_STR("a scheduled task");
|
||||
return component != nullptr ? component->get_component_log_str() : LOG_STR("unknown");
|
||||
}
|
||||
};
|
||||
|
||||
// Common implementation for both timeout and interval
|
||||
// name_type determines storage type: STATIC_STRING uses static_name, others use hash_or_id
|
||||
// `source` is stored (in the union slot) only for SELF_POINTER items; ignored otherwise.
|
||||
void set_timer_common_(Component *component, SchedulerItem::Type type, NameType name_type, const char *static_name,
|
||||
uint32_t hash_or_id, uint32_t delay, std::function<void()> &&func, bool is_retry = false,
|
||||
bool skip_cancel = false);
|
||||
bool skip_cancel = false, const LogString *source = nullptr);
|
||||
|
||||
// Common implementation for retry - Remove before 2026.8.0
|
||||
// name_type determines storage type: STATIC_STRING uses static_name, others use hash_or_id
|
||||
@@ -402,8 +412,10 @@ class Scheduler {
|
||||
// Fixes: https://github.com/esphome/esphome/issues/11940
|
||||
if (item == nullptr)
|
||||
return false;
|
||||
if (item->component != component || item->type != type || (skip_removed && this->is_item_removed_locked_(item)) ||
|
||||
(match_retry && !item->is_retry)) {
|
||||
// get_component() is nullptr for SELF_POINTER items (their cancels pass nullptr too), so they
|
||||
// match by the `this` key alone.
|
||||
if (item->get_component() != component || item->type != type ||
|
||||
(skip_removed && this->is_item_removed_locked_(item)) || (match_retry && !item->is_retry)) {
|
||||
return false;
|
||||
}
|
||||
// Name type must match
|
||||
@@ -423,11 +435,16 @@ class Scheduler {
|
||||
// Helper to execute a scheduler item
|
||||
uint32_t execute_item_(SchedulerItem *item, uint32_t now);
|
||||
|
||||
// Helper to check if item should be skipped
|
||||
bool should_skip_item_(SchedulerItem *item) const {
|
||||
return is_item_removed_(item) || (item->component != nullptr && item->component->is_failed());
|
||||
// True if the item's component is failed (so it must not run). SELF_POINTER delays have no
|
||||
// component (get_component() == nullptr) and always fire.
|
||||
bool is_item_failed_(SchedulerItem *item) const {
|
||||
Component *component = item->get_component();
|
||||
return component != nullptr && component->is_failed();
|
||||
}
|
||||
|
||||
// Helper to check if item should be skipped
|
||||
bool should_skip_item_(SchedulerItem *item) const { return is_item_removed_(item) || this->is_item_failed_(item); }
|
||||
|
||||
// Helper to recycle a SchedulerItem back to the pool.
|
||||
// Takes a raw pointer — caller transfers ownership. The item is either added to the
|
||||
// pool or deleted if the pool is full.
|
||||
|
||||
22
tests/integration/fixtures/scheduler_blocking_warning.yaml
Normal file
22
tests/integration/fixtures/scheduler_blocking_warning.yaml
Normal file
@@ -0,0 +1,22 @@
|
||||
esphome:
|
||||
name: scheduler-blocking-warning
|
||||
on_boot:
|
||||
then:
|
||||
- script.execute: blocking_script
|
||||
|
||||
host:
|
||||
api:
|
||||
logger:
|
||||
level: DEBUG
|
||||
|
||||
# The busy-block runs in the second delay's continuation; the warning must name the script. Two
|
||||
# delays verify the source survives chained delays (the scheduler republishes it each continuation).
|
||||
script:
|
||||
- id: blocking_script
|
||||
then:
|
||||
- delay: 10ms
|
||||
- delay: 10ms
|
||||
- lambda: |-
|
||||
const uint32_t start = millis();
|
||||
while (millis() - start < 80) {
|
||||
}
|
||||
@@ -0,0 +1,30 @@
|
||||
esphome:
|
||||
name: scheduler-blocking-generic
|
||||
|
||||
host:
|
||||
api:
|
||||
logger:
|
||||
level: DEBUG
|
||||
|
||||
globals:
|
||||
- id: done
|
||||
type: bool
|
||||
restore_value: false
|
||||
initial_value: "false"
|
||||
|
||||
# A delay in a plain (non-script) automation has no owning script, so the block must log the
|
||||
# generic "a scheduled task" label, not a script name.
|
||||
interval:
|
||||
- interval: 100ms
|
||||
id: gen_interval
|
||||
then:
|
||||
- if:
|
||||
condition:
|
||||
lambda: "return !id(done);"
|
||||
then:
|
||||
- lambda: "id(done) = true;"
|
||||
- delay: 10ms
|
||||
- lambda: |-
|
||||
const uint32_t start = millis();
|
||||
while (millis() - start < 80) {
|
||||
}
|
||||
@@ -0,0 +1,29 @@
|
||||
esphome:
|
||||
name: scheduler-delay-failed
|
||||
|
||||
host:
|
||||
api:
|
||||
logger:
|
||||
level: DEBUG
|
||||
|
||||
globals:
|
||||
- id: started
|
||||
type: bool
|
||||
restore_value: false
|
||||
initial_value: "false"
|
||||
|
||||
# The interval marks itself failed, then schedules a delay. The delay must still fire: a failed
|
||||
# component must not drop it, since the SELF_POINTER scheduler item has no owning component.
|
||||
interval:
|
||||
- interval: 100ms
|
||||
id: host_interval
|
||||
then:
|
||||
- if:
|
||||
condition:
|
||||
lambda: "return !id(started);"
|
||||
then:
|
||||
- lambda: |-
|
||||
id(started) = true;
|
||||
id(host_interval)->mark_failed();
|
||||
- delay: 200ms
|
||||
- logger.log: "DELAY_FIRED_AFTER_FAIL"
|
||||
120
tests/integration/test_scheduler_blocking_warning.py
Normal file
120
tests/integration/test_scheduler_blocking_warning.py
Normal file
@@ -0,0 +1,120 @@
|
||||
"""Integration tests for blocking-warning source attribution.
|
||||
|
||||
A blocking operation that runs inside a deferred scheduler continuation (e.g. after a ``delay``
|
||||
in a script) used to be reported as ``<null> took a long time for an operation (NN ms),
|
||||
max is 30 ms`` because the continuation carries no component. The warning should instead name
|
||||
the owning script and report the real threshold (50 ms).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import re
|
||||
|
||||
import pytest
|
||||
|
||||
from .types import APIClientConnectedFactory, RunCompiledFunction
|
||||
|
||||
# Matches: "<source> took a long time for an operation (NN ms), max is NN ms"
|
||||
WARN_PATTERN = re.compile(
|
||||
r"(\S+) took a long time for an operation \((\d+) ms\), max is (\d+) ms"
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_scheduler_blocking_warning(
|
||||
yaml_config: str,
|
||||
run_compiled: RunCompiledFunction,
|
||||
api_client_connected: APIClientConnectedFactory,
|
||||
) -> None:
|
||||
"""Deferred blocking work inside a script is attributed to the script, not "<null>"."""
|
||||
loop = asyncio.get_running_loop()
|
||||
warning_future: asyncio.Future[str] = loop.create_future()
|
||||
|
||||
def check_output(line: str) -> None:
|
||||
if WARN_PATTERN.search(line) and not warning_future.done():
|
||||
warning_future.set_result(line)
|
||||
|
||||
async with (
|
||||
run_compiled(yaml_config, line_callback=check_output),
|
||||
api_client_connected() as client,
|
||||
):
|
||||
device_info = await client.device_info()
|
||||
assert device_info is not None
|
||||
|
||||
# on_boot runs the script, which defers via delay then busy-blocks > 50 ms in the
|
||||
# continuation, tripping the blocking warning.
|
||||
warning_line = await asyncio.wait_for(warning_future, timeout=10.0)
|
||||
|
||||
# Must name the owning script, not "<null>" and not the generic fallback.
|
||||
assert "<null>" not in warning_line, (
|
||||
f"Warning should name the script, got: {warning_line}"
|
||||
)
|
||||
assert "a scheduled task" not in warning_line, (
|
||||
f"Warning should name the script, got: {warning_line}"
|
||||
)
|
||||
match = WARN_PATTERN.search(warning_line)
|
||||
assert match is not None
|
||||
assert match.group(1) == "blocking_script", (
|
||||
f"Warning should name 'blocking_script', got: {warning_line}"
|
||||
)
|
||||
# The reported threshold must be the real default (50 ms), not the stale "30 ms".
|
||||
assert match.group(3) == "50", f"Expected 'max is 50 ms', got: {warning_line}"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_scheduler_blocking_warning_generic_source(
|
||||
yaml_config: str,
|
||||
run_compiled: RunCompiledFunction,
|
||||
api_client_connected: APIClientConnectedFactory,
|
||||
) -> None:
|
||||
"""A delay in a plain (non-script) automation logs the generic label, not a script name."""
|
||||
loop = asyncio.get_running_loop()
|
||||
warning_future: asyncio.Future[str] = loop.create_future()
|
||||
|
||||
def check_output(line: str) -> None:
|
||||
if WARN_PATTERN.search(line) and not warning_future.done():
|
||||
warning_future.set_result(line)
|
||||
|
||||
async with (
|
||||
run_compiled(yaml_config, line_callback=check_output),
|
||||
api_client_connected() as client,
|
||||
):
|
||||
assert await client.device_info() is not None
|
||||
warning_line = await asyncio.wait_for(warning_future, timeout=10.0)
|
||||
|
||||
assert "a scheduled task took a long time" in warning_line, (
|
||||
f"Non-script deferred work should log the generic label, got: {warning_line}"
|
||||
)
|
||||
assert "<null>" not in warning_line
|
||||
match = WARN_PATTERN.search(warning_line)
|
||||
assert match is not None and match.group(3) == "50", (
|
||||
f"Expected 'max is 50 ms', got: {warning_line}"
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_scheduler_delay_runs_on_failed_component(
|
||||
yaml_config: str,
|
||||
run_compiled: RunCompiledFunction,
|
||||
api_client_connected: APIClientConnectedFactory,
|
||||
) -> None:
|
||||
"""A delay must still fire even when its context component is marked failed.
|
||||
|
||||
Deferred (SELF_POINTER) scheduler items have no owning component, so the scheduler's
|
||||
failed-component skip must not drop them.
|
||||
"""
|
||||
loop = asyncio.get_running_loop()
|
||||
fired: asyncio.Future[bool] = loop.create_future()
|
||||
|
||||
def check_output(line: str) -> None:
|
||||
if "DELAY_FIRED_AFTER_FAIL" in line and not fired.done():
|
||||
fired.set_result(True)
|
||||
|
||||
async with (
|
||||
run_compiled(yaml_config, line_callback=check_output),
|
||||
api_client_connected() as client,
|
||||
):
|
||||
assert await client.device_info() is not None
|
||||
# If the failed host component wrongly dropped the delay, this times out.
|
||||
await asyncio.wait_for(fired, timeout=10.0)
|
||||
Reference in New Issue
Block a user